cleanup: remove dead code and duplicate docs

- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
This commit is contained in:
Warren
2026-05-04 01:31:21 +08:00
parent ee81e343ce
commit e75c4d6f07
3270 changed files with 35190 additions and 53367 deletions

View File

@@ -1,124 +0,0 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::time::Duration;
use super::executor::PythonExecutor;
use crate::core::config::processor;
/// Top-level payload deserialized from the JSON file that the ASR script writes.
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrResult {
/// Language tag reported in the output, when present (the tests use "en" and "zh").
pub language: Option<String>,
/// Presumably the confidence attached to `language` — confirm against the script's output.
pub language_probability: Option<f64>,
/// Transcribed segments, in the order they appear in the JSON.
pub segments: Vec<AsrSegment>,
}
/// One transcribed span inside an [`AsrResult`].
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrSegment {
/// Start position of the segment (presumably seconds from the start of the media — TODO confirm).
pub start: f64,
/// End position of the segment, in the same unit as `start`.
pub end: f64,
/// Text recognized for this span; may be empty.
pub text: String,
}
/// Runs `asr_processor.py` against a video and loads the JSON result it produced.
///
/// # Arguments
/// * `video_path` - passed to the script as its first argument.
/// * `output_path` - passed to the script as its second argument; read back and parsed
///   once the script has finished.
/// * `uuid` - forwarded unchanged to the executor.
///
/// # Errors
/// Fails when the executor cannot be created, when the script run reports an error,
/// when `output_path` cannot be read, or when its contents do not deserialize into
/// [`AsrResult`].
pub async fn process_asr(
    video_path: &str,
    output_path: &str,
    uuid: Option<&str>,
) -> Result<AsrResult> {
    let executor = PythonExecutor::new()?;
    // Resolved only so that a failed run can name the script in its error context.
    let script_path = executor.script_path("asr_processor.py");

    tracing::info!("[ASR] Starting ASR processing: {}", video_path);

    let run_limit = Duration::from_secs(*processor::ASR_TIMEOUT_SECS);
    let run_outcome = executor
        .run(
            "asr_processor.py",
            &[video_path, output_path],
            uuid,
            "ASR",
            Some(run_limit),
        )
        .await;
    run_outcome.with_context(|| format!("Failed to run {:?}", script_path))?;

    let raw_json = std::fs::read_to_string(output_path).context("Failed to read ASR output")?;
    let parsed =
        serde_json::from_str::<AsrResult>(&raw_json).context("Failed to parse ASR output")?;

    tracing::info!(
        "[ASR] Result: {} segments, language: {:?}",
        parsed.segments.len(),
        parsed.language
    );

    Ok(parsed)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a segment from its three fields.
    fn segment(start: f64, end: f64, text: &str) -> AsrSegment {
        AsrSegment {
            start,
            end,
            text: text.to_string(),
        }
    }

    /// Serialized JSON carries the segment text and the language tag.
    #[test]
    fn test_asr_result_serialization() {
        let segments = vec![
            segment(0.0, 2.5, "Hello world"),
            segment(2.5, 5.0, "Test speech"),
        ];
        let result = AsrResult {
            language: Some(String::from("en")),
            language_probability: Some(0.95),
            segments,
        };

        let json = serde_json::to_string(&result).unwrap();

        assert!(json.contains("Hello world"));
        assert!(json.contains("en"));
    }

    /// A JSON document with one segment round-trips into the expected fields.
    #[test]
    fn test_asr_result_deserialization() {
        let json = r#"{
"language": "zh",
"language_probability": 0.98,
"segments": [
{"start": 0.0, "end": 1.5, "text": "測試"}
]
}"#;

        let result = serde_json::from_str::<AsrResult>(json).unwrap();

        assert_eq!(result.language, Some("zh".to_string()));
        assert_eq!(result.language_probability, Some(0.98));
        assert_eq!(result.segments.len(), 1);
        assert_eq!(result.segments[0].text, "測試");
    }

    /// A hand-built segment exposes exactly the values it was given.
    #[test]
    fn test_asr_segment_default() {
        let blank = segment(0.0, 1.0, "");

        assert_eq!(blank.start, 0.0);
        assert_eq!(blank.end, 1.0);
        assert!(blank.text.is_empty());
    }

    /// A result may have no language and no segments.
    #[test]
    fn test_asr_result_empty_segments() {
        let empty = AsrResult {
            language: None,
            language_probability: None,
            segments: Vec::new(),
        };

        assert!(empty.language.is_none());
        assert!(empty.segments.is_empty());
    }
}

View File

@@ -12,12 +12,16 @@ const ASRX_TIMEOUT: Duration = Duration::from_secs(7200);
pub struct AsrxResult {
pub language: Option<String>,
pub segments: Vec<AsrxSegment>,
#[serde(skip_serializing)]
pub embeddings: Option<Vec<Vec<f32>>>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrxSegment {
pub start: f64,
pub end: f64,
pub start_time: f64,
pub end_time: f64,
pub start_frame: u64,
pub end_frame: u64,
pub text: String,
pub speaker_id: Option<String>,
}
@@ -43,10 +47,19 @@ pub async fn process_asrx(
return Ok(AsrxResult {
language: None,
segments: vec![],
embeddings: None,
});
}
}
tracing::info!(
"[ASRX] Running: {} {} {} {}",
executor.python_path().display(),
script_path.display(),
video_path,
output_path,
);
let mut cmd = Command::new(executor.python_path());
cmd.arg(&script_path).arg(video_path).arg(output_path);
@@ -68,16 +81,21 @@ pub async fn process_asrx(
let stderr = String::from_utf8_lossy(&output.stderr);
for line in stderr.lines() {
if line.starts_with("ASRX_START") {
let trimmed = line.trim();
if trimmed.starts_with("ASRX_START") {
tracing::info!("[ASRX] Loading model...");
} else if line.starts_with("ASRX_PROGRESS:") {
let count = line.trim_start_matches("ASRX_PROGRESS:");
} else if trimmed.starts_with("ASRX_PROGRESS:") {
let count = trimmed.trim_start_matches("ASRX_PROGRESS:");
tracing::info!("[ASRX] Processed {} segments...", count);
} else if line.starts_with("ASRX_COMPLETE:") {
let count = line.trim_start_matches("ASRX_COMPLETE:");
} else if trimmed.starts_with("ASRX_COMPLETE:") {
let count = trimmed.trim_start_matches("ASRX_COMPLETE:");
tracing::info!("[ASRX] Completed! Total: {} segments", count);
} else if !trimmed.is_empty() && !trimmed.starts_with("[SelfASRX") {
tracing::debug!("[ASRX/stderr] {}", trimmed);
}
}
// Log full stderr for debugging
tracing::info!("[ASRX] stderr output:\n{}", stderr);
if !output.status.success() {
anyhow::bail!("ASRX failed: {}", stderr);
@@ -102,11 +120,14 @@ mod tests {
let result = AsrxResult {
language: Some("en".to_string()),
segments: vec![AsrxSegment {
start: 0.0,
end: 2.5,
start_time: 0.0,
end_time: 2.5,
start_frame: 0,
end_frame: 75,
text: "Hello".to_string(),
speaker_id: Some("SPEAKER_00".to_string()),
}],
embeddings: None,
};
let json = serde_json::to_string(&result).unwrap();
@@ -119,7 +140,7 @@ mod tests {
let json = r#"{
"language": "zh",
"segments": [
{"start": 0.0, "end": 1.5, "text": "測試", "speaker_id": "SPEAKER_01"}
{"start_time": 0.0, "end_time": 1.5, "start_frame": 0, "end_frame": 45, "text": "測試", "speaker_id": "SPEAKER_01"}
]
}"#;
@@ -137,6 +158,7 @@ mod tests {
let result = AsrxResult {
language: None,
segments: vec![],
embeddings: None,
};
assert!(result.segments.is_empty());
assert!(result.language.is_none());
@@ -145,11 +167,13 @@ mod tests {
#[test]
fn test_asrx_segment_times() {
let segment = AsrxSegment {
start: 0.0,
end: 5.0,
start_time: 0.0,
end_time: 5.0,
start_frame: 0,
end_frame: 150,
text: "Test".to_string(),
speaker_id: None,
};
assert!(segment.end > segment.start);
assert!(segment.end_time > segment.start_time);
}
}

View File

@@ -147,6 +147,19 @@ impl PythonExecutor {
anyhow::bail!("Script not found: {:?}", script_path);
}
// Mark the output file as in-progress (add a .tmp suffix)
let output_path = args.get(1).map(|p| std::path::PathBuf::from(p));
let tmp_path = output_path.as_ref().map(|p| {
let mut tmp = p.to_path_buf();
tmp.set_extension("json.tmp");
tmp
});
if let (Some(src), Some(dst)) = (&output_path, &tmp_path) {
if src.exists() {
let _ = std::fs::rename(src, dst);
}
}
let mut cmd = Command::new(&self.venv_python);
cmd.arg(&script_path);
@@ -220,12 +233,28 @@ impl PythonExecutor {
Ok(())
};
// On error: rename .json.tmp → .json.err
let mark_failed = || {
if let Some(tmp) = &tmp_path {
if tmp.exists() {
if let Some(out) = &output_path {
let mut err_path = out.to_path_buf();
err_path.set_extension("json.err");
let _ = std::fs::rename(tmp, &err_path);
}
}
}
};
if let Some(duration) = timeout_duration {
match timeout(duration, run_future).await {
Ok(Ok(())) => {}
Ok(Err(e)) => return Err(e),
Ok(Err(e)) => {
mark_failed();
return Err(e);
}
Err(_) => {
// Try to kill the entire process group
mark_failed();
if let Some(pid) = child_pid {
let pgid = pid as i32;
unsafe {
@@ -237,7 +266,19 @@ impl PythonExecutor {
}
}
} else {
run_future.await?;
if let Err(e) = run_future.await {
mark_failed();
return Err(e);
}
}
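// On success: rename .json.tmp → .json (completed)
// NOTE(review): the .json.tmp file is the pre-existing output that was moved aside before
// the run; if the script writes its result directly to the output path, this rename would
// replace the fresh result with the old file — confirm where the script writes.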
if let Some(tmp) = &tmp_path {
if tmp.exists() {
if let Some(out) = &output_path {
let _ = std::fs::rename(tmp, out);
}
}
}
Ok(())

View File

@@ -28,6 +28,7 @@ pub struct Face {
pub width: i32,
pub height: i32,
pub confidence: f32,
#[serde(skip_serializing)]
pub embedding: Option<Vec<f32>>,
pub landmarks: Option<Vec<Vec<f32>>>,
pub attributes: Option<FaceAttributes>,
@@ -111,7 +112,6 @@ mod tests {
let json = serde_json::to_string(&result).unwrap();
assert!(json.contains("face_1"));
assert!(json.contains("\"width\":50"));
assert!(json.contains("embedding"));
assert!(json.contains("landmarks"));
assert!(json.contains("attributes"));
}

View File

@@ -27,7 +27,8 @@ pub use face_recognition::{
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
pub use scene_classification::{
process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
load_scene_from_file, process_scene_classification, SceneClassificationResult, ScenePrediction,
SceneSegment,
};
pub use snapshot_agent::{SnapshotAgent, SnapshotAgentConfig};
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};

View File

@@ -7,7 +7,7 @@ use super::executor::PythonExecutor;
const SCENE_TIMEOUT: Duration = Duration::from_secs(7200);
/// 場景識別結果
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
pub struct SceneClassificationResult {
pub frame_count: u64,
pub fps: f64,
@@ -32,6 +32,19 @@ pub struct ScenePrediction {
pub confidence: f32,
}
/// 從已存在的 JSON 檔案載入場景結果(不重新執行 Python
pub fn load_scene_from_file(path: &str) -> Result<SceneClassificationResult> {
let json_str = std::fs::read_to_string(path).context("Failed to read scene JSON file")?;
let result: SceneClassificationResult =
serde_json::from_str(&json_str).context("Failed to parse scene JSON")?;
tracing::info!(
"[SCENE] Loaded {} scenes from {}",
result.scenes.len(),
path
);
Ok(result)
}
/// 執行場景識別
pub async fn process_scene_classification(
video_path: &str,

View File

@@ -12,7 +12,7 @@ use super::yolo::{YoloFrame, YoloResult};
const VISUAL_CHUNK_TIMEOUT: Duration = Duration::from_secs(3600);
/// 視覺分片處理結果
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct VisualChunkResult {
/// 生成的視覺分片數量
pub chunk_count: u32,
@@ -284,7 +284,7 @@ pub async fn process_visual_chunk_advanced(
});
}
executor
let result = match executor
.run(
"visual_chunk_processor.py",
&[video_path, output_path],
@@ -293,13 +293,34 @@ pub async fn process_visual_chunk_advanced(
Some(VISUAL_CHUNK_TIMEOUT),
)
.await
.with_context(|| format!("Failed to run {:?}", script_path))?;
let json_str =
std::fs::read_to_string(output_path).context("Failed to read visual chunk output")?;
let result: VisualChunkResult =
serde_json::from_str(&json_str).context("Failed to parse visual chunk output")?;
{
Ok(_) => match std::fs::read_to_string(output_path) {
Ok(json_str) => match serde_json::from_str::<VisualChunkResult>(&json_str) {
Ok(r) => r,
Err(e) => {
tracing::warn!(
"[VisualChunk] Failed to parse output ({}), returning empty",
e
);
VisualChunkResult::default()
}
},
Err(e) => {
tracing::warn!(
"[VisualChunk] Failed to read output ({}), returning empty",
e
);
VisualChunkResult::default()
}
},
Err(e) => {
tracing::warn!(
"[VisualChunk] Failed to run script ({}), returning empty",
e
);
VisualChunkResult::default()
}
};
tracing::info!(
"[VisualChunk] Advanced generation result: {} chunks, {} frames",