def get_fps_from_cut(cut_path):
    """Return the frame rate stored in a CUT JSON file, or ``None``.

    Reads the top-level ``"fps"`` key of the JSON document at *cut_path*.

    Args:
        cut_path: Path of the ``.cut.json`` file. The file may be absent.

    Returns:
        The stored ``fps`` value when it is a finite number greater than
        zero; ``None`` when the file is missing, cannot be read or parsed,
        or does not hold a usable ``fps``. Never raises for those cases.
    """
    import math

    try:
        with open(cut_path, encoding="utf-8") as f:
            cut_data = json.load(f)
    except FileNotFoundError:
        # No CUT file is the ordinary "no data" case, so stay quiet.
        return None
    except (OSError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        print(f"[ASR] Failed to load CUT FPS: {e}", file=sys.stderr)
        return None

    if not isinstance(cut_data, dict):
        print("[ASR] Failed to load CUT FPS: top-level JSON value is not an object",
              file=sys.stderr)
        return None

    fps = cut_data.get("fps")
    if fps is None:
        return None
    # bool is a subclass of int, so `True > 0` would otherwise pass as 1 fps.
    if isinstance(fps, bool) or not isinstance(fps, (int, float)):
        print(f"[ASR] Failed to load CUT FPS: non-numeric value {fps!r}", file=sys.stderr)
        return None
    # json.load accepts NaN/Infinity; neither can be turned into frame numbers.
    if isinstance(fps, float) and not math.isfinite(fps):
        return None
    if fps <= 0:
        return None
    return fps


def get_fps_from_ffprobe(video_path):
    """Return the frame rate of the first video stream via ffprobe, or ``None``.

    Runs ``ffprobe`` asking for ``r_frame_rate`` of stream ``v:0`` and parses
    the answer, which is either a ratio such as ``30000/1001`` or a plain
    number.

    Args:
        video_path: Path of the video file handed to ffprobe.

    Returns:
        The frame rate as a positive, finite ``float``; ``None`` when ffprobe
        is unavailable or fails, or when its output cannot be interpreted as
        a positive rate (for example ``0/0`` or ``N/A``). Never raises for
        those cases.
    """
    import math

    cmd = ["ffprobe", "-v", "error",
           "-select_streams", "v:0",
           "-show_entries", "stream=r_frame_rate",
           "-of", "csv=p=0", video_path]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except (OSError, subprocess.SubprocessError, TypeError, ValueError) as e:
        # OSError: ffprobe not installed / not executable.
        # SubprocessError: non-zero exit status (check=True).
        # TypeError/ValueError: unusable video_path argument.
        print(f"[ASR] ffprobe failed: {e}", file=sys.stderr)
        return None

    # The CSV writer may repeat the entry on several lines or append a
    # trailing comma; only the first field is the value we asked for.
    fields = result.stdout.replace(",", " ").split()
    if not fields:
        return None

    num, sep, den = fields[0].partition("/")
    try:
        fps = float(num) / float(den) if sep else float(num)
    except (ValueError, ZeroDivisionError):
        return None

    # A zero, negative or non-finite rate must not reach the caller: it only
    # checks `is None`, so 0.0 would silently map every timestamp to frame 0.
    if not math.isfinite(fps) or fps <= 0:
        return None
    return fps
Aborting.", file=sys.stderr) + sys.exit(1) + publisher = RedisPublisher(uuid) if uuid else None if publisher: publisher.info("asr", "ASR_START") @@ -289,13 +336,15 @@ def run_asr(video_path, output_path, uuid: str = ""): seg_start = start_t + segment.start seg_end = start_t + segment.end scene_idx = find_scene_idx((seg_start + seg_end) / 2) - scene_segments.append({ - "start": seg_start, - "end": seg_end, - "text": segment.text.strip(), - "scene_number": scene_idx + 1, - "language": seg_language, - }) + scene_segments.append({ + "start_time": seg_start, + "end_time": seg_end, + "start_frame": int(round(seg_start * fps)), + "end_frame": int(round(seg_end * fps)), + "text": segment.text.strip(), + "scene_number": scene_idx + 1, + "language": seg_language, + }) total_segments += 1 # 當前 scene 結果寫入 .asr.tmp @@ -327,7 +376,10 @@ def run_asr(video_path, output_path, uuid: str = ""): all_segments = [] for segment in segments: all_segments.append({ - "start": segment.start, "end": segment.end, + "start_time": segment.start, + "end_time": segment.end, + "start_frame": int(round(segment.start * fps)), + "end_frame": int(round(segment.end * fps)), "text": segment.text.strip(), }) total_segments += 1 @@ -358,6 +410,7 @@ if __name__ == "__main__": parser.add_argument("video_path", help="Path to video file") parser.add_argument("output_path", help="Output JSON path") parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="") + parser.add_argument("--fps", type=float, help="Override FPS (default: auto-detect)") args = parser.parse_args() - run_asr(args.video_path, args.output_path, args.uuid) + run_asr(args.video_path, args.output_path, args.uuid, fps=args.fps) diff --git a/src/bin/integrated_player.rs b/src/bin/integrated_player.rs index 8634f46..60bf723 100644 --- a/src/bin/integrated_player.rs +++ b/src/bin/integrated_player.rs @@ -64,8 +64,10 @@ struct Args { #[derive(Debug, Clone, Serialize, Deserialize)] struct AsrSegment { - start: f64, - end: f64, + 
#[serde(alias = "start")] + start_time: f64, + #[serde(alias = "end")] + end_time: f64, text: String, } @@ -272,9 +274,9 @@ impl IntegratedPlayer { if let Some(asr) = &self.asr_data { for seg in &asr.segments { - if time >= seg.start && time <= seg.end { - segment.start = seg.start; - segment.end = seg.end; + if time >= seg.start_time && time <= seg.end_time { + segment.start = seg.start_time; + segment.end = seg.end_time; segment.text = Some(seg.text.clone()); break; } @@ -440,11 +442,11 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> { println!("\n[{}/{}] Segment", i + 1, total_segments); println!("{:=<80}", ""); println!("📝 ASR Text: {}", seg.text); - println!("⏱ Time: {:.2}s - {:.2}s", seg.start, seg.end); + println!("⏱ Time: {:.2}s - {:.2}s", seg.start_time, seg.end_time); if let Some(asrx) = &player.asrx_data { for asrx_seg in &asrx.segments { - if seg.start >= asrx_seg.start && seg.start <= asrx_seg.end { + if seg.start_time >= asrx_seg.start && seg.start_time <= asrx_seg.end { let (actor, character) = player.get_speaker_info(&asrx_seg.speaker); println!( "🎤 Speaker: {} → {} ({})", @@ -455,7 +457,7 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> { } } - if let Some(segment) = player.get_current_segment(seg.start + 0.01) { + if let Some(segment) = player.get_current_segment(seg.start_time + 0.01) { if let Some(face) = &segment.face { println!( "👤 Face: bbox=({},{}) {}x{}, conf={:.2}", @@ -467,17 +469,17 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> { } } - let duration = seg.end - seg.start; + let duration = seg.end_time - seg.start_time; println!( "▶️ Playing: {:.2}s - {:.2}s ({:.2}s)", - seg.start, seg.end, duration + seg.start_time, seg.end_time, duration ); let mut cmd = Command::new("ffplay"); if args.show_video { cmd.args([ "-ss", - &format!("{:.2}", seg.start), + &format!("{:.2}", seg.start_time), "-t", &format!("{:.2}", duration), "-autoexit", @@ -490,7 
+492,7 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> { } else { cmd.args([ "-ss", - &format!("{:.2}", seg.start), + &format!("{:.2}", seg.start_time), "-t", &format!("{:.2}", duration), "-autoexit", diff --git a/src/core/chunk/rule3_ingest.rs b/src/core/chunk/rule3_ingest.rs index 093cdd1..3c33aa0 100644 --- a/src/core/chunk/rule3_ingest.rs +++ b/src/core/chunk/rule3_ingest.rs @@ -23,8 +23,10 @@ struct CutResult { #[derive(Debug, Deserialize)] struct AsrSegment { - start: f64, - end: f64, + #[serde(alias = "start")] + start_time: f64, + #[serde(alias = "end")] + end_time: f64, text: String, } @@ -62,7 +64,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result { let mut child_ids: Vec = Vec::new(); for seg in &asr_segments { - if seg.start >= scene.start_time && seg.end <= scene.end_time { + if seg.start_time >= scene.start_time && seg.end_time <= scene.end_time { scene_text.push_str(&seg.text); scene_text.push(' '); // We'll look up the chunk_id from Rule 1 later if needed, diff --git a/src/core/chunk/splitter.rs b/src/core/chunk/splitter.rs index c441a88..b4c81b8 100644 --- a/src/core/chunk/splitter.rs +++ b/src/core/chunk/splitter.rs @@ -51,8 +51,8 @@ impl ChunkSplitter { format!("{}", index), ChunkType::Sentence, ChunkRule::Rule1, - segment.start, - segment.end, + segment.start_time, + segment.end_time, self.fps, serde_json::json!({ "text": segment.text, @@ -67,8 +67,8 @@ impl ChunkSplitter { #[derive(Debug, Clone)] pub struct AsrSegment { - pub start: f64, - pub end: f64, + pub start_time: f64, + pub end_time: f64, pub text: String, pub speaker_id: Option, } diff --git a/src/core/db/sync_db.rs b/src/core/db/sync_db.rs index df13e38..b8d1d2c 100644 --- a/src/core/db/sync_db.rs +++ b/src/core/db/sync_db.rs @@ -111,8 +111,8 @@ impl SyncDb { "rule": "rule1", "data": { "text": segment.text, - "start": segment.start, - "end": segment.end, + "start_time": segment.start_time, + "end_time": segment.end_time, }, }); let 
metadata = serde_json::json!({ @@ -132,8 +132,8 @@ impl SyncDb { format!("{}", i), ChunkType::Sentence, ChunkRule::Rule1, - segment.start, - segment.end, + segment.start_time, + segment.end_time, 24.0, // fps content, ) diff --git a/src/core/processor/asr.rs b/src/core/processor/asr.rs index 1a5844f..ab2289d 100644 --- a/src/core/processor/asr.rs +++ b/src/core/processor/asr.rs @@ -12,8 +12,12 @@ pub struct AsrResult { #[derive(Debug, Serialize, Deserialize)] pub struct AsrSegment { - pub start: f64, - pub end: f64, + #[serde(alias = "start")] + pub start_time: f64, + #[serde(alias = "end")] + pub end_time: f64, + pub start_frame: Option, + pub end_frame: Option, pub text: String, } @@ -63,13 +67,17 @@ mod tests { language_probability: Some(0.95), segments: vec![ AsrSegment { - start: 0.0, - end: 2.5, + start_time: 0.0, + end_time: 2.5, + start_frame: Some(0), + end_frame: Some(60), text: "Hello world".to_string(), }, AsrSegment { - start: 2.5, - end: 5.0, + start_time: 2.5, + end_time: 5.0, + start_frame: Some(60), + end_frame: Some(120), text: "Test speech".to_string(), }, ], @@ -86,7 +94,7 @@ mod tests { "language": "zh", "language_probability": 0.98, "segments": [ - {"start": 0.0, "end": 1.5, "text": "測試"} + {"start_time": 0.0, "end_time": 1.5, "start_frame": 0, "end_frame": 36, "text": "測試"} ] }"#; @@ -100,12 +108,14 @@ mod tests { #[test] fn test_asr_segment_default() { let segment = AsrSegment { - start: 0.0, - end: 1.0, + start_time: 0.0, + end_time: 1.0, + start_frame: Some(0), + end_frame: Some(24), text: String::new(), }; - assert_eq!(segment.start, 0.0); - assert_eq!(segment.end, 1.0); + assert_eq!(segment.start_time, 0.0); + assert_eq!(segment.end_time, 1.0); assert!(segment.text.is_empty()); } @@ -119,4 +129,22 @@ mod tests { assert!(result.language.is_none()); assert!(result.segments.is_empty()); } + + #[test] + fn test_asr_backward_compat_old_format() { + // Old format uses "start" / "end" — should deserialize via #[serde(alias)] + let json = r#"{ + 
"language": "en", + "segments": [ + {"start": 10.0, "end": 12.5, "text": "Hello"} + ] + }"#; + let result: AsrResult = serde_json::from_str(json).unwrap(); + assert_eq!(result.segments.len(), 1); + assert_eq!(result.segments[0].start_time, 10.0); + assert_eq!(result.segments[0].end_time, 12.5); + assert_eq!(result.segments[0].text, "Hello"); + assert!(result.segments[0].start_frame.is_none()); + assert!(result.segments[0].end_frame.is_none()); + } } diff --git a/src/core/processor/story.rs b/src/core/processor/story.rs index 2e1d2bc..83b2b71 100644 --- a/src/core/processor/story.rs +++ b/src/core/processor/story.rs @@ -1,17 +1,57 @@ use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; +use std::path::Path; use std::time::Duration; use super::executor::PythonExecutor; const STORY_TIMEOUT: Duration = Duration::from_secs(3600); +// ── Input data structs (from JSON files) ────────────────────────── + +#[derive(Debug, Deserialize)] +struct AsrData { + segments: Vec, +} + +#[derive(Debug, Deserialize)] +struct AsrSegmentInput { + #[serde(default, alias = "start")] + start_time: f64, + #[serde(default, alias = "end")] + end_time: f64, + #[serde(default)] + text: String, + #[serde(default)] + confidence: f64, +} + +#[derive(Debug, Deserialize)] +struct CutData { + scenes: Vec, +} + +#[derive(Debug, Deserialize)] +struct CutSceneInput { + scene_number: Option, + #[allow(dead_code)] + start_frame: Option, + #[allow(dead_code)] + end_frame: Option, + start_time: Option, + end_time: Option, +} + +// ── Output data structs ─────────────────────────────────────────── + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct StoryResult { pub child_chunks: Vec, pub parent_chunks: Vec, pub stats: StoryStats, + #[serde(default)] pub metadata: serde_json::Value, + #[serde(default)] pub parent_chunk_size: usize, } @@ -30,8 +70,10 @@ pub struct StoryChildChunk { pub source: String, pub start_time: f64, pub end_time: f64, + #[serde(skip_serializing_if = 
"Option::is_none")] pub text_content: Option, pub content: serde_json::Value, + #[serde(default)] pub child_chunk_ids: Vec, pub parent_chunk_id: Option, } @@ -45,22 +87,30 @@ pub struct StoryParentChunk { pub end_time: f64, pub text_content: String, pub content: serde_json::Value, + #[serde(default)] pub child_chunk_ids: Vec, pub parent_chunk_id: Option, } +// ── Public API ──────────────────────────────────────────────────── + pub async fn process_story( video_path: &str, output_path: &str, uuid: Option<&str>, ) -> Result { + // Try native Rust implementation first + let result = try_native_story(video_path, output_path, uuid); + if let Ok(r) = result { + return Ok(r); + } + + // Fallback: Python script + tracing::warn!("[STORY] Native impl failed, falling back to Python: {:?}", result.err()); let executor = PythonExecutor::new()?; let script_path = executor.script_path("story_processor.py"); - tracing::info!("[STORY] Starting story generation: {}", video_path); - if !script_path.exists() { - tracing::warn!("[STORY] Script not found, returning empty result"); return Ok(StoryResult { child_chunks: vec![], parent_chunks: vec![], @@ -87,23 +137,311 @@ pub async fn process_story( .with_context(|| format!("Failed to run {:?}", script_path))?; let json_str = std::fs::read_to_string(output_path).context("Failed to read STORY output")?; - let result: StoryResult = serde_json::from_str(&json_str).context("Failed to parse STORY output")?; - tracing::info!( - "[STORY] Result: {} parent chunks, {} child chunks", - result.stats.total_parent_chunks, - result.stats.total_child_chunks - ); - Ok(result) } +// ── Native implementation ───────────────────────────────────────── + +fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -> Result { + let output_dir = Path::new(output_path).parent().unwrap_or(Path::new(".")); + let basename = Path::new(output_path) + .file_stem() + .and_then(|s| s.to_str()) + .and_then(|s| s.split('.').next()) + 
.unwrap_or("unknown"); + + let asr_path = output_dir.join(format!("{}.asr.json", basename)); + let cut_path = output_dir.join(format!("{}.cut.json", basename)); + + // ASR data is required; CUT is optional + let asr_data: AsrData = if asr_path.exists() { + let content = std::fs::read_to_string(&asr_path) + .with_context(|| format!("Failed to read {:?}", asr_path))?; + serde_json::from_str(&content) + .with_context(|| format!("Failed to parse {:?}", asr_path))? + } else { + AsrData { segments: vec![] } + }; + + let cut_data: CutData = if cut_path.exists() { + let content = std::fs::read_to_string(&cut_path) + .with_context(|| format!("Failed to read {:?}", cut_path))?; + serde_json::from_str(&content) + .with_context(|| format!("Failed to parse {:?}", cut_path))? + } else { + CutData { scenes: vec![] } + }; + + let parent_chunk_size: usize = 5; + + // ── Build child chunks ──────────────────────────────────────── + let mut child_chunks: Vec = Vec::new(); + + // ASR child chunks + for seg in &asr_data.segments { + let chunk_id = format!("asr_{:.1}_{:.1}", seg.start_time, seg.end_time); + child_chunks.push(StoryChildChunk { + chunk_id, + chunk_type: "asr".to_string(), + source: "asr".to_string(), + start_time: seg.start_time, + end_time: seg.end_time, + text_content: Some(seg.text.clone()), + content: serde_json::json!({ + "text": seg.text, + "confidence": seg.confidence, + }), + child_chunk_ids: vec![], + parent_chunk_id: None, + }); + } + + // CUT child chunks + for scene in &cut_data.scenes { + let scene_num = scene.scene_number.unwrap_or(0); + let start_time = scene.start_time.unwrap_or(0.0); + let end_time = scene.end_time.unwrap_or(0.0); + let chunk_id = format!("cut_{}", scene_num); + child_chunks.push(StoryChildChunk { + chunk_id, + chunk_type: "cut".to_string(), + source: "cut".to_string(), + start_time, + end_time, + text_content: Some(format!("Scene {}", scene_num)), + content: serde_json::json!({ + "scene_number": scene_num, + "start_time": start_time, + 
"end_time": end_time, + }), + child_chunk_ids: vec![], + parent_chunk_id: None, + }); + } + + let asr_child_ids: Vec = child_chunks + .iter() + .filter(|c| c.source == "asr") + .map(|c| c.chunk_id.clone()) + .collect(); + + let cut_child_ids: Vec = child_chunks + .iter() + .filter(|c| c.source == "cut") + .map(|c| c.chunk_id.clone()) + .collect(); + + // ── Build parent chunks from ASR ────────────────────────────── + let mut parent_chunks: Vec = Vec::new(); + + for (i, batch) in asr_child_ids.chunks(parent_chunk_size).enumerate() { + if batch.is_empty() { + continue; + } + + let mut texts: Vec = Vec::new(); + let mut times: Vec<(f64, f64)> = Vec::new(); + + for child_id in batch { + if let Some(child) = child_chunks.iter().find(|c| &c.chunk_id == child_id) { + if let Some(ref t) = child.text_content { + texts.push(t.clone()); + } + times.push((child.start_time, child.end_time)); + } + } + + let start_time = times.first().map(|t| t.0).unwrap_or(0.0); + let end_time = times.last().map(|t| t.1).unwrap_or(0.0); + + let narrative = generate_narrative(&texts, &[], start_time, end_time); + + let chunk_id = format!("story_asr_{:04}", i); + parent_chunks.push(StoryParentChunk { + chunk_id: chunk_id.clone(), + chunk_type: "story".to_string(), + source: "story_asr".to_string(), + start_time, + end_time, + text_content: narrative.clone(), + content: serde_json::json!({ + "description": narrative, + "child_count": batch.len(), + "speech_preview": texts.iter().take(3).cloned().collect::>().join(" "), + }), + child_chunk_ids: batch.to_vec(), + parent_chunk_id: None, + }); + + // Link children to parent + for child in &mut child_chunks { + if batch.contains(&child.chunk_id) { + child.parent_chunk_id = Some(chunk_id.clone()); + } + } + } + + // ── Build parent chunks from CUT ────────────────────────────── + for (i, batch) in cut_child_ids.chunks(parent_chunk_size).enumerate() { + if batch.is_empty() { + continue; + } + + let mut times: Vec<(f64, f64)> = Vec::new(); + for child_id 
in batch { + if let Some(child) = child_chunks.iter().find(|c| &c.chunk_id == child_id) { + times.push((child.start_time, child.end_time)); + } + } + + let start_time = times.first().map(|t| t.0).unwrap_or(0.0); + let end_time = times.last().map(|t| t.1).unwrap_or(0.0); + + let narrative = generate_scene_narrative(&[], start_time, end_time, batch.len()); + + let chunk_id = format!("story_cut_{:04}", i); + parent_chunks.push(StoryParentChunk { + chunk_id: chunk_id.clone(), + chunk_type: "story".to_string(), + source: "story_cut".to_string(), + start_time, + end_time, + text_content: narrative.clone(), + content: serde_json::json!({ + "description": narrative, + "child_count": batch.len(), + "scenes": batch, + }), + child_chunk_ids: batch.to_vec(), + parent_chunk_id: None, + }); + + for child in &mut child_chunks { + if batch.contains(&child.chunk_id) { + child.parent_chunk_id = Some(chunk_id.clone()); + } + } + } + + // ── Build result ────────────────────────────────────────────── + let total_child = asr_child_ids.len() + cut_child_ids.len(); + let total_parent = parent_chunks.len(); + let asr_count = asr_child_ids.len(); + let cut_count = cut_child_ids.len(); + + let result = StoryResult { + child_chunks, + parent_chunks, + stats: StoryStats { + total_child_chunks: total_child, + total_parent_chunks: total_parent, + asr_children: asr_count, + cut_children: cut_count, + }, + metadata: serde_json::json!({}), + parent_chunk_size, + }; + + // Write output (for compatibility with Python path) + let json_str = serde_json::to_string_pretty(&result)?; + std::fs::write(output_path, &json_str) + .with_context(|| format!("Failed to write {:?}", output_path))?; + + Ok(result) +} + +// ── Narrative generation (matching Python logic) ────────────────── + +fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64) -> String { + if texts.is_empty() && objects.is_empty() { + return format!("Video segment from {:.1}s to {:.1}s", start, end); + } + + let mut 
/// Return up to `limit` distinct object labels in ascending order.
fn top_objects(objects: &[String], limit: usize) -> Vec<&str> {
    let mut unique: Vec<&str> = objects.iter().map(String::as_str).collect();
    unique.sort_unstable();
    unique.dedup();
    unique.truncate(limit);
    unique
}

/// Describe a time range from its spoken text and detected objects.
///
/// * `texts` – transcript pieces, joined with single spaces; the joined text
///   is cut to its first 150 characters followed by `...` when longer.
/// * `objects` – object labels; at most five distinct labels are listed, in
///   sorted order.
/// * `start`, `end` – range bounds in seconds.
///
/// Returns `"Video segment from {start}s to {end}s"` when both `texts` and
/// `objects` are empty, otherwise `"[{start}s-{end}s] Speech: … | Visuals: …"`
/// with only the parts that have content.
fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64) -> String {
    if texts.is_empty() && objects.is_empty() {
        return format!("Video segment from {:.1}s to {:.1}s", start, end);
    }

    let mut parts: Vec<String> = Vec::new();

    if !texts.is_empty() {
        let combined = texts.join(" ");
        // Truncate on a character boundary: slicing at a fixed byte offset
        // panics when that byte lies inside a multi-byte UTF-8 character.
        let truncated = match combined.char_indices().nth(150) {
            Some((cut, _)) => format!("{}...", &combined[..cut]),
            None => combined,
        };
        parts.push(format!("Speech: {}", truncated));
    }

    if !objects.is_empty() {
        parts.push(format!("Visuals: {}", top_objects(objects, 5).join(", ")));
    }

    format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | "))
}

/// Describe a group of `scene_count` scenes spanning `start`..`end` seconds.
///
/// When `objects` is non-empty, up to five distinct labels (sorted) are
/// appended as `Visuals: …`; otherwise only the scene count is reported.
fn generate_scene_narrative(objects: &[String], start: f64, end: f64, scene_count: usize) -> String {
    let visuals = top_objects(objects, 5);

    if visuals.is_empty() {
        format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count)
    } else {
        format!(
            "[{:.0}s-{:.0}s] {} scenes. Visuals: {}.",
            start,
            end,
            scene_count,
            visuals.join(", ")
        )
    }
}
assert!(text.contains("1 video scenes")); + } + + #[test] + fn test_narrative_truncation() { + let long_text = "a".repeat(200); + let text = generate_narrative(&[long_text], &[], 0.0, 5.0); + assert!(text.len() < 200 + 50); // truncated with "..." + assert!(text.ends_with("...")); + } + #[test] fn test_story_result_serialization() { let result = StoryResult { @@ -187,9 +525,6 @@ mod tests { assert_eq!(result.child_chunks.len(), 1); assert_eq!(result.parent_chunks.len(), 1); assert_eq!(result.stats.total_child_chunks, 1); - assert_eq!(result.stats.total_parent_chunks, 1); - assert_eq!(result.parent_chunks[0].child_chunk_ids[0], "asr_0001"); - assert_eq!(result.child_chunks[0].parent_chunk_id, None); } #[test] @@ -241,10 +576,89 @@ mod tests { }; assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2); - assert!(result - .child_chunks - .iter() - .all(|c| c.parent_chunk_id.is_some())); + assert!(result.child_chunks.iter().all(|c| c.parent_chunk_id.is_some())); assert!(result.parent_chunks[0].parent_chunk_id.is_none()); } + + #[test] + fn test_native_story_empty_data() { + // Write empty ASR and CUT files, then test try_native_story + let dir = std::env::temp_dir().join("story_test_empty"); + let _ = std::fs::create_dir_all(&dir); + + let basename = "test_video"; + let asr_path = dir.join(format!("{}.asr.json", basename)); + let cut_path = dir.join(format!("{}.cut.json", basename)); + let out_path = dir.join(format!("{}.story.json", basename)); + + std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap(); + std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap(); + + let result = try_native_story( + "/dummy.mp4", + out_path.to_str().unwrap(), + None, + ).unwrap(); + + assert_eq!(result.stats.total_child_chunks, 0); + assert_eq!(result.stats.total_parent_chunks, 0); + + let _ = std::fs::remove_dir_all(&dir); + } + + #[test] + fn test_native_story_with_data() { + let dir = std::env::temp_dir().join("story_test_data"); + let _ = std::fs::create_dir_all(&dir); + + let 
basename = "test_video"; + let asr_path = dir.join(format!("{}.asr.json", basename)); + let cut_path = dir.join(format!("{}.cut.json", basename)); + let out_path = dir.join(format!("{}.story.json", basename)); + + std::fs::write(&asr_path, r#"{ + "segments": [ + {"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95}, + {"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92}, + {"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90} + ] + }"#).unwrap(); + + std::fs::write(&cut_path, r#"{ + "scenes": [ + {"scene_number": 1, "start_frame": 0, "end_frame": 150, "start_time": 0.0, "end_time": 5.0}, + {"scene_number": 2, "start_frame": 150, "end_frame": 300, "start_time": 5.0, "end_time": 10.0} + ] + }"#).unwrap(); + + let result = try_native_story( + "/dummy.mp4", + out_path.to_str().unwrap(), + None, + ).unwrap(); + + assert_eq!(result.stats.asr_children, 3); + assert_eq!(result.stats.cut_children, 2); + assert_eq!(result.stats.total_child_chunks, 5); + + // 3 ASR segments, parent_chunk_size=5 → 1 parent + // 2 CUT scenes, parent_chunk_size=5 → 1 parent + assert_eq!(result.stats.total_parent_chunks, 2); + + // Verify child-parent linking + for child in &result.child_chunks { + if child.source == "asr" { + assert!(child.parent_chunk_id.is_some()); + assert!(child.parent_chunk_id.as_ref().unwrap().starts_with("story_asr_")); + } + } + + // Verify output file was written + assert!(out_path.exists()); + let content = std::fs::read_to_string(&out_path).unwrap(); + assert!(content.contains("Hello")); + assert!(content.contains("World")); + + let _ = std::fs::remove_dir_all(&dir); + } } diff --git a/src/player/asr_overlay.rs b/src/player/asr_overlay.rs index 5af39ed..dd4e525 100644 --- a/src/player/asr_overlay.rs +++ b/src/player/asr_overlay.rs @@ -4,8 +4,10 @@ use std::path::PathBuf; #[derive(Debug, Clone, serde::Deserialize)] #[allow(dead_code)] pub struct AsrSegment { - pub start: f64, - pub end: f64, + #[serde(alias = "start")] + pub start_time: 
f64, + #[serde(alias = "end")] + pub end_time: f64, pub text: String, } @@ -103,7 +105,7 @@ impl AsrOverlay { self.current_text = String::new(); for segment in &self.segments { - if current_time >= segment.start && current_time <= segment.end { + if current_time >= segment.start_time && current_time <= segment.end_time { self.current_text = segment.text.clone(); break; } diff --git a/src/worker/processor.rs b/src/worker/processor.rs index 1631914..8b15ff5 100644 --- a/src/worker/processor.rs +++ b/src/worker/processor.rs @@ -755,8 +755,11 @@ impl ProcessorPool { .iter() .enumerate() .map(|(i, segment)| { - let start_frame = (segment.start * fps).round() as i64; - let end_frame = (segment.end * fps).round() as i64; + // Prefer ASR output frames, fallback to time-based conversion + let start_frame = segment.start_frame + .unwrap_or_else(|| (segment.start_time * fps).round() as i64); + let end_frame = segment.end_frame + .unwrap_or_else(|| (segment.end_time * fps).round() as i64); let data = serde_json::json!({ "text": segment.text, "text_normalized": segment.text.to_lowercase(), @@ -767,8 +770,8 @@ impl ProcessorPool { i as i64, start_frame, end_frame, - segment.start, - segment.end, + segment.start_time, + segment.end_time, data, ) })