feat: ASR output frame numbers + rename start/end to start_time/end_time

- Python: asr_processor.py takes FPS from --fps, then CUT, then ffprobe (no default-FPS fallback), and outputs start_frame/end_frame
- Rust: All AsrSegment structs use start_time/end_time with #[serde(alias)] for backward compat
- store_asr_chunks: prefers ASR output frames, falls back to time-based conversion
- Added backward compatibility test for old JSON format (start/end)

Breaking change: ffprobe/CUT FPS failure now aborts instead of using default 24fps
This commit is contained in:
Accusys
2026-05-19 13:22:38 +08:00
parent 26725dcab7
commit 67ca846ccd
9 changed files with 572 additions and 68 deletions

View File

@@ -141,11 +141,58 @@ def transcribe_with_fallback(model, video_path, publisher=None):
pass
def run_asr(video_path, output_path, uuid: str = ""):
def get_fps_from_cut(cut_path):
    """Read the frame rate recorded in a CUT JSON file.

    Returns the value stored under the top-level "fps" key when it is truthy
    and greater than zero. Returns None when the file does not exist, when it
    holds no usable "fps" value, or when it cannot be read or parsed (in that
    last case the failure is reported on stderr).
    """
    if not os.path.exists(cut_path):
        return None
    try:
        with open(cut_path) as handle:
            recorded = json.load(handle).get("fps")
        return recorded if recorded and recorded > 0 else None
    except Exception as e:
        # Best effort: the caller moves on to the next FPS source.
        print(f"[ASR] Failed to load CUT FPS: {e}", file=sys.stderr)
        return None
def get_fps_from_ffprobe(video_path):
    """Probe the frame rate of the first video stream with ffprobe.

    Runs ffprobe asking only for `r_frame_rate` of stream v:0 and parses the
    answer, which is either a rational ("30000/1001") or a plain number.

    Returns the rate as a float, or None when ffprobe fails, prints nothing
    parseable, or reports a rate that is not a positive finite number. The
    caller treats None as "unknown" and aborts, so an unusable rate such as
    "0/1" must not be returned as 0.0 (it would turn every frame number into 0).
    """
    try:
        cmd = ["ffprobe", "-v", "error",
               "-select_streams", "v:0",
               "-show_entries", "stream=r_frame_rate",
               "-of", "csv=p=0", video_path]
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        # Tolerate extra output lines or a trailing CSV separator: only the
        # first whitespace-delimited token is the value we asked for.
        tokens = result.stdout.split()
        if not tokens:
            return None
        fps_str = tokens[0].rstrip(",")
        if "/" in fps_str:
            num, den = fps_str.split("/")
            fps = float(num) / float(den)
        else:
            fps = float(fps_str)
    except Exception:
        # Missing binary, non-zero exit, malformed output, zero denominator.
        return None
    # The chained comparison also rejects NaN (every comparison with NaN is False).
    return fps if 0 < fps < float("inf") else None
def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
# Set up signal handlers
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# FPS detection chain: CLI → CUT → ffprobe → FAIL
if fps is not None:
print(f"[ASR] Using CLI-provided FPS: {fps}", file=sys.stderr)
else:
cut_path_check = output_path.replace(".asr.json", ".cut.json")
fps = get_fps_from_cut(cut_path_check)
if fps:
print(f"[ASR] FPS from CUT: {fps}", file=sys.stderr)
if fps is None:
fps = get_fps_from_ffprobe(video_path)
if fps:
print(f"[ASR] FPS from ffprobe: {fps}", file=sys.stderr)
if fps is None:
print("[ASR] ERROR: Cannot determine FPS (no CUT data, ffprobe failed). Aborting.", file=sys.stderr)
sys.exit(1)
publisher = RedisPublisher(uuid) if uuid else None
if publisher:
publisher.info("asr", "ASR_START")
@@ -289,13 +336,15 @@ def run_asr(video_path, output_path, uuid: str = ""):
seg_start = start_t + segment.start
seg_end = start_t + segment.end
scene_idx = find_scene_idx((seg_start + seg_end) / 2)
scene_segments.append({
"start": seg_start,
"end": seg_end,
"text": segment.text.strip(),
"scene_number": scene_idx + 1,
"language": seg_language,
})
scene_segments.append({
"start_time": seg_start,
"end_time": seg_end,
"start_frame": int(round(seg_start * fps)),
"end_frame": int(round(seg_end * fps)),
"text": segment.text.strip(),
"scene_number": scene_idx + 1,
"language": seg_language,
})
total_segments += 1
# 當前 scene 結果寫入 .asr.tmp
@@ -327,7 +376,10 @@ def run_asr(video_path, output_path, uuid: str = ""):
all_segments = []
for segment in segments:
all_segments.append({
"start": segment.start, "end": segment.end,
"start_time": segment.start,
"end_time": segment.end,
"start_frame": int(round(segment.start * fps)),
"end_frame": int(round(segment.end * fps)),
"text": segment.text.strip(),
})
total_segments += 1
@@ -358,6 +410,7 @@ if __name__ == "__main__":
parser.add_argument("video_path", help="Path to video file")
parser.add_argument("output_path", help="Output JSON path")
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
parser.add_argument("--fps", type=float, help="Override FPS (default: auto-detect)")
args = parser.parse_args()
run_asr(args.video_path, args.output_path, args.uuid)
run_asr(args.video_path, args.output_path, args.uuid, fps=args.fps)

View File

@@ -64,8 +64,10 @@ struct Args {
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AsrSegment {
start: f64,
end: f64,
#[serde(alias = "start")]
start_time: f64,
#[serde(alias = "end")]
end_time: f64,
text: String,
}
@@ -272,9 +274,9 @@ impl IntegratedPlayer {
if let Some(asr) = &self.asr_data {
for seg in &asr.segments {
if time >= seg.start && time <= seg.end {
segment.start = seg.start;
segment.end = seg.end;
if time >= seg.start_time && time <= seg.end_time {
segment.start = seg.start_time;
segment.end = seg.end_time;
segment.text = Some(seg.text.clone());
break;
}
@@ -440,11 +442,11 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
println!("\n[{}/{}] Segment", i + 1, total_segments);
println!("{:=<80}", "");
println!("📝 ASR Text: {}", seg.text);
println!("⏱ Time: {:.2}s - {:.2}s", seg.start, seg.end);
println!("⏱ Time: {:.2}s - {:.2}s", seg.start_time, seg.end_time);
if let Some(asrx) = &player.asrx_data {
for asrx_seg in &asrx.segments {
if seg.start >= asrx_seg.start && seg.start <= asrx_seg.end {
if seg.start_time >= asrx_seg.start && seg.start_time <= asrx_seg.end {
let (actor, character) = player.get_speaker_info(&asrx_seg.speaker);
println!(
"🎤 Speaker: {}{} ({})",
@@ -455,7 +457,7 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
}
}
if let Some(segment) = player.get_current_segment(seg.start + 0.01) {
if let Some(segment) = player.get_current_segment(seg.start_time + 0.01) {
if let Some(face) = &segment.face {
println!(
"👤 Face: bbox=({},{}) {}x{}, conf={:.2}",
@@ -467,17 +469,17 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
}
}
let duration = seg.end - seg.start;
let duration = seg.end_time - seg.start_time;
println!(
"▶️ Playing: {:.2}s - {:.2}s ({:.2}s)",
seg.start, seg.end, duration
seg.start_time, seg.end_time, duration
);
let mut cmd = Command::new("ffplay");
if args.show_video {
cmd.args([
"-ss",
&format!("{:.2}", seg.start),
&format!("{:.2}", seg.start_time),
"-t",
&format!("{:.2}", duration),
"-autoexit",
@@ -490,7 +492,7 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
} else {
cmd.args([
"-ss",
&format!("{:.2}", seg.start),
&format!("{:.2}", seg.start_time),
"-t",
&format!("{:.2}", duration),
"-autoexit",

View File

@@ -23,8 +23,10 @@ struct CutResult {
#[derive(Debug, Deserialize)]
struct AsrSegment {
start: f64,
end: f64,
#[serde(alias = "start")]
start_time: f64,
#[serde(alias = "end")]
end_time: f64,
text: String,
}
@@ -62,7 +64,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
let mut child_ids: Vec<String> = Vec::new();
for seg in &asr_segments {
if seg.start >= scene.start_time && seg.end <= scene.end_time {
if seg.start_time >= scene.start_time && seg.end_time <= scene.end_time {
scene_text.push_str(&seg.text);
scene_text.push(' ');
// We'll look up the chunk_id from Rule 1 later if needed,

View File

@@ -51,8 +51,8 @@ impl ChunkSplitter {
format!("{}", index),
ChunkType::Sentence,
ChunkRule::Rule1,
segment.start,
segment.end,
segment.start_time,
segment.end_time,
self.fps,
serde_json::json!({
"text": segment.text,
@@ -67,8 +67,8 @@ impl ChunkSplitter {
#[derive(Debug, Clone)]
pub struct AsrSegment {
pub start: f64,
pub end: f64,
pub start_time: f64,
pub end_time: f64,
pub text: String,
pub speaker_id: Option<String>,
}

View File

@@ -111,8 +111,8 @@ impl SyncDb {
"rule": "rule1",
"data": {
"text": segment.text,
"start": segment.start,
"end": segment.end,
"start_time": segment.start_time,
"end_time": segment.end_time,
},
});
let metadata = serde_json::json!({
@@ -132,8 +132,8 @@ impl SyncDb {
format!("{}", i),
ChunkType::Sentence,
ChunkRule::Rule1,
segment.start,
segment.end,
segment.start_time,
segment.end_time,
24.0, // fps
content,
)

View File

@@ -12,8 +12,12 @@ pub struct AsrResult {
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrSegment {
pub start: f64,
pub end: f64,
#[serde(alias = "start")]
pub start_time: f64,
#[serde(alias = "end")]
pub end_time: f64,
pub start_frame: Option<i64>,
pub end_frame: Option<i64>,
pub text: String,
}
@@ -63,13 +67,17 @@ mod tests {
language_probability: Some(0.95),
segments: vec![
AsrSegment {
start: 0.0,
end: 2.5,
start_time: 0.0,
end_time: 2.5,
start_frame: Some(0),
end_frame: Some(60),
text: "Hello world".to_string(),
},
AsrSegment {
start: 2.5,
end: 5.0,
start_time: 2.5,
end_time: 5.0,
start_frame: Some(60),
end_frame: Some(120),
text: "Test speech".to_string(),
},
],
@@ -86,7 +94,7 @@ mod tests {
"language": "zh",
"language_probability": 0.98,
"segments": [
{"start": 0.0, "end": 1.5, "text": "測試"}
{"start_time": 0.0, "end_time": 1.5, "start_frame": 0, "end_frame": 36, "text": "測試"}
]
}"#;
@@ -100,12 +108,14 @@ mod tests {
#[test]
fn test_asr_segment_default() {
let segment = AsrSegment {
start: 0.0,
end: 1.0,
start_time: 0.0,
end_time: 1.0,
start_frame: Some(0),
end_frame: Some(24),
text: String::new(),
};
assert_eq!(segment.start, 0.0);
assert_eq!(segment.end, 1.0);
assert_eq!(segment.start_time, 0.0);
assert_eq!(segment.end_time, 1.0);
assert!(segment.text.is_empty());
}
@@ -119,4 +129,22 @@ mod tests {
assert!(result.language.is_none());
assert!(result.segments.is_empty());
}
#[test]
fn test_asr_backward_compat_old_format() {
    // Legacy payloads key the timestamps as "start" / "end"; the serde aliases
    // on `AsrSegment` must still map them onto `start_time` / `end_time`.
    let legacy_json = r#"{
"language": "en",
"segments": [
{"start": 10.0, "end": 12.5, "text": "Hello"}
]
}"#;
    let parsed: AsrResult = serde_json::from_str(legacy_json).unwrap();
    assert_eq!(parsed.segments.len(), 1);

    let only = &parsed.segments[0];
    assert_eq!(only.start_time, 10.0);
    assert_eq!(only.end_time, 12.5);
    assert_eq!(only.text, "Hello");
    // The old format carried no frame numbers, so both must stay unset.
    assert!(only.start_frame.is_none());
    assert!(only.end_frame.is_none());
}
}

View File

@@ -1,17 +1,57 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::path::Path;
use std::time::Duration;
use super::executor::PythonExecutor;
const STORY_TIMEOUT: Duration = Duration::from_secs(3600);
// ── Input data structs (from JSON files) ──────────────────────────
/// Root object deserialized from the `<basename>.asr.json` input file.
#[derive(Debug, Deserialize)]
struct AsrData {
/// Transcript segments; each one becomes an ASR child chunk.
segments: Vec<AsrSegmentInput>,
}
/// One transcript segment as read from the ASR JSON.
///
/// Every field carries `#[serde(default)]`, so a key that is absent from the
/// JSON yields the type's default (`0.0` / empty string) instead of a parse error.
#[derive(Debug, Deserialize)]
struct AsrSegmentInput {
/// Segment start; also accepted under the legacy key `start`.
#[serde(default, alias = "start")]
start_time: f64,
/// Segment end; also accepted under the legacy key `end`.
#[serde(default, alias = "end")]
end_time: f64,
/// Recognized text of the segment.
#[serde(default)]
text: String,
/// Confidence value copied into the child chunk's `content`; `0.0` when the key is absent.
#[serde(default)]
confidence: f64,
}
/// Root object deserialized from the `<basename>.cut.json` input file.
#[derive(Debug, Deserialize)]
struct CutData {
/// Detected scenes; each one becomes a CUT child chunk.
scenes: Vec<CutSceneInput>,
}
/// One scene entry as read from the CUT JSON. All fields are optional in the input.
#[derive(Debug, Deserialize)]
struct CutSceneInput {
/// Scene number used to build the `cut_<n>` chunk id; treated as 0 when absent.
scene_number: Option<i64>,
// Frame bounds are deserialized but not read by `try_native_story`,
// hence the `dead_code` allowances.
#[allow(dead_code)]
start_frame: Option<i64>,
#[allow(dead_code)]
end_frame: Option<i64>,
/// Scene start time; treated as 0.0 when absent.
start_time: Option<f64>,
/// Scene end time; treated as 0.0 when absent.
end_time: Option<f64>,
}
// ── Output data structs ───────────────────────────────────────────
/// Complete story output: the child chunks, the parent chunks built from them,
/// and summary counts. This is the shape serialized to, and parsed from, the
/// story JSON file.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct StoryResult {
/// Leaf chunks (ASR segments and CUT scenes).
pub child_chunks: Vec<StoryChildChunk>,
/// Story chunks, each grouping a batch of child chunks.
pub parent_chunks: Vec<StoryParentChunk>,
/// Summary counters for the two lists above.
pub stats: StoryStats,
/// Free-form metadata; deserializes to JSON `null` when the key is absent.
#[serde(default)]
pub metadata: serde_json::Value,
/// Number of children grouped per parent; deserializes to 0 when the key is absent.
#[serde(default)]
pub parent_chunk_size: usize,
}
@@ -30,8 +70,10 @@ pub struct StoryChildChunk {
pub source: String,
pub start_time: f64,
pub end_time: f64,
#[serde(skip_serializing_if = "Option::is_none")]
pub text_content: Option<String>,
pub content: serde_json::Value,
#[serde(default)]
pub child_chunk_ids: Vec<String>,
pub parent_chunk_id: Option<String>,
}
@@ -45,22 +87,30 @@ pub struct StoryParentChunk {
pub end_time: f64,
pub text_content: String,
pub content: serde_json::Value,
#[serde(default)]
pub child_chunk_ids: Vec<String>,
pub parent_chunk_id: Option<String>,
}
// ── Public API ────────────────────────────────────────────────────
pub async fn process_story(
video_path: &str,
output_path: &str,
uuid: Option<&str>,
) -> Result<StoryResult> {
// Try native Rust implementation first
let result = try_native_story(video_path, output_path, uuid);
if let Ok(r) = result {
return Ok(r);
}
// Fallback: Python script
tracing::warn!("[STORY] Native impl failed, falling back to Python: {:?}", result.err());
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("story_processor.py");
tracing::info!("[STORY] Starting story generation: {}", video_path);
if !script_path.exists() {
tracing::warn!("[STORY] Script not found, returning empty result");
return Ok(StoryResult {
child_chunks: vec![],
parent_chunks: vec![],
@@ -87,23 +137,311 @@ pub async fn process_story(
.with_context(|| format!("Failed to run {:?}", script_path))?;
let json_str = std::fs::read_to_string(output_path).context("Failed to read STORY output")?;
let result: StoryResult =
serde_json::from_str(&json_str).context("Failed to parse STORY output")?;
tracing::info!(
"[STORY] Result: {} parent chunks, {} child chunks",
result.stats.total_parent_chunks,
result.stats.total_child_chunks
);
Ok(result)
}
// ── Native implementation ─────────────────────────────────────────
fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -> Result<StoryResult> {
let output_dir = Path::new(output_path).parent().unwrap_or(Path::new("."));
let basename = Path::new(output_path)
.file_stem()
.and_then(|s| s.to_str())
.and_then(|s| s.split('.').next())
.unwrap_or("unknown");
let asr_path = output_dir.join(format!("{}.asr.json", basename));
let cut_path = output_dir.join(format!("{}.cut.json", basename));
// ASR data is required; CUT is optional
let asr_data: AsrData = if asr_path.exists() {
let content = std::fs::read_to_string(&asr_path)
.with_context(|| format!("Failed to read {:?}", asr_path))?;
serde_json::from_str(&content)
.with_context(|| format!("Failed to parse {:?}", asr_path))?
} else {
AsrData { segments: vec![] }
};
let cut_data: CutData = if cut_path.exists() {
let content = std::fs::read_to_string(&cut_path)
.with_context(|| format!("Failed to read {:?}", cut_path))?;
serde_json::from_str(&content)
.with_context(|| format!("Failed to parse {:?}", cut_path))?
} else {
CutData { scenes: vec![] }
};
let parent_chunk_size: usize = 5;
// ── Build child chunks ────────────────────────────────────────
let mut child_chunks: Vec<StoryChildChunk> = Vec::new();
// ASR child chunks
for seg in &asr_data.segments {
let chunk_id = format!("asr_{:.1}_{:.1}", seg.start_time, seg.end_time);
child_chunks.push(StoryChildChunk {
chunk_id,
chunk_type: "asr".to_string(),
source: "asr".to_string(),
start_time: seg.start_time,
end_time: seg.end_time,
text_content: Some(seg.text.clone()),
content: serde_json::json!({
"text": seg.text,
"confidence": seg.confidence,
}),
child_chunk_ids: vec![],
parent_chunk_id: None,
});
}
// CUT child chunks
for scene in &cut_data.scenes {
let scene_num = scene.scene_number.unwrap_or(0);
let start_time = scene.start_time.unwrap_or(0.0);
let end_time = scene.end_time.unwrap_or(0.0);
let chunk_id = format!("cut_{}", scene_num);
child_chunks.push(StoryChildChunk {
chunk_id,
chunk_type: "cut".to_string(),
source: "cut".to_string(),
start_time,
end_time,
text_content: Some(format!("Scene {}", scene_num)),
content: serde_json::json!({
"scene_number": scene_num,
"start_time": start_time,
"end_time": end_time,
}),
child_chunk_ids: vec![],
parent_chunk_id: None,
});
}
let asr_child_ids: Vec<String> = child_chunks
.iter()
.filter(|c| c.source == "asr")
.map(|c| c.chunk_id.clone())
.collect();
let cut_child_ids: Vec<String> = child_chunks
.iter()
.filter(|c| c.source == "cut")
.map(|c| c.chunk_id.clone())
.collect();
// ── Build parent chunks from ASR ──────────────────────────────
let mut parent_chunks: Vec<StoryParentChunk> = Vec::new();
for (i, batch) in asr_child_ids.chunks(parent_chunk_size).enumerate() {
if batch.is_empty() {
continue;
}
let mut texts: Vec<String> = Vec::new();
let mut times: Vec<(f64, f64)> = Vec::new();
for child_id in batch {
if let Some(child) = child_chunks.iter().find(|c| &c.chunk_id == child_id) {
if let Some(ref t) = child.text_content {
texts.push(t.clone());
}
times.push((child.start_time, child.end_time));
}
}
let start_time = times.first().map(|t| t.0).unwrap_or(0.0);
let end_time = times.last().map(|t| t.1).unwrap_or(0.0);
let narrative = generate_narrative(&texts, &[], start_time, end_time);
let chunk_id = format!("story_asr_{:04}", i);
parent_chunks.push(StoryParentChunk {
chunk_id: chunk_id.clone(),
chunk_type: "story".to_string(),
source: "story_asr".to_string(),
start_time,
end_time,
text_content: narrative.clone(),
content: serde_json::json!({
"description": narrative,
"child_count": batch.len(),
"speech_preview": texts.iter().take(3).cloned().collect::<Vec<_>>().join(" "),
}),
child_chunk_ids: batch.to_vec(),
parent_chunk_id: None,
});
// Link children to parent
for child in &mut child_chunks {
if batch.contains(&child.chunk_id) {
child.parent_chunk_id = Some(chunk_id.clone());
}
}
}
// ── Build parent chunks from CUT ──────────────────────────────
for (i, batch) in cut_child_ids.chunks(parent_chunk_size).enumerate() {
if batch.is_empty() {
continue;
}
let mut times: Vec<(f64, f64)> = Vec::new();
for child_id in batch {
if let Some(child) = child_chunks.iter().find(|c| &c.chunk_id == child_id) {
times.push((child.start_time, child.end_time));
}
}
let start_time = times.first().map(|t| t.0).unwrap_or(0.0);
let end_time = times.last().map(|t| t.1).unwrap_or(0.0);
let narrative = generate_scene_narrative(&[], start_time, end_time, batch.len());
let chunk_id = format!("story_cut_{:04}", i);
parent_chunks.push(StoryParentChunk {
chunk_id: chunk_id.clone(),
chunk_type: "story".to_string(),
source: "story_cut".to_string(),
start_time,
end_time,
text_content: narrative.clone(),
content: serde_json::json!({
"description": narrative,
"child_count": batch.len(),
"scenes": batch,
}),
child_chunk_ids: batch.to_vec(),
parent_chunk_id: None,
});
for child in &mut child_chunks {
if batch.contains(&child.chunk_id) {
child.parent_chunk_id = Some(chunk_id.clone());
}
}
}
// ── Build result ──────────────────────────────────────────────
let total_child = asr_child_ids.len() + cut_child_ids.len();
let total_parent = parent_chunks.len();
let asr_count = asr_child_ids.len();
let cut_count = cut_child_ids.len();
let result = StoryResult {
child_chunks,
parent_chunks,
stats: StoryStats {
total_child_chunks: total_child,
total_parent_chunks: total_parent,
asr_children: asr_count,
cut_children: cut_count,
},
metadata: serde_json::json!({}),
parent_chunk_size,
};
// Write output (for compatibility with Python path)
let json_str = serde_json::to_string_pretty(&result)?;
std::fs::write(output_path, &json_str)
.with_context(|| format!("Failed to write {:?}", output_path))?;
Ok(result)
}
// ── Narrative generation (matching Python logic) ──────────────────
/// Builds the one-line narrative for a parent chunk from its speech texts and
/// object labels.
///
/// - With neither texts nor objects: `"Video segment from {start}s to {end}s"`
///   (one decimal place).
/// - Otherwise: `"[{start}s-{end}s] Speech: … | Visuals: …"` (no decimals),
///   where each section appears only when its input is non-empty.
///
/// The speech section is the texts joined by spaces. When that exceeds 150
/// bytes it is cut to at most 150 bytes and suffixed with `...`. The cut is
/// moved back to the nearest character boundary so multi-byte text (e.g. CJK
/// transcripts) is never split mid-character, which would panic.
///
/// The visuals section lists up to five distinct labels in sorted order.
fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64) -> String {
    // Upper bound, in bytes, of the speech text kept before the ellipsis.
    const MAX_SPEECH_BYTES: usize = 150;

    if texts.is_empty() && objects.is_empty() {
        return format!("Video segment from {:.1}s to {:.1}s", start, end);
    }

    let mut parts: Vec<String> = Vec::with_capacity(2);

    if !texts.is_empty() {
        let combined = texts.join(" ");
        let speech = if combined.len() > MAX_SPEECH_BYTES {
            // Index 0 is always a boundary, so this loop terminates.
            let mut cut = MAX_SPEECH_BYTES;
            while !combined.is_char_boundary(cut) {
                cut -= 1;
            }
            format!("{}...", &combined[..cut])
        } else {
            combined
        };
        parts.push(format!("Speech: {}", speech));
    }

    if !objects.is_empty() {
        let mut unique: Vec<&str> = objects.iter().map(String::as_str).collect();
        unique.sort_unstable();
        unique.dedup();
        unique.truncate(5);
        parts.push(format!("Visuals: {}", unique.join(", ")));
    }

    format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | "))
}
/// Builds the one-line narrative for a parent chunk made of CUT scenes.
///
/// Up to five distinct object labels (sorted) are listed after the scene
/// count; with no labels the shorter "video scenes" wording is used. Times are
/// printed without decimals.
fn generate_scene_narrative(objects: &[String], start: f64, end: f64, scene_count: usize) -> String {
    let mut labels: Vec<&str> = objects.iter().map(String::as_str).collect();
    labels.sort();
    labels.dedup();
    labels.truncate(5);

    if labels.is_empty() {
        return format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count);
    }

    let listed = labels.join(", ");
    format!("[{:.0}s-{:.0}s] {} scenes. Visuals: {}.", start, end, scene_count, listed)
}
// ── Tests ─────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_generate_narrative_with_text() {
    // One speech text plus one object label must yield the time header and
    // both the speech and the visuals sections.
    let speech = ["Hello world".to_string()];
    let visuals = ["person".to_string()];
    let narrative = generate_narrative(&speech, &visuals, 0.0, 5.0);

    assert!(narrative.contains("[0s-5s]"));
    assert!(narrative.contains("Speech:"));
    assert!(narrative.contains("Visuals:"));
}
#[test]
fn test_generate_narrative_empty() {
    // With no texts and no objects the generic fallback sentence is used.
    let no_inputs: [String; 0] = [];
    let narrative = generate_narrative(&no_inputs, &no_inputs, 10.0, 20.0);
    assert!(narrative.contains("10.0s to 20.0s"));
}
#[test]
fn test_generate_scene_narrative() {
    // A labelled batch reports both the scene count and the label.
    let labels = ["person".to_string()];
    let narrative = generate_scene_narrative(&labels, 0.0, 10.0, 3);
    assert!(narrative.contains("3 scenes"));
    assert!(narrative.contains("person"));
}
#[test]
fn test_generate_scene_narrative_empty() {
    // Without labels the "video scenes" wording is used.
    let no_labels: [String; 0] = [];
    let narrative = generate_scene_narrative(&no_labels, 0.0, 10.0, 1);
    assert!(narrative.contains("1 video scenes"));
}
#[test]
fn test_narrative_truncation() {
    // 200 characters of speech must come back shortened and end in an ellipsis.
    let speech = ["a".repeat(200)];
    let narrative = generate_narrative(&speech, &[], 0.0, 5.0);
    assert!(narrative.len() < 200 + 50); // truncated with "..."
    assert!(narrative.ends_with("..."));
}
#[test]
fn test_story_result_serialization() {
let result = StoryResult {
@@ -187,9 +525,6 @@ mod tests {
assert_eq!(result.child_chunks.len(), 1);
assert_eq!(result.parent_chunks.len(), 1);
assert_eq!(result.stats.total_child_chunks, 1);
assert_eq!(result.stats.total_parent_chunks, 1);
assert_eq!(result.parent_chunks[0].child_chunk_ids[0], "asr_0001");
assert_eq!(result.child_chunks[0].parent_chunk_id, None);
}
#[test]
@@ -241,10 +576,89 @@ mod tests {
};
assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2);
assert!(result
.child_chunks
.iter()
.all(|c| c.parent_chunk_id.is_some()));
assert!(result.child_chunks.iter().all(|c| c.parent_chunk_id.is_some()));
assert!(result.parent_chunks[0].parent_chunk_id.is_none());
}
#[test]
fn test_native_story_empty_data() {
    // Empty ASR and CUT inputs must still succeed and produce zero chunks.
    let workdir = std::env::temp_dir().join("story_test_empty");
    let _ = std::fs::create_dir_all(&workdir);

    let stem = "test_video";
    let asr_file = workdir.join(format!("{}.asr.json", stem));
    let cut_file = workdir.join(format!("{}.cut.json", stem));
    let story_file = workdir.join(format!("{}.story.json", stem));
    std::fs::write(&asr_file, r#"{"segments":[]}"#).unwrap();
    std::fs::write(&cut_file, r#"{"scenes":[]}"#).unwrap();

    let story = try_native_story("/dummy.mp4", story_file.to_str().unwrap(), None).unwrap();
    assert_eq!(story.stats.total_child_chunks, 0);
    assert_eq!(story.stats.total_parent_chunks, 0);

    // Best-effort cleanup of the scratch directory.
    let _ = std::fs::remove_dir_all(&workdir);
}
#[test]
fn test_native_story_with_data() {
    // Three ASR segments (legacy "start"/"end" keys) and two CUT scenes must
    // give five children and one parent per source.
    let workdir = std::env::temp_dir().join("story_test_data");
    let _ = std::fs::create_dir_all(&workdir);

    let stem = "test_video";
    let asr_file = workdir.join(format!("{}.asr.json", stem));
    let cut_file = workdir.join(format!("{}.cut.json", stem));
    let story_file = workdir.join(format!("{}.story.json", stem));

    let asr_fixture = r#"{
"segments": [
{"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95},
{"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92},
{"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90}
]
}"#;
    let cut_fixture = r#"{
"scenes": [
{"scene_number": 1, "start_frame": 0, "end_frame": 150, "start_time": 0.0, "end_time": 5.0},
{"scene_number": 2, "start_frame": 150, "end_frame": 300, "start_time": 5.0, "end_time": 10.0}
]
}"#;
    std::fs::write(&asr_file, asr_fixture).unwrap();
    std::fs::write(&cut_file, cut_fixture).unwrap();

    let story = try_native_story("/dummy.mp4", story_file.to_str().unwrap(), None).unwrap();

    let stats = &story.stats;
    assert_eq!(stats.asr_children, 3);
    assert_eq!(stats.cut_children, 2);
    assert_eq!(stats.total_child_chunks, 5);
    // 3 ASR segments, parent_chunk_size=5 → 1 parent
    // 2 CUT scenes, parent_chunk_size=5 → 1 parent
    assert_eq!(stats.total_parent_chunks, 2);

    // Every ASR child must point at an ASR story parent.
    for asr_child in story.child_chunks.iter().filter(|c| c.source == "asr") {
        assert!(asr_child.parent_chunk_id.is_some());
        assert!(asr_child.parent_chunk_id.as_ref().unwrap().starts_with("story_asr_"));
    }

    // The result must also have been written to disk.
    assert!(story_file.exists());
    let written = std::fs::read_to_string(&story_file).unwrap();
    assert!(written.contains("Hello"));
    assert!(written.contains("World"));

    // Best-effort cleanup of the scratch directory.
    let _ = std::fs::remove_dir_all(&workdir);
}
}

View File

@@ -4,8 +4,10 @@ use std::path::PathBuf;
#[derive(Debug, Clone, serde::Deserialize)]
#[allow(dead_code)]
pub struct AsrSegment {
pub start: f64,
pub end: f64,
#[serde(alias = "start")]
pub start_time: f64,
#[serde(alias = "end")]
pub end_time: f64,
pub text: String,
}
@@ -103,7 +105,7 @@ impl AsrOverlay {
self.current_text = String::new();
for segment in &self.segments {
if current_time >= segment.start && current_time <= segment.end {
if current_time >= segment.start_time && current_time <= segment.end_time {
self.current_text = segment.text.clone();
break;
}

View File

@@ -755,8 +755,11 @@ impl ProcessorPool {
.iter()
.enumerate()
.map(|(i, segment)| {
let start_frame = (segment.start * fps).round() as i64;
let end_frame = (segment.end * fps).round() as i64;
// Prefer ASR output frames, fallback to time-based conversion
let start_frame = segment.start_frame
.unwrap_or_else(|| (segment.start_time * fps).round() as i64);
let end_frame = segment.end_frame
.unwrap_or_else(|| (segment.end_time * fps).round() as i64);
let data = serde_json::json!({
"text": segment.text,
"text_normalized": segment.text.to_lowercase(),
@@ -767,8 +770,8 @@ impl ProcessorPool {
i as i64,
start_frame,
end_frame,
segment.start,
segment.end,
segment.start_time,
segment.end_time,
data,
)
})