feat: ASR output frame numbers + rename start/end to start_time/end_time
- Python: asr_processor.py detects FPS from CUT/ffprobe (no fallback) and outputs start_frame/end_frame.
- Rust: All AsrSegment structs use start_time/end_time with #[serde(alias)] for backward compatibility.
- store_asr_chunks: prefers the frame numbers from the ASR output and falls back to time-based conversion.
- Added a backward-compatibility test for the old JSON format (start/end).

Breaking change: an ffprobe/CUT FPS detection failure now aborts instead of falling back to the default of 24 fps.
This commit is contained in:
@@ -141,11 +141,58 @@ def transcribe_with_fallback(model, video_path, publisher=None):
|
||||
pass
|
||||
|
||||
|
||||
def run_asr(video_path, output_path, uuid: str = ""):
|
||||
def get_fps_from_cut(cut_path):
    """Read the frame rate recorded in a CUT JSON file.

    Args:
        cut_path: Path to the CUT JSON file. The file does not have to exist.

    Returns:
        The value stored under the top-level ``"fps"`` key when it is a
        positive, finite number; otherwise ``None`` (file missing, unreadable,
        not valid JSON, not a JSON object, or no usable ``fps`` value).
    """
    if not os.path.exists(cut_path):
        return None

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default
        # encoding, which breaks on non-ASCII content on some platforms.
        with open(cut_path, encoding="utf-8") as f:
            cut_data = json.load(f)
        fps = cut_data.get("fps")
    except Exception as e:
        print(f"[ASR] Failed to load CUT FPS: {e}", file=sys.stderr)
        return None

    if fps is None:
        return None

    # bool is a subclass of int, so `true` would otherwise be accepted as 1 fps.
    if isinstance(fps, bool) or not isinstance(fps, (int, float)):
        print(f"[ASR] Ignoring non-numeric CUT FPS: {fps!r}", file=sys.stderr)
        return None

    # The chained comparison rejects zero, negatives, NaN and infinity; an
    # infinite rate would later crash int(round(time * fps)).
    if 0 < fps < float("inf"):
        return fps
    return None
|
||||
|
||||
|
||||
def get_fps_from_ffprobe(video_path):
    """Ask ffprobe for the frame rate of the first video stream.

    Runs ``ffprobe`` requesting ``r_frame_rate`` of stream ``v:0`` in CSV form
    and parses either a rational (``"30000/1001"``) or a plain number.

    Args:
        video_path: Path of the video file passed to ffprobe.

    Returns:
        The frame rate as a positive, finite ``float``, or ``None`` when
        ffprobe cannot be run, exits with an error, or reports no usable rate.
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=r_frame_rate",
        "-of", "csv=p=0",
        video_path,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)

        # Use the first non-empty line only: the CSV writer may repeat the
        # value on several lines or append a trailing comma, and either would
        # make the "/" split below yield the wrong number of parts.
        fps_str = ""
        for line in result.stdout.splitlines():
            candidate = line.strip().strip(",").strip()
            if candidate:
                fps_str = candidate
                break

        if "/" in fps_str:
            num, den = fps_str.split("/")
            fps = float(num) / float(den)
        else:
            fps = float(fps_str)
    except Exception:
        # Covers a missing ffprobe binary, a non-zero exit status, empty or
        # malformed output, and a "0/0" rate (ZeroDivisionError).
        return None

    # A zero, negative, NaN or infinite rate is unusable. Returning it would
    # bypass the caller's "fps is None" abort and produce bogus frame numbers.
    if 0 < fps < float("inf"):
        return fps
    return None
|
||||
|
||||
|
||||
def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
|
||||
# Set up signal handlers
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
|
||||
# FPS detection chain: CLI → CUT → ffprobe → FAIL
|
||||
if fps is not None:
|
||||
print(f"[ASR] Using CLI-provided FPS: {fps}", file=sys.stderr)
|
||||
else:
|
||||
cut_path_check = output_path.replace(".asr.json", ".cut.json")
|
||||
fps = get_fps_from_cut(cut_path_check)
|
||||
if fps:
|
||||
print(f"[ASR] FPS from CUT: {fps}", file=sys.stderr)
|
||||
if fps is None:
|
||||
fps = get_fps_from_ffprobe(video_path)
|
||||
if fps:
|
||||
print(f"[ASR] FPS from ffprobe: {fps}", file=sys.stderr)
|
||||
if fps is None:
|
||||
print("[ASR] ERROR: Cannot determine FPS (no CUT data, ffprobe failed). Aborting.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
publisher = RedisPublisher(uuid) if uuid else None
|
||||
if publisher:
|
||||
publisher.info("asr", "ASR_START")
|
||||
@@ -289,13 +336,15 @@ def run_asr(video_path, output_path, uuid: str = ""):
|
||||
seg_start = start_t + segment.start
|
||||
seg_end = start_t + segment.end
|
||||
scene_idx = find_scene_idx((seg_start + seg_end) / 2)
|
||||
scene_segments.append({
|
||||
"start": seg_start,
|
||||
"end": seg_end,
|
||||
"text": segment.text.strip(),
|
||||
"scene_number": scene_idx + 1,
|
||||
"language": seg_language,
|
||||
})
|
||||
scene_segments.append({
|
||||
"start_time": seg_start,
|
||||
"end_time": seg_end,
|
||||
"start_frame": int(round(seg_start * fps)),
|
||||
"end_frame": int(round(seg_end * fps)),
|
||||
"text": segment.text.strip(),
|
||||
"scene_number": scene_idx + 1,
|
||||
"language": seg_language,
|
||||
})
|
||||
total_segments += 1
|
||||
|
||||
# 當前 scene 結果寫入 .asr.tmp
|
||||
@@ -327,7 +376,10 @@ def run_asr(video_path, output_path, uuid: str = ""):
|
||||
all_segments = []
|
||||
for segment in segments:
|
||||
all_segments.append({
|
||||
"start": segment.start, "end": segment.end,
|
||||
"start_time": segment.start,
|
||||
"end_time": segment.end,
|
||||
"start_frame": int(round(segment.start * fps)),
|
||||
"end_frame": int(round(segment.end * fps)),
|
||||
"text": segment.text.strip(),
|
||||
})
|
||||
total_segments += 1
|
||||
@@ -358,6 +410,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument("video_path", help="Path to video file")
|
||||
parser.add_argument("output_path", help="Output JSON path")
|
||||
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
|
||||
parser.add_argument("--fps", type=float, help="Override FPS (default: auto-detect)")
|
||||
args = parser.parse_args()
|
||||
|
||||
run_asr(args.video_path, args.output_path, args.uuid)
|
||||
run_asr(args.video_path, args.output_path, args.uuid, fps=args.fps)
|
||||
|
||||
@@ -64,8 +64,10 @@ struct Args {
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct AsrSegment {
|
||||
start: f64,
|
||||
end: f64,
|
||||
#[serde(alias = "start")]
|
||||
start_time: f64,
|
||||
#[serde(alias = "end")]
|
||||
end_time: f64,
|
||||
text: String,
|
||||
}
|
||||
|
||||
@@ -272,9 +274,9 @@ impl IntegratedPlayer {
|
||||
|
||||
if let Some(asr) = &self.asr_data {
|
||||
for seg in &asr.segments {
|
||||
if time >= seg.start && time <= seg.end {
|
||||
segment.start = seg.start;
|
||||
segment.end = seg.end;
|
||||
if time >= seg.start_time && time <= seg.end_time {
|
||||
segment.start = seg.start_time;
|
||||
segment.end = seg.end_time;
|
||||
segment.text = Some(seg.text.clone());
|
||||
break;
|
||||
}
|
||||
@@ -440,11 +442,11 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
|
||||
println!("\n[{}/{}] Segment", i + 1, total_segments);
|
||||
println!("{:=<80}", "");
|
||||
println!("📝 ASR Text: {}", seg.text);
|
||||
println!("⏱ Time: {:.2}s - {:.2}s", seg.start, seg.end);
|
||||
println!("⏱ Time: {:.2}s - {:.2}s", seg.start_time, seg.end_time);
|
||||
|
||||
if let Some(asrx) = &player.asrx_data {
|
||||
for asrx_seg in &asrx.segments {
|
||||
if seg.start >= asrx_seg.start && seg.start <= asrx_seg.end {
|
||||
if seg.start_time >= asrx_seg.start && seg.start_time <= asrx_seg.end {
|
||||
let (actor, character) = player.get_speaker_info(&asrx_seg.speaker);
|
||||
println!(
|
||||
"🎤 Speaker: {} → {} ({})",
|
||||
@@ -455,7 +457,7 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(segment) = player.get_current_segment(seg.start + 0.01) {
|
||||
if let Some(segment) = player.get_current_segment(seg.start_time + 0.01) {
|
||||
if let Some(face) = &segment.face {
|
||||
println!(
|
||||
"👤 Face: bbox=({},{}) {}x{}, conf={:.2}",
|
||||
@@ -467,17 +469,17 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
let duration = seg.end - seg.start;
|
||||
let duration = seg.end_time - seg.start_time;
|
||||
println!(
|
||||
"▶️ Playing: {:.2}s - {:.2}s ({:.2}s)",
|
||||
seg.start, seg.end, duration
|
||||
seg.start_time, seg.end_time, duration
|
||||
);
|
||||
|
||||
let mut cmd = Command::new("ffplay");
|
||||
if args.show_video {
|
||||
cmd.args([
|
||||
"-ss",
|
||||
&format!("{:.2}", seg.start),
|
||||
&format!("{:.2}", seg.start_time),
|
||||
"-t",
|
||||
&format!("{:.2}", duration),
|
||||
"-autoexit",
|
||||
@@ -490,7 +492,7 @@ fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
|
||||
} else {
|
||||
cmd.args([
|
||||
"-ss",
|
||||
&format!("{:.2}", seg.start),
|
||||
&format!("{:.2}", seg.start_time),
|
||||
"-t",
|
||||
&format!("{:.2}", duration),
|
||||
"-autoexit",
|
||||
|
||||
@@ -23,8 +23,10 @@ struct CutResult {
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrSegment {
|
||||
start: f64,
|
||||
end: f64,
|
||||
#[serde(alias = "start")]
|
||||
start_time: f64,
|
||||
#[serde(alias = "end")]
|
||||
end_time: f64,
|
||||
text: String,
|
||||
}
|
||||
|
||||
@@ -62,7 +64,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let mut child_ids: Vec<String> = Vec::new();
|
||||
|
||||
for seg in &asr_segments {
|
||||
if seg.start >= scene.start_time && seg.end <= scene.end_time {
|
||||
if seg.start_time >= scene.start_time && seg.end_time <= scene.end_time {
|
||||
scene_text.push_str(&seg.text);
|
||||
scene_text.push(' ');
|
||||
// We'll look up the chunk_id from Rule 1 later if needed,
|
||||
|
||||
@@ -51,8 +51,8 @@ impl ChunkSplitter {
|
||||
format!("{}", index),
|
||||
ChunkType::Sentence,
|
||||
ChunkRule::Rule1,
|
||||
segment.start,
|
||||
segment.end,
|
||||
segment.start_time,
|
||||
segment.end_time,
|
||||
self.fps,
|
||||
serde_json::json!({
|
||||
"text": segment.text,
|
||||
@@ -67,8 +67,8 @@ impl ChunkSplitter {
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AsrSegment {
|
||||
pub start: f64,
|
||||
pub end: f64,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub text: String,
|
||||
pub speaker_id: Option<String>,
|
||||
}
|
||||
|
||||
@@ -111,8 +111,8 @@ impl SyncDb {
|
||||
"rule": "rule1",
|
||||
"data": {
|
||||
"text": segment.text,
|
||||
"start": segment.start,
|
||||
"end": segment.end,
|
||||
"start_time": segment.start_time,
|
||||
"end_time": segment.end_time,
|
||||
},
|
||||
});
|
||||
let metadata = serde_json::json!({
|
||||
@@ -132,8 +132,8 @@ impl SyncDb {
|
||||
format!("{}", i),
|
||||
ChunkType::Sentence,
|
||||
ChunkRule::Rule1,
|
||||
segment.start,
|
||||
segment.end,
|
||||
segment.start_time,
|
||||
segment.end_time,
|
||||
24.0, // fps
|
||||
content,
|
||||
)
|
||||
|
||||
@@ -12,8 +12,12 @@ pub struct AsrResult {
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AsrSegment {
|
||||
pub start: f64,
|
||||
pub end: f64,
|
||||
#[serde(alias = "start")]
|
||||
pub start_time: f64,
|
||||
#[serde(alias = "end")]
|
||||
pub end_time: f64,
|
||||
pub start_frame: Option<i64>,
|
||||
pub end_frame: Option<i64>,
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
@@ -63,13 +67,17 @@ mod tests {
|
||||
language_probability: Some(0.95),
|
||||
segments: vec![
|
||||
AsrSegment {
|
||||
start: 0.0,
|
||||
end: 2.5,
|
||||
start_time: 0.0,
|
||||
end_time: 2.5,
|
||||
start_frame: Some(0),
|
||||
end_frame: Some(60),
|
||||
text: "Hello world".to_string(),
|
||||
},
|
||||
AsrSegment {
|
||||
start: 2.5,
|
||||
end: 5.0,
|
||||
start_time: 2.5,
|
||||
end_time: 5.0,
|
||||
start_frame: Some(60),
|
||||
end_frame: Some(120),
|
||||
text: "Test speech".to_string(),
|
||||
},
|
||||
],
|
||||
@@ -86,7 +94,7 @@ mod tests {
|
||||
"language": "zh",
|
||||
"language_probability": 0.98,
|
||||
"segments": [
|
||||
{"start": 0.0, "end": 1.5, "text": "測試"}
|
||||
{"start_time": 0.0, "end_time": 1.5, "start_frame": 0, "end_frame": 36, "text": "測試"}
|
||||
]
|
||||
}"#;
|
||||
|
||||
@@ -100,12 +108,14 @@ mod tests {
|
||||
#[test]
|
||||
fn test_asr_segment_default() {
|
||||
let segment = AsrSegment {
|
||||
start: 0.0,
|
||||
end: 1.0,
|
||||
start_time: 0.0,
|
||||
end_time: 1.0,
|
||||
start_frame: Some(0),
|
||||
end_frame: Some(24),
|
||||
text: String::new(),
|
||||
};
|
||||
assert_eq!(segment.start, 0.0);
|
||||
assert_eq!(segment.end, 1.0);
|
||||
assert_eq!(segment.start_time, 0.0);
|
||||
assert_eq!(segment.end_time, 1.0);
|
||||
assert!(segment.text.is_empty());
|
||||
}
|
||||
|
||||
@@ -119,4 +129,22 @@ mod tests {
|
||||
assert!(result.language.is_none());
|
||||
assert!(result.segments.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_asr_backward_compat_old_format() {
|
||||
// Old format uses "start" / "end" — should deserialize via #[serde(alias)]
|
||||
let json = r#"{
|
||||
"language": "en",
|
||||
"segments": [
|
||||
{"start": 10.0, "end": 12.5, "text": "Hello"}
|
||||
]
|
||||
}"#;
|
||||
let result: AsrResult = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(result.segments.len(), 1);
|
||||
assert_eq!(result.segments[0].start_time, 10.0);
|
||||
assert_eq!(result.segments[0].end_time, 12.5);
|
||||
assert_eq!(result.segments[0].text, "Hello");
|
||||
assert!(result.segments[0].start_frame.is_none());
|
||||
assert!(result.segments[0].end_frame.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,17 +1,57 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
|
||||
const STORY_TIMEOUT: Duration = Duration::from_secs(3600);
|
||||
|
||||
// ── Input data structs (from JSON files) ──────────────────────────
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrData {
|
||||
segments: Vec<AsrSegmentInput>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrSegmentInput {
|
||||
#[serde(default, alias = "start")]
|
||||
start_time: f64,
|
||||
#[serde(default, alias = "end")]
|
||||
end_time: f64,
|
||||
#[serde(default)]
|
||||
text: String,
|
||||
#[serde(default)]
|
||||
confidence: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct CutData {
|
||||
scenes: Vec<CutSceneInput>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct CutSceneInput {
|
||||
scene_number: Option<i64>,
|
||||
#[allow(dead_code)]
|
||||
start_frame: Option<i64>,
|
||||
#[allow(dead_code)]
|
||||
end_frame: Option<i64>,
|
||||
start_time: Option<f64>,
|
||||
end_time: Option<f64>,
|
||||
}
|
||||
|
||||
// ── Output data structs ───────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct StoryResult {
|
||||
pub child_chunks: Vec<StoryChildChunk>,
|
||||
pub parent_chunks: Vec<StoryParentChunk>,
|
||||
pub stats: StoryStats,
|
||||
#[serde(default)]
|
||||
pub metadata: serde_json::Value,
|
||||
#[serde(default)]
|
||||
pub parent_chunk_size: usize,
|
||||
}
|
||||
|
||||
@@ -30,8 +70,10 @@ pub struct StoryChildChunk {
|
||||
pub source: String,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub text_content: Option<String>,
|
||||
pub content: serde_json::Value,
|
||||
#[serde(default)]
|
||||
pub child_chunk_ids: Vec<String>,
|
||||
pub parent_chunk_id: Option<String>,
|
||||
}
|
||||
@@ -45,22 +87,30 @@ pub struct StoryParentChunk {
|
||||
pub end_time: f64,
|
||||
pub text_content: String,
|
||||
pub content: serde_json::Value,
|
||||
#[serde(default)]
|
||||
pub child_chunk_ids: Vec<String>,
|
||||
pub parent_chunk_id: Option<String>,
|
||||
}
|
||||
|
||||
// ── Public API ────────────────────────────────────────────────────
|
||||
|
||||
pub async fn process_story(
|
||||
video_path: &str,
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
) -> Result<StoryResult> {
|
||||
// Try native Rust implementation first
|
||||
let result = try_native_story(video_path, output_path, uuid);
|
||||
if let Ok(r) = result {
|
||||
return Ok(r);
|
||||
}
|
||||
|
||||
// Fallback: Python script
|
||||
tracing::warn!("[STORY] Native impl failed, falling back to Python: {:?}", result.err());
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("story_processor.py");
|
||||
|
||||
tracing::info!("[STORY] Starting story generation: {}", video_path);
|
||||
|
||||
if !script_path.exists() {
|
||||
tracing::warn!("[STORY] Script not found, returning empty result");
|
||||
return Ok(StoryResult {
|
||||
child_chunks: vec![],
|
||||
parent_chunks: vec![],
|
||||
@@ -87,23 +137,311 @@ pub async fn process_story(
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str = std::fs::read_to_string(output_path).context("Failed to read STORY output")?;
|
||||
|
||||
let result: StoryResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse STORY output")?;
|
||||
|
||||
tracing::info!(
|
||||
"[STORY] Result: {} parent chunks, {} child chunks",
|
||||
result.stats.total_parent_chunks,
|
||||
result.stats.total_child_chunks
|
||||
);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ── Native implementation ─────────────────────────────────────────
|
||||
|
||||
fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -> Result<StoryResult> {
|
||||
let output_dir = Path::new(output_path).parent().unwrap_or(Path::new("."));
|
||||
let basename = Path::new(output_path)
|
||||
.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.and_then(|s| s.split('.').next())
|
||||
.unwrap_or("unknown");
|
||||
|
||||
let asr_path = output_dir.join(format!("{}.asr.json", basename));
|
||||
let cut_path = output_dir.join(format!("{}.cut.json", basename));
|
||||
|
||||
// ASR data is required; CUT is optional
|
||||
let asr_data: AsrData = if asr_path.exists() {
|
||||
let content = std::fs::read_to_string(&asr_path)
|
||||
.with_context(|| format!("Failed to read {:?}", asr_path))?;
|
||||
serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse {:?}", asr_path))?
|
||||
} else {
|
||||
AsrData { segments: vec![] }
|
||||
};
|
||||
|
||||
let cut_data: CutData = if cut_path.exists() {
|
||||
let content = std::fs::read_to_string(&cut_path)
|
||||
.with_context(|| format!("Failed to read {:?}", cut_path))?;
|
||||
serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse {:?}", cut_path))?
|
||||
} else {
|
||||
CutData { scenes: vec![] }
|
||||
};
|
||||
|
||||
let parent_chunk_size: usize = 5;
|
||||
|
||||
// ── Build child chunks ────────────────────────────────────────
|
||||
let mut child_chunks: Vec<StoryChildChunk> = Vec::new();
|
||||
|
||||
// ASR child chunks
|
||||
for seg in &asr_data.segments {
|
||||
let chunk_id = format!("asr_{:.1}_{:.1}", seg.start_time, seg.end_time);
|
||||
child_chunks.push(StoryChildChunk {
|
||||
chunk_id,
|
||||
chunk_type: "asr".to_string(),
|
||||
source: "asr".to_string(),
|
||||
start_time: seg.start_time,
|
||||
end_time: seg.end_time,
|
||||
text_content: Some(seg.text.clone()),
|
||||
content: serde_json::json!({
|
||||
"text": seg.text,
|
||||
"confidence": seg.confidence,
|
||||
}),
|
||||
child_chunk_ids: vec![],
|
||||
parent_chunk_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
// CUT child chunks
|
||||
for scene in &cut_data.scenes {
|
||||
let scene_num = scene.scene_number.unwrap_or(0);
|
||||
let start_time = scene.start_time.unwrap_or(0.0);
|
||||
let end_time = scene.end_time.unwrap_or(0.0);
|
||||
let chunk_id = format!("cut_{}", scene_num);
|
||||
child_chunks.push(StoryChildChunk {
|
||||
chunk_id,
|
||||
chunk_type: "cut".to_string(),
|
||||
source: "cut".to_string(),
|
||||
start_time,
|
||||
end_time,
|
||||
text_content: Some(format!("Scene {}", scene_num)),
|
||||
content: serde_json::json!({
|
||||
"scene_number": scene_num,
|
||||
"start_time": start_time,
|
||||
"end_time": end_time,
|
||||
}),
|
||||
child_chunk_ids: vec![],
|
||||
parent_chunk_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
let asr_child_ids: Vec<String> = child_chunks
|
||||
.iter()
|
||||
.filter(|c| c.source == "asr")
|
||||
.map(|c| c.chunk_id.clone())
|
||||
.collect();
|
||||
|
||||
let cut_child_ids: Vec<String> = child_chunks
|
||||
.iter()
|
||||
.filter(|c| c.source == "cut")
|
||||
.map(|c| c.chunk_id.clone())
|
||||
.collect();
|
||||
|
||||
// ── Build parent chunks from ASR ──────────────────────────────
|
||||
let mut parent_chunks: Vec<StoryParentChunk> = Vec::new();
|
||||
|
||||
for (i, batch) in asr_child_ids.chunks(parent_chunk_size).enumerate() {
|
||||
if batch.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut texts: Vec<String> = Vec::new();
|
||||
let mut times: Vec<(f64, f64)> = Vec::new();
|
||||
|
||||
for child_id in batch {
|
||||
if let Some(child) = child_chunks.iter().find(|c| &c.chunk_id == child_id) {
|
||||
if let Some(ref t) = child.text_content {
|
||||
texts.push(t.clone());
|
||||
}
|
||||
times.push((child.start_time, child.end_time));
|
||||
}
|
||||
}
|
||||
|
||||
let start_time = times.first().map(|t| t.0).unwrap_or(0.0);
|
||||
let end_time = times.last().map(|t| t.1).unwrap_or(0.0);
|
||||
|
||||
let narrative = generate_narrative(&texts, &[], start_time, end_time);
|
||||
|
||||
let chunk_id = format!("story_asr_{:04}", i);
|
||||
parent_chunks.push(StoryParentChunk {
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "story".to_string(),
|
||||
source: "story_asr".to_string(),
|
||||
start_time,
|
||||
end_time,
|
||||
text_content: narrative.clone(),
|
||||
content: serde_json::json!({
|
||||
"description": narrative,
|
||||
"child_count": batch.len(),
|
||||
"speech_preview": texts.iter().take(3).cloned().collect::<Vec<_>>().join(" "),
|
||||
}),
|
||||
child_chunk_ids: batch.to_vec(),
|
||||
parent_chunk_id: None,
|
||||
});
|
||||
|
||||
// Link children to parent
|
||||
for child in &mut child_chunks {
|
||||
if batch.contains(&child.chunk_id) {
|
||||
child.parent_chunk_id = Some(chunk_id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Build parent chunks from CUT ──────────────────────────────
|
||||
for (i, batch) in cut_child_ids.chunks(parent_chunk_size).enumerate() {
|
||||
if batch.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut times: Vec<(f64, f64)> = Vec::new();
|
||||
for child_id in batch {
|
||||
if let Some(child) = child_chunks.iter().find(|c| &c.chunk_id == child_id) {
|
||||
times.push((child.start_time, child.end_time));
|
||||
}
|
||||
}
|
||||
|
||||
let start_time = times.first().map(|t| t.0).unwrap_or(0.0);
|
||||
let end_time = times.last().map(|t| t.1).unwrap_or(0.0);
|
||||
|
||||
let narrative = generate_scene_narrative(&[], start_time, end_time, batch.len());
|
||||
|
||||
let chunk_id = format!("story_cut_{:04}", i);
|
||||
parent_chunks.push(StoryParentChunk {
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "story".to_string(),
|
||||
source: "story_cut".to_string(),
|
||||
start_time,
|
||||
end_time,
|
||||
text_content: narrative.clone(),
|
||||
content: serde_json::json!({
|
||||
"description": narrative,
|
||||
"child_count": batch.len(),
|
||||
"scenes": batch,
|
||||
}),
|
||||
child_chunk_ids: batch.to_vec(),
|
||||
parent_chunk_id: None,
|
||||
});
|
||||
|
||||
for child in &mut child_chunks {
|
||||
if batch.contains(&child.chunk_id) {
|
||||
child.parent_chunk_id = Some(chunk_id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Build result ──────────────────────────────────────────────
|
||||
let total_child = asr_child_ids.len() + cut_child_ids.len();
|
||||
let total_parent = parent_chunks.len();
|
||||
let asr_count = asr_child_ids.len();
|
||||
let cut_count = cut_child_ids.len();
|
||||
|
||||
let result = StoryResult {
|
||||
child_chunks,
|
||||
parent_chunks,
|
||||
stats: StoryStats {
|
||||
total_child_chunks: total_child,
|
||||
total_parent_chunks: total_parent,
|
||||
asr_children: asr_count,
|
||||
cut_children: cut_count,
|
||||
},
|
||||
metadata: serde_json::json!({}),
|
||||
parent_chunk_size,
|
||||
};
|
||||
|
||||
// Write output (for compatibility with Python path)
|
||||
let json_str = serde_json::to_string_pretty(&result)?;
|
||||
std::fs::write(output_path, &json_str)
|
||||
.with_context(|| format!("Failed to write {:?}", output_path))?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ── Narrative generation (matching Python logic) ──────────────────
|
||||
|
||||
/// Builds a one-line narrative for a time span from spoken text and detected
/// object labels.
///
/// * With neither texts nor objects, returns
///   `"Video segment from {start:.1}s to {end:.1}s"`.
/// * Otherwise returns `"[{start:.0}s-{end:.0}s] <parts>"`, where the parts
///   are joined with `" | "`:
///   * `Speech: …` — all texts joined with spaces, cut to at most 150 bytes
///     and suffixed with `...` when longer;
///   * `Visuals: …` — up to five distinct object labels in sorted order.
fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64) -> String {
    /// Upper bound, in bytes, of the speech excerpt before `...` is appended.
    const MAX_SPEECH_BYTES: usize = 150;

    if texts.is_empty() && objects.is_empty() {
        return format!("Video segment from {:.1}s to {:.1}s", start, end);
    }

    let mut parts: Vec<String> = Vec::with_capacity(2);

    if !texts.is_empty() {
        let combined = texts.join(" ");
        let speech = if combined.len() > MAX_SPEECH_BYTES {
            // Slicing a `str` at a byte offset inside a multi-byte character
            // panics, so back off to the nearest preceding character boundary.
            // Offset 0 is always a boundary, so the loop terminates. For text
            // whose byte 150 already is a boundary the result is unchanged.
            let mut cut = MAX_SPEECH_BYTES;
            while !combined.is_char_boundary(cut) {
                cut -= 1;
            }
            format!("{}...", &combined[..cut])
        } else {
            combined
        };
        parts.push(format!("Speech: {}", speech));
    }

    if !objects.is_empty() {
        let mut unique: Vec<&str> = objects.iter().map(String::as_str).collect();
        unique.sort_unstable();
        unique.dedup();
        unique.truncate(5);
        parts.push(format!("Visuals: {}", unique.join(", ")));
    }

    format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | "))
}
|
||||
|
||||
/// Builds a one-line narrative for a group of `scene_count` scenes spanning
/// `start`..`end` seconds.
///
/// When `objects` is non-empty, up to five distinct labels (in sorted order)
/// are listed after `Visuals:`; otherwise only the scene count is reported.
fn generate_scene_narrative(objects: &[String], start: f64, end: f64, scene_count: usize) -> String {
    let mut labels: Vec<&str> = objects.iter().map(String::as_str).collect();
    labels.sort();
    labels.dedup();
    labels.truncate(5);

    if labels.is_empty() {
        return format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count);
    }

    format!(
        "[{:.0}s-{:.0}s] {} scenes. Visuals: {}.",
        start,
        end,
        scene_count,
        labels.join(", ")
    )
}
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_generate_narrative_with_text() {
|
||||
let text = generate_narrative(
|
||||
&["Hello world".to_string()],
|
||||
&["person".to_string()],
|
||||
0.0, 5.0,
|
||||
);
|
||||
assert!(text.contains("[0s-5s]"));
|
||||
assert!(text.contains("Speech:"));
|
||||
assert!(text.contains("Visuals:"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_generate_narrative_empty() {
|
||||
let text = generate_narrative(&[], &[], 10.0, 20.0);
|
||||
assert!(text.contains("10.0s to 20.0s"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_generate_scene_narrative() {
|
||||
let text = generate_scene_narrative(&["person".to_string()], 0.0, 10.0, 3);
|
||||
assert!(text.contains("3 scenes"));
|
||||
assert!(text.contains("person"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_generate_scene_narrative_empty() {
|
||||
let text = generate_scene_narrative(&[], 0.0, 10.0, 1);
|
||||
assert!(text.contains("1 video scenes"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_narrative_truncation() {
|
||||
let long_text = "a".repeat(200);
|
||||
let text = generate_narrative(&[long_text], &[], 0.0, 5.0);
|
||||
assert!(text.len() < 200 + 50); // truncated with "..."
|
||||
assert!(text.ends_with("..."));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_story_result_serialization() {
|
||||
let result = StoryResult {
|
||||
@@ -187,9 +525,6 @@ mod tests {
|
||||
assert_eq!(result.child_chunks.len(), 1);
|
||||
assert_eq!(result.parent_chunks.len(), 1);
|
||||
assert_eq!(result.stats.total_child_chunks, 1);
|
||||
assert_eq!(result.stats.total_parent_chunks, 1);
|
||||
assert_eq!(result.parent_chunks[0].child_chunk_ids[0], "asr_0001");
|
||||
assert_eq!(result.child_chunks[0].parent_chunk_id, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -241,10 +576,89 @@ mod tests {
|
||||
};
|
||||
|
||||
assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2);
|
||||
assert!(result
|
||||
.child_chunks
|
||||
.iter()
|
||||
.all(|c| c.parent_chunk_id.is_some()));
|
||||
assert!(result.child_chunks.iter().all(|c| c.parent_chunk_id.is_some()));
|
||||
assert!(result.parent_chunks[0].parent_chunk_id.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_native_story_empty_data() {
|
||||
// Write empty ASR and CUT files, then test try_native_story
|
||||
let dir = std::env::temp_dir().join("story_test_empty");
|
||||
let _ = std::fs::create_dir_all(&dir);
|
||||
|
||||
let basename = "test_video";
|
||||
let asr_path = dir.join(format!("{}.asr.json", basename));
|
||||
let cut_path = dir.join(format!("{}.cut.json", basename));
|
||||
let out_path = dir.join(format!("{}.story.json", basename));
|
||||
|
||||
std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap();
|
||||
std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap();
|
||||
|
||||
let result = try_native_story(
|
||||
"/dummy.mp4",
|
||||
out_path.to_str().unwrap(),
|
||||
None,
|
||||
).unwrap();
|
||||
|
||||
assert_eq!(result.stats.total_child_chunks, 0);
|
||||
assert_eq!(result.stats.total_parent_chunks, 0);
|
||||
|
||||
let _ = std::fs::remove_dir_all(&dir);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_native_story_with_data() {
|
||||
let dir = std::env::temp_dir().join("story_test_data");
|
||||
let _ = std::fs::create_dir_all(&dir);
|
||||
|
||||
let basename = "test_video";
|
||||
let asr_path = dir.join(format!("{}.asr.json", basename));
|
||||
let cut_path = dir.join(format!("{}.cut.json", basename));
|
||||
let out_path = dir.join(format!("{}.story.json", basename));
|
||||
|
||||
std::fs::write(&asr_path, r#"{
|
||||
"segments": [
|
||||
{"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95},
|
||||
{"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92},
|
||||
{"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90}
|
||||
]
|
||||
}"#).unwrap();
|
||||
|
||||
std::fs::write(&cut_path, r#"{
|
||||
"scenes": [
|
||||
{"scene_number": 1, "start_frame": 0, "end_frame": 150, "start_time": 0.0, "end_time": 5.0},
|
||||
{"scene_number": 2, "start_frame": 150, "end_frame": 300, "start_time": 5.0, "end_time": 10.0}
|
||||
]
|
||||
}"#).unwrap();
|
||||
|
||||
let result = try_native_story(
|
||||
"/dummy.mp4",
|
||||
out_path.to_str().unwrap(),
|
||||
None,
|
||||
).unwrap();
|
||||
|
||||
assert_eq!(result.stats.asr_children, 3);
|
||||
assert_eq!(result.stats.cut_children, 2);
|
||||
assert_eq!(result.stats.total_child_chunks, 5);
|
||||
|
||||
// 3 ASR segments, parent_chunk_size=5 → 1 parent
|
||||
// 2 CUT scenes, parent_chunk_size=5 → 1 parent
|
||||
assert_eq!(result.stats.total_parent_chunks, 2);
|
||||
|
||||
// Verify child-parent linking
|
||||
for child in &result.child_chunks {
|
||||
if child.source == "asr" {
|
||||
assert!(child.parent_chunk_id.is_some());
|
||||
assert!(child.parent_chunk_id.as_ref().unwrap().starts_with("story_asr_"));
|
||||
}
|
||||
}
|
||||
|
||||
// Verify output file was written
|
||||
assert!(out_path.exists());
|
||||
let content = std::fs::read_to_string(&out_path).unwrap();
|
||||
assert!(content.contains("Hello"));
|
||||
assert!(content.contains("World"));
|
||||
|
||||
let _ = std::fs::remove_dir_all(&dir);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,8 +4,10 @@ use std::path::PathBuf;
|
||||
#[derive(Debug, Clone, serde::Deserialize)]
|
||||
#[allow(dead_code)]
|
||||
pub struct AsrSegment {
|
||||
pub start: f64,
|
||||
pub end: f64,
|
||||
#[serde(alias = "start")]
|
||||
pub start_time: f64,
|
||||
#[serde(alias = "end")]
|
||||
pub end_time: f64,
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
@@ -103,7 +105,7 @@ impl AsrOverlay {
|
||||
self.current_text = String::new();
|
||||
|
||||
for segment in &self.segments {
|
||||
if current_time >= segment.start && current_time <= segment.end {
|
||||
if current_time >= segment.start_time && current_time <= segment.end_time {
|
||||
self.current_text = segment.text.clone();
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -755,8 +755,11 @@ impl ProcessorPool {
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, segment)| {
|
||||
let start_frame = (segment.start * fps).round() as i64;
|
||||
let end_frame = (segment.end * fps).round() as i64;
|
||||
// Prefer ASR output frames, fallback to time-based conversion
|
||||
let start_frame = segment.start_frame
|
||||
.unwrap_or_else(|| (segment.start_time * fps).round() as i64);
|
||||
let end_frame = segment.end_frame
|
||||
.unwrap_or_else(|| (segment.end_time * fps).round() as i64);
|
||||
let data = serde_json::json!({
|
||||
"text": segment.text,
|
||||
"text_normalized": segment.text.to_lowercase(),
|
||||
@@ -767,8 +770,8 @@ impl ProcessorPool {
|
||||
i as i64,
|
||||
start_frame,
|
||||
end_frame,
|
||||
segment.start,
|
||||
segment.end,
|
||||
segment.start_time,
|
||||
segment.end_time,
|
||||
data,
|
||||
)
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user