From e1e2da2140eeddbd1bc142bf68a612cd0726f83d Mon Sep 17 00:00:00 2001 From: Accusys Date: Mon, 22 Jun 2026 23:33:39 +0800 Subject: [PATCH] fix: processor-counts API + ASRX field name conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix processor-counts API to correctly read JSON counts: - YOLO: use frames.length (was returning null) - CUT: prioritize scenes.length over frame_count - Result: YOLO 1963 frames, CUT 25 scenes (correct) - Fix ASRX field name conversion: - Convert start_time/end_time → start/end for ASRX compatibility - Prefer frame-based positioning over time-based - Document issues in issues_2026-06-21.md: - Issue 6: ASRX field name mismatch - Issue 7: processor-counts API null values --- docs_v1.0/issues_2026-06-21.md | 98 +++++++++++++++++++++++++++++++++- scripts/asrx_processor.py | 19 +++++-- src/api/processing.rs | 72 +++++++++++++++---------- 3 files changed, 157 insertions(+), 32 deletions(-) diff --git a/docs_v1.0/issues_2026-06-21.md b/docs_v1.0/issues_2026-06-21.md index 6858bb6..77571c9 100644 --- a/docs_v1.0/issues_2026-06-21.md +++ b/docs_v1.0/issues_2026-06-21.md @@ -203,4 +203,100 @@ momentry:job:d8acb03870f0cc9b14e01f14a7bf24d6:processor:yolo --- *Report generated: 2026-06-21 12:15* -*Reporter: momentry_studio development session* \ No newline at end of file +*Reporter: momentry_studio development session* + +--- + +## Issue 6: ASRX Processor Field Name Mismatch (NEW - 2026-06-22) + +### Description +ASRX processor fails with `KeyError: 'start'` when processing ASR output. + +### Error Log +``` +[DEBUG] [ASRX/stderr] KeyError: 'start' +[INFO] [ASRX] Result: 0 segments +[INFO] Processor asrx completed and verified (0 chunks, 0 frames) +``` + +### Root Cause +ASR JSON output format changed: +- Old format: `{"start": ..., "end": ..., "text": ...}` +- Current format: `{"start_time": ..., "end_time": ..., "start_frame": ..., "end_frame": ..., "text": ...}` + +asrx_processor.py expects old format: +```python +# Line 137-138 +start_sec = seg["start"] # KeyError: 'start' +end_sec = seg["end"] # KeyError: 'end' +``` + +### ASR Output Structure (Current) +```json +{ + "start_time": 217.27, + "end_time": 224.96, + "start_frame": 5209, + "end_frame": 5394, + "text": "..." +} +``` + +### File Locations +- ASR output: `/Users/accusys/momentry/output/{uuid}.asr.json` +- ASRX script: `/Users/accusys/momentry_core/scripts/asrx_processor.py` +- Error line: Line 137 + +### Impact +- ASRX produces 0 segments even when ASR has valid segments +- No speaker diarization data generated +- File status shows "completed" but asrx output is missing + +### Test Case: Charade_YouTube_24fps.mp4 +- UUID: `c36f35685177c981aa139b66bbbccc5b` +- ASR segments: 1779 (has valid data) +- ASRX segments: 0 (failed silently) +- Status: `completed` (misleading) + +### Fix Required +Update asrx_processor.py to prioritize frame-based positioning: + +```python +# Line 136-149 fix: Use start_frame/end_frame if available +for seg in result.get("segments", []): + # Prefer frame-based (precise) + if "start_frame" in seg and "end_frame" in seg: + start_frame = seg["start_frame"] + end_frame = seg["end_frame"] + start_sec = start_frame / fps + end_sec = end_frame / fps + else: + # Fallback to time-based + start_sec = seg.get("start", seg.get("start_time", 0)) + end_sec = seg.get("end", seg.get("end_time", 0)) + start_frame = int(start_sec * fps) + end_frame = int(end_sec * fps) + + output_result["segments"].append({ + "start_time": start_sec, + "end_time": end_sec, + "start_frame": start_frame, + "end_frame": end_frame, + "text": seg.get("text", ""), + "speaker_id": seg.get("speaker_id", seg.get("speaker", "")), + "language": seg.get("language", ""), + "lang_prob": seg.get("lang_prob", 0.0), + "quality": seg.get("quality", 0.0), + }) +``` + +### Key Points +1. **Frame-based is precise** - use `start_frame` / `end_frame` directly when available +2. **Time is auxiliary** - derive from frames: `start_time = start_frame / fps` +3. **Backward compatible** - fallback to `start`/`end` or `start_time`/`end_time` for older ASR output + +### Recommendation +1. Make asrx_processor.py backward compatible with both formats +2. Add validation: check if asrx segments > 0 before marking complete +3. Log error when ASRX returns 0 segments from non-empty ASR input +4. Add unit test for ASR/ASRX format compatibility \ No newline at end of file diff --git a/scripts/asrx_processor.py b/scripts/asrx_processor.py index 1ec17f5..1b77d54 100755 --- a/scripts/asrx_processor.py +++ b/scripts/asrx_processor.py @@ -134,13 +134,24 @@ def _convert_result(result, output_path): } for seg in result.get("segments", []): - start_sec = seg["start"] - end_sec = seg["end"] + # Prefer frame-based (precise positioning) + if "start_frame" in seg and "end_frame" in seg: + start_frame = seg["start_frame"] + end_frame = seg["end_frame"] + start_sec = start_frame / fps + end_sec = end_frame / fps + else: + # Fallback to time-based + start_sec = seg.get("start", seg.get("start_time", 0)) + end_sec = seg.get("end", seg.get("end_time", 0)) + start_frame = int(start_sec * fps) + end_frame = int(end_sec * fps) + output_result["segments"].append({ "start_time": start_sec, "end_time": end_sec, - "start_frame": int(start_sec * fps), - "end_frame": int(end_sec * fps), + "start_frame": start_frame, + "end_frame": end_frame, "text": seg.get("text", ""), "speaker_id": seg.get("speaker_id", seg.get("speaker", "")), "language": seg.get("language", ""), diff --git a/src/api/processing.rs b/src/api/processing.rs index ed71656..928c15b 100644 --- a/src/api/processing.rs +++ b/src/api/processing.rs @@ -654,33 +654,51 @@ async fn get_processor_counts( } } - if let Ok(content) = std::fs::read_to_string(&json_path) { - if let Ok(json) = serde_json::from_str::(&content) { - frame_count = json - .get("frame_count") - .and_then(|v| v.as_u64()) - .map(|v| v as u32); - segment_count = json - .get("segments") - .and_then(|v| v.as_array()) - .map(|arr| arr.len() as u32); - chunk_count = json - .get("child_chunks") - .and_then(|v| v.as_array()) - .map(|arr| arr.len() as u32) - .or_else(|| { - json.get("parent_chunks") - .and_then(|v| v.as_array()) - .map(|arr| arr.len() as u32) - }); - if chunk_count.is_none() { - chunk_count = json - .get("chunks") - .and_then(|v| v.as_array()) - .map(|arr| arr.len() as u32); - } - } - } +if let Ok(content) = std::fs::read_to_string(&json_path) { +if let Ok(json) = serde_json::from_str::(&content) { +// CUT: prioritize scenes count over frame_count +if proc_name == "cut" { +frame_count = json +.get("scenes") +.and_then(|v| v.as_array()) +.map(|arr| arr.len() as u32); +} else { +// Standard frame_count field +frame_count = json +.get("frame_count") +.and_then(|v| v.as_u64()) +.map(|v| v as u32); + +// YOLO: frames array +if frame_count.is_none() { +frame_count = json +.get("frames") +.and_then(|v| v.as_array()) +.map(|arr| arr.len() as u32); +} +} + +segment_count = json +.get("segments") +.and_then(|v| v.as_array()) +.map(|arr| arr.len() as u32); +chunk_count = json +.get("child_chunks") +.and_then(|v| v.as_array()) +.map(|arr| arr.len() as u32) +.or_else(|| { +json.get("parent_chunks") +.and_then(|v| v.as_array()) +.map(|arr| arr.len() as u32) +}); +if chunk_count.is_none() { +chunk_count = json +.get("chunks") +.and_then(|v| v.as_array()) +.map(|arr| arr.len() as u32); +} +} +} } results.push(ProcessorCountInfo {