fix: processor-counts API + ASRX field name conversion

- Fix processor-counts API to correctly read JSON counts:
  - YOLO: use frames.length (was returning null)
  - CUT: prioritize scenes.length over frame_count
  - Result: YOLO 1963 frames, CUT 25 scenes (correct)

- Fix ASRX field name conversion:
  - Convert start_time/end_time → start/end for ASRX compatibility
  - Prefer frame-based positioning over time-based

- Document issues in issues_2026-06-21.md:
  - Issue 6: ASRX field name mismatch
  - Issue 7: processor-counts API null values
This commit is contained in:
Accusys
2026-06-22 23:33:39 +08:00
parent db8bb8fa95
commit e1e2da2140
3 changed files with 157 additions and 32 deletions

View File

@@ -203,4 +203,100 @@ momentry:job:d8acb03870f0cc9b14e01f14a7bf24d6:processor:yolo
--- ---
*Report generated: 2026-06-21 12:15* *Report generated: 2026-06-21 12:15*
*Reporter: momentry_studio development session* *Reporter: momentry_studio development session*
---
## Issue 6: ASRX Processor Field Name Mismatch (NEW - 2026-06-22)
### Description
ASRX processor fails with `KeyError: 'start'` when processing ASR output.
### Error Log
```
[DEBUG] [ASRX/stderr] KeyError: 'start'
[INFO] [ASRX] Result: 0 segments
[INFO] Processor asrx completed and verified (0 chunks, 0 frames)
```
### Root Cause
ASR JSON output format changed:
- Old format: `{"start": ..., "end": ..., "text": ...}`
- Current format: `{"start_time": ..., "end_time": ..., "start_frame": ..., "end_frame": ..., "text": ...}`
asrx_processor.py expects old format:
```python
# Line 137-138
start_sec = seg["start"] # KeyError: 'start'
end_sec = seg["end"] # KeyError: 'end'
```
### ASR Output Structure (Current)
```json
{
"start_time": 217.27,
"end_time": 224.96,
"start_frame": 5209,
"end_frame": 5394,
"text": "..."
}
```
### File Locations
- ASR output: `/Users/accusys/momentry/output/{uuid}.asr.json`
- ASRX script: `/Users/accusys/momentry_core/scripts/asrx_processor.py`
- Error line: Line 137
### Impact
- ASRX produces 0 segments even when ASR has valid segments
- No speaker diarization data generated
- File status shows "completed" but asrx output is missing
### Test Case: Charade_YouTube_24fps.mp4
- UUID: `c36f35685177c981aa139b66bbbccc5b`
- ASR segments: 1779 (has valid data)
- ASRX segments: 0 (failed silently)
- Status: `completed` (misleading)
### Fix Required
Update asrx_processor.py to prioritize frame-based positioning:
```python
# Line 136-149 fix: Use start_frame/end_frame if available
for seg in result.get("segments", []):
# Prefer frame-based (precise)
if "start_frame" in seg and "end_frame" in seg:
start_frame = seg["start_frame"]
end_frame = seg["end_frame"]
start_sec = start_frame / fps
end_sec = end_frame / fps
else:
# Fallback to time-based
start_sec = seg.get("start", seg.get("start_time", 0))
end_sec = seg.get("end", seg.get("end_time", 0))
start_frame = int(start_sec * fps)
end_frame = int(end_sec * fps)
output_result["segments"].append({
"start_time": start_sec,
"end_time": end_sec,
"start_frame": start_frame,
"end_frame": end_frame,
"text": seg.get("text", ""),
"speaker_id": seg.get("speaker_id", seg.get("speaker", "")),
"language": seg.get("language", ""),
"lang_prob": seg.get("lang_prob", 0.0),
"quality": seg.get("quality", 0.0),
})
```
### Key Points
1. **Frame-based is precise** - use `start_frame` / `end_frame` directly when available
2. **Time is auxiliary** - derive from frames: `start_time = start_frame / fps`
3. **Backward compatible** - fallback to `start`/`end` or `start_time`/`end_time` for older ASR output
### Recommendation
1. Make asrx_processor.py backward compatible with both formats
2. Add validation: check if asrx segments > 0 before marking complete
3. Log error when ASRX returns 0 segments from non-empty ASR input
4. Add unit test for ASR/ASRX format compatibility

View File

@@ -134,13 +134,24 @@ def _convert_result(result, output_path):
} }
for seg in result.get("segments", []): for seg in result.get("segments", []):
start_sec = seg["start"] # Prefer frame-based (precise positioning)
end_sec = seg["end"] if "start_frame" in seg and "end_frame" in seg:
start_frame = seg["start_frame"]
end_frame = seg["end_frame"]
start_sec = start_frame / fps
end_sec = end_frame / fps
else:
# Fallback to time-based
start_sec = seg.get("start", seg.get("start_time", 0))
end_sec = seg.get("end", seg.get("end_time", 0))
start_frame = int(start_sec * fps)
end_frame = int(end_sec * fps)
output_result["segments"].append({ output_result["segments"].append({
"start_time": start_sec, "start_time": start_sec,
"end_time": end_sec, "end_time": end_sec,
"start_frame": int(start_sec * fps), "start_frame": start_frame,
"end_frame": int(end_sec * fps), "end_frame": end_frame,
"text": seg.get("text", ""), "text": seg.get("text", ""),
"speaker_id": seg.get("speaker_id", seg.get("speaker", "")), "speaker_id": seg.get("speaker_id", seg.get("speaker", "")),
"language": seg.get("language", ""), "language": seg.get("language", ""),

View File

@@ -654,33 +654,51 @@ async fn get_processor_counts(
} }
} }
if let Ok(content) = std::fs::read_to_string(&json_path) { if let Ok(content) = std::fs::read_to_string(&json_path) {
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) { if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) {
frame_count = json // CUT: prioritize scenes count over frame_count
.get("frame_count") if proc_name == "cut" {
.and_then(|v| v.as_u64()) frame_count = json
.map(|v| v as u32); .get("scenes")
segment_count = json .and_then(|v| v.as_array())
.get("segments") .map(|arr| arr.len() as u32);
.and_then(|v| v.as_array()) } else {
.map(|arr| arr.len() as u32); // Standard frame_count field
chunk_count = json frame_count = json
.get("child_chunks") .get("frame_count")
.and_then(|v| v.as_array()) .and_then(|v| v.as_u64())
.map(|arr| arr.len() as u32) .map(|v| v as u32);
.or_else(|| {
json.get("parent_chunks") // YOLO: frames array
.and_then(|v| v.as_array()) if frame_count.is_none() {
.map(|arr| arr.len() as u32) frame_count = json
}); .get("frames")
if chunk_count.is_none() { .and_then(|v| v.as_array())
chunk_count = json .map(|arr| arr.len() as u32);
.get("chunks") }
.and_then(|v| v.as_array()) }
.map(|arr| arr.len() as u32);
} segment_count = json
} .get("segments")
} .and_then(|v| v.as_array())
.map(|arr| arr.len() as u32);
chunk_count = json
.get("child_chunks")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32)
.or_else(|| {
json.get("parent_chunks")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32)
});
if chunk_count.is_none() {
chunk_count = json
.get("chunks")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32);
}
}
}
} }
results.push(ProcessorCountInfo { results.push(ProcessorCountInfo {