fix: ASRX duplication, TKG edges, trace ingest, and add pipeline progress publishing

- ASRX handler no longer stores duplicate 'asr' pre_chunks
- Pre_chunks storage made idempotent (delete-before-insert)
- Rule 1 + trace_ingest changed to query 'asrx' not 'asr'
- Trace chunks removed (dynamic from TKG/Qdrant)
- TKG scroll_face_points fixed: trace_id >= 1 (not == 1)
- TKG AsrxSegmentEntry: start/end -> start_time/end_time (match ASRX JSON)
- Unregister error handling: log instead of silently discarding
- Add publish_pipeline_progress calls at each pipeline stage (processors, rule1, face_trace, identity_agent, TKG, rule2, completion)
2026-07-02 10:43:46 +08:00
parent d791d138f2
commit 3eabd45882
65 changed files with 9481 additions and 3856 deletions
--- a/scripts/asr_processor.py
+++ b/scripts/asr_processor.py
@@ -201,7 +201,12 @@ def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
    if not has_audio_stream(video_path):
        if publisher:
            publisher.info("asr", "No audio stream detected, skipping transcription")
-        output = {"language": "", "language_probability": 0.0, "segments": []}
+        output = {
+            "status": "no_audio_track",
+            "language": "",
+            "language_probability": 0.0,
+            "segments": []
+        }
        with open(output_path, "w") as f:
            json.dump(output, f, indent=2)
        if publisher:
@@ -336,16 +341,16 @@ def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
                    seg_start = start_t + segment.start
                    seg_end = start_t + segment.end
                    scene_idx = find_scene_idx((seg_start + seg_end) / 2)
-                scene_segments.append({
-                    "start_time": seg_start,
-                    "end_time": seg_end,
-                    "start_frame": int(round(seg_start * fps)),
-                    "end_frame": int(round(seg_end * fps)),
-                    "text": segment.text.strip(),
-                    "scene_number": scene_idx + 1,
-                    "language": seg_language,
-                })
-                total_segments += 1
+                    scene_segments.append({
+                        "start_time": seg_start,
+                        "end_time": seg_end,
+                        "start_frame": int(round(seg_start * fps)),
+                        "end_frame": int(round(seg_end * fps)),
+                        "text": segment.text.strip(),
+                        "scene_number": scene_idx + 1,
+                        "language": seg_language,
+                    })
+                    total_segments += 1

                # 當前 scene 結果寫入 .asr.tmp
                all_segments.extend(scene_segments)
@@ -365,8 +370,18 @@ def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
        try: os.rmdir(temp_dir)
        except: pass

+        # Determine status for cut_scenes branch
+        if total_segments > 0:
+            status = "has_transcript"
+        else:
+            status = "silent_audio"
+        
        info_language = transcript_language or "unknown"
-        print(f"[ASR] Segmented transcription complete: {total_segments} segments", file=sys.stderr)
+        print(f"[ASR] Segmented transcription complete: {total_segments} segments, status={status}", file=sys.stderr)
+        
+        # Write final output with status
+        with open(tmp_path, "w") as f:
+            json.dump({"status": status, "language": info_language, "segments": all_segments}, f)
    else:
        # 無 CUT 資料，直接轉錄（原有流程）
        segments, info = transcribe_with_fallback(model, video_path, publisher)
@@ -386,8 +401,15 @@ def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
            if total_segments % 100 == 0:
                if publisher:
                    publisher.progress("asr", total_segments, 0, f"Segment {total_segments}")
+        
+        # Determine status for direct transcription branch
+        if total_segments > 0:
+            status = "has_transcript"
+        else:
+            status = "silent_audio"
+        
        with open(tmp_path, "w") as f:
-            json.dump({"language": info_language, "segments": all_segments}, f)
+            json.dump({"status": status, "language": info_language, "segments": all_segments}, f)

    if publisher:
        publisher.info("asr", f"ASR_LANGUAGE:{info_language}")
@@ -396,10 +418,10 @@ def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
    os.rename(tmp_path, output_path)

    if publisher:
-        publisher.complete("asr", f"{len(results)} segments")
+        publisher.complete("asr", f"{total_segments} segments")

    sys.stderr.write(
-        f"ASR: Transcription complete, {len(results)} segments written to {output_path}\n"
+        f"ASR: Transcription complete, {total_segments} segments written to {output_path}\n"
    )
    sys.stderr.flush()
    sys.exit(0)