fix: ASRX duplication, TKG edges, trace ingest, and add pipeline progress publishing
- ASRX handler no longer stores duplicate 'asr' pre_chunks
- Pre_chunks storage made idempotent (delete-before-insert)
- Rule 1 + trace_ingest changed to query 'asrx' not 'asr'
- Trace chunks removed (dynamic from TKG/Qdrant)
- TKG scroll_face_points fixed: trace_id >= 1 (not == 1)
- TKG AsrxSegmentEntry: start/end -> start_time/end_time (match ASRX JSON)
- Unregister error handling: log instead of silent discard
- Add publish_pipeline_progress calls at each pipeline stage (processors, rule1, face_trace, identity_agent, TKG, rule2, completion)
This commit is contained in:
@@ -201,7 +201,12 @@ def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
|
||||
if not has_audio_stream(video_path):
|
||||
if publisher:
|
||||
publisher.info("asr", "No audio stream detected, skipping transcription")
|
||||
output = {"language": "", "language_probability": 0.0, "segments": []}
|
||||
output = {
|
||||
"status": "no_audio_track",
|
||||
"language": "",
|
||||
"language_probability": 0.0,
|
||||
"segments": []
|
||||
}
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(output, f, indent=2)
|
||||
if publisher:
|
||||
@@ -336,16 +341,16 @@ def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
|
||||
seg_start = start_t + segment.start
|
||||
seg_end = start_t + segment.end
|
||||
scene_idx = find_scene_idx((seg_start + seg_end) / 2)
|
||||
scene_segments.append({
|
||||
"start_time": seg_start,
|
||||
"end_time": seg_end,
|
||||
"start_frame": int(round(seg_start * fps)),
|
||||
"end_frame": int(round(seg_end * fps)),
|
||||
"text": segment.text.strip(),
|
||||
"scene_number": scene_idx + 1,
|
||||
"language": seg_language,
|
||||
})
|
||||
total_segments += 1
|
||||
scene_segments.append({
|
||||
"start_time": seg_start,
|
||||
"end_time": seg_end,
|
||||
"start_frame": int(round(seg_start * fps)),
|
||||
"end_frame": int(round(seg_end * fps)),
|
||||
"text": segment.text.strip(),
|
||||
"scene_number": scene_idx + 1,
|
||||
"language": seg_language,
|
||||
})
|
||||
total_segments += 1
|
||||
|
||||
# 當前 scene 結果寫入 .asr.tmp
|
||||
all_segments.extend(scene_segments)
|
||||
@@ -365,8 +370,18 @@ def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
|
||||
try: os.rmdir(temp_dir)
|
||||
except: pass
|
||||
|
||||
# Determine status for cut_scenes branch
|
||||
if total_segments > 0:
|
||||
status = "has_transcript"
|
||||
else:
|
||||
status = "silent_audio"
|
||||
|
||||
info_language = transcript_language or "unknown"
|
||||
print(f"[ASR] Segmented transcription complete: {total_segments} segments", file=sys.stderr)
|
||||
print(f"[ASR] Segmented transcription complete: {total_segments} segments, status={status}", file=sys.stderr)
|
||||
|
||||
# Write final output with status
|
||||
with open(tmp_path, "w") as f:
|
||||
json.dump({"status": status, "language": info_language, "segments": all_segments}, f)
|
||||
else:
|
||||
# 無 CUT 資料,直接轉錄(原有流程)
|
||||
segments, info = transcribe_with_fallback(model, video_path, publisher)
|
||||
@@ -386,8 +401,15 @@ def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
|
||||
if total_segments % 100 == 0:
|
||||
if publisher:
|
||||
publisher.progress("asr", total_segments, 0, f"Segment {total_segments}")
|
||||
|
||||
# Determine status for direct transcription branch
|
||||
if total_segments > 0:
|
||||
status = "has_transcript"
|
||||
else:
|
||||
status = "silent_audio"
|
||||
|
||||
with open(tmp_path, "w") as f:
|
||||
json.dump({"language": info_language, "segments": all_segments}, f)
|
||||
json.dump({"status": status, "language": info_language, "segments": all_segments}, f)
|
||||
|
||||
if publisher:
|
||||
publisher.info("asr", f"ASR_LANGUAGE:{info_language}")
|
||||
@@ -396,10 +418,10 @@ def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
|
||||
os.rename(tmp_path, output_path)
|
||||
|
||||
if publisher:
|
||||
publisher.complete("asr", f"{len(results)} segments")
|
||||
publisher.complete("asr", f"{total_segments} segments")
|
||||
|
||||
sys.stderr.write(
|
||||
f"ASR: Transcription complete, {len(results)} segments written to {output_path}\n"
|
||||
f"ASR: Transcription complete, {total_segments} segments written to {output_path}\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
sys.exit(0)
|
||||
|
||||
Reference in New Issue
Block a user