feat: ASR output frame numbers + rename start/end to start_time/end_time

- Python: asr_processor.py resolves FPS from the --fps CLI override, then CUT data, then ffprobe (no default fallback), and outputs start_frame/end_frame
- Rust: All AsrSegment structs use start_time/end_time with #[serde(alias)] for backward compat
- store_asr_chunks: prefers ASR output frames, falls back to time-based conversion
- Added backward compatibility test for old JSON format (start/end)

Breaking change: when FPS cannot be determined from CUT data or ffprobe, ASR now aborts (exit code 1) instead of using the default 24fps
This commit is contained in:
Accusys
2026-05-19 13:22:38 +08:00
parent 26725dcab7
commit 67ca846ccd
9 changed files with 572 additions and 68 deletions

View File

@@ -141,11 +141,58 @@ def transcribe_with_fallback(model, video_path, publisher=None):
pass
def run_asr(video_path, output_path, uuid: str = ""):
def get_fps_from_cut(cut_path):
    """Return the frame rate stored in a CUT JSON file, or None.

    Args:
        cut_path: Path to the CUT JSON file. Its top-level object is
            searched for an ``fps`` entry.

    Returns:
        The ``fps`` value exactly as stored (int or float) when it is a
        finite number greater than zero. None when the file is missing,
        cannot be read or parsed, or holds no usable ``fps`` value.
    """
    try:
        with open(cut_path, encoding="utf-8") as f:
            cut_data = json.load(f)
    except FileNotFoundError:
        # Missing CUT data is an expected case, so stay silent.
        return None
    except Exception as e:
        print(f"[ASR] Failed to load CUT FPS: {e}", file=sys.stderr)
        return None

    if not isinstance(cut_data, dict):
        print(
            "[ASR] Failed to load CUT FPS: "
            f"unexpected top-level {type(cut_data).__name__}",
            file=sys.stderr,
        )
        return None

    fps = cut_data.get("fps")
    if fps is None:
        return None

    # bool is a subclass of int, so JSON `true` must be rejected explicitly.
    # The chained comparison also rejects NaN and Infinity, both of which
    # Python's json module accepts on input.
    is_number = isinstance(fps, (int, float)) and not isinstance(fps, bool)
    if not is_number or not 0 < fps < float("inf"):
        print(f"[ASR] Ignoring invalid CUT FPS: {fps!r}", file=sys.stderr)
        return None
    return fps
def get_fps_from_ffprobe(video_path):
    """Return the frame rate ffprobe reports for the first video stream, or None.

    Runs ``ffprobe`` requesting ``r_frame_rate`` of stream ``v:0`` and parses
    the output, which is either a ``num/den`` ratio or a plain number.

    Args:
        video_path: Path of the media file passed to ffprobe.

    Returns:
        The frame rate as a float when it is finite and greater than zero.
        None when ffprobe cannot be run, exits with an error, or prints
        something that is not a usable frame rate.
    """
    cmd = ["ffprobe", "-v", "error",
           "-select_streams", "v:0",
           "-show_entries", "stream=r_frame_rate",
           "-of", "csv=p=0", video_path]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        # ffprobe can print more than one line, or leave a trailing CSV
        # separator, for some inputs; only the first token is the rate.
        tokens = result.stdout.split()
        fps_str = tokens[0].rstrip(",") if tokens else ""
        num, slash, den = fps_str.partition("/")
        fps = float(num) / float(den) if slash else float(num)
    except Exception as e:
        print(f"[ASR] Failed to get FPS from ffprobe: {e}", file=sys.stderr)
        return None

    # A rate such as "0/1" parses cleanly but is unusable; returning it would
    # slip past an `is None` check and yield all-zero frame numbers.
    # The chained comparison also rejects NaN and Infinity.
    if not 0 < fps < float("inf"):
        print(f"[ASR] Ignoring invalid ffprobe FPS: {fps_str!r}", file=sys.stderr)
        return None
    return fps
def run_asr(video_path, output_path, uuid: str = "", fps: float = None):
# Set up signal handlers
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# FPS detection chain: CLI → CUT → ffprobe → FAIL
if fps is not None:
print(f"[ASR] Using CLI-provided FPS: {fps}", file=sys.stderr)
else:
cut_path_check = output_path.replace(".asr.json", ".cut.json")
fps = get_fps_from_cut(cut_path_check)
if fps:
print(f"[ASR] FPS from CUT: {fps}", file=sys.stderr)
if fps is None:
fps = get_fps_from_ffprobe(video_path)
if fps:
print(f"[ASR] FPS from ffprobe: {fps}", file=sys.stderr)
if fps is None:
print("[ASR] ERROR: Cannot determine FPS (no CUT data, ffprobe failed). Aborting.", file=sys.stderr)
sys.exit(1)
publisher = RedisPublisher(uuid) if uuid else None
if publisher:
publisher.info("asr", "ASR_START")
@@ -289,13 +336,15 @@ def run_asr(video_path, output_path, uuid: str = ""):
seg_start = start_t + segment.start
seg_end = start_t + segment.end
scene_idx = find_scene_idx((seg_start + seg_end) / 2)
scene_segments.append({
"start": seg_start,
"end": seg_end,
"text": segment.text.strip(),
"scene_number": scene_idx + 1,
"language": seg_language,
})
scene_segments.append({
"start_time": seg_start,
"end_time": seg_end,
"start_frame": int(round(seg_start * fps)),
"end_frame": int(round(seg_end * fps)),
"text": segment.text.strip(),
"scene_number": scene_idx + 1,
"language": seg_language,
})
total_segments += 1
# 當前 scene 結果寫入 .asr.tmp
@@ -327,7 +376,10 @@ def run_asr(video_path, output_path, uuid: str = ""):
all_segments = []
for segment in segments:
all_segments.append({
"start": segment.start, "end": segment.end,
"start_time": segment.start,
"end_time": segment.end,
"start_frame": int(round(segment.start * fps)),
"end_frame": int(round(segment.end * fps)),
"text": segment.text.strip(),
})
total_segments += 1
@@ -358,6 +410,7 @@ if __name__ == "__main__":
parser.add_argument("video_path", help="Path to video file")
parser.add_argument("output_path", help="Output JSON path")
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
parser.add_argument("--fps", type=float, help="Override FPS (default: auto-detect)")
args = parser.parse_args()
run_asr(args.video_path, args.output_path, args.uuid)
run_asr(args.video_path, args.output_path, args.uuid, fps=args.fps)