Files
momentry_core/scripts/cut_benchmark_runner.py
Warren e75c4d6f07 cleanup: remove dead code and duplicate docs
- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
2026-05-04 01:31:21 +08:00

236 lines
7.5 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
CUT Processor Benchmark Runner
测试场景辨识的性能和质量
测试版本:
A. cut_processor.py (PySceneDetect)
B. cut_processor_contract_v1.py (Contract v1.0)
测试指标:
- 处理时间
- 内存峰值 (MB)
- 检测场景数
- 场景平均时长
"""
import os
import sys
import json
import time
import subprocess
from pathlib import Path
from datetime import datetime
# Directory holding this script; processor scripts are looked up relative to it.
SCRIPTS_DIR = Path(__file__).parents[0]
# Benchmark artifacts go under output/benchmark/cut_processor, next to the
# scripts directory (i.e. inside the scripts directory's parent).
OUTPUT_DIR = SCRIPTS_DIR.parent.joinpath("output", "benchmark", "cut_processor")
def get_memory_peak(pid):
    """Return the current resident set size (RSS) of a process, in MB.

    Despite its name, this is a point-in-time sample taken with
    ``ps -p <pid> -o rss=``; a caller that wants a peak has to poll
    repeatedly and keep the largest value seen.

    Args:
        pid: ID of the process to inspect.

    Returns:
        The value printed by ``ps`` divided by 1024 (``ps`` reports RSS in
        1024-byte units, so the result is in MB), or ``0.0`` when the value
        cannot be obtained (``ps`` unavailable, process already gone,
        unparsable output).
    """
    try:
        result = subprocess.run(
            ["ps", "-p", str(pid), "-o", "rss="],
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            return int(result.stdout.strip()) / 1024
    except (OSError, ValueError):
        # OSError: ps is missing or cannot be executed.
        # ValueError: empty or non-numeric output (e.g. the process exited
        # between the poll and the ps call).
        # Deliberately NOT a bare except, so Ctrl-C still interrupts the
        # benchmark while it is sampling memory.
        pass
    return 0.0
def run_processor(script_name, video_path, output_path, uuid=""):
    """Run one CUT processor script and collect benchmark statistics.

    The script is launched as ``<python> <script> <video_path> <output_path>
    [--uuid <uuid>]`` using the current interpreter. While it runs, its
    memory usage is sampled roughly every 0.5 seconds. After a successful
    exit the JSON file at ``output_path`` is read to derive scene statistics.

    Args:
        script_name: File name of the processor script inside ``SCRIPTS_DIR``.
        video_path: Path of the video passed to the processor.
        output_path: Path where the processor is expected to write its JSON.
        uuid: Optional identifier forwarded as ``--uuid`` when non-empty.

    Returns:
        A dict with the keys ``elapsed_time``, ``peak_memory_mb``,
        ``total_scenes``, ``avg_scene_duration``, ``min_scene_duration``,
        ``max_scene_duration``, ``file_size_kb``, ``fps``, ``frame_count``,
        ``stdout`` and ``stderr``; or ``None`` when the script does not
        exist, exits with a non-zero status, or leaves no readable JSON
        output file.
    """
    script_path = SCRIPTS_DIR / script_name
    if not script_path.exists():
        print(f"❌ 脚本不存在: {script_path}")
        return None

    cmd = [sys.executable, str(script_path), video_path, output_path]
    if uuid:
        cmd.extend(["--uuid", uuid])
    print(f"\n执行: {script_name}")
    print(f"命令: {' '.join(cmd)}")

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments during a long run.
    start_time = time.perf_counter()
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    peak_memory = 0
    while True:
        peak_memory = max(peak_memory, get_memory_peak(process.pid))
        try:
            # communicate() keeps draining both pipes while waiting. Polling
            # without reading would deadlock as soon as the child fills an
            # OS pipe buffer. The timeout doubles as the sampling interval
            # and returns immediately once the child exits, so no extra
            # sleep inflates the measured time.
            stdout, stderr = process.communicate(timeout=0.5)
            break
        except subprocess.TimeoutExpired:
            # Child still running; output read so far is retained by
            # communicate() and returned by the next call.
            continue
    elapsed_time = time.perf_counter() - start_time

    if process.returncode != 0:
        print(f"❌ 处理失败: {stderr}")
        return None

    if not os.path.exists(output_path):
        return None

    try:
        with open(output_path, encoding="utf-8") as f:
            result = json.load(f)
    except (OSError, ValueError) as exc:
        # A truncated or invalid output file counts as a failed run instead
        # of aborting the whole benchmark (JSONDecodeError is a ValueError).
        print(f"❌ 无法读取输出文件 {output_path}: {exc}")
        return None

    scenes = result.get("scenes", [])
    # Scene statistics; all stay at 0 when no scene was detected.
    avg_scene_duration = 0
    min_scene_duration = 0
    max_scene_duration = 0
    if scenes:
        durations = [s.get("end_time", 0) - s.get("start_time", 0) for s in scenes]
        avg_scene_duration = sum(durations) / len(durations)
        min_scene_duration = min(durations)
        max_scene_duration = max(durations)

    return {
        "elapsed_time": elapsed_time,
        "peak_memory_mb": peak_memory,
        "total_scenes": len(scenes),
        "avg_scene_duration": avg_scene_duration,
        "min_scene_duration": min_scene_duration,
        "max_scene_duration": max_scene_duration,
        "file_size_kb": os.path.getsize(output_path) / 1024,
        "fps": result.get("fps", 0),
        "frame_count": result.get("frame_count", 0),
        "stdout": stdout,
        "stderr": stderr,
    }
# Video used when main() is called without an explicit path.
_DEFAULT_VIDEO_PATH = "/Users/accusys/momentry/var/sftpgo/data/demo/Gamma Carry Saves the World..mp4"

# Keys copied from a run_processor() result into the report, in report order.
_REPORT_KEYS = (
    "elapsed_time",
    "peak_memory_mb",
    "total_scenes",
    "avg_scene_duration",
    "min_scene_duration",
    "max_scene_duration",
    "fps",
    "frame_count",
    "file_size_kb",
)


def _print_video_info(video_path):
    """Print size, duration, resolution and frame rate of a video (best effort).

    The data comes from ``ffprobe``. Any failure to run it or to interpret
    its output only produces a warning line; it never aborts the benchmark.
    """
    cmd = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        "-show_streams",
        video_path,
    ]
    try:
        probe = subprocess.run(cmd, capture_output=True, text=True, check=True)
        video_info = json.loads(probe.stdout)
        video_stream = next(
            (s for s in video_info["streams"] if s["codec_type"] == "video"),
            None,
        )
        print("\n测试视频:")
        print(f" 文件: {int(video_info['format'].get('size', 0)) / 1024 / 1024:.1f} MB")
        print(f" 时长: {float(video_info['format'].get('duration', 0)):.1f}")
        if video_stream is None:
            # No video stream reported: nothing to show for resolution/FPS.
            print("⚠️ 无法获取视频信息")
            return
        print(f" 分辨率: {video_stream.get('width', 0)}x{video_stream.get('height', 0)}")
        print(f" FPS: {video_stream.get('r_frame_rate', 'unknown')}")
    except (OSError, subprocess.CalledProcessError, ValueError, KeyError, TypeError):
        # OSError: ffprobe not installed; CalledProcessError: ffprobe failed;
        # ValueError/KeyError/TypeError: unexpected or malformed JSON output.
        print("⚠️ 无法获取视频信息")


def main(video_path=_DEFAULT_VIDEO_PATH):
    """Benchmark every CUT processor on one video and write a JSON report.

    Each processor is run through ``run_processor``; its output goes to
    ``OUTPUT_DIR`` and the collected metrics are stored in
    ``OUTPUT_DIR / "CUT_BENCHMARK_REPORT.json"``. A comparison table is
    printed at the end.

    Args:
        video_path: Video to benchmark with. Defaults to the demo video the
            script was originally hard-wired to.

    Exits the interpreter with status 1 when the video does not exist.
    """
    print("=" * 80)
    print("CUT Processor Benchmark 测试")
    print("=" * 80)
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    if not os.path.exists(video_path):
        print(f"❌ 测试视频不存在: {video_path}")
        sys.exit(1)

    _print_video_info(video_path)

    processors = [
        ("A", "cut_processor.py", "PySceneDetect"),
        ("B", "cut_processor_contract_v1.py", "Contract v1.0"),
    ]
    results = []
    for scheme_id, script_name, description in processors:
        print(f"\n{'=' * 80}")
        print(f"方案 {scheme_id}: {description}")
        print(f"{'=' * 80}")

        output_path = OUTPUT_DIR / f"scheme_{scheme_id}_{script_name.replace('.py', '.json')}"
        # Remove stale output so a failed run cannot be mistaken for success.
        if os.path.exists(output_path):
            os.remove(output_path)

        result = run_processor(
            script_name,
            video_path,
            str(output_path),
            uuid=f"cut_bench_{scheme_id}",
        )
        entry = {
            "scheme": scheme_id,
            "script": script_name,
            "description": description,
        }
        if result:
            # stdout/stderr of the processor are intentionally left out of
            # the report; only the metrics are kept.
            entry.update({key: result[key] for key in _REPORT_KEYS})
            print("\n✅ 处理完成:")
            print(f" 时间: {result['elapsed_time']:.2f}")
            print(f" 内存峰值: {result['peak_memory_mb']:.1f} MB")
            print(f" 检测场景数: {result['total_scenes']}")
            print(f" 场景平均时长: {result['avg_scene_duration']:.2f}")
            print(f" 场景最短时长: {result['min_scene_duration']:.2f}")
            print(f" 场景最长时长: {result['max_scene_duration']:.2f}")
            print(f" FPS: {result['fps']}")
            print(f" 输出大小: {result['file_size_kb']:.1f} KB")
        else:
            print(f"❌ 方案 {scheme_id} 处理失败")
            entry["error"] = "processing failed"
        results.append(entry)

    # Save the report.
    report = {
        "test_date": datetime.now().isoformat(),
        "video_path": video_path,
        "results": results,
    }
    report_path = OUTPUT_DIR / "CUT_BENCHMARK_REPORT.json"
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly rather than in the locale's default encoding.
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2, ensure_ascii=False)

    print(f"\n{'=' * 80}")
    print("测试报告已保存:")
    print(f" {report_path}")
    print(f"{'=' * 80}")

    print("\n【对比总结】")
    print("\n| 方案 | 脚本 | 时间(秒) | 内存(MB) | 场景数 | 平均时长(秒) |")
    print("|------|------|---------|---------|--------|-------------|")
    for r in results:
        if "error" not in r:
            print(f"| {r['scheme']} | {r['script']} | {r['elapsed_time']:.2f} | {r['peak_memory_mb']:.1f} | {r['total_scenes']} | {r['avg_scene_duration']:.2f} |")
        else:
            print(f"| {r['scheme']} | {r['script']} | - | - | - | - |")


if __name__ == "__main__":
    # An optional first command-line argument overrides the default video.
    main(*sys.argv[1:2])