- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
115 lines
3.8 KiB
Python
Executable File
115 lines
3.8 KiB
Python
Executable File
#!/opt/homebrew/bin/python3.11
|
|
"""
ASR + Lip correspondence analysis

Analyzes how ASR transcription time segments line up with lip (mouth)
detection results.
"""
|
|
|
|
import json
|
|
import sys
|
|
|
|
def load_json(path):
    """Read the file at ``path`` and return its parsed JSON content.

    The file is opened explicitly as UTF-8 instead of relying on the
    platform's locale encoding, so non-ASCII text (for example transcript
    strings) decodes the same way on every machine.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
def analyze_asr_lip(asr_path, lip_path, max_segments=20):
    """Report how ASR transcript segments line up with lip-detection frames.

    Loads both JSON files, prints a per-segment table followed by a summary,
    and returns the collected statistics.

    Args:
        asr_path: Path to a JSON file. Its ``segments`` list is read; every
            segment must provide ``start`` and ``end`` (``text`` is optional).
        lip_path: Path to a JSON file. Its ``frames`` list is read; every
            frame must provide ``timestamp``. ``is_speaking``,
            ``lip_openness`` and ``face_detected`` are optional.
        max_segments: Number of leading ASR segments to analyze. Defaults to
            20; pass ``None`` to analyze every segment.

    Returns:
        dict with the keys ``total_asr_segments``, ``analyzed_segments``,
        ``with_lip_detection``, ``without_lip_detection``,
        ``speaking_detected``, ``not_speaking``, ``avg_openness`` (one
        average per analyzed segment that had lip frames) and ``match_rate``
        (percentage of segments with lip frames in which speaking was seen).
    """
    # Load both inputs
    print(f"[Load] ASR: {asr_path}")
    asr_data = load_json(asr_path)

    print(f"[Load] Lip: {lip_path}")
    lip_data = load_json(lip_path)

    asr_segments = asr_data.get('segments', [])
    lip_frames = lip_data.get('frames', [])

    print(f"\n[Data] ASR segments: {len(asr_segments)}")
    print(f"[Data] Lip frames: {len(lip_frames)}")
    print()

    # Per-segment analysis of the lip frames that fall inside each ASR segment
    print("=" * 80)
    print("ASR 與 Lip 對應分析")
    print("=" * 80)
    print()

    # Only the leading segments are inspected; a None limit slices everything.
    analyzed = asr_segments[:max_segments]
    analyzed_count = len(analyzed)

    stats = {
        'total_asr_segments': len(asr_segments),
        'analyzed_segments': analyzed_count,
        'with_lip_detection': 0,
        'without_lip_detection': 0,
        'speaking_detected': 0,
        'not_speaking': 0,
        'avg_openness': [],
        'match_rate': 0.0,
    }

    print(f"{'ASR 段':<6} {'時間範圍':<15} {'文字':<30} {'Lip 幀數':<10} {'說話':<10} {'平均開合度'}")
    print("-" * 100)

    for i, asr_seg in enumerate(analyzed):
        asr_start = asr_seg['start']
        asr_end = asr_seg['end']
        asr_text = asr_seg.get('text', '')[:28]

        # Lip frames whose timestamp lies inside this segment (inclusive)
        lip_in_range = [
            f for f in lip_frames
            if asr_start <= f['timestamp'] <= asr_end
        ]

        if not lip_in_range:
            stats['without_lip_detection'] += 1
            print(f"{i+1:<6} {asr_start:.1f}-{asr_end:.1f}s{'':<5} {asr_text:<30} {'0':<10} {'-':<10} {'-':<10}")
            continue

        stats['with_lip_detection'] += 1

        # Speaking state and openness over the frames in range
        speaking_count = sum(1 for f in lip_in_range if f.get('is_speaking', False))
        # A frame without a 'face_detected' flag is treated as "no face",
        # consistent with the .get() defaults used for the other fields.
        openness_values = [
            f.get('lip_openness', 0)
            for f in lip_in_range
            if f.get('face_detected', False)
        ]

        if speaking_count > 0:
            stats['speaking_detected'] += 1
            speak_status = f"✅ {speaking_count}/{len(lip_in_range)}"
        else:
            stats['not_speaking'] += 1
            speak_status = f"❌ 0/{len(lip_in_range)}"

        avg_openness = sum(openness_values) / len(openness_values) if openness_values else 0
        stats['avg_openness'].append(avg_openness)

        print(f"{i+1:<6} {asr_start:.1f}-{asr_end:.1f}s{'':<5} {asr_text:<30} {len(lip_in_range):<10} {speak_status:<10} {avg_openness:.3f}")

    # Match rate: share of segments with lip frames where speaking was seen
    if stats['with_lip_detection'] > 0:
        stats['match_rate'] = stats['speaking_detected'] / stats['with_lip_detection'] * 100

    def percent_of_analyzed(count):
        # The with/without counters only cover the analyzed segments, so they
        # are divided by that count; an empty input yields 0% instead of
        # raising ZeroDivisionError.
        return count / analyzed_count * 100 if analyzed_count else 0.0

    print()
    print("=" * 80)
    print("統計摘要")
    print("=" * 80)
    print()

    print(f"ASR 總段數:{stats['total_asr_segments']}")
    if analyzed_count < stats['total_asr_segments']:
        # Make it visible that the percentages below refer to a subset.
        print(f"已分析段數:{analyzed_count}")
    print(f"有 Lip 檢測:{stats['with_lip_detection']} ({percent_of_analyzed(stats['with_lip_detection']):.1f}%)")
    print(f"無 Lip 檢測:{stats['without_lip_detection']} ({percent_of_analyzed(stats['without_lip_detection']):.1f}%)")
    print()
    print(f"檢測到說話:{stats['speaking_detected']} ({stats['match_rate']:.1f}%)")
    print(f"未檢測說話:{stats['not_speaking']}")
    print()

    if stats['avg_openness']:
        overall_avg = sum(stats['avg_openness']) / len(stats['avg_openness'])
        print(f"平均嘴部開合度:{overall_avg:.4f}")

    print()

    return stats
|
|
|
|
if __name__ == "__main__":
    # Script entry point: expects the ASR JSON path followed by the lip JSON
    # path; anything less prints the usage line and exits with status 1.
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print("Usage: python3 analyze_asr_lip.py <asr.json> <lip.json>")
        sys.exit(1)

    asr_json_path, lip_json_path = cli_args[0], cli_args[1]
    analyze_asr_lip(asr_json_path, lip_json_path)
|