feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
This commit is contained in:
219
scripts/chunk_statistics.py
Normal file
219
scripts/chunk_statistics.py
Normal file
@@ -0,0 +1,219 @@
|
||||
#!/opt/bin/python3.11
|
||||
"""
|
||||
Chunk-based statistics for ASR, Face, and Speaker combinations.
|
||||
Generates a comprehensive report of each chunk's content.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Identifier of the processed video; it names both the output directory and
# the per-video JSON files read by this script.
UUID = "384b0ff44aaaa1f1"

# Directory holding that video's artifacts. Built with os.path.join so it is
# consistent with every other path this script assembles.
BASE_DIR = os.path.join("output", UUID)

CHUNK_DURATION = 60  # seconds per chunk
|
||||
|
||||
|
||||
def load_json(filepath):
    """Read *filepath* and return the parsed JSON document.

    The file is decoded explicitly as UTF-8 so that non-ASCII content is read
    the same way regardless of the platform's locale encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)
|
||||
|
||||
|
||||
def _aggregate_chunks(segments, face_frames, chunk_duration):
    """Bucket ASR segments and face frames into fixed-length time chunks.

    Args:
        segments: ASR segment dicts; the "start", "end" and "text" keys are
            read, each with a default when missing.
        face_frames: frame dicts; "timestamp" and "faces" are read, and each
            face's "person_id" is used to identify a person.
        chunk_duration: length of one chunk, in the same unit as the
            timestamps (seconds in this script).

    Returns:
        A ``(chunks, video_duration)`` tuple. ``video_duration`` is the
        largest segment "end" (0 when there are no segments), so frames past
        the last ASR segment's chunk are not counted. Each chunk is a
        JSON-serializable dict.
    """
    video_duration = max((seg.get("end", 0) for seg in segments), default=0)
    num_chunks = int(video_duration // chunk_duration) + 1

    chunks = [
        {
            "chunk_id": i,
            "start": i * chunk_duration,
            "end": (i + 1) * chunk_duration,
            "asr_count": 0,
            "asr_text_len": 0,
            "face_count": 0,
            "has_speech": False,
            "has_faces": False,
        }
        for i in range(num_chunks)
    ]
    # Per-chunk {person_id: detection count}; kept outside the chunk dicts so
    # the chunks stay JSON-serializable throughout.
    person_hits = [{} for _ in chunks]

    # A segment is counted in every chunk it touches.
    for seg in segments:
        # Clamp to the valid range: a negative index would otherwise wrap
        # around and be attributed to the last chunks.
        first = max(int(seg.get("start", 0) // chunk_duration), 0)
        last = min(int(seg.get("end", 0) // chunk_duration), num_chunks - 1)
        text_len = len(seg.get("text", ""))
        for chunk in chunks[first:last + 1]:
            chunk["asr_count"] += 1
            chunk["asr_text_len"] += text_len
            chunk["has_speech"] = True

    for frame in face_frames:
        idx = int(frame.get("timestamp", 0) // chunk_duration)
        if not 0 <= idx < num_chunks:
            continue
        faces = frame.get("faces", [])
        if not faces:
            # Must not reset has_faces: an earlier frame in this chunk may
            # already have contributed faces.
            continue
        chunk = chunks[idx]
        chunk["face_count"] += len(faces)
        chunk["has_faces"] = True
        hits = person_hits[idx]
        for face in faces:
            pid = face.get("person_id")
            if pid:
                hits[pid] = hits.get(pid, 0) + 1

    for chunk, hits in zip(chunks, person_hits):
        chunk["unique_person_count"] = len(hits)
        # Most frequently detected persons first; the sort is stable, so ties
        # keep first-seen order and the output is deterministic.
        chunk["top_persons"] = sorted(hits, key=hits.get, reverse=True)[:10]

    return chunks, video_duration


def build_chunk_stats():
    """Load the ASR and clustered-face JSON for UUID and build chunk stats.

    Reads ``<UUID>.asr.json`` and ``<UUID>.face_clustered.json`` from
    BASE_DIR, prints progress information, and splits the timeline into
    CHUNK_DURATION-second chunks.

    Returns:
        A ``(chunks, video_duration)`` tuple, where ``chunks`` is a list of
        per-chunk statistic dicts and ``video_duration`` is the end time of
        the last ASR segment.
    """
    print(f"📊 Building chunk statistics for {UUID}...")
    print(f" Chunk duration: {CHUNK_DURATION}s")

    # Load data
    asr_data = load_json(os.path.join(BASE_DIR, f"{UUID}.asr.json"))
    face_data = load_json(os.path.join(BASE_DIR, f"{UUID}.face_clustered.json"))

    chunks, video_duration = _aggregate_chunks(
        asr_data.get("segments", []),
        face_data.get("frames", []),
        CHUNK_DURATION,
    )
    print(f" Video duration: {video_duration:.0f}s ({video_duration / 60:.1f} min)")

    return chunks, video_duration
|
||||
|
||||
|
||||
def _format_id_ranges(ids):
    """Render integer IDs as compact runs, e.g. [0, 1, 2, 6] -> "0-2, 6"."""
    runs = []
    for cid in sorted(ids):
        if runs and cid == runs[-1][1] + 1:
            runs[-1][1] = cid
        else:
            runs.append([cid, cid])
    return ", ".join(str(lo) if lo == hi else f"{lo}-{hi}" for lo, hi in runs)


def _chunk_contains(chunk, timestamp):
    """Return True if *timestamp* lies in the chunk's half-open [start, end)."""
    return chunk["start"] <= timestamp < chunk["end"]


def print_summary(chunks):
    """Print a human-readable report for the given chunk statistics.

    Args:
        chunks: list of chunk dicts carrying the keys "chunk_id", "start",
            "end", "asr_count", "face_count", "has_speech", "has_faces",
            "unique_person_count" and "top_persons".

    The report covers overall counts, a speech/face combination breakdown,
    the ten most active chunks and a few hard-coded timestamps of interest.
    An empty *chunks* list prints a short notice instead of failing.
    """
    print("\n" + "=" * 80)
    print("📈 CHUNK STATISTICS SUMMARY")
    print("=" * 80)

    total_chunks = len(chunks)
    if not total_chunks:
        # Nothing to report; also avoids dividing by zero below.
        print("\n No chunks to summarize.")
        return

    def persons_of(chunk, limit):
        # str() so that non-string person ids do not break the join.
        return ", ".join(map(str, chunk["top_persons"][:limit]))

    # Overall stats
    total_asr = sum(c["asr_count"] for c in chunks)
    total_faces = sum(c["face_count"] for c in chunks)
    total_speech_chunks = sum(1 for c in chunks if c["has_speech"])
    total_face_chunks = sum(1 for c in chunks if c["has_faces"])
    chunks_with_both = sum(1 for c in chunks if c["has_speech"] and c["has_faces"])
    chunks_with_neither = sum(
        1 for c in chunks if not c["has_speech"] and not c["has_faces"]
    )

    print("\n📊 Overview:")
    print(f" Total chunks: {total_chunks}")
    print(
        f" Chunks with speech: {total_speech_chunks} ({total_speech_chunks / total_chunks * 100:.0f}%)"
    )
    print(
        f" Chunks with faces: {total_face_chunks} ({total_face_chunks / total_chunks * 100:.0f}%)"
    )
    print(
        f" Both speech+faces: {chunks_with_both} ({chunks_with_both / total_chunks * 100:.0f}%)"
    )
    print(
        f" Neither: {chunks_with_neither} ({chunks_with_neither / total_chunks * 100:.0f}%)"
    )
    print(f" Total ASR segments: {total_asr}")
    # face_count sums the faces seen in each frame, so this is a detection
    # count rather than a frame count.
    print(f" Total face detections: {total_faces}")

    # Combination breakdown
    print("\n🎯 ASR/Face Combination Breakdown:")

    combos = {}
    for c in chunks:
        key = (c["has_speech"], c["has_faces"])
        combos.setdefault(key, []).append(c["chunk_id"])

    for (has_speech, has_faces), chunk_ids in sorted(combos.items()):
        speech_str = "🎤 Speech" if has_speech else " No Speech"
        face_str = "👤 Faces" if has_faces else " No Faces"
        # Runs instead of a bare min-max, which would wrongly imply that
        # every ID in between belongs to this combination.
        chunk_range = _format_id_ranges(chunk_ids)
        print(
            f" {speech_str} + {face_str}: {len(chunk_ids)} chunks (IDs: {chunk_range})"
        )

    # Top chunks by activity
    print("\n🔥 Top 10 Most Active Chunks (by ASR+Faces):")
    most_active = sorted(
        chunks, key=lambda c: c["asr_count"] + c["face_count"], reverse=True
    )
    for c in most_active[:10]:
        print(
            f" Chunk {c['chunk_id']:3d} ({c['start']:5d}-{c['end']:5d}s): "
            f"ASR={c['asr_count']:3d}, Faces={c['face_count']:4d}, "
            f"Persons={c['unique_person_count']:2d} ({persons_of(c, 3)})"
        )

    # Special-interest timestamps. Chunks are matched as half-open intervals
    # so a time that falls exactly on a chunk boundary is reported once.
    print("\n🔍 Special Interest Chunks:")
    for c in chunks:
        # Stamp scene around 5730s
        if _chunk_contains(c, 5730):
            print(
                f" 🎯 Stamp scene chunk: {c['chunk_id']} ({c['start']}-{c['end']}s)"
            )
            print(
                f" ASR={c['asr_count']}, Faces={c['face_count']}, "
                f"Persons={c['unique_person_count']} ({persons_of(c, 5)})"
            )

        # Magnifying glass scene around 5727s
        if _chunk_contains(c, 5727):
            print(
                f" 🔍 Magnifier scene chunk: {c['chunk_id']} ({c['start']}-{c['end']}s)"
            )

    # Vase scenes
    vase_times = [300, 660, 3720]
    for vt in vase_times:
        for c in chunks:
            if _chunk_contains(c, vt):
                print(
                    f" 🏺 Vase scene chunk: {c['chunk_id']} ({c['start']}-{c['end']}s)"
                )
                print(
                    f" ASR={c['asr_count']}, Faces={c['face_count']}, "
                    f"Persons={c['unique_person_count']} ({persons_of(c, 3)})"
                )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Build the per-chunk statistics and show the console report.
    chunks, duration = build_chunk_stats()
    print_summary(chunks)

    # Persist the full statistics next to the input files.
    report = {
        "uuid": UUID,
        "duration": duration,
        "chunk_duration": CHUNK_DURATION,
        "chunks": chunks,
    }
    output_path = os.path.join(BASE_DIR, "chunk_statistics.json")
    with open(output_path, "w") as out_file:
        json.dump(report, out_file, indent=2)

    print(f"\n💾 Saved detailed stats to: {output_path}")
|
||||
Reference in New Issue
Block a user