feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
This commit is contained in:
115
scripts/generate_chunk_visual_stats.py
Normal file
115
scripts/generate_chunk_visual_stats.py
Normal file
@@ -0,0 +1,115 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Generate pre-computed visual statistics for chunks.
|
||||
Reads frame yolo_objects, counts them per chunk, and updates chunks.visual_stats.
|
||||
"""
|
||||
|
||||
import json
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
from collections import Counter
|
||||
|
||||
DB_CONFIG = {
|
||||
"host": "localhost",
|
||||
"user": "accusys",
|
||||
"dbname": "momentry",
|
||||
}
|
||||
|
||||
|
||||
def get_chunks_to_process(conn, schema="public"):
|
||||
"""Fetch all chunks that need visual_stats processing."""
|
||||
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
|
||||
# We check both public and dev chunks
|
||||
cur.execute(f"""
|
||||
SELECT id, uuid, start_time, end_time
|
||||
FROM {schema}.chunks
|
||||
WHERE (visual_stats IS NULL OR visual_stats = '{{}}'::jsonb)
|
||||
""")
|
||||
return cur.fetchall()
|
||||
|
||||
|
||||
def get_yolo_stats_for_range(conn, uuid, start_time, end_time, schema="public"):
|
||||
"""Aggregate YOLO object counts for a specific time range."""
|
||||
# We need to find file_id for the given uuid
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(f"SELECT id FROM {schema}.videos WHERE uuid = %s", (uuid,))
|
||||
row = cur.fetchone()
|
||||
if not row:
|
||||
return {}
|
||||
file_id = row[0]
|
||||
|
||||
# Fetch yolo_objects from frames in range
|
||||
cur.execute(
|
||||
f"""
|
||||
SELECT yolo_objects
|
||||
FROM {schema}.frames
|
||||
WHERE file_id = %s
|
||||
AND timestamp >= %s
|
||||
AND timestamp <= %s
|
||||
AND yolo_objects IS NOT NULL
|
||||
""",
|
||||
(file_id, start_time, end_time),
|
||||
)
|
||||
|
||||
objects = Counter()
|
||||
for (yolo_data,) in cur.fetchall():
|
||||
# yolo_data is a JSON list of objects: [{"class_name": "person", ...}, ...]
|
||||
if isinstance(yolo_data, str):
|
||||
try:
|
||||
yolo_data = json.loads(yolo_data)
|
||||
except:
|
||||
continue
|
||||
|
||||
if isinstance(yolo_data, list):
|
||||
for obj in yolo_data:
|
||||
class_name = obj.get("class_name")
|
||||
if class_name:
|
||||
objects[class_name] += 1
|
||||
|
||||
return dict(objects)
|
||||
|
||||
|
||||
def update_chunk_visual_stats(conn, chunk_id, stats, schema="public"):
|
||||
"""Update the visual_stats column for a chunk."""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
f"UPDATE {schema}.chunks SET visual_stats = %s::jsonb WHERE id = %s",
|
||||
(json.dumps(stats), chunk_id),
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
print("🚀 Starting visual stats generation...")
|
||||
|
||||
conn = psycopg2.connect(**DB_CONFIG)
|
||||
|
||||
for schema in ["public", "dev"]:
|
||||
print(f"📊 Processing schema: {schema}")
|
||||
chunks = get_chunks_to_process(conn, schema)
|
||||
print(f" Found {len(chunks)} chunks to process.")
|
||||
|
||||
processed_count = 0
|
||||
for chunk in chunks:
|
||||
chunk_id = chunk["id"]
|
||||
uuid = chunk["uuid"]
|
||||
start_time = chunk["start_time"]
|
||||
end_time = chunk["end_time"]
|
||||
|
||||
stats = get_yolo_stats_for_range(conn, uuid, start_time, end_time, schema)
|
||||
|
||||
# Update DB even if empty to mark as processed (avoid re-scanning)
|
||||
update_chunk_visual_stats(conn, chunk_id, stats, schema)
|
||||
|
||||
processed_count += 1
|
||||
if processed_count % 100 == 0:
|
||||
conn.commit()
|
||||
print(f" ✅ Processed {processed_count}/{len(chunks)} chunks...")
|
||||
|
||||
conn.commit()
|
||||
print(f"🎉 Done with {schema}! Processed {processed_count} chunks.")
|
||||
|
||||
conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user