feat: service inventory, ERP reports, sqlite-vec integration, visualize tool

- Add SERVICE_INVENTORY_V1.0.0.md (25 source-verified tools, 3.7GB)
- Add ERP_SELECTION_REPORT.md (Odoo CE vs ERPNext comparison)
- Add SFTPGO_ODOO_REPLACEMENT.md (SFTPGo migration plan)
- Add SERVICE_GO_GITEA_BUILD.md (Go compiler + Gitea build report)
- Add release visualize command (face trace heatmap + identity filter)
- Add sqlite-vec integration (160MB SQLite with vec0 vector tables)
- Add export_identities.py, export_sqlite.py, render_face_heatmap.py
- Add Go, Gitea, Rust/Cargo, Swift, yt-dlp, SQLite, sqlite-vec to service CLI
- Fix package to include identities and identity_bindings in data.sql
- Update release list to show all deployed video stats
- Add V1.0.0 YAML frontmatter to all docs (DOCS_STANDARD compliant)
This commit is contained in:
Accusys
2026-05-13 02:37:45 +08:00
parent cac60c6093
commit 2992a0e650
25 changed files with 6076 additions and 3 deletions

161
scripts/embed_faces.py Normal file
View File

@@ -0,0 +1,161 @@
#!/opt/homebrew/bin/python3.11
"""
Process Swift face detection output + add CoreML FaceNet embeddings.
Replaces face_processor.py Step 2 when Swift already ran.
"""
import sys, os, json, argparse, time
import cv2
import numpy as np
import coremltools as ct
from pathlib import Path
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
FACENET_PATH = os.path.join(SCRIPT_DIR, "..", "models", "facenet512.mlpackage")
def classify_pose(roll, yaw):
abs_yaw = abs(yaw)
abs_roll = abs(roll)
if abs_yaw < 15 and abs_roll < 15:
return "frontal"
elif abs_yaw > 30:
return "profile_right" if yaw > 0 else "profile_left"
else:
return "three_quarter"
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--swift-json", required=True, help="Swift detection output")
parser.add_argument("--video", required=True, help="Video file path")
parser.add_argument("--output", required=True, help="Output face.json path")
parser.add_argument("--fps", type=float, default=24.0)
args = parser.parse_args()
print(f"[EMBED] Loading Swift output: {args.swift_json}")
with open(args.swift_json) as f:
swift = json.load(f)
swift_frames = swift.get("frames", [])
print(f"[EMBED] Swift frames: {len(swift_frames)}")
# Load CoreML FaceNet
facenet = os.path.normpath(FACENET_PATH)
coreml_model = None
if os.path.exists(facenet):
coreml_model = ct.models.MLModel(facenet)
print(f"[EMBED] FaceNet loaded")
else:
print(f"[EMBED] WARNING: FaceNet not found at {facenet}")
# Open video
video = cv2.VideoCapture(args.video)
if not video.isOpened():
raise RuntimeError(f"Cannot open {args.video}")
v_fps = video.get(cv2.CAP_PROP_FPS)
v_total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
v_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
v_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
print(f"[EMBED] Video: {v_width}x{v_height}, {v_fps:.1f}fps")
# Sequential read optimization: build lookup set
needed_frames = set()
frame_data_map = {}
for sf in swift_frames:
fn = int(sf.get("frame", sf.get("frame_number", 0)))
needed_frames.add(fn)
frame_data_map[fn] = sf
output_frames = []
embed_count = 0
t0 = time.time()
current_frame = 0
while True:
ret, frame = video.read()
if not ret:
break
if current_frame not in needed_frames:
current_frame += 1
continue
sf = frame_data_map[current_frame]
timestamp = sf.get("timestamp", current_frame / v_fps)
faces_in = sf.get("faces", [])
processed_faces = []
for face in faces_in:
bb = face.get("bbox", {})
x, y, w, h = bb.get("x", 0), bb.get("y", 0), bb.get("width", 0), bb.get("height", 0)
if w <= 10 or h <= 10:
continue
x1, y1 = max(0, x), max(0, y)
x2, y2 = min(v_width, x + w), min(v_height, y + h)
if x2 <= x1 or y2 <= y1:
continue
face_img = frame[y1:y2, x1:x2]
if face_img.size == 0:
continue
emb = None
if coreml_model is not None and face_img.shape[0] > 0 and face_img.shape[1] > 0:
try:
resized = cv2.resize(face_img, (160, 160))
rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32)
normalized = rgb / 127.5 - 1.0
input_data = np.expand_dims(np.transpose(normalized, (2, 0, 1)), axis=0)
result = coreml_model.predict({"input": input_data})
emb = list(result.values())[0].flatten().tolist()
embed_count += 1
except Exception as e:
pass
# Pose
pose_info = face.get("pose", {})
pose_angle = classify_pose(pose_info.get("roll", 0), pose_info.get("yaw", 0))
processed_faces.append({
"x": x, "y": y, "width": w, "height": h,
"confidence": face.get("confidence", 0.5),
"embedding": emb,
"pose_angle": {
"angle": pose_angle,
"roll": pose_info.get("roll", 0),
"yaw": pose_info.get("yaw", 0),
"pitch": pose_info.get("pitch", 0),
},
"lips": face.get("lips"),
"landmarks": face.get("landmarks"),
"attributes": None,
})
if processed_faces:
output_frames.append({
"frame": current_frame,
"timestamp": timestamp,
"faces": processed_faces,
})
current_frame += 1
if len(output_frames) % 500 == 0:
print(f"[EMBED] {len(output_frames)}/{len(needed_frames)} frames, {embed_count} embeddings, {time.time()-t0:.0f}s")
video.release()
output = {
"frame_count": len(output_frames),
"fps": v_fps,
"frames": output_frames,
}
os.makedirs(os.path.dirname(args.output), exist_ok=True)
with open(args.output, "w") as f:
json.dump(output, f, indent=2, ensure_ascii=False)
elapsed = time.time() - t0
print(f"[EMBED] Done: {len(output_frames)} frames, {embed_count} embeddings, {elapsed:.0f}s → {args.output}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,131 @@
#!/opt/homebrew/bin/python3.11
"""
Export a single file's data to SQL file (COPY format).
Usage: python3 export_file_package.py <file_uuid> <output_dir>
"""
import json, os, sys, subprocess
PG_BIN = "/Users/accusys/pgsql/18.3/bin"
DB_URL = "postgresql://accusys@localhost:5432/momentry"
TABLES = [
("dev.videos", "file_uuid"),
("dev.chunk", "file_uuid"),
("dev.chunk_vectors", "uuid"),
("dev.face_detections", "file_uuid"),
]
def main():
uuid = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"
outdir = sys.argv[2] if len(sys.argv) > 2 else "/tmp/file_pkg"
os.makedirs(outdir, exist_ok=True)
sql_path = os.path.join(outdir, "data.sql")
print(f"Exporting {uuid}{sql_path}")
with open(sql_path, "w") as f:
f.write(f"-- File package: {uuid}\nBEGIN;\n\n")
for tbl, col in TABLES:
f.write(f"-- {tbl} WHERE {col} = '{uuid}'\n")
# Get column list
schema, table = tbl.split(".")
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
"-c", f"SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) FROM information_schema.columns WHERE table_schema='{schema}' AND table_name='{table}' AND is_updatable='YES'"],
capture_output=True, text=True, timeout=15)
cols = r.stdout.strip()
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
f"COPY (SELECT * FROM {tbl} WHERE {col} = '{uuid}') TO STDOUT WITH CSV HEADER"],
capture_output=True, text=True, timeout=60)
if r.stdout.strip():
f.write(f"COPY {tbl} ({cols}) FROM STDIN WITH CSV HEADER;\n")
f.write(r.stdout)
if not r.stdout.endswith("\n"):
f.write("\n")
f.write("\\.\n\n")
# Export identities referenced by this file's face_detections
f.write(f"-- dev.identities (referenced by face_detections WHERE file_uuid='{uuid}')\n")
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
"-c", "SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) FROM information_schema.columns WHERE table_schema='dev' AND table_name='identities' AND is_updatable='YES'"],
capture_output=True, text=True, timeout=15)
cols = r.stdout.strip()
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
f"COPY (SELECT DISTINCT i.* FROM dev.identities i INNER JOIN dev.face_detections fd ON fd.identity_id = i.id WHERE fd.file_uuid = '{uuid}') TO STDOUT WITH CSV HEADER"],
capture_output=True, text=True, timeout=60)
if r.stdout.strip():
f.write(f"COPY dev.identities ({cols}) FROM STDIN WITH CSV HEADER;\n")
f.write(r.stdout)
if not r.stdout.endswith("\n"):
f.write("\n")
f.write("\\.\n\n")
# Export identity_bindings for identities referenced by this file
f.write(f"-- dev.identity_bindings (for identities in face_detections WHERE file_uuid='{uuid}')\n")
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
"-c", "SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) FROM information_schema.columns WHERE table_schema='dev' AND table_name='identity_bindings' AND is_updatable='YES'"],
capture_output=True, text=True, timeout=15)
cols = r.stdout.strip()
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
f"COPY (SELECT DISTINCT ib.* FROM dev.identity_bindings ib INNER JOIN dev.face_detections fd ON fd.identity_id = ib.identity_id WHERE fd.file_uuid = '{uuid}') TO STDOUT WITH CSV HEADER"],
capture_output=True, text=True, timeout=60)
if r.stdout.strip():
f.write(f"COPY dev.identity_bindings ({cols}) FROM STDIN WITH CSV HEADER;\n")
f.write(r.stdout)
if not r.stdout.endswith("\n"):
f.write("\n")
f.write("\\.\n\n")
f.write("COMMIT;\n")
size = os.path.getsize(sql_path)
print(f" {sql_path} ({size/1024/1024:.1f} MB)")
# Copy video file to package
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
"-c", f"SELECT file_path FROM dev.videos WHERE file_uuid='{uuid}'"],
capture_output=True, text=True, timeout=15)
video_path = r.stdout.strip()
if video_path and os.path.exists(video_path):
video_name = os.path.basename(video_path)
dest = os.path.join(outdir, video_name)
import shutil
shutil.copy2(video_path, dest)
vsize = os.path.getsize(dest)
print(f" {video_name} ({vsize/1024/1024:.0f} MB)")
else:
print(f" WARNING: video file not found at {video_path}")
# file_info.json
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
"-c", f"SELECT json_build_object('file_uuid', file_uuid, 'file_name', file_name, 'duration', duration, 'fps', fps, 'width', width, 'height', height, 'total_frames', total_frames, 'status', status) FROM dev.videos WHERE file_uuid='{uuid}'"],
capture_output=True, text=True, timeout=15)
if r.stdout.strip():
info = json.loads(r.stdout.strip())
with open(os.path.join(outdir, "file_info.json"), "w") as f:
json.dump(info, f, indent=2)
print(f" file_info.json")
# Export identities.json (for offline analysis)
id_path = os.path.join(outdir, f"{uuid}.identities.json")
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
"-c", f"SELECT json_build_object('file_uuid', file_uuid) FROM dev.videos WHERE file_uuid='{uuid}'"],
capture_output=True, text=True, timeout=15)
subprocess.run(
["/opt/homebrew/bin/python3.11", os.path.join(os.path.dirname(os.path.abspath(__file__)), "export_identities.py"), uuid, id_path],
check=False, timeout=60)
if os.path.exists(id_path):
print(f" {uuid}.identities.json ({os.path.getsize(id_path)/1024:.0f}KB)")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,74 @@
#!/opt/homebrew/bin/python3.11
"""
Export identity data for a video UUID as JSON (for offline analysis).
Usage: python3 export_identities.py <file_uuid> [output.json]
"""
import sys, json, psycopg2
UUID = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"
OUT = sys.argv[2] if len(sys.argv) > 2 else f"/Users/accusys/momentry/output_dev/{UUID}.identities.json"
conn = psycopg2.connect("dbname=momentry user=accusys")
cur = conn.cursor()
# Get identities referenced by this file's face_detections
cur.execute("""
SELECT DISTINCT i.id, i.name, i.uuid, i.identity_type, i.source, i.status,
i.face_embedding, i.voice_embedding, i.reference_data, i.tmdb_id, i.tmdb_profile
FROM dev.identities i
INNER JOIN dev.face_detections fd ON fd.identity_id = i.id
WHERE fd.file_uuid = %s
ORDER BY i.id
""", (UUID,))
rows = cur.fetchall()
identities = []
for r in rows:
identities.append({
"id": r[0],
"name": r[1],
"uuid": str(r[2]) if r[2] else None,
"identity_type": r[3],
"source": r[4],
"status": r[5],
"tmdb_id": r[9],
"tmdb_profile": r[10],
})
# Get identity_bindings for these identities' traces
cur.execute("""
SELECT DISTINCT ib.identity_id, ib.identity_type, ib.identity_value, ib.confidence
FROM dev.identity_bindings ib
WHERE ib.identity_id IN (
SELECT DISTINCT fd.identity_id FROM dev.face_detections fd WHERE fd.file_uuid = %s
)
""", (UUID,))
bindings = [{"identity_id": r[0], "identity_type": r[1], "identity_value": r[2], "confidence": float(r[3])} for r in cur.fetchall()]
# Get trace-to-identity mapping from face_detections
cur.execute("""
SELECT DISTINCT trace_id, identity_id, COUNT(*) as face_count
FROM dev.face_detections
WHERE file_uuid = %s AND identity_id IS NOT NULL AND trace_id IS NOT NULL
GROUP BY trace_id, identity_id ORDER BY trace_id
""", (UUID,))
trace_map = [{"trace_id": r[0], "identity_id": r[1], "face_count": r[2]} for r in cur.fetchall()]
cur.close(); conn.close()
output = {
"file_uuid": UUID,
"identity_count": len(identities),
"binding_count": len(bindings),
"trace_mapping_count": len(trace_map),
"identities": identities,
"bindings": bindings,
"trace_to_identity": trace_map,
}
with open(OUT, 'w') as f:
json.dump(output, f, indent=2, ensure_ascii=False)
size_kb = len(json.dumps(output)) / 1024
print(f"Exported {len(identities)} identities, {len(bindings)} bindings, {len(trace_map)} trace mappings")
print(f"{OUT} ({size_kb:.0f}KB)")

238
scripts/export_sqlite.py Normal file
View File

@@ -0,0 +1,238 @@
#!/opt/homebrew/bin/python3.11
"""
Export a video's data to a self-contained SQLite database for offline app use.
Uses sqlite-vec extension for native vector storage.
The vec0.dylib must be in the script directory or /tmp/.
Usage: python3 export_sqlite.py <file_uuid> [output.sqlite]
"""
import sys, json, sqlite3, psycopg2, os
UUID = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"
OUT = sys.argv[2] if len(sys.argv) > 2 else f"/Users/accusys/momentry/output_dev/{UUID}.sqlite"
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Find vec0.dylib
VEC_DYLIB = None
for path in [
os.path.join(SCRIPT_DIR, "vec0.dylib"),
"/tmp/vec0.dylib",
os.path.join(SCRIPT_DIR, "sqlite-vec", "vec0.dylib"),
]:
if os.path.exists(path):
VEC_DYLIB = path
break
print(f"Exporting {UUID}{OUT}")
if VEC_DYLIB:
print(f" sqlite-vec: {VEC_DYLIB}")
# Connect to PostgreSQL
pg = psycopg2.connect("dbname=momentry user=accusys")
pg_cur = pg.cursor()
# Connect to SQLite
if os.path.exists(OUT):
os.remove(OUT)
lite = sqlite3.connect(OUT)
# Load sqlite-vec extension if available
if VEC_DYLIB:
lite.enable_load_extension(True)
try:
lite.load_extension(VEC_DYLIB)
print(" sqlite-vec extension loaded")
except Exception as e:
print(f" WARNING: Could not load sqlite-vec: {e}")
lite.enable_load_extension(False)
lite_cur = lite.cursor()
# ---- Helper ----
def pg_to_sqlite(pg_query, lite_table, lite_schema, params=None, transform=None):
"""Copy PostgreSQL query result to SQLite table."""
lite_cur.execute(lite_schema)
pg_cur.execute(pg_query, params or [])
rows = pg_cur.fetchall()
if not rows:
return 0
cols = [d[0] for d in pg_cur.description]
placeholders = ",".join(["?" for _ in cols])
count = 0
for row in rows:
d = dict(zip(cols, row))
if transform:
d = transform(d)
vals = []
for c in cols:
v = d.get(c)
vals.append(None if v is None else v)
try:
lite_cur.execute(f"INSERT INTO {lite_table} VALUES ({placeholders})", vals)
count += 1
except Exception:
pass
lite.commit()
return count
# Create tables (skip WAL — Python sqlite3 may not support PRAGMA with extensions loaded)
print("Creating tables...")
# videos
pg_to_sqlite(
"SELECT file_uuid, file_name, file_path, duration, fps, width, height, probe_json::text, status FROM dev.videos WHERE file_uuid=%s",
"videos",
"CREATE TABLE IF NOT EXISTS videos (file_uuid TEXT PRIMARY KEY, file_name TEXT, file_path TEXT, duration REAL, fps REAL, width INTEGER, height INTEGER, probe_json TEXT, status TEXT)",
[UUID])
# chunk
pg_to_sqlite(
"SELECT file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, metadata->>'speaker_id' as speaker_id FROM dev.chunk WHERE file_uuid=%s AND chunk_type='sentence' ORDER BY chunk_id",
"chunk",
"""CREATE TABLE IF NOT EXISTS chunk (
file_uuid TEXT, chunk_id TEXT, chunk_type TEXT,
start_time REAL, end_time REAL, fps REAL,
start_frame INTEGER, end_frame INTEGER, text_content TEXT, speaker_id TEXT,
PRIMARY KEY(file_uuid, chunk_id))""",
[UUID])
def parse_pg_array(text):
"""Parse PostgreSQL array format {0.1,0.2,...} to Python list."""
if not text or text == 'null':
return None
try:
text = text.strip('{}')
return [float(x) for x in text.split(',') if x.strip()]
except:
return None
# chunk vectors → vec0 virtual table
print(" Creating vec0 table: chunk_embeddings (768D)...")
lite_cur.execute("""
CREATE VIRTUAL TABLE IF NOT EXISTS chunk_embeddings USING vec0(
embedding float[768]
)
""")
pg_cur.execute("SELECT chunk_id, COALESCE(embedding::text, 'null'), uuid FROM dev.chunk_vectors WHERE uuid=%s", [UUID])
chunk_vecs = pg_cur.fetchall()
if chunk_vecs:
for chunk_id, emb_text, _ in chunk_vecs:
# chunk_vectors uses JSONB format, not PG array format
emb = None
try:
emb = json.loads(emb_text) if emb_text else None
except:
pass
if not emb:
emb = parse_pg_array(emb_text) # fallback
if emb and len(emb) == 768:
lite_cur.execute(
"INSERT INTO chunk_embeddings (rowid, embedding) VALUES (?, ?)",
[int(chunk_id) if chunk_id.isdigit() else hash(chunk_id) & 0x7fffffff,
json.dumps(emb)])
lite.commit()
print(f" chunk_embeddings: {len(chunk_vecs)} vectors")
# face detections
def transform_face(row):
return row # embedding moved to vec0 table
pg_to_sqlite(
"""SELECT file_uuid, face_id, frame_number, x, y, width, height, confidence,
identity_id, trace_id,
COALESCE(timestamp_secs, frame_number / 25.0) as timestamp_secs
FROM dev.face_detections WHERE file_uuid=%s ORDER BY frame_number""",
"face_detections",
"""CREATE TABLE IF NOT EXISTS face_detections (
file_uuid TEXT, face_id TEXT, frame_number INTEGER,
x INTEGER, y INTEGER, width INTEGER, height INTEGER,
confidence REAL, identity_id INTEGER, trace_id INTEGER,
timestamp_secs REAL)""",
[UUID], transform_face)
# face embeddings → vec0 virtual table (512D)
print(" Creating vec0 table: face_embeddings (512D)...")
lite_cur.execute("""
CREATE VIRTUAL TABLE IF NOT EXISTS face_embeddings USING vec0(
embedding float[512]
)
""")
pg_cur.execute("SELECT id, COALESCE(embedding::text, 'null') FROM dev.face_detections WHERE file_uuid=%s", [UUID])
face_vecs = pg_cur.fetchall()
if face_vecs:
batch = []
for db_id, emb_text in face_vecs:
emb = parse_pg_array(emb_text)
if emb and len(emb) == 512:
batch.append((db_id, json.dumps(emb)))
if len(batch) >= 500:
lite_cur.executemany("INSERT INTO face_embeddings VALUES (?, ?)", batch)
batch = []
if batch:
lite_cur.executemany("INSERT INTO face_embeddings VALUES (?, ?)", batch)
lite.commit()
print(f" face_embeddings: {len(face_vecs)} vectors")
# identities
def transform_identity(row):
return row
pg_to_sqlite(
"""SELECT DISTINCT i.id, i.name, i.uuid, i.identity_type, i.source, i.status,
i.tmdb_id, i.tmdb_profile, i.tmdb_poster
FROM dev.identities i
INNER JOIN dev.face_detections fd ON fd.identity_id = i.id
WHERE fd.file_uuid=%s""",
"identities",
"""CREATE TABLE IF NOT EXISTS identities (
id INTEGER PRIMARY KEY, name TEXT, uuid TEXT, identity_type TEXT,
source TEXT, status TEXT, tmdb_id INTEGER,
tmdb_profile TEXT, tmdb_poster TEXT)""",
[UUID], transform_identity)
# identity_bindings
pg_to_sqlite(
"""SELECT DISTINCT ib.identity_id, ib.identity_type, ib.identity_value, ib.confidence
FROM dev.identity_bindings ib
INNER JOIN dev.face_detections fd ON fd.identity_id = ib.identity_id
WHERE fd.file_uuid=%s""",
"identity_bindings",
"CREATE TABLE IF NOT EXISTS identity_bindings (identity_id INTEGER, identity_type TEXT, identity_value TEXT, confidence REAL)",
[UUID])
# ---- Create indexes ----
print("Creating indexes...")
lite_cur.execute("CREATE INDEX IF NOT EXISTS idx_fd_trace ON face_detections(trace_id)")
lite_cur.execute("CREATE INDEX IF NOT EXISTS idx_fd_identity ON face_detections(identity_id)")
lite_cur.execute("CREATE INDEX IF NOT EXISTS idx_fd_frame ON face_detections(frame_number)")
lite_cur.execute("CREATE INDEX IF NOT EXISTS idx_fd_time ON face_detections(timestamp_secs)")
lite_cur.execute("CREATE INDEX IF NOT EXISTS idx_chunk_chunkid ON chunk(chunk_id)")
lite.commit()
# ---- Stats ----
pg_cur.close(); pg.close()
lite_cur.close(); lite.close()
size_mb = os.path.getsize(OUT) / 1024 / 1024
print(f"\n {OUT} ({size_mb:.0f}MB)")
# Verify
lite = sqlite3.connect(OUT)
if VEC_DYLIB:
lite.enable_load_extension(True)
lite.load_extension(VEC_DYLIB)
lite.enable_load_extension(False)
c = lite.cursor()
for tbl in ['videos', 'chunk', 'face_detections', 'identities', 'identity_bindings']:
c.execute(f"SELECT COUNT(*) FROM {tbl}")
print(f" {tbl}: {c.fetchone()[0]} rows")
# Check vec tables
try:
c.execute("SELECT COUNT(*) FROM chunk_embeddings")
print(f" chunk_embeddings (vec0, 768D): {c.fetchone()[0]} vectors")
except: print(" chunk_embeddings: N/A")
try:
c.execute("SELECT COUNT(*) FROM face_embeddings")
print(f" face_embeddings (vec0, 512D): {c.fetchone()[0]} vectors")
except: print(" face_embeddings: N/A")
c.close(); lite.close()

View File

@@ -49,7 +49,7 @@ def classify_pose(roll: float, yaw: float) -> str:
class FaceProcessorVision:
def __init__(self, video_path: str, output_path: str, uuid: str = "",
sample_interval: int = 30):
sample_interval: int = 3):
self.video_path = video_path
self.output_path = output_path
self.uuid = uuid
@@ -205,7 +205,7 @@ class FaceProcessorVision:
"pitch": pose_info.get("pitch", 0),
},
"lips": face.get("lips"),
"landmarks": None,
"landmarks": face.get("landmarks"),
"attributes": None,
})
@@ -255,7 +255,7 @@ def main():
parser.add_argument("video_path", help="Video file path")
parser.add_argument("output_path", help="Output JSON path")
parser.add_argument("--uuid", "-u", default="")
parser.add_argument("--sample-interval", type=int, default=30)
parser.add_argument("--sample-interval", type=int, default=3)
parser.add_argument("--force", action="store_true")
args = parser.parse_args()

129
scripts/identity_bind.py Normal file
View File

@@ -0,0 +1,129 @@
#!/opt/homebrew/bin/python3.11
"""
Identity Binding: cluster face traces → identity bindings.
Uses face embeddings from face_detections, clusters per trace, creates identities.
"""
import json, sys, time
import psycopg2
import numpy as np
from sklearn.cluster import AgglomerativeClustering
UUID = sys.argv[1] if len(sys.argv) > 1 else "23b1c872379d4ec06479e5ed39eef4c5"
DB = "dbname=momentry user=accusys"
DISTANCE_THRESHOLD = 0.55 # Cosine distance threshold for clustering
print(f"=== Identity Binding for {UUID} ===")
conn = psycopg2.connect(DB)
cur = conn.cursor()
# Step 1: Get trace embeddings from face_detections
print("Loading face trace data...")
cur.execute("""
SELECT trace_id, embedding
FROM dev.face_detections
WHERE file_uuid = %s AND trace_id IS NOT NULL AND embedding IS NOT NULL
ORDER BY trace_id, id
""", (UUID,))
rows = cur.fetchall()
print(f"Face detections with embeddings: {len(rows)}")
# Group by trace_id and compute average embedding
trace_embs = {}
for trace_id, emb in rows:
if trace_id not in trace_embs:
trace_embs[trace_id] = []
trace_embs[trace_id].append(emb)
print(f"Unique traces: {len(trace_embs)}")
# Compute mean embeddings per trace
trace_ids = []
trace_vectors = []
for tid, embs in sorted(trace_embs.items()):
mean_emb = np.mean(embs, axis=0)
mean_emb = mean_emb / (np.linalg.norm(mean_emb) + 1e-10)
trace_ids.append(tid)
trace_vectors.append(mean_emb)
X = np.array(trace_vectors)
print(f"Trace vectors shape: {X.shape}")
# Step 2: Cluster traces
print("Clustering traces...")
if len(X) > 1:
clustering = AgglomerativeClustering(
n_clusters=None,
distance_threshold=DISTANCE_THRESHOLD,
metric='cosine',
linkage='average'
)
labels = clustering.fit_predict(X)
else:
labels = [0]
n_clusters = len(set(labels))
print(f"Clusters/identities: {n_clusters}")
# Step 3: Get or create identity records
print("Creating identity records...")
# Get existing identities
cur.execute("SELECT id, uuid FROM dev.identities")
existing = {row[0]: row[1] for row in cur.fetchall()}
# Map cluster -> identity_id
cluster_to_identity = {}
for cluster_id in sorted(set(labels)):
# Create new identity
identity_uuid = None
cur.execute("""
INSERT INTO dev.identities (name, identity_type, source, status, created_at)
VALUES (%s, 'face', 'auto', 'active', NOW())
ON CONFLICT (name) DO UPDATE SET status = 'active'
RETURNING id
""", (f"PERSON_{UUID[:8]}_{cluster_id}",))
identity_id = cur.fetchone()[0]
cluster_to_identity[cluster_id] = identity_id
print(f" Cluster {cluster_id}: new identity {identity_id} (PERSON_{cluster_id})")
# Step 4: Create identity bindings
print("Creating identity bindings...")
bindings = 0
for tid, label in zip(trace_ids, labels):
identity_id = cluster_to_identity[label]
# Get a representative face_id for this trace
cur.execute("""
SELECT face_id FROM dev.face_detections
WHERE file_uuid = %s AND trace_id = %s
LIMIT 1
""", (UUID, tid))
row = cur.fetchone()
if row:
face_id = row[0]
# Create binding
cur.execute("""
INSERT INTO dev.identity_bindings (identity_id, identity_type, identity_value, confidence, created_at)
VALUES (%s, 'trace', %s, 0.8, NOW())
ON CONFLICT DO NOTHING
""", (identity_id, str(tid)))
bindings += 1
# Also update face_detection with identity_id
cur.execute("""
UPDATE dev.face_detections SET identity_id = %s
WHERE file_uuid = %s AND trace_id = %s
""", (identity_id, UUID, tid))
conn.commit()
print(f"Created {bindings} identity bindings for {n_clusters} identities")
# Summary
print(f"\n=== Summary ===")
cur.execute("SELECT COUNT(*) FROM dev.identities WHERE source = 'auto'")
print(f"Total auto-generated identities: {cur.fetchone()[0]}")
cur.execute("SELECT COUNT(*) FROM dev.identity_bindings")
print(f"Total identity bindings: {cur.fetchone()[0]}")
cur.close()
conn.close()
print("=== Done ===")

48
scripts/insert_chunks.py Normal file
View File

@@ -0,0 +1,48 @@
#!/opt/homebrew/bin/python3.11
"""Insert sentence chunks from transcribe.py output into dev.chunk table."""
import json, sys
import psycopg2
DB = "dbname=momentry user=accusys"
UUID = sys.argv[1] if len(sys.argv) > 1 else "23b1c872379d4ec06479e5ed39eef4c5"
ASR_PATH = f"/Users/accusys/momentry/output_dev/{UUID}.asr.json"
FPS = 23.976023976023978
with open(ASR_PATH) as f:
asr = json.load(f)
segments = asr.get("segments", [])
print(f"Inserting {len(segments)} sentence chunks for {UUID}...")
conn = psycopg2.connect(DB)
cur = conn.cursor()
inserted = 0
for seg in segments:
chunk_id = seg["chunk_id"]
start_time = seg["start_time"]
end_time = seg["end_time"]
start_frame = int(start_time * FPS)
end_frame = int(end_time * FPS)
text = seg.get("text", "")
speaker_change = seg.get("speaker_change", False)
content = json.dumps({
"source": "transcribe",
"speaker_change": speaker_change,
"pass1_index": seg.get("pass1_index", 0),
})
cur.execute("""
INSERT INTO dev.chunk (file_uuid, chunk_id, chunk_type, start_time, end_time,
start_frame, end_frame, fps, text_content, content, created_at)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s::jsonb, NOW())
ON CONFLICT (file_uuid, chunk_id) DO NOTHING
""", (UUID, chunk_id, "sentence", start_time, end_time,
start_frame, end_frame, FPS, text, content))
inserted += 1
conn.commit()
cur.close()
conn.close()
print(f"Done: {inserted} chunks inserted")

344
scripts/release_manager.py Normal file
View File

@@ -0,0 +1,344 @@
#!/opt/homebrew/bin/python3.11
"""
Release Manager - Deploy / Undeploy video release packages.
Usage:
python3 release_manager.py deploy <package.tar.gz>
python3 release_manager.py undeploy <file_uuid>
python3 release_manager.py list
python3 release_manager.py package <file_uuid> # Create new release package
"""
import json, os, sys, shutil, subprocess, tarfile, tempfile, argparse, time
import psycopg2
from urllib.request import Request, urlopen
PG_BIN = "/Users/accusys/pgsql/18.3/bin"
DB = "dbname=momentry user=accusys"
QDRANT = "http://localhost:6333"
DEMO_DIR = "/Users/accusys/momentry/var/sftpgo/data/demo"
OUTPUT_DIR = "/Users/accusys/momentry/output_dev"
RELEASE_DIR = "/Users/accusys/momentry_core_0.1/release/files"
# ---- Helpers ----
def psql_cmd(sql, db=DB):
"""Run a SQL command via psql."""
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A", "-c", sql],
capture_output=True, text=True, timeout=30)
return r.stdout.strip()
def pg_execute(sql, params=None):
"""Execute SQL via psycopg2."""
conn = psycopg2.connect(DB)
cur = conn.cursor()
if params:
cur.execute(sql, params)
else:
cur.execute(sql)
conn.commit()
cur.close()
conn.close()
def pg_query(sql, params=None):
"""Query via psycopg2."""
conn = psycopg2.connect(DB)
cur = conn.cursor()
if params:
cur.execute(sql, params)
else:
cur.execute(sql)
rows = cur.fetchall()
cur.close()
conn.close()
return rows
def qdrant_delete_points(uuid, collection):
"""Delete points from Qdrant collection by payload filter."""
try:
req = Request(f"{QDRANT}/collections/{collection}/points/delete",
data=json.dumps({
"filter": {"must": [{"key": "file_uuid", "match": {"value": uuid}}]}
}).encode(),
headers={"Content-Type": "application/json"}, method="POST")
urlopen(req)
return True
except:
return False
# ---- Deploy ----
def cmd_deploy(tarball_path):
"""Deploy a release package."""
if not os.path.exists(tarball_path):
print(f"ERROR: {tarball_path} not found")
return 1
t0 = time.time()
print(f"=== Deploy: {os.path.basename(tarball_path)} ===")
# 1. Extract
tmpdir = tempfile.mkdtemp(prefix="release_deploy_")
print(f"Extracting to {tmpdir}...")
with tarfile.open(tarball_path) as tar:
tar.extractall(tmpdir)
# Find UUID from directory name or file_info.json
uuid = None
for item in os.listdir(tmpdir):
info_path = os.path.join(tmpdir, item, "file_info.json")
if os.path.exists(info_path):
with open(info_path) as f:
info = json.load(f)
uuid = info.get("file_uuid", "")
break
if not uuid:
print("ERROR: Could not find file_info.json with UUID")
return 1
pkg_dir = os.path.join(tmpdir, uuid)
print(f"UUID: {uuid}")
# 2. Import data.sql
sql_path = os.path.join(pkg_dir, "data.sql")
if os.path.exists(sql_path):
print(f"Importing data.sql ({os.path.getsize(sql_path)/1024/1024:.0f} MB)...")
r = subprocess.run([f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-f", sql_path],
capture_output=True, text=True, timeout=300)
if r.returncode != 0:
print(f"WARNING: SQL import may have issues")
print(r.stderr[-500:] if r.stderr else "")
else:
print("WARNING: data.sql not found in package")
# 3. Copy video to demo dir
for fname in os.listdir(pkg_dir):
fpath = os.path.join(pkg_dir, fname)
if fname.endswith(('.mp4', '.mov', '.avi', '.mkv')):
dest = os.path.join(DEMO_DIR, fname)
if not os.path.exists(dest):
shutil.copy2(fpath, dest)
print(f"Video: {fname}{DEMO_DIR}/")
else:
print(f"Video: {fname} already exists in demo dir")
# 4. Copy JSON outputs
for fname in os.listdir(pkg_dir):
if fname.endswith('.json'):
src = os.path.join(pkg_dir, fname)
dest = os.path.join(OUTPUT_DIR, fname)
shutil.copy2(src, dest)
print(f"Output files copied to {OUTPUT_DIR}/")
# 5. Verify deployment
rows = pg_query("SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = %s", (uuid,))
chunks = rows[0][0] if rows else 0
rows = pg_query("SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = %s", (uuid,))
faces = rows[0][0] if rows else 0
rows = pg_query("SELECT file_name, duration FROM dev.videos WHERE file_uuid = %s", (uuid,))
video_info = rows[0] if rows else ("?", "?")
elapsed = time.time() - t0
print(f"\n=== Deploy Complete ({elapsed:.0f}s) ===")
print(f" Video: {video_info[0]} ({float(video_info[1]):.0f}s)")
print(f" Chunks: {chunks}")
print(f" Face detections: {faces}")
shutil.rmtree(tmpdir, ignore_errors=True)
return 0
# ---- Undeploy ----
def cmd_undeploy(uuid):
"""Undeploy: remove all trace of a UUID from the system."""
print(f"=== Undeploy: {uuid} ===")
# Confirm
rows = pg_query("SELECT file_name FROM dev.videos WHERE file_uuid = %s", (uuid,))
if not rows:
print(f"ERROR: UUID {uuid} not found in DB")
return 1
filename = rows[0][0]
print(f"Video: {filename}")
print("This will DELETE all data for this video. Are you sure? (y/N): ", end="")
confirm = sys.stdin.readline().strip().lower()
if confirm != 'y':
print("Cancelled")
return 0
t0 = time.time()
# Get video path before deleting
rows = pg_query("SELECT file_path FROM dev.videos WHERE file_uuid = %s", (uuid,))
video_path = rows[0][0] if rows else ""
# 1. Delete DB data
tables = [
("dev.chunk", "file_uuid"),
("dev.chunk_vectors", "uuid"),
("dev.face_detections", "file_uuid"),
("dev.processor_results", "file_uuid"),
("dev.monitor_jobs", "uuid"),
("dev.pre_chunks", "file_uuid"),
]
for tbl, col in tables:
pg_execute(f"DELETE FROM {tbl} WHERE {col} = %s", (uuid,))
print(f" {tbl}: cleared")
pg_execute("DELETE FROM dev.videos WHERE file_uuid = %s", (uuid,))
print(f" dev.videos: removed")
# Clean orphaned identity bindings
pg_execute("DELETE FROM dev.identity_bindings WHERE identity_value NOT IN (SELECT face_id FROM dev.face_detections)")
# 2. Delete output files
for f in os.listdir(OUTPUT_DIR):
if f.startswith(uuid):
os.remove(os.path.join(OUTPUT_DIR, f))
print(f" Output files: removed")
# 3. Delete video from demo dir
if video_path and os.path.exists(video_path):
os.remove(video_path)
print(f" Video file: removed ({os.path.basename(video_path)})")
# 4. Clean Qdrant (skip - Qdrant points don't have easy UUID filter)
# Instead rely on upsert behavior
# 5. Delete release package
pkg_path = os.path.join(RELEASE_DIR, uuid)
if os.path.exists(pkg_path):
shutil.rmtree(pkg_path)
print(f" Release dir: removed")
for f in os.listdir(RELEASE_DIR):
if f.startswith(uuid):
os.remove(os.path.join(RELEASE_DIR, f))
print(f" Release file: {f} removed")
elapsed = time.time() - t0
print(f"\n=== Undeploy Complete ({elapsed:.0f}s) ===")
return 0
# ---- List ----
def cmd_list():
"""List deployed videos."""
rows = pg_query("""
SELECT file_uuid, file_name,
TO_CHAR((duration/60)::int, 'FM999"min"') as dur,
status,
(SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = v.file_uuid) as chunks,
(SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = v.file_uuid) as faces
FROM dev.videos v ORDER BY id DESC
""")
print(f"{'UUID':36s} {'Name':40s} {'Duration':8s} {'Status':10s} {'Chunks':>6s} {'Faces':>6s}")
print("-" * 120)
for r in rows:
uuid, name, dur, status, chunks, faces = r
short_name = (name or "")[:38] + ".." if len(name or "") > 40 else (name or "")
print(f"{uuid:36s} {short_name:40s} {dur or '?':8s} {status or '?':10s} {chunks or 0:>6d} {faces or 0:>6d}")
# ---- Package ----
def cmd_package(uuid):
"""Create a release package for a deployed video."""
print(f"=== Package: {uuid} ===")
# Check video exists
rows = pg_query("SELECT file_uuid, file_name, file_path FROM dev.videos WHERE file_uuid = %s", (uuid,))
if not rows:
print(f"ERROR: UUID {uuid} not found")
return 1
outdir = os.path.join(RELEASE_DIR, uuid)
shutil.rmtree(outdir, ignore_errors=True)
os.makedirs(outdir, exist_ok=True)
# Export data.sql
r = subprocess.run([f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
"-c", f"SELECT json_build_object('file_uuid', file_uuid, 'file_name', file_name, 'duration', duration, 'fps', fps, 'width', width, 'height', height, 'total_frames', total_frames, 'status', status) FROM dev.videos WHERE file_uuid='{uuid}'"],
capture_output=True, text=True, timeout=15)
if r.stdout.strip():
info = json.loads(r.stdout.strip())
with open(os.path.join(outdir, "file_info.json"), "w") as f:
json.dump(info, f, indent=2)
# Export SQL
sql_path = os.path.join(outdir, "data.sql")
with open(sql_path, "w") as f:
f.write(f"-- Release package: {uuid}\nBEGIN;\n\n")
for tbl, col in [("dev.videos", "file_uuid"), ("dev.chunk", "file_uuid"),
("dev.chunk_vectors", "uuid"), ("dev.face_detections", "file_uuid")]:
r = subprocess.run([f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
f"COPY (SELECT * FROM {tbl} WHERE {col} = '{uuid}') TO STDOUT WITH CSV HEADER"],
capture_output=True, text=True, timeout=60)
if r.stdout.strip():
# Get column names
schema, table = tbl.split(".")
r2 = subprocess.run([f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
"-c", f"SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) FROM information_schema.columns WHERE table_schema='{schema}' AND table_name='{table}' AND is_updatable='YES'"],
capture_output=True, text=True, timeout=15)
cols = r2.stdout.strip()
f.write(f"COPY {tbl} ({cols}) FROM STDIN WITH CSV HEADER;\n")
f.write(r.stdout)
if not r.stdout.endswith("\n"):
f.write("\n")
f.write("\\.\n\n")
f.write("COMMIT;\n")
size = os.path.getsize(sql_path)
print(f" data.sql ({size/1024/1024:.0f} MB)")
# Copy video
video_path = rows[0][2]
if video_path and os.path.exists(video_path):
dest = os.path.join(outdir, os.path.basename(video_path))
shutil.copy2(video_path, dest)
print(f" {os.path.basename(video_path)} ({os.path.getsize(dest)/1024/1024:.0f} MB)")
# Copy output JSONs
for fname in os.listdir(OUTPUT_DIR):
if fname.startswith(uuid) and fname.endswith('.json'):
shutil.copy2(os.path.join(OUTPUT_DIR, fname), os.path.join(outdir, fname))
# tar.gz
tarball = os.path.join(RELEASE_DIR, f"{uuid}_v{int(time.time())}.tar.gz")
subprocess.run(["tar", "-czf", tarball, "-C", RELEASE_DIR, uuid], check=True, timeout=300)
tsize = os.path.getsize(tarball)
print(f" Package: {tarball} ({tsize/1024/1024:.0f} MB)")
return 0
# ---- Main ----
def main():
parser = argparse.ArgumentParser(description="Release Manager — deploy/undeploy/list video packages")
sub = parser.add_subparsers(dest="cmd")
p_deploy = sub.add_parser("deploy", help="Deploy a release package")
p_deploy.add_argument("tarball", help="Path to .tar.gz package")
p_undeploy = sub.add_parser("undeploy", help="Undeploy (remove all data for a UUID)")
p_undeploy.add_argument("uuid", help="File UUID")
p_list = sub.add_parser("list", help="List deployed videos")
p_package = sub.add_parser("package", help="Create release package for deployed video")
p_package.add_argument("uuid", help="File UUID")
args = parser.parse_args()
if args.cmd == "deploy":
sys.exit(cmd_deploy(args.tarball))
elif args.cmd == "undeploy":
sys.exit(cmd_undeploy(args.uuid))
elif args.cmd == "list":
cmd_list()
elif args.cmd == "package":
sys.exit(cmd_package(args.uuid))
else:
parser.print_help()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,222 @@
#!/opt/homebrew/bin/python3.11
"""Face Trace Heatmap + Timeline Visualization for Momentry.
Usage:
python3 render_face_heatmap.py <uuid> [output.html] [--identity ID]
"""
import sys, psycopg2, argparse
from collections import defaultdict
parser = argparse.ArgumentParser()
parser.add_argument("uuid")
parser.add_argument("output", nargs="?", default=None)
parser.add_argument("--identity", "-i", type=int, default=None, help="Filter by identity_id")
args = parser.parse_args()
UUID = args.uuid
OUT = args.output or f"/tmp/face_report_{UUID[:8]}.html"
IDENTITY = args.identity
conn = psycopg2.connect("dbname=momentry user=accusys")
cur = conn.cursor()
cur.execute("SELECT duration, file_name, COALESCE(fps, 25.0) FROM dev.videos WHERE file_uuid=%s", (UUID,))
row = cur.fetchone()
if not row:
print("UUID not found")
sys.exit(1)
duration, video_name, fps = float(row[0] or 6785), row[1] or UUID, float(row[2] or 25.0)
# Get sample interval from face.json metadata (or default 3 = 8Hz)
sample_interval = 3
hz = fps / sample_interval
# Build identity filter
identity_filter = ""
identity_params = [UUID]
identity_label = ""
identity_info = None # full identity record when filtered
top_identities = [] # top identities summary (all view)
if IDENTITY is not None:
identity_filter = " AND identity_id = %s"
identity_params.append(IDENTITY)
cur.execute("SELECT id, name, identity_type, source, status FROM dev.identities WHERE id=%s", (IDENTITY,))
id_row = cur.fetchone()
if id_row:
identity_info = {"id": id_row[0], "name": id_row[1], "type": id_row[2], "source": id_row[3], "status": id_row[4]}
identity_label = f" (identity: {id_row[1]})"
else:
identity_label = f" (identity #{IDENTITY})"
identity_params = [UUID, IDENTITY]
# Query trace spans
cur.execute(f"""
SELECT trace_id, MIN(frame_number), MAX(frame_number),
COALESCE(MIN(timestamp_secs), MIN(frame_number) / {fps}) as first_t,
COALESCE(MAX(timestamp_secs), MAX(frame_number) / {fps}) as last_t,
COUNT(*)
FROM dev.face_detections
WHERE file_uuid=%s AND trace_id IS NOT NULL{identity_filter}
GROUP BY trace_id ORDER BY first_t
""", identity_params)
trace_spans = cur.fetchall()
# Query density per time bucket (5s)
cur.execute(f"""
SELECT FLOOR(COALESCE(timestamp_secs, frame_number / {fps}) / 5)::int as bkt, COUNT(*) as cnt
FROM dev.face_detections
WHERE file_uuid=%s AND trace_id IS NOT NULL{identity_filter}
GROUP BY bkt ORDER BY bkt
""", identity_params)
density = {b: c for b, c in cur.fetchall()}
# Count total detections
cur.execute(f"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid=%s{identity_filter}", identity_params)
total_detections = cur.fetchone()[0]
# Get top identities (for all view) and trace↔identity mapping
if IDENTITY is None:
cur.execute("""
SELECT fd.identity_id, i.name, COUNT(*) as faces, COUNT(DISTINCT fd.trace_id) as traces
FROM dev.face_detections fd
LEFT JOIN dev.identities i ON i.id = fd.identity_id
WHERE fd.file_uuid=%s AND fd.identity_id IS NOT NULL
GROUP BY fd.identity_id, i.name ORDER BY faces DESC LIMIT 10
""", (UUID,))
top_identities = cur.fetchall()
else:
# Get trace→identity mapping for tooltip enrichment
cur.execute("""
SELECT DISTINCT fd.trace_id, i.name
FROM dev.face_detections fd
LEFT JOIN dev.identities i ON i.id = fd.identity_id
WHERE fd.file_uuid=%s AND fd.identity_id IS NOT NULL
""", (UUID,))
trace_to_identity = {r[0]: r[1] for r in cur.fetchall()}
cur.close(); conn.close()
BUCKET = 5
num_buckets = int(duration / BUCKET) + 1
max_density = max(density.values()) if density else 1
def build_html():
h = []
h.append('<!DOCTYPE html><html><head><meta charset="utf-8"><title>Face Trace Report</title>')
h.append('<style>')
h.append('body{font-family:-apple-system,BlinkMacSystemFont,sans-serif;margin:20px;background:#0d1117;color:#c9d1d9}')
h.append('h1,h2{color:#e94560}')
h.append('.stats{display:flex;gap:12px;margin:8px 0;flex-wrap:wrap}')
h.append('.stat{background:#161b22;padding:6px 14px;border-radius:6px}')
h.append('.stat .num{font-size:20px;font-weight:bold;color:#e94560}')
h.append('.stat .label{font-size:10px;color:#8b949e}')
h.append('.viz{position:relative;background:#0d1117;border:1px solid #30363d;margin:8px 0;overflow:hidden}')
h.append('.bar{display:block;position:absolute;height:3px;background:#e94560;opacity:0.7;border-radius:1px}')
h.append('.bar:hover{height:8px;opacity:1}')
h.append('</style></head><body>')
sub = identity_label if identity_label else ""
h.append('<h1>Face Trace Report — ' + video_name[:60] + sub + '</h1>')
# Identity card (when filtering by identity)
if identity_info:
h.append('<div style="background:#161b22;border:1px solid #30363d;border-radius:8px;padding:16px;margin:12px 0;">')
h.append('<h3 style="margin:0;color:#e94560">Identity Details</h3>')
h.append(f'<table style="width:100%;margin-top:8px;color:#c9d1d9;border-collapse:collapse">')
h.append(f'<tr><td style="padding:4px 12px 4px 0;color:#8b949e;width:80px">ID</td><td>{identity_info["id"]}</td></tr>')
h.append(f'<tr><td style="padding:4px 12px 4px 0;color:#8b949e">Name</td><td style="font-weight:bold">{identity_info["name"]}</td></tr>')
h.append(f'<tr><td style="padding:4px 12px 4px 0;color:#8b949e">Type</td><td>{identity_info["type"]}</td></tr>')
h.append(f'<tr><td style="padding:4px 12px 4px 0;color:#8b949e">Source</td><td>{identity_info["source"]}</td></tr>')
h.append(f'<tr><td style="padding:4px 12px 4px 0;color:#8b949e">Status</td><td>{identity_info["status"]}</td></tr>')
h.append('</table></div>')
# Top identities table (all view)
if top_identities:
h.append('<h2>Top Identities</h2>')
h.append('<div style="overflow-x:auto">')
h.append('<table style="width:100%;color:#c9d1d9;border-collapse:collapse;font-size:13px">')
h.append('<tr style="background:#161b22;text-align:left"><th style="padding:6px 10px">Identity</th><th style="padding:6px 10px">Name</th><th style="padding:6px 10px;text-align:right">Faces</th><th style="padding:6px 10px;text-align:right">Traces</th></tr>')
for iid, name, fc, tc in top_identities:
short_name = (name or f"#{iid}")[:60]
h.append(f'<tr style="border-bottom:1px solid #21262d"><td style="padding:4px 10px;color:#8b949e">{iid}</td><td style="padding:4px 10px">{short_name}</td><td style="padding:4px 10px;text-align:right">{fc:,}</td><td style="padding:4px 10px;text-align:right">{tc}</td></tr>')
h.append('</table></div>')
# Stats row
h.append('<div class="stats">')
h.append(f'<div class="stat"><div class="num">{len(trace_spans):,}</div><div class="label">traces</div></div>')
h.append(f'<div class="stat"><div class="num">{total_detections:,}</div><div class="label">detections</div></div>')
h.append(f'<div class="stat"><div class="num">{duration:.0f}s</div><div class="label">duration</div></div>')
h.append(f'<div class="stat"><div class="num">{max_density}</div><div class="label">max per {BUCKET}s</div></div>')
h.append(f'<div class="stat"><div class="num">{fps:.0f}fps</div><div class="label">video fps</div></div>')
h.append(f'<div class="stat"><div class="num">{hz:.0f}Hz</div><div class="label">sample rate (every {sample_interval}frames)</div></div>')
h.append(f'<div class="stat"><div class="num">{num_buckets}</div><div class="label">{BUCKET}s buckets</div></div>')
h.append('</div>')
# 1. Density histogram
h.append('<h2>Face Density Over Time</h2>')
h.append('<div style="color:#666;font-size:12px;margin-bottom:4px">Number of face detections per 5-second interval</div>')
w_px = num_buckets * 2 + 20
h.append(f'<div class="viz" style="width:{w_px}px;height:80px">')
for b in range(num_buckets):
v = density.get(b, 0)
h_px = max(2, int(60 * v / max(1, max_density * 0.6))) if v > 0 else 0
if v == 0:
color = "#0d1117"
else:
i = min(v / (max(1, max_density * 0.5)), 1.0)
r = int(233 * i + 13 * (1 - i))
g = int(69 * i + 13 * (1 - i))
bv = int(96 * i + 23 * (1 - i))
color = f"rgb({r},{g},{bv})"
title = f"{b * BUCKET:.0f}s: {v} faces"
h.append(f'<span style="position:absolute;left:{b*2+10}px;bottom:0;width:2px;height:{h_px}px;background:{color}" title="{title}"></span>')
h.append('</div>')
h.append(f'<div style="font-size:10px;color:#444;width:{w_px}px;display:flex;justify-content:space-between"><span>0s</span><span>{duration:.0f}s</span></div>')
# 2. Trace timeline (Gantt)
h.append('<h2>Trace Timeline</h2>')
h.append('<div style="color:#666;font-size:12px;margin-bottom:4px">First → last appearance for each trace. Hover for details.</div>')
show_traces = min(len(trace_spans), 2000)
bar_h = 2
chart_height = show_traces * (bar_h + 1) + 10
h.append(f'<div class="viz" style="width:{w_px}px;height:{chart_height}px">')
for i, (tid, fn0, fn1, t0, t1, cnt) in enumerate(trace_spans[:show_traces]):
left = int(t0 / duration * (w_px - 20)) + 10
width = max(3, int((t1 - t0) / duration * (w_px - 20)))
top = i * (bar_h + 1) + 5
opacity = 1.0 if cnt > 5 else 0.3
identity_note = ""
if IDENTITY is not None and tid in trace_to_identity:
identity_note = f", identity: {trace_to_identity[tid]}"
title = f"T{tid}: {t0:.0f}s{t1:.0f}s, {cnt} faces, f{fn0}f{fn1}{identity_note}"
h.append(f'<span class="bar" style="left:{left}px;top:{top}px;width:{width}px;height:{bar_h}px;opacity:{opacity}" title="{title}"></span>')
h.append('</div>')
h.append(f'<div style="font-size:10px;color:#444;width:{w_px}px;display:flex;justify-content:space-between"><span>0s (showing {show_traces}/{len(trace_spans)} traces)</span><span>{duration:.0f}s</span></div>')
# 3. Per-trace heatmap
h.append('<h2>Per-Trace Heatmap (top 500, every 10th trace)</h2>')
h.append(f'<div style="overflow-x:scroll;max-width:100%">')
step = max(1, num_buckets // 120)
for i, (tid, fn0, fn1, t0, t1, cnt) in enumerate(trace_spans[:500]):
if i % 10 != 0:
continue
start_bkt = int(t0 / BUCKET)
end_bkt = int(t1 / BUCKET) + 1
row = f'<div style="white-space:nowrap;line-height:6px"><span style="display:inline-block;width:45px;font-size:6px;color:#555">T{tid}</span>'
for b in range(0, num_buckets, step):
active = start_bkt <= b <= end_bkt
color = "#e94560" if active else "#161b22"
row += f'<span style="display:inline-block;width:2px;height:4px;background:{color};margin:0"></span>'
row += '</div>'
h.append(row)
h.append('</div>')
h.append('</body></html>')
return '\n'.join(h)
html = build_html()
with open(OUT, 'w') as f:
f.write(html)
print(f"Saved: {OUT}")
print(f"Traces: {len(trace_spans)}, Detections: {total_detections}, Density max: {max_density}, Duration: {duration:.0f}s, Sample: {hz:.0f}Hz")
print(f"Size: {len(html) / 1024:.0f}KB")

164
scripts/speaker_assign.py Normal file
View File

@@ -0,0 +1,164 @@
#!/opt/homebrew/bin/python3.11
"""
Speaker Assignment: cluster voice vectors from Qdrant, assign speaker IDs to DB chunks.
"""
import json, sys, time
import psycopg2
import numpy as np
from urllib.request import Request, urlopen
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.pairwise import cosine_similarity
UUID = sys.argv[1] if len(sys.argv) > 1 else "23b1c872379d4ec06479e5ed39eef4c5"
QDRANT = "http://localhost:6333"
DB = "dbname=momentry user=accusys"
COLLECTION = "momentry_dev_voice"
print(f"=== Speaker Assignment for {UUID} ===")
# Step 1: Read voice vectors from Qdrant
print("Reading voice vectors from Qdrant...")
vectors = []
chunk_ids = []
# We need to scroll through all points
offset = None
while True:
data = {"limit": 100, "with_payload": True, "with_vector": True}
if offset is not None:
data["offset"] = offset
req = Request(f"{QDRANT}/collections/{COLLECTION}/points/scroll",
data=json.dumps(data).encode(),
headers={"Content-Type": "application/json"}, method="POST")
resp = json.loads(urlopen(req).read())
result = resp["result"]
points = result.get("points", [])
if not points:
break
for pt in points:
payload = pt.get("payload", {})
cid = payload.get("chunk_id", "")
# Only get vectors for THIS UUID's chunks
# Filter by checking DB later, or rely on Qdrant payload
vectors.append(pt["vector"])
chunk_ids.append(cid)
offset = result.get("next_page_offset")
if offset is None:
break
print(f" Read {len(vectors)} vectors...")
print(f"Total vectors: {len(vectors)}")
# Step 2: Filter to only our UUID's chunks (from DB)
conn = psycopg2.connect(DB)
cur = conn.cursor()
cur.execute("SELECT chunk_id FROM dev.chunk WHERE file_uuid = %s AND chunk_type = 'sentence' ORDER BY id", (UUID,))
db_chunk_ids = set(row[0] for row in cur.fetchall())
print(f"DB chunk_ids: {len(db_chunk_ids)}")
# Filter vectors to match DB chunks
filtered_vectors = []
filtered_chunk_ids = []
for v, cid in zip(vectors, chunk_ids):
if cid in db_chunk_ids:
filtered_vectors.append(v)
filtered_chunk_ids.append(cid)
vectors = filtered_vectors
chunk_ids = filtered_chunk_ids
print(f"Matched vectors: {len(vectors)}")
# Sort by chunk_id (which is numeric string)
indices = sorted(range(len(chunk_ids)), key=lambda i: int(chunk_ids[i]) if chunk_ids[i].isdigit() else 0)
vectors = [vectors[i] for i in indices]
chunk_ids = [chunk_ids[i] for i in indices]
# Step 3: Read speaker_change from asr.json
asr_path = f"/Users/accusys/momentry/output_dev/{UUID}.asr.json"
with open(asr_path) as f:
asr_data = json.load(f)
segments = asr_data.get("segments", [])
speaker_changes = {}
for seg in segments:
speaker_changes[seg["chunk_id"]] = seg.get("speaker_change", False)
# Step 4: Cluster embeddings
print("Clustering...")
X = np.array(vectors)
# Compute cosine distance matrix
# Cosine distance = 1 - cosine_similarity
cos_sim = cosine_similarity(X)
cos_dist = 1 - cos_sim
# Use AgglomerativeClustering with cosine distance
# Determine optimal n_clusters by looking at speaker_change boundaries
# First pass: use speaker_change as hard boundaries to get initial clusters
# Then refine
# Simpler: use a distance threshold
n = len(vectors)
labels = np.full(n, -1, dtype=int)
current_speaker = 0
# Start with first chunk as speaker 0
labels[0] = current_speaker
centroids = [np.array(vectors[0])] # per-cluster centroid
for i in range(1, n):
has_change = speaker_changes.get(chunk_ids[i], False)
vec = np.array(vectors[i])
if has_change:
# Speaker change: check if this is a NEW speaker or returning to a previous one
# Compare with centroid of current speaker vs others
similarities = [float(np.dot(vec, c) / (np.linalg.norm(vec) * np.linalg.norm(c) + 1e-10)) for c in centroids]
best_sim = max(similarities) if similarities else 0
best_cluster = similarities.index(best_sim) if similarities else 0
if best_sim > 0.65 and best_cluster != current_speaker:
# Returning to a previous speaker
labels[i] = best_cluster
elif best_sim < 0.55:
# New speaker
current_speaker = len(centroids)
labels[i] = current_speaker
centroids.append(vec)
else:
# Stay with current speaker (false change detection)
labels[i] = current_speaker
centroids[current_speaker] = (centroids[current_speaker] + vec) / 2
else:
# No speaker change: same speaker as previous
labels[i] = current_speaker
centroids[current_speaker] = (centroids[current_speaker] + vec) / 2
n_speakers = len(set(labels))
print(f"Identified {n_speakers} unique speakers")
# Step 5: Update DB chunks with speaker assignment
print("Updating DB chunks...")
# Map: chunk_id -> speaker_id
speaker_map = {}
for cid, label in zip(chunk_ids, labels):
speaker_map[cid] = f"SPEAKER_{label}"
updated = 0
for cid, spk_id in speaker_map.items():
cur.execute("""
UPDATE dev.chunk SET metadata = COALESCE(metadata, '{}'::jsonb) || %s::jsonb
WHERE file_uuid = %s AND chunk_id = %s AND chunk_type = 'sentence'
""", (json.dumps({"speaker_id": spk_id}), UUID, cid))
updated += 1
conn.commit()
print(f"Updated {updated} chunks with speaker IDs")
# Step 6: Save speaker map
speaker_map_path = f"/Users/accusys/momentry/output_dev/{UUID}.speaker_map.json"
with open(speaker_map_path, "w") as f:
json.dump({"speakers": n_speakers, "assignments": speaker_map}, f, indent=2)
print(f"Speaker map saved: {speaker_map_path}")
cur.close()
conn.close()
print("=== Done ===")

284
scripts/transcribe.py Normal file
View File

@@ -0,0 +1,284 @@
#!/opt/homebrew/bin/python3.11
"""
One-pass ASR + Speaker Change Detection + Split → asr.json
"""
import json, os, sys, time, argparse, subprocess, tempfile, shutil
import numpy as np
from pathlib import Path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "asrx_self"))
from speaker_encoder import load_speaker_encoder, extract_speaker_embedding, normalize_embeddings
import torchaudio
from faster_whisper import WhisperModel
SUB_WIN = 0.5
SUB_STRIDE = 0.25
MIN_DUR = 0.3
SIM_THRESHOLD = 0.45
CHANGE_CONFIRM = 2
def extract_audio(video_path, tmp_dir, sr=16000):
wav_path = os.path.join(tmp_dir, "audio.wav")
subprocess.run(["ffmpeg", "-y", "-v", "quiet", "-i", video_path,
"-ar", str(sr), "-ac", "1", "-sample_fmt", "s16", wav_path],
check=True, capture_output=True, timeout=300)
wav_data, sr_actual = torchaudio.load(wav_path)
if wav_data.shape[0] > 1:
wav_data = wav_data.mean(dim=0, keepdim=True)
return wav_data, sr_actual
def transcribe_pass1(model, wav_path, vad_params=None):
print(" [faster-whisper] Transcribing...")
if vad_params is None:
vad_params = {"min_silence_duration_ms": 500, "speech_pad_ms": 200}
segments, info = model.transcribe(wav_path, beam_size=5,
vad_filter=True, word_timestamps=True, vad_parameters=vad_params)
pass1 = []
for i, seg in enumerate(segments):
words = []
if seg.words:
for w in seg.words:
words.append({"word": w.word.strip(), "start": round(w.start,3), "end": round(w.end,3)})
pass1.append({
"index": i,
"start": round(seg.start, 3),
"end": round(seg.end, 3),
"text": seg.text.strip(),
"words": words,
})
print(f" Pass1 segments: {len(pass1)}")
return pass1
def detect_speaker_changes(wav_data, sr, pass1_segs, encoder, progress_step=100):
print(" [Speaker Detection] Scanning...")
ws = int(SUB_WIN * sr)
sw = int(SUB_STRIDE * sr)
change_points = [] # List[List[float]] → change times per pass1 segment
t0 = time.time()
for si, seg in enumerate(pass1_segs):
st = int(seg["start"] * sr)
et = int(seg["end"] * sr)
dur = seg["end"] - seg["start"]
if dur < 1.0:
change_points.append([])
continue
sub_embs = []
sub_times = []
for wpos in range(st, et - ws + 1, sw):
chunk = wav_data[:, wpos:wpos+ws]
emb = extract_speaker_embedding(encoder, chunk.numpy(), sr)
emb = emb / (np.linalg.norm(emb) + 1e-10)
sub_embs.append(emb)
sub_times.append(wpos / sr)
if len(sub_embs) < 3:
change_points.append([])
continue
sub_embs = normalize_embeddings(np.array(sub_embs))
cps = []
# Require CHANGE_CONFIRM consecutive low-similarity windows before registering a change
low_run = 0
for i in range(1, len(sub_embs)):
sim = float(np.dot(sub_embs[i-1], sub_embs[i]))
if sim < SIM_THRESHOLD:
low_run += 1
if low_run >= CHANGE_CONFIRM:
# Change point at the START of the low-sim run
cps.append(round(sub_times[i - low_run + 1], 2))
low_run = 0
else:
low_run = 0
change_points.append(cps)
if (si + 1) % progress_step == 0:
pct = (si + 1) * 100 // len(pass1_segs)
print(f" {si+1}/{len(pass1_segs)} ({pct}%) [{time.time()-t0:.0f}s]")
total_changes = sum(len(cps) for cps in change_points)
print(f" Speaker changes detected: {total_changes} in {len(pass1_segs)} segments ({time.time()-t0:.0f}s)")
return change_points
def build_segments(pass1_segs, change_points, wav_data, sr, asr_model, tmp_dir, fps=24.0):
print(" [Split] Building final segments...")
final = []
chunk_idx = 0
for si, seg in enumerate(pass1_segs):
cps = change_points[si]
if not cps:
final.append({
"chunk_id": str(chunk_idx),
"pass1_index": si,
"start_time": seg["start"],
"end_time": seg["end"],
"start_frame": int(seg["start"] * fps),
"end_frame": int(seg["end"] * fps),
"text": seg["text"],
})
chunk_idx += 1
continue
seg["split"] = True
boundaries = [seg["start"]] + cps + [seg["end"]]
for pi in range(len(boundaries) - 1):
ps, pe = boundaries[pi], boundaries[pi+1]
if pe - ps < MIN_DUR:
continue
# Try word_timestamp mapping first (wider tolerance)
sub_words = [w["word"] for w in seg["words"] if w["start"] >= ps - 0.3 and w["end"] <= pe + 0.3]
text = " ".join(sub_words).strip() if sub_words else ""
# Fallback: call faster-whisper on the sub-audio chunk
if not text:
import soundfile as sf
chunk_path = os.path.join(tmp_dir, f"sub_{chunk_idx}.wav")
a_chunk = wav_data[:, int(ps*sr):int(pe*sr)].numpy()[0]
if len(a_chunk) > sr * 0.3: # skip if < 0.3s
sf.write(chunk_path, a_chunk, sr)
try:
sub_segs, _ = asr_model.transcribe(chunk_path, beam_size=5,
vad_filter=True, vad_parameters={"min_silence_duration_ms": 100})
text = " ".join(s.text.strip() for s in sub_segs)
except:
pass
os.remove(chunk_path)
if not text:
text = " ".join([w["word"] for w in seg["words"]
if w["start"] >= ps - 0.5 and w["end"] <= pe + 0.5]).strip()
if not text:
text = seg["text"][:60]
final.append({
"chunk_id": str(chunk_idx),
"pass1_index": si,
"start_time": round(ps, 3),
"end_time": round(pe, 3),
"start_frame": int(ps * fps),
"end_frame": int(pe * fps),
"text": text,
"speaker_change": True,
})
chunk_idx += 1
print(f" Final segments: {len(final)}")
return final
def voice_vectors_to_qdrant(wav_data, sr, final_segs, encoder, qdrant_url="http://localhost:6333"):
print(" [Voice Vectors] Extracting 192D embeddings...")
embeddings = []
t0 = time.time()
for si, seg in enumerate(final_segs):
st = int(seg["start_time"] * sr)
et = int(seg["end_time"] * sr)
a_chunk = wav_data[:, st:et]
emb = extract_speaker_embedding(encoder, a_chunk.numpy(), sr)
emb = emb / (np.linalg.norm(emb) + 1e-10)
embeddings.append({"chunk_id": seg["chunk_id"], "embedding": emb.tolist()})
if (si + 1) % 500 == 0:
print(f" {si+1}/{len(final_segs)} [{time.time()-t0:.0f}s]")
print(f" Writing to Qdrant...")
from urllib.request import Request, urlopen
batch = []
for i, e in enumerate(embeddings):
batch.append({"id": i + 1, "vector": e["embedding"],
"payload": {"chunk_id": e["chunk_id"], "chunk_type": "sentence"}})
if len(batch) >= 100:
req = Request(f"{qdrant_url}/collections/momentry_dev_voice/points?wait=true",
data=json.dumps({"points": batch}).encode(),
headers={"Content-Type": "application/json"}, method="PUT")
try: urlopen(req)
except: pass
batch = []
# Flush remaining
if batch:
req = Request(f"{qdrant_url}/collections/momentry_dev_voice/points?wait=true",
data=json.dumps({"points": batch}).encode(),
headers={"Content-Type": "application/json"}, method="PUT")
try: urlopen(req)
except: pass
print(f" Voice vectors: {len(embeddings)} pts → Qdrant [{time.time()-t0:.0f}s]")
return embeddings
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--video", default="/Users/accusys/momentry/var/sftpgo/data/demo/Charade (1963) Cary Grant & Audrey Hepburn Comedy Mystery Romance Thriller Full Movie.mp4")
parser.add_argument("--output", help="Output path for asr.json", default="/Users/accusys/momentry/output_dev/aeed71342a899fe4b4c57b7d41bcb692.asr.json")
parser.add_argument("--sample", type=int, help="Process only first N pass1 segments (for testing)")
parser.add_argument("--no-qdrant", action="store_true", help="Skip Qdrant upload")
args = parser.parse_args()
t0 = time.time()
# Load models
print("=== Loading Models ===")
asr_model = WhisperModel("small", device="cpu", compute_type="int8")
print(" faster-whisper small loaded")
encoder = load_speaker_encoder()
print(" ECAPA-TDNN loaded")
print()
# Extract audio
print("=== Audio Extraction ===")
tmp_dir = tempfile.mkdtemp(prefix="transcribe_")
wav_data, sr = extract_audio(args.video, tmp_dir)
print(f" Audio: {wav_data.shape[1]/sr:.0f}s, {sr}Hz")
wav_path = os.path.join(tmp_dir, "audio.wav")
print()
# Step 1: faster-whisper pass1
print("=== Step 1: Pass1 Transcription ===")
pass1_segs = transcribe_pass1(asr_model, wav_path)
if args.sample:
pass1_segs = pass1_segs[:args.sample]
print(f" SAMPLE MODE: limiting to {args.sample} segments")
print()
# Step 2: Speaker change detection
print("=== Step 2: Speaker Change Detection ===")
change_points = detect_speaker_changes(wav_data, sr, pass1_segs, encoder)
print()
# Step 3: Build final segments
print("=== Step 3: Build Final Segments ===")
final_segs = build_segments(pass1_segs, change_points, wav_data, sr, asr_model, tmp_dir)
print()
# Step 4: Voice vectors → Qdrant
if not args.no_qdrant:
print("=== Step 4: Voice Vectors → Qdrant ===")
voice_vectors_to_qdrant(wav_data, sr, final_segs, encoder)
print()
# Step 5: Write asr.json
print("=== Step 5: Write asr.json ===")
uuid = os.path.basename(args.output).replace(".asr.json", "")
output = {
"file_uuid": uuid,
"pass1": pass1_segs,
"segments": final_segs,
}
with open(args.output, "w") as f:
json.dump(output, f, indent=2, ensure_ascii=False)
sz = os.path.getsize(args.output)
print(f" {args.output} ({sz/1024:.0f} KB)")
# Cleanup
shutil.rmtree(tmp_dir, ignore_errors=True)
elapsed = time.time() - t0
print(f"\n=== Done ({elapsed:.0f}s) ===")
print(f" Pass1 segments: {len(pass1_segs)}")
print(f" Final segments: {len(final_segs)}")
fp = args.output
print(f" Output: {fp}")
if __name__ == "__main__":
main()

BIN
scripts/vec0.dylib Normal file

Binary file not shown.

View File

@@ -0,0 +1,69 @@
#!/opt/homebrew/bin/python3.11
"""Vectorize sentence chunks via Ollama mxbai-embed-large and store in DB + Qdrant."""
import json, sys, time
import psycopg2
from urllib.request import Request, urlopen
DB = "dbname=momentry user=accusys"
UUID = sys.argv[1] if len(sys.argv) > 1 else "23b1c872379d4ec06479e5ed39eef4c5"
OLLAMA = "http://localhost:11434/api/embeddings"
QDRANT = "http://localhost:6333"
conn = psycopg2.connect(DB)
cur = conn.cursor()
cur.execute("""
SELECT chunk_id, text_content FROM dev.chunk
WHERE file_uuid = %s AND chunk_type = 'sentence'
AND (text_content IS NOT NULL AND text_content != '')
ORDER BY id
""", (UUID,))
rows = cur.fetchall()
print(f"Vectorizing {len(rows)} chunks for {UUID}...")
stored = 0
batch = []
for chunk_id, text in rows:
req = Request(OLLAMA, data=json.dumps({
"model": "nomic-embed-text-v2-moe:latest",
"prompt": text
}).encode(), headers={"Content-Type": "application/json"})
resp = json.loads(urlopen(req).read())
embedding = resp["embedding"]
# Store in PostgreSQL chunk_vectors
cur.execute("""
INSERT INTO dev.chunk_vectors (chunk_id, uuid, chunk_type, embedding)
VALUES (%s, %s, 'sentence', %s::jsonb)
ON CONFLICT (chunk_id, uuid) DO UPDATE SET embedding = EXCLUDED.embedding
""", (chunk_id, UUID, json.dumps(embedding)))
# Batch for Qdrant
batch.append({
"id": int(chunk_id) + 1 if chunk_id.isdigit() else len(batch) + 10000,
"vector": embedding,
"payload": {"chunk_id": chunk_id, "chunk_type": "sentence"}
})
if len(batch) >= 100:
req = Request(f"{QDRANT}/collections/momentry_dev_rule1_v2/points?wait=true",
data=json.dumps({"points": batch}).encode(),
headers={"Content-Type": "application/json"}, method="PUT")
urlopen(req)
batch = []
stored += 1
if stored % 50 == 0:
print(f" {stored}/{len(rows)}")
conn.commit()
if batch:
req = Request(f"{QDRANT}/collections/momentry_dev_rule1_v2/points?wait=true",
data=json.dumps({"points": batch}).encode(),
headers={"Content-Type": "application/json"}, method="PUT")
urlopen(req)
conn.commit()
cur.close()
conn.close()
print(f"Done: {stored} vectors stored")