feat: trace-level matching, health watcher/worker status, timezone config

This commit is contained in:
Accusys
2026-05-21 01:08:30 +08:00
parent 8ede4be159
commit bebaa743ed
60 changed files with 6110 additions and 1586 deletions

View File

@@ -29,7 +29,7 @@ REDIS_PASSWORD=accusys
# Qdrant Vector Database - Collection isolation # Qdrant Vector Database - Collection isolation
QDRANT_URL=http://localhost:6333 QDRANT_URL=http://localhost:6333
QDRANT_API_KEY=Test3200Test3200Test3200 QDRANT_API_KEY=Test3200Test3200Test3200
QDRANT_COLLECTION=momentry_dev_v1 QDRANT_COLLECTION=momentry_dev_rule1_v2
# Paths # Paths
MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev

View File

@@ -22,6 +22,9 @@ QDRANT_COLLECTION=momentry_rule1
# === API Keys === # === API Keys ===
MOMENTRY_API_KEY=muser_your_key_here MOMENTRY_API_KEY=muser_your_key_here
MOMENTRY_DEMO_API_KEY=muser_your_demo_key_here MOMENTRY_DEMO_API_KEY=muser_your_demo_key_here
JWT_SECRET=your_jwt_secret_here_change_in_production
SFTPGO_BASE_URL=http://127.0.0.1:8080
TMDB_API_KEY=your_tmdb_api_key_here TMDB_API_KEY=your_tmdb_api_key_here
# === LLM === # === LLM ===

34
Cargo.lock generated
View File

@@ -178,6 +178,18 @@ dependencies = [
"password-hash", "password-hash",
] ]
[[package]]
name = "async-compression"
version = "0.4.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac"
dependencies = [
"compression-codecs",
"compression-core",
"pin-project-lite",
"tokio",
]
[[package]] [[package]]
name = "async-lock" name = "async-lock"
version = "3.4.2" version = "3.4.2"
@@ -615,6 +627,23 @@ dependencies = [
"static_assertions", "static_assertions",
] ]
[[package]]
name = "compression-codecs"
version = "0.4.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf"
dependencies = [
"compression-core",
"flate2",
"memchr",
]
[[package]]
name = "compression-core"
version = "0.4.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789"
[[package]] [[package]]
name = "concurrent-queue" name = "concurrent-queue"
version = "2.5.0" version = "2.5.0"
@@ -4861,13 +4890,18 @@ version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
dependencies = [ dependencies = [
"async-compression",
"bitflags 2.11.1", "bitflags 2.11.1",
"bytes", "bytes",
"futures-core",
"futures-util", "futures-util",
"http", "http",
"http-body", "http-body",
"http-body-util",
"iri-string", "iri-string",
"pin-project-lite", "pin-project-lite",
"tokio",
"tokio-util",
"tower 0.5.3", "tower 0.5.3",
"tower-layer", "tower-layer",
"tower-service", "tower-service",

View File

@@ -55,7 +55,7 @@ sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "js
mongodb = { version = "2", features = ["tokio-runtime"] } mongodb = { version = "2", features = ["tokio-runtime"] }
bson = { version = "2", features = ["chrono-0_4"] } bson = { version = "2", features = ["chrono-0_4"] }
qdrant-client = "1.7" qdrant-client = "1.7"
reqwest = { version = "0.12", features = ["json"] } reqwest = { version = "0.12", features = ["json", "gzip"] }
pgvector = { version = "0.3", features = ["sqlx"] } pgvector = { version = "0.3", features = ["sqlx"] }
# HTTP Server # HTTP Server

View File

@@ -60,7 +60,8 @@ fn sha256_hex(data: &[u8]) -> String {
use std::io::Write; use std::io::Write;
use std::process::{Command, Stdio}; use std::process::{Command, Stdio};
if let Ok(mut child) = Command::new("shasum") if let Ok(mut child) = Command::new("shasum")
.arg("-a").arg("256") .arg("-a")
.arg("256")
.stdin(Stdio::piped()) .stdin(Stdio::piped())
.stdout(Stdio::piped()) .stdout(Stdio::piped())
.spawn() .spawn()

View File

@@ -103,7 +103,7 @@ f4d1b4334a49357b74b80e390ad5a3d16263e51cbe5cab661af92bd2e9721f02 ./face_process
802015c73dfce0866f2a0bc94c645aa35ba30a6de78244af23090bb1f1828c6e ./face_processor_mps.py 802015c73dfce0866f2a0bc94c645aa35ba30a6de78244af23090bb1f1828c6e ./face_processor_mps.py
96ffdbde3f4d87e9942f9e1f4c93cbd999dc404b43e00d4cdcbb22de3c0f16b7 ./face_processor_optimized.py 96ffdbde3f4d87e9942f9e1f4c93cbd999dc404b43e00d4cdcbb22de3c0f16b7 ./face_processor_optimized.py
17e7d0bd142bddfead94b1dd959c1f41c0dad7063ffc677dff1a99d62aab6cf8 ./face_processor_v1.py 17e7d0bd142bddfead94b1dd959c1f41c0dad7063ffc677dff1a99d62aab6cf8 ./face_processor_v1.py
15877adf5c160d861da688a25b93fd2edc189f326f9646ffb4de063e554f773a ./face_processor.py d6ddad29a5e53b43b887554072d7965f0535e47fb62dad1a8b87e44fa1be6015 ./face_processor.py
8edab61189ad1a8fa60c203077e814e82d46c5bae67054fa2ab1958e199c05f9 ./face_recognition_processor.py 8edab61189ad1a8fa60c203077e814e82d46c5bae67054fa2ab1958e199c05f9 ./face_recognition_processor.py
9ea19f357b3fcec6c8b3875c538e53cb46e407ab188cd544963e0123e535fa03 ./face_registration.py 9ea19f357b3fcec6c8b3875c538e53cb46e407ab188cd544963e0123e535fa03 ./face_registration.py
72648816de611fd9b84d2b98c177b8b4f24374024b69184e8151c06cf44d633b ./face_statistics_report.py 72648816de611fd9b84d2b98c177b8b4f24374024b69184e8151c06cf44d633b ./face_statistics_report.py
@@ -174,15 +174,15 @@ fd39b779a0337f521940f3f7b159931f1f207f200eefd610183781fdcf3dfafd ./object_searc
42d2952fc78b57302b0d12bc3d45790a2c2c46d4ffa3c713a82686134bd63f13 ./ocr_benchmark_runner.py 42d2952fc78b57302b0d12bc3d45790a2c2c46d4ffa3c713a82686134bd63f13 ./ocr_benchmark_runner.py
7b3ccb5c4ddd4c62c5ad04d0e3aafaecc2c1441012b6a98613cdcf055e2e50e8 ./ocr_processor_contract_v1.py 7b3ccb5c4ddd4c62c5ad04d0e3aafaecc2c1441012b6a98613cdcf055e2e50e8 ./ocr_processor_contract_v1.py
271023eec42d6be4a1ce6ae2ce3f29e825210a57e6bb37554a6f7fdf54616f9a ./ocr_processor_mps.py 271023eec42d6be4a1ce6ae2ce3f29e825210a57e6bb37554a6f7fdf54616f9a ./ocr_processor_mps.py
e666bc8488bb93cc45bcd6a70a4ef38a74af6631d7b87a789381bfbdab4569f5 ./ocr_processor.py 2e73c41285e52ef013594fcd4d20df9f5781bfc26bcf62e54dd2c04ec44200c3 ./ocr_processor.py
62196108cb3337b5f9a873d70d2981ac8f49152369afbcc8a12b3a13de579e80 ./opencv_stamp_search.py 62196108cb3337b5f9a873d70d2981ac8f49152369afbcc8a12b3a13de579e80 ./opencv_stamp_search.py
b2e8d552c272fd173c77693e9453a85fe16dfc12f7c2cd304d299c6188c14077 ./paligemma_vs_gdino.py b2e8d552c272fd173c77693e9453a85fe16dfc12f7c2cd304d299c6188c14077 ./paligemma_vs_gdino.py
2c6767e763cf69917af832b8383528f754c65db5a3f02cb4d63e3f896d5920b6 ./parent_chunk_5w1h.py 1534d5b7617dbae77f7a37a2c33a89b90f965247a6828f00b73ea6b720f6f4fc ./parent_chunk_5w1h.py
5208c738d4b615282813d351daf09872ce516121bb604caa64968ef5e52c53d3 ./pipeline_checklist.py 5208c738d4b615282813d351daf09872ce516121bb604caa64968ef5e52c53d3 ./pipeline_checklist.py
8f80c3a2be5c330e2d1853d9250a171c75db84598dbf3304280c42237ed4fb1f ./pipeline_status.py 8f80c3a2be5c330e2d1853d9250a171c75db84598dbf3304280c42237ed4fb1f ./pipeline_status.py
94db44c0f49115a677d117d4901a1b7991c1517905300eaa495dd62b8ac1c79c ./pose_processor_contract_v1.py 94db44c0f49115a677d117d4901a1b7991c1517905300eaa495dd62b8ac1c79c ./pose_processor_contract_v1.py
167dee5e42c6bd46674bcffcfd92f368fc0b48a1f42c459c806853b281bc6482 ./pose_processor_mps.py 167dee5e42c6bd46674bcffcfd92f368fc0b48a1f42c459c806853b281bc6482 ./pose_processor_mps.py
a1cdb1efd992d229829ae156d8aa439347c51d664e2a606c14d2274a11c93a66 ./pose_processor.py a6ef3a785ef5c6dc47fa38dbed80d76bc7d4bf48cbaf0f7edb3d26df98d7262c ./pose_processor.py
45e6798dc5900f2f7c8776a2d260c122aae5068a075256b8a5c02e8d0be6c131 ./probe_file.py 45e6798dc5900f2f7c8776a2d260c122aae5068a075256b8a5c02e8d0be6c131 ./probe_file.py
139a68b5915680ec697d4bb5420adbd20b89637de2c16a15d68aca4fc22da02b ./qa/executor.py 139a68b5915680ec697d4bb5420adbd20b89637de2c16a15d68aca4fc22da02b ./qa/executor.py
4a59b36c29e1ee6e2b169db3b0201d2f7088c6ccbfdf642a3b522aeb182bbeea ./qa/judges/facenet.py 4a59b36c29e1ee6e2b169db3b0201d2f7088c6ccbfdf642a3b522aeb182bbeea ./qa/judges/facenet.py
@@ -197,7 +197,7 @@ c4e4424aad1847d822e9cf7dc98a1b2e903735a61e8ec056c6a9be75f79486bd ./qa/pipeline.
01c7b3c30c1531224f9605f0ee633285fe8489ab2d0a3c9c6a41f2b2b60d6626 ./quick_stamp_search.py 01c7b3c30c1531224f9605f0ee633285fe8489ab2d0a3c9c6a41f2b2b60d6626 ./quick_stamp_search.py
e3143673a2bff6139e05c82446fd8770c4b7e59a854a42c3b29662f5ac75efe2 ./rebuild_parents.py e3143673a2bff6139e05c82446fd8770c4b7e59a854a42c3b29662f5ac75efe2 ./rebuild_parents.py
4aa98981632d4f8a11039c510e86aa296ae1cd4b399fc871ed664ac11e445bd9 ./rebuild_story_content.py 4aa98981632d4f8a11039c510e86aa296ae1cd4b399fc871ed664ac11e445bd9 ./rebuild_story_content.py
45c437b412d34c7c6d5758e94b7205a2956b32b6fe170c3f56db7231ec6f5a15 ./redis_publisher.py 205cfc47b603b5ab94d97dae8c25486b342b7c2858afe6d6dae27615ca0b2aeb ./redis_publisher.py
750f778946b56bc57c47d9d2295332bb0f8cec2c1aa03c6b882d39ef4432673d ./refine_search.py 750f778946b56bc57c47d9d2295332bb0f8cec2c1aa03c6b882d39ef4432673d ./refine_search.py
0f8a6a6866a5797e964d3b17e2b7ef146fe7a798f09fcea982fcda6f629b4d06 ./regenerate_parent_5w1h.py 0f8a6a6866a5797e964d3b17e2b7ef146fe7a798f09fcea982fcda6f629b4d06 ./regenerate_parent_5w1h.py
3ee192b623f290136b36bd63abd018aad6e6639a9543970c3415734628b33bd6 ./register_sample_faces.py 3ee192b623f290136b36bd63abd018aad6e6639a9543970c3415734628b33bd6 ./register_sample_faces.py
@@ -303,7 +303,7 @@ d0ec8f4a67c1a1eb1356ad6e9b2f466575691bd336621cdbbfd31dd10159f2dc ./utils/test_m
ff98864f1b11795cc3bb64f30ccb6f8609771ddc7a5df2c003ba7c2233d16fc2 ./vectorize_chunks.py ff98864f1b11795cc3bb64f30ccb6f8609771ddc7a5df2c003ba7c2233d16fc2 ./vectorize_chunks.py
5880c128400e6e36c8eb7dffd009dbbc99dd13f8575b0037bdc854e25ddc41fb ./video_comparison_statistics.py 5880c128400e6e36c8eb7dffd009dbbc99dd13f8575b0037bdc854e25ddc41fb ./video_comparison_statistics.py
0a1501ffdc027236cdf88706b3d61229e2998ab268fd57fb60e399ccb734b6a1 ./vision_agent.py 0a1501ffdc027236cdf88706b3d61229e2998ab268fd57fb60e399ccb734b6a1 ./vision_agent.py
6831281de868d24ecd84151965909b57f895d534114d24300a81c396492c19f8 ./visual_chunk_processor.py eac8f90fbbb655614abcefc4b887e346bf94db5f015d33d37bc9514fb030489d ./visual_chunk_processor.py
c165dfc5fc981dc731b25ef414184ee58e56b73b148d41a32fdce985c701efd5 ./visualize_stamp.py c165dfc5fc981dc731b25ef414184ee58e56b73b148d41a32fdce985c701efd5 ./visualize_stamp.py
6c65a82fdd1d585e20bee4fcb2d1bdec2e6220bda71d6ef9cd00d6a3cf74c4d7 ./voice_embedding_extractor.py 6c65a82fdd1d585e20bee4fcb2d1bdec2e6220bda71d6ef9cd00d6a3cf74c4d7 ./voice_embedding_extractor.py
2b3a7b357db4ddd07ca30bf200c6600724e33441d8def0a4d9a39673e2cfb1c0 ./weather_sound_detector.py 2b3a7b357db4ddd07ca30bf200c6600724e33441d8def0a4d9a39673e2cfb1c0 ./weather_sound_detector.py
@@ -343,3 +343,4 @@ b2ee4f8a445a7e83f7b99ae5d4139fd525d9e3e58a360bfef054d441aa21d901 ./swift_proces
fbca5ba0783153c4e21c174b0cbf75b582514f6ef0f92750a82d3178bc170f48 ./test_search_modes.sh fbca5ba0783153c4e21c174b0cbf75b582514f6ef0f92750a82d3178bc170f48 ./test_search_modes.sh
f8c1647cdb4db8adef1829e41fbecd97f6b3b2e62927f195cd8e68127876069d ./troubleshoot.sh f8c1647cdb4db8adef1829e41fbecd97f6b3b2e62927f195cd8e68127876069d ./troubleshoot.sh
992296b5218f3ef97ce53325be12f71848f3c3aeb3ee81d764bfe4bd61e1de05 ./verify_package.sh 992296b5218f3ef97ce53325be12f71848f3c3aeb3ee81d764bfe4bd61e1de05 ./verify_package.sh
b6f95fa070cc0258bc5d005f10d13025ba8b08d3ee1598bcdad405ff1d3332ed ./tmdb_agent.py

View File

@@ -0,0 +1,84 @@
#!/opt/homebrew/bin/python3.11
"""
Extract face embedding from an image using InsightFace + CoreML FaceNet.
Usage:
python3 scripts/extract_face_embedding.py <image_path>
Output: JSON with "embedding" key (512 floats) or "error" key.
Exit code: 0 on success, 1 on failure.
"""
import json
import os
import sys
# If a project virtualenv sits next to the scripts directory, put its
# site-packages at the front of sys.path so the insightface / coremltools
# packages installed there are the ones that get imported below.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
VENV_PATH = os.path.join(_THIS_DIR, "..", "venv")
VENV_SITE = os.path.join(VENV_PATH, "lib", "python3.11", "site-packages")
if os.path.isdir(VENV_SITE):
    sys.path.insert(0, VENV_SITE)
import cv2
import numpy as np
# Model assets are resolved relative to this script: <script dir>/../models.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
MODELS_DIR = os.path.join(_SCRIPT_DIR, "..", "models")
# CoreML package loaded by extract_embedding() to compute the embedding.
FACENET_PATH = os.path.join(MODELS_DIR, "facenet512.mlpackage")
def extract_embedding(image_path: str):
    """Print the embedding of the largest detected face in an image as JSON.

    Loads the InsightFace ``buffalo_l`` analyzer and the CoreML model at
    ``FACENET_PATH``, detects faces in the image, crops the bounding box with
    the largest area, resizes the crop to 160x160, scales pixel values to
    [-1, 1], and runs the CoreML model on it.

    On success, prints ``{"embedding": [...]}`` to stdout and returns ``None``.
    On any failure (model load, unreadable/undecodable image, no face, empty
    bbox, unexpected model output) prints ``{"error": "..."}`` to stdout and
    exits the process with status 1.

    Args:
        image_path: Path of the image file to read.
    """
    import contextlib
    import io
    import warnings

    def fail(message: str):
        # Single exit path so every failure honours the JSON-on-stdout contract.
        print(json.dumps({"error": message}))
        sys.exit(1)

    warnings.filterwarnings("ignore")

    # Suppress InsightFace verbose stdout during model loading so that the only
    # thing this script writes to stdout is the final JSON document.
    load_error = None
    with contextlib.redirect_stdout(io.StringIO()):
        try:
            from insightface.app import FaceAnalysis
            import coremltools as ct

            app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])
            app.prepare(ctx_id=0, det_thresh=0.5)
            coreml_model = ct.models.MLModel(FACENET_PATH)
        except Exception as exc:
            # Report after leaving the redirect; printing here would be discarded.
            load_error = exc
    if load_error is not None:
        fail(f"Failed to load models: {load_error}")

    try:
        with open(image_path, "rb") as fh:
            img_bytes = fh.read()
    except OSError as exc:
        fail(f"Failed to read image: {exc}")

    # cv2.imdecode raises on an empty buffer, so treat an empty file as a
    # decode failure instead of letting the exception escape.
    img = None
    if img_bytes:
        img = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        fail("Failed to decode image")

    # Detect faces
    faces = app.get(img)
    if not faces:
        fail("No face detected")

    # Keep only the face whose bounding box covers the largest area.
    largest = max(faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]))
    x1, y1, x2, y2 = [int(v) for v in largest.bbox]
    # Clamp the box to the image so the slice below cannot run out of bounds.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(img.shape[1], x2), min(img.shape[0], y2)
    if x2 <= x1 or y2 <= y1:
        fail("Invalid face bbox")

    # NOTE(review): cv2 decodes to BGR and the crop is passed on without
    # channel reordering — confirm the CoreML model was exported for BGR input.
    face_img = cv2.resize(img[y1:y2, x1:x2], (160, 160))
    normalized = (face_img.astype(np.float32) / 127.5) - 1.0
    # HWC -> CHW, then add a leading batch axis of size 1.
    normalized = np.transpose(normalized, (2, 0, 1))
    input_array = np.expand_dims(normalized, axis=0)

    result = coreml_model.predict({"input": input_array})
    # The output tensor is looked up by its "var_" name prefix.
    emb_key = next((k for k in result if k.startswith("var_")), None)
    if emb_key is None:
        fail("Model output has no 'var_*' embedding key")

    embedding = result[emb_key].flatten().tolist()
    print(json.dumps({"embedding": embedding}))
if __name__ == "__main__":
    # At least one positional argument (the image path) is required; anything
    # after it is ignored.
    if len(sys.argv) >= 2:
        extract_embedding(sys.argv[1])
    else:
        print(json.dumps({"error": "Usage: extract_face_embedding.py <image_path>"}))
        sys.exit(1)

View File

@@ -2,23 +2,30 @@
""" """
Face landmark QC: verify eyes/nose are within face bounding box. Face landmark QC: verify eyes/nose are within face bounding box.
Flags faces in DB where landmarks don't match the bbox. Flags faces in DB where landmarks don't match the bbox.
Usage: python3 face_landmark_qc.py <file_uuid> [--threshold 0.5] [--fix] Usage: python3 face_landmark_qc.py <file_uuid> [--threshold 0.5] [--apply]
""" """
import sys, json, psycopg2, argparse import sys, json, psycopg2, argparse, os
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("uuid") parser.add_argument("uuid")
parser.add_argument("--threshold", "-t", type=float, default=0.5, parser.add_argument("--threshold", "-t", type=float, default=0.5,
help="Fraction of landmark points that must be inside bbox (default: 0.5)") help="Fraction of landmark points that must be inside bbox (default: 0.5)")
parser.add_argument("--fix", action="store_true", help="Update face_detections QC flag in DB") parser.add_argument("--apply", action="store_true",
help="Write qc_ok to face_detections.metadata in DB")
parser.add_argument("--schema", default="dev",
help="DB schema (default: dev)")
args = parser.parse_args() args = parser.parse_args()
UUID = args.uuid UUID = args.uuid
THRESHOLD = args.threshold THRESHOLD = args.threshold
FACE_PATH = f"/Users/accusys/momentry/output_dev/{UUID}.face.json" SCHEMA = args.schema
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", f"/Users/accusys/momentry/output_dev")
FACE_PATH = f"{OUTPUT_DIR}/{UUID}.face.json"
print(f"=== Face Landmark QC ===") print(f"=== Face Landmark QC ===")
print(f"UUID: {UUID}") print(f"UUID: {UUID}")
print(f"Schema: {SCHEMA}")
print(f"Face file: {FACE_PATH}")
print(f"Threshold: {THRESHOLD * 100:.0f}% points must be inside bbox") print(f"Threshold: {THRESHOLD * 100:.0f}% points must be inside bbox")
# Load face.json # Load face.json
@@ -29,8 +36,7 @@ total_faces = 0
faces_with_lm = 0 faces_with_lm = 0
good_faces = 0 good_faces = 0
bad_faces = 0 bad_faces = 0
bad_frame_ids = set() qc_results = [] # list of (frame, face_idx, qc_ok, x, y, w, h)
bad_face_details = []
# Build frame lookup for fast access # Build frame lookup for fast access
frame_map = {} frame_map = {}
@@ -42,13 +48,22 @@ for frame_num, frm in frame_map.items():
total_faces += 1 total_faces += 1
lm = face.get('landmarks') lm = face.get('landmarks')
if not lm: if not lm:
bbox = face.get('bbox', {})
qc_results.append((frame_num, fi, False, bbox.get('x'), bbox.get('y'),
bbox.get('width'), bbox.get('height')))
bad_faces += 1
continue continue
faces_with_lm += 1 faces_with_lm += 1
x, y, w, h = face['x'], face['y'], face['width'], face['height'] bbox = face.get('bbox', {})
x, y, w, h = bbox.get('x'), bbox.get('y'), bbox.get('width'), bbox.get('height')
if None in (x, y, w, h):
qc_results.append((frame_num, fi, False, x, y, w, h))
bad_faces += 1
continue
inside_pts = 0 inside_pts = 0
total_pts = 0 total_pts = 0
eye_nose_inside = 0 # at least one point from each eye+nose inside eye_nose_inside = 0
for lm_type in ['left_eye', 'right_eye', 'nose']: for lm_type in ['left_eye', 'right_eye', 'nose']:
points = lm.get(lm_type, []) points = lm.get(lm_type, [])
@@ -63,53 +78,39 @@ for frame_num, frm in frame_map.items():
eye_nose_inside += 1 eye_nose_inside += 1
ratio = inside_pts / max(1, total_pts) ratio = inside_pts / max(1, total_pts)
qc_ok = (ratio >= THRESHOLD and eye_nose_inside >= 2)
if ratio >= THRESHOLD and eye_nose_inside >= 2: qc_results.append((frame_num, fi, qc_ok, x, y, w, h))
if qc_ok:
good_faces += 1 good_faces += 1
else: else:
bad_faces += 1 bad_faces += 1
bad_frame_ids.add(frame_num)
bad_face_details.append({
'frame': frame_num,
'face_idx': fi,
'bbox': [x, y, w, h],
'inside_pts': inside_pts,
'total_pts': total_pts,
'ratio': ratio,
'eye_nose_ok': eye_nose_inside,
})
print(f"\nTotal faces: {total_faces:,}") print(f"\nTotal faces: {total_faces:,}")
print(f"Faces with landmarks: {faces_with_lm:,}") print(f"Faces with landmarks: {faces_with_lm:,}")
print(f"✅ Good (≥{THRESHOLD*100:.0f}% inside + ≥2 features): {good_faces:,}") print(f"✅ Good (≥{THRESHOLD*100:.0f}% inside + ≥2 features): {good_faces:,}")
print(f"❌ Bad: {bad_faces:,}") print(f"❌ Bad (no eyes or insufficient landmarks): {bad_faces:,}")
print(f"Quality pass rate: {100 * good_faces / max(1, faces_with_lm):.1f}%") print(f"Quality pass rate: {100 * good_faces / max(1, faces_with_lm):.1f}%")
print(f"\nBad faces in {len(bad_frame_ids)} unique frames") # Apply mode: write qc_ok to face_detections.metadata
if args.apply:
# Show sample bad faces print(f"\n=== Applying QC results to {SCHEMA}.face_detections ===")
print(f"\nSample bad faces:") db_url = os.environ.get("DATABASE_URL", "postgres://accusys@localhost:5432/momentry")
for bf in sorted(bad_face_details, key=lambda b: b['ratio'])[:5]: conn = psycopg2.connect(db_url)
print(f" frame={bf['frame']}, bbox={bf['bbox']}, {bf['inside_pts']}/{bf['total_pts']} inside ({bf['ratio']*100:.0f}%), eye/nose={bf['eye_nose_ok']}/3") cur = conn.cursor()
updated = 0
# Show sample good faces for frame_num, fi, qc_ok, x, y, w, h in qc_results:
print(f"\nSample good faces:") qc_str = "true" if qc_ok else "false"
good_details = [] cur.execute(
for frame_num, frm in frame_map.items(): f"UPDATE {SCHEMA}.face_detections "
for face in frm.get('faces', []): f"SET metadata = jsonb_set(COALESCE(metadata, '{{}}'::jsonb), '{{qc_ok}}', '\"{qc_str}\"'::jsonb) "
lm = face.get('landmarks') f"WHERE file_uuid = %s AND frame_number = %s AND x = %s AND y = %s AND width = %s AND height = %s",
if not lm: (UUID, frame_num, x, y, w, h)
continue )
x, y, w, h = face['x'], face['y'], face['width'], face['height'] if cur.rowcount > 0:
inside = sum(1 for pts in lm.values() for pt in pts updated += 1
if (x <= pt[0] <= x + w) and (y <= pt[1] <= y + h)) conn.commit()
total = sum(len(pts) for pts in lm.values()) cur.close()
if inside / max(1, total) >= THRESHOLD: conn.close()
good_details.append((frame_num, x, y, w, h, inside, total)) print(f"Updated {updated} rows in {SCHEMA}.face_detections")
if len(good_details) >= 5: print(f"Skipped {len(qc_results) - updated} rows (no matching face_detections row)")
break
if len(good_details) >= 5:
break
for g in good_details:
print(f" frame={g[0]}, bbox=[{g[1]},{g[2]},{g[3]},{g[4]}], {g[5]}/{g[6]} inside ({100*g[5]/max(1,g[6]):.0f}%)")

View File

@@ -13,6 +13,7 @@ Detection cost: near-zero CPU (Vision ANE)
Embedding cost: near-zero CPU (CoreML ANE) Embedding cost: near-zero CPU (CoreML ANE)
""" """
import re
import sys import sys
import os import os
import json import json
@@ -29,6 +30,7 @@ from pathlib import Path
import coremltools as ct import coremltools as ct
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
SWIFT_BIN = os.path.join(SCRIPT_DIR, "swift_processors", ".build", "debug", "swift_face") SWIFT_BIN = os.path.join(SCRIPT_DIR, "swift_processors", ".build", "debug", "swift_face")
@@ -49,11 +51,12 @@ def classify_pose(roll: float, yaw: float) -> str:
class FaceProcessorVision: class FaceProcessorVision:
def __init__(self, video_path: str, output_path: str, uuid: str = "", def __init__(self, video_path: str, output_path: str, uuid: str = "",
sample_interval: int = 3): sample_interval: int = 3, publisher: RedisPublisher = None):
self.video_path = video_path self.video_path = video_path
self.output_path = output_path self.output_path = output_path
self.uuid = uuid self.uuid = uuid
self.sample_interval = sample_interval self.sample_interval = sample_interval
self.publisher = publisher
# Load CoreML FaceNet # Load CoreML FaceNet
self.coreml_model = None self.coreml_model = None
@@ -127,7 +130,33 @@ class FaceProcessorVision:
print(f"[FACE_V2] Running: {' '.join(cmd)}") print(f"[FACE_V2] Running: {' '.join(cmd)}")
t0 = time.time() t0 = time.time()
subprocess.run(cmd, check=True) log_path = swift_out + ".log"
log_f = open(log_path, "w")
proc = subprocess.Popen(cmd, stdout=log_f, stderr=subprocess.STDOUT, text=True)
last_pct = -1
while proc.poll() is None:
time.sleep(10)
# Read latest log lines
try:
with open(log_path) as lf:
for line in lf:
line = line.strip()
m = re.search(r'(\d+)% complete', line)
if m:
pct = int(m.group(1))
if pct > last_pct:
last_pct = pct
if self.publisher:
self.publisher.progress("face", pct, 100, f"swift detect {pct}%")
except Exception:
pass
log_f.close()
if proc.returncode != 0:
stderr_out = proc.stderr.read()
if stderr_out:
print(stderr_out.strip(), file=sys.stderr)
raise RuntimeError(f"swift_face exited with code {proc.returncode}")
elapsed = time.time() - t0 elapsed = time.time() - t0
print(f"[FACE_V2] Detection done in {elapsed:.1f}s") print(f"[FACE_V2] Detection done in {elapsed:.1f}s")
@@ -156,6 +185,8 @@ class FaceProcessorVision:
t0 = time.time() t0 = time.time()
embed_count = 0 embed_count = 0
total_face_count = 0
last_pct = -1
for frame_info in frames: for frame_info in frames:
frame_num = frame_info["frame"] frame_num = frame_info["frame"]
@@ -220,6 +251,12 @@ class FaceProcessorVision:
if len(face_data["frames"]) % 100 == 0: if len(face_data["frames"]) % 100 == 0:
elapsed = time.time() - t0 elapsed = time.time() - t0
print(f"[FACE_V2] {len(face_data['frames'])} frames, {embed_count} embeddings, {elapsed:.0f}s") print(f"[FACE_V2] {len(face_data['frames'])} frames, {embed_count} embeddings, {elapsed:.0f}s")
if self.publisher:
pct = int(len(face_data["frames"]) * 100 / max(len(frames), 1))
if pct > last_pct:
last_pct = pct
self.publisher.progress("face", len(face_data["frames"]), len(frames),
f"{embed_count} faces", embed_count, "faces")
self.video.release() self.video.release()
@@ -259,19 +296,36 @@ def main():
parser.add_argument("--force", action="store_true") parser.add_argument("--force", action="store_true")
args = parser.parse_args() args = parser.parse_args()
publisher = RedisPublisher(args.uuid) if args.uuid else None
if publisher:
publisher.info("face", "FACE_START")
if args.force and os.path.exists(args.output_path): if args.force and os.path.exists(args.output_path):
os.remove(args.output_path) os.remove(args.output_path)
processor = FaceProcessorVision( processor = FaceProcessorVision(
args.video_path, args.output_path, args.video_path, args.output_path,
args.uuid, args.sample_interval args.uuid, args.sample_interval, publisher
) )
# Step 1: Vision detection (bbox + pose via ANE) # Step 1: Vision detection (bbox + pose via ANE)
detection = processor.process_with_swift() try:
detection = processor.process_with_swift()
except Exception as e:
if publisher:
publisher.error("face", f"Detection failed: {e}")
raise
# Step 2: CoreML embedding + save # Step 2: CoreML embedding + save
processor.embed_and_save(detection) try:
processor.embed_and_save(detection)
except Exception as e:
if publisher:
publisher.error("face", f"Embedding failed: {e}")
raise
if publisher:
publisher.complete("face", f"{len(detection.get('frames',[]))} frames")
# Clean up temp detection file # Clean up temp detection file
swift_out = args.output_path.replace(".json", "_detect.json") swift_out = args.output_path.replace(".json", "_detect.json")

View File

@@ -81,10 +81,10 @@ for cluster_id in sorted(set(labels)):
VALUES (%s, 'face', 'auto', 'active', NOW(), %s) VALUES (%s, 'face', 'auto', 'active', NOW(), %s)
ON CONFLICT (name) DO UPDATE SET status = 'active', file_uuid = COALESCE(dev.identities.file_uuid, %s) ON CONFLICT (name) DO UPDATE SET status = 'active', file_uuid = COALESCE(dev.identities.file_uuid, %s)
RETURNING id RETURNING id
""", (f"PERSON_{UUID[:8]}_{cluster_id}", UUID, UUID)) """, (f"stranger_{UUID}_{cluster_id}", UUID, UUID))
identity_id = cur.fetchone()[0] identity_id = cur.fetchone()[0]
cluster_to_identity[cluster_id] = identity_id cluster_to_identity[cluster_id] = identity_id
print(f" Cluster {cluster_id}: new identity {identity_id} (PERSON_{cluster_id})") print(f" Cluster {cluster_id}: new identity {identity_id} (stranger_{UUID}_{cluster_id})")
# Step 4: Create identity bindings # Step 4: Create identity bindings
print("Creating identity bindings...") print("Creating identity bindings...")

View File

@@ -0,0 +1,131 @@
#!/opt/homebrew/bin/python3.11
"""
Migrate Identity Files — one-time: DB identities → filesystem identity.json
Reads all identities from PostgreSQL, queries file bindings,
and writes identity.json + _index.json to {OUTPUT_DIR}/identities/{uuid}/
Usage:
python3 scripts/migrate_identity_files.py
python3 scripts/migrate_identity_files.py --db "dbname=momentry user=accusys"
python3 scripts/migrate_identity_files.py --output /path/to/output
"""
import argparse
import json
import os
from datetime import datetime, timezone
from pathlib import Path
import psycopg2
import psycopg2.extras
def _fetch_file_bindings(conn, identity_id):
    """Return one binding summary per file for the given identity id.

    Each entry has ``file_uuid``, ``trace_ids`` (distinct non-NULL trace ids)
    and ``face_count``, aggregated from ``face_detections``.
    """
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute("""
            SELECT fd.file_uuid,
                   COALESCE(array_agg(DISTINCT fd.trace_id) FILTER (WHERE fd.trace_id IS NOT NULL), '{}') AS trace_ids,
                   COUNT(*)::bigint AS face_count
            FROM face_detections fd
            WHERE fd.identity_id = %s
            GROUP BY fd.file_uuid
            ORDER BY fd.file_uuid
        """, (identity_id,))
        binding_rows = cur.fetchall()

    file_bindings = []
    for b in binding_rows:
        trace_ids = b["trace_ids"]
        # Only normalise when the driver handed back a Python list; any other
        # representation is passed through unchanged.
        if isinstance(trace_ids, list):
            trace_ids = [int(t) for t in trace_ids if t is not None]
        file_bindings.append({
            "file_uuid": b["file_uuid"],
            "trace_ids": trace_ids,
            "face_count": int(b["face_count"]),
        })
    return file_bindings


def _parse_metadata(raw):
    """Decode the ``metadata::text`` column; empty string or NULL becomes ``{}``."""
    if isinstance(raw, str):
        return json.loads(raw) if raw else {}
    if raw is None:
        return {}
    return raw


def _format_timestamp(value):
    """ISO-format a timestamp, falling back to the current UTC time if missing."""
    return value.isoformat() if value else datetime.now(timezone.utc).isoformat()


def _build_identity_file(row, uuid_clean, file_bindings):
    """Assemble the identity.json payload for one identities row."""
    return {
        "version": 1,
        "identity_uuid": uuid_clean,
        "name": row["name"] or "",
        "identity_type": row.get("identity_type"),
        "source": row.get("source"),
        "status": row.get("status"),
        "tmdb_id": row.get("tmdb_id"),
        "tmdb_profile": row.get("tmdb_profile"),
        "metadata": _parse_metadata(row.get("metadata")),
        "file_bindings": file_bindings,
        "created_at": _format_timestamp(row.get("created_at")),
        "updated_at": _format_timestamp(row.get("updated_at")),
    }


def main():
    """Export every identity with a non-NULL uuid from PostgreSQL to disk.

    For each row, writes ``<output>/identities/<uuid without dashes>/identity.json``
    containing the identity fields plus its per-file bindings, then writes
    ``<output>/identities/_index.json`` mapping each exported uuid to its name.

    Command-line options:
        --db      psycopg2 connection string (default: ``DATABASE_URL`` env var).
        --output  Output root directory (default: ``MOMENTRY_OUTPUT_DIR`` env var).

    The database connection is always closed, including on the early return
    when no identities exist and when an error interrupts the export.
    """
    parser = argparse.ArgumentParser(description="Migrate identities to filesystem")
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
    parser.add_argument("--output", default=os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output"))
    args = parser.parse_args()

    conn = psycopg2.connect(args.db)
    try:
        identities_root = Path(args.output) / "identities"
        identities_root.mkdir(parents=True, exist_ok=True)

        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute("""
                SELECT id, uuid::text, name, identity_type, source, status,
                       tmdb_id, tmdb_profile, metadata::text, created_at, updated_at
                FROM identities
                WHERE uuid IS NOT NULL
                ORDER BY id
            """)
            rows = cur.fetchall()

        if not rows:
            print("No identities found in DB.")
            return

        index = {}
        migrated = 0
        for row in rows:
            # Directory names and index keys use the uuid with dashes removed.
            uuid_clean = row["uuid"].replace("-", "")
            name = row["name"] or ""

            dir_path = identities_root / uuid_clean
            dir_path.mkdir(parents=True, exist_ok=True)

            file_bindings = _fetch_file_bindings(conn, row["id"])
            identity_file = _build_identity_file(row, uuid_clean, file_bindings)

            with open(dir_path / "identity.json", "w", encoding="utf-8") as f:
                json.dump(identity_file, f, indent=2, ensure_ascii=False)

            index[uuid_clean] = name
            migrated += 1
            print(f" [{migrated:5d}] {name} ({uuid_clean})")
    finally:
        conn.close()

    # Write _index.json
    index_file = {
        "version": 1,
        "updated_at": datetime.now(timezone.utc).isoformat(),
        "entries": index,
    }
    with open(identities_root / "_index.json", "w", encoding="utf-8") as f:
        json.dump(index_file, f, indent=2, ensure_ascii=False)

    print(f"\nDone: {migrated} identities migrated")
    print(f"Index: {identities_root / '_index.json'} ({len(index)} entries)")


if __name__ == "__main__":
    main()

View File

@@ -4,6 +4,7 @@ OCR Processor Wrapper
Calls Swift Vision Framework OCR (swift_ocr) with fallback to PaddleOCR. Calls Swift Vision Framework OCR (swift_ocr) with fallback to PaddleOCR.
""" """
import re
import sys import sys
import json import json
import os import os
@@ -11,6 +12,10 @@ import subprocess
import argparse import argparse
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
SWIFT_OCR_PATH = os.path.join( SWIFT_OCR_PATH = os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.path.dirname(os.path.abspath(__file__)),
"swift_processors/.build/debug/swift_ocr" "swift_processors/.build/debug/swift_ocr"
@@ -19,6 +24,7 @@ SWIFT_OCR_ALT = os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.path.dirname(os.path.abspath(__file__)),
"swift_processors/.build/arm64-apple-macosx/debug/swift_ocr" "swift_processors/.build/arm64-apple-macosx/debug/swift_ocr"
) )
SWIFT_PROGRESS_RE = re.compile(r"\[SwiftOCR\] Progress:\s*(\d+)%")
def process_ocr( def process_ocr(
@@ -27,6 +33,7 @@ def process_ocr(
uuid: str = "", uuid: str = "",
sample_interval: int = 30, sample_interval: int = 30,
recognition_level: str = "accurate", recognition_level: str = "accurate",
publisher: RedisPublisher = None,
) -> dict: ) -> dict:
swift_bin = SWIFT_OCR_PATH swift_bin = SWIFT_OCR_PATH
if not os.path.exists(swift_bin): if not os.path.exists(swift_bin):
@@ -42,15 +49,34 @@ def process_ocr(
"--uuid", uuid] "--uuid", uuid]
print(f"[OCR] Running Swift OCR", file=sys.stderr) print(f"[OCR] Running Swift OCR", file=sys.stderr)
result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if result.stdout: last_pct = -1
print(result.stdout.strip(), file=sys.stderr) stdout_lines = []
if result.stderr: for line in proc.stdout:
print(result.stderr.strip(), file=sys.stderr) line = line.strip()
stdout_lines.append(line)
m = SWIFT_PROGRESS_RE.search(line)
if m:
pct = int(m.group(1))
if pct > last_pct:
last_pct = pct
print(f"[OCR] Progress: {pct}%", file=sys.stderr)
if publisher:
publisher.progress("ocr", pct, 100, f"{pct}%")
elif line:
print(line, file=sys.stderr)
if result.returncode != 0 or not os.path.exists(output_path): stderr_output = proc.stderr.read()
print(f"[OCR] Swift OCR failed, falling back to PaddleOCR", file=sys.stderr) if stderr_output:
print(stderr_output.strip(), file=sys.stderr)
proc.wait()
if proc.returncode != 0 or not os.path.exists(output_path):
print(f"[OCR] Swift OCR failed (exit={proc.returncode}), falling back to PaddleOCR", file=sys.stderr)
if publisher:
publisher.error("ocr", f"Swift OCR failed, using fallback")
return _fallback(video_path, output_path, uuid, sample_interval) return _fallback(video_path, output_path, uuid, sample_interval)
with open(output_path) as f: with open(output_path) as f:
@@ -81,9 +107,16 @@ if __name__ == "__main__":
parser.add_argument("--recognition-level", choices=["fast", "accurate"], default="accurate") parser.add_argument("--recognition-level", choices=["fast", "accurate"], default="accurate")
args = parser.parse_args() args = parser.parse_args()
publisher = RedisPublisher(args.uuid) if args.uuid else None
if publisher:
publisher.info("ocr", "OCR_START")
result = process_ocr(args.video_path, args.output_path, args.uuid, result = process_ocr(args.video_path, args.output_path, args.uuid,
args.sample_interval, args.recognition_level) args.sample_interval, args.recognition_level,
publisher)
with open(args.output_path, "w") as f: with open(args.output_path, "w") as f:
json.dump(result, f, indent=2) json.dump(result, f, indent=2)
print(f"OCR: {len(result.get('frames', []))} frames with text") print(f"OCR: {len(result.get('frames', []))} frames with text")
if publisher:
publisher.complete("ocr", f"{len(result.get('frames',[]))} frames")

View File

@@ -28,7 +28,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry") DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
SCHEMA = os.getenv("DATABASE_SCHEMA", "dev") SCHEMA = os.getenv("DATABASE_SCHEMA", "dev")
OUTPUT_DIR = os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev") OUTPUT_DIR = os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
OLLAMA_URL = "http://localhost:11434/api" EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://localhost:11436/v1/embeddings")
def load_speaker_map(file_uuid: str) -> dict: def load_speaker_map(file_uuid: str) -> dict:
"""Load speaker→identity mapping from DB (generalized, not hardcoded)""" """Load speaker→identity mapping from DB (generalized, not hardcoded)"""
@@ -64,7 +64,7 @@ CURRENT_VERSIONS = {
"embedding_agent": "nomic-embed-768d/v1", "embedding_agent": "nomic-embed-768d/v1",
} }
LLM_URL = os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8081/v1/chat/completions") LLM_URL = os.getenv("MOMENTRY_LLM_URL", os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8082/v1/chat/completions"))
LLM_MODEL = os.getenv("MOMENTRY_LLM_SUMMARY_MODEL", "gemma4") LLM_MODEL = os.getenv("MOMENTRY_LLM_SUMMARY_MODEL", "gemma4")
@@ -97,7 +97,7 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
s, e = cs["start_time"], cs["end_time"] s, e = cs["start_time"], cs["end_time"]
children = [] children = []
for seg in asr_segs: for seg_idx, seg in enumerate(asr_segs):
st, en = seg.get("start", 0), seg.get("end", 0) st, en = seg.get("start", 0), seg.get("end", 0)
text = seg.get("text", "").strip() text = seg.get("text", "").strip()
if st < s or en > e or not text: continue if st < s or en > e or not text: continue
@@ -117,11 +117,11 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
"start": st, "end": en, "text": text, "start": st, "end": en, "text": text,
"speaker_id": spk_id, "speaker_name": character, "speaker_id": spk_id, "speaker_name": character,
"speaker_confidence": spk_conf, "speaker_confidence": spk_conf,
"chunk_id": f"{file_uuid}_{st:.0f}_{en:.0f}", "chunk_id": f"{file_uuid}_{seg_idx}",
}) })
# Boundary overlap: even empty scenes get partial children # Boundary overlap: even empty scenes get partial children
for seg in asr_segs: for seg_idx, seg in enumerate(asr_segs):
st, en = seg.get("start", 0), seg.get("end", 0) st, en = seg.get("start", 0), seg.get("end", 0)
text = seg.get("text", "").strip() text = seg.get("text", "").strip()
if not text: continue if not text: continue
@@ -141,7 +141,7 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
"start": st, "end": en, "text": text, "start": st, "end": en, "text": text,
"speaker_id": spk_id, "speaker_name": character, "speaker_id": spk_id, "speaker_name": character,
"speaker_confidence": spk_conf, "speaker_confidence": spk_conf,
"chunk_id": f"{file_uuid}_{st:.0f}_{en:.0f}", "chunk_id": f"{file_uuid}_{seg_idx}",
"overlap_type": "partial", "overlap_type": "partial",
}) })
@@ -215,14 +215,17 @@ def generate_llm_child_summary(child: dict, parent_summary: str) -> Optional[str
# ===== Embedding (Ollama nomic-embed) ===== # ===== Embedding (Ollama nomic-embed) =====
def embed_text(text: str, max_retries: int = 3) -> Optional[List[float]]: def embed_text(text: str, max_retries: int = 3) -> Optional[List[float]]:
"""Get embedding via Ollama nomic-embed-text""" """Get embedding via EmbeddingGemma server"""
for attempt in range(max_retries): for attempt in range(max_retries):
try: try:
resp = requests.post(f"{OLLAMA_URL}/embeddings", json={ resp = requests.post(EMBEDDING_URL, json={
"model": "nomic-embed-text-v2-moe", "prompt": text, "input": [text],
}, timeout=30) }, timeout=30)
if resp.status_code == 200: if resp.status_code == 200:
return resp.json()["embedding"] data = resp.json()
items = data.get("data", [])
if items:
return items[0]["embedding"]
except Exception as e: except Exception as e:
if attempt == max_retries - 1: if attempt == max_retries - 1:
print(f" ⚠️ Embedding failed: {e}") print(f" ⚠️ Embedding failed: {e}")
@@ -244,7 +247,7 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,
# Get base chunk_index # Get base chunk_index
cur.execute( cur.execute(
f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunks WHERE file_uuid = %s", f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunk WHERE file_uuid = %s",
(file_uuid,), (file_uuid,),
) )
next_index = (cur.fetchone()[0] or 0) + 1 next_index = (cur.fetchone()[0] or 0) + 1
@@ -255,20 +258,38 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,
parent_id = f"{mode}_parent_{file_uuid}_{scene['start_time']:.0f}_{scene['end_time']:.0f}" parent_id = f"{mode}_parent_{file_uuid}_{scene['start_time']:.0f}_{scene['end_time']:.0f}"
cur.execute( parent_embedding = embed_text(parent_text) if do_embed else None
f""" if do_embed and parent_embedding:
INSERT INTO {SCHEMA}.chunks (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, cur.execute(
start_time, end_time, content, text_content, parent_chunk_id) f"""
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s) INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE start_time, end_time, content, text_content, parent_chunk_id, embedding)
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector)
""", ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
(parent_id, parent_id, file_uuid, parent_type, next_index, SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
scene["start_time"], scene["end_time"], embedding = EXCLUDED.embedding
json.dumps({"summary": parent_text, "mode": mode, "type": "parent", """,
"source_versions": CURRENT_VERSIONS}), (parent_id, parent_id, file_uuid, parent_type, next_index,
parent_text, None), scene["start_time"], scene["end_time"],
) json.dumps({"summary": parent_text, "mode": mode, "type": "parent",
"source_versions": CURRENT_VERSIONS}),
parent_text, None, parent_embedding),
)
else:
cur.execute(
f"""
INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
start_time, end_time, content, text_content, parent_chunk_id)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content
""",
(parent_id, parent_id, file_uuid, parent_type, next_index,
scene["start_time"], scene["end_time"],
json.dumps({"summary": parent_text, "mode": mode, "type": "parent",
"source_versions": CURRENT_VERSIONS}),
parent_text, None),
)
next_index += 1 next_index += 1
parent_count += 1 parent_count += 1
@@ -276,22 +297,42 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,
child_id = child["chunk_id"] child_id = child["chunk_id"]
child_text = generate_story_child_summary(child, parent_text) if mode == "story" else generate_llm_child_summary(child, parent_text) child_text = generate_story_child_summary(child, parent_text) if mode == "story" else generate_llm_child_summary(child, parent_text)
cur.execute( child_embedding = embed_text(child_text) if do_embed else None
f""" if do_embed and child_embedding:
INSERT INTO {SCHEMA}.chunks (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, cur.execute(
start_time, end_time, content, text_content, parent_chunk_id) f"""
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s) INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE start_time, end_time, content, text_content, parent_chunk_id, embedding)
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content, VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector)
parent_chunk_id = EXCLUDED.parent_chunk_id ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
""", SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
(child_id, child_id, file_uuid, child_type, next_index, parent_chunk_id = EXCLUDED.parent_chunk_id,
child["start"], child["end"], embedding = EXCLUDED.embedding
json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode, """,
"speaker_confidence": child.get("speaker_confidence", 0), (child_id, child_id, file_uuid, child_type, next_index,
"source_versions": CURRENT_VERSIONS}), child["start"], child["end"],
child_text, parent_id), json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode,
) "speaker_confidence": child.get("speaker_confidence", 0),
"source_versions": CURRENT_VERSIONS}),
child_text, parent_id, child_embedding),
)
else:
cur.execute(
f"""
INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
start_time, end_time, content, text_content, parent_chunk_id)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
parent_chunk_id = EXCLUDED.parent_chunk_id
""",
(child_id, child_id, file_uuid, child_type, next_index,
child["start"], child["end"],
json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode,
"speaker_confidence": child.get("speaker_confidence", 0),
"source_versions": CURRENT_VERSIONS}),
child_text, parent_id),
)
next_index += 1 next_index += 1
child_count += 1 child_count += 1
@@ -304,7 +345,7 @@ def main():
parser = argparse.ArgumentParser(description="Story Processor V2.0") parser = argparse.ArgumentParser(description="Story Processor V2.0")
parser.add_argument("--file-uuid", required=True) parser.add_argument("--file-uuid", required=True)
parser.add_argument("--mode", choices=["story", "llm"], default="story") parser.add_argument("--mode", choices=["story", "llm"], default="story")
parser.add_argument("--max-scenes", type=int, default=300) parser.add_argument("--max-scenes", type=int, default=99999)
parser.add_argument("--embed", action="store_true", help="Generate embeddings (Ollama)") parser.add_argument("--embed", action="store_true", help="Generate embeddings (Ollama)")
parser.add_argument("--no-db", action="store_true", help="Skip DB storage") parser.add_argument("--no-db", action="store_true", help="Skip DB storage")
args = parser.parse_args() args = parser.parse_args()

View File

@@ -5,12 +5,16 @@ Calls Swift Vision Framework pose (swift_pose) with fallback to YOLOv8 Pose.
Uses VNDetectHumanBodyPoseRequest with ANE acceleration. Uses VNDetectHumanBodyPoseRequest with ANE acceleration.
""" """
import re
import sys import sys
import json import json
import os import os
import subprocess import subprocess
import argparse import argparse
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
SWIFT_POSE_PATH = os.path.join( SWIFT_POSE_PATH = os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.path.dirname(os.path.abspath(__file__)),
"swift_processors/.build/debug/swift_pose" "swift_processors/.build/debug/swift_pose"
@@ -21,11 +25,14 @@ SWIFT_POSE_ALT = os.path.join(
) )
SWIFT_POSE_PROGRESS_RE = re.compile(r"\[SwiftPose\] Progress:\s*(\d+)%")
def process_pose( def process_pose(
video_path: str, video_path: str,
output_path: str, output_path: str,
uuid: str = "", uuid: str = "",
sample_interval: int = 30, sample_interval: int = 30,
publisher: RedisPublisher = None,
) -> dict: ) -> dict:
swift_bin = SWIFT_POSE_PATH swift_bin = SWIFT_POSE_PATH
if not os.path.exists(swift_bin): if not os.path.exists(swift_bin):
@@ -33,6 +40,8 @@ def process_pose(
if not os.path.exists(swift_bin): if not os.path.exists(swift_bin):
print("[Pose] Swift binary not found, using YOLOv8 fallback", file=sys.stderr) print("[Pose] Swift binary not found, using YOLOv8 fallback", file=sys.stderr)
if publisher:
publisher.error("pose", "Swift binary not found, using fallback")
return _fallback(video_path, output_path, uuid, sample_interval) return _fallback(video_path, output_path, uuid, sample_interval)
cmd = [swift_bin, video_path, output_path, cmd = [swift_bin, video_path, output_path,
@@ -40,17 +49,32 @@ def process_pose(
"--uuid", uuid] "--uuid", uuid]
print(f"[Pose] Running Swift Pose (Vision Framework)", file=sys.stderr) print(f"[Pose] Running Swift Pose (Vision Framework)", file=sys.stderr)
result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if result.stdout: last_pct = -1
for line in result.stdout.strip().split("\n"): for line in proc.stdout:
print(f" {line}", file=sys.stderr) line = line.strip()
if result.stderr: m = SWIFT_POSE_PROGRESS_RE.search(line)
for line in result.stderr.strip().split("\n"): if m:
pct = int(m.group(1))
if pct > last_pct:
last_pct = pct
print(f"[Pose] Progress: {pct}%", file=sys.stderr)
if publisher:
publisher.progress("pose", pct, 100, f"{pct}%")
elif line:
print(f" {line}", file=sys.stderr) print(f" {line}", file=sys.stderr)
if result.returncode != 0 or not os.path.exists(output_path): stderr_output = proc.stderr.read()
print(f"[Pose] Swift Pose failed, falling back to YOLOv8", file=sys.stderr) if stderr_output:
print(stderr_output.strip(), file=sys.stderr)
proc.wait()
if proc.returncode != 0 or not os.path.exists(output_path):
print(f"[Pose] Swift Pose failed (exit={proc.returncode}), falling back to YOLOv8", file=sys.stderr)
if publisher:
publisher.error("pose", f"Swift Pose failed, using fallback")
return _fallback(video_path, output_path, uuid, sample_interval) return _fallback(video_path, output_path, uuid, sample_interval)
with open(output_path) as f: with open(output_path) as f:
@@ -113,7 +137,14 @@ if __name__ == "__main__":
parser.add_argument("--sample-interval", type=int, default=30) parser.add_argument("--sample-interval", type=int, default=30)
args = parser.parse_args() args = parser.parse_args()
result = process_pose(args.video_path, args.output_path, args.uuid, args.sample_interval) publisher = RedisPublisher(args.uuid) if args.uuid else None
if publisher:
publisher.info("pose", "POSE_START")
result = process_pose(args.video_path, args.output_path, args.uuid,
args.sample_interval, publisher)
with open(args.output_path, "w") as f: with open(args.output_path, "w") as f:
json.dump(result, f, indent=2) json.dump(result, f, indent=2)
print(f"Pose: {len(result.get('frames', []))} frames with poses") print(f"Pose: {len(result.get('frames', []))} frames with poses")
if publisher:
publisher.complete("pose", f"{len(result.get('frames',[]))} frames")

View File

@@ -34,6 +34,8 @@ class ProgressData:
message: Optional[str] = None message: Optional[str] = None
current: Optional[int] = None current: Optional[int] = None
total: Optional[int] = None total: Optional[int] = None
output_count: Optional[int] = None
output_type: Optional[str] = None
extra: Optional[Dict[str, Any]] = None extra: Optional[Dict[str, Any]] = None
@@ -49,7 +51,8 @@ class StructuredMessage:
class RedisPublisher: class RedisPublisher:
def __init__(self, uuid: str): def __init__(self, uuid: str):
self.uuid = uuid self.uuid = uuid
self.channel = f"momentry:progress:{uuid}" prefix = os.environ.get("MOMENTRY_REDIS_PREFIX", "momentry:")
self.channel = f"{prefix}progress:{uuid}"
self._enabled = False self._enabled = False
self._client = None self._client = None
self._connect() self._connect()
@@ -107,6 +110,8 @@ class RedisPublisher:
message: Optional[str] = None, message: Optional[str] = None,
current: Optional[int] = None, current: Optional[int] = None,
total: Optional[int] = None, total: Optional[int] = None,
output_count: Optional[int] = None,
output_type: Optional[str] = None,
extra: Optional[Dict[str, Any]] = None, extra: Optional[Dict[str, Any]] = None,
) -> bool: ) -> bool:
if not self._enabled: if not self._enabled:
@@ -121,6 +126,8 @@ class RedisPublisher:
message=message, message=message,
current=current, current=current,
total=total, total=total,
output_count=output_count,
output_type=output_type,
extra=extra, extra=extra,
), ),
) )
@@ -136,6 +143,8 @@ class RedisPublisher:
current: int, current: int,
total: int, total: int,
message: str = "", message: str = "",
output_count: Optional[int] = None,
output_type: Optional[str] = None,
) -> bool: ) -> bool:
return self.publish( return self.publish(
MessageType.PROGRESS, MessageType.PROGRESS,
@@ -143,6 +152,8 @@ class RedisPublisher:
message=message, message=message,
current=current, current=current,
total=total, total=total,
output_count=output_count,
output_type=output_type,
) )
def complete(self, processor: str, message: str = "") -> bool: def complete(self, processor: str, message: str = "") -> bool:

View File

@@ -0,0 +1,117 @@
#!/opt/homebrew/bin/python3.11
"""
Sync users from SFTPGo to Momentry users table.
Usage:
python3 scripts/sync_users_from_sftpgo.py
python3 scripts/sync_users_from_sftpgo.py --sftpgo-url http://localhost:8080
python3 scripts/sync_users_from_sftpgo.py --db "dbname=momentry user=accusys"
Environment:
SFTPGO_BASE_URL Default: http://localhost:8080
DATABASE_URL Default: dbname=momentry user=accusys host=localhost
This script does NOT copy passwords. It creates user records with placeholder
password hashes. The real password will be captured on the user's first
login through Momentry (which verifies against SFTPGo and caches the hash).
"""
import argparse
import json
import os
import sys
from typing import Any
import psycopg2
import psycopg2.extras
import requests
def get_sftpgo_users(sftpgo_url: str, admin_user: str, admin_pass: str) -> list[dict[str, Any]]:
    """Get all users from SFTPGo.

    Args:
        sftpgo_url: Base URL of the SFTPGo HTTP API; a trailing slash is tolerated.
        admin_user: SFTPGo admin account name used for HTTP basic auth.
        admin_pass: Password of that admin account.

    Returns:
        The user objects returned by ``/api/v2/users``, concatenated across
        all result pages.

    Raises:
        requests.HTTPError: If the token or users endpoint answers with an
            error status.
        ValueError: If the users endpoint returns something other than a list.
        SystemExit: If the token response carries no ``access_token``.
    """
    base_url = sftpgo_url.rstrip("/")

    # Get admin token (SFTPGo uses GET with HTTP basic auth, not POST)
    resp = requests.get(f"{base_url}/api/v2/token", auth=(admin_user, admin_pass), timeout=10)
    resp.raise_for_status()
    token = resp.json().get("access_token")
    if not token:
        print("ERROR: Failed to get SFTPGo admin token", file=sys.stderr)
        sys.exit(1)

    # List users. The endpoint is paginated, so a single request only returns
    # the first page; keep requesting until a short page signals the end.
    users_url = f"{base_url}/api/v2/users"
    headers = {"Authorization": f"Bearer {token}"}
    page_size = 500
    users: list[dict[str, Any]] = []
    offset = 0
    while True:
        resp = requests.get(
            users_url,
            headers=headers,
            params={"offset": offset, "limit": page_size},
            timeout=10,
        )
        resp.raise_for_status()
        page = resp.json()
        if not isinstance(page, list):
            raise ValueError(f"Unexpected SFTPGo users response: {type(page).__name__}")
        users.extend(page)
        if len(page) < page_size:
            break
        offset += len(page)
    return users
def main():
    """Sync active SFTPGo users into the Momentry ``users`` table.

    Users whose SFTPGo ``status`` is not 1, or that have no username, are
    skipped. Each remaining user is inserted with a placeholder password hash;
    existing rows are left untouched (``ON CONFLICT DO NOTHING``).

    The target schema comes from ``DATABASE_SCHEMA`` (default ``dev``).
    With ``--dry-run`` nothing is written and no database connection is made.
    Exits with status 1 if the SFTPGo user list cannot be fetched.
    """
    parser = argparse.ArgumentParser(description="Sync SFTPGo users to Momentry")
    parser.add_argument("--sftpgo-url", default=os.getenv("SFTPGO_BASE_URL", "http://localhost:8080"))
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
    parser.add_argument("--admin-user", default="admin")
    # NOTE(review): the fallback below is a credential committed to source.
    # It is kept so existing invocations keep working; prefer setting
    # SFTPGO_ADMIN_PASSWORD and consider removing the literal.
    parser.add_argument("--admin-pass", default=os.getenv("SFTPGO_ADMIN_PASSWORD", "Test3200Test3200"))
    parser.add_argument("--dry-run", action="store_true", help="Print what would be done without executing")
    args = parser.parse_args()

    # Fetch users from SFTPGo
    print(f"[SFTPGo] Connecting to {args.sftpgo_url}...")
    try:
        sftpgo_users = get_sftpgo_users(args.sftpgo_url, args.admin_user, args.admin_pass)
    except Exception as e:
        print(f"ERROR: Failed to fetch SFTPGo users: {e}", file=sys.stderr)
        sys.exit(1)
    print(f"[SFTPGo] Found {len(sftpgo_users)} users")

    schema = os.getenv("DATABASE_SCHEMA", "").strip() or "dev"

    synced = 0
    skipped = 0
    conn = None
    cur = None
    try:
        if not args.dry_run:
            # Connect to Momentry DB and set schema. set_config() is the
            # parameterizable equivalent of "SET search_path TO <schema>".
            conn = psycopg2.connect(args.db)
            cur = conn.cursor()
            cur.execute("SELECT set_config('search_path', %s, false)", (schema,))

        for user in sftpgo_users:
            username = user.get("username")
            status = user.get("status", 0)
            if not username or status != 1:
                skipped += 1
                continue

            role = "admin" if username == "admin" else "user"
            # Placeholder hash — will be updated on first login via SFTPGo fallback
            placeholder_hash = "$placeholder$synced_from_sftpgo"

            if args.dry_run:
                print(f" Would insert: {username} (role={role})")
                synced += 1
                continue

            # A failed statement aborts the whole PostgreSQL transaction, which
            # would make every later INSERT fail and the final commit persist
            # nothing. A savepoint per row confines the failure to that row.
            cur.execute("SAVEPOINT sync_user")
            try:
                cur.execute(
                    "INSERT INTO users (username, password_hash, role) VALUES (%s, %s, %s) "
                    "ON CONFLICT (username) DO NOTHING",
                    (username, placeholder_hash, role),
                )
                # Read rowcount before RELEASE, which resets it.
                inserted = cur.rowcount > 0
                cur.execute("RELEASE SAVEPOINT sync_user")
            except Exception as e:
                cur.execute("ROLLBACK TO SAVEPOINT sync_user")
                cur.execute("RELEASE SAVEPOINT sync_user")
                print(f"{username}: {e}", file=sys.stderr)
                skipped += 1
                continue

            if inserted:
                print(f"{username} (role={role})")
                synced += 1
            else:
                print(f" ⏭️ {username} already exists, skipped")
                skipped += 1

        if conn is not None:
            conn.commit()
    finally:
        # Release DB resources even when an unexpected error escapes the loop.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

    print(f"\nDone: {synced} synced, {skipped} skipped/errors")
    print("Note: Password hashes are placeholders. First login via Momentry will cache the real hash.")


if __name__ == "__main__":
    main()

285
scripts/tmdb_agent.py Normal file
View File

@@ -0,0 +1,285 @@
#!/opt/homebrew/bin/python3.11
"""
TMDb Agent — pre-fetch TMDb data and write directly to identity files.
Usage:
python3 scripts/tmdb_agent.py --file-uuid <uuid>
python3 scripts/tmdb_agent.py --file-uuid <uuid> --db "dbname=momentry user=accusys"
Environment:
TMDB_API_KEY Required. TMDb API key.
MOMENTRY_OUTPUT_DIR Default: /Users/accusys/momentry/output
DATABASE_URL Default: dbname=momentry user=accusys host=localhost
Flow:
1. Query videos table for file_name
2. Extract movie name from filename
3. TMDB /search/movie → find best match
4. TMDB /movie/{id}/credits → fetch cast
5. TMDB /person/{id} → fetch person details
6. Write {OUTPUT}/identities/{uuid}/identity.json + profile.jpg for each cast member
7. Write {OUTPUT}/{uuid}.tmdb.json cache (movie info + identity uuid list)
"""
import argparse
import hashlib
import json
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
import requests
import psycopg2
import psycopg2.extras
TMDB_BASE = "https://api.themoviedb.org/3"
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
def extract_movie_name(filename: str) -> str | None:
"""Extract movie name from filename (e.g. 'Charade_1963.mp4''Charade 1963')"""
name = Path(filename).stem
cleaned = re.sub(r'[._]', ' ', name).strip()
# Strip text after separators like |, (, [, {
for sep in ('|', '(', '[', '{', '\u2502'):
idx = cleaned.find(sep)
if idx > 0:
cleaned = cleaned[:idx].strip()
# Strip common suffixes (quality, format, source, etc.)
suffixes = (
r'\d{3,4}p', r'\d{3,4}x\d{3,4}', r'\d+fps', r'bluray', r'web[ -]?dl',
r'webrip', r'hdrip', r'dvdrip', r'dvd', r'brrip', r'hdtv', r'xvid',
r'x264', r'h264', r'x265', r'h265', r'hevc', r'aac', r'mp3', r'ac3',
r'dts', r'5\.1', r'7\.1', r'dual[ -]?audio', r'multi[ -]?sub',
r'proper', r'repack', r'extended', r'unrated', r'directors[ -]?cut',
r'theatrical', r'internal', r'limited', r'complete', r'full[ -]?movie',
r'english', r'french', r'spanish', r'german', r'chinese',
r'youtube', r'yify', r'ettv', r'rarbg', r'tgx', r'axxo', r'ctrlhd',
)
pattern = r'\b(?:' + '|'.join(suffixes) + r')\b'
cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE).strip()
# Collapse multiple spaces
cleaned = re.sub(r'\s+', ' ', cleaned).strip()
return cleaned if len(cleaned) >= 3 else None
def search_movie(query: str) -> dict | None:
    """Look up a movie on TMDB by name and return the top hit.

    Returns None when TMDB yields no results or when the request fails;
    failures are reported on stderr rather than raised.
    """
    endpoint = f"{TMDB_BASE}/search/movie"
    try:
        response = requests.get(
            endpoint,
            params={"query": query, "api_key": TMDB_API_KEY, "language": "en-US", "page": 1},
            timeout=15,
        )
        response.raise_for_status()
        matches = response.json().get("results", [])
        if matches:
            return matches[0]
        return None
    except Exception as e:
        print(f"TMDB search failed: {e}", file=sys.stderr)
        return None
def get_credits(movie_id: int) -> list[dict]:
    """Fetch the cast list of a TMDB movie.

    Returns the ``cast`` array of ``/movie/{id}/credits``, or an empty list
    when the key is missing or the request fails (the failure is reported on
    stderr).
    """
    endpoint = f"{TMDB_BASE}/movie/{movie_id}/credits"
    query = {"api_key": TMDB_API_KEY, "language": "en-US"}
    try:
        response = requests.get(endpoint, params=query, timeout=15)
        response.raise_for_status()
        payload = response.json()
        return payload.get("cast", [])
    except Exception as e:
        print(f"TMDB credits failed: {e}", file=sys.stderr)
        return []
def get_person_details(person_id: int) -> dict:
    """Fetch selected biography fields for a person from TMDB /person/{id}.

    Returns a dict with the fields listed below (missing ones are None,
    ``also_known_as`` defaults to an empty list), or an empty dict when the
    request fails; the failure is reported on stderr.
    """
    # (field name, default used when TMDB omits the field)
    wanted = (
        ("biography", None),
        ("birthday", None),
        ("place_of_birth", None),
        ("also_known_as", []),
        ("imdb_id", None),
        ("known_for_department", None),
        ("popularity", None),
        ("deathday", None),
        ("gender", None),
        ("homepage", None),
    )
    endpoint = f"{TMDB_BASE}/person/{person_id}"
    try:
        response = requests.get(
            endpoint,
            params={"api_key": TMDB_API_KEY, "language": "en-US"},
            timeout=15,
        )
        response.raise_for_status()
        payload = response.json()
        return {field: payload.get(field, fallback) for field, fallback in wanted}
    except Exception as e:
        print(f"TMDB person details failed for {person_id}: {e}", file=sys.stderr)
        return {}
def _update_identity_index(index_path: Path, created_identities: list[dict], updated_at: str) -> None:
    """Merge newly written identities into _index.json (versioned layout)."""
    reserved = ("version", "updated_at", "entries")
    entries = {}
    if index_path.exists():
        with open(index_path, encoding="utf-8") as f:
            loaded = json.load(f)
        if isinstance(loaded, dict):
            if isinstance(loaded.get("entries"), dict):
                entries.update(loaded["entries"])
            # Also pick up uuid -> name pairs stored at the top level, as
            # written by the earlier flat layout of this file.
            for key, value in loaded.items():
                if key not in reserved and isinstance(value, str):
                    entries.setdefault(key, value)

    for ci in created_identities:
        entries[ci["identity_uuid"]] = ci["name"]

    index_file = {
        "version": 1,
        "updated_at": updated_at,
        "entries": entries,
    }
    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(index_file, f, indent=2, ensure_ascii=False)


def main():
    """Pre-fetch TMDb data for one file and write identity files plus a cache.

    Looks up the file name for ``--file-uuid`` in the ``videos`` table, derives
    a movie name from it, resolves the movie and its cast on TMDb, then writes
    one ``identities/<uuid>/identity.json`` (and ``profile.jpg`` when
    available) per cast member, updates ``identities/_index.json`` and writes
    ``<file_uuid>.tmdb.json`` under the output directory.

    Exits with status 1 when TMDB_API_KEY is unset, the file is unknown, no
    movie name can be extracted, or TMDb returns no match.
    """
    parser = argparse.ArgumentParser(description="TMDb Agent — pre-fetch cache")
    parser.add_argument("--file-uuid", required=True, help="File UUID to enrich")
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
    parser.add_argument("--output", default=os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output"))
    args = parser.parse_args()

    if not TMDB_API_KEY:
        print("ERROR: TMDB_API_KEY not set.", file=sys.stderr)
        sys.exit(1)

    # 1. Query DB for file_name
    schema = os.getenv("DATABASE_SCHEMA", "").strip()
    table = f"{schema}.videos" if schema else "videos"
    conn = psycopg2.connect(args.db)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        try:
            cur.execute(f"SELECT file_name FROM {table} WHERE file_uuid = %s", (args.file_uuid,))
            row = cur.fetchone()
        finally:
            cur.close()
    finally:
        # Close the connection even if the query itself fails.
        conn.close()

    if not row:
        print(f"ERROR: File not found: {args.file_uuid}", file=sys.stderr)
        sys.exit(1)
    file_name = row["file_name"]
    print(f"[TKG-AGENT] File: {file_name} ({args.file_uuid})")

    # 2. Extract movie name
    movie_name = extract_movie_name(file_name)
    if not movie_name:
        print(f"ERROR: Cannot extract movie name from: {file_name}", file=sys.stderr)
        sys.exit(1)
    print(f"[TKG-AGENT] Extracted movie name: '{movie_name}'")

    # 3. Search TMDB
    movie = search_movie(movie_name)
    if not movie:
        print(f"ERROR: No TMDB movie found for: {movie_name}", file=sys.stderr)
        sys.exit(1)
    print(f"[TKG-AGENT] Matched: {movie['title']} (TMDB id={movie['id']})")

    # 4. Fetch credits
    cast = get_credits(movie["id"])
    if not cast:
        print(f"WARN: No cast data found for movie {movie['id']}", file=sys.stderr)

    # 5. Enrich each cast member with person details and write identity files
    output = Path(args.output)
    identities_root = output / "identities"
    identities_root.mkdir(parents=True, exist_ok=True)
    now = datetime.now(timezone.utc).isoformat()
    created_identities = []

    for i, m in enumerate(cast):
        person_id = m["id"]
        person = get_person_details(person_id)

        # Generate deterministic UUID: SHA256("tmdb-{movie_id}-{person_id}-{name}")
        uuid_raw = hashlib.sha256(f"tmdb-{movie['id']}-{person_id}-{m['name']}".encode()).hexdigest()[:32]
        profile_url = (
            f"https://image.tmdb.org/t/p/w185{m['profile_path']}"
            if m.get("profile_path") else None
        )

        # Build identity.json
        metadata = {
            "tmdb_character": m.get("character", ""),
            "tmdb_cast_order": i,
            "tmdb_movie_id": movie["id"],
            "tmdb_movie_title": movie["title"],
            "tmdb_biography": person.get("biography"),
            "tmdb_birthday": person.get("birthday"),
            "tmdb_place_of_birth": person.get("place_of_birth"),
            "tmdb_aliases": person.get("also_known_as", []),
            "tmdb_imdb_id": person.get("imdb_id"),
            "tmdb_department": person.get("known_for_department"),
            "tmdb_popularity": person.get("popularity"),
            "tmdb_deathday": person.get("deathday"),
            "tmdb_gender": person.get("gender"),
            "tmdb_homepage": person.get("homepage"),
        }
        identity = {
            "version": 1,
            "identity_uuid": uuid_raw,
            "name": m["name"],
            "identity_type": "people",
            "source": "tmdb",
            "status": "confirmed",
            "tmdb_id": person_id,
            "tmdb_profile": profile_url,
            # Drop unset fields, but always keep the aliases key.
            "metadata": {k: v for k, v in metadata.items() if v is not None or k == "tmdb_aliases"},
            "file_bindings": [],
            "created_at": now,
            "updated_at": now,
        }

        # Write identity.json
        # NOTE(review): an existing identity.json is overwritten, which resets
        # file_bindings and created_at — confirm that is intended on re-runs.
        identity_dir = identities_root / uuid_raw
        identity_dir.mkdir(parents=True, exist_ok=True)
        identity_path = identity_dir / "identity.json"
        with open(identity_path, "w", encoding="utf-8") as f:
            json.dump(identity, f, indent=2, ensure_ascii=False)

        # Download profile.jpg (best effort; skipped if already present)
        if profile_url:
            img_path = identity_dir / "profile.jpg"
            if not img_path.exists():
                try:
                    resp = requests.get(profile_url, timeout=15)
                    if resp.status_code == 200:
                        img_path.write_bytes(resp.content)
                except Exception as e:
                    print(f" [WARN] Failed to download profile for {m['name']}: {e}", file=sys.stderr)

        created_identities.append({
            "identity_uuid": uuid_raw,
            "name": m["name"],
            "tmdb_id": person_id,
            "character": m.get("character", ""),
            "order": i,
        })
        if (i + 1) % 5 == 0:
            print(f"[TKG-AGENT] Wrote {i+1}/{len(cast)} identity files")

    # Update _index.json, keeping the {"version", "updated_at", "entries"}
    # layout instead of mixing uuid keys into the top level of the file.
    _update_identity_index(identities_root / "_index.json", created_identities, now)

    # Write movie cache ({uuid}.tmdb.json) — simplified, no per-person data
    cache = {
        "file_uuid": args.file_uuid,
        "fetched_at": now,
        "source": "agent",
        "movie": {
            "tmdb_id": movie["id"],
            "title": movie["title"],
            "release_date": movie.get("release_date"),
            "overview": movie.get("overview"),
            "poster_path": movie.get("poster_path"),
        },
        "cast_count": len(cast),
        "identities_created": len(created_identities),
        "identities": created_identities,
    }
    cache_path = output / f"{args.file_uuid}.tmdb.json"
    with open(cache_path, "w", encoding="utf-8") as f:
        json.dump(cache, f, indent=2, ensure_ascii=False)

    print(f"[TKG-AGENT] Cache written: {cache_path}")
    print(f"[TKG-AGENT] Identity files: {len(created_identities)} cast members → {identities_root}/")


if __name__ == "__main__":
    main()

View File

@@ -384,6 +384,7 @@ def main():
parser.add_argument("video_path", help="視頻文件路徑") parser.add_argument("video_path", help="視頻文件路徑")
parser.add_argument("output_path", help="輸出文件路徑") parser.add_argument("output_path", help="輸出文件路徑")
parser.add_argument("--yolo-result", help="YOLO 結果文件路徑(可選)") parser.add_argument("--yolo-result", help="YOLO 結果文件路徑(可選)")
parser.add_argument("--uuid", help="檔案 UUID由 executor 傳入)")
parser.add_argument( parser.add_argument(
"--strategy", choices=["fixed", "similarity"], default="fixed", help="分片策略" "--strategy", choices=["fixed", "similarity"], default="fixed", help="分片策略"
) )

View File

@@ -57,17 +57,12 @@ async fn translate_text(
"temperature": 0.1 "temperature": 0.1
}); });
let response = client let response = client.post(llm_url).json(&body).send().await.map_err(|e| {
.post(llm_url) (
.json(&body) StatusCode::INTERNAL_SERVER_ERROR,
.send() format!("Failed to call LLM: {}", e),
.await )
.map_err(|e| { })?;
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to call LLM: {}", e),
)
})?;
let llm_resp: serde_json::Value = response.json().await.map_err(|e| { let llm_resp: serde_json::Value = response.json().await.map_err(|e| {
( (

View File

@@ -97,17 +97,25 @@ struct SceneSummaryResult {
/// Resolves the chat-completions endpoint used for LLM calls.
///
/// Lookup order:
/// 1. `MOMENTRY_LLM_URL`
/// 2. `MOMENTRY_LLM_SUMMARY_URL`
/// 3. the built-in default `http://localhost:8082/v1/chat/completions`
///
/// A variable that `std::env::var` cannot read (unset or not valid Unicode)
/// is skipped. A variable that is set to an empty string is returned as-is,
/// exactly as before.
fn llm_base_url() -> String {
    std::env::var("MOMENTRY_LLM_URL")
        // Only consult the fallback variable when the primary one is unreadable.
        .or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_URL"))
        .unwrap_or_else(|_| "http://localhost:8082/v1/chat/completions".to_string())
}
/// Resolves the model name sent with LLM requests.
///
/// Lookup order:
/// 1. `MOMENTRY_LLM_MODEL`
/// 2. `MOMENTRY_LLM_SUMMARY_MODEL`
/// 3. the built-in default `google_gemma-4-26B-A4B-it-Q5_K_M.gguf`
///
/// A variable that `std::env::var` cannot read (unset or not valid Unicode)
/// is skipped. A variable that is set to an empty string is returned as-is,
/// exactly as before.
fn llm_model() -> String {
    std::env::var("MOMENTRY_LLM_MODEL")
        // Only consult the fallback variable when the primary one is unreadable.
        .or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_MODEL"))
        .unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
}
@@ -115,7 +123,7 @@ fn llm_model() -> String {
async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Vec<CutScene>> { async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Vec<CutScene>> {
let table = schema::table_name("chunk"); let table = schema::table_name("chunk");
sqlx::query_as::<_, (String, i64, i64, f64, f64, f64, serde_json::Value, serde_json::Value, Option<String>)>(&format!( sqlx::query_as::<_, (String, i64, i64, f64, Option<f64>, Option<f64>, serde_json::Value, Option<serde_json::Value>, Option<String>)>(&format!(
r#"SELECT chunk_id, start_frame, end_frame, fps, start_time, end_time, content, metadata, summary_text r#"SELECT chunk_id, start_frame, end_frame, fps, start_time, end_time, content, metadata, summary_text
FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' ORDER BY start_frame"#, table FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' ORDER BY start_frame"#, table
)) ))
@@ -123,7 +131,8 @@ async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Ve
.fetch_all(db.pool()).await? .fetch_all(db.pool()).await?
.into_iter().map(|r| Ok(CutScene { .into_iter().map(|r| Ok(CutScene {
chunk_id: r.0, start_frame: r.1, end_frame: r.2, chunk_id: r.0, start_frame: r.1, end_frame: r.2,
fps: r.3, start_time: r.4, end_time: r.5, content: r.6, metadata: r.7, summary_text: r.8, fps: r.3, start_time: r.4.unwrap_or(0.0), end_time: r.5.unwrap_or(0.0),
content: r.6, metadata: r.7.unwrap_or(serde_json::json!({})), summary_text: r.8,
})).collect() })).collect()
} }
@@ -133,7 +142,7 @@ async fn fetch_sentences_in_scene(
cut: &CutScene, cut: &CutScene,
) -> anyhow::Result<Vec<SentenceChunk>> { ) -> anyhow::Result<Vec<SentenceChunk>> {
let table = schema::table_name("chunk"); let table = schema::table_name("chunk");
sqlx::query_as::<_, (String, String, f64, f64, i64, i64, serde_json::Value)>(&format!( sqlx::query_as::<_, (String, String, Option<f64>, Option<f64>, i64, i64, serde_json::Value)>(&format!(
r#"SELECT chunk_id, COALESCE(text_content,''), start_time, end_time, start_frame, end_frame, content r#"SELECT chunk_id, COALESCE(text_content,''), start_time, end_time, start_frame, end_frame, content
FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'
AND start_time >= $2 AND end_time <= $3 ORDER BY start_time"#, table AND start_time >= $2 AND end_time <= $3 ORDER BY start_time"#, table
@@ -141,7 +150,7 @@ async fn fetch_sentences_in_scene(
.bind(file_uuid).bind(cut.start_time).bind(cut.end_time) .bind(file_uuid).bind(cut.start_time).bind(cut.end_time)
.fetch_all(db.pool()).await? .fetch_all(db.pool()).await?
.into_iter().map(|r| Ok(SentenceChunk { .into_iter().map(|r| Ok(SentenceChunk {
chunk_id: r.0, text: r.1, start_time: r.2, end_time: r.3, chunk_id: r.0, text: r.1, start_time: r.2.unwrap_or(0.0), end_time: r.3.unwrap_or(0.0),
start_frame: r.4, end_frame: r.5, content: r.6, start_frame: r.4, end_frame: r.5, content: r.6,
})).collect() })).collect()
} }
@@ -540,10 +549,7 @@ async fn analyze_5w1h(
if let Some(ref t) = cut.summary_text { if let Some(ref t) = cut.summary_text {
if t.len() > 20 { if t.len() > 20 {
processed += 1; processed += 1;
prev_context.push(format!( prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
"Scene (t={:.0}s): {}",
cut.start_time, t
));
continue; continue;
} }
} }
@@ -621,10 +627,7 @@ async fn batch_analyze_5w1h(
if let Some(ref t) = cut.summary_text { if let Some(ref t) = cut.summary_text {
if t.len() > 20 { if t.len() > 20 {
processed += 1; processed += 1;
prev_context.push(format!( prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
"Scene (t={:.0}s): {}",
cut.start_time, t
));
continue; continue;
} }
} }
@@ -713,10 +716,7 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
if let Some(ref t) = cut.summary_text { if let Some(ref t) = cut.summary_text {
if t.len() > 20 { if t.len() > 20 {
processed += 1; processed += 1;
prev_context.push(format!( prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
"Scene (t={:.0}s): {}",
cut.start_time, t
));
continue; continue;
} }
} }
@@ -764,38 +764,44 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
qdrant.init_collection(768).await?; qdrant.init_collection(768).await?;
let chunk_table = schema::table_name("chunk"); let chunk_table = schema::table_name("chunk");
let rows = sqlx::query_as::<_, (String, String, String, f64, f64)>( let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64)>(&format!(
&format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time \ "SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time \
FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL \ FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL \
AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table), AND (text_content IS NOT NULL AND text_content != '') ORDER BY id",
) chunk_table
))
.bind(file_uuid) .bind(file_uuid)
.fetch_all(db.pool()) .fetch_all(db.pool())
.await?; .await?;
let total_vec = rows.len(); let total_vec = rows.len();
let mut stored = 0usize; let mut stored = 0usize;
for (chunk_id, _ctype, text, start_time, end_time) in &rows { for (chunk_id, _ctype, text, start_frame, end_frame, start_time, end_time) in &rows {
let text = text.trim(); let text = text.trim();
if text.is_empty() || text.len() < 5 { if text.is_empty() || text.len() < 5 {
continue; continue;
} }
match embedder.embed_document(text).await { match embedder.embed_document(text).await {
Ok(vector) => { Ok(vector) => {
if let Err(e) = sqlx::query( if let Err(e) = sqlx::query(&format!(
&format!("UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3", chunk_table) "UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3",
) chunk_table
))
.bind(&vector as &[f32]) .bind(&vector as &[f32])
.bind(chunk_id) .bind(chunk_id)
.bind(file_uuid) .bind(file_uuid)
.execute(db.pool()).await { .execute(db.pool())
.await
{
tracing::error!("[Vectorize] PG failed for {}: {}", chunk_id, e); tracing::error!("[Vectorize] PG failed for {}: {}", chunk_id, e);
continue; continue;
} }
let payload = VectorPayload { let payload = VectorPayload {
uuid: file_uuid.to_string(), file_uuid: file_uuid.to_string(),
chunk_id: chunk_id.clone(), chunk_id: chunk_id.clone(),
chunk_type: "sentence".to_string(), chunk_type: "sentence".to_string(),
start_frame: *start_frame,
end_frame: *end_frame,
start_time: *start_time, start_time: *start_time,
end_time: *end_time, end_time: *end_time,
text: Some(text.to_string()), text: Some(text.to_string()),

View File

@@ -93,16 +93,15 @@ async fn create_identity(
})?; })?;
let id_table = crate::core::db::schema::table_name("identities"); let id_table = crate::core::db::schema::table_name("identities");
let name_col = if id_table.starts_with("dev.") { "name" } else { "real_name" };
let query = format!( let query = format!(
"SELECT uuid, reference_data->'total_references' as total, "SELECT uuid, reference_data->'total_references' as total,
reference_data->'angles_covered' as angles, reference_data->'angles_covered' as angles,
reference_data->'quality_avg' as quality reference_data->'quality_avg' as quality
FROM {} FROM {}
WHERE {} = $1 WHERE name = $1
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT 1", LIMIT 1",
id_table, name_col id_table
); );
let row: Option<(String, Option<i32>, Option<Vec<String>>, Option<f64>)> = let row: Option<(String, Option<i32>, Option<Vec<String>>, Option<f64>)> =
@@ -168,11 +167,19 @@ async fn list_identities(
let id_table = crate::core::db::schema::table_name("identities"); let id_table = crate::core::db::schema::table_name("identities");
let total: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", id_table)) let total: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", id_table))
.fetch_one(db.pool()).await .fetch_one(db.pool())
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Count error: {}", e)))?; .await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Count error: {}", e),
)
})?;
let name_col = if id_table.starts_with("dev.") { "name" } else { "real_name" }; let sql = format!(
let sql = format!("SELECT id::int, uuid, {} AS name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2", name_col, id_table); "SELECT id::int, uuid, name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2",
id_table
);
let rows: Vec<(i32, uuid::Uuid, String, Option<serde_json::Value>)> = match sqlx::query_as(&sql) let rows: Vec<(i32, uuid::Uuid, String, Option<serde_json::Value>)> = match sqlx::query_as(&sql)
.bind(page_size as i64) .bind(page_size as i64)
@@ -200,12 +207,25 @@ async fn list_identities(
.collect(); .collect();
let identities_table = crate::core::db::schema::table_name("identities"); let identities_table = crate::core::db::schema::table_name("identities");
let total_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", identities_table)) let total_identities: i64 =
.fetch_one(db.pool()).await.unwrap_or(0); sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", identities_table))
let tmdb_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table)) .fetch_one(db.pool())
.fetch_one(db.pool()).await.unwrap_or(0); .await
let auto_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE source = 'auto'", identities_table)) .unwrap_or(0);
.fetch_one(db.pool()).await.unwrap_or(0); let tmdb_identities: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE source = 'tmdb'",
identities_table
))
.fetch_one(db.pool())
.await
.unwrap_or(0);
let auto_identities: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE source = 'auto'",
identities_table
))
.fetch_one(db.pool())
.await
.unwrap_or(0);
Ok(Json(IdentityListResponse { Ok(Json(IdentityListResponse {
identities, identities,

View File

@@ -15,8 +15,14 @@ use crate::core::db::PostgresDb;
pub fn identity_agent_routes() -> Router<AppState> { pub fn identity_agent_routes() -> Router<AppState> {
Router::new() Router::new()
.route("/api/v1/agents/identity/match-from-photo", post(match_from_photo)) .route(
.route("/api/v1/agents/identity/match-from-trace", post(match_from_trace)) "/api/v1/agents/identity/match-from-photo",
post(match_from_photo),
)
.route(
"/api/v1/agents/identity/match-from-trace",
post(match_from_trace),
)
} }
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
@@ -73,13 +79,21 @@ async fn match_from_photo(
let uuid_clean = identity_uuid.replace('-', ""); let uuid_clean = identity_uuid.replace('-', "");
if uuid_clean.is_empty() || file_uuid.is_empty() { if uuid_clean.is_empty() || file_uuid.is_empty() {
return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({ return Err((
"success": false, "message": "identity_uuid and file_uuid are required" StatusCode::BAD_REQUEST,
})))); Json(serde_json::json!({
"success": false, "message": "identity_uuid and file_uuid are required"
})),
));
} }
let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({ let data = image_data.ok_or_else(|| {
"success": false, "message": "No image field found. Use field name 'image'." (
}))))?; StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": "No image field found. Use field name 'image'."
})),
)
})?;
// 1. Save uploaded image to temp // 1. Save uploaded image to temp
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR") let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
@@ -88,11 +102,17 @@ async fn match_from_photo(
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string()); .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let temp_dir = std::env::temp_dir().join("momentry_match_face"); let temp_dir = std::env::temp_dir().join("momentry_match_face");
std::fs::create_dir_all(&temp_dir).map_err(|e| { std::fs::create_dir_all(&temp_dir).map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)}))) (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)})),
)
})?; })?;
let temp_img = temp_dir.join(format!("{}.jpg", uuid_clean)); let temp_img = temp_dir.join(format!("{}.jpg", uuid_clean));
std::fs::write(&temp_img, &data).map_err(|e| { std::fs::write(&temp_img, &data).map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)}))) (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)})),
)
})?; })?;
// 2. Extract face embedding via Python script // 2. Extract face embedding via Python script
@@ -103,79 +123,109 @@ async fn match_from_photo(
.output() .output()
.await .await
.map_err(|e| { .map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)}))) (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)})),
)
})?; })?;
let _ = std::fs::remove_file(&temp_img); let _ = std::fs::remove_file(&temp_img);
if !output.status.success() { if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr); let stderr = String::from_utf8_lossy(&output.stderr);
return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({ return Err((
"success": false, "message": format!("Face extraction failed: {}", stderr) StatusCode::BAD_REQUEST,
})))); Json(serde_json::json!({
"success": false, "message": format!("Face extraction failed: {}", stderr)
})),
));
} }
let stdout = String::from_utf8_lossy(&output.stdout); let stdout = String::from_utf8_lossy(&output.stdout);
let extract_result: serde_json::Value = serde_json::from_str(&stdout).map_err(|_| { let extract_result: serde_json::Value = serde_json::from_str(&stdout).map_err(|_| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Failed to parse extractor output"}))) (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": "Failed to parse extractor output"})),
)
})?; })?;
let embedding: Vec<f64> = serde_json::from_value( let embedding: Vec<f64> = serde_json::from_value(
extract_result.get("embedding") extract_result
.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({"message": "No embedding in extractor output"}))))? .get("embedding")
.clone() .ok_or_else(|| {
).map_err(|_| { (
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Invalid embedding format"}))) StatusCode::BAD_REQUEST,
Json(serde_json::json!({"message": "No embedding in extractor output"})),
)
})?
.clone(),
)
.map_err(|_| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": "Invalid embedding format"})),
)
})?; })?;
let embedding_f32: Vec<f32> = embedding.into_iter().map(|v| v as f32).collect(); let embedding_f32: Vec<f32> = embedding.into_iter().map(|v| v as f32).collect();
// 3. Look up identity internal ID // 3. Look up identity internal ID
let id_table = schema::table_name("identities"); let id_table = schema::table_name("identities");
let identity_id_row: Option<(i32,)> = sqlx::query_as( let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!(
&format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table) "SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
) id_table
))
.bind(&uuid_clean) .bind(&uuid_clean)
.fetch_optional(state.db.pool()) .fetch_optional(state.db.pool())
.await .await
.map_err(|e| { .map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)}))) (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
)
})?; })?;
let identity_id = match identity_id_row { let identity_id = match identity_id_row {
Some((id,)) => id, Some((id,)) => id,
None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ None => {
"success": false, "message": "Identity not found" return Err((
})))), StatusCode::NOT_FOUND,
Json(serde_json::json!({
"success": false, "message": "Identity not found"
})),
))
}
}; };
// 4. Find best matching trace (highest similarity, no threshold) // 4. Find best matching trace (highest similarity, no threshold)
let fd_table = schema::table_name("face_detections"); let fd_table = schema::table_name("face_detections");
let best_match: Option<(i32, i32, f64)> = sqlx::query_as( let best_match: Option<(i32, i32, f64)> = sqlx::query_as(&format!(
&format!( r#"SELECT id, trace_id,
r#"SELECT id, trace_id,
1 - (embedding::vector <=> $1::vector) as similarity 1 - (embedding::vector <=> $1::vector) as similarity
FROM {} FROM {}
WHERE file_uuid = $2 AND embedding IS NOT NULL WHERE file_uuid = $2 AND embedding IS NOT NULL
ORDER BY embedding::vector <=> $1::vector ORDER BY embedding::vector <=> $1::vector
LIMIT 1"#, LIMIT 1"#,
fd_table fd_table
) ))
)
.bind(&embedding_f32) .bind(&embedding_f32)
.bind(&file_uuid) .bind(&file_uuid)
.fetch_optional(state.db.pool()) .fetch_optional(state.db.pool())
.await .await
.map_err(|e| { .map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)}))) (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
)
})?; })?;
// 5. Update best match face_detection // 5. Update best match face_detection
let mut traces_matched: Vec<i32> = Vec::new(); let mut traces_matched: Vec<i32> = Vec::new();
if let Some((fb_id, fb_trace, fb_sim)) = best_match { if let Some((fb_id, fb_trace, fb_sim)) = best_match {
let _ = sqlx::query( let _ = sqlx::query(&format!(
&format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table) "UPDATE {} SET identity_id = $1 WHERE id = $2",
) fd_table
))
.bind(identity_id) .bind(identity_id)
.bind(fb_id) .bind(fb_id)
.execute(state.db.pool()) .execute(state.db.pool())
@@ -191,7 +241,10 @@ async fn match_from_photo(
file_uuid, file_uuid,
matches: 1, matches: 1,
traces_matched, traces_matched,
message: format!("Best trace: trace_id={}, similarity={:.4}", fb_trace, fb_sim), message: format!(
"Best trace: trace_id={}, similarity={:.4}",
fb_trace, fb_sim
),
})) }))
} else { } else {
Ok(Json(MatchFromPhotoResponse { Ok(Json(MatchFromPhotoResponse {
@@ -221,26 +274,30 @@ async fn match_from_trace(
// 1. Get 3 best face embeddings from this trace at different angles // 1. Get 3 best face embeddings from this trace at different angles
// Divide trace frame range into 3 segments, pick best face from each // Divide trace frame range into 3 segments, pick best face from each
let fd_table = schema::table_name("face_detections"); let fd_table = schema::table_name("face_detections");
let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>( let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>(&format!(
&format!( "SELECT embedding, frame_number FROM {} \
"SELECT embedding, frame_number FROM {} \
WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \ WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
ORDER BY frame_number ASC", ORDER BY frame_number ASC",
fd_table fd_table
) ))
)
.bind(&req.file_uuid) .bind(&req.file_uuid)
.bind(req.trace_id) .bind(req.trace_id)
.fetch_all(state.db.pool()) .fetch_all(state.db.pool())
.await .await
.map_err(|e| { .map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)}))) (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
)
})?; })?;
if all_faces.is_empty() { if all_faces.is_empty() {
return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ return Err((
"success": false, "message": "No embedding found for this trace" StatusCode::NOT_FOUND,
})))); Json(serde_json::json!({
"success": false, "message": "No embedding found for this trace"
})),
));
} }
// Pick 3 samples: divide frame range into 3 segments, use face with largest area per segment // Pick 3 samples: divide frame range into 3 segments, use face with largest area per segment
@@ -254,14 +311,12 @@ async fn match_from_trace(
let mut query_embeddings: Vec<Vec<f32>> = Vec::new(); let mut query_embeddings: Vec<Vec<f32>> = Vec::new();
// Get width*height info if available (not all pipelines store it) // Get width*height info if available (not all pipelines store it)
let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>( let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(&format!(
&format!( "SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \
"SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \
FROM {} WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \ FROM {} WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
ORDER BY frame_number ASC", ORDER BY frame_number ASC",
fd_table fd_table
) ))
)
.bind(&req.file_uuid) .bind(&req.file_uuid)
.bind(req.trace_id) .bind(req.trace_id)
.fetch_all(state.db.pool()) .fetch_all(state.db.pool())
@@ -296,9 +351,8 @@ async fn match_from_trace(
let mut seen_trace_ids = std::collections::HashSet::new(); let mut seen_trace_ids = std::collections::HashSet::new();
for qemb in &query_embeddings { for qemb in &query_embeddings {
let top = sqlx::query_as::<_, (i32, i32, f64)>( let top = sqlx::query_as::<_, (i32, i32, f64)>(&format!(
&format!( r#"SELECT id, trace_id,
r#"SELECT id, trace_id,
1 - (embedding::vector <=> $1::vector) as similarity 1 - (embedding::vector <=> $1::vector) as similarity
FROM {} FROM {}
WHERE file_uuid = $2 WHERE file_uuid = $2
@@ -306,16 +360,18 @@ async fn match_from_trace(
AND embedding IS NOT NULL AND embedding IS NOT NULL
ORDER BY embedding::vector <=> $1::vector ORDER BY embedding::vector <=> $1::vector
LIMIT 1"#, LIMIT 1"#,
fd_table fd_table
) ))
)
.bind(qemb) .bind(qemb)
.bind(&req.file_uuid) .bind(&req.file_uuid)
.bind(req.trace_id) .bind(req.trace_id)
.fetch_optional(state.db.pool()) .fetch_optional(state.db.pool())
.await .await
.map_err(|e| { .map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)}))) (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
)
})?; })?;
if let Some((cface_id, c_trace_id, c_sim)) = top { if let Some((cface_id, c_trace_id, c_sim)) = top {
@@ -327,35 +383,49 @@ async fn match_from_trace(
// 3. Look up identity internal ID // 3. Look up identity internal ID
let id_table = schema::table_name("identities"); let id_table = schema::table_name("identities");
let identity_id_row: Option<(i32,)> = sqlx::query_as( let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!(
&format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table) "SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
) id_table
))
.bind(&uuid_clean) .bind(&uuid_clean)
.fetch_optional(state.db.pool()) .fetch_optional(state.db.pool())
.await .await
.map_err(|e| { .map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)}))) (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
)
})?; })?;
let identity_id = match identity_id_row { let identity_id = match identity_id_row {
Some((id,)) => id, Some((id,)) => id,
None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ None => {
"success": false, "message": "Identity not found" return Err((
})))), StatusCode::NOT_FOUND,
Json(serde_json::json!({
"success": false, "message": "Identity not found"
})),
))
}
}; };
// 4. Update matched face_detections // 4. Update matched face_detections
let mut traces_matched: Vec<i32> = Vec::new(); let mut traces_matched: Vec<i32> = Vec::new();
for (id, trace_id, _similarity) in &validated { for (id, trace_id, _similarity) in &validated {
if let Err(e) = sqlx::query( if let Err(e) = sqlx::query(&format!(
&format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table) "UPDATE {} SET identity_id = $1 WHERE id = $2",
) fd_table
))
.bind(identity_id) .bind(identity_id)
.bind(id) .bind(id)
.execute(state.db.pool()) .execute(state.db.pool())
.await .await
{ {
tracing::warn!("[match-from-trace] Failed to update face_detection {}: {}", id, e); tracing::warn!(
"[match-from-trace] Failed to update face_detection {}: {}",
id,
e
);
} else { } else {
if !traces_matched.contains(trace_id) { if !traces_matched.contains(trace_id) {
traces_matched.push(*trace_id); traces_matched.push(*trace_id);
@@ -364,9 +434,10 @@ async fn match_from_trace(
} }
// 5. Also bind the source trace itself // 5. Also bind the source trace itself
let _ = sqlx::query( let _ = sqlx::query(&format!(
&format!("UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3", fd_table) "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3",
) fd_table
))
.bind(identity_id) .bind(identity_id)
.bind(&req.file_uuid) .bind(&req.file_uuid)
.bind(req.trace_id) .bind(req.trace_id)
@@ -388,7 +459,10 @@ async fn match_from_trace(
file_uuid: req.file_uuid, file_uuid: req.file_uuid,
matches: match_count, matches: match_count,
traces_matched, traces_matched,
message: format!("Matched {} faces ({} unique traces)", match_count, trace_count), message: format!(
"Matched {} faces ({} unique traces)",
match_count, trace_count
),
})) }))
} }
@@ -461,7 +535,10 @@ fn analyze_person_speaker_overlap(
} }
// Check if persons co-occur in time (frame proximity) // Check if persons co-occur in time (frame proximity)
let overlap = person.frames.iter().any(|f| other_person.frames.contains(f)); let overlap = person
.frames
.iter()
.any(|f| other_person.frames.contains(f));
if overlap { if overlap {
matched_persons.push(other_person.person_id.clone()); matched_persons.push(other_person.person_id.clone());
visited_persons.insert(other_person.person_id.clone()); visited_persons.insert(other_person.person_id.clone());
@@ -474,9 +551,10 @@ fn analyze_person_speaker_overlap(
person.frames.iter().max().copied().unwrap_or(0) as f64, person.frames.iter().max().copied().unwrap_or(0) as f64,
); );
for speaker in speakers { for speaker in speakers {
let has_overlap = speaker.segments.iter().any(|(start, end)| { let has_overlap = speaker
*start <= person_time_range.1 && *end >= person_time_range.0 .segments
}); .iter()
.any(|(start, end)| *start <= person_time_range.1 && *end >= person_time_range.0);
if has_overlap { if has_overlap {
if !matched_speakers.contains(&speaker.speaker_id) { if !matched_speakers.contains(&speaker.speaker_id) {
matched_speakers.push(speaker.speaker_id.clone()); matched_speakers.push(speaker.speaker_id.clone());
@@ -563,11 +641,12 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
// Step 2: 載入所有 face_detections含 frame_number按 trace_id 分組 // Step 2: 載入所有 face_detections含 frame_number按 trace_id 分組
let fd_table = schema::table_name("face_detections"); let fd_table = schema::table_name("face_detections");
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>( let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(&format!(
&format!("SELECT trace_id, frame_number, embedding FROM {} \ "SELECT trace_id, frame_number, embedding FROM {} \
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \ WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
ORDER BY trace_id, frame_number", fd_table), ORDER BY trace_id, frame_number",
) fd_table
))
.bind(file_uuid) .bind(file_uuid)
.fetch_all(pool) .fetch_all(pool)
.await?; .await?;
@@ -647,16 +726,18 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
let fd_table = schema::table_name("face_detections"); let fd_table = schema::table_name("face_detections");
let mut updated = 0usize; let mut updated = 0usize;
for (tid, name) in &matched { for (tid, name) in &matched {
let id_opt = sqlx::query_scalar::<_, Option<i32>>( let id_opt = sqlx::query_scalar::<_, Option<i32>>(&format!(
&format!("SELECT id FROM {} WHERE name=$1 AND source='tmdb'", identities_table), "SELECT id FROM {} WHERE name=$1 AND source='tmdb'",
) identities_table
))
.bind(name) .bind(name)
.fetch_optional(pool) .fetch_optional(pool)
.await?; .await?;
if let Some(identity_id) = id_opt { if let Some(identity_id) = id_opt {
let _ = sqlx::query( let _ = sqlx::query(&format!(
&format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table), "UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
) fd_table
))
.bind(identity_id) .bind(identity_id)
.bind(file_uuid) .bind(file_uuid)
.bind(tid) .bind(tid)
@@ -726,32 +807,32 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
// Step 6: 未匹配的 trace 設 stranger_id = trace_id // Step 6: 未匹配的 trace 設 stranger_id = trace_id
// trace_id 在同一個 file 內是 sequential integer直接複用為 stranger_id // trace_id 在同一個 file 內是 sequential integer直接複用為 stranger_id
let stranger_update = sqlx::query( let stranger_update = sqlx::query(&format!(
&format!( "UPDATE {} SET stranger_id = trace_id \
"UPDATE {} SET stranger_id = trace_id \
WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \ WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \
AND (stranger_id IS NULL OR stranger_id != trace_id)", AND (stranger_id IS NULL OR stranger_id != trace_id)",
fd_table fd_table
) ))
)
.bind(file_uuid) .bind(file_uuid)
.execute(pool) .execute(pool)
.await?; .await?;
let stranger_count = stranger_update.rows_affected(); let stranger_count = stranger_update.rows_affected();
// Step 7: Save identity files for all affected identities // Step 7: Save identity files for all affected identities
let affected = sqlx::query_scalar::<_, uuid::Uuid>( let affected = sqlx::query_scalar::<_, uuid::Uuid>(&format!(
&format!("SELECT DISTINCT i.uuid FROM {} i \ "SELECT DISTINCT i.uuid FROM {} i \
JOIN {} fd ON fd.identity_id = i.id \ JOIN {} fd ON fd.identity_id = i.id \
WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL", identities_table, fd_table) WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL",
) identities_table, fd_table
))
.bind(file_uuid) .bind(file_uuid)
.fetch_all(pool) .fetch_all(pool)
.await .await
.unwrap_or_default(); .unwrap_or_default();
for uuid in &affected { for uuid in &affected {
let us = uuid.to_string().replace('-', ""); let us = uuid.to_string().replace('-', "");
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await { if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await
{
tracing::warn!("[FaceMatch] Failed to save identity file {}: {}", us, e); tracing::warn!("[FaceMatch] Failed to save identity file {}: {}", us, e);
} }
} }
@@ -773,13 +854,15 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> { pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
// Load face traces with identity_id and frame numbers // Load face traces with identity_id and frame numbers
let fd_table = schema::table_name("face_detections"); let fd_table = schema::table_name("face_detections");
let traces = sqlx::query_as::<_, (i32, Vec<i32>)>( let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(&format!(
&format!("SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \ "SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
FROM {} WHERE file_uuid=$1 AND trace_id IS NOT NULL AND identity_id IS NOT NULL \ FROM {} WHERE file_uuid=$1 AND trace_id IS NOT NULL AND identity_id IS NOT NULL \
GROUP BY trace_id", fd_table) GROUP BY trace_id",
) fd_table
))
.bind(file_uuid) .bind(file_uuid)
.fetch_all(pool).await?; .fetch_all(pool)
.await?;
if traces.is_empty() { if traces.is_empty() {
tracing::info!("[SpeakerBind] No face traces with identities"); tracing::info!("[SpeakerBind] No face traces with identities");
@@ -945,9 +1028,8 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
let speakers = extract_speakers_from_asrx_data(&asrx_data); let speakers = extract_speakers_from_asrx_data(&asrx_data);
let identities = analyze_person_speaker_overlap(&persons, &speakers); let identities = analyze_person_speaker_overlap(&persons, &speakers);
let uuid_short = &file_uuid[..8.min(file_uuid.len())];
for (idx, id_result) in identities.iter().enumerate() { for (idx, id_result) in identities.iter().enumerate() {
let identity_name = format!("stranger_{}_{}", uuid_short, idx); let identity_name = format!("stranger_{}", idx);
let metadata = serde_json::json!({ let metadata = serde_json::json!({
"source": "identity_agent", "source": "identity_agent",
"trace_ids": id_result.person_ids, "trace_ids": id_result.person_ids,

View File

@@ -38,8 +38,18 @@ pub fn identity_routes() -> Router<crate::api::server::AppState> {
.route("/api/v1/resource/heartbeat", post(heartbeat_resource)) .route("/api/v1/resource/heartbeat", post(heartbeat_resource))
.route("/api/v1/resources", get(list_resources)) .route("/api/v1/resources", get(list_resources))
.route("/api/v1/identity/upload", post(upload_identity)) .route("/api/v1/identity/upload", post(upload_identity))
.route("/api/v1/identity/:identity_uuid/profile-image", post(upload_profile_image).get(get_profile_image)) .route(
.route("/api/v1/identity/:identity_uuid/json", get(get_identity_json)) "/api/v1/identity/:identity_uuid/profile-image",
post(upload_profile_image).get(get_profile_image),
)
.route(
"/api/v1/identity/:identity_uuid/status",
get(get_identity_status),
)
.route(
"/api/v1/identity/:identity_uuid/json",
get(get_identity_json),
)
// Experiment: identity text search (non-polluting, separate endpoint) // Experiment: identity text search (non-polluting, separate endpoint)
.route("/api/v1/search/identity_text", get(search_identity_text)) .route("/api/v1/search/identity_text", get(search_identity_text))
.route("/api/v1/identities/search", get(search_identities_by_text)) .route("/api/v1/identities/search", get(search_identities_by_text))
@@ -98,9 +108,10 @@ async fn list_files(
.await .await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let data = records.0 let data = records
.0
.into_iter() .into_iter()
.map(|r| FileItem { .map(|r| FileItem {
file_uuid: r.file_uuid, file_uuid: r.file_uuid,
file_name: r.file_name, file_name: r.file_name,
file_path: r.file_path, file_path: r.file_path,
@@ -163,7 +174,9 @@ async fn get_file_detail(
file_name: f.file_name, file_name: f.file_name,
file_path: f.file_path, file_path: f.file_path,
metadata: f.probe_json, metadata: f.probe_json,
created_at: chrono::DateTime::parse_from_rfc3339(&f.created_at).ok().map(|d| d.into()), created_at: chrono::DateTime::parse_from_rfc3339(&f.created_at)
.ok()
.map(|d| d.into()),
})), })),
None => Err(( None => Err((
StatusCode::NOT_FOUND, StatusCode::NOT_FOUND,
@@ -214,13 +227,42 @@ async fn get_file_identities(
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let fps = 25.0; let fps = 25.0;
let data: Vec<FileIdentityItem> = Vec::new(); let data: Vec<FileIdentityItem> = records
.into_iter()
.map(|r| FileIdentityItem {
identity_id: r.identity_id,
identity_uuid: r.identity_uuid,
name: r.name,
metadata: r.metadata,
face_count: r.face_count,
speaker_count: r.speaker_count,
start_frame: r.start_frame,
end_frame: r.end_frame,
start_time: r.start_time,
end_time: r.end_time,
confidence: r.confidence,
})
.collect();
let total = match sqlx::query_scalar::<_, i64>(
&format!(
"SELECT COUNT(DISTINCT fd.identity_id) FROM {} fd WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
crate::core::db::schema::table_name("face_detections")
)
)
.bind(&file_uuid)
.fetch_one(state.db.pool())
.await
{
Ok(c) => c,
Err(_) => data.len() as i64,
};
Ok(Json(FileIdentitiesResponse { Ok(Json(FileIdentitiesResponse {
success: true, success: true,
file_uuid: file_uuid, file_uuid: file_uuid,
fps, fps,
total: data.len() as i64, total,
page, page,
page_size, page_size,
data, data,
@@ -243,6 +285,16 @@ pub struct IdentityDetailResponse {
pub updated_at: Option<chrono::DateTime<chrono::Utc>>, pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
} }
/// JSON body returned by the identity status endpoint (`get_identity_status`).
#[derive(Debug, Serialize)]
pub struct IdentityStatusResponse {
/// Set to `true` by the handler whenever the identity was found.
pub success: bool,
/// UUID string copied from the identity record.
pub identity_uuid: String,
/// Name copied from the identity record.
pub name: String,
/// Whether an `identity.json` file exists in the identity's storage directory.
pub has_json: bool,
/// Whether a `profile.jpg` file exists in the identity's storage directory.
pub has_jpg: bool,
/// Optional error text; the status handler always sets this to `None`.
pub error: Option<String>,
}
fn strip_uuid(u: &uuid::Uuid) -> String { fn strip_uuid(u: &uuid::Uuid) -> String {
u.to_string().replace('-', "") u.to_string().replace('-', "")
} }
@@ -270,7 +322,11 @@ async fn get_identity_detail(
metadata: i.metadata, metadata: i.metadata,
reference_data: i.reference_data, reference_data: i.reference_data,
tmdb_id: i.tmdb_id, tmdb_id: i.tmdb_id,
tmdb_profile: Some(format!("{}/identities/{}/profile.jpg", crate::core::config::OUTPUT_DIR.as_str(), i.uuid.replace('-', ""))), tmdb_profile: Some(format!(
"{}/identities/{}/profile.jpg",
crate::core::config::OUTPUT_DIR.as_str(),
i.uuid.replace('-', "")
)),
created_at: i.created_at, created_at: i.created_at,
updated_at: i.updated_at, updated_at: i.updated_at,
})), })),
@@ -281,6 +337,44 @@ async fn get_identity_detail(
} }
} }
/// Reports which on-disk artifacts exist for an identity.
///
/// The path parameter is normalised by stripping hyphens before the database
/// lookup. Both the hyphen-free form and the UUID string stored on the record
/// are probed as directory keys, so files saved under either spelling count.
///
/// # Errors
/// - `500 INTERNAL_SERVER_ERROR` with the error text when the lookup fails.
/// - `404 NOT_FOUND` when no identity matches the UUID.
async fn get_identity_status(
    State(state): State<crate::api::server::AppState>,
    Path(identity_uuid): Path<String>,
) -> Result<Json<IdentityStatusResponse>, (StatusCode, String)> {
    let uuid_clean = identity_uuid.replace('-', "");

    let record = state
        .db
        .get_identity_by_uuid(&uuid_clean)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
        .ok_or_else(|| {
            (
                StatusCode::NOT_FOUND,
                format!("Identity not found: {}", uuid_clean),
            )
        })?;

    // Candidate directories, probed in order: hyphen-free key first, then the
    // UUID exactly as stored on the record.
    let candidate_dirs = [
        crate::core::identity::storage::identity_dir(&uuid_clean),
        crate::core::identity::storage::identity_dir(&record.uuid),
    ];
    let artifact_exists = |file_name: &str| {
        candidate_dirs
            .iter()
            .any(|dir| dir.join(file_name).exists())
    };
    let has_json = artifact_exists("identity.json");
    let has_jpg = artifact_exists("profile.jpg");

    Ok(Json(IdentityStatusResponse {
        success: true,
        identity_uuid: record.uuid,
        name: record.name,
        has_json,
        has_jpg,
        error: None,
    }))
}
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
pub struct IdentityFilesResponse { pub struct IdentityFilesResponse {
pub success: bool, pub success: bool,
@@ -375,10 +469,25 @@ async fn get_identity_files(
}) })
.collect(); .collect();
let total = match sqlx::query_scalar::<_, i64>(
&format!(
"SELECT COUNT(DISTINCT fd.file_uuid) FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1)",
crate::core::db::schema::table_name("face_detections"),
crate::core::db::schema::table_name("identities"),
)
)
.bind(&uuid)
.fetch_one(state.db.pool())
.await
{
Ok(c) => c,
Err(_) => data.len() as i64,
};
Ok(Json(IdentityFilesResponse { Ok(Json(IdentityFilesResponse {
success: true, success: true,
identity_uuid: uuid.to_string().replace('-', ""), identity_uuid: uuid.to_string().replace('-', ""),
total: data.len() as i64, total,
page, page,
page_size, page_size,
data, data,
@@ -449,10 +558,25 @@ async fn get_identity_faces(
}) })
.collect(); .collect();
let total = match sqlx::query_scalar::<_, i64>(
&format!(
"SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1)",
crate::core::db::schema::table_name("face_detections"),
crate::core::db::schema::table_name("identities"),
)
)
.bind(&uuid)
.fetch_one(state.db.pool())
.await
{
Ok(c) => c,
Err(_) => data.len() as i64,
};
Ok(Json(IdentityFacesResponse { Ok(Json(IdentityFacesResponse {
success: true, success: true,
identity_uuid: uuid.to_string().replace('-', ""), identity_uuid: uuid.to_string().replace('-', ""),
total: data.len() as i64, total,
page, page,
page_size, page_size,
data, data,
@@ -721,12 +845,24 @@ async fn upload_profile_image(
let uuid_clean = identity_uuid.replace('-', ""); let uuid_clean = identity_uuid.replace('-', "");
// Verify identity exists // Verify identity exists
if state.db.get_identity_by_uuid(&uuid_clean).await.map_err(|_| { if state
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"success": false, "message": "DB error"}))) .db
})?.is_none() { .get_identity_by_uuid(&uuid_clean)
return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ .await
"success": false, "message": "Identity not found" .map_err(|_| {
})))); (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"success": false, "message": "DB error"})),
)
})?
.is_none()
{
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"success": false, "message": "Identity not found"
})),
));
} }
// Process multipart upload // Process multipart upload
@@ -740,9 +876,14 @@ async fn upload_profile_image(
ext = match content_type.as_str() { ext = match content_type.as_str() {
"image/png" => "png", "image/png" => "png",
"image/jpeg" | "image/jpg" => "jpg", "image/jpeg" | "image/jpg" => "jpg",
_ => return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({ _ => {
"success": false, "message": "Unsupported image type. Use JPEG or PNG." return Err((
})))), StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": "Unsupported image type. Use JPEG or PNG."
})),
))
}
}; };
image_data = Some(field.bytes().await.map_err(|_| { image_data = Some(field.bytes().await.map_err(|_| {
(StatusCode::BAD_REQUEST, Json(serde_json::json!({"success": false, "message": "Failed to read image data"}))) (StatusCode::BAD_REQUEST, Json(serde_json::json!({"success": false, "message": "Failed to read image data"})))
@@ -750,9 +891,14 @@ async fn upload_profile_image(
} }
} }
let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({ let data = image_data.ok_or_else(|| {
"success": false, "message": "No image field found. Use field name 'image'." (
}))))?; StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": "No image field found. Use field name 'image'."
})),
)
})?;
// Write image file // Write image file
let dir = crate::core::identity::storage::identity_dir(&uuid_clean); let dir = crate::core::identity::storage::identity_dir(&uuid_clean);
@@ -789,8 +935,16 @@ async fn get_profile_image(
let path = dir.join(format!("profile.{}", ext)); let path = dir.join(format!("profile.{}", ext));
if path.exists() { if path.exists() {
let data = std::fs::read(&path).map_err(|_| StatusCode::NOT_FOUND)?; let data = std::fs::read(&path).map_err(|_| StatusCode::NOT_FOUND)?;
let content_type = if *ext == "png" { "image/png" } else { "image/jpeg" }; let content_type = if *ext == "png" {
return Ok((StatusCode::OK, [("content-type".to_string(), content_type.to_string())], data)); "image/png"
} else {
"image/jpeg"
};
return Ok((
StatusCode::OK,
[("content-type".to_string(), content_type.to_string())],
data,
));
} }
} }
Err(StatusCode::NOT_FOUND) Err(StatusCode::NOT_FOUND)
@@ -802,7 +956,14 @@ async fn get_identity_json(
) -> Result<(StatusCode, [(String, String); 1], Vec<u8>), StatusCode> { ) -> Result<(StatusCode, [(String, String); 1], Vec<u8>), StatusCode> {
let clean = identity_uuid.replace('-', ""); let clean = identity_uuid.replace('-', "");
let with_hyphens = if clean.len() == 32 { let with_hyphens = if clean.len() == 32 {
format!("{}-{}-{}-{}-{}", &clean[0..8], &clean[8..12], &clean[12..16], &clean[16..20], &clean[20..32]) format!(
"{}-{}-{}-{}-{}",
&clean[0..8],
&clean[8..12],
&clean[12..16],
&clean[16..20],
&clean[20..32]
)
} else { } else {
identity_uuid.clone() identity_uuid.clone()
}; };
@@ -821,7 +982,9 @@ async fn get_identity_json(
} }
// 2. Lazy Sync: If file missing, generate from DB and save // 2. Lazy Sync: If file missing, generate from DB and save
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &clean).await { if let Err(e) =
crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &clean).await
{
tracing::warn!("[identity-json] Lazy sync failed for {}: {}", clean, e); tracing::warn!("[identity-json] Lazy sync failed for {}: {}", clean, e);
return Err(StatusCode::NOT_FOUND); return Err(StatusCode::NOT_FOUND);
} }
@@ -858,7 +1021,7 @@ struct IdentityTextHit {
chunk_id: String, chunk_id: String,
start_time: f64, start_time: f64,
end_time: f64, end_time: f64,
text_content: String, text_content: Option<String>,
identity_id: Option<i32>, identity_id: Option<i32>,
identity_name: Option<String>, identity_name: Option<String>,
identity_source: Option<String>, identity_source: Option<String>,
@@ -889,7 +1052,7 @@ async fn search_identity_text(
let query = format!( let query = format!(
r#"SELECT c.file_uuid, c.chunk_id, c.start_time, c.end_time, c.text_content, r#"SELECT c.file_uuid, c.chunk_id, c.start_time, c.end_time, c.text_content,
fd.identity_id, CASE WHEN id_table LIKE 'dev.%' THEN i.name ELSE i.real_name END AS identity_name, i.source AS identity_source, fd.identity_id, i.name AS identity_name, i.source AS identity_source,
fd.trace_id fd.trace_id
FROM {} c FROM {} c
LEFT JOIN {} fd ON fd.file_uuid = c.file_uuid LEFT JOIN {} fd ON fd.file_uuid = c.file_uuid
@@ -902,18 +1065,42 @@ async fn search_identity_text(
chunk_table, fd_table, id_table chunk_table, fd_table, id_table
); );
let rows = sqlx::query_as::<_, (String, String, f64, f64, String, Option<i32>, Option<String>, Option<String>, Option<i32>)>(&query) let rows = sqlx::query_as::<
.bind(&params.uuid).bind(&like_q).bind(limit) _,
.fetch_all(state.db.pool()) (
.await String,
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; String,
f64,
f64,
Option<String>,
Option<i32>,
Option<String>,
Option<String>,
Option<i32>,
),
>(&query)
.bind(&params.uuid)
.bind(&like_q)
.bind(limit)
.fetch_all(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let results: Vec<IdentityTextHit> = rows let results: Vec<IdentityTextHit> = rows
.into_iter() .into_iter()
.map(|(fu, cid, st, et, txt, iid, iname, isrc, tid)| IdentityTextHit { .map(
file_uuid: fu, chunk_id: cid, start_time: st, end_time: et, text_content: txt, |(fu, cid, st, et, txt, iid, iname, isrc, tid)| IdentityTextHit {
identity_id: iid, identity_name: iname, identity_source: isrc, trace_id: tid, file_uuid: fu,
}) chunk_id: cid,
start_time: st,
end_time: et,
text_content: txt,
identity_id: iid,
identity_name: iname,
identity_source: isrc,
trace_id: tid,
},
)
.collect(); .collect();
let total = results.len() as i64; let total = results.len() as i64;
@@ -922,7 +1109,14 @@ async fn search_identity_text(
let start = (page - 1) * page_size; let start = (page - 1) * page_size;
let paged: Vec<IdentityTextHit> = results.into_iter().skip(start).take(page_size).collect(); let paged: Vec<IdentityTextHit> = results.into_iter().skip(start).take(page_size).collect();
let limit = params.limit.unwrap_or(50) as usize; let limit = params.limit.unwrap_or(50) as usize;
Ok(Json(IdentityTextResponse { success: true, total, page, page_size, limit, results: paged })) Ok(Json(IdentityTextResponse {
success: true,
total,
page,
page_size,
limit,
results: paged,
}))
} }
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
@@ -942,7 +1136,7 @@ struct IdentitySearchHit {
trace_id: Option<i32>, trace_id: Option<i32>,
chunk_id: String, chunk_id: String,
start_time: f64, start_time: f64,
text_content: String, text_content: Option<String>,
} }
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
@@ -965,7 +1159,7 @@ async fn search_identities_by_text(
let limit = params.limit.unwrap_or(50).min(100); let limit = params.limit.unwrap_or(50).min(100);
let query = format!( let query = format!(
r#"SELECT i.id::int, COALESCE(i.real_name, i.actor_name, i.name) AS name, i.source, i.tmdb_id, r#"SELECT i.id::int, i.name, i.source, i.tmdb_id,
fd.file_uuid, fd.trace_id, fd.file_uuid, fd.trace_id,
c.chunk_id, c.start_time, c.text_content c.chunk_id, c.start_time, c.text_content
FROM {} i FROM {} i
@@ -973,30 +1167,58 @@ async fn search_identities_by_text(
JOIN {} c ON c.file_uuid = fd.file_uuid JOIN {} c ON c.file_uuid = fd.file_uuid
AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0)
AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0)
WHERE COALESCE(i.real_name, i.actor_name, i.name) ILIKE $1 WHERE i.name ILIKE $1
AND ($2::text IS NULL OR fd.file_uuid = $2) AND ($2::text IS NULL OR fd.file_uuid = $2)
ORDER BY COALESCE(i.real_name, i.actor_name, i.name), c.start_time ORDER BY i.name, c.start_time
LIMIT $3"#, LIMIT $3"#,
id_table, fd_table, chunk_table id_table, fd_table, chunk_table
); );
let rows = sqlx::query_as::<_, (i32, String, Option<String>, Option<i32>, String, Option<i32>, String, f64, String)>(&query) let rows = sqlx::query_as::<
.bind(&like_q).bind(&params.uuid).bind(limit) _,
.fetch_all(state.db.pool()) (
.await i32,
.map_err(|e| { String,
tracing::error!("[identities/search] Query failed: {}", e); Option<String>,
StatusCode::INTERNAL_SERVER_ERROR Option<i32>,
})?; String,
Option<i32>,
String,
f64,
Option<String>,
),
>(&query)
.bind(&like_q)
.bind(&params.uuid)
.bind(limit)
.fetch_all(state.db.pool())
.await
.map_err(|e| {
tracing::error!("[identities/search] Query failed: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
let results: Vec<IdentitySearchHit> = rows let results: Vec<IdentitySearchHit> = rows
.into_iter() .into_iter()
.map(|(iid, name, src, tid, fu, trace_id, cid, st, txt)| IdentitySearchHit { .map(
identity_id: iid, name, source: src, tmdb_id: tid, |(iid, name, src, tid, fu, trace_id, cid, st, txt)| IdentitySearchHit {
file_uuid: fu, trace_id, chunk_id: cid, start_time: st, text_content: txt, identity_id: iid,
}) name,
source: src,
tmdb_id: tid,
file_uuid: fu,
trace_id,
chunk_id: cid,
start_time: st,
text_content: txt,
},
)
.collect(); .collect();
let total = results.len() as i64; let total = results.len() as i64;
Ok(Json(IdentitySearchResponse { success: true, total, results })) Ok(Json(IdentitySearchResponse {
success: true,
total,
results,
}))
} }

View File

@@ -1,5 +1,5 @@
use axum::{ use axum::{
extract::{Path, Query}, extract::{Path, Query, State},
http::StatusCode, http::StatusCode,
response::Json, response::Json,
routing::{get, post}, routing::{get, post},
@@ -77,7 +77,7 @@ pub async fn bind_identity(
// Get identity_id from identity_uuid // Get identity_id from identity_uuid
let identity_row: Option<(i64, String)> = sqlx::query_as(&format!( let identity_row: Option<(i64, String)> = sqlx::query_as(&format!(
"SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid", "SELECT id, name FROM {} WHERE uuid = $1::uuid",
id_table id_table
)) ))
.bind(&identity_uuid) .bind(&identity_uuid)
@@ -116,8 +116,14 @@ pub async fn bind_identity(
let uuid_clean = identity_uuid.replace('-', ""); let uuid_clean = identity_uuid.replace('-', "");
// Sync identity JSON file // Sync identity JSON file
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &uuid_clean).await { if let Err(e) =
tracing::warn!("[bind] Failed to sync identity file for {}: {}", uuid_clean, e); crate::core::identity::storage::save_identity_file_by_pool(&db, &uuid_clean).await
{
tracing::warn!(
"[bind] Failed to sync identity file for {}: {}",
uuid_clean,
e
);
} }
Ok(Json(ApiResponse { Ok(Json(ApiResponse {
@@ -189,8 +195,15 @@ pub async fn unbind_identity(
.ok() .ok()
.flatten(); .flatten();
if let Some(identity_uuid) = uuid { if let Some(identity_uuid) = uuid {
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &identity_uuid).await { if let Err(e) =
tracing::warn!("[unbind] Failed to sync identity file for {}: {}", identity_uuid, e); crate::core::identity::storage::save_identity_file_by_pool(&db, &identity_uuid)
.await
{
tracing::warn!(
"[unbind] Failed to sync identity file for {}: {}",
identity_uuid,
e
);
} }
} }
} }
@@ -221,7 +234,7 @@ pub async fn merge_identities(
// Get IDs for both identities // Get IDs for both identities
let from_row: Option<(i64, String)> = sqlx::query_as(&format!( let from_row: Option<(i64, String)> = sqlx::query_as(&format!(
"SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid", "SELECT id, name FROM {} WHERE uuid = $1::uuid",
id_table id_table
)) ))
.bind(&identity_uuid) .bind(&identity_uuid)
@@ -239,7 +252,7 @@ pub async fn merge_identities(
))?; ))?;
let into_row: Option<(i64, String)> = sqlx::query_as(&format!( let into_row: Option<(i64, String)> = sqlx::query_as(&format!(
"SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid", "SELECT id, name FROM {} WHERE uuid = $1::uuid",
id_table id_table
)) ))
.bind(&req.into_uuid) .bind(&req.into_uuid)
@@ -299,8 +312,14 @@ pub async fn merge_identities(
// Sync target identity JSON // Sync target identity JSON
let into_uuid_clean = req.into_uuid.replace('-', ""); let into_uuid_clean = req.into_uuid.replace('-', "");
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &into_uuid_clean).await { if let Err(e) =
tracing::warn!("[merge] Failed to sync target identity file for {}: {}", into_uuid_clean, e); crate::core::identity::storage::save_identity_file_by_pool(&db, &into_uuid_clean).await
{
tracing::warn!(
"[merge] Failed to sync target identity file for {}: {}",
into_uuid_clean,
e
);
} }
// Delete source identity JSON if not keeping history // Delete source identity JSON if not keeping history
@@ -339,6 +358,106 @@ pub struct ListIdentitiesParams {
pub offset: Option<i32>, pub offset: Option<i32>,
} }
/// Aggregated statistics for one face trace of an identity within one file,
/// as produced by the grouped query in `get_identity_traces`.
#[derive(Debug, Serialize)]
pub struct IdentityTraceInfo {
/// UUID of the file the trace belongs to, rendered as text.
pub file_uuid: String,
/// Trace identifier within that file.
pub trace_id: i32,
/// Number of face-detection rows in the trace (`COUNT(*)`).
pub frame_count: i64,
/// Smallest frame number in the trace.
pub first_frame: i32,
/// Largest frame number in the trace.
pub last_frame: i32,
/// `first_frame` divided by 25.0, rounded to one decimal place.
pub first_sec: f64,
/// `last_frame` divided by 25.0, rounded to one decimal place.
pub last_sec: f64,
/// Mean detection confidence over the trace, rounded to four decimal places.
pub avg_confidence: f64,
}
/// JSON body returned by `get_identity_traces`.
#[derive(Debug, Serialize)]
pub struct IdentityTracesResponse {
/// Set to `true` by the handler on the success path.
pub success: bool,
/// The identity UUID exactly as received in the request path.
pub identity_uuid: String,
/// Name read from the identity row.
pub name: String,
/// Number of entries in `traces`.
pub total_traces: usize,
/// Sum of `frame_count` over all entries in `traces`.
pub total_faces: i64,
/// One entry per `(file_uuid, trace_id)` group, ordered by file then trace.
pub traces: Vec<IdentityTraceInfo>,
}
/// Lists every face trace bound to an identity, across all files.
///
/// Resolves the identity row from `identity_uuid`, then aggregates its
/// face-detection rows per `(file_uuid, trace_id)` into frame counts, frame
/// range, approximate time range and mean confidence.
///
/// Face rows bound to the identity but carrying no `trace_id` are excluded:
/// they do not belong to any trace, and a NULL `trace_id` cannot be decoded
/// into the non-optional `i32` of [`IdentityTraceInfo`], which previously
/// failed the whole request.
///
/// # Errors
/// - `404 NOT_FOUND` when no identity row matches the UUID.
/// - `500 INTERNAL_SERVER_ERROR` with the database error text when either
///   query fails (presumably including a UUID Postgres cannot cast — confirm).
pub async fn get_identity_traces(
    State(state): State<crate::api::server::AppState>,
    Path(identity_uuid): Path<String>,
) -> Result<Json<IdentityTracesResponse>, (StatusCode, String)> {
    let id_table = crate::core::db::schema::table_name("identities");
    let fd_table = crate::core::db::schema::table_name("face_detections");

    // Resolve the numeric id and display name of the identity.
    let identity: Option<(i32, String)> = sqlx::query_as(&format!(
        "SELECT id, name FROM {} WHERE uuid = $1::uuid",
        id_table
    ))
    .bind(&identity_uuid)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    let (identity_id, name) =
        identity.ok_or((StatusCode::NOT_FOUND, "Identity not found".to_string()))?;

    // One row per (file, trace).
    // NOTE(review): seconds are derived with a fixed 25.0 fps divisor; confirm
    // whether the per-file frame rate should be used instead.
    let rows: Vec<(String, i32, i64, i32, i32, f64, f64, f64)> = sqlx::query_as(&format!(
        r#"SELECT fd.file_uuid::text, fd.trace_id,
                  COUNT(*)::bigint AS frame_count,
                  MIN(fd.frame_number)::int AS first_frame,
                  MAX(fd.frame_number)::int AS last_frame,
                  ROUND(MIN(fd.frame_number)::numeric / 25.0, 1)::float8 AS first_sec,
                  ROUND(MAX(fd.frame_number)::numeric / 25.0, 1)::float8 AS last_sec,
                  ROUND(AVG(fd.confidence)::numeric, 4)::float8 AS avg_confidence
           FROM {} fd
           WHERE fd.identity_id = $1
             AND fd.trace_id IS NOT NULL
           GROUP BY fd.file_uuid, fd.trace_id
           ORDER BY fd.file_uuid, fd.trace_id"#,
        fd_table
    ))
    .bind(identity_id)
    .fetch_all(state.db.pool())
    .await
    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    let traces: Vec<IdentityTraceInfo> = rows
        .into_iter()
        .map(
            |(
                file_uuid,
                trace_id,
                frame_count,
                first_frame,
                last_frame,
                first_sec,
                last_sec,
                avg_confidence,
            )| IdentityTraceInfo {
                file_uuid,
                trace_id,
                frame_count,
                first_frame,
                last_frame,
                first_sec,
                last_sec,
                avg_confidence,
            },
        )
        .collect();

    let total_traces = traces.len();
    let total_faces: i64 = traces.iter().map(|t| t.frame_count).sum();

    Ok(Json(IdentityTracesResponse {
        success: true,
        identity_uuid,
        name,
        total_traces,
        total_faces,
        traces,
    }))
}
pub fn identity_binding_routes() -> Router<crate::api::server::AppState> { pub fn identity_binding_routes() -> Router<crate::api::server::AppState> {
Router::new() Router::new()
.route("/api/v1/identity/:identity_uuid/bind", post(bind_identity)) .route("/api/v1/identity/:identity_uuid/bind", post(bind_identity))
@@ -350,4 +469,8 @@ pub fn identity_binding_routes() -> Router<crate::api::server::AppState> {
"/api/v1/identity/:identity_uuid/mergeinto", "/api/v1/identity/:identity_uuid/mergeinto",
post(merge_identities), post(merge_identities),
) )
.route(
"/api/v1/identity/:identity_uuid/traces",
get(get_identity_traces),
)
} }

View File

@@ -14,8 +14,16 @@ use crate::core::db::{schema, PostgresDb};
/// Shared video query params: mode=normal|debug, audio=on|off /// Shared video query params: mode=normal|debug, audio=on|off
fn parse_video_params(params: &std::collections::HashMap<String, String>) -> (String, String) { fn parse_video_params(params: &std::collections::HashMap<String, String>) -> (String, String) {
let mode = params.get("mode").map(|s| s.as_str()).unwrap_or("normal").to_string(); let mode = params
let audio = params.get("audio").map(|s| s.as_str()).unwrap_or("on").to_string(); .get("mode")
.map(|s| s.as_str())
.unwrap_or("normal")
.to_string();
let audio = params
.get("audio")
.map(|s| s.as_str())
.unwrap_or("on")
.to_string();
(mode, audio) (mode, audio)
} }
@@ -142,9 +150,12 @@ struct BboxParams {
/// Priority: start_frame/end_frame > start/end > start_time/end_time. /// Priority: start_frame/end_frame > start/end > start_time/end_time.
/// If only time is given, convert via fps. /// If only time is given, convert via fps.
fn resolve_frame_range( fn resolve_frame_range(
start_frame: Option<i32>, end_frame: Option<i32>, start_frame: Option<i32>,
start: Option<i32>, end: Option<i32>, end_frame: Option<i32>,
start_time: Option<f64>, end_time: Option<f64>, start: Option<i32>,
end: Option<i32>,
start_time: Option<f64>,
end_time: Option<f64>,
fps: f64, fps: f64,
) -> (i32, i32) { ) -> (i32, i32) {
if let (Some(sf), Some(ef)) = (start_frame.or(start), end_frame.or(end)) { if let (Some(sf), Some(ef)) = (start_frame.or(start), end_frame.or(end)) {
@@ -186,7 +197,15 @@ async fn bbox_overlay_video(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.unwrap_or(24.0); .unwrap_or(24.0);
let (start_f, end_f) = resolve_frame_range(p.start_frame, p.end_frame, p.start, p.end, p.start_time, p.end_time, fps); let (start_f, end_f) = resolve_frame_range(
p.start_frame,
p.end_frame,
p.start,
p.end,
p.start_time,
p.end_time,
fps,
);
let start_sec = start_f as f64 / fps; let start_sec = start_f as f64 / fps;
@@ -228,13 +247,26 @@ async fn bbox_overlay_video(
let dur = duration.to_string(); let dur = duration.to_string();
let mut bbox_args = vec!["-ss", &ss, "-i", &video_path, "-t", &dur]; let mut bbox_args = vec!["-ss", &ss, "-i", &video_path, "-t", &dur];
if vf != "null" { if vf != "null" {
bbox_args.extend_from_slice(&["-vf", &vf, "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"]); bbox_args.extend_from_slice(&[
"-vf",
&vf,
"-c:v",
"libx264",
"-preset",
"ultrafast",
"-crf",
"28",
]);
} else { } else {
bbox_args.extend_from_slice(&["-c", "copy"]); bbox_args.extend_from_slice(&["-c", "copy"]);
} }
if bbox_audio == "off" { bbox_args.push("-an"); } if bbox_audio == "off" {
bbox_args.push("-an");
}
bbox_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]); bbox_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
let status = ffmpeg_cmd().args(&bbox_args).status() let status = ffmpeg_cmd()
.args(&bbox_args)
.status()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !status.success() { if !status.success() {
let _ = std::fs::remove_file(&tmp); let _ = std::fs::remove_file(&tmp);
@@ -315,14 +347,20 @@ async fn trace_video(
let sk = seek.to_string(); let sk = seek.to_string();
let du = duration.to_string(); let du = duration.to_string();
let mut cmd_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, "-c", "copy"]; let mut cmd_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, "-c", "copy"];
if audio == "off" { cmd_args.push("-an"); } if audio == "off" {
cmd_args.push("-an");
}
cmd_args.extend_from_slice(&["-y", &tmp_str]); cmd_args.extend_from_slice(&["-y", &tmp_str]);
let result = ffmpeg_cmd().args(&cmd_args).output() let result = ffmpeg_cmd()
.args(&cmd_args)
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !result.status.success() { if !result.status.success() {
return Err(StatusCode::INTERNAL_SERVER_ERROR); return Err(StatusCode::INTERNAL_SERVER_ERROR);
} }
let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let data = tokio::fs::read(&tmp)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let _ = std::fs::remove_file(&tmp); let _ = std::fs::remove_file(&tmp);
return Ok(Response::builder() return Ok(Response::builder()
.header(header::CONTENT_TYPE, "video/mp4") .header(header::CONTENT_TYPE, "video/mp4")
@@ -345,8 +383,11 @@ async fn trace_video(
ORDER BY fd.trace_id, fd.frame_number", ORDER BY fd.trace_id, fd.frame_number",
face_table, identities_table face_table, identities_table
)) ))
.bind(&file_uuid).bind(start_fn).bind(end_fn) .bind(&file_uuid)
.fetch_all(state.db.pool()).await .bind(start_fn)
.bind(end_fn)
.fetch_all(state.db.pool())
.await
.unwrap_or_default(); .unwrap_or_default();
// Group frames by trace_id, compute start_frame per trace; collect bbox per frame // Group frames by trace_id, compute start_frame per trace; collect bbox per frame
@@ -359,7 +400,9 @@ async fn trace_video(
if let Some(name) = name_opt { if let Some(name) = name_opt {
trace_identity.entry(*tid).or_insert_with(|| name.clone()); trace_identity.entry(*tid).or_insert_with(|| name.clone());
} else { } else {
trace_identity.entry(*tid).or_insert_with(|| format!("Stranger_{:03}", tid)); trace_identity
.entry(*tid)
.or_insert_with(|| format!("Stranger_{:03}", tid));
} }
} }
@@ -374,7 +417,8 @@ async fn trace_video(
.unwrap_or_else(|| "-".to_string()); .unwrap_or_else(|| "-".to_string());
// Sort traces for consistent ordering // Sort traces for consistent ordering
let mut sorted_traces: Vec<(i32, &Vec<i32>)> = trace_frames.iter().map(|(k, v)| (*k, v)).collect(); let mut sorted_traces: Vec<(i32, &Vec<i32>)> =
trace_frames.iter().map(|(k, v)| (*k, v)).collect();
sorted_traces.sort_by_key(|(tid, _)| *tid); sorted_traces.sort_by_key(|(tid, _)| *tid);
let frame_offset = first_frame as i64 - (padding * fps) as i64; let frame_offset = first_frame as i64 - (padding * fps) as i64;
@@ -389,10 +433,12 @@ async fn trace_video(
"drawtext=text='Frame %{{n}} %{{pts}}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=12" "drawtext=text='Frame %{{n}} %{{pts}}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=12"
)); ));
parts.push(format!( parts.push(format!(
"drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56", cut_label "drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56",
cut_label
)); ));
parts.push(format!( parts.push(format!(
"drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100", file_uuid "drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100",
file_uuid
)); ));
// Per-trace entries: show trace_id, start_frame, identity name // Per-trace entries: show trace_id, start_frame, identity name
@@ -400,11 +446,18 @@ async fn trace_video(
let mut y_pos = 144; let mut y_pos = 144;
for (tid, frames) in &sorted_traces { for (tid, frames) in &sorted_traces {
let start = frames.iter().min().unwrap_or(&first_frame); let start = frames.iter().min().unwrap_or(&first_frame);
let identity = trace_identity.get(tid).map(|s| s.as_str()).unwrap_or("unknown"); let identity = trace_identity
.get(tid)
.map(|s| s.as_str())
.unwrap_or("unknown");
let label = format!("Trace {}\\: start={} {}", tid, start, identity); let label = format!("Trace {}\\: start={} {}", tid, start, identity);
// Continuous range (interpolated): visible from first to last frame // Continuous range (interpolated): visible from first to last frame
let enable = format!("between(n,{},{})", frames[0] as i64 - frame_offset, frames[frames.len() - 1] as i64 - frame_offset); let enable = format!(
"between(n,{},{})",
frames[0] as i64 - frame_offset,
frames[frames.len() - 1] as i64 - frame_offset
);
parts.push(format!( parts.push(format!(
"drawtext=text='{}':fontsize=24:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y={}:enable='{}'", "drawtext=text='{}':fontsize=24:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y={}:enable='{}'",
@@ -415,7 +468,11 @@ async fn trace_video(
// Bounding boxes: interpolated (thickness=1) + actual (thickness=4) with trace_id label // Bounding boxes: interpolated (thickness=1) + actual (thickness=4) with trace_id label
for (tid, frames) in &sorted_traces { for (tid, frames) in &sorted_traces {
let range_enable = format!("between(n,{},{})", frames[0] as i64 - frame_offset, frames[frames.len() - 1] as i64 - frame_offset); let range_enable = format!(
"between(n,{},{})",
frames[0] as i64 - frame_offset,
frames[frames.len() - 1] as i64 - frame_offset
);
// Interpolated bbox at first known position across the whole trace range // Interpolated bbox at first known position across the whole trace range
if let Some((x, y, w, h)) = bbox_per_frame.get(&(*tid, frames[0])) { if let Some((x, y, w, h)) = bbox_per_frame.get(&(*tid, frames[0])) {
parts.push(format!( parts.push(format!(
@@ -448,23 +505,45 @@ async fn trace_video(
let tmp_str = tmp.to_str().unwrap_or("").to_string(); let tmp_str = tmp.to_str().unwrap_or("").to_string();
let sk = seek.to_string(); let sk = seek.to_string();
let du = duration.to_string(); let du = duration.to_string();
let mut debug_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, let mut debug_args = vec![
"-/filter_complex", &filter_path, "-ss",
"-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"]; &sk,
if audio == "on" { debug_args.extend_from_slice(&["-c:a", "aac"]); } "-i",
&video_path,
"-t",
&du,
"-/filter_complex",
&filter_path,
"-c:v",
"libx264",
"-preset",
"ultrafast",
"-crf",
"28",
];
if audio == "on" {
debug_args.extend_from_slice(&["-c:a", "aac"]);
}
debug_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]); debug_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
let result = ffmpeg_cmd().args(&debug_args).output() let result = ffmpeg_cmd()
.args(&debug_args)
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !result.status.success() { if !result.status.success() {
let stderr = String::from_utf8_lossy(&result.stderr); let stderr = String::from_utf8_lossy(&result.stderr);
let _ = std::fs::write("/tmp/ffmpeg_last_error.txt", stderr.as_bytes()); let _ = std::fs::write("/tmp/ffmpeg_last_error.txt", stderr.as_bytes());
tracing::error!("ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt", stderr.len()); tracing::error!(
"ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt",
stderr.len()
);
let _ = std::fs::remove_file(&filter_file); let _ = std::fs::remove_file(&filter_file);
let _ = std::fs::remove_file(&tmp); let _ = std::fs::remove_file(&tmp);
return Err(StatusCode::INTERNAL_SERVER_ERROR); return Err(StatusCode::INTERNAL_SERVER_ERROR);
} }
let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let data = tokio::fs::read(&tmp)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let _ = std::fs::remove_file(&filter_file); let _ = std::fs::remove_file(&filter_file);
let _ = std::fs::remove_file(&tmp); let _ = std::fs::remove_file(&tmp);
Ok(Response::builder() Ok(Response::builder()
@@ -503,19 +582,27 @@ async fn stream_video(
// Chunk extraction with dual time/frame params // Chunk extraction with dual time/frame params
let start_time_param = params.get("start_time").and_then(|v| v.parse::<f64>().ok()); let start_time_param = params.get("start_time").and_then(|v| v.parse::<f64>().ok());
let end_time_param = params.get("end_time").and_then(|v| v.parse::<f64>().ok()); let end_time_param = params.get("end_time").and_then(|v| v.parse::<f64>().ok());
let start_frame_param = params.get("start_frame").and_then(|v| v.parse::<f64>().ok()); let start_frame_param = params
.get("start_frame")
.and_then(|v| v.parse::<f64>().ok());
let end_frame_param = params.get("end_frame").and_then(|v| v.parse::<f64>().ok()); let end_frame_param = params.get("end_frame").and_then(|v| v.parse::<f64>().ok());
let start_legacy = params.get("start").and_then(|v| v.parse::<f64>().ok()); let start_legacy = params.get("start").and_then(|v| v.parse::<f64>().ok());
let end_legacy = params.get("end").and_then(|v| v.parse::<f64>().ok()); let end_legacy = params.get("end").and_then(|v| v.parse::<f64>().ok());
let has_range = start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some(); let has_range =
start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some();
if has_range { if has_range {
let (start_sec, dur) = if let (Some(sf), Some(ef)) = (start_frame_param, end_frame_param) { let (start_sec, dur) = if let (Some(sf), Some(ef)) = (start_frame_param, end_frame_param) {
let _fps: f64 = sqlx::query_scalar(&format!( let _fps: f64 = sqlx::query_scalar(&format!(
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", videos_table "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
)).bind(&file_uuid).fetch_optional(state.db.pool()).await videos_table
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?.unwrap_or(24.0); ))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.unwrap_or(24.0);
(sf / _fps, (ef - sf) / _fps) (sf / _fps, (ef - sf) / _fps)
} else if let (Some(st), Some(et)) = (start_time_param, end_time_param) { } else if let (Some(st), Some(et)) = (start_time_param, end_time_param) {
(st, et - st) (st, et - st)
@@ -533,15 +620,21 @@ async fn stream_video(
let ss = start_sec.to_string(); let ss = start_sec.to_string();
let d = dur.to_string(); let d = dur.to_string();
let mut chunk_args = vec!["-ss", &ss, "-i", &file_path, "-t", &d, "-c", "copy"]; let mut chunk_args = vec!["-ss", &ss, "-i", &file_path, "-t", &d, "-c", "copy"];
if audio == "off" { chunk_args.push("-an"); } if audio == "off" {
chunk_args.push("-an");
}
chunk_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]); chunk_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
let status = ffmpeg_cmd().args(&chunk_args).status() let status = ffmpeg_cmd()
.args(&chunk_args)
.status()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !status.success() { if !status.success() {
let _ = std::fs::remove_file(&tmp); let _ = std::fs::remove_file(&tmp);
return Err(StatusCode::INTERNAL_SERVER_ERROR); return Err(StatusCode::INTERNAL_SERVER_ERROR);
} }
let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let data = tokio::fs::read(&tmp)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let _ = std::fs::remove_file(&tmp); let _ = std::fs::remove_file(&tmp);
return Ok(Response::builder() return Ok(Response::builder()
.header(header::CONTENT_TYPE, "video/mp4") .header(header::CONTENT_TYPE, "video/mp4")
@@ -704,7 +797,7 @@ async fn video_clip(
let frame_count = ((e - s) * fps) as i64; let frame_count = ((e - s) * fps) as i64;
cmd.args(["-vframes", &frame_count.to_string()]); cmd.args(["-vframes", &frame_count.to_string()]);
} else { } else {
cmd.args(["-to", &e.to_string()]); cmd.args(["-t", &(e - s).to_string()]);
} }
if mode == "debug" { if mode == "debug" {
let debug_text = if let (Some(sf), Some(ef)) = (q.start_frame, q.end_frame) { let debug_text = if let (Some(sf), Some(ef)) = (q.start_frame, q.end_frame) {
@@ -717,8 +810,20 @@ async fn video_clip(
if audio == "off" { if audio == "off" {
cmd.args(["-an"]); cmd.args(["-an"]);
} }
cmd.args(["-c:v", "libx264", "-c:a", "aac", "-f", "mpegts", "-"]); cmd.args([
let output = cmd.output().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; "-c:v",
"libx264",
"-c:a",
"aac",
"-movflags",
"frag_keyframe+empty_moov",
"-f",
"mp4",
"-",
]);
let output = cmd
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !output.status.success() { if !output.status.success() {
return Err(StatusCode::INTERNAL_SERVER_ERROR); return Err(StatusCode::INTERNAL_SERVER_ERROR);
} }

View File

@@ -88,16 +88,10 @@ fn hex_val(c: u8) -> Option<u8> {
} }
fn extract_api_key(headers: &HeaderMap, uri: &axum::http::Uri) -> Result<String, StatusCode> { fn extract_api_key(headers: &HeaderMap, uri: &axum::http::Uri) -> Result<String, StatusCode> {
if let Some(key) = headers if let Some(key) = headers.get("X-API-Key").and_then(|v| v.to_str().ok()) {
.get("X-API-Key")
.and_then(|v| v.to_str().ok())
{
return Ok(key.to_string()); return Ok(key.to_string());
} }
if let Some(auth) = headers if let Some(auth) = headers.get("Authorization").and_then(|v| v.to_str().ok()) {
.get("Authorization")
.and_then(|v| v.to_str().ok())
{
// Check if it's a JWT (starts with eyJ) // Check if it's a JWT (starts with eyJ)
let trimmed = auth.strip_prefix("Bearer ").unwrap_or(auth); let trimmed = auth.strip_prefix("Bearer ").unwrap_or(auth);
if !jwt::is_jwt(trimmed) { if !jwt::is_jwt(trimmed) {
@@ -129,7 +123,11 @@ pub async fn unified_auth(
// Priority 1: Cookie session (Portal) // Priority 1: Cookie session (Portal)
let cookies = extract_cookies(headers); let cookies = extract_cookies(headers);
if let Some(sid) = cookies.iter().find(|(k, _)| k == "session_id").map(|(_, v)| v.clone()) { if let Some(sid) = cookies
.iter()
.find(|(k, _)| k == "session_id")
.map(|(_, v)| v.clone())
{
match state.db.get_session_by_id(&sid).await { match state.db.get_session_by_id(&sid).await {
Ok(Some((_id, user_id, api_key_id, _expires_at))) => { Ok(Some((_id, user_id, api_key_id, _expires_at))) => {
let key_hash = hash_key(&api_key_id); let key_hash = hash_key(&api_key_id);
@@ -162,15 +160,17 @@ pub async fn unified_auth(
} }
// Priority 2: JWT (Authorization: Bearer <eyJ...>) // Priority 2: JWT (Authorization: Bearer <eyJ...>)
if let Some(auth_header) = headers if let Some(auth_header) = headers.get("Authorization").and_then(|v| v.to_str().ok()) {
.get("Authorization")
.and_then(|v| v.to_str().ok())
{
if let Some(token) = auth_header.strip_prefix("Bearer ") { if let Some(token) = auth_header.strip_prefix("Bearer ") {
if jwt::is_jwt(token) { if jwt::is_jwt(token) {
match jwt::verify_jwt(token) { match jwt::verify_jwt(token) {
Ok(claims) => { Ok(claims) => {
if !state.db.is_jwt_blacklisted(&claims.jti).await.unwrap_or(false) { if !state
.db
.is_jwt_blacklisted(&claims.jti)
.await
.unwrap_or(false)
{
let exp = chrono::DateTime::from_timestamp(claims.exp as i64, 0); let exp = chrono::DateTime::from_timestamp(claims.exp as i64, 0);
let user_id: i32 = claims.sub.parse().unwrap_or(0); let user_id: i32 = claims.sub.parse().unwrap_or(0);
let auth = UserAuth { let auth = UserAuth {

File diff suppressed because it is too large Load Diff

View File

@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
use crate::api::server::AppState; use crate::api::server::AppState;
use crate::core::config; use crate::core::config;
use crate::core::db::PostgresDb; use crate::core::db::{PostgresDb, QdrantDb};
use crate::core::tmdb; use crate::core::tmdb;
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
@@ -64,10 +64,44 @@ struct FileUuidParam {
file_uuid: String, file_uuid: String,
} }
/// JSON request body accepted by the `tmdb_fetch` handler.
#[derive(Debug, Deserialize)]
struct TmdbFetchRequest {
// UUID of the video to process; `tmdb_fetch` uses it to look up the file name
// in the videos table and to read the TMDb cache.
file_uuid: String,
}
/// Per-cast-member outcome reported by the `tmdb_fetch` handler.
#[derive(Debug, Serialize)]
struct TmdbFetchMemberResult {
// Cast member name as stored in the TMDb cache.
name: String,
// Character played; `tmdb_fetch` sets this to `None` when the cached string is empty.
character: Option<String>,
// Alternative names copied from the cache entry's `also_known_as` list.
aliases: Vec<String>,
// JSON object assembled by `tmdb_fetch` from the cached TMDb fields of this member.
metadata: serde_json::Value,
// One of "success", "partial", "skipped" or "error", as assigned by `tmdb_fetch`.
status: String,
// True when writing the identity file for this member succeeded.
has_json: bool,
// True when `profile.jpg` already existed or was written during this request.
has_jpg: bool,
// Failure description for "partial"/"error" results; `None` otherwise.
error: Option<String>,
}
/// JSON response body returned by the `tmdb_fetch` handler.
#[derive(Debug, Serialize)]
struct TmdbFetchResponse {
// `tmdb_fetch` always sets this to `true` when it builds a response;
// failures are reported through HTTP error statuses instead.
success: bool,
// Title taken from the probe result.
movie_title: Option<String>,
// TMDb id taken from the probe result.
tmdb_id: Option<u64>,
// One entry per cast member found in the TMDb cache.
results: Vec<TmdbFetchMemberResult>,
// JSON object with the counters "total", "success", "with_json" and "with_jpg".
summary: serde_json::Value,
}
pub fn tmdb_routes() -> Router<AppState> { pub fn tmdb_routes() -> Router<AppState> {
Router::new() Router::new()
.route("/api/v1/agents/tmdb/prefetch", post(tmdb_prefetch)) .route("/api/v1/agents/tmdb/prefetch", post(tmdb_prefetch))
.route("/api/v1/file/:file_uuid/tmdb-probe", post(tmdb_probe_handler)) .route(
"/api/v1/file/:file_uuid/tmdb-probe",
post(tmdb_probe_handler),
)
.route("/api/v1/tmdb/fetch", post(tmdb_fetch))
.route(
"/api/v1/agents/tmdb/match/:file_uuid",
post(tmdb_match_handler),
)
.route("/api/v1/resource/tmdb", get(tmdb_resource_status)) .route("/api/v1/resource/tmdb", get(tmdb_resource_status))
.route("/api/v1/resource/tmdb/check", post(tmdb_resource_check)) .route("/api/v1/resource/tmdb/check", post(tmdb_resource_check))
} }
@@ -79,9 +113,10 @@ async fn tmdb_prefetch(
let file_uuid = req.file_uuid; let file_uuid = req.file_uuid;
// Verify file exists in DB // Verify file exists in DB
let file_exists: bool = sqlx::query_scalar( let file_exists: bool = sqlx::query_scalar(&format!(
&format!("SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", crate::core::db::schema::table_name("videos")) "SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
) crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid) .bind(&file_uuid)
.fetch_one(state.db.pool()) .fetch_one(state.db.pool())
.await .await
@@ -182,18 +217,22 @@ async fn tmdb_probe_handler(
let file_uuid = params.file_uuid; let file_uuid = params.file_uuid;
// Verify file exists // Verify file exists
let file_exists: bool = sqlx::query_scalar( let file_exists: bool = sqlx::query_scalar(&format!(
&format!("SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", crate::core::db::schema::table_name("videos")) "SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
) crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid) .bind(&file_uuid)
.fetch_one(state.db.pool()) .fetch_one(state.db.pool())
.await .await
.unwrap_or(false); .unwrap_or(false);
if !file_exists { if !file_exists {
return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ return Err((
"error": "Video not found", "file_uuid": file_uuid StatusCode::NOT_FOUND,
})))); Json(serde_json::json!({
"error": "Video not found", "file_uuid": file_uuid
})),
));
} }
match tmdb::probe::probe_from_cache(&state.db, &file_uuid).await { match tmdb::probe::probe_from_cache(&state.db, &file_uuid).await {
@@ -214,7 +253,10 @@ async fn tmdb_probe_handler(
.await .await
{ {
for uuid in rows { for uuid in rows {
let _ = crate::core::identity::storage::save_identity_file_by_pool(&pool, &uuid).await; let _ = crate::core::identity::storage::save_identity_file_by_pool(
&pool, &uuid,
)
.await;
} }
} }
}); });
@@ -245,24 +287,26 @@ async fn tmdb_probe_handler(
message: "No TMDb cache found. Run tmdb-prefetch first.".to_string(), message: "No TMDb cache found. Run tmdb-prefetch first.".to_string(),
})) }))
} else { } else {
Err((StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ Err((
"error": msg, "file_uuid": file_uuid StatusCode::INTERNAL_SERVER_ERROR,
})))) Json(serde_json::json!({
"error": msg, "file_uuid": file_uuid
})),
))
} }
} }
} }
} }
async fn tmdb_resource_status( async fn tmdb_resource_status(State(state): State<AppState>) -> Json<TmdbResourceResponse> {
State(state): State<AppState>,
) -> Json<TmdbResourceResponse> {
let status = tmdb::status::quick_status(); let status = tmdb::status::quick_status();
let identities_seeded = tmdb::status::count_tmdb_identities(state.db.pool()) let identities_seeded = tmdb::status::count_tmdb_identities(state.db.pool())
.await .await
.unwrap_or(0); .unwrap_or(0);
let identities_with_embedding = tmdb::status::count_tmdb_identities_with_embedding(state.db.pool()) let identities_with_embedding =
.await tmdb::status::count_tmdb_identities_with_embedding(state.db.pool())
.unwrap_or(0); .await
.unwrap_or(0);
let cache_files = tmdb::status::count_cache_files(); let cache_files = tmdb::status::count_cache_files();
Json(TmdbResourceResponse { Json(TmdbResourceResponse {
@@ -303,3 +347,383 @@ async fn tmdb_resource_check() -> Json<TmdbCheckResponse> {
status, status,
}) })
} }
async fn tmdb_fetch(
State(state): State<AppState>,
Json(req): Json<TmdbFetchRequest>,
) -> Result<Json<TmdbFetchResponse>, (StatusCode, Json<serde_json::Value>)> {
let file_uuid = req.file_uuid;
let filename: Option<String> = sqlx::query_scalar(&format!(
"SELECT file_name FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": e.to_string()})),
)
})?
.flatten();
let filename = filename.ok_or_else(|| {
(
StatusCode::NOT_FOUND,
Json(serde_json::json!({"error": "File not found"})),
)
})?;
// Run probe to create identities
match tmdb::probe::probe_movie(&state.db, &filename, &file_uuid).await {
Ok(Some(probe_result)) => {
let mut member_results = Vec::new();
// Read the cache to get cast list with names and profile URLs
if let Ok(cache) = tmdb::cache::read_tmdb_cache(&file_uuid) {
for member in &cache.cast {
let name = member.name.clone();
let character = if member.character.is_empty() {
None
} else {
Some(member.character.clone())
};
let aliases = member.also_known_as.clone();
let profile_url = member
.profile_path
.as_ref()
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
let metadata = serde_json::json!({
"tmdb_id": member.id,
"name": member.name,
"character": member.character,
"aliases": member.also_known_as,
"profile_path": member.profile_path,
"order": member.order,
"biography": member.biography,
"birthday": member.birthday,
"place_of_birth": member.place_of_birth,
"imdb_id": member.imdb_id,
"known_for_department": member.known_for_department,
"popularity": member.popularity,
"deathday": member.deathday,
"gender": member.gender,
"homepage": member.homepage,
});
let identity_row = sqlx::query_as::<_, (i32, uuid::Uuid)>(&format!(
"SELECT id, uuid FROM {} WHERE name = $1 AND source = 'tmdb' LIMIT 1",
crate::core::db::schema::table_name("identities")
))
.bind(&name)
.fetch_optional(state.db.pool())
.await;
match identity_row {
Ok(Some((identity_id, uuid))) => {
let clean = uuid.to_string().replace('-', "");
let dir = crate::core::identity::storage::identity_dir(&clean);
std::fs::create_dir_all(&dir).ok();
let json_result = crate::core::identity::storage::save_identity_file(
&state.db, &clean,
)
.await;
let has_json = json_result.is_ok();
let has_jpg = if let Some(url) = &profile_url {
let jpg_path = dir.join("profile.jpg");
if jpg_path.exists() {
true
} else if let Ok(resp) = reqwest::get(url).await {
if let Ok(bytes) = resp.bytes().await {
std::fs::write(&jpg_path, &bytes).is_ok()
} else {
false
}
} else {
false
}
} else {
false
};
// Push face_embedding to Qdrant if available
let face_collection = format!(
"{}_faces",
crate::core::config::REDIS_KEY_PREFIX
.as_str()
.trim_end_matches(':')
);
let emb_row: Option<(Vec<f32>,)> = sqlx::query_as(
&format!(
"SELECT face_embedding::real[] FROM {} WHERE uuid = $1 AND face_embedding IS NOT NULL",
crate::core::db::schema::table_name("identities")
)
)
.bind(&uuid)
.fetch_optional(state.db.pool())
.await
.unwrap_or(None);
if let Some((embedding,)) = emb_row {
let qdrant = QdrantDb::new();
qdrant.ensure_collection(&face_collection, 512).await.ok();
let _ = qdrant
.upsert_vector_to_collection(
&face_collection,
identity_id as u64,
&embedding,
Some(serde_json::json!({
"identity_id": identity_id,
"name": name,
"source": "tmdb",
})),
)
.await;
}
let status = if has_json && has_jpg {
"success"
} else {
"partial"
};
let error = if !has_json {
Some(format!("{:?}", json_result.err()))
} else if !has_jpg {
Some("profile download failed".to_string())
} else {
None
};
member_results.push(TmdbFetchMemberResult {
name: name.clone(),
character: character.clone(),
aliases: aliases.clone(),
metadata: metadata.clone(),
status: status.to_string(),
has_json,
has_jpg,
error,
});
}
Ok(None) => {
member_results.push(TmdbFetchMemberResult {
name: name.clone(),
character: character.clone(),
aliases: aliases.clone(),
metadata: metadata.clone(),
status: "skipped".to_string(),
has_json: false,
has_jpg: false,
error: None,
});
}
Err(e) => {
member_results.push(TmdbFetchMemberResult {
name: name.clone(),
character: character.clone(),
aliases: aliases.clone(),
metadata: metadata.clone(),
status: "error".to_string(),
has_json: false,
has_jpg: false,
error: Some(format!("DB error: {}", e)),
});
}
}
}
}
let total = member_results.len();
let success_count = member_results
.iter()
.filter(|r| r.status == "success")
.count();
let json_count = member_results.iter().filter(|r| r.has_json).count();
let jpg_count = member_results.iter().filter(|r| r.has_jpg).count();
Ok(Json(TmdbFetchResponse {
success: true,
movie_title: Some(probe_result.title),
tmdb_id: Some(probe_result.tmdb_id),
results: member_results,
summary: serde_json::json!({
"total": total,
"success": success_count,
"with_json": json_count,
"with_jpg": jpg_count,
}),
}))
}
Ok(None) => Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"error": "No movie found for this filename"
})),
)),
Err(e) => Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({
"error": e.to_string()
})),
)),
}
}
/// JSON response body returned by `tmdb_match_handler`.
#[derive(Debug, Serialize)]
struct TmdbMatchResponse {
// The handler always sets this to `true` when it builds a response.
success: bool,
// UUID of the video that was matched, echoed from the request path.
file_uuid: String,
// Sum of rows affected by the identity-binding UPDATE statements.
bindings_created: usize,
// Number of TMDb identities that had a face embedding to match with.
tmdb_identities_available: usize,
// Human-readable summary of the outcome.
message: String,
}
async fn tmdb_match_handler(
Path(params): Path<FileUuidParam>,
State(state): State<AppState>,
) -> Result<Json<TmdbMatchResponse>, (StatusCode, Json<serde_json::Value>)> {
let file_uuid = params.file_uuid;
// Verify file exists
let file_exists: bool = sqlx::query_scalar(&format!(
"SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_one(state.db.pool())
.await
.unwrap_or(false);
if !file_exists {
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"error": "Video not found", "file_uuid": file_uuid
})),
));
}
// Get all TMDb identities with face_embedding
let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec<f32>)>(
&format!(
"SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL",
crate::core::db::schema::table_name("identities")
)
)
.fetch_all(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
})?;
if tmdb_rows.is_empty() {
return Ok(Json(TmdbMatchResponse {
success: true,
file_uuid,
bindings_created: 0,
tmdb_identities_available: 0,
message: "No TMDb identities with face embeddings".to_string(),
}));
}
let face_collection = format!(
"{}_faces",
crate::core::config::REDIS_KEY_PREFIX
.as_str()
.trim_end_matches(':')
);
let qdrant = QdrantDb::new();
let _ = qdrant.ensure_collection(&face_collection, 512).await;
let trace_collection = format!(
"{}_traces",
crate::core::config::REDIS_KEY_PREFIX
.as_str()
.trim_end_matches(':')
);
let _ = qdrant.ensure_collection(&trace_collection, 512).await;
// Sync trace embeddings (idempotent)
if let Err(e) = crate::core::db::qdrant_db::sync_trace_embeddings(&file_uuid).await {
tracing::error!("[TKG-MATCH] Trace sync failed: {}", e);
}
let mut total_bindings = 0usize;
for (tmdb_id, tmdb_name, tmdb_embedding) in &tmdb_rows {
// Search Qdrant trace collection with this TMDb embedding
let results = match qdrant
.search_face_collection(
&trace_collection,
tmdb_embedding,
100,
"source",
"tmdb",
Some(&file_uuid),
)
.await
{
Ok(r) => r,
Err(e) => {
tracing::warn!("[TKG-MATCH] Qdrant search failed for {}: {}", tmdb_name, e);
continue;
}
};
// Filter results by threshold and file_uuid
let filtered: Vec<_> = results
.into_iter()
.filter(|(score, payload)| {
*score >= 0.50
&& payload.get("file_uuid").and_then(|v| v.as_str()) == Some(&file_uuid)
})
.collect();
if filtered.is_empty() {
continue;
}
// Bind matched traces directly
let mut bound_count = 0usize;
for (_score, payload) in &filtered {
if let Some(tid) = payload.get("trace_id").and_then(|v| v.as_i64()) {
let r = sqlx::query(&format!(
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
crate::core::db::schema::table_name("face_detections")
))
.bind(tmdb_id)
.bind(&file_uuid)
.bind(tid as i32)
.execute(state.db.pool())
.await;
if let Ok(result) = r {
bound_count += result.rows_affected() as usize;
}
}
}
if bound_count > 0 {
tracing::info!(
"[TKG-MATCH] {}: bound {} traces to TMDb identity {}",
tmdb_name,
bound_count,
tmdb_id
);
}
total_bindings += bound_count;
}
Ok(Json(TmdbMatchResponse {
success: true,
file_uuid,
bindings_created: total_bindings,
tmdb_identities_available: tmdb_rows.len(),
message: format!("{} traces matched to TMDb identities", total_bindings),
}))
}

View File

@@ -11,10 +11,7 @@ use crate::core::db::PostgresDb;
pub fn trace_agent_routes() -> Router<crate::api::server::AppState> { pub fn trace_agent_routes() -> Router<crate::api::server::AppState> {
Router::new() Router::new()
.route( .route("/api/v1/file/:file_uuid/traces", post(list_traces_sorted))
"/api/v1/file/:file_uuid/traces",
post(list_traces_sorted),
)
.route( .route(
"/api/v1/file/:file_uuid/trace/:trace_id/faces", "/api/v1/file/:file_uuid/trace/:trace_id/faces",
get(list_trace_faces), get(list_trace_faces),
@@ -78,14 +75,15 @@ async fn list_traces_sorted(
_ => "start_frame ASC", _ => "start_frame ASC",
}; };
let fps: f64 = let fps: f64 = sqlx::query_scalar(&format!(
sqlx::query_scalar(&format!("SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos"))) crate::core::db::schema::table_name("videos")
.bind(&file_uuid) ))
.fetch_optional(state.db.pool()) .bind(&file_uuid)
.await .fetch_optional(state.db.pool())
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))? .await
.unwrap_or(24.0); .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
.unwrap_or(24.0);
let query = format!( let query = format!(
"SELECT tt.*, fd.id AS sample_face_id FROM ( "SELECT tt.*, fd.id AS sample_face_id FROM (
@@ -113,17 +111,16 @@ async fn list_traces_sorted(
crate::core::db::schema::table_name("face_detections"), crate::core::db::schema::table_name("face_detections"),
); );
let rows: Vec<(i32, i64, i32, i32, f64, f64, Option<i32>)> = let rows: Vec<(i32, i64, i32, i32, f64, f64, Option<i32>)> = sqlx::query_as(&query)
sqlx::query_as(&query) .bind(&file_uuid)
.bind(&file_uuid) .bind(min_faces)
.bind(min_faces) .bind(effective_limit)
.bind(effective_limit) .bind(db_offset)
.bind(db_offset) .bind(min_confidence)
.bind(min_confidence) .bind(max_confidence)
.bind(max_confidence) .fetch_all(state.db.pool())
.fetch_all(state.db.pool()) .await
.await .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let traces: Vec<TraceInfo> = rows let traces: Vec<TraceInfo> = rows
.into_iter() .into_iter()
@@ -220,19 +217,20 @@ async fn list_trace_faces(
}; };
let interpolate = q.interpolate.unwrap_or(false); let interpolate = q.interpolate.unwrap_or(false);
let fps: f64 = let fps: f64 = sqlx::query_scalar(&format!(
sqlx::query_scalar(&format!("SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos"))) crate::core::db::schema::table_name("videos")
.bind(&file_uuid) ))
.fetch_optional(state.db.pool()) .bind(&file_uuid)
.await .fetch_optional(state.db.pool())
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))? .await
.unwrap_or(24.0); .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
.unwrap_or(24.0);
let total_detected: i64 = sqlx::query_scalar( let total_detected: i64 = sqlx::query_scalar(&format!(
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2", "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
crate::core::db::schema::table_name("face_detections")) crate::core::db::schema::table_name("face_detections")
) ))
.bind(&file_uuid) .bind(&file_uuid)
.bind(trace_id) .bind(trace_id)
.fetch_one(state.db.pool()) .fetch_one(state.db.pool())
@@ -247,12 +245,12 @@ async fn list_trace_faces(
Option<i32>, Option<i32>,
Option<i32>, Option<i32>,
f32, f32,
)> = sqlx::query_as( )> = sqlx::query_as(&format!(
&format!("SELECT id, frame_number::int, x, y, width, height, confidence::float4 \ "SELECT id, frame_number::int, x, y, width, height, confidence::float4 \
FROM {} WHERE file_uuid = $1 AND trace_id = $2 \ FROM {} WHERE file_uuid = $1 AND trace_id = $2 \
ORDER BY frame_number ASC LIMIT $3 OFFSET $4", ORDER BY frame_number ASC LIMIT $3 OFFSET $4",
crate::core::db::schema::table_name("face_detections")) crate::core::db::schema::table_name("face_detections")
) ))
.bind(&file_uuid) .bind(&file_uuid)
.bind(trace_id) .bind(trace_id)
.bind(limit) .bind(limit)

View File

@@ -88,9 +88,9 @@ pub enum SearchResult {
}, },
#[serde(rename = "person")] #[serde(rename = "person")]
Person { Person {
person_id: String, identity_id: i32,
identity_uuid: String,
name: Option<String>, name: Option<String>,
speaker_id: Option<String>,
appearance_count: i32, appearance_count: i32,
score: f64, score: f64,
first_appearance_time: Option<f64>, first_appearance_time: Option<f64>,
@@ -168,7 +168,7 @@ pub async fn universal_search(
results.retain(|r| match r { results.retain(|r| match r {
SearchResult::Chunk { chunk_id, .. } => seen_chunks.insert(chunk_id.clone()), SearchResult::Chunk { chunk_id, .. } => seen_chunks.insert(chunk_id.clone()),
SearchResult::Frame { frame_number, .. } => seen_frames.insert(*frame_number), SearchResult::Frame { frame_number, .. } => seen_frames.insert(*frame_number),
SearchResult::Person { person_id, .. } => seen_persons.insert(person_id.clone()), SearchResult::Person { identity_id, .. } => seen_persons.insert(*identity_id),
}); });
} }
@@ -251,9 +251,9 @@ pub async fn search_persons(
let limit = query.limit.unwrap_or(20); let limit = query.limit.unwrap_or(20);
let persons = search_persons_by_query( let persons = search_persons_by_query(
&db, &db,
&query.file_uuid,
&query.query, &query.query,
query.min_appearances, query.min_appearances,
query.max_age,
limit, limit,
) )
.await .await
@@ -305,7 +305,6 @@ pub struct PersonSearchQuery {
pub file_uuid: String, pub file_uuid: String,
pub query: Option<String>, pub query: Option<String>,
pub min_appearances: Option<i32>, pub min_appearances: Option<i32>,
pub max_age: Option<i32>, // New filter for "children"
pub limit: Option<usize>, pub limit: Option<usize>,
} }
@@ -317,13 +316,9 @@ pub struct PersonSearchResponse {
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
pub struct PersonResult { pub struct PersonResult {
pub person_id: String, pub identity_id: i32,
pub identity_uuid: String,
pub name: Option<String>, pub name: Option<String>,
pub character_name: Option<String>,
pub aliases: Option<Vec<String>>,
pub age: Option<i32>,
pub gender: Option<String>,
pub speaker_id: Option<String>,
pub appearance_count: i32, pub appearance_count: i32,
pub first_appearance_time: Option<f64>, pub first_appearance_time: Option<f64>,
pub last_appearance_time: Option<f64>, pub last_appearance_time: Option<f64>,
@@ -594,43 +589,37 @@ async fn search_persons_internal(
db: &PostgresDb, db: &PostgresDb,
req: &UniversalSearchRequest, req: &UniversalSearchRequest,
) -> Result<Vec<SearchResult>, anyhow::Error> { ) -> Result<Vec<SearchResult>, anyhow::Error> {
let table = "person_identities"; let uuid = match &req.file_uuid {
Some(u) => u.replace('\'', "''"),
None => return Err(anyhow::anyhow!("file_uuid is required for person search")),
};
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let mut sql = format!( let mut sql = format!(
"SELECT person_id, name, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM {} WHERE 1=1", "SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
table MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
FROM {} i JOIN {} fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = '{}'",
id_table, fd_table, uuid
); );
if !req.query.is_empty() { if !req.query.is_empty() {
sql.push_str(&format!( let q = req.query.replace('\'', "''");
" AND (name ILIKE '%{}%' OR person_id ILIKE '%{}%' OR speaker_id ILIKE '%{}%')", sql.push_str(&format!(" AND i.name ILIKE '%{}%'", q));
req.query, req.query, req.query
));
}
if let Some(ref filters) = req.filters {
if let Some(ref speaker_id) = filters.speaker_id {
sql.push_str(&format!(" AND speaker_id = '{}'", speaker_id));
}
if let Some(ref person_id) = filters.person_id {
sql.push_str(&format!(" AND person_id = '{}'", person_id));
}
} }
sql.push_str(" GROUP BY i.id, i.uuid, i.name");
sql.push_str(" ORDER BY appearance_count DESC"); sql.push_str(" ORDER BY appearance_count DESC");
sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20))); sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));
let rows: Vec<( let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
String, sqlx::query_as(&sql).fetch_all(db.pool()).await?;
Option<String>,
Option<String>,
i32,
Option<f64>,
Option<f64>,
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
let results: Vec<SearchResult> = rows let results: Vec<SearchResult> = rows
.into_iter() .into_iter()
.map( .map(
|(person_id, name, speaker_id, appearance_count, first_time, last_time)| { |(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
let score = if !req.query.is_empty() let score = if !req.query.is_empty()
&& name.as_ref().map_or(false, |n| { && name.as_ref().map_or(false, |n| {
n.to_lowercase().contains(&req.query.to_lowercase()) n.to_lowercase().contains(&req.query.to_lowercase())
@@ -641,10 +630,10 @@ async fn search_persons_internal(
}; };
SearchResult::Person { SearchResult::Person {
person_id, identity_id,
identity_uuid,
name, name,
speaker_id, appearance_count: appearance_count as i32,
appearance_count,
score, score,
first_appearance_time: first_time, first_appearance_time: first_time,
last_appearance_time: last_time, last_appearance_time: last_time,
@@ -739,82 +728,49 @@ async fn search_frames_internal_v2(
async fn search_persons_by_query( async fn search_persons_by_query(
db: &PostgresDb, db: &PostgresDb,
file_uuid: &str,
query: &Option<String>, query: &Option<String>,
min_appearances: Option<i32>, min_appearances: Option<i32>,
max_age: Option<i32>,
limit: usize, limit: usize,
) -> Result<Vec<PersonResult>, anyhow::Error> { ) -> Result<Vec<PersonResult>, anyhow::Error> {
let table = "person_identities"; let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let mut sql = format!( let mut sql = format!(
"SELECT person_id, name, character_name, aliases, age, gender, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM {} WHERE 1=1", "SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
table MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
FROM {} i JOIN {} fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = '{}'",
id_table,
fd_table,
file_uuid.replace('\'', "''")
); );
if let Some(ref q) = query { if let Some(q) = query {
// Search name, character_name, aliases (cast to text), person_id, speaker_id let safe = q.replace('\'', "''");
sql.push_str(&format!( sql.push_str(&format!(" AND i.name ILIKE '%{}%'", safe));
" AND (name ILIKE '%{}%' OR character_name ILIKE '%{}%' OR aliases::text ILIKE '%{}%' OR person_id ILIKE '%{}%' OR speaker_id ILIKE '%{}%')",
q, q, q, q, q
));
} }
sql.push_str(" GROUP BY i.id, i.uuid, i.name");
if let Some(min) = min_appearances { if let Some(min) = min_appearances {
sql.push_str(&format!(" AND appearance_count >= {}", min)); sql.push_str(&format!(" HAVING COUNT(fd.id) >= {}", min));
}
if let Some(max_a) = max_age {
// Strictly filter for age <= max_age.
// Note: This excludes entries with NULL age.
sql.push_str(&format!(" AND age <= {}", max_a));
} }
sql.push_str(" ORDER BY appearance_count DESC"); sql.push_str(" ORDER BY appearance_count DESC");
sql.push_str(&format!(" LIMIT {}", limit)); sql.push_str(&format!(" LIMIT {}", limit));
let rows: Vec<( let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
String, sqlx::query_as(&sql).fetch_all(db.pool()).await?;
Option<String>,
Option<String>,
Option<serde_json::Value>,
Option<i32>,
Option<String>,
Option<String>,
i32,
Option<f64>,
Option<f64>,
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
let results: Vec<PersonResult> = rows let results: Vec<PersonResult> = rows
.into_iter() .into_iter()
.map( .map(
|( |(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
person_id,
name,
character_name,
aliases_json,
age,
gender,
speaker_id,
appearance_count,
first_time,
last_time,
)| {
let aliases = aliases_json.and_then(|v| {
v.as_array().map(|arr| {
arr.iter()
.filter_map(|val| val.as_str().map(String::from))
.collect()
})
});
PersonResult { PersonResult {
person_id, identity_id,
identity_uuid,
name, name,
character_name, appearance_count: appearance_count as i32,
aliases,
age,
gender,
speaker_id,
appearance_count,
first_appearance_time: first_time, first_appearance_time: first_time,
last_appearance_time: last_time, last_appearance_time: last_time,
} }

View File

@@ -392,8 +392,14 @@ pub async fn get_visual_chunk_statistics(
uuid.replace('\'', "''") uuid.replace('\'', "''")
); );
let row: (i64, Option<f64>, Option<f64>, Option<f64>, Option<i64>, Option<f64>) = let row: (
sqlx::query_as(&sql).fetch_one(db.pool()).await?; i64,
Option<f64>,
Option<f64>,
Option<f64>,
Option<i64>,
Option<f64>,
) = sqlx::query_as(&sql).fetch_one(db.pool()).await?;
let mut stats = HashMap::new(); let mut stats = HashMap::new();
stats.insert("total_chunks".to_string(), Value::from(row.0)); stats.insert("total_chunks".to_string(), Value::from(row.0));

View File

@@ -13,7 +13,14 @@ use std::path::{Path, PathBuf};
use std::process::Command; use std::process::Command;
fn dir_size(path: &Path) -> u64 { fn dir_size(path: &Path) -> u64 {
path.read_dir().map(|d| d.filter_map(|e| e.ok()).filter_map(|e| e.metadata().ok()).map(|m| m.len()).sum()).unwrap_or(0) path.read_dir()
.map(|d| {
d.filter_map(|e| e.ok())
.filter_map(|e| e.metadata().ok())
.map(|m| m.len())
.sum()
})
.unwrap_or(0)
} }
const DEMO_DIR: &str = "/Users/accusys/momentry/var/sftpgo/data/demo"; const DEMO_DIR: &str = "/Users/accusys/momentry/var/sftpgo/data/demo";
@@ -22,7 +29,10 @@ const RELEASE_DIR: &str = "/Users/accusys/momentry_core_0.1/release/files";
const PG_BIN: &str = "/Users/accusys/pgsql/18.3/bin"; const PG_BIN: &str = "/Users/accusys/pgsql/18.3/bin";
#[derive(Parser)] #[derive(Parser)]
#[command(name = "release", about = "Release Manager — deploy/undeploy video packages")] #[command(
name = "release",
about = "Release Manager — deploy/undeploy video packages"
)]
struct Cli { struct Cli {
#[command(subcommand)] #[command(subcommand)]
command: Commands, command: Commands,
@@ -107,7 +117,12 @@ fn extract_tarball(tarball: &Path) -> Result<PathBuf> {
fs::create_dir_all(&tmpdir)?; fs::create_dir_all(&tmpdir)?;
let status = Command::new("tar") let status = Command::new("tar")
.args(["-xzf", tarball.to_str().unwrap(), "-C", tmpdir.to_str().unwrap()]) .args([
"-xzf",
tarball.to_str().unwrap(),
"-C",
tmpdir.to_str().unwrap(),
])
.status() .status()
.context("tar extraction failed")?; .context("tar extraction failed")?;
if !status.success() { if !status.success() {
@@ -127,8 +142,8 @@ fn extract_tarball(tarball: &Path) -> Result<PathBuf> {
/// Get file_info.json from package directory /// Get file_info.json from package directory
fn read_file_info(pkg_dir: &Path) -> Result<serde_json::Value> { fn read_file_info(pkg_dir: &Path) -> Result<serde_json::Value> {
let info_path = pkg_dir.join("file_info.json"); let info_path = pkg_dir.join("file_info.json");
let content = fs::read_to_string(&info_path) let content =
.with_context(|| format!("Cannot read {:?}", info_path))?; fs::read_to_string(&info_path).with_context(|| format!("Cannot read {:?}", info_path))?;
serde_json::from_str(&content).context("Invalid file_info.json") serde_json::from_str(&content).context("Invalid file_info.json")
} }
@@ -140,7 +155,10 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
anyhow::bail!("File not found: {}", tarball); anyhow::bail!("File not found: {}", tarball);
} }
println!("=== Deploy: {} ===", tarball_path.file_name().unwrap().to_str().unwrap()); println!(
"=== Deploy: {} ===",
tarball_path.file_name().unwrap().to_str().unwrap()
);
// Extract // Extract
let pkg_dir = extract_tarball(tarball_path)?; let pkg_dir = extract_tarball(tarball_path)?;
@@ -148,7 +166,9 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
// Read file_info // Read file_info
let info = read_file_info(&pkg_dir)?; let info = read_file_info(&pkg_dir)?;
let uuid = info["file_uuid"].as_str().context("Missing file_uuid in file_info.json")?; let uuid = info["file_uuid"]
.as_str()
.context("Missing file_uuid in file_info.json")?;
let file_name = info["file_name"].as_str().unwrap_or("?"); let file_name = info["file_name"].as_str().unwrap_or("?");
println!("UUID: {}\nVideo: {}", uuid, file_name); println!("UUID: {}\nVideo: {}", uuid, file_name);
@@ -168,7 +188,8 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
let entry = entry?; let entry = entry?;
let fname = entry.file_name(); let fname = entry.file_name();
let fname_str = fname.to_str().unwrap_or(""); let fname_str = fname.to_str().unwrap_or("");
if fname_str.ends_with(".mp4") || fname_str.ends_with(".mov") || fname_str.ends_with(".avi") { if fname_str.ends_with(".mp4") || fname_str.ends_with(".mov") || fname_str.ends_with(".avi")
{
let dest = Path::new(DEMO_DIR).join(&fname); let dest = Path::new(DEMO_DIR).join(&fname);
if !dest.exists() { if !dest.exists() {
fs::copy(entry.path(), &dest)?; fs::copy(entry.path(), &dest)?;
@@ -192,12 +213,15 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
println!("Output files copied to {}", OUTPUT_DIR); println!("Output files copied to {}", OUTPUT_DIR);
// Verify // Verify
let chunk_count: (i64,) = sqlx::query_as( let chunk_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = $1")
"SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = $1" .bind(uuid)
).bind(uuid).fetch_one(db.pool()).await?; .fetch_one(db.pool())
let face_count: (i64,) = sqlx::query_as( .await?;
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1" let face_count: (i64,) =
).bind(uuid).fetch_one(db.pool()).await?; sqlx::query_as("SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1")
.bind(uuid)
.fetch_one(db.pool())
.await?;
// Cleanup // Cleanup
fs::remove_dir_all(&pkg_dir.parent().unwrap_or(&pkg_dir))?; fs::remove_dir_all(&pkg_dir.parent().unwrap_or(&pkg_dir))?;
@@ -213,9 +237,11 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result<()> { async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result<()> {
// Get video info // Get video info
let rows: Vec<(String, String)> = sqlx::query_as( let rows: Vec<(String, String)> =
"SELECT file_name, file_path FROM dev.videos WHERE file_uuid = $1" sqlx::query_as("SELECT file_name, file_path FROM dev.videos WHERE file_uuid = $1")
).bind(uuid).fetch_all(db.pool()).await?; .bind(uuid)
.fetch_all(db.pool())
.await?;
if rows.is_empty() { if rows.is_empty() {
anyhow::bail!("UUID {} not found in DB", uuid); anyhow::bail!("UUID {} not found in DB", uuid);
@@ -252,7 +278,9 @@ async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result
println!(" {}: {} rows deleted", tbl, result.rows_affected()); println!(" {}: {} rows deleted", tbl, result.rows_affected());
} }
sqlx::query("DELETE FROM dev.videos WHERE file_uuid = $1") sqlx::query("DELETE FROM dev.videos WHERE file_uuid = $1")
.bind(uuid).execute(db.pool()).await?; .bind(uuid)
.execute(db.pool())
.await?;
println!(" dev.videos: removed"); println!(" dev.videos: removed");
// Delete output files // Delete output files
@@ -270,7 +298,10 @@ async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result
let vp = Path::new(file_path); let vp = Path::new(file_path);
if vp.exists() { if vp.exists() {
fs::remove_file(vp)?; fs::remove_file(vp)?;
println!(" Video file: removed ({})", vp.file_name().unwrap().to_str().unwrap_or("?")); println!(
" Video file: removed ({})",
vp.file_name().unwrap().to_str().unwrap_or("?")
);
} }
} }
@@ -292,11 +323,15 @@ async fn cmd_list(db: &PostgresDb) -> Result<()> {
"SELECT file_uuid, file_name, duration, status, "SELECT file_uuid, file_name, duration, status,
(SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = v.file_uuid) as chunks, (SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = v.file_uuid) as chunks,
(SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = v.file_uuid) as faces (SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = v.file_uuid) as faces
FROM dev.videos v ORDER BY id DESC" FROM dev.videos v ORDER BY id DESC",
).fetch_all(db.pool()).await?; )
.fetch_all(db.pool())
.await?;
println!("{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}", println!(
"UUID", "Name", "Duration", "Status", "Chunks", "Faces"); "{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
"UUID", "Name", "Duration", "Status", "Chunks", "Faces"
);
println!("{}", "-".repeat(116)); println!("{}", "-".repeat(116));
for row in &rows { for row in &rows {
@@ -318,10 +353,15 @@ async fn cmd_list(db: &PostgresDb) -> Result<()> {
name.clone() name.clone()
}; };
println!("{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}", println!(
uuid, short_name, dur_str, "{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
uuid,
short_name,
dur_str,
status.as_deref().unwrap_or("?"), status.as_deref().unwrap_or("?"),
chunks.unwrap_or(0), faces.unwrap_or(0)); chunks.unwrap_or(0),
faces.unwrap_or(0)
);
} }
Ok(()) Ok(())
} }
@@ -336,9 +376,23 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
"SELECT file_uuid, file_name, file_path, duration, fps, width, height FROM dev.videos WHERE file_uuid = $1" "SELECT file_uuid, file_name, file_path, duration, fps, width, height FROM dev.videos WHERE file_uuid = $1"
).bind(uuid).fetch_optional(db.pool()).await?; ).bind(uuid).fetch_optional(db.pool()).await?;
let (_, file_name, file_path, duration, fps, width, height): ( let (_, file_name, file_path, duration, fps, width, height): (
String, String, String, Option<f64>, Option<f64>, Option<i32>, Option<i32> String,
String,
String,
Option<f64>,
Option<f64>,
Option<i32>,
Option<i32>,
) = match row { ) = match row {
Some(r) => (r.get(0), r.get(1), r.get(2), r.get(3), r.get(4), r.get(5), r.get(6)), Some(r) => (
r.get(0),
r.get(1),
r.get(2),
r.get(3),
r.get(4),
r.get(5),
r.get(6),
),
None => anyhow::bail!("UUID {} not found", uuid), None => anyhow::bail!("UUID {} not found", uuid),
}; };
@@ -360,7 +414,10 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
"momentry_version": env!("CARGO_PKG_VERSION"), "momentry_version": env!("CARGO_PKG_VERSION"),
"momentry_build": env!("BUILD_GIT_HASH"), "momentry_build": env!("BUILD_GIT_HASH"),
}); });
fs::write(outdir.join("file_info.json"), serde_json::to_string_pretty(&info)?)?; fs::write(
outdir.join("file_info.json"),
serde_json::to_string_pretty(&info)?,
)?;
// Export per-table .sql files (avoid single 4.7GB psql load) // Export per-table .sql files (avoid single 4.7GB psql load)
let sql_dir = outdir.join("sql"); let sql_dir = outdir.join("sql");
@@ -376,7 +433,13 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let mut import_order = vec!["master.sql"]; let mut import_order = vec!["master.sql"];
fn write_table_sql(outdir: &Path, tbl: &str, col: &str, uuid: &str, psql_exec: &dyn Fn(&str) -> Result<String>) -> Result<()> { fn write_table_sql(
outdir: &Path,
tbl: &str,
col: &str,
uuid: &str,
psql_exec: &dyn Fn(&str) -> Result<String>,
) -> Result<()> {
let safe_name = tbl.replace('.', "_"); let safe_name = tbl.replace('.', "_");
let path = outdir.join(format!("{}.sql", safe_name)); let path = outdir.join(format!("{}.sql", safe_name));
let parts: Vec<&str> = tbl.split('.').collect(); let parts: Vec<&str> = tbl.split('.').collect();
@@ -419,8 +482,16 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let data = psql_exec(&idents_query)?; let data = psql_exec(&idents_query)?;
if !data.is_empty() { if !data.is_empty() {
let mut f = fs::File::create(&idents_path)?; let mut f = fs::File::create(&idents_path)?;
writeln!(f, "-- dev.identities WHERE file_uuid = '{}' OR global (tmdb/merged/user_defined)", uuid)?; writeln!(
writeln!(f, "COPY dev.identities ({}) FROM STDIN WITH CSV HEADER;", cols)?; f,
"-- dev.identities WHERE file_uuid = '{}' OR global (tmdb/merged/user_defined)",
uuid
)?;
writeln!(
f,
"COPY dev.identities ({}) FROM STDIN WITH CSV HEADER;",
cols
)?;
writeln!(f, "{}", data)?; writeln!(f, "{}", data)?;
writeln!(f, "\\.")?; writeln!(f, "\\.")?;
} }
@@ -440,7 +511,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
if !data.is_empty() { if !data.is_empty() {
let mut f = fs::File::create(&binds_path)?; let mut f = fs::File::create(&binds_path)?;
writeln!(f, "-- dev.identity_bindings (from face_detections JOIN)")?; writeln!(f, "-- dev.identity_bindings (from face_detections JOIN)")?;
writeln!(f, "COPY dev.identity_bindings ({}) FROM STDIN WITH CSV HEADER;", cols)?; writeln!(
f,
"COPY dev.identity_bindings ({}) FROM STDIN WITH CSV HEADER;",
cols
)?;
writeln!(f, "{}", data)?; writeln!(f, "{}", data)?;
writeln!(f, "\\.")?; writeln!(f, "\\.")?;
} }
@@ -469,7 +544,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let sql_path = outdir.join("data.sql"); let sql_path = outdir.join("data.sql");
{ {
let mut f = fs::File::create(&sql_path)?; let mut f = fs::File::create(&sql_path)?;
writeln!(f, "-- Release package: {} — see sql/ for per-table files", uuid)?; writeln!(
f,
"-- Release package: {} — see sql/ for per-table files",
uuid
)?;
writeln!(f, "BEGIN;")?; writeln!(f, "BEGIN;")?;
writeln!(f, "\\i sql/dev_videos.sql")?; writeln!(f, "\\i sql/dev_videos.sql")?;
writeln!(f, "\\i sql/dev_chunk.sql")?; writeln!(f, "\\i sql/dev_chunk.sql")?;
@@ -492,7 +571,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let dest = outdir.join(vp.file_name().unwrap()); let dest = outdir.join(vp.file_name().unwrap());
fs::copy(vp, &dest)?; fs::copy(vp, &dest)?;
let vsize = fs::metadata(&dest)?.len(); let vsize = fs::metadata(&dest)?.len();
println!(" {} ({} MB)", vp.file_name().unwrap().to_str().unwrap_or("?"), vsize / 1024 / 1024); println!(
" {} ({} MB)",
vp.file_name().unwrap().to_str().unwrap_or("?"),
vsize / 1024 / 1024
);
} }
} }
@@ -541,11 +624,18 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let vec0_src = "/Users/accusys/momentry_core_0.1/scripts/vec0.dylib"; let vec0_src = "/Users/accusys/momentry_core_0.1/scripts/vec0.dylib";
if Path::new(vec0_src).exists() { if Path::new(vec0_src).exists() {
fs::copy(vec0_src, outdir.join("vec0.dylib"))?; fs::copy(vec0_src, outdir.join("vec0.dylib"))?;
println!(" vec0.dylib ({} KB)", fs::metadata(outdir.join("vec0.dylib"))?.len() / 1024); println!(
" vec0.dylib ({} KB)",
fs::metadata(outdir.join("vec0.dylib"))?.len() / 1024
);
} }
// Create tar.gz // Create tar.gz
let tarball = Path::new(RELEASE_DIR).join(format!("{}_v{}.tar.gz", uuid, Utc::now().format("%Y%m%d_%H%M%S"))); let tarball = Path::new(RELEASE_DIR).join(format!(
"{}_v{}.tar.gz",
uuid,
Utc::now().format("%Y%m%d_%H%M%S")
));
let status = Command::new("tar") let status = Command::new("tar")
.args(["-czf", tarball.to_str().unwrap(), "-C", RELEASE_DIR, uuid]) .args(["-czf", tarball.to_str().unwrap(), "-C", RELEASE_DIR, uuid])
.status()?; .status()?;
@@ -553,7 +643,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
anyhow::bail!("tar creation failed"); anyhow::bail!("tar creation failed");
} }
let tsize = fs::metadata(&tarball)?.len(); let tsize = fs::metadata(&tarball)?.len();
println!("\n Package: {} ({} MB)", tarball.display(), tsize / 1024 / 1024); println!(
"\n Package: {} ({} MB)",
tarball.display(),
tsize / 1024 / 1024
);
// Sanity check: warn if any sql file is suspiciously large // Sanity check: warn if any sql file is suspiciously large
println!(" Checking sql/ file sizes..."); println!(" Checking sql/ file sizes...");
@@ -564,33 +658,55 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let sz = fs::metadata(&path)?.len() as f64 / 1024.0 / 1024.0; let sz = fs::metadata(&path)?.len() as f64 / 1024.0 / 1024.0;
let name = path.file_stem().and_then(|s| s.to_str()).unwrap_or("?"); let name = path.file_stem().and_then(|s| s.to_str()).unwrap_or("?");
match name { match name {
"dev_videos" | "master" if sz > 1.0 => "dev_videos" | "master" if sz > 1.0 => {
println!(" ⚠️ {} is {} MB, expected < 1 MB", name, sz as u64), println!(" ⚠️ {} is {} MB, expected < 1 MB", name, sz as u64)
"dev_chunk" if sz > 2.0 => }
println!(" ⚠️ {} is {} MB, expected < 2 MB for ~2.4K chunks", name, sz as u64), "dev_chunk" if sz > 2.0 => println!(
"dev_identities" if sz > 1.0 => " ⚠️ {} is {} MB, expected < 2 MB for ~2.4K chunks",
println!(" ⚠️ {} is {} MB, expected < 1 MB for ~428 identities", name, sz as u64), name, sz as u64
"dev_identity_bindings" if sz > 5.0 => ),
println!(" ⚠️ {} is {} MB, expected < 5 MB for ~7.6K bindings", name, sz as u64), "dev_identities" if sz > 1.0 => println!(
"dev_tkg_nodes" if sz > 10.0 => " ⚠️ {} is {} MB, expected < 1 MB for ~428 identities",
println!(" ⚠️ {} is {} MB, expected < 10 MB for ~6.4K nodes", name, sz as u64), name, sz as u64
"dev_tkg_edges" if sz > 20.0 => ),
println!(" ⚠️ {} is {} MB, expected < 20 MB for ~21K edges", name, sz as u64), "dev_identity_bindings" if sz > 5.0 => println!(
"dev_face_detections" if sz > 1000.0 => " ⚠️ {} is {} MB, expected < 5 MB for ~7.6K bindings",
println!(" ⚠️ {} is {} MB, expected < 1000 MB for ~70K faces (512D emb)", name, sz as u64), name, sz as u64
"dev_chunk_vectors" if sz > 200.0 => ),
println!(" ⚠️ {} is {} MB, expected < 200 MB for ~2.4K chunks (768D emb)", name, sz as u64), "dev_tkg_nodes" if sz > 10.0 => println!(
" ⚠️ {} is {} MB, expected < 10 MB for ~6.4K nodes",
name, sz as u64
),
"dev_tkg_edges" if sz > 20.0 => println!(
" ⚠️ {} is {} MB, expected < 20 MB for ~21K edges",
name, sz as u64
),
"dev_face_detections" if sz > 1000.0 => println!(
" ⚠️ {} is {} MB, expected < 1000 MB for ~70K faces (512D emb)",
name, sz as u64
),
"dev_chunk_vectors" if sz > 200.0 => println!(
" ⚠️ {} is {} MB, expected < 200 MB for ~2.4K chunks (768D emb)",
name, sz as u64
),
_ => {} _ => {}
} }
if sz > 2000.0 { if sz > 2000.0 {
println!(" ⚠️ {} is {:.0} MB — unusually large, verify query", name, sz); println!(
" ⚠️ {} is {:.0} MB — unusually large, verify query",
name, sz
);
} }
} }
} }
Ok(()) Ok(())
} }
fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Option<i64>) -> Result<()> { fn cmd_visualize_offline(
sqlite_path: &str,
output: Option<&str>,
identity: Option<i64>,
) -> Result<()> {
let outpath = match output { let outpath = match output {
Some(p) => p.to_string(), Some(p) => p.to_string(),
None => sqlite_path.replace(".sqlite", "_report.html"), None => sqlite_path.replace(".sqlite", "_report.html"),
@@ -606,7 +722,10 @@ fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Opti
.output() .output()
.context("Offline report script failed")?; .context("Offline report script failed")?;
if !output.status.success() { if !output.status.success() {
anyhow::bail!("Offline report: {}", String::from_utf8_lossy(&output.stderr)); anyhow::bail!(
"Offline report: {}",
String::from_utf8_lossy(&output.stderr)
);
} }
println!("{}", String::from_utf8_lossy(&output.stdout)); println!("{}", String::from_utf8_lossy(&output.stdout));
println!("\n Open: {}", outpath); println!("\n Open: {}", outpath);
@@ -624,7 +743,10 @@ fn cmd_visualize(uuid: &str, typ: &str, output: Option<&str>, identity: Option<i
match typ { match typ {
"heatmap" | "density" => generate_face_heatmap(uuid, &outpath, identity)?, "heatmap" | "density" => generate_face_heatmap(uuid, &outpath, identity)?,
"timeline" => generate_face_timeline(uuid, &outpath, identity)?, "timeline" => generate_face_timeline(uuid, &outpath, identity)?,
_ => anyhow::bail!("Unknown visualization type: {}. Try: heatmap, density, timeline", typ), _ => anyhow::bail!(
"Unknown visualization type: {}. Try: heatmap, density, timeline",
typ
),
} }
Ok(()) Ok(())
} }
@@ -698,16 +820,28 @@ fn cmd_stats() -> Result<()> {
for line in listing.lines() { for line in listing.lines() {
let trimmed = line.trim(); let trimmed = line.trim();
if trimmed.is_empty() || trimmed.ends_with('/') { continue; } if trimmed.is_empty() || trimmed.ends_with('/') {
continue;
}
// tar -tvzf format: perms link owner group size date_month date_day time path... // tar -tvzf format: perms link owner group size date_month date_day time path...
// Fields are space-separated; size is 5th field, path starts at 8th field // Fields are space-separated; size is 5th field, path starts at 8th field
let parts: Vec<&str> = trimmed.split_whitespace().collect(); let parts: Vec<&str> = trimmed.split_whitespace().collect();
if parts.len() < 8 { continue; } if parts.len() < 8 {
continue;
}
let fsize = parts[4].parse::<u64>().unwrap_or(0); let fsize = parts[4].parse::<u64>().unwrap_or(0);
let fpath = parts[8..].join(" "); let fpath = parts[8..].join(" ");
let fname = Path::new(&fpath).file_name().unwrap_or_default().to_str().unwrap_or("?"); let fname = Path::new(&fpath)
let ext = Path::new(&fpath).extension().unwrap_or_default().to_str().unwrap_or(""); .file_name()
.unwrap_or_default()
.to_str()
.unwrap_or("?");
let ext = Path::new(&fpath)
.extension()
.unwrap_or_default()
.to_str()
.unwrap_or("");
match ext { match ext {
"sql" => { "sql" => {
@@ -732,10 +866,26 @@ fn cmd_stats() -> Result<()> {
} }
println!(" ─────────────────────────────"); println!(" ─────────────────────────────");
println!(" SQL: {} files, {:.0} MB", sql_count, total_sql as f64 / 1048576.0); println!(
println!(" Video: {} files, {:.0} MB", video_count, total_video as f64 / 1048576.0); " SQL: {} files, {:.0} MB",
println!(" JSON: {} files, {:.0} MB", json_count, total_json as f64 / 1048576.0); sql_count,
println!(" Total: {:.0} MB (compressed: {:.0} MB)", (total_sql + total_video + total_json) as f64 / 1048576.0, pkg_size as f64 / 1048576.0); total_sql as f64 / 1048576.0
);
println!(
" Video: {} files, {:.0} MB",
video_count,
total_video as f64 / 1048576.0
);
println!(
" JSON: {} files, {:.0} MB",
json_count,
total_json as f64 / 1048576.0
);
println!(
" Total: {:.0} MB (compressed: {:.0} MB)",
(total_sql + total_video + total_json) as f64 / 1048576.0,
pkg_size as f64 / 1048576.0
);
println!(); println!();
} }
@@ -758,8 +908,17 @@ async fn main() -> Result<()> {
Commands::List => cmd_list(&db).await?, Commands::List => cmd_list(&db).await?,
Commands::Package { uuid } => cmd_package(&db, &uuid).await?, Commands::Package { uuid } => cmd_package(&db, &uuid).await?,
Commands::Stats => cmd_stats()?, Commands::Stats => cmd_stats()?,
Commands::Visualize { uuid, typ, output, identity } => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?, Commands::Visualize {
Commands::VisualizeOffline { sqlite_path, output, identity } => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?, uuid,
typ,
output,
identity,
} => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?,
Commands::VisualizeOffline {
sqlite_path,
output,
identity,
} => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?,
} }
Ok(()) Ok(())
} }

View File

@@ -16,7 +16,10 @@ const LOG_DIR: &str = "/Users/accusys/service_logs";
const LAUNCH_DIR: &str = "/Users/accusys/Library/LaunchAgents"; const LAUNCH_DIR: &str = "/Users/accusys/Library/LaunchAgents";
#[derive(Parser)] #[derive(Parser)]
#[command(name = "service", about = "Service Lifecycle Manager — source → build → install → config → launch → env")] #[command(
name = "service",
about = "Service Lifecycle Manager — source → build → install → config → launch → env"
)]
struct Cli { struct Cli {
#[command(subcommand)] #[command(subcommand)]
command: Commands, command: Commands,
@@ -111,22 +114,54 @@ fn cmd_source_list() -> Result<()> {
("pyenv", "pyenv/", "git repo"), ("pyenv", "pyenv/", "git repo"),
("cmake", "cmake-4.2.0-macos-universal.tar.gz", "binary"), ("cmake", "cmake-4.2.0-macos-universal.tar.gz", "binary"),
("llama.cpp", "llama.cpp/", "git repo"), ("llama.cpp", "llama.cpp/", "git repo"),
("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", "source tarball"), (
("libreoffice (dmg)", "LibreOffice_26.2.3_MacOS_aarch64.dmg", "binary (TDF)"), "libreoffice (src)",
("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", "npm package"), "libreoffice-26.2.3.2.tar.xz",
"source tarball",
),
(
"libreoffice (dmg)",
"LibreOffice_26.2.3_MacOS_aarch64.dmg",
"binary (TDF)",
),
(
"mermaid-cli",
"mermaid-js-mermaid-cli-11.14.0.tgz",
"npm package",
),
("librsvg", "librsvg/", "Rust source"), ("librsvg", "librsvg/", "Rust source"),
("GroundingDINO", "GroundingDINO/", "git repo (IDEA-Research)"), (
"GroundingDINO",
"GroundingDINO/",
"git repo (IDEA-Research)",
),
("PaliGemma", "paligemma/", "HuggingFace reference"), ("PaliGemma", "paligemma/", "HuggingFace reference"),
("Odoo 19 CE", "odoo/", "git repo (LGPL-3.0)"), ("Odoo 19 CE", "odoo/", "git repo (LGPL-3.0)"),
("ERPNext v15", "erpnext/", "git repo (GPL-3.0)"), ("ERPNext v15", "erpnext/", "git repo (GPL-3.0)"),
("Frappe Framework", "frappe/", "git repo (MIT)"), ("Frappe Framework", "frappe/", "git repo (MIT)"),
("Gitea v1.25", "gitea/", "git repo (MIT, Go)"), ("Gitea v1.25", "gitea/", "git repo (MIT, Go)"),
("Go v1.26", "go/", "git repo (BSD)"), ("Go v1.26", "go/", "git repo (BSD)"),
("Rust/Cargo", "rustc-1.92.0-src.tar.xz", "source tarball (Apache 2.0 / MIT)"), (
("rustup", "rustup-1.28.1.tar.gz", "source tarball (Apache 2.0)"), "Rust/Cargo",
("Swift v6.3", "swift-6.3.1-RELEASE.tar.gz", "source tarball (Apache 2.0)"), "rustc-1.92.0-src.tar.xz",
"source tarball (Apache 2.0 / MIT)",
),
(
"rustup",
"rustup-1.28.1.tar.gz",
"source tarball (Apache 2.0)",
),
(
"Swift v6.3",
"swift-6.3.1-RELEASE.tar.gz",
"source tarball (Apache 2.0)",
),
("yt-dlp", "yt-dlp/", "git repo (Unlicense)"), ("yt-dlp", "yt-dlp/", "git repo (Unlicense)"),
("SQLite", "sqlite-amalgamation-3490100.zip", "amalgamation (Public Domain)"), (
"SQLite",
"sqlite-amalgamation-3490100.zip",
"amalgamation (Public Domain)",
),
("sqlite-vec", "sqlite-vec/", "git repo (MIT)"), ("sqlite-vec", "sqlite-vec/", "git repo (MIT)"),
]; ];
@@ -164,7 +199,11 @@ fn cmd_source_verify() -> Result<()> {
("cmake", "cmake-4.2.0-macos-universal.tar.gz", false), ("cmake", "cmake-4.2.0-macos-universal.tar.gz", false),
("llama.cpp", "llama.cpp/", true), ("llama.cpp", "llama.cpp/", true),
("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", false), ("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", false),
("libreoffice (dmg)", "LibreOffice_26.2.3_MacOS_aarch64.dmg", false), (
"libreoffice (dmg)",
"LibreOffice_26.2.3_MacOS_aarch64.dmg",
false,
),
("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", false), ("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", false),
("librsvg", "librsvg/", true), ("librsvg", "librsvg/", true),
("GroundingDINO", "GroundingDINO/", true), ("GroundingDINO", "GroundingDINO/", true),
@@ -186,7 +225,11 @@ fn cmd_source_verify() -> Result<()> {
let mut missing = 0; let mut missing = 0;
for (name, path, is_dir) in &checks { for (name, path, is_dir) in &checks {
let full = src_dir.join(path); let full = src_dir.join(path);
let exists = if *is_dir { full.is_dir() } else { full.is_file() }; let exists = if *is_dir {
full.is_dir()
} else {
full.is_file()
};
if exists { if exists {
println!("{}", name); println!("{}", name);
ok += 1; ok += 1;
@@ -202,7 +245,10 @@ fn cmd_source_verify() -> Result<()> {
// ---- Build ---- // ---- Build ----
fn cmd_build(service: &str) -> Result<()> { fn cmd_build(service: &str) -> Result<()> {
let install_sh = Path::new(SERVICE_SRC).parent().unwrap().join("install_services.sh"); let install_sh = Path::new(SERVICE_SRC)
.parent()
.unwrap()
.join("install_services.sh");
if service == "all" { if service == "all" {
// Run the full install script // Run the full install script
@@ -224,8 +270,14 @@ fn cmd_build(service: &str) -> Result<()> {
"ffmpeg" => { "ffmpeg" => {
println!("Building ffmpeg (requires x264 + freetype)..."); println!("Building ffmpeg (requires x264 + freetype)...");
// Simplified: run the install script which handles incremental builds // Simplified: run the install script which handles incremental builds
let status = Command::new("bash").arg(&install_sh).env("PREFIX", PREFIX).env("SRC_DIR", SERVICE_SRC).status()?; let status = Command::new("bash")
if !status.success() { anyhow::bail!("Build failed"); } .arg(&install_sh)
.env("PREFIX", PREFIX)
.env("SRC_DIR", SERVICE_SRC)
.status()?;
if !status.success() {
anyhow::bail!("Build failed");
}
} }
"redis" => { "redis" => {
let src = format!("{}/redis-7.4.3.tar.gz", SERVICE_SRC); let src = format!("{}/redis-7.4.3.tar.gz", SERVICE_SRC);
@@ -236,37 +288,67 @@ fn cmd_build(service: &str) -> Result<()> {
run_build("postgresql", &src, &format!("cd /tmp && tar xzf {} && cd postgresql-18.3 && ./configure --prefix={}/pgsql/18.3 && make -j$(sysctl -n hw.ncpu) && make install", src, PREFIX))?; run_build("postgresql", &src, &format!("cd /tmp && tar xzf {} && cd postgresql-18.3 && ./configure --prefix={}/pgsql/18.3 && make -j$(sysctl -n hw.ncpu) && make install", src, PREFIX))?;
} }
"llama" => { "llama" => {
println!("Building llama.cpp from {}...", format!("{}/llama.cpp", SERVICE_SRC)); println!(
"Building llama.cpp from {}...",
format!("{}/llama.cpp", SERVICE_SRC)
);
let status = Command::new("cmake") let status = Command::new("cmake")
.args(["-B", "build", "-DCMAKE_INSTALL_PREFIX=/tmp/llama_install"]) .args(["-B", "build", "-DCMAKE_INSTALL_PREFIX=/tmp/llama_install"])
.current_dir(format!("{}/llama.cpp", SERVICE_SRC)) .current_dir(format!("{}/llama.cpp", SERVICE_SRC))
.status()?; .status()?;
if !status.success() { anyhow::bail!("cmake failed"); } if !status.success() {
let status = Command::new("cmake").args(["--build", "build", "--config", "Release", "-j"]).current_dir(format!("{}/llama.cpp", SERVICE_SRC)).status()?; anyhow::bail!("cmake failed");
if !status.success() { anyhow::bail!("build failed"); } }
let status = Command::new("cmake")
.args(["--build", "build", "--config", "Release", "-j"])
.current_dir(format!("{}/llama.cpp", SERVICE_SRC))
.status()?;
if !status.success() {
anyhow::bail!("build failed");
}
} }
"libreoffice" => { "libreoffice" => {
let dmg = format!("{}/LibreOffice_26.2.3_MacOS_aarch64.dmg", SERVICE_SRC); let dmg = format!("{}/LibreOffice_26.2.3_MacOS_aarch64.dmg", SERVICE_SRC);
let mount = "/tmp/lo_mount"; let mount = "/tmp/lo_mount";
println!("Extracting LibreOffice from DMG..."); println!("Extracting LibreOffice from DMG...");
// Mount // Mount
let status = Command::new("hdiutil").args(["attach", &dmg, "-nobrowse", "-quiet", "-mountpoint", mount]).status()?; let status = Command::new("hdiutil")
if !status.success() { anyhow::bail!("DMG mount failed"); } .args(["attach", &dmg, "-nobrowse", "-quiet", "-mountpoint", mount])
.status()?;
if !status.success() {
anyhow::bail!("DMG mount failed");
}
// Copy app // Copy app
let lo_dir = format!("{}/libreoffice", PREFIX); let lo_dir = format!("{}/libreoffice", PREFIX);
let _ = std::fs::remove_dir_all(format!("{}/LibreOffice.app", lo_dir)); let _ = std::fs::remove_dir_all(format!("{}/LibreOffice.app", lo_dir));
std::fs::create_dir_all(&lo_dir)?; std::fs::create_dir_all(&lo_dir)?;
let status = Command::new("cp").args(["-R", &format!("{}/LibreOffice.app", mount), &format!("{}/LibreOffice.app", lo_dir)]).status()?; let status = Command::new("cp")
if !status.success() { anyhow::bail!("Copy failed"); } .args([
"-R",
&format!("{}/LibreOffice.app", mount),
&format!("{}/LibreOffice.app", lo_dir),
])
.status()?;
if !status.success() {
anyhow::bail!("Copy failed");
}
// Create symlink // Create symlink
std::fs::create_dir_all(format!("{}/bin", lo_dir))?; std::fs::create_dir_all(format!("{}/bin", lo_dir))?;
let _ = std::fs::remove_file(format!("{}/bin/soffice", lo_dir)); let _ = std::fs::remove_file(format!("{}/bin/soffice", lo_dir));
std::os::unix::fs::symlink("../LibreOffice.app/Contents/MacOS/soffice", format!("{}/bin/soffice", lo_dir))?; std::os::unix::fs::symlink(
"../LibreOffice.app/Contents/MacOS/soffice",
format!("{}/bin/soffice", lo_dir),
)?;
// Unmount // Unmount
let _ = Command::new("hdiutil").args(["detach", mount, "-quiet"]).status(); let _ = Command::new("hdiutil")
.args(["detach", mount, "-quiet"])
.status();
println!(" libreoffice installed to {}/bin/soffice", lo_dir); println!(" libreoffice installed to {}/bin/soffice", lo_dir);
} }
_ => anyhow::bail!("Unknown service: {}. Try: all, ffmpeg, redis, postgres, llama, libreoffice, python", service), _ => anyhow::bail!(
"Unknown service: {}. Try: all, ffmpeg, redis, postgres, llama, libreoffice, python",
service
),
} }
Ok(()) Ok(())
} }
@@ -274,7 +356,9 @@ fn cmd_build(service: &str) -> Result<()> {
/// Runs one shell build step for the service `name`.
///
/// `cmd` is executed through `bash -c`; `src` is only used in the progress
/// message. Progress is printed before and after the command runs.
///
/// # Errors
/// Fails if `bash` cannot be spawned, or with "`<name>` build failed" when the
/// command exits with a non-success status.
fn run_build(name: &str, src: &str, cmd: &str) -> Result<()> {
    println!("Building {} from {}...", name, src);

    let mut shell = Command::new("bash");
    shell.arg("-c").arg(cmd);

    match shell.status()? {
        exit if exit.success() => {
            println!(" {} build complete", name);
            Ok(())
        }
        _ => anyhow::bail!("{} build failed", name),
    }
}
@@ -292,7 +376,10 @@ fn cmd_install(service: &str) -> Result<()> {
let rsvg_src = format!("{}/librsvg/bin/rsvg-convert", PREFIX); let rsvg_src = format!("{}/librsvg/bin/rsvg-convert", PREFIX);
let gitea_src = format!("{}/gitea/bin/gitea", PREFIX); let gitea_src = format!("{}/gitea/bin/gitea", PREFIX);
let go_src = format!("{}/go/bin/go", PREFIX); let go_src = format!("{}/go/bin/go", PREFIX);
let rustc_src = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", PREFIX); let rustc_src = format!(
"{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc",
PREFIX
);
let swift_src = "/usr/bin/swift".to_string(); let swift_src = "/usr/bin/swift".to_string();
let ytdlp_src = "/opt/homebrew/bin/yt-dlp".to_string(); let ytdlp_src = "/opt/homebrew/bin/yt-dlp".to_string();
@@ -313,7 +400,9 @@ fn cmd_install(service: &str) -> Result<()> {
]; ];
for (name, src) in &installs { for (name, src) in &installs {
if service != "all" && service != *name { continue; } if service != "all" && service != *name {
continue;
}
if Path::new(src).exists() { if Path::new(src).exists() {
println!("{} installed: {}", name, src); println!("{} installed: {}", name, src);
} else { } else {
@@ -370,12 +459,18 @@ fn cmd_config(service: &str) -> Result<()> {
println!("MOMENTRY_LLM_SUMMARY_URL=http://localhost:8082/v1/chat/completions"); println!("MOMENTRY_LLM_SUMMARY_URL=http://localhost:8082/v1/chat/completions");
println!("MOMENTRY_OUTPUT_DIR={}/momentry/output_dev", PREFIX); println!("MOMENTRY_OUTPUT_DIR={}/momentry/output_dev", PREFIX);
println!("MOMENTRY_SCRIPTS_DIR={}/momentry_core_0.1/scripts", PREFIX); println!("MOMENTRY_SCRIPTS_DIR={}/momentry_core_0.1/scripts", PREFIX);
println!("MOMENTRY_PYTHON_PATH={}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX); println!(
"MOMENTRY_PYTHON_PATH={}/.pyenv/versions/3.11.15/bin/python3.11",
PREFIX
);
} }
if service == "all" || service == "embedding" { if service == "all" || service == "embedding" {
println!("\n--- Embedding Server config ---"); println!("\n--- Embedding Server config ---");
println!("# Start: {} embeddinggemma_server.py --port 11436", format!("{}/momentry_core_0.1/scripts", PREFIX)); println!(
"# Start: {} embeddinggemma_server.py --port 11436",
format!("{}/momentry_core_0.1/scripts", PREFIX)
);
println!("MODEL=google/embeddinggemma-300m"); println!("MODEL=google/embeddinggemma-300m");
println!("PORT=11436"); println!("PORT=11436");
println!("DEVICE=mps"); println!("DEVICE=mps");
@@ -393,25 +488,58 @@ fn cmd_launch_generate() -> Result<()> {
let pg_args = format!("-D {}/pgsql/18.3/data", PREFIX); let pg_args = format!("-D {}/pgsql/18.3/data", PREFIX);
let redis_bin = format!("{}/redis/bin/redis-server", PREFIX); let redis_bin = format!("{}/redis/bin/redis-server", PREFIX);
let redis_args = format!("{}/redis/redis.conf", PREFIX); let redis_args = format!("{}/redis/redis.conf", PREFIX);
let qdrant_bin = format!("{}/momentry_core_0.1/services/qdrant/target/release/qdrant", PREFIX); let qdrant_bin = format!(
"{}/momentry_core_0.1/services/qdrant/target/release/qdrant",
PREFIX
);
let embed_bin = format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX); let embed_bin = format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX);
let embed_args = format!("{}/momentry_core_0.1/scripts/embeddinggemma_server.py --port 11436", PREFIX); let embed_args = format!(
"{}/momentry_core_0.1/scripts/embeddinggemma_server.py --port 11436",
PREFIX
);
let llama_bin = format!("{}/llama/bin/llama-server", PREFIX); let llama_bin = format!("{}/llama/bin/llama-server", PREFIX);
let llama_args = format!("-m {}/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf --port 8082 -ngl 99 -c 16384", PREFIX); let llama_args = format!(
let play_bin = format!("{}/momentry_core_0.1/target/debug/momentry_playground", PREFIX); "-m {}/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf --port 8082 -ngl 99 -c 16384",
PREFIX
);
let play_bin = format!(
"{}/momentry_core_0.1/target/debug/momentry_playground",
PREFIX
);
let services: Vec<(&str, &str, &str, &str)> = vec![ let services: Vec<(&str, &str, &str, &str)> = vec![
("com.momentry.postgres", &pg_bin, &pg_args, "PostgreSQL"), ("com.momentry.postgres", &pg_bin, &pg_args, "PostgreSQL"),
("com.momentry.redis", &redis_bin, &redis_args, "Redis"), ("com.momentry.redis", &redis_bin, &redis_args, "Redis"),
("com.momentry.qdrant", &qdrant_bin, "", "Qdrant"), ("com.momentry.qdrant", &qdrant_bin, "", "Qdrant"),
("com.momentry.embedding", &embed_bin, &embed_args, "EmbeddingGemma"), (
("com.momentry.llama", &llama_bin, &llama_args, "LLM (llama.cpp)"), "com.momentry.embedding",
("com.momentry.playground", &play_bin, "server --port 3003", "Momentry Playground"), &embed_bin,
("com.momentry.worker", &play_bin, "worker --max-concurrent 2 --poll-interval 5", "Momentry Worker"), &embed_args,
"EmbeddingGemma",
),
(
"com.momentry.llama",
&llama_bin,
&llama_args,
"LLM (llama.cpp)",
),
(
"com.momentry.playground",
&play_bin,
"server --port 3003",
"Momentry Playground",
),
(
"com.momentry.worker",
&play_bin,
"worker --max-concurrent 2 --poll-interval 5",
"Momentry Worker",
),
]; ];
for (label, bin, args, _desc) in &services { for (label, bin, args, _desc) in &services {
let plist = format!(r#"<?xml version="1.0" encoding="UTF-8"?> let plist = format!(
r#"<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0"> <plist version="1.0">
<dict> <dict>
@@ -451,7 +579,11 @@ fn cmd_launch_generate() -> Result<()> {
fs::write(&plist_path, plist)?; fs::write(&plist_path, plist)?;
println!(" 📝 {}{:?}", label, plist_path.file_name().unwrap()); println!(" 📝 {}{:?}", label, plist_path.file_name().unwrap());
} }
println!("\n Generated {} plist files in {}", services.len(), LAUNCH_DIR); println!(
"\n Generated {} plist files in {}",
services.len(),
LAUNCH_DIR
);
Ok(()) Ok(())
} }
@@ -461,7 +593,9 @@ fn cmd_launch_load() -> Result<()> {
let path = entry.path(); let path = entry.path();
if path.extension().map_or(false, |e| e == "plist") { if path.extension().map_or(false, |e| e == "plist") {
let name = path.file_stem().unwrap().to_str().unwrap_or("?"); let name = path.file_stem().unwrap().to_str().unwrap_or("?");
let status = Command::new("launchctl").args(["load", "-w", path.to_str().unwrap()]).status(); let status = Command::new("launchctl")
.args(["load", "-w", path.to_str().unwrap()])
.status();
match status { match status {
Ok(s) if s.success() => println!(" ✅ loaded: {}", name), Ok(s) if s.success() => println!(" ✅ loaded: {}", name),
Ok(_) => println!(" ⚠️ load failed: {}", name), Ok(_) => println!(" ⚠️ load failed: {}", name),
@@ -478,7 +612,9 @@ fn cmd_launch_unload() -> Result<()> {
let path = entry.path(); let path = entry.path();
if path.extension().map_or(false, |e| e == "plist") { if path.extension().map_or(false, |e| e == "plist") {
let name = path.file_stem().unwrap().to_str().unwrap_or("?"); let name = path.file_stem().unwrap().to_str().unwrap_or("?");
let status = Command::new("launchctl").args(["unload", path.to_str().unwrap()]).status(); let status = Command::new("launchctl")
.args(["unload", path.to_str().unwrap()])
.status();
match status { match status {
Ok(s) if s.success() => println!(" ✅ unloaded: {}", name), Ok(s) if s.success() => println!(" ✅ unloaded: {}", name),
Ok(_) => println!(" ⚠️ unload failed: {}", name), Ok(_) => println!(" ⚠️ unload failed: {}", name),
@@ -504,7 +640,11 @@ fn cmd_launch_status() -> Result<()> {
Ok(o) if o.status.success() => { Ok(o) if o.status.success() => {
let stdout = String::from_utf8_lossy(&o.stdout); let stdout = String::from_utf8_lossy(&o.stdout);
if stdout.contains("PID") || stdout.lines().count() > 1 { if stdout.contains("PID") || stdout.lines().count() > 1 {
let pid = stdout.lines().nth(1).and_then(|l| l.split_whitespace().next()).unwrap_or("-"); let pid = stdout
.lines()
.nth(1)
.and_then(|l| l.split_whitespace().next())
.unwrap_or("-");
println!(" 🟢 {} (PID: {})", label, pid); println!(" 🟢 {} (PID: {})", label, pid);
} else { } else {
println!("{} (not running)", label); println!("{} (not running)", label);
@@ -519,7 +659,8 @@ fn cmd_launch_status() -> Result<()> {
// ---- Env ---- // ---- Env ----
fn cmd_env(output: &Option<String>) -> Result<()> { fn cmd_env(output: &Option<String>) -> Result<()> {
let env_content = format!(r#"# Momentry Core — Environment Configuration let env_content = format!(
r#"# Momentry Core — Environment Configuration
# Generated: {} # Generated: {}
# Service: {} env # Service: {} env
@@ -601,8 +742,14 @@ fn cmd_test() -> Result<()> {
let rsvg_bin = format!("{}/librsvg/bin/rsvg-convert", PREFIX); let rsvg_bin = format!("{}/librsvg/bin/rsvg-convert", PREFIX);
let gitea_bin = format!("{}/gitea/bin/gitea", PREFIX); let gitea_bin = format!("{}/gitea/bin/gitea", PREFIX);
let go_bin = format!("{}/go/bin/go", PREFIX); let go_bin = format!("{}/go/bin/go", PREFIX);
let rustc_bin = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", PREFIX); let rustc_bin = format!(
let cargo_bin = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/cargo", PREFIX); "{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc",
PREFIX
);
let cargo_bin = format!(
"{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/cargo",
PREFIX
);
let swift_bin = "/usr/bin/swift".to_string(); let swift_bin = "/usr/bin/swift".to_string();
let ytdlp_bin = "/opt/homebrew/bin/yt-dlp".to_string(); let ytdlp_bin = "/opt/homebrew/bin/yt-dlp".to_string();
@@ -641,7 +788,11 @@ fn cmd_test() -> Result<()> {
let output = Command::new(bin).args(args).output(); let output = Command::new(bin).args(args).output();
match output { match output {
Ok(o) if o.status.success() => { Ok(o) if o.status.success() => {
let ver = String::from_utf8_lossy(&o.stdout).lines().next().unwrap_or("?").to_string(); let ver = String::from_utf8_lossy(&o.stdout)
.lines()
.next()
.unwrap_or("?")
.to_string();
println!("{}", ver.chars().take(70).collect::<String>()); println!("{}", ver.chars().take(70).collect::<String>());
pass += 1; pass += 1;
} }
@@ -666,14 +817,87 @@ fn cmd_test() -> Result<()> {
// Functional tests // Functional tests
println!("\n--- Functional Tests ---"); println!("\n--- Functional Tests ---");
// Create test docx for libreoffice test // Create test docx for libreoffice test
let _ = std::fs::write("/tmp/svc_test_func.docx", "Service test document for LibreOffice conversion"); let _ = std::fs::write(
"/tmp/svc_test_func.docx",
"Service test document for LibreOffice conversion",
);
let func_tests = [ let func_tests = [
("ffprobe probe", "ffprobe", vec!["-v", "error", "-show_entries", "format=duration", "-of", "csv=p=0", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4"]), (
("ffmpeg audio extract", "ffmpeg", vec!["-y", "-v", "quiet", "-i", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", "-t", "2", "-ar", "16000", "-ac", "1", "/tmp/svc_test_audio.wav"]), "ffprobe probe",
("ffmpeg frame extract", "ffmpeg", vec!["-y", "-v", "quiet", "-i", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", "-ss", "100", "-vframes", "1", "/tmp/svc_test_frame.jpg"]), "ffprobe",
("libreoffice doc→txt", "libreoffice", vec!["--headless", "--convert-to", "txt", "/tmp/svc_test_func.docx", "--outdir", "/tmp/"]), vec![
("rsvg-convert svg→png", "rsvg-convert", vec!["-o", "/tmp/svc_test_rsvg.png", "/tmp/test_rsvg.svg"]), "-v",
("mmdc mermaid→png", "mermaid-cli", vec!["-i", "/tmp/test_mermaid.mmd", "-o", "/tmp/svc_test_mmd.png", "-w", "200"]), "error",
"-show_entries",
"format=duration",
"-of",
"csv=p=0",
"/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
],
),
(
"ffmpeg audio extract",
"ffmpeg",
vec![
"-y",
"-v",
"quiet",
"-i",
"/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
"-t",
"2",
"-ar",
"16000",
"-ac",
"1",
"/tmp/svc_test_audio.wav",
],
),
(
"ffmpeg frame extract",
"ffmpeg",
vec![
"-y",
"-v",
"quiet",
"-i",
"/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
"-ss",
"100",
"-vframes",
"1",
"/tmp/svc_test_frame.jpg",
],
),
(
"libreoffice doc→txt",
"libreoffice",
vec![
"--headless",
"--convert-to",
"txt",
"/tmp/svc_test_func.docx",
"--outdir",
"/tmp/",
],
),
(
"rsvg-convert svg→png",
"rsvg-convert",
vec!["-o", "/tmp/svc_test_rsvg.png", "/tmp/test_rsvg.svg"],
),
(
"mmdc mermaid→png",
"mermaid-cli",
vec![
"-i",
"/tmp/test_mermaid.mmd",
"-o",
"/tmp/svc_test_mmd.png",
"-w",
"200",
],
),
]; ];
for (desc, bin_name, args) in &func_tests { for (desc, bin_name, args) in &func_tests {
@@ -689,8 +913,14 @@ fn cmd_test() -> Result<()> {
}; };
let output = Command::new(bin).args(args).output(); let output = Command::new(bin).args(args).output();
match output { match output {
Ok(o) if o.status.success() => { println!(""); pass += 1; } Ok(o) if o.status.success() => {
_ => { println!(""); fail += 1; } println!("");
pass += 1;
}
_ => {
println!("");
fail += 1;
}
} }
} }
@@ -706,7 +936,10 @@ fn cmd_test() -> Result<()> {
fn cmd_report() -> Result<()> { fn cmd_report() -> Result<()> {
println!("=== Momentry Service Report ==="); println!("=== Momentry Service Report ===");
println!("Generated: {}", chrono::Local::now().format("%Y-%m-%d %H:%M:%S")); println!(
"Generated: {}",
chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
);
println!(); println!();
// 1. Source status // 1. Source status
@@ -730,13 +963,25 @@ fn cmd_report() -> Result<()> {
println!("\n## 2. Binaries"); println!("\n## 2. Binaries");
let binaries = [ let binaries = [
("cmake", &format!("{}/bin/cmake", PREFIX)), ("cmake", &format!("{}/bin/cmake", PREFIX)),
("python3.11", &format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX)), (
"python3.11",
&format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX),
),
("ffmpeg", &format!("{}/ffmpeg_build/bin/ffmpeg", PREFIX)), ("ffmpeg", &format!("{}/ffmpeg_build/bin/ffmpeg", PREFIX)),
("ffprobe", &format!("{}/ffmpeg_build/bin/ffprobe", PREFIX)), ("ffprobe", &format!("{}/ffmpeg_build/bin/ffprobe", PREFIX)),
("redis-server", &format!("{}/redis/bin/redis-server", PREFIX)), (
"redis-server",
&format!("{}/redis/bin/redis-server", PREFIX),
),
("postgres", &format!("{}/pgsql/18.3/bin/postgres", PREFIX)), ("postgres", &format!("{}/pgsql/18.3/bin/postgres", PREFIX)),
("llama-server", &format!("{}/llama/bin/llama-server", PREFIX)), (
("libreoffice", &format!("{}/libreoffice/bin/soffice", PREFIX)), "llama-server",
&format!("{}/llama/bin/llama-server", PREFIX),
),
(
"libreoffice",
&format!("{}/libreoffice/bin/soffice", PREFIX),
),
]; ];
for (name, path) in &binaries { for (name, path) in &binaries {
let status = if Path::new(path).exists() { let status = if Path::new(path).exists() {
@@ -772,9 +1017,18 @@ fn cmd_report() -> Result<()> {
// 4. Ports // 4. Ports
println!("\n## 4. Port Status"); println!("\n## 4. Port Status");
let ports = [(3003, "Playground"), (5432, "PostgreSQL"), (6379, "Redis"), (6333, "Qdrant"), (8082, "LLM"), (11436, "Embedding")]; let ports = [
(3003, "Playground"),
(5432, "PostgreSQL"),
(6379, "Redis"),
(6333, "Qdrant"),
(8082, "LLM"),
(11436, "Embedding"),
];
for (port, name) in &ports { for (port, name) in &ports {
let output = Command::new("lsof").args(["-i", &format!(":{}", port)]).output(); let output = Command::new("lsof")
.args(["-i", &format!(":{}", port)])
.output();
match output { match output {
Ok(o) if o.status.success() => println!(" 🟢 :{} ({})", port, name), Ok(o) if o.status.success() => println!(" 🟢 :{} ({})", port, name),
_ => println!(" ⚪ :{} ({})", port, name), _ => println!(" ⚪ :{} ({})", port, name),
@@ -797,14 +1051,21 @@ fn cmd_report() -> Result<()> {
} }
/// Formats a byte count as a short human-readable string.
///
/// Uses binary multiples (1 KB = 1024 B). Gigabytes are shown with one
/// decimal place, megabytes and kilobytes are rounded to whole numbers, and
/// anything below 1024 is printed as a plain byte count.
///
/// Thresholds are inclusive, so an exact multiple is promoted to the larger
/// unit (`1024` -> `1KB`, not `1024B`).
fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * 1024;
    const GIB: u64 = MIB * 1024;

    if bytes >= GIB {
        format!("{:.1}GB", bytes as f64 / GIB as f64)
    } else if bytes >= MIB {
        format!("{:.0}MB", bytes as f64 / MIB as f64)
    } else if bytes >= KIB {
        format!("{:.0}KB", bytes as f64 / KIB as f64)
    } else {
        format!("{}B", bytes)
    }
}
fn format_dir_size(path: &Path) -> String { fn format_dir_size(path: &Path) -> String {
let output = Command::new("du").args(["-sh", path.to_str().unwrap()]).output(); let output = Command::new("du")
.args(["-sh", path.to_str().unwrap()])
.output();
match output { match output {
Ok(o) if o.status.success() => { Ok(o) if o.status.success() => {
let s = String::from_utf8_lossy(&o.stdout); let s = String::from_utf8_lossy(&o.stdout);
@@ -824,7 +1085,10 @@ async fn main() -> Result<()> {
SourceAction::List => cmd_source_list()?, SourceAction::List => cmd_source_list()?,
SourceAction::Verify => cmd_source_verify()?, SourceAction::Verify => cmd_source_verify()?,
SourceAction::Download { name } => { SourceAction::Download { name } => {
println!("Downloading: {} (use install_services.sh for full download)", name); println!(
"Downloading: {} (use install_services.sh for full download)",
name
);
println!("Source URLs:"); println!("Source URLs:");
println!(" ffmpeg: https://ffmpeg.org/releases/ffmpeg-7.1.1.tar.xz"); println!(" ffmpeg: https://ffmpeg.org/releases/ffmpeg-7.1.1.tar.xz");
println!(" redis: https://download.redis.io/releases/redis-7.4.3.tar.gz"); println!(" redis: https://download.redis.io/releases/redis-7.4.3.tar.gz");

View File

@@ -75,15 +75,13 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
// Query chunks table for Rule 1 sentence chunks // Query chunks table for Rule 1 sentence chunks
let chunk_table = schema::table_name("chunk"); let chunk_table = schema::table_name("chunk");
let rule1_rows: Vec<(String,)> = sqlx::query_as( let rule1_rows: Vec<(String,)> = sqlx::query_as(&format!(
&format!( "SELECT chunk_id FROM {} \
"SELECT chunk_id FROM {} \
WHERE file_uuid = $1 AND chunk_type = 'sentence' \ WHERE file_uuid = $1 AND chunk_type = 'sentence' \
AND start_frame >= $2 \ AND start_frame >= $2 \
AND end_frame <= $3", AND end_frame <= $3",
chunk_table chunk_table
), ))
)
.bind(file_uuid) .bind(file_uuid)
.bind(scene.start_frame as i64) .bind(scene.start_frame as i64)
.bind(scene.end_frame as i64) .bind(scene.end_frame as i64)
@@ -101,16 +99,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
// Let's re-query text directly. // Let's re-query text directly.
} }
let texts: Vec<String> = sqlx::query_scalar( let texts: Vec<String> = sqlx::query_scalar(&format!(
&format!( "SELECT text_content FROM {} \
"SELECT text_content FROM {} \
WHERE file_uuid = $1 AND chunk_type = 'sentence' \ WHERE file_uuid = $1 AND chunk_type = 'sentence' \
AND start_frame >= $2 \ AND start_frame >= $2 \
AND end_frame <= $3 \ AND end_frame <= $3 \
ORDER BY start_frame ASC", ORDER BY start_frame ASC",
chunk_table chunk_table
), ))
)
.bind(file_uuid) .bind(file_uuid)
.bind(scene.start_frame as i64) .bind(scene.start_frame as i64)
.bind(scene.end_frame as i64) .bind(scene.end_frame as i64)
@@ -154,16 +150,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
"scene_number": scene.scene_number "scene_number": scene.scene_number
}); });
sqlx::query( sqlx::query(&format!(
&format!( "INSERT INTO {} (file_uuid, chunk_id, chunk_type, \
"INSERT INTO {} (file_uuid, chunk_id, chunk_type, \
start_time, end_time, fps, start_frame, end_frame, \ start_time, end_time, fps, start_frame, end_frame, \
content, text_content, summary_text, metadata, child_chunk_ids) \ content, text_content, summary_text, metadata, child_chunk_ids) \
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) \ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) \
ON CONFLICT (file_uuid, chunk_id) DO NOTHING", ON CONFLICT (file_uuid, chunk_id) DO NOTHING",
chunk_table chunk_table
), ))
)
.bind(file_uuid) .bind(file_uuid)
.bind(&chunk_id) .bind(&chunk_id)
.bind(scene.scene_number as i32) .bind(scene.scene_number as i32)

View File

@@ -20,8 +20,7 @@ pub fn set_cache_enabled(enabled: bool) {
} }
// Switch 1: watcher detects new file → auto-register // Switch 1: watcher detects new file → auto-register
pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy<RwLock<bool>> = pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));
Lazy::new(|| RwLock::new(false));
pub fn get_watcher_auto_register() -> bool { pub fn get_watcher_auto_register() -> bool {
*RUNTIME_WATCHER_AUTO_REGISTER.read().unwrap() *RUNTIME_WATCHER_AUTO_REGISTER.read().unwrap()
@@ -33,8 +32,7 @@ pub fn set_watcher_auto_register(enabled: bool) {
} }
// Switch 2: register → auto-trigger processing pipeline // Switch 2: register → auto-trigger processing pipeline
pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy<RwLock<bool>> = pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));
Lazy::new(|| RwLock::new(false));
pub fn get_auto_pipeline_enabled() -> bool { pub fn get_auto_pipeline_enabled() -> bool {
*RUNTIME_AUTO_PIPELINE_ENABLED.read().unwrap() *RUNTIME_AUTO_PIPELINE_ENABLED.read().unwrap()
@@ -107,6 +105,30 @@ pub static REDIS_KEY_PREFIX: Lazy<String> =
pub static DATABASE_SCHEMA: Lazy<String> = pub static DATABASE_SCHEMA: Lazy<String> =
Lazy::new(|| env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "public".to_string())); Lazy::new(|| env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "public".to_string()));
/// Timezone name used as the system default (e.g. `Asia/Taipei`).
///
/// Resolved once, on first access, from the first source that yields a value:
/// 1. the `MOMENTRY_TIMEZONE` environment variable, if set and non-empty;
/// 2. the `TZ` environment variable, if set and non-empty;
/// 3. the target of the `/etc/localtime` symlink, when it points into a
///    `zoneinfo` directory;
/// 4. the hard-coded fallback `Asia/Taipei`.
pub static SYSTEM_TIMEZONE: Lazy<String> = Lazy::new(|| {
    // Environment overrides, checked in priority order; empty values are ignored.
    for key in ["MOMENTRY_TIMEZONE", "TZ"].iter() {
        match env::var(key) {
            Ok(tz) if !tz.is_empty() => return tz,
            _ => {}
        }
    }

    // macOS: /etc/localtime → /var/db/timezone/zoneinfo/Asia/Taipei
    // Linux: /etc/localtime → /usr/share/zoneinfo/Asia/Taipei
    //        (may also be relative: ../usr/share/zoneinfo/Asia/Taipei)
    if let Ok(target) = std::fs::read_link("/etc/localtime") {
        if let Some(tz) = zone_from_localtime_target(&target.to_string_lossy()) {
            return tz.to_string();
        }
    }

    "Asia/Taipei".to_string()
});

/// Extracts the zone name from an `/etc/localtime` symlink target.
///
/// Returns everything after the first `/zoneinfo/` path component, so both
/// absolute and relative link targets are understood. Returns `None` when the
/// target does not contain a `zoneinfo` directory or nothing follows it.
fn zone_from_localtime_target(target: &str) -> Option<&str> {
    target
        .split_once("/zoneinfo/")
        .map(|(_, zone)| zone)
        .filter(|zone| !zone.is_empty())
}
pub static MONGODB_DATABASE: Lazy<String> = pub static MONGODB_DATABASE: Lazy<String> =
Lazy::new(|| env::var("MONGODB_DATABASE").unwrap_or_else(|_| "momentry".to_string())); Lazy::new(|| env::var("MONGODB_DATABASE").unwrap_or_else(|_| "momentry".to_string()));

File diff suppressed because it is too large Load Diff

View File

@@ -15,9 +15,11 @@ pub struct QdrantDb {
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorPayload { pub struct VectorPayload {
pub uuid: String, pub file_uuid: String,
pub chunk_id: String, pub chunk_id: String,
pub chunk_type: String, pub chunk_type: String,
pub start_frame: i64,
pub end_frame: i64,
pub start_time: f64, pub start_time: f64,
pub end_time: f64, pub end_time: f64,
pub text: Option<String>, pub text: Option<String>,
@@ -189,6 +191,49 @@ impl QdrantDb {
Ok(()) Ok(())
} }
/// Upserts a batch of points into `collection` with a single Qdrant request.
///
/// Each entry of `points` is `(point id, vector, optional payload)`. When a
/// payload is present it is attached to the point as-is. The request is sent
/// to the collection's `points` endpoint with `wait=true`.
///
/// An empty `points` slice is a no-op: nothing is sent and `Ok(())` is returned.
///
/// # Errors
///
/// Returns an error if the request cannot be sent, or if Qdrant answers with a
/// non-success status (the status and response body are included in the error).
pub async fn upsert_vectors_batch(
    &self,
    collection: &str,
    points: &[(u64, &[f32], Option<serde_json::Value>)],
) -> Result<()> {
    // Nothing to write: avoid a pointless round trip with an empty `points` array.
    if points.is_empty() {
        return Ok(());
    }

    let url = format!(
        "{}/collections/{}/points?wait=true",
        self.base_url, collection
    );

    let qdrant_points: Vec<serde_json::Value> = points
        .iter()
        .map(|(id, vec, payload)| {
            let mut p = serde_json::json!({
                "id": id,
                "vector": vec,
            });
            // The payload key is only emitted when the caller supplied one.
            if let Some(pl) = payload {
                p["payload"] = pl.clone();
            }
            p
        })
        .collect();

    let body = serde_json::json!({ "points": qdrant_points });

    let response = self
        .client
        .put(&url)
        .header("api-key", &self.api_key)
        .json(&body)
        .send()
        .await
        .context("Failed to send batch upsert request to Qdrant")?;

    let status = response.status();
    if !status.is_success() {
        // Best effort: an unreadable body must not hide the failing status.
        let response_text = response.text().await.unwrap_or_default();
        anyhow::bail!("Qdrant batch upsert failed: {} - {}", status, response_text);
    }

    Ok(())
}
pub async fn upsert_vector( pub async fn upsert_vector(
&self, &self,
chunk_id: &str, chunk_id: &str,
@@ -207,12 +252,23 @@ impl QdrantDb {
); );
let mut payload_map = HashMap::new(); let mut payload_map = HashMap::new();
payload_map.insert("uuid".to_string(), serde_json::json!(payload.uuid)); payload_map.insert(
"file_uuid".to_string(),
serde_json::json!(payload.file_uuid),
);
payload_map.insert("chunk_id".to_string(), serde_json::json!(payload.chunk_id)); payload_map.insert("chunk_id".to_string(), serde_json::json!(payload.chunk_id));
payload_map.insert( payload_map.insert(
"chunk_type".to_string(), "chunk_type".to_string(),
serde_json::json!(payload.chunk_type), serde_json::json!(payload.chunk_type),
); );
payload_map.insert(
"start_frame".to_string(),
serde_json::json!(payload.start_frame),
);
payload_map.insert(
"end_frame".to_string(),
serde_json::json!(payload.end_frame),
);
payload_map.insert( payload_map.insert(
"start_time".to_string(), "start_time".to_string(),
serde_json::json!(payload.start_time), serde_json::json!(payload.start_time),
@@ -224,7 +280,7 @@ impl QdrantDb {
// Generate consistent point ID from uuid and chunk_id // Generate consistent point ID from uuid and chunk_id
// Qdrant requires integer or UUID point IDs. We'll use a simple integer hash. // Qdrant requires integer or UUID point IDs. We'll use a simple integer hash.
let point_id_str = format!("{}_{}", payload.uuid, chunk_id); let point_id_str = format!("{}_{}", payload.file_uuid, chunk_id);
use std::collections::hash_map::DefaultHasher; use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new(); let mut hasher = DefaultHasher::new();
@@ -240,9 +296,9 @@ impl QdrantDb {
}); });
tracing::debug!( tracing::debug!(
"Upserting vector to Qdrant: chunk_id={}, uuid={}, vector_len={}", "Upserting vector to Qdrant: chunk_id={}, file_uuid={}, vector_len={}",
chunk_id, chunk_id,
payload.uuid, payload.file_uuid,
vector.len() vector.len()
); );
@@ -337,7 +393,7 @@ impl QdrantDb {
.map(|r| { .map(|r| {
let uuid = r let uuid = r
.payload .payload
.get("uuid") .get("file_uuid")
.and_then(|v| v.as_str()) .and_then(|v| v.as_str())
.unwrap_or("unknown") .unwrap_or("unknown")
.to_string(); .to_string();
@@ -409,7 +465,7 @@ impl QdrantDb {
.map(|r| { .map(|r| {
let uuid = r let uuid = r
.payload .payload
.get("uuid") .get("file_uuid")
.and_then(|v| v.as_str()) .and_then(|v| v.as_str())
.unwrap_or("unknown") .unwrap_or("unknown")
.to_string(); .to_string();
@@ -471,7 +527,7 @@ impl QdrantDb {
"filter": { "filter": {
"must": [ "must": [
{ {
"key": "uuid", "key": "file_uuid",
"match": { "match": {
"value": uuid "value": uuid
} }
@@ -532,7 +588,7 @@ impl QdrantDb {
.map(|r| { .map(|r| {
let uuid = r let uuid = r
.payload .payload
.get("uuid") .get("file_uuid")
.and_then(|v| v.as_str()) .and_then(|v| v.as_str())
.unwrap_or("unknown") .unwrap_or("unknown")
.to_string(); .to_string();
@@ -553,6 +609,89 @@ impl QdrantDb {
Ok(search_results) Ok(search_results)
} }
/// Runs a vector similarity search against `collection` and returns the raw hits.
///
/// Points whose payload field `exclude_payload_key` equals
/// `exclude_payload_value` are filtered out (`must_not`). When
/// `include_file_uuid` is given, hits are additionally restricted to points
/// whose `file_uuid` payload field matches it (`must`).
///
/// Returns one `(score, payload)` pair per hit, in the order Qdrant reported them.
///
/// # Errors
///
/// Fails if the request cannot be sent, if Qdrant answers with a non-success
/// status, or if the response body cannot be parsed.
pub async fn search_face_collection(
    &self,
    collection: &str,
    query_vector: &[f32],
    limit: usize,
    exclude_payload_key: &str,
    exclude_payload_value: &str,
    include_file_uuid: Option<&str>,
) -> Result<Vec<(f64, HashMap<String, serde_json::Value>)>> {
    // Minimal view of the search response: only the fields consumed below.
    #[derive(Deserialize)]
    struct Hit {
        score: f64,
        payload: HashMap<String, serde_json::Value>,
    }

    #[derive(Deserialize)]
    struct SearchEnvelope {
        result: Vec<Hit>,
    }

    let endpoint = format!("{}/collections/{}/points/search", self.base_url, collection);

    // The exclusion clause is always present; the file restriction is optional.
    let mut conditions = serde_json::json!({
        "must_not": [
            {
                "key": exclude_payload_key,
                "match": { "value": exclude_payload_value }
            }
        ]
    });
    if let Some(wanted_file) = include_file_uuid {
        conditions["must"] = serde_json::json!([
            {
                "key": "file_uuid",
                "match": { "value": wanted_file }
            }
        ]);
    }

    let request = serde_json::json!({
        "vector": query_vector,
        "limit": limit,
        "with_payload": true,
        "filter": conditions,
    });

    let reply = self
        .client
        .post(&endpoint)
        .header("api-key", &self.api_key)
        .header("Content-Type", "application/json")
        .json(&request)
        .send()
        .await
        .context("Failed to search Qdrant face collection")?;

    // Capture the status before the body is consumed.
    let status = reply.status();
    let raw = match reply.text().await {
        Ok(text) => text,
        Err(_) => "Failed to read response".to_string(),
    };

    if !status.is_success() {
        anyhow::bail!(
            "Qdrant search_face_collection failed: {} - {}",
            status,
            raw
        );
    }

    let envelope: SearchEnvelope = serde_json::from_str(&raw)
        .map_err(|e| anyhow::anyhow!("Failed to parse Qdrant response: {}", e))?;

    Ok(envelope
        .result
        .into_iter()
        .map(|hit| (hit.score, hit.payload))
        .collect())
}
pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> { pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> {
let url = format!( let url = format!(
"{}/collections/{}/points/delete", "{}/collections/{}/points/delete",
@@ -563,7 +702,7 @@ impl QdrantDb {
"filter": { "filter": {
"must": [ "must": [
{ {
"key": "uuid", "key": "file_uuid",
"match": { "match": {
"value": uuid "value": uuid
} }
@@ -711,9 +850,11 @@ impl Database for QdrantDb {
impl VectorStore for QdrantDb { impl VectorStore for QdrantDb {
async fn store_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> { async fn store_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> {
let payload = VectorPayload { let payload = VectorPayload {
uuid: String::new(), file_uuid: String::new(),
chunk_id: chunk_id.to_string(), chunk_id: chunk_id.to_string(),
chunk_type: String::new(), chunk_type: String::new(),
start_frame: 0,
end_frame: 0,
start_time: 0.0, start_time: 0.0,
end_time: 0.0, end_time: 0.0,
text: None, text: None,
@@ -737,7 +878,9 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
let qdrant: QdrantDb = QdrantDb::new(); let qdrant: QdrantDb = QdrantDb::new();
let query = format!( let query = format!(
"SELECT id, trace_id, frame_number, embedding FROM {} WHERE file_uuid = $1 AND embedding IS NOT NULL", "SELECT id, trace_id, frame_number, embedding FROM {} \
WHERE file_uuid = $1 AND embedding IS NOT NULL \
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
table table
); );
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?; let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
@@ -767,3 +910,103 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
); );
Ok(()) Ok(())
} }
/// Pushes one averaged face embedding per trace of `file_uuid` to Qdrant.
///
/// Reads every `face_detections` row of the file that has an embedding and a
/// trace id and is not rejected by quality control (`metadata->>'qc_ok'` is
/// either absent or true). Embeddings are averaged component-wise per
/// `trace_id` and upserted into the `<prefix>_traces` collection in batches of
/// 500 points. Each point carries `trace_id`, `file_uuid`, `frame_count` (the
/// number of embeddings averaged) and `source = "trace"` as payload.
///
/// Empty embeddings, and embeddings whose length differs from the first one
/// seen for the same trace, are skipped and reported with a warning instead
/// of corrupting the average or panicking.
///
/// # Errors
///
/// Fails if the database connection or query fails, or if ensuring the
/// collection or any batch upsert fails.
pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
    use crate::core::config::DATABASE_URL;
    use sqlx::Row;
    use std::collections::hash_map::Entry;

    let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
    let table = crate::core::db::schema::table_name("face_detections");
    let qdrant = QdrantDb::new();
    let collection = format!(
        "{}_traces",
        crate::core::config::REDIS_KEY_PREFIX
            .as_str()
            .trim_end_matches(':')
    );

    qdrant.ensure_collection(&collection, 512).await?;

    // Read all face_detections with embeddings; grouping by trace_id happens in Rust.
    let rows = sqlx::query(&format!(
        "SELECT trace_id, embedding FROM {} \
WHERE file_uuid = $1 AND embedding IS NOT NULL AND trace_id IS NOT NULL \
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
        table
    ))
    .bind(file_uuid)
    .fetch_all(&pool)
    .await?;

    // trace_id -> (component-wise running sum, number of embeddings summed).
    // Only the running sum is kept, so memory stays proportional to the number
    // of traces rather than the number of detections.
    let mut trace_sums: std::collections::HashMap<i32, (Vec<f32>, i64)> =
        std::collections::HashMap::new();
    let mut skipped: usize = 0;

    for row in &rows {
        let tid: Option<i32> = row.get(0);
        let emb: Option<Vec<f32>> = row.get(1);
        if let (Some(tid), Some(emb)) = (tid, emb) {
            if emb.is_empty() {
                skipped += 1;
                continue;
            }
            match trace_sums.entry(tid) {
                Entry::Vacant(slot) => {
                    // The first embedding of a trace fixes its dimension.
                    slot.insert((emb, 1));
                }
                Entry::Occupied(mut slot) => {
                    let (sum, count) = slot.get_mut();
                    if sum.len() != emb.len() {
                        // A mismatched length cannot be averaged meaningfully.
                        skipped += 1;
                        continue;
                    }
                    for (acc, v) in sum.iter_mut().zip(&emb) {
                        *acc += *v;
                    }
                    *count += 1;
                }
            }
        }
    }

    if skipped > 0 {
        tracing::warn!(
            "Skipped {} face embeddings with empty or mismatched dimensions for {}",
            skipped,
            file_uuid
        );
    }

    // Turn each running sum into the average embedding of its trace.
    struct AvgTrace {
        tid: i32,
        avg_emb: Vec<f32>,
        frame_count: i64,
    }

    let trace_avgs: Vec<AvgTrace> = trace_sums
        .into_iter()
        .map(|(tid, (mut sum, count))| {
            let n = count as f32;
            for v in &mut sum {
                *v /= n;
            }
            AvgTrace {
                tid,
                avg_emb: sum,
                frame_count: count,
            }
        })
        .collect();

    // Push to Qdrant in batches
    for chunk in trace_avgs.chunks(500) {
        let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
            .iter()
            .map(|t| {
                (
                    // NOTE(review): the point id is the bare trace_id (a negative
                    // id would sign-extend to a very large u64). If trace ids are
                    // only unique per file, points of different files overwrite
                    // each other in this shared collection — confirm with callers.
                    t.tid as u64,
                    t.avg_emb.as_slice(),
                    Some(serde_json::json!({
                        "trace_id": t.tid,
                        "file_uuid": file_uuid,
                        "frame_count": t.frame_count,
                        "source": "trace",
                    })),
                )
            })
            .collect();
        qdrant.upsert_vectors_batch(&collection, &batch).await?;
    }

    tracing::info!(
        "Synced {} trace embeddings to Qdrant for {}",
        trace_avgs.len(),
        file_uuid
    );
    Ok(())
}

View File

@@ -45,9 +45,11 @@ impl SyncDb {
} }
let payload = VectorPayload { let payload = VectorPayload {
uuid: uuid.clone(), file_uuid: uuid.clone(),
chunk_id: chunk_id.clone(), chunk_id: chunk_id.clone(),
chunk_type, chunk_type,
start_frame: chunk.start_frame,
end_frame: chunk.end_frame,
start_time, start_time,
end_time, end_time,
text: Some(text.to_string()), text: Some(text.to_string()),

View File

@@ -33,26 +33,38 @@ pub async fn run_consistency_checks(db: &PostgresDb) -> ConsistencyReport {
// Check 1: stale_processing — status=processing but job_id is null // Check 1: stale_processing — status=processing but job_id is null
let c1 = check_stale_processing(db).await; let c1 = check_stale_processing(db).await;
if c1.count > 0 { any_issue = true; } if c1.count > 0 {
any_issue = true;
}
checks.push(c1); checks.push(c1);
// Check 2: orphaned_processing — status=processing but no active monitor_job // Check 2: orphaned_processing — status=processing but no active monitor_job
let c2 = check_orphaned_processing(db).await; let c2 = check_orphaned_processing(db).await;
if c2.count > 0 { any_issue = true; } if c2.count > 0 {
any_issue = true;
}
checks.push(c2); checks.push(c2);
// Check 3: unregistered_with_uuid — DB rows left behind by migration // Check 3: unregistered_with_uuid — DB rows left behind by migration
let c3 = check_unregistered_with_uuid(db).await; let c3 = check_unregistered_with_uuid(db).await;
if c3.count > 0 { any_issue = true; } if c3.count > 0 {
any_issue = true;
}
checks.push(c3); checks.push(c3);
// Check 4: processing_job_done — status=processing but job already completed // Check 4: processing_job_done — status=processing but job already completed
let c4 = check_processing_job_done(db).await; let c4 = check_processing_job_done(db).await;
if c4.count > 0 { any_issue = true; } if c4.count > 0 {
any_issue = true;
}
checks.push(c4); checks.push(c4);
ConsistencyReport { ConsistencyReport {
status: if any_issue { "degraded".to_string() } else { "ok".to_string() }, status: if any_issue {
"degraded".to_string()
} else {
"ok".to_string()
},
checked_at, checked_at,
checks, checks,
} }
@@ -68,9 +80,17 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck {
.await .await
.unwrap_or_default(); .unwrap_or_default();
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { let files: Vec<ConsistencyFile> = rows
file_uuid, file_name, status, detail: "job_id is null".to_string(), .into_iter()
}).collect(); .map(
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid,
file_name,
status,
detail: "job_id is null".to_string(),
},
)
.collect();
ConsistencyCheck { ConsistencyCheck {
check: "stale_processing".to_string(), check: "stale_processing".to_string(),
@@ -83,19 +103,28 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck {
async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck { async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
let vt = schema::table_name("videos"); let vt = schema::table_name("videos");
let mj = schema::table_name("monitor_jobs"); let mj = schema::table_name("monitor_jobs");
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!( let rows: Vec<(String, String, String)> =
"SELECT v.file_uuid, v.file_name, v.status \ sqlx::query_as::<_, (String, String, String)>(&format!(
"SELECT v.file_uuid, v.file_name, v.status \
FROM {} v LEFT JOIN {} m ON v.file_uuid = m.uuid AND m.status IN ('pending','running') \ FROM {} v LEFT JOIN {} m ON v.file_uuid = m.uuid AND m.status IN ('pending','running') \
WHERE v.status = 'processing' AND m.id IS NULL", WHERE v.status = 'processing' AND m.id IS NULL",
vt, mj vt, mj
)) ))
.fetch_all(db.pool()) .fetch_all(db.pool())
.await .await
.unwrap_or_default(); .unwrap_or_default();
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { let files: Vec<ConsistencyFile> = rows
file_uuid, file_name, status, detail: "no active monitor_job".to_string(), .into_iter()
}).collect(); .map(
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid,
file_name,
status,
detail: "no active monitor_job".to_string(),
},
)
.collect();
ConsistencyCheck { ConsistencyCheck {
check: "orphaned_processing".to_string(), check: "orphaned_processing".to_string(),
@@ -107,17 +136,26 @@ async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck { async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
let vt = schema::table_name("videos"); let vt = schema::table_name("videos");
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!( let rows: Vec<(String, String, String)> =
"SELECT file_uuid, file_name, status FROM {} WHERE status = 'unregistered'", sqlx::query_as::<_, (String, String, String)>(&format!(
vt "SELECT file_uuid, file_name, status FROM {} WHERE status = 'unregistered'",
)) vt
.fetch_all(db.pool()) ))
.await .fetch_all(db.pool())
.unwrap_or_default(); .await
.unwrap_or_default();
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { let files: Vec<ConsistencyFile> = rows
file_uuid, file_name, status, detail: "migration residue".to_string(), .into_iter()
}).collect(); .map(
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid,
file_name,
status,
detail: "migration residue".to_string(),
},
)
.collect();
ConsistencyCheck { ConsistencyCheck {
check: "unregistered_with_uuid".to_string(), check: "unregistered_with_uuid".to_string(),
@@ -130,19 +168,28 @@ async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
async fn check_processing_job_done(db: &PostgresDb) -> ConsistencyCheck { async fn check_processing_job_done(db: &PostgresDb) -> ConsistencyCheck {
let vt = schema::table_name("videos"); let vt = schema::table_name("videos");
let mj = schema::table_name("monitor_jobs"); let mj = schema::table_name("monitor_jobs");
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!( let rows: Vec<(String, String, String)> =
"SELECT v.file_uuid, v.file_name, v.status \ sqlx::query_as::<_, (String, String, String)>(&format!(
"SELECT v.file_uuid, v.file_name, v.status \
FROM {} v JOIN {} m ON v.file_uuid = m.uuid \ FROM {} v JOIN {} m ON v.file_uuid = m.uuid \
WHERE v.status = 'processing' AND m.status = 'completed'", WHERE v.status = 'processing' AND m.status = 'completed'",
vt, mj vt, mj
)) ))
.fetch_all(db.pool()) .fetch_all(db.pool())
.await .await
.unwrap_or_default(); .unwrap_or_default();
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { let files: Vec<ConsistencyFile> = rows
file_uuid, file_name, status, detail: "monitor_job already completed".to_string(), .into_iter()
}).collect(); .map(
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid,
file_name,
status,
detail: "monitor_job already completed".to_string(),
},
)
.collect();
ConsistencyCheck { ConsistencyCheck {
check: "processing_job_done".to_string(), check: "processing_job_done".to_string(),

View File

@@ -54,8 +54,7 @@ pub fn read_identity_file(uuid: &str) -> Result<IdentityFile> {
let path = identity_file_path(uuid); let path = identity_file_path(uuid);
let content = std::fs::read_to_string(&path) let content = std::fs::read_to_string(&path)
.with_context(|| format!("Identity file not found: {} ({})", uuid, path.display()))?; .with_context(|| format!("Identity file not found: {} ({})", uuid, path.display()))?;
serde_json::from_str(&content) serde_json::from_str(&content).with_context(|| format!("Invalid identity.json: {}", uuid))
.with_context(|| format!("Invalid identity.json: {}", uuid))
} }
pub fn write_identity_file(file: &IdentityFile) -> Result<()> { pub fn write_identity_file(file: &IdentityFile) -> Result<()> {
@@ -167,7 +166,10 @@ pub fn rebuild_index() -> Result<usize> {
entries.insert(uuid.clone(), file.name); entries.insert(uuid.clone(), file.name);
} }
Err(e) => { Err(e) => {
warn!("[identity-storage] Skipping {} in index rebuild: {}", uuid, e); warn!(
"[identity-storage] Skipping {} in index rebuild: {}",
uuid, e
);
} }
} }
} }
@@ -187,18 +189,16 @@ pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Resu
let identity_table = crate::core::db::schema::table_name("identities"); let identity_table = crate::core::db::schema::table_name("identities");
let fd_table = crate::core::db::schema::table_name("face_detections"); let fd_table = crate::core::db::schema::table_name("face_detections");
// Schema-aware column selection: dev uses 'name', public uses 'real_name'
let name_col = if identity_table.starts_with("dev.") { "name" } else { "real_name" };
let clean = uuid.replace('-', ""); let clean = uuid.replace('-', "");
let record = sqlx::query_as::<_, crate::core::db::IdentityDetailRecord>( let record = sqlx::query_as::<_, crate::core::db::IdentityDetailRecord>(
&format!( &format!(
"SELECT id, uuid::text, {} AS name, identity_type, source, status, metadata, reference_data, \ "SELECT id, uuid::text, name, identity_type, source, status, metadata, reference_data, \
NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \ NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
face_embedding::real[] as face_embedding, \ face_embedding::real[] as face_embedding, \
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \ tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
name_col, identity_table identity_table
) )
) )
.bind(&clean) .bind(&clean)
@@ -322,8 +322,13 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result
let mut entries: HashMap<String, String> = if index_path.exists() { let mut entries: HashMap<String, String> = if index_path.exists() {
let content = std::fs::read_to_string(&index_path)?; let content = std::fs::read_to_string(&index_path)?;
let v: serde_json::Value = serde_json::from_str(&content).unwrap_or_default(); let v: serde_json::Value = serde_json::from_str(&content).unwrap_or_default();
v["entries"].as_object() v["entries"]
.map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string())).collect()) .as_object()
.map(|obj| {
obj.iter()
.map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string()))
.collect()
})
.unwrap_or_default() .unwrap_or_default()
} else { } else {
HashMap::new() HashMap::new()
@@ -338,7 +343,9 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result
} }
pub async fn save_identity_file(db: &PostgresDb, uuid: &str) -> Result<()> { pub async fn save_identity_file(db: &PostgresDb, uuid: &str) -> Result<()> {
let record = db.get_identity_by_uuid(uuid).await? let record = db
.get_identity_by_uuid(uuid)
.await?
.with_context(|| format!("Identity not found in DB: {}", uuid))?; .with_context(|| format!("Identity not found in DB: {}", uuid))?;
let identity_uuid = record.uuid.clone(); let identity_uuid = record.uuid.clone();
@@ -415,6 +422,7 @@ mod tests {
status: Some("confirmed".to_string()), status: Some("confirmed".to_string()),
tmdb_id: Some(112), tmdb_id: Some(112),
tmdb_profile: Some("https://image.tmdb.org/t/p/w185/test.jpg".to_string()), tmdb_profile: Some("https://image.tmdb.org/t/p/w185/test.jpg".to_string()),
local_profile: None,
metadata: serde_json::json!({"tmdb_character": "Test Role"}), metadata: serde_json::json!({"tmdb_character": "Test Role"}),
file_bindings: vec![FileBinding { file_bindings: vec![FileBinding {
file_uuid: "ffffffffffffffffffffffffffffffff".to_string(), file_uuid: "ffffffffffffffffffffffffffffffff".to_string(),
@@ -442,7 +450,9 @@ mod tests {
fn test_identity_dir_path() { fn test_identity_dir_path() {
let uuid = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; let uuid = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
let p = identity_dir(uuid); let p = identity_dir(uuid);
assert!(p.to_string_lossy().ends_with(&format!("identities/{}", uuid))); assert!(p
.to_string_lossy()
.ends_with(&format!("identities/{}", uuid)));
} }
#[test] #[test]
@@ -463,7 +473,10 @@ mod tests {
let base = Path::new("/tmp/test_base"); let base = Path::new("/tmp/test_base");
let uuid = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; let uuid = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
let p = identity_dir_at(base, uuid); let p = identity_dir_at(base, uuid);
assert_eq!(p, Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")); assert_eq!(
p,
Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")
);
} }
#[test] #[test]
@@ -490,7 +503,10 @@ mod tests {
assert_eq!(read.name, file.name); assert_eq!(read.name, file.name);
assert_eq!(read.source, file.source); assert_eq!(read.source, file.source);
assert_eq!(read.tmdb_id, file.tmdb_id); assert_eq!(read.tmdb_id, file.tmdb_id);
assert_eq!(read.file_bindings[0].face_count, file.file_bindings[0].face_count); assert_eq!(
read.file_bindings[0].face_count,
file.file_bindings[0].face_count
);
let _ = std::fs::remove_dir_all(&tmp); let _ = std::fs::remove_dir_all(&tmp);
} }
@@ -521,9 +537,21 @@ mod tests {
let _ = std::fs::remove_dir_all(&tmp); let _ = std::fs::remove_dir_all(&tmp);
let base = &tmp; let base = &tmp;
std::fs::create_dir_all(base.join("identities").join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")).unwrap(); std::fs::create_dir_all(
std::fs::create_dir_all(base.join("identities").join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")).unwrap(); base.join("identities")
std::fs::create_dir_all(base.join("identities").join("cccccccccccccccccccccccccccccccc")).unwrap(); .join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
)
.unwrap();
std::fs::create_dir_all(
base.join("identities")
.join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
)
.unwrap();
std::fs::create_dir_all(
base.join("identities")
.join("cccccccccccccccccccccccccccccccc"),
)
.unwrap();
std::fs::create_dir_all(base.join("identities").join("not_a_uuid")).unwrap(); std::fs::create_dir_all(base.join("identities").join("not_a_uuid")).unwrap();
std::fs::create_dir_all(base.join("identities").join("short")).unwrap(); std::fs::create_dir_all(base.join("identities").join("short")).unwrap();

View File

@@ -56,19 +56,25 @@ impl IngestionService {
.to_string(); .to_string();
// 1. Compute SHA256 for dedup // 1. Compute SHA256 for dedup
let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path).ok().unwrap_or_default(); let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path)
.ok()
.unwrap_or_default();
// 2. Hash check — same content = already registered // 2. Hash check — same content = already registered
let videos_table = schema::table_name("videos"); let videos_table = schema::table_name("videos");
if !content_hash.is_empty() { if !content_hash.is_empty() {
if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>( if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(&format!(
&format!("SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1", videos_table) "SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1",
) videos_table
))
.bind(&content_hash) .bind(&content_hash)
.fetch_optional(self.db.pool()) .fetch_optional(self.db.pool())
.await .await
{ {
info!("Content already registered: {} ({})", filename, existing_uuid); info!(
"Content already registered: {} ({})",
filename, existing_uuid
);
return Ok(Some(existing_uuid)); return Ok(Some(existing_uuid));
} }
} }
@@ -108,7 +114,8 @@ impl IngestionService {
let probe_result = probe::probe_video(file_path).ok(); let probe_result = probe::probe_video(file_path).ok();
let file_meta = std::fs::metadata(&canonical_path).ok(); let file_meta = std::fs::metadata(&canonical_path).ok();
let duration = probe_result.as_ref() let duration = probe_result
.as_ref()
.and_then(|r| r.format.duration.as_ref()) .and_then(|r| r.format.duration.as_ref())
.and_then(|s| s.parse::<f64>().ok()) .and_then(|s| s.parse::<f64>().ok())
.unwrap_or(0.0); .unwrap_or(0.0);
@@ -148,7 +155,11 @@ impl IngestionService {
} }
let total_frames = { let total_frames = {
let video_stream = probe_result.as_ref().and_then(|pr| pr.streams.iter().find(|s| s.codec_type.as_deref() == Some("video"))); let video_stream = probe_result.as_ref().and_then(|pr| {
pr.streams
.iter()
.find(|s| s.codec_type.as_deref() == Some("video"))
});
if let Some(stream) = video_stream { if let Some(stream) = video_stream {
if let Some(nb_frames_str) = &stream.nb_frames { if let Some(nb_frames_str) = &stream.nb_frames {
@@ -223,11 +234,14 @@ impl IngestionService {
// Store content_hash for dedup // Store content_hash for dedup
if !content_hash.is_empty() { if !content_hash.is_empty() {
let vt = schema::table_name("videos"); let vt = schema::table_name("videos");
let _ = sqlx::query(&format!("UPDATE {} SET content_hash = $1 WHERE file_uuid = $2", vt)) let _ = sqlx::query(&format!(
.bind(&content_hash) "UPDATE {} SET content_hash = $1 WHERE file_uuid = $2",
.bind(&uuid) vt
.execute(self.db.pool()) ))
.await; .bind(&content_hash)
.bind(&uuid)
.execute(self.db.pool())
.await;
} }
self.db self.db
@@ -243,5 +257,3 @@ impl IngestionService {
Ok(Some(uuid)) Ok(Some(uuid))
} }
} }

View File

@@ -17,42 +17,84 @@ mod tests {
#[test] #[test]
fn test_detect_category_image() { fn test_detect_category_image() {
assert_eq!(detect_category(Path::new("photo.jpg")), FileCategory::Image); assert_eq!(detect_category(Path::new("photo.jpg")), FileCategory::Image);
assert_eq!(detect_category(Path::new("photo.jpeg")), FileCategory::Image); assert_eq!(
detect_category(Path::new("photo.jpeg")),
FileCategory::Image
);
assert_eq!(detect_category(Path::new("photo.png")), FileCategory::Image); assert_eq!(detect_category(Path::new("photo.png")), FileCategory::Image);
assert_eq!(detect_category(Path::new("photo.svg")), FileCategory::Image); assert_eq!(detect_category(Path::new("photo.svg")), FileCategory::Image);
assert_eq!(detect_category(Path::new("photo.webp")), FileCategory::Image); assert_eq!(
detect_category(Path::new("photo.webp")),
FileCategory::Image
);
} }
#[test] #[test]
fn test_detect_category_document() { fn test_detect_category_document() {
assert_eq!(detect_category(Path::new("doc.pdf")), FileCategory::Document); assert_eq!(
assert_eq!(detect_category(Path::new("doc.docx")), FileCategory::Document); detect_category(Path::new("doc.pdf")),
assert_eq!(detect_category(Path::new("doc.pages")), FileCategory::Document); FileCategory::Document
assert_eq!(detect_category(Path::new("doc.txt")), FileCategory::Document); );
assert_eq!(
detect_category(Path::new("doc.docx")),
FileCategory::Document
);
assert_eq!(
detect_category(Path::new("doc.pages")),
FileCategory::Document
);
assert_eq!(
detect_category(Path::new("doc.txt")),
FileCategory::Document
);
} }
#[test] #[test]
fn test_detect_category_spreadsheet() { fn test_detect_category_spreadsheet() {
assert_eq!(detect_category(Path::new("data.xlsx")), FileCategory::Spreadsheet); assert_eq!(
assert_eq!(detect_category(Path::new("data.csv")), FileCategory::Spreadsheet); detect_category(Path::new("data.xlsx")),
assert_eq!(detect_category(Path::new("data.numbers")), FileCategory::Spreadsheet); FileCategory::Spreadsheet
);
assert_eq!(
detect_category(Path::new("data.csv")),
FileCategory::Spreadsheet
);
assert_eq!(
detect_category(Path::new("data.numbers")),
FileCategory::Spreadsheet
);
} }
#[test] #[test]
fn test_detect_category_presentation() { fn test_detect_category_presentation() {
assert_eq!(detect_category(Path::new("deck.pptx")), FileCategory::Presentation); assert_eq!(
assert_eq!(detect_category(Path::new("deck.key")), FileCategory::Presentation); detect_category(Path::new("deck.pptx")),
FileCategory::Presentation
);
assert_eq!(
detect_category(Path::new("deck.key")),
FileCategory::Presentation
);
} }
#[test] #[test]
fn test_detect_category_archive() { fn test_detect_category_archive() {
assert_eq!(detect_category(Path::new("files.zip")), FileCategory::Archive); assert_eq!(
assert_eq!(detect_category(Path::new("files.tar.gz")), FileCategory::Archive); detect_category(Path::new("files.zip")),
FileCategory::Archive
);
assert_eq!(
detect_category(Path::new("files.tar.gz")),
FileCategory::Archive
);
} }
#[test] #[test]
fn test_detect_category_unknown() { fn test_detect_category_unknown() {
assert_eq!(detect_category(Path::new("file.xyz")), FileCategory::Unknown); assert_eq!(
detect_category(Path::new("file.xyz")),
FileCategory::Unknown
);
assert_eq!(detect_category(Path::new("file")), FileCategory::Unknown); assert_eq!(detect_category(Path::new("file")), FileCategory::Unknown);
} }
@@ -84,13 +126,18 @@ pub enum FileCategory {
/// Detect file category from path extension /// Detect file category from path extension
pub fn detect_category(path: &Path) -> FileCategory { pub fn detect_category(path: &Path) -> FileCategory {
let ext = path.extension() let ext = path
.extension()
.and_then(|e| e.to_str()) .and_then(|e| e.to_str())
.map(|e| e.to_lowercase()); .map(|e| e.to_lowercase());
match ext.as_deref() { match ext.as_deref() {
Some("mp4" | "mov" | "mkv" | "avi" | "webm" | "m4v" | "mpeg") => FileCategory::Video, Some("mp4" | "mov" | "mkv" | "avi" | "webm" | "m4v" | "mpeg") => FileCategory::Video,
Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => FileCategory::Image, Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => {
Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => FileCategory::Document, FileCategory::Image
}
Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => {
FileCategory::Document
}
Some("xls" | "xlsx" | "csv" | "ods" | "numbers") => FileCategory::Spreadsheet, Some("xls" | "xlsx" | "csv" | "ods" | "numbers") => FileCategory::Spreadsheet,
Some("ppt" | "pptx" | "odp" | "key") => FileCategory::Presentation, Some("ppt" | "pptx" | "odp" | "key") => FileCategory::Presentation,
Some("zip" | "tar" | "gz" | "tgz" | "7z" | "rar") => FileCategory::Archive, Some("zip" | "tar" | "gz" | "tgz" | "7z" | "rar") => FileCategory::Archive,
@@ -102,16 +149,20 @@ pub fn detect_category(path: &Path) -> FileCategory {
pub fn base_format_info(path: &Path) -> serde_json::Value { pub fn base_format_info(path: &Path) -> serde_json::Value {
let meta = std::fs::metadata(path).ok(); let meta = std::fs::metadata(path).ok();
let size = meta.as_ref().map(|m| m.len()).unwrap_or(0); let size = meta.as_ref().map(|m| m.len()).unwrap_or(0);
let mtime = meta.as_ref() let mtime = meta
.as_ref()
.and_then(|m| m.modified().ok()) .and_then(|m| m.modified().ok())
.and_then(|t| { .and_then(|t| {
let secs = t.duration_since(SystemTime::UNIX_EPOCH).ok()?.as_secs() as i64; let secs = t.duration_since(SystemTime::UNIX_EPOCH).ok()?.as_secs() as i64;
chrono::DateTime::from_timestamp(secs, 0) chrono::DateTime::from_timestamp(secs, 0).map(|dt| dt.to_rfc3339())
.map(|dt| dt.to_rfc3339())
}) })
.unwrap_or_default(); .unwrap_or_default();
let fname = path.to_string_lossy().to_string(); let fname = path.to_string_lossy().to_string();
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("").to_lowercase(); let ext = path
.extension()
.and_then(|e| e.to_str())
.unwrap_or("")
.to_lowercase();
let cat = detect_category(path); let cat = detect_category(path);
let file_type = match cat { let file_type = match cat {
FileCategory::Video => "video", FileCategory::Video => "video",
@@ -150,7 +201,13 @@ fn ffprobe_probe(path: &Path, format_base: serde_json::Value) -> serde_json::Val
} }
/// Run Python probe for document/spreadsheet/presentation files /// Run Python probe for document/spreadsheet/presentation files
fn python_probe(path: &Path, category: &FileCategory, scripts_dir: &str, python_path: &str, format_base: serde_json::Value) -> serde_json::Value { fn python_probe(
path: &Path,
category: &FileCategory,
scripts_dir: &str,
python_path: &str,
format_base: serde_json::Value,
) -> serde_json::Value {
let script = format!("{}/probe_file.py", scripts_dir); let script = format!("{}/probe_file.py", scripts_dir);
if !std::path::Path::new(&script).exists() { if !std::path::Path::new(&script).exists() {
return minimal_probe(format_base); return minimal_probe(format_base);
@@ -184,18 +241,12 @@ fn minimal_probe(format_base: serde_json::Value) -> serde_json::Value {
/// Unified probe: dispatches to the right probe based on file type /// Unified probe: dispatches to the right probe based on file type
/// Returns a probe_json-compatible Value /// Returns a probe_json-compatible Value
pub async fn unified_probe( pub async fn unified_probe(path: &Path, scripts_dir: &str, python_path: &str) -> serde_json::Value {
path: &Path,
scripts_dir: &str,
python_path: &str,
) -> serde_json::Value {
let cat = detect_category(path); let cat = detect_category(path);
let format_base = base_format_info(path); let format_base = base_format_info(path);
match cat { match cat {
FileCategory::Video | FileCategory::Image => { FileCategory::Video | FileCategory::Image => ffprobe_probe(path, format_base),
ffprobe_probe(path, format_base)
}
FileCategory::Document | FileCategory::Spreadsheet | FileCategory::Presentation => { FileCategory::Document | FileCategory::Spreadsheet | FileCategory::Presentation => {
python_probe(path, &cat, scripts_dir, python_path, format_base) python_probe(path, &cat, scripts_dir, python_path, format_base)
} }

View File

@@ -1,5 +1,6 @@
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::process::Command;
use std::time::Duration; use std::time::Duration;
use super::executor::PythonExecutor; use super::executor::PythonExecutor;
@@ -27,13 +28,21 @@ pub async fn process_cut(
output_path: &str, output_path: &str,
uuid: Option<&str>, uuid: Option<&str>,
) -> Result<CutResult> { ) -> Result<CutResult> {
// Try native ffmpeg-based scene detection first
let result = try_native_cut(video_path);
if let Ok(r) = result {
let json = serde_json::to_string_pretty(&r)?;
std::fs::write(output_path, &json)
.with_context(|| format!("Failed to write {:?}", output_path))?;
return Ok(r);
}
// Fallback: Python scenedetect
tracing::warn!("[CUT] Native impl failed, falling back to Python");
let executor = PythonExecutor::new()?; let executor = PythonExecutor::new()?;
let script_path = executor.script_path("cut_processor.py"); let script_path = executor.script_path("cut_processor.py");
tracing::info!("[CUT] Starting scene detection: {}", video_path);
if !script_path.exists() { if !script_path.exists() {
tracing::warn!("[CUT] Script not found, returning empty result");
return Ok(CutResult { return Ok(CutResult {
frame_count: 0, frame_count: 0,
fps: 0.0, fps: 0.0,
@@ -53,19 +62,179 @@ pub async fn process_cut(
.with_context(|| format!("Failed to run {:?}", script_path))?; .with_context(|| format!("Failed to run {:?}", script_path))?;
let json_str = std::fs::read_to_string(output_path).context("Failed to read CUT output")?; let json_str = std::fs::read_to_string(output_path).context("Failed to read CUT output")?;
let result: CutResult = let result: CutResult =
serde_json::from_str(&json_str).context("Failed to parse CUT output")?; serde_json::from_str(&json_str).context("Failed to parse CUT output")?;
tracing::info!("[CUT] Result: {} scenes detected", result.scenes.len());
Ok(result) Ok(result)
} }
// ── Native ffmpeg scene detection ─────────────────────────────────
fn try_native_cut(video_path: &str) -> Result<CutResult> {
// Step 1: Get video info (fps, frame count)
let probe = Command::new("ffprobe")
.args([
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
video_path,
])
.output()
.context("Failed to run ffprobe")?;
let probe_info: serde_json::Value =
serde_json::from_slice(&probe.stdout).context("Failed to parse ffprobe output")?;
let streams = probe_info["streams"]
.as_array()
.map_or(vec![], |s| s.clone());
let video_stream = streams.iter().find(|s| s["codec_type"] == "video");
let fps = video_stream
.and_then(|s| s["r_frame_rate"].as_str().and_then(parse_fraction))
.unwrap_or(30.0);
let total_frames: u64 = video_stream
.and_then(|s| s["nb_frames"].as_str())
.and_then(|s| s.parse().ok())
.unwrap_or(0);
let duration: f64 = probe_info["format"]["duration"]
.as_str()
.and_then(|s| s.parse().ok())
.unwrap_or(0.0);
// Step 2: Use ffmpeg scene detection filter
// The `scene` filter computes the difference between consecutive frames
// and outputs when the difference exceeds the threshold (0.3 = medium sensitivity)
let scene_output = Command::new("ffprobe")
.args([
"-v",
"quiet",
"-show_entries",
"frame=pts_time",
"-of",
"compact=p=0:nk=1",
"-f",
"lavfi",
&format!("movie={},select='gt(scene\\,0.3)',showinfo", video_path),
"-show_frames",
])
.output()
.context("Failed to run ffmpeg scene detection")?;
let stderr_output = String::from_utf8_lossy(&scene_output.stderr);
let mut scene_times: Vec<f64> = Vec::new();
// Parse ffmpeg showinfo output for scene changes
// Format: [Parsed_showinfo...] pts:123.456 pts_time:123.456 ...
for line in stderr_output.lines() {
if line.contains("pts_time:") {
if let Some(pos) = line.find("pts_time:") {
let rest = &line[pos + 9..];
let time_str = rest.split_whitespace().next().unwrap_or("");
if let Ok(t) = time_str.parse::<f64>() {
scene_times.push(t);
}
}
}
}
// Step 3: Build scenes from cut points
let mut scenes: Vec<CutScene> = Vec::new();
let mut prev_time = 0.0;
let mut prev_frame: u64 = 0;
for (i, &cut_time) in scene_times.iter().enumerate() {
let end_frame = (cut_time * fps).round() as u64;
let start_frame = prev_frame;
if end_frame > start_frame {
scenes.push(CutScene {
scene_number: (i + 1) as u32,
start_frame: prev_frame,
end_frame: end_frame.saturating_sub(1),
start_time: prev_time,
end_time: cut_time - (1.0 / fps),
});
}
prev_time = cut_time;
prev_frame = end_frame;
}
// Final scene (last cut point → end of video)
if total_frames > 0 && prev_frame < total_frames {
scenes.push(CutScene {
scene_number: (scenes.len() + 1) as u32,
start_frame: prev_frame,
end_frame: total_frames.saturating_sub(1),
start_time: prev_time,
end_time: duration,
});
}
// If no scenes detected, create a single scene covering the whole video
if scenes.is_empty() && total_frames > 0 {
scenes.push(CutScene {
scene_number: 1,
start_frame: 0,
end_frame: total_frames.saturating_sub(1),
start_time: 0.0,
end_time: duration,
});
}
Ok(CutResult {
frame_count: total_frames,
fps,
scenes,
})
}
/// Parse a frame-rate string into `f64`.
///
/// Accepts either a rational such as `"30000/1001"` (split at the first `/`) or a
/// plain number such as `"30"`. Returns `None` when a component is not numeric
/// or when the denominator is not strictly positive.
fn parse_fraction(s: &str) -> Option<f64> {
    match s.split_once('/') {
        Some((numerator, denominator)) => {
            let numerator: f64 = numerator.parse().ok()?;
            let denominator: f64 = denominator.parse().ok()?;
            if denominator > 0.0 {
                Some(numerator / denominator)
            } else {
                // Same outcome as parsing the whole string: text that still
                // contains '/' is never a valid float, so this yields `None`.
                s.parse::<f64>().ok()
            }
        }
        None => s.parse::<f64>().ok(),
    }
}
// ── Tests ─────────────────────────────────────────────────────────
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
// A rational frame rate is divided out: 30000/1001 is roughly 29.97.
#[test]
fn test_parse_fraction() {
let r = parse_fraction("30000/1001").unwrap();
assert!((r - 29.97).abs() < 0.01);
}
// A string without '/' is parsed as a plain number.
#[test]
fn test_parse_fraction_int() {
let r = parse_fraction("30").unwrap();
assert!((r - 30.0).abs() < 0.01);
}
// Non-numeric components must yield `None` rather than a panic.
#[test]
fn test_parse_fraction_invalid() {
assert!(parse_fraction("not/a/num").is_none());
}
// A zero denominator is rejected instead of producing an infinite rate.
#[test]
fn test_parse_fraction_zero_den() {
assert!(parse_fraction("1/0").is_none());
}
#[test] #[test]
fn test_cut_result_serialization() { fn test_cut_result_serialization() {
let result = CutResult { let result = CutResult {
@@ -81,8 +250,9 @@ mod tests {
}; };
let json = serde_json::to_string(&result).unwrap(); let json = serde_json::to_string(&result).unwrap();
assert!(json.contains("frame_count"));
assert!(json.contains("scene_number")); assert!(json.contains("scene_number"));
assert!(json.contains("1")); assert!(json.contains("fps"));
} }
#[test] #[test]
@@ -90,20 +260,23 @@ mod tests {
let json = r#"{ let json = r#"{
"frame_count": 100, "frame_count": 100,
"fps": 30.0, "fps": 30.0,
"scenes": [ "scenes": [{
{"scene_number": 1, "start_frame": 0, "end_frame": 30, "start_time": 0.0, "end_time": 1.0}, "scene_number": 1,
{"scene_number": 2, "start_frame": 31, "end_frame": 60, "start_time": 1.033, "end_time": 2.0} "start_frame": 0,
] "end_frame": 30,
"start_time": 0.0,
"end_time": 1.0
}]
}"#; }"#;
let result: CutResult = serde_json::from_str(json).unwrap(); let result: CutResult = serde_json::from_str(json).unwrap();
assert_eq!(result.frame_count, 100); assert_eq!(result.scenes.len(), 1);
assert_eq!(result.scenes.len(), 2); assert_eq!(result.scenes[0].scene_number, 1);
assert_eq!(result.scenes[1].scene_number, 2); assert_eq!(result.scenes[0].start_frame, 0);
} }
#[test] #[test]
fn test_cut_result_empty_scenes() { fn test_empty_scenes() {
let result = CutResult { let result = CutResult {
frame_count: 0, frame_count: 0,
fps: 0.0, fps: 0.0,
@@ -111,17 +284,4 @@ mod tests {
}; };
assert!(result.scenes.is_empty()); assert!(result.scenes.is_empty());
} }
#[test]
fn test_cut_scene_times() {
let scene = CutScene {
scene_number: 1,
start_frame: 0,
end_frame: 30,
start_time: 0.0,
end_time: 1.0,
};
assert!(scene.end_time > scene.start_time);
assert_eq!(scene.scene_number, 1);
}
} }

View File

@@ -109,11 +109,10 @@ pub fn validate_python_env() -> Result<()> {
tracing::warn!("Expected Python 3.11, got: {}", version.trim()); tracing::warn!("Expected Python 3.11, got: {}", version.trim());
} }
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR") let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
.unwrap_or_else(|_| { let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR")); manifest.join("scripts").to_string_lossy().to_string()
manifest.join("scripts").to_string_lossy().to_string() });
});
let script_path = PathBuf::from(&scripts_dir); let script_path = PathBuf::from(&scripts_dir);
if !script_path.exists() { if !script_path.exists() {
anyhow::bail!("Scripts directory not found at {}", scripts_dir); anyhow::bail!("Scripts directory not found at {}", scripts_dir);
@@ -133,11 +132,10 @@ impl PythonExecutor {
pub fn new() -> Result<Self> { pub fn new() -> Result<Self> {
let python_path = std::env::var("MOMENTRY_PYTHON_PATH") let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string()); .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR") let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
.unwrap_or_else(|_| { let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR")); manifest.join("scripts").to_string_lossy().to_string()
manifest.join("scripts").to_string_lossy().to_string() });
});
let python_bin = PathBuf::from(&python_path); let python_bin = PathBuf::from(&python_path);
let scripts_path = PathBuf::from(&scripts_dir); let scripts_path = PathBuf::from(&scripts_dir);
@@ -173,7 +171,8 @@ impl PythonExecutor {
if let Some(expected_hash) = self.checksums.get(&rel_path) { if let Some(expected_hash) = self.checksums.get(&rel_path) {
let output = std::process::Command::new("shasum") let output = std::process::Command::new("shasum")
.arg("-a").arg("256") .arg("-a")
.arg("256")
.arg(&script_path) .arg(&script_path)
.output() .output()
.context("Failed to run shasum for integrity check")?; .context("Failed to run shasum for integrity check")?;
@@ -235,8 +234,9 @@ impl PythonExecutor {
} }
// Verify script integrity via SHA256 checksum before execution // Verify script integrity via SHA256 checksum before execution
self.verify_script_integrity(script_name) self.verify_script_integrity(script_name).context(
.context("Pre-execution integrity check failed — possible version mismatch or corruption")?; "Pre-execution integrity check failed — possible version mismatch or corruption",
)?;
// 標記輸出檔為處理中add .tmp suffix // 標記輸出檔為處理中add .tmp suffix
let output_path = args.get(1).map(|p| std::path::PathBuf::from(p)); let output_path = args.get(1).map(|p| std::path::PathBuf::from(p));

View File

@@ -44,22 +44,59 @@ pub enum CrowdSize {
/// Indoor-indicative YOLO classes (COCO labels) /// Indoor-indicative YOLO classes (COCO labels)
const INDOOR_CLASSES: &[&str] = &[ const INDOOR_CLASSES: &[&str] = &[
"chair", "couch", "bed", "dining table", "toilet", "tv", "laptop", "chair",
"microwave", "oven", "refrigerator", "sink", "book", "clock", "couch",
"vase", "potted plant", "bed",
"dining table",
"toilet",
"tv",
"laptop",
"microwave",
"oven",
"refrigerator",
"sink",
"book",
"clock",
"vase",
"potted plant",
]; ];
/// Vehicle-indicative classes (person + vehicle = transport scene).
///
/// Both the COCO spellings ("airplane", "motorcycle") and the older VOC-style
/// spellings ("aeroplane", "motorbike") are listed. The sibling class lists in
/// this file use COCO names, so without the COCO spellings airplanes and
/// motorcycles reported by a COCO-labelled model would never count as vehicles.
/// A given model emits only one spelling per class, so listing both does not
/// double-count.
const VEHICLE_CLASSES: &[&str] = &[
    "car",
    "truck",
    "bus",
    "train",
    "boat",
    "airplane",
    "aeroplane",
    "bicycle",
    "motorcycle",
    "motorbike",
];
/// Outdoor-indicative YOLO classes /// Outdoor-indicative YOLO classes
const OUTDOOR_CLASSES: &[&str] = &[ const OUTDOOR_CLASSES: &[&str] = &[
"car", "truck", "bus", "train", "boat", "airplane", "car",
"traffic light", "fire hydrant", "stop sign", "parking meter", "truck",
"bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bus",
"bear", "zebra", "giraffe", "tree", "train",
"boat",
"airplane",
"traffic light",
"fire hydrant",
"stop sign",
"parking meter",
"bench",
"bird",
"cat",
"dog",
"horse",
"sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"tree",
]; ];
/// Build heuristic scene metadata from disk files (yolo.json + DB face data). /// Build heuristic scene metadata from disk files (yolo.json + DB face data).
@@ -113,13 +150,14 @@ pub async fn build_heuristic_scene_meta(
// Get face counts grouped by frame // Get face counts grouped by frame
let fd_table = schema::table_name("face_detections"); let fd_table = schema::table_name("face_detections");
let face_rows: Vec<(i64, i64)> = sqlx::query_as( let face_rows: Vec<(i64, i64)> = sqlx::query_as(&format!(
&format!("SELECT frame_number, COUNT(*) as fc \ "SELECT frame_number, COUNT(*) as fc \
FROM {} \ FROM {} \
WHERE file_uuid = $1 AND frame_number IS NOT NULL \ WHERE file_uuid = $1 AND frame_number IS NOT NULL \
GROUP BY frame_number \ GROUP BY frame_number \
ORDER BY frame_number", fd_table), ORDER BY frame_number",
) fd_table
))
.bind(file_uuid) .bind(file_uuid)
.fetch_all(pool) .fetch_all(pool)
.await .await
@@ -166,7 +204,10 @@ pub async fn build_heuristic_scene_meta(
let outdoor_ratio = outdoor_objects as f64 / frame_count.max(1) as f64; let outdoor_ratio = outdoor_objects as f64 / frame_count.max(1) as f64;
let total_indicator = indoor_ratio + outdoor_ratio; let total_indicator = indoor_ratio + outdoor_ratio;
let (indoor_score, outdoor_score) = if total_indicator > 0.0 { let (indoor_score, outdoor_score) = if total_indicator > 0.0 {
(indoor_ratio / total_indicator, outdoor_ratio / total_indicator) (
indoor_ratio / total_indicator,
outdoor_ratio / total_indicator,
)
} else { } else {
(0.5, 0.5) (0.5, 0.5)
}; };
@@ -187,17 +228,13 @@ pub async fn build_heuristic_scene_meta(
.map(|c| class_frame_presence.get(*c).copied().unwrap_or(0)) .map(|c| class_frame_presence.get(*c).copied().unwrap_or(0))
.sum(); .sum();
let person_ratio = person_frames as f64 / frame_count.max(1) as f64; let person_ratio = person_frames as f64 / frame_count.max(1) as f64;
let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0 let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0 && outdoor_score > 0.3;
&& outdoor_score > 0.3;
// Dominant objects: rank by frame presence (not total count) // Dominant objects: rank by frame presence (not total count)
let mut sorted: Vec<_> = class_frame_presence.into_iter().collect(); let mut sorted: Vec<_> = class_frame_presence.into_iter().collect();
sorted.sort_by(|a, b| b.1.cmp(&a.1)); sorted.sort_by(|a, b| b.1.cmp(&a.1));
let dominant_objects: Vec<String> = sorted let dominant_objects: Vec<String> =
.iter() sorted.iter().take(3).map(|(cls, _)| cls.clone()).collect();
.take(3)
.map(|(cls, _)| cls.clone())
.collect();
segments.push(SceneSegmentMeta { segments.push(SceneSegmentMeta {
segment_index: idx as u32 + 1, segment_index: idx as u32 + 1,
@@ -229,12 +266,15 @@ pub async fn build_heuristic_scene_meta(
/// Full pipeline entry point: reads CUT data, generates heuristic metadata, writes JSON. /// Full pipeline entry point: reads CUT data, generates heuristic metadata, writes JSON.
/// Called from job_worker post-processing trigger. /// Called from job_worker post-processing trigger.
pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &str) -> Result<usize> { pub async fn generate_scene_meta(
db: &crate::core::db::PostgresDb,
file_uuid: &str,
) -> Result<usize> {
let pool = db.pool(); let pool = db.pool();
// Read CUT segment boundaries from cut.json // Read CUT segment boundaries from cut.json
let cut_path = Path::new(crate::core::config::OUTPUT_DIR.as_str()) let cut_path =
.join(format!("{}.cut.json", file_uuid)); Path::new(crate::core::config::OUTPUT_DIR.as_str()).join(format!("{}.cut.json", file_uuid));
let segments: Vec<(i64, i64, f64, f64)> = if cut_path.exists() { let segments: Vec<(i64, i64, f64, f64)> = if cut_path.exists() {
let cut_str = tokio::fs::read_to_string(&cut_path) let cut_str = tokio::fs::read_to_string(&cut_path)
.await .await
@@ -250,8 +290,7 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s
start_time: f64, start_time: f64,
end_time: f64, end_time: f64,
} }
let cut: CutJson = serde_json::from_str(&cut_str) let cut: CutJson = serde_json::from_str(&cut_str).context("Failed to parse cut.json")?;
.context("Failed to parse cut.json")?;
cut.scenes cut.scenes
.into_iter() .into_iter()
.map(|s| (s.start_frame, s.end_frame, s.start_time, s.end_time)) .map(|s| (s.start_frame, s.end_frame, s.start_time, s.end_time))
@@ -259,9 +298,10 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s
} else { } else {
// Fallback: query DB for video duration, make one segment // Fallback: query DB for video duration, make one segment
let videos_table = schema::table_name("videos"); let videos_table = schema::table_name("videos");
let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as( let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as(&format!(
&format!("SELECT total_frames, duration FROM {} WHERE file_uuid = $1", videos_table), "SELECT total_frames, duration FROM {} WHERE file_uuid = $1",
) videos_table
))
.bind(file_uuid) .bind(file_uuid)
.fetch_optional(pool) .fetch_optional(pool)
.await .await

View File

@@ -10,6 +10,7 @@ pub mod ocr;
pub mod pose; pub mod pose;
pub mod scene_classification; pub mod scene_classification;
pub mod story; pub mod story;
pub mod tkg;
pub mod visual_chunk; pub mod visual_chunk;
pub mod yolo; pub mod yolo;
@@ -25,7 +26,8 @@ pub use face_recognition::{
RecognizedFaceDetection, RecognizedFaceDetection,
}; };
pub use heuristic_scene::{ pub use heuristic_scene::{
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta, SceneSegmentMeta, build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta,
SceneSegmentMeta,
}; };
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText}; pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult}; pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
@@ -34,5 +36,6 @@ pub use scene_classification::{
SceneSegment, SceneSegment,
}; };
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats}; pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
pub use tkg::{build_tkg, TkgResult};
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult}; pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult}; pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};

View File

@@ -106,7 +106,10 @@ pub async fn process_story(
} }
// Fallback: Python script // Fallback: Python script
tracing::warn!("[STORY] Native impl failed, falling back to Python: {:?}", result.err()); tracing::warn!(
"[STORY] Native impl failed, falling back to Python: {:?}",
result.err()
);
let executor = PythonExecutor::new()?; let executor = PythonExecutor::new()?;
let script_path = executor.script_path("story_processor.py"); let script_path = executor.script_path("story_processor.py");
@@ -145,7 +148,11 @@ pub async fn process_story(
// ── Native implementation ───────────────────────────────────────── // ── Native implementation ─────────────────────────────────────────
fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -> Result<StoryResult> { fn try_native_story(
_video_path: &str,
output_path: &str,
_uuid: Option<&str>,
) -> Result<StoryResult> {
let output_dir = Path::new(output_path).parent().unwrap_or(Path::new(".")); let output_dir = Path::new(output_path).parent().unwrap_or(Path::new("."));
let basename = Path::new(output_path) let basename = Path::new(output_path)
.file_stem() .file_stem()
@@ -160,8 +167,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -
let asr_data: AsrData = if asr_path.exists() { let asr_data: AsrData = if asr_path.exists() {
let content = std::fs::read_to_string(&asr_path) let content = std::fs::read_to_string(&asr_path)
.with_context(|| format!("Failed to read {:?}", asr_path))?; .with_context(|| format!("Failed to read {:?}", asr_path))?;
serde_json::from_str(&content) serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", asr_path))?
.with_context(|| format!("Failed to parse {:?}", asr_path))?
} else { } else {
AsrData { segments: vec![] } AsrData { segments: vec![] }
}; };
@@ -169,8 +175,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -
let cut_data: CutData = if cut_path.exists() { let cut_data: CutData = if cut_path.exists() {
let content = std::fs::read_to_string(&cut_path) let content = std::fs::read_to_string(&cut_path)
.with_context(|| format!("Failed to read {:?}", cut_path))?; .with_context(|| format!("Failed to read {:?}", cut_path))?;
serde_json::from_str(&content) serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", cut_path))?
.with_context(|| format!("Failed to parse {:?}", cut_path))?
} else { } else {
CutData { scenes: vec![] } CutData { scenes: vec![] }
}; };
@@ -376,22 +381,39 @@ fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64
let mut unique: Vec<&String> = objects.iter().collect(); let mut unique: Vec<&String> = objects.iter().collect();
unique.sort(); unique.sort();
unique.dedup(); unique.dedup();
let objs = unique.iter().take(5).map(|s| (*s).as_str()).collect::<Vec<_>>().join(", "); let objs = unique
.iter()
.take(5)
.map(|s| (*s).as_str())
.collect::<Vec<_>>()
.join(", ");
parts.push(format!("Visuals: {}", objs)); parts.push(format!("Visuals: {}", objs));
} }
format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | ")) format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | "))
} }
fn generate_scene_narrative(objects: &[String], start: f64, end: f64, scene_count: usize) -> String { fn generate_scene_narrative(
objects: &[String],
start: f64,
end: f64,
scene_count: usize,
) -> String {
let mut unique: Vec<&String> = objects.iter().collect(); let mut unique: Vec<&String> = objects.iter().collect();
unique.sort(); unique.sort();
unique.dedup(); unique.dedup();
let top5: Vec<&String> = unique.iter().take(5).cloned().collect(); let top5: Vec<&String> = unique.iter().take(5).cloned().collect();
if !top5.is_empty() { if !top5.is_empty() {
let obj_str = top5.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", "); let obj_str = top5
format!("[{:.0}s-{:.0}s] {} scenes. Visuals: {}.", start, end, scene_count, obj_str) .iter()
.map(|s| s.as_str())
.collect::<Vec<_>>()
.join(", ");
format!(
"[{:.0}s-{:.0}s] {} scenes. Visuals: {}.",
start, end, scene_count, obj_str
)
} else { } else {
format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count) format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count)
} }
@@ -408,7 +430,8 @@ mod tests {
let text = generate_narrative( let text = generate_narrative(
&["Hello world".to_string()], &["Hello world".to_string()],
&["person".to_string()], &["person".to_string()],
0.0, 5.0, 0.0,
5.0,
); );
assert!(text.contains("[0s-5s]")); assert!(text.contains("[0s-5s]"));
assert!(text.contains("Speech:")); assert!(text.contains("Speech:"));
@@ -576,7 +599,10 @@ mod tests {
}; };
assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2); assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2);
assert!(result.child_chunks.iter().all(|c| c.parent_chunk_id.is_some())); assert!(result
.child_chunks
.iter()
.all(|c| c.parent_chunk_id.is_some()));
assert!(result.parent_chunks[0].parent_chunk_id.is_none()); assert!(result.parent_chunks[0].parent_chunk_id.is_none());
} }
@@ -594,11 +620,7 @@ mod tests {
std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap(); std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap();
std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap(); std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap();
let result = try_native_story( let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();
"/dummy.mp4",
out_path.to_str().unwrap(),
None,
).unwrap();
assert_eq!(result.stats.total_child_chunks, 0); assert_eq!(result.stats.total_child_chunks, 0);
assert_eq!(result.stats.total_parent_chunks, 0); assert_eq!(result.stats.total_parent_chunks, 0);
@@ -616,13 +638,17 @@ mod tests {
let cut_path = dir.join(format!("{}.cut.json", basename)); let cut_path = dir.join(format!("{}.cut.json", basename));
let out_path = dir.join(format!("{}.story.json", basename)); let out_path = dir.join(format!("{}.story.json", basename));
std::fs::write(&asr_path, r#"{ std::fs::write(
&asr_path,
r#"{
"segments": [ "segments": [
{"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95}, {"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95},
{"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92}, {"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92},
{"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90} {"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90}
] ]
}"#).unwrap(); }"#,
)
.unwrap();
std::fs::write(&cut_path, r#"{ std::fs::write(&cut_path, r#"{
"scenes": [ "scenes": [
@@ -631,11 +657,7 @@ mod tests {
] ]
}"#).unwrap(); }"#).unwrap();
let result = try_native_story( let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();
"/dummy.mp4",
out_path.to_str().unwrap(),
None,
).unwrap();
assert_eq!(result.stats.asr_children, 3); assert_eq!(result.stats.asr_children, 3);
assert_eq!(result.stats.cut_children, 2); assert_eq!(result.stats.cut_children, 2);
@@ -649,7 +671,11 @@ mod tests {
for child in &result.child_chunks { for child in &result.child_chunks {
if child.source == "asr" { if child.source == "asr" {
assert!(child.parent_chunk_id.is_some()); assert!(child.parent_chunk_id.is_some());
assert!(child.parent_chunk_id.as_ref().unwrap().starts_with("story_asr_")); assert!(child
.parent_chunk_id
.as_ref()
.unwrap()
.starts_with("story_asr_"));
} }
} }

703
src/core/processor/tkg.rs Normal file
View File

@@ -0,0 +1,703 @@
use anyhow::{Context, Result};
use serde::Deserialize;
use sqlx::PgPool;
use std::collections::HashMap;
use std::path::Path;
use crate::core::db::postgres_db::PostgresDb;
/// Qualify a table name with the schema from the `DATABASE_SCHEMA` environment
/// variable.
///
/// The schema defaults to `dev` when the variable is unset or unreadable. For
/// the `public` schema the bare table name is returned; otherwise the result is
/// `<schema>.<name>`.
fn t(name: &str) -> String {
    match std::env::var("DATABASE_SCHEMA") {
        Ok(schema) if schema == "public" => name.to_string(),
        Ok(schema) => format!("{}.{}", schema, name),
        Err(_) => format!("{}.{}", "dev", name),
    }
}
// ── Input data structs ────────────────────────────────────────────
/// Top-level shape deserialized from a YOLO JSON document.
#[derive(Debug, Deserialize)]
struct YoloJson {
/// Per-frame entries keyed by string; empty when the key is missing.
/// NOTE(review): keys are presumably frame numbers rendered as strings — confirm against the producer.
#[serde(default)]
frames: HashMap<String, YoloFrameEntry>,
}
/// One frame's worth of YOLO output.
///
/// Two list fields are accepted and both default to empty.
/// NOTE(review): `detections` and `objects` look like alternative key names for the
/// same data across producer versions — confirm which one the readers should prefer.
#[derive(Debug, Deserialize)]
struct YoloFrameEntry {
/// Detections stored under the `detections` key (empty if absent).
#[serde(default)]
detections: Vec<YoloDetEntry>,
/// Detections stored under the `objects` key (empty if absent).
#[serde(default)]
objects: Vec<YoloDetEntry>,
}
/// A single detected object within a frame.
#[derive(Debug, Deserialize)]
struct YoloDetEntry {
/// Class label of the detection; empty string when missing.
#[serde(default)]
class_name: String,
/// Detector confidence; 0.0 when missing.
#[serde(default)]
confidence: f64,
}
/// Top-level shape deserialized from an ASRX (speaker-attributed ASR) JSON document.
#[derive(Debug, Deserialize)]
struct AsrxJson {
/// Speaker-attributed segments; empty when the key is missing.
#[serde(default)]
segments: Vec<AsrxSegmentEntry>,
/// Optional per-speaker statistics keyed by string (presumably the speaker id — confirm).
#[serde(default)]
speaker_stats: Option<HashMap<String, AsrxSpeakerStat>>,
}
/// One speaker-attributed segment of an ASRX document.
///
/// Every field is optional on input and falls back to its type's default. That
/// includes `start_frame` / `end_frame`: they are never read by this module
/// (hence `dead_code`), so requiring them would make the whole document fail to
/// parse for no benefit when a producer omits them.
#[derive(Debug, Deserialize)]
struct AsrxSegmentEntry {
    /// Identifier of the speaker this segment is attributed to.
    #[serde(default)]
    speaker_id: String,
    /// Segment start time (presumably seconds — confirm against the producer).
    #[serde(default)]
    start_time: f64,
    /// Segment end time, in the same unit as `start_time`.
    #[serde(default)]
    end_time: f64,
    /// First frame of the segment; parsed but currently unused.
    #[serde(default)]
    #[allow(dead_code)]
    start_frame: i64,
    /// Last frame of the segment; parsed but currently unused.
    #[serde(default)]
    #[allow(dead_code)]
    end_frame: i64,
}
/// Per-speaker statistics entry from an ASRX document.
#[derive(Debug, Deserialize)]
struct AsrxSpeakerStat {
/// Count reported for the speaker; 0 when missing.
/// NOTE(review): presumably the number of segments for that speaker — confirm.
#[serde(default)]
count: i64,
}
// ── Face detection trace ──────────────────────────────────────────
/// One aggregated row per `trace_id`, produced by the `GROUP BY trace_id`
/// query in `build_face_trace_nodes`.
#[derive(Debug, sqlx::FromRow)]
struct FaceTraceRow {
    /// Trace identifier shared by the grouped detections.
    trace_id: i64,
    /// Number of detection rows in the trace (`COUNT(*)`).
    frame_count: i64,
    /// Smallest `frame_number` in the trace.
    start_f: i64,
    /// Largest `frame_number` in the trace.
    end_f: i64,
    /// Average of `x` over the trace (`None` when SQL `AVG` yields NULL).
    avg_x: Option<f64>,
    /// Average of `y` over the trace.
    avg_y: Option<f64>,
    /// Average of `width` over the trace.
    avg_w: Option<f64>,
    /// Average of `height` over the trace.
    avg_h: Option<f64>,
}
/// One `face_detections` row that belongs to a trace, as selected by
/// `build_co_occurrence_edges`.
#[derive(Debug, sqlx::FromRow)]
struct FaceDetectionRow {
    /// Trace the detection belongs to.
    trace_id: i64,
    /// Frame the detection was found in; used to look up the YOLO frame entry.
    frame_number: i64,
    // The bounding-box columns below are selected by the query but not read
    // anywhere in this file, hence the `allow(dead_code)` attributes.
    #[allow(dead_code)]
    x: Option<f64>,
    #[allow(dead_code)]
    y: Option<f64>,
    #[allow(dead_code)]
    width: Option<f64>,
    #[allow(dead_code)]
    height: Option<f64>,
}
// ── Public API ────────────────────────────────────────────────────
/// Summary returned by [`build_tkg`]: how many node and edge upserts each
/// builder performed.
///
/// The edge counters count executed upserts; because the statements use
/// `ON CONFLICT ... DO UPDATE`, a value can exceed the number of distinct
/// edges stored.
pub struct TkgResult {
    /// `face_trace` nodes upserted.
    pub face_trace_nodes: usize,
    /// `object` nodes upserted (one per YOLO class).
    pub object_nodes: usize,
    /// `speaker` nodes upserted.
    pub speaker_nodes: usize,
    /// Face-trace/object `CO_OCCURS_WITH` upserts.
    pub co_occurrence_edges: usize,
    /// Face-trace/speaker `SPEAKS_AS` upserts.
    pub speaker_face_edges: usize,
    /// Face-trace/face-trace `CO_OCCURS_WITH` upserts.
    pub face_face_edges: usize,
}
/// Builds the knowledge graph for one file and reports what was written.
///
/// The three node builders run first, then the three edge builders, each one
/// awaited in turn; the edge builders look nodes up by `external_id`, so the
/// nodes have to exist before they run.
///
/// # Errors
/// Stops at and returns the first error produced by any builder.
pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Result<TkgResult> {
    let pool = db.pool();

    // Nodes.
    let face_trace_nodes = build_face_trace_nodes(pool, file_uuid).await?;
    let object_nodes = build_yolo_object_nodes(pool, file_uuid, output_dir).await?;
    let speaker_nodes = build_speaker_nodes(pool, file_uuid, output_dir).await?;

    // Edges.
    let co_occurrence_edges = build_co_occurrence_edges(pool, file_uuid, output_dir).await?;
    let speaker_face_edges = build_speaker_face_edges(pool, file_uuid, output_dir).await?;
    let face_face_edges = build_face_face_edges(pool, file_uuid).await?;

    Ok(TkgResult {
        face_trace_nodes,
        object_nodes,
        speaker_nodes,
        co_occurrence_edges,
        speaker_face_edges,
        face_face_edges,
    })
}
// ── Node builders ─────────────────────────────────────────────────
/// Upserts one `face_trace` node per distinct `trace_id` found in
/// `face_detections` for `file_uuid`.
///
/// Each node gets `external_id = "trace_<id>"`, the label `Face Trace <id>`
/// and a JSON property bag with the frame count, first/last frame and the
/// averaged bounding box (rounded to integers, `0` when an average is NULL).
///
/// Returns the number of upserts executed.
///
/// # Errors
/// Propagates any database or JSON serialization error.
async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str) -> Result<usize> {
    let face_table = t("face_detections");
    let nodes_table = t("tkg_nodes");

    // Integer columns are cast to bigint so that decoding into `i64` works
    // whether the underlying columns are INT4 or INT8 (sqlx refuses to decode
    // an INT4 value into an `i64`).
    let rows = sqlx::query_as::<_, FaceTraceRow>(&format!(
        r#"
        SELECT trace_id::bigint as trace_id,
               COUNT(*)::bigint as frame_count,
               MIN(frame_number)::bigint as start_f,
               MAX(frame_number)::bigint as end_f,
               AVG(x::float8) as avg_x,
               AVG(y::float8) as avg_y,
               AVG(width::float8) as avg_w,
               AVG(height::float8) as avg_h
        FROM {}
        WHERE file_uuid = $1 AND trace_id IS NOT NULL
        GROUP BY trace_id
        ORDER BY trace_id
        "#,
        face_table
    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await?;

    // The statement text does not depend on the row, so build it once.
    let upsert_sql = format!(
        r#"
        INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
        VALUES ($1, $2, $3, $4, $5::jsonb)
        ON CONFLICT (file_uuid, node_type, external_id)
        DO UPDATE SET
            properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties),
            label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label)
        "#,
        nodes_table
    );

    let mut count = 0;
    for row in &rows {
        let external_id = format!("trace_{}", row.trace_id);
        let label = format!("Face Trace {}", row.trace_id);
        let props = serde_json::json!({
            "frame_count": row.frame_count,
            "start_frame": row.start_f,
            "end_frame": row.end_f,
            "avg_bbox": {
                "x": row.avg_x.unwrap_or(0.0).round() as i64,
                "y": row.avg_y.unwrap_or(0.0).round() as i64,
                "width": row.avg_w.unwrap_or(0.0).round() as i64,
                "height": row.avg_h.unwrap_or(0.0).round() as i64,
            }
        });
        sqlx::query(&upsert_sql)
            .bind("face_trace")
            .bind(&external_id)
            .bind(file_uuid)
            .bind(&label)
            .bind(serde_json::to_string(&props)?)
            .execute(pool)
            .await?;
        count += 1;
    }
    Ok(count)
}
/// Upserts one `object` node per class name found in
/// `<output_dir>/<file_uuid>.yolo.json`.
///
/// Returns `Ok(0)` when the file does not exist; otherwise returns the number
/// of upserts executed. Every node stores the class's detection total across
/// all frames under `total_detections`.
///
/// # Errors
/// Fails when the file cannot be read or parsed, or when an upsert fails.
async fn build_yolo_object_nodes(
    pool: &PgPool,
    file_uuid: &str,
    output_dir: &str,
) -> Result<usize> {
    let json_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
    if !json_path.exists() {
        return Ok(0);
    }
    let raw = std::fs::read_to_string(&json_path)
        .with_context(|| format!("Failed to read {:?}", json_path))?;
    let parsed: YoloJson = serde_json::from_str(&raw)
        .with_context(|| format!("Failed to parse {:?}", json_path))?;

    // Tally detections per class; a frame's `objects` list is only consulted
    // when its `detections` list is empty.
    let mut totals: HashMap<String, i64> = HashMap::new();
    for frame in parsed.frames.values() {
        let boxes = if frame.detections.is_empty() {
            &frame.objects
        } else {
            &frame.detections
        };
        for entry in boxes {
            *totals.entry(entry.class_name.clone()).or_default() += 1;
        }
    }

    let upsert_sql = format!(
        r#"
        INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
        VALUES ($1, $2, $3, $4, $5::jsonb)
        ON CONFLICT (file_uuid, node_type, external_id)
        DO UPDATE SET
            properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)
        "#,
        t("tkg_nodes")
    );

    let mut upserted = 0;
    for (class_name, total) in &totals {
        let properties = serde_json::json!({ "total_detections": total });
        // The class name serves as both external id ($2) and label ($4).
        sqlx::query(&upsert_sql)
            .bind("object")
            .bind(class_name)
            .bind(file_uuid)
            .bind(class_name)
            .bind(serde_json::to_string(&properties)?)
            .execute(pool)
            .await?;
        upserted += 1;
    }
    Ok(upserted)
}
/// Upserts one `speaker` node per entry of `speaker_stats` in
/// `<output_dir>/<file_uuid>.asrx.json`.
///
/// Returns `Ok(0)` when the file does not exist or carries no
/// `speaker_stats`; otherwise returns the number of upserts executed. Each
/// node stores the speaker's `count` under `segment_count`.
///
/// # Errors
/// Fails when the file cannot be read or parsed, or when an upsert fails.
async fn build_speaker_nodes(pool: &PgPool, file_uuid: &str, output_dir: &str) -> Result<usize> {
    let json_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
    if !json_path.exists() {
        return Ok(0);
    }
    let raw = std::fs::read_to_string(&json_path)
        .with_context(|| format!("Failed to read {:?}", json_path))?;
    let parsed: AsrxJson = serde_json::from_str(&raw)
        .with_context(|| format!("Failed to parse {:?}", json_path))?;
    let per_speaker = parsed.speaker_stats.unwrap_or_default();

    let upsert_sql = format!(
        r#"
        INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
        VALUES ($1, $2, $3, $4, $5::jsonb)
        ON CONFLICT (file_uuid, node_type, external_id)
        DO UPDATE SET
            properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)
        "#,
        t("tkg_nodes")
    );

    let mut upserted = 0;
    for (speaker_id, stat) in &per_speaker {
        let properties = serde_json::json!({ "segment_count": stat.count });
        // The speaker id serves as both external id ($2) and label ($4).
        sqlx::query(&upsert_sql)
            .bind("speaker")
            .bind(speaker_id)
            .bind(file_uuid)
            .bind(speaker_id)
            .bind(serde_json::to_string(&properties)?)
            .execute(pool)
            .await?;
        upserted += 1;
    }
    Ok(upserted)
}
// ── Edge builders ─────────────────────────────────────────────────
async fn build_co_occurrence_edges(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
) -> Result<usize> {
let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
if !yolo_path.exists() {
return Ok(0);
}
let content = std::fs::read_to_string(&yolo_path)?;
let yolo: YoloJson = serde_json::from_str(&content)?;
let face_table = t("face_detections");
let nodes_table = t("tkg_nodes");
let edges_table = t("tkg_edges");
let face_rows = sqlx::query_as::<_, FaceDetectionRow>(&format!(
r#"SELECT trace_id, frame_number, x, y, width, height
FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL
ORDER BY frame_number"#,
face_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
let mut edge_count = 0;
for face in &face_rows {
let frame_str = face.frame_number.to_string();
let yolo_frame = match yolo.frames.get(&frame_str) {
Some(f) => f,
None => continue,
};
let dets = if !yolo_frame.detections.is_empty() {
&yolo_frame.detections
} else {
&yolo_frame.objects
};
if dets.is_empty() {
continue;
}
let external_id = format!("trace_{}", face.trace_id);
let face_node: Option<(i64,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&external_id)
.fetch_optional(pool)
.await?;
let face_node_id = match face_node {
Some((id,)) => id,
None => continue,
};
for det in dets {
let obj_node: Option<(i64,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='object' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&det.class_name)
.fetch_optional(pool)
.await?;
let obj_node_id = match obj_node {
Some((id,)) => id,
None => continue,
};
let edge_props = serde_json::json!({
"frame": face.frame_number,
"object_confidence": det.confidence,
});
if let Err(e) = sqlx::query(&format!(
r#"
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
VALUES ($1, $2, $3, $4, $5::jsonb)
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
"#,
edges_table
))
.bind("CO_OCCURS_WITH")
.bind(face_node_id)
.bind(obj_node_id)
.bind(file_uuid)
.bind(serde_json::to_string(&edge_props)?)
.execute(pool)
.await
{
tracing::warn!(
"[TKG] Edge insert failed (trace={}, obj={}): {}",
face.trace_id,
det.class_name,
e
);
continue;
}
edge_count += 1;
}
}
Ok(edge_count)
}
async fn build_speaker_face_edges(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
) -> Result<usize> {
let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
if !asrx_path.exists() {
return Ok(0);
}
let content = std::fs::read_to_string(&asrx_path)?;
let asrx: AsrxJson = serde_json::from_str(&content)?;
if asrx.segments.is_empty() {
return Ok(0);
}
let face_table = t("face_detections");
let nodes_table = t("tkg_nodes");
let edges_table = t("tkg_edges");
let traces = sqlx::query_as::<_, (i64, i64, i64)>(&format!(
r#"SELECT trace_id, MIN(frame_number) as start_f, MAX(frame_number) as end_f
FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL
GROUP BY trace_id"#,
face_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
// Calculate fps from last segment
let last = asrx.segments.last().unwrap();
let fps = if last.end_time > 0.0 {
last.end_frame as f64 / last.end_time
} else {
30.0
};
let mut edge_count = 0;
for (tid, sf, ef) in &traces {
let face_ext_id = format!("trace_{}", tid);
let face_node: Option<(i64,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&face_ext_id)
.fetch_optional(pool)
.await?;
let face_node_id = match face_node {
Some((id,)) => id,
None => continue,
};
let face_start_sec = *sf as f64 / fps;
let face_end_sec = *ef as f64 / fps;
for seg in &asrx.segments {
let seg_start = seg.start_time;
let seg_end = seg.end_time;
let overlap_start = face_start_sec.max(seg_start);
let overlap_end = face_end_sec.min(seg_end);
if overlap_start >= overlap_end {
continue;
}
let overlap_dur = overlap_end - overlap_start;
let face_dur = face_end_sec - face_start_sec;
if face_dur <= 0.0 {
continue;
}
let overlap_ratio = overlap_dur / face_dur;
if overlap_ratio < 0.3 {
continue;
}
let speaker_node: Option<(i64,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='speaker' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&seg.speaker_id)
.fetch_optional(pool)
.await?;
let speaker_node_id = match speaker_node {
Some((id,)) => id,
None => continue,
};
let edge_props = serde_json::json!({
"overlap_ratio": (overlap_ratio * 1000.0).round() / 1000.0,
"overlap_duration_s": (overlap_dur * 10.0).round() / 10.0,
"face_time_range": format!("{:.1}-{:.1}s", face_start_sec, face_end_sec),
"speaker_time_range": format!("{:.1}-{:.1}s", seg_start, seg_end),
});
sqlx::query(&format!(
r#"
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
VALUES ($1, $2, $3, $4, $5::jsonb)
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
"#,
edges_table
))
.bind("SPEAKS_AS")
.bind(face_node_id)
.bind(speaker_node_id)
.bind(file_uuid)
.bind(serde_json::to_string(&edge_props)?)
.execute(pool)
.await?;
edge_count += 1;
}
}
Ok(edge_count)
}
/// Upserts one `CO_OCCURS_WITH` edge per pair of face traces that share at
/// least one frame.
///
/// Pairs are produced by a self-join on `face_detections` restricted to
/// `a.trace_id < b.trace_id`, so every pair appears once with the smaller
/// trace id as the source. Each edge stores the first shared frame and the
/// number of joined rows for the pair.
///
/// Returns the number of upserts executed.
///
/// # Errors
/// Propagates any database or JSON serialization error.
async fn build_face_face_edges(pool: &PgPool, file_uuid: &str) -> Result<usize> {
    let face_table = t("face_detections");
    let nodes_table = t("tkg_nodes");
    let edges_table = t("tkg_edges");

    // Aggregate in SQL so only one row per pair is transferred, not one row
    // per shared frame. bigint casts: sqlx only decodes INT8 into `i64`.
    let pairs: Vec<(i64, i64, i64, i64)> = sqlx::query_as(&format!(
        r#"
        SELECT a.trace_id::bigint AS tid_a,
               b.trace_id::bigint AS tid_b,
               MIN(a.frame_number)::bigint AS first_frame,
               COUNT(*)::bigint AS frame_count
        FROM {} a
        JOIN {} b
          ON a.file_uuid = b.file_uuid
         AND a.frame_number = b.frame_number
         AND a.trace_id < b.trace_id
        WHERE a.file_uuid = $1
          AND a.trace_id IS NOT NULL
          AND b.trace_id IS NOT NULL
        GROUP BY a.trace_id, b.trace_id
        ORDER BY first_frame
        "#,
        face_table, face_table
    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await?;
    if pairs.is_empty() {
        return Ok(0);
    }

    // Resolve all face-trace node ids once instead of twice per pair.
    let face_nodes: HashMap<String, i64> = sqlx::query_as::<_, (String, i64)>(&format!(
        "SELECT external_id, id FROM {} WHERE file_uuid=$1 AND node_type='face_trace'",
        nodes_table
    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await?
    .into_iter()
    .collect();

    let insert_sql = format!(
        r#"
        INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
        VALUES ($1, $2, $3, $4, $5::jsonb)
        ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
        DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
        "#,
        edges_table
    );

    let mut edge_count = 0;
    for (tid_a, tid_b, first_frame, frame_count) in &pairs {
        let n_a = face_nodes.get(&format!("trace_{}", tid_a));
        let n_b = face_nodes.get(&format!("trace_{}", tid_b));
        // Skip pairs for which either trace has no node.
        let (n_a_id, n_b_id) = match (n_a, n_b) {
            (Some(a), Some(b)) => (*a, *b),
            _ => continue,
        };
        let edge_props = serde_json::json!({
            "first_frame": first_frame,
            "frame_count": frame_count,
        });
        sqlx::query(&insert_sql)
            .bind("CO_OCCURS_WITH")
            .bind(n_a_id)
            .bind(n_b_id)
            .bind(file_uuid)
            .bind(serde_json::to_string(&edge_props)?)
            .execute(pool)
            .await?;
        edge_count += 1;
    }
    Ok(edge_count)
}
// ── Tests ─────────────────────────────────────────────────────────
/// Unit tests for the JSON input structs and the result type. The builders
/// themselves need a database and are not covered here.
#[cfg(test)]
mod tests {
    use super::*;

    /// `detections` entries are parsed and reachable by string frame key.
    #[test]
    fn test_yolo_json_deserialize() {
        let json = r#"{
            "frames": {
                "1": {"time_seconds": 0.0, "detections": [{"class_name": "person", "confidence": 0.9}]},
                "2": {"time_seconds": 1.0, "detections": [{"class_name": "chair", "confidence": 0.8}]}
            }
        }"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        assert_eq!(yolo.frames.len(), 2);
        assert_eq!(yolo.frames["1"].detections[0].class_name, "person");
    }

    /// An empty `frames` object yields an empty map.
    #[test]
    fn test_yolo_json_empty_frames() {
        let json = r#"{"frames": {}}"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        assert!(yolo.frames.is_empty());
    }

    /// A fully populated segment parses, including the required frame fields.
    #[test]
    fn test_asrx_json_deserialize() {
        let json = r#"{
            "segments": [
                {"speaker_id": "SPEAKER_01", "start_time": 0.0, "end_time": 2.0, "start_frame": 0, "end_frame": 60}
            ],
            "speaker_stats": {"SPEAKER_01": {"count": 1}}
        }"#;
        let asrx: AsrxJson = serde_json::from_str(json).unwrap();
        assert_eq!(asrx.segments.len(), 1);
        assert_eq!(asrx.segments[0].speaker_id, "SPEAKER_01");
    }

    /// A missing `speaker_stats` key deserializes to `None`.
    #[test]
    fn test_asrx_json_no_stats() {
        let json = r#"{"segments": []}"#;
        let asrx: AsrxJson = serde_json::from_str(json).unwrap();
        assert!(asrx.speaker_stats.is_none());
    }

    /// The `objects` key is accepted and missing fields take their defaults.
    #[test]
    fn test_yolo_objects_fallback() {
        let json = r#"{
            "frames": {
                "1": {"objects": [{"class_name": "person"}]}
            }
        }"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        assert_eq!(yolo.frames["1"].objects[0].class_name, "person");
        assert_eq!(yolo.frames["1"].objects[0].confidence, 0.0);
        assert!(yolo.frames["1"].detections.is_empty());
    }

    /// Every counter of `TkgResult` round-trips through construction.
    #[test]
    fn test_tkg_result() {
        let r = TkgResult {
            face_trace_nodes: 5,
            object_nodes: 10,
            speaker_nodes: 3,
            co_occurrence_edges: 20,
            speaker_face_edges: 8,
            face_face_edges: 4,
        };
        assert_eq!(r.face_trace_nodes, 5);
        assert_eq!(r.object_nodes, 10);
        assert_eq!(r.speaker_nodes, 3);
        assert_eq!(r.co_occurrence_edges, 20);
        assert_eq!(r.speaker_face_edges, 8);
        assert_eq!(r.face_face_edges, 4);
    }
}

View File

@@ -1,7 +1,7 @@
use anyhow::Result;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use std::io::Read; use std::io::Read;
use std::path::Path; use std::path::Path;
use anyhow::Result;
/// Compute SHA256 of the entire file content /// Compute SHA256 of the entire file content
pub fn compute_sha256(path: &Path) -> Result<String> { pub fn compute_sha256(path: &Path) -> Result<String> {
@@ -10,7 +10,9 @@ pub fn compute_sha256(path: &Path) -> Result<String> {
let mut buf = [0u8; 65536]; let mut buf = [0u8; 65536];
loop { loop {
let n = file.read(&mut buf)?; let n = file.read(&mut buf)?;
if n == 0 { break; } if n == 0 {
break;
}
hasher.update(&buf[..n]); hasher.update(&buf[..n]);
} }
let hash = format!("{:x}", hasher.finalize()); let hash = format!("{:x}", hasher.finalize());

View File

@@ -65,7 +65,11 @@ pub fn tmdb_cache_path(file_uuid: &str) -> PathBuf {
pub fn read_tmdb_cache(file_uuid: &str) -> Result<TmdbCache> { pub fn read_tmdb_cache(file_uuid: &str) -> Result<TmdbCache> {
let path = tmdb_cache_path(file_uuid); let path = tmdb_cache_path(file_uuid);
if !path.exists() { if !path.exists() {
anyhow::bail!("TMDb cache not found: {} (expected: {})", file_uuid, path.display()); anyhow::bail!(
"TMDb cache not found: {} (expected: {})",
file_uuid,
path.display()
);
} }
let content = std::fs::read_to_string(&path) let content = std::fs::read_to_string(&path)
.with_context(|| format!("Failed to read TMDb cache: {}", path.display()))?; .with_context(|| format!("Failed to read TMDb cache: {}", path.display()))?;
@@ -96,9 +100,7 @@ pub fn count_cache_files() -> usize {
match std::fs::read_dir(&dir) { match std::fs::read_dir(&dir) {
Ok(entries) => entries Ok(entries) => entries
.filter_map(|e| e.ok()) .filter_map(|e| e.ok())
.filter(|e| { .filter(|e| e.file_name().to_string_lossy().ends_with(".tmdb.json"))
e.file_name().to_string_lossy().ends_with(".tmdb.json")
})
.count(), .count(),
Err(_) => 0, Err(_) => 0,
} }

View File

@@ -46,11 +46,12 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
// Step 2: Load face_detections grouped by trace_id // Step 2: Load face_detections grouped by trace_id
let fd_table = schema::table_name("face_detections"); let fd_table = schema::table_name("face_detections");
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>( let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(&format!(
&format!("SELECT trace_id, embedding FROM {} \ "SELECT trace_id, embedding FROM {} \
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \ WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
ORDER BY trace_id", fd_table), ORDER BY trace_id",
) fd_table
))
.bind(file_uuid) .bind(file_uuid)
.fetch_all(pool) .fetch_all(pool)
.await?; .await?;
@@ -156,9 +157,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
let fd_table = schema::table_name("face_detections"); let fd_table = schema::table_name("face_detections");
let mut after_qc = HashMap::new(); let mut after_qc = HashMap::new();
for (&tid, &(id, ref name)) in &matched { for (&tid, &(id, ref name)) in &matched {
let cnt: i64 = sqlx::query_scalar( let cnt: i64 = sqlx::query_scalar(&format!(
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2", fd_table), "SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2",
) fd_table
))
.bind(file_uuid) .bind(file_uuid)
.bind(tid) .bind(tid)
.fetch_one(pool) .fetch_one(pool)
@@ -194,9 +196,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
// Step 5: Update DB // Step 5: Update DB
let mut updated = 0usize; let mut updated = 0usize;
for (&tid, &(id, _)) in &matched { for (&tid, &(id, _)) in &matched {
let r = sqlx::query( let r = sqlx::query(&format!(
&format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table), "UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
) fd_table
))
.bind(id) .bind(id)
.bind(file_uuid) .bind(file_uuid)
.bind(tid) .bind(tid)
@@ -223,9 +226,8 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) -> Result<usize> { async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) -> Result<usize> {
let fd_table = schema::table_name("face_detections"); let fd_table = schema::table_name("face_detections");
// Find all collision pairs: same identity, same frame, different trace // Find all collision pairs: same identity, same frame, different trace
let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>( let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(&format!(
&format!( "SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \
"SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \
FROM {} a \ FROM {} a \
JOIN {} b \ JOIN {} b \
ON a.file_uuid = b.file_uuid \ ON a.file_uuid = b.file_uuid \
@@ -235,9 +237,8 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
AND a.identity_id IS NOT NULL \ AND a.identity_id IS NOT NULL \
AND a.identity_id = b.identity_id \ AND a.identity_id = b.identity_id \
ORDER BY a.identity_id, a.frame_number", ORDER BY a.identity_id, a.frame_number",
fd_table, fd_table fd_table, fd_table
), ))
)
.bind(file_uuid) .bind(file_uuid)
.fetch_all(pool) .fetch_all(pool)
.await?; .await?;
@@ -256,25 +257,36 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
let mut unbound = 0usize; let mut unbound = 0usize;
for ((id, ta, tb), overlap_frames) in &collision_groups { for ((id, ta, tb), overlap_frames) in &collision_groups {
// Get face detection count for each trace // Get face detection count for each trace
let cnt_a: i64 = sqlx::query_scalar( let cnt_a: i64 = sqlx::query_scalar(&format!(
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table) "SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
) fd_table
.bind(file_uuid).bind(ta).bind(id) ))
.fetch_one(pool).await.unwrap_or(0); .bind(file_uuid)
.bind(ta)
.bind(id)
.fetch_one(pool)
.await
.unwrap_or(0);
let cnt_b: i64 = sqlx::query_scalar( let cnt_b: i64 = sqlx::query_scalar(&format!(
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table) "SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
) fd_table
.bind(file_uuid).bind(tb).bind(id) ))
.fetch_one(pool).await.unwrap_or(0); .bind(file_uuid)
.bind(tb)
.bind(id)
.fetch_one(pool)
.await
.unwrap_or(0);
// Unbind the trace with fewer detections (likely the false positive) // Unbind the trace with fewer detections (likely the false positive)
let victim = if cnt_a <= cnt_b { *ta } else { *tb }; let victim = if cnt_a <= cnt_b { *ta } else { *tb };
let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b }; let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b };
sqlx::query( sqlx::query(&format!(
&format!("UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2", fd_table), "UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2",
) fd_table
))
.bind(file_uuid) .bind(file_uuid)
.bind(victim) .bind(victim)
.execute(pool) .execute(pool)

View File

@@ -45,7 +45,14 @@ fn extract_movie_name(filename: &str) -> Option<String> {
.file_stem() .file_stem()
.and_then(|s| s.to_str())?; .and_then(|s| s.to_str())?;
let cleaned = name.replace(['.', '_'], " ").trim().to_string(); // Take only the part before year patterns or separators
let cleaned = name
.replace(['.', '_'], " ")
.split(|c: char| c == '(' || c == '[' || c == '│' || c == '|')
.next()
.unwrap_or(&name)
.trim()
.to_string();
if cleaned.is_empty() || cleaned.len() < 3 { if cleaned.is_empty() || cleaned.len() < 3 {
return None; return None;
@@ -53,10 +60,7 @@ fn extract_movie_name(filename: &str) -> Option<String> {
Some(cleaned) Some(cleaned)
} }
pub async fn probe_from_cache( pub async fn probe_from_cache(db: &PostgresDb, file_uuid: &str) -> Result<TmdbProbeResult> {
db: &PostgresDb,
file_uuid: &str,
) -> Result<TmdbProbeResult> {
let cache = crate::core::tmdb::cache::read_tmdb_cache(file_uuid)?; let cache = crate::core::tmdb::cache::read_tmdb_cache(file_uuid)?;
if cache.identities.is_empty() && !cache.cast.is_empty() { if cache.identities.is_empty() && !cache.cast.is_empty() {
return create_identities_from_data(db, file_uuid, &cache.movie, &cache.cast).await; return create_identities_from_data(db, file_uuid, &cache.movie, &cache.cast).await;
@@ -83,7 +87,8 @@ async fn upsert_identities_from_disk(
} }
match std::fs::read_to_string(&path) { match std::fs::read_to_string(&path) {
Ok(content) => { Ok(content) => {
match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content) { match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content)
{
Ok(identity_file) => { Ok(identity_file) => {
let identities_table = crate::core::db::schema::table_name("identities"); let identities_table = crate::core::db::schema::table_name("identities");
let result = sqlx::query(&format!( let result = sqlx::query(&format!(
@@ -106,21 +111,35 @@ async fn upsert_identities_from_disk(
match result { match result {
Ok(_) => { Ok(_) => {
info!("[TMDB] Upserted identity: {} (uuid={})", identity_file.name, identity_file.identity_uuid); info!(
"[TMDB] Upserted identity: {} (uuid={})",
identity_file.name, identity_file.identity_uuid
);
identities_created += 1; identities_created += 1;
} }
Err(e) => { Err(e) => {
warn!("[TMDB] Failed to upsert identity '{}': {}", identity_file.name, e); warn!(
"[TMDB] Failed to upsert identity '{}': {}",
identity_file.name, e
);
} }
} }
} }
Err(e) => { Err(e) => {
warn!("[TMDB] Failed to parse identity file {}: {}", path.display(), e); warn!(
"[TMDB] Failed to parse identity file {}: {}",
path.display(),
e
);
} }
} }
} }
Err(e) => { Err(e) => {
warn!("[TMDB] Failed to read identity file {}: {}", path.display(), e); warn!(
"[TMDB] Failed to read identity file {}: {}",
path.display(),
e
);
} }
} }
} }
@@ -181,7 +200,9 @@ pub async fn create_identities_from_data(
continue; continue;
} }
let profile_url = member.profile_path.as_ref() let profile_url = member
.profile_path
.as_ref()
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p)); .map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
let metadata = serde_json::json!({ let metadata = serde_json::json!({
@@ -226,8 +247,13 @@ pub async fn create_identities_from_data(
member.name, member.character, uuid_str member.name, member.character, uuid_str
); );
identities_created += 1; identities_created += 1;
if let Err(e) = crate::core::identity::storage::save_identity_file(db, &uuid_str).await { if let Err(e) =
warn!("[TMDB] Failed to save identity file for {}: {}", member.name, e); crate::core::identity::storage::save_identity_file(db, &uuid_str).await
{
warn!(
"[TMDB] Failed to save identity file for {}: {}",
member.name, e
);
} }
// Download and save TMDb profile image locally // Download and save TMDb profile image locally
if let Some(url) = &profile_url { if let Some(url) = &profile_url {
@@ -393,8 +419,10 @@ pub async fn probe_movie(
overview: movie.overview.clone(), overview: movie.overview.clone(),
poster_path: movie.poster_path.clone(), poster_path: movie.poster_path.clone(),
}; };
let cache_cast: Vec<cache::TmdbCastMember> = credits.cast.iter().map(|m| { let cache_cast: Vec<cache::TmdbCastMember> = credits
cache::TmdbCastMember { .cast
.iter()
.map(|m| cache::TmdbCastMember {
id: m.id, id: m.id,
name: m.name.clone(), name: m.name.clone(),
character: m.character.clone(), character: m.character.clone(),
@@ -410,8 +438,8 @@ pub async fn probe_movie(
deathday: None, deathday: None,
gender: None, gender: None,
homepage: None, homepage: None,
} })
}).collect(); .collect();
// Write TMDb cache so probe_from_cache can be used next time // Write TMDb cache so probe_from_cache can be used next time
let cache_obj = cache::TmdbCache { let cache_obj = cache::TmdbCache {

View File

@@ -60,7 +60,11 @@ pub async fn check_tmdb_api() -> TmdbResourceStatus {
enabled: *config::tmdb::PROBE_ENABLED, enabled: *config::tmdb::PROBE_ENABLED,
api_reachable: Some(reachable), api_reachable: Some(reachable),
api_latency_ms: Some(latency), api_latency_ms: Some(latency),
api_error: if reachable { None } else { Some(format!("HTTP {}", resp.status())) }, api_error: if reachable {
None
} else {
Some(format!("HTTP {}", resp.status()))
},
last_check_at: Some(chrono::Utc::now().to_rfc3339()), last_check_at: Some(chrono::Utc::now().to_rfc3339()),
} }
} }
@@ -84,9 +88,10 @@ pub fn count_cache_files() -> usize {
pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> { pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {
let identities_table = crate::core::db::schema::table_name("identities"); let identities_table = crate::core::db::schema::table_name("identities");
let count: i64 = sqlx::query_scalar( let count: i64 = sqlx::query_scalar(&format!(
&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table) "SELECT COUNT(*) FROM {} WHERE source = 'tmdb'",
) identities_table
))
.fetch_one(pool) .fetch_one(pool)
.await?; .await?;
Ok(count) Ok(count)
@@ -94,9 +99,10 @@ pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {
pub async fn count_tmdb_identities_with_embedding(pool: &sqlx::PgPool) -> Result<i64> { pub async fn count_tmdb_identities_with_embedding(pool: &sqlx::PgPool) -> Result<i64> {
let identities_table = crate::core::db::schema::table_name("identities"); let identities_table = crate::core::db::schema::table_name("identities");
let count: i64 = sqlx::query_scalar( let count: i64 = sqlx::query_scalar(&format!(
&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL", identities_table) "SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL",
) identities_table
))
.fetch_one(pool) .fetch_one(pool)
.await?; .await?;
Ok(count) Ok(count)

View File

@@ -147,7 +147,7 @@ impl ChunkSelector {
// Try to match UUID - either exact match or partial match // Try to match UUID - either exact match or partial match
let _uuid = payload let _uuid = payload
.and_then(|p| p.get("uuid")) .and_then(|p| p.get("file_uuid"))
.and_then(|v| v.as_str()) .and_then(|v| v.as_str())
.unwrap_or(""); .unwrap_or("");

View File

@@ -8,10 +8,10 @@ use tracing::{info, warn};
use momentry_core::core::api_key::{ApiKeyService, ApiKeyType}; use momentry_core::core::api_key::{ApiKeyService, ApiKeyType};
use momentry_core::core::chunk::types::{Chunk, ChunkRule, ChunkType}; use momentry_core::core::chunk::types::{Chunk, ChunkRule, ChunkType};
use momentry_core::core::db::schema;
use momentry_core::core::db::Database; use momentry_core::core::db::Database;
use momentry_core::core::time::FrameTime; use momentry_core::core::time::FrameTime;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi}; use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::core::db::schema;
use momentry_core::{ use momentry_core::{
Embedder, OutputDir, PostgresDb, QdrantDb, RedisClient, VectorPayload, VideoRecord, VideoStatus, Embedder, OutputDir, PostgresDb, QdrantDb, RedisClient, VectorPayload, VideoRecord, VideoStatus,
}; };
@@ -1985,7 +1985,8 @@ async fn main() -> Result<()> {
chunk_id: None, chunk_id: None,
created_at: String::new(), created_at: String::new(),
}; };
db.store_pre_chunk(&uuid, "asr", serde_json::to_value(&pre_chunk)?).await?; db.store_pre_chunk(&uuid, "asr", serde_json::to_value(&pre_chunk)?)
.await?;
asr_pre_chunk_ids.push(i as i64); asr_pre_chunk_ids.push(i as i64);
} }
@@ -2009,7 +2010,8 @@ async fn main() -> Result<()> {
chunk_id: None, chunk_id: None,
created_at: String::new(), created_at: String::new(),
}; };
db.store_pre_chunk(&uuid, "cut", serde_json::to_value(&pre_chunk)?).await?; db.store_pre_chunk(&uuid, "cut", serde_json::to_value(&pre_chunk)?)
.await?;
cut_pre_chunk_ids.push(i as i64); cut_pre_chunk_ids.push(i as i64);
} }
@@ -2037,7 +2039,8 @@ async fn main() -> Result<()> {
chunk_id: None, chunk_id: None,
created_at: String::new(), created_at: String::new(),
}; };
db.store_pre_chunk(&uuid, "time", serde_json::to_value(&pre_chunk)?).await?; db.store_pre_chunk(&uuid, "time", serde_json::to_value(&pre_chunk)?)
.await?;
time_pre_chunk_ids.push(time_pre_chunk_ids.len() as i64); time_pre_chunk_ids.push(time_pre_chunk_ids.len() as i64);
time_start = time_end; time_start = time_end;
} }
@@ -2117,7 +2120,8 @@ async fn main() -> Result<()> {
frame_path: None, frame_path: None,
created_at: String::new(), created_at: String::new(),
}; };
db.store_frame(&uuid, *frame_num as i64, serde_json::to_value(&frame)?).await?; db.store_frame(&uuid, *frame_num as i64, serde_json::to_value(&frame)?)
.await?;
} }
println!("Stored {} frames", all_frames.len()); println!("Stored {} frames", all_frames.len());
@@ -2357,8 +2361,7 @@ async fn main() -> Result<()> {
for frame in &context_frames { for frame in &context_frames {
if let Some(objects) = frame["yolo_objects"].as_array() { if let Some(objects) = frame["yolo_objects"].as_array() {
for obj in objects { for obj in objects {
if let Some(class_name) = if let Some(class_name) = obj.get("class_name").and_then(|v| v.as_str())
obj.get("class_name").and_then(|v| v.as_str())
{ {
*all_objects.entry(class_name.to_string()).or_insert(0) += 1; *all_objects.entry(class_name.to_string()).or_insert(0) += 1;
} }
@@ -2494,9 +2497,11 @@ async fn main() -> Result<()> {
} }
let qdrant_payload = VectorPayload { let qdrant_payload = VectorPayload {
uuid: chunk.uuid.clone(), file_uuid: chunk.uuid.clone(),
chunk_id: chunk.chunk_id.clone(), chunk_id: chunk.chunk_id.clone(),
chunk_type: "sentence".to_string(), chunk_type: "sentence".to_string(),
start_frame: chunk.start_frame,
end_frame: chunk.end_frame,
start_time: chunk.start_time().seconds(), start_time: chunk.start_time().seconds(),
end_time: chunk.end_time().seconds(), end_time: chunk.end_time().seconds(),
text: Some(text.to_string()), text: Some(text.to_string()),

View File

@@ -79,12 +79,8 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
None => VerificationResult::ok(proc_name, file_uuid), None => VerificationResult::ok(proc_name, file_uuid),
} }
} }
ProcessorType::Yolo => { ProcessorType::Yolo => VerificationResult::ok(proc_name, file_uuid),
VerificationResult::ok(proc_name, file_uuid) ProcessorType::Face => VerificationResult::ok(proc_name, file_uuid),
}
ProcessorType::Face => {
VerificationResult::ok(proc_name, file_uuid)
}
ProcessorType::Ocr => { ProcessorType::Ocr => {
let frames = value.get("frames").and_then(|v| v.as_array()); let frames = value.get("frames").and_then(|v| v.as_array());
match frames { match frames {
@@ -114,7 +110,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
ProcessorType::FiveW1H => { ProcessorType::FiveW1H => {
let scenes = value.get("scenes").and_then(|v| v.as_array()); let scenes = value.get("scenes").and_then(|v| v.as_array());
match scenes { match scenes {
Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"), Some(s) if s.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 scenes")
}
Some(_) => VerificationResult::ok(proc_name, file_uuid), Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid), None => VerificationResult::ok(proc_name, file_uuid),
} }

View File

@@ -37,7 +37,8 @@ pub async fn run_watcher() -> Result<()> {
info!("Watch directories: {:?}", dirs); info!("Watch directories: {:?}", dirs);
tokio::spawn(async move { tokio::spawn(async move {
let mut interval = time::interval(std::time::Duration::from_millis(config.poll_interval_ms)); let mut interval =
time::interval(std::time::Duration::from_millis(config.poll_interval_ms));
let mut known = std::collections::HashSet::new(); let mut known = std::collections::HashSet::new();
loop { loop {
interval.tick().await; interval.tick().await;
@@ -109,15 +110,43 @@ async fn auto_register_file(file_path: &str) {
} }
}; };
let file_name = pre.get("file_name").and_then(|v| v.as_str()).unwrap_or("unknown").to_string(); let file_name = pre
.get("file_name")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
let probe = pre.get("probe_json").cloned().unwrap_or_default(); let probe = pre.get("probe_json").cloned().unwrap_or_default();
let file_type = pre.get("file_type").and_then(|v| v.as_str()).unwrap_or("unknown").to_string(); let file_type = pre
let canonical_path = pre.get("file_path").and_then(|v| v.as_str()).unwrap_or(file_path).to_string(); .get("file_type")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
let canonical_path = pre
.get("file_path")
.and_then(|v| v.as_str())
.unwrap_or(file_path)
.to_string();
let duration = probe.get("format").and_then(|f| f.get("duration")).and_then(|v| v.as_f64()).unwrap_or(0.0); let duration = probe
let width = probe.get("format").and_then(|f| f.get("width")).and_then(|v| v.as_u64()).unwrap_or(0) as u32; .get("format")
let height = probe.get("format").and_then(|f| f.get("height")).and_then(|v| v.as_u64()).unwrap_or(0) as u32; .and_then(|f| f.get("duration"))
let fps_val = probe.get("format").and_then(|f| f.get("fps")).and_then(|v| v.as_f64()).unwrap_or(0.0); .and_then(|v| v.as_f64())
.unwrap_or(0.0);
let width = probe
.get("format")
.and_then(|f| f.get("width"))
.and_then(|v| v.as_u64())
.unwrap_or(0) as u32;
let height = probe
.get("format")
.and_then(|f| f.get("height"))
.and_then(|v| v.as_u64())
.unwrap_or(0) as u32;
let fps_val = probe
.get("format")
.and_then(|f| f.get("fps"))
.and_then(|v| v.as_f64())
.unwrap_or(0.0);
let record = VideoRecord { let record = VideoRecord {
id: 0, id: 0,
@@ -158,7 +187,10 @@ async fn auto_register_file(file_path: &str) {
match db.register_video(&record).await { match db.register_video(&record).await {
Ok(id) => info!("[WATCHER] Auto-registered {} (id={})", record.file_uuid, id), Ok(id) => info!("[WATCHER] Auto-registered {} (id={})", record.file_uuid, id),
Err(e) => warn!("[WATCHER] Auto-register failed for {}: {}", record.file_uuid, e), Err(e) => warn!(
"[WATCHER] Auto-register failed for {}: {}",
record.file_uuid, e
),
} }
} }
@@ -175,10 +207,14 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR") let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string()); .unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string());
let birthday = std::fs::metadata(&path).ok() let birthday = std::fs::metadata(&path)
.ok()
.and_then(|m| m.modified().ok()) .and_then(|m| m.modified().ok())
.map(|t| { .map(|t| {
let secs = t.duration_since(std::time::UNIX_EPOCH).unwrap_or_default().as_secs(); let secs = t
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
chrono::DateTime::from_timestamp(secs as i64, 0) chrono::DateTime::from_timestamp(secs as i64, 0)
.map(|dt| dt.to_rfc3339()) .map(|dt| dt.to_rfc3339())
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339()) .unwrap_or_else(|| chrono::Utc::now().to_rfc3339())
@@ -186,9 +222,8 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339()); .unwrap_or_else(|| chrono::Utc::now().to_rfc3339());
let mac = crate::core::storage::uuid::get_mac_address(); let mac = crate::core::storage::uuid::get_mac_address();
let file_uuid = crate::core::storage::uuid::compute_birth_uuid( let file_uuid =
&mac, &birthday, &canonical_str, &filename, crate::core::storage::uuid::compute_birth_uuid(&mac, &birthday, &canonical_str, &filename);
);
let pre_path = std::path::PathBuf::from(&output_dir).join(format!("{}.pre.json", file_uuid)); let pre_path = std::path::PathBuf::from(&output_dir).join(format!("{}.pre.json", file_uuid));
if pre_path.exists() { if pre_path.exists() {
@@ -198,15 +233,22 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
info!("[PRE-PROCESS] Pre-processing: {} → {}", filename, file_uuid); info!("[PRE-PROCESS] Pre-processing: {} → {}", filename, file_uuid);
let content_hash = crate::core::storage::content_hash::compute_sha256(&path).unwrap_or_default(); let content_hash =
crate::core::storage::content_hash::compute_sha256(&path).unwrap_or_default();
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR") let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string()); .unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string());
let python_path = std::env::var("MOMENTRY_PYTHON_PATH") let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string()); .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let probe_json = crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await; let probe_json =
crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await;
let file_type = probe_json.get("format").and_then(|f| f.get("file_type")).and_then(|v| v.as_str()).unwrap_or("unknown").to_string(); let file_type = probe_json
.get("format")
.and_then(|f| f.get("file_type"))
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
let pre_data = serde_json::json!({ let pre_data = serde_json::json!({
"file_name": filename, "file_name": filename,

View File

@@ -12,12 +12,13 @@ use crate::core::chunk::{rule1_ingest, rule3_ingest};
use crate::core::config::OUTPUT_DIR; use crate::core::config::OUTPUT_DIR;
use crate::core::db::qdrant_db::QdrantDb; use crate::core::db::qdrant_db::QdrantDb;
use crate::core::db::{ use crate::core::db::{
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus, schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload,
VideoStatus,
}; };
use crate::core::embedding::Embedder; use crate::core::embedding::Embedder;
use crate::core::processor::heuristic_scene::generate_scene_meta;
use crate::worker::config::WorkerConfig; use crate::worker::config::WorkerConfig;
use crate::worker::processor::{ProcessorPool, ProcessorTask}; use crate::worker::processor::{ProcessorPool, ProcessorTask};
use crate::core::processor::heuristic_scene::generate_scene_meta;
use crate::worker::resources::SystemResources; use crate::worker::resources::SystemResources;
use sqlx::PgPool; use sqlx::PgPool;
@@ -70,14 +71,15 @@ impl JobWorker {
// Reset stale running jobs: jobs stuck in 'running' with no active processor results // Reset stale running jobs: jobs stuck in 'running' with no active processor results
let monitor_jobs_table = schema::table_name("monitor_jobs"); let monitor_jobs_table = schema::table_name("monitor_jobs");
let processor_results_table = schema::table_name("processor_results"); let processor_results_table = schema::table_name("processor_results");
if let Err(e) = sqlx::query( if let Err(e) = sqlx::query(&format!(
&format!("UPDATE {} SET status = 'pending', updated_at = NOW() "UPDATE {} SET status = 'pending', updated_at = NOW()
WHERE status = 'running' WHERE status = 'running'
AND id NOT IN ( AND id NOT IN (
SELECT DISTINCT job_id FROM {} SELECT DISTINCT job_id FROM {}
WHERE status IN ('pending', 'running') WHERE status IN ('pending', 'running')
)", monitor_jobs_table, processor_results_table), )",
) monitor_jobs_table, processor_results_table
))
.execute(self.db.pool()) .execute(self.db.pool())
.await .await
{ {
@@ -608,12 +610,23 @@ impl JobWorker {
} }
let fu = uuid; let fu = uuid;
let rule1 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1")); let rule1 = check!(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"
));
let vector = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1")); let vector = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"));
let rule3 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1")); let rule3 = check!(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"
));
let trace = check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL")); let trace = check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL"));
let tkg = check!(&format!("SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1", schema::table_name("tkg_nodes"))); let tkg = check!(&format!(
let scene_meta = std::path::Path::new(&format!("{}/{fu}.scene_meta.json", crate::core::config::OUTPUT_DIR.as_str())).exists(); "SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1",
schema::table_name("tkg_nodes")
));
let scene_meta = std::path::Path::new(&format!(
"{}/{fu}.scene_meta.json",
crate::core::config::OUTPUT_DIR.as_str()
))
.exists();
let five_w1h = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != '' LIMIT 1")); let five_w1h = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != '' LIMIT 1"));
let all_ok = rule1 && vector && rule3 && trace && tkg && scene_meta && five_w1h; let all_ok = rule1 && vector && rule3 && trace && tkg && scene_meta && five_w1h;
@@ -847,26 +860,23 @@ impl JobWorker {
Err(e) => error!("❌ Trace chunk ingestion failed: {}", e), Err(e) => error!("❌ Trace chunk ingestion failed: {}", e),
} }
// Build Temporal Knowledge Graph (TKG) // Build Temporal Knowledge Graph (TKG) — native Rust
info!("📝 Building TKG graph..."); info!("📝 Building TKG graph (Rust)...");
let executor = match crate::core::processor::PythonExecutor::new() { let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
Ok(ex) => ex, .unwrap_or_else(|_| ".".to_string());
Err(e) => { match crate::core::processor::tkg::build_tkg(
error!("Failed to create PythonExecutor for TKG: {}", e); db_clone.as_ref(),
return; &uuid_clone,
} &output_dir,
}; )
match executor .await
.run(
"tkg_builder.py",
&["--file-uuid", &uuid_clone],
Some(&uuid_clone),
"TKG_BUILDER",
Some(std::time::Duration::from_secs(300)),
)
.await
{ {
Ok(()) => info!("✅ TKG built for {}", uuid_clone), Ok(r) => info!(
"✅ TKG built for {}: {} face, {} obj, {} spk, {} co, {} sf, {} ff edges",
uuid_clone,
r.face_trace_nodes, r.object_nodes, r.speaker_nodes,
r.co_occurrence_edges, r.speaker_face_edges, r.face_face_edges,
),
Err(e) => error!("❌ TKG build failed for {}: {}", uuid_clone, e), Err(e) => error!("❌ TKG build failed for {}: {}", uuid_clone, e),
} }
} }
@@ -898,7 +908,7 @@ impl JobWorker {
let ids = sqlx::query_scalar::<_, uuid::Uuid>( let ids = sqlx::query_scalar::<_, uuid::Uuid>(
"SELECT DISTINCT i.uuid FROM identities i \ "SELECT DISTINCT i.uuid FROM identities i \
JOIN face_detections fd ON fd.identity_id = i.id \ JOIN face_detections fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL" WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
) )
.bind(&uuid_clone) .bind(&uuid_clone)
.fetch_all(db_clone.pool()) .fetch_all(db_clone.pool())
@@ -907,12 +917,18 @@ impl JobWorker {
for id_uuid in &ids { for id_uuid in &ids {
let us = id_uuid.to_string().replace('-', ""); let us = id_uuid.to_string().replace('-', "");
if let Err(e) = crate::core::identity::storage::save_identity_file( if let Err(e) = crate::core::identity::storage::save_identity_file(
&db_clone, &us &db_clone, &us,
).await { )
.await
{
warn!("[P2.5] Failed to save identity file {}: {}", us, e); warn!("[P2.5] Failed to save identity file {}: {}", us, e);
} }
} }
info!("[P2.5] {} identity files saved for {}", ids.len(), uuid_clone); info!(
"[P2.5] {} identity files saved for {}",
ids.len(),
uuid_clone
);
} }
Err(e) => error!("❌ TMDb face matching failed for {}: {}", uuid_clone, e), Err(e) => error!("❌ TMDb face matching failed for {}: {}", uuid_clone, e),
} }
@@ -1088,8 +1104,8 @@ impl JobWorker {
let pool = db.pool(); let pool = db.pool();
let chunk_table = schema::table_name("chunk"); let chunk_table = schema::table_name("chunk");
let rows = sqlx::query_as::<_, (String, String, String, f64, f64, String)>( let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64, String)>(
&format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table), &format!("SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
) )
.bind(uuid) .bind(uuid)
.fetch_all(pool) .fetch_all(pool)
@@ -1107,7 +1123,17 @@ impl JobWorker {
); );
let mut stored = 0usize; let mut stored = 0usize;
for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows { for (
chunk_id,
_chunk_type,
text,
start_frame,
end_frame,
start_time,
end_time,
_content_str,
) in &rows
{
if text.is_empty() { if text.is_empty() {
continue; continue;
} }
@@ -1119,9 +1145,11 @@ impl JobWorker {
continue; continue;
} }
let payload = VectorPayload { let payload = VectorPayload {
uuid: uuid.to_string(), file_uuid: uuid.to_string(),
chunk_id: chunk_id.clone(), chunk_id: chunk_id.clone(),
chunk_type: "sentence".to_string(), chunk_type: "sentence".to_string(),
start_frame: *start_frame,
end_frame: *end_frame,
start_time: *start_time, start_time: *start_time,
end_time: *end_time, end_time: *end_time,
text: Some(text.clone()), text: Some(text.clone()),

View File

@@ -237,11 +237,19 @@ impl ProcessorPool {
let key = format!("{}job:{}:processor:{}", prefix, &job.uuid, &processor_name); let key = format!("{}job:{}:processor:{}", prefix, &job.uuid, &processor_name);
let now = chrono::Utc::now().to_rfc3339(); let now = chrono::Utc::now().to_rfc3339();
let _: Option<String> = redis::cmd("HSET") let _: Option<String> = redis::cmd("HSET")
.arg(&key).arg("started_at").arg(&now) .arg(&key)
.query_async(&mut conn).await.ok(); .arg("started_at")
.arg(&now)
.query_async(&mut conn)
.await
.ok();
let _: Option<String> = redis::cmd("HSET") let _: Option<String> = redis::cmd("HSET")
.arg(&key).arg("embedding_started_at").arg(&now) .arg(&key)
.query_async(&mut conn).await.ok(); .arg("embedding_started_at")
.arg(&now)
.query_async(&mut conn)
.await
.ok();
} }
// Subscribe to Redis progress pub/sub and update processor hash in real-time // Subscribe to Redis progress pub/sub and update processor hash in real-time
@@ -254,10 +262,12 @@ impl ProcessorPool {
let cb_processor = sub_processor.clone(); let cb_processor = sub_processor.clone();
if let Err(e) = sub_redis if let Err(e) = sub_redis
.subscribe_and_callback(&sub_uuid, move |msg| { .subscribe_and_callback(&sub_uuid, move |msg| {
tracing::info!("[Subscriber] Got msg for={} cur={} tot={}", tracing::info!(
msg.processor, "[Subscriber] Got msg for={} cur={} tot={}",
msg.processor,
msg.data.current.unwrap_or(0), msg.data.current.unwrap_or(0),
msg.data.total.unwrap_or(0)); msg.data.total.unwrap_or(0)
);
if msg.processor == cb_processor { if msg.processor == cb_processor {
let cur = msg.data.current.unwrap_or(0); let cur = msg.data.current.unwrap_or(0);
let tot = msg.data.total.unwrap_or(0); let tot = msg.data.total.unwrap_or(0);
@@ -266,11 +276,18 @@ impl ProcessorPool {
let u = cb_uuid.clone(); let u = cb_uuid.clone();
let p = cb_processor.clone(); let p = cb_processor.clone();
tokio::spawn(async move { tokio::spawn(async move {
match r.update_worker_processor_status( match r
&u, &p, "running", None, .update_worker_processor_status(
cur, oc, tot, 0, 0, &u, &p, "running", None, cur, oc, tot, 0, 0,
).await { )
Ok(_) => tracing::info!("[Subscriber] Updated {}: cur={} tot={}", p, cur, tot), .await
{
Ok(_) => tracing::info!(
"[Subscriber] Updated {}: cur={} tot={}",
p,
cur,
tot
),
Err(e) => tracing::error!("[Subscriber] FAILED {}: {}", p, e), Err(e) => tracing::error!("[Subscriber] FAILED {}: {}", p, e),
} }
}); });
@@ -756,9 +773,11 @@ impl ProcessorPool {
.enumerate() .enumerate()
.map(|(i, segment)| { .map(|(i, segment)| {
// Prefer ASR output frames, fallback to time-based conversion // Prefer ASR output frames, fallback to time-based conversion
let start_frame = segment.start_frame let start_frame = segment
.start_frame
.unwrap_or_else(|| (segment.start_time * fps).round() as i64); .unwrap_or_else(|| (segment.start_time * fps).round() as i64);
let end_frame = segment.end_frame let end_frame = segment
.end_frame
.unwrap_or_else(|| (segment.end_time * fps).round() as i64); .unwrap_or_else(|| (segment.end_time * fps).round() as i64);
let data = serde_json::json!({ let data = serde_json::json!({
"text": segment.text, "text": segment.text,
@@ -892,7 +911,11 @@ impl ProcessorPool {
tracing::info!( tracing::info!(
"Storing {} Face pre-chunks + {} detections for video {}", "Storing {} Face pre-chunks + {} detections for video {}",
frames_count, frames_count,
face_result.frames.iter().map(|f| f.faces.len()).sum::<usize>(), face_result
.frames
.iter()
.map(|f| f.faces.len())
.sum::<usize>(),
uuid uuid
); );
@@ -911,7 +934,10 @@ impl ProcessorPool {
detections_to_store.push(( detections_to_store.push((
frame.frame as i64, frame.frame as i64,
frame.timestamp, frame.timestamp,
face.x, face.y, face.width, face.height, face.x,
face.y,
face.width,
face.height,
face.confidence, face.confidence,
)); ));
} }
@@ -1170,9 +1196,10 @@ impl ProcessorPool {
"top_5": scene.top_5, "top_5": scene.top_5,
}); });
let chunk_table = crate::core::db::schema::table_name("chunk"); let chunk_table = crate::core::db::schema::table_name("chunk");
let _ = sqlx::query( let _ = sqlx::query(&format!(
&format!("UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3", chunk_table) "UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3",
) chunk_table
))
.bind(&meta) .bind(&meta)
.bind(uuid) .bind(uuid)
.bind(&chk_id) .bind(&chk_id)