diff --git a/.env.development b/.env.development index df75941..4004f0c 100644 --- a/.env.development +++ b/.env.development @@ -29,7 +29,7 @@ REDIS_PASSWORD=accusys # Qdrant Vector Database - Collection isolation QDRANT_URL=http://localhost:6333 QDRANT_API_KEY=Test3200Test3200Test3200 -QDRANT_COLLECTION=momentry_dev_v1 +QDRANT_COLLECTION=momentry_dev_rule1_v2 # Paths MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev diff --git a/.env.example b/.env.example index 9c36d71..1cfbbdb 100644 --- a/.env.example +++ b/.env.example @@ -22,6 +22,9 @@ QDRANT_COLLECTION=momentry_rule1 # === API Keys === MOMENTRY_API_KEY=muser_your_key_here MOMENTRY_DEMO_API_KEY=muser_your_demo_key_here +JWT_SECRET=your_jwt_secret_here_change_in_production +SFTPGO_BASE_URL=http://127.0.0.1:8080 + TMDB_API_KEY=your_tmdb_api_key_here # === LLM === diff --git a/Cargo.lock b/Cargo.lock index c2d24e6..a9cb697 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -178,6 +178,18 @@ dependencies = [ "password-hash", ] +[[package]] +name = "async-compression" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "async-lock" version = "3.4.2" @@ -615,6 +627,23 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "compression-codecs" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -4861,13 +4890,18 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ + "async-compression", "bitflags 2.11.1", "bytes", + "futures-core", "futures-util", "http", "http-body", + "http-body-util", "iri-string", "pin-project-lite", + "tokio", + "tokio-util", "tower 0.5.3", "tower-layer", "tower-service", diff --git a/Cargo.toml b/Cargo.toml index 9869f26..c5fb540 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,7 +55,7 @@ sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "js mongodb = { version = "2", features = ["tokio-runtime"] } bson = { version = "2", features = ["chrono-0_4"] } qdrant-client = "1.7" -reqwest = { version = "0.12", features = ["json"] } +reqwest = { version = "0.12", features = ["json", "gzip"] } pgvector = { version = "0.3", features = ["sqlx"] } # HTTP Server diff --git a/build.rs b/build.rs index 8ce82bd..673b6b2 100644 --- a/build.rs +++ b/build.rs @@ -60,7 +60,8 @@ fn sha256_hex(data: &[u8]) -> String { use std::io::Write; use std::process::{Command, Stdio}; if let Ok(mut child) = Command::new("shasum") - .arg("-a").arg("256") + .arg("-a") + .arg("256") .stdin(Stdio::piped()) .stdout(Stdio::piped()) .spawn() diff --git a/scripts/__pycache__/redis_publisher.cpython-311.pyc b/scripts/__pycache__/redis_publisher.cpython-311.pyc index 37e4885..5217280 100644 Binary files a/scripts/__pycache__/redis_publisher.cpython-311.pyc and b/scripts/__pycache__/redis_publisher.cpython-311.pyc differ diff --git a/scripts/checksums.sha256 b/scripts/checksums.sha256 index b65bc53..e0068a5 100644 --- a/scripts/checksums.sha256 +++ b/scripts/checksums.sha256 @@ -103,7 +103,7 @@ f4d1b4334a49357b74b80e390ad5a3d16263e51cbe5cab661af92bd2e9721f02 ./face_process 802015c73dfce0866f2a0bc94c645aa35ba30a6de78244af23090bb1f1828c6e ./face_processor_mps.py 96ffdbde3f4d87e9942f9e1f4c93cbd999dc404b43e00d4cdcbb22de3c0f16b7 ./face_processor_optimized.py 17e7d0bd142bddfead94b1dd959c1f41c0dad7063ffc677dff1a99d62aab6cf8 ./face_processor_v1.py -15877adf5c160d861da688a25b93fd2edc189f326f9646ffb4de063e554f773a ./face_processor.py +d6ddad29a5e53b43b887554072d7965f0535e47fb62dad1a8b87e44fa1be6015 ./face_processor.py 8edab61189ad1a8fa60c203077e814e82d46c5bae67054fa2ab1958e199c05f9 ./face_recognition_processor.py 9ea19f357b3fcec6c8b3875c538e53cb46e407ab188cd544963e0123e535fa03 ./face_registration.py 72648816de611fd9b84d2b98c177b8b4f24374024b69184e8151c06cf44d633b ./face_statistics_report.py @@ -174,15 +174,15 @@ fd39b779a0337f521940f3f7b159931f1f207f200eefd610183781fdcf3dfafd ./object_searc 42d2952fc78b57302b0d12bc3d45790a2c2c46d4ffa3c713a82686134bd63f13 ./ocr_benchmark_runner.py 7b3ccb5c4ddd4c62c5ad04d0e3aafaecc2c1441012b6a98613cdcf055e2e50e8 ./ocr_processor_contract_v1.py 271023eec42d6be4a1ce6ae2ce3f29e825210a57e6bb37554a6f7fdf54616f9a ./ocr_processor_mps.py -e666bc8488bb93cc45bcd6a70a4ef38a74af6631d7b87a789381bfbdab4569f5 ./ocr_processor.py +2e73c41285e52ef013594fcd4d20df9f5781bfc26bcf62e54dd2c04ec44200c3 ./ocr_processor.py 62196108cb3337b5f9a873d70d2981ac8f49152369afbcc8a12b3a13de579e80 ./opencv_stamp_search.py b2e8d552c272fd173c77693e9453a85fe16dfc12f7c2cd304d299c6188c14077 ./paligemma_vs_gdino.py -2c6767e763cf69917af832b8383528f754c65db5a3f02cb4d63e3f896d5920b6 ./parent_chunk_5w1h.py +1534d5b7617dbae77f7a37a2c33a89b90f965247a6828f00b73ea6b720f6f4fc ./parent_chunk_5w1h.py 5208c738d4b615282813d351daf09872ce516121bb604caa64968ef5e52c53d3 ./pipeline_checklist.py 8f80c3a2be5c330e2d1853d9250a171c75db84598dbf3304280c42237ed4fb1f ./pipeline_status.py 94db44c0f49115a677d117d4901a1b7991c1517905300eaa495dd62b8ac1c79c ./pose_processor_contract_v1.py 167dee5e42c6bd46674bcffcfd92f368fc0b48a1f42c459c806853b281bc6482 ./pose_processor_mps.py -a1cdb1efd992d229829ae156d8aa439347c51d664e2a606c14d2274a11c93a66 ./pose_processor.py +a6ef3a785ef5c6dc47fa38dbed80d76bc7d4bf48cbaf0f7edb3d26df98d7262c ./pose_processor.py 45e6798dc5900f2f7c8776a2d260c122aae5068a075256b8a5c02e8d0be6c131 ./probe_file.py 139a68b5915680ec697d4bb5420adbd20b89637de2c16a15d68aca4fc22da02b ./qa/executor.py 4a59b36c29e1ee6e2b169db3b0201d2f7088c6ccbfdf642a3b522aeb182bbeea ./qa/judges/facenet.py @@ -197,7 +197,7 @@ c4e4424aad1847d822e9cf7dc98a1b2e903735a61e8ec056c6a9be75f79486bd ./qa/pipeline. 01c7b3c30c1531224f9605f0ee633285fe8489ab2d0a3c9c6a41f2b2b60d6626 ./quick_stamp_search.py e3143673a2bff6139e05c82446fd8770c4b7e59a854a42c3b29662f5ac75efe2 ./rebuild_parents.py 4aa98981632d4f8a11039c510e86aa296ae1cd4b399fc871ed664ac11e445bd9 ./rebuild_story_content.py -45c437b412d34c7c6d5758e94b7205a2956b32b6fe170c3f56db7231ec6f5a15 ./redis_publisher.py +205cfc47b603b5ab94d97dae8c25486b342b7c2858afe6d6dae27615ca0b2aeb ./redis_publisher.py 750f778946b56bc57c47d9d2295332bb0f8cec2c1aa03c6b882d39ef4432673d ./refine_search.py 0f8a6a6866a5797e964d3b17e2b7ef146fe7a798f09fcea982fcda6f629b4d06 ./regenerate_parent_5w1h.py 3ee192b623f290136b36bd63abd018aad6e6639a9543970c3415734628b33bd6 ./register_sample_faces.py @@ -303,7 +303,7 @@ d0ec8f4a67c1a1eb1356ad6e9b2f466575691bd336621cdbbfd31dd10159f2dc ./utils/test_m ff98864f1b11795cc3bb64f30ccb6f8609771ddc7a5df2c003ba7c2233d16fc2 ./vectorize_chunks.py 5880c128400e6e36c8eb7dffd009dbbc99dd13f8575b0037bdc854e25ddc41fb ./video_comparison_statistics.py 0a1501ffdc027236cdf88706b3d61229e2998ab268fd57fb60e399ccb734b6a1 ./vision_agent.py -6831281de868d24ecd84151965909b57f895d534114d24300a81c396492c19f8 ./visual_chunk_processor.py +eac8f90fbbb655614abcefc4b887e346bf94db5f015d33d37bc9514fb030489d ./visual_chunk_processor.py c165dfc5fc981dc731b25ef414184ee58e56b73b148d41a32fdce985c701efd5 ./visualize_stamp.py 6c65a82fdd1d585e20bee4fcb2d1bdec2e6220bda71d6ef9cd00d6a3cf74c4d7 ./voice_embedding_extractor.py 2b3a7b357db4ddd07ca30bf200c6600724e33441d8def0a4d9a39673e2cfb1c0 ./weather_sound_detector.py @@ -343,3 +343,4 @@ b2ee4f8a445a7e83f7b99ae5d4139fd525d9e3e58a360bfef054d441aa21d901 ./swift_proces fbca5ba0783153c4e21c174b0cbf75b582514f6ef0f92750a82d3178bc170f48 ./test_search_modes.sh f8c1647cdb4db8adef1829e41fbecd97f6b3b2e62927f195cd8e68127876069d ./troubleshoot.sh 992296b5218f3ef97ce53325be12f71848f3c3aeb3ee81d764bfe4bd61e1de05 ./verify_package.sh +b6f95fa070cc0258bc5d005f10d13025ba8b08d3ee1598bcdad405ff1d3332ed ./tmdb_agent.py diff --git a/scripts/extract_face_embedding.py b/scripts/extract_face_embedding.py new file mode 100644 index 0000000..e9ecb3b --- /dev/null +++ b/scripts/extract_face_embedding.py @@ -0,0 +1,84 @@ +#!/opt/homebrew/bin/python3.11 +""" +Extract face embedding from an image using InsightFace + CoreML FaceNet. + +Usage: + python3 scripts/extract_face_embedding.py + +Output: JSON with "embedding" key (512 floats) or "error" key. +Exit code: 0 on success, 1 on failure. +""" +import json +import os +import sys + +# Prefer venv if it exists (has insightface + coremltools installed) +VENV_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "venv") +VENV_SITE = os.path.join(VENV_PATH, "lib", "python3.11", "site-packages") +if os.path.isdir(VENV_SITE): + sys.path.insert(0, VENV_SITE) + +import cv2 +import numpy as np + +MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "models") +FACENET_PATH = os.path.join(MODELS_DIR, "facenet512.mlpackage") + + +def extract_embedding(image_path: str): + import io + import warnings + warnings.filterwarnings("ignore") + + # Suppress InsightFace verbose stdout during model loading + old_stdout = sys.stdout + sys.stdout = io.StringIO() + try: + import insightface + from insightface.app import FaceAnalysis + import coremltools as ct + + app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"]) + app.prepare(ctx_id=0, det_thresh=0.5) + coreml_model = ct.models.MLModel(FACENET_PATH) + finally: + sys.stdout = old_stdout + + img_bytes = open(image_path, "rb").read() + nparr = np.frombuffer(img_bytes, np.uint8) + img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) + if img is None: + print(json.dumps({"error": "Failed to decode image"})) + sys.exit(1) + + # Detect faces + faces = app.get(img) + if not faces: + print(json.dumps({"error": "No face detected"})) + sys.exit(1) + + largest = max(faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1])) + x1, y1, x2, y2 = [int(v) for v in largest.bbox] + x1, y1 = max(0, x1), max(0, y1) + x2, y2 = min(img.shape[1], x2), min(img.shape[0], y2) + if x2 <= x1 or y2 <= y1: + print(json.dumps({"error": "Invalid face bbox"})) + sys.exit(1) + + face_img = img[y1:y2, x1:x2] + face_img = cv2.resize(face_img, (160, 160)) + normalized = (face_img.astype(np.float32) / 127.5) - 1.0 + normalized = np.transpose(normalized, (2, 0, 1)) + input_array = np.expand_dims(normalized, axis=0) + + result = coreml_model.predict({"input": input_array}) + emb_key = [k for k in result.keys() if k.startswith("var_")][0] + embedding = result[emb_key].flatten().tolist() + print(json.dumps({"embedding": embedding})) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print(json.dumps({"error": "Usage: extract_face_embedding.py "})) + sys.exit(1) + extract_embedding(sys.argv[1]) diff --git a/scripts/face_landmark_qc.py b/scripts/face_landmark_qc.py index ae98db3..c9bac9e 100644 --- a/scripts/face_landmark_qc.py +++ b/scripts/face_landmark_qc.py @@ -2,23 +2,30 @@ """ Face landmark QC: verify eyes/nose are within face bounding box. Flags faces in DB where landmarks don't match the bbox. -Usage: python3 face_landmark_qc.py [--threshold 0.5] [--fix] +Usage: python3 face_landmark_qc.py [--threshold 0.5] [--apply] """ -import sys, json, psycopg2, argparse +import sys, json, psycopg2, argparse, os parser = argparse.ArgumentParser() parser.add_argument("uuid") parser.add_argument("--threshold", "-t", type=float, default=0.5, help="Fraction of landmark points that must be inside bbox (default: 0.5)") -parser.add_argument("--fix", action="store_true", help="Update face_detections QC flag in DB") +parser.add_argument("--apply", action="store_true", + help="Write qc_ok to face_detections.metadata in DB") +parser.add_argument("--schema", default="dev", + help="DB schema (default: dev)") args = parser.parse_args() UUID = args.uuid THRESHOLD = args.threshold -FACE_PATH = f"/Users/accusys/momentry/output_dev/{UUID}.face.json" +SCHEMA = args.schema +OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", f"/Users/accusys/momentry/output_dev") +FACE_PATH = f"{OUTPUT_DIR}/{UUID}.face.json" print(f"=== Face Landmark QC ===") print(f"UUID: {UUID}") +print(f"Schema: {SCHEMA}") +print(f"Face file: {FACE_PATH}") print(f"Threshold: {THRESHOLD * 100:.0f}% points must be inside bbox") # Load face.json @@ -29,8 +36,7 @@ total_faces = 0 faces_with_lm = 0 good_faces = 0 bad_faces = 0 -bad_frame_ids = set() -bad_face_details = [] +qc_results = [] # list of (frame, face_idx, qc_ok, x, y, w, h) # Build frame lookup for fast access frame_map = {} @@ -42,13 +48,22 @@ for frame_num, frm in frame_map.items(): total_faces += 1 lm = face.get('landmarks') if not lm: + bbox = face.get('bbox', {}) + qc_results.append((frame_num, fi, False, bbox.get('x'), bbox.get('y'), + bbox.get('width'), bbox.get('height'))) + bad_faces += 1 continue faces_with_lm += 1 - x, y, w, h = face['x'], face['y'], face['width'], face['height'] + bbox = face.get('bbox', {}) + x, y, w, h = bbox.get('x'), bbox.get('y'), bbox.get('width'), bbox.get('height') + if None in (x, y, w, h): + qc_results.append((frame_num, fi, False, x, y, w, h)) + bad_faces += 1 + continue inside_pts = 0 total_pts = 0 - eye_nose_inside = 0 # at least one point from each eye+nose inside + eye_nose_inside = 0 for lm_type in ['left_eye', 'right_eye', 'nose']: points = lm.get(lm_type, []) @@ -63,53 +78,39 @@ for frame_num, frm in frame_map.items(): eye_nose_inside += 1 ratio = inside_pts / max(1, total_pts) + qc_ok = (ratio >= THRESHOLD and eye_nose_inside >= 2) - if ratio >= THRESHOLD and eye_nose_inside >= 2: + qc_results.append((frame_num, fi, qc_ok, x, y, w, h)) + if qc_ok: good_faces += 1 else: bad_faces += 1 - bad_frame_ids.add(frame_num) - bad_face_details.append({ - 'frame': frame_num, - 'face_idx': fi, - 'bbox': [x, y, w, h], - 'inside_pts': inside_pts, - 'total_pts': total_pts, - 'ratio': ratio, - 'eye_nose_ok': eye_nose_inside, - }) print(f"\nTotal faces: {total_faces:,}") print(f"Faces with landmarks: {faces_with_lm:,}") print(f"✅ Good (≥{THRESHOLD*100:.0f}% inside + ≥2 features): {good_faces:,}") -print(f"❌ Bad: {bad_faces:,}") +print(f"❌ Bad (no eyes or insufficient landmarks): {bad_faces:,}") print(f"Quality pass rate: {100 * good_faces / max(1, faces_with_lm):.1f}%") -print(f"\nBad faces in {len(bad_frame_ids)} unique frames") - -# Show sample bad faces -print(f"\nSample bad faces:") -for bf in sorted(bad_face_details, key=lambda b: b['ratio'])[:5]: - print(f" frame={bf['frame']}, bbox={bf['bbox']}, {bf['inside_pts']}/{bf['total_pts']} inside ({bf['ratio']*100:.0f}%), eye/nose={bf['eye_nose_ok']}/3") - -# Show sample good faces -print(f"\nSample good faces:") -good_details = [] -for frame_num, frm in frame_map.items(): - for face in frm.get('faces', []): - lm = face.get('landmarks') - if not lm: - continue - x, y, w, h = face['x'], face['y'], face['width'], face['height'] - inside = sum(1 for pts in lm.values() for pt in pts - if (x <= pt[0] <= x + w) and (y <= pt[1] <= y + h)) - total = sum(len(pts) for pts in lm.values()) - if inside / max(1, total) >= THRESHOLD: - good_details.append((frame_num, x, y, w, h, inside, total)) - if len(good_details) >= 5: - break - if len(good_details) >= 5: - break - -for g in good_details: - print(f" frame={g[0]}, bbox=[{g[1]},{g[2]},{g[3]},{g[4]}], {g[5]}/{g[6]} inside ({100*g[5]/max(1,g[6]):.0f}%)") +# Apply mode: write qc_ok to face_detections.metadata +if args.apply: + print(f"\n=== Applying QC results to {SCHEMA}.face_detections ===") + db_url = os.environ.get("DATABASE_URL", "postgres://accusys@localhost:5432/momentry") + conn = psycopg2.connect(db_url) + cur = conn.cursor() + updated = 0 + for frame_num, fi, qc_ok, x, y, w, h in qc_results: + qc_str = "true" if qc_ok else "false" + cur.execute( + f"UPDATE {SCHEMA}.face_detections " + f"SET metadata = jsonb_set(COALESCE(metadata, '{{}}'::jsonb), '{{qc_ok}}', '\"{qc_str}\"'::jsonb) " + f"WHERE file_uuid = %s AND frame_number = %s AND x = %s AND y = %s AND width = %s AND height = %s", + (UUID, frame_num, x, y, w, h) + ) + if cur.rowcount > 0: + updated += 1 + conn.commit() + cur.close() + conn.close() + print(f"Updated {updated} rows in {SCHEMA}.face_detections") + print(f"Skipped {len(qc_results) - updated} rows (no matching face_detections row)") diff --git a/scripts/face_processor.py b/scripts/face_processor.py index 00a0541..19e17d7 100644 --- a/scripts/face_processor.py +++ b/scripts/face_processor.py @@ -13,6 +13,7 @@ Detection cost: near-zero CPU (Vision ANE) Embedding cost: near-zero CPU (CoreML ANE) """ +import re import sys import os import json @@ -29,6 +30,7 @@ from pathlib import Path import coremltools as ct sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from redis_publisher import RedisPublisher SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) SWIFT_BIN = os.path.join(SCRIPT_DIR, "swift_processors", ".build", "debug", "swift_face") @@ -49,11 +51,12 @@ def classify_pose(roll: float, yaw: float) -> str: class FaceProcessorVision: def __init__(self, video_path: str, output_path: str, uuid: str = "", - sample_interval: int = 3): + sample_interval: int = 3, publisher: RedisPublisher = None): self.video_path = video_path self.output_path = output_path self.uuid = uuid self.sample_interval = sample_interval + self.publisher = publisher # Load CoreML FaceNet self.coreml_model = None @@ -127,7 +130,33 @@ class FaceProcessorVision: print(f"[FACE_V2] Running: {' '.join(cmd)}") t0 = time.time() - subprocess.run(cmd, check=True) + log_path = swift_out + ".log" + log_f = open(log_path, "w") + proc = subprocess.Popen(cmd, stdout=log_f, stderr=subprocess.STDOUT, text=True) + last_pct = -1 + while proc.poll() is None: + time.sleep(10) + # Read latest log lines + try: + with open(log_path) as lf: + for line in lf: + line = line.strip() + m = re.search(r'(\d+)% complete', line) + if m: + pct = int(m.group(1)) + if pct > last_pct: + last_pct = pct + if self.publisher: + self.publisher.progress("face", pct, 100, f"swift detect {pct}%") + except Exception: + pass + log_f.close() + if proc.returncode != 0: + stderr_out = proc.stderr.read() + if stderr_out: + print(stderr_out.strip(), file=sys.stderr) + raise RuntimeError(f"swift_face exited with code {proc.returncode}") + elapsed = time.time() - t0 print(f"[FACE_V2] Detection done in {elapsed:.1f}s") @@ -156,6 +185,8 @@ class FaceProcessorVision: t0 = time.time() embed_count = 0 + total_face_count = 0 + last_pct = -1 for frame_info in frames: frame_num = frame_info["frame"] @@ -220,6 +251,12 @@ class FaceProcessorVision: if len(face_data["frames"]) % 100 == 0: elapsed = time.time() - t0 print(f"[FACE_V2] {len(face_data['frames'])} frames, {embed_count} embeddings, {elapsed:.0f}s") + if self.publisher: + pct = int(len(face_data["frames"]) * 100 / max(len(frames), 1)) + if pct > last_pct: + last_pct = pct + self.publisher.progress("face", len(face_data["frames"]), len(frames), + f"{embed_count} faces", embed_count, "faces") self.video.release() @@ -259,19 +296,36 @@ def main(): parser.add_argument("--force", action="store_true") args = parser.parse_args() + publisher = RedisPublisher(args.uuid) if args.uuid else None + if publisher: + publisher.info("face", "FACE_START") + if args.force and os.path.exists(args.output_path): os.remove(args.output_path) processor = FaceProcessorVision( args.video_path, args.output_path, - args.uuid, args.sample_interval + args.uuid, args.sample_interval, publisher ) # Step 1: Vision detection (bbox + pose via ANE) - detection = processor.process_with_swift() + try: + detection = processor.process_with_swift() + except Exception as e: + if publisher: + publisher.error("face", f"Detection failed: {e}") + raise # Step 2: CoreML embedding + save - processor.embed_and_save(detection) + try: + processor.embed_and_save(detection) + except Exception as e: + if publisher: + publisher.error("face", f"Embedding failed: {e}") + raise + + if publisher: + publisher.complete("face", f"{len(detection.get('frames',[]))} frames") # Clean up temp detection file swift_out = args.output_path.replace(".json", "_detect.json") diff --git a/scripts/identity_bind.py b/scripts/identity_bind.py index 714a9c5..ce154f5 100644 --- a/scripts/identity_bind.py +++ b/scripts/identity_bind.py @@ -81,10 +81,10 @@ for cluster_id in sorted(set(labels)): VALUES (%s, 'face', 'auto', 'active', NOW(), %s) ON CONFLICT (name) DO UPDATE SET status = 'active', file_uuid = COALESCE(dev.identities.file_uuid, %s) RETURNING id - """, (f"PERSON_{UUID[:8]}_{cluster_id}", UUID, UUID)) + """, (f"stranger_{UUID}_{cluster_id}", UUID, UUID)) identity_id = cur.fetchone()[0] cluster_to_identity[cluster_id] = identity_id - print(f" Cluster {cluster_id}: new identity {identity_id} (PERSON_{cluster_id})") + print(f" Cluster {cluster_id}: new identity {identity_id} (stranger_{UUID}_{cluster_id})") # Step 4: Create identity bindings print("Creating identity bindings...") diff --git a/scripts/migrate_identity_files.py b/scripts/migrate_identity_files.py new file mode 100644 index 0000000..f072340 --- /dev/null +++ b/scripts/migrate_identity_files.py @@ -0,0 +1,131 @@ +#!/opt/homebrew/bin/python3.11 +""" +Migrate Identity Files — one-time: DB identities → filesystem identity.json + +Reads all identities from PostgreSQL, queries file bindings, +and writes identity.json + _index.json to {OUTPUT_DIR}/identities/{uuid}/ + +Usage: + python3 scripts/migrate_identity_files.py + python3 scripts/migrate_identity_files.py --db "dbname=momentry user=accusys" + python3 scripts/migrate_identity_files.py --output /path/to/output +""" +import argparse +import json +import os +from datetime import datetime, timezone +from pathlib import Path + +import psycopg2 +import psycopg2.extras + + +def main(): + parser = argparse.ArgumentParser(description="Migrate identities to filesystem") + parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost")) + parser.add_argument("--output", default=os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output")) + args = parser.parse_args() + + conn = psycopg2.connect(args.db) + identities_root = Path(args.output) / "identities" + identities_root.mkdir(parents=True, exist_ok=True) + + cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + cur.execute(""" + SELECT id, uuid::text, name, identity_type, source, status, + tmdb_id, tmdb_profile, metadata::text, created_at, updated_at + FROM identities + WHERE uuid IS NOT NULL + ORDER BY id + """) + rows = cur.fetchall() + + if not rows: + print("No identities found in DB.") + return + + index = {} + migrated = 0 + skipped = 0 + + for row in rows: + uuid_raw = row["uuid"] + uuid_clean = uuid_raw.replace("-", "") + name = row["name"] or "" + + dir_path = identities_root / uuid_clean + dir_path.mkdir(parents=True, exist_ok=True) + + # Get bindings for this identity from face_detections + bindings_cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + bindings_cur.execute(""" + SELECT fd.file_uuid, + COALESCE(array_agg(DISTINCT fd.trace_id) FILTER (WHERE fd.trace_id IS NOT NULL), '{}') AS trace_ids, + COUNT(*)::bigint AS face_count + FROM face_detections fd + WHERE fd.identity_id = %s + GROUP BY fd.file_uuid + ORDER BY fd.file_uuid + """, (row["id"],)) + binding_rows = bindings_cur.fetchall() + bindings_cur.close() + + file_bindings = [] + for b in binding_rows: + trace_ids = b["trace_ids"] + if isinstance(trace_ids, list): + trace_ids = [int(t) for t in trace_ids if t is not None] + file_bindings.append({ + "file_uuid": b["file_uuid"], + "trace_ids": trace_ids, + "face_count": int(b["face_count"]), + }) + + metadata = row.get("metadata") + if isinstance(metadata, str): + metadata = json.loads(metadata) if metadata else {} + elif metadata is None: + metadata = {} + + fmt_time = lambda v: v.isoformat() if v else datetime.now(timezone.utc).isoformat() + + identity_file = { + "version": 1, + "identity_uuid": uuid_clean, + "name": name, + "identity_type": row.get("identity_type"), + "source": row.get("source"), + "status": row.get("status"), + "tmdb_id": row.get("tmdb_id"), + "tmdb_profile": row.get("tmdb_profile"), + "metadata": metadata, + "file_bindings": file_bindings, + "created_at": fmt_time(row.get("created_at")), + "updated_at": fmt_time(row.get("updated_at")), + } + + with open(dir_path / "identity.json", "w", encoding="utf-8") as f: + json.dump(identity_file, f, indent=2, ensure_ascii=False) + + index[uuid_clean] = name + migrated += 1 + print(f" [{migrated:5d}] {name} ({uuid_clean})") + + cur.close() + conn.close() + + # Write _index.json + index_file = { + "version": 1, + "updated_at": datetime.now(timezone.utc).isoformat(), + "entries": index, + } + with open(identities_root / "_index.json", "w", encoding="utf-8") as f: + json.dump(index_file, f, indent=2, ensure_ascii=False) + + print(f"\nDone: {migrated} identities migrated") + print(f"Index: {identities_root / '_index.json'} ({len(index)} entries)") + + +if __name__ == "__main__": + main() diff --git a/scripts/ocr_processor.py b/scripts/ocr_processor.py index c1af99c..04a1091 100755 --- a/scripts/ocr_processor.py +++ b/scripts/ocr_processor.py @@ -4,6 +4,7 @@ OCR Processor Wrapper Calls Swift Vision Framework OCR (swift_ocr) with fallback to PaddleOCR. """ +import re import sys import json import os @@ -11,6 +12,10 @@ import subprocess import argparse +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from redis_publisher import RedisPublisher + + SWIFT_OCR_PATH = os.path.join( os.path.dirname(os.path.abspath(__file__)), "swift_processors/.build/debug/swift_ocr" @@ -19,6 +24,7 @@ SWIFT_OCR_ALT = os.path.join( os.path.dirname(os.path.abspath(__file__)), "swift_processors/.build/arm64-apple-macosx/debug/swift_ocr" ) +SWIFT_PROGRESS_RE = re.compile(r"\[SwiftOCR\] Progress:\s*(\d+)%") def process_ocr( @@ -27,6 +33,7 @@ def process_ocr( uuid: str = "", sample_interval: int = 30, recognition_level: str = "accurate", + publisher: RedisPublisher = None, ) -> dict: swift_bin = SWIFT_OCR_PATH if not os.path.exists(swift_bin): @@ -42,15 +49,34 @@ def process_ocr( "--uuid", uuid] print(f"[OCR] Running Swift OCR", file=sys.stderr) - result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200) + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - if result.stdout: - print(result.stdout.strip(), file=sys.stderr) - if result.stderr: - print(result.stderr.strip(), file=sys.stderr) + last_pct = -1 + stdout_lines = [] + for line in proc.stdout: + line = line.strip() + stdout_lines.append(line) + m = SWIFT_PROGRESS_RE.search(line) + if m: + pct = int(m.group(1)) + if pct > last_pct: + last_pct = pct + print(f"[OCR] Progress: {pct}%", file=sys.stderr) + if publisher: + publisher.progress("ocr", pct, 100, f"{pct}%") + elif line: + print(line, file=sys.stderr) - if result.returncode != 0 or not os.path.exists(output_path): - print(f"[OCR] Swift OCR failed, falling back to PaddleOCR", file=sys.stderr) + stderr_output = proc.stderr.read() + if stderr_output: + print(stderr_output.strip(), file=sys.stderr) + + proc.wait() + + if proc.returncode != 0 or not os.path.exists(output_path): + print(f"[OCR] Swift OCR failed (exit={proc.returncode}), falling back to PaddleOCR", file=sys.stderr) + if publisher: + publisher.error("ocr", f"Swift OCR failed, using fallback") return _fallback(video_path, output_path, uuid, sample_interval) with open(output_path) as f: @@ -81,9 +107,16 @@ if __name__ == "__main__": parser.add_argument("--recognition-level", choices=["fast", "accurate"], default="accurate") args = parser.parse_args() + publisher = RedisPublisher(args.uuid) if args.uuid else None + if publisher: + publisher.info("ocr", "OCR_START") + result = process_ocr(args.video_path, args.output_path, args.uuid, - args.sample_interval, args.recognition_level) + args.sample_interval, args.recognition_level, + publisher) with open(args.output_path, "w") as f: json.dump(result, f, indent=2) print(f"OCR: {len(result.get('frames', []))} frames with text") + if publisher: + publisher.complete("ocr", f"{len(result.get('frames',[]))} frames") diff --git a/scripts/parent_chunk_5w1h.py b/scripts/parent_chunk_5w1h.py index 5d1fe7c..1afa8c9 100644 --- a/scripts/parent_chunk_5w1h.py +++ b/scripts/parent_chunk_5w1h.py @@ -28,7 +28,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry") SCHEMA = os.getenv("DATABASE_SCHEMA", "dev") OUTPUT_DIR = os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev") -OLLAMA_URL = "http://localhost:11434/api" +EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://localhost:11436/v1/embeddings") def load_speaker_map(file_uuid: str) -> dict: """Load speaker→identity mapping from DB (generalized, not hardcoded)""" @@ -64,7 +64,7 @@ CURRENT_VERSIONS = { "embedding_agent": "nomic-embed-768d/v1", } -LLM_URL = os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8081/v1/chat/completions") +LLM_URL = os.getenv("MOMENTRY_LLM_URL", os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8082/v1/chat/completions")) LLM_MODEL = os.getenv("MOMENTRY_LLM_SUMMARY_MODEL", "gemma4") @@ -97,7 +97,7 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]: s, e = cs["start_time"], cs["end_time"] children = [] - for seg in asr_segs: + for seg_idx, seg in enumerate(asr_segs): st, en = seg.get("start", 0), seg.get("end", 0) text = seg.get("text", "").strip() if st < s or en > e or not text: continue @@ -117,11 +117,11 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]: "start": st, "end": en, "text": text, "speaker_id": spk_id, "speaker_name": character, "speaker_confidence": spk_conf, - "chunk_id": f"{file_uuid}_{st:.0f}_{en:.0f}", + "chunk_id": f"{file_uuid}_{seg_idx}", }) # Boundary overlap: even empty scenes get partial children - for seg in asr_segs: + for seg_idx, seg in enumerate(asr_segs): st, en = seg.get("start", 0), seg.get("end", 0) text = seg.get("text", "").strip() if not text: continue @@ -141,7 +141,7 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]: "start": st, "end": en, "text": text, "speaker_id": spk_id, "speaker_name": character, "speaker_confidence": spk_conf, - "chunk_id": f"{file_uuid}_{st:.0f}_{en:.0f}", + "chunk_id": f"{file_uuid}_{seg_idx}", "overlap_type": "partial", }) @@ -215,14 +215,17 @@ def generate_llm_child_summary(child: dict, parent_summary: str) -> Optional[str # ===== Embedding (Ollama nomic-embed) ===== def embed_text(text: str, max_retries: int = 3) -> Optional[List[float]]: - """Get embedding via Ollama nomic-embed-text""" + """Get embedding via EmbeddingGemma server""" for attempt in range(max_retries): try: - resp = requests.post(f"{OLLAMA_URL}/embeddings", json={ - "model": "nomic-embed-text-v2-moe", "prompt": text, + resp = requests.post(EMBEDDING_URL, json={ + "input": [text], }, timeout=30) if resp.status_code == 200: - return resp.json()["embedding"] + data = resp.json() + items = data.get("data", []) + if items: + return items[0]["embedding"] except Exception as e: if attempt == max_retries - 1: print(f" ⚠️ Embedding failed: {e}") @@ -244,7 +247,7 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool, # Get base chunk_index cur.execute( - f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunks WHERE file_uuid = %s", + f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunk WHERE file_uuid = %s", (file_uuid,), ) next_index = (cur.fetchone()[0] or 0) + 1 @@ -255,20 +258,38 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool, parent_id = f"{mode}_parent_{file_uuid}_{scene['start_time']:.0f}_{scene['end_time']:.0f}" - cur.execute( - f""" - INSERT INTO {SCHEMA}.chunks (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, - start_time, end_time, content, text_content, parent_chunk_id) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s) - ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE - SET content = EXCLUDED.content, text_content = EXCLUDED.text_content - """, - (parent_id, parent_id, file_uuid, parent_type, next_index, - scene["start_time"], scene["end_time"], - json.dumps({"summary": parent_text, "mode": mode, "type": "parent", - "source_versions": CURRENT_VERSIONS}), - parent_text, None), - ) + parent_embedding = embed_text(parent_text) if do_embed else None + if do_embed and parent_embedding: + cur.execute( + f""" + INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, + start_time, end_time, content, text_content, parent_chunk_id, embedding) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector) + ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE + SET content = EXCLUDED.content, text_content = EXCLUDED.text_content, + embedding = EXCLUDED.embedding + """, + (parent_id, parent_id, file_uuid, parent_type, next_index, + scene["start_time"], scene["end_time"], + json.dumps({"summary": parent_text, "mode": mode, "type": "parent", + "source_versions": CURRENT_VERSIONS}), + parent_text, None, parent_embedding), + ) + else: + cur.execute( + f""" + INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, + start_time, end_time, content, text_content, parent_chunk_id) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s) + ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE + SET content = EXCLUDED.content, text_content = EXCLUDED.text_content + """, + (parent_id, parent_id, file_uuid, parent_type, next_index, + scene["start_time"], scene["end_time"], + json.dumps({"summary": parent_text, "mode": mode, "type": "parent", + "source_versions": CURRENT_VERSIONS}), + parent_text, None), + ) next_index += 1 parent_count += 1 @@ -276,22 +297,42 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool, child_id = child["chunk_id"] child_text = generate_story_child_summary(child, parent_text) if mode == "story" else generate_llm_child_summary(child, parent_text) - cur.execute( - f""" - INSERT INTO {SCHEMA}.chunks (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, - start_time, end_time, content, text_content, parent_chunk_id) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s) - ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE - SET content = EXCLUDED.content, text_content = EXCLUDED.text_content, - parent_chunk_id = EXCLUDED.parent_chunk_id - """, - (child_id, child_id, file_uuid, child_type, next_index, - child["start"], child["end"], - json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode, - "speaker_confidence": child.get("speaker_confidence", 0), - "source_versions": CURRENT_VERSIONS}), - child_text, parent_id), - ) + child_embedding = embed_text(child_text) if do_embed else None + if do_embed and child_embedding: + cur.execute( + f""" + INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, + start_time, end_time, content, text_content, parent_chunk_id, embedding) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector) + ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE + SET content = EXCLUDED.content, text_content = EXCLUDED.text_content, + parent_chunk_id = EXCLUDED.parent_chunk_id, + embedding = EXCLUDED.embedding + """, + (child_id, child_id, file_uuid, child_type, next_index, + child["start"], child["end"], + json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode, + "speaker_confidence": child.get("speaker_confidence", 0), + "source_versions": CURRENT_VERSIONS}), + child_text, parent_id, child_embedding), + ) + else: + cur.execute( + f""" + INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, + start_time, end_time, content, text_content, parent_chunk_id) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s) + ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE + SET content = EXCLUDED.content, text_content = EXCLUDED.text_content, + parent_chunk_id = EXCLUDED.parent_chunk_id + """, + (child_id, child_id, file_uuid, child_type, next_index, + child["start"], child["end"], + json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode, + "speaker_confidence": child.get("speaker_confidence", 0), + "source_versions": CURRENT_VERSIONS}), + child_text, parent_id), + ) next_index += 1 child_count += 1 @@ -304,7 +345,7 @@ def main(): parser = argparse.ArgumentParser(description="Story Processor V2.0") parser.add_argument("--file-uuid", required=True) parser.add_argument("--mode", choices=["story", "llm"], default="story") - parser.add_argument("--max-scenes", type=int, default=300) + parser.add_argument("--max-scenes", type=int, default=99999) parser.add_argument("--embed", action="store_true", help="Generate embeddings (Ollama)") parser.add_argument("--no-db", action="store_true", help="Skip DB storage") args = parser.parse_args() diff --git a/scripts/pose_processor.py b/scripts/pose_processor.py index 78df34c..ae47acc 100755 --- a/scripts/pose_processor.py +++ b/scripts/pose_processor.py @@ -5,12 +5,16 @@ Calls Swift Vision Framework pose (swift_pose) with fallback to YOLOv8 Pose. Uses VNDetectHumanBodyPoseRequest with ANE acceleration. """ +import re import sys import json import os import subprocess import argparse +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from redis_publisher import RedisPublisher + SWIFT_POSE_PATH = os.path.join( os.path.dirname(os.path.abspath(__file__)), "swift_processors/.build/debug/swift_pose" @@ -21,11 +25,14 @@ SWIFT_POSE_ALT = os.path.join( ) +SWIFT_POSE_PROGRESS_RE = re.compile(r"\[SwiftPose\] Progress:\s*(\d+)%") + def process_pose( video_path: str, output_path: str, uuid: str = "", sample_interval: int = 30, + publisher: RedisPublisher = None, ) -> dict: swift_bin = SWIFT_POSE_PATH if not os.path.exists(swift_bin): @@ -33,6 +40,8 @@ def process_pose( if not os.path.exists(swift_bin): print("[Pose] Swift binary not found, using YOLOv8 fallback", file=sys.stderr) + if publisher: + publisher.error("pose", "Swift binary not found, using fallback") return _fallback(video_path, output_path, uuid, sample_interval) cmd = [swift_bin, video_path, output_path, @@ -40,17 +49,32 @@ def process_pose( "--uuid", uuid] print(f"[Pose] Running Swift Pose (Vision Framework)", file=sys.stderr) - result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200) + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - if result.stdout: - for line in result.stdout.strip().split("\n"): - print(f" {line}", file=sys.stderr) - if result.stderr: - for line in result.stderr.strip().split("\n"): + last_pct = -1 + for line in proc.stdout: + line = line.strip() + m = SWIFT_POSE_PROGRESS_RE.search(line) + if m: + pct = int(m.group(1)) + if pct > last_pct: + last_pct = pct + print(f"[Pose] Progress: {pct}%", file=sys.stderr) + if publisher: + publisher.progress("pose", pct, 100, f"{pct}%") + elif line: print(f" {line}", file=sys.stderr) - if result.returncode != 0 or not os.path.exists(output_path): - print(f"[Pose] Swift Pose failed, falling back to YOLOv8", file=sys.stderr) + stderr_output = proc.stderr.read() + if stderr_output: + print(stderr_output.strip(), file=sys.stderr) + + proc.wait() + + if proc.returncode != 0 or not os.path.exists(output_path): + print(f"[Pose] Swift Pose failed (exit={proc.returncode}), falling back to YOLOv8", file=sys.stderr) + if publisher: + publisher.error("pose", f"Swift Pose failed, using fallback") return _fallback(video_path, output_path, uuid, sample_interval) with open(output_path) as f: @@ -113,7 +137,14 @@ if __name__ == "__main__": parser.add_argument("--sample-interval", type=int, default=30) args = parser.parse_args() - result = process_pose(args.video_path, args.output_path, args.uuid, args.sample_interval) + publisher = RedisPublisher(args.uuid) if args.uuid else None + if publisher: + publisher.info("pose", "POSE_START") + + result = process_pose(args.video_path, args.output_path, args.uuid, + args.sample_interval, publisher) with open(args.output_path, "w") as f: json.dump(result, f, indent=2) print(f"Pose: {len(result.get('frames', []))} frames with poses") + if publisher: + publisher.complete("pose", f"{len(result.get('frames',[]))} frames") diff --git a/scripts/redis_publisher.py b/scripts/redis_publisher.py index 43dbb5e..30d759d 100644 --- a/scripts/redis_publisher.py +++ b/scripts/redis_publisher.py @@ -34,6 +34,8 @@ class ProgressData: message: Optional[str] = None current: Optional[int] = None total: Optional[int] = None + output_count: Optional[int] = None + output_type: Optional[str] = None extra: Optional[Dict[str, Any]] = None @@ -49,7 +51,8 @@ class StructuredMessage: class RedisPublisher: def __init__(self, uuid: str): self.uuid = uuid - self.channel = f"momentry:progress:{uuid}" + prefix = os.environ.get("MOMENTRY_REDIS_PREFIX", "momentry:") + self.channel = f"{prefix}progress:{uuid}" self._enabled = False self._client = None self._connect() @@ -107,6 +110,8 @@ class RedisPublisher: message: Optional[str] = None, current: Optional[int] = None, total: Optional[int] = None, + output_count: Optional[int] = None, + output_type: Optional[str] = None, extra: Optional[Dict[str, Any]] = None, ) -> bool: if not self._enabled: @@ -121,6 +126,8 @@ class RedisPublisher: message=message, current=current, total=total, + output_count=output_count, + output_type=output_type, extra=extra, ), ) @@ -136,6 +143,8 @@ class RedisPublisher: current: int, total: int, message: str = "", + output_count: Optional[int] = None, + output_type: Optional[str] = None, ) -> bool: return self.publish( MessageType.PROGRESS, @@ -143,6 +152,8 @@ class RedisPublisher: message=message, current=current, total=total, + output_count=output_count, + output_type=output_type, ) def complete(self, processor: str, message: str = "") -> bool: diff --git a/scripts/sync_users_from_sftpgo.py b/scripts/sync_users_from_sftpgo.py new file mode 100644 index 0000000..0550174 --- /dev/null +++ b/scripts/sync_users_from_sftpgo.py @@ -0,0 +1,117 @@ +#!/opt/homebrew/bin/python3.11 +""" +Sync users from SFTPGo to Momentry users table. + +Usage: + python3 scripts/sync_users_from_sftpgo.py + python3 scripts/sync_users_from_sftpgo.py --sftpgo-url http://localhost:8080 + python3 scripts/sync_users_from_sftpgo.py --db "dbname=momentry user=accusys" + +Environment: + SFTPGO_BASE_URL Default: http://localhost:8080 + DATABASE_URL Default: dbname=momentry user=accusys host=localhost + +This script does NOT copy passwords. It creates user records with placeholder +password hashes. The real password will be captured on the user's first +login through Momentry (which verifies against SFTPGo and caches the hash). +""" +import argparse +import json +import os +import sys +from typing import Any + +import psycopg2 +import psycopg2.extras +import requests + + +def get_sftpgo_users(sftpgo_url: str, admin_user: str, admin_pass: str) -> list[dict[str, Any]]: + """Get all users from SFTPGo.""" + # Get admin token (SFTPGo uses GET, not POST) + token_url = f"{sftpgo_url}/api/v2/token" + resp = requests.get(token_url, auth=(admin_user, admin_pass), timeout=10) + resp.raise_for_status() + token = resp.json().get("access_token") + if not token: + print("ERROR: Failed to get SFTPGo admin token", file=sys.stderr) + sys.exit(1) + + # List users + users_url = f"{sftpgo_url}/api/v2/users" + headers = {"Authorization": f"Bearer {token}"} + resp = requests.get(users_url, headers=headers, timeout=10) + resp.raise_for_status() + return resp.json() + + +def main(): + parser = argparse.ArgumentParser(description="Sync SFTPGo users to Momentry") + parser.add_argument("--sftpgo-url", default=os.getenv("SFTPGO_BASE_URL", "http://localhost:8080")) + parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost")) + parser.add_argument("--admin-user", default="admin") + parser.add_argument("--admin-pass", default=os.getenv("SFTPGO_ADMIN_PASSWORD", "Test3200Test3200")) + parser.add_argument("--dry-run", action="store_true", help="Print what would be done without executing") + args = parser.parse_args() + + # Fetch users from SFTPGo + print(f"[SFTPGo] Connecting to {args.sftpgo_url}...") + try: + sftpgo_users = get_sftpgo_users(args.sftpgo_url, args.admin_user, args.admin_pass) + except Exception as e: + print(f"ERROR: Failed to fetch SFTPGo users: {e}", file=sys.stderr) + sys.exit(1) + + print(f"[SFTPGo] Found {len(sftpgo_users)} users") + + # Connect to Momentry DB and set schema + conn = psycopg2.connect(args.db) + cur = conn.cursor() + cur.execute("SET search_path TO dev") + + synced = 0 + skipped = 0 + + for user in sftpgo_users: + username = user.get("username") + status = user.get("status", 0) + + if not username or status != 1: + skipped += 1 + continue + + role = "admin" if username == "admin" else "user" + # Placeholder hash — will be updated on first login via SFTPGo fallback + placeholder_hash = "$placeholder$synced_from_sftpgo" + + if args.dry_run: + print(f" Would insert: {username} (role={role})") + synced += 1 + continue + + try: + cur.execute( + "INSERT INTO users (username, password_hash, role) VALUES (%s, %s, %s) " + "ON CONFLICT (username) DO NOTHING", + (username, placeholder_hash, role), + ) + if cur.rowcount > 0: + print(f" ✅ {username} (role={role})") + synced += 1 + else: + print(f" ⏭️ {username} already exists, skipped") + skipped += 1 + except Exception as e: + print(f" ❌ {username}: {e}", file=sys.stderr) + skipped += 1 + + conn.commit() + cur.close() + conn.close() + + print(f"\nDone: {synced} synced, {skipped} skipped/errors") + print("Note: Password hashes are placeholders. First login via Momentry will cache the real hash.") + + +if __name__ == "__main__": + main() diff --git a/scripts/tmdb_agent.py b/scripts/tmdb_agent.py new file mode 100644 index 0000000..fc6f196 --- /dev/null +++ b/scripts/tmdb_agent.py @@ -0,0 +1,285 @@ +#!/opt/homebrew/bin/python3.11 +""" +TMDb Agent — pre-fetch TMDb data and write directly to identity files. + +Usage: + python3 scripts/tmdb_agent.py --file-uuid + python3 scripts/tmdb_agent.py --file-uuid --db "dbname=momentry user=accusys" + +Environment: + TMDB_API_KEY Required. TMDb API key. + MOMENTRY_OUTPUT_DIR Default: /Users/accusys/momentry/output + DATABASE_URL Default: dbname=momentry user=accusys host=localhost + +Flow: + 1. Query videos table for file_name + 2. Extract movie name from filename + 3. TMDB /search/movie → find best match + 4. TMDB /movie/{id}/credits → fetch cast + 5. TMDB /person/{id} → fetch person details + 6. Write {OUTPUT}/identities/{uuid}/identity.json + profile.jpg for each cast member + 7. Write {OUTPUT}/{uuid}.tmdb.json cache (movie info + identity uuid list) +""" +import argparse +import hashlib +import json +import os +import re +import sys +from datetime import datetime, timezone +from pathlib import Path + +import requests +import psycopg2 +import psycopg2.extras + + +TMDB_BASE = "https://api.themoviedb.org/3" +TMDB_API_KEY = os.getenv("TMDB_API_KEY") + + +def extract_movie_name(filename: str) -> str | None: + """Extract movie name from filename (e.g. 'Charade_1963.mp4' → 'Charade 1963')""" + name = Path(filename).stem + cleaned = re.sub(r'[._]', ' ', name).strip() + # Strip text after separators like |, (, [, { + for sep in ('|', '(', '[', '{', '\u2502'): + idx = cleaned.find(sep) + if idx > 0: + cleaned = cleaned[:idx].strip() + # Strip common suffixes (quality, format, source, etc.) + suffixes = ( + r'\d{3,4}p', r'\d{3,4}x\d{3,4}', r'\d+fps', r'bluray', r'web[ -]?dl', + r'webrip', r'hdrip', r'dvdrip', r'dvd', r'brrip', r'hdtv', r'xvid', + r'x264', r'h264', r'x265', r'h265', r'hevc', r'aac', r'mp3', r'ac3', + r'dts', r'5\.1', r'7\.1', r'dual[ -]?audio', r'multi[ -]?sub', + r'proper', r'repack', r'extended', r'unrated', r'directors[ -]?cut', + r'theatrical', r'internal', r'limited', r'complete', r'full[ -]?movie', + r'english', r'french', r'spanish', r'german', r'chinese', + r'youtube', r'yify', r'ettv', r'rarbg', r'tgx', r'axxo', r'ctrlhd', + ) + pattern = r'\b(?:' + '|'.join(suffixes) + r')\b' + cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE).strip() + # Collapse multiple spaces + cleaned = re.sub(r'\s+', ' ', cleaned).strip() + return cleaned if len(cleaned) >= 3 else None + + +def search_movie(query: str) -> dict | None: + """Search TMDB for a movie by name. Returns first result.""" + url = f"{TMDB_BASE}/search/movie" + params = {"query": query, "api_key": TMDB_API_KEY, "language": "en-US", "page": 1} + try: + resp = requests.get(url, params=params, timeout=15) + resp.raise_for_status() + results = resp.json().get("results", []) + return results[0] if results else None + except Exception as e: + print(f"TMDB search failed: {e}", file=sys.stderr) + return None + + +def get_credits(movie_id: int) -> list[dict]: + """Get cast credits for a movie from TMDB.""" + url = f"{TMDB_BASE}/movie/{movie_id}/credits" + params = {"api_key": TMDB_API_KEY, "language": "en-US"} + try: + resp = requests.get(url, params=params, timeout=15) + resp.raise_for_status() + return resp.json().get("cast", []) + except Exception as e: + print(f"TMDB credits failed: {e}", file=sys.stderr) + return [] + + +def get_person_details(person_id: int) -> dict: + """Fetch person details from TMDB /person/{id}.""" + url = f"{TMDB_BASE}/person/{person_id}" + params = {"api_key": TMDB_API_KEY, "language": "en-US"} + try: + resp = requests.get(url, params=params, timeout=15) + resp.raise_for_status() + data = resp.json() + return { + "biography": data.get("biography"), + "birthday": data.get("birthday"), + "place_of_birth": data.get("place_of_birth"), + "also_known_as": data.get("also_known_as", []), + "imdb_id": data.get("imdb_id"), + "known_for_department": data.get("known_for_department"), + "popularity": data.get("popularity"), + "deathday": data.get("deathday"), + "gender": data.get("gender"), + "homepage": data.get("homepage"), + } + except Exception as e: + print(f"TMDB person details failed for {person_id}: {e}", file=sys.stderr) + return {} + + +def main(): + parser = argparse.ArgumentParser(description="TMDb Agent — pre-fetch cache") + parser.add_argument("--file-uuid", required=True, help="File UUID to enrich") + parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost")) + parser.add_argument("--output", default=os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output")) + args = parser.parse_args() + + if not TMDB_API_KEY: + print("ERROR: TMDB_API_KEY not set.", file=sys.stderr) + sys.exit(1) + + # 1. Query DB for file_name + schema = os.getenv("DATABASE_SCHEMA", "").strip() + table = f"{schema}.videos" if schema else "videos" + conn = psycopg2.connect(args.db) + cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + cur.execute(f"SELECT file_name FROM {table} WHERE file_uuid = %s", (args.file_uuid,)) + row = cur.fetchone() + cur.close() + conn.close() + + if not row: + print(f"ERROR: File not found: {args.file_uuid}", file=sys.stderr) + sys.exit(1) + + file_name = row["file_name"] + print(f"[TKG-AGENT] File: {file_name} ({args.file_uuid})") + + # 2. Extract movie name + movie_name = extract_movie_name(file_name) + if not movie_name: + print(f"ERROR: Cannot extract movie name from: {file_name}", file=sys.stderr) + sys.exit(1) + print(f"[TKG-AGENT] Extracted movie name: '{movie_name}'") + + # 3. Search TMDB + movie = search_movie(movie_name) + if not movie: + print(f"ERROR: No TMDB movie found for: {movie_name}", file=sys.stderr) + sys.exit(1) + print(f"[TKG-AGENT] Matched: {movie['title']} (TMDB id={movie['id']})") + + # 4. Fetch credits + cast = get_credits(movie["id"]) + if not cast: + print(f"WARN: No cast data found for movie {movie['id']}", file=sys.stderr) + + # 5. Enrich each cast member with person details and write identity files + output = Path(args.output) + identities_root = output / "identities" + identities_root.mkdir(parents=True, exist_ok=True) + + now = datetime.now(timezone.utc).isoformat() + created_identities = [] + + for i, m in enumerate(cast): + person_id = m["id"] + person = get_person_details(person_id) + + # Generate deterministic UUID: SHA256("tmdb-{movie_id}-{person_id}-{name}") + uuid_raw = hashlib.sha256(f"tmdb-{movie['id']}-{person_id}-{m['name']}".encode()).hexdigest()[:32] + profile_url = ( + f"https://image.tmdb.org/t/p/w185{m['profile_path']}" + if m.get("profile_path") else None + ) + + # Build identity.json + metadata = { + "tmdb_character": m.get("character", ""), + "tmdb_cast_order": i, + "tmdb_movie_id": movie["id"], + "tmdb_movie_title": movie["title"], + "tmdb_biography": person.get("biography"), + "tmdb_birthday": person.get("birthday"), + "tmdb_place_of_birth": person.get("place_of_birth"), + "tmdb_aliases": person.get("also_known_as", []), + "tmdb_imdb_id": person.get("imdb_id"), + "tmdb_department": person.get("known_for_department"), + "tmdb_popularity": person.get("popularity"), + "tmdb_deathday": person.get("deathday"), + "tmdb_gender": person.get("gender"), + "tmdb_homepage": person.get("homepage"), + } + + identity = { + "version": 1, + "identity_uuid": uuid_raw, + "name": m["name"], + "identity_type": "people", + "source": "tmdb", + "status": "confirmed", + "tmdb_id": person_id, + "tmdb_profile": profile_url, + "metadata": {k: v for k, v in metadata.items() if v is not None or k == "tmdb_aliases"}, + "file_bindings": [], + "created_at": now, + "updated_at": now, + } + + # Write identity.json + identity_dir = identities_root / uuid_raw + identity_dir.mkdir(parents=True, exist_ok=True) + identity_path = identity_dir / "identity.json" + with open(identity_path, "w", encoding="utf-8") as f: + json.dump(identity, f, indent=2, ensure_ascii=False) + + # Download profile.jpg + if profile_url: + img_path = identity_dir / "profile.jpg" + if not img_path.exists(): + try: + resp = requests.get(profile_url, timeout=15) + if resp.status_code == 200: + img_path.write_bytes(resp.content) + except Exception as e: + print(f" [WARN] Failed to download profile for {m['name']}: {e}", file=sys.stderr) + + created_identities.append({ + "identity_uuid": uuid_raw, + "name": m["name"], + "tmdb_id": person_id, + "character": m.get("character", ""), + "order": i, + }) + + if (i + 1) % 5 == 0: + print(f"[TKG-AGENT] Wrote {i+1}/{len(cast)} identity files") + + # Update _index.json + index_path = identities_root / "_index.json" + index = {} + if index_path.exists(): + with open(index_path) as f: + index = json.load(f) + for ci in created_identities: + index[ci["identity_uuid"]] = ci["name"] + with open(index_path, "w", encoding="utf-8") as f: + json.dump(index, f, indent=2, ensure_ascii=False) + + # Write movie cache ({uuid}.tmdb.json) — simplified, no per-person data + cache = { + "file_uuid": args.file_uuid, + "fetched_at": now, + "source": "agent", + "movie": { + "tmdb_id": movie["id"], + "title": movie["title"], + "release_date": movie.get("release_date"), + "overview": movie.get("overview"), + "poster_path": movie.get("poster_path"), + }, + "cast_count": len(cast), + "identities_created": len(created_identities), + "identities": created_identities, + } + + cache_path = output / f"{args.file_uuid}.tmdb.json" + with open(cache_path, "w", encoding="utf-8") as f: + json.dump(cache, f, indent=2, ensure_ascii=False) + + print(f"[TKG-AGENT] Cache written: {cache_path}") + print(f"[TKG-AGENT] Identity files: {len(created_identities)} cast members → {identities_root}/") + + +if __name__ == "__main__": + main() diff --git a/scripts/visual_chunk_processor.py b/scripts/visual_chunk_processor.py index b7e6bf3..1a60750 100644 --- a/scripts/visual_chunk_processor.py +++ b/scripts/visual_chunk_processor.py @@ -384,6 +384,7 @@ def main(): parser.add_argument("video_path", help="視頻文件路徑") parser.add_argument("output_path", help="輸出文件路徑") parser.add_argument("--yolo-result", help="YOLO 結果文件路徑(可選)") + parser.add_argument("--uuid", help="檔案 UUID(由 executor 傳入)") parser.add_argument( "--strategy", choices=["fixed", "similarity"], default="fixed", help="分片策略" ) diff --git a/src/api/agent_api.rs b/src/api/agent_api.rs index 4882dc3..7505885 100644 --- a/src/api/agent_api.rs +++ b/src/api/agent_api.rs @@ -57,17 +57,12 @@ async fn translate_text( "temperature": 0.1 }); - let response = client - .post(llm_url) - .json(&body) - .send() - .await - .map_err(|e| { - ( - StatusCode::INTERNAL_SERVER_ERROR, - format!("Failed to call LLM: {}", e), - ) - })?; + let response = client.post(llm_url).json(&body).send().await.map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to call LLM: {}", e), + ) + })?; let llm_resp: serde_json::Value = response.json().await.map_err(|e| { ( diff --git a/src/api/five_w1h_agent_api.rs b/src/api/five_w1h_agent_api.rs index aa71413..3732a67 100644 --- a/src/api/five_w1h_agent_api.rs +++ b/src/api/five_w1h_agent_api.rs @@ -97,17 +97,25 @@ struct SceneSummaryResult { fn llm_base_url() -> String { let v = std::env::var("MOMENTRY_LLM_URL"); - if v.is_ok() { return v.unwrap(); } + if v.is_ok() { + return v.unwrap(); + } let v = std::env::var("MOMENTRY_LLM_SUMMARY_URL"); - if v.is_ok() { return v.unwrap(); } + if v.is_ok() { + return v.unwrap(); + } "http://localhost:8082/v1/chat/completions".to_string() } fn llm_model() -> String { let v = std::env::var("MOMENTRY_LLM_MODEL"); - if v.is_ok() { return v.unwrap(); } + if v.is_ok() { + return v.unwrap(); + } let v = std::env::var("MOMENTRY_LLM_SUMMARY_MODEL"); - if v.is_ok() { return v.unwrap(); } + if v.is_ok() { + return v.unwrap(); + } "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string() } @@ -115,7 +123,7 @@ fn llm_model() -> String { async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result> { let table = schema::table_name("chunk"); - sqlx::query_as::<_, (String, i64, i64, f64, f64, f64, serde_json::Value, serde_json::Value, Option)>(&format!( + sqlx::query_as::<_, (String, i64, i64, f64, Option, Option, serde_json::Value, Option, Option)>(&format!( r#"SELECT chunk_id, start_frame, end_frame, fps, start_time, end_time, content, metadata, summary_text FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' ORDER BY start_frame"#, table )) @@ -123,7 +131,8 @@ async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result anyhow::Result> { let table = schema::table_name("chunk"); - sqlx::query_as::<_, (String, String, f64, f64, i64, i64, serde_json::Value)>(&format!( + sqlx::query_as::<_, (String, String, Option, Option, i64, i64, serde_json::Value)>(&format!( r#"SELECT chunk_id, COALESCE(text_content,''), start_time, end_time, start_frame, end_frame, content FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND start_time >= $2 AND end_time <= $3 ORDER BY start_time"#, table @@ -141,7 +150,7 @@ async fn fetch_sentences_in_scene( .bind(file_uuid).bind(cut.start_time).bind(cut.end_time) .fetch_all(db.pool()).await? .into_iter().map(|r| Ok(SentenceChunk { - chunk_id: r.0, text: r.1, start_time: r.2, end_time: r.3, + chunk_id: r.0, text: r.1, start_time: r.2.unwrap_or(0.0), end_time: r.3.unwrap_or(0.0), start_frame: r.4, end_frame: r.5, content: r.6, })).collect() } @@ -540,10 +549,7 @@ async fn analyze_5w1h( if let Some(ref t) = cut.summary_text { if t.len() > 20 { processed += 1; - prev_context.push(format!( - "Scene (t={:.0}s): {}", - cut.start_time, t - )); + prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t)); continue; } } @@ -621,10 +627,7 @@ async fn batch_analyze_5w1h( if let Some(ref t) = cut.summary_text { if t.len() > 20 { processed += 1; - prev_context.push(format!( - "Scene (t={:.0}s): {}", - cut.start_time, t - )); + prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t)); continue; } } @@ -713,10 +716,7 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result< if let Some(ref t) = cut.summary_text { if t.len() > 20 { processed += 1; - prev_context.push(format!( - "Scene (t={:.0}s): {}", - cut.start_time, t - )); + prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t)); continue; } } @@ -764,38 +764,44 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result< qdrant.init_collection(768).await?; let chunk_table = schema::table_name("chunk"); - let rows = sqlx::query_as::<_, (String, String, String, f64, f64)>( - &format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time \ + let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64)>(&format!( + "SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time \ FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL \ - AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table), - ) + AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", + chunk_table + )) .bind(file_uuid) .fetch_all(db.pool()) .await?; let total_vec = rows.len(); let mut stored = 0usize; - for (chunk_id, _ctype, text, start_time, end_time) in &rows { + for (chunk_id, _ctype, text, start_frame, end_frame, start_time, end_time) in &rows { let text = text.trim(); if text.is_empty() || text.len() < 5 { continue; } match embedder.embed_document(text).await { Ok(vector) => { - if let Err(e) = sqlx::query( - &format!("UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3", chunk_table) - ) + if let Err(e) = sqlx::query(&format!( + "UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3", + chunk_table + )) .bind(&vector as &[f32]) .bind(chunk_id) .bind(file_uuid) - .execute(db.pool()).await { + .execute(db.pool()) + .await + { tracing::error!("[Vectorize] PG failed for {}: {}", chunk_id, e); continue; } let payload = VectorPayload { - uuid: file_uuid.to_string(), + file_uuid: file_uuid.to_string(), chunk_id: chunk_id.clone(), chunk_type: "sentence".to_string(), + start_frame: *start_frame, + end_frame: *end_frame, start_time: *start_time, end_time: *end_time, text: Some(text.to_string()), diff --git a/src/api/identities.rs b/src/api/identities.rs index 1e03c74..6d9ab6f 100644 --- a/src/api/identities.rs +++ b/src/api/identities.rs @@ -93,16 +93,15 @@ async fn create_identity( })?; let id_table = crate::core::db::schema::table_name("identities"); - let name_col = if id_table.starts_with("dev.") { "name" } else { "real_name" }; let query = format!( "SELECT uuid, reference_data->'total_references' as total, reference_data->'angles_covered' as angles, reference_data->'quality_avg' as quality FROM {} - WHERE {} = $1 + WHERE name = $1 ORDER BY created_at DESC LIMIT 1", - id_table, name_col + id_table ); let row: Option<(String, Option, Option>, Option)> = @@ -168,11 +167,19 @@ async fn list_identities( let id_table = crate::core::db::schema::table_name("identities"); let total: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", id_table)) - .fetch_one(db.pool()).await - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Count error: {}", e)))?; + .fetch_one(db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Count error: {}", e), + ) + })?; - let name_col = if id_table.starts_with("dev.") { "name" } else { "real_name" }; - let sql = format!("SELECT id::int, uuid, {} AS name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2", name_col, id_table); + let sql = format!( + "SELECT id::int, uuid, name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2", + id_table + ); let rows: Vec<(i32, uuid::Uuid, String, Option)> = match sqlx::query_as(&sql) .bind(page_size as i64) @@ -200,12 +207,25 @@ async fn list_identities( .collect(); let identities_table = crate::core::db::schema::table_name("identities"); - let total_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", identities_table)) - .fetch_one(db.pool()).await.unwrap_or(0); - let tmdb_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table)) - .fetch_one(db.pool()).await.unwrap_or(0); - let auto_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE source = 'auto'", identities_table)) - .fetch_one(db.pool()).await.unwrap_or(0); + let total_identities: i64 = + sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", identities_table)) + .fetch_one(db.pool()) + .await + .unwrap_or(0); + let tmdb_identities: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", + identities_table + )) + .fetch_one(db.pool()) + .await + .unwrap_or(0); + let auto_identities: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE source = 'auto'", + identities_table + )) + .fetch_one(db.pool()) + .await + .unwrap_or(0); Ok(Json(IdentityListResponse { identities, diff --git a/src/api/identity_agent_api.rs b/src/api/identity_agent_api.rs index 9f63255..3801151 100644 --- a/src/api/identity_agent_api.rs +++ b/src/api/identity_agent_api.rs @@ -15,8 +15,14 @@ use crate::core::db::PostgresDb; pub fn identity_agent_routes() -> Router { Router::new() - .route("/api/v1/agents/identity/match-from-photo", post(match_from_photo)) - .route("/api/v1/agents/identity/match-from-trace", post(match_from_trace)) + .route( + "/api/v1/agents/identity/match-from-photo", + post(match_from_photo), + ) + .route( + "/api/v1/agents/identity/match-from-trace", + post(match_from_trace), + ) } #[derive(Debug, Serialize)] @@ -73,13 +79,21 @@ async fn match_from_photo( let uuid_clean = identity_uuid.replace('-', ""); if uuid_clean.is_empty() || file_uuid.is_empty() { - return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({ - "success": false, "message": "identity_uuid and file_uuid are required" - })))); + return Err(( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({ + "success": false, "message": "identity_uuid and file_uuid are required" + })), + )); } - let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({ - "success": false, "message": "No image field found. Use field name 'image'." - }))))?; + let data = image_data.ok_or_else(|| { + ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({ + "success": false, "message": "No image field found. Use field name 'image'." + })), + ) + })?; // 1. Save uploaded image to temp let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR") @@ -88,11 +102,17 @@ async fn match_from_photo( .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string()); let temp_dir = std::env::temp_dir().join("momentry_match_face"); std::fs::create_dir_all(&temp_dir).map_err(|e| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)}))) + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)})), + ) })?; let temp_img = temp_dir.join(format!("{}.jpg", uuid_clean)); std::fs::write(&temp_img, &data).map_err(|e| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)}))) + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)})), + ) })?; // 2. Extract face embedding via Python script @@ -103,79 +123,109 @@ async fn match_from_photo( .output() .await .map_err(|e| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)}))) + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)})), + ) })?; let _ = std::fs::remove_file(&temp_img); if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({ - "success": false, "message": format!("Face extraction failed: {}", stderr) - })))); + return Err(( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({ + "success": false, "message": format!("Face extraction failed: {}", stderr) + })), + )); } let stdout = String::from_utf8_lossy(&output.stdout); let extract_result: serde_json::Value = serde_json::from_str(&stdout).map_err(|_| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Failed to parse extractor output"}))) + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"message": "Failed to parse extractor output"})), + ) })?; let embedding: Vec = serde_json::from_value( - extract_result.get("embedding") - .ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({"message": "No embedding in extractor output"}))))? - .clone() - ).map_err(|_| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Invalid embedding format"}))) + extract_result + .get("embedding") + .ok_or_else(|| { + ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({"message": "No embedding in extractor output"})), + ) + })? + .clone(), + ) + .map_err(|_| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"message": "Invalid embedding format"})), + ) })?; let embedding_f32: Vec = embedding.into_iter().map(|v| v as f32).collect(); // 3. Look up identity internal ID let id_table = schema::table_name("identities"); - let identity_id_row: Option<(i32,)> = sqlx::query_as( - &format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table) - ) + let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!( + "SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", + id_table + )) .bind(&uuid_clean) .fetch_optional(state.db.pool()) .await .map_err(|e| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)}))) + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"message": format!("DB error: {}", e)})), + ) })?; let identity_id = match identity_id_row { Some((id,)) => id, - None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ - "success": false, "message": "Identity not found" - })))), + None => { + return Err(( + StatusCode::NOT_FOUND, + Json(serde_json::json!({ + "success": false, "message": "Identity not found" + })), + )) + } }; // 4. Find best matching trace (highest similarity, no threshold) let fd_table = schema::table_name("face_detections"); - let best_match: Option<(i32, i32, f64)> = sqlx::query_as( - &format!( - r#"SELECT id, trace_id, + let best_match: Option<(i32, i32, f64)> = sqlx::query_as(&format!( + r#"SELECT id, trace_id, 1 - (embedding::vector <=> $1::vector) as similarity FROM {} WHERE file_uuid = $2 AND embedding IS NOT NULL ORDER BY embedding::vector <=> $1::vector LIMIT 1"#, - fd_table - ) - ) + fd_table + )) .bind(&embedding_f32) .bind(&file_uuid) .fetch_optional(state.db.pool()) .await .map_err(|e| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)}))) + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"message": format!("Search failed: {}", e)})), + ) })?; // 5. Update best match face_detection let mut traces_matched: Vec = Vec::new(); if let Some((fb_id, fb_trace, fb_sim)) = best_match { - let _ = sqlx::query( - &format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table) - ) + let _ = sqlx::query(&format!( + "UPDATE {} SET identity_id = $1 WHERE id = $2", + fd_table + )) .bind(identity_id) .bind(fb_id) .execute(state.db.pool()) @@ -191,7 +241,10 @@ async fn match_from_photo( file_uuid, matches: 1, traces_matched, - message: format!("Best trace: trace_id={}, similarity={:.4}", fb_trace, fb_sim), + message: format!( + "Best trace: trace_id={}, similarity={:.4}", + fb_trace, fb_sim + ), })) } else { Ok(Json(MatchFromPhotoResponse { @@ -221,26 +274,30 @@ async fn match_from_trace( // 1. Get 3 best face embeddings from this trace at different angles // Divide trace frame range into 3 segments, pick best face from each let fd_table = schema::table_name("face_detections"); - let all_faces: Vec<(Vec, i64)> = sqlx::query_as::<_, (Vec, i64)>( - &format!( - "SELECT embedding, frame_number FROM {} \ + let all_faces: Vec<(Vec, i64)> = sqlx::query_as::<_, (Vec, i64)>(&format!( + "SELECT embedding, frame_number FROM {} \ WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \ ORDER BY frame_number ASC", - fd_table - ) - ) + fd_table + )) .bind(&req.file_uuid) .bind(req.trace_id) .fetch_all(state.db.pool()) .await .map_err(|e| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)}))) + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"message": format!("DB error: {}", e)})), + ) })?; if all_faces.is_empty() { - return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ - "success": false, "message": "No embedding found for this trace" - })))); + return Err(( + StatusCode::NOT_FOUND, + Json(serde_json::json!({ + "success": false, "message": "No embedding found for this trace" + })), + )); } // Pick 3 samples: divide frame range into 3 segments, use face with largest area per segment @@ -254,14 +311,12 @@ async fn match_from_trace( let mut query_embeddings: Vec> = Vec::new(); // Get width*height info if available (not all pipelines store it) - let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>( - &format!( - "SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \ + let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(&format!( + "SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \ FROM {} WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \ ORDER BY frame_number ASC", - fd_table - ) - ) + fd_table + )) .bind(&req.file_uuid) .bind(req.trace_id) .fetch_all(state.db.pool()) @@ -296,9 +351,8 @@ async fn match_from_trace( let mut seen_trace_ids = std::collections::HashSet::new(); for qemb in &query_embeddings { - let top = sqlx::query_as::<_, (i32, i32, f64)>( - &format!( - r#"SELECT id, trace_id, + let top = sqlx::query_as::<_, (i32, i32, f64)>(&format!( + r#"SELECT id, trace_id, 1 - (embedding::vector <=> $1::vector) as similarity FROM {} WHERE file_uuid = $2 @@ -306,16 +360,18 @@ async fn match_from_trace( AND embedding IS NOT NULL ORDER BY embedding::vector <=> $1::vector LIMIT 1"#, - fd_table - ) - ) + fd_table + )) .bind(qemb) .bind(&req.file_uuid) .bind(req.trace_id) .fetch_optional(state.db.pool()) .await .map_err(|e| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)}))) + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"message": format!("Search failed: {}", e)})), + ) })?; if let Some((cface_id, c_trace_id, c_sim)) = top { @@ -327,35 +383,49 @@ async fn match_from_trace( // 3. Look up identity internal ID let id_table = schema::table_name("identities"); - let identity_id_row: Option<(i32,)> = sqlx::query_as( - &format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table) - ) + let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!( + "SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", + id_table + )) .bind(&uuid_clean) .fetch_optional(state.db.pool()) .await .map_err(|e| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)}))) + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"message": format!("DB error: {}", e)})), + ) })?; let identity_id = match identity_id_row { Some((id,)) => id, - None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ - "success": false, "message": "Identity not found" - })))), + None => { + return Err(( + StatusCode::NOT_FOUND, + Json(serde_json::json!({ + "success": false, "message": "Identity not found" + })), + )) + } }; // 4. Update matched face_detections let mut traces_matched: Vec = Vec::new(); for (id, trace_id, _similarity) in &validated { - if let Err(e) = sqlx::query( - &format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table) - ) + if let Err(e) = sqlx::query(&format!( + "UPDATE {} SET identity_id = $1 WHERE id = $2", + fd_table + )) .bind(identity_id) .bind(id) .execute(state.db.pool()) .await { - tracing::warn!("[match-from-trace] Failed to update face_detection {}: {}", id, e); + tracing::warn!( + "[match-from-trace] Failed to update face_detection {}: {}", + id, + e + ); } else { if !traces_matched.contains(trace_id) { traces_matched.push(*trace_id); @@ -364,9 +434,10 @@ async fn match_from_trace( } // 5. Also bind the source trace itself - let _ = sqlx::query( - &format!("UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3", fd_table) - ) + let _ = sqlx::query(&format!( + "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3", + fd_table + )) .bind(identity_id) .bind(&req.file_uuid) .bind(req.trace_id) @@ -388,7 +459,10 @@ async fn match_from_trace( file_uuid: req.file_uuid, matches: match_count, traces_matched, - message: format!("Matched {} faces ({} unique traces)", match_count, trace_count), + message: format!( + "Matched {} faces ({} unique traces)", + match_count, trace_count + ), })) } @@ -461,7 +535,10 @@ fn analyze_person_speaker_overlap( } // Check if persons co-occur in time (frame proximity) - let overlap = person.frames.iter().any(|f| other_person.frames.contains(f)); + let overlap = person + .frames + .iter() + .any(|f| other_person.frames.contains(f)); if overlap { matched_persons.push(other_person.person_id.clone()); visited_persons.insert(other_person.person_id.clone()); @@ -474,9 +551,10 @@ fn analyze_person_speaker_overlap( person.frames.iter().max().copied().unwrap_or(0) as f64, ); for speaker in speakers { - let has_overlap = speaker.segments.iter().any(|(start, end)| { - *start <= person_time_range.1 && *end >= person_time_range.0 - }); + let has_overlap = speaker + .segments + .iter() + .any(|(start, end)| *start <= person_time_range.1 && *end >= person_time_range.0); if has_overlap { if !matched_speakers.contains(&speaker.speaker_id) { matched_speakers.push(speaker.speaker_id.clone()); @@ -563,11 +641,12 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow:: // Step 2: 載入所有 face_detections(含 frame_number),按 trace_id 分組 let fd_table = schema::table_name("face_detections"); - let fd_rows = sqlx::query_as::<_, (i32, i32, Vec)>( - &format!("SELECT trace_id, frame_number, embedding FROM {} \ + let fd_rows = sqlx::query_as::<_, (i32, i32, Vec)>(&format!( + "SELECT trace_id, frame_number, embedding FROM {} \ WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \ - ORDER BY trace_id, frame_number", fd_table), - ) + ORDER BY trace_id, frame_number", + fd_table + )) .bind(file_uuid) .fetch_all(pool) .await?; @@ -647,16 +726,18 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow:: let fd_table = schema::table_name("face_detections"); let mut updated = 0usize; for (tid, name) in &matched { - let id_opt = sqlx::query_scalar::<_, Option>( - &format!("SELECT id FROM {} WHERE name=$1 AND source='tmdb'", identities_table), - ) + let id_opt = sqlx::query_scalar::<_, Option>(&format!( + "SELECT id FROM {} WHERE name=$1 AND source='tmdb'", + identities_table + )) .bind(name) .fetch_optional(pool) .await?; if let Some(identity_id) = id_opt { - let _ = sqlx::query( - &format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table), - ) + let _ = sqlx::query(&format!( + "UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", + fd_table + )) .bind(identity_id) .bind(file_uuid) .bind(tid) @@ -726,32 +807,32 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow:: // Step 6: 未匹配的 trace 設 stranger_id = trace_id // trace_id 在同一個 file 內是 sequential integer,直接複用為 stranger_id - let stranger_update = sqlx::query( - &format!( - "UPDATE {} SET stranger_id = trace_id \ + let stranger_update = sqlx::query(&format!( + "UPDATE {} SET stranger_id = trace_id \ WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \ AND (stranger_id IS NULL OR stranger_id != trace_id)", - fd_table - ) - ) + fd_table + )) .bind(file_uuid) .execute(pool) .await?; let stranger_count = stranger_update.rows_affected(); // Step 7: Save identity files for all affected identities - let affected = sqlx::query_scalar::<_, uuid::Uuid>( - &format!("SELECT DISTINCT i.uuid FROM {} i \ + let affected = sqlx::query_scalar::<_, uuid::Uuid>(&format!( + "SELECT DISTINCT i.uuid FROM {} i \ JOIN {} fd ON fd.identity_id = i.id \ - WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL", identities_table, fd_table) - ) + WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL", + identities_table, fd_table + )) .bind(file_uuid) .fetch_all(pool) .await .unwrap_or_default(); for uuid in &affected { let us = uuid.to_string().replace('-', ""); - if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await { + if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await + { tracing::warn!("[FaceMatch] Failed to save identity file {}: {}", us, e); } } @@ -773,13 +854,15 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow:: pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result { // Load face traces with identity_id and frame numbers let fd_table = schema::table_name("face_detections"); - let traces = sqlx::query_as::<_, (i32, Vec)>( - &format!("SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \ + let traces = sqlx::query_as::<_, (i32, Vec)>(&format!( + "SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \ FROM {} WHERE file_uuid=$1 AND trace_id IS NOT NULL AND identity_id IS NOT NULL \ - GROUP BY trace_id", fd_table) - ) + GROUP BY trace_id", + fd_table + )) .bind(file_uuid) - .fetch_all(pool).await?; + .fetch_all(pool) + .await?; if traces.is_empty() { tracing::info!("[SpeakerBind] No face traces with identities"); @@ -945,9 +1028,8 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res let speakers = extract_speakers_from_asrx_data(&asrx_data); let identities = analyze_person_speaker_overlap(&persons, &speakers); - let uuid_short = &file_uuid[..8.min(file_uuid.len())]; for (idx, id_result) in identities.iter().enumerate() { - let identity_name = format!("stranger_{}_{}", uuid_short, idx); + let identity_name = format!("stranger_{}", idx); let metadata = serde_json::json!({ "source": "identity_agent", "trace_ids": id_result.person_ids, diff --git a/src/api/identity_api.rs b/src/api/identity_api.rs index 75ebf62..127536a 100644 --- a/src/api/identity_api.rs +++ b/src/api/identity_api.rs @@ -38,8 +38,18 @@ pub fn identity_routes() -> Router { .route("/api/v1/resource/heartbeat", post(heartbeat_resource)) .route("/api/v1/resources", get(list_resources)) .route("/api/v1/identity/upload", post(upload_identity)) - .route("/api/v1/identity/:identity_uuid/profile-image", post(upload_profile_image).get(get_profile_image)) - .route("/api/v1/identity/:identity_uuid/json", get(get_identity_json)) + .route( + "/api/v1/identity/:identity_uuid/profile-image", + post(upload_profile_image).get(get_profile_image), + ) + .route( + "/api/v1/identity/:identity_uuid/status", + get(get_identity_status), + ) + .route( + "/api/v1/identity/:identity_uuid/json", + get(get_identity_json), + ) // Experiment: identity text search (non-polluting, separate endpoint) .route("/api/v1/search/identity_text", get(search_identity_text)) .route("/api/v1/identities/search", get(search_identities_by_text)) @@ -98,9 +108,10 @@ async fn list_files( .await .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; - let data = records.0 + let data = records + .0 .into_iter() - .map(|r| FileItem { + .map(|r| FileItem { file_uuid: r.file_uuid, file_name: r.file_name, file_path: r.file_path, @@ -163,7 +174,9 @@ async fn get_file_detail( file_name: f.file_name, file_path: f.file_path, metadata: f.probe_json, - created_at: chrono::DateTime::parse_from_rfc3339(&f.created_at).ok().map(|d| d.into()), + created_at: chrono::DateTime::parse_from_rfc3339(&f.created_at) + .ok() + .map(|d| d.into()), })), None => Err(( StatusCode::NOT_FOUND, @@ -214,13 +227,42 @@ async fn get_file_identities( .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; let fps = 25.0; - let data: Vec = Vec::new(); + let data: Vec = records + .into_iter() + .map(|r| FileIdentityItem { + identity_id: r.identity_id, + identity_uuid: r.identity_uuid, + name: r.name, + metadata: r.metadata, + face_count: r.face_count, + speaker_count: r.speaker_count, + start_frame: r.start_frame, + end_frame: r.end_frame, + start_time: r.start_time, + end_time: r.end_time, + confidence: r.confidence, + }) + .collect(); + + let total = match sqlx::query_scalar::<_, i64>( + &format!( + "SELECT COUNT(DISTINCT fd.identity_id) FROM {} fd WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL", + crate::core::db::schema::table_name("face_detections") + ) + ) + .bind(&file_uuid) + .fetch_one(state.db.pool()) + .await + { + Ok(c) => c, + Err(_) => data.len() as i64, + }; Ok(Json(FileIdentitiesResponse { success: true, file_uuid: file_uuid, fps, - total: data.len() as i64, + total, page, page_size, data, @@ -243,6 +285,16 @@ pub struct IdentityDetailResponse { pub updated_at: Option>, } +#[derive(Debug, Serialize)] +pub struct IdentityStatusResponse { + pub success: bool, + pub identity_uuid: String, + pub name: String, + pub has_json: bool, + pub has_jpg: bool, + pub error: Option, +} + fn strip_uuid(u: &uuid::Uuid) -> String { u.to_string().replace('-', "") } @@ -270,7 +322,11 @@ async fn get_identity_detail( metadata: i.metadata, reference_data: i.reference_data, tmdb_id: i.tmdb_id, - tmdb_profile: Some(format!("{}/identities/{}/profile.jpg", crate::core::config::OUTPUT_DIR.as_str(), i.uuid.replace('-', ""))), + tmdb_profile: Some(format!( + "{}/identities/{}/profile.jpg", + crate::core::config::OUTPUT_DIR.as_str(), + i.uuid.replace('-', "") + )), created_at: i.created_at, updated_at: i.updated_at, })), @@ -281,6 +337,44 @@ async fn get_identity_detail( } } +async fn get_identity_status( + State(state): State, + Path(identity_uuid): Path, +) -> Result, (StatusCode, String)> { + let uuid_clean = identity_uuid.replace('-', ""); + + let identity = state + .db + .get_identity_by_uuid(&uuid_clean) + .await + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; + + match identity { + Some(i) => { + // Check both UUID formats (with and without hyphens) + let dir_nohyphen = crate::core::identity::storage::identity_dir(&uuid_clean); + let uuid_hyphen = i.uuid.clone(); + let dir_hyphen = crate::core::identity::storage::identity_dir(&uuid_hyphen); + let has_json = dir_nohyphen.join("identity.json").exists() + || dir_hyphen.join("identity.json").exists(); + let has_jpg = dir_nohyphen.join("profile.jpg").exists() + || dir_hyphen.join("profile.jpg").exists(); + Ok(Json(IdentityStatusResponse { + success: true, + identity_uuid: i.uuid.clone(), + name: i.name, + has_json, + has_jpg, + error: None, + })) + } + None => Err(( + StatusCode::NOT_FOUND, + format!("Identity not found: {}", uuid_clean), + )), + } +} + #[derive(Debug, Serialize)] pub struct IdentityFilesResponse { pub success: bool, @@ -375,10 +469,25 @@ async fn get_identity_files( }) .collect(); + let total = match sqlx::query_scalar::<_, i64>( + &format!( + "SELECT COUNT(DISTINCT fd.file_uuid) FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1)", + crate::core::db::schema::table_name("face_detections"), + crate::core::db::schema::table_name("identities"), + ) + ) + .bind(&uuid) + .fetch_one(state.db.pool()) + .await + { + Ok(c) => c, + Err(_) => data.len() as i64, + }; + Ok(Json(IdentityFilesResponse { success: true, identity_uuid: uuid.to_string().replace('-', ""), - total: data.len() as i64, + total, page, page_size, data, @@ -449,10 +558,25 @@ async fn get_identity_faces( }) .collect(); + let total = match sqlx::query_scalar::<_, i64>( + &format!( + "SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1)", + crate::core::db::schema::table_name("face_detections"), + crate::core::db::schema::table_name("identities"), + ) + ) + .bind(&uuid) + .fetch_one(state.db.pool()) + .await + { + Ok(c) => c, + Err(_) => data.len() as i64, + }; + Ok(Json(IdentityFacesResponse { success: true, identity_uuid: uuid.to_string().replace('-', ""), - total: data.len() as i64, + total, page, page_size, data, @@ -721,12 +845,24 @@ async fn upload_profile_image( let uuid_clean = identity_uuid.replace('-', ""); // Verify identity exists - if state.db.get_identity_by_uuid(&uuid_clean).await.map_err(|_| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"success": false, "message": "DB error"}))) - })?.is_none() { - return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ - "success": false, "message": "Identity not found" - })))); + if state + .db + .get_identity_by_uuid(&uuid_clean) + .await + .map_err(|_| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"success": false, "message": "DB error"})), + ) + })? + .is_none() + { + return Err(( + StatusCode::NOT_FOUND, + Json(serde_json::json!({ + "success": false, "message": "Identity not found" + })), + )); } // Process multipart upload @@ -740,9 +876,14 @@ async fn upload_profile_image( ext = match content_type.as_str() { "image/png" => "png", "image/jpeg" | "image/jpg" => "jpg", - _ => return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({ - "success": false, "message": "Unsupported image type. Use JPEG or PNG." - })))), + _ => { + return Err(( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({ + "success": false, "message": "Unsupported image type. Use JPEG or PNG." + })), + )) + } }; image_data = Some(field.bytes().await.map_err(|_| { (StatusCode::BAD_REQUEST, Json(serde_json::json!({"success": false, "message": "Failed to read image data"}))) @@ -750,9 +891,14 @@ async fn upload_profile_image( } } - let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({ - "success": false, "message": "No image field found. Use field name 'image'." - }))))?; + let data = image_data.ok_or_else(|| { + ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({ + "success": false, "message": "No image field found. Use field name 'image'." + })), + ) + })?; // Write image file let dir = crate::core::identity::storage::identity_dir(&uuid_clean); @@ -789,8 +935,16 @@ async fn get_profile_image( let path = dir.join(format!("profile.{}", ext)); if path.exists() { let data = std::fs::read(&path).map_err(|_| StatusCode::NOT_FOUND)?; - let content_type = if *ext == "png" { "image/png" } else { "image/jpeg" }; - return Ok((StatusCode::OK, [("content-type".to_string(), content_type.to_string())], data)); + let content_type = if *ext == "png" { + "image/png" + } else { + "image/jpeg" + }; + return Ok(( + StatusCode::OK, + [("content-type".to_string(), content_type.to_string())], + data, + )); } } Err(StatusCode::NOT_FOUND) @@ -802,7 +956,14 @@ async fn get_identity_json( ) -> Result<(StatusCode, [(String, String); 1], Vec), StatusCode> { let clean = identity_uuid.replace('-', ""); let with_hyphens = if clean.len() == 32 { - format!("{}-{}-{}-{}-{}", &clean[0..8], &clean[8..12], &clean[12..16], &clean[16..20], &clean[20..32]) + format!( + "{}-{}-{}-{}-{}", + &clean[0..8], + &clean[8..12], + &clean[12..16], + &clean[16..20], + &clean[20..32] + ) } else { identity_uuid.clone() }; @@ -821,7 +982,9 @@ async fn get_identity_json( } // 2. Lazy Sync: If file missing, generate from DB and save - if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &clean).await { + if let Err(e) = + crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &clean).await + { tracing::warn!("[identity-json] Lazy sync failed for {}: {}", clean, e); return Err(StatusCode::NOT_FOUND); } @@ -858,7 +1021,7 @@ struct IdentityTextHit { chunk_id: String, start_time: f64, end_time: f64, - text_content: String, + text_content: Option, identity_id: Option, identity_name: Option, identity_source: Option, @@ -889,7 +1052,7 @@ async fn search_identity_text( let query = format!( r#"SELECT c.file_uuid, c.chunk_id, c.start_time, c.end_time, c.text_content, - fd.identity_id, CASE WHEN id_table LIKE 'dev.%' THEN i.name ELSE i.real_name END AS identity_name, i.source AS identity_source, + fd.identity_id, i.name AS identity_name, i.source AS identity_source, fd.trace_id FROM {} c LEFT JOIN {} fd ON fd.file_uuid = c.file_uuid @@ -902,18 +1065,42 @@ async fn search_identity_text( chunk_table, fd_table, id_table ); - let rows = sqlx::query_as::<_, (String, String, f64, f64, String, Option, Option, Option, Option)>(&query) - .bind(¶ms.uuid).bind(&like_q).bind(limit) - .fetch_all(state.db.pool()) - .await - .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + let rows = sqlx::query_as::< + _, + ( + String, + String, + f64, + f64, + Option, + Option, + Option, + Option, + Option, + ), + >(&query) + .bind(¶ms.uuid) + .bind(&like_q) + .bind(limit) + .fetch_all(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let results: Vec = rows .into_iter() - .map(|(fu, cid, st, et, txt, iid, iname, isrc, tid)| IdentityTextHit { - file_uuid: fu, chunk_id: cid, start_time: st, end_time: et, text_content: txt, - identity_id: iid, identity_name: iname, identity_source: isrc, trace_id: tid, - }) + .map( + |(fu, cid, st, et, txt, iid, iname, isrc, tid)| IdentityTextHit { + file_uuid: fu, + chunk_id: cid, + start_time: st, + end_time: et, + text_content: txt, + identity_id: iid, + identity_name: iname, + identity_source: isrc, + trace_id: tid, + }, + ) .collect(); let total = results.len() as i64; @@ -922,7 +1109,14 @@ async fn search_identity_text( let start = (page - 1) * page_size; let paged: Vec = results.into_iter().skip(start).take(page_size).collect(); let limit = params.limit.unwrap_or(50) as usize; - Ok(Json(IdentityTextResponse { success: true, total, page, page_size, limit, results: paged })) + Ok(Json(IdentityTextResponse { + success: true, + total, + page, + page_size, + limit, + results: paged, + })) } #[derive(Debug, Deserialize)] @@ -942,7 +1136,7 @@ struct IdentitySearchHit { trace_id: Option, chunk_id: String, start_time: f64, - text_content: String, + text_content: Option, } #[derive(Debug, Serialize)] @@ -965,7 +1159,7 @@ async fn search_identities_by_text( let limit = params.limit.unwrap_or(50).min(100); let query = format!( - r#"SELECT i.id::int, COALESCE(i.real_name, i.actor_name, i.name) AS name, i.source, i.tmdb_id, + r#"SELECT i.id::int, i.name, i.source, i.tmdb_id, fd.file_uuid, fd.trace_id, c.chunk_id, c.start_time, c.text_content FROM {} i @@ -973,30 +1167,58 @@ async fn search_identities_by_text( JOIN {} c ON c.file_uuid = fd.file_uuid AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) - WHERE COALESCE(i.real_name, i.actor_name, i.name) ILIKE $1 + WHERE i.name ILIKE $1 AND ($2::text IS NULL OR fd.file_uuid = $2) - ORDER BY COALESCE(i.real_name, i.actor_name, i.name), c.start_time + ORDER BY i.name, c.start_time LIMIT $3"#, id_table, fd_table, chunk_table ); - let rows = sqlx::query_as::<_, (i32, String, Option, Option, String, Option, String, f64, String)>(&query) - .bind(&like_q).bind(¶ms.uuid).bind(limit) - .fetch_all(state.db.pool()) - .await - .map_err(|e| { - tracing::error!("[identities/search] Query failed: {}", e); - StatusCode::INTERNAL_SERVER_ERROR - })?; + let rows = sqlx::query_as::< + _, + ( + i32, + String, + Option, + Option, + String, + Option, + String, + f64, + Option, + ), + >(&query) + .bind(&like_q) + .bind(¶ms.uuid) + .bind(limit) + .fetch_all(state.db.pool()) + .await + .map_err(|e| { + tracing::error!("[identities/search] Query failed: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; let results: Vec = rows .into_iter() - .map(|(iid, name, src, tid, fu, trace_id, cid, st, txt)| IdentitySearchHit { - identity_id: iid, name, source: src, tmdb_id: tid, - file_uuid: fu, trace_id, chunk_id: cid, start_time: st, text_content: txt, - }) + .map( + |(iid, name, src, tid, fu, trace_id, cid, st, txt)| IdentitySearchHit { + identity_id: iid, + name, + source: src, + tmdb_id: tid, + file_uuid: fu, + trace_id, + chunk_id: cid, + start_time: st, + text_content: txt, + }, + ) .collect(); let total = results.len() as i64; - Ok(Json(IdentitySearchResponse { success: true, total, results })) + Ok(Json(IdentitySearchResponse { + success: true, + total, + results, + })) } diff --git a/src/api/identity_binding.rs b/src/api/identity_binding.rs index 7b85a9f..ddfbf1d 100644 --- a/src/api/identity_binding.rs +++ b/src/api/identity_binding.rs @@ -1,5 +1,5 @@ use axum::{ - extract::{Path, Query}, + extract::{Path, Query, State}, http::StatusCode, response::Json, routing::{get, post}, @@ -77,7 +77,7 @@ pub async fn bind_identity( // Get identity_id from identity_uuid let identity_row: Option<(i64, String)> = sqlx::query_as(&format!( - "SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid", + "SELECT id, name FROM {} WHERE uuid = $1::uuid", id_table )) .bind(&identity_uuid) @@ -116,8 +116,14 @@ pub async fn bind_identity( let uuid_clean = identity_uuid.replace('-', ""); // Sync identity JSON file - if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &uuid_clean).await { - tracing::warn!("[bind] Failed to sync identity file for {}: {}", uuid_clean, e); + if let Err(e) = + crate::core::identity::storage::save_identity_file_by_pool(&db, &uuid_clean).await + { + tracing::warn!( + "[bind] Failed to sync identity file for {}: {}", + uuid_clean, + e + ); } Ok(Json(ApiResponse { @@ -189,8 +195,15 @@ pub async fn unbind_identity( .ok() .flatten(); if let Some(identity_uuid) = uuid { - if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &identity_uuid).await { - tracing::warn!("[unbind] Failed to sync identity file for {}: {}", identity_uuid, e); + if let Err(e) = + crate::core::identity::storage::save_identity_file_by_pool(&db, &identity_uuid) + .await + { + tracing::warn!( + "[unbind] Failed to sync identity file for {}: {}", + identity_uuid, + e + ); } } } @@ -221,7 +234,7 @@ pub async fn merge_identities( // Get IDs for both identities let from_row: Option<(i64, String)> = sqlx::query_as(&format!( - "SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid", + "SELECT id, name FROM {} WHERE uuid = $1::uuid", id_table )) .bind(&identity_uuid) @@ -239,7 +252,7 @@ pub async fn merge_identities( ))?; let into_row: Option<(i64, String)> = sqlx::query_as(&format!( - "SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid", + "SELECT id, name FROM {} WHERE uuid = $1::uuid", id_table )) .bind(&req.into_uuid) @@ -299,8 +312,14 @@ pub async fn merge_identities( // Sync target identity JSON let into_uuid_clean = req.into_uuid.replace('-', ""); - if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &into_uuid_clean).await { - tracing::warn!("[merge] Failed to sync target identity file for {}: {}", into_uuid_clean, e); + if let Err(e) = + crate::core::identity::storage::save_identity_file_by_pool(&db, &into_uuid_clean).await + { + tracing::warn!( + "[merge] Failed to sync target identity file for {}: {}", + into_uuid_clean, + e + ); } // Delete source identity JSON if not keeping history @@ -339,6 +358,106 @@ pub struct ListIdentitiesParams { pub offset: Option, } +#[derive(Debug, Serialize)] +pub struct IdentityTraceInfo { + pub file_uuid: String, + pub trace_id: i32, + pub frame_count: i64, + pub first_frame: i32, + pub last_frame: i32, + pub first_sec: f64, + pub last_sec: f64, + pub avg_confidence: f64, +} + +#[derive(Debug, Serialize)] +pub struct IdentityTracesResponse { + pub success: bool, + pub identity_uuid: String, + pub name: String, + pub total_traces: usize, + pub total_faces: i64, + pub traces: Vec, +} + +pub async fn get_identity_traces( + State(state): State, + Path(identity_uuid): Path, +) -> Result, (StatusCode, String)> { + let id_table = crate::core::db::schema::table_name("identities"); + let fd_table = crate::core::db::schema::table_name("face_detections"); + + // Get identity name + let identity: Option<(i32, String)> = sqlx::query_as(&format!( + "SELECT id, name FROM {} WHERE uuid = $1::uuid", + id_table + )) + .bind(&identity_uuid) + .fetch_optional(state.db.pool()) + .await + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; + + let (identity_id, name) = + identity.ok_or((StatusCode::NOT_FOUND, "Identity not found".to_string()))?; + + // Get all traces for this identity across all files + let rows: Vec<(String, i32, i64, i32, i32, f64, f64, f64)> = sqlx::query_as(&format!( + r#"SELECT fd.file_uuid::text, fd.trace_id, + COUNT(*)::bigint AS frame_count, + MIN(fd.frame_number)::int AS first_frame, + MAX(fd.frame_number)::int AS last_frame, + ROUND(MIN(fd.frame_number)::numeric / 25.0, 1)::float8 AS first_sec, + ROUND(MAX(fd.frame_number)::numeric / 25.0, 1)::float8 AS last_sec, + ROUND(AVG(fd.confidence)::numeric, 4)::float8 AS avg_confidence + FROM {} fd + WHERE fd.identity_id = $1 + GROUP BY fd.file_uuid, fd.trace_id + ORDER BY fd.file_uuid, fd.trace_id"#, + fd_table + )) + .bind(identity_id) + .fetch_all(state.db.pool()) + .await + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; + + let total_traces = rows.len(); + let total_faces: i64 = rows.iter().map(|r| r.2).sum(); + + let traces: Vec = rows + .into_iter() + .map( + |( + file_uuid, + trace_id, + frame_count, + first_frame, + last_frame, + first_sec, + last_sec, + avg_confidence, + )| IdentityTraceInfo { + file_uuid, + trace_id, + frame_count, + first_frame, + last_frame, + first_sec, + last_sec, + avg_confidence, + }, + ) + .collect(); + + Ok(Json(IdentityTracesResponse { + success: true, + identity_uuid, + name, + total_traces, + total_faces, + traces, + })) +} + pub fn identity_binding_routes() -> Router { Router::new() .route("/api/v1/identity/:identity_uuid/bind", post(bind_identity)) @@ -350,4 +469,8 @@ pub fn identity_binding_routes() -> Router { "/api/v1/identity/:identity_uuid/mergeinto", post(merge_identities), ) + .route( + "/api/v1/identity/:identity_uuid/traces", + get(get_identity_traces), + ) } diff --git a/src/api/media_api.rs b/src/api/media_api.rs index 9ebb998..2ab25b0 100644 --- a/src/api/media_api.rs +++ b/src/api/media_api.rs @@ -14,8 +14,16 @@ use crate::core::db::{schema, PostgresDb}; /// Shared video query params: mode=normal|debug, audio=on|off fn parse_video_params(params: &std::collections::HashMap) -> (String, String) { - let mode = params.get("mode").map(|s| s.as_str()).unwrap_or("normal").to_string(); - let audio = params.get("audio").map(|s| s.as_str()).unwrap_or("on").to_string(); + let mode = params + .get("mode") + .map(|s| s.as_str()) + .unwrap_or("normal") + .to_string(); + let audio = params + .get("audio") + .map(|s| s.as_str()) + .unwrap_or("on") + .to_string(); (mode, audio) } @@ -142,9 +150,12 @@ struct BboxParams { /// Priority: start_frame/end_frame > start/end > start_time/end_time. /// If only time is given, convert via fps. fn resolve_frame_range( - start_frame: Option, end_frame: Option, - start: Option, end: Option, - start_time: Option, end_time: Option, + start_frame: Option, + end_frame: Option, + start: Option, + end: Option, + start_time: Option, + end_time: Option, fps: f64, ) -> (i32, i32) { if let (Some(sf), Some(ef)) = (start_frame.or(start), end_frame.or(end)) { @@ -186,7 +197,15 @@ async fn bbox_overlay_video( .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? .unwrap_or(24.0); - let (start_f, end_f) = resolve_frame_range(p.start_frame, p.end_frame, p.start, p.end, p.start_time, p.end_time, fps); + let (start_f, end_f) = resolve_frame_range( + p.start_frame, + p.end_frame, + p.start, + p.end, + p.start_time, + p.end_time, + fps, + ); let start_sec = start_f as f64 / fps; @@ -228,13 +247,26 @@ async fn bbox_overlay_video( let dur = duration.to_string(); let mut bbox_args = vec!["-ss", &ss, "-i", &video_path, "-t", &dur]; if vf != "null" { - bbox_args.extend_from_slice(&["-vf", &vf, "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"]); + bbox_args.extend_from_slice(&[ + "-vf", + &vf, + "-c:v", + "libx264", + "-preset", + "ultrafast", + "-crf", + "28", + ]); } else { bbox_args.extend_from_slice(&["-c", "copy"]); } - if bbox_audio == "off" { bbox_args.push("-an"); } + if bbox_audio == "off" { + bbox_args.push("-an"); + } bbox_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]); - let status = ffmpeg_cmd().args(&bbox_args).status() + let status = ffmpeg_cmd() + .args(&bbox_args) + .status() .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; if !status.success() { let _ = std::fs::remove_file(&tmp); @@ -315,14 +347,20 @@ async fn trace_video( let sk = seek.to_string(); let du = duration.to_string(); let mut cmd_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, "-c", "copy"]; - if audio == "off" { cmd_args.push("-an"); } + if audio == "off" { + cmd_args.push("-an"); + } cmd_args.extend_from_slice(&["-y", &tmp_str]); - let result = ffmpeg_cmd().args(&cmd_args).output() + let result = ffmpeg_cmd() + .args(&cmd_args) + .output() .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; if !result.status.success() { return Err(StatusCode::INTERNAL_SERVER_ERROR); } - let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + let data = tokio::fs::read(&tmp) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let _ = std::fs::remove_file(&tmp); return Ok(Response::builder() .header(header::CONTENT_TYPE, "video/mp4") @@ -345,8 +383,11 @@ async fn trace_video( ORDER BY fd.trace_id, fd.frame_number", face_table, identities_table )) - .bind(&file_uuid).bind(start_fn).bind(end_fn) - .fetch_all(state.db.pool()).await + .bind(&file_uuid) + .bind(start_fn) + .bind(end_fn) + .fetch_all(state.db.pool()) + .await .unwrap_or_default(); // Group frames by trace_id, compute start_frame per trace; collect bbox per frame @@ -359,7 +400,9 @@ async fn trace_video( if let Some(name) = name_opt { trace_identity.entry(*tid).or_insert_with(|| name.clone()); } else { - trace_identity.entry(*tid).or_insert_with(|| format!("Stranger_{:03}", tid)); + trace_identity + .entry(*tid) + .or_insert_with(|| format!("Stranger_{:03}", tid)); } } @@ -374,7 +417,8 @@ async fn trace_video( .unwrap_or_else(|| "-".to_string()); // Sort traces for consistent ordering - let mut sorted_traces: Vec<(i32, &Vec)> = trace_frames.iter().map(|(k, v)| (*k, v)).collect(); + let mut sorted_traces: Vec<(i32, &Vec)> = + trace_frames.iter().map(|(k, v)| (*k, v)).collect(); sorted_traces.sort_by_key(|(tid, _)| *tid); let frame_offset = first_frame as i64 - (padding * fps) as i64; @@ -389,10 +433,12 @@ async fn trace_video( "drawtext=text='Frame %{{n}} %{{pts}}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=12" )); parts.push(format!( - "drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56", cut_label + "drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56", + cut_label )); parts.push(format!( - "drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100", file_uuid + "drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100", + file_uuid )); // Per-trace entries: show trace_id, start_frame, identity name @@ -400,11 +446,18 @@ async fn trace_video( let mut y_pos = 144; for (tid, frames) in &sorted_traces { let start = frames.iter().min().unwrap_or(&first_frame); - let identity = trace_identity.get(tid).map(|s| s.as_str()).unwrap_or("unknown"); + let identity = trace_identity + .get(tid) + .map(|s| s.as_str()) + .unwrap_or("unknown"); let label = format!("Trace {}\\: start={} {}", tid, start, identity); // Continuous range (interpolated): visible from first to last frame - let enable = format!("between(n,{},{})", frames[0] as i64 - frame_offset, frames[frames.len() - 1] as i64 - frame_offset); + let enable = format!( + "between(n,{},{})", + frames[0] as i64 - frame_offset, + frames[frames.len() - 1] as i64 - frame_offset + ); parts.push(format!( "drawtext=text='{}':fontsize=24:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y={}:enable='{}'", @@ -415,7 +468,11 @@ async fn trace_video( // Bounding boxes: interpolated (thickness=1) + actual (thickness=4) with trace_id label for (tid, frames) in &sorted_traces { - let range_enable = format!("between(n,{},{})", frames[0] as i64 - frame_offset, frames[frames.len() - 1] as i64 - frame_offset); + let range_enable = format!( + "between(n,{},{})", + frames[0] as i64 - frame_offset, + frames[frames.len() - 1] as i64 - frame_offset + ); // Interpolated bbox at first known position across the whole trace range if let Some((x, y, w, h)) = bbox_per_frame.get(&(*tid, frames[0])) { parts.push(format!( @@ -448,23 +505,45 @@ async fn trace_video( let tmp_str = tmp.to_str().unwrap_or("").to_string(); let sk = seek.to_string(); let du = duration.to_string(); - let mut debug_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, - "-/filter_complex", &filter_path, - "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"]; - if audio == "on" { debug_args.extend_from_slice(&["-c:a", "aac"]); } + let mut debug_args = vec![ + "-ss", + &sk, + "-i", + &video_path, + "-t", + &du, + "-/filter_complex", + &filter_path, + "-c:v", + "libx264", + "-preset", + "ultrafast", + "-crf", + "28", + ]; + if audio == "on" { + debug_args.extend_from_slice(&["-c:a", "aac"]); + } debug_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]); - let result = ffmpeg_cmd().args(&debug_args).output() + let result = ffmpeg_cmd() + .args(&debug_args) + .output() .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; if !result.status.success() { let stderr = String::from_utf8_lossy(&result.stderr); let _ = std::fs::write("/tmp/ffmpeg_last_error.txt", stderr.as_bytes()); - tracing::error!("ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt", stderr.len()); + tracing::error!( + "ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt", + stderr.len() + ); let _ = std::fs::remove_file(&filter_file); let _ = std::fs::remove_file(&tmp); return Err(StatusCode::INTERNAL_SERVER_ERROR); } - let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + let data = tokio::fs::read(&tmp) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let _ = std::fs::remove_file(&filter_file); let _ = std::fs::remove_file(&tmp); Ok(Response::builder() @@ -503,19 +582,27 @@ async fn stream_video( // Chunk extraction with dual time/frame params let start_time_param = params.get("start_time").and_then(|v| v.parse::().ok()); let end_time_param = params.get("end_time").and_then(|v| v.parse::().ok()); - let start_frame_param = params.get("start_frame").and_then(|v| v.parse::().ok()); + let start_frame_param = params + .get("start_frame") + .and_then(|v| v.parse::().ok()); let end_frame_param = params.get("end_frame").and_then(|v| v.parse::().ok()); let start_legacy = params.get("start").and_then(|v| v.parse::().ok()); let end_legacy = params.get("end").and_then(|v| v.parse::().ok()); - let has_range = start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some(); + let has_range = + start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some(); if has_range { let (start_sec, dur) = if let (Some(sf), Some(ef)) = (start_frame_param, end_frame_param) { let _fps: f64 = sqlx::query_scalar(&format!( - "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", videos_table - )).bind(&file_uuid).fetch_optional(state.db.pool()).await - .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?.unwrap_or(24.0); + "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", + videos_table + )) + .bind(&file_uuid) + .fetch_optional(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? + .unwrap_or(24.0); (sf / _fps, (ef - sf) / _fps) } else if let (Some(st), Some(et)) = (start_time_param, end_time_param) { (st, et - st) @@ -533,15 +620,21 @@ async fn stream_video( let ss = start_sec.to_string(); let d = dur.to_string(); let mut chunk_args = vec!["-ss", &ss, "-i", &file_path, "-t", &d, "-c", "copy"]; - if audio == "off" { chunk_args.push("-an"); } + if audio == "off" { + chunk_args.push("-an"); + } chunk_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]); - let status = ffmpeg_cmd().args(&chunk_args).status() + let status = ffmpeg_cmd() + .args(&chunk_args) + .status() .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; if !status.success() { let _ = std::fs::remove_file(&tmp); return Err(StatusCode::INTERNAL_SERVER_ERROR); } - let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + let data = tokio::fs::read(&tmp) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let _ = std::fs::remove_file(&tmp); return Ok(Response::builder() .header(header::CONTENT_TYPE, "video/mp4") @@ -704,7 +797,7 @@ async fn video_clip( let frame_count = ((e - s) * fps) as i64; cmd.args(["-vframes", &frame_count.to_string()]); } else { - cmd.args(["-to", &e.to_string()]); + cmd.args(["-t", &(e - s).to_string()]); } if mode == "debug" { let debug_text = if let (Some(sf), Some(ef)) = (q.start_frame, q.end_frame) { @@ -717,8 +810,20 @@ async fn video_clip( if audio == "off" { cmd.args(["-an"]); } - cmd.args(["-c:v", "libx264", "-c:a", "aac", "-f", "mpegts", "-"]); - let output = cmd.output().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + cmd.args([ + "-c:v", + "libx264", + "-c:a", + "aac", + "-movflags", + "frag_keyframe+empty_moov", + "-f", + "mp4", + "-", + ]); + let output = cmd + .output() + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; if !output.status.success() { return Err(StatusCode::INTERNAL_SERVER_ERROR); } diff --git a/src/api/middleware.rs b/src/api/middleware.rs index 0d408e0..8e58695 100644 --- a/src/api/middleware.rs +++ b/src/api/middleware.rs @@ -88,16 +88,10 @@ fn hex_val(c: u8) -> Option { } fn extract_api_key(headers: &HeaderMap, uri: &axum::http::Uri) -> Result { - if let Some(key) = headers - .get("X-API-Key") - .and_then(|v| v.to_str().ok()) - { + if let Some(key) = headers.get("X-API-Key").and_then(|v| v.to_str().ok()) { return Ok(key.to_string()); } - if let Some(auth) = headers - .get("Authorization") - .and_then(|v| v.to_str().ok()) - { + if let Some(auth) = headers.get("Authorization").and_then(|v| v.to_str().ok()) { // Check if it's a JWT (starts with eyJ) let trimmed = auth.strip_prefix("Bearer ").unwrap_or(auth); if !jwt::is_jwt(trimmed) { @@ -129,7 +123,11 @@ pub async fn unified_auth( // Priority 1: Cookie session (Portal) let cookies = extract_cookies(headers); - if let Some(sid) = cookies.iter().find(|(k, _)| k == "session_id").map(|(_, v)| v.clone()) { + if let Some(sid) = cookies + .iter() + .find(|(k, _)| k == "session_id") + .map(|(_, v)| v.clone()) + { match state.db.get_session_by_id(&sid).await { Ok(Some((_id, user_id, api_key_id, _expires_at))) => { let key_hash = hash_key(&api_key_id); @@ -162,15 +160,17 @@ pub async fn unified_auth( } // Priority 2: JWT (Authorization: Bearer ) - if let Some(auth_header) = headers - .get("Authorization") - .and_then(|v| v.to_str().ok()) - { + if let Some(auth_header) = headers.get("Authorization").and_then(|v| v.to_str().ok()) { if let Some(token) = auth_header.strip_prefix("Bearer ") { if jwt::is_jwt(token) { match jwt::verify_jwt(token) { Ok(claims) => { - if !state.db.is_jwt_blacklisted(&claims.jti).await.unwrap_or(false) { + if !state + .db + .is_jwt_blacklisted(&claims.jti) + .await + .unwrap_or(false) + { let exp = chrono::DateTime::from_timestamp(claims.exp as i64, 0); let user_id: i32 = claims.sub.parse().unwrap_or(0); let auth = UserAuth { diff --git a/src/api/server.rs b/src/api/server.rs index 48d9526..cc055d9 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -7,9 +7,9 @@ use axum::{ }; use once_cell::sync::{Lazy, OnceCell}; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; use sha2::{Digest, Sha256}; use sqlx::{PgPool, Row}; +use std::collections::HashMap; use std::time::Instant; use tower_http::cors::{Any, CorsLayer}; @@ -17,8 +17,8 @@ use crate::core::cache::{keys, MongoCache, RedisCache}; use crate::core::config::REDIS_KEY_PREFIX; use crate::core::db::schema; use crate::core::db::{Database, PostgresDb, QdrantDb, RedisClient, VideoRecord, VideoStatus}; -use crate::worker::resources::SystemResources; use crate::core::text::tokenizer::tokenize_chinese_text; +use crate::worker::resources::SystemResources; use crate::{Embedder, FileManager}; use super::agent_api; @@ -96,6 +96,11 @@ struct HealthResponse { build_git_hash: String, build_timestamp: String, uptime_ms: u64, + watcher_running: bool, + worker_running: bool, + auto_pipeline_enabled: bool, + watcher_auto_register_enabled: bool, + system_timezone: String, } #[derive(Debug, Serialize)] @@ -207,8 +212,7 @@ struct WatcherAutoRegisterToggleResponse { // Missing structs added -#[derive(Debug, Deserialize)] -#[derive(Serialize)] +#[derive(Debug, Deserialize, Serialize)] struct FileLookupMatch { file_uuid: String, file_name: String, @@ -233,7 +237,10 @@ async fn lookup_file_by_name( State(state): State, Query(params): Query>, ) -> Result, StatusCode> { - let base = params.get("file_name").map(|s| s.trim().to_string()).unwrap_or_default(); + let base = params + .get("file_name") + .map(|s| s.trim().to_string()) + .unwrap_or_default(); if base.is_empty() { return Ok(Json(FileLookupResponse { file_name: String::new(), @@ -256,11 +263,15 @@ async fn lookup_file_by_name( .bind(&pattern) .fetch_all(state.db.pool()) .await - .map_err(|e| { tracing::error!("lookup query error: {}", e); StatusCode::INTERNAL_SERVER_ERROR })?; + .map_err(|e| { + tracing::error!("lookup query error: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; let exists = rows.iter().any(|r| r.get::("file_name") == base); - let matches: Vec = rows.iter().map(|r| { - FileLookupMatch { + let matches: Vec = rows + .iter() + .map(|r| FileLookupMatch { file_uuid: r.get("file_uuid"), file_name: r.get("file_name"), file_type: r.get("file_type"), @@ -270,16 +281,25 @@ async fn lookup_file_by_name( duration: r.get("duration"), width: r.get("width"), height: r.get("height"), - } - }).collect(); + }) + .collect(); - let max_n: usize = rows.iter().filter_map(|r| { - let n: String = r.get("file_name"); - if n == base { return Some(0usize); } - let rest = n.strip_prefix(&stem).and_then(|r| r.strip_suffix(&ext))?; - let inner = rest.trim().strip_prefix('(').and_then(|r| r.strip_suffix(')'))?; - inner.parse::().ok() - }).max().unwrap_or(0); + let max_n: usize = rows + .iter() + .filter_map(|r| { + let n: String = r.get("file_name"); + if n == base { + return Some(0usize); + } + let rest = n.strip_prefix(&stem).and_then(|r| r.strip_suffix(&ext))?; + let inner = rest + .trim() + .strip_prefix('(') + .and_then(|r| r.strip_suffix(')'))?; + inner.parse::().ok() + }) + .max() + .unwrap_or(0); let next_name = if max_n == 0 && !exists { base.clone() } else { @@ -433,30 +453,44 @@ struct HybridSearchResponse { } fn dedup_search_results(results: Vec) -> Vec { - let mut seen: std::collections::HashMap = std::collections::HashMap::new(); + let mut seen: std::collections::HashMap = + std::collections::HashMap::new(); for r in results { let key = r.chunk_id.clone(); match seen.get(&key) { Some(existing) if existing.score >= r.score => continue, - _ => { seen.insert(key, r); } + _ => { + seen.insert(key, r); + } } } let mut deduped: Vec = seen.into_values().collect(); - deduped.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal)); + deduped.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); deduped } fn dedup_hybrid_results(results: Vec) -> Vec { - let mut seen: std::collections::HashMap = std::collections::HashMap::new(); + let mut seen: std::collections::HashMap = + std::collections::HashMap::new(); for r in results { let key = r.chunk_id.clone(); match seen.get(&key) { Some(existing) if existing.combined_score >= r.combined_score => continue, - _ => { seen.insert(key, r); } + _ => { + seen.insert(key, r); + } } } let mut deduped: Vec = seen.into_values().collect(); - deduped.sort_by(|a, b| b.combined_score.partial_cmp(&a.combined_score).unwrap_or(std::cmp::Ordering::Equal)); + deduped.sort_by(|a, b| { + b.combined_score + .partial_cmp(&a.combined_score) + .unwrap_or(std::cmp::Ordering::Equal) + }); deduped } @@ -578,6 +612,7 @@ struct ConfigHealth { cache_enabled: bool, auto_pipeline_enabled: bool, watcher_auto_register_enabled: bool, + system_timezone: String, } #[derive(Debug, Serialize)] @@ -618,6 +653,10 @@ struct PipelineStatus { llm: ServiceStatus, /// rsync file sync tool rsync: ServiceStatus, + /// Watcher process running + watcher_running: bool, + /// Worker process running + worker_running: bool, } #[derive(Debug, Serialize)] @@ -697,6 +736,11 @@ async fn health(State(state): State) -> Json { build_git_hash: env!("BUILD_GIT_HASH").to_string(), build_timestamp: env!("BUILD_TIMESTAMP").to_string(), uptime_ms: get_uptime_ms(), + watcher_running: check_process_running("watcher"), + worker_running: check_process_running("worker"), + auto_pipeline_enabled: crate::core::config::get_auto_pipeline_enabled(), + watcher_auto_register_enabled: crate::core::config::get_watcher_auto_register(), + system_timezone: crate::core::config::SYSTEM_TIMEZONE.clone(), }) } @@ -724,15 +768,23 @@ async fn health_detailed(State(state): State) -> Json bool { @@ -741,14 +793,12 @@ async fn health_detailed(State(state): State) -> Json bool { - std::process::Command::new( - &*crate::core::config::PYTHON_PATH, - ) - .arg("-c") - .arg(format!("import {}", module)) - .output() - .map(|o| o.status.success()) - .unwrap_or(false) + std::process::Command::new(&*crate::core::config::PYTHON_PATH) + .arg("-c") + .arg(format!("import {}", module)) + .output() + .map(|o| o.status.success()) + .unwrap_or(false) }; // SHA256 checksum verification against checksums.sha256 manifest @@ -759,16 +809,21 @@ async fn health_detailed(State(state): State) -> Json = line.splitn(2, ' ').collect(); - if parts.len() < 2 { continue; } + if parts.len() < 2 { + continue; + } let expected_hash = parts[0]; let file_path = parts[1].trim_start(); total += 1; let full_path = scripts_path.join(file_path); if full_path.exists() { if let Ok(actual) = std::process::Command::new("shasum") - .arg("-a").arg("256") + .arg("-a") + .arg("256") .arg(&full_path) .output() { @@ -780,9 +835,17 @@ async fn health_detailed(State(state): State) -> Json ScriptIntegrity { matched: 0, total: 0, ok: false }, + Err(_) => ScriptIntegrity { + matched: 0, + total: 0, + ok: false, + }, }; Json(DetailedHealthResponse { @@ -830,15 +893,22 @@ async fn health_detailed(State(state): State) -> Json) -> Json 0) { tracing::warn!( "[HEALTH] Consistency issues found: {}", - report.checks.iter().filter(|c| c.count > 0).map(|c| format!("{}={}", c.check, c.count)).collect::>().join(", ") + report + .checks + .iter() + .filter(|c| c.count > 0) + .map(|c| format!("{}={}", c.check, c.count)) + .collect::>() + .join(", ") ); } Ok(Json(report)) @@ -885,38 +962,58 @@ async fn login( // Try users table first, fall back to legacy demo/demo let (user_id, username, role) = 'resolve: { // Step 1: Check local users table - if let Ok(Some((uid, uname, pw_hash, role_str))) = state.db.get_user_by_username(&req.username).await { + if let Ok(Some((uid, uname, pw_hash, role_str))) = + state.db.get_user_by_username(&req.username).await + { if crate::core::auth::password::verify_password(&req.password, &pw_hash) { break 'resolve (uid, uname, role_str); } // Password mismatch — log and continue to SFTPGo - tracing::debug!("[LOGIN] Local password mismatch for {}, trying SFTPGo", &req.username); + tracing::debug!( + "[LOGIN] Local password mismatch for {}, trying SFTPGo", + &req.username + ); } // Step 3: Legacy demo/demo fallback if req.username == "demo" && req.password == "demo" { // Get actual user id from DB if exists - let uid = state.db.get_user_by_username("demo").await.ok() - .flatten().map(|(id, _, _, _)| id).unwrap_or(0); + let uid = state + .db + .get_user_by_username("demo") + .await + .ok() + .flatten() + .map(|(id, _, _, _)| id) + .unwrap_or(0); break 'resolve (uid, "demo".to_string(), "user".to_string()); } - return Err((StatusCode::UNAUTHORIZED, Json(serde_json::json!({ - "success": false, "message": "Invalid username or password" - })))); + return Err(( + StatusCode::UNAUTHORIZED, + Json(serde_json::json!({ + "success": false, "message": "Invalid username or password" + })), + )); }; // Create JWT - let jwt_token = crate::core::auth::jwt::create_jwt(user_id, &username, &role) - .map_err(|e| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ + let jwt_token = crate::core::auth::jwt::create_jwt(user_id, &username, &role).map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({ "success": false, "message": format!("JWT creation failed: {}", e) - }))) - })?; + })), + ) + })?; // Create session let session_id = uuid::Uuid::new_v4().to_string().replace('-', ""); - state.db.create_session(&session_id, user_id, &DEMO_USER_API_KEY, 24).await.ok(); + state + .db + .create_session(&session_id, user_id, &DEMO_USER_API_KEY, 24) + .await + .ok(); // Update last_login if real user if user_id > 0 { @@ -938,9 +1035,13 @@ async fn login( let json_body = axum::body::Body::from(serde_json::to_string(&body).unwrap_or_default()); let response = axum::response::Response::builder() .header("Content-Type", "application/json") - .header("Set-Cookie", format!( - "session_id={}; Path=/; HttpOnly; SameSite=Strict; Max-Age=86400", session_id - )) + .header( + "Set-Cookie", + format!( + "session_id={}; Path=/; HttpOnly; SameSite=Strict; Max-Age=86400", + session_id + ), + ) .body(json_body) .unwrap(); @@ -953,7 +1054,11 @@ async fn logout( ) -> Json { // Extract session_id from cookie let cookies = crate::api::middleware::extract_cookies(&headers); - if let Some(sid) = cookies.iter().find(|(k, _)| k == "session_id").map(|(_, v)| v.clone()) { + if let Some(sid) = cookies + .iter() + .find(|(k, _)| k == "session_id") + .map(|(_, v)| v.clone()) + { state.db.delete_session(&sid).await.ok(); } @@ -1171,6 +1276,32 @@ async fn check_binary(name: &str) -> ServiceStatus { } } +fn check_process_running(name: &str) -> bool { + let patterns: &[&str] = match name { + "watcher" => &[ + "target/release/momentry watcher", + "target/debug/momentry_playground watcher", + ], + "worker" => &[ + "target/release/momentry worker", + "target/debug/momentry_playground worker", + ], + _ => return false, + }; + for pattern in patterns { + if let Ok(o) = std::process::Command::new("pgrep") + .arg("-f") + .arg(pattern) + .output() + { + if o.status.success() { + return true; + } + } + } + false +} + async fn check_http(url: &str) -> ServiceStatus { let start = Instant::now(); match reqwest::get(url).await { @@ -1228,11 +1359,7 @@ fn sha256_file(path: &std::path::Path) -> Option { /// Resolve name conflict: if file_name collides with existing but content differs, /// append ` (N)` suffix. Returns the resolved file_name. -async fn resolve_filename( - db: &PostgresDb, - file_name: &str, - content_hash: &str, -) -> String { +async fn resolve_filename(db: &PostgresDb, file_name: &str, content_hash: &str) -> String { let table = schema::table_name("videos"); let base = file_name.to_string(); let dot_pos = base.rfind('.'); @@ -1328,7 +1455,10 @@ async fn register_single_file( .ok() .and_then(|m| m.modified().ok()) .map(|t| { - let secs = t.duration_since(std::time::UNIX_EPOCH).unwrap_or_default().as_secs(); + let secs = t + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); chrono::DateTime::from_timestamp(secs as i64, 0) .map(|dt| dt.to_rfc3339()) .unwrap_or_else(|| chrono::Utc::now().to_rfc3339()) @@ -1337,39 +1467,72 @@ async fn register_single_file( let mac_address = crate::core::storage::uuid::get_mac_address(); let pre_file_uuid = crate::core::storage::uuid::compute_birth_uuid( - &mac_address, &birthday, &canonical_path, &file_name, + &mac_address, + &birthday, + &canonical_path, + &file_name, ); let pre_path = std::path::Path::new(&output_dir).join(format!("{}.pre.json", pre_file_uuid)); - let pre_data: Option = std::fs::read_to_string(&pre_path).ok() + let pre_data: Option = std::fs::read_to_string(&pre_path) + .ok() .and_then(|s| serde_json::from_str(&s).ok()); // Extract content_hash from pre.json or compute fresh let (content_hash, birthday, _pre_file_uuid) = if let Some(ref pre) = pre_data { - let h = pre.get("content_hash").and_then(|v| v.as_str()).unwrap_or("").to_string(); - let b = pre.get("birthday").and_then(|v| v.as_str()).unwrap_or(&birthday).to_string(); - let u = pre.get("file_uuid").and_then(|v| v.as_str()).unwrap_or(&pre_file_uuid).to_string(); + let h = pre + .get("content_hash") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let b = pre + .get("birthday") + .and_then(|v| v.as_str()) + .unwrap_or(&birthday) + .to_string(); + let u = pre + .get("file_uuid") + .and_then(|v| v.as_str()) + .unwrap_or(&pre_file_uuid) + .to_string(); (h, b, u) } else { - let h = provided_hash.filter(|h| !h.is_empty()).unwrap_or_else(|| sha256_file(&path).unwrap_or_default()); + let h = provided_hash + .filter(|h| !h.is_empty()) + .unwrap_or_else(|| sha256_file(&path).unwrap_or_default()); (h, birthday, pre_file_uuid) }; // Recompute UUID with the resolved birthday let file_uuid = crate::core::storage::uuid::compute_birth_uuid( - &mac_address, &birthday, &canonical_path, &file_name, + &mac_address, + &birthday, + &canonical_path, + &file_name, + ); + tracing::info!( + "[REGISTER] UUID inputs: mac={} birthday={} path={} name={} pre_found={} → {}", + mac_address, + birthday, + canonical_path, + file_name, + pre_data.is_some(), + file_uuid ); - tracing::info!("[REGISTER] UUID inputs: mac={} birthday={} path={} name={} pre_found={} → {}", mac_address, birthday, canonical_path, file_name, pre_data.is_some(), file_uuid); // Step 2: Hash check — same content = already registered (regardless of name) let videos_table = schema::table_name("videos"); if !content_hash.is_empty() { - if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>( - &format!("SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1", videos_table) - ) + if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(&format!( + "SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1", + videos_table + )) .bind(&content_hash) .fetch_optional(db.pool()) .await { - tracing::info!("[REGISTER] Content hash collision → already registered: {}", existing_uuid); + tracing::info!( + "[REGISTER] Content hash collision → already registered: {}", + existing_uuid + ); let existing_info: Option<(String, String, f64, i32, i32, f64, i64, Option)> = sqlx::query_as( &format!("SELECT file_name, file_path, duration, width, height, fps, total_frames, registration_time::text FROM {} WHERE file_uuid = $1", videos_table) ).bind(&existing_uuid).fetch_optional(db.pool()).await.unwrap_or(None); @@ -1433,40 +1596,71 @@ async fn register_single_file( }; let probe_json = Some(temp_probe_json.clone()); - let has_video = temp_probe_json.get("streams").and_then(|s| s.as_array()) - .map_or(false, |streams| streams.iter().any(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("video"))); - let has_audio = temp_probe_json.get("streams").and_then(|s| s.as_array()) - .map_or(false, |streams| streams.iter().any(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("audio"))); + let has_video = temp_probe_json + .get("streams") + .and_then(|s| s.as_array()) + .map_or(false, |streams| { + streams + .iter() + .any(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("video")) + }); + let has_audio = temp_probe_json + .get("streams") + .and_then(|s| s.as_array()) + .map_or(false, |streams| { + streams + .iter() + .any(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("audio")) + }); let final_file_type = if has_video { Some("video".to_string()) } else if has_audio { Some("audio".to_string()) } else { - Some(temp_probe_json.get("format").and_then(|f| f.get("file_type")).and_then(|v| v.as_str()).unwrap_or("unknown").to_string()) + Some( + temp_probe_json + .get("format") + .and_then(|f| f.get("file_type")) + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(), + ) }; - let duration = temp_probe_json.get("format").and_then(|f| { - let src = if has_video { f.get("duration") } else { None }; - src.and_then(|v| v.as_str()).and_then(|s| s.parse::().ok()) - }).unwrap_or(0.0); + let duration = temp_probe_json + .get("format") + .and_then(|f| { + let src = if has_video { f.get("duration") } else { None }; + src.and_then(|v| v.as_str()) + .and_then(|s| s.parse::().ok()) + }) + .unwrap_or(0.0); let mut width = 0u32; let mut height = 0u32; let mut fps = 0.0; let mut total_frames = 0u64; if let Some(streams) = temp_probe_json.get("streams").and_then(|s| s.as_array()) { - if let Some(s) = streams.iter().find(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("video")) { + if let Some(s) = streams + .iter() + .find(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("video")) + { width = s.get("width").and_then(|v| v.as_i64()).unwrap_or(0) as u32; height = s.get("height").and_then(|v| v.as_i64()).unwrap_or(0) as u32; if let Some(fps_str) = s.get("r_frame_rate").and_then(|v| v.as_str()) { if let Some((num, den)) = fps_str.split_once('/') { if let (Ok(n), Ok(d)) = (num.parse::(), den.parse::()) { - if d > 0.0 { fps = n / d; } + if d > 0.0 { + fps = n / d; + } } } } - total_frames = s.get("nb_frames").and_then(|v| v.as_str()) - .and_then(|s| s.parse().ok()).unwrap_or((duration * fps) as u64); + total_frames = s + .get("nb_frames") + .and_then(|v| v.as_str()) + .and_then(|s| s.parse().ok()) + .unwrap_or((duration * fps) as u64); } } @@ -1562,20 +1756,24 @@ async fn register_single_file( } // 更新 DB: cut_done, scene_done, audio_tracks - let audio_tracks: Vec = temp_probe_json.get("streams").and_then(|s| s.as_array()).map_or(vec![], |streams| { - streams.iter() - .filter(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("audio")) - .map(|st| { - serde_json::json!({ - "index": st.get("index").and_then(|v| v.as_i64()), - "codec": st.get("codec_name").and_then(|v| v.as_str()), - "channels": st.get("channels").and_then(|v| v.as_i64()), - "sample_rate": st.get("sample_rate").and_then(|v| v.as_str()), - "language": st.get("tags").and_then(|t| t.get("language")), + let audio_tracks: Vec = temp_probe_json + .get("streams") + .and_then(|s| s.as_array()) + .map_or(vec![], |streams| { + streams + .iter() + .filter(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("audio")) + .map(|st| { + serde_json::json!({ + "index": st.get("index").and_then(|v| v.as_i64()), + "codec": st.get("codec_name").and_then(|v| v.as_str()), + "channels": st.get("channels").and_then(|v| v.as_i64()), + "sample_rate": st.get("sample_rate").and_then(|v| v.as_str()), + "language": st.get("tags").and_then(|t| t.get("language")), + }) }) - }) - .collect() - }); + .collect() + }); let audio_tracks_json = serde_json::to_value(&audio_tracks).ok(); // 計算 cut_count 與 cut_max_duration let cut_path = std::path::Path::new( @@ -1628,20 +1826,33 @@ async fn register_single_file( let auto_db = db.clone(); tokio::spawn(async move { // Step 1: Offline prefetch (reads local identity files) - let identities_dir = std::path::Path::new(&*crate::core::config::OUTPUT_DIR).join("identities"); + let identities_dir = + std::path::Path::new(&*crate::core::config::OUTPUT_DIR).join("identities"); let index_path = identities_dir.join("_index.json"); - let cache_path = format!("{}/{}.tmdb.json", *crate::core::config::OUTPUT_DIR, auto_file_uuid); + let cache_path = format!( + "{}/{}.tmdb.json", + *crate::core::config::OUTPUT_DIR, + auto_file_uuid + ); let cache_file = std::path::Path::new(&cache_path); if index_path.exists() && cache_file.exists() { - tracing::info!("[AUTO-TMDB] Offline cache found for {}, running probe", auto_file_uuid); - if let Err(e) = crate::core::tmdb::probe::probe_from_cache(&auto_db, &auto_file_uuid).await { + tracing::info!( + "[AUTO-TMDB] Offline cache found for {}, running probe", + auto_file_uuid + ); + if let Err(e) = + crate::core::tmdb::probe::probe_from_cache(&auto_db, &auto_file_uuid).await + { tracing::warn!("[AUTO-TMDB] Probe failed for {}: {}", auto_file_uuid, e); } else { tracing::info!("[AUTO-TMDB] Probe completed for {}", auto_file_uuid); } } else { - tracing::info!("[AUTO-TMDB] No offline cache for {}, skipping", auto_file_uuid); + tracing::info!( + "[AUTO-TMDB] No offline cache for {}, skipping", + auto_file_uuid + ); } }); } @@ -1760,16 +1971,20 @@ async fn register_file( let resp = register_single_file(&state, &file_path, req.user_id, req.content_hash).await; // Auto-trigger pipeline for newly registered video files - if resp.success && !resp.already_exists && resp.file_type.as_deref() == Some("video") - && crate::core::config::get_auto_pipeline_enabled() { + if resp.success + && !resp.already_exists + && resp.file_type.as_deref() == Some("video") + && crate::core::config::get_auto_pipeline_enabled() + { let auto_uuid = resp.file_uuid.clone(); let auto_state = state.clone(); tokio::spawn(async move { // Brief delay to let DB settle, then trigger processing tokio::time::sleep(std::time::Duration::from_secs(2)).await; - let video_path: Option = sqlx::query_scalar( - &format!("SELECT file_path FROM {} WHERE file_uuid = $1", schema::table_name("videos")) - ) + let video_path: Option = sqlx::query_scalar(&format!( + "SELECT file_path FROM {} WHERE file_uuid = $1", + schema::table_name("videos") + )) .bind(&auto_uuid) .fetch_optional(auto_state.db.pool()) .await @@ -1780,19 +1995,36 @@ async fn register_file( if let Ok(job) = auto_state.db.create_monitor_job(&auto_uuid, Some(vp)).await { tracing::info!("[AUTO-PIPELINE] Job {} created for {}", job.id, auto_uuid); // Initialize processing status with all processors - let all_procs: Vec<&str> = vec!["asr","cut","yolo","ocr","face","pose","asrx","visual_chunk","5w1h"]; - let total = sqlx::query_scalar::<_, i64>( - &format!("SELECT COALESCE(total_frames, 0) FROM {} WHERE file_uuid = $1", schema::table_name("videos")) - ) + let all_procs: Vec<&str> = vec![ + "asr", + "cut", + "yolo", + "ocr", + "face", + "pose", + "asrx", + "visual_chunk", + "5w1h", + ]; + let total = sqlx::query_scalar::<_, i64>(&format!( + "SELECT COALESCE(total_frames, 0) FROM {} WHERE file_uuid = $1", + schema::table_name("videos") + )) .bind(&auto_uuid) .fetch_one(auto_state.db.pool()) .await .unwrap_or(0); - let _ = auto_state.db.init_processing_status(&auto_uuid, all_procs, total as u64).await; - let _ = sqlx::query(&format!("UPDATE {} SET status = 'processing' WHERE file_uuid = $1", schema::table_name("videos"))) - .bind(&auto_uuid) - .execute(auto_state.db.pool()) + let _ = auto_state + .db + .init_processing_status(&auto_uuid, all_procs, total as u64) .await; + let _ = sqlx::query(&format!( + "UPDATE {} SET status = 'processing' WHERE file_uuid = $1", + schema::table_name("videos") + )) + .bind(&auto_uuid) + .execute(auto_state.db.pool()) + .await; tracing::info!("[AUTO-PIPELINE] Pipeline triggered for {}", auto_uuid); } } @@ -1855,7 +2087,9 @@ async fn probe_by_uuid( tracing::error!("File not found at path: {}", path); return Err(( StatusCode::NOT_FOUND, - Json(serde_json::json!({"error": "File does not exist at registered path", "file_uuid": file_uuid, "file_path": path})), + Json( + serde_json::json!({"error": "File does not exist at registered path", "file_uuid": file_uuid, "file_path": path}), + ), )); } @@ -2411,7 +2645,8 @@ async fn search( let page = req.page.unwrap_or(1).max(1); let page_size = req.page_size.or(req.limit).unwrap_or(total.max(1)); let start = (page - 1) * page_size; - let paged_results: Vec = results.into_iter().skip(start).take(page_size).collect(); + let paged_results: Vec = + results.into_iter().skip(start).take(page_size).collect(); Ok::(SearchResponse { results: paged_results, query: req.query.clone(), @@ -2460,7 +2695,8 @@ async fn search_bm25( let page = req.page.unwrap_or(1).max(1); let page_size = req.page_size.or(req.limit).unwrap_or(total.max(1)); let start = (page - 1) * page_size; - let paged_results: Vec = results.into_iter().skip(start).take(page_size).collect(); + let paged_results: Vec = + results.into_iter().skip(start).take(page_size).collect(); Ok(Json(SearchResponse { results: paged_results, query: req.query.clone(), @@ -2513,7 +2749,8 @@ async fn search_smart( let page = req.page.unwrap_or(1).max(1); let page_size = req.page_size.or(req.limit).unwrap_or(total.max(1)); let start = (page - 1) * page_size; - let paged_results: Vec = results.into_iter().skip(start).take(page_size).collect(); + let paged_results: Vec = + results.into_iter().skip(start).take(page_size).collect(); Ok::(SearchResponse { results: paged_results, query: req.query.clone(), @@ -2522,7 +2759,7 @@ async fn search_smart( page_size, limit: req.limit.unwrap_or(10), }) - }) // end smart get_or_fetch + }) // end smart get_or_fetch .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; @@ -2585,7 +2822,11 @@ async fn hybrid_search( let page_size = req.page_size.or(req.limit).unwrap_or(total.max(1)); let start = (page - 1) * page_size; let search_results = dedup_hybrid_results(search_results); - let paged: Vec = search_results.into_iter().skip(start).take(page_size).collect(); + let paged: Vec = search_results + .into_iter() + .skip(start) + .take(page_size) + .collect(); Ok::(HybridSearchResponse { results: paged, query: req.query.clone(), @@ -2679,7 +2920,10 @@ fn scan_directory_recursive( dir: &std::path::Path, root: &std::path::Path, allowed_extensions: &[&str], - registered_paths: &std::collections::HashMap, Option)>, + registered_paths: &std::collections::HashMap< + String, + (String, String, Option, Option), + >, files: &mut Vec, ) { if let Ok(entries) = std::fs::read_dir(dir) { @@ -2775,30 +3019,35 @@ async fn scan_files( .unwrap_or_else(|_| "/Users/accusys/momentry/var/sftpgo/data/demo".to_string()); let demo_dir = std::path::Path::new(&demo_dir_str); - let allowed_extensions = vec!["mp4", "mov", "mkv", "avi", "webm", "jpg", "jpeg", "png", "gif", "webp"]; + let allowed_extensions = vec![ + "mp4", "mov", "mkv", "avi", "webm", "jpg", "jpeg", "png", "gif", "webp", + ]; // 1. Get registered files from DB (Map key: absolute file_path) let table = schema::table_name("videos"); let mj_table = schema::table_name("monitor_jobs"); - let registered_db: Vec<(String, String, String, String, Option, Option)> = sqlx::query_as(&format!( - "SELECT v.file_path, v.file_name, v.file_uuid, v.status, v.registration_time::text, \ + let registered_db: Vec<(String, String, String, String, Option, Option)> = + sqlx::query_as(&format!( + "SELECT v.file_path, v.file_name, v.file_uuid, v.status, v.registration_time::text, \ latest_job.id as job_id \ FROM {} v \ LEFT JOIN LATERAL ( \ SELECT id FROM {} WHERE uuid = v.file_uuid ORDER BY id DESC LIMIT 1 \ ) latest_job ON true \ ORDER BY v.id", - table, mj_table - )) - .fetch_all(state.db.pool()) - .await - .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + table, mj_table + )) + .fetch_all(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; - let registered_paths: std::collections::HashMap, Option)> = - registered_db - .into_iter() - .map(|(path, _name, uuid, status, reg_time, jid)| (path, (uuid, status, reg_time, jid))) - .collect(); + let registered_paths: std::collections::HashMap< + String, + (String, String, Option, Option), + > = registered_db + .into_iter() + .map(|(path, _name, uuid, status, reg_time, jid)| (path, (uuid, status, reg_time, jid))) + .collect(); // 2. Scan filesystem recursively let mut result_files = Vec::new(); @@ -2817,20 +3066,43 @@ async fn scan_files( let desc = params.sort_order.as_deref().unwrap_or("asc") == "desc"; match params.sort_by.as_deref().unwrap_or("name") { "size" => { - if desc { result_files.sort_by(|a, b| b.file_size.cmp(&a.file_size)); } - else { result_files.sort_by(|a, b| a.file_size.cmp(&b.file_size)); } + if desc { + result_files.sort_by(|a, b| b.file_size.cmp(&a.file_size)); + } else { + result_files.sort_by(|a, b| a.file_size.cmp(&b.file_size)); + } } "modified" | "time" => { - if desc { result_files.sort_by(|a, b| b.modified_time.cmp(&a.modified_time)); } - else { result_files.sort_by(|a, b| a.modified_time.cmp(&b.modified_time)); } + if desc { + result_files.sort_by(|a, b| b.modified_time.cmp(&a.modified_time)); + } else { + result_files.sort_by(|a, b| a.modified_time.cmp(&b.modified_time)); + } } "status" => { - if desc { result_files.sort_by(|a, b| b.status.cmp(&a.status).then(b.file_name.cmp(&a.file_name))); } - else { result_files.sort_by(|a, b| a.status.cmp(&b.status).then(a.file_name.cmp(&b.file_name))); } + if desc { + result_files + .sort_by(|a, b| b.status.cmp(&a.status).then(b.file_name.cmp(&a.file_name))); + } else { + result_files + .sort_by(|a, b| a.status.cmp(&b.status).then(a.file_name.cmp(&b.file_name))); + } } - _ => { // "name" (default): registered first, then by name - if desc { result_files.sort_by(|a, b| a.is_registered.cmp(&b.is_registered).then(b.file_name.cmp(&a.file_name))); } - else { result_files.sort_by(|a, b| b.is_registered.cmp(&a.is_registered).then(a.file_name.cmp(&b.file_name))); } + _ => { + // "name" (default): registered first, then by name + if desc { + result_files.sort_by(|a, b| { + a.is_registered + .cmp(&b.is_registered) + .then(b.file_name.cmp(&a.file_name)) + }); + } else { + result_files.sort_by(|a, b| { + b.is_registered + .cmp(&a.is_registered) + .then(a.file_name.cmp(&b.file_name)) + }); + } } } @@ -2844,7 +3116,10 @@ async fn scan_files( Ok(r) => r, Err(_) => return Err(StatusCode::BAD_REQUEST), }; - result_files.into_iter().filter(|f| re.is_match(&f.file_name)).collect() + result_files + .into_iter() + .filter(|f| re.is_match(&f.file_name)) + .collect() } else { result_files }; @@ -2853,19 +3128,38 @@ async fn scan_files( // 5. Pagination let page = params.page.unwrap_or(1).max(1); - let page_size = params.page_size.or(params.limit).unwrap_or(filtered_total.max(1)); - let total_pages = if page_size > 0 { (filtered_total + page_size - 1) / page_size } else { 1 }; + let page_size = params + .page_size + .or(params.limit) + .unwrap_or(filtered_total.max(1)); + let total_pages = if page_size > 0 { + (filtered_total + page_size - 1) / page_size + } else { + 1 + }; let start = (page - 1) * page_size; let files: Vec = filtered.into_iter().skip(start).take(page_size).collect(); let table_videos = schema::table_name("videos"); let table_chunks = schema::table_name("chunk"); let total_chunks: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", table_chunks)) - .fetch_one(state.db.pool()).await.unwrap_or(0); - let searchable_chunks: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE vector_id IS NOT NULL", table_chunks)) - .fetch_one(state.db.pool()).await.unwrap_or(0); - let pending_videos: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE status = 'pending'", table_videos)) - .fetch_one(state.db.pool()).await.unwrap_or(0); + .fetch_one(state.db.pool()) + .await + .unwrap_or(0); + let searchable_chunks: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE vector_id IS NOT NULL", + table_chunks + )) + .fetch_one(state.db.pool()) + .await + .unwrap_or(0); + let pending_videos: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE status = 'pending'", + table_videos + )) + .fetch_one(state.db.pool()) + .await + .unwrap_or(0); Ok(Json(ScanFilesResponse { files, @@ -2989,7 +3283,17 @@ async fn get_progress( .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; - let processor_names = ["asr", "cut", "asrx", "yolo", "ocr", "face", "pose", "visual_chunk", "story"]; + let processor_names = [ + "asr", + "cut", + "asrx", + "yolo", + "ocr", + "face", + "pose", + "visual_chunk", + "story", + ]; let mut processors = Vec::new(); let mut completed_count = 0u32; @@ -3068,12 +3372,21 @@ async fn get_progress( .unwrap_or_else(|_| String::new()); if !started_str.is_empty() { if let Ok(started_at) = chrono::DateTime::parse_from_rfc3339(&started_str) { - let elapsed = chrono::Utc::now().signed_duration_since(started_at).num_seconds().max(1); + let elapsed = chrono::Utc::now() + .signed_duration_since(started_at) + .num_seconds() + .max(1); let estimated_total = (elapsed as f64 * total as f64 / current as f64) as i64; Some((estimated_total - elapsed).max(0)) - } else { None } - } else { None } - } else { None }; + } else { + None + } + } else { + None + } + } else { + None + }; if status == "complete" { completed_count += 1; @@ -3096,8 +3409,14 @@ async fn get_progress( // Supplement with actual processor_results from DB (overrides stale Redis data) let pr_table = schema::table_name("processor_results"); let vt = schema::table_name("videos"); - let total_frames: i64 = sqlx::query_scalar(&format!("SELECT COALESCE(total_frames, 0) FROM {} WHERE file_uuid = $1", vt)) - .bind(&file_uuid).fetch_one(pg.pool()).await.unwrap_or(0); + let total_frames: i64 = sqlx::query_scalar(&format!( + "SELECT COALESCE(total_frames, 0) FROM {} WHERE file_uuid = $1", + vt + )) + .bind(&file_uuid) + .fetch_one(pg.pool()) + .await + .unwrap_or(0); if let Ok(rows) = sqlx::query_as::<_, (String, String, i32, i32)>( &format!( "SELECT pr.status, pr.processor_type, COALESCE(pr.frames_processed, 0), COALESCE(pr.chunks_produced, 0) \ @@ -3418,7 +3737,10 @@ async fn auto_pipeline_toggle( Ok(Json(AutoPipelineToggleResponse { success: true, auto_pipeline_enabled: req.enabled, - message: format!("Auto-pipeline {}", if req.enabled { "enabled" } else { "disabled" }), + message: format!( + "Auto-pipeline {}", + if req.enabled { "enabled" } else { "disabled" } + ), })) } @@ -3430,7 +3752,10 @@ async fn watcher_auto_register_toggle( Ok(Json(WatcherAutoRegisterToggleResponse { success: true, watcher_auto_register_enabled: req.enabled, - message: format!("Watcher auto-register {}", if req.enabled { "enabled" } else { "disabled" }), + message: format!( + "Watcher auto-register {}", + if req.enabled { "enabled" } else { "disabled" } + ), })) } @@ -3525,10 +3850,10 @@ async fn unregister( } tracing::info!("[unregister] Unregistering file: {}", uuid); - + // Check if video exists first match db.get_video_by_uuid(uuid).await { - Ok(Some(_)) => {}, + Ok(Some(_)) => {} Ok(None) => { return Ok(Json(UnregisterResponse { success: false, @@ -3540,7 +3865,7 @@ async fn unregister( return Err(StatusCode::INTERNAL_SERVER_ERROR); } } - + match db.delete_video(uuid).await { Ok(_) => { let _ = state.mongo_cache.invalidate_videos_list().await; @@ -3569,8 +3894,10 @@ async fn doc_redirect() -> axum::response::Redirect { axum::response::Redirect::to("/doc-wasm") } -async fn wasm_doc_handler() -> Result { - let path = std::path::Path::new("/Users/accusys/momentry_core_0.1/docs_v1.0/doc_wasm/index.html"); +async fn wasm_doc_handler() -> Result +{ + let path = + std::path::Path::new("/Users/accusys/momentry_core_0.1/docs_v1.0/doc_wasm/index.html"); match tokio::fs::read_to_string(path).await { Ok(html) => Ok(([("content-type", "text/html; charset=utf-8")], html)), Err(_) => Err((StatusCode::NOT_FOUND, "Doc not found")), @@ -3588,7 +3915,9 @@ async fn wasm_doc_file_handler( if !path.exists() || !path.starts_with(base) { return Err((StatusCode::NOT_FOUND, "File not found")); } - let data = tokio::fs::read(&path).await.map_err(|_| (StatusCode::NOT_FOUND, "Read error"))?; + let data = tokio::fs::read(&path) + .await + .map_err(|_| (StatusCode::NOT_FOUND, "Read error"))?; let mime = if file.ends_with(".wasm") { "application/wasm" } else if file.ends_with(".js") { @@ -3639,20 +3968,19 @@ async fn serve_doc( }; if !authorized { - let login_html = tokio::fs::read_to_string(&base_dir.join("login.html")).await - .unwrap_or_else(|_| "

Login

Please login at /api/v1/auth/login

".to_string()); - return Ok(( - [("content-type", "text/html; charset=utf-8")], - login_html, - )); + let login_html = tokio::fs::read_to_string(&base_dir.join("login.html")) + .await + .unwrap_or_else(|_| { + "

Login

Please login at /api/v1/auth/login

" + .to_string() + }); + return Ok(([("content-type", "text/html; charset=utf-8")], login_html)); } - let index_html = tokio::fs::read_to_string(&base_dir.join("index.html")).await + let index_html = tokio::fs::read_to_string(&base_dir.join("index.html")) + .await .unwrap_or_else(|_| "

Docs not found

".to_string()); - Ok(( - [("content-type", "text/html; charset=utf-8")], - index_html, - )) + Ok(([("content-type", "text/html; charset=utf-8")], index_html)) } async fn serve_doc_file( @@ -3669,12 +3997,10 @@ async fn serve_doc_file( }; if !authorized { - let login_html = tokio::fs::read_to_string(&base_dir.join("login.html")).await + let login_html = tokio::fs::read_to_string(&base_dir.join("login.html")) + .await .unwrap_or_else(|_| "

Login

".to_string()); - return Ok(( - [("content-type", "text/html; charset=utf-8")], - login_html, - )); + return Ok(([("content-type", "text/html; charset=utf-8")], login_html)); } // Sanitize: only allow .html files, no path traversal @@ -3685,29 +4011,33 @@ async fn serve_doc_file( )); } - let html = tokio::fs::read_to_string(&base_dir.join(file)).await + let html = tokio::fs::read_to_string(&base_dir.join(file)) + .await .unwrap_or_else(|_| "

Page not found

".to_string()); - Ok(( - [("content-type", "text/html; charset=utf-8")], - html, - )) + Ok(([("content-type", "text/html; charset=utf-8")], html)) } async fn check_doc_auth(state: &AppState, headers: &axum::http::HeaderMap) -> bool { use crate::api::middleware::extract_cookies; let cookies = extract_cookies(headers); - let sid = cookies.iter().find(|(k, _)| k == "session_id").map(|(_, v)| v.clone()); + let sid = cookies + .iter() + .find(|(k, _)| k == "session_id") + .map(|(_, v)| v.clone()); if let Some(ref session_id) = sid { let table = crate::core::db::schema::table_name("sessions"); - sqlx::query_scalar::<_, i32>( - &format!("SELECT 1 FROM {} WHERE session_id = $1 AND expires_at > NOW()", table) - ) + sqlx::query_scalar::<_, i32>(&format!( + "SELECT 1 FROM {} WHERE session_id = $1 AND expires_at > NOW()", + table + )) .bind(session_id) .fetch_optional(state.db.pool()) .await .map(|r| r.is_some()) .unwrap_or(false) - } else { false } + } else { + false + } } pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> { @@ -3716,10 +4046,15 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> { let resolved_ip = if host == "0.0.0.0" { // Try to find a non-loopback IP if let Ok(addrs) = std::net::ToSocketAddrs::to_socket_addrs(&"localhost:0") { - if let Some(addr) = addrs.filter_map(|a| match a { - std::net::SocketAddr::V4(v4) if !v4.ip().is_loopback() => Some(v4.ip().to_string()), - _ => None, - }).next() { + if let Some(addr) = addrs + .filter_map(|a| match a { + std::net::SocketAddr::V4(v4) if !v4.ip().is_loopback() => { + Some(v4.ip().to_string()) + } + _ => None, + }) + .next() + { addr } else { // Fallback: try getting IP from UDP socket @@ -3793,15 +4128,23 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> { .route("/api/v1/unregister", post(unregister)) .route("/api/v1/files/scan", get(scan_files)) .route("/api/v1/file/:file_uuid/probe", get(probe_by_uuid)) - .route("/api/v1/file/:file_uuid/json/:processor", get(download_json)) + .route( + "/api/v1/file/:file_uuid/json/:processor", + get(download_json), + ) .route("/api/v1/file/:file_uuid/process", post(trigger_processing)) - .route("/api/v1/file/:file_uuid/chunk/:chunk_id", get(get_chunk_by_path)) - + .route( + "/api/v1/file/:file_uuid/chunk/:chunk_id", + get(get_chunk_by_path), + ) .route("/api/v1/progress/:file_uuid", get(get_progress)) .route("/api/v1/jobs", get(list_jobs)) .route("/api/v1/config/cache", post(cache_toggle)) .route("/api/v1/config/auto-pipeline", post(auto_pipeline_toggle)) - .route("/api/v1/config/watcher-auto-register", post(watcher_auto_register_toggle)) + .route( + "/api/v1/config/watcher-auto-register", + post(watcher_auto_register_toggle), + ) // .merge(person_identity::person_identity_routes()) // V4.0: DISABLED (person_identities table removed) .merge(identity_binding::identity_binding_routes()) .merge(identities::identity_routes()) @@ -3837,8 +4180,10 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> { .route("/api/v1/auth/login", post(login)) .route("/api/v1/auth/logout", post(logout)) .route("/api/v1/stats/sftpgo", get(get_sftpgo_status)) - .route("/api/v1/stats/ingestion-status/:file_uuid", get(get_ingestion_status)) - + .route( + "/api/v1/stats/ingestion-status/:file_uuid", + get(get_ingestion_status), + ) .route("/api/v1/search/visual", post(search_visual_chunks)) .route( "/api/v1/search/visual/class", @@ -3949,9 +4294,11 @@ async fn get_ingestion_status( let fd = schema::table_name("face_detections"); let identities = schema::table_name("identities"); - let scene_meta_path = format!("{}/{}.scene_meta.json", + let scene_meta_path = format!( + "{}/{}.scene_meta.json", crate::core::config::OUTPUT_DIR.as_str(), - file_uuid); + file_uuid + ); let scene_meta_ok = std::path::Path::new(&scene_meta_path).exists(); macro_rules! count_sql { @@ -3963,31 +4310,53 @@ async fn get_ingestion_status( }; } - let sentence_count = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'sentence'")); + let sentence_count = count_sql!(&format!( + "SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'sentence'" + )); let sentence_embedded = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'sentence' AND embedding IS NOT NULL")); - let scene_count = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'cut'")); - let face_total = count_sql!(&format!("SELECT COUNT(*) FROM {fd} WHERE file_uuid = '{file_uuid}'")); + let scene_count = count_sql!(&format!( + "SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'cut'" + )); + let face_total = count_sql!(&format!( + "SELECT COUNT(*) FROM {fd} WHERE file_uuid = '{file_uuid}'" + )); let trace_count = count_sql!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd} WHERE file_uuid = '{file_uuid}' AND trace_id IS NOT NULL")); - let trace_chunks = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'trace'")); + let trace_chunks = count_sql!(&format!( + "SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'trace'" + )); let identity_count = count_sql!(&format!("SELECT COUNT(DISTINCT identity_id) FROM {fd} WHERE file_uuid = '{file_uuid}' AND identity_id IS NOT NULL")); - let tkg_nodes = count_sql!(&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'", schema::table_name("tkg_nodes"))); - let tkg_edges = count_sql!(&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'", schema::table_name("tkg_edges"))); + let tkg_nodes = count_sql!(&format!( + "SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'", + schema::table_name("tkg_nodes") + )); + let tkg_edges = count_sql!(&format!( + "SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'", + schema::table_name("tkg_edges") + )); let scene_5w1h = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != ''")); - let related_identities: Vec = match sqlx::query_as::<_, (String, String)>(&format!( - "SELECT DISTINCT i.uuid::text, i.name FROM {identities} i \ + let related_identities: Vec = + match sqlx::query_as::<_, (String, String)>(&format!( + "SELECT DISTINCT i.uuid::text, i.name FROM {identities} i \ JOIN {fd} fd ON fd.identity_id = i.id \ WHERE fd.file_uuid = '{file_uuid}' AND fd.identity_id IS NOT NULL \ ORDER BY i.name" - )).fetch_all(pool).await { - Ok(rows) => rows.into_iter().map(|(uuid, name)| { - IdentityRef { uuid: uuid.replace('-', ""), name } - }).collect(), - Err(e) => { - tracing::error!("related_identities query failed: {}", e); - vec![] - } - }; + )) + .fetch_all(pool) + .await + { + Ok(rows) => rows + .into_iter() + .map(|(uuid, name)| IdentityRef { + uuid: uuid.replace('-', ""), + name, + }) + .collect(), + Err(e) => { + tracing::error!("related_identities query failed: {}", e); + vec![] + } + }; let strangers = count_sql!(&format!( "SELECT COUNT(DISTINCT trace_id) FROM {fd} \ @@ -4005,18 +4374,55 @@ async fn get_ingestion_status( } let steps = vec![ - step!("rule1_sentence", sentence_count > 0, Some(format!("{sentence_count} sentence chunks"))), - step!("auto_vectorize", sentence_embedded > 0, Some(format!("{sentence_embedded} embedded"))), - step!("rule3_scene", scene_count > 0, Some(format!("{scene_count} scene chunks"))), - step!("face_trace", trace_count > 0, Some(format!("{trace_count} traces / {face_total} detections"))), - step!("trace_chunks", trace_chunks > 0, Some(format!("{trace_chunks} trace chunks"))), - step!("tkg", tkg_nodes > 0 || tkg_edges > 0, Some(format!("{tkg_nodes} nodes, {tkg_edges} edges"))), - step!("identity_match", identity_count > 0, Some(format!("{identity_count} identities matched"))), + step!( + "rule1_sentence", + sentence_count > 0, + Some(format!("{sentence_count} sentence chunks")) + ), + step!( + "auto_vectorize", + sentence_embedded > 0, + Some(format!("{sentence_embedded} embedded")) + ), + step!( + "rule3_scene", + scene_count > 0, + Some(format!("{scene_count} scene chunks")) + ), + step!( + "face_trace", + trace_count > 0, + Some(format!("{trace_count} traces / {face_total} detections")) + ), + step!( + "trace_chunks", + trace_chunks > 0, + Some(format!("{trace_chunks} trace chunks")) + ), + step!( + "tkg", + tkg_nodes > 0 || tkg_edges > 0, + Some(format!("{tkg_nodes} nodes, {tkg_edges} edges")) + ), + step!( + "identity_match", + identity_count > 0, + Some(format!("{identity_count} identities matched")) + ), step!("scene_metadata", scene_meta_ok, None), - step!("5w1h", scene_5w1h > 0, Some(format!("{scene_5w1h} scenes with 5W1H"))), + step!( + "5w1h", + scene_5w1h > 0, + Some(format!("{scene_5w1h} scenes with 5W1H")) + ), ]; - Ok(Json(IngestionStatusResponse { file_uuid, steps, related_identities, strangers })) + Ok(Json(IngestionStatusResponse { + file_uuid, + steps, + related_identities, + strangers, + })) } #[derive(Debug, Deserialize)] @@ -4269,9 +4675,21 @@ async fn video_details( if let Some(chunk_id) = query.chunk_id { let row: Option<( - i32, String, String, String, f64, i64, i64, - Option, serde_json::Value, Option, - Option, i32, Option, Option, Option, + i32, + String, + String, + String, + f64, + i64, + i64, + Option, + serde_json::Value, + Option, + Option, + i32, + Option, + Option, + Option, )> = sqlx::query_as(&format!( "SELECT file_id, uuid, chunk_id, chunk_type::text, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, diff --git a/src/api/tmdb_api.rs b/src/api/tmdb_api.rs index f876344..b32466c 100644 --- a/src/api/tmdb_api.rs +++ b/src/api/tmdb_api.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::api::server::AppState; use crate::core::config; -use crate::core::db::PostgresDb; +use crate::core::db::{PostgresDb, QdrantDb}; use crate::core::tmdb; #[derive(Debug, Serialize)] @@ -64,10 +64,44 @@ struct FileUuidParam { file_uuid: String, } +#[derive(Debug, Deserialize)] +struct TmdbFetchRequest { + file_uuid: String, +} + +#[derive(Debug, Serialize)] +struct TmdbFetchMemberResult { + name: String, + character: Option, + aliases: Vec, + metadata: serde_json::Value, + status: String, + has_json: bool, + has_jpg: bool, + error: Option, +} + +#[derive(Debug, Serialize)] +struct TmdbFetchResponse { + success: bool, + movie_title: Option, + tmdb_id: Option, + results: Vec, + summary: serde_json::Value, +} + pub fn tmdb_routes() -> Router { Router::new() .route("/api/v1/agents/tmdb/prefetch", post(tmdb_prefetch)) - .route("/api/v1/file/:file_uuid/tmdb-probe", post(tmdb_probe_handler)) + .route( + "/api/v1/file/:file_uuid/tmdb-probe", + post(tmdb_probe_handler), + ) + .route("/api/v1/tmdb/fetch", post(tmdb_fetch)) + .route( + "/api/v1/agents/tmdb/match/:file_uuid", + post(tmdb_match_handler), + ) .route("/api/v1/resource/tmdb", get(tmdb_resource_status)) .route("/api/v1/resource/tmdb/check", post(tmdb_resource_check)) } @@ -79,9 +113,10 @@ async fn tmdb_prefetch( let file_uuid = req.file_uuid; // Verify file exists in DB - let file_exists: bool = sqlx::query_scalar( - &format!("SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", crate::core::db::schema::table_name("videos")) - ) + let file_exists: bool = sqlx::query_scalar(&format!( + "SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", + crate::core::db::schema::table_name("videos") + )) .bind(&file_uuid) .fetch_one(state.db.pool()) .await @@ -182,18 +217,22 @@ async fn tmdb_probe_handler( let file_uuid = params.file_uuid; // Verify file exists - let file_exists: bool = sqlx::query_scalar( - &format!("SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", crate::core::db::schema::table_name("videos")) - ) + let file_exists: bool = sqlx::query_scalar(&format!( + "SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", + crate::core::db::schema::table_name("videos") + )) .bind(&file_uuid) .fetch_one(state.db.pool()) .await .unwrap_or(false); if !file_exists { - return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ - "error": "Video not found", "file_uuid": file_uuid - })))); + return Err(( + StatusCode::NOT_FOUND, + Json(serde_json::json!({ + "error": "Video not found", "file_uuid": file_uuid + })), + )); } match tmdb::probe::probe_from_cache(&state.db, &file_uuid).await { @@ -214,7 +253,10 @@ async fn tmdb_probe_handler( .await { for uuid in rows { - let _ = crate::core::identity::storage::save_identity_file_by_pool(&pool, &uuid).await; + let _ = crate::core::identity::storage::save_identity_file_by_pool( + &pool, &uuid, + ) + .await; } } }); @@ -245,24 +287,26 @@ async fn tmdb_probe_handler( message: "No TMDb cache found. Run tmdb-prefetch first.".to_string(), })) } else { - Err((StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ - "error": msg, "file_uuid": file_uuid - })))) + Err(( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({ + "error": msg, "file_uuid": file_uuid + })), + )) } } } } -async fn tmdb_resource_status( - State(state): State, -) -> Json { +async fn tmdb_resource_status(State(state): State) -> Json { let status = tmdb::status::quick_status(); let identities_seeded = tmdb::status::count_tmdb_identities(state.db.pool()) .await .unwrap_or(0); - let identities_with_embedding = tmdb::status::count_tmdb_identities_with_embedding(state.db.pool()) - .await - .unwrap_or(0); + let identities_with_embedding = + tmdb::status::count_tmdb_identities_with_embedding(state.db.pool()) + .await + .unwrap_or(0); let cache_files = tmdb::status::count_cache_files(); Json(TmdbResourceResponse { @@ -303,3 +347,383 @@ async fn tmdb_resource_check() -> Json { status, }) } + +async fn tmdb_fetch( + State(state): State, + Json(req): Json, +) -> Result, (StatusCode, Json)> { + let file_uuid = req.file_uuid; + + let filename: Option = sqlx::query_scalar(&format!( + "SELECT file_name FROM {} WHERE file_uuid = $1", + crate::core::db::schema::table_name("videos") + )) + .bind(&file_uuid) + .fetch_optional(state.db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"error": e.to_string()})), + ) + })? + .flatten(); + + let filename = filename.ok_or_else(|| { + ( + StatusCode::NOT_FOUND, + Json(serde_json::json!({"error": "File not found"})), + ) + })?; + + // Run probe to create identities + match tmdb::probe::probe_movie(&state.db, &filename, &file_uuid).await { + Ok(Some(probe_result)) => { + let mut member_results = Vec::new(); + + // Read the cache to get cast list with names and profile URLs + if let Ok(cache) = tmdb::cache::read_tmdb_cache(&file_uuid) { + for member in &cache.cast { + let name = member.name.clone(); + let character = if member.character.is_empty() { + None + } else { + Some(member.character.clone()) + }; + let aliases = member.also_known_as.clone(); + let profile_url = member + .profile_path + .as_ref() + .map(|p| format!("https://image.tmdb.org/t/p/w185{}", p)); + + let metadata = serde_json::json!({ + "tmdb_id": member.id, + "name": member.name, + "character": member.character, + "aliases": member.also_known_as, + "profile_path": member.profile_path, + "order": member.order, + "biography": member.biography, + "birthday": member.birthday, + "place_of_birth": member.place_of_birth, + "imdb_id": member.imdb_id, + "known_for_department": member.known_for_department, + "popularity": member.popularity, + "deathday": member.deathday, + "gender": member.gender, + "homepage": member.homepage, + }); + + let identity_row = sqlx::query_as::<_, (i32, uuid::Uuid)>(&format!( + "SELECT id, uuid FROM {} WHERE name = $1 AND source = 'tmdb' LIMIT 1", + crate::core::db::schema::table_name("identities") + )) + .bind(&name) + .fetch_optional(state.db.pool()) + .await; + + match identity_row { + Ok(Some((identity_id, uuid))) => { + let clean = uuid.to_string().replace('-', ""); + let dir = crate::core::identity::storage::identity_dir(&clean); + std::fs::create_dir_all(&dir).ok(); + + let json_result = crate::core::identity::storage::save_identity_file( + &state.db, &clean, + ) + .await; + let has_json = json_result.is_ok(); + + let has_jpg = if let Some(url) = &profile_url { + let jpg_path = dir.join("profile.jpg"); + if jpg_path.exists() { + true + } else if let Ok(resp) = reqwest::get(url).await { + if let Ok(bytes) = resp.bytes().await { + std::fs::write(&jpg_path, &bytes).is_ok() + } else { + false + } + } else { + false + } + } else { + false + }; + + // Push face_embedding to Qdrant if available + let face_collection = format!( + "{}_faces", + crate::core::config::REDIS_KEY_PREFIX + .as_str() + .trim_end_matches(':') + ); + let emb_row: Option<(Vec,)> = sqlx::query_as( + &format!( + "SELECT face_embedding::real[] FROM {} WHERE uuid = $1 AND face_embedding IS NOT NULL", + crate::core::db::schema::table_name("identities") + ) + ) + .bind(&uuid) + .fetch_optional(state.db.pool()) + .await + .unwrap_or(None); + + if let Some((embedding,)) = emb_row { + let qdrant = QdrantDb::new(); + qdrant.ensure_collection(&face_collection, 512).await.ok(); + let _ = qdrant + .upsert_vector_to_collection( + &face_collection, + identity_id as u64, + &embedding, + Some(serde_json::json!({ + "identity_id": identity_id, + "name": name, + "source": "tmdb", + })), + ) + .await; + } + + let status = if has_json && has_jpg { + "success" + } else { + "partial" + }; + let error = if !has_json { + Some(format!("{:?}", json_result.err())) + } else if !has_jpg { + Some("profile download failed".to_string()) + } else { + None + }; + + member_results.push(TmdbFetchMemberResult { + name: name.clone(), + character: character.clone(), + aliases: aliases.clone(), + metadata: metadata.clone(), + status: status.to_string(), + has_json, + has_jpg, + error, + }); + } + Ok(None) => { + member_results.push(TmdbFetchMemberResult { + name: name.clone(), + character: character.clone(), + aliases: aliases.clone(), + metadata: metadata.clone(), + status: "skipped".to_string(), + has_json: false, + has_jpg: false, + error: None, + }); + } + Err(e) => { + member_results.push(TmdbFetchMemberResult { + name: name.clone(), + character: character.clone(), + aliases: aliases.clone(), + metadata: metadata.clone(), + status: "error".to_string(), + has_json: false, + has_jpg: false, + error: Some(format!("DB error: {}", e)), + }); + } + } + } + } + + let total = member_results.len(); + let success_count = member_results + .iter() + .filter(|r| r.status == "success") + .count(); + let json_count = member_results.iter().filter(|r| r.has_json).count(); + let jpg_count = member_results.iter().filter(|r| r.has_jpg).count(); + + Ok(Json(TmdbFetchResponse { + success: true, + movie_title: Some(probe_result.title), + tmdb_id: Some(probe_result.tmdb_id), + results: member_results, + summary: serde_json::json!({ + "total": total, + "success": success_count, + "with_json": json_count, + "with_jpg": jpg_count, + }), + })) + } + Ok(None) => Err(( + StatusCode::NOT_FOUND, + Json(serde_json::json!({ + "error": "No movie found for this filename" + })), + )), + Err(e) => Err(( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({ + "error": e.to_string() + })), + )), + } +} + +#[derive(Debug, Serialize)] +struct TmdbMatchResponse { + success: bool, + file_uuid: String, + bindings_created: usize, + tmdb_identities_available: usize, + message: String, +} + +async fn tmdb_match_handler( + Path(params): Path, + State(state): State, +) -> Result, (StatusCode, Json)> { + let file_uuid = params.file_uuid; + + // Verify file exists + let file_exists: bool = sqlx::query_scalar(&format!( + "SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", + crate::core::db::schema::table_name("videos") + )) + .bind(&file_uuid) + .fetch_one(state.db.pool()) + .await + .unwrap_or(false); + + if !file_exists { + return Err(( + StatusCode::NOT_FOUND, + Json(serde_json::json!({ + "error": "Video not found", "file_uuid": file_uuid + })), + )); + } + + // Get all TMDb identities with face_embedding + let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec)>( + &format!( + "SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL", + crate::core::db::schema::table_name("identities") + ) + ) + .fetch_all(state.db.pool()) + .await + .map_err(|e| { + (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()}))) + })?; + + if tmdb_rows.is_empty() { + return Ok(Json(TmdbMatchResponse { + success: true, + file_uuid, + bindings_created: 0, + tmdb_identities_available: 0, + message: "No TMDb identities with face embeddings".to_string(), + })); + } + + let face_collection = format!( + "{}_faces", + crate::core::config::REDIS_KEY_PREFIX + .as_str() + .trim_end_matches(':') + ); + + let qdrant = QdrantDb::new(); + let _ = qdrant.ensure_collection(&face_collection, 512).await; + + let trace_collection = format!( + "{}_traces", + crate::core::config::REDIS_KEY_PREFIX + .as_str() + .trim_end_matches(':') + ); + let _ = qdrant.ensure_collection(&trace_collection, 512).await; + + // Sync trace embeddings (idempotent) + if let Err(e) = crate::core::db::qdrant_db::sync_trace_embeddings(&file_uuid).await { + tracing::error!("[TKG-MATCH] Trace sync failed: {}", e); + } + + let mut total_bindings = 0usize; + + for (tmdb_id, tmdb_name, tmdb_embedding) in &tmdb_rows { + // Search Qdrant trace collection with this TMDb embedding + let results = match qdrant + .search_face_collection( + &trace_collection, + tmdb_embedding, + 100, + "source", + "tmdb", + Some(&file_uuid), + ) + .await + { + Ok(r) => r, + Err(e) => { + tracing::warn!("[TKG-MATCH] Qdrant search failed for {}: {}", tmdb_name, e); + continue; + } + }; + + // Filter results by threshold and file_uuid + let filtered: Vec<_> = results + .into_iter() + .filter(|(score, payload)| { + *score >= 0.50 + && payload.get("file_uuid").and_then(|v| v.as_str()) == Some(&file_uuid) + }) + .collect(); + + if filtered.is_empty() { + continue; + } + + // Bind matched traces directly + let mut bound_count = 0usize; + for (_score, payload) in &filtered { + if let Some(tid) = payload.get("trace_id").and_then(|v| v.as_i64()) { + let r = sqlx::query(&format!( + "UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", + crate::core::db::schema::table_name("face_detections") + )) + .bind(tmdb_id) + .bind(&file_uuid) + .bind(tid as i32) + .execute(state.db.pool()) + .await; + if let Ok(result) = r { + bound_count += result.rows_affected() as usize; + } + } + } + + if bound_count > 0 { + tracing::info!( + "[TKG-MATCH] {}: bound {} traces to TMDb identity {}", + tmdb_name, + bound_count, + tmdb_id + ); + } + total_bindings += bound_count; + } + + Ok(Json(TmdbMatchResponse { + success: true, + file_uuid, + bindings_created: total_bindings, + tmdb_identities_available: tmdb_rows.len(), + message: format!("{} traces matched to TMDb identities", total_bindings), + })) +} diff --git a/src/api/trace_agent_api.rs b/src/api/trace_agent_api.rs index 1449b73..c79e33d 100644 --- a/src/api/trace_agent_api.rs +++ b/src/api/trace_agent_api.rs @@ -11,10 +11,7 @@ use crate::core::db::PostgresDb; pub fn trace_agent_routes() -> Router { Router::new() - .route( - "/api/v1/file/:file_uuid/traces", - post(list_traces_sorted), - ) + .route("/api/v1/file/:file_uuid/traces", post(list_traces_sorted)) .route( "/api/v1/file/:file_uuid/trace/:trace_id/faces", get(list_trace_faces), @@ -78,14 +75,15 @@ async fn list_traces_sorted( _ => "start_frame ASC", }; - let fps: f64 = - sqlx::query_scalar(&format!("SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", - crate::core::db::schema::table_name("videos"))) - .bind(&file_uuid) - .fetch_optional(state.db.pool()) - .await - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))? - .unwrap_or(24.0); + let fps: f64 = sqlx::query_scalar(&format!( + "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", + crate::core::db::schema::table_name("videos") + )) + .bind(&file_uuid) + .fetch_optional(state.db.pool()) + .await + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))? + .unwrap_or(24.0); let query = format!( "SELECT tt.*, fd.id AS sample_face_id FROM ( @@ -113,17 +111,16 @@ async fn list_traces_sorted( crate::core::db::schema::table_name("face_detections"), ); - let rows: Vec<(i32, i64, i32, i32, f64, f64, Option)> = - sqlx::query_as(&query) - .bind(&file_uuid) - .bind(min_faces) - .bind(effective_limit) - .bind(db_offset) - .bind(min_confidence) - .bind(max_confidence) - .fetch_all(state.db.pool()) - .await - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; + let rows: Vec<(i32, i64, i32, i32, f64, f64, Option)> = sqlx::query_as(&query) + .bind(&file_uuid) + .bind(min_faces) + .bind(effective_limit) + .bind(db_offset) + .bind(min_confidence) + .bind(max_confidence) + .fetch_all(state.db.pool()) + .await + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; let traces: Vec = rows .into_iter() @@ -220,19 +217,20 @@ async fn list_trace_faces( }; let interpolate = q.interpolate.unwrap_or(false); - let fps: f64 = - sqlx::query_scalar(&format!("SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", - crate::core::db::schema::table_name("videos"))) - .bind(&file_uuid) - .fetch_optional(state.db.pool()) - .await - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))? - .unwrap_or(24.0); + let fps: f64 = sqlx::query_scalar(&format!( + "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", + crate::core::db::schema::table_name("videos") + )) + .bind(&file_uuid) + .fetch_optional(state.db.pool()) + .await + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))? + .unwrap_or(24.0); - let total_detected: i64 = sqlx::query_scalar( - &format!("SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2", - crate::core::db::schema::table_name("face_detections")) - ) + let total_detected: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2", + crate::core::db::schema::table_name("face_detections") + )) .bind(&file_uuid) .bind(trace_id) .fetch_one(state.db.pool()) @@ -247,12 +245,12 @@ async fn list_trace_faces( Option, Option, f32, - )> = sqlx::query_as( - &format!("SELECT id, frame_number::int, x, y, width, height, confidence::float4 \ + )> = sqlx::query_as(&format!( + "SELECT id, frame_number::int, x, y, width, height, confidence::float4 \ FROM {} WHERE file_uuid = $1 AND trace_id = $2 \ ORDER BY frame_number ASC LIMIT $3 OFFSET $4", - crate::core::db::schema::table_name("face_detections")) - ) + crate::core::db::schema::table_name("face_detections") + )) .bind(&file_uuid) .bind(trace_id) .bind(limit) diff --git a/src/api/universal_search.rs b/src/api/universal_search.rs index 7869f08..860d17d 100644 --- a/src/api/universal_search.rs +++ b/src/api/universal_search.rs @@ -88,9 +88,9 @@ pub enum SearchResult { }, #[serde(rename = "person")] Person { - person_id: String, + identity_id: i32, + identity_uuid: String, name: Option, - speaker_id: Option, appearance_count: i32, score: f64, first_appearance_time: Option, @@ -168,7 +168,7 @@ pub async fn universal_search( results.retain(|r| match r { SearchResult::Chunk { chunk_id, .. } => seen_chunks.insert(chunk_id.clone()), SearchResult::Frame { frame_number, .. } => seen_frames.insert(*frame_number), - SearchResult::Person { person_id, .. } => seen_persons.insert(person_id.clone()), + SearchResult::Person { identity_id, .. } => seen_persons.insert(*identity_id), }); } @@ -251,9 +251,9 @@ pub async fn search_persons( let limit = query.limit.unwrap_or(20); let persons = search_persons_by_query( &db, + &query.file_uuid, &query.query, query.min_appearances, - query.max_age, limit, ) .await @@ -305,7 +305,6 @@ pub struct PersonSearchQuery { pub file_uuid: String, pub query: Option, pub min_appearances: Option, - pub max_age: Option, // New filter for "children" pub limit: Option, } @@ -317,13 +316,9 @@ pub struct PersonSearchResponse { #[derive(Debug, Serialize)] pub struct PersonResult { - pub person_id: String, + pub identity_id: i32, + pub identity_uuid: String, pub name: Option, - pub character_name: Option, - pub aliases: Option>, - pub age: Option, - pub gender: Option, - pub speaker_id: Option, pub appearance_count: i32, pub first_appearance_time: Option, pub last_appearance_time: Option, @@ -594,43 +589,37 @@ async fn search_persons_internal( db: &PostgresDb, req: &UniversalSearchRequest, ) -> Result, anyhow::Error> { - let table = "person_identities"; + let uuid = match &req.file_uuid { + Some(u) => u.replace('\'', "''"), + None => return Err(anyhow::anyhow!("file_uuid is required for person search")), + }; + + let id_table = schema::table_name("identities"); + let fd_table = schema::table_name("face_detections"); let mut sql = format!( - "SELECT person_id, name, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM {} WHERE 1=1", - table + "SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \ + MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \ + FROM {} i JOIN {} fd ON fd.identity_id = i.id \ + WHERE fd.file_uuid = '{}'", + id_table, fd_table, uuid ); if !req.query.is_empty() { - sql.push_str(&format!( - " AND (name ILIKE '%{}%' OR person_id ILIKE '%{}%' OR speaker_id ILIKE '%{}%')", - req.query, req.query, req.query - )); - } - if let Some(ref filters) = req.filters { - if let Some(ref speaker_id) = filters.speaker_id { - sql.push_str(&format!(" AND speaker_id = '{}'", speaker_id)); - } - if let Some(ref person_id) = filters.person_id { - sql.push_str(&format!(" AND person_id = '{}'", person_id)); - } + let q = req.query.replace('\'', "''"); + sql.push_str(&format!(" AND i.name ILIKE '%{}%'", q)); } + sql.push_str(" GROUP BY i.id, i.uuid, i.name"); sql.push_str(" ORDER BY appearance_count DESC"); sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20))); - let rows: Vec<( - String, - Option, - Option, - i32, - Option, - Option, - )> = sqlx::query_as(&sql).fetch_all(db.pool()).await?; + let rows: Vec<(i32, String, Option, i64, Option, Option)> = + sqlx::query_as(&sql).fetch_all(db.pool()).await?; let results: Vec = rows .into_iter() .map( - |(person_id, name, speaker_id, appearance_count, first_time, last_time)| { + |(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| { let score = if !req.query.is_empty() && name.as_ref().map_or(false, |n| { n.to_lowercase().contains(&req.query.to_lowercase()) @@ -641,10 +630,10 @@ async fn search_persons_internal( }; SearchResult::Person { - person_id, + identity_id, + identity_uuid, name, - speaker_id, - appearance_count, + appearance_count: appearance_count as i32, score, first_appearance_time: first_time, last_appearance_time: last_time, @@ -739,82 +728,49 @@ async fn search_frames_internal_v2( async fn search_persons_by_query( db: &PostgresDb, + file_uuid: &str, query: &Option, min_appearances: Option, - max_age: Option, limit: usize, ) -> Result, anyhow::Error> { - let table = "person_identities"; + let id_table = schema::table_name("identities"); + let fd_table = schema::table_name("face_detections"); let mut sql = format!( - "SELECT person_id, name, character_name, aliases, age, gender, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM {} WHERE 1=1", - table + "SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \ + MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \ + FROM {} i JOIN {} fd ON fd.identity_id = i.id \ + WHERE fd.file_uuid = '{}'", + id_table, + fd_table, + file_uuid.replace('\'', "''") ); - if let Some(ref q) = query { - // Search name, character_name, aliases (cast to text), person_id, speaker_id - sql.push_str(&format!( - " AND (name ILIKE '%{}%' OR character_name ILIKE '%{}%' OR aliases::text ILIKE '%{}%' OR person_id ILIKE '%{}%' OR speaker_id ILIKE '%{}%')", - q, q, q, q, q - )); + if let Some(q) = query { + let safe = q.replace('\'', "''"); + sql.push_str(&format!(" AND i.name ILIKE '%{}%'", safe)); } + sql.push_str(" GROUP BY i.id, i.uuid, i.name"); + if let Some(min) = min_appearances { - sql.push_str(&format!(" AND appearance_count >= {}", min)); - } - if let Some(max_a) = max_age { - // Strictly filter for age <= max_age. - // Note: This excludes entries with NULL age. - sql.push_str(&format!(" AND age <= {}", max_a)); + sql.push_str(&format!(" HAVING COUNT(fd.id) >= {}", min)); } sql.push_str(" ORDER BY appearance_count DESC"); sql.push_str(&format!(" LIMIT {}", limit)); - let rows: Vec<( - String, - Option, - Option, - Option, - Option, - Option, - Option, - i32, - Option, - Option, - )> = sqlx::query_as(&sql).fetch_all(db.pool()).await?; + let rows: Vec<(i32, String, Option, i64, Option, Option)> = + sqlx::query_as(&sql).fetch_all(db.pool()).await?; let results: Vec = rows .into_iter() .map( - |( - person_id, - name, - character_name, - aliases_json, - age, - gender, - speaker_id, - appearance_count, - first_time, - last_time, - )| { - let aliases = aliases_json.and_then(|v| { - v.as_array().map(|arr| { - arr.iter() - .filter_map(|val| val.as_str().map(String::from)) - .collect() - }) - }); - + |(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| { PersonResult { - person_id, + identity_id, + identity_uuid, name, - character_name, - aliases, - age, - gender, - speaker_id, - appearance_count, + appearance_count: appearance_count as i32, first_appearance_time: first_time, last_appearance_time: last_time, } diff --git a/src/api/visual_chunk_search.rs b/src/api/visual_chunk_search.rs index 448a381..1b21221 100644 --- a/src/api/visual_chunk_search.rs +++ b/src/api/visual_chunk_search.rs @@ -392,8 +392,14 @@ pub async fn get_visual_chunk_statistics( uuid.replace('\'', "''") ); - let row: (i64, Option, Option, Option, Option, Option) = - sqlx::query_as(&sql).fetch_one(db.pool()).await?; + let row: ( + i64, + Option, + Option, + Option, + Option, + Option, + ) = sqlx::query_as(&sql).fetch_one(db.pool()).await?; let mut stats = HashMap::new(); stats.insert("total_chunks".to_string(), Value::from(row.0)); diff --git a/src/bin/release.rs b/src/bin/release.rs index 874a457..34f2871 100644 --- a/src/bin/release.rs +++ b/src/bin/release.rs @@ -13,7 +13,14 @@ use std::path::{Path, PathBuf}; use std::process::Command; fn dir_size(path: &Path) -> u64 { - path.read_dir().map(|d| d.filter_map(|e| e.ok()).filter_map(|e| e.metadata().ok()).map(|m| m.len()).sum()).unwrap_or(0) + path.read_dir() + .map(|d| { + d.filter_map(|e| e.ok()) + .filter_map(|e| e.metadata().ok()) + .map(|m| m.len()) + .sum() + }) + .unwrap_or(0) } const DEMO_DIR: &str = "/Users/accusys/momentry/var/sftpgo/data/demo"; @@ -22,7 +29,10 @@ const RELEASE_DIR: &str = "/Users/accusys/momentry_core_0.1/release/files"; const PG_BIN: &str = "/Users/accusys/pgsql/18.3/bin"; #[derive(Parser)] -#[command(name = "release", about = "Release Manager — deploy/undeploy video packages")] +#[command( + name = "release", + about = "Release Manager — deploy/undeploy video packages" +)] struct Cli { #[command(subcommand)] command: Commands, @@ -107,7 +117,12 @@ fn extract_tarball(tarball: &Path) -> Result { fs::create_dir_all(&tmpdir)?; let status = Command::new("tar") - .args(["-xzf", tarball.to_str().unwrap(), "-C", tmpdir.to_str().unwrap()]) + .args([ + "-xzf", + tarball.to_str().unwrap(), + "-C", + tmpdir.to_str().unwrap(), + ]) .status() .context("tar extraction failed")?; if !status.success() { @@ -127,8 +142,8 @@ fn extract_tarball(tarball: &Path) -> Result { /// Get file_info.json from package directory fn read_file_info(pkg_dir: &Path) -> Result { let info_path = pkg_dir.join("file_info.json"); - let content = fs::read_to_string(&info_path) - .with_context(|| format!("Cannot read {:?}", info_path))?; + let content = + fs::read_to_string(&info_path).with_context(|| format!("Cannot read {:?}", info_path))?; serde_json::from_str(&content).context("Invalid file_info.json") } @@ -140,7 +155,10 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> { anyhow::bail!("File not found: {}", tarball); } - println!("=== Deploy: {} ===", tarball_path.file_name().unwrap().to_str().unwrap()); + println!( + "=== Deploy: {} ===", + tarball_path.file_name().unwrap().to_str().unwrap() + ); // Extract let pkg_dir = extract_tarball(tarball_path)?; @@ -148,7 +166,9 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> { // Read file_info let info = read_file_info(&pkg_dir)?; - let uuid = info["file_uuid"].as_str().context("Missing file_uuid in file_info.json")?; + let uuid = info["file_uuid"] + .as_str() + .context("Missing file_uuid in file_info.json")?; let file_name = info["file_name"].as_str().unwrap_or("?"); println!("UUID: {}\nVideo: {}", uuid, file_name); @@ -168,7 +188,8 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> { let entry = entry?; let fname = entry.file_name(); let fname_str = fname.to_str().unwrap_or(""); - if fname_str.ends_with(".mp4") || fname_str.ends_with(".mov") || fname_str.ends_with(".avi") { + if fname_str.ends_with(".mp4") || fname_str.ends_with(".mov") || fname_str.ends_with(".avi") + { let dest = Path::new(DEMO_DIR).join(&fname); if !dest.exists() { fs::copy(entry.path(), &dest)?; @@ -192,12 +213,15 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> { println!("Output files copied to {}", OUTPUT_DIR); // Verify - let chunk_count: (i64,) = sqlx::query_as( - "SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = $1" - ).bind(uuid).fetch_one(db.pool()).await?; - let face_count: (i64,) = sqlx::query_as( - "SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1" - ).bind(uuid).fetch_one(db.pool()).await?; + let chunk_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = $1") + .bind(uuid) + .fetch_one(db.pool()) + .await?; + let face_count: (i64,) = + sqlx::query_as("SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1") + .bind(uuid) + .fetch_one(db.pool()) + .await?; // Cleanup fs::remove_dir_all(&pkg_dir.parent().unwrap_or(&pkg_dir))?; @@ -213,9 +237,11 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> { async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result<()> { // Get video info - let rows: Vec<(String, String)> = sqlx::query_as( - "SELECT file_name, file_path FROM dev.videos WHERE file_uuid = $1" - ).bind(uuid).fetch_all(db.pool()).await?; + let rows: Vec<(String, String)> = + sqlx::query_as("SELECT file_name, file_path FROM dev.videos WHERE file_uuid = $1") + .bind(uuid) + .fetch_all(db.pool()) + .await?; if rows.is_empty() { anyhow::bail!("UUID {} not found in DB", uuid); @@ -252,7 +278,9 @@ async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result println!(" {}: {} rows deleted", tbl, result.rows_affected()); } sqlx::query("DELETE FROM dev.videos WHERE file_uuid = $1") - .bind(uuid).execute(db.pool()).await?; + .bind(uuid) + .execute(db.pool()) + .await?; println!(" dev.videos: removed"); // Delete output files @@ -270,7 +298,10 @@ async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result let vp = Path::new(file_path); if vp.exists() { fs::remove_file(vp)?; - println!(" Video file: removed ({})", vp.file_name().unwrap().to_str().unwrap_or("?")); + println!( + " Video file: removed ({})", + vp.file_name().unwrap().to_str().unwrap_or("?") + ); } } @@ -292,11 +323,15 @@ async fn cmd_list(db: &PostgresDb) -> Result<()> { "SELECT file_uuid, file_name, duration, status, (SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = v.file_uuid) as chunks, (SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = v.file_uuid) as faces - FROM dev.videos v ORDER BY id DESC" - ).fetch_all(db.pool()).await?; + FROM dev.videos v ORDER BY id DESC", + ) + .fetch_all(db.pool()) + .await?; - println!("{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}", - "UUID", "Name", "Duration", "Status", "Chunks", "Faces"); + println!( + "{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}", + "UUID", "Name", "Duration", "Status", "Chunks", "Faces" + ); println!("{}", "-".repeat(116)); for row in &rows { @@ -318,10 +353,15 @@ async fn cmd_list(db: &PostgresDb) -> Result<()> { name.clone() }; - println!("{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}", - uuid, short_name, dur_str, + println!( + "{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}", + uuid, + short_name, + dur_str, status.as_deref().unwrap_or("?"), - chunks.unwrap_or(0), faces.unwrap_or(0)); + chunks.unwrap_or(0), + faces.unwrap_or(0) + ); } Ok(()) } @@ -336,9 +376,23 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { "SELECT file_uuid, file_name, file_path, duration, fps, width, height FROM dev.videos WHERE file_uuid = $1" ).bind(uuid).fetch_optional(db.pool()).await?; let (_, file_name, file_path, duration, fps, width, height): ( - String, String, String, Option, Option, Option, Option + String, + String, + String, + Option, + Option, + Option, + Option, ) = match row { - Some(r) => (r.get(0), r.get(1), r.get(2), r.get(3), r.get(4), r.get(5), r.get(6)), + Some(r) => ( + r.get(0), + r.get(1), + r.get(2), + r.get(3), + r.get(4), + r.get(5), + r.get(6), + ), None => anyhow::bail!("UUID {} not found", uuid), }; @@ -360,7 +414,10 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { "momentry_version": env!("CARGO_PKG_VERSION"), "momentry_build": env!("BUILD_GIT_HASH"), }); - fs::write(outdir.join("file_info.json"), serde_json::to_string_pretty(&info)?)?; + fs::write( + outdir.join("file_info.json"), + serde_json::to_string_pretty(&info)?, + )?; // Export per-table .sql files (avoid single 4.7GB psql load) let sql_dir = outdir.join("sql"); @@ -376,7 +433,13 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { let mut import_order = vec!["master.sql"]; - fn write_table_sql(outdir: &Path, tbl: &str, col: &str, uuid: &str, psql_exec: &dyn Fn(&str) -> Result) -> Result<()> { + fn write_table_sql( + outdir: &Path, + tbl: &str, + col: &str, + uuid: &str, + psql_exec: &dyn Fn(&str) -> Result, + ) -> Result<()> { let safe_name = tbl.replace('.', "_"); let path = outdir.join(format!("{}.sql", safe_name)); let parts: Vec<&str> = tbl.split('.').collect(); @@ -419,8 +482,16 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { let data = psql_exec(&idents_query)?; if !data.is_empty() { let mut f = fs::File::create(&idents_path)?; - writeln!(f, "-- dev.identities WHERE file_uuid = '{}' OR global (tmdb/merged/user_defined)", uuid)?; - writeln!(f, "COPY dev.identities ({}) FROM STDIN WITH CSV HEADER;", cols)?; + writeln!( + f, + "-- dev.identities WHERE file_uuid = '{}' OR global (tmdb/merged/user_defined)", + uuid + )?; + writeln!( + f, + "COPY dev.identities ({}) FROM STDIN WITH CSV HEADER;", + cols + )?; writeln!(f, "{}", data)?; writeln!(f, "\\.")?; } @@ -440,7 +511,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { if !data.is_empty() { let mut f = fs::File::create(&binds_path)?; writeln!(f, "-- dev.identity_bindings (from face_detections JOIN)")?; - writeln!(f, "COPY dev.identity_bindings ({}) FROM STDIN WITH CSV HEADER;", cols)?; + writeln!( + f, + "COPY dev.identity_bindings ({}) FROM STDIN WITH CSV HEADER;", + cols + )?; writeln!(f, "{}", data)?; writeln!(f, "\\.")?; } @@ -469,7 +544,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { let sql_path = outdir.join("data.sql"); { let mut f = fs::File::create(&sql_path)?; - writeln!(f, "-- Release package: {} — see sql/ for per-table files", uuid)?; + writeln!( + f, + "-- Release package: {} — see sql/ for per-table files", + uuid + )?; writeln!(f, "BEGIN;")?; writeln!(f, "\\i sql/dev_videos.sql")?; writeln!(f, "\\i sql/dev_chunk.sql")?; @@ -492,7 +571,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { let dest = outdir.join(vp.file_name().unwrap()); fs::copy(vp, &dest)?; let vsize = fs::metadata(&dest)?.len(); - println!(" {} ({} MB)", vp.file_name().unwrap().to_str().unwrap_or("?"), vsize / 1024 / 1024); + println!( + " {} ({} MB)", + vp.file_name().unwrap().to_str().unwrap_or("?"), + vsize / 1024 / 1024 + ); } } @@ -541,11 +624,18 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { let vec0_src = "/Users/accusys/momentry_core_0.1/scripts/vec0.dylib"; if Path::new(vec0_src).exists() { fs::copy(vec0_src, outdir.join("vec0.dylib"))?; - println!(" vec0.dylib ({} KB)", fs::metadata(outdir.join("vec0.dylib"))?.len() / 1024); + println!( + " vec0.dylib ({} KB)", + fs::metadata(outdir.join("vec0.dylib"))?.len() / 1024 + ); } // Create tar.gz - let tarball = Path::new(RELEASE_DIR).join(format!("{}_v{}.tar.gz", uuid, Utc::now().format("%Y%m%d_%H%M%S"))); + let tarball = Path::new(RELEASE_DIR).join(format!( + "{}_v{}.tar.gz", + uuid, + Utc::now().format("%Y%m%d_%H%M%S") + )); let status = Command::new("tar") .args(["-czf", tarball.to_str().unwrap(), "-C", RELEASE_DIR, uuid]) .status()?; @@ -553,7 +643,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { anyhow::bail!("tar creation failed"); } let tsize = fs::metadata(&tarball)?.len(); - println!("\n Package: {} ({} MB)", tarball.display(), tsize / 1024 / 1024); + println!( + "\n Package: {} ({} MB)", + tarball.display(), + tsize / 1024 / 1024 + ); // Sanity check: warn if any sql file is suspiciously large println!(" Checking sql/ file sizes..."); @@ -564,33 +658,55 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { let sz = fs::metadata(&path)?.len() as f64 / 1024.0 / 1024.0; let name = path.file_stem().and_then(|s| s.to_str()).unwrap_or("?"); match name { - "dev_videos" | "master" if sz > 1.0 => - println!(" ⚠️ {} is {} MB, expected < 1 MB", name, sz as u64), - "dev_chunk" if sz > 2.0 => - println!(" ⚠️ {} is {} MB, expected < 2 MB for ~2.4K chunks", name, sz as u64), - "dev_identities" if sz > 1.0 => - println!(" ⚠️ {} is {} MB, expected < 1 MB for ~428 identities", name, sz as u64), - "dev_identity_bindings" if sz > 5.0 => - println!(" ⚠️ {} is {} MB, expected < 5 MB for ~7.6K bindings", name, sz as u64), - "dev_tkg_nodes" if sz > 10.0 => - println!(" ⚠️ {} is {} MB, expected < 10 MB for ~6.4K nodes", name, sz as u64), - "dev_tkg_edges" if sz > 20.0 => - println!(" ⚠️ {} is {} MB, expected < 20 MB for ~21K edges", name, sz as u64), - "dev_face_detections" if sz > 1000.0 => - println!(" ⚠️ {} is {} MB, expected < 1000 MB for ~70K faces (512D emb)", name, sz as u64), - "dev_chunk_vectors" if sz > 200.0 => - println!(" ⚠️ {} is {} MB, expected < 200 MB for ~2.4K chunks (768D emb)", name, sz as u64), + "dev_videos" | "master" if sz > 1.0 => { + println!(" ⚠️ {} is {} MB, expected < 1 MB", name, sz as u64) + } + "dev_chunk" if sz > 2.0 => println!( + " ⚠️ {} is {} MB, expected < 2 MB for ~2.4K chunks", + name, sz as u64 + ), + "dev_identities" if sz > 1.0 => println!( + " ⚠️ {} is {} MB, expected < 1 MB for ~428 identities", + name, sz as u64 + ), + "dev_identity_bindings" if sz > 5.0 => println!( + " ⚠️ {} is {} MB, expected < 5 MB for ~7.6K bindings", + name, sz as u64 + ), + "dev_tkg_nodes" if sz > 10.0 => println!( + " ⚠️ {} is {} MB, expected < 10 MB for ~6.4K nodes", + name, sz as u64 + ), + "dev_tkg_edges" if sz > 20.0 => println!( + " ⚠️ {} is {} MB, expected < 20 MB for ~21K edges", + name, sz as u64 + ), + "dev_face_detections" if sz > 1000.0 => println!( + " ⚠️ {} is {} MB, expected < 1000 MB for ~70K faces (512D emb)", + name, sz as u64 + ), + "dev_chunk_vectors" if sz > 200.0 => println!( + " ⚠️ {} is {} MB, expected < 200 MB for ~2.4K chunks (768D emb)", + name, sz as u64 + ), _ => {} } if sz > 2000.0 { - println!(" ⚠️ {} is {:.0} MB — unusually large, verify query", name, sz); + println!( + " ⚠️ {} is {:.0} MB — unusually large, verify query", + name, sz + ); } } } Ok(()) } -fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Option) -> Result<()> { +fn cmd_visualize_offline( + sqlite_path: &str, + output: Option<&str>, + identity: Option, +) -> Result<()> { let outpath = match output { Some(p) => p.to_string(), None => sqlite_path.replace(".sqlite", "_report.html"), @@ -606,7 +722,10 @@ fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Opti .output() .context("Offline report script failed")?; if !output.status.success() { - anyhow::bail!("Offline report: {}", String::from_utf8_lossy(&output.stderr)); + anyhow::bail!( + "Offline report: {}", + String::from_utf8_lossy(&output.stderr) + ); } println!("{}", String::from_utf8_lossy(&output.stdout)); println!("\n Open: {}", outpath); @@ -624,7 +743,10 @@ fn cmd_visualize(uuid: &str, typ: &str, output: Option<&str>, identity: Option generate_face_heatmap(uuid, &outpath, identity)?, "timeline" => generate_face_timeline(uuid, &outpath, identity)?, - _ => anyhow::bail!("Unknown visualization type: {}. Try: heatmap, density, timeline", typ), + _ => anyhow::bail!( + "Unknown visualization type: {}. Try: heatmap, density, timeline", + typ + ), } Ok(()) } @@ -698,16 +820,28 @@ fn cmd_stats() -> Result<()> { for line in listing.lines() { let trimmed = line.trim(); - if trimmed.is_empty() || trimmed.ends_with('/') { continue; } + if trimmed.is_empty() || trimmed.ends_with('/') { + continue; + } // tar -tvzf format: perms link owner group size date_month date_day time path... // Fields are space-separated; size is 5th field, path starts at 8th field let parts: Vec<&str> = trimmed.split_whitespace().collect(); - if parts.len() < 8 { continue; } + if parts.len() < 8 { + continue; + } let fsize = parts[4].parse::().unwrap_or(0); let fpath = parts[8..].join(" "); - let fname = Path::new(&fpath).file_name().unwrap_or_default().to_str().unwrap_or("?"); - let ext = Path::new(&fpath).extension().unwrap_or_default().to_str().unwrap_or(""); + let fname = Path::new(&fpath) + .file_name() + .unwrap_or_default() + .to_str() + .unwrap_or("?"); + let ext = Path::new(&fpath) + .extension() + .unwrap_or_default() + .to_str() + .unwrap_or(""); match ext { "sql" => { @@ -732,10 +866,26 @@ fn cmd_stats() -> Result<()> { } println!(" ─────────────────────────────"); - println!(" SQL: {} files, {:.0} MB", sql_count, total_sql as f64 / 1048576.0); - println!(" Video: {} files, {:.0} MB", video_count, total_video as f64 / 1048576.0); - println!(" JSON: {} files, {:.0} MB", json_count, total_json as f64 / 1048576.0); - println!(" Total: {:.0} MB (compressed: {:.0} MB)", (total_sql + total_video + total_json) as f64 / 1048576.0, pkg_size as f64 / 1048576.0); + println!( + " SQL: {} files, {:.0} MB", + sql_count, + total_sql as f64 / 1048576.0 + ); + println!( + " Video: {} files, {:.0} MB", + video_count, + total_video as f64 / 1048576.0 + ); + println!( + " JSON: {} files, {:.0} MB", + json_count, + total_json as f64 / 1048576.0 + ); + println!( + " Total: {:.0} MB (compressed: {:.0} MB)", + (total_sql + total_video + total_json) as f64 / 1048576.0, + pkg_size as f64 / 1048576.0 + ); println!(); } @@ -758,8 +908,17 @@ async fn main() -> Result<()> { Commands::List => cmd_list(&db).await?, Commands::Package { uuid } => cmd_package(&db, &uuid).await?, Commands::Stats => cmd_stats()?, - Commands::Visualize { uuid, typ, output, identity } => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?, - Commands::VisualizeOffline { sqlite_path, output, identity } => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?, + Commands::Visualize { + uuid, + typ, + output, + identity, + } => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?, + Commands::VisualizeOffline { + sqlite_path, + output, + identity, + } => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?, } Ok(()) } diff --git a/src/bin/service.rs b/src/bin/service.rs index 84abdd1..8dbaf85 100644 --- a/src/bin/service.rs +++ b/src/bin/service.rs @@ -16,7 +16,10 @@ const LOG_DIR: &str = "/Users/accusys/service_logs"; const LAUNCH_DIR: &str = "/Users/accusys/Library/LaunchAgents"; #[derive(Parser)] -#[command(name = "service", about = "Service Lifecycle Manager — source → build → install → config → launch → env")] +#[command( + name = "service", + about = "Service Lifecycle Manager — source → build → install → config → launch → env" +)] struct Cli { #[command(subcommand)] command: Commands, @@ -111,22 +114,54 @@ fn cmd_source_list() -> Result<()> { ("pyenv", "pyenv/", "git repo"), ("cmake", "cmake-4.2.0-macos-universal.tar.gz", "binary"), ("llama.cpp", "llama.cpp/", "git repo"), - ("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", "source tarball"), - ("libreoffice (dmg)", "LibreOffice_26.2.3_MacOS_aarch64.dmg", "binary (TDF)"), - ("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", "npm package"), + ( + "libreoffice (src)", + "libreoffice-26.2.3.2.tar.xz", + "source tarball", + ), + ( + "libreoffice (dmg)", + "LibreOffice_26.2.3_MacOS_aarch64.dmg", + "binary (TDF)", + ), + ( + "mermaid-cli", + "mermaid-js-mermaid-cli-11.14.0.tgz", + "npm package", + ), ("librsvg", "librsvg/", "Rust source"), - ("GroundingDINO", "GroundingDINO/", "git repo (IDEA-Research)"), + ( + "GroundingDINO", + "GroundingDINO/", + "git repo (IDEA-Research)", + ), ("PaliGemma", "paligemma/", "HuggingFace reference"), ("Odoo 19 CE", "odoo/", "git repo (LGPL-3.0)"), ("ERPNext v15", "erpnext/", "git repo (GPL-3.0)"), ("Frappe Framework", "frappe/", "git repo (MIT)"), ("Gitea v1.25", "gitea/", "git repo (MIT, Go)"), ("Go v1.26", "go/", "git repo (BSD)"), - ("Rust/Cargo", "rustc-1.92.0-src.tar.xz", "source tarball (Apache 2.0 / MIT)"), - ("rustup", "rustup-1.28.1.tar.gz", "source tarball (Apache 2.0)"), - ("Swift v6.3", "swift-6.3.1-RELEASE.tar.gz", "source tarball (Apache 2.0)"), + ( + "Rust/Cargo", + "rustc-1.92.0-src.tar.xz", + "source tarball (Apache 2.0 / MIT)", + ), + ( + "rustup", + "rustup-1.28.1.tar.gz", + "source tarball (Apache 2.0)", + ), + ( + "Swift v6.3", + "swift-6.3.1-RELEASE.tar.gz", + "source tarball (Apache 2.0)", + ), ("yt-dlp", "yt-dlp/", "git repo (Unlicense)"), - ("SQLite", "sqlite-amalgamation-3490100.zip", "amalgamation (Public Domain)"), + ( + "SQLite", + "sqlite-amalgamation-3490100.zip", + "amalgamation (Public Domain)", + ), ("sqlite-vec", "sqlite-vec/", "git repo (MIT)"), ]; @@ -164,7 +199,11 @@ fn cmd_source_verify() -> Result<()> { ("cmake", "cmake-4.2.0-macos-universal.tar.gz", false), ("llama.cpp", "llama.cpp/", true), ("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", false), - ("libreoffice (dmg)", "LibreOffice_26.2.3_MacOS_aarch64.dmg", false), + ( + "libreoffice (dmg)", + "LibreOffice_26.2.3_MacOS_aarch64.dmg", + false, + ), ("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", false), ("librsvg", "librsvg/", true), ("GroundingDINO", "GroundingDINO/", true), @@ -186,7 +225,11 @@ fn cmd_source_verify() -> Result<()> { let mut missing = 0; for (name, path, is_dir) in &checks { let full = src_dir.join(path); - let exists = if *is_dir { full.is_dir() } else { full.is_file() }; + let exists = if *is_dir { + full.is_dir() + } else { + full.is_file() + }; if exists { println!(" ✅ {}", name); ok += 1; @@ -202,7 +245,10 @@ fn cmd_source_verify() -> Result<()> { // ---- Build ---- fn cmd_build(service: &str) -> Result<()> { - let install_sh = Path::new(SERVICE_SRC).parent().unwrap().join("install_services.sh"); + let install_sh = Path::new(SERVICE_SRC) + .parent() + .unwrap() + .join("install_services.sh"); if service == "all" { // Run the full install script @@ -224,8 +270,14 @@ fn cmd_build(service: &str) -> Result<()> { "ffmpeg" => { println!("Building ffmpeg (requires x264 + freetype)..."); // Simplified: run the install script which handles incremental builds - let status = Command::new("bash").arg(&install_sh).env("PREFIX", PREFIX).env("SRC_DIR", SERVICE_SRC).status()?; - if !status.success() { anyhow::bail!("Build failed"); } + let status = Command::new("bash") + .arg(&install_sh) + .env("PREFIX", PREFIX) + .env("SRC_DIR", SERVICE_SRC) + .status()?; + if !status.success() { + anyhow::bail!("Build failed"); + } } "redis" => { let src = format!("{}/redis-7.4.3.tar.gz", SERVICE_SRC); @@ -236,37 +288,67 @@ fn cmd_build(service: &str) -> Result<()> { run_build("postgresql", &src, &format!("cd /tmp && tar xzf {} && cd postgresql-18.3 && ./configure --prefix={}/pgsql/18.3 && make -j$(sysctl -n hw.ncpu) && make install", src, PREFIX))?; } "llama" => { - println!("Building llama.cpp from {}...", format!("{}/llama.cpp", SERVICE_SRC)); + println!( + "Building llama.cpp from {}...", + format!("{}/llama.cpp", SERVICE_SRC) + ); let status = Command::new("cmake") .args(["-B", "build", "-DCMAKE_INSTALL_PREFIX=/tmp/llama_install"]) .current_dir(format!("{}/llama.cpp", SERVICE_SRC)) .status()?; - if !status.success() { anyhow::bail!("cmake failed"); } - let status = Command::new("cmake").args(["--build", "build", "--config", "Release", "-j"]).current_dir(format!("{}/llama.cpp", SERVICE_SRC)).status()?; - if !status.success() { anyhow::bail!("build failed"); } + if !status.success() { + anyhow::bail!("cmake failed"); + } + let status = Command::new("cmake") + .args(["--build", "build", "--config", "Release", "-j"]) + .current_dir(format!("{}/llama.cpp", SERVICE_SRC)) + .status()?; + if !status.success() { + anyhow::bail!("build failed"); + } } "libreoffice" => { let dmg = format!("{}/LibreOffice_26.2.3_MacOS_aarch64.dmg", SERVICE_SRC); let mount = "/tmp/lo_mount"; println!("Extracting LibreOffice from DMG..."); // Mount - let status = Command::new("hdiutil").args(["attach", &dmg, "-nobrowse", "-quiet", "-mountpoint", mount]).status()?; - if !status.success() { anyhow::bail!("DMG mount failed"); } + let status = Command::new("hdiutil") + .args(["attach", &dmg, "-nobrowse", "-quiet", "-mountpoint", mount]) + .status()?; + if !status.success() { + anyhow::bail!("DMG mount failed"); + } // Copy app let lo_dir = format!("{}/libreoffice", PREFIX); let _ = std::fs::remove_dir_all(format!("{}/LibreOffice.app", lo_dir)); std::fs::create_dir_all(&lo_dir)?; - let status = Command::new("cp").args(["-R", &format!("{}/LibreOffice.app", mount), &format!("{}/LibreOffice.app", lo_dir)]).status()?; - if !status.success() { anyhow::bail!("Copy failed"); } + let status = Command::new("cp") + .args([ + "-R", + &format!("{}/LibreOffice.app", mount), + &format!("{}/LibreOffice.app", lo_dir), + ]) + .status()?; + if !status.success() { + anyhow::bail!("Copy failed"); + } // Create symlink std::fs::create_dir_all(format!("{}/bin", lo_dir))?; let _ = std::fs::remove_file(format!("{}/bin/soffice", lo_dir)); - std::os::unix::fs::symlink("../LibreOffice.app/Contents/MacOS/soffice", format!("{}/bin/soffice", lo_dir))?; + std::os::unix::fs::symlink( + "../LibreOffice.app/Contents/MacOS/soffice", + format!("{}/bin/soffice", lo_dir), + )?; // Unmount - let _ = Command::new("hdiutil").args(["detach", mount, "-quiet"]).status(); + let _ = Command::new("hdiutil") + .args(["detach", mount, "-quiet"]) + .status(); println!(" libreoffice installed to {}/bin/soffice", lo_dir); } - _ => anyhow::bail!("Unknown service: {}. Try: all, ffmpeg, redis, postgres, llama, libreoffice, python", service), + _ => anyhow::bail!( + "Unknown service: {}. Try: all, ffmpeg, redis, postgres, llama, libreoffice, python", + service + ), } Ok(()) } @@ -274,7 +356,9 @@ fn cmd_build(service: &str) -> Result<()> { fn run_build(name: &str, src: &str, cmd: &str) -> Result<()> { println!("Building {} from {}...", name, src); let status = Command::new("bash").arg("-c").arg(cmd).status()?; - if !status.success() { anyhow::bail!("{} build failed", name); } + if !status.success() { + anyhow::bail!("{} build failed", name); + } println!(" {} build complete", name); Ok(()) } @@ -292,7 +376,10 @@ fn cmd_install(service: &str) -> Result<()> { let rsvg_src = format!("{}/librsvg/bin/rsvg-convert", PREFIX); let gitea_src = format!("{}/gitea/bin/gitea", PREFIX); let go_src = format!("{}/go/bin/go", PREFIX); - let rustc_src = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", PREFIX); + let rustc_src = format!( + "{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", + PREFIX + ); let swift_src = "/usr/bin/swift".to_string(); let ytdlp_src = "/opt/homebrew/bin/yt-dlp".to_string(); @@ -313,7 +400,9 @@ fn cmd_install(service: &str) -> Result<()> { ]; for (name, src) in &installs { - if service != "all" && service != *name { continue; } + if service != "all" && service != *name { + continue; + } if Path::new(src).exists() { println!(" ✅ {} installed: {}", name, src); } else { @@ -370,12 +459,18 @@ fn cmd_config(service: &str) -> Result<()> { println!("MOMENTRY_LLM_SUMMARY_URL=http://localhost:8082/v1/chat/completions"); println!("MOMENTRY_OUTPUT_DIR={}/momentry/output_dev", PREFIX); println!("MOMENTRY_SCRIPTS_DIR={}/momentry_core_0.1/scripts", PREFIX); - println!("MOMENTRY_PYTHON_PATH={}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX); + println!( + "MOMENTRY_PYTHON_PATH={}/.pyenv/versions/3.11.15/bin/python3.11", + PREFIX + ); } if service == "all" || service == "embedding" { println!("\n--- Embedding Server config ---"); - println!("# Start: {} embeddinggemma_server.py --port 11436", format!("{}/momentry_core_0.1/scripts", PREFIX)); + println!( + "# Start: {} embeddinggemma_server.py --port 11436", + format!("{}/momentry_core_0.1/scripts", PREFIX) + ); println!("MODEL=google/embeddinggemma-300m"); println!("PORT=11436"); println!("DEVICE=mps"); @@ -393,25 +488,58 @@ fn cmd_launch_generate() -> Result<()> { let pg_args = format!("-D {}/pgsql/18.3/data", PREFIX); let redis_bin = format!("{}/redis/bin/redis-server", PREFIX); let redis_args = format!("{}/redis/redis.conf", PREFIX); - let qdrant_bin = format!("{}/momentry_core_0.1/services/qdrant/target/release/qdrant", PREFIX); + let qdrant_bin = format!( + "{}/momentry_core_0.1/services/qdrant/target/release/qdrant", + PREFIX + ); let embed_bin = format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX); - let embed_args = format!("{}/momentry_core_0.1/scripts/embeddinggemma_server.py --port 11436", PREFIX); + let embed_args = format!( + "{}/momentry_core_0.1/scripts/embeddinggemma_server.py --port 11436", + PREFIX + ); let llama_bin = format!("{}/llama/bin/llama-server", PREFIX); - let llama_args = format!("-m {}/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf --port 8082 -ngl 99 -c 16384", PREFIX); - let play_bin = format!("{}/momentry_core_0.1/target/debug/momentry_playground", PREFIX); + let llama_args = format!( + "-m {}/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf --port 8082 -ngl 99 -c 16384", + PREFIX + ); + let play_bin = format!( + "{}/momentry_core_0.1/target/debug/momentry_playground", + PREFIX + ); let services: Vec<(&str, &str, &str, &str)> = vec![ - ("com.momentry.postgres", &pg_bin, &pg_args, "PostgreSQL"), - ("com.momentry.redis", &redis_bin, &redis_args, "Redis"), - ("com.momentry.qdrant", &qdrant_bin, "", "Qdrant"), - ("com.momentry.embedding", &embed_bin, &embed_args, "EmbeddingGemma"), - ("com.momentry.llama", &llama_bin, &llama_args, "LLM (llama.cpp)"), - ("com.momentry.playground", &play_bin, "server --port 3003", "Momentry Playground"), - ("com.momentry.worker", &play_bin, "worker --max-concurrent 2 --poll-interval 5", "Momentry Worker"), + ("com.momentry.postgres", &pg_bin, &pg_args, "PostgreSQL"), + ("com.momentry.redis", &redis_bin, &redis_args, "Redis"), + ("com.momentry.qdrant", &qdrant_bin, "", "Qdrant"), + ( + "com.momentry.embedding", + &embed_bin, + &embed_args, + "EmbeddingGemma", + ), + ( + "com.momentry.llama", + &llama_bin, + &llama_args, + "LLM (llama.cpp)", + ), + ( + "com.momentry.playground", + &play_bin, + "server --port 3003", + "Momentry Playground", + ), + ( + "com.momentry.worker", + &play_bin, + "worker --max-concurrent 2 --poll-interval 5", + "Momentry Worker", + ), ]; for (label, bin, args, _desc) in &services { - let plist = format!(r#" + let plist = format!( + r#" @@ -451,7 +579,11 @@ fn cmd_launch_generate() -> Result<()> { fs::write(&plist_path, plist)?; println!(" 📝 {} → {:?}", label, plist_path.file_name().unwrap()); } - println!("\n Generated {} plist files in {}", services.len(), LAUNCH_DIR); + println!( + "\n Generated {} plist files in {}", + services.len(), + LAUNCH_DIR + ); Ok(()) } @@ -461,7 +593,9 @@ fn cmd_launch_load() -> Result<()> { let path = entry.path(); if path.extension().map_or(false, |e| e == "plist") { let name = path.file_stem().unwrap().to_str().unwrap_or("?"); - let status = Command::new("launchctl").args(["load", "-w", path.to_str().unwrap()]).status(); + let status = Command::new("launchctl") + .args(["load", "-w", path.to_str().unwrap()]) + .status(); match status { Ok(s) if s.success() => println!(" ✅ loaded: {}", name), Ok(_) => println!(" ⚠️ load failed: {}", name), @@ -478,7 +612,9 @@ fn cmd_launch_unload() -> Result<()> { let path = entry.path(); if path.extension().map_or(false, |e| e == "plist") { let name = path.file_stem().unwrap().to_str().unwrap_or("?"); - let status = Command::new("launchctl").args(["unload", path.to_str().unwrap()]).status(); + let status = Command::new("launchctl") + .args(["unload", path.to_str().unwrap()]) + .status(); match status { Ok(s) if s.success() => println!(" ✅ unloaded: {}", name), Ok(_) => println!(" ⚠️ unload failed: {}", name), @@ -504,7 +640,11 @@ fn cmd_launch_status() -> Result<()> { Ok(o) if o.status.success() => { let stdout = String::from_utf8_lossy(&o.stdout); if stdout.contains("PID") || stdout.lines().count() > 1 { - let pid = stdout.lines().nth(1).and_then(|l| l.split_whitespace().next()).unwrap_or("-"); + let pid = stdout + .lines() + .nth(1) + .and_then(|l| l.split_whitespace().next()) + .unwrap_or("-"); println!(" 🟢 {} (PID: {})", label, pid); } else { println!(" ⚪ {} (not running)", label); @@ -519,7 +659,8 @@ fn cmd_launch_status() -> Result<()> { // ---- Env ---- fn cmd_env(output: &Option) -> Result<()> { - let env_content = format!(r#"# Momentry Core — Environment Configuration + let env_content = format!( + r#"# Momentry Core — Environment Configuration # Generated: {} # Service: {} env @@ -601,8 +742,14 @@ fn cmd_test() -> Result<()> { let rsvg_bin = format!("{}/librsvg/bin/rsvg-convert", PREFIX); let gitea_bin = format!("{}/gitea/bin/gitea", PREFIX); let go_bin = format!("{}/go/bin/go", PREFIX); - let rustc_bin = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", PREFIX); - let cargo_bin = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/cargo", PREFIX); + let rustc_bin = format!( + "{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", + PREFIX + ); + let cargo_bin = format!( + "{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/cargo", + PREFIX + ); let swift_bin = "/usr/bin/swift".to_string(); let ytdlp_bin = "/opt/homebrew/bin/yt-dlp".to_string(); @@ -641,7 +788,11 @@ fn cmd_test() -> Result<()> { let output = Command::new(bin).args(args).output(); match output { Ok(o) if o.status.success() => { - let ver = String::from_utf8_lossy(&o.stdout).lines().next().unwrap_or("?").to_string(); + let ver = String::from_utf8_lossy(&o.stdout) + .lines() + .next() + .unwrap_or("?") + .to_string(); println!("✅ {}", ver.chars().take(70).collect::()); pass += 1; } @@ -666,14 +817,87 @@ fn cmd_test() -> Result<()> { // Functional tests println!("\n--- Functional Tests ---"); // Create test docx for libreoffice test - let _ = std::fs::write("/tmp/svc_test_func.docx", "Service test document for LibreOffice conversion"); + let _ = std::fs::write( + "/tmp/svc_test_func.docx", + "Service test document for LibreOffice conversion", + ); let func_tests = [ - ("ffprobe probe", "ffprobe", vec!["-v", "error", "-show_entries", "format=duration", "-of", "csv=p=0", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4"]), - ("ffmpeg audio extract", "ffmpeg", vec!["-y", "-v", "quiet", "-i", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", "-t", "2", "-ar", "16000", "-ac", "1", "/tmp/svc_test_audio.wav"]), - ("ffmpeg frame extract", "ffmpeg", vec!["-y", "-v", "quiet", "-i", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", "-ss", "100", "-vframes", "1", "/tmp/svc_test_frame.jpg"]), - ("libreoffice doc→txt", "libreoffice", vec!["--headless", "--convert-to", "txt", "/tmp/svc_test_func.docx", "--outdir", "/tmp/"]), - ("rsvg-convert svg→png", "rsvg-convert", vec!["-o", "/tmp/svc_test_rsvg.png", "/tmp/test_rsvg.svg"]), - ("mmdc mermaid→png", "mermaid-cli", vec!["-i", "/tmp/test_mermaid.mmd", "-o", "/tmp/svc_test_mmd.png", "-w", "200"]), + ( + "ffprobe probe", + "ffprobe", + vec![ + "-v", + "error", + "-show_entries", + "format=duration", + "-of", + "csv=p=0", + "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", + ], + ), + ( + "ffmpeg audio extract", + "ffmpeg", + vec![ + "-y", + "-v", + "quiet", + "-i", + "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", + "-t", + "2", + "-ar", + "16000", + "-ac", + "1", + "/tmp/svc_test_audio.wav", + ], + ), + ( + "ffmpeg frame extract", + "ffmpeg", + vec![ + "-y", + "-v", + "quiet", + "-i", + "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", + "-ss", + "100", + "-vframes", + "1", + "/tmp/svc_test_frame.jpg", + ], + ), + ( + "libreoffice doc→txt", + "libreoffice", + vec![ + "--headless", + "--convert-to", + "txt", + "/tmp/svc_test_func.docx", + "--outdir", + "/tmp/", + ], + ), + ( + "rsvg-convert svg→png", + "rsvg-convert", + vec!["-o", "/tmp/svc_test_rsvg.png", "/tmp/test_rsvg.svg"], + ), + ( + "mmdc mermaid→png", + "mermaid-cli", + vec![ + "-i", + "/tmp/test_mermaid.mmd", + "-o", + "/tmp/svc_test_mmd.png", + "-w", + "200", + ], + ), ]; for (desc, bin_name, args) in &func_tests { @@ -689,8 +913,14 @@ fn cmd_test() -> Result<()> { }; let output = Command::new(bin).args(args).output(); match output { - Ok(o) if o.status.success() => { println!("✅"); pass += 1; } - _ => { println!("❌"); fail += 1; } + Ok(o) if o.status.success() => { + println!("✅"); + pass += 1; + } + _ => { + println!("❌"); + fail += 1; + } } } @@ -706,7 +936,10 @@ fn cmd_test() -> Result<()> { fn cmd_report() -> Result<()> { println!("=== Momentry Service Report ==="); - println!("Generated: {}", chrono::Local::now().format("%Y-%m-%d %H:%M:%S")); + println!( + "Generated: {}", + chrono::Local::now().format("%Y-%m-%d %H:%M:%S") + ); println!(); // 1. Source status @@ -730,13 +963,25 @@ fn cmd_report() -> Result<()> { println!("\n## 2. Binaries"); let binaries = [ ("cmake", &format!("{}/bin/cmake", PREFIX)), - ("python3.11", &format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX)), + ( + "python3.11", + &format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX), + ), ("ffmpeg", &format!("{}/ffmpeg_build/bin/ffmpeg", PREFIX)), ("ffprobe", &format!("{}/ffmpeg_build/bin/ffprobe", PREFIX)), - ("redis-server", &format!("{}/redis/bin/redis-server", PREFIX)), + ( + "redis-server", + &format!("{}/redis/bin/redis-server", PREFIX), + ), ("postgres", &format!("{}/pgsql/18.3/bin/postgres", PREFIX)), - ("llama-server", &format!("{}/llama/bin/llama-server", PREFIX)), - ("libreoffice", &format!("{}/libreoffice/bin/soffice", PREFIX)), + ( + "llama-server", + &format!("{}/llama/bin/llama-server", PREFIX), + ), + ( + "libreoffice", + &format!("{}/libreoffice/bin/soffice", PREFIX), + ), ]; for (name, path) in &binaries { let status = if Path::new(path).exists() { @@ -772,9 +1017,18 @@ fn cmd_report() -> Result<()> { // 4. Ports println!("\n## 4. Port Status"); - let ports = [(3003, "Playground"), (5432, "PostgreSQL"), (6379, "Redis"), (6333, "Qdrant"), (8082, "LLM"), (11436, "Embedding")]; + let ports = [ + (3003, "Playground"), + (5432, "PostgreSQL"), + (6379, "Redis"), + (6333, "Qdrant"), + (8082, "LLM"), + (11436, "Embedding"), + ]; for (port, name) in &ports { - let output = Command::new("lsof").args(["-i", &format!(":{}", port)]).output(); + let output = Command::new("lsof") + .args(["-i", &format!(":{}", port)]) + .output(); match output { Ok(o) if o.status.success() => println!(" 🟢 :{} ({})", port, name), _ => println!(" ⚪ :{} ({})", port, name), @@ -797,14 +1051,21 @@ fn cmd_report() -> Result<()> { } fn format_bytes(bytes: u64) -> String { - if bytes > 1024 * 1024 * 1024 { format!("{:.1}GB", bytes as f64 / 1_073_741_824.0) } - else if bytes > 1024 * 1024 { format!("{:.0}MB", bytes as f64 / 1_048_576.0) } - else if bytes > 1024 { format!("{:.0}KB", bytes as f64 / 1024.0) } - else { format!("{}B", bytes) } + if bytes > 1024 * 1024 * 1024 { + format!("{:.1}GB", bytes as f64 / 1_073_741_824.0) + } else if bytes > 1024 * 1024 { + format!("{:.0}MB", bytes as f64 / 1_048_576.0) + } else if bytes > 1024 { + format!("{:.0}KB", bytes as f64 / 1024.0) + } else { + format!("{}B", bytes) + } } fn format_dir_size(path: &Path) -> String { - let output = Command::new("du").args(["-sh", path.to_str().unwrap()]).output(); + let output = Command::new("du") + .args(["-sh", path.to_str().unwrap()]) + .output(); match output { Ok(o) if o.status.success() => { let s = String::from_utf8_lossy(&o.stdout); @@ -824,7 +1085,10 @@ async fn main() -> Result<()> { SourceAction::List => cmd_source_list()?, SourceAction::Verify => cmd_source_verify()?, SourceAction::Download { name } => { - println!("Downloading: {} (use install_services.sh for full download)", name); + println!( + "Downloading: {} (use install_services.sh for full download)", + name + ); println!("Source URLs:"); println!(" ffmpeg: https://ffmpeg.org/releases/ffmpeg-7.1.1.tar.xz"); println!(" redis: https://download.redis.io/releases/redis-7.4.3.tar.gz"); diff --git a/src/core/chunk/rule3_ingest.rs b/src/core/chunk/rule3_ingest.rs index 3c33aa0..bb73ff0 100644 --- a/src/core/chunk/rule3_ingest.rs +++ b/src/core/chunk/rule3_ingest.rs @@ -75,15 +75,13 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result { // Query chunks table for Rule 1 sentence chunks let chunk_table = schema::table_name("chunk"); - let rule1_rows: Vec<(String,)> = sqlx::query_as( - &format!( - "SELECT chunk_id FROM {} \ + let rule1_rows: Vec<(String,)> = sqlx::query_as(&format!( + "SELECT chunk_id FROM {} \ WHERE file_uuid = $1 AND chunk_type = 'sentence' \ AND start_frame >= $2 \ AND end_frame <= $3", - chunk_table - ), - ) + chunk_table + )) .bind(file_uuid) .bind(scene.start_frame as i64) .bind(scene.end_frame as i64) @@ -101,16 +99,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result { // Let's re-query text directly. } - let texts: Vec = sqlx::query_scalar( - &format!( - "SELECT text_content FROM {} \ + let texts: Vec = sqlx::query_scalar(&format!( + "SELECT text_content FROM {} \ WHERE file_uuid = $1 AND chunk_type = 'sentence' \ AND start_frame >= $2 \ AND end_frame <= $3 \ ORDER BY start_frame ASC", - chunk_table - ), - ) + chunk_table + )) .bind(file_uuid) .bind(scene.start_frame as i64) .bind(scene.end_frame as i64) @@ -154,16 +150,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result { "scene_number": scene.scene_number }); - sqlx::query( - &format!( - "INSERT INTO {} (file_uuid, chunk_id, chunk_type, \ + sqlx::query(&format!( + "INSERT INTO {} (file_uuid, chunk_id, chunk_type, \ start_time, end_time, fps, start_frame, end_frame, \ content, text_content, summary_text, metadata, child_chunk_ids) \ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) \ ON CONFLICT (file_uuid, chunk_id) DO NOTHING", - chunk_table - ), - ) + chunk_table + )) .bind(file_uuid) .bind(&chunk_id) .bind(scene.scene_number as i32) diff --git a/src/core/config.rs b/src/core/config.rs index 683081f..7d46e1e 100644 --- a/src/core/config.rs +++ b/src/core/config.rs @@ -20,8 +20,7 @@ pub fn set_cache_enabled(enabled: bool) { } // Switch 1: watcher detects new file → auto-register -pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy> = - Lazy::new(|| RwLock::new(false)); +pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy> = Lazy::new(|| RwLock::new(false)); pub fn get_watcher_auto_register() -> bool { *RUNTIME_WATCHER_AUTO_REGISTER.read().unwrap() @@ -33,8 +32,7 @@ pub fn set_watcher_auto_register(enabled: bool) { } // Switch 2: register → auto-trigger processing pipeline -pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy> = - Lazy::new(|| RwLock::new(false)); +pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy> = Lazy::new(|| RwLock::new(false)); pub fn get_auto_pipeline_enabled() -> bool { *RUNTIME_AUTO_PIPELINE_ENABLED.read().unwrap() @@ -107,6 +105,30 @@ pub static REDIS_KEY_PREFIX: Lazy = pub static DATABASE_SCHEMA: Lazy = Lazy::new(|| env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "public".to_string())); +pub static SYSTEM_TIMEZONE: Lazy = Lazy::new(|| { + if let Ok(tz) = env::var("MOMENTRY_TIMEZONE") { + if !tz.is_empty() { + return tz; + } + } + if let Ok(tz) = env::var("TZ") { + if !tz.is_empty() { + return tz; + } + } + // macOS: /etc/localtime → /var/db/timezone/zoneinfo/Asia/Taipei + // Linux: /etc/localtime → /usr/share/zoneinfo/Asia/Taipei + if let Ok(path) = std::fs::read_link("/etc/localtime") { + let s = path.to_string_lossy(); + for prefix in &["/usr/share/zoneinfo/", "/var/db/timezone/zoneinfo/"] { + if let Some(tz) = s.strip_prefix(prefix) { + return tz.to_string(); + } + } + } + "Asia/Taipei".to_string() +}); + pub static MONGODB_DATABASE: Lazy = Lazy::new(|| env::var("MONGODB_DATABASE").unwrap_or_else(|_| "momentry".to_string())); diff --git a/src/core/db/postgres_db.rs b/src/core/db/postgres_db.rs index 049a354..1a13036 100644 --- a/src/core/db/postgres_db.rs +++ b/src/core/db/postgres_db.rs @@ -65,8 +65,9 @@ pub struct FileIdentityRecord { pub speaker_count: Option, pub start_frame: Option, pub end_frame: Option, + pub start_time: Option, + pub end_time: Option, pub confidence: Option, - pub fps: f64, } #[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)] @@ -423,7 +424,9 @@ impl sqlx::postgres::PgHasArrayType for ProcessorType { } impl<'r> sqlx::Decode<'r, sqlx::Postgres> for ProcessorType { - fn decode(value: sqlx::postgres::PgValueRef<'r>) -> Result> { + fn decode( + value: sqlx::postgres::PgValueRef<'r>, + ) -> Result> { let s: &str = <&str as sqlx::Decode>::decode(value)?; ProcessorType::from_db_str(s).ok_or_else(|| format!("Unknown processor type: {}", s).into()) } @@ -581,9 +584,12 @@ impl sqlx::postgres::PgHasArrayType for ProcessorJobStatus { } impl<'r> sqlx::Decode<'r, sqlx::Postgres> for ProcessorJobStatus { - fn decode(value: sqlx::postgres::PgValueRef<'r>) -> Result> { + fn decode( + value: sqlx::postgres::PgValueRef<'r>, + ) -> Result> { let s: &str = <&str as sqlx::Decode>::decode(value)?; - ProcessorJobStatus::from_db_str(s).ok_or_else(|| format!("Unknown processor job status: {}", s).into()) + ProcessorJobStatus::from_db_str(s) + .ok_or_else(|| format!("Unknown processor job status: {}", s).into()) } } @@ -1340,10 +1346,13 @@ impl PostgresDb { .await?; let pre_chunks = schema::table_name("pre_chunks"); - sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1::uuid", pre_chunks)) - .bind(uuid) - .execute(&self.pool) - .await?; + sqlx::query(&format!( + "DELETE FROM {} WHERE file_uuid = $1::uuid", + pre_chunks + )) + .bind(uuid) + .execute(&self.pool) + .await?; sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", videos)) .bind(uuid) @@ -1885,7 +1894,10 @@ impl PostgresDb { // 認證系統 (Authentication) // ========================================== - pub async fn get_user_by_username(&self, username: &str) -> Result> { + pub async fn get_user_by_username( + &self, + username: &str, + ) -> Result> { let row = sqlx::query_as::<_, (i32, String, String, String)>( "SELECT id, username, password_hash, role FROM users WHERE username = $1 AND status = 'active'" ) @@ -1895,7 +1907,13 @@ impl PostgresDb { Ok(row) } - pub async fn create_session(&self, session_id: &str, user_id: i32, api_key_id: &str, ttl_hours: i64) -> Result<()> { + pub async fn create_session( + &self, + session_id: &str, + user_id: i32, + api_key_id: &str, + ttl_hours: i64, + ) -> Result<()> { let table = schema::table_name("sessions"); let interval = format!("{} hours", ttl_hours); sqlx::query( @@ -1910,7 +1928,10 @@ impl PostgresDb { Ok(()) } - pub async fn get_session_by_id(&self, session_id: &str) -> Result)>> { + pub async fn get_session_by_id( + &self, + session_id: &str, + ) -> Result)>> { let table = schema::table_name("sessions"); let row = sqlx::query_as::<_, (i32, i32, String, chrono::DateTime)>( @@ -1940,7 +1961,11 @@ impl PostgresDb { Ok(r.rows_affected()) } - pub async fn add_jwt_to_blacklist(&self, jti: &str, expires_at: chrono::DateTime) -> Result<()> { + pub async fn add_jwt_to_blacklist( + &self, + jti: &str, + expires_at: chrono::DateTime, + ) -> Result<()> { sqlx::query( "INSERT INTO jwt_blacklist (jti, expires_at) VALUES ($1, $2) ON CONFLICT (jti) DO NOTHING" ) @@ -1953,7 +1978,7 @@ impl PostgresDb { pub async fn is_jwt_blacklisted(&self, jti: &str) -> Result { let count: i64 = sqlx::query_scalar( - "SELECT COUNT(*) FROM jwt_blacklist WHERE jti = $1 AND expires_at > CURRENT_TIMESTAMP" + "SELECT COUNT(*) FROM jwt_blacklist WHERE jti = $1 AND expires_at > CURRENT_TIMESTAMP", ) .bind(jti) .fetch_one(&self.pool) @@ -1970,11 +1995,16 @@ impl PostgresDb { Ok(()) } - pub async fn upsert_user(&self, username: &str, password_hash: &str, role: &str) -> Result { + pub async fn upsert_user( + &self, + username: &str, + password_hash: &str, + role: &str, + ) -> Result { let id: i32 = sqlx::query_scalar( "INSERT INTO users (username, password_hash, role) VALUES ($1, $2, $3) \ ON CONFLICT (username) DO UPDATE SET password_hash = EXCLUDED.password_hash, \ - updated_at = CURRENT_TIMESTAMP RETURNING id" + updated_at = CURRENT_TIMESTAMP RETURNING id", ) .bind(username) .bind(password_hash) @@ -2141,13 +2171,22 @@ impl PostgresDb { pub async fn update_video_status(&self, uuid: &str, status: VideoStatus) -> Result<()> { let table = schema::table_name("videos"); let status_str = status.as_str(); - sqlx::query(&format!("UPDATE {} SET status = $1 WHERE file_uuid = $2", table)) - .bind(status_str).bind(uuid) - .execute(&self.pool).await?; + sqlx::query(&format!( + "UPDATE {} SET status = $1 WHERE file_uuid = $2", + table + )) + .bind(status_str) + .bind(uuid) + .execute(&self.pool) + .await?; Ok(()) } - pub async fn update_processing_status_completed(&self, uuid: &str, total_frames: u64) -> Result<()> { + pub async fn update_processing_status_completed( + &self, + uuid: &str, + total_frames: u64, + ) -> Result<()> { let table = schema::table_name("videos"); let status = serde_json::json!({ "phase": "COMPLETED", @@ -2155,13 +2194,22 @@ impl PostgresDb { "total_frames": total_frames, "progress": serde_json::Value::Object(serde_json::Map::new()) }); - sqlx::query(&format!("UPDATE {} SET processing_status = $1 WHERE file_uuid = $2", table)) - .bind(&status).bind(uuid) - .execute(&self.pool).await?; + sqlx::query(&format!( + "UPDATE {} SET processing_status = $1 WHERE file_uuid = $2", + table + )) + .bind(&status) + .bind(uuid) + .execute(&self.pool) + .await?; Ok(()) } - pub async fn store_asr_pre_chunks_batch(&self, uuid: &str, segments: &[(i64, i64, i64, f64, f64, serde_json::Value)]) -> Result<()> { + pub async fn store_asr_pre_chunks_batch( + &self, + uuid: &str, + segments: &[(i64, i64, i64, f64, f64, serde_json::Value)], + ) -> Result<()> { let table = schema::table_name("pre_chunks"); for (i, _start_frame, _end_frame, start, end, data) in segments { sqlx::query(&format!( @@ -2174,7 +2222,11 @@ impl PostgresDb { Ok(()) } - pub async fn store_cut_pre_chunks_batch(&self, uuid: &str, scenes: &[(i64, i64, i64, f64, f64, serde_json::Value)]) -> Result<()> { + pub async fn store_cut_pre_chunks_batch( + &self, + uuid: &str, + scenes: &[(i64, i64, i64, f64, f64, serde_json::Value)], + ) -> Result<()> { let table = schema::table_name("pre_chunks"); for (i, _sf, _ef, start, end, data) in scenes { sqlx::query(&format!( @@ -2188,7 +2240,16 @@ impl PostgresDb { } pub async fn store_raw_pre_chunks_batch( - &self, uuid: &str, processor_type: &str, chunks: &[(i64, Option, serde_json::Value, Option, Option)] + &self, + uuid: &str, + processor_type: &str, + chunks: &[( + i64, + Option, + serde_json::Value, + Option, + Option, + )], ) -> Result<()> { let table = schema::table_name("pre_chunks"); for (frame, ts, data, text, _) in chunks { @@ -2203,7 +2264,9 @@ impl PostgresDb { } pub async fn store_face_detections_batch( - &self, uuid: &str, detections: &[(i64, f64, i32, i32, i32, i32, f32)] + &self, + uuid: &str, + detections: &[(i64, f64, i32, i32, i32, i32, f32)], ) -> Result<()> { let table = schema::table_name("face_detections"); for (frame, ts, x, y, w, h, conf) in detections { @@ -2217,7 +2280,11 @@ impl PostgresDb { Ok(()) } - pub async fn store_scene_pre_chunks_batch(&self, uuid: &str, scenes: &[(i64, i64, i64, f64, f64, serde_json::Value)]) -> Result<()> { + pub async fn store_scene_pre_chunks_batch( + &self, + uuid: &str, + scenes: &[(i64, i64, i64, f64, f64, serde_json::Value)], + ) -> Result<()> { let table = schema::table_name("pre_chunks"); for (_i, _sf, _ef, start, end, data) in scenes { sqlx::query(&format!( @@ -2230,22 +2297,33 @@ impl PostgresDb { Ok(()) } - pub async fn store_chunk_in_tx(&self, chunk: &crate::core::chunk::types::Chunk, tx: &mut sqlx::Transaction<'_, sqlx::Postgres>) -> Result<()> { + pub async fn store_chunk_in_tx( + &self, + chunk: &crate::core::chunk::types::Chunk, + tx: &mut sqlx::Transaction<'_, sqlx::Postgres>, + ) -> Result<()> { let table = schema::table_name("chunk"); let ct_str = format!("{:?}", chunk.chunk_type).to_lowercase(); let fps = chunk.fps; + let start_time = chunk.start_frame as f64 / chunk.fps; + let end_time = chunk.end_frame as f64 / chunk.fps; sqlx::query(&format!( - "INSERT INTO {} (file_uuid, chunk_id, chunk_type, start_frame, end_frame, text_content, content, fps) \ - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) ON CONFLICT DO NOTHING", table + "INSERT INTO {} (file_uuid, chunk_id, chunk_type, start_frame, end_frame, start_time, end_time, text_content, content, fps) \ + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) ON CONFLICT DO NOTHING", table )) .bind(&chunk.uuid).bind(&chunk.chunk_id).bind(&ct_str) .bind(chunk.start_frame).bind(chunk.end_frame) + .bind(start_time).bind(end_time) .bind(&chunk.text_content).bind(&chunk.content).bind(fps) .execute(&mut **tx).await?; Ok(()) } - pub async fn get_chunk_by_chunk_id_and_uuid(&self, chunk_id: &str, uuid: &str) -> Result> { + pub async fn get_chunk_by_chunk_id_and_uuid( + &self, + chunk_id: &str, + uuid: &str, + ) -> Result> { let table = schema::table_name("chunk"); let row = sqlx::query_as::<_, (String, f64, f64, f64, String, Option, Option)>( &format!("SELECT chunk_type, start_time, end_time, fps, content::text, text_content, metadata FROM {} WHERE file_uuid = $1 AND chunk_id = $2 LIMIT 1", table) @@ -2253,44 +2331,64 @@ impl PostgresDb { .bind(uuid).bind(chunk_id) .fetch_optional(&self.pool).await?; - Ok(row.map(|(ct, st, et, fps, content_str, text_content, metadata)| { - let content: serde_json::Value = serde_json::from_str(&content_str).unwrap_or_default(); - let chunk_type = match ct.as_str() { - "time" => crate::core::chunk::types::ChunkType::TimeBased, - "sentence" => crate::core::chunk::types::ChunkType::Sentence, - "cut" => crate::core::chunk::types::ChunkType::Cut, - "trace" => crate::core::chunk::types::ChunkType::Trace, - "story" | "story_parent" | "story_child" => crate::core::chunk::types::ChunkType::Story, - "visual" => crate::core::chunk::types::ChunkType::Visual, - _ => crate::core::chunk::types::ChunkType::Story, - }; - let start_frame = (st * fps).round() as i64; - let end_frame = (et * fps).round() as i64; - let mut c = crate::core::chunk::types::Chunk::new( - 0, uuid.to_string(), chunk_id.to_string(), - chunk_type, crate::core::chunk::types::ChunkRule::Rule1, - start_frame, end_frame, fps, content, - ); - c.text_content = text_content; - c.metadata = metadata; - c - })) + Ok( + row.map(|(ct, st, et, fps, content_str, text_content, metadata)| { + let content: serde_json::Value = + serde_json::from_str(&content_str).unwrap_or_default(); + let chunk_type = match ct.as_str() { + "time" => crate::core::chunk::types::ChunkType::TimeBased, + "sentence" => crate::core::chunk::types::ChunkType::Sentence, + "cut" => crate::core::chunk::types::ChunkType::Cut, + "trace" => crate::core::chunk::types::ChunkType::Trace, + "story" | "story_parent" | "story_child" => { + crate::core::chunk::types::ChunkType::Story + } + "visual" => crate::core::chunk::types::ChunkType::Visual, + _ => crate::core::chunk::types::ChunkType::Story, + }; + let start_frame = (st * fps).round() as i64; + let end_frame = (et * fps).round() as i64; + let mut c = crate::core::chunk::types::Chunk::new( + 0, + uuid.to_string(), + chunk_id.to_string(), + chunk_type, + crate::core::chunk::types::ChunkRule::Rule1, + start_frame, + end_frame, + fps, + content, + ); + c.text_content = text_content; + c.metadata = metadata; + c + }), + ) } - pub async fn get_running_jobs_with_all_processors_done(&self, _limit: i32) -> Result> { - self.list_monitor_jobs_by_status(MonitorJobStatus::Running).await + pub async fn get_running_jobs_with_all_processors_done( + &self, + _limit: i32, + ) -> Result> { + self.list_monitor_jobs_by_status(MonitorJobStatus::Running) + .await } pub async fn get_all_running_jobs(&self, _limit: i32) -> Result> { - self.list_monitor_jobs_by_status(MonitorJobStatus::Running).await + self.list_monitor_jobs_by_status(MonitorJobStatus::Running) + .await } pub async fn get_pending_jobs(&self, _limit: i32) -> Result> { - self.list_monitor_jobs_by_status(MonitorJobStatus::Pending).await + self.list_monitor_jobs_by_status(MonitorJobStatus::Pending) + .await } pub async fn update_job_processors_arrays( - &self, job_id: i32, completed: Vec, failed: Vec + &self, + job_id: i32, + completed: Vec, + failed: Vec, ) -> Result<()> { let table = schema::table_name("monitor_jobs"); sqlx::query(&format!( @@ -2302,7 +2400,10 @@ impl PostgresDb { } pub async fn create_processor_result( - &self, job_id: i32, processor_type: crate::core::db::ProcessorType, uuid: &str + &self, + job_id: i32, + processor_type: crate::core::db::ProcessorType, + uuid: &str, ) -> Result { let table = schema::table_name("processor_results"); let ptype = processor_type.as_str(); @@ -2324,14 +2425,22 @@ impl PostgresDb { }); let path = std::path::Path::new(crate::core::config::OUTPUT_DIR.as_str()) .join(format!("pipeline_{}.log", uuid)); - if let Ok(mut file) = std::fs::OpenOptions::new().create(true).append(true).open(&path) { + if let Ok(mut file) = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(&path) + { use std::io::Write; let _ = writeln!(file, "{}", entry); } } pub async fn upsert_processor_result( - &self, job_id: i32, processor_type: crate::core::db::ProcessorType, uuid: &str, status: &str + &self, + job_id: i32, + processor_type: crate::core::db::ProcessorType, + uuid: &str, + status: &str, ) -> Result { let table = schema::table_name("processor_results"); let ptype = processor_type.as_str(); @@ -2349,7 +2458,10 @@ impl PostgresDb { Ok(id) } - pub async fn get_processor_results_by_job(&self, job_id: i32) -> Result> { + pub async fn get_processor_results_by_job( + &self, + job_id: i32, + ) -> Result> { let table = schema::table_name("processor_results"); use sqlx::Row; let rows = sqlx::query( @@ -2357,30 +2469,44 @@ impl PostgresDb { ) .bind(job_id) .fetch_all(&self.pool).await?; - Ok(rows.into_iter().map(|r| { - let ptype: &str = r.get("processor"); - let st: &str = r.get("status"); - crate::core::db::ProcessorResult { - id: r.get("id"), - job_id: r.get("job_id"), - processor_type: crate::core::db::ProcessorType::from_db_str(ptype).unwrap_or(crate::core::db::ProcessorType::Asr), - status: crate::core::db::ProcessorJobStatus::from_db_str(st).unwrap_or(crate::core::db::ProcessorJobStatus::Pending), - started_at: r.try_get::<&str, _>("started_at").ok().map(|s| s.to_string()), - completed_at: r.try_get::<&str, _>("completed_at").ok().map(|s| s.to_string()), - duration_secs: r.get("duration_secs"), - chunks_produced: r.get("chunks_produced"), - frames_processed: r.get("frames_processed"), - output_size_bytes: r.get("output_size_bytes"), - error_message: r.get("error_message"), - output_data: r.get("output_data"), - retry_count: r.get("retry_count"), - created_at: r.get::<&str, _>("created_at").to_string(), - updated_at: r.get::<&str, _>("updated_at").to_string(), - } - }).collect()) + Ok(rows + .into_iter() + .map(|r| { + let ptype: &str = r.get("processor"); + let st: &str = r.get("status"); + crate::core::db::ProcessorResult { + id: r.get("id"), + job_id: r.get("job_id"), + processor_type: crate::core::db::ProcessorType::from_db_str(ptype) + .unwrap_or(crate::core::db::ProcessorType::Asr), + status: crate::core::db::ProcessorJobStatus::from_db_str(st) + .unwrap_or(crate::core::db::ProcessorJobStatus::Pending), + started_at: r + .try_get::<&str, _>("started_at") + .ok() + .map(|s| s.to_string()), + completed_at: r + .try_get::<&str, _>("completed_at") + .ok() + .map(|s| s.to_string()), + duration_secs: r.get("duration_secs"), + chunks_produced: r.get("chunks_produced"), + frames_processed: r.get("frames_processed"), + output_size_bytes: r.get("output_size_bytes"), + error_message: r.get("error_message"), + output_data: r.get("output_data"), + retry_count: r.get("retry_count"), + created_at: r.get::<&str, _>("created_at").to_string(), + updated_at: r.get::<&str, _>("updated_at").to_string(), + } + }) + .collect()) } - pub async fn get_latest_processor_results_by_file_uuid(&self, uuid: &str) -> Result> { + pub async fn get_latest_processor_results_by_file_uuid( + &self, + uuid: &str, + ) -> Result> { let table = schema::table_name("processor_results"); let jt = schema::table_name("monitor_jobs"); use sqlx::Row; @@ -2389,35 +2515,55 @@ impl PostgresDb { ) .bind(uuid) .fetch_all(&self.pool).await?; - Ok(rows.into_iter().map(|r| { - let ptype: &str = r.get("processor"); - let st: &str = r.get("status"); - crate::core::db::ProcessorResult { - id: r.get("id"), - job_id: r.get("job_id"), - processor_type: crate::core::db::ProcessorType::from_db_str(ptype).unwrap_or(crate::core::db::ProcessorType::Asr), - status: crate::core::db::ProcessorJobStatus::from_db_str(st).unwrap_or(crate::core::db::ProcessorJobStatus::Pending), - started_at: r.try_get::<&str, _>("started_at").ok().map(|s| s.to_string()), - completed_at: r.try_get::<&str, _>("completed_at").ok().map(|s| s.to_string()), - duration_secs: r.get("duration_secs"), - chunks_produced: r.get("chunks_produced"), - frames_processed: r.get("frames_processed"), - output_size_bytes: r.get("output_size_bytes"), - error_message: r.get("error_message"), - output_data: r.get("output_data"), - retry_count: r.get("retry_count"), - created_at: r.get::<&str, _>("created_at").to_string(), - updated_at: r.get::<&str, _>("updated_at").to_string(), - } - }).collect()) + Ok(rows + .into_iter() + .map(|r| { + let ptype: &str = r.get("processor"); + let st: &str = r.get("status"); + crate::core::db::ProcessorResult { + id: r.get("id"), + job_id: r.get("job_id"), + processor_type: crate::core::db::ProcessorType::from_db_str(ptype) + .unwrap_or(crate::core::db::ProcessorType::Asr), + status: crate::core::db::ProcessorJobStatus::from_db_str(st) + .unwrap_or(crate::core::db::ProcessorJobStatus::Pending), + started_at: r + .try_get::<&str, _>("started_at") + .ok() + .map(|s| s.to_string()), + completed_at: r + .try_get::<&str, _>("completed_at") + .ok() + .map(|s| s.to_string()), + duration_secs: r.get("duration_secs"), + chunks_produced: r.get("chunks_produced"), + frames_processed: r.get("frames_processed"), + output_size_bytes: r.get("output_size_bytes"), + error_message: r.get("error_message"), + output_data: r.get("output_data"), + retry_count: r.get("retry_count"), + created_at: r.get::<&str, _>("created_at").to_string(), + updated_at: r.get::<&str, _>("updated_at").to_string(), + } + }) + .collect()) } pub async fn update_processor_progress( - &self, uuid: &str, processor: &str, current: u64, total: u64, status: &str + &self, + uuid: &str, + processor: &str, + current: u64, + total: u64, + status: &str, ) -> Result<()> { let table = schema::table_name("videos"); let key = processor.to_uppercase(); - let pct = if total > 0 { ((current as f64 / total as f64) * 100.0).round() as u32 } else { 0 }; + let pct = if total > 0 { + ((current as f64 / total as f64) * 100.0).round() as u32 + } else { + 0 + }; let path = format!("{{progress,{}}}", key); sqlx::query(&format!( "UPDATE {} SET processing_status = jsonb_set(COALESCE(processing_status, '{{}}'::jsonb), $1::text[], $2::jsonb) WHERE file_uuid = $3", table @@ -2430,20 +2576,30 @@ impl PostgresDb { } pub async fn update_processor_result( - &self, result_id: i32, status: crate::core::db::ProcessorJobStatus, _started_at: Option, _completed_at: Option + &self, + result_id: i32, + status: crate::core::db::ProcessorJobStatus, + _started_at: Option, + _completed_at: Option, ) -> Result<()> { let table = schema::table_name("processor_results"); let s = format!("{:?}", status).to_lowercase(); sqlx::query(&format!("UPDATE {} SET status = $1 WHERE id = $2", table)) - .bind(&s).bind(result_id) - .execute(&self.pool).await?; + .bind(&s) + .bind(result_id) + .execute(&self.pool) + .await?; Ok(()) } pub async fn update_processor_result_with_stats( - &self, result_id: i32, status: crate::core::db::ProcessorJobStatus, - error_message: Option<&str>, output_data: Option<&serde_json::Value>, - chunks_produced: i32, frames_processed: i32, + &self, + result_id: i32, + status: crate::core::db::ProcessorJobStatus, + error_message: Option<&str>, + output_data: Option<&serde_json::Value>, + chunks_produced: i32, + frames_processed: i32, ) -> Result<()> { let table = schema::table_name("processor_results"); let s = format!("{:?}", status).to_lowercase(); @@ -2455,18 +2611,30 @@ impl PostgresDb { Ok(()) } - pub async fn reset_stale_processor_results(&self, status: crate::core::db::ProcessorJobStatus, reason: &str) -> Result { + pub async fn reset_stale_processor_results( + &self, + status: crate::core::db::ProcessorJobStatus, + reason: &str, + ) -> Result { let table = schema::table_name("processor_results"); let s = format!("{:?}", status).to_lowercase(); let r = sqlx::query(&format!( - "UPDATE {} SET status = 'pending', error_message = $1 WHERE status = $2", table + "UPDATE {} SET status = 'pending', error_message = $1 WHERE status = $2", + table )) - .bind(reason).bind(&s) - .execute(&self.pool).await?; + .bind(reason) + .bind(&s) + .execute(&self.pool) + .await?; Ok(r.rows_affected()) } - pub async fn search_bm25(&self, query: &str, file_uuid: Option<&str>, limit: i64) -> Result> { + pub async fn search_bm25( + &self, + query: &str, + file_uuid: Option<&str>, + limit: i64, + ) -> Result> { let table = schema::table_name("chunk"); let like = format!("%{}%", query.replace('%', "%%")); use sqlx::Row; @@ -2485,52 +2653,101 @@ impl PostgresDb { .bind(&like).bind(limit) .fetch_all(&self.pool).await? }; - Ok(rows.into_iter().map(|r| Bm25Result { - file_uuid: r.get("file_uuid"), - chunk_id: r.get("chunk_id"), - chunk_type: r.get("chunk_type"), - uuid: r.get("file_uuid"), - text: r.get("text_content"), - start_time: r.get("start_time"), - end_time: r.get("end_time"), - bm25_score: r.get("score"), - vector_score: 0.0, - combined_score: r.get("score"), - }).collect()) + Ok(rows + .into_iter() + .map(|r| Bm25Result { + file_uuid: r.get("file_uuid"), + chunk_id: r.get("chunk_id"), + chunk_type: r.get("chunk_type"), + uuid: r.get("file_uuid"), + text: r.get("text_content"), + start_time: r.get("start_time"), + end_time: r.get("end_time"), + bm25_score: r.get("score"), + vector_score: 0.0, + combined_score: r.get("score"), + }) + .collect()) } - pub async fn hybrid_search(&self, query: &str, _query_vector: &[f32], uuid: Option<&str>, limit: usize, _vector_weight: f32, _bm25_weight: f32) -> Result> { + pub async fn hybrid_search( + &self, + query: &str, + _query_vector: &[f32], + uuid: Option<&str>, + limit: usize, + _vector_weight: f32, + _bm25_weight: f32, + ) -> Result> { self.search_bm25(query, uuid, limit as i64).await } - pub async fn list_identities(&self, search: &str, limit: i32, offset: i32) -> Result> { + pub async fn list_identities( + &self, + search: &str, + limit: i32, + offset: i32, + ) -> Result> { use sqlx::Row; if search.is_empty() { let rows = sqlx::query("SELECT id, name, metadata, created_at FROM identities ORDER BY id LIMIT $1 OFFSET $2") .bind(limit).bind(offset) .fetch_all(&self.pool).await?; - Ok(rows.into_iter().map(|r| crate::core::person_identity::Identity { - id: r.get(0), name: r.get(1), metadata: r.get(2), created_at: r.get(3), - embedding: None, uuid: None, identity_type: None, source: None, - status: None, face_embedding: None, voice_embedding: None, - identity_embedding: None, reference_data: None, - tmdb_id: None, tmdb_profile: None, tmdb_poster: None, file_uuid: None, - }).collect()) + Ok(rows + .into_iter() + .map(|r| crate::core::person_identity::Identity { + id: r.get(0), + name: r.get(1), + metadata: r.get(2), + created_at: r.get(3), + embedding: None, + uuid: None, + identity_type: None, + source: None, + status: None, + face_embedding: None, + voice_embedding: None, + identity_embedding: None, + reference_data: None, + tmdb_id: None, + tmdb_profile: None, + tmdb_poster: None, + file_uuid: None, + }) + .collect()) } else { let rows = sqlx::query("SELECT id, name, metadata, created_at FROM identities WHERE name ILIKE $1 ORDER BY id LIMIT $2 OFFSET $3") .bind(format!("%{}%", search)).bind(limit).bind(offset) .fetch_all(&self.pool).await?; - Ok(rows.into_iter().map(|r| crate::core::person_identity::Identity { - id: r.get(0), name: r.get(1), metadata: r.get(2), created_at: r.get(3), - embedding: None, uuid: None, identity_type: None, source: None, - status: None, face_embedding: None, voice_embedding: None, - identity_embedding: None, reference_data: None, - tmdb_id: None, tmdb_profile: None, tmdb_poster: None, file_uuid: None, - }).collect()) + Ok(rows + .into_iter() + .map(|r| crate::core::person_identity::Identity { + id: r.get(0), + name: r.get(1), + metadata: r.get(2), + created_at: r.get(3), + embedding: None, + uuid: None, + identity_type: None, + source: None, + status: None, + face_embedding: None, + voice_embedding: None, + identity_embedding: None, + reference_data: None, + tmdb_id: None, + tmdb_profile: None, + tmdb_poster: None, + file_uuid: None, + }) + .collect()) } } - pub async fn register_resource(&self, resource: super::postgres_db::ResourceRecord) -> Result<()> { + pub async fn register_resource( + &self, + resource: super::postgres_db::ResourceRecord, + ) -> Result<()> { let table = schema::table_name("resources"); sqlx::query(&format!( "INSERT INTO {} (resource_id, resource_type, category, capabilities, config, metadata, status) \ @@ -2562,94 +2779,162 @@ impl PostgresDb { &format!("SELECT resource_id, resource_type, category, capabilities::text as capabilities, config::text as config, metadata::text as metadata, status, last_heartbeat, created_at FROM {} ORDER BY resource_id", table) ) .fetch_all(&self.pool).await?; - Ok(rows.into_iter().map(|r| { - let parse_json = |s: Option| s.and_then(|s| serde_json::from_str(&s).ok()); - super::postgres_db::ResourceRecord { - resource_id: r.get("resource_id"), - resource_type: r.get("resource_type"), - category: r.get("category"), - capabilities: parse_json(r.get("capabilities")), - config: parse_json(r.get("config")), - metadata: parse_json(r.get("metadata")), - status: r.get("status"), - last_heartbeat: r.get("last_heartbeat"), - created_at: r.get("created_at"), - } - }).collect()) + Ok(rows + .into_iter() + .map(|r| { + let parse_json = |s: Option| s.and_then(|s| serde_json::from_str(&s).ok()); + super::postgres_db::ResourceRecord { + resource_id: r.get("resource_id"), + resource_type: r.get("resource_type"), + category: r.get("category"), + capabilities: parse_json(r.get("capabilities")), + config: parse_json(r.get("config")), + metadata: parse_json(r.get("metadata")), + status: r.get("status"), + last_heartbeat: r.get("last_heartbeat"), + created_at: r.get("created_at"), + } + }) + .collect()) } pub async fn log_api_key_audit( - &self, key_id: &str, action: &str, - actor: Option<&str>, ip: Option<&str>, ua: Option<&str>, - path: Option<&str>, code: Option, anomaly: Option<&str>, details: Option<&serde_json::Value> + &self, + key_id: &str, + action: &str, + actor: Option<&str>, + ip: Option<&str>, + ua: Option<&str>, + path: Option<&str>, + code: Option, + anomaly: Option<&str>, + details: Option<&serde_json::Value>, ) -> Result<()> { - tracing::info!("[AUDIT] api_key={} action={} actor={:?} ip={:?} code={:?}", key_id, action, actor, ip, code); + tracing::info!( + "[AUDIT] api_key={} action={} actor={:?} ip={:?} code={:?}", + key_id, + action, + actor, + ip, + code + ); Ok(()) } pub async fn get_api_key_stats(&self) -> Result { let table = schema::table_name("api_keys"); - let total_keys: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", table)).fetch_one(&self.pool).await.unwrap_or(0); - let active_keys: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE status='active'", table)).fetch_one(&self.pool).await.unwrap_or(0); - let expired_keys: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE status='expired' OR expires_at < CURRENT_TIMESTAMP", table)).fetch_one(&self.pool).await.unwrap_or(0); - let rotation_required: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE rotation_required = true AND status='active'", table)).fetch_one(&self.pool).await.unwrap_or(0); - Ok(super::postgres_db::ApiKeyStats { total_keys, active_keys, expired_keys, rotation_required, anomalies_last_24h: 0 }) + let total_keys: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", table)) + .fetch_one(&self.pool) + .await + .unwrap_or(0); + let active_keys: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE status='active'", + table + )) + .fetch_one(&self.pool) + .await + .unwrap_or(0); + let expired_keys: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE status='expired' OR expires_at < CURRENT_TIMESTAMP", + table + )) + .fetch_one(&self.pool) + .await + .unwrap_or(0); + let rotation_required: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE rotation_required = true AND status='active'", + table + )) + .fetch_one(&self.pool) + .await + .unwrap_or(0); + Ok(super::postgres_db::ApiKeyStats { + total_keys, + active_keys, + expired_keys, + rotation_required, + anomalies_last_24h: 0, + }) } - pub async fn get_identity_files(&self, uuid_str: &str, limit: i32, offset: i64) -> Result> { + pub async fn get_identity_files( + &self, + uuid_str: &str, + limit: i32, + offset: i64, + ) -> Result> { let id_table = schema::table_name("identities"); let fd_table = schema::table_name("face_detections"); + let video_table = schema::table_name("videos"); use sqlx::Row; let rows = sqlx::query( - &format!("SELECT fd.file_uuid, '' as file_name, '' as file_path, '' as status, COUNT(*)::int4 as face_count, \ - 0::int4 as speaker_count, NULL::float8 as first_appearance, NULL::float8 as last_appearance, \ + &format!("SELECT fd.file_uuid, v.file_name, v.file_path, v.status, COUNT(*)::int4 as face_count, \ + 0::int4 as speaker_count, MIN(fd.frame_number::float8 / NULLIF(v.fps, 0)) as first_appearance, MAX(fd.frame_number::float8 / NULLIF(v.fps, 0)) as last_appearance, \ AVG(fd.confidence)::float8 as confidence \ - FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE uuid::text = $1) \ - GROUP BY fd.file_uuid LIMIT $2 OFFSET $3", fd_table, id_table) + FROM {} fd JOIN {} v ON v.file_uuid = fd.file_uuid \ + WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1) \ + GROUP BY fd.file_uuid, v.file_name, v.file_path, v.status LIMIT $2 OFFSET $3", fd_table, video_table, id_table) ) .bind(uuid_str).bind(limit).bind(offset) .fetch_all(&self.pool).await?; - Ok(rows.into_iter().map(|r| super::IdentityFileRecord { - file_uuid: r.get("file_uuid"), - file_name: r.get("file_name"), - file_path: r.get("file_path"), - status: r.get("status"), - face_count: r.get("face_count"), - speaker_count: r.get("speaker_count"), - first_appearance: r.get("first_appearance"), - last_appearance: r.get("last_appearance"), - confidence: r.get("confidence"), - }).collect()) + Ok(rows + .into_iter() + .map(|r| super::IdentityFileRecord { + file_uuid: r.get("file_uuid"), + file_name: r.get("file_name"), + file_path: r.get("file_path"), + status: r.get("status"), + face_count: r.get("face_count"), + speaker_count: r.get("speaker_count"), + first_appearance: r.get("first_appearance"), + last_appearance: r.get("last_appearance"), + confidence: r.get("confidence"), + }) + .collect()) } - pub async fn get_identity_faces(&self, uuid_str: &str, limit: i32, offset: i64) -> Result> { + pub async fn get_identity_faces( + &self, + uuid_str: &str, + limit: i32, + offset: i64, + ) -> Result> { let id_table = schema::table_name("identities"); let fd_table = schema::table_name("face_detections"); + let video_table = schema::table_name("videos"); use sqlx::Row; let rows = sqlx::query( - &format!("SELECT fd.id, fd.file_uuid, fd.frame_number, NULL::float8 as timestamp_secs, \ - ('face_' || fd.frame_number::text) as face_id, 0.0::float8 as x, 0.0::float8 as y, 0.0::float8 as w, 0.0::float8 as h, \ - fd.confidence, NULL::text as thumbnail_path \ - FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1) \ - ORDER BY fd.frame_number LIMIT $2 OFFSET $3", fd_table, id_table) + &format!("SELECT fd.id::bigint as id, fd.file_uuid, fd.frame_number::bigint as frame_number, \ + (fd.frame_number::float8 / NULLIF(v.fps, 0)) as timestamp_secs, fd.face_id, fd.x::float8 as x, fd.y::float8 as y, fd.width::float8 as width, fd.height::float8 as height, \ + fd.confidence::float8 as confidence \ + FROM {} fd JOIN {} v ON v.file_uuid = fd.file_uuid WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1) \ + ORDER BY fd.frame_number LIMIT $2 OFFSET $3", fd_table, video_table, id_table) ) .bind(uuid_str).bind(limit).bind(offset) .fetch_all(&self.pool).await?; - Ok(rows.into_iter().map(|r| super::IdentityFaceRecord { - id: r.get("id"), - file_uuid: r.get("file_uuid"), - frame_number: r.get("frame_number"), - timestamp_secs: r.get("timestamp_secs"), - face_id: r.get("face_id"), - x: r.get("x"), - y: r.get("y"), - width: r.get("w"), - height: r.get("h"), - confidence: r.get("confidence"), - }).collect()) + Ok(rows + .into_iter() + .map(|r| super::IdentityFaceRecord { + id: r.get("id"), + file_uuid: r.get("file_uuid"), + frame_number: r.get("frame_number"), + timestamp_secs: r.get("timestamp_secs"), + face_id: r.get("face_id"), + x: r.get("x"), + y: r.get("y"), + width: r.get("width"), + height: r.get("height"), + confidence: r.get("confidence"), + }) + .collect()) } - pub async fn get_identity_chunks(&self, uuid_str: &str, limit: i32, offset: i64) -> Result> { + pub async fn get_identity_chunks( + &self, + uuid_str: &str, + limit: i32, + offset: i64, + ) -> Result> { let id_table = schema::table_name("identities"); let fd_table = schema::table_name("face_detections"); let chunk_table = schema::table_name("chunk"); @@ -2663,27 +2948,33 @@ impl PostgresDb { ) .bind(uuid_str).bind(limit).bind(offset) .fetch_all(&self.pool).await?; - Ok(rows.into_iter().map(|r| super::IdentityChunkRecord { - id: 0, - file_uuid: r.get("file_uuid"), - chunk_id: r.get("chunk_id"), - chunk_type: r.get("chunk_type"), - start_frame: r.get("start_frame"), - end_frame: r.get("end_frame"), - fps: r.get("fps"), - text_content: r.get("text_content"), - start_time: r.get("start_time"), - end_time: r.get("end_time"), - content: serde_json::Value::Null, - }).collect()) + Ok(rows + .into_iter() + .map(|r| super::IdentityChunkRecord { + id: 0, + file_uuid: r.get("file_uuid"), + chunk_id: r.get("chunk_id"), + chunk_type: r.get("chunk_type"), + start_frame: r.get("start_frame"), + end_frame: r.get("end_frame"), + fps: r.get("fps"), + text_content: r.get("text_content"), + start_time: r.get("start_time"), + end_time: r.get("end_time"), + content: serde_json::Value::Null, + }) + .collect()) } - pub async fn get_identity_by_uuid(&self, uuid_str: &str) -> Result> { + pub async fn get_identity_by_uuid( + &self, + uuid_str: &str, + ) -> Result> { let id_table = schema::table_name("identities"); let clean = uuid_str.replace('-', ""); use sqlx::Row; let row = sqlx::query( - &format!("SELECT id, uuid::text, real_name AS name, identity_type, source, status, metadata, reference_data, \ + &format!("SELECT id::bigint as id, uuid::text, name, identity_type, source, status, metadata, reference_data, \ NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \ face_embedding::real[] as face_embedding, \ tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \ @@ -2691,52 +2982,73 @@ impl PostgresDb { ) .bind(&clean) .fetch_optional(&self.pool).await?; - Ok(row.map(|r| { - super::IdentityDetailRecord { - id: r.get("id"), - uuid: r.get::<&str, _>("uuid").to_string(), - name: r.get("name"), - identity_type: r.get("identity_type"), - source: r.get("source"), - status: r.get("status"), - metadata: r.get("metadata"), - reference_data: r.get("reference_data"), - voice_embedding: r.get("voice_embedding"), - identity_embedding: r.get("identity_embedding"), - face_embedding: r.get("face_embedding"), - tmdb_id: r.get("tmdb_id"), - tmdb_profile: r.get("tmdb_profile"), - created_at: r.get("created_at"), - updated_at: r.get("updated_at"), - } + Ok(row.map(|r| super::IdentityDetailRecord { + id: r.get("id"), + uuid: r.get::<&str, _>("uuid").to_string(), + name: r.get("name"), + identity_type: r.get("identity_type"), + source: r.get("source"), + status: r.get("status"), + metadata: r.get("metadata"), + reference_data: r.get("reference_data"), + voice_embedding: r.get("voice_embedding"), + identity_embedding: r.get("identity_embedding"), + face_embedding: r.get("face_embedding"), + tmdb_id: r.get("tmdb_id"), + tmdb_profile: r.get("tmdb_profile"), + created_at: r.get("created_at"), + updated_at: r.get("updated_at"), })) } - pub async fn store_pre_chunk(&self, _uuid: &str, _chunk_type: &str, _data: serde_json::Value) -> Result<()> { + pub async fn store_pre_chunk( + &self, + _uuid: &str, + _chunk_type: &str, + _data: serde_json::Value, + ) -> Result<()> { Ok(()) } - pub async fn store_frame(&self, _uuid: &str, _frame_number: i64, _data: serde_json::Value) -> Result<()> { + pub async fn store_frame( + &self, + _uuid: &str, + _frame_number: i64, + _data: serde_json::Value, + ) -> Result<()> { Ok(()) } - pub async fn get_chunks_by_time_range(&self, _uuid: &str, _start: f64, _end: f64) -> Result> { + pub async fn get_chunks_by_time_range( + &self, + _uuid: &str, + _start: f64, + _end: f64, + ) -> Result> { Ok(Vec::new()) } - pub async fn get_frames_by_time_range(&self, _uuid: &str, _start: f64, _end: f64) -> Result> { + pub async fn get_frames_by_time_range( + &self, + _uuid: &str, + _start: f64, + _end: f64, + ) -> Result> { Ok(Vec::new()) } pub async fn store_chunk(&self, chunk: &crate::core::chunk::types::Chunk) -> Result<()> { let table = schema::table_name("chunk"); let ct_str = format!("{:?}", chunk.chunk_type).to_lowercase(); + let start_time = chunk.start_frame as f64 / chunk.fps; + let end_time = chunk.end_frame as f64 / chunk.fps; sqlx::query(&format!( - "INSERT INTO {} (file_uuid, chunk_id, chunk_type, start_frame, end_frame, text_content, content, fps) \ - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) ON CONFLICT DO NOTHING", table + "INSERT INTO {} (file_uuid, chunk_id, chunk_type, start_frame, end_frame, start_time, end_time, text_content, content, fps) \ + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) ON CONFLICT DO NOTHING", table )) .bind(&chunk.uuid).bind(&chunk.chunk_id).bind(&ct_str) .bind(chunk.start_frame).bind(chunk.end_frame) + .bind(start_time).bind(end_time) .bind(&chunk.text_content).bind(&chunk.content).bind(chunk.fps) .execute(&self.pool).await?; Ok(()) @@ -2747,16 +3059,27 @@ impl PostgresDb { Ok(()) } - pub async fn update_job_status(&self, job_id: i32, status: crate::core::db::MonitorJobStatus) -> Result<()> { + pub async fn update_job_status( + &self, + job_id: i32, + status: crate::core::db::MonitorJobStatus, + ) -> Result<()> { let table = schema::table_name("monitor_jobs"); let status_str = format!("{:?}", status).to_lowercase(); sqlx::query(&format!("UPDATE {} SET status = $1 WHERE id = $2", table)) - .bind(&status_str).bind(job_id) - .execute(&self.pool).await?; + .bind(&status_str) + .bind(job_id) + .execute(&self.pool) + .await?; Ok(()) } - pub async fn init_processing_status(&self, uuid: &str, processors: Vec<&str>, total_frames: u64) -> Result<()> { + pub async fn init_processing_status( + &self, + uuid: &str, + processors: Vec<&str>, + total_frames: u64, + ) -> Result<()> { let table = schema::table_name("videos"); let progress: serde_json::Map = processors.iter().map(|p| { (p.to_uppercase(), serde_json::json!({ @@ -2773,11 +3096,57 @@ impl PostgresDb { Ok(()) } - pub async fn get_file_identities(&self, _uuid: &str, _limit: i32, _offset: i64) -> Result> { - Ok(Vec::new()) + pub async fn get_file_identities( + &self, + uuid: &str, + limit: i32, + offset: i64, + ) -> Result> { + let id_table = schema::table_name("identities"); + let fd_table = schema::table_name("face_detections"); + let video_table = schema::table_name("videos"); + use sqlx::Row; + let rows = sqlx::query(&format!( + "SELECT i.id, i.uuid::text, i.name, i.metadata, \ + COUNT(fd.id)::int4 as face_count, 0::int4 as speaker_count, \ + MIN(fd.frame_number)::int4 as start_frame, MAX(fd.frame_number)::int4 as end_frame, \ + MIN(fd.frame_number::float8 / NULLIF(v.fps, 0)) as start_time, \ + MAX(fd.frame_number::float8 / NULLIF(v.fps, 0)) as end_time, \ + AVG(fd.confidence)::float8 as confidence \ + FROM {} fd JOIN {} i ON i.id = fd.identity_id \ + JOIN {} v ON v.file_uuid = fd.file_uuid \ + WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL \ + GROUP BY i.id, i.uuid, i.name, i.metadata \ + ORDER BY face_count DESC LIMIT $2 OFFSET $3", + fd_table, id_table, video_table + )) + .bind(uuid) + .bind(limit) + .bind(offset) + .fetch_all(&self.pool) + .await?; + Ok(rows + .into_iter() + .map(|r| super::FileIdentityRecord { + identity_id: r.get("id"), + identity_uuid: Some(r.get::("uuid")), + name: r.get("name"), + metadata: r.get("metadata"), + face_count: r.get("face_count"), + speaker_count: r.get("speaker_count"), + start_frame: r.get("start_frame"), + end_frame: r.get("end_frame"), + start_time: r.get("start_time"), + end_time: r.get("end_time"), + confidence: r.get("confidence"), + }) + .collect()) } - pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result> { + pub async fn get_chunks_by_uuid( + &self, + uuid: &str, + ) -> Result> { use crate::core::db::ChunkStore; ChunkStore::get_chunks_by_uuid(self, uuid).await } @@ -2788,15 +3157,22 @@ impl PostgresDb { } pub async fn create_gitea_token( - &self, _id: i64, _username: &str, _token_name: &str, - _last_eight: &str, _scopes: &serde_json::Value, _last_verified: Option, + &self, + _id: i64, + _username: &str, + _token_name: &str, + _last_eight: &str, + _scopes: &serde_json::Value, + _last_verified: Option, ) -> Result<()> { tracing::warn!("[PostgresDb] create_gitea_token stub"); Ok(()) } pub async fn get_gitea_token_by_name( - &self, _username: &str, _token_name: &str, + &self, + _username: &str, + _token_name: &str, ) -> Result> { tracing::warn!("[PostgresDb] get_gitea_token_by_name stub"); Ok(None) @@ -2808,15 +3184,20 @@ impl PostgresDb { } pub async fn create_n8n_api_key( - &self, _key_id: &str, _label: &str, _last_eight: &str, - _last_verified: Option, _expires_at: Option>, + &self, + _key_id: &str, + _label: &str, + _last_eight: &str, + _last_verified: Option, + _expires_at: Option>, ) -> Result<()> { tracing::warn!("[PostgresDb] create_n8n_api_key stub"); Ok(()) } pub async fn get_n8n_api_key_by_label( - &self, _label: &str, + &self, + _label: &str, ) -> Result> { tracing::warn!("[PostgresDb] get_n8n_api_key_by_label stub"); Ok(None) @@ -2865,19 +3246,68 @@ impl crate::core::db::ChunkStore for PostgresDb { async fn store_chunk(&self, chunk: &crate::core::chunk::types::Chunk) -> Result<()> { let table = schema::table_name("chunk"); let ct_str = format!("{:?}", chunk.chunk_type).to_lowercase(); + let start_time = chunk.start_frame as f64 / chunk.fps; + let end_time = chunk.end_frame as f64 / chunk.fps; sqlx::query(&format!( - "INSERT INTO {} (file_uuid, chunk_id, chunk_type, start_frame, end_frame, text_content, content, fps) \ - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) ON CONFLICT DO NOTHING", table + "INSERT INTO {} (file_uuid, chunk_id, chunk_type, start_frame, end_frame, start_time, end_time, text_content, content, fps) \ + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) ON CONFLICT DO NOTHING", table )) .bind(&chunk.uuid).bind(&chunk.chunk_id).bind(&ct_str) .bind(chunk.start_frame).bind(chunk.end_frame) + .bind(start_time).bind(end_time) .bind(&chunk.text_content).bind(&chunk.content).bind(chunk.fps) .execute(&self.pool).await?; Ok(()) } - async fn get_chunks_by_uuid(&self, uuid: &str) -> Result> { - Ok(Vec::new()) + async fn get_chunks_by_uuid( + &self, + uuid: &str, + ) -> Result> { + let table = schema::table_name("chunk"); + let rows = sqlx::query_as::<_, (String, String, f64, f64, f64, String, Option, Option, Option)>( + &format!("SELECT chunk_type, chunk_id, start_time, end_time, fps, content::text, text_content, metadata, vector_id FROM {} WHERE file_uuid = $1 ORDER BY id", table) + ) + .bind(uuid) + .fetch_all(&self.pool).await?; + + Ok(rows + .into_iter() + .map( + |(ct, chunk_id, st, et, fps, content_str, text_content, metadata, vector_id)| { + let content: serde_json::Value = + serde_json::from_str(&content_str).unwrap_or_default(); + let chunk_type = match ct.as_str() { + "time" => crate::core::chunk::types::ChunkType::TimeBased, + "sentence" => crate::core::chunk::types::ChunkType::Sentence, + "cut" => crate::core::chunk::types::ChunkType::Cut, + "trace" => crate::core::chunk::types::ChunkType::Trace, + "story" | "story_parent" | "story_child" => { + crate::core::chunk::types::ChunkType::Story + } + "visual" => crate::core::chunk::types::ChunkType::Visual, + _ => crate::core::chunk::types::ChunkType::Story, + }; + let start_frame = (st * fps).round() as i64; + let end_frame = (et * fps).round() as i64; + let mut c = crate::core::chunk::types::Chunk::new( + 0, + uuid.to_string(), + chunk_id, + chunk_type, + crate::core::chunk::types::ChunkRule::Rule1, + start_frame, + end_frame, + fps, + content, + ); + c.text_content = text_content; + c.metadata = metadata; + c.vector_id = vector_id; + c + }, + ) + .collect()) } async fn get_all_chunks(&self) -> Result> { @@ -2888,11 +3318,18 @@ impl crate::core::db::ChunkStore for PostgresDb { #[async_trait] impl crate::core::db::VectorStore for PostgresDb { async fn store_vector(&self, chunk_id: &str, _vector: &[f32]) -> Result<()> { - tracing::warn!("[PostgresDb] store_vector: Qdrant should handle vectors, not PostgreSQL. chunk_id={}", chunk_id); + tracing::warn!( + "[PostgresDb] store_vector: Qdrant should handle vectors, not PostgreSQL. chunk_id={}", + chunk_id + ); Ok(()) } - async fn search(&self, _query_vector: &[f32], _limit: usize) -> Result> { + async fn search( + &self, + _query_vector: &[f32], + _limit: usize, + ) -> Result> { Ok(Vec::new()) } } diff --git a/src/core/db/qdrant_db.rs b/src/core/db/qdrant_db.rs index f7af794..d63897c 100644 --- a/src/core/db/qdrant_db.rs +++ b/src/core/db/qdrant_db.rs @@ -15,9 +15,11 @@ pub struct QdrantDb { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VectorPayload { - pub uuid: String, + pub file_uuid: String, pub chunk_id: String, pub chunk_type: String, + pub start_frame: i64, + pub end_frame: i64, pub start_time: f64, pub end_time: f64, pub text: Option, @@ -189,6 +191,49 @@ impl QdrantDb { Ok(()) } + pub async fn upsert_vectors_batch( + &self, + collection: &str, + points: &[(u64, &[f32], Option)], + ) -> Result<()> { + let url = format!( + "{}/collections/{}/points?wait=true", + self.base_url, collection + ); + + let qdrant_points: Vec = points + .iter() + .map(|(id, vec, payload)| { + let mut p = serde_json::json!({ + "id": id, + "vector": vec, + }); + if let Some(pl) = payload { + p["payload"] = pl.clone(); + } + p + }) + .collect(); + + let body = serde_json::json!({ "points": qdrant_points }); + + let response = self + .client + .put(&url) + .header("api-key", &self.api_key) + .json(&body) + .send() + .await + .context("Failed to send batch upsert request to Qdrant")?; + + let status = response.status(); + if !status.is_success() { + let response_text = response.text().await.unwrap_or_default(); + anyhow::bail!("Qdrant batch upsert failed: {} - {}", status, response_text); + } + Ok(()) + } + pub async fn upsert_vector( &self, chunk_id: &str, @@ -207,12 +252,23 @@ impl QdrantDb { ); let mut payload_map = HashMap::new(); - payload_map.insert("uuid".to_string(), serde_json::json!(payload.uuid)); + payload_map.insert( + "file_uuid".to_string(), + serde_json::json!(payload.file_uuid), + ); payload_map.insert("chunk_id".to_string(), serde_json::json!(payload.chunk_id)); payload_map.insert( "chunk_type".to_string(), serde_json::json!(payload.chunk_type), ); + payload_map.insert( + "start_frame".to_string(), + serde_json::json!(payload.start_frame), + ); + payload_map.insert( + "end_frame".to_string(), + serde_json::json!(payload.end_frame), + ); payload_map.insert( "start_time".to_string(), serde_json::json!(payload.start_time), @@ -224,7 +280,7 @@ impl QdrantDb { // Generate consistent point ID from uuid and chunk_id // Qdrant requires integer or UUID point IDs. We'll use a simple integer hash. - let point_id_str = format!("{}_{}", payload.uuid, chunk_id); + let point_id_str = format!("{}_{}", payload.file_uuid, chunk_id); use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; let mut hasher = DefaultHasher::new(); @@ -240,9 +296,9 @@ impl QdrantDb { }); tracing::debug!( - "Upserting vector to Qdrant: chunk_id={}, uuid={}, vector_len={}", + "Upserting vector to Qdrant: chunk_id={}, file_uuid={}, vector_len={}", chunk_id, - payload.uuid, + payload.file_uuid, vector.len() ); @@ -337,7 +393,7 @@ impl QdrantDb { .map(|r| { let uuid = r .payload - .get("uuid") + .get("file_uuid") .and_then(|v| v.as_str()) .unwrap_or("unknown") .to_string(); @@ -409,7 +465,7 @@ impl QdrantDb { .map(|r| { let uuid = r .payload - .get("uuid") + .get("file_uuid") .and_then(|v| v.as_str()) .unwrap_or("unknown") .to_string(); @@ -471,7 +527,7 @@ impl QdrantDb { "filter": { "must": [ { - "key": "uuid", + "key": "file_uuid", "match": { "value": uuid } @@ -532,7 +588,7 @@ impl QdrantDb { .map(|r| { let uuid = r .payload - .get("uuid") + .get("file_uuid") .and_then(|v| v.as_str()) .unwrap_or("unknown") .to_string(); @@ -553,6 +609,89 @@ impl QdrantDb { Ok(search_results) } + pub async fn search_face_collection( + &self, + collection: &str, + query_vector: &[f32], + limit: usize, + exclude_payload_key: &str, + exclude_payload_value: &str, + include_file_uuid: Option<&str>, + ) -> Result)>> { + let url = format!("{}/collections/{}/points/search", self.base_url, collection); + + let mut filter = serde_json::json!({ + "must_not": [ + { + "key": exclude_payload_key, + "match": { "value": exclude_payload_value } + } + ] + }); + + if let Some(file_uuid) = include_file_uuid { + filter["must"] = serde_json::json!([ + { + "key": "file_uuid", + "match": { "value": file_uuid } + } + ]); + } + + let body = serde_json::json!({ + "vector": query_vector, + "limit": limit, + "with_payload": true, + "filter": filter, + }); + + let response = self + .client + .post(&url) + .header("api-key", &self.api_key) + .header("Content-Type", "application/json") + .json(&body) + .send() + .await + .context("Failed to search Qdrant face collection")?; + + let status = response.status(); + let response_text = response + .text() + .await + .unwrap_or_else(|_| "Failed to read response".to_string()); + + if !status.is_success() { + return Err(anyhow::anyhow!( + "Qdrant search_face_collection failed: {} - {}", + status, + response_text + )); + } + + #[derive(Deserialize)] + struct QdrantSearchResult { + result: Vec, + } + #[derive(Deserialize)] + struct QdrantPoint { + score: f64, + payload: HashMap, + } + + match serde_json::from_str::(&response_text) { + Ok(parsed) => { + let results: Vec<(f64, HashMap)> = parsed + .result + .into_iter() + .map(|r| (r.score, r.payload)) + .collect(); + Ok(results) + } + Err(e) => Err(anyhow::anyhow!("Failed to parse Qdrant response: {}", e)), + } + } + pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> { let url = format!( "{}/collections/{}/points/delete", @@ -563,7 +702,7 @@ impl QdrantDb { "filter": { "must": [ { - "key": "uuid", + "key": "file_uuid", "match": { "value": uuid } @@ -711,9 +850,11 @@ impl Database for QdrantDb { impl VectorStore for QdrantDb { async fn store_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> { let payload = VectorPayload { - uuid: String::new(), + file_uuid: String::new(), chunk_id: chunk_id.to_string(), chunk_type: String::new(), + start_frame: 0, + end_frame: 0, start_time: 0.0, end_time: 0.0, text: None, @@ -737,7 +878,9 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> { let qdrant: QdrantDb = QdrantDb::new(); let query = format!( - "SELECT id, trace_id, frame_number, embedding FROM {} WHERE file_uuid = $1 AND embedding IS NOT NULL", + "SELECT id, trace_id, frame_number, embedding FROM {} \ + WHERE file_uuid = $1 AND embedding IS NOT NULL \ + AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)", table ); let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?; @@ -767,3 +910,103 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> { ); Ok(()) } + +pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> { + use crate::core::config::DATABASE_URL; + use sqlx::Row; + + let pool = sqlx::PgPool::connect(&DATABASE_URL).await?; + let table = crate::core::db::schema::table_name("face_detections"); + let qdrant = QdrantDb::new(); + + let collection = format!( + "{}_traces", + crate::core::config::REDIS_KEY_PREFIX + .as_str() + .trim_end_matches(':') + ); + qdrant.ensure_collection(&collection, 512).await?; + + // Read all face_detections with embeddings, grouped by trace_id in Rust + let rows = sqlx::query(&format!( + "SELECT trace_id, embedding FROM {} \ + WHERE file_uuid = $1 AND embedding IS NOT NULL AND trace_id IS NOT NULL \ + AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)", + table + )) + .bind(file_uuid) + .fetch_all(&pool) + .await?; + + let mut trace_faces: std::collections::HashMap>> = + std::collections::HashMap::new(); + let mut trace_stats: std::collections::HashMap = + std::collections::HashMap::new(); // (count, min_frame, max_frame) + + for row in &rows { + let tid: Option = row.get(0); + let emb: Option> = row.get(1); + if let (Some(tid), Some(emb)) = (tid, emb) { + trace_faces.entry(tid).or_default().push(emb); + let entry = trace_stats.entry(tid).or_insert((0, i64::MAX, i64::MIN)); + entry.0 += 1; + } + } + + // Compute average embedding per trace + struct AvgTrace { + tid: i32, + avg_emb: Vec, + frame_count: i64, + } + + let mut trace_avgs: Vec = Vec::new(); + + for (&tid, faces) in &trace_faces { + let dim = faces[0].len(); + let mut avg = vec![0.0f32; dim]; + for face in faces { + for (i, &v) in face.iter().enumerate() { + avg[i] += v; + } + } + let n = faces.len() as f32; + for v in &mut avg { + *v /= n; + } + + let stats = trace_stats.get(&tid).unwrap_or(&(0, 0, 0)); + trace_avgs.push(AvgTrace { + tid, + avg_emb: avg, + frame_count: stats.0, + }); + } + + // Push to Qdrant in batches + for chunk in trace_avgs.chunks(500) { + let batch: Vec<(u64, &[f32], Option)> = chunk + .iter() + .map(|t| { + ( + t.tid as u64, + t.avg_emb.as_slice(), + Some(serde_json::json!({ + "trace_id": t.tid, + "file_uuid": file_uuid, + "frame_count": t.frame_count, + "source": "trace", + })), + ) + }) + .collect(); + qdrant.upsert_vectors_batch(&collection, &batch).await?; + } + + tracing::info!( + "Synced {} trace embeddings to Qdrant for {}", + trace_faces.len(), + file_uuid + ); + Ok(()) +} diff --git a/src/core/db/sync_db.rs b/src/core/db/sync_db.rs index b8d1d2c..c5485c6 100644 --- a/src/core/db/sync_db.rs +++ b/src/core/db/sync_db.rs @@ -45,9 +45,11 @@ impl SyncDb { } let payload = VectorPayload { - uuid: uuid.clone(), + file_uuid: uuid.clone(), chunk_id: chunk_id.clone(), chunk_type, + start_frame: chunk.start_frame, + end_frame: chunk.end_frame, start_time, end_time, text: Some(text.to_string()), diff --git a/src/core/health_agent.rs b/src/core/health_agent.rs index e578225..b258a13 100644 --- a/src/core/health_agent.rs +++ b/src/core/health_agent.rs @@ -33,26 +33,38 @@ pub async fn run_consistency_checks(db: &PostgresDb) -> ConsistencyReport { // Check 1: stale_processing — status=processing but job_id is null let c1 = check_stale_processing(db).await; - if c1.count > 0 { any_issue = true; } + if c1.count > 0 { + any_issue = true; + } checks.push(c1); // Check 2: orphaned_processing — status=processing but no active monitor_job let c2 = check_orphaned_processing(db).await; - if c2.count > 0 { any_issue = true; } + if c2.count > 0 { + any_issue = true; + } checks.push(c2); // Check 3: unregistered_with_uuid — DB rows left behind by migration let c3 = check_unregistered_with_uuid(db).await; - if c3.count > 0 { any_issue = true; } + if c3.count > 0 { + any_issue = true; + } checks.push(c3); // Check 4: processing_job_done — status=processing but job already completed let c4 = check_processing_job_done(db).await; - if c4.count > 0 { any_issue = true; } + if c4.count > 0 { + any_issue = true; + } checks.push(c4); ConsistencyReport { - status: if any_issue { "degraded".to_string() } else { "ok".to_string() }, + status: if any_issue { + "degraded".to_string() + } else { + "ok".to_string() + }, checked_at, checks, } @@ -68,9 +80,17 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck { .await .unwrap_or_default(); - let files: Vec = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { - file_uuid, file_name, status, detail: "job_id is null".to_string(), - }).collect(); + let files: Vec = rows + .into_iter() + .map( + |(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { + file_uuid, + file_name, + status, + detail: "job_id is null".to_string(), + }, + ) + .collect(); ConsistencyCheck { check: "stale_processing".to_string(), @@ -83,19 +103,28 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck { async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck { let vt = schema::table_name("videos"); let mj = schema::table_name("monitor_jobs"); - let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!( - "SELECT v.file_uuid, v.file_name, v.status \ + let rows: Vec<(String, String, String)> = + sqlx::query_as::<_, (String, String, String)>(&format!( + "SELECT v.file_uuid, v.file_name, v.status \ FROM {} v LEFT JOIN {} m ON v.file_uuid = m.uuid AND m.status IN ('pending','running') \ WHERE v.status = 'processing' AND m.id IS NULL", - vt, mj - )) - .fetch_all(db.pool()) - .await - .unwrap_or_default(); + vt, mj + )) + .fetch_all(db.pool()) + .await + .unwrap_or_default(); - let files: Vec = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { - file_uuid, file_name, status, detail: "no active monitor_job".to_string(), - }).collect(); + let files: Vec = rows + .into_iter() + .map( + |(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { + file_uuid, + file_name, + status, + detail: "no active monitor_job".to_string(), + }, + ) + .collect(); ConsistencyCheck { check: "orphaned_processing".to_string(), @@ -107,17 +136,26 @@ async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck { async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck { let vt = schema::table_name("videos"); - let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!( - "SELECT file_uuid, file_name, status FROM {} WHERE status = 'unregistered'", - vt - )) - .fetch_all(db.pool()) - .await - .unwrap_or_default(); + let rows: Vec<(String, String, String)> = + sqlx::query_as::<_, (String, String, String)>(&format!( + "SELECT file_uuid, file_name, status FROM {} WHERE status = 'unregistered'", + vt + )) + .fetch_all(db.pool()) + .await + .unwrap_or_default(); - let files: Vec = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { - file_uuid, file_name, status, detail: "migration residue".to_string(), - }).collect(); + let files: Vec = rows + .into_iter() + .map( + |(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { + file_uuid, + file_name, + status, + detail: "migration residue".to_string(), + }, + ) + .collect(); ConsistencyCheck { check: "unregistered_with_uuid".to_string(), @@ -130,19 +168,28 @@ async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck { async fn check_processing_job_done(db: &PostgresDb) -> ConsistencyCheck { let vt = schema::table_name("videos"); let mj = schema::table_name("monitor_jobs"); - let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!( - "SELECT v.file_uuid, v.file_name, v.status \ + let rows: Vec<(String, String, String)> = + sqlx::query_as::<_, (String, String, String)>(&format!( + "SELECT v.file_uuid, v.file_name, v.status \ FROM {} v JOIN {} m ON v.file_uuid = m.uuid \ WHERE v.status = 'processing' AND m.status = 'completed'", - vt, mj - )) - .fetch_all(db.pool()) - .await - .unwrap_or_default(); + vt, mj + )) + .fetch_all(db.pool()) + .await + .unwrap_or_default(); - let files: Vec = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { - file_uuid, file_name, status, detail: "monitor_job already completed".to_string(), - }).collect(); + let files: Vec = rows + .into_iter() + .map( + |(file_uuid, file_name, status): (String, String, String)| ConsistencyFile { + file_uuid, + file_name, + status, + detail: "monitor_job already completed".to_string(), + }, + ) + .collect(); ConsistencyCheck { check: "processing_job_done".to_string(), diff --git a/src/core/identity/storage.rs b/src/core/identity/storage.rs index 928509d..c76d5a9 100644 --- a/src/core/identity/storage.rs +++ b/src/core/identity/storage.rs @@ -54,8 +54,7 @@ pub fn read_identity_file(uuid: &str) -> Result { let path = identity_file_path(uuid); let content = std::fs::read_to_string(&path) .with_context(|| format!("Identity file not found: {} ({})", uuid, path.display()))?; - serde_json::from_str(&content) - .with_context(|| format!("Invalid identity.json: {}", uuid)) + serde_json::from_str(&content).with_context(|| format!("Invalid identity.json: {}", uuid)) } pub fn write_identity_file(file: &IdentityFile) -> Result<()> { @@ -167,7 +166,10 @@ pub fn rebuild_index() -> Result { entries.insert(uuid.clone(), file.name); } Err(e) => { - warn!("[identity-storage] Skipping {} in index rebuild: {}", uuid, e); + warn!( + "[identity-storage] Skipping {} in index rebuild: {}", + uuid, e + ); } } } @@ -187,18 +189,16 @@ pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Resu let identity_table = crate::core::db::schema::table_name("identities"); let fd_table = crate::core::db::schema::table_name("face_detections"); - // Schema-aware column selection: dev uses 'name', public uses 'real_name' - let name_col = if identity_table.starts_with("dev.") { "name" } else { "real_name" }; - let clean = uuid.replace('-', ""); + let record = sqlx::query_as::<_, crate::core::db::IdentityDetailRecord>( &format!( - "SELECT id, uuid::text, {} AS name, identity_type, source, status, metadata, reference_data, \ - NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \ - face_embedding::real[] as face_embedding, \ - tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \ - FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", - name_col, identity_table + "SELECT id, uuid::text, name, identity_type, source, status, metadata, reference_data, \ + NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \ + face_embedding::real[] as face_embedding, \ + tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \ + FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", + identity_table ) ) .bind(&clean) @@ -322,8 +322,13 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result let mut entries: HashMap = if index_path.exists() { let content = std::fs::read_to_string(&index_path)?; let v: serde_json::Value = serde_json::from_str(&content).unwrap_or_default(); - v["entries"].as_object() - .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string())).collect()) + v["entries"] + .as_object() + .map(|obj| { + obj.iter() + .map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string())) + .collect() + }) .unwrap_or_default() } else { HashMap::new() @@ -338,7 +343,9 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result } pub async fn save_identity_file(db: &PostgresDb, uuid: &str) -> Result<()> { - let record = db.get_identity_by_uuid(uuid).await? + let record = db + .get_identity_by_uuid(uuid) + .await? .with_context(|| format!("Identity not found in DB: {}", uuid))?; let identity_uuid = record.uuid.clone(); @@ -415,6 +422,7 @@ mod tests { status: Some("confirmed".to_string()), tmdb_id: Some(112), tmdb_profile: Some("https://image.tmdb.org/t/p/w185/test.jpg".to_string()), + local_profile: None, metadata: serde_json::json!({"tmdb_character": "Test Role"}), file_bindings: vec![FileBinding { file_uuid: "ffffffffffffffffffffffffffffffff".to_string(), @@ -442,7 +450,9 @@ mod tests { fn test_identity_dir_path() { let uuid = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; let p = identity_dir(uuid); - assert!(p.to_string_lossy().ends_with(&format!("identities/{}", uuid))); + assert!(p + .to_string_lossy() + .ends_with(&format!("identities/{}", uuid))); } #[test] @@ -463,7 +473,10 @@ mod tests { let base = Path::new("/tmp/test_base"); let uuid = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; let p = identity_dir_at(base, uuid); - assert_eq!(p, Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")); + assert_eq!( + p, + Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb") + ); } #[test] @@ -490,7 +503,10 @@ mod tests { assert_eq!(read.name, file.name); assert_eq!(read.source, file.source); assert_eq!(read.tmdb_id, file.tmdb_id); - assert_eq!(read.file_bindings[0].face_count, file.file_bindings[0].face_count); + assert_eq!( + read.file_bindings[0].face_count, + file.file_bindings[0].face_count + ); let _ = std::fs::remove_dir_all(&tmp); } @@ -521,9 +537,21 @@ mod tests { let _ = std::fs::remove_dir_all(&tmp); let base = &tmp; - std::fs::create_dir_all(base.join("identities").join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")).unwrap(); - std::fs::create_dir_all(base.join("identities").join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")).unwrap(); - std::fs::create_dir_all(base.join("identities").join("cccccccccccccccccccccccccccccccc")).unwrap(); + std::fs::create_dir_all( + base.join("identities") + .join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + ) + .unwrap(); + std::fs::create_dir_all( + base.join("identities") + .join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"), + ) + .unwrap(); + std::fs::create_dir_all( + base.join("identities") + .join("cccccccccccccccccccccccccccccccc"), + ) + .unwrap(); std::fs::create_dir_all(base.join("identities").join("not_a_uuid")).unwrap(); std::fs::create_dir_all(base.join("identities").join("short")).unwrap(); diff --git a/src/core/ingestion.rs b/src/core/ingestion.rs index b31992e..6cfd646 100644 --- a/src/core/ingestion.rs +++ b/src/core/ingestion.rs @@ -56,19 +56,25 @@ impl IngestionService { .to_string(); // 1. Compute SHA256 for dedup - let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path).ok().unwrap_or_default(); + let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path) + .ok() + .unwrap_or_default(); // 2. Hash check — same content = already registered let videos_table = schema::table_name("videos"); if !content_hash.is_empty() { - if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>( - &format!("SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1", videos_table) - ) + if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(&format!( + "SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1", + videos_table + )) .bind(&content_hash) .fetch_optional(self.db.pool()) .await { - info!("Content already registered: {} ({})", filename, existing_uuid); + info!( + "Content already registered: {} ({})", + filename, existing_uuid + ); return Ok(Some(existing_uuid)); } } @@ -108,7 +114,8 @@ impl IngestionService { let probe_result = probe::probe_video(file_path).ok(); let file_meta = std::fs::metadata(&canonical_path).ok(); - let duration = probe_result.as_ref() + let duration = probe_result + .as_ref() .and_then(|r| r.format.duration.as_ref()) .and_then(|s| s.parse::().ok()) .unwrap_or(0.0); @@ -148,7 +155,11 @@ impl IngestionService { } let total_frames = { - let video_stream = probe_result.as_ref().and_then(|pr| pr.streams.iter().find(|s| s.codec_type.as_deref() == Some("video"))); + let video_stream = probe_result.as_ref().and_then(|pr| { + pr.streams + .iter() + .find(|s| s.codec_type.as_deref() == Some("video")) + }); if let Some(stream) = video_stream { if let Some(nb_frames_str) = &stream.nb_frames { @@ -223,11 +234,14 @@ impl IngestionService { // Store content_hash for dedup if !content_hash.is_empty() { let vt = schema::table_name("videos"); - let _ = sqlx::query(&format!("UPDATE {} SET content_hash = $1 WHERE file_uuid = $2", vt)) - .bind(&content_hash) - .bind(&uuid) - .execute(self.db.pool()) - .await; + let _ = sqlx::query(&format!( + "UPDATE {} SET content_hash = $1 WHERE file_uuid = $2", + vt + )) + .bind(&content_hash) + .bind(&uuid) + .execute(self.db.pool()) + .await; } self.db @@ -243,5 +257,3 @@ impl IngestionService { Ok(Some(uuid)) } } - - diff --git a/src/core/probe/unified.rs b/src/core/probe/unified.rs index 3d9804d..f893f7f 100644 --- a/src/core/probe/unified.rs +++ b/src/core/probe/unified.rs @@ -17,42 +17,84 @@ mod tests { #[test] fn test_detect_category_image() { assert_eq!(detect_category(Path::new("photo.jpg")), FileCategory::Image); - assert_eq!(detect_category(Path::new("photo.jpeg")), FileCategory::Image); + assert_eq!( + detect_category(Path::new("photo.jpeg")), + FileCategory::Image + ); assert_eq!(detect_category(Path::new("photo.png")), FileCategory::Image); assert_eq!(detect_category(Path::new("photo.svg")), FileCategory::Image); - assert_eq!(detect_category(Path::new("photo.webp")), FileCategory::Image); + assert_eq!( + detect_category(Path::new("photo.webp")), + FileCategory::Image + ); } #[test] fn test_detect_category_document() { - assert_eq!(detect_category(Path::new("doc.pdf")), FileCategory::Document); - assert_eq!(detect_category(Path::new("doc.docx")), FileCategory::Document); - assert_eq!(detect_category(Path::new("doc.pages")), FileCategory::Document); - assert_eq!(detect_category(Path::new("doc.txt")), FileCategory::Document); + assert_eq!( + detect_category(Path::new("doc.pdf")), + FileCategory::Document + ); + assert_eq!( + detect_category(Path::new("doc.docx")), + FileCategory::Document + ); + assert_eq!( + detect_category(Path::new("doc.pages")), + FileCategory::Document + ); + assert_eq!( + detect_category(Path::new("doc.txt")), + FileCategory::Document + ); } #[test] fn test_detect_category_spreadsheet() { - assert_eq!(detect_category(Path::new("data.xlsx")), FileCategory::Spreadsheet); - assert_eq!(detect_category(Path::new("data.csv")), FileCategory::Spreadsheet); - assert_eq!(detect_category(Path::new("data.numbers")), FileCategory::Spreadsheet); + assert_eq!( + detect_category(Path::new("data.xlsx")), + FileCategory::Spreadsheet + ); + assert_eq!( + detect_category(Path::new("data.csv")), + FileCategory::Spreadsheet + ); + assert_eq!( + detect_category(Path::new("data.numbers")), + FileCategory::Spreadsheet + ); } #[test] fn test_detect_category_presentation() { - assert_eq!(detect_category(Path::new("deck.pptx")), FileCategory::Presentation); - assert_eq!(detect_category(Path::new("deck.key")), FileCategory::Presentation); + assert_eq!( + detect_category(Path::new("deck.pptx")), + FileCategory::Presentation + ); + assert_eq!( + detect_category(Path::new("deck.key")), + FileCategory::Presentation + ); } #[test] fn test_detect_category_archive() { - assert_eq!(detect_category(Path::new("files.zip")), FileCategory::Archive); - assert_eq!(detect_category(Path::new("files.tar.gz")), FileCategory::Archive); + assert_eq!( + detect_category(Path::new("files.zip")), + FileCategory::Archive + ); + assert_eq!( + detect_category(Path::new("files.tar.gz")), + FileCategory::Archive + ); } #[test] fn test_detect_category_unknown() { - assert_eq!(detect_category(Path::new("file.xyz")), FileCategory::Unknown); + assert_eq!( + detect_category(Path::new("file.xyz")), + FileCategory::Unknown + ); assert_eq!(detect_category(Path::new("file")), FileCategory::Unknown); } @@ -84,13 +126,18 @@ pub enum FileCategory { /// Detect file category from path extension pub fn detect_category(path: &Path) -> FileCategory { - let ext = path.extension() + let ext = path + .extension() .and_then(|e| e.to_str()) .map(|e| e.to_lowercase()); match ext.as_deref() { Some("mp4" | "mov" | "mkv" | "avi" | "webm" | "m4v" | "mpeg") => FileCategory::Video, - Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => FileCategory::Image, - Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => FileCategory::Document, + Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => { + FileCategory::Image + } + Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => { + FileCategory::Document + } Some("xls" | "xlsx" | "csv" | "ods" | "numbers") => FileCategory::Spreadsheet, Some("ppt" | "pptx" | "odp" | "key") => FileCategory::Presentation, Some("zip" | "tar" | "gz" | "tgz" | "7z" | "rar") => FileCategory::Archive, @@ -102,16 +149,20 @@ pub fn detect_category(path: &Path) -> FileCategory { pub fn base_format_info(path: &Path) -> serde_json::Value { let meta = std::fs::metadata(path).ok(); let size = meta.as_ref().map(|m| m.len()).unwrap_or(0); - let mtime = meta.as_ref() + let mtime = meta + .as_ref() .and_then(|m| m.modified().ok()) .and_then(|t| { let secs = t.duration_since(SystemTime::UNIX_EPOCH).ok()?.as_secs() as i64; - chrono::DateTime::from_timestamp(secs, 0) - .map(|dt| dt.to_rfc3339()) + chrono::DateTime::from_timestamp(secs, 0).map(|dt| dt.to_rfc3339()) }) .unwrap_or_default(); let fname = path.to_string_lossy().to_string(); - let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("").to_lowercase(); + let ext = path + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("") + .to_lowercase(); let cat = detect_category(path); let file_type = match cat { FileCategory::Video => "video", @@ -150,7 +201,13 @@ fn ffprobe_probe(path: &Path, format_base: serde_json::Value) -> serde_json::Val } /// Run Python probe for document/spreadsheet/presentation files -fn python_probe(path: &Path, category: &FileCategory, scripts_dir: &str, python_path: &str, format_base: serde_json::Value) -> serde_json::Value { +fn python_probe( + path: &Path, + category: &FileCategory, + scripts_dir: &str, + python_path: &str, + format_base: serde_json::Value, +) -> serde_json::Value { let script = format!("{}/probe_file.py", scripts_dir); if !std::path::Path::new(&script).exists() { return minimal_probe(format_base); @@ -184,18 +241,12 @@ fn minimal_probe(format_base: serde_json::Value) -> serde_json::Value { /// Unified probe: dispatches to the right probe based on file type /// Returns a probe_json-compatible Value -pub async fn unified_probe( - path: &Path, - scripts_dir: &str, - python_path: &str, -) -> serde_json::Value { +pub async fn unified_probe(path: &Path, scripts_dir: &str, python_path: &str) -> serde_json::Value { let cat = detect_category(path); let format_base = base_format_info(path); match cat { - FileCategory::Video | FileCategory::Image => { - ffprobe_probe(path, format_base) - } + FileCategory::Video | FileCategory::Image => ffprobe_probe(path, format_base), FileCategory::Document | FileCategory::Spreadsheet | FileCategory::Presentation => { python_probe(path, &cat, scripts_dir, python_path, format_base) } diff --git a/src/core/processor/cut.rs b/src/core/processor/cut.rs index 6340ee6..dd705bc 100644 --- a/src/core/processor/cut.rs +++ b/src/core/processor/cut.rs @@ -1,5 +1,6 @@ use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; +use std::process::Command; use std::time::Duration; use super::executor::PythonExecutor; @@ -27,13 +28,21 @@ pub async fn process_cut( output_path: &str, uuid: Option<&str>, ) -> Result { + // Try native ffmpeg-based scene detection first + let result = try_native_cut(video_path); + if let Ok(r) = result { + let json = serde_json::to_string_pretty(&r)?; + std::fs::write(output_path, &json) + .with_context(|| format!("Failed to write {:?}", output_path))?; + return Ok(r); + } + + // Fallback: Python scenedetect + tracing::warn!("[CUT] Native impl failed, falling back to Python"); let executor = PythonExecutor::new()?; let script_path = executor.script_path("cut_processor.py"); - tracing::info!("[CUT] Starting scene detection: {}", video_path); - if !script_path.exists() { - tracing::warn!("[CUT] Script not found, returning empty result"); return Ok(CutResult { frame_count: 0, fps: 0.0, @@ -53,19 +62,179 @@ pub async fn process_cut( .with_context(|| format!("Failed to run {:?}", script_path))?; let json_str = std::fs::read_to_string(output_path).context("Failed to read CUT output")?; - let result: CutResult = serde_json::from_str(&json_str).context("Failed to parse CUT output")?; - tracing::info!("[CUT] Result: {} scenes detected", result.scenes.len()); - Ok(result) } +// ── Native ffmpeg scene detection ───────────────────────────────── + +fn try_native_cut(video_path: &str) -> Result { + // Step 1: Get video info (fps, frame count) + let probe = Command::new("ffprobe") + .args([ + "-v", + "quiet", + "-print_format", + "json", + "-show_format", + "-show_streams", + video_path, + ]) + .output() + .context("Failed to run ffprobe")?; + + let probe_info: serde_json::Value = + serde_json::from_slice(&probe.stdout).context("Failed to parse ffprobe output")?; + + let streams = probe_info["streams"] + .as_array() + .map_or(vec![], |s| s.clone()); + let video_stream = streams.iter().find(|s| s["codec_type"] == "video"); + + let fps = video_stream + .and_then(|s| s["r_frame_rate"].as_str().and_then(parse_fraction)) + .unwrap_or(30.0); + + let total_frames: u64 = video_stream + .and_then(|s| s["nb_frames"].as_str()) + .and_then(|s| s.parse().ok()) + .unwrap_or(0); + + let duration: f64 = probe_info["format"]["duration"] + .as_str() + .and_then(|s| s.parse().ok()) + .unwrap_or(0.0); + + // Step 2: Use ffmpeg scene detection filter + // The `scene` filter computes the difference between consecutive frames + // and outputs when the difference exceeds the threshold (0.3 = medium sensitivity) + let scene_output = Command::new("ffprobe") + .args([ + "-v", + "quiet", + "-show_entries", + "frame=pts_time", + "-of", + "compact=p=0:nk=1", + "-f", + "lavfi", + &format!("movie={},select='gt(scene\\,0.3)',showinfo", video_path), + "-show_frames", + ]) + .output() + .context("Failed to run ffmpeg scene detection")?; + + let stderr_output = String::from_utf8_lossy(&scene_output.stderr); + let mut scene_times: Vec = Vec::new(); + + // Parse ffmpeg showinfo output for scene changes + // Format: [Parsed_showinfo...] pts:123.456 pts_time:123.456 ... + for line in stderr_output.lines() { + if line.contains("pts_time:") { + if let Some(pos) = line.find("pts_time:") { + let rest = &line[pos + 9..]; + let time_str = rest.split_whitespace().next().unwrap_or(""); + if let Ok(t) = time_str.parse::() { + scene_times.push(t); + } + } + } + } + + // Step 3: Build scenes from cut points + let mut scenes: Vec = Vec::new(); + let mut prev_time = 0.0; + let mut prev_frame: u64 = 0; + + for (i, &cut_time) in scene_times.iter().enumerate() { + let end_frame = (cut_time * fps).round() as u64; + let start_frame = prev_frame; + + if end_frame > start_frame { + scenes.push(CutScene { + scene_number: (i + 1) as u32, + start_frame: prev_frame, + end_frame: end_frame.saturating_sub(1), + start_time: prev_time, + end_time: cut_time - (1.0 / fps), + }); + } + + prev_time = cut_time; + prev_frame = end_frame; + } + + // Final scene (last cut point → end of video) + if total_frames > 0 && prev_frame < total_frames { + scenes.push(CutScene { + scene_number: (scenes.len() + 1) as u32, + start_frame: prev_frame, + end_frame: total_frames.saturating_sub(1), + start_time: prev_time, + end_time: duration, + }); + } + + // If no scenes detected, create a single scene covering the whole video + if scenes.is_empty() && total_frames > 0 { + scenes.push(CutScene { + scene_number: 1, + start_frame: 0, + end_frame: total_frames.saturating_sub(1), + start_time: 0.0, + end_time: duration, + }); + } + + Ok(CutResult { + frame_count: total_frames, + fps, + scenes, + }) +} + +/// Parse fractional fps like "30000/1001" into f64 +fn parse_fraction(s: &str) -> Option { + if let Some(pos) = s.find('/') { + let num: f64 = s[..pos].parse().ok()?; + let den: f64 = s[pos + 1..].parse().ok()?; + if den > 0.0 { + return Some(num / den); + } + } + s.parse::().ok() +} + +// ── Tests ───────────────────────────────────────────────────────── + #[cfg(test)] mod tests { use super::*; + #[test] + fn test_parse_fraction() { + let r = parse_fraction("30000/1001").unwrap(); + assert!((r - 29.97).abs() < 0.01); + } + + #[test] + fn test_parse_fraction_int() { + let r = parse_fraction("30").unwrap(); + assert!((r - 30.0).abs() < 0.01); + } + + #[test] + fn test_parse_fraction_invalid() { + assert!(parse_fraction("not/a/num").is_none()); + } + + #[test] + fn test_parse_fraction_zero_den() { + assert!(parse_fraction("1/0").is_none()); + } + #[test] fn test_cut_result_serialization() { let result = CutResult { @@ -81,8 +250,9 @@ mod tests { }; let json = serde_json::to_string(&result).unwrap(); + assert!(json.contains("frame_count")); assert!(json.contains("scene_number")); - assert!(json.contains("1")); + assert!(json.contains("fps")); } #[test] @@ -90,20 +260,23 @@ mod tests { let json = r#"{ "frame_count": 100, "fps": 30.0, - "scenes": [ - {"scene_number": 1, "start_frame": 0, "end_frame": 30, "start_time": 0.0, "end_time": 1.0}, - {"scene_number": 2, "start_frame": 31, "end_frame": 60, "start_time": 1.033, "end_time": 2.0} - ] + "scenes": [{ + "scene_number": 1, + "start_frame": 0, + "end_frame": 30, + "start_time": 0.0, + "end_time": 1.0 + }] }"#; let result: CutResult = serde_json::from_str(json).unwrap(); - assert_eq!(result.frame_count, 100); - assert_eq!(result.scenes.len(), 2); - assert_eq!(result.scenes[1].scene_number, 2); + assert_eq!(result.scenes.len(), 1); + assert_eq!(result.scenes[0].scene_number, 1); + assert_eq!(result.scenes[0].start_frame, 0); } #[test] - fn test_cut_result_empty_scenes() { + fn test_empty_scenes() { let result = CutResult { frame_count: 0, fps: 0.0, @@ -111,17 +284,4 @@ mod tests { }; assert!(result.scenes.is_empty()); } - - #[test] - fn test_cut_scene_times() { - let scene = CutScene { - scene_number: 1, - start_frame: 0, - end_frame: 30, - start_time: 0.0, - end_time: 1.0, - }; - assert!(scene.end_time > scene.start_time); - assert_eq!(scene.scene_number, 1); - } } diff --git a/src/core/processor/executor.rs b/src/core/processor/executor.rs index 45eaaea..7579776 100644 --- a/src/core/processor/executor.rs +++ b/src/core/processor/executor.rs @@ -109,11 +109,10 @@ pub fn validate_python_env() -> Result<()> { tracing::warn!("Expected Python 3.11, got: {}", version.trim()); } - let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR") - .unwrap_or_else(|_| { - let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - manifest.join("scripts").to_string_lossy().to_string() - }); + let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| { + let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + manifest.join("scripts").to_string_lossy().to_string() + }); let script_path = PathBuf::from(&scripts_dir); if !script_path.exists() { anyhow::bail!("Scripts directory not found at {}", scripts_dir); @@ -133,11 +132,10 @@ impl PythonExecutor { pub fn new() -> Result { let python_path = std::env::var("MOMENTRY_PYTHON_PATH") .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string()); - let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR") - .unwrap_or_else(|_| { - let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - manifest.join("scripts").to_string_lossy().to_string() - }); + let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| { + let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + manifest.join("scripts").to_string_lossy().to_string() + }); let python_bin = PathBuf::from(&python_path); let scripts_path = PathBuf::from(&scripts_dir); @@ -173,7 +171,8 @@ impl PythonExecutor { if let Some(expected_hash) = self.checksums.get(&rel_path) { let output = std::process::Command::new("shasum") - .arg("-a").arg("256") + .arg("-a") + .arg("256") .arg(&script_path) .output() .context("Failed to run shasum for integrity check")?; @@ -235,8 +234,9 @@ impl PythonExecutor { } // Verify script integrity via SHA256 checksum before execution - self.verify_script_integrity(script_name) - .context("Pre-execution integrity check failed — possible version mismatch or corruption")?; + self.verify_script_integrity(script_name).context( + "Pre-execution integrity check failed — possible version mismatch or corruption", + )?; // 標記輸出檔為處理中(add .tmp suffix) let output_path = args.get(1).map(|p| std::path::PathBuf::from(p)); diff --git a/src/core/processor/heuristic_scene.rs b/src/core/processor/heuristic_scene.rs index 18732f2..7ab93cf 100644 --- a/src/core/processor/heuristic_scene.rs +++ b/src/core/processor/heuristic_scene.rs @@ -44,22 +44,59 @@ pub enum CrowdSize { /// Indoor-indicative YOLO classes (COCO labels) const INDOOR_CLASSES: &[&str] = &[ - "chair", "couch", "bed", "dining table", "toilet", "tv", "laptop", - "microwave", "oven", "refrigerator", "sink", "book", "clock", - "vase", "potted plant", + "chair", + "couch", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "microwave", + "oven", + "refrigerator", + "sink", + "book", + "clock", + "vase", + "potted plant", ]; /// Vehicle-indicative classes (person + vehicle = transport scene) const VEHICLE_CLASSES: &[&str] = &[ - "car", "truck", "bus", "train", "boat", "aeroplane", "bicycle", "motorbike", + "car", + "truck", + "bus", + "train", + "boat", + "aeroplane", + "bicycle", + "motorbike", ]; /// Outdoor-indicative YOLO classes const OUTDOOR_CLASSES: &[&str] = &[ - "car", "truck", "bus", "train", "boat", "airplane", - "traffic light", "fire hydrant", "stop sign", "parking meter", - "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", - "bear", "zebra", "giraffe", "tree", + "car", + "truck", + "bus", + "train", + "boat", + "airplane", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "tree", ]; /// Build heuristic scene metadata from disk files (yolo.json + DB face data). @@ -113,13 +150,14 @@ pub async fn build_heuristic_scene_meta( // Get face counts grouped by frame let fd_table = schema::table_name("face_detections"); - let face_rows: Vec<(i64, i64)> = sqlx::query_as( - &format!("SELECT frame_number, COUNT(*) as fc \ + let face_rows: Vec<(i64, i64)> = sqlx::query_as(&format!( + "SELECT frame_number, COUNT(*) as fc \ FROM {} \ WHERE file_uuid = $1 AND frame_number IS NOT NULL \ GROUP BY frame_number \ - ORDER BY frame_number", fd_table), - ) + ORDER BY frame_number", + fd_table + )) .bind(file_uuid) .fetch_all(pool) .await @@ -166,7 +204,10 @@ pub async fn build_heuristic_scene_meta( let outdoor_ratio = outdoor_objects as f64 / frame_count.max(1) as f64; let total_indicator = indoor_ratio + outdoor_ratio; let (indoor_score, outdoor_score) = if total_indicator > 0.0 { - (indoor_ratio / total_indicator, outdoor_ratio / total_indicator) + ( + indoor_ratio / total_indicator, + outdoor_ratio / total_indicator, + ) } else { (0.5, 0.5) }; @@ -187,17 +228,13 @@ pub async fn build_heuristic_scene_meta( .map(|c| class_frame_presence.get(*c).copied().unwrap_or(0)) .sum(); let person_ratio = person_frames as f64 / frame_count.max(1) as f64; - let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0 - && outdoor_score > 0.3; + let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0 && outdoor_score > 0.3; // Dominant objects: rank by frame presence (not total count) let mut sorted: Vec<_> = class_frame_presence.into_iter().collect(); sorted.sort_by(|a, b| b.1.cmp(&a.1)); - let dominant_objects: Vec = sorted - .iter() - .take(3) - .map(|(cls, _)| cls.clone()) - .collect(); + let dominant_objects: Vec = + sorted.iter().take(3).map(|(cls, _)| cls.clone()).collect(); segments.push(SceneSegmentMeta { segment_index: idx as u32 + 1, @@ -229,12 +266,15 @@ pub async fn build_heuristic_scene_meta( /// Full pipeline entry point: reads CUT data, generates heuristic metadata, writes JSON. /// Called from job_worker post-processing trigger. -pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &str) -> Result { +pub async fn generate_scene_meta( + db: &crate::core::db::PostgresDb, + file_uuid: &str, +) -> Result { let pool = db.pool(); // Read CUT segment boundaries from cut.json - let cut_path = Path::new(crate::core::config::OUTPUT_DIR.as_str()) - .join(format!("{}.cut.json", file_uuid)); + let cut_path = + Path::new(crate::core::config::OUTPUT_DIR.as_str()).join(format!("{}.cut.json", file_uuid)); let segments: Vec<(i64, i64, f64, f64)> = if cut_path.exists() { let cut_str = tokio::fs::read_to_string(&cut_path) .await @@ -250,8 +290,7 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s start_time: f64, end_time: f64, } - let cut: CutJson = serde_json::from_str(&cut_str) - .context("Failed to parse cut.json")?; + let cut: CutJson = serde_json::from_str(&cut_str).context("Failed to parse cut.json")?; cut.scenes .into_iter() .map(|s| (s.start_frame, s.end_frame, s.start_time, s.end_time)) @@ -259,9 +298,10 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s } else { // Fallback: query DB for video duration, make one segment let videos_table = schema::table_name("videos"); - let (total_frames, duration): (Option, Option) = sqlx::query_as( - &format!("SELECT total_frames, duration FROM {} WHERE file_uuid = $1", videos_table), - ) + let (total_frames, duration): (Option, Option) = sqlx::query_as(&format!( + "SELECT total_frames, duration FROM {} WHERE file_uuid = $1", + videos_table + )) .bind(file_uuid) .fetch_optional(pool) .await diff --git a/src/core/processor/mod.rs b/src/core/processor/mod.rs index b783848..ada48ef 100644 --- a/src/core/processor/mod.rs +++ b/src/core/processor/mod.rs @@ -10,6 +10,7 @@ pub mod ocr; pub mod pose; pub mod scene_classification; pub mod story; +pub mod tkg; pub mod visual_chunk; pub mod yolo; @@ -25,7 +26,8 @@ pub use face_recognition::{ RecognizedFaceDetection, }; pub use heuristic_scene::{ - build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta, SceneSegmentMeta, + build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta, + SceneSegmentMeta, }; pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText}; pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult}; @@ -34,5 +36,6 @@ pub use scene_classification::{ SceneSegment, }; pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats}; +pub use tkg::{build_tkg, TkgResult}; pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult}; pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult}; diff --git a/src/core/processor/story.rs b/src/core/processor/story.rs index 83b2b71..aa803ee 100644 --- a/src/core/processor/story.rs +++ b/src/core/processor/story.rs @@ -106,7 +106,10 @@ pub async fn process_story( } // Fallback: Python script - tracing::warn!("[STORY] Native impl failed, falling back to Python: {:?}", result.err()); + tracing::warn!( + "[STORY] Native impl failed, falling back to Python: {:?}", + result.err() + ); let executor = PythonExecutor::new()?; let script_path = executor.script_path("story_processor.py"); @@ -145,7 +148,11 @@ pub async fn process_story( // ── Native implementation ───────────────────────────────────────── -fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -> Result { +fn try_native_story( + _video_path: &str, + output_path: &str, + _uuid: Option<&str>, +) -> Result { let output_dir = Path::new(output_path).parent().unwrap_or(Path::new(".")); let basename = Path::new(output_path) .file_stem() @@ -160,8 +167,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) - let asr_data: AsrData = if asr_path.exists() { let content = std::fs::read_to_string(&asr_path) .with_context(|| format!("Failed to read {:?}", asr_path))?; - serde_json::from_str(&content) - .with_context(|| format!("Failed to parse {:?}", asr_path))? + serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", asr_path))? } else { AsrData { segments: vec![] } }; @@ -169,8 +175,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) - let cut_data: CutData = if cut_path.exists() { let content = std::fs::read_to_string(&cut_path) .with_context(|| format!("Failed to read {:?}", cut_path))?; - serde_json::from_str(&content) - .with_context(|| format!("Failed to parse {:?}", cut_path))? + serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", cut_path))? } else { CutData { scenes: vec![] } }; @@ -376,22 +381,39 @@ fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64 let mut unique: Vec<&String> = objects.iter().collect(); unique.sort(); unique.dedup(); - let objs = unique.iter().take(5).map(|s| (*s).as_str()).collect::>().join(", "); + let objs = unique + .iter() + .take(5) + .map(|s| (*s).as_str()) + .collect::>() + .join(", "); parts.push(format!("Visuals: {}", objs)); } format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | ")) } -fn generate_scene_narrative(objects: &[String], start: f64, end: f64, scene_count: usize) -> String { +fn generate_scene_narrative( + objects: &[String], + start: f64, + end: f64, + scene_count: usize, +) -> String { let mut unique: Vec<&String> = objects.iter().collect(); unique.sort(); unique.dedup(); let top5: Vec<&String> = unique.iter().take(5).cloned().collect(); if !top5.is_empty() { - let obj_str = top5.iter().map(|s| s.as_str()).collect::>().join(", "); - format!("[{:.0}s-{:.0}s] {} scenes. Visuals: {}.", start, end, scene_count, obj_str) + let obj_str = top5 + .iter() + .map(|s| s.as_str()) + .collect::>() + .join(", "); + format!( + "[{:.0}s-{:.0}s] {} scenes. Visuals: {}.", + start, end, scene_count, obj_str + ) } else { format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count) } @@ -408,7 +430,8 @@ mod tests { let text = generate_narrative( &["Hello world".to_string()], &["person".to_string()], - 0.0, 5.0, + 0.0, + 5.0, ); assert!(text.contains("[0s-5s]")); assert!(text.contains("Speech:")); @@ -576,7 +599,10 @@ mod tests { }; assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2); - assert!(result.child_chunks.iter().all(|c| c.parent_chunk_id.is_some())); + assert!(result + .child_chunks + .iter() + .all(|c| c.parent_chunk_id.is_some())); assert!(result.parent_chunks[0].parent_chunk_id.is_none()); } @@ -594,11 +620,7 @@ mod tests { std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap(); std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap(); - let result = try_native_story( - "/dummy.mp4", - out_path.to_str().unwrap(), - None, - ).unwrap(); + let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap(); assert_eq!(result.stats.total_child_chunks, 0); assert_eq!(result.stats.total_parent_chunks, 0); @@ -616,13 +638,17 @@ mod tests { let cut_path = dir.join(format!("{}.cut.json", basename)); let out_path = dir.join(format!("{}.story.json", basename)); - std::fs::write(&asr_path, r#"{ + std::fs::write( + &asr_path, + r#"{ "segments": [ {"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95}, {"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92}, {"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90} ] - }"#).unwrap(); + }"#, + ) + .unwrap(); std::fs::write(&cut_path, r#"{ "scenes": [ @@ -631,11 +657,7 @@ mod tests { ] }"#).unwrap(); - let result = try_native_story( - "/dummy.mp4", - out_path.to_str().unwrap(), - None, - ).unwrap(); + let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap(); assert_eq!(result.stats.asr_children, 3); assert_eq!(result.stats.cut_children, 2); @@ -649,7 +671,11 @@ mod tests { for child in &result.child_chunks { if child.source == "asr" { assert!(child.parent_chunk_id.is_some()); - assert!(child.parent_chunk_id.as_ref().unwrap().starts_with("story_asr_")); + assert!(child + .parent_chunk_id + .as_ref() + .unwrap() + .starts_with("story_asr_")); } } diff --git a/src/core/processor/tkg.rs b/src/core/processor/tkg.rs new file mode 100644 index 0000000..39a7626 --- /dev/null +++ b/src/core/processor/tkg.rs @@ -0,0 +1,703 @@ +use anyhow::{Context, Result}; +use serde::Deserialize; +use sqlx::PgPool; +use std::collections::HashMap; +use std::path::Path; + +use crate::core::db::postgres_db::PostgresDb; + +fn t(name: &str) -> String { + let schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string()); + if schema == "public" { + name.to_string() + } else { + format!("{}.{}", schema, name) + } +} + +// ── Input data structs ──────────────────────────────────────────── + +#[derive(Debug, Deserialize)] +struct YoloJson { + #[serde(default)] + frames: HashMap, +} + +#[derive(Debug, Deserialize)] +struct YoloFrameEntry { + #[serde(default)] + detections: Vec, + #[serde(default)] + objects: Vec, +} + +#[derive(Debug, Deserialize)] +struct YoloDetEntry { + #[serde(default)] + class_name: String, + #[serde(default)] + confidence: f64, +} + +#[derive(Debug, Deserialize)] +struct AsrxJson { + #[serde(default)] + segments: Vec, + #[serde(default)] + speaker_stats: Option>, +} + +#[derive(Debug, Deserialize)] +struct AsrxSegmentEntry { + #[serde(default)] + speaker_id: String, + #[serde(default)] + start_time: f64, + #[serde(default)] + end_time: f64, + #[allow(dead_code)] + start_frame: i64, + #[allow(dead_code)] + end_frame: i64, +} + +#[derive(Debug, Deserialize)] +struct AsrxSpeakerStat { + #[serde(default)] + count: i64, +} + +// ── Face detection trace ────────────────────────────────────────── + +#[derive(Debug, sqlx::FromRow)] +struct FaceTraceRow { + trace_id: i64, + frame_count: i64, + start_f: i64, + end_f: i64, + avg_x: Option, + avg_y: Option, + avg_w: Option, + avg_h: Option, +} + +#[derive(Debug, sqlx::FromRow)] +struct FaceDetectionRow { + trace_id: i64, + frame_number: i64, + #[allow(dead_code)] + x: Option, + #[allow(dead_code)] + y: Option, + #[allow(dead_code)] + width: Option, + #[allow(dead_code)] + height: Option, +} + +// ── Public API ──────────────────────────────────────────────────── + +pub struct TkgResult { + pub face_trace_nodes: usize, + pub object_nodes: usize, + pub speaker_nodes: usize, + pub co_occurrence_edges: usize, + pub speaker_face_edges: usize, + pub face_face_edges: usize, +} + +pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Result { + let pool = db.pool(); + let n_face = build_face_trace_nodes(pool, file_uuid).await?; + let n_objects = build_yolo_object_nodes(pool, file_uuid, output_dir).await?; + let n_speakers = build_speaker_nodes(pool, file_uuid, output_dir).await?; + + let e_co = build_co_occurrence_edges(pool, file_uuid, output_dir).await?; + let e_sf = build_speaker_face_edges(pool, file_uuid, output_dir).await?; + let e_ff = build_face_face_edges(pool, file_uuid).await?; + + Ok(TkgResult { + face_trace_nodes: n_face, + object_nodes: n_objects, + speaker_nodes: n_speakers, + co_occurrence_edges: e_co, + speaker_face_edges: e_sf, + face_face_edges: e_ff, + }) +} + +// ── Node builders ───────────────────────────────────────────────── + +async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str) -> Result { + let face_table = t("face_detections"); + let nodes_table = t("tkg_nodes"); + + let rows = sqlx::query_as::<_, FaceTraceRow>(&format!( + r#" + SELECT trace_id, + COUNT(*)::bigint as frame_count, + MIN(frame_number) as start_f, + MAX(frame_number) as end_f, + AVG(x::float8) as avg_x, + AVG(y::float8) as avg_y, + AVG(width::float8) as avg_w, + AVG(height::float8) as avg_h + FROM {} + WHERE file_uuid = $1 AND trace_id IS NOT NULL + GROUP BY trace_id + ORDER BY trace_id + "#, + face_table + )) + .bind(file_uuid) + .fetch_all(pool) + .await?; + + let mut count = 0; + for row in &rows { + let external_id = format!("trace_{}", row.trace_id); + let label = format!("Face Trace {}", row.trace_id); + let props = serde_json::json!({ + "frame_count": row.frame_count, + "start_frame": row.start_f, + "end_frame": row.end_f, + "avg_bbox": { + "x": row.avg_x.unwrap_or(0.0).round() as i64, + "y": row.avg_y.unwrap_or(0.0).round() as i64, + "width": row.avg_w.unwrap_or(0.0).round() as i64, + "height": row.avg_h.unwrap_or(0.0).round() as i64, + } + }); + + sqlx::query(&format!( + r#" + INSERT INTO {} (node_type, external_id, file_uuid, label, properties) + VALUES ($1, $2, $3, $4, $5::jsonb) + ON CONFLICT (file_uuid, node_type, external_id) + DO UPDATE SET + properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties), + label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label) + "#, + nodes_table + )) + .bind("face_trace") + .bind(&external_id) + .bind(file_uuid) + .bind(&label) + .bind(serde_json::to_string(&props)?) + .execute(pool) + .await?; + + count += 1; + } + + Ok(count) +} + +async fn build_yolo_object_nodes( + pool: &PgPool, + file_uuid: &str, + output_dir: &str, +) -> Result { + let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid)); + if !yolo_path.exists() { + return Ok(0); + } + + let content = std::fs::read_to_string(&yolo_path) + .with_context(|| format!("Failed to read {:?}", yolo_path))?; + let yolo: YoloJson = serde_json::from_str(&content) + .with_context(|| format!("Failed to parse {:?}", yolo_path))?; + + let mut class_counts: HashMap = HashMap::new(); + for fdata in yolo.frames.values() { + let dets = if !fdata.detections.is_empty() { + &fdata.detections + } else { + &fdata.objects + }; + for det in dets { + *class_counts.entry(det.class_name.clone()).or_insert(0) += 1; + } + } + + let nodes_table = t("tkg_nodes"); + let mut count = 0; + for (cls, cnt) in &class_counts { + let props = serde_json::json!({ "total_detections": cnt }); + + sqlx::query(&format!( + r#" + INSERT INTO {} (node_type, external_id, file_uuid, label, properties) + VALUES ($1, $2, $3, $4, $5::jsonb) + ON CONFLICT (file_uuid, node_type, external_id) + DO UPDATE SET + properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties) + "#, + nodes_table + )) + .bind("object") + .bind(cls) + .bind(file_uuid) + .bind(cls) + .bind(serde_json::to_string(&props)?) + .execute(pool) + .await?; + + count += 1; + } + + Ok(count) +} + +async fn build_speaker_nodes(pool: &PgPool, file_uuid: &str, output_dir: &str) -> Result { + let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid)); + if !asrx_path.exists() { + return Ok(0); + } + + let content = std::fs::read_to_string(&asrx_path) + .with_context(|| format!("Failed to read {:?}", asrx_path))?; + let asrx: AsrxJson = serde_json::from_str(&content) + .with_context(|| format!("Failed to parse {:?}", asrx_path))?; + + let stats = asrx.speaker_stats.unwrap_or_default(); + let nodes_table = t("tkg_nodes"); + let mut count = 0; + + for (sid, stat) in &stats { + let props = serde_json::json!({ "segment_count": stat.count }); + + sqlx::query(&format!( + r#" + INSERT INTO {} (node_type, external_id, file_uuid, label, properties) + VALUES ($1, $2, $3, $4, $5::jsonb) + ON CONFLICT (file_uuid, node_type, external_id) + DO UPDATE SET + properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties) + "#, + nodes_table + )) + .bind("speaker") + .bind(sid) + .bind(file_uuid) + .bind(sid) + .bind(serde_json::to_string(&props)?) + .execute(pool) + .await?; + + count += 1; + } + + Ok(count) +} + +// ── Edge builders ───────────────────────────────────────────────── + +async fn build_co_occurrence_edges( + pool: &PgPool, + file_uuid: &str, + output_dir: &str, +) -> Result { + let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid)); + if !yolo_path.exists() { + return Ok(0); + } + + let content = std::fs::read_to_string(&yolo_path)?; + let yolo: YoloJson = serde_json::from_str(&content)?; + + let face_table = t("face_detections"); + let nodes_table = t("tkg_nodes"); + let edges_table = t("tkg_edges"); + + let face_rows = sqlx::query_as::<_, FaceDetectionRow>(&format!( + r#"SELECT trace_id, frame_number, x, y, width, height + FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL + ORDER BY frame_number"#, + face_table + )) + .bind(file_uuid) + .fetch_all(pool) + .await?; + + let mut edge_count = 0; + for face in &face_rows { + let frame_str = face.frame_number.to_string(); + let yolo_frame = match yolo.frames.get(&frame_str) { + Some(f) => f, + None => continue, + }; + + let dets = if !yolo_frame.detections.is_empty() { + &yolo_frame.detections + } else { + &yolo_frame.objects + }; + + if dets.is_empty() { + continue; + } + + let external_id = format!("trace_{}", face.trace_id); + let face_node: Option<(i64,)> = sqlx::query_as(&format!( + "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2", + nodes_table + )) + .bind(file_uuid) + .bind(&external_id) + .fetch_optional(pool) + .await?; + + let face_node_id = match face_node { + Some((id,)) => id, + None => continue, + }; + + for det in dets { + let obj_node: Option<(i64,)> = sqlx::query_as(&format!( + "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='object' AND external_id=$2", + nodes_table + )) + .bind(file_uuid) + .bind(&det.class_name) + .fetch_optional(pool) + .await?; + + let obj_node_id = match obj_node { + Some((id,)) => id, + None => continue, + }; + + let edge_props = serde_json::json!({ + "frame": face.frame_number, + "object_confidence": det.confidence, + }); + + if let Err(e) = sqlx::query(&format!( + r#" + INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties) + VALUES ($1, $2, $3, $4, $5::jsonb) + ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id) + DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties) + "#, + edges_table + )) + .bind("CO_OCCURS_WITH") + .bind(face_node_id) + .bind(obj_node_id) + .bind(file_uuid) + .bind(serde_json::to_string(&edge_props)?) + .execute(pool) + .await + { + tracing::warn!( + "[TKG] Edge insert failed (trace={}, obj={}): {}", + face.trace_id, + det.class_name, + e + ); + continue; + } + + edge_count += 1; + } + } + + Ok(edge_count) +} + +async fn build_speaker_face_edges( + pool: &PgPool, + file_uuid: &str, + output_dir: &str, +) -> Result { + let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid)); + if !asrx_path.exists() { + return Ok(0); + } + + let content = std::fs::read_to_string(&asrx_path)?; + let asrx: AsrxJson = serde_json::from_str(&content)?; + + if asrx.segments.is_empty() { + return Ok(0); + } + + let face_table = t("face_detections"); + let nodes_table = t("tkg_nodes"); + let edges_table = t("tkg_edges"); + + let traces = sqlx::query_as::<_, (i64, i64, i64)>(&format!( + r#"SELECT trace_id, MIN(frame_number) as start_f, MAX(frame_number) as end_f + FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL + GROUP BY trace_id"#, + face_table + )) + .bind(file_uuid) + .fetch_all(pool) + .await?; + + // Calculate fps from last segment + let last = asrx.segments.last().unwrap(); + let fps = if last.end_time > 0.0 { + last.end_frame as f64 / last.end_time + } else { + 30.0 + }; + + let mut edge_count = 0; + + for (tid, sf, ef) in &traces { + let face_ext_id = format!("trace_{}", tid); + let face_node: Option<(i64,)> = sqlx::query_as(&format!( + "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2", + nodes_table + )) + .bind(file_uuid) + .bind(&face_ext_id) + .fetch_optional(pool) + .await?; + + let face_node_id = match face_node { + Some((id,)) => id, + None => continue, + }; + + let face_start_sec = *sf as f64 / fps; + let face_end_sec = *ef as f64 / fps; + + for seg in &asrx.segments { + let seg_start = seg.start_time; + let seg_end = seg.end_time; + let overlap_start = face_start_sec.max(seg_start); + let overlap_end = face_end_sec.min(seg_end); + + if overlap_start >= overlap_end { + continue; + } + + let overlap_dur = overlap_end - overlap_start; + let face_dur = face_end_sec - face_start_sec; + if face_dur <= 0.0 { + continue; + } + let overlap_ratio = overlap_dur / face_dur; + + if overlap_ratio < 0.3 { + continue; + } + + let speaker_node: Option<(i64,)> = sqlx::query_as(&format!( + "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='speaker' AND external_id=$2", + nodes_table + )) + .bind(file_uuid) + .bind(&seg.speaker_id) + .fetch_optional(pool) + .await?; + + let speaker_node_id = match speaker_node { + Some((id,)) => id, + None => continue, + }; + + let edge_props = serde_json::json!({ + "overlap_ratio": (overlap_ratio * 1000.0).round() / 1000.0, + "overlap_duration_s": (overlap_dur * 10.0).round() / 10.0, + "face_time_range": format!("{:.1}-{:.1}s", face_start_sec, face_end_sec), + "speaker_time_range": format!("{:.1}-{:.1}s", seg_start, seg_end), + }); + + sqlx::query(&format!( + r#" + INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties) + VALUES ($1, $2, $3, $4, $5::jsonb) + ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id) + DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties) + "#, + edges_table + )) + .bind("SPEAKS_AS") + .bind(face_node_id) + .bind(speaker_node_id) + .bind(file_uuid) + .bind(serde_json::to_string(&edge_props)?) + .execute(pool) + .await?; + + edge_count += 1; + } + } + + Ok(edge_count) +} + +async fn build_face_face_edges(pool: &PgPool, file_uuid: &str) -> Result { + let face_table = t("face_detections"); + let nodes_table = t("tkg_nodes"); + let edges_table = t("tkg_edges"); + + let rows: Vec<(i64, i64, i64)> = sqlx::query_as(&format!( + r#" + SELECT a.trace_id AS tid_a, b.trace_id AS tid_b, a.frame_number + FROM {} a + JOIN {} b + ON a.file_uuid = b.file_uuid + AND a.frame_number = b.frame_number + AND a.trace_id < b.trace_id + WHERE a.file_uuid = $1 + AND a.trace_id IS NOT NULL + AND b.trace_id IS NOT NULL + ORDER BY a.frame_number + "#, + face_table, face_table + )) + .bind(file_uuid) + .fetch_all(pool) + .await?; + + if rows.is_empty() { + return Ok(0); + } + + // Deduplicate by pair + let mut pair_frames: HashMap<(i64, i64), Vec> = HashMap::new(); + for (tid_a, tid_b, frame) in &rows { + let key = if *tid_a < *tid_b { + (*tid_a, *tid_b) + } else { + (*tid_b, *tid_a) + }; + pair_frames.entry(key).or_default().push(*frame); + } + + let mut edge_count = 0; + for ((tid_a, tid_b), frames) in &pair_frames { + let ext_a = format!("trace_{}", tid_a); + let ext_b = format!("trace_{}", tid_b); + + let n_a: Option<(i64,)> = sqlx::query_as(&format!( + "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2", + nodes_table + )) + .bind(file_uuid) + .bind(&ext_a) + .fetch_optional(pool) + .await?; + + let n_b: Option<(i64,)> = sqlx::query_as(&format!( + "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2", + nodes_table + )) + .bind(file_uuid) + .bind(&ext_b) + .fetch_optional(pool) + .await?; + + let (n_a_id, n_b_id) = match (n_a, n_b) { + (Some((a,)), Some((b,))) => (a, b), + _ => continue, + }; + + let edge_props = serde_json::json!({ + "first_frame": frames[0], + "frame_count": frames.len() as i64, + }); + + sqlx::query(&format!( + r#" + INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties) + VALUES ($1, $2, $3, $4, $5::jsonb) + ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id) + DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties) + "#, + edges_table + )) + .bind("CO_OCCURS_WITH") + .bind(n_a_id) + .bind(n_b_id) + .bind(file_uuid) + .bind(serde_json::to_string(&edge_props)?) + .execute(pool) + .await?; + + edge_count += 1; + } + + Ok(edge_count) +} + +// ── Tests ───────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_yolo_json_deserialize() { + let json = r#"{ + "frames": { + "1": {"time_seconds": 0.0, "detections": [{"class_name": "person", "confidence": 0.9}]}, + "2": {"time_seconds": 1.0, "detections": [{"class_name": "chair", "confidence": 0.8}]} + } + }"#; + let yolo: YoloJson = serde_json::from_str(json).unwrap(); + assert_eq!(yolo.frames.len(), 2); + assert_eq!(yolo.frames["1"].detections[0].class_name, "person"); + } + + #[test] + fn test_yolo_json_empty_frames() { + let json = r#"{"frames": {}}"#; + let yolo: YoloJson = serde_json::from_str(json).unwrap(); + assert!(yolo.frames.is_empty()); + } + + #[test] + fn test_asrx_json_deserialize() { + let json = r#"{ + "segments": [ + {"speaker_id": "SPEAKER_01", "start_time": 0.0, "end_time": 2.0, "start_frame": 0, "end_frame": 60} + ], + "speaker_stats": {"SPEAKER_01": {"count": 1}} + }"#; + let asrx: AsrxJson = serde_json::from_str(json).unwrap(); + assert_eq!(asrx.segments.len(), 1); + assert_eq!(asrx.segments[0].speaker_id, "SPEAKER_01"); + } + + #[test] + fn test_asrx_json_no_stats() { + let json = r#"{"segments": []}"#; + let asrx: AsrxJson = serde_json::from_str(json).unwrap(); + assert!(asrx.speaker_stats.is_none()); + } + + #[test] + fn test_yolo_objects_fallback() { + let json = r#"{ + "frames": { + "1": {"objects": [{"class_name": "person"}]} + } + }"#; + let yolo: YoloJson = serde_json::from_str(json).unwrap(); + assert_eq!(yolo.frames["1"].objects[0].class_name, "person"); + assert!(yolo.frames["1"].detections.is_empty()); + } + + #[test] + fn test_tkg_result() { + let r = TkgResult { + face_trace_nodes: 5, + object_nodes: 10, + speaker_nodes: 3, + co_occurrence_edges: 20, + speaker_face_edges: 8, + face_face_edges: 4, + }; + assert_eq!(r.face_trace_nodes, 5); + assert_eq!(r.object_nodes, 10); + assert_eq!(r.speaker_nodes, 3); + } +} diff --git a/src/core/storage/content_hash.rs b/src/core/storage/content_hash.rs index 72a11bc..a82e73d 100644 --- a/src/core/storage/content_hash.rs +++ b/src/core/storage/content_hash.rs @@ -1,7 +1,7 @@ +use anyhow::Result; use sha2::{Digest, Sha256}; use std::io::Read; use std::path::Path; -use anyhow::Result; /// Compute SHA256 of the entire file content pub fn compute_sha256(path: &Path) -> Result { @@ -10,7 +10,9 @@ pub fn compute_sha256(path: &Path) -> Result { let mut buf = [0u8; 65536]; loop { let n = file.read(&mut buf)?; - if n == 0 { break; } + if n == 0 { + break; + } hasher.update(&buf[..n]); } let hash = format!("{:x}", hasher.finalize()); diff --git a/src/core/tmdb/cache.rs b/src/core/tmdb/cache.rs index 67e340a..6611e6d 100644 --- a/src/core/tmdb/cache.rs +++ b/src/core/tmdb/cache.rs @@ -65,7 +65,11 @@ pub fn tmdb_cache_path(file_uuid: &str) -> PathBuf { pub fn read_tmdb_cache(file_uuid: &str) -> Result { let path = tmdb_cache_path(file_uuid); if !path.exists() { - anyhow::bail!("TMDb cache not found: {} (expected: {})", file_uuid, path.display()); + anyhow::bail!( + "TMDb cache not found: {} (expected: {})", + file_uuid, + path.display() + ); } let content = std::fs::read_to_string(&path) .with_context(|| format!("Failed to read TMDb cache: {}", path.display()))?; @@ -96,9 +100,7 @@ pub fn count_cache_files() -> usize { match std::fs::read_dir(&dir) { Ok(entries) => entries .filter_map(|e| e.ok()) - .filter(|e| { - e.file_name().to_string_lossy().ends_with(".tmdb.json") - }) + .filter(|e| e.file_name().to_string_lossy().ends_with(".tmdb.json")) .count(), Err(_) => 0, } diff --git a/src/core/tmdb/face_agent.rs b/src/core/tmdb/face_agent.rs index cd9a67b..396aa07 100644 --- a/src/core/tmdb/face_agent.rs +++ b/src/core/tmdb/face_agent.rs @@ -46,11 +46,12 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul // Step 2: Load face_detections grouped by trace_id let fd_table = schema::table_name("face_detections"); - let fd_rows = sqlx::query_as::<_, (i32, Vec)>( - &format!("SELECT trace_id, embedding FROM {} \ + let fd_rows = sqlx::query_as::<_, (i32, Vec)>(&format!( + "SELECT trace_id, embedding FROM {} \ WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \ - ORDER BY trace_id", fd_table), - ) + ORDER BY trace_id", + fd_table + )) .bind(file_uuid) .fetch_all(pool) .await?; @@ -156,9 +157,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul let fd_table = schema::table_name("face_detections"); let mut after_qc = HashMap::new(); for (&tid, &(id, ref name)) in &matched { - let cnt: i64 = sqlx::query_scalar( - &format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2", fd_table), - ) + let cnt: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2", + fd_table + )) .bind(file_uuid) .bind(tid) .fetch_one(pool) @@ -194,9 +196,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul // Step 5: Update DB let mut updated = 0usize; for (&tid, &(id, _)) in &matched { - let r = sqlx::query( - &format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table), - ) + let r = sqlx::query(&format!( + "UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", + fd_table + )) .bind(id) .bind(file_uuid) .bind(tid) @@ -223,9 +226,8 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) -> Result { let fd_table = schema::table_name("face_detections"); // Find all collision pairs: same identity, same frame, different trace - let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>( - &format!( - "SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \ + let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(&format!( + "SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \ FROM {} a \ JOIN {} b \ ON a.file_uuid = b.file_uuid \ @@ -235,9 +237,8 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) AND a.identity_id IS NOT NULL \ AND a.identity_id = b.identity_id \ ORDER BY a.identity_id, a.frame_number", - fd_table, fd_table - ), - ) + fd_table, fd_table + )) .bind(file_uuid) .fetch_all(pool) .await?; @@ -256,25 +257,36 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) let mut unbound = 0usize; for ((id, ta, tb), overlap_frames) in &collision_groups { // Get face detection count for each trace - let cnt_a: i64 = sqlx::query_scalar( - &format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table) - ) - .bind(file_uuid).bind(ta).bind(id) - .fetch_one(pool).await.unwrap_or(0); + let cnt_a: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", + fd_table + )) + .bind(file_uuid) + .bind(ta) + .bind(id) + .fetch_one(pool) + .await + .unwrap_or(0); - let cnt_b: i64 = sqlx::query_scalar( - &format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table) - ) - .bind(file_uuid).bind(tb).bind(id) - .fetch_one(pool).await.unwrap_or(0); + let cnt_b: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", + fd_table + )) + .bind(file_uuid) + .bind(tb) + .bind(id) + .fetch_one(pool) + .await + .unwrap_or(0); // Unbind the trace with fewer detections (likely the false positive) let victim = if cnt_a <= cnt_b { *ta } else { *tb }; let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b }; - sqlx::query( - &format!("UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2", fd_table), - ) + sqlx::query(&format!( + "UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2", + fd_table + )) .bind(file_uuid) .bind(victim) .execute(pool) diff --git a/src/core/tmdb/probe.rs b/src/core/tmdb/probe.rs index e54c6de..25ad064 100644 --- a/src/core/tmdb/probe.rs +++ b/src/core/tmdb/probe.rs @@ -45,7 +45,14 @@ fn extract_movie_name(filename: &str) -> Option { .file_stem() .and_then(|s| s.to_str())?; - let cleaned = name.replace(['.', '_'], " ").trim().to_string(); + // Take only the part before year patterns or separators + let cleaned = name + .replace(['.', '_'], " ") + .split(|c: char| c == '(' || c == '[' || c == '│' || c == '|') + .next() + .unwrap_or(&name) + .trim() + .to_string(); if cleaned.is_empty() || cleaned.len() < 3 { return None; @@ -53,10 +60,7 @@ fn extract_movie_name(filename: &str) -> Option { Some(cleaned) } -pub async fn probe_from_cache( - db: &PostgresDb, - file_uuid: &str, -) -> Result { +pub async fn probe_from_cache(db: &PostgresDb, file_uuid: &str) -> Result { let cache = crate::core::tmdb::cache::read_tmdb_cache(file_uuid)?; if cache.identities.is_empty() && !cache.cast.is_empty() { return create_identities_from_data(db, file_uuid, &cache.movie, &cache.cast).await; @@ -83,7 +87,8 @@ async fn upsert_identities_from_disk( } match std::fs::read_to_string(&path) { Ok(content) => { - match serde_json::from_str::(&content) { + match serde_json::from_str::(&content) + { Ok(identity_file) => { let identities_table = crate::core::db::schema::table_name("identities"); let result = sqlx::query(&format!( @@ -106,21 +111,35 @@ async fn upsert_identities_from_disk( match result { Ok(_) => { - info!("[TMDB] Upserted identity: {} (uuid={})", identity_file.name, identity_file.identity_uuid); + info!( + "[TMDB] Upserted identity: {} (uuid={})", + identity_file.name, identity_file.identity_uuid + ); identities_created += 1; } Err(e) => { - warn!("[TMDB] Failed to upsert identity '{}': {}", identity_file.name, e); + warn!( + "[TMDB] Failed to upsert identity '{}': {}", + identity_file.name, e + ); } } } Err(e) => { - warn!("[TMDB] Failed to parse identity file {}: {}", path.display(), e); + warn!( + "[TMDB] Failed to parse identity file {}: {}", + path.display(), + e + ); } } } Err(e) => { - warn!("[TMDB] Failed to read identity file {}: {}", path.display(), e); + warn!( + "[TMDB] Failed to read identity file {}: {}", + path.display(), + e + ); } } } @@ -181,7 +200,9 @@ pub async fn create_identities_from_data( continue; } - let profile_url = member.profile_path.as_ref() + let profile_url = member + .profile_path + .as_ref() .map(|p| format!("https://image.tmdb.org/t/p/w185{}", p)); let metadata = serde_json::json!({ @@ -226,8 +247,13 @@ pub async fn create_identities_from_data( member.name, member.character, uuid_str ); identities_created += 1; - if let Err(e) = crate::core::identity::storage::save_identity_file(db, &uuid_str).await { - warn!("[TMDB] Failed to save identity file for {}: {}", member.name, e); + if let Err(e) = + crate::core::identity::storage::save_identity_file(db, &uuid_str).await + { + warn!( + "[TMDB] Failed to save identity file for {}: {}", + member.name, e + ); } // Download and save TMDb profile image locally if let Some(url) = &profile_url { @@ -393,8 +419,10 @@ pub async fn probe_movie( overview: movie.overview.clone(), poster_path: movie.poster_path.clone(), }; - let cache_cast: Vec = credits.cast.iter().map(|m| { - cache::TmdbCastMember { + let cache_cast: Vec = credits + .cast + .iter() + .map(|m| cache::TmdbCastMember { id: m.id, name: m.name.clone(), character: m.character.clone(), @@ -410,8 +438,8 @@ pub async fn probe_movie( deathday: None, gender: None, homepage: None, - } - }).collect(); + }) + .collect(); // Write TMDb cache so probe_from_cache can be used next time let cache_obj = cache::TmdbCache { diff --git a/src/core/tmdb/status.rs b/src/core/tmdb/status.rs index ef134cf..95daeea 100644 --- a/src/core/tmdb/status.rs +++ b/src/core/tmdb/status.rs @@ -60,7 +60,11 @@ pub async fn check_tmdb_api() -> TmdbResourceStatus { enabled: *config::tmdb::PROBE_ENABLED, api_reachable: Some(reachable), api_latency_ms: Some(latency), - api_error: if reachable { None } else { Some(format!("HTTP {}", resp.status())) }, + api_error: if reachable { + None + } else { + Some(format!("HTTP {}", resp.status())) + }, last_check_at: Some(chrono::Utc::now().to_rfc3339()), } } @@ -84,9 +88,10 @@ pub fn count_cache_files() -> usize { pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result { let identities_table = crate::core::db::schema::table_name("identities"); - let count: i64 = sqlx::query_scalar( - &format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table) - ) + let count: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", + identities_table + )) .fetch_one(pool) .await?; Ok(count) @@ -94,9 +99,10 @@ pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result { pub async fn count_tmdb_identities_with_embedding(pool: &sqlx::PgPool) -> Result { let identities_table = crate::core::db::schema::table_name("identities"); - let count: i64 = sqlx::query_scalar( - &format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL", identities_table) - ) + let count: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL", + identities_table + )) .fetch_one(pool) .await?; Ok(count) diff --git a/src/player/chunk_selector.rs b/src/player/chunk_selector.rs index bd87f4e..3da2439 100644 --- a/src/player/chunk_selector.rs +++ b/src/player/chunk_selector.rs @@ -147,7 +147,7 @@ impl ChunkSelector { // Try to match UUID - either exact match or partial match let _uuid = payload - .and_then(|p| p.get("uuid")) + .and_then(|p| p.get("file_uuid")) .and_then(|v| v.as_str()) .unwrap_or(""); diff --git a/src/playground.rs b/src/playground.rs index 4cf8057..a36fa6e 100644 --- a/src/playground.rs +++ b/src/playground.rs @@ -8,10 +8,10 @@ use tracing::{info, warn}; use momentry_core::core::api_key::{ApiKeyService, ApiKeyType}; use momentry_core::core::chunk::types::{Chunk, ChunkRule, ChunkType}; +use momentry_core::core::db::schema; use momentry_core::core::db::Database; use momentry_core::core::time::FrameTime; use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi}; -use momentry_core::core::db::schema; use momentry_core::{ Embedder, OutputDir, PostgresDb, QdrantDb, RedisClient, VectorPayload, VideoRecord, VideoStatus, }; @@ -1985,7 +1985,8 @@ async fn main() -> Result<()> { chunk_id: None, created_at: String::new(), }; - db.store_pre_chunk(&uuid, "asr", serde_json::to_value(&pre_chunk)?).await?; + db.store_pre_chunk(&uuid, "asr", serde_json::to_value(&pre_chunk)?) + .await?; asr_pre_chunk_ids.push(i as i64); } @@ -2009,7 +2010,8 @@ async fn main() -> Result<()> { chunk_id: None, created_at: String::new(), }; - db.store_pre_chunk(&uuid, "cut", serde_json::to_value(&pre_chunk)?).await?; + db.store_pre_chunk(&uuid, "cut", serde_json::to_value(&pre_chunk)?) + .await?; cut_pre_chunk_ids.push(i as i64); } @@ -2037,7 +2039,8 @@ async fn main() -> Result<()> { chunk_id: None, created_at: String::new(), }; - db.store_pre_chunk(&uuid, "time", serde_json::to_value(&pre_chunk)?).await?; + db.store_pre_chunk(&uuid, "time", serde_json::to_value(&pre_chunk)?) + .await?; time_pre_chunk_ids.push(time_pre_chunk_ids.len() as i64); time_start = time_end; } @@ -2117,7 +2120,8 @@ async fn main() -> Result<()> { frame_path: None, created_at: String::new(), }; - db.store_frame(&uuid, *frame_num as i64, serde_json::to_value(&frame)?).await?; + db.store_frame(&uuid, *frame_num as i64, serde_json::to_value(&frame)?) + .await?; } println!("Stored {} frames", all_frames.len()); @@ -2357,8 +2361,7 @@ async fn main() -> Result<()> { for frame in &context_frames { if let Some(objects) = frame["yolo_objects"].as_array() { for obj in objects { - if let Some(class_name) = - obj.get("class_name").and_then(|v| v.as_str()) + if let Some(class_name) = obj.get("class_name").and_then(|v| v.as_str()) { *all_objects.entry(class_name.to_string()).or_insert(0) += 1; } @@ -2494,9 +2497,11 @@ async fn main() -> Result<()> { } let qdrant_payload = VectorPayload { - uuid: chunk.uuid.clone(), + file_uuid: chunk.uuid.clone(), chunk_id: chunk.chunk_id.clone(), chunk_type: "sentence".to_string(), + start_frame: chunk.start_frame, + end_frame: chunk.end_frame, start_time: chunk.start_time().seconds(), end_time: chunk.end_time().seconds(), text: Some(text.to_string()), diff --git a/src/verification/verifier.rs b/src/verification/verifier.rs index 5f595e2..d6bab51 100644 --- a/src/verification/verifier.rs +++ b/src/verification/verifier.rs @@ -79,12 +79,8 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification None => VerificationResult::ok(proc_name, file_uuid), } } - ProcessorType::Yolo => { - VerificationResult::ok(proc_name, file_uuid) - } - ProcessorType::Face => { - VerificationResult::ok(proc_name, file_uuid) - } + ProcessorType::Yolo => VerificationResult::ok(proc_name, file_uuid), + ProcessorType::Face => VerificationResult::ok(proc_name, file_uuid), ProcessorType::Ocr => { let frames = value.get("frames").and_then(|v| v.as_array()); match frames { @@ -114,7 +110,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification ProcessorType::FiveW1H => { let scenes = value.get("scenes").and_then(|v| v.as_array()); match scenes { - Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"), + Some(s) if s.is_empty() => { + VerificationResult::fail(proc_name, file_uuid, "0 scenes") + } Some(_) => VerificationResult::ok(proc_name, file_uuid), None => VerificationResult::ok(proc_name, file_uuid), } diff --git a/src/watcher/watcher.rs b/src/watcher/watcher.rs index b507838..1796360 100644 --- a/src/watcher/watcher.rs +++ b/src/watcher/watcher.rs @@ -37,7 +37,8 @@ pub async fn run_watcher() -> Result<()> { info!("Watch directories: {:?}", dirs); tokio::spawn(async move { - let mut interval = time::interval(std::time::Duration::from_millis(config.poll_interval_ms)); + let mut interval = + time::interval(std::time::Duration::from_millis(config.poll_interval_ms)); let mut known = std::collections::HashSet::new(); loop { interval.tick().await; @@ -109,15 +110,43 @@ async fn auto_register_file(file_path: &str) { } }; - let file_name = pre.get("file_name").and_then(|v| v.as_str()).unwrap_or("unknown").to_string(); + let file_name = pre + .get("file_name") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); let probe = pre.get("probe_json").cloned().unwrap_or_default(); - let file_type = pre.get("file_type").and_then(|v| v.as_str()).unwrap_or("unknown").to_string(); - let canonical_path = pre.get("file_path").and_then(|v| v.as_str()).unwrap_or(file_path).to_string(); + let file_type = pre + .get("file_type") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + let canonical_path = pre + .get("file_path") + .and_then(|v| v.as_str()) + .unwrap_or(file_path) + .to_string(); - let duration = probe.get("format").and_then(|f| f.get("duration")).and_then(|v| v.as_f64()).unwrap_or(0.0); - let width = probe.get("format").and_then(|f| f.get("width")).and_then(|v| v.as_u64()).unwrap_or(0) as u32; - let height = probe.get("format").and_then(|f| f.get("height")).and_then(|v| v.as_u64()).unwrap_or(0) as u32; - let fps_val = probe.get("format").and_then(|f| f.get("fps")).and_then(|v| v.as_f64()).unwrap_or(0.0); + let duration = probe + .get("format") + .and_then(|f| f.get("duration")) + .and_then(|v| v.as_f64()) + .unwrap_or(0.0); + let width = probe + .get("format") + .and_then(|f| f.get("width")) + .and_then(|v| v.as_u64()) + .unwrap_or(0) as u32; + let height = probe + .get("format") + .and_then(|f| f.get("height")) + .and_then(|v| v.as_u64()) + .unwrap_or(0) as u32; + let fps_val = probe + .get("format") + .and_then(|f| f.get("fps")) + .and_then(|v| v.as_f64()) + .unwrap_or(0.0); let record = VideoRecord { id: 0, @@ -158,7 +187,10 @@ async fn auto_register_file(file_path: &str) { match db.register_video(&record).await { Ok(id) => info!("[WATCHER] Auto-registered {} (id={})", record.file_uuid, id), - Err(e) => warn!("[WATCHER] Auto-register failed for {}: {}", record.file_uuid, e), + Err(e) => warn!( + "[WATCHER] Auto-register failed for {}: {}", + record.file_uuid, e + ), } } @@ -175,10 +207,14 @@ pub async fn pre_process_file(file_path: &str) -> Option { let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR") .unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string()); - let birthday = std::fs::metadata(&path).ok() + let birthday = std::fs::metadata(&path) + .ok() .and_then(|m| m.modified().ok()) .map(|t| { - let secs = t.duration_since(std::time::UNIX_EPOCH).unwrap_or_default().as_secs(); + let secs = t + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); chrono::DateTime::from_timestamp(secs as i64, 0) .map(|dt| dt.to_rfc3339()) .unwrap_or_else(|| chrono::Utc::now().to_rfc3339()) @@ -186,9 +222,8 @@ pub async fn pre_process_file(file_path: &str) -> Option { .unwrap_or_else(|| chrono::Utc::now().to_rfc3339()); let mac = crate::core::storage::uuid::get_mac_address(); - let file_uuid = crate::core::storage::uuid::compute_birth_uuid( - &mac, &birthday, &canonical_str, &filename, - ); + let file_uuid = + crate::core::storage::uuid::compute_birth_uuid(&mac, &birthday, &canonical_str, &filename); let pre_path = std::path::PathBuf::from(&output_dir).join(format!("{}.pre.json", file_uuid)); if pre_path.exists() { @@ -198,15 +233,22 @@ pub async fn pre_process_file(file_path: &str) -> Option { info!("[PRE-PROCESS] Pre-processing: {} → {}", filename, file_uuid); - let content_hash = crate::core::storage::content_hash::compute_sha256(&path).unwrap_or_default(); + let content_hash = + crate::core::storage::content_hash::compute_sha256(&path).unwrap_or_default(); let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR") .unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string()); let python_path = std::env::var("MOMENTRY_PYTHON_PATH") .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string()); - let probe_json = crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await; + let probe_json = + crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await; - let file_type = probe_json.get("format").and_then(|f| f.get("file_type")).and_then(|v| v.as_str()).unwrap_or("unknown").to_string(); + let file_type = probe_json + .get("format") + .and_then(|f| f.get("file_type")) + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); let pre_data = serde_json::json!({ "file_name": filename, diff --git a/src/worker/job_worker.rs b/src/worker/job_worker.rs index 2843488..035f4a8 100644 --- a/src/worker/job_worker.rs +++ b/src/worker/job_worker.rs @@ -12,12 +12,13 @@ use crate::core::chunk::{rule1_ingest, rule3_ingest}; use crate::core::config::OUTPUT_DIR; use crate::core::db::qdrant_db::QdrantDb; use crate::core::db::{ - schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus, + schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, + VideoStatus, }; use crate::core::embedding::Embedder; +use crate::core::processor::heuristic_scene::generate_scene_meta; use crate::worker::config::WorkerConfig; use crate::worker::processor::{ProcessorPool, ProcessorTask}; -use crate::core::processor::heuristic_scene::generate_scene_meta; use crate::worker::resources::SystemResources; use sqlx::PgPool; @@ -70,14 +71,15 @@ impl JobWorker { // Reset stale running jobs: jobs stuck in 'running' with no active processor results let monitor_jobs_table = schema::table_name("monitor_jobs"); let processor_results_table = schema::table_name("processor_results"); - if let Err(e) = sqlx::query( - &format!("UPDATE {} SET status = 'pending', updated_at = NOW() + if let Err(e) = sqlx::query(&format!( + "UPDATE {} SET status = 'pending', updated_at = NOW() WHERE status = 'running' AND id NOT IN ( SELECT DISTINCT job_id FROM {} WHERE status IN ('pending', 'running') - )", monitor_jobs_table, processor_results_table), - ) + )", + monitor_jobs_table, processor_results_table + )) .execute(self.db.pool()) .await { @@ -608,12 +610,23 @@ impl JobWorker { } let fu = uuid; - let rule1 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1")); + let rule1 = check!(&format!( + "SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1" + )); let vector = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1")); - let rule3 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1")); + let rule3 = check!(&format!( + "SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1" + )); let trace = check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL")); - let tkg = check!(&format!("SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1", schema::table_name("tkg_nodes"))); - let scene_meta = std::path::Path::new(&format!("{}/{fu}.scene_meta.json", crate::core::config::OUTPUT_DIR.as_str())).exists(); + let tkg = check!(&format!( + "SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1", + schema::table_name("tkg_nodes") + )); + let scene_meta = std::path::Path::new(&format!( + "{}/{fu}.scene_meta.json", + crate::core::config::OUTPUT_DIR.as_str() + )) + .exists(); let five_w1h = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != '' LIMIT 1")); let all_ok = rule1 && vector && rule3 && trace && tkg && scene_meta && five_w1h; @@ -847,26 +860,23 @@ impl JobWorker { Err(e) => error!("❌ Trace chunk ingestion failed: {}", e), } - // Build Temporal Knowledge Graph (TKG) - info!("📝 Building TKG graph..."); - let executor = match crate::core::processor::PythonExecutor::new() { - Ok(ex) => ex, - Err(e) => { - error!("Failed to create PythonExecutor for TKG: {}", e); - return; - } - }; - match executor - .run( - "tkg_builder.py", - &["--file-uuid", &uuid_clone], - Some(&uuid_clone), - "TKG_BUILDER", - Some(std::time::Duration::from_secs(300)), - ) - .await + // Build Temporal Knowledge Graph (TKG) — native Rust + info!("📝 Building TKG graph (Rust)..."); + let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR") + .unwrap_or_else(|_| ".".to_string()); + match crate::core::processor::tkg::build_tkg( + db_clone.as_ref(), + &uuid_clone, + &output_dir, + ) + .await { - Ok(()) => info!("✅ TKG built for {}", uuid_clone), + Ok(r) => info!( + "✅ TKG built for {}: {} face, {} obj, {} spk, {} co, {} sf, {} ff edges", + uuid_clone, + r.face_trace_nodes, r.object_nodes, r.speaker_nodes, + r.co_occurrence_edges, r.speaker_face_edges, r.face_face_edges, + ), Err(e) => error!("❌ TKG build failed for {}: {}", uuid_clone, e), } } @@ -898,7 +908,7 @@ impl JobWorker { let ids = sqlx::query_scalar::<_, uuid::Uuid>( "SELECT DISTINCT i.uuid FROM identities i \ JOIN face_detections fd ON fd.identity_id = i.id \ - WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL" + WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL", ) .bind(&uuid_clone) .fetch_all(db_clone.pool()) @@ -907,12 +917,18 @@ impl JobWorker { for id_uuid in &ids { let us = id_uuid.to_string().replace('-', ""); if let Err(e) = crate::core::identity::storage::save_identity_file( - &db_clone, &us - ).await { + &db_clone, &us, + ) + .await + { warn!("[P2.5] Failed to save identity file {}: {}", us, e); } } - info!("[P2.5] {} identity files saved for {}", ids.len(), uuid_clone); + info!( + "[P2.5] {} identity files saved for {}", + ids.len(), + uuid_clone + ); } Err(e) => error!("❌ TMDb face matching failed for {}: {}", uuid_clone, e), } @@ -1088,8 +1104,8 @@ impl JobWorker { let pool = db.pool(); let chunk_table = schema::table_name("chunk"); - let rows = sqlx::query_as::<_, (String, String, String, f64, f64, String)>( - &format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table), + let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64, String)>( + &format!("SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table), ) .bind(uuid) .fetch_all(pool) @@ -1107,7 +1123,17 @@ impl JobWorker { ); let mut stored = 0usize; - for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows { + for ( + chunk_id, + _chunk_type, + text, + start_frame, + end_frame, + start_time, + end_time, + _content_str, + ) in &rows + { if text.is_empty() { continue; } @@ -1119,9 +1145,11 @@ impl JobWorker { continue; } let payload = VectorPayload { - uuid: uuid.to_string(), + file_uuid: uuid.to_string(), chunk_id: chunk_id.clone(), chunk_type: "sentence".to_string(), + start_frame: *start_frame, + end_frame: *end_frame, start_time: *start_time, end_time: *end_time, text: Some(text.clone()), diff --git a/src/worker/processor.rs b/src/worker/processor.rs index 8b15ff5..392c648 100644 --- a/src/worker/processor.rs +++ b/src/worker/processor.rs @@ -237,11 +237,19 @@ impl ProcessorPool { let key = format!("{}job:{}:processor:{}", prefix, &job.uuid, &processor_name); let now = chrono::Utc::now().to_rfc3339(); let _: Option = redis::cmd("HSET") - .arg(&key).arg("started_at").arg(&now) - .query_async(&mut conn).await.ok(); + .arg(&key) + .arg("started_at") + .arg(&now) + .query_async(&mut conn) + .await + .ok(); let _: Option = redis::cmd("HSET") - .arg(&key).arg("embedding_started_at").arg(&now) - .query_async(&mut conn).await.ok(); + .arg(&key) + .arg("embedding_started_at") + .arg(&now) + .query_async(&mut conn) + .await + .ok(); } // Subscribe to Redis progress pub/sub and update processor hash in real-time @@ -254,10 +262,12 @@ impl ProcessorPool { let cb_processor = sub_processor.clone(); if let Err(e) = sub_redis .subscribe_and_callback(&sub_uuid, move |msg| { - tracing::info!("[Subscriber] Got msg for={} cur={} tot={}", - msg.processor, + tracing::info!( + "[Subscriber] Got msg for={} cur={} tot={}", + msg.processor, msg.data.current.unwrap_or(0), - msg.data.total.unwrap_or(0)); + msg.data.total.unwrap_or(0) + ); if msg.processor == cb_processor { let cur = msg.data.current.unwrap_or(0); let tot = msg.data.total.unwrap_or(0); @@ -266,11 +276,18 @@ impl ProcessorPool { let u = cb_uuid.clone(); let p = cb_processor.clone(); tokio::spawn(async move { - match r.update_worker_processor_status( - &u, &p, "running", None, - cur, oc, tot, 0, 0, - ).await { - Ok(_) => tracing::info!("[Subscriber] Updated {}: cur={} tot={}", p, cur, tot), + match r + .update_worker_processor_status( + &u, &p, "running", None, cur, oc, tot, 0, 0, + ) + .await + { + Ok(_) => tracing::info!( + "[Subscriber] Updated {}: cur={} tot={}", + p, + cur, + tot + ), Err(e) => tracing::error!("[Subscriber] FAILED {}: {}", p, e), } }); @@ -756,9 +773,11 @@ impl ProcessorPool { .enumerate() .map(|(i, segment)| { // Prefer ASR output frames, fallback to time-based conversion - let start_frame = segment.start_frame + let start_frame = segment + .start_frame .unwrap_or_else(|| (segment.start_time * fps).round() as i64); - let end_frame = segment.end_frame + let end_frame = segment + .end_frame .unwrap_or_else(|| (segment.end_time * fps).round() as i64); let data = serde_json::json!({ "text": segment.text, @@ -892,7 +911,11 @@ impl ProcessorPool { tracing::info!( "Storing {} Face pre-chunks + {} detections for video {}", frames_count, - face_result.frames.iter().map(|f| f.faces.len()).sum::(), + face_result + .frames + .iter() + .map(|f| f.faces.len()) + .sum::(), uuid ); @@ -911,7 +934,10 @@ impl ProcessorPool { detections_to_store.push(( frame.frame as i64, frame.timestamp, - face.x, face.y, face.width, face.height, + face.x, + face.y, + face.width, + face.height, face.confidence, )); } @@ -1170,9 +1196,10 @@ impl ProcessorPool { "top_5": scene.top_5, }); let chunk_table = crate::core::db::schema::table_name("chunk"); - let _ = sqlx::query( - &format!("UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3", chunk_table) - ) + let _ = sqlx::query(&format!( + "UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3", + chunk_table + )) .bind(&meta) .bind(uuid) .bind(&chk_id)