feat: trace-level matching, health watcher/worker status, timezone config

This commit is contained in:
Accusys
2026-05-21 01:08:30 +08:00
parent 8ede4be159
commit bebaa743ed
60 changed files with 6110 additions and 1586 deletions

View File

@@ -29,7 +29,7 @@ REDIS_PASSWORD=accusys
# Qdrant Vector Database - Collection isolation
QDRANT_URL=http://localhost:6333
QDRANT_API_KEY=Test3200Test3200Test3200
QDRANT_COLLECTION=momentry_dev_v1
QDRANT_COLLECTION=momentry_dev_rule1_v2
# Paths
MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev

View File

@@ -22,6 +22,9 @@ QDRANT_COLLECTION=momentry_rule1
# === API Keys ===
MOMENTRY_API_KEY=muser_your_key_here
MOMENTRY_DEMO_API_KEY=muser_your_demo_key_here
JWT_SECRET=your_jwt_secret_here_change_in_production
SFTPGO_BASE_URL=http://127.0.0.1:8080
TMDB_API_KEY=your_tmdb_api_key_here
# === LLM ===

34
Cargo.lock generated
View File

@@ -178,6 +178,18 @@ dependencies = [
"password-hash",
]
[[package]]
name = "async-compression"
version = "0.4.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac"
dependencies = [
"compression-codecs",
"compression-core",
"pin-project-lite",
"tokio",
]
[[package]]
name = "async-lock"
version = "3.4.2"
@@ -615,6 +627,23 @@ dependencies = [
"static_assertions",
]
[[package]]
name = "compression-codecs"
version = "0.4.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf"
dependencies = [
"compression-core",
"flate2",
"memchr",
]
[[package]]
name = "compression-core"
version = "0.4.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789"
[[package]]
name = "concurrent-queue"
version = "2.5.0"
@@ -4861,13 +4890,18 @@ version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
dependencies = [
"async-compression",
"bitflags 2.11.1",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-body-util",
"iri-string",
"pin-project-lite",
"tokio",
"tokio-util",
"tower 0.5.3",
"tower-layer",
"tower-service",

View File

@@ -55,7 +55,7 @@ sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "js
mongodb = { version = "2", features = ["tokio-runtime"] }
bson = { version = "2", features = ["chrono-0_4"] }
qdrant-client = "1.7"
reqwest = { version = "0.12", features = ["json"] }
reqwest = { version = "0.12", features = ["json", "gzip"] }
pgvector = { version = "0.3", features = ["sqlx"] }
# HTTP Server

View File

@@ -60,7 +60,8 @@ fn sha256_hex(data: &[u8]) -> String {
use std::io::Write;
use std::process::{Command, Stdio};
if let Ok(mut child) = Command::new("shasum")
.arg("-a").arg("256")
.arg("-a")
.arg("256")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()

View File

@@ -103,7 +103,7 @@ f4d1b4334a49357b74b80e390ad5a3d16263e51cbe5cab661af92bd2e9721f02 ./face_process
802015c73dfce0866f2a0bc94c645aa35ba30a6de78244af23090bb1f1828c6e ./face_processor_mps.py
96ffdbde3f4d87e9942f9e1f4c93cbd999dc404b43e00d4cdcbb22de3c0f16b7 ./face_processor_optimized.py
17e7d0bd142bddfead94b1dd959c1f41c0dad7063ffc677dff1a99d62aab6cf8 ./face_processor_v1.py
15877adf5c160d861da688a25b93fd2edc189f326f9646ffb4de063e554f773a ./face_processor.py
d6ddad29a5e53b43b887554072d7965f0535e47fb62dad1a8b87e44fa1be6015 ./face_processor.py
8edab61189ad1a8fa60c203077e814e82d46c5bae67054fa2ab1958e199c05f9 ./face_recognition_processor.py
9ea19f357b3fcec6c8b3875c538e53cb46e407ab188cd544963e0123e535fa03 ./face_registration.py
72648816de611fd9b84d2b98c177b8b4f24374024b69184e8151c06cf44d633b ./face_statistics_report.py
@@ -174,15 +174,15 @@ fd39b779a0337f521940f3f7b159931f1f207f200eefd610183781fdcf3dfafd ./object_searc
42d2952fc78b57302b0d12bc3d45790a2c2c46d4ffa3c713a82686134bd63f13 ./ocr_benchmark_runner.py
7b3ccb5c4ddd4c62c5ad04d0e3aafaecc2c1441012b6a98613cdcf055e2e50e8 ./ocr_processor_contract_v1.py
271023eec42d6be4a1ce6ae2ce3f29e825210a57e6bb37554a6f7fdf54616f9a ./ocr_processor_mps.py
e666bc8488bb93cc45bcd6a70a4ef38a74af6631d7b87a789381bfbdab4569f5 ./ocr_processor.py
2e73c41285e52ef013594fcd4d20df9f5781bfc26bcf62e54dd2c04ec44200c3 ./ocr_processor.py
62196108cb3337b5f9a873d70d2981ac8f49152369afbcc8a12b3a13de579e80 ./opencv_stamp_search.py
b2e8d552c272fd173c77693e9453a85fe16dfc12f7c2cd304d299c6188c14077 ./paligemma_vs_gdino.py
2c6767e763cf69917af832b8383528f754c65db5a3f02cb4d63e3f896d5920b6 ./parent_chunk_5w1h.py
1534d5b7617dbae77f7a37a2c33a89b90f965247a6828f00b73ea6b720f6f4fc ./parent_chunk_5w1h.py
5208c738d4b615282813d351daf09872ce516121bb604caa64968ef5e52c53d3 ./pipeline_checklist.py
8f80c3a2be5c330e2d1853d9250a171c75db84598dbf3304280c42237ed4fb1f ./pipeline_status.py
94db44c0f49115a677d117d4901a1b7991c1517905300eaa495dd62b8ac1c79c ./pose_processor_contract_v1.py
167dee5e42c6bd46674bcffcfd92f368fc0b48a1f42c459c806853b281bc6482 ./pose_processor_mps.py
a1cdb1efd992d229829ae156d8aa439347c51d664e2a606c14d2274a11c93a66 ./pose_processor.py
a6ef3a785ef5c6dc47fa38dbed80d76bc7d4bf48cbaf0f7edb3d26df98d7262c ./pose_processor.py
45e6798dc5900f2f7c8776a2d260c122aae5068a075256b8a5c02e8d0be6c131 ./probe_file.py
139a68b5915680ec697d4bb5420adbd20b89637de2c16a15d68aca4fc22da02b ./qa/executor.py
4a59b36c29e1ee6e2b169db3b0201d2f7088c6ccbfdf642a3b522aeb182bbeea ./qa/judges/facenet.py
@@ -197,7 +197,7 @@ c4e4424aad1847d822e9cf7dc98a1b2e903735a61e8ec056c6a9be75f79486bd ./qa/pipeline.
01c7b3c30c1531224f9605f0ee633285fe8489ab2d0a3c9c6a41f2b2b60d6626 ./quick_stamp_search.py
e3143673a2bff6139e05c82446fd8770c4b7e59a854a42c3b29662f5ac75efe2 ./rebuild_parents.py
4aa98981632d4f8a11039c510e86aa296ae1cd4b399fc871ed664ac11e445bd9 ./rebuild_story_content.py
45c437b412d34c7c6d5758e94b7205a2956b32b6fe170c3f56db7231ec6f5a15 ./redis_publisher.py
205cfc47b603b5ab94d97dae8c25486b342b7c2858afe6d6dae27615ca0b2aeb ./redis_publisher.py
750f778946b56bc57c47d9d2295332bb0f8cec2c1aa03c6b882d39ef4432673d ./refine_search.py
0f8a6a6866a5797e964d3b17e2b7ef146fe7a798f09fcea982fcda6f629b4d06 ./regenerate_parent_5w1h.py
3ee192b623f290136b36bd63abd018aad6e6639a9543970c3415734628b33bd6 ./register_sample_faces.py
@@ -303,7 +303,7 @@ d0ec8f4a67c1a1eb1356ad6e9b2f466575691bd336621cdbbfd31dd10159f2dc ./utils/test_m
ff98864f1b11795cc3bb64f30ccb6f8609771ddc7a5df2c003ba7c2233d16fc2 ./vectorize_chunks.py
5880c128400e6e36c8eb7dffd009dbbc99dd13f8575b0037bdc854e25ddc41fb ./video_comparison_statistics.py
0a1501ffdc027236cdf88706b3d61229e2998ab268fd57fb60e399ccb734b6a1 ./vision_agent.py
6831281de868d24ecd84151965909b57f895d534114d24300a81c396492c19f8 ./visual_chunk_processor.py
eac8f90fbbb655614abcefc4b887e346bf94db5f015d33d37bc9514fb030489d ./visual_chunk_processor.py
c165dfc5fc981dc731b25ef414184ee58e56b73b148d41a32fdce985c701efd5 ./visualize_stamp.py
6c65a82fdd1d585e20bee4fcb2d1bdec2e6220bda71d6ef9cd00d6a3cf74c4d7 ./voice_embedding_extractor.py
2b3a7b357db4ddd07ca30bf200c6600724e33441d8def0a4d9a39673e2cfb1c0 ./weather_sound_detector.py
@@ -343,3 +343,4 @@ b2ee4f8a445a7e83f7b99ae5d4139fd525d9e3e58a360bfef054d441aa21d901 ./swift_proces
fbca5ba0783153c4e21c174b0cbf75b582514f6ef0f92750a82d3178bc170f48 ./test_search_modes.sh
f8c1647cdb4db8adef1829e41fbecd97f6b3b2e62927f195cd8e68127876069d ./troubleshoot.sh
992296b5218f3ef97ce53325be12f71848f3c3aeb3ee81d764bfe4bd61e1de05 ./verify_package.sh
b6f95fa070cc0258bc5d005f10d13025ba8b08d3ee1598bcdad405ff1d3332ed ./tmdb_agent.py

View File

@@ -0,0 +1,84 @@
#!/opt/homebrew/bin/python3.11
"""
Extract face embedding from an image using InsightFace + CoreML FaceNet.
Usage:
python3 scripts/extract_face_embedding.py <image_path>
Output: JSON with "embedding" key (512 floats) or "error" key.
Exit code: 0 on success, 1 on failure.
"""
import json
import os
import sys
# Prefer venv if it exists (has insightface + coremltools installed).
# The site-packages directory is derived from the interpreter that is actually
# running instead of a hard-coded "python3.11": packages built for a different
# Python version are then never pushed onto sys.path, and a venv created with
# another version is still picked up when the script is launched with it.
VENV_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "venv")
VENV_SITE = os.path.join(
    VENV_PATH,
    "lib",
    f"python{sys.version_info.major}.{sys.version_info.minor}",
    "site-packages",
)
if os.path.isdir(VENV_SITE):
    sys.path.insert(0, VENV_SITE)
import cv2
import numpy as np
MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "models")
FACENET_PATH = os.path.join(MODELS_DIR, "facenet512.mlpackage")
def extract_embedding(image_path: str):
    """Print a JSON document with the embedding of the largest face in an image.

    On success prints ``{"embedding": [...]}`` to stdout and returns ``None``.
    On any handled failure prints ``{"error": "..."}`` to stdout and exits the
    process with status 1 (``SystemExit``).

    Args:
        image_path: Path of the image file to analyse.

    Raises:
        SystemExit: with code 1 when the file cannot be read or decoded, when no
            face is detected, when the face box is degenerate, or when the model
            output contains no ``var_*`` tensor.
    """
    import io
    import warnings

    warnings.filterwarnings("ignore")

    # Validate the input before loading any model: a bad path then fails fast
    # and is reported as JSON instead of an uncaught traceback.
    try:
        with open(image_path, "rb") as fh:
            img_bytes = fh.read()
    except OSError as exc:
        print(json.dumps({"error": f"Failed to read image: {exc}"}))
        sys.exit(1)

    # An empty buffer cannot be decoded; report it the same way as a corrupt image.
    img = None
    if img_bytes:
        nparr = np.frombuffer(img_bytes, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if img is None:
        print(json.dumps({"error": "Failed to decode image"}))
        sys.exit(1)

    # Suppress InsightFace verbose stdout during model loading
    old_stdout = sys.stdout
    sys.stdout = io.StringIO()
    try:
        from insightface.app import FaceAnalysis
        import coremltools as ct

        app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])
        app.prepare(ctx_id=0, det_thresh=0.5)
        coreml_model = ct.models.MLModel(FACENET_PATH)
    finally:
        sys.stdout = old_stdout

    # Detect faces
    faces = app.get(img)
    if not faces:
        print(json.dumps({"error": "No face detected"}))
        sys.exit(1)

    # Keep only the face with the largest bounding-box area.
    largest = max(faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]))
    x1, y1, x2, y2 = [int(v) for v in largest.bbox]
    # Clamp the box to the image so the crop below never goes out of range.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(img.shape[1], x2), min(img.shape[0], y2)
    if x2 <= x1 or y2 <= y1:
        print(json.dumps({"error": "Invalid face bbox"}))
        sys.exit(1)

    # Crop, resize to 160x160, scale pixel values to [-1, 1], and reorder to
    # channels-first with a leading batch axis before calling the model.
    face_img = img[y1:y2, x1:x2]
    face_img = cv2.resize(face_img, (160, 160))
    normalized = (face_img.astype(np.float32) / 127.5) - 1.0
    normalized = np.transpose(normalized, (2, 0, 1))
    input_array = np.expand_dims(normalized, axis=0)

    result = coreml_model.predict({"input": input_array})
    # The embedding is taken from the first output whose name starts with "var_".
    emb_keys = [k for k in result.keys() if k.startswith("var_")]
    if not emb_keys:
        print(json.dumps({"error": "Model output has no embedding tensor"}))
        sys.exit(1)
    embedding = result[emb_keys[0]].flatten().tolist()

    print(json.dumps({"embedding": embedding}))
# Script entry point: the first positional argument is the image path.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if not cli_args:
        # No path given: report usage as a JSON error and fail.
        print(json.dumps({"error": "Usage: extract_face_embedding.py <image_path>"}))
        sys.exit(1)
    extract_embedding(cli_args[0])

View File

@@ -2,23 +2,30 @@
"""
Face landmark QC: verify eyes/nose are within face bounding box.
Flags faces in DB where landmarks don't match the bbox.
Usage: python3 face_landmark_qc.py <file_uuid> [--threshold 0.5] [--fix]
Usage: python3 face_landmark_qc.py <file_uuid> [--threshold 0.5] [--apply]
"""
import sys, json, psycopg2, argparse
import sys, json, psycopg2, argparse, os
parser = argparse.ArgumentParser()
parser.add_argument("uuid")
parser.add_argument("--threshold", "-t", type=float, default=0.5,
help="Fraction of landmark points that must be inside bbox (default: 0.5)")
parser.add_argument("--fix", action="store_true", help="Update face_detections QC flag in DB")
parser.add_argument("--apply", action="store_true",
help="Write qc_ok to face_detections.metadata in DB")
parser.add_argument("--schema", default="dev",
help="DB schema (default: dev)")
args = parser.parse_args()
UUID = args.uuid
THRESHOLD = args.threshold
FACE_PATH = f"/Users/accusys/momentry/output_dev/{UUID}.face.json"
SCHEMA = args.schema
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", f"/Users/accusys/momentry/output_dev")
FACE_PATH = f"{OUTPUT_DIR}/{UUID}.face.json"
print(f"=== Face Landmark QC ===")
print(f"UUID: {UUID}")
print(f"Schema: {SCHEMA}")
print(f"Face file: {FACE_PATH}")
print(f"Threshold: {THRESHOLD * 100:.0f}% points must be inside bbox")
# Load face.json
@@ -29,8 +36,7 @@ total_faces = 0
faces_with_lm = 0
good_faces = 0
bad_faces = 0
bad_frame_ids = set()
bad_face_details = []
qc_results = [] # list of (frame, face_idx, qc_ok, x, y, w, h)
# Build frame lookup for fast access
frame_map = {}
@@ -42,13 +48,22 @@ for frame_num, frm in frame_map.items():
total_faces += 1
lm = face.get('landmarks')
if not lm:
bbox = face.get('bbox', {})
qc_results.append((frame_num, fi, False, bbox.get('x'), bbox.get('y'),
bbox.get('width'), bbox.get('height')))
bad_faces += 1
continue
faces_with_lm += 1
x, y, w, h = face['x'], face['y'], face['width'], face['height']
bbox = face.get('bbox', {})
x, y, w, h = bbox.get('x'), bbox.get('y'), bbox.get('width'), bbox.get('height')
if None in (x, y, w, h):
qc_results.append((frame_num, fi, False, x, y, w, h))
bad_faces += 1
continue
inside_pts = 0
total_pts = 0
eye_nose_inside = 0 # at least one point from each eye+nose inside
eye_nose_inside = 0
for lm_type in ['left_eye', 'right_eye', 'nose']:
points = lm.get(lm_type, [])
@@ -63,53 +78,39 @@ for frame_num, frm in frame_map.items():
eye_nose_inside += 1
ratio = inside_pts / max(1, total_pts)
qc_ok = (ratio >= THRESHOLD and eye_nose_inside >= 2)
if ratio >= THRESHOLD and eye_nose_inside >= 2:
qc_results.append((frame_num, fi, qc_ok, x, y, w, h))
if qc_ok:
good_faces += 1
else:
bad_faces += 1
bad_frame_ids.add(frame_num)
bad_face_details.append({
'frame': frame_num,
'face_idx': fi,
'bbox': [x, y, w, h],
'inside_pts': inside_pts,
'total_pts': total_pts,
'ratio': ratio,
'eye_nose_ok': eye_nose_inside,
})
print(f"\nTotal faces: {total_faces:,}")
print(f"Faces with landmarks: {faces_with_lm:,}")
print(f"✅ Good (≥{THRESHOLD*100:.0f}% inside + ≥2 features): {good_faces:,}")
print(f"❌ Bad: {bad_faces:,}")
print(f"❌ Bad (no eyes or insufficient landmarks): {bad_faces:,}")
print(f"Quality pass rate: {100 * good_faces / max(1, faces_with_lm):.1f}%")
print(f"\nBad faces in {len(bad_frame_ids)} unique frames")
# Show sample bad faces
print(f"\nSample bad faces:")
for bf in sorted(bad_face_details, key=lambda b: b['ratio'])[:5]:
print(f" frame={bf['frame']}, bbox={bf['bbox']}, {bf['inside_pts']}/{bf['total_pts']} inside ({bf['ratio']*100:.0f}%), eye/nose={bf['eye_nose_ok']}/3")
# Show sample good faces
print(f"\nSample good faces:")
good_details = []
for frame_num, frm in frame_map.items():
for face in frm.get('faces', []):
lm = face.get('landmarks')
if not lm:
continue
x, y, w, h = face['x'], face['y'], face['width'], face['height']
inside = sum(1 for pts in lm.values() for pt in pts
if (x <= pt[0] <= x + w) and (y <= pt[1] <= y + h))
total = sum(len(pts) for pts in lm.values())
if inside / max(1, total) >= THRESHOLD:
good_details.append((frame_num, x, y, w, h, inside, total))
if len(good_details) >= 5:
break
if len(good_details) >= 5:
break
for g in good_details:
print(f" frame={g[0]}, bbox=[{g[1]},{g[2]},{g[3]},{g[4]}], {g[5]}/{g[6]} inside ({100*g[5]/max(1,g[6]):.0f}%)")
# Apply mode: write qc_ok to face_detections.metadata
if args.apply:
    from psycopg2 import sql

    print(f"\n=== Applying QC results to {SCHEMA}.face_detections ===")
    db_url = os.environ.get("DATABASE_URL", "postgres://accusys@localhost:5432/momentry")

    # The schema comes from --schema, so it is quoted as an identifier instead of
    # being pasted into the SQL text; every value is passed as a bound parameter.
    update_stmt = sql.SQL(
        "UPDATE {schema}.face_detections "
        "SET metadata = jsonb_set(COALESCE(metadata, '{{}}'::jsonb), '{{qc_ok}}', %s::jsonb) "
        "WHERE file_uuid = %s AND frame_number = %s AND x = %s AND y = %s AND width = %s AND height = %s"
    ).format(schema=sql.Identifier(SCHEMA))

    conn = psycopg2.connect(db_url)
    updated = 0
    try:
        with conn.cursor() as cur:
            for frame_num, fi, qc_ok, x, y, w, h in qc_results:
                # qc_ok is stored as the JSON *string* "true"/"false" (not a JSON
                # boolean), matching what this script has always written.
                qc_json = json.dumps("true" if qc_ok else "false")
                cur.execute(update_stmt, (qc_json, UUID, frame_num, x, y, w, h))
                # A face is matched to its row by exact bbox; count faces that hit one.
                if cur.rowcount > 0:
                    updated += 1
        conn.commit()
    finally:
        # Always release the connection, including when an UPDATE fails part-way
        # (in which case nothing has been committed).
        conn.close()

    print(f"Updated {updated} rows in {SCHEMA}.face_detections")
    print(f"Skipped {len(qc_results) - updated} rows (no matching face_detections row)")

View File

@@ -13,6 +13,7 @@ Detection cost: near-zero CPU (Vision ANE)
Embedding cost: near-zero CPU (CoreML ANE)
"""
import re
import sys
import os
import json
@@ -29,6 +30,7 @@ from pathlib import Path
import coremltools as ct
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
SWIFT_BIN = os.path.join(SCRIPT_DIR, "swift_processors", ".build", "debug", "swift_face")
@@ -49,11 +51,12 @@ def classify_pose(roll: float, yaw: float) -> str:
class FaceProcessorVision:
def __init__(self, video_path: str, output_path: str, uuid: str = "",
sample_interval: int = 3):
sample_interval: int = 3, publisher: RedisPublisher = None):
self.video_path = video_path
self.output_path = output_path
self.uuid = uuid
self.sample_interval = sample_interval
self.publisher = publisher
# Load CoreML FaceNet
self.coreml_model = None
@@ -127,7 +130,33 @@ class FaceProcessorVision:
print(f"[FACE_V2] Running: {' '.join(cmd)}")
t0 = time.time()
subprocess.run(cmd, check=True)
log_path = swift_out + ".log"
log_f = open(log_path, "w")
proc = subprocess.Popen(cmd, stdout=log_f, stderr=subprocess.STDOUT, text=True)
last_pct = -1
while proc.poll() is None:
time.sleep(10)
# Read latest log lines
try:
with open(log_path) as lf:
for line in lf:
line = line.strip()
m = re.search(r'(\d+)% complete', line)
if m:
pct = int(m.group(1))
if pct > last_pct:
last_pct = pct
if self.publisher:
self.publisher.progress("face", pct, 100, f"swift detect {pct}%")
except Exception:
pass
log_f.close()
if proc.returncode != 0:
stderr_out = proc.stderr.read()
if stderr_out:
print(stderr_out.strip(), file=sys.stderr)
raise RuntimeError(f"swift_face exited with code {proc.returncode}")
elapsed = time.time() - t0
print(f"[FACE_V2] Detection done in {elapsed:.1f}s")
@@ -156,6 +185,8 @@ class FaceProcessorVision:
t0 = time.time()
embed_count = 0
total_face_count = 0
last_pct = -1
for frame_info in frames:
frame_num = frame_info["frame"]
@@ -220,6 +251,12 @@ class FaceProcessorVision:
if len(face_data["frames"]) % 100 == 0:
elapsed = time.time() - t0
print(f"[FACE_V2] {len(face_data['frames'])} frames, {embed_count} embeddings, {elapsed:.0f}s")
if self.publisher:
pct = int(len(face_data["frames"]) * 100 / max(len(frames), 1))
if pct > last_pct:
last_pct = pct
self.publisher.progress("face", len(face_data["frames"]), len(frames),
f"{embed_count} faces", embed_count, "faces")
self.video.release()
@@ -259,19 +296,36 @@ def main():
parser.add_argument("--force", action="store_true")
args = parser.parse_args()
publisher = RedisPublisher(args.uuid) if args.uuid else None
if publisher:
publisher.info("face", "FACE_START")
if args.force and os.path.exists(args.output_path):
os.remove(args.output_path)
processor = FaceProcessorVision(
args.video_path, args.output_path,
args.uuid, args.sample_interval
args.uuid, args.sample_interval, publisher
)
# Step 1: Vision detection (bbox + pose via ANE)
try:
detection = processor.process_with_swift()
except Exception as e:
if publisher:
publisher.error("face", f"Detection failed: {e}")
raise
# Step 2: CoreML embedding + save
try:
processor.embed_and_save(detection)
except Exception as e:
if publisher:
publisher.error("face", f"Embedding failed: {e}")
raise
if publisher:
publisher.complete("face", f"{len(detection.get('frames',[]))} frames")
# Clean up temp detection file
swift_out = args.output_path.replace(".json", "_detect.json")

View File

@@ -81,10 +81,10 @@ for cluster_id in sorted(set(labels)):
VALUES (%s, 'face', 'auto', 'active', NOW(), %s)
ON CONFLICT (name) DO UPDATE SET status = 'active', file_uuid = COALESCE(dev.identities.file_uuid, %s)
RETURNING id
""", (f"PERSON_{UUID[:8]}_{cluster_id}", UUID, UUID))
""", (f"stranger_{UUID}_{cluster_id}", UUID, UUID))
identity_id = cur.fetchone()[0]
cluster_to_identity[cluster_id] = identity_id
print(f" Cluster {cluster_id}: new identity {identity_id} (PERSON_{cluster_id})")
print(f" Cluster {cluster_id}: new identity {identity_id} (stranger_{UUID}_{cluster_id})")
# Step 4: Create identity bindings
print("Creating identity bindings...")

View File

@@ -0,0 +1,131 @@
#!/opt/homebrew/bin/python3.11
"""
Migrate Identity Files — one-time: DB identities → filesystem identity.json
Reads all identities from PostgreSQL, queries file bindings,
and writes identity.json + _index.json to {OUTPUT_DIR}/identities/{uuid}/
Usage:
python3 scripts/migrate_identity_files.py
python3 scripts/migrate_identity_files.py --db "dbname=momentry user=accusys"
python3 scripts/migrate_identity_files.py --output /path/to/output
"""
import argparse
import json
import os
from datetime import datetime, timezone
from pathlib import Path
import psycopg2
import psycopg2.extras
def _parse_metadata(raw):
    """Decode the ``metadata::text`` column: JSON text -> object, NULL/empty -> ``{}``."""
    if isinstance(raw, str):
        return json.loads(raw) if raw else {}
    if raw is None:
        return {}
    return raw


def _iso_or_now(value):
    """Return ``value.isoformat()``, or the current UTC time when ``value`` is missing."""
    return value.isoformat() if value else datetime.now(timezone.utc).isoformat()


def _load_file_bindings(conn, identity_id):
    """Return one ``{file_uuid, trace_ids, face_count}`` dict per file bound to an identity."""
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as bindings_cur:
        bindings_cur.execute("""
            SELECT fd.file_uuid,
                   COALESCE(array_agg(DISTINCT fd.trace_id) FILTER (WHERE fd.trace_id IS NOT NULL), '{}') AS trace_ids,
                   COUNT(*)::bigint AS face_count
            FROM face_detections fd
            WHERE fd.identity_id = %s
            GROUP BY fd.file_uuid
            ORDER BY fd.file_uuid
        """, (identity_id,))
        binding_rows = bindings_cur.fetchall()

    file_bindings = []
    for b in binding_rows:
        trace_ids = b["trace_ids"]
        if isinstance(trace_ids, list):
            trace_ids = [int(t) for t in trace_ids if t is not None]
        file_bindings.append({
            "file_uuid": b["file_uuid"],
            "trace_ids": trace_ids,
            "face_count": int(b["face_count"]),
        })
    return file_bindings


def main():
    """Export every identity with a UUID from PostgreSQL to the filesystem.

    Writes ``{output}/identities/{uuid_without_dashes}/identity.json`` for each
    row of ``identities`` whose ``uuid`` is not NULL, then writes
    ``{output}/identities/_index.json`` mapping each UUID to the identity name.

    Command-line options:
        --db      psycopg2 connection string (default: ``DATABASE_URL`` env var,
                  else a local ``momentry`` database).
        --output  Output root (default: ``MOMENTRY_OUTPUT_DIR`` env var, else
                  ``/Users/accusys/momentry/output``).

    The database connection is closed on every exit path, including the
    "no identities" early return and any exception raised while exporting.
    """
    parser = argparse.ArgumentParser(description="Migrate identities to filesystem")
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
    parser.add_argument("--output", default=os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output"))
    args = parser.parse_args()

    conn = psycopg2.connect(args.db)
    try:
        identities_root = Path(args.output) / "identities"
        identities_root.mkdir(parents=True, exist_ok=True)

        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute("""
                SELECT id, uuid::text, name, identity_type, source, status,
                       tmdb_id, tmdb_profile, metadata::text, created_at, updated_at
                FROM identities
                WHERE uuid IS NOT NULL
                ORDER BY id
            """)
            rows = cur.fetchall()

        if not rows:
            print("No identities found in DB.")
            return

        index = {}
        migrated = 0

        for row in rows:
            # Directory names use the UUID with its dashes removed.
            uuid_clean = row["uuid"].replace("-", "")
            name = row["name"] or ""

            dir_path = identities_root / uuid_clean
            dir_path.mkdir(parents=True, exist_ok=True)

            identity_file = {
                "version": 1,
                "identity_uuid": uuid_clean,
                "name": name,
                "identity_type": row.get("identity_type"),
                "source": row.get("source"),
                "status": row.get("status"),
                "tmdb_id": row.get("tmdb_id"),
                "tmdb_profile": row.get("tmdb_profile"),
                "metadata": _parse_metadata(row.get("metadata")),
                # Bindings are derived from face_detections rows for this identity.
                "file_bindings": _load_file_bindings(conn, row["id"]),
                "created_at": _iso_or_now(row.get("created_at")),
                "updated_at": _iso_or_now(row.get("updated_at")),
            }

            with open(dir_path / "identity.json", "w", encoding="utf-8") as f:
                json.dump(identity_file, f, indent=2, ensure_ascii=False)

            index[uuid_clean] = name
            migrated += 1
            print(f"  [{migrated:5d}] {name} ({uuid_clean})")
    finally:
        conn.close()

    # Write _index.json
    index_file = {
        "version": 1,
        "updated_at": datetime.now(timezone.utc).isoformat(),
        "entries": index,
    }
    with open(identities_root / "_index.json", "w", encoding="utf-8") as f:
        json.dump(index_file, f, indent=2, ensure_ascii=False)

    print(f"\nDone: {migrated} identities migrated")
    print(f"Index: {identities_root / '_index.json'} ({len(index)} entries)")


if __name__ == "__main__":
    main()

View File

@@ -4,6 +4,7 @@ OCR Processor Wrapper
Calls Swift Vision Framework OCR (swift_ocr) with fallback to PaddleOCR.
"""
import re
import sys
import json
import os
@@ -11,6 +12,10 @@ import subprocess
import argparse
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
SWIFT_OCR_PATH = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"swift_processors/.build/debug/swift_ocr"
@@ -19,6 +24,7 @@ SWIFT_OCR_ALT = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"swift_processors/.build/arm64-apple-macosx/debug/swift_ocr"
)
SWIFT_PROGRESS_RE = re.compile(r"\[SwiftOCR\] Progress:\s*(\d+)%")
def process_ocr(
@@ -27,6 +33,7 @@ def process_ocr(
uuid: str = "",
sample_interval: int = 30,
recognition_level: str = "accurate",
publisher: RedisPublisher = None,
) -> dict:
swift_bin = SWIFT_OCR_PATH
if not os.path.exists(swift_bin):
@@ -42,15 +49,34 @@ def process_ocr(
"--uuid", uuid]
print(f"[OCR] Running Swift OCR", file=sys.stderr)
result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200)
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if result.stdout:
print(result.stdout.strip(), file=sys.stderr)
if result.stderr:
print(result.stderr.strip(), file=sys.stderr)
last_pct = -1
stdout_lines = []
for line in proc.stdout:
line = line.strip()
stdout_lines.append(line)
m = SWIFT_PROGRESS_RE.search(line)
if m:
pct = int(m.group(1))
if pct > last_pct:
last_pct = pct
print(f"[OCR] Progress: {pct}%", file=sys.stderr)
if publisher:
publisher.progress("ocr", pct, 100, f"{pct}%")
elif line:
print(line, file=sys.stderr)
if result.returncode != 0 or not os.path.exists(output_path):
print(f"[OCR] Swift OCR failed, falling back to PaddleOCR", file=sys.stderr)
stderr_output = proc.stderr.read()
if stderr_output:
print(stderr_output.strip(), file=sys.stderr)
proc.wait()
if proc.returncode != 0 or not os.path.exists(output_path):
print(f"[OCR] Swift OCR failed (exit={proc.returncode}), falling back to PaddleOCR", file=sys.stderr)
if publisher:
publisher.error("ocr", f"Swift OCR failed, using fallback")
return _fallback(video_path, output_path, uuid, sample_interval)
with open(output_path) as f:
@@ -81,9 +107,16 @@ if __name__ == "__main__":
parser.add_argument("--recognition-level", choices=["fast", "accurate"], default="accurate")
args = parser.parse_args()
publisher = RedisPublisher(args.uuid) if args.uuid else None
if publisher:
publisher.info("ocr", "OCR_START")
result = process_ocr(args.video_path, args.output_path, args.uuid,
args.sample_interval, args.recognition_level)
args.sample_interval, args.recognition_level,
publisher)
with open(args.output_path, "w") as f:
json.dump(result, f, indent=2)
print(f"OCR: {len(result.get('frames', []))} frames with text")
if publisher:
publisher.complete("ocr", f"{len(result.get('frames',[]))} frames")

View File

@@ -28,7 +28,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
SCHEMA = os.getenv("DATABASE_SCHEMA", "dev")
OUTPUT_DIR = os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
OLLAMA_URL = "http://localhost:11434/api"
EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://localhost:11436/v1/embeddings")
def load_speaker_map(file_uuid: str) -> dict:
"""Load speaker→identity mapping from DB (generalized, not hardcoded)"""
@@ -64,7 +64,7 @@ CURRENT_VERSIONS = {
"embedding_agent": "nomic-embed-768d/v1",
}
LLM_URL = os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8081/v1/chat/completions")
LLM_URL = os.getenv("MOMENTRY_LLM_URL", os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8082/v1/chat/completions"))
LLM_MODEL = os.getenv("MOMENTRY_LLM_SUMMARY_MODEL", "gemma4")
@@ -97,7 +97,7 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
s, e = cs["start_time"], cs["end_time"]
children = []
for seg in asr_segs:
for seg_idx, seg in enumerate(asr_segs):
st, en = seg.get("start", 0), seg.get("end", 0)
text = seg.get("text", "").strip()
if st < s or en > e or not text: continue
@@ -117,11 +117,11 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
"start": st, "end": en, "text": text,
"speaker_id": spk_id, "speaker_name": character,
"speaker_confidence": spk_conf,
"chunk_id": f"{file_uuid}_{st:.0f}_{en:.0f}",
"chunk_id": f"{file_uuid}_{seg_idx}",
})
# Boundary overlap: even empty scenes get partial children
for seg in asr_segs:
for seg_idx, seg in enumerate(asr_segs):
st, en = seg.get("start", 0), seg.get("end", 0)
text = seg.get("text", "").strip()
if not text: continue
@@ -141,7 +141,7 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
"start": st, "end": en, "text": text,
"speaker_id": spk_id, "speaker_name": character,
"speaker_confidence": spk_conf,
"chunk_id": f"{file_uuid}_{st:.0f}_{en:.0f}",
"chunk_id": f"{file_uuid}_{seg_idx}",
"overlap_type": "partial",
})
@@ -215,14 +215,17 @@ def generate_llm_child_summary(child: dict, parent_summary: str) -> Optional[str
# ===== Embedding (EmbeddingGemma server via EMBEDDING_URL) =====
def embed_text(text: str, max_retries: int = 3) -> Optional[List[float]]:
"""Get embedding via Ollama nomic-embed-text"""
"""Get embedding via EmbeddingGemma server"""
for attempt in range(max_retries):
try:
resp = requests.post(f"{OLLAMA_URL}/embeddings", json={
"model": "nomic-embed-text-v2-moe", "prompt": text,
resp = requests.post(EMBEDDING_URL, json={
"input": [text],
}, timeout=30)
if resp.status_code == 200:
return resp.json()["embedding"]
data = resp.json()
items = data.get("data", [])
if items:
return items[0]["embedding"]
except Exception as e:
if attempt == max_retries - 1:
print(f" ⚠️ Embedding failed: {e}")
@@ -244,7 +247,7 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,
# Get base chunk_index
cur.execute(
f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunks WHERE file_uuid = %s",
f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunk WHERE file_uuid = %s",
(file_uuid,),
)
next_index = (cur.fetchone()[0] or 0) + 1
@@ -255,9 +258,27 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,
parent_id = f"{mode}_parent_{file_uuid}_{scene['start_time']:.0f}_{scene['end_time']:.0f}"
parent_embedding = embed_text(parent_text) if do_embed else None
if do_embed and parent_embedding:
cur.execute(
f"""
INSERT INTO {SCHEMA}.chunks (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
start_time, end_time, content, text_content, parent_chunk_id, embedding)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector)
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
embedding = EXCLUDED.embedding
""",
(parent_id, parent_id, file_uuid, parent_type, next_index,
scene["start_time"], scene["end_time"],
json.dumps({"summary": parent_text, "mode": mode, "type": "parent",
"source_versions": CURRENT_VERSIONS}),
parent_text, None, parent_embedding),
)
else:
cur.execute(
f"""
INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
start_time, end_time, content, text_content, parent_chunk_id)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
@@ -276,9 +297,29 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,
child_id = child["chunk_id"]
child_text = generate_story_child_summary(child, parent_text) if mode == "story" else generate_llm_child_summary(child, parent_text)
child_embedding = embed_text(child_text) if do_embed else None
if do_embed and child_embedding:
cur.execute(
f"""
INSERT INTO {SCHEMA}.chunks (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
start_time, end_time, content, text_content, parent_chunk_id, embedding)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector)
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
parent_chunk_id = EXCLUDED.parent_chunk_id,
embedding = EXCLUDED.embedding
""",
(child_id, child_id, file_uuid, child_type, next_index,
child["start"], child["end"],
json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode,
"speaker_confidence": child.get("speaker_confidence", 0),
"source_versions": CURRENT_VERSIONS}),
child_text, parent_id, child_embedding),
)
else:
cur.execute(
f"""
INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
start_time, end_time, content, text_content, parent_chunk_id)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
@@ -304,7 +345,7 @@ def main():
parser = argparse.ArgumentParser(description="Story Processor V2.0")
parser.add_argument("--file-uuid", required=True)
parser.add_argument("--mode", choices=["story", "llm"], default="story")
parser.add_argument("--max-scenes", type=int, default=300)
parser.add_argument("--max-scenes", type=int, default=99999)
parser.add_argument("--embed", action="store_true", help="Generate embeddings (Ollama)")
parser.add_argument("--no-db", action="store_true", help="Skip DB storage")
args = parser.parse_args()

View File

@@ -5,12 +5,16 @@ Calls Swift Vision Framework pose (swift_pose) with fallback to YOLOv8 Pose.
Uses VNDetectHumanBodyPoseRequest with ANE acceleration.
"""
import re
import sys
import json
import os
import subprocess
import argparse
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
SWIFT_POSE_PATH = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"swift_processors/.build/debug/swift_pose"
@@ -21,11 +25,14 @@ SWIFT_POSE_ALT = os.path.join(
)
SWIFT_POSE_PROGRESS_RE = re.compile(r"\[SwiftPose\] Progress:\s*(\d+)%")
def process_pose(
video_path: str,
output_path: str,
uuid: str = "",
sample_interval: int = 30,
publisher: RedisPublisher = None,
) -> dict:
swift_bin = SWIFT_POSE_PATH
if not os.path.exists(swift_bin):
@@ -33,6 +40,8 @@ def process_pose(
if not os.path.exists(swift_bin):
print("[Pose] Swift binary not found, using YOLOv8 fallback", file=sys.stderr)
if publisher:
publisher.error("pose", "Swift binary not found, using fallback")
return _fallback(video_path, output_path, uuid, sample_interval)
cmd = [swift_bin, video_path, output_path,
@@ -40,17 +49,32 @@ def process_pose(
"--uuid", uuid]
print(f"[Pose] Running Swift Pose (Vision Framework)", file=sys.stderr)
result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200)
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if result.stdout:
for line in result.stdout.strip().split("\n"):
print(f" {line}", file=sys.stderr)
if result.stderr:
for line in result.stderr.strip().split("\n"):
last_pct = -1
for line in proc.stdout:
line = line.strip()
m = SWIFT_POSE_PROGRESS_RE.search(line)
if m:
pct = int(m.group(1))
if pct > last_pct:
last_pct = pct
print(f"[Pose] Progress: {pct}%", file=sys.stderr)
if publisher:
publisher.progress("pose", pct, 100, f"{pct}%")
elif line:
print(f" {line}", file=sys.stderr)
if result.returncode != 0 or not os.path.exists(output_path):
print(f"[Pose] Swift Pose failed, falling back to YOLOv8", file=sys.stderr)
stderr_output = proc.stderr.read()
if stderr_output:
print(stderr_output.strip(), file=sys.stderr)
proc.wait()
if proc.returncode != 0 or not os.path.exists(output_path):
print(f"[Pose] Swift Pose failed (exit={proc.returncode}), falling back to YOLOv8", file=sys.stderr)
if publisher:
publisher.error("pose", f"Swift Pose failed, using fallback")
return _fallback(video_path, output_path, uuid, sample_interval)
with open(output_path) as f:
@@ -113,7 +137,14 @@ if __name__ == "__main__":
parser.add_argument("--sample-interval", type=int, default=30)
args = parser.parse_args()
result = process_pose(args.video_path, args.output_path, args.uuid, args.sample_interval)
publisher = RedisPublisher(args.uuid) if args.uuid else None
if publisher:
publisher.info("pose", "POSE_START")
result = process_pose(args.video_path, args.output_path, args.uuid,
args.sample_interval, publisher)
with open(args.output_path, "w") as f:
json.dump(result, f, indent=2)
print(f"Pose: {len(result.get('frames', []))} frames with poses")
if publisher:
publisher.complete("pose", f"{len(result.get('frames',[]))} frames")

View File

@@ -34,6 +34,8 @@ class ProgressData:
message: Optional[str] = None
current: Optional[int] = None
total: Optional[int] = None
output_count: Optional[int] = None
output_type: Optional[str] = None
extra: Optional[Dict[str, Any]] = None
@@ -49,7 +51,8 @@ class StructuredMessage:
class RedisPublisher:
def __init__(self, uuid: str):
self.uuid = uuid
self.channel = f"momentry:progress:{uuid}"
prefix = os.environ.get("MOMENTRY_REDIS_PREFIX", "momentry:")
self.channel = f"{prefix}progress:{uuid}"
self._enabled = False
self._client = None
self._connect()
@@ -107,6 +110,8 @@ class RedisPublisher:
message: Optional[str] = None,
current: Optional[int] = None,
total: Optional[int] = None,
output_count: Optional[int] = None,
output_type: Optional[str] = None,
extra: Optional[Dict[str, Any]] = None,
) -> bool:
if not self._enabled:
@@ -121,6 +126,8 @@ class RedisPublisher:
message=message,
current=current,
total=total,
output_count=output_count,
output_type=output_type,
extra=extra,
),
)
@@ -136,6 +143,8 @@ class RedisPublisher:
current: int,
total: int,
message: str = "",
output_count: Optional[int] = None,
output_type: Optional[str] = None,
) -> bool:
return self.publish(
MessageType.PROGRESS,
@@ -143,6 +152,8 @@ class RedisPublisher:
message=message,
current=current,
total=total,
output_count=output_count,
output_type=output_type,
)
def complete(self, processor: str, message: str = "") -> bool:

View File

@@ -0,0 +1,117 @@
#!/opt/homebrew/bin/python3.11
"""
Sync users from SFTPGo to Momentry users table.
Usage:
python3 scripts/sync_users_from_sftpgo.py
python3 scripts/sync_users_from_sftpgo.py --sftpgo-url http://localhost:8080
python3 scripts/sync_users_from_sftpgo.py --db "dbname=momentry user=accusys"
Environment:
SFTPGO_BASE_URL Default: http://localhost:8080
DATABASE_URL Default: dbname=momentry user=accusys host=localhost
This script does NOT copy passwords. It creates user records with placeholder
password hashes. The real password will be captured on the user's first
login through Momentry (which verifies against SFTPGo and caches the hash).
"""
import argparse
import json
import os
import sys
from typing import Any
import psycopg2
import psycopg2.extras
import requests
def get_sftpgo_users(sftpgo_url: str, admin_user: str, admin_pass: str) -> list[dict[str, Any]]:
    """Return every user known to SFTPGo, following the API's pagination.

    Args:
        sftpgo_url: Base URL of the SFTPGo instance (no trailing slash).
        admin_user: SFTPGo admin username used for HTTP basic auth.
        admin_pass: SFTPGo admin password used for HTTP basic auth.

    Returns:
        The user objects returned by ``/api/v2/users``, concatenated across
        all pages.

    Raises:
        requests.HTTPError: If the token or users endpoint answers with an
            error status.
        SystemExit: If the token response carries no ``access_token``.
    """
    # Get admin token (SFTPGo uses GET with basic auth, not POST)
    token_url = f"{sftpgo_url}/api/v2/token"
    resp = requests.get(token_url, auth=(admin_user, admin_pass), timeout=10)
    resp.raise_for_status()
    token = resp.json().get("access_token")
    if not token:
        print("ERROR: Failed to get SFTPGo admin token", file=sys.stderr)
        sys.exit(1)

    # List users. The endpoint is paginated (a single request returns at most
    # `limit` entries), so keep asking until a short page signals the end.
    users_url = f"{sftpgo_url}/api/v2/users"
    headers = {"Authorization": f"Bearer {token}"}
    page_size = 100
    offset = 0
    users: list[dict[str, Any]] = []
    while True:
        resp = requests.get(
            users_url,
            headers=headers,
            params={"offset": offset, "limit": page_size},
            timeout=10,
        )
        resp.raise_for_status()
        page = resp.json()
        users.extend(page)
        if len(page) < page_size:
            return users
        offset += len(page)
def main():
    """Copy SFTPGo users into the Momentry ``users`` table.

    Users without a username, or whose ``status`` is not 1, are skipped.
    Every inserted row gets a placeholder password hash; existing usernames
    are left untouched (``ON CONFLICT DO NOTHING``). With ``--dry-run`` nothing
    is written and the would-be inserts are only printed.
    """
    parser = argparse.ArgumentParser(description="Sync SFTPGo users to Momentry")
    parser.add_argument("--sftpgo-url", default=os.getenv("SFTPGO_BASE_URL", "http://localhost:8080"))
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
    parser.add_argument("--admin-user", default="admin")
    # NOTE(review): the fallback below is a credential committed to source.
    # Kept for backward compatibility; prefer always setting
    # SFTPGO_ADMIN_PASSWORD (or --admin-pass) and dropping the default.
    parser.add_argument("--admin-pass", default=os.getenv("SFTPGO_ADMIN_PASSWORD", "Test3200Test3200"))
    parser.add_argument("--dry-run", action="store_true", help="Print what would be done without executing")
    args = parser.parse_args()

    # Fetch users from SFTPGo
    print(f"[SFTPGo] Connecting to {args.sftpgo_url}...")
    try:
        sftpgo_users = get_sftpgo_users(args.sftpgo_url, args.admin_user, args.admin_pass)
    except Exception as e:
        print(f"ERROR: Failed to fetch SFTPGo users: {e}", file=sys.stderr)
        sys.exit(1)
    print(f"[SFTPGo] Found {len(sftpgo_users)} users")

    synced = 0
    skipped = 0

    # Connect to Momentry DB and set schema
    conn = psycopg2.connect(args.db)
    # Each INSERT runs in its own transaction. In a single shared transaction
    # one failing INSERT would abort it, make every later INSERT fail, and
    # cause the final commit to discard the rows that had succeeded.
    conn.autocommit = True
    try:
        with conn.cursor() as cur:
            cur.execute("SET search_path TO dev")

            for user in sftpgo_users:
                username = user.get("username")
                status = user.get("status", 0)
                if not username or status != 1:
                    skipped += 1
                    continue

                role = "admin" if username == "admin" else "user"
                # Placeholder hash — will be updated on first login via SFTPGo fallback
                placeholder_hash = "$placeholder$synced_from_sftpgo"

                if args.dry_run:
                    print(f" Would insert: {username} (role={role})")
                    synced += 1
                    continue

                try:
                    cur.execute(
                        "INSERT INTO users (username, password_hash, role) VALUES (%s, %s, %s) "
                        "ON CONFLICT (username) DO NOTHING",
                        (username, placeholder_hash, role),
                    )
                    # rowcount is 0 when ON CONFLICT suppressed the insert.
                    if cur.rowcount > 0:
                        print(f"{username} (role={role})")
                        synced += 1
                    else:
                        print(f" ⏭️ {username} already exists, skipped")
                        skipped += 1
                except Exception as e:
                    # Best effort: report the failing user and carry on.
                    print(f"{username}: {e}", file=sys.stderr)
                    skipped += 1
    finally:
        # Always release the connection, even if a statement raised.
        conn.close()

    print(f"\nDone: {synced} synced, {skipped} skipped/errors")
    print("Note: Password hashes are placeholders. First login via Momentry will cache the real hash.")
if __name__ == "__main__":
main()

285
scripts/tmdb_agent.py Normal file
View File

@@ -0,0 +1,285 @@
#!/opt/homebrew/bin/python3.11
"""
TMDb Agent — pre-fetch TMDb data and write directly to identity files.
Usage:
python3 scripts/tmdb_agent.py --file-uuid <uuid>
python3 scripts/tmdb_agent.py --file-uuid <uuid> --db "dbname=momentry user=accusys"
Environment:
TMDB_API_KEY Required. TMDb API key.
MOMENTRY_OUTPUT_DIR Default: /Users/accusys/momentry/output
DATABASE_URL Default: dbname=momentry user=accusys host=localhost
Flow:
1. Query videos table for file_name
2. Extract movie name from filename
3. TMDB /search/movie → find best match
4. TMDB /movie/{id}/credits → fetch cast
5. TMDB /person/{id} → fetch person details
6. Write {OUTPUT}/identities/{uuid}/identity.json + profile.jpg for each cast member
7. Write {OUTPUT}/{uuid}.tmdb.json cache (movie info + identity uuid list)
"""
import argparse
import hashlib
import json
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
import requests
import psycopg2
import psycopg2.extras
TMDB_BASE = "https://api.themoviedb.org/3"
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
def extract_movie_name(filename: str) -> str | None:
"""Extract movie name from filename (e.g. 'Charade_1963.mp4''Charade 1963')"""
name = Path(filename).stem
cleaned = re.sub(r'[._]', ' ', name).strip()
# Strip text after separators like |, (, [, {
for sep in ('|', '(', '[', '{', '\u2502'):
idx = cleaned.find(sep)
if idx > 0:
cleaned = cleaned[:idx].strip()
# Strip common suffixes (quality, format, source, etc.)
suffixes = (
r'\d{3,4}p', r'\d{3,4}x\d{3,4}', r'\d+fps', r'bluray', r'web[ -]?dl',
r'webrip', r'hdrip', r'dvdrip', r'dvd', r'brrip', r'hdtv', r'xvid',
r'x264', r'h264', r'x265', r'h265', r'hevc', r'aac', r'mp3', r'ac3',
r'dts', r'5\.1', r'7\.1', r'dual[ -]?audio', r'multi[ -]?sub',
r'proper', r'repack', r'extended', r'unrated', r'directors[ -]?cut',
r'theatrical', r'internal', r'limited', r'complete', r'full[ -]?movie',
r'english', r'french', r'spanish', r'german', r'chinese',
r'youtube', r'yify', r'ettv', r'rarbg', r'tgx', r'axxo', r'ctrlhd',
)
pattern = r'\b(?:' + '|'.join(suffixes) + r')\b'
cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE).strip()
# Collapse multiple spaces
cleaned = re.sub(r'\s+', ' ', cleaned).strip()
return cleaned if len(cleaned) >= 3 else None
def search_movie(query: str) -> dict | None:
    """Look up a movie title on TMDB and return the top hit.

    Args:
        query: Title text to search for.

    Returns:
        The first entry of the response's ``results`` list, or None when the
        list is empty or the request/parsing fails (the failure is printed to
        stderr).
    """
    endpoint = f"{TMDB_BASE}/search/movie"
    try:
        reply = requests.get(
            endpoint,
            params={"query": query, "api_key": TMDB_API_KEY, "language": "en-US", "page": 1},
            timeout=15,
        )
        reply.raise_for_status()
        hits = reply.json().get("results", [])
        if hits:
            return hits[0]
        return None
    except Exception as err:
        print(f"TMDB search failed: {err}", file=sys.stderr)
        return None
def get_credits(movie_id: int) -> list[dict]:
    """Fetch the cast list of a TMDB movie.

    Args:
        movie_id: TMDB movie identifier.

    Returns:
        The ``cast`` array of the credits response, or an empty list when the
        key is missing or the request/parsing fails (the failure is printed
        to stderr).
    """
    endpoint = f"{TMDB_BASE}/movie/{movie_id}/credits"
    query = {"api_key": TMDB_API_KEY, "language": "en-US"}
    try:
        reply = requests.get(endpoint, params=query, timeout=15)
        reply.raise_for_status()
        payload = reply.json()
        return payload.get("cast", [])
    except Exception as err:
        print(f"TMDB credits failed: {err}", file=sys.stderr)
        return []
def get_person_details(person_id: int) -> dict:
    """Fetch a reduced person record from TMDB ``/person/{id}``.

    Args:
        person_id: TMDB person identifier.

    Returns:
        A dict with the keys biography, birthday, place_of_birth,
        also_known_as, imdb_id, known_for_department, popularity, deathday,
        gender and homepage. Returns an empty dict when the request/parsing
        fails (the failure is printed to stderr).
    """
    endpoint = f"{TMDB_BASE}/person/{person_id}"
    query = {"api_key": TMDB_API_KEY, "language": "en-US"}
    # Fields copied verbatim from the response, in output order.
    wanted = (
        "biography", "birthday", "place_of_birth", "also_known_as", "imdb_id",
        "known_for_department", "popularity", "deathday", "gender", "homepage",
    )
    try:
        reply = requests.get(endpoint, params=query, timeout=15)
        reply.raise_for_status()
        payload = reply.json()
        details = {field: payload.get(field) for field in wanted}
        # Aliases fall back to an empty list rather than None when absent.
        details["also_known_as"] = payload.get("also_known_as", [])
        return details
    except Exception as err:
        print(f"TMDB person details failed for {person_id}: {err}", file=sys.stderr)
        return {}
def _fetch_video_row(dsn: str, file_uuid: str):
    """Return the videos row (with ``file_name``) for file_uuid, or None."""
    schema = os.getenv("DATABASE_SCHEMA", "").strip()
    table = f"{schema}.videos" if schema else "videos"
    conn = psycopg2.connect(dsn)
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(f"SELECT file_name FROM {table} WHERE file_uuid = %s", (file_uuid,))
            return cur.fetchone()
    finally:
        # Release the connection even when the query raises.
        conn.close()


def _build_identity(movie: dict, member: dict, order: int, person: dict, now: str) -> dict:
    """Assemble the identity.json payload for one cast member."""
    person_id = member["id"]
    # Deterministic id: first 32 hex chars of SHA256("tmdb-{movie_id}-{person_id}-{name}")
    identity_uuid = hashlib.sha256(
        f"tmdb-{movie['id']}-{person_id}-{member['name']}".encode()
    ).hexdigest()[:32]
    profile_url = (
        f"https://image.tmdb.org/t/p/w185{member['profile_path']}"
        if member.get("profile_path") else None
    )
    metadata = {
        "tmdb_character": member.get("character", ""),
        "tmdb_cast_order": order,
        "tmdb_movie_id": movie["id"],
        "tmdb_movie_title": movie["title"],
        "tmdb_biography": person.get("biography"),
        "tmdb_birthday": person.get("birthday"),
        "tmdb_place_of_birth": person.get("place_of_birth"),
        "tmdb_aliases": person.get("also_known_as", []),
        "tmdb_imdb_id": person.get("imdb_id"),
        "tmdb_department": person.get("known_for_department"),
        "tmdb_popularity": person.get("popularity"),
        "tmdb_deathday": person.get("deathday"),
        "tmdb_gender": person.get("gender"),
        "tmdb_homepage": person.get("homepage"),
    }
    return {
        "version": 1,
        "identity_uuid": identity_uuid,
        "name": member["name"],
        "identity_type": "people",
        "source": "tmdb",
        "status": "confirmed",
        "tmdb_id": person_id,
        "tmdb_profile": profile_url,
        # Drop unset fields, but always keep the aliases key.
        "metadata": {k: v for k, v in metadata.items() if v is not None or k == "tmdb_aliases"},
        "file_bindings": [],
        "created_at": now,
        "updated_at": now,
    }


def _download_profile(profile_url: str, img_path: Path, name: str) -> None:
    """Save the profile image unless it already exists; failures only warn."""
    if img_path.exists():
        return
    try:
        resp = requests.get(profile_url, timeout=15)
        if resp.status_code == 200:
            img_path.write_bytes(resp.content)
    except Exception as e:
        print(f" [WARN] Failed to download profile for {name}: {e}", file=sys.stderr)


def _update_identity_index(identities_root: Path, created_identities: list) -> None:
    """Merge the new identities into _index.json (uuid -> name)."""
    index_path = identities_root / "_index.json"
    index = {}
    if index_path.exists():
        # Read with the same encoding the file is written with; the locale
        # default may not be UTF-8 and names are stored unescaped.
        with open(index_path, encoding="utf-8") as f:
            index = json.load(f)
    for ci in created_identities:
        index[ci["identity_uuid"]] = ci["name"]
    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(index, f, indent=2, ensure_ascii=False)


def main():
    """Pre-fetch TMDb data for one file and write identity files plus a cache.

    Steps: look up the file name in the videos table, derive a movie title,
    search TMDB, fetch the cast, write one identity directory per cast member
    (identity.json and, when available, profile.jpg), update _index.json, and
    write ``{file_uuid}.tmdb.json``. Exits with status 1 when the API key is
    missing, the file is unknown, no title can be derived, or TMDB has no match.
    """
    parser = argparse.ArgumentParser(description="TMDb Agent — pre-fetch cache")
    parser.add_argument("--file-uuid", required=True, help="File UUID to enrich")
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
    parser.add_argument("--output", default=os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output"))
    args = parser.parse_args()

    if not TMDB_API_KEY:
        print("ERROR: TMDB_API_KEY not set.", file=sys.stderr)
        sys.exit(1)

    # 1. Query DB for file_name
    row = _fetch_video_row(args.db, args.file_uuid)
    if not row:
        print(f"ERROR: File not found: {args.file_uuid}", file=sys.stderr)
        sys.exit(1)
    file_name = row["file_name"]
    print(f"[TKG-AGENT] File: {file_name} ({args.file_uuid})")

    # 2. Extract movie name
    movie_name = extract_movie_name(file_name)
    if not movie_name:
        print(f"ERROR: Cannot extract movie name from: {file_name}", file=sys.stderr)
        sys.exit(1)
    print(f"[TKG-AGENT] Extracted movie name: '{movie_name}'")

    # 3. Search TMDB
    movie = search_movie(movie_name)
    if not movie:
        print(f"ERROR: No TMDB movie found for: {movie_name}", file=sys.stderr)
        sys.exit(1)
    print(f"[TKG-AGENT] Matched: {movie['title']} (TMDB id={movie['id']})")

    # 4. Fetch credits (an empty cast is only a warning; the cache is still written)
    cast = get_credits(movie["id"])
    if not cast:
        print(f"WARN: No cast data found for movie {movie['id']}", file=sys.stderr)

    # 5. Enrich each cast member with person details and write identity files
    output = Path(args.output)
    identities_root = output / "identities"
    identities_root.mkdir(parents=True, exist_ok=True)
    now = datetime.now(timezone.utc).isoformat()
    created_identities = []

    for i, m in enumerate(cast):
        person = get_person_details(m["id"])
        identity = _build_identity(movie, m, i, person, now)

        # Write identity.json
        identity_dir = identities_root / identity["identity_uuid"]
        identity_dir.mkdir(parents=True, exist_ok=True)
        with open(identity_dir / "identity.json", "w", encoding="utf-8") as f:
            json.dump(identity, f, indent=2, ensure_ascii=False)

        # Download profile.jpg
        if identity["tmdb_profile"]:
            _download_profile(identity["tmdb_profile"], identity_dir / "profile.jpg", m["name"])

        created_identities.append({
            "identity_uuid": identity["identity_uuid"],
            "name": m["name"],
            "tmdb_id": m["id"],
            "character": m.get("character", ""),
            "order": i,
        })

        if (i + 1) % 5 == 0:
            print(f"[TKG-AGENT] Wrote {i+1}/{len(cast)} identity files")

    # Update _index.json
    _update_identity_index(identities_root, created_identities)

    # Write movie cache ({uuid}.tmdb.json) — simplified, no per-person data
    cache = {
        "file_uuid": args.file_uuid,
        "fetched_at": now,
        "source": "agent",
        "movie": {
            "tmdb_id": movie["id"],
            "title": movie["title"],
            "release_date": movie.get("release_date"),
            "overview": movie.get("overview"),
            "poster_path": movie.get("poster_path"),
        },
        "cast_count": len(cast),
        "identities_created": len(created_identities),
        "identities": created_identities,
    }
    cache_path = output / f"{args.file_uuid}.tmdb.json"
    with open(cache_path, "w", encoding="utf-8") as f:
        json.dump(cache, f, indent=2, ensure_ascii=False)

    print(f"[TKG-AGENT] Cache written: {cache_path}")
    print(f"[TKG-AGENT] Identity files: {len(created_identities)} cast members → {identities_root}/")
if __name__ == "__main__":
main()

View File

@@ -384,6 +384,7 @@ def main():
parser.add_argument("video_path", help="視頻文件路徑")
parser.add_argument("output_path", help="輸出文件路徑")
parser.add_argument("--yolo-result", help="YOLO 結果文件路徑(可選)")
parser.add_argument("--uuid", help="檔案 UUID由 executor 傳入)")
parser.add_argument(
"--strategy", choices=["fixed", "similarity"], default="fixed", help="分片策略"
)

View File

@@ -57,12 +57,7 @@ async fn translate_text(
"temperature": 0.1
});
let response = client
.post(llm_url)
.json(&body)
.send()
.await
.map_err(|e| {
let response = client.post(llm_url).json(&body).send().await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to call LLM: {}", e),

View File

@@ -97,17 +97,25 @@ struct SceneSummaryResult {
/// Resolves the chat-completions endpoint used for LLM calls.
///
/// Lookup order: `MOMENTRY_LLM_URL`, then `MOMENTRY_LLM_SUMMARY_URL`, then the
/// built-in local default. A variable that is set but empty is still returned
/// as-is; only an unset (or non-Unicode) variable falls through.
fn llm_base_url() -> String {
    std::env::var("MOMENTRY_LLM_URL")
        .or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_URL"))
        .unwrap_or_else(|_| "http://localhost:8082/v1/chat/completions".to_string())
}
/// Resolves the model name sent with LLM requests.
///
/// Lookup order: `MOMENTRY_LLM_MODEL`, then `MOMENTRY_LLM_SUMMARY_MODEL`, then
/// the built-in default model file name. A variable that is set but empty is
/// still returned as-is; only an unset (or non-Unicode) variable falls through.
fn llm_model() -> String {
    std::env::var("MOMENTRY_LLM_MODEL")
        .or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_MODEL"))
        .unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
}
@@ -115,7 +123,7 @@ fn llm_model() -> String {
async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Vec<CutScene>> {
let table = schema::table_name("chunk");
sqlx::query_as::<_, (String, i64, i64, f64, f64, f64, serde_json::Value, serde_json::Value, Option<String>)>(&format!(
sqlx::query_as::<_, (String, i64, i64, f64, Option<f64>, Option<f64>, serde_json::Value, Option<serde_json::Value>, Option<String>)>(&format!(
r#"SELECT chunk_id, start_frame, end_frame, fps, start_time, end_time, content, metadata, summary_text
FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' ORDER BY start_frame"#, table
))
@@ -123,7 +131,8 @@ async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Ve
.fetch_all(db.pool()).await?
.into_iter().map(|r| Ok(CutScene {
chunk_id: r.0, start_frame: r.1, end_frame: r.2,
fps: r.3, start_time: r.4, end_time: r.5, content: r.6, metadata: r.7, summary_text: r.8,
fps: r.3, start_time: r.4.unwrap_or(0.0), end_time: r.5.unwrap_or(0.0),
content: r.6, metadata: r.7.unwrap_or(serde_json::json!({})), summary_text: r.8,
})).collect()
}
@@ -133,7 +142,7 @@ async fn fetch_sentences_in_scene(
cut: &CutScene,
) -> anyhow::Result<Vec<SentenceChunk>> {
let table = schema::table_name("chunk");
sqlx::query_as::<_, (String, String, f64, f64, i64, i64, serde_json::Value)>(&format!(
sqlx::query_as::<_, (String, String, Option<f64>, Option<f64>, i64, i64, serde_json::Value)>(&format!(
r#"SELECT chunk_id, COALESCE(text_content,''), start_time, end_time, start_frame, end_frame, content
FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'
AND start_time >= $2 AND end_time <= $3 ORDER BY start_time"#, table
@@ -141,7 +150,7 @@ async fn fetch_sentences_in_scene(
.bind(file_uuid).bind(cut.start_time).bind(cut.end_time)
.fetch_all(db.pool()).await?
.into_iter().map(|r| Ok(SentenceChunk {
chunk_id: r.0, text: r.1, start_time: r.2, end_time: r.3,
chunk_id: r.0, text: r.1, start_time: r.2.unwrap_or(0.0), end_time: r.3.unwrap_or(0.0),
start_frame: r.4, end_frame: r.5, content: r.6,
})).collect()
}
@@ -540,10 +549,7 @@ async fn analyze_5w1h(
if let Some(ref t) = cut.summary_text {
if t.len() > 20 {
processed += 1;
prev_context.push(format!(
"Scene (t={:.0}s): {}",
cut.start_time, t
));
prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
continue;
}
}
@@ -621,10 +627,7 @@ async fn batch_analyze_5w1h(
if let Some(ref t) = cut.summary_text {
if t.len() > 20 {
processed += 1;
prev_context.push(format!(
"Scene (t={:.0}s): {}",
cut.start_time, t
));
prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
continue;
}
}
@@ -713,10 +716,7 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
if let Some(ref t) = cut.summary_text {
if t.len() > 20 {
processed += 1;
prev_context.push(format!(
"Scene (t={:.0}s): {}",
cut.start_time, t
));
prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
continue;
}
}
@@ -764,38 +764,44 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
qdrant.init_collection(768).await?;
let chunk_table = schema::table_name("chunk");
let rows = sqlx::query_as::<_, (String, String, String, f64, f64)>(
&format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time \
let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64)>(&format!(
"SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time \
FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL \
AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
)
AND (text_content IS NOT NULL AND text_content != '') ORDER BY id",
chunk_table
))
.bind(file_uuid)
.fetch_all(db.pool())
.await?;
let total_vec = rows.len();
let mut stored = 0usize;
for (chunk_id, _ctype, text, start_time, end_time) in &rows {
for (chunk_id, _ctype, text, start_frame, end_frame, start_time, end_time) in &rows {
let text = text.trim();
if text.is_empty() || text.len() < 5 {
continue;
}
match embedder.embed_document(text).await {
Ok(vector) => {
if let Err(e) = sqlx::query(
&format!("UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3", chunk_table)
)
if let Err(e) = sqlx::query(&format!(
"UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3",
chunk_table
))
.bind(&vector as &[f32])
.bind(chunk_id)
.bind(file_uuid)
.execute(db.pool()).await {
.execute(db.pool())
.await
{
tracing::error!("[Vectorize] PG failed for {}: {}", chunk_id, e);
continue;
}
let payload = VectorPayload {
uuid: file_uuid.to_string(),
file_uuid: file_uuid.to_string(),
chunk_id: chunk_id.clone(),
chunk_type: "sentence".to_string(),
start_frame: *start_frame,
end_frame: *end_frame,
start_time: *start_time,
end_time: *end_time,
text: Some(text.to_string()),

View File

@@ -93,16 +93,15 @@ async fn create_identity(
})?;
let id_table = crate::core::db::schema::table_name("identities");
let name_col = if id_table.starts_with("dev.") { "name" } else { "real_name" };
let query = format!(
"SELECT uuid, reference_data->'total_references' as total,
reference_data->'angles_covered' as angles,
reference_data->'quality_avg' as quality
FROM {}
WHERE {} = $1
WHERE name = $1
ORDER BY created_at DESC
LIMIT 1",
id_table, name_col
id_table
);
let row: Option<(String, Option<i32>, Option<Vec<String>>, Option<f64>)> =
@@ -168,11 +167,19 @@ async fn list_identities(
let id_table = crate::core::db::schema::table_name("identities");
let total: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", id_table))
.fetch_one(db.pool()).await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Count error: {}", e)))?;
.fetch_one(db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Count error: {}", e),
)
})?;
let name_col = if id_table.starts_with("dev.") { "name" } else { "real_name" };
let sql = format!("SELECT id::int, uuid, {} AS name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2", name_col, id_table);
let sql = format!(
"SELECT id::int, uuid, name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2",
id_table
);
let rows: Vec<(i32, uuid::Uuid, String, Option<serde_json::Value>)> = match sqlx::query_as(&sql)
.bind(page_size as i64)
@@ -200,12 +207,25 @@ async fn list_identities(
.collect();
let identities_table = crate::core::db::schema::table_name("identities");
let total_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", identities_table))
.fetch_one(db.pool()).await.unwrap_or(0);
let tmdb_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table))
.fetch_one(db.pool()).await.unwrap_or(0);
let auto_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE source = 'auto'", identities_table))
.fetch_one(db.pool()).await.unwrap_or(0);
let total_identities: i64 =
sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", identities_table))
.fetch_one(db.pool())
.await
.unwrap_or(0);
let tmdb_identities: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE source = 'tmdb'",
identities_table
))
.fetch_one(db.pool())
.await
.unwrap_or(0);
let auto_identities: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE source = 'auto'",
identities_table
))
.fetch_one(db.pool())
.await
.unwrap_or(0);
Ok(Json(IdentityListResponse {
identities,

View File

@@ -15,8 +15,14 @@ use crate::core::db::PostgresDb;
pub fn identity_agent_routes() -> Router<AppState> {
Router::new()
.route("/api/v1/agents/identity/match-from-photo", post(match_from_photo))
.route("/api/v1/agents/identity/match-from-trace", post(match_from_trace))
.route(
"/api/v1/agents/identity/match-from-photo",
post(match_from_photo),
)
.route(
"/api/v1/agents/identity/match-from-trace",
post(match_from_trace),
)
}
#[derive(Debug, Serialize)]
@@ -73,13 +79,21 @@ async fn match_from_photo(
let uuid_clean = identity_uuid.replace('-', "");
if uuid_clean.is_empty() || file_uuid.is_empty() {
return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
return Err((
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": "identity_uuid and file_uuid are required"
}))));
})),
));
}
let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({
let data = image_data.ok_or_else(|| {
(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": "No image field found. Use field name 'image'."
}))))?;
})),
)
})?;
// 1. Save uploaded image to temp
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
@@ -88,11 +102,17 @@ async fn match_from_photo(
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let temp_dir = std::env::temp_dir().join("momentry_match_face");
std::fs::create_dir_all(&temp_dir).map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)})),
)
})?;
let temp_img = temp_dir.join(format!("{}.jpg", uuid_clean));
std::fs::write(&temp_img, &data).map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)})),
)
})?;
// 2. Extract face embedding via Python script
@@ -103,56 +123,83 @@ async fn match_from_photo(
.output()
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)})),
)
})?;
let _ = std::fs::remove_file(&temp_img);
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
return Err((
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": format!("Face extraction failed: {}", stderr)
}))));
})),
));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let extract_result: serde_json::Value = serde_json::from_str(&stdout).map_err(|_| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Failed to parse extractor output"})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": "Failed to parse extractor output"})),
)
})?;
let embedding: Vec<f64> = serde_json::from_value(
extract_result.get("embedding")
.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({"message": "No embedding in extractor output"}))))?
.clone()
).map_err(|_| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Invalid embedding format"})))
extract_result
.get("embedding")
.ok_or_else(|| {
(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({"message": "No embedding in extractor output"})),
)
})?
.clone(),
)
.map_err(|_| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": "Invalid embedding format"})),
)
})?;
let embedding_f32: Vec<f32> = embedding.into_iter().map(|v| v as f32).collect();
// 3. Look up identity internal ID
let id_table = schema::table_name("identities");
let identity_id_row: Option<(i32,)> = sqlx::query_as(
&format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table)
)
let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
id_table
))
.bind(&uuid_clean)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
)
})?;
let identity_id = match identity_id_row {
Some((id,)) => id,
None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
None => {
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"success": false, "message": "Identity not found"
})))),
})),
))
}
};
// 4. Find best matching trace (highest similarity, no threshold)
let fd_table = schema::table_name("face_detections");
let best_match: Option<(i32, i32, f64)> = sqlx::query_as(
&format!(
let best_match: Option<(i32, i32, f64)> = sqlx::query_as(&format!(
r#"SELECT id, trace_id,
1 - (embedding::vector <=> $1::vector) as similarity
FROM {}
@@ -160,22 +207,25 @@ async fn match_from_photo(
ORDER BY embedding::vector <=> $1::vector
LIMIT 1"#,
fd_table
)
)
))
.bind(&embedding_f32)
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
)
})?;
// 5. Update best match face_detection
let mut traces_matched: Vec<i32> = Vec::new();
if let Some((fb_id, fb_trace, fb_sim)) = best_match {
let _ = sqlx::query(
&format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table)
)
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1 WHERE id = $2",
fd_table
))
.bind(identity_id)
.bind(fb_id)
.execute(state.db.pool())
@@ -191,7 +241,10 @@ async fn match_from_photo(
file_uuid,
matches: 1,
traces_matched,
message: format!("Best trace: trace_id={}, similarity={:.4}", fb_trace, fb_sim),
message: format!(
"Best trace: trace_id={}, similarity={:.4}",
fb_trace, fb_sim
),
}))
} else {
Ok(Json(MatchFromPhotoResponse {
@@ -221,26 +274,30 @@ async fn match_from_trace(
// 1. Get 3 best face embeddings from this trace at different angles
// Divide trace frame range into 3 segments, pick best face from each
let fd_table = schema::table_name("face_detections");
let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>(
&format!(
let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>(&format!(
"SELECT embedding, frame_number FROM {} \
WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
ORDER BY frame_number ASC",
fd_table
)
)
))
.bind(&req.file_uuid)
.bind(req.trace_id)
.fetch_all(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
)
})?;
if all_faces.is_empty() {
return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"success": false, "message": "No embedding found for this trace"
}))));
})),
));
}
// Pick 3 samples: divide frame range into 3 segments, use face with largest area per segment
@@ -254,14 +311,12 @@ async fn match_from_trace(
let mut query_embeddings: Vec<Vec<f32>> = Vec::new();
// Get width*height info if available (not all pipelines store it)
let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(
&format!(
let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(&format!(
"SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \
FROM {} WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
ORDER BY frame_number ASC",
fd_table
)
)
))
.bind(&req.file_uuid)
.bind(req.trace_id)
.fetch_all(state.db.pool())
@@ -296,8 +351,7 @@ async fn match_from_trace(
let mut seen_trace_ids = std::collections::HashSet::new();
for qemb in &query_embeddings {
let top = sqlx::query_as::<_, (i32, i32, f64)>(
&format!(
let top = sqlx::query_as::<_, (i32, i32, f64)>(&format!(
r#"SELECT id, trace_id,
1 - (embedding::vector <=> $1::vector) as similarity
FROM {}
@@ -307,15 +361,17 @@ async fn match_from_trace(
ORDER BY embedding::vector <=> $1::vector
LIMIT 1"#,
fd_table
)
)
))
.bind(qemb)
.bind(&req.file_uuid)
.bind(req.trace_id)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
)
})?;
if let Some((cface_id, c_trace_id, c_sim)) = top {
@@ -327,35 +383,49 @@ async fn match_from_trace(
// 3. Look up identity internal ID
let id_table = schema::table_name("identities");
let identity_id_row: Option<(i32,)> = sqlx::query_as(
&format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table)
)
let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
id_table
))
.bind(&uuid_clean)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
)
})?;
let identity_id = match identity_id_row {
Some((id,)) => id,
None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
None => {
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"success": false, "message": "Identity not found"
})))),
})),
))
}
};
// 4. Update matched face_detections
let mut traces_matched: Vec<i32> = Vec::new();
for (id, trace_id, _similarity) in &validated {
if let Err(e) = sqlx::query(
&format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table)
)
if let Err(e) = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1 WHERE id = $2",
fd_table
))
.bind(identity_id)
.bind(id)
.execute(state.db.pool())
.await
{
tracing::warn!("[match-from-trace] Failed to update face_detection {}: {}", id, e);
tracing::warn!(
"[match-from-trace] Failed to update face_detection {}: {}",
id,
e
);
} else {
if !traces_matched.contains(trace_id) {
traces_matched.push(*trace_id);
@@ -364,9 +434,10 @@ async fn match_from_trace(
}
// 5. Also bind the source trace itself
let _ = sqlx::query(
&format!("UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3", fd_table)
)
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3",
fd_table
))
.bind(identity_id)
.bind(&req.file_uuid)
.bind(req.trace_id)
@@ -388,7 +459,10 @@ async fn match_from_trace(
file_uuid: req.file_uuid,
matches: match_count,
traces_matched,
message: format!("Matched {} faces ({} unique traces)", match_count, trace_count),
message: format!(
"Matched {} faces ({} unique traces)",
match_count, trace_count
),
}))
}
@@ -461,7 +535,10 @@ fn analyze_person_speaker_overlap(
}
// Check if persons co-occur in time (frame proximity)
let overlap = person.frames.iter().any(|f| other_person.frames.contains(f));
let overlap = person
.frames
.iter()
.any(|f| other_person.frames.contains(f));
if overlap {
matched_persons.push(other_person.person_id.clone());
visited_persons.insert(other_person.person_id.clone());
@@ -474,9 +551,10 @@ fn analyze_person_speaker_overlap(
person.frames.iter().max().copied().unwrap_or(0) as f64,
);
for speaker in speakers {
let has_overlap = speaker.segments.iter().any(|(start, end)| {
*start <= person_time_range.1 && *end >= person_time_range.0
});
let has_overlap = speaker
.segments
.iter()
.any(|(start, end)| *start <= person_time_range.1 && *end >= person_time_range.0);
if has_overlap {
if !matched_speakers.contains(&speaker.speaker_id) {
matched_speakers.push(speaker.speaker_id.clone());
@@ -563,11 +641,12 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
// Step 2: Load all face_detections (including frame_number), grouped by trace_id
let fd_table = schema::table_name("face_detections");
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(
&format!("SELECT trace_id, frame_number, embedding FROM {} \
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(&format!(
"SELECT trace_id, frame_number, embedding FROM {} \
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
ORDER BY trace_id, frame_number", fd_table),
)
ORDER BY trace_id, frame_number",
fd_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
@@ -647,16 +726,18 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
let fd_table = schema::table_name("face_detections");
let mut updated = 0usize;
for (tid, name) in &matched {
let id_opt = sqlx::query_scalar::<_, Option<i32>>(
&format!("SELECT id FROM {} WHERE name=$1 AND source='tmdb'", identities_table),
)
let id_opt = sqlx::query_scalar::<_, Option<i32>>(&format!(
"SELECT id FROM {} WHERE name=$1 AND source='tmdb'",
identities_table
))
.bind(name)
.fetch_optional(pool)
.await?;
if let Some(identity_id) = id_opt {
let _ = sqlx::query(
&format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table),
)
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
fd_table
))
.bind(identity_id)
.bind(file_uuid)
.bind(tid)
@@ -726,32 +807,32 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
// Step 6: For traces that were not matched, set stranger_id = trace_id
// trace_id is a sequential integer within a single file, so it is reused directly as stranger_id
let stranger_update = sqlx::query(
&format!(
let stranger_update = sqlx::query(&format!(
"UPDATE {} SET stranger_id = trace_id \
WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \
AND (stranger_id IS NULL OR stranger_id != trace_id)",
fd_table
)
)
))
.bind(file_uuid)
.execute(pool)
.await?;
let stranger_count = stranger_update.rows_affected();
// Step 7: Save identity files for all affected identities
let affected = sqlx::query_scalar::<_, uuid::Uuid>(
&format!("SELECT DISTINCT i.uuid FROM {} i \
let affected = sqlx::query_scalar::<_, uuid::Uuid>(&format!(
"SELECT DISTINCT i.uuid FROM {} i \
JOIN {} fd ON fd.identity_id = i.id \
WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL", identities_table, fd_table)
)
WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL",
identities_table, fd_table
))
.bind(file_uuid)
.fetch_all(pool)
.await
.unwrap_or_default();
for uuid in &affected {
let us = uuid.to_string().replace('-', "");
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await {
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await
{
tracing::warn!("[FaceMatch] Failed to save identity file {}: {}", us, e);
}
}
@@ -773,13 +854,15 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
// Load face traces with identity_id and frame numbers
let fd_table = schema::table_name("face_detections");
let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(
&format!("SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(&format!(
"SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
FROM {} WHERE file_uuid=$1 AND trace_id IS NOT NULL AND identity_id IS NOT NULL \
GROUP BY trace_id", fd_table)
)
GROUP BY trace_id",
fd_table
))
.bind(file_uuid)
.fetch_all(pool).await?;
.fetch_all(pool)
.await?;
if traces.is_empty() {
tracing::info!("[SpeakerBind] No face traces with identities");
@@ -945,9 +1028,8 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
let speakers = extract_speakers_from_asrx_data(&asrx_data);
let identities = analyze_person_speaker_overlap(&persons, &speakers);
let uuid_short = &file_uuid[..8.min(file_uuid.len())];
for (idx, id_result) in identities.iter().enumerate() {
let identity_name = format!("stranger_{}_{}", uuid_short, idx);
let identity_name = format!("stranger_{}", idx);
let metadata = serde_json::json!({
"source": "identity_agent",
"trace_ids": id_result.person_ids,

View File

@@ -38,8 +38,18 @@ pub fn identity_routes() -> Router<crate::api::server::AppState> {
.route("/api/v1/resource/heartbeat", post(heartbeat_resource))
.route("/api/v1/resources", get(list_resources))
.route("/api/v1/identity/upload", post(upload_identity))
.route("/api/v1/identity/:identity_uuid/profile-image", post(upload_profile_image).get(get_profile_image))
.route("/api/v1/identity/:identity_uuid/json", get(get_identity_json))
.route(
"/api/v1/identity/:identity_uuid/profile-image",
post(upload_profile_image).get(get_profile_image),
)
.route(
"/api/v1/identity/:identity_uuid/status",
get(get_identity_status),
)
.route(
"/api/v1/identity/:identity_uuid/json",
get(get_identity_json),
)
// Experiment: identity text search (non-polluting, separate endpoint)
.route("/api/v1/search/identity_text", get(search_identity_text))
.route("/api/v1/identities/search", get(search_identities_by_text))
@@ -98,7 +108,8 @@ async fn list_files(
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let data = records.0
let data = records
.0
.into_iter()
.map(|r| FileItem {
file_uuid: r.file_uuid,
@@ -163,7 +174,9 @@ async fn get_file_detail(
file_name: f.file_name,
file_path: f.file_path,
metadata: f.probe_json,
created_at: chrono::DateTime::parse_from_rfc3339(&f.created_at).ok().map(|d| d.into()),
created_at: chrono::DateTime::parse_from_rfc3339(&f.created_at)
.ok()
.map(|d| d.into()),
})),
None => Err((
StatusCode::NOT_FOUND,
@@ -214,13 +227,42 @@ async fn get_file_identities(
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let fps = 25.0;
let data: Vec<FileIdentityItem> = Vec::new();
let data: Vec<FileIdentityItem> = records
.into_iter()
.map(|r| FileIdentityItem {
identity_id: r.identity_id,
identity_uuid: r.identity_uuid,
name: r.name,
metadata: r.metadata,
face_count: r.face_count,
speaker_count: r.speaker_count,
start_frame: r.start_frame,
end_frame: r.end_frame,
start_time: r.start_time,
end_time: r.end_time,
confidence: r.confidence,
})
.collect();
let total = match sqlx::query_scalar::<_, i64>(
&format!(
"SELECT COUNT(DISTINCT fd.identity_id) FROM {} fd WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
crate::core::db::schema::table_name("face_detections")
)
)
.bind(&file_uuid)
.fetch_one(state.db.pool())
.await
{
Ok(c) => c,
Err(_) => data.len() as i64,
};
Ok(Json(FileIdentitiesResponse {
success: true,
file_uuid: file_uuid,
fps,
total: data.len() as i64,
total,
page,
page_size,
data,
@@ -243,6 +285,16 @@ pub struct IdentityDetailResponse {
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
}
/// JSON body returned by the identity status handler (`get_identity_status`).
///
/// Reports which on-disk artifacts exist for a single identity.
#[derive(Debug, Serialize)]
pub struct IdentityStatusResponse {
/// Set to `true` in the success response built by the handler.
pub success: bool,
/// UUID string copied from the identity record that was looked up.
pub identity_uuid: String,
/// Name copied from the identity record.
pub name: String,
/// Whether an `identity.json` file was found in the identity's storage directory.
pub has_json: bool,
/// Whether a `profile.jpg` file was found in the identity's storage directory.
pub has_jpg: bool,
/// Optional error text; the handler visible in this file always sets it to `None`.
pub error: Option<String>,
}
/// Renders `u` through its `to_string` form and drops every `-` character
/// from the result.
fn strip_uuid(u: &uuid::Uuid) -> String {
    let mut compact = u.to_string();
    compact.retain(|ch| ch != '-');
    compact
}
@@ -270,7 +322,11 @@ async fn get_identity_detail(
metadata: i.metadata,
reference_data: i.reference_data,
tmdb_id: i.tmdb_id,
tmdb_profile: Some(format!("{}/identities/{}/profile.jpg", crate::core::config::OUTPUT_DIR.as_str(), i.uuid.replace('-', ""))),
tmdb_profile: Some(format!(
"{}/identities/{}/profile.jpg",
crate::core::config::OUTPUT_DIR.as_str(),
i.uuid.replace('-', "")
)),
created_at: i.created_at,
updated_at: i.updated_at,
})),
@@ -281,6 +337,44 @@ async fn get_identity_detail(
}
}
/// Reports whether the on-disk files of an identity are present.
///
/// The UUID from the request path is normalised by removing hyphens and used
/// to look the identity up in the database. When the record exists, the
/// handler probes the identity storage directory for `identity.json` and
/// `profile.jpg`, trying both the hyphen-free UUID and the UUID string stored
/// on the record.
///
/// # Errors
///
/// * `500 INTERNAL_SERVER_ERROR` with the error text when the lookup fails.
/// * `404 NOT_FOUND` when no identity matches the UUID.
async fn get_identity_status(
    State(state): State<crate::api::server::AppState>,
    Path(identity_uuid): Path<String>,
) -> Result<Json<IdentityStatusResponse>, (StatusCode, String)> {
    let uuid_clean = identity_uuid.replace('-', "");

    let record = match state.db.get_identity_by_uuid(&uuid_clean).await {
        Ok(Some(found)) => found,
        Ok(None) => {
            return Err((
                StatusCode::NOT_FOUND,
                format!("Identity not found: {}", uuid_clean),
            ));
        }
        Err(e) => return Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
    };

    // Directories are checked in this order: hyphen-free UUID first, then the
    // UUID exactly as stored on the record.
    let candidate_dirs = [
        crate::core::identity::storage::identity_dir(&uuid_clean),
        crate::core::identity::storage::identity_dir(&record.uuid),
    ];
    let present_in_any =
        |file_name: &str| candidate_dirs.iter().any(|dir| dir.join(file_name).exists());

    let has_json = present_in_any("identity.json");
    let has_jpg = present_in_any("profile.jpg");

    let body = IdentityStatusResponse {
        success: true,
        identity_uuid: record.uuid,
        name: record.name,
        has_json,
        has_jpg,
        error: None,
    };
    Ok(Json(body))
}
#[derive(Debug, Serialize)]
pub struct IdentityFilesResponse {
pub success: bool,
@@ -375,10 +469,25 @@ async fn get_identity_files(
})
.collect();
let total = match sqlx::query_scalar::<_, i64>(
&format!(
"SELECT COUNT(DISTINCT fd.file_uuid) FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1)",
crate::core::db::schema::table_name("face_detections"),
crate::core::db::schema::table_name("identities"),
)
)
.bind(&uuid)
.fetch_one(state.db.pool())
.await
{
Ok(c) => c,
Err(_) => data.len() as i64,
};
Ok(Json(IdentityFilesResponse {
success: true,
identity_uuid: uuid.to_string().replace('-', ""),
total: data.len() as i64,
total,
page,
page_size,
data,
@@ -449,10 +558,25 @@ async fn get_identity_faces(
})
.collect();
let total = match sqlx::query_scalar::<_, i64>(
&format!(
"SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1)",
crate::core::db::schema::table_name("face_detections"),
crate::core::db::schema::table_name("identities"),
)
)
.bind(&uuid)
.fetch_one(state.db.pool())
.await
{
Ok(c) => c,
Err(_) => data.len() as i64,
};
Ok(Json(IdentityFacesResponse {
success: true,
identity_uuid: uuid.to_string().replace('-', ""),
total: data.len() as i64,
total,
page,
page_size,
data,
@@ -721,12 +845,24 @@ async fn upload_profile_image(
let uuid_clean = identity_uuid.replace('-', "");
// Verify identity exists
if state.db.get_identity_by_uuid(&uuid_clean).await.map_err(|_| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"success": false, "message": "DB error"})))
})?.is_none() {
return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
if state
.db
.get_identity_by_uuid(&uuid_clean)
.await
.map_err(|_| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"success": false, "message": "DB error"})),
)
})?
.is_none()
{
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"success": false, "message": "Identity not found"
}))));
})),
));
}
// Process multipart upload
@@ -740,9 +876,14 @@ async fn upload_profile_image(
ext = match content_type.as_str() {
"image/png" => "png",
"image/jpeg" | "image/jpg" => "jpg",
_ => return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
_ => {
return Err((
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": "Unsupported image type. Use JPEG or PNG."
})))),
})),
))
}
};
image_data = Some(field.bytes().await.map_err(|_| {
(StatusCode::BAD_REQUEST, Json(serde_json::json!({"success": false, "message": "Failed to read image data"})))
@@ -750,9 +891,14 @@ async fn upload_profile_image(
}
}
let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({
let data = image_data.ok_or_else(|| {
(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": "No image field found. Use field name 'image'."
}))))?;
})),
)
})?;
// Write image file
let dir = crate::core::identity::storage::identity_dir(&uuid_clean);
@@ -789,8 +935,16 @@ async fn get_profile_image(
let path = dir.join(format!("profile.{}", ext));
if path.exists() {
let data = std::fs::read(&path).map_err(|_| StatusCode::NOT_FOUND)?;
let content_type = if *ext == "png" { "image/png" } else { "image/jpeg" };
return Ok((StatusCode::OK, [("content-type".to_string(), content_type.to_string())], data));
let content_type = if *ext == "png" {
"image/png"
} else {
"image/jpeg"
};
return Ok((
StatusCode::OK,
[("content-type".to_string(), content_type.to_string())],
data,
));
}
}
Err(StatusCode::NOT_FOUND)
@@ -802,7 +956,14 @@ async fn get_identity_json(
) -> Result<(StatusCode, [(String, String); 1], Vec<u8>), StatusCode> {
let clean = identity_uuid.replace('-', "");
let with_hyphens = if clean.len() == 32 {
format!("{}-{}-{}-{}-{}", &clean[0..8], &clean[8..12], &clean[12..16], &clean[16..20], &clean[20..32])
format!(
"{}-{}-{}-{}-{}",
&clean[0..8],
&clean[8..12],
&clean[12..16],
&clean[16..20],
&clean[20..32]
)
} else {
identity_uuid.clone()
};
@@ -821,7 +982,9 @@ async fn get_identity_json(
}
// 2. Lazy Sync: If file missing, generate from DB and save
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &clean).await {
if let Err(e) =
crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &clean).await
{
tracing::warn!("[identity-json] Lazy sync failed for {}: {}", clean, e);
return Err(StatusCode::NOT_FOUND);
}
@@ -858,7 +1021,7 @@ struct IdentityTextHit {
chunk_id: String,
start_time: f64,
end_time: f64,
text_content: String,
text_content: Option<String>,
identity_id: Option<i32>,
identity_name: Option<String>,
identity_source: Option<String>,
@@ -889,7 +1052,7 @@ async fn search_identity_text(
let query = format!(
r#"SELECT c.file_uuid, c.chunk_id, c.start_time, c.end_time, c.text_content,
fd.identity_id, CASE WHEN id_table LIKE 'dev.%' THEN i.name ELSE i.real_name END AS identity_name, i.source AS identity_source,
fd.identity_id, i.name AS identity_name, i.source AS identity_source,
fd.trace_id
FROM {} c
LEFT JOIN {} fd ON fd.file_uuid = c.file_uuid
@@ -902,18 +1065,42 @@ async fn search_identity_text(
chunk_table, fd_table, id_table
);
let rows = sqlx::query_as::<_, (String, String, f64, f64, String, Option<i32>, Option<String>, Option<String>, Option<i32>)>(&query)
.bind(&params.uuid).bind(&like_q).bind(limit)
let rows = sqlx::query_as::<
_,
(
String,
String,
f64,
f64,
Option<String>,
Option<i32>,
Option<String>,
Option<String>,
Option<i32>,
),
>(&query)
.bind(&params.uuid)
.bind(&like_q)
.bind(limit)
.fetch_all(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let results: Vec<IdentityTextHit> = rows
.into_iter()
.map(|(fu, cid, st, et, txt, iid, iname, isrc, tid)| IdentityTextHit {
file_uuid: fu, chunk_id: cid, start_time: st, end_time: et, text_content: txt,
identity_id: iid, identity_name: iname, identity_source: isrc, trace_id: tid,
})
.map(
|(fu, cid, st, et, txt, iid, iname, isrc, tid)| IdentityTextHit {
file_uuid: fu,
chunk_id: cid,
start_time: st,
end_time: et,
text_content: txt,
identity_id: iid,
identity_name: iname,
identity_source: isrc,
trace_id: tid,
},
)
.collect();
let total = results.len() as i64;
@@ -922,7 +1109,14 @@ async fn search_identity_text(
let start = (page - 1) * page_size;
let paged: Vec<IdentityTextHit> = results.into_iter().skip(start).take(page_size).collect();
let limit = params.limit.unwrap_or(50) as usize;
Ok(Json(IdentityTextResponse { success: true, total, page, page_size, limit, results: paged }))
Ok(Json(IdentityTextResponse {
success: true,
total,
page,
page_size,
limit,
results: paged,
}))
}
#[derive(Debug, Deserialize)]
@@ -942,7 +1136,7 @@ struct IdentitySearchHit {
trace_id: Option<i32>,
chunk_id: String,
start_time: f64,
text_content: String,
text_content: Option<String>,
}
#[derive(Debug, Serialize)]
@@ -965,7 +1159,7 @@ async fn search_identities_by_text(
let limit = params.limit.unwrap_or(50).min(100);
let query = format!(
r#"SELECT i.id::int, COALESCE(i.real_name, i.actor_name, i.name) AS name, i.source, i.tmdb_id,
r#"SELECT i.id::int, i.name, i.source, i.tmdb_id,
fd.file_uuid, fd.trace_id,
c.chunk_id, c.start_time, c.text_content
FROM {} i
@@ -973,15 +1167,30 @@ async fn search_identities_by_text(
JOIN {} c ON c.file_uuid = fd.file_uuid
AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0)
AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0)
WHERE COALESCE(i.real_name, i.actor_name, i.name) ILIKE $1
WHERE i.name ILIKE $1
AND ($2::text IS NULL OR fd.file_uuid = $2)
ORDER BY COALESCE(i.real_name, i.actor_name, i.name), c.start_time
ORDER BY i.name, c.start_time
LIMIT $3"#,
id_table, fd_table, chunk_table
);
let rows = sqlx::query_as::<_, (i32, String, Option<String>, Option<i32>, String, Option<i32>, String, f64, String)>(&query)
.bind(&like_q).bind(&params.uuid).bind(limit)
let rows = sqlx::query_as::<
_,
(
i32,
String,
Option<String>,
Option<i32>,
String,
Option<i32>,
String,
f64,
Option<String>,
),
>(&query)
.bind(&like_q)
.bind(&params.uuid)
.bind(limit)
.fetch_all(state.db.pool())
.await
.map_err(|e| {
@@ -991,12 +1200,25 @@ async fn search_identities_by_text(
let results: Vec<IdentitySearchHit> = rows
.into_iter()
.map(|(iid, name, src, tid, fu, trace_id, cid, st, txt)| IdentitySearchHit {
identity_id: iid, name, source: src, tmdb_id: tid,
file_uuid: fu, trace_id, chunk_id: cid, start_time: st, text_content: txt,
})
.map(
|(iid, name, src, tid, fu, trace_id, cid, st, txt)| IdentitySearchHit {
identity_id: iid,
name,
source: src,
tmdb_id: tid,
file_uuid: fu,
trace_id,
chunk_id: cid,
start_time: st,
text_content: txt,
},
)
.collect();
let total = results.len() as i64;
Ok(Json(IdentitySearchResponse { success: true, total, results }))
Ok(Json(IdentitySearchResponse {
success: true,
total,
results,
}))
}

View File

@@ -1,5 +1,5 @@
use axum::{
extract::{Path, Query},
extract::{Path, Query, State},
http::StatusCode,
response::Json,
routing::{get, post},
@@ -77,7 +77,7 @@ pub async fn bind_identity(
// Get identity_id from identity_uuid
let identity_row: Option<(i64, String)> = sqlx::query_as(&format!(
"SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid",
"SELECT id, name FROM {} WHERE uuid = $1::uuid",
id_table
))
.bind(&identity_uuid)
@@ -116,8 +116,14 @@ pub async fn bind_identity(
let uuid_clean = identity_uuid.replace('-', "");
// Sync identity JSON file
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &uuid_clean).await {
tracing::warn!("[bind] Failed to sync identity file for {}: {}", uuid_clean, e);
if let Err(e) =
crate::core::identity::storage::save_identity_file_by_pool(&db, &uuid_clean).await
{
tracing::warn!(
"[bind] Failed to sync identity file for {}: {}",
uuid_clean,
e
);
}
Ok(Json(ApiResponse {
@@ -189,8 +195,15 @@ pub async fn unbind_identity(
.ok()
.flatten();
if let Some(identity_uuid) = uuid {
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &identity_uuid).await {
tracing::warn!("[unbind] Failed to sync identity file for {}: {}", identity_uuid, e);
if let Err(e) =
crate::core::identity::storage::save_identity_file_by_pool(&db, &identity_uuid)
.await
{
tracing::warn!(
"[unbind] Failed to sync identity file for {}: {}",
identity_uuid,
e
);
}
}
}
@@ -221,7 +234,7 @@ pub async fn merge_identities(
// Get IDs for both identities
let from_row: Option<(i64, String)> = sqlx::query_as(&format!(
"SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid",
"SELECT id, name FROM {} WHERE uuid = $1::uuid",
id_table
))
.bind(&identity_uuid)
@@ -239,7 +252,7 @@ pub async fn merge_identities(
))?;
let into_row: Option<(i64, String)> = sqlx::query_as(&format!(
"SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid",
"SELECT id, name FROM {} WHERE uuid = $1::uuid",
id_table
))
.bind(&req.into_uuid)
@@ -299,8 +312,14 @@ pub async fn merge_identities(
// Sync target identity JSON
let into_uuid_clean = req.into_uuid.replace('-', "");
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &into_uuid_clean).await {
tracing::warn!("[merge] Failed to sync target identity file for {}: {}", into_uuid_clean, e);
if let Err(e) =
crate::core::identity::storage::save_identity_file_by_pool(&db, &into_uuid_clean).await
{
tracing::warn!(
"[merge] Failed to sync target identity file for {}: {}",
into_uuid_clean,
e
);
}
// Delete source identity JSON if not keeping history
@@ -339,6 +358,106 @@ pub struct ListIdentitiesParams {
pub offset: Option<i32>,
}
/// Aggregated statistics for one `(file_uuid, trace_id)` group of face
/// detections bound to an identity, as produced by `get_identity_traces`.
#[derive(Debug, Serialize)]
pub struct IdentityTraceInfo {
/// File UUID of the group, selected as text.
pub file_uuid: String,
/// Trace id of the group.
pub trace_id: i32,
/// Number of face-detection rows in the group (`COUNT(*)`).
pub frame_count: i64,
/// Smallest `frame_number` in the group.
pub first_frame: i32,
/// Largest `frame_number` in the group.
pub last_frame: i32,
/// `first_frame` divided by 25.0 and rounded to one decimal place.
pub first_sec: f64,
/// `last_frame` divided by 25.0 and rounded to one decimal place.
pub last_sec: f64,
/// Average detection confidence of the group, rounded to four decimal places.
pub avg_confidence: f64,
}
/// JSON body returned by `get_identity_traces`.
#[derive(Debug, Serialize)]
pub struct IdentityTracesResponse {
/// Set to `true` in the success response built by the handler.
pub success: bool,
/// Identity UUID exactly as received in the request path.
pub identity_uuid: String,
/// Name read from the identity row.
pub name: String,
/// Number of entries in `traces`.
pub total_traces: usize,
/// Sum of `frame_count` over all entries in `traces`.
pub total_faces: i64,
/// One entry per `(file_uuid, trace_id)` group bound to the identity.
pub traces: Vec<IdentityTraceInfo>,
}
/// Lists every face trace bound to an identity, across all files.
///
/// The identity row is resolved from the UUID in the request path. Face
/// detections with that `identity_id` are then grouped by
/// `(file_uuid, trace_id)`, and per-group statistics are returned together
/// with overall totals.
///
/// # Errors
///
/// * `500 INTERNAL_SERVER_ERROR` with the error text when either query fails.
/// * `404 NOT_FOUND` with `"Identity not found"` when no identity row matches.
///
/// NOTE(review): the second values are derived with a fixed divisor of 25.0
/// in the SQL below rather than a per-file frame rate — confirm this is
/// intended.
/// NOTE(review): rows are decoded into non-optional tuple fields; presumably
/// a NULL `trace_id` or an all-NULL `confidence` group would surface as a
/// query error — verify against the schema.
pub async fn get_identity_traces(
    State(state): State<crate::api::server::AppState>,
    Path(identity_uuid): Path<String>,
) -> Result<Json<IdentityTracesResponse>, (StatusCode, String)> {
    // Resolve the identity's internal id and name.
    let identity_sql = format!(
        "SELECT id, name FROM {} WHERE uuid = $1::uuid",
        crate::core::db::schema::table_name("identities")
    );
    let found: Option<(i32, String)> = sqlx::query_as(&identity_sql)
        .bind(&identity_uuid)
        .fetch_optional(state.db.pool())
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
    let (identity_id, name) = match found {
        Some(pair) => pair,
        None => return Err((StatusCode::NOT_FOUND, "Identity not found".to_string())),
    };

    // Aggregate all detections of this identity per (file, trace).
    let traces_sql = format!(
        r#"SELECT fd.file_uuid::text, fd.trace_id,
COUNT(*)::bigint AS frame_count,
MIN(fd.frame_number)::int AS first_frame,
MAX(fd.frame_number)::int AS last_frame,
ROUND(MIN(fd.frame_number)::numeric / 25.0, 1)::float8 AS first_sec,
ROUND(MAX(fd.frame_number)::numeric / 25.0, 1)::float8 AS last_sec,
ROUND(AVG(fd.confidence)::numeric, 4)::float8 AS avg_confidence
FROM {} fd
WHERE fd.identity_id = $1
GROUP BY fd.file_uuid, fd.trace_id
ORDER BY fd.file_uuid, fd.trace_id"#,
        crate::core::db::schema::table_name("face_detections")
    );
    let rows: Vec<(String, i32, i64, i32, i32, f64, f64, f64)> = sqlx::query_as(&traces_sql)
        .bind(identity_id)
        .fetch_all(state.db.pool())
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    // Convert rows to response items while accumulating the face total.
    let total_traces = rows.len();
    let mut total_faces: i64 = 0;
    let mut traces: Vec<IdentityTraceInfo> = Vec::with_capacity(total_traces);
    for row in rows {
        let (
            file_uuid,
            trace_id,
            frame_count,
            first_frame,
            last_frame,
            first_sec,
            last_sec,
            avg_confidence,
        ) = row;
        total_faces += frame_count;
        traces.push(IdentityTraceInfo {
            file_uuid,
            trace_id,
            frame_count,
            first_frame,
            last_frame,
            first_sec,
            last_sec,
            avg_confidence,
        });
    }

    Ok(Json(IdentityTracesResponse {
        success: true,
        identity_uuid,
        name,
        total_traces,
        total_faces,
        traces,
    }))
}
pub fn identity_binding_routes() -> Router<crate::api::server::AppState> {
Router::new()
.route("/api/v1/identity/:identity_uuid/bind", post(bind_identity))
@@ -350,4 +469,8 @@ pub fn identity_binding_routes() -> Router<crate::api::server::AppState> {
"/api/v1/identity/:identity_uuid/mergeinto",
post(merge_identities),
)
.route(
"/api/v1/identity/:identity_uuid/traces",
get(get_identity_traces),
)
}

View File

@@ -14,8 +14,16 @@ use crate::core::db::{schema, PostgresDb};
/// Extracts the shared video query options from a parameter map.
///
/// Returns `(mode, audio)`. `mode` is the value stored under `"mode"`, or
/// `"normal"` when the key is absent; `audio` is the value stored under
/// `"audio"`, or `"on"` when the key is absent. Values are passed through
/// unchanged and are not validated.
fn parse_video_params(params: &std::collections::HashMap<String, String>) -> (String, String) {
    let value_or = |key: &str, fallback: &str| -> String {
        match params.get(key) {
            Some(found) => found.clone(),
            None => fallback.to_string(),
        }
    };
    (value_or("mode", "normal"), value_or("audio", "on"))
}
@@ -142,9 +150,12 @@ struct BboxParams {
/// Priority: start_frame/end_frame > start/end > start_time/end_time.
/// If only time is given, convert via fps.
fn resolve_frame_range(
start_frame: Option<i32>, end_frame: Option<i32>,
start: Option<i32>, end: Option<i32>,
start_time: Option<f64>, end_time: Option<f64>,
start_frame: Option<i32>,
end_frame: Option<i32>,
start: Option<i32>,
end: Option<i32>,
start_time: Option<f64>,
end_time: Option<f64>,
fps: f64,
) -> (i32, i32) {
if let (Some(sf), Some(ef)) = (start_frame.or(start), end_frame.or(end)) {
@@ -186,7 +197,15 @@ async fn bbox_overlay_video(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.unwrap_or(24.0);
let (start_f, end_f) = resolve_frame_range(p.start_frame, p.end_frame, p.start, p.end, p.start_time, p.end_time, fps);
let (start_f, end_f) = resolve_frame_range(
p.start_frame,
p.end_frame,
p.start,
p.end,
p.start_time,
p.end_time,
fps,
);
let start_sec = start_f as f64 / fps;
@@ -228,13 +247,26 @@ async fn bbox_overlay_video(
let dur = duration.to_string();
let mut bbox_args = vec!["-ss", &ss, "-i", &video_path, "-t", &dur];
if vf != "null" {
bbox_args.extend_from_slice(&["-vf", &vf, "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"]);
bbox_args.extend_from_slice(&[
"-vf",
&vf,
"-c:v",
"libx264",
"-preset",
"ultrafast",
"-crf",
"28",
]);
} else {
bbox_args.extend_from_slice(&["-c", "copy"]);
}
if bbox_audio == "off" { bbox_args.push("-an"); }
if bbox_audio == "off" {
bbox_args.push("-an");
}
bbox_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
let status = ffmpeg_cmd().args(&bbox_args).status()
let status = ffmpeg_cmd()
.args(&bbox_args)
.status()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !status.success() {
let _ = std::fs::remove_file(&tmp);
@@ -315,14 +347,20 @@ async fn trace_video(
let sk = seek.to_string();
let du = duration.to_string();
let mut cmd_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, "-c", "copy"];
if audio == "off" { cmd_args.push("-an"); }
if audio == "off" {
cmd_args.push("-an");
}
cmd_args.extend_from_slice(&["-y", &tmp_str]);
let result = ffmpeg_cmd().args(&cmd_args).output()
let result = ffmpeg_cmd()
.args(&cmd_args)
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !result.status.success() {
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let data = tokio::fs::read(&tmp)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let _ = std::fs::remove_file(&tmp);
return Ok(Response::builder()
.header(header::CONTENT_TYPE, "video/mp4")
@@ -345,8 +383,11 @@ async fn trace_video(
ORDER BY fd.trace_id, fd.frame_number",
face_table, identities_table
))
.bind(&file_uuid).bind(start_fn).bind(end_fn)
.fetch_all(state.db.pool()).await
.bind(&file_uuid)
.bind(start_fn)
.bind(end_fn)
.fetch_all(state.db.pool())
.await
.unwrap_or_default();
// Group frames by trace_id, compute start_frame per trace; collect bbox per frame
@@ -359,7 +400,9 @@ async fn trace_video(
if let Some(name) = name_opt {
trace_identity.entry(*tid).or_insert_with(|| name.clone());
} else {
trace_identity.entry(*tid).or_insert_with(|| format!("Stranger_{:03}", tid));
trace_identity
.entry(*tid)
.or_insert_with(|| format!("Stranger_{:03}", tid));
}
}
@@ -374,7 +417,8 @@ async fn trace_video(
.unwrap_or_else(|| "-".to_string());
// Sort traces for consistent ordering
let mut sorted_traces: Vec<(i32, &Vec<i32>)> = trace_frames.iter().map(|(k, v)| (*k, v)).collect();
let mut sorted_traces: Vec<(i32, &Vec<i32>)> =
trace_frames.iter().map(|(k, v)| (*k, v)).collect();
sorted_traces.sort_by_key(|(tid, _)| *tid);
let frame_offset = first_frame as i64 - (padding * fps) as i64;
@@ -389,10 +433,12 @@ async fn trace_video(
"drawtext=text='Frame %{{n}} %{{pts}}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=12"
));
parts.push(format!(
"drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56", cut_label
"drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56",
cut_label
));
parts.push(format!(
"drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100", file_uuid
"drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100",
file_uuid
));
// Per-trace entries: show trace_id, start_frame, identity name
@@ -400,11 +446,18 @@ async fn trace_video(
let mut y_pos = 144;
for (tid, frames) in &sorted_traces {
let start = frames.iter().min().unwrap_or(&first_frame);
let identity = trace_identity.get(tid).map(|s| s.as_str()).unwrap_or("unknown");
let identity = trace_identity
.get(tid)
.map(|s| s.as_str())
.unwrap_or("unknown");
let label = format!("Trace {}\\: start={} {}", tid, start, identity);
// Continuous range (interpolated): visible from first to last frame
let enable = format!("between(n,{},{})", frames[0] as i64 - frame_offset, frames[frames.len() - 1] as i64 - frame_offset);
let enable = format!(
"between(n,{},{})",
frames[0] as i64 - frame_offset,
frames[frames.len() - 1] as i64 - frame_offset
);
parts.push(format!(
"drawtext=text='{}':fontsize=24:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y={}:enable='{}'",
@@ -415,7 +468,11 @@ async fn trace_video(
// Bounding boxes: interpolated (thickness=1) + actual (thickness=4) with trace_id label
for (tid, frames) in &sorted_traces {
let range_enable = format!("between(n,{},{})", frames[0] as i64 - frame_offset, frames[frames.len() - 1] as i64 - frame_offset);
let range_enable = format!(
"between(n,{},{})",
frames[0] as i64 - frame_offset,
frames[frames.len() - 1] as i64 - frame_offset
);
// Interpolated bbox at first known position across the whole trace range
if let Some((x, y, w, h)) = bbox_per_frame.get(&(*tid, frames[0])) {
parts.push(format!(
@@ -448,23 +505,45 @@ async fn trace_video(
let tmp_str = tmp.to_str().unwrap_or("").to_string();
let sk = seek.to_string();
let du = duration.to_string();
let mut debug_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du,
"-/filter_complex", &filter_path,
"-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"];
if audio == "on" { debug_args.extend_from_slice(&["-c:a", "aac"]); }
let mut debug_args = vec![
"-ss",
&sk,
"-i",
&video_path,
"-t",
&du,
"-/filter_complex",
&filter_path,
"-c:v",
"libx264",
"-preset",
"ultrafast",
"-crf",
"28",
];
if audio == "on" {
debug_args.extend_from_slice(&["-c:a", "aac"]);
}
debug_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
let result = ffmpeg_cmd().args(&debug_args).output()
let result = ffmpeg_cmd()
.args(&debug_args)
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !result.status.success() {
let stderr = String::from_utf8_lossy(&result.stderr);
let _ = std::fs::write("/tmp/ffmpeg_last_error.txt", stderr.as_bytes());
tracing::error!("ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt", stderr.len());
tracing::error!(
"ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt",
stderr.len()
);
let _ = std::fs::remove_file(&filter_file);
let _ = std::fs::remove_file(&tmp);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let data = tokio::fs::read(&tmp)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let _ = std::fs::remove_file(&filter_file);
let _ = std::fs::remove_file(&tmp);
Ok(Response::builder()
@@ -503,19 +582,27 @@ async fn stream_video(
// Chunk extraction with dual time/frame params
let start_time_param = params.get("start_time").and_then(|v| v.parse::<f64>().ok());
let end_time_param = params.get("end_time").and_then(|v| v.parse::<f64>().ok());
let start_frame_param = params.get("start_frame").and_then(|v| v.parse::<f64>().ok());
let start_frame_param = params
.get("start_frame")
.and_then(|v| v.parse::<f64>().ok());
let end_frame_param = params.get("end_frame").and_then(|v| v.parse::<f64>().ok());
let start_legacy = params.get("start").and_then(|v| v.parse::<f64>().ok());
let end_legacy = params.get("end").and_then(|v| v.parse::<f64>().ok());
let has_range = start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some();
let has_range =
start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some();
if has_range {
let (start_sec, dur) = if let (Some(sf), Some(ef)) = (start_frame_param, end_frame_param) {
let _fps: f64 = sqlx::query_scalar(&format!(
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", videos_table
)).bind(&file_uuid).fetch_optional(state.db.pool()).await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?.unwrap_or(24.0);
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.unwrap_or(24.0);
(sf / _fps, (ef - sf) / _fps)
} else if let (Some(st), Some(et)) = (start_time_param, end_time_param) {
(st, et - st)
@@ -533,15 +620,21 @@ async fn stream_video(
let ss = start_sec.to_string();
let d = dur.to_string();
let mut chunk_args = vec!["-ss", &ss, "-i", &file_path, "-t", &d, "-c", "copy"];
if audio == "off" { chunk_args.push("-an"); }
if audio == "off" {
chunk_args.push("-an");
}
chunk_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
let status = ffmpeg_cmd().args(&chunk_args).status()
let status = ffmpeg_cmd()
.args(&chunk_args)
.status()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !status.success() {
let _ = std::fs::remove_file(&tmp);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let data = tokio::fs::read(&tmp)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let _ = std::fs::remove_file(&tmp);
return Ok(Response::builder()
.header(header::CONTENT_TYPE, "video/mp4")
@@ -704,7 +797,7 @@ async fn video_clip(
let frame_count = ((e - s) * fps) as i64;
cmd.args(["-vframes", &frame_count.to_string()]);
} else {
cmd.args(["-to", &e.to_string()]);
cmd.args(["-t", &(e - s).to_string()]);
}
if mode == "debug" {
let debug_text = if let (Some(sf), Some(ef)) = (q.start_frame, q.end_frame) {
@@ -717,8 +810,20 @@ async fn video_clip(
if audio == "off" {
cmd.args(["-an"]);
}
cmd.args(["-c:v", "libx264", "-c:a", "aac", "-f", "mpegts", "-"]);
let output = cmd.output().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
cmd.args([
"-c:v",
"libx264",
"-c:a",
"aac",
"-movflags",
"frag_keyframe+empty_moov",
"-f",
"mp4",
"-",
]);
let output = cmd
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !output.status.success() {
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}

View File

@@ -88,16 +88,10 @@ fn hex_val(c: u8) -> Option<u8> {
}
fn extract_api_key(headers: &HeaderMap, uri: &axum::http::Uri) -> Result<String, StatusCode> {
if let Some(key) = headers
.get("X-API-Key")
.and_then(|v| v.to_str().ok())
{
if let Some(key) = headers.get("X-API-Key").and_then(|v| v.to_str().ok()) {
return Ok(key.to_string());
}
if let Some(auth) = headers
.get("Authorization")
.and_then(|v| v.to_str().ok())
{
if let Some(auth) = headers.get("Authorization").and_then(|v| v.to_str().ok()) {
// Check if it's a JWT (starts with eyJ)
let trimmed = auth.strip_prefix("Bearer ").unwrap_or(auth);
if !jwt::is_jwt(trimmed) {
@@ -129,7 +123,11 @@ pub async fn unified_auth(
// Priority 1: Cookie session (Portal)
let cookies = extract_cookies(headers);
if let Some(sid) = cookies.iter().find(|(k, _)| k == "session_id").map(|(_, v)| v.clone()) {
if let Some(sid) = cookies
.iter()
.find(|(k, _)| k == "session_id")
.map(|(_, v)| v.clone())
{
match state.db.get_session_by_id(&sid).await {
Ok(Some((_id, user_id, api_key_id, _expires_at))) => {
let key_hash = hash_key(&api_key_id);
@@ -162,15 +160,17 @@ pub async fn unified_auth(
}
// Priority 2: JWT (Authorization: Bearer <eyJ...>)
if let Some(auth_header) = headers
.get("Authorization")
.and_then(|v| v.to_str().ok())
{
if let Some(auth_header) = headers.get("Authorization").and_then(|v| v.to_str().ok()) {
if let Some(token) = auth_header.strip_prefix("Bearer ") {
if jwt::is_jwt(token) {
match jwt::verify_jwt(token) {
Ok(claims) => {
if !state.db.is_jwt_blacklisted(&claims.jti).await.unwrap_or(false) {
if !state
.db
.is_jwt_blacklisted(&claims.jti)
.await
.unwrap_or(false)
{
let exp = chrono::DateTime::from_timestamp(claims.exp as i64, 0);
let user_id: i32 = claims.sub.parse().unwrap_or(0);
let auth = UserAuth {

File diff suppressed because it is too large Load Diff

View File

@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
use crate::api::server::AppState;
use crate::core::config;
use crate::core::db::PostgresDb;
use crate::core::db::{PostgresDb, QdrantDb};
use crate::core::tmdb;
#[derive(Debug, Serialize)]
@@ -64,10 +64,44 @@ struct FileUuidParam {
file_uuid: String,
}
/// JSON request body deserialized by the `tmdb_fetch` handler.
#[derive(Debug, Deserialize)]
struct TmdbFetchRequest {
/// UUID of the video; the handler looks up its `file_name` in the videos table by this value.
file_uuid: String,
}
/// Per-cast-member outcome reported by the `tmdb_fetch` handler.
#[derive(Debug, Serialize)]
struct TmdbFetchMemberResult {
/// Cast member name as stored in the TMDb cache.
name: String,
/// Character played; `None` when the cached character string is empty.
character: Option<String>,
/// Alternative names (`also_known_as`) copied from the TMDb cache.
aliases: Vec<String>,
/// Raw TMDb attributes of the cast member (id, biography, birthday, popularity, ...).
metadata: serde_json::Value,
/// One of `"success"`, `"partial"`, `"skipped"` or `"error"` as set by `tmdb_fetch`.
status: String,
/// Whether saving the identity file succeeded.
has_json: bool,
/// Whether a `profile.jpg` already existed or was downloaded for the identity.
has_jpg: bool,
/// Failure detail for `"partial"` and `"error"` results; `None` otherwise.
error: Option<String>,
}
/// Response body returned by the `tmdb_fetch` handler.
#[derive(Debug, Serialize)]
struct TmdbFetchResponse {
/// Set to `true` by `tmdb_fetch` whenever the probe found a movie; failures are returned as HTTP errors instead.
success: bool,
/// Title reported by the TMDb probe.
movie_title: Option<String>,
/// TMDb movie id reported by the probe.
tmdb_id: Option<u64>,
/// One entry per cast member found in the TMDb cache.
results: Vec<TmdbFetchMemberResult>,
/// Aggregate counters: `total`, `success`, `with_json`, `with_jpg`.
summary: serde_json::Value,
}
pub fn tmdb_routes() -> Router<AppState> {
Router::new()
.route("/api/v1/agents/tmdb/prefetch", post(tmdb_prefetch))
.route("/api/v1/file/:file_uuid/tmdb-probe", post(tmdb_probe_handler))
.route(
"/api/v1/file/:file_uuid/tmdb-probe",
post(tmdb_probe_handler),
)
.route("/api/v1/tmdb/fetch", post(tmdb_fetch))
.route(
"/api/v1/agents/tmdb/match/:file_uuid",
post(tmdb_match_handler),
)
.route("/api/v1/resource/tmdb", get(tmdb_resource_status))
.route("/api/v1/resource/tmdb/check", post(tmdb_resource_check))
}
@@ -79,9 +113,10 @@ async fn tmdb_prefetch(
let file_uuid = req.file_uuid;
// Verify file exists in DB
let file_exists: bool = sqlx::query_scalar(
&format!("SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", crate::core::db::schema::table_name("videos"))
)
let file_exists: bool = sqlx::query_scalar(&format!(
"SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_one(state.db.pool())
.await
@@ -182,18 +217,22 @@ async fn tmdb_probe_handler(
let file_uuid = params.file_uuid;
// Verify file exists
let file_exists: bool = sqlx::query_scalar(
&format!("SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", crate::core::db::schema::table_name("videos"))
)
let file_exists: bool = sqlx::query_scalar(&format!(
"SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_one(state.db.pool())
.await
.unwrap_or(false);
if !file_exists {
return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"error": "Video not found", "file_uuid": file_uuid
}))));
})),
));
}
match tmdb::probe::probe_from_cache(&state.db, &file_uuid).await {
@@ -214,7 +253,10 @@ async fn tmdb_probe_handler(
.await
{
for uuid in rows {
let _ = crate::core::identity::storage::save_identity_file_by_pool(&pool, &uuid).await;
let _ = crate::core::identity::storage::save_identity_file_by_pool(
&pool, &uuid,
)
.await;
}
}
});
@@ -245,22 +287,24 @@ async fn tmdb_probe_handler(
message: "No TMDb cache found. Run tmdb-prefetch first.".to_string(),
}))
} else {
Err((StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({
"error": msg, "file_uuid": file_uuid
}))))
})),
))
}
}
}
}
async fn tmdb_resource_status(
State(state): State<AppState>,
) -> Json<TmdbResourceResponse> {
async fn tmdb_resource_status(State(state): State<AppState>) -> Json<TmdbResourceResponse> {
let status = tmdb::status::quick_status();
let identities_seeded = tmdb::status::count_tmdb_identities(state.db.pool())
.await
.unwrap_or(0);
let identities_with_embedding = tmdb::status::count_tmdb_identities_with_embedding(state.db.pool())
let identities_with_embedding =
tmdb::status::count_tmdb_identities_with_embedding(state.db.pool())
.await
.unwrap_or(0);
let cache_files = tmdb::status::count_cache_files();
@@ -303,3 +347,383 @@ async fn tmdb_resource_check() -> Json<TmdbCheckResponse> {
status,
})
}
async fn tmdb_fetch(
State(state): State<AppState>,
Json(req): Json<TmdbFetchRequest>,
) -> Result<Json<TmdbFetchResponse>, (StatusCode, Json<serde_json::Value>)> {
let file_uuid = req.file_uuid;
let filename: Option<String> = sqlx::query_scalar(&format!(
"SELECT file_name FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": e.to_string()})),
)
})?
.flatten();
let filename = filename.ok_or_else(|| {
(
StatusCode::NOT_FOUND,
Json(serde_json::json!({"error": "File not found"})),
)
})?;
// Run probe to create identities
match tmdb::probe::probe_movie(&state.db, &filename, &file_uuid).await {
Ok(Some(probe_result)) => {
let mut member_results = Vec::new();
// Read the cache to get cast list with names and profile URLs
if let Ok(cache) = tmdb::cache::read_tmdb_cache(&file_uuid) {
for member in &cache.cast {
let name = member.name.clone();
let character = if member.character.is_empty() {
None
} else {
Some(member.character.clone())
};
let aliases = member.also_known_as.clone();
let profile_url = member
.profile_path
.as_ref()
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
let metadata = serde_json::json!({
"tmdb_id": member.id,
"name": member.name,
"character": member.character,
"aliases": member.also_known_as,
"profile_path": member.profile_path,
"order": member.order,
"biography": member.biography,
"birthday": member.birthday,
"place_of_birth": member.place_of_birth,
"imdb_id": member.imdb_id,
"known_for_department": member.known_for_department,
"popularity": member.popularity,
"deathday": member.deathday,
"gender": member.gender,
"homepage": member.homepage,
});
let identity_row = sqlx::query_as::<_, (i32, uuid::Uuid)>(&format!(
"SELECT id, uuid FROM {} WHERE name = $1 AND source = 'tmdb' LIMIT 1",
crate::core::db::schema::table_name("identities")
))
.bind(&name)
.fetch_optional(state.db.pool())
.await;
match identity_row {
Ok(Some((identity_id, uuid))) => {
let clean = uuid.to_string().replace('-', "");
let dir = crate::core::identity::storage::identity_dir(&clean);
std::fs::create_dir_all(&dir).ok();
let json_result = crate::core::identity::storage::save_identity_file(
&state.db, &clean,
)
.await;
let has_json = json_result.is_ok();
let has_jpg = if let Some(url) = &profile_url {
let jpg_path = dir.join("profile.jpg");
if jpg_path.exists() {
true
} else if let Ok(resp) = reqwest::get(url).await {
if let Ok(bytes) = resp.bytes().await {
std::fs::write(&jpg_path, &bytes).is_ok()
} else {
false
}
} else {
false
}
} else {
false
};
// Push face_embedding to Qdrant if available
let face_collection = format!(
"{}_faces",
crate::core::config::REDIS_KEY_PREFIX
.as_str()
.trim_end_matches(':')
);
let emb_row: Option<(Vec<f32>,)> = sqlx::query_as(
&format!(
"SELECT face_embedding::real[] FROM {} WHERE uuid = $1 AND face_embedding IS NOT NULL",
crate::core::db::schema::table_name("identities")
)
)
.bind(&uuid)
.fetch_optional(state.db.pool())
.await
.unwrap_or(None);
if let Some((embedding,)) = emb_row {
let qdrant = QdrantDb::new();
qdrant.ensure_collection(&face_collection, 512).await.ok();
let _ = qdrant
.upsert_vector_to_collection(
&face_collection,
identity_id as u64,
&embedding,
Some(serde_json::json!({
"identity_id": identity_id,
"name": name,
"source": "tmdb",
})),
)
.await;
}
let status = if has_json && has_jpg {
"success"
} else {
"partial"
};
let error = if !has_json {
Some(format!("{:?}", json_result.err()))
} else if !has_jpg {
Some("profile download failed".to_string())
} else {
None
};
member_results.push(TmdbFetchMemberResult {
name: name.clone(),
character: character.clone(),
aliases: aliases.clone(),
metadata: metadata.clone(),
status: status.to_string(),
has_json,
has_jpg,
error,
});
}
Ok(None) => {
member_results.push(TmdbFetchMemberResult {
name: name.clone(),
character: character.clone(),
aliases: aliases.clone(),
metadata: metadata.clone(),
status: "skipped".to_string(),
has_json: false,
has_jpg: false,
error: None,
});
}
Err(e) => {
member_results.push(TmdbFetchMemberResult {
name: name.clone(),
character: character.clone(),
aliases: aliases.clone(),
metadata: metadata.clone(),
status: "error".to_string(),
has_json: false,
has_jpg: false,
error: Some(format!("DB error: {}", e)),
});
}
}
}
}
let total = member_results.len();
let success_count = member_results
.iter()
.filter(|r| r.status == "success")
.count();
let json_count = member_results.iter().filter(|r| r.has_json).count();
let jpg_count = member_results.iter().filter(|r| r.has_jpg).count();
Ok(Json(TmdbFetchResponse {
success: true,
movie_title: Some(probe_result.title),
tmdb_id: Some(probe_result.tmdb_id),
results: member_results,
summary: serde_json::json!({
"total": total,
"success": success_count,
"with_json": json_count,
"with_jpg": jpg_count,
}),
}))
}
Ok(None) => Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"error": "No movie found for this filename"
})),
)),
Err(e) => Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({
"error": e.to_string()
})),
)),
}
}
/// Response body returned by `tmdb_match_handler`.
#[derive(Debug, Serialize)]
struct TmdbMatchResponse {
/// Set to `true` on every non-error return of the handler.
success: bool,
/// UUID of the video that was matched.
file_uuid: String,
/// Sum of `rows_affected()` over all binding `UPDATE`s, i.e. updated face-detection rows.
bindings_created: usize,
/// Number of TMDb identities that have a face embedding.
tmdb_identities_available: usize,
/// Human-readable summary of the outcome.
message: String,
}
async fn tmdb_match_handler(
Path(params): Path<FileUuidParam>,
State(state): State<AppState>,
) -> Result<Json<TmdbMatchResponse>, (StatusCode, Json<serde_json::Value>)> {
let file_uuid = params.file_uuid;
// Verify file exists
let file_exists: bool = sqlx::query_scalar(&format!(
"SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_one(state.db.pool())
.await
.unwrap_or(false);
if !file_exists {
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"error": "Video not found", "file_uuid": file_uuid
})),
));
}
// Get all TMDb identities with face_embedding
let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec<f32>)>(
&format!(
"SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL",
crate::core::db::schema::table_name("identities")
)
)
.fetch_all(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
})?;
if tmdb_rows.is_empty() {
return Ok(Json(TmdbMatchResponse {
success: true,
file_uuid,
bindings_created: 0,
tmdb_identities_available: 0,
message: "No TMDb identities with face embeddings".to_string(),
}));
}
let face_collection = format!(
"{}_faces",
crate::core::config::REDIS_KEY_PREFIX
.as_str()
.trim_end_matches(':')
);
let qdrant = QdrantDb::new();
let _ = qdrant.ensure_collection(&face_collection, 512).await;
let trace_collection = format!(
"{}_traces",
crate::core::config::REDIS_KEY_PREFIX
.as_str()
.trim_end_matches(':')
);
let _ = qdrant.ensure_collection(&trace_collection, 512).await;
// Sync trace embeddings (idempotent)
if let Err(e) = crate::core::db::qdrant_db::sync_trace_embeddings(&file_uuid).await {
tracing::error!("[TKG-MATCH] Trace sync failed: {}", e);
}
let mut total_bindings = 0usize;
for (tmdb_id, tmdb_name, tmdb_embedding) in &tmdb_rows {
// Search Qdrant trace collection with this TMDb embedding
let results = match qdrant
.search_face_collection(
&trace_collection,
tmdb_embedding,
100,
"source",
"tmdb",
Some(&file_uuid),
)
.await
{
Ok(r) => r,
Err(e) => {
tracing::warn!("[TKG-MATCH] Qdrant search failed for {}: {}", tmdb_name, e);
continue;
}
};
// Filter results by threshold and file_uuid
let filtered: Vec<_> = results
.into_iter()
.filter(|(score, payload)| {
*score >= 0.50
&& payload.get("file_uuid").and_then(|v| v.as_str()) == Some(&file_uuid)
})
.collect();
if filtered.is_empty() {
continue;
}
// Bind matched traces directly
let mut bound_count = 0usize;
for (_score, payload) in &filtered {
if let Some(tid) = payload.get("trace_id").and_then(|v| v.as_i64()) {
let r = sqlx::query(&format!(
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
crate::core::db::schema::table_name("face_detections")
))
.bind(tmdb_id)
.bind(&file_uuid)
.bind(tid as i32)
.execute(state.db.pool())
.await;
if let Ok(result) = r {
bound_count += result.rows_affected() as usize;
}
}
}
if bound_count > 0 {
tracing::info!(
"[TKG-MATCH] {}: bound {} traces to TMDb identity {}",
tmdb_name,
bound_count,
tmdb_id
);
}
total_bindings += bound_count;
}
Ok(Json(TmdbMatchResponse {
success: true,
file_uuid,
bindings_created: total_bindings,
tmdb_identities_available: tmdb_rows.len(),
message: format!("{} traces matched to TMDb identities", total_bindings),
}))
}

View File

@@ -11,10 +11,7 @@ use crate::core::db::PostgresDb;
pub fn trace_agent_routes() -> Router<crate::api::server::AppState> {
Router::new()
.route(
"/api/v1/file/:file_uuid/traces",
post(list_traces_sorted),
)
.route("/api/v1/file/:file_uuid/traces", post(list_traces_sorted))
.route(
"/api/v1/file/:file_uuid/trace/:trace_id/faces",
get(list_trace_faces),
@@ -78,9 +75,10 @@ async fn list_traces_sorted(
_ => "start_frame ASC",
};
let fps: f64 =
sqlx::query_scalar(&format!("SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")))
let fps: f64 = sqlx::query_scalar(&format!(
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
@@ -113,8 +111,7 @@ async fn list_traces_sorted(
crate::core::db::schema::table_name("face_detections"),
);
let rows: Vec<(i32, i64, i32, i32, f64, f64, Option<i32>)> =
sqlx::query_as(&query)
let rows: Vec<(i32, i64, i32, i32, f64, f64, Option<i32>)> = sqlx::query_as(&query)
.bind(&file_uuid)
.bind(min_faces)
.bind(effective_limit)
@@ -220,19 +217,20 @@ async fn list_trace_faces(
};
let interpolate = q.interpolate.unwrap_or(false);
let fps: f64 =
sqlx::query_scalar(&format!("SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")))
let fps: f64 = sqlx::query_scalar(&format!(
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
.unwrap_or(24.0);
let total_detected: i64 = sqlx::query_scalar(
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
crate::core::db::schema::table_name("face_detections"))
)
let total_detected: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
crate::core::db::schema::table_name("face_detections")
))
.bind(&file_uuid)
.bind(trace_id)
.fetch_one(state.db.pool())
@@ -247,12 +245,12 @@ async fn list_trace_faces(
Option<i32>,
Option<i32>,
f32,
)> = sqlx::query_as(
&format!("SELECT id, frame_number::int, x, y, width, height, confidence::float4 \
)> = sqlx::query_as(&format!(
"SELECT id, frame_number::int, x, y, width, height, confidence::float4 \
FROM {} WHERE file_uuid = $1 AND trace_id = $2 \
ORDER BY frame_number ASC LIMIT $3 OFFSET $4",
crate::core::db::schema::table_name("face_detections"))
)
crate::core::db::schema::table_name("face_detections")
))
.bind(&file_uuid)
.bind(trace_id)
.bind(limit)

View File

@@ -88,9 +88,9 @@ pub enum SearchResult {
},
#[serde(rename = "person")]
Person {
person_id: String,
identity_id: i32,
identity_uuid: String,
name: Option<String>,
speaker_id: Option<String>,
appearance_count: i32,
score: f64,
first_appearance_time: Option<f64>,
@@ -168,7 +168,7 @@ pub async fn universal_search(
results.retain(|r| match r {
SearchResult::Chunk { chunk_id, .. } => seen_chunks.insert(chunk_id.clone()),
SearchResult::Frame { frame_number, .. } => seen_frames.insert(*frame_number),
SearchResult::Person { person_id, .. } => seen_persons.insert(person_id.clone()),
SearchResult::Person { identity_id, .. } => seen_persons.insert(*identity_id),
});
}
@@ -251,9 +251,9 @@ pub async fn search_persons(
let limit = query.limit.unwrap_or(20);
let persons = search_persons_by_query(
&db,
&query.file_uuid,
&query.query,
query.min_appearances,
query.max_age,
limit,
)
.await
@@ -305,7 +305,6 @@ pub struct PersonSearchQuery {
pub file_uuid: String,
pub query: Option<String>,
pub min_appearances: Option<i32>,
pub max_age: Option<i32>, // New filter for "children"
pub limit: Option<usize>,
}
@@ -317,13 +316,9 @@ pub struct PersonSearchResponse {
#[derive(Debug, Serialize)]
pub struct PersonResult {
pub person_id: String,
pub identity_id: i32,
pub identity_uuid: String,
pub name: Option<String>,
pub character_name: Option<String>,
pub aliases: Option<Vec<String>>,
pub age: Option<i32>,
pub gender: Option<String>,
pub speaker_id: Option<String>,
pub appearance_count: i32,
pub first_appearance_time: Option<f64>,
pub last_appearance_time: Option<f64>,
@@ -594,43 +589,37 @@ async fn search_persons_internal(
db: &PostgresDb,
req: &UniversalSearchRequest,
) -> Result<Vec<SearchResult>, anyhow::Error> {
let table = "person_identities";
let uuid = match &req.file_uuid {
Some(u) => u.replace('\'', "''"),
None => return Err(anyhow::anyhow!("file_uuid is required for person search")),
};
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let mut sql = format!(
"SELECT person_id, name, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM {} WHERE 1=1",
table
"SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
FROM {} i JOIN {} fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = '{}'",
id_table, fd_table, uuid
);
if !req.query.is_empty() {
sql.push_str(&format!(
" AND (name ILIKE '%{}%' OR person_id ILIKE '%{}%' OR speaker_id ILIKE '%{}%')",
req.query, req.query, req.query
));
}
if let Some(ref filters) = req.filters {
if let Some(ref speaker_id) = filters.speaker_id {
sql.push_str(&format!(" AND speaker_id = '{}'", speaker_id));
}
if let Some(ref person_id) = filters.person_id {
sql.push_str(&format!(" AND person_id = '{}'", person_id));
}
let q = req.query.replace('\'', "''");
sql.push_str(&format!(" AND i.name ILIKE '%{}%'", q));
}
sql.push_str(" GROUP BY i.id, i.uuid, i.name");
sql.push_str(" ORDER BY appearance_count DESC");
sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));
let rows: Vec<(
String,
Option<String>,
Option<String>,
i32,
Option<f64>,
Option<f64>,
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
sqlx::query_as(&sql).fetch_all(db.pool()).await?;
let results: Vec<SearchResult> = rows
.into_iter()
.map(
|(person_id, name, speaker_id, appearance_count, first_time, last_time)| {
|(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
let score = if !req.query.is_empty()
&& name.as_ref().map_or(false, |n| {
n.to_lowercase().contains(&req.query.to_lowercase())
@@ -641,10 +630,10 @@ async fn search_persons_internal(
};
SearchResult::Person {
person_id,
identity_id,
identity_uuid,
name,
speaker_id,
appearance_count,
appearance_count: appearance_count as i32,
score,
first_appearance_time: first_time,
last_appearance_time: last_time,
@@ -739,82 +728,49 @@ async fn search_frames_internal_v2(
async fn search_persons_by_query(
db: &PostgresDb,
file_uuid: &str,
query: &Option<String>,
min_appearances: Option<i32>,
max_age: Option<i32>,
limit: usize,
) -> Result<Vec<PersonResult>, anyhow::Error> {
let table = "person_identities";
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let mut sql = format!(
"SELECT person_id, name, character_name, aliases, age, gender, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM {} WHERE 1=1",
table
"SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
FROM {} i JOIN {} fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = '{}'",
id_table,
fd_table,
file_uuid.replace('\'', "''")
);
if let Some(ref q) = query {
// Search name, character_name, aliases (cast to text), person_id, speaker_id
sql.push_str(&format!(
" AND (name ILIKE '%{}%' OR character_name ILIKE '%{}%' OR aliases::text ILIKE '%{}%' OR person_id ILIKE '%{}%' OR speaker_id ILIKE '%{}%')",
q, q, q, q, q
));
if let Some(q) = query {
let safe = q.replace('\'', "''");
sql.push_str(&format!(" AND i.name ILIKE '%{}%'", safe));
}
sql.push_str(" GROUP BY i.id, i.uuid, i.name");
if let Some(min) = min_appearances {
sql.push_str(&format!(" AND appearance_count >= {}", min));
}
if let Some(max_a) = max_age {
// Strictly filter for age <= max_age.
// Note: This excludes entries with NULL age.
sql.push_str(&format!(" AND age <= {}", max_a));
sql.push_str(&format!(" HAVING COUNT(fd.id) >= {}", min));
}
sql.push_str(" ORDER BY appearance_count DESC");
sql.push_str(&format!(" LIMIT {}", limit));
let rows: Vec<(
String,
Option<String>,
Option<String>,
Option<serde_json::Value>,
Option<i32>,
Option<String>,
Option<String>,
i32,
Option<f64>,
Option<f64>,
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
sqlx::query_as(&sql).fetch_all(db.pool()).await?;
let results: Vec<PersonResult> = rows
.into_iter()
.map(
|(
person_id,
name,
character_name,
aliases_json,
age,
gender,
speaker_id,
appearance_count,
first_time,
last_time,
)| {
let aliases = aliases_json.and_then(|v| {
v.as_array().map(|arr| {
arr.iter()
.filter_map(|val| val.as_str().map(String::from))
.collect()
})
});
|(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
PersonResult {
person_id,
identity_id,
identity_uuid,
name,
character_name,
aliases,
age,
gender,
speaker_id,
appearance_count,
appearance_count: appearance_count as i32,
first_appearance_time: first_time,
last_appearance_time: last_time,
}

View File

@@ -392,8 +392,14 @@ pub async fn get_visual_chunk_statistics(
uuid.replace('\'', "''")
);
let row: (i64, Option<f64>, Option<f64>, Option<f64>, Option<i64>, Option<f64>) =
sqlx::query_as(&sql).fetch_one(db.pool()).await?;
let row: (
i64,
Option<f64>,
Option<f64>,
Option<f64>,
Option<i64>,
Option<f64>,
) = sqlx::query_as(&sql).fetch_one(db.pool()).await?;
let mut stats = HashMap::new();
stats.insert("total_chunks".to_string(), Value::from(row.0));

View File

@@ -13,7 +13,14 @@ use std::path::{Path, PathBuf};
use std::process::Command;
/// Returns the combined size in bytes of the direct entries of `path`.
///
/// Only the immediate children of the directory are inspected; the function
/// does not descend into subdirectories (a subdirectory contributes whatever
/// length its own metadata reports). Entries that cannot be read, or whose
/// metadata cannot be fetched, are skipped. If `path` itself cannot be read
/// as a directory, the result is `0`.
fn dir_size(path: &Path) -> u64 {
    path.read_dir()
        .map(|entries| {
            entries
                .filter_map(|entry| entry.ok())
                .filter_map(|entry| entry.metadata().ok())
                .map(|meta| meta.len())
                .sum()
        })
        // An unreadable or missing directory counts as empty rather than an error.
        .unwrap_or(0)
}
const DEMO_DIR: &str = "/Users/accusys/momentry/var/sftpgo/data/demo";
@@ -22,7 +29,10 @@ const RELEASE_DIR: &str = "/Users/accusys/momentry_core_0.1/release/files";
const PG_BIN: &str = "/Users/accusys/pgsql/18.3/bin";
#[derive(Parser)]
#[command(name = "release", about = "Release Manager — deploy/undeploy video packages")]
#[command(
name = "release",
about = "Release Manager — deploy/undeploy video packages"
)]
struct Cli {
#[command(subcommand)]
command: Commands,
@@ -107,7 +117,12 @@ fn extract_tarball(tarball: &Path) -> Result<PathBuf> {
fs::create_dir_all(&tmpdir)?;
let status = Command::new("tar")
.args(["-xzf", tarball.to_str().unwrap(), "-C", tmpdir.to_str().unwrap()])
.args([
"-xzf",
tarball.to_str().unwrap(),
"-C",
tmpdir.to_str().unwrap(),
])
.status()
.context("tar extraction failed")?;
if !status.success() {
@@ -127,8 +142,8 @@ fn extract_tarball(tarball: &Path) -> Result<PathBuf> {
/// Loads and parses `file_info.json` from the package directory `pkg_dir`.
///
/// # Errors
/// Returns an error with the context `Cannot read <path>` when the file
/// cannot be read, and `Invalid file_info.json` when its contents are not
/// valid JSON.
fn read_file_info(pkg_dir: &Path) -> Result<serde_json::Value> {
    let info_path = pkg_dir.join("file_info.json");
    // Read the file once; the path is included in the error so a missing or
    // unreadable package file can be located from the message alone.
    let content =
        fs::read_to_string(&info_path).with_context(|| format!("Cannot read {:?}", info_path))?;
    serde_json::from_str(&content).context("Invalid file_info.json")
}
@@ -140,7 +155,10 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
anyhow::bail!("File not found: {}", tarball);
}
println!("=== Deploy: {} ===", tarball_path.file_name().unwrap().to_str().unwrap());
println!(
"=== Deploy: {} ===",
tarball_path.file_name().unwrap().to_str().unwrap()
);
// Extract
let pkg_dir = extract_tarball(tarball_path)?;
@@ -148,7 +166,9 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
// Read file_info
let info = read_file_info(&pkg_dir)?;
let uuid = info["file_uuid"].as_str().context("Missing file_uuid in file_info.json")?;
let uuid = info["file_uuid"]
.as_str()
.context("Missing file_uuid in file_info.json")?;
let file_name = info["file_name"].as_str().unwrap_or("?");
println!("UUID: {}\nVideo: {}", uuid, file_name);
@@ -168,7 +188,8 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
let entry = entry?;
let fname = entry.file_name();
let fname_str = fname.to_str().unwrap_or("");
if fname_str.ends_with(".mp4") || fname_str.ends_with(".mov") || fname_str.ends_with(".avi") {
if fname_str.ends_with(".mp4") || fname_str.ends_with(".mov") || fname_str.ends_with(".avi")
{
let dest = Path::new(DEMO_DIR).join(&fname);
if !dest.exists() {
fs::copy(entry.path(), &dest)?;
@@ -192,12 +213,15 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
println!("Output files copied to {}", OUTPUT_DIR);
// Verify
let chunk_count: (i64,) = sqlx::query_as(
"SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = $1"
).bind(uuid).fetch_one(db.pool()).await?;
let face_count: (i64,) = sqlx::query_as(
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1"
).bind(uuid).fetch_one(db.pool()).await?;
let chunk_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = $1")
.bind(uuid)
.fetch_one(db.pool())
.await?;
let face_count: (i64,) =
sqlx::query_as("SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1")
.bind(uuid)
.fetch_one(db.pool())
.await?;
// Cleanup
fs::remove_dir_all(&pkg_dir.parent().unwrap_or(&pkg_dir))?;
@@ -213,9 +237,11 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result<()> {
// Get video info
let rows: Vec<(String, String)> = sqlx::query_as(
"SELECT file_name, file_path FROM dev.videos WHERE file_uuid = $1"
).bind(uuid).fetch_all(db.pool()).await?;
let rows: Vec<(String, String)> =
sqlx::query_as("SELECT file_name, file_path FROM dev.videos WHERE file_uuid = $1")
.bind(uuid)
.fetch_all(db.pool())
.await?;
if rows.is_empty() {
anyhow::bail!("UUID {} not found in DB", uuid);
@@ -252,7 +278,9 @@ async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result
println!(" {}: {} rows deleted", tbl, result.rows_affected());
}
sqlx::query("DELETE FROM dev.videos WHERE file_uuid = $1")
.bind(uuid).execute(db.pool()).await?;
.bind(uuid)
.execute(db.pool())
.await?;
println!(" dev.videos: removed");
// Delete output files
@@ -270,7 +298,10 @@ async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result
let vp = Path::new(file_path);
if vp.exists() {
fs::remove_file(vp)?;
println!(" Video file: removed ({})", vp.file_name().unwrap().to_str().unwrap_or("?"));
println!(
" Video file: removed ({})",
vp.file_name().unwrap().to_str().unwrap_or("?")
);
}
}
@@ -292,11 +323,15 @@ async fn cmd_list(db: &PostgresDb) -> Result<()> {
"SELECT file_uuid, file_name, duration, status,
(SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = v.file_uuid) as chunks,
(SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = v.file_uuid) as faces
FROM dev.videos v ORDER BY id DESC"
).fetch_all(db.pool()).await?;
FROM dev.videos v ORDER BY id DESC",
)
.fetch_all(db.pool())
.await?;
println!("{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
"UUID", "Name", "Duration", "Status", "Chunks", "Faces");
println!(
"{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
"UUID", "Name", "Duration", "Status", "Chunks", "Faces"
);
println!("{}", "-".repeat(116));
for row in &rows {
@@ -318,10 +353,15 @@ async fn cmd_list(db: &PostgresDb) -> Result<()> {
name.clone()
};
println!("{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
uuid, short_name, dur_str,
println!(
"{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
uuid,
short_name,
dur_str,
status.as_deref().unwrap_or("?"),
chunks.unwrap_or(0), faces.unwrap_or(0));
chunks.unwrap_or(0),
faces.unwrap_or(0)
);
}
Ok(())
}
@@ -336,9 +376,23 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
"SELECT file_uuid, file_name, file_path, duration, fps, width, height FROM dev.videos WHERE file_uuid = $1"
).bind(uuid).fetch_optional(db.pool()).await?;
let (_, file_name, file_path, duration, fps, width, height): (
String, String, String, Option<f64>, Option<f64>, Option<i32>, Option<i32>
String,
String,
String,
Option<f64>,
Option<f64>,
Option<i32>,
Option<i32>,
) = match row {
Some(r) => (r.get(0), r.get(1), r.get(2), r.get(3), r.get(4), r.get(5), r.get(6)),
Some(r) => (
r.get(0),
r.get(1),
r.get(2),
r.get(3),
r.get(4),
r.get(5),
r.get(6),
),
None => anyhow::bail!("UUID {} not found", uuid),
};
@@ -360,7 +414,10 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
"momentry_version": env!("CARGO_PKG_VERSION"),
"momentry_build": env!("BUILD_GIT_HASH"),
});
fs::write(outdir.join("file_info.json"), serde_json::to_string_pretty(&info)?)?;
fs::write(
outdir.join("file_info.json"),
serde_json::to_string_pretty(&info)?,
)?;
// Export per-table .sql files (avoid single 4.7GB psql load)
let sql_dir = outdir.join("sql");
@@ -376,7 +433,13 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let mut import_order = vec!["master.sql"];
fn write_table_sql(outdir: &Path, tbl: &str, col: &str, uuid: &str, psql_exec: &dyn Fn(&str) -> Result<String>) -> Result<()> {
fn write_table_sql(
outdir: &Path,
tbl: &str,
col: &str,
uuid: &str,
psql_exec: &dyn Fn(&str) -> Result<String>,
) -> Result<()> {
let safe_name = tbl.replace('.', "_");
let path = outdir.join(format!("{}.sql", safe_name));
let parts: Vec<&str> = tbl.split('.').collect();
@@ -419,8 +482,16 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let data = psql_exec(&idents_query)?;
if !data.is_empty() {
let mut f = fs::File::create(&idents_path)?;
writeln!(f, "-- dev.identities WHERE file_uuid = '{}' OR global (tmdb/merged/user_defined)", uuid)?;
writeln!(f, "COPY dev.identities ({}) FROM STDIN WITH CSV HEADER;", cols)?;
writeln!(
f,
"-- dev.identities WHERE file_uuid = '{}' OR global (tmdb/merged/user_defined)",
uuid
)?;
writeln!(
f,
"COPY dev.identities ({}) FROM STDIN WITH CSV HEADER;",
cols
)?;
writeln!(f, "{}", data)?;
writeln!(f, "\\.")?;
}
@@ -440,7 +511,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
if !data.is_empty() {
let mut f = fs::File::create(&binds_path)?;
writeln!(f, "-- dev.identity_bindings (from face_detections JOIN)")?;
writeln!(f, "COPY dev.identity_bindings ({}) FROM STDIN WITH CSV HEADER;", cols)?;
writeln!(
f,
"COPY dev.identity_bindings ({}) FROM STDIN WITH CSV HEADER;",
cols
)?;
writeln!(f, "{}", data)?;
writeln!(f, "\\.")?;
}
@@ -469,7 +544,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let sql_path = outdir.join("data.sql");
{
let mut f = fs::File::create(&sql_path)?;
writeln!(f, "-- Release package: {} — see sql/ for per-table files", uuid)?;
writeln!(
f,
"-- Release package: {} — see sql/ for per-table files",
uuid
)?;
writeln!(f, "BEGIN;")?;
writeln!(f, "\\i sql/dev_videos.sql")?;
writeln!(f, "\\i sql/dev_chunk.sql")?;
@@ -492,7 +571,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let dest = outdir.join(vp.file_name().unwrap());
fs::copy(vp, &dest)?;
let vsize = fs::metadata(&dest)?.len();
println!(" {} ({} MB)", vp.file_name().unwrap().to_str().unwrap_or("?"), vsize / 1024 / 1024);
println!(
" {} ({} MB)",
vp.file_name().unwrap().to_str().unwrap_or("?"),
vsize / 1024 / 1024
);
}
}
@@ -541,11 +624,18 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let vec0_src = "/Users/accusys/momentry_core_0.1/scripts/vec0.dylib";
if Path::new(vec0_src).exists() {
fs::copy(vec0_src, outdir.join("vec0.dylib"))?;
println!(" vec0.dylib ({} KB)", fs::metadata(outdir.join("vec0.dylib"))?.len() / 1024);
println!(
" vec0.dylib ({} KB)",
fs::metadata(outdir.join("vec0.dylib"))?.len() / 1024
);
}
// Create tar.gz
let tarball = Path::new(RELEASE_DIR).join(format!("{}_v{}.tar.gz", uuid, Utc::now().format("%Y%m%d_%H%M%S")));
let tarball = Path::new(RELEASE_DIR).join(format!(
"{}_v{}.tar.gz",
uuid,
Utc::now().format("%Y%m%d_%H%M%S")
));
let status = Command::new("tar")
.args(["-czf", tarball.to_str().unwrap(), "-C", RELEASE_DIR, uuid])
.status()?;
@@ -553,7 +643,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
anyhow::bail!("tar creation failed");
}
let tsize = fs::metadata(&tarball)?.len();
println!("\n Package: {} ({} MB)", tarball.display(), tsize / 1024 / 1024);
println!(
"\n Package: {} ({} MB)",
tarball.display(),
tsize / 1024 / 1024
);
// Sanity check: warn if any sql file is suspiciously large
println!(" Checking sql/ file sizes...");
@@ -564,33 +658,55 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
let sz = fs::metadata(&path)?.len() as f64 / 1024.0 / 1024.0;
let name = path.file_stem().and_then(|s| s.to_str()).unwrap_or("?");
match name {
"dev_videos" | "master" if sz > 1.0 =>
println!(" ⚠️ {} is {} MB, expected < 1 MB", name, sz as u64),
"dev_chunk" if sz > 2.0 =>
println!(" ⚠️ {} is {} MB, expected < 2 MB for ~2.4K chunks", name, sz as u64),
"dev_identities" if sz > 1.0 =>
println!(" ⚠️ {} is {} MB, expected < 1 MB for ~428 identities", name, sz as u64),
"dev_identity_bindings" if sz > 5.0 =>
println!(" ⚠️ {} is {} MB, expected < 5 MB for ~7.6K bindings", name, sz as u64),
"dev_tkg_nodes" if sz > 10.0 =>
println!(" ⚠️ {} is {} MB, expected < 10 MB for ~6.4K nodes", name, sz as u64),
"dev_tkg_edges" if sz > 20.0 =>
println!(" ⚠️ {} is {} MB, expected < 20 MB for ~21K edges", name, sz as u64),
"dev_face_detections" if sz > 1000.0 =>
println!(" ⚠️ {} is {} MB, expected < 1000 MB for ~70K faces (512D emb)", name, sz as u64),
"dev_chunk_vectors" if sz > 200.0 =>
println!(" ⚠️ {} is {} MB, expected < 200 MB for ~2.4K chunks (768D emb)", name, sz as u64),
"dev_videos" | "master" if sz > 1.0 => {
println!(" ⚠️ {} is {} MB, expected < 1 MB", name, sz as u64)
}
"dev_chunk" if sz > 2.0 => println!(
" ⚠️ {} is {} MB, expected < 2 MB for ~2.4K chunks",
name, sz as u64
),
"dev_identities" if sz > 1.0 => println!(
" ⚠️ {} is {} MB, expected < 1 MB for ~428 identities",
name, sz as u64
),
"dev_identity_bindings" if sz > 5.0 => println!(
" ⚠️ {} is {} MB, expected < 5 MB for ~7.6K bindings",
name, sz as u64
),
"dev_tkg_nodes" if sz > 10.0 => println!(
" ⚠️ {} is {} MB, expected < 10 MB for ~6.4K nodes",
name, sz as u64
),
"dev_tkg_edges" if sz > 20.0 => println!(
" ⚠️ {} is {} MB, expected < 20 MB for ~21K edges",
name, sz as u64
),
"dev_face_detections" if sz > 1000.0 => println!(
" ⚠️ {} is {} MB, expected < 1000 MB for ~70K faces (512D emb)",
name, sz as u64
),
"dev_chunk_vectors" if sz > 200.0 => println!(
" ⚠️ {} is {} MB, expected < 200 MB for ~2.4K chunks (768D emb)",
name, sz as u64
),
_ => {}
}
if sz > 2000.0 {
println!(" ⚠️ {} is {:.0} MB — unusually large, verify query", name, sz);
println!(
" ⚠️ {} is {:.0} MB — unusually large, verify query",
name, sz
);
}
}
}
Ok(())
}
fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Option<i64>) -> Result<()> {
fn cmd_visualize_offline(
sqlite_path: &str,
output: Option<&str>,
identity: Option<i64>,
) -> Result<()> {
let outpath = match output {
Some(p) => p.to_string(),
None => sqlite_path.replace(".sqlite", "_report.html"),
@@ -606,7 +722,10 @@ fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Opti
.output()
.context("Offline report script failed")?;
if !output.status.success() {
anyhow::bail!("Offline report: {}", String::from_utf8_lossy(&output.stderr));
anyhow::bail!(
"Offline report: {}",
String::from_utf8_lossy(&output.stderr)
);
}
println!("{}", String::from_utf8_lossy(&output.stdout));
println!("\n Open: {}", outpath);
@@ -624,7 +743,10 @@ fn cmd_visualize(uuid: &str, typ: &str, output: Option<&str>, identity: Option<i
match typ {
"heatmap" | "density" => generate_face_heatmap(uuid, &outpath, identity)?,
"timeline" => generate_face_timeline(uuid, &outpath, identity)?,
_ => anyhow::bail!("Unknown visualization type: {}. Try: heatmap, density, timeline", typ),
_ => anyhow::bail!(
"Unknown visualization type: {}. Try: heatmap, density, timeline",
typ
),
}
Ok(())
}
@@ -698,16 +820,28 @@ fn cmd_stats() -> Result<()> {
for line in listing.lines() {
let trimmed = line.trim();
if trimmed.is_empty() || trimmed.ends_with('/') { continue; }
if trimmed.is_empty() || trimmed.ends_with('/') {
continue;
}
// tar -tvzf format: perms link owner group size date_month date_day time path...
// Fields are space-separated; size is 5th field, path starts at 8th field
let parts: Vec<&str> = trimmed.split_whitespace().collect();
if parts.len() < 8 { continue; }
if parts.len() < 8 {
continue;
}
let fsize = parts[4].parse::<u64>().unwrap_or(0);
let fpath = parts[8..].join(" ");
let fname = Path::new(&fpath).file_name().unwrap_or_default().to_str().unwrap_or("?");
let ext = Path::new(&fpath).extension().unwrap_or_default().to_str().unwrap_or("");
let fname = Path::new(&fpath)
.file_name()
.unwrap_or_default()
.to_str()
.unwrap_or("?");
let ext = Path::new(&fpath)
.extension()
.unwrap_or_default()
.to_str()
.unwrap_or("");
match ext {
"sql" => {
@@ -732,10 +866,26 @@ fn cmd_stats() -> Result<()> {
}
println!(" ─────────────────────────────");
println!(" SQL: {} files, {:.0} MB", sql_count, total_sql as f64 / 1048576.0);
println!(" Video: {} files, {:.0} MB", video_count, total_video as f64 / 1048576.0);
println!(" JSON: {} files, {:.0} MB", json_count, total_json as f64 / 1048576.0);
println!(" Total: {:.0} MB (compressed: {:.0} MB)", (total_sql + total_video + total_json) as f64 / 1048576.0, pkg_size as f64 / 1048576.0);
println!(
" SQL: {} files, {:.0} MB",
sql_count,
total_sql as f64 / 1048576.0
);
println!(
" Video: {} files, {:.0} MB",
video_count,
total_video as f64 / 1048576.0
);
println!(
" JSON: {} files, {:.0} MB",
json_count,
total_json as f64 / 1048576.0
);
println!(
" Total: {:.0} MB (compressed: {:.0} MB)",
(total_sql + total_video + total_json) as f64 / 1048576.0,
pkg_size as f64 / 1048576.0
);
println!();
}
@@ -758,8 +908,17 @@ async fn main() -> Result<()> {
Commands::List => cmd_list(&db).await?,
Commands::Package { uuid } => cmd_package(&db, &uuid).await?,
Commands::Stats => cmd_stats()?,
Commands::Visualize { uuid, typ, output, identity } => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?,
Commands::VisualizeOffline { sqlite_path, output, identity } => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?,
Commands::Visualize {
uuid,
typ,
output,
identity,
} => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?,
Commands::VisualizeOffline {
sqlite_path,
output,
identity,
} => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?,
}
Ok(())
}

View File

@@ -16,7 +16,10 @@ const LOG_DIR: &str = "/Users/accusys/service_logs";
const LAUNCH_DIR: &str = "/Users/accusys/Library/LaunchAgents";
#[derive(Parser)]
#[command(name = "service", about = "Service Lifecycle Manager — source → build → install → config → launch → env")]
#[command(
name = "service",
about = "Service Lifecycle Manager — source → build → install → config → launch → env"
)]
struct Cli {
#[command(subcommand)]
command: Commands,
@@ -111,22 +114,54 @@ fn cmd_source_list() -> Result<()> {
("pyenv", "pyenv/", "git repo"),
("cmake", "cmake-4.2.0-macos-universal.tar.gz", "binary"),
("llama.cpp", "llama.cpp/", "git repo"),
("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", "source tarball"),
("libreoffice (dmg)", "LibreOffice_26.2.3_MacOS_aarch64.dmg", "binary (TDF)"),
("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", "npm package"),
(
"libreoffice (src)",
"libreoffice-26.2.3.2.tar.xz",
"source tarball",
),
(
"libreoffice (dmg)",
"LibreOffice_26.2.3_MacOS_aarch64.dmg",
"binary (TDF)",
),
(
"mermaid-cli",
"mermaid-js-mermaid-cli-11.14.0.tgz",
"npm package",
),
("librsvg", "librsvg/", "Rust source"),
("GroundingDINO", "GroundingDINO/", "git repo (IDEA-Research)"),
(
"GroundingDINO",
"GroundingDINO/",
"git repo (IDEA-Research)",
),
("PaliGemma", "paligemma/", "HuggingFace reference"),
("Odoo 19 CE", "odoo/", "git repo (LGPL-3.0)"),
("ERPNext v15", "erpnext/", "git repo (GPL-3.0)"),
("Frappe Framework", "frappe/", "git repo (MIT)"),
("Gitea v1.25", "gitea/", "git repo (MIT, Go)"),
("Go v1.26", "go/", "git repo (BSD)"),
("Rust/Cargo", "rustc-1.92.0-src.tar.xz", "source tarball (Apache 2.0 / MIT)"),
("rustup", "rustup-1.28.1.tar.gz", "source tarball (Apache 2.0)"),
("Swift v6.3", "swift-6.3.1-RELEASE.tar.gz", "source tarball (Apache 2.0)"),
(
"Rust/Cargo",
"rustc-1.92.0-src.tar.xz",
"source tarball (Apache 2.0 / MIT)",
),
(
"rustup",
"rustup-1.28.1.tar.gz",
"source tarball (Apache 2.0)",
),
(
"Swift v6.3",
"swift-6.3.1-RELEASE.tar.gz",
"source tarball (Apache 2.0)",
),
("yt-dlp", "yt-dlp/", "git repo (Unlicense)"),
("SQLite", "sqlite-amalgamation-3490100.zip", "amalgamation (Public Domain)"),
(
"SQLite",
"sqlite-amalgamation-3490100.zip",
"amalgamation (Public Domain)",
),
("sqlite-vec", "sqlite-vec/", "git repo (MIT)"),
];
@@ -164,7 +199,11 @@ fn cmd_source_verify() -> Result<()> {
("cmake", "cmake-4.2.0-macos-universal.tar.gz", false),
("llama.cpp", "llama.cpp/", true),
("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", false),
("libreoffice (dmg)", "LibreOffice_26.2.3_MacOS_aarch64.dmg", false),
(
"libreoffice (dmg)",
"LibreOffice_26.2.3_MacOS_aarch64.dmg",
false,
),
("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", false),
("librsvg", "librsvg/", true),
("GroundingDINO", "GroundingDINO/", true),
@@ -186,7 +225,11 @@ fn cmd_source_verify() -> Result<()> {
let mut missing = 0;
for (name, path, is_dir) in &checks {
let full = src_dir.join(path);
let exists = if *is_dir { full.is_dir() } else { full.is_file() };
let exists = if *is_dir {
full.is_dir()
} else {
full.is_file()
};
if exists {
println!("{}", name);
ok += 1;
@@ -202,7 +245,10 @@ fn cmd_source_verify() -> Result<()> {
// ---- Build ----
fn cmd_build(service: &str) -> Result<()> {
let install_sh = Path::new(SERVICE_SRC).parent().unwrap().join("install_services.sh");
let install_sh = Path::new(SERVICE_SRC)
.parent()
.unwrap()
.join("install_services.sh");
if service == "all" {
// Run the full install script
@@ -224,8 +270,14 @@ fn cmd_build(service: &str) -> Result<()> {
"ffmpeg" => {
println!("Building ffmpeg (requires x264 + freetype)...");
// Simplified: run the install script which handles incremental builds
let status = Command::new("bash").arg(&install_sh).env("PREFIX", PREFIX).env("SRC_DIR", SERVICE_SRC).status()?;
if !status.success() { anyhow::bail!("Build failed"); }
let status = Command::new("bash")
.arg(&install_sh)
.env("PREFIX", PREFIX)
.env("SRC_DIR", SERVICE_SRC)
.status()?;
if !status.success() {
anyhow::bail!("Build failed");
}
}
"redis" => {
let src = format!("{}/redis-7.4.3.tar.gz", SERVICE_SRC);
@@ -236,37 +288,67 @@ fn cmd_build(service: &str) -> Result<()> {
run_build("postgresql", &src, &format!("cd /tmp && tar xzf {} && cd postgresql-18.3 && ./configure --prefix={}/pgsql/18.3 && make -j$(sysctl -n hw.ncpu) && make install", src, PREFIX))?;
}
"llama" => {
println!("Building llama.cpp from {}...", format!("{}/llama.cpp", SERVICE_SRC));
println!(
"Building llama.cpp from {}...",
format!("{}/llama.cpp", SERVICE_SRC)
);
let status = Command::new("cmake")
.args(["-B", "build", "-DCMAKE_INSTALL_PREFIX=/tmp/llama_install"])
.current_dir(format!("{}/llama.cpp", SERVICE_SRC))
.status()?;
if !status.success() { anyhow::bail!("cmake failed"); }
let status = Command::new("cmake").args(["--build", "build", "--config", "Release", "-j"]).current_dir(format!("{}/llama.cpp", SERVICE_SRC)).status()?;
if !status.success() { anyhow::bail!("build failed"); }
if !status.success() {
anyhow::bail!("cmake failed");
}
let status = Command::new("cmake")
.args(["--build", "build", "--config", "Release", "-j"])
.current_dir(format!("{}/llama.cpp", SERVICE_SRC))
.status()?;
if !status.success() {
anyhow::bail!("build failed");
}
}
"libreoffice" => {
let dmg = format!("{}/LibreOffice_26.2.3_MacOS_aarch64.dmg", SERVICE_SRC);
let mount = "/tmp/lo_mount";
println!("Extracting LibreOffice from DMG...");
// Mount
let status = Command::new("hdiutil").args(["attach", &dmg, "-nobrowse", "-quiet", "-mountpoint", mount]).status()?;
if !status.success() { anyhow::bail!("DMG mount failed"); }
let status = Command::new("hdiutil")
.args(["attach", &dmg, "-nobrowse", "-quiet", "-mountpoint", mount])
.status()?;
if !status.success() {
anyhow::bail!("DMG mount failed");
}
// Copy app
let lo_dir = format!("{}/libreoffice", PREFIX);
let _ = std::fs::remove_dir_all(format!("{}/LibreOffice.app", lo_dir));
std::fs::create_dir_all(&lo_dir)?;
let status = Command::new("cp").args(["-R", &format!("{}/LibreOffice.app", mount), &format!("{}/LibreOffice.app", lo_dir)]).status()?;
if !status.success() { anyhow::bail!("Copy failed"); }
let status = Command::new("cp")
.args([
"-R",
&format!("{}/LibreOffice.app", mount),
&format!("{}/LibreOffice.app", lo_dir),
])
.status()?;
if !status.success() {
anyhow::bail!("Copy failed");
}
// Create symlink
std::fs::create_dir_all(format!("{}/bin", lo_dir))?;
let _ = std::fs::remove_file(format!("{}/bin/soffice", lo_dir));
std::os::unix::fs::symlink("../LibreOffice.app/Contents/MacOS/soffice", format!("{}/bin/soffice", lo_dir))?;
std::os::unix::fs::symlink(
"../LibreOffice.app/Contents/MacOS/soffice",
format!("{}/bin/soffice", lo_dir),
)?;
// Unmount
let _ = Command::new("hdiutil").args(["detach", mount, "-quiet"]).status();
let _ = Command::new("hdiutil")
.args(["detach", mount, "-quiet"])
.status();
println!(" libreoffice installed to {}/bin/soffice", lo_dir);
}
_ => anyhow::bail!("Unknown service: {}. Try: all, ffmpeg, redis, postgres, llama, libreoffice, python", service),
_ => anyhow::bail!(
"Unknown service: {}. Try: all, ffmpeg, redis, postgres, llama, libreoffice, python",
service
),
}
Ok(())
}
@@ -274,7 +356,9 @@ fn cmd_build(service: &str) -> Result<()> {
/// Builds the component `name` by running the shell command `cmd` via `bash -c`.
///
/// `src` is only used in the progress message printed before the build starts.
///
/// # Errors
/// Returns an error if `bash` cannot be spawned, or `"<name> build failed"`
/// if the command exits with a non-success status.
fn run_build(name: &str, src: &str, cmd: &str) -> Result<()> {
    println!("Building {} from {}...", name, src);
    let status = Command::new("bash").arg("-c").arg(cmd).status()?;
    // A single check of the exit status is enough; bail out before printing
    // the completion message.
    if !status.success() {
        anyhow::bail!("{} build failed", name);
    }
    println!(" {} build complete", name);
    Ok(())
}
@@ -292,7 +376,10 @@ fn cmd_install(service: &str) -> Result<()> {
let rsvg_src = format!("{}/librsvg/bin/rsvg-convert", PREFIX);
let gitea_src = format!("{}/gitea/bin/gitea", PREFIX);
let go_src = format!("{}/go/bin/go", PREFIX);
let rustc_src = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", PREFIX);
let rustc_src = format!(
"{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc",
PREFIX
);
let swift_src = "/usr/bin/swift".to_string();
let ytdlp_src = "/opt/homebrew/bin/yt-dlp".to_string();
@@ -313,7 +400,9 @@ fn cmd_install(service: &str) -> Result<()> {
];
for (name, src) in &installs {
if service != "all" && service != *name { continue; }
if service != "all" && service != *name {
continue;
}
if Path::new(src).exists() {
println!("{} installed: {}", name, src);
} else {
@@ -370,12 +459,18 @@ fn cmd_config(service: &str) -> Result<()> {
println!("MOMENTRY_LLM_SUMMARY_URL=http://localhost:8082/v1/chat/completions");
println!("MOMENTRY_OUTPUT_DIR={}/momentry/output_dev", PREFIX);
println!("MOMENTRY_SCRIPTS_DIR={}/momentry_core_0.1/scripts", PREFIX);
println!("MOMENTRY_PYTHON_PATH={}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX);
println!(
"MOMENTRY_PYTHON_PATH={}/.pyenv/versions/3.11.15/bin/python3.11",
PREFIX
);
}
if service == "all" || service == "embedding" {
println!("\n--- Embedding Server config ---");
println!("# Start: {} embeddinggemma_server.py --port 11436", format!("{}/momentry_core_0.1/scripts", PREFIX));
println!(
"# Start: {} embeddinggemma_server.py --port 11436",
format!("{}/momentry_core_0.1/scripts", PREFIX)
);
println!("MODEL=google/embeddinggemma-300m");
println!("PORT=11436");
println!("DEVICE=mps");
@@ -393,25 +488,58 @@ fn cmd_launch_generate() -> Result<()> {
let pg_args = format!("-D {}/pgsql/18.3/data", PREFIX);
let redis_bin = format!("{}/redis/bin/redis-server", PREFIX);
let redis_args = format!("{}/redis/redis.conf", PREFIX);
let qdrant_bin = format!("{}/momentry_core_0.1/services/qdrant/target/release/qdrant", PREFIX);
let qdrant_bin = format!(
"{}/momentry_core_0.1/services/qdrant/target/release/qdrant",
PREFIX
);
let embed_bin = format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX);
let embed_args = format!("{}/momentry_core_0.1/scripts/embeddinggemma_server.py --port 11436", PREFIX);
let embed_args = format!(
"{}/momentry_core_0.1/scripts/embeddinggemma_server.py --port 11436",
PREFIX
);
let llama_bin = format!("{}/llama/bin/llama-server", PREFIX);
let llama_args = format!("-m {}/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf --port 8082 -ngl 99 -c 16384", PREFIX);
let play_bin = format!("{}/momentry_core_0.1/target/debug/momentry_playground", PREFIX);
let llama_args = format!(
"-m {}/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf --port 8082 -ngl 99 -c 16384",
PREFIX
);
let play_bin = format!(
"{}/momentry_core_0.1/target/debug/momentry_playground",
PREFIX
);
let services: Vec<(&str, &str, &str, &str)> = vec![
("com.momentry.postgres", &pg_bin, &pg_args, "PostgreSQL"),
("com.momentry.redis", &redis_bin, &redis_args, "Redis"),
("com.momentry.qdrant", &qdrant_bin, "", "Qdrant"),
("com.momentry.embedding", &embed_bin, &embed_args, "EmbeddingGemma"),
("com.momentry.llama", &llama_bin, &llama_args, "LLM (llama.cpp)"),
("com.momentry.playground", &play_bin, "server --port 3003", "Momentry Playground"),
("com.momentry.worker", &play_bin, "worker --max-concurrent 2 --poll-interval 5", "Momentry Worker"),
(
"com.momentry.embedding",
&embed_bin,
&embed_args,
"EmbeddingGemma",
),
(
"com.momentry.llama",
&llama_bin,
&llama_args,
"LLM (llama.cpp)",
),
(
"com.momentry.playground",
&play_bin,
"server --port 3003",
"Momentry Playground",
),
(
"com.momentry.worker",
&play_bin,
"worker --max-concurrent 2 --poll-interval 5",
"Momentry Worker",
),
];
for (label, bin, args, _desc) in &services {
let plist = format!(r#"<?xml version="1.0" encoding="UTF-8"?>
let plist = format!(
r#"<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
@@ -451,7 +579,11 @@ fn cmd_launch_generate() -> Result<()> {
fs::write(&plist_path, plist)?;
println!(" 📝 {}{:?}", label, plist_path.file_name().unwrap());
}
println!("\n Generated {} plist files in {}", services.len(), LAUNCH_DIR);
println!(
"\n Generated {} plist files in {}",
services.len(),
LAUNCH_DIR
);
Ok(())
}
@@ -461,7 +593,9 @@ fn cmd_launch_load() -> Result<()> {
let path = entry.path();
if path.extension().map_or(false, |e| e == "plist") {
let name = path.file_stem().unwrap().to_str().unwrap_or("?");
let status = Command::new("launchctl").args(["load", "-w", path.to_str().unwrap()]).status();
let status = Command::new("launchctl")
.args(["load", "-w", path.to_str().unwrap()])
.status();
match status {
Ok(s) if s.success() => println!(" ✅ loaded: {}", name),
Ok(_) => println!(" ⚠️ load failed: {}", name),
@@ -478,7 +612,9 @@ fn cmd_launch_unload() -> Result<()> {
let path = entry.path();
if path.extension().map_or(false, |e| e == "plist") {
let name = path.file_stem().unwrap().to_str().unwrap_or("?");
let status = Command::new("launchctl").args(["unload", path.to_str().unwrap()]).status();
let status = Command::new("launchctl")
.args(["unload", path.to_str().unwrap()])
.status();
match status {
Ok(s) if s.success() => println!(" ✅ unloaded: {}", name),
Ok(_) => println!(" ⚠️ unload failed: {}", name),
@@ -504,7 +640,11 @@ fn cmd_launch_status() -> Result<()> {
Ok(o) if o.status.success() => {
let stdout = String::from_utf8_lossy(&o.stdout);
if stdout.contains("PID") || stdout.lines().count() > 1 {
let pid = stdout.lines().nth(1).and_then(|l| l.split_whitespace().next()).unwrap_or("-");
let pid = stdout
.lines()
.nth(1)
.and_then(|l| l.split_whitespace().next())
.unwrap_or("-");
println!(" 🟢 {} (PID: {})", label, pid);
} else {
println!("{} (not running)", label);
@@ -519,7 +659,8 @@ fn cmd_launch_status() -> Result<()> {
// ---- Env ----
fn cmd_env(output: &Option<String>) -> Result<()> {
let env_content = format!(r#"# Momentry Core — Environment Configuration
let env_content = format!(
r#"# Momentry Core — Environment Configuration
# Generated: {}
# Service: {} env
@@ -601,8 +742,14 @@ fn cmd_test() -> Result<()> {
let rsvg_bin = format!("{}/librsvg/bin/rsvg-convert", PREFIX);
let gitea_bin = format!("{}/gitea/bin/gitea", PREFIX);
let go_bin = format!("{}/go/bin/go", PREFIX);
let rustc_bin = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", PREFIX);
let cargo_bin = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/cargo", PREFIX);
let rustc_bin = format!(
"{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc",
PREFIX
);
let cargo_bin = format!(
"{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/cargo",
PREFIX
);
let swift_bin = "/usr/bin/swift".to_string();
let ytdlp_bin = "/opt/homebrew/bin/yt-dlp".to_string();
@@ -641,7 +788,11 @@ fn cmd_test() -> Result<()> {
let output = Command::new(bin).args(args).output();
match output {
Ok(o) if o.status.success() => {
let ver = String::from_utf8_lossy(&o.stdout).lines().next().unwrap_or("?").to_string();
let ver = String::from_utf8_lossy(&o.stdout)
.lines()
.next()
.unwrap_or("?")
.to_string();
println!("{}", ver.chars().take(70).collect::<String>());
pass += 1;
}
@@ -666,14 +817,87 @@ fn cmd_test() -> Result<()> {
// Functional tests
println!("\n--- Functional Tests ---");
// Create test docx for libreoffice test
let _ = std::fs::write("/tmp/svc_test_func.docx", "Service test document for LibreOffice conversion");
let _ = std::fs::write(
"/tmp/svc_test_func.docx",
"Service test document for LibreOffice conversion",
);
let func_tests = [
("ffprobe probe", "ffprobe", vec!["-v", "error", "-show_entries", "format=duration", "-of", "csv=p=0", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4"]),
("ffmpeg audio extract", "ffmpeg", vec!["-y", "-v", "quiet", "-i", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", "-t", "2", "-ar", "16000", "-ac", "1", "/tmp/svc_test_audio.wav"]),
("ffmpeg frame extract", "ffmpeg", vec!["-y", "-v", "quiet", "-i", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", "-ss", "100", "-vframes", "1", "/tmp/svc_test_frame.jpg"]),
("libreoffice doc→txt", "libreoffice", vec!["--headless", "--convert-to", "txt", "/tmp/svc_test_func.docx", "--outdir", "/tmp/"]),
("rsvg-convert svg→png", "rsvg-convert", vec!["-o", "/tmp/svc_test_rsvg.png", "/tmp/test_rsvg.svg"]),
("mmdc mermaid→png", "mermaid-cli", vec!["-i", "/tmp/test_mermaid.mmd", "-o", "/tmp/svc_test_mmd.png", "-w", "200"]),
(
"ffprobe probe",
"ffprobe",
vec![
"-v",
"error",
"-show_entries",
"format=duration",
"-of",
"csv=p=0",
"/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
],
),
(
"ffmpeg audio extract",
"ffmpeg",
vec![
"-y",
"-v",
"quiet",
"-i",
"/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
"-t",
"2",
"-ar",
"16000",
"-ac",
"1",
"/tmp/svc_test_audio.wav",
],
),
(
"ffmpeg frame extract",
"ffmpeg",
vec![
"-y",
"-v",
"quiet",
"-i",
"/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
"-ss",
"100",
"-vframes",
"1",
"/tmp/svc_test_frame.jpg",
],
),
(
"libreoffice doc→txt",
"libreoffice",
vec![
"--headless",
"--convert-to",
"txt",
"/tmp/svc_test_func.docx",
"--outdir",
"/tmp/",
],
),
(
"rsvg-convert svg→png",
"rsvg-convert",
vec!["-o", "/tmp/svc_test_rsvg.png", "/tmp/test_rsvg.svg"],
),
(
"mmdc mermaid→png",
"mermaid-cli",
vec![
"-i",
"/tmp/test_mermaid.mmd",
"-o",
"/tmp/svc_test_mmd.png",
"-w",
"200",
],
),
];
for (desc, bin_name, args) in &func_tests {
@@ -689,8 +913,14 @@ fn cmd_test() -> Result<()> {
};
let output = Command::new(bin).args(args).output();
match output {
Ok(o) if o.status.success() => { println!(""); pass += 1; }
_ => { println!(""); fail += 1; }
Ok(o) if o.status.success() => {
println!("");
pass += 1;
}
_ => {
println!("");
fail += 1;
}
}
}
@@ -706,7 +936,10 @@ fn cmd_test() -> Result<()> {
fn cmd_report() -> Result<()> {
println!("=== Momentry Service Report ===");
println!("Generated: {}", chrono::Local::now().format("%Y-%m-%d %H:%M:%S"));
println!(
"Generated: {}",
chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
);
println!();
// 1. Source status
@@ -730,13 +963,25 @@ fn cmd_report() -> Result<()> {
println!("\n## 2. Binaries");
let binaries = [
("cmake", &format!("{}/bin/cmake", PREFIX)),
("python3.11", &format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX)),
(
"python3.11",
&format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX),
),
("ffmpeg", &format!("{}/ffmpeg_build/bin/ffmpeg", PREFIX)),
("ffprobe", &format!("{}/ffmpeg_build/bin/ffprobe", PREFIX)),
("redis-server", &format!("{}/redis/bin/redis-server", PREFIX)),
(
"redis-server",
&format!("{}/redis/bin/redis-server", PREFIX),
),
("postgres", &format!("{}/pgsql/18.3/bin/postgres", PREFIX)),
("llama-server", &format!("{}/llama/bin/llama-server", PREFIX)),
("libreoffice", &format!("{}/libreoffice/bin/soffice", PREFIX)),
(
"llama-server",
&format!("{}/llama/bin/llama-server", PREFIX),
),
(
"libreoffice",
&format!("{}/libreoffice/bin/soffice", PREFIX),
),
];
for (name, path) in &binaries {
let status = if Path::new(path).exists() {
@@ -772,9 +1017,18 @@ fn cmd_report() -> Result<()> {
// 4. Ports
println!("\n## 4. Port Status");
let ports = [(3003, "Playground"), (5432, "PostgreSQL"), (6379, "Redis"), (6333, "Qdrant"), (8082, "LLM"), (11436, "Embedding")];
let ports = [
(3003, "Playground"),
(5432, "PostgreSQL"),
(6379, "Redis"),
(6333, "Qdrant"),
(8082, "LLM"),
(11436, "Embedding"),
];
for (port, name) in &ports {
let output = Command::new("lsof").args(["-i", &format!(":{}", port)]).output();
let output = Command::new("lsof")
.args(["-i", &format!(":{}", port)])
.output();
match output {
Ok(o) if o.status.success() => println!(" 🟢 :{} ({})", port, name),
_ => println!(" ⚪ :{} ({})", port, name),
@@ -797,14 +1051,21 @@ fn cmd_report() -> Result<()> {
}
/// Formats a byte count as a short human-readable string using binary units.
///
/// * at least 1 GiB: gigabytes with one decimal place, e.g. `"1.5GB"`
/// * at least 1 MiB: whole megabytes, e.g. `"5MB"`
/// * at least 1 KiB: whole kilobytes, e.g. `"2KB"`
/// * otherwise: the raw byte count, e.g. `"512B"`
fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * 1024;
    const GIB: u64 = MIB * 1024;

    // Thresholds are inclusive so that an exact unit boundary is reported in
    // the larger unit ("1KB" rather than "1024B", "1.0GB" rather than "1024MB").
    if bytes >= GIB {
        format!("{:.1}GB", bytes as f64 / GIB as f64)
    } else if bytes >= MIB {
        format!("{:.0}MB", bytes as f64 / MIB as f64)
    } else if bytes >= KIB {
        format!("{:.0}KB", bytes as f64 / KIB as f64)
    } else {
        format!("{}B", bytes)
    }
}
fn format_dir_size(path: &Path) -> String {
let output = Command::new("du").args(["-sh", path.to_str().unwrap()]).output();
let output = Command::new("du")
.args(["-sh", path.to_str().unwrap()])
.output();
match output {
Ok(o) if o.status.success() => {
let s = String::from_utf8_lossy(&o.stdout);
@@ -824,7 +1085,10 @@ async fn main() -> Result<()> {
SourceAction::List => cmd_source_list()?,
SourceAction::Verify => cmd_source_verify()?,
SourceAction::Download { name } => {
println!("Downloading: {} (use install_services.sh for full download)", name);
println!(
"Downloading: {} (use install_services.sh for full download)",
name
);
println!("Source URLs:");
println!(" ffmpeg: https://ffmpeg.org/releases/ffmpeg-7.1.1.tar.xz");
println!(" redis: https://download.redis.io/releases/redis-7.4.3.tar.gz");

View File

@@ -75,15 +75,13 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
// Query chunks table for Rule 1 sentence chunks
let chunk_table = schema::table_name("chunk");
let rule1_rows: Vec<(String,)> = sqlx::query_as(
&format!(
let rule1_rows: Vec<(String,)> = sqlx::query_as(&format!(
"SELECT chunk_id FROM {} \
WHERE file_uuid = $1 AND chunk_type = 'sentence' \
AND start_frame >= $2 \
AND end_frame <= $3",
chunk_table
),
)
))
.bind(file_uuid)
.bind(scene.start_frame as i64)
.bind(scene.end_frame as i64)
@@ -101,16 +99,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
// Let's re-query text directly.
}
let texts: Vec<String> = sqlx::query_scalar(
&format!(
let texts: Vec<String> = sqlx::query_scalar(&format!(
"SELECT text_content FROM {} \
WHERE file_uuid = $1 AND chunk_type = 'sentence' \
AND start_frame >= $2 \
AND end_frame <= $3 \
ORDER BY start_frame ASC",
chunk_table
),
)
))
.bind(file_uuid)
.bind(scene.start_frame as i64)
.bind(scene.end_frame as i64)
@@ -154,16 +150,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
"scene_number": scene.scene_number
});
sqlx::query(
&format!(
sqlx::query(&format!(
"INSERT INTO {} (file_uuid, chunk_id, chunk_type, \
start_time, end_time, fps, start_frame, end_frame, \
content, text_content, summary_text, metadata, child_chunk_ids) \
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) \
ON CONFLICT (file_uuid, chunk_id) DO NOTHING",
chunk_table
),
)
))
.bind(file_uuid)
.bind(&chunk_id)
.bind(scene.scene_number as i32)

View File

@@ -20,8 +20,7 @@ pub fn set_cache_enabled(enabled: bool) {
}
// Switch 1: watcher detects new file → auto-register
pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy<RwLock<bool>> =
Lazy::new(|| RwLock::new(false));
pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));
pub fn get_watcher_auto_register() -> bool {
*RUNTIME_WATCHER_AUTO_REGISTER.read().unwrap()
@@ -33,8 +32,7 @@ pub fn set_watcher_auto_register(enabled: bool) {
}
// Switch 2: register → auto-trigger processing pipeline
pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy<RwLock<bool>> =
Lazy::new(|| RwLock::new(false));
pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));
pub fn get_auto_pipeline_enabled() -> bool {
*RUNTIME_AUTO_PIPELINE_ENABLED.read().unwrap()
@@ -107,6 +105,30 @@ pub static REDIS_KEY_PREFIX: Lazy<String> =
pub static DATABASE_SCHEMA: Lazy<String> =
Lazy::new(|| env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "public".to_string()));
/// Timezone name for the running system, resolved once on first access.
///
/// Resolution order:
/// 1. `MOMENTRY_TIMEZONE` environment variable, if set and non-empty.
/// 2. `TZ` environment variable, if set and non-empty (returned verbatim).
/// 3. The target of the `/etc/localtime` symlink: everything after the
///    `/zoneinfo/` path component.
/// 4. The hard-coded fallback `"Asia/Taipei"`.
pub static SYSTEM_TIMEZONE: Lazy<String> = Lazy::new(|| {
    // Explicit configuration wins: the project-specific variable first, then `TZ`.
    for var in &["MOMENTRY_TIMEZONE", "TZ"] {
        if let Ok(tz) = env::var(var) {
            if !tz.is_empty() {
                return tz;
            }
        }
    }

    // macOS: /etc/localtime → /var/db/timezone/zoneinfo/Asia/Taipei
    // Linux: /etc/localtime → /usr/share/zoneinfo/Asia/Taipei
    //        (or a relative link such as ../usr/share/zoneinfo/Asia/Taipei)
    // Matching on the `/zoneinfo/` component instead of fixed absolute prefixes
    // covers all of these layouts.
    if let Ok(target) = std::fs::read_link("/etc/localtime") {
        const MARKER: &str = "/zoneinfo/";
        let target = target.to_string_lossy();
        if let Some(idx) = target.find(MARKER) {
            let tz = &target[idx + MARKER.len()..];
            if !tz.is_empty() {
                return tz.to_string();
            }
        }
    }

    "Asia/Taipei".to_string()
});
pub static MONGODB_DATABASE: Lazy<String> =
Lazy::new(|| env::var("MONGODB_DATABASE").unwrap_or_else(|_| "momentry".to_string()));

File diff suppressed because it is too large Load Diff

View File

@@ -15,9 +15,11 @@ pub struct QdrantDb {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorPayload {
pub uuid: String,
pub file_uuid: String,
pub chunk_id: String,
pub chunk_type: String,
pub start_frame: i64,
pub end_frame: i64,
pub start_time: f64,
pub end_time: f64,
pub text: Option<String>,
@@ -189,6 +191,49 @@ impl QdrantDb {
Ok(())
}
/// Upserts a batch of points into `collection` with a single Qdrant request.
///
/// Each element of `points` is `(point id, vector, optional payload)`. Points
/// without a payload are sent with only `id` and `vector`. The request URL
/// carries `wait=true`.
///
/// An empty `points` slice is a no-op and returns `Ok(())` without contacting
/// Qdrant.
///
/// # Errors
///
/// Returns an error if the HTTP request cannot be sent, or if Qdrant answers
/// with a non-success status (the status and response body are included in
/// the error message).
pub async fn upsert_vectors_batch(
    &self,
    collection: &str,
    points: &[(u64, &[f32], Option<serde_json::Value>)],
) -> Result<()> {
    // Nothing to write: skip the round-trip instead of sending an empty
    // update request.
    if points.is_empty() {
        return Ok(());
    }

    let url = format!(
        "{}/collections/{}/points?wait=true",
        self.base_url, collection
    );

    let qdrant_points: Vec<serde_json::Value> = points
        .iter()
        .map(|(id, vector, payload)| {
            let mut point = serde_json::json!({
                "id": id,
                "vector": vector,
            });
            if let Some(payload) = payload {
                point["payload"] = payload.clone();
            }
            point
        })
        .collect();

    let body = serde_json::json!({ "points": qdrant_points });

    let response = self
        .client
        .put(&url)
        .header("api-key", &self.api_key)
        .json(&body)
        .send()
        .await
        .context("Failed to send batch upsert request to Qdrant")?;

    let status = response.status();
    if !status.is_success() {
        // Best effort: the status alone is still useful if the body cannot be read.
        let response_text = response.text().await.unwrap_or_default();
        anyhow::bail!("Qdrant batch upsert failed: {} - {}", status, response_text);
    }

    Ok(())
}
pub async fn upsert_vector(
&self,
chunk_id: &str,
@@ -207,12 +252,23 @@ impl QdrantDb {
);
let mut payload_map = HashMap::new();
payload_map.insert("uuid".to_string(), serde_json::json!(payload.uuid));
payload_map.insert(
"file_uuid".to_string(),
serde_json::json!(payload.file_uuid),
);
payload_map.insert("chunk_id".to_string(), serde_json::json!(payload.chunk_id));
payload_map.insert(
"chunk_type".to_string(),
serde_json::json!(payload.chunk_type),
);
payload_map.insert(
"start_frame".to_string(),
serde_json::json!(payload.start_frame),
);
payload_map.insert(
"end_frame".to_string(),
serde_json::json!(payload.end_frame),
);
payload_map.insert(
"start_time".to_string(),
serde_json::json!(payload.start_time),
@@ -224,7 +280,7 @@ impl QdrantDb {
// Generate consistent point ID from uuid and chunk_id
// Qdrant requires integer or UUID point IDs. We'll use a simple integer hash.
let point_id_str = format!("{}_{}", payload.uuid, chunk_id);
let point_id_str = format!("{}_{}", payload.file_uuid, chunk_id);
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
@@ -240,9 +296,9 @@ impl QdrantDb {
});
tracing::debug!(
"Upserting vector to Qdrant: chunk_id={}, uuid={}, vector_len={}",
"Upserting vector to Qdrant: chunk_id={}, file_uuid={}, vector_len={}",
chunk_id,
payload.uuid,
payload.file_uuid,
vector.len()
);
@@ -337,7 +393,7 @@ impl QdrantDb {
.map(|r| {
let uuid = r
.payload
.get("uuid")
.get("file_uuid")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
@@ -409,7 +465,7 @@ impl QdrantDb {
.map(|r| {
let uuid = r
.payload
.get("uuid")
.get("file_uuid")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
@@ -471,7 +527,7 @@ impl QdrantDb {
"filter": {
"must": [
{
"key": "uuid",
"key": "file_uuid",
"match": {
"value": uuid
}
@@ -532,7 +588,7 @@ impl QdrantDb {
.map(|r| {
let uuid = r
.payload
.get("uuid")
.get("file_uuid")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
@@ -553,6 +609,89 @@ impl QdrantDb {
Ok(search_results)
}
/// Runs a vector similarity search against `collection` and returns the raw
/// `(score, payload)` pairs of the hits.
///
/// # Parameters
///
/// * `collection` - name of the Qdrant collection to search.
/// * `query_vector` - the query embedding.
/// * `limit` - maximum number of hits requested from Qdrant.
/// * `exclude_payload_key` / `exclude_payload_value` - points whose payload
///   field `exclude_payload_key` matches `exclude_payload_value` are filtered
///   out (`must_not` clause).
/// * `include_file_uuid` - when `Some`, only points whose `file_uuid` payload
///   field matches are considered (`must` clause).
///
/// # Errors
///
/// Returns an error if the request cannot be sent, if Qdrant answers with a
/// non-success status, if the response body cannot be read, or if the body is
/// not the expected search-result JSON.
pub async fn search_face_collection(
    &self,
    collection: &str,
    query_vector: &[f32],
    limit: usize,
    exclude_payload_key: &str,
    exclude_payload_value: &str,
    include_file_uuid: Option<&str>,
) -> Result<Vec<(f64, HashMap<String, serde_json::Value>)>> {
    // Minimal view of the search response: only the fields used below.
    #[derive(Deserialize)]
    struct QdrantSearchResult {
        result: Vec<QdrantPoint>,
    }

    #[derive(Deserialize)]
    struct QdrantPoint {
        score: f64,
        payload: HashMap<String, serde_json::Value>,
    }

    let url = format!("{}/collections/{}/points/search", self.base_url, collection);

    let mut filter = serde_json::json!({
        "must_not": [
            {
                "key": exclude_payload_key,
                "match": { "value": exclude_payload_value }
            }
        ]
    });

    if let Some(file_uuid) = include_file_uuid {
        filter["must"] = serde_json::json!([
            {
                "key": "file_uuid",
                "match": { "value": file_uuid }
            }
        ]);
    }

    let body = serde_json::json!({
        "vector": query_vector,
        "limit": limit,
        "with_payload": true,
        "filter": filter,
    });

    let response = self
        .client
        .post(&url)
        .header("api-key", &self.api_key)
        .header("Content-Type", "application/json")
        .json(&body)
        .send()
        .await
        .context("Failed to search Qdrant face collection")?;

    let status = response.status();
    if !status.is_success() {
        // On the error path the body is only diagnostic, so a placeholder is
        // acceptable when it cannot be read.
        let response_text = response
            .text()
            .await
            .unwrap_or_else(|_| "Failed to read response".to_string());
        return Err(anyhow::anyhow!(
            "Qdrant search_face_collection failed: {} - {}",
            status,
            response_text
        ));
    }

    // On the success path a body-read failure is a real error; report it as
    // such instead of letting it surface later as a JSON parse failure.
    let response_text = response
        .text()
        .await
        .context("Failed to read Qdrant face search response body")?;

    let parsed: QdrantSearchResult = serde_json::from_str(&response_text)
        .map_err(|e| anyhow::anyhow!("Failed to parse Qdrant response: {}", e))?;

    Ok(parsed
        .result
        .into_iter()
        .map(|point| (point.score, point.payload))
        .collect())
}
pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> {
let url = format!(
"{}/collections/{}/points/delete",
@@ -563,7 +702,7 @@ impl QdrantDb {
"filter": {
"must": [
{
"key": "uuid",
"key": "file_uuid",
"match": {
"value": uuid
}
@@ -711,9 +850,11 @@ impl Database for QdrantDb {
impl VectorStore for QdrantDb {
async fn store_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> {
let payload = VectorPayload {
uuid: String::new(),
file_uuid: String::new(),
chunk_id: chunk_id.to_string(),
chunk_type: String::new(),
start_frame: 0,
end_frame: 0,
start_time: 0.0,
end_time: 0.0,
text: None,
@@ -737,7 +878,9 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
let qdrant: QdrantDb = QdrantDb::new();
let query = format!(
"SELECT id, trace_id, frame_number, embedding FROM {} WHERE file_uuid = $1 AND embedding IS NOT NULL",
"SELECT id, trace_id, frame_number, embedding FROM {} \
WHERE file_uuid = $1 AND embedding IS NOT NULL \
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
table
);
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
@@ -767,3 +910,103 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
);
Ok(())
}
/// Computes one averaged face embedding per trace of `file_uuid` and upserts
/// them into the `<prefix>_traces` Qdrant collection.
///
/// The collection name is `REDIS_KEY_PREFIX` with any trailing `:` removed,
/// followed by `_traces`. Source rows come from the `face_detections` table
/// and are limited to rows with a non-null embedding and trace id whose
/// `metadata->>'qc_ok'` is either absent or `true`.
///
/// Each point uses the trace id as its point id and carries the payload
/// fields `trace_id`, `file_uuid`, `frame_count` (number of embeddings that
/// went into the average) and `source = "trace"`.
///
/// # Errors
///
/// Returns an error if the database connection or query fails, if the
/// collection cannot be ensured, or if any batch upsert fails.
pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
    use crate::core::config::DATABASE_URL;
    use sqlx::Row;

    let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
    let table = crate::core::db::schema::table_name("face_detections");
    let qdrant = QdrantDb::new();
    let collection = format!(
        "{}_traces",
        crate::core::config::REDIS_KEY_PREFIX
            .as_str()
            .trim_end_matches(':')
    );

    // NOTE(review): 512 is presumably the embedding dimension — confirm against
    // `ensure_collection` and the face embedding model.
    qdrant.ensure_collection(&collection, 512).await?;

    // Read all face_detections with embeddings; grouping by trace_id happens in Rust.
    let rows = sqlx::query(&format!(
        "SELECT trace_id, embedding FROM {} \
         WHERE file_uuid = $1 AND embedding IS NOT NULL AND trace_id IS NOT NULL \
         AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
        table
    ))
    .bind(file_uuid)
    .fetch_all(&pool)
    .await?;

    let mut trace_faces: std::collections::HashMap<i32, Vec<Vec<f32>>> =
        std::collections::HashMap::new();
    for row in &rows {
        let tid: Option<i32> = row.get(0);
        let emb: Option<Vec<f32>> = row.get(1);
        if let (Some(tid), Some(emb)) = (tid, emb) {
            trace_faces.entry(tid).or_default().push(emb);
        }
    }

    // Averaged embedding for one trace, plus how many faces contributed to it.
    struct AvgTrace {
        tid: i32,
        avg_emb: Vec<f32>,
        frame_count: i64,
    }

    let mut trace_avgs: Vec<AvgTrace> = Vec::with_capacity(trace_faces.len());
    for (&tid, faces) in &trace_faces {
        // The first embedding fixes the expected dimension for this trace.
        let dim = match faces.first() {
            Some(first) if !first.is_empty() => first.len(),
            _ => {
                tracing::warn!(
                    "Skipping trace {} of {}: empty face embedding",
                    tid,
                    file_uuid
                );
                continue;
            }
        };

        let mut avg = vec![0.0f32; dim];
        let mut used: i64 = 0;
        for face in faces {
            // An embedding of a different length cannot be averaged
            // component-wise; skip it rather than panic or skew the mean.
            if face.len() != dim {
                tracing::warn!(
                    "Skipping face embedding in trace {} of {}: expected {} dims, got {}",
                    tid,
                    file_uuid,
                    dim,
                    face.len()
                );
                continue;
            }
            for (acc, &v) in avg.iter_mut().zip(face) {
                *acc += v;
            }
            used += 1;
        }

        // `used` is at least 1 because the first embedding always matches `dim`.
        let n = used as f32;
        for v in &mut avg {
            *v /= n;
        }

        trace_avgs.push(AvgTrace {
            tid,
            avg_emb: avg,
            frame_count: used,
        });
    }

    // Push to Qdrant in batches.
    for chunk in trace_avgs.chunks(500) {
        let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
            .iter()
            .map(|t| {
                (
                    // NOTE(review): the point id is the bare trace id. A negative
                    // id would sign-extend to a very large u64, and if trace ids
                    // are only unique per file, points from different files would
                    // overwrite each other in this shared collection — confirm.
                    t.tid as u64,
                    t.avg_emb.as_slice(),
                    Some(serde_json::json!({
                        "trace_id": t.tid,
                        "file_uuid": file_uuid,
                        "frame_count": t.frame_count,
                        "source": "trace",
                    })),
                )
            })
            .collect();
        qdrant.upsert_vectors_batch(&collection, &batch).await?;
    }

    tracing::info!(
        "Synced {} trace embeddings to Qdrant for {}",
        trace_avgs.len(),
        file_uuid
    );
    Ok(())
}

View File

@@ -45,9 +45,11 @@ impl SyncDb {
}
let payload = VectorPayload {
uuid: uuid.clone(),
file_uuid: uuid.clone(),
chunk_id: chunk_id.clone(),
chunk_type,
start_frame: chunk.start_frame,
end_frame: chunk.end_frame,
start_time,
end_time,
text: Some(text.to_string()),

View File

@@ -33,26 +33,38 @@ pub async fn run_consistency_checks(db: &PostgresDb) -> ConsistencyReport {
// Check 1: stale_processing — status=processing but job_id is null
let c1 = check_stale_processing(db).await;
if c1.count > 0 { any_issue = true; }
if c1.count > 0 {
any_issue = true;
}
checks.push(c1);
// Check 2: orphaned_processing — status=processing but no active monitor_job
let c2 = check_orphaned_processing(db).await;
if c2.count > 0 { any_issue = true; }
if c2.count > 0 {
any_issue = true;
}
checks.push(c2);
// Check 3: unregistered_with_uuid — DB rows left behind by migration
let c3 = check_unregistered_with_uuid(db).await;
if c3.count > 0 { any_issue = true; }
if c3.count > 0 {
any_issue = true;
}
checks.push(c3);
// Check 4: processing_job_done — status=processing but job already completed
let c4 = check_processing_job_done(db).await;
if c4.count > 0 { any_issue = true; }
if c4.count > 0 {
any_issue = true;
}
checks.push(c4);
ConsistencyReport {
status: if any_issue { "degraded".to_string() } else { "ok".to_string() },
status: if any_issue {
"degraded".to_string()
} else {
"ok".to_string()
},
checked_at,
checks,
}
@@ -68,9 +80,17 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck {
.await
.unwrap_or_default();
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid, file_name, status, detail: "job_id is null".to_string(),
}).collect();
let files: Vec<ConsistencyFile> = rows
.into_iter()
.map(
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid,
file_name,
status,
detail: "job_id is null".to_string(),
},
)
.collect();
ConsistencyCheck {
check: "stale_processing".to_string(),
@@ -83,7 +103,8 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck {
async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
let vt = schema::table_name("videos");
let mj = schema::table_name("monitor_jobs");
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
let rows: Vec<(String, String, String)> =
sqlx::query_as::<_, (String, String, String)>(&format!(
"SELECT v.file_uuid, v.file_name, v.status \
FROM {} v LEFT JOIN {} m ON v.file_uuid = m.uuid AND m.status IN ('pending','running') \
WHERE v.status = 'processing' AND m.id IS NULL",
@@ -93,9 +114,17 @@ async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
.await
.unwrap_or_default();
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid, file_name, status, detail: "no active monitor_job".to_string(),
}).collect();
let files: Vec<ConsistencyFile> = rows
.into_iter()
.map(
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid,
file_name,
status,
detail: "no active monitor_job".to_string(),
},
)
.collect();
ConsistencyCheck {
check: "orphaned_processing".to_string(),
@@ -107,7 +136,8 @@ async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
let vt = schema::table_name("videos");
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
let rows: Vec<(String, String, String)> =
sqlx::query_as::<_, (String, String, String)>(&format!(
"SELECT file_uuid, file_name, status FROM {} WHERE status = 'unregistered'",
vt
))
@@ -115,9 +145,17 @@ async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
.await
.unwrap_or_default();
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid, file_name, status, detail: "migration residue".to_string(),
}).collect();
let files: Vec<ConsistencyFile> = rows
.into_iter()
.map(
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid,
file_name,
status,
detail: "migration residue".to_string(),
},
)
.collect();
ConsistencyCheck {
check: "unregistered_with_uuid".to_string(),
@@ -130,7 +168,8 @@ async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
async fn check_processing_job_done(db: &PostgresDb) -> ConsistencyCheck {
let vt = schema::table_name("videos");
let mj = schema::table_name("monitor_jobs");
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
let rows: Vec<(String, String, String)> =
sqlx::query_as::<_, (String, String, String)>(&format!(
"SELECT v.file_uuid, v.file_name, v.status \
FROM {} v JOIN {} m ON v.file_uuid = m.uuid \
WHERE v.status = 'processing' AND m.status = 'completed'",
@@ -140,9 +179,17 @@ async fn check_processing_job_done(db: &PostgresDb) -> ConsistencyCheck {
.await
.unwrap_or_default();
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid, file_name, status, detail: "monitor_job already completed".to_string(),
}).collect();
let files: Vec<ConsistencyFile> = rows
.into_iter()
.map(
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
file_uuid,
file_name,
status,
detail: "monitor_job already completed".to_string(),
},
)
.collect();
ConsistencyCheck {
check: "processing_job_done".to_string(),

View File

@@ -54,8 +54,7 @@ pub fn read_identity_file(uuid: &str) -> Result<IdentityFile> {
let path = identity_file_path(uuid);
let content = std::fs::read_to_string(&path)
.with_context(|| format!("Identity file not found: {} ({})", uuid, path.display()))?;
serde_json::from_str(&content)
.with_context(|| format!("Invalid identity.json: {}", uuid))
serde_json::from_str(&content).with_context(|| format!("Invalid identity.json: {}", uuid))
}
pub fn write_identity_file(file: &IdentityFile) -> Result<()> {
@@ -167,7 +166,10 @@ pub fn rebuild_index() -> Result<usize> {
entries.insert(uuid.clone(), file.name);
}
Err(e) => {
warn!("[identity-storage] Skipping {} in index rebuild: {}", uuid, e);
warn!(
"[identity-storage] Skipping {} in index rebuild: {}",
uuid, e
);
}
}
}
@@ -187,18 +189,16 @@ pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Resu
let identity_table = crate::core::db::schema::table_name("identities");
let fd_table = crate::core::db::schema::table_name("face_detections");
// Schema-aware column selection: dev uses 'name', public uses 'real_name'
let name_col = if identity_table.starts_with("dev.") { "name" } else { "real_name" };
let clean = uuid.replace('-', "");
let record = sqlx::query_as::<_, crate::core::db::IdentityDetailRecord>(
&format!(
"SELECT id, uuid::text, {} AS name, identity_type, source, status, metadata, reference_data, \
"SELECT id, uuid::text, name, identity_type, source, status, metadata, reference_data, \
NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
face_embedding::real[] as face_embedding, \
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
name_col, identity_table
identity_table
)
)
.bind(&clean)
@@ -322,8 +322,13 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result
let mut entries: HashMap<String, String> = if index_path.exists() {
let content = std::fs::read_to_string(&index_path)?;
let v: serde_json::Value = serde_json::from_str(&content).unwrap_or_default();
v["entries"].as_object()
.map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string())).collect())
v["entries"]
.as_object()
.map(|obj| {
obj.iter()
.map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string()))
.collect()
})
.unwrap_or_default()
} else {
HashMap::new()
@@ -338,7 +343,9 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result
}
pub async fn save_identity_file(db: &PostgresDb, uuid: &str) -> Result<()> {
let record = db.get_identity_by_uuid(uuid).await?
let record = db
.get_identity_by_uuid(uuid)
.await?
.with_context(|| format!("Identity not found in DB: {}", uuid))?;
let identity_uuid = record.uuid.clone();
@@ -415,6 +422,7 @@ mod tests {
status: Some("confirmed".to_string()),
tmdb_id: Some(112),
tmdb_profile: Some("https://image.tmdb.org/t/p/w185/test.jpg".to_string()),
local_profile: None,
metadata: serde_json::json!({"tmdb_character": "Test Role"}),
file_bindings: vec![FileBinding {
file_uuid: "ffffffffffffffffffffffffffffffff".to_string(),
@@ -442,7 +450,9 @@ mod tests {
fn test_identity_dir_path() {
let uuid = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
let p = identity_dir(uuid);
assert!(p.to_string_lossy().ends_with(&format!("identities/{}", uuid)));
assert!(p
.to_string_lossy()
.ends_with(&format!("identities/{}", uuid)));
}
#[test]
@@ -463,7 +473,10 @@ mod tests {
let base = Path::new("/tmp/test_base");
let uuid = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
let p = identity_dir_at(base, uuid);
assert_eq!(p, Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"));
assert_eq!(
p,
Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")
);
}
#[test]
@@ -490,7 +503,10 @@ mod tests {
assert_eq!(read.name, file.name);
assert_eq!(read.source, file.source);
assert_eq!(read.tmdb_id, file.tmdb_id);
assert_eq!(read.file_bindings[0].face_count, file.file_bindings[0].face_count);
assert_eq!(
read.file_bindings[0].face_count,
file.file_bindings[0].face_count
);
let _ = std::fs::remove_dir_all(&tmp);
}
@@ -521,9 +537,21 @@ mod tests {
let _ = std::fs::remove_dir_all(&tmp);
let base = &tmp;
std::fs::create_dir_all(base.join("identities").join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")).unwrap();
std::fs::create_dir_all(base.join("identities").join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")).unwrap();
std::fs::create_dir_all(base.join("identities").join("cccccccccccccccccccccccccccccccc")).unwrap();
std::fs::create_dir_all(
base.join("identities")
.join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
)
.unwrap();
std::fs::create_dir_all(
base.join("identities")
.join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
)
.unwrap();
std::fs::create_dir_all(
base.join("identities")
.join("cccccccccccccccccccccccccccccccc"),
)
.unwrap();
std::fs::create_dir_all(base.join("identities").join("not_a_uuid")).unwrap();
std::fs::create_dir_all(base.join("identities").join("short")).unwrap();

View File

@@ -56,19 +56,25 @@ impl IngestionService {
.to_string();
// 1. Compute SHA256 for dedup
let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path).ok().unwrap_or_default();
let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path)
.ok()
.unwrap_or_default();
// 2. Hash check — same content = already registered
let videos_table = schema::table_name("videos");
if !content_hash.is_empty() {
if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(
&format!("SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1", videos_table)
)
if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(&format!(
"SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1",
videos_table
))
.bind(&content_hash)
.fetch_optional(self.db.pool())
.await
{
info!("Content already registered: {} ({})", filename, existing_uuid);
info!(
"Content already registered: {} ({})",
filename, existing_uuid
);
return Ok(Some(existing_uuid));
}
}
@@ -108,7 +114,8 @@ impl IngestionService {
let probe_result = probe::probe_video(file_path).ok();
let file_meta = std::fs::metadata(&canonical_path).ok();
let duration = probe_result.as_ref()
let duration = probe_result
.as_ref()
.and_then(|r| r.format.duration.as_ref())
.and_then(|s| s.parse::<f64>().ok())
.unwrap_or(0.0);
@@ -148,7 +155,11 @@ impl IngestionService {
}
let total_frames = {
let video_stream = probe_result.as_ref().and_then(|pr| pr.streams.iter().find(|s| s.codec_type.as_deref() == Some("video")));
let video_stream = probe_result.as_ref().and_then(|pr| {
pr.streams
.iter()
.find(|s| s.codec_type.as_deref() == Some("video"))
});
if let Some(stream) = video_stream {
if let Some(nb_frames_str) = &stream.nb_frames {
@@ -223,7 +234,10 @@ impl IngestionService {
// Store content_hash for dedup
if !content_hash.is_empty() {
let vt = schema::table_name("videos");
let _ = sqlx::query(&format!("UPDATE {} SET content_hash = $1 WHERE file_uuid = $2", vt))
let _ = sqlx::query(&format!(
"UPDATE {} SET content_hash = $1 WHERE file_uuid = $2",
vt
))
.bind(&content_hash)
.bind(&uuid)
.execute(self.db.pool())
@@ -243,5 +257,3 @@ impl IngestionService {
Ok(Some(uuid))
}
}

View File

@@ -17,42 +17,84 @@ mod tests {
#[test]
fn test_detect_category_image() {
assert_eq!(detect_category(Path::new("photo.jpg")), FileCategory::Image);
assert_eq!(detect_category(Path::new("photo.jpeg")), FileCategory::Image);
assert_eq!(
detect_category(Path::new("photo.jpeg")),
FileCategory::Image
);
assert_eq!(detect_category(Path::new("photo.png")), FileCategory::Image);
assert_eq!(detect_category(Path::new("photo.svg")), FileCategory::Image);
assert_eq!(detect_category(Path::new("photo.webp")), FileCategory::Image);
assert_eq!(
detect_category(Path::new("photo.webp")),
FileCategory::Image
);
}
#[test]
fn test_detect_category_document() {
assert_eq!(detect_category(Path::new("doc.pdf")), FileCategory::Document);
assert_eq!(detect_category(Path::new("doc.docx")), FileCategory::Document);
assert_eq!(detect_category(Path::new("doc.pages")), FileCategory::Document);
assert_eq!(detect_category(Path::new("doc.txt")), FileCategory::Document);
assert_eq!(
detect_category(Path::new("doc.pdf")),
FileCategory::Document
);
assert_eq!(
detect_category(Path::new("doc.docx")),
FileCategory::Document
);
assert_eq!(
detect_category(Path::new("doc.pages")),
FileCategory::Document
);
assert_eq!(
detect_category(Path::new("doc.txt")),
FileCategory::Document
);
}
#[test]
fn test_detect_category_spreadsheet() {
assert_eq!(detect_category(Path::new("data.xlsx")), FileCategory::Spreadsheet);
assert_eq!(detect_category(Path::new("data.csv")), FileCategory::Spreadsheet);
assert_eq!(detect_category(Path::new("data.numbers")), FileCategory::Spreadsheet);
assert_eq!(
detect_category(Path::new("data.xlsx")),
FileCategory::Spreadsheet
);
assert_eq!(
detect_category(Path::new("data.csv")),
FileCategory::Spreadsheet
);
assert_eq!(
detect_category(Path::new("data.numbers")),
FileCategory::Spreadsheet
);
}
#[test]
fn test_detect_category_presentation() {
assert_eq!(detect_category(Path::new("deck.pptx")), FileCategory::Presentation);
assert_eq!(detect_category(Path::new("deck.key")), FileCategory::Presentation);
assert_eq!(
detect_category(Path::new("deck.pptx")),
FileCategory::Presentation
);
assert_eq!(
detect_category(Path::new("deck.key")),
FileCategory::Presentation
);
}
#[test]
fn test_detect_category_archive() {
assert_eq!(detect_category(Path::new("files.zip")), FileCategory::Archive);
assert_eq!(detect_category(Path::new("files.tar.gz")), FileCategory::Archive);
assert_eq!(
detect_category(Path::new("files.zip")),
FileCategory::Archive
);
assert_eq!(
detect_category(Path::new("files.tar.gz")),
FileCategory::Archive
);
}
#[test]
fn test_detect_category_unknown() {
assert_eq!(detect_category(Path::new("file.xyz")), FileCategory::Unknown);
assert_eq!(
detect_category(Path::new("file.xyz")),
FileCategory::Unknown
);
assert_eq!(detect_category(Path::new("file")), FileCategory::Unknown);
}
@@ -84,13 +126,18 @@ pub enum FileCategory {
/// Detect file category from path extension
pub fn detect_category(path: &Path) -> FileCategory {
let ext = path.extension()
let ext = path
.extension()
.and_then(|e| e.to_str())
.map(|e| e.to_lowercase());
match ext.as_deref() {
Some("mp4" | "mov" | "mkv" | "avi" | "webm" | "m4v" | "mpeg") => FileCategory::Video,
Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => FileCategory::Image,
Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => FileCategory::Document,
Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => {
FileCategory::Image
}
Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => {
FileCategory::Document
}
Some("xls" | "xlsx" | "csv" | "ods" | "numbers") => FileCategory::Spreadsheet,
Some("ppt" | "pptx" | "odp" | "key") => FileCategory::Presentation,
Some("zip" | "tar" | "gz" | "tgz" | "7z" | "rar") => FileCategory::Archive,
@@ -102,16 +149,20 @@ pub fn detect_category(path: &Path) -> FileCategory {
pub fn base_format_info(path: &Path) -> serde_json::Value {
let meta = std::fs::metadata(path).ok();
let size = meta.as_ref().map(|m| m.len()).unwrap_or(0);
let mtime = meta.as_ref()
let mtime = meta
.as_ref()
.and_then(|m| m.modified().ok())
.and_then(|t| {
let secs = t.duration_since(SystemTime::UNIX_EPOCH).ok()?.as_secs() as i64;
chrono::DateTime::from_timestamp(secs, 0)
.map(|dt| dt.to_rfc3339())
chrono::DateTime::from_timestamp(secs, 0).map(|dt| dt.to_rfc3339())
})
.unwrap_or_default();
let fname = path.to_string_lossy().to_string();
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("").to_lowercase();
let ext = path
.extension()
.and_then(|e| e.to_str())
.unwrap_or("")
.to_lowercase();
let cat = detect_category(path);
let file_type = match cat {
FileCategory::Video => "video",
@@ -150,7 +201,13 @@ fn ffprobe_probe(path: &Path, format_base: serde_json::Value) -> serde_json::Val
}
/// Run Python probe for document/spreadsheet/presentation files
fn python_probe(path: &Path, category: &FileCategory, scripts_dir: &str, python_path: &str, format_base: serde_json::Value) -> serde_json::Value {
fn python_probe(
path: &Path,
category: &FileCategory,
scripts_dir: &str,
python_path: &str,
format_base: serde_json::Value,
) -> serde_json::Value {
let script = format!("{}/probe_file.py", scripts_dir);
if !std::path::Path::new(&script).exists() {
return minimal_probe(format_base);
@@ -184,18 +241,12 @@ fn minimal_probe(format_base: serde_json::Value) -> serde_json::Value {
/// Unified probe: dispatches to the right probe based on file type
/// Returns a probe_json-compatible Value
pub async fn unified_probe(
path: &Path,
scripts_dir: &str,
python_path: &str,
) -> serde_json::Value {
pub async fn unified_probe(path: &Path, scripts_dir: &str, python_path: &str) -> serde_json::Value {
let cat = detect_category(path);
let format_base = base_format_info(path);
match cat {
FileCategory::Video | FileCategory::Image => {
ffprobe_probe(path, format_base)
}
FileCategory::Video | FileCategory::Image => ffprobe_probe(path, format_base),
FileCategory::Document | FileCategory::Spreadsheet | FileCategory::Presentation => {
python_probe(path, &cat, scripts_dir, python_path, format_base)
}

View File

@@ -1,5 +1,6 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::process::Command;
use std::time::Duration;
use super::executor::PythonExecutor;
@@ -27,13 +28,21 @@ pub async fn process_cut(
output_path: &str,
uuid: Option<&str>,
) -> Result<CutResult> {
// Try native ffmpeg-based scene detection first
let result = try_native_cut(video_path);
if let Ok(r) = result {
let json = serde_json::to_string_pretty(&r)?;
std::fs::write(output_path, &json)
.with_context(|| format!("Failed to write {:?}", output_path))?;
return Ok(r);
}
// Fallback: Python scenedetect
tracing::warn!("[CUT] Native impl failed, falling back to Python");
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("cut_processor.py");
tracing::info!("[CUT] Starting scene detection: {}", video_path);
if !script_path.exists() {
tracing::warn!("[CUT] Script not found, returning empty result");
return Ok(CutResult {
frame_count: 0,
fps: 0.0,
@@ -53,19 +62,179 @@ pub async fn process_cut(
.with_context(|| format!("Failed to run {:?}", script_path))?;
let json_str = std::fs::read_to_string(output_path).context("Failed to read CUT output")?;
let result: CutResult =
serde_json::from_str(&json_str).context("Failed to parse CUT output")?;
tracing::info!("[CUT] Result: {} scenes detected", result.scenes.len());
Ok(result)
}
// ── Native ffmpeg scene detection ─────────────────────────────────
fn try_native_cut(video_path: &str) -> Result<CutResult> {
// Step 1: Get video info (fps, frame count)
let probe = Command::new("ffprobe")
.args([
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
video_path,
])
.output()
.context("Failed to run ffprobe")?;
let probe_info: serde_json::Value =
serde_json::from_slice(&probe.stdout).context("Failed to parse ffprobe output")?;
let streams = probe_info["streams"]
.as_array()
.map_or(vec![], |s| s.clone());
let video_stream = streams.iter().find(|s| s["codec_type"] == "video");
let fps = video_stream
.and_then(|s| s["r_frame_rate"].as_str().and_then(parse_fraction))
.unwrap_or(30.0);
let total_frames: u64 = video_stream
.and_then(|s| s["nb_frames"].as_str())
.and_then(|s| s.parse().ok())
.unwrap_or(0);
let duration: f64 = probe_info["format"]["duration"]
.as_str()
.and_then(|s| s.parse().ok())
.unwrap_or(0.0);
// Step 2: Use ffmpeg scene detection filter
// The `scene` filter computes the difference between consecutive frames
// and outputs when the difference exceeds the threshold (0.3 = medium sensitivity)
let scene_output = Command::new("ffprobe")
.args([
"-v",
"quiet",
"-show_entries",
"frame=pts_time",
"-of",
"compact=p=0:nk=1",
"-f",
"lavfi",
&format!("movie={},select='gt(scene\\,0.3)',showinfo", video_path),
"-show_frames",
])
.output()
.context("Failed to run ffmpeg scene detection")?;
let stderr_output = String::from_utf8_lossy(&scene_output.stderr);
let mut scene_times: Vec<f64> = Vec::new();
// Parse ffmpeg showinfo output for scene changes
// Format: [Parsed_showinfo...] pts:123.456 pts_time:123.456 ...
for line in stderr_output.lines() {
if line.contains("pts_time:") {
if let Some(pos) = line.find("pts_time:") {
let rest = &line[pos + 9..];
let time_str = rest.split_whitespace().next().unwrap_or("");
if let Ok(t) = time_str.parse::<f64>() {
scene_times.push(t);
}
}
}
}
// Step 3: Build scenes from cut points
let mut scenes: Vec<CutScene> = Vec::new();
let mut prev_time = 0.0;
let mut prev_frame: u64 = 0;
for (i, &cut_time) in scene_times.iter().enumerate() {
let end_frame = (cut_time * fps).round() as u64;
let start_frame = prev_frame;
if end_frame > start_frame {
scenes.push(CutScene {
scene_number: (i + 1) as u32,
start_frame: prev_frame,
end_frame: end_frame.saturating_sub(1),
start_time: prev_time,
end_time: cut_time - (1.0 / fps),
});
}
prev_time = cut_time;
prev_frame = end_frame;
}
// Final scene (last cut point → end of video)
if total_frames > 0 && prev_frame < total_frames {
scenes.push(CutScene {
scene_number: (scenes.len() + 1) as u32,
start_frame: prev_frame,
end_frame: total_frames.saturating_sub(1),
start_time: prev_time,
end_time: duration,
});
}
// If no scenes detected, create a single scene covering the whole video
if scenes.is_empty() && total_frames > 0 {
scenes.push(CutScene {
scene_number: 1,
start_frame: 0,
end_frame: total_frames.saturating_sub(1),
start_time: 0.0,
end_time: duration,
});
}
Ok(CutResult {
frame_count: total_frames,
fps,
scenes,
})
}
/// Parse an ffprobe-style frame rate into `f64`.
///
/// Accepts either a rational such as `"30000/1001"` or a plain number such as
/// `"30"`. Returns `None` when a component is not numeric or when the
/// denominator is not strictly positive.
fn parse_fraction(s: &str) -> Option<f64> {
    match s.split_once('/') {
        Some((numerator, denominator)) => {
            let numerator: f64 = numerator.parse().ok()?;
            let denominator: f64 = denominator.parse().ok()?;
            if denominator > 0.0 {
                Some(numerator / denominator)
            } else {
                // A string containing '/' can never parse as a bare float.
                None
            }
        }
        None => s.parse::<f64>().ok(),
    }
}
// ── Tests ─────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_fraction() {
    // NTSC rate: 30000/1001 ≈ 29.97 fps.
    let delta = (parse_fraction("30000/1001").unwrap() - 29.97).abs();
    assert!(delta < 0.01);
}
#[test]
fn test_parse_fraction_int() {
    // A plain number without a slash is accepted as-is.
    let delta = (parse_fraction("30").unwrap() - 30.0).abs();
    assert!(delta < 0.01);
}
#[test]
fn test_parse_fraction_invalid() {
    // Non-numeric components must be rejected rather than panic.
    assert_eq!(parse_fraction("not/a/num"), None);
}
#[test]
fn test_parse_fraction_zero_den() {
    // A zero denominator has no meaningful rate.
    assert_eq!(parse_fraction("1/0"), None);
}
#[test]
fn test_cut_result_serialization() {
let result = CutResult {
@@ -81,8 +250,9 @@ mod tests {
};
let json = serde_json::to_string(&result).unwrap();
assert!(json.contains("frame_count"));
assert!(json.contains("scene_number"));
assert!(json.contains("1"));
assert!(json.contains("fps"));
}
#[test]
@@ -90,20 +260,23 @@ mod tests {
let json = r#"{
"frame_count": 100,
"fps": 30.0,
"scenes": [
{"scene_number": 1, "start_frame": 0, "end_frame": 30, "start_time": 0.0, "end_time": 1.0},
{"scene_number": 2, "start_frame": 31, "end_frame": 60, "start_time": 1.033, "end_time": 2.0}
]
"scenes": [{
"scene_number": 1,
"start_frame": 0,
"end_frame": 30,
"start_time": 0.0,
"end_time": 1.0
}]
}"#;
let result: CutResult = serde_json::from_str(json).unwrap();
assert_eq!(result.frame_count, 100);
assert_eq!(result.scenes.len(), 2);
assert_eq!(result.scenes[1].scene_number, 2);
assert_eq!(result.scenes.len(), 1);
assert_eq!(result.scenes[0].scene_number, 1);
assert_eq!(result.scenes[0].start_frame, 0);
}
#[test]
fn test_cut_result_empty_scenes() {
fn test_empty_scenes() {
let result = CutResult {
frame_count: 0,
fps: 0.0,
@@ -111,17 +284,4 @@ mod tests {
};
assert!(result.scenes.is_empty());
}
#[test]
fn test_cut_scene_times() {
let scene = CutScene {
scene_number: 1,
start_frame: 0,
end_frame: 30,
start_time: 0.0,
end_time: 1.0,
};
assert!(scene.end_time > scene.start_time);
assert_eq!(scene.scene_number, 1);
}
}

View File

@@ -109,8 +109,7 @@ pub fn validate_python_env() -> Result<()> {
tracing::warn!("Expected Python 3.11, got: {}", version.trim());
}
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
.unwrap_or_else(|_| {
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
manifest.join("scripts").to_string_lossy().to_string()
});
@@ -133,8 +132,7 @@ impl PythonExecutor {
pub fn new() -> Result<Self> {
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
.unwrap_or_else(|_| {
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
manifest.join("scripts").to_string_lossy().to_string()
});
@@ -173,7 +171,8 @@ impl PythonExecutor {
if let Some(expected_hash) = self.checksums.get(&rel_path) {
let output = std::process::Command::new("shasum")
.arg("-a").arg("256")
.arg("-a")
.arg("256")
.arg(&script_path)
.output()
.context("Failed to run shasum for integrity check")?;
@@ -235,8 +234,9 @@ impl PythonExecutor {
}
// Verify script integrity via SHA256 checksum before execution
self.verify_script_integrity(script_name)
.context("Pre-execution integrity check failed — possible version mismatch or corruption")?;
self.verify_script_integrity(script_name).context(
"Pre-execution integrity check failed — possible version mismatch or corruption",
)?;
// Mark the output file as in-progress (add a .tmp suffix)
let output_path = args.get(1).map(|p| std::path::PathBuf::from(p));

View File

@@ -44,22 +44,59 @@ pub enum CrowdSize {
/// Indoor-indicative YOLO classes (COCO labels)
const INDOOR_CLASSES: &[&str] = &[
"chair", "couch", "bed", "dining table", "toilet", "tv", "laptop",
"microwave", "oven", "refrigerator", "sink", "book", "clock",
"vase", "potted plant",
"chair",
"couch",
"bed",
"dining table",
"toilet",
"tv",
"laptop",
"microwave",
"oven",
"refrigerator",
"sink",
"book",
"clock",
"vase",
"potted plant",
];
/// Vehicle-indicative classes (person + vehicle = transport scene)
const VEHICLE_CLASSES: &[&str] = &[
"car", "truck", "bus", "train", "boat", "aeroplane", "bicycle", "motorbike",
"car",
"truck",
"bus",
"train",
"boat",
"aeroplane",
"bicycle",
"motorbike",
];
/// Outdoor-indicative YOLO classes
const OUTDOOR_CLASSES: &[&str] = &[
"car", "truck", "bus", "train", "boat", "airplane",
"traffic light", "fire hydrant", "stop sign", "parking meter",
"bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
"bear", "zebra", "giraffe", "tree",
"car",
"truck",
"bus",
"train",
"boat",
"airplane",
"traffic light",
"fire hydrant",
"stop sign",
"parking meter",
"bench",
"bird",
"cat",
"dog",
"horse",
"sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"tree",
];
/// Build heuristic scene metadata from disk files (yolo.json + DB face data).
@@ -113,13 +150,14 @@ pub async fn build_heuristic_scene_meta(
// Get face counts grouped by frame
let fd_table = schema::table_name("face_detections");
let face_rows: Vec<(i64, i64)> = sqlx::query_as(
&format!("SELECT frame_number, COUNT(*) as fc \
let face_rows: Vec<(i64, i64)> = sqlx::query_as(&format!(
"SELECT frame_number, COUNT(*) as fc \
FROM {} \
WHERE file_uuid = $1 AND frame_number IS NOT NULL \
GROUP BY frame_number \
ORDER BY frame_number", fd_table),
)
ORDER BY frame_number",
fd_table
))
.bind(file_uuid)
.fetch_all(pool)
.await
@@ -166,7 +204,10 @@ pub async fn build_heuristic_scene_meta(
let outdoor_ratio = outdoor_objects as f64 / frame_count.max(1) as f64;
let total_indicator = indoor_ratio + outdoor_ratio;
let (indoor_score, outdoor_score) = if total_indicator > 0.0 {
(indoor_ratio / total_indicator, outdoor_ratio / total_indicator)
(
indoor_ratio / total_indicator,
outdoor_ratio / total_indicator,
)
} else {
(0.5, 0.5)
};
@@ -187,17 +228,13 @@ pub async fn build_heuristic_scene_meta(
.map(|c| class_frame_presence.get(*c).copied().unwrap_or(0))
.sum();
let person_ratio = person_frames as f64 / frame_count.max(1) as f64;
let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0
&& outdoor_score > 0.3;
let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0 && outdoor_score > 0.3;
// Dominant objects: rank by frame presence (not total count)
let mut sorted: Vec<_> = class_frame_presence.into_iter().collect();
sorted.sort_by(|a, b| b.1.cmp(&a.1));
let dominant_objects: Vec<String> = sorted
.iter()
.take(3)
.map(|(cls, _)| cls.clone())
.collect();
let dominant_objects: Vec<String> =
sorted.iter().take(3).map(|(cls, _)| cls.clone()).collect();
segments.push(SceneSegmentMeta {
segment_index: idx as u32 + 1,
@@ -229,12 +266,15 @@ pub async fn build_heuristic_scene_meta(
/// Full pipeline entry point: reads CUT data, generates heuristic metadata, writes JSON.
/// Called from job_worker post-processing trigger.
pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &str) -> Result<usize> {
pub async fn generate_scene_meta(
db: &crate::core::db::PostgresDb,
file_uuid: &str,
) -> Result<usize> {
let pool = db.pool();
// Read CUT segment boundaries from cut.json
let cut_path = Path::new(crate::core::config::OUTPUT_DIR.as_str())
.join(format!("{}.cut.json", file_uuid));
let cut_path =
Path::new(crate::core::config::OUTPUT_DIR.as_str()).join(format!("{}.cut.json", file_uuid));
let segments: Vec<(i64, i64, f64, f64)> = if cut_path.exists() {
let cut_str = tokio::fs::read_to_string(&cut_path)
.await
@@ -250,8 +290,7 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s
start_time: f64,
end_time: f64,
}
let cut: CutJson = serde_json::from_str(&cut_str)
.context("Failed to parse cut.json")?;
let cut: CutJson = serde_json::from_str(&cut_str).context("Failed to parse cut.json")?;
cut.scenes
.into_iter()
.map(|s| (s.start_frame, s.end_frame, s.start_time, s.end_time))
@@ -259,9 +298,10 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s
} else {
// Fallback: query DB for video duration, make one segment
let videos_table = schema::table_name("videos");
let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as(
&format!("SELECT total_frames, duration FROM {} WHERE file_uuid = $1", videos_table),
)
let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as(&format!(
"SELECT total_frames, duration FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(file_uuid)
.fetch_optional(pool)
.await

View File

@@ -10,6 +10,7 @@ pub mod ocr;
pub mod pose;
pub mod scene_classification;
pub mod story;
pub mod tkg;
pub mod visual_chunk;
pub mod yolo;
@@ -25,7 +26,8 @@ pub use face_recognition::{
RecognizedFaceDetection,
};
pub use heuristic_scene::{
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta, SceneSegmentMeta,
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta,
SceneSegmentMeta,
};
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
@@ -34,5 +36,6 @@ pub use scene_classification::{
SceneSegment,
};
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
pub use tkg::{build_tkg, TkgResult};
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};

View File

@@ -106,7 +106,10 @@ pub async fn process_story(
}
// Fallback: Python script
tracing::warn!("[STORY] Native impl failed, falling back to Python: {:?}", result.err());
tracing::warn!(
"[STORY] Native impl failed, falling back to Python: {:?}",
result.err()
);
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("story_processor.py");
@@ -145,7 +148,11 @@ pub async fn process_story(
// ── Native implementation ─────────────────────────────────────────
fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -> Result<StoryResult> {
fn try_native_story(
_video_path: &str,
output_path: &str,
_uuid: Option<&str>,
) -> Result<StoryResult> {
let output_dir = Path::new(output_path).parent().unwrap_or(Path::new("."));
let basename = Path::new(output_path)
.file_stem()
@@ -160,8 +167,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -
let asr_data: AsrData = if asr_path.exists() {
let content = std::fs::read_to_string(&asr_path)
.with_context(|| format!("Failed to read {:?}", asr_path))?;
serde_json::from_str(&content)
.with_context(|| format!("Failed to parse {:?}", asr_path))?
serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", asr_path))?
} else {
AsrData { segments: vec![] }
};
@@ -169,8 +175,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -
let cut_data: CutData = if cut_path.exists() {
let content = std::fs::read_to_string(&cut_path)
.with_context(|| format!("Failed to read {:?}", cut_path))?;
serde_json::from_str(&content)
.with_context(|| format!("Failed to parse {:?}", cut_path))?
serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", cut_path))?
} else {
CutData { scenes: vec![] }
};
@@ -376,22 +381,39 @@ fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64
let mut unique: Vec<&String> = objects.iter().collect();
unique.sort();
unique.dedup();
let objs = unique.iter().take(5).map(|s| (*s).as_str()).collect::<Vec<_>>().join(", ");
let objs = unique
.iter()
.take(5)
.map(|s| (*s).as_str())
.collect::<Vec<_>>()
.join(", ");
parts.push(format!("Visuals: {}", objs));
}
format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | "))
}
fn generate_scene_narrative(objects: &[String], start: f64, end: f64, scene_count: usize) -> String {
fn generate_scene_narrative(
objects: &[String],
start: f64,
end: f64,
scene_count: usize,
) -> String {
let mut unique: Vec<&String> = objects.iter().collect();
unique.sort();
unique.dedup();
let top5: Vec<&String> = unique.iter().take(5).cloned().collect();
if !top5.is_empty() {
let obj_str = top5.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ");
format!("[{:.0}s-{:.0}s] {} scenes. Visuals: {}.", start, end, scene_count, obj_str)
let obj_str = top5
.iter()
.map(|s| s.as_str())
.collect::<Vec<_>>()
.join(", ");
format!(
"[{:.0}s-{:.0}s] {} scenes. Visuals: {}.",
start, end, scene_count, obj_str
)
} else {
format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count)
}
@@ -408,7 +430,8 @@ mod tests {
let text = generate_narrative(
&["Hello world".to_string()],
&["person".to_string()],
0.0, 5.0,
0.0,
5.0,
);
assert!(text.contains("[0s-5s]"));
assert!(text.contains("Speech:"));
@@ -576,7 +599,10 @@ mod tests {
};
assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2);
assert!(result.child_chunks.iter().all(|c| c.parent_chunk_id.is_some()));
assert!(result
.child_chunks
.iter()
.all(|c| c.parent_chunk_id.is_some()));
assert!(result.parent_chunks[0].parent_chunk_id.is_none());
}
@@ -594,11 +620,7 @@ mod tests {
std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap();
std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap();
let result = try_native_story(
"/dummy.mp4",
out_path.to_str().unwrap(),
None,
).unwrap();
let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();
assert_eq!(result.stats.total_child_chunks, 0);
assert_eq!(result.stats.total_parent_chunks, 0);
@@ -616,13 +638,17 @@ mod tests {
let cut_path = dir.join(format!("{}.cut.json", basename));
let out_path = dir.join(format!("{}.story.json", basename));
std::fs::write(&asr_path, r#"{
std::fs::write(
&asr_path,
r#"{
"segments": [
{"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95},
{"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92},
{"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90}
]
}"#).unwrap();
}"#,
)
.unwrap();
std::fs::write(&cut_path, r#"{
"scenes": [
@@ -631,11 +657,7 @@ mod tests {
]
}"#).unwrap();
let result = try_native_story(
"/dummy.mp4",
out_path.to_str().unwrap(),
None,
).unwrap();
let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();
assert_eq!(result.stats.asr_children, 3);
assert_eq!(result.stats.cut_children, 2);
@@ -649,7 +671,11 @@ mod tests {
for child in &result.child_chunks {
if child.source == "asr" {
assert!(child.parent_chunk_id.is_some());
assert!(child.parent_chunk_id.as_ref().unwrap().starts_with("story_asr_"));
assert!(child
.parent_chunk_id
.as_ref()
.unwrap()
.starts_with("story_asr_"));
}
}

703
src/core/processor/tkg.rs Normal file
View File

@@ -0,0 +1,703 @@
use anyhow::{Context, Result};
use serde::Deserialize;
use sqlx::PgPool;
use std::collections::HashMap;
use std::path::Path;
use crate::core::db::postgres_db::PostgresDb;
/// Qualify a table name with the schema from `DATABASE_SCHEMA`.
///
/// The schema defaults to `dev` when the variable is unset or unreadable.
/// The `public` schema is left unqualified; any other value yields
/// `<schema>.<name>`.
fn t(name: &str) -> String {
    match std::env::var("DATABASE_SCHEMA") {
        Ok(schema) if schema == "public" => name.to_string(),
        Ok(schema) => format!("{}.{}", schema, name),
        Err(_) => format!("{}.{}", "dev", name),
    }
}
// ── Input data structs ────────────────────────────────────────────
/// Deserialized form of `<file_uuid>.yolo.json`, as read by the object-node
/// and co-occurrence builders in this file.
#[derive(Debug, Deserialize)]
struct YoloJson {
/// Per-frame detections. `build_co_occurrence_edges` looks frames up by the
/// decimal string of the frame number; a missing field yields an empty map.
#[serde(default)]
frames: HashMap<String, YoloFrameEntry>,
}
/// Detections for one frame of the YOLO output.
///
/// Two field names are accepted; the builders in this file use `detections`
/// when it is non-empty and fall back to `objects` otherwise.
#[derive(Debug, Deserialize)]
struct YoloFrameEntry {
/// Preferred detection list (empty when absent from the JSON).
#[serde(default)]
detections: Vec<YoloDetEntry>,
/// Alternative detection list, consulted only when `detections` is empty.
#[serde(default)]
objects: Vec<YoloDetEntry>,
}
/// A single detected object within a frame.
#[derive(Debug, Deserialize)]
struct YoloDetEntry {
/// Class label; used as both `external_id` and `label` of `object` nodes.
/// Defaults to an empty string when missing from the JSON.
#[serde(default)]
class_name: String,
/// Detection confidence; stored as `object_confidence` on co-occurrence
/// edges. Defaults to 0.0 when missing.
#[serde(default)]
confidence: f64,
}
/// Deserialized form of `<file_uuid>.asrx.json` (speaker-attributed ASR
/// output, judging by the field names — TODO confirm against the producer).
#[derive(Debug, Deserialize)]
struct AsrxJson {
/// Speaker-attributed segments (empty when absent from the JSON).
#[serde(default)]
segments: Vec<AsrxSegmentEntry>,
/// Per-speaker statistics keyed by speaker id. `build_speaker_nodes` creates
/// one `speaker` node per key and creates none when this is absent.
#[serde(default)]
speaker_stats: Option<HashMap<String, AsrxSpeakerStat>>,
}
/// One speaker-attributed segment from the asrx JSON.
///
/// Every field is optional on input so that a producer omitting a field does
/// not make the whole file fail to parse; missing values take the type's
/// default (`""` / `0.0` / `0`).
#[derive(Debug, Deserialize)]
struct AsrxSegmentEntry {
    /// Identifier of the speaker this segment is attributed to.
    #[serde(default)]
    speaker_id: String,
    /// Segment start, presumably in seconds — TODO confirm against producer.
    #[serde(default)]
    start_time: f64,
    /// Segment end, same unit as `start_time`.
    #[serde(default)]
    end_time: f64,
    /// First frame of the segment. Not read by this module, so it is kept
    /// optional rather than required just to satisfy the deserializer.
    #[serde(default)]
    #[allow(dead_code)]
    start_frame: i64,
    /// Last frame of the segment. Not read by this module (see `start_frame`).
    #[serde(default)]
    #[allow(dead_code)]
    end_frame: i64,
}
/// Per-speaker statistics entry from the asrx JSON.
#[derive(Debug, Deserialize)]
struct AsrxSpeakerStat {
/// Written to the speaker node as its `segment_count` property; defaults to
/// 0 when missing from the JSON.
#[serde(default)]
count: i64,
}
// ── Face detection trace ──────────────────────────────────────────
/// One row of the per-trace aggregate query in `build_face_trace_nodes`
/// (face detections grouped by `trace_id`).
#[derive(Debug, sqlx::FromRow)]
struct FaceTraceRow {
// Identifier of the face trace; becomes `trace_<id>` as the node external id.
trace_id: i64,
// Number of detection rows in the trace (`COUNT(*)`).
frame_count: i64,
// Lowest frame number in the trace (`MIN(frame_number)`).
start_f: i64,
// Highest frame number in the trace (`MAX(frame_number)`).
end_f: i64,
// Averages of the bounding-box columns; `None` is treated as 0 by the node
// builder.
avg_x: Option<f64>,
avg_y: Option<f64>,
avg_w: Option<f64>,
avg_h: Option<f64>,
}
/// One face detection row as selected by `build_co_occurrence_edges`
/// (only rows with a non-null `trace_id`).
#[derive(Debug, sqlx::FromRow)]
struct FaceDetectionRow {
// Trace the detection belongs to; maps to the `trace_<id>` face node.
trace_id: i64,
// Frame the face was detected in; matched against the YOLO frame keys.
frame_number: i64,
// Bounding-box columns are selected by the query but not read by the code
// visible here, hence the dead_code allowances.
#[allow(dead_code)]
x: Option<f64>,
#[allow(dead_code)]
y: Option<f64>,
#[allow(dead_code)]
width: Option<f64>,
#[allow(dead_code)]
height: Option<f64>,
}
// ── Public API ────────────────────────────────────────────────────
/// Summary of one `build_tkg` run: how many nodes and edges each builder
/// processed for the file.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TkgResult {
    /// `face_trace` nodes upserted (one per face trace).
    pub face_trace_nodes: usize,
    /// `object` nodes upserted (one per detected YOLO class).
    pub object_nodes: usize,
    /// `speaker` nodes upserted (one per speaker in the asrx stats).
    pub speaker_nodes: usize,
    /// Count reported by the face/object co-occurrence edge builder.
    pub co_occurrence_edges: usize,
    /// Count reported by the speaker/face edge builder.
    pub speaker_face_edges: usize,
    /// Count reported by the face/face edge builder.
    pub face_face_edges: usize,
}
/// Build the knowledge graph for one file and report what was produced.
///
/// The builders run strictly one after another, nodes first and edges second:
/// the co-occurrence edge builder in this file looks node ids up in the nodes
/// table, so the rows must already exist. Struct-literal fields are evaluated
/// in the order written, which keeps that sequencing.
///
/// # Errors
///
/// Returns the first error raised by any builder; later builders are not run.
pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Result<TkgResult> {
    let pool = db.pool();

    Ok(TkgResult {
        // Nodes
        face_trace_nodes: build_face_trace_nodes(pool, file_uuid).await?,
        object_nodes: build_yolo_object_nodes(pool, file_uuid, output_dir).await?,
        speaker_nodes: build_speaker_nodes(pool, file_uuid, output_dir).await?,
        // Edges
        co_occurrence_edges: build_co_occurrence_edges(pool, file_uuid, output_dir).await?,
        speaker_face_edges: build_speaker_face_edges(pool, file_uuid, output_dir).await?,
        face_face_edges: build_face_face_edges(pool, file_uuid).await?,
    })
}
// ── Node builders ─────────────────────────────────────────────────
/// Upsert one `face_trace` node per face trace of `file_uuid`.
///
/// Detections with a non-null `trace_id` are aggregated per trace (row count,
/// first/last frame, average bounding box) and written to the nodes table,
/// keyed by `(file_uuid, node_type, external_id)` with `external_id` set to
/// `trace_<id>`. Returns the number of traces written.
///
/// # Errors
///
/// Propagates any database or JSON serialization error; rows written before
/// the failure are not rolled back by this function.
async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str) -> Result<usize> {
    let select_sql = format!(
        r#"
SELECT trace_id,
COUNT(*)::bigint as frame_count,
MIN(frame_number) as start_f,
MAX(frame_number) as end_f,
AVG(x::float8) as avg_x,
AVG(y::float8) as avg_y,
AVG(width::float8) as avg_w,
AVG(height::float8) as avg_h
FROM {}
WHERE file_uuid = $1 AND trace_id IS NOT NULL
GROUP BY trace_id
ORDER BY trace_id
"#,
        t("face_detections")
    );
    // The statement text is the same for every trace, so build it once.
    let upsert_sql = format!(
        r#"
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
VALUES ($1, $2, $3, $4, $5::jsonb)
ON CONFLICT (file_uuid, node_type, external_id)
DO UPDATE SET
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties),
label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label)
"#,
        t("tkg_nodes")
    );

    let traces: Vec<FaceTraceRow> = sqlx::query_as(&select_sql)
        .bind(file_uuid)
        .fetch_all(pool)
        .await?;

    // Averages come back as floats (or NULL); store them as whole pixels.
    let whole = |v: Option<f64>| v.unwrap_or(0.0).round() as i64;

    for trace in &traces {
        let properties = serde_json::json!({
            "frame_count": trace.frame_count,
            "start_frame": trace.start_f,
            "end_frame": trace.end_f,
            "avg_bbox": {
                "x": whole(trace.avg_x),
                "y": whole(trace.avg_y),
                "width": whole(trace.avg_w),
                "height": whole(trace.avg_h),
            }
        });

        sqlx::query(&upsert_sql)
            .bind("face_trace")
            .bind(format!("trace_{}", trace.trace_id))
            .bind(file_uuid)
            .bind(format!("Face Trace {}", trace.trace_id))
            .bind(serde_json::to_string(&properties)?)
            .execute(pool)
            .await?;
    }

    Ok(traces.len())
}
async fn build_yolo_object_nodes(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
) -> Result<usize> {
let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
if !yolo_path.exists() {
return Ok(0);
}
let content = std::fs::read_to_string(&yolo_path)
.with_context(|| format!("Failed to read {:?}", yolo_path))?;
let yolo: YoloJson = serde_json::from_str(&content)
.with_context(|| format!("Failed to parse {:?}", yolo_path))?;
let mut class_counts: HashMap<String, i64> = HashMap::new();
for fdata in yolo.frames.values() {
let dets = if !fdata.detections.is_empty() {
&fdata.detections
} else {
&fdata.objects
};
for det in dets {
*class_counts.entry(det.class_name.clone()).or_insert(0) += 1;
}
}
let nodes_table = t("tkg_nodes");
let mut count = 0;
for (cls, cnt) in &class_counts {
let props = serde_json::json!({ "total_detections": cnt });
sqlx::query(&format!(
r#"
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
VALUES ($1, $2, $3, $4, $5::jsonb)
ON CONFLICT (file_uuid, node_type, external_id)
DO UPDATE SET
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)
"#,
nodes_table
))
.bind("object")
.bind(cls)
.bind(file_uuid)
.bind(cls)
.bind(serde_json::to_string(&props)?)
.execute(pool)
.await?;
count += 1;
}
Ok(count)
}
async fn build_speaker_nodes(pool: &PgPool, file_uuid: &str, output_dir: &str) -> Result<usize> {
let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
if !asrx_path.exists() {
return Ok(0);
}
let content = std::fs::read_to_string(&asrx_path)
.with_context(|| format!("Failed to read {:?}", asrx_path))?;
let asrx: AsrxJson = serde_json::from_str(&content)
.with_context(|| format!("Failed to parse {:?}", asrx_path))?;
let stats = asrx.speaker_stats.unwrap_or_default();
let nodes_table = t("tkg_nodes");
let mut count = 0;
for (sid, stat) in &stats {
let props = serde_json::json!({ "segment_count": stat.count });
sqlx::query(&format!(
r#"
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
VALUES ($1, $2, $3, $4, $5::jsonb)
ON CONFLICT (file_uuid, node_type, external_id)
DO UPDATE SET
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)
"#,
nodes_table
))
.bind("speaker")
.bind(sid)
.bind(file_uuid)
.bind(sid)
.bind(serde_json::to_string(&props)?)
.execute(pool)
.await?;
count += 1;
}
Ok(count)
}
// ── Edge builders ─────────────────────────────────────────────────
async fn build_co_occurrence_edges(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
) -> Result<usize> {
let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
if !yolo_path.exists() {
return Ok(0);
}
let content = std::fs::read_to_string(&yolo_path)?;
let yolo: YoloJson = serde_json::from_str(&content)?;
let face_table = t("face_detections");
let nodes_table = t("tkg_nodes");
let edges_table = t("tkg_edges");
let face_rows = sqlx::query_as::<_, FaceDetectionRow>(&format!(
r#"SELECT trace_id, frame_number, x, y, width, height
FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL
ORDER BY frame_number"#,
face_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
let mut edge_count = 0;
for face in &face_rows {
let frame_str = face.frame_number.to_string();
let yolo_frame = match yolo.frames.get(&frame_str) {
Some(f) => f,
None => continue,
};
let dets = if !yolo_frame.detections.is_empty() {
&yolo_frame.detections
} else {
&yolo_frame.objects
};
if dets.is_empty() {
continue;
}
let external_id = format!("trace_{}", face.trace_id);
let face_node: Option<(i64,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&external_id)
.fetch_optional(pool)
.await?;
let face_node_id = match face_node {
Some((id,)) => id,
None => continue,
};
for det in dets {
let obj_node: Option<(i64,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='object' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&det.class_name)
.fetch_optional(pool)
.await?;
let obj_node_id = match obj_node {
Some((id,)) => id,
None => continue,
};
let edge_props = serde_json::json!({
"frame": face.frame_number,
"object_confidence": det.confidence,
});
if let Err(e) = sqlx::query(&format!(
r#"
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
VALUES ($1, $2, $3, $4, $5::jsonb)
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
"#,
edges_table
))
.bind("CO_OCCURS_WITH")
.bind(face_node_id)
.bind(obj_node_id)
.bind(file_uuid)
.bind(serde_json::to_string(&edge_props)?)
.execute(pool)
.await
{
tracing::warn!(
"[TKG] Edge insert failed (trace={}, obj={}): {}",
face.trace_id,
det.class_name,
e
);
continue;
}
edge_count += 1;
}
}
Ok(edge_count)
}
async fn build_speaker_face_edges(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
) -> Result<usize> {
let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
if !asrx_path.exists() {
return Ok(0);
}
let content = std::fs::read_to_string(&asrx_path)?;
let asrx: AsrxJson = serde_json::from_str(&content)?;
if asrx.segments.is_empty() {
return Ok(0);
}
let face_table = t("face_detections");
let nodes_table = t("tkg_nodes");
let edges_table = t("tkg_edges");
let traces = sqlx::query_as::<_, (i64, i64, i64)>(&format!(
r#"SELECT trace_id, MIN(frame_number) as start_f, MAX(frame_number) as end_f
FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL
GROUP BY trace_id"#,
face_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
// Calculate fps from last segment
let last = asrx.segments.last().unwrap();
let fps = if last.end_time > 0.0 {
last.end_frame as f64 / last.end_time
} else {
30.0
};
let mut edge_count = 0;
for (tid, sf, ef) in &traces {
let face_ext_id = format!("trace_{}", tid);
let face_node: Option<(i64,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&face_ext_id)
.fetch_optional(pool)
.await?;
let face_node_id = match face_node {
Some((id,)) => id,
None => continue,
};
let face_start_sec = *sf as f64 / fps;
let face_end_sec = *ef as f64 / fps;
for seg in &asrx.segments {
let seg_start = seg.start_time;
let seg_end = seg.end_time;
let overlap_start = face_start_sec.max(seg_start);
let overlap_end = face_end_sec.min(seg_end);
if overlap_start >= overlap_end {
continue;
}
let overlap_dur = overlap_end - overlap_start;
let face_dur = face_end_sec - face_start_sec;
if face_dur <= 0.0 {
continue;
}
let overlap_ratio = overlap_dur / face_dur;
if overlap_ratio < 0.3 {
continue;
}
let speaker_node: Option<(i64,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='speaker' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&seg.speaker_id)
.fetch_optional(pool)
.await?;
let speaker_node_id = match speaker_node {
Some((id,)) => id,
None => continue,
};
let edge_props = serde_json::json!({
"overlap_ratio": (overlap_ratio * 1000.0).round() / 1000.0,
"overlap_duration_s": (overlap_dur * 10.0).round() / 10.0,
"face_time_range": format!("{:.1}-{:.1}s", face_start_sec, face_end_sec),
"speaker_time_range": format!("{:.1}-{:.1}s", seg_start, seg_end),
});
sqlx::query(&format!(
r#"
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
VALUES ($1, $2, $3, $4, $5::jsonb)
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
"#,
edges_table
))
.bind("SPEAKS_AS")
.bind(face_node_id)
.bind(speaker_node_id)
.bind(file_uuid)
.bind(serde_json::to_string(&edge_props)?)
.execute(pool)
.await?;
edge_count += 1;
}
}
Ok(edge_count)
}
/// Upserts `CO_OCCURS_WITH` edges between pairs of face traces that are
/// detected on the same frame.
///
/// Pairs are produced by a self-join on `(file_uuid, frame_number)` with
/// `a.trace_id < b.trace_id`, so each unordered pair appears once and the edge
/// always runs from the lower trace id to the higher one. Each edge records
/// the first shared frame and the number of joined rows for the pair.
///
/// # Returns
/// The number of upsert statements executed (one per pair whose two
/// face-trace nodes both exist). `Ok(0)` when no two traces share a frame.
///
/// # Errors
/// Fails when any query fails.
async fn build_face_face_edges(pool: &PgPool, file_uuid: &str) -> Result<usize> {
    let face_table = t("face_detections");
    let nodes_table = t("tkg_nodes");
    let edges_table = t("tkg_edges");

    // Aggregate in the database: only the first shared frame and the row count
    // per pair are needed, so there is no reason to ship every joined row.
    // BIGINT casts keep the i64 decode valid whatever the column width is.
    let pairs: Vec<(i64, i64, i64, i64)> = sqlx::query_as(&format!(
        r#"
        SELECT a.trace_id::BIGINT AS tid_a,
               b.trace_id::BIGINT AS tid_b,
               MIN(a.frame_number)::BIGINT AS first_frame,
               COUNT(*)::BIGINT AS frame_count
        FROM {0} a
        JOIN {0} b
        ON a.file_uuid = b.file_uuid
        AND a.frame_number = b.frame_number
        AND a.trace_id < b.trace_id
        WHERE a.file_uuid = $1
        AND a.trace_id IS NOT NULL
        AND b.trace_id IS NOT NULL
        GROUP BY a.trace_id, b.trace_id
        "#,
        face_table
    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await?;
    if pairs.is_empty() {
        return Ok(0);
    }

    // Resolve all face-trace node ids once instead of two SELECTs per pair.
    let face_node_ids: HashMap<String, i64> = sqlx::query_as::<_, (String, i64)>(&format!(
        "SELECT external_id, id FROM {} \
         WHERE file_uuid=$1 AND node_type='face_trace' AND external_id IS NOT NULL",
        nodes_table
    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await?
    .into_iter()
    .collect();

    // The target table is aliased so the ON CONFLICT clause does not depend
    // on the literal table name that `t()` resolves to.
    let upsert_sql = format!(
        r#"
        INSERT INTO {} AS e (edge_type, source_node_id, target_node_id, file_uuid, properties)
        VALUES ($1, $2, $3, $4, $5::jsonb)
        ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
        DO UPDATE SET properties = COALESCE(EXCLUDED.properties, e.properties)
        "#,
        edges_table
    );

    let mut edge_count = 0;
    for (tid_a, tid_b, first_frame, frame_count) in &pairs {
        let node_a = face_node_ids.get(&format!("trace_{}", tid_a));
        let node_b = face_node_ids.get(&format!("trace_{}", tid_b));
        // Pairs where either trace has no node are skipped.
        let (n_a_id, n_b_id) = match (node_a, node_b) {
            (Some(a), Some(b)) => (*a, *b),
            _ => continue,
        };
        let edge_props = serde_json::json!({
            "first_frame": first_frame,
            "frame_count": frame_count,
        });
        sqlx::query(&upsert_sql)
            .bind("CO_OCCURS_WITH")
            .bind(n_a_id)
            .bind(n_b_id)
            .bind(file_uuid)
            .bind(serde_json::to_string(&edge_props)?)
            .execute(pool)
            .await?;
        edge_count += 1;
    }
    Ok(edge_count)
}
// ── Tests ─────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    /// Frames are keyed by their number as a string and carry `detections`.
    #[test]
    fn test_yolo_json_deserialize() {
        let json = r#"{
            "frames": {
                "1": {"time_seconds": 0.0, "detections": [{"class_name": "person", "confidence": 0.9}]},
                "2": {"time_seconds": 1.0, "detections": [{"class_name": "chair", "confidence": 0.8}]}
            }
        }"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        assert_eq!(yolo.frames.len(), 2);
        assert_eq!(yolo.frames["1"].detections.len(), 1);
        assert_eq!(yolo.frames["1"].detections[0].class_name, "person");
        assert_eq!(yolo.frames["2"].detections.len(), 1);
        assert_eq!(yolo.frames["2"].detections[0].class_name, "chair");
    }

    /// An empty `frames` object is valid input.
    #[test]
    fn test_yolo_json_empty_frames() {
        let json = r#"{"frames": {}}"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        assert!(yolo.frames.is_empty());
    }

    /// Segments and the optional `speaker_stats` map are both parsed.
    #[test]
    fn test_asrx_json_deserialize() {
        let json = r#"{
            "segments": [
                {"speaker_id": "SPEAKER_01", "start_time": 0.0, "end_time": 2.0, "start_frame": 0, "end_frame": 60}
            ],
            "speaker_stats": {"SPEAKER_01": {"count": 1}}
        }"#;
        let asrx: AsrxJson = serde_json::from_str(json).unwrap();
        assert_eq!(asrx.segments.len(), 1);
        assert_eq!(asrx.segments[0].speaker_id, "SPEAKER_01");
        assert_eq!(asrx.segments[0].start_time, 0.0);
        assert_eq!(asrx.segments[0].end_time, 2.0);
        assert_eq!(asrx.speaker_stats.as_ref().map(|s| s.len()), Some(1));
    }

    /// `speaker_stats` may be omitted entirely.
    #[test]
    fn test_asrx_json_no_stats() {
        let json = r#"{"segments": []}"#;
        let asrx: AsrxJson = serde_json::from_str(json).unwrap();
        assert!(asrx.segments.is_empty());
        assert!(asrx.speaker_stats.is_none());
    }

    /// A frame that only has `objects` leaves `detections` empty, which is
    /// what the edge builder's fallback relies on.
    #[test]
    fn test_yolo_objects_fallback() {
        let json = r#"{
            "frames": {
                "1": {"objects": [{"class_name": "person"}]}
            }
        }"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        assert_eq!(yolo.frames["1"].objects.len(), 1);
        assert_eq!(yolo.frames["1"].objects[0].class_name, "person");
        assert!(yolo.frames["1"].detections.is_empty());
    }

    /// Every counter of `TkgResult` keeps the value it was built with.
    #[test]
    fn test_tkg_result() {
        let r = TkgResult {
            face_trace_nodes: 5,
            object_nodes: 10,
            speaker_nodes: 3,
            co_occurrence_edges: 20,
            speaker_face_edges: 8,
            face_face_edges: 4,
        };
        assert_eq!(r.face_trace_nodes, 5);
        assert_eq!(r.object_nodes, 10);
        assert_eq!(r.speaker_nodes, 3);
        assert_eq!(r.co_occurrence_edges, 20);
        assert_eq!(r.speaker_face_edges, 8);
        assert_eq!(r.face_face_edges, 4);
    }
}

View File

@@ -1,7 +1,7 @@
use anyhow::Result;
use sha2::{Digest, Sha256};
use std::io::Read;
use std::path::Path;
use anyhow::Result;
/// Compute SHA256 of the entire file content
pub fn compute_sha256(path: &Path) -> Result<String> {
@@ -10,7 +10,9 @@ pub fn compute_sha256(path: &Path) -> Result<String> {
let mut buf = [0u8; 65536];
loop {
let n = file.read(&mut buf)?;
if n == 0 { break; }
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
let hash = format!("{:x}", hasher.finalize());

View File

@@ -65,7 +65,11 @@ pub fn tmdb_cache_path(file_uuid: &str) -> PathBuf {
pub fn read_tmdb_cache(file_uuid: &str) -> Result<TmdbCache> {
let path = tmdb_cache_path(file_uuid);
if !path.exists() {
anyhow::bail!("TMDb cache not found: {} (expected: {})", file_uuid, path.display());
anyhow::bail!(
"TMDb cache not found: {} (expected: {})",
file_uuid,
path.display()
);
}
let content = std::fs::read_to_string(&path)
.with_context(|| format!("Failed to read TMDb cache: {}", path.display()))?;
@@ -96,9 +100,7 @@ pub fn count_cache_files() -> usize {
match std::fs::read_dir(&dir) {
Ok(entries) => entries
.filter_map(|e| e.ok())
.filter(|e| {
e.file_name().to_string_lossy().ends_with(".tmdb.json")
})
.filter(|e| e.file_name().to_string_lossy().ends_with(".tmdb.json"))
.count(),
Err(_) => 0,
}

View File

@@ -46,11 +46,12 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
// Step 2: Load face_detections grouped by trace_id
let fd_table = schema::table_name("face_detections");
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
&format!("SELECT trace_id, embedding FROM {} \
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(&format!(
"SELECT trace_id, embedding FROM {} \
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
ORDER BY trace_id", fd_table),
)
ORDER BY trace_id",
fd_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
@@ -156,9 +157,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
let fd_table = schema::table_name("face_detections");
let mut after_qc = HashMap::new();
for (&tid, &(id, ref name)) in &matched {
let cnt: i64 = sqlx::query_scalar(
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2", fd_table),
)
let cnt: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2",
fd_table
))
.bind(file_uuid)
.bind(tid)
.fetch_one(pool)
@@ -194,9 +196,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
// Step 5: Update DB
let mut updated = 0usize;
for (&tid, &(id, _)) in &matched {
let r = sqlx::query(
&format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table),
)
let r = sqlx::query(&format!(
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
fd_table
))
.bind(id)
.bind(file_uuid)
.bind(tid)
@@ -223,8 +226,7 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) -> Result<usize> {
let fd_table = schema::table_name("face_detections");
// Find all collision pairs: same identity, same frame, different trace
let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(
&format!(
let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(&format!(
"SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \
FROM {} a \
JOIN {} b \
@@ -236,8 +238,7 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
AND a.identity_id = b.identity_id \
ORDER BY a.identity_id, a.frame_number",
fd_table, fd_table
),
)
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
@@ -256,25 +257,36 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
let mut unbound = 0usize;
for ((id, ta, tb), overlap_frames) in &collision_groups {
// Get face detection count for each trace
let cnt_a: i64 = sqlx::query_scalar(
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table)
)
.bind(file_uuid).bind(ta).bind(id)
.fetch_one(pool).await.unwrap_or(0);
let cnt_a: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
fd_table
))
.bind(file_uuid)
.bind(ta)
.bind(id)
.fetch_one(pool)
.await
.unwrap_or(0);
let cnt_b: i64 = sqlx::query_scalar(
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table)
)
.bind(file_uuid).bind(tb).bind(id)
.fetch_one(pool).await.unwrap_or(0);
let cnt_b: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
fd_table
))
.bind(file_uuid)
.bind(tb)
.bind(id)
.fetch_one(pool)
.await
.unwrap_or(0);
// Unbind the trace with fewer detections (likely the false positive)
let victim = if cnt_a <= cnt_b { *ta } else { *tb };
let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b };
sqlx::query(
&format!("UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2", fd_table),
)
sqlx::query(&format!(
"UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2",
fd_table
))
.bind(file_uuid)
.bind(victim)
.execute(pool)

View File

@@ -45,7 +45,14 @@ fn extract_movie_name(filename: &str) -> Option<String> {
.file_stem()
.and_then(|s| s.to_str())?;
let cleaned = name.replace(['.', '_'], " ").trim().to_string();
// Take only the part before year patterns or separators
let cleaned = name
.replace(['.', '_'], " ")
.split(|c: char| c == '(' || c == '[' || c == '│' || c == '|')
.next()
.unwrap_or(&name)
.trim()
.to_string();
if cleaned.is_empty() || cleaned.len() < 3 {
return None;
@@ -53,10 +60,7 @@ fn extract_movie_name(filename: &str) -> Option<String> {
Some(cleaned)
}
pub async fn probe_from_cache(
db: &PostgresDb,
file_uuid: &str,
) -> Result<TmdbProbeResult> {
pub async fn probe_from_cache(db: &PostgresDb, file_uuid: &str) -> Result<TmdbProbeResult> {
let cache = crate::core::tmdb::cache::read_tmdb_cache(file_uuid)?;
if cache.identities.is_empty() && !cache.cast.is_empty() {
return create_identities_from_data(db, file_uuid, &cache.movie, &cache.cast).await;
@@ -83,7 +87,8 @@ async fn upsert_identities_from_disk(
}
match std::fs::read_to_string(&path) {
Ok(content) => {
match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content) {
match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content)
{
Ok(identity_file) => {
let identities_table = crate::core::db::schema::table_name("identities");
let result = sqlx::query(&format!(
@@ -106,21 +111,35 @@ async fn upsert_identities_from_disk(
match result {
Ok(_) => {
info!("[TMDB] Upserted identity: {} (uuid={})", identity_file.name, identity_file.identity_uuid);
info!(
"[TMDB] Upserted identity: {} (uuid={})",
identity_file.name, identity_file.identity_uuid
);
identities_created += 1;
}
Err(e) => {
warn!("[TMDB] Failed to upsert identity '{}': {}", identity_file.name, e);
warn!(
"[TMDB] Failed to upsert identity '{}': {}",
identity_file.name, e
);
}
}
}
Err(e) => {
warn!("[TMDB] Failed to parse identity file {}: {}", path.display(), e);
warn!(
"[TMDB] Failed to parse identity file {}: {}",
path.display(),
e
);
}
}
}
Err(e) => {
warn!("[TMDB] Failed to read identity file {}: {}", path.display(), e);
warn!(
"[TMDB] Failed to read identity file {}: {}",
path.display(),
e
);
}
}
}
@@ -181,7 +200,9 @@ pub async fn create_identities_from_data(
continue;
}
let profile_url = member.profile_path.as_ref()
let profile_url = member
.profile_path
.as_ref()
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
let metadata = serde_json::json!({
@@ -226,8 +247,13 @@ pub async fn create_identities_from_data(
member.name, member.character, uuid_str
);
identities_created += 1;
if let Err(e) = crate::core::identity::storage::save_identity_file(db, &uuid_str).await {
warn!("[TMDB] Failed to save identity file for {}: {}", member.name, e);
if let Err(e) =
crate::core::identity::storage::save_identity_file(db, &uuid_str).await
{
warn!(
"[TMDB] Failed to save identity file for {}: {}",
member.name, e
);
}
// Download and save TMDb profile image locally
if let Some(url) = &profile_url {
@@ -393,8 +419,10 @@ pub async fn probe_movie(
overview: movie.overview.clone(),
poster_path: movie.poster_path.clone(),
};
let cache_cast: Vec<cache::TmdbCastMember> = credits.cast.iter().map(|m| {
cache::TmdbCastMember {
let cache_cast: Vec<cache::TmdbCastMember> = credits
.cast
.iter()
.map(|m| cache::TmdbCastMember {
id: m.id,
name: m.name.clone(),
character: m.character.clone(),
@@ -410,8 +438,8 @@ pub async fn probe_movie(
deathday: None,
gender: None,
homepage: None,
}
}).collect();
})
.collect();
// Write TMDb cache so probe_from_cache can be used next time
let cache_obj = cache::TmdbCache {

View File

@@ -60,7 +60,11 @@ pub async fn check_tmdb_api() -> TmdbResourceStatus {
enabled: *config::tmdb::PROBE_ENABLED,
api_reachable: Some(reachable),
api_latency_ms: Some(latency),
api_error: if reachable { None } else { Some(format!("HTTP {}", resp.status())) },
api_error: if reachable {
None
} else {
Some(format!("HTTP {}", resp.status()))
},
last_check_at: Some(chrono::Utc::now().to_rfc3339()),
}
}
@@ -84,9 +88,10 @@ pub fn count_cache_files() -> usize {
pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {
let identities_table = crate::core::db::schema::table_name("identities");
let count: i64 = sqlx::query_scalar(
&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table)
)
let count: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE source = 'tmdb'",
identities_table
))
.fetch_one(pool)
.await?;
Ok(count)
@@ -94,9 +99,10 @@ pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {
pub async fn count_tmdb_identities_with_embedding(pool: &sqlx::PgPool) -> Result<i64> {
let identities_table = crate::core::db::schema::table_name("identities");
let count: i64 = sqlx::query_scalar(
&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL", identities_table)
)
let count: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL",
identities_table
))
.fetch_one(pool)
.await?;
Ok(count)

View File

@@ -147,7 +147,7 @@ impl ChunkSelector {
// Try to match UUID - either exact match or partial match
let _uuid = payload
.and_then(|p| p.get("uuid"))
.and_then(|p| p.get("file_uuid"))
.and_then(|v| v.as_str())
.unwrap_or("");

View File

@@ -8,10 +8,10 @@ use tracing::{info, warn};
use momentry_core::core::api_key::{ApiKeyService, ApiKeyType};
use momentry_core::core::chunk::types::{Chunk, ChunkRule, ChunkType};
use momentry_core::core::db::schema;
use momentry_core::core::db::Database;
use momentry_core::core::time::FrameTime;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::core::db::schema;
use momentry_core::{
Embedder, OutputDir, PostgresDb, QdrantDb, RedisClient, VectorPayload, VideoRecord, VideoStatus,
};
@@ -1985,7 +1985,8 @@ async fn main() -> Result<()> {
chunk_id: None,
created_at: String::new(),
};
db.store_pre_chunk(&uuid, "asr", serde_json::to_value(&pre_chunk)?).await?;
db.store_pre_chunk(&uuid, "asr", serde_json::to_value(&pre_chunk)?)
.await?;
asr_pre_chunk_ids.push(i as i64);
}
@@ -2009,7 +2010,8 @@ async fn main() -> Result<()> {
chunk_id: None,
created_at: String::new(),
};
db.store_pre_chunk(&uuid, "cut", serde_json::to_value(&pre_chunk)?).await?;
db.store_pre_chunk(&uuid, "cut", serde_json::to_value(&pre_chunk)?)
.await?;
cut_pre_chunk_ids.push(i as i64);
}
@@ -2037,7 +2039,8 @@ async fn main() -> Result<()> {
chunk_id: None,
created_at: String::new(),
};
db.store_pre_chunk(&uuid, "time", serde_json::to_value(&pre_chunk)?).await?;
db.store_pre_chunk(&uuid, "time", serde_json::to_value(&pre_chunk)?)
.await?;
time_pre_chunk_ids.push(time_pre_chunk_ids.len() as i64);
time_start = time_end;
}
@@ -2117,7 +2120,8 @@ async fn main() -> Result<()> {
frame_path: None,
created_at: String::new(),
};
db.store_frame(&uuid, *frame_num as i64, serde_json::to_value(&frame)?).await?;
db.store_frame(&uuid, *frame_num as i64, serde_json::to_value(&frame)?)
.await?;
}
println!("Stored {} frames", all_frames.len());
@@ -2357,8 +2361,7 @@ async fn main() -> Result<()> {
for frame in &context_frames {
if let Some(objects) = frame["yolo_objects"].as_array() {
for obj in objects {
if let Some(class_name) =
obj.get("class_name").and_then(|v| v.as_str())
if let Some(class_name) = obj.get("class_name").and_then(|v| v.as_str())
{
*all_objects.entry(class_name.to_string()).or_insert(0) += 1;
}
@@ -2494,9 +2497,11 @@ async fn main() -> Result<()> {
}
let qdrant_payload = VectorPayload {
uuid: chunk.uuid.clone(),
file_uuid: chunk.uuid.clone(),
chunk_id: chunk.chunk_id.clone(),
chunk_type: "sentence".to_string(),
start_frame: chunk.start_frame,
end_frame: chunk.end_frame,
start_time: chunk.start_time().seconds(),
end_time: chunk.end_time().seconds(),
text: Some(text.to_string()),

View File

@@ -79,12 +79,8 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Yolo => {
VerificationResult::ok(proc_name, file_uuid)
}
ProcessorType::Face => {
VerificationResult::ok(proc_name, file_uuid)
}
ProcessorType::Yolo => VerificationResult::ok(proc_name, file_uuid),
ProcessorType::Face => VerificationResult::ok(proc_name, file_uuid),
ProcessorType::Ocr => {
let frames = value.get("frames").and_then(|v| v.as_array());
match frames {
@@ -114,7 +110,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
ProcessorType::FiveW1H => {
let scenes = value.get("scenes").and_then(|v| v.as_array());
match scenes {
Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"),
Some(s) if s.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 scenes")
}
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}

View File

@@ -37,7 +37,8 @@ pub async fn run_watcher() -> Result<()> {
info!("Watch directories: {:?}", dirs);
tokio::spawn(async move {
let mut interval = time::interval(std::time::Duration::from_millis(config.poll_interval_ms));
let mut interval =
time::interval(std::time::Duration::from_millis(config.poll_interval_ms));
let mut known = std::collections::HashSet::new();
loop {
interval.tick().await;
@@ -109,15 +110,43 @@ async fn auto_register_file(file_path: &str) {
}
};
let file_name = pre.get("file_name").and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
let file_name = pre
.get("file_name")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
let probe = pre.get("probe_json").cloned().unwrap_or_default();
let file_type = pre.get("file_type").and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
let canonical_path = pre.get("file_path").and_then(|v| v.as_str()).unwrap_or(file_path).to_string();
let file_type = pre
.get("file_type")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
let canonical_path = pre
.get("file_path")
.and_then(|v| v.as_str())
.unwrap_or(file_path)
.to_string();
let duration = probe.get("format").and_then(|f| f.get("duration")).and_then(|v| v.as_f64()).unwrap_or(0.0);
let width = probe.get("format").and_then(|f| f.get("width")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
let height = probe.get("format").and_then(|f| f.get("height")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
let fps_val = probe.get("format").and_then(|f| f.get("fps")).and_then(|v| v.as_f64()).unwrap_or(0.0);
let duration = probe
.get("format")
.and_then(|f| f.get("duration"))
.and_then(|v| v.as_f64())
.unwrap_or(0.0);
let width = probe
.get("format")
.and_then(|f| f.get("width"))
.and_then(|v| v.as_u64())
.unwrap_or(0) as u32;
let height = probe
.get("format")
.and_then(|f| f.get("height"))
.and_then(|v| v.as_u64())
.unwrap_or(0) as u32;
let fps_val = probe
.get("format")
.and_then(|f| f.get("fps"))
.and_then(|v| v.as_f64())
.unwrap_or(0.0);
let record = VideoRecord {
id: 0,
@@ -158,7 +187,10 @@ async fn auto_register_file(file_path: &str) {
match db.register_video(&record).await {
Ok(id) => info!("[WATCHER] Auto-registered {} (id={})", record.file_uuid, id),
Err(e) => warn!("[WATCHER] Auto-register failed for {}: {}", record.file_uuid, e),
Err(e) => warn!(
"[WATCHER] Auto-register failed for {}: {}",
record.file_uuid, e
),
}
}
@@ -175,10 +207,14 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string());
let birthday = std::fs::metadata(&path).ok()
let birthday = std::fs::metadata(&path)
.ok()
.and_then(|m| m.modified().ok())
.map(|t| {
let secs = t.duration_since(std::time::UNIX_EPOCH).unwrap_or_default().as_secs();
let secs = t
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
chrono::DateTime::from_timestamp(secs as i64, 0)
.map(|dt| dt.to_rfc3339())
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339())
@@ -186,9 +222,8 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339());
let mac = crate::core::storage::uuid::get_mac_address();
let file_uuid = crate::core::storage::uuid::compute_birth_uuid(
&mac, &birthday, &canonical_str, &filename,
);
let file_uuid =
crate::core::storage::uuid::compute_birth_uuid(&mac, &birthday, &canonical_str, &filename);
let pre_path = std::path::PathBuf::from(&output_dir).join(format!("{}.pre.json", file_uuid));
if pre_path.exists() {
@@ -198,15 +233,22 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
info!("[PRE-PROCESS] Pre-processing: {} → {}", filename, file_uuid);
let content_hash = crate::core::storage::content_hash::compute_sha256(&path).unwrap_or_default();
let content_hash =
crate::core::storage::content_hash::compute_sha256(&path).unwrap_or_default();
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string());
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let probe_json = crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await;
let probe_json =
crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await;
let file_type = probe_json.get("format").and_then(|f| f.get("file_type")).and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
let file_type = probe_json
.get("format")
.and_then(|f| f.get("file_type"))
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
let pre_data = serde_json::json!({
"file_name": filename,

View File

@@ -12,12 +12,13 @@ use crate::core::chunk::{rule1_ingest, rule3_ingest};
use crate::core::config::OUTPUT_DIR;
use crate::core::db::qdrant_db::QdrantDb;
use crate::core::db::{
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload,
VideoStatus,
};
use crate::core::embedding::Embedder;
use crate::core::processor::heuristic_scene::generate_scene_meta;
use crate::worker::config::WorkerConfig;
use crate::worker::processor::{ProcessorPool, ProcessorTask};
use crate::core::processor::heuristic_scene::generate_scene_meta;
use crate::worker::resources::SystemResources;
use sqlx::PgPool;
@@ -70,14 +71,15 @@ impl JobWorker {
// Reset stale running jobs: jobs stuck in 'running' with no active processor results
let monitor_jobs_table = schema::table_name("monitor_jobs");
let processor_results_table = schema::table_name("processor_results");
if let Err(e) = sqlx::query(
&format!("UPDATE {} SET status = 'pending', updated_at = NOW()
if let Err(e) = sqlx::query(&format!(
"UPDATE {} SET status = 'pending', updated_at = NOW()
WHERE status = 'running'
AND id NOT IN (
SELECT DISTINCT job_id FROM {}
WHERE status IN ('pending', 'running')
)", monitor_jobs_table, processor_results_table),
)
)",
monitor_jobs_table, processor_results_table
))
.execute(self.db.pool())
.await
{
@@ -608,12 +610,23 @@ impl JobWorker {
}
let fu = uuid;
let rule1 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"));
let rule1 = check!(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"
));
let vector = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"));
let rule3 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"));
let rule3 = check!(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"
));
let trace = check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL"));
let tkg = check!(&format!("SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1", schema::table_name("tkg_nodes")));
let scene_meta = std::path::Path::new(&format!("{}/{fu}.scene_meta.json", crate::core::config::OUTPUT_DIR.as_str())).exists();
let tkg = check!(&format!(
"SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1",
schema::table_name("tkg_nodes")
));
let scene_meta = std::path::Path::new(&format!(
"{}/{fu}.scene_meta.json",
crate::core::config::OUTPUT_DIR.as_str()
))
.exists();
let five_w1h = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != '' LIMIT 1"));
let all_ok = rule1 && vector && rule3 && trace && tkg && scene_meta && five_w1h;
@@ -847,26 +860,23 @@ impl JobWorker {
Err(e) => error!("❌ Trace chunk ingestion failed: {}", e),
}
// Build Temporal Knowledge Graph (TKG)
info!("📝 Building TKG graph...");
let executor = match crate::core::processor::PythonExecutor::new() {
Ok(ex) => ex,
Err(e) => {
error!("Failed to create PythonExecutor for TKG: {}", e);
return;
}
};
match executor
.run(
"tkg_builder.py",
&["--file-uuid", &uuid_clone],
Some(&uuid_clone),
"TKG_BUILDER",
Some(std::time::Duration::from_secs(300)),
// Build Temporal Knowledge Graph (TKG) — native Rust
info!("📝 Building TKG graph (Rust)...");
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
.unwrap_or_else(|_| ".".to_string());
match crate::core::processor::tkg::build_tkg(
db_clone.as_ref(),
&uuid_clone,
&output_dir,
)
.await
{
Ok(()) => info!("✅ TKG built for {}", uuid_clone),
Ok(r) => info!(
"✅ TKG built for {}: {} face, {} obj, {} spk, {} co, {} sf, {} ff edges",
uuid_clone,
r.face_trace_nodes, r.object_nodes, r.speaker_nodes,
r.co_occurrence_edges, r.speaker_face_edges, r.face_face_edges,
),
Err(e) => error!("❌ TKG build failed for {}: {}", uuid_clone, e),
}
}
@@ -898,7 +908,7 @@ impl JobWorker {
let ids = sqlx::query_scalar::<_, uuid::Uuid>(
"SELECT DISTINCT i.uuid FROM identities i \
JOIN face_detections fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL"
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
)
.bind(&uuid_clone)
.fetch_all(db_clone.pool())
@@ -907,12 +917,18 @@ impl JobWorker {
for id_uuid in &ids {
let us = id_uuid.to_string().replace('-', "");
if let Err(e) = crate::core::identity::storage::save_identity_file(
&db_clone, &us
).await {
&db_clone, &us,
)
.await
{
warn!("[P2.5] Failed to save identity file {}: {}", us, e);
}
}
info!("[P2.5] {} identity files saved for {}", ids.len(), uuid_clone);
info!(
"[P2.5] {} identity files saved for {}",
ids.len(),
uuid_clone
);
}
Err(e) => error!("❌ TMDb face matching failed for {}: {}", uuid_clone, e),
}
@@ -1088,8 +1104,8 @@ impl JobWorker {
let pool = db.pool();
let chunk_table = schema::table_name("chunk");
let rows = sqlx::query_as::<_, (String, String, String, f64, f64, String)>(
&format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64, String)>(
&format!("SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
)
.bind(uuid)
.fetch_all(pool)
@@ -1107,7 +1123,17 @@ impl JobWorker {
);
let mut stored = 0usize;
for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
for (
chunk_id,
_chunk_type,
text,
start_frame,
end_frame,
start_time,
end_time,
_content_str,
) in &rows
{
if text.is_empty() {
continue;
}
@@ -1119,9 +1145,11 @@ impl JobWorker {
continue;
}
let payload = VectorPayload {
uuid: uuid.to_string(),
file_uuid: uuid.to_string(),
chunk_id: chunk_id.clone(),
chunk_type: "sentence".to_string(),
start_frame: *start_frame,
end_frame: *end_frame,
start_time: *start_time,
end_time: *end_time,
text: Some(text.clone()),

View File

@@ -237,11 +237,19 @@ impl ProcessorPool {
let key = format!("{}job:{}:processor:{}", prefix, &job.uuid, &processor_name);
let now = chrono::Utc::now().to_rfc3339();
let _: Option<String> = redis::cmd("HSET")
.arg(&key).arg("started_at").arg(&now)
.query_async(&mut conn).await.ok();
.arg(&key)
.arg("started_at")
.arg(&now)
.query_async(&mut conn)
.await
.ok();
let _: Option<String> = redis::cmd("HSET")
.arg(&key).arg("embedding_started_at").arg(&now)
.query_async(&mut conn).await.ok();
.arg(&key)
.arg("embedding_started_at")
.arg(&now)
.query_async(&mut conn)
.await
.ok();
}
// Subscribe to Redis progress pub/sub and update processor hash in real-time
@@ -254,10 +262,12 @@ impl ProcessorPool {
let cb_processor = sub_processor.clone();
if let Err(e) = sub_redis
.subscribe_and_callback(&sub_uuid, move |msg| {
tracing::info!("[Subscriber] Got msg for={} cur={} tot={}",
tracing::info!(
"[Subscriber] Got msg for={} cur={} tot={}",
msg.processor,
msg.data.current.unwrap_or(0),
msg.data.total.unwrap_or(0));
msg.data.total.unwrap_or(0)
);
if msg.processor == cb_processor {
let cur = msg.data.current.unwrap_or(0);
let tot = msg.data.total.unwrap_or(0);
@@ -266,11 +276,18 @@ impl ProcessorPool {
let u = cb_uuid.clone();
let p = cb_processor.clone();
tokio::spawn(async move {
match r.update_worker_processor_status(
&u, &p, "running", None,
cur, oc, tot, 0, 0,
).await {
Ok(_) => tracing::info!("[Subscriber] Updated {}: cur={} tot={}", p, cur, tot),
match r
.update_worker_processor_status(
&u, &p, "running", None, cur, oc, tot, 0, 0,
)
.await
{
Ok(_) => tracing::info!(
"[Subscriber] Updated {}: cur={} tot={}",
p,
cur,
tot
),
Err(e) => tracing::error!("[Subscriber] FAILED {}: {}", p, e),
}
});
@@ -756,9 +773,11 @@ impl ProcessorPool {
.enumerate()
.map(|(i, segment)| {
// Prefer ASR output frames, fallback to time-based conversion
let start_frame = segment.start_frame
let start_frame = segment
.start_frame
.unwrap_or_else(|| (segment.start_time * fps).round() as i64);
let end_frame = segment.end_frame
let end_frame = segment
.end_frame
.unwrap_or_else(|| (segment.end_time * fps).round() as i64);
let data = serde_json::json!({
"text": segment.text,
@@ -892,7 +911,11 @@ impl ProcessorPool {
tracing::info!(
"Storing {} Face pre-chunks + {} detections for video {}",
frames_count,
face_result.frames.iter().map(|f| f.faces.len()).sum::<usize>(),
face_result
.frames
.iter()
.map(|f| f.faces.len())
.sum::<usize>(),
uuid
);
@@ -911,7 +934,10 @@ impl ProcessorPool {
detections_to_store.push((
frame.frame as i64,
frame.timestamp,
face.x, face.y, face.width, face.height,
face.x,
face.y,
face.width,
face.height,
face.confidence,
));
}
@@ -1170,9 +1196,10 @@ impl ProcessorPool {
"top_5": scene.top_5,
});
let chunk_table = crate::core::db::schema::table_name("chunk");
let _ = sqlx::query(
&format!("UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3", chunk_table)
)
let _ = sqlx::query(&format!(
"UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3",
chunk_table
))
.bind(&meta)
.bind(uuid)
.bind(&chk_id)