feat: trace-level matching, health watcher/worker status, timezone config
This commit is contained in:
@@ -29,7 +29,7 @@ REDIS_PASSWORD=accusys
|
||||
# Qdrant Vector Database - Collection isolation
|
||||
QDRANT_URL=http://localhost:6333
|
||||
QDRANT_API_KEY=Test3200Test3200Test3200
|
||||
QDRANT_COLLECTION=momentry_dev_v1
|
||||
QDRANT_COLLECTION=momentry_dev_rule1_v2
|
||||
|
||||
# Paths
|
||||
MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev
|
||||
|
||||
@@ -22,6 +22,9 @@ QDRANT_COLLECTION=momentry_rule1
|
||||
# === API Keys ===
|
||||
MOMENTRY_API_KEY=muser_your_key_here
|
||||
MOMENTRY_DEMO_API_KEY=muser_your_demo_key_here
|
||||
JWT_SECRET=your_jwt_secret_here_change_in_production
|
||||
SFTPGO_BASE_URL=http://127.0.0.1:8080
|
||||
|
||||
TMDB_API_KEY=your_tmdb_api_key_here
|
||||
|
||||
# === LLM ===
|
||||
|
||||
34
Cargo.lock
generated
34
Cargo.lock
generated
@@ -178,6 +178,18 @@ dependencies = [
|
||||
"password-hash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-compression"
|
||||
version = "0.4.42"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac"
|
||||
dependencies = [
|
||||
"compression-codecs",
|
||||
"compression-core",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-lock"
|
||||
version = "3.4.2"
|
||||
@@ -615,6 +627,23 @@ dependencies = [
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "compression-codecs"
|
||||
version = "0.4.38"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf"
|
||||
dependencies = [
|
||||
"compression-core",
|
||||
"flate2",
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "compression-core"
|
||||
version = "0.4.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789"
|
||||
|
||||
[[package]]
|
||||
name = "concurrent-queue"
|
||||
version = "2.5.0"
|
||||
@@ -4861,13 +4890,18 @@ version = "0.6.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
|
||||
dependencies = [
|
||||
"async-compression",
|
||||
"bitflags 2.11.1",
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http-body-util",
|
||||
"iri-string",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tower 0.5.3",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
|
||||
@@ -55,7 +55,7 @@ sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "js
|
||||
mongodb = { version = "2", features = ["tokio-runtime"] }
|
||||
bson = { version = "2", features = ["chrono-0_4"] }
|
||||
qdrant-client = "1.7"
|
||||
reqwest = { version = "0.12", features = ["json"] }
|
||||
reqwest = { version = "0.12", features = ["json", "gzip"] }
|
||||
pgvector = { version = "0.3", features = ["sqlx"] }
|
||||
|
||||
# HTTP Server
|
||||
|
||||
3
build.rs
3
build.rs
@@ -60,7 +60,8 @@ fn sha256_hex(data: &[u8]) -> String {
|
||||
use std::io::Write;
|
||||
use std::process::{Command, Stdio};
|
||||
if let Ok(mut child) = Command::new("shasum")
|
||||
.arg("-a").arg("256")
|
||||
.arg("-a")
|
||||
.arg("256")
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()
|
||||
|
||||
Binary file not shown.
@@ -103,7 +103,7 @@ f4d1b4334a49357b74b80e390ad5a3d16263e51cbe5cab661af92bd2e9721f02 ./face_process
|
||||
802015c73dfce0866f2a0bc94c645aa35ba30a6de78244af23090bb1f1828c6e ./face_processor_mps.py
|
||||
96ffdbde3f4d87e9942f9e1f4c93cbd999dc404b43e00d4cdcbb22de3c0f16b7 ./face_processor_optimized.py
|
||||
17e7d0bd142bddfead94b1dd959c1f41c0dad7063ffc677dff1a99d62aab6cf8 ./face_processor_v1.py
|
||||
15877adf5c160d861da688a25b93fd2edc189f326f9646ffb4de063e554f773a ./face_processor.py
|
||||
d6ddad29a5e53b43b887554072d7965f0535e47fb62dad1a8b87e44fa1be6015 ./face_processor.py
|
||||
8edab61189ad1a8fa60c203077e814e82d46c5bae67054fa2ab1958e199c05f9 ./face_recognition_processor.py
|
||||
9ea19f357b3fcec6c8b3875c538e53cb46e407ab188cd544963e0123e535fa03 ./face_registration.py
|
||||
72648816de611fd9b84d2b98c177b8b4f24374024b69184e8151c06cf44d633b ./face_statistics_report.py
|
||||
@@ -174,15 +174,15 @@ fd39b779a0337f521940f3f7b159931f1f207f200eefd610183781fdcf3dfafd ./object_searc
|
||||
42d2952fc78b57302b0d12bc3d45790a2c2c46d4ffa3c713a82686134bd63f13 ./ocr_benchmark_runner.py
|
||||
7b3ccb5c4ddd4c62c5ad04d0e3aafaecc2c1441012b6a98613cdcf055e2e50e8 ./ocr_processor_contract_v1.py
|
||||
271023eec42d6be4a1ce6ae2ce3f29e825210a57e6bb37554a6f7fdf54616f9a ./ocr_processor_mps.py
|
||||
e666bc8488bb93cc45bcd6a70a4ef38a74af6631d7b87a789381bfbdab4569f5 ./ocr_processor.py
|
||||
2e73c41285e52ef013594fcd4d20df9f5781bfc26bcf62e54dd2c04ec44200c3 ./ocr_processor.py
|
||||
62196108cb3337b5f9a873d70d2981ac8f49152369afbcc8a12b3a13de579e80 ./opencv_stamp_search.py
|
||||
b2e8d552c272fd173c77693e9453a85fe16dfc12f7c2cd304d299c6188c14077 ./paligemma_vs_gdino.py
|
||||
2c6767e763cf69917af832b8383528f754c65db5a3f02cb4d63e3f896d5920b6 ./parent_chunk_5w1h.py
|
||||
1534d5b7617dbae77f7a37a2c33a89b90f965247a6828f00b73ea6b720f6f4fc ./parent_chunk_5w1h.py
|
||||
5208c738d4b615282813d351daf09872ce516121bb604caa64968ef5e52c53d3 ./pipeline_checklist.py
|
||||
8f80c3a2be5c330e2d1853d9250a171c75db84598dbf3304280c42237ed4fb1f ./pipeline_status.py
|
||||
94db44c0f49115a677d117d4901a1b7991c1517905300eaa495dd62b8ac1c79c ./pose_processor_contract_v1.py
|
||||
167dee5e42c6bd46674bcffcfd92f368fc0b48a1f42c459c806853b281bc6482 ./pose_processor_mps.py
|
||||
a1cdb1efd992d229829ae156d8aa439347c51d664e2a606c14d2274a11c93a66 ./pose_processor.py
|
||||
a6ef3a785ef5c6dc47fa38dbed80d76bc7d4bf48cbaf0f7edb3d26df98d7262c ./pose_processor.py
|
||||
45e6798dc5900f2f7c8776a2d260c122aae5068a075256b8a5c02e8d0be6c131 ./probe_file.py
|
||||
139a68b5915680ec697d4bb5420adbd20b89637de2c16a15d68aca4fc22da02b ./qa/executor.py
|
||||
4a59b36c29e1ee6e2b169db3b0201d2f7088c6ccbfdf642a3b522aeb182bbeea ./qa/judges/facenet.py
|
||||
@@ -197,7 +197,7 @@ c4e4424aad1847d822e9cf7dc98a1b2e903735a61e8ec056c6a9be75f79486bd ./qa/pipeline.
|
||||
01c7b3c30c1531224f9605f0ee633285fe8489ab2d0a3c9c6a41f2b2b60d6626 ./quick_stamp_search.py
|
||||
e3143673a2bff6139e05c82446fd8770c4b7e59a854a42c3b29662f5ac75efe2 ./rebuild_parents.py
|
||||
4aa98981632d4f8a11039c510e86aa296ae1cd4b399fc871ed664ac11e445bd9 ./rebuild_story_content.py
|
||||
45c437b412d34c7c6d5758e94b7205a2956b32b6fe170c3f56db7231ec6f5a15 ./redis_publisher.py
|
||||
205cfc47b603b5ab94d97dae8c25486b342b7c2858afe6d6dae27615ca0b2aeb ./redis_publisher.py
|
||||
750f778946b56bc57c47d9d2295332bb0f8cec2c1aa03c6b882d39ef4432673d ./refine_search.py
|
||||
0f8a6a6866a5797e964d3b17e2b7ef146fe7a798f09fcea982fcda6f629b4d06 ./regenerate_parent_5w1h.py
|
||||
3ee192b623f290136b36bd63abd018aad6e6639a9543970c3415734628b33bd6 ./register_sample_faces.py
|
||||
@@ -303,7 +303,7 @@ d0ec8f4a67c1a1eb1356ad6e9b2f466575691bd336621cdbbfd31dd10159f2dc ./utils/test_m
|
||||
ff98864f1b11795cc3bb64f30ccb6f8609771ddc7a5df2c003ba7c2233d16fc2 ./vectorize_chunks.py
|
||||
5880c128400e6e36c8eb7dffd009dbbc99dd13f8575b0037bdc854e25ddc41fb ./video_comparison_statistics.py
|
||||
0a1501ffdc027236cdf88706b3d61229e2998ab268fd57fb60e399ccb734b6a1 ./vision_agent.py
|
||||
6831281de868d24ecd84151965909b57f895d534114d24300a81c396492c19f8 ./visual_chunk_processor.py
|
||||
eac8f90fbbb655614abcefc4b887e346bf94db5f015d33d37bc9514fb030489d ./visual_chunk_processor.py
|
||||
c165dfc5fc981dc731b25ef414184ee58e56b73b148d41a32fdce985c701efd5 ./visualize_stamp.py
|
||||
6c65a82fdd1d585e20bee4fcb2d1bdec2e6220bda71d6ef9cd00d6a3cf74c4d7 ./voice_embedding_extractor.py
|
||||
2b3a7b357db4ddd07ca30bf200c6600724e33441d8def0a4d9a39673e2cfb1c0 ./weather_sound_detector.py
|
||||
@@ -343,3 +343,4 @@ b2ee4f8a445a7e83f7b99ae5d4139fd525d9e3e58a360bfef054d441aa21d901 ./swift_proces
|
||||
fbca5ba0783153c4e21c174b0cbf75b582514f6ef0f92750a82d3178bc170f48 ./test_search_modes.sh
|
||||
f8c1647cdb4db8adef1829e41fbecd97f6b3b2e62927f195cd8e68127876069d ./troubleshoot.sh
|
||||
992296b5218f3ef97ce53325be12f71848f3c3aeb3ee81d764bfe4bd61e1de05 ./verify_package.sh
|
||||
b6f95fa070cc0258bc5d005f10d13025ba8b08d3ee1598bcdad405ff1d3332ed ./tmdb_agent.py
|
||||
|
||||
84
scripts/extract_face_embedding.py
Normal file
84
scripts/extract_face_embedding.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Extract face embedding from an image using InsightFace + CoreML FaceNet.
|
||||
|
||||
Usage:
|
||||
python3 scripts/extract_face_embedding.py <image_path>
|
||||
|
||||
Output: JSON with "embedding" key (512 floats) or "error" key.
|
||||
Exit code: 0 on success, 1 on failure.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Prefer venv if it exists (has insightface + coremltools installed)
|
||||
VENV_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "venv")
|
||||
VENV_SITE = os.path.join(VENV_PATH, "lib", "python3.11", "site-packages")
|
||||
if os.path.isdir(VENV_SITE):
|
||||
sys.path.insert(0, VENV_SITE)
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "models")
|
||||
FACENET_PATH = os.path.join(MODELS_DIR, "facenet512.mlpackage")
|
||||
|
||||
|
||||
def extract_embedding(image_path: str):
    """Print a JSON face embedding for the largest face found in ``image_path``.

    The image is read and decoded first so that an unreadable or corrupt file
    fails fast, before the detection and embedding models are loaded. The
    largest detected face is cropped, resized to 160x160, scaled to [-1, 1],
    and passed to the CoreML model stored at ``FACENET_PATH``.

    Args:
        image_path: Path of the image file to analyse.

    Output:
        Writes exactly one JSON object to stdout: ``{"embedding": [...]}`` on
        success, or ``{"error": "..."}`` on failure.

    Raises:
        SystemExit: With code 1 when the image cannot be read or decoded, no
            face is detected, the clamped bounding box is empty, or the model
            output contains no ``var_*`` entry.
    """
    import contextlib
    import io
    import warnings

    warnings.filterwarnings("ignore")

    def fail(message: str):
        # Every failure is reported the same way: JSON on stdout, exit code 1.
        print(json.dumps({"error": message}))
        sys.exit(1)

    # Read with a context manager so the handle is always closed, and turn
    # filesystem errors into the documented JSON error instead of a traceback.
    try:
        with open(image_path, "rb") as image_file:
            img_bytes = image_file.read()
    except OSError as exc:
        fail(f"Failed to read image: {exc}")

    # An empty buffer cannot be decoded; report it before handing it to OpenCV.
    if not img_bytes:
        fail("Failed to decode image")

    img = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        fail("Failed to decode image")

    # Suppress InsightFace verbose stdout during model loading so that stdout
    # carries nothing but the final JSON object.
    with contextlib.redirect_stdout(io.StringIO()):
        from insightface.app import FaceAnalysis
        import coremltools as ct

        app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])
        app.prepare(ctx_id=0, det_thresh=0.5)
        coreml_model = ct.models.MLModel(FACENET_PATH)

    # Detect faces
    faces = app.get(img)
    if not faces:
        fail("No face detected")

    # Keep the face with the largest bounding-box area.
    largest = max(faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]))
    x1, y1, x2, y2 = [int(v) for v in largest.bbox]

    # Clamp the box to the image so the crop below never goes out of bounds.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(img.shape[1], x2), min(img.shape[0], y2)
    if x2 <= x1 or y2 <= y1:
        fail("Invalid face bbox")

    # NOTE(review): cv2 decodes to BGR channel order; confirm the CoreML model
    # expects BGR rather than RGB input.
    face_img = cv2.resize(img[y1:y2, x1:x2], (160, 160))
    normalized = (face_img.astype(np.float32) / 127.5) - 1.0  # uint8 -> [-1, 1]
    normalized = np.transpose(normalized, (2, 0, 1))  # HWC -> CHW
    input_array = np.expand_dims(normalized, axis=0)  # add batch axis

    result = coreml_model.predict({"input": input_array})

    # The embedding is taken from the first output whose name starts with "var_".
    emb_key = next((k for k in result if k.startswith("var_")), None)
    if emb_key is None:
        fail("Model output has no embedding tensor")

    embedding = result[emb_key].flatten().tolist()
    print(json.dumps({"embedding": embedding}))
||||
|
||||
|
||||
if __name__ == "__main__":
    # The first positional argument is the image path; anything after it is ignored.
    cli_args = sys.argv[1:]
    if not cli_args:
        print(json.dumps({"error": "Usage: extract_face_embedding.py <image_path>"}))
        sys.exit(1)
    extract_embedding(cli_args[0])
|
||||
@@ -2,23 +2,30 @@
|
||||
"""
|
||||
Face landmark QC: verify eyes/nose are within face bounding box.
|
||||
Flags faces in DB where landmarks don't match the bbox.
|
||||
Usage: python3 face_landmark_qc.py <file_uuid> [--threshold 0.5] [--fix]
|
||||
Usage: python3 face_landmark_qc.py <file_uuid> [--threshold 0.5] [--apply]
|
||||
"""
|
||||
import sys, json, psycopg2, argparse
|
||||
import sys, json, psycopg2, argparse, os
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("uuid")
|
||||
parser.add_argument("--threshold", "-t", type=float, default=0.5,
|
||||
help="Fraction of landmark points that must be inside bbox (default: 0.5)")
|
||||
parser.add_argument("--fix", action="store_true", help="Update face_detections QC flag in DB")
|
||||
parser.add_argument("--apply", action="store_true",
|
||||
help="Write qc_ok to face_detections.metadata in DB")
|
||||
parser.add_argument("--schema", default="dev",
|
||||
help="DB schema (default: dev)")
|
||||
args = parser.parse_args()
|
||||
|
||||
UUID = args.uuid
|
||||
THRESHOLD = args.threshold
|
||||
FACE_PATH = f"/Users/accusys/momentry/output_dev/{UUID}.face.json"
|
||||
SCHEMA = args.schema
|
||||
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", f"/Users/accusys/momentry/output_dev")
|
||||
FACE_PATH = f"{OUTPUT_DIR}/{UUID}.face.json"
|
||||
|
||||
print(f"=== Face Landmark QC ===")
|
||||
print(f"UUID: {UUID}")
|
||||
print(f"Schema: {SCHEMA}")
|
||||
print(f"Face file: {FACE_PATH}")
|
||||
print(f"Threshold: {THRESHOLD * 100:.0f}% points must be inside bbox")
|
||||
|
||||
# Load face.json
|
||||
@@ -29,8 +36,7 @@ total_faces = 0
|
||||
faces_with_lm = 0
|
||||
good_faces = 0
|
||||
bad_faces = 0
|
||||
bad_frame_ids = set()
|
||||
bad_face_details = []
|
||||
qc_results = [] # list of (frame, face_idx, qc_ok, x, y, w, h)
|
||||
|
||||
# Build frame lookup for fast access
|
||||
frame_map = {}
|
||||
@@ -42,13 +48,22 @@ for frame_num, frm in frame_map.items():
|
||||
total_faces += 1
|
||||
lm = face.get('landmarks')
|
||||
if not lm:
|
||||
bbox = face.get('bbox', {})
|
||||
qc_results.append((frame_num, fi, False, bbox.get('x'), bbox.get('y'),
|
||||
bbox.get('width'), bbox.get('height')))
|
||||
bad_faces += 1
|
||||
continue
|
||||
faces_with_lm += 1
|
||||
|
||||
x, y, w, h = face['x'], face['y'], face['width'], face['height']
|
||||
bbox = face.get('bbox', {})
|
||||
x, y, w, h = bbox.get('x'), bbox.get('y'), bbox.get('width'), bbox.get('height')
|
||||
if None in (x, y, w, h):
|
||||
qc_results.append((frame_num, fi, False, x, y, w, h))
|
||||
bad_faces += 1
|
||||
continue
|
||||
inside_pts = 0
|
||||
total_pts = 0
|
||||
eye_nose_inside = 0 # at least one point from each eye+nose inside
|
||||
eye_nose_inside = 0
|
||||
|
||||
for lm_type in ['left_eye', 'right_eye', 'nose']:
|
||||
points = lm.get(lm_type, [])
|
||||
@@ -63,53 +78,39 @@ for frame_num, frm in frame_map.items():
|
||||
eye_nose_inside += 1
|
||||
|
||||
ratio = inside_pts / max(1, total_pts)
|
||||
qc_ok = (ratio >= THRESHOLD and eye_nose_inside >= 2)
|
||||
|
||||
if ratio >= THRESHOLD and eye_nose_inside >= 2:
|
||||
qc_results.append((frame_num, fi, qc_ok, x, y, w, h))
|
||||
if qc_ok:
|
||||
good_faces += 1
|
||||
else:
|
||||
bad_faces += 1
|
||||
bad_frame_ids.add(frame_num)
|
||||
bad_face_details.append({
|
||||
'frame': frame_num,
|
||||
'face_idx': fi,
|
||||
'bbox': [x, y, w, h],
|
||||
'inside_pts': inside_pts,
|
||||
'total_pts': total_pts,
|
||||
'ratio': ratio,
|
||||
'eye_nose_ok': eye_nose_inside,
|
||||
})
|
||||
|
||||
print(f"\nTotal faces: {total_faces:,}")
|
||||
print(f"Faces with landmarks: {faces_with_lm:,}")
|
||||
print(f"✅ Good (≥{THRESHOLD*100:.0f}% inside + ≥2 features): {good_faces:,}")
|
||||
print(f"❌ Bad: {bad_faces:,}")
|
||||
print(f"❌ Bad (no eyes or insufficient landmarks): {bad_faces:,}")
|
||||
print(f"Quality pass rate: {100 * good_faces / max(1, faces_with_lm):.1f}%")
|
||||
|
||||
print(f"\nBad faces in {len(bad_frame_ids)} unique frames")
|
||||
|
||||
# Show sample bad faces
|
||||
print(f"\nSample bad faces:")
|
||||
for bf in sorted(bad_face_details, key=lambda b: b['ratio'])[:5]:
|
||||
print(f" frame={bf['frame']}, bbox={bf['bbox']}, {bf['inside_pts']}/{bf['total_pts']} inside ({bf['ratio']*100:.0f}%), eye/nose={bf['eye_nose_ok']}/3")
|
||||
|
||||
# Show sample good faces
|
||||
print(f"\nSample good faces:")
|
||||
good_details = []
|
||||
for frame_num, frm in frame_map.items():
|
||||
for face in frm.get('faces', []):
|
||||
lm = face.get('landmarks')
|
||||
if not lm:
|
||||
continue
|
||||
x, y, w, h = face['x'], face['y'], face['width'], face['height']
|
||||
inside = sum(1 for pts in lm.values() for pt in pts
|
||||
if (x <= pt[0] <= x + w) and (y <= pt[1] <= y + h))
|
||||
total = sum(len(pts) for pts in lm.values())
|
||||
if inside / max(1, total) >= THRESHOLD:
|
||||
good_details.append((frame_num, x, y, w, h, inside, total))
|
||||
if len(good_details) >= 5:
|
||||
break
|
||||
if len(good_details) >= 5:
|
||||
break
|
||||
|
||||
for g in good_details:
|
||||
print(f" frame={g[0]}, bbox=[{g[1]},{g[2]},{g[3]},{g[4]}], {g[5]}/{g[6]} inside ({100*g[5]/max(1,g[6]):.0f}%)")
|
||||
# Apply mode: write qc_ok to face_detections.metadata
|
||||
if args.apply:
|
||||
print(f"\n=== Applying QC results to {SCHEMA}.face_detections ===")
|
||||
db_url = os.environ.get("DATABASE_URL", "postgres://accusys@localhost:5432/momentry")
|
||||
conn = psycopg2.connect(db_url)
|
||||
cur = conn.cursor()
|
||||
updated = 0
|
||||
for frame_num, fi, qc_ok, x, y, w, h in qc_results:
|
||||
qc_str = "true" if qc_ok else "false"
|
||||
cur.execute(
|
||||
f"UPDATE {SCHEMA}.face_detections "
|
||||
f"SET metadata = jsonb_set(COALESCE(metadata, '{{}}'::jsonb), '{{qc_ok}}', '\"{qc_str}\"'::jsonb) "
|
||||
f"WHERE file_uuid = %s AND frame_number = %s AND x = %s AND y = %s AND width = %s AND height = %s",
|
||||
(UUID, frame_num, x, y, w, h)
|
||||
)
|
||||
if cur.rowcount > 0:
|
||||
updated += 1
|
||||
conn.commit()
|
||||
cur.close()
|
||||
conn.close()
|
||||
print(f"Updated {updated} rows in {SCHEMA}.face_detections")
|
||||
print(f"Skipped {len(qc_results) - updated} rows (no matching face_detections row)")
|
||||
|
||||
@@ -13,6 +13,7 @@ Detection cost: near-zero CPU (Vision ANE)
|
||||
Embedding cost: near-zero CPU (CoreML ANE)
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
@@ -29,6 +30,7 @@ from pathlib import Path
|
||||
import coremltools as ct
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
SWIFT_BIN = os.path.join(SCRIPT_DIR, "swift_processors", ".build", "debug", "swift_face")
|
||||
@@ -49,11 +51,12 @@ def classify_pose(roll: float, yaw: float) -> str:
|
||||
|
||||
class FaceProcessorVision:
|
||||
def __init__(self, video_path: str, output_path: str, uuid: str = "",
|
||||
sample_interval: int = 3):
|
||||
sample_interval: int = 3, publisher: RedisPublisher = None):
|
||||
self.video_path = video_path
|
||||
self.output_path = output_path
|
||||
self.uuid = uuid
|
||||
self.sample_interval = sample_interval
|
||||
self.publisher = publisher
|
||||
|
||||
# Load CoreML FaceNet
|
||||
self.coreml_model = None
|
||||
@@ -127,7 +130,33 @@ class FaceProcessorVision:
|
||||
|
||||
print(f"[FACE_V2] Running: {' '.join(cmd)}")
|
||||
t0 = time.time()
|
||||
subprocess.run(cmd, check=True)
|
||||
log_path = swift_out + ".log"
|
||||
log_f = open(log_path, "w")
|
||||
proc = subprocess.Popen(cmd, stdout=log_f, stderr=subprocess.STDOUT, text=True)
|
||||
last_pct = -1
|
||||
while proc.poll() is None:
|
||||
time.sleep(10)
|
||||
# Read latest log lines
|
||||
try:
|
||||
with open(log_path) as lf:
|
||||
for line in lf:
|
||||
line = line.strip()
|
||||
m = re.search(r'(\d+)% complete', line)
|
||||
if m:
|
||||
pct = int(m.group(1))
|
||||
if pct > last_pct:
|
||||
last_pct = pct
|
||||
if self.publisher:
|
||||
self.publisher.progress("face", pct, 100, f"swift detect {pct}%")
|
||||
except Exception:
|
||||
pass
|
||||
log_f.close()
|
||||
if proc.returncode != 0:
|
||||
stderr_out = proc.stderr.read()
|
||||
if stderr_out:
|
||||
print(stderr_out.strip(), file=sys.stderr)
|
||||
raise RuntimeError(f"swift_face exited with code {proc.returncode}")
|
||||
|
||||
elapsed = time.time() - t0
|
||||
print(f"[FACE_V2] Detection done in {elapsed:.1f}s")
|
||||
|
||||
@@ -156,6 +185,8 @@ class FaceProcessorVision:
|
||||
|
||||
t0 = time.time()
|
||||
embed_count = 0
|
||||
total_face_count = 0
|
||||
last_pct = -1
|
||||
|
||||
for frame_info in frames:
|
||||
frame_num = frame_info["frame"]
|
||||
@@ -220,6 +251,12 @@ class FaceProcessorVision:
|
||||
if len(face_data["frames"]) % 100 == 0:
|
||||
elapsed = time.time() - t0
|
||||
print(f"[FACE_V2] {len(face_data['frames'])} frames, {embed_count} embeddings, {elapsed:.0f}s")
|
||||
if self.publisher:
|
||||
pct = int(len(face_data["frames"]) * 100 / max(len(frames), 1))
|
||||
if pct > last_pct:
|
||||
last_pct = pct
|
||||
self.publisher.progress("face", len(face_data["frames"]), len(frames),
|
||||
f"{embed_count} faces", embed_count, "faces")
|
||||
|
||||
self.video.release()
|
||||
|
||||
@@ -259,19 +296,36 @@ def main():
|
||||
parser.add_argument("--force", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
publisher = RedisPublisher(args.uuid) if args.uuid else None
|
||||
if publisher:
|
||||
publisher.info("face", "FACE_START")
|
||||
|
||||
if args.force and os.path.exists(args.output_path):
|
||||
os.remove(args.output_path)
|
||||
|
||||
processor = FaceProcessorVision(
|
||||
args.video_path, args.output_path,
|
||||
args.uuid, args.sample_interval
|
||||
args.uuid, args.sample_interval, publisher
|
||||
)
|
||||
|
||||
# Step 1: Vision detection (bbox + pose via ANE)
|
||||
detection = processor.process_with_swift()
|
||||
try:
|
||||
detection = processor.process_with_swift()
|
||||
except Exception as e:
|
||||
if publisher:
|
||||
publisher.error("face", f"Detection failed: {e}")
|
||||
raise
|
||||
|
||||
# Step 2: CoreML embedding + save
|
||||
processor.embed_and_save(detection)
|
||||
try:
|
||||
processor.embed_and_save(detection)
|
||||
except Exception as e:
|
||||
if publisher:
|
||||
publisher.error("face", f"Embedding failed: {e}")
|
||||
raise
|
||||
|
||||
if publisher:
|
||||
publisher.complete("face", f"{len(detection.get('frames',[]))} frames")
|
||||
|
||||
# Clean up temp detection file
|
||||
swift_out = args.output_path.replace(".json", "_detect.json")
|
||||
|
||||
@@ -81,10 +81,10 @@ for cluster_id in sorted(set(labels)):
|
||||
VALUES (%s, 'face', 'auto', 'active', NOW(), %s)
|
||||
ON CONFLICT (name) DO UPDATE SET status = 'active', file_uuid = COALESCE(dev.identities.file_uuid, %s)
|
||||
RETURNING id
|
||||
""", (f"PERSON_{UUID[:8]}_{cluster_id}", UUID, UUID))
|
||||
""", (f"stranger_{UUID}_{cluster_id}", UUID, UUID))
|
||||
identity_id = cur.fetchone()[0]
|
||||
cluster_to_identity[cluster_id] = identity_id
|
||||
print(f" Cluster {cluster_id}: new identity {identity_id} (PERSON_{cluster_id})")
|
||||
print(f" Cluster {cluster_id}: new identity {identity_id} (stranger_{UUID}_{cluster_id})")
|
||||
|
||||
# Step 4: Create identity bindings
|
||||
print("Creating identity bindings...")
|
||||
|
||||
131
scripts/migrate_identity_files.py
Normal file
131
scripts/migrate_identity_files.py
Normal file
@@ -0,0 +1,131 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Migrate Identity Files — one-time: DB identities → filesystem identity.json
|
||||
|
||||
Reads all identities from PostgreSQL, queries file bindings,
|
||||
and writes identity.json + _index.json to {OUTPUT_DIR}/identities/{uuid}/
|
||||
|
||||
Usage:
|
||||
python3 scripts/migrate_identity_files.py
|
||||
python3 scripts/migrate_identity_files.py --db "dbname=momentry user=accusys"
|
||||
python3 scripts/migrate_identity_files.py --output /path/to/output
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
|
||||
def main():
    """Export every identity that has a UUID to ``identities/<uuid>/identity.json``.

    Command-line options:
        --db:     psycopg2 connection string (default: ``DATABASE_URL`` or a
                  local ``momentry`` database).
        --output: Output root (default: ``MOMENTRY_OUTPUT_DIR`` or a fixed path).

    For each identity row the per-file face bindings are queried and written,
    together with the identity's own columns, to ``identity.json`` inside a
    directory named after the dash-less UUID. A ``_index.json`` mapping
    UUID -> name is written once all identities have been exported. When the
    query returns no rows, a message is printed and nothing else is written.

    The database connection is closed on every exit path, including the early
    "no identities" return and any exception raised during the export.
    """
    parser = argparse.ArgumentParser(description="Migrate identities to filesystem")
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
    parser.add_argument("--output", default=os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output"))
    args = parser.parse_args()

    conn = psycopg2.connect(args.db)
    try:
        identities_root = Path(args.output) / "identities"
        identities_root.mkdir(parents=True, exist_ok=True)

        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        try:
            cur.execute("""
                SELECT id, uuid::text, name, identity_type, source, status,
                       tmdb_id, tmdb_profile, metadata::text, created_at, updated_at
                FROM identities
                WHERE uuid IS NOT NULL
                ORDER BY id
            """)
            rows = cur.fetchall()
        finally:
            cur.close()

        if not rows:
            print("No identities found in DB.")
            return

        index = {}
        migrated = 0

        for migrated, row in enumerate(rows, start=1):
            uuid_clean = row["uuid"].replace("-", "")
            name = row["name"] or ""

            dir_path = identities_root / uuid_clean
            dir_path.mkdir(parents=True, exist_ok=True)

            # metadata is selected as text, so decode it here; NULL or an
            # empty string becomes an empty object.
            metadata = row.get("metadata")
            if isinstance(metadata, str):
                metadata = json.loads(metadata) if metadata else {}
            elif metadata is None:
                metadata = {}

            identity_file = {
                "version": 1,
                "identity_uuid": uuid_clean,
                "name": name,
                "identity_type": row.get("identity_type"),
                "source": row.get("source"),
                "status": row.get("status"),
                "tmdb_id": row.get("tmdb_id"),
                "tmdb_profile": row.get("tmdb_profile"),
                "metadata": metadata,
                "file_bindings": _fetch_file_bindings(conn, row["id"]),
                "created_at": _iso_or_now(row.get("created_at")),
                "updated_at": _iso_or_now(row.get("updated_at")),
            }

            with open(dir_path / "identity.json", "w", encoding="utf-8") as f:
                json.dump(identity_file, f, indent=2, ensure_ascii=False)

            index[uuid_clean] = name
            print(f" [{migrated:5d}] {name} ({uuid_clean})")
    finally:
        conn.close()

    # Write _index.json
    index_file = {
        "version": 1,
        "updated_at": datetime.now(timezone.utc).isoformat(),
        "entries": index,
    }
    with open(identities_root / "_index.json", "w", encoding="utf-8") as f:
        json.dump(index_file, f, indent=2, ensure_ascii=False)

    print(f"\nDone: {migrated} identities migrated")
    print(f"Index: {identities_root / '_index.json'} ({len(index)} entries)")


def _iso_or_now(value):
    """Return ``value.isoformat()``, or the current UTC time in ISO format when ``value`` is falsy."""
    return value.isoformat() if value else datetime.now(timezone.utc).isoformat()


def _fetch_file_bindings(conn, identity_id):
    """Return one ``{file_uuid, trace_ids, face_count}`` dict per file bound to ``identity_id``.

    Bindings are derived from ``face_detections`` rows grouped by file; NULL
    trace ids are excluded from ``trace_ids``.
    """
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as bindings_cur:
        bindings_cur.execute("""
            SELECT fd.file_uuid,
                   COALESCE(array_agg(DISTINCT fd.trace_id) FILTER (WHERE fd.trace_id IS NOT NULL), '{}') AS trace_ids,
                   COUNT(*)::bigint AS face_count
            FROM face_detections fd
            WHERE fd.identity_id = %s
            GROUP BY fd.file_uuid
            ORDER BY fd.file_uuid
        """, (identity_id,))
        binding_rows = bindings_cur.fetchall()

    file_bindings = []
    for b in binding_rows:
        trace_ids = b["trace_ids"]
        # Normalise array elements to plain ints so they serialise as JSON numbers.
        if isinstance(trace_ids, list):
            trace_ids = [int(t) for t in trace_ids if t is not None]
        file_bindings.append({
            "file_uuid": b["file_uuid"],
            "trace_ids": trace_ids,
            "face_count": int(b["face_count"]),
        })
    return file_bindings
||||
|
||||
|
||||
if __name__ == "__main__":
    # main() returns None, so this exits with status 0 exactly as a bare call would.
    raise SystemExit(main())
|
||||
@@ -4,6 +4,7 @@ OCR Processor Wrapper
|
||||
Calls Swift Vision Framework OCR (swift_ocr) with fallback to PaddleOCR.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
@@ -11,6 +12,10 @@ import subprocess
|
||||
import argparse
|
||||
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
SWIFT_OCR_PATH = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)),
|
||||
"swift_processors/.build/debug/swift_ocr"
|
||||
@@ -19,6 +24,7 @@ SWIFT_OCR_ALT = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)),
|
||||
"swift_processors/.build/arm64-apple-macosx/debug/swift_ocr"
|
||||
)
|
||||
SWIFT_PROGRESS_RE = re.compile(r"\[SwiftOCR\] Progress:\s*(\d+)%")
|
||||
|
||||
|
||||
def process_ocr(
|
||||
@@ -27,6 +33,7 @@ def process_ocr(
|
||||
uuid: str = "",
|
||||
sample_interval: int = 30,
|
||||
recognition_level: str = "accurate",
|
||||
publisher: RedisPublisher = None,
|
||||
) -> dict:
|
||||
swift_bin = SWIFT_OCR_PATH
|
||||
if not os.path.exists(swift_bin):
|
||||
@@ -42,15 +49,34 @@ def process_ocr(
|
||||
"--uuid", uuid]
|
||||
|
||||
print(f"[OCR] Running Swift OCR", file=sys.stderr)
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200)
|
||||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
||||
|
||||
if result.stdout:
|
||||
print(result.stdout.strip(), file=sys.stderr)
|
||||
if result.stderr:
|
||||
print(result.stderr.strip(), file=sys.stderr)
|
||||
last_pct = -1
|
||||
stdout_lines = []
|
||||
for line in proc.stdout:
|
||||
line = line.strip()
|
||||
stdout_lines.append(line)
|
||||
m = SWIFT_PROGRESS_RE.search(line)
|
||||
if m:
|
||||
pct = int(m.group(1))
|
||||
if pct > last_pct:
|
||||
last_pct = pct
|
||||
print(f"[OCR] Progress: {pct}%", file=sys.stderr)
|
||||
if publisher:
|
||||
publisher.progress("ocr", pct, 100, f"{pct}%")
|
||||
elif line:
|
||||
print(line, file=sys.stderr)
|
||||
|
||||
if result.returncode != 0 or not os.path.exists(output_path):
|
||||
print(f"[OCR] Swift OCR failed, falling back to PaddleOCR", file=sys.stderr)
|
||||
stderr_output = proc.stderr.read()
|
||||
if stderr_output:
|
||||
print(stderr_output.strip(), file=sys.stderr)
|
||||
|
||||
proc.wait()
|
||||
|
||||
if proc.returncode != 0 or not os.path.exists(output_path):
|
||||
print(f"[OCR] Swift OCR failed (exit={proc.returncode}), falling back to PaddleOCR", file=sys.stderr)
|
||||
if publisher:
|
||||
publisher.error("ocr", f"Swift OCR failed, using fallback")
|
||||
return _fallback(video_path, output_path, uuid, sample_interval)
|
||||
|
||||
with open(output_path) as f:
|
||||
@@ -81,9 +107,16 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--recognition-level", choices=["fast", "accurate"], default="accurate")
|
||||
args = parser.parse_args()
|
||||
|
||||
publisher = RedisPublisher(args.uuid) if args.uuid else None
|
||||
if publisher:
|
||||
publisher.info("ocr", "OCR_START")
|
||||
|
||||
result = process_ocr(args.video_path, args.output_path, args.uuid,
|
||||
args.sample_interval, args.recognition_level)
|
||||
args.sample_interval, args.recognition_level,
|
||||
publisher)
|
||||
|
||||
with open(args.output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
print(f"OCR: {len(result.get('frames', []))} frames with text")
|
||||
if publisher:
|
||||
publisher.complete("ocr", f"{len(result.get('frames',[]))} frames")
|
||||
|
||||
@@ -28,7 +28,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
|
||||
SCHEMA = os.getenv("DATABASE_SCHEMA", "dev")
|
||||
OUTPUT_DIR = os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
|
||||
OLLAMA_URL = "http://localhost:11434/api"
|
||||
EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://localhost:11436/v1/embeddings")
|
||||
|
||||
def load_speaker_map(file_uuid: str) -> dict:
|
||||
"""Load speaker→identity mapping from DB (generalized, not hardcoded)"""
|
||||
@@ -64,7 +64,7 @@ CURRENT_VERSIONS = {
|
||||
"embedding_agent": "nomic-embed-768d/v1",
|
||||
}
|
||||
|
||||
LLM_URL = os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8081/v1/chat/completions")
|
||||
LLM_URL = os.getenv("MOMENTRY_LLM_URL", os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8082/v1/chat/completions"))
|
||||
LLM_MODEL = os.getenv("MOMENTRY_LLM_SUMMARY_MODEL", "gemma4")
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
|
||||
s, e = cs["start_time"], cs["end_time"]
|
||||
|
||||
children = []
|
||||
for seg in asr_segs:
|
||||
for seg_idx, seg in enumerate(asr_segs):
|
||||
st, en = seg.get("start", 0), seg.get("end", 0)
|
||||
text = seg.get("text", "").strip()
|
||||
if st < s or en > e or not text: continue
|
||||
@@ -117,11 +117,11 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
|
||||
"start": st, "end": en, "text": text,
|
||||
"speaker_id": spk_id, "speaker_name": character,
|
||||
"speaker_confidence": spk_conf,
|
||||
"chunk_id": f"{file_uuid}_{st:.0f}_{en:.0f}",
|
||||
"chunk_id": f"{file_uuid}_{seg_idx}",
|
||||
})
|
||||
|
||||
# Boundary overlap: even empty scenes get partial children
|
||||
for seg in asr_segs:
|
||||
for seg_idx, seg in enumerate(asr_segs):
|
||||
st, en = seg.get("start", 0), seg.get("end", 0)
|
||||
text = seg.get("text", "").strip()
|
||||
if not text: continue
|
||||
@@ -141,7 +141,7 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
|
||||
"start": st, "end": en, "text": text,
|
||||
"speaker_id": spk_id, "speaker_name": character,
|
||||
"speaker_confidence": spk_conf,
|
||||
"chunk_id": f"{file_uuid}_{st:.0f}_{en:.0f}",
|
||||
"chunk_id": f"{file_uuid}_{seg_idx}",
|
||||
"overlap_type": "partial",
|
||||
})
|
||||
|
||||
@@ -215,14 +215,17 @@ def generate_llm_child_summary(child: dict, parent_summary: str) -> Optional[str
|
||||
# ===== Embedding (Ollama nomic-embed) =====
|
||||
|
||||
def embed_text(text: str, max_retries: int = 3) -> Optional[List[float]]:
|
||||
"""Get embedding via Ollama nomic-embed-text"""
|
||||
"""Get embedding via EmbeddingGemma server"""
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
resp = requests.post(f"{OLLAMA_URL}/embeddings", json={
|
||||
"model": "nomic-embed-text-v2-moe", "prompt": text,
|
||||
resp = requests.post(EMBEDDING_URL, json={
|
||||
"input": [text],
|
||||
}, timeout=30)
|
||||
if resp.status_code == 200:
|
||||
return resp.json()["embedding"]
|
||||
data = resp.json()
|
||||
items = data.get("data", [])
|
||||
if items:
|
||||
return items[0]["embedding"]
|
||||
except Exception as e:
|
||||
if attempt == max_retries - 1:
|
||||
print(f" ⚠️ Embedding failed: {e}")
|
||||
@@ -244,7 +247,7 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,
|
||||
|
||||
# Get base chunk_index
|
||||
cur.execute(
|
||||
f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunks WHERE file_uuid = %s",
|
||||
f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunk WHERE file_uuid = %s",
|
||||
(file_uuid,),
|
||||
)
|
||||
next_index = (cur.fetchone()[0] or 0) + 1
|
||||
@@ -255,20 +258,38 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,
|
||||
|
||||
parent_id = f"{mode}_parent_{file_uuid}_{scene['start_time']:.0f}_{scene['end_time']:.0f}"
|
||||
|
||||
cur.execute(
|
||||
f"""
|
||||
INSERT INTO {SCHEMA}.chunks (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
|
||||
start_time, end_time, content, text_content, parent_chunk_id)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
|
||||
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
|
||||
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content
|
||||
""",
|
||||
(parent_id, parent_id, file_uuid, parent_type, next_index,
|
||||
scene["start_time"], scene["end_time"],
|
||||
json.dumps({"summary": parent_text, "mode": mode, "type": "parent",
|
||||
"source_versions": CURRENT_VERSIONS}),
|
||||
parent_text, None),
|
||||
)
|
||||
parent_embedding = embed_text(parent_text) if do_embed else None
|
||||
if do_embed and parent_embedding:
|
||||
cur.execute(
|
||||
f"""
|
||||
INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
|
||||
start_time, end_time, content, text_content, parent_chunk_id, embedding)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector)
|
||||
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
|
||||
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
|
||||
embedding = EXCLUDED.embedding
|
||||
""",
|
||||
(parent_id, parent_id, file_uuid, parent_type, next_index,
|
||||
scene["start_time"], scene["end_time"],
|
||||
json.dumps({"summary": parent_text, "mode": mode, "type": "parent",
|
||||
"source_versions": CURRENT_VERSIONS}),
|
||||
parent_text, None, parent_embedding),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
f"""
|
||||
INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
|
||||
start_time, end_time, content, text_content, parent_chunk_id)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
|
||||
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
|
||||
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content
|
||||
""",
|
||||
(parent_id, parent_id, file_uuid, parent_type, next_index,
|
||||
scene["start_time"], scene["end_time"],
|
||||
json.dumps({"summary": parent_text, "mode": mode, "type": "parent",
|
||||
"source_versions": CURRENT_VERSIONS}),
|
||||
parent_text, None),
|
||||
)
|
||||
next_index += 1
|
||||
parent_count += 1
|
||||
|
||||
@@ -276,22 +297,42 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,
|
||||
child_id = child["chunk_id"]
|
||||
child_text = generate_story_child_summary(child, parent_text) if mode == "story" else generate_llm_child_summary(child, parent_text)
|
||||
|
||||
cur.execute(
|
||||
f"""
|
||||
INSERT INTO {SCHEMA}.chunks (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
|
||||
start_time, end_time, content, text_content, parent_chunk_id)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
|
||||
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
|
||||
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
|
||||
parent_chunk_id = EXCLUDED.parent_chunk_id
|
||||
""",
|
||||
(child_id, child_id, file_uuid, child_type, next_index,
|
||||
child["start"], child["end"],
|
||||
json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode,
|
||||
"speaker_confidence": child.get("speaker_confidence", 0),
|
||||
"source_versions": CURRENT_VERSIONS}),
|
||||
child_text, parent_id),
|
||||
)
|
||||
child_embedding = embed_text(child_text) if do_embed else None
|
||||
if do_embed and child_embedding:
|
||||
cur.execute(
|
||||
f"""
|
||||
INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
|
||||
start_time, end_time, content, text_content, parent_chunk_id, embedding)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector)
|
||||
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
|
||||
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
|
||||
parent_chunk_id = EXCLUDED.parent_chunk_id,
|
||||
embedding = EXCLUDED.embedding
|
||||
""",
|
||||
(child_id, child_id, file_uuid, child_type, next_index,
|
||||
child["start"], child["end"],
|
||||
json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode,
|
||||
"speaker_confidence": child.get("speaker_confidence", 0),
|
||||
"source_versions": CURRENT_VERSIONS}),
|
||||
child_text, parent_id, child_embedding),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
f"""
|
||||
INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
|
||||
start_time, end_time, content, text_content, parent_chunk_id)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
|
||||
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
|
||||
SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
|
||||
parent_chunk_id = EXCLUDED.parent_chunk_id
|
||||
""",
|
||||
(child_id, child_id, file_uuid, child_type, next_index,
|
||||
child["start"], child["end"],
|
||||
json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode,
|
||||
"speaker_confidence": child.get("speaker_confidence", 0),
|
||||
"source_versions": CURRENT_VERSIONS}),
|
||||
child_text, parent_id),
|
||||
)
|
||||
next_index += 1
|
||||
child_count += 1
|
||||
|
||||
@@ -304,7 +345,7 @@ def main():
|
||||
parser = argparse.ArgumentParser(description="Story Processor V2.0")
|
||||
parser.add_argument("--file-uuid", required=True)
|
||||
parser.add_argument("--mode", choices=["story", "llm"], default="story")
|
||||
parser.add_argument("--max-scenes", type=int, default=300)
|
||||
parser.add_argument("--max-scenes", type=int, default=99999)
|
||||
parser.add_argument("--embed", action="store_true", help="Generate embeddings (Ollama)")
|
||||
parser.add_argument("--no-db", action="store_true", help="Skip DB storage")
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -5,12 +5,16 @@ Calls Swift Vision Framework pose (swift_pose) with fallback to YOLOv8 Pose.
|
||||
Uses VNDetectHumanBodyPoseRequest with ANE acceleration.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import argparse
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
SWIFT_POSE_PATH = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)),
|
||||
"swift_processors/.build/debug/swift_pose"
|
||||
@@ -21,11 +25,14 @@ SWIFT_POSE_ALT = os.path.join(
|
||||
)
|
||||
|
||||
|
||||
SWIFT_POSE_PROGRESS_RE = re.compile(r"\[SwiftPose\] Progress:\s*(\d+)%")
|
||||
|
||||
def process_pose(
|
||||
video_path: str,
|
||||
output_path: str,
|
||||
uuid: str = "",
|
||||
sample_interval: int = 30,
|
||||
publisher: RedisPublisher = None,
|
||||
) -> dict:
|
||||
swift_bin = SWIFT_POSE_PATH
|
||||
if not os.path.exists(swift_bin):
|
||||
@@ -33,6 +40,8 @@ def process_pose(
|
||||
|
||||
if not os.path.exists(swift_bin):
|
||||
print("[Pose] Swift binary not found, using YOLOv8 fallback", file=sys.stderr)
|
||||
if publisher:
|
||||
publisher.error("pose", "Swift binary not found, using fallback")
|
||||
return _fallback(video_path, output_path, uuid, sample_interval)
|
||||
|
||||
cmd = [swift_bin, video_path, output_path,
|
||||
@@ -40,17 +49,32 @@ def process_pose(
|
||||
"--uuid", uuid]
|
||||
|
||||
print(f"[Pose] Running Swift Pose (Vision Framework)", file=sys.stderr)
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200)
|
||||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
||||
|
||||
if result.stdout:
|
||||
for line in result.stdout.strip().split("\n"):
|
||||
print(f" {line}", file=sys.stderr)
|
||||
if result.stderr:
|
||||
for line in result.stderr.strip().split("\n"):
|
||||
last_pct = -1
|
||||
for line in proc.stdout:
|
||||
line = line.strip()
|
||||
m = SWIFT_POSE_PROGRESS_RE.search(line)
|
||||
if m:
|
||||
pct = int(m.group(1))
|
||||
if pct > last_pct:
|
||||
last_pct = pct
|
||||
print(f"[Pose] Progress: {pct}%", file=sys.stderr)
|
||||
if publisher:
|
||||
publisher.progress("pose", pct, 100, f"{pct}%")
|
||||
elif line:
|
||||
print(f" {line}", file=sys.stderr)
|
||||
|
||||
if result.returncode != 0 or not os.path.exists(output_path):
|
||||
print(f"[Pose] Swift Pose failed, falling back to YOLOv8", file=sys.stderr)
|
||||
stderr_output = proc.stderr.read()
|
||||
if stderr_output:
|
||||
print(stderr_output.strip(), file=sys.stderr)
|
||||
|
||||
proc.wait()
|
||||
|
||||
if proc.returncode != 0 or not os.path.exists(output_path):
|
||||
print(f"[Pose] Swift Pose failed (exit={proc.returncode}), falling back to YOLOv8", file=sys.stderr)
|
||||
if publisher:
|
||||
publisher.error("pose", f"Swift Pose failed, using fallback")
|
||||
return _fallback(video_path, output_path, uuid, sample_interval)
|
||||
|
||||
with open(output_path) as f:
|
||||
@@ -113,7 +137,14 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--sample-interval", type=int, default=30)
|
||||
args = parser.parse_args()
|
||||
|
||||
result = process_pose(args.video_path, args.output_path, args.uuid, args.sample_interval)
|
||||
publisher = RedisPublisher(args.uuid) if args.uuid else None
|
||||
if publisher:
|
||||
publisher.info("pose", "POSE_START")
|
||||
|
||||
result = process_pose(args.video_path, args.output_path, args.uuid,
|
||||
args.sample_interval, publisher)
|
||||
with open(args.output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
print(f"Pose: {len(result.get('frames', []))} frames with poses")
|
||||
if publisher:
|
||||
publisher.complete("pose", f"{len(result.get('frames',[]))} frames")
|
||||
|
||||
@@ -34,6 +34,8 @@ class ProgressData:
|
||||
message: Optional[str] = None
|
||||
current: Optional[int] = None
|
||||
total: Optional[int] = None
|
||||
output_count: Optional[int] = None
|
||||
output_type: Optional[str] = None
|
||||
extra: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
@@ -49,7 +51,8 @@ class StructuredMessage:
|
||||
class RedisPublisher:
|
||||
def __init__(self, uuid: str):
|
||||
self.uuid = uuid
|
||||
self.channel = f"momentry:progress:{uuid}"
|
||||
prefix = os.environ.get("MOMENTRY_REDIS_PREFIX", "momentry:")
|
||||
self.channel = f"{prefix}progress:{uuid}"
|
||||
self._enabled = False
|
||||
self._client = None
|
||||
self._connect()
|
||||
@@ -107,6 +110,8 @@ class RedisPublisher:
|
||||
message: Optional[str] = None,
|
||||
current: Optional[int] = None,
|
||||
total: Optional[int] = None,
|
||||
output_count: Optional[int] = None,
|
||||
output_type: Optional[str] = None,
|
||||
extra: Optional[Dict[str, Any]] = None,
|
||||
) -> bool:
|
||||
if not self._enabled:
|
||||
@@ -121,6 +126,8 @@ class RedisPublisher:
|
||||
message=message,
|
||||
current=current,
|
||||
total=total,
|
||||
output_count=output_count,
|
||||
output_type=output_type,
|
||||
extra=extra,
|
||||
),
|
||||
)
|
||||
@@ -136,6 +143,8 @@ class RedisPublisher:
|
||||
current: int,
|
||||
total: int,
|
||||
message: str = "",
|
||||
output_count: Optional[int] = None,
|
||||
output_type: Optional[str] = None,
|
||||
) -> bool:
|
||||
return self.publish(
|
||||
MessageType.PROGRESS,
|
||||
@@ -143,6 +152,8 @@ class RedisPublisher:
|
||||
message=message,
|
||||
current=current,
|
||||
total=total,
|
||||
output_count=output_count,
|
||||
output_type=output_type,
|
||||
)
|
||||
|
||||
def complete(self, processor: str, message: str = "") -> bool:
|
||||
|
||||
117
scripts/sync_users_from_sftpgo.py
Normal file
117
scripts/sync_users_from_sftpgo.py
Normal file
@@ -0,0 +1,117 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Sync users from SFTPGo to Momentry users table.
|
||||
|
||||
Usage:
|
||||
python3 scripts/sync_users_from_sftpgo.py
|
||||
python3 scripts/sync_users_from_sftpgo.py --sftpgo-url http://localhost:8080
|
||||
python3 scripts/sync_users_from_sftpgo.py --db "dbname=momentry user=accusys"
|
||||
|
||||
Environment:
|
||||
SFTPGO_BASE_URL Default: http://localhost:8080
|
||||
DATABASE_URL Default: dbname=momentry user=accusys host=localhost
|
||||
|
||||
This script does NOT copy passwords. It creates user records with placeholder
|
||||
password hashes. The real password will be captured on the user's first
|
||||
login through Momentry (which verifies against SFTPGo and caches the hash).
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import requests
|
||||
|
||||
|
||||
def get_sftpgo_users(sftpgo_url: str, admin_user: str, admin_pass: str) -> list[dict[str, Any]]:
    """Get all users from SFTPGo.

    Args:
        sftpgo_url: Base URL of the SFTPGo REST API (a trailing slash is tolerated).
        admin_user: SFTPGo admin account used to obtain an access token.
        admin_pass: Password for ``admin_user``.

    Returns:
        The user objects returned by ``/api/v2/users``, across all result pages.

    Raises:
        requests.HTTPError: if the token or user-list request returns an error status.
        ValueError: if the user-list endpoint returns something other than a JSON array.
        SystemExit: if the token response carries no ``access_token``.
    """
    base_url = sftpgo_url.rstrip("/")

    # Get admin token (SFTPGo uses GET with HTTP basic auth, not POST)
    resp = requests.get(f"{base_url}/api/v2/token", auth=(admin_user, admin_pass), timeout=10)
    resp.raise_for_status()
    token = resp.json().get("access_token")
    if not token:
        print("ERROR: Failed to get SFTPGo admin token", file=sys.stderr)
        sys.exit(1)

    # List users. The endpoint is paginated, so a single request only yields the
    # first page; keep asking until a short page signals the end.
    users_url = f"{base_url}/api/v2/users"
    headers = {"Authorization": f"Bearer {token}"}
    page_size = 500  # NOTE(review): believed to be the API's maximum page size; confirm
    users: list[dict[str, Any]] = []
    offset = 0
    while True:
        resp = requests.get(
            users_url,
            headers=headers,
            params={"offset": offset, "limit": page_size},
            timeout=10,
        )
        resp.raise_for_status()
        page = resp.json()
        if not isinstance(page, list):
            raise ValueError(f"Unexpected SFTPGo users response: {type(page).__name__}")
        users.extend(page)
        if len(page) < page_size:
            break
        offset += len(page)
    return users
|
||||
|
||||
|
||||
def main():
    """Copy active SFTPGo users into the Momentry ``users`` table.

    Parses the command line, fetches the SFTPGo user list, and inserts one row
    per active user (``status == 1``) with a placeholder password hash.
    Existing usernames are left untouched. With ``--dry-run`` nothing is
    written; the would-be inserts are only printed.

    Exits with status 1 if the SFTPGo user list cannot be fetched.
    """
    parser = argparse.ArgumentParser(description="Sync SFTPGo users to Momentry")
    parser.add_argument("--sftpgo-url", default=os.getenv("SFTPGO_BASE_URL", "http://localhost:8080"))
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
    parser.add_argument("--admin-user", default="admin")
    # NOTE(review): a literal fallback password is baked in here; kept for
    # backward compatibility, but prefer supplying SFTPGO_ADMIN_PASSWORD.
    parser.add_argument("--admin-pass", default=os.getenv("SFTPGO_ADMIN_PASSWORD", "Test3200Test3200"))
    parser.add_argument("--dry-run", action="store_true", help="Print what would be done without executing")
    args = parser.parse_args()

    # Fetch users from SFTPGo
    print(f"[SFTPGo] Connecting to {args.sftpgo_url}...")
    try:
        sftpgo_users = get_sftpgo_users(args.sftpgo_url, args.admin_user, args.admin_pass)
    except Exception as e:
        print(f"ERROR: Failed to fetch SFTPGo users: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"[SFTPGo] Found {len(sftpgo_users)} users")

    # Placeholder hash — will be updated on first login via SFTPGo fallback
    placeholder_hash = "$placeholder$synced_from_sftpgo"

    synced = 0
    skipped = 0

    # Connect to Momentry DB and set schema
    conn = psycopg2.connect(args.db)
    try:
        # Each statement commits on its own. Inside one shared transaction a
        # single failed INSERT would abort it, making every later INSERT fail
        # and the final commit discard the rows that had succeeded.
        conn.autocommit = True
        with conn.cursor() as cur:
            # NOTE(review): the target schema is fixed to "dev" here.
            cur.execute("SET search_path TO dev")

            for user in sftpgo_users:
                username = user.get("username")
                status = user.get("status", 0)

                # Only named, active accounts are synced.
                if not username or status != 1:
                    skipped += 1
                    continue

                role = "admin" if username == "admin" else "user"

                if args.dry_run:
                    print(f" Would insert: {username} (role={role})")
                    synced += 1
                    continue

                try:
                    cur.execute(
                        "INSERT INTO users (username, password_hash, role) VALUES (%s, %s, %s) "
                        "ON CONFLICT (username) DO NOTHING",
                        (username, placeholder_hash, role),
                    )
                    if cur.rowcount > 0:
                        print(f" ✅ {username} (role={role})")
                        synced += 1
                    else:
                        print(f" ⏭️ {username} already exists, skipped")
                        skipped += 1
                except Exception as e:
                    print(f" ❌ {username}: {e}", file=sys.stderr)
                    skipped += 1
    finally:
        # Release the connection on every exit path, including unexpected errors.
        conn.close()

    print(f"\nDone: {synced} synced, {skipped} skipped/errors")
    print("Note: Password hashes are placeholders. First login via Momentry will cache the real hash.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
285
scripts/tmdb_agent.py
Normal file
285
scripts/tmdb_agent.py
Normal file
@@ -0,0 +1,285 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
TMDb Agent — pre-fetch TMDb data and write directly to identity files.
|
||||
|
||||
Usage:
|
||||
python3 scripts/tmdb_agent.py --file-uuid <uuid>
|
||||
python3 scripts/tmdb_agent.py --file-uuid <uuid> --db "dbname=momentry user=accusys"
|
||||
|
||||
Environment:
|
||||
TMDB_API_KEY Required. TMDb API key.
|
||||
MOMENTRY_OUTPUT_DIR Default: /Users/accusys/momentry/output
|
||||
DATABASE_URL Default: dbname=momentry user=accusys host=localhost
|
||||
|
||||
Flow:
|
||||
1. Query videos table for file_name
|
||||
2. Extract movie name from filename
|
||||
3. TMDB /search/movie → find best match
|
||||
4. TMDB /movie/{id}/credits → fetch cast
|
||||
5. TMDB /person/{id} → fetch person details
|
||||
6. Write {OUTPUT}/identities/{uuid}/identity.json + profile.jpg for each cast member
|
||||
7. Write {OUTPUT}/{uuid}.tmdb.json cache (movie info + identity uuid list)
|
||||
"""
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
|
||||
TMDB_BASE = "https://api.themoviedb.org/3"
|
||||
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
|
||||
|
||||
|
||||
def extract_movie_name(filename: str) -> str | None:
|
||||
"""Extract movie name from filename (e.g. 'Charade_1963.mp4' → 'Charade 1963')"""
|
||||
name = Path(filename).stem
|
||||
cleaned = re.sub(r'[._]', ' ', name).strip()
|
||||
# Strip text after separators like |, (, [, {
|
||||
for sep in ('|', '(', '[', '{', '\u2502'):
|
||||
idx = cleaned.find(sep)
|
||||
if idx > 0:
|
||||
cleaned = cleaned[:idx].strip()
|
||||
# Strip common suffixes (quality, format, source, etc.)
|
||||
suffixes = (
|
||||
r'\d{3,4}p', r'\d{3,4}x\d{3,4}', r'\d+fps', r'bluray', r'web[ -]?dl',
|
||||
r'webrip', r'hdrip', r'dvdrip', r'dvd', r'brrip', r'hdtv', r'xvid',
|
||||
r'x264', r'h264', r'x265', r'h265', r'hevc', r'aac', r'mp3', r'ac3',
|
||||
r'dts', r'5\.1', r'7\.1', r'dual[ -]?audio', r'multi[ -]?sub',
|
||||
r'proper', r'repack', r'extended', r'unrated', r'directors[ -]?cut',
|
||||
r'theatrical', r'internal', r'limited', r'complete', r'full[ -]?movie',
|
||||
r'english', r'french', r'spanish', r'german', r'chinese',
|
||||
r'youtube', r'yify', r'ettv', r'rarbg', r'tgx', r'axxo', r'ctrlhd',
|
||||
)
|
||||
pattern = r'\b(?:' + '|'.join(suffixes) + r')\b'
|
||||
cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE).strip()
|
||||
# Collapse multiple spaces
|
||||
cleaned = re.sub(r'\s+', ' ', cleaned).strip()
|
||||
return cleaned if len(cleaned) >= 3 else None
|
||||
|
||||
|
||||
def search_movie(query: str) -> dict | None:
|
||||
"""Search TMDB for a movie by name. Returns first result."""
|
||||
url = f"{TMDB_BASE}/search/movie"
|
||||
params = {"query": query, "api_key": TMDB_API_KEY, "language": "en-US", "page": 1}
|
||||
try:
|
||||
resp = requests.get(url, params=params, timeout=15)
|
||||
resp.raise_for_status()
|
||||
results = resp.json().get("results", [])
|
||||
return results[0] if results else None
|
||||
except Exception as e:
|
||||
print(f"TMDB search failed: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
def get_credits(movie_id: int) -> list[dict]:
|
||||
"""Get cast credits for a movie from TMDB."""
|
||||
url = f"{TMDB_BASE}/movie/{movie_id}/credits"
|
||||
params = {"api_key": TMDB_API_KEY, "language": "en-US"}
|
||||
try:
|
||||
resp = requests.get(url, params=params, timeout=15)
|
||||
resp.raise_for_status()
|
||||
return resp.json().get("cast", [])
|
||||
except Exception as e:
|
||||
print(f"TMDB credits failed: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
|
||||
def get_person_details(person_id: int) -> dict:
|
||||
"""Fetch person details from TMDB /person/{id}."""
|
||||
url = f"{TMDB_BASE}/person/{person_id}"
|
||||
params = {"api_key": TMDB_API_KEY, "language": "en-US"}
|
||||
try:
|
||||
resp = requests.get(url, params=params, timeout=15)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
return {
|
||||
"biography": data.get("biography"),
|
||||
"birthday": data.get("birthday"),
|
||||
"place_of_birth": data.get("place_of_birth"),
|
||||
"also_known_as": data.get("also_known_as", []),
|
||||
"imdb_id": data.get("imdb_id"),
|
||||
"known_for_department": data.get("known_for_department"),
|
||||
"popularity": data.get("popularity"),
|
||||
"deathday": data.get("deathday"),
|
||||
"gender": data.get("gender"),
|
||||
"homepage": data.get("homepage"),
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"TMDB person details failed for {person_id}: {e}", file=sys.stderr)
|
||||
return {}
|
||||
|
||||
|
||||
def _existing_identity(identity_path: Path) -> dict:
    """Return the identity object already stored at ``identity_path``.

    Returns an empty dict when the file is missing, unreadable, not valid
    JSON, or not a JSON object.
    """
    try:
        with open(identity_path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        return {}
    return data if isinstance(data, dict) else {}


def main():
    """Pre-fetch TMDB data for one file and write identity files plus a cache.

    Steps: look up the file name in the ``videos`` table, derive a movie name
    from it, take the first TMDB search hit, fetch the cast, and for every
    cast member write ``identities/<id>/identity.json`` (and ``profile.jpg``
    when a profile image exists). Finally update ``identities/_index.json``
    and write ``<file_uuid>.tmdb.json`` under the output directory.

    Exits with status 1 when the API key is missing, the file is unknown, no
    movie name can be extracted, or TMDB returns no match.
    """
    parser = argparse.ArgumentParser(description="TMDb Agent — pre-fetch cache")
    parser.add_argument("--file-uuid", required=True, help="File UUID to enrich")
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
    parser.add_argument("--output", default=os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output"))
    args = parser.parse_args()

    if not TMDB_API_KEY:
        print("ERROR: TMDB_API_KEY not set.", file=sys.stderr)
        sys.exit(1)

    # 1. Query DB for file_name
    # The table name cannot be a bound parameter; the schema prefix comes from
    # DATABASE_SCHEMA in the environment, while the uuid is bound normally.
    schema = os.getenv("DATABASE_SCHEMA", "").strip()
    table = f"{schema}.videos" if schema else "videos"
    conn = psycopg2.connect(args.db)
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(f"SELECT file_name FROM {table} WHERE file_uuid = %s", (args.file_uuid,))
            row = cur.fetchone()
    finally:
        # Close the connection even if the query raises.
        conn.close()

    if not row:
        print(f"ERROR: File not found: {args.file_uuid}", file=sys.stderr)
        sys.exit(1)

    file_name = row["file_name"]
    print(f"[TKG-AGENT] File: {file_name} ({args.file_uuid})")

    # 2. Extract movie name
    movie_name = extract_movie_name(file_name)
    if not movie_name:
        print(f"ERROR: Cannot extract movie name from: {file_name}", file=sys.stderr)
        sys.exit(1)
    print(f"[TKG-AGENT] Extracted movie name: '{movie_name}'")

    # 3. Search TMDB
    movie = search_movie(movie_name)
    if not movie:
        print(f"ERROR: No TMDB movie found for: {movie_name}", file=sys.stderr)
        sys.exit(1)
    print(f"[TKG-AGENT] Matched: {movie['title']} (TMDB id={movie['id']})")

    # 4. Fetch credits
    cast = get_credits(movie["id"])
    if not cast:
        # Not fatal: the cache is still written, with an empty identity list.
        print(f"WARN: No cast data found for movie {movie['id']}", file=sys.stderr)

    # 5. Enrich each cast member with person details and write identity files
    output = Path(args.output)
    identities_root = output / "identities"
    identities_root.mkdir(parents=True, exist_ok=True)

    now = datetime.now(timezone.utc).isoformat()
    created_identities = []

    for i, m in enumerate(cast):
        person_id = m["id"]
        person = get_person_details(person_id)

        # Deterministic id: first 32 hex chars of SHA256("tmdb-{movie_id}-{person_id}-{name}"),
        # so a re-run for the same movie lands on the same identity directory.
        uuid_raw = hashlib.sha256(f"tmdb-{movie['id']}-{person_id}-{m['name']}".encode()).hexdigest()[:32]
        profile_url = (
            f"https://image.tmdb.org/t/p/w185{m['profile_path']}"
            if m.get("profile_path") else None
        )

        identity_dir = identities_root / uuid_raw
        identity_dir.mkdir(parents=True, exist_ok=True)
        identity_path = identity_dir / "identity.json"

        # A re-run must not reset data recorded in an earlier identity file:
        # carry over its creation timestamp and any file bindings.
        previous = _existing_identity(identity_path)
        previous_bindings = previous.get("file_bindings")
        file_bindings = previous_bindings if isinstance(previous_bindings, list) else []
        created_at = previous.get("created_at") or now

        # Build identity.json
        metadata = {
            "tmdb_character": m.get("character", ""),
            "tmdb_cast_order": i,
            "tmdb_movie_id": movie["id"],
            "tmdb_movie_title": movie["title"],
            "tmdb_biography": person.get("biography"),
            "tmdb_birthday": person.get("birthday"),
            "tmdb_place_of_birth": person.get("place_of_birth"),
            "tmdb_aliases": person.get("also_known_as", []),
            "tmdb_imdb_id": person.get("imdb_id"),
            "tmdb_department": person.get("known_for_department"),
            "tmdb_popularity": person.get("popularity"),
            "tmdb_deathday": person.get("deathday"),
            "tmdb_gender": person.get("gender"),
            "tmdb_homepage": person.get("homepage"),
        }

        identity = {
            "version": 1,
            "identity_uuid": uuid_raw,
            "name": m["name"],
            "identity_type": "people",
            "source": "tmdb",
            "status": "confirmed",
            "tmdb_id": person_id,
            "tmdb_profile": profile_url,
            # Drop unset fields, but always keep the aliases key.
            "metadata": {k: v for k, v in metadata.items() if v is not None or k == "tmdb_aliases"},
            "file_bindings": file_bindings,
            "created_at": created_at,
            "updated_at": now,
        }

        # Write identity.json
        with open(identity_path, "w", encoding="utf-8") as f:
            json.dump(identity, f, indent=2, ensure_ascii=False)

        # Download profile.jpg (skipped when one is already on disk)
        if profile_url:
            img_path = identity_dir / "profile.jpg"
            if not img_path.exists():
                try:
                    resp = requests.get(profile_url, timeout=15)
                    if resp.status_code == 200:
                        img_path.write_bytes(resp.content)
                except Exception as e:
                    print(f" [WARN] Failed to download profile for {m['name']}: {e}", file=sys.stderr)

        created_identities.append({
            "identity_uuid": uuid_raw,
            "name": m["name"],
            "tmdb_id": person_id,
            "character": m.get("character", ""),
            "order": i,
        })

        if (i + 1) % 5 == 0:
            print(f"[TKG-AGENT] Wrote {i+1}/{len(cast)} identity files")

    # Update _index.json
    index_path = identities_root / "_index.json"
    index = {}
    if index_path.exists():
        # Read with the same encoding the file is written with below; names
        # are stored unescaped (ensure_ascii=False).
        with open(index_path, encoding="utf-8") as f:
            index = json.load(f)
    for ci in created_identities:
        index[ci["identity_uuid"]] = ci["name"]
    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(index, f, indent=2, ensure_ascii=False)

    # Write movie cache ({uuid}.tmdb.json) — simplified, no per-person data
    cache = {
        "file_uuid": args.file_uuid,
        "fetched_at": now,
        "source": "agent",
        "movie": {
            "tmdb_id": movie["id"],
            "title": movie["title"],
            "release_date": movie.get("release_date"),
            "overview": movie.get("overview"),
            "poster_path": movie.get("poster_path"),
        },
        "cast_count": len(cast),
        "identities_created": len(created_identities),
        "identities": created_identities,
    }

    cache_path = output / f"{args.file_uuid}.tmdb.json"
    with open(cache_path, "w", encoding="utf-8") as f:
        json.dump(cache, f, indent=2, ensure_ascii=False)

    print(f"[TKG-AGENT] Cache written: {cache_path}")
    print(f"[TKG-AGENT] Identity files: {len(created_identities)} cast members → {identities_root}/")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -384,6 +384,7 @@ def main():
|
||||
parser.add_argument("video_path", help="視頻文件路徑")
|
||||
parser.add_argument("output_path", help="輸出文件路徑")
|
||||
parser.add_argument("--yolo-result", help="YOLO 結果文件路徑(可選)")
|
||||
parser.add_argument("--uuid", help="檔案 UUID(由 executor 傳入)")
|
||||
parser.add_argument(
|
||||
"--strategy", choices=["fixed", "similarity"], default="fixed", help="分片策略"
|
||||
)
|
||||
|
||||
@@ -57,17 +57,12 @@ async fn translate_text(
|
||||
"temperature": 0.1
|
||||
});
|
||||
|
||||
let response = client
|
||||
.post(llm_url)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to call LLM: {}", e),
|
||||
)
|
||||
})?;
|
||||
let response = client.post(llm_url).json(&body).send().await.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to call LLM: {}", e),
|
||||
)
|
||||
})?;
|
||||
|
||||
let llm_resp: serde_json::Value = response.json().await.map_err(|e| {
|
||||
(
|
||||
|
||||
@@ -97,17 +97,25 @@ struct SceneSummaryResult {
|
||||
|
||||
/// Returns the chat-completions endpoint to send LLM requests to.
///
/// Lookup order: the `MOMENTRY_LLM_URL` environment variable, then
/// `MOMENTRY_LLM_SUMMARY_URL`, then the built-in localhost default. A variable
/// that is unset or not valid Unicode falls through to the next candidate.
fn llm_base_url() -> String {
    std::env::var("MOMENTRY_LLM_URL")
        .or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_URL"))
        .unwrap_or_else(|_| "http://localhost:8082/v1/chat/completions".to_string())
}
|
||||
|
||||
fn llm_model() -> String {
|
||||
let v = std::env::var("MOMENTRY_LLM_MODEL");
|
||||
if v.is_ok() { return v.unwrap(); }
|
||||
if v.is_ok() {
|
||||
return v.unwrap();
|
||||
}
|
||||
let v = std::env::var("MOMENTRY_LLM_SUMMARY_MODEL");
|
||||
if v.is_ok() { return v.unwrap(); }
|
||||
if v.is_ok() {
|
||||
return v.unwrap();
|
||||
}
|
||||
"google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string()
|
||||
}
|
||||
|
||||
@@ -115,7 +123,7 @@ fn llm_model() -> String {
|
||||
|
||||
async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Vec<CutScene>> {
|
||||
let table = schema::table_name("chunk");
|
||||
sqlx::query_as::<_, (String, i64, i64, f64, f64, f64, serde_json::Value, serde_json::Value, Option<String>)>(&format!(
|
||||
sqlx::query_as::<_, (String, i64, i64, f64, Option<f64>, Option<f64>, serde_json::Value, Option<serde_json::Value>, Option<String>)>(&format!(
|
||||
r#"SELECT chunk_id, start_frame, end_frame, fps, start_time, end_time, content, metadata, summary_text
|
||||
FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' ORDER BY start_frame"#, table
|
||||
))
|
||||
@@ -123,7 +131,8 @@ async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Ve
|
||||
.fetch_all(db.pool()).await?
|
||||
.into_iter().map(|r| Ok(CutScene {
|
||||
chunk_id: r.0, start_frame: r.1, end_frame: r.2,
|
||||
fps: r.3, start_time: r.4, end_time: r.5, content: r.6, metadata: r.7, summary_text: r.8,
|
||||
fps: r.3, start_time: r.4.unwrap_or(0.0), end_time: r.5.unwrap_or(0.0),
|
||||
content: r.6, metadata: r.7.unwrap_or(serde_json::json!({})), summary_text: r.8,
|
||||
})).collect()
|
||||
}
|
||||
|
||||
@@ -133,7 +142,7 @@ async fn fetch_sentences_in_scene(
|
||||
cut: &CutScene,
|
||||
) -> anyhow::Result<Vec<SentenceChunk>> {
|
||||
let table = schema::table_name("chunk");
|
||||
sqlx::query_as::<_, (String, String, f64, f64, i64, i64, serde_json::Value)>(&format!(
|
||||
sqlx::query_as::<_, (String, String, Option<f64>, Option<f64>, i64, i64, serde_json::Value)>(&format!(
|
||||
r#"SELECT chunk_id, COALESCE(text_content,''), start_time, end_time, start_frame, end_frame, content
|
||||
FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'
|
||||
AND start_time >= $2 AND end_time <= $3 ORDER BY start_time"#, table
|
||||
@@ -141,7 +150,7 @@ async fn fetch_sentences_in_scene(
|
||||
.bind(file_uuid).bind(cut.start_time).bind(cut.end_time)
|
||||
.fetch_all(db.pool()).await?
|
||||
.into_iter().map(|r| Ok(SentenceChunk {
|
||||
chunk_id: r.0, text: r.1, start_time: r.2, end_time: r.3,
|
||||
chunk_id: r.0, text: r.1, start_time: r.2.unwrap_or(0.0), end_time: r.3.unwrap_or(0.0),
|
||||
start_frame: r.4, end_frame: r.5, content: r.6,
|
||||
})).collect()
|
||||
}
|
||||
@@ -540,10 +549,7 @@ async fn analyze_5w1h(
|
||||
if let Some(ref t) = cut.summary_text {
|
||||
if t.len() > 20 {
|
||||
processed += 1;
|
||||
prev_context.push(format!(
|
||||
"Scene (t={:.0}s): {}",
|
||||
cut.start_time, t
|
||||
));
|
||||
prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -621,10 +627,7 @@ async fn batch_analyze_5w1h(
|
||||
if let Some(ref t) = cut.summary_text {
|
||||
if t.len() > 20 {
|
||||
processed += 1;
|
||||
prev_context.push(format!(
|
||||
"Scene (t={:.0}s): {}",
|
||||
cut.start_time, t
|
||||
));
|
||||
prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -713,10 +716,7 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
|
||||
if let Some(ref t) = cut.summary_text {
|
||||
if t.len() > 20 {
|
||||
processed += 1;
|
||||
prev_context.push(format!(
|
||||
"Scene (t={:.0}s): {}",
|
||||
cut.start_time, t
|
||||
));
|
||||
prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -764,38 +764,44 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
|
||||
qdrant.init_collection(768).await?;
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let rows = sqlx::query_as::<_, (String, String, String, f64, f64)>(
|
||||
&format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time \
|
||||
let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64)>(&format!(
|
||||
"SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time \
|
||||
FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL \
|
||||
AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
|
||||
)
|
||||
AND (text_content IS NOT NULL AND text_content != '') ORDER BY id",
|
||||
chunk_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(db.pool())
|
||||
.await?;
|
||||
|
||||
let total_vec = rows.len();
|
||||
let mut stored = 0usize;
|
||||
for (chunk_id, _ctype, text, start_time, end_time) in &rows {
|
||||
for (chunk_id, _ctype, text, start_frame, end_frame, start_time, end_time) in &rows {
|
||||
let text = text.trim();
|
||||
if text.is_empty() || text.len() < 5 {
|
||||
continue;
|
||||
}
|
||||
match embedder.embed_document(text).await {
|
||||
Ok(vector) => {
|
||||
if let Err(e) = sqlx::query(
|
||||
&format!("UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3", chunk_table)
|
||||
)
|
||||
if let Err(e) = sqlx::query(&format!(
|
||||
"UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3",
|
||||
chunk_table
|
||||
))
|
||||
.bind(&vector as &[f32])
|
||||
.bind(chunk_id)
|
||||
.bind(file_uuid)
|
||||
.execute(db.pool()).await {
|
||||
.execute(db.pool())
|
||||
.await
|
||||
{
|
||||
tracing::error!("[Vectorize] PG failed for {}: {}", chunk_id, e);
|
||||
continue;
|
||||
}
|
||||
let payload = VectorPayload {
|
||||
uuid: file_uuid.to_string(),
|
||||
file_uuid: file_uuid.to_string(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "sentence".to_string(),
|
||||
start_frame: *start_frame,
|
||||
end_frame: *end_frame,
|
||||
start_time: *start_time,
|
||||
end_time: *end_time,
|
||||
text: Some(text.to_string()),
|
||||
|
||||
@@ -93,16 +93,15 @@ async fn create_identity(
|
||||
})?;
|
||||
|
||||
let id_table = crate::core::db::schema::table_name("identities");
|
||||
let name_col = if id_table.starts_with("dev.") { "name" } else { "real_name" };
|
||||
let query = format!(
|
||||
"SELECT uuid, reference_data->'total_references' as total,
|
||||
reference_data->'angles_covered' as angles,
|
||||
reference_data->'quality_avg' as quality
|
||||
FROM {}
|
||||
WHERE {} = $1
|
||||
WHERE name = $1
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1",
|
||||
id_table, name_col
|
||||
id_table
|
||||
);
|
||||
|
||||
let row: Option<(String, Option<i32>, Option<Vec<String>>, Option<f64>)> =
|
||||
@@ -168,11 +167,19 @@ async fn list_identities(
|
||||
let id_table = crate::core::db::schema::table_name("identities");
|
||||
|
||||
let total: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", id_table))
|
||||
.fetch_one(db.pool()).await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Count error: {}", e)))?;
|
||||
.fetch_one(db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Count error: {}", e),
|
||||
)
|
||||
})?;
|
||||
|
||||
let name_col = if id_table.starts_with("dev.") { "name" } else { "real_name" };
|
||||
let sql = format!("SELECT id::int, uuid, {} AS name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2", name_col, id_table);
|
||||
let sql = format!(
|
||||
"SELECT id::int, uuid, name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2",
|
||||
id_table
|
||||
);
|
||||
|
||||
let rows: Vec<(i32, uuid::Uuid, String, Option<serde_json::Value>)> = match sqlx::query_as(&sql)
|
||||
.bind(page_size as i64)
|
||||
@@ -200,12 +207,25 @@ async fn list_identities(
|
||||
.collect();
|
||||
|
||||
let identities_table = crate::core::db::schema::table_name("identities");
|
||||
let total_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", identities_table))
|
||||
.fetch_one(db.pool()).await.unwrap_or(0);
|
||||
let tmdb_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table))
|
||||
.fetch_one(db.pool()).await.unwrap_or(0);
|
||||
let auto_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE source = 'auto'", identities_table))
|
||||
.fetch_one(db.pool()).await.unwrap_or(0);
|
||||
let total_identities: i64 =
|
||||
sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", identities_table))
|
||||
.fetch_one(db.pool())
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
let tmdb_identities: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE source = 'tmdb'",
|
||||
identities_table
|
||||
))
|
||||
.fetch_one(db.pool())
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
let auto_identities: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE source = 'auto'",
|
||||
identities_table
|
||||
))
|
||||
.fetch_one(db.pool())
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
|
||||
Ok(Json(IdentityListResponse {
|
||||
identities,
|
||||
|
||||
@@ -15,8 +15,14 @@ use crate::core::db::PostgresDb;
|
||||
|
||||
pub fn identity_agent_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/agents/identity/match-from-photo", post(match_from_photo))
|
||||
.route("/api/v1/agents/identity/match-from-trace", post(match_from_trace))
|
||||
.route(
|
||||
"/api/v1/agents/identity/match-from-photo",
|
||||
post(match_from_photo),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/agents/identity/match-from-trace",
|
||||
post(match_from_trace),
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -73,13 +79,21 @@ async fn match_from_photo(
|
||||
|
||||
let uuid_clean = identity_uuid.replace('-', "");
|
||||
if uuid_clean.is_empty() || file_uuid.is_empty() {
|
||||
return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
|
||||
"success": false, "message": "identity_uuid and file_uuid are required"
|
||||
}))));
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({
|
||||
"success": false, "message": "identity_uuid and file_uuid are required"
|
||||
})),
|
||||
));
|
||||
}
|
||||
let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({
|
||||
"success": false, "message": "No image field found. Use field name 'image'."
|
||||
}))))?;
|
||||
let data = image_data.ok_or_else(|| {
|
||||
(
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({
|
||||
"success": false, "message": "No image field found. Use field name 'image'."
|
||||
})),
|
||||
)
|
||||
})?;
|
||||
|
||||
// 1. Save uploaded image to temp
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
|
||||
@@ -88,11 +102,17 @@ async fn match_from_photo(
|
||||
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
|
||||
let temp_dir = std::env::temp_dir().join("momentry_match_face");
|
||||
std::fs::create_dir_all(&temp_dir).map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)})),
|
||||
)
|
||||
})?;
|
||||
let temp_img = temp_dir.join(format!("{}.jpg", uuid_clean));
|
||||
std::fs::write(&temp_img, &data).map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)})),
|
||||
)
|
||||
})?;
|
||||
|
||||
// 2. Extract face embedding via Python script
|
||||
@@ -103,79 +123,109 @@ async fn match_from_photo(
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)})),
|
||||
)
|
||||
})?;
|
||||
|
||||
let _ = std::fs::remove_file(&temp_img);
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
|
||||
"success": false, "message": format!("Face extraction failed: {}", stderr)
|
||||
}))));
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({
|
||||
"success": false, "message": format!("Face extraction failed: {}", stderr)
|
||||
})),
|
||||
));
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let extract_result: serde_json::Value = serde_json::from_str(&stdout).map_err(|_| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Failed to parse extractor output"})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"message": "Failed to parse extractor output"})),
|
||||
)
|
||||
})?;
|
||||
|
||||
let embedding: Vec<f64> = serde_json::from_value(
|
||||
extract_result.get("embedding")
|
||||
.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({"message": "No embedding in extractor output"}))))?
|
||||
.clone()
|
||||
).map_err(|_| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Invalid embedding format"})))
|
||||
extract_result
|
||||
.get("embedding")
|
||||
.ok_or_else(|| {
|
||||
(
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({"message": "No embedding in extractor output"})),
|
||||
)
|
||||
})?
|
||||
.clone(),
|
||||
)
|
||||
.map_err(|_| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"message": "Invalid embedding format"})),
|
||||
)
|
||||
})?;
|
||||
|
||||
let embedding_f32: Vec<f32> = embedding.into_iter().map(|v| v as f32).collect();
|
||||
|
||||
// 3. Look up identity internal ID
|
||||
let id_table = schema::table_name("identities");
|
||||
let identity_id_row: Option<(i32,)> = sqlx::query_as(
|
||||
&format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table)
|
||||
)
|
||||
let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
|
||||
id_table
|
||||
))
|
||||
.bind(&uuid_clean)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
|
||||
)
|
||||
})?;
|
||||
|
||||
let identity_id = match identity_id_row {
|
||||
Some((id,)) => id,
|
||||
None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
|
||||
"success": false, "message": "Identity not found"
|
||||
})))),
|
||||
None => {
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({
|
||||
"success": false, "message": "Identity not found"
|
||||
})),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
// 4. Find best matching trace (highest similarity, no threshold)
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let best_match: Option<(i32, i32, f64)> = sqlx::query_as(
|
||||
&format!(
|
||||
r#"SELECT id, trace_id,
|
||||
let best_match: Option<(i32, i32, f64)> = sqlx::query_as(&format!(
|
||||
r#"SELECT id, trace_id,
|
||||
1 - (embedding::vector <=> $1::vector) as similarity
|
||||
FROM {}
|
||||
WHERE file_uuid = $2 AND embedding IS NOT NULL
|
||||
ORDER BY embedding::vector <=> $1::vector
|
||||
LIMIT 1"#,
|
||||
fd_table
|
||||
)
|
||||
)
|
||||
fd_table
|
||||
))
|
||||
.bind(&embedding_f32)
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
|
||||
)
|
||||
})?;
|
||||
|
||||
// 5. Update best match face_detection
|
||||
let mut traces_matched: Vec<i32> = Vec::new();
|
||||
if let Some((fb_id, fb_trace, fb_sim)) = best_match {
|
||||
let _ = sqlx::query(
|
||||
&format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table)
|
||||
)
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id = $1 WHERE id = $2",
|
||||
fd_table
|
||||
))
|
||||
.bind(identity_id)
|
||||
.bind(fb_id)
|
||||
.execute(state.db.pool())
|
||||
@@ -191,7 +241,10 @@ async fn match_from_photo(
|
||||
file_uuid,
|
||||
matches: 1,
|
||||
traces_matched,
|
||||
message: format!("Best trace: trace_id={}, similarity={:.4}", fb_trace, fb_sim),
|
||||
message: format!(
|
||||
"Best trace: trace_id={}, similarity={:.4}",
|
||||
fb_trace, fb_sim
|
||||
),
|
||||
}))
|
||||
} else {
|
||||
Ok(Json(MatchFromPhotoResponse {
|
||||
@@ -221,26 +274,30 @@ async fn match_from_trace(
|
||||
// 1. Get 3 best face embeddings from this trace at different angles
|
||||
// Divide trace frame range into 3 segments, pick best face from each
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>(
|
||||
&format!(
|
||||
"SELECT embedding, frame_number FROM {} \
|
||||
let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>(&format!(
|
||||
"SELECT embedding, frame_number FROM {} \
|
||||
WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
|
||||
ORDER BY frame_number ASC",
|
||||
fd_table
|
||||
)
|
||||
)
|
||||
fd_table
|
||||
))
|
||||
.bind(&req.file_uuid)
|
||||
.bind(req.trace_id)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
|
||||
)
|
||||
})?;
|
||||
|
||||
if all_faces.is_empty() {
|
||||
return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
|
||||
"success": false, "message": "No embedding found for this trace"
|
||||
}))));
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({
|
||||
"success": false, "message": "No embedding found for this trace"
|
||||
})),
|
||||
));
|
||||
}
|
||||
|
||||
// Pick 3 samples: divide frame range into 3 segments, use face with largest area per segment
|
||||
@@ -254,14 +311,12 @@ async fn match_from_trace(
|
||||
let mut query_embeddings: Vec<Vec<f32>> = Vec::new();
|
||||
|
||||
// Get width*height info if available (not all pipelines store it)
|
||||
let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(
|
||||
&format!(
|
||||
"SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \
|
||||
let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(&format!(
|
||||
"SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \
|
||||
FROM {} WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
|
||||
ORDER BY frame_number ASC",
|
||||
fd_table
|
||||
)
|
||||
)
|
||||
fd_table
|
||||
))
|
||||
.bind(&req.file_uuid)
|
||||
.bind(req.trace_id)
|
||||
.fetch_all(state.db.pool())
|
||||
@@ -296,9 +351,8 @@ async fn match_from_trace(
|
||||
let mut seen_trace_ids = std::collections::HashSet::new();
|
||||
|
||||
for qemb in &query_embeddings {
|
||||
let top = sqlx::query_as::<_, (i32, i32, f64)>(
|
||||
&format!(
|
||||
r#"SELECT id, trace_id,
|
||||
let top = sqlx::query_as::<_, (i32, i32, f64)>(&format!(
|
||||
r#"SELECT id, trace_id,
|
||||
1 - (embedding::vector <=> $1::vector) as similarity
|
||||
FROM {}
|
||||
WHERE file_uuid = $2
|
||||
@@ -306,16 +360,18 @@ async fn match_from_trace(
|
||||
AND embedding IS NOT NULL
|
||||
ORDER BY embedding::vector <=> $1::vector
|
||||
LIMIT 1"#,
|
||||
fd_table
|
||||
)
|
||||
)
|
||||
fd_table
|
||||
))
|
||||
.bind(qemb)
|
||||
.bind(&req.file_uuid)
|
||||
.bind(req.trace_id)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
|
||||
)
|
||||
})?;
|
||||
|
||||
if let Some((cface_id, c_trace_id, c_sim)) = top {
|
||||
@@ -327,35 +383,49 @@ async fn match_from_trace(
|
||||
|
||||
// 3. Look up identity internal ID
|
||||
let id_table = schema::table_name("identities");
|
||||
let identity_id_row: Option<(i32,)> = sqlx::query_as(
|
||||
&format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table)
|
||||
)
|
||||
let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
|
||||
id_table
|
||||
))
|
||||
.bind(&uuid_clean)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
|
||||
)
|
||||
})?;
|
||||
|
||||
let identity_id = match identity_id_row {
|
||||
Some((id,)) => id,
|
||||
None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
|
||||
"success": false, "message": "Identity not found"
|
||||
})))),
|
||||
None => {
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({
|
||||
"success": false, "message": "Identity not found"
|
||||
})),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
// 4. Update matched face_detections
|
||||
let mut traces_matched: Vec<i32> = Vec::new();
|
||||
for (id, trace_id, _similarity) in &validated {
|
||||
if let Err(e) = sqlx::query(
|
||||
&format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table)
|
||||
)
|
||||
if let Err(e) = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id = $1 WHERE id = $2",
|
||||
fd_table
|
||||
))
|
||||
.bind(identity_id)
|
||||
.bind(id)
|
||||
.execute(state.db.pool())
|
||||
.await
|
||||
{
|
||||
tracing::warn!("[match-from-trace] Failed to update face_detection {}: {}", id, e);
|
||||
tracing::warn!(
|
||||
"[match-from-trace] Failed to update face_detection {}: {}",
|
||||
id,
|
||||
e
|
||||
);
|
||||
} else {
|
||||
if !traces_matched.contains(trace_id) {
|
||||
traces_matched.push(*trace_id);
|
||||
@@ -364,9 +434,10 @@ async fn match_from_trace(
|
||||
}
|
||||
|
||||
// 5. Also bind the source trace itself
|
||||
let _ = sqlx::query(
|
||||
&format!("UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3", fd_table)
|
||||
)
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3",
|
||||
fd_table
|
||||
))
|
||||
.bind(identity_id)
|
||||
.bind(&req.file_uuid)
|
||||
.bind(req.trace_id)
|
||||
@@ -388,7 +459,10 @@ async fn match_from_trace(
|
||||
file_uuid: req.file_uuid,
|
||||
matches: match_count,
|
||||
traces_matched,
|
||||
message: format!("Matched {} faces ({} unique traces)", match_count, trace_count),
|
||||
message: format!(
|
||||
"Matched {} faces ({} unique traces)",
|
||||
match_count, trace_count
|
||||
),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -461,7 +535,10 @@ fn analyze_person_speaker_overlap(
|
||||
}
|
||||
|
||||
// Check if persons co-occur in time (frame proximity)
|
||||
let overlap = person.frames.iter().any(|f| other_person.frames.contains(f));
|
||||
let overlap = person
|
||||
.frames
|
||||
.iter()
|
||||
.any(|f| other_person.frames.contains(f));
|
||||
if overlap {
|
||||
matched_persons.push(other_person.person_id.clone());
|
||||
visited_persons.insert(other_person.person_id.clone());
|
||||
@@ -474,9 +551,10 @@ fn analyze_person_speaker_overlap(
|
||||
person.frames.iter().max().copied().unwrap_or(0) as f64,
|
||||
);
|
||||
for speaker in speakers {
|
||||
let has_overlap = speaker.segments.iter().any(|(start, end)| {
|
||||
*start <= person_time_range.1 && *end >= person_time_range.0
|
||||
});
|
||||
let has_overlap = speaker
|
||||
.segments
|
||||
.iter()
|
||||
.any(|(start, end)| *start <= person_time_range.1 && *end >= person_time_range.0);
|
||||
if has_overlap {
|
||||
if !matched_speakers.contains(&speaker.speaker_id) {
|
||||
matched_speakers.push(speaker.speaker_id.clone());
|
||||
@@ -563,11 +641,12 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
|
||||
// Step 2: 載入所有 face_detections(含 frame_number),按 trace_id 分組
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(
|
||||
&format!("SELECT trace_id, frame_number, embedding FROM {} \
|
||||
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(&format!(
|
||||
"SELECT trace_id, frame_number, embedding FROM {} \
|
||||
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
|
||||
ORDER BY trace_id, frame_number", fd_table),
|
||||
)
|
||||
ORDER BY trace_id, frame_number",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
@@ -647,16 +726,18 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let mut updated = 0usize;
|
||||
for (tid, name) in &matched {
|
||||
let id_opt = sqlx::query_scalar::<_, Option<i32>>(
|
||||
&format!("SELECT id FROM {} WHERE name=$1 AND source='tmdb'", identities_table),
|
||||
)
|
||||
let id_opt = sqlx::query_scalar::<_, Option<i32>>(&format!(
|
||||
"SELECT id FROM {} WHERE name=$1 AND source='tmdb'",
|
||||
identities_table
|
||||
))
|
||||
.bind(name)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
if let Some(identity_id) = id_opt {
|
||||
let _ = sqlx::query(
|
||||
&format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table),
|
||||
)
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
|
||||
fd_table
|
||||
))
|
||||
.bind(identity_id)
|
||||
.bind(file_uuid)
|
||||
.bind(tid)
|
||||
@@ -726,32 +807,32 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
|
||||
// Step 6: 未匹配的 trace 設 stranger_id = trace_id
|
||||
// trace_id 在同一個 file 內是 sequential integer,直接複用為 stranger_id
|
||||
let stranger_update = sqlx::query(
|
||||
&format!(
|
||||
"UPDATE {} SET stranger_id = trace_id \
|
||||
let stranger_update = sqlx::query(&format!(
|
||||
"UPDATE {} SET stranger_id = trace_id \
|
||||
WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \
|
||||
AND (stranger_id IS NULL OR stranger_id != trace_id)",
|
||||
fd_table
|
||||
)
|
||||
)
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
let stranger_count = stranger_update.rows_affected();
|
||||
|
||||
// Step 7: Save identity files for all affected identities
|
||||
let affected = sqlx::query_scalar::<_, uuid::Uuid>(
|
||||
&format!("SELECT DISTINCT i.uuid FROM {} i \
|
||||
let affected = sqlx::query_scalar::<_, uuid::Uuid>(&format!(
|
||||
"SELECT DISTINCT i.uuid FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id \
|
||||
WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL", identities_table, fd_table)
|
||||
)
|
||||
WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL",
|
||||
identities_table, fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
for uuid in &affected {
|
||||
let us = uuid.to_string().replace('-', "");
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await {
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await
|
||||
{
|
||||
tracing::warn!("[FaceMatch] Failed to save identity file {}: {}", us, e);
|
||||
}
|
||||
}
|
||||
@@ -773,13 +854,15 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
|
||||
// Load face traces with identity_id and frame numbers
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(
|
||||
&format!("SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
|
||||
let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(&format!(
|
||||
"SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
|
||||
FROM {} WHERE file_uuid=$1 AND trace_id IS NOT NULL AND identity_id IS NOT NULL \
|
||||
GROUP BY trace_id", fd_table)
|
||||
)
|
||||
GROUP BY trace_id",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool).await?;
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
if traces.is_empty() {
|
||||
tracing::info!("[SpeakerBind] No face traces with identities");
|
||||
@@ -945,9 +1028,8 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
|
||||
let speakers = extract_speakers_from_asrx_data(&asrx_data);
|
||||
let identities = analyze_person_speaker_overlap(&persons, &speakers);
|
||||
|
||||
let uuid_short = &file_uuid[..8.min(file_uuid.len())];
|
||||
for (idx, id_result) in identities.iter().enumerate() {
|
||||
let identity_name = format!("stranger_{}_{}", uuid_short, idx);
|
||||
let identity_name = format!("stranger_{}", idx);
|
||||
let metadata = serde_json::json!({
|
||||
"source": "identity_agent",
|
||||
"trace_ids": id_result.person_ids,
|
||||
|
||||
@@ -38,8 +38,18 @@ pub fn identity_routes() -> Router<crate::api::server::AppState> {
|
||||
.route("/api/v1/resource/heartbeat", post(heartbeat_resource))
|
||||
.route("/api/v1/resources", get(list_resources))
|
||||
.route("/api/v1/identity/upload", post(upload_identity))
|
||||
.route("/api/v1/identity/:identity_uuid/profile-image", post(upload_profile_image).get(get_profile_image))
|
||||
.route("/api/v1/identity/:identity_uuid/json", get(get_identity_json))
|
||||
.route(
|
||||
"/api/v1/identity/:identity_uuid/profile-image",
|
||||
post(upload_profile_image).get(get_profile_image),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/identity/:identity_uuid/status",
|
||||
get(get_identity_status),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/identity/:identity_uuid/json",
|
||||
get(get_identity_json),
|
||||
)
|
||||
// Experiment: identity text search (non-polluting, separate endpoint)
|
||||
.route("/api/v1/search/identity_text", get(search_identity_text))
|
||||
.route("/api/v1/identities/search", get(search_identities_by_text))
|
||||
@@ -98,9 +108,10 @@ async fn list_files(
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let data = records.0
|
||||
let data = records
|
||||
.0
|
||||
.into_iter()
|
||||
.map(|r| FileItem {
|
||||
.map(|r| FileItem {
|
||||
file_uuid: r.file_uuid,
|
||||
file_name: r.file_name,
|
||||
file_path: r.file_path,
|
||||
@@ -163,7 +174,9 @@ async fn get_file_detail(
|
||||
file_name: f.file_name,
|
||||
file_path: f.file_path,
|
||||
metadata: f.probe_json,
|
||||
created_at: chrono::DateTime::parse_from_rfc3339(&f.created_at).ok().map(|d| d.into()),
|
||||
created_at: chrono::DateTime::parse_from_rfc3339(&f.created_at)
|
||||
.ok()
|
||||
.map(|d| d.into()),
|
||||
})),
|
||||
None => Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
@@ -214,13 +227,42 @@ async fn get_file_identities(
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let fps = 25.0;
|
||||
let data: Vec<FileIdentityItem> = Vec::new();
|
||||
let data: Vec<FileIdentityItem> = records
|
||||
.into_iter()
|
||||
.map(|r| FileIdentityItem {
|
||||
identity_id: r.identity_id,
|
||||
identity_uuid: r.identity_uuid,
|
||||
name: r.name,
|
||||
metadata: r.metadata,
|
||||
face_count: r.face_count,
|
||||
speaker_count: r.speaker_count,
|
||||
start_frame: r.start_frame,
|
||||
end_frame: r.end_frame,
|
||||
start_time: r.start_time,
|
||||
end_time: r.end_time,
|
||||
confidence: r.confidence,
|
||||
})
|
||||
.collect();
|
||||
|
||||
let total = match sqlx::query_scalar::<_, i64>(
|
||||
&format!(
|
||||
"SELECT COUNT(DISTINCT fd.identity_id) FROM {} fd WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
|
||||
crate::core::db::schema::table_name("face_detections")
|
||||
)
|
||||
)
|
||||
.bind(&file_uuid)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
{
|
||||
Ok(c) => c,
|
||||
Err(_) => data.len() as i64,
|
||||
};
|
||||
|
||||
Ok(Json(FileIdentitiesResponse {
|
||||
success: true,
|
||||
file_uuid: file_uuid,
|
||||
fps,
|
||||
total: data.len() as i64,
|
||||
total,
|
||||
page,
|
||||
page_size,
|
||||
data,
|
||||
@@ -243,6 +285,16 @@ pub struct IdentityDetailResponse {
|
||||
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct IdentityStatusResponse {
|
||||
pub success: bool,
|
||||
pub identity_uuid: String,
|
||||
pub name: String,
|
||||
pub has_json: bool,
|
||||
pub has_jpg: bool,
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
fn strip_uuid(u: &uuid::Uuid) -> String {
|
||||
u.to_string().replace('-', "")
|
||||
}
|
||||
@@ -270,7 +322,11 @@ async fn get_identity_detail(
|
||||
metadata: i.metadata,
|
||||
reference_data: i.reference_data,
|
||||
tmdb_id: i.tmdb_id,
|
||||
tmdb_profile: Some(format!("{}/identities/{}/profile.jpg", crate::core::config::OUTPUT_DIR.as_str(), i.uuid.replace('-', ""))),
|
||||
tmdb_profile: Some(format!(
|
||||
"{}/identities/{}/profile.jpg",
|
||||
crate::core::config::OUTPUT_DIR.as_str(),
|
||||
i.uuid.replace('-', "")
|
||||
)),
|
||||
created_at: i.created_at,
|
||||
updated_at: i.updated_at,
|
||||
})),
|
||||
@@ -281,6 +337,44 @@ async fn get_identity_detail(
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_identity_status(
|
||||
State(state): State<crate::api::server::AppState>,
|
||||
Path(identity_uuid): Path<String>,
|
||||
) -> Result<Json<IdentityStatusResponse>, (StatusCode, String)> {
|
||||
let uuid_clean = identity_uuid.replace('-', "");
|
||||
|
||||
let identity = state
|
||||
.db
|
||||
.get_identity_by_uuid(&uuid_clean)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
match identity {
|
||||
Some(i) => {
|
||||
// Check both UUID formats (with and without hyphens)
|
||||
let dir_nohyphen = crate::core::identity::storage::identity_dir(&uuid_clean);
|
||||
let uuid_hyphen = i.uuid.clone();
|
||||
let dir_hyphen = crate::core::identity::storage::identity_dir(&uuid_hyphen);
|
||||
let has_json = dir_nohyphen.join("identity.json").exists()
|
||||
|| dir_hyphen.join("identity.json").exists();
|
||||
let has_jpg = dir_nohyphen.join("profile.jpg").exists()
|
||||
|| dir_hyphen.join("profile.jpg").exists();
|
||||
Ok(Json(IdentityStatusResponse {
|
||||
success: true,
|
||||
identity_uuid: i.uuid.clone(),
|
||||
name: i.name,
|
||||
has_json,
|
||||
has_jpg,
|
||||
error: None,
|
||||
}))
|
||||
}
|
||||
None => Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
format!("Identity not found: {}", uuid_clean),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct IdentityFilesResponse {
|
||||
pub success: bool,
|
||||
@@ -375,10 +469,25 @@ async fn get_identity_files(
|
||||
})
|
||||
.collect();
|
||||
|
||||
let total = match sqlx::query_scalar::<_, i64>(
|
||||
&format!(
|
||||
"SELECT COUNT(DISTINCT fd.file_uuid) FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1)",
|
||||
crate::core::db::schema::table_name("face_detections"),
|
||||
crate::core::db::schema::table_name("identities"),
|
||||
)
|
||||
)
|
||||
.bind(&uuid)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
{
|
||||
Ok(c) => c,
|
||||
Err(_) => data.len() as i64,
|
||||
};
|
||||
|
||||
Ok(Json(IdentityFilesResponse {
|
||||
success: true,
|
||||
identity_uuid: uuid.to_string().replace('-', ""),
|
||||
total: data.len() as i64,
|
||||
total,
|
||||
page,
|
||||
page_size,
|
||||
data,
|
||||
@@ -449,10 +558,25 @@ async fn get_identity_faces(
|
||||
})
|
||||
.collect();
|
||||
|
||||
let total = match sqlx::query_scalar::<_, i64>(
|
||||
&format!(
|
||||
"SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1)",
|
||||
crate::core::db::schema::table_name("face_detections"),
|
||||
crate::core::db::schema::table_name("identities"),
|
||||
)
|
||||
)
|
||||
.bind(&uuid)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
{
|
||||
Ok(c) => c,
|
||||
Err(_) => data.len() as i64,
|
||||
};
|
||||
|
||||
Ok(Json(IdentityFacesResponse {
|
||||
success: true,
|
||||
identity_uuid: uuid.to_string().replace('-', ""),
|
||||
total: data.len() as i64,
|
||||
total,
|
||||
page,
|
||||
page_size,
|
||||
data,
|
||||
@@ -721,12 +845,24 @@ async fn upload_profile_image(
|
||||
let uuid_clean = identity_uuid.replace('-', "");
|
||||
|
||||
// Verify identity exists
|
||||
if state.db.get_identity_by_uuid(&uuid_clean).await.map_err(|_| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"success": false, "message": "DB error"})))
|
||||
})?.is_none() {
|
||||
return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
|
||||
"success": false, "message": "Identity not found"
|
||||
}))));
|
||||
if state
|
||||
.db
|
||||
.get_identity_by_uuid(&uuid_clean)
|
||||
.await
|
||||
.map_err(|_| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"success": false, "message": "DB error"})),
|
||||
)
|
||||
})?
|
||||
.is_none()
|
||||
{
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({
|
||||
"success": false, "message": "Identity not found"
|
||||
})),
|
||||
));
|
||||
}
|
||||
|
||||
// Process multipart upload
|
||||
@@ -740,9 +876,14 @@ async fn upload_profile_image(
|
||||
ext = match content_type.as_str() {
|
||||
"image/png" => "png",
|
||||
"image/jpeg" | "image/jpg" => "jpg",
|
||||
_ => return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
|
||||
"success": false, "message": "Unsupported image type. Use JPEG or PNG."
|
||||
})))),
|
||||
_ => {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({
|
||||
"success": false, "message": "Unsupported image type. Use JPEG or PNG."
|
||||
})),
|
||||
))
|
||||
}
|
||||
};
|
||||
image_data = Some(field.bytes().await.map_err(|_| {
|
||||
(StatusCode::BAD_REQUEST, Json(serde_json::json!({"success": false, "message": "Failed to read image data"})))
|
||||
@@ -750,9 +891,14 @@ async fn upload_profile_image(
|
||||
}
|
||||
}
|
||||
|
||||
let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({
|
||||
"success": false, "message": "No image field found. Use field name 'image'."
|
||||
}))))?;
|
||||
let data = image_data.ok_or_else(|| {
|
||||
(
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({
|
||||
"success": false, "message": "No image field found. Use field name 'image'."
|
||||
})),
|
||||
)
|
||||
})?;
|
||||
|
||||
// Write image file
|
||||
let dir = crate::core::identity::storage::identity_dir(&uuid_clean);
|
||||
@@ -789,8 +935,16 @@ async fn get_profile_image(
|
||||
let path = dir.join(format!("profile.{}", ext));
|
||||
if path.exists() {
|
||||
let data = std::fs::read(&path).map_err(|_| StatusCode::NOT_FOUND)?;
|
||||
let content_type = if *ext == "png" { "image/png" } else { "image/jpeg" };
|
||||
return Ok((StatusCode::OK, [("content-type".to_string(), content_type.to_string())], data));
|
||||
let content_type = if *ext == "png" {
|
||||
"image/png"
|
||||
} else {
|
||||
"image/jpeg"
|
||||
};
|
||||
return Ok((
|
||||
StatusCode::OK,
|
||||
[("content-type".to_string(), content_type.to_string())],
|
||||
data,
|
||||
));
|
||||
}
|
||||
}
|
||||
Err(StatusCode::NOT_FOUND)
|
||||
@@ -802,7 +956,14 @@ async fn get_identity_json(
|
||||
) -> Result<(StatusCode, [(String, String); 1], Vec<u8>), StatusCode> {
|
||||
let clean = identity_uuid.replace('-', "");
|
||||
let with_hyphens = if clean.len() == 32 {
|
||||
format!("{}-{}-{}-{}-{}", &clean[0..8], &clean[8..12], &clean[12..16], &clean[16..20], &clean[20..32])
|
||||
format!(
|
||||
"{}-{}-{}-{}-{}",
|
||||
&clean[0..8],
|
||||
&clean[8..12],
|
||||
&clean[12..16],
|
||||
&clean[16..20],
|
||||
&clean[20..32]
|
||||
)
|
||||
} else {
|
||||
identity_uuid.clone()
|
||||
};
|
||||
@@ -821,7 +982,9 @@ async fn get_identity_json(
|
||||
}
|
||||
|
||||
// 2. Lazy Sync: If file missing, generate from DB and save
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &clean).await {
|
||||
if let Err(e) =
|
||||
crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &clean).await
|
||||
{
|
||||
tracing::warn!("[identity-json] Lazy sync failed for {}: {}", clean, e);
|
||||
return Err(StatusCode::NOT_FOUND);
|
||||
}
|
||||
@@ -858,7 +1021,7 @@ struct IdentityTextHit {
|
||||
chunk_id: String,
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
text_content: String,
|
||||
text_content: Option<String>,
|
||||
identity_id: Option<i32>,
|
||||
identity_name: Option<String>,
|
||||
identity_source: Option<String>,
|
||||
@@ -889,7 +1052,7 @@ async fn search_identity_text(
|
||||
|
||||
let query = format!(
|
||||
r#"SELECT c.file_uuid, c.chunk_id, c.start_time, c.end_time, c.text_content,
|
||||
fd.identity_id, CASE WHEN id_table LIKE 'dev.%' THEN i.name ELSE i.real_name END AS identity_name, i.source AS identity_source,
|
||||
fd.identity_id, i.name AS identity_name, i.source AS identity_source,
|
||||
fd.trace_id
|
||||
FROM {} c
|
||||
LEFT JOIN {} fd ON fd.file_uuid = c.file_uuid
|
||||
@@ -902,18 +1065,42 @@ async fn search_identity_text(
|
||||
chunk_table, fd_table, id_table
|
||||
);
|
||||
|
||||
let rows = sqlx::query_as::<_, (String, String, f64, f64, String, Option<i32>, Option<String>, Option<String>, Option<i32>)>(&query)
|
||||
.bind(¶ms.uuid).bind(&like_q).bind(limit)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
let rows = sqlx::query_as::<
|
||||
_,
|
||||
(
|
||||
String,
|
||||
String,
|
||||
f64,
|
||||
f64,
|
||||
Option<String>,
|
||||
Option<i32>,
|
||||
Option<String>,
|
||||
Option<String>,
|
||||
Option<i32>,
|
||||
),
|
||||
>(&query)
|
||||
.bind(¶ms.uuid)
|
||||
.bind(&like_q)
|
||||
.bind(limit)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
let results: Vec<IdentityTextHit> = rows
|
||||
.into_iter()
|
||||
.map(|(fu, cid, st, et, txt, iid, iname, isrc, tid)| IdentityTextHit {
|
||||
file_uuid: fu, chunk_id: cid, start_time: st, end_time: et, text_content: txt,
|
||||
identity_id: iid, identity_name: iname, identity_source: isrc, trace_id: tid,
|
||||
})
|
||||
.map(
|
||||
|(fu, cid, st, et, txt, iid, iname, isrc, tid)| IdentityTextHit {
|
||||
file_uuid: fu,
|
||||
chunk_id: cid,
|
||||
start_time: st,
|
||||
end_time: et,
|
||||
text_content: txt,
|
||||
identity_id: iid,
|
||||
identity_name: iname,
|
||||
identity_source: isrc,
|
||||
trace_id: tid,
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
let total = results.len() as i64;
|
||||
@@ -922,7 +1109,14 @@ async fn search_identity_text(
|
||||
let start = (page - 1) * page_size;
|
||||
let paged: Vec<IdentityTextHit> = results.into_iter().skip(start).take(page_size).collect();
|
||||
let limit = params.limit.unwrap_or(50) as usize;
|
||||
Ok(Json(IdentityTextResponse { success: true, total, page, page_size, limit, results: paged }))
|
||||
Ok(Json(IdentityTextResponse {
|
||||
success: true,
|
||||
total,
|
||||
page,
|
||||
page_size,
|
||||
limit,
|
||||
results: paged,
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -942,7 +1136,7 @@ struct IdentitySearchHit {
|
||||
trace_id: Option<i32>,
|
||||
chunk_id: String,
|
||||
start_time: f64,
|
||||
text_content: String,
|
||||
text_content: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -965,7 +1159,7 @@ async fn search_identities_by_text(
|
||||
let limit = params.limit.unwrap_or(50).min(100);
|
||||
|
||||
let query = format!(
|
||||
r#"SELECT i.id::int, COALESCE(i.real_name, i.actor_name, i.name) AS name, i.source, i.tmdb_id,
|
||||
r#"SELECT i.id::int, i.name, i.source, i.tmdb_id,
|
||||
fd.file_uuid, fd.trace_id,
|
||||
c.chunk_id, c.start_time, c.text_content
|
||||
FROM {} i
|
||||
@@ -973,30 +1167,58 @@ async fn search_identities_by_text(
|
||||
JOIN {} c ON c.file_uuid = fd.file_uuid
|
||||
AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0)
|
||||
AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0)
|
||||
WHERE COALESCE(i.real_name, i.actor_name, i.name) ILIKE $1
|
||||
WHERE i.name ILIKE $1
|
||||
AND ($2::text IS NULL OR fd.file_uuid = $2)
|
||||
ORDER BY COALESCE(i.real_name, i.actor_name, i.name), c.start_time
|
||||
ORDER BY i.name, c.start_time
|
||||
LIMIT $3"#,
|
||||
id_table, fd_table, chunk_table
|
||||
);
|
||||
|
||||
let rows = sqlx::query_as::<_, (i32, String, Option<String>, Option<i32>, String, Option<i32>, String, f64, String)>(&query)
|
||||
.bind(&like_q).bind(¶ms.uuid).bind(limit)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[identities/search] Query failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
let rows = sqlx::query_as::<
|
||||
_,
|
||||
(
|
||||
i32,
|
||||
String,
|
||||
Option<String>,
|
||||
Option<i32>,
|
||||
String,
|
||||
Option<i32>,
|
||||
String,
|
||||
f64,
|
||||
Option<String>,
|
||||
),
|
||||
>(&query)
|
||||
.bind(&like_q)
|
||||
.bind(¶ms.uuid)
|
||||
.bind(limit)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[identities/search] Query failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
let results: Vec<IdentitySearchHit> = rows
|
||||
.into_iter()
|
||||
.map(|(iid, name, src, tid, fu, trace_id, cid, st, txt)| IdentitySearchHit {
|
||||
identity_id: iid, name, source: src, tmdb_id: tid,
|
||||
file_uuid: fu, trace_id, chunk_id: cid, start_time: st, text_content: txt,
|
||||
})
|
||||
.map(
|
||||
|(iid, name, src, tid, fu, trace_id, cid, st, txt)| IdentitySearchHit {
|
||||
identity_id: iid,
|
||||
name,
|
||||
source: src,
|
||||
tmdb_id: tid,
|
||||
file_uuid: fu,
|
||||
trace_id,
|
||||
chunk_id: cid,
|
||||
start_time: st,
|
||||
text_content: txt,
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
let total = results.len() as i64;
|
||||
Ok(Json(IdentitySearchResponse { success: true, total, results }))
|
||||
Ok(Json(IdentitySearchResponse {
|
||||
success: true,
|
||||
total,
|
||||
results,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use axum::{
|
||||
extract::{Path, Query},
|
||||
extract::{Path, Query, State},
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::{get, post},
|
||||
@@ -77,7 +77,7 @@ pub async fn bind_identity(
|
||||
|
||||
// Get identity_id from identity_uuid
|
||||
let identity_row: Option<(i64, String)> = sqlx::query_as(&format!(
|
||||
"SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid",
|
||||
"SELECT id, name FROM {} WHERE uuid = $1::uuid",
|
||||
id_table
|
||||
))
|
||||
.bind(&identity_uuid)
|
||||
@@ -116,8 +116,14 @@ pub async fn bind_identity(
|
||||
|
||||
let uuid_clean = identity_uuid.replace('-', "");
|
||||
// Sync identity JSON file
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &uuid_clean).await {
|
||||
tracing::warn!("[bind] Failed to sync identity file for {}: {}", uuid_clean, e);
|
||||
if let Err(e) =
|
||||
crate::core::identity::storage::save_identity_file_by_pool(&db, &uuid_clean).await
|
||||
{
|
||||
tracing::warn!(
|
||||
"[bind] Failed to sync identity file for {}: {}",
|
||||
uuid_clean,
|
||||
e
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Json(ApiResponse {
|
||||
@@ -189,8 +195,15 @@ pub async fn unbind_identity(
|
||||
.ok()
|
||||
.flatten();
|
||||
if let Some(identity_uuid) = uuid {
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &identity_uuid).await {
|
||||
tracing::warn!("[unbind] Failed to sync identity file for {}: {}", identity_uuid, e);
|
||||
if let Err(e) =
|
||||
crate::core::identity::storage::save_identity_file_by_pool(&db, &identity_uuid)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
"[unbind] Failed to sync identity file for {}: {}",
|
||||
identity_uuid,
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -221,7 +234,7 @@ pub async fn merge_identities(
|
||||
|
||||
// Get IDs for both identities
|
||||
let from_row: Option<(i64, String)> = sqlx::query_as(&format!(
|
||||
"SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid",
|
||||
"SELECT id, name FROM {} WHERE uuid = $1::uuid",
|
||||
id_table
|
||||
))
|
||||
.bind(&identity_uuid)
|
||||
@@ -239,7 +252,7 @@ pub async fn merge_identities(
|
||||
))?;
|
||||
|
||||
let into_row: Option<(i64, String)> = sqlx::query_as(&format!(
|
||||
"SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid",
|
||||
"SELECT id, name FROM {} WHERE uuid = $1::uuid",
|
||||
id_table
|
||||
))
|
||||
.bind(&req.into_uuid)
|
||||
@@ -299,8 +312,14 @@ pub async fn merge_identities(
|
||||
|
||||
// Sync target identity JSON
|
||||
let into_uuid_clean = req.into_uuid.replace('-', "");
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &into_uuid_clean).await {
|
||||
tracing::warn!("[merge] Failed to sync target identity file for {}: {}", into_uuid_clean, e);
|
||||
if let Err(e) =
|
||||
crate::core::identity::storage::save_identity_file_by_pool(&db, &into_uuid_clean).await
|
||||
{
|
||||
tracing::warn!(
|
||||
"[merge] Failed to sync target identity file for {}: {}",
|
||||
into_uuid_clean,
|
||||
e
|
||||
);
|
||||
}
|
||||
|
||||
// Delete source identity JSON if not keeping history
|
||||
@@ -339,6 +358,106 @@ pub struct ListIdentitiesParams {
|
||||
pub offset: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct IdentityTraceInfo {
|
||||
pub file_uuid: String,
|
||||
pub trace_id: i32,
|
||||
pub frame_count: i64,
|
||||
pub first_frame: i32,
|
||||
pub last_frame: i32,
|
||||
pub first_sec: f64,
|
||||
pub last_sec: f64,
|
||||
pub avg_confidence: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct IdentityTracesResponse {
|
||||
pub success: bool,
|
||||
pub identity_uuid: String,
|
||||
pub name: String,
|
||||
pub total_traces: usize,
|
||||
pub total_faces: i64,
|
||||
pub traces: Vec<IdentityTraceInfo>,
|
||||
}
|
||||
|
||||
pub async fn get_identity_traces(
|
||||
State(state): State<crate::api::server::AppState>,
|
||||
Path(identity_uuid): Path<String>,
|
||||
) -> Result<Json<IdentityTracesResponse>, (StatusCode, String)> {
|
||||
let id_table = crate::core::db::schema::table_name("identities");
|
||||
let fd_table = crate::core::db::schema::table_name("face_detections");
|
||||
|
||||
// Get identity name
|
||||
let identity: Option<(i32, String)> = sqlx::query_as(&format!(
|
||||
"SELECT id, name FROM {} WHERE uuid = $1::uuid",
|
||||
id_table
|
||||
))
|
||||
.bind(&identity_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let (identity_id, name) =
|
||||
identity.ok_or((StatusCode::NOT_FOUND, "Identity not found".to_string()))?;
|
||||
|
||||
// Get all traces for this identity across all files
|
||||
let rows: Vec<(String, i32, i64, i32, i32, f64, f64, f64)> = sqlx::query_as(&format!(
|
||||
r#"SELECT fd.file_uuid::text, fd.trace_id,
|
||||
COUNT(*)::bigint AS frame_count,
|
||||
MIN(fd.frame_number)::int AS first_frame,
|
||||
MAX(fd.frame_number)::int AS last_frame,
|
||||
ROUND(MIN(fd.frame_number)::numeric / 25.0, 1)::float8 AS first_sec,
|
||||
ROUND(MAX(fd.frame_number)::numeric / 25.0, 1)::float8 AS last_sec,
|
||||
ROUND(AVG(fd.confidence)::numeric, 4)::float8 AS avg_confidence
|
||||
FROM {} fd
|
||||
WHERE fd.identity_id = $1
|
||||
GROUP BY fd.file_uuid, fd.trace_id
|
||||
ORDER BY fd.file_uuid, fd.trace_id"#,
|
||||
fd_table
|
||||
))
|
||||
.bind(identity_id)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let total_traces = rows.len();
|
||||
let total_faces: i64 = rows.iter().map(|r| r.2).sum();
|
||||
|
||||
let traces: Vec<IdentityTraceInfo> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(
|
||||
file_uuid,
|
||||
trace_id,
|
||||
frame_count,
|
||||
first_frame,
|
||||
last_frame,
|
||||
first_sec,
|
||||
last_sec,
|
||||
avg_confidence,
|
||||
)| IdentityTraceInfo {
|
||||
file_uuid,
|
||||
trace_id,
|
||||
frame_count,
|
||||
first_frame,
|
||||
last_frame,
|
||||
first_sec,
|
||||
last_sec,
|
||||
avg_confidence,
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
Ok(Json(IdentityTracesResponse {
|
||||
success: true,
|
||||
identity_uuid,
|
||||
name,
|
||||
total_traces,
|
||||
total_faces,
|
||||
traces,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn identity_binding_routes() -> Router<crate::api::server::AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/identity/:identity_uuid/bind", post(bind_identity))
|
||||
@@ -350,4 +469,8 @@ pub fn identity_binding_routes() -> Router<crate::api::server::AppState> {
|
||||
"/api/v1/identity/:identity_uuid/mergeinto",
|
||||
post(merge_identities),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/identity/:identity_uuid/traces",
|
||||
get(get_identity_traces),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -14,8 +14,16 @@ use crate::core::db::{schema, PostgresDb};
|
||||
|
||||
/// Shared video query params: mode=normal|debug, audio=on|off
///
/// Returns `(mode, audio)`. A missing `mode` key yields `"normal"` and a
/// missing `audio` key yields `"on"`. Values that are present are returned
/// verbatim; they are not validated against the documented choices.
fn parse_video_params(params: &std::collections::HashMap<String, String>) -> (String, String) {
    // Each key is looked up exactly once; the fallback string is only
    // allocated when the key is absent.
    let pick = |key: &str, fallback: &str| -> String {
        params
            .get(key)
            .map_or_else(|| fallback.to_string(), String::clone)
    };

    (pick("mode", "normal"), pick("audio", "on"))
}
|
||||
|
||||
@@ -142,9 +150,12 @@ struct BboxParams {
|
||||
/// Priority: start_frame/end_frame > start/end > start_time/end_time.
|
||||
/// If only time is given, convert via fps.
|
||||
fn resolve_frame_range(
|
||||
start_frame: Option<i32>, end_frame: Option<i32>,
|
||||
start: Option<i32>, end: Option<i32>,
|
||||
start_time: Option<f64>, end_time: Option<f64>,
|
||||
start_frame: Option<i32>,
|
||||
end_frame: Option<i32>,
|
||||
start: Option<i32>,
|
||||
end: Option<i32>,
|
||||
start_time: Option<f64>,
|
||||
end_time: Option<f64>,
|
||||
fps: f64,
|
||||
) -> (i32, i32) {
|
||||
if let (Some(sf), Some(ef)) = (start_frame.or(start), end_frame.or(end)) {
|
||||
@@ -186,7 +197,15 @@ async fn bbox_overlay_video(
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||
.unwrap_or(24.0);
|
||||
|
||||
let (start_f, end_f) = resolve_frame_range(p.start_frame, p.end_frame, p.start, p.end, p.start_time, p.end_time, fps);
|
||||
let (start_f, end_f) = resolve_frame_range(
|
||||
p.start_frame,
|
||||
p.end_frame,
|
||||
p.start,
|
||||
p.end,
|
||||
p.start_time,
|
||||
p.end_time,
|
||||
fps,
|
||||
);
|
||||
|
||||
let start_sec = start_f as f64 / fps;
|
||||
|
||||
@@ -228,13 +247,26 @@ async fn bbox_overlay_video(
|
||||
let dur = duration.to_string();
|
||||
let mut bbox_args = vec!["-ss", &ss, "-i", &video_path, "-t", &dur];
|
||||
if vf != "null" {
|
||||
bbox_args.extend_from_slice(&["-vf", &vf, "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"]);
|
||||
bbox_args.extend_from_slice(&[
|
||||
"-vf",
|
||||
&vf,
|
||||
"-c:v",
|
||||
"libx264",
|
||||
"-preset",
|
||||
"ultrafast",
|
||||
"-crf",
|
||||
"28",
|
||||
]);
|
||||
} else {
|
||||
bbox_args.extend_from_slice(&["-c", "copy"]);
|
||||
}
|
||||
if bbox_audio == "off" { bbox_args.push("-an"); }
|
||||
if bbox_audio == "off" {
|
||||
bbox_args.push("-an");
|
||||
}
|
||||
bbox_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
|
||||
let status = ffmpeg_cmd().args(&bbox_args).status()
|
||||
let status = ffmpeg_cmd()
|
||||
.args(&bbox_args)
|
||||
.status()
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
if !status.success() {
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
@@ -315,14 +347,20 @@ async fn trace_video(
|
||||
let sk = seek.to_string();
|
||||
let du = duration.to_string();
|
||||
let mut cmd_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, "-c", "copy"];
|
||||
if audio == "off" { cmd_args.push("-an"); }
|
||||
if audio == "off" {
|
||||
cmd_args.push("-an");
|
||||
}
|
||||
cmd_args.extend_from_slice(&["-y", &tmp_str]);
|
||||
let result = ffmpeg_cmd().args(&cmd_args).output()
|
||||
let result = ffmpeg_cmd()
|
||||
.args(&cmd_args)
|
||||
.output()
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
if !result.status.success() {
|
||||
return Err(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
let data = tokio::fs::read(&tmp)
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
return Ok(Response::builder()
|
||||
.header(header::CONTENT_TYPE, "video/mp4")
|
||||
@@ -345,8 +383,11 @@ async fn trace_video(
|
||||
ORDER BY fd.trace_id, fd.frame_number",
|
||||
face_table, identities_table
|
||||
))
|
||||
.bind(&file_uuid).bind(start_fn).bind(end_fn)
|
||||
.fetch_all(state.db.pool()).await
|
||||
.bind(&file_uuid)
|
||||
.bind(start_fn)
|
||||
.bind(end_fn)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
// Group frames by trace_id, compute start_frame per trace; collect bbox per frame
|
||||
@@ -359,7 +400,9 @@ async fn trace_video(
|
||||
if let Some(name) = name_opt {
|
||||
trace_identity.entry(*tid).or_insert_with(|| name.clone());
|
||||
} else {
|
||||
trace_identity.entry(*tid).or_insert_with(|| format!("Stranger_{:03}", tid));
|
||||
trace_identity
|
||||
.entry(*tid)
|
||||
.or_insert_with(|| format!("Stranger_{:03}", tid));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -374,7 +417,8 @@ async fn trace_video(
|
||||
.unwrap_or_else(|| "-".to_string());
|
||||
|
||||
// Sort traces for consistent ordering
|
||||
let mut sorted_traces: Vec<(i32, &Vec<i32>)> = trace_frames.iter().map(|(k, v)| (*k, v)).collect();
|
||||
let mut sorted_traces: Vec<(i32, &Vec<i32>)> =
|
||||
trace_frames.iter().map(|(k, v)| (*k, v)).collect();
|
||||
sorted_traces.sort_by_key(|(tid, _)| *tid);
|
||||
|
||||
let frame_offset = first_frame as i64 - (padding * fps) as i64;
|
||||
@@ -389,10 +433,12 @@ async fn trace_video(
|
||||
"drawtext=text='Frame %{{n}} %{{pts}}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=12"
|
||||
));
|
||||
parts.push(format!(
|
||||
"drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56", cut_label
|
||||
"drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56",
|
||||
cut_label
|
||||
));
|
||||
parts.push(format!(
|
||||
"drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100", file_uuid
|
||||
"drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100",
|
||||
file_uuid
|
||||
));
|
||||
|
||||
// Per-trace entries: show trace_id, start_frame, identity name
|
||||
@@ -400,11 +446,18 @@ async fn trace_video(
|
||||
let mut y_pos = 144;
|
||||
for (tid, frames) in &sorted_traces {
|
||||
let start = frames.iter().min().unwrap_or(&first_frame);
|
||||
let identity = trace_identity.get(tid).map(|s| s.as_str()).unwrap_or("unknown");
|
||||
let identity = trace_identity
|
||||
.get(tid)
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or("unknown");
|
||||
let label = format!("Trace {}\\: start={} {}", tid, start, identity);
|
||||
|
||||
// Continuous range (interpolated): visible from first to last frame
|
||||
let enable = format!("between(n,{},{})", frames[0] as i64 - frame_offset, frames[frames.len() - 1] as i64 - frame_offset);
|
||||
let enable = format!(
|
||||
"between(n,{},{})",
|
||||
frames[0] as i64 - frame_offset,
|
||||
frames[frames.len() - 1] as i64 - frame_offset
|
||||
);
|
||||
|
||||
parts.push(format!(
|
||||
"drawtext=text='{}':fontsize=24:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y={}:enable='{}'",
|
||||
@@ -415,7 +468,11 @@ async fn trace_video(
|
||||
|
||||
// Bounding boxes: interpolated (thickness=1) + actual (thickness=4) with trace_id label
|
||||
for (tid, frames) in &sorted_traces {
|
||||
let range_enable = format!("between(n,{},{})", frames[0] as i64 - frame_offset, frames[frames.len() - 1] as i64 - frame_offset);
|
||||
let range_enable = format!(
|
||||
"between(n,{},{})",
|
||||
frames[0] as i64 - frame_offset,
|
||||
frames[frames.len() - 1] as i64 - frame_offset
|
||||
);
|
||||
// Interpolated bbox at first known position across the whole trace range
|
||||
if let Some((x, y, w, h)) = bbox_per_frame.get(&(*tid, frames[0])) {
|
||||
parts.push(format!(
|
||||
@@ -448,23 +505,45 @@ async fn trace_video(
|
||||
let tmp_str = tmp.to_str().unwrap_or("").to_string();
|
||||
let sk = seek.to_string();
|
||||
let du = duration.to_string();
|
||||
let mut debug_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du,
|
||||
"-/filter_complex", &filter_path,
|
||||
"-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"];
|
||||
if audio == "on" { debug_args.extend_from_slice(&["-c:a", "aac"]); }
|
||||
let mut debug_args = vec![
|
||||
"-ss",
|
||||
&sk,
|
||||
"-i",
|
||||
&video_path,
|
||||
"-t",
|
||||
&du,
|
||||
"-/filter_complex",
|
||||
&filter_path,
|
||||
"-c:v",
|
||||
"libx264",
|
||||
"-preset",
|
||||
"ultrafast",
|
||||
"-crf",
|
||||
"28",
|
||||
];
|
||||
if audio == "on" {
|
||||
debug_args.extend_from_slice(&["-c:a", "aac"]);
|
||||
}
|
||||
debug_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
|
||||
let result = ffmpeg_cmd().args(&debug_args).output()
|
||||
let result = ffmpeg_cmd()
|
||||
.args(&debug_args)
|
||||
.output()
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
if !result.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&result.stderr);
|
||||
let _ = std::fs::write("/tmp/ffmpeg_last_error.txt", stderr.as_bytes());
|
||||
tracing::error!("ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt", stderr.len());
|
||||
tracing::error!(
|
||||
"ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt",
|
||||
stderr.len()
|
||||
);
|
||||
let _ = std::fs::remove_file(&filter_file);
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
return Err(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
let data = tokio::fs::read(&tmp)
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
let _ = std::fs::remove_file(&filter_file);
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
Ok(Response::builder()
|
||||
@@ -503,19 +582,27 @@ async fn stream_video(
|
||||
// Chunk extraction with dual time/frame params
|
||||
let start_time_param = params.get("start_time").and_then(|v| v.parse::<f64>().ok());
|
||||
let end_time_param = params.get("end_time").and_then(|v| v.parse::<f64>().ok());
|
||||
let start_frame_param = params.get("start_frame").and_then(|v| v.parse::<f64>().ok());
|
||||
let start_frame_param = params
|
||||
.get("start_frame")
|
||||
.and_then(|v| v.parse::<f64>().ok());
|
||||
let end_frame_param = params.get("end_frame").and_then(|v| v.parse::<f64>().ok());
|
||||
let start_legacy = params.get("start").and_then(|v| v.parse::<f64>().ok());
|
||||
let end_legacy = params.get("end").and_then(|v| v.parse::<f64>().ok());
|
||||
|
||||
let has_range = start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some();
|
||||
let has_range =
|
||||
start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some();
|
||||
|
||||
if has_range {
|
||||
let (start_sec, dur) = if let (Some(sf), Some(ef)) = (start_frame_param, end_frame_param) {
|
||||
let _fps: f64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", videos_table
|
||||
)).bind(&file_uuid).fetch_optional(state.db.pool()).await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?.unwrap_or(24.0);
|
||||
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
|
||||
videos_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||
.unwrap_or(24.0);
|
||||
(sf / _fps, (ef - sf) / _fps)
|
||||
} else if let (Some(st), Some(et)) = (start_time_param, end_time_param) {
|
||||
(st, et - st)
|
||||
@@ -533,15 +620,21 @@ async fn stream_video(
|
||||
let ss = start_sec.to_string();
|
||||
let d = dur.to_string();
|
||||
let mut chunk_args = vec!["-ss", &ss, "-i", &file_path, "-t", &d, "-c", "copy"];
|
||||
if audio == "off" { chunk_args.push("-an"); }
|
||||
if audio == "off" {
|
||||
chunk_args.push("-an");
|
||||
}
|
||||
chunk_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
|
||||
let status = ffmpeg_cmd().args(&chunk_args).status()
|
||||
let status = ffmpeg_cmd()
|
||||
.args(&chunk_args)
|
||||
.status()
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
if !status.success() {
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
return Err(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
let data = tokio::fs::read(&tmp)
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
return Ok(Response::builder()
|
||||
.header(header::CONTENT_TYPE, "video/mp4")
|
||||
@@ -704,7 +797,7 @@ async fn video_clip(
|
||||
let frame_count = ((e - s) * fps) as i64;
|
||||
cmd.args(["-vframes", &frame_count.to_string()]);
|
||||
} else {
|
||||
cmd.args(["-to", &e.to_string()]);
|
||||
cmd.args(["-t", &(e - s).to_string()]);
|
||||
}
|
||||
if mode == "debug" {
|
||||
let debug_text = if let (Some(sf), Some(ef)) = (q.start_frame, q.end_frame) {
|
||||
@@ -717,8 +810,20 @@ async fn video_clip(
|
||||
if audio == "off" {
|
||||
cmd.args(["-an"]);
|
||||
}
|
||||
cmd.args(["-c:v", "libx264", "-c:a", "aac", "-f", "mpegts", "-"]);
|
||||
let output = cmd.output().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
cmd.args([
|
||||
"-c:v",
|
||||
"libx264",
|
||||
"-c:a",
|
||||
"aac",
|
||||
"-movflags",
|
||||
"frag_keyframe+empty_moov",
|
||||
"-f",
|
||||
"mp4",
|
||||
"-",
|
||||
]);
|
||||
let output = cmd
|
||||
.output()
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
if !output.status.success() {
|
||||
return Err(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
@@ -88,16 +88,10 @@ fn hex_val(c: u8) -> Option<u8> {
|
||||
}
|
||||
|
||||
fn extract_api_key(headers: &HeaderMap, uri: &axum::http::Uri) -> Result<String, StatusCode> {
|
||||
if let Some(key) = headers
|
||||
.get("X-API-Key")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
{
|
||||
if let Some(key) = headers.get("X-API-Key").and_then(|v| v.to_str().ok()) {
|
||||
return Ok(key.to_string());
|
||||
}
|
||||
if let Some(auth) = headers
|
||||
.get("Authorization")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
{
|
||||
if let Some(auth) = headers.get("Authorization").and_then(|v| v.to_str().ok()) {
|
||||
// Check if it's a JWT (starts with eyJ)
|
||||
let trimmed = auth.strip_prefix("Bearer ").unwrap_or(auth);
|
||||
if !jwt::is_jwt(trimmed) {
|
||||
@@ -129,7 +123,11 @@ pub async fn unified_auth(
|
||||
|
||||
// Priority 1: Cookie session (Portal)
|
||||
let cookies = extract_cookies(headers);
|
||||
if let Some(sid) = cookies.iter().find(|(k, _)| k == "session_id").map(|(_, v)| v.clone()) {
|
||||
if let Some(sid) = cookies
|
||||
.iter()
|
||||
.find(|(k, _)| k == "session_id")
|
||||
.map(|(_, v)| v.clone())
|
||||
{
|
||||
match state.db.get_session_by_id(&sid).await {
|
||||
Ok(Some((_id, user_id, api_key_id, _expires_at))) => {
|
||||
let key_hash = hash_key(&api_key_id);
|
||||
@@ -162,15 +160,17 @@ pub async fn unified_auth(
|
||||
}
|
||||
|
||||
// Priority 2: JWT (Authorization: Bearer <eyJ...>)
|
||||
if let Some(auth_header) = headers
|
||||
.get("Authorization")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
{
|
||||
if let Some(auth_header) = headers.get("Authorization").and_then(|v| v.to_str().ok()) {
|
||||
if let Some(token) = auth_header.strip_prefix("Bearer ") {
|
||||
if jwt::is_jwt(token) {
|
||||
match jwt::verify_jwt(token) {
|
||||
Ok(claims) => {
|
||||
if !state.db.is_jwt_blacklisted(&claims.jti).await.unwrap_or(false) {
|
||||
if !state
|
||||
.db
|
||||
.is_jwt_blacklisted(&claims.jti)
|
||||
.await
|
||||
.unwrap_or(false)
|
||||
{
|
||||
let exp = chrono::DateTime::from_timestamp(claims.exp as i64, 0);
|
||||
let user_id: i32 = claims.sub.parse().unwrap_or(0);
|
||||
let auth = UserAuth {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::api::server::AppState;
|
||||
use crate::core::config;
|
||||
use crate::core::db::PostgresDb;
|
||||
use crate::core::db::{PostgresDb, QdrantDb};
|
||||
use crate::core::tmdb;
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -64,10 +64,44 @@ struct FileUuidParam {
|
||||
file_uuid: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TmdbFetchRequest {
|
||||
file_uuid: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TmdbFetchMemberResult {
|
||||
name: String,
|
||||
character: Option<String>,
|
||||
aliases: Vec<String>,
|
||||
metadata: serde_json::Value,
|
||||
status: String,
|
||||
has_json: bool,
|
||||
has_jpg: bool,
|
||||
error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TmdbFetchResponse {
|
||||
success: bool,
|
||||
movie_title: Option<String>,
|
||||
tmdb_id: Option<u64>,
|
||||
results: Vec<TmdbFetchMemberResult>,
|
||||
summary: serde_json::Value,
|
||||
}
|
||||
|
||||
pub fn tmdb_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/agents/tmdb/prefetch", post(tmdb_prefetch))
|
||||
.route("/api/v1/file/:file_uuid/tmdb-probe", post(tmdb_probe_handler))
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/tmdb-probe",
|
||||
post(tmdb_probe_handler),
|
||||
)
|
||||
.route("/api/v1/tmdb/fetch", post(tmdb_fetch))
|
||||
.route(
|
||||
"/api/v1/agents/tmdb/match/:file_uuid",
|
||||
post(tmdb_match_handler),
|
||||
)
|
||||
.route("/api/v1/resource/tmdb", get(tmdb_resource_status))
|
||||
.route("/api/v1/resource/tmdb/check", post(tmdb_resource_check))
|
||||
}
|
||||
@@ -79,9 +113,10 @@ async fn tmdb_prefetch(
|
||||
let file_uuid = req.file_uuid;
|
||||
|
||||
// Verify file exists in DB
|
||||
let file_exists: bool = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", crate::core::db::schema::table_name("videos"))
|
||||
)
|
||||
let file_exists: bool = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
|
||||
crate::core::db::schema::table_name("videos")
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
@@ -182,18 +217,22 @@ async fn tmdb_probe_handler(
|
||||
let file_uuid = params.file_uuid;
|
||||
|
||||
// Verify file exists
|
||||
let file_exists: bool = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", crate::core::db::schema::table_name("videos"))
|
||||
)
|
||||
let file_exists: bool = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
|
||||
crate::core::db::schema::table_name("videos")
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
.unwrap_or(false);
|
||||
|
||||
if !file_exists {
|
||||
return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
|
||||
"error": "Video not found", "file_uuid": file_uuid
|
||||
}))));
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({
|
||||
"error": "Video not found", "file_uuid": file_uuid
|
||||
})),
|
||||
));
|
||||
}
|
||||
|
||||
match tmdb::probe::probe_from_cache(&state.db, &file_uuid).await {
|
||||
@@ -214,7 +253,10 @@ async fn tmdb_probe_handler(
|
||||
.await
|
||||
{
|
||||
for uuid in rows {
|
||||
let _ = crate::core::identity::storage::save_identity_file_by_pool(&pool, &uuid).await;
|
||||
let _ = crate::core::identity::storage::save_identity_file_by_pool(
|
||||
&pool, &uuid,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -245,24 +287,26 @@ async fn tmdb_probe_handler(
|
||||
message: "No TMDb cache found. Run tmdb-prefetch first.".to_string(),
|
||||
}))
|
||||
} else {
|
||||
Err((StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({
|
||||
"error": msg, "file_uuid": file_uuid
|
||||
}))))
|
||||
Err((
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({
|
||||
"error": msg, "file_uuid": file_uuid
|
||||
})),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn tmdb_resource_status(
|
||||
State(state): State<AppState>,
|
||||
) -> Json<TmdbResourceResponse> {
|
||||
async fn tmdb_resource_status(State(state): State<AppState>) -> Json<TmdbResourceResponse> {
|
||||
let status = tmdb::status::quick_status();
|
||||
let identities_seeded = tmdb::status::count_tmdb_identities(state.db.pool())
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
let identities_with_embedding = tmdb::status::count_tmdb_identities_with_embedding(state.db.pool())
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
let identities_with_embedding =
|
||||
tmdb::status::count_tmdb_identities_with_embedding(state.db.pool())
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
let cache_files = tmdb::status::count_cache_files();
|
||||
|
||||
Json(TmdbResourceResponse {
|
||||
@@ -303,3 +347,383 @@ async fn tmdb_resource_check() -> Json<TmdbCheckResponse> {
|
||||
status,
|
||||
})
|
||||
}
|
||||
|
||||
async fn tmdb_fetch(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<TmdbFetchRequest>,
|
||||
) -> Result<Json<TmdbFetchResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let file_uuid = req.file_uuid;
|
||||
|
||||
let filename: Option<String> = sqlx::query_scalar(&format!(
|
||||
"SELECT file_name FROM {} WHERE file_uuid = $1",
|
||||
crate::core::db::schema::table_name("videos")
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
.flatten();
|
||||
|
||||
let filename = filename.ok_or_else(|| {
|
||||
(
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({"error": "File not found"})),
|
||||
)
|
||||
})?;
|
||||
|
||||
// Run probe to create identities
|
||||
match tmdb::probe::probe_movie(&state.db, &filename, &file_uuid).await {
|
||||
Ok(Some(probe_result)) => {
|
||||
let mut member_results = Vec::new();
|
||||
|
||||
// Read the cache to get cast list with names and profile URLs
|
||||
if let Ok(cache) = tmdb::cache::read_tmdb_cache(&file_uuid) {
|
||||
for member in &cache.cast {
|
||||
let name = member.name.clone();
|
||||
let character = if member.character.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(member.character.clone())
|
||||
};
|
||||
let aliases = member.also_known_as.clone();
|
||||
let profile_url = member
|
||||
.profile_path
|
||||
.as_ref()
|
||||
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
|
||||
|
||||
let metadata = serde_json::json!({
|
||||
"tmdb_id": member.id,
|
||||
"name": member.name,
|
||||
"character": member.character,
|
||||
"aliases": member.also_known_as,
|
||||
"profile_path": member.profile_path,
|
||||
"order": member.order,
|
||||
"biography": member.biography,
|
||||
"birthday": member.birthday,
|
||||
"place_of_birth": member.place_of_birth,
|
||||
"imdb_id": member.imdb_id,
|
||||
"known_for_department": member.known_for_department,
|
||||
"popularity": member.popularity,
|
||||
"deathday": member.deathday,
|
||||
"gender": member.gender,
|
||||
"homepage": member.homepage,
|
||||
});
|
||||
|
||||
let identity_row = sqlx::query_as::<_, (i32, uuid::Uuid)>(&format!(
|
||||
"SELECT id, uuid FROM {} WHERE name = $1 AND source = 'tmdb' LIMIT 1",
|
||||
crate::core::db::schema::table_name("identities")
|
||||
))
|
||||
.bind(&name)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await;
|
||||
|
||||
match identity_row {
|
||||
Ok(Some((identity_id, uuid))) => {
|
||||
let clean = uuid.to_string().replace('-', "");
|
||||
let dir = crate::core::identity::storage::identity_dir(&clean);
|
||||
std::fs::create_dir_all(&dir).ok();
|
||||
|
||||
let json_result = crate::core::identity::storage::save_identity_file(
|
||||
&state.db, &clean,
|
||||
)
|
||||
.await;
|
||||
let has_json = json_result.is_ok();
|
||||
|
||||
let has_jpg = if let Some(url) = &profile_url {
|
||||
let jpg_path = dir.join("profile.jpg");
|
||||
if jpg_path.exists() {
|
||||
true
|
||||
} else if let Ok(resp) = reqwest::get(url).await {
|
||||
if let Ok(bytes) = resp.bytes().await {
|
||||
std::fs::write(&jpg_path, &bytes).is_ok()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
// Push face_embedding to Qdrant if available
|
||||
let face_collection = format!(
|
||||
"{}_faces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
);
|
||||
let emb_row: Option<(Vec<f32>,)> = sqlx::query_as(
|
||||
&format!(
|
||||
"SELECT face_embedding::real[] FROM {} WHERE uuid = $1 AND face_embedding IS NOT NULL",
|
||||
crate::core::db::schema::table_name("identities")
|
||||
)
|
||||
)
|
||||
.bind(&uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.unwrap_or(None);
|
||||
|
||||
if let Some((embedding,)) = emb_row {
|
||||
let qdrant = QdrantDb::new();
|
||||
qdrant.ensure_collection(&face_collection, 512).await.ok();
|
||||
let _ = qdrant
|
||||
.upsert_vector_to_collection(
|
||||
&face_collection,
|
||||
identity_id as u64,
|
||||
&embedding,
|
||||
Some(serde_json::json!({
|
||||
"identity_id": identity_id,
|
||||
"name": name,
|
||||
"source": "tmdb",
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let status = if has_json && has_jpg {
|
||||
"success"
|
||||
} else {
|
||||
"partial"
|
||||
};
|
||||
let error = if !has_json {
|
||||
Some(format!("{:?}", json_result.err()))
|
||||
} else if !has_jpg {
|
||||
Some("profile download failed".to_string())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
member_results.push(TmdbFetchMemberResult {
|
||||
name: name.clone(),
|
||||
character: character.clone(),
|
||||
aliases: aliases.clone(),
|
||||
metadata: metadata.clone(),
|
||||
status: status.to_string(),
|
||||
has_json,
|
||||
has_jpg,
|
||||
error,
|
||||
});
|
||||
}
|
||||
Ok(None) => {
|
||||
member_results.push(TmdbFetchMemberResult {
|
||||
name: name.clone(),
|
||||
character: character.clone(),
|
||||
aliases: aliases.clone(),
|
||||
metadata: metadata.clone(),
|
||||
status: "skipped".to_string(),
|
||||
has_json: false,
|
||||
has_jpg: false,
|
||||
error: None,
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
member_results.push(TmdbFetchMemberResult {
|
||||
name: name.clone(),
|
||||
character: character.clone(),
|
||||
aliases: aliases.clone(),
|
||||
metadata: metadata.clone(),
|
||||
status: "error".to_string(),
|
||||
has_json: false,
|
||||
has_jpg: false,
|
||||
error: Some(format!("DB error: {}", e)),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let total = member_results.len();
|
||||
let success_count = member_results
|
||||
.iter()
|
||||
.filter(|r| r.status == "success")
|
||||
.count();
|
||||
let json_count = member_results.iter().filter(|r| r.has_json).count();
|
||||
let jpg_count = member_results.iter().filter(|r| r.has_jpg).count();
|
||||
|
||||
Ok(Json(TmdbFetchResponse {
|
||||
success: true,
|
||||
movie_title: Some(probe_result.title),
|
||||
tmdb_id: Some(probe_result.tmdb_id),
|
||||
results: member_results,
|
||||
summary: serde_json::json!({
|
||||
"total": total,
|
||||
"success": success_count,
|
||||
"with_json": json_count,
|
||||
"with_jpg": jpg_count,
|
||||
}),
|
||||
}))
|
||||
}
|
||||
Ok(None) => Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({
|
||||
"error": "No movie found for this filename"
|
||||
})),
|
||||
)),
|
||||
Err(e) => Err((
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({
|
||||
"error": e.to_string()
|
||||
})),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TmdbMatchResponse {
|
||||
success: bool,
|
||||
file_uuid: String,
|
||||
bindings_created: usize,
|
||||
tmdb_identities_available: usize,
|
||||
message: String,
|
||||
}
|
||||
|
||||
async fn tmdb_match_handler(
|
||||
Path(params): Path<FileUuidParam>,
|
||||
State(state): State<AppState>,
|
||||
) -> Result<Json<TmdbMatchResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let file_uuid = params.file_uuid;
|
||||
|
||||
// Verify file exists
|
||||
let file_exists: bool = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
|
||||
crate::core::db::schema::table_name("videos")
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
.unwrap_or(false);
|
||||
|
||||
if !file_exists {
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({
|
||||
"error": "Video not found", "file_uuid": file_uuid
|
||||
})),
|
||||
));
|
||||
}
|
||||
|
||||
// Get all TMDb identities with face_embedding
|
||||
let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec<f32>)>(
|
||||
&format!(
|
||||
"SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL",
|
||||
crate::core::db::schema::table_name("identities")
|
||||
)
|
||||
)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
})?;
|
||||
|
||||
if tmdb_rows.is_empty() {
|
||||
return Ok(Json(TmdbMatchResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
bindings_created: 0,
|
||||
tmdb_identities_available: 0,
|
||||
message: "No TMDb identities with face embeddings".to_string(),
|
||||
}));
|
||||
}
|
||||
|
||||
let face_collection = format!(
|
||||
"{}_faces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
);
|
||||
|
||||
let qdrant = QdrantDb::new();
|
||||
let _ = qdrant.ensure_collection(&face_collection, 512).await;
|
||||
|
||||
let trace_collection = format!(
|
||||
"{}_traces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
);
|
||||
let _ = qdrant.ensure_collection(&trace_collection, 512).await;
|
||||
|
||||
// Sync trace embeddings (idempotent)
|
||||
if let Err(e) = crate::core::db::qdrant_db::sync_trace_embeddings(&file_uuid).await {
|
||||
tracing::error!("[TKG-MATCH] Trace sync failed: {}", e);
|
||||
}
|
||||
|
||||
let mut total_bindings = 0usize;
|
||||
|
||||
for (tmdb_id, tmdb_name, tmdb_embedding) in &tmdb_rows {
|
||||
// Search Qdrant trace collection with this TMDb embedding
|
||||
let results = match qdrant
|
||||
.search_face_collection(
|
||||
&trace_collection,
|
||||
tmdb_embedding,
|
||||
100,
|
||||
"source",
|
||||
"tmdb",
|
||||
Some(&file_uuid),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
tracing::warn!("[TKG-MATCH] Qdrant search failed for {}: {}", tmdb_name, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Filter results by threshold and file_uuid
|
||||
let filtered: Vec<_> = results
|
||||
.into_iter()
|
||||
.filter(|(score, payload)| {
|
||||
*score >= 0.50
|
||||
&& payload.get("file_uuid").and_then(|v| v.as_str()) == Some(&file_uuid)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if filtered.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Bind matched traces directly
|
||||
let mut bound_count = 0usize;
|
||||
for (_score, payload) in &filtered {
|
||||
if let Some(tid) = payload.get("trace_id").and_then(|v| v.as_i64()) {
|
||||
let r = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
|
||||
crate::core::db::schema::table_name("face_detections")
|
||||
))
|
||||
.bind(tmdb_id)
|
||||
.bind(&file_uuid)
|
||||
.bind(tid as i32)
|
||||
.execute(state.db.pool())
|
||||
.await;
|
||||
if let Ok(result) = r {
|
||||
bound_count += result.rows_affected() as usize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if bound_count > 0 {
|
||||
tracing::info!(
|
||||
"[TKG-MATCH] {}: bound {} traces to TMDb identity {}",
|
||||
tmdb_name,
|
||||
bound_count,
|
||||
tmdb_id
|
||||
);
|
||||
}
|
||||
total_bindings += bound_count;
|
||||
}
|
||||
|
||||
Ok(Json(TmdbMatchResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
bindings_created: total_bindings,
|
||||
tmdb_identities_available: tmdb_rows.len(),
|
||||
message: format!("{} traces matched to TMDb identities", total_bindings),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -11,10 +11,7 @@ use crate::core::db::PostgresDb;
|
||||
|
||||
pub fn trace_agent_routes() -> Router<crate::api::server::AppState> {
|
||||
Router::new()
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/traces",
|
||||
post(list_traces_sorted),
|
||||
)
|
||||
.route("/api/v1/file/:file_uuid/traces", post(list_traces_sorted))
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/trace/:trace_id/faces",
|
||||
get(list_trace_faces),
|
||||
@@ -78,14 +75,15 @@ async fn list_traces_sorted(
|
||||
_ => "start_frame ASC",
|
||||
};
|
||||
|
||||
let fps: f64 =
|
||||
sqlx::query_scalar(&format!("SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
|
||||
crate::core::db::schema::table_name("videos")))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
|
||||
.unwrap_or(24.0);
|
||||
let fps: f64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
|
||||
crate::core::db::schema::table_name("videos")
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
|
||||
.unwrap_or(24.0);
|
||||
|
||||
let query = format!(
|
||||
"SELECT tt.*, fd.id AS sample_face_id FROM (
|
||||
@@ -113,17 +111,16 @@ async fn list_traces_sorted(
|
||||
crate::core::db::schema::table_name("face_detections"),
|
||||
);
|
||||
|
||||
let rows: Vec<(i32, i64, i32, i32, f64, f64, Option<i32>)> =
|
||||
sqlx::query_as(&query)
|
||||
.bind(&file_uuid)
|
||||
.bind(min_faces)
|
||||
.bind(effective_limit)
|
||||
.bind(db_offset)
|
||||
.bind(min_confidence)
|
||||
.bind(max_confidence)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
let rows: Vec<(i32, i64, i32, i32, f64, f64, Option<i32>)> = sqlx::query_as(&query)
|
||||
.bind(&file_uuid)
|
||||
.bind(min_faces)
|
||||
.bind(effective_limit)
|
||||
.bind(db_offset)
|
||||
.bind(min_confidence)
|
||||
.bind(max_confidence)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let traces: Vec<TraceInfo> = rows
|
||||
.into_iter()
|
||||
@@ -220,19 +217,20 @@ async fn list_trace_faces(
|
||||
};
|
||||
let interpolate = q.interpolate.unwrap_or(false);
|
||||
|
||||
let fps: f64 =
|
||||
sqlx::query_scalar(&format!("SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
|
||||
crate::core::db::schema::table_name("videos")))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
|
||||
.unwrap_or(24.0);
|
||||
let fps: f64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
|
||||
crate::core::db::schema::table_name("videos")
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
|
||||
.unwrap_or(24.0);
|
||||
|
||||
let total_detected: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
|
||||
crate::core::db::schema::table_name("face_detections"))
|
||||
)
|
||||
let total_detected: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
|
||||
crate::core::db::schema::table_name("face_detections")
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.bind(trace_id)
|
||||
.fetch_one(state.db.pool())
|
||||
@@ -247,12 +245,12 @@ async fn list_trace_faces(
|
||||
Option<i32>,
|
||||
Option<i32>,
|
||||
f32,
|
||||
)> = sqlx::query_as(
|
||||
&format!("SELECT id, frame_number::int, x, y, width, height, confidence::float4 \
|
||||
)> = sqlx::query_as(&format!(
|
||||
"SELECT id, frame_number::int, x, y, width, height, confidence::float4 \
|
||||
FROM {} WHERE file_uuid = $1 AND trace_id = $2 \
|
||||
ORDER BY frame_number ASC LIMIT $3 OFFSET $4",
|
||||
crate::core::db::schema::table_name("face_detections"))
|
||||
)
|
||||
crate::core::db::schema::table_name("face_detections")
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.bind(trace_id)
|
||||
.bind(limit)
|
||||
|
||||
@@ -88,9 +88,9 @@ pub enum SearchResult {
|
||||
},
|
||||
#[serde(rename = "person")]
|
||||
Person {
|
||||
person_id: String,
|
||||
identity_id: i32,
|
||||
identity_uuid: String,
|
||||
name: Option<String>,
|
||||
speaker_id: Option<String>,
|
||||
appearance_count: i32,
|
||||
score: f64,
|
||||
first_appearance_time: Option<f64>,
|
||||
@@ -168,7 +168,7 @@ pub async fn universal_search(
|
||||
results.retain(|r| match r {
|
||||
SearchResult::Chunk { chunk_id, .. } => seen_chunks.insert(chunk_id.clone()),
|
||||
SearchResult::Frame { frame_number, .. } => seen_frames.insert(*frame_number),
|
||||
SearchResult::Person { person_id, .. } => seen_persons.insert(person_id.clone()),
|
||||
SearchResult::Person { identity_id, .. } => seen_persons.insert(*identity_id),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -251,9 +251,9 @@ pub async fn search_persons(
|
||||
let limit = query.limit.unwrap_or(20);
|
||||
let persons = search_persons_by_query(
|
||||
&db,
|
||||
&query.file_uuid,
|
||||
&query.query,
|
||||
query.min_appearances,
|
||||
query.max_age,
|
||||
limit,
|
||||
)
|
||||
.await
|
||||
@@ -305,7 +305,6 @@ pub struct PersonSearchQuery {
|
||||
pub file_uuid: String,
|
||||
pub query: Option<String>,
|
||||
pub min_appearances: Option<i32>,
|
||||
pub max_age: Option<i32>, // New filter for "children"
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
|
||||
@@ -317,13 +316,9 @@ pub struct PersonSearchResponse {
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct PersonResult {
|
||||
pub person_id: String,
|
||||
pub identity_id: i32,
|
||||
pub identity_uuid: String,
|
||||
pub name: Option<String>,
|
||||
pub character_name: Option<String>,
|
||||
pub aliases: Option<Vec<String>>,
|
||||
pub age: Option<i32>,
|
||||
pub gender: Option<String>,
|
||||
pub speaker_id: Option<String>,
|
||||
pub appearance_count: i32,
|
||||
pub first_appearance_time: Option<f64>,
|
||||
pub last_appearance_time: Option<f64>,
|
||||
@@ -594,43 +589,37 @@ async fn search_persons_internal(
|
||||
db: &PostgresDb,
|
||||
req: &UniversalSearchRequest,
|
||||
) -> Result<Vec<SearchResult>, anyhow::Error> {
|
||||
let table = "person_identities";
|
||||
let uuid = match &req.file_uuid {
|
||||
Some(u) => u.replace('\'', "''"),
|
||||
None => return Err(anyhow::anyhow!("file_uuid is required for person search")),
|
||||
};
|
||||
|
||||
let id_table = schema::table_name("identities");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let mut sql = format!(
|
||||
"SELECT person_id, name, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM {} WHERE 1=1",
|
||||
table
|
||||
"SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
|
||||
MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
|
||||
FROM {} i JOIN {} fd ON fd.identity_id = i.id \
|
||||
WHERE fd.file_uuid = '{}'",
|
||||
id_table, fd_table, uuid
|
||||
);
|
||||
|
||||
if !req.query.is_empty() {
|
||||
sql.push_str(&format!(
|
||||
" AND (name ILIKE '%{}%' OR person_id ILIKE '%{}%' OR speaker_id ILIKE '%{}%')",
|
||||
req.query, req.query, req.query
|
||||
));
|
||||
}
|
||||
if let Some(ref filters) = req.filters {
|
||||
if let Some(ref speaker_id) = filters.speaker_id {
|
||||
sql.push_str(&format!(" AND speaker_id = '{}'", speaker_id));
|
||||
}
|
||||
if let Some(ref person_id) = filters.person_id {
|
||||
sql.push_str(&format!(" AND person_id = '{}'", person_id));
|
||||
}
|
||||
let q = req.query.replace('\'', "''");
|
||||
sql.push_str(&format!(" AND i.name ILIKE '%{}%'", q));
|
||||
}
|
||||
|
||||
sql.push_str(" GROUP BY i.id, i.uuid, i.name");
|
||||
sql.push_str(" ORDER BY appearance_count DESC");
|
||||
sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));
|
||||
|
||||
let rows: Vec<(
|
||||
String,
|
||||
Option<String>,
|
||||
Option<String>,
|
||||
i32,
|
||||
Option<f64>,
|
||||
Option<f64>,
|
||||
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
|
||||
let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
|
||||
sqlx::query_as(&sql).fetch_all(db.pool()).await?;
|
||||
|
||||
let results: Vec<SearchResult> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(person_id, name, speaker_id, appearance_count, first_time, last_time)| {
|
||||
|(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
|
||||
let score = if !req.query.is_empty()
|
||||
&& name.as_ref().map_or(false, |n| {
|
||||
n.to_lowercase().contains(&req.query.to_lowercase())
|
||||
@@ -641,10 +630,10 @@ async fn search_persons_internal(
|
||||
};
|
||||
|
||||
SearchResult::Person {
|
||||
person_id,
|
||||
identity_id,
|
||||
identity_uuid,
|
||||
name,
|
||||
speaker_id,
|
||||
appearance_count,
|
||||
appearance_count: appearance_count as i32,
|
||||
score,
|
||||
first_appearance_time: first_time,
|
||||
last_appearance_time: last_time,
|
||||
@@ -739,82 +728,49 @@ async fn search_frames_internal_v2(
|
||||
|
||||
async fn search_persons_by_query(
|
||||
db: &PostgresDb,
|
||||
file_uuid: &str,
|
||||
query: &Option<String>,
|
||||
min_appearances: Option<i32>,
|
||||
max_age: Option<i32>,
|
||||
limit: usize,
|
||||
) -> Result<Vec<PersonResult>, anyhow::Error> {
|
||||
let table = "person_identities";
|
||||
let id_table = schema::table_name("identities");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let mut sql = format!(
|
||||
"SELECT person_id, name, character_name, aliases, age, gender, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM {} WHERE 1=1",
|
||||
table
|
||||
"SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
|
||||
MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
|
||||
FROM {} i JOIN {} fd ON fd.identity_id = i.id \
|
||||
WHERE fd.file_uuid = '{}'",
|
||||
id_table,
|
||||
fd_table,
|
||||
file_uuid.replace('\'', "''")
|
||||
);
|
||||
|
||||
if let Some(ref q) = query {
|
||||
// Search name, character_name, aliases (cast to text), person_id, speaker_id
|
||||
sql.push_str(&format!(
|
||||
" AND (name ILIKE '%{}%' OR character_name ILIKE '%{}%' OR aliases::text ILIKE '%{}%' OR person_id ILIKE '%{}%' OR speaker_id ILIKE '%{}%')",
|
||||
q, q, q, q, q
|
||||
));
|
||||
if let Some(q) = query {
|
||||
let safe = q.replace('\'', "''");
|
||||
sql.push_str(&format!(" AND i.name ILIKE '%{}%'", safe));
|
||||
}
|
||||
|
||||
sql.push_str(" GROUP BY i.id, i.uuid, i.name");
|
||||
|
||||
if let Some(min) = min_appearances {
|
||||
sql.push_str(&format!(" AND appearance_count >= {}", min));
|
||||
}
|
||||
if let Some(max_a) = max_age {
|
||||
// Strictly filter for age <= max_age.
|
||||
// Note: This excludes entries with NULL age.
|
||||
sql.push_str(&format!(" AND age <= {}", max_a));
|
||||
sql.push_str(&format!(" HAVING COUNT(fd.id) >= {}", min));
|
||||
}
|
||||
|
||||
sql.push_str(" ORDER BY appearance_count DESC");
|
||||
sql.push_str(&format!(" LIMIT {}", limit));
|
||||
|
||||
let rows: Vec<(
|
||||
String,
|
||||
Option<String>,
|
||||
Option<String>,
|
||||
Option<serde_json::Value>,
|
||||
Option<i32>,
|
||||
Option<String>,
|
||||
Option<String>,
|
||||
i32,
|
||||
Option<f64>,
|
||||
Option<f64>,
|
||||
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
|
||||
let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
|
||||
sqlx::query_as(&sql).fetch_all(db.pool()).await?;
|
||||
|
||||
let results: Vec<PersonResult> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(
|
||||
person_id,
|
||||
name,
|
||||
character_name,
|
||||
aliases_json,
|
||||
age,
|
||||
gender,
|
||||
speaker_id,
|
||||
appearance_count,
|
||||
first_time,
|
||||
last_time,
|
||||
)| {
|
||||
let aliases = aliases_json.and_then(|v| {
|
||||
v.as_array().map(|arr| {
|
||||
arr.iter()
|
||||
.filter_map(|val| val.as_str().map(String::from))
|
||||
.collect()
|
||||
})
|
||||
});
|
||||
|
||||
|(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
|
||||
PersonResult {
|
||||
person_id,
|
||||
identity_id,
|
||||
identity_uuid,
|
||||
name,
|
||||
character_name,
|
||||
aliases,
|
||||
age,
|
||||
gender,
|
||||
speaker_id,
|
||||
appearance_count,
|
||||
appearance_count: appearance_count as i32,
|
||||
first_appearance_time: first_time,
|
||||
last_appearance_time: last_time,
|
||||
}
|
||||
|
||||
@@ -392,8 +392,14 @@ pub async fn get_visual_chunk_statistics(
|
||||
uuid.replace('\'', "''")
|
||||
);
|
||||
|
||||
let row: (i64, Option<f64>, Option<f64>, Option<f64>, Option<i64>, Option<f64>) =
|
||||
sqlx::query_as(&sql).fetch_one(db.pool()).await?;
|
||||
let row: (
|
||||
i64,
|
||||
Option<f64>,
|
||||
Option<f64>,
|
||||
Option<f64>,
|
||||
Option<i64>,
|
||||
Option<f64>,
|
||||
) = sqlx::query_as(&sql).fetch_one(db.pool()).await?;
|
||||
|
||||
let mut stats = HashMap::new();
|
||||
stats.insert("total_chunks".to_string(), Value::from(row.0));
|
||||
|
||||
@@ -13,7 +13,14 @@ use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
|
||||
fn dir_size(path: &Path) -> u64 {
|
||||
path.read_dir().map(|d| d.filter_map(|e| e.ok()).filter_map(|e| e.metadata().ok()).map(|m| m.len()).sum()).unwrap_or(0)
|
||||
path.read_dir()
|
||||
.map(|d| {
|
||||
d.filter_map(|e| e.ok())
|
||||
.filter_map(|e| e.metadata().ok())
|
||||
.map(|m| m.len())
|
||||
.sum()
|
||||
})
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
const DEMO_DIR: &str = "/Users/accusys/momentry/var/sftpgo/data/demo";
|
||||
@@ -22,7 +29,10 @@ const RELEASE_DIR: &str = "/Users/accusys/momentry_core_0.1/release/files";
|
||||
const PG_BIN: &str = "/Users/accusys/pgsql/18.3/bin";
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "release", about = "Release Manager — deploy/undeploy video packages")]
|
||||
#[command(
|
||||
name = "release",
|
||||
about = "Release Manager — deploy/undeploy video packages"
|
||||
)]
|
||||
struct Cli {
|
||||
#[command(subcommand)]
|
||||
command: Commands,
|
||||
@@ -107,7 +117,12 @@ fn extract_tarball(tarball: &Path) -> Result<PathBuf> {
|
||||
fs::create_dir_all(&tmpdir)?;
|
||||
|
||||
let status = Command::new("tar")
|
||||
.args(["-xzf", tarball.to_str().unwrap(), "-C", tmpdir.to_str().unwrap()])
|
||||
.args([
|
||||
"-xzf",
|
||||
tarball.to_str().unwrap(),
|
||||
"-C",
|
||||
tmpdir.to_str().unwrap(),
|
||||
])
|
||||
.status()
|
||||
.context("tar extraction failed")?;
|
||||
if !status.success() {
|
||||
@@ -127,8 +142,8 @@ fn extract_tarball(tarball: &Path) -> Result<PathBuf> {
|
||||
/// Get file_info.json from package directory
|
||||
fn read_file_info(pkg_dir: &Path) -> Result<serde_json::Value> {
|
||||
let info_path = pkg_dir.join("file_info.json");
|
||||
let content = fs::read_to_string(&info_path)
|
||||
.with_context(|| format!("Cannot read {:?}", info_path))?;
|
||||
let content =
|
||||
fs::read_to_string(&info_path).with_context(|| format!("Cannot read {:?}", info_path))?;
|
||||
serde_json::from_str(&content).context("Invalid file_info.json")
|
||||
}
|
||||
|
||||
@@ -140,7 +155,10 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
|
||||
anyhow::bail!("File not found: {}", tarball);
|
||||
}
|
||||
|
||||
println!("=== Deploy: {} ===", tarball_path.file_name().unwrap().to_str().unwrap());
|
||||
println!(
|
||||
"=== Deploy: {} ===",
|
||||
tarball_path.file_name().unwrap().to_str().unwrap()
|
||||
);
|
||||
|
||||
// Extract
|
||||
let pkg_dir = extract_tarball(tarball_path)?;
|
||||
@@ -148,7 +166,9 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
|
||||
|
||||
// Read file_info
|
||||
let info = read_file_info(&pkg_dir)?;
|
||||
let uuid = info["file_uuid"].as_str().context("Missing file_uuid in file_info.json")?;
|
||||
let uuid = info["file_uuid"]
|
||||
.as_str()
|
||||
.context("Missing file_uuid in file_info.json")?;
|
||||
let file_name = info["file_name"].as_str().unwrap_or("?");
|
||||
println!("UUID: {}\nVideo: {}", uuid, file_name);
|
||||
|
||||
@@ -168,7 +188,8 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
|
||||
let entry = entry?;
|
||||
let fname = entry.file_name();
|
||||
let fname_str = fname.to_str().unwrap_or("");
|
||||
if fname_str.ends_with(".mp4") || fname_str.ends_with(".mov") || fname_str.ends_with(".avi") {
|
||||
if fname_str.ends_with(".mp4") || fname_str.ends_with(".mov") || fname_str.ends_with(".avi")
|
||||
{
|
||||
let dest = Path::new(DEMO_DIR).join(&fname);
|
||||
if !dest.exists() {
|
||||
fs::copy(entry.path(), &dest)?;
|
||||
@@ -192,12 +213,15 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
|
||||
println!("Output files copied to {}", OUTPUT_DIR);
|
||||
|
||||
// Verify
|
||||
let chunk_count: (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = $1"
|
||||
).bind(uuid).fetch_one(db.pool()).await?;
|
||||
let face_count: (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1"
|
||||
).bind(uuid).fetch_one(db.pool()).await?;
|
||||
let chunk_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = $1")
|
||||
.bind(uuid)
|
||||
.fetch_one(db.pool())
|
||||
.await?;
|
||||
let face_count: (i64,) =
|
||||
sqlx::query_as("SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1")
|
||||
.bind(uuid)
|
||||
.fetch_one(db.pool())
|
||||
.await?;
|
||||
|
||||
// Cleanup
|
||||
fs::remove_dir_all(&pkg_dir.parent().unwrap_or(&pkg_dir))?;
|
||||
@@ -213,9 +237,11 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
|
||||
|
||||
async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result<()> {
|
||||
// Get video info
|
||||
let rows: Vec<(String, String)> = sqlx::query_as(
|
||||
"SELECT file_name, file_path FROM dev.videos WHERE file_uuid = $1"
|
||||
).bind(uuid).fetch_all(db.pool()).await?;
|
||||
let rows: Vec<(String, String)> =
|
||||
sqlx::query_as("SELECT file_name, file_path FROM dev.videos WHERE file_uuid = $1")
|
||||
.bind(uuid)
|
||||
.fetch_all(db.pool())
|
||||
.await?;
|
||||
|
||||
if rows.is_empty() {
|
||||
anyhow::bail!("UUID {} not found in DB", uuid);
|
||||
@@ -252,7 +278,9 @@ async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result
|
||||
println!(" {}: {} rows deleted", tbl, result.rows_affected());
|
||||
}
|
||||
sqlx::query("DELETE FROM dev.videos WHERE file_uuid = $1")
|
||||
.bind(uuid).execute(db.pool()).await?;
|
||||
.bind(uuid)
|
||||
.execute(db.pool())
|
||||
.await?;
|
||||
println!(" dev.videos: removed");
|
||||
|
||||
// Delete output files
|
||||
@@ -270,7 +298,10 @@ async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result
|
||||
let vp = Path::new(file_path);
|
||||
if vp.exists() {
|
||||
fs::remove_file(vp)?;
|
||||
println!(" Video file: removed ({})", vp.file_name().unwrap().to_str().unwrap_or("?"));
|
||||
println!(
|
||||
" Video file: removed ({})",
|
||||
vp.file_name().unwrap().to_str().unwrap_or("?")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -292,11 +323,15 @@ async fn cmd_list(db: &PostgresDb) -> Result<()> {
|
||||
"SELECT file_uuid, file_name, duration, status,
|
||||
(SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = v.file_uuid) as chunks,
|
||||
(SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = v.file_uuid) as faces
|
||||
FROM dev.videos v ORDER BY id DESC"
|
||||
).fetch_all(db.pool()).await?;
|
||||
FROM dev.videos v ORDER BY id DESC",
|
||||
)
|
||||
.fetch_all(db.pool())
|
||||
.await?;
|
||||
|
||||
println!("{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
|
||||
"UUID", "Name", "Duration", "Status", "Chunks", "Faces");
|
||||
println!(
|
||||
"{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
|
||||
"UUID", "Name", "Duration", "Status", "Chunks", "Faces"
|
||||
);
|
||||
println!("{}", "-".repeat(116));
|
||||
|
||||
for row in &rows {
|
||||
@@ -318,10 +353,15 @@ async fn cmd_list(db: &PostgresDb) -> Result<()> {
|
||||
name.clone()
|
||||
};
|
||||
|
||||
println!("{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
|
||||
uuid, short_name, dur_str,
|
||||
println!(
|
||||
"{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
|
||||
uuid,
|
||||
short_name,
|
||||
dur_str,
|
||||
status.as_deref().unwrap_or("?"),
|
||||
chunks.unwrap_or(0), faces.unwrap_or(0));
|
||||
chunks.unwrap_or(0),
|
||||
faces.unwrap_or(0)
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -336,9 +376,23 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
"SELECT file_uuid, file_name, file_path, duration, fps, width, height FROM dev.videos WHERE file_uuid = $1"
|
||||
).bind(uuid).fetch_optional(db.pool()).await?;
|
||||
let (_, file_name, file_path, duration, fps, width, height): (
|
||||
String, String, String, Option<f64>, Option<f64>, Option<i32>, Option<i32>
|
||||
String,
|
||||
String,
|
||||
String,
|
||||
Option<f64>,
|
||||
Option<f64>,
|
||||
Option<i32>,
|
||||
Option<i32>,
|
||||
) = match row {
|
||||
Some(r) => (r.get(0), r.get(1), r.get(2), r.get(3), r.get(4), r.get(5), r.get(6)),
|
||||
Some(r) => (
|
||||
r.get(0),
|
||||
r.get(1),
|
||||
r.get(2),
|
||||
r.get(3),
|
||||
r.get(4),
|
||||
r.get(5),
|
||||
r.get(6),
|
||||
),
|
||||
None => anyhow::bail!("UUID {} not found", uuid),
|
||||
};
|
||||
|
||||
@@ -360,7 +414,10 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
"momentry_version": env!("CARGO_PKG_VERSION"),
|
||||
"momentry_build": env!("BUILD_GIT_HASH"),
|
||||
});
|
||||
fs::write(outdir.join("file_info.json"), serde_json::to_string_pretty(&info)?)?;
|
||||
fs::write(
|
||||
outdir.join("file_info.json"),
|
||||
serde_json::to_string_pretty(&info)?,
|
||||
)?;
|
||||
|
||||
// Export per-table .sql files (avoid single 4.7GB psql load)
|
||||
let sql_dir = outdir.join("sql");
|
||||
@@ -376,7 +433,13 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
|
||||
let mut import_order = vec!["master.sql"];
|
||||
|
||||
fn write_table_sql(outdir: &Path, tbl: &str, col: &str, uuid: &str, psql_exec: &dyn Fn(&str) -> Result<String>) -> Result<()> {
|
||||
fn write_table_sql(
|
||||
outdir: &Path,
|
||||
tbl: &str,
|
||||
col: &str,
|
||||
uuid: &str,
|
||||
psql_exec: &dyn Fn(&str) -> Result<String>,
|
||||
) -> Result<()> {
|
||||
let safe_name = tbl.replace('.', "_");
|
||||
let path = outdir.join(format!("{}.sql", safe_name));
|
||||
let parts: Vec<&str> = tbl.split('.').collect();
|
||||
@@ -419,8 +482,16 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
let data = psql_exec(&idents_query)?;
|
||||
if !data.is_empty() {
|
||||
let mut f = fs::File::create(&idents_path)?;
|
||||
writeln!(f, "-- dev.identities WHERE file_uuid = '{}' OR global (tmdb/merged/user_defined)", uuid)?;
|
||||
writeln!(f, "COPY dev.identities ({}) FROM STDIN WITH CSV HEADER;", cols)?;
|
||||
writeln!(
|
||||
f,
|
||||
"-- dev.identities WHERE file_uuid = '{}' OR global (tmdb/merged/user_defined)",
|
||||
uuid
|
||||
)?;
|
||||
writeln!(
|
||||
f,
|
||||
"COPY dev.identities ({}) FROM STDIN WITH CSV HEADER;",
|
||||
cols
|
||||
)?;
|
||||
writeln!(f, "{}", data)?;
|
||||
writeln!(f, "\\.")?;
|
||||
}
|
||||
@@ -440,7 +511,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
if !data.is_empty() {
|
||||
let mut f = fs::File::create(&binds_path)?;
|
||||
writeln!(f, "-- dev.identity_bindings (from face_detections JOIN)")?;
|
||||
writeln!(f, "COPY dev.identity_bindings ({}) FROM STDIN WITH CSV HEADER;", cols)?;
|
||||
writeln!(
|
||||
f,
|
||||
"COPY dev.identity_bindings ({}) FROM STDIN WITH CSV HEADER;",
|
||||
cols
|
||||
)?;
|
||||
writeln!(f, "{}", data)?;
|
||||
writeln!(f, "\\.")?;
|
||||
}
|
||||
@@ -469,7 +544,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
let sql_path = outdir.join("data.sql");
|
||||
{
|
||||
let mut f = fs::File::create(&sql_path)?;
|
||||
writeln!(f, "-- Release package: {} — see sql/ for per-table files", uuid)?;
|
||||
writeln!(
|
||||
f,
|
||||
"-- Release package: {} — see sql/ for per-table files",
|
||||
uuid
|
||||
)?;
|
||||
writeln!(f, "BEGIN;")?;
|
||||
writeln!(f, "\\i sql/dev_videos.sql")?;
|
||||
writeln!(f, "\\i sql/dev_chunk.sql")?;
|
||||
@@ -492,7 +571,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
let dest = outdir.join(vp.file_name().unwrap());
|
||||
fs::copy(vp, &dest)?;
|
||||
let vsize = fs::metadata(&dest)?.len();
|
||||
println!(" {} ({} MB)", vp.file_name().unwrap().to_str().unwrap_or("?"), vsize / 1024 / 1024);
|
||||
println!(
|
||||
" {} ({} MB)",
|
||||
vp.file_name().unwrap().to_str().unwrap_or("?"),
|
||||
vsize / 1024 / 1024
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -541,11 +624,18 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
let vec0_src = "/Users/accusys/momentry_core_0.1/scripts/vec0.dylib";
|
||||
if Path::new(vec0_src).exists() {
|
||||
fs::copy(vec0_src, outdir.join("vec0.dylib"))?;
|
||||
println!(" vec0.dylib ({} KB)", fs::metadata(outdir.join("vec0.dylib"))?.len() / 1024);
|
||||
println!(
|
||||
" vec0.dylib ({} KB)",
|
||||
fs::metadata(outdir.join("vec0.dylib"))?.len() / 1024
|
||||
);
|
||||
}
|
||||
|
||||
// Create tar.gz
|
||||
let tarball = Path::new(RELEASE_DIR).join(format!("{}_v{}.tar.gz", uuid, Utc::now().format("%Y%m%d_%H%M%S")));
|
||||
let tarball = Path::new(RELEASE_DIR).join(format!(
|
||||
"{}_v{}.tar.gz",
|
||||
uuid,
|
||||
Utc::now().format("%Y%m%d_%H%M%S")
|
||||
));
|
||||
let status = Command::new("tar")
|
||||
.args(["-czf", tarball.to_str().unwrap(), "-C", RELEASE_DIR, uuid])
|
||||
.status()?;
|
||||
@@ -553,7 +643,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
anyhow::bail!("tar creation failed");
|
||||
}
|
||||
let tsize = fs::metadata(&tarball)?.len();
|
||||
println!("\n Package: {} ({} MB)", tarball.display(), tsize / 1024 / 1024);
|
||||
println!(
|
||||
"\n Package: {} ({} MB)",
|
||||
tarball.display(),
|
||||
tsize / 1024 / 1024
|
||||
);
|
||||
|
||||
// Sanity check: warn if any sql file is suspiciously large
|
||||
println!(" Checking sql/ file sizes...");
|
||||
@@ -564,33 +658,55 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
let sz = fs::metadata(&path)?.len() as f64 / 1024.0 / 1024.0;
|
||||
let name = path.file_stem().and_then(|s| s.to_str()).unwrap_or("?");
|
||||
match name {
|
||||
"dev_videos" | "master" if sz > 1.0 =>
|
||||
println!(" ⚠️ {} is {} MB, expected < 1 MB", name, sz as u64),
|
||||
"dev_chunk" if sz > 2.0 =>
|
||||
println!(" ⚠️ {} is {} MB, expected < 2 MB for ~2.4K chunks", name, sz as u64),
|
||||
"dev_identities" if sz > 1.0 =>
|
||||
println!(" ⚠️ {} is {} MB, expected < 1 MB for ~428 identities", name, sz as u64),
|
||||
"dev_identity_bindings" if sz > 5.0 =>
|
||||
println!(" ⚠️ {} is {} MB, expected < 5 MB for ~7.6K bindings", name, sz as u64),
|
||||
"dev_tkg_nodes" if sz > 10.0 =>
|
||||
println!(" ⚠️ {} is {} MB, expected < 10 MB for ~6.4K nodes", name, sz as u64),
|
||||
"dev_tkg_edges" if sz > 20.0 =>
|
||||
println!(" ⚠️ {} is {} MB, expected < 20 MB for ~21K edges", name, sz as u64),
|
||||
"dev_face_detections" if sz > 1000.0 =>
|
||||
println!(" ⚠️ {} is {} MB, expected < 1000 MB for ~70K faces (512D emb)", name, sz as u64),
|
||||
"dev_chunk_vectors" if sz > 200.0 =>
|
||||
println!(" ⚠️ {} is {} MB, expected < 200 MB for ~2.4K chunks (768D emb)", name, sz as u64),
|
||||
"dev_videos" | "master" if sz > 1.0 => {
|
||||
println!(" ⚠️ {} is {} MB, expected < 1 MB", name, sz as u64)
|
||||
}
|
||||
"dev_chunk" if sz > 2.0 => println!(
|
||||
" ⚠️ {} is {} MB, expected < 2 MB for ~2.4K chunks",
|
||||
name, sz as u64
|
||||
),
|
||||
"dev_identities" if sz > 1.0 => println!(
|
||||
" ⚠️ {} is {} MB, expected < 1 MB for ~428 identities",
|
||||
name, sz as u64
|
||||
),
|
||||
"dev_identity_bindings" if sz > 5.0 => println!(
|
||||
" ⚠️ {} is {} MB, expected < 5 MB for ~7.6K bindings",
|
||||
name, sz as u64
|
||||
),
|
||||
"dev_tkg_nodes" if sz > 10.0 => println!(
|
||||
" ⚠️ {} is {} MB, expected < 10 MB for ~6.4K nodes",
|
||||
name, sz as u64
|
||||
),
|
||||
"dev_tkg_edges" if sz > 20.0 => println!(
|
||||
" ⚠️ {} is {} MB, expected < 20 MB for ~21K edges",
|
||||
name, sz as u64
|
||||
),
|
||||
"dev_face_detections" if sz > 1000.0 => println!(
|
||||
" ⚠️ {} is {} MB, expected < 1000 MB for ~70K faces (512D emb)",
|
||||
name, sz as u64
|
||||
),
|
||||
"dev_chunk_vectors" if sz > 200.0 => println!(
|
||||
" ⚠️ {} is {} MB, expected < 200 MB for ~2.4K chunks (768D emb)",
|
||||
name, sz as u64
|
||||
),
|
||||
_ => {}
|
||||
}
|
||||
if sz > 2000.0 {
|
||||
println!(" ⚠️ {} is {:.0} MB — unusually large, verify query", name, sz);
|
||||
println!(
|
||||
" ⚠️ {} is {:.0} MB — unusually large, verify query",
|
||||
name, sz
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Option<i64>) -> Result<()> {
|
||||
fn cmd_visualize_offline(
|
||||
sqlite_path: &str,
|
||||
output: Option<&str>,
|
||||
identity: Option<i64>,
|
||||
) -> Result<()> {
|
||||
let outpath = match output {
|
||||
Some(p) => p.to_string(),
|
||||
None => sqlite_path.replace(".sqlite", "_report.html"),
|
||||
@@ -606,7 +722,10 @@ fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Opti
|
||||
.output()
|
||||
.context("Offline report script failed")?;
|
||||
if !output.status.success() {
|
||||
anyhow::bail!("Offline report: {}", String::from_utf8_lossy(&output.stderr));
|
||||
anyhow::bail!(
|
||||
"Offline report: {}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
}
|
||||
println!("{}", String::from_utf8_lossy(&output.stdout));
|
||||
println!("\n Open: {}", outpath);
|
||||
@@ -624,7 +743,10 @@ fn cmd_visualize(uuid: &str, typ: &str, output: Option<&str>, identity: Option<i
|
||||
match typ {
|
||||
"heatmap" | "density" => generate_face_heatmap(uuid, &outpath, identity)?,
|
||||
"timeline" => generate_face_timeline(uuid, &outpath, identity)?,
|
||||
_ => anyhow::bail!("Unknown visualization type: {}. Try: heatmap, density, timeline", typ),
|
||||
_ => anyhow::bail!(
|
||||
"Unknown visualization type: {}. Try: heatmap, density, timeline",
|
||||
typ
|
||||
),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -698,16 +820,28 @@ fn cmd_stats() -> Result<()> {
|
||||
|
||||
for line in listing.lines() {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() || trimmed.ends_with('/') { continue; }
|
||||
if trimmed.is_empty() || trimmed.ends_with('/') {
|
||||
continue;
|
||||
}
|
||||
|
||||
// tar -tvzf format: perms link owner group size date_month date_day time path...
|
||||
// Fields are space-separated; size is 5th field, path starts at 8th field
|
||||
let parts: Vec<&str> = trimmed.split_whitespace().collect();
|
||||
if parts.len() < 8 { continue; }
|
||||
if parts.len() < 8 {
|
||||
continue;
|
||||
}
|
||||
let fsize = parts[4].parse::<u64>().unwrap_or(0);
|
||||
let fpath = parts[8..].join(" ");
|
||||
let fname = Path::new(&fpath).file_name().unwrap_or_default().to_str().unwrap_or("?");
|
||||
let ext = Path::new(&fpath).extension().unwrap_or_default().to_str().unwrap_or("");
|
||||
let fname = Path::new(&fpath)
|
||||
.file_name()
|
||||
.unwrap_or_default()
|
||||
.to_str()
|
||||
.unwrap_or("?");
|
||||
let ext = Path::new(&fpath)
|
||||
.extension()
|
||||
.unwrap_or_default()
|
||||
.to_str()
|
||||
.unwrap_or("");
|
||||
|
||||
match ext {
|
||||
"sql" => {
|
||||
@@ -732,10 +866,26 @@ fn cmd_stats() -> Result<()> {
|
||||
}
|
||||
|
||||
println!(" ─────────────────────────────");
|
||||
println!(" SQL: {} files, {:.0} MB", sql_count, total_sql as f64 / 1048576.0);
|
||||
println!(" Video: {} files, {:.0} MB", video_count, total_video as f64 / 1048576.0);
|
||||
println!(" JSON: {} files, {:.0} MB", json_count, total_json as f64 / 1048576.0);
|
||||
println!(" Total: {:.0} MB (compressed: {:.0} MB)", (total_sql + total_video + total_json) as f64 / 1048576.0, pkg_size as f64 / 1048576.0);
|
||||
println!(
|
||||
" SQL: {} files, {:.0} MB",
|
||||
sql_count,
|
||||
total_sql as f64 / 1048576.0
|
||||
);
|
||||
println!(
|
||||
" Video: {} files, {:.0} MB",
|
||||
video_count,
|
||||
total_video as f64 / 1048576.0
|
||||
);
|
||||
println!(
|
||||
" JSON: {} files, {:.0} MB",
|
||||
json_count,
|
||||
total_json as f64 / 1048576.0
|
||||
);
|
||||
println!(
|
||||
" Total: {:.0} MB (compressed: {:.0} MB)",
|
||||
(total_sql + total_video + total_json) as f64 / 1048576.0,
|
||||
pkg_size as f64 / 1048576.0
|
||||
);
|
||||
println!();
|
||||
}
|
||||
|
||||
@@ -758,8 +908,17 @@ async fn main() -> Result<()> {
|
||||
Commands::List => cmd_list(&db).await?,
|
||||
Commands::Package { uuid } => cmd_package(&db, &uuid).await?,
|
||||
Commands::Stats => cmd_stats()?,
|
||||
Commands::Visualize { uuid, typ, output, identity } => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?,
|
||||
Commands::VisualizeOffline { sqlite_path, output, identity } => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?,
|
||||
Commands::Visualize {
|
||||
uuid,
|
||||
typ,
|
||||
output,
|
||||
identity,
|
||||
} => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?,
|
||||
Commands::VisualizeOffline {
|
||||
sqlite_path,
|
||||
output,
|
||||
identity,
|
||||
} => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?,
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -16,7 +16,10 @@ const LOG_DIR: &str = "/Users/accusys/service_logs";
|
||||
const LAUNCH_DIR: &str = "/Users/accusys/Library/LaunchAgents";
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "service", about = "Service Lifecycle Manager — source → build → install → config → launch → env")]
|
||||
#[command(
|
||||
name = "service",
|
||||
about = "Service Lifecycle Manager — source → build → install → config → launch → env"
|
||||
)]
|
||||
struct Cli {
|
||||
#[command(subcommand)]
|
||||
command: Commands,
|
||||
@@ -111,22 +114,54 @@ fn cmd_source_list() -> Result<()> {
|
||||
("pyenv", "pyenv/", "git repo"),
|
||||
("cmake", "cmake-4.2.0-macos-universal.tar.gz", "binary"),
|
||||
("llama.cpp", "llama.cpp/", "git repo"),
|
||||
("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", "source tarball"),
|
||||
("libreoffice (dmg)", "LibreOffice_26.2.3_MacOS_aarch64.dmg", "binary (TDF)"),
|
||||
("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", "npm package"),
|
||||
(
|
||||
"libreoffice (src)",
|
||||
"libreoffice-26.2.3.2.tar.xz",
|
||||
"source tarball",
|
||||
),
|
||||
(
|
||||
"libreoffice (dmg)",
|
||||
"LibreOffice_26.2.3_MacOS_aarch64.dmg",
|
||||
"binary (TDF)",
|
||||
),
|
||||
(
|
||||
"mermaid-cli",
|
||||
"mermaid-js-mermaid-cli-11.14.0.tgz",
|
||||
"npm package",
|
||||
),
|
||||
("librsvg", "librsvg/", "Rust source"),
|
||||
("GroundingDINO", "GroundingDINO/", "git repo (IDEA-Research)"),
|
||||
(
|
||||
"GroundingDINO",
|
||||
"GroundingDINO/",
|
||||
"git repo (IDEA-Research)",
|
||||
),
|
||||
("PaliGemma", "paligemma/", "HuggingFace reference"),
|
||||
("Odoo 19 CE", "odoo/", "git repo (LGPL-3.0)"),
|
||||
("ERPNext v15", "erpnext/", "git repo (GPL-3.0)"),
|
||||
("Frappe Framework", "frappe/", "git repo (MIT)"),
|
||||
("Gitea v1.25", "gitea/", "git repo (MIT, Go)"),
|
||||
("Go v1.26", "go/", "git repo (BSD)"),
|
||||
("Rust/Cargo", "rustc-1.92.0-src.tar.xz", "source tarball (Apache 2.0 / MIT)"),
|
||||
("rustup", "rustup-1.28.1.tar.gz", "source tarball (Apache 2.0)"),
|
||||
("Swift v6.3", "swift-6.3.1-RELEASE.tar.gz", "source tarball (Apache 2.0)"),
|
||||
(
|
||||
"Rust/Cargo",
|
||||
"rustc-1.92.0-src.tar.xz",
|
||||
"source tarball (Apache 2.0 / MIT)",
|
||||
),
|
||||
(
|
||||
"rustup",
|
||||
"rustup-1.28.1.tar.gz",
|
||||
"source tarball (Apache 2.0)",
|
||||
),
|
||||
(
|
||||
"Swift v6.3",
|
||||
"swift-6.3.1-RELEASE.tar.gz",
|
||||
"source tarball (Apache 2.0)",
|
||||
),
|
||||
("yt-dlp", "yt-dlp/", "git repo (Unlicense)"),
|
||||
("SQLite", "sqlite-amalgamation-3490100.zip", "amalgamation (Public Domain)"),
|
||||
(
|
||||
"SQLite",
|
||||
"sqlite-amalgamation-3490100.zip",
|
||||
"amalgamation (Public Domain)",
|
||||
),
|
||||
("sqlite-vec", "sqlite-vec/", "git repo (MIT)"),
|
||||
];
|
||||
|
||||
@@ -164,7 +199,11 @@ fn cmd_source_verify() -> Result<()> {
|
||||
("cmake", "cmake-4.2.0-macos-universal.tar.gz", false),
|
||||
("llama.cpp", "llama.cpp/", true),
|
||||
("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", false),
|
||||
("libreoffice (dmg)", "LibreOffice_26.2.3_MacOS_aarch64.dmg", false),
|
||||
(
|
||||
"libreoffice (dmg)",
|
||||
"LibreOffice_26.2.3_MacOS_aarch64.dmg",
|
||||
false,
|
||||
),
|
||||
("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", false),
|
||||
("librsvg", "librsvg/", true),
|
||||
("GroundingDINO", "GroundingDINO/", true),
|
||||
@@ -186,7 +225,11 @@ fn cmd_source_verify() -> Result<()> {
|
||||
let mut missing = 0;
|
||||
for (name, path, is_dir) in &checks {
|
||||
let full = src_dir.join(path);
|
||||
let exists = if *is_dir { full.is_dir() } else { full.is_file() };
|
||||
let exists = if *is_dir {
|
||||
full.is_dir()
|
||||
} else {
|
||||
full.is_file()
|
||||
};
|
||||
if exists {
|
||||
println!(" ✅ {}", name);
|
||||
ok += 1;
|
||||
@@ -202,7 +245,10 @@ fn cmd_source_verify() -> Result<()> {
|
||||
// ---- Build ----
|
||||
|
||||
fn cmd_build(service: &str) -> Result<()> {
|
||||
let install_sh = Path::new(SERVICE_SRC).parent().unwrap().join("install_services.sh");
|
||||
let install_sh = Path::new(SERVICE_SRC)
|
||||
.parent()
|
||||
.unwrap()
|
||||
.join("install_services.sh");
|
||||
|
||||
if service == "all" {
|
||||
// Run the full install script
|
||||
@@ -224,8 +270,14 @@ fn cmd_build(service: &str) -> Result<()> {
|
||||
"ffmpeg" => {
|
||||
println!("Building ffmpeg (requires x264 + freetype)...");
|
||||
// Simplified: run the install script which handles incremental builds
|
||||
let status = Command::new("bash").arg(&install_sh).env("PREFIX", PREFIX).env("SRC_DIR", SERVICE_SRC).status()?;
|
||||
if !status.success() { anyhow::bail!("Build failed"); }
|
||||
let status = Command::new("bash")
|
||||
.arg(&install_sh)
|
||||
.env("PREFIX", PREFIX)
|
||||
.env("SRC_DIR", SERVICE_SRC)
|
||||
.status()?;
|
||||
if !status.success() {
|
||||
anyhow::bail!("Build failed");
|
||||
}
|
||||
}
|
||||
"redis" => {
|
||||
let src = format!("{}/redis-7.4.3.tar.gz", SERVICE_SRC);
|
||||
@@ -236,37 +288,67 @@ fn cmd_build(service: &str) -> Result<()> {
|
||||
run_build("postgresql", &src, &format!("cd /tmp && tar xzf {} && cd postgresql-18.3 && ./configure --prefix={}/pgsql/18.3 && make -j$(sysctl -n hw.ncpu) && make install", src, PREFIX))?;
|
||||
}
|
||||
"llama" => {
|
||||
println!("Building llama.cpp from {}...", format!("{}/llama.cpp", SERVICE_SRC));
|
||||
println!(
|
||||
"Building llama.cpp from {}...",
|
||||
format!("{}/llama.cpp", SERVICE_SRC)
|
||||
);
|
||||
let status = Command::new("cmake")
|
||||
.args(["-B", "build", "-DCMAKE_INSTALL_PREFIX=/tmp/llama_install"])
|
||||
.current_dir(format!("{}/llama.cpp", SERVICE_SRC))
|
||||
.status()?;
|
||||
if !status.success() { anyhow::bail!("cmake failed"); }
|
||||
let status = Command::new("cmake").args(["--build", "build", "--config", "Release", "-j"]).current_dir(format!("{}/llama.cpp", SERVICE_SRC)).status()?;
|
||||
if !status.success() { anyhow::bail!("build failed"); }
|
||||
if !status.success() {
|
||||
anyhow::bail!("cmake failed");
|
||||
}
|
||||
let status = Command::new("cmake")
|
||||
.args(["--build", "build", "--config", "Release", "-j"])
|
||||
.current_dir(format!("{}/llama.cpp", SERVICE_SRC))
|
||||
.status()?;
|
||||
if !status.success() {
|
||||
anyhow::bail!("build failed");
|
||||
}
|
||||
}
|
||||
"libreoffice" => {
|
||||
let dmg = format!("{}/LibreOffice_26.2.3_MacOS_aarch64.dmg", SERVICE_SRC);
|
||||
let mount = "/tmp/lo_mount";
|
||||
println!("Extracting LibreOffice from DMG...");
|
||||
// Mount
|
||||
let status = Command::new("hdiutil").args(["attach", &dmg, "-nobrowse", "-quiet", "-mountpoint", mount]).status()?;
|
||||
if !status.success() { anyhow::bail!("DMG mount failed"); }
|
||||
let status = Command::new("hdiutil")
|
||||
.args(["attach", &dmg, "-nobrowse", "-quiet", "-mountpoint", mount])
|
||||
.status()?;
|
||||
if !status.success() {
|
||||
anyhow::bail!("DMG mount failed");
|
||||
}
|
||||
// Copy app
|
||||
let lo_dir = format!("{}/libreoffice", PREFIX);
|
||||
let _ = std::fs::remove_dir_all(format!("{}/LibreOffice.app", lo_dir));
|
||||
std::fs::create_dir_all(&lo_dir)?;
|
||||
let status = Command::new("cp").args(["-R", &format!("{}/LibreOffice.app", mount), &format!("{}/LibreOffice.app", lo_dir)]).status()?;
|
||||
if !status.success() { anyhow::bail!("Copy failed"); }
|
||||
let status = Command::new("cp")
|
||||
.args([
|
||||
"-R",
|
||||
&format!("{}/LibreOffice.app", mount),
|
||||
&format!("{}/LibreOffice.app", lo_dir),
|
||||
])
|
||||
.status()?;
|
||||
if !status.success() {
|
||||
anyhow::bail!("Copy failed");
|
||||
}
|
||||
// Create symlink
|
||||
std::fs::create_dir_all(format!("{}/bin", lo_dir))?;
|
||||
let _ = std::fs::remove_file(format!("{}/bin/soffice", lo_dir));
|
||||
std::os::unix::fs::symlink("../LibreOffice.app/Contents/MacOS/soffice", format!("{}/bin/soffice", lo_dir))?;
|
||||
std::os::unix::fs::symlink(
|
||||
"../LibreOffice.app/Contents/MacOS/soffice",
|
||||
format!("{}/bin/soffice", lo_dir),
|
||||
)?;
|
||||
// Unmount
|
||||
let _ = Command::new("hdiutil").args(["detach", mount, "-quiet"]).status();
|
||||
let _ = Command::new("hdiutil")
|
||||
.args(["detach", mount, "-quiet"])
|
||||
.status();
|
||||
println!(" libreoffice installed to {}/bin/soffice", lo_dir);
|
||||
}
|
||||
_ => anyhow::bail!("Unknown service: {}. Try: all, ffmpeg, redis, postgres, llama, libreoffice, python", service),
|
||||
_ => anyhow::bail!(
|
||||
"Unknown service: {}. Try: all, ffmpeg, redis, postgres, llama, libreoffice, python",
|
||||
service
|
||||
),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -274,7 +356,9 @@ fn cmd_build(service: &str) -> Result<()> {
|
||||
fn run_build(name: &str, src: &str, cmd: &str) -> Result<()> {
|
||||
println!("Building {} from {}...", name, src);
|
||||
let status = Command::new("bash").arg("-c").arg(cmd).status()?;
|
||||
if !status.success() { anyhow::bail!("{} build failed", name); }
|
||||
if !status.success() {
|
||||
anyhow::bail!("{} build failed", name);
|
||||
}
|
||||
println!(" {} build complete", name);
|
||||
Ok(())
|
||||
}
|
||||
@@ -292,7 +376,10 @@ fn cmd_install(service: &str) -> Result<()> {
|
||||
let rsvg_src = format!("{}/librsvg/bin/rsvg-convert", PREFIX);
|
||||
let gitea_src = format!("{}/gitea/bin/gitea", PREFIX);
|
||||
let go_src = format!("{}/go/bin/go", PREFIX);
|
||||
let rustc_src = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", PREFIX);
|
||||
let rustc_src = format!(
|
||||
"{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc",
|
||||
PREFIX
|
||||
);
|
||||
let swift_src = "/usr/bin/swift".to_string();
|
||||
let ytdlp_src = "/opt/homebrew/bin/yt-dlp".to_string();
|
||||
|
||||
@@ -313,7 +400,9 @@ fn cmd_install(service: &str) -> Result<()> {
|
||||
];
|
||||
|
||||
for (name, src) in &installs {
|
||||
if service != "all" && service != *name { continue; }
|
||||
if service != "all" && service != *name {
|
||||
continue;
|
||||
}
|
||||
if Path::new(src).exists() {
|
||||
println!(" ✅ {} installed: {}", name, src);
|
||||
} else {
|
||||
@@ -370,12 +459,18 @@ fn cmd_config(service: &str) -> Result<()> {
|
||||
println!("MOMENTRY_LLM_SUMMARY_URL=http://localhost:8082/v1/chat/completions");
|
||||
println!("MOMENTRY_OUTPUT_DIR={}/momentry/output_dev", PREFIX);
|
||||
println!("MOMENTRY_SCRIPTS_DIR={}/momentry_core_0.1/scripts", PREFIX);
|
||||
println!("MOMENTRY_PYTHON_PATH={}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX);
|
||||
println!(
|
||||
"MOMENTRY_PYTHON_PATH={}/.pyenv/versions/3.11.15/bin/python3.11",
|
||||
PREFIX
|
||||
);
|
||||
}
|
||||
|
||||
if service == "all" || service == "embedding" {
|
||||
println!("\n--- Embedding Server config ---");
|
||||
println!("# Start: {} embeddinggemma_server.py --port 11436", format!("{}/momentry_core_0.1/scripts", PREFIX));
|
||||
println!(
|
||||
"# Start: {} embeddinggemma_server.py --port 11436",
|
||||
format!("{}/momentry_core_0.1/scripts", PREFIX)
|
||||
);
|
||||
println!("MODEL=google/embeddinggemma-300m");
|
||||
println!("PORT=11436");
|
||||
println!("DEVICE=mps");
|
||||
@@ -393,25 +488,58 @@ fn cmd_launch_generate() -> Result<()> {
|
||||
let pg_args = format!("-D {}/pgsql/18.3/data", PREFIX);
|
||||
let redis_bin = format!("{}/redis/bin/redis-server", PREFIX);
|
||||
let redis_args = format!("{}/redis/redis.conf", PREFIX);
|
||||
let qdrant_bin = format!("{}/momentry_core_0.1/services/qdrant/target/release/qdrant", PREFIX);
|
||||
let qdrant_bin = format!(
|
||||
"{}/momentry_core_0.1/services/qdrant/target/release/qdrant",
|
||||
PREFIX
|
||||
);
|
||||
let embed_bin = format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX);
|
||||
let embed_args = format!("{}/momentry_core_0.1/scripts/embeddinggemma_server.py --port 11436", PREFIX);
|
||||
let embed_args = format!(
|
||||
"{}/momentry_core_0.1/scripts/embeddinggemma_server.py --port 11436",
|
||||
PREFIX
|
||||
);
|
||||
let llama_bin = format!("{}/llama/bin/llama-server", PREFIX);
|
||||
let llama_args = format!("-m {}/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf --port 8082 -ngl 99 -c 16384", PREFIX);
|
||||
let play_bin = format!("{}/momentry_core_0.1/target/debug/momentry_playground", PREFIX);
|
||||
let llama_args = format!(
|
||||
"-m {}/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf --port 8082 -ngl 99 -c 16384",
|
||||
PREFIX
|
||||
);
|
||||
let play_bin = format!(
|
||||
"{}/momentry_core_0.1/target/debug/momentry_playground",
|
||||
PREFIX
|
||||
);
|
||||
|
||||
let services: Vec<(&str, &str, &str, &str)> = vec![
|
||||
("com.momentry.postgres", &pg_bin, &pg_args, "PostgreSQL"),
|
||||
("com.momentry.redis", &redis_bin, &redis_args, "Redis"),
|
||||
("com.momentry.qdrant", &qdrant_bin, "", "Qdrant"),
|
||||
("com.momentry.embedding", &embed_bin, &embed_args, "EmbeddingGemma"),
|
||||
("com.momentry.llama", &llama_bin, &llama_args, "LLM (llama.cpp)"),
|
||||
("com.momentry.playground", &play_bin, "server --port 3003", "Momentry Playground"),
|
||||
("com.momentry.worker", &play_bin, "worker --max-concurrent 2 --poll-interval 5", "Momentry Worker"),
|
||||
("com.momentry.postgres", &pg_bin, &pg_args, "PostgreSQL"),
|
||||
("com.momentry.redis", &redis_bin, &redis_args, "Redis"),
|
||||
("com.momentry.qdrant", &qdrant_bin, "", "Qdrant"),
|
||||
(
|
||||
"com.momentry.embedding",
|
||||
&embed_bin,
|
||||
&embed_args,
|
||||
"EmbeddingGemma",
|
||||
),
|
||||
(
|
||||
"com.momentry.llama",
|
||||
&llama_bin,
|
||||
&llama_args,
|
||||
"LLM (llama.cpp)",
|
||||
),
|
||||
(
|
||||
"com.momentry.playground",
|
||||
&play_bin,
|
||||
"server --port 3003",
|
||||
"Momentry Playground",
|
||||
),
|
||||
(
|
||||
"com.momentry.worker",
|
||||
&play_bin,
|
||||
"worker --max-concurrent 2 --poll-interval 5",
|
||||
"Momentry Worker",
|
||||
),
|
||||
];
|
||||
|
||||
for (label, bin, args, _desc) in &services {
|
||||
let plist = format!(r#"<?xml version="1.0" encoding="UTF-8"?>
|
||||
let plist = format!(
|
||||
r#"<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
@@ -451,7 +579,11 @@ fn cmd_launch_generate() -> Result<()> {
|
||||
fs::write(&plist_path, plist)?;
|
||||
println!(" 📝 {} → {:?}", label, plist_path.file_name().unwrap());
|
||||
}
|
||||
println!("\n Generated {} plist files in {}", services.len(), LAUNCH_DIR);
|
||||
println!(
|
||||
"\n Generated {} plist files in {}",
|
||||
services.len(),
|
||||
LAUNCH_DIR
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -461,7 +593,9 @@ fn cmd_launch_load() -> Result<()> {
|
||||
let path = entry.path();
|
||||
if path.extension().map_or(false, |e| e == "plist") {
|
||||
let name = path.file_stem().unwrap().to_str().unwrap_or("?");
|
||||
let status = Command::new("launchctl").args(["load", "-w", path.to_str().unwrap()]).status();
|
||||
let status = Command::new("launchctl")
|
||||
.args(["load", "-w", path.to_str().unwrap()])
|
||||
.status();
|
||||
match status {
|
||||
Ok(s) if s.success() => println!(" ✅ loaded: {}", name),
|
||||
Ok(_) => println!(" ⚠️ load failed: {}", name),
|
||||
@@ -478,7 +612,9 @@ fn cmd_launch_unload() -> Result<()> {
|
||||
let path = entry.path();
|
||||
if path.extension().map_or(false, |e| e == "plist") {
|
||||
let name = path.file_stem().unwrap().to_str().unwrap_or("?");
|
||||
let status = Command::new("launchctl").args(["unload", path.to_str().unwrap()]).status();
|
||||
let status = Command::new("launchctl")
|
||||
.args(["unload", path.to_str().unwrap()])
|
||||
.status();
|
||||
match status {
|
||||
Ok(s) if s.success() => println!(" ✅ unloaded: {}", name),
|
||||
Ok(_) => println!(" ⚠️ unload failed: {}", name),
|
||||
@@ -504,7 +640,11 @@ fn cmd_launch_status() -> Result<()> {
|
||||
Ok(o) if o.status.success() => {
|
||||
let stdout = String::from_utf8_lossy(&o.stdout);
|
||||
if stdout.contains("PID") || stdout.lines().count() > 1 {
|
||||
let pid = stdout.lines().nth(1).and_then(|l| l.split_whitespace().next()).unwrap_or("-");
|
||||
let pid = stdout
|
||||
.lines()
|
||||
.nth(1)
|
||||
.and_then(|l| l.split_whitespace().next())
|
||||
.unwrap_or("-");
|
||||
println!(" 🟢 {} (PID: {})", label, pid);
|
||||
} else {
|
||||
println!(" ⚪ {} (not running)", label);
|
||||
@@ -519,7 +659,8 @@ fn cmd_launch_status() -> Result<()> {
|
||||
// ---- Env ----
|
||||
|
||||
fn cmd_env(output: &Option<String>) -> Result<()> {
|
||||
let env_content = format!(r#"# Momentry Core — Environment Configuration
|
||||
let env_content = format!(
|
||||
r#"# Momentry Core — Environment Configuration
|
||||
# Generated: {}
|
||||
# Service: {} env
|
||||
|
||||
@@ -601,8 +742,14 @@ fn cmd_test() -> Result<()> {
|
||||
let rsvg_bin = format!("{}/librsvg/bin/rsvg-convert", PREFIX);
|
||||
let gitea_bin = format!("{}/gitea/bin/gitea", PREFIX);
|
||||
let go_bin = format!("{}/go/bin/go", PREFIX);
|
||||
let rustc_bin = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", PREFIX);
|
||||
let cargo_bin = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/cargo", PREFIX);
|
||||
let rustc_bin = format!(
|
||||
"{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc",
|
||||
PREFIX
|
||||
);
|
||||
let cargo_bin = format!(
|
||||
"{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/cargo",
|
||||
PREFIX
|
||||
);
|
||||
let swift_bin = "/usr/bin/swift".to_string();
|
||||
let ytdlp_bin = "/opt/homebrew/bin/yt-dlp".to_string();
|
||||
|
||||
@@ -641,7 +788,11 @@ fn cmd_test() -> Result<()> {
|
||||
let output = Command::new(bin).args(args).output();
|
||||
match output {
|
||||
Ok(o) if o.status.success() => {
|
||||
let ver = String::from_utf8_lossy(&o.stdout).lines().next().unwrap_or("?").to_string();
|
||||
let ver = String::from_utf8_lossy(&o.stdout)
|
||||
.lines()
|
||||
.next()
|
||||
.unwrap_or("?")
|
||||
.to_string();
|
||||
println!("✅ {}", ver.chars().take(70).collect::<String>());
|
||||
pass += 1;
|
||||
}
|
||||
@@ -666,14 +817,87 @@ fn cmd_test() -> Result<()> {
|
||||
// Functional tests
|
||||
println!("\n--- Functional Tests ---");
|
||||
// Create test docx for libreoffice test
|
||||
let _ = std::fs::write("/tmp/svc_test_func.docx", "Service test document for LibreOffice conversion");
|
||||
let _ = std::fs::write(
|
||||
"/tmp/svc_test_func.docx",
|
||||
"Service test document for LibreOffice conversion",
|
||||
);
|
||||
let func_tests = [
|
||||
("ffprobe probe", "ffprobe", vec!["-v", "error", "-show_entries", "format=duration", "-of", "csv=p=0", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4"]),
|
||||
("ffmpeg audio extract", "ffmpeg", vec!["-y", "-v", "quiet", "-i", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", "-t", "2", "-ar", "16000", "-ac", "1", "/tmp/svc_test_audio.wav"]),
|
||||
("ffmpeg frame extract", "ffmpeg", vec!["-y", "-v", "quiet", "-i", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", "-ss", "100", "-vframes", "1", "/tmp/svc_test_frame.jpg"]),
|
||||
("libreoffice doc→txt", "libreoffice", vec!["--headless", "--convert-to", "txt", "/tmp/svc_test_func.docx", "--outdir", "/tmp/"]),
|
||||
("rsvg-convert svg→png", "rsvg-convert", vec!["-o", "/tmp/svc_test_rsvg.png", "/tmp/test_rsvg.svg"]),
|
||||
("mmdc mermaid→png", "mermaid-cli", vec!["-i", "/tmp/test_mermaid.mmd", "-o", "/tmp/svc_test_mmd.png", "-w", "200"]),
|
||||
(
|
||||
"ffprobe probe",
|
||||
"ffprobe",
|
||||
vec![
|
||||
"-v",
|
||||
"error",
|
||||
"-show_entries",
|
||||
"format=duration",
|
||||
"-of",
|
||||
"csv=p=0",
|
||||
"/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
|
||||
],
|
||||
),
|
||||
(
|
||||
"ffmpeg audio extract",
|
||||
"ffmpeg",
|
||||
vec![
|
||||
"-y",
|
||||
"-v",
|
||||
"quiet",
|
||||
"-i",
|
||||
"/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
|
||||
"-t",
|
||||
"2",
|
||||
"-ar",
|
||||
"16000",
|
||||
"-ac",
|
||||
"1",
|
||||
"/tmp/svc_test_audio.wav",
|
||||
],
|
||||
),
|
||||
(
|
||||
"ffmpeg frame extract",
|
||||
"ffmpeg",
|
||||
vec![
|
||||
"-y",
|
||||
"-v",
|
||||
"quiet",
|
||||
"-i",
|
||||
"/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
|
||||
"-ss",
|
||||
"100",
|
||||
"-vframes",
|
||||
"1",
|
||||
"/tmp/svc_test_frame.jpg",
|
||||
],
|
||||
),
|
||||
(
|
||||
"libreoffice doc→txt",
|
||||
"libreoffice",
|
||||
vec![
|
||||
"--headless",
|
||||
"--convert-to",
|
||||
"txt",
|
||||
"/tmp/svc_test_func.docx",
|
||||
"--outdir",
|
||||
"/tmp/",
|
||||
],
|
||||
),
|
||||
(
|
||||
"rsvg-convert svg→png",
|
||||
"rsvg-convert",
|
||||
vec!["-o", "/tmp/svc_test_rsvg.png", "/tmp/test_rsvg.svg"],
|
||||
),
|
||||
(
|
||||
"mmdc mermaid→png",
|
||||
"mermaid-cli",
|
||||
vec![
|
||||
"-i",
|
||||
"/tmp/test_mermaid.mmd",
|
||||
"-o",
|
||||
"/tmp/svc_test_mmd.png",
|
||||
"-w",
|
||||
"200",
|
||||
],
|
||||
),
|
||||
];
|
||||
|
||||
for (desc, bin_name, args) in &func_tests {
|
||||
@@ -689,8 +913,14 @@ fn cmd_test() -> Result<()> {
|
||||
};
|
||||
let output = Command::new(bin).args(args).output();
|
||||
match output {
|
||||
Ok(o) if o.status.success() => { println!("✅"); pass += 1; }
|
||||
_ => { println!("❌"); fail += 1; }
|
||||
Ok(o) if o.status.success() => {
|
||||
println!("✅");
|
||||
pass += 1;
|
||||
}
|
||||
_ => {
|
||||
println!("❌");
|
||||
fail += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -706,7 +936,10 @@ fn cmd_test() -> Result<()> {
|
||||
|
||||
fn cmd_report() -> Result<()> {
|
||||
println!("=== Momentry Service Report ===");
|
||||
println!("Generated: {}", chrono::Local::now().format("%Y-%m-%d %H:%M:%S"));
|
||||
println!(
|
||||
"Generated: {}",
|
||||
chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
|
||||
);
|
||||
println!();
|
||||
|
||||
// 1. Source status
|
||||
@@ -730,13 +963,25 @@ fn cmd_report() -> Result<()> {
|
||||
println!("\n## 2. Binaries");
|
||||
let binaries = [
|
||||
("cmake", &format!("{}/bin/cmake", PREFIX)),
|
||||
("python3.11", &format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX)),
|
||||
(
|
||||
"python3.11",
|
||||
&format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX),
|
||||
),
|
||||
("ffmpeg", &format!("{}/ffmpeg_build/bin/ffmpeg", PREFIX)),
|
||||
("ffprobe", &format!("{}/ffmpeg_build/bin/ffprobe", PREFIX)),
|
||||
("redis-server", &format!("{}/redis/bin/redis-server", PREFIX)),
|
||||
(
|
||||
"redis-server",
|
||||
&format!("{}/redis/bin/redis-server", PREFIX),
|
||||
),
|
||||
("postgres", &format!("{}/pgsql/18.3/bin/postgres", PREFIX)),
|
||||
("llama-server", &format!("{}/llama/bin/llama-server", PREFIX)),
|
||||
("libreoffice", &format!("{}/libreoffice/bin/soffice", PREFIX)),
|
||||
(
|
||||
"llama-server",
|
||||
&format!("{}/llama/bin/llama-server", PREFIX),
|
||||
),
|
||||
(
|
||||
"libreoffice",
|
||||
&format!("{}/libreoffice/bin/soffice", PREFIX),
|
||||
),
|
||||
];
|
||||
for (name, path) in &binaries {
|
||||
let status = if Path::new(path).exists() {
|
||||
@@ -772,9 +1017,18 @@ fn cmd_report() -> Result<()> {
|
||||
|
||||
// 4. Ports
|
||||
println!("\n## 4. Port Status");
|
||||
let ports = [(3003, "Playground"), (5432, "PostgreSQL"), (6379, "Redis"), (6333, "Qdrant"), (8082, "LLM"), (11436, "Embedding")];
|
||||
let ports = [
|
||||
(3003, "Playground"),
|
||||
(5432, "PostgreSQL"),
|
||||
(6379, "Redis"),
|
||||
(6333, "Qdrant"),
|
||||
(8082, "LLM"),
|
||||
(11436, "Embedding"),
|
||||
];
|
||||
for (port, name) in &ports {
|
||||
let output = Command::new("lsof").args(["-i", &format!(":{}", port)]).output();
|
||||
let output = Command::new("lsof")
|
||||
.args(["-i", &format!(":{}", port)])
|
||||
.output();
|
||||
match output {
|
||||
Ok(o) if o.status.success() => println!(" 🟢 :{} ({})", port, name),
|
||||
_ => println!(" ⚪ :{} ({})", port, name),
|
||||
@@ -797,14 +1051,21 @@ fn cmd_report() -> Result<()> {
|
||||
}
|
||||
|
||||
/// Formats a byte count as a short human-readable string using 1024-based units.
///
/// Values of at least 1 GiB are printed with one decimal place (`"1.5GB"`),
/// values of at least 1 MiB or 1 KiB are printed without decimals (`"5MB"`,
/// `"2KB"`), and anything smaller is printed as a plain byte count (`"512B"`).
///
/// Thresholds are inclusive, so exactly 1024 bytes is reported as `"1KB"`
/// rather than `"1024B"`.
fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * 1024;
    const GIB: u64 = MIB * 1024;

    if bytes >= GIB {
        format!("{:.1}GB", bytes as f64 / GIB as f64)
    } else if bytes >= MIB {
        format!("{:.0}MB", bytes as f64 / MIB as f64)
    } else if bytes >= KIB {
        format!("{:.0}KB", bytes as f64 / KIB as f64)
    } else {
        format!("{}B", bytes)
    }
}
|
||||
|
||||
fn format_dir_size(path: &Path) -> String {
|
||||
let output = Command::new("du").args(["-sh", path.to_str().unwrap()]).output();
|
||||
let output = Command::new("du")
|
||||
.args(["-sh", path.to_str().unwrap()])
|
||||
.output();
|
||||
match output {
|
||||
Ok(o) if o.status.success() => {
|
||||
let s = String::from_utf8_lossy(&o.stdout);
|
||||
@@ -824,7 +1085,10 @@ async fn main() -> Result<()> {
|
||||
SourceAction::List => cmd_source_list()?,
|
||||
SourceAction::Verify => cmd_source_verify()?,
|
||||
SourceAction::Download { name } => {
|
||||
println!("Downloading: {} (use install_services.sh for full download)", name);
|
||||
println!(
|
||||
"Downloading: {} (use install_services.sh for full download)",
|
||||
name
|
||||
);
|
||||
println!("Source URLs:");
|
||||
println!(" ffmpeg: https://ffmpeg.org/releases/ffmpeg-7.1.1.tar.xz");
|
||||
println!(" redis: https://download.redis.io/releases/redis-7.4.3.tar.gz");
|
||||
|
||||
@@ -75,15 +75,13 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
|
||||
// Query chunks table for Rule 1 sentence chunks
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let rule1_rows: Vec<(String,)> = sqlx::query_as(
|
||||
&format!(
|
||||
"SELECT chunk_id FROM {} \
|
||||
let rule1_rows: Vec<(String,)> = sqlx::query_as(&format!(
|
||||
"SELECT chunk_id FROM {} \
|
||||
WHERE file_uuid = $1 AND chunk_type = 'sentence' \
|
||||
AND start_frame >= $2 \
|
||||
AND end_frame <= $3",
|
||||
chunk_table
|
||||
),
|
||||
)
|
||||
chunk_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(scene.start_frame as i64)
|
||||
.bind(scene.end_frame as i64)
|
||||
@@ -101,16 +99,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
// Let's re-query text directly.
|
||||
}
|
||||
|
||||
let texts: Vec<String> = sqlx::query_scalar(
|
||||
&format!(
|
||||
"SELECT text_content FROM {} \
|
||||
let texts: Vec<String> = sqlx::query_scalar(&format!(
|
||||
"SELECT text_content FROM {} \
|
||||
WHERE file_uuid = $1 AND chunk_type = 'sentence' \
|
||||
AND start_frame >= $2 \
|
||||
AND end_frame <= $3 \
|
||||
ORDER BY start_frame ASC",
|
||||
chunk_table
|
||||
),
|
||||
)
|
||||
chunk_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(scene.start_frame as i64)
|
||||
.bind(scene.end_frame as i64)
|
||||
@@ -154,16 +150,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
"scene_number": scene.scene_number
|
||||
});
|
||||
|
||||
sqlx::query(
|
||||
&format!(
|
||||
"INSERT INTO {} (file_uuid, chunk_id, chunk_type, \
|
||||
sqlx::query(&format!(
|
||||
"INSERT INTO {} (file_uuid, chunk_id, chunk_type, \
|
||||
start_time, end_time, fps, start_frame, end_frame, \
|
||||
content, text_content, summary_text, metadata, child_chunk_ids) \
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) \
|
||||
ON CONFLICT (file_uuid, chunk_id) DO NOTHING",
|
||||
chunk_table
|
||||
),
|
||||
)
|
||||
chunk_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&chunk_id)
|
||||
.bind(scene.scene_number as i32)
|
||||
|
||||
@@ -20,8 +20,7 @@ pub fn set_cache_enabled(enabled: bool) {
|
||||
}
|
||||
|
||||
// Switch 1: watcher detects new file → auto-register
|
||||
pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy<RwLock<bool>> =
|
||||
Lazy::new(|| RwLock::new(false));
|
||||
pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));
|
||||
|
||||
pub fn get_watcher_auto_register() -> bool {
|
||||
*RUNTIME_WATCHER_AUTO_REGISTER.read().unwrap()
|
||||
@@ -33,8 +32,7 @@ pub fn set_watcher_auto_register(enabled: bool) {
|
||||
}
|
||||
|
||||
// Switch 2: register → auto-trigger processing pipeline
|
||||
pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy<RwLock<bool>> =
|
||||
Lazy::new(|| RwLock::new(false));
|
||||
pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));
|
||||
|
||||
pub fn get_auto_pipeline_enabled() -> bool {
|
||||
*RUNTIME_AUTO_PIPELINE_ENABLED.read().unwrap()
|
||||
@@ -107,6 +105,30 @@ pub static REDIS_KEY_PREFIX: Lazy<String> =
|
||||
/// Database schema name taken from the `DATABASE_SCHEMA` environment variable;
/// falls back to `"public"` when the variable cannot be read.
pub static DATABASE_SCHEMA: Lazy<String> =
|
||||
Lazy::new(|| env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "public".to_string()));
|
||||
|
||||
pub static SYSTEM_TIMEZONE: Lazy<String> = Lazy::new(|| {
|
||||
if let Ok(tz) = env::var("MOMENTRY_TIMEZONE") {
|
||||
if !tz.is_empty() {
|
||||
return tz;
|
||||
}
|
||||
}
|
||||
if let Ok(tz) = env::var("TZ") {
|
||||
if !tz.is_empty() {
|
||||
return tz;
|
||||
}
|
||||
}
|
||||
// macOS: /etc/localtime → /var/db/timezone/zoneinfo/Asia/Taipei
|
||||
// Linux: /etc/localtime → /usr/share/zoneinfo/Asia/Taipei
|
||||
if let Ok(path) = std::fs::read_link("/etc/localtime") {
|
||||
let s = path.to_string_lossy();
|
||||
for prefix in &["/usr/share/zoneinfo/", "/var/db/timezone/zoneinfo/"] {
|
||||
if let Some(tz) = s.strip_prefix(prefix) {
|
||||
return tz.to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
"Asia/Taipei".to_string()
|
||||
});
|
||||
|
||||
/// MongoDB database name taken from the `MONGODB_DATABASE` environment variable;
/// falls back to `"momentry"` when the variable cannot be read.
pub static MONGODB_DATABASE: Lazy<String> =
|
||||
Lazy::new(|| env::var("MONGODB_DATABASE").unwrap_or_else(|_| "momentry".to_string()));
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,9 +15,11 @@ pub struct QdrantDb {
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VectorPayload {
|
||||
pub uuid: String,
|
||||
pub file_uuid: String,
|
||||
pub chunk_id: String,
|
||||
pub chunk_type: String,
|
||||
pub start_frame: i64,
|
||||
pub end_frame: i64,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub text: Option<String>,
|
||||
@@ -189,6 +191,49 @@ impl QdrantDb {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_vectors_batch(
|
||||
&self,
|
||||
collection: &str,
|
||||
points: &[(u64, &[f32], Option<serde_json::Value>)],
|
||||
) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points?wait=true",
|
||||
self.base_url, collection
|
||||
);
|
||||
|
||||
let qdrant_points: Vec<serde_json::Value> = points
|
||||
.iter()
|
||||
.map(|(id, vec, payload)| {
|
||||
let mut p = serde_json::json!({
|
||||
"id": id,
|
||||
"vector": vec,
|
||||
});
|
||||
if let Some(pl) = payload {
|
||||
p["payload"] = pl.clone();
|
||||
}
|
||||
p
|
||||
})
|
||||
.collect();
|
||||
|
||||
let body = serde_json::json!({ "points": qdrant_points });
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.put(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to send batch upsert request to Qdrant")?;
|
||||
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
let response_text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant batch upsert failed: {} - {}", status, response_text);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_vector(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
@@ -207,12 +252,23 @@ impl QdrantDb {
|
||||
);
|
||||
|
||||
let mut payload_map = HashMap::new();
|
||||
payload_map.insert("uuid".to_string(), serde_json::json!(payload.uuid));
|
||||
payload_map.insert(
|
||||
"file_uuid".to_string(),
|
||||
serde_json::json!(payload.file_uuid),
|
||||
);
|
||||
payload_map.insert("chunk_id".to_string(), serde_json::json!(payload.chunk_id));
|
||||
payload_map.insert(
|
||||
"chunk_type".to_string(),
|
||||
serde_json::json!(payload.chunk_type),
|
||||
);
|
||||
payload_map.insert(
|
||||
"start_frame".to_string(),
|
||||
serde_json::json!(payload.start_frame),
|
||||
);
|
||||
payload_map.insert(
|
||||
"end_frame".to_string(),
|
||||
serde_json::json!(payload.end_frame),
|
||||
);
|
||||
payload_map.insert(
|
||||
"start_time".to_string(),
|
||||
serde_json::json!(payload.start_time),
|
||||
@@ -224,7 +280,7 @@ impl QdrantDb {
|
||||
|
||||
// Generate consistent point ID from uuid and chunk_id
|
||||
// Qdrant requires integer or UUID point IDs. We'll use a simple integer hash.
|
||||
let point_id_str = format!("{}_{}", payload.uuid, chunk_id);
|
||||
let point_id_str = format!("{}_{}", payload.file_uuid, chunk_id);
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
let mut hasher = DefaultHasher::new();
|
||||
@@ -240,9 +296,9 @@ impl QdrantDb {
|
||||
});
|
||||
|
||||
tracing::debug!(
|
||||
"Upserting vector to Qdrant: chunk_id={}, uuid={}, vector_len={}",
|
||||
"Upserting vector to Qdrant: chunk_id={}, file_uuid={}, vector_len={}",
|
||||
chunk_id,
|
||||
payload.uuid,
|
||||
payload.file_uuid,
|
||||
vector.len()
|
||||
);
|
||||
|
||||
@@ -337,7 +393,7 @@ impl QdrantDb {
|
||||
.map(|r| {
|
||||
let uuid = r
|
||||
.payload
|
||||
.get("uuid")
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
@@ -409,7 +465,7 @@ impl QdrantDb {
|
||||
.map(|r| {
|
||||
let uuid = r
|
||||
.payload
|
||||
.get("uuid")
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
@@ -471,7 +527,7 @@ impl QdrantDb {
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "uuid",
|
||||
"key": "file_uuid",
|
||||
"match": {
|
||||
"value": uuid
|
||||
}
|
||||
@@ -532,7 +588,7 @@ impl QdrantDb {
|
||||
.map(|r| {
|
||||
let uuid = r
|
||||
.payload
|
||||
.get("uuid")
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
@@ -553,6 +609,89 @@ impl QdrantDb {
|
||||
Ok(search_results)
|
||||
}
|
||||
|
||||
pub async fn search_face_collection(
|
||||
&self,
|
||||
collection: &str,
|
||||
query_vector: &[f32],
|
||||
limit: usize,
|
||||
exclude_payload_key: &str,
|
||||
exclude_payload_value: &str,
|
||||
include_file_uuid: Option<&str>,
|
||||
) -> Result<Vec<(f64, HashMap<String, serde_json::Value>)>> {
|
||||
let url = format!("{}/collections/{}/points/search", self.base_url, collection);
|
||||
|
||||
let mut filter = serde_json::json!({
|
||||
"must_not": [
|
||||
{
|
||||
"key": exclude_payload_key,
|
||||
"match": { "value": exclude_payload_value }
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
if let Some(file_uuid) = include_file_uuid {
|
||||
filter["must"] = serde_json::json!([
|
||||
{
|
||||
"key": "file_uuid",
|
||||
"match": { "value": file_uuid }
|
||||
}
|
||||
]);
|
||||
}
|
||||
|
||||
let body = serde_json::json!({
|
||||
"vector": query_vector,
|
||||
"limit": limit,
|
||||
"with_payload": true,
|
||||
"filter": filter,
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to search Qdrant face collection")?;
|
||||
|
||||
let status = response.status();
|
||||
let response_text = response
|
||||
.text()
|
||||
.await
|
||||
.unwrap_or_else(|_| "Failed to read response".to_string());
|
||||
|
||||
if !status.is_success() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Qdrant search_face_collection failed: {} - {}",
|
||||
status,
|
||||
response_text
|
||||
));
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantSearchResult {
|
||||
result: Vec<QdrantPoint>,
|
||||
}
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantPoint {
|
||||
score: f64,
|
||||
payload: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
match serde_json::from_str::<QdrantSearchResult>(&response_text) {
|
||||
Ok(parsed) => {
|
||||
let results: Vec<(f64, HashMap<String, serde_json::Value>)> = parsed
|
||||
.result
|
||||
.into_iter()
|
||||
.map(|r| (r.score, r.payload))
|
||||
.collect();
|
||||
Ok(results)
|
||||
}
|
||||
Err(e) => Err(anyhow::anyhow!("Failed to parse Qdrant response: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/delete",
|
||||
@@ -563,7 +702,7 @@ impl QdrantDb {
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "uuid",
|
||||
"key": "file_uuid",
|
||||
"match": {
|
||||
"value": uuid
|
||||
}
|
||||
@@ -711,9 +850,11 @@ impl Database for QdrantDb {
|
||||
impl VectorStore for QdrantDb {
|
||||
async fn store_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> {
|
||||
let payload = VectorPayload {
|
||||
uuid: String::new(),
|
||||
file_uuid: String::new(),
|
||||
chunk_id: chunk_id.to_string(),
|
||||
chunk_type: String::new(),
|
||||
start_frame: 0,
|
||||
end_frame: 0,
|
||||
start_time: 0.0,
|
||||
end_time: 0.0,
|
||||
text: None,
|
||||
@@ -737,7 +878,9 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
|
||||
let qdrant: QdrantDb = QdrantDb::new();
|
||||
|
||||
let query = format!(
|
||||
"SELECT id, trace_id, frame_number, embedding FROM {} WHERE file_uuid = $1 AND embedding IS NOT NULL",
|
||||
"SELECT id, trace_id, frame_number, embedding FROM {} \
|
||||
WHERE file_uuid = $1 AND embedding IS NOT NULL \
|
||||
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
|
||||
table
|
||||
);
|
||||
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
|
||||
@@ -767,3 +910,103 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
|
||||
use crate::core::config::DATABASE_URL;
|
||||
use sqlx::Row;
|
||||
|
||||
let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
|
||||
let table = crate::core::db::schema::table_name("face_detections");
|
||||
let qdrant = QdrantDb::new();
|
||||
|
||||
let collection = format!(
|
||||
"{}_traces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
);
|
||||
qdrant.ensure_collection(&collection, 512).await?;
|
||||
|
||||
// Read all face_detections with embeddings, grouped by trace_id in Rust
|
||||
let rows = sqlx::query(&format!(
|
||||
"SELECT trace_id, embedding FROM {} \
|
||||
WHERE file_uuid = $1 AND embedding IS NOT NULL AND trace_id IS NOT NULL \
|
||||
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
|
||||
table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(&pool)
|
||||
.await?;
|
||||
|
||||
let mut trace_faces: std::collections::HashMap<i32, Vec<Vec<f32>>> =
|
||||
std::collections::HashMap::new();
|
||||
let mut trace_stats: std::collections::HashMap<i32, (i64, i64, i64)> =
|
||||
std::collections::HashMap::new(); // (count, min_frame, max_frame)
|
||||
|
||||
for row in &rows {
|
||||
let tid: Option<i32> = row.get(0);
|
||||
let emb: Option<Vec<f32>> = row.get(1);
|
||||
if let (Some(tid), Some(emb)) = (tid, emb) {
|
||||
trace_faces.entry(tid).or_default().push(emb);
|
||||
let entry = trace_stats.entry(tid).or_insert((0, i64::MAX, i64::MIN));
|
||||
entry.0 += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute average embedding per trace
|
||||
struct AvgTrace {
|
||||
tid: i32,
|
||||
avg_emb: Vec<f32>,
|
||||
frame_count: i64,
|
||||
}
|
||||
|
||||
let mut trace_avgs: Vec<AvgTrace> = Vec::new();
|
||||
|
||||
for (&tid, faces) in &trace_faces {
|
||||
let dim = faces[0].len();
|
||||
let mut avg = vec![0.0f32; dim];
|
||||
for face in faces {
|
||||
for (i, &v) in face.iter().enumerate() {
|
||||
avg[i] += v;
|
||||
}
|
||||
}
|
||||
let n = faces.len() as f32;
|
||||
for v in &mut avg {
|
||||
*v /= n;
|
||||
}
|
||||
|
||||
let stats = trace_stats.get(&tid).unwrap_or(&(0, 0, 0));
|
||||
trace_avgs.push(AvgTrace {
|
||||
tid,
|
||||
avg_emb: avg,
|
||||
frame_count: stats.0,
|
||||
});
|
||||
}
|
||||
|
||||
// Push to Qdrant in batches
|
||||
for chunk in trace_avgs.chunks(500) {
|
||||
let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
|
||||
.iter()
|
||||
.map(|t| {
|
||||
(
|
||||
t.tid as u64,
|
||||
t.avg_emb.as_slice(),
|
||||
Some(serde_json::json!({
|
||||
"trace_id": t.tid,
|
||||
"file_uuid": file_uuid,
|
||||
"frame_count": t.frame_count,
|
||||
"source": "trace",
|
||||
})),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
qdrant.upsert_vectors_batch(&collection, &batch).await?;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Synced {} trace embeddings to Qdrant for {}",
|
||||
trace_faces.len(),
|
||||
file_uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -45,9 +45,11 @@ impl SyncDb {
|
||||
}
|
||||
|
||||
let payload = VectorPayload {
|
||||
uuid: uuid.clone(),
|
||||
file_uuid: uuid.clone(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type,
|
||||
start_frame: chunk.start_frame,
|
||||
end_frame: chunk.end_frame,
|
||||
start_time,
|
||||
end_time,
|
||||
text: Some(text.to_string()),
|
||||
|
||||
@@ -33,26 +33,38 @@ pub async fn run_consistency_checks(db: &PostgresDb) -> ConsistencyReport {
|
||||
|
||||
// Check 1: stale_processing — status=processing but job_id is null
|
||||
let c1 = check_stale_processing(db).await;
|
||||
if c1.count > 0 { any_issue = true; }
|
||||
if c1.count > 0 {
|
||||
any_issue = true;
|
||||
}
|
||||
checks.push(c1);
|
||||
|
||||
// Check 2: orphaned_processing — status=processing but no active monitor_job
|
||||
let c2 = check_orphaned_processing(db).await;
|
||||
if c2.count > 0 { any_issue = true; }
|
||||
if c2.count > 0 {
|
||||
any_issue = true;
|
||||
}
|
||||
checks.push(c2);
|
||||
|
||||
// Check 3: unregistered_with_uuid — DB rows left behind by migration
|
||||
let c3 = check_unregistered_with_uuid(db).await;
|
||||
if c3.count > 0 { any_issue = true; }
|
||||
if c3.count > 0 {
|
||||
any_issue = true;
|
||||
}
|
||||
checks.push(c3);
|
||||
|
||||
// Check 4: processing_job_done — status=processing but job already completed
|
||||
let c4 = check_processing_job_done(db).await;
|
||||
if c4.count > 0 { any_issue = true; }
|
||||
if c4.count > 0 {
|
||||
any_issue = true;
|
||||
}
|
||||
checks.push(c4);
|
||||
|
||||
ConsistencyReport {
|
||||
status: if any_issue { "degraded".to_string() } else { "ok".to_string() },
|
||||
status: if any_issue {
|
||||
"degraded".to_string()
|
||||
} else {
|
||||
"ok".to_string()
|
||||
},
|
||||
checked_at,
|
||||
checks,
|
||||
}
|
||||
@@ -68,9 +80,17 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck {
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid, file_name, status, detail: "job_id is null".to_string(),
|
||||
}).collect();
|
||||
let files: Vec<ConsistencyFile> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid,
|
||||
file_name,
|
||||
status,
|
||||
detail: "job_id is null".to_string(),
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
ConsistencyCheck {
|
||||
check: "stale_processing".to_string(),
|
||||
@@ -83,19 +103,28 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck {
|
||||
async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
|
||||
let vt = schema::table_name("videos");
|
||||
let mj = schema::table_name("monitor_jobs");
|
||||
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT v.file_uuid, v.file_name, v.status \
|
||||
let rows: Vec<(String, String, String)> =
|
||||
sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT v.file_uuid, v.file_name, v.status \
|
||||
FROM {} v LEFT JOIN {} m ON v.file_uuid = m.uuid AND m.status IN ('pending','running') \
|
||||
WHERE v.status = 'processing' AND m.id IS NULL",
|
||||
vt, mj
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
vt, mj
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid, file_name, status, detail: "no active monitor_job".to_string(),
|
||||
}).collect();
|
||||
let files: Vec<ConsistencyFile> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid,
|
||||
file_name,
|
||||
status,
|
||||
detail: "no active monitor_job".to_string(),
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
ConsistencyCheck {
|
||||
check: "orphaned_processing".to_string(),
|
||||
@@ -107,17 +136,26 @@ async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
|
||||
|
||||
async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
|
||||
let vt = schema::table_name("videos");
|
||||
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT file_uuid, file_name, status FROM {} WHERE status = 'unregistered'",
|
||||
vt
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let rows: Vec<(String, String, String)> =
|
||||
sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT file_uuid, file_name, status FROM {} WHERE status = 'unregistered'",
|
||||
vt
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid, file_name, status, detail: "migration residue".to_string(),
|
||||
}).collect();
|
||||
let files: Vec<ConsistencyFile> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid,
|
||||
file_name,
|
||||
status,
|
||||
detail: "migration residue".to_string(),
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
ConsistencyCheck {
|
||||
check: "unregistered_with_uuid".to_string(),
|
||||
@@ -130,19 +168,28 @@ async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
|
||||
async fn check_processing_job_done(db: &PostgresDb) -> ConsistencyCheck {
|
||||
let vt = schema::table_name("videos");
|
||||
let mj = schema::table_name("monitor_jobs");
|
||||
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT v.file_uuid, v.file_name, v.status \
|
||||
let rows: Vec<(String, String, String)> =
|
||||
sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT v.file_uuid, v.file_name, v.status \
|
||||
FROM {} v JOIN {} m ON v.file_uuid = m.uuid \
|
||||
WHERE v.status = 'processing' AND m.status = 'completed'",
|
||||
vt, mj
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
vt, mj
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid, file_name, status, detail: "monitor_job already completed".to_string(),
|
||||
}).collect();
|
||||
let files: Vec<ConsistencyFile> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid,
|
||||
file_name,
|
||||
status,
|
||||
detail: "monitor_job already completed".to_string(),
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
ConsistencyCheck {
|
||||
check: "processing_job_done".to_string(),
|
||||
|
||||
@@ -54,8 +54,7 @@ pub fn read_identity_file(uuid: &str) -> Result<IdentityFile> {
|
||||
let path = identity_file_path(uuid);
|
||||
let content = std::fs::read_to_string(&path)
|
||||
.with_context(|| format!("Identity file not found: {} ({})", uuid, path.display()))?;
|
||||
serde_json::from_str(&content)
|
||||
.with_context(|| format!("Invalid identity.json: {}", uuid))
|
||||
serde_json::from_str(&content).with_context(|| format!("Invalid identity.json: {}", uuid))
|
||||
}
|
||||
|
||||
pub fn write_identity_file(file: &IdentityFile) -> Result<()> {
|
||||
@@ -167,7 +166,10 @@ pub fn rebuild_index() -> Result<usize> {
|
||||
entries.insert(uuid.clone(), file.name);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("[identity-storage] Skipping {} in index rebuild: {}", uuid, e);
|
||||
warn!(
|
||||
"[identity-storage] Skipping {} in index rebuild: {}",
|
||||
uuid, e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -187,18 +189,16 @@ pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Resu
|
||||
let identity_table = crate::core::db::schema::table_name("identities");
|
||||
let fd_table = crate::core::db::schema::table_name("face_detections");
|
||||
|
||||
// Schema-aware column selection: dev uses 'name', public uses 'real_name'
|
||||
let name_col = if identity_table.starts_with("dev.") { "name" } else { "real_name" };
|
||||
|
||||
let clean = uuid.replace('-', "");
|
||||
|
||||
let record = sqlx::query_as::<_, crate::core::db::IdentityDetailRecord>(
|
||||
&format!(
|
||||
"SELECT id, uuid::text, {} AS name, identity_type, source, status, metadata, reference_data, \
|
||||
NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
|
||||
face_embedding::real[] as face_embedding, \
|
||||
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
|
||||
FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
|
||||
name_col, identity_table
|
||||
"SELECT id, uuid::text, name, identity_type, source, status, metadata, reference_data, \
|
||||
NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
|
||||
face_embedding::real[] as face_embedding, \
|
||||
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
|
||||
FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
|
||||
identity_table
|
||||
)
|
||||
)
|
||||
.bind(&clean)
|
||||
@@ -322,8 +322,13 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result
|
||||
let mut entries: HashMap<String, String> = if index_path.exists() {
|
||||
let content = std::fs::read_to_string(&index_path)?;
|
||||
let v: serde_json::Value = serde_json::from_str(&content).unwrap_or_default();
|
||||
v["entries"].as_object()
|
||||
.map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string())).collect())
|
||||
v["entries"]
|
||||
.as_object()
|
||||
.map(|obj| {
|
||||
obj.iter()
|
||||
.map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string()))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
HashMap::new()
|
||||
@@ -338,7 +343,9 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result
|
||||
}
|
||||
|
||||
pub async fn save_identity_file(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
let record = db.get_identity_by_uuid(uuid).await?
|
||||
let record = db
|
||||
.get_identity_by_uuid(uuid)
|
||||
.await?
|
||||
.with_context(|| format!("Identity not found in DB: {}", uuid))?;
|
||||
|
||||
let identity_uuid = record.uuid.clone();
|
||||
@@ -415,6 +422,7 @@ mod tests {
|
||||
status: Some("confirmed".to_string()),
|
||||
tmdb_id: Some(112),
|
||||
tmdb_profile: Some("https://image.tmdb.org/t/p/w185/test.jpg".to_string()),
|
||||
local_profile: None,
|
||||
metadata: serde_json::json!({"tmdb_character": "Test Role"}),
|
||||
file_bindings: vec![FileBinding {
|
||||
file_uuid: "ffffffffffffffffffffffffffffffff".to_string(),
|
||||
@@ -442,7 +450,9 @@ mod tests {
|
||||
fn test_identity_dir_path() {
|
||||
let uuid = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
|
||||
let p = identity_dir(uuid);
|
||||
assert!(p.to_string_lossy().ends_with(&format!("identities/{}", uuid)));
|
||||
assert!(p
|
||||
.to_string_lossy()
|
||||
.ends_with(&format!("identities/{}", uuid)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -463,7 +473,10 @@ mod tests {
|
||||
let base = Path::new("/tmp/test_base");
|
||||
let uuid = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
|
||||
let p = identity_dir_at(base, uuid);
|
||||
assert_eq!(p, Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"));
|
||||
assert_eq!(
|
||||
p,
|
||||
Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -490,7 +503,10 @@ mod tests {
|
||||
assert_eq!(read.name, file.name);
|
||||
assert_eq!(read.source, file.source);
|
||||
assert_eq!(read.tmdb_id, file.tmdb_id);
|
||||
assert_eq!(read.file_bindings[0].face_count, file.file_bindings[0].face_count);
|
||||
assert_eq!(
|
||||
read.file_bindings[0].face_count,
|
||||
file.file_bindings[0].face_count
|
||||
);
|
||||
|
||||
let _ = std::fs::remove_dir_all(&tmp);
|
||||
}
|
||||
@@ -521,9 +537,21 @@ mod tests {
|
||||
let _ = std::fs::remove_dir_all(&tmp);
|
||||
let base = &tmp;
|
||||
|
||||
std::fs::create_dir_all(base.join("identities").join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")).unwrap();
|
||||
std::fs::create_dir_all(base.join("identities").join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")).unwrap();
|
||||
std::fs::create_dir_all(base.join("identities").join("cccccccccccccccccccccccccccccccc")).unwrap();
|
||||
std::fs::create_dir_all(
|
||||
base.join("identities")
|
||||
.join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
|
||||
)
|
||||
.unwrap();
|
||||
std::fs::create_dir_all(
|
||||
base.join("identities")
|
||||
.join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
|
||||
)
|
||||
.unwrap();
|
||||
std::fs::create_dir_all(
|
||||
base.join("identities")
|
||||
.join("cccccccccccccccccccccccccccccccc"),
|
||||
)
|
||||
.unwrap();
|
||||
std::fs::create_dir_all(base.join("identities").join("not_a_uuid")).unwrap();
|
||||
std::fs::create_dir_all(base.join("identities").join("short")).unwrap();
|
||||
|
||||
|
||||
@@ -56,19 +56,25 @@ impl IngestionService {
|
||||
.to_string();
|
||||
|
||||
// 1. Compute SHA256 for dedup
|
||||
let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path).ok().unwrap_or_default();
|
||||
let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path)
|
||||
.ok()
|
||||
.unwrap_or_default();
|
||||
|
||||
// 2. Hash check — same content = already registered
|
||||
let videos_table = schema::table_name("videos");
|
||||
if !content_hash.is_empty() {
|
||||
if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(
|
||||
&format!("SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1", videos_table)
|
||||
)
|
||||
if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(&format!(
|
||||
"SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1",
|
||||
videos_table
|
||||
))
|
||||
.bind(&content_hash)
|
||||
.fetch_optional(self.db.pool())
|
||||
.await
|
||||
{
|
||||
info!("Content already registered: {} ({})", filename, existing_uuid);
|
||||
info!(
|
||||
"Content already registered: {} ({})",
|
||||
filename, existing_uuid
|
||||
);
|
||||
return Ok(Some(existing_uuid));
|
||||
}
|
||||
}
|
||||
@@ -108,7 +114,8 @@ impl IngestionService {
|
||||
let probe_result = probe::probe_video(file_path).ok();
|
||||
let file_meta = std::fs::metadata(&canonical_path).ok();
|
||||
|
||||
let duration = probe_result.as_ref()
|
||||
let duration = probe_result
|
||||
.as_ref()
|
||||
.and_then(|r| r.format.duration.as_ref())
|
||||
.and_then(|s| s.parse::<f64>().ok())
|
||||
.unwrap_or(0.0);
|
||||
@@ -148,7 +155,11 @@ impl IngestionService {
|
||||
}
|
||||
|
||||
let total_frames = {
|
||||
let video_stream = probe_result.as_ref().and_then(|pr| pr.streams.iter().find(|s| s.codec_type.as_deref() == Some("video")));
|
||||
let video_stream = probe_result.as_ref().and_then(|pr| {
|
||||
pr.streams
|
||||
.iter()
|
||||
.find(|s| s.codec_type.as_deref() == Some("video"))
|
||||
});
|
||||
|
||||
if let Some(stream) = video_stream {
|
||||
if let Some(nb_frames_str) = &stream.nb_frames {
|
||||
@@ -223,11 +234,14 @@ impl IngestionService {
|
||||
// Store content_hash for dedup
|
||||
if !content_hash.is_empty() {
|
||||
let vt = schema::table_name("videos");
|
||||
let _ = sqlx::query(&format!("UPDATE {} SET content_hash = $1 WHERE file_uuid = $2", vt))
|
||||
.bind(&content_hash)
|
||||
.bind(&uuid)
|
||||
.execute(self.db.pool())
|
||||
.await;
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET content_hash = $1 WHERE file_uuid = $2",
|
||||
vt
|
||||
))
|
||||
.bind(&content_hash)
|
||||
.bind(&uuid)
|
||||
.execute(self.db.pool())
|
||||
.await;
|
||||
}
|
||||
|
||||
self.db
|
||||
@@ -243,5 +257,3 @@ impl IngestionService {
|
||||
Ok(Some(uuid))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -17,42 +17,84 @@ mod tests {
|
||||
#[test]
|
||||
fn test_detect_category_image() {
|
||||
assert_eq!(detect_category(Path::new("photo.jpg")), FileCategory::Image);
|
||||
assert_eq!(detect_category(Path::new("photo.jpeg")), FileCategory::Image);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("photo.jpeg")),
|
||||
FileCategory::Image
|
||||
);
|
||||
assert_eq!(detect_category(Path::new("photo.png")), FileCategory::Image);
|
||||
assert_eq!(detect_category(Path::new("photo.svg")), FileCategory::Image);
|
||||
assert_eq!(detect_category(Path::new("photo.webp")), FileCategory::Image);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("photo.webp")),
|
||||
FileCategory::Image
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_detect_category_document() {
|
||||
assert_eq!(detect_category(Path::new("doc.pdf")), FileCategory::Document);
|
||||
assert_eq!(detect_category(Path::new("doc.docx")), FileCategory::Document);
|
||||
assert_eq!(detect_category(Path::new("doc.pages")), FileCategory::Document);
|
||||
assert_eq!(detect_category(Path::new("doc.txt")), FileCategory::Document);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("doc.pdf")),
|
||||
FileCategory::Document
|
||||
);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("doc.docx")),
|
||||
FileCategory::Document
|
||||
);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("doc.pages")),
|
||||
FileCategory::Document
|
||||
);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("doc.txt")),
|
||||
FileCategory::Document
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_detect_category_spreadsheet() {
|
||||
assert_eq!(detect_category(Path::new("data.xlsx")), FileCategory::Spreadsheet);
|
||||
assert_eq!(detect_category(Path::new("data.csv")), FileCategory::Spreadsheet);
|
||||
assert_eq!(detect_category(Path::new("data.numbers")), FileCategory::Spreadsheet);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("data.xlsx")),
|
||||
FileCategory::Spreadsheet
|
||||
);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("data.csv")),
|
||||
FileCategory::Spreadsheet
|
||||
);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("data.numbers")),
|
||||
FileCategory::Spreadsheet
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_detect_category_presentation() {
|
||||
assert_eq!(detect_category(Path::new("deck.pptx")), FileCategory::Presentation);
|
||||
assert_eq!(detect_category(Path::new("deck.key")), FileCategory::Presentation);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("deck.pptx")),
|
||||
FileCategory::Presentation
|
||||
);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("deck.key")),
|
||||
FileCategory::Presentation
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_detect_category_archive() {
|
||||
assert_eq!(detect_category(Path::new("files.zip")), FileCategory::Archive);
|
||||
assert_eq!(detect_category(Path::new("files.tar.gz")), FileCategory::Archive);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("files.zip")),
|
||||
FileCategory::Archive
|
||||
);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("files.tar.gz")),
|
||||
FileCategory::Archive
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_detect_category_unknown() {
|
||||
assert_eq!(detect_category(Path::new("file.xyz")), FileCategory::Unknown);
|
||||
assert_eq!(
|
||||
detect_category(Path::new("file.xyz")),
|
||||
FileCategory::Unknown
|
||||
);
|
||||
assert_eq!(detect_category(Path::new("file")), FileCategory::Unknown);
|
||||
}
|
||||
|
||||
@@ -84,13 +126,18 @@ pub enum FileCategory {
|
||||
|
||||
/// Detect file category from path extension
|
||||
pub fn detect_category(path: &Path) -> FileCategory {
|
||||
let ext = path.extension()
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.map(|e| e.to_lowercase());
|
||||
match ext.as_deref() {
|
||||
Some("mp4" | "mov" | "mkv" | "avi" | "webm" | "m4v" | "mpeg") => FileCategory::Video,
|
||||
Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => FileCategory::Image,
|
||||
Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => FileCategory::Document,
|
||||
Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => {
|
||||
FileCategory::Image
|
||||
}
|
||||
Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => {
|
||||
FileCategory::Document
|
||||
}
|
||||
Some("xls" | "xlsx" | "csv" | "ods" | "numbers") => FileCategory::Spreadsheet,
|
||||
Some("ppt" | "pptx" | "odp" | "key") => FileCategory::Presentation,
|
||||
Some("zip" | "tar" | "gz" | "tgz" | "7z" | "rar") => FileCategory::Archive,
|
||||
@@ -102,16 +149,20 @@ pub fn detect_category(path: &Path) -> FileCategory {
|
||||
pub fn base_format_info(path: &Path) -> serde_json::Value {
|
||||
let meta = std::fs::metadata(path).ok();
|
||||
let size = meta.as_ref().map(|m| m.len()).unwrap_or(0);
|
||||
let mtime = meta.as_ref()
|
||||
let mtime = meta
|
||||
.as_ref()
|
||||
.and_then(|m| m.modified().ok())
|
||||
.and_then(|t| {
|
||||
let secs = t.duration_since(SystemTime::UNIX_EPOCH).ok()?.as_secs() as i64;
|
||||
chrono::DateTime::from_timestamp(secs, 0)
|
||||
.map(|dt| dt.to_rfc3339())
|
||||
chrono::DateTime::from_timestamp(secs, 0).map(|dt| dt.to_rfc3339())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let fname = path.to_string_lossy().to_string();
|
||||
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("").to_lowercase();
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.unwrap_or("")
|
||||
.to_lowercase();
|
||||
let cat = detect_category(path);
|
||||
let file_type = match cat {
|
||||
FileCategory::Video => "video",
|
||||
@@ -150,7 +201,13 @@ fn ffprobe_probe(path: &Path, format_base: serde_json::Value) -> serde_json::Val
|
||||
}
|
||||
|
||||
/// Run Python probe for document/spreadsheet/presentation files
|
||||
fn python_probe(path: &Path, category: &FileCategory, scripts_dir: &str, python_path: &str, format_base: serde_json::Value) -> serde_json::Value {
|
||||
fn python_probe(
|
||||
path: &Path,
|
||||
category: &FileCategory,
|
||||
scripts_dir: &str,
|
||||
python_path: &str,
|
||||
format_base: serde_json::Value,
|
||||
) -> serde_json::Value {
|
||||
let script = format!("{}/probe_file.py", scripts_dir);
|
||||
if !std::path::Path::new(&script).exists() {
|
||||
return minimal_probe(format_base);
|
||||
@@ -184,18 +241,12 @@ fn minimal_probe(format_base: serde_json::Value) -> serde_json::Value {
|
||||
|
||||
/// Unified probe: dispatches to the right probe based on file type
|
||||
/// Returns a probe_json-compatible Value
|
||||
pub async fn unified_probe(
|
||||
path: &Path,
|
||||
scripts_dir: &str,
|
||||
python_path: &str,
|
||||
) -> serde_json::Value {
|
||||
pub async fn unified_probe(path: &Path, scripts_dir: &str, python_path: &str) -> serde_json::Value {
|
||||
let cat = detect_category(path);
|
||||
let format_base = base_format_info(path);
|
||||
|
||||
match cat {
|
||||
FileCategory::Video | FileCategory::Image => {
|
||||
ffprobe_probe(path, format_base)
|
||||
}
|
||||
FileCategory::Video | FileCategory::Image => ffprobe_probe(path, format_base),
|
||||
FileCategory::Document | FileCategory::Spreadsheet | FileCategory::Presentation => {
|
||||
python_probe(path, &cat, scripts_dir, python_path, format_base)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
@@ -27,13 +28,21 @@ pub async fn process_cut(
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
) -> Result<CutResult> {
|
||||
// Try native ffmpeg-based scene detection first
|
||||
let result = try_native_cut(video_path);
|
||||
if let Ok(r) = result {
|
||||
let json = serde_json::to_string_pretty(&r)?;
|
||||
std::fs::write(output_path, &json)
|
||||
.with_context(|| format!("Failed to write {:?}", output_path))?;
|
||||
return Ok(r);
|
||||
}
|
||||
|
||||
// Fallback: Python scenedetect
|
||||
tracing::warn!("[CUT] Native impl failed, falling back to Python");
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("cut_processor.py");
|
||||
|
||||
tracing::info!("[CUT] Starting scene detection: {}", video_path);
|
||||
|
||||
if !script_path.exists() {
|
||||
tracing::warn!("[CUT] Script not found, returning empty result");
|
||||
return Ok(CutResult {
|
||||
frame_count: 0,
|
||||
fps: 0.0,
|
||||
@@ -53,19 +62,179 @@ pub async fn process_cut(
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str = std::fs::read_to_string(output_path).context("Failed to read CUT output")?;
|
||||
|
||||
let result: CutResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse CUT output")?;
|
||||
|
||||
tracing::info!("[CUT] Result: {} scenes detected", result.scenes.len());
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ── Native ffmpeg scene detection ─────────────────────────────────
|
||||
|
||||
fn try_native_cut(video_path: &str) -> Result<CutResult> {
|
||||
// Step 1: Get video info (fps, frame count)
|
||||
let probe = Command::new("ffprobe")
|
||||
.args([
|
||||
"-v",
|
||||
"quiet",
|
||||
"-print_format",
|
||||
"json",
|
||||
"-show_format",
|
||||
"-show_streams",
|
||||
video_path,
|
||||
])
|
||||
.output()
|
||||
.context("Failed to run ffprobe")?;
|
||||
|
||||
let probe_info: serde_json::Value =
|
||||
serde_json::from_slice(&probe.stdout).context("Failed to parse ffprobe output")?;
|
||||
|
||||
let streams = probe_info["streams"]
|
||||
.as_array()
|
||||
.map_or(vec![], |s| s.clone());
|
||||
let video_stream = streams.iter().find(|s| s["codec_type"] == "video");
|
||||
|
||||
let fps = video_stream
|
||||
.and_then(|s| s["r_frame_rate"].as_str().and_then(parse_fraction))
|
||||
.unwrap_or(30.0);
|
||||
|
||||
let total_frames: u64 = video_stream
|
||||
.and_then(|s| s["nb_frames"].as_str())
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
|
||||
let duration: f64 = probe_info["format"]["duration"]
|
||||
.as_str()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0.0);
|
||||
|
||||
// Step 2: Use ffmpeg scene detection filter
|
||||
// The `scene` filter computes the difference between consecutive frames
|
||||
// and outputs when the difference exceeds the threshold (0.3 = medium sensitivity)
|
||||
let scene_output = Command::new("ffprobe")
|
||||
.args([
|
||||
"-v",
|
||||
"quiet",
|
||||
"-show_entries",
|
||||
"frame=pts_time",
|
||||
"-of",
|
||||
"compact=p=0:nk=1",
|
||||
"-f",
|
||||
"lavfi",
|
||||
&format!("movie={},select='gt(scene\\,0.3)',showinfo", video_path),
|
||||
"-show_frames",
|
||||
])
|
||||
.output()
|
||||
.context("Failed to run ffmpeg scene detection")?;
|
||||
|
||||
let stderr_output = String::from_utf8_lossy(&scene_output.stderr);
|
||||
let mut scene_times: Vec<f64> = Vec::new();
|
||||
|
||||
// Parse ffmpeg showinfo output for scene changes
|
||||
// Format: [Parsed_showinfo...] pts:123.456 pts_time:123.456 ...
|
||||
for line in stderr_output.lines() {
|
||||
if line.contains("pts_time:") {
|
||||
if let Some(pos) = line.find("pts_time:") {
|
||||
let rest = &line[pos + 9..];
|
||||
let time_str = rest.split_whitespace().next().unwrap_or("");
|
||||
if let Ok(t) = time_str.parse::<f64>() {
|
||||
scene_times.push(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: Build scenes from cut points
|
||||
let mut scenes: Vec<CutScene> = Vec::new();
|
||||
let mut prev_time = 0.0;
|
||||
let mut prev_frame: u64 = 0;
|
||||
|
||||
for (i, &cut_time) in scene_times.iter().enumerate() {
|
||||
let end_frame = (cut_time * fps).round() as u64;
|
||||
let start_frame = prev_frame;
|
||||
|
||||
if end_frame > start_frame {
|
||||
scenes.push(CutScene {
|
||||
scene_number: (i + 1) as u32,
|
||||
start_frame: prev_frame,
|
||||
end_frame: end_frame.saturating_sub(1),
|
||||
start_time: prev_time,
|
||||
end_time: cut_time - (1.0 / fps),
|
||||
});
|
||||
}
|
||||
|
||||
prev_time = cut_time;
|
||||
prev_frame = end_frame;
|
||||
}
|
||||
|
||||
// Final scene (last cut point → end of video)
|
||||
if total_frames > 0 && prev_frame < total_frames {
|
||||
scenes.push(CutScene {
|
||||
scene_number: (scenes.len() + 1) as u32,
|
||||
start_frame: prev_frame,
|
||||
end_frame: total_frames.saturating_sub(1),
|
||||
start_time: prev_time,
|
||||
end_time: duration,
|
||||
});
|
||||
}
|
||||
|
||||
// If no scenes detected, create a single scene covering the whole video
|
||||
if scenes.is_empty() && total_frames > 0 {
|
||||
scenes.push(CutScene {
|
||||
scene_number: 1,
|
||||
start_frame: 0,
|
||||
end_frame: total_frames.saturating_sub(1),
|
||||
start_time: 0.0,
|
||||
end_time: duration,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(CutResult {
|
||||
frame_count: total_frames,
|
||||
fps,
|
||||
scenes,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a frame-rate string into `f64`.
///
/// Accepts either a plain number (`"30"`) or a `numerator/denominator` pair
/// (`"30000/1001"`). Returns `None` when a part is not a number or when the
/// denominator is not strictly positive.
fn parse_fraction(s: &str) -> Option<f64> {
    match s.split_once('/') {
        Some((numerator, denominator)) => {
            let top: f64 = numerator.parse().ok()?;
            let bottom: f64 = denominator.parse().ok()?;
            if bottom > 0.0 {
                Some(top / bottom)
            } else {
                // A string containing '/' can never parse as a plain number.
                None
            }
        }
        None => s.parse().ok(),
    }
}
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
fn test_parse_fraction() {
    // 30000/1001 is roughly 29.97.
    let fps = parse_fraction("30000/1001").expect("rational frame rate should parse");
    let deviation = (fps - 29.97).abs();
    assert!(deviation < 0.01);
}
|
||||
|
||||
#[test]
fn test_parse_fraction_int() {
    // A plain number without '/' is accepted as-is.
    let fps = parse_fraction("30").expect("plain number should parse");
    let deviation = (fps - 30.0).abs();
    assert!(deviation < 0.01);
}
|
||||
|
||||
#[test]
fn test_parse_fraction_invalid() {
    // Non-numeric parts must be rejected.
    let parsed = parse_fraction("not/a/num");
    assert!(parsed.is_none());
}
|
||||
|
||||
#[test]
fn test_parse_fraction_zero_den() {
    // A zero denominator yields no value.
    let parsed = parse_fraction("1/0");
    assert!(parsed.is_none());
}
|
||||
|
||||
#[test]
|
||||
fn test_cut_result_serialization() {
|
||||
let result = CutResult {
|
||||
@@ -81,8 +250,9 @@ mod tests {
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
assert!(json.contains("frame_count"));
|
||||
assert!(json.contains("scene_number"));
|
||||
assert!(json.contains("1"));
|
||||
assert!(json.contains("fps"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -90,20 +260,23 @@ mod tests {
|
||||
let json = r#"{
|
||||
"frame_count": 100,
|
||||
"fps": 30.0,
|
||||
"scenes": [
|
||||
{"scene_number": 1, "start_frame": 0, "end_frame": 30, "start_time": 0.0, "end_time": 1.0},
|
||||
{"scene_number": 2, "start_frame": 31, "end_frame": 60, "start_time": 1.033, "end_time": 2.0}
|
||||
]
|
||||
"scenes": [{
|
||||
"scene_number": 1,
|
||||
"start_frame": 0,
|
||||
"end_frame": 30,
|
||||
"start_time": 0.0,
|
||||
"end_time": 1.0
|
||||
}]
|
||||
}"#;
|
||||
|
||||
let result: CutResult = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(result.frame_count, 100);
|
||||
assert_eq!(result.scenes.len(), 2);
|
||||
assert_eq!(result.scenes[1].scene_number, 2);
|
||||
assert_eq!(result.scenes.len(), 1);
|
||||
assert_eq!(result.scenes[0].scene_number, 1);
|
||||
assert_eq!(result.scenes[0].start_frame, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cut_result_empty_scenes() {
|
||||
fn test_empty_scenes() {
|
||||
let result = CutResult {
|
||||
frame_count: 0,
|
||||
fps: 0.0,
|
||||
@@ -111,17 +284,4 @@ mod tests {
|
||||
};
|
||||
assert!(result.scenes.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cut_scene_times() {
|
||||
let scene = CutScene {
|
||||
scene_number: 1,
|
||||
start_frame: 0,
|
||||
end_frame: 30,
|
||||
start_time: 0.0,
|
||||
end_time: 1.0,
|
||||
};
|
||||
assert!(scene.end_time > scene.start_time);
|
||||
assert_eq!(scene.scene_number, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -109,11 +109,10 @@ pub fn validate_python_env() -> Result<()> {
|
||||
tracing::warn!("Expected Python 3.11, got: {}", version.trim());
|
||||
}
|
||||
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
|
||||
.unwrap_or_else(|_| {
|
||||
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
manifest.join("scripts").to_string_lossy().to_string()
|
||||
});
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
|
||||
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
manifest.join("scripts").to_string_lossy().to_string()
|
||||
});
|
||||
let script_path = PathBuf::from(&scripts_dir);
|
||||
if !script_path.exists() {
|
||||
anyhow::bail!("Scripts directory not found at {}", scripts_dir);
|
||||
@@ -133,11 +132,10 @@ impl PythonExecutor {
|
||||
pub fn new() -> Result<Self> {
|
||||
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
|
||||
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
|
||||
.unwrap_or_else(|_| {
|
||||
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
manifest.join("scripts").to_string_lossy().to_string()
|
||||
});
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
|
||||
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
manifest.join("scripts").to_string_lossy().to_string()
|
||||
});
|
||||
|
||||
let python_bin = PathBuf::from(&python_path);
|
||||
let scripts_path = PathBuf::from(&scripts_dir);
|
||||
@@ -173,7 +171,8 @@ impl PythonExecutor {
|
||||
|
||||
if let Some(expected_hash) = self.checksums.get(&rel_path) {
|
||||
let output = std::process::Command::new("shasum")
|
||||
.arg("-a").arg("256")
|
||||
.arg("-a")
|
||||
.arg("256")
|
||||
.arg(&script_path)
|
||||
.output()
|
||||
.context("Failed to run shasum for integrity check")?;
|
||||
@@ -235,8 +234,9 @@ impl PythonExecutor {
|
||||
}
|
||||
|
||||
// Verify script integrity via SHA256 checksum before execution
|
||||
self.verify_script_integrity(script_name)
|
||||
.context("Pre-execution integrity check failed — possible version mismatch or corruption")?;
|
||||
self.verify_script_integrity(script_name).context(
|
||||
"Pre-execution integrity check failed — possible version mismatch or corruption",
|
||||
)?;
|
||||
|
||||
// 標記輸出檔為處理中(add .tmp suffix)
|
||||
let output_path = args.get(1).map(|p| std::path::PathBuf::from(p));
|
||||
|
||||
@@ -44,22 +44,59 @@ pub enum CrowdSize {
|
||||
|
||||
/// Indoor-indicative YOLO classes (COCO labels)
|
||||
const INDOOR_CLASSES: &[&str] = &[
|
||||
"chair", "couch", "bed", "dining table", "toilet", "tv", "laptop",
|
||||
"microwave", "oven", "refrigerator", "sink", "book", "clock",
|
||||
"vase", "potted plant",
|
||||
"chair",
|
||||
"couch",
|
||||
"bed",
|
||||
"dining table",
|
||||
"toilet",
|
||||
"tv",
|
||||
"laptop",
|
||||
"microwave",
|
||||
"oven",
|
||||
"refrigerator",
|
||||
"sink",
|
||||
"book",
|
||||
"clock",
|
||||
"vase",
|
||||
"potted plant",
|
||||
];
|
||||
|
||||
/// Vehicle-indicative classes (person + vehicle = transport scene)
|
||||
const VEHICLE_CLASSES: &[&str] = &[
|
||||
"car", "truck", "bus", "train", "boat", "aeroplane", "bicycle", "motorbike",
|
||||
"car",
|
||||
"truck",
|
||||
"bus",
|
||||
"train",
|
||||
"boat",
|
||||
"aeroplane",
|
||||
"bicycle",
|
||||
"motorbike",
|
||||
];
|
||||
|
||||
/// Outdoor-indicative YOLO classes
|
||||
const OUTDOOR_CLASSES: &[&str] = &[
|
||||
"car", "truck", "bus", "train", "boat", "airplane",
|
||||
"traffic light", "fire hydrant", "stop sign", "parking meter",
|
||||
"bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
|
||||
"bear", "zebra", "giraffe", "tree",
|
||||
"car",
|
||||
"truck",
|
||||
"bus",
|
||||
"train",
|
||||
"boat",
|
||||
"airplane",
|
||||
"traffic light",
|
||||
"fire hydrant",
|
||||
"stop sign",
|
||||
"parking meter",
|
||||
"bench",
|
||||
"bird",
|
||||
"cat",
|
||||
"dog",
|
||||
"horse",
|
||||
"sheep",
|
||||
"cow",
|
||||
"elephant",
|
||||
"bear",
|
||||
"zebra",
|
||||
"giraffe",
|
||||
"tree",
|
||||
];
|
||||
|
||||
/// Build heuristic scene metadata from disk files (yolo.json + DB face data).
|
||||
@@ -113,13 +150,14 @@ pub async fn build_heuristic_scene_meta(
|
||||
|
||||
// Get face counts grouped by frame
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let face_rows: Vec<(i64, i64)> = sqlx::query_as(
|
||||
&format!("SELECT frame_number, COUNT(*) as fc \
|
||||
let face_rows: Vec<(i64, i64)> = sqlx::query_as(&format!(
|
||||
"SELECT frame_number, COUNT(*) as fc \
|
||||
FROM {} \
|
||||
WHERE file_uuid = $1 AND frame_number IS NOT NULL \
|
||||
GROUP BY frame_number \
|
||||
ORDER BY frame_number", fd_table),
|
||||
)
|
||||
ORDER BY frame_number",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
@@ -166,7 +204,10 @@ pub async fn build_heuristic_scene_meta(
|
||||
let outdoor_ratio = outdoor_objects as f64 / frame_count.max(1) as f64;
|
||||
let total_indicator = indoor_ratio + outdoor_ratio;
|
||||
let (indoor_score, outdoor_score) = if total_indicator > 0.0 {
|
||||
(indoor_ratio / total_indicator, outdoor_ratio / total_indicator)
|
||||
(
|
||||
indoor_ratio / total_indicator,
|
||||
outdoor_ratio / total_indicator,
|
||||
)
|
||||
} else {
|
||||
(0.5, 0.5)
|
||||
};
|
||||
@@ -187,17 +228,13 @@ pub async fn build_heuristic_scene_meta(
|
||||
.map(|c| class_frame_presence.get(*c).copied().unwrap_or(0))
|
||||
.sum();
|
||||
let person_ratio = person_frames as f64 / frame_count.max(1) as f64;
|
||||
let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0
|
||||
&& outdoor_score > 0.3;
|
||||
let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0 && outdoor_score > 0.3;
|
||||
|
||||
// Dominant objects: rank by frame presence (not total count)
|
||||
let mut sorted: Vec<_> = class_frame_presence.into_iter().collect();
|
||||
sorted.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
let dominant_objects: Vec<String> = sorted
|
||||
.iter()
|
||||
.take(3)
|
||||
.map(|(cls, _)| cls.clone())
|
||||
.collect();
|
||||
let dominant_objects: Vec<String> =
|
||||
sorted.iter().take(3).map(|(cls, _)| cls.clone()).collect();
|
||||
|
||||
segments.push(SceneSegmentMeta {
|
||||
segment_index: idx as u32 + 1,
|
||||
@@ -229,12 +266,15 @@ pub async fn build_heuristic_scene_meta(
|
||||
|
||||
/// Full pipeline entry point: reads CUT data, generates heuristic metadata, writes JSON.
|
||||
/// Called from job_worker post-processing trigger.
|
||||
pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &str) -> Result<usize> {
|
||||
pub async fn generate_scene_meta(
|
||||
db: &crate::core::db::PostgresDb,
|
||||
file_uuid: &str,
|
||||
) -> Result<usize> {
|
||||
let pool = db.pool();
|
||||
|
||||
// Read CUT segment boundaries from cut.json
|
||||
let cut_path = Path::new(crate::core::config::OUTPUT_DIR.as_str())
|
||||
.join(format!("{}.cut.json", file_uuid));
|
||||
let cut_path =
|
||||
Path::new(crate::core::config::OUTPUT_DIR.as_str()).join(format!("{}.cut.json", file_uuid));
|
||||
let segments: Vec<(i64, i64, f64, f64)> = if cut_path.exists() {
|
||||
let cut_str = tokio::fs::read_to_string(&cut_path)
|
||||
.await
|
||||
@@ -250,8 +290,7 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
}
|
||||
let cut: CutJson = serde_json::from_str(&cut_str)
|
||||
.context("Failed to parse cut.json")?;
|
||||
let cut: CutJson = serde_json::from_str(&cut_str).context("Failed to parse cut.json")?;
|
||||
cut.scenes
|
||||
.into_iter()
|
||||
.map(|s| (s.start_frame, s.end_frame, s.start_time, s.end_time))
|
||||
@@ -259,9 +298,10 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s
|
||||
} else {
|
||||
// Fallback: query DB for video duration, make one segment
|
||||
let videos_table = schema::table_name("videos");
|
||||
let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as(
|
||||
&format!("SELECT total_frames, duration FROM {} WHERE file_uuid = $1", videos_table),
|
||||
)
|
||||
let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as(&format!(
|
||||
"SELECT total_frames, duration FROM {} WHERE file_uuid = $1",
|
||||
videos_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
|
||||
@@ -10,6 +10,7 @@ pub mod ocr;
|
||||
pub mod pose;
|
||||
pub mod scene_classification;
|
||||
pub mod story;
|
||||
pub mod tkg;
|
||||
pub mod visual_chunk;
|
||||
pub mod yolo;
|
||||
|
||||
@@ -25,7 +26,8 @@ pub use face_recognition::{
|
||||
RecognizedFaceDetection,
|
||||
};
|
||||
pub use heuristic_scene::{
|
||||
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta, SceneSegmentMeta,
|
||||
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta,
|
||||
SceneSegmentMeta,
|
||||
};
|
||||
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
|
||||
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
|
||||
@@ -34,5 +36,6 @@ pub use scene_classification::{
|
||||
SceneSegment,
|
||||
};
|
||||
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
|
||||
pub use tkg::{build_tkg, TkgResult};
|
||||
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
|
||||
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};
|
||||
|
||||
@@ -106,7 +106,10 @@ pub async fn process_story(
|
||||
}
|
||||
|
||||
// Fallback: Python script
|
||||
tracing::warn!("[STORY] Native impl failed, falling back to Python: {:?}", result.err());
|
||||
tracing::warn!(
|
||||
"[STORY] Native impl failed, falling back to Python: {:?}",
|
||||
result.err()
|
||||
);
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("story_processor.py");
|
||||
|
||||
@@ -145,7 +148,11 @@ pub async fn process_story(
|
||||
|
||||
// ── Native implementation ─────────────────────────────────────────
|
||||
|
||||
fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -> Result<StoryResult> {
|
||||
fn try_native_story(
|
||||
_video_path: &str,
|
||||
output_path: &str,
|
||||
_uuid: Option<&str>,
|
||||
) -> Result<StoryResult> {
|
||||
let output_dir = Path::new(output_path).parent().unwrap_or(Path::new("."));
|
||||
let basename = Path::new(output_path)
|
||||
.file_stem()
|
||||
@@ -160,8 +167,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -
|
||||
let asr_data: AsrData = if asr_path.exists() {
|
||||
let content = std::fs::read_to_string(&asr_path)
|
||||
.with_context(|| format!("Failed to read {:?}", asr_path))?;
|
||||
serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse {:?}", asr_path))?
|
||||
serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", asr_path))?
|
||||
} else {
|
||||
AsrData { segments: vec![] }
|
||||
};
|
||||
@@ -169,8 +175,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -
|
||||
let cut_data: CutData = if cut_path.exists() {
|
||||
let content = std::fs::read_to_string(&cut_path)
|
||||
.with_context(|| format!("Failed to read {:?}", cut_path))?;
|
||||
serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse {:?}", cut_path))?
|
||||
serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", cut_path))?
|
||||
} else {
|
||||
CutData { scenes: vec![] }
|
||||
};
|
||||
@@ -376,22 +381,39 @@ fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64
|
||||
let mut unique: Vec<&String> = objects.iter().collect();
|
||||
unique.sort();
|
||||
unique.dedup();
|
||||
let objs = unique.iter().take(5).map(|s| (*s).as_str()).collect::<Vec<_>>().join(", ");
|
||||
let objs = unique
|
||||
.iter()
|
||||
.take(5)
|
||||
.map(|s| (*s).as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
parts.push(format!("Visuals: {}", objs));
|
||||
}
|
||||
|
||||
format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | "))
|
||||
}
|
||||
|
||||
fn generate_scene_narrative(objects: &[String], start: f64, end: f64, scene_count: usize) -> String {
|
||||
fn generate_scene_narrative(
|
||||
objects: &[String],
|
||||
start: f64,
|
||||
end: f64,
|
||||
scene_count: usize,
|
||||
) -> String {
|
||||
let mut unique: Vec<&String> = objects.iter().collect();
|
||||
unique.sort();
|
||||
unique.dedup();
|
||||
let top5: Vec<&String> = unique.iter().take(5).cloned().collect();
|
||||
|
||||
if !top5.is_empty() {
|
||||
let obj_str = top5.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ");
|
||||
format!("[{:.0}s-{:.0}s] {} scenes. Visuals: {}.", start, end, scene_count, obj_str)
|
||||
let obj_str = top5
|
||||
.iter()
|
||||
.map(|s| s.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
format!(
|
||||
"[{:.0}s-{:.0}s] {} scenes. Visuals: {}.",
|
||||
start, end, scene_count, obj_str
|
||||
)
|
||||
} else {
|
||||
format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count)
|
||||
}
|
||||
@@ -408,7 +430,8 @@ mod tests {
|
||||
let text = generate_narrative(
|
||||
&["Hello world".to_string()],
|
||||
&["person".to_string()],
|
||||
0.0, 5.0,
|
||||
0.0,
|
||||
5.0,
|
||||
);
|
||||
assert!(text.contains("[0s-5s]"));
|
||||
assert!(text.contains("Speech:"));
|
||||
@@ -576,7 +599,10 @@ mod tests {
|
||||
};
|
||||
|
||||
assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2);
|
||||
assert!(result.child_chunks.iter().all(|c| c.parent_chunk_id.is_some()));
|
||||
assert!(result
|
||||
.child_chunks
|
||||
.iter()
|
||||
.all(|c| c.parent_chunk_id.is_some()));
|
||||
assert!(result.parent_chunks[0].parent_chunk_id.is_none());
|
||||
}
|
||||
|
||||
@@ -594,11 +620,7 @@ mod tests {
|
||||
std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap();
|
||||
std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap();
|
||||
|
||||
let result = try_native_story(
|
||||
"/dummy.mp4",
|
||||
out_path.to_str().unwrap(),
|
||||
None,
|
||||
).unwrap();
|
||||
let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();
|
||||
|
||||
assert_eq!(result.stats.total_child_chunks, 0);
|
||||
assert_eq!(result.stats.total_parent_chunks, 0);
|
||||
@@ -616,13 +638,17 @@ mod tests {
|
||||
let cut_path = dir.join(format!("{}.cut.json", basename));
|
||||
let out_path = dir.join(format!("{}.story.json", basename));
|
||||
|
||||
std::fs::write(&asr_path, r#"{
|
||||
std::fs::write(
|
||||
&asr_path,
|
||||
r#"{
|
||||
"segments": [
|
||||
{"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95},
|
||||
{"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92},
|
||||
{"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90}
|
||||
]
|
||||
}"#).unwrap();
|
||||
}"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
std::fs::write(&cut_path, r#"{
|
||||
"scenes": [
|
||||
@@ -631,11 +657,7 @@ mod tests {
|
||||
]
|
||||
}"#).unwrap();
|
||||
|
||||
let result = try_native_story(
|
||||
"/dummy.mp4",
|
||||
out_path.to_str().unwrap(),
|
||||
None,
|
||||
).unwrap();
|
||||
let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();
|
||||
|
||||
assert_eq!(result.stats.asr_children, 3);
|
||||
assert_eq!(result.stats.cut_children, 2);
|
||||
@@ -649,7 +671,11 @@ mod tests {
|
||||
for child in &result.child_chunks {
|
||||
if child.source == "asr" {
|
||||
assert!(child.parent_chunk_id.is_some());
|
||||
assert!(child.parent_chunk_id.as_ref().unwrap().starts_with("story_asr_"));
|
||||
assert!(child
|
||||
.parent_chunk_id
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.starts_with("story_asr_"));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
703
src/core/processor/tkg.rs
Normal file
703
src/core/processor/tkg.rs
Normal file
@@ -0,0 +1,703 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::Deserialize;
|
||||
use sqlx::PgPool;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::core::db::postgres_db::PostgresDb;
|
||||
|
||||
/// Qualify a table name with the configured database schema.
///
/// The schema is read from the `DATABASE_SCHEMA` environment variable on every call
/// and falls back to `"dev"` when the variable cannot be read. For the `public`
/// schema the bare table name is returned; otherwise the result is `schema.name`.
fn t(name: &str) -> String {
    match std::env::var("DATABASE_SCHEMA") {
        Ok(schema) if schema == "public" => name.to_string(),
        Ok(schema) => format!("{}.{}", schema, name),
        // Variable unset or unreadable: use the development schema.
        Err(_) => format!("{}.{}", "dev", name),
    }
}
|
||||
|
||||
// ── Input data structs ────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct YoloJson {
|
||||
#[serde(default)]
|
||||
frames: HashMap<String, YoloFrameEntry>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct YoloFrameEntry {
|
||||
#[serde(default)]
|
||||
detections: Vec<YoloDetEntry>,
|
||||
#[serde(default)]
|
||||
objects: Vec<YoloDetEntry>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct YoloDetEntry {
|
||||
#[serde(default)]
|
||||
class_name: String,
|
||||
#[serde(default)]
|
||||
confidence: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrxJson {
|
||||
#[serde(default)]
|
||||
segments: Vec<AsrxSegmentEntry>,
|
||||
#[serde(default)]
|
||||
speaker_stats: Option<HashMap<String, AsrxSpeakerStat>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrxSegmentEntry {
|
||||
#[serde(default)]
|
||||
speaker_id: String,
|
||||
#[serde(default)]
|
||||
start_time: f64,
|
||||
#[serde(default)]
|
||||
end_time: f64,
|
||||
#[allow(dead_code)]
|
||||
start_frame: i64,
|
||||
#[allow(dead_code)]
|
||||
end_frame: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrxSpeakerStat {
|
||||
#[serde(default)]
|
||||
count: i64,
|
||||
}
|
||||
|
||||
// ── Face detection trace ──────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, sqlx::FromRow)]
|
||||
struct FaceTraceRow {
|
||||
trace_id: i64,
|
||||
frame_count: i64,
|
||||
start_f: i64,
|
||||
end_f: i64,
|
||||
avg_x: Option<f64>,
|
||||
avg_y: Option<f64>,
|
||||
avg_w: Option<f64>,
|
||||
avg_h: Option<f64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, sqlx::FromRow)]
|
||||
struct FaceDetectionRow {
|
||||
trace_id: i64,
|
||||
frame_number: i64,
|
||||
#[allow(dead_code)]
|
||||
x: Option<f64>,
|
||||
#[allow(dead_code)]
|
||||
y: Option<f64>,
|
||||
#[allow(dead_code)]
|
||||
width: Option<f64>,
|
||||
#[allow(dead_code)]
|
||||
height: Option<f64>,
|
||||
}
|
||||
|
||||
// ── Public API ────────────────────────────────────────────────────
|
||||
|
||||
/// Summary of one temporal-knowledge-graph build: how many node and edge upserts
/// each builder reported.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TkgResult {
    /// `face_trace` nodes written (one per face trace).
    pub face_trace_nodes: usize,
    /// `object` nodes written (one per detected class).
    pub object_nodes: usize,
    /// `speaker` nodes written (one per entry in the speaker statistics).
    pub speaker_nodes: usize,
    /// `CO_OCCURS_WITH` edge upserts between face traces and objects.
    pub co_occurrence_edges: usize,
    /// `SPEAKS_AS` edge upserts between face traces and speakers.
    pub speaker_face_edges: usize,
    /// Count reported by the face-to-face edge builder.
    pub face_face_edges: usize,
}
|
||||
|
||||
pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Result<TkgResult> {
|
||||
let pool = db.pool();
|
||||
let n_face = build_face_trace_nodes(pool, file_uuid).await?;
|
||||
let n_objects = build_yolo_object_nodes(pool, file_uuid, output_dir).await?;
|
||||
let n_speakers = build_speaker_nodes(pool, file_uuid, output_dir).await?;
|
||||
|
||||
let e_co = build_co_occurrence_edges(pool, file_uuid, output_dir).await?;
|
||||
let e_sf = build_speaker_face_edges(pool, file_uuid, output_dir).await?;
|
||||
let e_ff = build_face_face_edges(pool, file_uuid).await?;
|
||||
|
||||
Ok(TkgResult {
|
||||
face_trace_nodes: n_face,
|
||||
object_nodes: n_objects,
|
||||
speaker_nodes: n_speakers,
|
||||
co_occurrence_edges: e_co,
|
||||
speaker_face_edges: e_sf,
|
||||
face_face_edges: e_ff,
|
||||
})
|
||||
}
|
||||
|
||||
// ── Node builders ─────────────────────────────────────────────────
|
||||
|
||||
async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let face_table = t("face_detections");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
|
||||
let rows = sqlx::query_as::<_, FaceTraceRow>(&format!(
|
||||
r#"
|
||||
SELECT trace_id,
|
||||
COUNT(*)::bigint as frame_count,
|
||||
MIN(frame_number) as start_f,
|
||||
MAX(frame_number) as end_f,
|
||||
AVG(x::float8) as avg_x,
|
||||
AVG(y::float8) as avg_y,
|
||||
AVG(width::float8) as avg_w,
|
||||
AVG(height::float8) as avg_h
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 AND trace_id IS NOT NULL
|
||||
GROUP BY trace_id
|
||||
ORDER BY trace_id
|
||||
"#,
|
||||
face_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let mut count = 0;
|
||||
for row in &rows {
|
||||
let external_id = format!("trace_{}", row.trace_id);
|
||||
let label = format!("Face Trace {}", row.trace_id);
|
||||
let props = serde_json::json!({
|
||||
"frame_count": row.frame_count,
|
||||
"start_frame": row.start_f,
|
||||
"end_frame": row.end_f,
|
||||
"avg_bbox": {
|
||||
"x": row.avg_x.unwrap_or(0.0).round() as i64,
|
||||
"y": row.avg_y.unwrap_or(0.0).round() as i64,
|
||||
"width": row.avg_w.unwrap_or(0.0).round() as i64,
|
||||
"height": row.avg_h.unwrap_or(0.0).round() as i64,
|
||||
}
|
||||
});
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, node_type, external_id)
|
||||
DO UPDATE SET
|
||||
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties),
|
||||
label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label)
|
||||
"#,
|
||||
nodes_table
|
||||
))
|
||||
.bind("face_trace")
|
||||
.bind(&external_id)
|
||||
.bind(file_uuid)
|
||||
.bind(&label)
|
||||
.bind(serde_json::to_string(&props)?)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
async fn build_yolo_object_nodes(
|
||||
pool: &PgPool,
|
||||
file_uuid: &str,
|
||||
output_dir: &str,
|
||||
) -> Result<usize> {
|
||||
let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
|
||||
if !yolo_path.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(&yolo_path)
|
||||
.with_context(|| format!("Failed to read {:?}", yolo_path))?;
|
||||
let yolo: YoloJson = serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse {:?}", yolo_path))?;
|
||||
|
||||
let mut class_counts: HashMap<String, i64> = HashMap::new();
|
||||
for fdata in yolo.frames.values() {
|
||||
let dets = if !fdata.detections.is_empty() {
|
||||
&fdata.detections
|
||||
} else {
|
||||
&fdata.objects
|
||||
};
|
||||
for det in dets {
|
||||
*class_counts.entry(det.class_name.clone()).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let mut count = 0;
|
||||
for (cls, cnt) in &class_counts {
|
||||
let props = serde_json::json!({ "total_detections": cnt });
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, node_type, external_id)
|
||||
DO UPDATE SET
|
||||
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)
|
||||
"#,
|
||||
nodes_table
|
||||
))
|
||||
.bind("object")
|
||||
.bind(cls)
|
||||
.bind(file_uuid)
|
||||
.bind(cls)
|
||||
.bind(serde_json::to_string(&props)?)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
async fn build_speaker_nodes(pool: &PgPool, file_uuid: &str, output_dir: &str) -> Result<usize> {
|
||||
let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
|
||||
if !asrx_path.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(&asrx_path)
|
||||
.with_context(|| format!("Failed to read {:?}", asrx_path))?;
|
||||
let asrx: AsrxJson = serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse {:?}", asrx_path))?;
|
||||
|
||||
let stats = asrx.speaker_stats.unwrap_or_default();
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let mut count = 0;
|
||||
|
||||
for (sid, stat) in &stats {
|
||||
let props = serde_json::json!({ "segment_count": stat.count });
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, node_type, external_id)
|
||||
DO UPDATE SET
|
||||
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)
|
||||
"#,
|
||||
nodes_table
|
||||
))
|
||||
.bind("speaker")
|
||||
.bind(sid)
|
||||
.bind(file_uuid)
|
||||
.bind(sid)
|
||||
.bind(serde_json::to_string(&props)?)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
// ── Edge builders ─────────────────────────────────────────────────
|
||||
|
||||
async fn build_co_occurrence_edges(
|
||||
pool: &PgPool,
|
||||
file_uuid: &str,
|
||||
output_dir: &str,
|
||||
) -> Result<usize> {
|
||||
let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
|
||||
if !yolo_path.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(&yolo_path)?;
|
||||
let yolo: YoloJson = serde_json::from_str(&content)?;
|
||||
|
||||
let face_table = t("face_detections");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let edges_table = t("tkg_edges");
|
||||
|
||||
let face_rows = sqlx::query_as::<_, FaceDetectionRow>(&format!(
|
||||
r#"SELECT trace_id, frame_number, x, y, width, height
|
||||
FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL
|
||||
ORDER BY frame_number"#,
|
||||
face_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let mut edge_count = 0;
|
||||
for face in &face_rows {
|
||||
let frame_str = face.frame_number.to_string();
|
||||
let yolo_frame = match yolo.frames.get(&frame_str) {
|
||||
Some(f) => f,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let dets = if !yolo_frame.detections.is_empty() {
|
||||
&yolo_frame.detections
|
||||
} else {
|
||||
&yolo_frame.objects
|
||||
};
|
||||
|
||||
if dets.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let external_id = format!("trace_{}", face.trace_id);
|
||||
let face_node: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&external_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let face_node_id = match face_node {
|
||||
Some((id,)) => id,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
for det in dets {
|
||||
let obj_node: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='object' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&det.class_name)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let obj_node_id = match obj_node {
|
||||
Some((id,)) => id,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let edge_props = serde_json::json!({
|
||||
"frame": face.frame_number,
|
||||
"object_confidence": det.confidence,
|
||||
});
|
||||
|
||||
if let Err(e) = sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
|
||||
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
|
||||
"#,
|
||||
edges_table
|
||||
))
|
||||
.bind("CO_OCCURS_WITH")
|
||||
.bind(face_node_id)
|
||||
.bind(obj_node_id)
|
||||
.bind(file_uuid)
|
||||
.bind(serde_json::to_string(&edge_props)?)
|
||||
.execute(pool)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
"[TKG] Edge insert failed (trace={}, obj={}): {}",
|
||||
face.trace_id,
|
||||
det.class_name,
|
||||
e
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
edge_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(edge_count)
|
||||
}
|
||||
|
||||
async fn build_speaker_face_edges(
|
||||
pool: &PgPool,
|
||||
file_uuid: &str,
|
||||
output_dir: &str,
|
||||
) -> Result<usize> {
|
||||
let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
|
||||
if !asrx_path.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(&asrx_path)?;
|
||||
let asrx: AsrxJson = serde_json::from_str(&content)?;
|
||||
|
||||
if asrx.segments.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let face_table = t("face_detections");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let edges_table = t("tkg_edges");
|
||||
|
||||
let traces = sqlx::query_as::<_, (i64, i64, i64)>(&format!(
|
||||
r#"SELECT trace_id, MIN(frame_number) as start_f, MAX(frame_number) as end_f
|
||||
FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL
|
||||
GROUP BY trace_id"#,
|
||||
face_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
// Calculate fps from last segment
|
||||
let last = asrx.segments.last().unwrap();
|
||||
let fps = if last.end_time > 0.0 {
|
||||
last.end_frame as f64 / last.end_time
|
||||
} else {
|
||||
30.0
|
||||
};
|
||||
|
||||
let mut edge_count = 0;
|
||||
|
||||
for (tid, sf, ef) in &traces {
|
||||
let face_ext_id = format!("trace_{}", tid);
|
||||
let face_node: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&face_ext_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let face_node_id = match face_node {
|
||||
Some((id,)) => id,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let face_start_sec = *sf as f64 / fps;
|
||||
let face_end_sec = *ef as f64 / fps;
|
||||
|
||||
for seg in &asrx.segments {
|
||||
let seg_start = seg.start_time;
|
||||
let seg_end = seg.end_time;
|
||||
let overlap_start = face_start_sec.max(seg_start);
|
||||
let overlap_end = face_end_sec.min(seg_end);
|
||||
|
||||
if overlap_start >= overlap_end {
|
||||
continue;
|
||||
}
|
||||
|
||||
let overlap_dur = overlap_end - overlap_start;
|
||||
let face_dur = face_end_sec - face_start_sec;
|
||||
if face_dur <= 0.0 {
|
||||
continue;
|
||||
}
|
||||
let overlap_ratio = overlap_dur / face_dur;
|
||||
|
||||
if overlap_ratio < 0.3 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let speaker_node: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='speaker' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&seg.speaker_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let speaker_node_id = match speaker_node {
|
||||
Some((id,)) => id,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let edge_props = serde_json::json!({
|
||||
"overlap_ratio": (overlap_ratio * 1000.0).round() / 1000.0,
|
||||
"overlap_duration_s": (overlap_dur * 10.0).round() / 10.0,
|
||||
"face_time_range": format!("{:.1}-{:.1}s", face_start_sec, face_end_sec),
|
||||
"speaker_time_range": format!("{:.1}-{:.1}s", seg_start, seg_end),
|
||||
});
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
|
||||
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
|
||||
"#,
|
||||
edges_table
|
||||
))
|
||||
.bind("SPEAKS_AS")
|
||||
.bind(face_node_id)
|
||||
.bind(speaker_node_id)
|
||||
.bind(file_uuid)
|
||||
.bind(serde_json::to_string(&edge_props)?)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
edge_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(edge_count)
|
||||
}
|
||||
|
||||
async fn build_face_face_edges(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let face_table = t("face_detections");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let edges_table = t("tkg_edges");
|
||||
|
||||
let rows: Vec<(i64, i64, i64)> = sqlx::query_as(&format!(
|
||||
r#"
|
||||
SELECT a.trace_id AS tid_a, b.trace_id AS tid_b, a.frame_number
|
||||
FROM {} a
|
||||
JOIN {} b
|
||||
ON a.file_uuid = b.file_uuid
|
||||
AND a.frame_number = b.frame_number
|
||||
AND a.trace_id < b.trace_id
|
||||
WHERE a.file_uuid = $1
|
||||
AND a.trace_id IS NOT NULL
|
||||
AND b.trace_id IS NOT NULL
|
||||
ORDER BY a.frame_number
|
||||
"#,
|
||||
face_table, face_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
if rows.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Deduplicate by pair
|
||||
let mut pair_frames: HashMap<(i64, i64), Vec<i64>> = HashMap::new();
|
||||
for (tid_a, tid_b, frame) in &rows {
|
||||
let key = if *tid_a < *tid_b {
|
||||
(*tid_a, *tid_b)
|
||||
} else {
|
||||
(*tid_b, *tid_a)
|
||||
};
|
||||
pair_frames.entry(key).or_default().push(*frame);
|
||||
}
|
||||
|
||||
let mut edge_count = 0;
|
||||
for ((tid_a, tid_b), frames) in &pair_frames {
|
||||
let ext_a = format!("trace_{}", tid_a);
|
||||
let ext_b = format!("trace_{}", tid_b);
|
||||
|
||||
let n_a: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&ext_a)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let n_b: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&ext_b)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let (n_a_id, n_b_id) = match (n_a, n_b) {
|
||||
(Some((a,)), Some((b,))) => (a, b),
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let edge_props = serde_json::json!({
|
||||
"first_frame": frames[0],
|
||||
"frame_count": frames.len() as i64,
|
||||
});
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
|
||||
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
|
||||
"#,
|
||||
edges_table
|
||||
))
|
||||
.bind("CO_OCCURS_WITH")
|
||||
.bind(n_a_id)
|
||||
.bind(n_b_id)
|
||||
.bind(file_uuid)
|
||||
.bind(serde_json::to_string(&edge_props)?)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
edge_count += 1;
|
||||
}
|
||||
|
||||
Ok(edge_count)
|
||||
}
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Frames keyed by frame number deserialize with their `detections` lists.
    #[test]
    fn test_yolo_json_deserialize() {
        let json = r#"{
            "frames": {
                "1": {"time_seconds": 0.0, "detections": [{"class_name": "person", "confidence": 0.9}]},
                "2": {"time_seconds": 1.0, "detections": [{"class_name": "chair", "confidence": 0.8}]}
            }
        }"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        assert_eq!(yolo.frames.len(), 2);
        assert_eq!(yolo.frames["1"].detections[0].class_name, "person");
        assert_eq!(yolo.frames["2"].detections[0].class_name, "chair");
    }

    /// An empty `frames` object is accepted and yields no frames.
    #[test]
    fn test_yolo_json_empty_frames() {
        let json = r#"{"frames": {}}"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        assert!(yolo.frames.is_empty());
    }

    /// A segment list with accompanying `speaker_stats` deserializes.
    #[test]
    fn test_asrx_json_deserialize() {
        let json = r#"{
            "segments": [
                {"speaker_id": "SPEAKER_01", "start_time": 0.0, "end_time": 2.0, "start_frame": 0, "end_frame": 60}
            ],
            "speaker_stats": {"SPEAKER_01": {"count": 1}}
        }"#;
        let asrx: AsrxJson = serde_json::from_str(json).unwrap();
        assert_eq!(asrx.segments.len(), 1);
        assert_eq!(asrx.segments[0].speaker_id, "SPEAKER_01");
    }

    /// `speaker_stats` is optional: omitting it yields `None`.
    #[test]
    fn test_asrx_json_no_stats() {
        let json = r#"{"segments": []}"#;
        let asrx: AsrxJson = serde_json::from_str(json).unwrap();
        assert!(asrx.segments.is_empty());
        assert!(asrx.speaker_stats.is_none());
    }

    /// A frame that only carries `objects` leaves `detections` empty.
    #[test]
    fn test_yolo_objects_fallback() {
        let json = r#"{
            "frames": {
                "1": {"objects": [{"class_name": "person"}]}
            }
        }"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        assert_eq!(yolo.frames["1"].objects[0].class_name, "person");
        assert!(yolo.frames["1"].detections.is_empty());
    }

    /// Every counter of `TkgResult` keeps the value it was built with.
    #[test]
    fn test_tkg_result() {
        let r = TkgResult {
            face_trace_nodes: 5,
            object_nodes: 10,
            speaker_nodes: 3,
            co_occurrence_edges: 20,
            speaker_face_edges: 8,
            face_face_edges: 4,
        };
        assert_eq!(r.face_trace_nodes, 5);
        assert_eq!(r.object_nodes, 10);
        assert_eq!(r.speaker_nodes, 3);
        assert_eq!(r.co_occurrence_edges, 20);
        assert_eq!(r.speaker_face_edges, 8);
        assert_eq!(r.face_face_edges, 4);
    }
}
|
||||
@@ -1,7 +1,7 @@
|
||||
use anyhow::Result;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
use anyhow::Result;
|
||||
|
||||
/// Compute SHA256 of the entire file content
|
||||
pub fn compute_sha256(path: &Path) -> Result<String> {
|
||||
@@ -10,7 +10,9 @@ pub fn compute_sha256(path: &Path) -> Result<String> {
|
||||
let mut buf = [0u8; 65536];
|
||||
loop {
|
||||
let n = file.read(&mut buf)?;
|
||||
if n == 0 { break; }
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
}
|
||||
let hash = format!("{:x}", hasher.finalize());
|
||||
|
||||
@@ -65,7 +65,11 @@ pub fn tmdb_cache_path(file_uuid: &str) -> PathBuf {
|
||||
pub fn read_tmdb_cache(file_uuid: &str) -> Result<TmdbCache> {
|
||||
let path = tmdb_cache_path(file_uuid);
|
||||
if !path.exists() {
|
||||
anyhow::bail!("TMDb cache not found: {} (expected: {})", file_uuid, path.display());
|
||||
anyhow::bail!(
|
||||
"TMDb cache not found: {} (expected: {})",
|
||||
file_uuid,
|
||||
path.display()
|
||||
);
|
||||
}
|
||||
let content = std::fs::read_to_string(&path)
|
||||
.with_context(|| format!("Failed to read TMDb cache: {}", path.display()))?;
|
||||
@@ -96,9 +100,7 @@ pub fn count_cache_files() -> usize {
|
||||
match std::fs::read_dir(&dir) {
|
||||
Ok(entries) => entries
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| {
|
||||
e.file_name().to_string_lossy().ends_with(".tmdb.json")
|
||||
})
|
||||
.filter(|e| e.file_name().to_string_lossy().ends_with(".tmdb.json"))
|
||||
.count(),
|
||||
Err(_) => 0,
|
||||
}
|
||||
|
||||
@@ -46,11 +46,12 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
|
||||
|
||||
// Step 2: Load face_detections grouped by trace_id
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
|
||||
&format!("SELECT trace_id, embedding FROM {} \
|
||||
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(&format!(
|
||||
"SELECT trace_id, embedding FROM {} \
|
||||
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
|
||||
ORDER BY trace_id", fd_table),
|
||||
)
|
||||
ORDER BY trace_id",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
@@ -156,9 +157,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let mut after_qc = HashMap::new();
|
||||
for (&tid, &(id, ref name)) in &matched {
|
||||
let cnt: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2", fd_table),
|
||||
)
|
||||
let cnt: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(tid)
|
||||
.fetch_one(pool)
|
||||
@@ -194,9 +196,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
|
||||
// Step 5: Update DB
|
||||
let mut updated = 0usize;
|
||||
for (&tid, &(id, _)) in &matched {
|
||||
let r = sqlx::query(
|
||||
&format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table),
|
||||
)
|
||||
let r = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
|
||||
fd_table
|
||||
))
|
||||
.bind(id)
|
||||
.bind(file_uuid)
|
||||
.bind(tid)
|
||||
@@ -223,9 +226,8 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
|
||||
async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
// Find all collision pairs: same identity, same frame, different trace
|
||||
let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(
|
||||
&format!(
|
||||
"SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \
|
||||
let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(&format!(
|
||||
"SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \
|
||||
FROM {} a \
|
||||
JOIN {} b \
|
||||
ON a.file_uuid = b.file_uuid \
|
||||
@@ -235,9 +237,8 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
|
||||
AND a.identity_id IS NOT NULL \
|
||||
AND a.identity_id = b.identity_id \
|
||||
ORDER BY a.identity_id, a.frame_number",
|
||||
fd_table, fd_table
|
||||
),
|
||||
)
|
||||
fd_table, fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
@@ -256,25 +257,36 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
|
||||
let mut unbound = 0usize;
|
||||
for ((id, ta, tb), overlap_frames) in &collision_groups {
|
||||
// Get face detection count for each trace
|
||||
let cnt_a: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table)
|
||||
)
|
||||
.bind(file_uuid).bind(ta).bind(id)
|
||||
.fetch_one(pool).await.unwrap_or(0);
|
||||
let cnt_a: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(ta)
|
||||
.bind(id)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
|
||||
let cnt_b: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table)
|
||||
)
|
||||
.bind(file_uuid).bind(tb).bind(id)
|
||||
.fetch_one(pool).await.unwrap_or(0);
|
||||
let cnt_b: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(tb)
|
||||
.bind(id)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
|
||||
// Unbind the trace with fewer detections (likely the false positive)
|
||||
let victim = if cnt_a <= cnt_b { *ta } else { *tb };
|
||||
let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b };
|
||||
|
||||
sqlx::query(
|
||||
&format!("UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2", fd_table),
|
||||
)
|
||||
sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(victim)
|
||||
.execute(pool)
|
||||
|
||||
@@ -45,7 +45,14 @@ fn extract_movie_name(filename: &str) -> Option<String> {
|
||||
.file_stem()
|
||||
.and_then(|s| s.to_str())?;
|
||||
|
||||
let cleaned = name.replace(['.', '_'], " ").trim().to_string();
|
||||
// Take only the part before year patterns or separators
|
||||
let cleaned = name
|
||||
.replace(['.', '_'], " ")
|
||||
.split(|c: char| c == '(' || c == '[' || c == '│' || c == '|')
|
||||
.next()
|
||||
.unwrap_or(&name)
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
if cleaned.is_empty() || cleaned.len() < 3 {
|
||||
return None;
|
||||
@@ -53,10 +60,7 @@ fn extract_movie_name(filename: &str) -> Option<String> {
|
||||
Some(cleaned)
|
||||
}
|
||||
|
||||
pub async fn probe_from_cache(
|
||||
db: &PostgresDb,
|
||||
file_uuid: &str,
|
||||
) -> Result<TmdbProbeResult> {
|
||||
pub async fn probe_from_cache(db: &PostgresDb, file_uuid: &str) -> Result<TmdbProbeResult> {
|
||||
let cache = crate::core::tmdb::cache::read_tmdb_cache(file_uuid)?;
|
||||
if cache.identities.is_empty() && !cache.cast.is_empty() {
|
||||
return create_identities_from_data(db, file_uuid, &cache.movie, &cache.cast).await;
|
||||
@@ -83,7 +87,8 @@ async fn upsert_identities_from_disk(
|
||||
}
|
||||
match std::fs::read_to_string(&path) {
|
||||
Ok(content) => {
|
||||
match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content) {
|
||||
match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content)
|
||||
{
|
||||
Ok(identity_file) => {
|
||||
let identities_table = crate::core::db::schema::table_name("identities");
|
||||
let result = sqlx::query(&format!(
|
||||
@@ -106,21 +111,35 @@ async fn upsert_identities_from_disk(
|
||||
|
||||
match result {
|
||||
Ok(_) => {
|
||||
info!("[TMDB] Upserted identity: {} (uuid={})", identity_file.name, identity_file.identity_uuid);
|
||||
info!(
|
||||
"[TMDB] Upserted identity: {} (uuid={})",
|
||||
identity_file.name, identity_file.identity_uuid
|
||||
);
|
||||
identities_created += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("[TMDB] Failed to upsert identity '{}': {}", identity_file.name, e);
|
||||
warn!(
|
||||
"[TMDB] Failed to upsert identity '{}': {}",
|
||||
identity_file.name, e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("[TMDB] Failed to parse identity file {}: {}", path.display(), e);
|
||||
warn!(
|
||||
"[TMDB] Failed to parse identity file {}: {}",
|
||||
path.display(),
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("[TMDB] Failed to read identity file {}: {}", path.display(), e);
|
||||
warn!(
|
||||
"[TMDB] Failed to read identity file {}: {}",
|
||||
path.display(),
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -181,7 +200,9 @@ pub async fn create_identities_from_data(
|
||||
continue;
|
||||
}
|
||||
|
||||
let profile_url = member.profile_path.as_ref()
|
||||
let profile_url = member
|
||||
.profile_path
|
||||
.as_ref()
|
||||
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
|
||||
|
||||
let metadata = serde_json::json!({
|
||||
@@ -226,8 +247,13 @@ pub async fn create_identities_from_data(
|
||||
member.name, member.character, uuid_str
|
||||
);
|
||||
identities_created += 1;
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file(db, &uuid_str).await {
|
||||
warn!("[TMDB] Failed to save identity file for {}: {}", member.name, e);
|
||||
if let Err(e) =
|
||||
crate::core::identity::storage::save_identity_file(db, &uuid_str).await
|
||||
{
|
||||
warn!(
|
||||
"[TMDB] Failed to save identity file for {}: {}",
|
||||
member.name, e
|
||||
);
|
||||
}
|
||||
// Download and save TMDb profile image locally
|
||||
if let Some(url) = &profile_url {
|
||||
@@ -393,8 +419,10 @@ pub async fn probe_movie(
|
||||
overview: movie.overview.clone(),
|
||||
poster_path: movie.poster_path.clone(),
|
||||
};
|
||||
let cache_cast: Vec<cache::TmdbCastMember> = credits.cast.iter().map(|m| {
|
||||
cache::TmdbCastMember {
|
||||
let cache_cast: Vec<cache::TmdbCastMember> = credits
|
||||
.cast
|
||||
.iter()
|
||||
.map(|m| cache::TmdbCastMember {
|
||||
id: m.id,
|
||||
name: m.name.clone(),
|
||||
character: m.character.clone(),
|
||||
@@ -410,8 +438,8 @@ pub async fn probe_movie(
|
||||
deathday: None,
|
||||
gender: None,
|
||||
homepage: None,
|
||||
}
|
||||
}).collect();
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Write TMDb cache so probe_from_cache can be used next time
|
||||
let cache_obj = cache::TmdbCache {
|
||||
|
||||
@@ -60,7 +60,11 @@ pub async fn check_tmdb_api() -> TmdbResourceStatus {
|
||||
enabled: *config::tmdb::PROBE_ENABLED,
|
||||
api_reachable: Some(reachable),
|
||||
api_latency_ms: Some(latency),
|
||||
api_error: if reachable { None } else { Some(format!("HTTP {}", resp.status())) },
|
||||
api_error: if reachable {
|
||||
None
|
||||
} else {
|
||||
Some(format!("HTTP {}", resp.status()))
|
||||
},
|
||||
last_check_at: Some(chrono::Utc::now().to_rfc3339()),
|
||||
}
|
||||
}
|
||||
@@ -84,9 +88,10 @@ pub fn count_cache_files() -> usize {
|
||||
|
||||
pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {
|
||||
let identities_table = crate::core::db::schema::table_name("identities");
|
||||
let count: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table)
|
||||
)
|
||||
let count: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE source = 'tmdb'",
|
||||
identities_table
|
||||
))
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok(count)
|
||||
@@ -94,9 +99,10 @@ pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {
|
||||
|
||||
pub async fn count_tmdb_identities_with_embedding(pool: &sqlx::PgPool) -> Result<i64> {
|
||||
let identities_table = crate::core::db::schema::table_name("identities");
|
||||
let count: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL", identities_table)
|
||||
)
|
||||
let count: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL",
|
||||
identities_table
|
||||
))
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok(count)
|
||||
|
||||
@@ -147,7 +147,7 @@ impl ChunkSelector {
|
||||
|
||||
// Try to match UUID - either exact match or partial match
|
||||
let _uuid = payload
|
||||
.and_then(|p| p.get("uuid"))
|
||||
.and_then(|p| p.get("file_uuid"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
|
||||
|
||||
@@ -8,10 +8,10 @@ use tracing::{info, warn};
|
||||
|
||||
use momentry_core::core::api_key::{ApiKeyService, ApiKeyType};
|
||||
use momentry_core::core::chunk::types::{Chunk, ChunkRule, ChunkType};
|
||||
use momentry_core::core::db::schema;
|
||||
use momentry_core::core::db::Database;
|
||||
use momentry_core::core::time::FrameTime;
|
||||
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
|
||||
use momentry_core::core::db::schema;
|
||||
use momentry_core::{
|
||||
Embedder, OutputDir, PostgresDb, QdrantDb, RedisClient, VectorPayload, VideoRecord, VideoStatus,
|
||||
};
|
||||
@@ -1985,7 +1985,8 @@ async fn main() -> Result<()> {
|
||||
chunk_id: None,
|
||||
created_at: String::new(),
|
||||
};
|
||||
db.store_pre_chunk(&uuid, "asr", serde_json::to_value(&pre_chunk)?).await?;
|
||||
db.store_pre_chunk(&uuid, "asr", serde_json::to_value(&pre_chunk)?)
|
||||
.await?;
|
||||
asr_pre_chunk_ids.push(i as i64);
|
||||
}
|
||||
|
||||
@@ -2009,7 +2010,8 @@ async fn main() -> Result<()> {
|
||||
chunk_id: None,
|
||||
created_at: String::new(),
|
||||
};
|
||||
db.store_pre_chunk(&uuid, "cut", serde_json::to_value(&pre_chunk)?).await?;
|
||||
db.store_pre_chunk(&uuid, "cut", serde_json::to_value(&pre_chunk)?)
|
||||
.await?;
|
||||
cut_pre_chunk_ids.push(i as i64);
|
||||
}
|
||||
|
||||
@@ -2037,7 +2039,8 @@ async fn main() -> Result<()> {
|
||||
chunk_id: None,
|
||||
created_at: String::new(),
|
||||
};
|
||||
db.store_pre_chunk(&uuid, "time", serde_json::to_value(&pre_chunk)?).await?;
|
||||
db.store_pre_chunk(&uuid, "time", serde_json::to_value(&pre_chunk)?)
|
||||
.await?;
|
||||
time_pre_chunk_ids.push(time_pre_chunk_ids.len() as i64);
|
||||
time_start = time_end;
|
||||
}
|
||||
@@ -2117,7 +2120,8 @@ async fn main() -> Result<()> {
|
||||
frame_path: None,
|
||||
created_at: String::new(),
|
||||
};
|
||||
db.store_frame(&uuid, *frame_num as i64, serde_json::to_value(&frame)?).await?;
|
||||
db.store_frame(&uuid, *frame_num as i64, serde_json::to_value(&frame)?)
|
||||
.await?;
|
||||
}
|
||||
|
||||
println!("Stored {} frames", all_frames.len());
|
||||
@@ -2357,8 +2361,7 @@ async fn main() -> Result<()> {
|
||||
for frame in &context_frames {
|
||||
if let Some(objects) = frame["yolo_objects"].as_array() {
|
||||
for obj in objects {
|
||||
if let Some(class_name) =
|
||||
obj.get("class_name").and_then(|v| v.as_str())
|
||||
if let Some(class_name) = obj.get("class_name").and_then(|v| v.as_str())
|
||||
{
|
||||
*all_objects.entry(class_name.to_string()).or_insert(0) += 1;
|
||||
}
|
||||
@@ -2494,9 +2497,11 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
|
||||
let qdrant_payload = VectorPayload {
|
||||
uuid: chunk.uuid.clone(),
|
||||
file_uuid: chunk.uuid.clone(),
|
||||
chunk_id: chunk.chunk_id.clone(),
|
||||
chunk_type: "sentence".to_string(),
|
||||
start_frame: chunk.start_frame,
|
||||
end_frame: chunk.end_frame,
|
||||
start_time: chunk.start_time().seconds(),
|
||||
end_time: chunk.end_time().seconds(),
|
||||
text: Some(text.to_string()),
|
||||
|
||||
@@ -79,12 +79,8 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
|
||||
None => VerificationResult::ok(proc_name, file_uuid),
|
||||
}
|
||||
}
|
||||
ProcessorType::Yolo => {
|
||||
VerificationResult::ok(proc_name, file_uuid)
|
||||
}
|
||||
ProcessorType::Face => {
|
||||
VerificationResult::ok(proc_name, file_uuid)
|
||||
}
|
||||
ProcessorType::Yolo => VerificationResult::ok(proc_name, file_uuid),
|
||||
ProcessorType::Face => VerificationResult::ok(proc_name, file_uuid),
|
||||
ProcessorType::Ocr => {
|
||||
let frames = value.get("frames").and_then(|v| v.as_array());
|
||||
match frames {
|
||||
@@ -114,7 +110,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
|
||||
ProcessorType::FiveW1H => {
|
||||
let scenes = value.get("scenes").and_then(|v| v.as_array());
|
||||
match scenes {
|
||||
Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"),
|
||||
Some(s) if s.is_empty() => {
|
||||
VerificationResult::fail(proc_name, file_uuid, "0 scenes")
|
||||
}
|
||||
Some(_) => VerificationResult::ok(proc_name, file_uuid),
|
||||
None => VerificationResult::ok(proc_name, file_uuid),
|
||||
}
|
||||
|
||||
@@ -37,7 +37,8 @@ pub async fn run_watcher() -> Result<()> {
|
||||
info!("Watch directories: {:?}", dirs);
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut interval = time::interval(std::time::Duration::from_millis(config.poll_interval_ms));
|
||||
let mut interval =
|
||||
time::interval(std::time::Duration::from_millis(config.poll_interval_ms));
|
||||
let mut known = std::collections::HashSet::new();
|
||||
loop {
|
||||
interval.tick().await;
|
||||
@@ -109,15 +110,43 @@ async fn auto_register_file(file_path: &str) {
|
||||
}
|
||||
};
|
||||
|
||||
let file_name = pre.get("file_name").and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
|
||||
let file_name = pre
|
||||
.get("file_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
let probe = pre.get("probe_json").cloned().unwrap_or_default();
|
||||
let file_type = pre.get("file_type").and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
|
||||
let canonical_path = pre.get("file_path").and_then(|v| v.as_str()).unwrap_or(file_path).to_string();
|
||||
let file_type = pre
|
||||
.get("file_type")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
let canonical_path = pre
|
||||
.get("file_path")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or(file_path)
|
||||
.to_string();
|
||||
|
||||
let duration = probe.get("format").and_then(|f| f.get("duration")).and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let width = probe.get("format").and_then(|f| f.get("width")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
||||
let height = probe.get("format").and_then(|f| f.get("height")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
||||
let fps_val = probe.get("format").and_then(|f| f.get("fps")).and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let duration = probe
|
||||
.get("format")
|
||||
.and_then(|f| f.get("duration"))
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
let width = probe
|
||||
.get("format")
|
||||
.and_then(|f| f.get("width"))
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(0) as u32;
|
||||
let height = probe
|
||||
.get("format")
|
||||
.and_then(|f| f.get("height"))
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(0) as u32;
|
||||
let fps_val = probe
|
||||
.get("format")
|
||||
.and_then(|f| f.get("fps"))
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
|
||||
let record = VideoRecord {
|
||||
id: 0,
|
||||
@@ -158,7 +187,10 @@ async fn auto_register_file(file_path: &str) {
|
||||
|
||||
match db.register_video(&record).await {
|
||||
Ok(id) => info!("[WATCHER] Auto-registered {} (id={})", record.file_uuid, id),
|
||||
Err(e) => warn!("[WATCHER] Auto-register failed for {}: {}", record.file_uuid, e),
|
||||
Err(e) => warn!(
|
||||
"[WATCHER] Auto-register failed for {}: {}",
|
||||
record.file_uuid, e
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -175,10 +207,14 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
|
||||
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
|
||||
.unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string());
|
||||
|
||||
let birthday = std::fs::metadata(&path).ok()
|
||||
let birthday = std::fs::metadata(&path)
|
||||
.ok()
|
||||
.and_then(|m| m.modified().ok())
|
||||
.map(|t| {
|
||||
let secs = t.duration_since(std::time::UNIX_EPOCH).unwrap_or_default().as_secs();
|
||||
let secs = t
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs();
|
||||
chrono::DateTime::from_timestamp(secs as i64, 0)
|
||||
.map(|dt| dt.to_rfc3339())
|
||||
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339())
|
||||
@@ -186,9 +222,8 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
|
||||
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339());
|
||||
|
||||
let mac = crate::core::storage::uuid::get_mac_address();
|
||||
let file_uuid = crate::core::storage::uuid::compute_birth_uuid(
|
||||
&mac, &birthday, &canonical_str, &filename,
|
||||
);
|
||||
let file_uuid =
|
||||
crate::core::storage::uuid::compute_birth_uuid(&mac, &birthday, &canonical_str, &filename);
|
||||
|
||||
let pre_path = std::path::PathBuf::from(&output_dir).join(format!("{}.pre.json", file_uuid));
|
||||
if pre_path.exists() {
|
||||
@@ -198,15 +233,22 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
|
||||
|
||||
info!("[PRE-PROCESS] Pre-processing: {} → {}", filename, file_uuid);
|
||||
|
||||
let content_hash = crate::core::storage::content_hash::compute_sha256(&path).unwrap_or_default();
|
||||
let content_hash =
|
||||
crate::core::storage::content_hash::compute_sha256(&path).unwrap_or_default();
|
||||
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
|
||||
.unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string());
|
||||
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
|
||||
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
|
||||
let probe_json = crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await;
|
||||
let probe_json =
|
||||
crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await;
|
||||
|
||||
let file_type = probe_json.get("format").and_then(|f| f.get("file_type")).and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
|
||||
let file_type = probe_json
|
||||
.get("format")
|
||||
.and_then(|f| f.get("file_type"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
|
||||
let pre_data = serde_json::json!({
|
||||
"file_name": filename,
|
||||
|
||||
@@ -12,12 +12,13 @@ use crate::core::chunk::{rule1_ingest, rule3_ingest};
|
||||
use crate::core::config::OUTPUT_DIR;
|
||||
use crate::core::db::qdrant_db::QdrantDb;
|
||||
use crate::core::db::{
|
||||
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
|
||||
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload,
|
||||
VideoStatus,
|
||||
};
|
||||
use crate::core::embedding::Embedder;
|
||||
use crate::core::processor::heuristic_scene::generate_scene_meta;
|
||||
use crate::worker::config::WorkerConfig;
|
||||
use crate::worker::processor::{ProcessorPool, ProcessorTask};
|
||||
use crate::core::processor::heuristic_scene::generate_scene_meta;
|
||||
use crate::worker::resources::SystemResources;
|
||||
use sqlx::PgPool;
|
||||
|
||||
@@ -70,14 +71,15 @@ impl JobWorker {
|
||||
// Reset stale running jobs: jobs stuck in 'running' with no active processor results
|
||||
let monitor_jobs_table = schema::table_name("monitor_jobs");
|
||||
let processor_results_table = schema::table_name("processor_results");
|
||||
if let Err(e) = sqlx::query(
|
||||
&format!("UPDATE {} SET status = 'pending', updated_at = NOW()
|
||||
if let Err(e) = sqlx::query(&format!(
|
||||
"UPDATE {} SET status = 'pending', updated_at = NOW()
|
||||
WHERE status = 'running'
|
||||
AND id NOT IN (
|
||||
SELECT DISTINCT job_id FROM {}
|
||||
WHERE status IN ('pending', 'running')
|
||||
)", monitor_jobs_table, processor_results_table),
|
||||
)
|
||||
)",
|
||||
monitor_jobs_table, processor_results_table
|
||||
))
|
||||
.execute(self.db.pool())
|
||||
.await
|
||||
{
|
||||
@@ -608,12 +610,23 @@ impl JobWorker {
|
||||
}
|
||||
|
||||
let fu = uuid;
|
||||
let rule1 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"));
|
||||
let rule1 = check!(&format!(
|
||||
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"
|
||||
));
|
||||
let vector = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"));
|
||||
let rule3 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"));
|
||||
let rule3 = check!(&format!(
|
||||
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"
|
||||
));
|
||||
let trace = check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL"));
|
||||
let tkg = check!(&format!("SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1", schema::table_name("tkg_nodes")));
|
||||
let scene_meta = std::path::Path::new(&format!("{}/{fu}.scene_meta.json", crate::core::config::OUTPUT_DIR.as_str())).exists();
|
||||
let tkg = check!(&format!(
|
||||
"SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1",
|
||||
schema::table_name("tkg_nodes")
|
||||
));
|
||||
let scene_meta = std::path::Path::new(&format!(
|
||||
"{}/{fu}.scene_meta.json",
|
||||
crate::core::config::OUTPUT_DIR.as_str()
|
||||
))
|
||||
.exists();
|
||||
let five_w1h = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != '' LIMIT 1"));
|
||||
|
||||
let all_ok = rule1 && vector && rule3 && trace && tkg && scene_meta && five_w1h;
|
||||
@@ -847,26 +860,23 @@ impl JobWorker {
|
||||
Err(e) => error!("❌ Trace chunk ingestion failed: {}", e),
|
||||
}
|
||||
|
||||
// Build Temporal Knowledge Graph (TKG)
|
||||
info!("📝 Building TKG graph...");
|
||||
let executor = match crate::core::processor::PythonExecutor::new() {
|
||||
Ok(ex) => ex,
|
||||
Err(e) => {
|
||||
error!("Failed to create PythonExecutor for TKG: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
match executor
|
||||
.run(
|
||||
"tkg_builder.py",
|
||||
&["--file-uuid", &uuid_clone],
|
||||
Some(&uuid_clone),
|
||||
"TKG_BUILDER",
|
||||
Some(std::time::Duration::from_secs(300)),
|
||||
)
|
||||
.await
|
||||
// Build Temporal Knowledge Graph (TKG) — native Rust
|
||||
info!("📝 Building TKG graph (Rust)...");
|
||||
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
|
||||
.unwrap_or_else(|_| ".".to_string());
|
||||
match crate::core::processor::tkg::build_tkg(
|
||||
db_clone.as_ref(),
|
||||
&uuid_clone,
|
||||
&output_dir,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => info!("✅ TKG built for {}", uuid_clone),
|
||||
Ok(r) => info!(
|
||||
"✅ TKG built for {}: {} face, {} obj, {} spk, {} co, {} sf, {} ff edges",
|
||||
uuid_clone,
|
||||
r.face_trace_nodes, r.object_nodes, r.speaker_nodes,
|
||||
r.co_occurrence_edges, r.speaker_face_edges, r.face_face_edges,
|
||||
),
|
||||
Err(e) => error!("❌ TKG build failed for {}: {}", uuid_clone, e),
|
||||
}
|
||||
}
|
||||
@@ -898,7 +908,7 @@ impl JobWorker {
|
||||
let ids = sqlx::query_scalar::<_, uuid::Uuid>(
|
||||
"SELECT DISTINCT i.uuid FROM identities i \
|
||||
JOIN face_detections fd ON fd.identity_id = i.id \
|
||||
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL"
|
||||
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
|
||||
)
|
||||
.bind(&uuid_clone)
|
||||
.fetch_all(db_clone.pool())
|
||||
@@ -907,12 +917,18 @@ impl JobWorker {
|
||||
for id_uuid in &ids {
|
||||
let us = id_uuid.to_string().replace('-', "");
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file(
|
||||
&db_clone, &us
|
||||
).await {
|
||||
&db_clone, &us,
|
||||
)
|
||||
.await
|
||||
{
|
||||
warn!("[P2.5] Failed to save identity file {}: {}", us, e);
|
||||
}
|
||||
}
|
||||
info!("[P2.5] {} identity files saved for {}", ids.len(), uuid_clone);
|
||||
info!(
|
||||
"[P2.5] {} identity files saved for {}",
|
||||
ids.len(),
|
||||
uuid_clone
|
||||
);
|
||||
}
|
||||
Err(e) => error!("❌ TMDb face matching failed for {}: {}", uuid_clone, e),
|
||||
}
|
||||
@@ -1088,8 +1104,8 @@ impl JobWorker {
|
||||
let pool = db.pool();
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let rows = sqlx::query_as::<_, (String, String, String, f64, f64, String)>(
|
||||
&format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
|
||||
let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64, String)>(
|
||||
&format!("SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
|
||||
)
|
||||
.bind(uuid)
|
||||
.fetch_all(pool)
|
||||
@@ -1107,7 +1123,17 @@ impl JobWorker {
|
||||
);
|
||||
|
||||
let mut stored = 0usize;
|
||||
for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
|
||||
for (
|
||||
chunk_id,
|
||||
_chunk_type,
|
||||
text,
|
||||
start_frame,
|
||||
end_frame,
|
||||
start_time,
|
||||
end_time,
|
||||
_content_str,
|
||||
) in &rows
|
||||
{
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
@@ -1119,9 +1145,11 @@ impl JobWorker {
|
||||
continue;
|
||||
}
|
||||
let payload = VectorPayload {
|
||||
uuid: uuid.to_string(),
|
||||
file_uuid: uuid.to_string(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "sentence".to_string(),
|
||||
start_frame: *start_frame,
|
||||
end_frame: *end_frame,
|
||||
start_time: *start_time,
|
||||
end_time: *end_time,
|
||||
text: Some(text.clone()),
|
||||
|
||||
@@ -237,11 +237,19 @@ impl ProcessorPool {
|
||||
let key = format!("{}job:{}:processor:{}", prefix, &job.uuid, &processor_name);
|
||||
let now = chrono::Utc::now().to_rfc3339();
|
||||
let _: Option<String> = redis::cmd("HSET")
|
||||
.arg(&key).arg("started_at").arg(&now)
|
||||
.query_async(&mut conn).await.ok();
|
||||
.arg(&key)
|
||||
.arg("started_at")
|
||||
.arg(&now)
|
||||
.query_async(&mut conn)
|
||||
.await
|
||||
.ok();
|
||||
let _: Option<String> = redis::cmd("HSET")
|
||||
.arg(&key).arg("embedding_started_at").arg(&now)
|
||||
.query_async(&mut conn).await.ok();
|
||||
.arg(&key)
|
||||
.arg("embedding_started_at")
|
||||
.arg(&now)
|
||||
.query_async(&mut conn)
|
||||
.await
|
||||
.ok();
|
||||
}
|
||||
|
||||
// Subscribe to Redis progress pub/sub and update processor hash in real-time
|
||||
@@ -254,10 +262,12 @@ impl ProcessorPool {
|
||||
let cb_processor = sub_processor.clone();
|
||||
if let Err(e) = sub_redis
|
||||
.subscribe_and_callback(&sub_uuid, move |msg| {
|
||||
tracing::info!("[Subscriber] Got msg for={} cur={} tot={}",
|
||||
tracing::info!(
|
||||
"[Subscriber] Got msg for={} cur={} tot={}",
|
||||
msg.processor,
|
||||
msg.data.current.unwrap_or(0),
|
||||
msg.data.total.unwrap_or(0));
|
||||
msg.data.total.unwrap_or(0)
|
||||
);
|
||||
if msg.processor == cb_processor {
|
||||
let cur = msg.data.current.unwrap_or(0);
|
||||
let tot = msg.data.total.unwrap_or(0);
|
||||
@@ -266,11 +276,18 @@ impl ProcessorPool {
|
||||
let u = cb_uuid.clone();
|
||||
let p = cb_processor.clone();
|
||||
tokio::spawn(async move {
|
||||
match r.update_worker_processor_status(
|
||||
&u, &p, "running", None,
|
||||
cur, oc, tot, 0, 0,
|
||||
).await {
|
||||
Ok(_) => tracing::info!("[Subscriber] Updated {}: cur={} tot={}", p, cur, tot),
|
||||
match r
|
||||
.update_worker_processor_status(
|
||||
&u, &p, "running", None, cur, oc, tot, 0, 0,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => tracing::info!(
|
||||
"[Subscriber] Updated {}: cur={} tot={}",
|
||||
p,
|
||||
cur,
|
||||
tot
|
||||
),
|
||||
Err(e) => tracing::error!("[Subscriber] FAILED {}: {}", p, e),
|
||||
}
|
||||
});
|
||||
@@ -756,9 +773,11 @@ impl ProcessorPool {
|
||||
.enumerate()
|
||||
.map(|(i, segment)| {
|
||||
// Prefer ASR output frames, fallback to time-based conversion
|
||||
let start_frame = segment.start_frame
|
||||
let start_frame = segment
|
||||
.start_frame
|
||||
.unwrap_or_else(|| (segment.start_time * fps).round() as i64);
|
||||
let end_frame = segment.end_frame
|
||||
let end_frame = segment
|
||||
.end_frame
|
||||
.unwrap_or_else(|| (segment.end_time * fps).round() as i64);
|
||||
let data = serde_json::json!({
|
||||
"text": segment.text,
|
||||
@@ -892,7 +911,11 @@ impl ProcessorPool {
|
||||
tracing::info!(
|
||||
"Storing {} Face pre-chunks + {} detections for video {}",
|
||||
frames_count,
|
||||
face_result.frames.iter().map(|f| f.faces.len()).sum::<usize>(),
|
||||
face_result
|
||||
.frames
|
||||
.iter()
|
||||
.map(|f| f.faces.len())
|
||||
.sum::<usize>(),
|
||||
uuid
|
||||
);
|
||||
|
||||
@@ -911,7 +934,10 @@ impl ProcessorPool {
|
||||
detections_to_store.push((
|
||||
frame.frame as i64,
|
||||
frame.timestamp,
|
||||
face.x, face.y, face.width, face.height,
|
||||
face.x,
|
||||
face.y,
|
||||
face.width,
|
||||
face.height,
|
||||
face.confidence,
|
||||
));
|
||||
}
|
||||
@@ -1170,9 +1196,10 @@ impl ProcessorPool {
|
||||
"top_5": scene.top_5,
|
||||
});
|
||||
let chunk_table = crate::core::db::schema::table_name("chunk");
|
||||
let _ = sqlx::query(
|
||||
&format!("UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3", chunk_table)
|
||||
)
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3",
|
||||
chunk_table
|
||||
))
|
||||
.bind(&meta)
|
||||
.bind(uuid)
|
||||
.bind(&chk_id)
|
||||
|
||||
Reference in New Issue
Block a user