M4 handover: coordinate fixes, detector registry, deploy v2, YOLOv8s, identity lifecycle

- Fix swift_pose/swift_ocr Y-flip bugs (BUG-003~006)
- Add heuristic_scene module + post-processing trigger (replaces Places365)
- YOLOv5nu → YOLOv8s CoreML (+33% detections, +390% scene indicators)
- Per-table SQL export (split 4.7GB single file → 478MB max per table)
- Version/build check in deploy.sh (compare /health vs file_info.json)
- Add file_uuid column to identities table + backfill
- Identity pre-clean step in deploy (avoids UNIQUE conflicts on re-deploy)
- Stranger_xxx naming fix with UUID context
- Add DETECTOR_REGISTRY.md (25 detectors), DETECTOR_SELECTION_SOP.md
- Update SPATIAL_COORDINATE_REGISTRY.md (P layer, 6-layer architecture)
- New IDENTITY_LIFECYCLE.md
- M4 response docs for deploy_script_fix and 111614 test report
This commit is contained in:
Accusys
2026-05-13 20:00:47 +08:00
parent d34bcae145
commit ffc30d7377
25 changed files with 2219 additions and 118 deletions

View File

@@ -15,10 +15,38 @@ echo "=== Momentry Package Deploy ==="
echo "UUID: $UUID"
echo "Time: $(date '+%Y-%m-%d %H:%M:%S')"
echo ""
echo "=== Momentry Package Deploy ==="
echo "UUID: $UUID"
echo "Time: $(date '+%Y-%m-%d %H:%M:%S')"
echo ""
# 0. Version & build compatibility check
echo "[0/8] Checking system version and build..."
PKG_VER=$(python3 -c "import json; f=json.load(open('$DIR/file_info.json')); print(f.get('momentry_version','?'))")
PKG_BUILD=$(python3 -c "import json; f=json.load(open('$DIR/file_info.json')); print(f.get('momentry_build','?'))")
SRV=$(curl -sf http://localhost:3003/health | python3 -c "
import json,sys
d=json.load(sys.stdin)
print(d.get('version','unknown'), d.get('build_git_hash','unknown'))
" 2>/dev/null || echo "down down")
SRV_VER=$(echo "$SRV" | cut -d' ' -f1)
SRV_BUILD=$(echo "$SRV" | cut -d' ' -f2)
if [ "$SRV_VER" = "down" ]; then
echo " ⚠️ Cannot reach server at localhost:3003, skipping version check"
elif [ "$SRV_VER" != "$PKG_VER" ] || [ "$SRV_BUILD" != "$PKG_BUILD" ]; then
echo " ❌ Mismatch:"
echo " Package Server"
echo " Version: $PKG_VER $SRV_VER"
echo " Build: $PKG_BUILD $SRV_BUILD"
echo ""
echo " Please obtain the matching system upgrade package."
exit 1
else
echo " ✅ Server v$SRV_VER (build $SRV_BUILD) matches package"
fi
# 1. Verify package integrity
echo "[1/5] Verifying package..."
REQUIRED_FILES=("data.sql" "file_info.json")
echo "[1/8] Verifying package..."
MISSING=0
for f in "${REQUIRED_FILES[@]}"; do
if [ ! -f "$DIR/$f" ]; then
@@ -32,28 +60,38 @@ if [ $MISSING -eq 1 ]; then
fi
echo " ✅ Package verified"
# 2. Import data.sql
echo "[2/5] Importing DB data..."
"$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -f "$DIR/data.sql" 2>&1 | tail -3
# 2. Pre-clean: remove existing identities for this file (avoids UNIQUE(name) conflicts on COPY)
echo "[2/8] Pre-cleaning existing identities for this file..."
"$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -c "DELETE FROM dev.identities WHERE file_uuid = '$UUID'" > /dev/null 2>&1
echo " ✅ Cleared identities for $UUID"
# 3. Import data.sql (uses \i to load per-table files from sql/)
echo "[3/8] Importing DB data..."
(cd "$DIR" && "$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -f data.sql 2>&1) | tail -5
echo " ✅ Data imported"
# 3. Copy video to demo dir
# 4. Copy video to demo dir (only this package's video, not scanning others)
VIDEO_FILE=$(ls "$DIR"/*.mp4 "$DIR"/*.mov "$DIR"/*.avi "$DIR"/*.mkv 2>/dev/null | head -1)
if [ -n "$VIDEO_FILE" ]; then
VIDEO_NAME=$(basename "$VIDEO_FILE")
DEST="$DEMO_DIR/$VIDEO_NAME"
if [ ! -f "$DEST" ]; then
cp "$VIDEO_FILE" "$DEST"
echo "[3/5] Video copied: $VIDEO_NAME$DEMO_DIR"
echo "[4/8] Video copied: $VIDEO_NAME$DEMO_DIR"
else
echo "[3/5] Video already in demo dir, skipping"
echo "[4/8] Video already in demo dir, skipping"
fi
else
echo "[3/5] No video file in package, skipping"
echo "[4/8] No video file in package, skipping"
fi
# 4. Copy output files
echo "[4/5] Copying output files..."
# 5. Set video status to completed (package is fully processed)
echo "[5/8] Setting deployment status..."
"$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -c "UPDATE dev.videos SET status = 'completed' WHERE file_uuid = '$UUID'" > /dev/null 2>&1
echo " ✅ Status set to 'completed'"
# 6. Copy output files
echo "[6/8] Copying output files..."
COPIED=0
for f in "$DIR"/*.json "$DIR"/*.sqlite "$DIR"/*.sqlite; do
if [ -f "$f" ]; then
@@ -66,20 +104,25 @@ for f in "$DIR"/*.json "$DIR"/*.sqlite "$DIR"/*.sqlite; do
done
echo "$COPIED files copied to $OUTPUT_DIR"
# 5. Verify deployment
echo "[5/5] Verifying deployment..."
CHUNKS=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.chunk WHERE file_uuid='$UUID' AND chunk_type='sentence'" 2>/dev/null || echo "?")
# 7. Verify deployment
echo "[7/8] Verifying deployment..."
CHUNKS=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.chunk WHERE file_uuid='$UUID'" 2>/dev/null || echo "?")
FACES=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid='$UUID'" 2>/dev/null || echo "?")
IDENTS=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.identities WHERE file_uuid='$UUID'" 2>/dev/null || echo "?")
TKG_NODES=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.tkg_nodes WHERE file_uuid='$UUID'" 2>/dev/null || echo "?")
TKG_EDGES=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.tkg_edges WHERE file_uuid='$UUID'" 2>/dev/null || echo "?")
echo ""
echo "=== Deploy Complete ==="
echo " UUID: $UUID"
echo " Chunks: $CHUNKS"
echo " Faces: $FACES"
echo " Output: $OUTPUT_DIR/"
echo " UUID: $UUID"
echo " Chunks: $CHUNKS"
echo " Faces: $FACES"
echo " Identities: $IDENTS"
echo " TKG nodes: $TKG_NODES"
echo " TKG edges: $TKG_EDGES"
echo " Output: $OUTPUT_DIR/"
echo ""
echo "Next: trigger pipeline processing"
echo " curl -X POST http://localhost:3003/api/v1/file/$UUID/process"
echo "Package is self-contained — no further processing needed."
echo ""
echo "Or open the offline report:"
echo " python3 render_offline_report.py $OUTPUT_DIR/$UUID.sqlite"
echo "Offline report:"
echo " python3 scripts/render_offline_report.py $OUTPUT_DIR/$UUID.sqlite"

View File

@@ -13,6 +13,8 @@ TABLES = [
("dev.chunk", "file_uuid"),
("dev.chunk_vectors", "uuid"),
("dev.face_detections", "file_uuid"),
("dev.tkg_nodes", "file_uuid"),
("dev.tkg_edges", "file_uuid"),
]
def main():
@@ -47,8 +49,9 @@ def main():
f.write("\n")
f.write("\\.\n\n")
# Export identities referenced by this file's face_detections
f.write(f"-- dev.identities (referenced by face_detections WHERE file_uuid='{uuid}')\n")
# Export identities for this file (by file_uuid column) plus global identities
# Global: tmdb + merged + user_defined (exclude inactive auto)
f.write(f"-- dev.identities (WHERE file_uuid='{uuid}' OR global tmdb/merged/user_defined)\n")
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
"-c", "SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) FROM information_schema.columns WHERE table_schema='dev' AND table_name='identities' AND is_updatable='YES'"],
@@ -56,7 +59,7 @@ def main():
cols = r.stdout.strip()
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
f"COPY (SELECT DISTINCT i.* FROM dev.identities i INNER JOIN dev.face_detections fd ON fd.identity_id = i.id WHERE fd.file_uuid = '{uuid}') TO STDOUT WITH CSV HEADER"],
f"COPY (SELECT * FROM dev.identities WHERE file_uuid = '{uuid}' OR (file_uuid IS NULL AND source IN ('tmdb', 'merged', 'user_defined'))) TO STDOUT WITH CSV HEADER"],
capture_output=True, text=True, timeout=60)
if r.stdout.strip():
f.write(f"COPY dev.identities ({cols}) FROM STDIN WITH CSV HEADER;\n")
@@ -74,7 +77,7 @@ def main():
cols = r.stdout.strip()
r = subprocess.run(
[f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
f"COPY (SELECT DISTINCT ib.* FROM dev.identity_bindings ib INNER JOIN dev.face_detections fd ON fd.identity_id = ib.identity_id WHERE fd.file_uuid = '{uuid}') TO STDOUT WITH CSV HEADER"],
f"COPY (SELECT ib.* FROM dev.identity_bindings ib INNER JOIN dev.face_detections fd ON fd.identity_id = ib.identity_id AND fd.trace_id IS NOT NULL WHERE fd.file_uuid = '{uuid}' AND ib.identity_value IN (SELECT DISTINCT trace_id::text FROM dev.face_detections WHERE file_uuid = '{uuid}' AND trace_id IS NOT NULL)) TO STDOUT WITH CSV HEADER"],
capture_output=True, text=True, timeout=60)
if r.stdout.strip():
f.write(f"COPY dev.identity_bindings ({cols}) FROM STDIN WITH CSV HEADER;\n")
@@ -111,6 +114,9 @@ def main():
capture_output=True, text=True, timeout=15)
if r.stdout.strip():
info = json.loads(r.stdout.strip())
info["momentry_version"] = "1.0.0" # keep in sync with Cargo.toml version
info["momentry_build"] = subprocess.run(["git", "rev-parse", "--short", "HEAD"],
capture_output=True, text=True, timeout=5).stdout.strip()
with open(os.path.join(outdir, "file_info.json"), "w") as f:
json.dump(info, f, indent=2)
print(f" file_info.json")

View File

@@ -87,7 +87,7 @@ pg_to_sqlite(
# chunk
pg_to_sqlite(
"SELECT file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, metadata->>'speaker_id' as speaker_id FROM dev.chunk WHERE file_uuid=%s AND chunk_type='sentence' ORDER BY chunk_id",
"SELECT file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, metadata->>'speaker_id' as speaker_id FROM dev.chunk WHERE file_uuid=%s ORDER BY chunk_id",
"chunk",
"""CREATE TABLE IF NOT EXISTS chunk (
file_uuid TEXT, chunk_id TEXT, chunk_type TEXT,

View File

@@ -77,11 +77,11 @@ for cluster_id in sorted(set(labels)):
# Create new identity
identity_uuid = None
cur.execute("""
INSERT INTO dev.identities (name, identity_type, source, status, created_at)
VALUES (%s, 'face', 'auto', 'active', NOW())
ON CONFLICT (name) DO UPDATE SET status = 'active'
INSERT INTO dev.identities (name, identity_type, source, status, created_at, file_uuid)
VALUES (%s, 'face', 'auto', 'active', NOW(), %s)
ON CONFLICT (name) DO UPDATE SET status = 'active', file_uuid = COALESCE(dev.identities.file_uuid, %s)
RETURNING id
""", (f"PERSON_{UUID[:8]}_{cluster_id}",))
""", (f"PERSON_{UUID[:8]}_{cluster_id}", UUID, UUID))
identity_id = cur.fetchone()[0]
cluster_to_identity[cluster_id] = identity_id
print(f" Cluster {cluster_id}: new identity {identity_id} (PERSON_{cluster_id})")

View File

@@ -0,0 +1,133 @@
#!/opt/homebrew/bin/python3.11
"""
Match auto-generated identities to TMDB identities via centroid embedding similarity.
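For each match above threshold, repoints this file's face_detections and the auto
identity's identity_bindings to the TMDB identity, then marks the auto identity as
source='merged' with the matched tmdb_id (the auto identity's name is left unchanged).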
Usage: python3 match_identities_to_tmdb.py <file_uuid>
"""
import sys
import psycopg2
import psycopg2.extras
import numpy as np
DB = "dbname=momentry user=accusys host=localhost"
THRESHOLD = 0.55
def cosine_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    The result is the dot product divided by the product of the two
    Euclidean norms. If either vector has a norm of exactly zero the
    similarity is undefined, so 0.0 is returned instead of dividing by zero.
    """
    inner = np.dot(a, b)
    norm_a, norm_b = np.linalg.norm(a), np.linalg.norm(b)
    if norm_a != 0 and norm_b != 0:
        return inner / (norm_a * norm_b)
    return 0.0
def _parse_embedding(value):
    """Convert one embedding value read from the database into a float32 array.

    Accepts either a sequence of numbers or the bracketed, comma-separated
    text form (e.g. ``"[0.1,0.2]"``) that this script already parses for the
    TMDB ``face_embedding`` column.
    NOTE(review): whether ``face_detections.embedding`` arrives as a list or
    as text depends on the column type and driver adapters -- confirm.
    """
    if isinstance(value, str):
        return np.array([float(x) for x in value.strip("[]").split(",")],
                        dtype=np.float32)
    return np.array(value, dtype=np.float32)


def main():
    """Match this file's auto identities against TMDB identities and merge hits.

    The file UUID is taken from ``sys.argv[1]``; when absent, a hard-coded
    default UUID is used. For every auto identity linked to the file through
    ``dev.face_detections``, the centroid of up to 500 of its face embeddings
    is compared (cosine similarity) with every TMDB identity embedding. When
    the best similarity is at least ``THRESHOLD``:

    * the file's face detections are repointed to the TMDB identity,
    * all identity bindings of the auto identity are repointed likewise,
    * the auto identity is marked ``source = 'merged'`` with the TMDB id.

    All updates are committed together at the end. If anything raises before
    the commit, the cursor and connection are still closed, without
    committing.
    """
    uuid = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"

    conn = psycopg2.connect(DB)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        try:
            # Load TMDB identities with face_embedding (pgvector), read as text.
            cur.execute("""
                SELECT id, name, tmdb_id, face_embedding::text as emb_text
                FROM dev.identities
                WHERE source = 'tmdb' AND face_embedding IS NOT NULL
            """)
            tmdb_identities = []
            for row in cur.fetchall():
                emb_str = row["emb_text"]
                if not emb_str:
                    continue
                emb = np.array([float(x) for x in emb_str.strip("[]").split(",")])
                tmdb_identities.append({
                    "id": row["id"],
                    "name": row["name"],
                    "tmdb_id": row["tmdb_id"],
                    "embedding": emb,
                })
            print(f"Loaded {len(tmdb_identities)} TMDB identities with embeddings")
            if not tmdb_identities:
                print("No TMDB identities found. Run tmdb_embed_extractor.py first.")
                return

            # Auto identities that have at least one face detection in this file.
            cur.execute("""
                SELECT DISTINCT i.id, i.name
                FROM dev.identities i
                INNER JOIN dev.face_detections fd ON fd.identity_id = i.id
                WHERE fd.file_uuid = %s AND i.source = 'auto'
            """, (uuid,))
            auto_rows = cur.fetchall()
            print(f"Auto identities for {uuid[:8]}...: {len(auto_rows)}")

            matched = 0
            for row in auto_rows:
                auto_id = row["id"]
                auto_name = row["name"]

                # Face embeddings recorded for this identity in this file.
                cur.execute("""
                    SELECT embedding
                    FROM dev.face_detections
                    WHERE file_uuid = %s AND identity_id = %s AND embedding IS NOT NULL
                    LIMIT 500
                """, (uuid, auto_id))
                emb_rows = cur.fetchall()
                if not emb_rows:
                    continue

                # Centroid of the sampled embeddings represents the identity.
                all_embs = [_parse_embedding(r["embedding"]) for r in emb_rows]
                centroid = np.mean(all_embs, axis=0)

                # Best TMDB candidate; only strictly positive similarities
                # can win, and the first of equal scores is kept.
                best_sim = 0.0
                best_tmdb = None
                for tmdb in tmdb_identities:
                    sim = cosine_similarity(centroid, tmdb["embedding"])
                    if sim > best_sim:
                        best_sim = sim
                        best_tmdb = tmdb

                if best_tmdb and best_sim >= THRESHOLD:
                    fm = best_tmdb["name"]
                    tmdb_identity_id = best_tmdb["id"]
                    # Separator added so the two names do not run together.
                    print(f" {auto_name} -> {fm} (sim={best_sim:.3f})")

                    # Update face_detections to point to TMDB identity
                    cur.execute("""
                        UPDATE dev.face_detections
                        SET identity_id = %s
                        WHERE file_uuid = %s AND identity_id = %s
                    """, (tmdb_identity_id, uuid, auto_id))

                    # Update identity_bindings to point to TMDB identity
                    cur.execute("""
                        UPDATE dev.identity_bindings
                        SET identity_id = %s
                        WHERE identity_id = %s
                    """, (tmdb_identity_id, auto_id))

                    # Mark auto identity as merged (or we could delete it)
                    cur.execute("""
                        UPDATE dev.identities
                        SET source = 'merged', tmdb_id = %s
                        WHERE id = %s
                    """, (best_tmdb["tmdb_id"], auto_id))
                    matched += 1

            conn.commit()
            print(f"\nMatched {matched}/{len(auto_rows)} auto identities to TMDB")
            print(f"Threshold: {THRESHOLD}")
        finally:
            cur.close()
    finally:
        conn.close()


if __name__ == "__main__":
    main()

View File

@@ -126,7 +126,7 @@ struct SwiftOCR: ParsableCommand {
let item: [String: Any] = [
"text": candidate.string,
"x": Int(bb.origin.x * CGFloat(cgW)),
"y": Int(bb.origin.y * CGFloat(cgH)),
"y": Int((1.0 - bb.origin.y - bb.size.height) * CGFloat(cgH)),
"width": Int(bb.size.width * CGFloat(cgW)),
"height": Int(bb.size.height * CGFloat(cgH)),
"confidence": conf
@@ -183,16 +183,19 @@ struct SwiftOCR: ParsableCommand {
guard (try? handler.perform([request])) != nil,
let results = request.results else { return texts }
let cgW = CGFloat(CVPixelBufferGetWidth(pixelBuffer))
let cgH = CGFloat(CVPixelBufferGetHeight(pixelBuffer))
for obs in results {
guard let candidate = obs.topCandidates(1).first,
candidate.confidence > 0.2 else { continue }
let bb = obs.boundingBox
texts.append([
"text": candidate.string,
"x": Int(bb.origin.x * 640),
"y": Int(bb.origin.y * 360),
"width": Int(bb.size.width * 640),
"height": Int(bb.size.height * 360),
"x": Int(bb.origin.x * cgW),
"y": Int((1.0 - bb.origin.y - bb.size.height) * cgH),
"width": Int(bb.size.width * cgW),
"height": Int(bb.size.height * cgH),
"confidence": candidate.confidence
])
}

View File

@@ -151,17 +151,19 @@ struct SwiftPose: ParsableCommand {
if let mapped = nameMap[rawName] {
rawName = mapped
}
let px = point.location.x * CGFloat(w)
let py = CGFloat(h) - point.location.y * CGFloat(h)
keypoints.append([
"name": rawName.isEmpty ? "\(joint)" : rawName,
"x": point.location.x * CGFloat(w),
"y": point.location.y * CGFloat(h),
"x": px,
"y": py,
"confidence": point.confidence,
])
if point.confidence > 0.1 {
minX = min(minX, point.location.x)
minY = min(minY, point.location.y)
maxX = max(maxX, point.location.x)
maxY = max(maxY, point.location.y)
minX = min(minX, px)
minY = min(minY, py)
maxX = max(maxX, px)
maxY = max(maxY, py)
}
}
}
@@ -171,10 +173,10 @@ struct SwiftPose: ParsableCommand {
]
if maxX > minX {
bbox = [
"x": Int(minX * CGFloat(w)),
"y": Int(minY * CGFloat(h)),
"width": Int((maxX - minX) * CGFloat(w)),
"height": Int((maxY - minY) * CGFloat(h)),
"x": Int(minX),
"y": Int(minY),
"width": Int(maxX - minX),
"height": Int(maxY - minY),
]
}