feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)

Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
Accusys
2026-06-21 04:47:49 +08:00
parent 0afc70fc5b
commit 2cfcfdd1af
2926 changed files with 8311058 additions and 1394 deletions

View File

@@ -0,0 +1,119 @@
#!/bin/bash
# Package File Content — single video's complete data
#
# Usage: <this script> <file_uuid> [version]
#   file_uuid  required; identifies the video whose data is packaged
#   version    optional release label (default: v1.0.0)
#
# Everything is written below $PROJECT/release/files/<file_uuid>/<version>.
set -euo pipefail

UUID="${1:?Usage: $0 <file_uuid> [version]}"
VERSION="${2:-v1.0.0}"

# UUID and VERSION become path components and are interpolated into the
# commands run later in this script, so accept plain tokens only: a leading
# alphanumeric followed by letters, digits, '.', '_', '+' or '-'. This also
# rules out '.', '..' and anything containing '/' or quote characters.
SAFE_TOKEN='^[A-Za-z0-9][A-Za-z0-9._+-]*$'
for ARG in "$UUID" "$VERSION"; do
  if [[ ! "$ARG" =~ $SAFE_TOKEN ]]; then
    printf 'ERROR: invalid argument %q (allowed: letters, digits, . _ + -)\n' "$ARG" >&2
    exit 2
  fi
done

PROJECT="/Users/accusys/momentry_core_0.1"
OUTPUT="$PROJECT/release/files/$UUID/$VERSION"
OUTPUT_DEV="/Users/accusys/momentry/output_dev"
PG_BIN="/Users/accusys/pgsql/18.3/bin"
# Start time; the closing summary reports elapsed seconds relative to it.
T0=$(date +%s)

mkdir -p "$OUTPUT/processors"
echo "=== File Package ${UUID} ${VERSION} ==="
# 1. metadata
# Export the dev.videos row for this file as JSON (indent=2) to metadata.json.
echo "[1/8] metadata.json..."
meta_file="$OUTPUT/metadata.json"

# The target path is handed to Python as an argument instead of being pasted
# into the program text. The JSON is parsed before the file is opened, so an
# empty or malformed query result does not create or truncate metadata.json.
meta_writer='
import json, sys
doc = json.load(sys.stdin)
with open(sys.argv[1], "w") as fh:
    json.dump(doc, fh, indent=2)
'

# The SQL is fed on stdin so psql substitutes :'uuid' as a properly quoted
# literal; strings passed with -c get no psql variable substitution.
if "$PG_BIN/psql" -U accusys -d momentry -t -A -v ON_ERROR_STOP=1 -v uuid="$UUID" 2>/dev/null <<'SQL' | python3 -c "$meta_writer" "$meta_file" 2>/dev/null
SELECT json_build_object(
  'file_uuid', file_uuid,
  'file_name', file_name,
  'file_path', file_path,
  'file_type', file_type,
  'duration', duration,
  'width', width,
  'height', height,
  'fps', fps,
  'status', status,
  'total_frames', total_frames,
  'registration_time', registration_time::text
) FROM dev.videos WHERE file_uuid = :'uuid';
SQL
then
  # Report the size only when the file was actually written by this run.
  echo " $(ls -lh "$meta_file" | awk '{print $5}')"
else
  echo " WARN: no metadata"
fi
# 2. Processor outputs
# Copy every per-processor JSON that exists for this file into the package;
# processors without an output file are skipped.
echo "[2/8] Processor outputs..."
processor_kinds=(asr asrx asr-1 yolo face pose ocr cut scene)
for kind in "${processor_kinds[@]}"; do
  candidate="$OUTPUT_DEV/${UUID}.${kind}.json"
  [ -f "$candidate" ] || continue
  cp "$candidate" "$OUTPUT/processors/"
  echo " ${kind}.json"
done
# 3. Identities (related to this file)
# Export the identities joined to this file through dev.identity_bindings.
echo "[3/8] Identities..."
identities_csv="$OUTPUT/identities.csv"
# The SQL goes in on stdin so psql can substitute :'uuid' and :'out' as quoted
# literals (-c strings get no variable substitution). ON_ERROR_STOP makes a
# failed COPY produce a non-zero exit status when reading from stdin.
if "$PG_BIN/psql" -U accusys -d momentry -v ON_ERROR_STOP=1 \
    -v uuid="$UUID" -v out="$identities_csv" 2>/dev/null <<'SQL'
COPY (
  SELECT DISTINCT i.uuid, i.name, i.identity_type, i.source, i.status, i.metadata
  FROM dev.identities i
  JOIN dev.identity_bindings ib ON ib.identity_id = i.id
  WHERE ib.file_uuid = :'uuid'
) TO :'out' WITH CSV HEADER;
SQL
then
  # Line 1 is the CSV header, not a data row. NOTE(review): a field with an
  # embedded newline would still inflate this line-based count.
  echo " $(( $(wc -l < "$identities_csv") - 1 )) rows"
else
  # Best effort: keep packaging, but do not hide the failure.
  echo " WARN: identities export failed" >&2
fi
# 4. Face detections
# Export this file's dev.face_detections rows, ordered by frame number.
echo "[4/8] Face detections..."
faces_csv="$OUTPUT/face_detections.csv"
# The SQL goes in on stdin so psql can substitute :'uuid' and :'out' as quoted
# literals (-c strings get no variable substitution). ON_ERROR_STOP makes a
# failed COPY produce a non-zero exit status when reading from stdin.
if "$PG_BIN/psql" -U accusys -d momentry -v ON_ERROR_STOP=1 \
    -v uuid="$UUID" -v out="$faces_csv" 2>/dev/null <<'SQL'
COPY (
  SELECT id, file_uuid, frame_number, timestamp_secs, face_id, x, y, width, height, confidence, trace_id, identity_id
  FROM dev.face_detections WHERE file_uuid = :'uuid'
  ORDER BY frame_number
) TO :'out' WITH CSV HEADER;
SQL
then
  # Line 1 is the CSV header, not a data row.
  echo " $(( $(wc -l < "$faces_csv") - 1 )) rows"
else
  # Best effort: keep packaging, but do not hide the failure.
  echo " WARN: face_detections export failed" >&2
fi
# 5. Chunks
# Export this file's dev.chunk rows, ordered by id.
echo "[5/8] Chunks..."
chunks_csv="$OUTPUT/chunks.csv"
# The SQL goes in on stdin so psql can substitute :'uuid' and :'out' as quoted
# literals (-c strings get no variable substitution). ON_ERROR_STOP makes a
# failed COPY produce a non-zero exit status when reading from stdin.
if "$PG_BIN/psql" -U accusys -d momentry -v ON_ERROR_STOP=1 \
    -v uuid="$UUID" -v out="$chunks_csv" 2>/dev/null <<'SQL'
COPY (
  SELECT chunk_id, chunk_type, start_frame, end_frame, start_time, end_time, fps, text_content
  FROM dev.chunk WHERE file_uuid = :'uuid'
  ORDER BY id
) TO :'out' WITH CSV HEADER;
SQL
then
  # Line 1 is the CSV header, not a data row. NOTE(review): text_content
  # values with embedded newlines would still inflate this line-based count.
  echo " $(( $(wc -l < "$chunks_csv") - 1 )) rows"
else
  # Best effort: keep packaging, but do not hide the failure.
  echo " WARN: chunks export failed" >&2
fi
# 6. Vectors
# Export the embeddings (cast to text) of this file's chunks; the join keeps
# only vectors that have a matching dev.chunk row.
echo "[6/8] chunk_vectors..."
vectors_csv="$OUTPUT/chunk_vectors.csv"
# The SQL goes in on stdin so psql can substitute :'uuid' and :'out' as quoted
# literals (-c strings get no variable substitution). ON_ERROR_STOP makes a
# failed COPY produce a non-zero exit status when reading from stdin.
if "$PG_BIN/psql" -U accusys -d momentry -v ON_ERROR_STOP=1 \
    -v uuid="$UUID" -v out="$vectors_csv" 2>/dev/null <<'SQL'
COPY (
  SELECT cv.chunk_id, cv.embedding::text
  FROM dev.chunk_vectors cv
  JOIN dev.chunk c ON c.file_uuid=cv.uuid AND c.chunk_id=cv.chunk_id
  WHERE cv.uuid = :'uuid'
) TO :'out' WITH CSV HEADER;
SQL
then
  # Line 1 is the CSV header, not a data row.
  echo " $(( $(wc -l < "$vectors_csv") - 1 )) rows"
else
  # Best effort: keep packaging, but do not hide the failure.
  echo " WARN: chunk_vectors export failed" >&2
fi
# 7. TKG
# Export this file's rows from dev.tkg_nodes and dev.tkg_edges.
echo "[7/8] TKG..."
# Both COPY statements run in one psql session fed on stdin, which lets psql
# substitute :'uuid' and the output paths as quoted literals. ON_ERROR_STOP
# stops at the first failing statement and yields a non-zero exit status.
if ! "$PG_BIN/psql" -U accusys -d momentry -v ON_ERROR_STOP=1 -v uuid="$UUID" \
    -v nodes_out="$OUTPUT/tkg_nodes.csv" -v edges_out="$OUTPUT/tkg_edges.csv" 2>/dev/null <<'SQL'
COPY (SELECT * FROM dev.tkg_nodes WHERE file_uuid = :'uuid') TO :'nodes_out' WITH CSV HEADER;
COPY (SELECT * FROM dev.tkg_edges WHERE file_uuid = :'uuid') TO :'edges_out' WITH CSV HEADER;
SQL
then
  # A failure here already aborted the script under `set -e`; keep that, but
  # say why instead of exiting without any message.
  echo " ERROR: TKG export failed" >&2
  exit 1
fi
echo " nodes+edges exported"
# 8. RELEASE_INFO
# Write a short plain-text summary of the package to RELEASE_INFO.txt.
echo "[8/8] RELEASE_INFO..."

# Count queries are passed on stdin (here-string) so psql substitutes :'uuid'
# as a quoted literal; ON_ERROR_STOP turns a failed query into a non-zero exit.
sentence_sql="SELECT count(*) FROM dev.chunk WHERE file_uuid = :'uuid' AND chunk_type = 'sentence';"
vectors_sql="SELECT count(*) FROM dev.chunk_vectors cv JOIN dev.chunk c ON c.file_uuid=cv.uuid AND c.chunk_id=cv.chunk_id WHERE cv.uuid = :'uuid';"

# A failed query already aborted the script under `set -e`; keep that, but
# report which query failed instead of exiting silently.
SENTENCE=$("$PG_BIN/psql" -U accusys -d momentry -t -A -v ON_ERROR_STOP=1 -v uuid="$UUID" 2>/dev/null <<<"$sentence_sql") || {
  echo " ERROR: sentence chunk count query failed" >&2
  exit 1
}
VECTORS=$("$PG_BIN/psql" -U accusys -d momentry -t -A -v ON_ERROR_STOP=1 -v uuid="$UUID" 2>/dev/null <<<"$vectors_sql") || {
  echo " ERROR: chunk vector count query failed" >&2
  exit 1
}

# Count directory entries with a glob instead of parsing `ls`; nullglob is
# enabled only inside the command substitution's subshell, so an empty or
# missing directory yields 0 without changing shell options for the script.
PROCESSOR_COUNT=$(shopt -s nullglob; set -- "$OUTPUT/processors"/*; echo "$#")

cat > "$OUTPUT/RELEASE_INFO.txt" << EOF
Release: ${VERSION}
Type: file
UUID: ${UUID}
Date: $(date +%Y-%m-%d)
Chunks: sentence=${SENTENCE}
Vectors: ${VECTORS}
Processors: ${PROCESSOR_COUNT}
EOF
# Symlink latest
# Repoint <file_uuid>/latest at the package just built, then print a summary:
# elapsed seconds since T0, the output directory and its disk usage.
LATEST_LINK="$PROJECT/release/files/$UUID/latest"
ln -sfn "$OUTPUT" "$LATEST_LINK"
FINISHED=$(date +%s)
ELAPSED=$((FINISHED - T0))
printf '\n=== File Package done (%ss) ===\n %s\n' "$ELAPSED" "$OUTPUT"
du -sh "$OUTPUT"