fix: restore identity_id after face_dedup, rebuild package v20260512
- Re-ran identity_bind.py to restore identity_id on face_detections
- Dedup cleanup had removed the rows with identity_id and kept the NULL rows
- 70691 face_detections now have identity_id, 428 identities
- Full package rebuild: 169MB sqlite, 1358MB tar.gz
- identities.json: 428 identities + 5483 bindings + 5483 trace maps
- TMDB matching complete: Audrey Hepburn 843 traces, Cary Grant 482
This commit is contained in:
119
scripts/package_file.sh
Normal file
119
scripts/package_file.sh
Normal file
@@ -0,0 +1,119 @@
|
||||
#!/bin/bash
# Package File Content — single video's complete data
#
# Usage: package_file.sh <file_uuid> [version]
#   file_uuid  identifier of the video; used in SQL filters and output paths
#   version    release label used as the output directory name
#              (default: v1.0.0)
#
# Everything is written under $PROJECT/release/files/<file_uuid>/<version>/.
set -euo pipefail

UUID="${1:?Usage: $0 <file_uuid> [version]}"
VERSION="${2:-v1.0.0}"

# UUID is spliced verbatim into SQL string literals and filesystem paths, so
# refuse anything that could break out of either context (quotes, slashes,
# whitespace, ...).
# NOTE(review): assumes file_uuid values only use [A-Za-z0-9_-] — widen the
# class if real identifiers contain other characters.
if [[ ! "$UUID" =~ ^[A-Za-z0-9_-]+$ ]]; then
  echo "ERROR: invalid file_uuid: $UUID" >&2
  exit 2
fi

PROJECT="/Users/accusys/momentry_core_0.1"
OUTPUT="$PROJECT/release/files/$UUID/$VERSION"
OUTPUT_DEV="/Users/accusys/momentry/output_dev"
PG_BIN="/Users/accusys/pgsql/18.3/bin"

# Start time in epoch seconds, used for the elapsed-time report at the end.
T0=$(date +%s)

mkdir -p "$OUTPUT/processors"

echo "=== File Package ${UUID} ${VERSION} ==="
|
||||
|
||||
# 1. metadata
# Export this file's row from dev.videos as pretty-printed JSON.
echo "[1/8] metadata.json..."
# psql -t -A prints only the bare JSON text; python re-serialises it with
# indentation. The destination is handed to python as an argument instead of
# being pasted into the Python source, so a quote in the path cannot break the
# snippet. The JSON is parsed before the file is opened, so nothing is created
# when the query returns no row. psql's stderr stays visible so a connection
# problem is not mistaken for "no metadata".
if "$PG_BIN/psql" -U accusys -d momentry -t -A -c "
  SELECT json_build_object(
    'file_uuid', file_uuid,
    'file_name', file_name,
    'file_path', file_path,
    'file_type', file_type,
    'duration', duration,
    'width', width,
    'height', height,
    'fps', fps,
    'status', status,
    'total_frames', total_frames,
    'registration_time', registration_time::text
  ) FROM dev.videos WHERE file_uuid='$UUID';
" | python3 -c '
import json, sys
doc = json.load(sys.stdin)
with open(sys.argv[1], "w") as fh:
    json.dump(doc, fh, indent=2)
' "$OUTPUT/metadata.json" 2>/dev/null; then
  # Only report a size when the file was actually written.
  echo " $(ls -lh "$OUTPUT/metadata.json" | awk '{print $5}')"
else
  echo " WARN: no metadata"
fi
|
||||
|
||||
# 2. Processor outputs
# Copy whichever per-processor JSON files exist for this file into the
# package; a processor with no output file is skipped without a message.
echo "[2/8] Processor outputs..."
for kind in asr asrx asr-1 yolo face pose ocr cut scene; do
  candidate="$OUTPUT_DEV/${UUID}.${kind}.json"
  [ -f "$candidate" ] || continue
  cp "$candidate" "$OUTPUT/processors/"
  echo " ${kind}.json"
done
|
||||
|
||||
# 3. Identities (related to this file)
echo "[3/8] Identities..."
# Stream the rows to the client with COPY ... TO STDOUT and redirect locally.
# COPY ... TO '<file>' makes the database server process write the file, which
# requires server-side file privileges and the server's view of the
# filesystem. Errors are left visible instead of being sent to /dev/null.
if "$PG_BIN/psql" -U accusys -d momentry -c "
  COPY (
    SELECT DISTINCT i.uuid, i.name, i.identity_type, i.source, i.status, i.metadata
    FROM dev.identities i
    JOIN dev.identity_bindings ib ON ib.identity_id = i.id
    WHERE ib.file_uuid = '$UUID'
  ) TO STDOUT WITH CSV HEADER;
" > "$OUTPUT/identities.csv"; then
  # wc -l counts every line, so the figure includes the CSV header.
  echo " $(wc -l < "$OUTPUT/identities.csv") rows"
else
  # Do not leave an empty or partial CSV behind in the package.
  rm -f "$OUTPUT/identities.csv"
  echo " WARN: identities export failed" >&2
fi
|
||||
|
||||
# 4. Face detections
echo "[4/8] Face detections..."
# Stream the rows to the client with COPY ... TO STDOUT and redirect locally.
# COPY ... TO '<file>' makes the database server process write the file, which
# requires server-side file privileges and the server's view of the
# filesystem. Errors are left visible instead of being sent to /dev/null.
if "$PG_BIN/psql" -U accusys -d momentry -c "
  COPY (
    SELECT id, file_uuid, frame_number, timestamp_secs, face_id, x, y, width, height, confidence, trace_id, identity_id
    FROM dev.face_detections WHERE file_uuid = '$UUID'
    ORDER BY frame_number
  ) TO STDOUT WITH CSV HEADER;
" > "$OUTPUT/face_detections.csv"; then
  # wc -l counts every line, so the figure includes the CSV header.
  echo " $(wc -l < "$OUTPUT/face_detections.csv") rows"
else
  # Do not leave an empty or partial CSV behind in the package.
  rm -f "$OUTPUT/face_detections.csv"
  echo " WARN: face_detections export failed" >&2
fi
|
||||
|
||||
# 5. Chunks
echo "[5/8] Chunks..."
# Stream the rows to the client with COPY ... TO STDOUT and redirect locally.
# COPY ... TO '<file>' makes the database server process write the file, which
# requires server-side file privileges and the server's view of the
# filesystem. Errors are left visible instead of being sent to /dev/null.
if "$PG_BIN/psql" -U accusys -d momentry -c "
  COPY (
    SELECT chunk_id, chunk_type, start_frame, end_frame, start_time, end_time, fps, text_content
    FROM dev.chunk WHERE file_uuid = '$UUID'
    ORDER BY id
  ) TO STDOUT WITH CSV HEADER;
" > "$OUTPUT/chunks.csv"; then
  # wc -l counts every line: the figure includes the CSV header, and a
  # text_content value containing newlines would add to it as well.
  echo " $(wc -l < "$OUTPUT/chunks.csv") rows"
else
  # Do not leave an empty or partial CSV behind in the package.
  rm -f "$OUTPUT/chunks.csv"
  echo " WARN: chunks export failed" >&2
fi
|
||||
|
||||
# 6. Vectors
echo "[6/8] chunk_vectors..."
# Only vectors whose chunk still exists in dev.chunk are exported (inner join).
# Stream the rows to the client with COPY ... TO STDOUT and redirect locally.
# COPY ... TO '<file>' makes the database server process write the file, which
# requires server-side file privileges and the server's view of the
# filesystem. Errors are left visible instead of being sent to /dev/null.
if "$PG_BIN/psql" -U accusys -d momentry -c "
  COPY (
    SELECT cv.chunk_id, cv.embedding::text
    FROM dev.chunk_vectors cv
    JOIN dev.chunk c ON c.file_uuid=cv.uuid AND c.chunk_id=cv.chunk_id
    WHERE cv.uuid = '$UUID'
  ) TO STDOUT WITH CSV HEADER;
" > "$OUTPUT/chunk_vectors.csv"; then
  # wc -l counts every line, so the figure includes the CSV header.
  echo " $(wc -l < "$OUTPUT/chunk_vectors.csv") rows"
else
  # Do not leave an empty or partial CSV behind in the package.
  rm -f "$OUTPUT/chunk_vectors.csv"
  echo " WARN: chunk_vectors export failed" >&2
fi
|
||||
|
||||
# 7. TKG
echo "[7/8] TKG..."
# Export both TKG tables for this file. The rows are streamed to the client
# with COPY ... TO STDOUT and redirected locally, because COPY ... TO '<file>'
# makes the database server process write the file and needs server-side file
# privileges. A failed export stops the script, as it did before under
# `set -e`, but now with the psql error visible and an explicit message.
for tkg_table in tkg_nodes tkg_edges; do
  if ! "$PG_BIN/psql" -U accusys -d momentry -c "
    COPY (SELECT * FROM dev.${tkg_table} WHERE file_uuid='$UUID') TO STDOUT WITH CSV HEADER;
  " > "$OUTPUT/${tkg_table}.csv"; then
    # Do not leave an empty or partial CSV behind in the package.
    rm -f "$OUTPUT/${tkg_table}.csv"
    echo "ERROR: ${tkg_table} export failed" >&2
    exit 1
  fi
done
echo " nodes+edges exported"
|
||||
|
||||
# 8. RELEASE_INFO
# Write a short plain-text summary of the package: release label, file id,
# build date, and counts of sentence chunks, vectors and processor files.
echo "[8/8] RELEASE_INFO..."
sentence_total=$($PG_BIN/psql -U accusys -d momentry -t -A -c "SELECT count(*) FROM dev.chunk WHERE file_uuid='$UUID' AND chunk_type='sentence';" 2>/dev/null)
vector_total=$($PG_BIN/psql -U accusys -d momentry -t -A -c "SELECT count(*) FROM dev.chunk_vectors cv JOIN dev.chunk c ON c.file_uuid=cv.uuid AND c.chunk_id=cv.chunk_id WHERE cv.uuid='$UUID';" 2>/dev/null)
{
  printf 'Release: %s\n' "${VERSION}"
  printf 'Type: file\n'
  printf 'UUID: %s\n' "${UUID}"
  printf 'Date: %s\n' "$(date +%Y-%m-%d)"
  printf '\n'
  printf 'Chunks: sentence=%s\n' "${sentence_total}"
  printf 'Vectors: %s\n' "${vector_total}"
  # tr strips the padding that some wc implementations put around the count.
  printf 'Processors: %s\n' "$(ls "$OUTPUT/processors/" 2>/dev/null | wc -l | tr -d ' ')"
} > "$OUTPUT/RELEASE_INFO.txt"
|
||||
|
||||
# Symlink latest
# Repoint "latest" at the release directory that was just written.
ln -sfn "$OUTPUT" "$PROJECT/release/files/$UUID/latest"

# Report wall-clock duration (seconds since T0), the output path and its size.
finished_at=$(date +%s)
ELAPSED=$((finished_at - T0))
printf '\n'
printf '%s\n' "=== File Package done (${ELAPSED}s) ==="
printf '%s\n' " $OUTPUT"
du -sh "$OUTPUT"
|
||||
Reference in New Issue
Block a user