- Re-ran identity_bind.py to restore identity_id on face_detections
- Dedup cleanup had removed the rows with identity_id and kept the NULL rows
- 70691 face_detections now have identity_id, across 428 identities
- Full package rebuild: 169MB sqlite, 1358MB tar.gz
- identities.json: 428 identities + 5483 bindings + 5483 trace maps
- TMDB matching complete: Audrey Hepburn 843 traces, Cary Grant 482 traces
119 lines
4.0 KiB
Bash
119 lines
4.0 KiB
Bash
#!/bin/bash
# Package File Content — single video's complete data
#
# Usage: <script> <file_uuid> [version]
#   file_uuid  value matched against the file_uuid columns of the dev.* tables
#   version    release label, used as the output directory name (default: v1.0.0)
#
# Everything is written below $PROJECT/release/files/<file_uuid>/<version>/.
set -euo pipefail

UUID="${1:?Usage: $0 <file_uuid> [version]}"
VERSION="${2:-v1.0.0}"

# Both arguments are spliced unescaped into SQL string literals and filesystem
# paths further down, so anything outside a conservative character set
# (quotes, slashes, whitespace, a leading dot, ...) is rejected up front.
readonly SAFE_ARG_RE='^[A-Za-z0-9][A-Za-z0-9._-]*$'
for arg_name in UUID VERSION; do
  if [[ ! "${!arg_name}" =~ $SAFE_ARG_RE ]]; then
    echo "ERROR: invalid ${arg_name} '${!arg_name}' (allowed: letters, digits, '.', '_', '-')" >&2
    exit 2
  fi
done

PROJECT="/Users/accusys/momentry_core_0.1"
OUTPUT="$PROJECT/release/files/$UUID/$VERSION"
OUTPUT_DEV="/Users/accusys/momentry/output_dev"
PG_BIN="/Users/accusys/pgsql/18.3/bin"
T0=$(date +%s)   # start time, used for the elapsed-seconds summary at the end

mkdir -p "$OUTPUT/processors"

echo "=== File Package ${UUID} ${VERSION} ==="

# 1. metadata
# Fetch the dev.videos row for this file as one JSON object and pretty-print it
# to metadata.json. A missing row or any psql/python failure is reported as a
# warning and packaging continues.
echo "[1/8] metadata.json..."
metadata_json="$OUTPUT/metadata.json"
# The destination path is handed to Python through argv rather than pasted into
# the Python source, so unusual characters in the path cannot break the snippet.
if "$PG_BIN/psql" -U accusys -d momentry -t -A -c "
SELECT json_build_object(
  'file_uuid', file_uuid,
  'file_name', file_name,
  'file_path', file_path,
  'file_type', file_type,
  'duration', duration,
  'width', width,
  'height', height,
  'fps', fps,
  'status', status,
  'total_frames', total_frames,
  'registration_time', registration_time::text
) FROM dev.videos WHERE file_uuid='$UUID';
" 2>/dev/null \
  | python3 -c 'import json, sys
record = json.load(sys.stdin)
with open(sys.argv[1], "w") as out:
    json.dump(record, out, indent=2)
' "$metadata_json" 2>/dev/null; then
  # Only report a size when the file was really written in this run.
  echo " $(ls -lh "$metadata_json" | awk '{print $5}')"
else
  echo " WARN: no metadata"
fi

# 2. Processor outputs
# Copy every per-processor JSON that exists for this file into processors/;
# processors without an output file are skipped without comment.
echo "[2/8] Processor outputs..."
processor_types=(asr asrx asr-1 yolo face pose ocr cut scene)
for proc in "${processor_types[@]}"; do
  proc_json="$OUTPUT_DEV/${UUID}.${proc}.json"
  [ -f "$proc_json" ] || continue
  cp "$proc_json" "$OUTPUT/processors/"
  echo " ${proc}.json"
done

#######################################
# Export the result of a SELECT to a CSV file (with header row) through
# PostgreSQL's COPY ... TO '<file>'.
# Globals:   PG_BIN (read)
# Arguments: $1 - destination CSV path
#            $2 - SELECT statement whose result is exported
# Outputs:   psql's stdout, then the CSV's line count (this includes the
#            header line); on failure a WARN line with psql's error text
#            on stderr
# Returns:   0 always — a failed export is reported but does not stop the
#            packaging run
#######################################
export_csv() {
  local dest=$1 query=$2 psql_err
  # psql's stderr is captured so it can be shown when the export fails;
  # its stdout bypasses the capture through fd 3 and is printed as usual.
  if { psql_err=$("$PG_BIN/psql" -U accusys -d momentry -c "
COPY (
${query}
) TO '${dest}' WITH CSV HEADER;
" 2>&1 1>&3); } 3>&1; then
    echo " $(wc -l < "$dest") rows"
  else
    echo " WARN: export of ${dest##*/} failed: ${psql_err}" >&2
  fi
}

# 3. Identities (related to this file)
echo "[3/8] Identities..."
export_csv "$OUTPUT/identities.csv" "
SELECT DISTINCT i.uuid, i.name, i.identity_type, i.source, i.status, i.metadata
FROM dev.identities i
JOIN dev.identity_bindings ib ON ib.identity_id = i.id
WHERE ib.file_uuid = '$UUID'"

# 4. Face detections
echo "[4/8] Face detections..."
export_csv "$OUTPUT/face_detections.csv" "
SELECT id, file_uuid, frame_number, timestamp_secs, face_id, x, y, width, height, confidence, trace_id, identity_id
FROM dev.face_detections WHERE file_uuid = '$UUID'
ORDER BY frame_number"

# 5. Chunks
echo "[5/8] Chunks..."
export_csv "$OUTPUT/chunks.csv" "
SELECT chunk_id, chunk_type, start_frame, end_frame, start_time, end_time, fps, text_content
FROM dev.chunk WHERE file_uuid = '$UUID'
ORDER BY id"

# 6. Vectors
echo "[6/8] chunk_vectors..."
export_csv "$OUTPUT/chunk_vectors.csv" "
SELECT cv.chunk_id, cv.embedding::text
FROM dev.chunk_vectors cv
JOIN dev.chunk c ON c.file_uuid=cv.uuid AND c.chunk_id=cv.chunk_id
WHERE cv.uuid = '$UUID'"

# 7. TKG
# Export this file's rows from dev.tkg_nodes and dev.tkg_edges to CSV.
# A failed export stops the script (as it did before under `set -e`), but now
# with psql's error text and exit status instead of a silent exit.
echo "[7/8] TKG..."
for tkg_table in tkg_nodes tkg_edges; do
  # stderr is captured for the error message; stdout bypasses the capture via fd 3.
  { tkg_err=$("$PG_BIN/psql" -U accusys -d momentry -c "
COPY (SELECT * FROM dev.${tkg_table} WHERE file_uuid='$UUID') TO '$OUTPUT/${tkg_table}.csv' WITH CSV HEADER;
" 2>&1 1>&3); } 3>&1 || {
    tkg_status=$?
    echo " ERROR: ${tkg_table} export failed: ${tkg_err}" >&2
    exit "$tkg_status"
  }
done
echo " nodes+edges exported"

# 8. RELEASE_INFO
# Write a short text summary of the release: version, file id, date, the
# number of sentence chunks, the number of chunk vectors joined to a chunk,
# and how many processor files were packaged.
echo "[8/8] RELEASE_INFO..."

# A failed count query stops the script, as before, but now says which query
# failed instead of exiting without any output.
SENTENCE=$("$PG_BIN/psql" -U accusys -d momentry -t -A -c "SELECT count(*) FROM dev.chunk WHERE file_uuid='$UUID' AND chunk_type='sentence';" 2>/dev/null) || {
  count_status=$?
  echo " ERROR: sentence chunk count query failed (psql exit ${count_status})" >&2
  exit "$count_status"
}
VECTORS=$("$PG_BIN/psql" -U accusys -d momentry -t -A -c "SELECT count(*) FROM dev.chunk_vectors cv JOIN dev.chunk c ON c.file_uuid=cv.uuid AND c.chunk_id=cv.chunk_id WHERE cv.uuid='$UUID';" 2>/dev/null) || {
  count_status=$?
  echo " ERROR: chunk vector count query failed (psql exit ${count_status})" >&2
  exit "$count_status"
}

# Count packaged processor files with a glob instead of parsing `ls` output.
# The -e test skips the literal pattern left behind when nothing matches.
processor_count=0
for processor_file in "$OUTPUT/processors/"*; do
  if [[ -e "$processor_file" ]]; then
    processor_count=$((processor_count + 1))
  fi
done

cat > "$OUTPUT/RELEASE_INFO.txt" << EOF
Release: ${VERSION}
Type: file
UUID: ${UUID}
Date: $(date +%Y-%m-%d)

Chunks: sentence=${SENTENCE}
Vectors: ${VECTORS}
Processors: ${processor_count}
EOF

# Symlink latest
# Point <file_uuid>/latest at the version directory that was just built.
ln -sfn "$OUTPUT" "$PROJECT/release/files/$UUID/latest"

# Closing summary: elapsed wall-clock seconds since T0, the package path,
# and its size on disk.
ELAPSED=$(( $(date +%s) - T0 ))
printf '\n=== File Package done (%ss) ===\n %s\n' "$ELAPSED" "$OUTPUT"
du -sh "$OUTPUT"