Files
momentry_core/scripts/package_file.sh
Accusys 48c3b13c37 fix: restore identity_id after face_dedup, rebuild package v20260512
- Re-ran identity_bind.py to restore identity_id on face_detections
- Dedup cleanup had removed rows with identity_id, kept NULL rows
- 70691 face_detections now have identity_id, 428 identities
- Full package rebuild: 169MB sqlite, 1358MB tar.gz
- identities.json: 428 identities + 5483 bindings + 5483 trace maps
- TMDB matching complete: Audrey Hepburn 843 traces, Cary Grant 482
2026-05-13 04:30:18 +08:00

119 lines
4.0 KiB
Bash

#!/bin/bash
# Package File Content — single video's complete data
#
# Usage: package_file.sh <file_uuid> [version]
#   file_uuid  identifier of the video; compared against the file_uuid/uuid
#              columns of the dev.* tables and used as a directory name
#   version    release label, used as the output sub-directory (default v1.0.0)
#
# Output goes to $PROJECT/release/files/<file_uuid>/<version>/.
set -euo pipefail
UUID="${1:?Usage: $0 <file_uuid> [version]}"
VERSION="${2:-v1.0.0}"

# Both arguments are spliced verbatim into SQL statements and filesystem paths
# further down, so only a conservative character set is accepted here. This
# blocks quote-based SQL injection and '../' path traversal at the entry point.
# The patterns live in variables so that [[ =~ ]] treats them as regexes
# consistently across bash versions.
uuid_pattern='^[A-Za-z0-9_-]+$'
version_pattern='^[A-Za-z0-9][A-Za-z0-9._-]*$'
if ! [[ "$UUID" =~ $uuid_pattern ]]; then
  echo "ERROR: invalid file_uuid '$UUID' (allowed: letters, digits, '_', '-')" >&2
  exit 1
fi
if ! [[ "$VERSION" =~ $version_pattern ]]; then
  echo "ERROR: invalid version '$VERSION' (allowed: letters, digits, '.', '_', '-'; must start with a letter or digit)" >&2
  exit 1
fi

PROJECT="/Users/accusys/momentry_core_0.1"
OUTPUT="$PROJECT/release/files/$UUID/$VERSION"
OUTPUT_DEV="/Users/accusys/momentry/output_dev"
PG_BIN="/Users/accusys/pgsql/18.3/bin"
# Start timestamp (epoch seconds); used at the end to report elapsed time.
T0=$(date +%s)
mkdir -p "$OUTPUT/processors"
echo "=== File Package ${UUID} ${VERSION} ==="
# 1. metadata
# Fetch the dev.videos row for this file as one JSON object and pretty-print
# it to metadata.json. psql runs with -t -A (tuples only, unaligned) so its
# stdout is just the JSON text. A missing row yields empty output, which makes
# json.load fail and takes the WARN branch.
echo "[1/8] metadata.json..."
metadata_json="$OUTPUT/metadata.json"
# The destination path is handed to Python through argv instead of being
# pasted into the program text, so unusual characters in the path cannot
# break (or alter) the Python code.
if "$PG_BIN/psql" -U accusys -d momentry -t -A -c "
SELECT json_build_object(
'file_uuid', file_uuid,
'file_name', file_name,
'file_path', file_path,
'file_type', file_type,
'duration', duration,
'width', width,
'height', height,
'fps', fps,
'status', status,
'total_frames', total_frames,
'registration_time', registration_time::text
) FROM dev.videos WHERE file_uuid='$UUID';
" 2>/dev/null | python3 -c '
import json, sys
record = json.load(sys.stdin)
with open(sys.argv[1], "w") as out:
    json.dump(record, out, indent=2)
' "$metadata_json" 2>/dev/null; then
  # Only report a size when the file was actually written; previously ls ran
  # unconditionally and complained about a missing file after the WARN.
  echo " $(ls -lh "$metadata_json" | awk '{print $5}')"
else
  echo " WARN: no metadata"
fi
# 2. Processor outputs
# Copy every per-processor JSON that exists for this file from the dev output
# directory into the package; processor types without a file are skipped.
echo "[2/8] Processor outputs..."
processor_types=(asr asrx asr-1 yolo face pose ocr cut scene)
for kind in "${processor_types[@]}"; do
  candidate="$OUTPUT_DEV/${UUID}.${kind}.json"
  [[ -f "$candidate" ]] || continue
  cp "$candidate" "$OUTPUT/processors/"
  echo " ${kind}.json"
done
# 3. Identities (related to this file)
# Export the distinct identities that have at least one binding to this file.
#
# COPY ... TO STDOUT streams the CSV back to this process, which writes it to
# disk itself. The earlier server-side "COPY ... TO '<path>'" form required the
# database role to be allowed to write server files and created the file as
# the server process rather than as the invoking user.
echo "[3/8] Identities..."
identities_csv="$OUTPUT/identities.csv"
# stderr is captured (not discarded) so the cause can be shown on failure;
# stdout goes to a temp file that is renamed only after psql succeeds, so a
# failed export never leaves a truncated CSV behind.
if identities_err=$("$PG_BIN/psql" -U accusys -d momentry -c "
COPY (
SELECT DISTINCT i.uuid, i.name, i.identity_type, i.source, i.status, i.metadata
FROM dev.identities i
JOIN dev.identity_bindings ib ON ib.identity_id = i.id
WHERE ib.file_uuid = '$UUID'
) TO STDOUT WITH CSV HEADER;
" 2>&1 >"$identities_csv.tmp"); then
  mv -f "$identities_csv.tmp" "$identities_csv"
  # Line count minus the CSV header line (fields with embedded newlines would
  # still inflate this figure).
  echo " $(( $(wc -l < "$identities_csv") - 1 )) rows"
else
  rm -f "$identities_csv.tmp"
  # Best-effort step: report and continue, as before, but no longer silently.
  echo " WARN: identities export failed: ${identities_err:-unknown error}" >&2
fi
# 4. Face detections
# Export this file's face detection rows (bounding box, confidence, trace and
# identity ids) ordered by frame number.
#
# COPY ... TO STDOUT is used so the CSV is written by this process instead of
# by the database server, which avoids needing server-side file-write
# privileges and a path that the server can reach.
echo "[4/8] Face detections..."
faces_csv="$OUTPUT/face_detections.csv"
# Capture psql's stderr for the failure message; write stdout to a temp file
# and rename on success so a failed run cannot leave a partial CSV.
if faces_err=$("$PG_BIN/psql" -U accusys -d momentry -c "
COPY (
SELECT id, file_uuid, frame_number, timestamp_secs, face_id, x, y, width, height, confidence, trace_id, identity_id
FROM dev.face_detections WHERE file_uuid = '$UUID'
ORDER BY frame_number
) TO STDOUT WITH CSV HEADER;
" 2>&1 >"$faces_csv.tmp"); then
  mv -f "$faces_csv.tmp" "$faces_csv"
  # Line count minus the CSV header line.
  echo " $(( $(wc -l < "$faces_csv") - 1 )) rows"
else
  rm -f "$faces_csv.tmp"
  # Best-effort step: report and continue, as before, but no longer silently.
  echo " WARN: face_detections export failed: ${faces_err:-unknown error}" >&2
fi
# 5. Chunks
# Export this file's chunk rows (type, frame/time range, fps, text) ordered by
# the table's id column.
#
# COPY ... TO STDOUT is used so the CSV is written by this process instead of
# by the database server, which avoids needing server-side file-write
# privileges and a path that the server can reach.
echo "[5/8] Chunks..."
chunks_csv="$OUTPUT/chunks.csv"
# Capture psql's stderr for the failure message; write stdout to a temp file
# and rename on success so a failed run cannot leave a partial CSV.
if chunks_err=$("$PG_BIN/psql" -U accusys -d momentry -c "
COPY (
SELECT chunk_id, chunk_type, start_frame, end_frame, start_time, end_time, fps, text_content
FROM dev.chunk WHERE file_uuid = '$UUID'
ORDER BY id
) TO STDOUT WITH CSV HEADER;
" 2>&1 >"$chunks_csv.tmp"); then
  mv -f "$chunks_csv.tmp" "$chunks_csv"
  # Line count minus the CSV header line; text_content values that contain
  # newlines span several lines, so treat this as an upper bound.
  echo " $(( $(wc -l < "$chunks_csv") - 1 )) rows"
else
  rm -f "$chunks_csv.tmp"
  # Best-effort step: report and continue, as before, but no longer silently.
  echo " WARN: chunks export failed: ${chunks_err:-unknown error}" >&2
fi
# 6. Vectors
# Export the embeddings (cast to text) for this file. The join against
# dev.chunk keeps only vectors whose (uuid, chunk_id) pair has a matching
# chunk row.
#
# COPY ... TO STDOUT is used so the CSV is written by this process instead of
# by the database server, which avoids needing server-side file-write
# privileges and a path that the server can reach.
echo "[6/8] chunk_vectors..."
vectors_csv="$OUTPUT/chunk_vectors.csv"
# Capture psql's stderr for the failure message; write stdout to a temp file
# and rename on success so a failed run cannot leave a partial CSV.
if vectors_err=$("$PG_BIN/psql" -U accusys -d momentry -c "
COPY (
SELECT cv.chunk_id, cv.embedding::text
FROM dev.chunk_vectors cv
JOIN dev.chunk c ON c.file_uuid=cv.uuid AND c.chunk_id=cv.chunk_id
WHERE cv.uuid = '$UUID'
) TO STDOUT WITH CSV HEADER;
" 2>&1 >"$vectors_csv.tmp"); then
  mv -f "$vectors_csv.tmp" "$vectors_csv"
  # Line count minus the CSV header line.
  echo " $(( $(wc -l < "$vectors_csv") - 1 )) rows"
else
  rm -f "$vectors_csv.tmp"
  # Best-effort step: report and continue, as before, but no longer silently.
  echo " WARN: chunk_vectors export failed: ${vectors_err:-unknown error}" >&2
fi
# 7. TKG
# Export all columns of this file's rows from dev.tkg_nodes and dev.tkg_edges.
#
# A failure here still aborts the script (as it did before under `set -e`),
# but the psql error is now printed first; previously stderr was discarded
# and the script simply stopped with no explanation.
# COPY ... TO STDOUT writes the CSV from this process rather than from the
# database server, so no server-side file-write privilege is needed.
echo "[7/8] TKG..."
for tkg_table in tkg_nodes tkg_edges; do
  tkg_csv="$OUTPUT/${tkg_table}.csv"
  # stderr is captured for the error message; stdout goes to a temp file that
  # is renamed only on success so no partial CSV is left behind.
  if ! tkg_err=$("$PG_BIN/psql" -U accusys -d momentry -c "
COPY (SELECT * FROM dev.${tkg_table} WHERE file_uuid='$UUID') TO STDOUT WITH CSV HEADER;
" 2>&1 >"$tkg_csv.tmp"); then
    rm -f "$tkg_csv.tmp"
    echo " ERROR: ${tkg_table} export failed: ${tkg_err:-unknown error}" >&2
    exit 1
  fi
  mv -f "$tkg_csv.tmp" "$tkg_csv"
done
echo " nodes+edges exported"
# 8. RELEASE_INFO
# Write a small plain-text summary of the package: version, uuid, build date,
# sentence-chunk count, vector count and number of processor files.
echo "[8/8] RELEASE_INFO..."
# A failing count query still aborts the script (as it did before under
# `set -e`), but now says which query failed instead of exiting silently.
if ! SENTENCE=$("$PG_BIN/psql" -U accusys -d momentry -t -A -c "SELECT count(*) FROM dev.chunk WHERE file_uuid='$UUID' AND chunk_type='sentence';" 2>/dev/null); then
  echo " ERROR: could not count sentence chunks for ${UUID}" >&2
  exit 1
fi
if ! VECTORS=$("$PG_BIN/psql" -U accusys -d momentry -t -A -c "SELECT count(*) FROM dev.chunk_vectors cv JOIN dev.chunk c ON c.file_uuid=cv.uuid AND c.chunk_id=cv.chunk_id WHERE cv.uuid='$UUID';" 2>/dev/null); then
  echo " ERROR: could not count chunk vectors for ${UUID}" >&2
  exit 1
fi
# Count the (non-hidden) entries in processors/ with a glob rather than by
# parsing ls output. When the directory is empty or missing the pattern stays
# literal, which the -e test filters out, leaving the count at 0.
processor_count=0
for processor_entry in "$OUTPUT/processors"/*; do
  if [[ -e "$processor_entry" ]]; then
    processor_count=$((processor_count + 1))
  fi
done
cat > "$OUTPUT/RELEASE_INFO.txt" << EOF
Release: ${VERSION}
Type: file
UUID: ${UUID}
Date: $(date +%Y-%m-%d)
Chunks: sentence=${SENTENCE}
Vectors: ${VECTORS}
Processors: ${processor_count}
EOF
# Symlink latest
# Repoint <uuid>/latest at the directory just built (-n replaces an existing
# link instead of descending into it), then print timing and size.
latest_link="$PROJECT/release/files/$UUID/latest"
ln -sfn "$OUTPUT" "$latest_link"
ELAPSED=$(( $(date +%s) - T0 ))
printf '\n=== File Package done (%ss) ===\n' "$ELAPSED"
printf ' %s\n' "$OUTPUT"
du -sh "$OUTPUT"