feat: Phase 1 handover - schema migration, correction mechanism, API fixes
Schema changes: dev.chunks->dev.chunk, remove old_chunk_id/chunk_index. Correction: asr-1.json format, generate/apply scripts. API: 37/37 endpoints fixed and tested. Docs: HANDOVER_V2.0.md for M4.
This commit is contained in:
259
scripts/import_file.py
Normal file
259
scripts/import_file.py
Normal file
@@ -0,0 +1,259 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
momentry-import — 匯入檔案歷程封包
|
||||
將 export_file.py 產出的 tar.gz 匯入到目標 Momentry 系統
|
||||
|
||||
Usage:
|
||||
python3 scripts/import_file.py <package.tar.gz> [--schema <schema>]
|
||||
|
||||
Example:
|
||||
python3 scripts/import_file.py /tmp/charade_export.tar.gz --schema dev
|
||||
"""
|
||||
|
||||
import sys, os, json, argparse, tarfile, io, tempfile, shutil
|
||||
from pathlib import Path
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
# PostgreSQL connection string; the DATABASE_URL environment variable overrides the default.
DB_URL = os.environ.get("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
# Default target schema; overridden by MOMENTRY_DB_SCHEMA and used as the --schema default.
SCHEMA = os.environ.get("MOMENTRY_DB_SCHEMA", "dev")
# Directory that receives the package's output/ files; overridden by MOMENTRY_OUTPUT_DIR.
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
|
||||
|
||||
|
||||
def get_conn():
    """Open and return a new psycopg2 connection to the database at ``DB_URL``."""
    connection = psycopg2.connect(DB_URL)
    return connection
|
||||
|
||||
|
||||
def json_loads(data: bytes):
    """Decode ``data`` with the default codec (UTF-8) and parse it as JSON."""
    text = data.decode()
    parsed = json.loads(text)
    return parsed
|
||||
|
||||
|
||||
def _read_json_member(tar, name: str):
    """Read one member of the package and parse it as JSON.

    Raises:
        KeyError: if ``name`` is not present in the archive.
        ValueError: if ``name`` exists but is not a regular file.
    """
    handle = tar.extractfile(name)
    if handle is None:
        raise ValueError(f"{name} is not a regular file in the package")
    with handle:
        return json_loads(handle.read())


def _execute_in_savepoint(cur, sql: str, params) -> None:
    """Execute one statement inside a savepoint and re-raise on failure.

    A failed statement aborts the surrounding PostgreSQL transaction; rolling
    back to the savepoint keeps the rows inserted so far and lets the caller
    continue with the next row.
    """
    cur.execute("SAVEPOINT import_row")
    try:
        cur.execute(sql, params)
    except Exception:
        cur.execute("ROLLBACK TO SAVEPOINT import_row")
        raise
    cur.execute("RELEASE SAVEPOINT import_row")


def _insert_rows(cur, sql: str, rows, build_params, show_progress=False, on_error=None):
    """Insert ``rows`` one by one, best effort.

    ``build_params(row)`` produces the parameter tuple for ``sql``. A row that
    fails (bad data or a database error) is skipped without affecting the
    others. Returns ``(inserted, failed, first_error)``.
    """
    total = len(rows)
    inserted = 0
    failed = 0
    first_error = None
    for row in rows:
        try:
            _execute_in_savepoint(cur, sql, build_params(row))
        except Exception as exc:  # deliberate best effort: skip the row, keep going
            failed += 1
            if first_error is None:
                first_error = exc
            if on_error is not None:
                on_error(exc)
            continue
        inserted += 1
        if show_progress and inserted % 1000 == 0:
            print(f" ... {inserted}/{total}", end="\r")
    return inserted, failed, first_error


def _report_failures(label: str, failed: int, first_error) -> None:
    """Print a one-line summary when some rows of a table could not be imported."""
    if failed:
        print(f" ⚠️ {label}: {failed} rows failed (first error: {first_error})")


def _warn_if_video_exists(cur, schema: str, file_uuid) -> None:
    """Warn when the target system already has a video with this file_uuid."""
    cur.execute(
        f"SELECT file_uuid FROM {schema}.videos WHERE file_uuid = %s",
        (file_uuid,),
    )
    if cur.fetchone():
        print(f" ⚠️ UUID {file_uuid} 已存在於目標系統")
        # TODO: support overwrite or skip


def _merge_identities(tar, conn, cur, schema: str, member_names) -> dict:
    """Merge package identities into the target by name.

    Returns a mapping from the package's identity id to the id in the target.
    """
    identity_map = {}  # old_id -> new_id
    if "data/identities.json" not in member_names:
        print(" [IMPORT] identities: (package 無 identity 資料)")
        return identity_map

    identities = _read_json_member(tar, "data/identities.json")
    print("\n ── Identity Merge ──")
    for ident in identities:
        old_id = ident["id"]
        name = ident.get("name", "")
        # Match by name.
        cur.execute(
            f"SELECT id FROM {schema}.identities WHERE name = %s",
            (name,),
        )
        row = cur.fetchone()
        if row:
            # Already present: reuse the existing id.
            identity_map[old_id] = row[0]
            print(f" 🔗 '{name}' → 已存在 (id={row[0]}), 合併")
        else:
            # Not present: create it.
            cur.execute(
                f"INSERT INTO {schema}.identities (name) VALUES (%s) RETURNING id",
                (name,),
            )
            new_id = cur.fetchone()[0]
            identity_map[old_id] = new_id
            print(f" ✅ '{name}' → 新增 (id={new_id})")
    conn.commit()
    print(" ────────────────")
    return identity_map


def _import_identity_bindings(tar, conn, cur, schema: str, member_names, identity_map) -> None:
    """Import identity_bindings (if present), remapping identity ids."""
    if "data/identity_bindings.json" not in member_names:
        return

    bindings = _read_json_member(tar, "data/identity_bindings.json")
    # Remap up front; a binding without identity_id is a malformed package and
    # raises KeyError before anything from this step is committed.
    for b in bindings:
        b["identity_id"] = identity_map.get(b["identity_id"], b["identity_id"])

    sql = (
        f"INSERT INTO {schema}.identity_bindings "
        f"(identity_id, identity_type, identity_value, metadata, confidence) "
        f"VALUES (%s, %s, %s, %s, %s) ON CONFLICT DO NOTHING"
    )

    def build_params(b):
        return (
            b["identity_id"], b["identity_type"], b["identity_value"],
            json.dumps(b.get("metadata", {})), b.get("confidence", 1.0),
        )

    def report(exc):
        print(f" ⚠️ binding 匯入失敗: {exc}")

    _insert_rows(cur, sql, bindings, build_params, on_error=report)
    conn.commit()
    print(f" [IMPORT] identity_bindings: {len(bindings)} rows")


def _upsert_video(tar, conn, cur, schema: str, file_uuid) -> None:
    """Insert the videos row, or refresh path/name/status if it already exists."""
    video_data = _read_json_member(tar, "data/video.json")
    cur.execute(
        f"""
        INSERT INTO {schema}.videos
        (file_uuid, file_path, file_name, file_type, duration, width, height,
        fps, total_frames, probe_json, status)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'completed')
        ON CONFLICT (file_uuid) DO UPDATE SET
        file_path = EXCLUDED.file_path,
        file_name = EXCLUDED.file_name,
        status = 'completed'
        """,
        (
            file_uuid,
            video_data.get("file_path", ""),
            video_data.get("file_name", ""),
            video_data.get("file_type", "video"),
            video_data.get("duration"),
            video_data.get("width"),
            video_data.get("height"),
            float(video_data.get("fps") or 0),
            video_data.get("total_frames"),
            json.dumps(video_data.get("probe_json", {})),
        ),
    )
    conn.commit()
    print(" [IMPORT] videos: ✅")


def _extract_output_files(tar, members) -> None:
    """Copy every regular file under ``output/`` in the package into OUTPUT_DIR.

    Member names come from the package, so any path that would resolve
    outside OUTPUT_DIR (``..`` segments, absolute paths) is skipped.
    """
    prefix = "output/"
    output_root = Path(OUTPUT_DIR).resolve()
    for member in members:
        if not (member.name.startswith(prefix) and member.isfile()):
            continue
        # Strip only the leading prefix; later "output/" segments are kept.
        fname = member.name[len(prefix):]
        dst = (output_root / fname).resolve()
        if output_root not in dst.parents:
            print(f" ⚠️ skipped unsafe path in package: {member.name}")
            continue
        dst.parent.mkdir(parents=True, exist_ok=True)
        with tar.extractfile(member) as src_f:
            with open(dst, "wb") as dst_f:
                shutil.copyfileobj(src_f, dst_f)
        print(f" [IMPORT] output/{fname} ({member.size // 1024}KB)")
    print(" [IMPORT] output files: 完成")


def _import_pre_chunks(tar, conn, cur, schema: str, file_uuid, member_names) -> None:
    """Import pre_chunks rows (if present) for the video identified by file_uuid."""
    if "data/pre_chunks.json" not in member_names:
        return

    pre_chunks = _read_json_member(tar, "data/pre_chunks.json")
    # pre_chunks rows carry the videos table id alongside the file_uuid.
    cur.execute(f"SELECT id FROM {schema}.videos WHERE file_uuid = %s", (file_uuid,))
    file_row = cur.fetchone()
    if not file_row:
        print(" [IMPORT] pre_chunks: 無法取得 file_id")
        return
    file_id = file_row[0]

    sql = (
        f"INSERT INTO {schema}.pre_chunks "
        f"(file_id, file_uuid, processor_type, coordinate_type, "
        f"coordinate_index, start_frame, end_frame, start_time, end_time, "
        f"fps, data) "
        f"VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) "
        f"ON CONFLICT DO NOTHING"
    )

    def build_params(pc):
        return (
            file_id, file_uuid,
            pc.get("processor_type"), pc.get("coordinate_type"),
            pc.get("coordinate_index"),
            pc.get("start_frame"), pc.get("end_frame"),
            pc.get("start_time"), pc.get("end_time"),
            pc.get("fps"), json.dumps(pc.get("data", {})),
        )

    inserted, failed, first_error = _insert_rows(
        cur, sql, pre_chunks, build_params, show_progress=True
    )
    conn.commit()
    print(f" [IMPORT] pre_chunks: {inserted} rows \n")
    _report_failures("pre_chunks", failed, first_error)


def _import_processor_results(tar, conn, cur, schema: str, file_uuid, member_names) -> None:
    """Import processor_results rows (if present); job_id is always written as 0."""
    if "data/processor_results.json" not in member_names:
        return

    results = _read_json_member(tar, "data/processor_results.json")
    sql = (
        f"INSERT INTO {schema}.processor_results "
        f"(job_id, file_uuid, processor, status, chunks_produced, frames_processed) "
        f"VALUES (0, %s, %s, %s, %s, %s) ON CONFLICT DO NOTHING"
    )

    def build_params(r):
        return (
            file_uuid, r.get("processor"), r.get("status"),
            r.get("chunks_produced", 0), r.get("frames_processed", 0),
        )

    _, failed, first_error = _insert_rows(cur, sql, results, build_params)
    conn.commit()
    print(f" [IMPORT] processor_results: {len(results)} rows")
    _report_failures("processor_results", failed, first_error)


def _import_face_detections(tar, conn, cur, schema: str, file_uuid, member_names, identity_map) -> None:
    """Import face_detections rows from the first available source file.

    Only the columns listed in the INSERT are written; identity ids are
    remapped through ``identity_map``.
    """
    candidates = ("data/face_detections.json", "data/face_detections_meta.json")
    source = next((c for c in candidates if c in member_names), None)
    if source is None:
        return

    fds = _read_json_member(tar, source)
    sql = (
        f"INSERT INTO {schema}.face_detections "
        f"(file_uuid, face_id, frame_number, x, y, width, height, "
        f"confidence, identity_id, trace_id) "
        f"VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) "
        f"ON CONFLICT DO NOTHING"
    )

    def build_params(fd):
        return (
            file_uuid,
            fd.get("face_id"),
            fd.get("frame_number"),
            fd.get("x"), fd.get("y"),
            fd.get("width"), fd.get("height"),
            fd.get("confidence"),
            identity_map.get(fd.get("identity_id"), fd.get("identity_id")),
            fd.get("trace_id"),
        )

    inserted, failed, first_error = _insert_rows(
        cur, sql, fds, build_params, show_progress=True
    )
    conn.commit()
    print(f" [IMPORT] face_detections: {inserted} rows \n")
    _report_failures("face_detections", failed, first_error)


def import_package(package_path: str, schema: str) -> None:
    """Import a file-history package (tar.gz) into the target database.

    Steps, in order: warn if the file_uuid already exists, merge identities by
    name, import identity bindings, upsert the videos row, copy ``output/``
    files into OUTPUT_DIR, then import pre_chunks, processor_results and
    face_detections. Row-level inserts are best effort: a failing row is
    skipped and reported, and the remaining rows are still committed.

    Args:
        package_path: Path to the ``.tar.gz`` package.
        schema: Target database schema. NOTE: it is interpolated into the SQL
            text as an identifier, so it must come from a trusted source.

    Raises:
        KeyError: if a required member (``manifest.json``, ``data/video.json``)
            or a required key such as ``file_uuid`` is missing.
        ValueError: if a JSON member exists but is not a regular file.
    """
    print(f"[IMPORT] Opening {package_path}...")

    with tarfile.open(package_path, "r:gz") as tar:
        # Read the manifest.
        manifest = _read_json_member(tar, "manifest.json")
        uuid = manifest["file_uuid"]
        file_name = manifest.get("file_name", "?")
        exported_at = manifest.get("exported_at", "?")
        completeness = manifest.get("completeness", {})
        merge_policy = manifest.get("merge_policy", {})
        print(f"[IMPORT] File: {file_name} ({uuid})")
        print(f"[IMPORT] Exported at: {exported_at}")
        print(f"[IMPORT] Completeness: {completeness}")
        print(f"[IMPORT] Merge policy: {merge_policy}")

        # Index the archive once instead of rescanning it for every lookup.
        members = tar.getmembers()
        member_names = {m.name for m in members}

        conn = get_conn()
        try:
            cur = conn.cursor()
            try:
                # Step 1: check whether the target already has this file_uuid.
                _warn_if_video_exists(cur, schema, uuid)
                # Step 2: identities (merged first so later steps can remap ids).
                identity_map = _merge_identities(tar, conn, cur, schema, member_names)
                # Step 3: identity_bindings, if present.
                _import_identity_bindings(tar, conn, cur, schema, member_names, identity_map)
                # Step 4: videos row.
                _upsert_video(tar, conn, cur, schema, uuid)
                # Step 5: output JSON files.
                _extract_output_files(tar, members)
                # Step 6: pre_chunks.
                _import_pre_chunks(tar, conn, cur, schema, uuid, member_names)
                # Step 7: processor_results.
                _import_processor_results(tar, conn, cur, schema, uuid, member_names)
                # Step 8: face_detections.
                _import_face_detections(tar, conn, cur, schema, uuid, member_names, identity_map)
            finally:
                cur.close()
        finally:
            # Always release the connection, including when a step raises.
            conn.close()

    print(f"\n[IMPORT] ✅ 完成: {file_name} 已匯入 (file_uuid={uuid})")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, verify the package exists, run the import.

    Exits with status 1 when the package path does not exist.
    """
    cli = argparse.ArgumentParser(description="Import file processing history package")
    cli.add_argument("package", help="Path to .tar.gz package")
    cli.add_argument("--schema", default=SCHEMA, help="Target DB schema")
    opts = cli.parse_args()

    package_path = opts.package
    if os.path.exists(package_path):
        import_package(package_path, opts.schema)
        return

    print(f"[IMPORT] ❌ Package not found: {package_path}")
    sys.exit(1)
|
||||
|
||||
|
||||
# Run the importer only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user