feat: Initial v0.9 release with API Key authentication

## v0.9.20260325_144654

### Features
- API Key Authentication System
- Job Worker System
- V2 Backup Versioning

### Bug Fixes
- get_processor_results_by_job column mapping

Co-authored-by: OpenCode
2026-03-25 14:52:51 +08:00
parent 47e86b696f
commit 383201cacd
193 changed files with 40268 additions and 422 deletions
--- a/scripts/test_v2_model.py
+++ b/scripts/test_v2_model.py
@@ -0,0 +1,188 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Vector Search Test with nomic-embed-text:v1.5 using prefixes
+"""
+
+import time
+import requests
+import psycopg2
+import uuid
+
+
+# Value matched against chunks.uuid when sync_to_qdrant() selects rows.
+VIDEO_UUID = "39567a0eb16f39fd"
+
+# Keyword arguments passed unchanged to psycopg2.connect().
+# NOTE(review): the password is hard-coded; presumably a local test
+# credential -- confirm before sharing or reusing this script.
+POSTGRES_CONFIG = {
+    "host": "localhost",
+    "port": 5432,
+    "user": "accusys",
+    "password": "Test3200",
+    "database": "momentry",
+}
+
+# Model name sent in every request to the Ollama embeddings endpoint.
+MODEL = "nomic-embed-text:v1.5"
+# Qdrant collection used by both the upload and the search requests.
+QDRANT_COLLECTION = "chunks_v2"
+
+
+def get_embedding(text, prefix="", timeout=60):
+    """Return the Ollama embedding for ``prefix + text``.
+
+    Args:
+        text: Text to embed.
+        prefix: String prepended to ``text`` before it is sent as the prompt
+            (this script passes "search_document: " or "search_query: ").
+        timeout: Seconds to wait for the Ollama server before giving up.
+
+    Returns:
+        The value of the "embedding" field of the JSON response.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+        requests.Timeout: If no response arrives within ``timeout`` seconds.
+    """
+    resp = requests.post(
+        "http://localhost:11434/api/embeddings",
+        json={"model": MODEL, "prompt": f"{prefix}{text}"},
+        # Without a timeout, requests waits indefinitely on a stalled server.
+        timeout=timeout,
+    )
+    # Surface the HTTP failure itself instead of a KeyError on "embedding".
+    resp.raise_for_status()
+    return resp.json()["embedding"]
+
+
+def sync_to_qdrant():
+    """Embed the sentence chunks of VIDEO_UUID and upload them to Qdrant.
+
+    Reads the rows from the ``chunks`` table, embeds each non-empty text with
+    the "search_document: " prefix, and PUTs the resulting points to the
+    QDRANT_COLLECTION collection in batches of 100. Progress is printed to
+    stdout; uploading stops at the first response whose status is not 200.
+
+    Returns:
+        None.
+    """
+    conn = psycopg2.connect(**POSTGRES_CONFIG)
+    # The database is only needed for this one query, so release the cursor
+    # and connection right away -- and even if the query itself raises.
+    try:
+        cur = conn.cursor()
+        try:
+            cur.execute(
+                """
+        SELECT chunk_id, content->>'text' as text, start_time, end_time, uuid
+        FROM chunks
+        WHERE uuid = %s AND chunk_type = 'sentence'
+        ORDER BY chunk_index
+    """,
+                (VIDEO_UUID,),
+            )
+            rows = cur.fetchall()
+        finally:
+            cur.close()
+    finally:
+        conn.close()
+
+    print(f"Syncing {len(rows)} chunks to Qdrant with {MODEL}")
+
+    points = []
+    for chunk_id, text, start_time, end_time, vid in rows:
+        if not text:
+            continue
+
+        # Use search_document: prefix for chunks
+        embedding = get_embedding(text, "search_document: ")
+
+        # uuid5 is deterministic, so the same chunk_id always maps to the
+        # same point id.
+        point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, chunk_id))
+
+        payload = {
+            "uuid": vid,
+            "chunk_id": chunk_id,
+            "chunk_type": "sentence",
+            "start_time": float(start_time),
+            "end_time": float(end_time),
+            # Only the first 200 characters of the text go into the payload.
+            "text": text[:200],
+        }
+
+        points.append({"id": point_id, "vector": embedding, "payload": payload})
+
+    # Upload in batches
+    batch_size = 100
+    total_batches = (len(points) + batch_size - 1) // batch_size
+    for batch_no, start in enumerate(range(0, len(points), batch_size), 1):
+        resp = requests.put(
+            f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points",
+            headers={
+                "api-key": "Test3200Test3200Test3200",
+                "Content-Type": "application/json",
+            },
+            json={"points": points[start : start + batch_size]},
+            # Without a timeout, requests waits indefinitely on a stalled server.
+            timeout=60,
+        )
+        if resp.status_code != 200:
+            print(f"Error: {resp.text[:200]}")
+            break
+        print(f"Uploaded batch {batch_no}/{total_batches}")
+    else:
+        # Reached only when no batch failed (including the no-points case),
+        # so an aborted upload is no longer reported as finished.
+        print("Done!")
+
+
+def test_queries(queries, use_prefix=True):
+    """Run each query against Qdrant and print the top five hits.
+
+    Args:
+        queries: Iterable of query strings.
+        use_prefix: When true, "search_query: " is prepended to each query
+            before it is embedded; otherwise the query is embedded as-is.
+
+    Returns:
+        None. For every query the search latency in milliseconds is printed,
+        followed by the score and (truncated) point id of each hit, or by the
+        error text when the search response status is not 200.
+    """
+    prefix = "search_query: " if use_prefix else ""
+
+    for query in queries:
+        embedding = get_embedding(query, prefix)
+
+        # perf_counter is monotonic, so the interval cannot be skewed by
+        # wall-clock adjustments; only the search request is timed.
+        start = time.perf_counter()
+        resp = requests.post(
+            f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points/search",
+            headers={"api-key": "Test3200Test3200Test3200"},
+            json={"vector": embedding, "limit": 5},
+            # Without a timeout, requests waits indefinitely on a stalled server.
+            timeout=60,
+        )
+        elapsed = (time.perf_counter() - start) * 1000
+
+        print(f"\nQuery: '{query}' ({elapsed:.1f}ms)")
+        print("-" * 50)
+
+        if resp.status_code != 200:
+            # Report the failure instead of showing it as an empty result list.
+            print(f"Error: {resp.text[:200]}")
+            continue
+
+        results = resp.json().get("result", [])
+        for rank, hit in enumerate(results, 1):
+            # str() keeps the slice valid when the point id is an integer.
+            point_id = str(hit.get("id", ""))[:20]
+            score = hit.get("score", 0)
+            print(f"  {rank}. [{score:.3f}] {point_id}")
+
+
+# English queries passed to test_queries() when the script runs in its
+# default (non-"sync") mode.
+ENGLISH_QUERIES = [
+    "a person talking",
+    "someone speaking on camera",
+    "outdoor scene",
+    "indoor setting",
+    "walking or moving",
+    "dialogue or conversation",
+    "looking at something",
+    "happy or joyful",
+    "serious or dramatic",
+    "comedy or funny",
+    "wearing a tie",
+    "holding an object",
+    "sitting on a chair",
+    "city or urban",
+    "building or room",
+    "open space",
+]
+
+# Chinese queries: counterparts of the English list above, in the same
+# order, except that "someone speaking on camera" has no entry here.
+CHINESE_QUERIES = [
+    "有人在說話",
+    "戶外場景",
+    "室內場景",
+    "走路或移動",
+    "對話或交談",
+    "看著某樣東西",
+    "快樂或開心",
+    "嚴肅或戲劇性",
+    "喜劇或有趣",
+    "戴著領帶",
+    "拿著東西",
+    "坐在椅子上",
+    "城市或都市",
+    "建築物或房間",
+    "開放空間",
+]
+
+
+if __name__ == "__main__":
+    # Entry point: "sync" as the first argument uploads vectors; anything
+    # else (or no argument) runs the English and Chinese query sets.
+    import sys
+
+    rule = "=" * 60
+    sync_requested = sys.argv[1:2] == ["sync"]
+
+    if sync_requested:
+        header = (
+            rule,
+            f"Syncing vectors to {QDRANT_COLLECTION}",
+            f"Model: {MODEL}",
+            "Prefix for chunks: search_document:",
+            rule,
+        )
+        for line in header:
+            print(line)
+        sync_to_qdrant()
+    else:
+        header = (
+            rule,
+            f"Testing with {QDRANT_COLLECTION}",
+            f"Model: {MODEL}",
+            "Prefix for queries: search_query:",
+            rule,
+        )
+        for line in header:
+            print(line)
+
+        # Each query set gets its own banner before it is run.
+        query_sets = (
+            ("ENGLISH QUERIES", ENGLISH_QUERIES),
+            ("CHINESE QUERIES", CHINESE_QUERIES),
+        )
+        for title, queries in query_sets:
+            print("\n" + rule)
+            print(title)
+            print(rule)
+            test_queries(queries)