feat: Initial v0.9 release with API Key authentication
## v0.9.20260325_144654

### Features
- API Key Authentication System
- Job Worker System
- V2 Backup Versioning

### Bug Fixes
- get_processor_results_by_job column mapping

Co-authored-by: OpenCode
This commit is contained in:
188
scripts/test_v2_model.py
Normal file
188
scripts/test_v2_model.py
Normal file
@@ -0,0 +1,188 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Vector Search Test with nomic-embed-text:v1.5 using prefixes
|
||||
"""
|
||||
|
||||
import os
import time
import uuid

import psycopg2
import requests
|
||||
|
||||
|
||||
# The `uuid` column value selecting which rows of the `chunks` table are synced.
VIDEO_UUID = "39567a0eb16f39fd"

# Keyword arguments passed straight to psycopg2.connect().
POSTGRES_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "user": "accusys",
    # Prefer a password supplied through the environment so the credential
    # does not have to live in source control; the literal is kept only as
    # a backward-compatible default for the local test setup.
    "password": os.environ.get("PGPASSWORD", "Test3200"),
    "database": "momentry",
}

# Ollama model name sent with every embedding request.
MODEL = "nomic-embed-text:v1.5"
# Qdrant collection that points are written to and searched in.
QDRANT_COLLECTION = "chunks_v2"
|
||||
|
||||
|
||||
def get_embedding(text, prefix=""):
    """Return the embedding Ollama produces for ``prefix + text``.

    Args:
        text: The text to embed.
        prefix: String prepended verbatim to ``text`` before it is sent
            (callers in this file pass ``"search_document: "`` or
            ``"search_query: "``).

    Returns:
        The value of the ``"embedding"`` field of the JSON response.

    Raises:
        requests.HTTPError: If Ollama answers with a 4xx/5xx status.
        requests.Timeout: If Ollama does not answer within the timeout.
        KeyError: If the response body has no ``"embedding"`` field.
    """
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": MODEL, "prompt": f"{prefix}{text}"},
        # requests has no default timeout; without one a hung server would
        # block the script forever.
        timeout=120,
    )
    # Surface HTTP errors with their status instead of an opaque KeyError
    # on the missing "embedding" field below.
    resp.raise_for_status()
    return resp.json()["embedding"]
|
||||
|
||||
|
||||
def sync_to_qdrant():
    """Embed the sentence chunks of ``VIDEO_UUID`` and upsert them into Qdrant.

    Reads the ``chunk_type = 'sentence'`` rows for ``VIDEO_UUID`` from the
    ``chunks`` table, embeds every non-empty text with the
    ``"search_document: "`` prefix, and PUTs the resulting points to the
    ``QDRANT_COLLECTION`` collection in batches of 100. Progress is printed
    to stdout. The upload stops at the first batch that Qdrant does not
    answer with HTTP 200, and "Done!" is printed only when every batch was
    accepted.

    Returns:
        None.
    """
    # Fetch everything up front and release the database resources before
    # the (slow) embedding loop; try/finally guarantees the cursor and the
    # connection are closed even when the query raises.
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                """
                SELECT chunk_id, content->>'text' as text, start_time, end_time, uuid
                FROM chunks
                WHERE uuid = %s AND chunk_type = 'sentence'
                ORDER BY chunk_index
                """,
                (VIDEO_UUID,),
            )
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    print(f"Syncing {len(rows)} chunks to Qdrant with {MODEL}")

    points = []
    for chunk_id, text, start_time, end_time, vid in rows:
        if not text:
            # Rows without text have nothing to embed.
            continue

        # Use search_document: prefix for chunks
        embedding = get_embedding(text, "search_document: ")

        # Derive the point id deterministically from chunk_id so that
        # re-running the sync overwrites points instead of duplicating them.
        point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, chunk_id))

        payload = {
            "uuid": vid,
            "chunk_id": chunk_id,
            "chunk_type": "sentence",
            "start_time": float(start_time),
            "end_time": float(end_time),
            # Only the first 200 characters are stored in the payload.
            "text": text[:200],
        }
        points.append({"id": point_id, "vector": embedding, "payload": payload})

    # Upload in batches
    batch_size = 100
    total_batches = (len(points) + batch_size - 1) // batch_size
    for batch_no, offset in enumerate(range(0, len(points), batch_size), start=1):
        batch = points[offset : offset + batch_size]
        resp = requests.put(
            f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points",
            headers={
                "api-key": "Test3200Test3200Test3200",
                "Content-Type": "application/json",
            },
            json={"points": batch},
            # requests has no default timeout; avoid hanging on a stuck server.
            timeout=60,
        )
        if resp.status_code != 200:
            print(f"Error: {resp.text[:200]}")
            # Do not fall through to "Done!": the sync is incomplete.
            print(f"Aborted after {batch_no - 1}/{total_batches} batches")
            return
        print(f"Uploaded batch {batch_no}/{total_batches}")

    print("Done!")
|
||||
|
||||
|
||||
def test_queries(queries, use_prefix=True):
    """Search Qdrant for each query and print the top five hits.

    Args:
        queries: Iterable of query strings.
        use_prefix: When true, each query is embedded with the
            ``"search_query: "`` prefix; otherwise with no prefix.

    For every query the elapsed search time in milliseconds is printed,
    followed by one line per hit with its rank, score, and the first 20
    characters of the Qdrant point id. A non-200 search response is
    reported on stdout and the loop moves on to the next query.
    """
    prefix = "search_query: " if use_prefix else ""

    for query in queries:
        embedding = get_embedding(query, prefix)

        # Time only the Qdrant round trip, not the embedding call above.
        start = time.time()
        resp = requests.post(
            f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points/search",
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": embedding, "limit": 5},
            # requests has no default timeout; avoid hanging on a stuck server.
            timeout=30,
        )
        elapsed = (time.time() - start) * 1000

        print(f"\nQuery: '{query}' ({elapsed:.1f}ms)")
        print("-" * 50)

        if resp.status_code != 200:
            # Make a failed search visible instead of showing "no results".
            print(f"  Error {resp.status_code}: {resp.text[:200]}")
            continue

        results = resp.json().get("result", [])
        for rank, hit in enumerate(results, start=1):
            # The search response carries the point id (not the chunk_id
            # payload field); str() keeps the slice safe for integer ids.
            point_id = str(hit.get("id", ""))[:20]
            score = hit.get("score", 0)
            print(f"  {rank}. [{score:.3f}] {point_id}")
|
||||
|
||||
|
||||
# English sample queries passed to test_queries() by the script entry point.
# The thematic grouping below is only a reading aid; order is significant
# because results are printed in list order.
ENGLISH_QUERIES = [
    # people speaking
    "a person talking",
    "someone speaking on camera",
    # setting
    "outdoor scene",
    "indoor setting",
    # actions
    "walking or moving",
    "dialogue or conversation",
    "looking at something",
    # mood / genre
    "happy or joyful",
    "serious or dramatic",
    "comedy or funny",
    # appearance and posture
    "wearing a tie",
    "holding an object",
    "sitting on a chair",
    # location
    "city or urban",
    "building or room",
    "open space",
]
|
||||
|
||||
# Chinese sample queries passed to test_queries() by the script entry point.
# The thematic grouping below is only a reading aid; order is significant
# because results are printed in list order.
CHINESE_QUERIES = [
    # people speaking
    "有人在說話",
    # setting
    "戶外場景",
    "室內場景",
    # actions
    "走路或移動",
    "對話或交談",
    "看著某樣東西",
    # mood / genre
    "快樂或開心",
    "嚴肅或戲劇性",
    "喜劇或有趣",
    # appearance and posture
    "戴著領帶",
    "拿著東西",
    "坐在椅子上",
    # location
    "城市或都市",
    "建築物或房間",
    "開放空間",
]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: `sync` as the first argument uploads vectors;
    # anything else (or no argument) runs both query suites.
    import sys

    rule = "=" * 60
    wants_sync = len(sys.argv) > 1 and sys.argv[1] == "sync"

    if wants_sync:
        print(
            rule,
            f"Syncing vectors to {QDRANT_COLLECTION}",
            f"Model: {MODEL}",
            "Prefix for chunks: search_document:",
            rule,
            sep="\n",
        )
        sync_to_qdrant()
    else:
        print(
            rule,
            f"Testing with {QDRANT_COLLECTION}",
            f"Model: {MODEL}",
            "Prefix for queries: search_query:",
            rule,
            sep="\n",
        )

        suites = (
            ("ENGLISH QUERIES", ENGLISH_QUERIES),
            ("CHINESE QUERIES", CHINESE_QUERIES),
        )
        for heading, suite in suites:
            # A blank line, then the heading framed by two rules.
            print("\n" + rule, heading, rule, sep="\n")
            test_queries(suite)
|
||||
Reference in New Issue
Block a user