Files
momentry_core/scripts/test_v2_with_text.py
accusys 383201cacd feat: Initial v0.9 release with API Key authentication
## v0.9.20260325_144654

### Features
- API Key Authentication System
- Job Worker System
- V2 Backup Versioning

### Bug Fixes
- get_processor_results_by_job column mapping

Co-authored-by: OpenCode
2026-03-25 14:53:41 +08:00

134 lines
3.4 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Vector Search Test with nomic-embed-text:v1.5 using prefixes - with text content
"""
import time
import requests
import psycopg2
# Identifier of a video; presumably the one the indexed chunks belong to.
# NOTE(review): not referenced anywhere in the visible part of this file.
VIDEO_UUID = "39567a0eb16f39fd"

# Keyword arguments handed to psycopg2.connect().
# NOTE(review): the credentials are hard-coded in the script; consider
# loading them from the environment before sharing this file.
POSTGRES_CONFIG = dict(
    host="localhost",
    port=5432,
    user="accusys",
    password="Test3200",
    database="momentry",
)

# Model name sent to the embeddings endpoint.
MODEL = "nomic-embed-text:v1.5"

# Name of the Qdrant collection that gets searched.
QDRANT_COLLECTION = "chunks_v2"
def get_embedding(text, prefix="", timeout=60):
    """Return the embedding vector for ``text`` from the local embeddings API.

    Args:
        text: Text to embed.
        prefix: String prepended verbatim to ``text`` before it is sent
            (the caller in this file passes ``"search_query: "``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The value of the ``"embedding"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": MODEL, "prompt": f"{prefix}{text}"},
        # Without a timeout, requests waits indefinitely on a stalled server.
        timeout=timeout,
    )
    # Surface the HTTP failure itself instead of an opaque KeyError on
    # "embedding" when the server rejects the request.
    resp.raise_for_status()
    return resp.json()["embedding"]
def get_text_from_chunk_id(chunk_id):
    """Look up the text of a chunk in PostgreSQL.

    Opens a new connection using ``POSTGRES_CONFIG`` for every call.

    Args:
        chunk_id: Value matched against the ``chunk_id`` column of ``chunks``.

    Returns:
        The ``content->>'text'`` value of the first matching row, or ``""``
        when no row matches or the stored text is NULL.
    """
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        # The cursor context manager closes the cursor on exit.
        with conn.cursor() as cur:
            cur.execute(
                "SELECT content->>'text' FROM chunks WHERE chunk_id = %s",
                (chunk_id,),
            )
            row = cur.fetchone()
    finally:
        # Always release the connection, even when the query raises.
        conn.close()
    # A NULL text value arrives as None; normalise it so callers can slice.
    return (row[0] or "") if row else ""
def test_queries(queries, use_prefix=True):
    """Run each query against Qdrant and print the top three hits.

    For every query the text is embedded with ``get_embedding``, the
    collection named by ``QDRANT_COLLECTION`` is searched, and each hit is
    printed with its score and the first 50 characters of its text. The
    printed time covers only the Qdrant search request, not the embedding.

    Args:
        queries: Iterable of query strings.
        use_prefix: When true, ``"search_query: "`` is prepended to each
            query before embedding; otherwise the query is embedded as-is.

    Returns:
        None. Results are written to stdout.
    """
    prefix = "search_query: " if use_prefix else ""
    search_url = (
        f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points/search"
    )
    for query in queries:
        embedding = get_embedding(query, prefix)
        start = time.time()
        resp = requests.post(
            search_url,
            headers={"api-key": "Test3200Test3200Test3200"},
            # Qdrant omits payloads unless asked; without with_payload the
            # chunk_id/text lookups below have nothing to read.
            json={"vector": embedding, "limit": 3, "with_payload": True},
            timeout=30,
        )
        elapsed = (time.time() - start) * 1000
        # "result" may be absent or null on an error response.
        results = resp.json().get("result") or []
        print(f"\nQuery: '{query}' ({elapsed:.1f}ms)")
        print("-" * 60)
        for rank, hit in enumerate(results, start=1):
            score = hit.get("score", 0)
            # The payload can be present but null, so .get's default is not
            # enough on its own.
            payload = hit.get("payload") or {}
            chunk_id = payload.get("chunk_id", "")
            if chunk_id:
                # Get text from PostgreSQL
                text = get_text_from_chunk_id(chunk_id)
            else:
                # Fall back to text stored in the Qdrant payload
                text = payload.get("text", "")
            # Guard against a None text before slicing.
            text = (text or "")[:50]
            print(f" {rank}. [{score:.3f}] {text}...")
# English-language search phrases used as queries.
ENGLISH_QUERIES = [
    # People and speech
    "a person talking",
    "someone speaking on camera",
    # Setting
    "outdoor scene",
    "indoor setting",
    # Actions
    "walking or moving",
    "dialogue or conversation",
    "looking at something",
    # Mood / genre
    "happy or joyful",
    "serious or dramatic",
    "comedy or funny",
    # Appearance and posture
    "wearing a tie",
    "holding an object",
    "sitting on a chair",
    # Location
    "city or urban",
    "building or room",
    "open space",
]
# Chinese-language search phrases used as queries.
# NOTE(review): 15 entries versus 16 in ENGLISH_QUERIES; there is no
# counterpart to "someone speaking on camera" - confirm this is intended.
CHINESE_QUERIES = [
    "有人在說話",  # someone is talking
    "戶外場景",  # outdoor scene
    "室內場景",  # indoor scene
    "走路或移動",  # walking or moving
    "對話或交談",  # dialogue or conversation
    "看著某樣東西",  # looking at something
    "快樂或開心",  # happy or joyful
    "嚴肅或戲劇性",  # serious or dramatic
    "喜劇或有趣",  # comedy or funny
    "戴著領帶",  # wearing a tie
    "拿著東西",  # holding something
    "坐在椅子上",  # sitting on a chair
    "城市或都市",  # city or urban
    "建築物或房間",  # building or room
    "開放空間",  # open space
]
if __name__ == "__main__":
    # Script entry point: print a run header, then run both query sets.
    rule = "=" * 70

    print(rule)
    print(f"Testing with {QDRANT_COLLECTION}")
    print(f"Model: {MODEL}")
    print("Prefix for chunks: search_document:")
    print("Prefix for queries: search_query:")
    print(rule)

    # Each section gets its own banner followed by the search results.
    sections = (
        ("ENGLISH QUERIES", ENGLISH_QUERIES),
        ("CHINESE QUERIES", CHINESE_QUERIES),
    )
    for title, section_queries in sections:
        print("\n" + rule)
        print(title)
        print(rule)
        test_queries(section_queries)