#!/opt/homebrew/bin/python3.11
"""
Vector Search Test with nomic-embed-text:v1.5 using prefixes - with text content

For every test query this script requests an embedding from the local
embeddings HTTP endpoint, searches the Qdrant collection with that vector,
and prints the top hits with a short preview of each hit's text.
"""

import time

import requests
import psycopg2

VIDEO_UUID = "39567a0eb16f39fd"

# NOTE(review): credentials are hard-coded in this script; consider reading
# them from the environment before sharing or committing it.
POSTGRES_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "user": "accusys",
    "password": "Test3200",
    "database": "momentry",
}

MODEL = "nomic-embed-text:v1.5"
QDRANT_COLLECTION = "chunks_v2"

EMBEDDINGS_URL = "http://localhost:11434/api/embeddings"
QDRANT_SEARCH_URL = (
    f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points/search"
)
QDRANT_API_KEY = "Test3200Test3200Test3200"

# Upper bound (seconds) for each HTTP call so a stalled service cannot hang
# the script forever.
REQUEST_TIMEOUT = 60

# Number of characters of chunk text shown per result line.
PREVIEW_CHARS = 50


def get_embedding(text, prefix=""):
    """Return the embedding vector for ``prefix + text``.

    Args:
        text: The text to embed.
        prefix: String prepended to ``text`` before it is sent to the model.

    Returns:
        The value of the ``"embedding"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the embeddings endpoint answers with an error
            status.
        KeyError: If the response body has no ``"embedding"`` field.
    """
    resp = requests.post(
        EMBEDDINGS_URL,
        json={"model": MODEL, "prompt": f"{prefix}{text}"},
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on HTTP errors instead of with an opaque KeyError below.
    resp.raise_for_status()
    return resp.json()["embedding"]


def get_text_from_chunk_id(chunk_id):
    """Get text from PostgreSQL using chunk_id.

    Args:
        chunk_id: Value matched against the ``chunk_id`` column of ``chunks``.

    Returns:
        The ``content->>'text'`` value of the matching row, or ``""`` when no
        row matches or the stored text is NULL.
    """
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        # The cursor context manager closes the cursor even if the query fails.
        with conn.cursor() as cur:
            cur.execute(
                "SELECT content->>'text' FROM chunks WHERE chunk_id = %s",
                (chunk_id,),
            )
            row = cur.fetchone()
    finally:
        # Always release the connection, including on query errors.
        conn.close()
    # A matching row may still carry a NULL text; normalise that to "".
    return (row[0] or "") if row else ""


def _result_preview(hit):
    """Return a short text preview for one Qdrant search hit."""
    # The payload may be missing or null; treat both as an empty payload.
    payload = hit.get("payload") or {}
    chunk_id = payload.get("chunk_id", "")
    if chunk_id:
        # Get text from PostgreSQL
        text = get_text_from_chunk_id(chunk_id)
    else:
        # Try to get text from Qdrant payload
        text = payload.get("text") or ""
    return text[:PREVIEW_CHARS]


def test_queries(queries, use_prefix=True, *, limit=3):
    """Test queries against Qdrant.

    Embeds each query, searches ``QDRANT_COLLECTION`` and prints the hits with
    their score and a text preview. The reported time covers only the Qdrant
    search request, not the embedding call.

    Args:
        queries: Iterable of query strings.
        use_prefix: When true, ``"search_query: "`` is prepended to each query
            before embedding.
        limit: Maximum number of hits requested per query.

    Raises:
        requests.HTTPError: If either HTTP service answers with an error
            status.
    """
    prefix = "search_query: " if use_prefix else ""

    for query in queries:
        embedding = get_embedding(query, prefix)

        # perf_counter is monotonic, so the measurement is immune to wall
        # clock adjustments.
        start = time.perf_counter()
        resp = requests.post(
            QDRANT_SEARCH_URL,
            headers={"api-key": QDRANT_API_KEY},
            # with_payload must be requested explicitly; without it the hits
            # carry no payload and neither chunk_id nor text can be read.
            json={"vector": embedding, "limit": limit, "with_payload": True},
            timeout=REQUEST_TIMEOUT,
        )
        elapsed = (time.perf_counter() - start) * 1000
        resp.raise_for_status()

        results = resp.json().get("result") or []

        print(f"\nQuery: '{query}' ({elapsed:.1f}ms)")
        print("-" * 60)
        for rank, hit in enumerate(results, start=1):
            score = hit.get("score", 0)
            text = _result_preview(hit)
            print(f" {rank}. [{score:.3f}] {text}...")


# English queries
ENGLISH_QUERIES = [
    "a person talking",
    "someone speaking on camera",
    "outdoor scene",
    "indoor setting",
    "walking or moving",
    "dialogue or conversation",
    "looking at something",
    "happy or joyful",
    "serious or dramatic",
    "comedy or funny",
    "wearing a tie",
    "holding an object",
    "sitting on a chair",
    "city or urban",
    "building or room",
    "open space",
]

# Chinese queries
CHINESE_QUERIES = [
    "有人在說話",
    "戶外場景",
    "室內場景",
    "走路或移動",
    "對話或交談",
    "看著某樣東西",
    "快樂或開心",
    "嚴肅或戲劇性",
    "喜劇或有趣",
    "戴著領帶",
    "拿著東西",
    "坐在椅子上",
    "城市或都市",
    "建築物或房間",
    "開放空間",
]


if __name__ == "__main__":
    print("=" * 70)
    print(f"Testing with {QDRANT_COLLECTION}")
    print(f"Model: {MODEL}")
    print("Prefix for chunks: search_document:")
    print("Prefix for queries: search_query:")
    print("=" * 70)

    print("\n" + "=" * 70)
    print("ENGLISH QUERIES")
    print("=" * 70)
    test_queries(ENGLISH_QUERIES)

    print("\n" + "=" * 70)
    print("CHINESE QUERIES")
    print("=" * 70)
    test_queries(CHINESE_QUERIES)