#!/opt/homebrew/bin/python3.11
"""
Natural Language Vector Search - Show Top 10 Results

Runs a fixed list of natural-language queries against two vector stores
(a Qdrant collection over HTTP and a PostgreSQL table queried with the
pgvector ``<=>`` operator), records how long each search takes, and prints
the average latency followed by the top 10 hits per query.

Each query is embedded by POSTing to the HTTP endpoint on localhost:11434
using the ``nomic-embed-text`` model; embedding time is excluded from the
reported search timings.
"""

import time
from contextlib import closing

import requests
import psycopg2

# NOTE(review): not referenced anywhere in this file; kept in case other
# code imports it.
VIDEO_UUID = "39567a0eb16f39fd"

# NOTE(review): credentials are hard-coded here and in the Qdrant request
# header below; consider loading them from the environment.
POSTGRES_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "user": "accusys",
    "password": "Test3200",
    "database": "momentry",
}

NATURAL_LANGUAGE_QUERIES = [
    "a person talking",
    "someone speaking on camera",
    "outdoor scene",
    "indoor setting",
    "walking or moving",
    "dialogue or conversation",
    "looking at something",
    "happy or joyful",
    "serious or dramatic",
    "comedy or funny",
    "wearing a tie",
    "holding an object",
    "sitting on a chair",
    "city or urban",
    "building or room",
    "open space",
]

# Upper bound (seconds) for any single HTTP call, so that a stalled local
# service fails the run instead of hanging it forever.
REQUEST_TIMEOUT_SECONDS = 60

# Top-10 nearest chunks by the pgvector ``<=>`` distance, joined to the chunk
# text. Built from adjacent literals so the statement sent to the server is
# character-for-character the one this script has always used.
_PGVECTOR_SEARCH_SQL = (
    " SELECT cv.chunk_id,"
    " (cv.embedding_vector <=> %s::vector) as distance,"
    " c.content->>'text' as text"
    " FROM chunk_vectors cv"
    " JOIN chunks c ON cv.chunk_id = c.chunk_id"
    " WHERE cv.embedding_vector IS NOT NULL"
    " ORDER BY cv.embedding_vector <=> %s::vector"
    " LIMIT 10 "
)


def get_embedding(text):
    """Return the embedding vector for *text*.

    Args:
        text: The natural-language string to embed.

    Returns:
        The value of the ``"embedding"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the embedding endpoint answers with a 4xx/5xx
            status (previously this surfaced as an opaque ``KeyError``).
        requests.Timeout: If the endpoint does not answer within
            ``REQUEST_TIMEOUT_SECONDS``.
        KeyError: If a successful response carries no ``"embedding"`` field.
    """
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": "nomic-embed-text", "prompt": text},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    resp.raise_for_status()
    return resp.json()["embedding"]


def test_qdrant(queries):
    """Search the Qdrant collection once per query and time each search.

    Args:
        queries: Iterable of natural-language query strings.

    Returns:
        A dict mapping each query to ``{"ms": <elapsed milliseconds, rounded
        to 2 places>, "results": <the response's "result" list, or [] when
        the field is absent>}``. Only the HTTP round trip is timed; embedding
        and JSON decoding are not.

    Raises:
        requests.Timeout: If Qdrant does not answer within
            ``REQUEST_TIMEOUT_SECONDS``.
    """
    results = {}
    for query in queries:
        embedding = get_embedding(query)

        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start = time.perf_counter()
        resp = requests.post(
            "http://localhost:6333/collections/AccusysDB/points/search",
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": embedding, "limit": 10},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        elapsed = (time.perf_counter() - start) * 1000

        # Deliberately tolerant: a response without "result" is recorded as
        # an empty hit list rather than aborting the whole run.
        data = resp.json()
        results[query] = {
            "ms": round(elapsed, 2),
            "results": data.get("result", []),
        }
    return results


def test_pgvector(queries):
    """Search the pgvector-backed table once per query and time each search.

    Args:
        queries: Iterable of natural-language query strings.

    Returns:
        A dict mapping each query to ``{"ms": <elapsed milliseconds, rounded
        to 2 places>, "results": [{"chunk_id", "score", "text"}, ...]}`` where
        ``score`` is ``1 - distance``. The timed span covers ``execute`` plus
        ``fetchall``; embedding is not timed.

    Raises:
        psycopg2.Error: On connection or query failure. The cursor and the
            connection are closed in that case as well.
    """
    results = {}
    # closing() guarantees cursor and connection are released even when an
    # embedding request or a query raises part-way through the loop.
    with closing(psycopg2.connect(**POSTGRES_CONFIG)) as conn, closing(
        conn.cursor()
    ) as cur:
        for query in queries:
            embedding = get_embedding(query)
            # The vector is passed as a '[x,y,...]' literal and cast with
            # ::vector inside the statement.
            vector_str = "[" + ",".join(str(x) for x in embedding) + "]"

            start = time.perf_counter()
            cur.execute(_PGVECTOR_SEARCH_SQL, (vector_str, vector_str))
            rows = cur.fetchall()
            elapsed = (time.perf_counter() - start) * 1000

            results[query] = {
                "ms": round(elapsed, 2),
                "results": [
                    {"chunk_id": r[0], "score": 1 - r[1], "text": r[2]}
                    for r in rows
                ],
            }
    return results


def _preview(text, limit=70):
    """Return *text* cut to *limit* characters plus "...", or "" if falsy."""
    if not text:
        return ""
    return text[:limit] + "..." if len(text) > limit else text


def _average_ms(results):
    """Return the mean "ms" over a results dict, or 0.0 when it is empty."""
    if not results:
        return 0.0
    return sum(r["ms"] for r in results.values()) / len(results)


def main():
    """Run every query against both backends and print the comparison."""
    print("=" * 80)
    print("NATURAL LANGUAGE VECTOR SEARCH - TOP 10 RESULTS")
    print("=" * 80)
    print("\nVideo: Charade 1963")
    print("Model: nomic-embed-text\n")

    # Run tests
    print("Running Qdrant searches...")
    qdrant_results = test_qdrant(NATURAL_LANGUAGE_QUERIES)

    print("Running pgvector searches...")
    pgvector_results = test_pgvector(NATURAL_LANGUAGE_QUERIES)

    # Calculate averages
    qdrant_avg = _average_ms(qdrant_results)
    pgvector_avg = _average_ms(pgvector_results)

    print("\n" + "=" * 80)
    print("AVERAGE RESPONSE TIME")
    print("=" * 80)
    print(f"  Qdrant:   {qdrant_avg:.2f}ms")
    print(f"  pgvector: {pgvector_avg:.2f}ms")

    # Show detailed results for each query
    print("\n" + "=" * 80)
    print("DETAILED RESULTS")
    print("=" * 80)

    for query in NATURAL_LANGUAGE_QUERIES:
        qd = qdrant_results[query]
        pg = pgvector_results[query]

        print(f"\n{'=' * 60}")
        print(f'Query: "{query}"')
        print(f"{'=' * 60}")

        print(f"\n[Qdrant] Time: {qd['ms']:.1f}ms")
        print("-" * 60)
        for i, r in enumerate(qd["results"][:10]):
            # NOTE(review): the text printed next to each Qdrant score is the
            # pgvector hit at the same rank, not the Qdrant hit itself (the
            # search request does not ask for payloads). If the two backends
            # rank differently, score and text belong to different chunks.
            text = pg["results"][i]["text"] if i < len(pg["results"]) else ""
            print(f"  {i + 1:2}. [{r['score']:.3f}] {_preview(text)}")

        print(f"\n[pgvector] Time: {pg['ms']:.1f}ms")
        print("-" * 60)
        for i, r in enumerate(pg["results"][:10]):
            print(f"  {i + 1:2}. [{r['score']:.3f}] {_preview(r['text'])}")

        print()


if __name__ == "__main__":
    main()