#!/opt/homebrew/bin/python3.11
"""
Search comparison script for PostgreSQL, MongoDB, and Qdrant

Runs every query in TEST_QUERIES against:

* PostgreSQL, as an ILIKE substring search over sentence chunks, and
* Qdrant, as a vector search using embeddings fetched from a local Ollama
  server,

then prints per-query latency and row counts followed by a summary table.

NOTE(review): the title mentions MongoDB, but main() below only runs the
PostgreSQL and Qdrant tests.
"""

import time
from contextlib import closing

import requests

# Test queries
TEST_QUERIES = [
    "Charade",
    "Paris",
    # NOTE(review): the leading space looks unintentional — confirm before
    # removing it, since it changes what the ILIKE pattern matches.
    " Audrey Hepburn",
    "Cary Grant",
]

# PostgreSQL connection
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment before this script is shared or committed.
POSTGRES_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "user": "accusys",
    "password": "Test3200",
    "database": "momentry",
}

# HTTP endpoints used by the vector search test.
OLLAMA_EMBEDDINGS_URL = "http://localhost:11434/api/embeddings"
EMBEDDING_MODEL = "nomic-embed-text"
QDRANT_SEARCH_URL = "http://localhost:6333/collections/AccusysDB/points/search"
# NOTE(review): hard-coded API key; same concern as the database password.
QDRANT_API_KEY = "Test3200Test3200Test3200"

# Upper bound for every HTTP call so an unresponsive service cannot hang the
# script indefinitely (requests has no timeout by default).
HTTP_TIMEOUT_SECONDS = 60


def _connect_postgres():
    """Open a new PostgreSQL connection using POSTGRES_CONFIG."""
    # Imported lazily so that importing this module does not require psycopg2.
    import psycopg2

    return psycopg2.connect(**POSTGRES_CONFIG)


def _count_sentence_chunks():
    """Return the number of rows in ``chunks`` with chunk_type 'sentence'."""
    # closing() guarantees close() is called even if the query raises.
    with closing(_connect_postgres()) as conn, closing(conn.cursor()) as cur:
        cur.execute("SELECT COUNT(*) FROM chunks WHERE chunk_type = 'sentence'")
        return cur.fetchone()[0]


def _print_banner(title, *, blank_before=True):
    """Print *title* framed by two 60-character '=' rules."""
    rule = "=" * 60
    print(("\n" if blank_before else "") + rule)
    print(title)
    print(rule)


def _format_cell(result):
    """Format one result dict as '<ms>ms (<rows> rows)' for the summary table."""
    return f"{result.get('ms', 0):.1f}ms ({result.get('rows', 0)} rows)"


def test_postgres_text_search():
    """Test text search in PostgreSQL.

    Runs an ILIKE substring search (limited to 10 rows) over sentence chunks
    for every query in TEST_QUERIES and prints the latency of each.

    Returns:
        A dict mapping each query to
        ``{"method": "PostgreSQL ILIKE", "ms": <rounded latency>, "rows": <count>}``.
    """
    results = {}

    # closing() releases the cursor and connection even when a query fails;
    # previously an exception skipped both close() calls.
    with closing(_connect_postgres()) as conn, closing(conn.cursor()) as cur:
        for query in TEST_QUERIES:
            # perf_counter() is monotonic and high-resolution, so the measured
            # interval is not distorted by system clock adjustments.
            start = time.perf_counter()
            cur.execute(
                "SELECT chunk_id, content->>'text' FROM chunks "
                "WHERE chunk_type = 'sentence' "
                "AND content->>'text' ILIKE %s LIMIT 10",
                (f"%{query}%",),
            )
            rows = cur.fetchall()
            elapsed = (time.perf_counter() - start) * 1000

            results[query] = {
                "method": "PostgreSQL ILIKE",
                "ms": round(elapsed, 2),
                "rows": len(rows),
            }
            print(
                f"PostgreSQL text search '{query}': {elapsed:.2f}ms, {len(rows)} rows"
            )

    return results


def test_qdrant_vector_search():
    """Test vector search in Qdrant.

    For every query in TEST_QUERIES, fetches an embedding from Ollama and then
    times a top-10 search against the AccusysDB collection in Qdrant. Only the
    Qdrant request is timed; embedding generation is excluded.

    Returns:
        A dict mapping each query to
        ``{"method": "Qdrant HNSW", "ms": <rounded latency>, "rows": <count>}``.

    Raises:
        requests.HTTPError: if Ollama or Qdrant answers with an error status.
        requests.Timeout: if either service does not answer within
            HTTP_TIMEOUT_SECONDS.
    """
    results = {}

    for query in TEST_QUERIES:
        # Get embedding from Ollama
        embed_resp = requests.post(
            OLLAMA_EMBEDDINGS_URL,
            json={"model": EMBEDDING_MODEL, "prompt": query},
            timeout=HTTP_TIMEOUT_SECONDS,
        )
        # Fail with the real HTTP error rather than a KeyError on "embedding".
        embed_resp.raise_for_status()
        embedding = embed_resp.json()["embedding"]

        # Search in Qdrant (using AccusysDB collection)
        start = time.perf_counter()
        resp = requests.post(
            QDRANT_SEARCH_URL,
            headers={"api-key": QDRANT_API_KEY},
            json={"vector": embedding, "limit": 10},
            timeout=HTTP_TIMEOUT_SECONDS,
        )
        elapsed = (time.perf_counter() - start) * 1000

        # A rejected request must not be reported as a valid "0 rows" timing.
        resp.raise_for_status()
        data = resp.json()
        result_count = len(data.get("result", []))

        results[query] = {
            "method": "Qdrant HNSW",
            "ms": round(elapsed, 2),
            "rows": result_count,
        }
        print(f"Qdrant vector search '{query}': {elapsed:.2f}ms, {result_count} rows")

    return results


def main():
    """Run both search tests and print a side-by-side summary table."""
    _print_banner("Search Performance Comparison Test", blank_before=False)

    # Get chunk count
    count = _count_sentence_chunks()
    print(f"\nTotal sentence chunks: {count}")

    _print_banner("A. Text Search Test (Priority a)")
    pg_results = test_postgres_text_search()

    _print_banner("B. Vector Search Test (Priority b)")
    qdrant_results = test_qdrant_vector_search()

    _print_banner("Summary")
    print(f"\n{'Query':<20} | {'PostgreSQL':<25} | {'Qdrant':<25}")
    print("-" * 70)

    for query in TEST_QUERIES:
        pg_cell = _format_cell(pg_results.get(query, {}))
        qd_cell = _format_cell(qdrant_results.get(query, {}))
        # Pad each cell to the header's column width so the table lines up.
        print(f"{query:<20} | {pg_cell:<25} | {qd_cell:<25}")


if __name__ == "__main__":
    main()