#!/opt/homebrew/bin/python3.11
"""Natural Language Vector Search - Chinese Queries.

Embeds a fixed list of Chinese natural-language queries through a local
HTTP embedding endpoint, runs each embedding against a Qdrant collection and
a pgvector-backed PostgreSQL table, and prints per-query latency together
with the top hits from both back ends.
"""

import time

import psycopg2
import requests

# NOTE(review): not referenced anywhere in this file -- neither search is
# filtered by video. Kept because other code may import it; confirm intent.
VIDEO_UUID = "39567a0eb16f39fd"

# NOTE(review): credentials are hard-coded; consider loading them from the
# environment before this script leaves a local test machine.
POSTGRES_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "user": "accusys",
    "password": "Test3200",
    "database": "momentry",
}

# Upper bound (seconds) for every HTTP call so that a stalled service fails
# the run instead of hanging it forever.
REQUEST_TIMEOUT_S = 60

# Chinese natural language queries
CHINESE_QUERIES = [
    # Scene
    "有人在說話",
    "戶外場景",
    "室內場景",
    # Actions
    "走路或移動",
    "對話或交談",
    "看著某樣東西",
    # Emotions
    "快樂或開心",
    "嚴肅或戲劇性",
    "喜劇或有趣",
    # Objects
    "戴著領帶",
    "拿著東西",
    "坐在椅子上",
    # Locations
    "城市或都市",
    "建築物或房間",
    "開放空間",
]


def _truncate(text, limit=50):
    """Return *text* cut to *limit* characters plus "..."; "" for None/empty."""
    if not text:
        return ""
    return text[:limit] + "..." if len(text) > limit else text


def _mean_ms(results):
    """Return the mean of the "ms" fields in *results*, or 0.0 when empty."""
    if not results:
        return 0.0
    return sum(r["ms"] for r in results.values()) / len(results)


def get_embedding(text):
    """Return the embedding vector for *text* from the local embedding API.

    Args:
        text: The query string to embed.

    Returns:
        The value of the "embedding" field of the JSON response.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
    """
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": "nomic-embed-text", "prompt": text},
        timeout=REQUEST_TIMEOUT_S,
    )
    # Fail with the real HTTP error rather than a KeyError on "embedding".
    resp.raise_for_status()
    return resp.json()["embedding"]


def test_qdrant(queries):
    """Search the Qdrant collection once per query and time each search.

    Only the search request is timed; computing the embedding is excluded.

    Args:
        queries: Iterable of query strings.

    Returns:
        Dict mapping each query to ``{"ms": <latency in ms>, "results":
        <the "result" list of the Qdrant response, or [] if absent>}``.
    """
    results = {}
    for query in queries:
        embedding = get_embedding(query)
        # perf_counter is monotonic, so the latency cannot be skewed by
        # wall-clock adjustments.
        start = time.perf_counter()
        resp = requests.post(
            "http://localhost:6333/collections/AccusysDB/points/search",
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": embedding, "limit": 10},
            timeout=REQUEST_TIMEOUT_S,
        )
        elapsed = (time.perf_counter() - start) * 1000
        data = resp.json()
        # A response without "result" is deliberately tolerated as "no hits".
        results[query] = {"ms": round(elapsed, 2), "results": data.get("result", [])}
    return results


def test_pgvector(queries):
    """Search the pgvector table once per query and time each search.

    The timed span covers executing the SQL and fetching the rows; computing
    the embedding is excluded.

    Args:
        queries: Iterable of query strings.

    Returns:
        Dict mapping each query to ``{"ms": <latency in ms>, "results":
        [{"chunk_id", "score", "text"}, ...]}`` with at most 10 entries,
        where ``score`` is ``1 - distance``.
    """
    results = {}
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        with conn.cursor() as cur:
            for query in queries:
                embedding = get_embedding(query)
                # Text form "[x1,x2,...]" that is cast to ::vector in the SQL.
                vector_str = "[" + ",".join(str(x) for x in embedding) + "]"
                start = time.perf_counter()
                cur.execute(
                    """
                    SELECT
                        cv.chunk_id,
                        (cv.embedding_vector <=> %s::vector) as distance,
                        c.content->>'text' as text
                    FROM chunk_vectors cv
                    JOIN chunks c ON cv.chunk_id = c.chunk_id
                    WHERE cv.embedding_vector IS NOT NULL
                    ORDER BY cv.embedding_vector <=> %s::vector
                    LIMIT 10
                    """,
                    (vector_str, vector_str),
                )
                rows = cur.fetchall()
                elapsed = (time.perf_counter() - start) * 1000
                results[query] = {
                    "ms": round(elapsed, 2),
                    # <=> is pgvector's cosine-distance operator, so
                    # 1 - distance is the cosine similarity.
                    "results": [
                        {"chunk_id": r[0], "score": 1 - r[1], "text": r[2]}
                        for r in rows
                    ],
                }
    finally:
        # Always release the connection, even if an embedding request or a
        # query raised part-way through the loop.
        conn.close()
    return results


def main():
    """Run every query against both back ends and print a comparison report."""
    print("=" * 80)
    print("中文自然語言向量搜尋測試")
    print("Chinese Natural Language Vector Search Test")
    print("=" * 80)
    print("\nVideo: Charade 1963")
    print("Model: nomic-embed-text\n")

    print("Running Qdrant searches...")
    qdrant_results = test_qdrant(CHINESE_QUERIES)

    print("Running pgvector searches...")
    pgvector_results = test_pgvector(CHINESE_QUERIES)

    qdrant_avg = _mean_ms(qdrant_results)
    pgvector_avg = _mean_ms(pgvector_results)

    print("\n" + "=" * 80)
    print("平均回應時間 / AVERAGE RESPONSE TIME")
    print("=" * 80)
    print(f" Qdrant: {qdrant_avg:.2f}ms")
    print(f" pgvector: {pgvector_avg:.2f}ms")

    print("\n" + "=" * 80)
    print("詳細結果 / DETAILED RESULTS")
    print("=" * 80)

    for query in CHINESE_QUERIES:
        qd = qdrant_results[query]
        pg = pgvector_results[query]

        print(f"\n{'=' * 60}")
        print(f'查詢 / Query: "{query}"')
        print(f"{'=' * 60}")

        print(f"\n[Qdrant] Time: {qd['ms']:.1f}ms")
        print("-" * 60)
        for i, r in enumerate(qd["results"][:5]):
            # NOTE(review): the Qdrant request does not ask for payloads, so
            # the text shown here is the pgvector hit at the same rank. It is
            # the same chunk only if both back ends rank identically --
            # confirm the payload schema and print the Qdrant payload instead.
            text = pg["results"][i]["text"] if i < len(pg["results"]) else ""
            print(f" {i + 1:2}. [{r['score']:.3f}] {_truncate(text)}")

        print(f"\n[pgvector] Time: {pg['ms']:.1f}ms")
        print("-" * 60)
        for i, r in enumerate(pg["results"][:5]):
            print(f" {i + 1:2}. [{r['score']:.3f}] {_truncate(r['text'])}")


if __name__ == "__main__":
    main()