Phase 2.6.1: co_occurrence_edges migration - build_co_occurrence_edges_from_qdrant() - Qdrant embeddings → frame grouping → YOLO objects - Result: 6679 edges (vs 6701 PostgreSQL) Phase 2.6.2: face_face_edges migration - build_face_face_edges_from_qdrant() - Qdrant embeddings → frame grouping → face pairs - mutual_gaze detection preserved - Result: 6 edges (exact match) Phase 2.6.3: speaker_face_edges migration - build_speaker_face_edges_from_qdrant() - Qdrant embeddings → trace_id frame ranges - SPEAKS_AS edge creation Architecture: - All edges use Qdrant payload (no face_detections queries) - PostgreSQL fallback for empty Qdrant - Estimated 3.6x performance improvement Testing: - Playground (3003): ✓ All Phase 2.6 logs verified - Edge counts: ✓ Close match with PostgreSQL - Fallback: ✓ Working Docs: - docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md - docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
132 lines
3.4 KiB
Python
132 lines
3.4 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
Search performance comparison script for PostgreSQL (ILIKE text search) and Qdrant (vector search); MongoDB is not exercised by this script.
|
|
"""
|
|
|
|
import time
|
|
import requests
|
|
|
|
# Test queries
|
|
# Search terms used by every benchmark in this script.
# Entries must not carry leading/trailing whitespace: each term is embedded
# verbatim in the ILIKE pattern (%term%) and sent as the embedding prompt, so
# a stray space silently narrows the text matches (e.g. it would skip text
# that starts with the term).
TEST_QUERIES = [
    "Charade",
    "Paris",
    "Audrey Hepburn",
    "Cary Grant",
]
|
|
|
|
# PostgreSQL connection
|
|
# Keyword arguments unpacked into psycopg2.connect(**POSTGRES_CONFIG).
# NOTE(review): the credentials are hard-coded; consider loading them from
# the environment before this script is shared or committed.
POSTGRES_CONFIG = dict(
    host="localhost",
    port=5432,
    user="accusys",
    password="Test3200",
    database="momentry",
)
|
|
|
|
|
|
def test_postgres_text_search():
    """Time a case-insensitive substring search in PostgreSQL per test query.

    For every entry in ``TEST_QUERIES`` this runs an ``ILIKE '%query%'``
    match against the ``text`` field of sentence chunks (at most 10 rows),
    prints the timing, and records it. The timed section covers both
    ``execute`` and ``fetchall``.

    Returns:
        dict: Maps each query string to
        ``{"method": "PostgreSQL ILIKE", "ms": <elapsed ms, 2 decimals>,
        "rows": <number of rows fetched>}``.

    Raises:
        psycopg2.Error: If connecting or running a query fails. The cursor
            and connection are closed before the error propagates.
    """
    from contextlib import closing

    import psycopg2

    results = {}

    # closing() guarantees close() even when a query raises; psycopg2's own
    # ``with connection`` only ends the transaction and leaves it open.
    with closing(psycopg2.connect(**POSTGRES_CONFIG)) as conn, closing(
        conn.cursor()
    ) as cur:
        for query in TEST_QUERIES:
            # perf_counter() is monotonic, so the interval cannot be skewed
            # by wall-clock adjustments the way time.time() can.
            start = time.perf_counter()
            cur.execute(
                "SELECT chunk_id, content->>'text' FROM chunks WHERE chunk_type = 'sentence' AND content->>'text' ILIKE %s LIMIT 10",
                # The term is bound as a parameter; note that '%' or '_'
                # inside a term would still act as LIKE wildcards.
                (f"%{query}%",),
            )
            rows = cur.fetchall()
            elapsed = (time.perf_counter() - start) * 1000

            results[query] = {
                "method": "PostgreSQL ILIKE",
                "ms": round(elapsed, 2),
                "rows": len(rows),
            }
            print(f"PostgreSQL text search '{query}': {elapsed:.2f}ms, {len(rows)} rows")

    return results
|
|
|
|
|
|
def test_qdrant_vector_search():
    """Time a Qdrant vector search for each test query.

    Each entry in ``TEST_QUERIES`` is first embedded through the local
    Ollama embeddings endpoint (model ``nomic-embed-text``). Only the
    subsequent search request against the ``AccusysDB`` collection is timed;
    embedding generation is excluded from the measurement.

    Returns:
        dict: Maps each query string to
        ``{"method": "Qdrant HNSW", "ms": <elapsed ms, 2 decimals>,
        "rows": <number of results returned>}``.

    Raises:
        requests.RequestException: If either service times out, cannot be
            reached, or answers with an HTTP error status. Failing loudly
            avoids recording a meaningless timing with 0 rows.
    """
    results = {}

    for query in TEST_QUERIES:
        # Get the query embedding from Ollama (not part of the timed section).
        # Without a timeout, requests would wait forever on a stalled server.
        embed_resp = requests.post(
            "http://localhost:11434/api/embeddings",
            json={"model": "nomic-embed-text", "prompt": query},
            timeout=60,
        )
        # Surface HTTP failures directly instead of an opaque KeyError below.
        embed_resp.raise_for_status()
        embedding = embed_resp.json()["embedding"]

        # Search in Qdrant (using the AccusysDB collection).
        # perf_counter() is monotonic and intended for interval timing.
        start = time.perf_counter()
        resp = requests.post(
            "http://localhost:6333/collections/AccusysDB/points/search",
            # NOTE(review): the API key is hard-coded; consider reading it
            # from the environment.
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": embedding, "limit": 10},
            timeout=30,
        )
        elapsed = (time.perf_counter() - start) * 1000

        # Checked after the clock stops so the measurement is unaffected.
        resp.raise_for_status()
        data = resp.json()
        result_count = len(data.get("result", []))

        results[query] = {
            "method": "Qdrant HNSW",
            "ms": round(elapsed, 2),
            "rows": result_count,
        }
        print(f"Qdrant vector search '{query}': {elapsed:.2f}ms, {result_count} rows")

    return results
|
|
|
|
|
|
def main():
    """Run both search benchmarks and print a per-query summary table.

    Steps:
        1. Print the number of sentence chunks stored in PostgreSQL.
        2. Run the PostgreSQL text-search benchmark.
        3. Run the Qdrant vector-search benchmark.
        4. Print one summary row per query with both timings and row counts.

    Raises:
        psycopg2.Error: If the chunk-count query or its connection fails.
            The cursor and connection are closed before the error propagates.
    """
    from contextlib import closing

    import psycopg2

    banner = "=" * 60

    print(banner)
    print("Search Performance Comparison Test")
    print(banner)

    # Get chunk count. closing() guarantees close() even if the query raises;
    # psycopg2's own ``with connection`` would leave the connection open.
    with closing(psycopg2.connect(**POSTGRES_CONFIG)) as conn, closing(
        conn.cursor()
    ) as cur:
        cur.execute("SELECT COUNT(*) FROM chunks WHERE chunk_type = 'sentence'")
        count = cur.fetchone()[0]

    print(f"\nTotal sentence chunks: {count}")
    print("\n" + banner)
    print("A. Text Search Test (Priority a)")
    print(banner)
    pg_results = test_postgres_text_search()

    print("\n" + banner)
    print("B. Vector Search Test (Priority b)")
    print(banner)
    qdrant_results = test_qdrant_vector_search()

    print("\n" + banner)
    print("Summary")
    print(banner)
    print(f"\n{'Query':<20} | {'PostgreSQL':<25} | {'Qdrant':<25}")
    print("-" * 70)
    for query in TEST_QUERIES:
        pg = pg_results.get(query, {})
        qd = qdrant_results.get(query, {})
        # Build each cell first, then pad it to the header's column width so
        # the rows line up under the "PostgreSQL" / "Qdrant" headings.
        pg_cell = f"{pg.get('ms', 0):.1f}ms ({pg.get('rows', 0)} rows)"
        qd_cell = f"{qd.get('ms', 0):.1f}ms ({qd.get('rows', 0)} rows)"
        print(f"{query:<20} | {pg_cell:<25} | {qd_cell:<25}")
|
|
|
|
|
|
# Script entry point: run the comparison only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|