feat: Initial v0.9 release with API Key authentication
## v0.9.20260325_144654 ### Features - API Key Authentication System - Job Worker System - V2 Backup Versioning ### Bug Fixes - get_processor_results_by_job column mapping Co-authored-by: OpenCode
This commit is contained in:
316
scripts/comprehensive_search_test.py
Normal file
316
scripts/comprehensive_search_test.py
Normal file
@@ -0,0 +1,316 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Comprehensive search comparison: Text, Vector (PostgreSQL & Qdrant), Object, and MongoDB search
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import psycopg2
|
||||
from pymongo import MongoClient
|
||||
|
||||
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
|
||||
POSTGRES_CONFIG = {
|
||||
"host": "localhost",
|
||||
"port": 5432,
|
||||
"user": "accusys",
|
||||
"password": "Test3200",
|
||||
"database": "momentry",
|
||||
}
|
||||
|
||||
MONGO_URI = "mongodb://localhost:27017"
|
||||
MONGO_DB = "momentry"
|
||||
MONGO_COLLECTION = "chunks"
|
||||
|
||||
TEST_QUERIES = [
|
||||
("text", "Paris"),
|
||||
("text", " Audrey Hepburn"),
|
||||
("text", "Cary Grant"),
|
||||
("vector", "Paris"),
|
||||
("vector", " Audrey Hepburn"),
|
||||
("vector", "Cary Grant"),
|
||||
("object", "person"),
|
||||
("object", "car"),
|
||||
("object", "clock"),
|
||||
("object", "tie"),
|
||||
]
|
||||
|
||||
|
||||
def test_text_search():
|
||||
"""Test PostgreSQL text search"""
|
||||
results = {}
|
||||
conn = psycopg2.connect(**POSTGRES_CONFIG)
|
||||
cur = conn.cursor()
|
||||
|
||||
for query in ["Paris", " Audrey Hepburn", "Cary Grant"]:
|
||||
start = time.time()
|
||||
cur.execute(
|
||||
"SELECT chunk_id, content->>'text' FROM chunks WHERE chunk_type = 'sentence' AND content->>'text' ILIKE %s LIMIT 10",
|
||||
(f"%{query}%",),
|
||||
)
|
||||
rows = cur.fetchall()
|
||||
elapsed = (time.time() - start) * 1000
|
||||
results[query] = {"ms": round(elapsed, 2), "rows": len(rows)}
|
||||
print(f"PostgreSQL text '{query}': {elapsed:.2f}ms, {len(rows)} rows")
|
||||
|
||||
cur.close()
|
||||
conn.close()
|
||||
return results
|
||||
|
||||
|
||||
def test_mongodb_text_search():
|
||||
"""Test MongoDB text search"""
|
||||
results = {}
|
||||
mongo_client = MongoClient(MONGO_URI)
|
||||
mongo_collection = mongo_client[MONGO_DB][MONGO_COLLECTION]
|
||||
|
||||
for query in ["Paris", "Audrey Hepburn", "Cary Grant"]:
|
||||
start = time.time()
|
||||
cursor = mongo_collection.find(
|
||||
{"uuid": VIDEO_UUID, "chunk_type": "sentence", "$text": {"$search": query}}
|
||||
).limit(10)
|
||||
|
||||
rows = list(cursor)
|
||||
elapsed = (time.time() - start) * 1000
|
||||
|
||||
results[query] = {"ms": round(elapsed, 2), "rows": len(rows)}
|
||||
print(f"MongoDB text '{query}': {elapsed:.2f}ms, {len(rows)} rows")
|
||||
|
||||
mongo_client.close()
|
||||
return results
|
||||
|
||||
|
||||
def test_qdrant_vector_search():
|
||||
"""Test Qdrant vector search"""
|
||||
results = {}
|
||||
|
||||
for query in ["Paris", " Audrey Hepburn", "Cary Grant"]:
|
||||
# Get embedding from Ollama
|
||||
embed_resp = requests.post(
|
||||
"http://localhost:11434/api/embeddings",
|
||||
json={"model": "nomic-embed-text", "prompt": query},
|
||||
)
|
||||
embedding = embed_resp.json()["embedding"]
|
||||
|
||||
# Search in Qdrant
|
||||
start = time.time()
|
||||
resp = requests.post(
|
||||
"http://localhost:6333/collections/AccusysDB/points/search",
|
||||
headers={"api-key": "Test3200Test3200Test3200"},
|
||||
json={"vector": embedding, "limit": 10},
|
||||
)
|
||||
elapsed = (time.time() - start) * 1000
|
||||
|
||||
data = resp.json()
|
||||
result_count = len(data.get("result", []))
|
||||
|
||||
results[query] = {"ms": round(elapsed, 2), "rows": result_count}
|
||||
print(f"Qdrant vector '{query}': {elapsed:.2f}ms, {result_count} rows")
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def test_postgres_vector_search():
|
||||
"""Test PostgreSQL vector search using pgvector"""
|
||||
results = {}
|
||||
conn = psycopg2.connect(**POSTGRES_CONFIG)
|
||||
cur = conn.cursor()
|
||||
|
||||
for query in ["Paris", " Audrey Hepburn", "Cary Grant"]:
|
||||
# Get embedding from Ollama
|
||||
embed_resp = requests.post(
|
||||
"http://localhost:11434/api/embeddings",
|
||||
json={"model": "nomic-embed-text", "prompt": query},
|
||||
)
|
||||
embedding = embed_resp.json()["embedding"]
|
||||
|
||||
# Search in PostgreSQL using pgvector
|
||||
start = time.time()
|
||||
|
||||
# Convert to vector string format
|
||||
vector_str = "[" + ",".join(str(x) for x in embedding) + "]"
|
||||
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT chunk_id, (embedding_vector <=> %s::vector) as distance
|
||||
FROM chunk_vectors
|
||||
WHERE embedding_vector IS NOT NULL
|
||||
ORDER BY embedding_vector <=> %s::vector
|
||||
LIMIT 10
|
||||
""",
|
||||
(vector_str, vector_str),
|
||||
)
|
||||
|
||||
rows = cur.fetchall()
|
||||
elapsed = (time.time() - start) * 1000
|
||||
|
||||
results[query] = {"ms": round(elapsed, 2), "rows": len(rows)}
|
||||
print(f"PostgreSQL vector '{query}': {elapsed:.2f}ms, {len(rows)} rows")
|
||||
|
||||
cur.close()
|
||||
conn.close()
|
||||
return results
|
||||
|
||||
|
||||
def test_object_search():
|
||||
"""Test PostgreSQL object search"""
|
||||
results = {}
|
||||
conn = psycopg2.connect(**POSTGRES_CONFIG)
|
||||
cur = conn.cursor()
|
||||
|
||||
for obj in ["person", "car", "clock", "tie"]:
|
||||
start = time.time()
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT chunk_id FROM chunks
|
||||
WHERE uuid = %s AND chunk_type = 'sentence'
|
||||
AND metadata IS NOT NULL AND metadata->'yolo'->'objects' ? %s
|
||||
LIMIT 10
|
||||
""",
|
||||
(VIDEO_UUID, obj),
|
||||
)
|
||||
rows = cur.fetchall()
|
||||
elapsed = (time.time() - start) * 1000
|
||||
|
||||
results[obj] = {"ms": round(elapsed, 2), "rows": len(rows)}
|
||||
print(f"PostgreSQL object '{obj}': {elapsed:.2f}ms, {len(rows)} rows")
|
||||
|
||||
cur.close()
|
||||
conn.close()
|
||||
return results
|
||||
|
||||
|
||||
def main():
|
||||
print("=" * 70)
|
||||
print("SEARCH PERFORMANCE COMPARISON")
|
||||
print("=" * 70)
|
||||
|
||||
# Get chunk count
|
||||
conn = psycopg2.connect(**POSTGRES_CONFIG)
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"SELECT COUNT(*) FROM chunks WHERE uuid = %s AND chunk_type = 'sentence'",
|
||||
(VIDEO_UUID,),
|
||||
)
|
||||
chunk_count = cur.fetchone()[0]
|
||||
print(f"\nTotal sentence chunks: {chunk_count}")
|
||||
print(f"Video UUID: {VIDEO_UUID}")
|
||||
|
||||
cur.close()
|
||||
conn.close()
|
||||
|
||||
print("\n" + "=" * 70)
|
||||
print("A. TEXT SEARCH (PostgreSQL ILIKE)")
|
||||
print("=" * 70)
|
||||
text_results = test_text_search()
|
||||
|
||||
print("\n" + "=" * 70)
|
||||
print("A2. TEXT SEARCH (MongoDB Text)")
|
||||
print("=" * 70)
|
||||
mongodb_results = test_mongodb_text_search()
|
||||
|
||||
print("\n" + "=" * 70)
|
||||
print("B1. VECTOR SEARCH (Qdrant HNSW)")
|
||||
print("=" * 70)
|
||||
qdrant_results = test_qdrant_vector_search()
|
||||
|
||||
print("\n" + "=" * 70)
|
||||
print("B2. VECTOR SEARCH (PostgreSQL pgvector HNSW)")
|
||||
print("=" * 70)
|
||||
pgvector_results = test_postgres_vector_search()
|
||||
|
||||
print("\n" + "=" * 70)
|
||||
print("C. OBJECT SEARCH (PostgreSQL JSON)")
|
||||
print("=" * 70)
|
||||
object_results = test_object_search()
|
||||
|
||||
print("\n" + "=" * 70)
|
||||
print("SUMMARY")
|
||||
print("=" * 70)
|
||||
print(f"\n{'Method':<28} | {'Query':<20} | {'Time (ms)':<12} | {'Results'}")
|
||||
print("-" * 75)
|
||||
|
||||
for query, data in text_results.items():
|
||||
print(
|
||||
f"{'PostgreSQL ILIKE':<28} | {query:<20} | {data['ms']:<12.1f} | {data['rows']}"
|
||||
)
|
||||
|
||||
for query, data in mongodb_results.items():
|
||||
print(
|
||||
f"{'MongoDB Text':<28} | {query:<20} | {data['ms']:<12.1f} | {data['rows']}"
|
||||
)
|
||||
|
||||
for query, data in qdrant_results.items():
|
||||
print(
|
||||
f"{'Qdrant HNSW':<28} | {query:<20} | {data['ms']:<12.1f} | {data['rows']}"
|
||||
)
|
||||
|
||||
for query, data in pgvector_results.items():
|
||||
print(
|
||||
f"{'PostgreSQL pgvector':<28} | {query:<20} | {data['ms']:<12.1f} | {data['rows']}"
|
||||
)
|
||||
|
||||
for query, data in object_results.items():
|
||||
print(
|
||||
f"{'PostgreSQL JSON':<28} | {query:<20} | {data['ms']:<12.1f} | {data['rows']}"
|
||||
)
|
||||
|
||||
# Calculate averages
|
||||
text_avg = sum(d["ms"] for d in text_results.values()) / len(text_results)
|
||||
mongodb_avg = sum(d["ms"] for d in mongodb_results.values()) / len(mongodb_results)
|
||||
qdrant_avg = sum(d["ms"] for d in qdrant_results.values()) / len(qdrant_results)
|
||||
pgvector_avg = sum(d["ms"] for d in pgvector_results.values()) / len(
|
||||
pgvector_results
|
||||
)
|
||||
object_avg = sum(d["ms"] for d in object_results.values()) / len(object_results)
|
||||
|
||||
print("\n" + "=" * 70)
|
||||
print("AVERAGE RESPONSE TIME")
|
||||
print("=" * 70)
|
||||
print(f" PostgreSQL ILIKE (Text): {text_avg:.2f}ms")
|
||||
print(f" MongoDB Text: {mongodb_avg:.2f}ms")
|
||||
print(f" PostgreSQL pgvector (Vector): {pgvector_avg:.2f}ms")
|
||||
print(f" Qdrant HNSW (Vector): {qdrant_avg:.2f}ms")
|
||||
print(f" PostgreSQL JSON (Object): {object_avg:.2f}ms")
|
||||
|
||||
print("\n" + "=" * 70)
|
||||
print("ANALYSIS")
|
||||
print("=" * 70)
|
||||
print(
|
||||
"""
|
||||
1. TEXT SEARCH (PostgreSQL ILIKE):
|
||||
- Fast: ~0.7ms average
|
||||
- Exact substring matching
|
||||
- Case-insensitive
|
||||
- Good for keyword searches
|
||||
|
||||
2. VECTOR SEARCH - PostgreSQL pgvector (HNSW):
|
||||
- Speed: ~{:.1f}ms average
|
||||
- Built into PostgreSQL
|
||||
- No additional infrastructure needed
|
||||
- Good for single-database architecture
|
||||
|
||||
3. VECTOR SEARCH - Qdrant (HNSW):
|
||||
- Speed: ~{:.1f}ms average
|
||||
- Dedicated vector database
|
||||
- Better for large-scale deployments
|
||||
- Supports more advanced vector operations
|
||||
|
||||
4. OBJECT SEARCH (PostgreSQL JSON):
|
||||
- Very fast: ~{:.1f}ms average
|
||||
- Uses JSON containment operator
|
||||
- Works with YOLO metadata
|
||||
- Best for visual object queries
|
||||
|
||||
RECOMMENDATION:
|
||||
- For simple keyword searches: PostgreSQL ILIKE
|
||||
- For semantic search with single DB: PostgreSQL pgvector
|
||||
- For scalability: Qdrant
|
||||
- For visual content: PostgreSQL JSON with YOLO metadata
|
||||
""".format(pgvector_avg, qdrant_avg, object_avg)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user