feat: Initial v0.9 release with API Key authentication
## v0.9.20260325_144654

### Features
- API Key Authentication System
- Job Worker System
- V2 Backup Versioning

### Bug Fixes
- get_processor_results_by_job column mapping

Co-authored-by: OpenCode
This commit is contained in:
170
scripts/chinese_vector_test.py
Normal file
170
scripts/chinese_vector_test.py
Normal file
@@ -0,0 +1,170 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Natural Language Vector Search - Chinese Queries
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import psycopg2
|
||||
|
||||
|
||||
# Identifier of the video under test.
# NOTE(review): not referenced by any function visible in this script, so the
# searches below are not restricted to this video -- confirm that is intended.
VIDEO_UUID = "39567a0eb16f39fd"

# Keyword arguments handed unchanged to psycopg2.connect().
# NOTE(review): the credentials are hard-coded in source control; consider
# loading them from the environment instead.
POSTGRES_CONFIG = dict(
    host="localhost",
    port=5432,
    user="accusys",
    password="Test3200",
    database="momentry",
)

# Chinese natural language queries, grouped by theme. Every query is run
# against both vector back ends.
CHINESE_QUERIES = [
    # Scene
    "有人在說話",
    "戶外場景",
    "室內場景",
    # Actions
    "走路或移動",
    "對話或交談",
    "看著某樣東西",
    # Emotions
    "快樂或開心",
    "嚴肅或戲劇性",
    "喜劇或有趣",
    # Objects
    "戴著領帶",
    "拿著東西",
    "坐在椅子上",
    # Locations
    "城市或都市",
    "建築物或房間",
    "開放空間",
]
|
||||
|
||||
|
||||
def get_embedding(text, timeout=60):
    """Return the embedding vector for *text*.

    Posts *text* to the embeddings HTTP endpoint on ``localhost:11434``
    using the ``nomic-embed-text`` model.

    Args:
        text: The query string to embed.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the whole benchmark.

    Returns:
        The value of the ``"embedding"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within *timeout* seconds.
        KeyError: If the response JSON has no ``"embedding"`` field.
    """
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": "nomic-embed-text", "prompt": text},
        timeout=timeout,
    )
    # Surface HTTP failures directly instead of as a confusing KeyError on
    # the missing "embedding" field below.
    resp.raise_for_status()
    return resp.json()["embedding"]
|
||||
|
||||
|
||||
def test_qdrant(queries):
    """Run every query against the Qdrant ``AccusysDB`` collection and time it.

    Only the search request itself is timed; computing the embedding and
    decoding the JSON response are excluded from the measurement.

    Args:
        queries: Iterable of query strings.

    Returns:
        A dict mapping each query to ``{"ms": <elapsed milliseconds rounded
        to 2 decimals>, "results": <the response's "result" list, or [] when
        that field is absent>}``.

    Raises:
        requests.HTTPError: If Qdrant answers with a 4xx/5xx status. A failed
            search must not be recorded as a fast, empty result.
        requests.Timeout: If a search does not answer within 30 seconds.
    """
    results = {}

    for query in queries:
        embedding = get_embedding(query)

        # perf_counter is monotonic and high resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start = time.perf_counter()
        resp = requests.post(
            "http://localhost:6333/collections/AccusysDB/points/search",
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": embedding, "limit": 10},
            timeout=30,
        )
        elapsed = (time.perf_counter() - start) * 1000

        # Checked after the timing so the measurement covers the same span
        # as before (request round trip only).
        resp.raise_for_status()
        data = resp.json()
        results[query] = {"ms": round(elapsed, 2), "results": data.get("result", [])}

    return results
|
||||
|
||||
|
||||
def test_pgvector(queries):
    """Run every query against the pgvector-backed tables and time it.

    For each query the embedding is computed first (not timed), then the
    ten nearest rows of ``chunk_vectors`` are fetched together with the
    ``text`` field of the joined ``chunks.content`` JSON. The timed span
    covers executing the statement and fetching all rows.

    Args:
        queries: Iterable of query strings.

    Returns:
        A dict mapping each query to ``{"ms": <elapsed milliseconds rounded
        to 2 decimals>, "results": [{"chunk_id", "score", "text"}, ...]}``
        where ``score`` is ``1 - distance``.

    Raises:
        psycopg2.Error: If connecting or querying fails. The connection and
            cursor are closed in that case as well.
    """
    results = {}
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        # The cursor context manager closes the cursor on exit; the
        # connection itself is closed in the finally clause, so neither
        # leaks when a query or an embedding request raises.
        with conn.cursor() as cur:
            for query in queries:
                embedding = get_embedding(query)
                # Text form of the vector, cast with ::vector in the SQL.
                vector_str = "[" + ",".join(str(x) for x in embedding) + "]"

                # perf_counter is monotonic, unlike time.time().
                start = time.perf_counter()
                cur.execute(
                    """
                    SELECT cv.chunk_id, (cv.embedding_vector <=> %s::vector) as distance,
                    c.content->>'text' as text
                    FROM chunk_vectors cv
                    JOIN chunks c ON cv.chunk_id = c.chunk_id
                    WHERE cv.embedding_vector IS NOT NULL
                    ORDER BY cv.embedding_vector <=> %s::vector
                    LIMIT 10
                    """,
                    (vector_str, vector_str),
                )
                rows = cur.fetchall()
                elapsed = (time.perf_counter() - start) * 1000

                results[query] = {
                    "ms": round(elapsed, 2),
                    # <=> is pgvector's cosine-distance operator, so
                    # 1 - distance is the cosine similarity.
                    "results": [
                        {"chunk_id": r[0], "score": 1 - r[1], "text": r[2]}
                        for r in rows
                    ],
                }
    finally:
        conn.close()

    return results
|
||||
|
||||
|
||||
def _shorten(text, limit=50):
    """Return *text* cut to *limit* characters plus "..." when it is longer.

    ``None`` and the empty string both yield ``""``.
    """
    if not text:
        return ""
    return text[:limit] + "..." if len(text) > limit else text


def _average_ms(results):
    """Return the mean of the "ms" values in *results*, or 0.0 when empty."""
    if not results:
        return 0.0
    return sum(r["ms"] for r in results.values()) / len(results)


def main():
    """Run all Chinese queries on both back ends and print a comparison.

    Prints a header, the average response time of each back end, and for
    every query the top five hits of Qdrant and of pgvector with their
    scores and a text preview of at most 50 characters.
    """
    print("=" * 80)
    print("中文自然語言向量搜尋測試")
    print("Chinese Natural Language Vector Search Test")
    print("=" * 80)
    print("\nVideo: Charade 1963")
    print("Model: nomic-embed-text\n")

    print("Running Qdrant searches...")
    qdrant_results = test_qdrant(CHINESE_QUERIES)

    print("Running pgvector searches...")
    pgvector_results = test_pgvector(CHINESE_QUERIES)

    qdrant_avg = _average_ms(qdrant_results)
    pgvector_avg = _average_ms(pgvector_results)

    print("\n" + "=" * 80)
    print("平均回應時間 / AVERAGE RESPONSE TIME")
    print("=" * 80)
    print(f" Qdrant: {qdrant_avg:.2f}ms")
    print(f" pgvector: {pgvector_avg:.2f}ms")

    print("\n" + "=" * 80)
    print("詳細結果 / DETAILED RESULTS")
    print("=" * 80)

    for query in CHINESE_QUERIES:
        qd = qdrant_results[query]
        pg = pgvector_results[query]
        pg_hits = pg["results"]

        print(f"\n{'=' * 60}")
        print(f'查詢 / Query: "{query}"')
        print(f"{'=' * 60}")

        print(f"\n[Qdrant] Time: {qd['ms']:.1f}ms")
        print("-" * 60)
        for i, r in enumerate(qd["results"][:5]):
            # NOTE(review): the preview text is taken from the pgvector hit
            # at the same rank, not from the Qdrant hit itself, so score and
            # text may belong to different chunks. The Qdrant search does not
            # request payloads and the payload schema is not visible here;
            # confirm and switch to the Qdrant payload text.
            text = pg_hits[i]["text"] if i < len(pg_hits) else ""
            print(f" {i + 1:2}. [{r['score']:.3f}] {_shorten(text)}")

        print(f"\n[pgvector] Time: {pg['ms']:.1f}ms")
        print("-" * 60)
        for i, r in enumerate(pg_hits[:5]):
            print(f" {i + 1:2}. [{r['score']:.3f}] {_shorten(r['text'])}")
|
||||
|
||||
|
||||
# Run the benchmark only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user