#!/opt/homebrew/bin/python3.11
"""
Story Embedding Pipeline:
1. Read story chunks → LLM summary (Gemma4)
2. Embed summary (EmbeddingGemma)
3. Store in chunks table + Qdrant
"""
import json
import os
import subprocess
import sys
import time
import urllib.error
import urllib.request

UUID = "aeed71342a899fe4b4c57b7d41bcb692"
PSQL = ["/Users/accusys/pgsql/18.3/bin/psql", "-U", "accusys", "-d", "momentry", "-t", "-A"]
LLM_URL = "http://localhost:8082/v1/chat/completions"
EMBED_URL = "http://localhost:11436/v1/embeddings"
QDRANT_URL = "http://localhost:6333"
QDRANT_COL = "momentry_dev_stories"
# Vector size used for the Qdrant collection and for placeholder embeddings.
EMBED_DIM = 768


def psql(sql):
    """Run one SQL statement through the psql CLI and return its stripped stdout.

    Raises:
        RuntimeError: psql exited with a non-zero status (stderr is included).
        subprocess.TimeoutExpired: psql did not finish within 30 seconds.
    """
    r = subprocess.run(PSQL + ["-c", sql], capture_output=True, text=True, timeout=30)
    if r.returncode != 0:
        # Without this check a failed query is indistinguishable from an
        # empty result set.
        raise RuntimeError(f"psql exited with {r.returncode}: {r.stderr.strip()}")
    return r.stdout.strip()


def sql_literal(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled. NOTE(review): this assumes the server
    runs with standard_conforming_strings=on, so backslashes are not escape
    characters - confirm for this database.
    """
    return "'" + value.replace("'", "''") + "'"


def parse_row(line):
    """Split one unaligned psql output line into its five columns.

    Returns:
        (chunk_id, chunk_index, start_time, end_time, text) on success, or
        None when the line has too few fields or non-numeric index/times.
    """
    # maxsplit=4 keeps any '|' characters inside the text column intact.
    parts = line.split('|', 4)
    try:
        cid = parts[0].strip()
        idx = int(parts[1])
        st = float(parts[2])
        et = float(parts[3])
    except (IndexError, ValueError):
        return None
    dialogue = parts[4] if len(parts) > 4 else ""
    return cid, idx, st, et, dialogue


def http_send(url, payload, *, timeout, method="POST"):
    """Send *payload* as a JSON request body and return the raw response bytes.

    The response object is always closed. urllib errors (all OSError
    subclasses) propagate to the caller.
    """
    body = json.dumps(payload).encode()
    req = urllib.request.Request(
        url, data=body, headers={"Content-Type": "application/json"}, method=method
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return resp.read()


def call_llm(dialogue):
    """Ask the chat-completions endpoint for a 50-word summary of *dialogue*.

    Returns the stripped content of the first choice's message.
    """
    prompt = f"Dialogue: {dialogue}\n\n50-word summary:"
    raw = http_send(
        LLM_URL,
        {
            "model": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.1,
            "max_tokens": 100,
        },
        timeout=120,
    )
    return json.loads(raw)["choices"][0]["message"]["content"].strip()


def call_embed(text):
    """Return the embedding of *text* from the embeddings endpoint."""
    raw = http_send(EMBED_URL, {"input": text}, timeout=30)
    return json.loads(raw)["data"][0]["embedding"]


def ensure_collection():
    """Create the Qdrant collection (EMBED_DIM dims, Cosine) if it is missing.

    Best-effort: problems are reported on stdout and never raised, so the
    pipeline still runs when the collection check cannot be completed.
    """
    url = f"{QDRANT_URL}/collections/{QDRANT_COL}"
    try:
        with urllib.request.urlopen(url, timeout=10):
            return  # Collection info was returned, so it already exists.
    except urllib.error.HTTPError as e:
        if e.code != 404:
            print(f"Warning: could not check Qdrant collection {QDRANT_COL}: {e}")
            return
    except OSError as e:
        print(f"Warning: could not reach Qdrant at {QDRANT_URL}: {e}")
        return

    try:
        http_send(
            url,
            {"vectors": {"size": EMBED_DIM, "distance": "Cosine"}},
            timeout=30,
            method="PUT",
        )
    except OSError as e:
        print(f"Warning: could not create Qdrant collection {QDRANT_COL}: {e}")


def process_chunk(row, tag):
    """Summarise, embed and store one parsed chunk row.

    Args:
        row: tuple as returned by parse_row().
        tag: progress prefix such as "[3/120]" used in messages.

    Returns:
        (summary_failed, store_failures): whether the LLM/embedding step
        failed, and how many of the two stores (Postgres, Qdrant) failed.
    """
    cid, idx, st, et, dialogue = row
    summary_failed = False
    store_failures = 0

    if len(dialogue) < 10:
        summary = "[no dialogue]"
        # NOTE(review): an all-zero vector in a Cosine collection may not be
        # meaningful for search - confirm how Qdrant treats it.
        embedding = [0.0] * EMBED_DIM
    else:
        try:
            summary = call_llm(dialogue)
            time.sleep(0.3)
            embedding = call_embed(summary)
        except Exception as e:
            # Pipeline boundary: any failure here is reported and the chunk is
            # stored with placeholders so the run continues.
            # NOTE(review): "[error]" is written to summary_text, so the
            # selection query (NULL or '' only) will not retry this chunk on
            # a later run.
            print(f"{tag} Error: {cid} - {e}")
            summary_failed = True
            summary = "[error]"
            embedding = [0.0] * EMBED_DIM

    # Update DB
    try:
        psql(
            f"UPDATE dev.chunks SET summary_text={sql_literal(summary)}, "
            f"updated_at=CURRENT_TIMESTAMP WHERE chunk_id={sql_literal(cid)}"
        )
    except (RuntimeError, subprocess.TimeoutExpired) as e:
        print(f"{tag} DB update failed: {cid} - {e}")
        store_failures += 1

    # Store in Qdrant
    # NOTE(review): the point id is chunk_index + 1, so chunks from different
    # files sharing this collection would overwrite each other - confirm that
    # the collection holds a single file.
    point = {"points": [{
        "id": idx + 1,
        "vector": embedding,
        "payload": {
            "chunk_id": cid,
            "file_uuid": UUID,
            "start_time": st,
            "end_time": et,
            "summary": summary,
            "type": "story_summary",
        },
    }]}
    try:
        http_send(
            f"{QDRANT_URL}/collections/{QDRANT_COL}/points?wait=true",
            point,
            timeout=10,
            method="PUT",
        )
    except (OSError, ValueError) as e:
        # Still best-effort, but no longer silent.
        print(f"{tag} Qdrant upsert failed: {cid} - {e}")
        store_failures += 1

    return summary_failed, store_failures


def main():
    """Run the pipeline over every story chunk of UUID that lacks a summary."""
    # Step 0: Ensure Qdrant collection exists (768 dims)
    ensure_collection()

    # Step 1: Get all story chunks that need summaries
    # NOTE: rows are newline-delimited, so a text_content value containing a
    # newline is truncated at its first line; stray continuation lines are
    # skipped as malformed rows below.
    raw = psql(
        "SELECT chunk_id, chunk_index, start_time, end_time, text_content "
        f"FROM dev.chunks WHERE file_uuid={sql_literal(UUID)} AND chunk_type='story' "
        "AND (summary_text IS NULL OR summary_text = '') ORDER BY chunk_index"
    )
    lines = [l for l in raw.split('\n') if l.strip() and '|' in l]
    print(f"Chunks to process: {len(lines)}")

    total = len(lines)
    errors = 0
    store_failures = 0
    for i, line in enumerate(lines):
        tag = f"[{i+1}/{total}]"
        row = parse_row(line)
        if row is None:
            print(f"{tag} Error: malformed row skipped: {line[:80]!r}")
            errors += 1
        else:
            summary_failed, failed_stores = process_chunk(row, tag)
            if summary_failed:
                errors += 1
            store_failures += failed_stores

        if (i+1) % 20 == 0:
            print(f"[{i+1}/{total}] {errors} errors so far")

    print(f"\nDone. Processed: {total}, Errors: {errors}")
    if store_failures:
        print(f"Store failures (DB/Qdrant): {store_failures}")
    print(f"Qdrant: {QDRANT_COL}")


if __name__ == "__main__":
    main()