momentry_core/scripts/story_embed.py

#!/opt/homebrew/bin/python3.11
"""
Story Embedding Pipeline:
1. Read story chunks → LLM summary (Gemma4)
2. Embed summary (EmbeddingGemma)
3. Store in chunks table + Qdrant
"""

import json, urllib.request, subprocess, sys, time, os

UUID = "aeed71342a899fe4b4c57b7d41bcb692"
PSQL = ["/Users/accusys/pgsql/18.3/bin/psql", "-U", "accusys", "-d", "momentry", "-t", "-A"]
LLM_URL = "http://localhost:8082/v1/chat/completions"
EMBED_URL = "http://localhost:11436/v1/embeddings"
QDRANT_URL = "http://localhost:6333"
QDRANT_COL = "momentry_dev_stories"

def psql(sql):
    """Run one SQL statement through the psql command line and return its output.

    Args:
        sql: SQL text handed to psql with ``-c``.

    Returns:
        Whatever psql wrote to stdout, with surrounding whitespace stripped.
        The PSQL command line passes ``-t -A`` (tuples only, unaligned), so a
        query result arrives as one line per row with ``|`` between columns.
        The captured stdout is returned even when psql exits non-zero.

    Raises:
        subprocess.TimeoutExpired: psql did not finish within 30 seconds.
    """
    r = subprocess.run(PSQL + ["-c", sql], capture_output=True, text=True, timeout=30)
    if r.returncode != 0:
        # A failed statement used to be indistinguishable from an empty result;
        # report it, but keep returning stdout so existing callers still work.
        print(f"psql failed (exit {r.returncode}): {r.stderr.strip()}", file=sys.stderr)
    return r.stdout.strip()

def call_llm(dialogue):
    """Request a 50-word summary of *dialogue* from the chat endpoint at LLM_URL.

    Args:
        dialogue: Dialogue text that is embedded verbatim in the prompt.

    Returns:
        The ``content`` of the first choice's message, stripped of surrounding
        whitespace.

    Raises:
        urllib.error.URLError / OSError: the request failed or exceeded the
            120 second timeout.
        KeyError, IndexError, ValueError: the response body is not JSON or does
            not have the ``choices[0].message.content`` shape.
    """
    prompt = f"Dialogue: {dialogue}\n\n50-word summary:"
    body = json.dumps({"model": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.1, "max_tokens": 100}).encode()
    req = urllib.request.Request(LLM_URL, data=body, headers={"Content-Type": "application/json"})
    # The context manager closes the HTTP response (and its socket) even when
    # reading or decoding fails; the original left it open.
    with urllib.request.urlopen(req, timeout=120) as resp:
        payload = json.loads(resp.read())
    return payload["choices"][0]["message"]["content"].strip()

def call_embed(text):
    """Fetch the embedding vector for *text* from the endpoint at EMBED_URL.

    Args:
        text: The string sent as the request's ``input`` field.

    Returns:
        The ``embedding`` value of the first entry in the response's ``data``
        list.

    Raises:
        urllib.error.URLError / OSError: the request failed or exceeded the
            30 second timeout.
        KeyError, IndexError, ValueError: the response body is not JSON or does
            not have the ``data[0].embedding`` shape.
    """
    body = json.dumps({"input": text}).encode()
    req = urllib.request.Request(EMBED_URL, data=body, headers={"Content-Type": "application/json"})
    # Close the HTTP response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req, timeout=30) as resp:
        payload = json.loads(resp.read())
    return payload["data"][0]["embedding"]

# Step 0: Ensure Qdrant collection exists (768 dims)
# Best-effort: the run continues whatever happens here, but transport-level
# failures (Qdrant unreachable, request stalled) are now reported instead of
# being discarded. curl still exits 0 on an HTTP error reply because --fail is
# not passed -- presumably that is what Qdrant sends when the collection
# already exists, so re-runs stay quiet (TODO confirm against the Qdrant API).
_collection_setup = subprocess.run(
    ["curl", "-sS", "--max-time", "10", "-X", "PUT", f"{QDRANT_URL}/collections/{QDRANT_COL}",
     "-H", "Content-Type: application/json",
     "-d", '{"vectors":{"size":768,"distance":"Cosine"}}'],
    capture_output=True, text=True)
if _collection_setup.returncode != 0:
    print(f"Qdrant collection setup failed (curl exit {_collection_setup.returncode}): "
          f"{_collection_setup.stderr.strip()}", file=sys.stderr)

# Step 1: Get all story chunks that need summaries
# Each row is fetched as a single-line JSON object (row_to_json). JSON escapes
# newlines inside text_content, so multi-line dialogue can no longer break the
# line-based parsing of psql's output.
_PENDING_SQL = (
    "SELECT row_to_json(t) FROM ("
    "SELECT chunk_id, chunk_index, start_time, end_time, text_content "
    f"FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='story' "
    "AND (summary_text IS NULL OR summary_text = '')"
    ") t ORDER BY t.chunk_index"
)


def _sql_literal(value):
    """Return *value* quoted as a SQL string literal (single quotes doubled)."""
    # NOTE(review): relies on standard_conforming_strings being on (the
    # PostgreSQL default), so backslashes need no extra escaping.
    return "'" + value.replace("'", "''") + "'"


def _process_chunk(row_json, tag):
    """Summarise, embed and store one pending chunk.

    Args:
        row_json: One line of psql output holding the chunk row as JSON.
        tag: Progress prefix such as ``[3/120]`` used in error messages.

    Returns:
        True when the chunk was written to both the chunks table and Qdrant,
        False when any step failed (the failure is printed).
    """
    try:
        row = json.loads(row_json)
        cid = str(row["chunk_id"])
        idx = int(row["chunk_index"])
        st = float(row["start_time"])
        et = float(row["end_time"])
        dialogue = row["text_content"] or ""
    except (ValueError, KeyError, TypeError) as e:
        # One malformed row is reported and skipped instead of aborting the run.
        print(f"{tag} Error: unparseable row - {e}")
        return False

    if len(dialogue) < 10:
        # Too short to summarise: store a marker and a zero vector whose
        # length matches the 768-dim collection.
        summary = "[no dialogue]"
        embedding = [0.0] * 768
    else:
        try:
            summary = call_llm(dialogue)
            time.sleep(0.3)
            embedding = call_embed(summary)
        except Exception as e:
            print(f"{tag} Error: {cid} - {e}")
            # Leave the chunk untouched. Writing "[error]" into summary_text
            # would make the selection query above skip it forever, and would
            # put a meaningless zero vector into the search collection.
            return False

    # Update DB. RETURNING makes a successful update produce output, so an
    # empty result means psql failed.
    updated = psql(
        f"UPDATE dev.chunks SET summary_text={_sql_literal(summary)}, "
        f"updated_at=CURRENT_TIMESTAMP WHERE chunk_id={_sql_literal(cid)} "
        "RETURNING chunk_id"
    )
    if not updated:
        print(f"{tag} Error: {cid} - DB update failed")
        return False

    # Store in Qdrant.
    # NOTE(review): the point id depends only on chunk_index, so chunks from
    # different files written to the same collection would overwrite each
    # other -- confirm the collection is used for a single file.
    point = json.dumps({"points": [{"id": idx + 1, "vector": embedding,
        "payload": {"chunk_id": cid, "file_uuid": UUID, "start_time": st, "end_time": et,
                     "summary": summary, "type": "story_summary"}
    }]}).encode()
    req = urllib.request.Request(f"{QDRANT_URL}/collections/{QDRANT_COL}/points?wait=true",
        data=point, headers={"Content-Type": "application/json"}, method="PUT")
    try:
        with urllib.request.urlopen(req, timeout=10):
            pass
    except Exception as e:
        # Previously swallowed silently; the summary is already in the DB, so
        # make the missing vector visible to the operator.
        print(f"{tag} Error: {cid} - Qdrant upsert failed: {e}")
        return False

    return True


lines = [ln for ln in psql(_PENDING_SQL).split('\n') if ln.strip()]

print(f"Chunks to process: {len(lines)}")
total = len(lines)
errors = 0

for i, line in enumerate(lines):
    if not _process_chunk(line, f"[{i+1}/{total}]"):
        errors += 1

    if (i+1) % 20 == 0:
        print(f"[{i+1}/{total}] {errors} errors so far")

print(f"\nDone. Processed: {total}, Errors: {errors}")
print(f"Qdrant: {QDRANT_COL}")