#!/usr/bin/env python3
"""
LLM Metadata Enhancer for Momentry Identity

Reads identity name + biography from stdin (or ``--input FILE``), calls
llama.cpp (Gemma4) to produce structured metadata: summary, nationality,
profession, birth_date, aliases (BCP 47 locale-tagged).

Output: JSON to stdout (no extra text). Failures are reported as
``{"error": "..."}``. The exit status is 1 when the input itself cannot be
read or is not a JSON object, and 0 otherwise.

Usage:
    echo '{"name": "John Smith", "biography": "..."}' | python3 llm_metadata_enhancer.py
    python3 llm_metadata_enhancer.py --url http://127.0.0.1:8081 < input.json
    python3 llm_metadata_enhancer.py --input input.json --timeout 120

Requires:
    - llama.cpp server running (default: http://127.0.0.1:8081)
    - pip install requests
"""

import argparse
import json
import re
import sys
from typing import Any, Optional, Sequence

try:
    import requests
except ImportError:
    # Keep the module importable; call_llm() reports the missing dependency
    # so the CLI can still answer with a JSON error instead of a traceback.
    requests = None

DEFAULT_API_URL = "http://127.0.0.1:8081"
DEFAULT_TIMEOUT = 60.0

SYSTEM_PROMPT = """You are a metadata structuring assistant for a media asset management system.
Given an identity name and biography text, produce a structured JSON object with these fields:

- summary: 2-3 sentence summary in the same language as the biography
- nationality: inferred nationality or null
- profession: array of inferred professions/titles
- birth_date: YYYY-MM-DD format if available, else null
- aliases: array of {locale: "BCP47_tag", name: "translated_name"} objects
  - Use common well-known translations only (do not fabricate)
  - Locale tags follow BCP 47 (e.g., en, zh-TW, zh-CN, ja, ko, fr, es, yue, th, ar, ru)
  - Include at least one alias with locale "en" using the original name
  - Only include locales where the name has a widely recognized translation

Output ONLY valid JSON.
No explanation, no markdown, no extra text."""

_FENCE = "```"
# Optional info string (e.g. "json") plus whitespace right after an opening fence.
_FENCE_INFO_RE = re.compile(r"[A-Za-z0-9_+-]*\s*")


def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    text = text.strip()
    if not text.startswith(_FENCE):
        return text
    body = text[len(_FENCE):]
    # Drop the closing fence and anything after it; a missing closing fence
    # (e.g. truncated output) leaves the body untouched.
    head, fence, _tail = body.rpartition(_FENCE)
    if fence:
        body = head
    # The pattern can match the empty string, so match() always succeeds.
    # This handles both "```json\n{...}" and the single-line "```json {...}```".
    body = body[_FENCE_INFO_RE.match(body).end():]
    return body.strip()


def _parse_json_object(content: str) -> dict:
    """Parse the model's reply into a dict, tolerating fences and stray prose."""
    try:
        parsed = json.loads(_strip_code_fence(content))
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span when the model wrapped the
        # object in explanatory text despite the prompt.
        start, end = content.find("{"), content.rfind("}")
        if start == -1 or end < start:
            raise
        parsed = json.loads(content[start:end + 1])
    if not isinstance(parsed, dict):
        raise ValueError(
            f"LLM returned JSON {type(parsed).__name__}, expected an object"
        )
    return parsed


def _message_content(body: Any) -> str:
    """Extract ``choices[0].message.content`` from a chat-completions reply."""
    try:
        content = body["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise RuntimeError(
            "LLM API response has no choices[0].message.content"
        ) from exc
    if not isinstance(content, str):
        raise RuntimeError("LLM API response content is not text")
    return content


def call_llm(
    api_url: str,
    name: str,
    biography: str,
    *,
    timeout: float = DEFAULT_TIMEOUT,
) -> dict:
    """Ask the LLM server for structured metadata about one identity.

    Args:
        api_url: Base URL of the server; a trailing slash is tolerated.
        name: Identity name inserted into the user prompt.
        biography: Biography text inserted into the user prompt.
        timeout: Seconds passed to ``requests.post`` as its timeout.

    Returns:
        The JSON object produced by the model, as a dict.

    Raises:
        RuntimeError: ``requests`` is not installed, the server answered with
            a non-200 status, or the reply has no text message content.
        ValueError: The model output is not a JSON object
            (``json.JSONDecodeError`` is a subclass).
    """
    if requests is None:
        raise RuntimeError(
            "the 'requests' package is required (pip install requests)"
        )

    user_prompt = f"Identity name: {name}\n\nBiography:\n{biography}"
    payload = {
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.1,
        "max_tokens": 1024,
        "stop": [],
    }

    # rstrip avoids "//v1/..." when the caller passes "http://host:port/".
    endpoint = f"{api_url.rstrip('/')}/v1/chat/completions"
    resp = requests.post(endpoint, json=payload, timeout=timeout)
    if resp.status_code != 200:
        raise RuntimeError(f"LLM API error {resp.status_code}: {resp.text}")

    return _parse_json_object(_message_content(resp.json()))


def _load_input(path: Optional[str]) -> Any:
    """Load the request JSON from *path*, or from stdin when *path* is empty."""
    if path:
        # JSON interchange is UTF-8; do not depend on the locale default.
        with open(path, encoding="utf-8") as fh:
            return json.load(fh)
    return json.load(sys.stdin)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the CLI and write exactly one JSON document to stdout.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        1 when the input could not be read, parsed, or is not a JSON object;
        0 otherwise (including LLM failures, which are reported in the JSON).
    """
    parser = argparse.ArgumentParser(description="Enhance identity metadata via LLM")
    parser.add_argument("--url", default=DEFAULT_API_URL, help="llama.cpp server URL")
    parser.add_argument("--input", help="Input JSON file (default: stdin)")
    parser.add_argument(
        "--timeout",
        type=float,
        default=DEFAULT_TIMEOUT,
        help="HTTP timeout in seconds",
    )
    args = parser.parse_args(argv)

    status = 0
    try:
        data = _load_input(args.input)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        result = {"error": f"invalid input: {exc}"}
        status = 1
    else:
        if not isinstance(data, dict):
            result = {"error": "input must be a JSON object"}
            status = 1
        else:
            name = data.get("name", "")
            biography = data.get("biography", "")
            if not name:
                result = {"error": "name is required"}
            elif not biography:
                result = {"error": "biography is required"}
            else:
                try:
                    result = call_llm(
                        args.url, name, biography, timeout=args.timeout
                    )
                except Exception as e:
                    # Top-level boundary: the contract is JSON-only stdout,
                    # so every failure becomes an error object.
                    result = {"error": str(e)}

    json.dump(result, sys.stdout, ensure_ascii=False, indent=2)
    sys.stdout.write("\n")
    return status


if __name__ == "__main__":
    sys.exit(main())