feat: identity PATCH update, alias system, name UNIQUE removal
- Add PATCH /api/v1/identity/:identity_uuid endpoint - Migration 030: remove name UNIQUE, add tmdb_id index - TMDb upsert: ON CONFLICT (name) -> ON CONFLICT (tmdb_id) - get_or_create_identity: pre-check by name - upload_identity: ON CONFLICT (name) -> ON CONFLICT (uuid) - Search: include aliases in identity text search - Add scripts/llm_metadata_enhancer.py - Add DESIGN/IdentityUpdateAndAliasSystem.md
This commit is contained in:
104
scripts/llm_metadata_enhancer.py
Normal file
104
scripts/llm_metadata_enhancer.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
LLM Metadata Enhancer for Momentry Identity
|
||||
|
||||
Reads identity name + biography from stdin, calls llama.cpp (Gemma4) to
|
||||
produce structured metadata: summary, nationality, profession, aliases (BCP 47 locale-tagged).
|
||||
|
||||
Output: JSON to stdout (no extra text).
|
||||
|
||||
Usage:
|
||||
echo '{"name": "John Smith", "biography": "..."}' | python3 llm_metadata_enhancer.py
|
||||
python3 llm_metadata_enhancer.py --url http://127.0.0.1:8081 < input.json
|
||||
|
||||
Requires:
|
||||
- llama.cpp server running (default: http://127.0.0.1:8081)
|
||||
- pip install requests
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import argparse
|
||||
import requests
|
||||
|
||||
# Base URL of the llama.cpp server used when --url is not given.
DEFAULT_API_URL = "http://127.0.0.1:8081"

# System message sent with every request. It tells the model which fields to
# produce and to answer with a bare JSON object; the four indented bullets
# qualify the "aliases" field.
SYSTEM_PROMPT = """You are a metadata structuring assistant for a media asset management system.

Given an identity name and biography text, produce a structured JSON object with these fields:

- summary: 2-3 sentence summary in the same language as the biography
- nationality: inferred nationality or null
- profession: array of inferred professions/titles
- birth_date: YYYY-MM-DD format if available, else null
- aliases: array of {locale: "BCP47_tag", name: "translated_name"} objects
  - Use common well-known translations only (do not fabricate)
  - Locale tags follow BCP 47 (e.g., en, zh-TW, zh-CN, ja, ko, fr, es, yue, th, ar, ru)
  - Include at least one alias with locale "en" using the original name
  - Only include locales where the name has a widely recognized translation

Output ONLY valid JSON. No explanation, no markdown, no extra text."""
|
||||
|
||||
|
||||
def _strip_code_fence(content: str) -> str:
    """Return *content* without a surrounding Markdown code fence or ``json`` tag."""
    text = content.strip()
    if text.startswith("```"):
        # Drop the opening fence, then everything from the closing fence on.
        # Without a closing fence rsplit leaves the text unchanged.
        body = text[3:].rsplit("```", 1)[0]
        info, newline, rest = body.partition("\n")
        if newline and not info.lstrip().startswith(("{", "[")):
            # Usual layout: the opening fence line carries only an info
            # string (e.g. "json"), so the payload starts on the next line.
            body = rest
        else:
            # Single-line fence such as ```json {...}```: skip just the tag.
            tag_len = 0
            while tag_len < len(body) and (
                body[tag_len].isalnum() or body[tag_len] in "_-+"
            ):
                tag_len += 1
            body = body[tag_len:]
        text = body.strip()
    # Some replies prefix the payload with a bare "json" label and no fence.
    if text.startswith("json"):
        text = text[4:].strip()
    return text


def call_llm(api_url: str, name: str, biography: str) -> dict:
    """Ask the chat-completions endpoint for structured identity metadata.

    Sends SYSTEM_PROMPT plus a user message built from *name* and
    *biography* to ``{api_url}/v1/chat/completions`` and parses the reply
    text as JSON.

    Args:
        api_url: Base URL of the server; a trailing slash is tolerated.
        name: Identity name inserted into the user prompt.
        biography: Biography text inserted into the user prompt.

    Returns:
        The value decoded from the model's reply (the prompt asks for a
        JSON object).

    Raises:
        RuntimeError: The server answered with a status other than 200.
        json.JSONDecodeError: The reply text is not valid JSON.

    Errors raised by ``requests`` (connection failure, timeout) and lookup
    errors on an unexpected response shape propagate unchanged.
    """
    user_prompt = f"Identity name: {name}\n\nBiography:\n{biography}"

    payload = {
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.1,
        "max_tokens": 1024,
        "stop": [],
    }

    # Normalise the base URL so "http://host/" does not yield "//v1/...".
    endpoint = f"{api_url.rstrip('/')}/v1/chat/completions"
    resp = requests.post(endpoint, json=payload, timeout=60)

    if resp.status_code != 200:
        raise RuntimeError(f"LLM API error {resp.status_code}: {resp.text}")

    content = resp.json()["choices"][0]["message"]["content"]

    # Models sometimes wrap the JSON in a Markdown code block despite the prompt.
    return json.loads(_strip_code_fence(content))
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: read identity JSON, print enhanced metadata.

    Input is a JSON object with "name" and "biography" keys, read from the
    file given by --input or from stdin. The result of call_llm() is written
    to stdout as indented JSON followed by a newline. Any failure (unreadable
    or malformed input, missing fields, LLM error) is reported as
    ``{"error": "..."}`` on stdout instead of a traceback, so stdout always
    carries a single JSON document.
    """
    parser = argparse.ArgumentParser(description="Enhance identity metadata via LLM")
    parser.add_argument("--url", default=DEFAULT_API_URL, help="llama.cpp server URL")
    parser.add_argument("--input", help="Input JSON file (default: stdin)")
    args = parser.parse_args()

    error = None
    data = None
    try:
        if args.input:
            # Biographies are frequently non-ASCII; do not rely on the
            # platform default encoding when reading the file.
            with open(args.input, encoding="utf-8") as f:
                data = json.load(f)
        else:
            data = json.load(sys.stdin)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        error = f"invalid input: {e}"

    if error is None and not isinstance(data, dict):
        error = "input must be a JSON object"

    name = biography = ""
    if error is None:
        name = data.get("name", "")
        biography = data.get("biography", "")
        if not name:
            error = "name is required"
        elif not biography:
            error = "biography is required"

    if error is not None:
        result = {"error": error}
    else:
        try:
            result = call_llm(args.url, name, biography)
        except Exception as e:
            # Top-level boundary: surface every failure as JSON on stdout.
            result = {"error": str(e)}

    json.dump(result, sys.stdout, ensure_ascii=False, indent=2)
    sys.stdout.write("\n")
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user