From 701e71463d9cb94f0fe7b7bedbc0939952a95eeb Mon Sep 17 00:00:00 2001 From: M5Max128 Date: Fri, 22 May 2026 08:35:28 +0800 Subject: [PATCH] feat: identity PATCH update, alias system, name UNIQUE removal - Add PATCH /api/v1/identity/:identity_uuid endpoint - Migration 030: remove name UNIQUE, add tmdb_id index - TMDb upsert: ON CONFLICT (name) -> ON CONFLICT (tmdb_id) - get_or_create_identity: pre-check by name - upload_identity: ON CONFLICT (name) -> ON CONFLICT (uuid) - Search: include aliases in identity text search - Add scripts/llm_metadata_enhancer.py - Add DESIGN/IdentityUpdateAndAliasSystem.md --- .../DESIGN/IdentityUpdateAndAliasSystem.md | 229 ++++++++++++++++++ .../030_remove_identity_name_unique.sql | 18 ++ scripts/llm_metadata_enhancer.py | 104 ++++++++ src/api/identity_api.rs | 156 +++++++++++- src/core/db/postgres_db.rs | 23 +- src/core/tmdb/probe.rs | 10 +- 6 files changed, 524 insertions(+), 16 deletions(-) create mode 100644 docs_v1.0/DESIGN/IdentityUpdateAndAliasSystem.md create mode 100644 migrations/030_remove_identity_name_unique.sql create mode 100644 scripts/llm_metadata_enhancer.py diff --git a/docs_v1.0/DESIGN/IdentityUpdateAndAliasSystem.md b/docs_v1.0/DESIGN/IdentityUpdateAndAliasSystem.md new file mode 100644 index 0000000..b74194d --- /dev/null +++ b/docs_v1.0/DESIGN/IdentityUpdateAndAliasSystem.md @@ -0,0 +1,229 @@ +--- +document_type: "design_doc" +service: "MOMENTRY_CORE" +title: "Identity Update & Alias System" +version: "V1.0" +date: "2026-05-22" +author: "M5" +status: "draft" +--- + +# Identity Update & Alias System + +| Item | Value | +|------|-------| +| Scope | Identity CRUD expansion, alias system, LLM-enhanced metadata | +| Status | Draft | +| Key principle | `uuid` is the true identity key; `name` is display label (no longer UNIQUE) | + +--- + +## Overview + +Currently, identity records have no update endpoint and `name` is constrained to UNIQUE. This design adds: + +1. 
`PATCH /api/v1/identity/:identity_uuid` — partial update for name, metadata, aliases, status, type +2. Remove `name UNIQUE` constraint — allow multiple identities with the same display name +3. TMDb upsert key changes from `name` to `tmdb_id` +4. Alias system with BCP 47 locale tagging stored in `metadata.aliases` +5. LLM background task for metadata structuring and alias generation + +--- + +## Schema Changes + +### Migration 030 + +File: `migrations/030_remove_identity_name_unique.sql` + +```sql +-- Phase 1: Remove name UNIQUE (keep NOT NULL) +ALTER TABLE identities DROP CONSTRAINT IF EXISTS identities_name_key; + +-- Phase 2: Partial unique index for TMDb-sourced identities +CREATE UNIQUE INDEX IF NOT EXISTS idx_identities_tmdb_id + ON identities(tmdb_id) WHERE tmdb_id IS NOT NULL; +``` + +### Aliases Storage + +Aliases are stored in `identities.metadata::jsonb` under the key `aliases`: + +```json +{ + "aliases": [ + {"locale": "en", "name": "John Smith"}, + {"locale": "zh-TW", "name": "約翰·史密斯"}, + {"locale": "ja", "name": "ジョン・スミス"} + ], + "summary": "American actor and producer...", + "nationality": "American", + "birth_date": "1970-01-15", + "profession": ["actor", "producer"] +} +``` + +No schema change needed — `metadata JSONB DEFAULT '{}'` already supports this. + +--- + +## API Changes + +### PATCH /api/v1/identity/:identity_uuid + +**Request:** +```json +{ + "name": "John Smith", + "aliases": [ + {"locale": "en", "name": "John Smith"}, + {"locale": "zh-TW", "name": "約翰·史密斯"} + ], + "metadata": {"summary": "American actor..."}, + "status": "confirmed", + "identity_type": "people" +} +``` + +All fields are optional. Only provided fields are updated. + +**Response (200):** +```json +{ + "success": true, + "identity_uuid": "abc-...", + "updated_fields": ["name", "aliases", "metadata"] +} +``` + +**Processing flow:** +1. Lookup identity by UUID → 404 if not found +2. Dynamic UPDATE SQL (COALESCE for optional fields) +3. 
If name changed → update `_index.json` +4. If name changed → update Qdrant face point payloads +5. Call `save_identity_file_by_pool()` → sync identity.json to disk +6. Return `success`, `identity_uuid`, and `updated_fields` (the response shape shown above) + +--- + +## Alias System + +### Locale Tagging (BCP 47) + +Standard tags for alias entries: + +| Locale | Tag | Example | +|--------|-----|---------| +| English | `en` | John Smith | +| Traditional Chinese | `zh-TW` | 約翰·史密斯 | +| Simplified Chinese | `zh-CN` | 约翰·史密斯 | +| Japanese | `ja` | ジョン・スミス | +| Korean | `ko` | 존 스미스 | +| Cantonese | `yue` | 約翰·史密夫 | +| French | `fr` | Jean Smith | +| Spanish | `es` | Juan Smith | + +### Frontend Display Logic + +```javascript +function getDisplayName(identity, preferredLocale) { + // 1. Try exact locale match + const match = identity.aliases?.find(a => a.locale === preferredLocale); + if (match) return match.name; + + // 2. Try language-only match (zh-TW → zh) + const lang = preferredLocale.split('-')[0]; + const langMatch = identity.aliases?.find(a => a.locale.startsWith(lang)); + if (langMatch) return langMatch.name; + + // 3. 
Fallback to identity.name + return identity.name; +} +``` + +### Search Integration + +Identity search should include aliases: + +```sql +SELECT * FROM identities +WHERE name ILIKE $1 + OR metadata->'aliases' @> $2::jsonb +ORDER BY name +``` + +--- + +## Upsert Key Changes + +| Location | Current Key | New Key | Rationale | +|----------|------------|---------|-----------| +| `tmdb/probe.rs` | `ON CONFLICT (name)` | `ON CONFLICT (tmdb_id) WHERE tmdb_id IS NOT NULL` | TMDb ID is the true identity for TMDb sources | +| `tmdb/ingest.rs` | `ON CONFLICT (name)` | `ON CONFLICT (tmdb_id)` | Same as above | +| `postgres_db.rs:get_or_create_identity` | `ON CONFLICT (name) DO UPDATE` | Query by name first, then INSERT with uuid | Maintain backward compatibility for name lookup | +| `identity_api.rs:upload_identity` | `ON CONFLICT (name)` | `ON CONFLICT (uuid)` | Upload provides uuid; uuid is the true key | + +--- + +## LLM Metadata Enhancer + +### Script: `scripts/llm_metadata_enhancer.py` + +**Trigger:** Called as background task after TMDb registration or manual PATCH. + +**Input:** +```json +{ + "name": "John Smith", + "biography": "John Smith (born January 15, 1970) is an American actor...", + "existing_metadata": {"tmdb_id": 123, "tmdb_profile": "..."} +} +``` + +**Output:** +```json +{ + "metadata": { + "summary": "American actor and producer known for...", + "nationality": "American", + "birth_date": "1970-01-15", + "profession": ["actor", "producer"], + "aliases": [ + {"locale": "en", "name": "John Smith"}, + {"locale": "zh-TW", "name": "約翰·史密斯"}, + {"locale": "ja", "name": "ジョン・スミス"} + ] + } +} +``` + +### LLM Prompt Design + +The prompt defines a fixed output schema for the LLM to follow: + +1. Read biography text + identity name +2. Extract structured fields: summary, nationality, birth_date, profession +3. Generate locale-tagged aliases based on known translations +4. 
Output as JSON only (no extra text) + +--- + +## Affected Files + +| File | Change | Complexity | +|------|--------|------------| +| `migrations/030_remove_identity_name_unique.sql` | New | Low | +| `src/api/identity_api.rs` | Add PATCH route + handler | Medium | +| `src/core/tmdb/probe.rs` | Change upsert key | Low | +| `src/core/db/postgres_db.rs` | Change get_or_create_identity | Low | +| `src/api/identity_api.rs` (upload) | Change upsert key | Low | +| `scripts/llm_metadata_enhancer.py` | New script | Medium | +| `src/api/tmdb_api.rs` or background | LLM task integration | Low | + +--- + +## Version History + +| Version | Date | Author | Description | +|---------|------|--------|-------------| +| V1.0 | 2026-05-22 | M5 | Initial design | diff --git a/migrations/030_remove_identity_name_unique.sql b/migrations/030_remove_identity_name_unique.sql new file mode 100644 index 0000000..00a4555 --- /dev/null +++ b/migrations/030_remove_identity_name_unique.sql @@ -0,0 +1,18 @@ +-- Migration 030: Remove name UNIQUE constraint from identities table +-- Rationale: uuid is the true identity key; name is a display label that can repeat. +-- TMDb-sourced identities use tmdb_id as their unique key. + +BEGIN; + +-- Phase 1: Remove name UNIQUE (keep NOT NULL — every identity must have a name) +ALTER TABLE identities DROP CONSTRAINT IF EXISTS identities_name_key; + +-- Phase 2: Add updated_at column for tracking modifications +ALTER TABLE identities ADD COLUMN IF NOT EXISTS updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(); + +-- Phase 3: Partial unique index for TMDb-sourced identities +-- Only applies to rows with a non-null tmdb_id. 
+CREATE UNIQUE INDEX IF NOT EXISTS idx_identities_tmdb_id + ON identities(tmdb_id) WHERE tmdb_id IS NOT NULL; + +COMMIT; diff --git a/scripts/llm_metadata_enhancer.py b/scripts/llm_metadata_enhancer.py new file mode 100644 index 0000000..656bd42 --- /dev/null +++ b/scripts/llm_metadata_enhancer.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +""" +LLM Metadata Enhancer for Momentry Identity + +Reads identity name + biography from stdin, calls llama.cpp (Gemma4) to +produce structured metadata: summary, nationality, profession, aliases (BCP 47 locale-tagged). + +Output: JSON to stdout (no extra text). + +Usage: + echo '{"name": "John Smith", "biography": "..."}' | python3 llm_metadata_enhancer.py + python3 llm_metadata_enhancer.py --url http://127.0.0.1:8081 < input.json + +Requires: + - llama.cpp server running (default: http://127.0.0.1:8081) + - pip install requests +""" + +import json +import sys +import argparse +import requests + +DEFAULT_API_URL = "http://127.0.0.1:8081" + +SYSTEM_PROMPT = """You are a metadata structuring assistant for a media asset management system. + +Given an identity name and biography text, produce a structured JSON object with these fields: + +- summary: 2-3 sentence summary in the same language as the biography +- nationality: inferred nationality or null +- profession: array of inferred professions/titles +- birth_date: YYYY-MM-DD format if available, else null +- aliases: array of {locale: "BCP47_tag", name: "translated_name"} objects + - Use common well-known translations only (do not fabricate) + - Locale tags follow BCP 47 (e.g., en, zh-TW, zh-CN, ja, ko, fr, es, yue, th, ar, ru) + - Include at least one alias with locale "en" using the original name + - Only include locales where the name has a widely recognized translation + +Output ONLY valid JSON. 
No explanation, no markdown, no extra text."""
+
+
+def call_llm(api_url: str, name: str, biography: str) -> dict:
+    """Query the chat-completions API and return the parsed JSON reply (raises on HTTP or JSON errors)."""
+    user_prompt = f"Identity name: {name}\n\nBiography:\n{biography}"
+
+    payload = {
+        "messages": [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": user_prompt},
+        ],
+        "temperature": 0.1,
+        "max_tokens": 1024,
+        "stop": [],
+    }
+
+    resp = requests.post(f"{api_url}/v1/chat/completions", json=payload, timeout=60)
+
+    if resp.status_code != 200:
+        raise RuntimeError(f"LLM API error {resp.status_code}: {resp.text}")
+
+    content = resp.json()["choices"][0]["message"]["content"].strip()
+
+    # Models often wrap the answer in ``` fences (with or without a newline after
+    # the language tag) or add stray prose; keep only the outermost {...} object.
+    start, end = content.find("{"), content.rfind("}")
+    if 0 <= start < end:
+        content = content[start : end + 1]
+
+    return json.loads(content)
+
+
+def main():
+    """CLI entry: read {"name", "biography"} JSON from --input or stdin, print the result (or {"error": ...}) as JSON."""
+    parser = argparse.ArgumentParser(description="Enhance identity metadata via LLM")
+    parser.add_argument("--url", default=DEFAULT_API_URL, help="llama.cpp server URL")
+    parser.add_argument("--input", help="Input JSON file (default: stdin)")
+    args = parser.parse_args()
+
+    if args.input:
+        with open(args.input) as f:
+            data = json.load(f)
+    else:
+        data = json.load(sys.stdin)
+
+    name = data.get("name", "")
+    biography = data.get("biography", "")
+
+    if not name:
+        result = {"error": "name is required"}
+    elif not biography:
+        result = {"error": "biography is required"}
+    else:
+        try:
+            result = call_llm(args.url, name, biography)
+        except Exception as e:
+            result = {"error": str(e)}
+
+    json.dump(result, sys.stdout, ensure_ascii=False, indent=2)
+    sys.stdout.write("\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/api/identity_api.rs b/src/api/identity_api.rs
index d277fb4..03f55cf 100644
--- a/src/api/identity_api.rs
+++ b/src/api/identity_api.rs
@@ -2,7 +2,7 @@ use axum::{
     extract::{Multipart, Path, Query, State},
     http::StatusCode,
     response::{Html, 
Json},
-    routing::{get, post},
+    routing::{get, patch, post},
     Router,
 };
 use serde::{Deserialize, Serialize};
@@ -20,7 +20,9 @@ pub fn identity_routes() -> Router {
         )
         .route(
             "/api/v1/identity/:identity_uuid",
-            get(get_identity_detail).delete(delete_identity),
+            get(get_identity_detail)
+                .delete(delete_identity)
+                .patch(update_identity),
         )
         .route(
             "/api/v1/identity/:identity_uuid/files",
@@ -785,8 +787,8 @@ async fn upload_identity(
     let result = sqlx::query_as::<_, (String,)>(&format!(
         "INSERT INTO {} (uuid, name, identity_type, source, status, tmdb_id, tmdb_profile, metadata) \
          VALUES ($1, $2, $3, $4, $5, $6, $7, $8) \
-         ON CONFLICT (name) DO UPDATE SET \
-         source = EXCLUDED.source, status = EXCLUDED.status, \
+         ON CONFLICT (uuid) DO UPDATE SET \
+         name = EXCLUDED.name, source = EXCLUDED.source, status = EXCLUDED.status, \
          tmdb_id = EXCLUDED.tmdb_id, tmdb_profile = EXCLUDED.tmdb_profile, \
          metadata = EXCLUDED.metadata \
          RETURNING uuid::text", identities_table
@@ -1167,8 +1169,12 @@ async fn search_identities_by_text(
            JOIN {} c ON c.file_uuid = fd.file_uuid
              AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0)
              AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0)
-           WHERE i.name ILIKE $1
-             AND ($2::text IS NULL OR fd.file_uuid = $2)
+           WHERE (i.name ILIKE $1
+                  OR EXISTS (
+                      SELECT 1 FROM jsonb_array_elements(CASE WHEN jsonb_typeof(i.metadata->'aliases') = 'array' THEN i.metadata->'aliases' ELSE '[]'::jsonb END) AS a
+                      WHERE a->>'name' ILIKE $1
+                  ))
+             AND ($2::text IS NULL OR fd.file_uuid = $2)
            ORDER BY i.name, c.start_time
            LIMIT $3"#,
         id_table, fd_table, chunk_table
@@ -1222,3 +1228,159 @@ async fn search_identities_by_text(
         results,
     }))
 }
+
+// ── PATCH /api/v1/identity/:identity_uuid ────────────────────
+
+/// Body of `PATCH /api/v1/identity/:identity_uuid`; fields left out are not modified.
+#[derive(Debug, Deserialize)]
+struct UpdateIdentityRequest {
+    name: Option,
+    /// Merged into the stored metadata: top-level keys sent here overwrite, others are kept.
+    metadata: Option,
+    status: Option,
+    identity_type: Option,
+}
+
+/// Success payload: the identity that was patched and the columns that were written.
+#[derive(Debug, Serialize)]
+struct UpdateIdentityResponse {
+    success: bool,
+    identity_uuid: String,
+    updated_fields: Vec,
+}
+
+/// Partially updates one identity.
+///
+/// Only fields present in the body are written; `metadata` is merged into the stored
+/// JSONB instead of replacing it, and `updated_at` is bumped on every successful write.
+/// After the row is updated, `identity.json` (and `_index.json` when the name actually
+/// changed) are re-synced on a best-effort basis.
+///
+/// # Errors
+/// `400` for a malformed UUID or an empty patch, `404` when the identity does not exist,
+/// `500` when a database call fails.
+async fn update_identity(
+    State(state): State,
+    Path(identity_uuid): Path,
+    Json(req): Json,
+) -> Result, (StatusCode, Json)> {
+    let uuid_clean = identity_uuid.replace('-', "");
+    let uuid_parsed = uuid::Uuid::parse_str(&uuid_clean).map_err(|_| {
+        (
+            StatusCode::BAD_REQUEST,
+            Json(serde_json::json!({
+                "success": false, "error": "Invalid identity_uuid"
+            })),
+        )
+    })?;
+
+    let table = crate::core::db::schema::table_name("identities");
+    let existing: Option<(i32, String)> = sqlx::query_as(&format!(
+        "SELECT id, name FROM {} WHERE uuid = $1::uuid",
+        table
+    ))
+    .bind(uuid_parsed)
+    .fetch_optional(state.db.pool())
+    .await
+    .map_err(|e| {
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({
+                "success": false, "error": format!("DB error: {}", e)
+            })),
+        )
+    })?;
+
+    let (_identity_id, old_name) = existing.ok_or_else(|| {
+        (
+            StatusCode::NOT_FOUND,
+            Json(serde_json::json!({
+                "success": false, "error": "Identity not found"
+            })),
+        )
+    })?;
+
+    let mut updated_fields: Vec = Vec::new();
+    let mut set_clauses: Vec = Vec::new();
+
+    if req.name.is_some() {
+        set_clauses.push(format!("name = ${}", set_clauses.len() + 1));
+        updated_fields.push("name".to_string());
+    }
+    if req.metadata.is_some() {
+        // Merge rather than replace so aliases / TMDb keys kept in metadata survive a partial PATCH
+        set_clauses.push(format!(
+            "metadata = COALESCE(metadata, '{{}}'::jsonb) || ${}::jsonb",
+            set_clauses.len() + 1
+        ));
+        updated_fields.push("metadata".to_string());
+    }
+    if req.status.is_some() {
+        set_clauses.push(format!("status = ${}", set_clauses.len() + 1));
+        updated_fields.push("status".to_string());
+    }
+    if req.identity_type.is_some() {
+        set_clauses.push(format!("identity_type = ${}", set_clauses.len() + 1));
+        updated_fields.push("identity_type".to_string());
+    }
+
+    if set_clauses.is_empty() {
+        return Err((
+            StatusCode::BAD_REQUEST,
+            Json(serde_json::json!({
+                "success": false, "error": "No fields to update"
+            })),
+        ));
+    }
+
+    let set_sql = format!("{}, updated_at = NOW()", set_clauses.join(", "));
+    let uuid_param = set_clauses.len() + 1;
+    let update_sql = format!(
+        "UPDATE {} SET {} WHERE uuid = ${}::uuid",
+        table, set_sql, uuid_param
+    );
+
+    let mut query = sqlx::query(&update_sql);
+
+    // Bind in exactly the order the placeholders were numbered above
+    if let Some(ref name) = req.name {
+        query = query.bind(name);
+    }
+    if let Some(ref metadata) = req.metadata {
+        query = query.bind(metadata);
+    }
+    if let Some(ref status) = req.status {
+        query = query.bind(status);
+    }
+    if let Some(ref identity_type) = req.identity_type {
+        query = query.bind(identity_type);
+    }
+
+    query = query.bind(uuid_parsed);
+
+    query.execute(state.db.pool()).await.map_err(|e| {
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({
+                "success": false, "error": format!("Update failed: {}", e)
+            })),
+        )
+    })?;
+
+    // Sync identity.json to disk (best-effort: the DB row is the source of truth)
+    let _ =
+        crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &uuid_clean)
+            .await;
+
+    // Only touch _index.json when the display name actually changed
+    if let Some(new_name) = req.name.as_deref().filter(|n| *n != old_name) {
+        // Best-effort, like the identity.json sync above
+        let _ = crate::core::identity::storage::update_index(&uuid_clean, new_name);
+    }
+
+    Ok(Json(UpdateIdentityResponse {
+        success: true,
+        identity_uuid: uuid_clean,
+        updated_fields,
+    }))
+}
diff --git a/src/core/db/postgres_db.rs b/src/core/db/postgres_db.rs
index 1a13036..7567ebb 100644
--- a/src/core/db/postgres_db.rs
+++ b/src/core/db/postgres_db.rs
@@ -3210,12 +3210,27 @@ impl PostgresDb {
 
     pub async fn get_or_create_identity(&self, name: &str) -> Result {
         let identities_table = schema::table_name("identities");
-        let id: i32 = sqlx::query_scalar(&format!(
-            "INSERT INTO {} (name, identity_type, source, status) VALUES ($1, 'people', 'user_defined', 'confirmed') \
-             ON CONFLICT (name) DO UPDATE SET updated_at = CURRENT_TIMESTAMP RETURNING id", identities_table
+        // First: reuse the oldest identity with this name (names are no longer unique, so pin the order)
+        if let Some(id) = sqlx::query_scalar::<_, i32>(&format!(
+            "SELECT id FROM {} WHERE name = $1 ORDER BY id LIMIT 1",
+            identities_table
         ))
         .bind(name)
-        .fetch_one(&self.pool).await?;
+        .fetch_optional(&self.pool)
+        .await?
+ { + return Ok(id); + } + // Not found: create new with generated uuid + let id: i32 = sqlx::query_scalar(&format!( + "INSERT INTO {} (uuid, name, identity_type, source, status) \ + VALUES (gen_random_uuid(), $1, 'people', 'user_defined', 'confirmed') \ + RETURNING id", + identities_table + )) + .bind(name) + .fetch_one(&self.pool) + .await?; Ok(id) } } diff --git a/src/core/tmdb/probe.rs b/src/core/tmdb/probe.rs index 25ad064..d010117 100644 --- a/src/core/tmdb/probe.rs +++ b/src/core/tmdb/probe.rs @@ -94,12 +94,11 @@ async fn upsert_identities_from_disk( let result = sqlx::query(&format!( "INSERT INTO {} (uuid, name, identity_type, source, status, tmdb_id, tmdb_profile, metadata) \ VALUES ($1::uuid, $2, 'people', 'tmdb', 'confirmed', $3, $4, $5::jsonb) \ - ON CONFLICT (name) DO UPDATE SET \ + ON CONFLICT (tmdb_id) WHERE tmdb_id IS NOT NULL DO UPDATE SET \ uuid = COALESCE({}.uuid, $1::uuid), \ - tmdb_id = COALESCE(EXCLUDED.tmdb_id, {}.tmdb_id), \ tmdb_profile = COALESCE(EXCLUDED.tmdb_profile, {}.tmdb_profile), \ metadata = {}.metadata || $5::jsonb", - identities_table, identities_table, identities_table, identities_table, identities_table + identities_table, identities_table, identities_table, identities_table )) .bind(&identity_file.identity_uuid) .bind(&identity_file.name) @@ -225,12 +224,11 @@ pub async fn create_identities_from_data( let result = sqlx::query_as::<_, (uuid::Uuid,)>(&format!( "INSERT INTO {} (name, identity_type, source, status, tmdb_id, tmdb_profile, metadata) \ VALUES ($1, 'people', 'tmdb', 'confirmed', $2, $3, $4::jsonb) \ - ON CONFLICT (name) DO UPDATE SET \ - tmdb_id = COALESCE(EXCLUDED.tmdb_id, {}.tmdb_id), \ + ON CONFLICT (tmdb_id) WHERE tmdb_id IS NOT NULL DO UPDATE SET \ tmdb_profile = COALESCE(EXCLUDED.tmdb_profile, {}.tmdb_profile), \ metadata = {}.metadata || $4::jsonb \ RETURNING uuid", - identities_table, identities_table, identities_table, identities_table + identities_table, identities_table, identities_table )) 
.bind(&member.name) .bind(member.id as i64)