Files
momentry_core/scripts/tmdb_cast_fetcher.py
Warren e75c4d6f07 cleanup: remove dead code and duplicate docs
- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
2026-05-04 01:31:21 +08:00

167 lines
4.9 KiB
Python

#!/usr/bin/env python3
"""
TMDB Cast & Face Fetcher
Fetches top cast info and profile images from TMDB.
Requires: pip install requests
"""
import os
import sys
import json
import argparse
import requests
from pathlib import Path
# ======================== Configuration ========================
# The TMDB API key is read from the environment only; the script never prompts
# for it. Without a key no request can succeed, so we stop immediately.
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
if not TMDB_API_KEY:
    # Diagnostics belong on stderr so stdout stays clean for callers that
    # capture or pipe the script's normal output.
    print("⚠️ TMDB_API_KEY not found.", file=sys.stderr)
    print(
        "👉 Please get a free API key from https://www.themoviedb.org/settings/api",
        file=sys.stderr,
    )
    print(
        "Please set the environment variable TMDB_API_KEY and try again.",
        file=sys.stderr,
    )
    sys.exit(1)

# Base URL that every API endpoint path is appended to.
TMDB_BASE_URL = "https://api.themoviedb.org/3"
# Prefix for image URLs; a cast member's `profile_path` is appended to it.
IMG_BASE_URL = "https://image.tmdb.org/t/p/w185"
# Root directory for downloaded images (relative to the working directory).
OUTPUT_DIR = Path("data/cast_faces")
# ======================== Core Logic ========================
def search_movie(query: str, year: str | None = None, timeout: float = 10.0):
    """Search TMDB for a movie and return the best match.

    Args:
        query: Movie title to search for.
        year: Optional release year, sent to TMDB as the ``year`` parameter.
        timeout: Seconds to wait for TMDB before giving up on the request.

    Returns:
        The first entry of the response's ``results`` list, or ``None`` when
        the list is empty/missing or the request fails (the failure is
        printed, not raised).
    """
    url = f"{TMDB_BASE_URL}/search/movie"
    params = {"query": query, "api_key": TMDB_API_KEY, "language": "en-US", "page": 1}
    if year:
        params["year"] = year
    try:
        # Without a timeout, requests can block forever on a stalled connection.
        resp = requests.get(url, params=params, timeout=timeout)
        resp.raise_for_status()
        results = resp.json().get("results")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON. The exception text
        # can contain the request URL, whose query string carries the API key,
        # so redact the key before printing.
        print(f"❌ Search failed: {str(e).replace(TMDB_API_KEY, '***')}")
        return None
    return results[0] if results else None
def get_credits(movie_id: int, limit: int = 10, timeout: float = 10.0) -> list[dict]:
    """Fetch the leading cast entries for a movie from TMDB.

    Args:
        movie_id: TMDB movie identifier.
        limit: Maximum number of cast entries to return (default 10, matching
            the previous hard-coded cap). Negative values are treated as 0.
        timeout: Seconds to wait for TMDB before giving up on the request.

    Returns:
        Up to ``limit`` entries from the response's ``cast`` list, in the
        order TMDB returned them; an empty list when there is no cast or the
        request fails (the failure is printed, not raised).
    """
    url = f"{TMDB_BASE_URL}/movie/{movie_id}/credits"
    params = {"api_key": TMDB_API_KEY, "language": "en-US"}
    try:
        # Without a timeout, requests can block forever on a stalled connection.
        resp = requests.get(url, params=params, timeout=timeout)
        resp.raise_for_status()
        # `or []` also covers an explicit JSON null for "cast".
        cast = resp.json().get("cast") or []
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON. The exception text
        # can contain the request URL, whose query string carries the API key,
        # so redact the key before printing.
        print(f"❌ Failed to get credits: {str(e).replace(TMDB_API_KEY, '***')}")
        return []
    return cast[: max(limit, 0)]
def download_image(url: str, path: Path, timeout: float = 10.0) -> bool:
    """Download the image at *url* and write it to *path*.

    Args:
        url: Absolute image URL; an empty or ``None`` value is skipped.
        path: Destination file. Missing parent directories are created.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``True`` when the file was written; ``False`` when *url* is empty or
        the download/write fails (the failure is printed, not raised).
    """
    if not url:
        return False
    try:
        # Without a timeout, requests can block forever on a stalled connection.
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(resp.content)
    except (requests.RequestException, OSError) as e:
        # OSError covers filesystem failures (permissions, disk full, bad name).
        print(f"❌ Download failed: {e}")
        return False
    return True
# ======================== Main ========================
def main():
parser = argparse.ArgumentParser(description="Fetch TMDB Cast Faces")
parser.add_argument("query", help="Movie title (e.g., 'Charade 1963')")
parser.add_argument(
"--limit", type=int, default=5, help="Number of cast faces to fetch"
)
args = parser.parse_args()
# Parse year if present in query
parts = args.query.split()
year = None
if parts[-1].isdigit() and len(parts[-1]) == 4:
year = parts[-1]
title = " ".join(parts[:-1])
else:
title = args.query
print(f"🔍 Searching TMDB for: '{title}' ({year})")
movie = search_movie(title, year)
if not movie:
print("❌ Movie not found.")
sys.exit(1)
print(
f"✅ Found: {movie['title']} ({movie['release_date'][:4]}) - ID: {movie['id']}"
)
# Get Credits
print("🎬 Fetching cast list...")
cast = get_credits(movie["id"])
if not cast:
print("❌ No cast found.")
sys.exit(1)
# Create output directory for this movie
safe_title = "".join(
c if c.isalnum() or c in (" ", "-", "_") else "_" for c in movie["title"]
).strip()
movie_dir = OUTPUT_DIR / safe_title / str(movie["id"])
movie_dir.mkdir(parents=True, exist_ok=True)
print(f"📂 Saving faces to: {movie_dir}")
results = []
for i, actor in enumerate(cast[: args.limit]):
name = actor.get("name", "Unknown")
role = actor.get("character", "Unknown")
img_path = actor.get("profile_path")
full_url = f"{IMG_BASE_URL}{img_path}" if img_path else None
local_path = movie_dir / f"{name.replace(' ', '_')}.jpg"
print(f" 👤 {i + 1}. {name} as {role}")
if full_url:
success = download_image(full_url, local_path)
if success:
print(f" ✅ Saved: {local_path.name}")
results.append({"name": name, "role": role, "image": str(local_path)})
else:
print(" ⚠️ Failed to download")
else:
print(" ⚠️ No profile image available")
# Save metadata
meta_path = movie_dir / "cast_data.json"
with open(meta_path, "w", encoding="utf-8") as f:
json.dump(results, f, indent=2, ensure_ascii=False)
print(f"\n✅ Done! Saved {len(results)} cast images.")
print(f"📄 Metadata: {meta_path}")
if __name__ == "__main__":
main()