# momentry_core/scripts/tmdb_cast_fetcher.py

#!/usr/bin/env python3
"""
TMDB Cast & Face Fetcher
Fetches top cast info and profile images from TMDB.
Requires: pip install requests
"""

import os
import sys
import json
import argparse
import requests
from pathlib import Path

# ======================== Configuration ========================

# The API key is read from the environment only. The API requests in this
# script send it as a query parameter, so the script exits right away when
# the variable is missing or empty.
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
if not TMDB_API_KEY:
    print(
        "⚠️ TMDB_API_KEY not found.",
        "👉 Please get a free API key from https://www.themoviedb.org/settings/api",
        "Please set the environment variable TMDB_API_KEY and try again.",
        sep="\n",
    )
    sys.exit(1)

# Root of the TMDB REST API (version 3 endpoints).
TMDB_BASE_URL = "https://api.themoviedb.org/3"
# Prefix for image URLs; a profile path from the API is appended to it.
IMG_BASE_URL = "https://image.tmdb.org/t/p/w185"
# Downloaded faces and metadata are stored below this directory.
OUTPUT_DIR = Path("data") / "cast_faces"

# ======================== Core Logic ========================


def search_movie(
    query: str, year: str | None = None, timeout: float = 10.0
) -> dict | None:
    """Search TMDB for a movie and return the best match.

    Args:
        query: Movie title to search for.
        year: Optional release year used to narrow the search.
        timeout: Seconds to wait for TMDB before giving up.

    Returns:
        The first entry of the response's ``results`` list, or ``None`` when
        the search yields nothing or the request fails.
    """
    url = f"{TMDB_BASE_URL}/search/movie"
    params = {"query": query, "api_key": TMDB_API_KEY, "language": "en-US", "page": 1}
    if year:
        params["year"] = year

    try:
        # Without an explicit timeout a stalled connection blocks forever.
        resp = requests.get(url, params=params, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # The error text from requests includes the full request URL, which
        # carries the API key as a query parameter; mask it before printing.
        reason = str(e).replace(TMDB_API_KEY, "***")
        print(f"❌ Search failed: {reason}")
        return None

    results = data.get("results") if isinstance(data, dict) else None
    return results[0] if results else None


def get_credits(movie_id: int, limit: int = 10, timeout: float = 10.0) -> list[dict]:
    """Get the leading cast credits for a movie.

    Args:
        movie_id: TMDB movie id.
        limit: Maximum number of cast entries to return (first entries of
            the response's ``cast`` list).
        timeout: Seconds to wait for TMDB before giving up.

    Returns:
        Up to ``limit`` cast entries, or an empty list when the request
        fails or the response contains no cast.
    """
    url = f"{TMDB_BASE_URL}/movie/{movie_id}/credits"
    params = {"api_key": TMDB_API_KEY, "language": "en-US"}

    try:
        # Without an explicit timeout a stalled connection blocks forever.
        resp = requests.get(url, params=params, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # The error text from requests includes the full request URL, which
        # carries the API key as a query parameter; mask it before printing.
        reason = str(e).replace(TMDB_API_KEY, "***")
        print(f"❌ Failed to get credits: {reason}")
        return []

    # "cast" may be absent or null; treat both as "no cast".
    cast = (data.get("cast") if isinstance(data, dict) else None) or []
    return cast[:limit]


def download_image(url: str, path: Path, timeout: float = 10.0) -> bool:
    """Download an image and write it to *path*.

    Args:
        url: Full image URL; an empty value is treated as "nothing to do".
        path: Destination file; missing parent directories are created.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``True`` when the file was written, ``False`` when *url* is empty or
        the download or the write failed.
    """
    if not url:
        return False
    try:
        # Without an explicit timeout a stalled connection blocks forever.
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(resp.content)
    except (requests.RequestException, OSError) as e:
        # Network problems and filesystem errors are both reported as a
        # failed download; anything else is a bug and should surface.
        print(f"❌ Download failed: {e}")
        return False
    return True


# ======================== Main ========================


def main():
    parser = argparse.ArgumentParser(description="Fetch TMDB Cast Faces")
    parser.add_argument("query", help="Movie title (e.g., 'Charade 1963')")
    parser.add_argument(
        "--limit", type=int, default=5, help="Number of cast faces to fetch"
    )
    args = parser.parse_args()

    # Parse year if present in query
    parts = args.query.split()
    year = None
    if parts[-1].isdigit() and len(parts[-1]) == 4:
        year = parts[-1]
        title = " ".join(parts[:-1])
    else:
        title = args.query

    print(f"🔍 Searching TMDB for: '{title}' ({year})")
    movie = search_movie(title, year)

    if not movie:
        print("❌ Movie not found.")
        sys.exit(1)

    print(
        f"✅ Found: {movie['title']} ({movie['release_date'][:4]}) - ID: {movie['id']}"
    )

    # Get Credits
    print("🎬 Fetching cast list...")
    cast = get_credits(movie["id"])

    if not cast:
        print("❌ No cast found.")
        sys.exit(1)

    # Create output directory for this movie
    safe_title = "".join(
        c if c.isalnum() or c in (" ", "-", "_") else "_" for c in movie["title"]
    ).strip()
    movie_dir = OUTPUT_DIR / safe_title / str(movie["id"])
    movie_dir.mkdir(parents=True, exist_ok=True)

    print(f"📂 Saving faces to: {movie_dir}")

    results = []
    for i, actor in enumerate(cast[: args.limit]):
        name = actor.get("name", "Unknown")
        role = actor.get("character", "Unknown")
        img_path = actor.get("profile_path")

        full_url = f"{IMG_BASE_URL}{img_path}" if img_path else None
        local_path = movie_dir / f"{name.replace(' ', '_')}.jpg"

        print(f"  👤 {i + 1}. {name} as {role}")

        if full_url:
            success = download_image(full_url, local_path)
            if success:
                print(f"     ✅ Saved: {local_path.name}")
                results.append({"name": name, "role": role, "image": str(local_path)})
            else:
                print("     ⚠️ Failed to download")
        else:
            print("     ⚠️ No profile image available")

    # Save metadata
    meta_path = movie_dir / "cast_data.json"
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Done! Saved {len(results)} cast images.")
    print(f"📄 Metadata: {meta_path}")


# Run the CLI only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()