- Remove session-ses_2f27.md (161KB raw session log) - Remove 49 ROOT_* duplicate files across REFERENCE/ - Remove 14 duplicate files between REFERENCE/ root and history/ - Remove asr_legacy.rs (dead code, replaced by asr.rs) - Remove src/core/worker/ (duplicate JobWorker) - Remove src/core/layers/ (empty directory) - Remove 4 .bak files in src/ - Remove 7 dead private methods in worker/processor.rs - Remove backup directory from git tracking
167 lines
4.9 KiB
Python
167 lines
4.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
TMDB Cast & Face Fetcher
|
|
Fetches top cast info and profile images from TMDB.
|
|
Requires: pip install requests
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import argparse
|
|
import requests
|
|
from pathlib import Path
|
|
|
|
# ======================== Configuration ========================
|
|
|
|
# The API key is read from the environment only; without it the script
# prints setup instructions and exits with status 1.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY")
if not TMDB_API_KEY:
    for _message in (
        "⚠️ TMDB_API_KEY not found.",
        "👉 Please get a free API key from https://www.themoviedb.org/settings/api",
        "Please set the environment variable TMDB_API_KEY and try again.",
    ):
        print(_message)
    sys.exit(1)

# Endpoint roots and the local directory that downloaded faces are stored in.
TMDB_BASE_URL = "https://api.themoviedb.org/3"
IMG_BASE_URL = "https://image.tmdb.org/t/p/w185"
OUTPUT_DIR = Path("data/cast_faces")
|
|
|
|
# ======================== Core Logic ========================
|
|
|
|
|
|
def search_movie(
    query: str, year: str | None = None, *, timeout: float = 10.0
) -> dict | None:
    """Search TMDB for a movie and return the first result.

    Args:
        query: Movie title to search for.
        year: Optional release year, sent as the ``year`` search parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The first entry of the response's ``results`` list, or ``None`` when
        the list is empty/absent or the request fails. Failures are printed,
        not raised.
    """
    url = f"{TMDB_BASE_URL}/search/movie"
    params = {"query": query, "api_key": TMDB_API_KEY, "language": "en-US", "page": 1}
    if year:
        params["year"] = year

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the script indefinitely.
        resp = requests.get(url, params=params, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"❌ Search failed: {e}")
        return None

    results = data.get("results") if isinstance(data, dict) else None
    return results[0] if results else None
|
|
|
|
|
|
def get_credits(
    movie_id: int, limit: int = 10, *, timeout: float = 10.0
) -> list[dict]:
    """Fetch the leading cast entries for a movie from TMDB.

    Args:
        movie_id: TMDB movie identifier.
        limit: Maximum number of cast entries to return (default 10, the
            value that was previously hard-coded). Negative values are
            treated as 0.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        Up to ``limit`` entries from the response's ``cast`` list, in the
        order TMDB returned them. An empty list is returned when the field is
        missing or the request fails. Failures are printed, not raised.
    """
    url = f"{TMDB_BASE_URL}/movie/{movie_id}/credits"
    params = {"api_key": TMDB_API_KEY, "language": "en-US"}

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the script indefinitely.
        resp = requests.get(url, params=params, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"❌ Failed to get credits: {e}")
        return []

    # `or []` also covers an explicit JSON null for "cast".
    cast = (data.get("cast") if isinstance(data, dict) else None) or []
    return cast[: max(limit, 0)]
|
|
|
|
|
|
def download_image(url: str, path: Path, *, timeout: float = 10.0) -> bool:
    """Download ``url`` and write the response body to ``path``.

    Args:
        url: Image URL. A falsy value (empty string or ``None``) is treated
            as "nothing to download".
        path: Destination file; missing parent directories are created.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``True`` when the file was written, ``False`` when ``url`` is falsy
        or the download/write failed. Failures are printed, not raised.
    """
    if not url:
        return False

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the script indefinitely.
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(resp.content)
    except (requests.RequestException, OSError) as e:
        # OSError covers failures creating the directory or writing the file.
        print(f"❌ Download failed: {e}")
        return False
    return True
|
|
|
|
|
|
# ======================== Main ========================
|
|
|
|
|
|
def _split_title_year(query: str) -> tuple[str, str | None]:
    """Split a trailing 4-digit year off ``query`` and return (title, year)."""
    parts = query.split()
    # Require at least one title word so a bare "1917" stays a title instead
    # of becoming a year with an empty title. An empty query has no parts.
    if len(parts) > 1:
        last = parts[-1]
        if len(last) == 4 and last.isascii() and last.isdigit():
            return " ".join(parts[:-1]), last
    return query, None


def _safe_name(text: str) -> str:
    """Replace every character except alphanumerics, ' ', '-', '_' with '_'."""
    return "".join(c if c.isalnum() or c in " -_" else "_" for c in text).strip()


def main() -> None:
    """Command-line entry point: look up a movie and save its cast faces.

    Parses the ``query`` argument (optionally ending in a 4-digit year) and
    ``--limit``, searches TMDB, downloads the profile image of each of the
    first ``--limit`` cast members into
    ``OUTPUT_DIR/<sanitised title>/<movie id>/`` and writes ``cast_data.json``
    there, listing the successfully saved images.

    Exits with status 1 when no movie or no cast is found, and with an
    argparse usage error when the query is empty.
    """
    parser = argparse.ArgumentParser(description="Fetch TMDB Cast Faces")
    parser.add_argument("query", help="Movie title (e.g., 'Charade 1963')")
    parser.add_argument(
        "--limit", type=int, default=5, help="Number of cast faces to fetch"
    )
    args = parser.parse_args()

    # Parse year if present in query
    title, year = _split_title_year(args.query)
    if not title.strip():
        parser.error("query must not be empty")

    print(f"🔍 Searching TMDB for: '{title}' ({year})")
    movie = search_movie(title, year)

    if not movie:
        print("❌ Movie not found.")
        sys.exit(1)

    # Guard against a missing or empty release_date instead of raising.
    release_year = (movie.get("release_date") or "")[:4] or "unknown"
    print(f"✅ Found: {movie['title']} ({release_year}) - ID: {movie['id']}")

    # Get Credits
    print("🎬 Fetching cast list...")
    cast = get_credits(movie["id"])

    if not cast:
        print("❌ No cast found.")
        sys.exit(1)

    # Create output directory for this movie
    movie_dir = OUTPUT_DIR / _safe_name(movie["title"]) / str(movie["id"])
    movie_dir.mkdir(parents=True, exist_ok=True)

    print(f"📂 Saving faces to: {movie_dir}")

    results = []
    # A negative --limit would otherwise slice from the end of the list.
    # NOTE: get_credits is called with its default, so at most 10 entries
    # are available here regardless of --limit.
    for i, actor in enumerate(cast[: max(args.limit, 0)]):
        name = actor.get("name") or "Unknown"
        role = actor.get("character", "Unknown")
        img_path = actor.get("profile_path")

        full_url = f"{IMG_BASE_URL}{img_path}" if img_path else None
        # Sanitise the name so characters such as '/' cannot break or escape
        # the output directory.
        local_path = movie_dir / f"{_safe_name(name).replace(' ', '_')}.jpg"

        print(f" 👤 {i + 1}. {name} as {role}")

        if not full_url:
            print(" ⚠️ No profile image available")
            continue

        if download_image(full_url, local_path):
            print(f" ✅ Saved: {local_path.name}")
            results.append({"name": name, "role": role, "image": str(local_path)})
        else:
            print(" ⚠️ Failed to download")

    # Save metadata
    meta_path = movie_dir / "cast_data.json"
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Done! Saved {len(results)} cast images.")
    print(f"📄 Metadata: {meta_path}")


if __name__ == "__main__":
    main()
|