#!/opt/homebrew/bin/python3.11
"""
Test Parent Chunk Summary Generation (Gemma 4)
"""

import json
import time

import ollama

# Configuration
UUID = "384b0ff44aaaa1f1"
ASR_PATH = f"output/{UUID}/{UUID}.asr.json"
MODEL = "gemma4:latest"

# The Prompt Template.
# NOTE: this string is passed through str.format(), so "{context}" must stay
# the only brace-delimited field; escape any literal braces as "{{" / "}}".
PARENT_SUMMARY_PROMPT = """
You are an expert film analyst. Analyze the following movie dialogue segment (approx 60 seconds).
Your task is to generate a structured JSON summary containing:

1. **narrative_summary**: A one-sentence summary of the main event/plot point.
2. **entities**: Key information extracted:
   - `who`: List of characters involved.
   - `where`: Inferred location (e.g., "Apartment", "Train").
   - `objects`: Key props mentioned (e.g., "Ticket", "Money").
3. **emotional_arc**: The emotional transition:
   - `start_mood`: Mood at the beginning.
   - `end_mood`: Mood at the end.
4. **plot_sequence**:
   - `scene_type`: Type of scene (e.g., "Confrontation", "Romance", "Discovery").
   - `key_action`: The main action taking place.

**IMPORTANT RULES:**
- Output **ONLY** valid JSON.
- Do NOT include "Thinking Process" or markdown formatting.
- If information is unknown, use "Unknown".
- Context: This is from the movie "Charade" (1963).

Dialogue:
{context}
"""


def load_sample(start_index, count=20):
    """Load a slice of dialogue to simulate a Parent Chunk.

    Reads the JSON file at ``ASR_PATH``, takes ``count`` entries of its
    ``"segments"`` list starting at ``start_index`` and joins their
    ``"text"`` fields with single spaces.

    Args:
        start_index: Index of the first segment to include.
        count: Maximum number of segments to include.

    Returns:
        The joined dialogue text. An empty string is returned when the file
        cannot be read or parsed, or when the slice selects no segments, so
        an error message is never mistaken for dialogue by the caller.
    """
    try:
        with open(ASR_PATH, "r", encoding="utf-8") as f:
            data = json.load(f)
        segments = data.get("segments", [])
        selected = segments[start_index : start_index + count]
        text = " ".join(s.get("text", "") for s in selected)
    except (OSError, ValueError, AttributeError, TypeError, KeyError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError; the
        # remaining types cover a file whose JSON has an unexpected shape.
        print(f"❌ Failed to load sample {start_index} from {ASR_PATH}: {e}")
        return ""

    print(f"📂 Loaded Sample {start_index}: {len(selected)} segments.")
    return text


def _extract_json_text(content):
    """Return the part of a model reply most likely to be the JSON object.

    Strips a ```json fenced block if one is present, then trims any text
    before the first "{" and after the last "}" (e.g. a leading
    "Thinking..." preamble). Falls back to the stripped input when no
    brace pair is found.
    """
    if "```json" in content:
        content = content.split("```json", 1)[1].split("```", 1)[0]

    first = content.find("{")
    last = content.rfind("}")
    if first != -1 and last > first:
        content = content[first : last + 1]

    return content.strip()


def run_test(name, context_text):
    """Send one dialogue chunk to the model and check that it returns JSON.

    Args:
        name: Label printed in the test header.
        context_text: Dialogue text substituted into the prompt template.

    Returns:
        True when the model reply parses as JSON; False when the input is
        blank, the chat call fails, or the reply is not valid JSON.
    """
    print(f"\n🧪 Testing: {name}")
    print("-" * 50)

    # Nothing to analyse (load failure or out-of-range slice): do not spend a
    # model call on an empty prompt.
    if not context_text or not context_text.strip():
        print("⚠️ No dialogue to summarize; skipping model call.")
        return False

    print(f"📖 Input Preview: {context_text[:100]}...")
    prompt = PARENT_SUMMARY_PROMPT.format(context=context_text)

    try:
        start = time.perf_counter()
        response = ollama.chat(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
        )
        duration = time.perf_counter() - start
        content = response["message"]["content"] or ""
    except Exception as e:
        # Script-level boundary: report any client/transport error as a
        # failed test instead of aborting the remaining tests.
        print(f"❌ API Error: {e}")
        return False

    candidate = _extract_json_text(content)

    # Attempt parse
    try:
        result = json.loads(candidate)
    except json.JSONDecodeError:
        print(f"⚠️ JSON Parse Failed ({duration:.2f}s)")
        print(candidate[:500])
        return False

    print(f"✅ Success ({duration:.2f}s)")
    print(json.dumps(result, indent=2))
    return True


def main():
    """Run the three sample summary tests and print a pass count.

    Returns:
        True when every test produced parseable JSON, otherwise False.
    """
    print(f"🚀 Starting Parent Chunk Summary Tests on '{UUID}'")

    cases = [
        # Test 1: Early Dialogue (Entities & Narrative Focus)
        # "possessed a ticket of passage..."
        ("Test 1: Early Plot (Entities & Narrative)", 10),
        # Test 2: Middle Conflict (Emotional Arc Focus)
        # "where did he keep his money..." (From previous context)
        ("Test 2: Conflict (Emotional Arc)", 50),
        # Test 3: Later Dialogue (Plot Sequence Focus)
        # Looking for a scene involving a conclusion or death aftermath;
        # a later section is picked to test robustness.
        ("Test 3: Late Plot (Sequence)", 150),
    ]

    results = []
    for position, (name, start_index) in enumerate(cases):
        if position:
            time.sleep(2)  # Cool down between model calls
        sample_text = load_sample(start_index=start_index)
        results.append(run_test(name, sample_text))

    print(f"\nPassed {sum(results)}/{len(results)} tests")
    return all(results)


if __name__ == "__main__":
    main()