#!/opt/homebrew/bin/python3.11
"""
Quick stamp search on 20 critical frames using OWL-ViT

Every ``frame_*.jpg`` found in BASE_DIR is queried once per entry of
SEARCH_TERMS with the zero-shot OWL-ViT detector.  For each detection whose
score is above MIN_SCORE the script:

* appends a record (frame, sec, term, score, bbox) to ``results.json``,
* saves a crop of the detected region to RESULTS_DIR, and
* draws the box and label onto one annotated image per frame.
"""
import glob
import json
import os

import cv2
import torch
from PIL import Image
from transformers import OwlViTProcessor, OwlViTForObjectDetection

BASE_DIR = "output/384b0ff44aaaa1f1/critical_scenes"
RESULTS_DIR = "output/384b0ff44aaaa1f1/critical_results"
os.makedirs(RESULTS_DIR, exist_ok=True)

# Threshold handed to the post-processor, and the stricter score a detection
# must exceed before it is recorded.
PREFILTER_THRESHOLD = 0.05
MIN_SCORE = 0.08

print("šŸ”¬ Loading OWL-ViT...")
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
model.eval()

SEARCH_TERMS = [
    "postage stamp",
    "stamp on envelope",
    "envelope",
    "hand holding paper",
    "document",
]


def _clip_box(box, width, height):
    """Truncate an (x1, y1, x2, y2) box to ints clipped to the image bounds.

    Coordinates outside the frame must not be used directly as slice bounds:
    a negative value would index from the opposite edge of the array and
    produce a wrong or empty crop.
    """
    x1, y1, x2, y2 = (int(v) for v in box)
    x1, x2 = (min(max(v, 0), width) for v in (x1, x2))
    y1, y2 = (min(max(v, 0), height) for v in (y1, y2))
    return x1, y1, x2, y2


frames = sorted(glob.glob(os.path.join(BASE_DIR, "frame_*.jpg")))
print(f"šŸ“ø Scanning {len(frames)} critical frames...")

all_detections = []

for frame_path in frames:
    frame_name = os.path.basename(frame_path)
    # "frame_12s.jpg" -> "12"
    sec = frame_name.replace("frame_", "").replace("s.jpg", "")

    # convert() forces the pixel data to load, so the file can be closed.
    with Image.open(frame_path) as raw_image:
        image = raw_image.convert("RGB")
    # The post-processor expects (height, width); PIL reports (width, height).
    target_sizes = torch.Tensor([image.size[::-1]])

    # Read the BGR frame once per frame.  Crops are cut from this clean copy;
    # boxes are drawn on a separate copy so every detection of the frame ends
    # up in the same annotated image.
    frame_bgr = cv2.imread(frame_path)
    if frame_bgr is None:
        print(f" {frame_name}: OpenCV could not read the frame, skipping crops/annotation")
        frame_h = frame_w = 0
    else:
        frame_h, frame_w = frame_bgr.shape[:2]
    annotated = None

    for term in SEARCH_TERMS:
        inputs = processor(text=[[term]], images=image, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)

        result = processor.post_process_object_detection(
            outputs=outputs,
            target_sizes=target_sizes,
            threshold=PREFILTER_THRESHOLD,
        )[0]

        term_slug = term.replace(" ", "_")
        hits_for_term = 0  # detections of this term in this frame so far

        for score, box in zip(result["scores"], result["boxes"]):
            s = float(score)
            if s <= MIN_SCORE:
                continue

            bbox = box.tolist()
            all_detections.append(
                {
                    "frame": frame_name,
                    "sec": sec,
                    "term": term,
                    "score": s,
                    "bbox": bbox,
                }
            )
            print(f" šŸ“ {sec}s | {term} | {s:.2f} | bbox={bbox}")

            # The first hit keeps the plain file name; later hits of the same
            # term in the same frame get a numeric suffix instead of
            # overwriting the earlier crop.
            suffix = "" if hits_for_term == 0 else f"_{hits_for_term}"
            hits_for_term += 1

            if frame_bgr is None:
                continue

            # Save crop
            x1, y1, x2, y2 = _clip_box(bbox, frame_w, frame_h)
            crop = frame_bgr[y1:y2, x1:x2]
            if crop.size > 0:
                crop_name = f"stamp_{sec}s_{term_slug}{suffix}.jpg"
                cv2.imwrite(os.path.join(RESULTS_DIR, crop_name), crop)

            # Annotate
            if annotated is None:
                annotated = frame_bgr.copy()
            cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 3)
            cv2.putText(
                annotated,
                f"{term} {s:.2f}",
                # Keep the label inside the image for boxes near the top edge.
                (x1, max(y1 - 10, 20)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (0, 255, 0),
                2,
            )

    # One annotated image per frame, written only when something was found.
    if annotated is not None:
        ann_name = f"annotated_{sec}s.jpg"
        cv2.imwrite(os.path.join(RESULTS_DIR, ann_name), annotated)

with open(os.path.join(RESULTS_DIR, "results.json"), "w", encoding="utf-8") as f:
    json.dump(all_detections, f, indent=2)

print(f"\nšŸ Found {len(all_detections)} detections. Check {RESULTS_DIR}")