#!/opt/homebrew/bin/python3.11
"""Search for a magnifying glass in key stamp scenes using OWL-ViT.

Every ``mag_*.jpg`` frame in ``BASE_DIR`` is queried once per entry of
``SEARCH_TERMS``. For each detection scoring above ``SCORE_THRESHOLD`` a crop
of the box is saved, and for each frame with at least one detection a single
annotated copy showing every box is saved. All output goes to ``RESULTS_DIR``.
"""
import glob
import os
import sys

import cv2
import torch
from PIL import Image
from transformers import OwlViTForObjectDetection, OwlViTProcessor

BASE_DIR = "output/384b0ff44aaaa1f1/magnifying_glass"
RESULTS_DIR = "output/384b0ff44aaaa1f1/magnifying_glass_results"

MODEL_NAME = "google/owlvit-base-patch32"
SCORE_THRESHOLD = 0.05
BOX_COLOR = (0, 255, 0)  # OpenCV colour order is BGR, so this is green.

SEARCH_TERMS = [
    "magnifying glass",
    "magnifier",
    "loupe",
    "lens",
    "looking glass",
    "glass",
    "round glass",
]


def _clamp_box(box, width: int, height: int) -> tuple[int, int, int, int]:
    """Truncate *box* to ints and clamp it to a ``width`` x ``height`` image.

    Predicted boxes can extend past the image border. Negative coordinates
    must not reach a NumPy slice, where they would wrap around to the other
    side of the image.
    """
    x1, y1, x2, y2 = (int(v) for v in box)
    x1 = min(max(x1, 0), width)
    x2 = min(max(x2, 0), width)
    y1 = min(max(y1, 0), height)
    y2 = min(max(y2, 0), height)
    return x1, y1, x2, y2


def _unique_name(stem: str, used: set[str]) -> str:
    """Return ``stem.jpg``, or ``stem_N.jpg`` if that name was already used.

    The returned name is added to *used*. This keeps two detections with the
    same term and the same rounded score from overwriting each other.
    """
    name = f"{stem}.jpg"
    n = 1
    while name in used:
        n += 1
        name = f"{stem}_{n}.jpg"
    used.add(name)
    return name


def _detect(image, term: str, processor, model) -> list[tuple[float, list[float]]]:
    """Run one text query against *image*.

    Returns a list of ``(score, [x1, y1, x2, y2])`` pairs in pixel
    coordinates of *image*, already filtered by ``SCORE_THRESHOLD``.
    """
    inputs = processor(text=[[term]], images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # PIL reports (width, height); post-processing wants (height, width).
    target_sizes = torch.Tensor([image.size[::-1]])
    result = processor.post_process_object_detection(
        outputs=outputs, target_sizes=target_sizes, threshold=SCORE_THRESHOLD
    )[0]
    # The threshold is applied by post-processing, so no second score check
    # is needed here.
    return [
        (float(score), box.tolist())
        for score, box in zip(result["scores"], result["boxes"])
    ]


def _process_frame(frame_path: str, processor, model, used_names: set[str]) -> int:
    """Search one frame with every term; save crops and one annotated image.

    Returns the number of detections reported for the frame. A frame that
    OpenCV cannot read is skipped with a warning and counts as zero.
    """
    clean = cv2.imread(frame_path)
    if clean is None:
        # cv2.imread signals failure by returning None instead of raising.
        print(f"Skipping unreadable frame: {frame_path}", file=sys.stderr)
        return 0
    height, width = clean.shape[:2]
    # Boxes are drawn on a copy so that crops are taken from unmarked pixels.
    annotated = clean.copy()

    frame_name = os.path.basename(frame_path)
    sec = frame_name.removeprefix("mag_").removesuffix("s.jpg")

    with Image.open(frame_path) as raw:
        image = raw.convert("RGB")

    hits = 0
    for term in SEARCH_TERMS:
        for score, box in _detect(image, term, processor, model):
            x1, y1, x2, y2 = _clamp_box(box, width, height)

            if x2 > x1 and y2 > y1:
                stem = f"mag_{sec}s_{term.replace(' ', '_')}_{score:.2f}"
                crop_name = _unique_name(stem, used_names)
                cv2.imwrite(
                    os.path.join(RESULTS_DIR, crop_name), clean[y1:y2, x1:x2]
                )

            cv2.rectangle(annotated, (x1, y1), (x2, y2), BOX_COLOR, 3)
            cv2.putText(
                annotated,
                f"{term} {score:.2f}",
                # Keep the label on the canvas for boxes touching the top edge.
                (x1, max(y1 - 10, 20)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                BOX_COLOR,
                2,
            )
            print(f" 📍 {sec}s | {term} | {score:.2f}")
            hits += 1

    if hits:
        # Written once per frame so the file shows every detection, not just
        # the last one.
        ann_name = f"annotated_mag_{sec}s.jpg"
        cv2.imwrite(os.path.join(RESULTS_DIR, ann_name), annotated)
    return hits


os.makedirs(RESULTS_DIR, exist_ok=True)

print("🔬 Loading OWL-ViT...")
processor = OwlViTProcessor.from_pretrained(MODEL_NAME)
model = OwlViTForObjectDetection.from_pretrained(MODEL_NAME)
model.eval()

frames = sorted(glob.glob(os.path.join(BASE_DIR, "mag_*.jpg")))
print(f"🔍 Searching {len(frames)} frames for magnifying glass...")

found = False
used_names: set[str] = set()
for frame_path in frames:
    if _process_frame(frame_path, processor, model, used_names):
        found = True

if not found:
    print("❌ No magnifying glass detected in these frames.")
else:
    print(f"\n✅ Found magnifying glass detections. Check {RESULTS_DIR}")