feat: Initial v0.9 release with API Key authentication
## v0.9.20260325_144654

### Features
- API Key Authentication System
- Job Worker System
- V2 Backup Versioning

### Bug Fixes
- get_processor_results_by_job column mapping

Co-authored-by: OpenCode
This commit is contained in:
BIN
scripts/__pycache__/redis_publisher.cpython-311.pyc
Normal file
BIN
scripts/__pycache__/redis_publisher.cpython-311.pyc
Normal file
Binary file not shown.
137
scripts/add_yolo_to_chunks.py
Normal file
137
scripts/add_yolo_to_chunks.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Add YOLO metadata to chunks
|
||||
"""
|
||||
|
||||
import json
|
||||
import psycopg2
|
||||
|
||||
|
||||
YOLO_FILE = "/Users/accusys/test_video/Old_Time_Movie_Show_-_Charade_1963.HD.yolo.json"
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
FPS = 24.0
|
||||
|
||||
# Connection settings passed verbatim to psycopg2.connect().
# Each value can be overridden with the matching libpq environment variable
# (PGHOST, PGPORT, PGUSER, PGPASSWORD, PGDATABASE) so that credentials do not
# have to be committed; the literals are kept only as local-development
# fallbacks so existing invocations keep working unchanged.
import os

POSTGRES_CONFIG = {
    "host": os.environ.get("PGHOST", "localhost"),
    "port": int(os.environ.get("PGPORT", "5432")),
    "user": os.environ.get("PGUSER", "accusys"),
    "password": os.environ.get("PGPASSWORD", "Test3200"),
    "database": os.environ.get("PGDATABASE", "momentry"),
}
|
||||
|
||||
|
||||
def load_yolo_data():
    """Parse the JSON file at ``YOLO_FILE`` and return the resulting object.

    Prints one line before reading and one line afterwards reporting how
    many entries the ``"frames"`` member contains.
    """
    print(f"Loading YOLO data from {YOLO_FILE}...")
    with open(YOLO_FILE) as handle:
        parsed = json.load(handle)
    print(f"Loaded {len(parsed['frames'])} frames")
    return parsed
|
||||
|
||||
|
||||
def get_chunk_yolo_metadata(
    yolo_data, start_time, end_time, min_confidence=0.3, fps=None
):
    """Summarise the YOLO detections that fall inside a time range.

    Args:
        yolo_data: Parsed YOLO JSON; ``yolo_data["frames"]`` maps the frame
            number (as a string) to a dict with a ``"detections"`` list whose
            items carry ``"class_name"`` and ``"confidence"``.
        start_time: Range start in seconds.
        end_time: Range end in seconds (the frame it maps to is included).
        min_confidence: Detections below this confidence are ignored.
        fps: Frames per second used to convert seconds to frame numbers;
            defaults to the module-level ``FPS``.

    Returns:
        ``{"objects": [...], "detection_count": int}`` where ``objects`` is
        the alphabetically sorted list of distinct class names and
        ``detection_count`` is the number of accepted detections.
    """
    if fps is None:
        fps = FPS

    # float() so that non-float numerics work too; e.g. a database driver may
    # hand back Decimal values, and Decimal * float raises TypeError.
    start_frame = int(float(start_time) * fps)
    end_frame = int(float(end_time) * fps)

    frames = yolo_data["frames"]
    objects = set()
    detection_count = 0

    for frame_num in range(start_frame, end_frame + 1):
        frame_data = frames.get(str(frame_num))
        if not frame_data:
            continue
        for det in frame_data.get("detections", []):
            if det["confidence"] >= min_confidence:
                objects.add(det["class_name"])
                detection_count += 1

    # Sorted so the stored metadata is reproducible between runs.
    return {
        "objects": sorted(objects),
        "detection_count": detection_count,
    }
|
||||
|
||||
|
||||
def add_yolo_metadata_to_chunks():
    """Merge YOLO object metadata into every sentence chunk of ``VIDEO_UUID``.

    Loads the YOLO JSON, selects the video's sentence chunks ordered by
    ``chunk_index`` and, for each chunk in which at least one object was
    detected, merges ``{"yolo": {...}}`` into the chunk's ``metadata`` JSONB
    column. Work is committed every 100 chunks and once more at the end.
    The cursor and connection are always closed, even if a query fails.
    """
    yolo_data = load_yolo_data()

    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            # Get all sentence chunks for this video
            cur.execute(
                """
                SELECT chunk_id, start_time, end_time
                FROM chunks
                WHERE uuid = %s AND chunk_type = 'sentence'
                ORDER BY chunk_index
                """,
                (VIDEO_UUID,),
            )
            chunks = cur.fetchall()
            print(f"Processing {len(chunks)} chunks...")

            for done, (chunk_id, start_time, end_time) in enumerate(chunks, start=1):
                yolo_meta = get_chunk_yolo_metadata(yolo_data, start_time, end_time)

                # Chunks without any detected object are left untouched.
                if yolo_meta["objects"]:
                    cur.execute(
                        """
                        UPDATE chunks
                        SET metadata = COALESCE(metadata, '{}'::jsonb) || %s
                        WHERE chunk_id = %s
                        """,
                        (json.dumps({"yolo": yolo_meta}), chunk_id),
                    )

                # Periodic commit keeps the open transaction small.
                if done % 100 == 0:
                    print(f"Processed {done}/{len(chunks)} chunks...")
                    conn.commit()

            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()

    print("Done!")
|
||||
|
||||
|
||||
def test_object_search():
    """Print, for a fixed list of object names, how many chunks contain each.

    For every name a COUNT(*) is run over the sentence chunks of
    ``VIDEO_UUID`` whose ``metadata->'yolo'->'objects'`` array contains that
    name. The YOLO JSON file is not needed for this query, so it is no
    longer loaded here. The cursor and connection are closed even when a
    query raises.
    """
    test_objects = ["person", "car", "clock", "tie", "chair", "bottle"]

    # Count chunks with a given object
    query = """
        SELECT COUNT(*)
        FROM chunks
        WHERE uuid = %s
        AND chunk_type = 'sentence'
        AND metadata IS NOT NULL
        AND metadata->'yolo'->'objects' ? %s
    """

    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for obj in test_objects:
                cur.execute(query, (VIDEO_UUID, obj))
                count = cur.fetchone()[0]
                print(f"Object '{obj}': {count} chunks")
        finally:
            cur.close()
    finally:
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
add_yolo_metadata_to_chunks()
|
||||
print("\nTesting object search:")
|
||||
test_object_search()
|
||||
@@ -1,25 +1,31 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
import sys
|
||||
import json
|
||||
import tempfile
|
||||
import os
|
||||
import argparse
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
def run_asr(video_path, output_path):
|
||||
print(f"ASR_START", file=sys.stderr)
|
||||
print(f"Loading Whisper model...", file=sys.stderr)
|
||||
|
||||
def run_asr(video_path, output_path, uuid: str = ""):
|
||||
publisher = RedisPublisher(uuid) if uuid else None
|
||||
if publisher:
|
||||
publisher.info("asr", "ASR_START")
|
||||
|
||||
if publisher:
|
||||
publisher.info("asr", "Loading Whisper model...")
|
||||
|
||||
model = WhisperModel("tiny", device="cpu", compute_type="int8")
|
||||
|
||||
print(f"Transcribing: {video_path}", file=sys.stderr)
|
||||
if publisher:
|
||||
publisher.info("asr", f"Transcribing: {video_path}")
|
||||
|
||||
segments, info = model.transcribe(video_path, beam_size=5)
|
||||
|
||||
print(f"ASR_LANGUAGE:{info.language}", file=sys.stderr)
|
||||
print(
|
||||
f"Detected language: {info.language} (probability: {info.language_probability:.2f})",
|
||||
file=sys.stderr,
|
||||
)
|
||||
if publisher:
|
||||
publisher.info("asr", f"ASR_LANGUAGE:{info.language}")
|
||||
|
||||
results = []
|
||||
total_segments = 0
|
||||
@@ -30,7 +36,10 @@ def run_asr(video_path, output_path):
|
||||
)
|
||||
total_segments += 1
|
||||
if total_segments % 100 == 0:
|
||||
print(f"ASR_PROGRESS:{total_segments}", file=sys.stderr)
|
||||
if publisher:
|
||||
publisher.progress(
|
||||
"asr", total_segments, 0, f"Segment {total_segments}"
|
||||
)
|
||||
|
||||
output = {
|
||||
"language": info.language,
|
||||
@@ -41,13 +50,15 @@ def run_asr(video_path, output_path):
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(output, f, indent=2)
|
||||
|
||||
print(f"ASR_COMPLETE:{total_segments}", file=sys.stderr)
|
||||
print(f"ASR complete. {len(results)} segments.", file=sys.stderr)
|
||||
if publisher:
|
||||
publisher.complete("asr", f"{len(results)} segments")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 3:
|
||||
print("Usage: asr_processor.py <video_path> <output_json_path>")
|
||||
sys.exit(1)
|
||||
parser = argparse.ArgumentParser(description="ASR Transcription")
|
||||
parser.add_argument("video_path", help="Path to video file")
|
||||
parser.add_argument("output_path", help="Output JSON path")
|
||||
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
|
||||
args = parser.parse_args()
|
||||
|
||||
run_asr(sys.argv[1], sys.argv[2])
|
||||
run_asr(args.video_path, args.output_path, args.uuid)
|
||||
|
||||
110
scripts/asrx_processor.py
Executable file
110
scripts/asrx_processor.py
Executable file
@@ -0,0 +1,110 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
ASRX Processor - Speaker Diarization
|
||||
Uses whisperx for speaker diarization (local model)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
def _write_asrx_result(result, output_path, publisher):
    """Publish the completion message (if any publisher) and write *result* as JSON."""
    if publisher:
        publisher.complete("asrx", f"{len(result['segments'])} segments")
    with open(output_path, "w") as f:
        json.dump(result, f, indent=2)
    return result


def process_asrx(video_path: str, output_path: str, uuid: str = ""):
    """Transcribe, align and diarize a video with whisperx and write JSON.

    Args:
        video_path: Path of the media file handed to whisperx.
        output_path: Where the result JSON is written.
        uuid: When non-empty, progress is reported through ``RedisPublisher``.

    Returns:
        ``{"language": ..., "segments": [...]}`` where each segment has
        ``start``, ``end``, ``text`` and ``speaker_id``. If whisperx is not
        installed or processing fails, an empty result
        (``language`` None, no segments) is written and returned instead of
        raising.
    """
    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("asrx", "ASRX_START")

    try:
        import whisperx
    except ImportError:
        if publisher:
            publisher.error("asrx", "whisperx not installed")
        return _write_asrx_result(
            {"language": None, "segments": []}, output_path, publisher
        )

    if publisher:
        publisher.info("asrx", "ASRX_LOADING_MODEL")

    try:
        # Load model - using faster-whisper for better performance
        # You can also use: "large-v3", "medium", "small", "base", "tiny"
        model = whisperx.load_model("base", device="cpu", compute_type="int8")

        if publisher:
            publisher.info("asrx", "ASRX_TRANSCRIBING")

        # Transcribe audio
        result = model.transcribe(video_path, language="en")

        # Keep the language now: the aligned result that replaces `result`
        # below is built from the segments only.
        language = result["language"]

        # Align timestamps. The alignment model must be placed on a device
        # explicitly, matching the device used for align() itself.
        model_a, metadata = whisperx.load_align_model(
            language_code=language, device="cpu"
        )
        result = whisperx.align(
            result["segments"], model_a, metadata, video_path, device="cpu"
        )

        # Diarization (speaker segmentation) is best-effort: on failure the
        # aligned transcript is still emitted, just without speaker labels.
        try:
            diarize_model = whisperx.DiarizationPipeline(use_auth_token=None)
            diarize_segments = diarize_model(video_path)

            # Assign speaker labels
            result = whisperx.assign_word_speakers(diarize_segments, result)
        except Exception as e:
            if publisher:
                publisher.info("asrx", f"Diarization skipped: {e}")

        # Build output, dropping segments whose text is empty
        segments = []
        for seg in result.get("segments", []):
            text = seg.get("text", "").strip()
            if text:
                segments.append(
                    {
                        "start": seg.get("start", 0.0),
                        "end": seg.get("end", 0.0),
                        "text": text,
                        "speaker_id": seg.get("speaker", None),
                    }
                )

        return _write_asrx_result(
            {"language": language, "segments": segments}, output_path, publisher
        )

    except Exception as e:
        # Top-level boundary of the processor: report and fall back to an
        # empty result so downstream consumers always find an output file.
        if publisher:
            publisher.error("asrx", f"Error: {e}")
        return _write_asrx_result(
            {"language": None, "segments": []}, output_path, publisher
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="ASRX Speaker Diarization")
|
||||
parser.add_argument("video_path", help="Path to video file")
|
||||
parser.add_argument("output_path", help="Output JSON path")
|
||||
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
|
||||
args = parser.parse_args()
|
||||
|
||||
process_asrx(args.video_path, args.output_path, args.uuid)
|
||||
305
scripts/caption_processor.py
Normal file
305
scripts/caption_processor.py
Normal file
@@ -0,0 +1,305 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Caption Processor - Generate image captions
|
||||
Uses AI vision models to analyze video frames and generate descriptions
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]:
|
||||
"""Extract frames from video at regular intervals"""
|
||||
|
||||
# Get video duration
|
||||
cmd = [
|
||||
"ffprobe",
|
||||
"-v",
|
||||
"quiet",
|
||||
"-print_format",
|
||||
"json",
|
||||
"-show_format",
|
||||
video_path,
|
||||
]
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
if result.returncode == 0:
|
||||
data = json.loads(result.stdout)
|
||||
duration = float(data.get("format", {}).get("duration", 0))
|
||||
else:
|
||||
duration = 60 # Default fallback
|
||||
except Exception:
|
||||
duration = 60
|
||||
|
||||
if duration <= 0:
|
||||
duration = 60
|
||||
|
||||
# Calculate frame interval
|
||||
interval = max(duration / max_frames, 1.0)
|
||||
|
||||
frames = []
|
||||
temp_dir = os.path.join(os.path.dirname(video_path), ".caption_frames")
|
||||
os.makedirs(temp_dir, exist_ok=True)
|
||||
|
||||
for i in range(max_frames):
|
||||
timestamp = i * interval
|
||||
output_file = os.path.join(temp_dir, f"frame_{i:04d}.jpg")
|
||||
|
||||
cmd = [
|
||||
"ffmpeg",
|
||||
"-y",
|
||||
"-ss",
|
||||
str(timestamp),
|
||||
"-i",
|
||||
video_path,
|
||||
"-vframes",
|
||||
"1",
|
||||
"-q:v",
|
||||
"2",
|
||||
output_file,
|
||||
]
|
||||
|
||||
try:
|
||||
subprocess.run(cmd, capture_output=True, check=False)
|
||||
if os.path.exists(output_file):
|
||||
frames.append({"index": i, "timestamp": timestamp, "path": output_file})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return frames
|
||||
|
||||
|
||||
def generate_caption_with_llava(
    image_path: str, prompt: str = "Describe this image in detail."
) -> Optional[str]:
    """Return a LLaVA caption for *image_path*, or None if the stack is missing.

    The function currently only checks that ``transformers``, ``torch`` and
    ``PIL`` can be imported; no model is run. When they are importable a
    placeholder string naming the image file is returned. ``prompt`` is
    accepted but not used yet.
    """
    try:
        # Availability probe for the libraries a real implementation needs.
        from transformers import AutoProcessor, AutoModelForVision2Seq
        import torch
        from PIL import Image
    except ImportError:
        return None
    else:
        # Note: a real caption requires llava-hf/llava-1.5-7b-hf or similar;
        # until that is wired in, a placeholder is produced.
        frame_name = os.path.basename(image_path)
        return f"[LLaVA caption for {frame_name}]"
|
||||
|
||||
|
||||
def generate_caption_with_gpt4v(image_path: str, api_key: str = None) -> Optional[str]:
    """Ask the OpenAI chat API for a one-sentence description of an image.

    The key is taken from *api_key* or, when that is empty, from the
    ``OPENAI_API_KEY`` environment variable. Returns the model's reply text,
    or None when no key is available or when anything goes wrong (missing
    ``openai`` package, unreadable image, API error).
    """
    import base64

    key = api_key or os.environ.get("OPENAI_API_KEY")
    if not key:
        return None

    try:
        from openai import OpenAI

        client = OpenAI(api_key=key)

        # The image travels inline as a base64 data URL.
        with open(image_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode()

        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
        }
        text_part = {
            "type": "text",
            "text": "Describe what you see in this image in one sentence.",
        }

        reply = client.chat.completions.create(
            model="gpt-4o",  # or gpt-4-turbo for vision
            messages=[{"role": "user", "content": [image_part, text_part]}],
            max_tokens=100,
        )
        return reply.choices[0].message.content
    except Exception:
        return None
|
||||
|
||||
|
||||
def generate_caption_fallback(image_path: str, existing_data: Dict = None) -> str:
    """Build a basic caption from already-available detection metadata.

    Args:
        image_path: Path of the frame; not used for the caption text.
        existing_data: Optional dict with ``"objects"`` (items carrying a
            ``"class"`` name) and ``"texts"`` (items carrying ``"text"``).

    Returns:
        ``"Contains: a, b | On-screen text: x y"`` using up to five distinct
        object names (in first-seen order) and up to three text snippets;
        either part is omitted when empty. With no usable data the generic
        string ``"Video frame at timestamp"`` is returned.
    """
    caption_parts = []

    if existing_data:
        # Check YOLO data for objects. dict.fromkeys de-duplicates while
        # keeping first-seen order, so the same input always yields the same
        # caption. "class_name" is accepted as an alternative key because
        # other YOLO output in this project uses that spelling
        # (NOTE(review): confirm which key the caption input really has).
        names = (
            o.get("class") or o.get("class_name")
            for o in existing_data.get("objects") or []
        )
        objects = list(dict.fromkeys(n for n in names if n))[:5]
        if objects:
            caption_parts.append(f"Contains: {', '.join(objects)}")

        # Check OCR data for text
        texts = [
            t["text"] for t in existing_data.get("texts") or [] if t.get("text")
        ]
        if texts:
            caption_parts.append(f"On-screen text: {' '.join(texts[:3])}")

    if caption_parts:
        return " | ".join(caption_parts)

    return "Video frame at timestamp"
|
||||
|
||||
|
||||
def process_frame(
    frame_info: Dict, yolo_data: List = None, ocr_data: List = None
) -> Dict:
    """Caption one extracted frame, trying the captioners in priority order.

    GPT-4V is tried first, then LLaVA; if neither yields a caption, one is
    assembled from the YOLO/OCR entries whose ``"timestamp"`` equals the
    frame's timestamp. The returned dict holds ``index``, ``timestamp``,
    ``caption`` and ``source`` (``"gpt-4v"``, ``"llava"`` or ``"metadata"``).
    """
    frame_path = frame_info["path"]
    timestamp = frame_info["timestamp"]

    def _result(text, origin):
        return {
            "index": frame_info["index"],
            "timestamp": timestamp,
            "caption": text,
            "source": origin,
        }

    # Try GPT-4V first
    remote_caption = generate_caption_with_gpt4v(frame_path)
    if remote_caption:
        return _result(remote_caption, "gpt-4v")

    # Try LLaVA
    local_caption = generate_caption_with_llava(frame_path)
    if local_caption:
        return _result(local_caption, "llava")

    # Use fallback with YOLO/OCR data recorded at exactly this timestamp
    context = {"objects": [], "texts": []}
    if yolo_data:
        context["objects"] = [
            entry for entry in yolo_data if entry.get("timestamp") == timestamp
        ]
    if ocr_data:
        context["texts"] = [
            entry for entry in ocr_data if entry.get("timestamp") == timestamp
        ]
    return _result(generate_caption_fallback(frame_path, context), "metadata")
|
||||
|
||||
|
||||
def _load_frame_annotations(json_path: str, item_key: str) -> List[Dict]:
    """Flatten ``frames[*][item_key]`` from a JSON file, tagging items with the frame timestamp."""
    if not os.path.exists(json_path):
        return []
    with open(json_path, encoding="utf-8") as f:
        payload = json.load(f)
    items = []
    # NOTE(review): this expects "frames" to be a list of dicts; confirm the
    # producing processor writes that shape.
    for frame in payload.get("frames", []):
        for item in frame.get(item_key, []):
            item["timestamp"] = frame.get("timestamp", 0)
            items.append(item)
    return items


def run_caption(
    video_path: str, output_path: str, uuid: str = "", max_frames: int = 30
):
    """Caption sampled frames of a video and write the result as JSON.

    Args:
        video_path: Video to sample frames from.
        output_path: Destination of the result JSON. Sibling files named
            ``<prefix>.yolo.json`` / ``<prefix>.ocr.json`` (prefix = output
            file name up to its first dot) are used as caption context when
            they exist.
        uuid: When non-empty, progress is reported through ``RedisPublisher``.
        max_frames: Maximum number of frames to caption.

    Returns:
        Dict with ``video_path``, ``total_frames``, ``captions`` and a
        ``summary`` (average caption length and per-source counts).
    """
    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("caption", "CAPTION_START")

    if publisher:
        publisher.info("caption", "Extracting frames from video...")

    # Extract frames
    frames = extract_frames(video_path, max_frames)

    if publisher:
        publisher.info("caption", f"Extracted {len(frames)} frames")

    captions = []
    try:
        # Load YOLO and OCR data for context
        base_path = os.path.dirname(output_path)
        uuid_name = os.path.basename(output_path).split(".")[0]

        yolo_objects = _load_frame_annotations(
            os.path.join(base_path, f"{uuid_name}.yolo.json"), "objects"
        )
        ocr_texts = _load_frame_annotations(
            os.path.join(base_path, f"{uuid_name}.ocr.json"), "texts"
        )

        # Process each frame
        for i, frame in enumerate(frames):
            if publisher and i % 5 == 0:
                publisher.progress(
                    "caption", i, len(frames), f"Frame {i + 1}/{len(frames)}"
                )
            captions.append(process_frame(frame, yolo_objects, ocr_texts))
    finally:
        # Cleanup temp frames and their directory even when captioning
        # fails part-way, so no extracted images are left next to the video.
        for frame in frames:
            try:
                os.remove(frame["path"])
            except OSError:
                pass
        temp_dir = os.path.join(os.path.dirname(video_path), ".caption_frames")
        try:
            os.rmdir(temp_dir)
        except OSError:
            pass

    result = {
        "video_path": video_path,
        "total_frames": len(frames),
        "captions": captions,
        "summary": {
            "avg_caption_length": sum(len(c.get("caption", "")) for c in captions)
            / max(len(captions), 1),
            "gpt4v_count": sum(1 for c in captions if c.get("source") == "gpt-4v"),
            "llava_count": sum(1 for c in captions if c.get("source") == "llava"),
            "metadata_count": sum(1 for c in captions if c.get("source") == "metadata"),
        },
    }

    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly rather than with the locale's encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)

    if publisher:
        publisher.complete("caption", f"{len(captions)} frames captioned")

    return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Video Caption Generator")
|
||||
parser.add_argument("video_path", help="Path to video file")
|
||||
parser.add_argument("output_path", help="Output JSON path")
|
||||
parser.add_argument("--uuid", help="UUID for progress tracking", default="")
|
||||
parser.add_argument(
|
||||
"--max-frames", type=int, default=30, help="Maximum frames to caption"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
result = run_caption(args.video_path, args.output_path, args.uuid, args.max_frames)
|
||||
print(f"Caption generated: {result['total_frames']} frames")
|
||||
170
scripts/chinese_vector_test.py
Normal file
170
scripts/chinese_vector_test.py
Normal file
@@ -0,0 +1,170 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Natural Language Vector Search - Chinese Queries
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import psycopg2
|
||||
|
||||
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
|
||||
# Connection settings passed verbatim to psycopg2.connect().
# Each value can be overridden with the matching libpq environment variable
# (PGHOST, PGPORT, PGUSER, PGPASSWORD, PGDATABASE) so that credentials do not
# have to be committed; the literals are kept only as local-development
# fallbacks so existing invocations keep working unchanged.
import os

POSTGRES_CONFIG = {
    "host": os.environ.get("PGHOST", "localhost"),
    "port": int(os.environ.get("PGPORT", "5432")),
    "user": os.environ.get("PGUSER", "accusys"),
    "password": os.environ.get("PGPASSWORD", "Test3200"),
    "database": os.environ.get("PGDATABASE", "momentry"),
}
|
||||
|
||||
|
||||
# Chinese natural language queries
|
||||
CHINESE_QUERIES = [
|
||||
# Scene
|
||||
"有人在說話",
|
||||
"戶外場景",
|
||||
"室內場景",
|
||||
# Actions
|
||||
"走路或移動",
|
||||
"對話或交談",
|
||||
"看著某樣東西",
|
||||
# Emotions
|
||||
"快樂或開心",
|
||||
"嚴肅或戲劇性",
|
||||
"喜劇或有趣",
|
||||
# Objects
|
||||
"戴著領帶",
|
||||
"拿著東西",
|
||||
"坐在椅子上",
|
||||
# Locations
|
||||
"城市或都市",
|
||||
"建築物或房間",
|
||||
"開放空間",
|
||||
]
|
||||
|
||||
|
||||
def get_embedding(text):
    """Return the ``nomic-embed-text`` embedding of *text* from the local Ollama server.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (previously this surfaced as an opaque ``KeyError``).
        requests.Timeout: if the server does not answer within 60 seconds.
    """
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": "nomic-embed-text", "prompt": text},
        timeout=60,  # requests waits indefinitely unless a timeout is given
    )
    resp.raise_for_status()
    return resp.json()["embedding"]
|
||||
|
||||
|
||||
def test_qdrant(queries):
    """Run each query as a top-10 vector search against Qdrant and time it.

    Only the Qdrant request is timed; computing the embedding is excluded.

    Args:
        queries: Iterable of query strings.

    Returns:
        Dict mapping each query to ``{"ms": elapsed_ms, "results": [...]}``
        where ``results`` is the ``"result"`` list of the Qdrant response.

    Raises:
        requests.HTTPError: if Qdrant rejects the request, so that a failed
            call is not reported as a fast search with zero results.
    """
    results = {}

    for query in queries:
        embedding = get_embedding(query)

        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        resp = requests.post(
            "http://localhost:6333/collections/AccusysDB/points/search",
            # NOTE(review): API key is hard-coded; consider moving it to
            # configuration outside the repository.
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": embedding, "limit": 10},
            timeout=30,
        )
        elapsed = (time.perf_counter() - start) * 1000

        resp.raise_for_status()
        data = resp.json()
        results[query] = {"ms": round(elapsed, 2), "results": data.get("result", [])}

    return results
|
||||
|
||||
|
||||
def test_pgvector(queries):
    """Run each query as a top-10 pgvector ``<=>`` distance search and time it.

    Only the SQL round trip (execute + fetch) is timed; computing the
    embedding is excluded. The cursor and connection are closed even when a
    query fails.

    Args:
        queries: Iterable of query strings.

    Returns:
        Dict mapping each query to ``{"ms": elapsed_ms, "results": [...]}``
        where every result has ``chunk_id``, ``score`` (1 - distance) and
        ``text``.
    """
    results = {}
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for query in queries:
                embedding = get_embedding(query)
                # pgvector accepts the textual form "[v1,v2,...]" cast to vector.
                vector_str = "[" + ",".join(str(x) for x in embedding) + "]"

                # perf_counter is monotonic and high-resolution, unlike time.time().
                start = time.perf_counter()
                cur.execute(
                    """
                    SELECT cv.chunk_id, (cv.embedding_vector <=> %s::vector) as distance,
                    c.content->>'text' as text
                    FROM chunk_vectors cv
                    JOIN chunks c ON cv.chunk_id = c.chunk_id
                    WHERE cv.embedding_vector IS NOT NULL
                    ORDER BY cv.embedding_vector <=> %s::vector
                    LIMIT 10
                    """,
                    (vector_str, vector_str),
                )
                rows = cur.fetchall()
                elapsed = (time.perf_counter() - start) * 1000

                results[query] = {
                    "ms": round(elapsed, 2),
                    "results": [
                        {"chunk_id": r[0], "score": 1 - r[1], "text": r[2]} for r in rows
                    ],
                }
        finally:
            cur.close()
    finally:
        conn.close()
    return results
|
||||
|
||||
|
||||
def _preview(text, limit=50):
    """Return *text* cut to *limit* characters plus "..." when longer; "" for empty/None."""
    if not text:
        return ""
    return text[:limit] + "..." if len(text) > limit else text


def main():
    """Run all Chinese queries against Qdrant and pgvector and print a report.

    Prints the average response time of both back ends followed by the top
    five hits per query. Qdrant hits are listed by point id: the search
    request does not ask for payloads, and labelling them with the text of
    the pgvector hit at the same rank (as before) showed text that does not
    necessarily belong to that hit.
    """
    print("=" * 80)
    print("中文自然語言向量搜尋測試")
    print("Chinese Natural Language Vector Search Test")
    print("=" * 80)
    print("\nVideo: Charade 1963")
    print("Model: nomic-embed-text\n")

    print("Running Qdrant searches...")
    qdrant_results = test_qdrant(CHINESE_QUERIES)

    print("Running pgvector searches...")
    pgvector_results = test_pgvector(CHINESE_QUERIES)

    qdrant_avg = sum(r["ms"] for r in qdrant_results.values()) / len(qdrant_results)
    pgvector_avg = sum(r["ms"] for r in pgvector_results.values()) / len(
        pgvector_results
    )

    print("\n" + "=" * 80)
    print("平均回應時間 / AVERAGE RESPONSE TIME")
    print("=" * 80)
    print(f" Qdrant: {qdrant_avg:.2f}ms")
    print(f" pgvector: {pgvector_avg:.2f}ms")

    print("\n" + "=" * 80)
    print("詳細結果 / DETAILED RESULTS")
    print("=" * 80)

    for query in CHINESE_QUERIES:
        qd = qdrant_results[query]
        pg = pgvector_results[query]

        print(f"\n{'=' * 60}")
        print(f'查詢 / Query: "{query}"')
        print(f"{'=' * 60}")

        print(f"\n[Qdrant] Time: {qd['ms']:.1f}ms")
        print("-" * 60)
        for i, r in enumerate(qd["results"][:5]):
            print(f" {i + 1:2}. [{r['score']:.3f}] id={r.get('id')}")

        print(f"\n[pgvector] Time: {pg['ms']:.1f}ms")
        print("-" * 60)
        for i, r in enumerate(pg["results"][:5]):
            print(f" {i + 1:2}. [{r['score']:.3f}] {_preview(r['text'])}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
131
scripts/compare_search.py
Normal file
131
scripts/compare_search.py
Normal file
@@ -0,0 +1,131 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Search comparison script for PostgreSQL, MongoDB, and Qdrant
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
|
||||
# Test queries
|
||||
TEST_QUERIES = [
|
||||
"Charade",
|
||||
"Paris",
|
||||
" Audrey Hepburn",
|
||||
"Cary Grant",
|
||||
]
|
||||
|
||||
# PostgreSQL connection
|
||||
# Passed verbatim to psycopg2.connect().
# Each value can be overridden with the matching libpq environment variable
# (PGHOST, PGPORT, PGUSER, PGPASSWORD, PGDATABASE) so that credentials do not
# have to be committed; the literals are kept only as local-development
# fallbacks so existing invocations keep working unchanged.
import os

POSTGRES_CONFIG = {
    "host": os.environ.get("PGHOST", "localhost"),
    "port": int(os.environ.get("PGPORT", "5432")),
    "user": os.environ.get("PGUSER", "accusys"),
    "password": os.environ.get("PGPASSWORD", "Test3200"),
    "database": os.environ.get("PGDATABASE", "momentry"),
}
|
||||
|
||||
|
||||
def test_postgres_text_search():
    """Time a case-insensitive substring search in PostgreSQL for each test query.

    For every entry of ``TEST_QUERIES`` an ``ILIKE '%query%'`` search limited
    to 10 sentence chunks is executed and timed (execute + fetch). The cursor
    and connection are closed even when a query fails.

    Returns:
        Dict mapping each query to ``{"method", "ms", "rows"}``.
    """
    import psycopg2

    results = {}
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for query in TEST_QUERIES:
                # perf_counter is monotonic and high-resolution, unlike time.time().
                start = time.perf_counter()
                cur.execute(
                    "SELECT chunk_id, content->>'text' FROM chunks WHERE chunk_type = 'sentence' AND content->>'text' ILIKE %s LIMIT 10",
                    (f"%{query}%",),
                )
                rows = cur.fetchall()
                elapsed = (time.perf_counter() - start) * 1000

                results[query] = {
                    "method": "PostgreSQL ILIKE",
                    "ms": round(elapsed, 2),
                    "rows": len(rows),
                }
                print(f"PostgreSQL text search '{query}': {elapsed:.2f}ms, {len(rows)} rows")
        finally:
            cur.close()
    finally:
        conn.close()
    return results
|
||||
|
||||
|
||||
def test_qdrant_vector_search():
    """Time a top-10 Qdrant vector search for each test query.

    Each entry of ``TEST_QUERIES`` is embedded with Ollama's
    ``nomic-embed-text`` model and searched in the ``AccusysDB`` collection.
    Only the Qdrant request is timed.

    Returns:
        Dict mapping each query to ``{"method", "ms", "rows"}``.

    Raises:
        requests.HTTPError: if Ollama or Qdrant answers with an error status,
            so a failed call is not reported as a search with zero rows.
    """
    results = {}

    for query in TEST_QUERIES:
        # Get embedding from Ollama
        embed_resp = requests.post(
            "http://localhost:11434/api/embeddings",
            json={"model": "nomic-embed-text", "prompt": query},
            timeout=60,  # requests waits indefinitely unless a timeout is given
        )
        embed_resp.raise_for_status()
        embedding = embed_resp.json()["embedding"]

        # Search in Qdrant (using AccusysDB collection)
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        resp = requests.post(
            "http://localhost:6333/collections/AccusysDB/points/search",
            # NOTE(review): API key is hard-coded; consider moving it to
            # configuration outside the repository.
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": embedding, "limit": 10},
            timeout=30,
        )
        elapsed = (time.perf_counter() - start) * 1000

        resp.raise_for_status()
        data = resp.json()
        result_count = len(data.get("result", []))

        results[query] = {
            "method": "Qdrant HNSW",
            "ms": round(elapsed, 2),
            "rows": result_count,
        }
        print(f"Qdrant vector search '{query}': {elapsed:.2f}ms, {result_count} rows")

    return results
|
||||
|
||||
|
||||
def main():
    """Print the sentence-chunk count, run both search tests and a summary table."""
    rule = "=" * 60

    def banner(title):
        # Section heading: blank line, rule, title, rule.
        print("\n" + rule)
        print(title)
        print(rule)

    print(rule)
    print("Search Performance Comparison Test")
    print(rule)

    # Get chunk count
    import psycopg2

    conn = psycopg2.connect(**POSTGRES_CONFIG)
    cur = conn.cursor()
    cur.execute("SELECT COUNT(*) FROM chunks WHERE chunk_type = 'sentence'")
    (count,) = cur.fetchone()[:1]
    cur.close()
    conn.close()

    print(f"\nTotal sentence chunks: {count}")

    banner("A. Text Search Test (Priority a)")
    pg_results = test_postgres_text_search()

    banner("B. Vector Search Test (Priority b)")
    qdrant_results = test_qdrant_vector_search()

    banner("Summary")
    print(f"\n{'Query':<20} | {'PostgreSQL':<25} | {'Qdrant':<25}")
    print("-" * 70)
    for query in TEST_QUERIES:
        # Missing entries fall back to zeros so the row is always printable.
        pg = pg_results.get(query, {})
        qd = qdrant_results.get(query, {})
        print(
            f"{query:<20} | {pg.get('ms', 0):.1f}ms ({pg.get('rows', 0)} rows) | {qd.get('ms', 0):.1f}ms ({qd.get('rows', 0)} rows)"
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
316
scripts/comprehensive_search_test.py
Normal file
316
scripts/comprehensive_search_test.py
Normal file
@@ -0,0 +1,316 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Comprehensive search comparison: Text, Vector (PostgreSQL & Qdrant), Object, and MongoDB search
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import psycopg2
|
||||
from pymongo import MongoClient
|
||||
|
||||
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
|
||||
# Connection settings passed verbatim to psycopg2.connect().
# Each value can be overridden with the matching libpq environment variable
# (PGHOST, PGPORT, PGUSER, PGPASSWORD, PGDATABASE) so that credentials do not
# have to be committed; the literals are kept only as local-development
# fallbacks so existing invocations keep working unchanged.
import os

POSTGRES_CONFIG = {
    "host": os.environ.get("PGHOST", "localhost"),
    "port": int(os.environ.get("PGPORT", "5432")),
    "user": os.environ.get("PGUSER", "accusys"),
    "password": os.environ.get("PGPASSWORD", "Test3200"),
    "database": os.environ.get("PGDATABASE", "momentry"),
}
|
||||
|
||||
MONGO_URI = "mongodb://localhost:27017"
|
||||
MONGO_DB = "momentry"
|
||||
MONGO_COLLECTION = "chunks"
|
||||
|
||||
TEST_QUERIES = [
|
||||
("text", "Paris"),
|
||||
("text", " Audrey Hepburn"),
|
||||
("text", "Cary Grant"),
|
||||
("vector", "Paris"),
|
||||
("vector", " Audrey Hepburn"),
|
||||
("vector", "Cary Grant"),
|
||||
("object", "person"),
|
||||
("object", "car"),
|
||||
("object", "clock"),
|
||||
("object", "tie"),
|
||||
]
|
||||
|
||||
|
||||
def test_text_search():
    """Time a PostgreSQL ``ILIKE`` substring search for three fixed queries.

    Each search is limited to 10 sentence chunks; the timing covers execute
    plus fetch. The cursor and connection are closed even when a query fails.

    Returns:
        Dict mapping each query to ``{"ms": elapsed_ms, "rows": row_count}``.
    """
    results = {}
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for query in ["Paris", " Audrey Hepburn", "Cary Grant"]:
                # perf_counter is monotonic and high-resolution, unlike time.time().
                start = time.perf_counter()
                cur.execute(
                    "SELECT chunk_id, content->>'text' FROM chunks WHERE chunk_type = 'sentence' AND content->>'text' ILIKE %s LIMIT 10",
                    (f"%{query}%",),
                )
                rows = cur.fetchall()
                elapsed = (time.perf_counter() - start) * 1000
                results[query] = {"ms": round(elapsed, 2), "rows": len(rows)}
                print(f"PostgreSQL text '{query}': {elapsed:.2f}ms, {len(rows)} rows")
        finally:
            cur.close()
    finally:
        conn.close()
    return results
|
||||
|
||||
|
||||
def test_mongodb_text_search():
    """Time a MongoDB ``$text`` search for three fixed queries.

    Searches the sentence chunks of ``VIDEO_UUID`` in
    ``MONGO_DB.MONGO_COLLECTION``, limited to 10 documents; the timing
    includes draining the cursor. The client is closed even when a query
    fails.

    Returns:
        Dict mapping each query to ``{"ms": elapsed_ms, "rows": row_count}``.
    """
    results = {}
    mongo_client = MongoClient(MONGO_URI)
    try:
        mongo_collection = mongo_client[MONGO_DB][MONGO_COLLECTION]

        for query in ["Paris", "Audrey Hepburn", "Cary Grant"]:
            # perf_counter is monotonic and high-resolution, unlike time.time().
            start = time.perf_counter()
            cursor = mongo_collection.find(
                {"uuid": VIDEO_UUID, "chunk_type": "sentence", "$text": {"$search": query}}
            ).limit(10)

            # The query only runs once the cursor is consumed.
            rows = list(cursor)
            elapsed = (time.perf_counter() - start) * 1000

            results[query] = {"ms": round(elapsed, 2), "rows": len(rows)}
            print(f"MongoDB text '{query}': {elapsed:.2f}ms, {len(rows)} rows")
    finally:
        mongo_client.close()
    return results
|
||||
|
||||
|
||||
def test_qdrant_vector_search():
    """Time Qdrant vector searches for a fixed set of text queries.

    Each query is embedded through the local Ollama endpoint (not timed),
    then only the Qdrant search request is timed.

    Returns:
        dict mapping each query string to ``{"ms": elapsed, "rows": count}``.

    Raises:
        requests.HTTPError: if Ollama or Qdrant answers with an error status.
    """
    results = {}

    # No stray leading space in the queries, so the embedded text is the same
    # string the text-search benchmarks use.
    for query in ["Paris", "Audrey Hepburn", "Cary Grant"]:
        # Get embedding from Ollama
        embed_resp = requests.post(
            "http://localhost:11434/api/embeddings",
            json={"model": "nomic-embed-text", "prompt": query},
        )
        embed_resp.raise_for_status()
        embedding = embed_resp.json()["embedding"]

        # Search in Qdrant
        start = time.time()
        resp = requests.post(
            "http://localhost:6333/collections/AccusysDB/points/search",
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": embedding, "limit": 10},
        )
        elapsed = (time.time() - start) * 1000

        # An HTTP error must not be recorded as a fast search with 0 rows.
        resp.raise_for_status()
        data = resp.json()
        result_count = len(data.get("result", []))

        results[query] = {"ms": round(elapsed, 2), "rows": result_count}
        print(f"Qdrant vector '{query}': {elapsed:.2f}ms, {result_count} rows")

    return results
|
||||
|
||||
|
||||
def test_postgres_vector_search():
    """Time pgvector distance searches (``<=>``) for a fixed set of queries.

    Each query is embedded through the local Ollama endpoint and formatted
    as a vector literal before the timer starts, so the measurement covers
    only the SQL execute + fetch.

    Returns:
        dict mapping each query string to ``{"ms": elapsed, "rows": count}``.

    Raises:
        requests.HTTPError: if the embedding request fails.
    """
    results = {}
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            # No stray leading space in the queries, so the embedded text is
            # the same string the text-search benchmarks use.
            for query in ["Paris", "Audrey Hepburn", "Cary Grant"]:
                # Get embedding from Ollama
                embed_resp = requests.post(
                    "http://localhost:11434/api/embeddings",
                    json={"model": "nomic-embed-text", "prompt": query},
                )
                embed_resp.raise_for_status()
                embedding = embed_resp.json()["embedding"]

                # Convert to vector string format before timing, so client-side
                # formatting is not charged to the database.
                vector_str = "[" + ",".join(str(x) for x in embedding) + "]"

                # Search in PostgreSQL using pgvector
                start = time.time()
                cur.execute(
                    """
                    SELECT chunk_id, (embedding_vector <=> %s::vector) as distance
                    FROM chunk_vectors
                    WHERE embedding_vector IS NOT NULL
                    ORDER BY embedding_vector <=> %s::vector
                    LIMIT 10
                    """,
                    (vector_str, vector_str),
                )
                rows = cur.fetchall()
                elapsed = (time.time() - start) * 1000

                results[query] = {"ms": round(elapsed, 2), "rows": len(rows)}
                print(f"PostgreSQL vector '{query}': {elapsed:.2f}ms, {len(rows)} rows")
        finally:
            cur.close()
    finally:
        conn.close()
    return results
|
||||
|
||||
|
||||
def test_object_search():
    """Time lookups of YOLO object labels stored in chunk metadata.

    For each label, selects up to 10 sentence chunks of this video whose
    ``metadata->'yolo'->'objects'`` contains the label (``?`` operator).

    Returns a dict keyed by label with ``{"ms": ..., "rows": ...}`` values.
    """
    sql = """
        SELECT chunk_id FROM chunks
        WHERE uuid = %s AND chunk_type = 'sentence'
        AND metadata IS NOT NULL AND metadata->'yolo'->'objects' ? %s
        LIMIT 10
        """
    timings = {}
    connection = psycopg2.connect(**POSTGRES_CONFIG)
    cursor = connection.cursor()

    for label in ("person", "car", "clock", "tie"):
        began = time.time()
        cursor.execute(sql, (VIDEO_UUID, label))
        hits = cursor.fetchall()
        took_ms = (time.time() - began) * 1000

        timings[label] = {"ms": round(took_ms, 2), "rows": len(hits)}
        print(f"PostgreSQL object '{label}': {took_ms:.2f}ms, {len(hits)} rows")

    cursor.close()
    connection.close()
    return timings
|
||||
|
||||
|
||||
def _banner(title, leading_newline=True):
    """Print *title* framed by two 70-character ``=`` rules."""
    rule = "=" * 70
    print("\n" + rule if leading_newline else rule)
    print(title)
    print(rule)


def _average_ms(results):
    """Return the mean of the ``"ms"`` values in a results mapping."""
    return sum(d["ms"] for d in results.values()) / len(results)


def _print_rows(method, results):
    """Print one summary-table row per query in *results*."""
    for query, data in results.items():
        print(f"{method:<28} | {query:<20} | {data['ms']:<12.1f} | {data['rows']}")


def main():
    """Run every search benchmark and print the comparison report.

    Prints the sentence-chunk count for the video, runs the five benchmark
    functions in order, then prints a per-query summary table, the average
    latency per method and a short analysis built from the measured averages.
    """
    _banner("SEARCH PERFORMANCE COMPARISON", leading_newline=False)

    # Get chunk count
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                "SELECT COUNT(*) FROM chunks WHERE uuid = %s AND chunk_type = 'sentence'",
                (VIDEO_UUID,),
            )
            chunk_count = cur.fetchone()[0]
        finally:
            cur.close()
    finally:
        # Close the connection even if the count query fails.
        conn.close()
    print(f"\nTotal sentence chunks: {chunk_count}")
    print(f"Video UUID: {VIDEO_UUID}")

    _banner("A. TEXT SEARCH (PostgreSQL ILIKE)")
    text_results = test_text_search()

    _banner("A2. TEXT SEARCH (MongoDB Text)")
    mongodb_results = test_mongodb_text_search()

    _banner("B1. VECTOR SEARCH (Qdrant HNSW)")
    qdrant_results = test_qdrant_vector_search()

    _banner("B2. VECTOR SEARCH (PostgreSQL pgvector HNSW)")
    pgvector_results = test_postgres_vector_search()

    _banner("C. OBJECT SEARCH (PostgreSQL JSON)")
    object_results = test_object_search()

    _banner("SUMMARY")
    print(f"\n{'Method':<28} | {'Query':<20} | {'Time (ms)':<12} | {'Results'}")
    print("-" * 75)
    _print_rows("PostgreSQL ILIKE", text_results)
    _print_rows("MongoDB Text", mongodb_results)
    _print_rows("Qdrant HNSW", qdrant_results)
    _print_rows("PostgreSQL pgvector", pgvector_results)
    _print_rows("PostgreSQL JSON", object_results)

    # Calculate averages
    text_avg = _average_ms(text_results)
    mongodb_avg = _average_ms(mongodb_results)
    qdrant_avg = _average_ms(qdrant_results)
    pgvector_avg = _average_ms(pgvector_results)
    object_avg = _average_ms(object_results)

    _banner("AVERAGE RESPONSE TIME")
    print(f" PostgreSQL ILIKE (Text): {text_avg:.2f}ms")
    print(f" MongoDB Text: {mongodb_avg:.2f}ms")
    print(f" PostgreSQL pgvector (Vector): {pgvector_avg:.2f}ms")
    print(f" Qdrant HNSW (Vector): {qdrant_avg:.2f}ms")
    print(f" PostgreSQL JSON (Object): {object_avg:.2f}ms")

    _banner("ANALYSIS")
    # Every "~N ms" figure below is filled from this run's averages; the
    # text-search figure used to be a hard-coded 0.7ms.
    print(
        """
1. TEXT SEARCH (PostgreSQL ILIKE):
- Fast: ~{:.1f}ms average
- Exact substring matching
- Case-insensitive
- Good for keyword searches

2. VECTOR SEARCH - PostgreSQL pgvector (HNSW):
- Speed: ~{:.1f}ms average
- Built into PostgreSQL
- No additional infrastructure needed
- Good for single-database architecture

3. VECTOR SEARCH - Qdrant (HNSW):
- Speed: ~{:.1f}ms average
- Dedicated vector database
- Better for large-scale deployments
- Supports more advanced vector operations

4. OBJECT SEARCH (PostgreSQL JSON):
- Very fast: ~{:.1f}ms average
- Uses JSONB existence operator (?)
- Works with YOLO metadata
- Best for visual object queries

RECOMMENDATION:
- For simple keyword searches: PostgreSQL ILIKE
- For semantic search with single DB: PostgreSQL pgvector
- For scalability: Qdrant
- For visual content: PostgreSQL JSON with YOLO metadata
""".format(text_avg, pgvector_avg, qdrant_avg, object_avg)
    )
|
||||
|
||||
|
||||
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
106
scripts/cut_processor.py
Executable file
106
scripts/cut_processor.py
Executable file
@@ -0,0 +1,106 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
CUT Processor - Scene Detection
|
||||
Uses PySceneDetect for scene detection (local)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
def process_cut(video_path: str, output_path: str, uuid: str = ""):
    """Detect scene cuts in a video and write them to a JSON file.

    Args:
        video_path: Path of the video to analyse.
        output_path: Path of the JSON file to write.
        uuid: When non-empty, progress is reported through a
            ``RedisPublisher`` created for this id; otherwise nothing is
            published.

    Returns:
        The dict that was written: ``{"frame_count", "fps", "scenes"}``.
        Each scene has ``scene_number`` (1-based), inclusive
        ``start_frame``/``end_frame`` and ``start_time``/``end_time`` in
        seconds. If ``scenedetect`` cannot be imported, an empty result
        (0 frames, 0.0 fps, no scenes) is written and returned.
    """
    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("cut", "CUT_START")

    try:
        from scenedetect import VideoManager, SceneManager
        from scenedetect.detectors import ContentDetector
    except ImportError:
        if publisher:
            publisher.error("cut", "scenedetect not installed")
        result = {"frame_count": 0, "fps": 0.0, "scenes": []}
        if publisher:
            publisher.complete("cut", "0 scenes")
        with open(output_path, "w") as f:
            json.dump(result, f, indent=2)
        return result

    if publisher:
        publisher.info("cut", "CUT_LOADING_VIDEO")

    # Create video manager and scene manager
    video_manager = VideoManager([video_path])
    try:
        scene_manager = SceneManager()

        # Add content detector (detects scene cuts based on frame differences)
        # threshold: sensitivity (lower = more sensitive, default 30)
        # min_scene_len: minimum frames per scene (default 15)
        scene_manager.add_detector(ContentDetector(threshold=30.0, min_scene_len=15))

        # Set downscale factor for faster processing
        video_manager.set_downscale_factor()

        if publisher:
            publisher.info("cut", "CUT_DETECTING")

        video_manager.start()
        scene_manager.detect_scenes(frame_source=video_manager)
        scene_list = scene_manager.get_scene_list()
        fps = video_manager.get_framerate()
    finally:
        # Release the underlying capture even when detection fails.
        video_manager.release()

    if publisher:
        publisher.info("cut", f"fps={fps}")

    # Total frame count is taken from the end of the last detected scene.
    frame_count = 0
    if scene_list:
        frame_count = scene_list[-1][1].get_frames()

    # One frame lasts 1/fps seconds; with an unknown frame rate nothing is
    # subtracted (the whole end time used to collapse to 0 in that case).
    frame_duration = 1.0 / fps if fps > 0 else 0.0

    # Convert scenes to result format
    scenes = []
    for i, (start, end) in enumerate(scene_list):
        scenes.append(
            {
                "scene_number": i + 1,
                "start_frame": start.get_frames(),
                "end_frame": end.get_frames() - 1,  # end is exclusive
                "start_time": start.get_seconds(),
                "end_time": end.get_seconds() - frame_duration,
            }
        )
        if publisher:
            publisher.progress("cut", i + 1, len(scene_list), f"Scene {i + 1}")

    result = {"frame_count": frame_count, "fps": fps, "scenes": scenes}

    with open(output_path, "w") as f:
        json.dump(result, f, indent=2)

    if publisher:
        publisher.complete("cut", f"{len(scenes)} scenes")

    return result
|
||||
|
||||
|
||||
# Command-line entry point: parse the paths and optional UUID, then run
# scene detection.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Scene Detection")
    cli.add_argument("video_path", help="Path to video file")
    cli.add_argument("output_path", help="Output JSON path")
    cli.add_argument("--uuid", "-u", default="", help="UUID for Redis progress")
    options = cli.parse_args()

    process_cut(options.video_path, options.output_path, options.uuid)
|
||||
154
scripts/face_processor.py
Executable file
154
scripts/face_processor.py
Executable file
@@ -0,0 +1,154 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Face Processor - Face Detection
|
||||
Uses OpenCV Haar Cascade (local, no extra download needed)
|
||||
Alternative: MediaPipe (requires model download)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
def _write_face_result(result, output_path):
    """Write *result* to *output_path* as indented JSON."""
    with open(output_path, "w") as f:
        json.dump(result, f, indent=2)


def _finish_without_detection(publisher, output_path):
    """Publish completion and write the empty result used when detection cannot run."""
    result = {"frame_count": 0, "fps": 0.0, "frames": []}
    if publisher:
        publisher.complete("face", "0 frames")
    _write_face_result(result, output_path)
    return result


def process_face(
    video_path: str, output_path: str, uuid: str = "", sample_interval: int = 30
):
    """Detect faces in sampled video frames and write them to a JSON file.

    Uses OpenCV's bundled frontal-face Haar cascade on every
    ``sample_interval``-th frame.

    Args:
        video_path: Path of the video to analyse.
        output_path: Path of the JSON file to write.
        uuid: When non-empty, progress is reported through a
            ``RedisPublisher`` created for this id.
        sample_interval: Process one frame out of this many (default 30).

    Returns:
        The dict that was written: ``{"frame_count", "fps", "frames"}``.
        ``frames`` lists only sampled frames containing at least one face,
        each with a 0-based ``frame`` index, a ``timestamp`` in seconds and
        the face boxes. If OpenCV is missing or the cascade cannot be
        loaded, an empty result is written and returned.

    Raises:
        ValueError: if ``sample_interval`` is smaller than 1.
    """
    if sample_interval < 1:
        raise ValueError("sample_interval must be >= 1")

    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("face", "FACE_START")

    try:
        import cv2
    except ImportError:
        if publisher:
            publisher.error("face", "opencv-python not installed")
        return _finish_without_detection(publisher, output_path)

    if publisher:
        publisher.info("face", "FACE_LOADING_CASCADE")

    # Use the Haar cascade file that ships with OpenCV.
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    )
    if face_cascade.empty():
        if publisher:
            publisher.error("face", "Could not load Haar Cascade")
        return _finish_without_detection(publisher, output_path)

    if publisher:
        publisher.info("face", "FACE_CASCADE_LOADED")

    frames = []
    frame_count = 0
    processed = 0

    # One capture serves both the metadata query and the frame loop.
    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        if publisher:
            publisher.info("face", f"fps={fps}, frames={total_frames}")
            publisher.progress("face", 0, total_frames, "Starting")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            # Sample frames: frame_count is 1-based at this point.
            if frame_count % sample_interval != 0:
                continue

            processed += 1
            timestamp = (frame_count - 1) / fps if fps > 0 else 0

            # Convert to grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Detect faces; a failure on one frame is reported and skipped.
            try:
                faces = face_cascade.detectMultiScale(
                    gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
                )
            except Exception as e:
                if publisher:
                    publisher.error("face", f"Frame {frame_count}: {e}")
                faces = []

            face_list = [
                {
                    "face_id": None,
                    "x": int(x),
                    "y": int(y),
                    "width": int(w),
                    "height": int(h),
                    "confidence": 0.8,  # Haar cascade doesn't provide confidence
                }
                for x, y, w, h in faces
            ]

            # Only add frames with faces
            if face_list:
                frames.append(
                    {
                        "frame": frame_count - 1,
                        "timestamp": round(timestamp, 3),
                        "faces": face_list,
                    }
                )
                if publisher:
                    publisher.progress(
                        "face",
                        processed,
                        total_frames // sample_interval,
                        f"Frame {frame_count}",
                    )
    finally:
        # Release the capture even when decoding or detection raises.
        cap.release()

    result = {"frame_count": total_frames, "fps": fps, "frames": frames}

    if publisher:
        publisher.complete("face", f"{len(frames)} frames with faces")

    _write_face_result(result, output_path)

    return result
|
||||
|
||||
|
||||
# Command-line entry point: parse the paths and optional UUID, then run
# face detection.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Face Detection")
    cli.add_argument("video_path", help="Path to video file")
    cli.add_argument("output_path", help="Output JSON path")
    cli.add_argument("--uuid", "-u", default="", help="UUID for Redis progress")
    options = cli.parse_args()

    process_face(options.video_path, options.output_path, options.uuid)
|
||||
169
scripts/natural_language_top10.py
Normal file
169
scripts/natural_language_top10.py
Normal file
@@ -0,0 +1,169 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Natural Language Vector Search - Show Top 10 Results
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import psycopg2
|
||||
|
||||
|
||||
# Identifier of the indexed video.
VIDEO_UUID = "39567a0eb16f39fd"

# Keyword arguments for psycopg2.connect().
# NOTE(review): credentials are hard-coded in source.
POSTGRES_CONFIG = dict(
    host="localhost",
    port=5432,
    user="accusys",
    password="Test3200",
    database="momentry",
)

# Free-text prompts, embedded and searched in the order listed.
NATURAL_LANGUAGE_QUERIES = (
    ["a person talking", "someone speaking on camera", "outdoor scene", "indoor setting"]
    + ["walking or moving", "dialogue or conversation", "looking at something"]
    + ["happy or joyful", "serious or dramatic", "comedy or funny"]
    + ["wearing a tie", "holding an object", "sitting on a chair"]
    + ["city or urban", "building or room", "open space"]
)
|
||||
|
||||
|
||||
def get_embedding(text):
    """Return the ``nomic-embed-text`` embedding of *text* from the local Ollama API."""
    request_body = {"model": "nomic-embed-text", "prompt": text}
    reply = requests.post("http://localhost:11434/api/embeddings", json=request_body)
    decoded = reply.json()
    return decoded["embedding"]
|
||||
|
||||
|
||||
def test_qdrant(queries):
    """Search Qdrant for each query and record latency plus the raw hits.

    The embedding lookup is done before the timer starts. Returns a dict
    keyed by query with ``{"ms": elapsed_ms, "results": hit_list}``; the
    hit list is empty when the response has no "result" key.
    """
    endpoint = "http://localhost:6333/collections/AccusysDB/points/search"
    auth_headers = {"api-key": "Test3200Test3200Test3200"}
    outcome = {}

    for prompt in queries:
        vector = get_embedding(prompt)

        t0 = time.time()
        reply = requests.post(
            endpoint,
            headers=auth_headers,
            json={"vector": vector, "limit": 10},
        )
        took_ms = (time.time() - t0) * 1000

        hits = reply.json().get("result", [])
        outcome[prompt] = {"ms": round(took_ms, 2), "results": hits}

    return outcome
|
||||
|
||||
|
||||
def test_pgvector(queries):
    """Search pgvector for each query and record latency plus the top 10 hits.

    Returns a dict keyed by query with ``{"ms": elapsed_ms, "results": [...]}``
    where each hit has ``chunk_id``, ``score`` (1 - distance) and ``text``.
    """
    sql = """
        SELECT cv.chunk_id, (cv.embedding_vector <=> %s::vector) as distance,
        c.content->>'text' as text
        FROM chunk_vectors cv
        JOIN chunks c ON cv.chunk_id = c.chunk_id
        WHERE cv.embedding_vector IS NOT NULL
        ORDER BY cv.embedding_vector <=> %s::vector
        LIMIT 10
        """
    connection = psycopg2.connect(**POSTGRES_CONFIG)
    cursor = connection.cursor()
    outcome = {}

    for prompt in queries:
        components = [str(value) for value in get_embedding(prompt)]
        vector_literal = "[" + ",".join(components) + "]"

        t0 = time.time()
        cursor.execute(sql, (vector_literal, vector_literal))
        fetched = cursor.fetchall()
        took_ms = (time.time() - t0) * 1000

        hits = []
        for row in fetched:
            hits.append({"chunk_id": row[0], "score": 1 - row[1], "text": row[2]})
        outcome[prompt] = {"ms": round(took_ms, 2), "results": hits}

    cursor.close()
    connection.close()
    return outcome
|
||||
|
||||
|
||||
def _preview(text, limit=70):
    """Return *text* cut to *limit* characters plus "..." ("" for None or empty)."""
    if not text:
        return ""
    return text[:limit] + "..." if len(text) > limit else text


def main():
    """Run all queries against Qdrant and pgvector and print their top hits.

    Prints the average latency of each backend, then, per query, up to ten
    hits from each backend with their scores.
    """
    print("=" * 80)
    print("NATURAL LANGUAGE VECTOR SEARCH - TOP 10 RESULTS")
    print("=" * 80)
    print("\nVideo: Charade 1963")
    print("Model: nomic-embed-text\n")

    # Run tests
    print("Running Qdrant searches...")
    qdrant_results = test_qdrant(NATURAL_LANGUAGE_QUERIES)

    print("Running pgvector searches...")
    pgvector_results = test_pgvector(NATURAL_LANGUAGE_QUERIES)

    # Calculate averages
    qdrant_avg = sum(r["ms"] for r in qdrant_results.values()) / len(qdrant_results)
    pgvector_avg = sum(r["ms"] for r in pgvector_results.values()) / len(
        pgvector_results
    )

    print("\n" + "=" * 80)
    print("AVERAGE RESPONSE TIME")
    print("=" * 80)
    print(f" Qdrant: {qdrant_avg:.2f}ms")
    print(f" pgvector: {pgvector_avg:.2f}ms")

    # Show detailed results for each query
    print("\n" + "=" * 80)
    print("DETAILED RESULTS")
    print("=" * 80)

    for query in NATURAL_LANGUAGE_QUERIES:
        qd = qdrant_results[query]
        pg = pgvector_results[query]

        print(f"\n{'=' * 60}")
        print(f'Query: "{query}"')
        print(f"{'=' * 60}")

        print(f"\n[Qdrant] Time: {qd['ms']:.1f}ms")
        print("-" * 60)
        # The Qdrant search request does not ask for payloads, so text is
        # borrowed from the pgvector hit with the same id. Pairing by rank
        # would print pgvector's text next to an unrelated Qdrant score.
        # assumes Qdrant point ids equal chunks.chunk_id — TODO confirm
        pg_text_by_id = {str(hit["chunk_id"]): hit["text"] for hit in pg["results"]}
        for i, r in enumerate(qd["results"][:10]):
            point_id = str(r.get("id"))
            if point_id in pg_text_by_id:
                text_display = _preview(pg_text_by_id[point_id])
            else:
                # No matching text available: show the id instead of a wrong text.
                text_display = f"<id {point_id}>"
            print(f" {i + 1:2}. [{r['score']:.3f}] {text_display}")

        print(f"\n[pgvector] Time: {pg['ms']:.1f}ms")
        print("-" * 60)
        for i, r in enumerate(pg["results"][:10]):
            print(f" {i + 1:2}. [{r['score']:.3f}] {_preview(r['text'])}")

        print()
|
||||
|
||||
|
||||
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
272
scripts/natural_language_vector_detailed.py
Normal file
272
scripts/natural_language_vector_detailed.py
Normal file
@@ -0,0 +1,272 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Natural Language Vector Search Comparison: Detailed Analysis
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import psycopg2
|
||||
|
||||
|
||||
# Identifier of the indexed video (printed in the report header).
VIDEO_UUID = "39567a0eb16f39fd"

# Keyword arguments for psycopg2.connect().
# NOTE(review): credentials are hard-coded in source.
POSTGRES_CONFIG = dict(
    host="localhost",
    port=5432,
    user="accusys",
    password="Test3200",
    database="momentry",
)

# Natural language test queries, grouped by theme; order is preserved.
NATURAL_LANGUAGE_QUERIES = (
    # Scene descriptions
    ["a person talking", "someone speaking on camera", "outdoor scene", "indoor setting"]
    # Actions
    + ["walking or moving", "dialogue or conversation", "looking at something"]
    # Emotions/tone
    + ["happy or joyful", "serious or dramatic", "comedy or funny"]
    # Objects
    + ["wearing a tie", "holding an object", "sitting on a chair"]
    # Locations
    + ["city or urban", "building or room", "open space"]
)
|
||||
|
||||
|
||||
def get_embedding(text):
    """Ask the local Ollama server for the ``nomic-embed-text`` embedding of *text*."""
    ollama_url = "http://localhost:11434/api/embeddings"
    answer = requests.post(
        ollama_url,
        json={"model": "nomic-embed-text", "prompt": text},
    ).json()
    return answer["embedding"]
|
||||
|
||||
|
||||
def test_qdrant(queries):
    """Search Qdrant (top 3) for each query; record latency, ids and scores.

    Returns a dict keyed by query with ``{"ms", "ids", "scores"}``; the two
    lists are parallel and empty when the response has no "result" key.
    """
    summary = {}

    for prompt in queries:
        vector = get_embedding(prompt)

        t0 = time.time()
        reply = requests.post(
            "http://localhost:6333/collections/AccusysDB/points/search",
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": vector, "limit": 3},
        )
        took_ms = (time.time() - t0) * 1000

        body = reply.json()
        point_ids = []
        point_scores = []
        for hit in body.get("result", []):
            point_ids.append(hit["id"])
        for hit in body.get("result", []):
            point_scores.append(hit["score"])

        summary[prompt] = {
            "ms": round(took_ms, 2),
            "ids": point_ids,
            "scores": point_scores,
        }

    return summary
|
||||
|
||||
|
||||
def test_pgvector(queries):
    """Search pgvector (top 3) for each query; record latency and hit details.

    Returns a dict keyed by query with ``{"ms", "chunk_ids", "scores",
    "texts"}``. Scores are ``1 - distance``; texts longer than 80
    characters are cut to 80 plus "...", and a NULL text stays ``None``.
    """
    sql = """
        SELECT cv.chunk_id, (cv.embedding_vector <=> %s::vector) as distance,
        c.content->>'text' as text
        FROM chunk_vectors cv
        JOIN chunks c ON cv.chunk_id = c.chunk_id
        WHERE cv.embedding_vector IS NOT NULL
        ORDER BY cv.embedding_vector <=> %s::vector
        LIMIT 3
        """
    connection = psycopg2.connect(**POSTGRES_CONFIG)
    cursor = connection.cursor()
    summary = {}

    for prompt in queries:
        vector = get_embedding(prompt)
        vector_literal = "[" + ",".join(str(v) for v in vector) + "]"

        t0 = time.time()
        cursor.execute(sql, (vector_literal, vector_literal))
        fetched = cursor.fetchall()
        took_ms = (time.time() - t0) * 1000

        ids, similarities, snippets = [], [], []
        for row in fetched:
            ids.append(row[0])
            similarities.append(1 - row[1])
            body = row[2]
            if body and len(body) > 80:
                snippets.append(body[:80] + "...")
            else:
                snippets.append(body)

        summary[prompt] = {
            "ms": round(took_ms, 2),
            "chunk_ids": ids,
            "scores": similarities,
            "texts": snippets,
        }

    cursor.close()
    connection.close()
    return summary
|
||||
|
||||
|
||||
def get_qdrant_texts(chunk_ids):
    """Fetch display text from PostgreSQL for a list of chunk ids.

    Args:
        chunk_ids: Ids looked up in ``chunks.chunk_id``.
            assumes Qdrant point ids equal chunk ids — TODO confirm

    Returns:
        A list parallel to ``chunk_ids``. Each entry is the chunk text,
        cut to 80 characters plus "..." when longer, or "" when the chunk
        is missing or has no text.
    """
    texts = []
    if not chunk_ids:
        # Nothing to look up: do not open a connection at all.
        return texts

    # One connection serves all lookups instead of one per id.
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for chunk_id in chunk_ids:
                cur.execute(
                    "SELECT content->>'text' FROM chunks WHERE chunk_id = %s",
                    (chunk_id,),
                )
                row = cur.fetchone()
                text = row[0] if row and row[0] else ""
                texts.append(text[:80] + "..." if len(text) > 80 else text)
        finally:
            cur.close()
    finally:
        # Close the connection even when a lookup fails.
        conn.close()
    return texts
|
||||
|
||||
|
||||
def _mean_query_score(results):
    """Return the mean of per-query mean scores, over queries that returned hits.

    Queries without scores are left out of both the sum and the count;
    the result is 0.0 when no query returned any score.
    """
    per_query = [
        sum(r["scores"]) / len(r["scores"]) for r in results.values() if r["scores"]
    ]
    return sum(per_query) / len(per_query) if per_query else 0.0


def _faster_backend(qdrant_ms, pgvector_ms):
    """Return ``(faster, slower, fast_ms, slow_ms, pct)`` for two mean latencies.

    ``pct`` is how much shorter the faster time is relative to the slower
    one, in percent; it is 0.0 when the slower time is zero.
    """
    if pgvector_ms <= qdrant_ms:
        faster, slower = "pgvector", "Qdrant"
        fast_ms, slow_ms = pgvector_ms, qdrant_ms
    else:
        faster, slower = "Qdrant", "pgvector"
        fast_ms, slow_ms = qdrant_ms, pgvector_ms
    pct = (slow_ms - fast_ms) / slow_ms * 100 if slow_ms else 0.0
    return faster, slower, fast_ms, slow_ms, pct


def main():
    """Compare Qdrant and pgvector on latency and similarity score and print a report.

    Runs all natural-language queries against both backends, prints average
    latency and average score, the hits of the first five queries, a count
    of which backend had the higher top score per query, and a conclusion.
    """
    print("=" * 80)
    print("NATURAL LANGUAGE VECTOR SEARCH COMPARISON")
    print("=" * 80)
    print(f"\nVideo: Charade 1963 (UUID: {VIDEO_UUID})")
    print("Model: nomic-embed-text")
    print(f"Queries: {len(NATURAL_LANGUAGE_QUERIES)} natural language queries\n")

    print("=" * 80)
    print("ANALYSIS")
    print("=" * 80)

    qdrant_results = test_qdrant(NATURAL_LANGUAGE_QUERIES)
    pgvector_results = test_pgvector(NATURAL_LANGUAGE_QUERIES)

    # Calculate averages
    qdrant_avg = sum(r["ms"] for r in qdrant_results.values()) / len(qdrant_results)
    pgvector_avg = sum(r["ms"] for r in pgvector_results.values()) / len(
        pgvector_results
    )

    # Compare similarity scores
    qdrant_avg_score = _mean_query_score(qdrant_results)
    pgvector_avg_score = _mean_query_score(pgvector_results)

    faster, slower, fast_ms, slow_ms, pct = _faster_backend(qdrant_avg, pgvector_avg)

    print("\nPERFORMANCE:")
    print(f" Qdrant avg time: {qdrant_avg:.2f}ms")
    print(f" pgvector avg time: {pgvector_avg:.2f}ms")
    # Name the faster backend instead of assuming it is pgvector.
    print(
        f" Speed difference: {slow_ms - fast_ms:.2f}ms ({faster} is {pct:.0f}% faster)"
    )

    print("\nSIMILARITY SCORES (higher = better match):")
    print(f" Qdrant avg score: {qdrant_avg_score:.3f}")
    print(f" pgvector avg score: {pgvector_avg_score:.3f}")

    # Detailed comparison
    print("\n" + "=" * 80)
    print("DETAILED RESULTS")
    print("=" * 80)

    for query in NATURAL_LANGUAGE_QUERIES[:5]:  # Show first 5 queries
        qd = qdrant_results[query]
        pg = pgvector_results[query]

        print(f"\n{'=' * 60}")
        print(f'Query: "{query}"')
        print(f"{'=' * 60}")

        print(f"\nQdrant ({qd['ms']:.1f}ms):")
        for i, (id_, score) in enumerate(zip(qd["ids"], qd["scores"])):
            print(f" {i + 1}. [{score:.3f}] {id_}")

        print(f"\npgvector ({pg['ms']:.1f}ms):")
        for i, (chunk_id, score, text) in enumerate(
            zip(pg["chunk_ids"], pg["scores"], pg["texts"])
        ):
            print(f" {i + 1}. [{score:.3f}] {chunk_id}")
            print(f' "{text}"')

    # Best matches analysis
    print("\n" + "=" * 80)
    print("BEST MATCH ANALYSIS")
    print("=" * 80)

    # Count which backend has the higher top score; equal scores are ties
    # rather than wins for either side.
    pg_wins = 0
    qd_wins = 0
    ties = 0

    for query in NATURAL_LANGUAGE_QUERIES:
        qd_max = max(qdrant_results[query]["scores"], default=0)
        pg_max = max(pgvector_results[query]["scores"], default=0)

        if pg_max > qd_max:
            pg_wins += 1
        elif qd_max > pg_max:
            qd_wins += 1
        else:
            ties += 1

    print(
        f"\n pgvector has higher similarity: {pg_wins}/{len(NATURAL_LANGUAGE_QUERIES)} queries"
    )
    print(
        f" Qdrant has higher similarity: {qd_wins}/{len(NATURAL_LANGUAGE_QUERIES)} queries"
    )
    print(f" Ties: {ties}/{len(NATURAL_LANGUAGE_QUERIES)} queries")

    print("\n" + "=" * 80)
    print("CONCLUSION")
    print("=" * 80)
    # The timing line is built from this run's measurements.
    # NOTE(review): sections 2-4 are fixed text and are not derived from
    # the results above.
    print(f"""
1. PERFORMANCE:
- {faster} is ~{pct:.0f}% faster than {slower} ({fast_ms:.2f}ms vs {slow_ms:.2f}ms)
- For large datasets, this difference would be more pronounced

2. QUALITY (Similarity Scores):
- pgvector returns consistently HIGHER similarity scores
- This suggests better semantic matching in pgvector
- Qdrant may use different distance calculation

3. WHY PGVECTOR IS BETTER HERE:
- Local database (no network overhead)
- Same transaction as metadata
- Optimized for the dataset size
- Cosine distance directly in SQL

4. WHEN TO USE QDRANT:
- Very large datasets (millions of vectors)
- Distributed architecture
- Need advanced vector features (filters, aggregations)
- Cloud-native deployments
""")
|
||||
|
||||
|
||||
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
220
scripts/natural_language_vector_test.py
Normal file
220
scripts/natural_language_vector_test.py
Normal file
@@ -0,0 +1,220 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Natural Language Vector Search Comparison: PostgreSQL pgvector vs Qdrant
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import psycopg2
|
||||
|
||||
|
||||
# Identifier of the indexed video (printed in the report header).
VIDEO_UUID = "39567a0eb16f39fd"

# Keyword arguments for psycopg2.connect().
# NOTE(review): credentials are hard-coded in source.
POSTGRES_CONFIG = dict(
    [
        ("host", "localhost"),
        ("port", 5432),
        ("user", "accusys"),
        ("password", "Test3200"),
        ("database", "momentry"),
    ]
)

# Natural language test queries, grouped by theme; order is preserved.
NATURAL_LANGUAGE_QUERIES = [
    # Scene descriptions
    *["a person talking", "someone speaking on camera", "outdoor scene", "indoor setting"],
    # Actions
    *["walking or moving", "dialogue or conversation", "looking at something"],
    # Emotions/tone
    *["happy or joyful", "serious or dramatic", "comedy or funny"],
    # Objects
    *["wearing a tie", "holding an object", "sitting on a chair"],
    # Locations
    *["city or urban", "building or room", "open space"],
]
|
||||
|
||||
|
||||
def get_embedding(text):
    """Embed *text* with ``nomic-embed-text`` through the local Ollama API."""
    http_reply = requests.post(
        "http://localhost:11434/api/embeddings",
        json=dict(model="nomic-embed-text", prompt=text),
    )
    parsed = http_reply.json()
    return parsed["embedding"]
|
||||
|
||||
|
||||
def test_qdrant(queries):
    """Search Qdrant (top 5) for each query, print a line per query, return timings.

    Returns a dict keyed by query with ``{"ms": elapsed_ms, "scores": [...]}``.
    """
    summary = {}

    for question in queries:
        # The embedding lookup happens before the timer starts.
        vector = get_embedding(question)

        t0 = time.time()
        reply = requests.post(
            "http://localhost:6333/collections/AccusysDB/points/search",
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": vector, "limit": 5},
        )
        took_ms = (time.time() - t0) * 1000

        hit_scores = [hit["score"] for hit in reply.json().get("result", [])]
        summary[question] = {"ms": round(took_ms, 2), "scores": hit_scores}

        if hit_scores:
            print(
                f"Qdrant: '{question}' -> {took_ms:.2f}ms, top score: {hit_scores[0]:.3f}"
            )
        else:
            print(f"Qdrant: '{question}' -> {took_ms:.2f}ms, no results")

    return summary
|
||||
|
||||
|
||||
def test_pgvector(queries):
    """Run every query against PostgreSQL pgvector and time it.

    Each query is embedded with ``get_embedding``; the vector is passed to
    PostgreSQL in its textual ``[v1,v2,...]`` form and compared with the
    ``<=>`` operator against ``chunk_vectors.embedding_vector``. The timed
    section covers ``execute`` plus ``fetchall``.

    Returns:
        dict mapping each query string to
        ``{"ms": <float>, "scores": [...], "chunk_ids": [...]}``.
    """
    results = {}
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for query in queries:
                embedding = get_embedding(query)
                vector_str = "[" + ",".join(str(x) for x in embedding) + "]"

                start = time.time()
                cur.execute(
                    """
                    SELECT chunk_id, (embedding_vector <=> %s::vector) as distance
                    FROM chunk_vectors
                    WHERE embedding_vector IS NOT NULL
                    ORDER BY embedding_vector <=> %s::vector
                    LIMIT 5
                    """,
                    (vector_str, vector_str),
                )
                rows = cur.fetchall()
                elapsed = (time.time() - start) * 1000

                # Convert distance to similarity score (1 - distance for cosine)
                scores = [1 - r[1] for r in rows]

                results[query] = {
                    "ms": round(elapsed, 2),
                    "scores": scores,
                    "chunk_ids": [r[0] for r in rows],
                }
                if scores:
                    print(
                        f"pgvector: '{query}' -> {elapsed:.2f}ms, top score: {scores[0]:.3f}"
                    )
                else:
                    print(f"pgvector: '{query}' -> {elapsed:.2f}ms, no results")
        finally:
            # Release the cursor even when an embedding call or a query fails.
            cur.close()
    finally:
        conn.close()
    return results
|
||||
|
||||
|
||||
def get_chunk_text(chunk_id):
    """Return the first 100 characters of a chunk's text.

    Looks up ``content->>'text'`` in the ``chunks`` table for *chunk_id*.
    Returns an empty string when the chunk does not exist or when its
    ``text`` field is NULL/absent.
    """
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                "SELECT content->>'text' FROM chunks WHERE chunk_id = %s", (chunk_id,)
            )
            result = cur.fetchone()
        finally:
            cur.close()
    finally:
        conn.close()

    if not result:
        return ""
    # The JSON field may be missing, in which case the column value is None;
    # slicing None would raise TypeError.
    return (result[0] or "")[:100]
|
||||
|
||||
|
||||
def main():
    """Run the Qdrant and pgvector benchmarks and print a comparison report.

    Prints per-query timings and top scores for both back ends, the average
    response time of each, and the top-3 hits for one sample query.
    """
    print("=" * 70)
    print("NATURAL LANGUAGE VECTOR SEARCH COMPARISON")
    print("=" * 70)
    print(f"\nVideo UUID: {VIDEO_UUID}")
    print(f"Testing {len(NATURAL_LANGUAGE_QUERIES)} natural language queries\n")

    print("=" * 70)
    print("QDRANT SEARCH")
    print("=" * 70)
    qdrant_results = test_qdrant(NATURAL_LANGUAGE_QUERIES)

    print("\n" + "=" * 70)
    print("POSTGRESQL PGVECTOR SEARCH")
    print("=" * 70)
    pgvector_results = test_pgvector(NATURAL_LANGUAGE_QUERIES)

    # Calculate averages
    qdrant_avg = sum(r["ms"] for r in qdrant_results.values()) / len(qdrant_results)
    pgvector_avg = sum(r["ms"] for r in pgvector_results.values()) / len(
        pgvector_results
    )

    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"\n{'Query':<30} | {'Qdrant':<15} | {'pgvector':<15}")
    print("-" * 65)

    for query in NATURAL_LANGUAGE_QUERIES:
        qd = qdrant_results[query]
        pg = pgvector_results[query]

        qd_score = f"{qd['scores'][0]:.3f}" if qd["scores"] else "N/A"
        pg_score = f"{pg['scores'][0]:.3f}" if pg["scores"] else "N/A"

        print(
            f"{query:<30} | {qd['ms']:>5.1f}ms {qd_score:<7} | {pg['ms']:>5.1f}ms {pg_score}"
        )

    print("\n" + "=" * 70)
    print("AVERAGE RESPONSE TIME")
    print("=" * 70)
    print(f" Qdrant: {qdrant_avg:.2f}ms")
    print(f" pgvector: {pgvector_avg:.2f}ms")
    print(
        f" Difference: {abs(qdrant_avg - pgvector_avg):.2f}ms ({'Qdrant faster' if qdrant_avg < pgvector_avg else 'pgvector faster'})"
    )

    # Show sample results
    print("\n" + "=" * 70)
    print("SAMPLE RESULTS")
    print("=" * 70)

    sample_query = "a person talking"
    print(f"\nQuery: '{sample_query}'")

    # The Qdrant results only carry scores, so no chunk id can be shown here.
    print("\nQdrant results:")
    for i, r in enumerate(qdrant_results[sample_query]["scores"][:3]):
        print(f" {i + 1}. score: {r:.3f}")

    print("\npgvector results:")
    for i, (chunk_id, score) in enumerate(
        zip(
            pgvector_results[sample_query]["chunk_ids"][:3],
            pgvector_results[sample_query]["scores"][:3],
        )
    ):
        text = get_chunk_text(chunk_id)
        print(f" {i + 1}. {chunk_id} (score: {score:.3f})")
        print(f' "{text}..."')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
165
scripts/object_search.py
Normal file
165
scripts/object_search.py
Normal file
@@ -0,0 +1,165 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Object search using YOLO metadata
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
import psycopg2
|
||||
|
||||
|
||||
YOLO_FILE = "/Users/accusys/test_video/Old_Time_Movie_Show_-_Charade_1963.HD.yolo.json"
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
|
||||
POSTGRES_CONFIG = {
|
||||
"host": "localhost",
|
||||
"port": 5432,
|
||||
"user": "accusys",
|
||||
"password": "Test3200",
|
||||
"database": "momentry",
|
||||
}
|
||||
|
||||
|
||||
def load_yolo_data():
    """Read the YOLO detection JSON at ``YOLO_FILE`` and return it parsed.

    Prints the path before loading and the number of entries under the
    ``"frames"`` key afterwards.
    """
    print(f"Loading YOLO data from {YOLO_FILE}...")
    with open(YOLO_FILE) as handle:
        parsed = json.load(handle)
    frame_total = len(parsed["frames"])
    print(f"Loaded {frame_total} frames")
    return parsed
|
||||
|
||||
|
||||
def get_object_time_ranges(yolo_data, object_name, min_confidence=0.3):
    """Return one time range per frame in which *object_name* is detected.

    Walks ``yolo_data["frames"]`` and, for each frame, takes the first
    detection whose ``class_name`` matches *object_name* (case-insensitive)
    with ``confidence >= min_confidence``. Each hit yields a dict with
    ``start`` (the frame's ``time_seconds``), ``end`` (start + 0.5) and the
    detection's ``confidence``.
    """
    wanted = object_name.lower()
    spans = []

    for frame in yolo_data["frames"].values():
        # One qualifying detection per frame is enough.
        hit = next(
            (
                det
                for det in frame.get("detections", [])
                if det["class_name"].lower() == wanted
                and det["confidence"] >= min_confidence
            ),
            None,
        )
        if hit is None:
            continue

        moment = frame["time_seconds"]
        spans.append(
            {
                "start": moment,
                "end": moment + 0.5,  # Assume ~0.5s per frame
                "confidence": hit["confidence"],
            }
        )

    return spans
|
||||
|
||||
|
||||
def search_chunks_by_object_postgres(object_name):
    """Return sentence chunks of ``VIDEO_UUID`` whose YOLO metadata lists *object_name*.

    Fetches all sentence chunks that carry a ``metadata->'yolo'`` entry and
    filters them in Python with a case-insensitive match against
    ``metadata["yolo"]["objects"]``.

    Returns:
        list of ``{"chunk_id", "start_time", "end_time"}`` dicts.
    """
    # Query chunks that have YOLO metadata containing the object
    query = """
        SELECT chunk_id, start_time, end_time, metadata
        FROM chunks
        WHERE uuid = %s
          AND chunk_type = 'sentence'
          AND metadata IS NOT NULL
          AND metadata->'yolo' IS NOT NULL
    """

    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            cur.execute(query, (VIDEO_UUID,))
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        # Close the connection even if the query fails.
        conn.close()

    wanted = object_name.lower()
    matching_chunks = []
    for chunk_id, start_time, end_time, metadata in rows:
        # A JSON null under "yolo"/"objects" passes the SQL IS NOT NULL test
        # but arrives here as None, so fall back to empty containers.
        yolo_data = metadata.get("yolo") or {}
        objects = yolo_data.get("objects") or []
        if any(obj.lower() == wanted for obj in objects):
            matching_chunks.append(
                {
                    "chunk_id": chunk_id,
                    "start_time": start_time,
                    "end_time": end_time,
                }
            )

    return matching_chunks
|
||||
|
||||
|
||||
def test_object_search_by_time():
    """Benchmark object search by matching YOLO timestamps to sentence chunks.

    For each test object, collects the frames in which YOLO detected it,
    samples at most 100 of those time ranges, and asks PostgreSQL which
    sentence chunks of ``VIDEO_UUID`` overlap each sampled range.

    Returns:
        dict mapping object name to ``{"ms", "chunks", "frames"}`` where
        ``chunks`` is the number of distinct chunks overlapping any sampled
        range and ``frames`` is the total number of detection frames.
    """
    import random  # imported once here instead of on every loop iteration

    yolo_data = load_yolo_data()

    test_objects = ["person", "car", "clock", "tie", "chair"]
    results = {}

    # Find chunks that have any overlap with a YOLO detection range.
    query = """
        SELECT DISTINCT c.chunk_id
        FROM chunks c
        WHERE c.uuid = %s
          AND c.chunk_type = 'sentence'
          AND c.start_time <= %s
          AND c.end_time >= %s
    """

    for obj in test_objects:
        start = time.time()

        # Get time ranges from YOLO
        time_ranges = get_object_time_ranges(yolo_data, obj)

        if not time_ranges:
            results[obj] = {"ms": 0, "chunks": 0, "frames": 0}
            continue

        # Sample time ranges to avoid too many queries
        sample_ranges = random.sample(time_ranges, min(100, len(time_ranges)))

        # Collect ids in a set: neighbouring frames usually fall inside the
        # same chunk, and summing per-range counts would count it repeatedly.
        matched_ids = set()
        conn = psycopg2.connect(**POSTGRES_CONFIG)
        try:
            cur = conn.cursor()
            try:
                for tr in sample_ranges:
                    cur.execute(query, (VIDEO_UUID, tr["end"], tr["start"]))
                    matched_ids.update(row[0] for row in cur.fetchall())
            finally:
                cur.close()
        finally:
            conn.close()
        total_matches = len(matched_ids)

        elapsed = (time.time() - start) * 1000
        results[obj] = {
            "ms": round(elapsed, 2),
            "chunks": total_matches,
            "frames": len(time_ranges),
        }
        print(
            f"Object '{obj}': {elapsed:.2f}ms, {len(time_ranges)} frames, {total_matches} chunks"
        )

    return results
|
||||
|
||||
|
||||
def main():
    """Run the object-search benchmark and print a per-object summary table."""
    rule = "=" * 60

    print(rule)
    print("Object Search Test (Priority c)")
    print(rule)

    results = test_object_search_by_time()

    print("\n" + rule)
    print("Summary")
    print(rule)
    print(f"\n{'Object':<20} | {'Time (ms)':<12} | {'Frames':<10} | {'Chunks'}")
    print("-" * 60)
    for name in results:
        row = results[name]
        print(
            f"{name:<20} | {row['ms']:<12.1f} | {row['frames']:<10} | {row['chunks']}"
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
155
scripts/ocr_processor.py
Executable file
155
scripts/ocr_processor.py
Executable file
@@ -0,0 +1,155 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
OCR Processor - Text Recognition
|
||||
Uses EasyOCR (local model)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
def process_ocr(video_path: str, output_path: str, uuid: str = ""):
    """Run EasyOCR over sampled frames of a video and write the result as JSON.

    Every 30th frame is converted to RGB and passed to EasyOCR. Frames in
    which at least one non-blank text was recognised are recorded with their
    zero-based frame index, timestamp and text boxes.

    Args:
        video_path: Path of the video to analyse.
        output_path: Path of the JSON file to write.
        uuid: When non-empty, progress is published through ``RedisPublisher``
            for this UUID; when empty, nothing is published.

    Returns:
        The dict written to *output_path*:
        ``{"frame_count": int, "fps": float, "frames": [...]}``. If EasyOCR is
        not installed an empty result (0 frames) is written and returned.
    """
    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("ocr", "OCR_START")

    try:
        import easyocr
    except ImportError:
        if publisher:
            publisher.error("ocr", "easyocr not installed")
        result = {"frame_count": 0, "fps": 0.0, "frames": []}
        # Write the file before announcing completion so that a listener
        # reacting to "complete" always finds the output in place.
        with open(output_path, "w") as f:
            json.dump(result, f, indent=2)
        if publisher:
            publisher.complete("ocr", "0 frames")
        return result

    if publisher:
        publisher.info("ocr", "OCR_LOADING_MODEL")

    # Load EasyOCR reader
    # languages: add more like 'fr', 'de', 'ja', 'ko', etc.
    # gpu: set to True if GPU available
    reader = easyocr.Reader(["en"], gpu=False, verbose=False)

    if publisher:
        publisher.info("ocr", "OCR_MODEL_LOADED")

    import cv2

    # Process every N frames to speed up
    sample_interval = 30

    frames = []
    frame_count = 0
    processed = 0

    # A single capture serves both the metadata query and the frame loop.
    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Number of frames that will actually be OCR'd; at least 1 so the
        # progress total is never zero for very short videos.
        expected_samples = max(1, total_frames // sample_interval)

        if publisher:
            publisher.info("ocr", f"fps={fps}, frames={total_frames}")
            publisher.progress("ocr", 0, expected_samples, "Starting")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            # Sample frames
            if frame_count % sample_interval != 0:
                continue

            processed += 1
            timestamp = (frame_count - 1) / fps if fps > 0 else 0

            # OpenCV delivers BGR; convert to RGB before OCR.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            try:
                detections = reader.readtext(
                    frame_rgb, text_threshold=0.5, low_text=0.3, link_threshold=0.3
                )
            except Exception as e:
                # Best effort: a frame that fails OCR is reported and skipped.
                if publisher:
                    publisher.error("ocr", f"Frame {frame_count}: {e}")
                detections = []

            texts = _ocr_texts_from_detections(detections)

            # Only add frames with text
            if texts:
                frames.append(
                    {
                        "frame": frame_count - 1,
                        "timestamp": round(timestamp, 3),
                        "texts": texts,
                    }
                )

            if publisher:
                publisher.progress(
                    "ocr",
                    processed,
                    expected_samples,
                    f"Frame {frame_count}",
                )
    finally:
        # Release the capture even if decoding or OCR raises.
        cap.release()

    result = {"frame_count": total_frames, "fps": fps, "frames": frames}

    with open(output_path, "w") as f:
        json.dump(result, f, indent=2)

    if publisher:
        publisher.complete("ocr", f"{len(frames)} frames with text")

    return result


def _ocr_texts_from_detections(detections) -> list:
    """Convert ``(bbox, text, confidence)`` detections into JSON-ready dicts.

    Each bbox is a sequence of ``(x, y)`` points; it is reduced to an
    axis-aligned ``x``/``y``/``width``/``height`` box. Detections whose text
    is blank are dropped.
    """
    texts = []
    for detection in detections:
        det = tuple(detection)
        text = str(det[1])
        if not text.strip():
            continue

        points = list(det[0])
        xs = [float(p[0]) for p in points]
        ys = [float(p[1]) for p in points]
        x = int(min(xs))
        y = int(min(ys))
        texts.append(
            {
                "text": text,
                "x": x,
                "y": y,
                "width": int(max(xs) - x),
                "height": int(max(ys) - y),
                "confidence": float(det[2]),
            }
        )
    return texts
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="OCR Text Recognition")
|
||||
parser.add_argument("video_path", help="Path to video file")
|
||||
parser.add_argument("output_path", help="Output JSON path")
|
||||
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
|
||||
args = parser.parse_args()
|
||||
|
||||
process_ocr(args.video_path, args.output_path, args.uuid)
|
||||
168
scripts/pose_processor.py
Executable file
168
scripts/pose_processor.py
Executable file
@@ -0,0 +1,168 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Pose Processor - Pose Estimation
|
||||
Uses YOLOv8 Pose via ultralytics (local model)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
def process_pose(video_path: str, output_path: str, uuid: str = ""):
    """Run YOLOv8 pose estimation over a video and write the result as JSON.

    Frames are recorded when at least one person was found, plus every 30th
    frame regardless, each with its frame index, timestamp and the list of
    persons (keypoints and a bounding box derived from them).

    Args:
        video_path: Path of the video to analyse.
        output_path: Path of the JSON file to write.
        uuid: When non-empty, progress is published through ``RedisPublisher``
            for this UUID; when empty, nothing is published.

    Returns:
        The dict written to *output_path*:
        ``{"frame_count": int, "fps": float, "frames": [...]}``. If
        ultralytics is not installed an empty result is written and returned.
    """
    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("pose", "POSE_START")

    try:
        from ultralytics import YOLO  # pyright: ignore
    except ImportError:
        if publisher:
            publisher.error("pose", "ultralytics not installed")
        result = {"frame_count": 0, "fps": 0.0, "frames": []}
        # Write the file before announcing completion so that a listener
        # reacting to "complete" always finds the output in place.
        with open(output_path, "w") as f:
            json.dump(result, f, indent=2)
        if publisher:
            publisher.complete("pose", "0 frames")
        return result

    if publisher:
        publisher.info("pose", "POSE_LOADING_MODEL")

    # Load YOLOv8 Pose model
    # yolov8n-pose.pt = nano (fastest)
    # yolov8s-pose.pt = small
    # yolov8m-pose.pt = medium
    model = YOLO("yolov8n-pose.pt")

    # Get video info
    import cv2

    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()

    if publisher:
        publisher.info("pose", f"fps={fps}, frames={total_frames}")
        publisher.progress("pose", 0, total_frames, "Starting")

    # Process video with YOLO Pose. The pose task comes from the "-pose"
    # weights; the ultralytics ``pose`` setting is the pose-loss gain and does
    # not switch pose estimation on, so it is not passed here.
    results = model(
        video_path,
        conf=0.5,  # confidence threshold
        save=False,
        stream=True,
        verbose=False,
    )

    frames = []
    frame_count = 0

    for result in results:
        frame_count += 1

        # Use the index supplied by the result when it has one, otherwise
        # fall back to a running zero-based counter.
        frame_idx = getattr(result, "orig_frame_idx", frame_count - 1)
        timestamp = frame_idx / fps if fps > 0 else 0

        persons = []
        if result.keypoints is not None:
            # ``.data`` holds one row of keypoint values per detected person.
            # Iterating the Keypoints wrapper itself does not yield
            # per-keypoint rows, so read the raw data instead.
            for person_rows in result.keypoints.data.tolist():
                persons.append(_pose_person_from_keypoints(person_rows))

        # Only add frames with poses or sample periodically
        if persons or frame_count % 30 == 0:
            frames.append(
                {
                    "frame": frame_idx,
                    "timestamp": round(timestamp, 3),
                    "persons": persons,
                }
            )

        if publisher:
            publisher.progress("pose", frame_count, total_frames, f"Frame {frame_idx}")

    output = {"frame_count": total_frames, "fps": fps, "frames": frames}

    with open(output_path, "w") as f:
        json.dump(output, f, indent=2)

    if publisher:
        publisher.complete("pose", f"{len(frames)} frames with poses")

    return output


# COCO keypoint names
_POSE_KEYPOINT_NAMES = [
    "nose",
    "left_eye",
    "right_eye",
    "left_ear",
    "right_ear",
    "left_shoulder",
    "right_shoulder",
    "left_elbow",
    "right_elbow",
    "left_wrist",
    "right_wrist",
    "left_hip",
    "right_hip",
    "left_knee",
    "right_knee",
    "left_ankle",
    "right_ankle",
]


def _pose_person_from_keypoints(person_rows) -> dict:
    """Build one person entry from a list of ``[x, y, confidence]`` rows.

    Rows with fewer than three values are skipped. The bounding box spans the
    keypoints whose confidence exceeds 0.3, or is all zeros if there are none.
    """
    keypoints = []
    for i, kp in enumerate(person_rows):
        if len(kp) >= 3:
            keypoints.append(
                {
                    "name": _POSE_KEYPOINT_NAMES[i]
                    if i < len(_POSE_KEYPOINT_NAMES)
                    else f"kp_{i}",
                    "x": float(kp[0]),
                    "y": float(kp[1]),
                    "confidence": float(kp[2]),
                }
            )

    valid_kps = [kp for kp in keypoints if kp["confidence"] > 0.3]
    if valid_kps:
        xs = [kp["x"] for kp in valid_kps]
        ys = [kp["y"] for kp in valid_kps]
        bbox = {
            "x": int(min(xs)),
            "y": int(min(ys)),
            "width": int(max(xs) - min(xs)),
            "height": int(max(ys) - min(ys)),
        }
    else:
        bbox = {"x": 0, "y": 0, "width": 0, "height": 0}

    return {"keypoints": keypoints, "bbox": bbox}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Pose Estimation")
|
||||
parser.add_argument("video_path", help="Path to video file")
|
||||
parser.add_argument("output_path", help="Output JSON path")
|
||||
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
|
||||
args = parser.parse_args()
|
||||
|
||||
process_pose(args.video_path, args.output_path, args.uuid)
|
||||
184
scripts/redis_publisher.py
Normal file
184
scripts/redis_publisher.py
Normal file
@@ -0,0 +1,184 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Redis Progress Publisher
|
||||
Common module for publishing progress to Redis
|
||||
|
||||
Usage:
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
pub = RedisPublisher("video-uuid-123")
|
||||
pub.info("asr", "Starting ASR processing")
|
||||
pub.progress("asr", current=50, total=100, message="Processing segment")
|
||||
pub.complete("asr", "Transcription complete")
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import redis
|
||||
from typing import Optional, Any, Dict
|
||||
from dataclasses import dataclass, asdict
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class MessageType(Enum):
    """Kinds of progress message; the value is the string sent in the payload."""

    INFO = "info"
    PROGRESS = "progress"
    COMPLETE = "complete"
    ERROR = "error"
    WARNING = "warning"
|
||||
|
||||
|
||||
@dataclass
class ProgressData:
    """Optional payload fields of a progress message; all default to None."""

    # Free-form human-readable text.
    message: Optional[str] = None
    # Progress counter and its upper bound, as passed by the caller.
    current: Optional[int] = None
    total: Optional[int] = None
    # Additional key/value data supplied by the caller.
    extra: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
@dataclass
class StructuredMessage:
    """Envelope that is converted with ``asdict`` and published as JSON."""

    # String value of a MessageType member.
    type: str
    # Name of the processor emitting the message.
    processor: str
    # UUID the message belongs to.
    uuid: str
    # Integer timestamp set by the publisher.
    timestamp: int
    # Message payload.
    data: ProgressData
|
||||
|
||||
|
||||
class RedisPublisher:
    """Publish structured progress messages for one UUID to a Redis channel.

    Messages go to the channel ``momentry:progress:<uuid>`` as JSON. The
    publisher is best-effort: if Redis cannot be reached at construction
    time it stays disabled and every publishing method returns ``False``
    instead of raising.
    """

    def __init__(self, uuid: str):
        """Remember *uuid*, derive the channel name and try to connect."""
        self.uuid = uuid
        self.channel = f"momentry:progress:{uuid}"
        self._enabled = False
        self._client = None
        self._connect()

    def _connect(self) -> None:
        """Connect to Redis and enable publishing if a ping succeeds.

        Uses ``REDIS_URL`` when set; otherwise builds a localhost URL from
        ``REDIS_PASSWORD``. Failures are reported on stderr and leave the
        publisher disabled.
        """
        redis_url = os.environ.get("REDIS_URL")
        if not redis_url:
            # NOTE(review): the fallback password is hard-coded; consider
            # requiring REDIS_PASSWORD or REDIS_URL outside development.
            password = os.environ.get("REDIS_PASSWORD", "accusys")
            redis_url = f"redis://:{password}@localhost:6379"

        try:
            self._client = redis.from_url(redis_url, decode_responses=True)
            self._client.ping()
            self._enabled = True
        except redis.ConnectionError as e:
            import sys

            print(f"[RedisPublisher] Connection failed: {e}", file=sys.stderr)
        except Exception as e:
            import sys

            print(f"[RedisPublisher] Redis not available: {e}", file=sys.stderr)

    @property
    def enabled(self) -> bool:
        """True when the initial connection succeeded."""
        return self._enabled

    def _publish_json(self, msg: StructuredMessage) -> bool:
        """Serialise *msg* to JSON and publish it on this publisher's channel.

        Returns ``True`` on success, ``False`` when the publisher is disabled
        or the publish call raises (the error is reported on stderr).
        """
        if not self._enabled or self._client is None:
            return False

        try:
            client: redis.Redis = self._client
            client.publish(self.channel, json.dumps(asdict(msg)))
            return True
        except Exception as e:
            import sys

            print(f"[RedisPublisher] Publish error: {e}", file=sys.stderr)
            return False

    def publish(
        self,
        msg_type: MessageType,
        processor: str,
        message: Optional[str] = None,
        current: Optional[int] = None,
        total: Optional[int] = None,
        extra: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """Build a ``StructuredMessage`` stamped with the current time and publish it.

        Returns ``False`` without building anything when disabled.
        """
        if not self._enabled:
            return False

        msg = StructuredMessage(
            type=msg_type.value,
            processor=processor,
            uuid=self.uuid,
            timestamp=int(time.time()),
            data=ProgressData(
                message=message,
                current=current,
                total=total,
                extra=extra,
            ),
        )

        return self._publish_json(msg)

    def info(self, processor: str, message: str) -> bool:
        """Publish an INFO message."""
        return self.publish(MessageType.INFO, processor, message=message)

    def progress(
        self,
        processor: str,
        current: int,
        total: int,
        message: str = "",
    ) -> bool:
        """Publish a PROGRESS message with a ``current``/``total`` pair."""
        return self.publish(
            MessageType.PROGRESS,
            processor,
            message=message,
            current=current,
            total=total,
        )

    def complete(self, processor: str, message: str = "") -> bool:
        """Publish a COMPLETE message."""
        return self.publish(MessageType.COMPLETE, processor, message=message)

    def error(self, processor: str, message: str) -> bool:
        """Publish an ERROR message."""
        return self.publish(MessageType.ERROR, processor, message=message)

    def warning(self, processor: str, message: str) -> bool:
        """Publish a WARNING message."""
        return self.publish(MessageType.WARNING, processor, message=message)

    def percentage(self, processor: str, percent: float, message: str = "") -> bool:
        """Publish a PROGRESS message expressed as a percentage.

        ``current`` is ``int(percent)`` out of a ``total`` of 100; the exact
        value is kept under ``extra["percentage"]``.
        """
        return self.publish(
            MessageType.PROGRESS,
            processor,
            message=message,
            current=int(percent),
            total=100,
            extra={"percentage": percent},
        )
|
||||
|
||||
|
||||
class ProgressContext:
    """Context manager that brackets a processor run with progress messages.

    On entry an info message ``"<PROCESSOR> started"`` is published. On exit
    either an error message (carrying the exception text) or a completion
    message is published. Exceptions are never suppressed.
    """

    def __init__(self, publisher: RedisPublisher, processor: str):
        self.publisher, self.processor = publisher, processor

    def __enter__(self):
        started = f"{self.processor.upper()} started"
        self.publisher.info(self.processor, started)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is None:
            self.publisher.complete(self.processor)
            return False
        self.publisher.error(self.processor, str(exc_val))
        # Returning False lets the exception propagate to the caller.
        return False
|
||||
170
scripts/setup_fresh_mac.sh
Normal file
170
scripts/setup_fresh_mac.sh
Normal file
@@ -0,0 +1,170 @@
|
||||
#!/bin/bash
|
||||
#==============================================================================
|
||||
# Momentry System - Initial Setup Script
|
||||
# 適用於全新 Mac 的系統準備腳本
|
||||
#==============================================================================
|
||||
|
||||
set -e
|
||||
|
||||
# 顏色定義
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# 變數
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
MOMENTRY_DIR="/Users/accusys/momentry"
|
||||
CURRENT_USER=$(whoami)
|
||||
|
||||
#==============================================================================
|
||||
# 函數定義
|
||||
#==============================================================================
|
||||
|
||||
log_info() {
    # Print an informational message prefixed with a blue [INFO] tag.
    local message="$1"
    echo -e "${BLUE}[INFO]${NC} ${message}"
}
|
||||
|
||||
log_success() {
    # Print a success message prefixed with a green [SUCCESS] tag.
    local message="$1"
    echo -e "${GREEN}[SUCCESS]${NC} ${message}"
}
|
||||
|
||||
log_warning() {
    # Print a warning message prefixed with a yellow [WARNING] tag.
    local message="$1"
    echo -e "${YELLOW}[WARNING]${NC} ${message}"
}
|
||||
|
||||
log_error() {
    # Print an error message prefixed with a red [ERROR] tag.
    local message="$1"
    echo -e "${RED}[ERROR]${NC} ${message}"
}
|
||||
|
||||
check_root() {
    # Warn (but do not abort) when the script is run as root.
    # An `if` is used rather than `test && cmd` so the function still
    # returns 0 for non-root users under `set -e`.
    if (( EUID == 0 )); then
        log_warning "不建議使用 root 執行此腳本"
    fi
}
|
||||
|
||||
#==============================================================================
|
||||
# 主要流程
|
||||
#==============================================================================
|
||||
|
||||
main() {
    # Entry point: prepares directories, checks Homebrew and required CLI
    # tools, reports which services respond, and generates a health-check
    # helper script under $MOMENTRY_DIR/scripts.
    local health_script="$MOMENTRY_DIR/scripts/health_check.sh"

    echo "=========================================="
    echo "Momentry System - Initial Setup"
    echo "=========================================="
    echo ""

    check_root

    # Step 1: create the directory layout
    log_info "Step 1/5: 建立目錄結構..."

    mkdir -p "$MOMENTRY_DIR"/{var,etc,log,scripts,backup}
    mkdir -p "$MOMENTRY_DIR/var"/{postgresql,mongodb,mariadb,redis,qdrant,n8n,ollama,sftpgo}
    mkdir -p "$MOMENTRY_DIR/etc"/{sftpgo,caddy,gitea,php}
    mkdir -p "$MOMENTRY_DIR/backup"/{daily,weekly,monthly}
    mkdir -p /Users/accusys/workspace/sftpgo
    mkdir -p /Users/accusys/sftpgo_test/{demo,uploads}

    # Ownership changes are best-effort; failures are ignored.
    chown -R "$CURRENT_USER":staff "$MOMENTRY_DIR" 2>/dev/null || true
    chown -R "$CURRENT_USER":staff /Users/accusys/workspace 2>/dev/null || true
    chown -R "$CURRENT_USER":staff /Users/accusys/sftpgo_test 2>/dev/null || true

    log_success "目錄結構建立完成"

    # Step 2: check for Homebrew
    log_info "Step 2/5: 檢查 Homebrew..."

    if ! command -v brew &>/dev/null; then
        log_warning "Homebrew 未安裝"
        echo ""
        echo "請執行以下命令安裝 Homebrew:"
        echo "/bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
        echo ""
        echo "安裝完成後,重新執行此腳本"
        exit 1
    fi

    log_success "Homebrew 已安裝: $(brew --version | head -1)"

    # Step 3: check for required command-line tools
    log_info "Step 3/5: 檢查必要工具..."

    TOOLS=("git" "curl" "jq" "tree")
    MISSING_TOOLS=()

    for tool in "${TOOLS[@]}"; do
        if ! command -v "$tool" &>/dev/null; then
            MISSING_TOOLS+=("$tool")
        fi
    done

    if [ ${#MISSING_TOOLS[@]} -gt 0 ]; then
        log_warning "缺少以下工具: ${MISSING_TOOLS[*]}"
        echo ""
        # `read` fails on EOF (non-interactive stdin); without `|| true` that
        # would abort the whole script under `set -e`. An empty reply falls
        # through to the "skip" branch below.
        read -p "是否自動安裝這些工具? (y/n): " -n 1 -r || true
        echo ""
        if [[ $REPLY =~ ^[Yy]$ ]]; then
            brew install "${MISSING_TOOLS[@]}"
            log_success "工具安裝完成"
        else
            log_warning "跳過工具安裝"
        fi
    else
        log_success "所有必要工具已安裝"
    fi

    # Step 4: report which services respond
    log_info "Step 4/5: 檢查服務狀態..."

    echo ""
    echo " PostgreSQL: $(pg_isready -h 127.0.0.1 -p 5432 >/dev/null 2>&1 && echo '✅ 運行中' || echo '❌ 未運行')"
    echo " Redis: $(redis-cli -a accusys ping >/dev/null 2>&1 && echo '✅ 運行中' || echo '❌ 未運行')"
    echo " MongoDB: $(mongosh --quiet --eval "db.adminCommand('ping')" >/dev/null 2>&1 && echo '✅ 運行中' || echo '❌ 未運行')"
    echo " Ollama: $(curl -s http://localhost:11434/api/tags >/dev/null 2>&1 && echo '✅ 運行中' || echo '❌ 未運行')"
    echo " n8n: $(curl -s http://localhost:5678 >/dev/null 2>&1 && echo '✅ 運行中' || echo '❌ 未運行')"
    echo " SFTPGo: $(curl -s http://localhost:8080 >/dev/null 2>&1 && echo '✅ 運行中' || echo '❌ 未運行')"
    echo " Qdrant: $(curl -s http://localhost:6333/ >/dev/null 2>&1 && echo '✅ 運行中' || echo '❌ 未運行')"
    echo " Momentry API: $(curl -s http://localhost:3002/health >/dev/null 2>&1 && echo '✅ 運行中' || echo '❌ 未運行')"
    echo ""

    # Step 5: generate the health-check helper script
    log_info "Step 5/5: 建立快捷指令..."

    # The quoted delimiter keeps the heredoc literal (no expansion here).
    cat >"$health_script" <<'HEALTH_EOF'
#!/bin/bash
echo "=========================================="
echo "Momentry System Health Check"
echo "=========================================="
echo ""

pg_isready -h 127.0.0.1 -p 5432 > /dev/null 2>&1 && echo "✅ PostgreSQL" || echo "❌ PostgreSQL"
redis-cli -a accusys ping > /dev/null 2>&1 && echo "✅ Redis" || echo "❌ Redis"
mongosh --quiet --eval "db.adminCommand('ping')" > /dev/null 2>&1 && echo "✅ MongoDB" || echo "❌ MongoDB"
curl -s http://localhost:11434/api/tags > /dev/null 2>&1 && echo "✅ Ollama" || echo "❌ Ollama"
curl -s http://localhost:5678 > /dev/null 2>&1 && echo "✅ n8n" || echo "❌ n8n"
curl -s http://localhost:8080 > /dev/null 2>&1 && echo "✅ SFTPGo" || echo "❌ SFTPGo"
curl -s http://localhost:6333/ > /dev/null 2>&1 && echo "✅ Qdrant" || echo "❌ Qdrant"
curl -s http://localhost:3002/health > /dev/null 2>&1 && echo "✅ Momentry API" || echo "❌ Momentry API"

echo ""
HEALTH_EOF

    chmod +x "$health_script"
    log_success "快捷指令已建立: $health_script"

    # Done
    echo ""
    echo "=========================================="
    log_success "初始設定完成!"
    echo "=========================================="
    echo ""
    echo "下一步:"
    echo " 1. 查看完整安裝指南: docs/FRESH_MAC_INSTALLATION.md"
    echo " 2. 執行健康檢查: $health_script"
    echo " 3. 查閱服務密碼: docs/FRESH_MAC_INSTALLATION.md#第八部分"
    echo ""
}
|
||||
|
||||
# 執行
|
||||
main "$@"
|
||||
345
scripts/story_processor.py
Normal file
345
scripts/story_processor.py
Normal file
@@ -0,0 +1,345 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Story Processor - Generate parent-child chunk hierarchy for RAG
|
||||
Uses video analysis (ASR, YOLO, OCR) to create parent chunks that summarize child chunks.
|
||||
|
||||
Parent-Child Chunk Strategy:
|
||||
- Parent chunks: Summarize multiple scenes/segments with narrative description
|
||||
- Child chunks: Individual ASR segments, OCR texts, detected objects
|
||||
- When embedding: Parent description + Child content for better retrieval
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
import argparse
|
||||
from typing import Dict, List, Any
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
def extract_video_metadata(video_path: str) -> Dict[str, Any]:
    """Extract basic video metadata using ffprobe.

    Runs ``ffprobe`` with JSON output for format and stream information and
    returns the parsed document. This is best-effort: an empty dict is
    returned when ffprobe is missing or cannot be started, exits with a
    non-zero status, or produces output that is not valid JSON.
    """
    import subprocess

    cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
        video_path,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            return json.loads(result.stdout)
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: ffprobe not installed / not executable.
        # ValueError: undecodable or non-JSON output (JSONDecodeError is a
        # ValueError subclass). Other exceptions indicate programming
        # errors and are allowed to propagate.
        pass
    return {}
|
||||
|
||||
|
||||
def generate_parent_child_chunks(
    asr_data: Dict,
    cut_data: Dict,
    yolo_data: Dict,
    ocr_data: Dict,
    parent_chunk_size: int = 5,
) -> Dict[str, Any]:
    """
    Generate parent-child chunk hierarchy.

    Every ASR segment and every CUT scene becomes one child chunk.
    Consecutive children of the same source are grouped, ``parent_chunk_size``
    at a time, under a "story" parent chunk whose text is a generated
    narrative. Each child records the id of its parent.

    Args:
        asr_data: Dict whose "segments" entries are read through the keys
            "start", "end" and "text".
        cut_data: Dict whose "scenes" entries are read through
            "start_time"/"end_time", falling back to "start"/"end".
        yolo_data: Dict whose "frames" entries are read through "timestamp"
            and "objects" (each object through "class_name").
        ocr_data: Accepted for interface compatibility; not used yet.
        parent_chunk_size: Number of child chunks grouped under each parent.

    Returns:
        Dict with "child_chunks", "parent_chunks" and a "stats" summary.

    Raises:
        ValueError: If ``parent_chunk_size`` is smaller than 1.
    """
    if parent_chunk_size < 1:
        raise ValueError("parent_chunk_size must be a positive integer")

    # Get source data
    asr_segments = asr_data.get("segments", [])
    cut_scenes = cut_data.get("scenes", [])
    yolo_frames = yolo_data.get("frames", [])

    # A dict-shaped "frames" value (keyed by frame number) cannot be sliced;
    # fall back to its frame records.
    # NOTE(review): confirm which YOLO layout this pipeline actually emits.
    if isinstance(yolo_frames, dict):
        yolo_frames = list(yolo_frames.values())
    # Only the first 100 frames are consulted (sampling kept from the
    # original). NOTE(review): later scene groups may therefore never match
    # a frame - confirm this is intended.
    sampled_frames = yolo_frames[:100]

    # Create child chunks from ASR segments
    asr_children = [
        {
            "chunk_id": f"asr_{i:04d}",
            "chunk_type": "sentence",
            "source": "asr",
            "start_time": seg.get("start", 0),
            "end_time": seg.get("end", 0),
            "text_content": seg.get("text", ""),
            "content": seg,
            "child_chunk_ids": [],
            "parent_chunk_id": None,
        }
        for i, seg in enumerate(asr_segments)
    ]

    # Create child chunks from CUT scenes
    cut_children = [
        {
            "chunk_id": f"cut_{i:04d}",
            "chunk_type": "cut",
            "source": "cut",
            "start_time": scene.get("start_time", scene.get("start", 0)),
            "end_time": scene.get("end_time", scene.get("end", 0)),
            "text_content": None,
            "content": scene,
            "child_chunk_ids": [],
            "parent_chunk_id": None,
        }
        for i, scene in enumerate(cut_scenes)
    ]

    child_chunks = asr_children + cut_children
    parent_chunks = []

    # Group ASR segments into parent chunks. Slicing the child dicts directly
    # avoids re-scanning the whole child list for every id.
    for offset in range(0, len(asr_children), parent_chunk_size):
        batch = asr_children[offset : offset + parent_chunk_size]
        batch_ids = [child["chunk_id"] for child in batch]
        batch_texts = [
            child["text_content"] for child in batch if child["text_content"]
        ]
        start_time = batch[0]["start_time"]
        end_time = batch[-1]["end_time"]

        # No detected objects are collected for speech groups.
        narrative = generate_narrative(batch_texts, [], start_time, end_time)

        parent_chunk = {
            "chunk_id": f"story_asr_{offset // parent_chunk_size:04d}",
            "chunk_type": "story",
            "source": "story_asr",
            "start_time": start_time,
            "end_time": end_time,
            "text_content": narrative,
            "content": {
                "description": narrative,
                "child_count": len(batch),
                "speech_preview": " ".join(batch_texts[:3]) if batch_texts else None,
            },
            "child_chunk_ids": batch_ids,
            "parent_chunk_id": None,
        }
        parent_chunks.append(parent_chunk)

        # Update child chunks with parent reference
        for child in batch:
            child["parent_chunk_id"] = parent_chunk["chunk_id"]

    # Group CUT scenes into parent chunks
    for offset in range(0, len(cut_children), parent_chunk_size):
        batch = cut_children[offset : offset + parent_chunk_size]
        batch_ids = [child["chunk_id"] for child in batch]
        start_time = batch[0]["start_time"]
        end_time = batch[-1]["end_time"]

        # Find objects in this time range from the sampled YOLO frames
        batch_objects = [
            obj.get("class_name", "unknown")
            for frame in sampled_frames
            if start_time <= frame.get("timestamp", 0) <= end_time
            for obj in frame.get("objects", [])
        ]

        # Generate scene narrative
        narrative = generate_scene_narrative(
            batch_objects, start_time, end_time, len(batch)
        )

        parent_chunk = {
            "chunk_id": f"story_cut_{offset // parent_chunk_size:04d}",
            "chunk_type": "story",
            "source": "story_cut",
            "start_time": start_time,
            "end_time": end_time,
            "text_content": narrative,
            "content": {
                "description": narrative,
                "child_count": len(batch),
                "scenes": batch_ids,
                # First-seen order keeps the output reproducible between runs
                # (a set's iteration order depends on string hashing).
                "detected_objects": list(dict.fromkeys(batch_objects))[:10],
            },
            "child_chunk_ids": batch_ids,
            "parent_chunk_id": None,
        }
        parent_chunks.append(parent_chunk)

        # Update child chunks with parent reference
        for child in batch:
            child["parent_chunk_id"] = parent_chunk["chunk_id"]

    return {
        "child_chunks": child_chunks,
        "parent_chunks": parent_chunks,
        "stats": {
            "total_child_chunks": len(child_chunks),
            "total_parent_chunks": len(parent_chunks),
            "asr_children": len(asr_children),
            "cut_children": len(cut_children),
        },
    }
|
||||
|
||||
|
||||
def generate_narrative(
    texts: List[str], objects: List[str], start: float, end: float
) -> str:
    """Build a one-line description of a segment from its spoken text.

    Args:
        texts: Text snippets belonging to the segment.
        objects: Accepted for interface compatibility; not used here.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        A time-range header followed by the space-joined snippets, cut to
        200 characters plus "..." when longer. With no snippets, a generic
        placeholder naming the time range is returned instead.
    """
    if texts:
        joined = " ".join(texts)
        # Keep the narrative short: anything beyond 200 characters is elided.
        snippet = joined if len(joined) <= 200 else joined[:200] + "..."
        return f"[{start:.0f}s-{end:.0f}s] {snippet}"

    return f"Video segment from {start:.1f}s to {end:.1f}s"
|
||||
|
||||
|
||||
def generate_scene_narrative(
    objects: List[str], start: float, end: float, scene_count: int
) -> str:
    """Generate scene narrative from detected objects.

    Args:
        objects: Detected object class names; duplicates are allowed.
        start: Start time of the scene group in seconds.
        end: End time of the scene group in seconds.
        scene_count: Number of scenes in the group.

    Returns:
        A time-range header followed by the scene count and, when any
        objects were detected, up to five distinct object names.
    """
    # De-duplicate in first-seen order. A set would make both the choice and
    # the order of the five names depend on string hashing, so the same
    # input could produce different narratives from run to run.
    unique_objects = list(dict.fromkeys(objects))[:5]

    if unique_objects:
        obj_str = ", ".join(unique_objects)
        return f"[{start:.0f}s-{end:.0f}s] Scenes {scene_count} segments. Visual: {obj_str}."

    return f"[{start:.0f}s-{end:.0f}s] {scene_count} video scenes."
|
||||
|
||||
|
||||
def _read_json_if_exists(path: str) -> Any:
    """Return the parsed JSON document at *path*, or None if the file is missing."""
    try:
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None


def run_story(
    video_path: str, output_path: str, uuid: str = "", parent_chunk_size: int = 5
):
    """Build the parent-child chunk hierarchy for one video and write it as JSON.

    The ASR, CUT, YOLO and OCR analysis files are looked up next to
    *output_path* as ``<name>.asr.json``, ``<name>.cut.json``,
    ``<name>.yolo.json`` and ``<name>.ocr.json``, where ``<name>`` is the
    output file name up to its first dot. A missing file is treated as an
    empty analysis.

    Args:
        video_path: Video file passed to ``extract_video_metadata``.
        output_path: Destination of the generated JSON document.
        uuid: When non-empty, progress is reported through a
            ``RedisPublisher`` created with this value.
        parent_chunk_size: Number of child chunks per parent chunk.

    Returns:
        The generated result dict, including "metadata" and
        "parent_chunk_size".
    """
    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("story", "STORY_START")

    base_path = os.path.dirname(output_path)
    uuid_name = os.path.basename(output_path).split(".")[0]

    def analysis_path(kind: str) -> str:
        return os.path.join(base_path, f"{uuid_name}.{kind}.json")

    # Defaults used when an analysis file is absent.
    asr_data = {"segments": []}
    cut_data = {"scenes": []}
    yolo_data = {"frames": []}
    ocr_data = {"frames": []}

    # Load ASR
    loaded = _read_json_if_exists(analysis_path("asr"))
    if loaded is not None:
        asr_data = loaded
        if publisher:
            publisher.info(
                "story", f"Loaded ASR: {len(asr_data.get('segments', []))} segments"
            )

    # Load CUT
    loaded = _read_json_if_exists(analysis_path("cut"))
    if loaded is not None:
        cut_data = loaded
        if publisher:
            publisher.info(
                "story", f"Loaded CUT: {len(cut_data.get('scenes', []))} scenes"
            )

    # Load YOLO
    loaded = _read_json_if_exists(analysis_path("yolo"))
    if loaded is not None:
        yolo_data = loaded

    # Load OCR
    loaded = _read_json_if_exists(analysis_path("ocr"))
    if loaded is not None:
        ocr_data = loaded

    # Load metadata
    metadata = extract_video_metadata(video_path)

    if publisher:
        publisher.info("story", "Generating parent-child chunks...")

    # Generate parent-child hierarchy
    result = generate_parent_child_chunks(
        asr_data, cut_data, yolo_data, ocr_data, parent_chunk_size
    )
    result["metadata"] = metadata
    result["parent_chunk_size"] = parent_chunk_size

    # ensure_ascii=False writes non-ASCII text verbatim, so the file encoding
    # is pinned to UTF-8 instead of depending on the process locale.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)

    if publisher:
        stats = result["stats"]
        publisher.complete(
            "story",
            f"{stats['total_parent_chunks']} parents, {stats['total_child_chunks']} children",
        )

    return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, build the story file and
    # print a one-line summary of what was generated.
    cli = argparse.ArgumentParser(
        description="Video Story Generator - Parent-Child Chunks"
    )
    cli.add_argument("video_path", help="Path to video file")
    cli.add_argument("output_path", help="Output JSON path")
    cli.add_argument("--uuid", default="", help="UUID for progress tracking")
    cli.add_argument(
        "--parent-chunk-size",
        default=5,
        type=int,
        help="Number of child chunks per parent chunk",
    )
    options = cli.parse_args()

    story = run_story(
        options.video_path,
        options.output_path,
        options.uuid,
        options.parent_chunk_size,
    )

    summary = story["stats"]
    print(
        f"Story generated: {summary['total_parent_chunks']} parent chunks, "
        f"{summary['total_child_chunks']} child chunks"
    )
|
||||
122
scripts/sync_to_mongodb.py
Normal file
122
scripts/sync_to_mongodb.py
Normal file
@@ -0,0 +1,122 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Sync chunks from PostgreSQL to MongoDB
|
||||
"""
|
||||
|
||||
import psycopg2
|
||||
from pymongo import MongoClient
|
||||
|
||||
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
|
||||
POSTGRES_CONFIG = {
|
||||
"host": "localhost",
|
||||
"port": 5432,
|
||||
"user": "accusys",
|
||||
"password": "Test3200",
|
||||
"database": "momentry",
|
||||
}
|
||||
|
||||
MONGO_URI = "mongodb://localhost:27017"
|
||||
MONGO_DB = "momentry"
|
||||
MONGO_COLLECTION = "chunks"
|
||||
|
||||
|
||||
def sync_to_mongodb():
    """Sync chunks from PostgreSQL to MongoDB.

    Reads every 'sentence' chunk of ``VIDEO_UUID`` from the PostgreSQL
    ``chunks`` table, replaces the matching documents in the MongoDB
    collection and ensures the text index used by the search test exists.

    Both connections are closed even when a step fails.
    """
    # Document field names, in the same order as the SELECT column list.
    fields = (
        "uuid",
        "chunk_id",
        "chunk_index",
        "chunk_type",
        "start_time",
        "end_time",
        "fps",
        "start_frame",
        "end_frame",
        "content",
        "metadata",
        "vector_id",
    )

    # Read all chunks for the video, then release PostgreSQL right away.
    pg_conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        with pg_conn.cursor() as pg_cur:
            pg_cur.execute(
                """
                SELECT uuid, chunk_id, chunk_index, chunk_type,
                       start_time, end_time, fps, start_frame, end_frame,
                       content, metadata, vector_id
                FROM chunks
                WHERE uuid = %s AND chunk_type = 'sentence'
                ORDER BY chunk_index
                """,
                (VIDEO_UUID,),
            )
            rows = pg_cur.fetchall()
    finally:
        pg_conn.close()

    print(f"Found {len(rows)} chunks in PostgreSQL")

    # Prepare documents
    documents = [dict(zip(fields, row)) for row in rows]

    mongo_client = MongoClient(MONGO_URI)
    try:
        mongo_collection = mongo_client[MONGO_DB][MONGO_COLLECTION]

        if documents:
            # Replace rather than upsert: drop this video's existing sentence
            # chunks, then insert the fresh set. The two steps are not atomic.
            mongo_collection.delete_many(
                {"uuid": VIDEO_UUID, "chunk_type": "sentence"}
            )
            result = mongo_collection.insert_many(documents)
            print(f"Inserted {len(result.inserted_ids)} chunks into MongoDB")

        # Create text index for search.
        # NOTE(review): if `content` is stored as an embedded document, a text
        # index on "content" may not cover its nested strings - confirm
        # whether "content.text" is the intended key.
        mongo_collection.create_index([("content", "text"), ("chunk_type", 1)])
        print("Created text index")
    finally:
        mongo_client.close()

    print("Done!")
|
||||
|
||||
|
||||
def test_mongodb_text_search():
    """Test MongoDB text search.

    Runs a few fixed ``$text`` queries against the sentence chunks of
    ``VIDEO_UUID`` and prints the elapsed time and hit count for each.

    Returns:
        Dict mapping each query string to ``{"ms": <elapsed>, "rows": <hits>}``.
    """
    import time

    test_queries = ["Paris", "Audrey Hepburn", "Cary Grant"]
    results = {}

    mongo_client = MongoClient(MONGO_URI)
    try:
        mongo_collection = mongo_client[MONGO_DB][MONGO_COLLECTION]

        for query in test_queries:
            # perf_counter is monotonic, so the interval cannot be skewed by
            # system clock adjustments.
            start = time.perf_counter()
            cursor = mongo_collection.find(
                {
                    "uuid": VIDEO_UUID,
                    "chunk_type": "sentence",
                    "$text": {"$search": query},
                }
            ).limit(10)
            # Materialise inside the timed region: the query only runs once
            # the cursor is iterated.
            rows = list(cursor)
            elapsed = (time.perf_counter() - start) * 1000

            results[query] = {"ms": round(elapsed, 2), "rows": len(rows)}
            print(f"MongoDB text '{query}': {elapsed:.2f}ms, {len(rows)} rows")
    finally:
        # Close the client even if a query fails.
        mongo_client.close()

    return results


if __name__ == "__main__":
    # Sync first so the search test runs against freshly loaded chunks.
    sync_to_mongodb()
    print("\nTesting MongoDB text search:")
    test_mongodb_text_search()
|
||||
191
scripts/test_multilingual.py
Normal file
191
scripts/test_multilingual.py
Normal file
@@ -0,0 +1,191 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Multilingual Vector Search Test with nomic-embed-text-v2-moe
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import psycopg2
|
||||
import uuid
|
||||
|
||||
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
|
||||
POSTGRES_CONFIG = {
|
||||
"host": "localhost",
|
||||
"port": 5432,
|
||||
"user": "accusys",
|
||||
"password": "Test3200",
|
||||
"database": "momentry",
|
||||
}
|
||||
|
||||
MODEL = "nomic-embed-text-v2-moe"
|
||||
QDRANT_COLLECTION = "chunks_v3"
|
||||
|
||||
|
||||
def get_embedding(text, prefix=""):
    """Return the Ollama embedding for ``prefix + text`` using ``MODEL``.

    Args:
        text: Text to embed.
        prefix: String prepended verbatim to *text* before embedding.

    Returns:
        The value of the "embedding" field of Ollama's JSON response.

    Raises:
        requests.HTTPError: If Ollama answers with an error status.
        requests.Timeout: If Ollama does not answer within the timeout.
    """
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": MODEL, "prompt": f"{prefix}{text}"},
        # Without a timeout a stalled Ollama would hang the script forever.
        timeout=120,
    )
    # Surface HTTP errors directly instead of a KeyError on "embedding".
    resp.raise_for_status()
    return resp.json()["embedding"]
|
||||
|
||||
|
||||
def sync_to_qdrant():
    """Sync vectors to Qdrant with multilingual model.

    Reads the text of every 'sentence' chunk of ``VIDEO_UUID`` from
    PostgreSQL, embeds it with the ``search_document: `` prefix and uploads
    the points to ``QDRANT_COLLECTION`` in batches of 100. Chunks without
    text are skipped. Uploading stops at the first failed batch.
    """
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT chunk_id, content->>'text' as text, start_time, end_time, uuid
                FROM chunks
                WHERE uuid = %s AND chunk_type = 'sentence'
                ORDER BY chunk_index
                """,
                (VIDEO_UUID,),
            )
            rows = cur.fetchall()
    finally:
        # All rows are in memory; release the connection even on failure.
        conn.close()

    print(f"Syncing {len(rows)} chunks to Qdrant with {MODEL}")

    points = []
    for chunk_id, text, start_time, end_time, vid in rows:
        if not text:
            continue

        # Use search_document: prefix for chunks
        embedding = get_embedding(text, "search_document: ")

        # Derive the point id from chunk_id so re-syncing overwrites the
        # same point instead of creating a duplicate.
        point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, chunk_id))

        payload = {
            "uuid": vid,
            "chunk_id": chunk_id,
            "chunk_type": "sentence",
            "start_time": float(start_time),
            "end_time": float(end_time),
            "text": text[:200],
        }
        points.append({"id": point_id, "vector": embedding, "payload": payload})

    # Upload in batches
    batch_size = 100
    total_batches = (len(points) + batch_size - 1) // batch_size
    for batch_no, offset in enumerate(range(0, len(points), batch_size), start=1):
        batch = points[offset : offset + batch_size]
        resp = requests.put(
            f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points",
            headers={
                "api-key": "Test3200Test3200Test3200",
                "Content-Type": "application/json",
            },
            json={"points": batch},
            timeout=60,
        )
        if resp.status_code != 200:
            print(f"Error: {resp.text[:200]}")
            # Do not report success for a partial upload.
            print(f"Aborted after {batch_no - 1}/{total_batches} batches")
            return
        print(f"Uploaded batch {batch_no}/{total_batches}")

    print("Done!")
|
||||
|
||||
|
||||
def test_queries(queries, use_prefix=True):
    """Test queries against Qdrant.

    Embeds each query, searches ``QDRANT_COLLECTION`` for the top 3 points
    and prints the score and stored text of every hit. Only the Qdrant
    request is timed, not the embedding call.

    Args:
        queries: Iterable of query strings.
        use_prefix: When true, queries are embedded with the
            ``search_query: `` prefix.
    """
    prefix = "search_query: " if use_prefix else ""

    for query in queries:
        embedding = get_embedding(query, prefix)

        start = time.perf_counter()
        resp = requests.post(
            f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points/search",
            headers={
                "api-key": "Test3200Test3200Test3200",
                "Content-Type": "application/json",
            },
            json={"vector": embedding, "limit": 3, "with_payload": True},
            timeout=30,
        )
        elapsed = (time.perf_counter() - start) * 1000

        if resp.status_code != 200:
            # Make a failed search visible instead of showing it as "no hits".
            print(f"Error: {resp.text[:200]}")
            results = []
        else:
            results = resp.json().get("result") or []

        print(f"\nQuery: '{query}' ({elapsed:.1f}ms)")
        print("-" * 60)
        for i, r in enumerate(results):
            score = r.get("score", 0)
            # The payload can be null in the response; treat it as empty.
            payload = r.get("payload") or {}
            text = (payload.get("text") or "")[:60]
            print(f" {i + 1}. [{score:.3f}] {text}")
|
||||
|
||||
|
||||
# English queries
|
||||
ENGLISH_QUERIES = [
|
||||
"a person talking",
|
||||
"someone speaking on camera",
|
||||
"outdoor scene",
|
||||
"indoor setting",
|
||||
"walking or moving",
|
||||
"dialogue or conversation",
|
||||
"looking at something",
|
||||
"happy or joyful",
|
||||
"serious or dramatic",
|
||||
"comedy or funny",
|
||||
"wearing a tie",
|
||||
"holding an object",
|
||||
"sitting on a chair",
|
||||
"city or urban",
|
||||
"building or room",
|
||||
"open space",
|
||||
]
|
||||
|
||||
# Chinese queries
|
||||
CHINESE_QUERIES = [
|
||||
"有人在說話",
|
||||
"戶外場景",
|
||||
"室內場景",
|
||||
"走路或移動",
|
||||
"對話或交談",
|
||||
"看著某樣東西",
|
||||
"快樂或開心",
|
||||
"嚴肅或戲劇性",
|
||||
"喜劇或有趣",
|
||||
"戴著領帶",
|
||||
"拿著東西",
|
||||
"坐在椅子上",
|
||||
"城市或都市",
|
||||
"建築物或房間",
|
||||
"開放空間",
|
||||
]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # "sync" as the first argument uploads vectors; anything else runs the
    # English and Chinese query sets against the collection.
    rule = "=" * 60
    sync_requested = len(sys.argv) > 1 and sys.argv[1] == "sync"

    print(rule)
    if sync_requested:
        print(f"Syncing vectors to {QDRANT_COLLECTION}")
        print(f"Model: {MODEL}")
        print("Prefix for chunks: search_document:")
        print(rule)
        sync_to_qdrant()
    else:
        print(f"Testing with {QDRANT_COLLECTION}")
        print(f"Model: {MODEL}")
        print("Prefix for queries: search_query:")
        print(rule)

        query_sets = (
            ("ENGLISH QUERIES", ENGLISH_QUERIES),
            ("CHINESE QUERIES", CHINESE_QUERIES),
        )
        for title, query_set in query_sets:
            print("\n" + rule)
            print(title)
            print(rule)
            test_queries(query_set)
|
||||
84
scripts/test_object_search.py
Normal file
84
scripts/test_object_search.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Object search test using PostgreSQL JSON queries
|
||||
"""
|
||||
|
||||
import time
|
||||
import psycopg2
|
||||
|
||||
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
|
||||
POSTGRES_CONFIG = {
|
||||
"host": "localhost",
|
||||
"port": 5432,
|
||||
"user": "accusys",
|
||||
"password": "Test3200",
|
||||
"database": "momentry",
|
||||
}
|
||||
|
||||
|
||||
def test_object_search():
    """Test object search using PostgreSQL JSON queries.

    For each test object, selects up to 10 sentence chunks of ``VIDEO_UUID``
    whose ``metadata->'yolo'->'objects'`` contains the object name, and
    prints the elapsed time and hit count.

    Returns:
        Dict mapping each object name to
        ``{"ms": <elapsed>, "chunks": <hits>, "sample": [<first 3 hits>]}``.
    """
    results = {}
    test_objects = ["person", "car", "clock", "tie", "chair", "bottle", "cup", "book"]

    # Query chunks that have this object in YOLO metadata
    query = """
        SELECT chunk_id, start_time, end_time
        FROM chunks
        WHERE uuid = %s
          AND chunk_type = 'sentence'
          AND metadata IS NOT NULL
          AND metadata->'yolo'->'objects' ? %s
        ORDER BY chunk_index
        LIMIT 10
    """

    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        with conn.cursor() as cur:
            for obj in test_objects:
                # perf_counter is monotonic and meant for interval timing.
                start = time.perf_counter()
                cur.execute(query, (VIDEO_UUID, obj))
                rows = cur.fetchall()
                elapsed = (time.perf_counter() - start) * 1000

                results[obj] = {
                    "ms": round(elapsed, 2),
                    "chunks": len(rows),
                    "sample": [
                        {"id": r[0], "time": f"{r[1]:.1f}-{r[2]:.1f}"}
                        for r in rows[:3]
                    ],
                }
                print(f"Object '{obj}': {elapsed:.2f}ms, {len(rows)} chunks")
    finally:
        # Close the connection even if a query fails.
        conn.close()

    return results
|
||||
|
||||
|
||||
def main():
    """Run the object search test and print a timing table plus sample hits."""
    rule = "=" * 60

    print(rule)
    print("Object Search Test (Priority c)")
    print(rule)

    results = test_object_search()

    print("\n" + rule)
    print("Summary")
    print(rule)
    print(f"\n{'Object':<20} | {'Time (ms)':<12} | {'Chunks'}")
    print("-" * 50)
    for name, stats in results.items():
        print(f"{name:<20} | {stats['ms']:<12.1f} | {stats['chunks']}")

    print("\nSample results:")
    for name, stats in results.items():
        sample = stats["sample"]
        # Objects without any hit are left out of the sample listing.
        if not sample:
            continue
        print(f" {name}: {sample}")


if __name__ == "__main__":
    main()
|
||||
156
scripts/test_v2_detailed.py
Normal file
156
scripts/test_v2_detailed.py
Normal file
@@ -0,0 +1,156 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Vector Search Test with nomic-embed-text:v1.5 using prefixes - detailed results
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
|
||||
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
|
||||
POSTGRES_CONFIG = {
|
||||
"host": "localhost",
|
||||
"port": 5432,
|
||||
"user": "accusys",
|
||||
"password": "Test3200",
|
||||
"database": "momentry",
|
||||
}
|
||||
|
||||
MODEL = "nomic-embed-text:v1.5"
|
||||
QDRANT_COLLECTION = "chunks_v2"
|
||||
|
||||
|
||||
def get_embedding(text, prefix=""):
    """Return the Ollama embedding for ``prefix + text`` using ``MODEL``.

    Args:
        text: Text to embed.
        prefix: String prepended verbatim to *text* before embedding.

    Returns:
        The value of the "embedding" field of Ollama's JSON response.

    Raises:
        requests.HTTPError: If Ollama answers with an error status.
        requests.Timeout: If Ollama does not answer within the timeout.
    """
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": MODEL, "prompt": f"{prefix}{text}"},
        # Without a timeout a stalled Ollama would hang the script forever.
        timeout=120,
    )
    # Surface HTTP errors directly instead of a KeyError on "embedding".
    resp.raise_for_status()
    return resp.json()["embedding"]
|
||||
|
||||
|
||||
def test_queries(queries, use_prefix=True):
    """Test queries against Qdrant.

    Embeds each query, searches ``QDRANT_COLLECTION`` for the top 3 points
    and prints the score and stored text of every hit. Only the Qdrant
    request is timed, not the embedding call.

    Args:
        queries: Iterable of query strings.
        use_prefix: When true, queries are embedded with the
            ``search_query: `` prefix.

    Returns:
        One dict per query with "query", "time_ms", "top_score" and
        "top_text"; the last two are 0 and "" when there were no hits.
    """
    prefix = "search_query: " if use_prefix else ""
    results_data = []

    for query in queries:
        embedding = get_embedding(query, prefix)

        start = time.perf_counter()
        resp = requests.post(
            f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points/search",
            headers={
                "api-key": "Test3200Test3200Test3200",
                "Content-Type": "application/json",
            },
            json={"vector": embedding, "limit": 3, "with_payload": True},
            timeout=30,
        )
        elapsed = (time.perf_counter() - start) * 1000

        if resp.status_code != 200:
            # Make a failed search visible instead of showing it as "no hits".
            print(f"Error: {resp.text[:200]}")
            results = []
        else:
            results = resp.json().get("result") or []

        print(f"\nQuery: '{query}' ({elapsed:.1f}ms)")
        print("-" * 60)
        for i, r in enumerate(results):
            score = r.get("score", 0)
            # The payload can be null in the response; treat it as empty.
            payload = r.get("payload") or {}
            text = (payload.get("text") or "")[:60]
            print(f" {i + 1}. [{score:.3f}] {text}")

        best = results[0] if results else {}
        best_payload = best.get("payload") or {}
        results_data.append(
            {
                "query": query,
                "time_ms": elapsed,
                "top_score": best.get("score", 0),
                "top_text": (best_payload.get("text") or "")[:50],
            }
        )

    return results_data
|
||||
|
||||
|
||||
# English queries
|
||||
ENGLISH_QUERIES = [
|
||||
"a person talking",
|
||||
"someone speaking on camera",
|
||||
"outdoor scene",
|
||||
"indoor setting",
|
||||
"walking or moving",
|
||||
"dialogue or conversation",
|
||||
"looking at something",
|
||||
"happy or joyful",
|
||||
"serious or dramatic",
|
||||
"comedy or funny",
|
||||
"wearing a tie",
|
||||
"holding an object",
|
||||
"sitting on a chair",
|
||||
"city or urban",
|
||||
"building or room",
|
||||
"open space",
|
||||
]
|
||||
|
||||
# Chinese queries
|
||||
CHINESE_QUERIES = [
|
||||
"有人在說話",
|
||||
"戶外場景",
|
||||
"室內場景",
|
||||
"走路或移動",
|
||||
"對話或交談",
|
||||
"看著某樣東西",
|
||||
"快樂或開心",
|
||||
"嚴肅或戲劇性",
|
||||
"喜劇或有趣",
|
||||
"戴著領帶",
|
||||
"拿著東西",
|
||||
"坐在椅子上",
|
||||
"城市或都市",
|
||||
"建築物或房間",
|
||||
"開放空間",
|
||||
]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run both query sets, then print average latency and the top hit of the
    # first five queries of each set.
    rule = "=" * 70

    print(rule)
    print(f"Testing with {QDRANT_COLLECTION}")
    print(f"Model: {MODEL}")
    print("Prefix for chunks: search_document:")
    print("Prefix for queries: search_query:")
    print(rule)

    print("\n" + rule)
    print("ENGLISH QUERIES")
    print(rule)
    en_results = test_queries(ENGLISH_QUERIES)

    print("\n" + rule)
    print("CHINESE QUERIES")
    print(rule)
    zh_results = test_queries(CHINESE_QUERIES)

    # Summary
    print("\n" + rule)
    print("SUMMARY")
    print(rule)

    en_avg = sum(r["time_ms"] for r in en_results) / len(en_results)
    zh_avg = sum(r["time_ms"] for r in zh_results) / len(zh_results)

    print(f"\nEnglish avg time: {en_avg:.1f}ms")
    print(f"Chinese avg time: {zh_avg:.1f}ms")

    print("\nTop results:")
    print(f"\n{'Query':<25} | {'Time':<8} | {'Score':<8} | {'Text'}")
    print("-" * 70)
    for position, group in enumerate((en_results, zh_results)):
        if position:
            # Blank line between the English and Chinese rows.
            print()
        for r in group[:5]:
            print(
                f"{r['query']:<25} | {r['time_ms']:>5.1f}ms | {r['top_score']:.3f} | {r['top_text']}"
            )
|
||||
188
scripts/test_v2_model.py
Normal file
188
scripts/test_v2_model.py
Normal file
@@ -0,0 +1,188 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Vector Search Test with nomic-embed-text:v1.5 using prefixes
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import psycopg2
|
||||
import uuid
|
||||
|
||||
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
|
||||
POSTGRES_CONFIG = {
|
||||
"host": "localhost",
|
||||
"port": 5432,
|
||||
"user": "accusys",
|
||||
"password": "Test3200",
|
||||
"database": "momentry",
|
||||
}
|
||||
|
||||
MODEL = "nomic-embed-text:v1.5"
|
||||
QDRANT_COLLECTION = "chunks_v2"
|
||||
|
||||
|
||||
def get_embedding(text, prefix=""):
    """Get embedding from Ollama with prefix.

    Args:
        text: Text to embed.
        prefix: String prepended verbatim to *text* before embedding.

    Returns:
        The value of the "embedding" field of Ollama's JSON response.

    Raises:
        requests.HTTPError: If Ollama answers with an error status.
        requests.Timeout: If Ollama does not answer within the timeout.
    """
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": MODEL, "prompt": f"{prefix}{text}"},
        # Without a timeout a stalled Ollama would hang the script forever.
        timeout=120,
    )
    # Surface HTTP errors directly instead of a KeyError on "embedding".
    resp.raise_for_status()
    return resp.json()["embedding"]
|
||||
|
||||
|
||||
def sync_to_qdrant():
    """Sync vectors to Qdrant with v1.5 model and prefixes.

    Reads the text of every 'sentence' chunk of ``VIDEO_UUID`` from
    PostgreSQL, embeds it with the ``search_document: `` prefix and uploads
    the points to ``QDRANT_COLLECTION`` in batches of 100. Chunks without
    text are skipped. Uploading stops at the first failed batch.
    """
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT chunk_id, content->>'text' as text, start_time, end_time, uuid
                FROM chunks
                WHERE uuid = %s AND chunk_type = 'sentence'
                ORDER BY chunk_index
                """,
                (VIDEO_UUID,),
            )
            rows = cur.fetchall()
    finally:
        # All rows are in memory; release the connection even on failure.
        conn.close()

    print(f"Syncing {len(rows)} chunks to Qdrant with {MODEL}")

    points = []
    for chunk_id, text, start_time, end_time, vid in rows:
        if not text:
            continue

        # Use search_document: prefix for chunks
        embedding = get_embedding(text, "search_document: ")

        # Derive the point id from chunk_id so re-syncing overwrites the
        # same point instead of creating a duplicate.
        point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, chunk_id))

        payload = {
            "uuid": vid,
            "chunk_id": chunk_id,
            "chunk_type": "sentence",
            "start_time": float(start_time),
            "end_time": float(end_time),
            "text": text[:200],
        }
        points.append({"id": point_id, "vector": embedding, "payload": payload})

    # Upload in batches
    batch_size = 100
    total_batches = (len(points) + batch_size - 1) // batch_size
    for batch_no, offset in enumerate(range(0, len(points), batch_size), start=1):
        batch = points[offset : offset + batch_size]
        resp = requests.put(
            f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points",
            headers={
                "api-key": "Test3200Test3200Test3200",
                "Content-Type": "application/json",
            },
            json={"points": batch},
            timeout=60,
        )
        if resp.status_code != 200:
            print(f"Error: {resp.text[:200]}")
            # Do not report success for a partial upload.
            print(f"Aborted after {batch_no - 1}/{total_batches} batches")
            return
        print(f"Uploaded batch {batch_no}/{total_batches}")

    print("Done!")
|
||||
|
||||
|
||||
def test_queries(queries, use_prefix=True):
    """Test queries against Qdrant.

    Embeds each query, searches ``QDRANT_COLLECTION`` for the top 5 points
    and prints the score and point id of every hit. Only the Qdrant request
    is timed, not the embedding call.

    Args:
        queries: Iterable of query strings.
        use_prefix: When true, queries are embedded with the
            ``search_query: `` prefix.
    """
    prefix = "search_query: " if use_prefix else ""

    for query in queries:
        embedding = get_embedding(query, prefix)

        start = time.perf_counter()
        resp = requests.post(
            f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points/search",
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": embedding, "limit": 5},
            timeout=30,
        )
        elapsed = (time.perf_counter() - start) * 1000

        if resp.status_code != 200:
            # Make a failed search visible instead of showing it as "no hits".
            print(f"Error: {resp.text[:200]}")
            results = []
        else:
            results = resp.json().get("result") or []

        print(f"\nQuery: '{query}' ({elapsed:.1f}ms)")
        print("-" * 50)
        for i, r in enumerate(results):
            # This is the Qdrant point id, not the chunk_id. str() keeps the
            # slice valid for integer point ids as well.
            point_id = str(r.get("id", ""))[:20]
            score = r.get("score", 0)
            print(f" {i + 1}. [{score:.3f}] {point_id}")
|
||||
|
||||
|
||||
# English queries
|
||||
ENGLISH_QUERIES = [
|
||||
"a person talking",
|
||||
"someone speaking on camera",
|
||||
"outdoor scene",
|
||||
"indoor setting",
|
||||
"walking or moving",
|
||||
"dialogue or conversation",
|
||||
"looking at something",
|
||||
"happy or joyful",
|
||||
"serious or dramatic",
|
||||
"comedy or funny",
|
||||
"wearing a tie",
|
||||
"holding an object",
|
||||
"sitting on a chair",
|
||||
"city or urban",
|
||||
"building or room",
|
||||
"open space",
|
||||
]
|
||||
|
||||
# Chinese queries
|
||||
CHINESE_QUERIES = [
|
||||
"有人在說話",
|
||||
"戶外場景",
|
||||
"室內場景",
|
||||
"走路或移動",
|
||||
"對話或交談",
|
||||
"看著某樣東西",
|
||||
"快樂或開心",
|
||||
"嚴肅或戲劇性",
|
||||
"喜劇或有趣",
|
||||
"戴著領帶",
|
||||
"拿著東西",
|
||||
"坐在椅子上",
|
||||
"城市或都市",
|
||||
"建築物或房間",
|
||||
"開放空間",
|
||||
]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # "sync" as the first argument uploads vectors; anything else runs the
    # English and Chinese query sets against the collection.
    rule = "=" * 60
    sync_requested = len(sys.argv) > 1 and sys.argv[1] == "sync"

    print(rule)
    if sync_requested:
        print(f"Syncing vectors to {QDRANT_COLLECTION}")
        print(f"Model: {MODEL}")
        print("Prefix for chunks: search_document:")
        print(rule)
        sync_to_qdrant()
    else:
        print(f"Testing with {QDRANT_COLLECTION}")
        print(f"Model: {MODEL}")
        print("Prefix for queries: search_query:")
        print(rule)

        query_sets = (
            ("ENGLISH QUERIES", ENGLISH_QUERIES),
            ("CHINESE QUERIES", CHINESE_QUERIES),
        )
        for title, query_set in query_sets:
            print("\n" + rule)
            print(title)
            print(rule)
            test_queries(query_set)
|
||||
133
scripts/test_v2_with_text.py
Normal file
133
scripts/test_v2_with_text.py
Normal file
@@ -0,0 +1,133 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Vector Search Test with nomic-embed-text:v1.5 using prefixes - with text content
|
||||
"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import psycopg2
|
||||
|
||||
|
||||
VIDEO_UUID = "39567a0eb16f39fd"
|
||||
|
||||
POSTGRES_CONFIG = {
|
||||
"host": "localhost",
|
||||
"port": 5432,
|
||||
"user": "accusys",
|
||||
"password": "Test3200",
|
||||
"database": "momentry",
|
||||
}
|
||||
|
||||
MODEL = "nomic-embed-text:v1.5"
|
||||
QDRANT_COLLECTION = "chunks_v2"
|
||||
|
||||
|
||||
def get_embedding(text, prefix=""):
    """Return the Ollama embedding for ``prefix + text`` using ``MODEL``.

    Args:
        text: Text to embed.
        prefix: String prepended verbatim to *text* before embedding.

    Returns:
        The value of the "embedding" field of Ollama's JSON response.

    Raises:
        requests.HTTPError: If Ollama answers with an error status.
        requests.Timeout: If Ollama does not answer within the timeout.
    """
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": MODEL, "prompt": f"{prefix}{text}"},
        # Without a timeout a stalled Ollama would hang the script forever.
        timeout=120,
    )
    # Surface HTTP errors directly instead of a KeyError on "embedding".
    resp.raise_for_status()
    return resp.json()["embedding"]
|
||||
|
||||
|
||||
def get_text_from_chunk_id(chunk_id):
    """Look up the text of a chunk in PostgreSQL.

    Opens a connection with ``POSTGRES_CONFIG`` and selects
    ``content->>'text'`` from ``chunks`` for the given ``chunk_id``.

    Args:
        chunk_id: Value matched against the ``chunk_id`` column.

    Returns:
        The chunk text, or ``""`` when no row matches or the stored text is
        NULL (so callers can always slice the result).
    """
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                "SELECT content->>'text' FROM chunks WHERE chunk_id = %s",
                (chunk_id,),
            )
            row = cur.fetchone()
        finally:
            cur.close()
    finally:
        # Always release the connection, even if the query raised.
        conn.close()

    if not row:
        return ""
    # content->>'text' is NULL when the JSON has no "text" key.
    return row[0] or ""
|
||||
|
||||
|
||||
def test_queries(queries, use_prefix=True):
    """Run each query against Qdrant and print the top three hits.

    For every query an embedding is computed with ``get_embedding`` and sent
    to the ``points/search`` endpoint of ``QDRANT_COLLECTION``. Only the
    search request itself is timed. For each hit the text is resolved from
    PostgreSQL when the payload carries a ``chunk_id``, otherwise from the
    payload's own ``text`` field, and the first 50 characters are printed.

    Args:
        queries: Iterable of query strings.
        use_prefix: When true, prepend ``"search_query: "`` to each query
            before embedding.
    """
    prefix = "search_query: " if use_prefix else ""

    for query in queries:
        embedding = get_embedding(query, prefix)

        start = time.time()
        resp = requests.post(
            f"http://localhost:6333/collections/{QDRANT_COLLECTION}/points/search",
            headers={"api-key": "Test3200Test3200Test3200"},
            # The payload must be requested explicitly; without it the hits
            # carry no chunk_id/text to display.
            json={"vector": embedding, "limit": 3, "with_payload": True},
            timeout=30,
        )
        elapsed = (time.time() - start) * 1000

        print(f"\nQuery: '{query}' ({elapsed:.1f}ms)")
        print("-" * 60)

        if not resp.ok:
            # Report the failure instead of silently printing no results.
            print(f" Search failed: HTTP {resp.status_code}")
            continue

        results = resp.json().get("result") or []

        for i, r in enumerate(results):
            score = r.get("score", 0)
            # "payload" may be present but null, so guard before .get().
            payload = r.get("payload") or {}
            chunk_id = payload.get("chunk_id", "")
            if not chunk_id:
                # Fall back to text stored directly in the Qdrant payload
                text = payload.get("text", "")
            else:
                # Get text from PostgreSQL
                text = get_text_from_chunk_id(chunk_id)
            snippet = (text or "")[:50]
            print(f" {i + 1}. [{score:.3f}] {snippet}...")
|
||||
|
||||
|
||||
# English search phrases passed to test_queries() by the entry point.
ENGLISH_QUERIES = [
    # people / actions
    "a person talking",
    "someone speaking on camera",
    "outdoor scene",
    "indoor setting",
    "walking or moving",
    "dialogue or conversation",
    "looking at something",
    # mood / genre
    "happy or joyful",
    "serious or dramatic",
    "comedy or funny",
    # objects / posture
    "wearing a tie",
    "holding an object",
    "sitting on a chair",
    # places
    "city or urban",
    "building or room",
    "open space",
]
|
||||
|
||||
# Traditional Chinese search phrases passed to test_queries() by the entry point.
CHINESE_QUERIES = [
    # people / actions
    "有人在說話",
    "戶外場景",
    "室內場景",
    "走路或移動",
    "對話或交談",
    "看著某樣東西",
    # mood / genre
    "快樂或開心",
    "嚴肅或戲劇性",
    "喜劇或有趣",
    # objects / posture
    "戴著領帶",
    "拿著東西",
    "坐在椅子上",
    # places
    "城市或都市",
    "建築物或房間",
    "開放空間",
]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Horizontal rule reused for every banner below.
    divider = "=" * 70

    print(divider)
    print(f"Testing with {QDRANT_COLLECTION}")
    print(f"Model: {MODEL}")
    print("Prefix for chunks: search_document:")
    print("Prefix for queries: search_query:")
    print(divider)

    # Run the English set first, then the Chinese set, each under its own banner.
    for heading, batch in (
        ("ENGLISH QUERIES", ENGLISH_QUERIES),
        ("CHINESE QUERIES", CHINESE_QUERIES),
    ):
        print("\n" + divider)
        print(heading)
        print(divider)
        test_queries(batch)
|
||||
483
scripts/yolo_processor.py
Executable file
483
scripts/yolo_processor.py
Executable file
@@ -0,0 +1,483 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
YOLO Processor - Object Detection with Resume Support
|
||||
Uses YOLOv8 via ultralytics (local model)
|
||||
|
||||
Resume Feature (integrated from video_yolo_player):
|
||||
- Auto-detect existing results and resume from last frame
|
||||
- Auto-save at configurable intervals (default: 30 seconds)
|
||||
- Ctrl+C gracefully saves and exits
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Dict, Optional, Set
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
# Class-id -> label table used by get_detections_list(); the list index is the
# class id.
# NOTE(review): several labels ("motorbike", "aeroplane", "sofa",
# "pottedplant", "diningtable", "tvmonitor") look like an older naming scheme
# than the loaded model may report itself — confirm this is intentional.
YOLO_NAMES = [
    # 0-8: people and vehicles
    "person", "bicycle", "car", "motorbike", "aeroplane",
    "bus", "train", "truck", "boat",
    # 9-13: street furniture
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # 14-23: animals
    "bird", "cat", "dog", "horse", "sheep",
    "cow", "elephant", "bear", "zebra", "giraffe",
    # 24-28: accessories
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # 29-38: sports
    "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    # 39-45: tableware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # 46-55: food
    "banana", "apple", "sandwich", "orange", "broccoli",
    "carrot", "hot dog", "pizza", "donut", "cake",
    # 56-61: furniture
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
    # 62-67: electronics
    "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # 68-72: appliances
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # 73-79: household items
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
|
||||
|
||||
|
||||
# Module-level state. process_yolo() fills these in; signal_handler() reads the
# first two so it can write progress to disk when SIGINT arrives.
g_output_file: Optional[str] = None  # path of the JSON results file
g_detection_data: Optional[Dict] = None  # detection dict currently being built

# Auto-save thresholds mirrored from the process_yolo() arguments.
g_auto_save_frames: int = 300  # frames between saves (in addition to time-based)
g_auto_save_interval: int = 30  # seconds between saves
|
||||
|
||||
|
||||
def format_time(seconds: float) -> str:
    """Render a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

    Fractional seconds are truncated; hours are not wrapped at 24.
    """
    whole_hours, leftover = divmod(seconds, 3600)
    whole_minutes = leftover // 60
    whole_seconds = seconds % 60
    return "{:02d}:{:02d}:{:02d}".format(
        int(whole_hours), int(whole_minutes), int(whole_seconds)
    )
|
||||
|
||||
|
||||
def load_existing_data(output_file: str) -> tuple[Optional[Dict], int]:
    """Load previously saved detection data so processing can resume.

    Args:
        output_file: Path of the JSON results file.

    Returns:
        ``(data, last_processed_frame)`` where ``last_processed_frame`` is the
        highest integer key found under ``data["frames"]``. Returns
        ``(None, 0)`` when the file is missing, unreadable, not valid JSON,
        not shaped as expected, or contains no frames; in every case except
        "missing" and "no frames" a warning is printed.
    """
    if not os.path.exists(output_file):
        return None, 0

    try:
        with open(output_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        # A damaged or foreign file may hold any JSON value; treat anything
        # that is not an object with a "frames" object as unusable rather
        # than crashing on .get()/.keys().
        if not isinstance(data, dict):
            raise ValueError("top-level JSON value is not an object")
        frames = data.get("frames", {})
        if frames and not isinstance(frames, dict):
            raise ValueError("'frames' is not an object")

        if frames:
            last_frame = max(int(k) for k in frames)
            return data, last_frame
    except (OSError, json.JSONDecodeError, KeyError, ValueError) as e:
        print(f"Warning: Could not load existing file: {e}")

    return None, 0
|
||||
|
||||
|
||||
def save_detection_data(
    output_file: str,
    detection_data: Dict,
    is_interrupted: bool = False,
    silent: bool = False,
    last_saved_frame: int = 0,
) -> tuple[bool, int]:
    """Write the detection data to ``output_file`` as JSON.

    The ``metadata`` entry of ``detection_data`` is updated in place before
    writing: ``last_saved_at`` is set to the current time, ``status`` and
    ``last_saved_frame`` are refreshed.

    Args:
        output_file: Destination path of the JSON file.
        detection_data: Dict with ``"metadata"`` and ``"frames"`` entries.
        is_interrupted: When true the status is recorded as ``"interrupted"``.
        silent: When true the file size is not measured and 0 is returned
            in its place.
        last_saved_frame: Frame number to record. When left at 0 the highest
            numeric key under ``"frames"`` is used instead, so callers that
            do not track the frame still record a meaningful value.

    Returns:
        ``(success, file_size)``; ``file_size`` is 0 when ``silent`` is true
        or when saving failed.
    """
    try:
        metadata = detection_data.get("metadata", {})
        metadata["last_saved_at"] = datetime.now().isoformat()

        if is_interrupted:
            metadata["status"] = "interrupted"
        elif metadata.get("status") != "completed":
            # Do not downgrade a run the caller has already marked as
            # completed; the final save would otherwise record "in_progress".
            metadata["status"] = "in_progress"

        if last_saved_frame <= 0:
            # Caller did not supply a frame number: derive it from the data.
            numeric_keys = [
                int(k)
                for k in (detection_data.get("frames") or {})
                if str(k).isdigit()
            ]
            last_saved_frame = max(numeric_keys, default=0)
        metadata["last_saved_frame"] = last_saved_frame
        detection_data["metadata"] = metadata

        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(detection_data, f, indent=2, ensure_ascii=False)

        if not silent:
            return True, os.path.getsize(output_file)
        return True, 0
    except Exception as e:
        # Best-effort save: report the problem and let the caller continue.
        print(f"Error saving data: {e}")
        return False, 0
|
||||
|
||||
|
||||
def signal_handler(signum, frame):
    """SIGINT handler: persist the in-flight detection data, then exit.

    Reads the module-level ``g_detection_data`` / ``g_output_file``; when both
    are set the data is written with status "interrupted". The process always
    terminates with exit code 0 afterwards.
    """
    banner = "=" * 60

    print(f"\n\n{banner}")
    print("PAUSE - Saving progress...")
    print(banner)

    # Nothing to save until process_yolo() has published its state.
    if g_detection_data and g_output_file:
        saved, _size = save_detection_data(
            g_output_file, g_detection_data, is_interrupted=True
        )
        if saved:
            print(f"Progress saved to: {g_output_file}")
            print("Run the same command again to resume")

    print(f"{banner}\n")
    sys.exit(0)
|
||||
|
||||
|
||||
def get_detections_list(result, model) -> list:
    """Convert one detection result into a list of plain dicts.

    Args:
        result: Object exposing ``boxes`` with ``xyxy``, ``conf`` and ``cls``
            members, each supporting ``.cpu().numpy()``.
        model: Accepted for interface compatibility; not used here.

    Returns:
        One dict per box with class id/name, confidence, corner coordinates
        and integer width/height. Empty list when ``result.boxes`` is None.
    """
    box_set = result.boxes
    if box_set is None:
        return []

    corners = box_set.xyxy.cpu().numpy()
    scores = box_set.conf.cpu().numpy()
    labels = box_set.cls.cpu().numpy().astype(int)

    known_classes = len(YOLO_NAMES)
    collected = []

    for (left, top, right, bottom), score, label in zip(corners, scores, labels):
        # Ids beyond the lookup table are reported as "unknown".
        if label < known_classes:
            label_text = YOLO_NAMES[label]
        else:
            label_text = "unknown"

        entry = {
            "class_id": int(label),
            "class_name": label_text,
            "confidence": float(score),
            "x1": float(left),
            "y1": float(top),
            "x2": float(right),
            "y2": float(bottom),
            "width": int(right - left),
            "height": int(bottom - top),
        }
        collected.append(entry)

    return collected
|
||||
|
||||
|
||||
def process_yolo(
    video_path: str,
    output_path: str,
    uuid: str = "",
    auto_save_interval: int = 30,
    force_restart: bool = False,
    auto_save_frames: int = 300,
):
    """Process video for object detection using YOLOv8 with resume support.

    Frames are numbered from 1. Results are accumulated in a dict keyed by
    the frame number (as a string) and written to ``output_path``
    periodically, on Ctrl+C, and once more at the end.

    Args:
        video_path: Path of the video to analyse.
        output_path: Path of the JSON results file; if it already holds
            frames, processing resumes after the highest stored frame.
        uuid: When non-empty, progress is reported through ``RedisPublisher``.
        auto_save_interval: Seconds between time-based auto-saves.
        force_restart: Ignore existing results and start from frame 1.
        auto_save_frames: Frames between frame-based auto-saves.

    Returns:
        The detection dict (``"metadata"`` and ``"frames"``). If ultralytics
        is missing or the video cannot be opened, a dict with
        ``metadata.status == "error"`` and no frames.
    """

    global g_detection_data, g_output_file, g_auto_save_interval, g_auto_save_frames
    g_auto_save_interval = auto_save_interval
    g_auto_save_frames = auto_save_frames

    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("yolo", "YOLO_START")

    # Set up signal handler for graceful pause
    signal.signal(signal.SIGINT, signal_handler)

    # Check for existing results (resume support)
    existing_data, last_processed_frame = load_existing_data(output_path)
    resume_mode = (
        existing_data is not None and last_processed_frame > 0 and not force_restart
    )

    if resume_mode:
        print(f"\nFound existing data: {output_path}")
        print(f"Last processed frame: {last_processed_frame}")
        print(f"Will resume from frame {last_processed_frame + 1}")

    try:
        from ultralytics import YOLO
    except ImportError:
        if publisher:
            publisher.error("yolo", "ultralytics not installed")
        result = {
            "metadata": {"status": "error", "error": "ultralytics not installed"},
            "frames": {},
        }
        with open(output_path, "w") as f:
            json.dump(result, f, indent=2)
        if publisher:
            publisher.complete("yolo", "0 frames")
        return result

    if publisher:
        publisher.info("yolo", "YOLO_LOADING_MODEL")

    # Load YOLOv8 model
    model = YOLO("yolov8n.pt")

    # Get video info
    import cv2

    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        cap.release()
        print(f"Error: Cannot open video: {video_path}")
        if publisher:
            # Let progress listeners know the job failed.
            publisher.error("yolo", f"Cannot open video: {video_path}")
        return {"metadata": {"status": "error"}, "frames": {}}

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    total_duration = total_frames / fps if fps > 0 else 0
    cap.release()

    if publisher:
        publisher.info("yolo", f"fps={fps}, total={total_frames}")
        publisher.progress("yolo", 0, total_frames, "Starting")

    # Initialize or load detection data
    if resume_mode and existing_data:
        detection_data = existing_data
        # Older or hand-edited files may lack the metadata section that the
        # final bookkeeping below writes into.
        detection_data.setdefault("metadata", {})
        frame_count = last_processed_frame
        processed_frames: Set[int] = set(
            int(k) for k in existing_data.get("frames", {}).keys()
        )

        # Seek to resume position: frames are stored 1-based, so the 0-based
        # position `frame_count` is the first unprocessed frame.
        cap = cv2.VideoCapture(video_path)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
    else:
        # Initialize new detection data
        detection_data = {
            "metadata": {
                "video_path": os.path.abspath(video_path),
                "fps": fps,
                "width": width,
                "height": height,
                "total_frames": total_frames,
                "total_duration": total_duration,
                "processed_at": datetime.now().isoformat(),
                "auto_save_interval": auto_save_interval,
                "auto_save_frames": auto_save_frames,
                "status": "in_progress",
                "last_saved_at": datetime.now().isoformat(),
                "last_saved_frame": 0,
            },
            "frames": {},
        }
        frame_count = 0
        processed_frames = set()

        cap = cv2.VideoCapture(video_path)

    # Set global for signal handler
    g_detection_data = detection_data
    g_output_file = output_path

    start_time = time.time()
    start_frame = frame_count  # frames before this were done in an earlier run
    last_save_time = start_time
    last_save_frame_count = frame_count  # Track which frame we last saved at
    auto_save_count = 0

    print(f"\nProcessing video: {total_frames} frames @ {fps:.2f} fps")
    print(
        f"Auto-save every {auto_save_interval}s or {auto_save_frames} frames (whichever comes first)"
    )
    print(f"Resume from frame {frame_count + 1 if resume_mode else 1}")
    print()

    # Process frames; the capture is released even if detection raises or the
    # SIGINT handler exits the process.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            current_time = (frame_count - 1) / fps if fps > 0 else 0

            # Skip already processed frames in resume mode
            if frame_count in processed_frames:
                continue

            # Run YOLO detection
            results = model(frame, verbose=False)
            detections = get_detections_list(results[0], model)

            # Store detection data
            detection_data["frames"][str(frame_count)] = {
                "frame_number": frame_count,
                "time_seconds": round(current_time, 3),
                "time_formatted": format_time(current_time),
                "detections": detections,
            }

            processed_frames.add(frame_count)

            # Progress indicator every 500 frames
            if frame_count % 500 == 0:
                elapsed = time.time() - start_time
                # Some containers report a frame count of 0; avoid dividing by it.
                progress = (
                    (frame_count / total_frames) * 100 if total_frames > 0 else 0.0
                )
                # Base the rate on frames handled in this run only, so the
                # estimate stays meaningful after a resume.
                frames_this_run = frame_count - start_frame
                eta = (
                    max((elapsed / frames_this_run) * (total_frames - frame_count), 0)
                    if frames_this_run > 0
                    else 0
                )
                print(
                    f" Progress: {frame_count}/{total_frames} ({progress:.1f}%) - "
                    f"ETA: {eta:.0f}s - {len(detections)} objects"
                )

                if publisher:
                    publisher.progress(
                        "yolo", frame_count, total_frames, f"frame {frame_count}"
                    )

            # Auto-save check (time-based OR frame-based)
            current_time_val = time.time()
            time_elapsed = current_time_val - last_save_time >= auto_save_interval
            frames_since_save = frame_count - last_save_frame_count >= auto_save_frames

            if time_elapsed or frames_since_save:
                success, file_size = save_detection_data(
                    output_path,
                    detection_data,
                    is_interrupted=False,
                    silent=True,
                    last_saved_frame=frame_count,
                )
                if success:
                    auto_save_count += 1
                    reason = "time" if time_elapsed else "frames"
                    print(
                        f" Auto-saved (#{auto_save_count}, {reason}): frame {last_save_frame_count}-{frame_count}"
                    )
                    last_save_time = current_time_val
                    last_save_frame_count = frame_count
    finally:
        cap.release()

    processing_time = time.time() - start_time

    # Update final metadata
    total_detections = sum(
        len(f.get("detections", [])) for f in detection_data.get("frames", {}).values()
    )

    detection_data["metadata"]["status"] = "completed"
    detection_data["metadata"]["completed_at"] = datetime.now().isoformat()
    detection_data["metadata"]["processing_time"] = processing_time
    detection_data["metadata"]["total_detections"] = total_detections
    detection_data["metadata"]["auto_save_count"] = auto_save_count

    # Save final data, recording the real last frame instead of the default 0
    save_detection_data(
        output_path,
        detection_data,
        is_interrupted=False,
        last_saved_frame=frame_count,
    )
    # save_detection_data() rewrites the status field; make sure the dict
    # handed back to the caller reports the run as finished.
    detection_data["metadata"]["status"] = "completed"

    # Print summary
    print(f"\n{'=' * 60}")
    print("YOLO Detection complete!")
    print(f" Total frames processed: {frame_count}")
    print(f" Frames with detections: {len(detection_data['frames'])}")
    print(f" Total objects detected: {total_detections}")
    print(f" Processing time: {processing_time:.1f}s")
    print(f" Auto-saves: {auto_save_count}")
    print(f" Output: {output_path}")
    print(f"{'=' * 60}")

    if publisher:
        publisher.complete(
            "yolo", f"{len(detection_data['frames'])} frames with objects"
        )

    return detection_data
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line interface: two positional paths plus resume/auto-save options.
    cli = argparse.ArgumentParser(
        description="YOLO Object Detection with Resume Support"
    )
    cli.add_argument("video_path", help="Path to video file")
    cli.add_argument("output_path", help="Output JSON path")
    cli.add_argument("--uuid", "-u", default="", help="UUID for Redis progress")
    cli.add_argument(
        "--auto-save",
        default=30,
        type=int,
        help="Auto-save interval in seconds (default: 30)",
    )
    cli.add_argument(
        "--auto-save-frames",
        default=300,
        type=int,
        help="Auto-save after N frames (default: 300)",
    )
    cli.add_argument(
        "--force",
        action="store_true",
        help="Force restart from beginning (ignore existing data)",
    )

    opts = cli.parse_args()

    # Keyword arguments make the mapping from CLI option to parameter explicit.
    process_yolo(
        video_path=opts.video_path,
        output_path=opts.output_path,
        uuid=opts.uuid,
        auto_save_interval=opts.auto_save,
        force_restart=opts.force,
        auto_save_frames=opts.auto_save_frames,
    )
|
||||
Reference in New Issue
Block a user