diff --git a/docs_v1.0/DESIGN/Thumbnail_JPEG_Validation_Impl.md b/docs_v1.0/DESIGN/Thumbnail_JPEG_Validation_Impl.md index 4350073..9257bb2 100644 --- a/docs_v1.0/DESIGN/Thumbnail_JPEG_Validation_Impl.md +++ b/docs_v1.0/DESIGN/Thumbnail_JPEG_Validation_Impl.md @@ -146,6 +146,96 @@ for entry in &entries { } ``` +## Python Scripts (Optional Enhancement) + +### 6. Create: `scripts/utils/jpeg_validator.py` + +```python +#!/usr/bin/env python3 +"""JPEG validation utilities for ffmpeg-extracted frames.""" + +JPEG_MIN_SIZE = 100 +JPEG_SOI_MARKER = bytes([0xFF, 0xD8, 0xFF]) +JPEG_EOI_MARKER = bytes([0xFF, 0xD9]) + + +def validate_jpeg(data: bytes) -> bool: + """Validate JPEG by checking header, footer, and minimum size.""" + if len(data) < JPEG_MIN_SIZE: + return False + if data[:3] != JPEG_SOI_MARKER: + return False + if data[-2:] != JPEG_EOI_MARKER: + return False + return True + + +def validate_jpeg_file(path: str) -> bool: + """Validate JPEG file on disk.""" + try: + with open(path, "rb") as f: + data = f.read() + return validate_jpeg(data) + except Exception: + return False + + +def filter_valid_jpegs(paths: list[str]) -> list[str]: + """Filter list of paths to only valid JPEGs.""" + return [p for p in paths if validate_jpeg_file(p)] +``` + +### 7. Modify: `scripts/thumbnail_extractor.py` + +Location: After extracting each thumbnail (around line 65) + +Add validation (first import `validate_jpeg_file` from `scripts/utils/jpeg_validator.py`): + +```python +if result.returncode == 0 and os.path.exists(output_file): + # ADD VALIDATION: + if validate_jpeg_file(output_file): + extracted.append(output_file) + print(f" Extracted: {output_file} at {ts:.1f}s", file=sys.stderr) + else: + print(f" Invalid JPEG at {ts:.1f}s", file=sys.stderr) + os.remove(output_file) # Clean up invalid file +else: + print(f" Failed to extract frame at {ts:.1f}s", file=sys.stderr) +``` + +### 8. 
Modify: `scripts/caption_processor.py` + +Location: `extract_frames()` function, after ffmpeg extraction (around line 70) + +Add validation (first import `validate_jpeg_file` from `scripts/utils/jpeg_validator.py`): + +```python +try: + subprocess.run(cmd, capture_output=True, check=False) + if os.path.exists(output_file): + # ADD VALIDATION: + if validate_jpeg_file(output_file): + frames.append({"index": i, "timestamp": timestamp, "path": output_file}) + else: + os.remove(output_file) # Clean up invalid file +except Exception: + pass +``` + +### Python Scripts Affected + +| Script | Function | Line | Priority | +|--------|----------|------|----------| +| `thumbnail_extractor.py` | `extract_thumbnails()` | 65 | High (user-facing) | +| `caption_processor.py` | `extract_frames()` | 70 | Medium | +| `caption_processor_contract_v1.py` | `extract_frames()` | 310 | Medium | +| `ocr_processor_contract_v1.py` | `extract_frames()` | 367 | Medium | +| `qa/executor.py` | `extract_frames()` | 93 | Low (QA only) | +| `face_cross_validate.py` | `extract_frames()` | 16 | Low (testing) | +| `face_mediapipe_test.py` | `extract_frames()` | 25 | Low (testing) | +| `analyze_video_faces.py` | `extract_video_frames()` | 61 | Low (analysis) | + ## Validation Logic | Check | Condition | Error if failed | @@ -176,6 +266,7 @@ feat: add JPEG validation to thumbnail endpoints - Add validation to face_thumbnail endpoint - Add validation to get_trace_thumbnail endpoint - Filter invalid JPEGs in FrameManager::extract +- (Optional) Add Python jpeg_validator utility for script validation Prevents serving corrupted/incomplete JPEG images to frontend. ``` @@ -184,4 +275,5 @@ Prevents serving corrupted/incomplete JPEG images to frontend. | Version | Date | Author | Changes | |---------|------|--------|---------| -| 1.0.0 | 2026-05-27 | M5Max128 | Implementation plan ready | \ No newline at end of file +| 1.0.0 | 2026-05-27 | M5Max128 | Implementation plan ready | +| 1.1.0 | 2026-05-27 | M5Max128 | Added Python scripts section | \ No newline at end of file