docs: expand JPEG validation plan to include Python scripts
This commit is contained in:
@@ -146,6 +146,96 @@ for entry in &entries {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Python Scripts (Optional Enhancement)
|
||||||
|
|
||||||
|
### 6. Create: `scripts/utils/jpeg_validator.py`
|
||||||
|
|
||||||
|
```python
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""JPEG validation utilities for ffmpeg-extracted frames."""
|
||||||
|
|
||||||
|
# Smallest payload, in bytes, that is accepted as a plausible JPEG.
JPEG_MIN_SIZE = 100
# Byte prefix every accepted image must start with.
JPEG_SOI_MARKER = b"\xff\xd8\xff"
# Byte suffix every accepted image must end with.
JPEG_EOI_MARKER = b"\xff\xd9"
|
||||||
|
|
||||||
|
|
||||||
|
def validate_jpeg(data: bytes) -> bool:
    """Report whether *data* passes the structural JPEG checks.

    Three conditions must all hold:

    * the payload is at least ``JPEG_MIN_SIZE`` bytes long,
    * its first three bytes equal ``JPEG_SOI_MARKER``,
    * its last two bytes equal ``JPEG_EOI_MARKER``.

    Only the framing bytes and the length are inspected; the image
    content between them is not decoded.

    Args:
        data: Raw bytes of the candidate image.

    Returns:
        True if every check passes, False otherwise.
    """
    # Evaluated left to right with short-circuiting, so the size check
    # still runs first, exactly as in a guard-clause formulation.
    return (
        len(data) >= JPEG_MIN_SIZE
        and data[:3] == JPEG_SOI_MARKER
        and data[-2:] == JPEG_EOI_MARKER
    )
|
||||||
|
|
||||||
|
|
||||||
|
def validate_jpeg_file(path: str) -> bool:
    """Validate a JPEG file on disk.

    The file is read in binary mode and its contents are handed to
    ``validate_jpeg``.

    Args:
        path: Filesystem path of the file to check.

    Returns:
        True if the file could be read and its contents pass
        ``validate_jpeg``; False if the file cannot be opened or read,
        or if the contents fail validation.
    """
    # Keep the try body limited to the I/O so that only read failures
    # are translated into "invalid"; an unexpected error raised by the
    # validation step itself is no longer silently swallowed.
    try:
        with open(path, "rb") as f:
            data = f.read()
    except (OSError, ValueError):
        # OSError covers missing/unreadable files and permission errors;
        # ValueError covers malformed paths (e.g. an embedded NUL byte).
        return False
    return validate_jpeg(data)
|
||||||
|
|
||||||
|
|
||||||
|
def filter_valid_jpegs(paths: list[str]) -> list[str]:
    """Return the subset of *paths* that ``validate_jpeg_file`` accepts.

    Input order is preserved, and paths that fail validation are dropped.

    Args:
        paths: Candidate file paths to check.

    Returns:
        A new list holding only the paths that validated successfully.
    """
    return list(filter(validate_jpeg_file, paths))
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7. Modify: `scripts/thumbnail_extractor.py`
|
||||||
|
|
||||||
|
Location: After extracting each thumbnail (around line 65)
|
||||||
|
|
||||||
|
Add validation:
|
||||||
|
|
||||||
|
```python
|
||||||
|
if result.returncode == 0 and os.path.exists(output_file):
|
||||||
|
# ADD VALIDATION:
|
||||||
|
if validate_jpeg_file(output_file):
|
||||||
|
extracted.append(output_file)
|
||||||
|
print(f" Extracted: {output_file} at {ts:.1f}s", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
print(f" Invalid JPEG at {ts:.1f}s", file=sys.stderr)
|
||||||
|
os.remove(output_file) # Clean up invalid file
|
||||||
|
else:
|
||||||
|
print(f" Failed to extract frame at {ts:.1f}s", file=sys.stderr)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8. Modify: `scripts/caption_processor.py`
|
||||||
|
|
||||||
|
Location: `extract_frames()` function, after ffmpeg extraction (around line 70)
|
||||||
|
|
||||||
|
Add validation:
|
||||||
|
|
||||||
|
```python
|
||||||
|
try:
|
||||||
|
subprocess.run(cmd, capture_output=True, check=False)
|
||||||
|
if os.path.exists(output_file):
|
||||||
|
# ADD VALIDATION:
|
||||||
|
if validate_jpeg_file(output_file):
|
||||||
|
frames.append({"index": i, "timestamp": timestamp, "path": output_file})
|
||||||
|
else:
|
||||||
|
os.remove(output_file) # Clean up invalid file
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
```
|
||||||
|
|
||||||
|
### Python Scripts Affected
|
||||||
|
|
||||||
|
| Script | Function | Line | Priority |
|
||||||
|
|--------|----------|------|----------|
|
||||||
|
| `thumbnail_extractor.py` | `extract_thumbnails()` | 65 | High (user-facing) |
|
||||||
|
| `caption_processor.py` | `extract_frames()` | 70 | Medium |
|
||||||
|
| `caption_processor_contract_v1.py` | `extract_frames()` | 310 | Medium |
|
||||||
|
| `ocr_processor_contract_v1.py` | `extract_frames()` | 367 | Medium |
|
||||||
|
| `qa/executor.py` | `extract_frames()` | 93 | Low (QA only) |
|
||||||
|
| `face_cross_validate.py` | `extract_frames()` | 16 | Low (testing) |
|
||||||
|
| `face_mediapipe_test.py` | `extract_frames()` | 25 | Low (testing) |
|
||||||
|
| `analyze_video_faces.py` | `extract_video_frames()` | 61 | Low (analysis) |
|
||||||
|
|
||||||
## Validation Logic
|
## Validation Logic
|
||||||
|
|
||||||
| Check | Condition | Error if failed |
|
| Check | Condition | Error if failed |
|
||||||
@@ -176,6 +266,7 @@ feat: add JPEG validation to thumbnail endpoints
|
|||||||
- Add validation to face_thumbnail endpoint
|
- Add validation to face_thumbnail endpoint
|
||||||
- Add validation to get_trace_thumbnail endpoint
|
- Add validation to get_trace_thumbnail endpoint
|
||||||
- Filter invalid JPEGs in FrameManager::extract
|
- Filter invalid JPEGs in FrameManager::extract
|
||||||
|
- (Optional) Add Python jpeg_validator utility for script validation
|
||||||
|
|
||||||
Prevents serving corrupted/incomplete JPEG images to frontend.
|
Prevents serving corrupted/incomplete JPEG images to frontend.
|
||||||
```
|
```
|
||||||
@@ -184,4 +275,5 @@ Prevents serving corrupted/incomplete JPEG images to frontend.
|
|||||||
|
|
||||||
| Version | Date | Author | Changes |
|
| Version | Date | Author | Changes |
|
||||||
|---------|------|--------|---------|
|
|---------|------|--------|---------|
|
||||||
| 1.0.0 | 2026-05-27 | M5Max128 | Implementation plan ready |
|
| 1.0.0 | 2026-05-27 | M5Max128 | Implementation plan ready |
|
||||||
|
| 1.1.0 | 2026-05-27 | M5Max128 | Added Python scripts section |
|
||||||
Reference in New Issue
Block a user