M4 handover: coordinate fixes, detector registry, deploy v2, YOLOv8s, identity lifecycle

- Fix swift_pose/swift_ocr Y-flip bugs (BUG-003~006)
- Add heuristic_scene module + post-processing trigger (replaces Places365)
- YOLOv5nu → YOLOv8s CoreML (+33% detections, +390% scene indicators)
- Per-table SQL export (split 4.7GB single file → 478MB max per table)
- Version/build check in deploy.sh (compare /health vs file_info.json)
- Add file_uuid column to identities table + backfill
- Identity pre-clean step in deploy (avoids UNIQUE conflicts on re-deploy)
- Stranger_xxx naming fix with UUID context
- Add DETECTOR_REGISTRY.md (25 detectors), DETECTOR_SELECTION_SOP.md
- Update SPATIAL_COORDINATE_REGISTRY.md (P layer, 6-layer architecture)
- New IDENTITY_LIFECYCLE.md
- M4 response docs for deploy_script_fix and 111614 test report
This commit is contained in:
Accusys
2026-05-13 20:00:47 +08:00
parent d34bcae145
commit ffc30d7377
25 changed files with 2219 additions and 118 deletions

View File

@@ -0,0 +1,292 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use std::path::Path;
use tracing::info;
/// Heuristic scene metadata derived from YOLO + Face + luminance data.
/// Runs as a post-processing trigger, not a standalone processor.
/// Replaces the removed Places365 Scene classifier.
#[derive(Debug, Serialize)]
pub struct HeuristicSceneMeta {
pub file_uuid: String,
pub segments: Vec<SceneSegmentMeta>,
}
#[derive(Debug, Serialize)]
pub struct SceneSegmentMeta {
pub segment_index: u32,
pub start_frame: i64,
pub end_frame: i64,
pub start_time: f64,
pub end_time: f64,
pub indoor_score: f64,
pub outdoor_score: f64,
pub crowd_size: CrowdSize,
pub max_face_count: i64,
pub dominant_objects: Vec<String>,
pub likely_vehicle_transport: bool,
pub avg_brightness: Option<f64>,
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CrowdSize {
Empty,
Single,
Duo,
SmallGroup,
Crowd,
}
/// Indoor-indicative YOLO classes (COCO labels)
const INDOOR_CLASSES: &[&str] = &[
"chair", "couch", "bed", "dining table", "toilet", "tv", "laptop",
"microwave", "oven", "refrigerator", "sink", "book", "clock",
"vase", "potted plant",
];
/// Vehicle-indicative classes (person + vehicle = transport scene)
const VEHICLE_CLASSES: &[&str] = &[
"car", "truck", "bus", "train", "boat", "aeroplane", "bicycle", "motorbike",
];
/// Outdoor-indicative YOLO classes
const OUTDOOR_CLASSES: &[&str] = &[
"car", "truck", "bus", "train", "boat", "airplane",
"traffic light", "fire hydrant", "stop sign", "parking meter",
"bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
"bear", "zebra", "giraffe", "tree",
];
/// Build heuristic scene metadata from disk files (yolo.json + DB face data).
/// segment_boundaries: [(start_frame, end_frame, start_time, end_time), ...]
/// — from CUT detections.
pub async fn build_heuristic_scene_meta(
pool: &PgPool,
file_uuid: &str,
segment_boundaries: &[(i64, i64, f64, f64)],
) -> Result<HeuristicSceneMeta> {
if segment_boundaries.is_empty() {
return Ok(HeuristicSceneMeta {
file_uuid: file_uuid.to_string(),
segments: vec![],
});
}
use std::collections::HashMap;
use std::collections::HashSet;
// Build frame→class_counts map from yolo.json
let yolo_path = Path::new(crate::core::config::OUTPUT_DIR.as_str())
.join(format!("{}.yolo.json", file_uuid));
let mut frame_objects: HashMap<i64, Vec<String>> = HashMap::new();
if yolo_path.exists() {
if let Ok(yolo_str) = tokio::fs::read_to_string(&yolo_path).await {
#[derive(Deserialize)]
struct YoloJson {
frames: Vec<YoloFrameJson>,
}
#[derive(Deserialize)]
struct YoloFrameJson {
frame: i64,
objects: Vec<YoloObjectJson>,
}
#[derive(Deserialize)]
struct YoloObjectJson {
class_name: String,
}
if let Ok(yolo) = serde_json::from_str::<YoloJson>(&yolo_str) {
for frm in &yolo.frames {
let classes: Vec<String> =
frm.objects.iter().map(|o| o.class_name.clone()).collect();
if !classes.is_empty() {
frame_objects.insert(frm.frame, classes);
}
}
}
}
}
// Get face counts grouped by frame
let face_rows: Vec<(i64, i64)> = sqlx::query_as(
"SELECT frame_number, COUNT(*) as fc \
FROM dev.face_detections \
WHERE file_uuid = $1 AND frame_number IS NOT NULL \
GROUP BY frame_number \
ORDER BY frame_number",
)
.bind(file_uuid)
.fetch_all(pool)
.await
.unwrap_or_default();
let mut frame_face_counts: HashMap<i64, i64> = HashMap::new();
for (frame, count) in &face_rows {
frame_face_counts.insert(*frame, *count);
}
// Process each segment
let mut segments = Vec::new();
for (idx, &(start_f, end_f, start_t, end_t)) in segment_boundaries.iter().enumerate() {
let mut class_counts: HashMap<String, u64> = HashMap::new();
let mut class_frame_presence: HashMap<String, u64> = HashMap::new();
let mut indoor_objects = 0u64;
let mut outdoor_objects = 0u64;
let mut max_faces: i64 = 0;
let mut frame_count = 0u64;
for frame in start_f..=end_f {
frame_count += 1;
if let Some(objects) = frame_objects.get(&frame) {
let mut seen_this_frame: HashSet<String> = HashSet::new();
for cls in objects {
*class_counts.entry(cls.clone()).or_default() += 1;
if seen_this_frame.insert(cls.clone()) {
*class_frame_presence.entry(cls.clone()).or_default() += 1;
}
if INDOOR_CLASSES.contains(&cls.as_str()) {
indoor_objects += 1;
} else if OUTDOOR_CLASSES.contains(&cls.as_str()) {
outdoor_objects += 1;
}
}
}
if let Some(&fc) = frame_face_counts.get(&frame) {
max_faces = max_faces.max(fc);
}
}
// Normalize by frame count (prevents static-scene FP inflation)
let indoor_ratio = indoor_objects as f64 / frame_count.max(1) as f64;
let outdoor_ratio = outdoor_objects as f64 / frame_count.max(1) as f64;
let total_indicator = indoor_ratio + outdoor_ratio;
let (indoor_score, outdoor_score) = if total_indicator > 0.0 {
(indoor_ratio / total_indicator, outdoor_ratio / total_indicator)
} else {
(0.5, 0.5)
};
// Determine crowd size
let crowd_size = match max_faces {
0 => CrowdSize::Empty,
1 => CrowdSize::Single,
2 | 3 => CrowdSize::Duo,
4..=10 => CrowdSize::SmallGroup,
_ => CrowdSize::Crowd,
};
// Vehicle transport detection: check BEFORE class_frame_presence is consumed
let person_frames = class_frame_presence.get("person").copied().unwrap_or(0);
let vehicle_frames: u64 = VEHICLE_CLASSES
.iter()
.map(|c| class_frame_presence.get(*c).copied().unwrap_or(0))
.sum();
let person_ratio = person_frames as f64 / frame_count.max(1) as f64;
let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0
&& outdoor_score > 0.3;
// Dominant objects: rank by frame presence (not total count)
let mut sorted: Vec<_> = class_frame_presence.into_iter().collect();
sorted.sort_by(|a, b| b.1.cmp(&a.1));
let dominant_objects: Vec<String> = sorted
.iter()
.take(3)
.map(|(cls, _)| cls.clone())
.collect();
segments.push(SceneSegmentMeta {
segment_index: idx as u32 + 1,
start_frame: start_f,
end_frame: end_f,
start_time: start_t,
end_time: end_t,
indoor_score,
outdoor_score,
crowd_size,
max_face_count: max_faces,
dominant_objects,
likely_vehicle_transport: likely_vehicle,
avg_brightness: None, // Future: from frame luminance analysis
});
}
info!(
"[SCENE-META] {} segments generated for {}",
segments.len(),
file_uuid
);
Ok(HeuristicSceneMeta {
file_uuid: file_uuid.to_string(),
segments,
})
}
/// Full pipeline entry point: reads CUT data, generates heuristic metadata, writes JSON.
/// Called from job_worker post-processing trigger.
pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &str) -> Result<usize> {
let pool = db.pool();
// Read CUT segment boundaries from cut.json
let cut_path = Path::new(crate::core::config::OUTPUT_DIR.as_str())
.join(format!("{}.cut.json", file_uuid));
let segments: Vec<(i64, i64, f64, f64)> = if cut_path.exists() {
let cut_str = tokio::fs::read_to_string(&cut_path)
.await
.context("Failed to read cut.json")?;
#[derive(Deserialize)]
struct CutJson {
scenes: Vec<CutSceneJson>,
}
#[derive(Deserialize)]
struct CutSceneJson {
start_frame: i64,
end_frame: i64,
start_time: f64,
end_time: f64,
}
let cut: CutJson = serde_json::from_str(&cut_str)
.context("Failed to parse cut.json")?;
cut.scenes
.into_iter()
.map(|s| (s.start_frame, s.end_frame, s.start_time, s.end_time))
.collect()
} else {
// Fallback: query DB for video duration, make one segment
let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as(
"SELECT total_frames, duration FROM dev.videos WHERE file_uuid = $1",
)
.bind(file_uuid)
.fetch_optional(pool)
.await
.context("Failed to query video info")?
.unwrap_or((Some(0), Some(0.0)));
let tf = total_frames.unwrap_or(0);
let dur = duration.unwrap_or(0.0);
if tf > 0 {
vec![(0, tf, 0.0, dur)]
} else {
vec![]
}
};
if segments.is_empty() {
info!("[SCENE-META] No segments for {}", file_uuid);
return Ok(0);
}
let meta = build_heuristic_scene_meta(pool, file_uuid, &segments).await?;
let n = meta.segments.len();
// Write scene_meta.json
let out_path = Path::new(crate::core::config::OUTPUT_DIR.as_str())
.join(format!("{}.scene_meta.json", file_uuid));
let json_str = serde_json::to_string_pretty(&meta)?;
tokio::fs::write(&out_path, json_str)
.await
.context("Failed to write scene_meta.json")?;
Ok(n)
}

View File

@@ -5,6 +5,7 @@ pub mod cut;
pub mod executor;
pub mod face;
pub mod face_recognition;
pub mod heuristic_scene;
pub mod ocr;
pub mod pose;
pub mod scene_classification;
@@ -23,6 +24,9 @@ pub use face_recognition::{
FaceRecognitionFrame, FaceRecognitionResult, FaceRegistrationResult, RecognizedFace,
RecognizedFaceDetection,
};
pub use heuristic_scene::{
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta, SceneSegmentMeta,
};
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
pub use scene_classification::{