diff --git a/src/api/scan.rs b/src/api/scan.rs index 8fcd1b1..b3275e6 100644 --- a/src/api/scan.rs +++ b/src/api/scan.rs @@ -40,7 +40,6 @@ struct ProcessorStatus { #[derive(Debug, Serialize, Default)] struct PostgresStats { sentence_chunks: i64, - trace_chunks: i64, relationship_chunks: i64, identities: i64, file_identities: i64, @@ -514,9 +513,6 @@ async fn get_ingestion_status( )); let face_total = face_total; let trace_count = trace_count; - let trace_chunks = count_sql!(&format!( - "SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'trace'" - )); let identity_count = identity_count; let tkg_nodes = count_sql!(&format!( "SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'", @@ -609,11 +605,6 @@ async fn get_ingestion_status( trace_count > 0, Some(format!("{trace_count} traces / {face_total} detections")) ), - step!( - "trace_chunks", - trace_chunks > 0, - Some(format!("{trace_chunks} trace chunks")) - ), // TKG Nodes step!("tkg_face_track", face_track_nodes > 0, Some(format!("{face_track_nodes} nodes"))), step!("tkg_gaze_track", gaze_track_nodes > 0, Some(format!("{gaze_track_nodes} nodes"))), @@ -736,13 +727,6 @@ async fn get_file_stats( .fetch_one(pool) .await .unwrap_or(0), - trace_chunks: sqlx::query_scalar::<_, i64>(&format!( - "SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'trace'" - )) - .bind(&file_uuid) - .fetch_one(pool) - .await - .unwrap_or(0), relationship_chunks: sqlx::query_scalar::<_, i64>(&format!( "SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'relationship'" )) diff --git a/src/core/chunk/rule1_ingest.rs b/src/core/chunk/rule1_ingest.rs index a212395..fabbaff 100644 --- a/src/core/chunk/rule1_ingest.rs +++ b/src/core/chunk/rule1_ingest.rs @@ -14,7 +14,7 @@ pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result let pre_chunks_table = schema::table_name("pre_chunks"); let asr_segments = fetch_asr_segments(pool, file_uuid, &pre_chunks_table, fps).await?; - let ocr_map = fetch_ocr_texts(pool, file_uuid, &pre_chunks_table).await?; + let ocr_map = fetch_ocr_texts(pool, file_uuid, &pre_chunks_table, fps).await?; let video = db .get_video_by_uuid(file_uuid) @@ -161,14 +161,15 @@ async fn fetch_ocr_texts( pool: &PgPool, file_uuid: &str, table: &str, + fps: f64, ) -> Result>> { let query = format!( r#" SELECT - coordinate_index as frame, data + start_frame, start_time, end_time, data FROM {} WHERE file_uuid = $1 AND processor_type = 'ocr' - ORDER BY coordinate_index + ORDER BY start_frame "#, table ); @@ -177,9 +178,16 @@ async fn fetch_ocr_texts( let mut map: BTreeMap> = BTreeMap::new(); for row in rows { - let frame: i64 = row.try_get("frame").unwrap_or(0); + let start_time: f64 = row.try_get("start_time").unwrap_or(0.0); + let end_time_raw: Option = row.try_get("end_time").ok(); let data: Value = row.try_get("data").unwrap_or(Value::Null); + let start_frame = (start_time * fps) as i64; + let end_frame = end_time_raw + .filter(|t| *t > 0.0) + .map(|t| (t * fps) as i64) + .unwrap_or(start_frame + 1); + let texts: Vec = data .get("texts") .and_then(|t| t.as_array()) @@ -201,7 +209,16 @@ async fn fetch_ocr_texts( }) .unwrap_or_default(); - map.insert(frame, texts); + // Store texts for each frame in the range + for frame in start_frame..=end_frame { + map.entry(frame).or_default().extend(texts.clone()); + } + } + + // Deduplicate texts per frame + for (_frame, texts) in map.iter_mut() { + let mut seen = std::collections::HashSet::new(); + texts.retain(|t| seen.insert(t.clone())); } Ok(map)