chore: backup before migration to new repo

2026-04-23 16:46:02 +08:00
parent 13dd3b30f3
commit 59809dae1f
40 changed files with 5566 additions and 1783 deletions
--- a/src/main.rs
+++ b/src/main.rs
@@ -1805,6 +1805,64 @@ async fn main() -> Result<()> {
                }
            };

+            // Read Pose JSON (optional)
+            let pose_path = format!("{}.pose.json", uuid);
+            let pose_result = match std::fs::read_to_string(&pose_path) {
+                Ok(pose_json) => match serde_json::from_str::<
+                    momentry_core::core::processor::pose::PoseResult,
+                >(&pose_json)
+                {
+                    Ok(result) => {
+                        println!("Loaded Pose: {} frames", result.frames.len());
+                        result
+                    }
+                    Err(e) => {
+                        println!("Warning: Failed to parse Pose JSON: {}. Skipping Pose.", e);
+                        momentry_core::core::processor::pose::PoseResult {
+                            frame_count: 0,
+                            fps: 0.0,
+                            frames: vec![],
+                        }
+                    }
+                },
+                Err(_) => {
+                    println!("Warning: Pose file not found. Skipping Pose.");
+                    momentry_core::core::processor::pose::PoseResult {
+                        frame_count: 0,
+                        fps: 0.0,
+                        frames: vec![],
+                    }
+                }
+            };
+
+            // Read ASRX JSON (optional)
+            let asrx_path = format!("{}.asrx.json", uuid);
+            let asrx_result = match std::fs::read_to_string(&asrx_path) {
+                Ok(asrx_json) => match serde_json::from_str::<
+                    momentry_core::core::processor::asrx::AsrxResult,
+                >(&asrx_json)
+                {
+                    Ok(result) => {
+                        println!("Loaded ASRX: {} segments", result.segments.len());
+                        result
+                    }
+                    Err(e) => {
+                        println!("Warning: Failed to parse ASRX JSON: {}. Skipping ASRX.", e);
+                        momentry_core::core::processor::asrx::AsrxResult {
+                            language: None,
+                            segments: vec![],
+                        }
+                    }
+                },
+                Err(_) => {
+                    println!("Warning: ASRX file not found. Skipping ASRX.");
+                    momentry_core::core::processor::asrx::AsrxResult {
+                        language: None,
+                        segments: vec![],
+                    }
+                }
+            };
+
            // ========== Store pre_chunks (from ASR, CUT) ==========

            println!("\nStoring pre_chunks...");
@@ -1922,12 +1980,21 @@ async fn main() -> Result<()> {
                face_by_frame.insert(frame.frame, frame.clone());
            }

-            // Store frames (merge data from YOLO, OCR, Face)
+            let mut pose_by_frame: std::collections::HashMap<
+                u64,
+                momentry_core::core::processor::pose::PoseFrame,
+            > = std::collections::HashMap::new();
+            for frame in &pose_result.frames {
+                pose_by_frame.insert(frame.frame, frame.clone());
+            }
+
+            // Store frames (merge data from YOLO, OCR, Face, Pose)
            let mut all_frames: Vec<u64> = frame_data
                .keys()
                .cloned()
                .chain(ocr_by_frame.keys().cloned())
                .chain(face_by_frame.keys().cloned())
+                .chain(pose_by_frame.keys().cloned())
                .collect();
            all_frames.sort();
            all_frames.dedup();
@@ -1937,6 +2004,7 @@ async fn main() -> Result<()> {
                let yolo_frame = frame_data.get(frame_num);
                let ocr_frame = ocr_by_frame.get(frame_num);
                let face_frame = face_by_frame.get(frame_num);
+                let pose_frame = pose_by_frame.get(frame_num);

                let frame = momentry_core::core::db::postgres_db::Frame {
                    id: 0,
@@ -1947,6 +2015,7 @@ async fn main() -> Result<()> {
                    yolo_objects: yolo_frame.map(|f| serde_json::json!(&f.objects)),
                    ocr_results: ocr_frame.map(|f| serde_json::json!(&f.texts)),
                    face_results: face_frame.map(|f| serde_json::json!(&f.faces)),
+                    pose_results: pose_frame.map(|f| serde_json::json!(&f.persons)),
                    frame_path: None,
                    created_at: String::new(),
                };
@@ -1960,10 +2029,33 @@ async fn main() -> Result<()> {
            println!("\nCreating chunks...");

            // Rule 1: Direct conversion (sentence pre_chunk -> sentence chunk)
+            // Merge ASRX speaker_id by time overlap
            let mut sentence_chunks = Vec::new();
            for (i, seg) in asr_result.segments.iter().enumerate() {
                let pre_chunk_id = asr_pre_chunk_ids.get(i).copied().unwrap_or(0);
-                let chunk = Chunk::from_seconds(
+
+                // Find matching ASRX segment by time overlap
+                let speaker_id = asrx_result
+                    .segments
+                    .iter()
+                    .find(|ax| {
+                        // Inclusive overlap test; `find` returns the first ASRX segment that passes
+                        ax.start <= seg.end && ax.end >= seg.start
+                    })
+                    .and_then(|ax| ax.speaker_id.clone());
+
+                let content = if let Some(ref sid) = speaker_id {
+                    serde_json::json!({
+                        "text": seg.text,
+                        "speaker_id": sid,
+                    })
+                } else {
+                    serde_json::json!({
+                        "text": seg.text,
+                    })
+                };
+
+                let mut chunk = Chunk::from_seconds(
                    file_id as i32,
                    uuid.clone(),
                    i as u32,
@@ -1972,15 +2064,40 @@ async fn main() -> Result<()> {
                    seg.start,
                    seg.end,
                    fps,
-                    serde_json::json!({
-                        "text": seg.text,
-                    }),
+                    content,
                )
                .with_text_content(seg.text.clone())
                .with_pre_chunk_ids(vec![pre_chunk_id as i32]);
+
+                // Attach ASR language info plus a speaker_matched flag, only when a speaker was found
+                if speaker_id.is_some() {
+                    chunk = chunk.with_metadata(serde_json::json!({
+                        "language": asr_result.language,
+                        "language_probability": asr_result.language_probability,
+                        "speaker_matched": true,
+                    }));
+                }
+
                sentence_chunks.push(chunk);
            }

+            if !asrx_result.segments.is_empty() {
+                let matched = sentence_chunks
+                    .iter()
+                    .filter(|c| {
+                        c.content
+                            .get("speaker_id")
+                            .and_then(|v| v.as_str())
+                            .is_some()
+                    })
+                    .count();
+                println!(
+                    "  ASRX merge: {}/{} sentence chunks matched to speakers",
+                    matched,
+                    sentence_chunks.len()
+                );
+            }
+
            // Rule 1: CUT chunks
            let mut cut_chunks = Vec::new();
            for (i, scene) in cut_result.scenes.iter().enumerate() {
@@ -2235,7 +2352,7 @@ async fn main() -> Result<()> {
            // Get list of videos to process
            let videos_to_process = if uuid == "all" {
                // Get all videos
-                let videos = pg.list_videos().await?;
+                let videos = pg.list_videos(10000, 0).await?.0;
                videos.into_iter().map(|v| v.uuid).collect::<Vec<_>>()
            } else {
                // Process single video
@@ -2486,7 +2603,7 @@ async fn main() -> Result<()> {
                    .await?
                    .ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?]
            } else {
-                db.list_videos().await?
+                db.list_videos(10000, 0).await?.0
            };

            let output_dir = std::path::PathBuf::from("thumbnails");
@@ -2520,7 +2637,7 @@ async fn main() -> Result<()> {
                    .await?
                    .ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?]
            } else {
-                db.list_videos().await?
+                db.list_videos(10000, 0).await?.0
            };

            println!("\n╔══════════════════════════════════════════════════════════════════════════════════╗");