chore: backup before migration to new repo

This commit is contained in:
Warren
2026-04-23 16:46:02 +08:00
parent 13dd3b30f3
commit 59809dae1f
40 changed files with 5566 additions and 1783 deletions

View File

@@ -1805,6 +1805,64 @@ async fn main() -> Result<()> {
}
};
// Read Pose JSON (optional).
// A missing, unreadable, or malformed file falls back to an empty PoseResult so the
// remaining steps run without pose data.
let pose_path = format!("{}.pose.json", uuid);
// Single definition of the "no pose data" fallback shared by every failure branch.
let empty_pose = || momentry_core::core::processor::pose::PoseResult {
    frame_count: 0,
    fps: 0.0,
    frames: vec![],
};
let pose_result = match std::fs::read_to_string(&pose_path) {
    Ok(pose_json) => match serde_json::from_str::<
        momentry_core::core::processor::pose::PoseResult,
    >(&pose_json)
    {
        Ok(result) => {
            println!("Loaded Pose: {} frames", result.frames.len());
            result
        }
        Err(e) => {
            println!("Warning: Failed to parse Pose JSON: {}. Skipping Pose.", e);
            empty_pose()
        }
    },
    // Only a genuinely absent file is reported as "not found".
    Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
        println!("Warning: Pose file not found. Skipping Pose.");
        empty_pose()
    }
    // Any other I/O failure (e.g. permission denied) is reported with its actual cause
    // instead of being mislabelled as a missing file.
    Err(e) => {
        println!(
            "Warning: Failed to read Pose file {}: {}. Skipping Pose.",
            pose_path, e
        );
        empty_pose()
    }
};
// Read ASRX JSON (optional).
// A missing, unreadable, or malformed file falls back to an empty AsrxResult so the
// remaining steps run without ASRX data.
let asrx_path = format!("{}.asrx.json", uuid);
// Single definition of the "no ASRX data" fallback shared by every failure branch.
let empty_asrx = || momentry_core::core::processor::asrx::AsrxResult {
    language: None,
    segments: vec![],
};
let asrx_result = match std::fs::read_to_string(&asrx_path) {
    Ok(asrx_json) => match serde_json::from_str::<
        momentry_core::core::processor::asrx::AsrxResult,
    >(&asrx_json)
    {
        Ok(result) => {
            println!("Loaded ASRX: {} segments", result.segments.len());
            result
        }
        Err(e) => {
            println!("Warning: Failed to parse ASRX JSON: {}. Skipping ASRX.", e);
            empty_asrx()
        }
    },
    // Only a genuinely absent file is reported as "not found".
    Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
        println!("Warning: ASRX file not found. Skipping ASRX.");
        empty_asrx()
    }
    // Any other I/O failure (e.g. permission denied) is reported with its actual cause
    // instead of being mislabelled as a missing file.
    Err(e) => {
        println!(
            "Warning: Failed to read ASRX file {}: {}. Skipping ASRX.",
            asrx_path, e
        );
        empty_asrx()
    }
};
// ========== Store pre_chunks (from ASR, CUT) ==========
println!("\nStoring pre_chunks...");
@@ -1922,12 +1980,21 @@ async fn main() -> Result<()> {
face_by_frame.insert(frame.frame, frame.clone());
}
// Store frames (merge data from YOLO, OCR, Face)
// Index the loaded pose frames by frame number. When two entries share a frame number,
// the later one replaces the earlier one.
let mut pose_by_frame = std::collections::HashMap::<
    u64,
    momentry_core::core::processor::pose::PoseFrame,
>::new();
pose_by_frame.extend(
    pose_result
        .frames
        .iter()
        .map(|pose| (pose.frame, pose.clone())),
);
// Store frames (merge data from YOLO, OCR, Face, Pose)
// Gather every frame number present in any of the four per-frame maps, then put them in
// ascending order with duplicates removed.
let mut all_frames: Vec<u64> = Vec::new();
all_frames.extend(frame_data.keys().copied());
all_frames.extend(ocr_by_frame.keys().copied());
all_frames.extend(face_by_frame.keys().copied());
all_frames.extend(pose_by_frame.keys().copied());
// Equal u64 values are indistinguishable, so an unstable sort gives the same order.
all_frames.sort_unstable();
all_frames.dedup();
@@ -1937,6 +2004,7 @@ async fn main() -> Result<()> {
let yolo_frame = frame_data.get(frame_num);
let ocr_frame = ocr_by_frame.get(frame_num);
let face_frame = face_by_frame.get(frame_num);
let pose_frame = pose_by_frame.get(frame_num);
let frame = momentry_core::core::db::postgres_db::Frame {
id: 0,
@@ -1947,6 +2015,7 @@ async fn main() -> Result<()> {
yolo_objects: yolo_frame.map(|f| serde_json::json!(&f.objects)),
ocr_results: ocr_frame.map(|f| serde_json::json!(&f.texts)),
face_results: face_frame.map(|f| serde_json::json!(&f.faces)),
pose_results: pose_frame.map(|f| serde_json::json!(&f.persons)),
frame_path: None,
created_at: String::new(),
};
@@ -1960,10 +2029,33 @@ async fn main() -> Result<()> {
println!("\nCreating chunks...");
// Rule 1: Direct conversion (sentence pre_chunk -> sentence chunk)
// Merge ASRX speaker_id by time overlap
let mut sentence_chunks = Vec::new();
for (i, seg) in asr_result.segments.iter().enumerate() {
let pre_chunk_id = asr_pre_chunk_ids.get(i).copied().unwrap_or(0);
let chunk = Chunk::from_seconds(
// Find the speaker for this sentence: the first ASRX segment that overlaps the ASR
// segment in time and carries a speaker label.
let speaker_id = asrx_result
    .segments
    .iter()
    // Strict comparisons: two segments that merely share an endpoint (one ends exactly
    // where the other starts) do not overlap, so the previous speaker's segment is not
    // picked up for a sentence that begins right at its end.
    .filter(|ax| ax.start < seg.end && ax.end > seg.start)
    // Keep looking past overlapping segments that have no speaker label.
    .find_map(|ax| ax.speaker_id.clone());
// "speaker_id" is only present in the chunk content when a speaker was found.
let content = match &speaker_id {
    Some(sid) => serde_json::json!({
        "text": seg.text,
        "speaker_id": sid,
    }),
    None => serde_json::json!({
        "text": seg.text,
    }),
};
let mut chunk = Chunk::from_seconds(
file_id as i32,
uuid.clone(),
i as u32,
@@ -1972,15 +2064,40 @@ async fn main() -> Result<()> {
seg.start,
seg.end,
fps,
serde_json::json!({
"text": seg.text,
}),
content,
)
.with_text_content(seg.text.clone())
.with_pre_chunk_ids(vec![pre_chunk_id as i32]);
// When a speaker was matched for this sentence, attach metadata to the chunk: the
// language fields come from the ASR result, plus a flag recording the speaker match.
if speaker_id.is_some() {
    let speaker_metadata = serde_json::json!({
        "language": asr_result.language,
        "language_probability": asr_result.language_probability,
        "speaker_matched": true,
    });
    chunk = chunk.with_metadata(speaker_metadata);
}
sentence_chunks.push(chunk);
}
// Print a merge summary, but only when ASRX segments were actually loaded.
if !asrx_result.segments.is_empty() {
    // A chunk counts as matched when its content has a string-valued "speaker_id".
    let mut matched = 0usize;
    for c in &sentence_chunks {
        let has_speaker = matches!(
            c.content.get("speaker_id"),
            Some(v) if v.as_str().is_some()
        );
        if has_speaker {
            matched += 1;
        }
    }
    println!(
        " ASRX merge: {}/{} sentence chunks matched to speakers",
        matched,
        sentence_chunks.len()
    );
}
// Rule 1: CUT chunks
let mut cut_chunks = Vec::new();
for (i, scene) in cut_result.scenes.iter().enumerate() {
@@ -2235,7 +2352,7 @@ async fn main() -> Result<()> {
// Get list of videos to process
let videos_to_process = if uuid == "all" {
// Get all videos
let videos = pg.list_videos().await?;
let videos = pg.list_videos(10000, 0).await?.0;
videos.into_iter().map(|v| v.uuid).collect::<Vec<_>>()
} else {
// Process single video
@@ -2486,7 +2603,7 @@ async fn main() -> Result<()> {
.await?
.ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?]
} else {
db.list_videos().await?
db.list_videos(10000, 0).await?.0
};
let output_dir = std::path::PathBuf::from("thumbnails");
@@ -2520,7 +2637,7 @@ async fn main() -> Result<()> {
.await?
.ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?]
} else {
db.list_videos().await?
db.list_videos(10000, 0).await?.0
};
println!("\n╔══════════════════════════════════════════════════════════════════════════════════╗");