feat: unified probe — dispatcher detects category, runs ffprobe/Python/meta per file type

This commit is contained in:
Accusys
2026-05-15 14:38:47 +08:00
parent 4ee8a42e76
commit 29eca5a224
5 changed files with 270 additions and 65 deletions

View File

@@ -999,70 +999,52 @@ async fn register_single_file(
&final_name,
);
// Step 5: Probe — use pre.json if available, otherwise run ffprobe
let cached_probe = pre_data.as_ref()
.and_then(|p| p.get("probe_json"))
.and_then(|v| serde_json::from_value::<crate::core::probe::ProbeResult>(v.clone()).ok());
let probe_result = cached_probe.or_else(|| crate::core::probe::probe_video(&canonical_path).ok());
let file_meta = std::fs::metadata(&canonical_path).ok();
let probe_json: Option<serde_json::Value> = if let Some(ref pre) = pre_data {
pre.get("probe_json").cloned()
// Step 5: Unified probe — use pre.json, otherwise run unified_probe()
let temp_probe_json: serde_json::Value = if let Some(ref pre) = pre_data {
pre.get("probe_json").cloned().unwrap_or_default()
} else {
probe_result.as_ref().map(|r| serde_json::to_value(r)).and_then(|r| r.ok()).or_else(|| {
file_meta.map(|m| serde_json::json!({
"format": {"size": m.len().to_string(), "filename": &canonical_path, "format_name": "unknown"},
"streams": []
}))
})
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string());
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await
};
let probe_json = Some(temp_probe_json.clone());
let has_video = probe_result.as_ref().map_or(false, |r| r.streams.iter().any(|s| s.codec_type.as_deref() == Some("video")));
let has_audio = probe_result.as_ref().map_or(false, |r| r.streams.iter().any(|s| s.codec_type.as_deref() == Some("audio")));
let has_video = temp_probe_json.get("streams").and_then(|s| s.as_array())
.map_or(false, |streams| streams.iter().any(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("video")));
let has_audio = temp_probe_json.get("streams").and_then(|s| s.as_array())
.map_or(false, |streams| streams.iter().any(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("audio")));
// Determine file_type: video/audio streams in the probe JSON first, then the probe's own format.file_type ("unknown" if absent)
let final_file_type = if has_video {
Some("video".to_string())
} else if has_audio {
Some("audio".to_string())
} else {
let ext = std::path::Path::new(&canonical_path).extension().and_then(|e| e.to_str()).map(|e| e.to_lowercase());
match ext.as_deref() {
Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg") => Some("image".to_string()),
Some("pdf") => Some("document".to_string()),
Some("doc" | "docx") => Some("document".to_string()),
Some("pages") => Some("document".to_string()),
Some("xls" | "xlsx" | "numbers") => Some("spreadsheet".to_string()),
Some("ppt" | "pptx" | "key") => Some("presentation".to_string()),
_ => probe_result.as_ref().and_then(|r| {
if r.streams.is_empty() && r.format.duration.is_some() { Some("unknown".to_string()) } else { None }
}),
}
Some(temp_probe_json.get("format").and_then(|f| f.get("file_type")).and_then(|v| v.as_str()).unwrap_or("unknown").to_string())
};
let duration = probe_result.as_ref()
.and_then(|r| r.format.duration.as_ref())
.and_then(|s| s.parse::<f64>().ok())
.unwrap_or(0.0);
let duration = temp_probe_json.get("format").and_then(|f| {
let src = if has_video { f.get("duration") } else { None };
src.and_then(|v| v.as_str()).and_then(|s| s.parse::<f64>().ok())
}).unwrap_or(0.0);
let mut width = 0u32;
let mut height = 0u32;
let mut fps = 0.0;
let mut total_frames = 0u64;
if let Some(ref probe) = probe_result {
if let Some(s) = probe.streams.iter().find(|s| s.codec_type.as_deref() == Some("video")) {
width = s.width.unwrap_or(0);
height = s.height.unwrap_or(0);
if let Some(fps_str) = &s.r_frame_rate {
if let Some(streams) = temp_probe_json.get("streams").and_then(|s| s.as_array()) {
if let Some(s) = streams.iter().find(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("video")) {
width = s.get("width").and_then(|v| v.as_i64()).unwrap_or(0) as u32;
height = s.get("height").and_then(|v| v.as_i64()).unwrap_or(0) as u32;
if let Some(fps_str) = s.get("r_frame_rate").and_then(|v| v.as_str()) {
if let Some((num, den)) = fps_str.split_once('/') {
if let (Ok(n), Ok(d)) = (num.parse::<f64>(), den.parse::<f64>()) {
if d > 0.0 {
fps = n / d;
}
if d > 0.0 { fps = n / d; }
}
}
}
total_frames = s.nb_frames.as_ref().and_then(|s| s.parse().ok()).unwrap_or((duration * fps) as u64);
total_frames = s.get("nb_frames").and_then(|v| v.as_str())
.and_then(|s| s.parse().ok()).unwrap_or((duration * fps) as u64);
}
}
@@ -1158,16 +1140,16 @@ async fn register_single_file(
}
// Update DB: cut_done, scene_done, audio_tracks
let audio_tracks: Vec<serde_json::Value> = probe_result.as_ref().map_or(vec![], |pr| {
pr.streams.iter()
.filter(|s| s.codec_type.as_deref() == Some("audio"))
.map(|s| {
let audio_tracks: Vec<serde_json::Value> = temp_probe_json.get("streams").and_then(|s| s.as_array()).map_or(vec![], |streams| {
streams.iter()
.filter(|st| st.get("codec_type").and_then(|c| c.as_str()) == Some("audio"))
.map(|st| {
serde_json::json!({
"index": s.index,
"codec": s.codec_name,
"channels": s.channels,
"sample_rate": s.sample_rate,
"language": s.tags.as_ref().and_then(|t| t.get("language")).unwrap_or(&serde_json::Value::Null),
"index": st.get("index").and_then(|v| v.as_i64()),
"codec": st.get("codec_name").and_then(|v| v.as_str()),
"channels": st.get("channels").and_then(|v| v.as_i64()),
"sample_rate": st.get("sample_rate").and_then(|v| v.as_str()),
"language": st.get("tags").and_then(|t| t.get("language")),
})
})
.collect()