feat: backup architecture docs, source code, and scripts

This commit is contained in:
Warren
2026-04-25 17:15:45 +08:00
parent 59809dae1f
commit 1f84e5469f
368 changed files with 146329 additions and 261 deletions

38
src/bin/debug_tsquery.rs Normal file
View File

@@ -0,0 +1,38 @@
use momentry_core::core::text::global_synonym_expander;
/// Debug helper: expands a fixed Chinese query with the global synonym
/// expander and prints every intermediate step of the splitting logic.
///
/// The expanded string is split into groups (on `&` when one is present,
/// otherwise on whitespace). A group wrapped in parentheses is further split
/// into `|`-separated terms, and each term is finally reduced to its
/// alphanumeric characters.
fn main() {
    let expander = global_synonym_expander();
    let query = "電腦";
    println!("原始查詢: '{}'", query);
    let expanded = expander.expand_chinese_query(query);
    println!("擴展結果: '{}'", expanded);
    // Exercise the group splitting.
    let groups: Vec<&str> = if expanded.contains('&') {
        expanded.split('&').map(|s| s.trim()).collect()
    } else {
        expanded.split_whitespace().collect()
    };
    println!("分組: {:?}", groups);
    for group in groups {
        println!(" 分組: '{}'", group);
        // `strip_prefix`/`strip_suffix` remove the surrounding parentheses
        // without manual byte-index arithmetic.
        let parenthesised = group
            .strip_prefix('(')
            .and_then(|rest| rest.strip_suffix(')'));
        let terms: Vec<&str> = match parenthesised {
            Some(inner) => inner.split('|').map(|s| s.trim()).collect(),
            None => vec![group],
        };
        println!(" 詞語: {:?}", terms);
        for term in &terms {
            // `is_alphanumeric` already covers every alphabetic character,
            // so no separate `is_alphabetic` check is needed.
            let cleaned: String = term.chars().filter(|c| c.is_alphanumeric()).collect();
            println!(" 詞語 '{}' -> 清理後 '{}'", term, cleaned);
        }
    }
}

View File

@@ -0,0 +1,659 @@
use anyhow::{Context, Result};
use clap::Parser;
use crossterm::event::{self, Event, KeyCode};
use crossterm::terminal as crossterm_terminal;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::io::{self, IsTerminal, Write};
use std::path::PathBuf;
use std::process::{Command, Stdio};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
use std::time::Duration;
// Command-line options for the integrated player, parsed with clap's derive API.
// Plain `//` comments are used here on purpose: clap's derive turns `///` doc
// comments into --help text, which would change the program's output.
#[derive(Parser, Debug)]
#[command(name = "integrated_player")]
#[command(about = "Integrated player for ASR, Face, ASRX, and Pose")]
struct Args {
// Video file passed to ffplay; `main` aborts when it does not exist.
#[arg(short, long)]
video: PathBuf,
// Optional JSON file deserialized into `AsrData`.
#[arg(short = 'r', long)]
asr: Option<PathBuf>,
// Optional JSON file deserialized into `FaceData`.
#[arg(short = 'f', long)]
face: Option<PathBuf>,
// Optional JSON file deserialized into `AsrxData`.
#[arg(short = 'x', long)]
asrx: Option<PathBuf>,
// Optional JSON file deserialized into `PoseData`.
#[arg(short = 'p', long)]
pose: Option<PathBuf>,
// NOTE(review): `start`, `speaker_name`, `auto_play_speaker`, `demo`,
// `demo_segments_per_speaker` and `demo_speed` are accepted but not read by
// the code visible in this file.
#[arg(short = 's', long, default_value = "0.0")]
start: f64,
#[arg(long)]
speaker_name: Option<String>,
#[arg(long)]
auto_play_speaker: bool,
#[arg(long)]
demo: bool,
#[arg(long, default_value = "3")]
demo_segments_per_speaker: usize,
#[arg(long, default_value = "2.0")]
demo_speed: f64,
// When set, ffplay is started with a window (`-x`/`-y`) instead of `-nodisp`.
#[arg(long)]
show_video: bool,
// Passed to ffplay as `-x` when `show_video` is set.
#[arg(long, default_value = "800")]
video_width: u32,
// Passed to ffplay as `-y` when `show_video` is set.
#[arg(long, default_value = "600")]
video_height: u32,
// Selects `run_continuous_demo`; without it `main` only prints a hint.
#[arg(long)]
continuous_demo: bool,
}
/// One timed text segment from the ASR file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AsrSegment {
/// Segment start; printed with an `s` suffix and passed to ffplay `-ss`.
start: f64,
/// Segment end; playback length is computed as `end - start`.
end: f64,
/// Text shown for the segment.
text: String,
}
/// Top-level shape of the ASR JSON file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AsrData {
/// Optional language tag; not read by the code visible in this file.
language: Option<String>,
/// Segments, iterated in file order by the continuous demo.
segments: Vec<AsrSegment>,
}
/// One detected face inside a frame: bounding box plus confidence.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FaceInfo {
/// Optional identifier; not read by the code visible in this file.
face_id: Option<String>,
/// Bounding-box origin and size, printed as `bbox=(x,y) widthxheight`
/// (presumably pixels — TODO confirm against the producer).
x: i32,
y: i32,
width: i32,
height: i32,
/// Detection confidence, printed with two decimals.
confidence: f64,
}
/// Face detections for a single video frame.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FaceFrame {
/// Frame number; not read by the code visible in this file.
frame: u64,
/// Frame time, compared against the queried playback time.
timestamp: f64,
/// Faces found in the frame; only the first one is used by the lookup.
faces: Vec<FaceInfo>,
}
/// Top-level shape of the face-detection JSON file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FaceData {
/// `fps` and `frame_count` are required by deserialization but not read by
/// the code visible in this file.
fps: f64,
frame_count: u64,
/// Per-frame detections, scanned in file order.
frames: Vec<FaceFrame>,
}
/// One speaker-labelled time span from the ASRX file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AsrxSegment {
/// Segment index as stored in the file.
index: usize,
/// Span start, passed to ffplay `-ss` when no ASR data is loaded.
start: f64,
/// Span end, used for time-containment checks.
end: f64,
/// Playback length passed to ffplay `-t`
/// (presumably `end - start` — not verified here).
duration: f64,
/// Speaker ID, looked up in the player's speaker-name table.
speaker: String,
}
/// Top-level shape of the ASRX JSON file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AsrxData {
/// Speaker-labelled segments in file order.
segments: Vec<AsrxSegment>,
/// Per-speaker totals keyed by speaker ID.
speaker_stats: HashMap<String, SpeakerStats>,
}
/// Aggregate figures for one speaker, shown by `list_speakers`.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SpeakerStats {
/// Printed in the "Segments" column.
count: usize,
/// Printed in the "Duration" column with an `s` suffix.
duration: f64,
}
/// One named pose keypoint.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Keypoint {
/// Keypoint label; the lookup keeps names containing "mouth" or "lip", or equal to "nose".
name: String,
/// Keypoint coordinates (units not established by this file).
x: f32,
y: f32,
/// Keypoint confidence as stored in the file.
confidence: f32,
}
/// Pose data for one person in a frame.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PersonPose {
/// Keypoints of this person.
keypoints: Vec<Keypoint>,
/// Bounding box; not read by the code visible in this file.
bbox: Bbox,
}
/// Integer bounding box (origin plus size) attached to a `PersonPose`.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Bbox {
x: i32,
y: i32,
width: i32,
height: i32,
}
/// Pose detections for a single video frame.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PoseFrame {
/// Frame number; not read by the code visible in this file.
frame: u64,
/// Frame time, compared against the queried playback time.
timestamp: f64,
/// People found in the frame; only the first one is used by the lookup.
persons: Vec<PersonPose>,
}
/// Top-level shape of the pose JSON file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PoseData {
/// Per-frame poses, scanned in file order.
frames: Vec<PoseFrame>,
}
/// Everything known about one playback instant, merged from the loaded data sets
/// by `IntegratedPlayer::get_current_segment`.
#[derive(Debug, Clone)]
struct IntegratedSegment {
/// Bounds of the matching ASR segment, overwritten by the matching ASRX
/// segment when both exist; stay `0.0` when neither matches.
start: f64,
end: f64,
/// Text of the matching ASR segment, if any.
text: Option<String>,
/// Speaker ID of the matching ASRX segment, if any.
speaker: Option<String>,
/// First face of the first face frame close enough to the queried time.
face: Option<FaceInfo>,
/// Mouth/lip/nose keypoints of the first suitable pose frame.
mouth_landmarks: Option<Vec<Keypoint>>,
}
/// Holds the optional data sets loaded from JSON plus the speaker-name table.
struct IntegratedPlayer {
/// Each data set is `None` until the matching `load_*` method succeeds.
asr_data: Option<AsrData>,
face_data: Option<FaceData>,
asrx_data: Option<AsrxData>,
pose_data: Option<PoseData>,
/// Initialised to `0.0`; not read by the code visible in this file.
current_time: f64,
/// Speaker ID -> (actor name, character name).
speaker_names: HashMap<String, (String, String)>,
}
impl IntegratedPlayer {
fn new() -> Self {
let mut speaker_names = HashMap::new();
speaker_names.insert(
"SPEAKER_0".to_string(),
("Cary Grant".to_string(), "Peter Joshua".to_string()),
);
speaker_names.insert(
"SPEAKER_1".to_string(),
("Audrey Hepburn".to_string(), "Regina Lampert".to_string()),
);
speaker_names.insert(
"SPEAKER_2".to_string(),
(
"Walter Matthau".to_string(),
"Hamilton Bartholomew".to_string(),
),
);
speaker_names.insert(
"SPEAKER_4".to_string(),
("James Coburn".to_string(), "Tex Panthollow".to_string()),
);
Self {
asr_data: None,
face_data: None,
asrx_data: None,
pose_data: None,
current_time: 0.0,
speaker_names,
}
}
fn load_asr(&mut self, path: &PathBuf) -> Result<()> {
let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read ASR file: {:?}", path))?;
self.asr_data = Some(serde_json::from_str(&content)?);
println!(
"✓ Loaded {} ASR segments",
self.asr_data.as_ref().unwrap().segments.len()
);
Ok(())
}
fn load_face(&mut self, path: &PathBuf) -> Result<()> {
let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read Face file: {:?}", path))?;
self.face_data = Some(serde_json::from_str(&content)?);
let total_faces = self
.face_data
.as_ref()
.unwrap()
.frames
.iter()
.map(|f| f.faces.len())
.sum::<usize>();
println!(
"✓ Loaded {} face frames, {} total detections",
self.face_data.as_ref().unwrap().frames.len(),
total_faces
);
Ok(())
}
fn load_asrx(&mut self, path: &PathBuf) -> Result<()> {
let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read ASRX file: {:?}", path))?;
self.asrx_data = Some(serde_json::from_str(&content)?);
println!(
"✓ Loaded {} ASRX segments, {} speakers",
self.asrx_data.as_ref().unwrap().segments.len(),
self.asrx_data.as_ref().unwrap().speaker_stats.len()
);
Ok(())
}
fn load_pose(&mut self, path: &PathBuf) -> Result<()> {
let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read Pose file: {:?}", path))?;
self.pose_data = Some(serde_json::from_str(&content)?);
println!(
"✓ Loaded {} pose frames",
self.pose_data.as_ref().unwrap().frames.len()
);
Ok(())
}
fn get_current_segment(&self, time: f64) -> Option<IntegratedSegment> {
let mut segment = IntegratedSegment {
start: 0.0,
end: 0.0,
text: None,
speaker: None,
face: None,
mouth_landmarks: None,
};
if let Some(asr) = &self.asr_data {
for seg in &asr.segments {
if time >= seg.start && time <= seg.end {
segment.start = seg.start;
segment.end = seg.end;
segment.text = Some(seg.text.clone());
break;
}
}
}
if let Some(asrx) = &self.asrx_data {
for seg in &asrx.segments {
if time >= seg.start && time <= seg.end {
segment.start = seg.start;
segment.end = seg.end;
segment.speaker = Some(seg.speaker.clone());
break;
}
}
}
if let Some(face) = &self.face_data {
for frame in &face.frames {
if (frame.timestamp - time).abs() < 1.0 {
if let Some(face_info) = frame.faces.first() {
segment.face = Some(face_info.clone());
break;
}
}
}
}
if let Some(pose) = &self.pose_data {
for frame in &pose.frames {
if (frame.timestamp - time).abs() < 0.5 {
if let Some(person) = frame.persons.first() {
let mouth_points: Vec<Keypoint> = person
.keypoints
.iter()
.filter(|kp| {
kp.name.contains("mouth")
|| kp.name.contains("lip")
|| kp.name == "nose"
})
.cloned()
.collect();
if !mouth_points.is_empty() {
segment.mouth_landmarks = Some(mouth_points);
break;
}
}
}
}
}
if segment.text.is_some()
|| segment.speaker.is_some()
|| segment.face.is_some()
|| segment.mouth_landmarks.is_some()
{
Some(segment)
} else {
None
}
}
fn get_speaker_info(&self, speaker_id: &str) -> (String, String) {
self.speaker_names
.get(speaker_id)
.cloned()
.unwrap_or_else(|| ("Unknown".to_string(), "Unknown".to_string()))
}
fn list_speakers(&self) {
if let Some(asrx) = &self.asrx_data {
println!("\n📊 Speaker Statistics:");
println!("{:-<80}", "");
println!(
"{:15} {:20} {:20} {:>10} {:>10}",
"Speaker ID", "Actor", "Character", "Segments", "Duration"
);
println!("{:-<80}", "");
for (speaker_id, stats) in &asrx.speaker_stats {
let (actor, character) = self.get_speaker_info(speaker_id);
println!(
"{:15} {:20} {:20} {:>10} {:>9.1}s",
speaker_id, actor, character, stats.count, stats.duration
);
}
println!("{:-<80}", "");
}
}
}
/// Leaves terminal raw mode when dropped, so every exit path of the demo
/// (including `?` error returns) restores the terminal.
struct RawModeGuard {
    enabled: bool,
}

impl Drop for RawModeGuard {
    fn drop(&mut self) {
        if self.enabled {
            // Best effort: nothing useful can be done if restoring fails.
            crossterm_terminal::disable_raw_mode().ok();
        }
    }
}

/// Gate in front of each segment: returns `Ok(false)` when the user asked to
/// quit (before or while paused) and `Ok(true)` once playback may continue.
/// The pause notice is printed once per pause instead of every poll.
fn wait_until_ready(paused: &AtomicBool, quit: &AtomicBool) -> Result<bool> {
    if quit.load(Ordering::SeqCst) {
        println!("\n⏹️ Stopped by user");
        return Ok(false);
    }
    let mut announced = false;
    while paused.load(Ordering::SeqCst) {
        if !announced {
            print!("\r⏸️ Paused - Press SPACE to resume");
            io::stdout().flush()?;
            announced = true;
        }
        thread::sleep(Duration::from_millis(100));
        if quit.load(Ordering::SeqCst) {
            println!("\n⏹️ Stopped by user");
            return Ok(false);
        }
    }
    Ok(true)
}

/// Prints the face box and mouth-landmark count known for `time`, if any.
fn print_detections(player: &IntegratedPlayer, time: f64) {
    if let Some(segment) = player.get_current_segment(time) {
        if let Some(face) = &segment.face {
            println!(
                "👤 Face: bbox=({},{}) {}x{}, conf={:.2}",
                face.x, face.y, face.width, face.height, face.confidence
            );
        }
        if let Some(landmarks) = &segment.mouth_landmarks {
            println!("👄 Mouth landmarks: {} points", landmarks.len());
        }
    }
}

/// Plays `duration` seconds of the video starting at `start` with ffplay and
/// waits until ffplay exits, so the child is always reaped. When `quit` is
/// raised during playback the child is killed and the call returns early.
fn play_clip(args: &Args, start: f64, duration: f64, quit: &AtomicBool) -> Result<()> {
    let mut cmd = Command::new("ffplay");
    cmd.arg("-ss")
        .arg(format!("{:.2}", start))
        .arg("-t")
        .arg(format!("{:.2}", duration))
        .arg("-autoexit");
    if args.show_video {
        cmd.arg("-x")
            .arg(args.video_width.to_string())
            .arg("-y")
            .arg(args.video_height.to_string());
    } else {
        cmd.arg("-nodisp");
    }
    // Passing the path as an OsStr avoids panicking on non-UTF-8 file names.
    cmd.arg(&args.video);
    let mut child = cmd
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .spawn()
        .context("Failed to start ffplay")?;
    loop {
        if child
            .try_wait()
            .context("Failed to poll ffplay")?
            .is_some()
        {
            return Ok(());
        }
        if quit.load(Ordering::SeqCst) {
            // The process may already be gone; either way reap it.
            child.kill().ok();
            child.wait().ok();
            return Ok(());
        }
        thread::sleep(Duration::from_millis(50));
    }
}

/// Plays every ASR segment (or, without ASR data, every ASRX segment) in file
/// order, printing the text, speaker, face and mouth information for each.
///
/// On an interactive terminal raw mode is enabled and a background thread
/// maps SPACE to pause/resume and Q / Esc to quit. Pause takes effect between
/// segments; quit also interrupts the clip that is currently playing.
///
/// # Errors
/// Fails when stdout cannot be flushed or ffplay cannot be started or polled.
///
/// NOTE(review): while raw mode is active the terminal does not translate
/// `\n` into a carriage return + line feed, so `println!` output may appear
/// staggered; left unchanged here.
fn run_continuous_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
    println!("\n🎬 Continuous Demo Mode");
    println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
    let is_interactive = io::stdin().is_terminal();
    if is_interactive {
        println!("Controls:");
        println!(" SPACE - Pause/Resume");
        println!(" Q - Quit");
    } else {
        println!("Running in non-interactive mode (no keyboard control)");
        println!("Use Ctrl+C to stop");
    }
    println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
    println!();

    let paused = Arc::new(AtomicBool::new(false));
    let quit = Arc::new(AtomicBool::new(false));
    // The guard lives until the function returns, on every path.
    let raw_mode = RawModeGuard {
        enabled: is_interactive && crossterm_terminal::enable_raw_mode().is_ok(),
    };
    if raw_mode.enabled {
        let paused_flag = Arc::clone(&paused);
        let quit_flag = Arc::clone(&quit);
        thread::spawn(move || loop {
            if let Ok(Event::Key(key_event)) = event::read() {
                match key_event.code {
                    KeyCode::Char(' ') => {
                        paused_flag.fetch_xor(true, Ordering::SeqCst);
                    }
                    KeyCode::Char('q') | KeyCode::Char('Q') | KeyCode::Esc => {
                        quit_flag.store(true, Ordering::SeqCst);
                        break;
                    }
                    _ => {}
                }
            }
            if quit_flag.load(Ordering::SeqCst) {
                break;
            }
            thread::sleep(Duration::from_millis(50));
        });
    }

    if let Some(asr) = &player.asr_data {
        let total_segments = asr.segments.len();
        for (i, seg) in asr.segments.iter().enumerate() {
            if !wait_until_ready(&paused, &quit)? {
                return Ok(());
            }
            println!("\n[{}/{}] Segment", i + 1, total_segments);
            println!("{:=<80}", "");
            println!("📝 ASR Text: {}", seg.text);
            println!("⏱ Time: {:.2}s - {:.2}s", seg.start, seg.end);
            // The speaker is the first ASRX span containing the segment start.
            let speaking = player.asrx_data.as_ref().and_then(|asrx| {
                asrx.segments
                    .iter()
                    .find(|s| seg.start >= s.start && seg.start <= s.end)
            });
            if let Some(asrx_seg) = speaking {
                let (actor, character) = player.get_speaker_info(&asrx_seg.speaker);
                println!(
                    "🎤 Speaker: {} - {} ({})",
                    asrx_seg.speaker, actor, character
                );
            }
            print_detections(player, seg.start + 0.01);
            let duration = seg.end - seg.start;
            println!(
                "▶️ Playing: {:.2}s - {:.2}s ({:.2}s)",
                seg.start, seg.end, duration
            );
            play_clip(args, seg.start, duration, &quit)?;
        }
        println!("\n{:=<80}", "");
        println!("✅ Demo completed! Played {} segments", total_segments);
        println!("{:=<80}", "");
    } else if let Some(asrx) = &player.asrx_data {
        let total_segments = asrx.segments.len();
        println!(
            "Playing {} ASRX segments (no ASR text available)",
            total_segments
        );
        for (i, seg) in asrx.segments.iter().enumerate() {
            if !wait_until_ready(&paused, &quit)? {
                return Ok(());
            }
            let (actor, character) = player.get_speaker_info(&seg.speaker);
            println!("\n[{}/{}] Segment", i + 1, total_segments);
            println!("{:=<80}", "");
            println!(
                "⏱ Time: {:.2}s - {:.2}s ({:.2}s)",
                seg.start, seg.end, seg.duration
            );
            println!("🎤 Speaker: {} - {} ({})", seg.speaker, actor, character);
            print_detections(player, seg.start + 0.01);
            println!("▶️ Playing audio segment");
            play_clip(args, seg.start, seg.duration, &quit)?;
        }
        println!("\n{:=<80}", "");
        println!("✅ Demo completed! Played {} segments", total_segments);
        println!("{:=<80}", "");
    } else {
        println!("⚠️ No ASR or ASRX data loaded");
    }
    Ok(())
}
/// Entry point: parses the command line, loads whichever data files were
/// supplied, prints the speaker table and runs the continuous demo when
/// `--continuous-demo` is set.
///
/// A data file that was named on the command line but does not exist is
/// skipped with a warning on stderr instead of being ignored silently.
///
/// # Errors
/// Fails when the video file is missing, a data file cannot be read or
/// parsed, or the demo itself fails.
fn main() -> Result<()> {
    let args = Args::parse();
    if !args.video.exists() {
        anyhow::bail!("Video file not found: {:?}", args.video);
    }
    println!("🎬 Integrated Player for ASR/Face/ASRX/Pose");
    println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
    println!("Video: {:?}", args.video);
    let mut player = IntegratedPlayer::new();
    if let Some(asr_path) = &args.asr {
        if asr_path.exists() {
            player.load_asr(asr_path)?;
        } else {
            eprintln!("⚠️ ASR file not found, skipping: {:?}", asr_path);
        }
    }
    if let Some(face_path) = &args.face {
        if face_path.exists() {
            player.load_face(face_path)?;
        } else {
            eprintln!("⚠️ Face file not found, skipping: {:?}", face_path);
        }
    }
    if let Some(asrx_path) = &args.asrx {
        if asrx_path.exists() {
            player.load_asrx(asrx_path)?;
        } else {
            eprintln!("⚠️ ASRX file not found, skipping: {:?}", asrx_path);
        }
    }
    if let Some(pose_path) = &args.pose {
        if pose_path.exists() {
            player.load_pose(pose_path)?;
        } else {
            eprintln!("⚠️ Pose file not found, skipping: {:?}", pose_path);
        }
    }
    player.list_speakers();
    if args.continuous_demo {
        run_continuous_demo(&player, &args)?;
    } else {
        println!("\n⚠️ Please use --continuous-demo flag");
    }
    Ok(())
}

View File

@@ -0,0 +1,711 @@
use anyhow::{Context, Result};
use clap::Parser;
use crossterm::event::{self, Event, KeyCode, KeyModifiers};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::io::{self, Write};
use std::path::PathBuf;
use std::process::{Child, Command, Stdio};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
use std::time::Duration;
// Command-line options for the integrated player, parsed with clap's derive API.
// Plain `//` comments are used here on purpose: clap's derive turns `///` doc
// comments into --help text, which would change the program's output.
#[derive(Parser, Debug)]
#[command(name = "integrated_player")]
#[command(about = "Integrated player for ASR, Face, ASRX, and Pose")]
struct Args {
// Video file passed to ffplay; `main` aborts when it does not exist.
#[arg(short, long)]
video: PathBuf,
// Optional JSON file deserialized into `AsrData`.
#[arg(short = 'r', long)]
asr: Option<PathBuf>,
// Optional JSON file deserialized into `FaceResult`.
#[arg(short = 'f', long)]
face: Option<PathBuf>,
// Optional JSON file deserialized into `AsrxData`.
#[arg(short = 'x', long)]
asrx: Option<PathBuf>,
// Optional JSON file deserialized into `PoseData`.
#[arg(short = 'p', long)]
pose: Option<PathBuf>,
// NOTE(review): accepted but not read by the code visible in this file.
#[arg(short = 's', long, default_value = "0.0")]
start: f64,
// Speaker ID to play; required together with `auto_play_speaker`.
#[arg(long)]
speaker_name: Option<String>,
// Plays up to five segments of `speaker_name` and exits.
#[arg(long)]
auto_play_speaker: bool,
// Selects `run_demo`; checked before the other modes.
#[arg(long)]
demo: bool,
// Upper bound on segments played per speaker in `run_demo`.
#[arg(long, default_value = "3")]
demo_segments_per_speaker: usize,
// Divisor applied to each segment's duration in `run_demo`.
#[arg(long, default_value = "2.0")]
demo_speed: f64,
// When set, the continuous demo starts ffplay with a window instead of `-nodisp`.
#[arg(long)]
show_video: bool,
// Passed to ffplay as `-x` when `show_video` is set.
#[arg(long, default_value = "800")]
video_width: u32,
// Passed to ffplay as `-y` when `show_video` is set.
#[arg(long, default_value = "600")]
video_height: u32,
// Selects `run_continuous_demo`.
#[arg(long)]
continuous_demo: bool,
}
/// One timed text segment from the ASR file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AsrSegment {
/// Segment start; printed with an `s` suffix and passed to ffplay `-ss`.
start: f64,
/// Segment end; playback length is computed as `end - start`.
end: f64,
/// Text shown for the segment.
text: String,
}
/// Top-level shape of the ASR JSON file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AsrData {
/// Optional language tag; not read by the code visible in this file.
language: Option<String>,
/// Segments in file order.
segments: Vec<AsrSegment>,
}
/// One face detection: the frame it belongs to, its bounding box and confidence.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FaceDetection {
/// Frame number; not read by the code visible in this file.
frame: u64,
/// Detection time, compared against the queried playback time.
timestamp: f64,
/// Bounding-box origin and size, printed as `bbox=(x,y) widthxheight`
/// (presumably pixels — TODO confirm against the producer).
x: i32,
y: i32,
width: i32,
height: i32,
/// Detection confidence, printed with two decimals.
confidence: f64,
}
/// Top-level shape of the face JSON file: a single `results` object.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FaceResult {
results: FaceResults,
}
/// Payload of `FaceResult`: the flat list of detections, scanned in file order.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FaceResults {
detections: Vec<FaceDetection>,
}
/// One speaker-labelled time span from the ASRX file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AsrxSegment {
/// Segment index, printed when playing a speaker's segments.
index: usize,
/// Span start, passed to ffplay `-ss`.
start: f64,
/// Span end, used for time-containment checks.
end: f64,
/// Playback length passed to ffplay `-t`
/// (presumably `end - start` — not verified here).
duration: f64,
/// Speaker ID, looked up in the player's speaker-name table.
speaker: String,
}
/// Top-level shape of the ASRX JSON file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AsrxData {
/// Speaker-labelled segments in file order.
segments: Vec<AsrxSegment>,
/// Per-speaker totals keyed by speaker ID.
speaker_stats: HashMap<String, SpeakerStats>,
}
/// Aggregate figures for one speaker, shown by `list_speakers`.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SpeakerStats {
/// Printed in the "Segments" column.
count: usize,
/// Printed in the "Duration" column with an `s` suffix.
duration: f64,
}
/// One named pose keypoint.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Keypoint {
/// Keypoint label; the lookup keeps names containing "mouth" or "lip", or equal to "nose".
name: String,
/// Keypoint coordinates, printed with one decimal (units not established by this file).
x: f32,
y: f32,
/// Keypoint confidence, printed with two decimals.
confidence: f32,
}
/// Pose data for one person in a frame.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PersonPose {
/// Keypoints of this person.
keypoints: Vec<Keypoint>,
/// Bounding box; not read by the code visible in this file.
bbox: Bbox,
}
/// Integer bounding box (origin plus size) attached to a `PersonPose`.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Bbox {
x: i32,
y: i32,
width: i32,
height: i32,
}
/// Pose detections for a single video frame.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PoseFrame {
/// Frame number; not read by the code visible in this file.
frame: u64,
/// Frame time, compared against the queried playback time.
timestamp: f64,
/// People found in the frame; only the first one is used by the lookup.
persons: Vec<PersonPose>,
}
/// Top-level shape of the pose JSON file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PoseData {
/// Per-frame poses, scanned in file order.
frames: Vec<PoseFrame>,
}
/// Everything known about one playback instant, merged from the loaded data sets
/// by `IntegratedPlayer::get_current_segment`.
#[derive(Debug, Clone)]
struct IntegratedSegment {
/// Bounds of the matching ASR segment, overwritten by the matching ASRX
/// segment when both exist; stay `0.0` when neither matches.
start: f64,
end: f64,
/// Text of the matching ASR segment, if any.
text: Option<String>,
/// Speaker ID of the matching ASRX segment, if any.
speaker: Option<String>,
/// First detection close enough to the queried time.
face: Option<FaceDetection>,
/// Mouth/lip/nose keypoints of the first suitable pose frame.
mouth_landmarks: Option<Vec<Keypoint>>,
}
/// Holds the optional data sets loaded from JSON plus the speaker-name table.
struct IntegratedPlayer {
/// Each data set is `None` until the matching `load_*` method succeeds.
asr_data: Option<AsrData>,
face_data: Option<FaceResult>,
asrx_data: Option<AsrxData>,
pose_data: Option<PoseData>,
/// Last time the user seeked to in interactive mode; starts at `0.0`.
current_time: f64,
/// Initialised to `false`; not read by the code visible in this file.
is_playing: bool,
/// Speaker ID -> (actor name, character name).
speaker_names: HashMap<String, (String, String)>,
}
impl IntegratedPlayer {
fn new() -> Self {
let mut speaker_names = HashMap::new();
speaker_names.insert(
"SPEAKER_0".to_string(),
("Cary Grant".to_string(), "Peter Joshua".to_string()),
);
speaker_names.insert(
"SPEAKER_1".to_string(),
("Audrey Hepburn".to_string(), "Regina Lampert".to_string()),
);
speaker_names.insert(
"SPEAKER_2".to_string(),
(
"Walter Matthau".to_string(),
"Hamilton Bartholomew".to_string(),
),
);
speaker_names.insert(
"SPEAKER_4".to_string(),
("James Coburn".to_string(), "Tex Panthollow".to_string()),
);
Self {
asr_data: None,
face_data: None,
asrx_data: None,
pose_data: None,
current_time: 0.0,
is_playing: false,
speaker_names,
}
}
fn load_asr(&mut self, path: &PathBuf) -> Result<()> {
let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read ASR file: {:?}", path))?;
self.asr_data = Some(serde_json::from_str(&content)?);
println!(
"✓ Loaded {} ASR segments",
self.asr_data.as_ref().unwrap().segments.len()
);
Ok(())
}
fn load_face(&mut self, path: &PathBuf) -> Result<()> {
let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read Face file: {:?}", path))?;
self.face_data = Some(serde_json::from_str(&content)?);
println!(
"✓ Loaded {} face detections",
self.face_data.as_ref().unwrap().results.detections.len()
);
Ok(())
}
fn load_asrx(&mut self, path: &PathBuf) -> Result<()> {
let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read ASRX file: {:?}", path))?;
self.asrx_data = Some(serde_json::from_str(&content)?);
println!(
"✓ Loaded {} ASRX segments, {} speakers",
self.asrx_data.as_ref().unwrap().segments.len(),
self.asrx_data.as_ref().unwrap().speaker_stats.len()
);
Ok(())
}
fn load_pose(&mut self, path: &PathBuf) -> Result<()> {
let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read Pose file: {:?}", path))?;
self.pose_data = Some(serde_json::from_str(&content)?);
println!(
"✓ Loaded {} pose frames",
self.pose_data.as_ref().unwrap().frames.len()
);
Ok(())
}
fn get_current_segment(&self, time: f64) -> Option<IntegratedSegment> {
let mut segment = IntegratedSegment {
start: 0.0,
end: 0.0,
text: None,
speaker: None,
face: None,
mouth_landmarks: None,
};
if let Some(asr) = &self.asr_data {
for seg in &asr.segments {
if time >= seg.start && time <= seg.end {
segment.start = seg.start;
segment.end = seg.end;
segment.text = Some(seg.text.clone());
break;
}
}
}
if let Some(asrx) = &self.asrx_data {
for seg in &asrx.segments {
if time >= seg.start && time <= seg.end {
segment.start = seg.start;
segment.end = seg.end;
segment.speaker = Some(seg.speaker.clone());
break;
}
}
}
if let Some(face) = &self.face_data {
for det in &face.results.detections {
if (det.timestamp - time).abs() < 1.0 {
segment.face = Some(det.clone());
break;
}
}
}
if let Some(pose) = &self.pose_data {
for frame in &pose.frames {
if (frame.timestamp - time).abs() < 0.5 {
if let Some(person) = frame.persons.first() {
let mouth_points: Vec<Keypoint> = person
.keypoints
.iter()
.filter(|kp| {
kp.name.contains("mouth")
|| kp.name.contains("lip")
|| kp.name == "nose"
})
.cloned()
.collect();
if !mouth_points.is_empty() {
segment.mouth_landmarks = Some(mouth_points);
break;
}
}
}
}
}
if segment.text.is_some()
|| segment.speaker.is_some()
|| segment.face.is_some()
|| segment.mouth_landmarks.is_some()
{
Some(segment)
} else {
None
}
}
fn get_speaker_info(&self, speaker_id: &str) -> (String, String) {
self.speaker_names
.get(speaker_id)
.cloned()
.unwrap_or_else(|| ("Unknown".to_string(), "Unknown".to_string()))
}
fn print_segment(&self, segment: &IntegratedSegment) {
println!("\n{:=<80}", "");
println!("⏱ Time: {:.2}s - {:.2}s", segment.start, segment.end);
if let Some(text) = &segment.text {
println!("📝 Text: {}", text);
}
if let Some(speaker) = &segment.speaker {
let (actor, character) = self.get_speaker_info(speaker);
println!("🎤 Speaker: {}{} ({})", speaker, actor, character);
}
if let Some(face) = &segment.face {
println!(
"👤 Face: bbox=({},{}) {}x{}, confidence={:.2}",
face.x, face.y, face.width, face.height, face.confidence
);
}
if let Some(landmarks) = &segment.mouth_landmarks {
println!("👄 Mouth landmarks: {} points", landmarks.len());
for kp in landmarks.iter().take(3) {
println!(
"{}: ({:.1}, {:.1}) conf={:.2}",
kp.name, kp.x, kp.y, kp.confidence
);
}
}
println!("{:=<80}", "");
}
fn list_speakers(&self) {
if let Some(asrx) = &self.asrx_data {
println!("\n📊 Speaker Statistics:");
println!("{:-<80}", "");
println!(
"{:15} {:20} {:20} {:>10} {:>10}",
"Speaker ID", "Actor", "Character", "Segments", "Duration"
);
println!("{:-<80}", "");
for (speaker_id, stats) in &asrx.speaker_stats {
let (actor, character) = self.get_speaker_info(speaker_id);
println!(
"{:15} {:20} {:20} {:>10} {:>9.1}s",
speaker_id, actor, character, stats.count, stats.duration
);
}
println!("{:-<80}", "");
}
}
}
fn play_segment(video_path: &PathBuf, start: f64, duration: f64, show_video: bool) -> Result<()> {
println!("▶️ Playing {:.2}s - {:.2}s", start, start + duration);
let mut cmd = Command::new("ffplay");
if show_video {
cmd.args([
"-ss",
&format!("{:.2}", start),
"-t",
&format!("{:.2}", duration),
"-autoexit",
video_path.to_str().unwrap(),
]);
} else {
cmd.args([
"-ss",
&format!("{:.2}", start),
"-t",
&format!("{:.2}", duration),
"-autoexit",
"-nodisp",
video_path.to_str().unwrap(),
]);
}
let _child = cmd
.stdout(Stdio::null())
.stderr(Stdio::null())
.spawn()
.context("Failed to start ffplay")?;
Ok(())
}
/// Plays the ASRX segments attributed to `speaker_id`, in file order.
///
/// At most `limit` segments are played (all of them when `limit` is `None`).
/// Before each clip the merged segment information at `start + 0.1` is
/// printed when available. Without ASRX data the function does nothing.
///
/// # Errors
/// Propagates any failure reported by `play_segment`.
fn play_speaker_segments(
    player: &IntegratedPlayer,
    video_path: &PathBuf,
    speaker_id: &str,
    limit: Option<usize>,
) -> Result<()> {
    let asrx = match &player.asrx_data {
        Some(data) => data,
        None => return Ok(()),
    };
    let matching: Vec<&AsrxSegment> = asrx
        .segments
        .iter()
        .filter(|s| s.speaker == speaker_id)
        .collect();
    let available = matching.len();
    let count = match limit {
        Some(requested) => requested.min(available),
        None => available,
    };
    println!("\n🎬 Playing {} segments for {}", count, speaker_id);
    // `count` never exceeds `matching.len()`, so the slice is in bounds.
    for (position, seg) in matching[..count].iter().enumerate() {
        println!("\n[{}/{}] Segment {}", position + 1, count, seg.index);
        let info = player.get_current_segment(seg.start + 0.1);
        if let Some(segment) = info {
            player.print_segment(&segment);
        }
        play_segment(video_path, seg.start, seg.duration, false)?;
        thread::sleep(Duration::from_millis(500));
    }
    println!("\n✅ Finished playing {} segments", count);
    Ok(())
}
// NOTE(review): this span looks truncated. The `fn run_demo` header below is
// immediately followed by stray closing braces and the tail of a key-handling
// closure, and the body reads `quit` / `paused` flags that are never declared
// here. It is presumably the remainder of `run_continuous_demo` (which `main`
// calls) with its setup code missing — restore it from version control before
// building. The code is left untouched; only comments were added.
fn run_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
}
}
});
if let Some(asr) = &player.asr_data {
let total_segments = asr.segments.len();
for (i, seg) in asr.segments.iter().enumerate() {
// Check whether the user asked to quit
if quit.load(Ordering::SeqCst) {
println!("\n⏹️ Stopped by user");
break;
}
// Check whether playback is paused
while paused.load(Ordering::SeqCst) {
println!("\r⏸️ Paused - Press SPACE to resume",);
std::io::stdout().flush()?;
thread::sleep(Duration::from_millis(100));
if quit.load(Ordering::SeqCst) {
println!("\n⏹️ Stopped by user");
return Ok(());
}
}
println!("\n[{}/{}] Segment", i + 1, total_segments);
println!("{:=<80}", "");
// Show all available information
if let Some(segment) = player.get_current_segment(seg.start + 0.01) {
player.print_segment(&segment);
}
// Play the audio/video
let duration = seg.end - seg.start;
println!(
"▶️ Playing: {:.2}s - {:.2}s ({:.2}s)",
seg.start, seg.end, duration
);
let mut cmd = Command::new("ffplay");
if args.show_video {
cmd.args([
"-ss",
&format!("{:.2}", seg.start),
"-t",
&format!("{:.2}", duration),
"-autoexit",
"-x",
&format!("{}", args.video_width),
"-y",
&format!("{}", args.video_height),
args.video.to_str().unwrap(),
]);
} else {
cmd.args([
"-ss",
&format!("{:.2}", seg.start),
"-t",
&format!("{:.2}", duration),
"-autoexit",
"-nodisp",
args.video.to_str().unwrap(),
]);
}
let _child = cmd
.stdout(Stdio::null())
.stderr(Stdio::null())
.spawn()
.context("Failed to start ffplay")?;
// Wait for playback to finish (sleeps for the clip length plus 100 ms; the child is not waited on)
thread::sleep(Duration::from_millis((duration * 1000.0) as u64 + 100));
}
println!("\n{:=<80}", "");
println!("✅ Demo completed! Played {} segments", total_segments);
println!("{:=<80}", "");
} else {
println!("⚠️ No ASR data loaded");
}
Ok(())
}
fn run_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
println!("\n🎬 Auto Demo Mode");
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
println!("Segments per speaker: {}", args.demo_segments_per_speaker);
println!("Demo speed: {:.1}x", args.demo_speed);
println!();
if let Some(asrx) = &player.asrx_data {
let mut speaker_ids: Vec<String> = asrx.speaker_stats.keys().cloned().collect();
speaker_ids.sort();
for speaker_id in &speaker_ids {
let (actor, character) = player.get_speaker_info(speaker_id);
println!("\n{:=<80}", "");
println!("🎭 Demo: {}{} ({})", speaker_id, actor, character);
println!("{:=<80}", "");
let segments: Vec<&AsrxSegment> = asrx
.segments
.iter()
.filter(|s| s.speaker == *speaker_id)
.collect();
let count = args.demo_segments_per_speaker.min(segments.len());
for (i, seg) in segments.iter().take(count).enumerate() {
println!("\n[Segment {}/{}]", i + 1, count);
if let Some(segment) = player.get_current_segment(seg.start + 0.1) {
player.print_segment(&segment);
}
println!(
"⏳ Playing audio ({:.1}s)...",
seg.duration / args.demo_speed
);
let _child = Command::new("ffplay")
.args([
"-ss",
&format!("{:.2}", seg.start),
"-t",
&format!("{:.2}", seg.duration / args.demo_speed),
"-autoexit",
"-nodisp",
args.video.to_str().unwrap(),
])
.stdout(Stdio::null())
.stderr(Stdio::null())
.spawn()
.context("Failed to start ffplay")?;
thread::sleep(Duration::from_millis(
((seg.duration / args.demo_speed) * 1000.0) as u64 + 500,
));
}
println!("\n⏸️ Pausing 2 seconds before next speaker...");
thread::sleep(Duration::from_secs(2));
}
println!("\n{:=<80}", "");
println!("✅ Demo completed!");
println!("{:=<80}", "");
}
Ok(())
}
/// Entry point: parses the command line, loads whichever data files were
/// supplied, prints the speaker table and then runs one mode, checked in this
/// order: `--demo`, `--continuous-demo`, `--auto-play-speaker`, and otherwise
/// an interactive prompt.
///
/// A data file that was named on the command line but does not exist is
/// skipped with a warning on stderr. The interactive prompt ends on `q`,
/// `quit`, `exit` or end of input (closed stdin), and reports commands it
/// does not recognise.
///
/// # Errors
/// Fails when the video file is missing, a data file cannot be read or
/// parsed, stdin/stdout fail, or the selected mode fails.
fn main() -> Result<()> {
    let args = Args::parse();
    if !args.video.exists() {
        anyhow::bail!("Video file not found: {:?}", args.video);
    }
    println!("🎬 Integrated Player for ASR/Face/ASRX/Pose");
    println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
    println!("Video: {:?}", args.video);
    let mut player = IntegratedPlayer::new();
    if let Some(asr_path) = &args.asr {
        if asr_path.exists() {
            player.load_asr(asr_path)?;
        } else {
            eprintln!("⚠️ ASR file not found, skipping: {:?}", asr_path);
        }
    }
    if let Some(face_path) = &args.face {
        if face_path.exists() {
            player.load_face(face_path)?;
        } else {
            eprintln!("⚠️ Face file not found, skipping: {:?}", face_path);
        }
    }
    if let Some(asrx_path) = &args.asrx {
        if asrx_path.exists() {
            player.load_asrx(asrx_path)?;
        } else {
            eprintln!("⚠️ ASRX file not found, skipping: {:?}", asrx_path);
        }
    }
    if let Some(pose_path) = &args.pose {
        if pose_path.exists() {
            player.load_pose(pose_path)?;
        } else {
            eprintln!("⚠️ Pose file not found, skipping: {:?}", pose_path);
        }
    }
    player.list_speakers();
    if args.demo {
        run_demo(&player, &args)?;
    } else if args.continuous_demo {
        run_continuous_demo(&player, &args)?;
    } else if args.auto_play_speaker {
        if let Some(speaker_id) = &args.speaker_name {
            play_speaker_segments(&player, &args.video, speaker_id, Some(5))?;
        } else {
            println!("\n⚠️ --speaker-name required for --auto-play-speaker");
        }
    } else {
        println!("\n🎮 Interactive Mode");
        println!(" Commands:");
        println!(" • Enter time in seconds to seek");
        println!(" • 's' to show current segment");
        println!(" • 'l' to list speakers");
        println!(" • 'p <speaker>' to play speaker segments");
        println!(" • 'q' to quit");
        println!();
        loop {
            print!("> ");
            std::io::Write::flush(&mut std::io::stdout())?;
            let mut input = String::new();
            // `read_line` returns 0 at end of input; without this check the
            // prompt would spin forever once stdin is closed.
            if std::io::stdin().read_line(&mut input)? == 0 {
                println!();
                break;
            }
            let input = input.trim();
            if input == "q" || input == "quit" || input == "exit" {
                break;
            } else if input == "s" || input == "show" {
                if let Some(segment) = player.get_current_segment(player.current_time) {
                    player.print_segment(&segment);
                } else {
                    println!("No segment at time {:.2}s", player.current_time);
                }
            } else if input == "l" || input == "list" {
                player.list_speakers();
            } else if let Some(speaker_id) = input.strip_prefix("p ") {
                play_speaker_segments(&player, &args.video, speaker_id.trim(), Some(3))?;
            } else if let Ok(time) = input.parse::<f64>() {
                player.current_time = time;
                println!("Seeked to {:.2}s", time);
                if let Some(segment) = player.get_current_segment(time) {
                    player.print_segment(&segment);
                } else {
                    println!("No segment at this time");
                }
            } else if !input.is_empty() {
                println!("Unknown command: '{}'", input);
            }
        }
    }
    Ok(())
}

View File

@@ -0,0 +1,92 @@
// Migration script to tokenize existing Chinese text in the database
// Usage: cargo run --bin migrate_chinese_text
use dotenv;
use momentry_core::core::text::tokenizer::tokenize_chinese_text;
use sqlx::{postgres::PgPoolOptions, Row};
use std::env;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Load environment variables from .env file
dotenv::dotenv().ok();
// Get database URL from environment
let database_url = env::var("DATABASE_URL")
.unwrap_or_else(|_| "postgres://accusys@localhost:5432/momentry".to_string());
println!("Connecting to database...");
// Create connection pool
let pool = PgPoolOptions::new()
.max_connections(5)
.connect(&database_url)
.await?;
println!("Fetching Chinese chunks from database...");
// Get all chunks with Chinese text using raw query to avoid sqlx macro issues
let query = r#"
SELECT id, text_content, content->'data'->>'text' as chinese_text, content->>'text' as english_text
FROM chunks
WHERE text_content ~ '[\u4e00-\u9fff]'
ORDER BY id
"#;
let rows = sqlx::query(query).fetch_all(&pool).await?;
println!("Found {} Chinese chunks to process", rows.len());
let mut updated_count = 0;
for row in &rows {
let id: i32 = row.get(0);
let text_content: Option<String> = row.get(1);
let chinese_text: Option<String> = row.get(2);
let english_text: Option<String> = row.get(3);
// Clone text_content for later comparison
let text_content_clone = text_content.clone();
// Determine the original text (prioritize chinese_text from content->'data'->>'text')
let original_text = if let Some(ref chinese_text) = chinese_text {
chinese_text.as_str()
} else if let Some(ref english_text) = english_text {
english_text.as_str()
} else {
text_content.as_deref().unwrap_or("")
};
// Tokenize the text
let tokenized_text = tokenize_chinese_text(original_text);
// Check if tokenization changed the text
let current_text = text_content_clone.unwrap_or_default();
if current_text == tokenized_text {
println!("Skipping chunk {} - already tokenized", id);
continue;
}
println!("Updating chunk {}:", id);
println!(" Original: {}", original_text);
println!(" Tokenized: {}", tokenized_text);
// Update the chunk
sqlx::query("UPDATE chunks SET text_content = $1 WHERE id = $2")
.bind(&tokenized_text)
.bind(id)
.execute(&pool)
.await?;
updated_count += 1;
}
println!("\nMigration completed!");
println!(
"Updated {} out of {} Chinese chunks",
updated_count,
rows.len()
);
Ok(())
}

View File

@@ -0,0 +1,68 @@
use anyhow::{Context, Result};
use momentry_core::core::db::{Database, PostgresDb};
use std::env;
/// BM25 smoke test: runs a fixed list of queries (each optionally scoped to a
/// uuid), prints the tsquery produced for the query, and lists up to five hits
/// with a 60-character text preview.
///
/// # Errors
///
/// Fails if the PostgreSQL handle cannot be initialized or a search call errors.
#[tokio::main]
async fn main() -> Result<()> {
    // NOTE(review): this executes inside the runtime started by `#[tokio::main]`;
    // std documents `set_var` as unsafe while other threads may touch the
    // environment — confirm whether it should run before the runtime starts.
    env::set_var("RUST_LOG", "info");
    println!("=== BM25 簡單測試 ===\n");

    // Initialize PostgreSQL.
    let db = PostgresDb::init()
        .await
        .context("Failed to initialize PostgreSQL database")?;

    // (query, optional uuid filter) pairs to run.
    let cases = vec![
        ("telephone", Some("384b0ff44aaaa1f1")),
        ("工作", Some("9760d0820f0cf9a7")),
        // Simplified Chinese, should match Traditional "團體"
        ("团体", Some("9760d0820f0cf9a7")),
        ("computer", None),
    ];

    for (query_str, uuid_opt) in cases {
        let uuid_label = match uuid_opt {
            Some(u) => format!("(uuid: {})", u),
            None => String::new(),
        };
        println!("\n🔍 測試查詢: '{}' {}", query_str, uuid_label);

        // Show the converted tsquery (debugging aid).
        match db.prepare_tsquery(query_str) {
            Err(e) => println!(" TSQUERY 錯誤: {}", e),
            Ok(tsquery) => println!(" TSQUERY: {}", tsquery),
        }

        let hits = db.search_bm25(query_str, uuid_opt, 5).await?;
        println!("找到 {} 筆結果:", hits.len());

        for (idx, hit) in hits.iter().enumerate() {
            // Preview is cut by characters, not bytes; the ellipsis is appended
            // only when the text is longer than 60 characters.
            let is_truncated = hit.text.chars().count() > 60;
            let mut preview: String = hit.text.chars().take(60).collect();
            if is_truncated {
                preview.push_str("...");
            }

            let rank = idx + 1;
            println!(
                " {}. {} (uuid: {}, chunk_id: {})",
                rank, preview, hit.uuid, hit.chunk_id
            );
            println!(
                " 分數: {:.4}, 時間: {:.1}-{:.1}s, 類型: {}",
                hit.bm25_score, hit.start_time, hit.end_time, hit.chunk_type
            );
        }

        if hits.is_empty() {
            println!(" ⚠️ 沒有找到結果");
        }
    }

    Ok(())
}

View File

@@ -0,0 +1,37 @@
use anyhow::{Context, Result};
use momentry_core::core::db::{Database, PostgresDb};
use std::env;
/// Simplified-Chinese conversion test: for each Simplified-Chinese query, prints
/// the tsquery produced for it and up to five BM25 hits.
///
/// # Errors
///
/// Fails if the PostgreSQL handle cannot be initialized or a search call errors.
#[tokio::main]
async fn main() -> Result<()> {
    // NOTE(review): this executes inside the runtime started by `#[tokio::main]`;
    // std documents `set_var` as unsafe while other threads may touch the
    // environment — confirm whether it should run before the runtime starts.
    env::set_var("RUST_LOG", "info");
    println!("=== 簡體中文轉換測試 ===\n");

    // Initialize PostgreSQL.
    let db = PostgresDb::init()
        .await
        .context("Failed to initialize PostgreSQL database")?;

    // Test queries, written in Simplified Chinese.
    let simplified_queries = ["团体", "视频", "文件"];

    for &query_str in simplified_queries.iter() {
        println!("\n🔍 測試查詢 (簡體): '{}'", query_str);

        // Show the converted tsquery.
        match db.prepare_tsquery(query_str) {
            Err(e) => println!(" TSQUERY 錯誤: {}", e),
            Ok(tsquery) => println!(" TSQUERY: {}", tsquery),
        }

        // Run the search.
        let hits = db.search_bm25(query_str, None, 5).await?;
        println!(" 找到 {} 筆結果", hits.len());
        for (rank, hit) in (1usize..).zip(hits.iter()) {
            println!(" {}. [{}] {}", rank, hit.uuid, hit.text);
        }
    }

    Ok(())
}

View File

@@ -0,0 +1,23 @@
use momentry_core::core::text::global_synonym_expander;
/// Chinese synonym expansion test: prints the expanded form of each sample query
/// and the synonym list the expander returns for the query taken as a single term.
fn main() {
    const SAMPLE_QUERIES: [&str; 5] = ["電腦", "電腦工作", "工作檔案", "視頻分析", "電腦工作檔案"];

    let expander = global_synonym_expander();
    println!("=== 中文同義詞擴展測試 ===");

    for &query in SAMPLE_QUERIES.iter() {
        println!("\n查詢: '{}'", query);
        println!("擴展結果: {}", expander.expand_chinese_query(query));

        // Look the whole query up as a single term.
        println!("單詞擴展:");
        match expander.get_synonyms(query) {
            None => println!(" '{}' 沒有同義詞", query),
            Some(syns) => println!(" '{}' -> {:?}", query, syns),
        }
    }
}

View File

@@ -0,0 +1,56 @@
use anyhow::{Context, Result};
use momentry_core::core::db::{Database, PostgresDb};
use momentry_core::core::text::tokenizer::{contains_chinese, tokenize_chinese_text};
use momentry_core::core::text::{global_synonym_expander, normalize_chinese_query};
use std::env;
/// Synonym expansion test against the database: for each sample query, prints the
/// synonym-expanded form (when the query contains Chinese), the tsquery produced
/// for it, and up to two BM25 hits.
///
/// # Errors
///
/// Fails if the PostgreSQL handle cannot be initialized or a search call errors.
#[tokio::main]
async fn main() -> Result<()> {
    // NOTE(review): this executes inside the runtime started by `#[tokio::main]`;
    // std documents `set_var` as unsafe while other threads may touch the
    // environment — confirm whether it should run before the runtime starts.
    env::set_var("RUST_LOG", "info");
    println!("=== 同義詞擴展測試 ===\n");

    // Initialize PostgreSQL.
    let db = PostgresDb::init()
        .await
        .context("Failed to initialize PostgreSQL database")?;
    let expander = global_synonym_expander();

    // Test queries.
    let sample_queries = [
        "電腦",
        "視頻",
        "分析",
        "工作",
        "檔案",
        "電腦工作",
        "工作檔案",
    ];

    for &query_str in sample_queries.iter() {
        println!("\n🔍 測試查詢: '{}'", query_str);

        // Show the synonym expansion of the normalized query.
        if contains_chinese(query_str) {
            let normalized = normalize_chinese_query(query_str);
            println!(
                " 同義詞擴展: {}",
                expander.expand_chinese_query(&normalized)
            );
        }

        // Show the converted tsquery.
        match db.prepare_tsquery(query_str) {
            Err(e) => println!(" TSQUERY 錯誤: {}", e),
            Ok(tsquery) => println!(" TSQUERY: {}", tsquery),
        }

        // Run the search (even when it yields nothing).
        let hits = db.search_bm25(query_str, None, 2).await?;
        println!(" 找到 {} 筆結果", hits.len());
        for (rank, hit) in (1usize..).zip(hits.iter()) {
            println!(" {}. [{}] {}", rank, hit.uuid, hit.text);
        }
    }

    Ok(())
}

View File

@@ -0,0 +1,56 @@
use anyhow::{Context, Result};
use momentry_core::core::db::{Database, PostgresDb};
use momentry_core::core::text::tokenizer::{contains_chinese, tokenize_chinese_text};
use momentry_core::core::text::{global_synonym_expander, normalize_chinese_query};
use std::env;
/// Synonym expansion test against the database: for each sample query, prints the
/// synonym-expanded form (when the query contains Chinese), the tsquery produced
/// for it, and up to two BM25 hits.
///
/// # Errors
///
/// Fails if the PostgreSQL handle cannot be initialized or a search call errors.
#[tokio::main]
async fn main() -> Result<()> {
    // NOTE(review): this executes inside the runtime started by `#[tokio::main]`;
    // std documents `set_var` as unsafe while other threads may touch the
    // environment — confirm whether it should run before the runtime starts.
    env::set_var("RUST_LOG", "info");
    println!("=== 同義詞擴展測試 ===\n");

    // Initialize PostgreSQL.
    let db = PostgresDb::init()
        .await
        .context("Failed to initialize PostgreSQL database")?;
    let expander = global_synonym_expander();

    // Test queries.
    let sample_queries = [
        "電腦",
        "視頻",
        "分析",
        "工作",
        "檔案",
        "電腦工作",
        "工作檔案",
    ];

    for &query_str in sample_queries.iter() {
        println!("\n🔍 測試查詢: '{}'", query_str);

        // Show the synonym expansion of the normalized query.
        if contains_chinese(query_str) {
            let normalized = normalize_chinese_query(query_str);
            println!(
                " 同義詞擴展: {}",
                expander.expand_chinese_query(&normalized)
            );
        }

        // Show the converted tsquery.
        match db.prepare_tsquery(query_str) {
            Err(e) => println!(" TSQUERY 錯誤: {}", e),
            Ok(tsquery) => println!(" TSQUERY: {}", tsquery),
        }

        // Run the search (even when it yields nothing).
        let hits = db.search_bm25(query_str, None, 2).await?;
        println!(" 找到 {} 筆結果", hits.len());
        for (rank, hit) in (1usize..).zip(hits.iter()) {
            println!(" {}. [{}] {}", rank, hit.uuid, hit.text);
        }
    }

    Ok(())
}

View File

@@ -0,0 +1,27 @@
use momentry_core::core::text::tokenizer::{contains_chinese, tokenize_chinese_text};
/// Tokenizer test: runs `tokenize_chinese_text` over plain Chinese samples and
/// then over tsquery-style strings, printing the tokenized output and its
/// whitespace-separated pieces for each input.
fn main() {
    // Plain Chinese words and one short sentence.
    const PLAIN_SAMPLES: [&str; 6] = ["電腦", "工作", "視頻", "分析", "檔案", "這是一個測試"];
    // Inputs that contain tsquery operators (`|`, `&`, `:*`, parentheses).
    const OPERATOR_SAMPLES: [&str; 4] = [
        "(電腦 | 計算機 | 微机)",
        "(工作 | 任務 | 作業)",
        "電腦 & 工作",
        "(電腦:* | 計算機:* | 微机:*)",
    ];

    for &sample in PLAIN_SAMPLES.iter() {
        let tokenized = tokenize_chinese_text(sample);
        println!("Text: '{}' -> Tokens: '{}'", sample, tokenized);
        println!(
            " Split: {:?}",
            tokenized.split_whitespace().collect::<Vec<&str>>()
        );
    }

    println!("\n=== Testing complex queries ===");

    for &sample in OPERATOR_SAMPLES.iter() {
        let tokenized = tokenize_chinese_text(sample);
        println!("Query: '{}' -> Tokens: '{}'", sample, tokenized);
        println!(
            " Split: {:?}",
            tokenized.split_whitespace().collect::<Vec<&str>>()
        );
        println!("---");
    }
}