Initial commit: Momentry Core v0.1

- Rust-based digital asset management system
- Video analysis: ASR, OCR, YOLO, Face, Pose
- RAG capabilities with Qdrant vector database
- Multi-database support: PostgreSQL, Redis, MongoDB
- Monitoring system with launchd plists
- n8n workflow automation integration
This commit is contained in:
accusys
2026-03-16 15:07:33 +08:00
parent ca24794853
commit 75edf0aa71
101 changed files with 19858 additions and 0 deletions

73
src/core/processor/asr.rs Normal file
View File

@@ -0,0 +1,73 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::path::Path;
use std::process::Command;
/// Transcript loaded from the JSON file written by the ASR script (see `process_asr`).
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrResult {
/// Language reported for the audio, if any. The value format is defined by the script.
pub language: Option<String>,
/// Presumably the confidence of the language detection — TODO confirm range with the script.
pub language_probability: Option<f64>,
/// Transcribed segments, in the order they appear in the JSON.
pub segments: Vec<AsrSegment>,
}
/// One transcribed span of speech inside an `AsrResult`.
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrSegment {
/// Start of the segment (presumably seconds from the start of the media — TODO confirm).
pub start: f64,
/// End of the segment, in the same unit as `start`.
pub end: f64,
/// Recognized text for this segment.
pub text: String,
}
pub async fn process_asr(video_path: &str, output_path: &str) -> Result<AsrResult> {
let script_path = Path::new(env!("CARGO_MANIFEST_DIR"))
.join("scripts")
.join("asr_processor.py");
let venv_python = Path::new(env!("CARGO_MANIFEST_DIR"))
.join("venv")
.join("bin")
.join("python");
println!("[ASR] Starting ASR processing...");
println!("[ASR] Video: {}", video_path);
let output = Command::new(venv_python)
.arg(script_path)
.arg(video_path)
.arg(output_path)
.output()
.context("Failed to run ASR processor")?;
let stderr = String::from_utf8_lossy(&output.stderr);
for line in stderr.lines() {
if line.starts_with("ASR_START") {
println!("[ASR] Loading model...");
} else if line.starts_with("ASR_LANGUAGE:") {
let lang = line.trim_start_matches("ASR_LANGUAGE:");
println!("[ASR] Detected language: {}", lang);
} else if line.starts_with("ASR_PROGRESS:") {
let count = line.trim_start_matches("ASR_PROGRESS:");
println!("[ASR] Processed {} segments...", count);
} else if line.starts_with("ASR_COMPLETE:") {
let count = line.trim_start_matches("ASR_COMPLETE:");
println!("[ASR] Completed! Total: {} segments", count);
}
}
if !output.status.success() {
anyhow::bail!("ASR failed: {}", stderr);
}
let json_str = std::fs::read_to_string(output_path).context("Failed to read ASR output")?;
let result: AsrResult =
serde_json::from_str(&json_str).context("Failed to parse ASR output")?;
println!(
"[ASR] Result: {} segments, language: {:?}",
result.segments.len(),
result.language
);
Ok(result)
}

View File

@@ -0,0 +1,28 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
/// Output of the speaker-diarization step (`process_asrx`).
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrxResult {
/// Speaker-attributed segments.
pub segments: Vec<AsrxSegment>,
}
/// One span of speech together with the speaker it is attributed to.
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrxSegment {
/// Start of the segment (presumably seconds from the start of the media — TODO confirm).
pub start: f64,
/// End of the segment, in the same unit as `start`.
pub end: f64,
/// Text spoken in this segment.
pub text: String,
/// Identifier of the speaker; the naming scheme is not defined in this file.
pub speaker_id: String,
/// Optional embedding vector for the speaker; dimensionality is not fixed here.
pub speaker_embedding: Option<Vec<f32>>,
}
/// Placeholder for speaker diarization; the real implementation is still to be written.
///
/// Prints `video_path` to stdout and returns an [`AsrxResult`] with no segments.
/// `output_path` is accepted so the signature is already final, but it is ignored for now.
///
/// # Errors
///
/// The stub never fails; the `Result` return type is reserved for the implementation.
pub async fn process_asrx(video_path: &str, output_path: &str) -> Result<AsrxResult> {
    // TODO: Implement speaker diarization. Candidate approaches:
    //   1. pyannote.audio
    //   2. whisperx
    //   3. a Python subprocess

    // Discard the not-yet-used parameter explicitly so the stub builds without an
    // `unused_variables` warning while the public signature stays unchanged.
    let _ = output_path;

    println!("Processing speaker diarization for: {}", video_path);
    Ok(AsrxResult {
        segments: Vec::new(),
    })
}

View File

@@ -0,0 +1,36 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
/// Output of the face-detection step (`process_face`).
#[derive(Debug, Serialize, Deserialize)]
pub struct FaceResult {
/// Per-frame detection records.
pub frames: Vec<FaceFrame>,
}
/// Faces found in a single video frame.
#[derive(Debug, Serialize, Deserialize)]
pub struct FaceFrame {
/// Frame number (presumably the index within the video — TODO confirm whether 0- or 1-based).
pub frame: u64,
/// Time of the frame (presumably seconds from the start of the video — TODO confirm).
pub timestamp: f64,
/// Faces detected in this frame.
pub faces: Vec<Face>,
}
/// A single detected face.
#[derive(Debug, Serialize, Deserialize)]
pub struct Face {
/// Identifier assigned to the face; the scheme is not defined in this file.
pub face_id: String,
// `x`, `y`, `width`, `height` presumably describe the bounding box in pixels with
// (`x`, `y`) as one corner — TODO confirm origin and units with the detector.
pub x: i32,
pub y: i32,
pub width: i32,
pub height: i32,
/// Detector confidence for this face (range not fixed here; presumably 0.0–1.0).
pub confidence: f32,
/// Optional embedding vector for the face; dimensionality is not fixed here.
pub embedding: Option<Vec<f32>>,
}
/// Placeholder for face detection; the real implementation is still to be written.
///
/// Prints `video_path` to stdout and returns a [`FaceResult`] with no frames.
/// `output_path` is accepted so the signature is already final, but it is ignored for now.
///
/// # Errors
///
/// The stub never fails; the `Result` return type is reserved for the implementation.
pub async fn process_face(video_path: &str, output_path: &str) -> Result<FaceResult> {
    // TODO: Implement face detection. Candidate approaches:
    //   1. MTCNN or RetinaFace with ONNX
    //   2. a Python subprocess

    // Discard the not-yet-used parameter explicitly so the stub builds without an
    // `unused_variables` warning while the public signature stays unchanged.
    let _ = output_path;

    println!("Processing face detection for: {}", video_path);
    Ok(FaceResult { frames: Vec::new() })
}

13
src/core/processor/mod.rs Normal file
View File

@@ -0,0 +1,13 @@
// Video/audio processors, one submodule per analysis step.
pub mod asr;
pub mod asrx;
pub mod face;
pub mod ocr;
pub mod pose;
pub mod yolo;
// Re-export the `process_*` entry points so callers can import them from this module.
// Only the ASR result types are re-exported here; result types of the other steps are
// reached through their (public) submodules.
pub use asr::{process_asr, AsrResult, AsrSegment};
pub use asrx::process_asrx;
pub use face::process_face;
pub use ocr::process_ocr;
pub use pose::process_pose;
pub use yolo::process_yolo;

36
src/core/processor/ocr.rs Normal file
View File

@@ -0,0 +1,36 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
/// Output of the OCR step (`process_ocr`).
#[derive(Debug, Serialize, Deserialize)]
pub struct OcrResult {
/// Per-frame text records.
pub frames: Vec<OcrFrame>,
}
/// Text regions found in a single video frame.
#[derive(Debug, Serialize, Deserialize)]
pub struct OcrFrame {
/// Frame number (presumably the index within the video — TODO confirm whether 0- or 1-based).
pub frame: u64,
/// Time of the frame (presumably seconds from the start of the video — TODO confirm).
pub timestamp: f64,
/// Text regions recognized in this frame.
pub texts: Vec<OcrText>,
}
/// A single recognized text region.
#[derive(Debug, Serialize, Deserialize)]
pub struct OcrText {
/// Recognized text content.
pub text: String,
// `x`, `y`, `width`, `height` presumably describe the bounding box in pixels with
// (`x`, `y`) as one corner — TODO confirm origin and units with the OCR backend.
pub x: i32,
pub y: i32,
pub width: i32,
pub height: i32,
/// Recognition confidence (range not fixed here; presumably 0.0–1.0).
pub confidence: f32,
}
/// Placeholder for OCR; the real implementation is still to be written.
///
/// Prints `video_path` to stdout and returns an [`OcrResult`] with no frames.
/// `output_path` is accepted so the signature is already final, but it is ignored for now.
///
/// # Errors
///
/// The stub never fails; the `Result` return type is reserved for the implementation.
pub async fn process_ocr(video_path: &str, output_path: &str) -> Result<OcrResult> {
    // TODO: Implement OCR processing. Candidate approaches:
    //   1. tesseract
    //   2. Python pytesseract via subprocess
    //   3. a Rust OCR library

    // Discard the not-yet-used parameter explicitly so the stub builds without an
    // `unused_variables` warning while the public signature stays unchanged.
    let _ = output_path;

    println!("Processing OCR for: {}", video_path);
    Ok(OcrResult { frames: Vec::new() })
}

View File

@@ -0,0 +1,47 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
/// Output of the pose-estimation step (`process_pose`).
#[derive(Debug, Serialize, Deserialize)]
pub struct PoseResult {
/// Per-frame pose records.
pub frames: Vec<PoseFrame>,
}
/// Poses found in a single video frame.
#[derive(Debug, Serialize, Deserialize)]
pub struct PoseFrame {
/// Frame number (presumably the index within the video — TODO confirm whether 0- or 1-based).
pub frame: u64,
/// Time of the frame (presumably seconds from the start of the video — TODO confirm).
pub timestamp: f64,
/// One entry per person detected in this frame.
pub persons: Vec<PersonPose>,
}
/// Pose of one person: body keypoints plus the person's bounding box.
#[derive(Debug, Serialize, Deserialize)]
pub struct PersonPose {
/// Detected keypoints; which joints are present depends on the model used.
pub keypoints: Vec<Keypoint>,
/// Bounding box for the person.
pub bbox: Bbox,
}
/// A named body keypoint with its position and confidence.
#[derive(Debug, Serialize, Deserialize)]
pub struct Keypoint {
/// Keypoint label; the naming scheme is set by the pose model, not by this file.
pub name: String,
/// Horizontal position (presumably pixels, possibly sub-pixel — TODO confirm).
pub x: f32,
/// Vertical position, in the same unit as `x`.
pub y: f32,
/// Confidence for this keypoint (range not fixed here; presumably 0.0–1.0).
pub confidence: f32,
}
/// Axis-aligned rectangle given by a position and a size.
///
/// Presumably pixel coordinates with (`x`, `y`) as one corner — TODO confirm origin and units.
#[derive(Debug, Serialize, Deserialize)]
pub struct Bbox {
pub x: i32,
pub y: i32,
pub width: i32,
pub height: i32,
}
/// Placeholder for pose estimation; the real implementation is still to be written.
///
/// Prints `video_path` to stdout and returns a [`PoseResult`] with no frames.
/// `output_path` is accepted so the signature is already final, but it is ignored for now.
///
/// # Errors
///
/// The stub never fails; the `Result` return type is reserved for the implementation.
pub async fn process_pose(video_path: &str, output_path: &str) -> Result<PoseResult> {
    // TODO: Implement pose estimation. Candidate approaches:
    //   1. MoveNet or PoseNet with ONNX
    //   2. a Python subprocess with ultralytics

    // Discard the not-yet-used parameter explicitly so the stub builds without an
    // `unused_variables` warning while the public signature stays unchanged.
    let _ = output_path;

    println!("Processing pose estimation for: {}", video_path);
    Ok(PoseResult { frames: Vec::new() })
}

View File

@@ -0,0 +1,36 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
/// Output of the YOLO object-detection step (`process_yolo`).
#[derive(Debug, Serialize, Deserialize)]
pub struct YoloResult {
/// Per-frame detection records.
pub frames: Vec<YoloFrame>,
}
/// Objects found in a single video frame.
#[derive(Debug, Serialize, Deserialize)]
pub struct YoloFrame {
/// Frame number (presumably the index within the video — TODO confirm whether 0- or 1-based).
pub frame: u64,
/// Time of the frame (presumably seconds from the start of the video — TODO confirm).
pub timestamp: f64,
/// Objects detected in this frame.
pub objects: Vec<YoloObject>,
}
/// A single detected object.
#[derive(Debug, Serialize, Deserialize)]
pub struct YoloObject {
/// Human-readable class label.
pub class_name: String,
/// Numeric class identifier; the mapping to `class_name` comes from the model, not this file.
pub class_id: u32,
// `x`, `y`, `width`, `height` presumably describe the bounding box in pixels with
// (`x`, `y`) as one corner — TODO confirm origin and units with the detector.
pub x: i32,
pub y: i32,
pub width: i32,
pub height: i32,
/// Detection confidence (range not fixed here; presumably 0.0–1.0).
pub confidence: f32,
}
/// Placeholder for YOLO object detection; the real implementation is still to be written.
///
/// Prints `video_path` to stdout and returns a [`YoloResult`] with no frames.
/// `output_path` is accepted so the signature is already final, but it is ignored for now.
///
/// # Errors
///
/// The stub never fails; the `Result` return type is reserved for the implementation.
pub async fn process_yolo(video_path: &str, output_path: &str) -> Result<YoloResult> {
    // TODO: Implement YOLO processing. Candidate approaches:
    //   1. ONNX Runtime (ort) with a YOLO model
    //   2. a Python subprocess with ultralytics

    // Discard the not-yet-used parameter explicitly so the stub builds without an
    // `unused_variables` warning while the public signature stays unchanged.
    let _ = output_path;

    println!("Processing YOLO for: {}", video_path);
    Ok(YoloResult { frames: Vec::new() })
}