feat: back up architecture docs, source code, and scripts

This commit is contained in:
Warren
2026-04-25 17:15:45 +08:00
parent 59809dae1f
commit 1f84e5469f
368 changed files with 146329 additions and 261 deletions

View File

@@ -924,6 +924,7 @@ async fn main() -> Result<()> {
user_id: None,
job_id: None,
created_at: String::new(),
registration_time: None,
};
let video_id = db.register_video(&record).await?;
@@ -2373,20 +2374,25 @@ async fn main() -> Result<()> {
target
);
for chunk in sentence_chunks {
println!("Starting to process {} chunks...", sentence_chunks.len());
for (i, chunk) in sentence_chunks.iter().enumerate() {
if i < 3 {
println!("Processing chunk {}/{}: {}", i+1, sentence_chunks.len(), chunk.chunk_id);
}
let text = chunk
.content
.get("data")
.and_then(|data| data.get("text"))
.get("text")
.and_then(|v| v.as_str())
.or_else(|| chunk.content.get("data").and_then(|data| data.get("text")).and_then(|v| v.as_str()))
.or(chunk.text_content.as_deref())
.unwrap_or("");
eprintln!("Embedding chunk {}/{}: {} (text len: {})...", i+1, sentence_chunks.len(), chunk.chunk_id, text.len());
if text.is_empty() {
continue;
}
print!("Embedding chunk {}... ", chunk.chunk_id);
match embedder.embed_document(text).await {
Ok(vector) => {
let vector_id = format!("{}_{}", chunk.uuid, chunk.chunk_id);
@@ -2420,10 +2426,12 @@ async fn main() -> Result<()> {
}
stored_count += 1;
println!("done ({} dims)", vector.len());
if stored_count % 100 == 0 || stored_count <= 3 {
println!("Stored {}/1867 vectors", stored_count);
}
}
Err(e) => {
println!("failed: {}", e);
eprintln!("embed_document error for {}: {}", chunk.chunk_id, e);
}
}
}