feat: backup architecture docs, source code, and scripts
This commit is contained in:
@@ -924,6 +924,7 @@ async fn main() -> Result<()> {
|
||||
user_id: None,
|
||||
job_id: None,
|
||||
created_at: String::new(),
|
||||
registration_time: None,
|
||||
};
|
||||
|
||||
let video_id = db.register_video(&record).await?;
|
||||
@@ -2373,20 +2374,25 @@ async fn main() -> Result<()> {
|
||||
target
|
||||
);
|
||||
|
||||
for chunk in sentence_chunks {
|
||||
println!("Starting to process {} chunks...", sentence_chunks.len());
|
||||
for (i, chunk) in sentence_chunks.iter().enumerate() {
|
||||
if i < 3 {
|
||||
println!("Processing chunk {}/{}: {}", i+1, sentence_chunks.len(), chunk.chunk_id);
|
||||
}
|
||||
let text = chunk
|
||||
.content
|
||||
.get("data")
|
||||
.and_then(|data| data.get("text"))
|
||||
.get("text")
|
||||
.and_then(|v| v.as_str())
|
||||
.or_else(|| chunk.content.get("data").and_then(|data| data.get("text")).and_then(|v| v.as_str()))
|
||||
.or(chunk.text_content.as_deref())
|
||||
.unwrap_or("");
|
||||
|
||||
eprintln!("Embedding chunk {}/{}: {} (text len: {})...", i+1, sentence_chunks.len(), chunk.chunk_id, text.len());
|
||||
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
print!("Embedding chunk {}... ", chunk.chunk_id);
|
||||
|
||||
match embedder.embed_document(text).await {
|
||||
Ok(vector) => {
|
||||
let vector_id = format!("{}_{}", chunk.uuid, chunk.chunk_id);
|
||||
@@ -2420,10 +2426,12 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
|
||||
stored_count += 1;
|
||||
println!("done ({} dims)", vector.len());
|
||||
if stored_count % 100 == 0 || stored_count <= 3 {
|
||||
println!("Stored {}/1867 vectors", stored_count);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!("failed: {}", e);
|
||||
eprintln!("embed_document error for {}: {}", chunk.chunk_id, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user