feat: Phase 1 handover - schema migration, correction mechanism, API fixes

Schema changes: dev.chunks->dev.chunk, remove old_chunk_id/chunk_index
Correction: asr-1.json format, generate/apply scripts
API: 37/37 endpoints fixed and tested
Docs: HANDOVER_V2.0.md for M4
This commit is contained in:
Accusys
2026-05-11 07:03:22 +08:00
parent ef894a44ad
commit 39ba5ddf76
147 changed files with 19843 additions and 3053 deletions

View File

@@ -13,7 +13,6 @@ pub struct MongoDb {
pub struct ChunkDocument {
pub uuid: String,
pub chunk_id: String,
pub chunk_index: u32,
pub chunk_type: String,
pub start_time: f64,
pub end_time: f64,
@@ -34,7 +33,6 @@ impl From<Chunk> for ChunkDocument {
Self {
uuid: chunk.uuid,
chunk_id: chunk.chunk_id,
chunk_index: chunk.chunk_index,
chunk_type: chunk.chunk_type.as_str().to_string(),
start_time,
end_time,
@@ -119,7 +117,7 @@ impl MongoDb {
file_id: 0,
uuid: doc.uuid,
chunk_id: doc.chunk_id,
chunk_index: doc.chunk_index,
chunk_type,
rule: ChunkRule::Rule1,
fps: doc.fps,
@@ -178,7 +176,7 @@ impl MongoDb {
file_id: 0,
uuid: doc.uuid,
chunk_id: doc.chunk_id,
chunk_index: doc.chunk_index,
chunk_type,
rule: ChunkRule::Rule1,
fps: doc.fps,
@@ -234,7 +232,7 @@ impl MongoDb {
file_id: 0,
uuid: doc.uuid,
chunk_id: doc.chunk_id,
chunk_index: doc.chunk_index,
chunk_type,
rule: ChunkRule::Rule1,
fps: doc.fps,

View File

@@ -56,7 +56,7 @@ pub struct CandidateRecord {
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct FileIdentityRecord {
pub id: i64,
pub id: i32,
pub file_uuid: String,
pub identity_id: i32,
pub name: String,
@@ -116,7 +116,7 @@ pub struct IdentityFaceRecord {
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct IdentityChunkRecord {
pub id: i64,
pub id: i32,
pub file_uuid: String,
pub chunk_id: String,
pub chunk_type: String,
@@ -788,8 +788,8 @@ impl PostgresDb {
.await?;
// Chunks
sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_uuid)")
sqlx::query("CREATE TABLE IF NOT EXISTS chunk (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunk_file ON chunk(file_uuid)")
.execute(pool)
.await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type)")
@@ -845,7 +845,7 @@ impl PostgresDb {
sqlx::query(
"CREATE TRIGGER chunks_search_vector_trigger
BEFORE INSERT OR UPDATE ON chunks
BEFORE INSERT OR UPDATE ON chunk
FOR EACH ROW EXECUTE FUNCTION update_search_vector()",
)
.execute(pool)
@@ -1232,7 +1232,7 @@ impl PostgresDb {
let tx = self.pool.begin().await?;
let chunk_vectors = schema::table_name("chunk_vectors");
let chunks = schema::table_name("chunks");
let chunks = "dev.chunk";
let processor_results = schema::table_name("processor_results");
let videos = schema::table_name("videos");
@@ -1254,6 +1254,11 @@ impl PostgresDb {
.execute(&self.pool)
.await?;
sqlx::query(&format!("DELETE FROM dev.pre_chunks WHERE file_uuid = $1"))
.bind(uuid)
.execute(&self.pool)
.await?;
sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", videos))
.bind(uuid)
.execute(&self.pool)
@@ -1277,7 +1282,7 @@ impl PostgresDb {
}
pub async fn get_chunk_count(&self, uuid: &str) -> Result<(i64, i64)> {
let chunks = schema::table_name("chunks");
let chunks = "dev.chunk";
let sentence_count: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'",
chunks
@@ -2417,8 +2422,10 @@ impl PostgresDb {
pub async fn get_identity_by_uuid(&self, uuid: &Uuid) -> Result<Option<IdentityDetailRecord>> {
let query = r#"
SELECT id, uuid, name, identity_type, source, status, metadata, reference_data,
voice_embedding, identity_embedding, face_embedding,
tmdb_id, tmdb_profile, created_at, NULL::timestamptz as updated_at
voice_embedding::real[] as voice_embedding,
identity_embedding::real[] as identity_embedding,
face_embedding::real[] as face_embedding,
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at
FROM identities
WHERE uuid = $1
"#;
@@ -2497,7 +2504,7 @@ impl PostgresDb {
let query = r#"
SELECT c.id, c.file_uuid, c.chunk_id, c.chunk_type,
c.start_time, c.end_time, c.text_content, c.content
FROM chunks c
FROM dev.chunk c
WHERE c.file_uuid IN (
SELECT DISTINCT fd.file_uuid
FROM face_detections fd
@@ -2538,7 +2545,7 @@ impl PostgresDb {
}
pub async fn store_chunk(&self, chunk: &Chunk) -> Result<()> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let content_with_rule = serde_json::json!({
"rule": chunk.rule.as_str(),
"data": chunk.content
@@ -2567,9 +2574,9 @@ impl PostgresDb {
sqlx::query(&format!(
r#"
INSERT INTO {} (file_id, file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
VALUES ($1, $2, $3, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE SET
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11::jsonb, $12::jsonb, $13, $14, $15, $16, $17)
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
start_time = EXCLUDED.start_time,
end_time = EXCLUDED.end_time,
fps = EXCLUDED.fps,
@@ -2590,7 +2597,6 @@ impl PostgresDb {
.bind(chunk.file_id)
.bind(&chunk.uuid)
.bind(&chunk.chunk_id)
.bind(chunk.chunk_index as i32)
.bind(chunk.chunk_type.as_str())
.bind(chunk.start_time().seconds())
.bind(chunk.end_time().seconds())
@@ -2616,7 +2622,7 @@ impl PostgresDb {
chunk: &Chunk,
tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
) -> Result<()> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let content_with_rule = serde_json::json!({
"rule": chunk.rule.as_str(),
"data": chunk.content
@@ -2642,9 +2648,9 @@ impl PostgresDb {
sqlx::query(&format!(
r#"
INSERT INTO {} (file_id, file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
VALUES ($1, $2, $3, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE SET
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11::jsonb, $12::jsonb, $13, $14, $15, $16, $17)
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
start_time = EXCLUDED.start_time,
end_time = EXCLUDED.end_time,
fps = EXCLUDED.fps,
@@ -2665,7 +2671,6 @@ impl PostgresDb {
.bind(chunk.file_id)
.bind(&chunk.uuid)
.bind(&chunk.chunk_id)
.bind(chunk.chunk_index as i32)
.bind(chunk.chunk_type.as_str())
.bind(chunk.start_time().seconds())
.bind(chunk.end_time().seconds())
@@ -2687,9 +2692,9 @@ impl PostgresDb {
}
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let rows = sqlx::query(&format!(
"SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY chunk_index",
"SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY id",
table
))
.bind(uuid)
@@ -2699,8 +2704,7 @@ impl PostgresDb {
let chunks: Vec<Chunk> = rows
.into_iter()
.map(|r| {
let chunk_type_str: String = r.get(4);
let chunk_index: i32 = r.get(3);
let chunk_type_str: String = r.get(3);
let chunk_type = match chunk_type_str.as_str() {
"time" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
@@ -2740,7 +2744,7 @@ impl PostgresDb {
file_id,
uuid: r.get("uuid"),
chunk_id: r.get("chunk_id"),
chunk_index: chunk_index as u32,
chunk_type,
rule,
@@ -2768,9 +2772,9 @@ impl PostgresDb {
chunk_id: &str,
uuid: &str,
) -> Result<Option<Chunk>> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let row = sqlx::query(&format!(
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE chunk_id = $1 AND uuid = $2",
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE chunk_id = $1 AND uuid = $2",
table
))
.bind(chunk_id)
@@ -2779,25 +2783,24 @@ impl PostgresDb {
.await?;
if let Some(r) = row {
let chunk_type_str: String = r.get(4);
let chunk_index: i32 = r.get(3);
let chunk_type = match chunk_type_str.as_str() {
"time" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
"cut" => ChunkType::Cut,
"trace" => ChunkType::Trace,
"story" => ChunkType::Story,
_ => ChunkType::TimeBased,
};
let chunk_type_str: String = r.get(3);
let chunk_type = match chunk_type_str.as_str() {
"time" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
"cut" => ChunkType::Cut,
"trace" => ChunkType::Trace,
"story" => ChunkType::Story,
_ => ChunkType::TimeBased,
};
let content: serde_json::Value = r.get(9);
let metadata: Option<serde_json::Value> = r.get(10);
let content: serde_json::Value = r.get(8);
let metadata: Option<serde_json::Value> = r.get(9);
let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();
let pre_chunk_ids: Vec<i32> = r.try_get(12).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(13).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(14).unwrap_or_default();
let (rule, content_data) = if content.get("rule").is_some() {
let (rule, content_data) = if content.get("rule").is_some() {
let rule_str = content
.get("rule")
.and_then(|v| v.as_str())
@@ -2820,7 +2823,7 @@ impl PostgresDb {
file_id,
uuid: r.get("uuid"),
chunk_id: r.get("chunk_id"),
chunk_index: chunk_index as u32,
chunk_type,
rule,
fps: r.get("fps"),
@@ -2996,9 +2999,9 @@ impl PostgresDb {
start_time: f64,
end_time: f64,
) -> Result<Vec<Chunk>> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let rows = sqlx::query(&format!(
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids
"SELECT file_id, uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids
FROM {}
WHERE file_id = $1 AND start_time >= $2 AND end_time <= $3
ORDER BY start_time",
@@ -3013,8 +3016,7 @@ impl PostgresDb {
let chunks: Vec<Chunk> = rows
.into_iter()
.map(|r| {
let chunk_type_str: String = r.get(4);
let chunk_index: i32 = r.get(3);
let chunk_type_str: String = r.get(3);
let chunk_type = match chunk_type_str.as_str() {
"time" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
@@ -3024,12 +3026,12 @@ impl PostgresDb {
_ => ChunkType::TimeBased,
};
let content: serde_json::Value = r.get(11);
let metadata: Option<serde_json::Value> = r.get(12);
let content: serde_json::Value = r.get(10);
let metadata: Option<serde_json::Value> = r.get(11);
let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
let pre_chunk_ids: Vec<i32> = r.try_get(14).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(15).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(16).unwrap_or_default();
let (rule, content_data) = if content.get("rule").is_some() {
let rule_str = content
@@ -3054,7 +3056,7 @@ impl PostgresDb {
file_id,
uuid: r.get("uuid"),
chunk_id: r.get("chunk_id"),
chunk_index: chunk_index as u32,
chunk_type,
rule,
@@ -3082,9 +3084,9 @@ impl PostgresDb {
return Ok(vec![]);
}
let table = schema::table_name("chunks");
let table = "dev.chunk";
let rows = sqlx::query(&format!(
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = ANY($1) ORDER BY chunk_index",
"SELECT file_id, uuid, chunk_id, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = ANY($1) ORDER BY id",
table
))
.bind(chunk_ids)
@@ -3094,8 +3096,7 @@ impl PostgresDb {
let chunks: Vec<Chunk> = rows
.into_iter()
.map(|r| {
let chunk_type_str: String = r.get(4);
let chunk_index: i32 = r.get(3);
let chunk_type_str: String = r.get(3);
let chunk_type = match chunk_type_str.as_str() {
"time" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
@@ -3135,7 +3136,7 @@ impl PostgresDb {
file_id,
uuid: r.get("uuid"),
chunk_id: r.get("chunk_id"),
chunk_index: chunk_index as u32,
chunk_type,
rule,
@@ -3192,7 +3193,7 @@ impl PostgresDb {
}
pub async fn update_vector_id(&self, chunk_id: &str, vector_id: &str) -> Result<()> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
sqlx::query(&format!(
"UPDATE {} SET vector_id = $1 WHERE chunk_id = $2",
table
@@ -3214,12 +3215,12 @@ impl PostgresDb {
}
pub async fn search_text(&self, query: &str, chunk_type: Option<&str>) -> Result<Vec<Chunk>> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let query_pattern = format!("%{}%", query);
let sql = match chunk_type {
Some(_) => &format!("SELECT uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 AND chunk_type = $2 ORDER BY chunk_index", table),
None => &format!("SELECT uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 ORDER BY chunk_index", table),
Some(_) => &format!("SELECT uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 AND chunk_type = $2 ORDER BY id", table),
None => &format!("SELECT uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 ORDER BY id", table),
};
let chunks = if let Some(ct) = chunk_type {
@@ -3228,7 +3229,6 @@ impl PostgresDb {
(
String,
String,
i32,
String,
f64,
f64,
@@ -3252,7 +3252,6 @@ impl PostgresDb {
(
String,
String,
i32,
String,
f64,
f64,
@@ -3274,7 +3273,7 @@ impl PostgresDb {
let results: Vec<Chunk> = chunks
.into_iter()
.map(|r| {
let chunk_type = match r.3.as_str() {
let chunk_type = match r.2.as_str() {
"time_based" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
"cut" => ChunkType::Cut,
@@ -3284,29 +3283,29 @@ impl PostgresDb {
};
let content: serde_json::Value =
serde_json::from_str(&r.9).unwrap_or(serde_json::json!({}));
serde_json::from_str(&r.8).unwrap_or(serde_json::json!({}));
let metadata: Option<serde_json::Value> =
r.10.and_then(|m| serde_json::from_str(&m).ok());
r.9.and_then(|m| serde_json::from_str(&m).ok());
Chunk {
file_id: 0,
uuid: r.0,
chunk_id: r.1,
chunk_index: r.2 as u32,
chunk_type,
rule: ChunkRule::Rule1,
fps: r.6,
start_frame: r.7,
end_frame: r.8,
text_content: Some(r.9),
fps: r.5,
start_frame: r.6,
end_frame: r.7,
text_content: Some(r.8),
content,
metadata,
vector_id: r.11,
vector_id: r.10,
frame_count: 0,
pre_chunk_ids: vec![],
parent_chunk_id: r.12,
child_chunk_ids: r.13,
parent_chunk_id: r.11,
child_chunk_ids: r.12,
visual_stats: None,
}
})
@@ -3321,13 +3320,13 @@ impl PostgresDb {
uuid: Option<&str>,
limit: usize,
) -> Result<Vec<Bm25Result>> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let tsquery = self.prepare_tsquery(query).await?;
let sql = match uuid {
Some(_) => &format!(
r#"
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
SELECT c.chunk_id, c.file_uuid, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
c.visual_stats,
pc.metadata->'structured_summary' as scene_summary,
@@ -3342,7 +3341,7 @@ impl PostgresDb {
),
None => &format!(
r#"
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
SELECT c.chunk_id, c.file_uuid, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
c.visual_stats,
pc.metadata->'structured_summary' as scene_summary,
@@ -3406,7 +3405,7 @@ impl PostgresDb {
Bm25Result {
chunk_id: r.0,
uuid: r.1,
chunk_index: r.2 as u32,
chunk_type: r.3,
start_frame: r.4,
end_frame: r.5,
@@ -3472,7 +3471,7 @@ impl PostgresDb {
HybridSearchResult {
chunk_id: r.chunk_id.clone(),
uuid: r.uuid.clone(),
chunk_index: r.chunk_index,
chunk_type: r.chunk_type.clone(),
start_frame: r.start_frame,
end_frame: r.end_frame,
@@ -3526,7 +3525,7 @@ impl PostgresDb {
HybridSearchResult {
chunk_id: r.chunk_id.clone(),
uuid: r.uuid.clone(),
chunk_index: chunk_data.map(|c| c.chunk_index).unwrap_or(0),
chunk_type: chunk_data
.map(|c| c.chunk_type.as_str().to_string())
.unwrap_or_default(),
@@ -3779,7 +3778,6 @@ pub struct SceneSummary {
pub struct Bm25Result {
pub chunk_id: String,
pub uuid: String,
pub chunk_index: u32,
pub chunk_type: String,
pub start_frame: i64,
pub end_frame: i64,
@@ -3797,7 +3795,6 @@ pub struct Bm25Result {
pub struct HybridSearchResult {
pub uuid: String,
pub chunk_id: String,
pub chunk_index: u32,
pub chunk_type: String,
pub start_frame: i64,
pub end_frame: i64,
@@ -4443,7 +4440,7 @@ impl PostgresDb {
total_frames: u64,
) -> Result<()> {
let table = schema::table_name("videos");
let chunks_table = schema::table_name("chunks");
let chunks_table = "dev.chunk";
let pre_chunks_table = schema::table_name("pre_chunks");
// Query chunks count and frames
@@ -4622,7 +4619,7 @@ impl PostgresDb {
let results = sqlx::query_as::<_, SemanticSearchResult>(
r#"
SELECT
id, chunk_index as scene_order, start_time, end_time,
id as scene_order, start_time, end_time,
COALESCE(summary_text, text_content, '') as summary,
metadata,
(1 - (embedding <=> $1::vector)) as similarity
@@ -4820,7 +4817,7 @@ mod tests {
"file_id": 1,
"uuid": "test",
"chunk_id": "c1",
"chunk_index": 0,
"chunk_type": "time_based",
"rule": "rule1",
"start_time": 0.0,
@@ -4960,7 +4957,7 @@ mod tests {
let result = Bm25Result {
chunk_id: "sentence_001".to_string(),
uuid: "test-uuid".to_string(),
chunk_index: 1,
chunk_type: "sentence".to_string(),
start_frame: 0,
end_frame: 150,
@@ -4985,7 +4982,7 @@ mod tests {
let result = HybridSearchResult {
chunk_id: "sentence_001".to_string(),
uuid: "test-uuid".to_string(),
chunk_index: 1,
chunk_type: "sentence".to_string(),
start_frame: 0,
end_frame: 150,

View File

@@ -120,9 +120,16 @@ impl QdrantDb {
.json(&body)
.send()
.await
.context(format!("Failed to create Qdrant collection: {}", collection))?;
.context(format!(
"Failed to create Qdrant collection: {}",
collection
))?;
tracing::info!("Created Qdrant collection: {} (dim={})", collection, vector_dim);
tracing::info!(
"Created Qdrant collection: {} (dim={})",
collection,
vector_dim
);
Ok(())
}

View File

@@ -129,7 +129,7 @@ impl SyncDb {
let chunk = Chunk::from_seconds(
0, // file_id - will be set later
uuid.to_string(),
i as u32,
format!("{}", i),
ChunkType::Sentence,
ChunkRule::Rule1,
segment.start,