|
|
|
|
@@ -56,7 +56,7 @@ pub struct CandidateRecord {
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
|
|
|
|
pub struct FileIdentityRecord {
|
|
|
|
|
pub id: i64,
|
|
|
|
|
pub id: i32,
|
|
|
|
|
pub file_uuid: String,
|
|
|
|
|
pub identity_id: i32,
|
|
|
|
|
pub name: String,
|
|
|
|
|
@@ -116,7 +116,7 @@ pub struct IdentityFaceRecord {
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
|
|
|
|
pub struct IdentityChunkRecord {
|
|
|
|
|
pub id: i64,
|
|
|
|
|
pub id: i32,
|
|
|
|
|
pub file_uuid: String,
|
|
|
|
|
pub chunk_id: String,
|
|
|
|
|
pub chunk_type: String,
|
|
|
|
|
@@ -788,8 +788,8 @@ impl PostgresDb {
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
// Chunks
|
|
|
|
|
sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
|
|
|
|
|
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_uuid)")
|
|
|
|
|
sqlx::query("CREATE TABLE IF NOT EXISTS chunk (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
|
|
|
|
|
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunk_file ON chunk(file_uuid)")
|
|
|
|
|
.execute(pool)
|
|
|
|
|
.await?;
|
|
|
|
|
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type)")
|
|
|
|
|
@@ -845,7 +845,7 @@ impl PostgresDb {
|
|
|
|
|
|
|
|
|
|
sqlx::query(
|
|
|
|
|
"CREATE TRIGGER chunks_search_vector_trigger
|
|
|
|
|
BEFORE INSERT OR UPDATE ON chunks
|
|
|
|
|
BEFORE INSERT OR UPDATE ON chunk
|
|
|
|
|
FOR EACH ROW EXECUTE FUNCTION update_search_vector()",
|
|
|
|
|
)
|
|
|
|
|
.execute(pool)
|
|
|
|
|
@@ -1232,7 +1232,7 @@ impl PostgresDb {
|
|
|
|
|
let tx = self.pool.begin().await?;
|
|
|
|
|
|
|
|
|
|
let chunk_vectors = schema::table_name("chunk_vectors");
|
|
|
|
|
let chunks = schema::table_name("chunks");
|
|
|
|
|
let chunks = "dev.chunk";
|
|
|
|
|
let processor_results = schema::table_name("processor_results");
|
|
|
|
|
let videos = schema::table_name("videos");
|
|
|
|
|
|
|
|
|
|
@@ -1254,6 +1254,11 @@ impl PostgresDb {
|
|
|
|
|
.execute(&self.pool)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
sqlx::query(&format!("DELETE FROM dev.pre_chunks WHERE file_uuid = $1"))
|
|
|
|
|
.bind(uuid)
|
|
|
|
|
.execute(&self.pool)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", videos))
|
|
|
|
|
.bind(uuid)
|
|
|
|
|
.execute(&self.pool)
|
|
|
|
|
@@ -1277,7 +1282,7 @@ impl PostgresDb {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub async fn get_chunk_count(&self, uuid: &str) -> Result<(i64, i64)> {
|
|
|
|
|
let chunks = schema::table_name("chunks");
|
|
|
|
|
let chunks = "dev.chunk";
|
|
|
|
|
let sentence_count: i64 = sqlx::query_scalar(&format!(
|
|
|
|
|
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'",
|
|
|
|
|
chunks
|
|
|
|
|
@@ -2417,8 +2422,10 @@ impl PostgresDb {
|
|
|
|
|
pub async fn get_identity_by_uuid(&self, uuid: &Uuid) -> Result<Option<IdentityDetailRecord>> {
|
|
|
|
|
let query = r#"
|
|
|
|
|
SELECT id, uuid, name, identity_type, source, status, metadata, reference_data,
|
|
|
|
|
voice_embedding, identity_embedding, face_embedding,
|
|
|
|
|
tmdb_id, tmdb_profile, created_at, NULL::timestamptz as updated_at
|
|
|
|
|
voice_embedding::real[] as voice_embedding,
|
|
|
|
|
identity_embedding::real[] as identity_embedding,
|
|
|
|
|
face_embedding::real[] as face_embedding,
|
|
|
|
|
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at
|
|
|
|
|
FROM identities
|
|
|
|
|
WHERE uuid = $1
|
|
|
|
|
"#;
|
|
|
|
|
@@ -2497,7 +2504,7 @@ impl PostgresDb {
|
|
|
|
|
let query = r#"
|
|
|
|
|
SELECT c.id, c.file_uuid, c.chunk_id, c.chunk_type,
|
|
|
|
|
c.start_time, c.end_time, c.text_content, c.content
|
|
|
|
|
FROM chunks c
|
|
|
|
|
FROM dev.chunk c
|
|
|
|
|
WHERE c.file_uuid IN (
|
|
|
|
|
SELECT DISTINCT fd.file_uuid
|
|
|
|
|
FROM face_detections fd
|
|
|
|
|
@@ -2538,7 +2545,7 @@ impl PostgresDb {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub async fn store_chunk(&self, chunk: &Chunk) -> Result<()> {
|
|
|
|
|
let table = schema::table_name("chunks");
|
|
|
|
|
let table = "dev.chunk";
|
|
|
|
|
let content_with_rule = serde_json::json!({
|
|
|
|
|
"rule": chunk.rule.as_str(),
|
|
|
|
|
"data": chunk.content
|
|
|
|
|
@@ -2567,9 +2574,9 @@ impl PostgresDb {
|
|
|
|
|
|
|
|
|
|
sqlx::query(&format!(
|
|
|
|
|
r#"
|
|
|
|
|
INSERT INTO {} (file_id, file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
|
|
|
|
VALUES ($1, $2, $3, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
|
|
|
|
|
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE SET
|
|
|
|
|
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
|
|
|
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11::jsonb, $12::jsonb, $13, $14, $15, $16, $17)
|
|
|
|
|
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
|
|
|
|
|
start_time = EXCLUDED.start_time,
|
|
|
|
|
end_time = EXCLUDED.end_time,
|
|
|
|
|
fps = EXCLUDED.fps,
|
|
|
|
|
@@ -2590,7 +2597,6 @@ impl PostgresDb {
|
|
|
|
|
.bind(chunk.file_id)
|
|
|
|
|
.bind(&chunk.uuid)
|
|
|
|
|
.bind(&chunk.chunk_id)
|
|
|
|
|
.bind(chunk.chunk_index as i32)
|
|
|
|
|
.bind(chunk.chunk_type.as_str())
|
|
|
|
|
.bind(chunk.start_time().seconds())
|
|
|
|
|
.bind(chunk.end_time().seconds())
|
|
|
|
|
@@ -2616,7 +2622,7 @@ impl PostgresDb {
|
|
|
|
|
chunk: &Chunk,
|
|
|
|
|
tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
|
|
|
|
|
) -> Result<()> {
|
|
|
|
|
let table = schema::table_name("chunks");
|
|
|
|
|
let table = "dev.chunk";
|
|
|
|
|
let content_with_rule = serde_json::json!({
|
|
|
|
|
"rule": chunk.rule.as_str(),
|
|
|
|
|
"data": chunk.content
|
|
|
|
|
@@ -2642,9 +2648,9 @@ impl PostgresDb {
|
|
|
|
|
|
|
|
|
|
sqlx::query(&format!(
|
|
|
|
|
r#"
|
|
|
|
|
INSERT INTO {} (file_id, file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
|
|
|
|
VALUES ($1, $2, $3, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
|
|
|
|
|
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE SET
|
|
|
|
|
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
|
|
|
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11::jsonb, $12::jsonb, $13, $14, $15, $16, $17)
|
|
|
|
|
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
|
|
|
|
|
start_time = EXCLUDED.start_time,
|
|
|
|
|
end_time = EXCLUDED.end_time,
|
|
|
|
|
fps = EXCLUDED.fps,
|
|
|
|
|
@@ -2665,7 +2671,6 @@ impl PostgresDb {
|
|
|
|
|
.bind(chunk.file_id)
|
|
|
|
|
.bind(&chunk.uuid)
|
|
|
|
|
.bind(&chunk.chunk_id)
|
|
|
|
|
.bind(chunk.chunk_index as i32)
|
|
|
|
|
.bind(chunk.chunk_type.as_str())
|
|
|
|
|
.bind(chunk.start_time().seconds())
|
|
|
|
|
.bind(chunk.end_time().seconds())
|
|
|
|
|
@@ -2687,9 +2692,9 @@ impl PostgresDb {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
|
|
|
|
|
let table = schema::table_name("chunks");
|
|
|
|
|
let table = "dev.chunk";
|
|
|
|
|
let rows = sqlx::query(&format!(
|
|
|
|
|
"SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY chunk_index",
|
|
|
|
|
"SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY id",
|
|
|
|
|
table
|
|
|
|
|
))
|
|
|
|
|
.bind(uuid)
|
|
|
|
|
@@ -2699,8 +2704,7 @@ impl PostgresDb {
|
|
|
|
|
let chunks: Vec<Chunk> = rows
|
|
|
|
|
.into_iter()
|
|
|
|
|
.map(|r| {
|
|
|
|
|
let chunk_type_str: String = r.get(4);
|
|
|
|
|
let chunk_index: i32 = r.get(3);
|
|
|
|
|
let chunk_type_str: String = r.get(3);
|
|
|
|
|
let chunk_type = match chunk_type_str.as_str() {
|
|
|
|
|
"time" => ChunkType::TimeBased,
|
|
|
|
|
"sentence" => ChunkType::Sentence,
|
|
|
|
|
@@ -2740,7 +2744,7 @@ impl PostgresDb {
|
|
|
|
|
file_id,
|
|
|
|
|
uuid: r.get("uuid"),
|
|
|
|
|
chunk_id: r.get("chunk_id"),
|
|
|
|
|
chunk_index: chunk_index as u32,
|
|
|
|
|
|
|
|
|
|
chunk_type,
|
|
|
|
|
rule,
|
|
|
|
|
|
|
|
|
|
@@ -2768,9 +2772,9 @@ impl PostgresDb {
|
|
|
|
|
chunk_id: &str,
|
|
|
|
|
uuid: &str,
|
|
|
|
|
) -> Result<Option<Chunk>> {
|
|
|
|
|
let table = schema::table_name("chunks");
|
|
|
|
|
let table = "dev.chunk";
|
|
|
|
|
let row = sqlx::query(&format!(
|
|
|
|
|
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE chunk_id = $1 AND uuid = $2",
|
|
|
|
|
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE chunk_id = $1 AND uuid = $2",
|
|
|
|
|
table
|
|
|
|
|
))
|
|
|
|
|
.bind(chunk_id)
|
|
|
|
|
@@ -2779,25 +2783,24 @@ impl PostgresDb {
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
if let Some(r) = row {
|
|
|
|
|
let chunk_type_str: String = r.get(4);
|
|
|
|
|
let chunk_index: i32 = r.get(3);
|
|
|
|
|
let chunk_type = match chunk_type_str.as_str() {
|
|
|
|
|
"time" => ChunkType::TimeBased,
|
|
|
|
|
"sentence" => ChunkType::Sentence,
|
|
|
|
|
"cut" => ChunkType::Cut,
|
|
|
|
|
"trace" => ChunkType::Trace,
|
|
|
|
|
"story" => ChunkType::Story,
|
|
|
|
|
_ => ChunkType::TimeBased,
|
|
|
|
|
};
|
|
|
|
|
let chunk_type_str: String = r.get(3);
|
|
|
|
|
let chunk_type = match chunk_type_str.as_str() {
|
|
|
|
|
"time" => ChunkType::TimeBased,
|
|
|
|
|
"sentence" => ChunkType::Sentence,
|
|
|
|
|
"cut" => ChunkType::Cut,
|
|
|
|
|
"trace" => ChunkType::Trace,
|
|
|
|
|
"story" => ChunkType::Story,
|
|
|
|
|
_ => ChunkType::TimeBased,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let content: serde_json::Value = r.get(9);
|
|
|
|
|
let metadata: Option<serde_json::Value> = r.get(10);
|
|
|
|
|
let content: serde_json::Value = r.get(8);
|
|
|
|
|
let metadata: Option<serde_json::Value> = r.get(9);
|
|
|
|
|
|
|
|
|
|
let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
|
|
|
|
|
let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
|
|
|
|
|
let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();
|
|
|
|
|
let pre_chunk_ids: Vec<i32> = r.try_get(12).unwrap_or_default();
|
|
|
|
|
let parent_chunk_id: Option<String> = r.try_get(13).ok().flatten();
|
|
|
|
|
let child_chunk_ids: Vec<String> = r.try_get(14).unwrap_or_default();
|
|
|
|
|
|
|
|
|
|
let (rule, content_data) = if content.get("rule").is_some() {
|
|
|
|
|
let (rule, content_data) = if content.get("rule").is_some() {
|
|
|
|
|
let rule_str = content
|
|
|
|
|
.get("rule")
|
|
|
|
|
.and_then(|v| v.as_str())
|
|
|
|
|
@@ -2820,7 +2823,7 @@ impl PostgresDb {
|
|
|
|
|
file_id,
|
|
|
|
|
uuid: r.get("uuid"),
|
|
|
|
|
chunk_id: r.get("chunk_id"),
|
|
|
|
|
chunk_index: chunk_index as u32,
|
|
|
|
|
|
|
|
|
|
chunk_type,
|
|
|
|
|
rule,
|
|
|
|
|
fps: r.get("fps"),
|
|
|
|
|
@@ -2996,9 +2999,9 @@ impl PostgresDb {
|
|
|
|
|
start_time: f64,
|
|
|
|
|
end_time: f64,
|
|
|
|
|
) -> Result<Vec<Chunk>> {
|
|
|
|
|
let table = schema::table_name("chunks");
|
|
|
|
|
let table = "dev.chunk";
|
|
|
|
|
let rows = sqlx::query(&format!(
|
|
|
|
|
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids
|
|
|
|
|
"SELECT file_id, uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids
|
|
|
|
|
FROM {}
|
|
|
|
|
WHERE file_id = $1 AND start_time >= $2 AND end_time <= $3
|
|
|
|
|
ORDER BY start_time",
|
|
|
|
|
@@ -3013,8 +3016,7 @@ impl PostgresDb {
|
|
|
|
|
let chunks: Vec<Chunk> = rows
|
|
|
|
|
.into_iter()
|
|
|
|
|
.map(|r| {
|
|
|
|
|
let chunk_type_str: String = r.get(4);
|
|
|
|
|
let chunk_index: i32 = r.get(3);
|
|
|
|
|
let chunk_type_str: String = r.get(3);
|
|
|
|
|
let chunk_type = match chunk_type_str.as_str() {
|
|
|
|
|
"time" => ChunkType::TimeBased,
|
|
|
|
|
"sentence" => ChunkType::Sentence,
|
|
|
|
|
@@ -3024,12 +3026,12 @@ impl PostgresDb {
|
|
|
|
|
_ => ChunkType::TimeBased,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let content: serde_json::Value = r.get(11);
|
|
|
|
|
let metadata: Option<serde_json::Value> = r.get(12);
|
|
|
|
|
let content: serde_json::Value = r.get(10);
|
|
|
|
|
let metadata: Option<serde_json::Value> = r.get(11);
|
|
|
|
|
|
|
|
|
|
let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
|
|
|
|
|
let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
|
|
|
|
|
let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
|
|
|
|
|
let pre_chunk_ids: Vec<i32> = r.try_get(14).unwrap_or_default();
|
|
|
|
|
let parent_chunk_id: Option<String> = r.try_get(15).ok().flatten();
|
|
|
|
|
let child_chunk_ids: Vec<String> = r.try_get(16).unwrap_or_default();
|
|
|
|
|
|
|
|
|
|
let (rule, content_data) = if content.get("rule").is_some() {
|
|
|
|
|
let rule_str = content
|
|
|
|
|
@@ -3054,7 +3056,7 @@ impl PostgresDb {
|
|
|
|
|
file_id,
|
|
|
|
|
uuid: r.get("uuid"),
|
|
|
|
|
chunk_id: r.get("chunk_id"),
|
|
|
|
|
chunk_index: chunk_index as u32,
|
|
|
|
|
|
|
|
|
|
chunk_type,
|
|
|
|
|
rule,
|
|
|
|
|
|
|
|
|
|
@@ -3082,9 +3084,9 @@ impl PostgresDb {
|
|
|
|
|
return Ok(vec![]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let table = schema::table_name("chunks");
|
|
|
|
|
let table = "dev.chunk";
|
|
|
|
|
let rows = sqlx::query(&format!(
|
|
|
|
|
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = ANY($1) ORDER BY chunk_index",
|
|
|
|
|
"SELECT file_id, uuid, chunk_id, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = ANY($1) ORDER BY id",
|
|
|
|
|
table
|
|
|
|
|
))
|
|
|
|
|
.bind(chunk_ids)
|
|
|
|
|
@@ -3094,8 +3096,7 @@ impl PostgresDb {
|
|
|
|
|
let chunks: Vec<Chunk> = rows
|
|
|
|
|
.into_iter()
|
|
|
|
|
.map(|r| {
|
|
|
|
|
let chunk_type_str: String = r.get(4);
|
|
|
|
|
let chunk_index: i32 = r.get(3);
|
|
|
|
|
let chunk_type_str: String = r.get(3);
|
|
|
|
|
let chunk_type = match chunk_type_str.as_str() {
|
|
|
|
|
"time" => ChunkType::TimeBased,
|
|
|
|
|
"sentence" => ChunkType::Sentence,
|
|
|
|
|
@@ -3135,7 +3136,7 @@ impl PostgresDb {
|
|
|
|
|
file_id,
|
|
|
|
|
uuid: r.get("uuid"),
|
|
|
|
|
chunk_id: r.get("chunk_id"),
|
|
|
|
|
chunk_index: chunk_index as u32,
|
|
|
|
|
|
|
|
|
|
chunk_type,
|
|
|
|
|
rule,
|
|
|
|
|
|
|
|
|
|
@@ -3192,7 +3193,7 @@ impl PostgresDb {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub async fn update_vector_id(&self, chunk_id: &str, vector_id: &str) -> Result<()> {
|
|
|
|
|
let table = schema::table_name("chunks");
|
|
|
|
|
let table = "dev.chunk";
|
|
|
|
|
sqlx::query(&format!(
|
|
|
|
|
"UPDATE {} SET vector_id = $1 WHERE chunk_id = $2",
|
|
|
|
|
table
|
|
|
|
|
@@ -3214,12 +3215,12 @@ impl PostgresDb {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub async fn search_text(&self, query: &str, chunk_type: Option<&str>) -> Result<Vec<Chunk>> {
|
|
|
|
|
let table = schema::table_name("chunks");
|
|
|
|
|
let table = "dev.chunk";
|
|
|
|
|
let query_pattern = format!("%{}%", query);
|
|
|
|
|
|
|
|
|
|
let sql = match chunk_type {
|
|
|
|
|
Some(_) => &format!("SELECT uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 AND chunk_type = $2 ORDER BY chunk_index", table),
|
|
|
|
|
None => &format!("SELECT uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 ORDER BY chunk_index", table),
|
|
|
|
|
Some(_) => &format!("SELECT uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 AND chunk_type = $2 ORDER BY id", table),
|
|
|
|
|
None => &format!("SELECT uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 ORDER BY id", table),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let chunks = if let Some(ct) = chunk_type {
|
|
|
|
|
@@ -3228,7 +3229,6 @@ impl PostgresDb {
|
|
|
|
|
(
|
|
|
|
|
String,
|
|
|
|
|
String,
|
|
|
|
|
i32,
|
|
|
|
|
String,
|
|
|
|
|
f64,
|
|
|
|
|
f64,
|
|
|
|
|
@@ -3252,7 +3252,6 @@ impl PostgresDb {
|
|
|
|
|
(
|
|
|
|
|
String,
|
|
|
|
|
String,
|
|
|
|
|
i32,
|
|
|
|
|
String,
|
|
|
|
|
f64,
|
|
|
|
|
f64,
|
|
|
|
|
@@ -3274,7 +3273,7 @@ impl PostgresDb {
|
|
|
|
|
let results: Vec<Chunk> = chunks
|
|
|
|
|
.into_iter()
|
|
|
|
|
.map(|r| {
|
|
|
|
|
let chunk_type = match r.3.as_str() {
|
|
|
|
|
let chunk_type = match r.2.as_str() {
|
|
|
|
|
"time_based" => ChunkType::TimeBased,
|
|
|
|
|
"sentence" => ChunkType::Sentence,
|
|
|
|
|
"cut" => ChunkType::Cut,
|
|
|
|
|
@@ -3284,29 +3283,29 @@ impl PostgresDb {
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let content: serde_json::Value =
|
|
|
|
|
serde_json::from_str(&r.9).unwrap_or(serde_json::json!({}));
|
|
|
|
|
serde_json::from_str(&r.8).unwrap_or(serde_json::json!({}));
|
|
|
|
|
|
|
|
|
|
let metadata: Option<serde_json::Value> =
|
|
|
|
|
r.10.and_then(|m| serde_json::from_str(&m).ok());
|
|
|
|
|
r.9.and_then(|m| serde_json::from_str(&m).ok());
|
|
|
|
|
|
|
|
|
|
Chunk {
|
|
|
|
|
file_id: 0,
|
|
|
|
|
uuid: r.0,
|
|
|
|
|
chunk_id: r.1,
|
|
|
|
|
chunk_index: r.2 as u32,
|
|
|
|
|
|
|
|
|
|
chunk_type,
|
|
|
|
|
rule: ChunkRule::Rule1,
|
|
|
|
|
fps: r.6,
|
|
|
|
|
start_frame: r.7,
|
|
|
|
|
end_frame: r.8,
|
|
|
|
|
text_content: Some(r.9),
|
|
|
|
|
fps: r.5,
|
|
|
|
|
start_frame: r.6,
|
|
|
|
|
end_frame: r.7,
|
|
|
|
|
text_content: Some(r.8),
|
|
|
|
|
content,
|
|
|
|
|
metadata,
|
|
|
|
|
vector_id: r.11,
|
|
|
|
|
vector_id: r.10,
|
|
|
|
|
frame_count: 0,
|
|
|
|
|
pre_chunk_ids: vec![],
|
|
|
|
|
parent_chunk_id: r.12,
|
|
|
|
|
child_chunk_ids: r.13,
|
|
|
|
|
parent_chunk_id: r.11,
|
|
|
|
|
child_chunk_ids: r.12,
|
|
|
|
|
visual_stats: None,
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
@@ -3321,13 +3320,13 @@ impl PostgresDb {
|
|
|
|
|
uuid: Option<&str>,
|
|
|
|
|
limit: usize,
|
|
|
|
|
) -> Result<Vec<Bm25Result>> {
|
|
|
|
|
let table = schema::table_name("chunks");
|
|
|
|
|
let table = "dev.chunk";
|
|
|
|
|
let tsquery = self.prepare_tsquery(query).await?;
|
|
|
|
|
|
|
|
|
|
let sql = match uuid {
|
|
|
|
|
Some(_) => &format!(
|
|
|
|
|
r#"
|
|
|
|
|
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
|
|
|
|
SELECT c.chunk_id, c.file_uuid, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
|
|
|
|
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
|
|
|
|
|
c.visual_stats,
|
|
|
|
|
pc.metadata->'structured_summary' as scene_summary,
|
|
|
|
|
@@ -3342,7 +3341,7 @@ impl PostgresDb {
|
|
|
|
|
),
|
|
|
|
|
None => &format!(
|
|
|
|
|
r#"
|
|
|
|
|
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
|
|
|
|
SELECT c.chunk_id, c.file_uuid, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
|
|
|
|
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
|
|
|
|
|
c.visual_stats,
|
|
|
|
|
pc.metadata->'structured_summary' as scene_summary,
|
|
|
|
|
@@ -3406,7 +3405,7 @@ impl PostgresDb {
|
|
|
|
|
Bm25Result {
|
|
|
|
|
chunk_id: r.0,
|
|
|
|
|
uuid: r.1,
|
|
|
|
|
chunk_index: r.2 as u32,
|
|
|
|
|
|
|
|
|
|
chunk_type: r.3,
|
|
|
|
|
start_frame: r.4,
|
|
|
|
|
end_frame: r.5,
|
|
|
|
|
@@ -3472,7 +3471,7 @@ impl PostgresDb {
|
|
|
|
|
HybridSearchResult {
|
|
|
|
|
chunk_id: r.chunk_id.clone(),
|
|
|
|
|
uuid: r.uuid.clone(),
|
|
|
|
|
chunk_index: r.chunk_index,
|
|
|
|
|
|
|
|
|
|
chunk_type: r.chunk_type.clone(),
|
|
|
|
|
start_frame: r.start_frame,
|
|
|
|
|
end_frame: r.end_frame,
|
|
|
|
|
@@ -3526,7 +3525,7 @@ impl PostgresDb {
|
|
|
|
|
HybridSearchResult {
|
|
|
|
|
chunk_id: r.chunk_id.clone(),
|
|
|
|
|
uuid: r.uuid.clone(),
|
|
|
|
|
chunk_index: chunk_data.map(|c| c.chunk_index).unwrap_or(0),
|
|
|
|
|
|
|
|
|
|
chunk_type: chunk_data
|
|
|
|
|
.map(|c| c.chunk_type.as_str().to_string())
|
|
|
|
|
.unwrap_or_default(),
|
|
|
|
|
@@ -3779,7 +3778,6 @@ pub struct SceneSummary {
|
|
|
|
|
pub struct Bm25Result {
|
|
|
|
|
pub chunk_id: String,
|
|
|
|
|
pub uuid: String,
|
|
|
|
|
pub chunk_index: u32,
|
|
|
|
|
pub chunk_type: String,
|
|
|
|
|
pub start_frame: i64,
|
|
|
|
|
pub end_frame: i64,
|
|
|
|
|
@@ -3797,7 +3795,6 @@ pub struct Bm25Result {
|
|
|
|
|
pub struct HybridSearchResult {
|
|
|
|
|
pub uuid: String,
|
|
|
|
|
pub chunk_id: String,
|
|
|
|
|
pub chunk_index: u32,
|
|
|
|
|
pub chunk_type: String,
|
|
|
|
|
pub start_frame: i64,
|
|
|
|
|
pub end_frame: i64,
|
|
|
|
|
@@ -4443,7 +4440,7 @@ impl PostgresDb {
|
|
|
|
|
total_frames: u64,
|
|
|
|
|
) -> Result<()> {
|
|
|
|
|
let table = schema::table_name("videos");
|
|
|
|
|
let chunks_table = schema::table_name("chunks");
|
|
|
|
|
let chunks_table = "dev.chunk";
|
|
|
|
|
let pre_chunks_table = schema::table_name("pre_chunks");
|
|
|
|
|
|
|
|
|
|
// Query chunks count and frames
|
|
|
|
|
@@ -4622,7 +4619,7 @@ impl PostgresDb {
|
|
|
|
|
let results = sqlx::query_as::<_, SemanticSearchResult>(
|
|
|
|
|
r#"
|
|
|
|
|
SELECT
|
|
|
|
|
id, chunk_index as scene_order, start_time, end_time,
|
|
|
|
|
id as scene_order, start_time, end_time,
|
|
|
|
|
COALESCE(summary_text, text_content, '') as summary,
|
|
|
|
|
metadata,
|
|
|
|
|
(1 - (embedding <=> $1::vector)) as similarity
|
|
|
|
|
@@ -4820,7 +4817,7 @@ mod tests {
|
|
|
|
|
"file_id": 1,
|
|
|
|
|
"uuid": "test",
|
|
|
|
|
"chunk_id": "c1",
|
|
|
|
|
"chunk_index": 0,
|
|
|
|
|
|
|
|
|
|
"chunk_type": "time_based",
|
|
|
|
|
"rule": "rule1",
|
|
|
|
|
"start_time": 0.0,
|
|
|
|
|
@@ -4960,7 +4957,7 @@ mod tests {
|
|
|
|
|
let result = Bm25Result {
|
|
|
|
|
chunk_id: "sentence_001".to_string(),
|
|
|
|
|
uuid: "test-uuid".to_string(),
|
|
|
|
|
chunk_index: 1,
|
|
|
|
|
|
|
|
|
|
chunk_type: "sentence".to_string(),
|
|
|
|
|
start_frame: 0,
|
|
|
|
|
end_frame: 150,
|
|
|
|
|
@@ -4985,7 +4982,7 @@ mod tests {
|
|
|
|
|
let result = HybridSearchResult {
|
|
|
|
|
chunk_id: "sentence_001".to_string(),
|
|
|
|
|
uuid: "test-uuid".to_string(),
|
|
|
|
|
chunk_index: 1,
|
|
|
|
|
|
|
|
|
|
chunk_type: "sentence".to_string(),
|
|
|
|
|
start_frame: 0,
|
|
|
|
|
end_frame: 150,
|
|
|
|
|
|