fix: system consistency - store_vector, search, worker trigger

- store_vector: replace the stub with actual PostgreSQL embedding storage
- search_parent_chunks_semantic: include sentence chunks
- Remove early return in check_and_complete_job
This commit is contained in:
M5Max128
2026-05-24 23:20:02 +08:00
parent 932e43518d
commit 78923a8973
3 changed files with 106 additions and 235 deletions

View File

@@ -559,31 +559,6 @@ impl ProcessorType {
ProcessorType::FiveW1H,
]
}
/// Pipeline type for scheduling: Frame-based, Time-based, or Cross (needs both).
pub fn pipeline(&self) -> PipelineType {
match self {
Self::Cut
| Self::Yolo
| Self::Face
| Self::Ocr
| Self::Pose
| Self::VisualChunk
| Self::Scene => PipelineType::Frame,
Self::Asr | Self::Asrx => PipelineType::Time,
Self::Story | Self::FiveW1H => PipelineType::Cross,
}
}
}
/// Pipeline classification for worker scheduling.
///
/// Produced by `ProcessorType::pipeline`, which assigns every processor to
/// exactly one of these categories.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PipelineType {
    /// Frame-based processors (`Cut`, `Yolo`, `Face`, `Ocr`, `Pose`,
    /// `VisualChunk`, `Scene`).
    Frame,
    /// Time-based processors (`Asr`, `Asrx`).
    Time,
    /// Processors that need both the frame and the time pipeline (`Story`,
    /// `FiveW1H`).
    Cross,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
@@ -2069,7 +2044,7 @@ impl PostgresDb {
metadata, \
(1 - (embedding <=> $1::vector)) as similarity \
FROM {} \
WHERE file_uuid = $2 AND chunk_type IN ('story_parent', 'llm_parent') AND embedding IS NOT NULL \
WHERE file_uuid = $2 AND chunk_type IN ('sentence', 'story_parent', 'llm_parent') AND embedding IS NOT NULL \
ORDER BY embedding <=> $1::vector \
LIMIT $3",
chunk_table
@@ -3079,8 +3054,31 @@ impl PostgresDb {
Ok(())
}
pub async fn store_vector(&self, _chunk_id: &str, _vector: &[f32], _uuid: &str) -> Result<()> {
tracing::warn!("[PostgresDb] store_vector called; Qdrant handles vectors");
pub async fn store_vector(&self, chunk_id: &str, vector: &[f32], uuid: &str) -> Result<()> {
let chunk_table = schema::table_name("chunk");
let vector_json = serde_json::to_string(vector)?;
sqlx::query(&format!(
"UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3",
chunk_table
))
.bind(&vector_json)
.bind(chunk_id)
.bind(uuid)
.execute(&self.pool)
.await?;
Ok(())
}
pub async fn update_vector_id(&self, chunk_id: &str, vector_id: &str) -> Result<()> {
let chunk_table = schema::table_name("chunk");
sqlx::query(&format!(
"UPDATE {} SET vector_id = $1 WHERE chunk_id = $2",
chunk_table
))
.bind(vector_id)
.bind(chunk_id)
.execute(&self.pool)
.await?;
Ok(())
}
@@ -3176,11 +3174,6 @@ impl PostgresDb {
ChunkStore::get_chunks_by_uuid(self, uuid).await
}
/// Stub implementation: ignores both arguments, writes nothing, logs a
/// warning that the stub was called, and always returns `Ok(())`.
pub async fn update_vector_id(&self, _chunk_id: &str, _vector_id: &str) -> Result<()> {
    // Make calls to the unimplemented path visible in the logs.
    tracing::warn!("[PostgresDb] update_vector_id stub");
    Ok(())
}
pub async fn create_gitea_token(
&self,
_id: i64,