fix: pre_chunks schema + TMDb movie name extraction

- pre_chunks: add chunk_type and text_content columns (INSERT statements
  reference these columns but CREATE TABLE was missing them); drop
  NOT NULL on coordinate_type/coordinate_index
- run_migrations: add ALTER TABLE for existing databases
- extract_movie_name: filter noise tokens (listed words such as youtube
  and fps, digit-prefixed tokens such as 24fps and 1080p, and pure-digit
  tokens) so 'Charade_YouTube_24fps' → 'Charade'
- run-server-3002.sh: add companion worker startup (matching 3003 script)
This commit is contained in:
Accusys
2026-06-22 11:55:12 +08:00
parent f4de741d5b
commit 30b252ac95
3 changed files with 57 additions and 3 deletions

View File

@@ -21,6 +21,17 @@ if [ -n "$PID" ]; then
sleep 2
fi
# Kill existing worker via PID file
# The PID is read from logs/worker_3002.pid, the same file this script
# writes when it starts the companion worker.
if [ -f logs/worker_3002.pid ]; then
WPID=$(cat logs/worker_3002.pid)
# kill -0 delivers no signal; it only tests whether the PID can be signalled.
if kill -0 "$WPID" 2>/dev/null; then
echo "Killing existing worker (PID: $WPID)"
# "|| true" keeps a failed kill from being treated as an error.
kill "$WPID" 2>/dev/null || true
# Brief pause after signalling before the script carries on.
sleep 1
fi
# Remove the PID file whether or not the process was still running.
rm -f logs/worker_3002.pid
fi
# Build if needed
if [ ! -f target/release/momentry ]; then
echo "Building release binary..."
@@ -32,3 +43,11 @@ echo "Starting momentry server on port 3002..."
./target/release/momentry server --host 0.0.0.0 --port 3002 > logs/momentry_3002.log 2>&1 &
echo "Server started (PID: $!)"
echo "Logs: logs/momentry_3002.log"
# Start companion worker
# Launched in the background under nohup; stdout and stderr are both
# redirected to logs/worker_3002.log.
echo "Starting momentry worker..."
nohup ./target/release/momentry worker --max-concurrent 6 --poll-interval 10 --batch-size 5 > logs/worker_3002.log 2>&1 &
# $! is the PID of the background job just launched; it is saved to
# logs/worker_3002.pid so the worker can be located by PID later.
WPID=$!
echo "$WPID" > logs/worker_3002.pid
echo "Worker started (PID: $WPID)"
echo "Worker logs: logs/worker_3002.log"

View File

@@ -1040,13 +1040,25 @@ impl PostgresDb {
.await?;
// ── Pre Chunks ──
sqlx::query("CREATE TABLE IF NOT EXISTS pre_chunks (id BIGSERIAL PRIMARY KEY, file_uuid VARCHAR(255) NOT NULL, processor_type VARCHAR(50) NOT NULL, coordinate_type VARCHAR(50) NOT NULL, coordinate_index BIGINT NOT NULL, start_frame BIGINT, end_frame BIGINT, start_time DOUBLE PRECISION, end_time DOUBLE PRECISION, fps DOUBLE PRECISION, data JSONB NOT NULL, identity_id UUID, confidence DOUBLE PRECISION, created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?;
sqlx::query("CREATE TABLE IF NOT EXISTS pre_chunks (id BIGSERIAL PRIMARY KEY, file_uuid VARCHAR(255) NOT NULL, processor_type VARCHAR(50) NOT NULL, chunk_type VARCHAR(50), coordinate_type VARCHAR(50), coordinate_index BIGINT, start_frame BIGINT, end_frame BIGINT, start_time DOUBLE PRECISION, end_time DOUBLE PRECISION, fps DOUBLE PRECISION, data JSONB NOT NULL, text_content TEXT, identity_id UUID, confidence DOUBLE PRECISION, created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_pre_chunks_file_uuid ON pre_chunks(file_uuid)")
.execute(pool)
.await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_pre_chunks_processor ON pre_chunks(processor_type)")
.execute(pool)
.await?;
sqlx::query("ALTER TABLE pre_chunks ADD COLUMN IF NOT EXISTS chunk_type VARCHAR(50)")
.execute(pool)
.await?;
sqlx::query("ALTER TABLE pre_chunks ADD COLUMN IF NOT EXISTS text_content TEXT")
.execute(pool)
.await?;
sqlx::query("ALTER TABLE pre_chunks ALTER COLUMN coordinate_type DROP NOT NULL")
.execute(pool)
.await?;
sqlx::query("ALTER TABLE pre_chunks ALTER COLUMN coordinate_index DROP NOT NULL")
.execute(pool)
.await?;
// ── Monitor Jobs ──
sqlx::query("CREATE TABLE IF NOT EXISTS monitor_jobs (id SERIAL PRIMARY KEY, uuid VARCHAR(32) NOT NULL, video_path VARCHAR(512), status VARCHAR(20) NOT NULL DEFAULT 'pending', current_processor VARCHAR(20), progress_total INT DEFAULT 0, progress_current INT DEFAULT 0, error_count INT DEFAULT 0, last_error TEXT, started_at TIMESTAMP, updated_at TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?;

View File

@@ -45,14 +45,37 @@ fn extract_movie_name(filename: &str) -> Option<String> {
.file_stem()
.and_then(|s| s.to_str())?;
// Take only the part before year patterns or separators
let noise_words = [
"youtube", "yt", "fps", "hd", "full", "movie", "official",
"trailer", "teaser", "4k",
];
let cleaned = name
.replace(['.', '_'], " ")
.split(|c: char| c == '(' || c == '[' || c == '│' || c == '|')
.next()
.unwrap_or(&name)
.trim()
.to_string();
.split_whitespace()
.filter(|w| {
let lower = w.to_lowercase();
if noise_words.contains(&lower.as_str()) {
return false;
}
if w.chars().all(|c| c.is_ascii_digit()) {
return false;
}
// Strip tokens like "24fps", "1080p", "60fps", "720p"
if lower.ends_with("fps") || lower.ends_with('p') {
let prefix = lower.trim_end_matches('p').trim_end_matches("fps");
if prefix.chars().all(|c| c.is_ascii_digit()) {
return false;
}
}
true
})
.collect::<Vec<_>>()
.join(" ");
if cleaned.is_empty() || cleaned.len() < 3 {
return None;