fix: pre_chunks schema + TMDb movie name extraction
- pre_chunks: add chunk_type, text_content columns (INSERT statements reference these columns but CREATE TABLE was missing them); drop NOT NULL on coordinate_type/coordinate_index
- run_migrations: add ALTER TABLE for existing databases
- extract_movie_name: filter noise words (youtube, fps, 24fps, 1080p, pure digits) so 'Charade_YouTube_24fps' → 'Charade'
- run-server-3002.sh: add companion worker startup (matching 3003 script)
This commit is contained in:
@@ -21,6 +21,17 @@ if [ -n "$PID" ]; then
|
||||
sleep 2
|
||||
fi
|
||||
|
||||
# Kill existing worker via PID file
|
||||
if [ -f logs/worker_3002.pid ]; then
|
||||
WPID=$(cat logs/worker_3002.pid)
|
||||
if kill -0 "$WPID" 2>/dev/null; then
|
||||
echo "Killing existing worker (PID: $WPID)"
|
||||
kill "$WPID" 2>/dev/null || true
|
||||
sleep 1
|
||||
fi
|
||||
rm -f logs/worker_3002.pid
|
||||
fi
|
||||
|
||||
# Build if needed
|
||||
if [ ! -f target/release/momentry ]; then
|
||||
echo "Building release binary..."
|
||||
@@ -32,3 +43,11 @@ echo "Starting momentry server on port 3002..."
|
||||
./target/release/momentry server --host 0.0.0.0 --port 3002 > logs/momentry_3002.log 2>&1 &
|
||||
echo "Server started (PID: $!)"
|
||||
echo "Logs: logs/momentry_3002.log"
|
||||
|
||||
# Start companion worker
|
||||
echo "Starting momentry worker..."
|
||||
nohup ./target/release/momentry worker --max-concurrent 6 --poll-interval 10 --batch-size 5 > logs/worker_3002.log 2>&1 &
|
||||
WPID=$!
|
||||
echo "$WPID" > logs/worker_3002.pid
|
||||
echo "Worker started (PID: $WPID)"
|
||||
echo "Worker logs: logs/worker_3002.log"
|
||||
|
||||
@@ -1040,13 +1040,25 @@ impl PostgresDb {
|
||||
.await?;
|
||||
|
||||
// ── Pre Chunks ──
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS pre_chunks (id BIGSERIAL PRIMARY KEY, file_uuid VARCHAR(255) NOT NULL, processor_type VARCHAR(50) NOT NULL, coordinate_type VARCHAR(50) NOT NULL, coordinate_index BIGINT NOT NULL, start_frame BIGINT, end_frame BIGINT, start_time DOUBLE PRECISION, end_time DOUBLE PRECISION, fps DOUBLE PRECISION, data JSONB NOT NULL, identity_id UUID, confidence DOUBLE PRECISION, created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?;
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS pre_chunks (id BIGSERIAL PRIMARY KEY, file_uuid VARCHAR(255) NOT NULL, processor_type VARCHAR(50) NOT NULL, chunk_type VARCHAR(50), coordinate_type VARCHAR(50), coordinate_index BIGINT, start_frame BIGINT, end_frame BIGINT, start_time DOUBLE PRECISION, end_time DOUBLE PRECISION, fps DOUBLE PRECISION, data JSONB NOT NULL, text_content TEXT, identity_id UUID, confidence DOUBLE PRECISION, created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_pre_chunks_file_uuid ON pre_chunks(file_uuid)")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_pre_chunks_processor ON pre_chunks(processor_type)")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
sqlx::query("ALTER TABLE pre_chunks ADD COLUMN IF NOT EXISTS chunk_type VARCHAR(50)")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
sqlx::query("ALTER TABLE pre_chunks ADD COLUMN IF NOT EXISTS text_content TEXT")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
sqlx::query("ALTER TABLE pre_chunks ALTER COLUMN coordinate_type DROP NOT NULL")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
sqlx::query("ALTER TABLE pre_chunks ALTER COLUMN coordinate_index DROP NOT NULL")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
// ── Monitor Jobs ──
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS monitor_jobs (id SERIAL PRIMARY KEY, uuid VARCHAR(32) NOT NULL, video_path VARCHAR(512), status VARCHAR(20) NOT NULL DEFAULT 'pending', current_processor VARCHAR(20), progress_total INT DEFAULT 0, progress_current INT DEFAULT 0, error_count INT DEFAULT 0, last_error TEXT, started_at TIMESTAMP, updated_at TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?;
|
||||
|
||||
@@ -45,14 +45,37 @@ fn extract_movie_name(filename: &str) -> Option<String> {
|
||||
.file_stem()
|
||||
.and_then(|s| s.to_str())?;
|
||||
|
||||
// Take only the part before year patterns or separators
|
||||
let noise_words = [
|
||||
"youtube", "yt", "fps", "hd", "full", "movie", "official",
|
||||
"trailer", "teaser", "4k",
|
||||
];
|
||||
|
||||
let cleaned = name
|
||||
.replace(['.', '_'], " ")
|
||||
.split(|c: char| c == '(' || c == '[' || c == '│' || c == '|')
|
||||
.next()
|
||||
.unwrap_or(&name)
|
||||
.trim()
|
||||
.to_string();
|
||||
.split_whitespace()
|
||||
.filter(|w| {
|
||||
let lower = w.to_lowercase();
|
||||
if noise_words.contains(&lower.as_str()) {
|
||||
return false;
|
||||
}
|
||||
if w.chars().all(|c| c.is_ascii_digit()) {
|
||||
return false;
|
||||
}
|
||||
// Strip tokens like "24fps", "1080p", "60fps", "720p"
|
||||
if lower.ends_with("fps") || lower.ends_with('p') {
|
||||
let prefix = lower.trim_end_matches('p').trim_end_matches("fps");
|
||||
if prefix.chars().all(|c| c.is_ascii_digit()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
if cleaned.is_empty() || cleaned.len() < 3 {
|
||||
return None;
|
||||
|
||||
Reference in New Issue
Block a user