#!/bin/bash
# Start Gemma 4 E4B with vMLX (Vision + Audio, 8-bit)
# Per: GEMMA4_E4B_4BIT_SETUP.md
#
# What this script does:
#   1. Terminates any process currently listening on $PORT.
#   2. Launches the vMLX server in the background, with stdout and stderr
#      redirected to $LOG_FILE.
#   3. Polls http://localhost:$PORT/health until it answers successfully,
#      the server process exits, or $READY_TIMEOUT seconds have elapsed.
#
# Exit status: 0 when the health endpoint answered, 1 otherwise.

set -u

readonly VMLX="/Users/accusys/vmlx/.venv/bin/vmlx"
readonly MODEL="/Users/accusys/models/mlx-gemma4-e4b-it-8bit"
readonly PORT=8000
readonly LOG_FILE="/Users/accusys/momentry_core/logs/vmlx_8000.log"
readonly READY_TIMEOUT=60   # seconds to wait for the health endpoint
readonly POLL_INTERVAL=2    # seconds between health probes

# Fail early with a clear message instead of waiting out the whole timeout
# for a server that could never have started.
if [[ ! -x "$VMLX" ]]; then
  echo "❌ vMLX executable not found or not executable: $VMLX" >&2
  exit 1
fi
if ! mkdir -p -- "${LOG_FILE%/*}"; then
  echo "❌ Cannot create log directory: ${LOG_FILE%/*}" >&2
  exit 1
fi

# Kill existing processes on port.
# -t prints bare PIDs (no header to strip); -sTCP:LISTEN restricts the match
# to listeners so that clients merely connected to the port are left alone.
# lsof exits non-zero when nothing matches, which is fine here.
lsof -t -i "tcp:${PORT}" -sTCP:LISTEN 2>/dev/null |
  while IFS= read -r pid; do
    kill "$pid"
  done
sleep 2   # give the old listener a moment to release the port

# Start vMLX server (editable install v1.5.59)
"$VMLX" serve "$MODEL" \
  --host 0.0.0.0 --port "$PORT" \
  --enable-prefix-cache \
  --use-paged-cache \
  --enable-disk-cache \
  --kv-cache-quantization q8 \
  --max-cache-blocks 2048 \
  --timeout 1200 \
  --log-level INFO \
  --served-model-name gemma-4-E4B \
  >"$LOG_FILE" 2>&1 &
server_pid=$!

echo "vMLX server starting on port $PORT"
echo "Model: Gemma 4 E4B 8bit (MLX) — supports Vision + Audio"
echo "Log: $LOG_FILE"

# Wait for ready.
# SECONDS is Bash's built-in elapsed-time counter; resetting it lets both the
# timeout and the reported startup time reflect real wall-clock seconds
# (each iteration costs the poll interval plus up to 2 s of curl timeout).
SECONDS=0
while ((SECONDS < READY_TIMEOUT)); do
  # -f makes curl fail on HTTP 4xx/5xx, so an error response is not
  # mistaken for a healthy server.
  if curl -sf -m 2 "http://localhost:${PORT}/health" >/dev/null 2>&1; then
    echo "✅ Ready (${SECONDS} s)"
    echo "API: http://localhost:$PORT/v1/chat/completions"
    exit 0
  fi
  # kill -0 only probes for existence; if the server already died there is
  # no point in waiting for the timeout.
  if ! kill -0 "$server_pid" 2>/dev/null; then
    echo "❌ Server process exited during startup, check log: $LOG_FILE" >&2
    exit 1
  fi
  sleep "$POLL_INTERVAL"
done

echo "❌ Not ready after ${READY_TIMEOUT}s, check log" >&2
exit 1