M4: trace API, portal embed client, EmbeddingGemma sync, release plan

2026-05-08 01:04:23 +08:00
parent 26d9c33419
commit 6d82131589
4 changed files with 506 additions and 31 deletions
--- a/portal/src/api/client.ts
+++ b/portal/src/api/client.ts
@@ -251,30 +251,27 @@ export async function searchVideos(query: string, limit = 10, mode = 'vector'):
  }

  const config = getConfig()
-  const url = mode === 'smart' || mode === 'bm25'
-    ? `${config.api_base_url}/api/v1/search`
-    : `${config.api_base_url}/api/v1/search`
+  const url = `${config.api_base_url}/api/v1/search/universal`

  const response: any = await httpFetch<any>(url, {
    method: 'POST',
    body: JSON.stringify({ query, limit }),
  })

-  // Map backend response ({ results: [...], query: string }) to frontend SearchResult ({ hits: [...], query: string, count: number })
  return {
-    query: response.query,
+    query: response.query || query,
    count: response.results?.length || 0,
    hits: (response.results || []).map((r: any) => ({
      id: r.chunk_id || r.id,
-      vid: r.uuid || r.vid,
-      start_frame: Math.floor((r.start_time || 0) * 30),
-      end_frame: Math.floor((r.end_time || 0) * 30),
-      fps: 30,
+      vid: r.uuid || r.vid || r.file_uuid || '',
+      start_frame: Math.floor((r.start_time || 0) * (r.fps || 30)),
+      end_frame: Math.floor((r.end_time || 0) * (r.fps || 30)),
+      fps: r.fps || 30,
      start: r.start_time || r.start || 0,
      end: r.end_time || r.end || 0,
-      text: r.text || '',
+      text: r.text || r.text_content || '',
      score: r.score || 0,
-      title: r.title || r.file_name,
+      title: r.title || r.file_name || '',
      file_path: r.file_path,
      has_visual_stats: !!r.visual_stats,
      parent_id: r.parent_chunk_id,
@@ -288,29 +285,27 @@ export async function searchChunks(query: string, uuid?: string): Promise<Search
  }

  const config = getConfig()
-  const url = uuid
-    ? `${config.api_base_url}/api/v1/search?uuid=${encodeURIComponent(uuid)}`
-    : `${config.api_base_url}/api/v1/search`
+  const url = `${config.api_base_url}/api/v1/search/universal`

  const response: any = await httpFetch<any>(url, {
    method: 'POST',
-    body: JSON.stringify({ query, limit: 10 }),
+    body: JSON.stringify({ query, uuid, limit: 20 }),
  })

  return {
-    query: response.query,
+    query: response.query || query,
    count: response.results?.length || 0,
    hits: (response.results || []).map((r: any) => ({
      id: r.chunk_id || r.id,
-      vid: r.uuid || r.vid,
-      start_frame: Math.floor((r.start_time || 0) * 30),
-      end_frame: Math.floor((r.end_time || 0) * 30),
-      fps: 30,
+      vid: r.uuid || r.vid || r.file_uuid || '',
+      start_frame: Math.floor((r.start_time || 0) * (r.fps || 30)),
+      end_frame: Math.floor((r.end_time || 0) * (r.fps || 30)),
+      fps: r.fps || 30,
      start: r.start_time || r.start || 0,
      end: r.end_time || r.end || 0,
-      text: r.text || '',
+      text: r.text || r.text_content || '',
      score: r.score || 0,
-      title: r.title || r.file_name,
+      title: r.title || r.file_name || '',
      file_path: r.file_path,
      has_visual_stats: !!r.visual_stats,
      parent_id: r.parent_chunk_id,
@@ -415,12 +410,15 @@ export async function translateText(text: string, targetLang: string = 'zh-TW'):
  return text
 }

+/**
+ * Resolve the thumbnail source for a file, or for one face trace within it.
+ *
+ * Under Tauri this invokes the `get_person_thumbnail_b64` command with the
+ * file UUID and the optional trace id. Otherwise it only builds an endpoint
+ * URL from the configured API base; no HTTP request is issued here.
+ *
+ * @param fileUuid - UUID of the file; interpolated into the URL path as-is.
+ * @param traceId - Optional trace id; selects the per-trace endpoint when given.
+ * @returns The value resolved by the Tauri command, or an endpoint URL string.
+ */
-export async function getPersonThumbnail(personId: string): Promise<string> {
+export async function getPersonThumbnail(fileUuid: string, traceId?: number): Promise<string> {
  if (isTauri()) {
-    return tauriInvoke<string>('get_person_thumbnail_b64', { person_id: personId })
+    return tauriInvoke<string>('get_person_thumbnail_b64', { file_uuid: fileUuid, trace_id: traceId })
  }
  const config = getConfig()
-  return `${config.api_base_url}/api/v1/file/:file_uuid/faces/:face_id/thumbnail`
+  // NOTE(review): the per-trace URL ends in `/video` although this helper is
+  // named "thumbnail" - confirm the endpoint serves something usable as an image.
+  if (traceId !== undefined) {
+    return `${config.api_base_url}/api/v1/file/${fileUuid}/trace/${traceId}/video`
+  }
+  return `${config.api_base_url}/api/v1/file/${fileUuid}/thumbnail`
 }

 export async function registerIdentity(name: string, images: string[]): Promise<any> {
@@ -462,7 +460,7 @@ export async function unregisterVideo(fileUuid: string): Promise<UnregisterRespo
    return tauriInvoke<UnregisterResponse>('unregister_video', { file_uuid: fileUuid })
  }
  const config = getConfig()
-  return httpFetch<UnregisterResponse>(`${config.api_base_url}/api/v1/files/unregister`, {
+  return httpFetch<UnregisterResponse>(`${config.api_base_url}/api/v1/unregister`, {
    method: 'POST',
    body: JSON.stringify({ file_uuid: fileUuid }),
  })
@@ -483,17 +481,62 @@ export async function listFaceCandidates(fileUuid?: string, minConfidence = 0.5,
  return httpFetch(`${config.api_base_url}/api/v1/faces/candidates?${params.toString()}`)
 }

+/**
+ * List the face traces of a file in a server-side sort order.
+ *
+ * Uses the `list_traces_sorted` Tauri command when running under Tauri,
+ * otherwise POSTs the same options to the file's `face_trace/sortby` endpoint.
+ *
+ * @param fileUuid - UUID of the file whose traces are listed.
+ * @param sortBy - Sort key forwarded as `sort_by`.
+ * @param limit - Maximum number of traces requested.
+ * @param minFaces - Forwarded as `min_faces`.
+ * @returns Whatever the transport resolves with (untyped).
+ */
+export async function listTracesSorted(
+  fileUuid: string,
+  sortBy = 'face_count',
+  limit = 100,
+  minFaces = 1
+): Promise<any> {
+  // Options are identical for both transports; only the envelope differs.
+  const options = { sort_by: sortBy, limit, min_faces: minFaces }
+
+  if (!isTauri()) {
+    const baseUrl = getConfig().api_base_url
+    const endpoint = `${baseUrl}/api/v1/file/${fileUuid}/face_trace/sortby`
+    return httpFetch(endpoint, { method: 'POST', body: JSON.stringify(options) })
+  }
+
+  return tauriInvoke('list_traces_sorted', { file_uuid: fileUuid, ...options })
+}
+
+/**
+ * Embed query text using EmbeddingGemma with fallback.
+ *
+ * Tries M5 (192.168.110.201:11436) first, then falls back to M4 localhost.
+ * A server is skipped when the request throws or times out after 5 s, when
+ * it answers with a non-OK status, or when the payload carries no non-empty
+ * `data[0].embedding` array.
+ *
+ * @param text - Query text sent as the `input` field.
+ * @returns The embedding vector from the first server that provides one.
+ * @throws Error when no server yields an embedding; the message lists the
+ *   reason each server was skipped.
+ */
+export async function embedQuery(text: string): Promise<number[]> {
+  const servers = [
+    'http://192.168.110.201:11436/v1/embeddings',
+    'http://localhost:11436/v1/embeddings',
+  ]
+  // Collected per-server reasons, so a total failure is diagnosable.
+  const failures: string[] = []
+  for (const url of servers) {
+    try {
+      const res = await fetch(url, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ input: text, model: 'embeddinggemma-300m' }),
+        signal: AbortSignal.timeout(5000),
+      })
+      if (!res.ok) {
+        failures.push(`${url}: HTTP ${res.status}`)
+        continue
+      }
+      const data: any = await res.json()
+      const embedding: unknown = data?.data?.[0]?.embedding
+      // Only accept an actual vector; a truthy non-array would break callers.
+      if (Array.isArray(embedding) && embedding.length > 0) return embedding as number[]
+      failures.push(`${url}: response has no embedding`)
+    } catch (e: unknown) {
+      failures.push(`${url}: ${e instanceof Error ? e.message : String(e)}`)
+    }
+  }
+  throw new Error(`Embedding servers unreachable (${failures.join('; ')})`)
+}
+
+/**
+ * Fetch a window of the face detections that belong to one trace.
+ *
+ * @param fileUuid - UUID of the file that owns the trace.
+ * @param traceId - Trace whose faces are listed.
+ * @param limit - Maximum number of faces requested.
+ * @param offset - Number of faces to skip.
+ * @returns Whatever the transport resolves with (untyped).
+ */
+export async function listTraceFaces(fileUuid: string, traceId: number, limit = 200, offset = 0): Promise<any> {
+  if (!isTauri()) {
+    const baseUrl = getConfig().api_base_url
+    const paging = `limit=${limit}&offset=${offset}`
+    return httpFetch(`${baseUrl}/api/v1/file/${fileUuid}/trace/${traceId}/faces?${paging}`)
+  }
+
+  const args = { file_uuid: fileUuid, trace_id: traceId, limit, offset }
+  return tauriInvoke('list_trace_faces', args)
+}
+
 export async function getIdentityFaces(identityId: number, page = 1, pageSize = 100): Promise<any> {
  if (isTauri()) {
    return tauriInvoke('get_identity_faces', { identity_id: identityId, page, page_size: pageSize })
  }

  const config = getConfig()
-  const params = new URLSearchParams()
-  params.append('page', String(page))
-  params.append('page_size', String(pageSize))
-
-  return httpFetch(`${config.api_base_url}/api/v1/identities/${identityId}/faces?${params.toString()}`)
+  return httpFetch(`${config.api_base_url}/api/v1/identity/${identityId}/files?page_size=${pageSize}&offset=${(page-1)*pageSize}`)
 }

 // ── Config helpers ──────────────────────────────────────────────────────
--- a/portal/src/components/FaceTraceTimeline.vue
+++ b/portal/src/components/FaceTraceTimeline.vue
@@ -0,0 +1,350 @@
+<template>
+  <div class="space-y-4">
+    <!-- Header -->
+    <div class="flex items-center justify-between">
+      <h3 class="text-xl font-semibold flex items-center gap-2">
+        <span>臉部追蹤</span>
+        <span class="text-sm text-gray-400 font-normal">({{ totalTraces }} 個追蹤, {{ totalFaces }} 個臉孔)</span>
+      </h3>
+      <div class="flex items-center gap-2">
+        <select v-model="sortBy" @change="loadTraces"
+          class="bg-gray-700 text-sm rounded px-3 py-1.5 border border-gray-600">
+          <option value="face_count">臉孔數</option>
+          <option value="duration">持續時間</option>
+          <option value="first_appearance">首次出現</option>
+        </select>
+        <select v-model="limit" @change="loadTraces"
+          class="bg-gray-700 text-sm rounded px-3 py-1.5 border border-gray-600">
+          <option :value="50">50 筆</option>
+          <option :value="100">100 筆</option>
+          <option :value="500">500 筆</option>
+        </select>
+      </div>
+    </div>
+
+    <!-- Step 3: Filter Bar -->
+    <div class="bg-gray-750 rounded-lg p-4 border border-gray-700 grid grid-cols-2 md:grid-cols-4 gap-4 text-sm">
+      <div>
+        <label class="text-gray-400 text-xs block mb-1">最少臉孔</label>
+        <input type="number" min="1" max="100" v-model.number="filterMinFaces"
+          @change="loadTraces"
+          class="w-full bg-gray-700 rounded px-2 py-1.5 border border-gray-600 text-white" />
+      </div>
+      <div>
+        <label class="text-gray-400 text-xs block mb-1">最小信心</label>
+        <input type="range" min="0" max="100" v-model.number="filterMinConfPct"
+          @change="loadTraces"
+          class="w-full accent-blue-500" />
+        <span class="text-gray-500 text-xs">{{ filterMinConfPct }}%</span>
+      </div>
+      <div>
+        <label class="text-gray-400 text-xs block mb-1">最大信心</label>
+        <input type="range" min="0" max="100" v-model.number="filterMaxConfPct"
+          @change="loadTraces"
+          class="w-full accent-blue-500" />
+        <span class="text-gray-500 text-xs">{{ filterMaxConfPct }}%</span>
+      </div>
+      <div class="flex items-end">
+        <button @click="resetFilters"
+          class="px-3 py-1.5 bg-gray-700 hover:bg-gray-600 rounded text-xs transition">
+          重設
+        </button>
+      </div>
+    </div>
+
+    <!-- Step 2: Timeline Bar Chart -->
+    <div v-if="Object.keys(traces).length > 0" class="bg-gray-800 rounded-lg p-4 border border-gray-700 overflow-x-auto">
+      <div class="relative" :style="{ height: timelineHeight + 'px' }">
+        <!-- Time axis -->
+        <div class="absolute bottom-0 left-0 right-0 flex text-xs text-gray-500">
+          <div v-for="t in timeTicks" :key="t"
+            class="flex-1 border-l border-gray-700 pl-1">
+            {{ t }}s
+          </div>
+        </div>
+        <!-- Trace bars -->
+        <div v-for="(trace, i) in topTracesForTimeline" :key="trace.trace_id"
+          class="absolute left-0 right-0 flex items-center cursor-pointer hover:opacity-80 transition"
+          :style="{
+            bottom: barPosition(i) + '%',
+            height: barHeight() + '%'
+          }"
+          @click="toggleExpand(trace.trace_id)">
+          <div class="h-full rounded-sm transition-all"
+            :style="{
+              width: barWidthPct(trace) + '%',
+              backgroundColor: barColor(trace.avg_confidence),
+              marginLeft: barOffsetPct(trace) + '%'
+            }"
+            :title="`#${trace.trace_id}: ${trace.face_count} faces`">
+          </div>
+          <span v-if="barWidthPct(trace) > 8"
+            class="absolute left-1 text-xs text-white truncate pointer-events-none">
+            #{{ trace.trace_id }}
+          </span>
+        </div>
+      </div>
+    </div>
+
+    <!-- Grid -->
+    <div v-if="loading" class="flex justify-center py-8">
+      <div class="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-500"></div>
+    </div>
+
+    <div v-else-if="error" class="bg-red-900/30 text-red-300 p-4 rounded-lg text-sm">
+      {{ error }}
+    </div>
+
+    <div v-else-if="Object.keys(traces).length === 0" class="text-gray-500 text-center py-8 text-sm">
+      尚無臉部追蹤資料
+    </div>
+
+    <div v-else class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-3">
+      <div v-for="trace in traces" :key="trace.trace_id"
+        class="bg-gray-800 rounded-lg border border-gray-700 overflow-hidden hover:border-blue-500/50 transition cursor-pointer"
+        @click="toggleExpand(trace.trace_id)">
+        <div class="aspect-video bg-gray-900 relative overflow-hidden">
+          <img v-if="trace.sample_face_id"
+            :src="`${apiBase}/api/v1/file/${fileUuid}/trace/${trace.trace_id}/video`"
+            :alt="`Trace ${trace.trace_id}`"
+            class="w-full h-full object-cover"
+            loading="lazy" />
+          <div class="absolute top-2 left-2 bg-black/70 text-xs px-2 py-0.5 rounded font-mono">
+            #{{ trace.trace_id }}
+          </div>
+          <div class="absolute bottom-2 right-2 bg-black/70 text-xs px-2 py-0.5 rounded"
+            :class="confidenceColor(trace.avg_confidence)">
+            {{ (trace.avg_confidence * 100).toFixed(0) }}%
+          </div>
+        </div>
+        <div class="p-3 text-sm space-y-1">
+          <div class="flex justify-between text-gray-400">
+            <span>臉孔: <strong class="text-white">{{ trace.face_count }}</strong></span>
+            <span>{{ trace.first_sec.toFixed(1) }}s - {{ trace.last_sec.toFixed(1) }}s</span>
+          </div>
+          <div class="flex justify-between text-gray-400">
+            <span>幀: {{ trace.first_frame }}-{{ trace.last_frame }}</span>
+            <span>持續 {{ trace.duration_sec.toFixed(1) }}s</span>
+          </div>
+          <div class="w-full bg-gray-700 rounded-full h-1 mt-1">
+            <div class="bg-blue-500 h-1 rounded-full transition-all"
+              :style="{ width: barWidth(trace) }">
+            </div>
+          </div>
+        </div>
+
+        <!-- Step 1: Expandable Detail -->
+        <div v-if="expandedTrace === trace.trace_id"
+          class="border-t border-gray-700 bg-gray-850"
+          @click.stop>
+          <div class="p-3">
+            <div v-if="loadingFaces[trace.trace_id]" class="flex justify-center py-4">
+              <div class="animate-spin rounded-full h-6 w-6 border-b-2 border-blue-500"></div>
+            </div>
+            <div v-else-if="faceErrors[trace.trace_id]" class="text-red-400 text-xs">
+              {{ faceErrors[trace.trace_id] }}
+            </div>
+            <div v-else-if="traceFaces[trace.trace_id]?.length" class="space-y-2">
+              <div class="text-xs text-gray-500 mb-2">
+                共 {{ faceTotals[trace.trace_id] || 0 }} 個臉孔偵測
+              </div>
+              <div class="grid grid-cols-4 sm:grid-cols-6 md:grid-cols-8 gap-1.5 max-h-48 overflow-y-auto">
+                <div v-for="face in traceFaces[trace.trace_id]" :key="face.id"
+                  class="relative aspect-square bg-gray-900 rounded overflow-hidden group"
+                  :class="face.interpolated ? 'opacity-40' : ''">
+                  <img v-if="!face.interpolated"
+                    :src="`${apiBase}/api/v1/file/${fileUuid}/thumbnail?frame=${face.start_frame}&x=${face.x}&y=${face.y}&w=${face.width}&h=${face.height}`"
+                    :alt="`Frame ${face.start_frame}`"
+                    class="w-full h-full object-cover"
+                    loading="lazy"
+                    @error="onImgError" />
+                  <div v-else
+                    class="w-full h-full flex items-center justify-center border border-dashed border-gray-600 rounded text-gray-600 text-[9px]">
+                    {{ face.start_frame }}
+                  </div>
+                  <div class="absolute bottom-0 inset-x-0 bg-black/70 text-[9px] text-gray-300 px-1 truncate opacity-0 group-hover:opacity-100 transition">
+                    #{{ face.start_frame }} {{ face.interpolated ? '' : (face.confidence * 100).toFixed(0) + '%' }}
+                  </div>
+                </div>
+              </div>
+            </div>
+          </div>
+        </div>
+      </div>
+    </div>
+  </div>
+</template>
+
+<script setup lang="ts">
+import { ref, computed, onMounted } from 'vue'
+import { getCurrentConfig, httpFetch } from '@/api/client'
+
+// Inputs: the file whose traces are shown and its total duration, which is
+// used as the timeline's horizontal extent.
+const props = defineProps<{
+  fileUuid: string
+  totalDuration: number
+}>()
+
+// Listing state for the trace grid and timeline.
+const apiBase = ref('')
+const traces = ref<any[]>([])
+const totalTraces = ref(0)
+const totalFaces = ref(0)
+const loading = ref(false)
+const error = ref('')
+const sortBy = ref('face_count')
+const limit = ref(100)
+
+// Filter state (confidence bounds are held as percentages, 0-100)
+const filterMinFaces = ref(1)
+const filterMinConfPct = ref(0)
+const filterMaxConfPct = ref(100)
+
+// Expanded trace detail, with per-trace caches keyed by trace id
+const expandedTrace = ref<number | null>(null)
+const traceFaces = ref<Record<number, any[]>>({})
+const faceTotals = ref<Record<number, number>>({})
+const loadingFaces = ref<Record<number, boolean>>({})
+const faceErrors = ref<Record<number, string>>({})
+
+// Timeline extent; falls back to 1 when totalDuration is 0 or missing so the
+// percentage helpers never divide by zero. (A further `|| 3000` fallback was
+// unreachable because 1 is always truthy.)
+const duration = computed(() => props.totalDuration || 1)
+
+// Step 2: Timeline helpers
+// Upper bound on the number of traces drawn as timeline bars.
+const timelineMaxTraces = 30
+// Timeline container height in pixels (bound in the template).
+const timelineHeight = 120
+
+// Traces with the most faces first, capped at timelineMaxTraces. Works on a
+// copy so the grid keeps the order returned by the API.
+const topTracesForTimeline = computed(() =>
+  traces.value
+    .slice()
+    .sort((left, right) => right.face_count - left.face_count)
+    .slice(0, timelineMaxTraces),
+)
+
+// Axis tick values: multiples of a step that is about a tenth of the
+// duration, rounded to a multiple of 30 and never smaller than 30.
+const timeTicks = computed(() => {
+  const span = duration.value
+  const step = Math.max(30, Math.round(span / 10 / 30) * 30)
+  const marks: number[] = []
+  let t = 0
+  while (t <= span) {
+    marks.push(t)
+    t += step
+  }
+  return marks
+})
+
+// Vertical gap between timeline bars, in percent of the container height.
+const timelineBarGapPct = 1
+
+/**
+ * Bottom offset of the timeline bar at `index`, in percent of the container.
+ *
+ * @param index - Zero-based position of the trace in topTracesForTimeline.
+ * @returns The bar's `bottom` percentage.
+ */
+function barPosition(index: number): number {
+  return timelineBarGapPct + index * (barHeight() + timelineBarGapPct)
+}
+
+/**
+ * Height of one timeline bar, in percent of the container.
+ *
+ * All bars plus the gaps around them add up to at most 100%, so every bar
+ * stays inside the container. The previous fixed 8% minimum pushed bars past
+ * the top edge as soon as more than 11 traces were shown.
+ *
+ * @returns The bar height percentage, or 0 when there are no traces.
+ */
+function barHeight(): number {
+  const count = topTracesForTimeline.value.length
+  if (count === 0) return 0
+  return Math.max(0, (100 - timelineBarGapPct * (count + 1)) / count)
+}
+
+/**
+ * Width of a trace's timeline bar as a percentage of the total duration.
+ *
+ * A missing/zero `last_sec` is treated as 1 and a missing `first_sec` as 0;
+ * the result is never smaller than 0.5 so short traces stay visible.
+ */
+function barWidthPct(trace: any): number {
+  const span = duration.value
+  if (!span) return 0
+  const endSec = trace.last_sec || 1
+  const startSec = trace.first_sec || 0
+  const pct = (endSec - startSec) / span * 100
+  return Math.max(0.5, pct)
+}
+
+/**
+ * Left offset of a trace's timeline bar as a percentage of the total
+ * duration; a missing `first_sec` counts as 0.
+ */
+function barOffsetPct(trace: any): number {
+  const span = duration.value
+  return span ? ((trace.first_sec || 0) / span) * 100 : 0
+}
+
+/**
+ * Map an average confidence to the timeline bar fill colour:
+ * green from 0.8, yellow from 0.6, red otherwise.
+ */
+function barColor(conf: number): string {
+  const stops: Array<[number, string]> = [
+    [0.8, 'rgba(74, 222, 128, 0.7)'],
+    [0.6, 'rgba(250, 204, 21, 0.7)'],
+  ]
+  for (const [threshold, color] of stops) {
+    if (conf >= threshold) return color
+  }
+  return 'rgba(248, 113, 113, 0.7)'
+}
+
+/**
+ * Map an average confidence to the text colour class of the badge:
+ * green from 0.8, yellow from 0.6, red otherwise.
+ */
+function confidenceColor(conf: number): string {
+  const stops: Array<[number, string]> = [
+    [0.8, 'text-green-400'],
+    [0.6, 'text-yellow-400'],
+  ]
+  for (const [threshold, cls] of stops) {
+    if (conf >= threshold) return cls
+  }
+  return 'text-red-400'
+}
+
+/**
+ * CSS width for a card's share bar: the trace's face count relative to the
+ * total face count, capped at 100%. Yields "0%" while no traces are reported.
+ */
+function barWidth(trace: any): string {
+  let pct = 0
+  if (totalTraces.value > 0) {
+    const denominator = totalFaces.value || 1
+    pct = (trace.face_count / denominator) * 100
+  }
+  return `${Math.min(pct, 100)}%`
+}
+
+/** Hide an image element whose source failed to load. */
+function onImgError(e: Event) {
+  const { style } = e.target as HTMLImageElement
+  style.display = 'none'
+}
+
+/** Restore every filter to its initial value and reload the trace list. */
+function resetFilters() {
+  const initialValues: Array<[{ value: number }, number]> = [
+    [filterMinFaces, 1],
+    [filterMinConfPct, 0],
+    [filterMaxConfPct, 100],
+  ]
+  for (const [field, initial] of initialValues) {
+    field.value = initial
+  }
+  loadTraces()
+}
+
+/**
+ * Expand the detail panel of a trace, or collapse it when it is already
+ * open. Faces are fetched on first expansion only; later expansions reuse
+ * the cached list.
+ */
+async function toggleExpand(traceId: number) {
+  const collapsing = expandedTrace.value === traceId
+  expandedTrace.value = collapsing ? null : traceId
+  if (collapsing || traceFaces.value[traceId]) return
+  await loadTraceFaces(traceId)
+}
+
+/**
+ * Load up to 200 faces (interpolated frames included) for one trace and
+ * store them, the total count, and any error message under the trace id.
+ * The per-trace loading flag is cleared whether or not the request succeeds.
+ */
+async function loadTraceFaces(traceId: number) {
+  loadingFaces.value[traceId] = true
+  faceErrors.value[traceId] = ''
+  try {
+    const baseUrl = getCurrentConfig().api_base_url
+    const endpoint = `${baseUrl}/api/v1/file/${props.fileUuid}/trace/${traceId}/faces?limit=200&interpolate=true`
+    const payload = await httpFetch<any>(endpoint)
+    traceFaces.value[traceId] = payload.faces || []
+    faceTotals.value[traceId] = payload.total || 0
+  } catch (err: any) {
+    faceErrors.value[traceId] = err?.message || '載入失敗'
+  } finally {
+    loadingFaces.value[traceId] = false
+  }
+}
+
+/**
+ * Fetch the trace list for the current file using the selected sort order,
+ * limit and filters, then publish the traces and totals. Confidence filters
+ * are sent as fractions (percent / 100). Errors are surfaced through `error`
+ * and the loading flag is always cleared.
+ */
+async function loadTraces() {
+  loading.value = true
+  error.value = ''
+  try {
+    const config = getCurrentConfig()
+    apiBase.value = config.api_base_url
+
+    // Any sort key other than the two below is sent as 'first_appearance'.
+    const passthroughSorts = ['face_count', 'duration']
+    const apiSort = passthroughSorts.includes(sortBy.value) ? sortBy.value : 'first_appearance'
+
+    const endpoint = `${config.api_base_url}/api/v1/file/${props.fileUuid}/face_trace/sortby`
+    const query = {
+      sort_by: apiSort,
+      limit: limit.value,
+      min_faces: filterMinFaces.value,
+      min_confidence: filterMinConfPct.value / 100,
+      max_confidence: filterMaxConfPct.value / 100,
+    }
+    const payload = await httpFetch<any>(endpoint, {
+      method: 'POST',
+      body: JSON.stringify(query),
+    })
+
+    traces.value = payload.traces || []
+    totalTraces.value = payload.total_traces || 0
+    totalFaces.value = payload.total_faces || 0
+  } catch (err: any) {
+    error.value = err?.message || '載入臉部追蹤資料失敗'
+  } finally {
+    loading.value = false
+  }
+}
+
+// Initial load once the component is mounted.
+onMounted(() => {
+  loadTraces()
+})
+</script>
--- a/portal/src/views/VideoDetailView.vue
+++ b/portal/src/views/VideoDetailView.vue
@@ -146,6 +146,14 @@
            </div>
          </div>
        </div>
+
+        <!-- Face Traces -->
+        <div class="bg-gray-800 rounded-lg p-6 border border-gray-700">
+          <FaceTraceTimeline
+            :file-uuid="uuid"
+            :total-duration="probeInfo?.format?.duration || 0"
+            @select="handleTraceSelect" />
+        </div>
      </template>

      <!-- 3. Generic Probe Info -->
@@ -191,6 +199,7 @@
 import { ref, computed, onMounted } from 'vue'
 import { useRoute, useRouter } from 'vue-router'
 import { getVideos, registerVideo, unregisterVideo, processVideo } from '@/api/client'
+import FaceTraceTimeline from '@/components/FaceTraceTimeline.vue'

 const route = useRoute()
 const router = useRouter()
@@ -310,6 +319,11 @@ async function handleProcess() {
  }
 }

+/**
+ * Open the face-candidates view filtered to one trace of this video.
+ *
+ * NOTE(review): this is bound to FaceTraceTimeline's `select` event, but
+ * that component declares no emits and never emits `select`, so the handler
+ * looks unreachable at present - confirm.
+ * NOTE(review): `uuid` is interpolated directly into the query string; if it
+ * is a ref rather than a plain string it needs `.value` - verify against its
+ * declaration.
+ */
+function handleTraceSelect(traceId: number) {
+  // Navigate to face candidates filtered by this trace
+  router.push(`/faces/candidates?trace_id=${traceId}&file_uuid=${uuid}`)
+}
+
 async function loadVideoDetail() {
  loading.value = true
  try {
--- a/scripts/embeddinggemma_server.py
+++ b/scripts/embeddinggemma_server.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+"""EmbeddingGemma HTTP server - Metal GPU (MPS) accelerated, compatible with M4/M5."""
+
+import argparse, json, time, torch
+from flask import Flask, request, jsonify
+from transformers import AutoModel, AutoTokenizer
+import numpy as np
+
+app = Flask(__name__)
+
+MODEL = None
+TOKENIZER = None
+DEVICE = None
+
+def load_model(model_path: str = "google/embeddinggemma-300m"):
+    global MODEL, TOKENIZER, DEVICE
+    if MODEL is not None:
+        return
+    DEVICE = "mps" if torch.backends.mps.is_available() else "cpu"
+    dtype = torch.float32
+    print(f"[EmbeddingGemma] Loading model on {DEVICE} (dtype={dtype})...")
+    t0 = time.time()
+    MODEL = AutoModel.from_pretrained(
+        model_path,
+        torch_dtype=dtype,
+        trust_remote_code=True,
+    ).eval().to(DEVICE)
+    TOKENIZER = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    print(f"[EmbeddingGemma] Loaded in {time.time()-t0:.1f}s on {DEVICE}")
+
+def embed(texts: list[str]) -> list[list[float]]:
+    inputs = TOKENIZER(texts, padding=True, truncation=True, return_tensors="pt").to(DEVICE)
+    with torch.no_grad():
+        outputs = MODEL(**inputs)
+        mask = inputs["attention_mask"].unsqueeze(-1).to(outputs.last_hidden_state.dtype)
+        pooled = (outputs.last_hidden_state * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
+        pooled = torch.nn.functional.normalize(pooled, p=2, dim=1)
+    return pooled.cpu().numpy().tolist()
+
+@app.route("/v1/embeddings", methods=["POST"])
+def embeddings():
+    data = request.get_json()
+    texts = data.get("input", [])
+    if isinstance(texts, str):
+        texts = [texts]
+    if not texts:
+        return jsonify({"error": "empty input"}), 400
+    try:
+        emb = embed(texts)
+        result = {
+            "data": [{"embedding": e, "index": i} for i, e in enumerate(emb)],
+            "model": "embeddinggemma-300m",
+        }
+        return jsonify(result)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+@app.route("/health", methods=["GET"])
+def health():
+    return jsonify({"status": "ok", "device": str(DEVICE)})
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--port", type=int, default=11436)
+    parser.add_argument("--model", type=str, default="google/embeddinggemma-300m")
+    args = parser.parse_args()
+    load_model(args.model)
+    app.run(host="0.0.0.0", port=args.port, threaded=True)