796 lines
37 KiB
HTML
796 lines
37 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="UTF-8">
|
||
<title>08 Media - Momentry API Docs</title>
|
||
<style>
|
||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f5f5f5; color: #333; padding: 40px; }
|
||
.container { max-width: 960px; margin: 0 auto; background: white; border-radius: 12px; box-shadow: 0 2px 12px rgba(0,0,0,0.08); padding: 40px; }
|
||
h1 { font-size: 24px; margin: 24px 0 12px; }
|
||
h2 { font-size: 20px; margin: 20px 0 10px; color: #222; }
|
||
h3 { font-size: 16px; margin: 16px 0 8px; color: #444; }
|
||
p { line-height: 1.6; margin: 8px 0; }
|
||
table { border-collapse: collapse; width: 100%; margin: 12px 0; font-size: 14px; }
|
||
th, td { border: 1px solid #ddd; padding: 8px 12px; text-align: left; }
|
||
th { background: #f0f0f0; font-weight: 600; }
|
||
code { background: #f0f0f0; padding: 2px 6px; border-radius: 3px; font-size: 13px; }
|
||
pre { background: #f8f8f8; border: 1px solid #ddd; border-radius: 6px; padding: 12px; overflow-x: auto; margin: 12px 0; }
|
||
pre code { background: none; padding: 0; }
|
||
a { color: #0066cc; }
|
||
.back { display: inline-block; margin-bottom: 20px; color: #666; }
|
||
.back:hover { color: #333; }
|
||
.topbar { display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px; }
|
||
.logout-btn { font-size: 13px; color: #999; text-decoration: none; }
|
||
.logout-btn:hover { color: #cc0000; }
|
||
</style>
|
||
</head>
|
||
<body>
|
||
<div class="container">
|
||
<div class="topbar">
|
||
<a class="back" href="index.html">← Back to index</a>
|
||
<a class="logout-btn" href="#" onclick="fetch('/api/v1/auth/logout',{method:'POST'}).then(()=>window.location.reload());return false">Logout</a>
|
||
</div>
|
||
<!-- module: media -->
|
||
<!-- description: Video streaming & frame extraction -->
|
||
<!-- depends: 01_auth -->
|
||
|
||
<h2>Video Streaming &amp; Frame Extraction</h2>
|
||
<p>All video streaming endpoints support the following common query parameters:</p>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Field</th>
|
||
<th>Type</th>
|
||
<th>Required</th>
|
||
<th>Default</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>mode</code></td>
|
||
<td>string</td>
|
||
<td>No</td>
|
||
<td><code>normal</code></td>
|
||
<td><code>normal</code> or <code>debug</code> (draws detection overlays)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>audio</code></td>
|
||
<td>string</td>
|
||
<td>No</td>
|
||
<td><code>on</code></td>
|
||
<td><code>on</code> or <code>off</code></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<hr />
|
||
<h3><code>GET /api/v1/file/:file_uuid/video</code></h3>
|
||
<p>Stream the full video file with range support for seeking.</p>
|
||
<p><strong>Auth</strong>: Required
|
||
<strong>Scope</strong>: file-level</p>
|
||
<h4>Response</h4>
|
||
<ul>
|
||
<li><strong>200</strong>: Video stream (<code>Content-Type</code> based on file extension)</li>
|
||
<li><strong>206</strong>: Partial content (range request)</li>
|
||
<li>Supports <code>Range</code> header for seeking</li>
|
||
</ul>
|
||
<hr />
|
||
<h3><code>GET /api/v1/file/:file_uuid/trace/:trace_id/video</code></h3>
|
||
<p>Stream video with highlights for a specific face trace (follows a single person across frames with bounding box overlay).</p>
|
||
<p><strong>Auth</strong>: Required
|
||
<strong>Scope</strong>: file-level</p>
|
||
<hr />
|
||
<h3><code>GET /api/v1/file/:file_uuid/trace/:trace_id/representative-face</code></h3>
|
||
<p>Find the best single face to represent this trace. Uses a two-stage selection: SQL (area × confidence → top 10) then FFmpeg <code>blurdetect</code> (sharpness → pick the least blurry).</p>
|
||
<p><strong>Auth</strong>: Required
|
||
<strong>Scope</strong>: file-level</p>
|
||
<h4>Example</h4>
|
||
<div class="codehilite"><pre><span></span><code>curl<span class="w"> </span>-s<span class="w"> </span><span class="s2">"</span><span class="nv">$API</span><span class="s2">/api/v1/file/</span><span class="nv">$FILE_UUID</span><span class="s2">/trace/1939/representative-face"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"X-API-Key: </span><span class="nv">$KEY</span><span class="s2">"</span>
|
||
</code></pre></div>
|
||
|
||
<h4>Response (200)</h4>
|
||
<div class="codehilite"><pre><span></span><code><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"success"</span><span class="p">:</span><span class="w"> </span><span class="kc">true</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"file_uuid"</span><span class="p">:</span><span class="w"> </span><span class="s2">"aeed71342a899fe4b4c57b7d41bcb692"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"trace_id"</span><span class="p">:</span><span class="w"> </span><span class="mi">1939</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"face_count"</span><span class="p">:</span><span class="w"> </span><span class="mi">538</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"representative"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"frame_number"</span><span class="p">:</span><span class="w"> </span><span class="mi">68193</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"timestamp_secs"</span><span class="p">:</span><span class="w"> </span><span class="mf">2727.72</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"bbox"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">"x"</span><span class="p">:</span><span class="w"> </span><span class="mi">347</span><span class="p">,</span><span class="w"> </span><span class="nt">"y"</span><span class="p">:</span><span class="w"> </span><span class="mi">378</span><span class="p">,</span><span class="w"> </span><span class="nt">"width"</span><span class="p">:</span><span class="w"> </span><span class="mi">427</span><span class="p">,</span><span class="w"> </span><span class="nt">"height"</span><span class="p">:</span><span class="w"> </span><span class="mi">427</span><span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="nt">"confidence"</span><span class="p">:</span><span class="w"> </span><span class="mf">0.760</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"quality_score"</span><span class="p">:</span><span class="w"> </span><span class="mi">138516</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"blur_score"</span><span class="p">:</span><span class="w"> </span><span class="mf">9.46</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
|
||
<h4>Response Fields</h4>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Field</th>
|
||
<th>Type</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>trace_id</code></td>
|
||
<td>integer</td>
|
||
<td>Face trace ID</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>face_count</code></td>
|
||
<td>integer</td>
|
||
<td>Total face detections in this trace</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>representative.frame_number</code></td>
|
||
<td>integer</td>
|
||
<td>Frame number of the selected face (primary coordinate)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>representative.timestamp_secs</code></td>
|
||
<td>float</td>
|
||
<td>Time in seconds (derived from <code>frame_number / fps</code>)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>representative.bbox</code></td>
|
||
<td>object</td>
|
||
<td>Bounding box <code>{x, y, width, height}</code></td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>representative.confidence</code></td>
|
||
<td>float</td>
|
||
<td>Detection confidence (0.0–1.0)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>representative.quality_score</code></td>
|
||
<td>float</td>
|
||
<td>Pre-selection score (<code>area × confidence</code>)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>representative.blur_score</code></td>
|
||
<td>float</td>
|
||
<td>FFmpeg blurdetect result (lower = sharper)</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<h4>Error Responses</h4>
|
||
<hr />
|
||
<h3><code>GET /api/v1/file/:file_uuid/trace/:trace_id/thumbnail</code></h3>
|
||
<p>Extract the best face image for a trace as JPEG (320×320). Internally selects the face using the same two-stage algorithm as <code>representative-face</code>, then crops via FFmpeg. The result is cacheable for 24 hours.</p>
|
||
<p><strong>Auth</strong>: Required
|
||
<strong>Scope</strong>: file-level</p>
|
||
<h4>Example</h4>
|
||
<div class="codehilite"><pre><span></span><code>curl<span class="w"> </span>-s<span class="w"> </span><span class="s2">"</span><span class="nv">$API</span><span class="s2">/api/v1/file/</span><span class="nv">$FILE_UUID</span><span class="s2">/trace/1939/thumbnail"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"X-API-Key: </span><span class="nv">$KEY</span><span class="s2">"</span><span class="w"> </span>-o<span class="w"> </span>trace_1939_face.jpg
|
||
</code></pre></div>
|
||
|
||
<h4>Response</h4>
|
||
<ul>
|
||
<li><strong>200</strong>: <code>image/jpeg</code> binary data (320×320 cropped face)</li>
|
||
<li><strong>404</strong>: File or trace not found, or no suitable face</li>
|
||
<li><strong>500</strong>: FFmpeg or database error</li>
|
||
</ul>
|
||
<hr />
|
||
<h3><code>GET /api/v1/file/:file_uuid/identities/:identity_uuid_a/co-occur-with/:identity_uuid_b</code></h3>
|
||
<p>Find the first frame where two identities appear together, with representative face thumbnails for both.</p>
|
||
<p><strong>Auth</strong>: Required
|
||
<strong>Scope</strong>: file-level</p>
|
||
<h4>Example</h4>
|
||
<div class="codehilite"><pre><span></span><code><span class="c1"># Audrey Hepburn &amp; Cary Grant: first time in the same frame</span>
|
||
curl<span class="w"> </span>-s<span class="w"> </span><span class="s2">"</span><span class="nv">$API</span><span class="s2">/api/v1/file/</span><span class="nv">$FILE_UUID</span><span class="s2">/identities/</span><span class="nv">$AUDREY_UUID</span><span class="s2">/co-occur-with/</span><span class="nv">$CARY_UUID</span><span class="s2">"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"X-API-Key: </span><span class="nv">$KEY</span><span class="s2">"</span><span class="w"> </span><span class="p">|</span><span class="w"> </span>jq<span class="w"> </span><span class="s1">'{identity_a: .identity_a.name, identity_b: .identity_b.name, first_frame: .first_cooccurrence.frame_number}'</span>
|
||
</code></pre></div>
|
||
|
||
<h4>Response (200)</h4>
|
||
<div class="codehilite"><pre><span></span><code><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"success"</span><span class="p">:</span><span class="w"> </span><span class="kc">true</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"file_uuid"</span><span class="p">:</span><span class="w"> </span><span class="s2">"aeed71342a899fe4b4c57b7d41bcb692"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"identity_a"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"identity_uuid"</span><span class="p">:</span><span class="w"> </span><span class="s2">"c3545906-c82d-4b66-aa1d-150bc02decce"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Audrey Hepburn"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"trace_id"</span><span class="p">:</span><span class="w"> </span><span class="mi">920</span>
|
||
<span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="nt">"identity_b"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"identity_uuid"</span><span class="p">:</span><span class="w"> </span><span class="s2">"2b0ddefe-e2a9-4533-9308-b375594604d5"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Cary Grant"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"trace_id"</span><span class="p">:</span><span class="w"> </span><span class="mi">919</span>
|
||
<span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="nt">"first_cooccurrence"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"frame_number"</span><span class="p">:</span><span class="w"> </span><span class="mi">38165</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"timestamp_secs"</span><span class="p">:</span><span class="w"> </span><span class="mf">1526.60</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"total_cooccurrence_frames"</span><span class="p">:</span><span class="w"> </span><span class="mi">3136</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"representative_face_a"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"frame_number"</span><span class="p">:</span><span class="w"> </span><span class="mi">38199</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"bbox"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">"x"</span><span class="p">:</span><span class="w"> </span><span class="mi">122</span><span class="p">,</span><span class="w"> </span><span class="nt">"y"</span><span class="p">:</span><span class="w"> </span><span class="mi">339</span><span class="p">,</span><span class="w"> </span><span class="nt">"width"</span><span class="p">:</span><span class="w"> </span><span class="mi">176</span><span class="p">,</span><span class="w"> </span><span class="nt">"height"</span><span class="p">:</span><span class="w"> </span><span class="mi">176</span><span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="nt">"confidence"</span><span class="p">:</span><span class="w"> </span><span class="mf">0.832</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"thumbnail_url"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/api/v1/file/aeed71342.../trace/920/thumbnail"</span>
|
||
<span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="nt">"representative_face_b"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"frame_number"</span><span class="p">:</span><span class="w"> </span><span class="mi">38291</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"bbox"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">"x"</span><span class="p">:</span><span class="w"> </span><span class="mi">511</span><span class="p">,</span><span class="w"> </span><span class="nt">"y"</span><span class="p">:</span><span class="w"> </span><span class="mi">315</span><span class="p">,</span><span class="w"> </span><span class="nt">"width"</span><span class="p">:</span><span class="w"> </span><span class="mi">192</span><span class="p">,</span><span class="w"> </span><span class="nt">"height"</span><span class="p">:</span><span class="w"> </span><span class="mi">192</span><span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="nt">"confidence"</span><span class="p">:</span><span class="w"> </span><span class="mf">0.791</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"thumbnail_url"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/api/v1/file/aeed71342.../trace/919/thumbnail"</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
|
||
<h4>Response Fields</h4>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Field</th>
|
||
<th>Type</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>identity_a.name</code></td>
|
||
<td>string</td>
|
||
<td>First identity name</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>identity_b.name</code></td>
|
||
<td>string</td>
|
||
<td>Second identity name</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>first_cooccurrence.frame_number</code></td>
|
||
<td>integer</td>
|
||
<td>First frame where both appear</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>first_cooccurrence.timestamp_secs</code></td>
|
||
<td>float</td>
|
||
<td>Time in seconds</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>first_cooccurrence.total_cooccurrence_frames</code></td>
|
||
<td>integer</td>
|
||
<td>Total frames with both present</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>first_cooccurrence.representative_face_a/b</code></td>
|
||
<td>object</td>
|
||
<td>Best face thumbnail data for each identity</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<h4>Error Responses</h4>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>HTTP</th>
|
||
<th>When</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>404</code></td>
|
||
<td>File or identity not found</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>404</code></td>
|
||
<td>The two identities never co-occur in this file</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>500</code></td>
|
||
<td>Database or FFmpeg error</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<hr />
<h3><code>GET /api/v1/file/:file_uuid/video/bbox</code></h3>
|
||
<p>Stream video with bounding box overlay for all detected objects/faces.</p>
|
||
<p><strong>Auth</strong>: Required
|
||
<strong>Scope</strong>: file-level</p>
|
||
<p>Uses a built-in 5×7 bitmap font renderer to draw labels directly on video frames via FFmpeg <code>drawtext</code> filter.</p>
|
||
<hr />
|
||
<h3><code>GET /api/v1/file/:file_uuid/thumbnail</code></h3>
|
||
<p>Extract a single frame from a video as a JPEG image. Uses the FFmpeg <code>select</code> filter.</p>
|
||
<p>When <code>frame</code> is omitted, the system automatically selects the best representative frame using the TKG bridge (see algorithm below).</p>
|
||
<p><strong>Auth</strong>: Required
|
||
<strong>Scope</strong>: file-level</p>
|
||
<h4>Query Parameters</h4>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Field</th>
|
||
<th>Type</th>
|
||
<th>Required</th>
|
||
<th>Default</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>frame</code></td>
|
||
<td>integer</td>
|
||
<td>No</td>
|
||
<td>auto-detect</td>
|
||
<td>Zero-based frame number to extract. Omit for auto-detect.</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>x</code></td>
|
||
<td>integer</td>
|
||
<td>No</td>
|
||
<td>—</td>
|
||
<td>Crop start X (left edge). Requires <code>y</code>, <code>w</code>, <code>h</code>.</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>y</code></td>
|
||
<td>integer</td>
|
||
<td>No</td>
|
||
<td>—</td>
|
||
<td>Crop start Y (top edge). Requires <code>x</code>, <code>w</code>, <code>h</code>.</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>w</code></td>
|
||
<td>integer</td>
|
||
<td>No</td>
|
||
<td>—</td>
|
||
<td>Crop width in pixels. Requires <code>x</code>, <code>y</code>, <code>h</code>.</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>h</code></td>
|
||
<td>integer</td>
|
||
<td>No</td>
|
||
<td>—</td>
|
||
<td>Crop height in pixels. Requires <code>x</code>, <code>y</code>, <code>w</code>.</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>All four crop params (<code>x</code>, <code>y</code>, <code>w</code>, <code>h</code>) must be provided together or omitted.</p>
|
||
<h4>Auto-detect Algorithm</h4>
|
||
<p>When <code>frame</code> is not provided, the endpoint finds the best frame using this fallback chain:</p>
|
||
<ol>
|
||
<li><strong>Main characters</strong>: find the two identities with the most face detections (TMDb source)</li>
|
||
<li><strong>Mutual gaze</strong>: if their face traces have a TKG <code>CO_OCCURS_WITH</code> edge with <code>mutual_gaze=true</code>, take <code>first_frame</code></li>
|
||
<li><strong>Co-occurrence</strong>: fallback to the first frame where both identities appear together</li>
|
||
<li><strong>Single identity</strong>: if only one main identity exists, take its highest-quality face frame</li>
|
||
<li><strong>Any identity</strong>: fallback to the best-quality face frame across all identities</li>
|
||
<li><strong>Error</strong>: if no face exists, returns <code>404</code></li>
|
||
</ol>
|
||
<p>The selected frame is constrained to the <strong>first half of the video</strong> (<code>total_frames / 2</code>).</p>
|
||
<h4>Examples</h4>
|
||
<div class="codehilite"><pre><span></span><code><span class="c1"># Auto-detect best representative frame</span>
|
||
curl<span class="w"> </span>-s<span class="w"> </span><span class="s2">"</span><span class="nv">$API</span><span class="s2">/api/v1/file/</span><span class="nv">$FILE_UUID</span><span class="s2">/thumbnail"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"X-API-Key: </span><span class="nv">$KEY</span><span class="s2">"</span><span class="w"> </span>-o<span class="w"> </span>representative.jpg
|
||
|
||
<span class="c1"># Extract frame 1000 (full frame)</span>
|
||
curl<span class="w"> </span>-s<span class="w"> </span><span class="s2">"</span><span class="nv">$API</span><span class="s2">/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/thumbnail?frame=1000"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer </span><span class="nv">$JWT</span><span class="s2">"</span><span class="w"> </span>-o<span class="w"> </span>frame_1000.jpg
|
||
|
||
<span class="c1"># Extract and crop face region (x=320, y=240, w=160, h=160)</span>
|
||
curl<span class="w"> </span>-s<span class="w"> </span><span class="s2">"</span><span class="nv">$API</span><span class="s2">/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/thumbnail?frame=1000&amp;x=320&amp;y=240&amp;w=160&amp;h=160"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer </span><span class="nv">$JWT</span><span class="s2">"</span><span class="w"> </span>-o<span class="w"> </span>face_crop.jpg
|
||
</code></pre></div>
|
||
|
||
<h4>Response</h4>
|
||
<ul>
|
||
<li><strong>200</strong>: <code>image/jpeg</code> binary data</li>
|
||
<li><strong>404</strong>: File not found / No faces in file (auto-detect)</li>
|
||
<li><strong>500</strong>: FFmpeg error (e.g., frame number exceeds video duration)</li>
|
||
</ul>
|
||
<h4>Technical Details</h4>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Detail</th>
|
||
<th>Value</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><strong>Backend</strong></td>
|
||
<td>FFmpeg (<code>ffmpeg-full</code>)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Filter</strong></td>
|
||
<td><code>select=eq(n\,FRAME)</code> to select frame, optional <code>crop=W:H:X:Y</code></td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Output</strong></td>
|
||
<td>Single JPEG via pipe (<code>image2pipe</code>, <code>mjpeg</code> codec)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Cache</strong></td>
|
||
<td><code>Cache-Control: public, max-age=86400</code> (24h)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Frame number</strong></td>
|
||
<td>Zero-based (<code>frame=0</code> = first frame of video)</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<hr />
|
||
<h3><code>GET /api/v1/file/:file_uuid/representative-frame</code></h3>
|
||
<p>Return JSON metadata about the best representative frame for the video. Uses the same auto-detect algorithm as <code>GET /api/v1/file/:file_uuid/thumbnail</code> (without crop support).</p>
|
||
<p><strong>Auth</strong>: Required
|
||
<strong>Scope</strong>: file-level</p>
|
||
<h4>Example</h4>
|
||
<div class="codehilite"><pre><span></span><code>curl<span class="w"> </span>-s<span class="w"> </span><span class="s2">"</span><span class="nv">$API</span><span class="s2">/api/v1/file/</span><span class="nv">$FILE_UUID</span><span class="s2">/representative-frame"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"X-API-Key: </span><span class="nv">$KEY</span><span class="s2">"</span><span class="w"> </span><span class="p">|</span><span class="w"> </span>jq<span class="w"> </span><span class="s1">'.'</span>
|
||
</code></pre></div>
|
||
|
||
<h4>Response (200)</h4>
|
||
<div class="codehilite"><pre><span></span><code><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"success"</span><span class="p">:</span><span class="w"> </span><span class="kc">true</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"file_uuid"</span><span class="p">:</span><span class="w"> </span><span class="s2">"aeed71342a899fe4b4c57b7d41bcb692"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"frame_number"</span><span class="p">:</span><span class="w"> </span><span class="mi">38165</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"timestamp_secs"</span><span class="p">:</span><span class="w"> </span><span class="mf">1526.6</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"face_quality"</span><span class="p">:</span><span class="w"> </span><span class="mf">37292.97</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"main_identities"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"identity_uuid"</span><span class="p">:</span><span class="w"> </span><span class="s2">"c3545906-c82d-4b66-aa1d-150bc02decce"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Audrey Hepburn"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"face_count"</span><span class="p">:</span><span class="w"> </span><span class="mi">16456</span>
|
||
<span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"identity_uuid"</span><span class="p">:</span><span class="w"> </span><span class="s2">"2b0ddefe-e2a9-4533-9308-b375594604d5"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Cary Grant"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"face_count"</span><span class="p">:</span><span class="w"> </span><span class="mi">10643</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="p">],</span>
|
||
<span class="w"> </span><span class="nt">"traces"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"trace_id"</span><span class="p">:</span><span class="w"> </span><span class="mi">919</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"identity_uuid"</span><span class="p">:</span><span class="w"> </span><span class="s2">"2b0ddefe-e2a9-4533-9308-b375594604d5"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Cary Grant"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"x"</span><span class="p">:</span><span class="w"> </span><span class="mi">764</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"y"</span><span class="p">:</span><span class="w"> </span><span class="mi">237</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"width"</span><span class="p">:</span><span class="w"> </span><span class="mi">199</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"height"</span><span class="p">:</span><span class="w"> </span><span class="mi">199</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"confidence"</span><span class="p">:</span><span class="w"> </span><span class="mf">0.8426</span>
|
||
<span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"trace_id"</span><span class="p">:</span><span class="w"> </span><span class="mi">920</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"identity_uuid"</span><span class="p">:</span><span class="w"> </span><span class="s2">"c3545906-c82d-4b66-aa1d-150bc02decce"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Audrey Hepburn"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"x"</span><span class="p">:</span><span class="w"> </span><span class="mi">1143</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"y"</span><span class="p">:</span><span class="w"> </span><span class="mi">312</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"width"</span><span class="p">:</span><span class="w"> </span><span class="mi">215</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"height"</span><span class="p">:</span><span class="w"> </span><span class="mi">215</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"confidence"</span><span class="p">:</span><span class="w"> </span><span class="mf">0.8068</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="p">]</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
|
||
<h4>Response Fields</h4>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Field</th>
|
||
<th>Type</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>frame_number</code></td>
|
||
<td>integer</td>
|
||
<td>Selected representative frame number (primary coordinate)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>timestamp_secs</code></td>
|
||
<td>float</td>
|
||
<td>Time in seconds (derived from <code>frame_number / fps</code>)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>face_quality</code></td>
|
||
<td>float</td>
|
||
<td>Quality score <code>area × confidence</code> of the best face at this frame</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>main_identities</code></td>
|
||
<td>array</td>
|
||
<td>Top 2 most frequent TMDb identities in the file</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>main_identities[].name</code></td>
|
||
<td>string</td>
|
||
<td>Identity display name</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>main_identities[].face_count</code></td>
|
||
<td>integer</td>
|
||
<td>Total number of face detections</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>traces</code></td>
|
||
<td>array</td>
|
||
<td>All face traces present at the selected frame</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>traces[].trace_id</code></td>
|
||
<td>integer</td>
|
||
<td>Face trace ID</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>traces[].identity_uuid</code></td>
|
||
<td>string or null</td>
|
||
<td>Matched identity UUID</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>traces[].name</code></td>
|
||
<td>string or null</td>
|
||
<td>Identity name</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>traces[].x, y, width, height</code></td>
|
||
<td>integer</td>
|
||
<td>Bounding box coordinates</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>traces[].confidence</code></td>
|
||
<td>float</td>
|
||
<td>Detection confidence (0.0–1.0)</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<h4>Error Responses</h4>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>HTTP</th>
|
||
<th>When</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>404</code></td>
|
||
<td>File not found / No faces in file</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>500</code></td>
|
||
<td>Database error</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>Extract a video clip (frame range or time range) as an MPEG-TS stream. Uses FFmpeg <code>-ss</code> fast seek.</p>
|
||
<p><strong>Auth</strong>: Required<br />
|
||
<strong>Scope</strong>: file-level</p>
|
||
<h4>Query Parameters</h4>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Field</th>
|
||
<th>Type</th>
|
||
<th>Required</th>
|
||
<th>Default</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>start_frame</code></td>
|
||
<td>integer</td>
|
||
<td>No*</td>
|
||
<td>—</td>
|
||
<td>Start frame (zero-based). <strong>Frame-accurate</strong> — use this for precision.</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>end_frame</code></td>
|
||
<td>integer</td>
|
||
<td>No*</td>
|
||
<td>—</td>
|
||
<td>End frame (zero-based, inclusive). Requires <code>start_frame</code>.</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>start_time</code></td>
|
||
<td>float</td>
|
||
<td>No*</td>
|
||
<td>—</td>
|
||
<td>Start time in seconds. Approximate (FPS-dependent). Fallback if frames not given.</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>end_time</code></td>
|
||
<td>float</td>
|
||
<td>No*</td>
|
||
<td>—</td>
|
||
<td>End time in seconds. Approximate (FPS-dependent). Fallback if frames not given.</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>fps</code></td>
|
||
<td>float</td>
|
||
<td>No</td>
|
||
<td>video FPS</td>
|
||
<td>Override frames-per-second for frame↔time calculation. Defaults to video's detected FPS.</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>mode</code></td>
|
||
<td>string</td>
|
||
<td>No</td>
|
||
<td><code>normal</code></td>
|
||
<td><code>normal</code> or <code>debug</code> (draws "CLIP" overlay)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>audio</code></td>
|
||
<td>string</td>
|
||
<td>No</td>
|
||
<td><code>on</code></td>
|
||
<td><code>on</code> or <code>off</code></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>* Conditionally required: either (<code>start_frame</code>+<code>end_frame</code>) OR (<code>start_time</code>+<code>end_time</code>) must be provided.</p>
|
||
<h4>Example</h4>
|
||
<div class="codehilite"><pre><span></span><code><span class="c1"># Clip by frame range (primary)</span>
|
||
curl<span class="w"> </span>-s<span class="w"> </span><span class="s2">"</span><span class="nv">$API</span><span class="s2">/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/clip?start_frame=0&end_frame=47"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer </span><span class="nv">$JWT</span><span class="s2">"</span><span class="w"> </span>-o<span class="w"> </span>clip.ts
|
||
|
||
<span class="c1"># Clip by time range (fallback)</span>
|
||
curl<span class="w"> </span>-s<span class="w"> </span><span class="s2">"</span><span class="nv">$API</span><span class="s2">/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/clip?start_time=30&end_time=45"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer </span><span class="nv">$JWT</span><span class="s2">"</span><span class="w"> </span>-o<span class="w"> </span>clip.ts
|
||
</code></pre></div>
|
||
|
||
<h4>Response</h4>
|
||
<ul>
|
||
<li><strong>200</strong>: <code>video/mp2t</code> MPEG-TS stream</li>
|
||
<li><strong>400</strong>: Missing/invalid range parameters</li>
|
||
<li><strong>404</strong>: File not found</li>
|
||
<li><strong>500</strong>: FFmpeg error</li>
|
||
</ul>
|
||
<h4>Technical Notes</h4>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Detail</th>
|
||
<th>Value</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><strong>Backend</strong></td>
|
||
<td>FFmpeg (<code>ffmpeg-full</code>)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Seek</strong></td>
|
||
<td><code>-ss</code> before <code>-i</code> (fast keyframe seek)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Format</strong></td>
|
||
<td>MPEG-TS (<code>mpegts</code> muxer, pipe-safe)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Codec</strong></td>
|
||
<td>H.264 + AAC</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Cache</strong></td>
|
||
<td><code>Cache-Control: public, max-age=86400</code> (24h)</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<h3>Video vs Clip: Quality & Format Comparison</h3>
|
||
<p>Both endpoints support time range extraction, but serve different use cases:</p>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Feature</th>
|
||
<th><code>/video</code></th>
|
||
<th><code>/clip</code></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><strong>No params</strong></td>
|
||
<td>Streams full file (Range seek)</td>
|
||
<td>Returns 400 (params required)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>HTTP Range</strong></td>
|
||
<td>✅ Supported</td>
|
||
<td>❌ Not supported</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Encoding</strong></td>
|
||
<td><code>-c copy</code> (zero encoding)</td>
|
||
<td><code>-c:v libx264 -c:a aac</code> (re-encode)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Quality</strong></td>
|
||
<td>Original (bit-exact, zero loss)</td>
|
||
<td>Compressed (default CRF ≈ 23)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Format</strong></td>
|
||
<td><code>video/mp4</code></td>
|
||
<td><code>video/mp2t</code> (MPEG-TS)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Speed</strong></td>
|
||
<td>Fast (no computation)</td>
|
||
<td>Slower (encoding required)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Frame control</strong></td>
|
||
<td>Time-based (<code>duration = (end_frame - start_frame) / fps</code>)</td>
|
||
<td>Precise (<code>-vframes</code>)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Debug mode</strong></td>
|
||
<td>❌</td>
|
||
<td>✅ <code>mode=debug</code> overlay</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Cache</strong></td>
|
||
<td>❌</td>
|
||
<td>✅ <code>max-age=86400</code></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<h4>Usage Recommendation</h4>
|
||
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Scenario</th>
|
||
<th>Use</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td>Full video streaming / player seek</td>
|
||
<td><code>/video</code></td>
|
||
</tr>
|
||
<tr>
|
||
<td>Quick preview clip (zero quality loss)</td>
|
||
<td><code>/video?start_frame=...&end_frame=...</code></td>
|
||
</tr>
|
||
<tr>
|
||
<td>Debug frame verification / text overlay</td>
|
||
<td><code>/clip?mode=debug</code></td>
|
||
</tr>
|
||
<tr>
|
||
<td>Precise frame count control</td>
|
||
<td><code>/clip</code></td>
|
||
</tr>
|
||
<tr>
|
||
<td>CDN cacheable clip</td>
|
||
<td><code>/clip</code></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<hr />
|
||
<h4>Technical Notes (single-frame JPEG extraction)</h4>
<table class="table">
|
||
<thead>
|
||
<tr>
|
||
<th>Detail</th>
|
||
<th>Value</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><strong>Backend</strong></td>
|
||
<td>FFmpeg (<code>ffmpeg-full</code>)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Filter</strong></td>
|
||
<td><code>select=eq(n\,FRAME)</code> to select frame, optional <code>crop=W:H:X:Y</code></td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Output</strong></td>
|
||
<td>Single JPEG via pipe (<code>image2pipe</code>, <code>mjpeg</code> codec)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Cache</strong></td>
|
||
<td><code>Cache-Control: public, max-age=86400</code> (24h)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><strong>Frame number</strong></td>
|
||
<td>Zero-based (<code>frame=0</code> = first frame of video)</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<hr />
|
||
<p><em>Updated: 2026-05-19 12:49:24</em></p>
|
||
</div>
|
||
</body>
|
||
</html> |