Files
momentry_core/scripts/coreml_embed_server.py
Accusys 39ba5ddf76 feat: Phase 1 handover - schema migration, correction mechanism, API fixes
Schema changes: dev.chunks->dev.chunk, remove old_chunk_id/chunk_index
Correction: asr-1.json format, generate/apply scripts
API: 37/37 endpoints fixed and tested
Docs: HANDOVER_V2.0.md for M4
2026-05-11 07:03:22 +08:00

79 lines
3.0 KiB
Python
Executable File

"""
Simple HTTP server (standard-library http.server, no Flask dependency) for CoreML ANE embedding inference.
Serves POST /api/embeddings as a replacement for the endpoint that comic_embed.rs calls.
"""
import json, os, argparse
from http.server import HTTPServer, BaseHTTPRequestHandler
import numpy as np
from transformers import AutoTokenizer
# Process-wide inference state read by every request handler.
# MODEL and TOKENIZER start as None and are assigned once by main()
# before the server begins accepting requests.
MODEL = None  # CoreML model (ct.models.MLModel), loaded in main()
TOKENIZER = None  # tokenizer from AutoTokenizer.from_pretrained, loaded in main()
# Default for the --model CLI option; note this is a machine-specific absolute path.
MODEL_PATH = "/Users/accusys/models/mxbai-embed-large-v1.mlpackage"
class EmbeddingHandler(BaseHTTPRequestHandler):
    """Serve ``POST /api/embeddings`` using the module-level MODEL and TOKENIZER.

    Request body: a JSON object; only its ``"prompt"`` key is read (any other
    key, such as ``"model"``, is ignored). A missing prompt is treated as "".

    Responses (all JSON except the 404, which has no body):
        200  ``{"embedding": [...]}``
        400  ``{"error": "..."}`` - the request itself is malformed
        500  ``{"error": "..."}`` - tokenization or inference failed
        404  any path other than ``/api/embeddings``
    """

    # Task prefixes that are removed from the prompt before tokenization.
    # At most one prefix is stripped, checked in this order.
    _PROMPT_PREFIXES = ("search_document: ", "search_query: ")

    def do_POST(self):
        """Handle a POST request; see the class docstring for the contract."""
        if self.path != "/api/embeddings":
            self.send_response(404)
            self.end_headers()
            return

        # Client mistakes are reported as 400 so callers can tell them apart
        # from genuine server-side failures.
        try:
            prompt = self._read_prompt()
        except ValueError as exc:
            self._send_json(400, {"error": str(exc)})
            return

        # Anything raised by the tokenizer or the model is a server-side
        # failure; this is the request boundary, so catch broadly and report.
        try:
            embedding = self._embed(prompt)
        except Exception as exc:
            self._send_json(500, {"error": str(exc)})
            return

        # Written outside the try blocks: if the socket write fails after the
        # status line went out, we must not emit a second set of headers.
        self._send_json(200, {"embedding": embedding})

    def read(self, length):
        """Read and return ``length`` bytes of the request body."""
        return self.rfile.read(length)

    def _read_prompt(self):
        """Parse the request body and return the prompt with its prefix removed.

        Raises:
            ValueError: if Content-Length is not a non-negative integer, the
                body is not valid JSON (``json.JSONDecodeError`` is a
                ``ValueError`` subclass), the body is not a JSON object, or
                ``"prompt"`` is not a string.
        """
        length = int(self.headers.get("Content-Length", 0))
        if length < 0:
            # A negative size would make rfile.read() block until EOF.
            raise ValueError("Content-Length must be non-negative")

        data = json.loads(self.read(length))
        if not isinstance(data, dict):
            raise ValueError("request body must be a JSON object")

        prompt = data.get("prompt", "")
        if not isinstance(prompt, str):
            raise ValueError("'prompt' must be a string")

        for prefix in self._PROMPT_PREFIXES:
            if prompt.startswith(prefix):
                return prompt[len(prefix):]
        return prompt

    def _embed(self, prompt):
        """Tokenize ``prompt``, run MODEL.predict, and return the embedding as a list."""
        # Always pad/truncate to 512 tokens so the model input has a fixed length.
        tokens = TOKENIZER(
            prompt,
            return_tensors="np",
            padding="max_length",
            truncation=True,
            max_length=512,
        )
        inputs = {
            "input_ids": tokens["input_ids"].astype(np.int32),
            "attention_mask": tokens["attention_mask"].astype(np.int32),
        }
        result = MODEL.predict(inputs)
        # Index 0 selects the single prompt that was submitted.
        return result["embedding"][0].tolist()

    def _send_json(self, status, payload):
        """Send ``payload`` as a JSON response with the given HTTP status code."""
        body = json.dumps(payload).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
def main():
    """Load the model and tokenizer, then serve embeddings until interrupted.

    Command-line options:
        --port       TCP port to listen on (default 11435).
        --model      Path to the CoreML model package (default MODEL_PATH).
        --tokenizer  Tokenizer name or path passed to
                     AutoTokenizer.from_pretrained (default
                     "mixedbread-ai/mxbai-embed-large-v1").

    Side effects: assigns the module-level MODEL and TOKENIZER that
    EmbeddingHandler reads, binds 127.0.0.1:<port>, and blocks in
    serve_forever(). Ctrl-C stops the server and closes the socket.
    """
    global MODEL, TOKENIZER

    parser = argparse.ArgumentParser()
    parser.add_argument("--port", type=int, default=11435)
    parser.add_argument("--model", default=MODEL_PATH)
    # Configurable so a non-default --model can be paired with its own tokenizer.
    parser.add_argument("--tokenizer", default="mixedbread-ai/mxbai-embed-large-v1")
    args = parser.parse_args()

    # Imported here rather than at module top, as in the original
    # (presumably so argument errors and --help do not pay the import cost).
    import coremltools as ct

    print(f"Loading CoreML model from {args.model}...")
    MODEL = ct.models.MLModel(args.model, compute_units=ct.ComputeUnit.ALL)
    print(f"Model loaded (compute: {MODEL.compute_unit})")

    print("Loading tokenizer...")
    TOKENIZER = AutoTokenizer.from_pretrained(args.tokenizer)
    print("Tokenizer loaded")

    # The context manager calls server_close() on exit, releasing the
    # listening socket even when serve_forever() is interrupted.
    with HTTPServer(("127.0.0.1", args.port), EmbeddingHandler) as server:
        print(f"ANE Embedding server running on port {args.port}")
        print(f"API: POST http://127.0.0.1:{args.port}/api/embeddings")
        print(f" Body: {{\"model\": \"...\", \"prompt\": \"...\"}}")
        print(f" Response: {{\"embedding\": [...]}}")
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop this server; exit without a traceback.
            print("Shutting down")


# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    main()