""" Simple Flask-like HTTP server for CoreML ANE embedding inference. Replaces /api/embeddings endpoint that comic_embed.rs calls. """ import json, os, argparse from http.server import HTTPServer, BaseHTTPRequestHandler import numpy as np from transformers import AutoTokenizer # Global model MODEL = None TOKENIZER = None MODEL_PATH = "/Users/accusys/models/mxbai-embed-large-v1.mlpackage" class EmbeddingHandler(BaseHTTPRequestHandler): def do_POST(self): if self.path == "/api/embeddings": length = int(self.headers.get("Content-Length", 0)) body = self.read(length) try: data = json.loads(body) prompt = data.get("prompt", "") # Strip search_document: or search_query: prefix if prompt.startswith("search_document: "): prompt = prompt[17:] elif prompt.startswith("search_query: "): prompt = prompt[14:] tokens = TOKENIZER(prompt, return_tensors="np", padding="max_length", truncation=True, max_length=512) input_ids = tokens["input_ids"].astype(np.int32) attention_mask = tokens["attention_mask"].astype(np.int32) result = MODEL.predict({"input_ids": input_ids, "attention_mask": attention_mask}) embedding = result["embedding"][0].tolist() resp = json.dumps({"embedding": embedding}).encode() self.send_response(200) self.send_header("Content-Type", "application/json") self.end_headers() self.wfile.write(resp) except Exception as e: resp = json.dumps({"error": str(e)}).encode() self.send_response(500) self.send_header("Content-Type", "application/json") self.end_headers() self.wfile.write(resp) else: self.send_response(404) self.end_headers() def read(self, length): return self.rfile.read(length) def main(): global MODEL, TOKENIZER parser = argparse.ArgumentParser() parser.add_argument("--port", type=int, default=11435) parser.add_argument("--model", default=MODEL_PATH) args = parser.parse_args() import coremltools as ct print(f"Loading CoreML model from {args.model}...") MODEL = ct.models.MLModel(args.model, compute_units=ct.ComputeUnit.ALL) print(f"Model loaded (compute: {MODEL.compute_unit})") print("Loading tokenizer...") TOKENIZER = AutoTokenizer.from_pretrained("mixedbread-ai/mxbai-embed-large-v1") print("Tokenizer loaded") server = HTTPServer(("127.0.0.1", args.port), EmbeddingHandler) print(f"ANE Embedding server running on port {args.port}") print(f"API: POST http://127.0.0.1:{args.port}/api/embeddings") print(f" Body: {{\"model\": \"...\", \"prompt\": \"...\"}}") print(f" Response: {{\"embedding\": [...]}}") server.serve_forever() if __name__ == "__main__": main()