feat: Phase 1 handover - schema migration, correction mechanism, API fixes
Schema changes: dev.chunks -> dev.chunk, remove old_chunk_id/chunk_index. Correction: asr-1.json format, generate/apply scripts. API: 37/37 endpoints fixed and tested. Docs: HANDOVER_V2.0.md for M4.
This commit is contained in:
78
scripts/coreml_embed_server.py
Executable file
78
scripts/coreml_embed_server.py
Executable file
@@ -0,0 +1,78 @@
|
||||
"""
|
||||
Simple HTTP server (built on the standard-library http.server module) for CoreML ANE embedding inference.
|
||||
Replaces /api/embeddings endpoint that comic_embed.rs calls.
|
||||
"""
|
||||
import json, os, argparse
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
import numpy as np
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
# Default location of the CoreML model package; main() uses it as the --model default.
MODEL_PATH = "/Users/accusys/models/mxbai-embed-large-v1.mlpackage"

# Shared inference state: both stay None until main() assigns the loaded objects.
TOKENIZER = None
MODEL = None
|
||||
|
||||
class EmbeddingHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing ``POST /api/embeddings`` backed by the CoreML model.

    Request body: a JSON object with a ``"prompt"`` string.
    Success response: ``200`` with ``{"embedding": [...]}``.
    Failure response: ``400`` for a malformed request, ``500`` when
    tokenization or inference fails; both carry ``{"error": "<message>"}``.
    Any other path gets an empty ``404``.

    Relies on the module-level ``MODEL`` and ``TOKENIZER`` having been
    assigned before the first request is served.
    """

    # Prefixes removed from the prompt before it is tokenized.
    _PROMPT_PREFIXES = ("search_document: ", "search_query: ")

    def do_POST(self):
        """Handle a POST request; only ``/api/embeddings`` is served."""
        if self.path != "/api/embeddings":
            self.send_response(404)
            self.end_headers()
            return

        # Request parsing is kept separate from inference so that client
        # mistakes are reported as 400 instead of escaping the handler or
        # being reported as a server fault.
        try:
            prompt = self._read_prompt()
        except (ValueError, TypeError) as e:
            self._send_json(400, {"error": str(e)})
            return

        try:
            embedding = self._embed(prompt)
        except Exception as e:
            # Boundary catch: any tokenizer/model failure becomes a JSON 500
            # rather than a dropped connection.
            self._send_json(500, {"error": str(e)})
            return

        self._send_json(200, {"embedding": embedding})

    def read(self, length):
        """Read exactly ``length`` bytes of request body from the client."""
        return self.rfile.read(length)

    def _read_prompt(self):
        """Parse the request body and return the prompt with a known prefix removed.

        Returns:
            The ``"prompt"`` value (``""`` when the key is absent), with a
            leading ``search_document: `` or ``search_query: `` stripped.

        Raises:
            ValueError: Content-Length is not a non-negative integer, the
                body is not valid JSON, or the JSON value is not an object.
            TypeError: ``"prompt"`` is present but is not a string.
        """
        length = int(self.headers.get("Content-Length", 0))
        if length < 0:
            # rfile.read(-1) would read until EOF and stall the handler.
            raise ValueError("Content-Length must be non-negative")

        data = json.loads(self.read(length))
        if not isinstance(data, dict):
            raise ValueError("request body must be a JSON object")

        prompt = data.get("prompt", "")
        if not isinstance(prompt, str):
            raise TypeError("'prompt' must be a string")

        # At most one prefix is removed, matching the first that applies.
        for prefix in self._PROMPT_PREFIXES:
            if prompt.startswith(prefix):
                return prompt[len(prefix):]
        return prompt

    def _embed(self, prompt):
        """Tokenize ``prompt`` and return the model's embedding as a list."""
        tokens = TOKENIZER(
            prompt,
            return_tensors="np",
            padding="max_length",
            truncation=True,
            max_length=512,
        )
        # Both inputs are cast to int32 before being handed to the model.
        input_ids = tokens["input_ids"].astype(np.int32)
        attention_mask = tokens["attention_mask"].astype(np.int32)
        result = MODEL.predict({"input_ids": input_ids, "attention_mask": attention_mask})
        return result["embedding"][0].tolist()

    def _send_json(self, status, payload):
        """Send ``payload`` as a JSON response with the given HTTP status."""
        body = json.dumps(payload).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
|
||||
|
||||
def main():
    """Load the CoreML model and tokenizer, then serve embeddings over HTTP.

    Command-line options:
        --port   TCP port to listen on (default 11435).
        --model  Path to the CoreML model package (default ``MODEL_PATH``).

    The server binds to 127.0.0.1 only and runs until interrupted. Ctrl-C
    stops it without a traceback, and the listening socket is closed on exit.
    """
    global MODEL, TOKENIZER

    parser = argparse.ArgumentParser()
    parser.add_argument("--port", type=int, default=11435)
    parser.add_argument("--model", default=MODEL_PATH)
    args = parser.parse_args()

    # Kept as a function-level import, as in the original script.
    import coremltools as ct

    print(f"Loading CoreML model from {args.model}...")
    MODEL = ct.models.MLModel(args.model, compute_units=ct.ComputeUnit.ALL)
    print(f"Model loaded (compute: {MODEL.compute_unit})")

    print("Loading tokenizer...")
    TOKENIZER = AutoTokenizer.from_pretrained("mixedbread-ai/mxbai-embed-large-v1")
    print("Tokenizer loaded")

    # The context manager calls server_close() on exit, releasing the socket
    # even when serve_forever() is interrupted or raises.
    with HTTPServer(("127.0.0.1", args.port), EmbeddingHandler) as server:
        print(f"ANE Embedding server running on port {args.port}")
        print(f"API: POST http://127.0.0.1:{args.port}/api/embeddings")
        print(f" Body: {{\"model\": \"...\", \"prompt\": \"...\"}}")
        print(f" Response: {{\"embedding\": [...]}}")
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            print("Shutting down")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user