llama.cpp verification source 2026-05-22
Some checks are pending
Copilot Setup Steps / copilot-setup-steps (push) Waiting to run
Check Pre-Tokenizer Hashes / pre-tokenizer-hashes (push) Waiting to run
Python check requirements.txt / check-requirements (push) Waiting to run
Python Type-Check / python type-check (push) Waiting to run
Update Operations Documentation / update-ops-docs (push) Waiting to run
Some checks are pending
Copilot Setup Steps / copilot-setup-steps (push) Waiting to run
Check Pre-Tokenizer Hashes / pre-tokenizer-hashes (push) Waiting to run
Python check requirements.txt / check-requirements (push) Waiting to run
Python Type-Check / python type-check (push) Waiting to run
Update Operations Documentation / update-ops-docs (push) Waiting to run
This commit is contained in:
60
tools/server/tests/unit/test_compat_gcp.py
Normal file
60
tools/server/tests/unit/test_compat_gcp.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import pytest
|
||||
from utils import *
|
||||
|
||||
# Module-level server handle shared by the tests in this file. It is only
# declared here; the autouse `create_server` fixture assigns the actual
# instance before each test runs.
server: ServerProcess
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def create_server():
    """Bind the module-level ``server`` to a fresh tinyllama2 preset.

    The preset has ``gcp_compat`` set to ``True``. The fixture is ``autouse``,
    so it runs for every test in this module without being requested; the
    server is not started here, each test calls ``server.start()`` itself.
    """
    global server
    preset = ServerPreset.tinyllama2()
    preset.gcp_compat = True
    server = preset
|
||||
|
||||
|
||||
def test_gcp_predict_camel_case():
    """POST a single chat-completions instance to ``/predict``.

    The instance selects its format through the camelCase ``@requestFormat``
    key. The reply must be HTTP 200 and contain exactly one prediction with
    exactly one choice, whose message has the ``assistant`` role and
    non-empty content.
    """
    global server
    server.start()

    instance = {
        "@requestFormat": "chatCompletions",
        "max_tokens": 8,
        "messages": [
            {"role": "user", "content": "What is the meaning of life?"},
        ],
    }
    res = server.make_request("POST", "/predict", data={"instances": [instance]})

    # Envelope: one prediction per submitted instance.
    assert res.status_code == 200
    assert "predictions" in res.body
    predictions = res.body["predictions"]
    assert len(predictions) == 1

    # Payload: the prediction is shaped like a chat-completion response.
    prediction = predictions[0]
    assert "choices" in prediction
    choices = prediction["choices"]
    assert len(choices) == 1

    message = choices[0]["message"]
    assert message["role"] == "assistant"
    assert len(message["content"]) > 0
|
||||
|
||||
|
||||
def test_gcp_predict_multiple_instances():
    """POST two chat-completions instances to ``/predict`` in one request.

    The reply must be HTTP 200 and contain two predictions, each with a
    ``choices`` list whose first message has non-empty content.
    """
    global server
    # NOTE(review): presumably two slots let both instances be served
    # concurrently - confirm against the server's slot handling.
    server.n_slots = 2
    server.start()

    prompts = ("Say hello", "Say world")
    instances = [
        {
            "@requestFormat": "chatCompletions",
            "max_tokens": 8,
            "messages": [{"role": "user", "content": prompt}],
        }
        for prompt in prompts
    ]
    res = server.make_request("POST", "/predict", data={"instances": instances})

    assert res.status_code == 200
    predictions = res.body["predictions"]
    assert len(predictions) == 2
    for prediction in predictions:
        assert "choices" in prediction
        first_message = prediction["choices"][0]["message"]
        assert len(first_message["content"]) > 0
|
||||
Reference in New Issue
Block a user