ollama source for Momentry Core verification
This commit is contained in:
7
llm/llm_darwin.go
Normal file
7
llm/llm_darwin.go
Normal file
@@ -0,0 +1,7 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// LlamaServerSysProcAttr is the set of OS-level process attributes declared
// for this platform (llm_darwin.go). Nothing platform-specific is configured
// here, so it points at a zero-value syscall.SysProcAttr.
var LlamaServerSysProcAttr = new(syscall.SysProcAttr)
|
||||
7
llm/llm_linux.go
Normal file
7
llm/llm_linux.go
Normal file
@@ -0,0 +1,7 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// LlamaServerSysProcAttr is the set of OS-level process attributes declared
// for this platform (llm_linux.go). Nothing platform-specific is configured
// here, so it points at a zero-value syscall.SysProcAttr.
var LlamaServerSysProcAttr = new(syscall.SysProcAttr)
|
||||
23
llm/llm_windows.go
Normal file
23
llm/llm_windows.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// Win32 process creation flags. The identifiers deliberately keep the
// ALL_CAPS spelling of the Windows API so they can be matched against the
// platform documentation; the values are the ones defined by that API.
const (
	// CREATE_DEFAULT_ERROR_MODE gives the new process the default error mode
	// instead of inheriting the parent's.
	CREATE_DEFAULT_ERROR_MODE = 0x04000000

	// ABOVE_NORMAL_PRIORITY_CLASS starts the process with a scheduling
	// priority above normal.
	ABOVE_NORMAL_PRIORITY_CLASS = 0x00008000

	// CREATE_NO_WINDOW starts a console application without a console window.
	CREATE_NO_WINDOW = 0x08000000
)
|
||||
|
||||
var LlamaServerSysProcAttr = &syscall.SysProcAttr{
|
||||
// Wire up the default error handling logic If for some reason a DLL is
|
||||
// missing in the path this will pop up a GUI Dialog explaining the fault so
|
||||
// the user can either fix their PATH, or report a bug. Without this
|
||||
// setting, the process exits immediately with a generic exit status but no
|
||||
// way to (easily) figure out what the actual missing DLL was.
|
||||
//
|
||||
// Setting Above Normal priority class ensures when running as a "background service"
|
||||
// with "programs" given best priority, we aren't starved of cpu cycles
|
||||
CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS | CREATE_NO_WINDOW,
|
||||
}
|
||||
1951
llm/server.go
Normal file
1951
llm/server.go
Normal file
File diff suppressed because it is too large
Load Diff
281
llm/server_test.go
Normal file
281
llm/server_test.go
Normal file
@@ -0,0 +1,281 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/ml"
|
||||
"golang.org/x/sync/semaphore"
|
||||
)
|
||||
|
||||
func TestLLMServerFitGPU(t *testing.T) {
|
||||
minMemory := 457 * format.MebiByte
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
gpus []ml.DeviceInfo
|
||||
layers []int
|
||||
numGPU int
|
||||
requireFull bool
|
||||
expected ml.GPULayersList
|
||||
expectedErr error
|
||||
}{
|
||||
{
|
||||
name: "No GPU",
|
||||
layers: []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{},
|
||||
requireFull: true, // Should not try to evict even though we can't load any layers
|
||||
},
|
||||
{
|
||||
name: "Full single GPU",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1, 2}}},
|
||||
},
|
||||
{
|
||||
name: "Partial single GPU",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1, 2}}},
|
||||
},
|
||||
{
|
||||
name: "Single GPU with numGPU 1",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: 1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1}}},
|
||||
},
|
||||
{
|
||||
name: "Single GPU with numGPU 0",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: 0,
|
||||
expected: ml.GPULayersList{},
|
||||
},
|
||||
{
|
||||
name: "Single GPU with numGPU 999",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
|
||||
numGPU: 999,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1, 2, 3}}},
|
||||
},
|
||||
{
|
||||
name: "Multi GPU fits on one",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0, 1, 2}}},
|
||||
},
|
||||
{
|
||||
name: "Multi GPU split",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{256 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1, 2}}},
|
||||
},
|
||||
{
|
||||
name: "Multi GPU partial",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{256 * format.MebiByte, 256 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{1}}},
|
||||
},
|
||||
{
|
||||
name: "Multi GPU numGPU 1",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: 1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{1}}},
|
||||
},
|
||||
{
|
||||
name: "Multi GPU numGPU 2",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{256 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: 2,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1}}},
|
||||
},
|
||||
{
|
||||
name: "Multi GPU numGPU 999",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{256 * format.MebiByte, 256 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: 999,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0, 1}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{2}}},
|
||||
},
|
||||
{
|
||||
name: "Multi GPU different libraries",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{Library: "CUDA", ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{Library: "ROCm", ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{128 * format.MebiByte, 128 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1", Library: "ROCm"}, Layers: []int{0, 1}}},
|
||||
},
|
||||
{
|
||||
name: "requireFull",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
requireFull: true,
|
||||
expectedErr: ErrLoadRequiredFull,
|
||||
},
|
||||
{
|
||||
name: "requireFull numGPU",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256 * format.MebiByte)}},
|
||||
layers: []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
|
||||
numGPU: 4,
|
||||
requireFull: true,
|
||||
expectedErr: ErrLoadRequiredFull,
|
||||
},
|
||||
{
|
||||
name: "iGPU",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1, 2}}},
|
||||
},
|
||||
{
|
||||
name: "iGPU + dGPU",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1, 2}}},
|
||||
},
|
||||
{
|
||||
name: "iGPU + dGPU fits on one",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{50 * format.MebiByte, 50 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1}}},
|
||||
},
|
||||
{
|
||||
name: "iGPU + dGPU partial",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
|
||||
numGPU: -1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0, 1}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{2}}},
|
||||
},
|
||||
{
|
||||
name: "iGPU + dGPU numGPU 1",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
|
||||
numGPU: 1,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{2}}},
|
||||
},
|
||||
{
|
||||
name: "iGPU + dGPU numGPU 999",
|
||||
gpus: []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
|
||||
layers: []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
|
||||
numGPU: 999,
|
||||
expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{1, 2, 3}}},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var systemInfo ml.SystemInfo
|
||||
systemInfo.TotalMemory = format.GibiByte
|
||||
systemInfo.FreeMemory = 512 * format.MebiByte
|
||||
systemInfo.FreeSwap = 256 * format.MebiByte
|
||||
|
||||
s := &ollamaServer{
|
||||
llmServer: llmServer{
|
||||
totalLayers: uint64(len(tt.layers)),
|
||||
options: api.Options{
|
||||
Runner: api.Runner{
|
||||
NumGPU: tt.numGPU,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
s.mem = &ml.BackendMemory{CPU: ml.DeviceMemory{
|
||||
Weights: make([]uint64, s.totalLayers),
|
||||
Cache: make([]uint64, s.totalLayers),
|
||||
}, GPUs: make([]ml.DeviceMemory, len(tt.gpus))}
|
||||
|
||||
for i := range tt.layers {
|
||||
s.mem.CPU.Weights[i] = uint64(tt.layers[i])
|
||||
}
|
||||
|
||||
for i := range s.mem.GPUs {
|
||||
s.mem.GPUs[i].DeviceID = tt.gpus[i].DeviceID
|
||||
s.mem.GPUs[i].Weights = make([]uint64, s.totalLayers)
|
||||
s.mem.GPUs[i].Cache = make([]uint64, s.totalLayers)
|
||||
}
|
||||
|
||||
gpuLayers, err := s.createLayout(systemInfo, tt.gpus, s.mem, tt.requireFull, 0)
|
||||
if err != tt.expectedErr {
|
||||
t.Fatalf("fitGPU returned error: %v", err)
|
||||
}
|
||||
if gpuLayers.Hash() != tt.expected.Hash() {
|
||||
t.Errorf("fitGPU assigned %v, want %v", gpuLayers, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLLMServerCompletionFormat(t *testing.T) {
|
||||
// This test was written to fix an already deployed issue. It is a bit
|
||||
// of a mess, and but it's good enough, until we can refactoring the
|
||||
// Completion method to be more testable.
|
||||
|
||||
ctx, cancel := context.WithCancel(t.Context())
|
||||
s := &llmServer{
|
||||
sem: semaphore.NewWeighted(1), // required to prevent nil panic
|
||||
}
|
||||
|
||||
checkInvalid := func(format string) {
|
||||
t.Helper()
|
||||
err := s.Completion(ctx, CompletionRequest{
|
||||
Options: new(api.Options),
|
||||
Format: []byte(format),
|
||||
}, nil)
|
||||
|
||||
want := fmt.Sprintf("invalid format: %q; expected \"json\" or a valid JSON Schema", format)
|
||||
if err == nil || !strings.Contains(err.Error(), want) {
|
||||
t.Fatalf("err = %v; want %q", err, want)
|
||||
}
|
||||
}
|
||||
|
||||
checkInvalid("X") // invalid format
|
||||
checkInvalid(`"X"`) // invalid JSON Schema
|
||||
|
||||
cancel() // prevent further processing if request makes it past the format check
|
||||
|
||||
checkValid := func(err error) {
|
||||
t.Helper()
|
||||
if !errors.Is(err, context.Canceled) {
|
||||
t.Fatalf("Completion: err = %v; expected context.Canceled", err)
|
||||
}
|
||||
}
|
||||
|
||||
valids := []string{
|
||||
// "missing"
|
||||
``,
|
||||
`""`,
|
||||
`null`,
|
||||
|
||||
// JSON
|
||||
`"json"`,
|
||||
`{"type":"object"}`,
|
||||
}
|
||||
for _, valid := range valids {
|
||||
err := s.Completion(ctx, CompletionRequest{
|
||||
Options: new(api.Options),
|
||||
Format: []byte(valid),
|
||||
}, nil)
|
||||
checkValid(err)
|
||||
}
|
||||
|
||||
err := s.Completion(ctx, CompletionRequest{
|
||||
Options: new(api.Options),
|
||||
Format: nil, // missing format
|
||||
}, nil)
|
||||
checkValid(err)
|
||||
}
|
||||
31
llm/server_wait_test.go
Normal file
31
llm/server_wait_test.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestWaitUntilRunningUsesStatusMessageWhenDoneErrIsNil(t *testing.T) {
|
||||
done := make(chan struct{})
|
||||
close(done)
|
||||
|
||||
status := &StatusWriter{}
|
||||
status.SetLastError("llama_init_from_model: failed to initialize the context: failed to initialize Metal backend")
|
||||
|
||||
s := &llmServer{
|
||||
done: done,
|
||||
status: status,
|
||||
}
|
||||
|
||||
err := s.WaitUntilRunning(context.Background())
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
if strings.Contains(err.Error(), "%!w(<nil>)") {
|
||||
t.Fatalf("unexpected wrapped nil error: %q", err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), s.status.LastError()) {
|
||||
t.Fatalf("error %q does not include status message %q", err, s.status.LastError())
|
||||
}
|
||||
}
|
||||
107
llm/status.go
Normal file
107
llm/status.go
Normal file
@@ -0,0 +1,107 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// StatusWriter is an io.Writer that forwards everything written to it to an
// underlying writer while capturing error lines from the llama runner
// process, so the most recent failures can be retrieved with LastError.
//
// All methods are safe to call on a nil *StatusWriter, and the zero value is
// usable: it captures errors and discards the forwarded output.
type StatusWriter struct {
	// out receives every byte passed to Write; when nil the bytes are dropped.
	out io.Writer
	// lastErrMsg stores the accumulated error text as a string.
	//
	// StartRunner wires both Stdout and Stderr to the same StatusWriter, and
	// os/exec serializes Write calls in that case.
	// NOTE(review): presumably that is why AppendError's load-then-store is
	// not protected by an additional lock; confirm before adding other writers.
	lastErrMsg atomic.Value
}

// maxCapturedErrorBytes caps the size of the accumulated error text. When the
// cap is exceeded, the oldest text is dropped first.
const maxCapturedErrorBytes = 8 * 1024

// NewStatusWriter returns a StatusWriter that forwards all writes to out.
func NewStatusWriter(out io.Writer) *StatusWriter {
	return &StatusWriter{
		out: out,
	}
}

// LastError returns the accumulated error text, or "" when nothing has been
// captured or w is nil.
func (w *StatusWriter) LastError() string {
	if w == nil {
		return ""
	}
	if v := w.lastErrMsg.Load(); v != nil {
		return v.(string)
	}
	return ""
}

// SetLastError replaces the accumulated error text with msg. It is a no-op on
// a nil receiver.
func (w *StatusWriter) SetLastError(msg string) {
	if w == nil {
		return
	}
	w.lastErrMsg.Store(msg)
}

// AppendError adds msg as a new line after any previously captured text.
//
// If the result exceeds maxCapturedErrorBytes, only the tail is kept. The tail
// is then advanced to the next line start so it does not begin with a partial
// line; if the tail is a single line, it is advanced to the next UTF-8 rune
// start instead so the stored text never begins in the middle of a character.
// Empty messages and nil receivers are ignored.
func (w *StatusWriter) AppendError(msg string) {
	if w == nil || msg == "" {
		return
	}

	if current := w.LastError(); current != "" {
		msg = current + "\n" + msg
	}

	if len(msg) > maxCapturedErrorBytes {
		msg = msg[len(msg)-maxCapturedErrorBytes:]
		if i := strings.IndexByte(msg, '\n'); i >= 0 {
			// Drop the (possibly partial) first line of the tail.
			msg = msg[i+1:]
		} else {
			// One oversized line: there is no line boundary to resume at, so
			// skip UTF-8 continuation bytes (10xxxxxx). A rune has at most
			// three of them, which bounds the loop even for non-UTF-8 input.
			for skipped := 0; skipped < 3 && len(msg) > 0 && msg[0]&0xC0 == 0x80; skipped++ {
				msg = msg[1:]
			}
		}
	}

	w.SetLastError(msg)
}

// TODO - regex matching to detect errors like
// libcublasLt.so.11: cannot open shared object file: No such file or directory
// TODO - if we later see error lines split across multiple Write calls in real
// logs, add a small rolling buffer here to capture those fragments.

// errorPrefixes lists the substrings that mark the start of an error message
// in the runner's output. When several occur at the same offset, the one
// listed first wins.
var errorPrefixes = []string{
	"mlx:",
	"MLX:",
	"panic:",
	"fatal error:",
	"error:",
	"Error:",
	"CUDA error",
	"ROCm error",
	"cudaMalloc failed",
	"\"ERR\"",
	"error loading model",
	"GGML_ASSERT",
	"Deepseek2 does not support K-shift",
	"signal arrived during cgo execution",
	"llama_init_from_model:",
}

// Write scans b for the earliest occurrence of any entry of errorPrefixes and,
// if one is found, records the text from that prefix up to the end of its line
// (trailing spaces, tabs and carriage returns removed) via AppendError. At
// most one error line is captured per call.
//
// b is then forwarded unchanged to the underlying writer and that writer's
// result is returned. With a nil receiver or a nil underlying writer the data
// is discarded and Write reports len(b), nil.
func (w *StatusWriter) Write(b []byte) (int, error) {
	errStart := -1
	var errPrefix string
	for _, prefix := range errorPrefixes {
		if i := bytes.Index(b, []byte(prefix)); i >= 0 && (errStart < 0 || i < errStart) {
			errStart = i
			errPrefix = prefix
		}
	}

	if errStart >= 0 {
		line := b[errStart+len(errPrefix):]
		if j := bytes.IndexByte(line, '\n'); j >= 0 {
			line = line[:j]
		}
		w.AppendError(errPrefix + string(bytes.TrimRight(line, " \t\r")))
	}

	if w == nil || w.out == nil {
		return len(b), nil
	}
	return w.out.Write(b)
}
|
||||
68
llm/status_test.go
Normal file
68
llm/status_test.go
Normal file
@@ -0,0 +1,68 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"io"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestStatusWriterCapturesErrorLine(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
log string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "llama init",
|
||||
log: "llama_init_from_model: failed to initialize the context: failed to initialize Metal backend\n",
|
||||
want: "llama_init_from_model: failed to initialize the context: failed to initialize Metal backend",
|
||||
},
|
||||
{
|
||||
name: "cobra error",
|
||||
log: "Error: foo baz bar\n",
|
||||
want: "Error: foo baz bar",
|
||||
},
|
||||
{
|
||||
name: "uppercase mlx",
|
||||
log: "MLX: there was an error\n",
|
||||
want: "MLX: there was an error",
|
||||
},
|
||||
{
|
||||
name: "panic header",
|
||||
log: "time=2026-05-01T15:36:45.053Z level=INFO source=pipeline.go:71 msg=\"peak memory\" size=\"8.26 GiB\"\n" +
|
||||
"panic: mlx: Failed to compile kernel: nvrtc: error: invalid value for --gpu-architecture (-arch)\n" +
|
||||
"\t. at /go/src/github.com/ollama/ollama/build/_deps/mlx-c-src/mlx/c/transforms.cpp:15\n\n" +
|
||||
"goroutine 31 [running]:\n" +
|
||||
"golang.org/x/sync/errgroup.(*Group).Go.func1()\n" +
|
||||
"\tgolang.org/x/sync@v0.17.0/errgroup/errgroup.go:93 +0x50\n",
|
||||
want: "panic: mlx: Failed to compile kernel: nvrtc: error: invalid value for --gpu-architecture (-arch)",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
w := NewStatusWriter(io.Discard)
|
||||
if _, err := w.Write([]byte(tt.log)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if got := w.LastError(); got != tt.want {
|
||||
t.Fatalf("LastError = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatusWriterAccumulatesErrorLines(t *testing.T) {
|
||||
w := NewStatusWriter(io.Discard)
|
||||
if _, err := w.Write([]byte("error: failed to initialize the Metal library\n")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := w.Write([]byte("GGML_ASSERT([rsets->data count] == 0) failed\n")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
want := "error: failed to initialize the Metal library\nGGML_ASSERT([rsets->data count] == 0) failed"
|
||||
if got := w.LastError(); got != want {
|
||||
t.Fatalf("LastError = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user