ollama source for Momentry Core verification

2026-05-22 17:19:10 +08:00
commit 0b31ff9135
2020 changed files with 1413145 additions and 0 deletions
--- a/llm/llm_darwin.go
+++ b/llm/llm_darwin.go
@@ -0,0 +1,7 @@
+package llm
+
+import (
+	"syscall"
+)
+
+// LlamaServerSysProcAttr is the syscall.SysProcAttr exported for the llama
+// server process. On darwin it is left at its zero value.
+var LlamaServerSysProcAttr = new(syscall.SysProcAttr)
--- a/llm/llm_linux.go
+++ b/llm/llm_linux.go
@@ -0,0 +1,7 @@
+package llm
+
+import (
+	"syscall"
+)
+
+// LlamaServerSysProcAttr is the syscall.SysProcAttr exported for the llama
+// server process. On linux it is left at its zero value.
+var LlamaServerSysProcAttr = new(syscall.SysProcAttr)
--- a/llm/llm_windows.go
+++ b/llm/llm_windows.go
@@ -0,0 +1,23 @@
+package llm
+
+import (
+	"syscall"
+)
+
+const ( // flag values OR-ed into SysProcAttr.CreationFlags by LlamaServerSysProcAttr
+	CREATE_DEFAULT_ERROR_MODE   = 0x04000000 // enables the default error handling (missing-DLL dialog) for the child
+	ABOVE_NORMAL_PRIORITY_CLASS = 0x00008000 // runs the child in the Above Normal priority class
+	CREATE_NO_WINDOW            = 0x08000000 // NOTE(review): presumably starts the child without a console window - confirm against the Win32 docs
+)
+
+var LlamaServerSysProcAttr = &syscall.SysProcAttr{
+	// Wire up the default error handling logic. If for some reason a DLL is
+	// missing in the path, this will pop up a GUI dialog explaining the fault so
+	// the user can either fix their PATH or report a bug. Without this
+	// setting, the process exits immediately with a generic exit status and no
+	// (easy) way to figure out which DLL was actually missing.
+	//
+	// Setting the Above Normal priority class ensures that, when running as a
+	// "background service" with "programs" given best priority, we aren't starved of CPU cycles.
+	CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS | CREATE_NO_WINDOW,
+}
--- a/llm/server.go
+++ b/llm/server.go
--- a/llm/server_test.go
+++ b/llm/server_test.go
@@ -0,0 +1,281 @@
+package llm
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/format"
+	"github.com/ollama/ollama/ml"
+	"golang.org/x/sync/semaphore"
+)
+
+// TestLLMServerFitGPU is a table-driven test of ollamaServer.createLayout.
+// Each case describes the per-layer weight sizes of a model, the GPUs that are
+// available, the requested NumGPU option and whether a full load is required.
+// The test then checks which layers end up assigned to which device, or which
+// error is returned.
+func TestLLMServerFitGPU(t *testing.T) {
+	// NOTE(review): minMemory is added on top of every GPU's usable free memory
+	// below; it appears to be the per-GPU overhead that createLayout sets aside
+	// before placing layers - confirm against createLayout.
+	minMemory := 457 * format.MebiByte
+
+	tests := []struct {
+		name        string
+		gpus        []ml.DeviceInfo
+		layers      []int
+		numGPU      int
+		requireFull bool
+		expected    ml.GPULayersList
+		expectedErr error
+	}{
+		{
+			name:        "No GPU",
+			layers:      []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:      -1,
+			expected:    ml.GPULayersList{},
+			requireFull: true, // Should not try to evict even though we can't load any layers
+		},
+		{
+			name:     "Full single GPU",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   -1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1, 2}}},
+		},
+		{
+			name:     "Partial single GPU",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
+			numGPU:   -1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1, 2}}},
+		},
+		{
+			name:     "Single GPU with numGPU 1",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1}}},
+		},
+		{
+			name:     "Single GPU with numGPU 0",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   0,
+			expected: ml.GPULayersList{},
+		},
+		{
+			name:     "Single GPU with numGPU 999",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
+			numGPU:   999,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1, 2, 3}}},
+		},
+		{
+			name:     "Multi GPU fits on one",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   -1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0, 1, 2}}},
+		},
+		{
+			name:     "Multi GPU split",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{256 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   -1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1, 2}}},
+		},
+		{
+			name:     "Multi GPU partial",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{256 * format.MebiByte, 256 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   -1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{1}}},
+		},
+		{
+			name:     "Multi GPU numGPU 1",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{1}}},
+		},
+		{
+			name:     "Multi GPU numGPU 2",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{256 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   2,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1}}},
+		},
+		{
+			name:     "Multi GPU numGPU 999",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{256 * format.MebiByte, 256 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   999,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0, 1}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{2}}},
+		},
+		{
+			name:     "Multi GPU different libraries",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{Library: "CUDA", ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{Library: "ROCm", ID: "gpu1"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{128 * format.MebiByte, 128 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   -1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1", Library: "ROCm"}, Layers: []int{0, 1}}},
+		},
+		{
+			name:        "requireFull",
+			gpus:        []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:      []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
+			numGPU:      -1,
+			requireFull: true,
+			expectedErr: ErrLoadRequiredFull,
+		},
+		{
+			name:        "requireFull numGPU",
+			gpus:        []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(256 * format.MebiByte)}},
+			layers:      []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
+			numGPU:      4,
+			requireFull: true,
+			expectedErr: ErrLoadRequiredFull,
+		},
+		{
+			name:     "iGPU",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   -1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1, 2}}},
+		},
+		{
+			name:     "iGPU + dGPU",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   -1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1, 2}}},
+		},
+		{
+			name:     "iGPU + dGPU fits on one",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:   -1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1}}},
+		},
+		{
+			name:     "iGPU + dGPU partial",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
+			numGPU:   -1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0, 1}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{2}}},
+		},
+		{
+			name:     "iGPU + dGPU numGPU 1",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
+			numGPU:   1,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{2}}},
+		},
+		{
+			name:     "iGPU + dGPU numGPU 999",
+			gpus:     []ml.DeviceInfo{{DeviceID: ml.DeviceID{ID: "gpu0"}, FreeMemory: uint64(128*format.MebiByte + minMemory)}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Integrated: true, FreeMemory: uint64(256*format.MebiByte + minMemory)}},
+			layers:   []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
+			numGPU:   999,
+			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{1, 2, 3}}},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Every case runs against the same fixed system memory figures.
+			var systemInfo ml.SystemInfo
+			systemInfo.TotalMemory = format.GibiByte
+			systemInfo.FreeMemory = 512 * format.MebiByte
+			systemInfo.FreeSwap = 256 * format.MebiByte
+
+			s := &ollamaServer{
+				llmServer: llmServer{
+					totalLayers: uint64(len(tt.layers)),
+					options: api.Options{
+						Runner: api.Runner{
+							NumGPU: tt.numGPU,
+						},
+					},
+				},
+			}
+
+			s.mem = &ml.BackendMemory{CPU: ml.DeviceMemory{
+				Weights: make([]uint64, s.totalLayers),
+				Cache:   make([]uint64, s.totalLayers),
+			}, GPUs: make([]ml.DeviceMemory, len(tt.gpus))}
+
+			// The per-layer sizes of the case are recorded as CPU weights.
+			for i := range tt.layers {
+				s.mem.CPU.Weights[i] = uint64(tt.layers[i])
+			}
+
+			// Each GPU starts with zeroed per-layer weight and cache entries.
+			for i := range s.mem.GPUs {
+				s.mem.GPUs[i].DeviceID = tt.gpus[i].DeviceID
+				s.mem.GPUs[i].Weights = make([]uint64, s.totalLayers)
+				s.mem.GPUs[i].Cache = make([]uint64, s.totalLayers)
+			}
+
+			gpuLayers, err := s.createLayout(systemInfo, tt.gpus, s.mem, tt.requireFull, 0)
+			// errors.Is still recognises the sentinel if createLayout ever
+			// wraps it, and treats "no error expected, none returned" as a match.
+			if !errors.Is(err, tt.expectedErr) {
+				t.Fatalf("createLayout returned error: %v, want %v", err, tt.expectedErr)
+			}
+			if gpuLayers.Hash() != tt.expected.Hash() {
+				t.Errorf("createLayout assigned %v, want %v", gpuLayers, tt.expected)
+			}
+		})
+	}
+}
+
+// TestLLMServerCompletionFormat checks how Completion treats the Format field
+// of a request: malformed values must be rejected with an "invalid format"
+// error, while missing, empty, null, "json" and JSON Schema values must get
+// past the format check (observed here as a context.Canceled error).
+func TestLLMServerCompletionFormat(t *testing.T) {
+	// This is a regression test for an issue that had already shipped. It is
+	// somewhat messy, but good enough until the Completion method is
+	// refactored to be more testable.
+
+	ctx, cancel := context.WithCancel(t.Context())
+	srv := &llmServer{
+		sem: semaphore.NewWeighted(1), // required to prevent nil panic
+	}
+
+	// run issues a Completion request that carries only the given raw format.
+	run := func(raw []byte) error {
+		return srv.Completion(ctx, CompletionRequest{
+			Options: new(api.Options),
+			Format:  raw,
+		}, nil)
+	}
+
+	rejected := []string{
+		"X",   // invalid format
+		`"X"`, // invalid JSON Schema
+	}
+	for _, bad := range rejected {
+		err := run([]byte(bad))
+		want := fmt.Sprintf("invalid format: %q; expected \"json\" or a valid JSON Schema", bad)
+		if err == nil || !strings.Contains(err.Error(), want) {
+			t.Fatalf("err = %v; want %q", err, want)
+		}
+	}
+
+	cancel() // prevent further processing if request makes it past the format check
+
+	accepted := [][]byte{
+		// "missing"
+		[]byte(``),
+		[]byte(`""`),
+		[]byte(`null`),
+
+		// JSON
+		[]byte(`"json"`),
+		[]byte(`{"type":"object"}`),
+
+		nil, // missing format
+	}
+	for _, raw := range accepted {
+		if err := run(raw); !errors.Is(err, context.Canceled) {
+			t.Fatalf("Completion: err = %v; expected context.Canceled", err)
+		}
+	}
+}
--- a/llm/server_wait_test.go
+++ b/llm/server_wait_test.go
@@ -0,0 +1,31 @@
+package llm
+
+import (
+	"context"
+	"strings"
+	"testing"
+)
+
+// TestWaitUntilRunningUsesStatusMessageWhenDoneErrIsNil builds a server whose
+// done channel is already closed and whose status writer holds an error
+// message, with no other fields set. WaitUntilRunning must then fail with an
+// error that contains that message and not a formatted nil wrapped error.
+func TestWaitUntilRunningUsesStatusMessageWhenDoneErrIsNil(t *testing.T) {
+	exited := make(chan struct{})
+	close(exited)
+
+	sw := new(StatusWriter)
+	sw.SetLastError("llama_init_from_model: failed to initialize the context: failed to initialize Metal backend")
+
+	srv := &llmServer{done: exited, status: sw}
+
+	err := srv.WaitUntilRunning(context.Background())
+	switch {
+	case err == nil:
+		t.Fatal("expected error")
+	case strings.Contains(err.Error(), "%!w(<nil>)"):
+		t.Fatalf("unexpected wrapped nil error: %q", err)
+	case !strings.Contains(err.Error(), srv.status.LastError()):
+		t.Fatalf("error %q does not include status message %q", err, srv.status.LastError())
+	}
+}
--- a/llm/status.go
+++ b/llm/status.go
@@ -0,0 +1,107 @@
+package llm
+
+import (
+	"bytes"
+	"io"
+	"strings"
+	"sync/atomic"
+)
+
+// StatusWriter is a writer that passes llama runner output through to out and captures the error messages in it
+type StatusWriter struct {
+	out io.Writer // receives every buffer passed to Write, unmodified
+	// StartRunner wires both Stdout and Stderr to the same StatusWriter, and
+	// os/exec serializes Write calls in that case.
+	lastErrMsg atomic.Value // holds a string: the captured error lines, joined by "\n"
+}
+
+const maxCapturedErrorBytes = 8 * 1024 // AppendError keeps at most this many trailing bytes of captured error text
+
+// NewStatusWriter returns a StatusWriter that forwards everything written to
+// it to out. No error text is captured initially.
+func NewStatusWriter(out io.Writer) *StatusWriter {
+	w := new(StatusWriter)
+	w.out = out
+	return w
+}
+
+// LastError returns the error text captured so far. It returns "" when the
+// receiver is nil or when nothing has been stored yet.
+func (w *StatusWriter) LastError() string {
+	if w != nil {
+		if stored := w.lastErrMsg.Load(); stored != nil {
+			return stored.(string)
+		}
+	}
+	return ""
+}
+
+// SetLastError replaces the captured error text with msg. Calling it on a nil
+// receiver is a no-op.
+func (w *StatusWriter) SetLastError(msg string) {
+	if w != nil {
+		w.lastErrMsg.Store(msg)
+	}
+}
+
+// AppendError adds msg as a new line after any error text captured earlier.
+// A nil receiver or an empty msg is a no-op.
+//
+// The combined text is capped at maxCapturedErrorBytes. When it grows past
+// the cap only the tail is kept: if that tail contains a newline, everything
+// up to and including the first newline is dropped so the text starts on a
+// line boundary; otherwise (a single over-long line) any UTF-8 continuation
+// bytes orphaned by the cut are skipped so the text starts on a rune boundary.
+//
+// The load/modify/store sequence is not atomic as a whole; it relies on calls
+// being serialized, as described on the StatusWriter struct.
+func (w *StatusWriter) AppendError(msg string) {
+	if w == nil || msg == "" {
+		return
+	}
+
+	if prev := w.LastError(); prev != "" {
+		msg = prev + "\n" + msg
+	}
+
+	if len(msg) > maxCapturedErrorBytes {
+		msg = msg[len(msg)-maxCapturedErrorBytes:]
+		if _, rest, ok := strings.Cut(msg, "\n"); ok {
+			// The cut most likely landed inside a line; drop that fragment.
+			msg = rest
+		} else {
+			// No line boundary to resync on. A UTF-8 sequence has at most
+			// three continuation bytes (10xxxxxx), so skip up to that many.
+			for n := 0; n < 3 && len(msg) > 0 && msg[0]&0xC0 == 0x80; n++ {
+				msg = msg[1:]
+			}
+		}
+	}
+
+	w.SetLastError(msg)
+}
+
+// errorPrefixes are the markers Write looks for to find the start of an error line.
+// TODO - regex matching to detect errors like
+// libcublasLt.so.11: cannot open shared object file: No such file or directory
+// TODO - if we later see error lines split across multiple Write calls in real
+// logs, add a small rolling buffer here to capture those fragments.
+var errorPrefixes = []string{
+	"mlx:",
+	"MLX:",
+	"panic:",
+	"fatal error:",
+	"error:",
+	"Error:",
+	"CUDA error",
+	"ROCm error",
+	"cudaMalloc failed",
+	"\"ERR\"",
+	"error loading model",
+	"GGML_ASSERT",
+	"Deepseek2 does not support K-shift",
+	"signal arrived during cgo execution",
+	"llama_init_from_model:",
+}
+
+// Write implements io.Writer. It scans b for the earliest occurrence of any
+// entry in errorPrefixes. If one is found, the text from that prefix to the
+// end of its line (with trailing spaces, tabs and carriage returns removed) is
+// recorded via AppendError; at most one error line is captured per call. b is
+// then forwarded unchanged to the underlying writer, whose result is returned.
+//
+// A nil receiver, or a StatusWriter without an underlying writer (such as the
+// zero value), does not panic: the bytes are reported as consumed and nothing
+// is forwarded.
+func (w *StatusWriter) Write(b []byte) (int, error) {
+	start, matched := -1, ""
+	for _, prefix := range errorPrefixes {
+		if i := bytes.Index(b, []byte(prefix)); i >= 0 && (start < 0 || i < start) {
+			start, matched = i, prefix
+		}
+	}
+
+	if start >= 0 {
+		rest := b[start+len(matched):]
+		if eol := bytes.IndexByte(rest, '\n'); eol >= 0 {
+			rest = rest[:eol]
+		}
+		// AppendError tolerates a nil receiver, so no guard is needed here.
+		w.AppendError(matched + string(bytes.TrimRight(rest, " \t\r")))
+	}
+
+	if w == nil || w.out == nil {
+		return len(b), nil
+	}
+	return w.out.Write(b)
+}
--- a/llm/status_test.go
+++ b/llm/status_test.go
@@ -0,0 +1,68 @@
+package llm
+
+import (
+	"io"
+	"testing"
+)
+
+// TestStatusWriterCapturesErrorLine feeds a single log buffer to a fresh
+// StatusWriter and checks that LastError returns exactly the expected error
+// line, without the trailing newline or any surrounding log output.
+func TestStatusWriterCapturesErrorLine(t *testing.T) {
+	type testCase struct {
+		label  string
+		input  string
+		expect string
+	}
+
+	cases := []testCase{
+		{
+			label:  "llama init",
+			input:  "llama_init_from_model: failed to initialize the context: failed to initialize Metal backend\n",
+			expect: "llama_init_from_model: failed to initialize the context: failed to initialize Metal backend",
+		},
+		{
+			label:  "cobra error",
+			input:  "Error: foo baz bar\n",
+			expect: "Error: foo baz bar",
+		},
+		{
+			label:  "uppercase mlx",
+			input:  "MLX: there was an error\n",
+			expect: "MLX: there was an error",
+		},
+		{
+			// Only the panic header is expected, not the log line before it
+			// or the stack trace after it.
+			label: "panic header",
+			input: "time=2026-05-01T15:36:45.053Z level=INFO source=pipeline.go:71 msg=\"peak memory\" size=\"8.26 GiB\"\n" +
+				"panic: mlx: Failed to compile kernel: nvrtc: error: invalid value for --gpu-architecture (-arch)\n" +
+				"\t. at /go/src/github.com/ollama/ollama/build/_deps/mlx-c-src/mlx/c/transforms.cpp:15\n\n" +
+				"goroutine 31 [running]:\n" +
+				"golang.org/x/sync/errgroup.(*Group).Go.func1()\n" +
+				"\tgolang.org/x/sync@v0.17.0/errgroup/errgroup.go:93 +0x50\n",
+			expect: "panic: mlx: Failed to compile kernel: nvrtc: error: invalid value for --gpu-architecture (-arch)",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.label, func(t *testing.T) {
+			sw := NewStatusWriter(io.Discard)
+			_, err := sw.Write([]byte(tc.input))
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			got := sw.LastError()
+			if got != tc.expect {
+				t.Fatalf("LastError = %q, want %q", got, tc.expect)
+			}
+		})
+	}
+}
+
+// TestStatusWriterAccumulatesErrorLines writes two error lines in separate
+// Write calls and checks that LastError returns both, joined by a newline, in
+// the order they were written.
+func TestStatusWriterAccumulatesErrorLines(t *testing.T) {
+	sw := NewStatusWriter(io.Discard)
+
+	chunks := []string{
+		"error: failed to initialize the Metal library\n",
+		"GGML_ASSERT([rsets->data count] == 0) failed\n",
+	}
+	for _, chunk := range chunks {
+		if _, err := sw.Write([]byte(chunk)); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	const want = "error: failed to initialize the Metal library\nGGML_ASSERT([rsets->data count] == 0) failed"
+	if got := sw.LastError(); got != want {
+		t.Fatalf("LastError = %q, want %q", got, want)
+	}
+}