ollama/x/mlxrunner/batch/batch.go

package batch

import "github.com/ollama/ollama/x/mlxrunner/mlx"

// Batch is the per-forward-pass input handed to a model.
type Batch struct {
	// InputIDs is the input token IDs for this forward pass, shape (B, L).
	InputIDs *mlx.Array

	// SeqOffsets gives each row's current position within its sequence —
	// where the chunk in InputIDs starts. Length equals the batch dimension
	// of InputIDs.
	SeqOffsets []int32

	// SeqQueryLens is each row's real query length in this forward. Values
	// less than L mean the row's tail is padding that must be masked out.
	// Length equals the batch dimension of InputIDs.
	SeqQueryLens []int32

	// Memo is per-forward memoization used to cache results, such as masks,
	// which are often the same across layers.
	Memo Memo
}

type Memo struct {
	entries map[any]any
}

// Get returns the memoized value for key and true if present, or nil
// and false otherwise.
func (m *Memo) Get(key any) (any, bool) {
	v, ok := m.entries[key]
	return v, ok
}

// Put stores value under key, allocating on first use.
func (m *Memo) Put(key, value any) {
	if m.entries == nil {
		m.entries = map[any]any{}
	}
	m.entries[key] = value
}