ollama source for Momentry Core verification

2026-05-22 17:19:10 +08:00
commit 0b31ff9135
2020 changed files with 1413145 additions and 0 deletions
--- a/sample/samplers.go
+++ b/sample/samplers.go
@@ -0,0 +1,206 @@
+package sample
+
+import (
+	"errors"
+	"math"
+	"math/rand/v2"
+	"slices"
+
+	"github.com/ollama/ollama/llama"
+	"github.com/ollama/ollama/tokenizer"
+)
+
+// token represents information about a single token during sampling.
+// The samplers in this package rewrite value in place as they run, so it may
+// hold a raw logit, a scaled logit, a probability, or a cumulative probability
+// depending on how far sampling has progressed.
+type token struct {
+	id    int32   // The token's unique identifier
+	value float32 // The raw logit or probability from the model
+}
+
+// Sampler selects a token id from a slice of logits. A temperature of zero
+// picks the largest logit; otherwise topK, temperature scaling, softmax, topP
+// and minP are applied before a weighted random draw. An optional grammar
+// constrains which tokens may be returned.
+type Sampler struct {
+	rng         *rand.Rand      // random source; nil falls back to the package-level math/rand/v2 functions
+	topK        int             // number of highest-valued tokens to keep; <= 0 keeps all of them
+	topP        float32         // cumulative probability cutoff; 1.0 disables the cutoff
+	minP        float32         // minimum probability, as a fraction of the largest probability
+	temperature float32         // logit divisor; 0 selects the largest logit without randomness
+	grammar     *GrammarSampler // optional grammar constraint; nil disables grammar checks
+}
+
+// Sample picks a token id from logits, where the index of each logit is used
+// as the token id. When a grammar is attached, the picked token is checked
+// against it; if it is rejected, the grammar is applied to every logit and a
+// second token is sampled. The returned token is passed to the grammar's
+// Accept. On failure it returns -1 and an error.
+func (s *Sampler) Sample(logits []float32) (int32, error) {
+	if len(logits) == 0 {
+		return -1, errors.New("sample: no logits provided to sample")
+	}
+
+	candidates := make([]token, len(logits))
+	for i, logit := range logits {
+		candidates[i] = token{id: int32(i), value: logit}
+	}
+
+	picked, err := s.sample(candidates)
+	if err != nil {
+		return -1, err
+	}
+	if s.grammar == nil {
+		return picked.id, nil
+	}
+
+	// Fast path: check only the sampled token against the grammar. Applying
+	// the grammar to every logit is slower and only needed on rejection,
+	// which the grammar signals by setting the value to -Inf.
+	single := []token{picked}
+	s.grammar.Apply(single)
+	if !math.IsInf(float64(single[0].value), -1) {
+		s.grammar.Accept(single[0].id)
+		return single[0].id, nil
+	}
+
+	// s.sample modifies the tokens as a side effect, so restore the original
+	// ids and logits before applying the grammar and sampling again.
+	for i, logit := range logits {
+		candidates[i] = token{id: int32(i), value: logit}
+	}
+	s.grammar.Apply(candidates)
+
+	picked, err = s.sample(candidates)
+	if err != nil {
+		return -1, err
+	}
+	s.grammar.Accept(picked.id)
+	return picked.id, nil
+}
+
+// greedy returns the token with the largest value. On ties the earliest
+// token wins. tokens must not be empty.
+func greedy(tokens []token) token {
+	best := 0
+	for i := range tokens {
+		if tokens[i].value > tokens[best].value {
+			best = i
+		}
+	}
+
+	return tokens[best]
+}
+
+// sample draws one token from tokens according to the sampler parameters.
+//
+// With a temperature of zero it returns the token with the largest value.
+// Otherwise it applies topK, temperature scaling, softmax, topP and minP, and
+// then picks one of the remaining tokens at random, weighted by probability.
+//
+// It has the side effect of modifying the values in tokens, and may reorder
+// them. The value of the returned token is not a logit.
+//
+// An error is returned when tokens is empty or when the probabilities sum to
+// NaN.
+func (s *Sampler) sample(tokens []token) (token, error) {
+	if len(tokens) == 0 {
+		return token{}, errors.New("sample: no tokens provided to sample")
+	}
+
+	if s.temperature == 0 {
+		return greedy(tokens), nil
+	}
+
+	// topK also sorts the tokens in descending order of logits
+	tokens = topK(tokens, s.topK)
+
+	// scale and normalize the tokens in place
+	temperature(tokens, s.temperature)
+	softmax(tokens)
+
+	tokens = topP(tokens, s.topP)
+	tokens = minP(tokens, s.minP)
+
+	var r float32
+	if s.rng != nil {
+		r = s.rng.Float32()
+	} else {
+		r = rand.Float32()
+	}
+
+	// Replace each probability with the running total so that a single
+	// uniform draw can be mapped to a token by searching the totals.
+	var sum float32
+	for i := range tokens {
+		sum += tokens[i].value
+		tokens[i].value = sum
+	}
+
+	// Every comparison against NaN is false, so searching NaN totals would
+	// select a meaningless token; report the problem before searching.
+	if math.IsNaN(float64(sum)) {
+		return token{}, errors.New("sample: logits sum to NaN, check model output")
+	}
+
+	// Scale the draw from [0, 1) to [0, sum) in case the totals do not add up to 1
+	r *= sum
+
+	// The comparison never reports equality, so the search returns the first
+	// token whose running total is not below r.
+	idx, _ := slices.BinarySearchFunc(tokens, r, func(tok token, target float32) int {
+		if tok.value < target {
+			return -1
+		}
+		return 1
+	})
+
+	return tokens[idx], nil
+}
+
+// TODO(parthsareen): update sampler interface to use json unmarshal https://github.com/ollama/ollama/issues/9278
+func NewSampler(temperature float32, topK int, topP float32, minP float32, seed int, grammar *GrammarSampler) Sampler {
+	var rng *rand.Rand
+	if seed != -1 {
+		// PCG requires two parameters: sequence and stream
+		// Use original seed for sequence
+		sequence := uint64(seed)
+		// Use golden ratio hash to generate statistically independent seeds
+		rng = rand.New(rand.NewPCG(sequence, sequence^0x9E3779B9))
+	}
+	if temperature < 0.0 {
+		temperature = 0.0
+	}
+
+	if topP < 0.0 {
+		topP = 0.0
+	}
+	if topP >= 1.0 {
+		topP = 1.0
+	}
+
+	if minP < 0.0 {
+		minP = 0.0
+	}
+	if minP >= 1.0 {
+		minP = 1.0
+	}
+
+	return Sampler{
+		rng:         rng,
+		topK:        topK,
+		topP:        topP,
+		minP:        minP,
+		temperature: temperature,
+		grammar:     grammar,
+	}
+}
+
+// GrammarSampler wraps a llama.Grammar so that it can be applied to the
+// tokens used by this package. Call Free when it is no longer needed.
+type GrammarSampler struct {
+	grammar *llama.Grammar
+}
+
+// NewGrammarSampler creates a GrammarSampler for grammarStr over the
+// vocabulary of tok. Each vocabulary index is decoded to its text piece and
+// handed to llama.NewGrammar together with the vocabulary's EOS value. It
+// returns an error when the grammar could not be initialized.
+func NewGrammarSampler(tok tokenizer.Tokenizer, grammarStr string) (*GrammarSampler, error) {
+	vocab := tok.Vocabulary()
+	size := len(vocab.Values)
+
+	ids := make([]uint32, size)
+	pieces := make([]string, size)
+	for i := range vocab.Values {
+		ids[i] = uint32(i)
+		// A decode error is ignored here; the piece is whatever Decode
+		// returned alongside the error.
+		pieces[i], _ = tok.Decode([]int32{int32(i)})
+	}
+
+	g := llama.NewGrammar(grammarStr, ids, pieces, vocab.EOS)
+	if g == nil {
+		return nil, errors.New("sample: failed to initialize grammar")
+	}
+
+	return &GrammarSampler{grammar: g}, nil
+}
+
+// Apply runs the grammar over tokens and writes the resulting logits back
+// into tokens in place. Token ids are left unchanged.
+func (g *GrammarSampler) Apply(tokens []token) {
+	data := make([]llama.TokenData, len(tokens))
+	for i := range tokens {
+		data[i].ID = tokens[i].id
+		data[i].Logit = tokens[i].value
+	}
+
+	g.grammar.Apply(data)
+
+	for i, td := range data {
+		tokens[i].value = td.Logit
+	}
+}
+
+// Accept passes token to the underlying grammar's Accept method.
+func (g *GrammarSampler) Accept(token int32) {
+	g.grammar.Accept(token)
+}
+
+// Free calls Free on the underlying grammar.
+func (g *GrammarSampler) Free() {
+	g.grammar.Free()
+}
--- a/sample/samplers_benchmark_test.go
+++ b/sample/samplers_benchmark_test.go
@@ -0,0 +1,92 @@
+package sample
+
+import (
+	"fmt"
+	"math/rand"
+	"testing"
+)
+
+// BenchmarkWeightedSampler measures Sampler.Sample with a non-zero
+// temperature across several input sizes, and across several sampler
+// configurations on a fixed 128000-entry input. Any sampling error fails the
+// benchmark instead of being silently dropped.
+func BenchmarkWeightedSampler(b *testing.B) {
+	// randomLogits returns n logits drawn uniformly from [-5, 5)
+	randomLogits := func(n int) []float32 {
+		logits := make([]float32, n)
+		for i := range logits {
+			logits[i] = float32(rand.Float64()*10 - 5)
+		}
+		return logits
+	}
+
+	for _, size := range []int{10, 100, 1000, 10000} {
+		b.Run(fmt.Sprintf("Size %d", size), func(b *testing.B) {
+			logits := randomLogits(size)
+
+			sampler := NewSampler(0.8, 0, 0, 0, 42, nil)
+			b.ResetTimer()
+			for b.Loop() {
+				if _, err := sampler.Sample(logits); err != nil {
+					b.Fatalf("error sampling: %v", err)
+				}
+			}
+		})
+	}
+
+	configs := []struct {
+		name        string
+		temperature float32
+		topK        int
+		topP        float32
+		minP        float32
+		seed        int
+	}{
+		{"Greedy", 0, -1, 0, 0, -1},
+		{"Temperature", 0.8, -1, 0, 0, -1},
+		{"TopK", 0.8, 50, 0, 0, -1},
+		{"TopP", 0.8, -1, 0.9, 0, -1},
+		{"MinP", 0.8, -1, 0, 0.05, -1},
+		{"WithSeed", 0.8, 50, 0, 0, 42},
+	}
+
+	// Fixed size for common vocab size
+	logits := randomLogits(128000)
+
+	for _, tc := range configs {
+		b.Run("Config"+tc.name, func(b *testing.B) {
+			sampler := NewSampler(tc.temperature, tc.topK, tc.topP, tc.minP, tc.seed, nil)
+
+			// One untimed call before measuring
+			if _, err := sampler.Sample(logits); err != nil {
+				b.Fatalf("error sampling: %v", err)
+			}
+
+			b.ResetTimer()
+
+			for b.Loop() {
+				if _, err := sampler.Sample(logits); err != nil {
+					b.Fatalf("error sampling: %v", err)
+				}
+			}
+		})
+	}
+
+	// Test with combined transforms separately - topK influences performance greatly
+	b.Run("TransformCombined", func(b *testing.B) {
+		sampler := NewSampler(0.8, 50, 0.9, 0.05, 42, nil)
+		b.ResetTimer()
+
+		for b.Loop() {
+			if _, err := sampler.Sample(logits); err != nil {
+				b.Fatalf("error sampling: %v", err)
+			}
+		}
+	})
+}
+
+// BenchmarkGreedySampler measures Sampler.Sample with a temperature of zero
+// across several input sizes. Any sampling error fails the benchmark instead
+// of being silently dropped.
+func BenchmarkGreedySampler(b *testing.B) {
+	for _, size := range []int{10, 100, 1000, 10000, 100000} {
+		b.Run(fmt.Sprintf("Size %d", size), func(b *testing.B) {
+			// Logits are drawn uniformly from [-5, 5)
+			logits := make([]float32, size)
+			for i := range logits {
+				logits[i] = float32(rand.Float64()*10 - 5)
+			}
+
+			sampler := NewSampler(0, -1, 0, 0, -1, nil)
+			b.ResetTimer()
+
+			for b.Loop() {
+				if _, err := sampler.Sample(logits); err != nil {
+					b.Fatalf("error sampling: %v", err)
+				}
+			}
+		})
+	}
+}
--- a/sample/samplers_test.go
+++ b/sample/samplers_test.go
@@ -0,0 +1,174 @@
+package sample
+
+import (
+	"encoding/json"
+	"math"
+	"math/rand/v2"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/ollama/ollama/tokenizer"
+)
+
+// TestWeighted checks which token id Sampler.Sample returns for a few small
+// inputs, and that all-NaN logits produce an error.
+func TestWeighted(t *testing.T) {
+	cases := []struct {
+		sampler Sampler
+		logits  []float32
+		want    int32
+	}{
+		// Temperature 0 picks the largest logit
+		{NewSampler(0, 0, 0, 0, 0, nil), []float32{-10, 3, -10, -10}, 1},
+		{NewSampler(0, 0, 0, 0, 0, nil), []float32{-100, -10, 0, 10}, 3},
+		// An extremely small topP leaves only the first, highest-valued token
+		{NewSampler(1.0, 0, 1e-10, 0, 0, nil), []float32{1.0, 0.9999999999999999, 0.5, 0.1}, 0},
+	}
+
+	for _, tc := range cases {
+		got, err := tc.sampler.Sample(tc.logits)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if tc.want != got {
+			t.Errorf("index mismatch: want %d, got %d", tc.want, got)
+		}
+	}
+
+	// Logits that are all NaN must be reported as an error
+	nan := float32(math.NaN())
+	sampler := NewSampler(1, 0, 0.95, 0.05, 0, nil)
+	if got, err := sampler.Sample([]float32{nan, nan, nan}); err == nil {
+		t.Errorf("expected error, got %d", got)
+	}
+}
+
+// modelHelper builds a byte-pair-encoding tokenizer whose vocabulary is read
+// from the llama3.2 encoder.json test data. The JSON maps each token string
+// to its id. The test fails immediately if the file cannot be read or decoded.
+func modelHelper(t testing.TB) tokenizer.Tokenizer {
+	t.Helper()
+
+	path := filepath.FromSlash("../tokenizer/testdata/llama3.2/encoder.json")
+	f, err := os.Open(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+
+	vocab := map[string]int32{}
+	if err = json.NewDecoder(f).Decode(&vocab); err != nil {
+		t.Fatal(err)
+	}
+
+	// Invert the map into a slice indexed by token id
+	values := make([]string, len(vocab))
+	for piece, id := range vocab {
+		values[id] = piece
+	}
+
+	// Only need vocab for Grammar Test
+	return tokenizer.NewBytePairEncoding(
+		&tokenizer.Vocabulary{
+			Values: values,
+			Types:  make([]int32, len(vocab)),
+			Merges: make([]string, 0, 1),
+		},
+	)
+}
+
+func TestGrammar(t *testing.T) {
+	tokenizer := modelHelper(t)
+
+	grammarJSON := `
+	root   ::= object
+	value  ::= object | array | string | number | ("true" | "false" | "null") ws
+	object ::=
+	"{" ws (
+				string ":" ws value
+		("," ws string ":" ws value)*
+	)? "}" ws
+	array  ::=
+	"[" ws (
+				value
+		("," ws value)*
+	)? "]" ws
+	string ::=
+	"\"" (
+		[^"\\\x7F\x00-\x1F] |
+		"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+	)* "\"" ws
+	number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
+	# Optional space: by convention, applied in this grammar after literal chars when allowed
+	ws ::= ([ \t\n] ws)?
+	`
+	grammar, err := NewGrammarSampler(tokenizer, grammarJSON)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer grammar.Free()
+
+	logits := make([]float32, len(tokenizer.Vocabulary().Values))
+	for i := range logits {
+		logits[i] = rand.Float32()
+	}
+	tokens := make([]token, len(logits))
+	for i := range tokens {
+		tokens[i].id = int32(i)
+		tokens[i].value = logits[i]
+	}
+
+	grammar.Apply(tokens)
+	nonInfCount := 0
+	infCount := 0
+	for _, tok := range tokens {
+		if math.IsInf(float64(tok.value), -1) {
+			infCount++
+		} else {
+			nonInfCount++
+		}
+	}
+	if nonInfCount == 0 {
+		t.Error("expected at least one non -inf token after grammar application, got none")
+	}
+	if infCount == 0 {
+		t.Error("expected some -inf tokens after grammar application, got none")
+	}
+}
+
+// BenchmarkSample measures Sampler.Sample for a greedy and a weighted
+// sampler over 1<<16 random logits, failing on any sampling error.
+func BenchmarkSample(b *testing.B) {
+	// Generate random logits for benchmarking
+	logits := make([]float32, 1<<16)
+	for i := range logits {
+		logits[i] = rand.Float32()
+	}
+
+	samplers := map[string]Sampler{
+		// A temperature of 0 makes NewSampler return a greedy sampler
+		"Greedy":   NewSampler(0, 0, 0, 0, 0, nil),
+		"Weighted": NewSampler(0.5, 10, 0.9, 0.2, -1, nil),
+	}
+
+	for name, s := range samplers {
+		b.Run(name, func(b *testing.B) {
+			b.ResetTimer()
+			for b.Loop() {
+				_, err := s.Sample(logits)
+				if err != nil {
+					b.Fatalf("error sampling: %v", err)
+				}
+			}
+		})
+	}
+}
--- a/sample/transforms.go
+++ b/sample/transforms.go
@@ -0,0 +1,130 @@
+package sample
+
+import (
+	"container/heap"
+	"math"
+	"slices"
+)
+
+// tokenHeap implements heap.Interface as a min-heap ordered by token value.
+// It is used to track the k largest tokens: the smallest of them sits at the
+// root.
+type tokenHeap []token
+
+// Len reports the number of tokens in the heap.
+func (th tokenHeap) Len() int {
+	return len(th)
+}
+
+// Less orders tokens by ascending value.
+func (th tokenHeap) Less(a, b int) bool {
+	return th[a].value < th[b].value
+}
+
+// Swap exchanges the tokens at positions a and b.
+func (th tokenHeap) Swap(a, b int) {
+	th[a], th[b] = th[b], th[a]
+}
+
+// Push appends x, which must be a token, to the end of the slice.
+func (th *tokenHeap) Push(x any) {
+	*th = append(*th, x.(token))
+}
+
+// Pop removes and returns the last element of the slice.
+func (th *tokenHeap) Pop() any {
+	cur := *th
+	last := len(cur) - 1
+	item := cur[last]
+	*th = cur[:last]
+	return item
+}
+
+// temperature divides every token value by temp, in place. temp is raised to
+// at least 1e-7 first so that values near zero do not cause numerical
+// instability.
+func temperature(ts []token, temp float32) {
+	divisor := max(temp, 1e-7)
+	for i := range ts {
+		ts[i].value /= divisor
+	}
+}
+
+// softmax replaces the logits in ts with normalized probabilities, in place.
+func softmax(ts []token) {
+	// Subtracting the largest logit before exponentiating keeps the
+	// computation numerically stable.
+	peak := float32(math.Inf(-1))
+	for i := range ts {
+		if ts[i].value > peak {
+			peak = ts[i].value
+		}
+	}
+
+	// exp(x - max), accumulating the total as we go
+	var total float32
+	for i := range ts {
+		e := float32(math.Exp(float64(ts[i].value - peak)))
+		ts[i].value = e
+		total += e
+	}
+
+	// exp(x - max) / sum(exp(x - max))
+	for i := range ts {
+		ts[i].value /= total
+	}
+}
+
+// topK returns the k tokens with the highest values, sorted in descending
+// order. When k <= 0 or k >= len(ts), every token is kept: ts itself is
+// sorted in place and returned. Otherwise a new slice is returned and ts is
+// left unchanged.
+func topK(ts []token, k int) []token {
+	if k <= 0 || k >= len(ts) {
+		slices.SortFunc(ts, func(a, b token) int {
+			if a.value > b.value {
+				return -1
+			}
+			if a.value < b.value {
+				return 1
+			}
+			return 0
+		})
+		return ts
+	}
+
+	// Seed a min-heap with the first k tokens; its root is always the
+	// smallest of the k best tokens seen so far.
+	best := make(tokenHeap, k)
+	copy(best, ts[:k])
+	heap.Init(&best)
+
+	// A later token displaces the root only when it is strictly larger
+	for _, candidate := range ts[k:] {
+		if candidate.value > best[0].value {
+			heap.Pop(&best)
+			heap.Push(&best, candidate)
+		}
+	}
+
+	// Popping yields ascending order, so fill the result from the back
+	sorted := make([]token, len(best))
+	for i := len(sorted) - 1; i >= 0; i-- {
+		sorted[i] = heap.Pop(&best).(token)
+	}
+
+	return sorted
+}
+
+// topP keeps the shortest prefix of ts whose cumulative probability exceeds
+// p. If no prefix exceeds p, or p is exactly 1.0, ts is returned unchanged.
+// It requires ts to be sorted in descending order of probabilities.
+func topP(ts []token, p float32) []token {
+	if p == 1.0 {
+		return ts
+	}
+
+	// Stop at the first token that pushes the running total past p
+	var cumulative float32
+	for i := range ts {
+		cumulative += ts[i].value
+		if cumulative > p {
+			return ts[:i+1]
+		}
+	}
+
+	return ts
+}
+
+// minP keeps the tokens whose probability is at least p times the largest
+// probability, which is taken from the first token. It requires ts to be
+// sorted in descending order of probabilities, and returns the leading part
+// of ts up to the first token below that threshold. An empty ts is returned
+// unchanged.
+func minP(ts []token, p float32) []token {
+	// Without this guard, reading ts[0] below would panic on empty input
+	if len(ts) == 0 {
+		return ts
+	}
+
+	threshold := ts[0].value * p
+
+	for i := range ts {
+		if ts[i].value < threshold {
+			return ts[:i]
+		}
+	}
+	return ts
+}
--- a/sample/transforms_test.go
+++ b/sample/transforms_test.go
@@ -0,0 +1,357 @@
+package sample
+
+import (
+	"math"
+	"math/rand/v2"
+	"testing"
+)
+
+// toTokens converts a float32 slice to a token slice, using each value's
+// index as its token id.
+func toTokens(values []float32) []token {
+	out := make([]token, 0, len(values))
+	for i := range values {
+		out = append(out, token{id: int32(i), value: values[i]})
+	}
+	return out
+}
+
+// compareLogits reports a test error when got differs from want in length,
+// or when any value differs from its expected value by more than 1e-6. name
+// prefixes every error message.
+func compareLogits(t *testing.T, name string, want []float32, got []token) {
+	t.Helper()
+	if len(got) != len(want) {
+		t.Errorf("%s: length mismatch: want %d, got %d", name, len(want), len(got))
+		return
+	}
+	for i, expected := range want {
+		diff := math.Abs(float64(got[i].value - expected))
+		if diff > 1e-6 {
+			t.Errorf("%s: index %d: want %f, got %f", name, i, expected, got[i].value)
+		}
+	}
+}
+
+// TestTemperature checks temperature scaling of the same logits at several
+// temperatures, including 0, which is raised to 1e-7 before dividing.
+func TestTemperature(t *testing.T) {
+	cases := []struct {
+		name string
+		temp float32
+		want []float32
+	}{
+		{"temperature(0.5)", 0.5, []float32{2.0, 8.0, -4.0, 0.0}},
+		{"temperature(1)", 1.0, []float32{1.0, 4.0, -2.0, 0.0}},
+		{"temperature(0)", 0.0, []float32{1e7, 4e7, -2e7, 0.0}},
+	}
+
+	for _, tc := range cases {
+		tokens := toTokens([]float32{1.0, 4.0, -2.0, 0.0})
+		temperature(tokens, tc.temp)
+		compareLogits(t, tc.name, tc.want, tokens)
+	}
+}
+
+// TestSoftmax compares softmax against known outputs where they are given.
+// For the other inputs it checks that every result lies in [0, 1] and that
+// the results sum to 1.
+func TestSoftmax(t *testing.T) {
+	cases := []struct {
+		name  string
+		input []float32
+		want  []float32 // nil means only the range and sum are checked
+	}{
+		{
+			name:  "correctness softmax",
+			input: []float32{1, -2, 3, 0},
+			want:  []float32{0.113550, 0.005653, 0.839024, 0.041773},
+		},
+		{
+			name:  "normal distribution",
+			input: []float32{0.026986899, 0.043722924, 0.036774673, 0.27755088, 0.0046718004, 0.08582123, 0.20409796, 0.00412893, 0.15720603, 0.045046154, 0.0030491839, 0.01681367},
+		},
+		{name: "single value", input: []float32{1.0}},
+		{name: "identical values", input: []float32{0.9, 0.9, 0.9}},
+		{name: "large values", input: []float32{1000.0, 2000.0, 3000.0}},
+		{name: "small values", input: []float32{1e-6, 2e-6, 3e-6}},
+		{name: "negative values", input: []float32{-1.0, -2.0, -3.0}},
+		{name: "mixed values", input: []float32{-100.0, 0.0, 100.0}},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			probs := toTokens(tc.input)
+			softmax(probs)
+
+			if tc.want != nil {
+				compareLogits(t, tc.name, tc.want, probs)
+				return
+			}
+
+			// Check probabilities are in range and sum to 1
+			var total float32
+			for i := range probs {
+				p := probs[i].value
+				total += p
+				if p < 0 || p > 1 {
+					t.Errorf("probability out of range [0,1]: got %f", p)
+				}
+			}
+			if math.Abs(float64(total-1.0)) > 1e-6 {
+				t.Errorf("probabilities don't sum to 1: got %f", total)
+			}
+		})
+	}
+}
+
+// TestTopK checks that topK keeps the k largest values in descending order,
+// and that k values which are out of range (too large, zero or negative) keep
+// every token, sorted in descending order.
+func TestTopK(t *testing.T) {
+	input := []float32{0.026986899, 0.043722924, 0.036774673, 0.27755088, 0.0046718004, 0.08582123, 0.20409796, 0.00412893, 0.15720603, 0.045046154, 0.0030491839, 0.01681367}
+	// input sorted in descending order
+	sorted := []float32{0.27755088, 0.20409796, 0.15720603, 0.08582123, 0.045046154, 0.043722924, 0.036774673, 0.026986899, 0.01681367, 0.0046718004, 0.00412893, 0.0030491839}
+
+	tokens := topK(toTokens(input), 5)
+	if len(tokens) != 5 {
+		t.Errorf("topK(5): wrong length: want 5, got %d", len(tokens))
+	}
+	compareLogits(t, "topK(5)", sorted[:5], tokens)
+
+	// k larger than the input keeps everything
+	tokens = topK(toTokens(input), 20)
+	if len(tokens) != len(input) {
+		t.Errorf("topK(20): wrong length: want %d, got %d", len(input), len(tokens))
+	}
+
+	// k <= 0 keeps everything, sorted in descending order
+	keepAll := []struct {
+		name string
+		k    int
+	}{
+		{"topK(-1)", -1},
+		{"topK(0)", 0},
+	}
+	for _, tc := range keepAll {
+		tokens = topK(toTokens(input), tc.k)
+		if len(tokens) != len(input) {
+			t.Errorf("%s: wrong length: want %d, got %d", tc.name, len(input), len(tokens))
+		}
+		compareLogits(t, tc.name, sorted, tokens)
+	}
+
+	// Large negative values must not cause every token to be dropped
+	tokens = topK(toTokens([]float32{-1e7, -2e7, -3e7, -4e7}), 1)
+	if len(tokens) < 1 {
+		t.Error("topK should keep at least one token")
+	}
+}
+
+// TestTopP checks how many tokens topP keeps for p values of 1.0, 0.95, 0.0
+// and 1e-10, including that at least one token always remains.
+func TestTopP(t *testing.T) {
+	input := []float32{-3, -2, -1, 0, 1, 2, 4}
+	tokens := toTokens(input)
+
+	// Convert to probabilities, then sort in descending order as topP requires
+	softmax(tokens)
+	tokens = topK(tokens, 20)
+
+	// Test with very high p value
+	got := topP(tokens, 1.0)
+
+	// Should keep all tokens since p is 1
+	if len(got) != len(input) {
+		t.Errorf("topP(1.0): should keep all tokens, got %d, want %d", len(got), len(input))
+	}
+
+	// Test with normal p value
+	got = topP(tokens, 0.95)
+
+	if len(got) > 3 {
+		// Report the size of the filtered result, not of the unfiltered input
+		t.Errorf("topP(0.95): kept too many tokens: got %d", len(got))
+		t.Logf("got: %v", got)
+	}
+
+	// Test edge case - ensure at least one token remains
+	input = []float32{-1e6, -1e6, -1e7}
+	tokens = toTokens(input)
+	tokens = topK(tokens, 20)
+	softmax(tokens)
+	got = topP(tokens, 0.0)
+	if len(got) < 1 {
+		t.Error("topP should keep at least one token")
+	}
+
+	// Test with zero p value
+	got = topP(tokens, 0.0)
+
+	// Should keep only the highest probability token
+	if len(got) != 1 {
+		t.Errorf("topP(0.0): should keep only one token, got %d", len(got))
+		t.Logf("got: %v", got)
+	}
+
+	// Test with a tiny but non-zero p value
+	tokens = toTokens(input)
+	tokens = topK(tokens, 20)
+	softmax(tokens)
+	got = topP(tokens, 1e-10)
+	if len(got) == 0 {
+		t.Errorf("topP(1e-10): should keep at least one token, got %d", len(got))
+		t.Logf("got: %v", got)
+	}
+}
+
+// TestMinP checks how many tokens minP keeps for p values of 1.0, 0.2, 0.0
+// and 0.1, and that identical probabilities are never all filtered out.
+func TestMinP(t *testing.T) {
+	input := []float32{-2, 0, -1, -3, 2, 1, 4, 3}
+
+	// prepare sorts the values in descending order and converts them to
+	// probabilities, which is the input minP requires.
+	prepare := func(values []float32) []token {
+		tokens := topK(toTokens(values), 20)
+		softmax(tokens)
+		return tokens
+	}
+
+	// With p = 1.0 the threshold equals the largest probability, so only
+	// the highest probability token remains
+	tokens := minP(prepare(input), 1.0)
+	if len(tokens) != 1 {
+		t.Errorf("minP(1.0): should keep only one token, got %d, want %d", len(tokens), 1)
+	}
+
+	// Test with normal p value
+	tokens = minP(prepare(input), 0.2)
+
+	// Should keep tokens with prob >= 0.2 * max_prob
+	if len(tokens) > 3 {
+		t.Errorf("minP(0.2): kept too many tokens: got %d", len(tokens))
+		t.Logf("got: %v", tokens)
+	}
+
+	// With p = 0.0 the threshold is zero, so every token remains
+	tokens = minP(prepare(input), 0.0)
+	if len(tokens) != len(input) {
+		t.Errorf("minP(0.0): should keep all tokens, got %d, want %d", len(tokens), len(input))
+		t.Logf("got: %v", tokens)
+	}
+
+	// Test with single token
+	tokens = minP(prepare(input[:1]), 0.1)
+
+	// Should keep only the highest probability token
+	if len(tokens) != 1 {
+		t.Errorf("minP(0.1): should return single token, got %d", len(tokens))
+		t.Logf("got: %v", tokens)
+	}
+
+	// Identical values give identical probabilities; none may be dropped
+	// to the point of leaving no token at all
+	tokens = toTokens([]float32{1e-10, 1e-10, 1e-10})
+	softmax(tokens)
+	tokens = minP(tokens, 1.0)
+	if len(tokens) < 1 {
+		t.Error("minP should keep at least one token even with extreme probabilities")
+	}
+}
+
+// BenchmarkTransforms measures each transform on 1<<16 random values. Every
+// iteration first copies the source tokens into a scratch slice, and the
+// transform results are discarded so that the source tokens are never
+// replaced or modified between iterations or sub-benchmarks.
+func BenchmarkTransforms(b *testing.B) {
+	// Generate random logits
+	tokens := make([]token, 1<<16)
+	for i := range tokens {
+		tokens[i] = token{
+			id:    int32(i),
+			value: rand.Float32(),
+		}
+	}
+
+	tokensCopy := make([]token, len(tokens))
+
+	b.Run("Temperature", func(b *testing.B) {
+		b.ResetTimer()
+		for b.Loop() {
+			copy(tokensCopy, tokens)
+			temperature(tokensCopy, 0.5)
+		}
+	})
+
+	b.Run("Softmax", func(b *testing.B) {
+		b.ResetTimer()
+		for b.Loop() {
+			copy(tokensCopy, tokens)
+			softmax(tokensCopy)
+		}
+	})
+
+	b.Run("TopK", func(b *testing.B) {
+		b.ResetTimer()
+		for b.Loop() {
+			copy(tokensCopy, tokens)
+			_ = topK(tokensCopy, 10)
+		}
+	})
+
+	b.Run("TopP", func(b *testing.B) {
+		b.ResetTimer()
+		for b.Loop() {
+			copy(tokensCopy, tokens)
+			_ = topP(tokensCopy, 0.9)
+		}
+	})
+
+	b.Run("MinP", func(b *testing.B) {
+		b.ResetTimer()
+		for b.Loop() {
+			copy(tokensCopy, tokens)
+			_ = minP(tokensCopy, 0.2)
+		}
+	})
+
+	// k is larger than the input, so topK sorts the whole scratch slice
+	b.Run("SortTokens", func(b *testing.B) {
+		b.ResetTimer()
+		for b.Loop() {
+			copy(tokensCopy, tokens)
+			_ = topK(tokensCopy, 200000)
+		}
+	})
+}