ollama source for Momentry Core verification
This commit is contained in:
206
sample/samplers.go
Normal file
206
sample/samplers.go
Normal file
@@ -0,0 +1,206 @@
|
||||
package sample
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math"
|
||||
"math/rand/v2"
|
||||
"slices"
|
||||
|
||||
"github.com/ollama/ollama/llama"
|
||||
"github.com/ollama/ollama/tokenizer"
|
||||
)
|
||||
|
||||
// token represents information about a single token during sampling
|
||||
type token struct {
|
||||
id int32 // The token's unique identifier
|
||||
value float32 // The raw logit or probability from the model
|
||||
}
|
||||
|
||||
type Sampler struct {
|
||||
rng *rand.Rand
|
||||
topK int
|
||||
topP float32
|
||||
minP float32
|
||||
temperature float32
|
||||
grammar *GrammarSampler
|
||||
}
|
||||
|
||||
func (s *Sampler) Sample(logits []float32) (int32, error) {
|
||||
if len(logits) == 0 {
|
||||
return -1, errors.New("sample: no logits provided to sample")
|
||||
}
|
||||
|
||||
tokens := make([]token, len(logits))
|
||||
for i := range logits {
|
||||
tokens[i].id = int32(i)
|
||||
tokens[i].value = logits[i]
|
||||
}
|
||||
|
||||
t, err := s.sample(tokens)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
if s.grammar != nil {
|
||||
// optimization: first check if the max logit is accepted by the grammar
|
||||
// if the max logit is rejected, apply the grammar to all logits (slower)
|
||||
top := []token{t}
|
||||
s.grammar.Apply(top)
|
||||
if !math.IsInf(float64(top[0].value), -1) {
|
||||
s.grammar.Accept(top[0].id)
|
||||
return top[0].id, nil
|
||||
}
|
||||
|
||||
// since .sample has side effects of modifying the tokens
|
||||
// we need to reset them before applying the grammar and
|
||||
// sampling again
|
||||
for i := range logits {
|
||||
tokens[i].id = int32(i)
|
||||
tokens[i].value = logits[i]
|
||||
}
|
||||
s.grammar.Apply(tokens)
|
||||
t, err = s.sample(tokens)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
s.grammar.Accept(t.id)
|
||||
}
|
||||
|
||||
return t.id, nil
|
||||
}
|
||||
|
||||
// greedy returns the highest probability token from the tokens
|
||||
func greedy(tokens []token) token {
|
||||
max := tokens[0]
|
||||
for i := 1; i < len(tokens); i++ {
|
||||
if tokens[i].value > max.value {
|
||||
max = tokens[i]
|
||||
}
|
||||
}
|
||||
|
||||
return max
|
||||
}
|
||||
|
||||
// sample returns the highest probability token from the tokens
|
||||
// given sampler parameters. It also has side effects of modifying the tokens
|
||||
func (s *Sampler) sample(tokens []token) (token, error) {
|
||||
if s.temperature == 0 {
|
||||
return greedy(tokens), nil
|
||||
}
|
||||
|
||||
// topK also sorts the tokens in descending order of logits
|
||||
tokens = topK(tokens, s.topK)
|
||||
|
||||
// scale and normalize the tokens in place
|
||||
temperature(tokens, s.temperature)
|
||||
softmax(tokens)
|
||||
|
||||
tokens = topP(tokens, s.topP)
|
||||
tokens = minP(tokens, s.minP)
|
||||
|
||||
var r float32
|
||||
if s.rng != nil {
|
||||
r = s.rng.Float32()
|
||||
} else {
|
||||
r = rand.Float32()
|
||||
}
|
||||
|
||||
// Calculate cumulative sum of probabilities
|
||||
var sum float32
|
||||
for i := range tokens {
|
||||
sum += tokens[i].value
|
||||
tokens[i].value = sum
|
||||
}
|
||||
r *= tokens[len(tokens)-1].value
|
||||
|
||||
idx, _ := slices.BinarySearchFunc(tokens, r, func(token token, target float32) int {
|
||||
if token.value < target {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
})
|
||||
|
||||
if math.IsNaN(float64(sum)) {
|
||||
return token{}, errors.New("sample: logits sum to NaN, check model output")
|
||||
}
|
||||
return tokens[idx], nil
|
||||
}
|
||||
|
||||
// TODO(parthsareen): update sampler interface to use json unmarshal https://github.com/ollama/ollama/issues/9278
|
||||
func NewSampler(temperature float32, topK int, topP float32, minP float32, seed int, grammar *GrammarSampler) Sampler {
|
||||
var rng *rand.Rand
|
||||
if seed != -1 {
|
||||
// PCG requires two parameters: sequence and stream
|
||||
// Use original seed for sequence
|
||||
sequence := uint64(seed)
|
||||
// Use golden ratio hash to generate statistically independent seeds
|
||||
rng = rand.New(rand.NewPCG(sequence, sequence^0x9E3779B9))
|
||||
}
|
||||
if temperature < 0.0 {
|
||||
temperature = 0.0
|
||||
}
|
||||
|
||||
if topP < 0.0 {
|
||||
topP = 0.0
|
||||
}
|
||||
if topP >= 1.0 {
|
||||
topP = 1.0
|
||||
}
|
||||
|
||||
if minP < 0.0 {
|
||||
minP = 0.0
|
||||
}
|
||||
if minP >= 1.0 {
|
||||
minP = 1.0
|
||||
}
|
||||
|
||||
return Sampler{
|
||||
rng: rng,
|
||||
topK: topK,
|
||||
topP: topP,
|
||||
minP: minP,
|
||||
temperature: temperature,
|
||||
grammar: grammar,
|
||||
}
|
||||
}
|
||||
|
||||
type GrammarSampler struct {
|
||||
grammar *llama.Grammar
|
||||
}
|
||||
|
||||
func NewGrammarSampler(tok tokenizer.Tokenizer, grammarStr string) (*GrammarSampler, error) {
|
||||
vocabIds := make([]uint32, len(tok.Vocabulary().Values))
|
||||
pieces := make([]string, len(tok.Vocabulary().Values))
|
||||
for i := range tok.Vocabulary().Values {
|
||||
pieces[i], _ = tok.Decode([]int32{int32(i)})
|
||||
vocabIds[i] = uint32(i)
|
||||
}
|
||||
|
||||
grammar := llama.NewGrammar(grammarStr, vocabIds, pieces, tok.Vocabulary().EOS)
|
||||
if grammar == nil {
|
||||
return nil, errors.New("sample: failed to initialize grammar")
|
||||
}
|
||||
|
||||
return &GrammarSampler{grammar: grammar}, nil
|
||||
}
|
||||
|
||||
func (g *GrammarSampler) Apply(tokens []token) {
|
||||
tds := make([]llama.TokenData, len(tokens))
|
||||
for i, token := range tokens {
|
||||
tds[i].ID = token.id
|
||||
tds[i].Logit = token.value
|
||||
}
|
||||
g.grammar.Apply(tds)
|
||||
|
||||
for i := range tokens {
|
||||
tokens[i].value = tds[i].Logit
|
||||
}
|
||||
}
|
||||
|
||||
func (g *GrammarSampler) Accept(token int32) {
|
||||
g.grammar.Accept(token)
|
||||
}
|
||||
|
||||
func (g *GrammarSampler) Free() {
|
||||
g.grammar.Free()
|
||||
}
|
||||
92
sample/samplers_benchmark_test.go
Normal file
92
sample/samplers_benchmark_test.go
Normal file
@@ -0,0 +1,92 @@
|
||||
package sample
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkWeightedSampler(b *testing.B) {
|
||||
sizes := []int{10, 100, 1000, 10000}
|
||||
|
||||
for _, size := range sizes {
|
||||
b.Run(fmt.Sprintf("Size %d", size), func(b *testing.B) {
|
||||
logits := make([]float32, size)
|
||||
for i := range logits {
|
||||
logits[i] = float32(rand.Float64()*10 - 5)
|
||||
}
|
||||
|
||||
sampler := NewSampler(0.8, 0, 0, 0, 42, nil)
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
sampler.Sample(logits)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
configs := []struct {
|
||||
name string
|
||||
temperature float32
|
||||
topK int
|
||||
topP float32
|
||||
minP float32
|
||||
seed int
|
||||
}{
|
||||
{"Greedy", 0, -1, 0, 0, -1},
|
||||
{"Temperature", 0.8, -1, 0, 0, -1},
|
||||
{"TopK", 0.8, 50, 0, 0, -1},
|
||||
{"TopP", 0.8, -1, 0.9, 0, -1},
|
||||
{"MinP", 0.8, -1, 0, 0.05, -1},
|
||||
{"WithSeed", 0.8, 50, 0, 0, 42},
|
||||
}
|
||||
|
||||
// Fixed size for common vocab size
|
||||
size := 128000
|
||||
logits := make([]float32, size)
|
||||
for i := range logits {
|
||||
logits[i] = float32(rand.Float64()*10 - 5)
|
||||
}
|
||||
|
||||
for _, tc := range configs {
|
||||
b.Run("Config"+tc.name, func(b *testing.B) {
|
||||
sampler := NewSampler(tc.temperature, tc.topK, tc.topP, tc.minP, tc.seed, nil)
|
||||
sampler.Sample(logits)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for b.Loop() {
|
||||
sampler.Sample(logits)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Test with combined transforms separately - topK influences performance greatly
|
||||
b.Run("TransformCombined", func(b *testing.B) {
|
||||
sampler := NewSampler(0.8, 50, 0.9, 0.05, 42, nil)
|
||||
b.ResetTimer()
|
||||
|
||||
for b.Loop() {
|
||||
sampler.Sample(logits)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkGreedySampler(b *testing.B) {
|
||||
sizes := []int{10, 100, 1000, 10000, 100000}
|
||||
|
||||
for _, size := range sizes {
|
||||
b.Run(fmt.Sprintf("Size %d", size), func(b *testing.B) {
|
||||
logits := make([]float32, size)
|
||||
for i := range logits {
|
||||
logits[i] = float32(rand.Float64()*10 - 5)
|
||||
}
|
||||
|
||||
sampler := NewSampler(0, -1, 0, 0, -1, nil)
|
||||
b.ResetTimer()
|
||||
|
||||
for b.Loop() {
|
||||
sampler.Sample(logits)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
174
sample/samplers_test.go
Normal file
174
sample/samplers_test.go
Normal file
@@ -0,0 +1,174 @@
|
||||
package sample
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"math"
|
||||
"math/rand/v2"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/tokenizer"
|
||||
)
|
||||
|
||||
func TestWeighted(t *testing.T) {
|
||||
logits := []float32{-10, 3, -10, -10}
|
||||
sampler := NewSampler(0, 0, 0, 0, 0, nil)
|
||||
got, err := sampler.Sample(logits)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
want := int32(1)
|
||||
if want != got {
|
||||
t.Errorf("index mismatch: want %d, got %d", want, got)
|
||||
}
|
||||
|
||||
logits = []float32{-100, -10, 0, 10}
|
||||
sampler = NewSampler(0, 0, 0, 0, 0, nil)
|
||||
got, err = sampler.Sample(logits)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
want = int32(3) // Should pick highest probability with this r value
|
||||
if want != got {
|
||||
t.Errorf("index mismatch: want %d, got %d", want, got)
|
||||
}
|
||||
|
||||
// Test very high p
|
||||
logits = []float32{1.0, 0.9999999999999999, 0.5, 0.1}
|
||||
// Use extremely small topP to filter out all tokens
|
||||
sampler = NewSampler(1.0, 0, 1e-10, 0, 0, nil)
|
||||
got, err = sampler.Sample(logits)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
// Should get the token with the highest logit
|
||||
want = int32(0)
|
||||
if want != got {
|
||||
t.Errorf("index mismatch: want %d, got %d", want, got)
|
||||
}
|
||||
|
||||
logits = []float32{float32(math.NaN()), float32(math.NaN()), float32(math.NaN())}
|
||||
sampler = NewSampler(1, 0, 0.95, 0.05, 0, nil)
|
||||
got, err = sampler.Sample(logits)
|
||||
if err == nil {
|
||||
t.Errorf("expected error, got %d", got)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func modelHelper(t testing.TB) tokenizer.Tokenizer {
|
||||
t.Helper()
|
||||
|
||||
f, err := os.Open(filepath.FromSlash("../tokenizer/testdata/llama3.2/encoder.json"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
vocab := make(map[string]int32)
|
||||
if err := json.NewDecoder(f).Decode(&vocab); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
tokens := make([]string, len(vocab))
|
||||
for token, id := range vocab {
|
||||
tokens[id] = token
|
||||
}
|
||||
|
||||
merges := make([]string, 0, 1)
|
||||
// Only need vocab for Grammar Test
|
||||
return tokenizer.NewBytePairEncoding(
|
||||
&tokenizer.Vocabulary{
|
||||
Values: tokens,
|
||||
Types: make([]int32, len(vocab)),
|
||||
Merges: merges,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
func TestGrammar(t *testing.T) {
|
||||
tokenizer := modelHelper(t)
|
||||
|
||||
grammarJSON := `
|
||||
root ::= object
|
||||
value ::= object | array | string | number | ("true" | "false" | "null") ws
|
||||
object ::=
|
||||
"{" ws (
|
||||
string ":" ws value
|
||||
("," ws string ":" ws value)*
|
||||
)? "}" ws
|
||||
array ::=
|
||||
"[" ws (
|
||||
value
|
||||
("," ws value)*
|
||||
)? "]" ws
|
||||
string ::=
|
||||
"\"" (
|
||||
[^"\\\x7F\x00-\x1F] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
|
||||
)* "\"" ws
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
|
||||
# Optional space: by convention, applied in this grammar after literal chars when allowed
|
||||
ws ::= ([ \t\n] ws)?
|
||||
`
|
||||
grammar, err := NewGrammarSampler(tokenizer, grammarJSON)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer grammar.Free()
|
||||
|
||||
logits := make([]float32, len(tokenizer.Vocabulary().Values))
|
||||
for i := range logits {
|
||||
logits[i] = rand.Float32()
|
||||
}
|
||||
tokens := make([]token, len(logits))
|
||||
for i := range tokens {
|
||||
tokens[i].id = int32(i)
|
||||
tokens[i].value = logits[i]
|
||||
}
|
||||
|
||||
grammar.Apply(tokens)
|
||||
nonInfCount := 0
|
||||
infCount := 0
|
||||
for _, tok := range tokens {
|
||||
if math.IsInf(float64(tok.value), -1) {
|
||||
infCount++
|
||||
} else {
|
||||
nonInfCount++
|
||||
}
|
||||
}
|
||||
if nonInfCount == 0 {
|
||||
t.Error("expected at least one non -inf token after grammar application, got none")
|
||||
}
|
||||
if infCount == 0 {
|
||||
t.Error("expected some -inf tokens after grammar application, got none")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSample(b *testing.B) {
|
||||
samplers := map[string]Sampler{
|
||||
"Greedy": NewSampler(0, 0, 0, 0, 0, nil), // Use NewSampler with temp=0 for greedy
|
||||
"Weighted": NewSampler(0.5, 10, 0.9, 0.2, -1, nil),
|
||||
}
|
||||
|
||||
// Generate random logits for benchmarking
|
||||
logits := make([]float32, 1<<16)
|
||||
for i := range logits {
|
||||
logits[i] = rand.Float32()
|
||||
}
|
||||
|
||||
for name, s := range samplers {
|
||||
b.Run(name, func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
if _, err := s.Sample(logits); err != nil {
|
||||
b.Fatalf("error sampling: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
130
sample/transforms.go
Normal file
130
sample/transforms.go
Normal file
@@ -0,0 +1,130 @@
|
||||
package sample
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"math"
|
||||
"slices"
|
||||
)
|
||||
|
||||
// tokenHeap implements heap.Interface and holds tokens as a min-heap to track k largest elements
|
||||
type tokenHeap []token
|
||||
|
||||
func (h tokenHeap) Len() int { return len(h) }
|
||||
func (h tokenHeap) Less(i, j int) bool { return h[i].value < h[j].value }
|
||||
func (h tokenHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
|
||||
|
||||
func (h *tokenHeap) Push(x any) {
|
||||
*h = append(*h, x.(token))
|
||||
}
|
||||
|
||||
func (h *tokenHeap) Pop() any {
|
||||
old := *h
|
||||
n := len(old)
|
||||
x := old[n-1]
|
||||
*h = old[0 : n-1]
|
||||
return x
|
||||
}
|
||||
|
||||
// temperature applies scaling to the logits
|
||||
func temperature(ts []token, temp float32) {
|
||||
// Ensure temperature clipping near 0 to avoid numerical instability
|
||||
temp = max(temp, 1e-7)
|
||||
for i := range ts {
|
||||
ts[i].value = ts[i].value / temp
|
||||
}
|
||||
}
|
||||
|
||||
// softmax applies normalization to the logits
|
||||
func softmax(ts []token) {
|
||||
// Find max logit for numerical stability
|
||||
maxLogit := float32(math.Inf(-1))
|
||||
for _, t := range ts {
|
||||
if t.value > maxLogit {
|
||||
maxLogit = t.value
|
||||
}
|
||||
}
|
||||
|
||||
// Compute exp(x - max)
|
||||
var sum float32
|
||||
for i, v := range ts {
|
||||
ts[i].value = float32(math.Exp(float64(v.value - maxLogit)))
|
||||
sum += ts[i].value
|
||||
}
|
||||
|
||||
// exp(x - max) / sum(exp(x - max))
|
||||
for i := range ts {
|
||||
ts[i].value /= sum
|
||||
}
|
||||
}
|
||||
|
||||
// topK limits the number of tokens considered to the k highest logits
|
||||
func topK(ts []token, k int) []token {
|
||||
if k >= len(ts) || k <= 0 {
|
||||
slices.SortFunc(ts, func(a, b token) int {
|
||||
switch {
|
||||
case a.value < b.value:
|
||||
return 1
|
||||
case a.value > b.value:
|
||||
return -1
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
})
|
||||
return ts
|
||||
}
|
||||
|
||||
// Initialize min-heap with first k elements
|
||||
h := make(tokenHeap, k)
|
||||
copy(h, ts[:k])
|
||||
heap.Init(&h)
|
||||
|
||||
// Process remaining elements
|
||||
for i := k; i < len(ts); i++ {
|
||||
if ts[i].value > h[0].value {
|
||||
heap.Pop(&h)
|
||||
heap.Push(&h, ts[i])
|
||||
}
|
||||
}
|
||||
|
||||
// Convert heap to sorted slice in descending order
|
||||
result := make([]token, len(h))
|
||||
for i := k - 1; i >= 0; i-- {
|
||||
result[i] = heap.Pop(&h).(token)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// topP limits tokens to those with cumulative probability p
|
||||
// requires ts to be sorted in descending order of probabilities
|
||||
func topP(ts []token, p float32) []token {
|
||||
if p == 1.0 {
|
||||
return ts
|
||||
}
|
||||
|
||||
// Find cutoff index where cumulative sum exceeds p
|
||||
var sum float32
|
||||
for i, t := range ts {
|
||||
sum += t.value
|
||||
if sum > float32(p) {
|
||||
return ts[:i+1]
|
||||
}
|
||||
}
|
||||
|
||||
return ts
|
||||
}
|
||||
|
||||
// minP filters tokens with probabilities >= p * max_prob
|
||||
// requires ts to be sorted in descending order of probabilities
|
||||
func minP(ts []token, p float32) []token {
|
||||
maxProb := ts[0].value
|
||||
|
||||
threshold := maxProb * p
|
||||
|
||||
for i, t := range ts {
|
||||
if t.value < threshold {
|
||||
return ts[:i]
|
||||
}
|
||||
}
|
||||
return ts
|
||||
}
|
||||
357
sample/transforms_test.go
Normal file
357
sample/transforms_test.go
Normal file
@@ -0,0 +1,357 @@
|
||||
package sample
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/rand/v2"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Helper to convert float32 slice to logit slice
|
||||
func toTokens(values []float32) []token {
|
||||
tokens := make([]token, len(values))
|
||||
for i, v := range values {
|
||||
tokens[i] = token{
|
||||
id: int32(i),
|
||||
value: v,
|
||||
}
|
||||
}
|
||||
return tokens
|
||||
}
|
||||
|
||||
// Helper to compare logit slices
|
||||
func compareLogits(t *testing.T, name string, want []float32, got []token) {
|
||||
t.Helper()
|
||||
if len(want) != len(got) {
|
||||
t.Errorf("%s: length mismatch: want %d, got %d", name, len(want), len(got))
|
||||
return
|
||||
}
|
||||
for i := range want {
|
||||
if math.Abs(float64(got[i].value-want[i])) > 1e-6 {
|
||||
t.Errorf("%s: index %d: want %f, got %f", name, i, want[i], got[i].value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTemperature(t *testing.T) {
|
||||
input := []float32{1.0, 4.0, -2.0, 0.0}
|
||||
tokens := toTokens(input)
|
||||
temperature(tokens, 0.5)
|
||||
want := []float32{2.0, 8.0, -4.0, 0.0}
|
||||
compareLogits(t, "temperature(0.5)", want, tokens)
|
||||
|
||||
input = []float32{1.0, 4.0, -2.0, 0.0}
|
||||
tokens = toTokens(input)
|
||||
temperature(tokens, 1.0)
|
||||
want = []float32{1.0, 4.0, -2.0, 0.0}
|
||||
compareLogits(t, "temperature(1)", want, tokens)
|
||||
|
||||
input = []float32{1.0, 4.0, -2.0, 0.0}
|
||||
tokens = toTokens(input)
|
||||
temperature(tokens, 0.0)
|
||||
want = []float32{1e7, 4e7, -2e7, 0.0}
|
||||
compareLogits(t, "temperature(0)", want, tokens)
|
||||
}
|
||||
|
||||
func TestSoftmax(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input []float32
|
||||
expected []float32
|
||||
}{
|
||||
{
|
||||
name: "correctness softmax",
|
||||
input: []float32{1, -2, 3, 0},
|
||||
expected: []float32{0.113550, 0.005653, 0.839024, 0.041773},
|
||||
},
|
||||
{
|
||||
name: "normal distribution",
|
||||
input: []float32{0.026986899, 0.043722924, 0.036774673, 0.27755088, 0.0046718004, 0.08582123, 0.20409796, 0.00412893, 0.15720603, 0.045046154, 0.0030491839, 0.01681367},
|
||||
},
|
||||
{
|
||||
name: "single value",
|
||||
input: []float32{1.0},
|
||||
},
|
||||
{
|
||||
name: "identical values",
|
||||
input: []float32{0.9, 0.9, 0.9},
|
||||
},
|
||||
{
|
||||
name: "large values",
|
||||
input: []float32{1000.0, 2000.0, 3000.0},
|
||||
},
|
||||
{
|
||||
name: "small values",
|
||||
input: []float32{1e-6, 2e-6, 3e-6},
|
||||
},
|
||||
{
|
||||
name: "negative values",
|
||||
input: []float32{-1.0, -2.0, -3.0},
|
||||
},
|
||||
{
|
||||
name: "mixed values",
|
||||
input: []float32{-100.0, 0.0, 100.0},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tokens := toTokens(tt.input)
|
||||
softmax(tokens)
|
||||
|
||||
if tt.expected != nil {
|
||||
compareLogits(t, tt.name, tt.expected, tokens)
|
||||
return
|
||||
}
|
||||
|
||||
// Check probabilities sum to 1
|
||||
var sum float32
|
||||
for _, token := range tokens {
|
||||
sum += token.value
|
||||
if token.value < 0 || token.value > 1 {
|
||||
t.Errorf("probability out of range [0,1]: got %f", token.value)
|
||||
}
|
||||
}
|
||||
if math.Abs(float64(sum-1.0)) > 1e-6 {
|
||||
t.Errorf("probabilities don't sum to 1: got %f", sum)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTopK(t *testing.T) {
|
||||
input := []float32{0.026986899, 0.043722924, 0.036774673, 0.27755088, 0.0046718004, 0.08582123, 0.20409796, 0.00412893, 0.15720603, 0.045046154, 0.0030491839, 0.01681367}
|
||||
tokens := toTokens(input)
|
||||
tokens = topK(tokens, 5)
|
||||
if len(tokens) != 5 {
|
||||
t.Errorf("topK(5): wrong length: want 5, got %d", len(tokens))
|
||||
}
|
||||
want := []float32{0.27755088, 0.20409796, 0.15720603, 0.08582123, 0.045046154}
|
||||
compareLogits(t, "topK(3)", want, tokens)
|
||||
|
||||
tokens = toTokens(input)
|
||||
tokens = topK(tokens, 20)
|
||||
if len(tokens) != len(input) {
|
||||
t.Errorf("topK(20): wrong length: want %d, got %d", len(input), len(tokens))
|
||||
}
|
||||
|
||||
input = []float32{0.026986899, 0.043722924, 0.036774673, 0.27755088, 0.0046718004, 0.08582123, 0.20409796, 0.00412893, 0.15720603, 0.045046154, 0.0030491839, 0.01681367}
|
||||
want = []float32{0.27755088, 0.20409796, 0.15720603, 0.08582123, 0.045046154, 0.043722924, 0.036774673, 0.026986899, 0.01681367, 0.0046718004, 0.00412893, 0.0030491839}
|
||||
tokens = toTokens(input)
|
||||
tokens = topK(tokens, -1)
|
||||
if len(tokens) != len(input) {
|
||||
t.Errorf("topK(-1): wrong length: want %d, got %d", len(input), len(tokens))
|
||||
}
|
||||
compareLogits(t, "topK(-1)", want, tokens)
|
||||
|
||||
input = []float32{0.026986899, 0.043722924, 0.036774673, 0.27755088, 0.0046718004, 0.08582123, 0.20409796, 0.00412893, 0.15720603, 0.045046154, 0.0030491839, 0.01681367}
|
||||
want = []float32{0.27755088, 0.20409796, 0.15720603, 0.08582123, 0.045046154, 0.043722924, 0.036774673, 0.026986899, 0.01681367, 0.0046718004, 0.00412893, 0.0030491839}
|
||||
tokens = toTokens(input)
|
||||
tokens = topK(tokens, 0)
|
||||
if len(tokens) != len(input) {
|
||||
t.Errorf("topK(-1): wrong length: want %d, got %d", len(input), len(tokens))
|
||||
}
|
||||
compareLogits(t, "topK(-1)", want, tokens)
|
||||
|
||||
input = []float32{-1e7, -2e7, -3e7, -4e7}
|
||||
tokens = toTokens(input)
|
||||
tokens = topK(tokens, 1)
|
||||
if len(tokens) < 1 {
|
||||
t.Error("topK should keep at least one token")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTopP(t *testing.T) {
|
||||
input := []float32{-3, -2, -1, 0, 1, 2, 4}
|
||||
tokens := toTokens(input)
|
||||
|
||||
// First apply temperature and softmax to get probabilities
|
||||
softmax(tokens)
|
||||
tokens = topK(tokens, 20)
|
||||
|
||||
// Test with very high p value
|
||||
got := topP(tokens, 1.0)
|
||||
|
||||
// Should keep all tokens since p is 1
|
||||
if len(got) != len(input) {
|
||||
t.Errorf("topP(1.0): should keep all tokens, got %d, want %d", len(got), len(input))
|
||||
}
|
||||
|
||||
// Test with normal p value
|
||||
got = topP(tokens, 0.95)
|
||||
|
||||
if len(got) > 3 {
|
||||
t.Errorf("topP(0.95): kept too many tokens: got %d", len(tokens))
|
||||
t.Logf("got: %v", got)
|
||||
}
|
||||
|
||||
// Test edge case - ensure at least one token remains
|
||||
input = []float32{-1e6, -1e6, -1e7}
|
||||
tokens = toTokens(input)
|
||||
tokens = topK(tokens, 20)
|
||||
softmax(tokens)
|
||||
got = topP(tokens, 0.0)
|
||||
if len(got) < 1 {
|
||||
t.Error("topP should keep at least one token")
|
||||
}
|
||||
|
||||
// Test with zero p value
|
||||
got = topP(tokens, 0.0)
|
||||
|
||||
// Should keep only the highest probability token
|
||||
if len(got) != 1 {
|
||||
t.Errorf("topP(0.0): should keep only one token, got %d", len(got))
|
||||
t.Logf("got: %v", got)
|
||||
}
|
||||
|
||||
tokens = toTokens(input)
|
||||
tokens = topK(tokens, 20)
|
||||
softmax(tokens)
|
||||
got = topP(tokens, 1e-10)
|
||||
if len(got) == 0 {
|
||||
t.Errorf("topP(1e-10): should keep at least one token, got %d", len(got))
|
||||
t.Logf("got: %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMinP(t *testing.T) {
|
||||
input := []float32{-2, 0, -1, -3, 2, 1, 4, 3}
|
||||
tokens := toTokens(input)
|
||||
|
||||
// First apply temperature and softmax
|
||||
tokens = topK(tokens, 20)
|
||||
softmax(tokens)
|
||||
|
||||
tokens = minP(tokens, 1.0)
|
||||
|
||||
if len(tokens) != 1 {
|
||||
t.Errorf("minP(1.0): should keep all tokens, got %d, want %d", len(tokens), len(tokens))
|
||||
}
|
||||
|
||||
// Test with normal p value
|
||||
tokens = toTokens(input) // Reset tokens
|
||||
tokens = topK(tokens, 20)
|
||||
softmax(tokens)
|
||||
tokens = minP(tokens, 0.2)
|
||||
|
||||
// Should keep tokens with prob >= 0.2 * max_prob
|
||||
if len(tokens) > 3 {
|
||||
t.Errorf("minP(0.2): kept too many tokens: got %d", len(tokens))
|
||||
t.Logf("got: %v", tokens)
|
||||
}
|
||||
|
||||
// Test with zero p value
|
||||
tokens = toTokens(input) // Reset tokens
|
||||
tokens = topK(tokens, 20)
|
||||
softmax(tokens)
|
||||
tokens = minP(tokens, 0.0)
|
||||
|
||||
// Should keep only the highest probability token
|
||||
if len(tokens) != len(input) {
|
||||
t.Errorf("minP(0.0): should keep only one token, got %d", len(tokens))
|
||||
t.Logf("got: %v", tokens)
|
||||
}
|
||||
|
||||
// Test with single token
|
||||
tokens = toTokens(input[:1])
|
||||
tokens = topK(tokens, 20)
|
||||
softmax(tokens)
|
||||
tokens = minP(tokens, 0.1)
|
||||
|
||||
// Should keep only the highest probability token
|
||||
if len(tokens) != 1 {
|
||||
t.Errorf("minP(0.1): should return single token, got %d", len(tokens))
|
||||
t.Logf("got: %v", tokens)
|
||||
}
|
||||
|
||||
input = []float32{1e-10, 1e-10, 1e-10}
|
||||
tokens = toTokens(input)
|
||||
softmax(tokens)
|
||||
tokens = minP(tokens, 1.0)
|
||||
if len(tokens) < 1 {
|
||||
t.Error("minP should keep at least one token even with extreme probabilities")
|
||||
got := minP(tokens, 1.0)
|
||||
|
||||
if len(got) != 1 {
|
||||
t.Errorf("minP(1.0): should keep all tokens, got %d, want %d", len(got), len(tokens))
|
||||
}
|
||||
|
||||
// Test with normal p value
|
||||
got = minP(tokens, 0.2)
|
||||
|
||||
// Should keep tokens with prob >= 0.2 * max_prob
|
||||
if len(got) > 3 {
|
||||
t.Errorf("minP(0.2): kept too many tokens: got %d", len(got))
|
||||
t.Logf("got: %v", got)
|
||||
}
|
||||
|
||||
// Test with zero p value
|
||||
got = minP(tokens, 0.0)
|
||||
|
||||
// Should keep only the highest probability token
|
||||
if len(got) != len(tokens) {
|
||||
t.Errorf("minP(0.0): should keep only one token, got %d", len(got))
|
||||
t.Logf("got: %v", got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkTransforms(b *testing.B) {
|
||||
// Generate random logits
|
||||
tokens := make([]token, 1<<16)
|
||||
for i := range tokens {
|
||||
tokens[i] = token{
|
||||
id: int32(i),
|
||||
value: rand.Float32(),
|
||||
}
|
||||
}
|
||||
|
||||
tokensCopy := make([]token, len(tokens))
|
||||
|
||||
b.Run("Temperature", func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
copy(tokensCopy, tokens)
|
||||
temperature(tokensCopy, 0.5)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("Softmax", func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
copy(tokensCopy, tokens)
|
||||
softmax(tokensCopy)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("TopK", func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
copy(tokensCopy, tokens)
|
||||
tokens = topK(tokensCopy, 10)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("TopP", func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
copy(tokensCopy, tokens)
|
||||
tokens = topP(tokensCopy, 0.9)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("MinP", func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
copy(tokensCopy, tokens)
|
||||
tokens = minP(tokensCopy, 0.2)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("SortTokens", func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
copy(tokensCopy, tokens)
|
||||
tokens = topK(tokensCopy, 200000)
|
||||
}
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user