ollama source for Momentry Core verification
This commit is contained in:
20
fs/config.go
Normal file
20
fs/config.go
Normal file
@@ -0,0 +1,20 @@
|
||||
package fs
|
||||
|
||||
import "iter"
|
||||
|
||||
type Config interface {
|
||||
Architecture() string
|
||||
String(string, ...string) string
|
||||
Uint(string, ...uint32) uint32
|
||||
Float(string, ...float32) float32
|
||||
Bool(string, ...bool) bool
|
||||
|
||||
Strings(string, ...[]string) []string
|
||||
Ints(string, ...[]int32) []int32
|
||||
Floats(string, ...[]float32) []float32
|
||||
Bools(string, ...[]bool) []bool
|
||||
|
||||
Len() int
|
||||
Keys() iter.Seq[string]
|
||||
Value(key string) any
|
||||
}
|
||||
923
fs/ggml/ggml.go
Normal file
923
fs/ggml/ggml.go
Normal file
@@ -0,0 +1,923 @@
|
||||
package ggml
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"iter"
|
||||
"log/slog"
|
||||
"maps"
|
||||
"math"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/fs/util/bufioutil"
|
||||
"github.com/ollama/ollama/logutil"
|
||||
"github.com/ollama/ollama/ml"
|
||||
)
|
||||
|
||||
type GGML struct {
|
||||
container
|
||||
model
|
||||
Length int64
|
||||
}
|
||||
|
||||
type model interface {
|
||||
KV() KV
|
||||
Tensors() Tensors
|
||||
}
|
||||
|
||||
type KV map[string]any
|
||||
|
||||
func (kv KV) Architecture() string {
|
||||
return kv.String("general.architecture", "unknown")
|
||||
}
|
||||
|
||||
func (kv KV) Kind() string {
|
||||
return kv.String("general.type", "unknown")
|
||||
}
|
||||
|
||||
func (kv KV) ParameterCount() uint64 {
|
||||
val, _ := keyValue(kv, "general.parameter_count", uint64(0))
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) FileType() FileType {
|
||||
if t := kv.Uint("general.file_type"); t > 0 {
|
||||
return FileType(t)
|
||||
}
|
||||
|
||||
return FileTypeUnknown
|
||||
}
|
||||
|
||||
func (kv KV) BlockCount() uint64 {
|
||||
return uint64(kv.Uint("block_count"))
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingLength() uint64 {
|
||||
return uint64(kv.Uint("embedding_length"))
|
||||
}
|
||||
|
||||
func (kv KV) HeadCount() []uint64 {
|
||||
headCountDefault := uint32(1)
|
||||
headCount := kv.UintOrArrayValueAsArray("attention.head_count", headCountDefault)
|
||||
if len(headCount) == 1 {
|
||||
headCountDefault = headCount[0]
|
||||
}
|
||||
nLayers := int(kv.BlockCount())
|
||||
if len(headCount) > nLayers {
|
||||
slog.Warn("got more elements of attention.head_count than layers", "len(headCount)", len(headCount), "layers", nLayers)
|
||||
}
|
||||
out := make([]uint64, nLayers)
|
||||
for i := range nLayers {
|
||||
if i >= len(headCount) {
|
||||
out[i] = uint64(headCountDefault)
|
||||
} else {
|
||||
out[i] = uint64(headCount[i])
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (kv KV) HeadCountMax() uint64 {
|
||||
return uint64(kv.UintOrMaxArrayValue("attention.head_count", 1))
|
||||
}
|
||||
|
||||
func (kv KV) HeadCountMin() uint64 {
|
||||
return uint64(kv.UintOrMinArrayValue("attention.head_count", 1))
|
||||
}
|
||||
|
||||
func (kv KV) HeadCountKV() []uint64 {
|
||||
headCountKVDefault := uint32(1)
|
||||
headCountKV := kv.UintOrArrayValueAsArray("attention.head_count_kv", headCountKVDefault)
|
||||
if len(headCountKV) == 1 {
|
||||
headCountKVDefault = headCountKV[0]
|
||||
}
|
||||
nLayers := int(kv.BlockCount())
|
||||
if len(headCountKV) > nLayers {
|
||||
slog.Warn("got more elements of attention.head_count than layers", "len(headCountKV)", len(headCountKV), "layers", nLayers)
|
||||
}
|
||||
out := make([]uint64, nLayers)
|
||||
for i := range nLayers {
|
||||
if i >= len(headCountKV) {
|
||||
out[i] = uint64(headCountKVDefault)
|
||||
} else {
|
||||
out[i] = uint64(headCountKV[i])
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (kv KV) HeadCountKVMax() uint64 {
|
||||
return uint64(kv.UintOrMaxArrayValue("attention.head_count_kv", 1))
|
||||
}
|
||||
|
||||
func (kv KV) HeadCountKVMin() uint64 {
|
||||
return uint64(kv.UintOrMinArrayValue("attention.head_count_kv", 1))
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingHeadCountMax() uint64 {
|
||||
if heads := kv.HeadCountMin(); heads > 0 {
|
||||
return kv.EmbeddingLength() / heads
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingHeadCountK() uint64 {
|
||||
return uint64(kv.Uint("attention.key_length", uint32(kv.EmbeddingHeadCountMax())))
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingHeadCountV() uint64 {
|
||||
return uint64(kv.Uint("attention.value_length", uint32(kv.EmbeddingHeadCountMax())))
|
||||
}
|
||||
|
||||
func (kv KV) ContextLength() uint64 {
|
||||
return uint64(kv.Uint("context_length"))
|
||||
}
|
||||
|
||||
func (kv KV) ChatTemplate() string {
|
||||
return kv.String("tokenizer.chat_template")
|
||||
}
|
||||
|
||||
// ssm architecture parameters
|
||||
|
||||
func (kv KV) SSMConvKernel() uint64 {
|
||||
return uint64(kv.Uint("ssm.conv_kernel"))
|
||||
}
|
||||
|
||||
func (kv KV) SSMInnerSize() uint64 {
|
||||
return uint64(kv.Uint("ssm.inner_size"))
|
||||
}
|
||||
|
||||
func (kv KV) SSMStateSize() uint64 {
|
||||
return uint64(kv.Uint("ssm.state_size"))
|
||||
}
|
||||
|
||||
func (kv KV) SSMGroupCount() uint64 {
|
||||
return uint64(kv.Uint("ssm.group_count"))
|
||||
}
|
||||
|
||||
func (kv KV) FFNLength() []uint64 {
|
||||
ffnLengthDefault := uint32(0)
|
||||
ffnLength := kv.UintOrArrayValueAsArray("feed_forward_length", ffnLengthDefault)
|
||||
if len(ffnLength) == 1 {
|
||||
ffnLengthDefault = ffnLength[0]
|
||||
}
|
||||
nLayers := int(kv.BlockCount())
|
||||
if len(ffnLength) > nLayers {
|
||||
slog.Warn("got more elements of feed_forward_length than layers", "len(ffnLength)", len(ffnLength), "layers", nLayers)
|
||||
}
|
||||
out := make([]uint64, nLayers)
|
||||
for i := range nLayers {
|
||||
if i >= len(ffnLength) {
|
||||
out[i] = uint64(ffnLengthDefault)
|
||||
} else {
|
||||
out[i] = uint64(ffnLength[i])
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// general types
|
||||
|
||||
func (kv KV) String(key string, defaultValue ...string) string {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, "")...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Uint(key string, defaultValue ...uint32) uint32 {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, 0)...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Float(key string, defaultValue ...float32) float32 {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, 0)...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Bool(key string, defaultValue ...bool) bool {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, false)...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) UintOrMaxArrayValue(key string, defaultValue uint32) uint32 {
|
||||
_, max := kv.UintOrArrayValue(key, defaultValue)
|
||||
return max
|
||||
}
|
||||
|
||||
func (kv KV) UintOrMinArrayValue(key string, defaultValue uint32) uint32 {
|
||||
min, _ := kv.UintOrArrayValue(key, defaultValue)
|
||||
return min
|
||||
}
|
||||
|
||||
func (kv KV) UintOrArrayValue(key string, defaultValue uint32) (uint32, uint32) {
|
||||
arrVal := kv.UintOrArrayValueAsArray(key, defaultValue)
|
||||
return slices.Min(arrVal), slices.Max(arrVal)
|
||||
}
|
||||
|
||||
func (kv KV) UintOrArrayValueAsArray(key string, defaultValue uint32) []uint32 {
|
||||
if u32, ok := keyValue(kv, key, uint32(0)); ok {
|
||||
return []uint32{u32}
|
||||
} else if u32s, ok := keyValue(kv, key, &array[uint32]{}); ok {
|
||||
return u32s.values
|
||||
} else if i32s, ok := keyValue(kv, key, &array[int32]{}); ok {
|
||||
dst := make([]uint32, len(i32s.values))
|
||||
for i, v := range i32s.values {
|
||||
if v < 0 {
|
||||
slog.Warn("array values are unexpectedly negative", "key", key, "i", i, "v", v)
|
||||
}
|
||||
dst[i] = uint32(v)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
return []uint32{defaultValue}
|
||||
}
|
||||
|
||||
func (kv KV) Strings(key string, defaultValue ...[]string) []string {
|
||||
val, _ := keyValue(kv, key, &array[string]{values: append(defaultValue, []string(nil))[0]})
|
||||
return val.values
|
||||
}
|
||||
|
||||
func (kv KV) Ints(key string, defaultValue ...[]int32) []int32 {
|
||||
val, _ := keyValue(kv, key, &array[int32]{values: append(defaultValue, []int32(nil))[0]})
|
||||
return val.values
|
||||
}
|
||||
|
||||
func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
|
||||
val, _ := keyValue(kv, key, &array[uint32]{values: append(defaultValue, []uint32(nil))[0]})
|
||||
return val.values
|
||||
}
|
||||
|
||||
func (kv KV) Floats(key string, defaultValue ...[]float32) []float32 {
|
||||
val, _ := keyValue(kv, key, &array[float32]{values: append(defaultValue, []float32(nil))[0]})
|
||||
return val.values
|
||||
}
|
||||
|
||||
func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
|
||||
val, _ := keyValue(kv, key, &array[bool]{values: append(defaultValue, []bool(nil))[0]})
|
||||
return val.values
|
||||
}
|
||||
|
||||
func (kv KV) Len() int {
|
||||
return len(kv)
|
||||
}
|
||||
|
||||
func (kv KV) Keys() iter.Seq[string] {
|
||||
return maps.Keys(kv)
|
||||
}
|
||||
|
||||
func (kv KV) Value(key string) any {
|
||||
return kv[key]
|
||||
}
|
||||
|
||||
func (kv KV) OllamaEngineRequired() bool {
|
||||
return slices.Contains([]string{
|
||||
"bert",
|
||||
"deepseek2",
|
||||
"deepseekocr",
|
||||
"gemma3",
|
||||
"gemma3n",
|
||||
"gemma4",
|
||||
"gptoss", "gpt-oss",
|
||||
"laguna",
|
||||
"llama4",
|
||||
"mistral3",
|
||||
"mllama",
|
||||
"nemotron_h", "nemotron_h_moe", "nemotron_h_omni",
|
||||
"nomic-bert",
|
||||
"olmo3",
|
||||
"qwen25vl",
|
||||
"qwen3", "qwen3moe",
|
||||
"qwen35", "qwen35moe",
|
||||
"qwen3next",
|
||||
"qwen3vl", "qwen3vlmoe",
|
||||
"glm4moelite",
|
||||
"glmocr",
|
||||
"lfm2",
|
||||
"lfm2moe",
|
||||
}, kv.Architecture())
|
||||
}
|
||||
|
||||
type valueTypes interface {
|
||||
uint8 | int8 | uint16 | int16 |
|
||||
uint32 | int32 | uint64 | int64 |
|
||||
string | float32 | float64 | bool
|
||||
}
|
||||
|
||||
type arrayValueTypes interface {
|
||||
*array[uint8] | *array[int8] | *array[uint16] | *array[int16] |
|
||||
*array[uint32] | *array[int32] | *array[uint64] | *array[int64] |
|
||||
*array[string] | *array[float32] | *array[float64] | *array[bool]
|
||||
}
|
||||
|
||||
func keyValue[T valueTypes | arrayValueTypes](kv KV, key string, defaultValue ...T) (T, bool) {
|
||||
if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
|
||||
key = kv.Architecture() + "." + key
|
||||
}
|
||||
|
||||
if val, ok := kv[key].(T); ok {
|
||||
return val, true
|
||||
}
|
||||
|
||||
logutil.Trace("key with type not found", "key", key, "default", defaultValue[0])
|
||||
return defaultValue[0], false
|
||||
}
|
||||
|
||||
type Tensors struct {
|
||||
items []*Tensor
|
||||
Offset uint64
|
||||
}
|
||||
|
||||
func (s Tensors) Items(prefix ...string) []*Tensor {
|
||||
if len(prefix) == 0 {
|
||||
return s.items
|
||||
}
|
||||
|
||||
var items []*Tensor
|
||||
for _, t := range s.items {
|
||||
if strings.HasPrefix(t.Name, prefix[0]) {
|
||||
items = append(items, t)
|
||||
}
|
||||
}
|
||||
|
||||
return items
|
||||
}
|
||||
|
||||
func (ts Tensors) GroupLayers() map[string]Layer {
|
||||
layers := make(map[string]Layer)
|
||||
for _, t := range ts.items {
|
||||
parts := strings.Split(t.Name, ".")
|
||||
if index := slices.IndexFunc(parts, func(s string) bool { return s == "blk" || s == "mm" }); index != -1 {
|
||||
if len(parts) > index+2 {
|
||||
// blk and mm should have a number after them, join it
|
||||
parts = append(
|
||||
[]string{strings.Join(parts[:index+2], ".")},
|
||||
parts[index+2:]...)
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := layers[parts[0]]; !ok {
|
||||
layers[parts[0]] = make(Layer)
|
||||
}
|
||||
|
||||
layers[parts[0]][strings.Join(parts[1:], ".")] = t
|
||||
}
|
||||
|
||||
return layers
|
||||
}
|
||||
|
||||
type Layer map[string]*Tensor
|
||||
|
||||
func (l Layer) Size() (size uint64) {
|
||||
for _, t := range l {
|
||||
size += t.Size()
|
||||
}
|
||||
|
||||
return size
|
||||
}
|
||||
|
||||
type Tensor struct {
|
||||
Name string `json:"name"`
|
||||
Kind uint32 `json:"kind"`
|
||||
Offset uint64 `json:"-"`
|
||||
|
||||
// Shape is the number of elements in each dimension
|
||||
Shape []uint64 `json:"shape"`
|
||||
|
||||
io.WriterTo `json:"-"`
|
||||
}
|
||||
|
||||
func (t Tensor) block() (n int) {
|
||||
if _, err := fmt.Sscanf(t.Name, "blk.%d.", &n); err != nil {
|
||||
return math.MaxInt
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (t Tensor) blockSize() uint64 {
|
||||
return TensorType(t.Kind).BlockSize()
|
||||
}
|
||||
|
||||
func (t TensorType) BlockSize() uint64 {
|
||||
switch t {
|
||||
case
|
||||
TensorTypeF32,
|
||||
TensorTypeF16,
|
||||
TensorTypeI8,
|
||||
TensorTypeI16,
|
||||
TensorTypeI32,
|
||||
TensorTypeI64,
|
||||
TensorTypeF64,
|
||||
TensorTypeBF16:
|
||||
return 1
|
||||
case
|
||||
TensorTypeQ4_0,
|
||||
TensorTypeQ4_1,
|
||||
TensorTypeQ5_0,
|
||||
TensorTypeQ5_1,
|
||||
TensorTypeQ8_0,
|
||||
TensorTypeQ8_1,
|
||||
tensorTypeIQ4_NL,
|
||||
4, TensorTypeMXFP4:
|
||||
return 32
|
||||
default:
|
||||
return 256
|
||||
}
|
||||
}
|
||||
|
||||
func (t Tensor) typeSize() uint64 {
|
||||
return TensorType(t.Kind).TypeSize()
|
||||
}
|
||||
|
||||
func (t TensorType) TypeSize() uint64 {
|
||||
blockSize := t.BlockSize()
|
||||
|
||||
switch t {
|
||||
case TensorTypeF32:
|
||||
return 4
|
||||
case TensorTypeF16:
|
||||
return 2
|
||||
case TensorTypeQ4_0:
|
||||
return 2 + blockSize/2
|
||||
case TensorTypeQ4_1:
|
||||
return 2 + 2 + blockSize/2
|
||||
case TensorTypeQ5_0:
|
||||
return 2 + 4 + blockSize/2
|
||||
case TensorTypeQ5_1:
|
||||
return 2 + 2 + 4 + blockSize/2
|
||||
case TensorTypeQ8_0:
|
||||
return 2 + blockSize
|
||||
case TensorTypeQ8_1:
|
||||
return 2 + 2 + blockSize
|
||||
case TensorTypeQ2_K:
|
||||
return blockSize/16 + blockSize/4 + 2 + 2
|
||||
case TensorTypeQ3_K:
|
||||
return blockSize/8 + blockSize/4 + 12 + 2
|
||||
case TensorTypeQ4_K:
|
||||
return 2 + 2 + 12 + blockSize/2
|
||||
case TensorTypeQ5_K:
|
||||
return 2 + 2 + 12 + blockSize/8 + blockSize/2
|
||||
case TensorTypeQ6_K:
|
||||
return blockSize/2 + blockSize/4 + blockSize/16 + 2
|
||||
case TensorTypeQ8_K:
|
||||
return 4 + blockSize + 2*blockSize/16
|
||||
case tensorTypeIQ2_XXS:
|
||||
return 2 + 2*blockSize/8
|
||||
case tensorTypeIQ2_XS:
|
||||
return 2 + 2*blockSize/8 + blockSize/32
|
||||
case tensorTypeIQ3_XXS:
|
||||
return 2 + blockSize/4 + blockSize/8
|
||||
case tensorTypeIQ1_S:
|
||||
return 2 + blockSize/8 + blockSize/16
|
||||
case tensorTypeIQ4_NL:
|
||||
return 2 + blockSize/2
|
||||
case tensorTypeIQ3_S:
|
||||
return 2 + blockSize/4 + blockSize/8 + blockSize/32 + 4
|
||||
case tensorTypeIQ2_S:
|
||||
return 2 + blockSize/4 + blockSize/16
|
||||
case tensorTypeIQ4_XS:
|
||||
return 2 + 2 + blockSize/2 + blockSize/64
|
||||
case TensorTypeI8:
|
||||
return 1
|
||||
case TensorTypeI16:
|
||||
return 2
|
||||
case TensorTypeI32:
|
||||
return 4
|
||||
case TensorTypeI64:
|
||||
return 8
|
||||
case TensorTypeF64:
|
||||
return 8
|
||||
case tensorTypeIQ1_M:
|
||||
return blockSize/8 + blockSize/16 + blockSize/32
|
||||
case TensorTypeBF16:
|
||||
return 2
|
||||
case 4, TensorTypeMXFP4:
|
||||
return 1 + blockSize/2
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func (t Tensor) Elements() uint64 {
|
||||
var count uint64 = 1
|
||||
for _, n := range t.Shape {
|
||||
count *= n
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func (t Tensor) Size() uint64 {
|
||||
return t.Elements() * t.typeSize() / t.blockSize()
|
||||
}
|
||||
|
||||
func (t Tensor) Type() string {
|
||||
return TensorType(t.Kind).String()
|
||||
}
|
||||
|
||||
type container interface {
|
||||
Name() string
|
||||
Decode(io.ReadSeeker) (model, error)
|
||||
}
|
||||
|
||||
const (
|
||||
// Magic constant for `ggml` files (unversioned).
|
||||
FILE_MAGIC_GGML = 0x67676d6c
|
||||
// Magic constant for `ggml` files (versioned, ggmf).
|
||||
FILE_MAGIC_GGMF = 0x67676d66
|
||||
// Magic constant for `ggml` files (versioned, ggjt).
|
||||
FILE_MAGIC_GGJT = 0x67676a74
|
||||
// Magic constant for `ggla` files (LoRA adapter).
|
||||
FILE_MAGIC_GGLA = 0x67676C61
|
||||
// Magic constant for `gguf` files (versioned, gguf)
|
||||
FILE_MAGIC_GGUF_LE = 0x46554747
|
||||
FILE_MAGIC_GGUF_BE = 0x47475546
|
||||
)
|
||||
|
||||
var ErrUnsupportedFormat = errors.New("unsupported model format")
|
||||
|
||||
func DetectContentType(b []byte) string {
|
||||
switch binary.LittleEndian.Uint32(b[:4]) {
|
||||
case FILE_MAGIC_GGML:
|
||||
return "ggml"
|
||||
case FILE_MAGIC_GGMF:
|
||||
return "ggmf"
|
||||
case FILE_MAGIC_GGJT:
|
||||
return "ggjt"
|
||||
case FILE_MAGIC_GGLA:
|
||||
return "ggla"
|
||||
case FILE_MAGIC_GGUF_LE, FILE_MAGIC_GGUF_BE:
|
||||
return "gguf"
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// Decode decodes a GGML model from the given reader.
|
||||
//
|
||||
// It collects array values for arrays with a size less than or equal to
|
||||
// maxArraySize. If the maxArraySize is negative, all arrays are collected.
|
||||
func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, error) {
|
||||
rs = bufioutil.NewBufferedSeeker(rs, 32<<10)
|
||||
|
||||
var magic uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var c container
|
||||
switch magic {
|
||||
case FILE_MAGIC_GGUF_LE:
|
||||
c = &containerGGUF{ByteOrder: binary.LittleEndian, maxArraySize: maxArraySize}
|
||||
case FILE_MAGIC_GGUF_BE:
|
||||
c = &containerGGUF{ByteOrder: binary.BigEndian, maxArraySize: maxArraySize}
|
||||
default:
|
||||
return nil, errors.New("invalid file magic")
|
||||
}
|
||||
|
||||
model, err := c.Decode(rs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// final model type
|
||||
return &GGML{
|
||||
container: c,
|
||||
model: model,
|
||||
Length: offset,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType string, useFlashAttention ml.FlashAttentionType) (kv []uint64, partialOffload, fullOffload uint64) {
|
||||
context *= uint64(numParallel)
|
||||
|
||||
embedding := f.KV().EmbeddingLength()
|
||||
heads := f.KV().HeadCountMax()
|
||||
headsArr := f.KV().HeadCount()
|
||||
headsKV := f.KV().HeadCountKVMax()
|
||||
headsKVArr := f.KV().HeadCountKV()
|
||||
vocab := uint64(f.KV()["tokenizer.ggml.tokens"].(*array[string]).size)
|
||||
|
||||
embeddingHeads := f.KV().EmbeddingHeadCountMax()
|
||||
embeddingHeadsK := f.KV().EmbeddingHeadCountK()
|
||||
embeddingHeadsV := f.KV().EmbeddingHeadCountV()
|
||||
|
||||
layers := f.Tensors().GroupLayers()
|
||||
|
||||
bytesPerElement := kvCacheBytesPerElement(kvCacheType)
|
||||
|
||||
// Default for models unless special-cased below. These defaults mirror the
|
||||
// cache usage in llama.cpp under the assumption that models without special
|
||||
// cases below will use the llamarunner and caching will be handled by the
|
||||
// llama.cpp layer.
|
||||
//
|
||||
// This also assumes that a layer without heads or headsKV set is recurrent
|
||||
// which is usually the case. Some models (eg nemotronh) use "blocks" in
|
||||
// place of layers where some are MLP blocks that don't have any cache.
|
||||
// Models like this will need a special case below to be accurately
|
||||
// estimated.
|
||||
var kvTotal uint64
|
||||
kv = make([]uint64, f.KV().BlockCount())
|
||||
kvSizeAttn := uint64(0)
|
||||
kvSizeRecurrent := uint64(0)
|
||||
for i := range kv {
|
||||
headsL := headsArr[i]
|
||||
headsKVL := headsKVArr[i]
|
||||
if headsL > 0 && headsKVL > 0 {
|
||||
// full attention layer
|
||||
// NOTE: Assumes uniform values for all attn layers
|
||||
kv[i] = uint64(float64(context*(embeddingHeadsK+embeddingHeadsV)*headsKVL) * bytesPerElement)
|
||||
kvSizeAttn += kv[i]
|
||||
} else {
|
||||
// recurrent layer
|
||||
ssmDConv := f.KV().SSMConvKernel()
|
||||
ssmDState := f.KV().SSMStateSize()
|
||||
ssmDInner := f.KV().SSMInnerSize()
|
||||
ssmNGroups := f.KV().SSMGroupCount()
|
||||
nEmbdR := uint64(0)
|
||||
if ssmDConv > 0 {
|
||||
nEmbdR = (ssmDConv - 1) * (ssmDInner + 2*ssmNGroups*ssmDState)
|
||||
}
|
||||
nEmbdS := ssmDState * ssmDInner
|
||||
|
||||
// recurrent always uses F32 in llama.cpp backend
|
||||
// https://github.com/ggml-org/llama.cpp/blob/master/src/llama-model.cpp#L18644
|
||||
bytesPerElementRecurrent := kvCacheBytesPerElement("f32")
|
||||
|
||||
kv[i] = (nEmbdR + nEmbdS) * uint64(bytesPerElementRecurrent)
|
||||
kvSizeRecurrent += kv[i]
|
||||
}
|
||||
kvTotal += kv[i]
|
||||
}
|
||||
slog.Debug("default cache size estimate", "attention MiB", float32(kvSizeAttn)/(1024.*1024.), "attention bytes", kvSizeAttn, "recurrent MiB", float32(kvSizeRecurrent)/(1024.*1024.), "recurrent bytes", kvSizeRecurrent)
|
||||
|
||||
switch f.KV().Architecture() {
|
||||
case "llama", "llama4":
|
||||
fullOffload = max(
|
||||
4*batch*(1+4*embedding+context*(1+heads)),
|
||||
4*batch*(embedding+vocab),
|
||||
)
|
||||
|
||||
partialOffload = 4 * batch * embedding
|
||||
partialOffload += max(
|
||||
4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embeddingHeads*headsKV),
|
||||
4*batch*(embedding+vocab)+embedding*vocab*105/128,
|
||||
)
|
||||
|
||||
if ffnGateExpsWeight, ok := layers["blk.0"]["ffn_gate_exps.weight"]; ok {
|
||||
// mixtral 8x22b
|
||||
ff := uint64(f.KV().Uint("feed_forward_length"))
|
||||
partialOffload = max(
|
||||
3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embeddingHeads*headsKV),
|
||||
4*(context*batch*heads+context*embeddingHeads*headsKV+batch*1024+embeddingHeads*headsKV*batch),
|
||||
)
|
||||
} else if ffnGateWeight, ok := layers["blk.0"]["ffn_gate.0.weight"]; ok {
|
||||
// mixtral 8x7b
|
||||
ffnGateWeight1 := ffnGateWeight.Shape[1]
|
||||
fullOffload = 4 * batch * (2 + 3*embedding + context*(1+heads) + 2*headsKV + ffnGateWeight1)
|
||||
partialOffload = max(
|
||||
4*batch*(3+embeddingHeads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16,
|
||||
4*batch*(1+2*embedding+context*(1+heads))+embedding*(6*context*headsKV/heads+embedding*9/16),
|
||||
)
|
||||
}
|
||||
case "mllama":
|
||||
var visionTokens, tiles uint64 = 1601, 4
|
||||
|
||||
crossAttentionLayers := f.KV().Ints("attention.cross_attention_layers")
|
||||
for i := range kv {
|
||||
if slices.Contains(crossAttentionLayers, int32(i)) {
|
||||
kv[i] = headsKV * (embeddingHeadsK + embeddingHeadsV) *
|
||||
4 * // sizeof(float32)
|
||||
visionTokens *
|
||||
tiles
|
||||
}
|
||||
}
|
||||
|
||||
fullOffload = max(
|
||||
4*batch*(2+3*embedding+embeddingHeadsK*heads+context*(1+heads)),
|
||||
// vocab graph
|
||||
4*batch*(embedding+vocab),
|
||||
)
|
||||
|
||||
var ropeFreqsCount uint64
|
||||
if ropeFreqs, ok := f.Tensors().GroupLayers()["rope_freqs"]; ok {
|
||||
if ropeFreqsWeights, ok := ropeFreqs["weights"]; ok {
|
||||
ropeFreqsCount = ropeFreqsWeights.Elements()
|
||||
}
|
||||
}
|
||||
|
||||
partialOffload = max(
|
||||
4*(batch*
|
||||
(2*embedding+1+context*(1+heads)+embeddingHeadsK*heads)+
|
||||
ropeFreqsCount+
|
||||
embeddingHeadsK*context*headsKV),
|
||||
// vocab graph
|
||||
4*batch*(embedding+vocab)+embedding*vocab*105/128,
|
||||
)
|
||||
case "gemma", "gemma2", "gemma3", "gemma3n":
|
||||
fullOffload = max(
|
||||
4*batch*(embedding+vocab),
|
||||
4*batch*(2+context+context*heads+2*embedding+2*embeddingHeadsK*heads),
|
||||
)
|
||||
|
||||
partialOffload = max(
|
||||
4*embedding*batch+embedding*vocab*105/128+4*vocab*batch,
|
||||
4*batch*(2*embedding+1+2*embeddingHeadsK*heads+context+context*heads)+
|
||||
4*embeddingHeadsK*context*8+
|
||||
embedding*embeddingHeadsK*heads*9/16,
|
||||
)
|
||||
|
||||
if f.KV().Architecture() == "gemma3n" {
|
||||
fullOffload *= 4
|
||||
partialOffload *= 4
|
||||
}
|
||||
|
||||
// Gemma2 also has sliding window attention but we only have an optimized implementation in the Ollama
|
||||
// engine. Gemma3 always uses the Ollama engine.
|
||||
if f.KV().Architecture() == "gemma3" {
|
||||
const gemma3GlobalCacheCount = 6
|
||||
slidingWindow := (uint64(numParallel) * uint64(f.KV().Uint("attention.sliding_window"))) + batch
|
||||
for i := range kv {
|
||||
// Every 6th layer is a global layer, which is the full context size that has already been set. The other
|
||||
// layers are the smaller local (sliding) layers.
|
||||
if (i+1)%gemma3GlobalCacheCount != 0 {
|
||||
kv[i] = uint64(float64(slidingWindow*(embeddingHeadsK+embeddingHeadsV)*headsKV) * bytesPerElement)
|
||||
}
|
||||
}
|
||||
}
|
||||
case "command-r":
|
||||
fullOffload = max(
|
||||
4*batch*(embedding+vocab),
|
||||
4*batch*(2+4*embedding+context*(1+heads)),
|
||||
)
|
||||
|
||||
partialOffload = max(
|
||||
4*batch*(embedding+vocab)+embedding*vocab*105/128,
|
||||
4*batch*(1+2*embedding+context*(1+heads))+4*embedding*context+embedding*embedding*9/16,
|
||||
)
|
||||
case "qwen2":
|
||||
fullOffload = max(
|
||||
4*batch*(embedding+vocab),
|
||||
4*batch*(1+2*embedding+context+context*heads),
|
||||
)
|
||||
|
||||
partialOffload = max(
|
||||
4*batch*(embedding+vocab)+embedding*vocab*105/128,
|
||||
4*(batch*(1+2*embedding+context*(1+heads))+embedding*(1+context)),
|
||||
)
|
||||
case "phi2":
|
||||
fullOffload = max(
|
||||
4*batch*(embedding+vocab),
|
||||
4*batch*(1+4*embedding+context+context*heads),
|
||||
)
|
||||
|
||||
partialOffload = max(
|
||||
4*batch*(2*embedding+vocab)+embedding*vocab*105/128,
|
||||
4*batch*(2+3*embedding+context+context*heads),
|
||||
)
|
||||
case "stablelm":
|
||||
fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
|
||||
partialOffload = max(
|
||||
4*batch*(vocab+2*embedding),
|
||||
fullOffload,
|
||||
)
|
||||
case "deepseek2":
|
||||
fullOffload = max(
|
||||
4*batch*(3*embedding+vocab),
|
||||
4*batch*(3*embedding+2+context*(1+headsKV)+2*embeddingHeadsK*headsKV),
|
||||
)
|
||||
|
||||
partialOffload = max(
|
||||
4*batch*(3*embedding+vocab)+embedding*vocab*105/128,
|
||||
4*batch*(2*embedding+1+2*embeddingHeadsK*headsKV+context+context*headsKV)+4*embeddingHeadsK*context*headsKV+embedding*embeddingHeadsK*headsKV*9/16,
|
||||
)
|
||||
case "chatglm":
|
||||
fullOffload = 4 * batch * (embedding + vocab)
|
||||
partialOffload = 4*batch*(embedding+vocab) + embedding*vocab*105/128
|
||||
if qkvBias, ok := layers["blk.0"]["attn_qkv.bias"]; ok {
|
||||
fullOffload = max(
|
||||
fullOffload,
|
||||
4*batch*(2+
|
||||
2*embedding+
|
||||
context+
|
||||
context*heads+
|
||||
embeddingHeadsK*heads+
|
||||
qkvBias.Shape[0]),
|
||||
)
|
||||
|
||||
partialOffload = max(
|
||||
partialOffload,
|
||||
4*batch*(1+
|
||||
2*embedding+
|
||||
embeddingHeadsK*heads+
|
||||
context+
|
||||
context*heads)+
|
||||
4*embeddingHeadsK*context+
|
||||
4*context*embeddingHeadsK+
|
||||
4*qkvBias.Shape[0],
|
||||
)
|
||||
}
|
||||
case "gptoss", "gpt-oss":
|
||||
kv = make([]uint64, f.KV().BlockCount())
|
||||
for i := range kv {
|
||||
kv[i] = uint64(float64((embeddingHeadsK+embeddingHeadsV)*headsKV) * bytesPerElement)
|
||||
if i%2 == 0 {
|
||||
kv[i] *= (uint64(numParallel)*4096 + batch)
|
||||
} else {
|
||||
kv[i] *= context
|
||||
}
|
||||
}
|
||||
|
||||
partialOffload = 2 * f.KV().HeadCountMax() / cmp.Or(f.KV().HeadCountKVMin(), 1) * kvTotal / 6
|
||||
if useFlashAttention == ml.FlashAttentionEnabled {
|
||||
// rough estimate of graph size with flash attention on
|
||||
partialOffload = (4*uint64(numParallel) + context>>10 + 110) * format.MebiByte
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// SupportsKVCacheType checks if the requested cache type is supported
|
||||
func (f GGML) SupportsKVCacheType(cacheType string) bool {
|
||||
if cacheType == "" || cacheType == "f16" {
|
||||
return true
|
||||
}
|
||||
|
||||
return slices.Contains([]string{"q8_0", "q4_0"}, cacheType)
|
||||
}
|
||||
|
||||
// KVCacheTypeIsQuantized checks if the requested cache type is a quantized type
|
||||
func (f GGML) KVCacheTypeIsQuantized(cacheType string) bool {
|
||||
if cacheType == "" || cacheType == "f16" || cacheType == "f32" || cacheType == "bf16" {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// SupportsFlashAttention checks if the model supports flash attention
|
||||
func (f GGML) SupportsFlashAttention() bool {
|
||||
_, isEmbedding := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())]
|
||||
if isEmbedding {
|
||||
return false
|
||||
}
|
||||
|
||||
arch := f.KV().Architecture()
|
||||
if slices.Contains([]string{"qwen35", "qwen35moe", "qwen3next"}, arch) {
|
||||
return true
|
||||
}
|
||||
|
||||
if slices.Contains([]string{"gemma2", "grok"}, arch) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check head counts match and are non-zero
|
||||
headCountK := f.KV().EmbeddingHeadCountK()
|
||||
headCountV := f.KV().EmbeddingHeadCountV()
|
||||
return headCountK != 0 && headCountV != 0 && headCountK == headCountV
|
||||
}
|
||||
|
||||
// FlashAttention checks if the model should enable flash attention
|
||||
func (f GGML) FlashAttention() bool {
|
||||
return slices.Contains([]string{
|
||||
"bert",
|
||||
"gemma3",
|
||||
"gemma4",
|
||||
"glm4moelite",
|
||||
"glmocr",
|
||||
"gptoss", "gpt-oss",
|
||||
"lfm2",
|
||||
"lfm2moe",
|
||||
"mistral3",
|
||||
"nemotron_h", "nemotron_h_moe", "nemotron_h_omni",
|
||||
"olmo3",
|
||||
"qwen3", "qwen3moe",
|
||||
"qwen35", "qwen35moe",
|
||||
"qwen3next",
|
||||
"qwen3vl", "qwen3vlmoe",
|
||||
}, f.KV().String("general.architecture"))
|
||||
}
|
||||
|
||||
// kvCacheBytesPerElement returns the number of bytes per element for a given KV cache type
|
||||
func kvCacheBytesPerElement(cacheType string) float64 {
|
||||
switch cacheType {
|
||||
case "q8_0":
|
||||
return 1 // 1/2 of fp16
|
||||
case "q4_0":
|
||||
return 0.5 // 1/4 of fp16
|
||||
case "f32":
|
||||
return 4 // f32 (default for recurrent)
|
||||
default:
|
||||
return 2 // f16 (default)
|
||||
}
|
||||
}
|
||||
301
fs/ggml/ggml_test.go
Normal file
301
fs/ggml/ggml_test.go
Normal file
@@ -0,0 +1,301 @@
|
||||
package ggml
|
||||
|
||||
import (
|
||||
"maps"
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
func TestTensorLayers(t *testing.T) {
|
||||
tensors := make(map[string]*Tensor)
|
||||
for _, name := range []string{
|
||||
"token_embd.weight",
|
||||
"blk.0.attn_k.weight",
|
||||
"blk.0.attn_output.weight",
|
||||
"blk.0.attn_q.weight",
|
||||
"blk.0.attn_v.weight",
|
||||
"blk.0.attn_norm.weight",
|
||||
"blk.0.ffn_down.weight",
|
||||
"blk.0.ffn_gate.weight",
|
||||
"blk.0.ffn_up.weight",
|
||||
"blk.0.ffn_norm.weight",
|
||||
"output_norm.weight",
|
||||
"mm.0.bias",
|
||||
"mm.0.weight",
|
||||
"v.blk.0.attn_k.weight",
|
||||
"v.blk.0.attn_output.weight",
|
||||
"v.blk.0.attn_q.weight",
|
||||
"v.blk.0.attn_v.weight",
|
||||
"v.blk.0.attn_norm.weight",
|
||||
"v.blk.0.ffn_down.weight",
|
||||
"v.blk.0.ffn_gate.weight",
|
||||
"v.blk.0.ffn_up.weight",
|
||||
"v.blk.0.ffn_norm.weight",
|
||||
"v.patch_embd.weight",
|
||||
"v.position_embd.gate",
|
||||
"v.position_embd.weight",
|
||||
} {
|
||||
tensors[name] = &Tensor{Name: name}
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
items []*Tensor
|
||||
want map[string]Layer
|
||||
}{
|
||||
{
|
||||
name: "text",
|
||||
items: slices.Collect(func(yield func(*Tensor) bool) {
|
||||
for k, v := range tensors {
|
||||
if !strings.HasPrefix(k, "mm.") && !strings.HasPrefix(k, "v.") {
|
||||
if !yield(v) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}),
|
||||
want: map[string]Layer{
|
||||
"blk.0": {
|
||||
"attn_k.weight": tensors["blk.0.attn_k.weight"],
|
||||
"attn_q.weight": tensors["blk.0.attn_q.weight"],
|
||||
"attn_v.weight": tensors["blk.0.attn_v.weight"],
|
||||
"attn_output.weight": tensors["blk.0.attn_output.weight"],
|
||||
"attn_norm.weight": tensors["blk.0.attn_norm.weight"],
|
||||
"ffn_down.weight": tensors["blk.0.ffn_down.weight"],
|
||||
"ffn_gate.weight": tensors["blk.0.ffn_gate.weight"],
|
||||
"ffn_up.weight": tensors["blk.0.ffn_up.weight"],
|
||||
"ffn_norm.weight": tensors["blk.0.ffn_norm.weight"],
|
||||
},
|
||||
"token_embd": {"weight": tensors["token_embd.weight"]},
|
||||
"output_norm": {"weight": tensors["output_norm.weight"]},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "vision",
|
||||
items: slices.Collect(func(yield func(*Tensor) bool) {
|
||||
for k, v := range tensors {
|
||||
if strings.HasPrefix(k, "mm.") || strings.HasPrefix(k, "v.") {
|
||||
if !yield(v) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}),
|
||||
want: map[string]Layer{
|
||||
"mm.0": {
|
||||
"bias": tensors["mm.0.bias"],
|
||||
"weight": tensors["mm.0.weight"],
|
||||
},
|
||||
"v.blk.0": {
|
||||
"attn_k.weight": tensors["v.blk.0.attn_k.weight"],
|
||||
"attn_q.weight": tensors["v.blk.0.attn_q.weight"],
|
||||
"attn_v.weight": tensors["v.blk.0.attn_v.weight"],
|
||||
"attn_output.weight": tensors["v.blk.0.attn_output.weight"],
|
||||
"attn_norm.weight": tensors["v.blk.0.attn_norm.weight"],
|
||||
"ffn_down.weight": tensors["v.blk.0.ffn_down.weight"],
|
||||
"ffn_gate.weight": tensors["v.blk.0.ffn_gate.weight"],
|
||||
"ffn_up.weight": tensors["v.blk.0.ffn_up.weight"],
|
||||
"ffn_norm.weight": tensors["v.blk.0.ffn_norm.weight"],
|
||||
},
|
||||
"v": {
|
||||
"patch_embd.weight": tensors["v.patch_embd.weight"],
|
||||
"position_embd.gate": tensors["v.position_embd.gate"],
|
||||
"position_embd.weight": tensors["v.position_embd.weight"],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "vision and text",
|
||||
items: slices.Collect(maps.Values(tensors)),
|
||||
want: map[string]Layer{
|
||||
"blk.0": {
|
||||
"attn_k.weight": tensors["blk.0.attn_k.weight"],
|
||||
"attn_q.weight": tensors["blk.0.attn_q.weight"],
|
||||
"attn_v.weight": tensors["blk.0.attn_v.weight"],
|
||||
"attn_output.weight": tensors["blk.0.attn_output.weight"],
|
||||
"attn_norm.weight": tensors["blk.0.attn_norm.weight"],
|
||||
"ffn_down.weight": tensors["blk.0.ffn_down.weight"],
|
||||
"ffn_gate.weight": tensors["blk.0.ffn_gate.weight"],
|
||||
"ffn_up.weight": tensors["blk.0.ffn_up.weight"],
|
||||
"ffn_norm.weight": tensors["blk.0.ffn_norm.weight"],
|
||||
},
|
||||
"token_embd": {"weight": tensors["token_embd.weight"]},
|
||||
"output_norm": {"weight": tensors["output_norm.weight"]},
|
||||
"mm.0": {
|
||||
"bias": tensors["mm.0.bias"],
|
||||
"weight": tensors["mm.0.weight"],
|
||||
},
|
||||
"v.blk.0": {
|
||||
"attn_k.weight": tensors["v.blk.0.attn_k.weight"],
|
||||
"attn_q.weight": tensors["v.blk.0.attn_q.weight"],
|
||||
"attn_v.weight": tensors["v.blk.0.attn_v.weight"],
|
||||
"attn_output.weight": tensors["v.blk.0.attn_output.weight"],
|
||||
"attn_norm.weight": tensors["v.blk.0.attn_norm.weight"],
|
||||
"ffn_down.weight": tensors["v.blk.0.ffn_down.weight"],
|
||||
"ffn_gate.weight": tensors["v.blk.0.ffn_gate.weight"],
|
||||
"ffn_up.weight": tensors["v.blk.0.ffn_up.weight"],
|
||||
"ffn_norm.weight": tensors["v.blk.0.ffn_norm.weight"],
|
||||
},
|
||||
"v": {
|
||||
"patch_embd.weight": tensors["v.patch_embd.weight"],
|
||||
"position_embd.gate": tensors["v.position_embd.gate"],
|
||||
"position_embd.weight": tensors["v.position_embd.weight"],
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := Tensors{items: tt.items}.GroupLayers()
|
||||
if diff := cmp.Diff(got, tt.want); diff != "" {
|
||||
t.Errorf("unexpected layers (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ref: https://github.com/ggml-org/llama.cpp/blob/a82c9e7c23ef6db48cebfa194dc9cebbc4ac3552/ggml/src/ggml.c#L572
|
||||
func TestTensorTypes(t *testing.T) {
|
||||
cases := []struct {
|
||||
kind uint32
|
||||
blockSize uint64
|
||||
typeSize uint64
|
||||
}{
|
||||
{0, 1, 4},
|
||||
{1, 1, 2},
|
||||
{2, 32, 18},
|
||||
{3, 32, 20},
|
||||
{6, 32, 22},
|
||||
{7, 32, 24},
|
||||
{8, 32, 34},
|
||||
{9, 32, 36},
|
||||
{10, 256, 84},
|
||||
{11, 256, 110},
|
||||
{12, 256, 144},
|
||||
{13, 256, 176},
|
||||
{14, 256, 210},
|
||||
{15, 256, 292},
|
||||
{16, 256, 66},
|
||||
{17, 256, 74},
|
||||
{18, 256, 98},
|
||||
{19, 256, 50},
|
||||
{20, 32, 18},
|
||||
{21, 256, 110},
|
||||
{22, 256, 82},
|
||||
{23, 256, 136},
|
||||
{24, 1, 1},
|
||||
{25, 1, 2},
|
||||
{26, 1, 4},
|
||||
{27, 1, 8},
|
||||
{28, 1, 8},
|
||||
{29, 256, 56},
|
||||
{30, 1, 2},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(strconv.Itoa(int(tt.kind)), func(t *testing.T) {
|
||||
tensor := Tensor{Kind: tt.kind}
|
||||
if tensor.blockSize() != tt.blockSize {
|
||||
t.Errorf("unexpected block size: got=%d want=%d", tensor.blockSize(), tt.blockSize)
|
||||
}
|
||||
|
||||
if tensor.typeSize() != tt.typeSize {
|
||||
t.Errorf("unexpected type size: got=%d want=%d", tensor.typeSize(), tt.typeSize)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestKeyValue(t *testing.T) {
|
||||
kv := KV{
|
||||
"general.architecture": "test",
|
||||
"test.strings": &array[string]{size: 3, values: []string{"a", "b", "c"}},
|
||||
"test.float32s": &array[float32]{size: 3, values: []float32{1.0, 2.0, 3.0}},
|
||||
"test.int32s": &array[int32]{size: 3, values: []int32{1, 2, 3}},
|
||||
"test.uint32s": &array[uint32]{size: 3, values: []uint32{1, 2, 3}},
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Strings("strings"), []string{"a", "b", "c"}); diff != "" {
|
||||
t.Errorf("unexpected strings (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Strings("nonexistent.strings"), []string(nil)); diff != "" {
|
||||
t.Errorf("unexpected strings (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Strings("default.strings", []string{"ollama"}), []string{"ollama"}); diff != "" {
|
||||
t.Errorf("unexpected strings (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Floats("float32s"), []float32{1.0, 2.0, 3.0}); diff != "" {
|
||||
t.Errorf("unexpected float32s (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Floats("nonexistent.float32s"), []float32(nil)); diff != "" {
|
||||
t.Errorf("unexpected float32s (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Floats("default.float32s", []float32{math.MaxFloat32}), []float32{math.MaxFloat32}); diff != "" {
|
||||
t.Errorf("unexpected float32s (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Ints("int32s"), []int32{1, 2, 3}); diff != "" {
|
||||
t.Errorf("unexpected int8s (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Ints("nonexistent.int32s"), []int32(nil)); diff != "" {
|
||||
t.Errorf("unexpected int8s (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Ints("default.int32s", []int32{math.MaxInt32}), []int32{math.MaxInt32}); diff != "" {
|
||||
t.Errorf("unexpected int8s (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Uints("uint32s"), []uint32{1, 2, 3}); diff != "" {
|
||||
t.Errorf("unexpected uint8s (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Uints("nonexistent.uint32s"), []uint32(nil)); diff != "" {
|
||||
t.Errorf("unexpected uint8s (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kv.Uints("default.uint32s", []uint32{math.MaxUint32}), []uint32{math.MaxUint32}); diff != "" {
|
||||
t.Errorf("unexpected uint8s (-got +want):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHeadCount(t *testing.T) {
|
||||
valuesArray := []int32{1, 5, 3, 4}
|
||||
cases := []struct {
|
||||
kv KV
|
||||
want uint64
|
||||
}{
|
||||
{
|
||||
kv: KV{
|
||||
"general.architecture": "abc",
|
||||
"abc.attention.head_count": &array[int32]{values: valuesArray, size: len(valuesArray)},
|
||||
},
|
||||
want: uint64(5),
|
||||
},
|
||||
{
|
||||
kv: KV{
|
||||
"general.architecture": "abc",
|
||||
"abc.attention.head_count": uint32(3),
|
||||
},
|
||||
want: uint64(3),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
got := tt.kv.HeadCountMax()
|
||||
if got != tt.want {
|
||||
t.Errorf("unexpected max value: got=%d want=%d", got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
689
fs/ggml/gguf.go
Normal file
689
fs/ggml/gguf.go
Normal file
@@ -0,0 +1,689 @@
|
||||
package ggml
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"runtime"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
type containerGGUF struct {
|
||||
ByteOrder binary.ByteOrder
|
||||
|
||||
Version uint32
|
||||
|
||||
V1 struct {
|
||||
NumTensor uint32
|
||||
NumKV uint32
|
||||
}
|
||||
|
||||
V2 struct {
|
||||
NumTensor uint64
|
||||
NumKV uint64
|
||||
}
|
||||
|
||||
V3 struct {
|
||||
NumTensor uint64
|
||||
NumKV uint64
|
||||
}
|
||||
|
||||
maxArraySize int
|
||||
}
|
||||
|
||||
func (c *containerGGUF) Name() string {
|
||||
return "gguf"
|
||||
}
|
||||
|
||||
func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
|
||||
if err := binary.Read(rs, c.ByteOrder, &c.Version); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var err error
|
||||
switch c.Version {
|
||||
case 1:
|
||||
err = binary.Read(rs, c.ByteOrder, &c.V1)
|
||||
case 2:
|
||||
err = binary.Read(rs, c.ByteOrder, &c.V2)
|
||||
default:
|
||||
err = binary.Read(rs, c.ByteOrder, &c.V3)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
model := newGGUF(c)
|
||||
if err := model.Decode(rs); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return model, nil
|
||||
}
|
||||
|
||||
const (
|
||||
ggufTypeUint8 uint32 = iota
|
||||
ggufTypeInt8
|
||||
ggufTypeUint16
|
||||
ggufTypeInt16
|
||||
ggufTypeUint32
|
||||
ggufTypeInt32
|
||||
ggufTypeFloat32
|
||||
ggufTypeBool
|
||||
ggufTypeString
|
||||
ggufTypeArray
|
||||
ggufTypeUint64
|
||||
ggufTypeInt64
|
||||
ggufTypeFloat64
|
||||
)
|
||||
|
||||
type gguf struct {
|
||||
*containerGGUF
|
||||
|
||||
kv KV
|
||||
tensors []*Tensor
|
||||
|
||||
parameters uint64
|
||||
tensorOffset uint64
|
||||
|
||||
scratch [16 << 10]byte
|
||||
}
|
||||
|
||||
func newGGUF(container *containerGGUF) *gguf {
|
||||
return &gguf{
|
||||
containerGGUF: container,
|
||||
kv: make(KV),
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *gguf) KV() KV {
|
||||
return llm.kv
|
||||
}
|
||||
|
||||
func (llm *gguf) Tensors() Tensors {
|
||||
return Tensors{
|
||||
items: llm.tensors,
|
||||
Offset: llm.tensorOffset,
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *gguf) numTensor() uint64 {
|
||||
switch llm.Version {
|
||||
case 1:
|
||||
return uint64(llm.V1.NumTensor)
|
||||
case 2:
|
||||
return llm.V2.NumTensor
|
||||
default:
|
||||
return llm.V3.NumTensor
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *gguf) numKV() uint64 {
|
||||
switch llm.Version {
|
||||
case 1:
|
||||
return uint64(llm.V1.NumKV)
|
||||
case 2:
|
||||
return llm.V2.NumKV
|
||||
default:
|
||||
return llm.V3.NumKV
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
// decode key-values
|
||||
for i := 0; uint64(i) < llm.numKV(); i++ {
|
||||
k, err := readGGUFString(llm, rs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
t, err := readGGUF[uint32](llm, rs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var v any
|
||||
switch t {
|
||||
case ggufTypeUint8:
|
||||
v, err = readGGUF[uint8](llm, rs)
|
||||
case ggufTypeInt8:
|
||||
v, err = readGGUF[int8](llm, rs)
|
||||
case ggufTypeUint16:
|
||||
v, err = readGGUF[uint16](llm, rs)
|
||||
case ggufTypeInt16:
|
||||
v, err = readGGUF[int16](llm, rs)
|
||||
case ggufTypeUint32:
|
||||
v, err = readGGUF[uint32](llm, rs)
|
||||
case ggufTypeInt32:
|
||||
v, err = readGGUF[int32](llm, rs)
|
||||
case ggufTypeUint64:
|
||||
v, err = readGGUF[uint64](llm, rs)
|
||||
case ggufTypeInt64:
|
||||
v, err = readGGUF[int64](llm, rs)
|
||||
case ggufTypeFloat32:
|
||||
v, err = readGGUF[float32](llm, rs)
|
||||
case ggufTypeFloat64:
|
||||
v, err = readGGUF[float64](llm, rs)
|
||||
case ggufTypeBool:
|
||||
v, err = readGGUF[bool](llm, rs)
|
||||
case ggufTypeString:
|
||||
v, err = readGGUFString(llm, rs)
|
||||
case ggufTypeArray:
|
||||
v, err = readGGUFArray(llm, rs)
|
||||
default:
|
||||
return fmt.Errorf("invalid type: %d", t)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
llm.kv[k] = v
|
||||
}
|
||||
|
||||
// decode tensors
|
||||
for range llm.numTensor() {
|
||||
name, err := readGGUFString(llm, rs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read tensor name: %w", err)
|
||||
}
|
||||
|
||||
// dims is the number of dimensions in the tensor
|
||||
dims, err := readGGUF[uint32](llm, rs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read tensor dimensions: %w", err)
|
||||
}
|
||||
|
||||
shape := make([]uint64, dims)
|
||||
for i := 0; uint32(i) < dims; i++ {
|
||||
shape[i], err = readGGUF[uint64](llm, rs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read tensor shape: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
kind, err := readGGUF[uint32](llm, rs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read tensor kind: %w", err)
|
||||
}
|
||||
|
||||
offset, err := readGGUF[uint64](llm, rs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read tensor offset: %w", err)
|
||||
}
|
||||
|
||||
tensor := Tensor{
|
||||
Name: name,
|
||||
Kind: kind,
|
||||
Offset: offset,
|
||||
Shape: shape[:],
|
||||
}
|
||||
|
||||
llm.tensors = append(llm.tensors, &tensor)
|
||||
llm.parameters += tensor.Elements()
|
||||
}
|
||||
|
||||
// patch KV with parameter count
|
||||
llm.kv["general.parameter_count"] = llm.parameters
|
||||
|
||||
alignment := llm.kv.Uint("general.alignment", 32)
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
padding := ggufPadding(offset, int64(alignment))
|
||||
llm.tensorOffset = uint64(offset + padding)
|
||||
|
||||
// get file size to validate tensor bounds
|
||||
fileSize, err := rs.Seek(0, io.SeekEnd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determine file size: %w", err)
|
||||
}
|
||||
|
||||
if _, err := rs.Seek(offset, io.SeekStart); err != nil {
|
||||
return fmt.Errorf("failed to seek back after size check: %w", err)
|
||||
}
|
||||
|
||||
for _, tensor := range llm.tensors {
|
||||
tensorEnd := llm.tensorOffset + tensor.Offset + tensor.Size()
|
||||
if tensorEnd > uint64(fileSize) {
|
||||
return fmt.Errorf("tensor %q offset+size (%d) exceeds file size (%d)", tensor.Name, tensorEnd, fileSize)
|
||||
}
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get current offset: %w", err)
|
||||
}
|
||||
|
||||
padding := ggufPadding(offset, int64(alignment))
|
||||
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
|
||||
return fmt.Errorf("failed to seek to init padding: %w", err)
|
||||
}
|
||||
|
||||
if _, err := rs.Seek(int64(tensor.Size()), io.SeekCurrent); err != nil {
|
||||
return fmt.Errorf("failed to seek to tensor: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
|
||||
var t T
|
||||
err := binary.Read(r, llm.ByteOrder, &t)
|
||||
return t, err
|
||||
}
|
||||
|
||||
func writeGGUF[V any](w io.Writer, t uint32, v V) error {
|
||||
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return binary.Write(w, binary.LittleEndian, v)
|
||||
}
|
||||
|
||||
func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
|
||||
var length uint64
|
||||
if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := io.CopyN(&b, r, int64(length)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// gguf v1 strings are null-terminated
|
||||
b.Truncate(b.Len() - 1)
|
||||
|
||||
return b.String(), nil
|
||||
}
|
||||
|
||||
func readGGUFV1StringsData(llm *gguf, r io.Reader, a *array[string]) (any, error) {
|
||||
for i := range a.size {
|
||||
if a.values != nil {
|
||||
e, err := readGGUFV1String(llm, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
a.values[i] = e
|
||||
} else {
|
||||
_ = discardGGUFString(llm, r)
|
||||
}
|
||||
}
|
||||
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func discardGGUFString(llm *gguf, r io.Reader) error {
|
||||
buf := llm.scratch[:8]
|
||||
_, err := io.ReadFull(r, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
size := int(llm.ByteOrder.Uint64(buf))
|
||||
for size > 0 {
|
||||
n, err := r.Read(llm.scratch[:min(size, cap(llm.scratch))])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
size -= n
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func readGGUFString(llm *gguf, r io.Reader) (string, error) {
|
||||
if llm.Version == 1 {
|
||||
return readGGUFV1String(llm, r)
|
||||
}
|
||||
|
||||
buf := llm.scratch[:8]
|
||||
_, err := io.ReadFull(r, buf)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
length := int(llm.ByteOrder.Uint64(buf))
|
||||
if length > len(llm.scratch) {
|
||||
buf = make([]byte, length)
|
||||
} else {
|
||||
buf = llm.scratch[:length]
|
||||
}
|
||||
clear(buf)
|
||||
|
||||
_, err = io.ReadFull(r, buf)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(buf), nil
|
||||
}
|
||||
|
||||
func writeGGUFString(w io.Writer, s string) error {
|
||||
if err := binary.Write(w, binary.LittleEndian, ggufTypeString); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err := io.Copy(w, strings.NewReader(s))
|
||||
return err
|
||||
}
|
||||
|
||||
func readGGUFStringsData(llm *gguf, r io.Reader, a *array[string]) (any, error) {
|
||||
for i := range a.size {
|
||||
if a.values != nil {
|
||||
e, err := readGGUFString(llm, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
a.values[i] = e
|
||||
} else {
|
||||
discardGGUFString(llm, r)
|
||||
}
|
||||
}
|
||||
|
||||
return a, nil
|
||||
}
|
||||
|
||||
type array[T any] struct {
|
||||
// size is the actual size of the array
|
||||
size int
|
||||
|
||||
// values is the array of values. this is nil if the array is larger than configured maxSize
|
||||
values []T
|
||||
}
|
||||
|
||||
func (a *array[T]) MarshalJSON() ([]byte, error) {
|
||||
return json.Marshal(a.values)
|
||||
}
|
||||
|
||||
func newArray[T any](size, maxSize int) *array[T] {
|
||||
a := array[T]{size: size}
|
||||
if maxSize < 0 || size <= maxSize {
|
||||
a.values = make([]T, size)
|
||||
}
|
||||
return &a
|
||||
}
|
||||
|
||||
func readGGUFArray(llm *gguf, r io.Reader) (any, error) {
|
||||
t, err := readGGUF[uint32](llm, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
n, err := readGGUF[uint64](llm, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch t {
|
||||
case ggufTypeUint8:
|
||||
a := newArray[uint8](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeInt8:
|
||||
a := newArray[int8](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeUint16:
|
||||
a := newArray[uint16](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeInt16:
|
||||
a := newArray[int16](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeUint32:
|
||||
a := newArray[uint32](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeInt32:
|
||||
a := newArray[int32](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeUint64:
|
||||
a := newArray[uint64](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeInt64:
|
||||
a := newArray[int64](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeFloat32:
|
||||
a := newArray[float32](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeFloat64:
|
||||
a := newArray[float64](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeBool:
|
||||
a := newArray[bool](int(n), llm.maxArraySize)
|
||||
return readGGUFArrayData(llm, r, a)
|
||||
case ggufTypeString:
|
||||
a := newArray[string](int(n), llm.maxArraySize)
|
||||
if llm.Version == 1 {
|
||||
return readGGUFV1StringsData(llm, r, a)
|
||||
}
|
||||
|
||||
return readGGUFStringsData(llm, r, a)
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid array type: %d", t)
|
||||
}
|
||||
}
|
||||
|
||||
func readGGUFArrayData[T any](llm *gguf, r io.Reader, a *array[T]) (any, error) {
|
||||
for i := range a.size {
|
||||
e, err := readGGUF[T](llm, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if a.values != nil {
|
||||
a.values[i] = e
|
||||
}
|
||||
}
|
||||
|
||||
return a, nil
|
||||
}
|
||||
|
||||
// writeGGUFArray writes a slice s of type E to the write with a gguf type of t
|
||||
func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
|
||||
if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if t == ggufTypeString {
|
||||
for _, e := range any(s).([]string) {
|
||||
if err := binary.Write(w, binary.LittleEndian, uint64(len(e))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, binary.LittleEndian, []byte(e)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return binary.Write(w, binary.LittleEndian, s)
|
||||
}
|
||||
|
||||
func WriteGGUF(f *os.File, kv fs.Config, ts []*Tensor) error {
|
||||
arch := kv.String("general.architecture")
|
||||
if arch == "" {
|
||||
return fmt.Errorf("architecture not set")
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, []byte("GGUF")); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, uint32(3)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, uint64(len(ts))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, uint64(kv.Len())); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, key := range slices.Sorted(kv.Keys()) {
|
||||
if err := ggufWriteKV(f, arch, key, kv.Value(key)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
slices.SortStableFunc(
|
||||
ts,
|
||||
func(a, b *Tensor) int {
|
||||
return cmp.Or(
|
||||
cmp.Compare(a.block(), b.block()),
|
||||
cmp.Compare(a.Name, b.Name),
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
alignment := kv.Uint("general.alignment", 32)
|
||||
|
||||
var s uint64
|
||||
for i := range ts {
|
||||
ts[i].Offset = s
|
||||
if err := ggufWriteTensorInfo(f, ts[i]); err != nil {
|
||||
return err
|
||||
}
|
||||
s += ts[i].Size()
|
||||
s += uint64(ggufPadding(int64(s), int64(alignment)))
|
||||
}
|
||||
|
||||
offset, err := f.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
offset += ggufPadding(offset, int64(alignment))
|
||||
|
||||
var g errgroup.Group
|
||||
g.SetLimit(runtime.GOMAXPROCS(0))
|
||||
// TODO consider reducing if tensors size * gomaxprocs is larger than free memory
|
||||
for _, t := range ts {
|
||||
w := io.NewOffsetWriter(f, offset+int64(t.Offset))
|
||||
g.Go(func() error {
|
||||
_, err := t.WriteTo(w)
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
return g.Wait()
|
||||
}
|
||||
|
||||
func ggufWriteKV(ws io.WriteSeeker, arch, k string, v any) error {
|
||||
if !strings.HasPrefix(k, arch+".") &&
|
||||
!strings.HasPrefix(k, "general.") &&
|
||||
!strings.HasPrefix(k, "adapter.") &&
|
||||
!strings.HasPrefix(k, "tokenizer.") {
|
||||
k = arch + "." + k
|
||||
}
|
||||
|
||||
slog.Debug(k, "type", fmt.Sprintf("%T", v))
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(k))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte(k)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var err error
|
||||
switch v := v.(type) {
|
||||
case int32:
|
||||
err = writeGGUF(ws, ggufTypeInt32, v)
|
||||
case int64:
|
||||
err = writeGGUF(ws, ggufTypeInt64, v)
|
||||
case uint32, FileType:
|
||||
err = writeGGUF(ws, ggufTypeUint32, v)
|
||||
case uint64:
|
||||
err = writeGGUF(ws, ggufTypeUint64, v)
|
||||
case float32:
|
||||
err = writeGGUF(ws, ggufTypeFloat32, v)
|
||||
case bool:
|
||||
err = writeGGUF(ws, ggufTypeBool, v)
|
||||
case string:
|
||||
err = writeGGUFString(ws, v)
|
||||
case []int32:
|
||||
err = writeGGUFArray(ws, ggufTypeInt32, v)
|
||||
case *array[int32]:
|
||||
err = writeGGUFArray(ws, ggufTypeInt32, v.values)
|
||||
case []int64:
|
||||
err = writeGGUFArray(ws, ggufTypeInt64, v)
|
||||
case *array[int64]:
|
||||
err = writeGGUFArray(ws, ggufTypeInt64, v.values)
|
||||
case []uint32:
|
||||
err = writeGGUFArray(ws, ggufTypeUint32, v)
|
||||
case *array[uint32]:
|
||||
err = writeGGUFArray(ws, ggufTypeUint32, v.values)
|
||||
case []float32:
|
||||
err = writeGGUFArray(ws, ggufTypeFloat32, v)
|
||||
case *array[float32]:
|
||||
err = writeGGUFArray(ws, ggufTypeFloat32, v.values)
|
||||
case []string:
|
||||
err = writeGGUFArray(ws, ggufTypeString, v)
|
||||
case *array[string]:
|
||||
err = writeGGUFArray(ws, ggufTypeString, v.values)
|
||||
case []bool:
|
||||
err = writeGGUFArray(ws, ggufTypeBool, v)
|
||||
case *array[bool]:
|
||||
err = writeGGUFArray(ws, ggufTypeBool, v.values)
|
||||
default:
|
||||
return fmt.Errorf("improper type for '%s'", k)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func ggufWriteTensorInfo(ws io.WriteSeeker, t *Tensor) error {
|
||||
slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte(t.Name)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint32(len(t.Shape))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, n := range t.Shape {
|
||||
if err := binary.Write(ws, binary.LittleEndian, n); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, t.Kind); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return binary.Write(ws, binary.LittleEndian, t.Offset)
|
||||
}
|
||||
|
||||
func ggufPadding(offset, align int64) int64 {
|
||||
return (align - offset%align) % align
|
||||
}
|
||||
129
fs/ggml/gguf_test.go
Normal file
129
fs/ggml/gguf_test.go
Normal file
@@ -0,0 +1,129 @@
|
||||
package ggml
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math/rand/v2"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
func TestWriteGGUF(t *testing.T) {
|
||||
tensorData := make([]byte, 2*3*4) // 6 F32 elements = 24 bytes
|
||||
for range 8 {
|
||||
t.Run("shuffle", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ts := []*Tensor{
|
||||
{Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
|
||||
{Name: "blk.0.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
|
||||
{Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
|
||||
{Name: "blk.1.ffn_up.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
|
||||
{Name: "blk.2.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
|
||||
{Name: "blk.1.ffn_down.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
|
||||
{Name: "blk.0.attn_k.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
|
||||
{Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewReader(tensorData)},
|
||||
{Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewReader(tensorData)},
|
||||
}
|
||||
|
||||
rand.Shuffle(len(ts), func(i, j int) {
|
||||
ts[i], ts[j] = ts[j], ts[i]
|
||||
})
|
||||
|
||||
w, err := os.CreateTemp(t.TempDir(), strings.ReplaceAll(t.Name(), "/", "_")+"*.bin")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer w.Close()
|
||||
|
||||
if err := WriteGGUF(w, KV{
|
||||
"general.architecture": "test",
|
||||
"general.alignment": uint32(16),
|
||||
"test.key": "value",
|
||||
"test.int32_key": int32(-42),
|
||||
"test.int64_key": int64(-9223372036854775808),
|
||||
"test.int32_array": []int32{-1, 0, 1, 2147483647, -2147483648},
|
||||
"test.int64_array": []int64{-1, 0, 1, 9223372036854775807, -9223372036854775808},
|
||||
"attention.key": "value2",
|
||||
"tokenizer.key": "value3",
|
||||
"adapter.key": "value4",
|
||||
}, ts); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
r, err := os.Open(w.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
ff, err := Decode(r, -1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(KV{
|
||||
"general.architecture": "test",
|
||||
"general.alignment": uint32(16),
|
||||
"general.parameter_count": uint64(54),
|
||||
"test.key": "value",
|
||||
"test.int32_key": int32(-42),
|
||||
"test.int64_key": int64(-9223372036854775808),
|
||||
"test.int32_array": &array[int32]{size: 5, values: []int32{-1, 0, 1, 2147483647, -2147483648}},
|
||||
"test.int64_array": &array[int64]{size: 5, values: []int64{-1, 0, 1, 9223372036854775807, -9223372036854775808}},
|
||||
"test.attention.key": "value2",
|
||||
"tokenizer.key": "value3",
|
||||
"adapter.key": "value4",
|
||||
}, ff.KV(), cmp.AllowUnexported(array[int32]{}, array[int64]{})); diff != "" {
|
||||
t.Errorf("Mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(Tensors{
|
||||
Offset: 992,
|
||||
items: []*Tensor{
|
||||
{Name: "blk.0.attn_k.weight", Offset: 0, Shape: []uint64{2, 3}},
|
||||
{Name: "blk.0.attn_norm.weight", Offset: 32, Shape: []uint64{2, 3}},
|
||||
{Name: "blk.0.ffn_norm.weight", Offset: 64, Shape: []uint64{2, 3}},
|
||||
{Name: "blk.1.ffn_down.weight", Offset: 96, Shape: []uint64{2, 3}},
|
||||
{Name: "blk.1.ffn_up.weight", Offset: 128, Shape: []uint64{2, 3}},
|
||||
{Name: "blk.2.ffn_norm.weight", Offset: 160, Shape: []uint64{2, 3}},
|
||||
{Name: "output.weight", Offset: 192, Shape: []uint64{3, 2}},
|
||||
{Name: "output_norm.weight", Offset: 224, Shape: []uint64{3, 2}},
|
||||
{Name: "token_embd.weight", Offset: 256, Shape: []uint64{2, 3}},
|
||||
},
|
||||
}, ff.Tensors(), cmp.AllowUnexported(Tensors{})); diff != "" {
|
||||
t.Errorf("Mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("truncated_tensor_data", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ts := []*Tensor{
|
||||
{Name: "blk.0.attn.weight", Kind: 0, Shape: []uint64{512, 2}, WriterTo: bytes.NewBuffer(make([]byte, 32))},
|
||||
}
|
||||
|
||||
w, err := os.CreateTemp(t.TempDir(), "truncated_*.bin")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer w.Close()
|
||||
|
||||
if err := WriteGGUF(w, KV{"general.architecture": "test"}, ts); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
r, err := os.Open(w.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
if _, err := Decode(r, -1); err == nil {
|
||||
t.Error("Decode should reject GGUF files where tensor data extends beyond file size")
|
||||
}
|
||||
})
|
||||
}
|
||||
327
fs/ggml/type.go
Normal file
327
fs/ggml/type.go
Normal file
@@ -0,0 +1,327 @@
|
||||
package ggml
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// FileType is the Go equivalent to llama_ftype used for gguf file typing
|
||||
type FileType uint32
|
||||
|
||||
const (
|
||||
FileTypeF32 FileType = iota
|
||||
FileTypeF16
|
||||
fileTypeQ4_0
|
||||
fileTypeQ4_1
|
||||
fileTypeMXFP4 // originally fileTypeQ4_1_F16 // unused by GGML
|
||||
fileTypeQ4_2 // unused by GGML
|
||||
fileTypeQ4_3 // unused by GGML
|
||||
FileTypeQ8_0
|
||||
fileTypeQ5_0
|
||||
fileTypeQ5_1
|
||||
fileTypeQ2_K
|
||||
fileTypeQ3_K_S
|
||||
fileTypeQ3_K_M
|
||||
fileTypeQ3_K_L
|
||||
FileTypeQ4_K_S
|
||||
FileTypeQ4_K_M
|
||||
fileTypeQ5_K_S
|
||||
fileTypeQ5_K_M
|
||||
fileTypeQ6_K
|
||||
fileTypeIQ2_XXS
|
||||
fileTypeIQ2_XS
|
||||
fileTypeQ2_K_S
|
||||
fileTypeIQ3_XS
|
||||
fileTypeIQ3_XXS
|
||||
fileTypeIQ1_S
|
||||
fileTypeIQ4_NL
|
||||
fileTypeIQ3_S
|
||||
fileTypeIQ3_M
|
||||
fileTypeIQ2_S
|
||||
fileTypeIQ2_M
|
||||
fileTypeIQ4_XS
|
||||
fileTypeIQ1_M
|
||||
FileTypeBF16
|
||||
fileTypeQ4_0_4_4 // unused by GGML
|
||||
fileTypeQ4_0_4_8 // unused by GGML
|
||||
fileTypeQ4_0_8_8 // unused by GGML
|
||||
fileTypeTQ1_0
|
||||
fileTypeTQ2_0
|
||||
|
||||
FileTypeUnknown = 1024
|
||||
)
|
||||
|
||||
// ParseFileType parses the provided GGUF file type
|
||||
// Only Ollama supported types are considered valid
|
||||
func ParseFileType(s string) (FileType, error) {
|
||||
switch s {
|
||||
case "F32":
|
||||
return FileTypeF32, nil
|
||||
case "F16":
|
||||
return FileTypeF16, nil
|
||||
case "Q8_0":
|
||||
return FileTypeQ8_0, nil
|
||||
case "Q4_K_S":
|
||||
return FileTypeQ4_K_S, nil
|
||||
case "Q4_K_M", "Q4_K":
|
||||
return FileTypeQ4_K_M, nil
|
||||
case "BF16":
|
||||
return FileTypeBF16, nil
|
||||
default:
|
||||
supportedFileTypes := []FileType{
|
||||
FileTypeF32,
|
||||
FileTypeF16,
|
||||
FileTypeQ4_K_S,
|
||||
FileTypeQ4_K_M,
|
||||
FileTypeQ8_0,
|
||||
// fsggml.FileTypeBF16, // TODO
|
||||
}
|
||||
strs := make([]string, len(supportedFileTypes))
|
||||
for i := range supportedFileTypes {
|
||||
strs[i] = supportedFileTypes[i].String()
|
||||
}
|
||||
|
||||
return FileTypeUnknown, fmt.Errorf("unsupported quantization type %s - supported types are %s", s, strings.Join(strs, ", "))
|
||||
}
|
||||
}
|
||||
|
||||
func (t FileType) String() string {
|
||||
// Note: this routine will return a broader set of file types for existing models
|
||||
switch t {
|
||||
case FileTypeF32:
|
||||
return "F32"
|
||||
case FileTypeF16:
|
||||
return "F16"
|
||||
case fileTypeQ4_0:
|
||||
return "Q4_0"
|
||||
case fileTypeQ4_1:
|
||||
return "Q4_1"
|
||||
case fileTypeMXFP4:
|
||||
return "MXFP4"
|
||||
case FileTypeQ8_0:
|
||||
return "Q8_0"
|
||||
case fileTypeQ5_0:
|
||||
return "Q5_0"
|
||||
case fileTypeQ5_1:
|
||||
return "Q5_1"
|
||||
case fileTypeQ2_K:
|
||||
return "Q2_K"
|
||||
case fileTypeQ3_K_S:
|
||||
return "Q3_K_S"
|
||||
case fileTypeQ3_K_M:
|
||||
return "Q3_K_M"
|
||||
case fileTypeQ3_K_L:
|
||||
return "Q3_K_L"
|
||||
case FileTypeQ4_K_S:
|
||||
return "Q4_K_S"
|
||||
case FileTypeQ4_K_M:
|
||||
return "Q4_K_M"
|
||||
case fileTypeQ5_K_S:
|
||||
return "Q5_K_S"
|
||||
case fileTypeQ5_K_M:
|
||||
return "Q5_K_M"
|
||||
case fileTypeQ6_K:
|
||||
return "Q6_K"
|
||||
case fileTypeQ2_K_S:
|
||||
return "Q2_K_S"
|
||||
case FileTypeBF16:
|
||||
return "BF16"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
func (t FileType) Value() uint32 {
|
||||
return uint32(t)
|
||||
}
|
||||
|
||||
func (ftype FileType) ToTensorType() TensorType {
|
||||
switch ftype {
|
||||
case FileTypeF32:
|
||||
return TensorTypeF32
|
||||
case FileTypeF16:
|
||||
return TensorTypeF16
|
||||
case fileTypeQ4_0:
|
||||
return TensorTypeQ4_0
|
||||
case fileTypeQ4_1:
|
||||
return TensorTypeQ4_1
|
||||
case FileTypeQ8_0:
|
||||
return TensorTypeQ8_0
|
||||
case fileTypeQ5_0:
|
||||
return TensorTypeQ5_0
|
||||
case fileTypeQ5_1:
|
||||
return TensorTypeQ5_1
|
||||
case fileTypeQ2_K:
|
||||
return TensorTypeQ2_K
|
||||
case fileTypeQ3_K_S:
|
||||
return TensorTypeQ3_K
|
||||
case fileTypeQ3_K_M:
|
||||
return TensorTypeQ3_K
|
||||
case fileTypeQ3_K_L:
|
||||
return TensorTypeQ3_K
|
||||
case FileTypeQ4_K_S:
|
||||
return TensorTypeQ4_K
|
||||
case FileTypeQ4_K_M:
|
||||
return TensorTypeQ4_K
|
||||
case fileTypeQ5_K_S:
|
||||
return TensorTypeQ5_K
|
||||
case fileTypeQ5_K_M:
|
||||
return TensorTypeQ5_K
|
||||
case fileTypeQ6_K:
|
||||
return TensorTypeQ6_K
|
||||
case fileTypeQ2_K_S:
|
||||
return TensorTypeQ2_K
|
||||
case FileTypeBF16:
|
||||
return TensorTypeBF16
|
||||
case fileTypeMXFP4:
|
||||
return TensorTypeMXFP4
|
||||
default:
|
||||
slog.Warn("unsupported file type", "type", ftype)
|
||||
return 0 // F32
|
||||
}
|
||||
}
|
||||
|
||||
// TensorType is equivalent to ggml_type for individual tensor types
|
||||
// Note: these are not the same as FileType
|
||||
type TensorType uint32
|
||||
|
||||
const (
|
||||
TensorTypeF32 TensorType = iota
|
||||
TensorTypeF16
|
||||
TensorTypeQ4_0
|
||||
TensorTypeQ4_1
|
||||
tensorTypeQ4_2
|
||||
tensorTypeQ4_3 // unused by GGML
|
||||
TensorTypeQ5_0
|
||||
TensorTypeQ5_1
|
||||
TensorTypeQ8_0
|
||||
TensorTypeQ8_1
|
||||
TensorTypeQ2_K
|
||||
TensorTypeQ3_K
|
||||
TensorTypeQ4_K
|
||||
TensorTypeQ5_K
|
||||
TensorTypeQ6_K
|
||||
TensorTypeQ8_K
|
||||
tensorTypeIQ2_XXS // not supported by ollama
|
||||
tensorTypeIQ2_XS // not supported by ollama
|
||||
tensorTypeIQ3_XXS // not supported by ollama
|
||||
tensorTypeIQ1_S // not supported by ollama
|
||||
tensorTypeIQ4_NL // not supported by ollama
|
||||
tensorTypeIQ3_S // not supported by ollama
|
||||
tensorTypeIQ2_S // not supported by ollama
|
||||
tensorTypeIQ4_XS // not supported by ollama
|
||||
TensorTypeI8
|
||||
TensorTypeI16
|
||||
TensorTypeI32
|
||||
TensorTypeI64
|
||||
TensorTypeF64
|
||||
tensorTypeIQ1_M // not supported by ollama
|
||||
TensorTypeBF16
|
||||
tensorTypeQ4_0_4_4 // unused by GGML
|
||||
tensorTypeQ4_0_4_8 // unused by GGML
|
||||
tensorTypeQ4_0_8_8 // unused by GGML
|
||||
tensorTypeTQ1_0 // not supported by ollama
|
||||
tensorTypeTQ2_0 // not supported by ollama
|
||||
tensorTypeIQ4_NL_4_4 // unused by GGML
|
||||
tensorTypeIQ4_NL_4_8 // unused by GGML
|
||||
tensorTypeIQ4_NL_8_8 // unused by GGML
|
||||
TensorTypeMXFP4
|
||||
)
|
||||
|
||||
// ParseTensorType parses the provided GGUF tensor type
|
||||
// Only Ollama supported types are considered valid
|
||||
func ParseTensorType(s string) (TensorType, error) {
|
||||
switch s {
|
||||
case "F32":
|
||||
return TensorTypeF32, nil
|
||||
case "F16":
|
||||
return TensorTypeF16, nil
|
||||
case "Q4_0":
|
||||
return TensorTypeQ4_0, nil
|
||||
case "Q4_1":
|
||||
return TensorTypeQ4_1, nil
|
||||
case "Q5_0":
|
||||
return TensorTypeQ5_0, nil
|
||||
case "Q5_1":
|
||||
return TensorTypeQ5_1, nil
|
||||
case "Q8_0":
|
||||
return TensorTypeQ8_0, nil
|
||||
case "Q8_1":
|
||||
return TensorTypeQ8_1, nil
|
||||
case "Q2_K":
|
||||
return TensorTypeQ2_K, nil
|
||||
case "Q3_K":
|
||||
return TensorTypeQ3_K, nil
|
||||
case "Q4_K":
|
||||
return TensorTypeQ4_K, nil
|
||||
case "Q5_K":
|
||||
return TensorTypeQ5_K, nil
|
||||
case "Q6_K":
|
||||
return TensorTypeQ6_K, nil
|
||||
case "Q8_K":
|
||||
return TensorTypeQ8_K, nil
|
||||
case "F64":
|
||||
return TensorTypeF64, nil
|
||||
case "BF16":
|
||||
return TensorTypeBF16, nil
|
||||
case "MXFP4":
|
||||
return TensorTypeMXFP4, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("unsupported quantization type %s", s)
|
||||
}
|
||||
}
|
||||
|
||||
func (t TensorType) IsQuantized() bool {
|
||||
switch t {
|
||||
case TensorTypeF32, TensorTypeF16, TensorTypeBF16:
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func (t TensorType) RowSize(ne uint64) uint64 {
|
||||
return t.TypeSize() * ne / t.BlockSize()
|
||||
}
|
||||
|
||||
func (t TensorType) String() string {
|
||||
switch t {
|
||||
case TensorTypeF32:
|
||||
return "F32"
|
||||
case TensorTypeF16:
|
||||
return "F16"
|
||||
case TensorTypeQ4_0:
|
||||
return "Q4_0"
|
||||
case TensorTypeQ4_1:
|
||||
return "Q4_1"
|
||||
case TensorTypeQ5_0:
|
||||
return "Q5_0"
|
||||
case TensorTypeQ5_1:
|
||||
return "Q5_1"
|
||||
case TensorTypeQ8_0:
|
||||
return "Q8_0"
|
||||
case TensorTypeQ8_1:
|
||||
return "Q8_1"
|
||||
case TensorTypeQ2_K:
|
||||
return "Q2_K"
|
||||
case TensorTypeQ3_K:
|
||||
return "Q3_K"
|
||||
case TensorTypeQ4_K:
|
||||
return "Q4_K"
|
||||
case TensorTypeQ5_K:
|
||||
return "Q5_K"
|
||||
case TensorTypeQ6_K:
|
||||
return "Q6_K"
|
||||
case TensorTypeQ8_K:
|
||||
return "Q8_K"
|
||||
case TensorTypeF64:
|
||||
return "F64"
|
||||
case TensorTypeBF16:
|
||||
return "BF16"
|
||||
case 4, TensorTypeMXFP4:
|
||||
return "MXFP4"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
347
fs/gguf/gguf.go
Normal file
347
fs/gguf/gguf.go
Normal file
@@ -0,0 +1,347 @@
|
||||
package gguf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"iter"
|
||||
"os"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
typeUint8 uint32 = iota
|
||||
typeInt8
|
||||
typeUint16
|
||||
typeInt16
|
||||
typeUint32
|
||||
typeInt32
|
||||
typeFloat32
|
||||
typeBool
|
||||
typeString
|
||||
typeArray
|
||||
typeUint64
|
||||
typeInt64
|
||||
typeFloat64
|
||||
)
|
||||
|
||||
var ErrUnsupported = errors.New("unsupported")
|
||||
|
||||
type File struct {
|
||||
Magic [4]byte
|
||||
Version uint32
|
||||
|
||||
keyValues *lazy[KeyValue]
|
||||
tensors *lazy[TensorInfo]
|
||||
offset int64
|
||||
|
||||
file *os.File
|
||||
reader *bufferedReader
|
||||
bts []byte
|
||||
}
|
||||
|
||||
func Open(path string) (f *File, err error) {
|
||||
f = &File{bts: make([]byte, 4096)}
|
||||
f.file, err = os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
f.reader = newBufferedReader(f.file, 32<<10)
|
||||
|
||||
if err := binary.Read(f.reader, binary.LittleEndian, &f.Magic); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if bytes.Equal(f.Magic[:], []byte("gguf")) {
|
||||
return nil, fmt.Errorf("%w file type %v", ErrUnsupported, f.Magic)
|
||||
}
|
||||
|
||||
if err := binary.Read(f.reader, binary.LittleEndian, &f.Version); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if f.Version < 2 {
|
||||
return nil, fmt.Errorf("%w version %v", ErrUnsupported, f.Version)
|
||||
}
|
||||
|
||||
f.tensors, err = newLazy(f, f.readTensor)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
f.tensors.successFunc = func() error {
|
||||
offset := f.reader.offset
|
||||
|
||||
alignment := cmp.Or(f.KeyValue("general.alignment").Int(), 32)
|
||||
f.offset = offset + (alignment-offset%alignment)%alignment
|
||||
return nil
|
||||
}
|
||||
|
||||
f.keyValues, err = newLazy(f, f.readKeyValue)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return f, nil
|
||||
}
|
||||
|
||||
func (f *File) readTensor() (TensorInfo, error) {
|
||||
name, err := readString(f)
|
||||
if err != nil {
|
||||
return TensorInfo{}, err
|
||||
}
|
||||
|
||||
dims, err := read[uint32](f)
|
||||
if err != nil {
|
||||
return TensorInfo{}, err
|
||||
}
|
||||
|
||||
shape := make([]uint64, dims)
|
||||
for i := range dims {
|
||||
shape[i], err = read[uint64](f)
|
||||
if err != nil {
|
||||
return TensorInfo{}, err
|
||||
}
|
||||
}
|
||||
|
||||
type_, err := read[uint32](f)
|
||||
if err != nil {
|
||||
return TensorInfo{}, err
|
||||
}
|
||||
|
||||
offset, err := read[uint64](f)
|
||||
if err != nil {
|
||||
return TensorInfo{}, err
|
||||
}
|
||||
|
||||
return TensorInfo{
|
||||
Name: name,
|
||||
Offset: offset,
|
||||
Shape: shape,
|
||||
Type: TensorType(type_),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (f *File) readKeyValue() (KeyValue, error) {
|
||||
key, err := readString(f)
|
||||
if err != nil {
|
||||
return KeyValue{}, err
|
||||
}
|
||||
|
||||
t, err := read[uint32](f)
|
||||
if err != nil {
|
||||
return KeyValue{}, err
|
||||
}
|
||||
|
||||
value, err := func() (any, error) {
|
||||
switch t {
|
||||
case typeUint8:
|
||||
return read[uint8](f)
|
||||
case typeInt8:
|
||||
return read[int8](f)
|
||||
case typeUint16:
|
||||
return read[uint16](f)
|
||||
case typeInt16:
|
||||
return read[int16](f)
|
||||
case typeUint32:
|
||||
return read[uint32](f)
|
||||
case typeInt32:
|
||||
return read[int32](f)
|
||||
case typeUint64:
|
||||
return read[uint64](f)
|
||||
case typeInt64:
|
||||
return read[int64](f)
|
||||
case typeFloat32:
|
||||
return read[float32](f)
|
||||
case typeFloat64:
|
||||
return read[float64](f)
|
||||
case typeBool:
|
||||
return read[bool](f)
|
||||
case typeString:
|
||||
return readString(f)
|
||||
case typeArray:
|
||||
return readArray(f)
|
||||
default:
|
||||
return nil, fmt.Errorf("%w type %d", ErrUnsupported, t)
|
||||
}
|
||||
}()
|
||||
if err != nil {
|
||||
return KeyValue{}, err
|
||||
}
|
||||
|
||||
return KeyValue{
|
||||
Key: key,
|
||||
Value: Value{value},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func read[T any](f *File) (t T, err error) {
|
||||
err = binary.Read(f.reader, binary.LittleEndian, &t)
|
||||
return t, err
|
||||
}
|
||||
|
||||
func readString(f *File) (string, error) {
|
||||
n, err := read[uint64](f)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if int(n) > len(f.bts) {
|
||||
f.bts = make([]byte, n)
|
||||
}
|
||||
|
||||
bts := f.bts[:n]
|
||||
if _, err := io.ReadFull(f.reader, bts); err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer clear(bts)
|
||||
|
||||
return string(bts), nil
|
||||
}
|
||||
|
||||
func readArray(f *File) (any, error) {
|
||||
t, err := read[uint32](f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
n, err := read[uint64](f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch t {
|
||||
case typeUint8:
|
||||
return readArrayData[uint8](f, n)
|
||||
case typeInt8:
|
||||
return readArrayData[int8](f, n)
|
||||
case typeUint16:
|
||||
return readArrayData[uint16](f, n)
|
||||
case typeInt16:
|
||||
return readArrayData[int16](f, n)
|
||||
case typeUint32:
|
||||
return readArrayData[uint32](f, n)
|
||||
case typeInt32:
|
||||
return readArrayData[int32](f, n)
|
||||
case typeUint64:
|
||||
return readArrayData[uint64](f, n)
|
||||
case typeInt64:
|
||||
return readArrayData[int64](f, n)
|
||||
case typeFloat32:
|
||||
return readArrayData[float32](f, n)
|
||||
case typeFloat64:
|
||||
return readArrayData[float64](f, n)
|
||||
case typeBool:
|
||||
return readArrayData[bool](f, n)
|
||||
case typeString:
|
||||
return readArrayString(f, n)
|
||||
default:
|
||||
return nil, fmt.Errorf("%w type %d", ErrUnsupported, t)
|
||||
}
|
||||
}
|
||||
|
||||
func readArrayData[T any](f *File, n uint64) (s []T, err error) {
|
||||
s = make([]T, n)
|
||||
for i := range n {
|
||||
e, err := read[T](f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s[i] = e
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func readArrayString(f *File, n uint64) (s []string, err error) {
|
||||
s = make([]string, n)
|
||||
for i := range n {
|
||||
e, err := readString(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s[i] = e
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (f *File) Close() error {
|
||||
f.keyValues.stop()
|
||||
f.tensors.stop()
|
||||
return f.file.Close()
|
||||
}
|
||||
|
||||
func (f *File) KeyValue(key string) KeyValue {
|
||||
if !strings.HasPrefix(key, "general.") && !strings.HasPrefix(key, "tokenizer.") {
|
||||
key = f.KeyValue("general.architecture").String() + "." + key
|
||||
}
|
||||
|
||||
if index := slices.IndexFunc(f.keyValues.values, func(kv KeyValue) bool {
|
||||
return kv.Key == key
|
||||
}); index >= 0 {
|
||||
return f.keyValues.values[index]
|
||||
}
|
||||
|
||||
for keyValue, ok := f.keyValues.next(); ok; keyValue, ok = f.keyValues.next() {
|
||||
if keyValue.Key == key {
|
||||
return keyValue
|
||||
}
|
||||
}
|
||||
|
||||
return KeyValue{}
|
||||
}
|
||||
|
||||
func (f *File) NumKeyValues() int {
|
||||
return int(f.keyValues.count)
|
||||
}
|
||||
|
||||
func (f *File) KeyValues() iter.Seq2[int, KeyValue] {
|
||||
return f.keyValues.All()
|
||||
}
|
||||
|
||||
func (f *File) TensorInfo(name string) TensorInfo {
|
||||
if index := slices.IndexFunc(f.tensors.values, func(t TensorInfo) bool {
|
||||
return t.Name == name
|
||||
}); index >= 0 {
|
||||
return f.tensors.values[index]
|
||||
}
|
||||
|
||||
// fast-forward through key values if we haven't already
|
||||
_ = f.keyValues.rest()
|
||||
for tensor, ok := f.tensors.next(); ok; tensor, ok = f.tensors.next() {
|
||||
if tensor.Name == name {
|
||||
return tensor
|
||||
}
|
||||
}
|
||||
|
||||
return TensorInfo{}
|
||||
}
|
||||
|
||||
func (f *File) NumTensors() int {
|
||||
return int(f.tensors.count)
|
||||
}
|
||||
|
||||
func (f *File) TensorInfos() iter.Seq2[int, TensorInfo] {
|
||||
// fast forward through key values if we haven't already
|
||||
f.keyValues.rest()
|
||||
return f.tensors.All()
|
||||
}
|
||||
|
||||
func (f *File) TensorReader(name string) (TensorInfo, io.Reader, error) {
|
||||
t := f.TensorInfo(name)
|
||||
if t.NumBytes() == 0 {
|
||||
return TensorInfo{}, nil, fmt.Errorf("tensor %s not found", name)
|
||||
}
|
||||
|
||||
// fast forward through tensor info if we haven't already
|
||||
_ = f.tensors.rest()
|
||||
return t, io.NewSectionReader(f.file, f.offset+int64(t.Offset), t.NumBytes()), nil
|
||||
}
|
||||
249
fs/gguf/gguf_test.go
Normal file
249
fs/gguf/gguf_test.go
Normal file
@@ -0,0 +1,249 @@
|
||||
package gguf_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
"github.com/ollama/ollama/fs/gguf"
|
||||
)
|
||||
|
||||
func createBinFile(tb testing.TB) string {
|
||||
tb.Helper()
|
||||
f, err := os.CreateTemp(tb.TempDir(), "")
|
||||
if err != nil {
|
||||
tb.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
kv := ggml.KV{
|
||||
"general.architecture": "llama",
|
||||
"llama.block_count": uint32(8),
|
||||
"llama.embedding_length": uint32(3),
|
||||
"llama.attention.head_count": uint32(2),
|
||||
"llama.attention.head_count_kv": uint32(2),
|
||||
"llama.attention.key_length": uint32(3),
|
||||
"llama.rope.dimension_count": uint32(4),
|
||||
"llama.rope.freq_base": float32(10000.0),
|
||||
"llama.rope.freq_scale": float32(1.0),
|
||||
"llama.attention.layer_norm_rms_epsilon": float32(1e-6),
|
||||
"tokenizer.ggml.eos_token_id": uint32(0),
|
||||
"tokenizer.ggml.eos_token_ids": []int32{1, 2, 3},
|
||||
"tokenizer.ggml.tokens": []string{"hello", "world"},
|
||||
"tokenizer.ggml.scores": []float32{0, 1},
|
||||
}
|
||||
|
||||
tensors := []*ggml.Tensor{
|
||||
{
|
||||
Name: "token_embd.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{2, 3},
|
||||
WriterTo: bytes.NewBuffer(make([]byte, 4*2*3)),
|
||||
},
|
||||
{
|
||||
Name: "output.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{3, 2},
|
||||
WriterTo: bytes.NewBuffer(make([]byte, 4*3*2)),
|
||||
},
|
||||
}
|
||||
|
||||
for i := range 8 {
|
||||
tensors = append(tensors, &ggml.Tensor{
|
||||
Name: "blk." + strconv.Itoa(i) + ".attn_q.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{3, 3},
|
||||
WriterTo: bytes.NewBuffer(make([]byte, 4*3*3)),
|
||||
}, &ggml.Tensor{
|
||||
Name: "blk." + strconv.Itoa(i) + ".attn_k.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{3, 3},
|
||||
WriterTo: bytes.NewBuffer(make([]byte, 4*3*3)),
|
||||
}, &ggml.Tensor{
|
||||
Name: "blk." + strconv.Itoa(i) + ".attn_v.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{3, 3},
|
||||
WriterTo: bytes.NewBuffer(make([]byte, 4*3*3)),
|
||||
}, &ggml.Tensor{
|
||||
Name: "blk." + strconv.Itoa(i) + ".attn_output.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{3, 3},
|
||||
WriterTo: bytes.NewBuffer(make([]byte, 4*3*3)),
|
||||
})
|
||||
}
|
||||
|
||||
if err := ggml.WriteGGUF(f, kv, tensors); err != nil {
|
||||
tb.Fatal(err)
|
||||
}
|
||||
|
||||
return f.Name()
|
||||
}
|
||||
|
||||
func TestRead(t *testing.T) {
|
||||
f, err := gguf.Open(createBinFile(t))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if got := f.KeyValue("does.not.exist").Valid(); got {
|
||||
t.Errorf(`KeyValue("does.not.exist").Exists() = %v, want false`, got)
|
||||
}
|
||||
|
||||
if got := f.KeyValue("general.architecture").String(); got != "llama" {
|
||||
t.Errorf(`KeyValue("general.architecture").String() = %q, want %q`, got, "llama")
|
||||
}
|
||||
|
||||
if got := f.TensorInfo("token_embd.weight"); got.Name != "token_embd.weight" {
|
||||
t.Errorf(`TensorInfo("token_embd.weight").Name = %q, want %q`, got.Name, "token_embd.weight")
|
||||
} else if diff := cmp.Diff(got.Shape, []uint64{2, 3}); diff != "" {
|
||||
t.Errorf(`TensorInfo("token_embd.weight").Shape mismatch (-got +want):\n%s`, diff)
|
||||
} else if got.Type != gguf.TensorTypeF32 {
|
||||
t.Errorf(`TensorInfo("token_embd.weight").Type = %d, want %d`, got.Type, gguf.TensorTypeF32)
|
||||
}
|
||||
|
||||
if got := f.KeyValue("block_count").Uint(); got != 8 {
|
||||
t.Errorf(`KeyValue("block_count").Uint() = %d, want %d`, got, 8)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(f.KeyValue("tokenizer.ggml.tokens").Strings(), []string{"hello", "world"}); diff != "" {
|
||||
t.Errorf("KeyValue(\"tokenizer.ggml.tokens\").Strings() mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(f.KeyValue("tokenizer.ggml.scores").Floats(), []float64{0, 1}); diff != "" {
|
||||
t.Errorf("KeyValue(\"tokenizer.ggml.scores\").Ints() mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
var kvs []string
|
||||
for _, kv := range f.KeyValues() {
|
||||
if !kv.Valid() {
|
||||
t.Error("found invalid key-value pair:", kv)
|
||||
}
|
||||
|
||||
kvs = append(kvs, kv.Key)
|
||||
}
|
||||
|
||||
if len(kvs) != f.NumKeyValues() {
|
||||
t.Errorf("iterated key count = %d, want %d", len(kvs), f.NumKeyValues())
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(kvs, []string{
|
||||
"general.architecture",
|
||||
"llama.block_count",
|
||||
"llama.embedding_length",
|
||||
"llama.attention.head_count",
|
||||
"llama.attention.head_count_kv",
|
||||
"llama.attention.key_length",
|
||||
"llama.rope.dimension_count",
|
||||
"llama.rope.freq_base",
|
||||
"llama.rope.freq_scale",
|
||||
"llama.attention.layer_norm_rms_epsilon",
|
||||
"tokenizer.ggml.eos_token_id",
|
||||
"tokenizer.ggml.eos_token_ids",
|
||||
"tokenizer.ggml.tokens",
|
||||
"tokenizer.ggml.scores",
|
||||
}, cmpopts.SortSlices(strings.Compare)); diff != "" {
|
||||
t.Errorf("KeyValues() mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
var tis []string
|
||||
for _, ti := range f.TensorInfos() {
|
||||
if !ti.Valid() {
|
||||
t.Error("found invalid tensor info:", ti)
|
||||
}
|
||||
|
||||
tis = append(tis, ti.Name)
|
||||
}
|
||||
|
||||
if len(tis) != f.NumTensors() {
|
||||
t.Errorf("iterated tensor count = %d, want %d", len(tis), f.NumTensors())
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(tis, []string{
|
||||
"token_embd.weight",
|
||||
"output.weight",
|
||||
"blk.0.attn_q.weight",
|
||||
"blk.0.attn_k.weight",
|
||||
"blk.0.attn_v.weight",
|
||||
"blk.0.attn_output.weight",
|
||||
"blk.1.attn_q.weight",
|
||||
"blk.1.attn_k.weight",
|
||||
"blk.1.attn_v.weight",
|
||||
"blk.1.attn_output.weight",
|
||||
"blk.2.attn_q.weight",
|
||||
"blk.2.attn_k.weight",
|
||||
"blk.2.attn_v.weight",
|
||||
"blk.2.attn_output.weight",
|
||||
"blk.3.attn_q.weight",
|
||||
"blk.3.attn_k.weight",
|
||||
"blk.3.attn_v.weight",
|
||||
"blk.3.attn_output.weight",
|
||||
"blk.4.attn_q.weight",
|
||||
"blk.4.attn_k.weight",
|
||||
"blk.4.attn_v.weight",
|
||||
"blk.4.attn_output.weight",
|
||||
"blk.5.attn_q.weight",
|
||||
"blk.5.attn_k.weight",
|
||||
"blk.5.attn_v.weight",
|
||||
"blk.5.attn_output.weight",
|
||||
"blk.6.attn_q.weight",
|
||||
"blk.6.attn_k.weight",
|
||||
"blk.6.attn_v.weight",
|
||||
"blk.6.attn_output.weight",
|
||||
"blk.7.attn_q.weight",
|
||||
"blk.7.attn_k.weight",
|
||||
"blk.7.attn_v.weight",
|
||||
"blk.7.attn_output.weight",
|
||||
}, cmpopts.SortSlices(strings.Compare)); diff != "" {
|
||||
t.Errorf("TensorInfos() mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
ti, r, err := f.TensorReader("output.weight")
|
||||
if err != nil {
|
||||
t.Fatalf(`TensorReader("output.weight") error: %v`, err)
|
||||
}
|
||||
|
||||
if ti.Name != "output.weight" {
|
||||
t.Errorf(`TensorReader("output.weight").Name = %q, want %q`, ti.Name, "output.weight")
|
||||
} else if diff := cmp.Diff(ti.Shape, []uint64{3, 2}); diff != "" {
|
||||
t.Errorf(`TensorReader("output.weight").Shape mismatch (-got +want):\n%s`, diff)
|
||||
} else if ti.Type != gguf.TensorTypeF32 {
|
||||
t.Errorf(`TensorReader("output.weight").Type = %d, want %d`, ti.Type, gguf.TensorTypeF32)
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := b.ReadFrom(r); err != nil {
|
||||
t.Fatalf(`ReadFrom TensorReader("output.weight") error: %v`, err)
|
||||
}
|
||||
|
||||
if b.Len() != int(ti.NumBytes()) {
|
||||
t.Errorf(`ReadFrom TensorReader("output.weight") length = %d, want %d`, b.Len(), ti.NumBytes())
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkRead(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
p := createBinFile(b)
|
||||
for b.Loop() {
|
||||
f, err := gguf.Open(p)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
if got := f.KeyValue("general.architecture").String(); got != "llama" {
|
||||
b.Errorf("got = %q, want %q", got, "llama")
|
||||
}
|
||||
|
||||
// Iterate through some tensors
|
||||
for range f.TensorInfos() {
|
||||
}
|
||||
|
||||
f.Close()
|
||||
}
|
||||
}
|
||||
90
fs/gguf/keyvalue.go
Normal file
90
fs/gguf/keyvalue.go
Normal file
@@ -0,0 +1,90 @@
|
||||
package gguf
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"slices"
|
||||
)
|
||||
|
||||
type KeyValue struct {
|
||||
Key string
|
||||
Value
|
||||
}
|
||||
|
||||
func (kv KeyValue) Valid() bool {
|
||||
return kv.Key != "" && kv.Value.value != nil
|
||||
}
|
||||
|
||||
type Value struct {
|
||||
value any
|
||||
}
|
||||
|
||||
func value[T any](v Value, kinds ...reflect.Kind) (t T) {
|
||||
vv := reflect.ValueOf(v.value)
|
||||
if slices.Contains(kinds, vv.Kind()) {
|
||||
t = vv.Convert(reflect.TypeOf(t)).Interface().(T)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func values[T any](v Value, kinds ...reflect.Kind) (ts []T) {
|
||||
switch vv := reflect.ValueOf(v.value); vv.Kind() {
|
||||
case reflect.Slice:
|
||||
if slices.Contains(kinds, vv.Type().Elem().Kind()) {
|
||||
ts = make([]T, vv.Len())
|
||||
for i := range vv.Len() {
|
||||
ts[i] = vv.Index(i).Convert(reflect.TypeOf(ts[i])).Interface().(T)
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Int returns Value as a signed integer. If it is not a signed integer, it returns 0.
|
||||
func (v Value) Int() int64 {
|
||||
return value[int64](v, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64)
|
||||
}
|
||||
|
||||
// Ints returns Value as a signed integer slice. If it is not a signed integer slice, it returns nil.
|
||||
func (v Value) Ints() (i64s []int64) {
|
||||
return values[int64](v, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64)
|
||||
}
|
||||
|
||||
// Uint converts an unsigned integer value to uint64. If the value is not a unsigned integer, it returns 0.
|
||||
func (v Value) Uint() uint64 {
|
||||
return value[uint64](v, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64)
|
||||
}
|
||||
|
||||
// Uints returns Value as a unsigned integer slice. If it is not a unsigned integer slice, it returns nil.
|
||||
func (v Value) Uints() (u64s []uint64) {
|
||||
return values[uint64](v, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64)
|
||||
}
|
||||
|
||||
// Float returns Value as a float. If it is not a float, it returns 0.
|
||||
func (v Value) Float() float64 {
|
||||
return value[float64](v, reflect.Float32, reflect.Float64)
|
||||
}
|
||||
|
||||
// Floats returns Value as a float slice. If it is not a float slice, it returns nil.
|
||||
func (v Value) Floats() (f64s []float64) {
|
||||
return values[float64](v, reflect.Float32, reflect.Float64)
|
||||
}
|
||||
|
||||
// Bool returns Value as a boolean. If it is not a boolean, it returns false.
|
||||
func (v Value) Bool() bool {
|
||||
return value[bool](v, reflect.Bool)
|
||||
}
|
||||
|
||||
// Bools returns Value as a boolean slice. If it is not a boolean slice, it returns nil.
|
||||
func (v Value) Bools() (bools []bool) {
|
||||
return values[bool](v, reflect.Bool)
|
||||
}
|
||||
|
||||
// String returns Value as a string. If it is not a string, it returns an empty string.
|
||||
func (v Value) String() string {
|
||||
return value[string](v, reflect.String)
|
||||
}
|
||||
|
||||
// Strings returns Value as a string slice. If it is not a string slice, it returns nil.
|
||||
func (v Value) Strings() (strings []string) {
|
||||
return values[string](v, reflect.String)
|
||||
}
|
||||
208
fs/gguf/keyvalue_test.go
Normal file
208
fs/gguf/keyvalue_test.go
Normal file
@@ -0,0 +1,208 @@
|
||||
package gguf
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
func split(name string, values map[string][]any) (matched []any, unmatched []any) {
|
||||
for key, value := range values {
|
||||
if key == name {
|
||||
matched = value
|
||||
} else {
|
||||
unmatched = append(unmatched, value...)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func TestValue(t *testing.T) {
|
||||
values := map[string][]any{
|
||||
"int64": {int(42), int8(42), int16(42), int32(42), int64(42)},
|
||||
"uint64": {uint(42), uint8(42), uint16(42), uint32(42), uint64(42)},
|
||||
"float64": {float32(42), float64(42)},
|
||||
"string": {"42", "hello"},
|
||||
"bool": {true, false},
|
||||
}
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
matched, unmatched := split("int64", values)
|
||||
for _, v := range matched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if i64 := kv.Int(); i64 != 42 {
|
||||
t.Errorf("expected 42, got %d", i64)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range unmatched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if i64 := kv.Int(); i64 != 0 {
|
||||
t.Errorf("expected 42, got %d", i64)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("uint64", func(t *testing.T) {
|
||||
matched, unmatched := split("uint64", values)
|
||||
for _, v := range matched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if u64 := kv.Uint(); u64 != 42 {
|
||||
t.Errorf("expected 42, got %d", u64)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range unmatched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if u64 := kv.Uint(); u64 != 0 {
|
||||
t.Errorf("expected 42, got %d", u64)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
matched, unmatched := split("float64", values)
|
||||
for _, v := range matched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if f64 := kv.Float(); f64 != 42 {
|
||||
t.Errorf("expected 42, got %f", f64)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range unmatched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if f64 := kv.Float(); f64 != 0 {
|
||||
t.Errorf("expected 42, got %f", f64)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("string", func(t *testing.T) {
|
||||
matched, unmatched := split("string", values)
|
||||
for _, v := range matched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if s := kv.String(); s != v {
|
||||
t.Errorf("expected 42, got %s", s)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range unmatched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if s := kv.String(); s != "" {
|
||||
t.Errorf("expected 42, got %s", s)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("bool", func(t *testing.T) {
|
||||
matched, unmatched := split("bool", values)
|
||||
for _, v := range matched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if b := kv.Bool(); b != v {
|
||||
t.Errorf("expected true, got %v", b)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range unmatched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if b := kv.Bool(); b != false {
|
||||
t.Errorf("expected false, got %v", b)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestValues(t *testing.T) {
|
||||
values := map[string][]any{
|
||||
"int64s": {[]int{42}, []int8{42}, []int16{42}, []int32{42}, []int64{42}},
|
||||
"uint64s": {[]uint{42}, []uint8{42}, []uint16{42}, []uint32{42}, []uint64{42}},
|
||||
"float64s": {[]float32{42}, []float64{42}},
|
||||
"strings": {[]string{"42"}, []string{"hello"}},
|
||||
"bools": {[]bool{true}, []bool{false}},
|
||||
}
|
||||
|
||||
t.Run("int64s", func(t *testing.T) {
|
||||
matched, unmatched := split("int64s", values)
|
||||
for _, v := range matched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if diff := cmp.Diff(kv.Ints(), []int64{42}); diff != "" {
|
||||
t.Errorf("diff: %s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range unmatched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if i64s := kv.Ints(); i64s != nil {
|
||||
t.Errorf("expected nil, got %v", i64s)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("uint64s", func(t *testing.T) {
|
||||
matched, unmatched := split("uint64s", values)
|
||||
for _, v := range matched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if diff := cmp.Diff(kv.Uints(), []uint64{42}); diff != "" {
|
||||
t.Errorf("diff: %s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range unmatched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if u64s := kv.Uints(); u64s != nil {
|
||||
t.Errorf("expected nil, got %v", u64s)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("float64s", func(t *testing.T) {
|
||||
matched, unmatched := split("float64s", values)
|
||||
for _, v := range matched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if diff := cmp.Diff(kv.Floats(), []float64{42}); diff != "" {
|
||||
t.Errorf("diff: %s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range unmatched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if f64s := kv.Floats(); f64s != nil {
|
||||
t.Errorf("expected nil, got %v", f64s)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("strings", func(t *testing.T) {
|
||||
matched, unmatched := split("strings", values)
|
||||
for _, v := range matched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if diff := cmp.Diff(kv.Strings(), v); diff != "" {
|
||||
t.Errorf("diff: %s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range unmatched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if s := kv.Strings(); s != nil {
|
||||
t.Errorf("expected nil, got %v", s)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("bools", func(t *testing.T) {
|
||||
matched, unmatched := split("bools", values)
|
||||
for _, v := range matched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if diff := cmp.Diff(kv.Bools(), v); diff != "" {
|
||||
t.Errorf("diff: %s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range unmatched {
|
||||
kv := KeyValue{"key", Value{v}}
|
||||
if b := kv.Bools(); b != nil {
|
||||
t.Errorf("expected nil, got %v", b)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
89
fs/gguf/lazy.go
Normal file
89
fs/gguf/lazy.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package gguf
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"iter"
|
||||
"log/slog"
|
||||
)
|
||||
|
||||
type lazy[T any] struct {
|
||||
count uint64
|
||||
next func() (T, bool)
|
||||
stop func()
|
||||
values []T
|
||||
|
||||
// successFunc is called when all values have been successfully read.
|
||||
successFunc func() error
|
||||
}
|
||||
|
||||
func newLazy[T any](f *File, fn func() (T, error)) (*lazy[T], error) {
|
||||
it := lazy[T]{}
|
||||
if err := binary.Read(f.reader, binary.LittleEndian, &it.count); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
it.values = make([]T, 0)
|
||||
it.next, it.stop = iter.Pull(func(yield func(T) bool) {
|
||||
for i := range it.count {
|
||||
t, err := fn()
|
||||
if err != nil {
|
||||
slog.Error("error reading tensor", "index", i, "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
it.values = append(it.values, t)
|
||||
if !yield(t) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if it.successFunc != nil {
|
||||
it.successFunc()
|
||||
}
|
||||
})
|
||||
|
||||
return &it, nil
|
||||
}
|
||||
|
||||
func (g *lazy[T]) Values() iter.Seq[T] {
|
||||
return func(yield func(T) bool) {
|
||||
for _, v := range g.All() {
|
||||
if !yield(v) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (g *lazy[T]) All() iter.Seq2[int, T] {
|
||||
return func(yield func(int, T) bool) {
|
||||
for i := range int(g.count) {
|
||||
if i < len(g.values) {
|
||||
if !yield(i, g.values[i]) {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
t, ok := g.next()
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
|
||||
if !yield(i, t) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (g *lazy[T]) rest() (collected bool) {
|
||||
for {
|
||||
_, ok := g.next()
|
||||
collected = collected || ok
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return collected
|
||||
}
|
||||
23
fs/gguf/reader.go
Normal file
23
fs/gguf/reader.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package gguf
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
)
|
||||
|
||||
type bufferedReader struct {
|
||||
offset int64
|
||||
*bufio.Reader
|
||||
}
|
||||
|
||||
func newBufferedReader(rs io.ReadSeeker, size int) *bufferedReader {
|
||||
return &bufferedReader{
|
||||
Reader: bufio.NewReaderSize(rs, size),
|
||||
}
|
||||
}
|
||||
|
||||
func (rs *bufferedReader) Read(p []byte) (n int, err error) {
|
||||
n, err = rs.Reader.Read(p)
|
||||
rs.offset += int64(n)
|
||||
return n, err
|
||||
}
|
||||
288
fs/gguf/tensor.go
Normal file
288
fs/gguf/tensor.go
Normal file
@@ -0,0 +1,288 @@
|
||||
package gguf
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type TensorInfo struct {
|
||||
Name string
|
||||
Offset uint64
|
||||
Shape []uint64
|
||||
Type TensorType
|
||||
}
|
||||
|
||||
func (ti TensorInfo) Valid() bool {
|
||||
return ti.Name != "" && ti.NumBytes() > 0
|
||||
}
|
||||
|
||||
func (ti TensorInfo) NumValues() int64 {
|
||||
var numItems int64 = 1
|
||||
for _, dim := range ti.Shape {
|
||||
numItems *= int64(dim)
|
||||
}
|
||||
return numItems
|
||||
}
|
||||
|
||||
// NumBytes returns the number of bytes in the tensor.
|
||||
func (ti TensorInfo) NumBytes() int64 {
|
||||
return int64(float64(ti.NumValues()) * ti.Type.NumBytes())
|
||||
}
|
||||
|
||||
func (ti TensorInfo) LogValue() slog.Value {
|
||||
return slog.GroupValue(
|
||||
slog.String("name", ti.Name),
|
||||
slog.Int64("offset", int64(ti.Offset)),
|
||||
slog.Any("shape", ti.Shape),
|
||||
slog.Int64("num_values", ti.NumValues()),
|
||||
slog.Int64("num_bytes", ti.NumBytes()),
|
||||
slog.Any("type", ti.Type),
|
||||
)
|
||||
}
|
||||
|
||||
type TensorType uint32
|
||||
|
||||
const (
|
||||
TensorTypeF32 TensorType = iota
|
||||
TensorTypeF16
|
||||
TensorTypeQ4_0
|
||||
TensorTypeQ4_1
|
||||
|
||||
// unexported // unused in gguf
|
||||
tensorTypeQ4_2
|
||||
tensorTypeQ4_3
|
||||
|
||||
TensorTypeQ5_0
|
||||
TensorTypeQ5_1
|
||||
TensorTypeQ8_0
|
||||
TensorTypeQ8_1
|
||||
TensorTypeQ2_K
|
||||
TensorTypeQ3_K
|
||||
TensorTypeQ4_K
|
||||
TensorTypeQ5_K
|
||||
TensorTypeQ6_K
|
||||
TensorTypeQ8_K
|
||||
|
||||
// unexported // unquantizable by ollama
|
||||
tensorTypeIQ2_XXS
|
||||
tensorTypeIQ2_XS
|
||||
tensorTypeIQ3_XXS
|
||||
tensorTypeIQ1_S
|
||||
tensorTypeIQ4_NL
|
||||
tensorTypeIQ3_S
|
||||
tensorTypeIQ2_S
|
||||
tensorTypeIQ4_XS
|
||||
|
||||
TensorTypeI8
|
||||
TensorTypeI16
|
||||
TensorTypeI32
|
||||
TensorTypeI64
|
||||
TensorTypeF64
|
||||
|
||||
// unexported // unquantizable by ollama
|
||||
tensorTypeIQ1_M
|
||||
|
||||
TensorTypeBF16
|
||||
|
||||
// unexported // unused in gguf
|
||||
tensorTypeQ4_0_4_4
|
||||
tensorTypeQ4_0_4_8
|
||||
tensorTypeQ4_0_8_8
|
||||
|
||||
// unexported // unquantizable by ollama
|
||||
tensorTypeTQ1_0
|
||||
tensorTypeTQ2_0
|
||||
|
||||
// unexported // unused in gguf
|
||||
tensorTypeIQ4_NL_4_4
|
||||
tensorTypeIQ4_NL_4_8
|
||||
tensorTypeIQ4_NL_8_8
|
||||
)
|
||||
|
||||
func (tt TensorType) NumBytes() float64 {
|
||||
return float64(tt.typeSize()) / float64(tt.blockSize())
|
||||
}
|
||||
|
||||
func (tt TensorType) typeSize() int64 {
|
||||
switch tt {
|
||||
case TensorTypeF32:
|
||||
return 4
|
||||
case TensorTypeF16:
|
||||
return 2
|
||||
case TensorTypeQ4_0:
|
||||
return 2 + tt.blockSize()/2
|
||||
case TensorTypeQ4_1:
|
||||
return 2 + 2 + tt.blockSize()/2
|
||||
case TensorTypeQ5_0:
|
||||
return 2 + 4 + tt.blockSize()/2
|
||||
case TensorTypeQ5_1:
|
||||
return 2 + 2 + 4 + tt.blockSize()/2
|
||||
case TensorTypeQ8_0:
|
||||
return 2 + tt.blockSize()
|
||||
case TensorTypeQ8_1:
|
||||
return 2 + 2 + tt.blockSize()
|
||||
case TensorTypeQ2_K:
|
||||
return tt.blockSize()/16 + tt.blockSize()/4 + 2 + 2
|
||||
case TensorTypeQ3_K:
|
||||
return tt.blockSize()/8 + tt.blockSize()/4 + 12 + 2
|
||||
case TensorTypeQ4_K:
|
||||
return 2 + 2 + 12 + tt.blockSize()/2
|
||||
case TensorTypeQ5_K:
|
||||
return 2 + 2 + 12 + tt.blockSize()/8 + tt.blockSize()/2
|
||||
case TensorTypeQ6_K:
|
||||
return tt.blockSize()/2 + tt.blockSize()/4 + tt.blockSize()/16 + 2
|
||||
case TensorTypeQ8_K:
|
||||
return 4 + tt.blockSize() + 2*tt.blockSize()/16
|
||||
case tensorTypeIQ2_XXS:
|
||||
return 2 + 2*tt.blockSize()/8
|
||||
case tensorTypeIQ2_XS:
|
||||
return 2 + 2*tt.blockSize()/8 + tt.blockSize()/32
|
||||
case tensorTypeIQ3_XXS:
|
||||
return 2 + tt.blockSize()/4 + tt.blockSize()/8
|
||||
case tensorTypeIQ1_S:
|
||||
return 2 + tt.blockSize()/8 + tt.blockSize()/16
|
||||
case tensorTypeIQ4_NL:
|
||||
return 2 + tt.blockSize()/2
|
||||
case tensorTypeIQ3_S:
|
||||
return 2 + tt.blockSize()/4 + tt.blockSize()/8 + tt.blockSize()/32 + 4
|
||||
case tensorTypeIQ2_S:
|
||||
return 2 + tt.blockSize()/4 + tt.blockSize()/16
|
||||
case tensorTypeIQ4_XS:
|
||||
return 2 + 2 + tt.blockSize()/2 + tt.blockSize()/64
|
||||
case TensorTypeI8:
|
||||
return 1
|
||||
case TensorTypeI16:
|
||||
return 2
|
||||
case TensorTypeI32:
|
||||
return 4
|
||||
case TensorTypeI64:
|
||||
return 8
|
||||
case TensorTypeF64:
|
||||
return 8
|
||||
case tensorTypeIQ1_M:
|
||||
return tt.blockSize()/8 + tt.blockSize()/16 + tt.blockSize()/32
|
||||
case TensorTypeBF16:
|
||||
return 2
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func (tt TensorType) blockSize() int64 {
|
||||
switch tt {
|
||||
case TensorTypeF32,
|
||||
TensorTypeF16,
|
||||
TensorTypeI8,
|
||||
TensorTypeI16,
|
||||
TensorTypeI32,
|
||||
TensorTypeI64,
|
||||
TensorTypeF64,
|
||||
TensorTypeBF16:
|
||||
return 1
|
||||
case TensorTypeQ4_0,
|
||||
TensorTypeQ4_1,
|
||||
TensorTypeQ5_0,
|
||||
TensorTypeQ5_1,
|
||||
TensorTypeQ8_0,
|
||||
TensorTypeQ8_1,
|
||||
tensorTypeIQ4_NL:
|
||||
return 32
|
||||
default:
|
||||
return 256
|
||||
}
|
||||
}
|
||||
|
||||
func (tt TensorType) String() string {
|
||||
switch tt {
|
||||
case TensorTypeF32:
|
||||
return "f32"
|
||||
case TensorTypeF16:
|
||||
return "f16"
|
||||
case TensorTypeQ4_0:
|
||||
return "q4_0"
|
||||
case TensorTypeQ4_1:
|
||||
return "q4_1"
|
||||
case tensorTypeQ4_2:
|
||||
return "q4_2"
|
||||
case tensorTypeQ4_3:
|
||||
return "q4_3"
|
||||
case TensorTypeQ5_0:
|
||||
return "q5_0"
|
||||
case TensorTypeQ5_1:
|
||||
return "q5_1"
|
||||
case TensorTypeQ8_0:
|
||||
return "q8_0"
|
||||
case TensorTypeQ8_1:
|
||||
return "q8_1"
|
||||
case TensorTypeQ2_K:
|
||||
return "q2_k"
|
||||
case TensorTypeQ3_K:
|
||||
return "q3_k"
|
||||
case TensorTypeQ4_K:
|
||||
return "q4_k"
|
||||
case TensorTypeQ5_K:
|
||||
return "q5_k"
|
||||
case TensorTypeQ6_K:
|
||||
return "q6_k"
|
||||
case TensorTypeQ8_K:
|
||||
return "q8_k"
|
||||
case tensorTypeIQ2_XXS:
|
||||
return "iq2_xxs"
|
||||
case tensorTypeIQ2_XS:
|
||||
return "iq2_xs"
|
||||
case tensorTypeIQ3_XXS:
|
||||
return "iq3_xxs"
|
||||
case tensorTypeIQ1_S:
|
||||
return "iq1_s"
|
||||
case tensorTypeIQ4_NL:
|
||||
return "iq4_nl"
|
||||
case tensorTypeIQ3_S:
|
||||
return "iq3_s"
|
||||
case tensorTypeIQ2_S:
|
||||
return "iq2_s"
|
||||
case tensorTypeIQ4_XS:
|
||||
return "iq4_xs"
|
||||
case TensorTypeI8:
|
||||
return "i8"
|
||||
case TensorTypeI16:
|
||||
return "i16"
|
||||
case TensorTypeI32:
|
||||
return "i32"
|
||||
case TensorTypeI64:
|
||||
return "i64"
|
||||
case TensorTypeF64:
|
||||
return "f64"
|
||||
case tensorTypeIQ1_M:
|
||||
return "iq1_m"
|
||||
case TensorTypeBF16:
|
||||
return "bf16"
|
||||
case tensorTypeQ4_0_4_4:
|
||||
return "q4_0_4_4"
|
||||
case tensorTypeQ4_0_4_8:
|
||||
return "q4_0_4_8"
|
||||
case tensorTypeQ4_0_8_8:
|
||||
return "q4_0_8_8"
|
||||
case tensorTypeTQ1_0:
|
||||
return "tq1_0"
|
||||
case tensorTypeTQ2_0:
|
||||
return "tq2_0"
|
||||
case tensorTypeIQ4_NL_4_4:
|
||||
return "iq4_nl_4_4"
|
||||
case tensorTypeIQ4_NL_4_8:
|
||||
return "iq4_nl_4_8"
|
||||
case tensorTypeIQ4_NL_8_8:
|
||||
return "iq4_nl_8_8"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
func (tt TensorType) LogValue() slog.Value {
|
||||
return slog.GroupValue(
|
||||
slog.Uint64("value", uint64(tt)),
|
||||
slog.String("name", strings.ToUpper(tt.String())),
|
||||
slog.Int64("size", tt.typeSize()),
|
||||
slog.Int64("block_size", tt.blockSize()),
|
||||
slog.Float64("num_bytes", tt.NumBytes()),
|
||||
)
|
||||
}
|
||||
34
fs/util/bufioutil/buffer_seeker.go
Normal file
34
fs/util/bufioutil/buffer_seeker.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package bufioutil
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
)
|
||||
|
||||
type BufferedSeeker struct {
|
||||
rs io.ReadSeeker
|
||||
br *bufio.Reader
|
||||
}
|
||||
|
||||
func NewBufferedSeeker(rs io.ReadSeeker, size int) *BufferedSeeker {
|
||||
return &BufferedSeeker{
|
||||
rs: rs,
|
||||
br: bufio.NewReaderSize(rs, size),
|
||||
}
|
||||
}
|
||||
|
||||
func (b *BufferedSeeker) Read(p []byte) (int, error) {
|
||||
return b.br.Read(p)
|
||||
}
|
||||
|
||||
func (b *BufferedSeeker) Seek(offset int64, whence int) (int64, error) {
|
||||
if whence == io.SeekCurrent {
|
||||
offset -= int64(b.br.Buffered())
|
||||
}
|
||||
n, err := b.rs.Seek(offset, whence)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
b.br.Reset(b.rs)
|
||||
return n, nil
|
||||
}
|
||||
64
fs/util/bufioutil/buffer_seeker_test.go
Normal file
64
fs/util/bufioutil/buffer_seeker_test.go
Normal file
@@ -0,0 +1,64 @@
|
||||
package bufioutil
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBufferedSeeker(t *testing.T) {
|
||||
const alphabet = "abcdefghijklmnopqrstuvwxyz"
|
||||
|
||||
bs := NewBufferedSeeker(strings.NewReader(alphabet), 0) // minReadBufferSize = 16
|
||||
|
||||
checkRead := func(buf []byte, expected string) {
|
||||
t.Helper()
|
||||
_, err := bs.Read(buf)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !bytes.Equal(buf, []byte(expected)) {
|
||||
t.Fatalf("expected %s, got %s", expected, buf)
|
||||
}
|
||||
}
|
||||
|
||||
// Read the first 5 bytes
|
||||
buf := make([]byte, 5)
|
||||
|
||||
checkRead(buf, "abcde")
|
||||
|
||||
// Seek back to the beginning
|
||||
_, err := bs.Seek(0, io.SeekStart)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// read 'a'
|
||||
checkRead(buf[:1], "a")
|
||||
|
||||
if bs.br.Buffered() == 0 {
|
||||
t.Fatalf("totally unexpected sanity check failed")
|
||||
}
|
||||
|
||||
// Seek past 'b'
|
||||
_, err = bs.Seek(1, io.SeekCurrent)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
checkRead(buf, "cdefg")
|
||||
|
||||
// Seek back to the beginning
|
||||
_, err = bs.Seek(0, io.SeekStart)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
checkRead(buf, "abcde")
|
||||
|
||||
// Seek to the end
|
||||
_, err = bs.Seek(-5, io.SeekEnd)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
checkRead(buf, "vwxyz")
|
||||
}
|
||||
Reference in New Issue
Block a user