ollama source for Momentry Core verification
This commit is contained in:
403
convert/convert.go
Normal file
403
convert/convert.go
Normal file
@@ -0,0 +1,403 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"iter"
|
||||
"log/slog"
|
||||
"maps"
|
||||
"os"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
ofs "github.com/ollama/ollama/fs"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type ModelParameters struct {
|
||||
Architectures []string `json:"architectures"`
|
||||
VocabSize uint32 `json:"vocab_size"`
|
||||
|
||||
// TODO is this needed?
|
||||
ModelType string `json:"model_type"`
|
||||
|
||||
TextModel struct {
|
||||
VocabSize uint32 `json:"vocab_size"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
ModelType string `json:"model_type"`
|
||||
} `json:"text_config"`
|
||||
}
|
||||
|
||||
type AdapterParameters struct {
|
||||
Alpha uint32 `json:"lora_alpha"`
|
||||
LoraLayers uint32 `json:"lora_layers"`
|
||||
LoraParameters struct {
|
||||
Rank uint32 `json:"rank"`
|
||||
Alpha float32 `json:"alpha"`
|
||||
Scale float32 `json:"scale"`
|
||||
} `json:"lora_parameters"`
|
||||
}
|
||||
|
||||
type KV map[string]any
|
||||
|
||||
func (kv KV) Architecture() string {
|
||||
return kv.String("general.architecture", "unknown")
|
||||
}
|
||||
|
||||
type valueTypes interface {
|
||||
uint8 | int8 | uint16 | int16 |
|
||||
uint32 | int32 | uint64 | int64 |
|
||||
string | float32 | float64 | bool
|
||||
}
|
||||
|
||||
type arrayValueTypes interface {
|
||||
[]uint8 | []int8 | []uint16 | []int16 |
|
||||
[]uint32 | []int32 | []uint64 | []int64 |
|
||||
[]string | []float32 | []float64 | []bool
|
||||
}
|
||||
|
||||
func keyValue[T valueTypes | arrayValueTypes](kv KV, key string, defaultValue ...T) (T, bool) {
|
||||
if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
|
||||
key = kv.Architecture() + "." + key
|
||||
}
|
||||
|
||||
if val, ok := kv[key].(T); ok {
|
||||
return val, true
|
||||
}
|
||||
return defaultValue[0], false
|
||||
}
|
||||
|
||||
func (kv KV) String(key string, defaultValue ...string) string {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, "")...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Uint(key string, defaultValue ...uint32) uint32 {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, 0)...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Float(key string, defaultValue ...float32) float32 {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, 0)...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Bool(key string, defaultValue ...bool) bool {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, false)...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Strings(key string, defaultValue ...[]string) []string {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, []string{""})...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Ints(key string, defaultValue ...[]int32) []int32 {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, []int32{0})...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, []uint32{0})...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Floats(key string, defaultValue ...[]float32) []float32 {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, []float32{0})...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
|
||||
val, _ := keyValue(kv, key, append(defaultValue, []bool{false})...)
|
||||
return val
|
||||
}
|
||||
|
||||
func (kv KV) Len() int {
|
||||
return len(kv)
|
||||
}
|
||||
|
||||
func (kv KV) Keys() iter.Seq[string] {
|
||||
return maps.Keys(kv)
|
||||
}
|
||||
|
||||
func (kv KV) Value(key string) any {
|
||||
return kv[key]
|
||||
}
|
||||
|
||||
func (ModelParameters) KV(t *Tokenizer) KV {
|
||||
kv := KV{
|
||||
"general.file_type": uint32(1),
|
||||
"general.quantization_version": uint32(2),
|
||||
"tokenizer.ggml.pre": t.Pre,
|
||||
"tokenizer.ggml.model": t.Vocabulary.Model,
|
||||
"tokenizer.ggml.tokens": t.Vocabulary.Tokens,
|
||||
"tokenizer.ggml.scores": t.Vocabulary.Scores,
|
||||
"tokenizer.ggml.token_type": t.Vocabulary.Types,
|
||||
}
|
||||
|
||||
if len(t.Merges) > 0 {
|
||||
kv["tokenizer.ggml.merges"] = t.Merges
|
||||
}
|
||||
|
||||
if t.Template != "" {
|
||||
kv["tokenizer.chat_template"] = t.Template
|
||||
}
|
||||
|
||||
for _, sv := range t.SpecialVocabulary {
|
||||
kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
|
||||
kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
|
||||
if len(sv.IDs) > 0 {
|
||||
kv[fmt.Sprintf("tokenizer.ggml.%s_token_ids", sv.Key())] = sv.IDs
|
||||
}
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p AdapterParameters) KV() KV {
|
||||
var alpha float32
|
||||
if p.LoraParameters.Alpha == 0 {
|
||||
alpha = float32(p.Alpha)
|
||||
} else {
|
||||
alpha = p.LoraParameters.Alpha
|
||||
}
|
||||
|
||||
kv := KV{
|
||||
"adapter.lora.alpha": alpha,
|
||||
"adapter.type": "lora",
|
||||
"general.file_type": uint32(1),
|
||||
"general.type": "adapter",
|
||||
"general.version": "v0.2",
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (ModelParameters) specialTokenTypes() []string {
|
||||
return []string{
|
||||
"bos", "eos", "unk", "sep", "pad", "cls", "mask",
|
||||
}
|
||||
}
|
||||
|
||||
type ModelKV interface {
|
||||
// KV maps parameters to LLM key-values
|
||||
KV(*Tokenizer) KV
|
||||
}
|
||||
|
||||
type ModelConverter interface {
|
||||
ModelKV
|
||||
|
||||
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
|
||||
Tensors([]Tensor) []*ggml.Tensor
|
||||
// Replacements returns a list of string pairs to replace in tensor names.
|
||||
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
||||
Replacements() []string
|
||||
|
||||
// specialTokenTypes returns any special token types the model uses
|
||||
specialTokenTypes() []string
|
||||
}
|
||||
|
||||
type moreParser interface {
|
||||
parseMore(fs.FS) error
|
||||
}
|
||||
|
||||
type AdapterConverter interface {
|
||||
// KV maps parameters to LLM key-values
|
||||
KV(ofs.Config) KV
|
||||
// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
|
||||
Tensors([]Tensor) []*ggml.Tensor
|
||||
// Replacements returns a list of string pairs to replace in tensor names.
|
||||
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
||||
Replacements() []string
|
||||
}
|
||||
|
||||
func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ofs.Config) error {
|
||||
bts, err := fs.ReadFile(fsys, "adapter_config.json")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var p AdapterParameters
|
||||
if err := json.Unmarshal(bts, &p); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
arch := baseKV.Architecture()
|
||||
if arch == "" {
|
||||
return errors.New("architecture not set for the base model")
|
||||
}
|
||||
|
||||
var conv AdapterConverter
|
||||
switch arch {
|
||||
case "llama":
|
||||
conv = &llamaAdapter{}
|
||||
case "gemma2":
|
||||
conv = &gemma2Adapter{}
|
||||
default:
|
||||
return errors.New("unsupported architecture")
|
||||
}
|
||||
|
||||
ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, conv); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
|
||||
}
|
||||
|
||||
func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
|
||||
bts, err := fs.ReadFile(fsys, "config.json")
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
bts = sanitizeNonFiniteJSON(bts)
|
||||
|
||||
var p ModelParameters
|
||||
if err := json.Unmarshal(bts, &p); err != nil {
|
||||
return nil, nil, fmt.Errorf("parse config.json: %w", err)
|
||||
}
|
||||
|
||||
if len(p.Architectures) < 1 {
|
||||
return nil, nil, errors.New("unknown architecture")
|
||||
}
|
||||
|
||||
var conv ModelConverter
|
||||
switch p.Architectures[0] {
|
||||
case "LlamaForCausalLM":
|
||||
conv = &llamaModel{}
|
||||
case "MllamaForConditionalGeneration":
|
||||
conv = &mllamaModel{}
|
||||
case "Llama4ForConditionalGeneration":
|
||||
conv = &llama4Model{}
|
||||
case "Mistral3ForConditionalGeneration":
|
||||
conv = &mistral3Model{}
|
||||
case "Ministral3ForCausalLM":
|
||||
conv = &mistral3CausalModel{}
|
||||
case "MixtralForCausalLM":
|
||||
conv = &mixtralModel{}
|
||||
case "GemmaForCausalLM":
|
||||
conv = &gemmaModel{}
|
||||
case "Gemma2ForCausalLM":
|
||||
conv = &gemma2Model{}
|
||||
case "Gemma3ForCausalLM", "Gemma3ForConditionalGeneration":
|
||||
conv = &gemma3Model{Architecture: p.Architectures[0]}
|
||||
case "Gemma3nForConditionalGeneration":
|
||||
conv = &gemma3nModel{}
|
||||
case "Gemma4ForCausalLM", "Gemma4ForConditionalGeneration":
|
||||
conv = &gemma4Model{Architecture: p.Architectures[0]}
|
||||
case "Phi3ForCausalLM":
|
||||
conv = &phi3Model{}
|
||||
case "Qwen2ForCausalLM":
|
||||
conv = &qwen2Model{}
|
||||
case "Qwen2_5_VLForConditionalGeneration":
|
||||
conv = &qwen25VLModel{}
|
||||
case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
|
||||
conv = &qwen3VLModel{}
|
||||
case "Olmo3ForCausalLM":
|
||||
conv = &olmoModel{}
|
||||
case "BertModel":
|
||||
conv = &bertModel{}
|
||||
case "NomicBertModel", "NomicBertMoEModel":
|
||||
conv = &nomicbertModel{}
|
||||
case "CohereForCausalLM":
|
||||
conv = &commandrModel{}
|
||||
case "GptOssForCausalLM":
|
||||
conv = &gptossModel{}
|
||||
case "DeepseekOCRForCausalLM":
|
||||
conv = &deepseekocr{}
|
||||
case "DeepseekV3ForCausalLM":
|
||||
conv = &deepseek2Model{}
|
||||
case "Glm4MoeLiteForCausalLM":
|
||||
conv = &glm4MoeLiteModel{}
|
||||
case "LagunaForCausalLM":
|
||||
conv = &lagunaModel{}
|
||||
case "GlmOcrForConditionalGeneration":
|
||||
conv = &glmOcrModel{}
|
||||
case "Lfm2ForCausalLM", "Lfm2MoeForCausalLM":
|
||||
conv = &lfm2Model{}
|
||||
case "Lfm2VlForConditionalGeneration":
|
||||
conv = &lfm2VLTextModel{}
|
||||
case "Qwen3NextForCausalLM", "Qwen3_5ForConditionalGeneration", "Qwen3_5MoeForConditionalGeneration":
|
||||
conv = &qwen3NextModel{}
|
||||
case "NemotronH_Nano_VL_V2", "NemotronH_Nano_Omni_Reasoning_V3":
|
||||
conv = &nemotronHNanoVLModel{}
|
||||
case "NemotronHForCausalLM":
|
||||
conv = &nemotronHModel{}
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("unsupported architecture %q", p.Architectures[0])
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, conv); err != nil {
|
||||
return nil, nil, fmt.Errorf("parse config.json for %q: %w", p.Architectures[0], err)
|
||||
}
|
||||
|
||||
if t, ok := conv.(moreParser); ok {
|
||||
if err := t.parseMore(fsys); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
t, err := parseTokenizer(fsys, conv.specialTokenTypes())
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
vocabSize := int(cmp.Or(p.VocabSize, p.TextModel.VocabSize))
|
||||
|
||||
switch {
|
||||
case vocabSize == 0:
|
||||
slog.Debug("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
|
||||
case vocabSize > len(t.Vocabulary.Tokens):
|
||||
slog.Debug("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
|
||||
for i := range vocabSize - len(t.Vocabulary.Tokens) {
|
||||
t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
|
||||
t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
|
||||
t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
|
||||
}
|
||||
case vocabSize < len(t.Vocabulary.Tokens):
|
||||
slog.Debug("vocabulary is larger than expected", "want", vocabSize, "got", len(t.Vocabulary.Tokens))
|
||||
p.VocabSize = uint32(len(t.Vocabulary.Tokens))
|
||||
p.TextModel.VocabSize = uint32(len(t.Vocabulary.Tokens))
|
||||
default:
|
||||
slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
|
||||
}
|
||||
return conv, t, nil
|
||||
}
|
||||
|
||||
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
|
||||
// and files it finds in the input path.
|
||||
// Supported input model formats include safetensors.
|
||||
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
|
||||
func ConvertModel(fsys fs.FS, f *os.File) error {
|
||||
kv, t, err := LoadModelMetadata(fsys)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
conv := kv.(ModelConverter)
|
||||
|
||||
ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return writeFile(f, conv.KV(t), conv.Tensors(ts))
|
||||
}
|
||||
|
||||
func writeFile(f *os.File, kv KV, ts []*ggml.Tensor) error {
|
||||
for k, v := range sourceTensorKV(ts) {
|
||||
kv[k] = v
|
||||
}
|
||||
|
||||
for i := range ts {
|
||||
ts[i].Shape = slices.Clone(ts[i].Shape)
|
||||
slices.Reverse(ts[i].Shape)
|
||||
}
|
||||
return ggml.WriteGGUF(f, kv, ts)
|
||||
}
|
||||
178
convert/convert_bert.go
Normal file
178
convert/convert_bert.go
Normal file
@@ -0,0 +1,178 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type bertModel struct {
|
||||
ModelParameters
|
||||
NLayers uint32 `json:"n_layers"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NLayer uint32 `json:"n_layer"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
NCtx uint32 `json:"n_ctx"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NEmbd uint32 `json:"n_embd"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NInner uint32 `json:"n_inner"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NHead uint32 `json:"n_head"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
LayerNormEPS float32 `json:"layer_norm_eps"`
|
||||
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
|
||||
NormEpsilon float32 `json:"norm_epsilon"`
|
||||
normalizeEmbeddings bool
|
||||
|
||||
PoolingType uint32
|
||||
}
|
||||
|
||||
var (
|
||||
_ ModelConverter = (*bertModel)(nil)
|
||||
_ moreParser = (*bertModel)(nil)
|
||||
)
|
||||
|
||||
func (p *bertModel) parseMore(fsys fs.FS) error {
|
||||
bts, err := fs.ReadFile(fsys, "modules.json")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var modules []struct {
|
||||
Type string `json:"type"`
|
||||
Path string `json:"path"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, &modules); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var pooling string
|
||||
for _, m := range modules {
|
||||
switch m.Type {
|
||||
case "sentence_transformers.models.Pooling":
|
||||
pooling = m.Path
|
||||
case "sentence_transformers.models.Normalize":
|
||||
p.normalizeEmbeddings = true
|
||||
}
|
||||
}
|
||||
|
||||
if pooling != "" {
|
||||
bts, err := fs.ReadFile(fsys, filepath.Join(pooling, "config.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var pc struct {
|
||||
PoolingModeCLSToken bool `json:"pooling_mode_cls_token"`
|
||||
PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, &pc); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if pc.PoolingModeMeanTokens {
|
||||
p.PoolingType = 1
|
||||
} else if pc.PoolingModeCLSToken {
|
||||
p.PoolingType = 2
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *bertModel) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "bert"
|
||||
kv["bert.attention.causal"] = false
|
||||
kv["bert.pooling_type"] = p.PoolingType
|
||||
kv["bert.normalize_embeddings"] = p.normalizeEmbeddings
|
||||
|
||||
kv["bert.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
|
||||
|
||||
if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
|
||||
kv["bert.context_length"] = contextLength
|
||||
}
|
||||
|
||||
if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
|
||||
kv["bert.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||
}
|
||||
|
||||
if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
|
||||
kv["bert.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
|
||||
}
|
||||
|
||||
if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
|
||||
kv["bert.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||
}
|
||||
|
||||
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
|
||||
kv["bert.attention.layer_norm_epsilon"] = layerNormEpsilon
|
||||
}
|
||||
|
||||
kv["tokenizer.ggml.model"] = "bert"
|
||||
kv["tokenizer.ggml.token_type_count"] = uint32(2)
|
||||
|
||||
// convert to phantom space tokens
|
||||
for i, e := range t.Tokens {
|
||||
if strings.HasPrefix(e, "[") && strings.HasSuffix(e, "]") {
|
||||
// noop
|
||||
} else if strings.HasPrefix(e, "##") {
|
||||
t.Tokens[i] = e[2:]
|
||||
} else {
|
||||
t.Tokens[i] = "\u2581" + e
|
||||
}
|
||||
}
|
||||
|
||||
kv["tokenizer.ggml.tokens"] = t.Tokens
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *bertModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
for _, t := range ts {
|
||||
if slices.Contains([]string{
|
||||
"embeddings.position_ids",
|
||||
"pooler.dense.weight",
|
||||
"pooler.dense.bias",
|
||||
}, t.Name()) {
|
||||
continue
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (bertModel) Replacements() []string {
|
||||
return []string{
|
||||
"encoder.layer", "blk",
|
||||
"encoder.layers", "blk",
|
||||
"embeddings.word_embeddings", "token_embd",
|
||||
"embeddings.token_type_embeddings", "token_types",
|
||||
"embeddings.LayerNorm", "token_embd_norm",
|
||||
"embeddings.position_embeddings", "position_embd",
|
||||
"attention.self.query", "attn_q",
|
||||
"attention.self.key", "attn_k",
|
||||
"attention.self.value", "attn_v",
|
||||
"attention.output.dense", "attn_output",
|
||||
"attention.output.LayerNorm", "attn_output_norm",
|
||||
"intermediate.dense", "ffn_up",
|
||||
"output.dense", "ffn_down",
|
||||
"output.LayerNorm", "layer_output_norm",
|
||||
}
|
||||
}
|
||||
76
convert/convert_commandr.go
Normal file
76
convert/convert_commandr.go
Normal file
@@ -0,0 +1,76 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type commandrModel struct {
|
||||
ModelParameters
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
LayerNormEPS float32 `json:"layer_norm_eps"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
UseQKNorm bool `json:"use_qk_norm"`
|
||||
MaxLength uint32 `json:"model_max_length"`
|
||||
LogitScale float32 `json:"logit_scale"`
|
||||
NCtx uint32 `json:"n_ctx"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*commandrModel)(nil)
|
||||
|
||||
func (p *commandrModel) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "command-r"
|
||||
kv["general.name"] = "command-r"
|
||||
kv["command-r.context_length"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings, p.NCtx)
|
||||
kv["command-r.embedding_length"] = p.HiddenSize
|
||||
kv["command-r.block_count"] = p.HiddenLayers
|
||||
kv["command-r.feed_forward_length"] = p.IntermediateSize
|
||||
kv["command-r.attention.head_count"] = p.NumAttentionHeads
|
||||
kv["command-r.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||
kv["command-r.attention.layer_norm_epsilon"] = p.LayerNormEPS
|
||||
kv["command-r.rope.freq_base"] = p.RopeTheta
|
||||
kv["command-r.max_position_embeddings"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings)
|
||||
kv["command-r.logit_scale"] = p.LogitScale
|
||||
kv["command-r.rope.scaling.type"] = "none"
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *commandrModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
for _, t := range ts {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *commandrModel) Replacements() []string {
|
||||
return []string{
|
||||
"self_attn.q_norm", "attn_q_norm",
|
||||
"self_attn.k_norm", "attn_k_norm",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"model.norm", "output_norm",
|
||||
"model.embed_tokens", "token_embd",
|
||||
}
|
||||
}
|
||||
173
convert/convert_deepseek2.go
Normal file
173
convert/convert_deepseek2.go
Normal file
@@ -0,0 +1,173 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"regexp"
|
||||
"strconv"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type deepseek2Model struct {
|
||||
ModelParameters // architectures, vocab_size
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
QKNopeHeadDim uint32 `json:"qk_nope_head_dim"`
|
||||
QKRopeHeadDim uint32 `json:"qk_rope_head_dim"`
|
||||
KVLoraRank uint32 `json:"kv_lora_rank"`
|
||||
QLoraRank uint32 `json:"q_lora_rank"`
|
||||
VHeadDim uint32 `json:"v_head_dim"`
|
||||
|
||||
ExpertCount uint32 `json:"n_routed_experts"`
|
||||
ExpertSharedCount uint32 `json:"n_shared_experts"`
|
||||
ExpertIntermediateSize uint32 `json:"moe_intermediate_size"`
|
||||
ExpertUsedCount uint32 `json:"num_experts_per_tok"`
|
||||
ExpertWeightsNorm bool `json:"norm_topk_prob"`
|
||||
ExpertWeightsScale float32 `json:"routed_scaling_factor"`
|
||||
|
||||
ScoringFunc string `json:"scoring_func"`
|
||||
LeadingDenseBlockCount uint32 `json:"first_k_dense_replace"`
|
||||
|
||||
RopeScaling struct {
|
||||
Factor float32 `json:"factor"`
|
||||
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
Type string `json:"type"`
|
||||
MScaleAllDim float32 `json:"mscale_all_dim"`
|
||||
} `json:"rope_scaling"`
|
||||
|
||||
Architecture string
|
||||
}
|
||||
|
||||
func (p *deepseek2Model) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "deepseek2"
|
||||
kv["general.type"] = "model"
|
||||
kv["deepseek2.block_count"] = p.HiddenLayers
|
||||
|
||||
numHeads := p.NumAttentionHeads
|
||||
numKVHeads := p.NumKeyValueHeads
|
||||
|
||||
kv["deepseek2.attention.head_count"] = numHeads
|
||||
kv["deepseek2.attention.head_count_kv"] = numKVHeads
|
||||
kv["deepseek2.attention.key_length"] = p.QKNopeHeadDim + p.QKRopeHeadDim
|
||||
kv["deepseek2.attention.kv_lora_rank"] = p.KVLoraRank
|
||||
kv["deepseek2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
kv["deepseek2.attention.q_lora_rank"] = p.QLoraRank
|
||||
kv["deepseek2.attention.value_length"] = p.VHeadDim
|
||||
kv["deepseek2.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["deepseek2.embedding_length"] = p.HiddenSize
|
||||
kv["deepseek2.expert_count"] = p.ExpertCount
|
||||
kv["deepseek2.expert_feed_forward_length"] = p.ExpertIntermediateSize
|
||||
kv["deepseek2.expert_shared_count"] = p.ExpertSharedCount
|
||||
|
||||
var scoringFunc uint32
|
||||
switch p.ScoringFunc {
|
||||
case "softmax":
|
||||
// not currently supported in the model, but needed for Deepseek-OCR
|
||||
scoringFunc = 1
|
||||
case "sigmoid":
|
||||
scoringFunc = 2
|
||||
}
|
||||
kv["deepseek2.expert_gating_func"] = scoringFunc
|
||||
kv["deepseek2.expert_used_count"] = p.ExpertUsedCount
|
||||
kv["deepseek2.expert_weights_norm"] = p.ExpertWeightsNorm
|
||||
kv["deepseek2.expert_weights_scale"] = p.ExpertWeightsScale
|
||||
kv["deepseek2.feed_forward_length"] = p.IntermediateSize
|
||||
kv["deepseek2.leading_dense_block_count"] = p.LeadingDenseBlockCount
|
||||
|
||||
kv["deepseek2.rope.dimension_count"] = p.QKRopeHeadDim
|
||||
kv["deepseek2.rope.freq_base"] = cmp.Or(p.RopeTheta, 10000.0)
|
||||
kv["deepseek2.rope.scaling.factor"] = p.RopeScaling.Factor
|
||||
kv["deepseek2.rope.scaling.original_context_length"] = p.RopeScaling.OriginalMaxPositionEmbeddings
|
||||
kv["deepseek2.rope.scaling.type"] = p.RopeScaling.Type
|
||||
kv["deepseek2.rope.scaling.yarn_log_multiplier"] = 0.1 * p.RopeScaling.MScaleAllDim
|
||||
|
||||
kv["tokenizer.ggml.pre"] = "deepseek-v3"
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *deepseek2Model) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"language_model.", "",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.kv_a_proj_with_mqa", "attn_kv_a_mqa",
|
||||
"self_attn.kv_a_layernorm", "attn_kv_a_norm",
|
||||
"self_attn.kv_b_proj", "attn_kv_b",
|
||||
"self_attn.q_a_proj", "attn_q_a",
|
||||
"self_attn.q_a_layernorm", "attn_q_a_norm",
|
||||
"self_attn.q_b_proj", "attn_q_b",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"mlp.shared_experts.down_proj", "ffn_down_shexp",
|
||||
"mlp.shared_experts.gate_proj", "ffn_gate_shexp",
|
||||
"mlp.shared_experts.up_proj", "ffn_up_shexp",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"mlp.gate.e_score_correction_bias", "exp_probs_b.bias",
|
||||
"mlp.gate", "ffn_gate_inp",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *deepseek2Model) Tensors(s []Tensor) (out []*ggml.Tensor) {
|
||||
merges := make([]merge, p.HiddenLayers*3)
|
||||
for i := range p.HiddenLayers {
|
||||
merges[i*3+0] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.gate_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
|
||||
}
|
||||
merges[i*3+1] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.up_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
|
||||
}
|
||||
merges[i*3+2] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.down_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
|
||||
}
|
||||
}
|
||||
|
||||
skipLayer := func(n string, minValue uint32) bool {
|
||||
re := regexp.MustCompile(`^blk\.(\d+)`)
|
||||
matches := re.FindStringSubmatch(n)
|
||||
if matches == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
blkNum, err := strconv.Atoi(matches[1])
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return uint32(blkNum) >= minValue
|
||||
}
|
||||
|
||||
out, s = mergeTensors(s, merges...)
|
||||
for _, t := range s {
|
||||
// skip any additional layers (such as the Multi-Token Prediction layer)
|
||||
if skipLayer(t.Name(), p.HiddenLayers) {
|
||||
slog.Debug("skipping layer", "name", t.Name())
|
||||
continue
|
||||
}
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
136
convert/convert_deepseekocr.go
Normal file
136
convert/convert_deepseekocr.go
Normal file
@@ -0,0 +1,136 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type deepseekocr struct {
|
||||
ModelParameters
|
||||
LanguageConfig struct {
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
NumRoutedExperts uint32 `json:"n_routed_experts"`
|
||||
NumSharedExperts uint32 `json:"n_shared_experts"`
|
||||
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||
FirstKDenseReplace uint32 `json:"first_k_dense_replace"`
|
||||
} `json:"language_config"`
|
||||
|
||||
VisionConfig struct {
|
||||
ImageSize uint32 `json:"image_size"`
|
||||
Width struct {
|
||||
Vision struct {
|
||||
Heads uint32 `json:"heads"`
|
||||
ImageSize uint32 `json:"image_size"`
|
||||
Layers uint32 `json:"layers"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
Width uint32 `json:"width"`
|
||||
} `json:"clip-l-14-224"`
|
||||
Sam struct {
|
||||
GlobalAttentionIndexes []int32 `json:"global_attn_indexes"`
|
||||
Heads uint32 `json:"heads"`
|
||||
Layers uint32 `json:"layers"`
|
||||
Width uint32 `json:"width"`
|
||||
} `json:"sam_vit_b"`
|
||||
}
|
||||
} `json:"vision_config"`
|
||||
}
|
||||
|
||||
func (m *deepseekocr) KV(t *Tokenizer) KV {
|
||||
kv := m.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "deepseekocr"
|
||||
kv["block_count"] = m.LanguageConfig.HiddenLayers
|
||||
kv["context_length"] = m.LanguageConfig.MaxPositionEmbeddings
|
||||
kv["embedding_length"] = m.LanguageConfig.HiddenSize
|
||||
kv["feed_forward_length"] = m.LanguageConfig.IntermediateSize
|
||||
kv["attention.head_count"] = m.LanguageConfig.NumAttentionHeads
|
||||
kv["attention.head_count_kv"] = m.LanguageConfig.NumKeyValueHeads
|
||||
kv["expert_count"] = m.LanguageConfig.NumRoutedExperts
|
||||
kv["expert_used_count"] = m.LanguageConfig.NumExpertsPerToken
|
||||
kv["leading_dense_block_count"] = m.LanguageConfig.FirstKDenseReplace
|
||||
|
||||
kv["vision.block_count"] = m.VisionConfig.Width.Vision.Layers
|
||||
kv["vision.embedding_length"] = m.VisionConfig.Width.Vision.Width
|
||||
kv["vision.head_count"] = m.VisionConfig.Width.Vision.Heads
|
||||
kv["vision.image_size"] = m.VisionConfig.Width.Vision.ImageSize
|
||||
kv["vision.patch_size"] = m.VisionConfig.Width.Vision.PatchSize
|
||||
|
||||
kv["sam.block_count"] = m.VisionConfig.Width.Sam.Layers
|
||||
kv["sam.embedding_length"] = m.VisionConfig.Width.Sam.Width
|
||||
kv["sam.head_count"] = m.VisionConfig.Width.Sam.Heads
|
||||
kv["sam.global_attention_indexes"] = m.VisionConfig.Width.Sam.GlobalAttentionIndexes
|
||||
return kv
|
||||
}
|
||||
|
||||
func (m *deepseekocr) Tensors(s []Tensor) (out []*ggml.Tensor) {
|
||||
merges := make([]merge, m.LanguageConfig.HiddenLayers*3)
|
||||
for i := range m.LanguageConfig.HiddenLayers {
|
||||
merges[i*3+0] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.gate_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
|
||||
}
|
||||
merges[i*3+1] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.up_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
|
||||
}
|
||||
merges[i*3+2] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.down_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
|
||||
}
|
||||
}
|
||||
|
||||
out, s = mergeTensors(s, merges...)
|
||||
for _, t := range s {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (m *deepseekocr) Replacements() []string {
|
||||
return []string{
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate", "ffn_gate_inp",
|
||||
"mlp.shared_experts.gate_proj", "ffn_gate_shexp",
|
||||
"mlp.shared_experts.up_proj", "ffn_up_shexp",
|
||||
"mlp.shared_experts.down_proj", "ffn_down_shexp",
|
||||
"model.norm", "output_norm",
|
||||
"lm_head", "output",
|
||||
|
||||
"model.vision_model", "v",
|
||||
"embeddings.patch_embedding", "patch_embd",
|
||||
"embeddings.class_embedding", "class_embd",
|
||||
"embeddings.position_embedding", "position_embd",
|
||||
"transformer.layers", "blk",
|
||||
|
||||
"model.projector", "mm",
|
||||
"model.image_newline", "mm.image_newline",
|
||||
//nolint:misspell // this misspelling is upstream. fixing it breaks the model
|
||||
"model.view_seperator", "mm.view_seperator",
|
||||
|
||||
"model.sam_model.patch_embed.proj", "s.patch_embd",
|
||||
"model.sam_model.pos_embed", "s.position_embd",
|
||||
"model.sam_model.blocks", "s.blk",
|
||||
"model.sam_model.neck", "s.neck",
|
||||
"model.sam_model.net_", "s.net_",
|
||||
}
|
||||
}
|
||||
100
convert/convert_gemma.go
Normal file
100
convert/convert_gemma.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type gemmaModel struct {
|
||||
ModelParameters
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*gemmaModel)(nil)
|
||||
|
||||
func (p *gemmaModel) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "gemma"
|
||||
kv["gemma.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["gemma.embedding_length"] = p.HiddenSize
|
||||
kv["gemma.block_count"] = p.HiddenLayers
|
||||
kv["gemma.feed_forward_length"] = p.IntermediateSize
|
||||
kv["gemma.attention.head_count"] = p.NumAttentionHeads
|
||||
kv["gemma.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||
kv["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
kv["gemma.attention.key_length"] = p.HeadDim
|
||||
kv["gemma.attention.value_length"] = p.HeadDim
|
||||
kv["tokenizer.ggml.eot_token_id"] = uint32(107)
|
||||
kv["tokenizer.ggml.middle_token_id"] = uint32(68)
|
||||
kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
|
||||
kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *gemmaModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
for _, t := range ts {
|
||||
if !strings.HasPrefix(t.Name(), "v.") && strings.HasSuffix(t.Name(), "_norm.weight") {
|
||||
t.SetRepacker(p.addOne)
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *gemmaModel) Replacements() []string {
|
||||
return []string{
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
}
|
||||
}
|
||||
|
||||
func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
|
||||
ones := tensor.Ones(tensor.Float32, int(shape[0]))
|
||||
|
||||
n, err := n.Add(ones)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
49
convert/convert_gemma2.go
Normal file
49
convert/convert_gemma2.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package convert
|
||||
|
||||
type gemma2Model struct {
|
||||
gemmaModel
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
|
||||
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
|
||||
}
|
||||
|
||||
func (p *gemma2Model) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "gemma2"
|
||||
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["gemma2.embedding_length"] = p.HiddenSize
|
||||
kv["gemma2.block_count"] = p.HiddenLayers
|
||||
kv["gemma2.feed_forward_length"] = p.IntermediateSize
|
||||
kv["gemma2.attention.head_count"] = p.NumAttentionHeads
|
||||
kv["gemma2.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||
kv["gemma2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
kv["gemma2.attention.key_length"] = p.HeadDim
|
||||
kv["gemma2.attention.value_length"] = p.HeadDim
|
||||
kv["gemma2.attention.sliding_window"] = p.SlidingWindow
|
||||
kv["gemma2.attn_logit_softcapping"] = p.AttentionLogitSoftcap
|
||||
kv["gemma2.final_logit_softcapping"] = p.FinalLogitSoftcap
|
||||
kv["tokenizer.ggml.eot_token_id"] = uint32(107)
|
||||
kv["tokenizer.ggml.middle_token_id"] = uint32(68)
|
||||
kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
|
||||
kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *gemma2Model) Replacements() []string {
|
||||
return []string{
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"post_attention_layernorm", "post_attention_norm",
|
||||
"pre_feedforward_layernorm", "ffn_norm",
|
||||
"post_feedforward_layernorm", "post_ffw_norm",
|
||||
}
|
||||
}
|
||||
92
convert/convert_gemma2_adapter.go
Normal file
92
convert/convert_gemma2_adapter.go
Normal file
@@ -0,0 +1,92 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type gemma2Adapter struct {
|
||||
AdapterParameters
|
||||
}
|
||||
|
||||
var _ AdapterConverter = (*gemma2Adapter)(nil)
|
||||
|
||||
func (p *gemma2Adapter) KV(baseKV fs.Config) KV {
|
||||
kv := p.AdapterParameters.KV()
|
||||
kv["general.architecture"] = "gemma2"
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *gemma2Adapter) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
for _, t := range ts {
|
||||
shape := t.Shape()
|
||||
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
|
||||
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
|
||||
shape[0], shape[1] = shape[1], shape[0]
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *gemma2Adapter) Replacements() []string {
|
||||
return []string{
|
||||
"base_model.model.", "",
|
||||
"model.layers", "blk",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"lora_A.weight", "weight.lora_a",
|
||||
"lora_B.weight", "weight.lora_b",
|
||||
"lora_a", "weight.lora_a",
|
||||
"lora_b", "weight.lora_b",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := []int{int(shape[1]), int(shape[0])}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
|
||||
if err := n.T(1, 0); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
180
convert/convert_gemma3.go
Normal file
180
convert/convert_gemma3.go
Normal file
@@ -0,0 +1,180 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"slices"
|
||||
)
|
||||
|
||||
type gemma3Model struct {
|
||||
gemmaModel
|
||||
Architecture string
|
||||
TextModel struct {
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
} `json:"text_config"`
|
||||
VisionModel struct {
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"` // attention.head_count 16
|
||||
LayerNormEpsilon float32 `json:"layer_norm_eps"` // attention.layer_norm_epsilon 1e-05
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"` // block_count 32
|
||||
HiddenSize uint32 `json:"hidden_size"` // embedding_length 1280
|
||||
IntermediateSize uint32 `json:"intermediate_size"` // feed_forward_length 5120
|
||||
ImageSize uint32 `json:"image_size"` // image_size 560
|
||||
NumChannels uint32 `json:"num_channels"` // num_channels 3
|
||||
PatchSize uint32 `json:"patch_size"` // patch_size 14
|
||||
} `json:"vision_config"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
|
||||
RopeLocalTheta float32 `json:"rope_local_base_freq"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
SlidingWindowPattern *uint32 `json:"sliding_window_pattern"`
|
||||
LayerTypes []string `json:"layer_types"`
|
||||
MultiModalTokensPerImage uint32 `json:"mm_tokens_per_image"`
|
||||
RopeScaling *struct {
|
||||
Type string `json:"rope_type"`
|
||||
Factor float32 `json:"factor"`
|
||||
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
ExtrapolationFactor float32 `json:"extrapolation_factor"`
|
||||
BetaFast float32 `json:"beta_fast"`
|
||||
BetaSlow float32 `json:"beta_slow"`
|
||||
} `json:"rope_scaling"`
|
||||
}
|
||||
|
||||
const (
|
||||
gemma4BLayerCount = 34
|
||||
gemma12BLayerCount = 48
|
||||
gemma27BLayerCount = 62
|
||||
)
|
||||
|
||||
func (p *gemma3Model) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "gemma3"
|
||||
|
||||
numBlocks := cmp.Or(p.HiddenLayers, p.TextModel.HiddenLayers)
|
||||
kv["gemma3.block_count"] = numBlocks
|
||||
|
||||
var (
|
||||
numHeads uint32
|
||||
numKVHeads uint32
|
||||
)
|
||||
|
||||
switch numBlocks {
|
||||
case gemma4BLayerCount:
|
||||
numHeads = 8
|
||||
numKVHeads = 4
|
||||
case gemma12BLayerCount:
|
||||
numHeads = 16
|
||||
numKVHeads = 8
|
||||
case gemma27BLayerCount:
|
||||
numHeads = 32
|
||||
numKVHeads = 16
|
||||
default:
|
||||
numHeads = p.NumAttentionHeads
|
||||
numKVHeads = p.NumKeyValueHeads
|
||||
}
|
||||
|
||||
kv["gemma3.attention.head_count"] = numHeads
|
||||
kv["gemma3.attention.head_count_kv"] = numKVHeads
|
||||
|
||||
switch p.Architecture {
|
||||
case "Gemma3ForCausalLM":
|
||||
kv["gemma3.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["gemma3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
kv["gemma3.attention.key_length"] = p.HeadDim
|
||||
kv["gemma3.attention.value_length"] = p.HeadDim
|
||||
kv["gemma3.attention.sliding_window"] = p.SlidingWindow
|
||||
|
||||
// The sliding window pattern is either provided as the sliding_window_pattern
|
||||
// key (an int) or as the layer_types key (a list of strings).
|
||||
if p.SlidingWindowPattern != nil || len(p.LayerTypes) > 0 {
|
||||
kv["gemma3.attention.sliding_window_pattern"] = slices.Collect(func(yield func(bool) bool) {
|
||||
for i := range numBlocks {
|
||||
var isLocal bool
|
||||
if len(p.LayerTypes) > 0 && int(i) < len(p.LayerTypes) {
|
||||
isLocal = p.LayerTypes[i] == "sliding_attention"
|
||||
} else if p.SlidingWindowPattern != nil && *p.SlidingWindowPattern > 0 {
|
||||
isLocal = (i+1)%*p.SlidingWindowPattern != 0
|
||||
}
|
||||
if !yield(isLocal) {
|
||||
break
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
if p.FinalLogitSoftcap > 0 {
|
||||
kv["gemma3.final_logit_softcapping"] = p.FinalLogitSoftcap
|
||||
}
|
||||
kv["gemma3.rope.local.freq_base"] = cmp.Or(p.RopeLocalTheta, 10000.0)
|
||||
kv["gemma3.rope.freq_base"] = cmp.Or(p.RopeTheta, 1000000.0)
|
||||
if p.RopeScaling != nil && p.RopeScaling.Type == "yarn" && p.RopeScaling.Factor > 0 {
|
||||
kv["gemma3.rope.scaling.type"] = "yarn"
|
||||
kv["gemma3.rope.scaling.factor"] = p.RopeScaling.Factor
|
||||
kv["gemma3.rope.scaling.original_context_length"] = p.RopeScaling.OriginalMaxPositionEmbeddings
|
||||
kv["gemma3.rope.scaling.extrapolation_factor"] = cmp.Or(p.RopeScaling.ExtrapolationFactor, float32(1.0))
|
||||
kv["gemma3.rope.scaling.beta_fast"] = cmp.Or(p.RopeScaling.BetaFast, float32(64.0))
|
||||
kv["gemma3.rope.scaling.beta_slow"] = cmp.Or(p.RopeScaling.BetaSlow, float32(1.0))
|
||||
}
|
||||
|
||||
kv["gemma3.embedding_length"] = p.HiddenSize
|
||||
kv["gemma3.feed_forward_length"] = p.IntermediateSize
|
||||
default:
|
||||
kv["gemma3.context_length"] = cmp.Or(p.MaxPositionEmbeddings, 131072)
|
||||
kv["gemma3.embedding_length"] = p.TextModel.HiddenSize
|
||||
kv["gemma3.feed_forward_length"] = p.TextModel.IntermediateSize
|
||||
kv["gemma3.attention.sliding_window"] = p.TextModel.SlidingWindow
|
||||
kv["gemma3.vision.block_count"] = p.VisionModel.NumHiddenLayers
|
||||
kv["gemma3.vision.embedding_length"] = p.VisionModel.HiddenSize
|
||||
kv["gemma3.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
|
||||
kv["gemma3.vision.image_size"] = p.VisionModel.ImageSize
|
||||
kv["gemma3.vision.patch_size"] = p.VisionModel.PatchSize
|
||||
kv["gemma3.vision.num_channels"] = cmp.Or(p.VisionModel.NumChannels, 3)
|
||||
kv["gemma3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
|
||||
kv["gemma3.vision.attention.layer_norm_epsilon"] = cmp.Or(p.VisionModel.LayerNormEpsilon, 1e-6)
|
||||
kv["gemma3.attention.key_length"] = cmp.Or(p.TextModel.HeadDim, 256)
|
||||
kv["gemma3.attention.value_length"] = cmp.Or(p.TextModel.HeadDim, 256)
|
||||
}
|
||||
|
||||
if p.MultiModalTokensPerImage > 0 {
|
||||
kv["gemma3.mm.tokens_per_image"] = p.MultiModalTokensPerImage
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *gemma3Model) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"vision_tower.vision_model.embeddings", "v",
|
||||
"vision_tower.vision_model", "v",
|
||||
"vision_model.vision_model.embeddings", "v",
|
||||
"vision_model.vision_model", "v",
|
||||
"language_model.", "",
|
||||
"model.layers", "blk",
|
||||
"encoder.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.q_norm", "attn_q_norm",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.k_norm", "attn_k_norm",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"self_attn.out_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"post_attention_layernorm", "post_attention_norm",
|
||||
"pre_feedforward_layernorm", "ffn_norm",
|
||||
"post_feedforward_layernorm", "post_ffw_norm",
|
||||
"input_projection_weight", "input_projection.weight",
|
||||
"multi_modal_projector", "mm",
|
||||
}
|
||||
}
|
||||
165
convert/convert_gemma3n.go
Normal file
165
convert/convert_gemma3n.go
Normal file
@@ -0,0 +1,165 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
"gonum.org/v1/gonum/stat/distuv"
|
||||
)
|
||||
|
||||
type gemma3nModel struct {
|
||||
ModelParameters
|
||||
|
||||
TextModel struct {
|
||||
ActivationSparsityPattern []float32 `json:"activation_sparsity_pattern"`
|
||||
AltupActiveIdx uint32 `json:"altup_active_idx"`
|
||||
AltupCoefClip float32 `json:"altup_coef_clip"`
|
||||
AltupCorrectScale bool `json:"altup_correct_scale"`
|
||||
AltupLRMultiplier float32 `json:"altup_lr_multiplier"`
|
||||
AltupNumInputs uint32 `json:"altup_num_inputs"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenSizePerLayerInput uint32 `json:"hidden_size_per_layer_input"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
NumKVSharedLayers uint32 `json:"num_kv_shared_layers"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
RopeLocalBaseFreq float32 `json:"rope_local_base_freq"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
LayerTypes []string `json:"layer_types"`
|
||||
} `json:"text_config"`
|
||||
VisionModel struct{} `json:"vision_config"`
|
||||
}
|
||||
|
||||
func (m *gemma3nModel) KV(t *Tokenizer) KV {
|
||||
kv := m.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "gemma3n"
|
||||
kv["gemma3n.activation_sparsity_scale"] = slices.Collect(func(yield func(float32) bool) {
|
||||
norm := distuv.Normal{Mu: 0, Sigma: 1}
|
||||
for _, v := range m.TextModel.ActivationSparsityPattern {
|
||||
if !yield(float32(norm.Quantile(float64(v)))) {
|
||||
break
|
||||
}
|
||||
}
|
||||
})
|
||||
kv["gemma3n.altup.active_idx"] = m.TextModel.AltupActiveIdx
|
||||
kv["gemma3n.altup.correct_scale"] = m.TextModel.AltupCorrectScale
|
||||
kv["gemma3n.altup.lr_multiplier"] = m.TextModel.AltupLRMultiplier
|
||||
kv["gemma3n.altup.num_inputs"] = m.TextModel.AltupNumInputs
|
||||
kv["gemma3n.attention.head_count_kv"] = m.TextModel.NumKeyValueHeads
|
||||
kv["gemma3n.attention.head_count"] = m.TextModel.NumAttentionHeads
|
||||
kv["gemma3n.attention.layer_norm_rms_epsilon"] = m.TextModel.RMSNormEPS
|
||||
kv["gemma3n.attention.sliding_window"] = m.TextModel.SlidingWindow
|
||||
kv["gemma3n.attention.sliding_window_pattern"] = slices.Collect(func(yield func(bool) bool) {
|
||||
for _, t := range m.TextModel.LayerTypes {
|
||||
if !yield(t == "sliding_attention") {
|
||||
break
|
||||
}
|
||||
}
|
||||
})
|
||||
kv["gemma3n.attention.shared_kv_layers"] = m.TextModel.NumKVSharedLayers
|
||||
kv["gemma3n.block_count"] = m.TextModel.NumHiddenLayers
|
||||
kv["gemma3n.context_length"] = m.TextModel.MaxPositionEmbeddings
|
||||
kv["gemma3n.embedding_length_per_layer_input"] = m.TextModel.HiddenSizePerLayerInput
|
||||
kv["gemma3n.embedding_length"] = m.TextModel.HiddenSize
|
||||
kv["gemma3n.feed_forward_length"] = m.TextModel.IntermediateSize
|
||||
kv["gemma3n.head_dim"] = m.TextModel.HeadDim
|
||||
kv["gemma3n.rope.freq_base_local"] = m.TextModel.RopeLocalBaseFreq
|
||||
kv["gemma3n.rope.freq_base"] = m.TextModel.RopeTheta
|
||||
return kv
|
||||
}
|
||||
|
||||
func (m *gemma3nModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
out, ts := mergeTensors(ts,
|
||||
merge{"altup_proj.*.weight", "altup_proj.weight"},
|
||||
merge{"altup_unembd_proj.*.weight", "altup_unembd_proj.weight"},
|
||||
)
|
||||
|
||||
for _, t := range ts {
|
||||
switch {
|
||||
case strings.Contains(t.Name(), "audio_tower"),
|
||||
strings.Contains(t.Name(), "embed_audio"),
|
||||
strings.Contains(t.Name(), "vision_tower"),
|
||||
strings.Contains(t.Name(), "embed_vision"):
|
||||
// TODO: handle audio and vision towers
|
||||
continue
|
||||
case strings.Contains(t.Name(), "altup_predict_coef"),
|
||||
strings.Contains(t.Name(), "altup_correct_coef"):
|
||||
if m.TextModel.AltupCoefClip > 0 {
|
||||
t.SetRepacker(func(name string, data []float32, shape []uint64) (_ []float32, err error) {
|
||||
dims := make([]int, len(shape))
|
||||
for i := range shape {
|
||||
dims[i] = int(shape[i])
|
||||
}
|
||||
|
||||
var t tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
|
||||
t, err = tensor.Clamp(t, -m.TextModel.AltupCoefClip, m.TextModel.AltupCoefClip)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := t.Reshape(t.Shape().TotalSize()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return native.VectorF32(t.(*tensor.Dense))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (m *gemma3nModel) Replacements() []string {
|
||||
return []string{
|
||||
"model.language_model.embed_tokens_per_layer", "per_layer_token_embd",
|
||||
"model.language_model.embed_tokens", "token_embd",
|
||||
"model.language_model.per_layer_model_projection", "per_layer_model_proj",
|
||||
"model.language_model.per_layer_projection_norm", "per_layer_proj_norm", "model.language_model.altup_projections", "altup_proj",
|
||||
"model.language_model.altup_unembed_projections", "altup_unembd_proj",
|
||||
"model.language_model.norm", "output_norm",
|
||||
"model.language_model.layers", "blk",
|
||||
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.q_norm", "attn_q_norm",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.k_norm", "attn_k_norm",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"post_attention_layernorm", "post_attention_norm",
|
||||
"pre_feedforward_layernorm", "ffn_norm",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"post_feedforward_layernorm", "post_ffw_norm",
|
||||
"per_layer_input_gate", "inp_gate",
|
||||
"per_layer_projection", "proj",
|
||||
"post_per_layer_input_norm", "post_norm",
|
||||
"altup.", "altup_",
|
||||
"modality_router", "router",
|
||||
"prediction_coefs", "predict_coef",
|
||||
"correction_coefs", "correct_coef",
|
||||
"correct_output_scale", "correct_scale.weight",
|
||||
"laurel.", "laurel_",
|
||||
"linear_left", "l",
|
||||
"linear_right", "r",
|
||||
"post_laurel_norm", "post_norm",
|
||||
}
|
||||
}
|
||||
574
convert/convert_gemma4.go
Normal file
574
convert/convert_gemma4.go
Normal file
@@ -0,0 +1,574 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type gemma4Model struct {
|
||||
gemmaModel
|
||||
Architecture string
|
||||
TextModel struct {
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
GlobalHeadDim uint32 `json:"global_head_dim"`
|
||||
VocabSize uint32 `json:"vocab_size"`
|
||||
RMSNormEps float32 `json:"rms_norm_eps"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
SlidingWindowPattern *int32 `json:"_sliding_window_pattern"`
|
||||
LayerTypes []string `json:"layer_types"`
|
||||
FinalLogitSoftcapping float32 `json:"final_logit_softcapping"`
|
||||
EnableMoeBlock bool `json:"enable_moe_block"`
|
||||
NumExperts *uint32 `json:"num_experts"`
|
||||
TopKExperts *uint32 `json:"top_k_experts"`
|
||||
ExpertIntermediateSize *uint32 `json:"moe_intermediate_size"`
|
||||
HiddenSizePerLayerInput *uint32 `json:"hidden_size_per_layer_input"`
|
||||
NumKVSharedLayers uint32 `json:"num_kv_shared_layers"`
|
||||
AttentionKEqV bool `json:"attention_k_eq_v"`
|
||||
NumGlobalKeyValueHeads *uint32 `json:"num_global_key_value_heads"`
|
||||
QueryPreAttnScalar *uint32 `json:"query_pre_attn_scalar"`
|
||||
UseDoubleWideMLP bool `json:"use_double_wide_mlp"`
|
||||
RopeParameters map[string]*struct {
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
PartialRotaryFactor *float32 `json:"partial_rotary_factor"`
|
||||
} `json:"rope_parameters"`
|
||||
} `json:"text_config"`
|
||||
|
||||
VisionModel struct {
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
NumChannels uint32 `json:"num_channels"`
|
||||
PoolingKernelSize uint32 `json:"pooling_kernel_size"`
|
||||
LayerNormEps float32 `json:"layer_norm_eps"`
|
||||
} `json:"vision_config"`
|
||||
|
||||
AudioModel *struct {
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
OutputProjDims uint32 `json:"output_proj_dims"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
ConvKernelSize uint32 `json:"conv_kernel_size"`
|
||||
RMSNormEps float32 `json:"rms_norm_eps"`
|
||||
} `json:"audio_config"`
|
||||
}
|
||||
|
||||
func (p *gemma4Model) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "gemma4"
|
||||
kv["tokenizer.ggml.model"] = "llama"
|
||||
kv["tokenizer.ggml.pre"] = "gemma4"
|
||||
|
||||
tc := p.TextModel
|
||||
|
||||
kv["gemma4.block_count"] = tc.NumHiddenLayers
|
||||
kv["gemma4.embedding_length"] = tc.HiddenSize
|
||||
|
||||
// Per-layer FFN width: when use_double_wide_mlp is set, KV-shared layers get 2x FFN width.
|
||||
if tc.UseDoubleWideMLP && tc.NumKVSharedLayers > 0 {
|
||||
firstShared := int(tc.NumHiddenLayers) - int(tc.NumKVSharedLayers)
|
||||
ffnWidths := make([]int32, tc.NumHiddenLayers)
|
||||
for i := range ffnWidths {
|
||||
if i >= firstShared {
|
||||
ffnWidths[i] = int32(tc.IntermediateSize * 2)
|
||||
} else {
|
||||
ffnWidths[i] = int32(tc.IntermediateSize)
|
||||
}
|
||||
}
|
||||
kv["gemma4.feed_forward_length"] = ffnWidths
|
||||
} else {
|
||||
kv["gemma4.feed_forward_length"] = tc.IntermediateSize
|
||||
}
|
||||
kv["gemma4.context_length"] = tc.MaxPositionEmbeddings
|
||||
kv["gemma4.attention.head_count"] = tc.NumAttentionHeads
|
||||
// Per-layer KV head count array: SWA layers use NumKeyValueHeads, global layers use NumGlobalKeyValueHeads
|
||||
if tc.NumGlobalKeyValueHeads != nil && *tc.NumGlobalKeyValueHeads != tc.NumKeyValueHeads && len(tc.LayerTypes) > 0 {
|
||||
kvHeads := make([]int32, len(tc.LayerTypes))
|
||||
for i, lt := range tc.LayerTypes {
|
||||
if lt == "sliding_attention" {
|
||||
kvHeads[i] = int32(tc.NumKeyValueHeads)
|
||||
} else {
|
||||
kvHeads[i] = int32(*tc.NumGlobalKeyValueHeads)
|
||||
}
|
||||
}
|
||||
kv["gemma4.attention.head_count_kv"] = kvHeads
|
||||
} else {
|
||||
kv["gemma4.attention.head_count_kv"] = tc.NumKeyValueHeads
|
||||
}
|
||||
// key_length = global head dim, key_length_swa = local (SWA) head dim
|
||||
kv["gemma4.attention.key_length"] = tc.GlobalHeadDim
|
||||
kv["gemma4.attention.value_length"] = tc.GlobalHeadDim
|
||||
kv["gemma4.attention.key_length_swa"] = tc.HeadDim
|
||||
kv["gemma4.attention.value_length_swa"] = tc.HeadDim
|
||||
kv["gemma4.attention.layer_norm_rms_epsilon"] = tc.RMSNormEps
|
||||
kv["gemma4.attention.sliding_window"] = tc.SlidingWindow
|
||||
|
||||
// Sliding window pattern from layer_types
|
||||
if len(tc.LayerTypes) > 0 {
|
||||
kv["gemma4.attention.sliding_window_pattern"] = slices.Collect(func(yield func(bool) bool) {
|
||||
for _, lt := range tc.LayerTypes {
|
||||
if !yield(lt == "sliding_attention") {
|
||||
break
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
kv["gemma4.attention.shared_kv_layers"] = tc.NumKVSharedLayers
|
||||
|
||||
// RoPE: dimension_count is the full global head dim (freq_factors handle partial rotation)
|
||||
if rp, ok := tc.RopeParameters["full_attention"]; ok && rp != nil {
|
||||
kv["gemma4.rope.freq_base"] = rp.RopeTheta
|
||||
kv["gemma4.rope.dimension_count"] = tc.GlobalHeadDim
|
||||
}
|
||||
if rp, ok := tc.RopeParameters["sliding_attention"]; ok && rp != nil {
|
||||
kv["gemma4.rope.freq_base_swa"] = rp.RopeTheta
|
||||
kv["gemma4.rope.dimension_count_swa"] = tc.HeadDim
|
||||
}
|
||||
|
||||
if tc.FinalLogitSoftcapping > 0 {
|
||||
kv["gemma4.final_logit_softcapping"] = tc.FinalLogitSoftcapping
|
||||
}
|
||||
|
||||
// MoE
|
||||
if tc.EnableMoeBlock && tc.NumExperts != nil {
|
||||
kv["gemma4.expert_count"] = *tc.NumExperts
|
||||
if tc.TopKExperts != nil {
|
||||
kv["gemma4.expert_used_count"] = *tc.TopKExperts
|
||||
}
|
||||
if tc.ExpertIntermediateSize != nil {
|
||||
kv["gemma4.expert_feed_forward_length"] = *tc.ExpertIntermediateSize
|
||||
}
|
||||
}
|
||||
|
||||
// PLE — always emit, even when 0
|
||||
pleSize := uint32(0)
|
||||
if tc.HiddenSizePerLayerInput != nil {
|
||||
pleSize = *tc.HiddenSizePerLayerInput
|
||||
}
|
||||
kv["gemma4.embedding_length_per_layer_input"] = pleSize
|
||||
|
||||
// Vision model KV metadata
|
||||
vc := p.VisionModel
|
||||
if vc.NumHiddenLayers > 0 {
|
||||
kv["gemma4.vision.block_count"] = vc.NumHiddenLayers
|
||||
kv["gemma4.vision.embedding_length"] = vc.HiddenSize
|
||||
kv["gemma4.vision.attention.head_count"] = vc.NumAttentionHeads
|
||||
kv["gemma4.vision.feed_forward_length"] = vc.IntermediateSize
|
||||
kv["gemma4.vision.patch_size"] = vc.PatchSize
|
||||
numCh := vc.NumChannels
|
||||
if numCh == 0 {
|
||||
numCh = 3
|
||||
}
|
||||
kv["gemma4.vision.num_channels"] = numCh
|
||||
nMerge := vc.PoolingKernelSize
|
||||
if nMerge == 0 {
|
||||
nMerge = 3
|
||||
}
|
||||
kv["gemma4.vision.projector.scale_factor"] = nMerge
|
||||
eps := vc.LayerNormEps
|
||||
if eps == 0 {
|
||||
eps = 1e-6
|
||||
}
|
||||
kv["gemma4.vision.attention.layer_norm_epsilon"] = eps
|
||||
}
|
||||
|
||||
// Audio model KV metadata
|
||||
if p.AudioModel != nil && p.AudioModel.NumHiddenLayers > 0 {
|
||||
ac := p.AudioModel
|
||||
kv["gemma4.audio.block_count"] = ac.NumHiddenLayers
|
||||
kv["gemma4.audio.embedding_length"] = ac.HiddenSize
|
||||
kv["gemma4.audio.feed_forward_length"] = ac.HiddenSize * 4
|
||||
kv["gemma4.audio.attention.head_count"] = ac.NumAttentionHeads
|
||||
eps := ac.RMSNormEps
|
||||
if eps == 0 {
|
||||
eps = 1e-6
|
||||
}
|
||||
kv["gemma4.audio.attention.layer_norm_epsilon"] = eps
|
||||
if ac.ConvKernelSize > 0 {
|
||||
kv["gemma4.audio.conv_kernel_size"] = ac.ConvKernelSize
|
||||
}
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *gemma4Model) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
// First pass: collect vision clamp scalar values into a packed tensor.
|
||||
// Layout: per vision layer (0..N-1), 7 linears (q,k,v,out,gate,up,down) × 4 values (inMin,inMax,outMin,outMax).
|
||||
// Then 4 values for the projector (mm.input_projection).
|
||||
clampSuffixes := []string{".input_min", ".input_max", ".output_min", ".output_max"}
|
||||
clampMap := make(map[string]float32)
|
||||
for _, t := range ts {
|
||||
name := t.Name()
|
||||
for _, sfx := range clampSuffixes {
|
||||
if strings.HasSuffix(name, sfx) && (strings.Contains(name, "vision_tower") || strings.Contains(name, "embed_vision")) {
|
||||
var buf bytes.Buffer
|
||||
t.WriteTo(&buf)
|
||||
data := buf.Bytes()
|
||||
if len(data) >= 4 {
|
||||
clampMap[name] = math.Float32frombits(uint32(data[0]) | uint32(data[1])<<8 | uint32(data[2])<<16 | uint32(data[3])<<24)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var out []*ggml.Tensor
|
||||
for _, t := range ts {
|
||||
name := t.Name()
|
||||
|
||||
// Skip embedding_post_projection_norm — used as weightless RMS norm in inference
|
||||
if strings.Contains(name, "embedding_post_projection_norm") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Vision tensor renaming: match published mmproj GGUF names
|
||||
if strings.HasPrefix(name, "v.blk.") {
|
||||
name = strings.Replace(name, ".attn_norm.", ".ln1.", 1)
|
||||
name = strings.Replace(name, ".ffn_norm.", ".ln2.", 1)
|
||||
name = strings.Replace(name, ".attn_output.", ".attn_out.", 1)
|
||||
name = strings.Replace(name, ".post_attention_norm.", ".attn_post_norm.", 1)
|
||||
name = strings.Replace(name, ".post_ffw_norm.", ".ffn_post_norm.", 1)
|
||||
name = strings.Replace(name, ".layer_output_scale.", ".out_scale.", 1)
|
||||
}
|
||||
|
||||
// per_dim_scale: apply softplus to weight data and add .weight suffix.
|
||||
if strings.HasPrefix(name, "a.blk.") && strings.HasSuffix(name, "per_dim_scale") {
|
||||
name = name + ".weight"
|
||||
t.SetRepacker(softplusRepacker)
|
||||
}
|
||||
|
||||
// Depthwise conv1d: squeeze middle dimension [C, 1, K] → [C, K].
|
||||
if strings.HasPrefix(name, "a.blk.") && strings.Contains(name, "conv_dw") && strings.HasSuffix(name, ".weight") {
|
||||
t.SetRepacker(squeezeMiddleDim)
|
||||
}
|
||||
|
||||
shape := t.Shape()
|
||||
|
||||
// Convert scalar tensors (input_min/max, output_min/max) to 1D
|
||||
if len(shape) == 0 {
|
||||
shape = []uint64{1}
|
||||
}
|
||||
|
||||
// Depthwise conv1d shape: safetensors [C, 1, K] → GGUF ne[K, C].
|
||||
// Shape array here maps to GGUF ne[] directly, but safetensors reader
|
||||
// stores shape in PyTorch order [C, 1, K] which the GGUF writer inverts.
|
||||
// Published GGUF has ne[0]=K, ne[1]=C → shape array must be [K, C].
|
||||
if strings.HasPrefix(name, "a.blk.") && strings.Contains(name, "conv_dw") && strings.HasSuffix(name, ".weight") && len(shape) == 3 {
|
||||
shape = []uint64{shape[0], shape[2]}
|
||||
}
|
||||
|
||||
// MoE expert weights: no transpose needed. Safetensors stores [experts, out, in]
|
||||
// which the framework reverses to GGUF ne=[in, out, experts], matching ggml_mul_mat_id.
|
||||
// (transposeExperts was incorrectly swapping dims — removed)
|
||||
|
||||
// Audio conv weights are forced to F32 via tensorBase.Kind() in reader.go
|
||||
// (im2col doesn't support BF16). No kindOverride needed — the Kind() method
|
||||
// controls both the GGUF header type AND the WriteTo data encoding path.
|
||||
var kindOverride *uint32
|
||||
|
||||
// Vision patch embedding: reshape from [n_embd, ksize_sq_c] to [n_embd, 3, patch_size, patch_size]
|
||||
// Must be stored as F16 (not BF16) because the Conv2D im2col kernel requires F16/F32.
|
||||
if strings.Contains(name, "v.patch_embd.weight") && len(shape) == 2 {
|
||||
nEmbd := shape[0]
|
||||
patchSize := uint64(p.VisionModel.PatchSize)
|
||||
if patchSize == 0 {
|
||||
patchSize = 16
|
||||
}
|
||||
numCh := uint64(p.VisionModel.NumChannels)
|
||||
if numCh == 0 {
|
||||
numCh = 3
|
||||
}
|
||||
t.SetRepacker(p.reshapePatchEmbed)
|
||||
shape = []uint64{nEmbd, numCh, patchSize, patchSize}
|
||||
f16Kind := uint32(1) // tensorKindFP16
|
||||
kindOverride = &f16Kind
|
||||
}
|
||||
|
||||
// Vision position embedding: keep 3D [2, maxPos, nEmbd] — matching published mmproj format.
|
||||
// The framework reverses shape to GGUF ne=[nEmbd, maxPos, 2]. No data repacking needed.
|
||||
|
||||
kind := t.Kind()
|
||||
if kindOverride != nil {
|
||||
kind = *kindOverride
|
||||
}
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: name,
|
||||
Kind: kind,
|
||||
Shape: shape,
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
// Generate a single global rope_freqs.weight for proportional RoPE on global attention layers.
|
||||
// This matches the published GGUF format: one global tensor shared by all layers.
|
||||
// Global layers use partial_rotary_factor (0.25) — only rotate that fraction of dims.
|
||||
// Dimensions beyond the rotated portion get freq_factor=1e30 (effectively no rotation).
|
||||
tc := p.TextModel
|
||||
if tc.GlobalHeadDim > 0 {
|
||||
globalFreqsSize := tc.GlobalHeadDim / 2 // freq_factors are per dimension pair
|
||||
|
||||
// Compute number of rotated pairs for global layers
|
||||
partialRotaryFactor := float32(0.25) // default
|
||||
if rp, ok := tc.RopeParameters["full_attention"]; ok && rp != nil && rp.PartialRotaryFactor != nil {
|
||||
partialRotaryFactor = *rp.PartialRotaryFactor
|
||||
}
|
||||
nRotFull := int(float32(tc.GlobalHeadDim) * partialRotaryFactor / 2)
|
||||
|
||||
freqs := make(ropeFactor, globalFreqsSize)
|
||||
for j := range freqs {
|
||||
if j < nRotFull {
|
||||
freqs[j] = 1.0
|
||||
} else {
|
||||
freqs[j] = 1e30 // effectively disable rotation
|
||||
}
|
||||
}
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: "rope_freqs.weight",
|
||||
Kind: 0, // F32
|
||||
Shape: []uint64{uint64(len(freqs))},
|
||||
WriterTo: freqs,
|
||||
})
|
||||
}
|
||||
|
||||
// Emit packed vision clamp data as a single F32 tensor.
|
||||
// Layout: numLayers × 7 linears (q,k,v,out,gate,up,down) × 4 floats (inMin,inMax,outMin,outMax)
|
||||
// then 4 floats for the projector. Total = (numLayers*7 + 1) * 4 floats.
|
||||
if len(clampMap) > 0 {
|
||||
numLayers := int(p.VisionModel.NumHiddenLayers)
|
||||
linearNames := []string{"attn_q", "attn_k", "attn_v", "attn_out", "ffn_gate", "ffn_up", "ffn_down"}
|
||||
suffixes := []string{".input_min", ".input_max", ".output_min", ".output_max"}
|
||||
|
||||
totalFloats := (numLayers*len(linearNames) + 1) * 4 // +1 for projector
|
||||
clampData := make([]float32, totalFloats)
|
||||
|
||||
for layer := range numLayers {
|
||||
for li, ln := range linearNames {
|
||||
for si, sfx := range suffixes {
|
||||
sfxMap := map[string]string{"attn_q": "q_proj", "attn_k": "k_proj", "attn_v": "v_proj", "attn_out": "o_proj", "ffn_gate": "gate_proj", "ffn_up": "up_proj", "ffn_down": "down_proj"}
|
||||
for origName, val := range clampMap {
|
||||
if strings.Contains(origName, fmt.Sprintf("layers.%d.", layer)) && strings.HasSuffix(origName, sfx) && strings.Contains(origName, sfxMap[ln]) {
|
||||
idx := (layer*len(linearNames)+li)*4 + si
|
||||
clampData[idx] = val
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Projector clamp values
|
||||
projIdx := numLayers * len(linearNames) * 4
|
||||
for si, sfx := range suffixes {
|
||||
for origName, val := range clampMap {
|
||||
if strings.Contains(origName, "input_projection") && strings.HasSuffix(origName, sfx) {
|
||||
clampData[projIdx+si] = val
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
binary.Write(&buf, binary.LittleEndian, clampData)
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: "v.clamp_data",
|
||||
Kind: 0, // F32
|
||||
Shape: []uint64{uint64(totalFloats)},
|
||||
WriterTo: &buf,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// reshapePatchEmbed reshapes the vision patch embedding from HF layout [n_embd, ksize*ksize*channels]
|
||||
// to GGUF layout [n_embd, channels, patch_size, patch_size].
|
||||
func (*gemma4Model) reshapePatchEmbed(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
if len(shape) != 2 {
|
||||
return data, nil
|
||||
}
|
||||
nEmbd := int(shape[0])
|
||||
ksqC := int(shape[1])
|
||||
nChannels := 3
|
||||
patchSize := int(math.Sqrt(float64(ksqC / nChannels)))
|
||||
|
||||
// HF layout: [n_embd, patch_size * patch_size * channels] (row-major)
|
||||
// Need: [n_embd, channels, patch_size, patch_size]
|
||||
result := make([]float32, len(data))
|
||||
for e := range nEmbd {
|
||||
for c := range nChannels {
|
||||
for h := range patchSize {
|
||||
for w := range patchSize {
|
||||
srcIdx := e*ksqC + h*patchSize*nChannels + w*nChannels + c
|
||||
dstIdx := e*nChannels*patchSize*patchSize + c*patchSize*patchSize + h*patchSize + w
|
||||
result[dstIdx] = data[srcIdx]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
shape[0] = uint64(nEmbd)
|
||||
shape[1] = uint64(nChannels * patchSize * patchSize)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// softplusRepacker applies softplus (ln(1 + exp(x))) to tensor data.
|
||||
// Used for per_dim_scale tensors which the published GGUF stores pre-activated.
|
||||
func softplusRepacker(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
result := make([]float32, len(data))
|
||||
for i, x := range data {
|
||||
result[i] = float32(math.Log(1 + math.Exp(float64(x))))
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// squeezeMiddleDim squeezes the middle dimension from [C, 1, K] → [C, K] for depthwise conv1d weights.
|
||||
// Data layout stays the same since the middle dim is 1 — just a shape change.
|
||||
func squeezeMiddleDim(_ string, data []float32, _ []uint64) ([]float32, error) {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (p *gemma4Model) Replacements() []string {
|
||||
return []string{
|
||||
// ClippableLinear wraps nn.Linear — strip .linear. from weight path
|
||||
".linear.weight", ".weight",
|
||||
".linear.bias", ".bias",
|
||||
|
||||
// Audio SSCP (Sub-Sample Convolution Projection)
|
||||
"model.audio_tower.subsample_conv_projection.conv_0.conv", "a.conv1d.0",
|
||||
"model.audio_tower.subsample_conv_projection.conv_0.norm", "a.conv1d.0.norm",
|
||||
"model.audio_tower.subsample_conv_projection.conv_1.conv", "a.conv1d.1",
|
||||
"model.audio_tower.subsample_conv_projection.conv_1.norm", "a.conv1d.1.norm",
|
||||
"model.audio_tower.subsample_conv_projection.layer0.conv", "a.conv1d.0",
|
||||
"model.audio_tower.subsample_conv_projection.layer0.norm", "a.conv1d.0.norm",
|
||||
"model.audio_tower.subsample_conv_projection.layer1.conv", "a.conv1d.1",
|
||||
"model.audio_tower.subsample_conv_projection.layer1.norm", "a.conv1d.1.norm",
|
||||
"model.audio_tower.subsample_conv_projection.input_proj_linear", "a.pre_encode.out",
|
||||
|
||||
// Audio conformer blocks
|
||||
"model.audio_tower.conformer", "a.blk",
|
||||
"model.audio_tower.layers", "a.blk",
|
||||
|
||||
// Audio conformer attention
|
||||
"attention.attn.relative_position_embedding.pos_proj", "linear_pos",
|
||||
"self_attn.relative_k_proj", "linear_pos",
|
||||
"attention.attn.per_dim_key_scale", "per_dim_k_scale",
|
||||
"attention.attn.per_dim_scale", "per_dim_scale",
|
||||
"self_attn.per_dim_scale", "per_dim_scale",
|
||||
"attention.attn.q_proj", "attn_q",
|
||||
"attention.attn.k_proj", "attn_k",
|
||||
"attention.attn.v_proj", "attn_v",
|
||||
"attention.pre_attn_norm", "ln1",
|
||||
"attention.post_norm", "ln2",
|
||||
"attention.post", "attn_out",
|
||||
"self_attn.post", "attn_out",
|
||||
"norm_pre_attn", "ln1",
|
||||
"norm_post_attn", "ln2",
|
||||
|
||||
// Audio conformer feedforward
|
||||
"ffw_layer_start.pre_layer_norm", "ffn_norm",
|
||||
"ffw_layer_start.post_layer_norm", "ffn_post_norm",
|
||||
"ffw_layer_start.ffw_layer_1", "ffn_up",
|
||||
"ffw_layer_start.ffw_layer_2", "ffn_down",
|
||||
"ffw_layer_end.pre_layer_norm", "ffn_norm_1",
|
||||
"ffw_layer_end.post_layer_norm", "ffn_post_norm_1",
|
||||
"ffw_layer_end.ffw_layer_1", "ffn_up_1",
|
||||
"ffw_layer_end.ffw_layer_2", "ffn_down_1",
|
||||
"feed_forward1.pre_layer_norm", "ffn_norm",
|
||||
"feed_forward1.post_layer_norm", "ffn_post_norm",
|
||||
"feed_forward1.ffw_layer_1", "ffn_up",
|
||||
"feed_forward1.ffw_layer_2", "ffn_down",
|
||||
"feed_forward2.pre_layer_norm", "ffn_norm_1",
|
||||
"feed_forward2.post_layer_norm", "ffn_post_norm_1",
|
||||
"feed_forward2.ffw_layer_1", "ffn_up_1",
|
||||
"feed_forward2.ffw_layer_2", "ffn_down_1",
|
||||
|
||||
// Audio conformer lightweight conv1d
|
||||
"lconv1d.depthwise_conv1d", "conv_dw",
|
||||
"lconv1d.pre_layer_norm", "conv_norm",
|
||||
"lconv1d.conv_norm", "norm_conv",
|
||||
"lconv1d.linear_start", "conv_pw1",
|
||||
"lconv1d.linear_end", "conv_pw2",
|
||||
|
||||
// Audio block final norm
|
||||
"norm_out", "layer_pre_norm",
|
||||
|
||||
// Audio embedder and output projection
|
||||
"model.embed_audio.embedding_projection", "mm.a.input_projection",
|
||||
"model.audio_tower.output_proj", "mm.a.fc",
|
||||
|
||||
// Vision encoder
|
||||
"model.vision_tower.encoder.layers", "v.blk",
|
||||
"model.vision_tower.patch_embedder.input_proj", "v.patch_embd",
|
||||
"model.vision_tower.patch_embedder.position_embedding_table", "v.position_embd.weight",
|
||||
"model.vision_tower.std_bias", "v.std_bias",
|
||||
"model.vision_tower.std_scale", "v.std_scale",
|
||||
|
||||
// Vision multimodal projector
|
||||
"model.embed_vision.embedding_projection", "mm.input_projection",
|
||||
|
||||
// Text model
|
||||
"model.language_model.embed_tokens_per_layer", "per_layer_token_embd",
|
||||
"model.language_model.embed_tokens", "token_embd",
|
||||
"model.language_model.per_layer_model_projection", "per_layer_model_proj",
|
||||
"model.language_model.per_layer_projection_norm", "per_layer_proj_norm",
|
||||
"model.language_model.norm", "output_norm",
|
||||
"model.language_model.layers", "blk",
|
||||
|
||||
// Shared attention replacements (work for both text and vision tensors)
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.q_norm", "attn_q_norm",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.k_norm", "attn_k_norm",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
|
||||
// Post norms
|
||||
"post_attention_layernorm", "post_attention_norm",
|
||||
"pre_feedforward_layernorm_2", "pre_ffw_norm_2",
|
||||
"pre_feedforward_layernorm", "ffn_norm",
|
||||
"post_feedforward_layernorm_1", "post_ffw_norm_1",
|
||||
"post_feedforward_layernorm_2", "post_ffw_norm_2",
|
||||
"post_feedforward_layernorm", "post_ffw_norm",
|
||||
|
||||
// PLE
|
||||
"per_layer_input_gate", "inp_gate",
|
||||
"per_layer_projection", "proj",
|
||||
"post_per_layer_input_norm", "post_norm",
|
||||
|
||||
// MoE
|
||||
"router.proj", "ffn_gate_inp",
|
||||
"router.scale", "ffn_gate_inp.scale",
|
||||
"router.per_expert_scale.weight", "ffn_down_exps.scale",
|
||||
"router.per_expert_scale", "ffn_down_exps.scale",
|
||||
"experts.gate_up_proj.weight", "ffn_gate_up_exps.weight",
|
||||
"experts.gate_up_proj", "ffn_gate_up_exps.weight",
|
||||
"experts.down_proj.weight", "ffn_down_exps.weight",
|
||||
"experts.down_proj", "ffn_down_exps.weight",
|
||||
"moe.gate_proj", "ffn_gate_exps.weight",
|
||||
"moe.up_proj", "ffn_up_exps.weight",
|
||||
"moe.gate_up_proj.weight", "ffn_gate_up_exps.weight",
|
||||
"moe.gate_up_proj", "ffn_gate_up_exps.weight",
|
||||
"moe.down_proj", "ffn_down_exps.weight",
|
||||
"moe.per_expert_scale.weight", "ffn_down_exps.scale",
|
||||
"moe.per_expert_scale", "ffn_down_exps.scale",
|
||||
|
||||
// Layer scalar
|
||||
"layer_scalar", "layer_output_scale.weight",
|
||||
}
|
||||
}
|
||||
318
convert/convert_gemma4_test.go
Normal file
318
convert/convert_gemma4_test.go
Normal file
@@ -0,0 +1,318 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGemma4AudioReplacements(t *testing.T) {
|
||||
p := gemma4Model{}
|
||||
r := strings.NewReplacer(p.Replacements()...)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
// SSCP convolution blocks
|
||||
{
|
||||
"sscp conv0 weight",
|
||||
"model.audio_tower.subsample_conv_projection.conv_0.conv.weight",
|
||||
"a.conv1d.0.weight",
|
||||
},
|
||||
{
|
||||
"sscp conv0 norm",
|
||||
"model.audio_tower.subsample_conv_projection.conv_0.norm.weight",
|
||||
"a.conv1d.0.norm.weight",
|
||||
},
|
||||
{
|
||||
"sscp conv1 weight",
|
||||
"model.audio_tower.subsample_conv_projection.conv_1.conv.weight",
|
||||
"a.conv1d.1.weight",
|
||||
},
|
||||
{
|
||||
"sscp input proj weight",
|
||||
"model.audio_tower.subsample_conv_projection.input_proj_linear.weight",
|
||||
"a.pre_encode.out.weight",
|
||||
},
|
||||
{
|
||||
"sscp input proj bias",
|
||||
"model.audio_tower.subsample_conv_projection.input_proj_linear.bias",
|
||||
"a.pre_encode.out.bias",
|
||||
},
|
||||
{
|
||||
"sscp layer0 conv weight (new naming)",
|
||||
"model.audio_tower.subsample_conv_projection.layer0.conv.weight",
|
||||
"a.conv1d.0.weight",
|
||||
},
|
||||
{
|
||||
"sscp layer1 norm weight (new naming)",
|
||||
"model.audio_tower.subsample_conv_projection.layer1.norm.weight",
|
||||
"a.conv1d.1.norm.weight",
|
||||
},
|
||||
|
||||
// Conformer attention
|
||||
{
|
||||
"attn q weight",
|
||||
"model.audio_tower.conformer.0.attention.attn.q_proj.linear.weight",
|
||||
"a.blk.0.attn_q.weight",
|
||||
},
|
||||
{
|
||||
"attn k weight",
|
||||
"model.audio_tower.conformer.5.attention.attn.k_proj.linear.weight",
|
||||
"a.blk.5.attn_k.weight",
|
||||
},
|
||||
{
|
||||
"attn v clamp input_min",
|
||||
"model.audio_tower.conformer.0.attention.attn.v_proj.input_min",
|
||||
"a.blk.0.attn_v.input_min",
|
||||
},
|
||||
{
|
||||
"attn out weight (ClippableLinear)",
|
||||
"model.audio_tower.conformer.0.attention.post.linear.weight",
|
||||
"a.blk.0.attn_out.weight",
|
||||
},
|
||||
{
|
||||
"attn out clamp output_max",
|
||||
"model.audio_tower.conformer.0.attention.post.output_max",
|
||||
"a.blk.0.attn_out.output_max",
|
||||
},
|
||||
{
|
||||
"attn pre norm",
|
||||
"model.audio_tower.conformer.0.attention.pre_attn_norm.weight",
|
||||
"a.blk.0.ln1.weight",
|
||||
},
|
||||
{
|
||||
"attn post norm",
|
||||
"model.audio_tower.conformer.0.attention.post_norm.weight",
|
||||
"a.blk.0.ln2.weight",
|
||||
},
|
||||
{
|
||||
"linear pos",
|
||||
"model.audio_tower.conformer.0.attention.attn.relative_position_embedding.pos_proj.weight",
|
||||
"a.blk.0.linear_pos.weight",
|
||||
},
|
||||
{
|
||||
"per dim scale",
|
||||
"model.audio_tower.conformer.0.attention.attn.per_dim_scale",
|
||||
"a.blk.0.per_dim_scale",
|
||||
},
|
||||
{
|
||||
"per dim key scale",
|
||||
"model.audio_tower.conformer.0.attention.attn.per_dim_key_scale",
|
||||
"a.blk.0.per_dim_k_scale",
|
||||
},
|
||||
{
|
||||
"attn relative k proj (new naming)",
|
||||
"model.audio_tower.layers.0.self_attn.relative_k_proj.weight",
|
||||
"a.blk.0.linear_pos.weight",
|
||||
},
|
||||
{
|
||||
"attn pre norm (new naming)",
|
||||
"model.audio_tower.layers.0.norm_pre_attn.weight",
|
||||
"a.blk.0.ln1.weight",
|
||||
},
|
||||
{
|
||||
"attn post norm (new naming)",
|
||||
"model.audio_tower.layers.0.norm_post_attn.weight",
|
||||
"a.blk.0.ln2.weight",
|
||||
},
|
||||
{
|
||||
"attn out clamp output_max (new naming)",
|
||||
"model.audio_tower.layers.0.self_attn.post.output_max",
|
||||
"a.blk.0.attn_out.output_max",
|
||||
},
|
||||
{
|
||||
"per dim scale (new naming)",
|
||||
"model.audio_tower.layers.0.self_attn.per_dim_scale",
|
||||
"a.blk.0.per_dim_scale",
|
||||
},
|
||||
|
||||
// Conformer feedforward start
|
||||
{
|
||||
"ffn up weight",
|
||||
"model.audio_tower.conformer.0.ffw_layer_start.ffw_layer_1.linear.weight",
|
||||
"a.blk.0.ffn_up.weight",
|
||||
},
|
||||
{
|
||||
"ffn down weight",
|
||||
"model.audio_tower.conformer.0.ffw_layer_start.ffw_layer_2.linear.weight",
|
||||
"a.blk.0.ffn_down.weight",
|
||||
},
|
||||
{
|
||||
"ffn norm",
|
||||
"model.audio_tower.conformer.0.ffw_layer_start.pre_layer_norm.weight",
|
||||
"a.blk.0.ffn_norm.weight",
|
||||
},
|
||||
{
|
||||
"ffn post norm",
|
||||
"model.audio_tower.conformer.0.ffw_layer_start.post_layer_norm.weight",
|
||||
"a.blk.0.ffn_post_norm.weight",
|
||||
},
|
||||
|
||||
// Conformer feedforward end
|
||||
{
|
||||
"ffn up 1 weight",
|
||||
"model.audio_tower.conformer.0.ffw_layer_end.ffw_layer_1.linear.weight",
|
||||
"a.blk.0.ffn_up_1.weight",
|
||||
},
|
||||
{
|
||||
"ffn down 1 weight",
|
||||
"model.audio_tower.conformer.0.ffw_layer_end.ffw_layer_2.linear.weight",
|
||||
"a.blk.0.ffn_down_1.weight",
|
||||
},
|
||||
{
|
||||
"ffn norm 1",
|
||||
"model.audio_tower.conformer.0.ffw_layer_end.pre_layer_norm.weight",
|
||||
"a.blk.0.ffn_norm_1.weight",
|
||||
},
|
||||
{
|
||||
"ffn post norm 1",
|
||||
"model.audio_tower.conformer.0.ffw_layer_end.post_layer_norm.weight",
|
||||
"a.blk.0.ffn_post_norm_1.weight",
|
||||
},
|
||||
{
|
||||
"ffn up output_max (new naming)",
|
||||
"model.audio_tower.layers.10.feed_forward1.ffw_layer_1.output_max",
|
||||
"a.blk.10.ffn_up.output_max",
|
||||
},
|
||||
{
|
||||
"ffn down output_min (new naming)",
|
||||
"model.audio_tower.layers.0.feed_forward1.ffw_layer_2.output_min",
|
||||
"a.blk.0.ffn_down.output_min",
|
||||
},
|
||||
{
|
||||
"ffn up 1 input_max (new naming)",
|
||||
"model.audio_tower.layers.0.feed_forward2.ffw_layer_1.input_max",
|
||||
"a.blk.0.ffn_up_1.input_max",
|
||||
},
|
||||
{
|
||||
"ffn norm 1 (new naming)",
|
||||
"model.audio_tower.layers.0.feed_forward2.pre_layer_norm.weight",
|
||||
"a.blk.0.ffn_norm_1.weight",
|
||||
},
|
||||
|
||||
// Conformer lightweight conv1d
|
||||
{
|
||||
"conv dw weight",
|
||||
"model.audio_tower.conformer.0.lconv1d.depthwise_conv1d.weight",
|
||||
"a.blk.0.conv_dw.weight",
|
||||
},
|
||||
{
|
||||
"conv norm (pre_layer_norm)",
|
||||
"model.audio_tower.conformer.0.lconv1d.pre_layer_norm.weight",
|
||||
"a.blk.0.conv_norm.weight",
|
||||
},
|
||||
{
|
||||
"norm conv (conv_norm)",
|
||||
"model.audio_tower.conformer.0.lconv1d.conv_norm.weight",
|
||||
"a.blk.0.norm_conv.weight",
|
||||
},
|
||||
{
|
||||
"conv pw1 weight",
|
||||
"model.audio_tower.conformer.0.lconv1d.linear_start.linear.weight",
|
||||
"a.blk.0.conv_pw1.weight",
|
||||
},
|
||||
{
|
||||
"conv pw2 weight",
|
||||
"model.audio_tower.conformer.0.lconv1d.linear_end.linear.weight",
|
||||
"a.blk.0.conv_pw2.weight",
|
||||
},
|
||||
|
||||
// Audio embedder
|
||||
{
|
||||
"audio embedder projection weight",
|
||||
"model.embed_audio.embedding_projection.linear.weight",
|
||||
"mm.a.input_projection.weight",
|
||||
},
|
||||
{
|
||||
"audio embedder projection bias",
|
||||
"model.embed_audio.embedding_projection.linear.bias",
|
||||
"mm.a.input_projection.bias",
|
||||
},
|
||||
|
||||
// Audio output projection
|
||||
{
|
||||
"audio output proj weight",
|
||||
"model.audio_tower.output_proj.weight",
|
||||
"mm.a.fc.weight",
|
||||
},
|
||||
{
|
||||
"audio output proj bias",
|
||||
"model.audio_tower.output_proj.bias",
|
||||
"mm.a.fc.bias",
|
||||
},
|
||||
|
||||
// Verify vision tensors still work
|
||||
{
|
||||
"vision q weight",
|
||||
"model.vision_tower.encoder.layers.0.self_attn.q_proj.linear.weight",
|
||||
"v.blk.0.attn_q.weight",
|
||||
},
|
||||
{
|
||||
"vision std bias",
|
||||
"model.vision_tower.std_bias",
|
||||
"v.std_bias",
|
||||
},
|
||||
{
|
||||
"vision std scale",
|
||||
"model.vision_tower.std_scale",
|
||||
"v.std_scale",
|
||||
},
|
||||
{
|
||||
"vision patch embd",
|
||||
"model.vision_tower.patch_embedder.input_proj.weight",
|
||||
"v.patch_embd.weight",
|
||||
},
|
||||
{
|
||||
"vision projector",
|
||||
"model.embed_vision.embedding_projection.linear.weight",
|
||||
"mm.input_projection.weight",
|
||||
},
|
||||
|
||||
// Verify text tensors still work
|
||||
{
|
||||
"text attn q",
|
||||
"model.language_model.layers.0.self_attn.q_proj.weight",
|
||||
"blk.0.attn_q.weight",
|
||||
},
|
||||
{
|
||||
"text token embd",
|
||||
"model.language_model.embed_tokens.weight",
|
||||
"token_embd.weight",
|
||||
},
|
||||
{
|
||||
"text moe gate up fused",
|
||||
"model.language_model.layers.0.experts.gate_up_proj",
|
||||
"blk.0.ffn_gate_up_exps.weight",
|
||||
},
|
||||
{
|
||||
"text moe down",
|
||||
"model.language_model.layers.0.experts.down_proj",
|
||||
"blk.0.ffn_down_exps.weight",
|
||||
},
|
||||
{
|
||||
"text moe down with weight suffix",
|
||||
"model.language_model.layers.0.experts.down_proj.weight",
|
||||
"blk.0.ffn_down_exps.weight",
|
||||
},
|
||||
{
|
||||
"text moe per expert scale",
|
||||
"model.language_model.layers.0.router.per_expert_scale",
|
||||
"blk.0.ffn_down_exps.scale",
|
||||
},
|
||||
{
|
||||
"text moe per expert scale with weight suffix",
|
||||
"model.language_model.layers.0.router.per_expert_scale.weight",
|
||||
"blk.0.ffn_down_exps.scale",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := r.Replace(tt.in); got != tt.want {
|
||||
t.Errorf("Replace(%q) = %q, want %q", tt.in, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
264
convert/convert_glm4moelite.go
Normal file
264
convert/convert_glm4moelite.go
Normal file
@@ -0,0 +1,264 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type glm4MoeLiteModel struct {
|
||||
ModelParameters
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
QKNopeHeadDim uint32 `json:"qk_nope_head_dim"`
|
||||
QKRopeHeadDim uint32 `json:"qk_rope_head_dim"`
|
||||
KVLoraRank uint32 `json:"kv_lora_rank"`
|
||||
QLoraRank uint32 `json:"q_lora_rank"`
|
||||
VHeadDim uint32 `json:"v_head_dim"`
|
||||
|
||||
ExpertCount uint32 `json:"n_routed_experts"`
|
||||
ExpertSharedCount uint32 `json:"n_shared_experts"`
|
||||
ExpertIntermediateSize uint32 `json:"moe_intermediate_size"`
|
||||
ExpertUsedCount uint32 `json:"num_experts_per_tok"`
|
||||
ExpertWeightsNorm bool `json:"norm_topk_prob"`
|
||||
ExpertWeightsScale float32 `json:"routed_scaling_factor"`
|
||||
|
||||
LeadingDenseBlockCount uint32 `json:"first_k_dense_replace"`
|
||||
}
|
||||
|
||||
func (p *glm4MoeLiteModel) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "glm4moelite"
|
||||
kv["general.type"] = "model"
|
||||
kv["glm4moelite.block_count"] = p.HiddenLayers
|
||||
|
||||
numHeads := p.NumAttentionHeads
|
||||
numKVHeads := p.NumKeyValueHeads
|
||||
|
||||
kv["glm4moelite.attention.head_count"] = numHeads
|
||||
kv["glm4moelite.attention.head_count_kv"] = numKVHeads
|
||||
kv["glm4moelite.attention.key_length"] = p.QKNopeHeadDim + p.QKRopeHeadDim
|
||||
kv["glm4moelite.attention.kv_lora_rank"] = p.KVLoraRank
|
||||
kv["glm4moelite.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
kv["glm4moelite.attention.q_lora_rank"] = p.QLoraRank
|
||||
kv["glm4moelite.attention.value_length"] = p.VHeadDim
|
||||
kv["glm4moelite.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["glm4moelite.embedding_length"] = p.HiddenSize
|
||||
kv["glm4moelite.expert_count"] = p.ExpertCount
|
||||
kv["glm4moelite.expert_feed_forward_length"] = p.ExpertIntermediateSize
|
||||
kv["glm4moelite.expert_shared_count"] = p.ExpertSharedCount
|
||||
|
||||
kv["glm4moelite.expert_gating_func"] = uint32(2)
|
||||
kv["glm4moelite.expert_used_count"] = p.ExpertUsedCount
|
||||
kv["glm4moelite.expert_weights_norm"] = p.ExpertWeightsNorm
|
||||
kv["glm4moelite.expert_weights_scale"] = p.ExpertWeightsScale
|
||||
kv["glm4moelite.feed_forward_length"] = p.IntermediateSize
|
||||
kv["glm4moelite.leading_dense_block_count"] = p.LeadingDenseBlockCount
|
||||
|
||||
kv["glm4moelite.rope.dimension_count"] = p.QKRopeHeadDim
|
||||
kv["glm4moelite.rope.freq_base"] = cmp.Or(p.RopeTheta, float32(1000000.0))
|
||||
|
||||
kv["glm4moelite.attention.key_length_mla"] = p.KVLoraRank + p.QKRopeHeadDim
|
||||
kv["glm4moelite.attention.value_length_mla"] = p.KVLoraRank
|
||||
|
||||
kv["tokenizer.ggml.pre"] = "glm4"
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *glm4MoeLiteModel) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.kv_a_proj_with_mqa", "attn_kv_a_mqa",
|
||||
"self_attn.kv_a_layernorm", "attn_kv_a_norm",
|
||||
"self_attn.kv_b_proj", "attn_kv_b",
|
||||
"self_attn.q_a_proj", "attn_q_a",
|
||||
"self_attn.q_a_layernorm", "attn_q_a_norm",
|
||||
"self_attn.q_b_proj", "attn_q_b",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"mlp.shared_experts.down_proj", "ffn_down_shexp",
|
||||
"mlp.shared_experts.gate_proj", "ffn_gate_shexp",
|
||||
"mlp.shared_experts.up_proj", "ffn_up_shexp",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"mlp.gate.e_score_correction_bias", "exp_probs_b.bias",
|
||||
"mlp.gate", "ffn_gate_inp",
|
||||
}
|
||||
}
|
||||
|
||||
// repackKVB extracts K or V from the combined KV_B tensor for MLA absorption.
|
||||
// K output row-major: [n_head, kv_lora_rank, qk_nope] -> GGML ne[]={qk_nope, kv_lora_rank, n_head}
|
||||
// V output row-major: [n_head, v_head, kv_lora_rank] -> GGML ne[]={kv_lora_rank, v_head, n_head}
|
||||
func (p *glm4MoeLiteModel) repackKVB(extractK bool, kvFirst bool, numHeads int) Repacker {
|
||||
qkNope := int(p.QKNopeHeadDim)
|
||||
vHeadDim := int(p.VHeadDim)
|
||||
kvLoraRank := int(p.KVLoraRank)
|
||||
kvPerHead := qkNope + vHeadDim
|
||||
|
||||
return func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := make([]int, len(shape))
|
||||
for i := range shape {
|
||||
dims[i] = int(shape[i])
|
||||
}
|
||||
|
||||
var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
var err error
|
||||
|
||||
// Normalize to [n_head * (qk_nope + v_head), kv_lora_rank] layout
|
||||
if kvFirst {
|
||||
tt, err = tensor.Transpose(tt, 1, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tt = tensor.Materialize(tt)
|
||||
}
|
||||
|
||||
// Reshape to [n_head, qk_nope + v_head, kv_lora_rank]
|
||||
if err := tt.Reshape(numHeads, kvPerHead, kvLoraRank); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if extractK {
|
||||
// Slice K: [n_head, qk_nope, kv_lora_rank]
|
||||
tt, err = tt.Slice(nil, tensor.S(0, qkNope), nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tt = tensor.Materialize(tt)
|
||||
// Transpose to [n_head, kv_lora_rank, qk_nope]
|
||||
tt, err = tensor.Transpose(tt, 0, 2, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tt = tensor.Materialize(tt)
|
||||
} else {
|
||||
// Slice V: [n_head, v_head, kv_lora_rank] - already correct layout
|
||||
tt, err = tt.Slice(nil, tensor.S(qkNope, kvPerHead), nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tt = tensor.Materialize(tt)
|
||||
}
|
||||
|
||||
if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return native.VectorF32(tt.(*tensor.Dense))
|
||||
}
|
||||
}
|
||||
|
||||
func (p *glm4MoeLiteModel) Tensors(s []Tensor) (out []*ggml.Tensor) {
|
||||
merges := make([]merge, p.HiddenLayers*3)
|
||||
for i := range p.HiddenLayers {
|
||||
merges[i*3+0] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.gate_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
|
||||
}
|
||||
merges[i*3+1] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.up_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
|
||||
}
|
||||
merges[i*3+2] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.down_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
|
||||
}
|
||||
}
|
||||
|
||||
skipLayer := func(n string, minValue uint32) bool {
|
||||
re := regexp.MustCompile(`^blk\.(\d+)`)
|
||||
matches := re.FindStringSubmatch(n)
|
||||
if matches == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
blkNum, err := strconv.Atoi(matches[1])
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return uint32(blkNum) >= minValue
|
||||
}
|
||||
|
||||
out, s = mergeTensors(s, merges...)
|
||||
for _, t := range s {
|
||||
// skip any additional layers (such as the Multi-Token Prediction layer)
|
||||
if skipLayer(t.Name(), p.HiddenLayers) {
|
||||
slog.Debug("skipping layer", "name", t.Name())
|
||||
continue
|
||||
}
|
||||
|
||||
// Split attn_kv_b into separate attn_k_b and attn_v_b for MLA absorption
|
||||
if strings.HasSuffix(t.Name(), ".attn_kv_b.weight") {
|
||||
qkNope := int(p.QKNopeHeadDim)
|
||||
vHeadDim := int(p.VHeadDim)
|
||||
kvLoraRank := int(p.KVLoraRank)
|
||||
kvPerHead := qkNope + vHeadDim
|
||||
numHeads := int(p.NumAttentionHeads)
|
||||
kvFirst := true
|
||||
if len(t.Shape()) == 2 {
|
||||
switch {
|
||||
case int(t.Shape()[0]) == kvLoraRank:
|
||||
if kvPerHead > 0 && int(t.Shape()[1])%kvPerHead == 0 {
|
||||
numHeads = int(t.Shape()[1]) / kvPerHead
|
||||
}
|
||||
kvFirst = true
|
||||
case int(t.Shape()[1]) == kvLoraRank:
|
||||
if kvPerHead > 0 && int(t.Shape()[0])%kvPerHead == 0 {
|
||||
numHeads = int(t.Shape()[0]) / kvPerHead
|
||||
}
|
||||
kvFirst = false
|
||||
default:
|
||||
slog.Warn("glm4moelite: unexpected attn_kv_b layout", "name", t.Name(), "shape", t.Shape())
|
||||
}
|
||||
}
|
||||
|
||||
kTensor := t.Clone()
|
||||
kTensor.SetRepacker(p.repackKVB(true, kvFirst, numHeads))
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.Replace(t.Name(), "attn_kv_b", "attn_k_b", 1),
|
||||
Kind: t.Kind(),
|
||||
Shape: []uint64{uint64(numHeads), uint64(kvLoraRank), uint64(qkNope)},
|
||||
WriterTo: kTensor,
|
||||
})
|
||||
|
||||
vTensor := t.Clone()
|
||||
vTensor.SetRepacker(p.repackKVB(false, kvFirst, numHeads))
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.Replace(t.Name(), "attn_kv_b", "attn_v_b", 1),
|
||||
Kind: t.Kind(),
|
||||
Shape: []uint64{uint64(numHeads), uint64(vHeadDim), uint64(kvLoraRank)},
|
||||
WriterTo: vTensor,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
455
convert/convert_glmocr.go
Normal file
455
convert/convert_glmocr.go
Normal file
@@ -0,0 +1,455 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
)
|
||||
|
||||
// normalToNeoXRepacker creates a repacker that permutes Q/K weights from interleaved (LLaMA)
|
||||
// to NeoX ordering for compatibility with GGML's M-RoPE kernel.
|
||||
//
|
||||
// For weights: reshape [out, in] -> [n_heads, head_dim, in], permute rotary dims, reshape back
|
||||
// For biases: reshape [out] -> [n_heads, head_dim], permute rotary dims, reshape back
|
||||
func normalToNeoXRepacker(nHeads, headDim int, partialRotaryFactor float32) func(string, []float32, []uint64) ([]float32, error) {
|
||||
return func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
rotaryDim := int(float32(headDim) * partialRotaryFactor)
|
||||
if rotaryDim%2 != 0 {
|
||||
rotaryDim = (rotaryDim / 2) * 2 // Round down to even
|
||||
}
|
||||
|
||||
// Handle 1D (bias) or 2D (weight) tensors
|
||||
is1D := len(shape) == 1
|
||||
var inFeatures int
|
||||
if is1D {
|
||||
inFeatures = 1
|
||||
} else {
|
||||
inFeatures = int(shape[1])
|
||||
}
|
||||
outFeatures := int(shape[0])
|
||||
nEffectiveHeads := outFeatures / headDim
|
||||
|
||||
if nEffectiveHeads != nHeads {
|
||||
slog.Warn("normalToNeoX: unexpected head count", "effective", nEffectiveHeads, "expected", nHeads)
|
||||
}
|
||||
|
||||
// Reshape to [n_heads, head_dim, in_features]
|
||||
reshaped := make([]float32, len(data))
|
||||
copy(reshaped, data)
|
||||
|
||||
// Permute the rotary dimensions: even indices first, then odd
|
||||
// For each head, reorder [0,1,2,3,4,5...] to [0,2,4...,1,3,5...]
|
||||
result := make([]float32, len(data))
|
||||
halfRotary := rotaryDim / 2
|
||||
|
||||
for h := range nEffectiveHeads {
|
||||
for f := range inFeatures {
|
||||
for i := range halfRotary {
|
||||
// Even dim (0, 2, 4, ...) -> position i
|
||||
srcIdx := h*headDim*inFeatures + (2*i)*inFeatures + f
|
||||
dstIdx := h*headDim*inFeatures + i*inFeatures + f
|
||||
result[dstIdx] = reshaped[srcIdx]
|
||||
|
||||
// Odd dim (1, 3, 5, ...) -> position halfRotary + i
|
||||
srcIdx = h*headDim*inFeatures + (2*i+1)*inFeatures + f
|
||||
dstIdx = h*headDim*inFeatures + (halfRotary+i)*inFeatures + f
|
||||
result[dstIdx] = reshaped[srcIdx]
|
||||
}
|
||||
|
||||
// Non-rotary part: copy as-is
|
||||
for i := rotaryDim; i < headDim; i++ {
|
||||
srcIdx := h*headDim*inFeatures + i*inFeatures + f
|
||||
result[srcIdx] = reshaped[srcIdx]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
|
||||
type glmOcrModel struct {
|
||||
ModelParameters
|
||||
|
||||
TextConfig struct {
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
MaxPositionEmbed uint32 `json:"max_position_embeddings"`
|
||||
RMSNormEps float32 `json:"rms_norm_eps"`
|
||||
PartialRotaryFactor float32 `json:"partial_rotary_factor"`
|
||||
RopeParameters struct {
|
||||
RopeType string `json:"rope_type"`
|
||||
MRopeSection []int32 `json:"mrope_section"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
PartialRotaryFactor float32 `json:"partial_rotary_factor"`
|
||||
} `json:"rope_parameters"`
|
||||
} `json:"text_config"`
|
||||
|
||||
VisionConfig struct {
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
Depth uint32 `json:"depth"`
|
||||
NumHeads uint32 `json:"num_heads"`
|
||||
ImageSize uint32 `json:"image_size"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
OutHiddenSize uint32 `json:"out_hidden_size"`
|
||||
RMSNormEps float32 `json:"rms_norm_eps"`
|
||||
SpatialMergeSize uint32 `json:"spatial_merge_size"`
|
||||
TemporalPatchSize uint32 `json:"temporal_patch_size"`
|
||||
} `json:"vision_config"`
|
||||
|
||||
ImageStartTokenID uint32 `json:"image_start_token_id"`
|
||||
ImageEndTokenID uint32 `json:"image_end_token_id"`
|
||||
VideoStartTokenID uint32 `json:"video_start_token_id"`
|
||||
VideoEndTokenID uint32 `json:"video_end_token_id"`
|
||||
ImageTokenID uint32 `json:"image_token_id"`
|
||||
VideoTokenID uint32 `json:"video_token_id"`
|
||||
|
||||
// Preprocessor config (preprocessor_config.json)
|
||||
Preprocessor struct {
|
||||
Size struct {
|
||||
ShortestEdge uint32 `json:"shortest_edge"`
|
||||
LongestEdge uint32 `json:"longest_edge"`
|
||||
} `json:"size"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
TemporalPatchSize uint32 `json:"temporal_patch_size"`
|
||||
MergeSize uint32 `json:"merge_size"`
|
||||
ImageMean []float32 `json:"image_mean"`
|
||||
ImageStd []float32 `json:"image_std"`
|
||||
} `json:"-"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*glmOcrModel)(nil)
|
||||
|
||||
func (m *glmOcrModel) parseMore(fsys fs.FS) error {
|
||||
bts, err := fs.ReadFile(fsys, "preprocessor_config.json")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return json.Unmarshal(bts, &m.Preprocessor)
|
||||
}
|
||||
|
||||
func (m *glmOcrModel) KV(t *Tokenizer) KV {
|
||||
kv := m.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "glmocr"
|
||||
|
||||
// Text model parameters
|
||||
kv["glmocr.block_count"] = cmp.Or(m.TextConfig.NumHiddenLayers, 16)
|
||||
kv["glmocr.embedding_length"] = cmp.Or(m.TextConfig.HiddenSize, 1536)
|
||||
kv["glmocr.attention.head_count"] = cmp.Or(m.TextConfig.NumAttentionHeads, 16)
|
||||
kv["glmocr.attention.head_count_kv"] = cmp.Or(m.TextConfig.NumKeyValueHeads, 8)
|
||||
headDim := cmp.Or(m.TextConfig.HeadDim, m.TextConfig.HiddenSize/m.TextConfig.NumAttentionHeads)
|
||||
kv["glmocr.attention.key_length"] = headDim
|
||||
kv["glmocr.attention.value_length"] = headDim
|
||||
kv["glmocr.feed_forward_length"] = cmp.Or(m.TextConfig.IntermediateSize, 4608)
|
||||
kv["glmocr.attention.layer_norm_rms_epsilon"] = cmp.Or(m.TextConfig.RMSNormEps, 1e-5)
|
||||
kv["glmocr.context_length"] = cmp.Or(m.TextConfig.MaxPositionEmbed, 131072)
|
||||
kv["glmocr.rope.freq_base"] = cmp.Or(m.TextConfig.RopeParameters.RopeTheta, float32(10000))
|
||||
kv["glmocr.rope.partial_rotary_factor"] = cmp.Or(m.TextConfig.RopeParameters.PartialRotaryFactor, m.TextConfig.PartialRotaryFactor, float32(1.0))
|
||||
if len(m.TextConfig.RopeParameters.MRopeSection) > 0 {
|
||||
kv["glmocr.rope.mrope_section"] = m.TextConfig.RopeParameters.MRopeSection
|
||||
}
|
||||
|
||||
// Vision model parameters
|
||||
kv["glmocr.vision.block_count"] = cmp.Or(m.VisionConfig.Depth, 24)
|
||||
kv["glmocr.vision.embedding_length"] = cmp.Or(m.VisionConfig.HiddenSize, 1024)
|
||||
kv["glmocr.vision.attention.head_count"] = cmp.Or(m.VisionConfig.NumHeads, 16)
|
||||
kv["glmocr.vision.image_size"] = cmp.Or(m.VisionConfig.ImageSize, 336)
|
||||
kv["glmocr.vision.patch_size"] = cmp.Or(m.VisionConfig.PatchSize, m.Preprocessor.PatchSize, 14)
|
||||
kv["glmocr.vision.spatial_merge_size"] = cmp.Or(m.VisionConfig.SpatialMergeSize, m.Preprocessor.MergeSize, 2)
|
||||
kv["glmocr.vision.temporal_patch_size"] = cmp.Or(m.VisionConfig.TemporalPatchSize, m.Preprocessor.TemporalPatchSize, 2)
|
||||
kv["glmocr.vision.out_hidden_size"] = cmp.Or(m.VisionConfig.OutHiddenSize, 1536)
|
||||
kv["glmocr.vision.intermediate_size"] = cmp.Or(m.VisionConfig.IntermediateSize, 4096)
|
||||
kv["glmocr.vision.attention.layer_norm_rms_epsilon"] = cmp.Or(m.VisionConfig.RMSNormEps, 1e-5)
|
||||
|
||||
// Preprocessor-derived image settings (min/max pixels and normalization)
|
||||
// Note: fs.Config.keyValue() auto-prepends architecture prefix, so use full key
|
||||
if m.Preprocessor.Size.ShortestEdge > 0 {
|
||||
kv["glmocr.vision.min_pixels"] = m.Preprocessor.Size.ShortestEdge
|
||||
}
|
||||
if m.Preprocessor.Size.LongestEdge > 0 {
|
||||
kv["glmocr.vision.max_pixels"] = m.Preprocessor.Size.LongestEdge
|
||||
}
|
||||
if len(m.Preprocessor.ImageMean) == 3 {
|
||||
kv["glmocr.vision.image_mean"] = m.Preprocessor.ImageMean
|
||||
}
|
||||
if len(m.Preprocessor.ImageStd) == 3 {
|
||||
kv["glmocr.vision.image_std"] = m.Preprocessor.ImageStd
|
||||
}
|
||||
|
||||
// Special tokens
|
||||
kv["glmocr.image_token_id"] = m.ImageTokenID
|
||||
kv["glmocr.image_start_token_id"] = m.ImageStartTokenID
|
||||
kv["glmocr.image_end_token_id"] = m.ImageEndTokenID
|
||||
kv["glmocr.video_token_id"] = m.VideoTokenID
|
||||
kv["glmocr.video_start_token_id"] = m.VideoStartTokenID
|
||||
kv["glmocr.video_end_token_id"] = m.VideoEndTokenID
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (m *glmOcrModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
// Skip layers >= num_hidden_layers (Multi-Token Prediction layers not needed for basic inference)
|
||||
numLayers := int(cmp.Or(m.TextConfig.NumHiddenLayers, 16))
|
||||
skipLayer := func(name string) bool {
|
||||
// Tensor names are already replaced to "blk.N.xxx" format
|
||||
re := regexp.MustCompile(`^blk\.(\d+)`)
|
||||
matches := re.FindStringSubmatch(name)
|
||||
if matches == nil {
|
||||
return false
|
||||
}
|
||||
blkNum, err := strconv.Atoi(matches[1])
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return blkNum >= numLayers
|
||||
}
|
||||
|
||||
for _, t := range ts {
|
||||
name := t.Name()
|
||||
|
||||
// Skip next-n prediction layers (layers >= num_hidden_layers)
|
||||
if skipLayer(name) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Split ffn_gate_up into separate gate and up projections
|
||||
if strings.Contains(name, "ffn_gate_up") {
|
||||
for t := range splitDim(t, 0,
|
||||
split{Replacer: strings.NewReplacer("ffn_gate_up", "ffn_gate")},
|
||||
split{Replacer: strings.NewReplacer("ffn_gate_up", "ffn_up")},
|
||||
) {
|
||||
out = append(out, t)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasSuffix(name, "patch_embd.weight") {
|
||||
shape := t.Shape()
|
||||
if len(shape) == 5 && shape[2] == 2 {
|
||||
newShape := []uint64{shape[0], shape[1], shape[3], shape[4]}
|
||||
|
||||
t0 := t.Clone()
|
||||
t0.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := make([]int, len(shape))
|
||||
for i := range shape {
|
||||
dims[i] = int(shape[i])
|
||||
}
|
||||
var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
tt, err := tt.Slice(nil, nil, tensor.S(0, 1), nil, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tt = tensor.Materialize(tt)
|
||||
newDims := []int{int(shape[0]), int(shape[1]), int(shape[3]), int(shape[4])}
|
||||
if err := tt.Reshape(newDims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return native.VectorF32(tt.(*tensor.Dense))
|
||||
})
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.Replace(name, "patch_embd.weight", "patch_embd_0.weight", 1),
|
||||
Kind: t.Kind(),
|
||||
Shape: newShape,
|
||||
WriterTo: t0,
|
||||
})
|
||||
|
||||
t1 := t.Clone()
|
||||
t1.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := make([]int, len(shape))
|
||||
for i := range shape {
|
||||
dims[i] = int(shape[i])
|
||||
}
|
||||
var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
tt, err := tt.Slice(nil, nil, tensor.S(1, 2), nil, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tt = tensor.Materialize(tt)
|
||||
newDims := []int{int(shape[0]), int(shape[1]), int(shape[3]), int(shape[4])}
|
||||
if err := tt.Reshape(newDims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return native.VectorF32(tt.(*tensor.Dense))
|
||||
})
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.Replace(name, "patch_embd.weight", "patch_embd_1.weight", 1),
|
||||
Kind: t.Kind(),
|
||||
Shape: newShape,
|
||||
WriterTo: t1,
|
||||
})
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if len(shape) == 4 {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.Replace(name, "patch_embd.weight", "patch_embd_0.weight", 1),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
slog.Warn("glmocr: patch_embed weight has unexpected shape - not splitting", "shape", shape)
|
||||
// Fall through to default handling
|
||||
}
|
||||
|
||||
// Handle pre-split patch embedding weights
|
||||
// Pattern 1: v.patch_embd.0.weight, v.patch_embd.1.weight -> patch_embd_0.weight, patch_embd_1.weight
|
||||
// Pattern 2: v.patch_embd.weight.0, v.patch_embd.weight.1 -> patch_embd_0.weight, patch_embd_1.weight
|
||||
if strings.Contains(name, "patch_embd.0.") {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.Replace(name, "patch_embd.0.", "patch_embd_0.", 1),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
continue
|
||||
}
|
||||
if strings.Contains(name, "patch_embd.1.") {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.Replace(name, "patch_embd.1.", "patch_embd_1.", 1),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
continue
|
||||
}
|
||||
// Handle .weight.0 and .weight.1 suffix patterns
|
||||
if strings.HasSuffix(name, "patch_embd.weight.0") {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.Replace(name, "patch_embd.weight.0", "patch_embd_0.weight", 1),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
continue
|
||||
}
|
||||
if strings.HasSuffix(name, "patch_embd.weight.1") {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.Replace(name, "patch_embd.weight.1", "patch_embd_1.weight", 1),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
// Permute Q/K weights for M-RoPE compatibility (interleaved -> NeoX ordering)
|
||||
// GGML's M-RoPE kernel uses NeoX-style rotation, but GLM-OCR uses interleaved (LLaMA-style)
|
||||
// We permute at conversion time so the weights work correctly with GGML's kernel
|
||||
// This aligns Q/K rotary dimensions with GGML's NeoX-style rotation
|
||||
if len(m.TextConfig.RopeParameters.MRopeSection) > 0 &&
|
||||
strings.Contains(name, "blk.") && (strings.Contains(name, "attn_q.") || strings.Contains(name, "attn_k.")) {
|
||||
// Get config values for permutation
|
||||
nHeads := int(cmp.Or(m.TextConfig.NumAttentionHeads, 16))
|
||||
nKVHeads := int(cmp.Or(m.TextConfig.NumKeyValueHeads, 8))
|
||||
hiddenSize := int(cmp.Or(m.TextConfig.HiddenSize, 1536))
|
||||
headDim := int(cmp.Or(m.TextConfig.HeadDim, uint32(hiddenSize/nHeads)))
|
||||
partialRotaryFactor := cmp.Or(m.TextConfig.PartialRotaryFactor, m.TextConfig.RopeParameters.PartialRotaryFactor, float32(1.0))
|
||||
|
||||
// Use appropriate head count: nHeads for Q, nKVHeads for K
|
||||
effectiveHeads := nHeads
|
||||
if strings.Contains(name, "attn_k.") {
|
||||
effectiveHeads = nKVHeads
|
||||
}
|
||||
|
||||
permutedT := t.Clone()
|
||||
permutedT.SetRepacker(normalToNeoXRepacker(effectiveHeads, headDim, partialRotaryFactor))
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: name,
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: permutedT,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: name,
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (m *glmOcrModel) Replacements() []string {
|
||||
return []string{
|
||||
// Vision encoder
|
||||
"model.visual.patch_embed.proj_1", "v.patch_embd_1", // Second temporal split
|
||||
"model.visual.patch_embed.proj", "v.patch_embd",
|
||||
"model.visual.blocks", "v.blk",
|
||||
"model.visual.post_layernorm", "v.post_ln",
|
||||
"model.visual.downsample", "mm.patch_merger",
|
||||
|
||||
// Vision attention
|
||||
"attn.qkv", "attn_qkv",
|
||||
"attn.proj", "attn_out",
|
||||
"attn.q_norm", "attn_q_norm",
|
||||
"attn.k_norm", "attn_k_norm",
|
||||
|
||||
// Vision norms
|
||||
"norm1", "ln1",
|
||||
"norm2", "ln2",
|
||||
|
||||
// Vision MLP
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
|
||||
// Merger (multimodal projector)
|
||||
"model.visual.merger.proj", "mm.model.fc",
|
||||
"model.visual.merger.post_projection_norm", "mm.post_norm",
|
||||
"model.visual.merger.gate_proj", "mm.gate",
|
||||
"model.visual.merger.up_proj", "mm.up",
|
||||
"model.visual.merger.down_proj", "mm.down",
|
||||
|
||||
// Language model
|
||||
"model.language_model.embed_tokens", "token_embd",
|
||||
"model.language_model.layers", "blk",
|
||||
"model.language_model.norm", "output_norm",
|
||||
"lm_head", "output",
|
||||
|
||||
// Language model attention
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_out",
|
||||
|
||||
// Language model norms
|
||||
"input_layernorm", "attn_norm",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"post_self_attn_layernorm", "post_attn_norm",
|
||||
"post_mlp_layernorm", "post_ffn_norm",
|
||||
|
||||
// Language model MLP (remove mlp. prefix so ffn_* names work)
|
||||
"mlp.gate_up_proj", "ffn_gate_up",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
}
|
||||
}
|
||||
269
convert/convert_gptoss.go
Normal file
269
convert/convert_gptoss.go
Normal file
@@ -0,0 +1,269 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
)
|
||||
|
||||
type gptossModel struct {
|
||||
ModelParameters
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
AttentionHeads uint32 `json:"num_attention_heads"`
|
||||
KeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
Experts uint32 `json:"num_experts"`
|
||||
LocalExperts uint32 `json:"num_local_experts"`
|
||||
ExpertsPerToken uint32 `json:"experts_per_token"`
|
||||
RMSNormEpsilon float32 `json:"rms_norm_eps"`
|
||||
InitialContextLength uint32 `json:"initial_context_length"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeScalingFactor float32 `json:"rope_scaling_factor"`
|
||||
RopeScaling struct {
|
||||
Factor float32 `json:"factor"`
|
||||
} `json:"rope_scaling"`
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*gptossModel)(nil)
|
||||
|
||||
func (m *gptossModel) KV(t *Tokenizer) KV {
|
||||
kv := m.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "gptoss"
|
||||
kv["general.file_type"] = uint32(4)
|
||||
kv["gptoss.context_length"] = cmp.Or(m.MaxPositionEmbeddings, uint32(m.RopeScalingFactor*float32(m.InitialContextLength)))
|
||||
kv["gptoss.block_count"] = m.HiddenLayers
|
||||
kv["gptoss.embedding_length"] = m.HiddenSize
|
||||
kv["gptoss.feed_forward_length"] = m.IntermediateSize
|
||||
kv["gptoss.expert_count"] = cmp.Or(m.Experts, m.LocalExperts)
|
||||
kv["gptoss.expert_used_count"] = m.ExpertsPerToken
|
||||
kv["gptoss.attention.head_count"] = m.AttentionHeads
|
||||
kv["gptoss.attention.head_count_kv"] = m.KeyValueHeads
|
||||
kv["gptoss.attention.key_length"] = m.HeadDim
|
||||
kv["gptoss.attention.value_length"] = m.HeadDim
|
||||
kv["gptoss.attention.layer_norm_rms_epsilon"] = cmp.Or(m.RMSNormEpsilon, 1e-5)
|
||||
kv["gptoss.attention.sliding_window"] = m.SlidingWindow
|
||||
kv["gptoss.rope.freq_base"] = m.RopeTheta
|
||||
kv["gptoss.rope.scaling.factor"] = cmp.Or(m.RopeScalingFactor, m.RopeScaling.Factor)
|
||||
kv["gptoss.rope.scaling.original_context_length"] = m.InitialContextLength
|
||||
kv["tokenizer.ggml.bos_token_id"] = uint32(199998) // <|startoftext|>
|
||||
kv["tokenizer.ggml.add_bos_token"] = false
|
||||
kv["tokenizer.ggml.eos_token_id"] = uint32(199999) // <|endoftext|>
|
||||
kv["tokenizer.ggml.eos_token_ids"] = []int32{
|
||||
199999, /* <|endoftext|> */
|
||||
200002, /* <|return|> */
|
||||
200012, /* <|call|> */
|
||||
}
|
||||
kv["tokenizer.ggml.add_eos_token"] = false
|
||||
return kv
|
||||
}
|
||||
|
||||
func (m *gptossModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
mxfp4s := make(map[string]*mxfp4)
|
||||
for _, t := range ts {
|
||||
if strings.HasSuffix(t.Name(), ".blocks") || strings.HasSuffix(t.Name(), ".scales") {
|
||||
dot := strings.LastIndex(t.Name(), ".")
|
||||
name, suffix := t.Name()[:dot], t.Name()[dot+1:]
|
||||
if _, ok := mxfp4s[name]; !ok {
|
||||
mxfp4s[name] = &mxfp4{}
|
||||
}
|
||||
|
||||
switch suffix {
|
||||
case "blocks":
|
||||
mxfp4s[name].blocks = t
|
||||
case "scales":
|
||||
mxfp4s[name].scales = t
|
||||
}
|
||||
} else if strings.HasSuffix(t.Name(), "gate_up_exps.bias") {
|
||||
// gate_up_exps is interleaved, need to split into gate_exps and up_exps
|
||||
// e.g. gate_exps, up_exps = gate_up_exps[:, 0::2, ...], gate_up_exps[:, 1::2, ...]
|
||||
out = append(out, slices.Collect(splitDim(t, 1,
|
||||
split{
|
||||
Replacer: strings.NewReplacer("gate_up_exps", "gate_exps"),
|
||||
slices: []tensor.Slice{nil, tensor.S(0, int(t.Shape()[1]), 2)},
|
||||
},
|
||||
split{
|
||||
Replacer: strings.NewReplacer("gate_up_exps", "up_exps"),
|
||||
slices: []tensor.Slice{nil, tensor.S(1, int(t.Shape()[1]), 2)},
|
||||
},
|
||||
))...)
|
||||
} else {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for name, mxfp4 := range mxfp4s {
|
||||
dims := mxfp4.blocks.Shape()
|
||||
if !strings.HasSuffix(name, ".weight") {
|
||||
name = name + ".weight"
|
||||
}
|
||||
if strings.Contains(name, "ffn_down_exps") {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: name,
|
||||
Kind: uint32(ggml.TensorTypeMXFP4),
|
||||
Shape: []uint64{dims[0], dims[1], dims[2] * dims[3] * 2},
|
||||
WriterTo: mxfp4,
|
||||
})
|
||||
} else if strings.Contains(name, "ffn_gate_up_exps") {
|
||||
// gate_up_exps is interleaved, need to split into gate_exps and up_exps
|
||||
// e.g. gate_exps, up_exps = gate_up_exps[:, 0::2, ...], gate_up_exps[:, 1::2, ...]
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.Replace(name, "gate_up", "gate", 1),
|
||||
Kind: uint32(ggml.TensorTypeMXFP4),
|
||||
Shape: []uint64{dims[0], dims[1] / 2, dims[2] * dims[3] * 2},
|
||||
WriterTo: mxfp4.slice(1, 0, int(dims[1]), 2),
|
||||
}, &ggml.Tensor{
|
||||
Name: strings.Replace(name, "gate_up", "up", 1),
|
||||
Kind: uint32(ggml.TensorTypeMXFP4),
|
||||
Shape: []uint64{dims[0], dims[1] / 2, dims[2] * dims[3] * 2},
|
||||
WriterTo: mxfp4.slice(1, 1, int(dims[1]), 2),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (m *gptossModel) Replacements() []string {
|
||||
var replacements []string
|
||||
if m.MaxPositionEmbeddings > 0 {
|
||||
// hf flavored model
|
||||
replacements = []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_out",
|
||||
"self_attn.sinks", "attn_sinks",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"mlp.router", "ffn_gate_inp",
|
||||
"mlp.experts.gate_up_proj_", "ffn_gate_up_exps.",
|
||||
"mlp.experts.down_proj_", "ffn_down_exps.",
|
||||
"model.norm", "output_norm",
|
||||
}
|
||||
} else {
|
||||
replacements = []string{
|
||||
// noop replacements so other replacements will not be applied
|
||||
".blocks", ".blocks",
|
||||
".scales", ".scales",
|
||||
// real replacements
|
||||
"block", "blk",
|
||||
"attn.norm", "attn_norm",
|
||||
"attn.qkv", "attn_qkv",
|
||||
"attn.sinks", "attn_sinks",
|
||||
"attn.out", "attn_out",
|
||||
"mlp.norm", "ffn_norm",
|
||||
"mlp.gate", "ffn_gate_inp",
|
||||
"mlp.mlp1_", "ffn_gate_up_exps.",
|
||||
"mlp.mlp2_", "ffn_down_exps.",
|
||||
"embedding", "token_embd",
|
||||
"norm", "output_norm",
|
||||
"unembedding", "output",
|
||||
"scale", "weight",
|
||||
}
|
||||
}
|
||||
return replacements
|
||||
}
|
||||
|
||||
type mxfp4 struct {
|
||||
slices []tensor.Slice
|
||||
|
||||
blocks, scales Tensor
|
||||
}
|
||||
|
||||
func (m *mxfp4) slice(dim, start, end, step int) *mxfp4 {
|
||||
slice := slices.Repeat([]tensor.Slice{nil}, len(m.blocks.Shape()))
|
||||
slice[dim] = tensor.S(start, end, step)
|
||||
return &mxfp4{
|
||||
slices: slice,
|
||||
blocks: m.blocks,
|
||||
scales: m.scales,
|
||||
}
|
||||
}
|
||||
|
||||
func (m *mxfp4) WriteTo(w io.Writer) (int64, error) {
|
||||
var b bytes.Buffer
|
||||
if _, err := m.blocks.WriteTo(&b); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
blocksDims := make([]int, len(m.blocks.Shape()))
|
||||
for i, d := range m.blocks.Shape() {
|
||||
blocksDims[i] = int(d)
|
||||
}
|
||||
|
||||
bts := b.Bytes()
|
||||
var tmp [16]byte
|
||||
for i := 0; i < b.Len(); i += 16 {
|
||||
for j := range 8 {
|
||||
// transform a1b2c3 ... x7y8z9 -> 71xa82yb93zc
|
||||
a, b := bts[i+j], bts[i+j+8]
|
||||
tmp[2*j+0] = (a & 0x0F) | (b << 4)
|
||||
tmp[2*j+1] = (a >> 4) | (b & 0xF0)
|
||||
}
|
||||
|
||||
copy(bts[i:i+16], tmp[:])
|
||||
}
|
||||
|
||||
var blocks tensor.Tensor = tensor.New(tensor.WithShape(blocksDims...), tensor.WithBacking(bts))
|
||||
|
||||
var s bytes.Buffer
|
||||
if _, err := m.scales.WriteTo(&s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
scalesDims := slices.Repeat([]int{1}, len(m.blocks.Shape()))
|
||||
for i, d := range m.scales.Shape() {
|
||||
scalesDims[i] = int(d)
|
||||
}
|
||||
|
||||
var scales tensor.Tensor = tensor.New(tensor.WithShape(scalesDims...), tensor.WithBacking(s.Bytes()))
|
||||
|
||||
out, err := tensor.Concat(3, scales, blocks)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if len(m.slices) > 0 {
|
||||
out, err = out.Slice(m.slices...)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
out = tensor.Materialize(out)
|
||||
|
||||
if err := out.Reshape(out.Shape().TotalSize()); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
u8s, err := native.VectorU8(out.(*tensor.Dense))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, binary.LittleEndian, u8s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return int64(len(u8s)), nil
|
||||
}
|
||||
604
convert/convert_laguna.go
Normal file
604
convert/convert_laguna.go
Normal file
@@ -0,0 +1,604 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
iofs "io/fs"
|
||||
"math"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type lagunaModel struct {
|
||||
ModelParameters
|
||||
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
PartialRotaryFactor float32 `json:"partial_rotary_factor"`
|
||||
Gating lagunaGatingMode `json:"gating"`
|
||||
QKNormType string `json:"qk_norm_type"`
|
||||
|
||||
LayerTypes []string `json:"layer_types"`
|
||||
NumAttentionHeadsPerLayer []uint32 `json:"num_attention_heads_per_layer"`
|
||||
|
||||
NumExperts uint32 `json:"num_experts"`
|
||||
NumExpertsPerTok uint32 `json:"num_experts_per_tok"`
|
||||
MoEIntermediateSize uint32 `json:"moe_intermediate_size"`
|
||||
SharedExpertIntermediateSize uint32 `json:"shared_expert_intermediate_size"`
|
||||
NormTopKProb bool `json:"norm_topk_prob"`
|
||||
MoeRoutedScalingFactor float32 `json:"moe_routed_scaling_factor"`
|
||||
MoERouterUseSigmoid bool `json:"moe_router_use_sigmoid"`
|
||||
MoEApplyRouterWeightOnInput bool `json:"moe_apply_router_weight_on_input"`
|
||||
DecoderSparseStep uint32 `json:"decoder_sparse_step"`
|
||||
MLPOnlyLayers []uint32 `json:"mlp_only_layers"`
|
||||
MLPLayerTypes []string `json:"mlp_layer_types"`
|
||||
|
||||
RopeParameters lagunaRopeParameters `json:"rope_parameters"`
|
||||
SwaRopeParameters lagunaRopeParameters `json:"swa_rope_parameters"`
|
||||
|
||||
SwaAttentionSinkEnabled bool `json:"swa_attention_sink_enabled"`
|
||||
}
|
||||
|
||||
type lagunaGatingMode string
|
||||
|
||||
type lagunaRopeParameters struct {
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeType string `json:"rope_type"`
|
||||
Type string `json:"type"`
|
||||
Factor float32 `json:"factor"`
|
||||
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
BetaSlow float32 `json:"beta_slow"`
|
||||
BetaFast float32 `json:"beta_fast"`
|
||||
AttentionFactor float32 `json:"attention_factor"`
|
||||
PartialRotaryFactor float32 `json:"partial_rotary_factor"`
|
||||
}
|
||||
|
||||
type lagunaRopeConfig struct {
|
||||
flat lagunaRopeParameters
|
||||
full lagunaRopeParameters
|
||||
sliding lagunaRopeParameters
|
||||
nested bool
|
||||
}
|
||||
|
||||
func (g *lagunaGatingMode) UnmarshalJSON(b []byte) error {
|
||||
var s string
|
||||
if err := json.Unmarshal(b, &s); err == nil {
|
||||
*g = lagunaGatingMode(s)
|
||||
return nil
|
||||
}
|
||||
|
||||
var enabled bool
|
||||
if err := json.Unmarshal(b, &enabled); err == nil {
|
||||
if enabled {
|
||||
*g = "true"
|
||||
} else {
|
||||
*g = "false"
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if string(b) == "null" {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("unsupported Laguna gating JSON value %s", string(b))
|
||||
}
|
||||
|
||||
func (g lagunaGatingMode) perHead() bool {
|
||||
return strings.EqualFold(string(g), "per-head") || strings.EqualFold(string(g), "true")
|
||||
}
|
||||
|
||||
func (r *lagunaRopeConfig) UnmarshalJSON(b []byte) error {
|
||||
if string(b) == "null" {
|
||||
return nil
|
||||
}
|
||||
|
||||
var probe map[string]json.RawMessage
|
||||
if err := json.Unmarshal(b, &probe); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(probe) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if raw, ok := probe["full_attention"]; ok {
|
||||
r.nested = true
|
||||
if err := json.Unmarshal(raw, &r.full); err != nil {
|
||||
return err
|
||||
}
|
||||
if raw = probe["sliding_attention"]; raw != nil {
|
||||
if err := json.Unmarshal(raw, &r.sliding); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if raw, ok := probe["global_attention"]; ok {
|
||||
r.nested = true
|
||||
if err := json.Unmarshal(raw, &r.full); err != nil {
|
||||
return err
|
||||
}
|
||||
if raw = probe["sliding_attention"]; raw != nil {
|
||||
if err := json.Unmarshal(raw, &r.sliding); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return json.Unmarshal(b, &r.flat)
|
||||
}
|
||||
|
||||
func (r lagunaRopeConfig) fullParams() lagunaRopeParameters {
|
||||
if r.nested {
|
||||
return r.full
|
||||
}
|
||||
return r.flat
|
||||
}
|
||||
|
||||
func (r lagunaRopeConfig) slidingParams() (lagunaRopeParameters, bool) {
|
||||
if !r.nested {
|
||||
return lagunaRopeParameters{}, false
|
||||
}
|
||||
return r.sliding, true
|
||||
}
|
||||
|
||||
func (r lagunaRopeParameters) ropeType() string {
|
||||
return cmp.Or(r.RopeType, r.Type)
|
||||
}
|
||||
|
||||
func (r lagunaRopeParameters) withDefaultPartialRotaryFactor(v float32) lagunaRopeParameters {
|
||||
if r.PartialRotaryFactor == 0 {
|
||||
r.PartialRotaryFactor = v
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func (r lagunaRopeParameters) empty() bool {
|
||||
return r == (lagunaRopeParameters{})
|
||||
}
|
||||
|
||||
type rawLagunaModel struct {
|
||||
ModelParameters
|
||||
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
PartialRotaryFactor float32 `json:"partial_rotary_factor"`
|
||||
Gating lagunaGatingMode `json:"gating"`
|
||||
QKNormType string `json:"qk_norm_type"`
|
||||
|
||||
LayerTypes []string `json:"layer_types"`
|
||||
NumAttentionHeadsPerLayer []uint32 `json:"num_attention_heads_per_layer"`
|
||||
|
||||
NumExperts uint32 `json:"num_experts"`
|
||||
NumExpertsPerTok uint32 `json:"num_experts_per_tok"`
|
||||
MoEIntermediateSize uint32 `json:"moe_intermediate_size"`
|
||||
SharedExpertIntermediateSize uint32 `json:"shared_expert_intermediate_size"`
|
||||
NormTopKProb *bool `json:"norm_topk_prob"`
|
||||
MoeRoutedScalingFactor float32 `json:"moe_routed_scaling_factor"`
|
||||
MoERouterUseSigmoid *bool `json:"moe_router_use_sigmoid"`
|
||||
MoEApplyRouterWeightOnInput bool `json:"moe_apply_router_weight_on_input"`
|
||||
DecoderSparseStep uint32 `json:"decoder_sparse_step"`
|
||||
MLPOnlyLayers []uint32 `json:"mlp_only_layers"`
|
||||
MLPLayerTypes []string `json:"mlp_layer_types"`
|
||||
|
||||
RopeParameters lagunaRopeConfig `json:"rope_parameters"`
|
||||
SwaRopeParameters lagunaRopeParameters `json:"swa_rope_parameters"`
|
||||
|
||||
SwaAttentionSinkEnabled bool `json:"swa_attention_sink_enabled"`
|
||||
}
|
||||
|
||||
func (p *lagunaModel) UnmarshalJSON(b []byte) error {
|
||||
var raw rawLagunaModel
|
||||
if err := json.Unmarshal(b, &raw); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
mlpOnlyLayers, err := lagunaDenseLayers(raw.MLPOnlyLayers, raw.MLPLayerTypes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fullRope := raw.RopeParameters.fullParams().withDefaultPartialRotaryFactor(cmp.Or(raw.PartialRotaryFactor, float32(1)))
|
||||
swaRope := raw.SwaRopeParameters
|
||||
if nestedSwa, ok := raw.RopeParameters.slidingParams(); ok && !nestedSwa.empty() {
|
||||
swaRope = nestedSwa
|
||||
}
|
||||
swaRope = swaRope.withDefaultPartialRotaryFactor(cmp.Or(fullRope.PartialRotaryFactor, float32(1)))
|
||||
|
||||
*p = lagunaModel{
|
||||
ModelParameters: raw.ModelParameters,
|
||||
NumHiddenLayers: raw.NumHiddenLayers,
|
||||
HiddenSize: raw.HiddenSize,
|
||||
IntermediateSize: raw.IntermediateSize,
|
||||
NumAttentionHeads: raw.NumAttentionHeads,
|
||||
NumKeyValueHeads: raw.NumKeyValueHeads,
|
||||
HeadDim: raw.HeadDim,
|
||||
RMSNormEPS: raw.RMSNormEPS,
|
||||
MaxPositionEmbeddings: raw.MaxPositionEmbeddings,
|
||||
SlidingWindow: raw.SlidingWindow,
|
||||
PartialRotaryFactor: cmp.Or(raw.PartialRotaryFactor, fullRope.PartialRotaryFactor),
|
||||
Gating: raw.Gating,
|
||||
QKNormType: cmp.Or(raw.QKNormType, "rmsnorm"),
|
||||
LayerTypes: raw.LayerTypes,
|
||||
NumAttentionHeadsPerLayer: raw.NumAttentionHeadsPerLayer,
|
||||
NumExperts: raw.NumExperts,
|
||||
NumExpertsPerTok: raw.NumExpertsPerTok,
|
||||
MoEIntermediateSize: raw.MoEIntermediateSize,
|
||||
SharedExpertIntermediateSize: raw.SharedExpertIntermediateSize,
|
||||
NormTopKProb: defaultBool(raw.NormTopKProb, true),
|
||||
MoeRoutedScalingFactor: raw.MoeRoutedScalingFactor,
|
||||
MoERouterUseSigmoid: defaultBool(raw.MoERouterUseSigmoid, true),
|
||||
MoEApplyRouterWeightOnInput: raw.MoEApplyRouterWeightOnInput,
|
||||
DecoderSparseStep: raw.DecoderSparseStep,
|
||||
MLPOnlyLayers: mlpOnlyLayers,
|
||||
MLPLayerTypes: raw.MLPLayerTypes,
|
||||
RopeParameters: fullRope,
|
||||
SwaRopeParameters: swaRope,
|
||||
SwaAttentionSinkEnabled: raw.SwaAttentionSinkEnabled,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func defaultBool(v *bool, fallback bool) bool {
|
||||
if v == nil {
|
||||
return fallback
|
||||
}
|
||||
return *v
|
||||
}
|
||||
|
||||
const (
|
||||
lagunaGatingFuncSoftmax uint32 = 1
|
||||
lagunaGatingFuncSigmoid uint32 = 2
|
||||
|
||||
lagunaLayerTypeGlobal uint32 = 0
|
||||
lagunaLayerTypeSliding uint32 = 1
|
||||
)
|
||||
|
||||
func (p *lagunaModel) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "laguna"
|
||||
// Laguna's chat template and built-in renderer both emit the leading
|
||||
// special token explicitly. Auto-prepending BOS here would duplicate it.
|
||||
kv["tokenizer.ggml.add_bos_token"] = false
|
||||
kv["tokenizer.ggml.pre"] = "laguna"
|
||||
// Laguna does not need tokenizer.chat_template at runtime: Ollama create
|
||||
// sets the Laguna renderer/parser from the architecture, and the renderer
|
||||
// owns prompt formatting.
|
||||
delete(kv, "tokenizer.chat_template")
|
||||
|
||||
kv["laguna.block_count"] = p.NumHiddenLayers
|
||||
kv["laguna.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["laguna.embedding_length"] = p.HiddenSize
|
||||
kv["laguna.feed_forward_length"] = p.IntermediateSize
|
||||
|
||||
if len(p.NumAttentionHeadsPerLayer) == int(p.NumHiddenLayers) {
|
||||
kv["laguna.attention.head_count"] = p.NumAttentionHeadsPerLayer
|
||||
} else {
|
||||
kv["laguna.attention.head_count"] = p.NumAttentionHeads
|
||||
}
|
||||
kv["laguna.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||
kv["laguna.attention.key_length"] = p.HeadDim
|
||||
kv["laguna.attention.value_length"] = p.HeadDim
|
||||
kv["laguna.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
kv["laguna.attention.sliding_window"] = p.SlidingWindow
|
||||
kv["laguna.attention.sink_enabled"] = p.SwaAttentionSinkEnabled
|
||||
|
||||
if len(p.LayerTypes) > 0 {
|
||||
encoded := make([]uint32, len(p.LayerTypes))
|
||||
slidingPattern := make([]bool, len(p.LayerTypes))
|
||||
for i, layerType := range p.LayerTypes {
|
||||
if lagunaLayerIsSliding(layerType) {
|
||||
encoded[i] = lagunaLayerTypeSliding
|
||||
slidingPattern[i] = true
|
||||
} else {
|
||||
encoded[i] = lagunaLayerTypeGlobal
|
||||
}
|
||||
}
|
||||
kv["laguna.attention.layer_types"] = encoded
|
||||
kv["laguna.attention.sliding_window_pattern"] = slidingPattern
|
||||
}
|
||||
|
||||
if p.Gating.perHead() {
|
||||
kv["laguna.attention.gating_type"] = uint32(1)
|
||||
} else {
|
||||
kv["laguna.attention.gating_type"] = uint32(0)
|
||||
}
|
||||
kv["laguna.attention.qk_norm"] = p.QKNormType == "rmsnorm"
|
||||
|
||||
kv["laguna.expert_count"] = p.NumExperts
|
||||
kv["laguna.expert_used_count"] = p.NumExpertsPerTok
|
||||
kv["laguna.expert_feed_forward_length"] = p.MoEIntermediateSize
|
||||
kv["laguna.expert_shared_feed_forward_length"] = p.SharedExpertIntermediateSize
|
||||
kv["laguna.expert_shared_count"] = uint32(1)
|
||||
kv["laguna.expert_weights_norm"] = p.NormTopKProb
|
||||
kv["laguna.expert_weights_scale"] = p.MoeRoutedScalingFactor
|
||||
kv["laguna.expert_gating_func"] = lagunaMoeGatingFunc(p.MoERouterUseSigmoid)
|
||||
kv["laguna.decoder_sparse_step"] = cmp.Or(p.DecoderSparseStep, uint32(1))
|
||||
|
||||
if leading, ok := lagunaLeadingDensePrefix(p.MLPOnlyLayers); ok {
|
||||
kv["laguna.leading_dense_block_count"] = leading
|
||||
}
|
||||
if len(p.MLPOnlyLayers) > 0 {
|
||||
kv["laguna.dense_layers"] = p.MLPOnlyLayers
|
||||
}
|
||||
|
||||
ropeType := p.RopeParameters.ropeType()
|
||||
kv["laguna.rope.freq_base"] = cmp.Or(p.RopeParameters.RopeTheta, float32(10000))
|
||||
kv["laguna.rope.scaling.type"] = ropeType
|
||||
ropeFactor := cmp.Or(p.RopeParameters.Factor, float32(1))
|
||||
kv["laguna.rope.scaling.factor"] = ropeFactor
|
||||
kv["laguna.rope.scaling.original_context_length"] = p.RopeParameters.OriginalMaxPositionEmbeddings
|
||||
kv["laguna.rope.scaling.beta_fast"] = p.RopeParameters.BetaFast
|
||||
kv["laguna.rope.scaling.beta_slow"] = p.RopeParameters.BetaSlow
|
||||
kv["laguna.rope.scaling.attn_factor"] = lagunaAttentionFactor(ropeType, ropeFactor, p.RopeParameters.AttentionFactor)
|
||||
kv["laguna.rope.partial_rotary_factor"] = cmp.Or(p.PartialRotaryFactor, float32(1))
|
||||
|
||||
swaRopeType := p.SwaRopeParameters.ropeType()
|
||||
kv["laguna.rope.swa.freq_base"] = cmp.Or(p.SwaRopeParameters.RopeTheta, float32(10000))
|
||||
kv["laguna.rope.swa.scaling.type"] = cmp.Or(swaRopeType, "linear")
|
||||
kv["laguna.rope.swa.scaling.factor"] = cmp.Or(p.SwaRopeParameters.Factor, float32(1))
|
||||
kv["laguna.rope.swa.partial_rotary_factor"] = cmp.Or(p.SwaRopeParameters.PartialRotaryFactor, float32(1))
|
||||
|
||||
headDim := p.HeadDim
|
||||
if headDim == 0 && p.NumAttentionHeads > 0 {
|
||||
headDim = p.HiddenSize / p.NumAttentionHeads
|
||||
}
|
||||
kv["laguna.rope.dimension_count"] = lagunaRopeDim(headDim, cmp.Or(p.PartialRotaryFactor, float32(1)))
|
||||
kv["laguna.rope.swa.dimension_count"] = lagunaRopeDim(headDim, cmp.Or(p.SwaRopeParameters.PartialRotaryFactor, float32(1)))
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *lagunaModel) parseMore(_ iofs.FS) error {
|
||||
return p.validate()
|
||||
}
|
||||
|
||||
func (p *lagunaModel) validate() error {
|
||||
if p.NumHiddenLayers == 0 {
|
||||
return fmt.Errorf("laguna: num_hidden_layers must be set")
|
||||
}
|
||||
if p.HiddenSize == 0 {
|
||||
return fmt.Errorf("laguna: hidden_size must be set")
|
||||
}
|
||||
if p.HeadDim == 0 {
|
||||
return fmt.Errorf("laguna: head_dim must be set")
|
||||
}
|
||||
if p.NumKeyValueHeads == 0 {
|
||||
return fmt.Errorf("laguna: num_key_value_heads must be set")
|
||||
}
|
||||
if p.SwaAttentionSinkEnabled {
|
||||
return fmt.Errorf("laguna: unsupported swa_attention_sink_enabled=true")
|
||||
}
|
||||
if !p.Gating.perHead() {
|
||||
return fmt.Errorf("laguna: unsupported attention gating %q: only gating=\"per-head\" is supported", p.Gating)
|
||||
}
|
||||
if p.QKNormType != "rmsnorm" {
|
||||
return fmt.Errorf("laguna: unsupported qk_norm_type %q: only rmsnorm is supported", p.QKNormType)
|
||||
}
|
||||
if !p.MoERouterUseSigmoid {
|
||||
return fmt.Errorf("laguna: unsupported moe_router_use_sigmoid=false")
|
||||
}
|
||||
if p.MoEApplyRouterWeightOnInput {
|
||||
return fmt.Errorf("laguna: unsupported moe_apply_router_weight_on_input=true")
|
||||
}
|
||||
if p.DecoderSparseStep != 0 && p.DecoderSparseStep != 1 {
|
||||
return fmt.Errorf("laguna: unsupported decoder_sparse_step=%d: only 1 is supported", p.DecoderSparseStep)
|
||||
}
|
||||
if len(p.MLPOnlyLayers) != 1 || p.MLPOnlyLayers[0] != 0 {
|
||||
return fmt.Errorf("laguna: unsupported mlp_only_layers=%v: only [0] is supported", p.MLPOnlyLayers)
|
||||
}
|
||||
if p.NumExperts == 0 {
|
||||
return fmt.Errorf("laguna: num_experts must be set")
|
||||
}
|
||||
if p.NumExpertsPerTok == 0 {
|
||||
return fmt.Errorf("laguna: num_experts_per_tok must be set")
|
||||
}
|
||||
if p.MoEIntermediateSize == 0 {
|
||||
return fmt.Errorf("laguna: moe_intermediate_size must be set")
|
||||
}
|
||||
if p.SharedExpertIntermediateSize == 0 {
|
||||
return fmt.Errorf("laguna: shared_expert_intermediate_size must be set")
|
||||
}
|
||||
|
||||
if len(p.LayerTypes) > 0 && len(p.LayerTypes) != int(p.NumHiddenLayers) {
|
||||
return fmt.Errorf("laguna: layer_types has %d entries, expected %d", len(p.LayerTypes), p.NumHiddenLayers)
|
||||
}
|
||||
for i, layerType := range p.LayerTypes {
|
||||
if !lagunaLayerIsGlobal(layerType) && !lagunaLayerIsSliding(layerType) {
|
||||
return fmt.Errorf("laguna: unsupported layer_types[%d]=%q", i, layerType)
|
||||
}
|
||||
}
|
||||
if len(p.NumAttentionHeadsPerLayer) > 0 && len(p.NumAttentionHeadsPerLayer) != int(p.NumHiddenLayers) {
|
||||
return fmt.Errorf("laguna: num_attention_heads_per_layer has %d entries, expected %d", len(p.NumAttentionHeadsPerLayer), p.NumHiddenLayers)
|
||||
}
|
||||
if len(p.NumAttentionHeadsPerLayer) == 0 && p.NumAttentionHeads == 0 {
|
||||
return fmt.Errorf("laguna: num_attention_heads or num_attention_heads_per_layer must be set")
|
||||
}
|
||||
for i, heads := range p.NumAttentionHeadsPerLayer {
|
||||
if heads == 0 {
|
||||
return fmt.Errorf("laguna: num_attention_heads_per_layer[%d] must be non-zero", i)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *lagunaModel) numHeadsForLayer(layer uint32) uint32 {
|
||||
if len(p.NumAttentionHeadsPerLayer) > int(layer) && p.NumAttentionHeadsPerLayer[layer] > 0 {
|
||||
return p.NumAttentionHeadsPerLayer[layer]
|
||||
}
|
||||
return p.NumAttentionHeads
|
||||
}
|
||||
|
||||
func (p *lagunaModel) layerUsesMoE(layer uint32) bool {
|
||||
for _, denseLayer := range p.MLPOnlyLayers {
|
||||
if denseLayer == layer {
|
||||
return false
|
||||
}
|
||||
}
|
||||
step := cmp.Or(p.DecoderSparseStep, uint32(1))
|
||||
return p.NumExperts > 0 && (layer+1)%step == 0
|
||||
}
|
||||
|
||||
func (p *lagunaModel) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"self_attn.g_proj", "attn_g",
|
||||
"self_attn.q_norm", "attn_q_norm",
|
||||
"self_attn.k_norm", "attn_k_norm",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate.weight", "ffn_gate_inp.weight",
|
||||
"mlp.experts.e_score_correction_bias", "exp_probs_b.bias",
|
||||
"mlp.shared_expert.gate_proj", "ffn_gate_shexp",
|
||||
"mlp.shared_expert.up_proj", "ffn_up_shexp",
|
||||
"mlp.shared_expert.down_proj", "ffn_down_shexp",
|
||||
"mlp.experts.*.gate_proj", "ffn_gate_exps",
|
||||
"mlp.experts.*.up_proj", "ffn_up_exps",
|
||||
"mlp.experts.*.down_proj", "ffn_down_exps",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *lagunaModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
// Current Laguna drops store routed MoE experts as separate per-expert
|
||||
// tensors. GGUF stores each projection as one stacked tensor. If future
|
||||
// drops change expert naming or layout, update these patterns with a
|
||||
// focused conversion test using the new tensor names.
|
||||
merges := make([]merge, 0, p.NumHiddenLayers*3)
|
||||
for i := range p.NumHiddenLayers {
|
||||
merges = append(merges,
|
||||
merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.gate_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
|
||||
},
|
||||
merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.up_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
|
||||
},
|
||||
merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.down_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
out, rest := mergeTensors(ts, merges...)
|
||||
for _, t := range rest {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *lagunaModel) specialTokenTypes() []string {
|
||||
return []string{"bos", "eos", "pad", "unk"}
|
||||
}
|
||||
|
||||
func lagunaLayerIsSliding(layerType string) bool {
|
||||
return strings.EqualFold(layerType, "sliding_attention")
|
||||
}
|
||||
|
||||
func lagunaLayerIsGlobal(layerType string) bool {
|
||||
return strings.EqualFold(layerType, "full_attention") || strings.EqualFold(layerType, "global_attention")
|
||||
}
|
||||
|
||||
func lagunaLeadingDensePrefix(layers []uint32) (uint32, bool) {
|
||||
for i, v := range layers {
|
||||
if v != uint32(i) {
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
return uint32(len(layers)), true
|
||||
}
|
||||
|
||||
func lagunaDenseLayers(mlpOnlyLayers []uint32, mlpLayerTypes []string) ([]uint32, error) {
|
||||
if len(mlpOnlyLayers) > 0 {
|
||||
return mlpOnlyLayers, nil
|
||||
}
|
||||
if len(mlpLayerTypes) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
denseLayers := make([]uint32, 0, len(mlpLayerTypes))
|
||||
for i, layerType := range mlpLayerTypes {
|
||||
switch {
|
||||
case strings.EqualFold(layerType, "dense"):
|
||||
denseLayers = append(denseLayers, uint32(i))
|
||||
case strings.EqualFold(layerType, "sparse"):
|
||||
default:
|
||||
return nil, fmt.Errorf("laguna: unsupported mlp_layer_types[%d]=%q", i, layerType)
|
||||
}
|
||||
}
|
||||
return denseLayers, nil
|
||||
}
|
||||
|
||||
func lagunaMoeGatingFunc(useSigmoid bool) uint32 {
|
||||
if useSigmoid {
|
||||
return lagunaGatingFuncSigmoid
|
||||
}
|
||||
return lagunaGatingFuncSoftmax
|
||||
}
|
||||
|
||||
func lagunaAttentionFactor(ropeType string, scaleFactor, attentionFactor float32) float32 {
|
||||
if attentionFactor != 0 {
|
||||
return attentionFactor
|
||||
}
|
||||
if strings.EqualFold(ropeType, "yarn") && scaleFactor > 1 {
|
||||
return float32(0.1*math.Log(float64(scaleFactor)) + 1)
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
func lagunaRopeDim(headDim uint32, partialRotaryFactor float32) uint32 {
|
||||
if headDim == 0 {
|
||||
return 0
|
||||
}
|
||||
dim := uint32(float32(headDim) * partialRotaryFactor)
|
||||
if dim == 0 || dim > headDim {
|
||||
dim = headDim
|
||||
}
|
||||
if dim%2 != 0 {
|
||||
dim--
|
||||
}
|
||||
if dim == 0 {
|
||||
return headDim
|
||||
}
|
||||
return dim
|
||||
}
|
||||
|
||||
var (
|
||||
_ ModelConverter = (*lagunaModel)(nil)
|
||||
_ moreParser = (*lagunaModel)(nil)
|
||||
)
|
||||
450
convert/convert_laguna_test.go
Normal file
450
convert/convert_laguna_test.go
Normal file
@@ -0,0 +1,450 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type lagunaTestTensor struct {
|
||||
tensorBase
|
||||
}
|
||||
|
||||
func newLagunaTestTensor(name string, shape ...uint64) Tensor {
|
||||
return &lagunaTestTensor{tensorBase: tensorBase{name: name, shape: shape}}
|
||||
}
|
||||
|
||||
func (t *lagunaTestTensor) WriteTo(io.Writer) (int64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (t *lagunaTestTensor) Clone() Tensor {
|
||||
return &lagunaTestTensor{tensorBase: tensorBase{
|
||||
name: t.name,
|
||||
shape: append([]uint64(nil), t.shape...),
|
||||
}}
|
||||
}
|
||||
|
||||
func TestLagunaReplacements(t *testing.T) {
|
||||
p := lagunaModel{}
|
||||
r := strings.NewReplacer(p.Replacements()...)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{"embed", "model.embed_tokens.weight", "token_embd.weight"},
|
||||
{"final_norm", "model.norm.weight", "output_norm.weight"},
|
||||
{"lm_head", "lm_head.weight", "output.weight"},
|
||||
{"block prefix", "model.layers.7.input_layernorm.weight", "blk.7.attn_norm.weight"},
|
||||
{"q", "model.layers.3.self_attn.q_proj.weight", "blk.3.attn_q.weight"},
|
||||
{"k", "model.layers.3.self_attn.k_proj.weight", "blk.3.attn_k.weight"},
|
||||
{"v", "model.layers.3.self_attn.v_proj.weight", "blk.3.attn_v.weight"},
|
||||
{"o", "model.layers.3.self_attn.o_proj.weight", "blk.3.attn_output.weight"},
|
||||
{"g", "model.layers.3.self_attn.g_proj.weight", "blk.3.attn_g.weight"},
|
||||
{"q_norm", "model.layers.3.self_attn.q_norm.weight", "blk.3.attn_q_norm.weight"},
|
||||
{"k_norm", "model.layers.3.self_attn.k_norm.weight", "blk.3.attn_k_norm.weight"},
|
||||
{"post_attn_norm", "model.layers.3.post_attention_layernorm.weight", "blk.3.ffn_norm.weight"},
|
||||
{"dense gate", "model.layers.0.mlp.gate_proj.weight", "blk.0.ffn_gate.weight"},
|
||||
{"dense up", "model.layers.0.mlp.up_proj.weight", "blk.0.ffn_up.weight"},
|
||||
{"dense down", "model.layers.0.mlp.down_proj.weight", "blk.0.ffn_down.weight"},
|
||||
{"shexp gate", "model.layers.5.mlp.shared_expert.gate_proj.weight", "blk.5.ffn_gate_shexp.weight"},
|
||||
{"shexp up", "model.layers.5.mlp.shared_expert.up_proj.weight", "blk.5.ffn_up_shexp.weight"},
|
||||
{"shexp down", "model.layers.5.mlp.shared_expert.down_proj.weight", "blk.5.ffn_down_shexp.weight"},
|
||||
{"router", "model.layers.5.mlp.gate.weight", "blk.5.ffn_gate_inp.weight"},
|
||||
{"score bias", "model.layers.5.mlp.experts.e_score_correction_bias", "blk.5.exp_probs_b.bias"},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := r.Replace(tc.in); got != tc.want {
|
||||
t.Errorf("Replace(%q) = %q, want %q", tc.in, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLagunaValidateRejectsUnsupportedVariants(t *testing.T) {
|
||||
base := validLagunaTestModel()
|
||||
tests := []struct {
|
||||
name string
|
||||
edit func(*lagunaModel)
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "per-element gating",
|
||||
edit: func(m *lagunaModel) {
|
||||
m.Gating = "per-element"
|
||||
},
|
||||
want: "unsupported attention gating",
|
||||
},
|
||||
{
|
||||
name: "attention sinks",
|
||||
edit: func(m *lagunaModel) {
|
||||
m.SwaAttentionSinkEnabled = true
|
||||
},
|
||||
want: "swa_attention_sink_enabled=true",
|
||||
},
|
||||
{
|
||||
name: "qk norm disabled",
|
||||
edit: func(m *lagunaModel) {
|
||||
m.QKNormType = "none"
|
||||
},
|
||||
want: "unsupported qk_norm_type",
|
||||
},
|
||||
{
|
||||
name: "softmax moe",
|
||||
edit: func(m *lagunaModel) {
|
||||
m.MoERouterUseSigmoid = false
|
||||
},
|
||||
want: "moe_router_use_sigmoid=false",
|
||||
},
|
||||
{
|
||||
name: "router weight on input",
|
||||
edit: func(m *lagunaModel) {
|
||||
m.MoEApplyRouterWeightOnInput = true
|
||||
},
|
||||
want: "moe_apply_router_weight_on_input=true",
|
||||
},
|
||||
{
|
||||
name: "unknown layer type",
|
||||
edit: func(m *lagunaModel) {
|
||||
m.LayerTypes[1] = "local_attention"
|
||||
},
|
||||
want: "unsupported layer_types[1]",
|
||||
},
|
||||
{
|
||||
name: "nonstandard dense layout",
|
||||
edit: func(m *lagunaModel) {
|
||||
m.MLPOnlyLayers = []uint32{0, 3}
|
||||
},
|
||||
want: "unsupported mlp_only_layers",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
m := base
|
||||
m.LayerTypes = append([]string(nil), base.LayerTypes...)
|
||||
m.NumAttentionHeadsPerLayer = append([]uint32(nil), base.NumAttentionHeadsPerLayer...)
|
||||
m.MLPOnlyLayers = append([]uint32(nil), base.MLPOnlyLayers...)
|
||||
tc.edit(&m)
|
||||
err := m.validate()
|
||||
if err == nil || !strings.Contains(err.Error(), tc.want) {
|
||||
t.Fatalf("validate() error = %v, want substring %q", err, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLagunaGAConfigNormalizesBoolGatingAndNestedRope(t *testing.T) {
|
||||
var m lagunaModel
|
||||
if err := json.Unmarshal([]byte(`{
|
||||
"architectures": ["LagunaForCausalLM"],
|
||||
"num_hidden_layers": 1,
|
||||
"hidden_size": 8,
|
||||
"num_attention_heads": 2,
|
||||
"num_key_value_heads": 1,
|
||||
"head_dim": 4,
|
||||
"gating": true,
|
||||
"num_experts": 2,
|
||||
"num_experts_per_tok": 1,
|
||||
"moe_intermediate_size": 4,
|
||||
"shared_expert_intermediate_size": 4,
|
||||
"decoder_sparse_step": 1,
|
||||
"mlp_layer_types": ["dense"],
|
||||
"rope_parameters": {
|
||||
"full_attention": {
|
||||
"rope_theta": 500000,
|
||||
"rope_type": "yarn",
|
||||
"factor": 32,
|
||||
"original_max_position_embeddings": 4096,
|
||||
"beta_fast": 64,
|
||||
"beta_slow": 1,
|
||||
"attention_factor": 1,
|
||||
"partial_rotary_factor": 0.5
|
||||
},
|
||||
"sliding_attention": {
|
||||
"rope_theta": 10000,
|
||||
"rope_type": "default",
|
||||
"partial_rotary_factor": 1
|
||||
}
|
||||
}
|
||||
}`), &m); err != nil {
|
||||
t.Fatalf("json.Unmarshal() error = %v", err)
|
||||
}
|
||||
|
||||
if err := m.validate(); err != nil {
|
||||
t.Fatalf("validate() error = %v", err)
|
||||
}
|
||||
if m.Gating != "true" {
|
||||
t.Fatalf("Gating = %q, want raw true marker", m.Gating)
|
||||
}
|
||||
if !m.Gating.perHead() {
|
||||
t.Fatal("expected bool gating to normalize as per-head support")
|
||||
}
|
||||
if m.QKNormType != "rmsnorm" {
|
||||
t.Fatalf("QKNormType = %q, want rmsnorm default", m.QKNormType)
|
||||
}
|
||||
if !m.MoERouterUseSigmoid {
|
||||
t.Fatal("MoERouterUseSigmoid should default true")
|
||||
}
|
||||
if !m.NormTopKProb {
|
||||
t.Fatal("NormTopKProb should default true")
|
||||
}
|
||||
if diff := cmp.Diff(m.MLPOnlyLayers, []uint32{0}); diff != "" {
|
||||
t.Fatalf("MLPOnlyLayers mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
if m.RopeParameters.RopeTheta != 500000 || m.RopeParameters.PartialRotaryFactor != 0.5 {
|
||||
t.Fatalf("full rope = %#v, want theta=500000 partial=0.5", m.RopeParameters)
|
||||
}
|
||||
if m.SwaRopeParameters.RopeTheta != 10000 || m.SwaRopeParameters.PartialRotaryFactor != 1 {
|
||||
t.Fatalf("swa rope = %#v, want theta=10000 partial=1", m.SwaRopeParameters)
|
||||
}
|
||||
}
|
||||
|
||||
func validLagunaTestModel() lagunaModel {
|
||||
return lagunaModel{
|
||||
ModelParameters: ModelParameters{
|
||||
VocabSize: 32,
|
||||
},
|
||||
NumHiddenLayers: 2,
|
||||
HiddenSize: 8,
|
||||
IntermediateSize: 16,
|
||||
NumAttentionHeads: 2,
|
||||
NumKeyValueHeads: 1,
|
||||
HeadDim: 4,
|
||||
RMSNormEPS: 1e-6,
|
||||
MaxPositionEmbeddings: 4096,
|
||||
SlidingWindow: 512,
|
||||
Gating: "per-head",
|
||||
QKNormType: "rmsnorm",
|
||||
LayerTypes: []string{"global_attention", "sliding_attention"},
|
||||
NumAttentionHeadsPerLayer: []uint32{2, 2},
|
||||
NumExperts: 2,
|
||||
NumExpertsPerTok: 1,
|
||||
MoEIntermediateSize: 4,
|
||||
SharedExpertIntermediateSize: 4,
|
||||
NormTopKProb: true,
|
||||
MoeRoutedScalingFactor: 2.5,
|
||||
MoERouterUseSigmoid: true,
|
||||
DecoderSparseStep: 1,
|
||||
MLPOnlyLayers: []uint32{0},
|
||||
}
|
||||
}
|
||||
|
||||
func validLagunaTestTensors(m lagunaModel) []Tensor {
|
||||
ts := []Tensor{
|
||||
newLagunaTestTensor("token_embd.weight", uint64(m.VocabSize), uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor("output_norm.weight", uint64(m.HiddenSize)),
|
||||
}
|
||||
|
||||
for layer := range m.NumHiddenLayers {
|
||||
prefix := fmt.Sprintf("blk.%d", layer)
|
||||
heads := uint64(m.numHeadsForLayer(layer))
|
||||
attnWidth := heads * uint64(m.HeadDim)
|
||||
kvWidth := uint64(m.NumKeyValueHeads * m.HeadDim)
|
||||
ts = append(ts,
|
||||
newLagunaTestTensor(prefix+".attn_norm.weight", uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".ffn_norm.weight", uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".attn_q.weight", attnWidth, uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".attn_k.weight", kvWidth, uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".attn_v.weight", kvWidth, uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".attn_output.weight", uint64(m.HiddenSize), attnWidth),
|
||||
newLagunaTestTensor(prefix+".attn_g.weight", heads, uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".attn_q_norm.weight", uint64(m.HeadDim)),
|
||||
newLagunaTestTensor(prefix+".attn_k_norm.weight", uint64(m.HeadDim)),
|
||||
)
|
||||
|
||||
if m.layerUsesMoE(layer) {
|
||||
ts = append(ts,
|
||||
newLagunaTestTensor(prefix+".ffn_gate_inp.weight", uint64(m.NumExperts), uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".exp_probs_b.bias", uint64(m.NumExperts)),
|
||||
newLagunaTestTensor(prefix+".ffn_gate_shexp.weight", uint64(m.SharedExpertIntermediateSize), uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".ffn_up_shexp.weight", uint64(m.SharedExpertIntermediateSize), uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".ffn_down_shexp.weight", uint64(m.HiddenSize), uint64(m.SharedExpertIntermediateSize)),
|
||||
)
|
||||
for expert := range m.NumExperts {
|
||||
ts = append(ts,
|
||||
newLagunaTestTensor(fmt.Sprintf("%s.mlp.experts.%d.gate_proj.weight", prefix, expert), uint64(m.MoEIntermediateSize), uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(fmt.Sprintf("%s.mlp.experts.%d.up_proj.weight", prefix, expert), uint64(m.MoEIntermediateSize), uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(fmt.Sprintf("%s.mlp.experts.%d.down_proj.weight", prefix, expert), uint64(m.HiddenSize), uint64(m.MoEIntermediateSize)),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
ts = append(ts,
|
||||
newLagunaTestTensor(prefix+".ffn_gate.weight", uint64(m.IntermediateSize), uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".ffn_up.weight", uint64(m.IntermediateSize), uint64(m.HiddenSize)),
|
||||
newLagunaTestTensor(prefix+".ffn_down.weight", uint64(m.HiddenSize), uint64(m.IntermediateSize)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return ts
|
||||
}
|
||||
|
||||
func TestLagunaTensorsMergeRoutedExperts(t *testing.T) {
|
||||
m := validLagunaTestModel()
|
||||
out := m.Tensors(validLagunaTestTensors(m))
|
||||
|
||||
tensors := make(map[string]*ggml.Tensor, len(out))
|
||||
for _, t := range out {
|
||||
tensors[t.Name] = t
|
||||
}
|
||||
|
||||
tests := map[string][]uint64{
|
||||
"blk.1.ffn_gate_exps.weight": {uint64(m.NumExperts), uint64(m.MoEIntermediateSize), uint64(m.HiddenSize)},
|
||||
"blk.1.ffn_up_exps.weight": {uint64(m.NumExperts), uint64(m.MoEIntermediateSize), uint64(m.HiddenSize)},
|
||||
"blk.1.ffn_down_exps.weight": {uint64(m.NumExperts), uint64(m.HiddenSize), uint64(m.MoEIntermediateSize)},
|
||||
}
|
||||
for name, wantShape := range tests {
|
||||
tensor, ok := tensors[name]
|
||||
if !ok {
|
||||
t.Fatalf("missing merged tensor %q", name)
|
||||
}
|
||||
if diff := cmp.Diff(wantShape, tensor.Shape); diff != "" {
|
||||
t.Fatalf("%s shape mismatch (-want +got):\n%s", name, diff)
|
||||
}
|
||||
}
|
||||
|
||||
for expert := range m.NumExperts {
|
||||
name := fmt.Sprintf("blk.1.mlp.experts.%d.gate_proj.weight", expert)
|
||||
if _, ok := tensors[name]; ok {
|
||||
t.Fatalf("unexpected unmerged expert tensor %q", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLagunaKVShape(t *testing.T) {
|
||||
m := lagunaModel{
|
||||
NumHiddenLayers: 4,
|
||||
HiddenSize: 128,
|
||||
IntermediateSize: 256,
|
||||
NumAttentionHeads: 8,
|
||||
NumKeyValueHeads: 4,
|
||||
HeadDim: 16,
|
||||
RMSNormEPS: 1e-6,
|
||||
MaxPositionEmbeddings: 4096,
|
||||
SlidingWindow: 512,
|
||||
PartialRotaryFactor: 0.5,
|
||||
Gating: "per-head",
|
||||
QKNormType: "rmsnorm",
|
||||
LayerTypes: []string{"full_attention", "sliding_attention", "sliding_attention", "sliding_attention"},
|
||||
NumAttentionHeadsPerLayer: []uint32{8, 16, 16, 16},
|
||||
NumExperts: 32,
|
||||
NumExpertsPerTok: 4,
|
||||
MoEIntermediateSize: 64,
|
||||
SharedExpertIntermediateSize: 64,
|
||||
NormTopKProb: true,
|
||||
MoeRoutedScalingFactor: 2.5,
|
||||
MoERouterUseSigmoid: true,
|
||||
DecoderSparseStep: 1,
|
||||
MLPOnlyLayers: []uint32{0},
|
||||
}
|
||||
m.RopeParameters.RopeTheta = 500000
|
||||
m.RopeParameters.RopeType = "yarn"
|
||||
m.RopeParameters.Factor = 32
|
||||
m.RopeParameters.OriginalMaxPositionEmbeddings = 4096
|
||||
m.RopeParameters.BetaFast = 64
|
||||
m.RopeParameters.BetaSlow = 1
|
||||
m.SwaRopeParameters.RopeTheta = 10000
|
||||
m.SwaRopeParameters.RopeType = "linear"
|
||||
m.SwaRopeParameters.Factor = 1
|
||||
m.SwaRopeParameters.PartialRotaryFactor = 1
|
||||
|
||||
kv := m.KV(&Tokenizer{Vocabulary: &Vocabulary{}, Template: "{% include 'chat_template.jinja' %}"})
|
||||
required := []string{
|
||||
"general.architecture",
|
||||
"tokenizer.ggml.pre",
|
||||
"laguna.block_count",
|
||||
"laguna.context_length",
|
||||
"laguna.embedding_length",
|
||||
"laguna.feed_forward_length",
|
||||
"laguna.attention.head_count",
|
||||
"laguna.attention.head_count_kv",
|
||||
"laguna.attention.key_length",
|
||||
"laguna.attention.value_length",
|
||||
"laguna.attention.layer_norm_rms_epsilon",
|
||||
"laguna.attention.sliding_window",
|
||||
"laguna.attention.layer_types",
|
||||
"laguna.attention.sliding_window_pattern",
|
||||
"laguna.attention.gating_type",
|
||||
"laguna.attention.qk_norm",
|
||||
"laguna.expert_count",
|
||||
"laguna.expert_used_count",
|
||||
"laguna.expert_feed_forward_length",
|
||||
"laguna.expert_shared_feed_forward_length",
|
||||
"laguna.expert_shared_count",
|
||||
"laguna.expert_weights_norm",
|
||||
"laguna.expert_weights_scale",
|
||||
"laguna.expert_gating_func",
|
||||
"laguna.leading_dense_block_count",
|
||||
"laguna.dense_layers",
|
||||
"laguna.rope.freq_base",
|
||||
"laguna.rope.scaling.type",
|
||||
"laguna.rope.scaling.factor",
|
||||
"laguna.rope.partial_rotary_factor",
|
||||
"laguna.rope.swa.freq_base",
|
||||
"laguna.rope.swa.scaling.type",
|
||||
"laguna.rope.dimension_count",
|
||||
"laguna.rope.swa.dimension_count",
|
||||
}
|
||||
for _, k := range required {
|
||||
if _, ok := kv[k]; !ok {
|
||||
t.Errorf("missing required KV: %s", k)
|
||||
}
|
||||
}
|
||||
|
||||
if got := kv["general.architecture"]; got != "laguna" {
|
||||
t.Errorf("architecture = %v, want laguna", got)
|
||||
}
|
||||
if got := kv["tokenizer.ggml.add_bos_token"]; got != false {
|
||||
t.Errorf("tokenizer.ggml.add_bos_token = %v, want false", got)
|
||||
}
|
||||
if _, ok := kv["tokenizer.chat_template"]; ok {
|
||||
t.Fatal("tokenizer.chat_template should be omitted for Laguna")
|
||||
}
|
||||
if got := kv["laguna.expert_gating_func"]; got != lagunaGatingFuncSigmoid {
|
||||
t.Errorf("expert_gating_func = %v, want sigmoid(%d)", got, lagunaGatingFuncSigmoid)
|
||||
}
|
||||
if got := kv["laguna.leading_dense_block_count"]; got != uint32(1) {
|
||||
t.Errorf("leading_dense_block_count = %v, want 1", got)
|
||||
}
|
||||
if got := kv["laguna.rope.dimension_count"]; got != uint32(8) {
|
||||
t.Errorf("rope.dimension_count = %v, want 8", got)
|
||||
}
|
||||
if got := kv["laguna.rope.swa.dimension_count"]; got != uint32(16) {
|
||||
t.Errorf("rope.swa.dimension_count = %v, want 16", got)
|
||||
}
|
||||
if got, ok := kv["laguna.attention.layer_types"].([]uint32); !ok || len(got) != 4 || got[0] != 0 || got[1] != 1 || got[2] != 1 || got[3] != 1 {
|
||||
t.Fatalf("layer_types = %#v, want [0 1 1 1]", kv["laguna.attention.layer_types"])
|
||||
}
|
||||
if got, ok := kv["laguna.attention.sliding_window_pattern"].([]bool); !ok || len(got) != 4 || got[0] || !got[1] || !got[2] || !got[3] {
|
||||
t.Fatalf("sliding_window_pattern = %#v, want [false true true true]", kv["laguna.attention.sliding_window_pattern"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestLagunaKVYarnAttentionFactorFallback(t *testing.T) {
|
||||
m := validLagunaTestModel()
|
||||
m.RopeParameters.RopeType = "yarn"
|
||||
m.RopeParameters.Factor = 32
|
||||
|
||||
kv := m.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
|
||||
got, ok := kv["laguna.rope.scaling.attn_factor"].(float32)
|
||||
if !ok {
|
||||
t.Fatalf("attn_factor type = %T, want float32", kv["laguna.rope.scaling.attn_factor"])
|
||||
}
|
||||
|
||||
want := float32(0.1*math.Log(32) + 1)
|
||||
if diff := math.Abs(float64(got - want)); diff > 1e-6 {
|
||||
t.Fatalf("attn_factor = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
235
convert/convert_lfm2.go
Normal file
235
convert/convert_lfm2.go
Normal file
@@ -0,0 +1,235 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type lfm2Model struct {
|
||||
ModelParameters
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
BlockFFDim uint32 `json:"block_ff_dim"`
|
||||
BlockMultipleOf uint32 `json:"block_multiple_of"`
|
||||
BlockAutoAdjustFFDim bool `json:"block_auto_adjust_ff_dim"`
|
||||
BlockFFNDimMultiplier float32 `json:"block_ffn_dim_multiplier"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
NormEps float32 `json:"norm_eps"`
|
||||
ConvLCache uint32 `json:"conv_L_cache"`
|
||||
MoEIntermediateSize uint32 `json:"moe_intermediate_size"`
|
||||
NumExperts uint32 `json:"num_experts"`
|
||||
NumLocalExperts uint32 `json:"num_local_experts"`
|
||||
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||
NumDenseLayers uint32 `json:"num_dense_layers"`
|
||||
RoutedScalingFactor float32 `json:"routed_scaling_factor"`
|
||||
LayerTypes []string `json:"layer_types"`
|
||||
TieEmbedding bool `json:"tie_embedding"`
|
||||
RopeParameters struct {
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
} `json:"rope_parameters"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*lfm2Model)(nil)
|
||||
|
||||
const (
|
||||
defaultMaxPositionEmbeddings = uint32(128_000)
|
||||
fallbackContextLength = uint32(32_768)
|
||||
)
|
||||
|
||||
func (p *lfm2Model) isMoE() bool {
|
||||
return p.ModelType == "lfm2_moe" || p.expertCount() > 0
|
||||
}
|
||||
|
||||
func (p *lfm2Model) ropeFreqBase() float32 {
|
||||
if p.RopeTheta != 0 {
|
||||
return p.RopeTheta
|
||||
}
|
||||
|
||||
return p.RopeParameters.RopeTheta
|
||||
}
|
||||
|
||||
func (p *lfm2Model) expertCount() uint32 {
|
||||
if p.NumLocalExperts > 0 {
|
||||
return p.NumLocalExperts
|
||||
}
|
||||
return p.NumExperts
|
||||
}
|
||||
|
||||
func (p *lfm2Model) feedForwardLength() uint32 {
|
||||
ff := p.IntermediateSize
|
||||
if p.BlockFFDim != 0 {
|
||||
ff = p.BlockFFDim
|
||||
}
|
||||
|
||||
if !p.BlockAutoAdjustFFDim || p.BlockMultipleOf == 0 {
|
||||
return ff
|
||||
}
|
||||
|
||||
ff = (2 * ff) / 3
|
||||
|
||||
// Keep default multiplier behavior consistent with llama.cpp conversion.
|
||||
if p.BlockFFNDimMultiplier != 0 {
|
||||
ff = uint32(float32(ff) * p.BlockFFNDimMultiplier)
|
||||
}
|
||||
|
||||
m := p.BlockMultipleOf
|
||||
return m * ((ff + m - 1) / m)
|
||||
}
|
||||
|
||||
func (p *lfm2Model) hasKnownContextLengthFallbackSignature() bool {
|
||||
return p.isMoE() &&
|
||||
p.VocabSize == 65536 &&
|
||||
p.HiddenSize == 2048 &&
|
||||
p.NumHiddenLayers == 40 &&
|
||||
p.IntermediateSize == 11776 &&
|
||||
p.NumAttentionHeads == 32 &&
|
||||
p.NumKeyValueHeads == 8 &&
|
||||
p.NumDenseLayers == 2 &&
|
||||
p.expertCount() == 64 &&
|
||||
p.NumExpertsPerToken == 4 &&
|
||||
p.MoEIntermediateSize == 1536
|
||||
}
|
||||
|
||||
func (p *lfm2Model) contextLength() uint32 {
|
||||
if p.MaxPositionEmbeddings == defaultMaxPositionEmbeddings && p.hasKnownContextLengthFallbackSignature() {
|
||||
return fallbackContextLength
|
||||
}
|
||||
|
||||
return p.MaxPositionEmbeddings
|
||||
}
|
||||
|
||||
func (p *lfm2Model) KV(t *Tokenizer) KV {
|
||||
architecture := "lfm2"
|
||||
if p.isMoE() {
|
||||
architecture = "lfm2moe"
|
||||
}
|
||||
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = architecture
|
||||
kv["tokenizer.ggml.pre"] = "lfm2"
|
||||
kv["vocab_size"] = p.VocabSize
|
||||
kv["block_count"] = p.NumHiddenLayers
|
||||
kv["embedding_length"] = p.HiddenSize
|
||||
kv["feed_forward_length"] = p.feedForwardLength()
|
||||
kv["context_length"] = p.contextLength()
|
||||
|
||||
// Build per-layer KV head count array based on layer_types
|
||||
// (0 = shortconv layer, non-zero = attention layer with that many KV heads).
|
||||
//
|
||||
// Dense LFM2 in HF defaults to all attention layers when layer_types is absent.
|
||||
// Preserve that behavior to avoid accidentally emitting all-conv metadata.
|
||||
kvHeadCounts := make([]uint32, p.NumHiddenLayers)
|
||||
if len(p.LayerTypes) == 0 {
|
||||
for i := range p.NumHiddenLayers {
|
||||
kvHeadCounts[i] = p.NumKeyValueHeads
|
||||
}
|
||||
} else {
|
||||
for i := range p.NumHiddenLayers {
|
||||
if int(i) < len(p.LayerTypes) && p.LayerTypes[i] == "full_attention" {
|
||||
kvHeadCounts[i] = p.NumKeyValueHeads
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kv["attention.head_count"] = p.NumAttentionHeads
|
||||
kv["attention.head_count_kv"] = kvHeadCounts
|
||||
kv["attention.key_length"] = p.HiddenSize / p.NumAttentionHeads
|
||||
kv["attention.value_length"] = p.HiddenSize / p.NumAttentionHeads
|
||||
kv["attention.layer_norm_rms_epsilon"] = p.NormEps
|
||||
kv["shortconv.l_cache"] = p.ConvLCache
|
||||
|
||||
if ropeFreqBase := p.ropeFreqBase(); ropeFreqBase != 0 {
|
||||
kv["rope.freq_base"] = ropeFreqBase
|
||||
}
|
||||
|
||||
if p.isMoE() {
|
||||
kv["expert_count"] = p.expertCount()
|
||||
kv["expert_used_count"] = p.NumExpertsPerToken
|
||||
kv["expert_feed_forward_length"] = p.MoEIntermediateSize
|
||||
kv["leading_dense_block_count"] = p.NumDenseLayers
|
||||
kv["expert_gating_func"] = uint32(2) // sigmoid
|
||||
kv["expert_weights_scale"] = cmp.Or(p.RoutedScalingFactor, float32(1.0))
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *lfm2Model) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
if p.isMoE() {
|
||||
merges := make([]merge, 0, p.NumHiddenLayers*3)
|
||||
for i := range p.NumHiddenLayers {
|
||||
if i < p.NumDenseLayers {
|
||||
continue
|
||||
}
|
||||
|
||||
merges = append(merges, merge{
|
||||
fmt.Sprintf("blk.%d.feed_forward.experts.*.w1.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
|
||||
}, merge{
|
||||
fmt.Sprintf("blk.%d.feed_forward.experts.*.w2.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
|
||||
}, merge{
|
||||
fmt.Sprintf("blk.%d.feed_forward.experts.*.w3.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
|
||||
})
|
||||
}
|
||||
|
||||
merged, remaining := mergeTensors(ts, merges...)
|
||||
out = append(out, merged...)
|
||||
ts = remaining
|
||||
}
|
||||
|
||||
for _, t := range ts {
|
||||
shape := t.Shape()
|
||||
|
||||
// Squeeze conv weights: [D, 1, K] -> [D, K]
|
||||
if strings.HasSuffix(t.Name(), "shortconv.conv.weight") {
|
||||
if len(shape) == 3 && shape[1] == 1 {
|
||||
shape = []uint64{shape[0], shape[2]}
|
||||
}
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: slices.Clone(shape),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *lfm2Model) Replacements() []string {
|
||||
return []string{
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.embedding_norm", "token_embd_norm",
|
||||
"model.layers", "blk",
|
||||
"operator_norm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.out_proj", "attn_output",
|
||||
"self_attn.q_layernorm", "attn_q_norm",
|
||||
"self_attn.k_layernorm", "attn_k_norm",
|
||||
"conv.conv", "shortconv.conv",
|
||||
"conv.in_proj", "shortconv.in_proj",
|
||||
"conv.out_proj", "shortconv.out_proj",
|
||||
"feed_forward.gate", "ffn_gate_inp",
|
||||
"feed_forward.expert_bias", "exp_probs_b.bias",
|
||||
"feed_forward.w1", "ffn_gate",
|
||||
"feed_forward.w2", "ffn_down",
|
||||
"feed_forward.w3", "ffn_up",
|
||||
"ffn_norm", "ffn_norm",
|
||||
}
|
||||
}
|
||||
271
convert/convert_lfm2_test.go
Normal file
271
convert/convert_lfm2_test.go
Normal file
@@ -0,0 +1,271 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"io"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type lfm2StubTensor struct {
|
||||
tensorBase
|
||||
}
|
||||
|
||||
func newLFM2StubTensor(name string, shape []uint64) *lfm2StubTensor {
|
||||
return &lfm2StubTensor{
|
||||
tensorBase: tensorBase{
|
||||
name: name,
|
||||
shape: shape,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *lfm2StubTensor) WriteTo(io.Writer) (int64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (t *lfm2StubTensor) Clone() Tensor {
|
||||
return &lfm2StubTensor{
|
||||
tensorBase: tensorBase{
|
||||
name: t.name,
|
||||
shape: slices.Clone(t.shape),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2MoEKV(t *testing.T) {
|
||||
var p lfm2Model
|
||||
p.ModelParameters.ModelType = "lfm2_moe"
|
||||
p.VocabSize = 65536
|
||||
p.HiddenSize = 2048
|
||||
p.NumHiddenLayers = 4
|
||||
p.MaxPositionEmbeddings = 128000
|
||||
p.IntermediateSize = 11776
|
||||
p.NumAttentionHeads = 32
|
||||
p.NumKeyValueHeads = 8
|
||||
p.LayerTypes = []string{"conv", "full_attention", "conv", "full_attention"}
|
||||
p.NormEps = 1e-5
|
||||
p.ConvLCache = 3
|
||||
p.MoEIntermediateSize = 1536
|
||||
p.NumExperts = 64
|
||||
p.NumExpertsPerToken = 4
|
||||
p.NumDenseLayers = 2
|
||||
p.RopeParameters.RopeTheta = 1_000_000
|
||||
|
||||
kv := p.KV(&Tokenizer{Vocabulary: &Vocabulary{Model: "gpt2"}})
|
||||
|
||||
if got, want := kv["general.architecture"], "lfm2moe"; got != want {
|
||||
t.Fatalf("general.architecture = %v, want %v", got, want)
|
||||
}
|
||||
if got, want := kv["tokenizer.ggml.pre"], "lfm2"; got != want {
|
||||
t.Fatalf("tokenizer.ggml.pre = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["expert_count"], uint32(64); got != want {
|
||||
t.Fatalf("expert_count = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["expert_used_count"], uint32(4); got != want {
|
||||
t.Fatalf("expert_used_count = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["expert_feed_forward_length"], uint32(1536); got != want {
|
||||
t.Fatalf("expert_feed_forward_length = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["leading_dense_block_count"], uint32(2); got != want {
|
||||
t.Fatalf("leading_dense_block_count = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["expert_gating_func"], uint32(2); got != want {
|
||||
t.Fatalf("expert_gating_func = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
gotHeadCounts, ok := kv["attention.head_count_kv"].([]uint32)
|
||||
if !ok {
|
||||
t.Fatalf("attention.head_count_kv has unexpected type %T", kv["attention.head_count_kv"])
|
||||
}
|
||||
|
||||
wantHeadCounts := []uint32{0, 8, 0, 8}
|
||||
if !slices.Equal(gotHeadCounts, wantHeadCounts) {
|
||||
t.Fatalf("attention.head_count_kv = %v, want %v", gotHeadCounts, wantHeadCounts)
|
||||
}
|
||||
|
||||
if got, want := kv["rope.freq_base"], float32(1_000_000); got != want {
|
||||
t.Fatalf("rope.freq_base = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2DenseKV(t *testing.T) {
|
||||
p := lfm2Model{
|
||||
ModelParameters: ModelParameters{ModelType: "lfm2", VocabSize: 32000},
|
||||
HiddenSize: 1024,
|
||||
NumHiddenLayers: 2,
|
||||
MaxPositionEmbeddings: 32768,
|
||||
IntermediateSize: 4096,
|
||||
NumAttentionHeads: 16,
|
||||
NumKeyValueHeads: 4,
|
||||
LayerTypes: []string{"conv", "full_attention"},
|
||||
NormEps: 1e-5,
|
||||
ConvLCache: 3,
|
||||
RopeTheta: 10000,
|
||||
}
|
||||
|
||||
kv := p.KV(&Tokenizer{Vocabulary: &Vocabulary{Model: "gpt2"}})
|
||||
|
||||
if got, want := kv["general.architecture"], "lfm2"; got != want {
|
||||
t.Fatalf("general.architecture = %v, want %v", got, want)
|
||||
}
|
||||
if got, want := kv["tokenizer.ggml.pre"], "lfm2"; got != want {
|
||||
t.Fatalf("tokenizer.ggml.pre = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if _, ok := kv["expert_count"]; ok {
|
||||
t.Fatalf("expert_count should not be set for dense lfm2")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2MoETensors(t *testing.T) {
|
||||
p := lfm2Model{
|
||||
ModelParameters: ModelParameters{ModelType: "lfm2_moe"},
|
||||
NumHiddenLayers: 4,
|
||||
NumDenseLayers: 2,
|
||||
}
|
||||
|
||||
in := []Tensor{
|
||||
newLFM2StubTensor("blk.2.feed_forward.experts.0.w1.weight", []uint64{1536, 2048}),
|
||||
newLFM2StubTensor("blk.2.feed_forward.experts.1.w1.weight", []uint64{1536, 2048}),
|
||||
newLFM2StubTensor("blk.2.feed_forward.experts.0.w2.weight", []uint64{2048, 1536}),
|
||||
newLFM2StubTensor("blk.2.feed_forward.experts.1.w2.weight", []uint64{2048, 1536}),
|
||||
newLFM2StubTensor("blk.2.feed_forward.experts.0.w3.weight", []uint64{1536, 2048}),
|
||||
newLFM2StubTensor("blk.2.feed_forward.experts.1.w3.weight", []uint64{1536, 2048}),
|
||||
newLFM2StubTensor("blk.0.shortconv.conv.weight", []uint64{2048, 1, 3}),
|
||||
}
|
||||
|
||||
out := p.Tensors(in)
|
||||
|
||||
byName := make(map[string][]uint64, len(out))
|
||||
for _, tns := range out {
|
||||
byName[tns.Name] = tns.Shape
|
||||
}
|
||||
|
||||
if got, ok := byName["blk.2.ffn_gate_exps.weight"]; !ok {
|
||||
t.Fatalf("missing merged tensor blk.2.ffn_gate_exps.weight")
|
||||
} else if !slices.Equal(got, []uint64{2, 1536, 2048}) {
|
||||
t.Fatalf("blk.2.ffn_gate_exps.weight shape = %v, want [2 1536 2048]", got)
|
||||
}
|
||||
|
||||
if got, ok := byName["blk.2.ffn_down_exps.weight"]; !ok {
|
||||
t.Fatalf("missing merged tensor blk.2.ffn_down_exps.weight")
|
||||
} else if !slices.Equal(got, []uint64{2, 2048, 1536}) {
|
||||
t.Fatalf("blk.2.ffn_down_exps.weight shape = %v, want [2 2048 1536]", got)
|
||||
}
|
||||
|
||||
if got, ok := byName["blk.2.ffn_up_exps.weight"]; !ok {
|
||||
t.Fatalf("missing merged tensor blk.2.ffn_up_exps.weight")
|
||||
} else if !slices.Equal(got, []uint64{2, 1536, 2048}) {
|
||||
t.Fatalf("blk.2.ffn_up_exps.weight shape = %v, want [2 1536 2048]", got)
|
||||
}
|
||||
|
||||
if got, ok := byName["blk.0.shortconv.conv.weight"]; !ok {
|
||||
t.Fatalf("missing shortconv tensor")
|
||||
} else if !slices.Equal(got, []uint64{2048, 3}) {
|
||||
t.Fatalf("blk.0.shortconv.conv.weight shape = %v, want [2048 3]", got)
|
||||
}
|
||||
|
||||
if _, ok := byName["blk.2.feed_forward.experts.0.w1.weight"]; ok {
|
||||
t.Fatalf("unmerged expert tensor should not be present")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2MoEReplacements(t *testing.T) {
|
||||
p := lfm2Model{}
|
||||
replacer := strings.NewReplacer(p.Replacements()...)
|
||||
|
||||
if got, want := replacer.Replace("model.layers.2.feed_forward.expert_bias"), "blk.2.exp_probs_b.bias"; got != want {
|
||||
t.Fatalf("expert bias replacement = %q, want %q", got, want)
|
||||
}
|
||||
|
||||
if got, want := replacer.Replace("model.layers.2.feed_forward.gate.weight"), "blk.2.ffn_gate_inp.weight"; got != want {
|
||||
t.Fatalf("gate replacement = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2KVContextLengthEdgeCaseFallbackOverride(t *testing.T) {
|
||||
p := lfm2Model{
|
||||
ModelParameters: ModelParameters{ModelType: "lfm2_moe", VocabSize: 65536},
|
||||
HiddenSize: 2048,
|
||||
NumHiddenLayers: 40,
|
||||
MaxPositionEmbeddings: 128000,
|
||||
IntermediateSize: 11776,
|
||||
NumAttentionHeads: 32,
|
||||
NumKeyValueHeads: 8,
|
||||
LayerTypes: make([]string, 40),
|
||||
NormEps: 1e-5,
|
||||
ConvLCache: 3,
|
||||
MoEIntermediateSize: 1536,
|
||||
NumExperts: 64,
|
||||
NumExpertsPerToken: 4,
|
||||
NumDenseLayers: 2,
|
||||
}
|
||||
for i := 0; i < len(p.LayerTypes); i++ {
|
||||
p.LayerTypes[i] = "conv"
|
||||
}
|
||||
p.LayerTypes[2] = "full_attention"
|
||||
|
||||
kv := p.KV(&Tokenizer{Vocabulary: &Vocabulary{Model: "gpt2"}})
|
||||
|
||||
if got, want := kv["context_length"], uint32(32768); got != want {
|
||||
t.Fatalf("context_length = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2KVContextLengthNoOverride(t *testing.T) {
|
||||
p := lfm2Model{
|
||||
ModelParameters: ModelParameters{ModelType: "lfm2_moe", VocabSize: 65536},
|
||||
HiddenSize: 2048,
|
||||
NumHiddenLayers: 39, // mismatch: should not trigger edge case
|
||||
MaxPositionEmbeddings: 128000,
|
||||
IntermediateSize: 11776,
|
||||
NumAttentionHeads: 32,
|
||||
NumKeyValueHeads: 8,
|
||||
LayerTypes: []string{"conv", "full_attention"},
|
||||
NormEps: 1e-5,
|
||||
ConvLCache: 3,
|
||||
MoEIntermediateSize: 1536,
|
||||
NumExperts: 64,
|
||||
NumExpertsPerToken: 4,
|
||||
NumDenseLayers: 2,
|
||||
}
|
||||
|
||||
kv := p.KV(&Tokenizer{Vocabulary: &Vocabulary{Model: "gpt2"}})
|
||||
|
||||
if got, want := kv["context_length"], uint32(128000); got != want {
|
||||
t.Fatalf("context_length = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2KVFeedForwardLengthAutoAdjust(t *testing.T) {
|
||||
p := lfm2Model{
|
||||
ModelParameters: ModelParameters{ModelType: "lfm2", VocabSize: 65536},
|
||||
HiddenSize: 2048,
|
||||
NumHiddenLayers: 16,
|
||||
MaxPositionEmbeddings: 128000,
|
||||
IntermediateSize: 12288, // should be ignored when block_ff_dim is set
|
||||
BlockFFDim: 12288,
|
||||
BlockAutoAdjustFFDim: true,
|
||||
BlockMultipleOf: 256,
|
||||
BlockFFNDimMultiplier: 1.0,
|
||||
NumAttentionHeads: 32,
|
||||
NumKeyValueHeads: 8,
|
||||
LayerTypes: []string{"conv", "full_attention"},
|
||||
NormEps: 1e-5,
|
||||
ConvLCache: 3,
|
||||
}
|
||||
|
||||
kv := p.KV(&Tokenizer{Vocabulary: &Vocabulary{Model: "gpt2"}})
|
||||
|
||||
if got, want := kv["feed_forward_length"], uint32(8192); got != want {
|
||||
t.Fatalf("feed_forward_length = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
417
convert/convert_lfm2_vl.go
Normal file
417
convert/convert_lfm2_vl.go
Normal file
@@ -0,0 +1,417 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
// lfm2VLTextModel converts the language model component of LFM2 VL checkpoints.
|
||||
type lfm2VLTextModel struct {
|
||||
TextConfig lfm2Model `json:"text_config"`
|
||||
DoImageSplitting *bool `json:"do_image_splitting"`
|
||||
DownsampleFactor uint32 `json:"downsample_factor"`
|
||||
EncoderPatchSize uint32 `json:"encoder_patch_size"`
|
||||
ImageTokenID uint32 `json:"image_token_id"`
|
||||
MaxImageTokens uint32 `json:"max_image_tokens"`
|
||||
MinImageTokens uint32 `json:"min_image_tokens"`
|
||||
MaxTiles uint32 `json:"max_tiles"`
|
||||
MinTiles uint32 `json:"min_tiles"`
|
||||
TileSize uint32 `json:"tile_size"`
|
||||
MaxPixelsTolerance float32 `json:"max_pixels_tolerance"`
|
||||
ProjectorUseLayernorm bool `json:"projector_use_layernorm"`
|
||||
ProjectorHiddenSize uint32 `json:"projector_hidden_size"`
|
||||
ProjectorHiddenAct string `json:"projector_hidden_act"`
|
||||
UseImageSpecialTokens *bool `json:"use_image_special_tokens"`
|
||||
UseThumbnail *bool `json:"use_thumbnail"`
|
||||
VisionConfig struct {
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NumChannels uint32 `json:"num_channels"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
LayerNormEpsilon float32 `json:"layer_norm_eps"`
|
||||
} `json:"vision_config"`
|
||||
Processor struct {
|
||||
ImageProcessor struct {
|
||||
DoImageSplitting *bool `json:"do_image_splitting"`
|
||||
DownsampleFactor uint32 `json:"downsample_factor"`
|
||||
MaxImageTokens uint32 `json:"max_image_tokens"`
|
||||
MinImageTokens uint32 `json:"min_image_tokens"`
|
||||
MaxTiles uint32 `json:"max_tiles"`
|
||||
MinTiles uint32 `json:"min_tiles"`
|
||||
MaxPixelsTol float32 `json:"max_pixels_tolerance"`
|
||||
TileSize uint32 `json:"tile_size"`
|
||||
UseThumbnail *bool `json:"use_thumbnail"`
|
||||
ImageMean []float32 `json:"image_mean"`
|
||||
ImageStd []float32 `json:"image_std"`
|
||||
Size struct {
|
||||
Height uint32 `json:"height"`
|
||||
Width uint32 `json:"width"`
|
||||
} `json:"size"`
|
||||
} `json:"image_processor"`
|
||||
}
|
||||
}
|
||||
|
||||
func (p *lfm2VLTextModel) textModel() *lfm2Model {
|
||||
return &p.TextConfig
|
||||
}
|
||||
|
||||
func (p *lfm2VLTextModel) specialTokenTypes() []string {
|
||||
return p.textModel().specialTokenTypes()
|
||||
}
|
||||
|
||||
func (p *lfm2VLTextModel) parseMore(fsys fs.FS) error {
|
||||
bts, err := fs.ReadFile(fsys, "processor_config.json")
|
||||
if err != nil {
|
||||
if errors.Is(err, fs.ErrNotExist) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
return json.Unmarshal(bts, &p.Processor)
|
||||
}
|
||||
|
||||
func (p *lfm2VLTextModel) visionImageSize() uint32 {
|
||||
// LFM2-VL image processor operates on 512 tiles and downsamples by factor 2
|
||||
// before projection. Keep a fixed square image size compatible with position
|
||||
// embeddings and the simplified runtime image pipeline.
|
||||
tile := cmp.Or(
|
||||
p.Processor.ImageProcessor.TileSize,
|
||||
p.Processor.ImageProcessor.Size.Height,
|
||||
p.Processor.ImageProcessor.Size.Width,
|
||||
uint32(512),
|
||||
)
|
||||
downsample := cmp.Or(p.DownsampleFactor, p.Processor.ImageProcessor.DownsampleFactor, uint32(2))
|
||||
if downsample == 0 {
|
||||
return tile
|
||||
}
|
||||
|
||||
return max(uint32(1), tile/downsample)
|
||||
}
|
||||
|
||||
func (p *lfm2VLTextModel) KV(t *Tokenizer) KV {
|
||||
kv := p.textModel().KV(t)
|
||||
|
||||
boolOr := func(defaultValue bool, values ...*bool) bool {
|
||||
for _, v := range values {
|
||||
if v != nil {
|
||||
return *v
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
kv["vision.block_count"] = cmp.Or(p.VisionConfig.NumHiddenLayers, uint32(27))
|
||||
kv["vision.embedding_length"] = cmp.Or(p.VisionConfig.HiddenSize, uint32(1152))
|
||||
kv["vision.feed_forward_length"] = cmp.Or(p.VisionConfig.IntermediateSize, uint32(4304))
|
||||
kv["vision.attention.head_count"] = cmp.Or(p.VisionConfig.NumAttentionHeads, uint32(16))
|
||||
kv["vision.attention.layer_norm_epsilon"] = cmp.Or(p.VisionConfig.LayerNormEpsilon, float32(1e-6))
|
||||
kv["vision.patch_size"] = cmp.Or(p.VisionConfig.PatchSize, p.EncoderPatchSize, uint32(16))
|
||||
kv["vision.num_channels"] = cmp.Or(p.VisionConfig.NumChannels, uint32(3))
|
||||
kv["vision.image_size"] = p.visionImageSize()
|
||||
kv["vision.projector.scale_factor"] = cmp.Or(p.DownsampleFactor, p.Processor.ImageProcessor.DownsampleFactor, uint32(2))
|
||||
kv["vision.projector.use_layernorm"] = p.ProjectorUseLayernorm
|
||||
kv["vision.do_image_splitting"] = boolOr(true, p.DoImageSplitting, p.Processor.ImageProcessor.DoImageSplitting)
|
||||
kv["vision.min_tiles"] = cmp.Or(p.MinTiles, p.Processor.ImageProcessor.MinTiles, uint32(2))
|
||||
kv["vision.max_tiles"] = cmp.Or(p.MaxTiles, p.Processor.ImageProcessor.MaxTiles, uint32(10))
|
||||
kv["vision.tile_size"] = cmp.Or(p.TileSize, p.Processor.ImageProcessor.TileSize, uint32(512))
|
||||
kv["vision.min_image_tokens"] = cmp.Or(p.MinImageTokens, p.Processor.ImageProcessor.MinImageTokens, uint32(64))
|
||||
kv["vision.max_image_tokens"] = cmp.Or(p.MaxImageTokens, p.Processor.ImageProcessor.MaxImageTokens, uint32(256))
|
||||
kv["vision.max_pixels_tolerance"] = cmp.Or(p.MaxPixelsTolerance, p.Processor.ImageProcessor.MaxPixelsTol, float32(2.0))
|
||||
kv["vision.use_thumbnail"] = boolOr(true, p.UseThumbnail, p.Processor.ImageProcessor.UseThumbnail)
|
||||
kv["vision.use_image_special_tokens"] = boolOr(true, p.UseImageSpecialTokens)
|
||||
kv["vision.image_mean"] = slices.Clone(defaultFloat32Slice(p.Processor.ImageProcessor.ImageMean, []float32{0.5, 0.5, 0.5}))
|
||||
kv["vision.image_std"] = slices.Clone(defaultFloat32Slice(p.Processor.ImageProcessor.ImageStd, []float32{0.5, 0.5, 0.5}))
|
||||
kv["vision.image_token_id"] = cmp.Or(p.ImageTokenID, uint32(396))
|
||||
|
||||
setVisionTokenID := func(k, token string) {
|
||||
if t == nil || t.Vocabulary == nil {
|
||||
return
|
||||
}
|
||||
for i, v := range t.Vocabulary.Tokens {
|
||||
if v == token {
|
||||
kv[k] = uint32(i)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
setVisionTokenID("vision.image_start_token_id", "<|image_start|>")
|
||||
setVisionTokenID("vision.image_end_token_id", "<|image_end|>")
|
||||
setVisionTokenID("vision.image_thumbnail_token_id", "<|img_thumbnail|>")
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *lfm2VLTextModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
patchSize := int(cmp.Or(p.VisionConfig.PatchSize, p.EncoderPatchSize, uint32(16)))
|
||||
numChannels := int(cmp.Or(p.VisionConfig.NumChannels, uint32(3)))
|
||||
|
||||
for _, t := range ts {
|
||||
if t.Name() == "v.patch_embd.weight" {
|
||||
shape := t.Shape()
|
||||
if len(shape) == 2 {
|
||||
inputDim := uint64(numChannels * patchSize * patchSize)
|
||||
if shape[1] == inputDim {
|
||||
channels := numChannels
|
||||
patch := patchSize
|
||||
t.SetRepacker(func(_ string, data []float32, srcShape []uint64) ([]float32, error) {
|
||||
return repackPatchEmbeddingWeight(data, srcShape, channels, patch)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out := p.textModel().Tensors(ts)
|
||||
for _, t := range out {
|
||||
if t.Name == "v.patch_embd.weight" && len(t.Shape) == 2 {
|
||||
t.Shape = []uint64{t.Shape[0], uint64(numChannels), uint64(patchSize), uint64(patchSize)}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *lfm2VLTextModel) Replacements() []string {
|
||||
out := make([]string, 0, 96)
|
||||
|
||||
addText := func(from, to string) {
|
||||
out = append(out, from, to)
|
||||
if strings.HasPrefix(from, "model.") {
|
||||
suffix := strings.TrimPrefix(from, "model.")
|
||||
out = append(out,
|
||||
"model.language_model."+suffix, to,
|
||||
"model.language_model.model."+suffix, to,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
base := p.textModel().Replacements()
|
||||
for i := 0; i+1 < len(base); i += 2 {
|
||||
addText(base[i], base[i+1])
|
||||
}
|
||||
|
||||
// Vision tower + multimodal projector tensors (single-file conversion).
|
||||
out = append(out,
|
||||
"model.vision_tower.vision_model.embeddings.patch_embedding", "v.patch_embd",
|
||||
"model.vision_tower.vision_model.embeddings.position_embedding", "v.position_embd",
|
||||
"model.vision_tower.vision_model.encoder.layers", "v.blk",
|
||||
"model.vision_tower.vision_model.post_layernorm", "v.post_ln",
|
||||
"model.multi_modal_projector.layer_norm", "mm.layer_norm",
|
||||
"model.multi_modal_projector.linear_1", "mm.1",
|
||||
"model.multi_modal_projector.linear_2", "mm.2",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.out_proj", "attn_out",
|
||||
"layer_norm1", "ln1",
|
||||
"layer_norm2", "ln2",
|
||||
"mlp.fc1", "ffn_up",
|
||||
"mlp.fc2", "ffn_down",
|
||||
)
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// lfm2VLProjectorModel converts the vision encoder + projector component of LFM2 VL checkpoints.
|
||||
type lfm2VLProjectorModel struct {
|
||||
ModelParameters
|
||||
DownsampleFactor uint32 `json:"downsample_factor"`
|
||||
ProjectorHiddenDim uint32 `json:"projector_hidden_size"`
|
||||
VisionModel struct {
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NumChannels uint32 `json:"num_channels"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
LayerNormEpsilon float32 `json:"layer_norm_eps"`
|
||||
ImageSize uint32 `json:"image_size"`
|
||||
} `json:"vision_config"`
|
||||
Processor struct {
|
||||
ImageProcessor struct {
|
||||
DownsampleFactor uint32 `json:"downsample_factor"`
|
||||
TileSize uint32 `json:"tile_size"`
|
||||
ImageMean []float32 `json:"image_mean"`
|
||||
ImageStd []float32 `json:"image_std"`
|
||||
Size struct {
|
||||
Height uint32 `json:"height"`
|
||||
Width uint32 `json:"width"`
|
||||
} `json:"size"`
|
||||
} `json:"image_processor"`
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
_ ModelConverter = (*lfm2VLTextModel)(nil)
|
||||
_ ModelConverter = (*lfm2VLProjectorModel)(nil)
|
||||
_ moreParser = (*lfm2VLTextModel)(nil)
|
||||
_ moreParser = (*lfm2VLProjectorModel)(nil)
|
||||
)
|
||||
|
||||
func (p *lfm2VLProjectorModel) parseMore(fsys fs.FS) error {
|
||||
bts, err := fs.ReadFile(fsys, "processor_config.json")
|
||||
if err != nil {
|
||||
if errors.Is(err, fs.ErrNotExist) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
return json.Unmarshal(bts, &p.Processor)
|
||||
}
|
||||
|
||||
func (p *lfm2VLProjectorModel) imageSize() uint32 {
|
||||
if p.VisionModel.ImageSize > 0 {
|
||||
return p.VisionModel.ImageSize
|
||||
}
|
||||
|
||||
downsample := cmp.Or(p.DownsampleFactor, p.Processor.ImageProcessor.DownsampleFactor, uint32(2))
|
||||
baseSize := cmp.Or(
|
||||
p.Processor.ImageProcessor.TileSize,
|
||||
p.Processor.ImageProcessor.Size.Height,
|
||||
p.Processor.ImageProcessor.Size.Width,
|
||||
uint32(256),
|
||||
)
|
||||
if downsample == 0 {
|
||||
return baseSize
|
||||
}
|
||||
|
||||
return max(uint32(1), baseSize/downsample)
|
||||
}
|
||||
|
||||
func (p *lfm2VLProjectorModel) KV(_ *Tokenizer) KV {
|
||||
kv := KV{
|
||||
"general.architecture": "clip",
|
||||
"general.type": "mmproj",
|
||||
"general.file_type": uint32(1),
|
||||
"general.quantization_version": uint32(2),
|
||||
"clip.has_vision_encoder": true,
|
||||
"clip.projector_type": "lfm2",
|
||||
"clip.use_gelu": true,
|
||||
}
|
||||
|
||||
kv["clip.vision.block_count"] = cmp.Or(p.VisionModel.NumHiddenLayers, uint32(27))
|
||||
kv["clip.vision.embedding_length"] = cmp.Or(p.VisionModel.HiddenSize, uint32(1152))
|
||||
kv["clip.vision.feed_forward_length"] = cmp.Or(p.VisionModel.IntermediateSize, uint32(4304))
|
||||
kv["clip.vision.attention.head_count"] = cmp.Or(p.VisionModel.NumAttentionHeads, uint32(16))
|
||||
kv["clip.vision.attention.layer_norm_epsilon"] = cmp.Or(p.VisionModel.LayerNormEpsilon, float32(1e-6))
|
||||
kv["clip.vision.patch_size"] = cmp.Or(p.VisionModel.PatchSize, uint32(16))
|
||||
kv["clip.vision.image_size"] = p.imageSize()
|
||||
kv["clip.vision.projection_dim"] = cmp.Or(p.ProjectorHiddenDim, uint32(2048))
|
||||
kv["clip.vision.projector.scale_factor"] = cmp.Or(p.DownsampleFactor, p.Processor.ImageProcessor.DownsampleFactor, uint32(2))
|
||||
kv["clip.vision.image_mean"] = slices.Clone(defaultFloat32Slice(p.Processor.ImageProcessor.ImageMean, []float32{0.5, 0.5, 0.5}))
|
||||
kv["clip.vision.image_std"] = slices.Clone(defaultFloat32Slice(p.Processor.ImageProcessor.ImageStd, []float32{0.5, 0.5, 0.5}))
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func defaultFloat32Slice(v, fallback []float32) []float32 {
|
||||
if len(v) > 0 {
|
||||
return v
|
||||
}
|
||||
|
||||
return fallback
|
||||
}
|
||||
|
||||
func (p *lfm2VLProjectorModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
numChannels := cmp.Or(p.VisionModel.NumChannels, uint32(3))
|
||||
patchSize := cmp.Or(p.VisionModel.PatchSize, uint32(16))
|
||||
|
||||
for _, t := range ts {
|
||||
name := t.Name()
|
||||
if !(strings.HasPrefix(name, "v.") || strings.HasPrefix(name, "mm.")) {
|
||||
continue
|
||||
}
|
||||
|
||||
shape := t.Shape()
|
||||
if name == "v.patch_embd.weight" && len(shape) == 2 {
|
||||
inputDim := uint64(numChannels * patchSize * patchSize)
|
||||
if shape[1] == inputDim {
|
||||
shape = []uint64{shape[0], uint64(numChannels), uint64(patchSize), uint64(patchSize)}
|
||||
channels := int(numChannels)
|
||||
patch := int(patchSize)
|
||||
t.SetRepacker(func(_ string, data []float32, srcShape []uint64) ([]float32, error) {
|
||||
return repackPatchEmbeddingWeight(data, srcShape, channels, patch)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: name,
|
||||
Kind: t.Kind(),
|
||||
Shape: slices.Clone(shape),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *lfm2VLProjectorModel) Replacements() []string {
|
||||
return []string{
|
||||
"model.multi_modal_projector.linear_1", "mm.1",
|
||||
"model.multi_modal_projector.linear_2", "mm.2",
|
||||
"model.vision_tower.vision_model.embeddings.patch_embedding", "v.patch_embd",
|
||||
"model.vision_tower.vision_model.embeddings.position_embedding", "v.position_embd",
|
||||
"model.vision_tower.vision_model.encoder.layers", "v.blk",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.out_proj", "attn_out",
|
||||
"layer_norm1", "ln1",
|
||||
"layer_norm2", "ln2",
|
||||
"mlp.fc1", "ffn_up",
|
||||
"mlp.fc2", "ffn_down",
|
||||
"model.vision_tower.vision_model.post_layernorm", "v.post_ln",
|
||||
}
|
||||
}
|
||||
|
||||
func repackPatchEmbeddingWeight(data []float32, srcShape []uint64, channels, patch int) ([]float32, error) {
|
||||
if len(srcShape) != 2 {
|
||||
return nil, fmt.Errorf("invalid patch embedding shape rank: %d", len(srcShape))
|
||||
}
|
||||
|
||||
outDim := int(srcShape[0])
|
||||
flatInputDim := int(srcShape[1])
|
||||
expectedInputDim := channels * patch * patch
|
||||
if flatInputDim != expectedInputDim {
|
||||
return nil, fmt.Errorf("invalid patch embedding input dim: got %d, want %d", flatInputDim, expectedInputDim)
|
||||
}
|
||||
|
||||
expectedSize := outDim * flatInputDim
|
||||
if len(data) != expectedSize {
|
||||
return nil, fmt.Errorf("invalid patch embedding data size: got %d, want %d", len(data), expectedSize)
|
||||
}
|
||||
|
||||
repacked := make([]float32, len(data))
|
||||
perChannel := patch * patch
|
||||
|
||||
for o := range outDim {
|
||||
inBase := o * flatInputDim
|
||||
outBase := o * flatInputDim
|
||||
|
||||
for y := range patch {
|
||||
for x := range patch {
|
||||
inPixelBase := inBase + (y*patch+x)*channels
|
||||
for c := range channels {
|
||||
src := inPixelBase + c
|
||||
dst := outBase + c*perChannel + y*patch + x
|
||||
repacked[dst] = data[src]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return repacked, nil
|
||||
}
|
||||
249
convert/convert_lfm2_vl_test.go
Normal file
249
convert/convert_lfm2_vl_test.go
Normal file
@@ -0,0 +1,249 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestLFM2VLTextModelKVUsesTextConfig(t *testing.T) {
|
||||
p := lfm2VLTextModel{
|
||||
TextConfig: lfm2Model{
|
||||
ModelParameters: ModelParameters{ModelType: "lfm2", VocabSize: 65536},
|
||||
HiddenSize: 2048,
|
||||
NumHiddenLayers: 16,
|
||||
MaxPositionEmbeddings: 128000,
|
||||
IntermediateSize: 12288,
|
||||
BlockFFDim: 12288,
|
||||
BlockAutoAdjustFFDim: true,
|
||||
BlockMultipleOf: 256,
|
||||
BlockFFNDimMultiplier: 1.0,
|
||||
NumAttentionHeads: 32,
|
||||
NumKeyValueHeads: 8,
|
||||
LayerTypes: []string{"conv", "full_attention"},
|
||||
NormEps: 1e-5,
|
||||
ConvLCache: 3,
|
||||
},
|
||||
DownsampleFactor: 2,
|
||||
VisionConfig: struct {
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NumChannels uint32 `json:"num_channels"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
LayerNormEpsilon float32 `json:"layer_norm_eps"`
|
||||
}{
|
||||
HiddenSize: 1152,
|
||||
IntermediateSize: 4304,
|
||||
NumAttentionHeads: 16,
|
||||
NumHiddenLayers: 27,
|
||||
NumChannels: 3,
|
||||
PatchSize: 16,
|
||||
LayerNormEpsilon: 1e-6,
|
||||
},
|
||||
}
|
||||
p.Processor.ImageProcessor.TileSize = 512
|
||||
p.Processor.ImageProcessor.ImageMean = []float32{0.5, 0.5, 0.5}
|
||||
p.Processor.ImageProcessor.ImageStd = []float32{0.5, 0.5, 0.5}
|
||||
|
||||
kv := p.KV(&Tokenizer{
|
||||
Vocabulary: &Vocabulary{
|
||||
Model: "gpt2",
|
||||
Tokens: []string{"<|pad|>", "<image>", "<|image_start|>", "<|image_end|>", "<|img_thumbnail|>"},
|
||||
},
|
||||
})
|
||||
|
||||
if got, want := kv["general.architecture"], "lfm2"; got != want {
|
||||
t.Fatalf("general.architecture = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["feed_forward_length"], uint32(8192); got != want {
|
||||
t.Fatalf("feed_forward_length = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["vision.block_count"], uint32(27); got != want {
|
||||
t.Fatalf("vision.block_count = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["vision.image_size"], uint32(256); got != want {
|
||||
t.Fatalf("vision.image_size = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["vision.image_token_id"], uint32(396); got != want {
|
||||
t.Fatalf("vision.image_token_id = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["vision.image_start_token_id"], uint32(2); got != want {
|
||||
t.Fatalf("vision.image_start_token_id = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if got, want := kv["vision.do_image_splitting"], true; got != want {
|
||||
t.Fatalf("vision.do_image_splitting = %v, want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.min_tiles"], uint32(2); got != want {
|
||||
t.Fatalf("vision.min_tiles = %v, want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.max_tiles"], uint32(10); got != want {
|
||||
t.Fatalf("vision.max_tiles = %v, want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.tile_size"], uint32(512); got != want {
|
||||
t.Fatalf("vision.tile_size = %v, want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.use_thumbnail"], true; got != want {
|
||||
t.Fatalf("vision.use_thumbnail = %v, want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.use_image_special_tokens"], true; got != want {
|
||||
t.Fatalf("vision.use_image_special_tokens = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2VLTextModelTensorsIncludeVision(t *testing.T) {
|
||||
p := lfm2VLTextModel{}
|
||||
p.VisionConfig.PatchSize = 16
|
||||
p.VisionConfig.NumChannels = 3
|
||||
input := []Tensor{
|
||||
newLFM2StubTensor("model.embed_tokens.weight", []uint64{65536, 2048}),
|
||||
newLFM2StubTensor("model.layers.0.ffn_norm.weight", []uint64{2048}),
|
||||
newLFM2StubTensor("v.patch_embd.weight", []uint64{1152, 768}),
|
||||
newLFM2StubTensor("v.blk.0.attn_q.weight", []uint64{1152, 1152}),
|
||||
newLFM2StubTensor("mm.1.weight", []uint64{2048, 4608}),
|
||||
}
|
||||
|
||||
out := p.Tensors(input)
|
||||
if len(out) == 0 {
|
||||
t.Fatal("expected non-empty tensor list")
|
||||
}
|
||||
|
||||
foundPatch := false
|
||||
foundVision := false
|
||||
for _, tns := range out {
|
||||
if tns.Name == "v.patch_embd.weight" {
|
||||
foundPatch = true
|
||||
if !slices.Equal(tns.Shape, []uint64{1152, 3, 16, 16}) {
|
||||
t.Fatalf("v.patch_embd.weight shape = %v, want [1152 3 16 16]", tns.Shape)
|
||||
}
|
||||
}
|
||||
if strings.HasPrefix(tns.Name, "v.") || strings.HasPrefix(tns.Name, "mm.") {
|
||||
foundVision = true
|
||||
}
|
||||
}
|
||||
|
||||
if !foundPatch {
|
||||
t.Fatal("expected v.patch_embd.weight in output tensors")
|
||||
}
|
||||
if !foundVision {
|
||||
t.Fatal("expected at least one vision/projector tensor in output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2VLTextModelReplacements(t *testing.T) {
|
||||
p := lfm2VLTextModel{}
|
||||
r := strings.NewReplacer(p.Replacements()...)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "language_model_embed_tokens",
|
||||
in: "model.language_model.embed_tokens.weight",
|
||||
want: "token_embd.weight",
|
||||
},
|
||||
{
|
||||
name: "language_model_layers",
|
||||
in: "model.language_model.layers.2.self_attn.q_proj.weight",
|
||||
want: "blk.2.attn_q.weight",
|
||||
},
|
||||
{
|
||||
name: "nested_language_model_prefix",
|
||||
in: "model.language_model.model.embedding_norm.weight",
|
||||
want: "token_embd_norm.weight",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := r.Replace(tt.in); got != tt.want {
|
||||
t.Fatalf("replacement(%q) = %q, want %q", tt.in, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2VLProjectorKV(t *testing.T) {
|
||||
p := lfm2VLProjectorModel{
|
||||
DownsampleFactor: 2,
|
||||
ProjectorHiddenDim: 2048,
|
||||
}
|
||||
p.VisionModel.NumHiddenLayers = 27
|
||||
p.VisionModel.HiddenSize = 1152
|
||||
p.VisionModel.IntermediateSize = 4304
|
||||
p.VisionModel.NumAttentionHeads = 16
|
||||
p.VisionModel.PatchSize = 16
|
||||
p.VisionModel.LayerNormEpsilon = 1e-6
|
||||
p.Processor.ImageProcessor.TileSize = 512
|
||||
p.Processor.ImageProcessor.ImageMean = []float32{0.5, 0.5, 0.5}
|
||||
p.Processor.ImageProcessor.ImageStd = []float32{0.5, 0.5, 0.5}
|
||||
|
||||
kv := p.KV(nil)
|
||||
|
||||
if got, want := kv["general.architecture"], "clip"; got != want {
|
||||
t.Fatalf("general.architecture = %v, want %v", got, want)
|
||||
}
|
||||
if got, want := kv["clip.projector_type"], "lfm2"; got != want {
|
||||
t.Fatalf("clip.projector_type = %v, want %v", got, want)
|
||||
}
|
||||
if got, want := kv["clip.vision.image_size"], uint32(256); got != want {
|
||||
t.Fatalf("clip.vision.image_size = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLFM2VLProjectorTensorsPatchReshape(t *testing.T) {
|
||||
p := lfm2VLProjectorModel{}
|
||||
p.VisionModel.NumChannels = 3
|
||||
p.VisionModel.PatchSize = 16
|
||||
|
||||
input := []Tensor{
|
||||
newLFM2StubTensor("v.patch_embd.weight", []uint64{1152, 768}),
|
||||
newLFM2StubTensor("mm.1.weight", []uint64{2048, 4608}),
|
||||
newLFM2StubTensor("model.embed_tokens.weight", []uint64{65536, 2048}),
|
||||
}
|
||||
|
||||
out := p.Tensors(input)
|
||||
if len(out) != 2 {
|
||||
t.Fatalf("expected 2 tensors, got %d", len(out))
|
||||
}
|
||||
|
||||
var patchShape []uint64
|
||||
for _, tns := range out {
|
||||
if tns.Name == "v.patch_embd.weight" {
|
||||
patchShape = tns.Shape
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !slices.Equal(patchShape, []uint64{1152, 3, 16, 16}) {
|
||||
t.Fatalf("v.patch_embd.weight shape = %v, want [1152 3 16 16]", patchShape)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRepackPatchEmbeddingWeight(t *testing.T) {
|
||||
data := []float32{
|
||||
0, 1, // y=0,x=0
|
||||
2, 3, // y=0,x=1
|
||||
4, 5, // y=1,x=0
|
||||
6, 7, // y=1,x=1
|
||||
}
|
||||
|
||||
got, err := repackPatchEmbeddingWeight(data, []uint64{1, 8}, 2, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
want := []float32{0, 2, 4, 6, 1, 3, 5, 7}
|
||||
if !slices.Equal(got, want) {
|
||||
t.Fatalf("repacked data = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
221
convert/convert_llama.go
Normal file
221
convert/convert_llama.go
Normal file
@@ -0,0 +1,221 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type llamaModel struct {
|
||||
ModelParameters
|
||||
NLayers uint32 `json:"n_layers"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NLayer uint32 `json:"n_layer"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
NCtx uint32 `json:"n_ctx"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NEmbd uint32 `json:"n_embd"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NInner uint32 `json:"n_inner"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NHead uint32 `json:"n_head"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeScaling struct {
|
||||
Type string `json:"type"`
|
||||
RopeType string `json:"rope_type"`
|
||||
Factor float32 `json:"factor"`
|
||||
LowFrequencyFactor float32 `json:"low_freq_factor"`
|
||||
HighFrequencyFactor float32 `json:"high_freq_factor"`
|
||||
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
|
||||
factors ropeFactor
|
||||
} `json:"rope_scaling"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
LayerNormEPS float32 `json:"layer_norm_eps"`
|
||||
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
|
||||
NormEpsilon float32 `json:"norm_epsilon"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
|
||||
skipRepack bool
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*llamaModel)(nil)
|
||||
|
||||
func (p *llamaModel) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "llama"
|
||||
kv["llama.vocab_size"] = p.VocabSize
|
||||
|
||||
kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
|
||||
|
||||
if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
|
||||
kv["llama.context_length"] = contextLength
|
||||
}
|
||||
|
||||
if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
|
||||
kv["llama.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||
}
|
||||
|
||||
if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
|
||||
kv["llama.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
|
||||
}
|
||||
|
||||
if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
|
||||
kv["llama.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||
kv["llama.rope.dimension_count"] = p.HiddenSize / headCount
|
||||
}
|
||||
|
||||
if p.HeadDim > 0 {
|
||||
kv["llama.attention.head_dim"] = p.HeadDim
|
||||
}
|
||||
|
||||
if p.RopeTheta > 0 {
|
||||
kv["llama.rope.freq_base"] = p.RopeTheta
|
||||
}
|
||||
|
||||
if p.RopeScaling.Type == "linear" {
|
||||
kv["llama.rope.scaling.type"] = p.RopeScaling.Type
|
||||
kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
|
||||
} else if p.RopeScaling.RopeType == "llama3" {
|
||||
dim := p.HiddenSize / p.NumAttentionHeads
|
||||
for i := uint32(0); i < dim; i += 2 {
|
||||
factor := cmp.Or(p.RopeScaling.Factor, 8.0)
|
||||
factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
|
||||
factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
|
||||
|
||||
original := cmp.Or(p.RopeScaling.OriginalMaxPositionEmbeddings, 8192)
|
||||
lambdaLow := float32(original) / factorLow
|
||||
lambdaHigh := float32(original) / factorHigh
|
||||
|
||||
lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
|
||||
if lambda < float64(lambdaHigh) {
|
||||
p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0)
|
||||
} else if lambda > float64(lambdaLow) {
|
||||
p.RopeScaling.factors = append(p.RopeScaling.factors, factor)
|
||||
} else {
|
||||
smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
|
||||
p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if p.NumKeyValueHeads > 0 {
|
||||
kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||
}
|
||||
|
||||
if p.RMSNormEPS > 0 {
|
||||
kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
}
|
||||
|
||||
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
|
||||
kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon
|
||||
}
|
||||
|
||||
if p.HeadDim > 0 {
|
||||
kv["llama.attention.key_length"] = p.HeadDim
|
||||
kv["llama.attention.value_length"] = p.HeadDim
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *llamaModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
if p.RopeScaling.factors != nil {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: "rope_freqs.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{uint64(len(p.RopeScaling.factors))},
|
||||
WriterTo: p.RopeScaling.factors,
|
||||
})
|
||||
}
|
||||
|
||||
for _, t := range ts {
|
||||
if strings.HasSuffix(t.Name(), "attn_q.weight") || strings.HasSuffix(t.Name(), "attn_k.weight") ||
|
||||
strings.HasSuffix(t.Name(), "attn_q_proj.weight") || strings.HasSuffix(t.Name(), "attn_k_proj.weight") {
|
||||
if !p.skipRepack {
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *llamaModel) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
var dims []int
|
||||
for _, dim := range shape {
|
||||
dims = append(dims, int(dim))
|
||||
}
|
||||
|
||||
var heads uint32
|
||||
if strings.HasSuffix(name, "attn_q.weight") || strings.HasSuffix(name, "attn_q_proj.weight") {
|
||||
heads = p.NumAttentionHeads
|
||||
} else if strings.HasSuffix(name, "attn_k.weight") || strings.HasSuffix(name, "attn_k_proj.weight") {
|
||||
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||
} else {
|
||||
return nil, fmt.Errorf("unknown tensor for repack: %s", name)
|
||||
}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
169
convert/convert_llama4.go
Normal file
169
convert/convert_llama4.go
Normal file
@@ -0,0 +1,169 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type llama4Model struct {
|
||||
ModelParameters
|
||||
TextModel struct {
|
||||
llamaModel
|
||||
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||
NumLocalExperts uint32 `json:"num_local_experts"`
|
||||
InterleaveMOELayerStep uint32 `json:"interleave_moe_layer_step"`
|
||||
UseQKNorm bool `json:"use_qk_norm"`
|
||||
IntermediateSizeMLP uint32 `json:"intermediate_size_mlp"`
|
||||
AttentionChunkSize uint32 `json:"attention_chunk_size"`
|
||||
} `json:"text_config"`
|
||||
VisionModel struct {
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
ImageSize uint32 `json:"image_size"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
NormEpsilon float32 `json:"norm_eps"`
|
||||
PixelShuffleRatio float32 `json:"pixel_shuffle_ratio"`
|
||||
} `json:"vision_config"`
|
||||
}
|
||||
|
||||
// KV implements ModelConverter.
|
||||
func (p *llama4Model) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "llama4"
|
||||
|
||||
for k, v := range p.TextModel.KV(t) {
|
||||
if strings.HasPrefix(k, "llama.") {
|
||||
kv[strings.ReplaceAll(k, "llama.", "llama4.")] = v
|
||||
}
|
||||
}
|
||||
|
||||
kv["llama4.feed_forward_length"] = p.TextModel.IntermediateSizeMLP
|
||||
kv["llama4.expert_feed_forward_length"] = p.TextModel.IntermediateSize
|
||||
|
||||
kv["llama4.expert_count"] = p.TextModel.NumLocalExperts
|
||||
kv["llama4.expert_used_count"] = p.TextModel.NumExpertsPerToken
|
||||
kv["llama4.interleave_moe_layer_step"] = p.TextModel.InterleaveMOELayerStep
|
||||
kv["llama4.use_qk_norm"] = p.TextModel.UseQKNorm
|
||||
kv["llama4.attention.chunk_size"] = p.TextModel.AttentionChunkSize
|
||||
|
||||
kv["llama4.vision.block_count"] = p.VisionModel.NumHiddenLayers
|
||||
kv["llama4.vision.embedding_length"] = p.VisionModel.HiddenSize
|
||||
kv["llama4.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
|
||||
kv["llama4.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
|
||||
kv["llama4.vision.image_size"] = p.VisionModel.ImageSize
|
||||
kv["llama4.vision.patch_size"] = p.VisionModel.PatchSize
|
||||
kv["llama4.vision.rope.freq_base"] = p.VisionModel.RopeTheta
|
||||
kv["llama4.vision.layer_norm_epsilon"] = p.VisionModel.NormEpsilon
|
||||
kv["llama4.vision.pixel_shuffle_ratio"] = p.VisionModel.PixelShuffleRatio
|
||||
return kv
|
||||
}
|
||||
|
||||
// Replacements implements ModelConverter.
|
||||
func (p *llama4Model) Replacements() []string {
|
||||
return append(
|
||||
p.TextModel.Replacements(),
|
||||
"language_model.", "",
|
||||
"vision_model", "v",
|
||||
"multi_modal_projector", "mm",
|
||||
"feed_forward.down_proj", "ffn_down",
|
||||
"feed_forward.up_proj", "ffn_up",
|
||||
"feed_forward.gate_proj", "ffn_gate",
|
||||
"feed_forward.", "ffn_",
|
||||
"shared_expert.down_proj", "down_shexp",
|
||||
"shared_expert.gate_proj", "gate_shexp",
|
||||
"shared_expert.up_proj", "up_shexp",
|
||||
"experts.down_proj", "down_exps.weight",
|
||||
"experts.gate_up_proj", "gate_up_exps.weight",
|
||||
"router", "gate_inp",
|
||||
"patch_embedding.linear", "patch_embedding",
|
||||
)
|
||||
}
|
||||
|
||||
// Tensors implements ModelConverter.
|
||||
func (p *llama4Model) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
var textTensors []Tensor
|
||||
for _, t := range ts {
|
||||
if strings.HasPrefix(t.Name(), "v.") || strings.HasPrefix(t.Name(), "mm.") {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
} else if strings.Contains(t.Name(), "ffn_gate_up_exps") {
|
||||
// gate and up projectors are fused
|
||||
// dims[1], dims[2] must be swapped
|
||||
// [experts, hidden_size, intermediate_size * 2] --> [experts, intermediate_size, hidden_size]
|
||||
halfDim := int(t.Shape()[2]) / 2
|
||||
|
||||
newShape := slices.Clone(t.Shape())
|
||||
newShape[1], newShape[2] = newShape[2]/2, newShape[1]
|
||||
for i, name := range []string{"ffn_gate_exps", "ffn_up_exps"} {
|
||||
// clone tensor since we need separate repackers
|
||||
tt := t.Clone()
|
||||
tt.SetRepacker(p.repack(nil, nil, tensor.S(i*halfDim, (i+1)*halfDim)))
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.ReplaceAll(tt.Name(), "ffn_gate_up_exps", name),
|
||||
Kind: tt.Kind(),
|
||||
Shape: newShape,
|
||||
WriterTo: tt,
|
||||
})
|
||||
}
|
||||
} else if strings.Contains(t.Name(), "ffn_down_exps") {
|
||||
// dims[1], dims[2] must be swapped
|
||||
// [experts, intermediate_size, hidden_size] --> [experts, hidden_size, intermediate_size]
|
||||
t.SetRepacker(p.repack())
|
||||
newShape := slices.Clone(t.Shape())
|
||||
newShape[1], newShape[2] = newShape[2], newShape[1]
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: newShape,
|
||||
WriterTo: t,
|
||||
})
|
||||
} else {
|
||||
textTensors = append(textTensors, t)
|
||||
}
|
||||
}
|
||||
|
||||
p.TextModel.skipRepack = true
|
||||
out = append(out, p.TextModel.Tensors(textTensors)...)
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *llama4Model) repack(slice ...tensor.Slice) Repacker {
|
||||
return func(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := make([]int, len(shape))
|
||||
for i, dim := range shape {
|
||||
dims[i] = int(dim)
|
||||
}
|
||||
|
||||
var t tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
t, err := t.Slice(slice...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := t.T(0, 2, 1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
t = tensor.Materialize(t)
|
||||
// flatten tensor so it can be return as a vector
|
||||
if err := t.Reshape(t.Shape().TotalSize()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return native.VectorF32(t.(*tensor.Dense))
|
||||
}
|
||||
}
|
||||
170
convert/convert_llama_adapter.go
Normal file
170
convert/convert_llama_adapter.go
Normal file
@@ -0,0 +1,170 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type llamaAdapter struct {
|
||||
AdapterParameters
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
}
|
||||
|
||||
var _ AdapterConverter = (*llamaAdapter)(nil)
|
||||
|
||||
func (p *llamaAdapter) KV(baseKV fs.Config) KV {
|
||||
kv := p.AdapterParameters.KV()
|
||||
kv["general.architecture"] = "llama"
|
||||
kv["llama.attention.head_count"] = baseKV.Value("llama.attention.head_count")
|
||||
kv["llama.attention.head_count_kv"] = baseKV.Value("llama.attention.head_count_kv")
|
||||
|
||||
p.NumAttentionHeads = baseKV.Value("llama.attention.head_count").(uint32)
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *llamaAdapter) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
for _, t := range ts {
|
||||
shape := t.Shape()
|
||||
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
|
||||
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
|
||||
shape[0], shape[1] = shape[1], shape[0]
|
||||
t.SetRepacker(p.repackAndTranspose)
|
||||
} else {
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: shape,
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *llamaAdapter) Replacements() []string {
|
||||
return []string{
|
||||
"base_model.model.", "",
|
||||
"model.layers", "blk",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"lora_A.weight", "weight.lora_a",
|
||||
"lora_B.weight", "weight.lora_b",
|
||||
"lora_a", "weight.lora_a",
|
||||
"lora_b", "weight.lora_b",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := []int{int(shape[1]), int(shape[0])}
|
||||
|
||||
var heads uint32
|
||||
if strings.HasSuffix(name, "attn_q.weight.lora_a") {
|
||||
heads = p.NumAttentionHeads
|
||||
} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
|
||||
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||
} else {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
|
||||
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
|
||||
func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := []int{int(shape[1]), int(shape[0])}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
|
||||
var heads uint32
|
||||
if strings.HasSuffix(name, "attn_q.weight.lora_a") {
|
||||
heads = p.NumAttentionHeads
|
||||
} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
|
||||
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||
}
|
||||
|
||||
if heads > 0 {
|
||||
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if err := n.T(1, 0); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
221
convert/convert_mistral.go
Normal file
221
convert/convert_mistral.go
Normal file
@@ -0,0 +1,221 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type mistral3Model struct {
|
||||
ModelParameters
|
||||
ImageTokenIndex uint32 `json:"image_token_index"`
|
||||
SpatialMergeSize uint32 `json:"spatial_merge_size"`
|
||||
VisionFeatureLayer int32 `json:"vision_feature_layer"`
|
||||
TextModel struct {
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
SlidingWindow *uint32 `json:"sliding_window"`
|
||||
HiddenAct string `json:"hidden_act"`
|
||||
VocabSize uint32 `json:"vocab_size"`
|
||||
RopeParameters struct {
|
||||
BetaFast float32 `json:"beta_fast"`
|
||||
BetaSlow float32 `json:"beta_slow"`
|
||||
Factor float32 `json:"factor"`
|
||||
Llama4ScalingBeta *float32 `json:"llama_4_scaling_beta"`
|
||||
OrigMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
RopeType string `json:"rope_type"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
Mscale *float32 `json:"mscale"`
|
||||
MscaleAllDim *float32 `json:"mscale_all_dim"`
|
||||
} `json:"rope_parameters"`
|
||||
} `json:"text_config"`
|
||||
VisionModel struct {
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
ImageSize uint32 `json:"image_size"`
|
||||
NumChannels uint32 `json:"num_channels"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
HiddenAct string `json:"hidden_act"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeParameters struct {
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
} `json:"rope_parameters"`
|
||||
} `json:"vision_config"`
|
||||
MultiModalProjectorBias bool `json:"multimodal_projector_bias"`
|
||||
ProjectorHiddenAct string `json:"projector_hidden_act"`
|
||||
}
|
||||
|
||||
func (p *mistral3Model) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "mistral3"
|
||||
kv["mistral3.vocab_size"] = p.TextModel.VocabSize
|
||||
|
||||
// Text configuration
|
||||
kv["mistral3.block_count"] = p.TextModel.NumHiddenLayers
|
||||
kv["mistral3.context_length"] = p.TextModel.MaxPositionEmbeddings
|
||||
kv["mistral3.embedding_length"] = p.TextModel.HiddenSize
|
||||
kv["mistral3.feed_forward_length"] = p.TextModel.IntermediateSize
|
||||
kv["mistral3.attention.head_count"] = p.TextModel.NumAttentionHeads
|
||||
kv["mistral3.attention.head_count_kv"] = p.TextModel.NumKeyValueHeads
|
||||
kv["mistral3.attention.layer_norm_rms_epsilon"] = p.TextModel.RMSNormEPS
|
||||
kv["mistral3.attention.key_length"] = p.TextModel.HeadDim
|
||||
kv["mistral3.attention.value_length"] = p.TextModel.HeadDim
|
||||
kv["mistral3.rope.dimension_count"] = cmp.Or(p.TextModel.HeadDim, p.TextModel.HiddenSize/p.TextModel.NumAttentionHeads)
|
||||
kv["mistral3.rope.freq_base"] = cmp.Or(p.TextModel.RopeTheta, p.TextModel.RopeParameters.RopeTheta)
|
||||
kv["mistral3.rope.scaling.factor"] = p.TextModel.RopeParameters.Factor
|
||||
kv["mistral3.rope.scaling.type"] = p.TextModel.RopeParameters.RopeType
|
||||
kv["mistral3.rope.scaling.beta_fast"] = p.TextModel.RopeParameters.BetaFast
|
||||
kv["mistral3.rope.scaling.beta_slow"] = p.TextModel.RopeParameters.BetaSlow
|
||||
|
||||
if p.TextModel.RopeParameters.Mscale != nil {
|
||||
kv["mistral3.rope.scaling.mscale"] = *p.TextModel.RopeParameters.Mscale
|
||||
}
|
||||
if p.TextModel.RopeParameters.MscaleAllDim != nil {
|
||||
kv["mistral3.rope.scaling.mscale_all_dim"] = *p.TextModel.RopeParameters.MscaleAllDim
|
||||
}
|
||||
if p.TextModel.RopeParameters.OrigMaxPositionEmbeddings > 0 {
|
||||
kv["mistral3.rope.scaling.original_context_length"] = p.TextModel.RopeParameters.OrigMaxPositionEmbeddings
|
||||
}
|
||||
if p.TextModel.RopeParameters.Llama4ScalingBeta != nil {
|
||||
kv["mistral3.rope.scaling_beta"] = *p.TextModel.RopeParameters.Llama4ScalingBeta
|
||||
}
|
||||
|
||||
// Vision configuration
|
||||
kv["mistral3.vision.block_count"] = p.VisionModel.NumHiddenLayers
|
||||
kv["mistral3.vision.embedding_length"] = p.VisionModel.HiddenSize
|
||||
kv["mistral3.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
|
||||
kv["mistral3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
|
||||
kv["mistral3.vision.attention.key_length"] = p.VisionModel.HeadDim
|
||||
kv["mistral3.vision.image_size"] = p.VisionModel.ImageSize
|
||||
kv["mistral3.vision.patch_size"] = p.VisionModel.PatchSize
|
||||
kv["mistral3.vision.num_channels"] = p.VisionModel.NumChannels
|
||||
// kv["mistral3.vision.attention.layer_norm_epsilon"] = 1e-05 // Default value
|
||||
kv["mistral3.vision.rope.freq_base"] = cmp.Or(p.VisionModel.RopeTheta, p.VisionModel.RopeParameters.RopeTheta)
|
||||
|
||||
// Multimodal configuration
|
||||
kv["mistral3.image_token_index"] = p.ImageTokenIndex
|
||||
kv["mistral3.spatial_merge_size"] = p.SpatialMergeSize
|
||||
|
||||
kv["mistral3.mm.projector_bias"] = p.MultiModalProjectorBias
|
||||
|
||||
if p.ProjectorHiddenAct != "" {
|
||||
kv["mistral3.mm.projector_hidden_act"] = p.ProjectorHiddenAct
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *mistral3Model) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
for _, t := range ts {
|
||||
if !strings.HasPrefix(t.Name(), "v.") {
|
||||
if strings.HasSuffix(t.Name(), ".attn_q.weight") ||
|
||||
strings.HasSuffix(t.Name(), ".attn_k.weight") {
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *mistral3Model) Replacements() []string {
|
||||
return []string{
|
||||
"language_model.model.norm", "output_norm",
|
||||
"language_model.model.", "",
|
||||
"language_model.", "",
|
||||
"layers", "blk",
|
||||
"transformer.layers", "blk",
|
||||
"vision_tower", "v",
|
||||
"ln_pre", "encoder_norm",
|
||||
"input_layernorm", "attn_norm",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"embed_tokens", "token_embd",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"attention.q_proj", "attn_q",
|
||||
"attention.k_proj", "attn_k",
|
||||
"attention.v_proj", "attn_v",
|
||||
"attention.o_proj", "attn_output",
|
||||
"attention_norm", "attn_norm",
|
||||
"feed_forward.gate_proj", "ffn_gate",
|
||||
"feed_forward.down_proj", "ffn_down",
|
||||
"feed_forward.up_proj", "ffn_up",
|
||||
"multi_modal_projector", "mm",
|
||||
"ffn_norm", "ffn_norm",
|
||||
"lm_head", "output",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *mistral3Model) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
var dims []int
|
||||
for _, dim := range shape {
|
||||
dims = append(dims, int(dim))
|
||||
}
|
||||
|
||||
var heads uint32
|
||||
if strings.HasSuffix(name, ".attn_q.weight") {
|
||||
heads = p.TextModel.NumAttentionHeads
|
||||
} else if strings.HasSuffix(name, ".attn_k.weight") {
|
||||
heads = cmp.Or(p.TextModel.NumKeyValueHeads, p.TextModel.NumAttentionHeads)
|
||||
} else {
|
||||
return nil, fmt.Errorf("unknown tensor for repack: %s", name)
|
||||
}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
181
convert/convert_mistral_causal.go
Normal file
181
convert/convert_mistral_causal.go
Normal file
@@ -0,0 +1,181 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type mistral3CausalModel struct {
|
||||
ModelParameters
|
||||
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
SlidingWindow *uint32 `json:"sliding_window"`
|
||||
HiddenAct string `json:"hidden_act"`
|
||||
VocabSize uint32 `json:"vocab_size"`
|
||||
RopeParameters struct {
|
||||
BetaFast float32 `json:"beta_fast"`
|
||||
BetaSlow float32 `json:"beta_slow"`
|
||||
Factor float32 `json:"factor"`
|
||||
Llama4ScalingBeta *float32 `json:"llama_4_scaling_beta"`
|
||||
OrigMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
RopeType string `json:"rope_type"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
Mscale *float32 `json:"mscale"`
|
||||
MscaleAllDim *float32 `json:"mscale_all_dim"`
|
||||
} `json:"rope_parameters"`
|
||||
}
|
||||
|
||||
func (p *mistral3CausalModel) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "mistral3"
|
||||
kv["mistral3.vocab_size"] = p.VocabSize
|
||||
|
||||
// Text configuration
|
||||
kv["mistral3.block_count"] = p.NumHiddenLayers
|
||||
kv["mistral3.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["mistral3.embedding_length"] = p.HiddenSize
|
||||
kv["mistral3.feed_forward_length"] = p.IntermediateSize
|
||||
kv["mistral3.attention.head_count"] = p.NumAttentionHeads
|
||||
kv["mistral3.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||
kv["mistral3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
kv["mistral3.attention.key_length"] = p.HeadDim
|
||||
kv["mistral3.attention.value_length"] = p.HeadDim
|
||||
kv["mistral3.rope.dimension_count"] = cmp.Or(p.HeadDim, p.HiddenSize/p.NumAttentionHeads)
|
||||
kv["mistral3.rope.freq_base"] = cmp.Or(p.RopeTheta, p.RopeParameters.RopeTheta)
|
||||
kv["mistral3.rope.scaling.factor"] = p.RopeParameters.Factor
|
||||
kv["mistral3.rope.scaling.type"] = p.RopeParameters.RopeType
|
||||
kv["mistral3.rope.scaling.beta_fast"] = p.RopeParameters.BetaFast
|
||||
kv["mistral3.rope.scaling.beta_slow"] = p.RopeParameters.BetaSlow
|
||||
|
||||
if p.RopeParameters.Mscale != nil {
|
||||
kv["mistral3.rope.scaling.mscale"] = *p.RopeParameters.Mscale
|
||||
}
|
||||
|
||||
if p.RopeParameters.MscaleAllDim != nil {
|
||||
kv["mistral3.rope.scaling.mscale_all_dim"] = *p.RopeParameters.MscaleAllDim
|
||||
}
|
||||
|
||||
if p.RopeParameters.OrigMaxPositionEmbeddings > 0 {
|
||||
kv["mistral3.rope.scaling.original_context_length"] = p.RopeParameters.OrigMaxPositionEmbeddings
|
||||
kv["mistral3.rope.scaling_beta"] = *p.RopeParameters.Llama4ScalingBeta
|
||||
}
|
||||
|
||||
if p.RopeParameters.Llama4ScalingBeta != nil {
|
||||
kv["mistral3.rope.scaling_beta"] = *p.RopeParameters.Llama4ScalingBeta
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *mistral3CausalModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
for _, t := range ts {
|
||||
if !strings.HasPrefix(t.Name(), "v.") {
|
||||
if strings.HasSuffix(t.Name(), ".attn_q.weight") ||
|
||||
strings.HasSuffix(t.Name(), ".attn_k.weight") {
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *mistral3CausalModel) Replacements() []string {
|
||||
return []string{
|
||||
"model.norm", "output_norm",
|
||||
"model.", "",
|
||||
"layers", "blk",
|
||||
"transformer.layers", "blk",
|
||||
"vision_tower", "v",
|
||||
"ln_pre", "encoder_norm",
|
||||
"input_layernorm", "attn_norm",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"embed_tokens", "token_embd",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"attention.q_proj", "attn_q",
|
||||
"attention.k_proj", "attn_k",
|
||||
"attention.v_proj", "attn_v",
|
||||
"attention.o_proj", "attn_output",
|
||||
"attention_norm", "attn_norm",
|
||||
"feed_forward.gate_proj", "ffn_gate",
|
||||
"feed_forward.down_proj", "ffn_down",
|
||||
"feed_forward.up_proj", "ffn_up",
|
||||
"multi_modal_projector", "mm",
|
||||
"ffn_norm", "ffn_norm",
|
||||
"lm_head", "output",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *mistral3CausalModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
var dims []int
|
||||
for _, dim := range shape {
|
||||
dims = append(dims, int(dim))
|
||||
}
|
||||
|
||||
var heads uint32
|
||||
if strings.HasSuffix(name, ".attn_q.weight") {
|
||||
heads = p.NumAttentionHeads
|
||||
} else if strings.HasSuffix(name, ".attn_k.weight") {
|
||||
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||
} else {
|
||||
return nil, fmt.Errorf("unknown tensor for repack: %s", name)
|
||||
}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
64
convert/convert_mixtral.go
Normal file
64
convert/convert_mixtral.go
Normal file
@@ -0,0 +1,64 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type mixtralModel struct {
|
||||
llamaModel
|
||||
NumLocalExperts uint32 `json:"num_local_experts"`
|
||||
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||
}
|
||||
|
||||
func (p *mixtralModel) KV(t *Tokenizer) KV {
|
||||
kv := p.llamaModel.KV(t)
|
||||
|
||||
if p.NumLocalExperts > 0 {
|
||||
kv["llama.expert_count"] = p.NumLocalExperts
|
||||
}
|
||||
|
||||
if p.NumExpertsPerToken > 0 {
|
||||
kv["llama.expert_used_count"] = p.NumExpertsPerToken
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *mixtralModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
merges := make([]merge, 0, p.NumHiddenLayers*6)
|
||||
for i := range p.NumHiddenLayers {
|
||||
merges = append(merges, merge{
|
||||
fmt.Sprintf("blk.%d.*.w1.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
|
||||
}, merge{
|
||||
fmt.Sprintf("blk.%d.*.w1.bias", i),
|
||||
fmt.Sprintf("blk.%d.ffn_gate_exps.bias", i),
|
||||
}, merge{
|
||||
fmt.Sprintf("blk.%d.*.w2.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
|
||||
}, merge{
|
||||
fmt.Sprintf("blk.%d.*.w2.bias", i),
|
||||
fmt.Sprintf("blk.%d.ffn_up_exps.bias", i),
|
||||
}, merge{
|
||||
fmt.Sprintf("blk.%d.*.w3.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
|
||||
}, merge{
|
||||
fmt.Sprintf("blk.%d.*.w3.bias", i),
|
||||
fmt.Sprintf("blk.%d.ffn_down_exps.bias", i),
|
||||
})
|
||||
}
|
||||
|
||||
out, ts := mergeTensors(ts, merges...)
|
||||
return append(out, p.llamaModel.Tensors(ts)...)
|
||||
}
|
||||
|
||||
func (p *mixtralModel) Replacements() []string {
|
||||
return append(
|
||||
p.llamaModel.Replacements(),
|
||||
"model.layers", "blk",
|
||||
"block_sparse_moe.gate", "ffn_gate_inp",
|
||||
"block_sparse_moe.experts.", ".",
|
||||
)
|
||||
}
|
||||
179
convert/convert_mllama.go
Normal file
179
convert/convert_mllama.go
Normal file
@@ -0,0 +1,179 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
)
|
||||
|
||||
type mllamaModel struct {
|
||||
ModelParameters
|
||||
TextModel struct {
|
||||
llamaModel
|
||||
|
||||
CrossAttentionLayers []int32 `json:"cross_attention_layers"`
|
||||
} `json:"text_config"`
|
||||
VisionModel struct {
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NumGlobalLayers uint32 `json:"num_global_layers"`
|
||||
IntermediateLayersIndices []int32 `json:"intermediate_layers_indices"`
|
||||
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
|
||||
AttentionHeads uint32 `json:"attention_heads"`
|
||||
|
||||
ImageSize uint32 `json:"image_size"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
NumChannels uint32 `json:"num_channels"`
|
||||
MaxNumTiles uint32 `json:"max_num_tiles"`
|
||||
NormEpsilon float32 `json:"norm_eps"`
|
||||
RopeTheta float32 `json:"rope.freq_base"`
|
||||
} `json:"vision_config"`
|
||||
}
|
||||
|
||||
func (m *mllamaModel) KV(t *Tokenizer) KV {
|
||||
kv := m.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "mllama"
|
||||
|
||||
for k, v := range m.TextModel.KV(t) {
|
||||
if strings.HasPrefix(k, "llama.") {
|
||||
kv[strings.ReplaceAll(k, "llama.", "mllama.")] = v
|
||||
}
|
||||
}
|
||||
|
||||
kv["mllama.attention.cross_attention_layers"] = m.TextModel.CrossAttentionLayers
|
||||
|
||||
kv["mllama.vision.block_count"] = m.VisionModel.NumHiddenLayers
|
||||
kv["mllama.vision.global.block_count"] = m.VisionModel.NumGlobalLayers
|
||||
kv["mllama.vision.intermediate_layers_indices"] = m.VisionModel.IntermediateLayersIndices
|
||||
|
||||
kv["mllama.vision.embedding_length"] = m.VisionModel.HiddenSize
|
||||
kv["mllama.vision.feed_forward_length"] = m.VisionModel.IntermediateSize
|
||||
|
||||
kv["mllama.vision.attention.head_count"] = m.VisionModel.AttentionHeads
|
||||
kv["mllama.vision.attention.layer_norm_epsilon"] = m.VisionModel.NormEpsilon
|
||||
|
||||
kv["mllama.vision.image_size"] = m.VisionModel.ImageSize
|
||||
kv["mllama.vision.patch_size"] = m.VisionModel.PatchSize
|
||||
kv["mllama.vision.max_num_tiles"] = m.VisionModel.MaxNumTiles
|
||||
kv["mllama.vision.num_channels"] = m.VisionModel.NumChannels
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (m *mllamaModel) Replacements() []string {
|
||||
return append(
|
||||
m.TextModel.Replacements(),
|
||||
"language_model.", "",
|
||||
"gate_attn", "attn_gate",
|
||||
"gate_ffn", "ffn_gate",
|
||||
"cross_attn.", "cross_attn_",
|
||||
"vision_model", "v",
|
||||
"class_embedding", "class_embd",
|
||||
"patch_embedding", "patch_embd",
|
||||
"gated_positional_embedding.tile_embedding", "tile_position_embd",
|
||||
"gated_positional_embedding.embedding", "position_embd.weight",
|
||||
"gated_positional_embedding", "position_embd",
|
||||
"embedding.weight", "weight",
|
||||
"pre_tile_positional_embedding", "pre_tile_position_embd",
|
||||
"post_tile_positional_embedding", "post_tile_position_embd",
|
||||
"layernorm_pre", "pre_ln",
|
||||
"layernorm_post", "post_ln",
|
||||
"global_transformer.layers", "global.blk",
|
||||
"transformer.layers", "blk",
|
||||
"mlp.fc1", "ffn_up",
|
||||
"mlp.fc2", "ffn_down",
|
||||
"multi_modal_projector", "mm.0",
|
||||
)
|
||||
}
|
||||
|
||||
func (m *mllamaModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
var text []Tensor
|
||||
for _, t := range ts {
|
||||
if !strings.HasPrefix(t.Name(), "v.") && !strings.HasPrefix(t.Name(), "mm.") {
|
||||
text = append(text, t)
|
||||
} else if t.Name() == "v.position_embd.gate" {
|
||||
for _, name := range []string{"v.position_embd.gate", "v.tile_position_embd.gate"} {
|
||||
tt := t.Clone()
|
||||
tt.SetRepacker(m.repack(name))
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: name,
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: tt,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
if t.Name() == "v.pre_tile_position_embd.gate" || t.Name() == "v.post_tile_position_embd.gate" {
|
||||
t.SetRepacker(m.repack(t.Name()))
|
||||
} else if strings.HasSuffix(t.Name(), "attn_q.weight") || strings.HasSuffix(t.Name(), "attn_k.weight") {
|
||||
t.SetRepacker(m.repack(t.Name()))
|
||||
} else if strings.HasSuffix(t.Name(), "attn_gate") || strings.HasSuffix(t.Name(), "ffn_gate") {
|
||||
t.SetRepacker(m.repack(t.Name()))
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return append(out, m.TextModel.Tensors(text)...)
|
||||
}
|
||||
|
||||
func (m *mllamaModel) repack(name string) Repacker {
|
||||
return func(_ string, data []float32, shape []uint64) (_ []float32, err error) {
|
||||
dims := make([]int, len(shape))
|
||||
for i, dim := range shape {
|
||||
dims[i] = int(dim)
|
||||
}
|
||||
|
||||
var t tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
|
||||
if strings.HasSuffix(name, "attn_q.weight") || strings.HasSuffix(name, "attn_k.weight") {
|
||||
heads := m.VisionModel.AttentionHeads
|
||||
if err := t.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := t.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := t.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := t.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
t, err = tensor.Tanh(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if name == "v.position_embd.gate" {
|
||||
t, err = tensor.Sub(float32(1), t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
t = tensor.Materialize(t)
|
||||
// flatten tensor so it can be return as a vector
|
||||
if err := t.Reshape(t.Shape().TotalSize()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return native.VectorF32(t.(*tensor.Dense))
|
||||
}
|
||||
}
|
||||
794
convert/convert_nemotron_h.go
Normal file
794
convert/convert_nemotron_h.go
Normal file
@@ -0,0 +1,794 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"math"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type hybridPattern string
|
||||
|
||||
func (p *hybridPattern) UnmarshalJSON(data []byte) error {
|
||||
if string(data) == "null" {
|
||||
*p = ""
|
||||
return nil
|
||||
}
|
||||
|
||||
var single string
|
||||
if err := json.Unmarshal(data, &single); err == nil {
|
||||
*p = hybridPattern(strings.TrimSpace(single))
|
||||
return nil
|
||||
}
|
||||
|
||||
var parts []string
|
||||
if err := json.Unmarshal(data, &parts); err == nil {
|
||||
*p = hybridPattern(strings.Join(parts, ""))
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("hybrid_override_pattern must be a string or string array")
|
||||
}
|
||||
|
||||
type nemotronHModel struct {
|
||||
ModelParameters
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
|
||||
NormEpsilon float32 `json:"norm_eps"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
PartialRotaryFactor float32 `json:"partial_rotary_factor"`
|
||||
ConvKernel uint32 `json:"conv_kernel"`
|
||||
SSMStateSize uint32 `json:"ssm_state_size"`
|
||||
MambaNumHeads uint32 `json:"mamba_num_heads"`
|
||||
MambaHeadDim uint32 `json:"mamba_head_dim"`
|
||||
NGroups uint32 `json:"n_groups"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
HybridOverridePattern hybridPattern `json:"hybrid_override_pattern"`
|
||||
|
||||
// MoE
|
||||
NumExperts uint32 `json:"num_experts"`
|
||||
NumSharedExperts uint32 `json:"num_shared_experts"`
|
||||
NRoutedExperts uint32 `json:"n_routed_experts"`
|
||||
NSharedExperts uint32 `json:"n_shared_experts"`
|
||||
NumExpertsPerTok uint32 `json:"num_experts_per_tok"`
|
||||
MoEIntermediateSize uint32 `json:"moe_intermediate_size"`
|
||||
MoESharedExpertIntermediate uint32 `json:"moe_shared_expert_intermediate_size"`
|
||||
NormTopKProb bool `json:"norm_topk_prob"`
|
||||
RoutedScalingFactor float32 `json:"routed_scaling_factor"`
|
||||
ExpertGroupCount uint32 `json:"n_group"`
|
||||
ExpertGroupUsedCount uint32 `json:"topk_group"`
|
||||
}
|
||||
|
||||
type nemotronHNanoVLModel struct {
|
||||
ModelParameters
|
||||
MaxSequenceLength uint32 `json:"max_sequence_length"`
|
||||
ForceImageSize uint32 `json:"force_image_size"`
|
||||
DownsampleRatio float32 `json:"downsample_ratio"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
UseThumbnail *bool `json:"use_thumbnail"`
|
||||
ImgContextTokenID uint32 `json:"img_context_token_id"`
|
||||
ImgContextToken string `json:"img_context_token"`
|
||||
ImgStartToken string `json:"img_start_token"`
|
||||
ImgEndToken string `json:"img_end_token"`
|
||||
VitHiddenSize uint32 `json:"vit_hidden_size"`
|
||||
ProjectorHidden uint32 `json:"projector_hidden_size"`
|
||||
SoundContextTokenID uint32 `json:"sound_context_token_id"`
|
||||
SoundContextToken string `json:"sound_context_token"`
|
||||
NormMean []float32 `json:"norm_mean"`
|
||||
NormStd []float32 `json:"norm_std"`
|
||||
VisionConfig radioConfig `json:"vision_config"`
|
||||
SoundConfig soundConfig `json:"sound_config"`
|
||||
LLMConfig nemotronHModel `json:"llm_config"`
|
||||
Preprocessor struct {
|
||||
ImageSize uint32 `json:"image_size"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
DownsampleRatio float32 `json:"downsample_ratio"`
|
||||
MaxNumTiles uint32 `json:"max_num_tiles"`
|
||||
UseThumbnail *bool `json:"use_thumbnail"`
|
||||
NormMean []float32 `json:"norm_mean"`
|
||||
NormStd []float32 `json:"norm_std"`
|
||||
}
|
||||
}
|
||||
|
||||
type soundConfig struct {
|
||||
ModelType string `json:"model_type"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
ConvKernelSize uint32 `json:"conv_kernel_size"`
|
||||
SubsamplingConvChannels uint32 `json:"subsampling_conv_channels"`
|
||||
SubsamplingConvKernelSize uint32 `json:"subsampling_conv_kernel_size"`
|
||||
SubsamplingConvStride uint32 `json:"subsampling_conv_stride"`
|
||||
SubsamplingFactor uint32 `json:"subsampling_factor"`
|
||||
NumMelBins uint32 `json:"num_mel_bins"`
|
||||
ProjectionHiddenSize uint32 `json:"projection_hidden_size"`
|
||||
SamplingRate uint32 `json:"sampling_rate"`
|
||||
ScaleInput bool `json:"scale_input"`
|
||||
}
|
||||
|
||||
type radioConfig struct {
|
||||
Version string `json:"version"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
MaxResolution uint32 `json:"max_resolution"`
|
||||
MinNumPatches uint32 `json:"min_num_patches"`
|
||||
MaxNumPatches uint32 `json:"max_num_patches"`
|
||||
SeparateVideoEmbedder bool `json:"separate_video_embedder"`
|
||||
Args struct {
|
||||
MinNumPatches uint32 `json:"min_num_patches"`
|
||||
MaxNumPatches uint32 `json:"max_num_patches"`
|
||||
} `json:"args"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*nemotronHModel)(nil)
|
||||
var _ ModelConverter = (*nemotronHNanoVLModel)(nil)
|
||||
|
||||
func (n *nemotronHNanoVLModel) parseMore(fsys fs.FS) error {
|
||||
if n.MaxSequenceLength > 0 {
|
||||
n.LLMConfig.MaxPositionEmbeddings = n.MaxSequenceLength
|
||||
}
|
||||
|
||||
if err := n.LLMConfig.parseMore(fsys); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if bts, err := fs.ReadFile(fsys, "preprocessor_config.json"); err == nil {
|
||||
if err := json.Unmarshal(bts, &n.Preprocessor); err != nil {
|
||||
return fmt.Errorf("nemotron_h_omni: parse preprocessor_config.json: %w", err)
|
||||
}
|
||||
} else if !errors.Is(err, fs.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
if version := strings.TrimSpace(n.VisionConfig.Version); version != "" && version != "radio_v2.5-h" {
|
||||
return fmt.Errorf("nemotron_h_omni: unsupported RADIO version %q", version)
|
||||
}
|
||||
if patchSize := n.visionPatchSize(); patchSize != 16 {
|
||||
return fmt.Errorf("nemotron_h_omni: unsupported vision patch_size=%d", patchSize)
|
||||
}
|
||||
if scale := n.visionProjectorScaleFactor(); scale != 2 {
|
||||
return fmt.Errorf("nemotron_h_omni: unsupported vision projector scale factor=%d", scale)
|
||||
}
|
||||
|
||||
if n.SoundConfig.NumHiddenLayers > 0 {
|
||||
if modelType := strings.TrimSpace(n.SoundConfig.ModelType); modelType != "" && modelType != "parakeet" {
|
||||
return fmt.Errorf("nemotron_h_omni: unsupported sound model_type %q", modelType)
|
||||
}
|
||||
if n.soundHiddenSize() == 0 {
|
||||
return fmt.Errorf("nemotron_h_omni: sound hidden_size must be set")
|
||||
}
|
||||
if n.soundAttentionHeads() == 0 {
|
||||
return fmt.Errorf("nemotron_h_omni: sound num_attention_heads must be set")
|
||||
}
|
||||
if n.soundSubsamplingFactor() != 8 {
|
||||
return fmt.Errorf("nemotron_h_omni: unsupported sound subsampling_factor=%d", n.soundSubsamplingFactor())
|
||||
}
|
||||
if n.soundMelBins() != 128 {
|
||||
return fmt.Errorf("nemotron_h_omni: unsupported sound num_mel_bins=%d", n.soundMelBins())
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) KV(t *Tokenizer) KV {
|
||||
kv := n.LLMConfig.KV(t)
|
||||
kv["general.architecture"] = "nemotron_h_omni"
|
||||
|
||||
kv["vision.block_count"] = n.visionBlockCount()
|
||||
kv["vision.embedding_length"] = n.visionEmbeddingLength()
|
||||
kv["vision.feed_forward_length"] = n.visionFeedForwardLength()
|
||||
kv["vision.attention.head_count"] = n.visionAttentionHeads()
|
||||
kv["vision.attention.layer_norm_epsilon"] = float32(1e-6)
|
||||
kv["vision.patch_size"] = n.visionPatchSize()
|
||||
kv["vision.image_size"] = n.visionImageSize()
|
||||
kv["vision.max_tiles"] = n.visionMaxTiles()
|
||||
kv["vision.use_thumbnail"] = n.visionUseThumbnail()
|
||||
if minPatches := n.visionMinNumPatches(); minPatches > 0 {
|
||||
kv["vision.min_num_patches"] = minPatches
|
||||
}
|
||||
if maxPatches := n.visionMaxNumPatches(); maxPatches > 0 {
|
||||
kv["vision.max_num_patches"] = maxPatches
|
||||
}
|
||||
kv["vision.num_channels"] = uint32(3)
|
||||
kv["vision.image_mean"] = slices.Clone(defaultFloat32Slice(n.visionMean(), imageNetStandardMean))
|
||||
kv["vision.image_std"] = slices.Clone(defaultFloat32Slice(n.visionStd(), imageNetStandardSTD))
|
||||
kv["vision.projector.scale_factor"] = n.visionProjectorScaleFactor()
|
||||
|
||||
setTokenID := func(key string, explicit uint32, token string) {
|
||||
if explicit > 0 {
|
||||
kv[key] = explicit
|
||||
return
|
||||
}
|
||||
if t == nil || t.Vocabulary == nil {
|
||||
return
|
||||
}
|
||||
for i, v := range t.Vocabulary.Tokens {
|
||||
if v == token {
|
||||
kv[key] = uint32(i)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setTokenID("vision.image_token_id", n.ImgContextTokenID, cmp.Or(n.ImgContextToken, "<image>"))
|
||||
setTokenID("vision.image_start_token_id", 0, cmp.Or(n.ImgStartToken, "<img>"))
|
||||
setTokenID("vision.image_end_token_id", 0, cmp.Or(n.ImgEndToken, "</img>"))
|
||||
|
||||
if n.SoundConfig.NumHiddenLayers > 0 {
|
||||
kv["audio.block_count"] = n.SoundConfig.NumHiddenLayers
|
||||
kv["audio.embedding_length"] = n.soundHiddenSize()
|
||||
kv["audio.feed_forward_length"] = n.soundFeedForwardLength()
|
||||
kv["audio.attention.head_count"] = n.soundAttentionHeads()
|
||||
kv["audio.attention.layer_norm_epsilon"] = float32(1e-5)
|
||||
kv["audio.conv_kernel_size"] = n.soundConvKernelSize()
|
||||
kv["audio.num_mel_bins"] = n.soundMelBins()
|
||||
kv["audio.sample_rate"] = n.soundSampleRate()
|
||||
kv["audio.subsampling_factor"] = n.soundSubsamplingFactor()
|
||||
kv["audio.subsampling_conv_channels"] = n.soundSubsamplingConvChannels()
|
||||
kv["audio.subsampling_conv_kernel_size"] = n.soundSubsamplingConvKernelSize()
|
||||
kv["audio.subsampling_conv_stride"] = n.soundSubsamplingConvStride()
|
||||
kv["audio.projection_hidden_size"] = n.soundProjectionHiddenSize()
|
||||
kv["audio.scale_input"] = n.SoundConfig.ScaleInput
|
||||
setTokenID("audio.sound_token_id", n.SoundContextTokenID, cmp.Or(n.SoundContextToken, "<so_embedding>"))
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var textTensors []Tensor
|
||||
var out []*ggml.Tensor
|
||||
|
||||
for _, t := range ts {
|
||||
switch {
|
||||
case isNemotronHNanoVLOmittedTensor(t.Name()):
|
||||
continue
|
||||
case strings.Contains(t.Name(), ".attn_qkv"):
|
||||
out = append(out, slices.Collect(splitDim(t, 0,
|
||||
split{Replacer: strings.NewReplacer("attn_qkv", "attn_q")},
|
||||
split{Replacer: strings.NewReplacer("attn_qkv", "attn_k")},
|
||||
split{Replacer: strings.NewReplacer("attn_qkv", "attn_v")},
|
||||
))...)
|
||||
case t.Name() == "v.position_embd":
|
||||
shape := t.Shape()
|
||||
if len(shape) == 3 && shape[0] == 1 {
|
||||
shape = shape[1:]
|
||||
}
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: shape,
|
||||
WriterTo: t,
|
||||
})
|
||||
case strings.HasPrefix(t.Name(), "a.") || strings.HasPrefix(t.Name(), "v.") || strings.HasPrefix(t.Name(), "mm."):
|
||||
name := t.Name()
|
||||
shape := slices.Clone(t.Shape())
|
||||
if strings.HasPrefix(name, "a.blk.") && strings.Contains(name, ".conv_dw.") && strings.HasSuffix(name, ".weight") && len(shape) == 3 {
|
||||
t.SetRepacker(squeezeMiddleDim)
|
||||
shape = []uint64{shape[0], shape[2]}
|
||||
}
|
||||
if strings.HasPrefix(name, "a.blk.") && (strings.Contains(name, ".conv_pw1.") || strings.Contains(name, ".conv_pw2.")) && strings.HasSuffix(name, ".weight") && len(shape) == 3 && shape[2] == 1 {
|
||||
t.SetRepacker(squeezeLastDim)
|
||||
shape = shape[:2]
|
||||
}
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: name,
|
||||
Kind: t.Kind(),
|
||||
Shape: shape,
|
||||
WriterTo: t,
|
||||
})
|
||||
default:
|
||||
textTensors = append(textTensors, t)
|
||||
}
|
||||
}
|
||||
|
||||
return append(n.LLMConfig.Tensors(textTensors), out...)
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) Replacements() []string {
|
||||
return append([]string{
|
||||
"language_model.", "",
|
||||
"vision_model.radio_model.model.patch_generator.embedder", "v.patch_embd",
|
||||
"vision_model.radio_model.model.patch_generator.pos_embed", "v.position_embd",
|
||||
"vision_model.radio_model.model.patch_generator.cls_token.token", "v.cls_embd",
|
||||
"vision_model.radio_model.model.blocks", "v.blk",
|
||||
"attn.qkv", "attn_qkv",
|
||||
"attn.proj", "attn_out",
|
||||
"mlp.fc1", "ffn_up",
|
||||
"mlp.fc2", "ffn_down",
|
||||
"norm1", "ln1",
|
||||
"norm2", "ln2",
|
||||
"mlp1.0", "mm.norm",
|
||||
"mlp1.1", "mm.1",
|
||||
"mlp1.3", "mm.2",
|
||||
"sound_encoder.encoder.feature_extractor.featurizer.fb", "a.feature_extractor.fb",
|
||||
"sound_encoder.encoder.feature_extractor.featurizer.window", "a.feature_extractor.window",
|
||||
"sound_encoder.encoder.subsampling.layers.0", "a.subsampling.conv0",
|
||||
"sound_encoder.encoder.subsampling.layers.2", "a.subsampling.dw1",
|
||||
"sound_encoder.encoder.subsampling.layers.3", "a.subsampling.pw1",
|
||||
"sound_encoder.encoder.subsampling.layers.5", "a.subsampling.dw2",
|
||||
"sound_encoder.encoder.subsampling.layers.6", "a.subsampling.pw2",
|
||||
"sound_encoder.encoder.subsampling.linear", "a.subsampling.linear",
|
||||
"sound_encoder.encoder.layers", "a.blk",
|
||||
"feed_forward1.linear1", "ffn1_up",
|
||||
"feed_forward1.linear2", "ffn1_down",
|
||||
"feed_forward2.linear1", "ffn2_up",
|
||||
"feed_forward2.linear2", "ffn2_down",
|
||||
"norm_feed_forward1", "ffn1_norm",
|
||||
"norm_feed_forward2", "ffn2_norm",
|
||||
"norm_self_att", "attn_norm",
|
||||
"norm_conv", "conv_norm",
|
||||
"norm_out", "out_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_out",
|
||||
"self_attn.relative_k_proj", "attn_rel_k",
|
||||
"self_attn.bias_u", "attn_bias_u",
|
||||
"self_attn.bias_v", "attn_bias_v",
|
||||
"conv.pointwise_conv1", "conv_pw1",
|
||||
"conv.pointwise_conv2", "conv_pw2",
|
||||
"conv.depthwise_conv", "conv_dw",
|
||||
"conv.norm", "conv_bn",
|
||||
"sound_projection.norm", "mm.a.norm",
|
||||
"sound_projection.linear1", "mm.a.1",
|
||||
"sound_projection.linear2", "mm.a.2",
|
||||
}, n.LLMConfig.Replacements()...)
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) specialTokenTypes() []string {
|
||||
return n.LLMConfig.specialTokenTypes()
|
||||
}
|
||||
|
||||
func isNemotronHNanoVLOmittedTensor(name string) bool {
|
||||
return strings.HasSuffix(name, ".conv_bn.num_batches_tracked") ||
|
||||
strings.HasPrefix(name, "vision_model.radio_model.input_conditioner.") ||
|
||||
strings.HasPrefix(name, "vision_model.radio_model.model.patch_generator.video_embedder")
|
||||
}
|
||||
|
||||
func squeezeLastDim(_ string, data []float32, _ []uint64) ([]float32, error) {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionImageSize() uint32 {
|
||||
return cmp.Or(n.ForceImageSize, n.Preprocessor.ImageSize, uint32(512))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionPatchSize() uint32 {
|
||||
return cmp.Or(n.PatchSize, n.Preprocessor.PatchSize, n.VisionConfig.PatchSize, uint32(16))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionProjectorScaleFactor() uint32 {
|
||||
ratio := cmp.Or(n.DownsampleRatio, n.Preprocessor.DownsampleRatio, float32(0.5))
|
||||
if ratio <= 0 {
|
||||
return 2
|
||||
}
|
||||
|
||||
return max(uint32(1), uint32(math.Round(1.0/float64(ratio))))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionBlockCount() uint32 {
|
||||
return 32
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionEmbeddingLength() uint32 {
|
||||
return cmp.Or(n.VitHiddenSize, uint32(1280))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionAttentionHeads() uint32 {
|
||||
return 16
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionFeedForwardLength() uint32 {
|
||||
return 4 * n.visionEmbeddingLength()
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionMaxTiles() uint32 {
|
||||
return cmp.Or(n.Preprocessor.MaxNumTiles, uint32(12))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionMinNumPatches() uint32 {
|
||||
return cmp.Or(n.VisionConfig.MinNumPatches, n.VisionConfig.Args.MinNumPatches)
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionMaxNumPatches() uint32 {
|
||||
return cmp.Or(n.VisionConfig.MaxNumPatches, n.VisionConfig.Args.MaxNumPatches)
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionUseThumbnail() bool {
|
||||
for _, v := range []*bool{n.UseThumbnail, n.Preprocessor.UseThumbnail} {
|
||||
if v != nil {
|
||||
return *v
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionMean() []float32 {
|
||||
if len(n.NormMean) > 0 {
|
||||
return n.NormMean
|
||||
}
|
||||
return n.Preprocessor.NormMean
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) visionStd() []float32 {
|
||||
if len(n.NormStd) > 0 {
|
||||
return n.NormStd
|
||||
}
|
||||
return n.Preprocessor.NormStd
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundHiddenSize() uint32 {
|
||||
return cmp.Or(n.SoundConfig.HiddenSize, uint32(1024))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundAttentionHeads() uint32 {
|
||||
return cmp.Or(n.SoundConfig.NumAttentionHeads, uint32(8))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundFeedForwardLength() uint32 {
|
||||
return cmp.Or(n.SoundConfig.IntermediateSize, 4*n.soundHiddenSize())
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundConvKernelSize() uint32 {
|
||||
return cmp.Or(n.SoundConfig.ConvKernelSize, uint32(9))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundMelBins() uint32 {
|
||||
return cmp.Or(n.SoundConfig.NumMelBins, uint32(128))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundSampleRate() uint32 {
|
||||
return cmp.Or(n.SoundConfig.SamplingRate, uint32(16000))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundSubsamplingFactor() uint32 {
|
||||
return cmp.Or(n.SoundConfig.SubsamplingFactor, uint32(8))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundSubsamplingConvChannels() uint32 {
|
||||
return cmp.Or(n.SoundConfig.SubsamplingConvChannels, uint32(256))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundSubsamplingConvKernelSize() uint32 {
|
||||
return cmp.Or(n.SoundConfig.SubsamplingConvKernelSize, uint32(3))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundSubsamplingConvStride() uint32 {
|
||||
return cmp.Or(n.SoundConfig.SubsamplingConvStride, uint32(2))
|
||||
}
|
||||
|
||||
func (n *nemotronHNanoVLModel) soundProjectionHiddenSize() uint32 {
|
||||
return cmp.Or(n.SoundConfig.ProjectionHiddenSize, uint32(4096))
|
||||
}
|
||||
|
||||
var (
|
||||
imageNetStandardMean = []float32{0.48145466, 0.4578275, 0.40821073}
|
||||
imageNetStandardSTD = []float32{0.26862954, 0.26130258, 0.27577711}
|
||||
)
|
||||
|
||||
func (n *nemotronHModel) parseMore(_ fs.FS) error {
|
||||
if n.NumHiddenLayers == 0 {
|
||||
return fmt.Errorf("nemotron_h: num_hidden_layers must be set")
|
||||
}
|
||||
if n.HiddenSize == 0 {
|
||||
return fmt.Errorf("nemotron_h: hidden_size must be set")
|
||||
}
|
||||
if n.NumAttentionHeads == 0 {
|
||||
return fmt.Errorf("nemotron_h: num_attention_heads must be set")
|
||||
}
|
||||
if n.HeadDim == 0 {
|
||||
if n.HiddenSize%n.NumAttentionHeads != 0 {
|
||||
return fmt.Errorf("nemotron_h: hidden_size (%d) must be divisible by num_attention_heads (%d)", n.HiddenSize, n.NumAttentionHeads)
|
||||
}
|
||||
n.HeadDim = n.HiddenSize / n.NumAttentionHeads
|
||||
}
|
||||
if n.NumKeyValueHeads == 0 {
|
||||
n.NumKeyValueHeads = n.NumAttentionHeads
|
||||
}
|
||||
if n.ConvKernel == 0 {
|
||||
return fmt.Errorf("nemotron_h: conv_kernel must be set")
|
||||
}
|
||||
if n.SSMStateSize == 0 {
|
||||
return fmt.Errorf("nemotron_h: ssm_state_size must be set")
|
||||
}
|
||||
if n.ssmHeadCount() == 0 {
|
||||
return fmt.Errorf("nemotron_h: mamba_num_heads must be set")
|
||||
}
|
||||
if n.MambaHeadDim == 0 {
|
||||
return fmt.Errorf("nemotron_h: mamba_head_dim must be set")
|
||||
}
|
||||
if n.NGroups == 0 {
|
||||
n.NGroups = 1
|
||||
}
|
||||
|
||||
if _, _, err := n.layerArrays(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if n.isMoE() {
|
||||
if n.routedExpertCount() == 0 {
|
||||
return fmt.Errorf("nemotron_h: routed expert count must be set for MoE models")
|
||||
}
|
||||
if n.NumExpertsPerTok == 0 {
|
||||
return fmt.Errorf("nemotron_h: num_experts_per_tok must be set for MoE models")
|
||||
}
|
||||
if n.NumExpertsPerTok > n.routedExpertCount() {
|
||||
return fmt.Errorf("nemotron_h: num_experts_per_tok (%d) cannot exceed expert_count (%d)", n.NumExpertsPerTok, n.routedExpertCount())
|
||||
}
|
||||
if n.moeIntermediateSize() == 0 {
|
||||
return fmt.Errorf("nemotron_h: moe_intermediate_size must be set for MoE models")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) isMoE() bool {
|
||||
return cmp.Or(n.routedExpertCount(), n.NumExpertsPerTok, n.MoEIntermediateSize) > 0
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) routedExpertCount() uint32 {
|
||||
return cmp.Or(n.NRoutedExperts, n.NumExperts)
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) sharedExpertCount() uint32 {
|
||||
return cmp.Or(n.NSharedExperts, n.NumSharedExperts)
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) ssmHeadCount() uint32 {
|
||||
return n.MambaNumHeads
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) ssmInnerSize() uint32 {
|
||||
return n.MambaHeadDim * n.ssmHeadCount()
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) epsilon() float32 {
|
||||
return cmp.Or(n.NormEpsilon, n.LayerNormEpsilon, float32(1e-5))
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) moeIntermediateSize() uint32 {
|
||||
return cmp.Or(n.MoEIntermediateSize, n.IntermediateSize)
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) denseIntermediateSize() uint32 {
|
||||
return cmp.Or(n.IntermediateSize, n.MoEIntermediateSize)
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) layerArrays() (headCountKV []uint32, ffnLengths []uint32, err error) {
|
||||
pattern := strings.TrimSpace(string(n.HybridOverridePattern))
|
||||
if pattern == "" {
|
||||
return nil, nil, fmt.Errorf("nemotron_h: hybrid_override_pattern must be set")
|
||||
}
|
||||
|
||||
runes := []rune(pattern)
|
||||
if len(runes) != int(n.NumHiddenLayers) {
|
||||
return nil, nil, fmt.Errorf("nemotron_h: hybrid_override_pattern length (%d) must match num_hidden_layers (%d)", len(runes), n.NumHiddenLayers)
|
||||
}
|
||||
|
||||
headCountKV = make([]uint32, n.NumHiddenLayers)
|
||||
ffnLengths = make([]uint32, n.NumHiddenLayers)
|
||||
|
||||
attnKVHeads := cmp.Or(n.NumKeyValueHeads, n.NumAttentionHeads)
|
||||
moeFFN := n.moeIntermediateSize()
|
||||
denseFFN := n.denseIntermediateSize()
|
||||
|
||||
for i, layerType := range runes {
|
||||
switch layerType {
|
||||
case 'M':
|
||||
// Recurrent layer: no KV heads and no FFN.
|
||||
case '*', 'A':
|
||||
// Attention-only layer.
|
||||
headCountKV[i] = attnKVHeads
|
||||
case 'E':
|
||||
// MoE layer.
|
||||
if moeFFN == 0 {
|
||||
return nil, nil, fmt.Errorf("nemotron_h: moe layer at index %d but moe_intermediate_size is zero", i)
|
||||
}
|
||||
ffnLengths[i] = moeFFN
|
||||
case '-':
|
||||
// Dense FFN layer.
|
||||
if denseFFN == 0 {
|
||||
return nil, nil, fmt.Errorf("nemotron_h: dense FFN layer at index %d but intermediate_size is zero", i)
|
||||
}
|
||||
ffnLengths[i] = denseFFN
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("nemotron_h: unsupported layer type %q in hybrid_override_pattern at index %d", layerType, i)
|
||||
}
|
||||
}
|
||||
|
||||
return headCountKV, ffnLengths, nil
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) KV(t *Tokenizer) KV {
|
||||
kv := n.ModelParameters.KV(t)
|
||||
|
||||
arch := "nemotron_h"
|
||||
if n.isMoE() {
|
||||
arch = "nemotron_h_moe"
|
||||
}
|
||||
kv["general.architecture"] = arch
|
||||
kv["block_count"] = n.NumHiddenLayers
|
||||
kv["context_length"] = n.MaxPositionEmbeddings
|
||||
kv["embedding_length"] = n.HiddenSize
|
||||
kv["attention.head_count"] = n.NumAttentionHeads
|
||||
kv["attention.key_length"] = n.HeadDim
|
||||
kv["attention.value_length"] = n.HeadDim
|
||||
kv["attention.layer_norm_epsilon"] = n.epsilon()
|
||||
kv["attention.layer_norm_rms_epsilon"] = n.epsilon()
|
||||
kv["rope.freq_base"] = cmp.Or(n.RopeTheta, float32(10000))
|
||||
if n.PartialRotaryFactor > 0 && n.PartialRotaryFactor <= 1 {
|
||||
kv["rope.dimension_count"] = uint32(float32(n.HeadDim) * n.PartialRotaryFactor)
|
||||
}
|
||||
|
||||
if headCountKV, ffnLengths, err := n.layerArrays(); err == nil {
|
||||
kv["attention.head_count_kv"] = headCountKV
|
||||
kv["feed_forward_length"] = ffnLengths
|
||||
}
|
||||
|
||||
kv["ssm.conv_kernel"] = n.ConvKernel
|
||||
kv["ssm.inner_size"] = n.ssmInnerSize()
|
||||
kv["ssm.state_size"] = n.SSMStateSize
|
||||
kv["ssm.group_count"] = n.NGroups
|
||||
kv["ssm.time_step_rank"] = n.ssmHeadCount()
|
||||
|
||||
if n.isMoE() {
|
||||
kv["expert_count"] = n.routedExpertCount()
|
||||
kv["expert_used_count"] = n.NumExpertsPerTok
|
||||
kv["expert_feed_forward_length"] = n.moeIntermediateSize()
|
||||
if n.sharedExpertCount() > 0 {
|
||||
kv["expert_shared_count"] = n.sharedExpertCount()
|
||||
}
|
||||
if n.MoESharedExpertIntermediate > 0 {
|
||||
kv["expert_shared_feed_forward_length"] = n.MoESharedExpertIntermediate
|
||||
}
|
||||
kv["expert_weights_norm"] = n.NormTopKProb
|
||||
kv["expert_weights_scale"] = n.RoutedScalingFactor
|
||||
if n.ExpertGroupCount > 0 {
|
||||
kv["expert_group_count"] = n.ExpertGroupCount
|
||||
}
|
||||
if n.ExpertGroupUsedCount > 0 {
|
||||
kv["expert_group_used_count"] = n.ExpertGroupUsedCount
|
||||
}
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func normalizeVectorShapeToColumn(shape []uint64) []uint64 {
|
||||
switch len(shape) {
|
||||
case 1:
|
||||
return []uint64{shape[0], 1}
|
||||
case 2:
|
||||
if shape[0] == 1 && shape[1] > 1 {
|
||||
return []uint64{shape[1], 1}
|
||||
}
|
||||
if shape[1] == 1 && shape[0] > 1 {
|
||||
return []uint64{shape[0], 1}
|
||||
}
|
||||
}
|
||||
|
||||
return slices.Clone(shape)
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
remaining := ts
|
||||
if n.isMoE() {
|
||||
merges := make([]merge, 0, n.NumHiddenLayers*2)
|
||||
for i := range n.NumHiddenLayers {
|
||||
merges = append(merges, merge{
|
||||
fmt.Sprintf("blk.%d.mixer.experts.*.up_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
|
||||
}, merge{
|
||||
fmt.Sprintf("blk.%d.mixer.experts.*.down_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
|
||||
})
|
||||
}
|
||||
|
||||
merged, rest := mergeTensors(ts, merges...)
|
||||
out = append(out, merged...)
|
||||
remaining = rest
|
||||
}
|
||||
|
||||
nGroups := uint64(cmp.Or(n.NGroups, uint32(1)))
|
||||
for _, t := range remaining {
|
||||
name := t.Name()
|
||||
shape := slices.Clone(t.Shape())
|
||||
|
||||
switch {
|
||||
case strings.HasSuffix(name, ".ssm_a"):
|
||||
shape = normalizeVectorShapeToColumn(shape)
|
||||
t.SetRepacker(func(_ string, data []float32, _ []uint64) ([]float32, error) {
|
||||
out := make([]float32, len(data))
|
||||
for i, v := range data {
|
||||
out[i] = -float32(math.Exp(float64(v)))
|
||||
}
|
||||
return out, nil
|
||||
})
|
||||
case strings.HasSuffix(name, ".ssm_d"):
|
||||
shape = normalizeVectorShapeToColumn(shape)
|
||||
case strings.HasSuffix(name, ".ssm_norm.weight"):
|
||||
switch len(shape) {
|
||||
case 1:
|
||||
if nGroups > 0 && shape[0]%nGroups == 0 {
|
||||
shape = []uint64{nGroups, shape[0] / nGroups}
|
||||
}
|
||||
case 2:
|
||||
if shape[0] == 1 && nGroups > 0 && shape[1]%nGroups == 0 {
|
||||
shape = []uint64{nGroups, shape[1] / nGroups}
|
||||
}
|
||||
}
|
||||
case strings.HasSuffix(name, ".ssm_conv1d.weight"):
|
||||
if len(shape) == 3 {
|
||||
if shape[0] == 1 {
|
||||
shape = []uint64{shape[1], shape[2]}
|
||||
} else if shape[1] == 1 {
|
||||
shape = []uint64{shape[0], shape[2]}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: name,
|
||||
Kind: t.Kind(),
|
||||
Shape: shape,
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (n *nemotronHModel) Replacements() []string {
|
||||
return []string{
|
||||
// Embedding and output
|
||||
"lm_head", "output",
|
||||
"backbone.embeddings", "token_embd",
|
||||
"backbone.norm_f", "output_norm",
|
||||
"backbone.layers", "blk",
|
||||
|
||||
// Recurrent (Mamba2) tensors
|
||||
"mixer.in_proj", "ssm_in",
|
||||
"mixer.out_proj", "ssm_out",
|
||||
"mixer.dt_bias", "ssm_dt.bias",
|
||||
"mixer.A_log", "ssm_a",
|
||||
"mixer.D", "ssm_d",
|
||||
"mixer.conv1d", "ssm_conv1d",
|
||||
"mixer.norm.weight", "ssm_norm.weight",
|
||||
|
||||
// Attention tensors
|
||||
"mixer.q_proj", "attn_q",
|
||||
"mixer.k_proj", "attn_k",
|
||||
"mixer.v_proj", "attn_v",
|
||||
"mixer.o_proj", "attn_output",
|
||||
|
||||
// FFN / MoE tensors
|
||||
"mixer.gate.e_score_correction_bias", "exp_probs_b.bias",
|
||||
"mixer.gate", "ffn_gate_inp",
|
||||
"mixer.fc1_latent_proj", "ffn_latent_in",
|
||||
"mixer.fc2_latent_proj", "ffn_latent_out",
|
||||
"mixer.shared_experts.up_proj", "ffn_up_shexp",
|
||||
"mixer.shared_experts.down_proj", "ffn_down_shexp",
|
||||
"mixer.up_proj", "ffn_up",
|
||||
"mixer.down_proj", "ffn_down",
|
||||
|
||||
// Per-layer pre-norm
|
||||
".norm.weight", ".attn_norm.weight",
|
||||
}
|
||||
}
|
||||
540
convert/convert_nemotron_h_test.go
Normal file
540
convert/convert_nemotron_h_test.go
Normal file
@@ -0,0 +1,540 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestHybridPatternUnmarshal(t *testing.T) {
|
||||
t.Run("string", func(t *testing.T) {
|
||||
var p hybridPattern
|
||||
if err := json.Unmarshal([]byte(`"MEM*"`), &p); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got, want := string(p), "MEM*"; got != want {
|
||||
t.Fatalf("unexpected pattern: got %q want %q", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("array", func(t *testing.T) {
|
||||
var p hybridPattern
|
||||
if err := json.Unmarshal([]byte(`["M","E","M","*"]`), &p); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got, want := string(p), "MEM*"; got != want {
|
||||
t.Fatalf("unexpected pattern: got %q want %q", got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestNemotronHLayerArrays(t *testing.T) {
|
||||
m := &nemotronHModel{
|
||||
NumHiddenLayers: 5,
|
||||
NumAttentionHeads: 32,
|
||||
NumKeyValueHeads: 8,
|
||||
HybridOverridePattern: "MEM*E",
|
||||
NRoutedExperts: 128,
|
||||
NumExpertsPerTok: 6,
|
||||
MoEIntermediateSize: 1856,
|
||||
}
|
||||
|
||||
headsKV, ffn, err := m.layerArrays()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if got, want := headsKV, []uint32{0, 0, 0, 8, 0}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected head_count_kv: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := ffn, []uint32{0, 1856, 0, 0, 1856}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected feed_forward_length: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNemotronHKV(t *testing.T) {
|
||||
m := &nemotronHModel{
|
||||
MaxPositionEmbeddings: 1048576,
|
||||
HiddenSize: 2688,
|
||||
NumHiddenLayers: 5,
|
||||
NumAttentionHeads: 32,
|
||||
NumKeyValueHeads: 2,
|
||||
HeadDim: 128,
|
||||
LayerNormEpsilon: 1e-5,
|
||||
RopeTheta: 10000,
|
||||
PartialRotaryFactor: 0.5,
|
||||
ConvKernel: 4,
|
||||
SSMStateSize: 128,
|
||||
MambaNumHeads: 64,
|
||||
MambaHeadDim: 64,
|
||||
NGroups: 8,
|
||||
HybridOverridePattern: "MEM*E",
|
||||
NRoutedExperts: 128,
|
||||
NSharedExperts: 1,
|
||||
NumExpertsPerTok: 6,
|
||||
MoEIntermediateSize: 1856,
|
||||
MoESharedExpertIntermediate: 3712,
|
||||
NormTopKProb: true,
|
||||
RoutedScalingFactor: 2.5,
|
||||
}
|
||||
if err := m.parseMore(nil); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
kv := m.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
|
||||
if got, want := kv["general.architecture"], "nemotron_h_moe"; got != want {
|
||||
t.Fatalf("unexpected architecture: got %v want %v", got, want)
|
||||
}
|
||||
|
||||
headCountKV, ok := kv["attention.head_count_kv"].([]uint32)
|
||||
if !ok {
|
||||
t.Fatalf("attention.head_count_kv has unexpected type: %T", kv["attention.head_count_kv"])
|
||||
}
|
||||
if got, want := headCountKV, []uint32{0, 0, 0, 2, 0}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected attention.head_count_kv: got %v want %v", got, want)
|
||||
}
|
||||
|
||||
ffnLength, ok := kv["feed_forward_length"].([]uint32)
|
||||
if !ok {
|
||||
t.Fatalf("feed_forward_length has unexpected type: %T", kv["feed_forward_length"])
|
||||
}
|
||||
if got, want := ffnLength, []uint32{0, 1856, 0, 0, 1856}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected feed_forward_length: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNemotronHTensorsTransforms(t *testing.T) {
|
||||
m := &nemotronHModel{NGroups: 8}
|
||||
in := []Tensor{
|
||||
&fakeTensor{
|
||||
name: "blk.0.ssm_a",
|
||||
shape: []uint64{4},
|
||||
data: []float32{0, 1, 2, 3},
|
||||
},
|
||||
&fakeTensor{
|
||||
name: "blk.0.ssm_d",
|
||||
shape: []uint64{4},
|
||||
data: []float32{0, 1, 2, 3},
|
||||
},
|
||||
&fakeTensor{
|
||||
name: "blk.0.ssm_norm.weight",
|
||||
shape: []uint64{16},
|
||||
data: make([]float32, 16),
|
||||
},
|
||||
&fakeTensor{
|
||||
name: "blk.0.ssm_conv1d.weight",
|
||||
shape: []uint64{10, 1, 4},
|
||||
data: make([]float32, 40),
|
||||
},
|
||||
}
|
||||
|
||||
out := m.Tensors(in)
|
||||
if len(out) != len(in) {
|
||||
t.Fatalf("unexpected output tensor count: got %d want %d", len(out), len(in))
|
||||
}
|
||||
|
||||
got := map[string]struct {
|
||||
shape []uint64
|
||||
writer io.WriterTo
|
||||
}{}
|
||||
for _, t := range out {
|
||||
got[t.Name] = struct {
|
||||
shape []uint64
|
||||
writer io.WriterTo
|
||||
}{shape: t.Shape, writer: t.WriterTo}
|
||||
}
|
||||
|
||||
if shape := got["blk.0.ssm_a"].shape; !slices.Equal(shape, []uint64{4, 1}) {
|
||||
t.Fatalf("unexpected ssm_a shape: %v", shape)
|
||||
}
|
||||
if shape := got["blk.0.ssm_d"].shape; !slices.Equal(shape, []uint64{4, 1}) {
|
||||
t.Fatalf("unexpected ssm_d shape: %v", shape)
|
||||
}
|
||||
if shape := got["blk.0.ssm_norm.weight"].shape; !slices.Equal(shape, []uint64{8, 2}) {
|
||||
t.Fatalf("unexpected ssm_norm shape: %v", shape)
|
||||
}
|
||||
if shape := got["blk.0.ssm_conv1d.weight"].shape; !slices.Equal(shape, []uint64{10, 4}) {
|
||||
t.Fatalf("unexpected ssm_conv1d shape: %v", shape)
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := got["blk.0.ssm_a"].writer.WriteTo(&b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
values := make([]float32, 4)
|
||||
if err := binary.Read(&b, binary.LittleEndian, &values); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// 0 -> -exp(0) == -1
|
||||
if values[0] != -1 {
|
||||
t.Fatalf("unexpected transformed ssm_a[0]: got %v want -1", values[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestNemotronHLoadModelMetadata(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
config := `{
|
||||
"architectures": ["NemotronHForCausalLM"],
|
||||
"model_type": "nemotron_h",
|
||||
"num_hidden_layers": 4,
|
||||
"hidden_size": 512,
|
||||
"max_position_embeddings": 32768,
|
||||
"num_attention_heads": 8,
|
||||
"num_key_value_heads": 2,
|
||||
"head_dim": 64,
|
||||
"layer_norm_epsilon": 1e-5,
|
||||
"conv_kernel": 4,
|
||||
"ssm_state_size": 128,
|
||||
"mamba_num_heads": 16,
|
||||
"mamba_head_dim": 32,
|
||||
"n_groups": 8,
|
||||
"hybrid_override_pattern": "ME*M",
|
||||
"n_routed_experts": 16,
|
||||
"num_experts_per_tok": 4,
|
||||
"moe_intermediate_size": 256
|
||||
}`
|
||||
|
||||
if err := os.WriteFile(filepath.Join(tempDir, "config.json"), []byte(config), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(tempDir, "tokenizer.json"), []byte(`{}`), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
kv, _, err := LoadModelMetadata(os.DirFS(tempDir))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, ok := kv.(*nemotronHModel); !ok {
|
||||
t.Fatalf("unexpected converter type: %T", kv)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNemotronHNanoVLLoadModelMetadata(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
config := `{
|
||||
"architectures": ["NemotronH_Nano_VL_V2"],
|
||||
"model_type": "NemotronH_Nano_VL_V2",
|
||||
"max_sequence_length": 131072,
|
||||
"force_image_size": 512,
|
||||
"downsample_ratio": 0.5,
|
||||
"patch_size": 16,
|
||||
"use_thumbnail": true,
|
||||
"img_context_token_id": 18,
|
||||
"img_context_token": "<image>",
|
||||
"img_start_token": "<img>",
|
||||
"img_end_token": "</img>",
|
||||
"sound_context_token_id": 27,
|
||||
"sound_context_token": "<so_embedding>",
|
||||
"vit_hidden_size": 1280,
|
||||
"projector_hidden_size": 20480,
|
||||
"norm_mean": [0.48145466, 0.4578275, 0.40821073],
|
||||
"norm_std": [0.26862954, 0.26130258, 0.27577711],
|
||||
"vision_config": {
|
||||
"version": "radio_v2.5-h",
|
||||
"patch_size": 16,
|
||||
"max_resolution": 2048,
|
||||
"separate_video_embedder": true
|
||||
},
|
||||
"sound_config": {
|
||||
"model_type": "parakeet",
|
||||
"hidden_size": 1024,
|
||||
"num_attention_heads": 8,
|
||||
"num_hidden_layers": 24,
|
||||
"intermediate_size": 4096,
|
||||
"conv_kernel_size": 9,
|
||||
"subsampling_conv_channels": 256,
|
||||
"subsampling_conv_kernel_size": 3,
|
||||
"subsampling_conv_stride": 2,
|
||||
"subsampling_factor": 8,
|
||||
"num_mel_bins": 128,
|
||||
"projection_hidden_size": 4096,
|
||||
"sampling_rate": 16000
|
||||
},
|
||||
"llm_config": {
|
||||
"architectures": ["NemotronHForCausalLM"],
|
||||
"model_type": "nemotron_h",
|
||||
"num_hidden_layers": 4,
|
||||
"hidden_size": 512,
|
||||
"max_position_embeddings": 262144,
|
||||
"num_attention_heads": 8,
|
||||
"num_key_value_heads": 2,
|
||||
"head_dim": 64,
|
||||
"layer_norm_epsilon": 1e-5,
|
||||
"conv_kernel": 4,
|
||||
"ssm_state_size": 128,
|
||||
"mamba_num_heads": 16,
|
||||
"mamba_head_dim": 32,
|
||||
"n_groups": 8,
|
||||
"hybrid_override_pattern": "ME*M",
|
||||
"n_routed_experts": 16,
|
||||
"num_experts_per_tok": 4,
|
||||
"moe_intermediate_size": 256
|
||||
}
|
||||
}`
|
||||
|
||||
if err := os.WriteFile(filepath.Join(tempDir, "config.json"), []byte(config), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(tempDir, "preprocessor_config.json"), []byte(`{
|
||||
"image_size": 512,
|
||||
"patch_size": 16,
|
||||
"downsample_ratio": 0.5,
|
||||
"max_num_tiles": 12,
|
||||
"use_thumbnail": true,
|
||||
"norm_mean": [0.48145466, 0.4578275, 0.40821073],
|
||||
"norm_std": [0.26862954, 0.26130258, 0.27577711]
|
||||
}`), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(tempDir, "tokenizer.json"), []byte(`{}`), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
conv, tokenizer, err := LoadModelMetadata(os.DirFS(tempDir))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, ok := conv.(*nemotronHNanoVLModel); !ok {
|
||||
t.Fatalf("unexpected converter type: %T", conv)
|
||||
}
|
||||
|
||||
kv := conv.KV(tokenizer)
|
||||
if got, want := kv["general.architecture"], "nemotron_h_omni"; got != want {
|
||||
t.Fatalf("unexpected architecture: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["context_length"], uint32(131072); got != want {
|
||||
t.Fatalf("unexpected context length: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.block_count"], uint32(32); got != want {
|
||||
t.Fatalf("unexpected vision block count: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.image_size"], uint32(512); got != want {
|
||||
t.Fatalf("unexpected vision image size: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.projector.scale_factor"], uint32(2); got != want {
|
||||
t.Fatalf("unexpected projector scale factor: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["audio.block_count"], uint32(24); got != want {
|
||||
t.Fatalf("unexpected audio block count: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["audio.sound_token_id"], uint32(27); got != want {
|
||||
t.Fatalf("unexpected audio token id: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["audio.subsampling_factor"], uint32(8); got != want {
|
||||
t.Fatalf("unexpected audio subsampling factor: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNemotronHNanoOmniReasoningV3LoadModelMetadata(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
config := `{
|
||||
"architectures": ["NemotronH_Nano_Omni_Reasoning_V3"],
|
||||
"model_type": "NemotronH_Nano_Omni_Reasoning_V3",
|
||||
"max_sequence_length": 131072,
|
||||
"force_image_size": 512,
|
||||
"downsample_ratio": 0.5,
|
||||
"patch_size": 16,
|
||||
"img_context_token_id": 18,
|
||||
"img_context_token": "<image>",
|
||||
"img_start_token": "<img>",
|
||||
"img_end_token": "</img>",
|
||||
"sound_context_token_id": 27,
|
||||
"sound_context_token": "<so_embedding>",
|
||||
"vit_hidden_size": 1280,
|
||||
"projector_hidden_size": 4096,
|
||||
"vision_config": {
|
||||
"version": "radio_v2.5-h",
|
||||
"patch_size": 16,
|
||||
"min_num_patches": 1024,
|
||||
"max_num_patches": 13312,
|
||||
"args": {
|
||||
"min_num_patches": 1024,
|
||||
"max_num_patches": 13312
|
||||
}
|
||||
},
|
||||
"sound_config": {
|
||||
"model_type": "parakeet",
|
||||
"hidden_size": 1024,
|
||||
"num_attention_heads": 8,
|
||||
"num_hidden_layers": 24,
|
||||
"intermediate_size": 4096,
|
||||
"conv_kernel_size": 9,
|
||||
"subsampling_conv_channels": 256,
|
||||
"subsampling_conv_kernel_size": 3,
|
||||
"subsampling_conv_stride": 2,
|
||||
"subsampling_factor": 8,
|
||||
"num_mel_bins": 128,
|
||||
"projection_hidden_size": 4096,
|
||||
"sampling_rate": 16000
|
||||
},
|
||||
"llm_config": {
|
||||
"architectures": ["NemotronHForCausalLM"],
|
||||
"model_type": "nemotron_h",
|
||||
"num_hidden_layers": 4,
|
||||
"hidden_size": 512,
|
||||
"max_position_embeddings": 262144,
|
||||
"num_attention_heads": 8,
|
||||
"num_key_value_heads": 2,
|
||||
"head_dim": 64,
|
||||
"layer_norm_epsilon": 1e-5,
|
||||
"conv_kernel": 4,
|
||||
"ssm_state_size": 128,
|
||||
"mamba_num_heads": 16,
|
||||
"mamba_head_dim": 32,
|
||||
"n_groups": 8,
|
||||
"hybrid_override_pattern": "ME*M",
|
||||
"n_routed_experts": 16,
|
||||
"num_experts_per_tok": 4,
|
||||
"moe_intermediate_size": 256
|
||||
}
|
||||
}`
|
||||
|
||||
if err := os.WriteFile(filepath.Join(tempDir, "config.json"), []byte(config), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(tempDir, "tokenizer.json"), []byte(`{}`), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
conv, tokenizer, err := LoadModelMetadata(os.DirFS(tempDir))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, ok := conv.(*nemotronHNanoVLModel); !ok {
|
||||
t.Fatalf("unexpected converter type: %T", conv)
|
||||
}
|
||||
|
||||
kv := conv.KV(tokenizer)
|
||||
if got, want := kv["general.architecture"], "nemotron_h_omni"; got != want {
|
||||
t.Fatalf("unexpected architecture: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.block_count"], uint32(32); got != want {
|
||||
t.Fatalf("unexpected vision block count: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.min_num_patches"], uint32(1024); got != want {
|
||||
t.Fatalf("unexpected vision min patches: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["vision.max_num_patches"], uint32(13312); got != want {
|
||||
t.Fatalf("unexpected vision max patches: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["audio.block_count"], uint32(24); got != want {
|
||||
t.Fatalf("unexpected audio block count: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["audio.sound_token_id"], uint32(27); got != want {
|
||||
t.Fatalf("unexpected audio token id: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNemotronHNanoVLTensorsRetainVisionAndAudio(t *testing.T) {
|
||||
m := &nemotronHNanoVLModel{
|
||||
LLMConfig: nemotronHModel{NGroups: 8},
|
||||
}
|
||||
|
||||
in := []Tensor{
|
||||
&fakeTensor{
|
||||
name: "blk.0.ssm_a",
|
||||
shape: []uint64{4},
|
||||
data: []float32{0, 1, 2, 3},
|
||||
},
|
||||
&fakeTensor{name: "v.blk.0.attn_qkv.weight", shape: []uint64{3840, 1280}},
|
||||
&fakeTensor{name: "v.position_embd", shape: []uint64{1, 16384, 1280}},
|
||||
&fakeTensor{name: "v.cls_embd", shape: []uint64{10, 1280}},
|
||||
&fakeTensor{name: "mm.norm.weight", shape: []uint64{5120}},
|
||||
&fakeTensor{name: "a.feature_extractor.fb", shape: []uint64{1, 128, 257}},
|
||||
&fakeTensor{name: "a.subsampling.dw1.weight", shape: []uint64{256, 1, 3, 3}},
|
||||
&fakeTensor{name: "a.blk.0.conv_dw.weight", shape: []uint64{1024, 1, 9}},
|
||||
&fakeTensor{name: "a.blk.0.conv_pw1.weight", shape: []uint64{2048, 1024, 1}},
|
||||
&fakeTensor{name: "a.blk.0.conv_bn.num_batches_tracked", shape: []uint64{1}},
|
||||
&fakeTensor{name: "mm.a.1.weight", shape: []uint64{4096, 1024}},
|
||||
}
|
||||
|
||||
out := m.Tensors(in)
|
||||
got := map[string][]uint64{}
|
||||
for _, tns := range out {
|
||||
got[tns.Name] = tns.Shape
|
||||
}
|
||||
|
||||
for _, name := range []string{
|
||||
"blk.0.ssm_a",
|
||||
"v.blk.0.attn_q.weight",
|
||||
"v.blk.0.attn_k.weight",
|
||||
"v.blk.0.attn_v.weight",
|
||||
"v.position_embd",
|
||||
"v.cls_embd",
|
||||
"mm.norm.weight",
|
||||
"a.feature_extractor.fb",
|
||||
"a.subsampling.dw1.weight",
|
||||
"a.blk.0.conv_dw.weight",
|
||||
"a.blk.0.conv_pw1.weight",
|
||||
"mm.a.1.weight",
|
||||
} {
|
||||
if _, ok := got[name]; !ok {
|
||||
t.Fatalf("expected tensor %q in output", name)
|
||||
}
|
||||
}
|
||||
|
||||
if gotShape, want := got["blk.0.ssm_a"], []uint64{4, 1}; !slices.Equal(gotShape, want) {
|
||||
t.Fatalf("unexpected ssm_a shape: got %v want %v", gotShape, want)
|
||||
}
|
||||
if gotShape, want := got["v.position_embd"], []uint64{16384, 1280}; !slices.Equal(gotShape, want) {
|
||||
t.Fatalf("unexpected position embedding shape: got %v want %v", gotShape, want)
|
||||
}
|
||||
if gotShape, want := got["a.blk.0.conv_dw.weight"], []uint64{1024, 9}; !slices.Equal(gotShape, want) {
|
||||
t.Fatalf("unexpected audio conv_dw shape: got %v want %v", gotShape, want)
|
||||
}
|
||||
if gotShape, want := got["a.blk.0.conv_pw1.weight"], []uint64{2048, 1024}; !slices.Equal(gotShape, want) {
|
||||
t.Fatalf("unexpected audio conv_pw1 shape: got %v want %v", gotShape, want)
|
||||
}
|
||||
if _, ok := got["a.blk.0.conv_bn.num_batches_tracked"]; ok {
|
||||
t.Fatal("audio batchnorm num_batches_tracked should be omitted")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNemotronHNanoVLReplacements(t *testing.T) {
|
||||
m := &nemotronHNanoVLModel{}
|
||||
r := strings.NewReplacer(m.Replacements()...)
|
||||
|
||||
if got, want := r.Replace("language_model.backbone.layers.1.mixer.fc1_latent_proj.weight"), "blk.1.ffn_latent_in.weight"; got != want {
|
||||
t.Fatalf("unexpected fc1 replacement: got %q want %q", got, want)
|
||||
}
|
||||
if got, want := r.Replace("language_model.lm_head.weight"), "output.weight"; got != want {
|
||||
t.Fatalf("unexpected lm_head replacement: got %q want %q", got, want)
|
||||
}
|
||||
if got, want := r.Replace("vision_model.radio_model.model.blocks.0.attn.qkv.weight"), "v.blk.0.attn_qkv.weight"; got != want {
|
||||
t.Fatalf("unexpected vision replacement: got %q want %q", got, want)
|
||||
}
|
||||
if got, want := r.Replace("mlp1.1.weight"), "mm.1.weight"; got != want {
|
||||
t.Fatalf("unexpected projector replacement: got %q want %q", got, want)
|
||||
}
|
||||
if got, want := r.Replace("sound_encoder.encoder.layers.0.self_attn.q_proj.weight"), "a.blk.0.attn_q.weight"; got != want {
|
||||
t.Fatalf("unexpected audio q_proj replacement: got %q want %q", got, want)
|
||||
}
|
||||
if got, want := r.Replace("sound_encoder.encoder.layers.0.conv.pointwise_conv1.weight"), "a.blk.0.conv_pw1.weight"; got != want {
|
||||
t.Fatalf("unexpected audio conv replacement: got %q want %q", got, want)
|
||||
}
|
||||
if got, want := r.Replace("sound_projection.linear2.weight"), "mm.a.2.weight"; got != want {
|
||||
t.Fatalf("unexpected audio projector replacement: got %q want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNemotronHReplacementsLatentProjections(t *testing.T) {
|
||||
m := &nemotronHModel{}
|
||||
r := strings.NewReplacer(m.Replacements()...)
|
||||
|
||||
if got, want := r.Replace("backbone.layers.1.mixer.fc1_latent_proj.weight"), "blk.1.ffn_latent_in.weight"; got != want {
|
||||
t.Fatalf("unexpected fc1 replacement: got %q want %q", got, want)
|
||||
}
|
||||
if got, want := r.Replace("backbone.layers.1.mixer.fc2_latent_proj.weight"), "blk.1.ffn_latent_out.weight"; got != want {
|
||||
t.Fatalf("unexpected fc2 replacement: got %q want %q", got, want)
|
||||
}
|
||||
}
|
||||
213
convert/convert_nomicbert.go
Normal file
213
convert/convert_nomicbert.go
Normal file
@@ -0,0 +1,213 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type nomicbertModel struct {
|
||||
ModelParameters
|
||||
NLayers uint32 `json:"n_layers"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
LayerNormEPS float32 `json:"layer_norm_eps"`
|
||||
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
|
||||
RopeFreqBase float32 `json:"rope_theta"`
|
||||
normalizeEmbeddings bool
|
||||
PoolingType uint32
|
||||
|
||||
// MoE parameters (only present in v2 models)
|
||||
NumExperts uint32 `json:"num_local_experts"`
|
||||
NumExpertsUsed uint32 `json:"num_experts_per_tok"`
|
||||
MoEEveryNLayers uint32 `json:"moe_every_n_layers"`
|
||||
}
|
||||
|
||||
var (
|
||||
_ ModelConverter = (*nomicbertModel)(nil)
|
||||
_ moreParser = (*nomicbertModel)(nil)
|
||||
)
|
||||
|
||||
func (p *nomicbertModel) parseMore(fsys fs.FS) error {
|
||||
bts, err := fs.ReadFile(fsys, "modules.json")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var modules []struct {
|
||||
Type string `json:"type"`
|
||||
Path string `json:"path"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, &modules); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var pooling string
|
||||
for _, m := range modules {
|
||||
switch m.Type {
|
||||
case "sentence_transformers.models.Pooling":
|
||||
pooling = m.Path
|
||||
case "sentence_transformers.models.Normalize":
|
||||
p.normalizeEmbeddings = true
|
||||
}
|
||||
}
|
||||
|
||||
if pooling != "" {
|
||||
bts, err := fs.ReadFile(fsys, filepath.Join(pooling, "config.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var pc struct {
|
||||
PoolingModeCLSToken bool `json:"pooling_mode_cls_token"`
|
||||
PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, &pc); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if pc.PoolingModeMeanTokens {
|
||||
p.PoolingType = 1
|
||||
} else if pc.PoolingModeCLSToken {
|
||||
p.PoolingType = 2
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *nomicbertModel) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
|
||||
// Determine architecture based on MoE parameters (following qwen3 pattern)
|
||||
arch := "nomic-bert"
|
||||
if p.MoEEveryNLayers > 0 {
|
||||
arch += "-moe"
|
||||
}
|
||||
|
||||
kv["general.architecture"] = arch
|
||||
kv["attention.causal"] = false
|
||||
kv["pooling_type"] = p.PoolingType
|
||||
kv["normalize_embeddings"] = p.normalizeEmbeddings
|
||||
|
||||
kv["block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers)
|
||||
|
||||
if contextLength := p.MaxPositionEmbeddings; contextLength > 0 {
|
||||
kv["context_length"] = contextLength
|
||||
}
|
||||
|
||||
if embeddingLength := p.HiddenSize; embeddingLength > 0 {
|
||||
kv["embedding_length"] = p.HiddenSize
|
||||
}
|
||||
|
||||
if feedForwardLength := p.IntermediateSize; feedForwardLength > 0 {
|
||||
kv["feed_forward_length"] = p.IntermediateSize
|
||||
}
|
||||
|
||||
if headCount := p.NumAttentionHeads; headCount > 0 {
|
||||
kv["attention.head_count"] = p.NumAttentionHeads
|
||||
}
|
||||
|
||||
if kvHeadCount := p.NumKeyValueHeads; kvHeadCount > 0 {
|
||||
kv["attention.head_count_kv"] = p.NumKeyValueHeads
|
||||
}
|
||||
|
||||
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon); layerNormEpsilon > 0 {
|
||||
kv["attention.layer_norm_epsilon"] = layerNormEpsilon
|
||||
}
|
||||
|
||||
if p.RopeFreqBase > 0 {
|
||||
kv["rope.freq_base"] = p.RopeFreqBase
|
||||
}
|
||||
|
||||
// MoE specific parameters (only if MoE is enabled)
|
||||
if p.NumExperts > 0 {
|
||||
kv["expert_count"] = p.NumExperts
|
||||
}
|
||||
|
||||
if p.NumExpertsUsed > 0 {
|
||||
kv["expert_used_count"] = p.NumExpertsUsed
|
||||
}
|
||||
|
||||
if p.MoEEveryNLayers > 0 {
|
||||
kv["moe_every_n_layers"] = p.MoEEveryNLayers
|
||||
}
|
||||
|
||||
kv["tokenizer.ggml.model"] = "bert"
|
||||
kv["tokenizer.ggml.token_type_count"] = uint32(2)
|
||||
|
||||
// convert to phantom space tokens
|
||||
for i, e := range t.Tokens {
|
||||
switch {
|
||||
case strings.HasPrefix(e, "[") && strings.HasSuffix(e, "]"):
|
||||
// noop - keep special tokens as-is
|
||||
case strings.HasPrefix(e, "##"):
|
||||
t.Tokens[i] = e[2:]
|
||||
default:
|
||||
t.Tokens[i] = "\u2581" + e
|
||||
}
|
||||
}
|
||||
|
||||
kv["tokenizer.ggml.tokens"] = t.Tokens
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *nomicbertModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
out := make([]*ggml.Tensor, 0, len(ts))
|
||||
for _, t := range ts {
|
||||
if slices.Contains([]string{
|
||||
"embeddings.position_ids",
|
||||
"pooler.dense.weight",
|
||||
"pooler.dense.bias",
|
||||
}, t.Name()) {
|
||||
continue
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (nomicbertModel) Replacements() []string {
|
||||
return []string{
|
||||
"encoder.layer", "blk",
|
||||
"encoder.layers", "blk",
|
||||
"embeddings.word_embeddings", "token_embd",
|
||||
"embeddings.token_type_embeddings", "token_types",
|
||||
"embeddings.LayerNorm", "token_embd_norm",
|
||||
|
||||
"attention.self.qkv", "attn_qkv",
|
||||
|
||||
"attention.output.dense", "attn_output",
|
||||
"attention.output.LayerNorm", "attn_output_norm",
|
||||
|
||||
"mlp.up", "ffn_up",
|
||||
"mlp.down", "ffn_down",
|
||||
|
||||
"mlp.router", "ffn_gate_inp",
|
||||
"mlp.experts.up", "ffn_up_exps",
|
||||
"mlp.experts.down", "ffn_down_exps",
|
||||
|
||||
"intermediate.dense", "ffn_up",
|
||||
"output.dense", "ffn_down",
|
||||
"output.LayerNorm", "layer_output_norm",
|
||||
}
|
||||
}
|
||||
117
convert/convert_olmo.go
Normal file
117
convert/convert_olmo.go
Normal file
@@ -0,0 +1,117 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type ropeScaling struct {
|
||||
Factor float32 `json:"factor"`
|
||||
OriginalMaxPositionEmbeds uint32 `json:"original_max_position_embeddings"`
|
||||
AttentionFactor float32 `json:"attention_factor"`
|
||||
BetaFast float32 `json:"beta_fast"`
|
||||
BetaSlow float32 `json:"beta_slow"`
|
||||
RopeType string `json:"rope_type"`
|
||||
ExtrapolationFactor float32 `json:"extrapolation_factor"`
|
||||
}
|
||||
|
||||
type olmoModel struct {
|
||||
ModelParameters
|
||||
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeScaling *ropeScaling `json:"rope_scaling"`
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
LayerTypes []string `json:"layer_types"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*olmoModel)(nil)
|
||||
|
||||
func (p *olmoModel) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "olmo3"
|
||||
kv["olmo3.block_count"] = p.NumHiddenLayers
|
||||
kv["olmo3.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["olmo3.embedding_length"] = p.HiddenSize
|
||||
kv["olmo3.feed_forward_length"] = p.IntermediateSize
|
||||
kv["olmo3.attention.head_count"] = p.NumAttentionHeads
|
||||
kv["olmo3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||
|
||||
if p.RopeTheta > 0 {
|
||||
kv["olmo3.rope.freq_base"] = p.RopeTheta
|
||||
}
|
||||
|
||||
if p.RopeScaling != nil {
|
||||
if p.RopeScaling.Factor > 0 {
|
||||
kv["olmo3.rope.scaling.factor"] = p.RopeScaling.Factor
|
||||
}
|
||||
if p.RopeScaling.OriginalMaxPositionEmbeds > 0 {
|
||||
kv["olmo3.rope.scaling.original_context_length"] = p.RopeScaling.OriginalMaxPositionEmbeds
|
||||
}
|
||||
if p.RopeScaling.AttentionFactor > 0 {
|
||||
kv["olmo3.rope.scaling.attn_factor"] = p.RopeScaling.AttentionFactor
|
||||
}
|
||||
if p.RopeScaling.RopeType != "" {
|
||||
kv["olmo3.rope.scaling.type"] = p.RopeScaling.RopeType
|
||||
}
|
||||
}
|
||||
|
||||
if p.RMSNormEPS > 0 {
|
||||
kv["olmo3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
}
|
||||
|
||||
if p.SlidingWindow > 0 {
|
||||
kv["olmo3.attention.sliding_window"] = p.SlidingWindow
|
||||
}
|
||||
|
||||
if len(p.LayerTypes) > 0 {
|
||||
slidingPattern := make([]bool, len(p.LayerTypes))
|
||||
for i, layerType := range p.LayerTypes {
|
||||
slidingPattern[i] = (layerType == "sliding_attention")
|
||||
}
|
||||
kv["olmo3.attention.sliding_window_pattern"] = slidingPattern
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *olmoModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
out := make([]*ggml.Tensor, 0, len(ts))
|
||||
for _, t := range ts {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *olmoModel) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.layers", "blk",
|
||||
"model.norm", "output_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"self_attn.q_norm", "attn_q_norm",
|
||||
"self_attn.k_norm", "attn_k_norm",
|
||||
"post_attention_layernorm", "post_attention_norm",
|
||||
"post_feedforward_layernorm", "post_ffw_norm",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
}
|
||||
}
|
||||
122
convert/convert_phi3.go
Normal file
122
convert/convert_phi3.go
Normal file
@@ -0,0 +1,122 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"math"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type phi3Model struct {
|
||||
ModelParameters
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NLayers uint32 `json:"n_layers"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NEmbd uint32 `json:"n_embd"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NHead uint32 `json:"n_head"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
NHeadKV uint32 `json:"n_head_kv"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeScaling struct {
|
||||
Type string `json:"type"`
|
||||
LongFactor ropeFactor `json:"long_factor"`
|
||||
ShortFactor ropeFactor `json:"short_factor"`
|
||||
} `json:"rope_scaling"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
NPositions uint32 `json:"n_positions"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*phi3Model)(nil)
|
||||
|
||||
func (p *phi3Model) KV(t *Tokenizer) KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "phi3"
|
||||
kv["phi3.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||
kv["phi3.feed_forward_length"] = p.IntermediateSize
|
||||
kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
|
||||
kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||
kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
|
||||
kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||
kv["phi3.rope.freq_base"] = p.RopeTheta
|
||||
kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
|
||||
kv["phi3.attention.sliding_window"] = p.SlidingWindow
|
||||
|
||||
scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
|
||||
|
||||
switch p.RopeScaling.Type {
|
||||
case "":
|
||||
// no scaling
|
||||
case "su", "longrope":
|
||||
kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
|
||||
case "yarn":
|
||||
kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
|
||||
default:
|
||||
panic("unknown rope scaling type")
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *phi3Model) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var addRopeFactors sync.Once
|
||||
|
||||
out := make([]*ggml.Tensor, 0, len(ts)+2)
|
||||
for _, t := range ts {
|
||||
if strings.HasPrefix(t.Name(), "blk.0.") {
|
||||
addRopeFactors.Do(func() {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: "rope_factors_long.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
|
||||
WriterTo: p.RopeScaling.LongFactor,
|
||||
}, &ggml.Tensor{
|
||||
Name: "rope_factors_short.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
|
||||
WriterTo: p.RopeScaling.ShortFactor,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *phi3Model) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.qkv_proj", "attn_qkv",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate_up_proj", "ffn_up",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
}
|
||||
}
|
||||
|
||||
type ropeFactor []float32
|
||||
|
||||
func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
|
||||
return 0, binary.Write(w, binary.LittleEndian, r)
|
||||
}
|
||||
81
convert/convert_qwen2.go
Normal file
81
convert/convert_qwen2.go
Normal file
@@ -0,0 +1,81 @@
|
||||
package convert
|
||||
|
||||
import "github.com/ollama/ollama/fs/ggml"
|
||||
|
||||
type qwen2Model struct {
|
||||
ModelParameters
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeScaling struct {
|
||||
Type string `json:"type"`
|
||||
Factor ropeFactor `json:"factor"`
|
||||
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
MropeSection []int32 `json:"mrope_section"`
|
||||
} `json:"rope_scaling"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*qwen2Model)(nil)
|
||||
|
||||
func (q *qwen2Model) KV(t *Tokenizer) KV {
|
||||
kv := q.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "qwen2"
|
||||
kv["qwen2.block_count"] = q.HiddenLayers
|
||||
kv["qwen2.context_length"] = q.MaxPositionEmbeddings
|
||||
kv["qwen2.embedding_length"] = q.HiddenSize
|
||||
kv["qwen2.feed_forward_length"] = q.IntermediateSize
|
||||
kv["qwen2.attention.head_count"] = q.NumAttentionHeads
|
||||
kv["qwen2.attention.head_count_kv"] = q.NumKeyValueHeads
|
||||
kv["qwen2.rope.freq_base"] = q.RopeTheta
|
||||
kv["qwen2.attention.layer_norm_rms_epsilon"] = q.RMSNormEPS
|
||||
|
||||
switch q.RopeScaling.Type {
|
||||
case "":
|
||||
// no scaling
|
||||
case "yarn":
|
||||
kv["qwen2.rope.scaling.type"] = q.RopeScaling.Type
|
||||
kv["qwen2.rope.scaling.factor"] = q.RopeScaling.Factor
|
||||
case "mrope", "default":
|
||||
kv["qwen2.rope.mrope_section"] = q.RopeScaling.MropeSection
|
||||
default:
|
||||
panic("unknown rope scaling type")
|
||||
}
|
||||
return kv
|
||||
}
|
||||
|
||||
func (q *qwen2Model) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
for _, t := range ts {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *qwen2Model) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"model.norm", "output_norm",
|
||||
}
|
||||
}
|
||||
102
convert/convert_qwen25vl.go
Normal file
102
convert/convert_qwen25vl.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type qwen25VLModel struct {
|
||||
qwen2Model
|
||||
|
||||
VisionModel struct {
|
||||
Depth uint32 `json:"depth"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NumHeads uint32 `json:"num_heads"`
|
||||
InChannels uint32 `json:"in_chans"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
SpatialMergeSize uint32 `json:"spatial_merge_size"`
|
||||
SpatialPatchSize uint32 `json:"spatial_patch_size"`
|
||||
WindowSize uint32 `json:"window_size"`
|
||||
RMSNormEps float32 `json:"layer_norm_epsilon"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
FullAttentionBlocks []int32 `json:"fullatt_block_indexes"`
|
||||
TemporalPatchSize uint32 `json:"temporal_patch_size"`
|
||||
} `json:"vision_config"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*qwen25VLModel)(nil)
|
||||
|
||||
func (q *qwen25VLModel) KV(t *Tokenizer) KV {
|
||||
kv := q.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "qwen25vl"
|
||||
|
||||
for k, v := range q.qwen2Model.KV(t) {
|
||||
if strings.HasPrefix(k, "qwen2.") {
|
||||
kv[strings.Replace(k, "qwen2.", "qwen25vl.", 1)] = v
|
||||
}
|
||||
}
|
||||
|
||||
if q.VisionModel.FullAttentionBlocks == nil {
|
||||
kv["qwen25vl.vision.fullatt_block_indexes"] = []int32{7, 15, 23, 31}
|
||||
}
|
||||
|
||||
kv["qwen25vl.vision.block_count"] = cmp.Or(q.VisionModel.Depth, 32)
|
||||
kv["qwen25vl.vision.embedding_length"] = q.VisionModel.HiddenSize
|
||||
kv["qwen25vl.vision.attention.head_count"] = cmp.Or(q.VisionModel.NumHeads, 16)
|
||||
kv["qwen25vl.vision.num_channels"] = q.VisionModel.InChannels
|
||||
kv["qwen25vl.vision.patch_size"] = cmp.Or(q.VisionModel.PatchSize, 14)
|
||||
kv["qwen25vl.vision.spatial_merge_size"] = cmp.Or(q.VisionModel.SpatialMergeSize, 2)
|
||||
kv["qwen25vl.vision.spatial_patch_size"] = q.VisionModel.SpatialPatchSize
|
||||
kv["qwen25vl.vision.window_size"] = cmp.Or(q.VisionModel.WindowSize, 112)
|
||||
kv["qwen25vl.vision.attention.layer_norm_epsilon"] = cmp.Or(q.VisionModel.RMSNormEps, 1e-6)
|
||||
kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e4)
|
||||
kv["qwen25vl.vision.fullatt_block_indexes"] = q.VisionModel.FullAttentionBlocks
|
||||
kv["qwen25vl.vision.temporal_patch_size"] = cmp.Or(q.VisionModel.TemporalPatchSize, 2)
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (q *qwen25VLModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
for _, t := range ts {
|
||||
if strings.Contains(t.Name(), "patch_embed.proj") {
|
||||
for t := range splitDim(t, 2,
|
||||
split{Replacer: strings.NewReplacer("patch_embed.proj", "patch_embd_0")},
|
||||
split{Replacer: strings.NewReplacer("patch_embed.proj", "patch_embd_1")},
|
||||
) {
|
||||
t.Shape = slices.DeleteFunc(t.Shape, func(i uint64) bool { return i == 1 })
|
||||
out = append(out, t)
|
||||
}
|
||||
} else if strings.Contains(t.Name(), "attn.qkv") {
|
||||
out = append(out, slices.Collect(splitDim(t, 0,
|
||||
split{Replacer: strings.NewReplacer("attn.qkv", "attn_q")},
|
||||
split{Replacer: strings.NewReplacer("attn.qkv", "attn_k")},
|
||||
split{Replacer: strings.NewReplacer("attn.qkv", "attn_v")},
|
||||
))...)
|
||||
} else {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *qwen25VLModel) Replacements() []string {
|
||||
return append(
|
||||
p.qwen2Model.Replacements(),
|
||||
"visual", "v",
|
||||
"blocks", "blk",
|
||||
"attn.proj", "attn_out",
|
||||
"norm1", "ln1",
|
||||
"norm2", "ln2",
|
||||
)
|
||||
}
|
||||
157
convert/convert_qwen3.go
Normal file
157
convert/convert_qwen3.go
Normal file
@@ -0,0 +1,157 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
)
|
||||
|
||||
type qwen3Model struct {
|
||||
ModelParameters
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
NumExperts uint32 `json:"num_experts"`
|
||||
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||
NormTopkProb bool `json:"norm_topk_prob"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeScaling struct {
|
||||
Type string `json:"type"`
|
||||
Factor ropeFactor `json:"factor"`
|
||||
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
MropeSection []int32 `json:"mrope_section"`
|
||||
} `json:"rope_scaling"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
}
|
||||
|
||||
// KV implements ModelConverter.
|
||||
func (q *qwen3Model) KV(t *Tokenizer) KV {
|
||||
arch := "qwen3"
|
||||
if q.NumExperts > 0 {
|
||||
arch += "moe"
|
||||
}
|
||||
|
||||
kv := q.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = arch
|
||||
kv["block_count"] = q.HiddenLayers
|
||||
kv["context_length"] = q.MaxPositionEmbeddings
|
||||
kv["embedding_length"] = q.HiddenSize
|
||||
kv["feed_forward_length"] = q.IntermediateSize
|
||||
kv["attention.head_count"] = q.NumAttentionHeads
|
||||
kv["attention.head_count_kv"] = q.NumKeyValueHeads
|
||||
kv["attention.key_length"] = q.HeadDim
|
||||
kv["attention.value_length"] = q.HeadDim
|
||||
|
||||
if q.NumExperts > 0 {
|
||||
kv["expert_count"] = q.NumExperts
|
||||
kv["expert_used_count"] = q.NumExpertsPerToken
|
||||
kv["norm_top_k_prob"] = q.NormTopkProb
|
||||
}
|
||||
|
||||
kv["rope.freq_base"] = q.RopeTheta
|
||||
kv["attention.layer_norm_rms_epsilon"] = q.RMSNormEPS
|
||||
|
||||
switch q.RopeScaling.Type {
|
||||
case "":
|
||||
// no scaling
|
||||
case "yarn":
|
||||
kv["rope.scaling.type"] = q.RopeScaling.Type
|
||||
kv["rope.scaling.factor"] = q.RopeScaling.Factor
|
||||
case "mrope", "default":
|
||||
kv["rope.mrope_section"] = q.RopeScaling.MropeSection
|
||||
default:
|
||||
panic("unknown rope scaling type")
|
||||
}
|
||||
return kv
|
||||
}
|
||||
|
||||
// Tensors implements ModelConverter.
|
||||
func (q *qwen3Model) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
// TODO: handle split experts
|
||||
|
||||
for _, t := range ts {
|
||||
switch {
|
||||
case strings.Contains(t.Name(), "ffn_gate_up_exps"):
|
||||
afterFunc := func(t tensor.Tensor) (tensor.Tensor, error) { return tensor.Transpose(t, 0, 2, 1) }
|
||||
for t := range splitDim(t, 2,
|
||||
split{Replacer: strings.NewReplacer("gate_up", "gate"), afterFunc: afterFunc},
|
||||
split{Replacer: strings.NewReplacer("gate_up", "up"), afterFunc: afterFunc},
|
||||
) {
|
||||
t.Shape[1], t.Shape[2] = t.Shape[2], t.Shape[1]
|
||||
out = append(out, t)
|
||||
}
|
||||
case strings.Contains(t.Name(), "ffn_down_exps"):
|
||||
shape := slices.Clone(t.Shape())
|
||||
shape[1], shape[2] = shape[2], shape[1]
|
||||
t.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := make([]int, len(shape))
|
||||
for i := range shape {
|
||||
dims[i] = int(shape[i])
|
||||
}
|
||||
|
||||
var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
tt, err := tensor.Transpose(tt, 0, 2, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// flatten tensor so it can be written as a vector
|
||||
if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return native.VectorF32(tt.(*tensor.Dense))
|
||||
})
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: shape,
|
||||
WriterTo: t,
|
||||
})
|
||||
default:
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// Replacements implements ModelConverter.
|
||||
func (q *qwen3Model) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.k_norm", "attn_k_norm",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.q_norm", "attn_q_norm",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"mlp.gate.weight", "ffn_gate_inp.weight",
|
||||
"mlp.experts.down_proj", "ffn_down_exps.weight",
|
||||
"mlp.experts.gate_up_proj", "ffn_gate_up_exps.weight",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"model.norm", "output_norm",
|
||||
}
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*qwen3Model)(nil)
|
||||
946
convert/convert_qwen3next.go
Normal file
946
convert/convert_qwen3next.go
Normal file
@@ -0,0 +1,946 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"math"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type qwen3NextRopeScaling struct {
|
||||
Type string `json:"type"`
|
||||
Factor ropeFactor `json:"factor"`
|
||||
MropeSection []int32 `json:"mrope_section"`
|
||||
}
|
||||
|
||||
type qwen3NextRopeParams struct {
|
||||
MRopeInterleaved bool `json:"mrope_interleaved"`
|
||||
MropeSection []int32 `json:"mrope_section"`
|
||||
RopeType string `json:"rope_type"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
PartialRotaryFactor float32 `json:"partial_rotary_factor"`
|
||||
}
|
||||
|
||||
type qwen3NextTextConfig struct {
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
|
||||
// MoE config
|
||||
NumExperts uint32 `json:"num_experts"`
|
||||
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||
NormTopkProb *bool `json:"norm_topk_prob"`
|
||||
MoEIntermediateSize uint32 `json:"moe_intermediate_size"`
|
||||
SharedExpertIntermSize uint32 `json:"shared_expert_intermediate_size"`
|
||||
|
||||
// Hybrid attention config
|
||||
FullAttentionInterval uint32 `json:"full_attention_interval"`
|
||||
LayerTypes []string `json:"layer_types"`
|
||||
|
||||
// Linear attention (Gated Delta Net) config
|
||||
LinearConvKernelDim uint32 `json:"linear_conv_kernel_dim"`
|
||||
LinearKeyHeadDim uint32 `json:"linear_key_head_dim"`
|
||||
LinearNumKeyHeads uint32 `json:"linear_num_key_heads"`
|
||||
LinearNumValueHeads uint32 `json:"linear_num_value_heads"`
|
||||
LinearValueHeadDim uint32 `json:"linear_value_head_dim"`
|
||||
|
||||
// RoPE config
|
||||
PartialRotaryFactor float32 `json:"partial_rotary_factor"`
|
||||
RopeScaling qwen3NextRopeScaling `json:"rope_scaling"`
|
||||
RopeParameters qwen3NextRopeParams `json:"rope_parameters"`
|
||||
}
|
||||
|
||||
type qwen3NextVisionConfig struct {
|
||||
Depth uint32 `json:"depth"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NumHeads uint32 `json:"num_heads"`
|
||||
InChannels uint32 `json:"in_channels"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
SpatialMergeSize uint32 `json:"spatial_merge_size"`
|
||||
RMSNormEps float32 `json:"layer_norm_epsilon"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
TemporalPatchSize uint32 `json:"temporal_patch_size"`
|
||||
DeepstackVisualIndexes []int32 `json:"deepstack_visual_indexes"`
|
||||
|
||||
Size struct {
|
||||
ShortestEdge uint32 `json:"shortest_edge"`
|
||||
LongestEdge uint32 `json:"longest_edge"`
|
||||
} `json:"size"`
|
||||
|
||||
ImageMean []float32 `json:"image_mean"`
|
||||
ImageStd []float32 `json:"image_std"`
|
||||
}
|
||||
|
||||
type qwen3NextModel struct {
|
||||
ModelParameters
|
||||
qwen3NextTextConfig
|
||||
|
||||
TextConfig *qwen3NextTextConfig `json:"text_config"`
|
||||
VisionModel qwen3NextVisionConfig `json:"vision_config"`
|
||||
|
||||
ImageTokenID uint32 `json:"image_token_id"`
|
||||
VisionStartTokenID uint32 `json:"vision_start_token_id"`
|
||||
VisionEndTokenID uint32 `json:"vision_end_token_id"`
|
||||
}
|
||||
|
||||
var _ ModelConverter = (*qwen3NextModel)(nil)
|
||||
|
||||
func (q *qwen3NextModel) parseMore(fsys fs.FS) error {
|
||||
if q.TextConfig != nil {
|
||||
q.qwen3NextTextConfig = *q.TextConfig
|
||||
}
|
||||
|
||||
if q.RopeTheta == 0 {
|
||||
q.RopeTheta = q.RopeParameters.RopeTheta
|
||||
}
|
||||
if q.PartialRotaryFactor == 0 {
|
||||
q.PartialRotaryFactor = q.RopeParameters.PartialRotaryFactor
|
||||
}
|
||||
|
||||
if q.RopeScaling.Type == "" && q.RopeParameters.RopeType != "" {
|
||||
q.RopeScaling.Type = q.RopeParameters.RopeType
|
||||
}
|
||||
|
||||
// Pull vision preprocessing fields when present.
|
||||
if q.VisionModel.Depth > 0 {
|
||||
if bts, err := fs.ReadFile(fsys, "preprocessor_config.json"); err == nil {
|
||||
var pre struct {
|
||||
Size struct {
|
||||
ShortestEdge uint32 `json:"shortest_edge"`
|
||||
LongestEdge uint32 `json:"longest_edge"`
|
||||
} `json:"size"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
TemporalPatchSize uint32 `json:"temporal_patch_size"`
|
||||
MergeSize uint32 `json:"merge_size"`
|
||||
ImageMean []float32 `json:"image_mean"`
|
||||
ImageStd []float32 `json:"image_std"`
|
||||
}
|
||||
if json.Unmarshal(bts, &pre) == nil {
|
||||
if q.VisionModel.PatchSize == 0 {
|
||||
q.VisionModel.PatchSize = pre.PatchSize
|
||||
}
|
||||
if q.VisionModel.TemporalPatchSize == 0 {
|
||||
q.VisionModel.TemporalPatchSize = pre.TemporalPatchSize
|
||||
}
|
||||
if q.VisionModel.SpatialMergeSize == 0 {
|
||||
q.VisionModel.SpatialMergeSize = pre.MergeSize
|
||||
}
|
||||
if q.VisionModel.Size.ShortestEdge == 0 {
|
||||
q.VisionModel.Size.ShortestEdge = pre.Size.ShortestEdge
|
||||
}
|
||||
if q.VisionModel.Size.LongestEdge == 0 {
|
||||
q.VisionModel.Size.LongestEdge = pre.Size.LongestEdge
|
||||
}
|
||||
if len(q.VisionModel.ImageMean) == 0 {
|
||||
q.VisionModel.ImageMean = pre.ImageMean
|
||||
}
|
||||
if len(q.VisionModel.ImageStd) == 0 {
|
||||
q.VisionModel.ImageStd = pre.ImageStd
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if q.NumHiddenLayers == 0 {
|
||||
return fmt.Errorf("qwen3next: num_hidden_layers must be set")
|
||||
}
|
||||
if q.NumAttentionHeads == 0 {
|
||||
return fmt.Errorf("qwen3next: num_attention_heads must be set")
|
||||
}
|
||||
if q.NumKeyValueHeads == 0 {
|
||||
return fmt.Errorf("qwen3next: num_key_value_heads must be set")
|
||||
}
|
||||
if q.HeadDim == 0 {
|
||||
return fmt.Errorf("qwen3next: head_dim must be set")
|
||||
}
|
||||
if q.RopeTheta == 0 {
|
||||
return fmt.Errorf("qwen3next: rope_theta must be set")
|
||||
}
|
||||
if q.PartialRotaryFactor <= 0 || q.PartialRotaryFactor > 1 {
|
||||
return fmt.Errorf("qwen3next: partial_rotary_factor must be in (0,1], got %v", q.PartialRotaryFactor)
|
||||
}
|
||||
if q.LinearNumKeyHeads == 0 || q.LinearNumValueHeads == 0 || q.LinearKeyHeadDim == 0 || q.LinearValueHeadDim == 0 {
|
||||
return fmt.Errorf("qwen3next: linear attention config must be set (linear_num_key_heads, linear_num_value_heads, linear_key_head_dim, linear_value_head_dim)")
|
||||
}
|
||||
if _, err := q.kvHeadCounts(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) kvHeadCounts() ([]uint32, error) {
|
||||
if len(q.LayerTypes) > 0 {
|
||||
kv := make([]uint32, q.NumHiddenLayers)
|
||||
hasFull := false
|
||||
hasRecurrent := false
|
||||
for i := range q.NumHiddenLayers {
|
||||
layerType := ""
|
||||
if i < uint32(len(q.LayerTypes)) {
|
||||
layerType = q.LayerTypes[i]
|
||||
}
|
||||
if layerType == "full_attention" {
|
||||
kv[i] = q.NumKeyValueHeads
|
||||
hasFull = true
|
||||
} else {
|
||||
hasRecurrent = true
|
||||
}
|
||||
}
|
||||
if !hasFull || !hasRecurrent {
|
||||
return nil, fmt.Errorf("qwen3next: layer_types must include both full_attention and linear_attention")
|
||||
}
|
||||
return kv, nil
|
||||
}
|
||||
|
||||
if q.FullAttentionInterval == 0 {
|
||||
return nil, fmt.Errorf("qwen3next: full_attention_interval must be set")
|
||||
}
|
||||
if q.FullAttentionInterval > q.NumHiddenLayers {
|
||||
return nil, fmt.Errorf("qwen3next: full_attention_interval (%d) exceeds num_hidden_layers (%d)", q.FullAttentionInterval, q.NumHiddenLayers)
|
||||
}
|
||||
|
||||
kv := make([]uint32, q.NumHiddenLayers)
|
||||
hasFull := false
|
||||
for i := range q.NumHiddenLayers {
|
||||
if (i+1)%q.FullAttentionInterval == 0 {
|
||||
kv[i] = q.NumKeyValueHeads
|
||||
hasFull = true
|
||||
}
|
||||
}
|
||||
if !hasFull {
|
||||
return nil, fmt.Errorf("qwen3next: head_count_kv would be all zeros (full_attention_interval=%d, num_hidden_layers=%d)", q.FullAttentionInterval, q.NumHiddenLayers)
|
||||
}
|
||||
return kv, nil
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) ropeSections() []int32 {
|
||||
if len(q.RopeParameters.MropeSection) > 0 {
|
||||
return q.RopeParameters.MropeSection
|
||||
}
|
||||
return q.RopeScaling.MropeSection
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) shouldReorderVHeads() bool {
|
||||
modelType := strings.ToLower(q.ModelType)
|
||||
if strings.Contains(modelType, "qwen3_next") || strings.Contains(modelType, "qwen3next") {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, arch := range q.Architectures {
|
||||
arch = strings.ToLower(arch)
|
||||
if strings.Contains(arch, "qwen3next") || strings.Contains(arch, "qwen3_next") {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Default to qwen3.5 layout for all other qwen3next-family imports.
|
||||
return true
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) KV(t *Tokenizer) KV {
|
||||
kv := q.ModelParameters.KV(t)
|
||||
|
||||
arch := "qwen35"
|
||||
if q.NumExperts > 0 {
|
||||
arch = "qwen35moe"
|
||||
}
|
||||
kv["general.architecture"] = arch
|
||||
kv["tokenizer.ggml.pre"] = "qwen35"
|
||||
kv["block_count"] = q.NumHiddenLayers
|
||||
kv["context_length"] = q.MaxPositionEmbeddings
|
||||
kv["embedding_length"] = q.HiddenSize
|
||||
kv["feed_forward_length"] = q.IntermediateSize
|
||||
kv["attention.head_count"] = q.NumAttentionHeads
|
||||
|
||||
headDim := q.HeadDim
|
||||
if headDim == 0 && q.NumAttentionHeads > 0 {
|
||||
headDim = q.HiddenSize / q.NumAttentionHeads
|
||||
}
|
||||
kv["attention.key_length"] = headDim
|
||||
kv["attention.value_length"] = headDim
|
||||
kv["attention.layer_norm_rms_epsilon"] = q.RMSNormEPS
|
||||
kv["rope.freq_base"] = q.RopeTheta
|
||||
|
||||
partialRotary := q.PartialRotaryFactor
|
||||
if partialRotary > 0 && partialRotary <= 1 {
|
||||
kv["rope.dimension_count"] = uint32(float32(headDim) * partialRotary)
|
||||
}
|
||||
|
||||
if sections := q.ropeSections(); len(sections) > 0 {
|
||||
kv["mrope_sections"] = sections
|
||||
kv["rope.mrope_section"] = sections
|
||||
kv["rope.dimension_sections"] = sections
|
||||
}
|
||||
if q.RopeParameters.MRopeInterleaved {
|
||||
kv["rope.mrope_interleaved"] = true
|
||||
}
|
||||
|
||||
if q.RopeScaling.Type != "" && q.RopeScaling.Type != "default" {
|
||||
kv["rope.scaling.type"] = q.RopeScaling.Type
|
||||
kv["rope.scaling.factor"] = q.RopeScaling.Factor
|
||||
}
|
||||
|
||||
if q.NumExperts > 0 {
|
||||
kv["expert_count"] = q.NumExperts
|
||||
kv["expert_used_count"] = q.NumExpertsPerToken
|
||||
if q.NormTopkProb != nil {
|
||||
kv["norm_top_k_prob"] = *q.NormTopkProb
|
||||
}
|
||||
if q.MoEIntermediateSize > 0 {
|
||||
kv["expert_feed_forward_length"] = q.MoEIntermediateSize
|
||||
}
|
||||
if q.SharedExpertIntermSize > 0 {
|
||||
kv["expert_shared_feed_forward_length"] = q.SharedExpertIntermSize
|
||||
}
|
||||
}
|
||||
|
||||
dInner := q.LinearValueHeadDim * q.LinearNumValueHeads
|
||||
kv["ssm.inner_size"] = dInner
|
||||
kv["ssm.state_size"] = q.LinearKeyHeadDim
|
||||
kv["ssm.group_count"] = q.LinearNumKeyHeads
|
||||
kv["ssm.time_step_rank"] = q.LinearNumValueHeads
|
||||
kv["ssm.conv_kernel"] = q.LinearConvKernelDim
|
||||
if q.shouldReorderVHeads() {
|
||||
kv["ssm.v_head_reordered"] = true
|
||||
}
|
||||
if q.FullAttentionInterval > 0 {
|
||||
kv["full_attention_interval"] = q.FullAttentionInterval
|
||||
}
|
||||
|
||||
if headCounts, err := q.kvHeadCounts(); err == nil {
|
||||
kv["attention.head_count_kv"] = headCounts
|
||||
}
|
||||
|
||||
if q.VisionModel.Depth > 0 {
|
||||
kv["vision.block_count"] = q.VisionModel.Depth
|
||||
kv["vision.embedding_length"] = q.VisionModel.HiddenSize
|
||||
kv["vision.attention.head_count"] = q.VisionModel.NumHeads
|
||||
kv["vision.num_channels"] = q.VisionModel.InChannels
|
||||
if q.VisionModel.PatchSize > 0 {
|
||||
kv["vision.patch_size"] = q.VisionModel.PatchSize
|
||||
}
|
||||
if q.VisionModel.SpatialMergeSize > 0 {
|
||||
kv["vision.spatial_merge_size"] = q.VisionModel.SpatialMergeSize
|
||||
}
|
||||
if q.VisionModel.RMSNormEps > 0 {
|
||||
kv["vision.attention.layer_norm_epsilon"] = q.VisionModel.RMSNormEps
|
||||
}
|
||||
if q.VisionModel.RopeTheta > 0 {
|
||||
kv["vision.rope.freq_base"] = q.VisionModel.RopeTheta
|
||||
}
|
||||
if q.VisionModel.TemporalPatchSize > 0 {
|
||||
kv["vision.temporal_patch_size"] = q.VisionModel.TemporalPatchSize
|
||||
}
|
||||
kv["vision.deepstack_visual_indexes"] = q.VisionModel.DeepstackVisualIndexes
|
||||
if q.VisionModel.Size.ShortestEdge > 0 {
|
||||
kv["vision.shortest_edge"] = q.VisionModel.Size.ShortestEdge
|
||||
}
|
||||
if q.VisionModel.Size.LongestEdge > 0 {
|
||||
kv["vision.longest_edge"] = q.VisionModel.Size.LongestEdge
|
||||
}
|
||||
if len(q.VisionModel.ImageMean) > 0 {
|
||||
kv["vision.image_mean"] = q.VisionModel.ImageMean
|
||||
}
|
||||
if len(q.VisionModel.ImageStd) > 0 {
|
||||
kv["vision.image_std"] = q.VisionModel.ImageStd
|
||||
}
|
||||
}
|
||||
|
||||
if q.ImageTokenID > 0 {
|
||||
kv["image_token_id"] = q.ImageTokenID
|
||||
}
|
||||
if q.VisionStartTokenID > 0 {
|
||||
kv["vision_start_token_id"] = q.VisionStartTokenID
|
||||
}
|
||||
if q.VisionEndTokenID > 0 {
|
||||
kv["vision_end_token_id"] = q.VisionEndTokenID
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var out []*ggml.Tensor
|
||||
|
||||
merges := make([]merge, q.NumHiddenLayers*3)
|
||||
for i := range q.NumHiddenLayers {
|
||||
merges[i*3+0] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.gate_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
|
||||
}
|
||||
merges[i*3+1] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.up_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
|
||||
}
|
||||
merges[i*3+2] = merge{
|
||||
fmt.Sprintf("blk.%d.mlp.experts.*.down_proj.weight", i),
|
||||
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
|
||||
}
|
||||
}
|
||||
|
||||
merged, remaining := mergeTensors(ts, merges...)
|
||||
out = append(out, merged...)
|
||||
|
||||
for _, t := range remaining {
|
||||
name := t.Name()
|
||||
shape := t.Shape()
|
||||
|
||||
if strings.HasSuffix(name, ".ssm_in.weight") {
|
||||
if qkv, gate, ok := q.splitQKVZTensor(t); ok {
|
||||
out = append(out, qkv, gate)
|
||||
continue
|
||||
}
|
||||
panic(fmt.Sprintf("qwen3next: failed to split %s into attn_qkv/attn_gate (shape=%v)", name, shape))
|
||||
}
|
||||
|
||||
switch {
|
||||
case strings.Contains(name, ".mlp.experts.gate_up_proj"):
|
||||
out = append(out, slices.Collect(splitDim(t, 1,
|
||||
split{Replacer: strings.NewReplacer(".mlp.experts.gate_up_proj", ".ffn_gate_exps.weight")},
|
||||
split{Replacer: strings.NewReplacer(".mlp.experts.gate_up_proj", ".ffn_up_exps.weight")},
|
||||
))...)
|
||||
|
||||
case strings.Contains(name, ".mlp.experts.down_proj"):
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: strings.NewReplacer(".mlp.experts.down_proj", ".ffn_down_exps.weight").Replace(name),
|
||||
Kind: t.Kind(),
|
||||
Shape: slices.Clone(shape),
|
||||
WriterTo: t,
|
||||
})
|
||||
|
||||
case strings.HasPrefix(name, "v.blk.") && strings.Contains(name, ".attn_qkv"):
|
||||
out = append(out, slices.Collect(splitDim(t, 0,
|
||||
split{Replacer: strings.NewReplacer("attn_qkv", "attn_q")},
|
||||
split{Replacer: strings.NewReplacer("attn_qkv", "attn_k")},
|
||||
split{Replacer: strings.NewReplacer("attn_qkv", "attn_v")},
|
||||
))...)
|
||||
|
||||
case strings.Contains(name, "patch_embed") && strings.HasSuffix(name, "weight"):
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: name,
|
||||
Kind: t.Kind(),
|
||||
Shape: append([]uint64{shape[0] * shape[1]}, shape[2:]...),
|
||||
WriterTo: t,
|
||||
})
|
||||
|
||||
case strings.HasSuffix(name, "_norm.weight") && !strings.HasSuffix(name, ".ssm_norm.weight"):
|
||||
t.SetRepacker(q.addOne)
|
||||
out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
|
||||
|
||||
case strings.HasSuffix(name, ".ssm_a"):
|
||||
t.SetRepacker(q.repackSSMA())
|
||||
out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
|
||||
|
||||
case strings.HasSuffix(name, ".attn_qkv.weight"):
|
||||
if q.shouldReorderVHeads() {
|
||||
t.SetRepacker(q.repackAttnQKV())
|
||||
}
|
||||
out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
|
||||
|
||||
case strings.HasSuffix(name, ".attn_gate.weight"):
|
||||
if q.shouldReorderVHeads() {
|
||||
// HF tensor layout is [out_features, in_features]; reorder rows.
|
||||
t.SetRepacker(q.repackReorderDim(0, int(q.LinearValueHeadDim)))
|
||||
}
|
||||
out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
|
||||
|
||||
case strings.HasSuffix(name, ".ssm_beta.weight"), strings.HasSuffix(name, ".ssm_alpha.weight"):
|
||||
if q.shouldReorderVHeads() {
|
||||
// HF tensor layout is [out_features, in_features]; reorder rows.
|
||||
t.SetRepacker(q.repackReorderDim(0, 1))
|
||||
}
|
||||
out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
|
||||
|
||||
case strings.HasSuffix(name, ".ssm_dt"):
|
||||
if q.shouldReorderVHeads() {
|
||||
t.SetRepacker(q.repackReorderDim(0, 1))
|
||||
}
|
||||
out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
|
||||
|
||||
case strings.HasSuffix(name, ".ssm_out.weight"):
|
||||
if q.shouldReorderVHeads() {
|
||||
// HF out_proj layout is [out_features, in_features]; reorder columns.
|
||||
t.SetRepacker(q.repackReorderDim(1, int(q.LinearValueHeadDim)))
|
||||
}
|
||||
out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
|
||||
|
||||
case strings.HasSuffix(name, ".ssm_conv1d.weight"):
|
||||
newShape := slices.Clone(shape)
|
||||
if len(shape) == 3 {
|
||||
if shape[0] == 1 {
|
||||
newShape = []uint64{shape[1], shape[2]}
|
||||
} else if shape[1] == 1 {
|
||||
newShape = []uint64{shape[0], shape[2]}
|
||||
}
|
||||
}
|
||||
if q.shouldReorderVHeads() {
|
||||
t.SetRepacker(q.repackConv1D())
|
||||
}
|
||||
out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: newShape, WriterTo: t})
|
||||
|
||||
default:
|
||||
out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) repackReorderDim(dim, headDim int) Repacker {
|
||||
return func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
if !q.shouldReorderVHeads() {
|
||||
return data, nil
|
||||
}
|
||||
numK := int(q.LinearNumKeyHeads)
|
||||
numVPerK := int(q.LinearNumValueHeads / q.LinearNumKeyHeads)
|
||||
return reorderHeadLayout(data, shape, dim, numK, numVPerK, headDim)
|
||||
}
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) repackAttnQKV() Repacker {
|
||||
return func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
if !q.shouldReorderVHeads() || len(shape) != 2 {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
rows := int(shape[0])
|
||||
cols := int(shape[1])
|
||||
numK := int(q.LinearNumKeyHeads)
|
||||
numV := int(q.LinearNumValueHeads)
|
||||
headK := int(q.LinearKeyHeadDim)
|
||||
headV := int(q.LinearValueHeadDim)
|
||||
qDim := headK * numK
|
||||
kDim := headK * numK
|
||||
vDim := headV * numV
|
||||
qkvDim := qDim + kDim + vDim
|
||||
|
||||
switch {
|
||||
case rows == qkvDim:
|
||||
// HF layout: [out_features, in_features]. Keep Q/K rows unchanged and
|
||||
// reorder only V rows from grouped -> tiled head layout.
|
||||
out := make([]float32, len(data))
|
||||
qkRows := qDim + kDim
|
||||
qkSize := qkRows * cols
|
||||
copy(out[:qkSize], data[:qkSize])
|
||||
|
||||
vStart := qkSize
|
||||
vEnd := vStart + vDim*cols
|
||||
reorderedV, err := reorderHeadLayout(data[vStart:vEnd], []uint64{uint64(vDim), uint64(cols)}, 0, numK, numV/numK, headV)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
copy(out[vStart:vEnd], reorderedV)
|
||||
copy(out[vEnd:], data[vEnd:])
|
||||
return out, nil
|
||||
|
||||
case cols == qkvDim:
|
||||
// Fallback for already-transposed [in_features, out_features] tensors.
|
||||
out := make([]float32, len(data))
|
||||
copy(out, data)
|
||||
for r := range rows {
|
||||
base := r * cols
|
||||
vStart := base + qDim + kDim
|
||||
vEnd := vStart + vDim
|
||||
reorderedV, err := reorderHeadLayout(out[vStart:vEnd], []uint64{uint64(vDim)}, 0, numK, numV/numK, headV)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
copy(out[vStart:vEnd], reorderedV)
|
||||
}
|
||||
return out, nil
|
||||
|
||||
default:
|
||||
return data, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) repackConv1D() Repacker {
|
||||
return func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
if !q.shouldReorderVHeads() {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
normShape := slices.Clone(shape)
|
||||
if len(shape) == 3 {
|
||||
if shape[0] == 1 {
|
||||
normShape = []uint64{shape[1], shape[2]}
|
||||
} else if shape[1] == 1 {
|
||||
normShape = []uint64{shape[0], shape[2]}
|
||||
}
|
||||
}
|
||||
if len(normShape) != 2 {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
rows := int(normShape[0])
|
||||
cols := int(normShape[1])
|
||||
numK := int(q.LinearNumKeyHeads)
|
||||
numV := int(q.LinearNumValueHeads)
|
||||
headK := int(q.LinearKeyHeadDim)
|
||||
headV := int(q.LinearValueHeadDim)
|
||||
qkChannels := 2 * headK * numK
|
||||
totalChannels := qkChannels + headV*numV
|
||||
if qkChannels <= 0 {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
switch {
|
||||
case rows == totalChannels:
|
||||
// HF layout after squeeze: [channels, kernel]
|
||||
out := make([]float32, len(data))
|
||||
prefix := qkChannels * cols
|
||||
copy(out[:prefix], data[:prefix])
|
||||
reorderedV, err := reorderHeadLayout(data[prefix:], []uint64{uint64(totalChannels - qkChannels), uint64(cols)}, 0, numK, numV/numK, headV)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
copy(out[prefix:], reorderedV)
|
||||
return out, nil
|
||||
case cols == totalChannels:
|
||||
// Fallback for transposed [kernel, channels]
|
||||
out := make([]float32, len(data))
|
||||
copy(out, data)
|
||||
vChannels := totalChannels - qkChannels
|
||||
for r := range rows {
|
||||
base := r * cols
|
||||
vStart := base + qkChannels
|
||||
vEnd := vStart + vChannels
|
||||
reorderedV, err := reorderHeadLayout(out[vStart:vEnd], []uint64{uint64(vChannels)}, 0, numK, numV/numK, headV)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
copy(out[vStart:vEnd], reorderedV)
|
||||
}
|
||||
return out, nil
|
||||
default:
|
||||
return data, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) repackSSMA() Repacker {
|
||||
return func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
result := make([]float32, len(data))
|
||||
for i, v := range data {
|
||||
result[i] = -float32(math.Exp(float64(v)))
|
||||
}
|
||||
if !q.shouldReorderVHeads() {
|
||||
return result, nil
|
||||
}
|
||||
numK := int(q.LinearNumKeyHeads)
|
||||
numVPerK := int(q.LinearNumValueHeads / q.LinearNumKeyHeads)
|
||||
return reorderHeadLayout(result, shape, 0, numK, numVPerK, 1)
|
||||
}
|
||||
}
|
||||
|
||||
func reorderHeadLayout(data []float32, shape []uint64, dim int, numKHeads, numVPerK, headDim int) ([]float32, error) {
|
||||
if len(shape) == 0 || numKHeads <= 0 || numVPerK <= 0 || headDim <= 0 {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
dims := make([]int, len(shape))
|
||||
for i := range shape {
|
||||
dims[i] = int(shape[i])
|
||||
}
|
||||
if dim < 0 {
|
||||
dim += len(dims)
|
||||
}
|
||||
if dim < 0 || dim >= len(dims) {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
expected := numKHeads * numVPerK * headDim
|
||||
if dims[dim] != expected {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
newShape := make([]int, 0, len(dims)+2)
|
||||
newShape = append(newShape, dims[:dim]...)
|
||||
newShape = append(newShape, numKHeads, numVPerK, headDim)
|
||||
newShape = append(newShape, dims[dim+1:]...)
|
||||
|
||||
var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
if err := tt.Reshape(newShape...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
perm := make([]int, len(newShape))
|
||||
for i := range perm {
|
||||
perm[i] = i
|
||||
}
|
||||
perm[dim], perm[dim+1] = perm[dim+1], perm[dim]
|
||||
|
||||
tt, err := tensor.Transpose(tt, perm...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tt = tensor.Materialize(tt)
|
||||
|
||||
total := 1
|
||||
for _, d := range dims {
|
||||
total *= d
|
||||
}
|
||||
if err := tt.Reshape(total); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return native.VectorF32(tt.(*tensor.Dense))
|
||||
}
|
||||
|
||||
type qkvzSplitSpec struct {
|
||||
hidden int
|
||||
headKDim int
|
||||
headVDim int
|
||||
numKHeads int
|
||||
numVHeads int
|
||||
qkvzDim int
|
||||
qkvOut int
|
||||
gateOut int
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) qkvzSpec(shape []uint64) (qkvzSplitSpec, bool) {
|
||||
if len(shape) != 2 {
|
||||
return qkvzSplitSpec{}, false
|
||||
}
|
||||
|
||||
numKHeads := int(q.LinearNumKeyHeads)
|
||||
numVHeads := int(q.LinearNumValueHeads)
|
||||
headKDim := int(q.LinearKeyHeadDim)
|
||||
headVDim := int(q.LinearValueHeadDim)
|
||||
if numKHeads == 0 || numVHeads == 0 || headKDim == 0 || headVDim == 0 {
|
||||
return qkvzSplitSpec{}, false
|
||||
}
|
||||
if numVHeads%numKHeads != 0 {
|
||||
return qkvzSplitSpec{}, false
|
||||
}
|
||||
|
||||
hidden := int(shape[1])
|
||||
vPerHead := headVDim * (numVHeads / numKHeads)
|
||||
qkvzDim := 2*headKDim + 2*vPerHead
|
||||
expectedOut := qkvzDim * numKHeads
|
||||
if int(shape[0]) != expectedOut {
|
||||
return qkvzSplitSpec{}, false
|
||||
}
|
||||
|
||||
return qkvzSplitSpec{
|
||||
hidden: hidden,
|
||||
headKDim: headKDim,
|
||||
headVDim: headVDim,
|
||||
numKHeads: numKHeads,
|
||||
numVHeads: numVHeads,
|
||||
qkvzDim: qkvzDim,
|
||||
qkvOut: 2*headKDim*numKHeads + headVDim*numVHeads,
|
||||
gateOut: headVDim * numVHeads,
|
||||
}, true
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) splitQKVZTensor(t Tensor) (*ggml.Tensor, *ggml.Tensor, bool) {
|
||||
spec, ok := q.qkvzSpec(t.Shape())
|
||||
if !ok {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
qkvTensor := t.Clone()
|
||||
qkvTensor.SetRepacker(q.repackQKVZ(spec, false))
|
||||
|
||||
gateTensor := t.Clone()
|
||||
gateTensor.SetRepacker(q.repackQKVZ(spec, true))
|
||||
|
||||
qkvName := strings.Replace(t.Name(), "ssm_in", "attn_qkv", 1)
|
||||
gateName := strings.Replace(t.Name(), "ssm_in", "attn_gate", 1)
|
||||
|
||||
return &ggml.Tensor{
|
||||
Name: qkvName,
|
||||
Kind: t.Kind(),
|
||||
Shape: []uint64{uint64(spec.qkvOut), uint64(spec.hidden)},
|
||||
WriterTo: qkvTensor,
|
||||
}, &ggml.Tensor{
|
||||
Name: gateName,
|
||||
Kind: t.Kind(),
|
||||
Shape: []uint64{uint64(spec.gateOut), uint64(spec.hidden)},
|
||||
WriterTo: gateTensor,
|
||||
}, true
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) repackQKVZ(spec qkvzSplitSpec, extractGate bool) Repacker {
|
||||
vPerHead := spec.headVDim * (spec.numVHeads / spec.numKHeads)
|
||||
|
||||
return func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := make([]int, len(shape))
|
||||
for i := range shape {
|
||||
dims[i] = int(shape[i])
|
||||
}
|
||||
|
||||
var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
var err error
|
||||
|
||||
tt, err = tensor.Transpose(tt, 1, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tt = tensor.Materialize(tt)
|
||||
|
||||
if err := tt.Reshape(spec.hidden, spec.numKHeads, spec.qkvzDim); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
offset := 0
|
||||
qSlice, err := tt.Slice(nil, nil, tensor.S(offset, offset+spec.headKDim))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
offset += spec.headKDim
|
||||
kSlice, err := tt.Slice(nil, nil, tensor.S(offset, offset+spec.headKDim))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
offset += spec.headKDim
|
||||
vSlice, err := tt.Slice(nil, nil, tensor.S(offset, offset+vPerHead))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
offset += vPerHead
|
||||
zSlice, err := tt.Slice(nil, nil, tensor.S(offset, offset+vPerHead))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
qMat := tensor.Materialize(qSlice).(*tensor.Dense)
|
||||
kMat := tensor.Materialize(kSlice).(*tensor.Dense)
|
||||
vMat := tensor.Materialize(vSlice).(*tensor.Dense)
|
||||
zMat := tensor.Materialize(zSlice).(*tensor.Dense)
|
||||
|
||||
if err := qMat.Reshape(spec.hidden, spec.numKHeads*spec.headKDim); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := kMat.Reshape(spec.hidden, spec.numKHeads*spec.headKDim); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := vMat.Reshape(spec.hidden, spec.numKHeads*vPerHead); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := zMat.Reshape(spec.hidden, spec.numKHeads*vPerHead); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var out tensor.Tensor
|
||||
if extractGate {
|
||||
out = zMat
|
||||
} else {
|
||||
out, err = tensor.Concat(1, qMat, kMat, vMat)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
out = tensor.Materialize(out)
|
||||
out, err = tensor.Transpose(out, 1, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = tensor.Materialize(out)
|
||||
|
||||
if err := out.Reshape(out.Shape().TotalSize()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return native.VectorF32(out.(*tensor.Dense))
|
||||
}
|
||||
}
|
||||
|
||||
func (*qwen3NextModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
|
||||
ones := tensor.Ones(tensor.Float32, int(shape[0]))
|
||||
|
||||
n, err := n.Add(ones)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
|
||||
func (q *qwen3NextModel) Replacements() []string {
|
||||
return []string{
|
||||
// Embeddings and output
|
||||
"lm_head", "output",
|
||||
"model.language_model.embed_tokens", "token_embd",
|
||||
"model.language_model.norm", "output_norm",
|
||||
"model.language_model.layers", "blk",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"model.layers", "blk",
|
||||
|
||||
// Vision
|
||||
"model.visual", "v",
|
||||
"patch_embed.proj", "patch_embed",
|
||||
"blocks", "blk",
|
||||
"attn.qkv", "attn_qkv",
|
||||
"attn.proj", "attn_out",
|
||||
"deepstack_merger_list", "deepstack_merger",
|
||||
|
||||
// Layer norms
|
||||
"input_layernorm", "attn_norm",
|
||||
"post_attention_layernorm", "post_attention_norm",
|
||||
|
||||
// Full attention (self_attn)
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.q_norm", "attn_q_norm",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.k_norm", "attn_k_norm",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
|
||||
// Linear attention (legacy qwen3next)
|
||||
"linear_attn.in_proj_qkvz", "ssm_in",
|
||||
"linear_attn.in_proj_ba", "ssm_ba",
|
||||
|
||||
// Linear attention (qwen35)
|
||||
"linear_attn.in_proj_qkv", "attn_qkv",
|
||||
"linear_attn.in_proj_z", "attn_gate",
|
||||
"linear_attn.in_proj_a", "ssm_alpha",
|
||||
"linear_attn.in_proj_b", "ssm_beta",
|
||||
|
||||
"linear_attn.conv1d", "ssm_conv1d",
|
||||
"linear_attn.dt_bias", "ssm_dt",
|
||||
"linear_attn.dt_proj", "ssm_dt",
|
||||
"linear_attn.A_log", "ssm_a",
|
||||
"linear_attn.norm", "ssm_norm",
|
||||
"linear_attn.out_proj", "ssm_out",
|
||||
|
||||
// MoE
|
||||
"mlp.gate.weight", "ffn_gate_inp.weight",
|
||||
"mlp.shared_expert.down_proj", "ffn_down_shexp",
|
||||
"mlp.shared_expert.gate_proj", "ffn_gate_shexp",
|
||||
"mlp.shared_expert.up_proj", "ffn_up_shexp",
|
||||
"mlp.shared_expert_gate", "ffn_gate_inp_shexp",
|
||||
|
||||
// Dense FFN
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
}
|
||||
}
|
||||
563
convert/convert_qwen3next_test.go
Normal file
563
convert/convert_qwen3next_test.go
Normal file
@@ -0,0 +1,563 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"os"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
func boolPtr(v bool) *bool {
|
||||
return &v
|
||||
}
|
||||
|
||||
func readTensorData(t *testing.T, tensor *ggml.Tensor) []float32 {
|
||||
t.Helper()
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := tensor.WriteTo(&b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
numel := 1
|
||||
for _, d := range tensor.Shape {
|
||||
numel *= int(d)
|
||||
}
|
||||
|
||||
values := make([]float32, numel)
|
||||
if err := binary.Read(&b, binary.LittleEndian, &values); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
return values
|
||||
}
|
||||
|
||||
func TestQwen3NextLegacyModelTypeDisablesReorder(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
ModelType: "qwen3_next",
|
||||
},
|
||||
}
|
||||
|
||||
if m.shouldReorderVHeads() {
|
||||
t.Fatalf("legacy qwen3_next model_type should not reorder v-head layout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3NextLegacyArchitectureDisablesReorder(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
Architectures: []string{"Qwen3NextForCausalLM"},
|
||||
},
|
||||
}
|
||||
|
||||
if m.shouldReorderVHeads() {
|
||||
t.Fatalf("legacy Qwen3Next architecture should not reorder v-head layout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3NextKVLegacyConfig(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
ModelType: "qwen3_next",
|
||||
},
|
||||
qwen3NextTextConfig: qwen3NextTextConfig{
|
||||
MaxPositionEmbeddings: 8192,
|
||||
HiddenSize: 512,
|
||||
NumHiddenLayers: 4,
|
||||
IntermediateSize: 2048,
|
||||
NumAttentionHeads: 8,
|
||||
NumKeyValueHeads: 2,
|
||||
HeadDim: 64,
|
||||
RopeTheta: 1_000_000,
|
||||
RMSNormEPS: 1e-6,
|
||||
|
||||
NumExperts: 8,
|
||||
NumExpertsPerToken: 2,
|
||||
NormTopkProb: boolPtr(true),
|
||||
MoEIntermediateSize: 256,
|
||||
SharedExpertIntermSize: 512,
|
||||
|
||||
FullAttentionInterval: 2,
|
||||
|
||||
LinearConvKernelDim: 4,
|
||||
LinearKeyHeadDim: 64,
|
||||
LinearNumKeyHeads: 2,
|
||||
LinearNumValueHeads: 4,
|
||||
LinearValueHeadDim: 64,
|
||||
|
||||
PartialRotaryFactor: 0.25,
|
||||
},
|
||||
}
|
||||
|
||||
if err := m.parseMore(os.DirFS(t.TempDir())); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
kv := m.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
|
||||
if got, want := kv["general.architecture"], "qwen35moe"; got != want {
|
||||
t.Fatalf("unexpected architecture: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := kv["tokenizer.ggml.pre"], "qwen35"; got != want {
|
||||
t.Fatalf("unexpected tokenizer pre: got %v want %v", got, want)
|
||||
}
|
||||
|
||||
headCountKV, ok := kv["attention.head_count_kv"].([]uint32)
|
||||
if !ok {
|
||||
t.Fatalf("attention.head_count_kv has unexpected type: %T", kv["attention.head_count_kv"])
|
||||
}
|
||||
if got, want := headCountKV, []uint32{0, 2, 0, 2}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected attention.head_count_kv: got %v want %v", got, want)
|
||||
}
|
||||
|
||||
if _, ok := kv["ssm.v_head_reordered"]; ok {
|
||||
t.Fatalf("legacy qwen3next should not enable ssm.v_head_reordered")
|
||||
}
|
||||
if got, want := kv["norm_top_k_prob"], true; got != want {
|
||||
t.Fatalf("unexpected norm_top_k_prob: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen35MoeOmitsNormTopKProbWhenUnset(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
ModelType: "qwen3_5",
|
||||
},
|
||||
qwen3NextTextConfig: qwen3NextTextConfig{
|
||||
MaxPositionEmbeddings: 4096,
|
||||
HiddenSize: 512,
|
||||
NumHiddenLayers: 4,
|
||||
IntermediateSize: 2048,
|
||||
NumAttentionHeads: 8,
|
||||
NumKeyValueHeads: 2,
|
||||
HeadDim: 64,
|
||||
RopeTheta: 1_000_000,
|
||||
RMSNormEPS: 1e-6,
|
||||
NumExperts: 8,
|
||||
NumExpertsPerToken: 2,
|
||||
FullAttentionInterval: 2,
|
||||
LinearConvKernelDim: 4,
|
||||
LinearKeyHeadDim: 64,
|
||||
LinearNumKeyHeads: 2,
|
||||
LinearNumValueHeads: 4,
|
||||
LinearValueHeadDim: 64,
|
||||
PartialRotaryFactor: 0.25,
|
||||
},
|
||||
}
|
||||
|
||||
if err := m.parseMore(os.DirFS(t.TempDir())); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
kv := m.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
|
||||
if _, ok := kv["norm_top_k_prob"]; ok {
|
||||
t.Fatalf("expected norm_top_k_prob to be omitted when not set in config")
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen35KVFromTextConfig(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
ModelType: "qwen3_5",
|
||||
},
|
||||
TextConfig: &qwen3NextTextConfig{
|
||||
MaxPositionEmbeddings: 16384,
|
||||
HiddenSize: 1024,
|
||||
NumHiddenLayers: 4,
|
||||
IntermediateSize: 4096,
|
||||
NumAttentionHeads: 8,
|
||||
NumKeyValueHeads: 4,
|
||||
HeadDim: 128,
|
||||
RMSNormEPS: 1e-6,
|
||||
|
||||
LayerTypes: []string{
|
||||
"linear_attention",
|
||||
"full_attention",
|
||||
"linear_attention",
|
||||
"full_attention",
|
||||
},
|
||||
|
||||
LinearConvKernelDim: 4,
|
||||
LinearKeyHeadDim: 128,
|
||||
LinearNumKeyHeads: 2,
|
||||
LinearNumValueHeads: 4,
|
||||
LinearValueHeadDim: 128,
|
||||
|
||||
RopeParameters: qwen3NextRopeParams{
|
||||
MRopeInterleaved: true,
|
||||
MropeSection: []int32{11, 11, 10},
|
||||
RopeType: "default",
|
||||
RopeTheta: 10_000_000,
|
||||
PartialRotaryFactor: 0.25,
|
||||
},
|
||||
},
|
||||
VisionModel: qwen3NextVisionConfig{
|
||||
Depth: 2,
|
||||
HiddenSize: 128,
|
||||
NumHeads: 4,
|
||||
InChannels: 3,
|
||||
PatchSize: 16,
|
||||
SpatialMergeSize: 2,
|
||||
RMSNormEps: 1e-6,
|
||||
RopeTheta: 10_000,
|
||||
TemporalPatchSize: 2,
|
||||
DeepstackVisualIndexes: []int32{1},
|
||||
},
|
||||
ImageTokenID: 1001,
|
||||
VisionStartTokenID: 1002,
|
||||
VisionEndTokenID: 1003,
|
||||
}
|
||||
m.VisionModel.Size.ShortestEdge = 224
|
||||
m.VisionModel.Size.LongestEdge = 4096
|
||||
m.VisionModel.ImageMean = []float32{0.5, 0.5, 0.5}
|
||||
m.VisionModel.ImageStd = []float32{0.2, 0.2, 0.2}
|
||||
|
||||
if err := m.parseMore(os.DirFS(t.TempDir())); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
kv := m.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
|
||||
if got, want := kv["general.architecture"], "qwen35"; got != want {
|
||||
t.Fatalf("unexpected architecture: got %v want %v", got, want)
|
||||
}
|
||||
|
||||
headCountKV, ok := kv["attention.head_count_kv"].([]uint32)
|
||||
if !ok {
|
||||
t.Fatalf("attention.head_count_kv has unexpected type: %T", kv["attention.head_count_kv"])
|
||||
}
|
||||
if got, want := headCountKV, []uint32{0, 4, 0, 4}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected attention.head_count_kv: got %v want %v", got, want)
|
||||
}
|
||||
|
||||
if got, ok := kv["ssm.v_head_reordered"].(bool); !ok || !got {
|
||||
t.Fatalf("expected ssm.v_head_reordered=true, got %v (%T)", kv["ssm.v_head_reordered"], kv["ssm.v_head_reordered"])
|
||||
}
|
||||
|
||||
mrope, ok := kv["mrope_sections"].([]int32)
|
||||
if !ok {
|
||||
t.Fatalf("mrope_sections has unexpected type: %T", kv["mrope_sections"])
|
||||
}
|
||||
if got, want := mrope, []int32{11, 11, 10}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected mrope_sections: got %v want %v", got, want)
|
||||
}
|
||||
ropeSections, ok := kv["rope.dimension_sections"].([]int32)
|
||||
if !ok {
|
||||
t.Fatalf("rope.dimension_sections has unexpected type: %T", kv["rope.dimension_sections"])
|
||||
}
|
||||
if got, want := ropeSections, []int32{11, 11, 10}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected rope.dimension_sections: got %v want %v", got, want)
|
||||
}
|
||||
|
||||
if got, ok := kv["rope.mrope_interleaved"].(bool); !ok || !got {
|
||||
t.Fatalf("expected rope.mrope_interleaved=true, got %v (%T)", kv["rope.mrope_interleaved"], kv["rope.mrope_interleaved"])
|
||||
}
|
||||
|
||||
if got, want := kv["vision.block_count"], uint32(2); got != want {
|
||||
t.Fatalf("unexpected vision.block_count: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3NextReplacements(t *testing.T) {
|
||||
r := strings.NewReplacer((&qwen3NextModel{}).Replacements()...)
|
||||
|
||||
if got, want := r.Replace("model.language_model.layers.1.linear_attn.in_proj_qkv.weight"), "blk.1.attn_qkv.weight"; got != want {
|
||||
t.Fatalf("unexpected language-model replacement: got %q want %q", got, want)
|
||||
}
|
||||
if got, want := r.Replace("model.visual.blocks.0.attn.qkv.weight"), "v.blk.0.attn_qkv.weight"; got != want {
|
||||
t.Fatalf("unexpected vision replacement: got %q want %q", got, want)
|
||||
}
|
||||
if got, want := r.Replace("model.layers.1.linear_attn.in_proj_qkvz.weight"), "blk.1.ssm_in.weight"; got != want {
|
||||
t.Fatalf("unexpected legacy replacement: got %q want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen35ReordersVHeads(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
ModelType: "qwen3_5",
|
||||
},
|
||||
qwen3NextTextConfig: qwen3NextTextConfig{
|
||||
LinearNumKeyHeads: 2,
|
||||
LinearNumValueHeads: 4,
|
||||
LinearValueHeadDim: 1,
|
||||
},
|
||||
}
|
||||
|
||||
out := m.Tensors([]Tensor{
|
||||
&fakeTensor{
|
||||
name: "blk.0.attn_gate.weight",
|
||||
shape: []uint64{4, 2},
|
||||
data: []float32{0, 1, 2, 3, 4, 5, 6, 7},
|
||||
},
|
||||
})
|
||||
if len(out) != 1 {
|
||||
t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
|
||||
}
|
||||
|
||||
if got, want := readTensorData(t, out[0]), []float32{0, 1, 4, 5, 2, 3, 6, 7}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected data: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen35ReordersAttnQKVOutputDim(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
ModelType: "qwen3_5",
|
||||
},
|
||||
qwen3NextTextConfig: qwen3NextTextConfig{
|
||||
LinearNumKeyHeads: 2,
|
||||
LinearNumValueHeads: 4,
|
||||
LinearKeyHeadDim: 1,
|
||||
LinearValueHeadDim: 1,
|
||||
},
|
||||
}
|
||||
|
||||
out := m.Tensors([]Tensor{
|
||||
&fakeTensor{
|
||||
name: "blk.0.attn_qkv.weight",
|
||||
shape: []uint64{8, 2}, // [out_features, in_features] (HF layout)
|
||||
data: []float32{
|
||||
0, 1, // q0
|
||||
2, 3, // q1
|
||||
4, 5, // k0
|
||||
6, 7, // k1
|
||||
10, 11, // v(k0,v0)
|
||||
12, 13, // v(k0,v1)
|
||||
20, 21, // v(k1,v0)
|
||||
22, 23, // v(k1,v1)
|
||||
},
|
||||
},
|
||||
})
|
||||
if len(out) != 1 {
|
||||
t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
|
||||
}
|
||||
|
||||
if got, want := readTensorData(t, out[0]), []float32{
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
10, 11, 20, 21, 12, 13, 22, 23,
|
||||
}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected qkv data: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen35ReordersSsmOutInputDim(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
ModelType: "qwen3_5",
|
||||
},
|
||||
qwen3NextTextConfig: qwen3NextTextConfig{
|
||||
LinearNumKeyHeads: 2,
|
||||
LinearNumValueHeads: 4,
|
||||
LinearValueHeadDim: 1,
|
||||
},
|
||||
}
|
||||
|
||||
out := m.Tensors([]Tensor{
|
||||
&fakeTensor{
|
||||
name: "blk.0.ssm_out.weight",
|
||||
shape: []uint64{2, 4},
|
||||
data: []float32{0, 1, 2, 3, 4, 5, 6, 7},
|
||||
},
|
||||
})
|
||||
if len(out) != 1 {
|
||||
t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
|
||||
}
|
||||
|
||||
if got, want := readTensorData(t, out[0]), []float32{0, 2, 1, 3, 4, 6, 5, 7}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected ssm_out data: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen35ReordersSsmBetaRows(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
ModelType: "qwen3_5",
|
||||
},
|
||||
qwen3NextTextConfig: qwen3NextTextConfig{
|
||||
LinearNumKeyHeads: 2,
|
||||
LinearNumValueHeads: 4,
|
||||
},
|
||||
}
|
||||
|
||||
out := m.Tensors([]Tensor{
|
||||
&fakeTensor{
|
||||
name: "blk.0.ssm_beta.weight",
|
||||
shape: []uint64{4, 2},
|
||||
data: []float32{0, 1, 2, 3, 4, 5, 6, 7},
|
||||
},
|
||||
})
|
||||
if len(out) != 1 {
|
||||
t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
|
||||
}
|
||||
|
||||
if got, want := readTensorData(t, out[0]), []float32{0, 1, 4, 5, 2, 3, 6, 7}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected ssm_beta data: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen35ReordersConv1DChannelDim(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
ModelType: "qwen3_5",
|
||||
},
|
||||
qwen3NextTextConfig: qwen3NextTextConfig{
|
||||
LinearNumKeyHeads: 2,
|
||||
LinearNumValueHeads: 4,
|
||||
LinearKeyHeadDim: 1,
|
||||
LinearValueHeadDim: 1,
|
||||
},
|
||||
}
|
||||
|
||||
out := m.Tensors([]Tensor{
|
||||
&fakeTensor{
|
||||
name: "blk.0.ssm_conv1d.weight",
|
||||
shape: []uint64{8, 2}, // [channels, kernel] after squeeze
|
||||
data: []float32{
|
||||
0, 1, // q0
|
||||
2, 3, // q1
|
||||
4, 5, // k0
|
||||
6, 7, // k1
|
||||
10, 11, // v(k0,v0)
|
||||
12, 13, // v(k0,v1)
|
||||
20, 21, // v(k1,v0)
|
||||
22, 23, // v(k1,v1)
|
||||
},
|
||||
},
|
||||
})
|
||||
if len(out) != 1 {
|
||||
t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
|
||||
}
|
||||
|
||||
if got, want := readTensorData(t, out[0]), []float32{
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
10, 11, 20, 21, 12, 13, 22, 23,
|
||||
}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected conv1d data: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLegacyQwen3NextDoesNotReorderVHeads(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
ModelParameters: ModelParameters{
|
||||
ModelType: "qwen3_next",
|
||||
},
|
||||
qwen3NextTextConfig: qwen3NextTextConfig{
|
||||
LinearNumKeyHeads: 2,
|
||||
LinearNumValueHeads: 4,
|
||||
LinearValueHeadDim: 1,
|
||||
},
|
||||
}
|
||||
|
||||
out := m.Tensors([]Tensor{
|
||||
&fakeTensor{
|
||||
name: "blk.0.attn_gate.weight",
|
||||
shape: []uint64{4, 1},
|
||||
data: []float32{0, 1, 2, 3},
|
||||
},
|
||||
})
|
||||
if len(out) != 1 {
|
||||
t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
|
||||
}
|
||||
|
||||
if got, want := readTensorData(t, out[0]), []float32{0, 1, 2, 3}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected data for legacy qwen3next: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen35MoePackedExperts(t *testing.T) {
|
||||
m := &qwen3NextModel{
|
||||
qwen3NextTextConfig: qwen3NextTextConfig{
|
||||
NumHiddenLayers: 1,
|
||||
},
|
||||
}
|
||||
|
||||
out := m.Tensors([]Tensor{
|
||||
&fakeTensor{
|
||||
name: "blk.0.mlp.experts.gate_up_proj",
|
||||
shape: []uint64{2, 4, 3},
|
||||
data: []float32{
|
||||
0, 1, 2,
|
||||
3, 4, 5,
|
||||
6, 7, 8,
|
||||
9, 10, 11,
|
||||
12, 13, 14,
|
||||
15, 16, 17,
|
||||
18, 19, 20,
|
||||
21, 22, 23,
|
||||
},
|
||||
},
|
||||
&fakeTensor{
|
||||
name: "blk.0.mlp.experts.down_proj",
|
||||
shape: []uint64{2, 5, 3},
|
||||
data: make([]float32, 2*5*3),
|
||||
},
|
||||
})
|
||||
|
||||
get := func(name string) *ggml.Tensor {
|
||||
for _, tensor := range out {
|
||||
if tensor.Name == name {
|
||||
return tensor
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
gate := get("blk.0.ffn_gate_exps.weight")
|
||||
if gate == nil {
|
||||
t.Fatalf("missing tensor %q", "blk.0.ffn_gate_exps.weight")
|
||||
}
|
||||
if got, want := gate.Shape, []uint64{2, 2, 3}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected gate shape: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := readTensorData(t, gate), []float32{
|
||||
0, 1, 2, 3, 4, 5,
|
||||
12, 13, 14, 15, 16, 17,
|
||||
}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected gate values: got %v want %v", got, want)
|
||||
}
|
||||
|
||||
up := get("blk.0.ffn_up_exps.weight")
|
||||
if up == nil {
|
||||
t.Fatalf("missing tensor %q", "blk.0.ffn_up_exps.weight")
|
||||
}
|
||||
if got, want := up.Shape, []uint64{2, 2, 3}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected up shape: got %v want %v", got, want)
|
||||
}
|
||||
if got, want := readTensorData(t, up), []float32{
|
||||
6, 7, 8, 9, 10, 11,
|
||||
18, 19, 20, 21, 22, 23,
|
||||
}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected up values: got %v want %v", got, want)
|
||||
}
|
||||
|
||||
down := get("blk.0.ffn_down_exps.weight")
|
||||
if down == nil {
|
||||
t.Fatalf("missing tensor %q", "blk.0.ffn_down_exps.weight")
|
||||
}
|
||||
if got, want := down.Shape, []uint64{2, 5, 3}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected down shape: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen35SharedExpertGateKeepsMatrixShape(t *testing.T) {
|
||||
m := &qwen3NextModel{}
|
||||
|
||||
out := m.Tensors([]Tensor{
|
||||
&fakeTensor{
|
||||
name: "blk.0.ffn_gate_inp_shexp.weight",
|
||||
shape: []uint64{1, 4},
|
||||
data: []float32{0, 1, 2, 3},
|
||||
},
|
||||
})
|
||||
if len(out) != 1 {
|
||||
t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
|
||||
}
|
||||
|
||||
if got, want := out[0].Shape, []uint64{1, 4}; !slices.Equal(got, want) {
|
||||
t.Fatalf("unexpected shared gate shape: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
116
convert/convert_qwen3vl.go
Normal file
116
convert/convert_qwen3vl.go
Normal file
@@ -0,0 +1,116 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"io/fs"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type qwen3VLModel struct {
|
||||
qwen3Model `json:"text_config"`
|
||||
|
||||
VisionModel struct {
|
||||
Depth uint32 `json:"depth"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NumHeads uint32 `json:"num_heads"`
|
||||
InChannels uint32 `json:"in_channels"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
SpatialMergeSize uint32 `json:"spatial_merge_size"`
|
||||
WindowSize uint32 `json:"window_size"`
|
||||
RMSNormEps float32 `json:"layer_norm_epsilon"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
TemporalPatchSize uint32 `json:"temporal_patch_size"`
|
||||
DeepstackVisualIndexes []int32 `json:"deepstack_visual_indexes"`
|
||||
|
||||
Size struct {
|
||||
ShortestEdge uint32 `json:"shortest_edge"`
|
||||
LongestEdge uint32 `json:"longest_edge"`
|
||||
} `json:"size"`
|
||||
|
||||
ImageMean []float32 `json:"image_mean"`
|
||||
ImageStd []float32 `json:"image_std"`
|
||||
} `json:"vision_config"`
|
||||
}
|
||||
|
||||
func (m *qwen3VLModel) parseMore(fsys fs.FS) error {
|
||||
bts, err := fs.ReadFile(fsys, "preprocessor_config.json")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return json.Unmarshal(bts, &m.VisionModel)
|
||||
}
|
||||
|
||||
func (m *qwen3VLModel) KV(t *Tokenizer) KV {
|
||||
kv := m.qwen3Model.KV(t)
|
||||
|
||||
arch := "qwen3vl"
|
||||
if m.NumExperts > 0 {
|
||||
arch += "moe"
|
||||
}
|
||||
// override architecture
|
||||
kv["general.architecture"] = arch
|
||||
|
||||
kv["vision.block_count"] = cmp.Or(m.VisionModel.Depth, 32)
|
||||
kv["vision.embedding_length"] = m.VisionModel.HiddenSize
|
||||
kv["vision.attention.head_count"] = cmp.Or(m.VisionModel.NumHeads, 16)
|
||||
kv["vision.num_channels"] = m.VisionModel.InChannels
|
||||
kv["vision.patch_size"] = cmp.Or(m.VisionModel.PatchSize, 14)
|
||||
kv["vision.spatial_merge_size"] = cmp.Or(m.VisionModel.SpatialMergeSize, 2)
|
||||
kv["vision.attention.layer_norm_epsilon"] = cmp.Or(m.VisionModel.RMSNormEps, 1e-6)
|
||||
kv["vision.rope.freq_base"] = cmp.Or(m.VisionModel.RopeTheta, 1e4)
|
||||
kv["vision.temporal_patch_size"] = cmp.Or(m.VisionModel.TemporalPatchSize, 2)
|
||||
kv["vision.deepstack_visual_indexes"] = m.VisionModel.DeepstackVisualIndexes
|
||||
|
||||
kv["vision.shortest_edge"] = m.VisionModel.Size.ShortestEdge
|
||||
kv["vision.longest_edge"] = m.VisionModel.Size.LongestEdge
|
||||
|
||||
kv["vision.image_mean"] = m.VisionModel.ImageMean
|
||||
kv["vision.image_std"] = m.VisionModel.ImageStd
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (m *qwen3VLModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
||||
var rest []Tensor
|
||||
var out []*ggml.Tensor
|
||||
for _, t := range ts {
|
||||
switch {
|
||||
case strings.Contains(t.Name(), "attn_qkv"):
|
||||
out = append(out, slices.Collect(splitDim(t, 0,
|
||||
split{Replacer: strings.NewReplacer("attn_qkv", "attn_q")},
|
||||
split{Replacer: strings.NewReplacer("attn_qkv", "attn_k")},
|
||||
split{Replacer: strings.NewReplacer("attn_qkv", "attn_v")},
|
||||
))...)
|
||||
case strings.Contains(t.Name(), "patch_embed") && strings.HasSuffix(t.Name(), "weight"):
|
||||
shape := t.Shape()
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: append([]uint64{shape[0] * shape[1]}, shape[2:]...),
|
||||
WriterTo: t,
|
||||
})
|
||||
default:
|
||||
rest = append(rest, t)
|
||||
}
|
||||
}
|
||||
|
||||
return append(m.qwen3Model.Tensors(rest), out...)
|
||||
}
|
||||
|
||||
func (m *qwen3VLModel) Replacements() []string {
|
||||
return append(
|
||||
m.qwen3Model.Replacements(),
|
||||
"model.language_", "",
|
||||
"model.visual", "v",
|
||||
"patch_embed.proj", "patch_embed",
|
||||
"blocks", "blk",
|
||||
"attn.qkv", "attn_qkv",
|
||||
"attn.proj", "attn_out",
|
||||
"deepstack_merger_list", "deepstack_merger",
|
||||
)
|
||||
}
|
||||
471
convert/convert_test.go
Normal file
471
convert/convert_test.go
Normal file
@@ -0,0 +1,471 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"maps"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
fsc "github.com/ollama/ollama/fs"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type tensorData struct {
|
||||
Offsets []int `json:"data_offsets"`
|
||||
Type string `json:"dtype"`
|
||||
Shape []int `json:"shape"`
|
||||
}
|
||||
|
||||
func convertFull(t *testing.T, fsys fs.FS) (*os.File, fsc.Config, ggml.Tensors) {
|
||||
t.Helper()
|
||||
|
||||
f, err := os.CreateTemp(t.TempDir(), "f16")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if err := ConvertModel(fsys, f); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
r, err := os.Open(f.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Cleanup(func() { r.Close() })
|
||||
|
||||
m, err := ggml.Decode(r, -1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
return r, m.KV(), m.Tensors()
|
||||
}
|
||||
|
||||
func generateResultsJSON(t *testing.T, f *os.File, kv fsc.Config, tensors ggml.Tensors) map[string]string {
|
||||
actual := make(map[string]string)
|
||||
for k := range kv.Keys() {
|
||||
v := kv.Value(k)
|
||||
if s, ok := v.(json.Marshaler); !ok {
|
||||
actual[k] = fmt.Sprintf("%v", v)
|
||||
} else {
|
||||
bts, err := json.Marshal(s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
|
||||
}
|
||||
}
|
||||
|
||||
for _, tensor := range tensors.Items() {
|
||||
sha256sum := sha256.New()
|
||||
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
|
||||
if _, err := io.Copy(sha256sum, sr); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
|
||||
}
|
||||
|
||||
return actual
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
var level slog.Level
|
||||
flag.TextVar(&level, "level", slog.LevelInfo, "log level")
|
||||
flag.Parse()
|
||||
slog.SetLogLoggerLevel(level)
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func TestConvertModel(t *testing.T) {
|
||||
cases := []string{
|
||||
"Meta-Llama-3-8B-Instruct",
|
||||
"Meta-Llama-3.1-8B-Instruct",
|
||||
"Mistral-7B-Instruct-v0.2",
|
||||
"Mixtral-8x7B-Instruct-v0.1",
|
||||
"gemma-2b-it",
|
||||
"gemma-2-2b-it",
|
||||
// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
|
||||
"Phi-3-mini-128k-instruct",
|
||||
"all-MiniLM-L6-v2",
|
||||
"gemma-2-9b-it",
|
||||
"Qwen2.5-0.5B-Instruct",
|
||||
"c4ai-command-r-v01",
|
||||
}
|
||||
|
||||
for i := range cases {
|
||||
tt := cases[i]
|
||||
t.Run(tt, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
p := filepath.Join("testdata", tt)
|
||||
if testing.Short() {
|
||||
t.Skip("skipping in short mode")
|
||||
} else if _, err := os.Stat(p); err != nil {
|
||||
t.Skipf("%s not found", p)
|
||||
}
|
||||
|
||||
f, kv, tensors := convertFull(t, os.DirFS(p))
|
||||
actual := generateResultsJSON(t, f, kv, tensors)
|
||||
|
||||
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer expectFile.Close()
|
||||
|
||||
var expect map[string]string
|
||||
if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, k := range slices.Sorted(maps.Keys(expect)) {
|
||||
if v, ok := actual[k]; !ok {
|
||||
t.Errorf("missing %s", k)
|
||||
} else if v != expect[k] {
|
||||
t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertInvalidTensorNames(t *testing.T) {
|
||||
f, err := os.CreateTemp(t.TempDir(), "testmodel")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
tempDir := t.TempDir()
|
||||
|
||||
td := map[string]*tensorData{}
|
||||
offset := 4096
|
||||
|
||||
td["model.layers.0.self_attn.q_proj.weight"] = &tensorData{
|
||||
Offsets: []int{0, offset},
|
||||
Type: "F32",
|
||||
Shape: []int{4096, 4096},
|
||||
}
|
||||
td["blk.0.attn_q.weight"] = &tensorData{
|
||||
Offsets: []int{offset, offset * 2},
|
||||
Type: "F32",
|
||||
Shape: []int{4096, 4096},
|
||||
}
|
||||
generateSafetensorTestData(t, tempDir, td)
|
||||
|
||||
err = ConvertModel(os.DirFS(tempDir), f)
|
||||
if err == nil || !strings.HasPrefix(err.Error(), "duplicate tensor name") {
|
||||
t.Errorf("expected error but didn't get one")
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertInvalidDatatype(t *testing.T) {
|
||||
f, err := os.CreateTemp(t.TempDir(), "testmodel")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
tempDir := t.TempDir()
|
||||
|
||||
td := map[string]*tensorData{}
|
||||
offset := 4096 * 14336
|
||||
|
||||
td["model.layers.0.mlp.down_proj.weight"] = &tensorData{
|
||||
Offsets: []int{0, offset},
|
||||
Type: "I8",
|
||||
Shape: []int{4096, 14336},
|
||||
}
|
||||
td["model.layers.0.mlp.down_proj.weight_format"] = &tensorData{
|
||||
Offsets: []int{offset, offset},
|
||||
Type: "U8",
|
||||
Shape: []int{},
|
||||
}
|
||||
generateSafetensorTestData(t, tempDir, td)
|
||||
|
||||
err = ConvertModel(os.DirFS(tempDir), f)
|
||||
if err == nil || !strings.Contains(err.Error(), "unknown data type") {
|
||||
t.Errorf("expected 'unknown data type' error but got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func generateSafetensorTestData(t *testing.T, tempDir string, tensorData map[string]*tensorData) {
|
||||
data, err := json.Marshal(tensorData)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
|
||||
l := int64(len(data))
|
||||
err = binary.Write(&buf, binary.LittleEndian, l)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
_, err = buf.Write(data)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
fdata, err := os.Create(filepath.Join(tempDir, "model-00001-of-00001.safetensors"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fdata.Close()
|
||||
|
||||
_, err = fdata.Write(buf.Bytes())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
configData := `
|
||||
{
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
]
|
||||
}
|
||||
`
|
||||
|
||||
f, err := os.Create(filepath.Join(tempDir, "config.json"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.WriteString(configData)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
tokenizerData := `
|
||||
{
|
||||
}
|
||||
`
|
||||
|
||||
f, err = os.Create(filepath.Join(tempDir, "tokenizer.json"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.WriteString(tokenizerData)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertAdapter(t *testing.T) {
|
||||
type AdapterCase struct {
|
||||
Name string
|
||||
BaseKV KV
|
||||
Expected map[string]string
|
||||
}
|
||||
|
||||
cases := []AdapterCase{
|
||||
{
|
||||
Name: "discollama",
|
||||
BaseKV: map[string]any{
|
||||
"general.architecture": "llama",
|
||||
"llama.attention.head_count": uint32(32),
|
||||
"llama.attention.head_count_kv": uint32(8),
|
||||
},
|
||||
Expected: map[string]string{
|
||||
"general.architecture": "llama",
|
||||
"general.file_type": "1",
|
||||
"general.parameter_count": "106496",
|
||||
"general.type": "adapter",
|
||||
"general.version": "v0.2",
|
||||
"adapter.lora.alpha": "16",
|
||||
"adapter.type": "lora",
|
||||
"llama.attention.head_count": "32",
|
||||
"llama.attention.head_count_kv": "8",
|
||||
"blk.31.attn_q.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||
"blk.31.attn_q.weight.lora_b": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||
"blk.31.attn_v.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||
"blk.31.attn_v.weight.lora_b": "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.Name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
f, err := os.CreateTemp(t.TempDir(), "f16")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
tempDir := t.TempDir()
|
||||
generateLoraTestData(t, tempDir)
|
||||
|
||||
if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
r, err := os.Open(f.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
m, err := ggml.Decode(r, -1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
|
||||
if diff := cmp.Diff(c.Expected, actual); diff != "" {
|
||||
t.Errorf("mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func generateLoraTestData(t *testing.T, tempDir string) {
|
||||
offset := 4096 * 8 * 4
|
||||
|
||||
td := map[string]*tensorData{"__metadata__": nil}
|
||||
td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
|
||||
Offsets: []int{0, offset},
|
||||
Type: "F32",
|
||||
Shape: []int{4096, 8},
|
||||
}
|
||||
td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
|
||||
Offsets: []int{offset, offset * 2},
|
||||
Type: "F32",
|
||||
Shape: []int{8, 4096},
|
||||
}
|
||||
td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
|
||||
Offsets: []int{offset * 2, offset * 3},
|
||||
Type: "F32",
|
||||
Shape: []int{4096, 8},
|
||||
}
|
||||
td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
|
||||
Offsets: []int{offset * 3, offset*3 + 8*1024*4},
|
||||
Type: "F32",
|
||||
Shape: []int{8, 1024},
|
||||
}
|
||||
|
||||
data, err := json.Marshal(td)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
|
||||
l := int64(len(data))
|
||||
err = binary.Write(&buf, binary.LittleEndian, l)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
_, err = buf.Write(data)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// write some data for the tensors
|
||||
|
||||
ones := make([]float32, 4096*8)
|
||||
for i := range ones {
|
||||
ones[i] = float32(1)
|
||||
}
|
||||
|
||||
for range 3 {
|
||||
err = binary.Write(&buf, binary.LittleEndian, ones)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
ones = make([]float32, 1024*8)
|
||||
for i := range ones {
|
||||
ones[i] = float32(1)
|
||||
}
|
||||
|
||||
err = binary.Write(&buf, binary.LittleEndian, ones)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fdata.Close()
|
||||
|
||||
_, err = fdata.Write(buf.Bytes())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
configData := `
|
||||
{
|
||||
"adapter_path": "adapters-test",
|
||||
"batch_size": 8,
|
||||
"config": "config-tiny.json",
|
||||
"data": "../discollama-completion",
|
||||
"grad_checkpoint": null,
|
||||
"iters": 1000,
|
||||
"learning_rate": 1e-05,
|
||||
"lora_layers": 1,
|
||||
"lora_parameters": {
|
||||
"rank": 8,
|
||||
"alpha": 16,
|
||||
"dropout": 0.0,
|
||||
"scale": 2.0
|
||||
},
|
||||
"lr_schedule": null,
|
||||
"max_seq_length": 2048,
|
||||
"model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
|
||||
"resume_adapter_file": null,
|
||||
"save_every": 100,
|
||||
"seed": 0,
|
||||
"steps_per_eval": 200,
|
||||
"steps_per_report": 10,
|
||||
"test": false,
|
||||
"test_batches": 500,
|
||||
"train": true,
|
||||
"use_dora": false,
|
||||
"val_batches": 25
|
||||
}
|
||||
`
|
||||
f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.WriteString(configData)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
97
convert/json_compat.go
Normal file
97
convert/json_compat.go
Normal file
@@ -0,0 +1,97 @@
|
||||
package convert
|
||||
|
||||
// sanitizeNonFiniteJSON rewrites non-standard JSON numeric tokens that some
|
||||
// HF configs emit (Infinity, -Infinity, NaN) into standard JSON numbers.
|
||||
//
|
||||
// This is intentionally conservative:
|
||||
// - only runs outside quoted strings
|
||||
// - only rewrites full tokens
|
||||
//
|
||||
// We map these values to 0 because encoding/json rejects non-finite values,
|
||||
// and these fields are typically model-side metadata not consumed by the
|
||||
// converter.
|
||||
func sanitizeNonFiniteJSON(in []byte) []byte {
|
||||
if len(in) == 0 {
|
||||
return in
|
||||
}
|
||||
|
||||
out := make([]byte, 0, len(in))
|
||||
inString := false
|
||||
escape := false
|
||||
|
||||
for i := 0; i < len(in); {
|
||||
c := in[i]
|
||||
|
||||
if inString {
|
||||
out = append(out, c)
|
||||
if escape {
|
||||
escape = false
|
||||
} else if c == '\\' {
|
||||
escape = true
|
||||
} else if c == '"' {
|
||||
inString = false
|
||||
}
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
if c == '"' {
|
||||
inString = true
|
||||
out = append(out, c)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
if hasToken(in, i, "-Infinity") {
|
||||
out = append(out, '0')
|
||||
i += len("-Infinity")
|
||||
continue
|
||||
}
|
||||
|
||||
if hasToken(in, i, "Infinity") {
|
||||
out = append(out, '0')
|
||||
i += len("Infinity")
|
||||
continue
|
||||
}
|
||||
|
||||
if hasToken(in, i, "NaN") {
|
||||
out = append(out, '0')
|
||||
i += len("NaN")
|
||||
continue
|
||||
}
|
||||
|
||||
out = append(out, c)
|
||||
i++
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func hasToken(in []byte, at int, tok string) bool {
|
||||
end := at + len(tok)
|
||||
if at < 0 || end > len(in) {
|
||||
return false
|
||||
}
|
||||
if string(in[at:end]) != tok {
|
||||
return false
|
||||
}
|
||||
if at > 0 && !isJSONValuePrefixBoundary(in[at-1]) {
|
||||
return false
|
||||
}
|
||||
if end < len(in) && !isJSONValueSuffixBoundary(in[end]) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func isJSONWhitespace(b byte) bool {
|
||||
return b == ' ' || b == '\t' || b == '\n' || b == '\r'
|
||||
}
|
||||
|
||||
func isJSONValuePrefixBoundary(b byte) bool {
|
||||
return isJSONWhitespace(b) || b == ':' || b == ',' || b == '['
|
||||
}
|
||||
|
||||
func isJSONValueSuffixBoundary(b byte) bool {
|
||||
return isJSONWhitespace(b) || b == ',' || b == ']' || b == '}'
|
||||
}
|
||||
46
convert/json_compat_test.go
Normal file
46
convert/json_compat_test.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package convert
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestSanitizeNonFiniteJSON(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "infinity token",
|
||||
in: `{"a":[0,Infinity,1]}`,
|
||||
want: `{"a":[0,0,1]}`,
|
||||
},
|
||||
{
|
||||
name: "negative infinity token",
|
||||
in: `{"a":-Infinity}`,
|
||||
want: `{"a":0}`,
|
||||
},
|
||||
{
|
||||
name: "nan token",
|
||||
in: `{"a":NaN}`,
|
||||
want: `{"a":0}`,
|
||||
},
|
||||
{
|
||||
name: "tokens inside strings untouched",
|
||||
in: `{"a":"Infinity -Infinity NaN","b":Infinity}`,
|
||||
want: `{"a":"Infinity -Infinity NaN","b":0}`,
|
||||
},
|
||||
{
|
||||
name: "identifier-like token untouched",
|
||||
in: `{"a":InfinityValue}`,
|
||||
want: `{"a":InfinityValue}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := string(sanitizeNonFiniteJSON([]byte(tt.in)))
|
||||
if got != tt.want {
|
||||
t.Fatalf("sanitizeNonFiniteJSON() = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
101
convert/reader.go
Normal file
101
convert/reader.go
Normal file
@@ -0,0 +1,101 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"io/fs"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type Tensor interface {
|
||||
Name() string
|
||||
Shape() []uint64
|
||||
Kind() uint32
|
||||
SetRepacker(Repacker)
|
||||
WriteTo(io.Writer) (int64, error)
|
||||
Clone() Tensor
|
||||
}
|
||||
|
||||
type tensorBase struct {
|
||||
name string
|
||||
shape []uint64
|
||||
repacker Repacker
|
||||
}
|
||||
|
||||
func (t tensorBase) Name() string {
|
||||
return t.name
|
||||
}
|
||||
|
||||
func (t tensorBase) Shape() []uint64 {
|
||||
return t.shape
|
||||
}
|
||||
|
||||
const (
|
||||
tensorKindFP32 uint32 = iota
|
||||
tensorKindFP16
|
||||
tensorKindBF16 = 30
|
||||
tensorKindMXFP4 = 39
|
||||
)
|
||||
|
||||
func (t tensorBase) Kind() uint32 {
|
||||
if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
|
||||
strings.HasSuffix(t.name, ".bias") ||
|
||||
strings.HasSuffix(t.name, ".shortconv.conv.weight") ||
|
||||
strings.HasSuffix(t.name, ".ssm_conv1d.weight") || // SSM conv kernel must be F32 for Metal
|
||||
strings.HasPrefix(t.name, "a.feature_extractor.") || // audio feature-extractor constants are read with BackendGet and must be real F32 values
|
||||
strings.HasPrefix(t.name, "a.conv1d.") || // audio SSCP conv weights are kept F32 for im2col; this likely slows audio and should be revisited
|
||||
strings.HasPrefix(t.name, "a.subsampling.") || // audio Parakeet subsampling weights are kept F32 for conv/linear stability; this likely slows audio and should be revisited
|
||||
strings.Contains(t.name, ".conv_dw.") || // audio depthwise conv weights are kept F32; this likely slows audio and should be revisited
|
||||
t.name == "token_types.weight" ||
|
||||
t.name == "v.positional_embedding_vlm" ||
|
||||
t.name == "v.position_embd.weight" ||
|
||||
t.name == "v.tile_position_embd.weight" ||
|
||||
t.name == "v.pre_tile_position_embd.weight" ||
|
||||
t.name == "v.post_tile_position_embd.weight" ||
|
||||
t.name == "s.position_embd" ||
|
||||
strings.HasSuffix(t.name, "rel_pos_h") ||
|
||||
strings.HasSuffix(t.name, "rel_pos_w") {
|
||||
// these tensors are always F32
|
||||
return tensorKindFP32
|
||||
}
|
||||
|
||||
switch len(t.shape) {
|
||||
case 0:
|
||||
panic("invalid tensor shape")
|
||||
case 1:
|
||||
return tensorKindFP32
|
||||
default:
|
||||
return tensorKindFP16
|
||||
}
|
||||
}
|
||||
|
||||
func (t *tensorBase) SetRepacker(fn Repacker) {
|
||||
t.repacker = fn
|
||||
}
|
||||
|
||||
type Repacker func(string, []float32, []uint64) ([]float32, error)
|
||||
|
||||
func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
|
||||
patterns := []struct {
|
||||
Pattern string
|
||||
Func func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
|
||||
}{
|
||||
{"*.safetensors", parseSafetensors},
|
||||
{"pytorch_model-*-of-*.bin", parseTorch},
|
||||
{"pytorch_model.bin", parseTorch},
|
||||
{"consolidated.*.pth", parseTorch},
|
||||
}
|
||||
|
||||
for _, pattern := range patterns {
|
||||
matches, err := fs.Glob(fsys, pattern.Pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(matches) > 0 {
|
||||
return pattern.Func(fsys, replacer, matches...)
|
||||
}
|
||||
}
|
||||
|
||||
return nil, errors.New("unknown tensor format")
|
||||
}
|
||||
613
convert/reader_safetensors.go
Normal file
613
convert/reader_safetensors.go
Normal file
@@ -0,0 +1,613 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"maps"
|
||||
"math"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/d4l3k/go-bfloat16"
|
||||
"github.com/x448/float16"
|
||||
)
|
||||
|
||||
type safetensorMetadata struct {
|
||||
Type string `json:"dtype"`
|
||||
Shape []uint64 `json:"shape"`
|
||||
Offsets []int64 `json:"data_offsets"`
|
||||
}
|
||||
|
||||
func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
|
||||
fp8Block, err := safetensorsFP8BlockSize(fsys)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var ts []Tensor
|
||||
for _, p := range ps {
|
||||
f, err := fsys.Open(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var n int64
|
||||
if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
b := bytes.NewBuffer(make([]byte, 0, n))
|
||||
if _, err = io.CopyN(b, f, n); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var headers map[string]safetensorMetadata
|
||||
if err := json.NewDecoder(b).Decode(&headers); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
keys := slices.Sorted(maps.Keys(headers))
|
||||
|
||||
names := make(map[string]struct{}, len(keys))
|
||||
|
||||
fp8Scales, err := collectSafetensorsFP8Scales(n, headers)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, key := range keys {
|
||||
if value := headers[key]; value.Type != "" {
|
||||
if _, ok := fp8Scales.consumed[key]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
// Scalar tensors (e.g. clipped linear min/max) are 0-dim in safetensors.
|
||||
// Promote them to 1-dim so they can be stored in GGUF.
|
||||
if len(value.Shape) == 0 {
|
||||
value.Shape = []uint64{1}
|
||||
}
|
||||
|
||||
var scale *safetensorScale
|
||||
if value.Type == "F8_E4M3" {
|
||||
if !fp8Block.ok {
|
||||
return nil, fmt.Errorf("missing fp8 block size metadata for tensor %q", key)
|
||||
}
|
||||
scale = fp8Scales.byWeight[key]
|
||||
if scale == nil {
|
||||
return nil, fmt.Errorf("missing fp8 scale companion for tensor %q", key)
|
||||
}
|
||||
}
|
||||
|
||||
ggufName := replacer.Replace(key)
|
||||
if _, ok := names[ggufName]; ok {
|
||||
return nil, fmt.Errorf("duplicate tensor name '%s' was found for this model", ggufName)
|
||||
}
|
||||
names[ggufName] = struct{}{}
|
||||
ts = append(ts, safetensor{
|
||||
fs: fsys,
|
||||
path: p,
|
||||
dtype: value.Type,
|
||||
offset: safetensorsPad(n, value.Offsets[0]),
|
||||
size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
|
||||
scale: scale,
|
||||
fp8Block: fp8Block,
|
||||
tensorBase: &tensorBase{
|
||||
name: ggufName,
|
||||
shape: value.Shape,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ts, nil
|
||||
}
|
||||
|
||||
// safetensorsPad returns the padded size of the safetensors file given a length n and offset s
|
||||
func safetensorsPad(n, offset int64) int64 {
|
||||
return 8 + n + offset
|
||||
}
|
||||
|
||||
type safetensorScale struct {
|
||||
name string
|
||||
dtype string
|
||||
shape []uint64
|
||||
offset int64
|
||||
size int64
|
||||
}
|
||||
|
||||
type safetensor struct {
|
||||
fs fs.FS
|
||||
path string
|
||||
dtype string
|
||||
offset int64
|
||||
size int64
|
||||
scale *safetensorScale
|
||||
fp8Block safetensorFP8BlockSize
|
||||
*tensorBase
|
||||
}
|
||||
|
||||
func (st safetensor) Kind() uint32 {
|
||||
kind := st.tensorBase.Kind()
|
||||
if st.dtype == "BF16" &&
|
||||
!strings.HasPrefix(st.name, "v.") &&
|
||||
!strings.HasPrefix(st.name, "s.") &&
|
||||
!strings.HasPrefix(st.name, "mm.") &&
|
||||
!strings.Contains(st.name, "ffn_gate_inp_shexp.weight") &&
|
||||
kind != tensorKindFP32 {
|
||||
kind = tensorKindBF16
|
||||
}
|
||||
if st.dtype == "F8_E4M3" && kind != tensorKindFP32 {
|
||||
kind = tensorKindBF16
|
||||
}
|
||||
|
||||
return kind
|
||||
}
|
||||
|
||||
func (st safetensor) SourceDType() string {
|
||||
return st.dtype
|
||||
}
|
||||
|
||||
func (st safetensor) Clone() Tensor {
|
||||
return &safetensor{
|
||||
fs: st.fs,
|
||||
path: st.path,
|
||||
dtype: st.dtype,
|
||||
offset: st.offset,
|
||||
size: st.size,
|
||||
scale: st.scale.Clone(),
|
||||
fp8Block: st.fp8Block,
|
||||
tensorBase: &tensorBase{
|
||||
name: st.name,
|
||||
repacker: st.repacker,
|
||||
shape: slices.Clone(st.shape),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (ss *safetensorScale) Clone() *safetensorScale {
|
||||
if ss == nil {
|
||||
return nil
|
||||
}
|
||||
return &safetensorScale{
|
||||
name: ss.name,
|
||||
dtype: ss.dtype,
|
||||
shape: slices.Clone(ss.shape),
|
||||
offset: ss.offset,
|
||||
size: ss.size,
|
||||
}
|
||||
}
|
||||
|
||||
func (st safetensor) WriteTo(w io.Writer) (int64, error) {
|
||||
f, err := st.fs.Open(st.path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
r, err := func() (io.Reader, error) {
|
||||
if readerAt, ok := f.(io.ReaderAt); ok {
|
||||
return io.NewSectionReader(readerAt, st.offset, st.size), nil
|
||||
} else if seeker, ok := f.(io.Seeker); ok {
|
||||
_, err := seeker.Seek(st.offset, io.SeekStart)
|
||||
return f, err
|
||||
} else {
|
||||
_, err := io.CopyN(io.Discard, f, st.offset)
|
||||
return f, err
|
||||
}
|
||||
}()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
br := bufio.NewReaderSize(r, min(32<<10, int(st.size)))
|
||||
// special case when input and output are same type and the
|
||||
// tensor doesn't need repacking
|
||||
if (st.repacker == nil) &&
|
||||
((st.dtype == "F32" && st.Kind() == tensorKindFP32) ||
|
||||
(st.dtype == "F16" && st.Kind() == tensorKindFP16) ||
|
||||
(st.dtype == "U8")) {
|
||||
return io.CopyN(w, br, st.size)
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
switch st.dtype {
|
||||
case "F32":
|
||||
f32s = make([]float32, st.size/4)
|
||||
if err = binary.Read(br, binary.LittleEndian, f32s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
case "F16":
|
||||
u16s := make([]uint16, st.size/2)
|
||||
if err = binary.Read(br, binary.LittleEndian, u16s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
f32s = make([]float32, len(u16s))
|
||||
for i := range u16s {
|
||||
f32s[i] = float16.Frombits(u16s[i]).Float32()
|
||||
}
|
||||
|
||||
case "BF16":
|
||||
u8s := make([]uint8, st.size)
|
||||
if err = binary.Read(br, binary.LittleEndian, u8s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
f32s = bfloat16.DecodeFloat32(u8s)
|
||||
case "F8_E4M3":
|
||||
u8s := make([]uint8, st.size)
|
||||
if err = binary.Read(br, binary.LittleEndian, u8s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
f32s, err = st.decodeFP8E4M3(u8s)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown data type: %s", st.dtype)
|
||||
}
|
||||
|
||||
if st.repacker != nil {
|
||||
f32s, err = st.repacker(st.Name(), f32s, st.Shape())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
switch st.Kind() {
|
||||
case tensorKindFP32:
|
||||
return int64(len(f32s) * 4), binary.Write(w, binary.LittleEndian, f32s)
|
||||
case tensorKindFP16:
|
||||
f16s := make([]uint16, len(f32s))
|
||||
for i := range f32s {
|
||||
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
|
||||
}
|
||||
|
||||
return int64(len(f16s) * 2), binary.Write(w, binary.LittleEndian, f16s)
|
||||
case tensorKindBF16:
|
||||
u8s := bfloat16.EncodeFloat32(f32s)
|
||||
return int64(len(u8s)), binary.Write(w, binary.LittleEndian, u8s)
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown storage type: %d", st.Kind())
|
||||
}
|
||||
}
|
||||
|
||||
type safetensorsFP8Scales struct {
|
||||
byWeight map[string]*safetensorScale
|
||||
consumed map[string]struct{}
|
||||
}
|
||||
|
||||
func collectSafetensorsFP8Scales(n int64, headers map[string]safetensorMetadata) (safetensorsFP8Scales, error) {
|
||||
scales := safetensorsFP8Scales{
|
||||
byWeight: make(map[string]*safetensorScale),
|
||||
consumed: make(map[string]struct{}),
|
||||
}
|
||||
|
||||
for key, value := range headers {
|
||||
if value.Type != "F8_E4M3" {
|
||||
continue
|
||||
}
|
||||
|
||||
scaleKey, scaleValue, ok, err := safetensorsFP8Scale(key, headers)
|
||||
if err != nil {
|
||||
return safetensorsFP8Scales{}, err
|
||||
}
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, ok := scales.consumed[scaleKey]; ok {
|
||||
return safetensorsFP8Scales{}, fmt.Errorf("fp8 scale companion %q is used by multiple tensors", scaleKey)
|
||||
}
|
||||
|
||||
scales.byWeight[key] = &safetensorScale{
|
||||
name: scaleKey,
|
||||
dtype: scaleValue.Type,
|
||||
shape: slices.Clone(scaleValue.Shape),
|
||||
offset: safetensorsPad(n, scaleValue.Offsets[0]),
|
||||
size: safetensorsPad(n, scaleValue.Offsets[1]) - safetensorsPad(n, scaleValue.Offsets[0]),
|
||||
}
|
||||
scales.consumed[scaleKey] = struct{}{}
|
||||
}
|
||||
|
||||
return scales, nil
|
||||
}
|
||||
|
||||
func safetensorsFP8Scale(key string, headers map[string]safetensorMetadata) (string, safetensorMetadata, bool, error) {
|
||||
candidates := safetensorsFP8ScaleCandidates(key)
|
||||
|
||||
var scaleKey string
|
||||
var scaleValue safetensorMetadata
|
||||
if strings.HasSuffix(key, ".weight") {
|
||||
// Keep support for compressed-tensors exports that place the scale name
|
||||
// between the module path and weight suffix.
|
||||
base := strings.TrimSuffix(key, ".weight")
|
||||
candidates = appendUnique(candidates, base+".weight_scale")
|
||||
candidates = appendUnique(candidates, base+".weight_scale_inv")
|
||||
}
|
||||
|
||||
for _, candidate := range candidates {
|
||||
if value, ok := headers[candidate]; ok && value.Type != "" {
|
||||
if scaleKey != "" {
|
||||
return "", safetensorMetadata{}, false, fmt.Errorf("multiple fp8 scale companions for tensor %q: %q and %q", key, scaleKey, candidate)
|
||||
}
|
||||
scaleKey = candidate
|
||||
scaleValue = value
|
||||
}
|
||||
}
|
||||
if scaleKey == "" {
|
||||
return "", safetensorMetadata{}, false, nil
|
||||
}
|
||||
|
||||
return scaleKey, scaleValue, true, nil
|
||||
}
|
||||
|
||||
func safetensorsFP8ScaleCandidates(key string) []string {
|
||||
var candidates []string
|
||||
candidates = appendUnique(candidates, key+"_scale")
|
||||
candidates = appendUnique(candidates, key+"_scale_inv")
|
||||
candidates = appendUnique(candidates, key+".scale")
|
||||
candidates = appendUnique(candidates, key+".scale_inv")
|
||||
return candidates
|
||||
}
|
||||
|
||||
func appendUnique(values []string, value string) []string {
|
||||
if !slices.Contains(values, value) {
|
||||
values = append(values, value)
|
||||
}
|
||||
return values
|
||||
}
|
||||
|
||||
type safetensorFP8BlockSize struct {
|
||||
rows int
|
||||
cols int
|
||||
ok bool
|
||||
}
|
||||
|
||||
type safetensorsSourceQuantization struct {
|
||||
QuantMethod string `json:"quant_method"`
|
||||
Format string `json:"format"`
|
||||
WeightBlockSize []int `json:"weight_block_size"`
|
||||
ConfigGroups map[string]struct {
|
||||
Format string `json:"format"`
|
||||
Weights struct {
|
||||
BlockStructure []int `json:"block_structure"`
|
||||
NumBits int `json:"num_bits"`
|
||||
Type string `json:"type"`
|
||||
} `json:"weights"`
|
||||
} `json:"config_groups"`
|
||||
}
|
||||
|
||||
type safetensorsModelConfig struct {
|
||||
Quantization safetensorsSourceQuantization `json:"quantization"`
|
||||
QuantizationConfig safetensorsSourceQuantization `json:"quantization_config"`
|
||||
CompressionConfig safetensorsSourceQuantization `json:"compression_config"`
|
||||
TextConfig struct {
|
||||
Quantization safetensorsSourceQuantization `json:"quantization"`
|
||||
QuantizationConfig safetensorsSourceQuantization `json:"quantization_config"`
|
||||
CompressionConfig safetensorsSourceQuantization `json:"compression_config"`
|
||||
} `json:"text_config"`
|
||||
}
|
||||
|
||||
func safetensorsFP8BlockSize(fsys fs.FS) (safetensorFP8BlockSize, error) {
|
||||
bts, err := fs.ReadFile(fsys, "config.json")
|
||||
if errors.Is(err, fs.ErrNotExist) {
|
||||
return safetensorFP8BlockSize{}, nil
|
||||
}
|
||||
if err != nil {
|
||||
return safetensorFP8BlockSize{}, err
|
||||
}
|
||||
bts = sanitizeNonFiniteJSON(bts)
|
||||
|
||||
var cfg safetensorsModelConfig
|
||||
if err := json.Unmarshal(bts, &cfg); err != nil {
|
||||
return safetensorFP8BlockSize{}, fmt.Errorf("parse config.json fp8 metadata: %w", err)
|
||||
}
|
||||
|
||||
var blocks []safetensorFP8BlockSize
|
||||
for _, q := range []safetensorsSourceQuantization{
|
||||
cfg.Quantization,
|
||||
cfg.QuantizationConfig,
|
||||
cfg.CompressionConfig,
|
||||
cfg.TextConfig.Quantization,
|
||||
cfg.TextConfig.QuantizationConfig,
|
||||
cfg.TextConfig.CompressionConfig,
|
||||
} {
|
||||
if strings.EqualFold(q.QuantMethod, "fp8") && len(q.WeightBlockSize) == 2 {
|
||||
block, err := newSafetensorFP8BlockSize(q.WeightBlockSize[0], q.WeightBlockSize[1])
|
||||
if err != nil {
|
||||
return safetensorFP8BlockSize{}, err
|
||||
}
|
||||
blocks = append(blocks, block)
|
||||
}
|
||||
|
||||
if !strings.EqualFold(q.QuantMethod, "compressed-tensors") && !strings.EqualFold(q.Format, "float-quantized") {
|
||||
continue
|
||||
}
|
||||
for _, group := range q.ConfigGroups {
|
||||
if !strings.EqualFold(group.Format, "float-quantized") ||
|
||||
group.Weights.NumBits != 8 ||
|
||||
!strings.EqualFold(group.Weights.Type, "float") ||
|
||||
len(group.Weights.BlockStructure) != 2 {
|
||||
continue
|
||||
}
|
||||
block, err := newSafetensorFP8BlockSize(group.Weights.BlockStructure[0], group.Weights.BlockStructure[1])
|
||||
if err != nil {
|
||||
return safetensorFP8BlockSize{}, err
|
||||
}
|
||||
blocks = append(blocks, block)
|
||||
}
|
||||
}
|
||||
|
||||
if len(blocks) == 0 {
|
||||
return safetensorFP8BlockSize{}, nil
|
||||
}
|
||||
|
||||
block := blocks[0]
|
||||
for _, other := range blocks[1:] {
|
||||
if other.rows != block.rows || other.cols != block.cols {
|
||||
return safetensorFP8BlockSize{}, fmt.Errorf("multiple fp8 block sizes in config.json: %dx%d and %dx%d", block.rows, block.cols, other.rows, other.cols)
|
||||
}
|
||||
}
|
||||
return block, nil
|
||||
}
|
||||
|
||||
func newSafetensorFP8BlockSize(rows, cols int) (safetensorFP8BlockSize, error) {
|
||||
if rows <= 0 || cols <= 0 {
|
||||
return safetensorFP8BlockSize{}, fmt.Errorf("invalid fp8 block size %dx%d", rows, cols)
|
||||
}
|
||||
return safetensorFP8BlockSize{rows: rows, cols: cols, ok: true}, nil
|
||||
}
|
||||
|
||||
func (st safetensor) decodeFP8E4M3(data []byte) ([]float32, error) {
|
||||
if st.scale == nil {
|
||||
return nil, fmt.Errorf("missing fp8 scale companion for tensor %q", st.name)
|
||||
}
|
||||
if !st.fp8Block.ok {
|
||||
return nil, fmt.Errorf("missing fp8 block size metadata for tensor %q", st.name)
|
||||
}
|
||||
if len(st.shape) != 2 {
|
||||
return nil, fmt.Errorf("expected 2D fp8 tensor %q, got shape %v", st.name, st.shape)
|
||||
}
|
||||
|
||||
rows, cols := int(st.shape[0]), int(st.shape[1])
|
||||
if rows < 0 || cols < 0 || rows*cols != len(data) {
|
||||
return nil, fmt.Errorf("fp8 tensor %q shape %v does not match %d bytes", st.name, st.shape, len(data))
|
||||
}
|
||||
|
||||
scale, err := st.readScale()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(st.scale.shape) != 2 {
|
||||
return nil, fmt.Errorf("expected 2D fp8 scale tensor %q, got shape %v", st.scale.name, st.scale.shape)
|
||||
}
|
||||
|
||||
blockRows := st.fp8Block.rows
|
||||
blockCols := st.fp8Block.cols
|
||||
scaleRows, scaleCols := int(st.scale.shape[0]), int(st.scale.shape[1])
|
||||
expectedRows := (rows + blockRows - 1) / blockRows
|
||||
expectedCols := (cols + blockCols - 1) / blockCols
|
||||
if scaleRows != expectedRows || scaleCols != expectedCols {
|
||||
return nil, fmt.Errorf("unexpected fp8 scale shape %v for tensor %q shape %v; want [%d %d]", st.scale.shape, st.name, st.shape, expectedRows, expectedCols)
|
||||
}
|
||||
if len(scale) != scaleRows*scaleCols {
|
||||
return nil, fmt.Errorf("fp8 scale tensor %q shape %v does not match decoded length %d", st.scale.name, st.scale.shape, len(scale))
|
||||
}
|
||||
|
||||
f32s := make([]float32, len(data))
|
||||
for r := range rows {
|
||||
scaleRow := r / blockRows
|
||||
rowOffset := r * cols
|
||||
for c := range cols {
|
||||
f32s[rowOffset+c] = decodeFloat8E4M3FN(data[rowOffset+c]) * scale[scaleRow*scaleCols+c/blockCols]
|
||||
}
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
|
||||
func (st safetensor) readScale() ([]float32, error) {
|
||||
r, err := st.sectionReader(st.scale.offset, st.scale.size)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read fp8 scale tensor %q: %w", st.scale.name, err)
|
||||
}
|
||||
if closer, ok := r.(io.Closer); ok {
|
||||
defer closer.Close()
|
||||
}
|
||||
br := bufio.NewReaderSize(r, min(32<<10, int(st.scale.size)))
|
||||
|
||||
switch st.scale.dtype {
|
||||
case "F32":
|
||||
f32s := make([]float32, st.scale.size/4)
|
||||
if err := binary.Read(br, binary.LittleEndian, f32s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return f32s, nil
|
||||
case "F16":
|
||||
u16s := make([]uint16, st.scale.size/2)
|
||||
if err := binary.Read(br, binary.LittleEndian, u16s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f32s := make([]float32, len(u16s))
|
||||
for i := range u16s {
|
||||
f32s[i] = float16.Frombits(u16s[i]).Float32()
|
||||
}
|
||||
return f32s, nil
|
||||
case "BF16":
|
||||
u8s := make([]uint8, st.scale.size)
|
||||
if err := binary.Read(br, binary.LittleEndian, u8s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return bfloat16.DecodeFloat32(u8s), nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported fp8 scale dtype %q for tensor %q", st.scale.dtype, st.scale.name)
|
||||
}
|
||||
}
|
||||
|
||||
func (st safetensor) sectionReader(offset, size int64) (io.Reader, error) {
|
||||
f, err := st.fs.Open(st.path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if readerAt, ok := f.(io.ReaderAt); ok {
|
||||
return &readCloserReader{
|
||||
Reader: io.NewSectionReader(readerAt, offset, size),
|
||||
Closer: f,
|
||||
}, nil
|
||||
}
|
||||
if seeker, ok := f.(io.Seeker); ok {
|
||||
if _, err := seeker.Seek(offset, io.SeekStart); err != nil {
|
||||
f.Close()
|
||||
return nil, err
|
||||
}
|
||||
return &readCloserReader{
|
||||
Reader: io.LimitReader(f, size),
|
||||
Closer: f,
|
||||
}, nil
|
||||
}
|
||||
if _, err := io.CopyN(io.Discard, f, offset); err != nil {
|
||||
f.Close()
|
||||
return nil, err
|
||||
}
|
||||
return &readCloserReader{
|
||||
Reader: io.LimitReader(f, size),
|
||||
Closer: f,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type readCloserReader struct {
|
||||
io.Reader
|
||||
io.Closer
|
||||
}
|
||||
|
||||
func decodeFloat8E4M3FN(v byte) float32 {
|
||||
sign := float32(1)
|
||||
if v&0x80 != 0 {
|
||||
sign = -1
|
||||
}
|
||||
|
||||
exp := int((v >> 3) & 0x0f)
|
||||
mant := int(v & 0x07)
|
||||
if exp == 0 {
|
||||
if mant == 0 {
|
||||
return 0 * sign
|
||||
}
|
||||
return sign * float32(math.Ldexp(float64(mant)/8, -6))
|
||||
}
|
||||
if exp == 0x0f && mant == 0x07 {
|
||||
return float32(math.NaN())
|
||||
}
|
||||
|
||||
return sign * float32(math.Ldexp(1+float64(mant)/8, exp-7))
|
||||
}
|
||||
523
convert/reader_test.go
Normal file
523
convert/reader_test.go
Normal file
@@ -0,0 +1,523 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/d4l3k/go-bfloat16"
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/x448/float16"
|
||||
)
|
||||
|
||||
func TestSafetensors(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
root, err := os.OpenRoot(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer root.Close()
|
||||
|
||||
cases := []struct {
|
||||
name,
|
||||
dtype string
|
||||
offset,
|
||||
size int64
|
||||
shape []uint64
|
||||
setup func(*testing.T, *os.File)
|
||||
want []byte
|
||||
}{
|
||||
{
|
||||
name: "fp32-fp32",
|
||||
dtype: "F32",
|
||||
size: 32 * 4, // 32 floats, each 4 bytes
|
||||
shape: []uint64{32},
|
||||
setup: func(t *testing.T, f *os.File) {
|
||||
f32s := make([]float32, 32)
|
||||
for i := range f32s {
|
||||
f32s[i] = float32(i)
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, f32s); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
},
|
||||
want: []byte{
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40,
|
||||
0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0x40,
|
||||
0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x30, 0x41,
|
||||
0x00, 0x00, 0x40, 0x41, 0x00, 0x00, 0x50, 0x41, 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41,
|
||||
0x00, 0x00, 0x80, 0x41, 0x00, 0x00, 0x88, 0x41, 0x00, 0x00, 0x90, 0x41, 0x00, 0x00, 0x98, 0x41,
|
||||
0x00, 0x00, 0xa0, 0x41, 0x00, 0x00, 0xa8, 0x41, 0x00, 0x00, 0xb0, 0x41, 0x00, 0x00, 0xb8, 0x41,
|
||||
0x00, 0x00, 0xc0, 0x41, 0x00, 0x00, 0xc8, 0x41, 0x00, 0x00, 0xd0, 0x41, 0x00, 0x00, 0xd8, 0x41,
|
||||
0x00, 0x00, 0xe0, 0x41, 0x00, 0x00, 0xe8, 0x41, 0x00, 0x00, 0xf0, 0x41, 0x00, 0x00, 0xf8, 0x41,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "fp32-fp16",
|
||||
dtype: "F32",
|
||||
size: 32 * 4, // 32 floats, each 4 bytes
|
||||
shape: []uint64{16, 2},
|
||||
setup: func(t *testing.T, f *os.File) {
|
||||
f32s := make([]float32, 32)
|
||||
for i := range f32s {
|
||||
f32s[i] = float32(i)
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, f32s); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
},
|
||||
want: []byte{
|
||||
0x00, 0x00, 0x00, 0x3c, 0x00, 0x40, 0x00, 0x42, 0x00, 0x44, 0x00, 0x45, 0x00, 0x46, 0x00, 0x47,
|
||||
0x00, 0x48, 0x80, 0x48, 0x00, 0x49, 0x80, 0x49, 0x00, 0x4a, 0x80, 0x4a, 0x00, 0x4b, 0x80, 0x4b,
|
||||
0x00, 0x4c, 0x40, 0x4c, 0x80, 0x4c, 0xc0, 0x4c, 0x00, 0x4d, 0x40, 0x4d, 0x80, 0x4d, 0xc0, 0x4d,
|
||||
0x00, 0x4e, 0x40, 0x4e, 0x80, 0x4e, 0xc0, 0x4e, 0x00, 0x4f, 0x40, 0x4f, 0x80, 0x4f, 0xc0, 0x4f,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "fp16-fp16",
|
||||
dtype: "F16",
|
||||
size: 32 * 2, // 32 floats, each 2 bytes
|
||||
shape: []uint64{16, 2},
|
||||
setup: func(t *testing.T, f *os.File) {
|
||||
u16s := make([]uint16, 32)
|
||||
for i := range u16s {
|
||||
u16s[i] = float16.Fromfloat32(float32(i)).Bits()
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, u16s); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
},
|
||||
want: []byte{
|
||||
0x00, 0x00, 0x00, 0x3c, 0x00, 0x40, 0x00, 0x42, 0x00, 0x44, 0x00, 0x45, 0x00, 0x46, 0x00, 0x47,
|
||||
0x00, 0x48, 0x80, 0x48, 0x00, 0x49, 0x80, 0x49, 0x00, 0x4a, 0x80, 0x4a, 0x00, 0x4b, 0x80, 0x4b,
|
||||
0x00, 0x4c, 0x40, 0x4c, 0x80, 0x4c, 0xc0, 0x4c, 0x00, 0x4d, 0x40, 0x4d, 0x80, 0x4d, 0xc0, 0x4d,
|
||||
0x00, 0x4e, 0x40, 0x4e, 0x80, 0x4e, 0xc0, 0x4e, 0x00, 0x4f, 0x40, 0x4f, 0x80, 0x4f, 0xc0, 0x4f,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "fp16-fp32",
|
||||
dtype: "F16",
|
||||
size: 32 * 2, // 32 floats, each 2 bytes
|
||||
shape: []uint64{32},
|
||||
setup: func(t *testing.T, f *os.File) {
|
||||
u16s := make([]uint16, 32)
|
||||
for i := range u16s {
|
||||
u16s[i] = float16.Fromfloat32(float32(i)).Bits()
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, u16s); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
},
|
||||
want: []byte{
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40,
|
||||
0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0x40,
|
||||
0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x30, 0x41,
|
||||
0x00, 0x00, 0x40, 0x41, 0x00, 0x00, 0x50, 0x41, 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41,
|
||||
0x00, 0x00, 0x80, 0x41, 0x00, 0x00, 0x88, 0x41, 0x00, 0x00, 0x90, 0x41, 0x00, 0x00, 0x98, 0x41,
|
||||
0x00, 0x00, 0xa0, 0x41, 0x00, 0x00, 0xa8, 0x41, 0x00, 0x00, 0xb0, 0x41, 0x00, 0x00, 0xb8, 0x41,
|
||||
0x00, 0x00, 0xc0, 0x41, 0x00, 0x00, 0xc8, 0x41, 0x00, 0x00, 0xd0, 0x41, 0x00, 0x00, 0xd8, 0x41,
|
||||
0x00, 0x00, 0xe0, 0x41, 0x00, 0x00, 0xe8, 0x41, 0x00, 0x00, 0xf0, 0x41, 0x00, 0x00, 0xf8, 0x41,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "bf16-bf16",
|
||||
dtype: "BF16",
|
||||
size: 32 * 2, // 32 brain floats, each 2 bytes
|
||||
shape: []uint64{16, 2},
|
||||
setup: func(t *testing.T, f *os.File) {
|
||||
f32s := make([]float32, 32)
|
||||
for i := range f32s {
|
||||
f32s[i] = float32(i)
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, bfloat16.EncodeFloat32(f32s)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
},
|
||||
want: []byte{
|
||||
0x00, 0x00, 0x80, 0x3f, 0x00, 0x40, 0x40, 0x40, 0x80, 0x40, 0xa0, 0x40, 0xc0, 0x40, 0xe0, 0x40,
|
||||
0x00, 0x41, 0x10, 0x41, 0x20, 0x41, 0x30, 0x41, 0x40, 0x41, 0x50, 0x41, 0x60, 0x41, 0x70, 0x41,
|
||||
0x80, 0x41, 0x88, 0x41, 0x90, 0x41, 0x98, 0x41, 0xa0, 0x41, 0xa8, 0x41, 0xb0, 0x41, 0xb8, 0x41,
|
||||
0xc0, 0x41, 0xc8, 0x41, 0xd0, 0x41, 0xd8, 0x41, 0xe0, 0x41, 0xe8, 0x41, 0xf0, 0x41, 0xf8, 0x41,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "bf16-fp32",
|
||||
dtype: "BF16",
|
||||
size: 32 * 2, // 32 brain floats, each 2 bytes
|
||||
shape: []uint64{32},
|
||||
setup: func(t *testing.T, f *os.File) {
|
||||
f32s := make([]float32, 32)
|
||||
for i := range f32s {
|
||||
f32s[i] = float32(i)
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, bfloat16.EncodeFloat32(f32s)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
},
|
||||
want: []byte{
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40,
|
||||
0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0x40,
|
||||
0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x10, 0x41, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x30, 0x41,
|
||||
0x00, 0x00, 0x40, 0x41, 0x00, 0x00, 0x50, 0x41, 0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41,
|
||||
0x00, 0x00, 0x80, 0x41, 0x00, 0x00, 0x88, 0x41, 0x00, 0x00, 0x90, 0x41, 0x00, 0x00, 0x98, 0x41,
|
||||
0x00, 0x00, 0xa0, 0x41, 0x00, 0x00, 0xa8, 0x41, 0x00, 0x00, 0xb0, 0x41, 0x00, 0x00, 0xb8, 0x41,
|
||||
0x00, 0x00, 0xc0, 0x41, 0x00, 0x00, 0xc8, 0x41, 0x00, 0x00, 0xd0, 0x41, 0x00, 0x00, 0xd8, 0x41,
|
||||
0x00, 0x00, 0xe0, 0x41, 0x00, 0x00, 0xe8, 0x41, 0x00, 0x00, 0xf0, 0x41, 0x00, 0x00, 0xf8, 0x41,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "u8-u8",
|
||||
dtype: "U8",
|
||||
size: 32, // 32 brain floats, each 1 bytes
|
||||
shape: []uint64{32},
|
||||
setup: func(t *testing.T, f *os.File) {
|
||||
u8s := make([]uint8, 32)
|
||||
for i := range u8s {
|
||||
u8s[i] = uint8(i)
|
||||
}
|
||||
|
||||
if err := binary.Write(f, binary.LittleEndian, u8s); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
},
|
||||
want: []byte{
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
path := filepath.Base(t.Name())
|
||||
st := safetensor{
|
||||
fs: root.FS(),
|
||||
path: path,
|
||||
dtype: tt.dtype,
|
||||
offset: tt.offset,
|
||||
size: tt.size,
|
||||
tensorBase: &tensorBase{
|
||||
name: tt.name,
|
||||
shape: tt.shape,
|
||||
},
|
||||
}
|
||||
|
||||
f, err := root.Create(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
tt.setup(t, f)
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := st.WriteTo(&b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(tt.want, b.Bytes()); diff != "" {
|
||||
t.Errorf("safetensor.WriteTo() mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSafetensorWriteToFP8E4M3(t *testing.T) {
|
||||
root, err := os.OpenRoot(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer root.Close()
|
||||
|
||||
path := filepath.Base(t.Name())
|
||||
f, err := root.Create(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// E4M3FN encodings for 1.0, 2.0, 0.5, and -1.0.
|
||||
if _, err := f.Write([]byte{0x38, 0x40, 0x30, 0xb8}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := f.Write(bfloat16.EncodeFloat32([]float32{2})); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
st := safetensor{
|
||||
fs: root.FS(),
|
||||
path: path,
|
||||
dtype: "F8_E4M3",
|
||||
offset: 0,
|
||||
size: 4,
|
||||
fp8Block: safetensorFP8BlockSize{rows: 128, cols: 128, ok: true},
|
||||
scale: &safetensorScale{
|
||||
name: "linear.weight_scale",
|
||||
dtype: "BF16",
|
||||
shape: []uint64{1, 1},
|
||||
offset: 4,
|
||||
size: 2,
|
||||
},
|
||||
tensorBase: &tensorBase{
|
||||
name: "linear.weight",
|
||||
shape: []uint64{2, 2},
|
||||
},
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := st.WriteTo(&b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
want := bfloat16.EncodeFloat32([]float32{2, 4, 1, -2})
|
||||
if diff := cmp.Diff(want, b.Bytes()); diff != "" {
|
||||
t.Errorf("safetensor.WriteTo() mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSafetensorWriteToFP8E4M3UsesConfiguredBlockSize(t *testing.T) {
|
||||
root, err := os.OpenRoot(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer root.Close()
|
||||
|
||||
path := filepath.Base(t.Name())
|
||||
f, err := root.Create(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := f.Write(bytes.Repeat([]byte{0x38}, 12)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := f.Write(bfloat16.EncodeFloat32([]float32{1, 2, 3, 4})); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
st := safetensor{
|
||||
fs: root.FS(),
|
||||
path: path,
|
||||
dtype: "F8_E4M3",
|
||||
offset: 0,
|
||||
size: 12,
|
||||
fp8Block: safetensorFP8BlockSize{rows: 2, cols: 3, ok: true},
|
||||
scale: &safetensorScale{
|
||||
name: "linear.weight_scale",
|
||||
dtype: "BF16",
|
||||
shape: []uint64{2, 2},
|
||||
offset: 12,
|
||||
size: 8,
|
||||
},
|
||||
tensorBase: &tensorBase{
|
||||
name: "linear.weight",
|
||||
shape: []uint64{3, 4},
|
||||
},
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := st.WriteTo(&b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
want := bfloat16.EncodeFloat32([]float32{
|
||||
1, 1, 1, 2,
|
||||
1, 1, 1, 2,
|
||||
3, 3, 3, 4,
|
||||
})
|
||||
if diff := cmp.Diff(want, b.Bytes()); diff != "" {
|
||||
t.Errorf("safetensor.WriteTo() mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSafetensorsConsumesFP8ScaleCompanion(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
generateSafetensorTestData(t, tempDir, map[string]*tensorData{
|
||||
"linear.weight": {
|
||||
Offsets: []int{0, 4},
|
||||
Type: "F8_E4M3",
|
||||
Shape: []int{2, 2},
|
||||
},
|
||||
"linear.weight_scale": {
|
||||
Offsets: []int{4, 6},
|
||||
Type: "BF16",
|
||||
Shape: []int{1, 1},
|
||||
},
|
||||
})
|
||||
writeFP8BlockConfig(t, tempDir, 128, 128)
|
||||
|
||||
tensors, err := parseSafetensors(os.DirFS(tempDir), strings.NewReplacer(), "model-00001-of-00001.safetensors")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(tensors) != 1 {
|
||||
t.Fatalf("expected one tensor, got %d", len(tensors))
|
||||
}
|
||||
if got := tensors[0].Name(); got != "linear.weight" {
|
||||
t.Fatalf("unexpected tensor name %q", got)
|
||||
}
|
||||
if got := tensors[0].Kind(); got != tensorKindBF16 {
|
||||
t.Fatalf("unexpected fp8 converted kind %d, want %d", got, tensorKindBF16)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSafetensorsRejectsFP8WithoutBlockMetadata(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
generateSafetensorTestData(t, tempDir, map[string]*tensorData{
|
||||
"linear.weight": {
|
||||
Offsets: []int{0, 4},
|
||||
Type: "F8_E4M3",
|
||||
Shape: []int{2, 2},
|
||||
},
|
||||
"linear.weight_scale": {
|
||||
Offsets: []int{4, 6},
|
||||
Type: "BF16",
|
||||
Shape: []int{1, 1},
|
||||
},
|
||||
})
|
||||
|
||||
_, err := parseSafetensors(os.DirFS(tempDir), strings.NewReplacer(), "model-00001-of-00001.safetensors")
|
||||
if err == nil || !strings.Contains(err.Error(), "missing fp8 block size metadata") {
|
||||
t.Fatalf("expected missing fp8 block size metadata error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSafetensorsRejectsAmbiguousFP8ScaleCompanion(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
generateSafetensorTestData(t, tempDir, map[string]*tensorData{
|
||||
"linear.weight": {
|
||||
Offsets: []int{0, 4},
|
||||
Type: "F8_E4M3",
|
||||
Shape: []int{2, 2},
|
||||
},
|
||||
"linear.weight_scale": {
|
||||
Offsets: []int{4, 6},
|
||||
Type: "BF16",
|
||||
Shape: []int{1, 1},
|
||||
},
|
||||
"linear.weight.scale": {
|
||||
Offsets: []int{6, 8},
|
||||
Type: "BF16",
|
||||
Shape: []int{1, 1},
|
||||
},
|
||||
})
|
||||
writeFP8BlockConfig(t, tempDir, 128, 128)
|
||||
|
||||
_, err := parseSafetensors(os.DirFS(tempDir), strings.NewReplacer(), "model-00001-of-00001.safetensors")
|
||||
if err == nil || !strings.Contains(err.Error(), "multiple fp8 scale companions") {
|
||||
t.Fatalf("expected ambiguous fp8 scale companion error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func writeFP8BlockConfig(t *testing.T, dir string, rows, cols int) {
|
||||
t.Helper()
|
||||
|
||||
config := fmt.Sprintf(`{
|
||||
"architectures": ["GenericForCausalLM"],
|
||||
"compression_config": {
|
||||
"format": "float-quantized",
|
||||
"config_groups": {
|
||||
"group_0": {
|
||||
"format": "float-quantized",
|
||||
"weights": {
|
||||
"type": "float",
|
||||
"num_bits": 8,
|
||||
"block_structure": [%d, %d]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}`, rows, cols)
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(config), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSafetensorKind(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
st safetensor
|
||||
expected uint32
|
||||
}{
|
||||
{
|
||||
name: "BF16 dtype with non-v. prefix and non-FP32 base kind should return BF16",
|
||||
st: safetensor{
|
||||
tensorBase: &tensorBase{
|
||||
name: "weight.matrix",
|
||||
shape: []uint64{10, 10}, // will default to FP16
|
||||
},
|
||||
dtype: "BF16",
|
||||
},
|
||||
expected: tensorKindBF16,
|
||||
},
|
||||
{
|
||||
name: "BF16 dtype with v. prefix should return base kind",
|
||||
st: safetensor{
|
||||
tensorBase: &tensorBase{
|
||||
name: "v.weight.matrix",
|
||||
shape: []uint64{10, 10}, // will default to FP16
|
||||
},
|
||||
dtype: "BF16",
|
||||
},
|
||||
expected: tensorKindFP16,
|
||||
},
|
||||
{
|
||||
name: "BF16 audio feature extractor constants should return FP32",
|
||||
st: safetensor{
|
||||
tensorBase: &tensorBase{
|
||||
name: "a.feature_extractor.fb",
|
||||
shape: []uint64{1, 128, 257},
|
||||
},
|
||||
dtype: "BF16",
|
||||
},
|
||||
expected: tensorKindFP32,
|
||||
},
|
||||
{
|
||||
name: "BF16 dtype with FP32 base kind should return FP32",
|
||||
st: safetensor{
|
||||
tensorBase: &tensorBase{
|
||||
name: "weight.matrix",
|
||||
shape: []uint64{10}, // will default to FP32
|
||||
},
|
||||
dtype: "BF16",
|
||||
},
|
||||
expected: tensorKindFP32,
|
||||
},
|
||||
{
|
||||
name: "Non-BF16 dtype should return base kind",
|
||||
st: safetensor{
|
||||
tensorBase: &tensorBase{
|
||||
name: "weight.matrix",
|
||||
shape: []uint64{10, 10}, // will default to FP16
|
||||
},
|
||||
dtype: "FP16",
|
||||
},
|
||||
expected: tensorKindFP16,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := tt.st.Kind()
|
||||
if result != tt.expected {
|
||||
t.Errorf("Kind() = %d, expected %d", result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
59
convert/reader_torch.go
Normal file
59
convert/reader_torch.go
Normal file
@@ -0,0 +1,59 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"io"
|
||||
"io/fs"
|
||||
"strings"
|
||||
|
||||
"github.com/nlpodyssey/gopickle/pytorch"
|
||||
"github.com/nlpodyssey/gopickle/types"
|
||||
)
|
||||
|
||||
func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
|
||||
var ts []Tensor
|
||||
for _, p := range ps {
|
||||
pt, err := pytorch.Load(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, k := range pt.(*types.Dict).Keys() {
|
||||
t := pt.(*types.Dict).MustGet(k)
|
||||
|
||||
var shape []uint64
|
||||
for dim := range t.(*pytorch.Tensor).Size {
|
||||
shape = append(shape, uint64(dim))
|
||||
}
|
||||
|
||||
ts = append(ts, torch{
|
||||
storage: t.(*pytorch.Tensor).Source,
|
||||
tensorBase: &tensorBase{
|
||||
name: replacer.Replace(k.(string)),
|
||||
shape: shape,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return ts, nil
|
||||
}
|
||||
|
||||
type torch struct {
|
||||
storage pytorch.StorageInterface
|
||||
*tensorBase
|
||||
}
|
||||
|
||||
func (t torch) Clone() Tensor {
|
||||
return torch{
|
||||
storage: t.storage,
|
||||
tensorBase: &tensorBase{
|
||||
name: t.name,
|
||||
shape: t.shape,
|
||||
repacker: t.repacker,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (pt torch) WriteTo(w io.Writer) (int64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
1497
convert/sentencepiece/sentencepiece_model.pb.go
Normal file
1497
convert/sentencepiece/sentencepiece_model.pb.go
Normal file
File diff suppressed because it is too large
Load Diff
333
convert/sentencepiece_model.proto
Normal file
333
convert/sentencepiece_model.proto
Normal file
@@ -0,0 +1,333 @@
|
||||
// Copyright 2016 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.!
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
// TODO(taku): Needs to use LITE RUNTIME in OSS release.
|
||||
option optimize_for = LITE_RUNTIME;
|
||||
option go_package = "./sentencepiece";
|
||||
|
||||
package sentencepiece;
|
||||
|
||||
// TrainerSpec encodes a various parameters for SentencePiece training.
|
||||
// Next id: 55
|
||||
message TrainerSpec {
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// General parameters
|
||||
//
|
||||
// Input corpus files.
|
||||
// Trainer accepts the following two formats:
|
||||
// A) Monolingual: plain text, one sentence per line.
|
||||
// B) Bilingual: TSV, source sentence <tab> target sentence
|
||||
// When bilingual data is passed, shared vocabulary model is built.
|
||||
// Note that the input file must be raw corpus, not a preprocessed corpus.
|
||||
// Trainer only loads the first `input_sentence_size` sentences specified
|
||||
// with this parameter.
|
||||
repeated string input = 1;
|
||||
|
||||
// Input corpus format:
|
||||
// "text": one-sentence-per-line text format (default)
|
||||
// "tsv": sentence <tab> freq
|
||||
optional string input_format = 7;
|
||||
|
||||
// Output model file prefix.
|
||||
// <model_prefix>.model and <model_prefix>.vocab are generated.
|
||||
optional string model_prefix = 2;
|
||||
|
||||
// Model type. only have UNIGRAM now.
|
||||
enum ModelType {
|
||||
UNIGRAM = 1; // Unigram language model with dynamic algorithm
|
||||
BPE = 2; // Byte Pair Encoding
|
||||
WORD = 3; // Delimitered by whitespace.
|
||||
CHAR = 4; // tokenizes into character sequence
|
||||
}
|
||||
optional ModelType model_type = 3 [default = UNIGRAM];
|
||||
|
||||
// Vocabulary size. 8k is the default size.
|
||||
optional int32 vocab_size = 4 [default = 8000];
|
||||
|
||||
// List of the languages this model can accept.
|
||||
// Since the model is language-agnostic, this field is used as a reference.
|
||||
repeated string accept_language = 5;
|
||||
|
||||
// Size of self-test samples, which are encoded in the model file.
|
||||
optional int32 self_test_sample_size = 6 [default = 0];
|
||||
|
||||
// Whether to use DP version of sentencepiece. Use it with TSV input format
|
||||
// (requires precomputed word tab counts to work).
|
||||
optional bool enable_differential_privacy = 50 [default = false];
|
||||
// Set these parameters if you need DP version of sentencepiece.
|
||||
// std of noise to add.
|
||||
optional float differential_privacy_noise_level = 51 [default = 0.0];
|
||||
// Clipping threshold to apply after adding noise. All the words with
|
||||
// frequency less than this value are dropped.
|
||||
optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Training parameters.
|
||||
//
|
||||
// Uses characters which cover the corpus with the ratio of `chars_coverage`.
|
||||
// This parameter determines the set of basic Alphabet of sentence piece.
|
||||
// 1.0 - `chars_coverage` characters are treated as UNK.
|
||||
// See also required_chars field.
|
||||
optional float character_coverage = 10 [default = 0.9995];
|
||||
|
||||
// Maximum size of sentences the trainer loads from `input` parameter.
|
||||
// Trainer simply loads the `input` files in sequence.
|
||||
// It is better to shuffle the input corpus randomly.
|
||||
optional uint64 input_sentence_size = 11 [default = 0];
|
||||
optional bool shuffle_input_sentence = 19 [default = true];
|
||||
|
||||
// Maximum size of sentences to make seed sentence pieces.
|
||||
// Extended suffix array is constructed to extract frequent
|
||||
// sub-strings from the corpus. This uses 20N working space,
|
||||
// where N is the size of corpus.
|
||||
optional int32 mining_sentence_size = 12 [deprecated = true];
|
||||
|
||||
// Maximum size of sentences to train sentence pieces.
|
||||
optional int32 training_sentence_size = 13 [deprecated = true];
|
||||
|
||||
// The size of seed sentencepieces.
|
||||
// `seed_sentencepiece_size` must be larger than `vocab_size`.
|
||||
optional int32 seed_sentencepiece_size = 14 [default = 1000000];
|
||||
|
||||
// In every EM sub-iterations, keeps top
|
||||
// `shrinking_factor` * `current sentencepieces size` with respect to
|
||||
// the loss of the sentence piece. This value should be smaller than 1.0.
|
||||
optional float shrinking_factor = 15 [default = 0.75];
|
||||
|
||||
// The maximum sentence length in byte. The sentences with the length
|
||||
// larger than `max_sentence_length` is simply ignored.
|
||||
// Longer input tends to bring the following risks:
|
||||
// * Overflow during EM training (unigram language model only)
|
||||
// * Performance drop because of O(n log n) cost in BPE.
|
||||
optional int32 max_sentence_length = 18 [default = 4192];
|
||||
|
||||
// Number of threads in the training.
|
||||
optional int32 num_threads = 16 [default = 16];
|
||||
|
||||
// Number of EM sub iterations.
|
||||
optional int32 num_sub_iterations = 17 [default = 2];
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// SentencePiece parameters which control the shapes of sentence piece.
|
||||
//
|
||||
// Maximum length of sentencepiece.
|
||||
optional int32 max_sentencepiece_length = 20 [default = 16];
|
||||
|
||||
// Uses Unicode script to split sentence pieces.
|
||||
// When `split_by_unicode_script` is true, we do not allow sentence piece to
|
||||
// include multiple Unicode scripts, e.g. "F1" is not a valid piece.
|
||||
// Exception: CJ characters (Hiragana/Katakana/Han) are all handled
|
||||
// as one script type, since Japanese word can consist of multiple scripts.
|
||||
// This exception is always applied regardless of the accept-language
|
||||
// parameter.
|
||||
optional bool split_by_unicode_script = 21 [default = true];
|
||||
|
||||
// When `split_by_number` is true, put a boundary between number and
|
||||
// non-number transition. If we want to treat "F1" is one token, set this flag
|
||||
// to be false.
|
||||
optional bool split_by_number = 23 [default = true];
|
||||
|
||||
// Use a white space to split sentence pieces.
|
||||
// When `split_by_whitespace` is false, we may have the piece containing
|
||||
// a white space in the middle. e.g., "in_the".
|
||||
optional bool split_by_whitespace = 22 [default = true];
|
||||
|
||||
// Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
|
||||
// hello_. When `treat_whitespace_as_suffix` is true,
|
||||
// NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
|
||||
// of sentence.
|
||||
optional bool treat_whitespace_as_suffix = 24 [default = false];
|
||||
|
||||
// Allows pieces that only contain whitespaces instead of appearing only as
|
||||
// prefix or suffix of other pieces.
|
||||
optional bool allow_whitespace_only_pieces = 26 [default = false];
|
||||
|
||||
// Split all digits (0-9) into separate pieces.
|
||||
optional bool split_digits = 25 [default = false];
|
||||
|
||||
// Defines the pre-tokenization delimiter.
|
||||
// When specified, no pieces crossing this delimiter is not included
|
||||
// in the vocab. Then the delimiter string is virtually ignored
|
||||
// during the training. This field can allows constraints on the vocabulary
|
||||
// selection. Note that this field is available on unigram mode.
|
||||
optional string pretokenization_delimiter = 53 [ default = ""];
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Vocabulary management
|
||||
//
|
||||
// Defines control symbols used as an indicator to
|
||||
// change the behavior of the decoder. <s> and </s> are pre-defined.
|
||||
// We can use this field to encode various meta information,
|
||||
// including language indicator in multilingual model.
|
||||
// These symbols are not visible to users, but visible to
|
||||
// the decoder. Note that when the input sentence contains control symbols,
|
||||
// they are not treated as one token, but segmented into normal pieces.
|
||||
// Control symbols must be inserted independently from the segmentation.
|
||||
repeated string control_symbols = 30;
|
||||
|
||||
// Defines user defined symbols.
|
||||
// These symbols are added with extremely high score
|
||||
// so they are always treated as one unique symbol in any context.
|
||||
// Typical usage of user_defined_symbols is placeholder for named entities.
|
||||
repeated string user_defined_symbols = 31;
|
||||
|
||||
// Defines required characters. Each UTF8 character in this string is included
|
||||
// in the character set regardless of character_coverage value. Unlike
|
||||
// user_defined_symbols, these characters have scores based on the frequency
|
||||
// on input sentences, and the model can form subwords using characters
|
||||
// in this field.
|
||||
optional string required_chars = 36;
|
||||
|
||||
// Decomposes unknown pieces into UTF-8 bytes.
|
||||
optional bool byte_fallback = 35 [default = false];
|
||||
|
||||
// When creating the vocabulary file, defines whether or not to additionally
|
||||
// output the score for each piece.
|
||||
optional bool vocabulary_output_piece_score = 32 [default = true];
|
||||
|
||||
// `vocab_size` is treated as hard limit. Crash if
|
||||
// the model can not produce the vocab of size `vocab_size`,
|
||||
// When `hard_vocab_limit` is false, vocab_size is treated
|
||||
// as soft limit. Note that when model_type=char,
|
||||
// always assumes hard_vocab_limit = false.
|
||||
optional bool hard_vocab_limit = 33 [default = true];
|
||||
|
||||
// use all symbols for vocab extraction. This flag is valid
|
||||
// if model type is either CHAR or WORD
|
||||
optional bool use_all_vocab = 34 [default = false];
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Reserved special meta tokens.
|
||||
// * -1 is not used.
|
||||
// * unk_id must not be -1.
|
||||
// Id must start with 0 and be contiguous.
|
||||
optional int32 unk_id = 40 [default = 0]; // <unk>
|
||||
optional int32 bos_id = 41 [default = 1]; // <s>
|
||||
optional int32 eos_id = 42 [default = 2]; // </s>
|
||||
optional int32 pad_id = 43 [default = -1]; // <pad> (padding)
|
||||
optional string unk_piece = 45 [default = "<unk>"];
|
||||
optional string bos_piece = 46 [default = "<s>"];
|
||||
optional string eos_piece = 47 [default = "</s>"];
|
||||
optional string pad_piece = 48 [default = "<pad>"];
|
||||
|
||||
// Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
|
||||
// since this character can be useful both for user and
|
||||
// developer. We can easily figure out that <unk> is emitted.
|
||||
optional string unk_surface = 44 [default = " \xE2\x81\x87 "];
|
||||
|
||||
// Increase bit depth to allow unigram model training on large
|
||||
// (>10M sentences) corpora. A Side-effect of enabling this flag
|
||||
// is increased memory usage.
|
||||
optional bool train_extremely_large_corpus = 49 [default = false];
|
||||
|
||||
// Path to a seed sentencepieces file, with one tab-separated
|
||||
// seed sentencepiece <tab> frequency per line.
|
||||
optional string seed_sentencepieces_file = 54 [default = ""];
|
||||
|
||||
// Customized extensions: the range of field numbers
|
||||
// are open to third-party extensions.
|
||||
extensions 200 to max;
|
||||
}
|
||||
|
||||
// NormalizerSpec encodes a various parameters for string normalizaiton
|
||||
message NormalizerSpec {
|
||||
// name of normalization rule.
|
||||
optional string name = 1;
|
||||
|
||||
// Pre-compiled normalization rule created by
|
||||
// Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
|
||||
// Usually this field is set by Builder::GetNormalizerSpec() method.
|
||||
optional bytes precompiled_charsmap = 2;
|
||||
|
||||
// Adds dummy whitespace at the beginning of text in order to
|
||||
// treat "world" in "world" and "hello world" in the same way.
|
||||
optional bool add_dummy_prefix = 3 [default = true];
|
||||
|
||||
// Removes leading, trailing, and duplicate internal whitespace.
|
||||
optional bool remove_extra_whitespaces = 4 [default = true];
|
||||
|
||||
// Replaces whitespace with meta symbol.
|
||||
// This field must be true to train sentence piece model.
|
||||
optional bool escape_whitespaces = 5 [default = true];
|
||||
|
||||
// Custom normalization rule file in TSV format.
|
||||
// https://github.com/google/sentencepiece/blob/master/doc/normalization.md
|
||||
// This field is only used in SentencePieceTrainer::Train() method, which
|
||||
// compiles the rule into the binary rule stored in `precompiled_charsmap`.
|
||||
optional string normalization_rule_tsv = 6;
|
||||
|
||||
// Customized extensions: the range of field numbers
|
||||
// are open to third-party extensions.
|
||||
extensions 200 to max;
|
||||
}
|
||||
|
||||
// Proto to store samples for self-testing.
|
||||
message SelfTestData {
|
||||
message Sample {
|
||||
optional string input = 1;
|
||||
optional string expected = 2;
|
||||
}
|
||||
repeated Sample samples = 1;
|
||||
|
||||
// Customized extensions: the range of field numbers
|
||||
// are open to third-party extensions.
|
||||
extensions 200 to max;
|
||||
}
|
||||
|
||||
// ModelProto stores model parameters.
|
||||
// SentencePieceProcessor is supposed to be self-contained.
|
||||
// All settings/parameters which may change the behavior must be encoded
|
||||
// in ModelProto.
|
||||
message ModelProto {
|
||||
message SentencePiece {
|
||||
enum Type {
|
||||
NORMAL = 1; // normal symbol
|
||||
UNKNOWN = 2; // unknown symbol. only <unk> for now.
|
||||
CONTROL = 3; // control symbols. </s>, <s>, <2ja> etc.
|
||||
USER_DEFINED = 4; // user defined symbols.
|
||||
// Typical usage of USER_DEFINED symbol
|
||||
// is placeholder.
|
||||
BYTE = 6; // byte symbols. Used when `byte_fallback` is true.
|
||||
UNUSED = 5; // this piece is not used.
|
||||
}
|
||||
optional string piece = 1; // piece must not be empty.
|
||||
optional float score = 2;
|
||||
optional Type type = 3 [default = NORMAL];
|
||||
|
||||
// Customized extensions: the range of field numbers
|
||||
// are open to third-party extensions.
|
||||
extensions 200 to max;
|
||||
}
|
||||
|
||||
// Sentence pieces with scores.
|
||||
repeated SentencePiece pieces = 1;
|
||||
|
||||
// Spec used to generate this model file.
|
||||
optional TrainerSpec trainer_spec = 2;
|
||||
|
||||
// Spec for text normalization.
|
||||
optional NormalizerSpec normalizer_spec = 3;
|
||||
|
||||
// Stores sample input and its expected segmentation to verify the model.
|
||||
optional SelfTestData self_test_data = 4;
|
||||
|
||||
// Spec for text de-normalization.
|
||||
optional NormalizerSpec denormalizer_spec = 5;
|
||||
|
||||
// Customized extensions: the range of field numbers
|
||||
// are open to third-party extensions.
|
||||
extensions 200 to max;
|
||||
}
|
||||
207
convert/tensor.go
Normal file
207
convert/tensor.go
Normal file
@@ -0,0 +1,207 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"errors"
|
||||
"io"
|
||||
"iter"
|
||||
"maps"
|
||||
"path"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type split struct {
|
||||
*strings.Replacer
|
||||
dim int
|
||||
slices []tensor.Slice
|
||||
|
||||
// afterFunc is an optional function to apply to the tensor after slicing
|
||||
afterFunc func(tensor.Tensor) (tensor.Tensor, error)
|
||||
}
|
||||
|
||||
// splitDim splits a tensor along a specified dimension into multiple tensors. The dimension
|
||||
// is split evenly based on the number of replacers provided unless a specific count is given.
|
||||
func splitDim(t Tensor, dim int, splits ...split) iter.Seq[*ggml.Tensor] {
|
||||
return func(yield func(*ggml.Tensor) bool) {
|
||||
var offset int
|
||||
for _, split := range splits {
|
||||
t := t.Clone()
|
||||
shape := slices.Clone(t.Shape())
|
||||
shape[dim] = cmp.Or(uint64(split.dim), shape[dim]/uint64(len(splits)))
|
||||
|
||||
slice := split.slices
|
||||
if len(slice) == 0 {
|
||||
slice = slices.Repeat([]tensor.Slice{nil}, len(shape))
|
||||
slice[dim] = tensor.S(offset, offset+int(shape[dim]))
|
||||
offset += int(shape[dim])
|
||||
}
|
||||
|
||||
t.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := make([]int, len(shape))
|
||||
for i := range shape {
|
||||
dims[i] = int(shape[i])
|
||||
}
|
||||
|
||||
var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
tt, err := tt.Slice(slice...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tt = tensor.Materialize(tt)
|
||||
|
||||
if split.afterFunc != nil {
|
||||
tt, err = split.afterFunc(tt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// flatten tensor so it can be written as a vector
|
||||
if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return native.VectorF32(tt.(*tensor.Dense))
|
||||
})
|
||||
|
||||
if !yield(&ggml.Tensor{
|
||||
Name: split.Replace(t.Name()),
|
||||
Kind: t.Kind(),
|
||||
Shape: shape,
|
||||
WriterTo: t,
|
||||
}) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type merge struct {
|
||||
pattern, name string
|
||||
}
|
||||
|
||||
// mergeTensors merges tensors that match a given pattern into a single tensor.
|
||||
func mergeTensors(unmatched []Tensor, merges ...merge) (out []*ggml.Tensor, _ []Tensor) {
|
||||
var matched []Tensor
|
||||
for i := range merges {
|
||||
matched, unmatched = slicesSplitFunc(unmatched, func(t Tensor) bool {
|
||||
matched, _ := path.Match(merges[i].pattern, t.Name())
|
||||
return matched
|
||||
})
|
||||
|
||||
slices.SortStableFunc(matched, func(a, b Tensor) int {
|
||||
x := strings.Split(a.Name(), ".")
|
||||
y := strings.Split(b.Name(), ".")
|
||||
if len(x) != len(y) {
|
||||
return cmp.Compare(len(x), len(y))
|
||||
}
|
||||
|
||||
vals := make([]int, len(x))
|
||||
for i := range x {
|
||||
vals[i] = strings.Compare(x[i], y[i])
|
||||
m, err := strconv.ParseInt(x[i], 0, 0)
|
||||
n, err2 := strconv.ParseInt(y[i], 0, 0)
|
||||
if errors.Join(err, err2) == nil {
|
||||
vals[i] = cmp.Compare(m, n)
|
||||
}
|
||||
}
|
||||
|
||||
return cmp.Or(vals...)
|
||||
})
|
||||
|
||||
if len(matched) > 0 {
|
||||
out = append(out, &ggml.Tensor{
|
||||
Name: merges[i].name,
|
||||
Kind: matched[0].Kind(),
|
||||
Shape: append([]uint64{uint64(len(matched))}, matched[0].Shape()...),
|
||||
WriterTo: mergeGroup(matched),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return out, unmatched
|
||||
}
|
||||
|
||||
// slicesSplitFunc splits a slice into two slices based on a predicate function.
|
||||
func slicesSplitFunc[S ~[]E, E comparable](s S, fn func(e E) bool) (matched, unmatched S) {
|
||||
for _, e := range s {
|
||||
if fn(e) {
|
||||
matched = append(matched, e)
|
||||
} else {
|
||||
unmatched = append(unmatched, e)
|
||||
}
|
||||
}
|
||||
|
||||
return matched, unmatched
|
||||
}
|
||||
|
||||
type mergeGroup []Tensor
|
||||
|
||||
func (g mergeGroup) WriteTo(w io.Writer) (int64, error) {
|
||||
for _, t := range g {
|
||||
if _, err := t.WriteTo(w); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func sourceTensorKV(ts []*ggml.Tensor) KV {
|
||||
sourceFP8 := make(map[string]struct{})
|
||||
for _, t := range ts {
|
||||
if writerSourceDType(t.WriterTo) == "F8_E4M3" {
|
||||
sourceFP8[t.Name] = struct{}{}
|
||||
}
|
||||
}
|
||||
if len(sourceFP8) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return KV{
|
||||
"source_quantization": "hf_fp8",
|
||||
"source_fp8_tensors": slices.Sorted(maps.Keys(sourceFP8)),
|
||||
}
|
||||
}
|
||||
|
||||
type sourceDTypeTensor interface {
|
||||
SourceDType() string
|
||||
}
|
||||
|
||||
func writerSourceDType(w io.WriterTo) string {
|
||||
switch w := w.(type) {
|
||||
case sourceDTypeTensor:
|
||||
return w.SourceDType()
|
||||
case mergeGroup:
|
||||
if len(w) == 0 {
|
||||
return ""
|
||||
}
|
||||
dtype := sourceDType(w[0])
|
||||
if dtype == "" {
|
||||
return ""
|
||||
}
|
||||
for _, t := range w[1:] {
|
||||
if sourceDType(t) != dtype {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
return dtype
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func sourceDType(t Tensor) string {
|
||||
if t, ok := t.(sourceDTypeTensor); ok {
|
||||
return t.SourceDType()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
1043
convert/tensor_test.go
Normal file
1043
convert/tensor_test.go
Normal file
File diff suppressed because it is too large
Load Diff
313
convert/testdata/Meta-Llama-3-8B-Instruct.json
vendored
Normal file
313
convert/testdata/Meta-Llama-3-8B-Instruct.json
vendored
Normal file
@@ -0,0 +1,313 @@
|
||||
{
|
||||
"general.architecture": "llama",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"llama.block_count": "32",
|
||||
"llama.context_length": "8192",
|
||||
"llama.embedding_length": "4096",
|
||||
"llama.feed_forward_length": "14336",
|
||||
"llama.rope.dimension_count": "128",
|
||||
"llama.rope.freq_base": "500000",
|
||||
"llama.vocab_size": "128256",
|
||||
"llama.attention.head_count": "32",
|
||||
"llama.attention.head_count_kv": "8",
|
||||
"llama.attention.layer_norm_rms_epsilon": "1e-05",
|
||||
"tokenizer.ggml.model": "gpt2",
|
||||
"tokenizer.ggml.pre": "llama-bpe",
|
||||
"tokenizer.ggml.bos_token_id": "128000",
|
||||
"tokenizer.ggml.eos_token_id": "128009",
|
||||
"tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b",
|
||||
"tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978",
|
||||
"tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a",
|
||||
"token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698",
|
||||
"blk.0.attn_norm.weight": "7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee",
|
||||
"blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900",
|
||||
"blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd",
|
||||
"blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516",
|
||||
"blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb",
|
||||
"blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e",
|
||||
"blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc",
|
||||
"blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1",
|
||||
"blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8",
|
||||
"blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff",
|
||||
"blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0",
|
||||
"blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40",
|
||||
"blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f",
|
||||
"blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862",
|
||||
"blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d",
|
||||
"blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3",
|
||||
"blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c",
|
||||
"blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40",
|
||||
"blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539",
|
||||
"blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662",
|
||||
"blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718",
|
||||
"blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84",
|
||||
"blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe",
|
||||
"blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5",
|
||||
"blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343",
|
||||
"blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d",
|
||||
"blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02",
|
||||
"blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a",
|
||||
"blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4",
|
||||
"blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f",
|
||||
"blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e",
|
||||
"blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b",
|
||||
"blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291",
|
||||
"blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3",
|
||||
"blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8",
|
||||
"blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767",
|
||||
"blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217",
|
||||
"blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68",
|
||||
"blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee",
|
||||
"blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22",
|
||||
"blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9",
|
||||
"blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853",
|
||||
"blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b",
|
||||
"blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04",
|
||||
"blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d",
|
||||
"blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156",
|
||||
"blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9",
|
||||
"blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4",
|
||||
"blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0",
|
||||
"blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406",
|
||||
"blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb",
|
||||
"blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73",
|
||||
"blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409",
|
||||
"blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27",
|
||||
"blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942",
|
||||
"blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1",
|
||||
"blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04",
|
||||
"blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94",
|
||||
"blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8",
|
||||
"blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960",
|
||||
"blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3",
|
||||
"blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6",
|
||||
"blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36",
|
||||
"blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf",
|
||||
"blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f",
|
||||
"blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f",
|
||||
"blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e",
|
||||
"blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e",
|
||||
"blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44",
|
||||
"blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2",
|
||||
"blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff",
|
||||
"blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f",
|
||||
"blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec",
|
||||
"blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1",
|
||||
"blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc",
|
||||
"blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30",
|
||||
"blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e",
|
||||
"blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5",
|
||||
"blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4",
|
||||
"blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6",
|
||||
"blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e",
|
||||
"blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593",
|
||||
"blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9",
|
||||
"blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542",
|
||||
"blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e",
|
||||
"blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f",
|
||||
"blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f",
|
||||
"blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b",
|
||||
"blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476",
|
||||
"blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3",
|
||||
"blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94",
|
||||
"blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a",
|
||||
"blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226",
|
||||
"blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a",
|
||||
"blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3",
|
||||
"blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187",
|
||||
"blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a",
|
||||
"blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4",
|
||||
"blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2",
|
||||
"blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc",
|
||||
"blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898",
|
||||
"blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f",
|
||||
"blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be",
|
||||
"blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3",
|
||||
"blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855",
|
||||
"blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b",
|
||||
"blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f",
|
||||
"blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7",
|
||||
"blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6",
|
||||
"blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200",
|
||||
"blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f",
|
||||
"blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df",
|
||||
"blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6",
|
||||
"blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6",
|
||||
"blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238",
|
||||
"blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb",
|
||||
"blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc",
|
||||
"blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82",
|
||||
"blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3",
|
||||
"blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4",
|
||||
"blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943",
|
||||
"blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2",
|
||||
"blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360",
|
||||
"blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3",
|
||||
"blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9",
|
||||
"blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706",
|
||||
"blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093",
|
||||
"blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5",
|
||||
"blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1",
|
||||
"blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae",
|
||||
"blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003",
|
||||
"blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8",
|
||||
"blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514",
|
||||
"blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b",
|
||||
"blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687",
|
||||
"blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4",
|
||||
"blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801",
|
||||
"blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76",
|
||||
"blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d",
|
||||
"blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500",
|
||||
"blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751",
|
||||
"blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429",
|
||||
"blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e",
|
||||
"blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec",
|
||||
"blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0",
|
||||
"blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333",
|
||||
"blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e",
|
||||
"blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e",
|
||||
"blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651",
|
||||
"blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439",
|
||||
"blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0",
|
||||
"blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925",
|
||||
"blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25",
|
||||
"blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80",
|
||||
"blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05",
|
||||
"blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d",
|
||||
"blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09",
|
||||
"blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9",
|
||||
"blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02",
|
||||
"blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4",
|
||||
"blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7",
|
||||
"blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a",
|
||||
"blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835",
|
||||
"blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c",
|
||||
"blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb",
|
||||
"blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7",
|
||||
"blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960",
|
||||
"blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4",
|
||||
"blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2",
|
||||
"blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164",
|
||||
"blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c",
|
||||
"blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72",
|
||||
"blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04",
|
||||
"blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf",
|
||||
"blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17",
|
||||
"blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa",
|
||||
"blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5",
|
||||
"blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f",
|
||||
"blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60",
|
||||
"blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84",
|
||||
"blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba",
|
||||
"blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790",
|
||||
"blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6",
|
||||
"blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e",
|
||||
"blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17",
|
||||
"blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a",
|
||||
"blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204",
|
||||
"blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c",
|
||||
"blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2",
|
||||
"blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298",
|
||||
"blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53",
|
||||
"blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f",
|
||||
"blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789",
|
||||
"blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9",
|
||||
"blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a",
|
||||
"blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac",
|
||||
"blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee",
|
||||
"blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864",
|
||||
"blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a",
|
||||
"blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3",
|
||||
"blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e",
|
||||
"blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296",
|
||||
"blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28",
|
||||
"blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12",
|
||||
"blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de",
|
||||
"blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6",
|
||||
"blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703",
|
||||
"blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c",
|
||||
"blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7",
|
||||
"blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8",
|
||||
"blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf",
|
||||
"blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9",
|
||||
"blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe",
|
||||
"blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df",
|
||||
"blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965",
|
||||
"blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c",
|
||||
"blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850",
|
||||
"blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5",
|
||||
"blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117",
|
||||
"blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa",
|
||||
"blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b",
|
||||
"blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e",
|
||||
"blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac",
|
||||
"blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb",
|
||||
"blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2",
|
||||
"blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b",
|
||||
"blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f",
|
||||
"blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0",
|
||||
"blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5",
|
||||
"blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346",
|
||||
"blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4",
|
||||
"blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666",
|
||||
"blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10",
|
||||
"blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371",
|
||||
"blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee",
|
||||
"blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833",
|
||||
"blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62",
|
||||
"blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b",
|
||||
"blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8",
|
||||
"blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a",
|
||||
"blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84",
|
||||
"blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604",
|
||||
"blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4",
|
||||
"blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2",
|
||||
"blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0",
|
||||
"blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64",
|
||||
"blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a",
|
||||
"blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0",
|
||||
"blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925",
|
||||
"blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a",
|
||||
"blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3",
|
||||
"blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6",
|
||||
"blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40",
|
||||
"blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45",
|
||||
"blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a",
|
||||
"blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b",
|
||||
"blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e",
|
||||
"blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8",
|
||||
"blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9",
|
||||
"blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f",
|
||||
"blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3",
|
||||
"blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d",
|
||||
"blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f",
|
||||
"blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967",
|
||||
"blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642",
|
||||
"blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc",
|
||||
"blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa",
|
||||
"blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48",
|
||||
"blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada",
|
||||
"blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c",
|
||||
"blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af",
|
||||
"blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b",
|
||||
"blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638",
|
||||
"blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0",
|
||||
"blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03",
|
||||
"blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa",
|
||||
"blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc",
|
||||
"blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1",
|
||||
"blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81",
|
||||
"blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566",
|
||||
"blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a",
|
||||
"blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f",
|
||||
"blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a",
|
||||
"blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f",
|
||||
"blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56",
|
||||
"blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d",
|
||||
"blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2",
|
||||
"blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b",
|
||||
"output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892",
|
||||
"output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e"
|
||||
}
|
||||
3
convert/testdata/Meta-Llama-3.1-8B-Instruct.json
vendored
Normal file
3
convert/testdata/Meta-Llama-3.1-8B-Instruct.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
|
||||
}
|
||||
313
convert/testdata/Mistral-7B-Instruct-v0.2.json
vendored
Normal file
313
convert/testdata/Mistral-7B-Instruct-v0.2.json
vendored
Normal file
@@ -0,0 +1,313 @@
|
||||
{
|
||||
"general.architecture": "llama",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"llama.block_count": "32",
|
||||
"llama.context_length": "32768",
|
||||
"llama.embedding_length": "4096",
|
||||
"llama.feed_forward_length": "14336",
|
||||
"llama.attention.head_count": "32",
|
||||
"llama.attention.head_count_kv": "8",
|
||||
"llama.attention.layer_norm_rms_epsilon": "1e-05",
|
||||
"llama.rope.dimension_count": "128",
|
||||
"tokenizer.ggml.model": "llama",
|
||||
"tokenizer.ggml.add_bos_token": "true",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.bos_token_id": "1",
|
||||
"tokenizer.ggml.eos_token_id": "2",
|
||||
"tokenizer.ggml.unknown_token_id": "0",
|
||||
"tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
|
||||
"tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
|
||||
"tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
|
||||
"token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81",
|
||||
"blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be",
|
||||
"blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b",
|
||||
"blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373",
|
||||
"blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25",
|
||||
"blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd",
|
||||
"blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d",
|
||||
"blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e",
|
||||
"blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287",
|
||||
"blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987",
|
||||
"blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712",
|
||||
"blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357",
|
||||
"blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740",
|
||||
"blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b",
|
||||
"blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e",
|
||||
"blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db",
|
||||
"blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346",
|
||||
"blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed",
|
||||
"blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2",
|
||||
"blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153",
|
||||
"blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24",
|
||||
"blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348",
|
||||
"blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a",
|
||||
"blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567",
|
||||
"blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397",
|
||||
"blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd",
|
||||
"blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1",
|
||||
"blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c",
|
||||
"blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075",
|
||||
"blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b",
|
||||
"blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610",
|
||||
"blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948",
|
||||
"blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38",
|
||||
"blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf",
|
||||
"blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432",
|
||||
"blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c",
|
||||
"blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88",
|
||||
"blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b",
|
||||
"blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd",
|
||||
"blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de",
|
||||
"blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa",
|
||||
"blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8",
|
||||
"blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf",
|
||||
"blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6",
|
||||
"blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65",
|
||||
"blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642",
|
||||
"blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316",
|
||||
"blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443",
|
||||
"blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d",
|
||||
"blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d",
|
||||
"blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e",
|
||||
"blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e",
|
||||
"blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5",
|
||||
"blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b",
|
||||
"blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f",
|
||||
"blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932",
|
||||
"blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085",
|
||||
"blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075",
|
||||
"blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90",
|
||||
"blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555",
|
||||
"blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1",
|
||||
"blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f",
|
||||
"blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392",
|
||||
"blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab",
|
||||
"blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a",
|
||||
"blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f",
|
||||
"blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe",
|
||||
"blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c",
|
||||
"blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8",
|
||||
"blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387",
|
||||
"blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0",
|
||||
"blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff",
|
||||
"blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5",
|
||||
"blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5",
|
||||
"blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d",
|
||||
"blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff",
|
||||
"blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369",
|
||||
"blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f",
|
||||
"blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa",
|
||||
"blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a",
|
||||
"blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f",
|
||||
"blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89",
|
||||
"blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645",
|
||||
"blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d",
|
||||
"blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995",
|
||||
"blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006",
|
||||
"blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e",
|
||||
"blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75",
|
||||
"blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57",
|
||||
"blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97",
|
||||
"blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a",
|
||||
"blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d",
|
||||
"blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f",
|
||||
"blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c",
|
||||
"blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278",
|
||||
"blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93",
|
||||
"blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08",
|
||||
"blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3",
|
||||
"blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6",
|
||||
"blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5",
|
||||
"blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677",
|
||||
"blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4",
|
||||
"blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950",
|
||||
"blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be",
|
||||
"blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403",
|
||||
"blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87",
|
||||
"blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60",
|
||||
"blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb",
|
||||
"blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42",
|
||||
"blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec",
|
||||
"blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761",
|
||||
"blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04",
|
||||
"blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66",
|
||||
"blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10",
|
||||
"blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a",
|
||||
"blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d",
|
||||
"blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0",
|
||||
"blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04",
|
||||
"blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3",
|
||||
"blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be",
|
||||
"blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a",
|
||||
"blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b",
|
||||
"blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd",
|
||||
"blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5",
|
||||
"blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0",
|
||||
"blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683",
|
||||
"blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba",
|
||||
"blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be",
|
||||
"blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54",
|
||||
"blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a",
|
||||
"blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd",
|
||||
"blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9",
|
||||
"blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11",
|
||||
"blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c",
|
||||
"blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca",
|
||||
"blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38",
|
||||
"blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522",
|
||||
"blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313",
|
||||
"blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a",
|
||||
"blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626",
|
||||
"blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6",
|
||||
"blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19",
|
||||
"blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9",
|
||||
"blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623",
|
||||
"blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15",
|
||||
"blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e",
|
||||
"blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad",
|
||||
"blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4",
|
||||
"blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048",
|
||||
"blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b",
|
||||
"blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee",
|
||||
"blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15",
|
||||
"blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9",
|
||||
"blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c",
|
||||
"blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07",
|
||||
"blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1",
|
||||
"blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece",
|
||||
"blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed",
|
||||
"blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18",
|
||||
"blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196",
|
||||
"blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392",
|
||||
"blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e",
|
||||
"blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de",
|
||||
"blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b",
|
||||
"blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995",
|
||||
"blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa",
|
||||
"blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899",
|
||||
"blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156",
|
||||
"blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d",
|
||||
"blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb",
|
||||
"blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a",
|
||||
"blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd",
|
||||
"blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c",
|
||||
"blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4",
|
||||
"blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe",
|
||||
"blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724",
|
||||
"blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd",
|
||||
"blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd",
|
||||
"blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9",
|
||||
"blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881",
|
||||
"blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd",
|
||||
"blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0",
|
||||
"blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308",
|
||||
"blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b",
|
||||
"blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185",
|
||||
"blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7",
|
||||
"blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8",
|
||||
"blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2",
|
||||
"blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d",
|
||||
"blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101",
|
||||
"blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e",
|
||||
"blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276",
|
||||
"blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e",
|
||||
"blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f",
|
||||
"blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca",
|
||||
"blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30",
|
||||
"blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4",
|
||||
"blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462",
|
||||
"blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c",
|
||||
"blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87",
|
||||
"blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76",
|
||||
"blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0",
|
||||
"blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842",
|
||||
"blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73",
|
||||
"blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5",
|
||||
"blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8",
|
||||
"blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9",
|
||||
"blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7",
|
||||
"blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2",
|
||||
"blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4",
|
||||
"blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831",
|
||||
"blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795",
|
||||
"blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101",
|
||||
"blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf",
|
||||
"blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061",
|
||||
"blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10",
|
||||
"blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801",
|
||||
"blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66",
|
||||
"blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb",
|
||||
"blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8",
|
||||
"blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9",
|
||||
"blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590",
|
||||
"blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf",
|
||||
"blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e",
|
||||
"blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab",
|
||||
"blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d",
|
||||
"blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6",
|
||||
"blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97",
|
||||
"blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b",
|
||||
"blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f",
|
||||
"blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074",
|
||||
"blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6",
|
||||
"blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4",
|
||||
"blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75",
|
||||
"blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3",
|
||||
"blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9",
|
||||
"blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc",
|
||||
"blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509",
|
||||
"blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581",
|
||||
"blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e",
|
||||
"blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31",
|
||||
"blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6",
|
||||
"blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af",
|
||||
"blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287",
|
||||
"blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b",
|
||||
"blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c",
|
||||
"blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840",
|
||||
"blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54",
|
||||
"blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add",
|
||||
"blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e",
|
||||
"blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb",
|
||||
"blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8",
|
||||
"blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1",
|
||||
"blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba",
|
||||
"blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794",
|
||||
"blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d",
|
||||
"blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1",
|
||||
"blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c",
|
||||
"blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83",
|
||||
"blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83",
|
||||
"blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80",
|
||||
"blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05",
|
||||
"blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae",
|
||||
"blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859",
|
||||
"blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b",
|
||||
"blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53",
|
||||
"blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36",
|
||||
"blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89",
|
||||
"blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036",
|
||||
"blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2",
|
||||
"blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c",
|
||||
"blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743",
|
||||
"blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65",
|
||||
"blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3",
|
||||
"blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e",
|
||||
"blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267",
|
||||
"blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993",
|
||||
"blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46",
|
||||
"blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04",
|
||||
"blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6",
|
||||
"blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8",
|
||||
"blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101",
|
||||
"blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd",
|
||||
"blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda",
|
||||
"blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba",
|
||||
"blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec",
|
||||
"blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf",
|
||||
"blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268",
|
||||
"blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb",
|
||||
"output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330",
|
||||
"output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3"
|
||||
}
|
||||
348
convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
vendored
Normal file
348
convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
vendored
Normal file
@@ -0,0 +1,348 @@
|
||||
{
|
||||
"general.architecture": "llama",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"llama.block_count": "32",
|
||||
"llama.context_length": "32768",
|
||||
"llama.embedding_length": "4096",
|
||||
"llama.feed_forward_length": "14336",
|
||||
"llama.rope.dimension_count": "128",
|
||||
"llama.rope.freq_base": "1e+06",
|
||||
"llama.attention.head_count": "32",
|
||||
"llama.attention.head_count_kv": "8",
|
||||
"llama.attention.layer_norm_rms_epsilon": "1e-05",
|
||||
"llama.expert_count": "8",
|
||||
"llama.expert_used_count": "2",
|
||||
"tokenizer.ggml.model": "llama",
|
||||
"tokenizer.ggml.add_bos_token": "true",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.bos_token_id": "1",
|
||||
"tokenizer.ggml.eos_token_id": "2",
|
||||
"tokenizer.ggml.unknown_token_id": "0",
|
||||
"tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
|
||||
"tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
|
||||
"tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
|
||||
"token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4",
|
||||
"blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa",
|
||||
"blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5",
|
||||
"blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3",
|
||||
"blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314",
|
||||
"blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c",
|
||||
"blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2",
|
||||
"blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67",
|
||||
"blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227",
|
||||
"blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b",
|
||||
"blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366",
|
||||
"blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809",
|
||||
"blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd",
|
||||
"blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c",
|
||||
"blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33",
|
||||
"blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f",
|
||||
"blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a",
|
||||
"blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793",
|
||||
"blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1",
|
||||
"blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd",
|
||||
"blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c",
|
||||
"blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964",
|
||||
"blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c",
|
||||
"blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e",
|
||||
"blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2",
|
||||
"blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c",
|
||||
"blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6",
|
||||
"blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e",
|
||||
"blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9",
|
||||
"blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3",
|
||||
"blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1",
|
||||
"blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c",
|
||||
"blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497",
|
||||
"blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086",
|
||||
"blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210",
|
||||
"blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1",
|
||||
"blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86",
|
||||
"blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625",
|
||||
"blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75",
|
||||
"blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555",
|
||||
"blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e",
|
||||
"blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd",
|
||||
"blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4",
|
||||
"blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164",
|
||||
"blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6",
|
||||
"blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758",
|
||||
"blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790",
|
||||
"blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9",
|
||||
"blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12",
|
||||
"blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d",
|
||||
"blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb",
|
||||
"blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d",
|
||||
"blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff",
|
||||
"blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481",
|
||||
"blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e",
|
||||
"blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece",
|
||||
"blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd",
|
||||
"blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13",
|
||||
"blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba",
|
||||
"blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a",
|
||||
"blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca",
|
||||
"blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66",
|
||||
"blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470",
|
||||
"blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c",
|
||||
"blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b",
|
||||
"blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c",
|
||||
"blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424",
|
||||
"blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6",
|
||||
"blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84",
|
||||
"blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb",
|
||||
"blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1",
|
||||
"blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948",
|
||||
"blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc",
|
||||
"blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5",
|
||||
"blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a",
|
||||
"blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0",
|
||||
"blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0",
|
||||
"blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187",
|
||||
"blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948",
|
||||
"blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e",
|
||||
"blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2",
|
||||
"blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17",
|
||||
"blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983",
|
||||
"blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699",
|
||||
"blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0",
|
||||
"blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db",
|
||||
"blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7",
|
||||
"blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90",
|
||||
"blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5",
|
||||
"blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e",
|
||||
"blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b",
|
||||
"blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f",
|
||||
"blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333",
|
||||
"blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce",
|
||||
"blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0",
|
||||
"blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b",
|
||||
"blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c",
|
||||
"blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf",
|
||||
"blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351",
|
||||
"blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5",
|
||||
"blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99",
|
||||
"blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702",
|
||||
"blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39",
|
||||
"blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a",
|
||||
"blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5",
|
||||
"blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad",
|
||||
"blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd",
|
||||
"blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3",
|
||||
"blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac",
|
||||
"blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95",
|
||||
"blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482",
|
||||
"blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6",
|
||||
"blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea",
|
||||
"blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6",
|
||||
"blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325",
|
||||
"blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512",
|
||||
"blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47",
|
||||
"blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e",
|
||||
"blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89",
|
||||
"blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada",
|
||||
"blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee",
|
||||
"blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49",
|
||||
"blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829",
|
||||
"blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e",
|
||||
"blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a",
|
||||
"blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154",
|
||||
"blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97",
|
||||
"blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374",
|
||||
"blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5",
|
||||
"blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5",
|
||||
"blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52",
|
||||
"blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05",
|
||||
"blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace",
|
||||
"blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069",
|
||||
"blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc",
|
||||
"blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337",
|
||||
"blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899",
|
||||
"blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d",
|
||||
"blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec",
|
||||
"blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c",
|
||||
"blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d",
|
||||
"blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9",
|
||||
"blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78",
|
||||
"blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41",
|
||||
"blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4",
|
||||
"blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06",
|
||||
"blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8",
|
||||
"blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599",
|
||||
"blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab",
|
||||
"blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241",
|
||||
"blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211",
|
||||
"blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb",
|
||||
"blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98",
|
||||
"blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf",
|
||||
"blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e",
|
||||
"blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73",
|
||||
"blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef",
|
||||
"blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a",
|
||||
"blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c",
|
||||
"blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0",
|
||||
"blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c",
|
||||
"blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5",
|
||||
"blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa",
|
||||
"blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4",
|
||||
"blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6",
|
||||
"blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b",
|
||||
"blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62",
|
||||
"blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d",
|
||||
"blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc",
|
||||
"blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125",
|
||||
"blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248",
|
||||
"blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd",
|
||||
"blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5",
|
||||
"blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d",
|
||||
"blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940",
|
||||
"blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561",
|
||||
"blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac",
|
||||
"blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec",
|
||||
"blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3",
|
||||
"blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047",
|
||||
"blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298",
|
||||
"blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e",
|
||||
"blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8",
|
||||
"blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75",
|
||||
"blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f",
|
||||
"blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb",
|
||||
"blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4",
|
||||
"blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae",
|
||||
"blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78",
|
||||
"blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5",
|
||||
"blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057",
|
||||
"blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff",
|
||||
"blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8",
|
||||
"blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60",
|
||||
"blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd",
|
||||
"blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b",
|
||||
"blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0",
|
||||
"blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813",
|
||||
"blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226",
|
||||
"blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7",
|
||||
"blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39",
|
||||
"blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc",
|
||||
"blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4",
|
||||
"blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b",
|
||||
"blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5",
|
||||
"blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57",
|
||||
"blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765",
|
||||
"blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb",
|
||||
"blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2",
|
||||
"blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820",
|
||||
"blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6",
|
||||
"blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660",
|
||||
"blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c",
|
||||
"blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a",
|
||||
"blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93",
|
||||
"blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209",
|
||||
"blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa",
|
||||
"blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d",
|
||||
"blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12",
|
||||
"blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36",
|
||||
"blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369",
|
||||
"blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179",
|
||||
"blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb",
|
||||
"blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206",
|
||||
"blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237",
|
||||
"blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4",
|
||||
"blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a",
|
||||
"blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f",
|
||||
"blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32",
|
||||
"blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8",
|
||||
"blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2",
|
||||
"blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5",
|
||||
"blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779",
|
||||
"blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b",
|
||||
"blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd",
|
||||
"blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1",
|
||||
"blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e",
|
||||
"blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d",
|
||||
"blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c",
|
||||
"blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6",
|
||||
"blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8",
|
||||
"blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1",
|
||||
"blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192",
|
||||
"blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a",
|
||||
"blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79",
|
||||
"blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1",
|
||||
"blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33",
|
||||
"blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa",
|
||||
"blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643",
|
||||
"blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0",
|
||||
"blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61",
|
||||
"blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c",
|
||||
"blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c",
|
||||
"blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb",
|
||||
"blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7",
|
||||
"blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431",
|
||||
"blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b",
|
||||
"blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a",
|
||||
"blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7",
|
||||
"blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64",
|
||||
"blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e",
|
||||
"blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6",
|
||||
"blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781",
|
||||
"blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6",
|
||||
"blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff",
|
||||
"blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54",
|
||||
"blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82",
|
||||
"blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8",
|
||||
"blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765",
|
||||
"blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16",
|
||||
"blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd",
|
||||
"blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e",
|
||||
"blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2",
|
||||
"blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7",
|
||||
"blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61",
|
||||
"blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca",
|
||||
"blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4",
|
||||
"blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2",
|
||||
"blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1",
|
||||
"blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a",
|
||||
"blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4",
|
||||
"blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea",
|
||||
"blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b",
|
||||
"blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778",
|
||||
"blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74",
|
||||
"blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd",
|
||||
"blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2",
|
||||
"blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8",
|
||||
"blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc",
|
||||
"blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64",
|
||||
"blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805",
|
||||
"blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298",
|
||||
"blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e",
|
||||
"blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b",
|
||||
"blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78",
|
||||
"blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71",
|
||||
"blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7",
|
||||
"blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a",
|
||||
"blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705",
|
||||
"blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea",
|
||||
"blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9",
|
||||
"blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c",
|
||||
"blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb",
|
||||
"blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd",
|
||||
"blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044",
|
||||
"blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1",
|
||||
"blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70",
|
||||
"blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e",
|
||||
"blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646",
|
||||
"blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273",
|
||||
"blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e",
|
||||
"blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786",
|
||||
"blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40",
|
||||
"blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a",
|
||||
"blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b",
|
||||
"blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172",
|
||||
"blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7",
|
||||
"blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628",
|
||||
"blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802",
|
||||
"blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0",
|
||||
"blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8",
|
||||
"output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13",
|
||||
"output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564"
|
||||
}
|
||||
225
convert/testdata/Phi-3-mini-128k-instruct.json
vendored
Normal file
225
convert/testdata/Phi-3-mini-128k-instruct.json
vendored
Normal file
@@ -0,0 +1,225 @@
|
||||
{
|
||||
"general.architecture": "phi3",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"phi3.block_count": "32",
|
||||
"phi3.context_length": "131072",
|
||||
"phi3.embedding_length": "3072",
|
||||
"phi3.feed_forward_length": "8192",
|
||||
"phi3.rope.scaling.original_context_length": "4096",
|
||||
"phi3.rope.dimension_count": "96",
|
||||
"phi3.rope.freq_base": "10000",
|
||||
"phi3.rope.scaling.attn_factor": "1.1902381",
|
||||
"phi3.attention.head_count": "32",
|
||||
"phi3.attention.head_count_kv": "32",
|
||||
"phi3.attention.layer_norm_rms_epsilon": "1e-05",
|
||||
"phi3.attention.sliding_window": "262144",
|
||||
"tokenizer.ggml.model": "llama",
|
||||
"tokenizer.ggml.pre": "default",
|
||||
"tokenizer.ggml.add_bos_token": "false",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.bos_token_id": "1",
|
||||
"tokenizer.ggml.eos_token_id": "32000",
|
||||
"tokenizer.ggml.unknown_token_id": "0",
|
||||
"tokenizer.ggml.padding_token_id": "32000",
|
||||
"tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62",
|
||||
"tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce",
|
||||
"tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918",
|
||||
"blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25",
|
||||
"blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099",
|
||||
"blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e",
|
||||
"blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b",
|
||||
"blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338",
|
||||
"blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4",
|
||||
"blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1",
|
||||
"blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0",
|
||||
"blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc",
|
||||
"blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633",
|
||||
"blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c",
|
||||
"blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff",
|
||||
"blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4",
|
||||
"blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3",
|
||||
"blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f",
|
||||
"blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1",
|
||||
"blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227",
|
||||
"blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb",
|
||||
"blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c",
|
||||
"blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d",
|
||||
"blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad",
|
||||
"blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e",
|
||||
"blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5",
|
||||
"blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc",
|
||||
"blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36",
|
||||
"blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2",
|
||||
"blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab",
|
||||
"blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975",
|
||||
"blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234",
|
||||
"blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa",
|
||||
"blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab",
|
||||
"blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f",
|
||||
"blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6",
|
||||
"blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad",
|
||||
"blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff",
|
||||
"blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750",
|
||||
"blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2",
|
||||
"blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c",
|
||||
"blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106",
|
||||
"blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84",
|
||||
"blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3",
|
||||
"blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6",
|
||||
"blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099",
|
||||
"blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1",
|
||||
"blk.7.attn_qkv.weight": "ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739",
|
||||
"blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670",
|
||||
"blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39",
|
||||
"blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401",
|
||||
"blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6",
|
||||
"blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659",
|
||||
"blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747",
|
||||
"blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82",
|
||||
"blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d",
|
||||
"blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b",
|
||||
"blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6",
|
||||
"blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16",
|
||||
"blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c",
|
||||
"blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694",
|
||||
"blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542",
|
||||
"blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2",
|
||||
"blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073",
|
||||
"blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb",
|
||||
"blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672",
|
||||
"blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e",
|
||||
"blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355",
|
||||
"blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1",
|
||||
"blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4",
|
||||
"blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5",
|
||||
"blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb",
|
||||
"blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd",
|
||||
"blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd",
|
||||
"blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354",
|
||||
"blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd",
|
||||
"blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d",
|
||||
"blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0",
|
||||
"blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4",
|
||||
"blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4",
|
||||
"blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d",
|
||||
"blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780",
|
||||
"blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7",
|
||||
"blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b",
|
||||
"blk.13.ffn_down.weight": "de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a",
|
||||
"blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a",
|
||||
"blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0",
|
||||
"blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678",
|
||||
"blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9",
|
||||
"blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04",
|
||||
"blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da",
|
||||
"blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c",
|
||||
"blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c",
|
||||
"blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6",
|
||||
"blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1",
|
||||
"blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce",
|
||||
"blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58",
|
||||
"blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2",
|
||||
"blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3",
|
||||
"blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f",
|
||||
"blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103",
|
||||
"blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d",
|
||||
"blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d",
|
||||
"blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0",
|
||||
"blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1",
|
||||
"blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803",
|
||||
"blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda",
|
||||
"blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2",
|
||||
"blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52",
|
||||
"blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4",
|
||||
"blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148",
|
||||
"blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d",
|
||||
"blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e",
|
||||
"blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317",
|
||||
"blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153",
|
||||
"blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51",
|
||||
"blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe",
|
||||
"blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d",
|
||||
"blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa",
|
||||
"blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f",
|
||||
"blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b",
|
||||
"blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff",
|
||||
"blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3",
|
||||
"blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978",
|
||||
"blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23",
|
||||
"blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329",
|
||||
"blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5",
|
||||
"blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b",
|
||||
"blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18",
|
||||
"blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3",
|
||||
"blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5",
|
||||
"blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008",
|
||||
"blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc",
|
||||
"blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a",
|
||||
"blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863",
|
||||
"blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff",
|
||||
"blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71",
|
||||
"blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e",
|
||||
"blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb",
|
||||
"blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc",
|
||||
"blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c",
|
||||
"blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707",
|
||||
"blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1",
|
||||
"blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4",
|
||||
"blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347",
|
||||
"blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b",
|
||||
"blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e",
|
||||
"blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6",
|
||||
"blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b",
|
||||
"blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f",
|
||||
"blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c",
|
||||
"blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd",
|
||||
"blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a",
|
||||
"blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4",
|
||||
"blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6",
|
||||
"blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1",
|
||||
"blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91",
|
||||
"blk.25.ffn_norm.weight": "fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a",
|
||||
"blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f",
|
||||
"blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0",
|
||||
"blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c",
|
||||
"blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a",
|
||||
"blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0",
|
||||
"blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386",
|
||||
"blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39",
|
||||
"blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d",
|
||||
"blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf",
|
||||
"blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4",
|
||||
"blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c",
|
||||
"blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece",
|
||||
"blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180",
|
||||
"blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850",
|
||||
"blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c",
|
||||
"blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de",
|
||||
"blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965",
|
||||
"blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087",
|
||||
"blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4",
|
||||
"blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a",
|
||||
"blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4",
|
||||
"blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4",
|
||||
"blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06",
|
||||
"blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae",
|
||||
"blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e",
|
||||
"blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4",
|
||||
"blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148",
|
||||
"blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6",
|
||||
"blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7",
|
||||
"blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880",
|
||||
"blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668",
|
||||
"blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53",
|
||||
"blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456",
|
||||
"blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4",
|
||||
"blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a",
|
||||
"blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d",
|
||||
"blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b",
|
||||
"output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41",
|
||||
"output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82",
|
||||
"rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b",
|
||||
"rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd",
|
||||
"token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf"
|
||||
}
|
||||
314
convert/testdata/Qwen2.5-0.5B-Instruct.json
vendored
Normal file
314
convert/testdata/Qwen2.5-0.5B-Instruct.json
vendored
Normal file
@@ -0,0 +1,314 @@
|
||||
{
|
||||
"general.architecture": "qwen2",
|
||||
"general.file_type": "1",
|
||||
"general.parameter_count": "494032768",
|
||||
"general.quantization_version": "2",
|
||||
"output_norm.weight": "93a01a6db3419e85320a244bbf8ae81c43033b1d10c342bea3797ff2ce348390",
|
||||
"qwen2.attention.head_count": "14",
|
||||
"qwen2.attention.head_count_kv": "2",
|
||||
"qwen2.attention.layer_norm_rms_epsilon": "1e-06",
|
||||
"qwen2.block_count": "24",
|
||||
"qwen2.context_length": "32768",
|
||||
"qwen2.embedding_length": "896",
|
||||
"qwen2.feed_forward_length": "4864",
|
||||
"qwen2.rope.freq_base": "1e+06",
|
||||
"token_embd.weight": "d74257dc547b48be5ae7b93f1c9af072c0c42dbbb85503078e25c59cd09e68d0",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.add_padding_token": "false",
|
||||
"tokenizer.ggml.eos_token_id": "151645",
|
||||
"tokenizer.ggml.merges": "6b1b1c58f1223d74f9095929d3e6416cdd74784440221a5507b87b8197f2bfd2",
|
||||
"tokenizer.ggml.model": "gpt2",
|
||||
"tokenizer.ggml.padding_token_id": "151643",
|
||||
"tokenizer.ggml.pre": "qwen2",
|
||||
"tokenizer.ggml.scores": "94e247e531e8b0fa3d248f3de09c9beae0c87da8106208a8edfaac0b8ec4b53d",
|
||||
"tokenizer.ggml.token_type": "b178dbc9d1b2e08f84d02918e00fc2de2619a250e6c188c91a6605f701860055",
|
||||
"tokenizer.ggml.tokens": "1d93f6679b23a1152b725f7f473792d54d53c1040c5250d3e46b42f81e0a1a34",
|
||||
"blk.0.attn_k.bias": "5ce6617845f66c34515978d23d52e729c298d8bffa28c356a0428bef17142cf1",
|
||||
"blk.0.attn_k.weight": "a960832a9e0e83e4d95402e5d1a01cc74300fcca0c381237162126330e1a7af8",
|
||||
"blk.0.attn_norm.weight": "32c7d51cd0958f1f1771174192db341f9770516d7595a2f0fd18a4d78bd5aba3",
|
||||
"blk.0.attn_output.weight": "c67e6e7e868354a11bf9121c70ee56c140b20eec611a8955e7dfe54a21d40a98",
|
||||
"blk.0.attn_q.bias": "3e9e994eb1f03bccfc82f8bb3c324c920d42d547e07de5be83be12c428645063",
|
||||
"blk.0.attn_q.weight": "dc12132f789b97cfa1e3f5775ceb835247fa67aa47400fd09c8f9f3769208583",
|
||||
"blk.0.attn_v.bias": "a3fd0757b31fdc78af5ec320332d239c1a79d34e8804df06c5454e86955e8cc9",
|
||||
"blk.0.attn_v.weight": "f43094a2134c7ee2dcc52aac3c8b7d9d64fb0295a8adb94cabfd49213f017b84",
|
||||
"blk.0.ffn_down.weight": "18c2aec92db14f21976838a8c35d5575f80d0e4b1e05ccc0d8388d5877e80147",
|
||||
"blk.0.ffn_gate.weight": "a3a1c4ef38f8f750eabadfe3d83bbb0f77941eec1cc1a388e51852e99c8691f6",
|
||||
"blk.0.ffn_norm.weight": "b59b779c42d44b5c4cec41e39b4eb61e0491a07c1b3e946ccb5b8d5c657eda3f",
|
||||
"blk.0.ffn_up.weight": "db64f09987ea59449e90abae5a2ffcc20efd9203f0eebec77a6aacb5809d6cff",
|
||||
"blk.1.attn_k.bias": "a5c8c5671703ec0aa0143ff70a20ffdd67b5d5790ca1dfa5bba4e87e4071ed9f",
|
||||
"blk.1.attn_k.weight": "835c7c7cc95b3cb2e55bd9cac585aa0760a033896621d3e06421f3378c540f7d",
|
||||
"blk.1.attn_norm.weight": "f4c36fb6c14fce721fab0de78cc118d6f66e3a3d3ea0017bb14aade24c3c5434",
|
||||
"blk.1.attn_output.weight": "cc1e80310c97cef068e48e40b7096f32fa2138519d6209c6a1a9994985999016",
|
||||
"blk.1.attn_q.bias": "bc332780e66b0aac80ec5e63ac32344919a840db2fcc8f87bcef16a43a54138e",
|
||||
"blk.1.attn_q.weight": "d766f06c925cce38d4b31b2165b3448e1fb49a7d561985f95d9cd2fcba52367a",
|
||||
"blk.1.attn_v.bias": "9f486626fb6ed9ac84970a71e9b9818dd2758501fd3f61bb1c08540dcc7a8631",
|
||||
"blk.1.attn_v.weight": "e873d1e5bd4f4d6abfd47c0f55119c2c111105838753ee273a03c5ccea25ce5c",
|
||||
"blk.1.ffn_down.weight": "b3ce82b093f187344de04284b1783a452de1b72640914609b8f830dc81580521",
|
||||
"blk.1.ffn_gate.weight": "5cd44ad237edaca525a28a3ac13975d1b565f576d6a8003237a341ae0d156f2e",
|
||||
"blk.1.ffn_norm.weight": "4ac774ee8afaee119610c46aa1ff89fc6c9084a29d226075bc4aa4d2f15f746c",
|
||||
"blk.1.ffn_up.weight": "042d81ab5f1983d85c81213232f3bfc05a9302d9dfaa98d931ebba326b6058b8",
|
||||
"blk.10.attn_k.bias": "767ecfeacd60a2c2221ac4d76c357190849dd9cdf64ced418d9d0c7949101401",
|
||||
"blk.10.attn_k.weight": "a9f3df343227537636be8202303453086375091944e498bad11e0b91e45e8c71",
|
||||
"blk.10.attn_norm.weight": "01acd0e7b3e363f873dbfde6f0995ffcce83f5aaa10ff91c31dbf775035f6d5a",
|
||||
"blk.10.attn_output.weight": "a531fe660769604ab869f01b203eb115e025cad4c0baeacdd1bcca99cf6d0264",
|
||||
"blk.10.attn_q.bias": "356a02c9163dd660c1340fbe1e049b335ac6178891e00996131bba9ab4cb3e59",
|
||||
"blk.10.attn_q.weight": "81be0cfb227339d83f954cd8dcf35828441211c6e1d184060e3eb76085041e2f",
|
||||
"blk.10.attn_v.bias": "ed0450653284b62f8bf2c2db19c0ff7a6cf3cda1324d0a044c5e3db7bb692bd3",
|
||||
"blk.10.attn_v.weight": "c1247ff7092babd2ed979883095b9aa022b2996cab1c77fb9e6176ddc1498d16",
|
||||
"blk.10.ffn_down.weight": "fda7544965dc9af874f1062c22151c6cefc8ba08cbe15dc67aa89979e77b2de4",
|
||||
"blk.10.ffn_gate.weight": "9f2632b1dee7304d10c70bd38d85bb1f148a628a8468f894f57975b8a2f1d945",
|
||||
"blk.10.ffn_norm.weight": "94f8cbd6b17a4d5aabd93fa32930a687db3b11f086142f1cd71c535c11adcad4",
|
||||
"blk.10.ffn_up.weight": "8dc2f8db0474939a277a3d89db34c3bcc3381cfea57bd05a8426a164634d9112",
|
||||
"blk.11.attn_k.bias": "3b8e5a662b19411e3f6530714b766aad2ee41eebc8161bec9db0bc82d383a6e0",
|
||||
"blk.11.attn_k.weight": "2c29f1ed1ce53ce9604e9ea3663c2c373157e909a0d6064a8920005f6d15dad9",
|
||||
"blk.11.attn_norm.weight": "48f68a99c3da4ab4c9e492677b606d1b8e0e3de1fdbf6a977523f97b8c21ec31",
|
||||
"blk.11.attn_output.weight": "5859f3838a94898b020c23040941ed88f4fcb132db400d0849f30a01f62c0f1c",
|
||||
"blk.11.attn_q.bias": "c5ad89a5628f2bd81252ef44ef6bbcbff15c33ad16fba66435509b959c2af6d3",
|
||||
"blk.11.attn_q.weight": "d102104e5d61c1e3219564f1d0149fd593db6c6daa9f3872460c84403323cfef",
|
||||
"blk.11.attn_v.bias": "8653f7d48c5f75a5b55630819f99ecf01c932f12d33fd1a3ee634613e70edde8",
|
||||
"blk.11.attn_v.weight": "e0a7c7d89b9f2d0d781ce85330022229126e130a8600a09d4a5f920f0bbd50b2",
|
||||
"blk.11.ffn_down.weight": "4a22b3361eba8bbe1d9a6fda1812618e894c49f13bcacb505defa9badb6b96a6",
|
||||
"blk.11.ffn_gate.weight": "484698b206760d3fd8df68b252a3c5bae65c8bf6392fb53a5261b021b6f39144",
|
||||
"blk.11.ffn_norm.weight": "da69e96338cbe30882cf5a9544004387f5bbc0bcb6038e61ba2baabbd2623bac",
|
||||
"blk.11.ffn_up.weight": "26ec74f1f504d1281715680dfbcc321db4e9900c53932fa40955daceb891b9aa",
|
||||
"blk.12.attn_k.bias": "f94b49ec3e498f14f6bc3ebefe1f82018935bbe594df03253bfffae36bc20751",
|
||||
"blk.12.attn_k.weight": "ae6323d0bbcfcea01f598d308993d1a7530317e78c1f64923e36d4b1649e9e73",
|
||||
"blk.12.attn_norm.weight": "3784536a7611a839a42a29a5cc538c74ee4f9793092e5efe1b227b48f8c4d37f",
|
||||
"blk.12.attn_output.weight": "46826c00b066829355db78293ab216e890f5eaaed3a70499ee68785189a6b0d9",
|
||||
"blk.12.attn_q.bias": "b14db2d327ce0deec97beda7d3965a56c43e1e63dc9181840fb176b114cf643a",
|
||||
"blk.12.attn_q.weight": "30f67df52ced06f76b6c85531657584276a454d6ec9bb7d0c7d2ca8f067f5551",
|
||||
"blk.12.attn_v.bias": "57ab4b7e43f4fc5853bca7bfbb2702f8c2c391a49252a760abbb7b26330dc4aa",
|
||||
"blk.12.attn_v.weight": "3ccd9da0cfe241cd33a63310f3ca6d81c5bc5a50d200bfea6612ac376166aca2",
|
||||
"blk.12.ffn_down.weight": "a095774413198a83c549ce132d7c9684c0baef33145eaa889be370ef9c881c81",
|
||||
"blk.12.ffn_gate.weight": "bb3b2bbdfb065d2a0a795909c53beec327781a4a7e974bf9f99c436cea459991",
|
||||
"blk.12.ffn_norm.weight": "3b486c6cd97eb4b17967d9d6c0cc3821a1a6ad73d96b4d8fbf980101b32b8dab",
|
||||
"blk.12.ffn_up.weight": "d020b82dd39a5d5a9d3881397bf53a567790a07f395284e6eb0f5fe0fef53de3",
|
||||
"blk.13.attn_k.bias": "69381f8254586eba3623eceb18697fe79f9b4d8f2c30136acb10d5926e3ba1d0",
|
||||
"blk.13.attn_k.weight": "c4d7a31495d71269f81b586203a50abea3a9e2985667faf258c9306ec6030f1d",
|
||||
"blk.13.attn_norm.weight": "907da11075d16eda668dabe548af3cfd794df26b8ab53939af1344d91bec6fba",
|
||||
"blk.13.attn_output.weight": "ca01cf6d2b8ece2fb3b0f56f1eb76194471ac27b54fe264f99c909f5eb7fef4a",
|
||||
"blk.13.attn_q.bias": "2f5ecebafe03b1d485b93c41cff756ca57fb65b02e9d8336f14a3d26ab5d159a",
|
||||
"blk.13.attn_q.weight": "f557f8acad7f0fa62da06b5da134182fe04a5bed8bdb269e316f970c9cc440fb",
|
||||
"blk.13.attn_v.bias": "a492a88ae131e95714b092545a8752eaea7c7d2f9cb77852628ca8296c415525",
|
||||
"blk.13.attn_v.weight": "d1220b1fe9f1cc0a5a88ee239d65fec900f5eaf6c448b6c2cbe74c81e15ed333",
|
||||
"blk.13.ffn_down.weight": "53184e33440b49848a896304eb16a983efbc6b8bee0b93de8c8de716e1585fcb",
|
||||
"blk.13.ffn_gate.weight": "684bf8896f148c851506c62717e45c426921b93c10d536ecdeb0fb28259a106d",
|
||||
"blk.13.ffn_norm.weight": "6cb4e547ad8665eb7c174855c08afe1e5490fece66122522c1e9e8132d9064eb",
|
||||
"blk.13.ffn_up.weight": "c64107897e38c06727075aba4ea7940b2cdd0e278b5c555dffb2790ef553bb57",
|
||||
"blk.14.attn_k.bias": "2814ca9b160b16ae39557c9b629482fbe3a7592d372c1e1bf1ac59a2d578fde1",
|
||||
"blk.14.attn_k.weight": "3377177396463afba667742972920ebb45dfdc37e9950e1f0e1d60a2f936b27d",
|
||||
"blk.14.attn_norm.weight": "5cae870477d51dd35a6d22aaeacfce4dff218ffba693820ede6a4e11f02afd6d",
|
||||
"blk.14.attn_output.weight": "3cfe9ccf3d48ae9e95b93a132a1c6240189a277d764f58590fb36fdbb714cad0",
|
||||
"blk.14.attn_q.bias": "6a75acc2f090b2e67bfc26f7fca080ae8bd7c7aa090ec252e694be66b8b8f038",
|
||||
"blk.14.attn_q.weight": "5ef45c86d7dda1df585aa1b827b89823adf679a6bb9c164bd0f97b2aa6eb96f1",
|
||||
"blk.14.attn_v.bias": "5534480443e10ed72c31a917f3d104b0f49df5e6dbfa58d0eb5e7318120e3aee",
|
||||
"blk.14.attn_v.weight": "58f45cf3240c4623626ec415c7d5441eaa8d2fb184f101aba973f222989422d1",
|
||||
"blk.14.ffn_down.weight": "2dc82a0f20c05b77512458738130d8d05ce150cc078680ae7ee6dd7ed68d955d",
|
||||
"blk.14.ffn_gate.weight": "d4a6c6f0fcccddfd1fcaa074846622f4a74cb22b9a654ab497abdc1d0dde9450",
|
||||
"blk.14.ffn_norm.weight": "777e444932a0212ff3feac98442444e17bd8a98cb758ea3356697d0846d12c56",
|
||||
"blk.14.ffn_up.weight": "6b75f6bd00195198447b69a417ed9d98f8ca28b3cb8be82f4bad908be0777d57",
|
||||
"blk.15.attn_k.bias": "2d07211a58e6c2f23aa3a6dc03c80a7d135dfb28726b60b0e0fdd0f35ea5c37b",
|
||||
"blk.15.attn_k.weight": "e77f3c0075a1810e70df956cc51fd08612f576cc09b6de8708dcae5daedb0739",
|
||||
"blk.15.attn_norm.weight": "379a10d90609a5d5ba67d633803eda1424fc61ba5cca8d3bffe70c8b18b58ebf",
|
||||
"blk.15.attn_output.weight": "402751c12ee9dbc9db5e3bf66a7b23ebe7d36c0500e0be67be4c8b1c4357fa62",
|
||||
"blk.15.attn_q.bias": "acb37fc409ee725ceedf7a3a41b40106086abc47b76780728f781942c5120208",
|
||||
"blk.15.attn_q.weight": "89cd3047a09b46ed2bb57c69dd687f67a1f0235149b30376fa31b525898e4a55",
|
||||
"blk.15.attn_v.bias": "f081a37289cbe811978feb4da3ef543bdeb7355414d476f44e09b498da10cb2c",
|
||||
"blk.15.attn_v.weight": "8404f242a11e6d512c9ead9b2f083cda031e9b269f8a0a83f57ee4c56934764e",
|
||||
"blk.15.ffn_down.weight": "93438f43ee8cc4f1a7fd3840a6afdd5f02123e76db4f0d9474430c0100d148fc",
|
||||
"blk.15.ffn_gate.weight": "ff935a2698843e87fad9dbf7125f53e460190ec71ee128b650b3fc027fe37bfc",
|
||||
"blk.15.ffn_norm.weight": "4be80f199841cba831982e988451e1833c3c938a4d6ca1169319087bf0bd723e",
|
||||
"blk.15.ffn_up.weight": "ee9ba63c66d71053e33551ddd519878bb30b88eeb03cfe047119c5c4000fb0a6",
|
||||
"blk.16.attn_k.bias": "3f5fbabed4510c620b99d9d542739295fa6a262a7157f3a00a4889253f8341b8",
|
||||
"blk.16.attn_k.weight": "8ca6eb139b281c257324cddea97a8e9aa7c048b53075cf00153123b967c27ee5",
|
||||
"blk.16.attn_norm.weight": "290157f005e5aa7dddf4bd60100e7ee7b0baa7f11ec5c2cea5e0ead2aad3a4c6",
|
||||
"blk.16.attn_output.weight": "b1f4d80a7447f08f1c331712527f750d00147f35c042442ade96fd029dadc5a1",
|
||||
"blk.16.attn_q.bias": "e3e4e442ad4416791b468cad8de0d0d2d68c7e7df8d06002f4d49b4da9cb25e4",
|
||||
"blk.16.attn_q.weight": "cc7392fa5bb1107d3816e7e7363de252d37efd4165d065e258806291ce0a147b",
|
||||
"blk.16.attn_v.bias": "a7629830f2f6293e018916849614636d40b1bcd11245f75dbc34d38abae8f324",
|
||||
"blk.16.attn_v.weight": "b6c7856c7d594437630929c8cf3b31d476e817875daf1095334ec08e40c5e355",
|
||||
"blk.16.ffn_down.weight": "f9c0a777a00170990a4982d5a06717511bf9b0dd08aeaab64d9040d59bcbebba",
|
||||
"blk.16.ffn_gate.weight": "ed88f11bc3176c9f22004e3559ccb9830a278b75edd05e11971d51c014bd5cd2",
|
||||
"blk.16.ffn_norm.weight": "ab24abdcc4957895e434c6bb3a5237a71ff5044efb9f76c1a9e76e280c128410",
|
||||
"blk.16.ffn_up.weight": "99f594dc8db37f554efa606e71d215fbc3907aa464a54038d6e40e9229a547ff",
|
||||
"blk.17.attn_k.bias": "f236625676f9b2faa6781c7184d12d84c089c130d2a9350a6cf70210990f6bf1",
|
||||
"blk.17.attn_k.weight": "c2a4f20cd3e98538308a13afe9cc5880bdd90d543449c6072dedd694b511ee1a",
|
||||
"blk.17.attn_norm.weight": "5a9da4ee168311f487a79fc9d065a035432c6cafa8adb963a84954cf32f57a2a",
|
||||
"blk.17.attn_output.weight": "d5df7031e354186ce65dc09d6f8a92eb721c0319816f8596b0c8a5d148ed0a2a",
|
||||
"blk.17.attn_q.bias": "3212d5eeaa7ed7fac93cc99e16544de93c01bb681ae9391256ed4a8671fc6b00",
|
||||
"blk.17.attn_q.weight": "d18cd9aa7ee10c551cb705549fa1ae974aea233f86471c9a19022dc29b63d0d5",
|
||||
"blk.17.attn_v.bias": "a74ad11a1f8357742f80e2a0c0b3a2578fc8bbaf14c8223000767e07a5d79703",
|
||||
"blk.17.attn_v.weight": "da18ac0e90884436a1cb0ad6a067f97a37f321b03c70b8b03bf481339fef5c80",
|
||||
"blk.17.ffn_down.weight": "81a8a5d7a194fb53d976558e0347efbe9fdb1effffde9634c70162e1a20eff51",
|
||||
"blk.17.ffn_gate.weight": "72870d83ab62f2dcd45f593924e291a45e4ae1b87f804b5b88aa34cfd76dd15e",
|
||||
"blk.17.ffn_norm.weight": "cae39ac69b9bdaeefab7533796fdf11dbb7a4bdbdeed601e20f209503aafe008",
|
||||
"blk.17.ffn_up.weight": "e7cb40b0842468507cec0e502bbed8a86428b51d439e3466bc12f44b2754e28f",
|
||||
"blk.18.attn_k.bias": "8bfc02b94f9587aa125e2d8bbc2b15f0a5eb8f378d8b3e64a8150ae0a8ca3df2",
|
||||
"blk.18.attn_k.weight": "434bc3b3332ea48afee890aa689eb458a75c50bc783492b0cbf64d42db40e8ad",
|
||||
"blk.18.attn_norm.weight": "d6ffc09396c42a70d1f0e97d81113eee704d3bfc9eeae2bed022075a5dd08075",
|
||||
"blk.18.attn_output.weight": "133f001f81f3b082468a7de67cb2e7a76508fce34bcc4dee7f0858e06eee082c",
|
||||
"blk.18.attn_q.bias": "758d0e28bf5e660b3090aafb70e2a3191b4f3bb218d65e9139a086ceacaf599f",
|
||||
"blk.18.attn_q.weight": "12d7b86fc1b09b9fa7f8b7ed43d8a410892cec8672d0c752f8346f6193343696",
|
||||
"blk.18.attn_v.bias": "9efd15bab0519462431d6c6e8a5b7dd4e151dc449468097ee0ddca369c0ecc2e",
|
||||
"blk.18.attn_v.weight": "f631231a79d4a2e9730fb2e386d8c18621eb3fb7900fbfdff5e6d52cc42db122",
|
||||
"blk.18.ffn_down.weight": "874a2dddf456f3ab56b958b0860d71c8c680a6f89322c9bf6b2f32a113592300",
|
||||
"blk.18.ffn_gate.weight": "4549ef8976c345a511df4a7133bdaf6fe387335f52dfd8a4605a8ae3f728c403",
|
||||
"blk.18.ffn_norm.weight": "80c258a2536a860e19bfcbd9f29afa13214fbb4c34bde0d4da51287d354e9a59",
|
||||
"blk.18.ffn_up.weight": "8b03308a581457a3c038b7a086f3cdf14941d7ad4107c4bd6d9d6b062fd00d73",
|
||||
"blk.19.attn_k.bias": "e77f7b0c8e3e0a9b0d61918cd88371047752a1b02b1576936f4ec807d4d870ee",
|
||||
"blk.19.attn_k.weight": "a2a318e93355230c0d0f95c441b080bf9c4914507255f363fb67a5e771d4d1e6",
|
||||
"blk.19.attn_norm.weight": "9a4bdeb3970be21ac74a94c2c81eb36986533db81b78db6edec48d9802910d59",
|
||||
"blk.19.attn_output.weight": "2369b103dd3947e2cef02b2669b405af5957fb3a7f9d0ff40646078c4b4317ad",
|
||||
"blk.19.attn_q.bias": "e20bf427bef69059ae84a5d9f98f7d688489627f198fb6153def018ff9fd2e34",
|
||||
"blk.19.attn_q.weight": "45a3bb3bdfd2f29dd76e5f78ddae73678b9a2a85dfaf609e460240ef5b7be2ad",
|
||||
"blk.19.attn_v.bias": "a441f58a3e02ed86ee1819eefc9bd4e8b70d11b864a929d58a2c2ac0aeb8203d",
|
||||
"blk.19.attn_v.weight": "30b0b04480c510450a7abb2ce9fa05c65b150a3cc4dc76f8916bf8d013f1b6be",
|
||||
"blk.19.ffn_down.weight": "eebb9ab8fdb6a6efcfff8cf383adac9ec2d64aeeff703d16ed60d3621f86c395",
|
||||
"blk.19.ffn_gate.weight": "3fef1493029298378886586478410b3d2e4e879f6aa83c07e210a7ce6481817f",
|
||||
"blk.19.ffn_norm.weight": "e1be99ea1e8fb9678f7b8ba200f3f37e03878f3574d65d57bcd3a9fd796e2112",
|
||||
"blk.19.ffn_up.weight": "f07cf25e09394fb69fe3ef324bdc0df9a4cecf3dc53070b8acc39e6d1689bf82",
|
||||
"blk.2.attn_k.bias": "b29baa8221f125eff6b8ac1a950fa1d7cfc1bce7bdc636bf3df7d4065ab6466c",
|
||||
"blk.2.attn_k.weight": "4bd0c179bced8bc37a09f5748c394e0cf50273942fb38a866e5cf50b6c96c437",
|
||||
"blk.2.attn_norm.weight": "07b3edc6a6325c3428aa12f29bcae0be0de363ce61a6af487bc5c93fb8c468d9",
|
||||
"blk.2.attn_output.weight": "056b5b31dbc81087c81b9d41c25960aa66c7190004c842ba343979644d7f4d88",
|
||||
"blk.2.attn_q.bias": "479b6212401e097767c9d52b12a1adb8961c0fce9fcaaab81f202a9d85744376",
|
||||
"blk.2.attn_q.weight": "f89196076f446a6dd8a9eee017f303504f9c03094c326449cee5a7fc0a97fade",
|
||||
"blk.2.attn_v.bias": "ef9b1b986dbd9d7291027a88b67dc31434435b20e76e4f1e9d6273ebd31224f0",
|
||||
"blk.2.attn_v.weight": "9322f4f00e85f8c0936845c51ca64b202a93df104f36886986a8452a8e4967a5",
|
||||
"blk.2.ffn_down.weight": "7beac0d2440dc49af33ededb85a6cc3ba23ab33ad3ffa5760714b2ef84d94f6e",
|
||||
"blk.2.ffn_gate.weight": "818a93864a5890c1f4dc66429004fad07645a50142350e9bff9a68fe24608a52",
|
||||
"blk.2.ffn_norm.weight": "152c924d5514942ad274aafb8cc91b35c1db3627c3d973d92f60ff75f3daf9ba",
|
||||
"blk.2.ffn_up.weight": "9c9579e600f209546db6015c9acfeda4f51b6d3cca6e8db4d20a04285fe61a37",
|
||||
"blk.20.attn_k.bias": "fd22bfeffb63d818ce2ff1ea2ace0db5d940f7a9489b6bfc1ec4a5398848d7fe",
|
||||
"blk.20.attn_k.weight": "f74439bc74c2f9252130c9c28384fd7352368b58bb7ce3f2444cf0288dfff861",
|
||||
"blk.20.attn_norm.weight": "5c15d2613df87be6495fb7546b7dcedd2801d12fa5ecc02c877df889330e8f37",
|
||||
"blk.20.attn_output.weight": "6731a39286a67f6859832f96695732e579e14e0c36956eccd1edce3db11595b8",
|
||||
"blk.20.attn_q.bias": "04466e5a3f454a19b9b433fc2585396feac780027ece7ccb4e4bb3e406fc14d8",
|
||||
"blk.20.attn_q.weight": "ead4c71daaeb17bf20d014a34c88b97f238456488e815ae0f281a5daf6fc99b8",
|
||||
"blk.20.attn_v.bias": "adcc848e043025de9bd55ccb14dd8fb6343e8b5185ed07e12964be41d0faf99f",
|
||||
"blk.20.attn_v.weight": "81bfc23f83526386a4761c2c16b6a93cd0bbf9d846c1a51b82c71f1474a465f1",
|
||||
"blk.20.ffn_down.weight": "9bf660af3bafad919d03173c89a65fc9c89440a76c42c9e55e4d171076f3c17f",
|
||||
"blk.20.ffn_gate.weight": "c04b4f3ccce44917ee228b998e2c19dd702aef10a43413afb152e808b5ac5c42",
|
||||
"blk.20.ffn_norm.weight": "3d5b555d7746a71220143c6b8fff5ce4eb63283d9d9c772f1233d848f69f4ff4",
|
||||
"blk.20.ffn_up.weight": "d7a196505c39e5469dfc7c6958bdbb54e93629ac1a047a6663ed96b318753094",
|
||||
"blk.21.attn_k.bias": "4db1f48e5c6a3bc5720a5da813bbef08283e6269e12d83f8a9c54e52715d8011",
|
||||
"blk.21.attn_k.weight": "c687b2f0e132a5e220a2a059b61aa2a537f37d8a674d7709f87880637b263b31",
|
||||
"blk.21.attn_norm.weight": "ec23b0ff847a4b45585ab8e04f10fc20bb1637c5f1fbcdc4d73f336bcb5d1bd0",
|
||||
"blk.21.attn_output.weight": "01255390576316c1731ef201e32c6e934eba356c28438cd06d9027ac6a3ff84f",
|
||||
"blk.21.attn_q.bias": "3098f37205a15418e1681e407c82b7ce7c6fda6c6826b0590a13e1b68a38a1ea",
|
||||
"blk.21.attn_q.weight": "30ea62cbb702a5359229dc96819df17ee535e2e9988d044b005c73ea536e1005",
|
||||
"blk.21.attn_v.bias": "7bbedb2c22a04737f21993115701d4a06b985b7ca3b64681f53cd1be8d7ea39e",
|
||||
"blk.21.attn_v.weight": "e11905e63579e36fbee978062af7599339ae29633765a4835628d79a795ec8df",
|
||||
"blk.21.ffn_down.weight": "84def2ffd8aca766f9ce12ed9ac76919ab81eb34bdeae44fa4224417c38af527",
|
||||
"blk.21.ffn_gate.weight": "4e99f05377b4a0b8d875045530a5c59dee6a46ac8a45597f6579f6fdfa800787",
|
||||
"blk.21.ffn_norm.weight": "af48f13d03fba38ff8794a5f5005e666e501f971ca2e30bbded2777a8096f37d",
|
||||
"blk.21.ffn_up.weight": "a29541c39a6acbc364be86994632a5bf55d701027cb7f23320f8c6d55ee42c91",
|
||||
"blk.22.attn_k.bias": "c97f84db6c75422df6ef5768676d4e9abefaa3b8337aa2730ff260f8fc350480",
|
||||
"blk.22.attn_k.weight": "af9a0c56f68779513e95be11611b7be6175ddae27d48bee9dd72fdbf05f6cbfa",
|
||||
"blk.22.attn_norm.weight": "1c7518eb5bcff4a202c6f4a2827f14abd76f9bcc64ce75fe9db60b69437a5c9c",
|
||||
"blk.22.attn_output.weight": "1abcf1f3caa2f59dd018646b93f9cf8fd30d49e98a473e6a8704419a751be46f",
|
||||
"blk.22.attn_q.bias": "7221e01cb692faf2f7f8c2eb6e2fac38a1b751a9c9fdb6a21a0a936eb0bf4b96",
|
||||
"blk.22.attn_q.weight": "faaf8fb7b6c19f343d47f3ea6b57151fb46c787e0b3bd2c292fd327d3d4d8e35",
|
||||
"blk.22.attn_v.bias": "3ec05942e82d735de99dfd0d8228d8425e63e2fc584da98b3326bdef89ecb2e5",
|
||||
"blk.22.attn_v.weight": "42e7b0ad06db76227837da9d4e74b2db97f3df4050ecb3a87cb9b55e08dfcb42",
|
||||
"blk.22.ffn_down.weight": "87ef98ad2d0e824b0fa5ad8aa18787162922e527c9b1b721a99bc07d3bf97c82",
|
||||
"blk.22.ffn_gate.weight": "562d6e5a1654b03aaa0e33864d23c10297fd4bcaa72d30fac69fb771ee1df9d6",
|
||||
"blk.22.ffn_norm.weight": "f8a405dee467749d59427ce05cdd4b9c11bb18934a89258ea461f013b7d251f5",
|
||||
"blk.22.ffn_up.weight": "90e1f4ae4062649d4d838399eb353e8bb8d56a49982b6a7f64aa3945377f7187",
|
||||
"blk.23.attn_k.bias": "9ad22178a85f3be7e25f5aff462f31627466364f2f5e92f265cc91db0da9a8a8",
|
||||
"blk.23.attn_k.weight": "d813beffb10f03278f5b58eea0f9d73cdcb7b5b4045ae025c379592e854f7dfd",
|
||||
"blk.23.attn_norm.weight": "f583c9836044bdb056d6f8911088ac28add68e500043ae1f97b5d9158fe3d769",
|
||||
"blk.23.attn_output.weight": "02789911ac3b97f6b761e958b7dd6dc7da61a46a1be92bd0b346039ca7ecd2b2",
|
||||
"blk.23.attn_q.bias": "38c4970fb9b4f7e4a139258a45639d848653814b4bc89ea9849709b13f16414b",
|
||||
"blk.23.attn_q.weight": "eb694be9a5ab5858b8dab064ee4cce247dc757424e65282989bd4d015b8580ce",
|
||||
"blk.23.attn_v.bias": "0a25f6533aa7e7a152a4b198cf6c411c2408a34afa4f161bb4d5ffba2f74e33f",
|
||||
"blk.23.attn_v.weight": "187e1bac6b70f74e6364de226565aa8275ee2854d09cbe5895451a689596049e",
|
||||
"blk.23.ffn_down.weight": "88880dd9ba7ee80ade972927f810b5d2c30a69520c615190b27f9daabc0a8c5a",
|
||||
"blk.23.ffn_gate.weight": "5abec63197935ab3eb8e6de0a5307396ec46cdb1cc5de25d87c845f3c4a3e887",
|
||||
"blk.23.ffn_norm.weight": "60e1f5e6310c3a531c554a6bb7cd883aed58db1e51853f739436ea461c1843d7",
|
||||
"blk.23.ffn_up.weight": "3d7f502771743f4a634188dfcd8b8a384fb07467ca8528366aee59ddb25b7bce",
|
||||
"blk.3.attn_k.bias": "0b6b442ebbac29c8c4b67e8e3876d0382dd2dc52efdf4ab0ebbc6f71b6252393",
|
||||
"blk.3.attn_k.weight": "480f40584fbda692c26f2cee45f5923780b236f8b4e8ec7bbee0237777a0918d",
|
||||
"blk.3.attn_norm.weight": "39872be2af31bc9cd6b583ebba6fb759f621d586d66e5a2fc0b85991615a8923",
|
||||
"blk.3.attn_output.weight": "924b2c80d8513bf637f8ebb3756a340d9cf2243de723fd08d7f5dccd46b3f8b6",
|
||||
"blk.3.attn_q.bias": "863c9d848156847a3fe9bbc44415a4395245b5d13e95673c014fdb71e494ab0a",
|
||||
"blk.3.attn_q.weight": "bff73ee5de92fba8f6c089bbb19ce57e17ab3c9c29295712804bb752711b882e",
|
||||
"blk.3.attn_v.bias": "e1b6fea126e86189112fcdfee79ffc66a087461527bc9c2dc52dc80f3b7de95e",
|
||||
"blk.3.attn_v.weight": "7812b7f5133636f06cdbb4dcc48ef7803206538641b6c960777b37f60a8e6752",
|
||||
"blk.3.ffn_down.weight": "00b393d6a7e3ad9b5224211ccdbc54a96aae151f24ed631764ac224972a6bc82",
|
||||
"blk.3.ffn_gate.weight": "cfd63fa3a038af05dc53c6eeb3c192f1602f26ff24cb840bcf1510fcb37b5513",
|
||||
"blk.3.ffn_norm.weight": "7389fc240a282949580ea2f5b0d7973ac79f32f76dc0155b537bb6b751f8e27a",
|
||||
"blk.3.ffn_up.weight": "2a945f47090df9cb16f92f1f06c520f156f8e232182eaaed09f257b8947a2a62",
|
||||
"blk.4.attn_k.bias": "62533c31f0de498187593f238c6597503fef2a92e920cd540a96bc5311b3b2a0",
|
||||
"blk.4.attn_k.weight": "93e829868bffd980a8e589b9c4566cd81e6ce4296a5f357a2ae93febe1284156",
|
||||
"blk.4.attn_norm.weight": "9e0aaa4bbdd1389890f8abec20533f3ab16d61b872b1a8dbd623023921c660a9",
|
||||
"blk.4.attn_output.weight": "74467d6f44357d67f452ac49da861468b38e98057017bd38bc9a449f9d3538e6",
|
||||
"blk.4.attn_q.bias": "8e6d9026fd69b314c1773c5946be2e11daf806ef22a5d91d744344fd30c58c59",
|
||||
"blk.4.attn_q.weight": "e5bfbafd94a4d530f3769f5edbba8cc08d9b5bee8f66ebf4cb54e69bc0b7f63b",
|
||||
"blk.4.attn_v.bias": "20c570f92022d9905eb85c0e41d1fdb30db22007a9628b51f512f8268d6c34a2",
|
||||
"blk.4.attn_v.weight": "9638d459d61da03c9dd34dad985e03c43b4f8a5bc9701a82153478329b0517e0",
|
||||
"blk.4.ffn_down.weight": "9d91b06e89d52f4365dece7eaeec50f81e52cb2407b333248a81e6e2f84c05b8",
|
||||
"blk.4.ffn_gate.weight": "bf6350a79c6a6ee9146edfd788b88d4a4c2b54db1aa0adcc1464dbba8a84b646",
|
||||
"blk.4.ffn_norm.weight": "11a70a6b9f7ce336292f4e3a2c6c92d366d4ee4306ad4fdb1870fde107e9cc31",
|
||||
"blk.4.ffn_up.weight": "64f23f493d02b147a72a59605e6b7dd1c4c74f6813a38a2a60818bd66f697347",
|
||||
"blk.5.attn_k.bias": "f6c2c279c0ed686f298ad1e5514b5cd882199341f896abbb2c2129d4c64ce9c5",
|
||||
"blk.5.attn_k.weight": "0e682f75870abf9efaca10dac5f04c580f42820ecf4e234d43af967019acb86f",
|
||||
"blk.5.attn_norm.weight": "01efae7653705e741932fcd79dff3be643d7e97f4b5719b887835dffe44b3a82",
|
||||
"blk.5.attn_output.weight": "69e841d00d196acc489cd70bc5ffbbb63530ac5fabb169d40c4fb3a32ebb8ed8",
|
||||
"blk.5.attn_q.bias": "f3304d76ccd44fed887565857c8e513b1211d89a5d3e81782de507ab3f6fc045",
|
||||
"blk.5.attn_q.weight": "98612a6b7920a247853ada95c240807d4ca8e43604279e7a2fc9bb265ae40469",
|
||||
"blk.5.attn_v.bias": "39940a9b353ceed3edfd4a39b985c9520490aa1b9f11749c94fdf6d879d1a259",
|
||||
"blk.5.attn_v.weight": "839f84b828cf83aecf479a0dc7bc86cce05145ef77dcf29916dc3e0680f5b665",
|
||||
"blk.5.ffn_down.weight": "1f48cbb0960f15e06ab8a3754ade792995a655856389ddbca629c07e89d1b114",
|
||||
"blk.5.ffn_gate.weight": "33d8219fce3189e1aab376039896eebd4ad36ebd26a8278cd19b26e4357e4f81",
|
||||
"blk.5.ffn_norm.weight": "0f4a0f83d37127fa4483f2905cb4f38ef6ddc71584b6cb05632c62a9af313dda",
|
||||
"blk.5.ffn_up.weight": "22a64a11e5f0a1ff45ca327bf9e1efa258f085ff6a96edc398b7474f725b4514",
|
||||
"blk.6.attn_k.bias": "baa91df99d4df2d25e8d590bca4e334b97f2d9aa3df8e748fedc8a6188499111",
|
||||
"blk.6.attn_k.weight": "121f3b9f4b9491996499392e2688a929cafe102a67920b4cb2a039349c43d8eb",
|
||||
"blk.6.attn_norm.weight": "b4cf987e923d71f2f84c58d20ea8af7576b225bf61952145b489fdd395e3d411",
|
||||
"blk.6.attn_output.weight": "a112642150a138d54b2a4038042fd33619035a35694771e966f3575856c635d6",
|
||||
"blk.6.attn_q.bias": "a97ea10469cdfa3fdddf8bad6de683ef99f6170eb8d29d15dcf6bf4bce37c5a3",
|
||||
"blk.6.attn_q.weight": "d80c787019317a87361de6bbc7df6701357216bdd9b404522cede34a719a5500",
|
||||
"blk.6.attn_v.bias": "d846269db9cd77ae28da26ba0914cace1b6754bd5301af9c44607085dfcbd2d7",
|
||||
"blk.6.attn_v.weight": "06567c433e8a391647633291b50828a076ad7c2436106bb9278c60a3f8fccb3b",
|
||||
"blk.6.ffn_down.weight": "f15f66f56b3c474eac8c6315c5fff07c3e29c6e483d7efd4d303c7f43814be91",
|
||||
"blk.6.ffn_gate.weight": "47768f89c6da8eefb29adb766ff4eb38c9dfd79320bbc1386248319fcbcf567f",
|
||||
"blk.6.ffn_norm.weight": "7f8195e6b148212967145fc9d86ce36b699cff0de026042245c2d344f1ef8510",
|
||||
"blk.6.ffn_up.weight": "53d7707ae4347aadb445289f9f87a008b72df5cb855b00080a605442fdd8edf3",
|
||||
"blk.7.attn_k.bias": "63e274df3217dde25b8369a383e480fe4f6b403a74385f15ac0b5db71dce2744",
|
||||
"blk.7.attn_k.weight": "f6fce88602f5945eee09767acbcad387d132614e6da39ae359f2bbf380d94b1f",
|
||||
"blk.7.attn_norm.weight": "bbf5dc7336c0f9a511afef6bf5efeffd78f1b83940850c3eb7eb20c621b75656",
|
||||
"blk.7.attn_output.weight": "d9fb907a138396a859cecbfcb377927308dc93c24c7fb52dba5eb59265feadec",
|
||||
"blk.7.attn_q.bias": "f02ba1318346af77e309f40aee716e2de7ee8cab67e67b17636db9bf40894fb0",
|
||||
"blk.7.attn_q.weight": "54a691e824be287a61c35c172edc01922ed792d2addeee029afc17ba6c7e11b9",
|
||||
"blk.7.attn_v.bias": "3a4f182f51e84ce862d558fb2751b91802b65d74596bb14d624808513a8a83ec",
|
||||
"blk.7.attn_v.weight": "a142fe6e106d3ab484e2dc6f9c72b8fc0a385279dde08deb1ad1fd05ac25deb1",
|
||||
"blk.7.ffn_down.weight": "8daf7e8c430d183a4d6ab3eb575fafa4b5e31689f68b290c8b370411ad9d0f12",
|
||||
"blk.7.ffn_gate.weight": "a2a786b45eb660994254b48e2aaf22f3e9821cfb383dee0ba04cc4350a2f8e72",
|
||||
"blk.7.ffn_norm.weight": "73828bbc8c9610cc139fcf03e96272648cdc291263251fe3a67367408deb69e1",
|
||||
"blk.7.ffn_up.weight": "e85dd0f63fed449ce16893c5795ea6a050a2d7a66d9534410a227e22c905dafa",
|
||||
"blk.8.attn_k.bias": "91a752a6e2c364e5ee6a015770fe289aece4911ae6c6bbfe74ac52f465465f93",
|
||||
"blk.8.attn_k.weight": "99c069e92c43a2efb74e23188256b3cabbbe06399878e681ce203a05d5da378a",
|
||||
"blk.8.attn_norm.weight": "c76d36d3cc06aa2a9edb1abf9f602bb7ed61ac9d61f8ef7ed736a1e619abe717",
|
||||
"blk.8.attn_output.weight": "ee5ff156a2625e1f203f65e69b514f9df04bd9a5e82b28e3876e16cf1c6f65c5",
|
||||
"blk.8.attn_q.bias": "8fbd868a93b330c8b0418b488c5301f42a7eb0c58445a4e515d56777f1d96ed5",
|
||||
"blk.8.attn_q.weight": "9f20ef86e80098ba52a3a31ebcc315bea3a614dac9cba7ac1db02f156db9b577",
|
||||
"blk.8.attn_v.bias": "c4813571d5d618742183a7890c0b89cd7f18e210c758f63aad564659bc38a26d",
|
||||
"blk.8.attn_v.weight": "ea88e1a4cf8bd56e9a88ada427d2b0cd352234827640757ee2a9ed594fb67a53",
|
||||
"blk.8.ffn_down.weight": "b0d1a7495811580b189aaa3e20ea871d6d01ed7b6c23e59825078ef786944ff2",
|
||||
"blk.8.ffn_gate.weight": "0a17c0caa0b06721c49b59b2a63a5dcbf744dd1cffa55962b404ba910c658a62",
|
||||
"blk.8.ffn_norm.weight": "f15f109d4a8e9d1ff7c71fa5bc6373df7ee80c5f7d1de3fa0d4849d747e36bcb",
|
||||
"blk.8.ffn_up.weight": "bbf4c5c4c5c8a0f9ae8b88e3cc8b86f81b98148722d5a350995af176c0b774f2",
|
||||
"blk.9.attn_k.bias": "a7f60d962686b8ca60f69643e0e0fa8614688be738fb0b1c6bd54de35c2beb5e",
|
||||
"blk.9.attn_k.weight": "dd80ce4adb00e338fc04b307e4c18a27071f4ba4397184a24d765e6e4a268ef4",
|
||||
"blk.9.attn_norm.weight": "721e6487547e2b3986ab4b4e2500ceade59d908bccf4436e1e8031f246deb2bd",
|
||||
"blk.9.attn_output.weight": "5a800af39107b363861e5f5173483cdcd644d8ac3b0c8a443b9c759d71285db8",
|
||||
"blk.9.attn_q.bias": "0a19b4925ea8ca8067acc909b058adc327de3874cfc94cc9eb4a106d3f370123",
|
||||
"blk.9.attn_q.weight": "93e84906684c0c7ede79967236d9fc8344da84a9f1daa04e8295c2c9b6b26a24",
|
||||
"blk.9.attn_v.bias": "615421f812f821e230ecde4e6da35d868823248355ce7e4e51e2d650ead565f9",
|
||||
"blk.9.attn_v.weight": "7f4913e289aefd9ceecbdaf9767b1e95303f5d59dd67ecb2cc15768477f4d08e",
|
||||
"blk.9.ffn_down.weight": "95d1b3933221e87dc4af70dd566daec9498bf358070b8d26f1fc70766a84a152",
|
||||
"blk.9.ffn_gate.weight": "530f2d04f6a1fbffaaa5f2fbc3a328ebed7b330e3af14b4fc7d8a51b13ad8d42",
|
||||
"blk.9.ffn_norm.weight": "28077de416217ea1df94b96017bef4cc562ab62e51b1a03a671c70abc29ce52a",
|
||||
"blk.9.ffn_up.weight": "b87b6190778aaee4695938e24ac6c90dbbee6dce7c5c2ab5bc26ba4564581822"
|
||||
}
|
||||
124
convert/testdata/all-MiniLM-L6-v2.json
vendored
Normal file
124
convert/testdata/all-MiniLM-L6-v2.json
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
{
|
||||
"general.architecture": "bert",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"bert.attention.causal": "false",
|
||||
"bert.attention.head_count": "12",
|
||||
"bert.attention.layer_norm_epsilon": "1e-12",
|
||||
"bert.block_count": "6",
|
||||
"bert.context_length": "512",
|
||||
"bert.embedding_length": "384",
|
||||
"bert.feed_forward_length": "1536",
|
||||
"bert.pooling_type": "1",
|
||||
"tokenizer.ggml.model": "bert",
|
||||
"tokenizer.ggml.padding_token_id": "0",
|
||||
"tokenizer.ggml.unknown_token_id": "100",
|
||||
"tokenizer.ggml.cls_token_id": "101",
|
||||
"tokenizer.ggml.seperator_token_id": "102",
|
||||
"tokenizer.ggml.mask_token_id": "103",
|
||||
"tokenizer.ggml.token_type_count": "2",
|
||||
"tokenizer.ggml.scores": "6db964fe67338aca57790481a390121ff3dd643eebe49f7dd308029ad99abb6f",
|
||||
"tokenizer.ggml.token_type": "98d247c5404b6b18f05f133b92dd56edf6efefefac326794b00d7b351f6c5aa1",
|
||||
"tokenizer.ggml.tokens": "9efe405e229a45ff9916f54c475d151d2200cd2ab0006f347abfb069cf096c86",
|
||||
"token_embd.weight": "8c1ee80a9ea4f65aa385ba30112010068af3d209bebc6e149d3d4589c2cd0a5a",
|
||||
"position_embd.weight": "6c516f0b1c4e2388ab90394dd80ad69e4e4509b890982fc3408108ae66210eb6",
|
||||
"token_types.weight": "f879f8e422ed211948f28b560d3c5e17aae7993f063b51196a28cf5c0fb3da21",
|
||||
"token_embd_norm.weight": "75076e095d717aab96f8b6beeee503c27940d9a76f2b891a0e3de72f8a6043e4",
|
||||
"token_embd_norm.bias": "298735285ffe944e1bf03e5d35c7280326b85cf121bde9874f1af5dc51ab939d",
|
||||
"blk.0.attn_q.weight": "ab0923ce4c1549175112dcdfcc860fe30137f991e03ea6857fb5993670adaf6c",
|
||||
"blk.0.attn_q.bias": "a3ec29551dabf976e1d34256b8ab5ab7b758f3ed9742c3cafdbd984d5441df62",
|
||||
"blk.0.attn_k.weight": "4c1038a6d035c3e9ffed7fa672b614627814752503755fbad0cfb76a41ad71ba",
|
||||
"blk.0.attn_k.bias": "e0363930eb588d91816aa3d230bb03b6e2551c165117b80b8d60397413819ef9",
|
||||
"blk.0.attn_v.weight": "425e2e53e3f00ce98d29c3e6a161eb55d3e6ae0d96fdb9f6242d1c4fd6eef4b3",
|
||||
"blk.0.attn_v.bias": "6579173a1e65ee124fbd0bd53cbdca4225515b4f2c5f18fb1bfd000f5978f9bb",
|
||||
"blk.0.attn_output.weight": "a6d70a08cd7164de5d12af65d86d657c3db35aaecde778b2b3fda9193c4c9802",
|
||||
"blk.0.attn_output.bias": "2b8d12c4f9a9c5bfaa29c597839568f6e0525cb41eeaf64ddeb6bd84dfeb9701",
|
||||
"blk.0.attn_output_norm.weight": "bbe6e502a473228b525aeed26cc31b7db123ad63bdc5a6eebac6ea70b8b51d62",
|
||||
"blk.0.attn_output_norm.bias": "36eaacaf0007c5c62daea97aab0115390c0682914f78482e37eb76885f4b7a50",
|
||||
"blk.0.ffn_up.weight": "24654561c76ce387d125759ba843f06b904ef721fcceaeff6ccc62180a48e874",
|
||||
"blk.0.ffn_up.bias": "fd3f0126aa1d95768fa60eb6f4ab8a2763cfcb7e5405f35b92353031d86f4d34",
|
||||
"blk.0.ffn_down.weight": "97a829763a6a5bf3329ceb4d39c424ba4787d61653a5b0bbd1f84782e4d4e0ca",
|
||||
"blk.0.ffn_down.bias": "7aa980c30ae8b4ee7f69df28808dbf5c431f56ccc4a80340f644a0419f16c054",
|
||||
"blk.0.layer_output_norm.weight": "ef30dad4c2a083ae1ff5039a2a6cda60ecc89bf1e486a6f8c0d15f50589603f8",
|
||||
"blk.0.layer_output_norm.bias": "8b1b77e67568b1bce43fc476de1b177c53ff688d66beb66995e8eb3dc290da8a",
|
||||
"blk.1.attn_q.weight": "284331622a1f6f9b87ccee4f652bd66a394ca493c4d93be4d1844e4f6159ad10",
|
||||
"blk.1.attn_q.bias": "e24ebd4860330e08f6bfdd077a82db0bee33f4c8846cf1db26327a34754c7069",
|
||||
"blk.1.attn_k.weight": "729dd0d555544b5bd0f7580b3c8b384256b974605f0e7487b95f295aa032997d",
|
||||
"blk.1.attn_k.bias": "2aa51a828a858f35473f54477583fea54ce2ccc34ea60fbd1d228fbe9bca827f",
|
||||
"blk.1.attn_v.weight": "6be304671cc311d5ca5c103f2b51467ee800c589bc5b8101e09ff5aed1f68c21",
|
||||
"blk.1.attn_v.bias": "43bcbab78a8819e07f723bc9e5b737b71e87a7594f15234e882b63e327a64199",
|
||||
"blk.1.attn_output.weight": "15ec8a1a12b26c9976445308a09f748ab0e4bef0f583d13ab08c3129f8738d73",
|
||||
"blk.1.attn_output.bias": "dac2146f4baa6ed16f6c0dc7443831fb7ec79bedcceafd80d1a4b628a1bb072d",
|
||||
"blk.1.attn_output_norm.weight": "d2151eb33bffac536787a4c9a5d2b31c7a80b17c4611877842a3cce2cd6e98d8",
|
||||
"blk.1.attn_output_norm.bias": "31e1b779716dafb855d2cf5631ee168a0ccf372eb9c6ea6091f66fa97a9b9d2d",
|
||||
"blk.1.ffn_up.weight": "a57547fc3fc3b77406f5cdcb0c87af9bc184701f175c39c1f35297826fce3cc7",
|
||||
"blk.1.ffn_up.bias": "123be6d541d086202913c75d878c54d59a749f3af7b58f7ef9eb9e7c62a24c9a",
|
||||
"blk.1.ffn_down.weight": "cfdb79788377e5cbded8790cd41b9e66c397ecab75474071fcd7cf32d30f9613",
|
||||
"blk.1.ffn_down.bias": "bcb58315519a573097960891c9ae41cf4c685ab78c3e0e77471471758a7eae88",
|
||||
"blk.1.layer_output_norm.weight": "819b554271452bfb1d84c2603b90377b2e41a0ac1e3aa8b417ccf9dce63375bd",
|
||||
"blk.1.layer_output_norm.bias": "47a3433ac27f5ce8947fb38dd491f3706df4ef6adb0ddf74612bf0f54b19e164",
|
||||
"blk.2.attn_q.weight": "1557a9ea852b1880551f7290e00aded4f35e6c4180fdcbed1b0039bf805f639e",
|
||||
"blk.2.attn_q.bias": "c3bfe5f3066f655fd36b055530997b59ff33ef013563aaeb3cb8ff07dabd59a9",
|
||||
"blk.2.attn_k.weight": "cfd08eb69c61ae2f9f14f9b7ff5c5394ca264b1a9f3d48156677f90dd1766289",
|
||||
"blk.2.attn_k.bias": "9b839bc0e79974a0b3f5d1895972bc6f5c9a1bc16052e1af786e6a530758152d",
|
||||
"blk.2.attn_v.weight": "02b26b1208480eaeeb00e7b4cf8b690006ca14759357fc44ed4a2a8924ead993",
|
||||
"blk.2.attn_v.bias": "e7e6f0089fded1659a867ab736c220d9653ea7da6b1b94baf5c8d30a748b63ab",
|
||||
"blk.2.attn_output.weight": "a1db121c7d33806b349cadd050300a57db49fdc91224fd07c9ac43bf4299dc79",
|
||||
"blk.2.attn_output.bias": "7675128b6a92555cd955c820311e91e9417d31f48848f45d047b4100c62148b3",
|
||||
"blk.2.attn_output_norm.weight": "5b4595e0fbcba67a700c4331adf746d2fba3546364a4db5607ae241947bb1a21",
|
||||
"blk.2.attn_output_norm.bias": "7b8e16826ea30e5a2ba0b02e0095a901775981a296e98819625320e983060d08",
|
||||
"blk.2.ffn_up.weight": "a0d815d946ac07a65095c4ae4df77b818845e6d97795c7d82f55e689d944db59",
|
||||
"blk.2.ffn_up.bias": "ce37c0a4174d6bf773ded7bd016ede627ad3bdb8bc99b9992a18dc8e8898f252",
|
||||
"blk.2.ffn_down.weight": "f6231d2a25426fbd45b9f1160aa484220eb227ceef0348c4a6a6de890606e5ef",
|
||||
"blk.2.ffn_down.bias": "429e00556e8dc63a785238b309b9d83738500c1ef6d736fe6526ad88ea496d27",
|
||||
"blk.2.layer_output_norm.weight": "651457a573adf3f7dd9ee5dfe1c8e89389e94443993aab77ec6a0b05aa621e35",
|
||||
"blk.2.layer_output_norm.bias": "41fbbeda7fd89b0cef5f945ae44011c316982390401d6f75ba8c6d365e185247",
|
||||
"blk.3.attn_q.weight": "95a43f32949d2cb8d22815bb27a44abfc6665ba96221af817dfe058cb6ca72c6",
|
||||
"blk.3.attn_q.bias": "f4e34385e75d8108b6b3bd336106e2133a8c9be0cc343dfe5dc48c32a823c7cb",
|
||||
"blk.3.attn_k.weight": "6b892da6a17d4d3265265a15f695864a31813ee8c8e710ae9bc9e1adbc6c9a18",
|
||||
"blk.3.attn_k.bias": "40b8067b641a56014cee42548240aa8930820958b1933004892b5f04fbaef39e",
|
||||
"blk.3.attn_v.weight": "9fcd5922319dd2a461082a5ce040c1dfe65d87d70ca6547dd0b46eeecc3eeb2b",
|
||||
"blk.3.attn_v.bias": "b528c56212e66931fdbe267ac327a9c2f87cd03baff3ea719e30afe681da15f1",
|
||||
"blk.3.attn_output.weight": "e3b178c1b03981e75510e0d277af23ea59cc404b5394e61bd32291825719b502",
|
||||
"blk.3.attn_output.bias": "712c84d39a6a5a9c06a09da8fd9939ba0d5525524a4bba61ea4de09b48f45cae",
|
||||
"blk.3.attn_output_norm.weight": "d1ffac88e675592ff72f8a617be32b4a381d443b2f8f2645dbe44a1e5745aac0",
|
||||
"blk.3.attn_output_norm.bias": "ea31a1c73146234c50e0e43f485c458413714867b8e2703af66482f7db2d6c40",
|
||||
"blk.3.ffn_up.weight": "4ef4f3b9a1ea6ab2ef2eb6e8b008e06a44790d099d97482a05a51e39a29afac0",
|
||||
"blk.3.ffn_up.bias": "06a4296dda16f452675c51f108079fe7722552d6521c737d97734943818b9a2b",
|
||||
"blk.3.ffn_down.weight": "f114b2bebe392c7d80433bb880c6730293aa4561b0b0370dcdaf7472daebd847",
|
||||
"blk.3.ffn_down.bias": "2c8e67831d28a3bf613fc7912ae3259b63d72abcaf4d30efd8800758400158de",
|
||||
"blk.3.layer_output_norm.weight": "a1dfeb7b5a51dd56447312ca41e2ad2f361a3ea12ddc355127f5f4219fb0a482",
|
||||
"blk.3.layer_output_norm.bias": "1ed630021b25c6c6fc93fd32988b9907df966d4982a93081f639aac3044618ab",
|
||||
"blk.4.attn_q.weight": "b5fae4c1f9a5f33a2a2e816ac0c01c25f422e4efdd59ef1ed93da2610e5370fc",
|
||||
"blk.4.attn_q.bias": "c2e376524ea98ac3b10d9eee19ecb1b1e261fa5149efe0232844c923dfb428fb",
|
||||
"blk.4.attn_k.weight": "a4632f5ebf9321d9d08f9112a4e5dda2efe5671df4a4e67fee24845f5b14af16",
|
||||
"blk.4.attn_k.bias": "a9a02ffb8b8b4f6dfe487a7e0341f1d5318c9d2b793a688f34cb1b22fc66ef60",
|
||||
"blk.4.attn_v.weight": "10ad8deb81d9fa093b1e5c0f24ea82aa7df43e6aca49e260fcbea56eab8cc86a",
|
||||
"blk.4.attn_v.bias": "7326813e181e021130bd33ac136293fcffccce2d1d8cb59041e5b13a8cceacf6",
|
||||
"blk.4.attn_output.weight": "c92573088c7437c2b3cda51490e152c27fb19e5468df591eabba5a49d5398d44",
|
||||
"blk.4.attn_output.bias": "14e10b419e5859af1eb685af5c330aee67048cd704dcead9217840c6f5393222",
|
||||
"blk.4.attn_output_norm.weight": "02b6831c0e0fb0edbc579a92812a1dd972cb15d14fcd382d4427c5a7b300ac44",
|
||||
"blk.4.attn_output_norm.bias": "7eed5cd503bb6bb6ceb1bc8b07cc077903a4f14fb8b9d6cdf39644815ecf1374",
|
||||
"blk.4.ffn_up.weight": "8d0c91d62e74d6431321116a37cf3339e630bd50ba164d3304fc4fe8dd831223",
|
||||
"blk.4.ffn_up.bias": "d325f07f73c005a273c484c7be8e7abb4d6e8a5c4fd093f5869133b97629d017",
|
||||
"blk.4.ffn_down.weight": "7ba7bd81143f40537b84f938e403e19f30e4928625eb371de052b9025beb4d21",
|
||||
"blk.4.ffn_down.bias": "2853d9c2a75288214a4bf4907dc19d04d01926f4913d302b1aa7bdbfcce0f7a1",
|
||||
"blk.4.layer_output_norm.weight": "a4ed1885fa77b90fed5300c355ef0aa0c876a8c747151d9d790939d464d57d4f",
|
||||
"blk.4.layer_output_norm.bias": "62142a81e813a9e636333b2b805d6bc3b17c5e7cd4b15adce1ada6bc9a32563c",
|
||||
"blk.5.attn_q.weight": "afc1dff080a72c3daad01384b1448d476aaf789871017c8ff8e144788887995d",
|
||||
"blk.5.attn_q.bias": "748a820371c1d4f872c84545b36358d239c35bf6c99e2812c237d88c3292763b",
|
||||
"blk.5.attn_k.weight": "59e30c1ed8acd2cbb01de5f62e7804015b9ecf98ba157d98cab016344639eda5",
|
||||
"blk.5.attn_k.bias": "f839520078f9e589496e982e86d0126c7aa14196047339abffcf49a696229f77",
|
||||
"blk.5.attn_v.weight": "3e21fb874e21b90308e1f46af034a3c32d3eba1628d62ae5f2246d6af5818923",
|
||||
"blk.5.attn_v.bias": "5cd4852bf95c1444d10d756750f6bf49f842c0b39e9953c7f408bb67c325ac8c",
|
||||
"blk.5.attn_output.weight": "636ce6a7752895f204b9d01ba0aedd9a294f908b42f372c22a16d9dd590d7471",
|
||||
"blk.5.attn_output.bias": "82d924d4b0d2b94f2bbff91619216d6967a3541ce9b1531a6a60457a67b5d219",
|
||||
"blk.5.attn_output_norm.weight": "5e7bd0a8d3396080f3360d7c4700bf094a06216431bd014c4479eef72ecf4271",
|
||||
"blk.5.attn_output_norm.bias": "66c6de5edda5466d029c6753780be81ccd4218bf8bc00680000e0f06856ab712",
|
||||
"blk.5.ffn_up.weight": "5bbf6e7ea380e216e33f8bee06d25f2265359d3876a300e92bc6e41d48e33430",
|
||||
"blk.5.ffn_up.bias": "9d795388bb36fb33ad3a37fea3ccb4937838e02800a608fb47d363cd06b47370",
|
||||
"blk.5.ffn_down.weight": "2fd628974e7f075479dd227b46fbd48ae8d3ca34d735b36f391ac06410730368",
|
||||
"blk.5.ffn_down.bias": "cd213ba9eaa75fa541648097fbe9c96e58077e6c3ad6ad2fb1f21f8350f44291",
|
||||
"blk.5.layer_output_norm.weight": "159a9df41d15b7022d136f86a2a2631c4635f9816e957472217077b522bcf52a",
|
||||
"blk.5.layer_output_norm.bias": "24c1f27ffd1eb4e5be7e3a2909943e6f0980635d761fa1efdd0c19645da23766"
|
||||
}
|
||||
344
convert/testdata/c4ai-command-r-v01.json
vendored
Normal file
344
convert/testdata/c4ai-command-r-v01.json
vendored
Normal file
@@ -0,0 +1,344 @@
|
||||
{
|
||||
"general.architecture": "command-r",
|
||||
"general.name": "command-r",
|
||||
"command-r.attention.head_count": "64",
|
||||
"command-r.attention.head_count_kv": "64",
|
||||
"command-r.attention.layer_norm_epsilon": "1e-05",
|
||||
"command-r.block_count": "40",
|
||||
"command-r.context_length": "131072",
|
||||
"command-r.embedding_length": "8192",
|
||||
"command-r.feed_forward_length": "22528",
|
||||
"command-r.logit_scale": "0.0625",
|
||||
"command-r.rope.freq_base": "8e+06",
|
||||
"command-r.rope.scaling.type": "none",
|
||||
"tokenizer.ggml.add_bos_token": "true",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.bos_token_id": "5",
|
||||
"tokenizer.ggml.eos_token_id": "255001",
|
||||
"tokenizer.ggml.merges": "902a060cac8884a5793d2a857dd2e53a259de46c8d08c4deb243c239671e1350",
|
||||
"tokenizer.ggml.model": "gpt2",
|
||||
"tokenizer.ggml.padding_token_id": "0",
|
||||
"tokenizer.ggml.token_type": "b7a352ccd1c99d4413bcf452c2db707b0526d0e1216616b865560fab80296462",
|
||||
"tokenizer.ggml.tokens": "815ac90ff23565081522d7258f46648c8a0619eb847a9c7c31b238a9b984e4ae",
|
||||
"blk.0.attn_k.weight": "6fcfdb466f9ceb1229404ce4ec4e480751b8d00da12707a11783dad7256cb864",
|
||||
"blk.0.attn_norm.weight": "6063317f731371864049c7704a70772f1eb632194201ebdc2ed0f8e483507c72",
|
||||
"blk.0.attn_output.weight": "920f49716a1e2fc73b6794ec777947f1c122701e63ed302422ac89e90f06e9da",
|
||||
"blk.0.attn_q.weight": "ddbcd7cde197e632564ac58e4f25d9e3a8ca52917329eeb6081eb41a797932ab",
|
||||
"blk.0.attn_v.weight": "318fc02a189d87420f0cbf57f47f11e00c21ec1ed472ce0a2a895b44f7fa0fca",
|
||||
"blk.0.ffn_down.weight": "aa71975b6eb1f4c77b03d2ac4a194cf8d95718efac741bb12f0f3ff79a27f9bc",
|
||||
"blk.0.ffn_gate.weight": "42967702fa0bc738b88dc50007ace26dbe74a5a9e0978124dd093f818241a9e1",
|
||||
"blk.0.ffn_up.weight": "5282c8788b086bd30f46525e7995a17464882a72703fd27165491afdd8bfd4af",
|
||||
"blk.1.attn_k.weight": "cd248882e64fd2c3402c44790ebe12440133dc671b6893fdad0564c461973adc",
|
||||
"blk.1.attn_norm.weight": "ba84e1c8fd30af6ec94208db4078befac8c921aad3acb887812887f3282ea2be",
|
||||
"blk.1.attn_output.weight": "2efa3ef7c5666ccceb05e339b83ad680cc0d2c3ec78203f5da5959f23a80e14f",
|
||||
"blk.1.attn_q.weight": "5106f2e255358a1303c22e8b5f0ec044852bb30a866c52cabefd30017a7a6b7d",
|
||||
"blk.1.attn_v.weight": "a211a634a1a5df1d5f973645438be0461dd922210f9747c6b04e386c7f1ebe95",
|
||||
"blk.1.ffn_down.weight": "37093afe48d32c578ec956c9ed85242cd000d6aa979e60526aafa10c822dbb10",
|
||||
"blk.1.ffn_gate.weight": "469860819e9159caefb1aad0bc66db790f3393f05fd87b08e52256a7ed256543",
|
||||
"blk.1.ffn_up.weight": "736742c97d35d1a011f9cafd3c0ce947ad559bb2fba6da73c816f6bfd0fa9aeb",
|
||||
"blk.2.attn_k.weight": "92c219d92804d832ab404bd6dc7339c90877bb7cf405dd030c121f8b27757739",
|
||||
"blk.2.attn_norm.weight": "61e4466069474b76b6d1e702566420eb669faf3556b00ff7b824784aca13a2d6",
|
||||
"blk.2.attn_output.weight": "d2fb38a2b2171fd91caf037faa585a62225819aa232d86fd4f7f9d2c3c8a45e9",
|
||||
"blk.2.attn_q.weight": "f6faf5cc6844e3daa4f9f68d90f5458c64879de68a7728860e38374e30c3429d",
|
||||
"blk.2.attn_v.weight": "f340ef8f7341d987a6f37c0e9afe0aef5be67be00c0ce5f57612daf73319cce1",
|
||||
"blk.2.ffn_down.weight": "c7be61a701d779860b621b143fb6365b607bf99ec7c0f153b07908ac8120885a",
|
||||
"blk.2.ffn_gate.weight": "b64f0878187bd3392abfa4c3e8ad2f8b4c133903e54246747ff8f3b4639ad83e",
|
||||
"blk.2.ffn_up.weight": "50b11c712652e90ee7428dbb45cffebb80662ac982bc72bd9eafff361b5eb5a8",
|
||||
"blk.3.attn_k.weight": "2b7bcbe9ee5c9c630c8c8d7483887e78b73581016f4cbb6933db2a147a25f431",
|
||||
"blk.3.attn_norm.weight": "0181dac7f4eee7252980323e8032cf339bef2046ce0a16c0fd72af7c98a8a37b",
|
||||
"blk.3.attn_output.weight": "aef8843b636ce231da9e7c9acbee197883cc15df0e2887709324c6a50f16da7b",
|
||||
"blk.3.attn_q.weight": "55404130fa10e81322d33eb378aa0de31a92990ce7730f1338c0ace0406bb1b1",
|
||||
"blk.3.attn_v.weight": "76f7fb8040d82b957d689ce34fea2302a6640ad5bbaa0052ad2b7ebce270c33d",
|
||||
"blk.3.ffn_down.weight": "648628933eff3b357c3729c33c5b1ae51c28e59b9c19acd1601a2ff7c5d5d9a5",
|
||||
"blk.3.ffn_gate.weight": "6a588885d16e98d5f50ebed05af089154f680085ca9c97691e5b489088630a4a",
|
||||
"blk.3.ffn_up.weight": "e12455a1d702f4986e1a663493e3d5102b367af74d45557522002a35d63ecac2",
|
||||
"blk.4.attn_k.weight": "40d943380a8a85e4eab147934bf6e16f23cc8ab753f6636526382c074d182288",
|
||||
"blk.4.attn_norm.weight": "4ab2c098983d4599fe540eef624c4df954adb7473faebda7471ef0ba4134814c",
|
||||
"blk.4.attn_output.weight": "d14b91e40f58bf4a3c8c2eca0b12bb541de406574af39027d56f6c588a147082",
|
||||
"blk.4.attn_q.weight": "e1224960a3562107488589f883fa32414bae41712fa8dbd47c5f3e3a7801452f",
|
||||
"blk.4.attn_v.weight": "063f297bc4aa6e709fc32c4c32e35af7d07d80e83cb939b76adbba858006c03d",
|
||||
"blk.4.ffn_down.weight": "f88a18020c5e1caaa29596895eb348e76ee5bfad27ed57651a86cd8cd1f9b5aa",
|
||||
"blk.4.ffn_gate.weight": "48e7e1eed3fb52e92e61d3557dd0ec002418327090e034ce4322fd68542266f8",
|
||||
"blk.4.ffn_up.weight": "1ca8a7aa17355b6ce0d9ad5539fdad3899fa47fd359c285fbfb31f19f47bf073",
|
||||
"blk.5.attn_k.weight": "2bdf15f8e73d068d972380f25d207004cf0bf3b5bfa46946803ba6fba07d9175",
|
||||
"blk.5.attn_norm.weight": "60448d7cde6e1b6467aa31bdea012e39cdb08c88081cee7d102dca4f93f766ef",
|
||||
"blk.5.attn_output.weight": "f9f687d7c457537f9fca8a4087a59f1c3bebfaf5537b94e42c831a13224f7799",
|
||||
"blk.5.attn_q.weight": "987db7a2ad68657a92625e1980effbb1f79697c2183f2b9f3b3a0570c51b0ab9",
|
||||
"blk.5.attn_v.weight": "cf696891148f3e4783ad1d20f93462ae091eb8651c656bba9b662253b6263e02",
|
||||
"blk.5.ffn_down.weight": "c0662b0bd0929136005fb9d691fdd9b2c33867d9ce9622339a6a456b720b059a",
|
||||
"blk.5.ffn_gate.weight": "200bbdfab615d7a3a84719b6ced7751e3ce52757ef212d96f87798bc1de5e987",
|
||||
"blk.5.ffn_up.weight": "df5d23e7e035fb1b9d163da7ddfdfe38da6a37e86e96534dc02ad20f011b55b3",
|
||||
"blk.6.attn_k.weight": "c0dae2d272a7c5a2fa004bbb8475dbab362fc1f6d008e73d5a4434a9382ac6ba",
|
||||
"blk.6.attn_norm.weight": "51c57ac8b55e04354d5dca6bb9c0cf4177639d3b038e80209e33036209688f64",
|
||||
"blk.6.attn_output.weight": "229d97892c62f85bcdf431675250e01c976ad69ffa450b01fb543bf88f14a2fb",
|
||||
"blk.6.attn_q.weight": "c20e49621821bd46ed156e6823864a5bda4f317750e71ab8dc54e44eb48cf7c2",
|
||||
"blk.6.attn_v.weight": "53ceb1a2ee43fce3c7b5b33c58a9fc5ee7f44dc1c6f29bc9dbefc37582102dc9",
|
||||
"blk.6.ffn_down.weight": "7923c943b7629d560a032d1efa210d1d75c6692140f1be94464ee7ed24f44ed0",
|
||||
"blk.6.ffn_gate.weight": "57593d350361af753a6a39f53b066282634c0fb44f396f6f2966a574b01d8f8c",
|
||||
"blk.6.ffn_up.weight": "327b6a7a387098b8899d3ded04a4d4e7c658ca61b80d4e7b17594be232721602",
|
||||
"blk.7.attn_k.weight": "9ca48b87a10116fd8868e62b76f211d4bb91f166096be9061439ee2e1c3a5c20",
|
||||
"blk.7.attn_norm.weight": "cd56cfcc4e2ad6b96e23ea7b0d32b4caf236107d99a0b22c56760b62e63c8cfd",
|
||||
"blk.7.attn_output.weight": "7352b509a03cae2491ffc060e577d189341a0f861233f18c96f9d275dc4234bf",
|
||||
"blk.7.attn_q.weight": "2b3791c8c008c33ddbe12bedba8191322ceea2dcce5cf0eb7a93d40ad254e672",
|
||||
"blk.7.attn_v.weight": "3ae721d52466487a3d48150581e57f6d64ea1e83ab929f23b28c3d777422eeb6",
|
||||
"blk.7.ffn_down.weight": "3b6fa8ececdb3c34af3a5363863d6f94289c1c95bf47fce3a3ddcf184c5f0848",
|
||||
"blk.7.ffn_gate.weight": "dbd7df6c5ae5eb4adb859f0d36453813a4e289a359a1ba8f72d67fcbf21c3e22",
|
||||
"blk.7.ffn_up.weight": "de68380a334b4c5cfd4c318b0e9854aec59bd79aa0f0c30af3f56414f83482b0",
|
||||
"blk.8.attn_k.weight": "7303c4e4480abc72a7ee271811311199245fb5c2ea27a2bd3b8cad3a53a03c27",
|
||||
"blk.8.attn_norm.weight": "2e3d1921898d1b943ce1a1b6818546c8b471d6d542da24f51a8b514b8c3dd4ef",
|
||||
"blk.8.attn_output.weight": "30421520887b66bf97a18dbcdc283bc8d0b60590b612fd638a319a6eae923227",
|
||||
"blk.8.attn_q.weight": "73e064d5433c9b500068a1c31744dbd53f4ade298fb450a0e8c97f62cf1f8a8d",
|
||||
"blk.8.attn_v.weight": "27e21f8b9a9a8533e8178ca34a72aa1d786393d57302b7806dcdf3e51de511a8",
|
||||
"blk.8.ffn_down.weight": "bf694bd8e00047982108000e7b3dee7b225db8b19abc595e5697b6bbefd92e7c",
|
||||
"blk.8.ffn_gate.weight": "d55fdbf8606d9141b774b0500c58944fd1253b9e69d1f765eaa9a680b9f2ca40",
|
||||
"blk.8.ffn_up.weight": "1ae3f580655e7c8e8dd6c34fa4ac574fdfc5e3f1a8536da0c5442d3a2976f0e7",
|
||||
"blk.9.attn_k.weight": "b18080626012d8aabcf78542d6c7bf31c712bf55a70172fbfe173fcf34481036",
|
||||
"blk.9.attn_norm.weight": "2e3620620dc09998c6d3063a7d5de5433fbbae8c11e5b00d13f145d39140e162",
|
||||
"blk.9.attn_output.weight": "69c3c0e27ef1c0fc933eeb7b612b70909f18cde238873c0d576a2ba9714ef174",
|
||||
"blk.9.attn_q.weight": "68330e5aa28a28873c9a6e67f032186ef651df2df5844e0f27094ba349fbe4ab",
|
||||
"blk.9.attn_v.weight": "3df8d45a102be082d0793a51cb82aa62a43cd0e9d047ba4115ca0f2414b39325",
|
||||
"blk.9.ffn_down.weight": "1d6cc162b73745b135b4f040a0aac3c06d5135a3dc5b2421e7ee2af48662fd7f",
|
||||
"blk.9.ffn_gate.weight": "034a9d40fb1e32b534b45f4bccd65cbe43c4a6a3f5d01132bd245ca0005de5fc",
|
||||
"blk.9.ffn_up.weight": "c838c38d0e1a0ac0da17eb2a66023ed31929f07d8fcfe1cc546df26096c91f0c",
|
||||
"blk.10.attn_k.weight": "a78507cb72f744b86ceaa032596e74e5571c822d0226d334881169addb32cbd5",
|
||||
"blk.10.attn_norm.weight": "35f48d0b28ee0e6b4cad4e983925737562d64824be5b168b3e26df3d6b260cf1",
|
||||
"blk.10.attn_output.weight": "53712db06796de39b131323e7abf9a58551b6d52da6db66a471580386d396252",
|
||||
"blk.10.attn_q.weight": "efe08429ba196026b81cd1c471e1c7418afd9e966659feb3936b674aa0803b58",
|
||||
"blk.10.attn_v.weight": "7ec6055e134f89da0cbe79ec9f13ef2e442ac584b1f03c3e13e7d0cdad0078bd",
|
||||
"blk.10.ffn_down.weight": "37e66af4bcd1f3079e841e892255b8255070655901864ea3a8c602a7f681a640",
|
||||
"blk.10.ffn_gate.weight": "1825282bc34830d371c6edcc3c1e73e6ecc1e10f4aea0122dbb7acc1d6f7b1bc",
|
||||
"blk.10.ffn_up.weight": "819b3b276a4d4c14a35ed6682d5ef18a5e8ed468e5ce3f12e8c75ec18ac20ec4",
|
||||
"blk.11.attn_k.weight": "5327e6a2af82dfff0619a14971f5864a15553c36fead84e1af42c7630f2729c6",
|
||||
"blk.11.attn_norm.weight": "fec363b3c4a43036d2c635fb8aa9e122dd87ee79811839f2f6cd955be3373e7b",
|
||||
"blk.11.attn_output.weight": "ccf7b38f18ee8798b8a6a35018e2df3eb3e007de62876befb68025dd66c79763",
|
||||
"blk.11.attn_q.weight": "da8c4a1c824ffe174e39f126cd72f7ef83c56aff1259d452a1212de80f98f5e9",
|
||||
"blk.11.attn_v.weight": "d17ae6bb77f03982b55d341eb67acb5969e9ad3da5994b96eafc09793dcfe3a0",
|
||||
"blk.11.ffn_down.weight": "a6bac521e2791345f22c57205fa1c2f2f687794dfd24d0e98d50ae0d0eb6088a",
|
||||
"blk.11.ffn_gate.weight": "5ed902c488cb51ba5635f3df08258c5f84f31a679a00211ea5f9d8b824ef6d9d",
|
||||
"blk.11.ffn_up.weight": "ee9f1437eb890d2cf9df2574afa1cecf20aafdd847cd75b152d7eb74419afd34",
|
||||
"blk.12.attn_k.weight": "5a069c06e1019b0f889088e67458f7a11ec77fa190ada6069e46211f62219947",
|
||||
"blk.12.attn_norm.weight": "194d7e5fcc8c49aea62daf1940532419cf3c505afdce6be377286b677db5db8f",
|
||||
"blk.12.attn_output.weight": "6534995fd4d6fecb55e317add4b1723aba4d825e1e9471d0b08813dfdc247176",
|
||||
"blk.12.attn_q.weight": "4ab51ca519b5995581fa34f846276feca3b907ef2b51f192f6cc0b3263c3f5a2",
|
||||
"blk.12.attn_v.weight": "5652ca3fa81ef9a1ac1543d71fc6813f8517f8ec54b25c701f6f98061614830f",
|
||||
"blk.12.ffn_down.weight": "4b2c263f54c88516b8eb273bb8d9615b01c5c8b484dc70358adb91b50b300edd",
|
||||
"blk.12.ffn_gate.weight": "8f50c3c3e3e8568991d6c1b0e74b500cf4f208e7700bbb8e87c3f6a6d359b6b5",
|
||||
"blk.12.ffn_up.weight": "1c1a581fec1fbe959e1427fa513f400100b5e1ee9d83932630be9905fb49c231",
|
||||
"blk.13.attn_k.weight": "efd7a38c46f08d8376d82974f33c644e3a02220e142d63b1704718699a8a884c",
|
||||
"blk.13.attn_norm.weight": "d28fa4f1bd75abbd063b0e622e08f579c89cd0c0c5ce63c1952ec9f944f8ee13",
|
||||
"blk.13.attn_output.weight": "71e0068a639288718bdb70a6cfdefd50bc8b3ec3993347a65129e70001ca5827",
|
||||
"blk.13.attn_q.weight": "b97077adc92cff07a2e07d80ee38f214ad8713571c69cd5c70ebd43dc501ac87",
|
||||
"blk.13.attn_v.weight": "79b3e2749ab4b459c81e96e322b215f1e8af645eb346e176c326bd00cf6ed2fd",
|
||||
"blk.13.ffn_down.weight": "9f8687d11effa1db7cfecf7bec5631734bcf2962aad74a9f519144491e08ec85",
|
||||
"blk.13.ffn_gate.weight": "7d14dfa0543852e7777fe8fff29ca533744cbcf1ebcf10067e5adfc4eb345e65",
|
||||
"blk.13.ffn_up.weight": "852b9527b97fdab211ff3f832a660ee1d93ccb56906144c50f01319a6e8ee615",
|
||||
"blk.14.attn_k.weight": "79e926b20f36f66d58226cb358881f2f68ae7b468787d33cafae5110287a14a0",
|
||||
"blk.14.attn_norm.weight": "97d481b63deb0df6142c2c6cd23043720c62eb609e390f47a7113751c79974ec",
|
||||
"blk.14.attn_output.weight": "aa6e94d7176d5c79fbb89b96e5f13ce75702ce3dd23ee52986446da436a6c3d6",
|
||||
"blk.14.attn_q.weight": "214becb6d1bb460da9fb8ace0f99b9a5afa9edf7aa7acc19606c7401b11d6305",
|
||||
"blk.14.attn_v.weight": "488b0e6d7f1a7a2ed0972aaa6d10ef9c775ee5373460324efcf5b3e3da9311df",
|
||||
"blk.14.ffn_down.weight": "29c7ad16cf9542e30996a1a01ab95b844533b28051f04cc7949c371afb796471",
|
||||
"blk.14.ffn_gate.weight": "b7ef208f2b054803665b377f5a5980c122c026841809cf855c6ba06d1c3a885a",
|
||||
"blk.14.ffn_up.weight": "76a5cc28100748d79c4398ce7b9176aab4d661548b6293a82f99144812e5b70e",
|
||||
"blk.15.attn_k.weight": "a6b8f9e98ab878fa7ebc5d080978ebf2d050acc2ab2fa8ea9188eb10e27702c8",
|
||||
"blk.15.attn_norm.weight": "a26d07a9752d6dccb68e3a8a2a49fd0752cdd0a415e05547819bc37d9ba63d5e",
|
||||
"blk.15.attn_output.weight": "c63616c69048ccbee801e05be4f56d21fda21aa0cc470f41d57c31b4d9283a4d",
|
||||
"blk.15.attn_q.weight": "fd595a67bf96c6ba16eb148a9d02fa52fa3c1d33ed10be28a08f851409fd6e64",
|
||||
"blk.15.attn_v.weight": "1c5c9d33fa07c05d5f4ed0032c6c4aa83d863f0d31c94a66109d239dcd03cea3",
|
||||
"blk.15.ffn_down.weight": "585ea62ab8aff7d7d212ea5c1a03226fda6b68370c890b776834af70c948dcbc",
|
||||
"blk.15.ffn_gate.weight": "a13c63f86f879b03a573d5dd2a25cfd1f4dc73e8132e6454ecc23e538b4cdf6f",
|
||||
"blk.15.ffn_up.weight": "f7112450f57c12fcd511f049e0dc0b541625a107a7901c3261ed9e984299f65c",
|
||||
"blk.16.attn_k.weight": "2d2c8b11dd71fba6d1c106aa1673c113a5448653cca7eab897c8739212ed5003",
|
||||
"blk.16.attn_norm.weight": "95c2ec7be9469690e18a9a1779684acb3e9da44b13e263a0da840305646fbf8a",
|
||||
"blk.16.attn_output.weight": "31a65046e677f54dae654ded4e733479fcc0f7283d83076b7dc7cbcae8528230",
|
||||
"blk.16.attn_q.weight": "bfc6292b9c6d49b7118d08060242a138182eb182d136ba5dfaf469437c16081d",
|
||||
"blk.16.attn_v.weight": "68f81d037340217d87c7853ff4d6edfbc46d9e827ee6d5bff7c3f6238e3a95ad",
|
||||
"blk.16.ffn_down.weight": "bbd6629691950cef4d5113e1c6670e91b216a9b872cb92cee02dfda4d6c4f7b8",
|
||||
"blk.16.ffn_gate.weight": "63cb56f282b7401ed6c76e5bb6fdf1bf68a64f9af0c82c014209b55bcb5191d0",
|
||||
"blk.16.ffn_up.weight": "b54f39a2541063cbfb6f713aa81c3b69a04100e999aa2ebbeec195dc382eceec",
|
||||
"blk.17.attn_k.weight": "3d9ba49799cc56664ec30a002bcad61eb651294212a68c3ddb573eb042aef5a4",
|
||||
"blk.17.attn_norm.weight": "42ee0db4b9d63257bca0012a30b12737ead1caafeb5ed3d93c8f48ffec4b46de",
|
||||
"blk.17.attn_output.weight": "a38fd100f05c9041c592bc739e287de0b10d08ef2bda41a879225bdca9002f71",
|
||||
"blk.17.attn_q.weight": "8a3bee285b0180a9eb35662e449ee4cbe16d992bdd48fb3a94bc4a347728cfa2",
|
||||
"blk.17.attn_v.weight": "d7f8f1b8b863494ed4392a1656775912e9b264ad36016547b12e832a1d6757d6",
|
||||
"blk.17.ffn_down.weight": "bb7ee58f61da8630972e25b621996fbe8ec06f4dc9ab1e268ab5b120c526ca28",
|
||||
"blk.17.ffn_gate.weight": "6b652dbf167fee09a45ebfd78d500ff6548fb2756dbe5343ffec3f7e6207179f",
|
||||
"blk.17.ffn_up.weight": "3b67f727e55e742715de978fab80457781e7a3762bc48f79d13b45dcb8de664c",
|
||||
"blk.18.attn_k.weight": "ff7fe57c57b90c6fcc0aefc39ec24593c3a7d1ea1c23770480075a015450e0f5",
|
||||
"blk.18.attn_norm.weight": "1d40faca082d2633ef0ccf19e121870dd6c7c3e2154607c7f3543fa96e99cb2d",
|
||||
"blk.18.attn_output.weight": "9adfecaaa397a92db4687efd5fcabfa0daef9e6b0493763b7ff5ebc185c43a6c",
|
||||
"blk.18.attn_q.weight": "ad1803eb9b291948639277afe981e666b07167eb3fcae903ba5b73bf86d8f50b",
|
||||
"blk.18.attn_v.weight": "308cf23399adccf27401a4ab60d74dac6fb9d4cd4b9c5940d9145118d1881b34",
|
||||
"blk.18.ffn_down.weight": "7de4ac9a561fb580619b745687dfd7ca8a69ef70471dee978741b80e9ff7bead",
|
||||
"blk.18.ffn_gate.weight": "0c66970f696b33bd5ee8f1f2fbcb41fd78fa5ccabdc927e11a4d5a4089f19c69",
|
||||
"blk.18.ffn_up.weight": "66a42e988e8a1f468fabf976c48e9e4bb045eaac6916ef16555ac101cd674abc",
|
||||
"blk.19.attn_k.weight": "a928ab50390bacbcebe2e4b66922498134ce22d7b93beaa87d6cf4ab52eb7174",
|
||||
"blk.19.attn_norm.weight": "b4a02c55b46c2a96aec9c64a254087cf48e6c1d4b6f31782c77a46fc4daebad1",
|
||||
"blk.19.attn_output.weight": "b768319c641dff1eac5d1f8ceb960c9899c795bf2b24c1d6bf70aa24fda45f77",
|
||||
"blk.19.attn_q.weight": "79ef3f57d187d3954a26362096e1b6c222d76f537dff73e034d6e9999935b8bc",
|
||||
"blk.19.attn_v.weight": "ce13d6b13e24fcb2d5bc6a2662e5bd295b31b12db10a6d0307f86cf29b8d5001",
|
||||
"blk.19.ffn_down.weight": "cf90d7e2137482cfd50934a8223ad774621d08554969da80a9712df5e6227eb0",
|
||||
"blk.19.ffn_gate.weight": "71ce30150f003b6eeb3bf7464e05b6ae615f135110d8e47f0a47fd973e537c0f",
|
||||
"blk.19.ffn_up.weight": "7f92aca0cc29866633feec701ec01a85a8ee2fd4e2b9630173a6cffb1d9d50ee",
|
||||
"blk.20.attn_k.weight": "a2df23159d6fb74ef28e14b61028fe8b00a693a2fc9234a980be74f20b958682",
|
||||
"blk.20.attn_norm.weight": "c6cd5f1b096fc5efa4eb59ca1c8c4bd28730f3dcedd59a63601663eccc6724ed",
|
||||
"blk.20.attn_output.weight": "896a8a166d0f006d4b09867ae4345426303cbc3fb13a18d3d4e1bde00f16dbdf",
|
||||
"blk.20.attn_q.weight": "01eb79588fe61baea0da43e99f4dc5939590e1bafd01e12dadb8326f102bfea2",
|
||||
"blk.20.attn_v.weight": "bd39630fdd5a7c859ac1addaf53e63faf524c3f32f5f4896d86b6e746b1d5c06",
|
||||
"blk.20.ffn_down.weight": "0304a5d39957a0e3f031c4bcc4549a135d396c8d97c8d276fd1c823ce86560c2",
|
||||
"blk.20.ffn_gate.weight": "117b79d595b1dca0c8b37586beaecc4d84411507276212dc286cde7fc36c9bef",
|
||||
"blk.20.ffn_up.weight": "6e799346db145c125f01783539749d3828fcc451cd4f10c5352f047a47e28714",
|
||||
"blk.21.attn_k.weight": "1c37e4c0664147e775bb006b226b9553e3421140cd96288ea755f81731ab80ba",
|
||||
"blk.21.attn_norm.weight": "00ae783a29000ccda5e4bdbff03df0752fb82805dc3f9b987500ebd80714476e",
|
||||
"blk.21.attn_output.weight": "7588b84f9fb19f15095b5265c60b4a4e7ae74bcc47d4607dfa5d0bfab6f136cb",
|
||||
"blk.21.attn_q.weight": "a65f1c0dd06d45bb97532d3e932689c1eecfe7359089b39174a96a149335cbc1",
|
||||
"blk.21.attn_v.weight": "4220b77e7d5e8709b4eef33a679b5dad11f297085ef44c9977f9e54ef08f7a2d",
|
||||
"blk.21.ffn_down.weight": "b8c082a0530d4b5328e67db0df84c5498f2af956de23c639fa0198ffea853950",
|
||||
"blk.21.ffn_gate.weight": "cd1b656ee72d00e9835ef667c19ef89a88de261eb8eb7c0e936e0f9ddf83ef9f",
|
||||
"blk.21.ffn_up.weight": "dc445f73e36ec7a3bd86884186b728f8e0187f32848c3b8b69d4d41f8571bf31",
|
||||
"blk.22.attn_k.weight": "e37cf0b893ec8b9ee8c78dd139b8d9c45cb997a3bc0c3d93a70ca1c3f6af8859",
|
||||
"blk.22.attn_norm.weight": "248a27838d3c46cc03a5c312facc84e2e0e2c990ef8401e93da25918497f88d1",
|
||||
"blk.22.attn_output.weight": "fc191a18f6d18332c66761f7ab28008bfe295dd1f5c8741a2488442f9e00d0f5",
|
||||
"blk.22.attn_q.weight": "4b193a2ab8bc2b085db18f2bf3eeba26e02b537b2cdd738160c8f14b165d0f5a",
|
||||
"blk.22.attn_v.weight": "7a60ce5ccac7e045e55ba1e1e85bd2a0f93f8c781daee96c5223665e22f0c666",
|
||||
"blk.22.ffn_down.weight": "e0a34fb4244e2c7168f3dbaa1904c15d339ec39999cdf27128bbaf619ee0a237",
|
||||
"blk.22.ffn_gate.weight": "8bac872d4b8549c8812f927efa309f1792b524f33601095fff61b826de5a5615",
|
||||
"blk.22.ffn_up.weight": "b67fa2b94dd901b6ec64c0853ce8ca2d86fe9cb1cc6d2f15fbbbe0e691c0c648",
|
||||
"blk.23.attn_k.weight": "2c32e66ad01942b819ac09a197c71579fe66f02226a264fdd72ad1e02c67a27e",
|
||||
"blk.23.attn_norm.weight": "825fdc94deb439cb93c713eeb077c1052b90ed658d6d464fc4ad3d611e911d48",
|
||||
"blk.23.attn_output.weight": "95ca6707a95b8750b0c7c5d379d368f0f2e7ebef631954e7d4d8ec0f41f13a3a",
|
||||
"blk.23.attn_q.weight": "6eccc84faca5fac015d1b26e2854501edcfd292a302228fe14cf99f5eb59a34b",
|
||||
"blk.23.attn_v.weight": "b343ac3d226040f1033ee049668aa1d89b1774bc18431965682e5dbdce78ccdc",
|
||||
"blk.23.ffn_down.weight": "9fc599befea8d3b1e342d564a110074f66d2542df406c4b90b6bdc5828fbb2b2",
|
||||
"blk.23.ffn_gate.weight": "488556c1b0c9f0b20b0c99b4bac2e0f4046b81edb601d7b91e7e5b3bab47d667",
|
||||
"blk.23.ffn_up.weight": "1088e291d7008dd9c7c2dd6830af686a8a84b724d123a016209bd5156d6898f1",
|
||||
"blk.24.attn_k.weight": "a923fbe35e61e009a53927d7828818e0592bb737d6a1106c4b0b5a1efc367e07",
|
||||
"blk.24.attn_norm.weight": "9b51aaaa939cefafdd9b13a7e5b74ac7fa2d603427e55a16a909d6f3f353750a",
|
||||
"blk.24.attn_output.weight": "1beb2baba56f8409466434b037771248c2f620ec5f53e15f44c271d5a2d9ecf4",
|
||||
"blk.24.attn_q.weight": "4b0194fe5bfae0c6bf6131dcf8cb6e2b994f6ea10b27cb03574f0f4f8cc0c950",
|
||||
"blk.24.attn_v.weight": "6ac34b1ab0f66226d85bca1194a7c212cd93d384ecbc8b8395de48aec0970a61",
|
||||
"blk.24.ffn_down.weight": "5508f74cb732a662c2936b32ac5e90742d172b9f961a747b0e5cba0e5906a89d",
|
||||
"blk.24.ffn_gate.weight": "095e39b8584403835f9bb1ac33e0e81f54175575e4800273d281b845bff381e7",
|
||||
"blk.24.ffn_up.weight": "2d43ec21637dda12973de367b0113ee9840b0d815bf6fce042f7c3f270b0b530",
|
||||
"blk.25.attn_k.weight": "9e2aee029f3d2c7f67dfc7926e72c8228fb978382c8e5a4701bbf82c93801419",
|
||||
"blk.25.attn_norm.weight": "220cd7164fb4cdbe22d26058e4153b26c27c7b5ce2bec8e95bf2c0ea08d23103",
|
||||
"blk.25.attn_output.weight": "a17f4a5dc6aa51f03dbd75602d98e9491767c205cdc2c3a5f8667fc54bbf7c64",
|
||||
"blk.25.attn_q.weight": "f60827496835c440c794bf57ce9780704d10a59d8229886bf75ebb18900ba4ef",
|
||||
"blk.25.attn_v.weight": "9cac217e9e9f4f4c85f14ee51165a77c580165bd4a34b202389169bbe61a1ced",
|
||||
"blk.25.ffn_down.weight": "a0f36949b663e80849581dfb71e7babcc73580793bbcb0c80ab26d5a6e000359",
|
||||
"blk.25.ffn_gate.weight": "df4d1be4d50d6afe5ad3ef0d0e0fac76a33e85c963dea769641d612dd53e7d13",
|
||||
"blk.25.ffn_up.weight": "992da76be762632e25ebc5ef4d03728eece1b43f7c4e31827df19ca724aea694",
|
||||
"blk.26.attn_k.weight": "34199ff856ac32a500c754539d070258574192a34ecba87a182897cb59fdff52",
|
||||
"blk.26.attn_norm.weight": "a8e9dfb2dae5d22b5c0aec5f3675991c0e3c3e6a44153db2579136b73f456e00",
|
||||
"blk.26.attn_output.weight": "1c4f257ffb0d7db0f11cfb275e38b4af736917b43ad82de1badce3f1d227da4d",
|
||||
"blk.26.attn_q.weight": "33d55786274c2e718cf61e8fbecf3dfa5ee0c208f0b716d42b061f55459acb3c",
|
||||
"blk.26.attn_v.weight": "684b636939cd4ffcfec5a6238a0790ffa43d853c95783af9b9e8275e74071a7a",
|
||||
"blk.26.ffn_down.weight": "89d0bf066db154e6d312b5433aed1714f6a28b40f4c52e3e1530ee07703303c8",
|
||||
"blk.26.ffn_gate.weight": "393d649bebe5e2940e1b043649f6c860b4b8b9f380f30e9da1744a830f358156",
|
||||
"blk.26.ffn_up.weight": "179edc85ababd9d8440cc6093eecd1004290aa1cb96434b26ecf7585b6cca17b",
|
||||
"blk.27.attn_k.weight": "334841445a7f1e14731b08f56eb0b1f0938c63823d28bc6d078c4c5f05b36f19",
|
||||
"blk.27.attn_norm.weight": "57344471bbda2e9deffdfdb2dd05a07aa47f8761e24de53525588639145bf551",
|
||||
"blk.27.attn_output.weight": "506126af9ee54b535d49f97e36f630e74834f480329f098d6d62e96246d8d65a",
|
||||
"blk.27.attn_q.weight": "dd984df1acb4783849e25ba7ae378bfd385cd9efc540fb798cd5bdd873f0118f",
|
||||
"blk.27.attn_v.weight": "b4b3fe9a4455d34c297ff20a2f537b647cef424741d840a747b265f23d320ac0",
|
||||
"blk.27.ffn_down.weight": "621fdb185ba0d35ba5476dae73d2c81ec1482a0e878d5bfd5c3b29fe837af013",
|
||||
"blk.27.ffn_gate.weight": "e4fbab45f2ec506fa374103251a0bdb7baa6f576080bdd796f3e9db92098e08f",
|
||||
"blk.27.ffn_up.weight": "a0c57e463e988002bbd6a6c6792baa21a65e6f89ae303a2c301951b0ae6e4bbe",
|
||||
"blk.28.attn_k.weight": "bac36cbd52ec5056841663865e1291ddab4b47ef9a2544dd285d4503bfb0e4a0",
|
||||
"blk.28.attn_norm.weight": "5774a9df2bbb2e86d1f70179c7b92d81e1f401160148b3328fb64db6646a5425",
|
||||
"blk.28.attn_output.weight": "e8712622d1569557000c75f26c3f55fad267fd300463c2c2cfe3afbfa1c8f908",
|
||||
"blk.28.attn_q.weight": "11677751fddee52cc739699c02836f7be54d96038be4240be5d4f53d00161608",
|
||||
"blk.28.attn_v.weight": "e5ee459b8958d65e1445997b9aa1e90e2f5d17761ebcf5357313119a45322507",
|
||||
"blk.28.ffn_down.weight": "3934518f9f85292da8475fe38a8edcbfc4e24ac56c351b472d6351f98750871e",
|
||||
"blk.28.ffn_gate.weight": "6ba735d57e98d0847e487f25ffaa25256deaa8abec76f428cb70bd9774279d83",
|
||||
"blk.28.ffn_up.weight": "977fae6e1e5353114fc645dd98429464749758765cbc6e6457593d596e57850c",
|
||||
"blk.29.attn_k.weight": "8122a457307d580ad6f1e0acea09a2f593d97f595ba0d6737f5fea16d2433642",
|
||||
"blk.29.attn_norm.weight": "d626f721e05aa1202439b01027031d4caf1adace61ed37870a277cb6297c77cc",
|
||||
"blk.29.attn_output.weight": "7fb7122ab1b6b1e6615ca746897da27bc52c92cb70d3147183cdde61795b72b3",
|
||||
"blk.29.attn_q.weight": "be43e94ff6b6e391024dc824101efa0ddf4005d5b002ac26cb03765c0c73c2fa",
|
||||
"blk.29.attn_v.weight": "af93c85ebff908f74f9935b81bde0516ca487c84139868a1ce079c3ae20036b1",
|
||||
"blk.29.ffn_down.weight": "39dae12340ed3120bd19c495fe0872b559613641e41fde69d02d8631900b84c0",
|
||||
"blk.29.ffn_gate.weight": "36fd482439840ef197c9f3b8905d86acfcea49bcf018544106ca465d4bf8d5c7",
|
||||
"blk.29.ffn_up.weight": "5243fbdfdc1e2a1dd84b6210a9869d18a014db9088897e345240cdc99990bd5d",
|
||||
"blk.30.attn_k.weight": "948f263616bd3788b2b968baafd69b9c5bd1b77578665f096c4b7e247b4cea42",
|
||||
"blk.30.attn_norm.weight": "e168df981e744874ff303faf2eb470e5f6868c2040ba5f383f6c5148669975e7",
|
||||
"blk.30.attn_output.weight": "4cf0ccca04b792573b756655a24fc89cfb1f272da8305633f0bc66ef14990b93",
|
||||
"blk.30.attn_q.weight": "21e07d6cba6c50d65350289258209717174a13c42be57e8141d69712cbaf32c1",
|
||||
"blk.30.attn_v.weight": "65a8ca29c7237b3182ccf03e2fc94e84f9a53d0e160fb679ab401c853170dd9c",
|
||||
"blk.30.ffn_down.weight": "8b00500a6d00d84058f6658ee1d6f06fb4fcae2f90d4341792259362923b3c13",
|
||||
"blk.30.ffn_gate.weight": "5bc0e19ab7a31b50ac2118ad1b36e31055271a322cd8ff661d47c3ac0210703c",
|
||||
"blk.30.ffn_up.weight": "f37a0561955725bd59ee2d064fa9f4e00a12a1b620b624db3bc3add5330bc321",
|
||||
"blk.31.attn_k.weight": "9a5663edda227f5d87533897146764f8e8a7481b9e71fae197c39204f8463221",
|
||||
"blk.31.attn_norm.weight": "060a4f438a1ee5e220b5b5278ad2f5c085a428bf38c515766781815597c87529",
|
||||
"blk.31.attn_output.weight": "6ada5d3cad9dea4780ffbb43302bb6ccc2f24eddd0fc4f5f84c9ce0fc0c6e5dd",
|
||||
"blk.31.attn_q.weight": "bb5d08c08603907981ad388d5d8b70fcc9b98034ba264b8474c8890cc0297af0",
|
||||
"blk.31.attn_v.weight": "e01b4252ea9c6a889c32b21144b441a347464d04536ef4f6572425be55759796",
|
||||
"blk.31.ffn_down.weight": "8ba4d679c36e93ba65ba03180385ef35ea86b3b7cdf2fded9df59369f1c09630",
|
||||
"blk.31.ffn_gate.weight": "e5b41dc93645f8b5e8eebae3ada3ea43a18f97ce2654228655170b07b463ccb0",
|
||||
"blk.31.ffn_up.weight": "25b88cdddc8b547af294ed107d3d1312e90b983cae87936fa6062ecd8ea02539",
|
||||
"blk.32.attn_k.weight": "4bcf86dc0858c8ca2fbdf6aa76674d43eb698f78979fdc1a38f556a7af1facc4",
|
||||
"blk.32.attn_norm.weight": "cdcc12f3b8b9773c6722736bfb748a2729230b21478cbcc4104859d3148df815",
|
||||
"blk.32.attn_output.weight": "d43f1196822995ed89a9365c97054753a8b30ce20b6e273c8edcc42673a1e141",
|
||||
"blk.32.attn_q.weight": "ebf2972bb3865cbc5be4840113a322089752038344beab2a0122c7cb4fb399b6",
|
||||
"blk.32.attn_v.weight": "714db81704ff34fa137512903c1013acee7877467473e46600728b9240582eb7",
|
||||
"blk.32.ffn_down.weight": "2cde3da1258bb170a79d5d3cdfe10c86a71eb34b77da46b74c5ed71e7f4fe274",
|
||||
"blk.32.ffn_gate.weight": "c7e1ed792532613ff9d4e5834b6536e2e0f47df2303bc0fdaa90aac0c1f4e8db",
|
||||
"blk.32.ffn_up.weight": "d8d6f13fe66a716e28f79101a29817f0c0d6f99969a6f017d51bafd1a16c600c",
|
||||
"blk.33.attn_k.weight": "a0a28f6cbca88da00cab2ca37094d9b0503bf9defdae77b91895b911c408cbb6",
|
||||
"blk.33.attn_norm.weight": "0251200c24cc8445607ace6dc8c5aa0566567997262b7cca53a11ac23cc564b2",
|
||||
"blk.33.attn_output.weight": "b2423205bdf6a1096d43c44d8d12f1a84fcd4e1bb70fcf6dc8542b8b8a71a13c",
|
||||
"blk.33.attn_q.weight": "00b425c3ef71065ce5e0234e702bf38143b4952da78a85f52ab2c2e3073d97ab",
|
||||
"blk.33.attn_v.weight": "035edd2335df816c42c765a5e66b9d9b9e15a822a8dc1863508145499c942c14",
|
||||
"blk.33.ffn_down.weight": "4894a923a3db75bae4496ba3ce5f28796ad31fe33996a066271fb8654964310e",
|
||||
"blk.33.ffn_gate.weight": "8f6c819b8bbfbe3357fae89e1ac5a3d58be85b3b04be3bacf7b62775869046ff",
|
||||
"blk.33.ffn_up.weight": "257c3544b5b544fd5d839665bf5caf107a329b59dbc3751efcaa24ae63c56179",
|
||||
"blk.34.attn_k.weight": "b6cd8bba892e38dac4a2ebc3ba1bce49e71b967fc436fde30c6d76f54a18935f",
|
||||
"blk.34.attn_norm.weight": "2b3c8e60a064cba9955752bbbbdd92c71ba5c2f1bd721097bdbe88b5abc68787",
|
||||
"blk.34.attn_output.weight": "8cc272551c9aaca9db5a660c6927bab94a0243d74a30b2bc165f06bd577714ea",
|
||||
"blk.34.attn_q.weight": "74b561eb4792484e6a94b58fe2583848c3ae28ff2f1bf3d02939a0cfdfa49990",
|
||||
"blk.34.attn_v.weight": "dba19e24ff05154dc5a1f55c023729303a583d13d68732ce22ea74d4410dc8f0",
|
||||
"blk.34.ffn_down.weight": "76eca5dfeb274c35774e0bf9f22ee420ed9085c8e99aa2cd5a236e4918b44c61",
|
||||
"blk.34.ffn_gate.weight": "9af0862d5fcbc24732846488e653db8242a467765c0cdbc00332b3a40256b4a6",
|
||||
"blk.34.ffn_up.weight": "2a03126bf73587eaba99ece2066103d12e47bcd4ce30ff6c17b2f383b81d40df",
|
||||
"blk.35.attn_k.weight": "52513fc0cd4e997a842729af7d21dd09399bce0a339558374738be266d0fa2f0",
|
||||
"blk.35.attn_norm.weight": "e5281fa911964263ccf1630b14762edbd41d0b9472d6ec695fc600fed4892c35",
|
||||
"blk.35.attn_output.weight": "b391d6705d5dc6f48326b5fd16573f679edf64109d86fb729a498819676590ca",
|
||||
"blk.35.attn_q.weight": "d16446921966db9b0e0539626ad22a2511ace780e59379d6a4162d8c5441440b",
|
||||
"blk.35.attn_v.weight": "9d8cdf23ffdb0c5c74106843390b94b24c9f33ef0eb9998d39f78c73390101ea",
|
||||
"blk.35.ffn_down.weight": "938eb6301f7bbf162d7dd965682a5ed11d0a4a530c6fedd7e5469ce80012fc17",
|
||||
"blk.35.ffn_gate.weight": "5ad84f5a0c8edcfea1ecf1a3e3d21d85ceda0c4ad9e3c6ca68885eeff8ed3c2f",
|
||||
"blk.35.ffn_up.weight": "1c4330d9dc71bf4c98812c34356c51f520f47610a534152aa6d29284b758090d",
|
||||
"blk.36.attn_k.weight": "ef720655e5ca2465f13db2dfc4732fb4ef2c9d53acde52f514fd4f301e974081",
|
||||
"blk.36.attn_norm.weight": "88f4b9310b3c8c2644e3029160cd35678c79dfa59280430e03f5c29a6fe84a58",
|
||||
"blk.36.attn_output.weight": "aec6f915fffd7bb72cd783273e871b4f09605950089d45e72059d1316b6c4b01",
|
||||
"blk.36.attn_q.weight": "72f9408a2405d42f8db6ce5fcf1d26a3660b6f225fc60e77d0277109cfcb82ed",
|
||||
"blk.36.attn_v.weight": "0f3b3d851dc44b3893ef53f6cca5b4acc9658bacfe1cc2d13c3d704ddd409b67",
|
||||
"blk.36.ffn_down.weight": "470aec48ce8c5129a6654d9fd26fcae72776f9fc1429a8bb05818072a876475d",
|
||||
"blk.36.ffn_gate.weight": "7f5f296d09cf55679767b5d15de3eff489c456782119f25204be4b1647f18dcf",
|
||||
"blk.36.ffn_up.weight": "b7ef74a1f7ffb4982711d93f1787be3a70edc3d2358d5203c41d8900508037d4",
|
||||
"blk.37.attn_k.weight": "c4ffa5412e4ff2dcfe1aed991c1f54169fd171a4c7638e4b9f21a1ca64c5e1d6",
|
||||
"blk.37.attn_norm.weight": "4eb6c888d841cccfacf5b963f8611120f6ff24b84af0b5714fd9ab36dcda422f",
|
||||
"blk.37.attn_output.weight": "db2a7bbf9682f9f6eea672dae8e150738f1bf74dbc80edc7022017a3f040c8ac",
|
||||
"blk.37.attn_q.weight": "e38c0462aff139afcbab289189823527e453abc9e541154adde5e7af88cacf0b",
|
||||
"blk.37.attn_v.weight": "952eb2492ed452a72f96bcc12d4b2affad9dfdf46ee39ce4a5d7b57a5dc301e5",
|
||||
"blk.37.ffn_down.weight": "25f23a8fbc44febf6dc4848fd7fe03a580e2822bd3b3b5a51f4990826bfe3e4e",
|
||||
"blk.37.ffn_gate.weight": "707da5eb40118b035305d3262444382351f170a20a537386a70e90c5a83a7817",
|
||||
"blk.37.ffn_up.weight": "d2d2ba5cfc4ef47338dd7384219e22bf030a5a2209e0354d88f5bbaaafd20e87",
|
||||
"blk.38.attn_k.weight": "abc4bb189dedf7ce661e79028427623a4f91ac091c2cd60e31b58bc62b1cda71",
|
||||
"blk.38.attn_norm.weight": "9f4803a7d03fd40fcb83d85f84eb1d5682ea4e5bb084f210c02850675d804c3d",
|
||||
"blk.38.attn_output.weight": "77cb66007f1a41df7135d0e7f900ceb499c2f667dfc3f1a6ac01a3203bbd3ccf",
|
||||
"blk.38.attn_q.weight": "d94a8b26cd375bf2bcaa76597e314aa8268ee50a479d00931e5e0e021feadb5d",
|
||||
"blk.38.attn_v.weight": "660c907888bc5016dc69b7d35fe6f55c7ded697c93be0e2d332a2f17aff88758",
|
||||
"blk.38.ffn_down.weight": "6f06173bae5b00ffaf88ef383619a8b9c6a8d0d5c6494695d17f6c1de1a68a13",
|
||||
"blk.38.ffn_gate.weight": "89f99be149d03f116527bfcabe073c50001c874de40fb6e817f6619027f3cd05",
|
||||
"blk.38.ffn_up.weight": "8d57557c8d5e2d2688b73f01dddf1ce8d5194990cda6358153320aea88aac7f8",
|
||||
"blk.39.attn_k.weight": "21be09c988b46c8393e6c2ec9230f3b5136eb7607dd1953ba92d0811c2f0dd75",
|
||||
"blk.39.attn_norm.weight": "ba7c1912dd1c4e2d16917201f62396fd0600e4a451137eaddff255548c209abd",
|
||||
"blk.39.attn_output.weight": "acfaf4abb3fd27fd899b5563c3877f176b597d8f6cdb2f2fd3f3a0bd4da15ed6",
|
||||
"blk.39.attn_q.weight": "e8adbc140d4c8f0db2a27ca584c5531d5b1e080555fe627e34d80d0814a92bed",
|
||||
"blk.39.attn_v.weight": "92f96b0e1f724e73a0f90a76c145654418844c04a6d4b14c05eb5af8a62bf8dc",
|
||||
"blk.39.ffn_down.weight": "4d9ee7c65fc16fe95d10c47b79ac6a525741947600a64b5fcea5d300a82c50de",
|
||||
"blk.39.ffn_gate.weight": "7e18507989f39b32191133d2657c2ee3b74f42f070579204d727eb72215793d1",
|
||||
"blk.39.ffn_up.weight": "22cda752269c9757ba918abede1df95bb0f83a5c772dea13c8deea3d5f2723d9",
|
||||
"output_norm.weight": "2858cf0e39d32caf52b7861378ace076000241e147f10b9eb21d8a5cd149e3cb"
|
||||
}
|
||||
312
convert/testdata/gemma-2-2b-it.json
vendored
Normal file
312
convert/testdata/gemma-2-2b-it.json
vendored
Normal file
@@ -0,0 +1,312 @@
|
||||
{
|
||||
"general.architecture": "gemma2",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"gemma2.block_count": "26",
|
||||
"gemma2.context_length": "8192",
|
||||
"gemma2.embedding_length": "2304",
|
||||
"gemma2.feed_forward_length": "9216",
|
||||
"gemma2.attention.head_count": "8",
|
||||
"gemma2.attention.head_count_kv": "4",
|
||||
"gemma2.attention.key_length": "256",
|
||||
"gemma2.attention.value_length": "256",
|
||||
"gemma2.attention.layer_norm_rms_epsilon": "1e-06",
|
||||
"tokenizer.ggml.model": "llama",
|
||||
"tokenizer.ggml.add_bos_token": "true",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.bos_token_id": "2",
|
||||
"tokenizer.ggml.eos_token_id": "1",
|
||||
"tokenizer.ggml.padding_token_id": "0",
|
||||
"tokenizer.ggml.unknown_token_id": "3",
|
||||
"tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
|
||||
"tokenizer.ggml.token_type": "8d40143b3477df77beea4139420335ede458bf5e14102f01b0170197b55da8d8",
|
||||
"tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
|
||||
"token_embd.weight": "64a9d30707e659e2e673656d71f5aef7a9fb9fd83bb9a77558dfc5abbe218a05",
|
||||
"blk.0.attn_k.weight": "d8b4437c5edb3cddf6af9987038e1bb2b191c4f0fce0e160d2abace717f5d5d7",
|
||||
"blk.0.attn_norm.weight": "1eb73e3f7aa8e502f6ca31cd19efbb8e4fd9a89692e13e48ac8205545a7fa7e8",
|
||||
"blk.0.attn_output.weight": "39e7b78e57d356a22dd89ce1c4d7163b970712ba756545e1703f97866cd2192e",
|
||||
"blk.0.attn_q.weight": "795058e23b6109febd9d55c89e1eebe6af0714ec8c56fd86a160876a6135ffe8",
|
||||
"blk.0.attn_v.weight": "0cd6e583d1887c020472e961bbb113fe5a0d23ae2f1c2c876fc366cdb7692b52",
|
||||
"blk.0.ffn_down.weight": "51eb4d962189e945a84e94e0dc1aad3f8f90cc1a11e18029670afcd0ea0acb1b",
|
||||
"blk.0.ffn_gate.weight": "9811a29b8ad48432925897ab21dfcb13c5cbd372aeccbbefca9b7866883b4ce3",
|
||||
"blk.0.ffn_norm.weight": "92cbf4652ef503c1de5b10f2be00b3fcf00100980cb3baa8f3013a8d8bf3d851",
|
||||
"blk.0.ffn_up.weight": "af87de21746879483ed1b374cdd76b19ba11ca2b6dbb1beba98efdf3be3e8077",
|
||||
"blk.0.post_attention_norm.weight": "32e135f1f258ffe407018899e39af1725d59d66d60022b9a21575ba160e0357a",
|
||||
"blk.0.post_ffw_norm.weight": "ba286f5ac11b07fbc986173708c66f1920427be5a6d108af38fa0a837c1c8eb6",
|
||||
"blk.1.attn_k.weight": "51584435552051f7fade76beca582b3f7190cf7fc07adcf527c2774d4b1c3901",
|
||||
"blk.1.attn_norm.weight": "6833104c7fbf35a7e799ae56c262b97fffa14789642aee14381b25acd21ed80a",
|
||||
"blk.1.attn_output.weight": "14c39481369087bf292ac9a3ab2ef166f9fe376a9f90c246653213ef264febdc",
|
||||
"blk.1.attn_q.weight": "443f64ae2229f857c69d6bebb7800b685786cb77884c3ae19d4286aeed081325",
|
||||
"blk.1.attn_v.weight": "0df482de2038f1e4c8a7733ac0ddb69ad90759dab5968b942af0155588de4c4a",
|
||||
"blk.1.ffn_down.weight": "66f30763a8bbbcaea609a0087ed75fadb5e771c06378dd2cea94cf17e492e8cf",
|
||||
"blk.1.ffn_gate.weight": "a7151bff00a545fa18b2c92dcd2a14572ccf9beb957a6c494f1374e8ebe174c9",
|
||||
"blk.1.ffn_norm.weight": "e197d71ea11b5276bc0167d2663b88089b3ff42b47ba91e85f6c5d95f6306435",
|
||||
"blk.1.ffn_up.weight": "57c182e0b14cccd1350d388f0c616991702e74281db54637451b70f4ccc24f9b",
|
||||
"blk.1.post_attention_norm.weight": "3c56f837168d784c2d8bac247c130bdca6610c095c8da4558c536ccad7605609",
|
||||
"blk.1.post_ffw_norm.weight": "d2a51d320fd01069dd7ccaa7082f16a7faeb671885607d7900b10a89c354d0fa",
|
||||
"blk.2.attn_k.weight": "bc103c818192de7ce36caaf89dc117be4df13fb902e0bd9a23c64edace5df9b6",
|
||||
"blk.2.attn_norm.weight": "0f2503aa126083a5d6ac72481be1ef66c6014705b573682b35bd864e4749a3d5",
|
||||
"blk.2.attn_output.weight": "05fcd4a1226e482f91803a266f72caca887a93e63c2d2ba5611ab3c68d38743a",
|
||||
"blk.2.attn_q.weight": "6a10b5c2fd423d1e4c4fd60fa8c154a0159b6b2501ea79cae2ef19f45a674e5e",
|
||||
"blk.2.attn_v.weight": "3cf891945a1f8ae7cc908a5c6b729ff5b70f4436c5ffdbf245cc0ed4cc19cd1b",
|
||||
"blk.2.ffn_down.weight": "ea204fd04e0d2fc728a9861a459216bbfec629c152004ba625f52cd8837bd51e",
|
||||
"blk.2.ffn_gate.weight": "3a3518729f1b8b64a82b8792f33987db5418fdb094be0263c68f146a5c38de54",
|
||||
"blk.2.ffn_norm.weight": "754ede678b725de41a34b82f0edf7688b5c065be7c0d46df6f7ad9430d986884",
|
||||
"blk.2.ffn_up.weight": "ffdcb88439f5828ffbd9fc844b03ff91637b790b9838097258cc3ae75935720c",
|
||||
"blk.2.post_attention_norm.weight": "4b3f53b7ba26e8c36b2dfda3b7e5fc4b1065257cefdea235fc7df9af130ac2fd",
|
||||
"blk.2.post_ffw_norm.weight": "e550369e26b8485e2b54ad34b34bc98af5494287dcc513c2c39cf1eaa5b89d07",
|
||||
"blk.3.attn_k.weight": "89f24ea450e37d9e95757651a83205c085d81b354ee9489dd6310a391d8409f3",
|
||||
"blk.3.attn_norm.weight": "24e2ea662b7cb822b4ca5cd61bc17f2709f406d990ec3b4a0dac1cc112db45cf",
|
||||
"blk.3.attn_output.weight": "ac4dad69473c6e3fac56669212cadd8c34ecc5973d945972e974d94805334967",
|
||||
"blk.3.attn_q.weight": "b6a9c9a7d4722b9096631c65de62228dfddca6e26edfe6af7fce01e116ef0f4c",
|
||||
"blk.3.attn_v.weight": "f272a960a40093942309bc342a379984cbacec2d7bc64428db3f64e6b1887ed4",
|
||||
"blk.3.ffn_down.weight": "c0188ba50d8228805982029c277fc0e87aa57473b8363037c648f6d006ff828a",
|
||||
"blk.3.ffn_gate.weight": "a04aec1561ee6c0fbb18c3db49dc62fb533619cf697fd548cbf2279761aaec3b",
|
||||
"blk.3.ffn_norm.weight": "bc053837d44087ec05eb5d9458357b2a5be787789b19cdbbdc694b57697f99a6",
|
||||
"blk.3.ffn_up.weight": "b3ce8b274f20796d3b1a7c08ba27a919066f9de89a782faa544c4a8d6bea1382",
|
||||
"blk.3.post_attention_norm.weight": "9c922dee7a7df5667289e2788e60170238239cee2dfdbbd9e435763f9f416718",
|
||||
"blk.3.post_ffw_norm.weight": "b682544ac953ad2e0b49027ed8916f2e9d1aba5d1587bb4127ac703570c7a03a",
|
||||
"blk.4.attn_k.weight": "143b0cbb4b787b95c2b6212374410e32173ccef2adb914908a2f89a7916de512",
|
||||
"blk.4.attn_norm.weight": "5668f60491b780273745192662d02c9a92a4f692b29d16aa0bbc7413fec4f85b",
|
||||
"blk.4.attn_output.weight": "b9f2bdb68be1e0cf66dd19f8fa2afb105910ad2ef394864cb32cea8f8944e0d5",
|
||||
"blk.4.attn_q.weight": "ddcf1343dafbc2dfcd0b8741225af22fe4b54b2becce29240bd01c34265d126c",
|
||||
"blk.4.attn_v.weight": "6dc7074366e7ed52d9f48c594dcc85bef738e096276cb99d28228c89eecc5b9c",
|
||||
"blk.4.ffn_down.weight": "30334ffc59ce343cf2a1b973174acb7722823463adc07e19a99bd0f404bc9906",
|
||||
"blk.4.ffn_gate.weight": "890f7c8af208d63b28db52c4b8c16c2288a382d87ff5a6a6d6b0a5b3bf27e6cd",
|
||||
"blk.4.ffn_norm.weight": "ff0316cc7847221eb86a90c1ab441d4ee61553d410c66414a7755021b3b12448",
|
||||
"blk.4.ffn_up.weight": "6af97d113f91564c636734f215e25ee602d48eb045458f300b3ec7582be0f41d",
|
||||
"blk.4.post_attention_norm.weight": "69438f231e105e68216b078bdeb35a7cdc8b12c4e2845e18ecf4c8d361d6a321",
|
||||
"blk.4.post_ffw_norm.weight": "0fd535da78bcf2b32c95b05b2b83dc49817393765be90d8cc1ed3d56f47b68ec",
|
||||
"blk.5.attn_k.weight": "0166eb3c6d20dcf3d3c169e94caa8dee057535bb525e29f698fb6f8844f18a6c",
|
||||
"blk.5.attn_norm.weight": "a7808f27f164023d5cde2be00fc23cac6c71aa0ddeb60bc23e12411b80087672",
|
||||
"blk.5.attn_output.weight": "8b65b2027a0842b68c5308f91d6a31de9599d794157d77df8418b19f9e0d9334",
|
||||
"blk.5.attn_q.weight": "966bc626ef2c2394d872087a41c126bb1b67d1d5f6de920204ef5e5b16c34003",
|
||||
"blk.5.attn_v.weight": "9a362aef3f4437fbf0ef6e1ba785f3329c3db2960f93fe36547d2795e9c254ea",
|
||||
"blk.5.ffn_down.weight": "63e53541d34197720c06f297aa8142ac6b6eec002c7987b296f26e8b1400f931",
|
||||
"blk.5.ffn_gate.weight": "d9591fdd32f783e0fc26e20d5d587ee8971ac8ae2e4c818c6eac1c125c7c7f37",
|
||||
"blk.5.ffn_norm.weight": "677334cc60ecce3a7f4ab3acda15d359353d7358872f614ad8914e3780e9fc6e",
|
||||
"blk.5.ffn_up.weight": "a63764110e1c655ffbd55af0669b2dfe4cc29d0e198d33a8e5426461b08a85f7",
|
||||
"blk.5.post_attention_norm.weight": "c55499f859b2c0a7f5cabceaae47309a5ad38bc29d0f4a8db81f1357023162a9",
|
||||
"blk.5.post_ffw_norm.weight": "82752754665f842418f3e302cb5f43d1e0504dcd124c4b8ddb77018b2c793837",
|
||||
"blk.6.attn_k.weight": "e20a5f0d6c807273c8d491439566b428497ac02097cf0aa55e33748c28e14be6",
|
||||
"blk.6.attn_norm.weight": "2c6ba42fd3c73d72073ced03a32dd28d70a89ed9bbbc8fea1ba03a7ade951e6c",
|
||||
"blk.6.attn_output.weight": "4de7c5c2f4a133a266e17ed8c14c52959466b54cc7ab9e19f789a33b4850f284",
|
||||
"blk.6.attn_q.weight": "56462d921800e6b8cd2213fef04c4ff16d728905cb2f4c58e966d0a053a3b0ae",
|
||||
"blk.6.attn_v.weight": "b758dcbff769d6240c2245ede1dbc62c4170a67c77458e866312589220fe29af",
|
||||
"blk.6.ffn_down.weight": "582247fb3c2bf687cbe9413fe18d18ad47bef4b65df7d78905e10335c6134764",
|
||||
"blk.6.ffn_gate.weight": "3035444d5286aefb7a6d04e55bc27e1fac7cf895cd5be02319a431b8e047b4ae",
|
||||
"blk.6.ffn_norm.weight": "e582d24c66e01b96faa20ce6adfda3d8583b11e809bff89969927398175e369a",
|
||||
"blk.6.ffn_up.weight": "6f4b7bbfedeacf61a4866ae0616c4ba6c9e856662e8f00ae6aaec7f52c53e7b4",
|
||||
"blk.6.post_attention_norm.weight": "8fe51b50bd677d21586aecab0b565c4bf9fa68ad50bfe366f45e8fea3c657ca8",
|
||||
"blk.6.post_ffw_norm.weight": "81ba3cb4c2bf5c546b86855b7a885d3fafededc67eb3a35cd3598b03c9e26e65",
|
||||
"blk.7.attn_k.weight": "2e044179cdcae0946708c86bfea7aa0391e1f7e2a09b33fca035d384cc3ca758",
|
||||
"blk.7.attn_norm.weight": "94b48c546b046803c60e75a3acb17a356b710735989938021b565f68df9b4985",
|
||||
"blk.7.attn_output.weight": "65709b4ad7a581f4d75793d39d4032a359f6bcc0c3835205242a0b99e5b66824",
|
||||
"blk.7.attn_q.weight": "8ded993c95d1f7caf201ceb6fa035cd6ed6d351b50b999fa9355dfee9486cb5b",
|
||||
"blk.7.attn_v.weight": "c92d5e2d2d48397542bc03bea25bf39154075e66c5bb1ead85188505aa04ae91",
|
||||
"blk.7.ffn_down.weight": "e8ba8fb57208805ef1dc23cd7c86e9a2d1fb7c52c3940d292cd5bb2eb24b3fac",
|
||||
"blk.7.ffn_gate.weight": "f0f06d6a2e06c5ac252083bc61d05c814e6289d3f4e4a87d2f06918254c02c36",
|
||||
"blk.7.ffn_norm.weight": "ebf8ef775f72624148e09d68a4332187a7a5020c521fe0623da1cd3485ad33e0",
|
||||
"blk.7.ffn_up.weight": "a554adc4fc7122c247c77670e169916ba1794c787b5be30a2b36705138f1f746",
|
||||
"blk.7.post_attention_norm.weight": "3aa6bc21d85c3a0c12b964e82b12feaedfdd13130c3cd2229228e24e0967ebdf",
|
||||
"blk.7.post_ffw_norm.weight": "508bc7b19ee8ff08f0007c890133a462fc57c7e72b16ee8f6dd64def264ef876",
|
||||
"blk.8.attn_k.weight": "363c8e74056642fe9e7c2f3f9769d57319cd3fa0a6022810189ab8d894322885",
|
||||
"blk.8.attn_norm.weight": "685b49a1f1acb169f4df0bdd8e3de6943f3033cebad14b898a72000595610d92",
|
||||
"blk.8.attn_output.weight": "7bde571e4efef1c6a6143f0526721dfb59e0a0ea0e1a3616a322b2eb937efa48",
|
||||
"blk.8.attn_q.weight": "fc993dbc1074c28a0e1d85e5ab2f4ea6a9c6c1affe7ee56027000a275daed9b6",
|
||||
"blk.8.attn_v.weight": "281e8791d3aef9b3864f1cb054da0ae0c2fef4ce0a58b1bad8bc136b2fa0f62b",
|
||||
"blk.8.ffn_down.weight": "b1164a2578a7f87ed99c2bbc76c5dfbbbc6a1a803605391acc3f320fc989ffd7",
|
||||
"blk.8.ffn_gate.weight": "6b39a3b3aaaa79aee61416b54d62160b9258042650e61c6b47bc77c2dd17daf3",
|
||||
"blk.8.ffn_norm.weight": "17ea1362c72da27f12bc936500492035bdef3fd8f940cb12b57f37d42ba8ecb1",
|
||||
"blk.8.ffn_up.weight": "bc3a7c47afc440d2bdf8fbe9ddf2c9220467472c60c8b4ded8c0f181470ec96c",
|
||||
"blk.8.post_attention_norm.weight": "5c506204e00411ef9c8b4134d40eedcc19fffe68dd0af7d7cc49dcabf2dfac7e",
|
||||
"blk.8.post_ffw_norm.weight": "002faec235c3678864e2901eed275ce4e9dc229164a91c9cd4c965142ba62305",
|
||||
"blk.9.attn_k.weight": "0bab39d8c237f1b6d0010db40467142625a9e6f2e0e4c49a56c12b41e4e0b1fa",
|
||||
"blk.9.attn_norm.weight": "de5f38e873b17f07aa7598831b89cc1cae2c9bc3eb2e042ee9af059d2563e84e",
|
||||
"blk.9.attn_output.weight": "8a8184702c25a62df9ff309c0c7badc8587208523b2be3e8fa90ce7080573e6f",
|
||||
"blk.9.attn_q.weight": "7c961b2431b09ddf95377acd07201cb91bf13d9cd3ae0f2c25c7d6a0358d9f50",
|
||||
"blk.9.attn_v.weight": "e22d240cb4743067033e659cbf210ebe2ebbab3e1dea6ccbe5eaa982382ca038",
|
||||
"blk.9.ffn_down.weight": "a426f81210f03d6ad53277416e1fdcdf37d8065e4817613edaf6c67a343426be",
|
||||
"blk.9.ffn_gate.weight": "a82eba825cb77b8e64f85ff99ede2fc71bc9b01751eeb17e9e6c246ee12ea62e",
|
||||
"blk.9.ffn_norm.weight": "1a97f9b1302a3a326d534c5c3fed2db6db0ae45fd0edd381a3e4fc1c75d81030",
|
||||
"blk.9.ffn_up.weight": "5f20bac2bbf03bb42adb92fbf99561651e1edda57e0b61935ac7f6c08c0ed7cb",
|
||||
"blk.9.post_attention_norm.weight": "9f9866d13988e1946b1e1c80d9374a92a6e3be33748f8eaed3e126d1e1a4c796",
|
||||
"blk.9.post_ffw_norm.weight": "a6896dbf698db4dbbe5dbf12417d4fd80e9cad0c539c858892ec0aa5b046bb58",
|
||||
"blk.10.attn_k.weight": "ca8446e5d21ecd4e6a70dca8d321be480be4fba94d70cba065205436feb44270",
|
||||
"blk.10.attn_norm.weight": "4f41fe290e8f21f63b82151b6cce94bf7318d121468816b0c58af0ff7c1658ab",
|
||||
"blk.10.attn_output.weight": "c626d2e9681c5c941bbde43dddfae1a8d4986bf2be4470857bc8e8bd7f869044",
|
||||
"blk.10.attn_q.weight": "1e61b210a13a429977325cf15d781ab77d604cfa862f4270329cbd94237d5835",
|
||||
"blk.10.attn_v.weight": "8ff8d3e3f058ec3b35ada1057f2ed59c06494d0e0be6a8dc3ff9edf9f0e1a115",
|
||||
"blk.10.ffn_down.weight": "bcebc04219f8081a5f483e58103c0ddbbbc631a0a54fd6dd9d55778e041f70ee",
|
||||
"blk.10.ffn_gate.weight": "7a23a1e620ef871384ddf9611ccdcfb893fbf013cc203ac8e72f745420f1eea0",
|
||||
"blk.10.ffn_norm.weight": "e3a375e43c349a1c6c66c22328e513cc1af3137fe839e43dc8e9be2f65914fd7",
|
||||
"blk.10.ffn_up.weight": "5d182e7c94369194fca5f19cbbe668a999911e57f3d363bc7fb6088428700cb9",
|
||||
"blk.10.post_attention_norm.weight": "b841c6308296e8984f3c5f549c6e3a242f4b3e19141e1f54cc08de9c46759c09",
|
||||
"blk.10.post_ffw_norm.weight": "9d66fa05b5c940208f634f5053d809094c99a2a10a1d1e8847c8281fbd99fb49",
|
||||
"blk.11.attn_k.weight": "14adf24ebb2bb17b336ca81cec3e690fd854782f4440ca6c66cc1d7e7bf1c850",
|
||||
"blk.11.attn_norm.weight": "2d2213f311f50414702b5b34f22aafb9d9a0b6787243e7578562583dc40ad195",
|
||||
"blk.11.attn_output.weight": "de1f14cc2a7fff00cf11b229f0576999205f17b9536e97abc9d6de3cc79a7884",
|
||||
"blk.11.attn_q.weight": "2bcc5c147524003109ece0be08b89ac8b25baa71416ffa76573c6c052ffc6eea",
|
||||
"blk.11.attn_v.weight": "2e6ab8573070c22dc1e0d7aebe4d52123226dacf7822dcce06fadbb38fb036a4",
|
||||
"blk.11.ffn_down.weight": "1b86902f4e36868421e5228b9445051f8290b292df22a6d1af836dcecc1f25c3",
|
||||
"blk.11.ffn_gate.weight": "e756e8081bd0a16aea4a9ef5076ad102113524f7a3d50a3a77aaa7f7938b63e8",
|
||||
"blk.11.ffn_norm.weight": "6913887267be227cf9d1991a3dd8db2e7e74bb9b5fbdfcb9ac954fd7d7b95b3b",
|
||||
"blk.11.ffn_up.weight": "619a3ac0609ebdf42c3fb2b6e4b1db48df79e6dd8418d7ab8f1bbff13d8a6a50",
|
||||
"blk.11.post_attention_norm.weight": "e4b4ba92cef7b6a78407e8ab1b0307d47dac6c3df7b6817e28038317ff662d7e",
|
||||
"blk.11.post_ffw_norm.weight": "40aceeec58cb855f0c158c9cc217168fcd5d0e735567d587217b1d78df17bc5f",
|
||||
"blk.12.attn_k.weight": "c54c5a4d4892522022d1aa2204cfc624f0b4042caa536e678967316293fe5cb1",
|
||||
"blk.12.attn_norm.weight": "7cd2ef58298569ffdf244d9b390f3917245276c8206e5780af5f96d8c0bbb446",
|
||||
"blk.12.attn_output.weight": "85495ef9cc8b3deb21f741bde463ff6493acae2be51f02ecdeef952cbdec3375",
|
||||
"blk.12.attn_q.weight": "d19383f83fd119bfb8c0280c9515705c11d8e7d502019fcf8f49efeef0d106d0",
|
||||
"blk.12.attn_v.weight": "869ac669ba49531d9128892a0e27cef15de508ff40cdf80cc1681dde50d09204",
|
||||
"blk.12.ffn_down.weight": "578f39f8f9fc2f09138afc884a952d7cc3a9a31de4216acd10e88e19e0b75f8c",
|
||||
"blk.12.ffn_gate.weight": "e29a0186bc6c4a0720246306e922d3a83f777dadcf4ac80bad468287031cc8b5",
|
||||
"blk.12.ffn_norm.weight": "e1ee95c6584b5cb57fcf1db8ce2bcc03aff91eb389238c094a61c00dde93d1f2",
|
||||
"blk.12.ffn_up.weight": "2a826f06d7cdfb3edc6ae250ff44363ef77a2a9cdf96313e23a331b99ebfa17d",
|
||||
"blk.12.post_attention_norm.weight": "4bafc7699b948d5cbc0d3e09b418b06c6abc4651a61ada9609d9a2f21c7e5607",
|
||||
"blk.12.post_ffw_norm.weight": "bbb8c34a7176bb1a49f9fe2bacca0bd26b673d52c0835b2e90fa11f2962f077f",
|
||||
"blk.13.attn_k.weight": "ffeefccfe8255d1b694382012ff4134eee5fec9d9491c8d0ff0a13832d1a37e8",
|
||||
"blk.13.attn_norm.weight": "35713726529e3887c4135a88e86e8a4d7270ba5b9f2d1ab462622fbf40a7cdce",
|
||||
"blk.13.attn_output.weight": "0d60b7c5cd71190a9ef4b873b0f516be15447c32d83914db2794b14592b0b460",
|
||||
"blk.13.attn_q.weight": "8296069e65bef794cefc61257fc65789b3cb22955e30f3df129205e5041b2222",
|
||||
"blk.13.attn_v.weight": "ca0f4ab9d16a748fc643a5c0c7a19826a811bf2a4e7316a8c935d4bf0ce8abc6",
|
||||
"blk.13.ffn_down.weight": "d5514e0c8e7b3ed1cbcc1605eb5be1733b6ab3514cf8a0508fc72f7d05ed8bcb",
|
||||
"blk.13.ffn_gate.weight": "8108e517a82e08a3aefbbd267bfa50a1668f92a76273280ce8a6bc1f6dd61521",
|
||||
"blk.13.ffn_norm.weight": "5fcb6132d2134bf1f835b904a99820fa501dbc57d2224129f7098bf3cabc1d36",
|
||||
"blk.13.ffn_up.weight": "6d744b7cd390a3cae3aa350dd379b81246acd056a2259996b6aaadece8465ccc",
|
||||
"blk.13.post_attention_norm.weight": "e08b14698912509790e9575b8676971fbb0a4d82d719367e3756c0d0c4ab8cc0",
|
||||
"blk.13.post_ffw_norm.weight": "2b196e4450fc5f1e7367b2cf7fe33a15fe919fbcdd861d11002346f16e980535",
|
||||
"blk.14.attn_k.weight": "120e5f48d7268dfd9ab5f4bc9cc57a7cec63ea9635f56b80d435eb22936e9483",
|
||||
"blk.14.attn_norm.weight": "146367bcce4db72cc894419a2e0145a6f533507dd68e4739c10ee480308c401f",
|
||||
"blk.14.attn_output.weight": "720fa0165e756876c5cb6ad9e2780dd910390933f3f8849e5add5da04266650b",
|
||||
"blk.14.attn_q.weight": "f5183466f56219ca1aca52d8b82c2d966a4198fea40fdd6b39f4d8b06ca2a6dd",
|
||||
"blk.14.attn_v.weight": "24f8ea3d5512cd37c43c8329cb0da0c90d1895aef763ac2dcee3fe5157ec50a2",
|
||||
"blk.14.ffn_down.weight": "e29960965b384ae5ab3d898a4dbaa8fddd28fa0e477ac28bcac49dec12a5ac67",
|
||||
"blk.14.ffn_gate.weight": "6d0d6a74bfe9692e8f8eedff0fc34fc4fa1c8687794f35f2e2b033ab2d7510b8",
|
||||
"blk.14.ffn_norm.weight": "f7036c1a9a71e046c9d2af16e9218fda5dbb0f7241ab44747abed1f0f9d602ca",
|
||||
"blk.14.ffn_up.weight": "7d69ea1424007ffc9c12247dd0308c616e93ac02a59ec341cfa48f92d6ce3b10",
|
||||
"blk.14.post_attention_norm.weight": "65b9712834d9445d4236bec362f3fb795c20d60c541b3dc6dbb7914d9b493e41",
|
||||
"blk.14.post_ffw_norm.weight": "9c6a8da2e4e437d5cfdf3b9097e9f8b64bf07946a048badec20f4d374613f38f",
|
||||
"blk.15.attn_k.weight": "864bc618303a0e4ee67fb1d5e751de61e936cd51e96669dd86f8cd08f2305045",
|
||||
"blk.15.attn_norm.weight": "f9f4187da6eeadc2fc5921d8fe669741697d16c13d71e4aaeb73b82f50dc577e",
|
||||
"blk.15.attn_output.weight": "ce2419a0b097036b2a31f2f4ad731d5814bcc2ef4c511786e24471e5eefd273b",
|
||||
"blk.15.attn_q.weight": "9539db5a970d11ebe99722d1e13fcd635e250033630811efe583d2f97778e4a9",
|
||||
"blk.15.attn_v.weight": "1c834b48ccd88adaeabb7d8bcb6be0bcd6d5ac1354ce88fc28f19a1a96b81ab3",
|
||||
"blk.15.ffn_down.weight": "bc1f97a65dde6fa2c1e5397afb612266944b343f2eaa868b635ddd25829f8a42",
|
||||
"blk.15.ffn_gate.weight": "1b14529d57056b79037f6cb5008132e62cc35992353b38dda59572274623103b",
|
||||
"blk.15.ffn_norm.weight": "9af77458de9ee55c66f93865759f9c2c398557f94f3fa8fa6af30543d7339cde",
|
||||
"blk.15.ffn_up.weight": "41d524a26b61a9595816b4fd53cf57ef50a702e4ef32933ff6136dca9136a267",
|
||||
"blk.15.post_attention_norm.weight": "c60a03cd0e63a7db5c80015e58e9b97ba2208caa19f66a6fef5c4447eca900ce",
|
||||
"blk.15.post_ffw_norm.weight": "34f7f9f96769215bbc3d17084df091864aef96a6645b7d0b3b7d9bd92f1a4b0b",
|
||||
"blk.16.attn_k.weight": "7e27240d9f3a8c6cf0f4a980113d43234f514eadc3e3e1792b86efb29ffb1a6d",
|
||||
"blk.16.attn_norm.weight": "af798acc0899282a30448edec48223b3e8efda177090273e612d8eca5e377301",
|
||||
"blk.16.attn_output.weight": "79df39a3709d3d53e84146291e0944a7a653d06705293d9ccb5648dceadb432c",
|
||||
"blk.16.attn_q.weight": "db58a1c3b83ad294804e5fd7321005719e200659173466df5a52a182b80b7165",
|
||||
"blk.16.attn_v.weight": "2af6d48cbaeb225b5c1a704f76abd89c8ab1521417695b112b4dcc2cbd39b74d",
|
||||
"blk.16.ffn_down.weight": "fc1c813eb5e7da3d6194569d6cb21602fc6eff2dc8e1b0eb753f2d5df148189c",
|
||||
"blk.16.ffn_gate.weight": "7a80bcbc42464bd55df4814a6edbd7b5c153e0428323bbe49de55e2d2add33e7",
|
||||
"blk.16.ffn_norm.weight": "2041685ee926d30f3f2ae4ec35b5688f1cd834167a6359a7d4057eac804c58b2",
|
||||
"blk.16.ffn_up.weight": "8da4b718973ac1d43b928829bc45e062fd101984d6c98dd825bd7c5d08ebfbe3",
|
||||
"blk.16.post_attention_norm.weight": "975c48fe680a6167438a106140a8872eee7765191f152d80e3b8ddf47693e095",
|
||||
"blk.16.post_ffw_norm.weight": "4de2d4d483acfe4fc77860ea929025df2f4e15c10729413f36a18c94eaa6d689",
|
||||
"blk.17.attn_k.weight": "f937e61f0af8c4cd98ee742648eb60e02e579683e21d421071295a3b70aebaad",
|
||||
"blk.17.attn_norm.weight": "c3270583ed28b7e423f5b170c59113234f258169b93a867d9274f4c10b7cb115",
|
||||
"blk.17.attn_output.weight": "b8c1150e81e685e539a5dcf2c19047a24eba2b281fabe166674b1d71ef4612ea",
|
||||
"blk.17.attn_q.weight": "c255100ae2011e7dc7e3bf3bc3ccd96d859fbb98581cae993d7b82c1ba8e8b39",
|
||||
"blk.17.attn_v.weight": "5830bb0a555984c6485348067f70b5d22ae337c011aa9248dac2ff4c95944551",
|
||||
"blk.17.ffn_down.weight": "8ff9a7cccaa3776434a9d895aae4fb5c36c736bf2ec98784226b4c234940fbb0",
|
||||
"blk.17.ffn_gate.weight": "1b52876739712831c272911533da206f407b46034a1a4ae8a88c1f96b6bd5747",
|
||||
"blk.17.ffn_norm.weight": "d0e16ba5e87c91b545334e022058c7d03849665c3b1a6298771b656531366b66",
|
||||
"blk.17.ffn_up.weight": "4dd6211d01dbebbe21052708eddc242b082a58b5f18ed16479e17987c1d3432e",
|
||||
"blk.17.post_attention_norm.weight": "6f49c775c7417dade77ba8268a0f8441c1e5ec28b5d7e4dc5ed07a04d04600c8",
|
||||
"blk.17.post_ffw_norm.weight": "b91a0bb2e6679e9c9be06ad323adae441d00a3d673efb19d7c4954be2aa84b27",
|
||||
"blk.18.attn_k.weight": "22b565ace1b4da8b33865a58625be1d90beea9891f29686a69fa9cf7c93217db",
|
||||
"blk.18.attn_norm.weight": "3e0160d7063c8753de65d2356a66648e47d921efdc5c917efb8209892120f8db",
|
||||
"blk.18.attn_output.weight": "e3180f0bb4ca90b31e9b08158db38e332de62dfbaefe34aa94cc316409331e09",
|
||||
"blk.18.attn_q.weight": "f3a5a83614c3ba7ea41cdd5b1b0819a241ee2a951a381ce4a9e001d3f700ed8f",
|
||||
"blk.18.attn_v.weight": "f3350a5984fb951fc738adcf78147e6d812ff1c576670c460cafc99c253c1654",
|
||||
"blk.18.ffn_down.weight": "9e9d09b13a33525e14bdaee6efc65c551ac7cf7680e534b940ab122a3a7c1ac9",
|
||||
"blk.18.ffn_gate.weight": "ebaec8b4b578a2e8d815baac12f1675c208f80c68074d5a18288a2e1a60680ee",
|
||||
"blk.18.ffn_norm.weight": "33e7687c53a242f2f8dc7093a491c97b18d4a5a8c14d183f02bd586a770f05aa",
|
||||
"blk.18.ffn_up.weight": "78a1816662378ce56cc870e705174492781897b3afd2d4d97a51f10f2f2987c1",
|
||||
"blk.18.post_attention_norm.weight": "a58dde3f12df3e94cbc27d87c8ea86f89af8a388a506446ff6758f05399b05fc",
|
||||
"blk.18.post_ffw_norm.weight": "cebf90cc143577d483cca27b032dfd82031ee59bdf17c0e2cf60a0a3ad5bf996",
|
||||
"blk.19.attn_k.weight": "4683375d0599ac9e2232196aae1e90af13a14cae26e865465de5c8e257bb2055",
|
||||
"blk.19.attn_norm.weight": "f3eba936bfb1814bbcb0a1d62739eb66daac839df8c9c836fe0e94860df88525",
|
||||
"blk.19.attn_output.weight": "51c0f01d38a9dcfe9bdbc4643576fab164c1d9e4b7168b7695c0ee55e6965667",
|
||||
"blk.19.attn_q.weight": "28d15b69b8416f2e7ddc88fe381cb1e2ef2ad705fb1c268139ba96498cc74848",
|
||||
"blk.19.attn_v.weight": "6860f1cd720638e63a981fa2c0b4db900129826bcb9823c9ddf9fb8b1b9f3383",
|
||||
"blk.19.ffn_down.weight": "bc7f2d7827ee01c2dd41401c7b3b1700ad3a4ff620e8bb734f92630d342dcc7f",
|
||||
"blk.19.ffn_gate.weight": "54d03ef69ba373fc410fbca8f1e34a565d58e4296d9a035ff7e48340b9c848e7",
|
||||
"blk.19.ffn_norm.weight": "9178fc796a340ee6e8128ca74c0cb6203d1adbed6927af4e5ac7863da57affc7",
|
||||
"blk.19.ffn_up.weight": "a77bd708026c6e83ad5c79c223278e74621bcf74a9641c7818d96b595daaad20",
|
||||
"blk.19.post_attention_norm.weight": "ae94aa26f4c411bf9496a6fd4a6df64ee589ee1ae9a04b531d45acc95721e582",
|
||||
"blk.19.post_ffw_norm.weight": "9ad210700edeef12133bdcff04bf1c7f62b49f6f4a9ba483c7cdc59857c24a5c",
|
||||
"blk.20.attn_k.weight": "e35bce1e9f4a7a09ef34721f57ea38cfca68c272f52d923fe50af8308f66cfaa",
|
||||
"blk.20.attn_norm.weight": "644800f6926fd34f233795c4dec1151a295d2138ca8cac33e3e48167d26f8b41",
|
||||
"blk.20.attn_output.weight": "8d3758cd236471741e1ad66c0710cb79077dc8c7a3a292d35bc551c0c5abe627",
|
||||
"blk.20.attn_q.weight": "c333b1f0f6f956b5d73891df10b1a0321e55fc31c40d623a24e1f52caa6a998b",
|
||||
"blk.20.attn_v.weight": "8562b418d0c4868a050fb19fa3fcaf50a8cf1c669f537d666c80c7b3a04714e1",
|
||||
"blk.20.ffn_down.weight": "97efb608ac44cc804198faec3ee66eafe56ced6b7ca5359700c6f1df75b7205e",
|
||||
"blk.20.ffn_gate.weight": "5c61151d86f28415c73c73d90ec088c646cbe5c1640197caf58eb501ba7db293",
|
||||
"blk.20.ffn_norm.weight": "24bbe0a701afd4bbeea65b3edde712b3cbb2281043bbc43dbf250582453116ed",
|
||||
"blk.20.ffn_up.weight": "e170cf68e249566aa99eb6f6b265679bf9a5a6b76830ba24e7e130c2515910c4",
|
||||
"blk.20.post_attention_norm.weight": "e092d751cfe20dbf2d348358f3b38397bd83e4ed94d6bbaa6bbaddcd902b2ac4",
|
||||
"blk.20.post_ffw_norm.weight": "219a18a47dcba76e669e4322223a5a9227bd3db1de3fbd3d3cfb22e54a783c5a",
|
||||
"blk.21.attn_k.weight": "c3a095ebddb42c63824f1c98da65263dc88e4d790a26aa1632840b44f5cc7cb1",
|
||||
"blk.21.attn_norm.weight": "ef8bbaded5fbc45ad9cf3985ae02174524e7090fe6362811124f942ef643bec7",
|
||||
"blk.21.attn_output.weight": "668f018aba72baac6252aa3ad58569ddd55ab751a0dd8d7bcc9fb9b6efb4bf53",
|
||||
"blk.21.attn_q.weight": "e759c65663089f3bbbd51847934c185e680c82f1249065d5d487da638e519e6d",
|
||||
"blk.21.attn_v.weight": "2ff57762686cf9ba1f5a6be76503454b97556ce67f4ac98254bd0562231197ba",
|
||||
"blk.21.ffn_down.weight": "3fd106556fb721b1c28ae3f4026bc83eb1b08ed910f2ba5f466c6b5f327d91cb",
|
||||
"blk.21.ffn_gate.weight": "338022d882f4b6619e8054a6fb909696fa3eef3013cf69b65c3cacdfc5b9e42c",
|
||||
"blk.21.ffn_norm.weight": "1e77660c23a3f9653ee721a863d1960f773d87437cabc4dc0a6e17ee3d4e5e44",
|
||||
"blk.21.ffn_up.weight": "7d31b20fbc2e6eba8f350f170069dc36f0cb12f68fbc4206ec5022a74085ebcb",
|
||||
"blk.21.post_attention_norm.weight": "9638bae8d8bdcd7ed68da282979cd84a07c41ff9cabcaea94ebc846a1803db23",
|
||||
"blk.21.post_ffw_norm.weight": "d622ef11115fe0cbe04b727d5a3b6371e7f39bf08c8d5eb9bc6da52e3f3cfb9d",
|
||||
"blk.22.attn_k.weight": "5c321cb29deffbe57de200dd206a62005f1e80acb86c4fd2349dd44c8d3594fd",
|
||||
"blk.22.attn_norm.weight": "198d949705d7170a331d75889d8c7500c3635254dac2cc6aa4dc35d556584536",
|
||||
"blk.22.attn_output.weight": "19805cd5d7025b457e5d41d70db8b3fd63c2dd0e4a94d3ef1704d50ef4e749e8",
|
||||
"blk.22.attn_q.weight": "177836cd583fc87405975ddc21ebfebdaa090a0363799664c72caa3da851ae2c",
|
||||
"blk.22.attn_v.weight": "fea255692483e30d0108f9e4e250eb3ed7dbda8d83f499b06519b8c223ae6096",
|
||||
"blk.22.ffn_down.weight": "00cb8939f03e5817d6d412de8cf2c923c9568d5493e382cec7faf5718fb034eb",
|
||||
"blk.22.ffn_gate.weight": "b0591065b91281b2fbd8a9567f3568d40479f680e1f0a29e27ae213f37642489",
|
||||
"blk.22.ffn_norm.weight": "96b5c5d0737c2ceb8fc869f54adb9e5f46e28cb7b177c40f49fa926b923c00f8",
|
||||
"blk.22.ffn_up.weight": "81f472185b24344ab0594ea8246cc6e200e0dc1cab4943e74fbe4ca19d5a9701",
|
||||
"blk.22.post_attention_norm.weight": "27fa9aa6260aa3071e0391e1a1d49322dcb6e8072315b8a9b7064087108dbd06",
|
||||
"blk.22.post_ffw_norm.weight": "f37e1dcd7f643d9545675ffe9dc527a11eba86eb204989c2f44f636b266d896a",
|
||||
"blk.23.attn_k.weight": "5d82f36658a56c3f94d0bb2d61f65509c966fa6568f81812e0d3e338b380ef8c",
|
||||
"blk.23.attn_norm.weight": "b7983f88d9cad88bc88a528923e6da592ad20e699965b223ebc10840fe1f4fec",
|
||||
"blk.23.attn_output.weight": "59f97f80f430d71606aab0158a195aed29ccd3405e6c0a5c41c809be8eb01898",
|
||||
"blk.23.attn_q.weight": "53ac4789fe958919cc02ea4222bcd64c0ea1b4baa54304bff46635bdf42f7490",
|
||||
"blk.23.attn_v.weight": "ec8abe09b9e84dbb52c7a068094657c6d3c62fe551ba8d7c3a3f23da622e9756",
|
||||
"blk.23.ffn_down.weight": "3cf547eccb1b82aa64f208cee9682d7f558ca84e0aead7d9d3d1420d90f3d992",
|
||||
"blk.23.ffn_gate.weight": "366aa2486d911ba81eb519119e13807deacf7e9908bc1975a2a63e00d6b10124",
|
||||
"blk.23.ffn_norm.weight": "6d1d4a4af34bb7dc090ac87d6457d398c3e0fb68bd2e2b60b099dc318b6cfac3",
|
||||
"blk.23.ffn_up.weight": "53f76692e253f5d2420b3f200c731b9f3b7a83e379920b4a067c729b4674aa4d",
|
||||
"blk.23.post_attention_norm.weight": "7c952fa0efa76b3f048c8c4c9e8dcb5e3724d231327eda6423a34d3f3d3367de",
|
||||
"blk.23.post_ffw_norm.weight": "7ab188cfe61f0a91b40309a0ab6bfa99f19d0ff2a37b6ac10e5f0c7f44eb5270",
|
||||
"blk.24.attn_k.weight": "225798792f9bfdd10eff0505ebe61e0aad0209c17b431f6044ee7968ffe8c198",
|
||||
"blk.24.attn_norm.weight": "635e3c1ebf5219bbebfc40ef164bc32d2b726ef595a94da64ac524ae878e2915",
|
||||
"blk.24.attn_output.weight": "482f5bb2db8d9ed22b253d9a3296333b239efe698e5992e5d77e7e12dc2a5cf5",
|
||||
"blk.24.attn_q.weight": "43805bbccddb65d58fffc4be9b5c374d4e1df1395ec1e1ffb4bcff03e98d5adb",
|
||||
"blk.24.attn_v.weight": "fa741af54b4a3b1775d32f59134756090c5df2e7345a12a2d8db94fe289667a7",
|
||||
"blk.24.ffn_down.weight": "83c6351e3162626b276f524a57836144625c2556dbe321b57cbd8fd486a68fab",
|
||||
"blk.24.ffn_gate.weight": "fbe66be0d84d12cea5176cc7eaef64382ffc7324cd9d6266a3342dc43442f2ac",
|
||||
"blk.24.ffn_norm.weight": "77c1445a8639ad24938bdf0280233eea2362d47391421833dfa72ec756dfc1e8",
|
||||
"blk.24.ffn_up.weight": "78235ac729ee23c1cf1ae543751e3af32776d8808cee6e529c2a625a1f027654",
|
||||
"blk.24.post_attention_norm.weight": "161f71b6d07628d43e4ae51a4c9088ec6ca2db123a17986a14505d83fdd04dad",
|
||||
"blk.24.post_ffw_norm.weight": "cf1ba692aa683368b02ac413e69b2521b98c69a5274eacbb54165b53bf38a8b2",
|
||||
"blk.25.attn_k.weight": "057a56bd8c8d2b41608d1f71faa3052902152ddf85e47669ad950c1c3e77c33f",
|
||||
"blk.25.attn_norm.weight": "b7179fe02c334da556ddcf6c1b502245639a728c4cbba8b552d8e1df4565ee9d",
|
||||
"blk.25.attn_output.weight": "4fed8b05b08a0ff75ffd022701bbeb52f17b23d09332a1ddcba737244bd0d3b0",
|
||||
"blk.25.attn_q.weight": "c52e99f5d38bf7538d6106a0bbf38ac6dc6296bca9a3f849afa384ea67b4af01",
|
||||
"blk.25.attn_v.weight": "c49c23d8e1cfa6a8eb971eb69942204890c6d7d830dc8774c84b108a80598912",
|
||||
"blk.25.ffn_down.weight": "c08d4dc8412b19fdc870c164b83c341b236ec6fe7bb4a9bcfe0dc100faa20286",
|
||||
"blk.25.ffn_gate.weight": "1a4cb3f36735d59181721471452807903006539e5e1b5ceb4f72d1d7ae134127",
|
||||
"blk.25.ffn_norm.weight": "8fd6bd0dcec5198761525a36992a57c9ec5e9da60a22092839a84ae8c4e87f26",
|
||||
"blk.25.ffn_up.weight": "3a00f39bdd5f31dc5e3b281d2002e1ac4f2475d49a0ac1d7720a25b377dcd04a",
|
||||
"blk.25.post_attention_norm.weight": "e5f31a648612c859b6d21c9ee426e87a86cb1973dfdd86276c767371d9cef5ad",
|
||||
"blk.25.post_ffw_norm.weight": "553c3bd774922c99c2384380a142d019881d30dbf0fe3bf9430dabfb3f6cbd33",
|
||||
"output_norm.weight": "49445c4585ab0a8135717a0bdb1cda4a062a030177d0119561d91542aec5744b"
|
||||
}
|
||||
6
convert/testdata/gemma-2-9b-it.json
vendored
Normal file
6
convert/testdata/gemma-2-9b-it.json
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"general.architecture": "gemma2",
|
||||
"gemma2.attention.sliding_window": "4096",
|
||||
"gemma2.attn_logit_softcapping": "50",
|
||||
"gemma2.final_logit_softcapping": "30"
|
||||
}
|
||||
188
convert/testdata/gemma-2b-it.json
vendored
Normal file
188
convert/testdata/gemma-2b-it.json
vendored
Normal file
@@ -0,0 +1,188 @@
|
||||
{
|
||||
"general.architecture": "gemma",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"gemma.block_count": "18",
|
||||
"gemma.context_length": "8192",
|
||||
"gemma.embedding_length": "2048",
|
||||
"gemma.feed_forward_length": "16384",
|
||||
"gemma.attention.head_count": "8",
|
||||
"gemma.attention.head_count_kv": "1",
|
||||
"gemma.attention.key_length": "256",
|
||||
"gemma.attention.value_length": "256",
|
||||
"gemma.attention.layer_norm_rms_epsilon": "1e-06",
|
||||
"tokenizer.ggml.model": "llama",
|
||||
"tokenizer.ggml.add_bos_token": "true",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.bos_token_id": "2",
|
||||
"tokenizer.ggml.eos_token_id": "1",
|
||||
"tokenizer.ggml.padding_token_id": "0",
|
||||
"tokenizer.ggml.unknown_token_id": "3",
|
||||
"tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
|
||||
"tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d",
|
||||
"tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
|
||||
"token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07",
|
||||
"blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f",
|
||||
"blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb",
|
||||
"blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265",
|
||||
"blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791",
|
||||
"blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13",
|
||||
"blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7",
|
||||
"blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf",
|
||||
"blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448",
|
||||
"blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4",
|
||||
"blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406",
|
||||
"blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626",
|
||||
"blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee",
|
||||
"blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a",
|
||||
"blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00",
|
||||
"blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79",
|
||||
"blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33",
|
||||
"blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22",
|
||||
"blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315",
|
||||
"blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044",
|
||||
"blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455",
|
||||
"blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5",
|
||||
"blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5",
|
||||
"blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4",
|
||||
"blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f",
|
||||
"blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061",
|
||||
"blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687",
|
||||
"blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1",
|
||||
"blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21",
|
||||
"blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20",
|
||||
"blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce",
|
||||
"blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991",
|
||||
"blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68",
|
||||
"blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e",
|
||||
"blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2",
|
||||
"blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2",
|
||||
"blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee",
|
||||
"blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db",
|
||||
"blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065",
|
||||
"blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0",
|
||||
"blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde",
|
||||
"blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa",
|
||||
"blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec",
|
||||
"blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1",
|
||||
"blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3",
|
||||
"blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e",
|
||||
"blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5",
|
||||
"blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded",
|
||||
"blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536",
|
||||
"blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8",
|
||||
"blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9",
|
||||
"blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653",
|
||||
"blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557",
|
||||
"blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49",
|
||||
"blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14",
|
||||
"blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4",
|
||||
"blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a",
|
||||
"blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b",
|
||||
"blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003",
|
||||
"blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a",
|
||||
"blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd",
|
||||
"blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168",
|
||||
"blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb",
|
||||
"blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f",
|
||||
"blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7",
|
||||
"blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d",
|
||||
"blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb",
|
||||
"blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f",
|
||||
"blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774",
|
||||
"blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10",
|
||||
"blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b",
|
||||
"blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2",
|
||||
"blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe",
|
||||
"blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69",
|
||||
"blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f",
|
||||
"blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6",
|
||||
"blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c",
|
||||
"blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c",
|
||||
"blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef",
|
||||
"blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908",
|
||||
"blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73",
|
||||
"blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211",
|
||||
"blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183",
|
||||
"blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee",
|
||||
"blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7",
|
||||
"blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7",
|
||||
"blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054",
|
||||
"blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764",
|
||||
"blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57",
|
||||
"blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923",
|
||||
"blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb",
|
||||
"blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa",
|
||||
"blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010",
|
||||
"blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff",
|
||||
"blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763",
|
||||
"blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf",
|
||||
"blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b",
|
||||
"blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34",
|
||||
"blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f",
|
||||
"blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492",
|
||||
"blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df",
|
||||
"blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754",
|
||||
"blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea",
|
||||
"blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce",
|
||||
"blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789",
|
||||
"blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709",
|
||||
"blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e",
|
||||
"blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474",
|
||||
"blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0",
|
||||
"blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1",
|
||||
"blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b",
|
||||
"blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509",
|
||||
"blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae",
|
||||
"blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e",
|
||||
"blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7",
|
||||
"blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104",
|
||||
"blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456",
|
||||
"blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d",
|
||||
"blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3",
|
||||
"blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5",
|
||||
"blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f",
|
||||
"blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7",
|
||||
"blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9",
|
||||
"blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd",
|
||||
"blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1",
|
||||
"blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7",
|
||||
"blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd",
|
||||
"blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86",
|
||||
"blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0",
|
||||
"blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d",
|
||||
"blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8",
|
||||
"blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52",
|
||||
"blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c",
|
||||
"blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19",
|
||||
"blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d",
|
||||
"blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e",
|
||||
"blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c",
|
||||
"blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec",
|
||||
"blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca",
|
||||
"blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91",
|
||||
"blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6",
|
||||
"blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b",
|
||||
"blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f",
|
||||
"blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767",
|
||||
"blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5",
|
||||
"blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b",
|
||||
"blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93",
|
||||
"blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec",
|
||||
"blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74",
|
||||
"blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f",
|
||||
"blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948",
|
||||
"blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca",
|
||||
"blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672",
|
||||
"blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9",
|
||||
"blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c",
|
||||
"blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465",
|
||||
"blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555",
|
||||
"blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d",
|
||||
"blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94",
|
||||
"blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3",
|
||||
"blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c",
|
||||
"blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab",
|
||||
"blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf",
|
||||
"output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4"
|
||||
}
|
||||
336
convert/tokenizer.go
Normal file
336
convert/tokenizer.go
Normal file
@@ -0,0 +1,336 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"maps"
|
||||
"os"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
_ int32 = iota
|
||||
tokenTypeNormal
|
||||
tokenTypeUnknown
|
||||
tokenTypeControl
|
||||
tokenTypeUserDefined
|
||||
tokenTypeUnused
|
||||
tokenTypeByte
|
||||
)
|
||||
|
||||
type Tokenizer struct {
|
||||
*Vocabulary
|
||||
SpecialVocabulary []*SpecialVocabulary
|
||||
Merges []string
|
||||
|
||||
Pre string
|
||||
Template string
|
||||
}
|
||||
|
||||
func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) {
|
||||
v, err := parseVocabulary(fsys)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
t := &Tokenizer{
|
||||
Vocabulary: v,
|
||||
Pre: "default",
|
||||
}
|
||||
|
||||
addedTokens := make(map[string]token)
|
||||
if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) {
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
defer f.Close()
|
||||
|
||||
var tt tokenizer
|
||||
if err := json.NewDecoder(f).Decode(&tt); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, t := range tt.AddedTokens {
|
||||
addedTokens[t.Content] = t
|
||||
}
|
||||
|
||||
if len(tt.Model.Merges) == 0 {
|
||||
// noop; merges is empty
|
||||
} else if err := json.Unmarshal(tt.Model.Merges, &t.Merges); err == nil {
|
||||
// noop; merges is []string
|
||||
} else if merges, err := func() ([][]string, error) {
|
||||
var merges [][]string
|
||||
if err := json.Unmarshal(tt.Model.Merges, &merges); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return merges, nil
|
||||
}(); err == nil {
|
||||
t.Merges = make([]string, len(merges))
|
||||
for i := range merges {
|
||||
t.Merges[i] = strings.Join(merges[i], " ")
|
||||
}
|
||||
} else {
|
||||
return nil, fmt.Errorf("could not parse tokenizer merges. expected []string or [][]string: %w", err)
|
||||
}
|
||||
|
||||
sha256sum := sha256.New()
|
||||
for _, pt := range tt.PreTokenizer.PreTokenizers {
|
||||
switch pt.Type {
|
||||
case "Split":
|
||||
if pt.Pattern.Regex != "" {
|
||||
// create a checksum of all Split pretokenizers which should be sufficient
|
||||
// to identify the pretokenizer
|
||||
sha256sum.Write([]byte(pt.Pattern.Regex))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest {
|
||||
case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
|
||||
t.Pre = "llama-bpe"
|
||||
case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
|
||||
t.Pre = "deepseek-llm"
|
||||
case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
|
||||
t.Pre = "deepseek-coder"
|
||||
case "1ff7f41064896984db5d1bb6ff64fa4bc29007d08c1b439e505b7392777a319e":
|
||||
t.Pre = "qwen2"
|
||||
case "00431aed57e696b747435f734d1e3b9b1bfd931a121fb5cac7129e97c181e9ba":
|
||||
t.Pre = "qwen35"
|
||||
case "b92c0824a58e1d8dc3221cf3e12c433c3a86f57e46d57229993489f0798e7702":
|
||||
t.Pre = "laguna"
|
||||
case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
|
||||
// noop, empty pretokenizer
|
||||
default:
|
||||
slog.Warn("unknown pretokenizer, using default", "digest", digest)
|
||||
}
|
||||
}
|
||||
|
||||
if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) {
|
||||
// noop
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
defer f.Close()
|
||||
|
||||
var p map[string]json.RawMessage
|
||||
if err := json.NewDecoder(f).Decode(&p); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if template, ok := p["chat_template"]; ok {
|
||||
var s []struct {
|
||||
Name string `json:"name"`
|
||||
Template string `json:"template"`
|
||||
}
|
||||
if err := json.Unmarshal(template, &t.Template); err == nil {
|
||||
// noop
|
||||
} else if err := json.Unmarshal(template, &s); err == nil {
|
||||
for _, e := range s {
|
||||
if e.Name == "default" {
|
||||
t.Template = e.Template
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return nil, fmt.Errorf("invalid chat_template: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, st := range specialTokenTypes {
|
||||
sv := SpecialVocabulary{Type: st}
|
||||
if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
|
||||
if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok {
|
||||
var content string
|
||||
if err := json.Unmarshal(bts, &content); err != nil {
|
||||
var mm map[string]any
|
||||
if err := json.Unmarshal(bts, &mm); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
content, ok = mm["content"].(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
sv.Content = content
|
||||
}
|
||||
|
||||
if id, ok := addedTokens[sv.Content]; ok {
|
||||
sv.ID = id.ID
|
||||
t.SpecialVocabulary = append(t.SpecialVocabulary, &sv)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if f, err := fsys.Open("generation_config.json"); errors.Is(err, os.ErrNotExist) {
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
defer f.Close()
|
||||
|
||||
var p map[string]json.RawMessage
|
||||
if err := json.NewDecoder(f).Decode(&p); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, st := range specialTokenTypes {
|
||||
if bts, ok := p[fmt.Sprintf("%s_token_id", st)]; ok {
|
||||
var ids []int32
|
||||
if err := json.Unmarshal(bts, &ids); err != nil {
|
||||
// value is not a list so the existing ID is used
|
||||
continue
|
||||
}
|
||||
|
||||
if i := slices.IndexFunc(t.SpecialVocabulary, func(sv *SpecialVocabulary) bool {
|
||||
return sv.Type == st
|
||||
}); i >= 0 {
|
||||
t.SpecialVocabulary[i].IDs = ids
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
type tokenizer struct {
|
||||
AddedTokens []token `json:"added_tokens"`
|
||||
Model struct {
|
||||
Type string `json:"type"`
|
||||
Vocab map[string]int `json:"vocab"`
|
||||
Merges json.RawMessage `json:"merges"`
|
||||
} `json:"model"`
|
||||
|
||||
PreTokenizer struct {
|
||||
PreTokenizers []struct {
|
||||
Type string `json:"type"`
|
||||
Behavior string `json:"behavior"`
|
||||
Invert bool `json:"invert"`
|
||||
AddPrefixSpace bool `json:"add_prefix_space"`
|
||||
TrimOffsets bool `json:"trim_offsets"`
|
||||
UseRegex bool `json:"use_regex"`
|
||||
Pattern struct {
|
||||
Regex string `json:"Regex"`
|
||||
} `json:"pattern"`
|
||||
} `json:"pretokenizers"`
|
||||
} `json:"pre_tokenizer"`
|
||||
}
|
||||
|
||||
type token struct {
|
||||
ID int `json:"id"`
|
||||
Content string `json:"content"`
|
||||
Special bool `json:"special"`
|
||||
UserDefined bool
|
||||
}
|
||||
|
||||
type Vocabulary struct {
|
||||
Model string
|
||||
Tokens []string
|
||||
Scores []float32
|
||||
Types []int32
|
||||
}
|
||||
|
||||
func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
|
||||
f, err := fsys.Open("tokenizer.json")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var t tokenizer
|
||||
if err := json.NewDecoder(f).Decode(&t); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tokens := make(map[int]token, len(t.Model.Vocab))
|
||||
for k, v := range t.Model.Vocab {
|
||||
tokens[v] = token{
|
||||
ID: v,
|
||||
Content: k,
|
||||
}
|
||||
}
|
||||
|
||||
for _, token := range t.AddedTokens {
|
||||
token.UserDefined = true
|
||||
tokens[token.ID] = token
|
||||
}
|
||||
|
||||
v := Vocabulary{Model: "gpt2"}
|
||||
for _, k := range slices.Sorted(maps.Keys(tokens)) {
|
||||
token := tokens[k]
|
||||
v.Tokens = append(v.Tokens, token.Content)
|
||||
v.Scores = append(v.Scores, float32(token.ID))
|
||||
|
||||
switch {
|
||||
case token.Special:
|
||||
v.Types = append(v.Types, tokenTypeControl)
|
||||
case token.UserDefined:
|
||||
v.Types = append(v.Types, tokenTypeUserDefined)
|
||||
default:
|
||||
v.Types = append(v.Types, tokenTypeNormal)
|
||||
}
|
||||
}
|
||||
|
||||
return &v, nil
|
||||
}
|
||||
|
||||
func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
|
||||
patterns := []struct {
|
||||
Pattern string
|
||||
Func func(fs.FS) (*Vocabulary, error)
|
||||
}{
|
||||
{"tokenizer.model", parseSentencePiece},
|
||||
{"tokenizer.json", parseVocabularyFromTokenizer},
|
||||
}
|
||||
|
||||
for _, pattern := range patterns {
|
||||
if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) {
|
||||
continue
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return pattern.Func(fsys)
|
||||
}
|
||||
|
||||
return nil, errors.New("unknown tokenizer format")
|
||||
}
|
||||
|
||||
type SpecialVocabulary struct {
|
||||
Type string
|
||||
ID int
|
||||
Content string
|
||||
AddToken bool
|
||||
|
||||
// IDs is populated by generation_config.json
|
||||
IDs []int32
|
||||
}
|
||||
|
||||
func (sv SpecialVocabulary) Key() string {
|
||||
switch t := sv.Type; t {
|
||||
case "bos", "eos", "cls", "mask":
|
||||
return t
|
||||
case "unk":
|
||||
return "unknown"
|
||||
case "sep":
|
||||
//nolint:misspell // this is an upstream typo
|
||||
return "seperator"
|
||||
case "pad":
|
||||
return "padding"
|
||||
}
|
||||
|
||||
panic("unknown special vocabulary type")
|
||||
}
|
||||
172
convert/tokenizer_spm.go
Normal file
172
convert/tokenizer_spm.go
Normal file
@@ -0,0 +1,172 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"os"
|
||||
"reflect"
|
||||
"slices"
|
||||
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
||||
"github.com/ollama/ollama/convert/sentencepiece"
|
||||
)
|
||||
|
||||
func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
|
||||
slog.Debug("using spm vocabulary")
|
||||
|
||||
ast, err := parseAdditionalSpecialTokens(fsys)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bts, err := fs.ReadFile(fsys, "tokenizer.model")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var spm sentencepiece.ModelProto
|
||||
if err := proto.Unmarshal(bts, &spm); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
v := Vocabulary{Model: "llama"}
|
||||
for _, piece := range spm.GetPieces() {
|
||||
v.Tokens = append(v.Tokens, piece.GetPiece())
|
||||
v.Scores = append(v.Scores, piece.GetScore())
|
||||
|
||||
switch t := piece.GetType(); t {
|
||||
case sentencepiece.ModelProto_SentencePiece_UNKNOWN,
|
||||
sentencepiece.ModelProto_SentencePiece_CONTROL,
|
||||
sentencepiece.ModelProto_SentencePiece_UNUSED,
|
||||
sentencepiece.ModelProto_SentencePiece_BYTE:
|
||||
v.Types = append(v.Types, int32(t))
|
||||
default:
|
||||
tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
|
||||
|
||||
// temporary fix to handle gemma3 broken configs
|
||||
// TODO(parthsareen): allow reading of tokenizer.json to allow managing special tokens when using spm
|
||||
if slices.Contains([]string{"<end_of_turn>", "<start_of_turn>", "<start_function_declaration>", "<end_function_declaration>", "<start_function_call>", "<end_function_call>", "<start_function_response>", "<end_function_response>", "<escape>"}, piece.GetPiece()) {
|
||||
tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
|
||||
}
|
||||
|
||||
for _, t := range ast {
|
||||
if t.Content == piece.GetPiece() {
|
||||
tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
v.Types = append(v.Types, tt)
|
||||
}
|
||||
}
|
||||
|
||||
f, err := fsys.Open("added_tokens.json")
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return &v, nil
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var atm map[string]int
|
||||
if err := json.NewDecoder(f).Decode(&atm); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
type t struct {
|
||||
id int
|
||||
content string
|
||||
}
|
||||
|
||||
var ts []t
|
||||
for content, id := range atm {
|
||||
ts = append(ts, t{id, content})
|
||||
}
|
||||
|
||||
slices.SortFunc(ts, func(i, j t) int {
|
||||
return cmp.Compare(i.id, j.id)
|
||||
})
|
||||
|
||||
for _, t := range ts {
|
||||
if t.id < len(v.Tokens) {
|
||||
if v.Tokens[t.id] == t.content {
|
||||
slog.Warn("tokenizer", "duplicate token", t.content, "id", t.id)
|
||||
continue
|
||||
}
|
||||
return nil, fmt.Errorf("token mismatch: %s != %s at pos [%d]", t.content, v.Tokens[t.id], t.id)
|
||||
}
|
||||
if t.id != len(v.Tokens) {
|
||||
return nil, fmt.Errorf("invalid token id: [%d] as pos [%d]", t.id, len(v.Tokens))
|
||||
}
|
||||
|
||||
v.Tokens = append(v.Tokens, t.content)
|
||||
v.Scores = append(v.Scores, -1000.0)
|
||||
v.Types = append(v.Types, tokenTypeUserDefined)
|
||||
}
|
||||
|
||||
return &v, nil
|
||||
}
|
||||
|
||||
type specialToken struct {
|
||||
Content string `json:"content"`
|
||||
Lstrip bool `json:"lstrip"`
|
||||
Normalized bool `json:"normalized"`
|
||||
Rstrip bool `json:"rstrip"`
|
||||
SingleWord bool `json:"single_word"`
|
||||
}
|
||||
|
||||
func parseAdditionalSpecialTokens(fsys fs.FS) ([]specialToken, error) {
|
||||
f, err := fsys.Open("special_tokens_map.json")
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return nil, nil
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var m struct {
|
||||
AdditionalSpecialTokens any `json:"additional_special_tokens"`
|
||||
}
|
||||
|
||||
if err := json.NewDecoder(f).Decode(&m); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var ast []specialToken
|
||||
|
||||
switch st := m.AdditionalSpecialTokens.(type) {
|
||||
case []string:
|
||||
for _, s := range st {
|
||||
ast = append(ast, specialToken{Content: s})
|
||||
}
|
||||
case []any:
|
||||
for _, s := range st {
|
||||
// marshal and unmarshal the object to get the special token
|
||||
tMap := s.(map[string]any)
|
||||
data, err := json.Marshal(tMap)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var token specialToken
|
||||
err = json.Unmarshal(data, &token)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ast = append(ast, token)
|
||||
}
|
||||
|
||||
default:
|
||||
slog.Warn("special token", "unknown token", reflect.TypeOf(st))
|
||||
}
|
||||
|
||||
slog.Debug("spm tokenizer", "additional tokens", ast)
|
||||
|
||||
return ast, nil
|
||||
}
|
||||
425
convert/tokenizer_test.go
Normal file
425
convert/tokenizer_test.go
Normal file
@@ -0,0 +1,425 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
func createTokenizerFS(t *testing.T, dir string, files map[string]io.Reader) fs.FS {
|
||||
t.Helper()
|
||||
|
||||
for k, v := range files {
|
||||
if err := func() error {
|
||||
f, err := os.Create(filepath.Join(dir, k))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if _, err := io.Copy(f, v); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}(); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return os.DirFS(dir)
|
||||
}
|
||||
|
||||
func TestParseTokenizer(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
fsys fs.FS
|
||||
specialTokenTypes []string
|
||||
want *Tokenizer
|
||||
}{
|
||||
{
|
||||
name: "string chat template",
|
||||
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
|
||||
"tokenizer.json": strings.NewReader(`{}`),
|
||||
"tokenizer_config.json": strings.NewReader(`{
|
||||
"chat_template": "<default template>"
|
||||
}`),
|
||||
}),
|
||||
want: &Tokenizer{
|
||||
Vocabulary: &Vocabulary{Model: "gpt2"},
|
||||
Pre: "default",
|
||||
Template: "<default template>",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "list chat template",
|
||||
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
|
||||
"tokenizer.json": strings.NewReader(`{}`),
|
||||
"tokenizer_config.json": strings.NewReader(`{
|
||||
"chat_template": [
|
||||
{
|
||||
"name": "default",
|
||||
"template": "<default template>"
|
||||
},
|
||||
{
|
||||
"name": "tools",
|
||||
"template": "<tools template>"
|
||||
}
|
||||
]
|
||||
}`),
|
||||
}),
|
||||
want: &Tokenizer{
|
||||
Vocabulary: &Vocabulary{Model: "gpt2"},
|
||||
Pre: "default",
|
||||
Template: "<default template>",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "added tokens",
|
||||
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
|
||||
"tokenizer.json": strings.NewReader(`{
|
||||
"added_tokens": [
|
||||
{
|
||||
"id": 999,
|
||||
"content": "<unused999>",
|
||||
"special": false
|
||||
}
|
||||
]
|
||||
}`),
|
||||
}),
|
||||
want: &Tokenizer{
|
||||
Vocabulary: &Vocabulary{
|
||||
Model: "gpt2",
|
||||
Tokens: []string{"<unused999>"},
|
||||
Scores: []float32{999},
|
||||
Types: []int32{4},
|
||||
},
|
||||
Pre: "default",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "added tokens overlap vocab",
|
||||
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
|
||||
"tokenizer.json": strings.NewReader(`{
|
||||
"added_tokens": [
|
||||
{
|
||||
"id": 0,
|
||||
"content": "<pad>",
|
||||
"special": true
|
||||
}
|
||||
],
|
||||
"model": {
|
||||
"vocab": {
|
||||
"<pad>": 0
|
||||
}
|
||||
}
|
||||
}`),
|
||||
}),
|
||||
want: &Tokenizer{
|
||||
Vocabulary: &Vocabulary{
|
||||
Model: "gpt2",
|
||||
Tokens: []string{"<pad>"},
|
||||
Scores: []float32{0},
|
||||
Types: []int32{3},
|
||||
},
|
||||
Pre: "default",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "special token types",
|
||||
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
|
||||
"tokenizer.json": strings.NewReader(`{
|
||||
"added_tokens": [
|
||||
{
|
||||
"id": 0,
|
||||
"content": "<pad>",
|
||||
"special": true
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"content": "<eos>",
|
||||
"special": true
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "<bos>",
|
||||
"special": true
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "<unk>",
|
||||
"special": true
|
||||
}
|
||||
],
|
||||
"model": {
|
||||
"vocab": {
|
||||
"<pad>": 0,
|
||||
"<eos>": 1,
|
||||
"<bos>": 2,
|
||||
"<unk>": 3
|
||||
}
|
||||
}
|
||||
}`),
|
||||
"tokenizer_config.json": strings.NewReader(`{
|
||||
"add_bos_token": true,
|
||||
"add_eos_token": false,
|
||||
"bos_token": "<bos>",
|
||||
"eos_token": "<eos>",
|
||||
"pad_token": "<pad>",
|
||||
"unk_token": "<unk>"
|
||||
}`),
|
||||
}),
|
||||
specialTokenTypes: []string{"pad", "eos", "bos", "unk"},
|
||||
want: &Tokenizer{
|
||||
Vocabulary: &Vocabulary{
|
||||
Model: "gpt2",
|
||||
Tokens: []string{"<pad>", "<eos>", "<bos>", "<unk>"},
|
||||
Scores: []float32{0, 1, 2, 3},
|
||||
Types: []int32{3, 3, 3, 3},
|
||||
},
|
||||
SpecialVocabulary: []*SpecialVocabulary{
|
||||
{Type: "pad", Content: "<pad>", ID: 0, AddToken: false},
|
||||
{Type: "eos", Content: "<eos>", ID: 1, AddToken: false},
|
||||
{Type: "bos", Content: "<bos>", ID: 2, AddToken: true},
|
||||
{Type: "unk", Content: "<unk>", ID: 3, AddToken: false},
|
||||
},
|
||||
Pre: "default",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "llama-bpe pretokenizer and control tokens",
|
||||
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
|
||||
"tokenizer.json": strings.NewReader(`{
|
||||
"added_tokens": [
|
||||
{"id": 1, "content": "<|startoftext|>", "special": true},
|
||||
{"id": 6, "content": "<|im_start|>", "special": true},
|
||||
{"id": 7, "content": "<|im_end|>", "special": true},
|
||||
{"id": 8, "content": "<|tool_list_start|>", "special": true},
|
||||
{"id": 9, "content": "<|tool_list_end|>", "special": true},
|
||||
{"id": 10, "content": "<|tool_call_start|>", "special": true},
|
||||
{"id": 11, "content": "<|tool_call_end|>", "special": true},
|
||||
{"id": 12, "content": "<|tool_response_start|>", "special": true},
|
||||
{"id": 13, "content": "<|tool_response_end|>", "special": true},
|
||||
{"id": 396, "content": "<image>", "special": true},
|
||||
{"id": 64400, "content": "<think>", "special": true},
|
||||
{"id": 64401, "content": "</think>", "special": true}
|
||||
],
|
||||
"model": {
|
||||
"vocab": {
|
||||
"<|startoftext|>": 1,
|
||||
"<|im_start|>": 6,
|
||||
"<|im_end|>": 7,
|
||||
"<|tool_list_start|>": 8,
|
||||
"<|tool_list_end|>": 9,
|
||||
"<|tool_call_start|>": 10,
|
||||
"<|tool_call_end|>": 11,
|
||||
"<|tool_response_start|>": 12,
|
||||
"<|tool_response_end|>": 13,
|
||||
"<image>": 396,
|
||||
"<think>": 64400,
|
||||
"</think>": 64401
|
||||
}
|
||||
},
|
||||
"pre_tokenizer": {
|
||||
"type": "Sequence",
|
||||
"pretokenizers": [
|
||||
{
|
||||
"type": "Split",
|
||||
"pattern": {
|
||||
"Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
|
||||
},
|
||||
"behavior": "Isolated",
|
||||
"invert": false
|
||||
},
|
||||
{
|
||||
"type": "ByteLevel",
|
||||
"add_prefix_space": false,
|
||||
"trim_offsets": true,
|
||||
"use_regex": false
|
||||
}
|
||||
]
|
||||
}
|
||||
}`),
|
||||
}),
|
||||
want: &Tokenizer{
|
||||
Vocabulary: &Vocabulary{
|
||||
Model: "gpt2",
|
||||
Tokens: []string{
|
||||
"<|startoftext|>",
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|tool_list_start|>",
|
||||
"<|tool_list_end|>",
|
||||
"<|tool_call_start|>",
|
||||
"<|tool_call_end|>",
|
||||
"<|tool_response_start|>",
|
||||
"<|tool_response_end|>",
|
||||
"<image>",
|
||||
"<think>",
|
||||
"</think>",
|
||||
},
|
||||
Scores: []float32{1, 6, 7, 8, 9, 10, 11, 12, 13, 396, 64400, 64401},
|
||||
Types: []int32{3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
|
||||
},
|
||||
Pre: "llama-bpe",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "list string merges",
|
||||
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
|
||||
"tokenizer.json": strings.NewReader(`{
|
||||
"model": {
|
||||
"merges": [
|
||||
"a b",
|
||||
"c d",
|
||||
"e f"
|
||||
]
|
||||
}
|
||||
}`),
|
||||
}),
|
||||
want: &Tokenizer{
|
||||
Vocabulary: &Vocabulary{
|
||||
Model: "gpt2",
|
||||
},
|
||||
Merges: []string{
|
||||
"a b",
|
||||
"c d",
|
||||
"e f",
|
||||
},
|
||||
Pre: "default",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "list list string merges",
|
||||
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
|
||||
"tokenizer.json": strings.NewReader(`{
|
||||
"model": {
|
||||
"merges": [
|
||||
[
|
||||
"a", "b"
|
||||
],
|
||||
[
|
||||
"c", "d"
|
||||
],
|
||||
[
|
||||
"e", "f"
|
||||
]
|
||||
]
|
||||
}
|
||||
}`),
|
||||
}),
|
||||
want: &Tokenizer{
|
||||
Vocabulary: &Vocabulary{
|
||||
Model: "gpt2",
|
||||
},
|
||||
Merges: []string{
|
||||
"a b",
|
||||
"c d",
|
||||
"e f",
|
||||
},
|
||||
Pre: "default",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "generation config eos token ids",
|
||||
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
|
||||
"tokenizer.json": strings.NewReader(`{
|
||||
"added_tokens": [
|
||||
{
|
||||
"id": 0,
|
||||
"content": "<bos>",
|
||||
"special": true
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"content": "<eos>",
|
||||
"special": true
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "<eot>",
|
||||
"special": true
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "<eom>",
|
||||
"special": true
|
||||
}
|
||||
],
|
||||
"model": {
|
||||
"vocab": {
|
||||
"<bos>": 0,
|
||||
"<eos>": 1,
|
||||
"<eot>": 2,
|
||||
"<eom>": 3
|
||||
}
|
||||
}
|
||||
}`),
|
||||
"tokenizer_config.json": strings.NewReader(`{
|
||||
"add_bos_token": true,
|
||||
"add_eos_token": false,
|
||||
"bos_token": "<bos>",
|
||||
"eos_token": "<eos>"
|
||||
}`),
|
||||
"generation_config.json": strings.NewReader(`{
|
||||
"bos_token_id": 0,
|
||||
"eos_token_id": [1, 2, 3]
|
||||
}`),
|
||||
}),
|
||||
specialTokenTypes: []string{"pad", "eos", "bos", "unk"},
|
||||
want: &Tokenizer{
|
||||
Vocabulary: &Vocabulary{
|
||||
Model: "gpt2",
|
||||
Tokens: []string{"<bos>", "<eos>", "<eot>", "<eom>"},
|
||||
Scores: []float32{0, 1, 2, 3},
|
||||
Types: []int32{3, 3, 3, 3},
|
||||
},
|
||||
SpecialVocabulary: []*SpecialVocabulary{
|
||||
{Type: "eos", Content: "<eos>", ID: 1, IDs: []int32{1, 2, 3}, AddToken: false},
|
||||
{Type: "bos", Content: "<bos>", ID: 0, AddToken: true},
|
||||
},
|
||||
Pre: "default",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "qwen35 pretokenizer",
|
||||
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
|
||||
"tokenizer.json": strings.NewReader(`{
|
||||
"pre_tokenizer": {
|
||||
"type": "Sequence",
|
||||
"pretokenizers": [
|
||||
{
|
||||
"type": "Split",
|
||||
"pattern": {
|
||||
"Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}`),
|
||||
}),
|
||||
want: &Tokenizer{
|
||||
Vocabulary: &Vocabulary{Model: "gpt2"},
|
||||
Pre: "qwen35",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tokenizer, err := parseTokenizer(tt.fsys, tt.specialTokenTypes)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(tt.want, tokenizer); diff != "" {
|
||||
t.Errorf("unexpected tokenizer (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user