ollama source for Momentry Core verification
This commit is contained in:
242
discover/cpu_linux.go
Normal file
242
discover/cpu_linux.go
Normal file
@@ -0,0 +1,242 @@
|
||||
package discover
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
)
|
||||
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
mem, err := getCPUMem()
|
||||
if err != nil {
|
||||
return memInfo{}, err
|
||||
}
|
||||
return getCPUMemByCgroups(mem), nil
|
||||
}
|
||||
|
||||
func getCPUMem() (memInfo, error) {
|
||||
var mem memInfo
|
||||
var total, available, free, buffers, cached, freeSwap uint64
|
||||
f, err := os.Open("/proc/meminfo")
|
||||
if err != nil {
|
||||
return mem, err
|
||||
}
|
||||
defer f.Close()
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
line := s.Text()
|
||||
switch {
|
||||
case strings.HasPrefix(line, "MemTotal:"):
|
||||
_, err = fmt.Sscanf(line, "MemTotal:%d", &total)
|
||||
case strings.HasPrefix(line, "MemAvailable:"):
|
||||
_, err = fmt.Sscanf(line, "MemAvailable:%d", &available)
|
||||
case strings.HasPrefix(line, "MemFree:"):
|
||||
_, err = fmt.Sscanf(line, "MemFree:%d", &free)
|
||||
case strings.HasPrefix(line, "Buffers:"):
|
||||
_, err = fmt.Sscanf(line, "Buffers:%d", &buffers)
|
||||
case strings.HasPrefix(line, "Cached:"):
|
||||
_, err = fmt.Sscanf(line, "Cached:%d", &cached)
|
||||
case strings.HasPrefix(line, "SwapFree:"):
|
||||
_, err = fmt.Sscanf(line, "SwapFree:%d", &freeSwap)
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
return mem, err
|
||||
}
|
||||
}
|
||||
mem.TotalMemory = total * format.KibiByte
|
||||
mem.FreeSwap = freeSwap * format.KibiByte
|
||||
if available > 0 {
|
||||
mem.FreeMemory = available * format.KibiByte
|
||||
} else {
|
||||
mem.FreeMemory = (free + buffers + cached) * format.KibiByte
|
||||
}
|
||||
return mem, nil
|
||||
}
|
||||
|
||||
func getCPUMemByCgroups(mem memInfo) memInfo {
|
||||
total, err := getUint64ValueFromFile("/sys/fs/cgroup/memory.max")
|
||||
if err == nil {
|
||||
mem.TotalMemory = total
|
||||
}
|
||||
used, err := getUint64ValueFromFile("/sys/fs/cgroup/memory.current")
|
||||
if err == nil {
|
||||
mem.FreeMemory = mem.TotalMemory - used
|
||||
}
|
||||
return mem
|
||||
}
|
||||
|
||||
func getUint64ValueFromFile(path string) (uint64, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
line := s.Text()
|
||||
return strconv.ParseUint(line, 10, 64)
|
||||
}
|
||||
return 0, errors.New("empty file content")
|
||||
}
|
||||
|
||||
const CpuInfoFilename = "/proc/cpuinfo"
|
||||
|
||||
type linuxCpuInfo struct {
|
||||
ID string `cpuinfo:"processor"`
|
||||
VendorID string `cpuinfo:"vendor_id"`
|
||||
ModelName string `cpuinfo:"model name"`
|
||||
PhysicalID string `cpuinfo:"physical id"`
|
||||
Siblings string `cpuinfo:"siblings"`
|
||||
CoreID string `cpuinfo:"core id"`
|
||||
}
|
||||
|
||||
func GetCPUDetails() []CPU {
|
||||
file, err := os.Open(CpuInfoFilename)
|
||||
if err != nil {
|
||||
slog.Warn("failed to get CPU details", "error", err)
|
||||
return nil
|
||||
}
|
||||
defer file.Close()
|
||||
cpus := linuxCPUDetails(file)
|
||||
return overwriteThreadCountByLinuxCgroups(cpus)
|
||||
}
|
||||
|
||||
func overwriteThreadCountByLinuxCgroups(cpus []CPU) []CPU {
|
||||
file, err := os.Open("/sys/fs/cgroup/cpu.max")
|
||||
if err != nil {
|
||||
return cpus
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if sl := strings.Split(line, " "); len(sl) == 2 {
|
||||
allowdUs, err := strconv.ParseInt(sl[0], 10, 64)
|
||||
if err != nil {
|
||||
slog.Warn("failed to parse CPU allowed micro secs", "error", err)
|
||||
return cpus
|
||||
}
|
||||
unitUs, err := strconv.ParseInt(sl[1], 10, 64)
|
||||
if err != nil {
|
||||
slog.Warn("failed to parse CPU unit micro secs", "error", err)
|
||||
return cpus
|
||||
}
|
||||
|
||||
threads := int(max(allowdUs/unitUs, 1))
|
||||
|
||||
cpu := cpus[0]
|
||||
cpu.CoreCount = threads
|
||||
cpu.ThreadCount = threads
|
||||
return []CPU{cpu}
|
||||
}
|
||||
}
|
||||
return cpus
|
||||
}
|
||||
|
||||
func linuxCPUDetails(file io.Reader) []CPU {
|
||||
reColumns := regexp.MustCompile("\t+: ")
|
||||
scanner := bufio.NewScanner(file)
|
||||
cpuInfos := []linuxCpuInfo{}
|
||||
cpu := &linuxCpuInfo{}
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if sl := reColumns.Split(line, 2); len(sl) > 1 {
|
||||
t := reflect.TypeOf(cpu).Elem()
|
||||
s := reflect.ValueOf(cpu).Elem()
|
||||
for i := range t.NumField() {
|
||||
field := t.Field(i)
|
||||
tag := field.Tag.Get("cpuinfo")
|
||||
if tag == sl[0] {
|
||||
s.FieldByName(field.Name).SetString(sl[1])
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if strings.TrimSpace(line) == "" && cpu.ID != "" {
|
||||
cpuInfos = append(cpuInfos, *cpu)
|
||||
cpu = &linuxCpuInfo{}
|
||||
}
|
||||
}
|
||||
if cpu.ID != "" {
|
||||
cpuInfos = append(cpuInfos, *cpu)
|
||||
}
|
||||
|
||||
// Process the sockets/cores/threads
|
||||
socketByID := map[string]*CPU{}
|
||||
coreBySocket := map[string]map[string]struct{}{}
|
||||
threadsByCoreBySocket := map[string]map[string]int{}
|
||||
for _, c := range cpuInfos {
|
||||
if _, found := socketByID[c.PhysicalID]; !found {
|
||||
socketByID[c.PhysicalID] = &CPU{
|
||||
ID: c.PhysicalID,
|
||||
VendorID: c.VendorID,
|
||||
ModelName: c.ModelName,
|
||||
}
|
||||
coreBySocket[c.PhysicalID] = map[string]struct{}{}
|
||||
threadsByCoreBySocket[c.PhysicalID] = map[string]int{}
|
||||
}
|
||||
if c.CoreID != "" {
|
||||
coreBySocket[c.PhysicalID][c.PhysicalID+":"+c.CoreID] = struct{}{}
|
||||
threadsByCoreBySocket[c.PhysicalID][c.PhysicalID+":"+c.CoreID]++
|
||||
} else {
|
||||
coreBySocket[c.PhysicalID][c.PhysicalID+":"+c.ID] = struct{}{}
|
||||
threadsByCoreBySocket[c.PhysicalID][c.PhysicalID+":"+c.ID]++
|
||||
}
|
||||
}
|
||||
|
||||
// Tally up the values from the tracking maps
|
||||
for id, s := range socketByID {
|
||||
s.CoreCount = len(coreBySocket[id])
|
||||
s.ThreadCount = 0
|
||||
|
||||
// This only works if HT is enabled, consider a more reliable model, maybe cache size comparisons?
|
||||
efficiencyCoreCount := 0
|
||||
for _, threads := range threadsByCoreBySocket[id] {
|
||||
s.ThreadCount += threads
|
||||
if threads == 1 {
|
||||
efficiencyCoreCount++
|
||||
}
|
||||
}
|
||||
if efficiencyCoreCount == s.CoreCount {
|
||||
// 1:1 mapping means they're not actually efficiency cores, but regular cores
|
||||
s.EfficiencyCoreCount = 0
|
||||
} else {
|
||||
s.EfficiencyCoreCount = efficiencyCoreCount
|
||||
}
|
||||
}
|
||||
keys := make([]string, 0, len(socketByID))
|
||||
result := make([]CPU, 0, len(socketByID))
|
||||
for k := range socketByID {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
for _, k := range keys {
|
||||
result = append(result, *socketByID[k])
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func IsNUMA() bool {
|
||||
ids := map[string]any{}
|
||||
packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
|
||||
for _, packageId := range packageIds {
|
||||
id, err := os.ReadFile(packageId)
|
||||
if err == nil {
|
||||
ids[strings.TrimSpace(string(id))] = struct{}{}
|
||||
}
|
||||
}
|
||||
return len(ids) > 1
|
||||
}
|
||||
2084
discover/cpu_linux_test.go
Normal file
2084
discover/cpu_linux_test.go
Normal file
File diff suppressed because it is too large
Load Diff
221
discover/cpu_windows.go
Normal file
221
discover/cpu_windows.go
Normal file
@@ -0,0 +1,221 @@
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ollama/ollama/logutil"
|
||||
)
|
||||
|
||||
type MEMORYSTATUSEX struct {
|
||||
length uint32
|
||||
MemoryLoad uint32
|
||||
TotalPhys uint64
|
||||
AvailPhys uint64
|
||||
TotalPageFile uint64
|
||||
AvailPageFile uint64
|
||||
TotalVirtual uint64
|
||||
AvailVirtual uint64
|
||||
AvailExtendedVirtual uint64
|
||||
}
|
||||
|
||||
var (
|
||||
k32 = syscall.NewLazyDLL("kernel32.dll")
|
||||
globalMemoryStatusExProc = k32.NewProc("GlobalMemoryStatusEx")
|
||||
sizeofMemoryStatusEx = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
|
||||
GetLogicalProcessorInformationEx = k32.NewProc("GetLogicalProcessorInformationEx")
|
||||
)
|
||||
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
|
||||
r1, _, err := globalMemoryStatusExProc.Call(uintptr(unsafe.Pointer(&memStatus)))
|
||||
if r1 == 0 {
|
||||
return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err)
|
||||
}
|
||||
return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
|
||||
}
|
||||
|
||||
type LOGICAL_PROCESSOR_RELATIONSHIP uint32
|
||||
|
||||
const (
|
||||
RelationProcessorCore LOGICAL_PROCESSOR_RELATIONSHIP = iota
|
||||
RelationNumaNode
|
||||
RelationCache
|
||||
RelationProcessorPackage
|
||||
RelationGroup
|
||||
RelationProcessorDie
|
||||
RelationNumaNodeEx
|
||||
RelationProcessorModule
|
||||
)
|
||||
const RelationAll LOGICAL_PROCESSOR_RELATIONSHIP = 0xffff
|
||||
|
||||
type GROUP_AFFINITY struct {
|
||||
Mask uintptr // KAFFINITY
|
||||
Group uint16
|
||||
Reserved [3]uint16
|
||||
}
|
||||
|
||||
type PROCESSOR_RELATIONSHIP struct {
|
||||
Flags byte
|
||||
EfficiencyClass byte
|
||||
Reserved [20]byte
|
||||
GroupCount uint16
|
||||
GroupMask [1]GROUP_AFFINITY // len GroupCount
|
||||
}
|
||||
|
||||
// Omitted unused structs: NUMA_NODE_RELATIONSHIP CACHE_RELATIONSHIP GROUP_RELATIONSHIP
|
||||
|
||||
type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct {
|
||||
Relationship LOGICAL_PROCESSOR_RELATIONSHIP
|
||||
Size uint32
|
||||
U [1]byte // Union len Size
|
||||
// PROCESSOR_RELATIONSHIP
|
||||
// NUMA_NODE_RELATIONSHIP
|
||||
// CACHE_RELATIONSHIP
|
||||
// GROUP_RELATIONSHIP
|
||||
}
|
||||
|
||||
func (group *GROUP_AFFINITY) IsMember(target *GROUP_AFFINITY) bool {
|
||||
if group == nil || target == nil {
|
||||
return false
|
||||
}
|
||||
return group.Mask&target.Mask != 0
|
||||
}
|
||||
|
||||
type winPackage struct {
|
||||
groups []*GROUP_AFFINITY
|
||||
coreCount int // performance cores = coreCount - efficiencyCoreCount
|
||||
efficiencyCoreCount int
|
||||
threadCount int
|
||||
}
|
||||
|
||||
func (pkg *winPackage) IsMember(target *GROUP_AFFINITY) bool {
|
||||
for _, group := range pkg.groups {
|
||||
if group.IsMember(target) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func getLogicalProcessorInformationEx() ([]byte, error) {
|
||||
buf := make([]byte, 1)
|
||||
bufSize := len(buf)
|
||||
ret, _, err := GetLogicalProcessorInformationEx.Call(
|
||||
uintptr(RelationAll),
|
||||
uintptr(unsafe.Pointer(&buf[0])),
|
||||
uintptr(unsafe.Pointer(&bufSize)),
|
||||
)
|
||||
if ret != 0 {
|
||||
logutil.Trace("failed to retrieve CPU payload size", "ret", ret, "size", bufSize, "error", err)
|
||||
return nil, fmt.Errorf("failed to determine size info ret:%d %w", ret, err)
|
||||
}
|
||||
|
||||
buf = make([]byte, bufSize)
|
||||
ret, _, err = GetLogicalProcessorInformationEx.Call(
|
||||
uintptr(RelationAll),
|
||||
uintptr(unsafe.Pointer(&buf[0])),
|
||||
uintptr(unsafe.Pointer(&bufSize)),
|
||||
)
|
||||
if ret == 0 {
|
||||
logutil.Trace("failed to retrieve CPU information", "ret", ret, "size", len(buf), "new_size", bufSize, "error", err)
|
||||
return nil, fmt.Errorf("failed to gather processor information ret:%d buflen:%d %w", ret, bufSize, err)
|
||||
}
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
|
||||
var slpi *SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
|
||||
// Find all the packages first
|
||||
packages := []*winPackage{}
|
||||
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
||||
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
||||
if slpi.Relationship != RelationProcessorPackage {
|
||||
continue
|
||||
}
|
||||
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
||||
pkg := &winPackage{}
|
||||
ga0 := unsafe.Pointer(&pr.GroupMask[0])
|
||||
for j := range pr.GroupCount {
|
||||
gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
|
||||
pkg.groups = append(pkg.groups, gm)
|
||||
}
|
||||
packages = append(packages, pkg)
|
||||
}
|
||||
|
||||
slog.Info("packages", "count", len(packages))
|
||||
|
||||
// To identify efficiency cores we have to compare the relative values
|
||||
// Larger values are "less efficient" (aka, more performant)
|
||||
var maxEfficiencyClass byte
|
||||
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
||||
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
||||
if slpi.Relationship != RelationProcessorCore {
|
||||
continue
|
||||
}
|
||||
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
||||
if pr.EfficiencyClass > maxEfficiencyClass {
|
||||
maxEfficiencyClass = pr.EfficiencyClass
|
||||
}
|
||||
}
|
||||
if maxEfficiencyClass > 0 {
|
||||
slog.Info("efficiency cores detected", "maxEfficiencyClass", maxEfficiencyClass)
|
||||
}
|
||||
|
||||
// then match up the Cores to the Packages, count up cores, threads and efficiency cores
|
||||
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
||||
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
||||
if slpi.Relationship != RelationProcessorCore {
|
||||
continue
|
||||
}
|
||||
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
||||
ga0 := unsafe.Pointer(&pr.GroupMask[0])
|
||||
for j := range pr.GroupCount {
|
||||
gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
|
||||
for _, pkg := range packages {
|
||||
if pkg.IsMember(gm) {
|
||||
pkg.coreCount++
|
||||
if pr.Flags == 0 {
|
||||
pkg.threadCount++
|
||||
} else {
|
||||
pkg.threadCount += 2
|
||||
}
|
||||
if pr.EfficiencyClass < maxEfficiencyClass {
|
||||
pkg.efficiencyCoreCount++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Summarize the results
|
||||
for i, pkg := range packages {
|
||||
slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
|
||||
}
|
||||
|
||||
return packages
|
||||
}
|
||||
|
||||
func GetCPUDetails() []CPU {
|
||||
buf, err := getLogicalProcessorInformationEx()
|
||||
if err != nil {
|
||||
slog.Warn("failed to get CPU details", "error", err)
|
||||
return nil
|
||||
}
|
||||
packages := processSystemLogicalProcessorInforationList(buf)
|
||||
cpus := make([]CPU, len(packages))
|
||||
|
||||
for i, pkg := range packages {
|
||||
cpus[i].CoreCount = pkg.coreCount
|
||||
cpus[i].EfficiencyCoreCount = pkg.efficiencyCoreCount
|
||||
cpus[i].ThreadCount = pkg.threadCount
|
||||
}
|
||||
return cpus
|
||||
}
|
||||
|
||||
func IsNUMA() bool {
|
||||
// numa support in ggml is linux only
|
||||
return false
|
||||
}
|
||||
77
discover/cpu_windows_test.go
Normal file
77
discover/cpu_windows_test.go
Normal file
File diff suppressed because one or more lines are too long
81
discover/gpu.go
Normal file
81
discover/gpu.go
Normal file
@@ -0,0 +1,81 @@
|
||||
package discover
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/logutil"
|
||||
"github.com/ollama/ollama/ml"
|
||||
)
|
||||
|
||||
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
|
||||
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
||||
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||
|
||||
// GetSystemInfo returns the last cached state of the GPUs on the system
|
||||
func GetSystemInfo() ml.SystemInfo {
|
||||
logutil.Trace("performing CPU discovery")
|
||||
startDiscovery := time.Now()
|
||||
defer func() {
|
||||
logutil.Trace("CPU discovery completed", "duration", time.Since(startDiscovery))
|
||||
}()
|
||||
|
||||
memInfo, err := GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Warn("error looking up system memory", "error", err)
|
||||
}
|
||||
var threadCount int
|
||||
cpus := GetCPUDetails()
|
||||
for _, c := range cpus {
|
||||
threadCount += c.CoreCount - c.EfficiencyCoreCount
|
||||
}
|
||||
|
||||
if threadCount == 0 {
|
||||
// Fall back to Go's num CPU
|
||||
threadCount = runtime.NumCPU()
|
||||
}
|
||||
|
||||
return ml.SystemInfo{
|
||||
ThreadCount: threadCount,
|
||||
TotalMemory: memInfo.TotalMemory,
|
||||
FreeMemory: memInfo.FreeMemory,
|
||||
FreeSwap: memInfo.FreeSwap,
|
||||
}
|
||||
}
|
||||
|
||||
func cudaJetpack() string {
|
||||
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
||||
if CudaTegra != "" {
|
||||
ver := strings.Split(CudaTegra, ".")
|
||||
if len(ver) > 0 {
|
||||
return "jetpack" + ver[0]
|
||||
}
|
||||
} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
|
||||
r := regexp.MustCompile(` R(\d+) `)
|
||||
m := r.FindSubmatch(data)
|
||||
if len(m) != 2 {
|
||||
slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
|
||||
} else {
|
||||
if l4t, err := strconv.Atoi(string(m[1])); err == nil {
|
||||
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
|
||||
// https://developer.nvidia.com/embedded/jetpack-archive
|
||||
switch l4t {
|
||||
case 35:
|
||||
return "jetpack5"
|
||||
case 36:
|
||||
return "jetpack6"
|
||||
default:
|
||||
// Newer Jetson systems use the SBSU runtime
|
||||
slog.Debug("unrecognized L4T version", "nv_tegra_release", string(data))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
54
discover/gpu_darwin.go
Normal file
54
discover/gpu_darwin.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package discover
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -x objective-c
|
||||
#cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal
|
||||
#include "gpu_info_darwin.h"
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"syscall"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
)
|
||||
|
||||
const (
|
||||
metalMinimumMemory = 512 * format.MebiByte
|
||||
)
|
||||
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
return memInfo{
|
||||
TotalMemory: uint64(C.getPhysicalMemory()),
|
||||
FreeMemory: uint64(C.getFreeMemory()),
|
||||
// FreeSwap omitted as Darwin uses dynamic paging
|
||||
}, nil
|
||||
}
|
||||
|
||||
func GetCPUDetails() []CPU {
|
||||
query := "hw.perflevel0.physicalcpu"
|
||||
perfCores, err := syscall.SysctlUint32(query)
|
||||
if err != nil {
|
||||
slog.Warn("failed to discover physical CPU details", "query", query, "error", err)
|
||||
}
|
||||
query = "hw.perflevel1.physicalcpu"
|
||||
efficiencyCores, _ := syscall.SysctlUint32(query) // On x86 xeon this wont return data
|
||||
|
||||
// Determine thread count
|
||||
query = "hw.logicalcpu"
|
||||
logicalCores, _ := syscall.SysctlUint32(query)
|
||||
|
||||
return []CPU{
|
||||
{
|
||||
CoreCount: int(perfCores + efficiencyCores),
|
||||
EfficiencyCoreCount: int(efficiencyCores),
|
||||
ThreadCount: int(logicalCores),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func IsNUMA() bool {
|
||||
// numa support in ggml is linux only
|
||||
return false
|
||||
}
|
||||
5
discover/gpu_info_darwin.h
Normal file
5
discover/gpu_info_darwin.h
Normal file
@@ -0,0 +1,5 @@
|
||||
#import <Metal/Metal.h>
|
||||
#include <stdint.h>
|
||||
uint64_t getRecommendedMaxVRAM();
|
||||
uint64_t getPhysicalMemory();
|
||||
uint64_t getFreeMemory();
|
||||
35
discover/gpu_info_darwin.m
Normal file
35
discover/gpu_info_darwin.m
Normal file
@@ -0,0 +1,35 @@
|
||||
#import <Foundation/Foundation.h>
|
||||
#import <mach/mach.h>
|
||||
#include "gpu_info_darwin.h"
|
||||
|
||||
uint64_t getRecommendedMaxVRAM() {
|
||||
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
||||
uint64_t result = device.recommendedMaxWorkingSetSize;
|
||||
CFRelease(device);
|
||||
return result;
|
||||
}
|
||||
|
||||
// getPhysicalMemory returns the total physical memory in bytes
|
||||
uint64_t getPhysicalMemory() {
|
||||
return [NSProcessInfo processInfo].physicalMemory;
|
||||
}
|
||||
|
||||
// getFreeMemory returns the total free memory in bytes, including inactive
|
||||
// memory that can be reclaimed by the system.
|
||||
uint64_t getFreeMemory() {
|
||||
mach_port_t host_port = mach_host_self();
|
||||
mach_msg_type_number_t host_size = sizeof(vm_statistics64_data_t) / sizeof(integer_t);
|
||||
vm_size_t pagesize;
|
||||
vm_statistics64_data_t vm_stat;
|
||||
|
||||
host_page_size(host_port, &pagesize);
|
||||
if (host_statistics64(host_port, HOST_VM_INFO64, (host_info64_t)&vm_stat, &host_size) != KERN_SUCCESS) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t free_memory = (uint64_t)vm_stat.free_count * pagesize;
|
||||
free_memory += (uint64_t)vm_stat.speculative_count * pagesize;
|
||||
free_memory += (uint64_t)vm_stat.inactive_count * pagesize;
|
||||
|
||||
return free_memory;
|
||||
}
|
||||
555
discover/runner.go
Normal file
555
discover/runner.go
Normal file
@@ -0,0 +1,555 @@
|
||||
package discover
|
||||
|
||||
// Runner based GPU discovery
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/logutil"
|
||||
"github.com/ollama/ollama/ml"
|
||||
)
|
||||
|
||||
var (
|
||||
deviceMu sync.Mutex
|
||||
devices []ml.DeviceInfo
|
||||
libDirs map[string]struct{}
|
||||
exe string
|
||||
bootstrapped bool
|
||||
)
|
||||
|
||||
func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.DeviceInfo {
|
||||
deviceMu.Lock()
|
||||
defer deviceMu.Unlock()
|
||||
startDiscovery := time.Now()
|
||||
msg := "overall device VRAM discovery took"
|
||||
defer func() {
|
||||
slog.Debug(msg, "duration", time.Since(startDiscovery))
|
||||
}()
|
||||
|
||||
if !bootstrapped {
|
||||
msg = "GPU bootstrap discovery took"
|
||||
libDirs = make(map[string]struct{})
|
||||
var err error
|
||||
exe, err = os.Executable()
|
||||
if err != nil {
|
||||
slog.Error("unable to lookup executable path", "error", err)
|
||||
return nil
|
||||
}
|
||||
if eval, err := filepath.EvalSymlinks(exe); err == nil {
|
||||
exe = eval
|
||||
}
|
||||
files, err := filepath.Glob(filepath.Join(ml.LibOllamaPath, "*", "*ggml-*"))
|
||||
if err != nil {
|
||||
slog.Debug("unable to lookup runner library directories", "error", err)
|
||||
}
|
||||
for _, file := range files {
|
||||
libDirs[filepath.Dir(file)] = struct{}{}
|
||||
}
|
||||
|
||||
if len(libDirs) == 0 {
|
||||
libDirs[""] = struct{}{}
|
||||
}
|
||||
|
||||
slog.Info("discovering available GPUs...")
|
||||
detectIncompatibleLibraries()
|
||||
|
||||
// Warn if any user-overrides are set which could lead to incorrect GPU discovery
|
||||
overrideWarnings()
|
||||
|
||||
requested := envconfig.LLMLibrary()
|
||||
jetpack := cudaJetpack()
|
||||
|
||||
// For our initial discovery pass, we gather all the known GPUs through
|
||||
// all the libraries that were detected. This pass may include GPUs that
|
||||
// are enumerated, but not actually supported.
|
||||
// We run this in serial to avoid potentially initializing a GPU multiple
|
||||
// times concurrently leading to memory contention
|
||||
for dir := range libDirs {
|
||||
// Typically bootstrapping takes < 1s, but on some systems, with devices
|
||||
// in low power/idle mode, initialization can take multiple seconds. We
|
||||
// set a longer timeout just for bootstrap discovery to reduce the chance
|
||||
// of giving up too quickly
|
||||
bootstrapTimeout := 30 * time.Second
|
||||
if runtime.GOOS == "windows" {
|
||||
// On Windows with Defender enabled, AV scanning of the DLLs
|
||||
// takes place sequentially and this can significantly increase
|
||||
// the time it takes too do the initial discovery pass.
|
||||
// Subsequent loads will be faster as the scan results are
|
||||
// cached
|
||||
bootstrapTimeout = 90 * time.Second
|
||||
}
|
||||
var dirs []string
|
||||
if dir != "" {
|
||||
if requested != "" && !strings.HasPrefix(requested, "mlx_") && filepath.Base(dir) != requested {
|
||||
slog.Debug("skipping available library at user's request", "requested", requested, "libDir", dir)
|
||||
continue
|
||||
} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
|
||||
continue
|
||||
} else if jetpack == "" && strings.Contains(filepath.Base(dir), "cuda_jetpack") {
|
||||
slog.Debug("jetpack not detected (set JETSON_JETPACK or OLLAMA_LLM_LIBRARY to override), skipping", "libDir", dir)
|
||||
continue
|
||||
} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
|
||||
slog.Info("experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1")
|
||||
continue
|
||||
}
|
||||
dirs = []string{ml.LibOllamaPath, dir}
|
||||
} else {
|
||||
dirs = []string{ml.LibOllamaPath}
|
||||
}
|
||||
|
||||
ctx1stPass, cancel := context.WithTimeout(ctx, bootstrapTimeout)
|
||||
// For this pass, we retain duplicates in case any are incompatible with some libraries
|
||||
devices = append(devices, bootstrapDevicesWithMetalRetry(ctx1stPass, ctx, bootstrapTimeout, dirs, nil)...)
|
||||
cancel()
|
||||
}
|
||||
|
||||
// In the second pass, we more deeply initialize the GPUs to weed out devices that
|
||||
// aren't supported by a given library. We run this phase in parallel to speed up discovery.
|
||||
// Only devices that need verification are included in this pass
|
||||
slog.Debug("evaluating which, if any, devices to filter out", "initial_count", len(devices))
|
||||
ctx2ndPass, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
var wg sync.WaitGroup
|
||||
needsDelete := make([]bool, len(devices))
|
||||
supportedMu := sync.Mutex{}
|
||||
supported := make(map[string]map[string]map[string]int) // [Library][libDir][ID] = pre-deletion devices index
|
||||
for i := range devices {
|
||||
libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
|
||||
if !devices[i].NeedsInitValidation() {
|
||||
// No need to validate, add to the supported map
|
||||
supportedMu.Lock()
|
||||
if _, ok := supported[devices[i].Library]; !ok {
|
||||
supported[devices[i].Library] = make(map[string]map[string]int)
|
||||
}
|
||||
if _, ok := supported[devices[i].Library][libDir]; !ok {
|
||||
supported[devices[i].Library][libDir] = make(map[string]int)
|
||||
}
|
||||
supported[devices[i].Library][libDir][devices[i].ID] = i
|
||||
supportedMu.Unlock()
|
||||
continue
|
||||
}
|
||||
slog.Debug("verifying if device is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
extraEnvs := ml.GetDevicesEnv(devices[i:i+1], true)
|
||||
devices[i].AddInitValidation(extraEnvs)
|
||||
if len(bootstrapDevicesWithMetalRetry(ctx2ndPass, ctx, 30*time.Second, devices[i].LibraryPath, extraEnvs)) == 0 {
|
||||
slog.Debug("filtering device which didn't fully initialize",
|
||||
"id", devices[i].ID,
|
||||
"libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
|
||||
"pci_id", devices[i].PCIID,
|
||||
"library", devices[i].Library,
|
||||
)
|
||||
needsDelete[i] = true
|
||||
} else {
|
||||
supportedMu.Lock()
|
||||
if _, ok := supported[devices[i].Library]; !ok {
|
||||
supported[devices[i].Library] = make(map[string]map[string]int)
|
||||
}
|
||||
if _, ok := supported[devices[i].Library][libDir]; !ok {
|
||||
supported[devices[i].Library][libDir] = make(map[string]int)
|
||||
}
|
||||
supported[devices[i].Library][libDir][devices[i].ID] = i
|
||||
supportedMu.Unlock()
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
logutil.Trace("supported GPU library combinations before filtering", "supported", supported)
|
||||
|
||||
// Mark for deletion any overlaps - favoring the library version that can cover all GPUs if possible
|
||||
filterOverlapByLibrary(supported, needsDelete)
|
||||
|
||||
// Any Libraries that utilize numeric IDs need adjusting based on any possible filtering taking place
|
||||
postFilteredID := map[string]int{}
|
||||
for i := 0; i < len(needsDelete); i++ {
|
||||
if needsDelete[i] {
|
||||
logutil.Trace("removing unsupported or overlapping GPU combination", "libDir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1], "description", devices[i].Description, "compute", devices[i].Compute(), "pci_id", devices[i].PCIID)
|
||||
devices = append(devices[:i], devices[i+1:]...)
|
||||
needsDelete = append(needsDelete[:i], needsDelete[i+1:]...)
|
||||
i--
|
||||
} else {
|
||||
if _, ok := postFilteredID[devices[i].Library]; !ok {
|
||||
postFilteredID[devices[i].Library] = 0
|
||||
}
|
||||
if _, err := strconv.Atoi(devices[i].ID); err == nil {
|
||||
// Replace the numeric ID with the post-filtered IDs
|
||||
slog.Debug("adjusting filtering IDs", "FilterID", devices[i].ID, "new_ID", strconv.Itoa(postFilteredID[devices[i].Library]))
|
||||
devices[i].FilterID = devices[i].ID
|
||||
devices[i].ID = strconv.Itoa(postFilteredID[devices[i].Library])
|
||||
}
|
||||
postFilteredID[devices[i].Library]++
|
||||
}
|
||||
}
|
||||
|
||||
// Now filter out any overlap with different libraries (favor CUDA/HIP over others)
|
||||
for i := 0; i < len(devices); i++ {
|
||||
for j := i + 1; j < len(devices); j++ {
|
||||
// For this pass, we only drop exact duplicates
|
||||
switch devices[i].Compare(devices[j]) {
|
||||
case ml.SameBackendDevice:
|
||||
// Same library and device, skip it
|
||||
devices = append(devices[:j], devices[j+1:]...)
|
||||
j--
|
||||
continue
|
||||
case ml.DuplicateDevice:
|
||||
// Different library, choose based on priority
|
||||
var droppedDevice ml.DeviceInfo
|
||||
if devices[i].PreferredLibrary(devices[j]) {
|
||||
droppedDevice = devices[j]
|
||||
} else {
|
||||
droppedDevice = devices[i]
|
||||
devices[i] = devices[j]
|
||||
}
|
||||
devices = append(devices[:j], devices[j+1:]...)
|
||||
j--
|
||||
|
||||
typeStr := "discrete"
|
||||
if droppedDevice.Integrated {
|
||||
typeStr = "iGPU"
|
||||
}
|
||||
slog.Debug("dropping duplicate device",
|
||||
"id", droppedDevice.ID,
|
||||
"library", droppedDevice.Library,
|
||||
"compute", droppedDevice.Compute(),
|
||||
"name", droppedDevice.Name,
|
||||
"description", droppedDevice.Description,
|
||||
"libdirs", strings.Join(droppedDevice.LibraryPath, ","),
|
||||
"driver", droppedDevice.Driver(),
|
||||
"pci_id", droppedDevice.PCIID,
|
||||
"type", typeStr,
|
||||
"total", format.HumanBytes2(droppedDevice.TotalMemory),
|
||||
"available", format.HumanBytes2(droppedDevice.FreeMemory),
|
||||
)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the libDirs to what we actually wind up using for future refreshes
|
||||
libDirs = make(map[string]struct{})
|
||||
for _, dev := range devices {
|
||||
dir := dev.LibraryPath[len(dev.LibraryPath)-1]
|
||||
if dir != ml.LibOllamaPath {
|
||||
libDirs[dir] = struct{}{}
|
||||
}
|
||||
}
|
||||
if len(libDirs) == 0 {
|
||||
libDirs[""] = struct{}{}
|
||||
}
|
||||
|
||||
bootstrapped = true
|
||||
} else {
|
||||
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
|
||||
// metal never updates free VRAM
|
||||
return append([]ml.DeviceInfo{}, devices...)
|
||||
}
|
||||
|
||||
slog.Debug("refreshing free memory")
|
||||
updated := make([]bool, len(devices))
|
||||
allDone := func() bool {
|
||||
allDone := true
|
||||
for _, done := range updated {
|
||||
if !done {
|
||||
allDone = false
|
||||
break
|
||||
}
|
||||
}
|
||||
return allDone
|
||||
}
|
||||
|
||||
// First try to use existing runners to refresh VRAM since they're already
|
||||
// active on GPU(s)
|
||||
for _, runner := range runners {
|
||||
if runner == nil {
|
||||
continue
|
||||
}
|
||||
deviceIDs := runner.GetActiveDeviceIDs()
|
||||
if len(deviceIDs) == 0 {
|
||||
// Skip this runner since it doesn't have active GPU devices
|
||||
continue
|
||||
}
|
||||
|
||||
// Check to see if this runner is active on any devices that need a refresh
|
||||
skip := true
|
||||
devCheck:
|
||||
for _, dev := range deviceIDs {
|
||||
for i := range devices {
|
||||
if dev == devices[i].DeviceID {
|
||||
if !updated[i] {
|
||||
skip = false
|
||||
break devCheck
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if skip {
|
||||
continue
|
||||
}
|
||||
|
||||
// Typical refresh on existing runner is ~500ms but allow longer if the system
|
||||
// is under stress before giving up and using stale data.
|
||||
ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
|
||||
defer cancel()
|
||||
start := time.Now()
|
||||
updatedDevices := runner.GetDeviceInfos(ctx)
|
||||
slog.Debug("existing runner discovery took", "duration", time.Since(start))
|
||||
for _, u := range updatedDevices {
|
||||
for i := range devices {
|
||||
if u.DeviceID == devices[i].DeviceID {
|
||||
updated[i] = true
|
||||
devices[i].FreeMemory = u.FreeMemory
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// Short circuit if we've updated all the devices
|
||||
if allDone() {
|
||||
break
|
||||
}
|
||||
}
|
||||
if !allDone() {
|
||||
slog.Debug("unable to refresh all GPUs with existing runners, performing bootstrap discovery")
|
||||
|
||||
// Bootstrapping may take longer in some cases (AMD windows), but we
|
||||
// would rather use stale free data to get the model running sooner
|
||||
ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Apply any dev filters to avoid re-discovering unsupported devices, and get IDs correct
|
||||
// We avoid CUDA filters here to keep ROCm from failing to discover GPUs in a mixed environment
|
||||
devFilter := ml.GetDevicesEnv(devices, false)
|
||||
|
||||
for dir := range libDirs {
|
||||
updatedDevices := bootstrapDevices(ctx, []string{ml.LibOllamaPath, dir}, devFilter)
|
||||
for _, u := range updatedDevices {
|
||||
for i := range devices {
|
||||
if u.DeviceID == devices[i].DeviceID && u.PCIID == devices[i].PCIID {
|
||||
updated[i] = true
|
||||
devices[i].FreeMemory = u.FreeMemory
|
||||
break
|
||||
}
|
||||
}
|
||||
// TODO - consider evaluating if new devices have appeared (e.g. hotplug)
|
||||
}
|
||||
if allDone() {
|
||||
break
|
||||
}
|
||||
}
|
||||
if !allDone() {
|
||||
slog.Warn("unable to refresh free memory, using old values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return append([]ml.DeviceInfo{}, devices...)
|
||||
}
|
||||
|
||||
func filterOverlapByLibrary(supported map[string]map[string]map[string]int, needsDelete []bool) {
|
||||
// For multi-GPU systems, use the newest version that supports all the GPUs
|
||||
for _, byLibDirs := range supported {
|
||||
libDirs := make([]string, 0, len(byLibDirs))
|
||||
for libDir := range byLibDirs {
|
||||
libDirs = append(libDirs, libDir)
|
||||
}
|
||||
sort.Sort(sort.Reverse(sort.StringSlice(libDirs)))
|
||||
anyMissing := false
|
||||
var newest string
|
||||
for _, newest = range libDirs {
|
||||
for _, libDir := range libDirs {
|
||||
if libDir == newest {
|
||||
continue
|
||||
}
|
||||
if len(byLibDirs[newest]) != len(byLibDirs[libDir]) {
|
||||
anyMissing = true
|
||||
break
|
||||
}
|
||||
for dev := range byLibDirs[newest] {
|
||||
if _, found := byLibDirs[libDir][dev]; !found {
|
||||
anyMissing = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if !anyMissing {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Now we can mark overlaps for deletion
|
||||
for _, libDir := range libDirs {
|
||||
if libDir == newest {
|
||||
continue
|
||||
}
|
||||
for dev, i := range byLibDirs[libDir] {
|
||||
if _, found := byLibDirs[newest][dev]; found {
|
||||
slog.Debug("filtering device with overlapping libraries",
|
||||
"id", dev,
|
||||
"library", libDir,
|
||||
"delete_index", i,
|
||||
"kept_library", newest,
|
||||
)
|
||||
needsDelete[i] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type bootstrapRunner struct {
|
||||
port int
|
||||
cmd *exec.Cmd
|
||||
}
|
||||
|
||||
func (r *bootstrapRunner) GetPort() int {
|
||||
return r.port
|
||||
}
|
||||
|
||||
func (r *bootstrapRunner) HasExited() bool {
|
||||
if r.cmd != nil && r.cmd.ProcessState != nil {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func bootstrapDevicesWithMetalRetry(firstAttemptCtx, retryParentCtx context.Context, timeout time.Duration, ollamaLibDirs []string, extraEnvs map[string]string) []ml.DeviceInfo {
|
||||
runDiscovery := func(ctx context.Context, extraEnvs map[string]string) ([]ml.DeviceInfo, *llm.StatusWriter, int, error) {
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
slog.Debug("bootstrap discovery took", "duration", time.Since(start), "OLLAMA_LIBRARY_PATH", ollamaLibDirs, "extra_envs", extraEnvs)
|
||||
}()
|
||||
return bootstrapDevicesWithStatus(ctx, ollamaLibDirs, extraEnvs)
|
||||
}
|
||||
|
||||
devices, status, exitCode, err := runDiscovery(firstAttemptCtx, extraEnvs)
|
||||
if err == nil {
|
||||
recordPersistentRunnerEnv(devices, extraEnvs)
|
||||
}
|
||||
if err != nil && llm.ShouldRetryWithMetalTensorDisabled(err, status) && (extraEnvs == nil || extraEnvs["GGML_METAL_TENSOR_DISABLE"] != "1") {
|
||||
retryEnvs := map[string]string{}
|
||||
for k, v := range extraEnvs {
|
||||
retryEnvs[k] = v
|
||||
}
|
||||
retryEnvs["GGML_METAL_TENSOR_DISABLE"] = "1"
|
||||
slog.Warn("retrying GPU discovery with Metal tensor API disabled", "error", err)
|
||||
retryCtx, cancel := context.WithTimeout(retryParentCtx, timeout)
|
||||
defer cancel()
|
||||
devices, status, exitCode, err = runDiscovery(retryCtx, retryEnvs)
|
||||
if err == nil {
|
||||
recordPersistentRunnerEnv(devices, retryEnvs)
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
if exitCode >= 0 {
|
||||
// Expected during bootstrapping while we filter out unsupported GPUs.
|
||||
logutil.Trace("runner exited", "OLLAMA_LIBRARY_PATH", ollamaLibDirs, "extra_envs", extraEnvs, "code", exitCode, "detail", status.LastError())
|
||||
} else {
|
||||
slog.Info("failure during GPU discovery", "OLLAMA_LIBRARY_PATH", ollamaLibDirs, "extra_envs", extraEnvs, "error", err, "detail", status.LastError())
|
||||
}
|
||||
}
|
||||
|
||||
return devices
|
||||
}
|
||||
|
||||
func recordPersistentRunnerEnv(devices []ml.DeviceInfo, extraEnvs map[string]string) {
|
||||
if extraEnvs["GGML_METAL_TENSOR_DISABLE"] != "1" {
|
||||
return
|
||||
}
|
||||
|
||||
for i := range devices {
|
||||
if devices[i].Library != "Metal" {
|
||||
continue
|
||||
}
|
||||
if devices[i].RunnerEnvOverrides == nil {
|
||||
devices[i].RunnerEnvOverrides = map[string]string{}
|
||||
}
|
||||
devices[i].RunnerEnvOverrides["GGML_METAL_TENSOR_DISABLE"] = "1"
|
||||
}
|
||||
}
|
||||
|
||||
func bootstrapDevices(ctx context.Context, ollamaLibDirs []string, extraEnvs map[string]string) []ml.DeviceInfo {
|
||||
devices, _, _, _ := bootstrapDevicesWithStatus(ctx, ollamaLibDirs, extraEnvs)
|
||||
return devices
|
||||
}
|
||||
|
||||
func bootstrapDevicesWithStatus(ctx context.Context, ollamaLibDirs []string, extraEnvs map[string]string) ([]ml.DeviceInfo, *llm.StatusWriter, int, error) {
|
||||
var baseOut io.Writer = io.Discard
|
||||
if envconfig.LogLevel() == logutil.LevelTrace {
|
||||
baseOut = os.Stderr
|
||||
}
|
||||
|
||||
status := llm.NewStatusWriter(baseOut)
|
||||
cmd, port, err := llm.StartRunner(
|
||||
true, // ollama engine
|
||||
"", // no model
|
||||
ollamaLibDirs,
|
||||
status,
|
||||
extraEnvs,
|
||||
)
|
||||
if err != nil {
|
||||
slog.Debug("failed to start runner to discovery GPUs", "error", err)
|
||||
return nil, status, -1, err
|
||||
}
|
||||
|
||||
go func() {
|
||||
cmd.Wait() // exit status ignored
|
||||
}()
|
||||
|
||||
defer cmd.Process.Kill()
|
||||
|
||||
devices, err := ml.GetDevicesFromRunner(ctx, &bootstrapRunner{port: port, cmd: cmd})
|
||||
exitCode := -1
|
||||
if cmd.ProcessState != nil {
|
||||
exitCode = cmd.ProcessState.ExitCode()
|
||||
}
|
||||
return devices, status, exitCode, err
|
||||
}
|
||||
|
||||
func overrideWarnings() {
|
||||
anyFound := false
|
||||
m := envconfig.AsMap()
|
||||
for _, k := range []string{
|
||||
"CUDA_VISIBLE_DEVICES",
|
||||
"HIP_VISIBLE_DEVICES",
|
||||
"ROCR_VISIBLE_DEVICES",
|
||||
"GGML_VK_VISIBLE_DEVICES",
|
||||
"GPU_DEVICE_ORDINAL",
|
||||
"HSA_OVERRIDE_GFX_VERSION",
|
||||
} {
|
||||
if e, found := m[k]; found && e.Value != "" {
|
||||
anyFound = true
|
||||
slog.Warn("user overrode visible devices", k, e.Value)
|
||||
}
|
||||
}
|
||||
if anyFound {
|
||||
slog.Warn("if GPUs are not correctly discovered, unset and try again")
|
||||
}
|
||||
}
|
||||
|
||||
func detectIncompatibleLibraries() {
|
||||
if runtime.GOOS != "windows" {
|
||||
return
|
||||
}
|
||||
basePath, err := exec.LookPath("ggml-base.dll")
|
||||
if err != nil || basePath == "" {
|
||||
return
|
||||
}
|
||||
if !strings.HasPrefix(basePath, ml.LibOllamaPath) {
|
||||
slog.Warn("potentially incompatible library detected in PATH", "location", basePath)
|
||||
}
|
||||
}
|
||||
135
discover/runner_test.go
Normal file
135
discover/runner_test.go
Normal file
@@ -0,0 +1,135 @@
|
||||
package discover
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/ml"
|
||||
)
|
||||
|
||||
func init() {
|
||||
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug}))
|
||||
slog.SetDefault(logger)
|
||||
}
|
||||
|
||||
func TestFilterOverlapByLibrary(t *testing.T) {
|
||||
type testcase struct {
|
||||
name string
|
||||
inp map[string]map[string]map[string]int
|
||||
exp []bool
|
||||
}
|
||||
for _, tc := range []testcase{
|
||||
{
|
||||
name: "empty",
|
||||
inp: map[string]map[string]map[string]int{},
|
||||
exp: []bool{}, // needs deletion
|
||||
},
|
||||
{
|
||||
name: "single no overlap",
|
||||
inp: map[string]map[string]map[string]int{
|
||||
"CUDA": {
|
||||
"cuda_v12": {
|
||||
"GPU-d7b00605-c0c8-152d-529d-e03726d5dc52": 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
exp: []bool{false},
|
||||
},
|
||||
{
|
||||
name: "100% overlap pick 2nd",
|
||||
inp: map[string]map[string]map[string]int{
|
||||
"CUDA": {
|
||||
"cuda_v12": {
|
||||
"GPU-d7b00605-c0c8-152d-529d-e03726d5dc52": 0,
|
||||
"GPU-cd6c3216-03d2-a8eb-8235-2ffbf571712e": 1,
|
||||
},
|
||||
"cuda_v13": {
|
||||
"GPU-d7b00605-c0c8-152d-529d-e03726d5dc52": 2,
|
||||
"GPU-cd6c3216-03d2-a8eb-8235-2ffbf571712e": 3,
|
||||
},
|
||||
},
|
||||
},
|
||||
exp: []bool{true, true, false, false},
|
||||
},
|
||||
{
|
||||
name: "100% overlap pick 1st",
|
||||
inp: map[string]map[string]map[string]int{
|
||||
"CUDA": {
|
||||
"cuda_v13": {
|
||||
"GPU-d7b00605-c0c8-152d-529d-e03726d5dc52": 0,
|
||||
"GPU-cd6c3216-03d2-a8eb-8235-2ffbf571712e": 1,
|
||||
},
|
||||
"cuda_v12": {
|
||||
"GPU-d7b00605-c0c8-152d-529d-e03726d5dc52": 2,
|
||||
"GPU-cd6c3216-03d2-a8eb-8235-2ffbf571712e": 3,
|
||||
},
|
||||
},
|
||||
},
|
||||
exp: []bool{false, false, true, true},
|
||||
},
|
||||
{
|
||||
name: "partial overlap pick older",
|
||||
inp: map[string]map[string]map[string]int{
|
||||
"CUDA": {
|
||||
"cuda_v13": {
|
||||
"GPU-d7b00605-c0c8-152d-529d-e03726d5dc52": 0,
|
||||
},
|
||||
"cuda_v12": {
|
||||
"GPU-d7b00605-c0c8-152d-529d-e03726d5dc52": 1,
|
||||
"GPU-cd6c3216-03d2-a8eb-8235-2ffbf571712e": 2,
|
||||
},
|
||||
},
|
||||
},
|
||||
exp: []bool{true, false, false},
|
||||
},
|
||||
{
|
||||
name: "no overlap",
|
||||
inp: map[string]map[string]map[string]int{
|
||||
"CUDA": {
|
||||
"cuda_v13": {
|
||||
"GPU-d7b00605-c0c8-152d-529d-e03726d5dc52": 0,
|
||||
},
|
||||
"cuda_v12": {
|
||||
"GPU-cd6c3216-03d2-a8eb-8235-2ffbf571712e": 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
exp: []bool{false, false},
|
||||
},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
needsDelete := make([]bool, len(tc.exp))
|
||||
filterOverlapByLibrary(tc.inp, needsDelete)
|
||||
for i, exp := range tc.exp {
|
||||
if needsDelete[i] != exp {
|
||||
t.Fatalf("expected: %v\ngot: %v", tc.exp, needsDelete)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecordPersistentRunnerEnv(t *testing.T) {
|
||||
devices := []ml.DeviceInfo{
|
||||
{DeviceID: ml.DeviceID{Library: "Metal", ID: "0"}},
|
||||
{DeviceID: ml.DeviceID{Library: "CUDA", ID: "1"}},
|
||||
}
|
||||
|
||||
recordPersistentRunnerEnv(devices, map[string]string{
|
||||
"GGML_METAL_TENSOR_DISABLE": "1",
|
||||
"CUDA_VISIBLE_DEVICES": "1",
|
||||
})
|
||||
|
||||
if got := devices[0].RunnerEnvOverrides["GGML_METAL_TENSOR_DISABLE"]; got != "1" {
|
||||
t.Fatalf("Metal RunnerEnvOverrides = %q, want %q", got, "1")
|
||||
}
|
||||
|
||||
if _, ok := devices[0].RunnerEnvOverrides["CUDA_VISIBLE_DEVICES"]; ok {
|
||||
t.Fatal("unexpected CUDA_VISIBLE_DEVICES in Metal RunnerEnvOverrides")
|
||||
}
|
||||
|
||||
if devices[1].RunnerEnvOverrides != nil {
|
||||
t.Fatalf("unexpected RunnerEnvOverrides recorded for non-Metal device: %#v", devices[1].RunnerEnvOverrides)
|
||||
}
|
||||
}
|
||||
74
discover/types.go
Normal file
74
discover/types.go
Normal file
@@ -0,0 +1,74 @@
|
||||
package discover
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/ml"
|
||||
)
|
||||
|
||||
type memInfo struct {
|
||||
TotalMemory uint64 `json:"total_memory,omitempty"`
|
||||
FreeMemory uint64 `json:"free_memory,omitempty"`
|
||||
FreeSwap uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
|
||||
}
|
||||
|
||||
// CPU type represents a CPU Package occupying a socket
|
||||
type CPU struct {
|
||||
ID string `cpuinfo:"processor"`
|
||||
VendorID string `cpuinfo:"vendor_id"`
|
||||
ModelName string `cpuinfo:"model name"`
|
||||
CoreCount int
|
||||
EfficiencyCoreCount int // Performance = CoreCount - Efficiency
|
||||
ThreadCount int
|
||||
}
|
||||
|
||||
func LogDetails(devices []ml.DeviceInfo) {
|
||||
sort.Sort(sort.Reverse(ml.ByFreeMemory(devices))) // Report devices in order of scheduling preference
|
||||
for _, dev := range devices {
|
||||
var libs []string
|
||||
for _, dir := range dev.LibraryPath {
|
||||
if strings.Contains(dir, filepath.Join("lib", "ollama")) {
|
||||
libs = append(libs, filepath.Base(dir))
|
||||
}
|
||||
}
|
||||
typeStr := "discrete"
|
||||
if dev.Integrated {
|
||||
typeStr = "iGPU"
|
||||
}
|
||||
slog.Info("inference compute",
|
||||
"id", dev.ID,
|
||||
"filter_id", dev.FilterID,
|
||||
"library", dev.Library,
|
||||
"compute", dev.Compute(),
|
||||
"name", dev.Name,
|
||||
"description", dev.Description,
|
||||
"libdirs", strings.Join(libs, ","),
|
||||
"driver", dev.Driver(),
|
||||
"pci_id", dev.PCIID,
|
||||
"type", typeStr,
|
||||
"total", format.HumanBytes2(dev.TotalMemory),
|
||||
"available", format.HumanBytes2(dev.FreeMemory),
|
||||
)
|
||||
}
|
||||
// CPU inference
|
||||
if len(devices) == 0 {
|
||||
dev, _ := GetCPUMem()
|
||||
slog.Info("inference compute",
|
||||
"id", "cpu",
|
||||
"library", "cpu",
|
||||
"compute", "",
|
||||
"name", "cpu",
|
||||
"description", "cpu",
|
||||
"libdirs", "ollama",
|
||||
"driver", "",
|
||||
"pci_id", "",
|
||||
"type", "",
|
||||
"total", format.HumanBytes2(dev.TotalMemory),
|
||||
"available", format.HumanBytes2(dev.FreeMemory),
|
||||
)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user