Files
gotgt/pkg/util/numa/numa_linux_nocgo.go
2026-03-14 11:45:35 +08:00

416 lines
8.6 KiB
Go

//go:build linux && !cgo
// +build linux,!cgo
/*
Copyright 2024 The GoStor Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package numa
import (
"bufio"
"fmt"
"os"
"runtime"
"strconv"
"strings"
"syscall"
"unsafe"
)
// Linux system call numbers used by this file.
//
// These values come from the x86_64 (amd64) syscall table; other
// architectures number these calls differently, so they must not be used
// as-is elsewhere.
const (
	SYS_GETCPU        = 309
	SYS_SET_MEMPOLICY = 238
	SYS_GET_MEMPOLICY = 239
	SYS_MBIND         = 237
	// migrate_pages is 256 on x86_64; it must not share a number with
	// set_mempolicy (238), otherwise a migrate_pages call would silently
	// change the calling thread's memory policy instead.
	SYS_MIGRATE_PAGES = 256
)
// NUMA memory policies.
const (
	MPOL_DEFAULT = iota // 0
	MPOL_PREFERRED      // 1
	MPOL_BIND           // 2
	MPOL_INTERLEAVE     // 3
	MPOL_LOCAL          // 4
)

// Flags for get_mempolicy.
const (
	MPOL_F_NODE = 1 << iota // 1
	MPOL_F_ADDR             // 2
)

// Flags for mbind.
const MPOL_MF_STRICT = 1 << 0
// runtime_GetCPU is declared without a body and is bound through go:linkname
// to the symbol runtime.getcpu; it returns a uint32.
//
// NOTE(review): nothing in the visible part of this file calls it, and whether
// the Go runtime in use actually exposes a linkable getcpu symbol cannot be
// told from here — confirm before relying on this declaration.
//
//go:noescape
//go:linkname runtime_GetCPU runtime.getcpu
func runtime_GetCPU() uint32
// detectLinuxTopology populates topology from the NUMA nodes reported by
// detectNodesFromSys.
//
// It sets topology.NumNodes and stores one NodeInfo per node in
// topology.Nodes. CPU lists, memory figures and distances are gathered on a
// best-effort basis: when one of those lookups fails, the corresponding
// NodeInfo fields are simply left at their zero value. Only a failure to
// enumerate the nodes themselves is returned as an error.
func detectLinuxTopology(topology *Topology) error {
	nodeIDs, err := detectNodesFromSys()
	if err != nil {
		return err
	}
	topology.NumNodes = len(nodeIDs)

	for _, raw := range nodeIDs {
		id := NodeID(raw)
		info := &NodeInfo{ID: id}

		// CPUs that belong to this node; also recorded in the reverse map.
		if cpuList, cpuErr := getCPUsForNodeNoCGO(raw); cpuErr == nil {
			info.CPUs = cpuList
			for _, c := range cpuList {
				topology.CPUToNodeMap[c] = id
			}
		}

		// Total and free memory of this node.
		if mem, memErr := getMemoryInfoForNodeNoCGO(raw); memErr == nil {
			info.TotalMemory = mem.total
			info.FreeMemory = mem.free
		}

		// This node's row of the distance matrix.
		if dist, distErr := getDistancesForNodeNoCGO(raw, len(nodeIDs)); distErr == nil {
			info.DistanceToNode = dist
		}

		topology.Nodes[id] = info
	}
	return nil
}
// detectNodesFromSys lists the NUMA node IDs found under
// /sys/devices/system/node.
//
// Every directory named "node<N>" with a numeric suffix contributes N to the
// result, in the order os.ReadDir returns the entries. It returns the
// os.ReadDir error if the directory cannot be read, and an error when no node
// directory is found.
func detectNodesFromSys() ([]int, error) {
	dirEntries, err := os.ReadDir("/sys/devices/system/node")
	if err != nil {
		return nil, err
	}

	var ids []int
	for _, de := range dirEntries {
		name := de.Name()
		if !de.IsDir() || !strings.HasPrefix(name, "node") {
			continue
		}
		// Entries whose suffix is not a number are skipped.
		id, convErr := strconv.Atoi(strings.TrimPrefix(name, "node"))
		if convErr != nil {
			continue
		}
		ids = append(ids, id)
	}

	if len(ids) == 0 {
		return nil, fmt.Errorf("no NUMA nodes found")
	}
	return ids, nil
}
// memoryInfo holds the memory figures of one NUMA node, in bytes.
type memoryInfo struct {
	total uint64 // MemTotal of the node, in bytes
	free  uint64 // MemFree of the node, in bytes
}

// getMemoryInfoForNodeNoCGO reads /sys/devices/system/node/node<nodeID>/meminfo
// and returns the node's MemTotal and MemFree converted from kB to bytes.
//
// Lines in the per-node meminfo file carry a "Node <id>" prefix, e.g.
//
//	Node 0 MemTotal:       32768000 kB
//
// so the value is taken from the field that follows the key rather than from
// a fixed position. A key that is missing or whose value is not a number
// leaves the corresponding field at zero.
//
// It returns an error if the file cannot be opened or read.
func getMemoryInfoForNodeNoCGO(nodeID int) (*memoryInfo, error) {
	file, err := os.Open(fmt.Sprintf("/sys/devices/system/node/node%d/meminfo", nodeID))
	if err != nil {
		return nil, err
	}
	defer file.Close()

	info := &memoryInfo{}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if kb, ok := nodeMeminfoValueKB(fields, "MemTotal:"); ok {
			info.total = kb * 1024
		} else if kb, ok := nodeMeminfoValueKB(fields, "MemFree:"); ok {
			info.free = kb * 1024
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return info, nil
}

// nodeMeminfoValueKB returns the number that directly follows key among the
// whitespace-separated fields of one meminfo line. ok is false when key is
// absent, is the last field, or is followed by something that is not an
// unsigned integer.
func nodeMeminfoValueKB(fields []string, key string) (kb uint64, ok bool) {
	for i, f := range fields {
		if f != key {
			continue
		}
		if i+1 >= len(fields) {
			return 0, false
		}
		v, err := strconv.ParseUint(fields[i+1], 10, 64)
		if err != nil {
			return 0, false
		}
		return v, true
	}
	return 0, false
}
// getCPUsForNodeNoCGO returns the CPU indices listed in
// /sys/devices/system/node/node<nodeID>/cpulist.
//
// The file content is whitespace-trimmed and handed to parseCPUListNoCGO.
// A read failure is returned unchanged.
func getCPUsForNodeNoCGO(nodeID int) ([]int, error) {
	path := fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", nodeID)
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	list := strings.TrimSpace(string(raw))
	return parseCPUListNoCGO(list)
}
// parseCPUListNoCGO expands a CPU list such as "0-3,8,10-11" into the
// individual CPU indices [0 1 2 3 8 10 11].
//
// The list is a comma-separated sequence of single indices and inclusive
// "first-last" ranges; whitespace around each entry is ignored. An empty list
// yields an empty result and no error.
//
// It returns an error, and no CPUs, when an entry is empty, is not a number,
// or is a range whose end is smaller than its start. Malformed entries are
// reported instead of being silently turned into CPU 0.
func parseCPUListNoCGO(list string) ([]int, error) {
	var cpus []int
	if list == "" {
		return cpus, nil
	}

	for _, part := range strings.Split(list, ",") {
		part = strings.TrimSpace(part)

		// A single index is treated as the range "n-n".
		first, last := part, part
		if i := strings.IndexByte(part, '-'); i >= 0 {
			first, last = part[:i], part[i+1:]
		}

		start, err := strconv.Atoi(first)
		if err != nil {
			return nil, fmt.Errorf("parsing CPU list %q: invalid entry %q: %w", list, part, err)
		}
		end, err := strconv.Atoi(last)
		if err != nil {
			return nil, fmt.Errorf("parsing CPU list %q: invalid entry %q: %w", list, part, err)
		}
		if end < start {
			return nil, fmt.Errorf("parsing CPU list %q: descending range %q", list, part)
		}

		for cpu := start; cpu <= end; cpu++ {
			cpus = append(cpus, cpu)
		}
	}
	return cpus, nil
}
// getDistancesForNodeNoCGO reads /sys/devices/system/node/node<nodeID>/distance
// and returns its whitespace-separated values in file order.
//
// numNodes is accepted for interface compatibility but is not consulted; the
// length of the result is whatever the file contains.
//
// It returns an error if the file cannot be read or if any value is not an
// unsigned 32-bit integer, rather than reporting such a value as distance 0.
func getDistancesForNodeNoCGO(nodeID int, numNodes int) ([]uint32, error) {
	path := fmt.Sprintf("/sys/devices/system/node/node%d/distance", nodeID)
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	fields := strings.Fields(string(data))
	distances := make([]uint32, 0, len(fields))
	for _, field := range fields {
		val, err := strconv.ParseUint(field, 10, 32)
		if err != nil {
			return nil, fmt.Errorf("parsing %s: invalid distance %q: %w", path, field, err)
		}
		distances = append(distances, uint32(val))
	}
	return distances, nil
}
// getCurrentNodeImpl reports the NUMA node the calling thread is running on.
//
// It issues the getcpu system call and returns the node value written by the
// kernel. If the call fails, the result of getNodeFromSchedGetCPU is returned
// instead.
func getCurrentNodeImpl() (NodeID, error) {
	var cpuIdx, nodeIdx uint32

	_, _, errno := syscall.Syscall(
		SYS_GETCPU,
		uintptr(unsafe.Pointer(&cpuIdx)),
		uintptr(unsafe.Pointer(&nodeIdx)),
		0,
	)
	if errno == 0 {
		return NodeID(nodeIdx), nil
	}

	// getcpu is unavailable: derive the node from the CPU index instead.
	return getNodeFromSchedGetCPU()
}
// getNodeFromSchedGetCPU determines the current NUMA node by looking up the
// CPU the calling thread last ran on in the topology returned by GetTopology.
//
// The CPU index is read from the thread's /proc stat file.
// runtime.GOMAXPROCS(0) must not be used for this: it returns the scheduler's
// processor limit, which is a count and not the index of the current CPU.
//
// It returns an error if the CPU cannot be determined or if the topology has
// no node for that CPU.
func getNodeFromSchedGetCPU() (NodeID, error) {
	cpu, err := lastCPUOfCurrentThread()
	if err != nil {
		return 0, fmt.Errorf("cannot determine current CPU: %w", err)
	}

	// Look up in topology.
	topology := GetTopology()
	node, ok := topology.GetNodeForCPU(cpu)
	if !ok {
		return 0, fmt.Errorf("cannot determine NUMA node for CPU %d", cpu)
	}
	return node, nil
}

// lastCPUOfCurrentThread returns the "processor" field (field 39) of
// /proc/self/task/<tid>/stat, i.e. the CPU the thread last executed on.
func lastCPUOfCurrentThread() (int, error) {
	path := fmt.Sprintf("/proc/self/task/%d/stat", syscall.Gettid())
	data, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}

	// Field 2 (comm) is parenthesised and may itself contain spaces or
	// parentheses, so split only what follows the last ')'.
	stat := string(data)
	end := strings.LastIndexByte(stat, ')')
	if end < 0 {
		return 0, fmt.Errorf("malformed %s: no command field", path)
	}
	fields := strings.Fields(stat[end+1:])

	// fields[0] is field 3 (state), hence field 39 sits at index 39-3.
	const processorIdx = 39 - 3
	if len(fields) <= processorIdx {
		return 0, fmt.Errorf("malformed %s: only %d fields after command", path, len(fields))
	}
	cpu, err := strconv.Atoi(fields[processorIdx])
	if err != nil {
		return 0, fmt.Errorf("malformed %s: processor field: %w", path, err)
	}
	return cpu, nil
}
// setPreferredNodeImpl sets the calling thread's memory policy to
// MPOL_PREFERRED for the given node via set_mempolicy and returns a
// PreferredNode recording that node.
//
// The node mask is a single 64-bit word, so only nodes 0..63 can be
// expressed; any other node is rejected with an error instead of silently
// producing an empty mask.
//
// It returns an error if the node is out of range or the system call fails.
func setPreferredNodeImpl(node NodeID) (*PreferredNode, error) {
	const maskBits = 64
	if uint64(node) >= maskBits {
		return nil, fmt.Errorf("set_mempolicy failed: NUMA node %d does not fit in a %d-bit node mask", node, maskBits)
	}
	mask := uint64(1) << uint64(node)

	// The kernel only looks at the first maxnode-1 bits of the mask, so
	// maxnode has to be one more than the number of valid bits. Passing
	// node+1 would mask out the very bit that was just set.
	_, _, errno := syscall.Syscall6(SYS_SET_MEMPOLICY,
		uintptr(MPOL_PREFERRED),
		uintptr(unsafe.Pointer(&mask)),
		uintptr(maskBits+1),
		0, 0, 0)
	if errno != 0 {
		return nil, fmt.Errorf("set_mempolicy failed: %v", errno)
	}
	return &PreferredNode{nodeID: node}, nil
}
// revertPreferredNodeImpl resets the memory policy to MPOL_DEFAULT by calling
// set_mempolicy with no node mask.
//
// The argument p is not consulted. It returns an error if the system call
// fails.
func revertPreferredNodeImpl(p *PreferredNode) error {
	if _, _, errno := syscall.Syscall(SYS_SET_MEMPOLICY, uintptr(MPOL_DEFAULT), 0, 0); errno != 0 {
		return fmt.Errorf("set_mempolicy failed: %v", errno)
	}
	return nil
}
// setMemoryPolicyImpl applies the given memory policy to the set of nodes
// using set_mempolicy.
//
// policy is translated to the matching MPOL_* mode; an unrecognised policy is
// an error. The nodes are encoded in a single 64-bit mask, so every node must
// be in the range 0..63; a node outside that range is reported as an error
// rather than being silently dropped from the mask.
//
// It returns an error for an unknown policy, an out-of-range node, or a
// failing system call.
func setMemoryPolicyImpl(policy MemoryPolicy, nodes []NodeID) error {
	var mode int
	switch policy {
	case MPDefault:
		mode = MPOL_DEFAULT
	case MPBind:
		mode = MPOL_BIND
	case MPPreferred:
		mode = MPOL_PREFERRED
	case MPInterleave:
		mode = MPOL_INTERLEAVE
	default:
		return fmt.Errorf("unknown memory policy: %d", policy)
	}

	const maskBits = 64
	var mask uint64
	for _, node := range nodes {
		if uint64(node) >= maskBits {
			return fmt.Errorf("set_mempolicy failed: NUMA node %d does not fit in a %d-bit node mask", node, maskBits)
		}
		mask |= 1 << uint64(node)
	}

	// The kernel only looks at the first maxnode-1 bits of the mask, so
	// maxnode is the mask width plus one. A value derived from the highest
	// node (highest+1) would cut that node off, and 0 together with a
	// non-NULL mask is rejected outright.
	_, _, errno := syscall.Syscall6(SYS_SET_MEMPOLICY,
		uintptr(mode),
		uintptr(unsafe.Pointer(&mask)),
		uintptr(maskBits+1),
		0, 0, 0)
	if errno != 0 {
		return fmt.Errorf("set_mempolicy failed: %v", errno)
	}
	return nil
}
// allocateOnNodeImpl allocates a size-byte buffer and tries to bind its
// memory to the given NUMA node with mbind(MPOL_BIND, MPOL_MF_STRICT).
//
// Binding is best effort: if mbind fails the buffer is returned as a normal
// allocation. mbind(2) requires a page-aligned start address, which make does
// not guarantee, so the call can be expected to fail for buffers that do not
// start on a page boundary.
//
// A zero-length request returns an empty buffer without calling mbind (there
// is no first element to take the address of). A negative size is reported as
// an error instead of panicking in make. Nodes outside 0..63 cannot be
// expressed in the 64-bit node mask and yield a normal allocation.
func allocateOnNodeImpl(size int, node NodeID) ([]byte, error) {
	if size < 0 {
		return nil, fmt.Errorf("invalid allocation size %d", size)
	}
	buf := make([]byte, size)

	const maskBits = 64
	if size == 0 || uint64(node) >= maskBits {
		return buf, nil
	}

	mask := uint64(1) << uint64(node)
	// The kernel only looks at the first maxnode-1 bits of the mask, so
	// maxnode is the mask width plus one. The result is deliberately ignored:
	// on failure the caller still gets a usable, unbound buffer.
	_, _, _ = syscall.Syscall6(SYS_MBIND,
		uintptr(unsafe.Pointer(&buf[0])),
		uintptr(size),
		uintptr(MPOL_BIND),
		uintptr(unsafe.Pointer(&mask)),
		uintptr(maskBits+1),
		uintptr(MPOL_MF_STRICT))
	return buf, nil
}
// scheduleOnNodeImpl runs fn on the calling goroutine while its OS thread is
// restricted to the single CPU cpu.
//
// The goroutine is locked to its thread for the duration of the call. The
// thread's previous CPU affinity is saved first and put back before the
// thread is unlocked, so the restriction does not leak to whatever goroutine
// the runtime schedules on that thread afterwards.
//
// The affinity system calls are issued directly because the standard syscall
// package offers no CPU-set type or affinity wrapper. The mask holds 1024
// CPUs; a cpu outside [0, 1024) is an error.
//
// fn is not called when an error occurs before the thread has been pinned. An
// error is also returned if fn ran but the previous affinity could not be
// restored.
func scheduleOnNodeImpl(cpu int, fn func()) (err error) {
	const (
		maxCPUs  = 1024
		wordBits = int(8 * unsafe.Sizeof(uintptr(0)))
	)
	if cpu < 0 || cpu >= maxCPUs {
		return fmt.Errorf("sched_setaffinity failed: CPU %d out of range [0, %d)", cpu, maxCPUs)
	}

	var saved, target [maxCPUs / wordBits]uintptr
	target[cpu/wordBits] = uintptr(1) << uint(cpu%wordBits)

	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// pid 0 addresses the calling thread.
	if _, _, errno := syscall.RawSyscall(syscall.SYS_SCHED_GETAFFINITY,
		0, unsafe.Sizeof(saved), uintptr(unsafe.Pointer(&saved[0]))); errno != 0 {
		return fmt.Errorf("sched_getaffinity failed: %w", errno)
	}
	if _, _, errno := syscall.RawSyscall(syscall.SYS_SCHED_SETAFFINITY,
		0, unsafe.Sizeof(target), uintptr(unsafe.Pointer(&target[0]))); errno != 0 {
		return fmt.Errorf("sched_setaffinity failed: %w", errno)
	}

	// Runs before the deferred UnlockOSThread, i.e. still on the same thread,
	// and also when fn panics.
	defer func() {
		if _, _, errno := syscall.RawSyscall(syscall.SYS_SCHED_SETAFFINITY,
			0, unsafe.Sizeof(saved), uintptr(unsafe.Pointer(&saved[0]))); errno != 0 && err == nil {
			err = fmt.Errorf("restoring CPU affinity failed: %w", errno)
		}
	}()

	fn()
	return nil
}
// getPreferredNodeForCurrentThreadImpl returns the NUMA node the calling
// thread's memory policy points at.
//
// It asks get_mempolicy for the thread policy (flags 0, no address) together
// with its node mask. When a non-default policy with a non-empty mask is in
// effect, the lowest node in that mask is returned. In every other case —
// the call fails, the policy is MPOL_DEFAULT, or the mask is empty — the node
// reported by getCurrentNodeImpl is returned.
//
// The address argument must stay NULL here: get_mempolicy rejects a non-NULL
// address unless MPOL_F_ADDR is set. mode is an int32 because the kernel
// writes a C int through that pointer.
func getPreferredNodeForCurrentThreadImpl() NodeID {
	const (
		maskWords = 16 // 1024 node bits
		wordBits  = 64
	)
	var (
		mode int32
		mask [maskWords]uint64
	)

	// maxnode is the mask width plus one, matching how the kernel interprets
	// that argument; the kernel then fills exactly maskWords words.
	_, _, errno := syscall.Syscall6(SYS_GET_MEMPOLICY,
		uintptr(unsafe.Pointer(&mode)),
		uintptr(unsafe.Pointer(&mask[0])),
		uintptr(maskWords*wordBits+1),
		0, 0, 0)

	if errno == 0 && mode != MPOL_DEFAULT {
		for w, word := range mask {
			if word == 0 {
				continue
			}
			for b := 0; b < wordBits; b++ {
				if word&(1<<uint(b)) != 0 {
					return NodeID(w*wordBits + b)
				}
			}
		}
	}

	// No explicit policy node: report where the thread is running. The error
	// is ignored because this function has no way to return it; the zero
	// NodeID is the result in that case.
	current, _ := getCurrentNodeImpl()
	return current
}
// PinThreadToNode locks the calling goroutine to its OS thread and restricts
// that thread to the CPUs of the given NUMA node.
//
// The node is looked up in the topology returned by GetTopology. On success
// the goroutine stays locked to its thread; call UnpinThread to release the
// lock. On failure the thread lock is released again before returning.
//
// The affinity is set with a direct sched_setaffinity system call because the
// standard syscall package offers no CPU-set type or affinity wrapper. The
// mask holds 1024 CPUs; CPU indices outside [0, 1024) are left out of it.
//
// It returns an error if the node is unknown, has no CPUs, or the system call
// fails.
func PinThreadToNode(node NodeID) error {
	topology := GetTopology()
	nodeInfo, ok := topology.GetNode(node)
	if !ok {
		return fmt.Errorf("NUMA node %d not found", node)
	}
	if len(nodeInfo.CPUs) == 0 {
		return fmt.Errorf("NUMA node %d has no CPUs", node)
	}

	const (
		maxCPUs  = 1024
		wordBits = int(8 * unsafe.Sizeof(uintptr(0)))
	)
	var mask [maxCPUs / wordBits]uintptr
	for _, cpu := range nodeInfo.CPUs {
		if cpu < 0 || cpu >= maxCPUs {
			continue
		}
		mask[cpu/wordBits] |= uintptr(1) << uint(cpu%wordBits)
	}

	runtime.LockOSThread()
	// pid 0 addresses the calling thread.
	if _, _, errno := syscall.RawSyscall(syscall.SYS_SCHED_SETAFFINITY,
		0, unsafe.Sizeof(mask), uintptr(unsafe.Pointer(&mask[0]))); errno != 0 {
		runtime.UnlockOSThread()
		return fmt.Errorf("sched_setaffinity failed: %v", errno)
	}
	return nil
}
// UnpinThread undoes the thread lock taken by PinThreadToNode by calling
// runtime.UnlockOSThread.
//
// It only releases the goroutine-to-thread lock; it does not change the OS
// thread's CPU affinity.
// NOTE(review): the affinity set by PinThreadToNode therefore appears to stay
// on the thread after unpinning — confirm whether callers expect it to be
// reset.
func UnpinThread() {
	runtime.UnlockOSThread()
}
// RunOnNode runs fn with the calling goroutine pinned to the CPUs of the
// given NUMA node and undoes the pinning afterwards.
//
// The goroutine is locked to its OS thread first so that saving the thread's
// CPU affinity, pinning, running fn and restoring the affinity all happen on
// the same thread. Restoring matters because UnpinThread only releases the
// thread lock: without it the thread would stay restricted to the node's CPUs
// for whatever the runtime schedules on it next.
//
// Saving and restoring the affinity is best effort; if the previous affinity
// cannot be read, fn still runs pinned and the thread is merely unlocked
// afterwards. It returns the error from PinThreadToNode, in which case fn is
// not called.
func RunOnNode(node NodeID, fn func()) error {
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	const wordBits = 8 * unsafe.Sizeof(uintptr(0))
	var saved [1024 / wordBits]uintptr
	// pid 0 addresses the calling thread.
	_, _, errno := syscall.RawSyscall(syscall.SYS_SCHED_GETAFFINITY,
		0, unsafe.Sizeof(saved), uintptr(unsafe.Pointer(&saved[0])))
	haveSaved := errno == 0

	if err := PinThreadToNode(node); err != nil {
		return err
	}
	defer UnpinThread()

	if haveSaved {
		// Deferred last, so it runs first: the affinity is restored while the
		// goroutine is still locked to the thread, and also if fn panics.
		defer func() {
			_, _, _ = syscall.RawSyscall(syscall.SYS_SCHED_SETAFFINITY,
				0, unsafe.Sizeof(saved), uintptr(unsafe.Pointer(&saved[0])))
		}()
	}

	fn()
	return nil
}