728 lines
17 KiB
Go
728 lines
17 KiB
Go
//go:build linux
|
|
// +build linux
|
|
|
|
/*
|
|
Copyright 2024 The GoStor Authors All rights reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
// Package iouring provides an io_uring-based backing store for high-performance
|
|
// asynchronous I/O operations on Linux 5.1+ systems.
|
|
package iouring
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"runtime"
|
|
"sync"
|
|
"sync/atomic"
|
|
"syscall"
|
|
"unsafe"
|
|
|
|
log "github.com/sirupsen/logrus"
|
|
|
|
"github.com/gostor/gotgt/pkg/api"
|
|
"github.com/gostor/gotgt/pkg/scsi"
|
|
)
|
|
|
|
const (
	// IoUringBackingStorage is the name under which this backing store is
	// registered with the SCSI layer.
	IoUringBackingStorage = "iouring"

	// DefaultQueueDepth is the number of submission queue entries requested
	// when a ring is created for a newly constructed store.
	DefaultQueueDepth = 4096

	// MinKernelMajor and MinKernelMinor give the oldest kernel release on
	// which the backing store registers itself. io_uring itself exists since
	// Linux 5.1, but the read and write paths in this file submit
	// IORING_OP_READ and IORING_OP_WRITE, which the kernel only accepts from
	// Linux 5.6 on; older kernels would register the store and then fail
	// every request.
	MinKernelMajor = 5
	MinKernelMinor = 6
)
|
|
|
|
// io_uring constants (from linux/io_uring.h).

// IORING_SETUP_* bits, one bit per flag starting at bit 0.
const (
	IORING_SETUP_IOPOLL = 1 << iota
	IORING_SETUP_SQPOLL
	IORING_SETUP_SQ_AFF
	IORING_SETUP_CQSIZE
	IORING_SETUP_CLAMP
	IORING_SETUP_ATTACH_WQ
	IORING_SETUP_R_DISABLED
)

// Per-operation flag bits.
const (
	IORING_FSYNC_DATASYNC = 1 << 0

	IORING_TIMEOUT_ABS = 1 << 0
)

// IORING_OFF_* are the mmap offsets used to map the two rings and the
// submission entry array from the ring file descriptor.
const (
	IORING_OFF_SQ_RING = 0
	IORING_OFF_CQ_RING = 0x8000000
	IORING_OFF_SQES    = 0x10000000
)

// IORING_OP_* opcodes, numbered consecutively from 0.
const (
	IORING_OP_NOP = iota
	IORING_OP_READV
	IORING_OP_WRITEV
	IORING_OP_FSYNC
	IORING_OP_READ_FIXED
	IORING_OP_WRITE_FIXED
	IORING_OP_POLL_ADD
	IORING_OP_POLL_REMOVE
	IORING_OP_SYNC_FILE_RANGE
	IORING_OP_SENDMSG
	IORING_OP_RECVMSG
	IORING_OP_TIMEOUT
	IORING_OP_TIMEOUT_REMOVE
	IORING_OP_ACCEPT
	IORING_OP_ASYNC_CANCEL
	IORING_OP_LINK_TIMEOUT
	IORING_OP_CONNECT
	IORING_OP_FALLOCATE
	IORING_OP_OPENAT
	IORING_OP_CLOSE
	IORING_OP_FILES_UPDATE
	IORING_OP_STATX
	IORING_OP_READ
	IORING_OP_WRITE
	IORING_OP_FADVISE
	IORING_OP_MADVISE
	IORING_OP_SEND
	IORING_OP_RECV
	IORING_OP_OPENAT2
	IORING_OP_EPOLL_CTL
	IORING_OP_SPLICE
	IORING_OP_PROVIDE_BUFFERS
	IORING_OP_REMOVE_BUFFERS
	IORING_OP_TEE
	IORING_OP_SHUTDOWN
	IORING_OP_RENAMEAT
	IORING_OP_UNLINKAT
	IORING_OP_MKDIRAT
	IORING_OP_SYMLINKAT
	IORING_OP_LINKAT
	IORING_OP_MSG_RING
	IORING_OP_FSETXATTR
	IORING_OP_SETXATTR
	IORING_OP_FGETXATTR
	IORING_OP_GETXATTR
	IORING_OP_SOCKET
	IORING_OP_URING_CMD
	IORING_OP_SEND_ZC
	IORING_OP_SENDMSG_ZC
)

// IORING_CQE_F_* bits.
const (
	IORING_CQE_F_BUFFER = 1 << iota
	IORING_CQE_F_MORE
)
|
|
|
|
// io_uring structures
|
|
// Note: These are simplified structures for the operations we need
|
|
type ioUring struct {
|
|
fd int
|
|
sq *ioUringSq
|
|
cq *ioUringCq
|
|
flags uint32
|
|
ringSize int
|
|
}
|
|
|
|
type ioUringSq struct {
|
|
head *uint32
|
|
tail *uint32
|
|
ringMask *uint32
|
|
ringEntries *uint32
|
|
flags *uint32
|
|
dropped *uint32
|
|
array *uint32
|
|
sqes []ioSqringEntry
|
|
}
|
|
|
|
type ioUringCq struct {
|
|
head *uint32
|
|
tail *uint32
|
|
ringMask *uint32
|
|
ringEntries *uint32
|
|
overflow *uint32
|
|
cqes []ioCqringEntry
|
|
}
|
|
|
|
// ioSqringEntry mirrors the kernel's struct io_uring_sqe.
//
// The kernel entry is exactly 64 bytes and the entry array is indexed with
// that stride, so every field of the kernel struct has to be present here
// even when this package never sets it; a shorter Go struct makes every slot
// after the first land at the wrong address.
type ioSqringEntry struct {
	opcode uint8  // IORING_OP_* value
	flags  uint8  // per-entry IOSQE_* flags
	ioprio uint16 // request priority
	fd     int32  // file descriptor the operation targets
	off    uint64 // file offset
	addr   uint64 // user buffer address
	len    uint32 // buffer length in bytes
	// opFlags is the kernel's per-opcode flags union (rw_flags, fsync_flags,
	// timeout_flags, ...).
	opFlags  uint32
	userData uint64 // opaque value echoed back in the completion entry
	// The remaining fields are unused by this package and stay zero; they are
	// declared only to give the struct the kernel's size.
	bufIndex    uint16
	personality uint16
	spliceFdIn  int32
	_           [2]uint64
}
|
|
|
|
// ioCqringEntry is one completion queue entry (16 bytes).
type ioCqringEntry struct {
	// userData carries the value the submitter stored in the matching
	// submission entry.
	userData uint64
	// res is the operation result; callers in this file treat a negative
	// value as failure.
	res int32
	// flags holds completion flag bits.
	flags uint32
}
|
|
|
|
type ioUringParams struct {
|
|
sqEntries uint32
|
|
cqEntries uint32
|
|
flags uint32
|
|
sqThreadCPU uint32
|
|
sqThreadIdle uint32
|
|
features uint32
|
|
wqFd uint32
|
|
resv [3]uint32
|
|
sqOff ioSqringOffsets
|
|
cqOff ioCqringOffsets
|
|
}
|
|
|
|
// ioSqringOffsets lists, for each submission ring field, its byte offset from
// the start of the ring mapping (40 bytes in total).
type ioSqringOffsets struct {
	head, tail            uint32
	ringMask, ringEntries uint32
	flags, dropped        uint32
	array                 uint32
	resv1                 uint32
	resv2                 uint64
}
|
|
|
|
// ioCqringOffsets lists, for each completion ring field, its byte offset from
// the start of the ring mapping (40 bytes in total).
type ioCqringOffsets struct {
	head, tail            uint32
	ringMask, ringEntries uint32
	overflow              uint32
	cqes                  uint32
	flags                 uint32
	resv1                 uint32
	resv2                 uint64
}
|
|
|
|
// ioUringCqe has the same fields, in the same order, as ioCqringEntry.
// NOTE(review): nothing in the visible part of this file refers to it; it may
// be a leftover duplicate — confirm before relying on it.
type ioUringCqe struct {
	// userData is the submitter-chosen tag.
	userData uint64
	// res is the operation result.
	res int32
	// flags holds completion flag bits.
	flags uint32
}
|
|
|
|
var ioUringEnabled = false
|
|
|
|
func init() {
|
|
if isKernelVersionSupported() {
|
|
ioUringEnabled = true
|
|
scsi.RegisterBackingStore(IoUringBackingStorage, newIOUringBackingStore)
|
|
log.Info("io_uring backing store registered (kernel supports io_uring)")
|
|
} else {
|
|
log.Info("io_uring backing store not available (requires Linux 5.1+)")
|
|
}
|
|
}
|
|
|
|
func isKernelVersionSupported() bool {
|
|
var uname syscall.Utsname
|
|
if err := syscall.Uname(&uname); err != nil {
|
|
return false
|
|
}
|
|
|
|
// Parse kernel version (simplified)
|
|
// Format is typically "5.15.0-generic"
|
|
major := int(uname.Release[0] - '0')
|
|
minor := int(uname.Release[2] - '0')
|
|
|
|
if major > MinKernelMajor {
|
|
return true
|
|
}
|
|
if major == MinKernelMajor && minor >= MinKernelMinor {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// IOUringBackingStore implements BackingStore using io_uring
|
|
type IOUringBackingStore struct {
|
|
scsi.BaseBackingStore
|
|
file *os.File
|
|
ring *ioUring
|
|
queueDepth int
|
|
|
|
// Synchronization
|
|
submitMu sync.Mutex
|
|
|
|
// Statistics
|
|
opsSubmitted uint64
|
|
opsCompleted uint64
|
|
}
|
|
|
|
func newIOUringBackingStore() (api.BackingStore, error) {
|
|
return &IOUringBackingStore{
|
|
BaseBackingStore: scsi.BaseBackingStore{
|
|
Name: IoUringBackingStorage,
|
|
DataSize: 0,
|
|
OflagsSupported: 0,
|
|
},
|
|
queueDepth: DefaultQueueDepth,
|
|
}, nil
|
|
}
|
|
|
|
// Open opens the backing file and initializes io_uring
|
|
func (bs *IOUringBackingStore) Open(dev *api.SCSILu, path string) error {
|
|
var mode os.FileMode
|
|
|
|
finfo, err := os.Stat(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
mode = finfo.Mode()
|
|
|
|
f, err := os.OpenFile(path, os.O_RDWR|syscall.O_DIRECT, os.ModePerm)
|
|
if err != nil {
|
|
// Try without O_DIRECT if not supported
|
|
f, err = os.OpenFile(path, os.O_RDWR, os.ModePerm)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if (mode & os.ModeDevice) != 0 {
|
|
pos, err := f.Seek(0, os.SEEK_END)
|
|
if err != nil {
|
|
f.Close()
|
|
return err
|
|
}
|
|
bs.DataSize = uint64(pos)
|
|
} else {
|
|
bs.DataSize = uint64(finfo.Size())
|
|
}
|
|
|
|
bs.file = f
|
|
|
|
// Initialize io_uring
|
|
ring, err := bs.initIOUring()
|
|
if err != nil {
|
|
f.Close()
|
|
return fmt.Errorf("failed to initialize io_uring: %v", err)
|
|
}
|
|
bs.ring = ring
|
|
|
|
log.Infof("io_uring backing store opened: %s (queue depth: %d)", path, bs.queueDepth)
|
|
return nil
|
|
}
|
|
|
|
func (bs *IOUringBackingStore) initIOUring() (*ioUring, error) {
|
|
params := &ioUringParams{}
|
|
|
|
// Setup io_uring
|
|
fd, _, errno := syscall.Syscall(425, // __NR_io_uring_setup
|
|
uintptr(bs.queueDepth),
|
|
uintptr(unsafe.Pointer(params)),
|
|
0)
|
|
|
|
if errno != 0 {
|
|
return nil, fmt.Errorf("io_uring_setup failed: %v", errno)
|
|
}
|
|
|
|
ring := &ioUring{
|
|
fd: int(fd),
|
|
ringSize: int(params.sqEntries),
|
|
flags: params.flags,
|
|
}
|
|
|
|
// Map the submission queue ring
|
|
sqRingSize := params.sqOff.array + params.sqEntries*uint32(unsafe.Sizeof(uint32(0)))
|
|
cqRingSize := params.cqOff.cqes + params.cqEntries*uint32(unsafe.Sizeof(ioCqringEntry{}))
|
|
|
|
if params.features&1 != 0 { // IORING_FEAT_SINGLE_MMAP
|
|
if cqRingSize > sqRingSize {
|
|
sqRingSize = cqRingSize
|
|
}
|
|
cqRingSize = sqRingSize
|
|
}
|
|
|
|
// mmap submission queue
|
|
sqPtr, _, errno := syscall.Syscall6(syscall.SYS_MMAP,
|
|
0,
|
|
uintptr(sqRingSize),
|
|
syscall.PROT_READ|syscall.PROT_WRITE,
|
|
syscall.MAP_SHARED|syscall.MAP_POPULATE,
|
|
uintptr(fd),
|
|
uintptr(IORING_OFF_SQ_RING))
|
|
|
|
if errno != 0 {
|
|
syscall.Close(int(fd))
|
|
return nil, fmt.Errorf("mmap sq ring failed: %v", errno)
|
|
}
|
|
|
|
sqBase := sqPtr
|
|
|
|
// mmap completion queue (if not single mmap)
|
|
var cqPtr uintptr
|
|
if params.features&1 != 0 {
|
|
cqPtr = sqPtr
|
|
} else {
|
|
cqPtr, _, errno = syscall.Syscall6(syscall.SYS_MMAP,
|
|
0,
|
|
uintptr(cqRingSize),
|
|
syscall.PROT_READ|syscall.PROT_WRITE,
|
|
syscall.MAP_SHARED|syscall.MAP_POPULATE,
|
|
uintptr(fd),
|
|
uintptr(IORING_OFF_CQ_RING))
|
|
|
|
if errno != 0 {
|
|
syscall.Syscall(syscall.SYS_MUNMAP, sqPtr, uintptr(sqRingSize), 0)
|
|
syscall.Close(int(fd))
|
|
return nil, fmt.Errorf("mmap cq ring failed: %v", errno)
|
|
}
|
|
}
|
|
|
|
cqBase := cqPtr
|
|
|
|
// mmap SQEs
|
|
sqeSize := uint32(unsafe.Sizeof(ioSqringEntry{}))
|
|
sqePtr, _, errno := syscall.Syscall6(syscall.SYS_MMAP,
|
|
0,
|
|
uintptr(uint32(bs.queueDepth)*sqeSize),
|
|
syscall.PROT_READ|syscall.PROT_WRITE,
|
|
syscall.MAP_SHARED|syscall.MAP_POPULATE,
|
|
uintptr(fd),
|
|
uintptr(IORING_OFF_SQES))
|
|
|
|
if errno != 0 {
|
|
syscall.Syscall(syscall.SYS_MUNMAP, sqPtr, uintptr(sqRingSize), 0)
|
|
if cqPtr != sqPtr {
|
|
syscall.Syscall(syscall.SYS_MUNMAP, cqPtr, uintptr(cqRingSize), 0)
|
|
}
|
|
syscall.Close(int(fd))
|
|
return nil, fmt.Errorf("mmap sqes failed: %v", errno)
|
|
}
|
|
|
|
// Setup submission queue
|
|
sq := &ioUringSq{
|
|
head: (*uint32)(unsafe.Pointer(sqBase + uintptr(params.sqOff.head))),
|
|
tail: (*uint32)(unsafe.Pointer(sqBase + uintptr(params.sqOff.tail))),
|
|
ringMask: (*uint32)(unsafe.Pointer(sqBase + uintptr(params.sqOff.ringMask))),
|
|
ringEntries: (*uint32)(unsafe.Pointer(sqBase + uintptr(params.sqOff.ringEntries))),
|
|
flags: (*uint32)(unsafe.Pointer(sqBase + uintptr(params.sqOff.flags))),
|
|
dropped: (*uint32)(unsafe.Pointer(sqBase + uintptr(params.sqOff.dropped))),
|
|
array: (*uint32)(unsafe.Pointer(sqBase + uintptr(params.sqOff.array))),
|
|
sqes: make([]ioSqringEntry, bs.queueDepth),
|
|
}
|
|
copy(unsafe.Slice((*ioSqringEntry)(unsafe.Pointer(sqePtr)), bs.queueDepth), sq.sqes)
|
|
|
|
// Setup completion queue
|
|
cq := &ioUringCq{
|
|
head: (*uint32)(unsafe.Pointer(cqBase + uintptr(params.cqOff.head))),
|
|
tail: (*uint32)(unsafe.Pointer(cqBase + uintptr(params.cqOff.tail))),
|
|
ringMask: (*uint32)(unsafe.Pointer(cqBase + uintptr(params.cqOff.ringMask))),
|
|
ringEntries: (*uint32)(unsafe.Pointer(cqBase + uintptr(params.cqOff.ringEntries))),
|
|
overflow: (*uint32)(unsafe.Pointer(cqBase + uintptr(params.cqOff.overflow))),
|
|
cqes: make([]ioCqringEntry, params.cqEntries),
|
|
}
|
|
copy(unsafe.Slice((*ioCqringEntry)(unsafe.Pointer(cqBase+uintptr(params.cqOff.cqes))), params.cqEntries), cq.cqes)
|
|
|
|
ring.sq = sq
|
|
ring.cq = cq
|
|
|
|
return ring, nil
|
|
}
|
|
|
|
// Close closes the backing file and io_uring
|
|
func (bs *IOUringBackingStore) Close(dev *api.SCSILu) error {
|
|
if bs.ring != nil {
|
|
bs.closeIOUring()
|
|
bs.ring = nil
|
|
}
|
|
if bs.file != nil {
|
|
return bs.file.Close()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (bs *IOUringBackingStore) closeIOUring() {
|
|
if bs.ring != nil && bs.ring.fd >= 0 {
|
|
syscall.Close(bs.ring.fd)
|
|
}
|
|
}
|
|
|
|
// Init initializes the backing store
|
|
func (bs *IOUringBackingStore) Init(dev *api.SCSILu, Opts string) error {
|
|
return nil
|
|
}
|
|
|
|
// Exit exits the backing store
|
|
func (bs *IOUringBackingStore) Exit(dev *api.SCSILu) error {
|
|
return nil
|
|
}
|
|
|
|
// Size returns the size of the backing store
|
|
func (bs *IOUringBackingStore) Size(dev *api.SCSILu) uint64 {
|
|
return bs.DataSize
|
|
}
|
|
|
|
// Read reads data from the backing file using io_uring
|
|
func (bs *IOUringBackingStore) Read(offset, tl int64) ([]byte, error) {
|
|
if bs.file == nil {
|
|
return nil, fmt.Errorf("backing store is not open")
|
|
}
|
|
|
|
buf := make([]byte, tl)
|
|
|
|
// Prepare read operation
|
|
bs.submitMu.Lock()
|
|
defer bs.submitMu.Unlock()
|
|
|
|
// Get next SQE
|
|
sqe := bs.getSqe()
|
|
if sqe == nil {
|
|
// Ring is full, submit pending operations first
|
|
if err := bs.submit(); err != nil {
|
|
return nil, err
|
|
}
|
|
sqe = bs.getSqe()
|
|
if sqe == nil {
|
|
return nil, fmt.Errorf("io_uring queue full")
|
|
}
|
|
}
|
|
|
|
// Setup read operation
|
|
*sqe = ioSqringEntry{
|
|
opcode: IORING_OP_READ,
|
|
fd: int32(bs.file.Fd()),
|
|
off: uint64(offset),
|
|
addr: uint64(uintptr(unsafe.Pointer(&buf[0]))),
|
|
len: uint32(tl),
|
|
userData: 1, // 1 = read operation
|
|
}
|
|
|
|
// Submit and wait for completion
|
|
if err := bs.submitAndWait(1); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Get completion
|
|
cqe, err := bs.getCqe()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if cqe.res < 0 {
|
|
return nil, fmt.Errorf("read failed: %d", cqe.res)
|
|
}
|
|
|
|
atomic.AddUint64(&bs.opsCompleted, 1)
|
|
|
|
return buf[:cqe.res], nil
|
|
}
|
|
|
|
// Write writes data to the backing file using io_uring
|
|
func (bs *IOUringBackingStore) Write(wbuf []byte, offset int64) error {
|
|
if bs.file == nil {
|
|
return fmt.Errorf("backing store is not open")
|
|
}
|
|
|
|
bs.submitMu.Lock()
|
|
defer bs.submitMu.Unlock()
|
|
|
|
// Get next SQE
|
|
sqe := bs.getSqe()
|
|
if sqe == nil {
|
|
if err := bs.submit(); err != nil {
|
|
return err
|
|
}
|
|
sqe = bs.getSqe()
|
|
if sqe == nil {
|
|
return fmt.Errorf("io_uring queue full")
|
|
}
|
|
}
|
|
|
|
// Setup write operation
|
|
*sqe = ioSqringEntry{
|
|
opcode: IORING_OP_WRITE,
|
|
fd: int32(bs.file.Fd()),
|
|
off: uint64(offset),
|
|
addr: uint64(uintptr(unsafe.Pointer(&wbuf[0]))),
|
|
len: uint32(len(wbuf)),
|
|
userData: 2, // 2 = write operation
|
|
}
|
|
|
|
// Submit and wait for completion
|
|
if err := bs.submitAndWait(1); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Get completion
|
|
cqe, err := bs.getCqe()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if cqe.res < 0 {
|
|
return fmt.Errorf("write failed: %d", cqe.res)
|
|
}
|
|
|
|
if cqe.res != int32(len(wbuf)) {
|
|
return fmt.Errorf("short write: %d != %d", cqe.res, len(wbuf))
|
|
}
|
|
|
|
atomic.AddUint64(&bs.opsCompleted, 1)
|
|
return nil
|
|
}
|
|
|
|
// DataSync syncs data to disk using io_uring
|
|
func (bs *IOUringBackingStore) DataSync(offset, tl int64) error {
|
|
if bs.file == nil {
|
|
return fmt.Errorf("backing store is not open")
|
|
}
|
|
|
|
bs.submitMu.Lock()
|
|
defer bs.submitMu.Unlock()
|
|
|
|
sqe := bs.getSqe()
|
|
if sqe == nil {
|
|
if err := bs.submit(); err != nil {
|
|
return err
|
|
}
|
|
sqe = bs.getSqe()
|
|
if sqe == nil {
|
|
return fmt.Errorf("io_uring queue full")
|
|
}
|
|
}
|
|
|
|
*sqe = ioSqringEntry{
|
|
opcode: IORING_OP_FSYNC,
|
|
fd: int32(bs.file.Fd()),
|
|
len: IORING_FSYNC_DATASYNC,
|
|
userData: 3, // 3 = fsync operation
|
|
}
|
|
|
|
if err := bs.submitAndWait(1); err != nil {
|
|
return err
|
|
}
|
|
|
|
cqe, err := bs.getCqe()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if cqe.res < 0 {
|
|
return fmt.Errorf("fsync failed: %d", cqe.res)
|
|
}
|
|
|
|
atomic.AddUint64(&bs.opsCompleted, 1)
|
|
return nil
|
|
}
|
|
|
|
// DataAdvise provides advice about data access patterns
|
|
func (bs *IOUringBackingStore) DataAdvise(offset, length int64, advise uint32) error {
|
|
if bs.file == nil {
|
|
return fmt.Errorf("backing store is not open")
|
|
}
|
|
|
|
// Use posix_fadvise via syscall
|
|
_, _, errno := syscall.Syscall6(syscall.SYS_FADVISE64, uintptr(bs.file.Fd()), uintptr(offset), uintptr(length), uintptr(advise), 0, 0)
|
|
if errno != 0 {
|
|
return errno
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Unmap is a no-op for file-based storage
|
|
func (bs *IOUringBackingStore) Unmap([]api.UnmapBlockDescriptor) error {
|
|
return nil
|
|
}
|
|
|
|
// getSqe gets the next available submission queue entry
|
|
func (bs *IOUringBackingStore) getSqe() *ioSqringEntry {
|
|
sq := bs.ring.sq
|
|
tail := atomic.LoadUint32(sq.tail)
|
|
next := tail + 1
|
|
|
|
if next-atomic.LoadUint32(sq.head) > uint32(bs.ring.ringSize) {
|
|
return nil // Queue is full
|
|
}
|
|
|
|
idx := tail & *sq.ringMask
|
|
return &sq.sqes[idx]
|
|
}
|
|
|
|
// submit submits pending SQEs to the kernel
|
|
func (bs *IOUringBackingStore) submit() error {
|
|
if bs.ring == nil {
|
|
return fmt.Errorf("io_uring not initialized")
|
|
}
|
|
|
|
// Update tail
|
|
atomic.StoreUint32(bs.ring.sq.tail, atomic.LoadUint32(bs.ring.sq.tail)+1)
|
|
|
|
// Submit using io_uring_enter syscall
|
|
_, _, errno := syscall.Syscall6(426, // __NR_io_uring_enter
|
|
uintptr(bs.ring.fd),
|
|
uintptr(1), // submit 1 operation
|
|
0, // min complete
|
|
0, // flags
|
|
0, 0)
|
|
|
|
if errno != 0 {
|
|
return fmt.Errorf("io_uring_enter failed: %v", errno)
|
|
}
|
|
|
|
atomic.AddUint64(&bs.opsSubmitted, 1)
|
|
return nil
|
|
}
|
|
|
|
// submitAndWait submits operations and waits for completions
|
|
func (bs *IOUringBackingStore) submitAndWait(minComplete uint32) error {
|
|
if bs.ring == nil {
|
|
return fmt.Errorf("io_uring not initialized")
|
|
}
|
|
|
|
// Update tail
|
|
atomic.StoreUint32(bs.ring.sq.tail, atomic.LoadUint32(bs.ring.sq.tail)+1)
|
|
|
|
// Submit and wait
|
|
_, _, errno := syscall.Syscall6(426, // __NR_io_uring_enter
|
|
uintptr(bs.ring.fd),
|
|
uintptr(1), // submit 1 operation
|
|
uintptr(minComplete), // min complete
|
|
0, // flags
|
|
0, 0)
|
|
|
|
if errno != 0 {
|
|
return fmt.Errorf("io_uring_enter failed: %v", errno)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// getCqe gets a completion queue entry
|
|
func (bs *IOUringBackingStore) getCqe() (*ioCqringEntry, error) {
|
|
cq := bs.ring.cq
|
|
|
|
// Wait for completion
|
|
for atomic.LoadUint32(cq.head) == atomic.LoadUint32(cq.tail) {
|
|
// Spin-wait for completion
|
|
runtime.Gosched()
|
|
}
|
|
|
|
head := atomic.LoadUint32(cq.head)
|
|
idx := head & *cq.ringMask
|
|
cqe := &cq.cqes[idx]
|
|
|
|
// Update head
|
|
atomic.StoreUint32(cq.head, head+1)
|
|
|
|
return cqe, nil
|
|
}
|
|
|
|
// Stats returns io_uring statistics
|
|
func (bs *IOUringBackingStore) Stats() (submitted, completed uint64) {
|
|
return atomic.LoadUint64(&bs.opsSubmitted), atomic.LoadUint64(&bs.opsCompleted)
|
|
}
|
|
|
|
// Available returns true if io_uring is available on this system
|
|
func Available() bool {
|
|
return ioUringEnabled
|
|
}
|