From 227560a1716c77d785f4cb98636a5e23174b4c9a Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Tue, 8 Oct 2024 10:02:03 +0200 Subject: [PATCH] EXPERIMENT vhost server for virtioFS --- example/virtiofs/main.go | 4 +- vhostuser/server.go | 619 +++++++++++++++++++++++++++++++++++++-- vhostuser/types.go | 113 ++++++- 3 files changed, 702 insertions(+), 34 deletions(-) diff --git a/example/virtiofs/main.go b/example/virtiofs/main.go index 99da9b9d6..d67b11b13 100644 --- a/example/virtiofs/main.go +++ b/example/virtiofs/main.go @@ -32,8 +32,8 @@ func main() { break } - dev := vhostuser.FSDevice{} - srv := vhostuser.NewServer(conn, &dev) + dev := vhostuser.NewFSDevice() + srv := vhostuser.NewServer(conn, dev) if err := srv.Serve(); err != nil { log.Printf("Serve: %v %T", err, err) } diff --git a/vhostuser/server.go b/vhostuser/server.go index 6fef19745..39d23b460 100644 --- a/vhostuser/server.go +++ b/vhostuser/server.go @@ -5,17 +5,468 @@ import ( "log" "net" "reflect" + "sort" + "syscall" "unsafe" + + "golang.org/x/sys/unix" ) -type Device interface { - GetFeatures() []int - SetFeatures([]int) - GetProtocolFeatures() []int - SetProtocolFeatures([]int) +type DeviceRegion struct { + VhostUserMemoryRegion + + // MmapAddr uint64 + Data []byte +} + +func (r *DeviceRegion) String() string { + return r.VhostUserMemoryRegion.String() +} + +func (r *DeviceRegion) containsGuestAddr(guestAddr uint64) bool { + return guestAddr >= r.GuestPhysAddr && guestAddr < r.GuestPhysAddr+r.MemorySize +} + +func (r *DeviceRegion) FromDriverAddr(driverAddr uint64) unsafe.Pointer { + if driverAddr < r.VhostUserMemoryRegion.DriverAddr || driverAddr >= r.DriverAddr+r.MemorySize { + return nil + } + + return unsafe.Pointer(&r.Data[driverAddr-r.DriverAddr+r.MmapOffset]) } type FSDevice struct { + reqFD int + + // vring is the same as virtq? + vqs []Virtq + + // sorted by GuestPhysAddr + regions []DeviceRegion + + handle func(*VirtqElem) +} + +func NewFSDevice() *FSDevice { + d := &FSDevice{ + vqs: make([]Virtq, 2), + } + for i := range d.vqs { + d.vqs[i].Notification = true + } + return d +} + +type Ring struct { + Num int + Desc []VringDesc + Avail *VringAvail + AvailRing []uint16 + AvailUsedEvent *uint16 + Used *VringUsed + UsedRing []VringUsedElement + UsedAvailEvent *uint16 + + LogGuestAddr uint64 + Flags uint32 +} + +type VirtqInflight struct { + Features uint64 + Version uint16 + DescNum uint16 + LastBatchHead uint16 + UsedIdx uint16 + + Desc0 DescStateSplit // array. +} + +type DescStateSplit struct { + inflight uint8 + padding [5]uint8 + next uint16 + counter uint64 +} + +type InflightDesc struct { + index uint16 + counter uint64 +} + +type Virtq struct { + Vring Ring + + Inflight VirtqInflight + + ResubmitList *InflightDesc + + ResubmitNum uint16 + + Counter uint64 + LastAvailIdx uint16 + + ShadowAvailIdx uint16 + + UsedIdx uint16 + SignaledUsed uint16 + + SignaledUsedValid bool + Notification bool + + inuse uint + + handler func(*FSDevice, int) + + CallFD int + KickFD int + ErrFD int + Enable uint + Started bool + + Addr VhostVringAddr +} + +func (vq *Virtq) availIdx() uint16 { + // Weird, sideeffect? + vq.ShadowAvailIdx = vq.Vring.Avail.Idx + return vq.ShadowAvailIdx +} + +func (vq *Virtq) queueEmpty() bool { + // dev.broken + // vq.vring == nil + + if vq.ShadowAvailIdx != vq.LastAvailIdx { + return false + } + return vq.availIdx() == vq.LastAvailIdx +} + +func (d *FSDevice) MapRing(vq *Virtq) error { + if d := d.FromDriverAddr(vq.Addr.DescUserAddr); d == nil { + return fmt.Errorf("could not map DescUserAddr %x", vq.Addr.DescUserAddr) + } else { + vq.Vring.Desc = unsafe.Slice((*VringDesc)(d), vq.Vring.Num) + } + if d := d.FromDriverAddr(vq.Addr.UsedUserAddr); d == nil { + return fmt.Errorf("could not map UsedUserAddr %x", + vq.Addr.UsedUserAddr) + } else { + vq.Vring.Used = (*VringUsed)(d) + vq.Vring.UsedRing = unsafe.Slice(&vq.Vring.Used.Ring0, vq.Vring.Num) + //if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + vq.Vring.UsedAvailEvent = (*uint16)(unsafe.Pointer(&unsafe.Slice(&vq.Vring.Used.Ring0, vq.Vring.Num+1)[vq.Vring.Num])) + + } + + if d := d.FromDriverAddr(vq.Addr.AvailUserAddr); d == nil { + return fmt.Errorf("could not map AvailUserAddr %x", + vq.Addr.AvailUserAddr) + } else { + vq.Vring.Avail = (*VringAvail)(d) + vq.Vring.AvailRing = unsafe.Slice(&vq.Vring.Avail.Ring0, vq.Vring.Num) + //if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + vq.Vring.AvailUsedEvent = &unsafe.Slice(&vq.Vring.Avail.Ring0, vq.Vring.Num+1)[vq.Vring.Num] + } + return nil +} + +func (d *FSDevice) FromDriverAddr(driverAddr uint64) unsafe.Pointer { + for _, r := range d.regions { + d := r.FromDriverAddr(driverAddr) + if d != nil { + return d + } + } + return nil +} + +func (d *FSDevice) FromGuestAddr(guestAddr uint64, sz uint64) []byte { + idx := d.findRegionByGuestAddr(guestAddr) + r := d.regions[idx] + if !r.containsGuestAddr(guestAddr) { + return nil + } + + seg := r.Data[guestAddr-r.GuestPhysAddr:] + if len(seg) > int(sz) { + seg = seg[:sz] + } + return seg +} + +func (d *FSDevice) SetVringAddr(addr *VhostVringAddr) error { + vq := &d.vqs[addr.Index] + vq.Addr = *addr + vq.Vring.Flags = uint32(addr.Flags) // bitsize? + vq.Vring.LogGuestAddr = addr.LogGuestAddr + + if err := d.MapRing(vq); err != nil { + return err + } + + vq.UsedIdx = vq.Vring.Used.Idx // LE16toH + if vq.LastAvailIdx != vq.UsedIdx { + resume := true // device->processed_in_order() + if resume { + vq.ShadowAvailIdx = vq.UsedIdx + vq.LastAvailIdx = vq.UsedIdx + } + } + + return nil +} + +func (d *FSDevice) SetVringNum(state *VhostVringState) { + d.vqs[state.Index].Vring.Num = int(state.Num) +} + +func (d *FSDevice) SetVringBase(state *VhostVringState) { + p := &d.vqs[state.Index] + p.ShadowAvailIdx = uint16(state.Num) + p.LastAvailIdx = uint16(state.Num) +} +func (d *FSDevice) SetVringEnable(state *VhostVringState) { + p := &d.vqs[state.Index] + p.Enable = uint(state.Num) + d.kickMe(state.Index) +} + +func (d *FSDevice) kickMe(idx uint32) { + vq := &d.vqs[idx] + + // todo: mimick vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) + go func() { + for { + var id [8]byte + _, err := syscall.Read(vq.KickFD, id[:]) + + data, err := d.popQueue(vq) + if err != nil { + log.Printf("popq: %v", err) + continue + } + if data == nil { + log.Printf("queue was empty") + continue + } + log.Printf("popQ: in %q out %q", + data.in, + data.out) + + // should pass on vq as well? + if d.handle != nil { + d.handle(data) + } else { + log.Printf("no handler defined") + } + } + }() +} + +type VirtqElem struct { + index uint + in [][]byte + out [][]byte +} + +func (d *FSDevice) dumpRegions() { + for i, r := range d.regions { + log.Printf("region %d: %v", i, &r) + } +} + +func (d *FSDevice) popQueue(vq *Virtq) (*VirtqElem, error) { + + /* TODO: unlikely conditions */ + + // dev->broken? + // vq.vring.avail == 0 + if vq.ResubmitList != nil && vq.ResubmitNum > 0 { + return nil, fmt.Errorf("resubmit") + } + + if vq.queueEmpty() { + return nil, nil + } + + if int(vq.inuse) >= vq.Vring.Num { + return nil, fmt.Errorf("virtq size exceeded") + } + + // todo RMB read barrier. + + idx := int(vq.LastAvailIdx) % vq.Vring.Num + result := VirtqElem{ + index: uint(idx), + } + + vq.LastAvailIdx++ + head := vq.Vring.AvailRing[idx] + if int(head) > vq.Vring.Num { + log.Panicf("silly avail %d %d", head, vq.Vring.Num) + } + log.Printf("head %d", head) + if vq.Vring.UsedAvailEvent != nil { + *vq.Vring.UsedAvailEvent = vq.LastAvailIdx + } + + // vu_queue_map_desc + descArray := vq.Vring.Desc + desc := descArray[head] + log.Printf("desc %v", &desc) + d.dumpRegions() + if desc.Flags&VRING_DESC_F_INDIRECT != 0 { + eltSize := unsafe.Sizeof(VringDesc{}) + if (desc.Len % uint32(eltSize)) != 0 { + return nil, fmt.Errorf("modulo size") + } + + indirectAsBytes := d.FromGuestAddr(desc.Addr, uint64(desc.Len)) + if indirectAsBytes == nil { + return nil, fmt.Errorf("OOB read %x %#v", desc.Addr, d.regions) + } + if len(indirectAsBytes) != int(desc.Len) { + return nil, fmt.Errorf("partial read indirect desc") + } + n := desc.Len / uint32(eltSize) + descArray = unsafe.Slice((*VringDesc)(unsafe.Pointer(&indirectAsBytes[0])), n) + desc = descArray[0] + + log.Printf("desc array: %v", descArray) + } + + for { + iov := d.virtqMapDesc(desc.Addr, desc.Len) + log.Printf("got iov %q %d", iov, len(iov)) + if desc.Flags&VRING_DESC_F_WRITE != 0 { + // virtqueue_map_desc + result.in = append(result.in, iov...) + } else { + result.out = append(result.out, iov...) + } + // + + if desc.Flags&VRING_DESC_F_NEXT == 0 { + break + } + + head = desc.Next + // barrier + + // todo: check max + + desc = descArray[head] + } + + return &result, nil +} + +// take VIRTQUEUE_MAX_SIZE ? +func (d *FSDevice) virtqMapDesc(physAddr uint64, sz uint32) [][]byte { + var result [][]byte + + for sz > 0 { + d := d.FromGuestAddr(physAddr, uint64(sz)) + result = append(result, d) + sz -= uint32(len(d)) + physAddr += uint64(len(d)) + } + + return result +} + +func (d *FSDevice) findRegionByGuestAddr(guestAddr uint64) int { + return sort.Search(len(d.regions), + func(i int) bool { + return guestAddr < d.regions[i].GuestPhysAddr+d.regions[i].MemorySize + }) +} + +func (d *FSDevice) AddMemReg(fd int, reg *VhostUserMemoryRegion) error { + if len(d.regions) == int(d.GetMaxMemslots()) { + return fmt.Errorf("hot add memory") + } + + idx := d.findRegionByGuestAddr(reg.GuestPhysAddr) + if hps := GetFDHugepagesize(fd); hps != 0 { + return fmt.Errorf("huge pages") + } + + data, err := syscall.Mmap(fd, int64(reg.MmapOffset), int(reg.MemorySize), + syscall.PROT_READ|syscall.PROT_WRITE, + syscall.MAP_SHARED|syscall.MAP_NORESERVE) + if err != nil { + return err + } + syscall.Madvise(data, unix.MADV_DONTDUMP) + + d.regions = append(d.regions, DeviceRegion{}) + copy(d.regions[idx+1:], d.regions[idx:]) + d.regions[idx] = DeviceRegion{ + VhostUserMemoryRegion: VhostUserMemoryRegion{ + GuestPhysAddr: reg.GuestPhysAddr, + MemorySize: reg.MemorySize, + DriverAddr: reg.DriverAddr, + MmapOffset: 0, // input holds the offset into the fd. + }, + Data: data, + } + return nil +} + +func (d *FSDevice) SetVringKick(fd int, index uint64) error { + if index&(1<<8) != 0 { + log.Panic("not supported") + } + old := d.vqs[index].KickFD + if old != 0 { + syscall.Close(old) + } + d.vqs[index].KickFD = fd + + return syscall.SetNonblock(fd, false) +} + +// todo consolidate +func (d *FSDevice) SetVringErr(fd int, index uint64) { + if index&(1<<8) != 0 { + log.Panic("not supported") + } + + if old := d.vqs[index].ErrFD; old != 0 { + syscall.Close(old) + } + + d.vqs[index].ErrFD = fd +} + +func (d *FSDevice) SetVringCall(fd int, index uint64) { + if index&(1<<8) != 0 { + log.Panic("not supported") + } + if old := d.vqs[index].CallFD; old != 0 { + syscall.Close(old) + } + d.vqs[index].CallFD = fd +} + +func (d *FSDevice) SetOwner() { + +} + +func (d *FSDevice) SetReqFD(fd int) { + d.reqFD = fd +} + +const MAX_MEM_SLOTS = 509 + +func (d *FSDevice) GetMaxMemslots() uint64 { + return MAX_MEM_SLOTS +} + +func (d *FSDevice) GetQueueNum() uint64 { + return uint64(len(d.vqs)) } func (h *FSDevice) GetFeatures() []int { @@ -36,6 +487,8 @@ func (h *FSDevice) SetProtocolFeatures([]int) { } +// not supporting VHOST_USER_PROTOCOL_F_PAGEFAULT, so no support for +// postcopy listening. func (h *FSDevice) GetProtocolFeatures() []int { // ")\204\0\0\0\0\0\0" // x29 x84 @@ -50,12 +503,12 @@ func (h *FSDevice) GetProtocolFeatures() []int { type Server struct { conn *net.UnixConn - device Device + device *FSDevice } type empty struct{} -func NewServer(c *net.UnixConn, d Device) *Server { +func NewServer(c *net.UnixConn, d *FSDevice) *Server { return &Server{conn: c, device: d} } @@ -91,35 +544,69 @@ const hdrSize = int(unsafe.Sizeof(Header{})) func (s *Server) oneRequest() error { var inBuf, oobBuf, outBuf [4096]byte - bufN, oobN, flags, addr, err := s.conn.ReadMsgUnix(inBuf[:], oobBuf[:]) + + // _ = flags is usually CLOEXEC. + bufN, oobN, _, _, err := s.conn.ReadMsgUnix(inBuf[:hdrSize], oobBuf[:]) oob := oobBuf[:oobN] if err != nil { return err } + inHeader := (*Header)(unsafe.Pointer(&inBuf[0])) reqName := (reqNames[int(inHeader.Request)]) - payloadSz := bufN - hdrSize - for payloadSz < int(inHeader.Size) { - n, err := s.conn.Read(inBuf[bufN:]) + var inFDs []int + if len(oob) > 0 { + scms, err := syscall.ParseSocketControlMessage(oob) if err != nil { return err } - payloadSz += n - bufN += n + for _, scm := range scms { + fds, err := syscall.ParseUnixRights(&scm) + if err != nil { + return err + } + inFDs = append(inFDs, fds...) + + // TODO make sockets non-blocking? See util/vhost-user-server.c l.179 + } } - if payloadSz > int(inHeader.Size) { - return fmt.Errorf("read %d bytes, should be %d", payloadSz, inHeader.Size) + if inHeader.Size > 0 { + bufN2, oobN2, flags2, addr2, err := s.conn.ReadMsgUnix(inBuf[hdrSize:hdrSize+int(inHeader.Size)], oobBuf[oobN:]) + if err != nil { + return err + } + if bufN2 < int(inHeader.Size) { + return fmt.Errorf("short read got %d want %d", bufN2, inHeader.Size) + } + oobN += oobN2 + bufN += bufN2 + + if oobN2 > 0 { + log.Printf("oob2 %q flags2 %x addr2 %x", oobBuf[oobN:oobN2+oobN], flags2, addr2) + } } inPayload := unsafe.Pointer(&inBuf[hdrSize]) inDebug := "" if f := decodeIn[inHeader.Request]; f != nil { + // TODO - check payload size inDebug = fmt.Sprintf("%v", f(inPayload)) + } else if inHeader.Size > 0 { + inDebug = fmt.Sprintf("payload %q (%d bytes)", inBuf[hdrSize:hdrSize+int(inHeader.Size)], inHeader.Size) } - log.Printf("rx %s %s flags %x OOB %q addr %x", reqName, inDebug, flags, oob, addr) + needReply := (inHeader.Flags & (0x1 << 3)) != 0 + flagStr := "" + if needReply { + flagStr = "need_reply " + } + log.Printf("rx %-2d %s %s %sFDs %v", inHeader.Request, reqName, inDebug, flagStr, inFDs) + + if c := inFDCount[inHeader.Request]; c != len(inFDs) { + return fmt.Errorf("got %d fds for %s, want %d", len(inFDs), reqName, c) + } var outHeader = (*Header)(unsafe.Pointer(&outBuf[0])) outPayloadPtr := unsafe.Pointer(&outBuf[hdrSize]) @@ -128,6 +615,7 @@ func (s *Server) oneRequest() error { outHeader.Flags |= 0x4 // reply var rep interface{} + var deviceErr error switch inHeader.Request { case REQ_GET_FEATURES: r := (*GetFeaturesReply)(outPayloadPtr) @@ -143,27 +631,106 @@ func (s *Server) oneRequest() error { case REQ_SET_PROTOCOL_FEATURES: req := (*SetProtocolFeaturesRequest)(inPayloadPtr) s.setProtocolFeatures(req) + + case REQ_GET_QUEUE_NUM: + r := (*U64Payload)(outPayloadPtr) + r.Num = s.device.GetQueueNum() + rep = r + case REQ_GET_MAX_MEM_SLOTS: + r := (*U64Payload)(outPayloadPtr) + r.Num = s.device.GetMaxMemslots() + rep = r + case REQ_SET_BACKEND_REQ_FD: + s.device.SetReqFD(inFDs[0]) + case REQ_SET_OWNER: + // should pass in addr or something? + s.device.SetOwner() + case REQ_SET_VRING_CALL: + req := (*U64Payload)(inPayloadPtr) + s.device.SetVringCall(inFDs[0], req.Num) + case REQ_SET_VRING_ERR: + req := (*U64Payload)(inPayloadPtr) + s.device.SetVringErr(inFDs[0], req.Num) + case REQ_SET_VRING_KICK: + req := (*U64Payload)(inPayloadPtr) + deviceErr = s.device.SetVringKick(inFDs[0], req.Num) + case REQ_ADD_MEM_REG: + // req can also be u64 if in postcopy mode (sigh). + req := (*VhostUserMemRegMsg)(inPayloadPtr) + deviceErr = s.device.AddMemReg(inFDs[0], &req.Region) + case REQ_SET_VRING_NUM: + req := (*VhostVringState)(inPayloadPtr) + s.device.SetVringNum(req) + case REQ_SET_VRING_BASE: + req := (*VhostVringState)(inPayloadPtr) + s.device.SetVringBase(req) + case REQ_SET_VRING_ENABLE: + req := (*VhostVringState)(inPayloadPtr) + s.device.SetVringEnable(req) + case REQ_SET_VRING_ADDR: + req := (*VhostVringAddr)(inPayloadPtr) + deviceErr = s.device.SetVringAddr(req) + default: log.Printf("unknown operation %d", inHeader.Request) } outPayloadSz := 0 + if needReply && rep == nil { + r := (*U64Payload)(outPayloadPtr) + if deviceErr != nil { + log.Printf("request error: %v", deviceErr) + r.Num = 1 + } else { + r.Num = 0 + } + rep = r + + // qemu doesn't like NEED_REPLY + outHeader.Flags ^= (1 << 3) + } else if deviceErr != nil { + log.Printf("device error: %v", deviceErr) + } + + var repBytes []byte + outDebug := "no reply" if rep != nil { - outDebug := "" + outPayloadSz = int(reflect.ValueOf(rep).Elem().Type().Size()) + outHeader.Size = uint32(outPayloadSz) + repBytes = outBuf[:hdrSize+outPayloadSz] + if s, ok := rep.(fmt.Stringer); ok { outDebug = s.String() + } else { + outDebug = fmt.Sprintf("payload %q (%d bytes)", repBytes[hdrSize:], outPayloadSz) } + } - log.Printf("tx %s %s", reqName, outDebug) - outPayloadSz = int(reflect.ValueOf(rep).Elem().Type().Size()) + log.Printf("tx %s %s", reqName, outDebug) + + if len(repBytes) > 0 { + if _, err := s.conn.Write(repBytes); err != nil { + log.Printf("%v %T", err, err) + return err + } } - outHeader.Size = uint32(outPayloadSz) - repBytes := outBuf[:hdrSize+outPayloadSz] - log.Printf("replying %q", repBytes) - if _, err := s.conn.Write(repBytes); err != nil { - log.Printf("%v %T", err, err) - return err + return nil +} + +const HUGETLBFS_MAGIC = 0x958458f6 + +func GetFDHugepagesize(fd int) int { + var fs syscall.Statfs_t + var err error + for { + err = syscall.Fstatfs(fd, &fs) + if err != syscall.EINTR { + break + } } - return nil + if err == nil && fs.Type == HUGETLBFS_MAGIC { + return int(fs.Bsize) + } + return 0 } diff --git a/vhostuser/types.go b/vhostuser/types.go index 63b75cbb2..e92e2da0c 100644 --- a/vhostuser/types.go +++ b/vhostuser/types.go @@ -84,6 +84,13 @@ const ( ) var featureNames = map[int]string{ + F_NOTIFY_ON_EMPTY: "NOTIFY_ON_EMPTY", + F_LOG_ALL: "LOG_ALL", + F_ANY_LAYOUT: "ANY_LAYOUT", + RING_F_INDIRECT_DESC: "RING_F_INDIRECT_DESC", + RING_F_EVENT_IDX: "RING_F_EVENT_IDX", + F_PROTOCOL_FEATURES: "PROTOCOL_FEATURES", + F_VERSION_1: "VERSION_1", F_ACCESS_PLATFORM: "ACCESS_PLATFORM", F_RING_PACKED: "RING_PACKED", @@ -229,8 +236,16 @@ type GetFeaturesReply struct { } var decodeIn = map[uint32]func(unsafe.Pointer) interface{}{ + REQ_ADD_MEM_REG: func(p unsafe.Pointer) interface{} { return (*VhostUserMemRegMsg)(p) }, REQ_SET_FEATURES: func(p unsafe.Pointer) interface{} { return (*SetFeaturesRequest)(p) }, REQ_SET_PROTOCOL_FEATURES: func(p unsafe.Pointer) interface{} { return (*SetProtocolFeaturesRequest)(p) }, + REQ_SET_VRING_ADDR: func(p unsafe.Pointer) interface{} { return (*VhostVringAddr)(p) }, + REQ_SET_VRING_BASE: func(p unsafe.Pointer) interface{} { return (*VhostVringState)(p) }, + REQ_SET_VRING_CALL: func(p unsafe.Pointer) interface{} { return (*U64Payload)(p) }, + REQ_SET_VRING_ENABLE: func(p unsafe.Pointer) interface{} { return (*VhostVringState)(p) }, + REQ_SET_VRING_ERR: func(p unsafe.Pointer) interface{} { return (*U64Payload)(p) }, + REQ_SET_VRING_KICK: func(p unsafe.Pointer) interface{} { return (*U64Payload)(p) }, + REQ_SET_VRING_NUM: func(p unsafe.Pointer) interface{} { return (*VhostVringState)(p) }, } var decodeOut = map[uint32]func(unsafe.Pointer) interface{}{ @@ -238,6 +253,14 @@ var decodeOut = map[uint32]func(unsafe.Pointer) interface{}{ REQ_GET_PROTOCOL_FEATURES: func(p unsafe.Pointer) interface{} { return (*GetProtocolFeaturesReply)(p) }, } +var inFDCount = map[uint32]int{ + REQ_SET_BACKEND_REQ_FD: 1, + REQ_SET_VRING_CALL: 1, + REQ_SET_VRING_ERR: 1, + REQ_ADD_MEM_REG: 1, + REQ_SET_VRING_KICK: 1, +} + func (r *GetFeaturesReply) String() string { return fmt.Sprintf("{%s}", maskToString(featureNames, r.Mask)) @@ -247,6 +270,11 @@ type SetFeaturesRequest struct { Mask uint64 } +func (r *SetFeaturesRequest) String() string { + return fmt.Sprintf("{%s}", + maskToString(featureNames, r.Mask)) +} + type GetProtocolFeaturesReply struct { Mask uint64 } @@ -265,6 +293,14 @@ func (r *SetProtocolFeaturesRequest) String() string { maskToString(protocolFeatureNames, r.Mask)) } +type U64Payload struct { + Num uint64 +} + +func (p *U64Payload) String() string { + return fmt.Sprintf("{%d}", p.Num) +} + /* typedef union { #define VHOST_USER_VRING_IDX_MASK (0xff) @@ -285,15 +321,19 @@ typedef union { } VhostUserPayload; */ -type VhostRingState struct { - Index uintptr - Num uintptr +type VhostVringState struct { + Index uint32 + Num uint32 // unsigned int? +} + +func (s *VhostVringState) String() string { + return fmt.Sprintf("idx %d num %d", s.Index, s.Num) } type VhostVringAddr struct { - Index uintptr + Index uint32 /* Option flags. */ - Flags uintptr + Flags uint32 /* Flag values: */ /* Whether log address is valid. If set enables logging. */ //#define VHOST_VRING_F_LOG 0 @@ -310,13 +350,74 @@ type VhostVringAddr struct { LogGuestAddr uint64 } +func (a *VhostVringAddr) String() string { + return fmt.Sprintf("idx %d flags %x Desc %x Used %x Avail %x LogGuest %x", + a.Index, a.Flags, a.DescUserAddr, a.UsedUserAddr, + a.AvailUserAddr, a.LogGuestAddr) +} + +// virtio_ring.h + +// must be aligned on 4 bytes, but that's automatic? +type VringUsedElement struct { + ID uint32 + Len uint32 +} + +// aligned 4 bytes +type VringUsed struct { + Flags uint16 + Idx uint16 + Ring0 VringUsedElement +} + +// qemu:include/standard-headers/linux/virtio_ring.h +const ( + /* This marks a buffer as continuing via the next field. */ + VRING_DESC_F_NEXT = 1 + /* This marks a buffer as write-only (otherwise read-only). */ + VRING_DESC_F_WRITE = 2 + /* This means the buffer contains a list of buffer descriptors. */ + VRING_DESC_F_INDIRECT = 4 +) + +var vringDescNames = map[int]string{ + 0: "NEXT", + 1: "WRITE", + 2: "INDIRECT", +} + +// Aligned 16 byte +type VringDesc struct { + Addr uint64 + Len uint32 + Flags uint16 + Next uint16 +} + +func (d *VringDesc) String() string { + return fmt.Sprintf("[0x%x,+0x%x) %s next %d", d.Addr, d.Len, maskToString(vringDescNames, uint64(d.Flags)), d.Next) +} + +// aligned on 2 bytes +type VringAvail struct { + Flags uint16 + Idx uint16 + Ring0 uint16 +} + type VhostUserMemoryRegion struct { GuestPhysAddr uint64 MemorySize uint64 - UserspaceAddr uint64 + DriverAddr uint64 MmapOffset uint64 } +func (r *VhostUserMemoryRegion) String() string { + return fmt.Sprintf("Guest [0x%x,+0x%x) Driver %x MmapOff %x", + r.GuestPhysAddr, r.MemorySize, r.DriverAddr, r.MmapOffset) +} + type VhostUserMemory struct { Nregions uint32 Padding uint32