Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Continue counting other perf events group, even if there is an error in one #8

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 63 additions & 40 deletions perf/collector_libpfm.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ type collector struct {
onlineCPUs []int
eventToCustomEvent map[Event]*CustomEvent
uncore stats.Collector

// Handle for mocking purposes.
perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error)
ioctlSetInt func(fd int, req uint, value int) error
}

type group struct {
Expand Down Expand Up @@ -76,7 +80,7 @@ func init() {
}

func newCollector(cgroupPath string, events PerfEvents, onlineCPUs []int, cpuToSocket map[int]int) *collector {
collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket)}
collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket), perfEventOpen: unix.PerfEventOpen, ioctlSetInt: unix.IoctlSetInt}
mapEventsToCustomEvents(collector)
return collector
}
Expand Down Expand Up @@ -185,44 +189,30 @@ func (c *collector) setup() error {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
cgroupFd := int(cgroup.Fd())
for i, group := range c.events.Core.Events {
groupIndex := 0
for _, group := range c.events.Core.Events {
// CPUs file descriptors of group leader needed for perf_event_open.
leaderFileDescriptors := make(map[int]int, len(c.onlineCPUs))
for _, cpu := range c.onlineCPUs {
leaderFileDescriptors[cpu] = groupLeaderFileDescriptor
}

for j, event := range group.events {
// First element is group leader.
isGroupLeader := j == 0
customEvent, ok := c.eventToCustomEvent[event]
if ok {
config := c.createConfigFromRawEvent(customEvent)
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors)
if err != nil {
return err
}
} else {
config, err := c.createConfigFromEvent(event)
if err != nil {
return err
}
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors)
if err != nil {
return err
}
// Clean memory allocated by C code.
C.free(unsafe.Pointer(config))
}
leaderFileDescriptors, err := c.createLeaderFileDescriptors(group.events, cgroupFd, groupIndex, leaderFileDescriptors)
if err != nil {
klog.Errorf("Cannot count perf event group %v: %v", group.events, err)
c.deleteGroup(groupIndex)
continue

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You forgot about cleaning after the group. This leads to errors when reading stats.

Consider using this function

func (c *collector) deleteGroup(index int) {
	for name, files := range c.cpuFiles[index].cpuFiles {
		for cpu, file := range files {
			klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
			err := file.Close()
			if err != nil {
				klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
			}
		}
	}
	delete(c.cpuFiles, index)
}

This is part of code from Destroy(). So it can look now like:

func (c *collector) Destroy() {
	c.uncore.Destroy()
	c.cpuFilesLock.Lock()
	defer c.cpuFilesLock.Unlock()

	for i, _ := range c.cpuFiles {
		c.deleteGroup(i)
	}
}

} else {
groupIndex++
}

// Group is prepared so we should reset and enable counting.
for _, fd := range leaderFileDescriptors {
err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0)
err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0)
if err != nil {
return err
}
err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0)
err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0)
if err != nil {
return err
}
Expand All @@ -232,6 +222,35 @@ func (c *collector) setup() error {
return nil
}

func (c *collector) createLeaderFileDescriptors(events []Event, cgroupFd int, groupIndex int, leaderFileDescriptors map[int]int) (map[int]int, error) {
for j, event := range events {
// First element is group leader.
isGroupLeader := j == 0
customEvent, ok := c.eventToCustomEvent[event]
var err error
if ok {
config := c.createConfigFromRawEvent(customEvent)
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors)
if err != nil {
return nil, fmt.Errorf("cannot register perf event: %v", err)
}
} else {
config, err := c.createConfigFromEvent(event)
if err != nil {
return nil, fmt.Errorf("cannot create config from perf event: %v", err)

}
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors)
if err != nil {
return nil, fmt.Errorf("cannot register perf event: %v", err)
}
// Clean memory allocated by C code.
C.free(unsafe.Pointer(config))
}
}
return leaderFileDescriptors, nil
}

func readPerfEventAttr(name string, pfmGetOsEventEncoding func(string, unsafe.Pointer) error) (*unix.PerfEventAttr, error) {
perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{})))
// Fill memory with 0 values.
Expand Down Expand Up @@ -279,13 +298,13 @@ func (c *collector) registerEvent(event eventInfo, leaderFileDescriptors map[int
setAttributes(event.config, event.isGroupLeader)

for _, cpu := range c.onlineCPUs {
fd, err := unix.PerfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags)
fd, err := c.perfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags)
if err != nil {
return nil, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err)
return leaderFileDescriptors, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err)
}
perfFile := os.NewFile(uintptr(fd), event.name)
if perfFile == nil {
return nil, fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
return leaderFileDescriptors, fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
}

c.addEventFile(event.groupIndex, event.name, cpu, perfFile)
Expand Down Expand Up @@ -333,6 +352,19 @@ func (c *collector) addEventFile(index int, name string, cpu int, perfFile *os.F
}
}

func (c *collector) deleteGroup(index int) {
for name, files := range c.cpuFiles[index].cpuFiles {
for cpu, file := range files {
klog.V(5).Infof("Closing perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
}
}
}
delete(c.cpuFiles, index)
}

func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
length := len(event.Config)

Expand Down Expand Up @@ -369,17 +401,8 @@ func (c *collector) Destroy() {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()

for _, group := range c.cpuFiles {
for name, files := range group.cpuFiles {
for cpu, file := range files {
klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
}
}
delete(group.cpuFiles, name)
}
for i := range c.cpuFiles {
c.deleteGroup(i)
}
}

Expand Down
30 changes: 30 additions & 0 deletions perf/collector_libpfm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ package perf
import (
"bytes"
"encoding/binary"
"io/ioutil"
"os"
"testing"
"unsafe"

Expand Down Expand Up @@ -200,6 +202,34 @@ func TestNewCollector(t *testing.T) {
assert.Same(t, &perfCollector.events.Core.CustomEvents[0], perfCollector.eventToCustomEvent[Event("event_2")])
}

func TestCollectorSetup(t *testing.T) {
path, err := ioutil.TempDir("", "cgroup")
assert.Nil(t, err)
defer func() {
err := os.RemoveAll(path)
assert.Nil(t, err)
}()
events := PerfEvents{
Core: Events{
Events: []Group{
{[]Event{"cache-misses"}, false},
{[]Event{"non-existing-event"}, false},

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good one! 👍

},
},
}
c := newCollector(path, events, []int{0}, map[int]int{0: 0})
c.perfEventOpen = func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
return int(attr.Config), nil
}
c.ioctlSetInt = func(fd int, req uint, value int) error {
return nil
}
err = c.setup()
assert.Nil(t, err)
assert.Equal(t, 1, len(c.cpuFiles))
assert.Equal(t, []string{"cache-misses"}, c.cpuFiles[0].names)
}

var readGroupPerfStatCases = []struct {
test string
file GroupReadFormat
Expand Down
74 changes: 46 additions & 28 deletions perf/uncore_libpfm.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,28 @@ func NewUncoreCollector(cgroupPath string, events PerfEvents, cpuToSocket map[in
return collector
}

func (c *uncoreCollector) createLeaderFileDescriptors(events []Event, groupIndex int, groupPMUs map[Event]uncorePMUs,
leaderFileDescriptors map[string]map[uint32]int) (map[string]map[uint32]int, error) {
var err error
for _, event := range events {
eventName, _ := parseEventName(string(event))
customEvent, ok := c.eventToCustomEvent[event]
if ok {
err = c.setupRawEvent(customEvent, groupPMUs[event], groupIndex, leaderFileDescriptors)
} else {
err = c.setupEvent(eventName, groupPMUs[event], groupIndex, leaderFileDescriptors)
}
if err != nil {
break
}
}
if err != nil {
c.deleteGroup(groupIndex)
return nil, fmt.Errorf("cannot create config from perf event: %v", err)
}
return leaderFileDescriptors, nil
}

func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error {
readUncorePMUs, err := getUncorePMUs(devicesPath)
if err != nil {
Expand Down Expand Up @@ -190,21 +212,11 @@ func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error {
leaderFileDescriptors[pmu.name][cpu] = groupLeaderFileDescriptor
}
}

for _, event := range group.events {
eventName, _ := parseEventName(string(event))
customEvent, ok := c.eventToCustomEvent[event]
if ok {
err = c.setupRawEvent(customEvent, groupPMUs[event], i, leaderFileDescriptors)
} else {
err = c.setupEvent(eventName, groupPMUs[event], i, leaderFileDescriptors)
}

if err != nil {
return err
}
leaderFileDescriptors, err = c.createLeaderFileDescriptors(group.events, i, groupPMUs, leaderFileDescriptors)
if err != nil {
klog.Error(err)
continue
}

// Group is prepared so we should reset and enable counting.
for _, pmuCPUs := range leaderFileDescriptors {
for _, fd := range pmuCPUs {
Expand Down Expand Up @@ -320,20 +332,8 @@ func (c *uncoreCollector) Destroy() {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()

for groupIndex, groupPMUs := range c.cpuFiles {
for pmu, group := range groupPMUs {
for name, cpus := range group.cpuFiles {
for cpu, file := range cpus {
klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
}
}
delete(group.cpuFiles, name)
}
delete(groupPMUs, pmu)
}
for groupIndex := range c.cpuFiles {
c.deleteGroup(groupIndex)
delete(c.cpuFiles, groupIndex)
}
}
Expand Down Expand Up @@ -475,6 +475,24 @@ func (c *uncoreCollector) setupRawEvent(event *CustomEvent, pmus uncorePMUs, gro
return nil
}

func (c *uncoreCollector) deleteGroup(groupIndex int) {
groupPMUs := c.cpuFiles[groupIndex]
for pmu, group := range groupPMUs {
for name, cpus := range group.cpuFiles {
for cpu, file := range cpus {
klog.V(5).Infof("Closing uncore perf event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
}
}
delete(group.cpuFiles, name)
}
delete(groupPMUs, pmu)
}
delete(c.cpuFiles, groupIndex)
}

func readPerfUncoreStat(file readerCloser, group group, cpu int, pmu string, cpuToSocket map[int]int) ([]info.PerfUncoreStat, error) {
values, err := getPerfValues(file, group)
if err != nil {
Expand Down
14 changes: 14 additions & 0 deletions perf/uncore_libpfm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ func TestUncoreCollectorSetup(t *testing.T) {
Uncore: Events{
Events: []Group{
{[]Event{"uncore_imc_1/cas_count_read"}, false},
{[]Event{"uncore_imc_1/non_existing_event"}, false},
{[]Event{"uncore_imc_0/cas_count_write", "uncore_imc_0/cas_count_read"}, true},
},
CustomEvents: []CustomEvent{
Expand All @@ -133,6 +134,11 @@ func TestUncoreCollectorSetup(t *testing.T) {
}

err = collector.setup(events, path)
assert.Equal(t, []string{"uncore_imc_1/cas_count_read"},
getMapKeys(collector.cpuFiles[0]["uncore_imc_1"].cpuFiles))
assert.ElementsMatch(t, []string{"uncore_imc_0/cas_count_write", "uncore_imc_0/cas_count_read"},
getMapKeys(collector.cpuFiles[2]["uncore_imc_0"].cpuFiles))

// There are no errors.
assert.Nil(t, err)
}
Expand Down Expand Up @@ -295,3 +301,11 @@ func TestReadPerfUncoreStat(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, expectedStat, stat)
}

func getMapKeys(someMap map[string]map[int]readerCloser) []string {
var keys []string
for key := range someMap {
keys = append(keys, key)
}
return keys
}