diff --git a/perf/collector_libpfm.go b/perf/collector_libpfm.go index cbc646ba10..a0f4b77631 100644 --- a/perf/collector_libpfm.go +++ b/perf/collector_libpfm.go @@ -47,6 +47,10 @@ type collector struct { onlineCPUs []int eventToCustomEvent map[Event]*CustomEvent uncore stats.Collector + + // Handle for mocking purposes. + perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) + ioctlSetInt func(fd int, req uint, value int) error } type group struct { @@ -76,7 +80,7 @@ func init() { } func newCollector(cgroupPath string, events PerfEvents, onlineCPUs []int, cpuToSocket map[int]int) *collector { - collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket)} + collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket), perfEventOpen: unix.PerfEventOpen, ioctlSetInt: unix.IoctlSetInt} mapEventsToCustomEvents(collector) return collector } @@ -185,44 +189,30 @@ func (c *collector) setup() error { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() cgroupFd := int(cgroup.Fd()) - for i, group := range c.events.Core.Events { + groupIndex := 0 + for _, group := range c.events.Core.Events { // CPUs file descriptors of group leader needed for perf_event_open. leaderFileDescriptors := make(map[int]int, len(c.onlineCPUs)) for _, cpu := range c.onlineCPUs { leaderFileDescriptors[cpu] = groupLeaderFileDescriptor } - for j, event := range group.events { - // First element is group leader. - isGroupLeader := j == 0 - customEvent, ok := c.eventToCustomEvent[event] - if ok { - config := c.createConfigFromRawEvent(customEvent) - leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors) - if err != nil { - return err - } - } else { - config, err := c.createConfigFromEvent(event) - if err != nil { - return err - } - leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors) - if err != nil { - return err - } - // Clean memory allocated by C code. - C.free(unsafe.Pointer(config)) - } + leaderFileDescriptors, err := c.createLeaderFileDescriptors(group.events, cgroupFd, groupIndex, leaderFileDescriptors) + if err != nil { + klog.Errorf("Cannot count perf event group %v: %v", group.events, err) + c.deleteGroup(groupIndex) + continue + } else { + groupIndex++ } // Group is prepared so we should reset and enable counting. for _, fd := range leaderFileDescriptors { - err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0) + err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0) if err != nil { return err } - err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0) + err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0) if err != nil { return err } @@ -232,6 +222,35 @@ func (c *collector) setup() error { return nil } +func (c *collector) createLeaderFileDescriptors(events []Event, cgroupFd int, groupIndex int, leaderFileDescriptors map[int]int) (map[int]int, error) { + for j, event := range events { + // First element is group leader. + isGroupLeader := j == 0 + customEvent, ok := c.eventToCustomEvent[event] + var err error + if ok { + config := c.createConfigFromRawEvent(customEvent) + leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors) + if err != nil { + return nil, fmt.Errorf("cannot register perf event: %v", err) + } + } else { + config, err := c.createConfigFromEvent(event) + if err != nil { + return nil, fmt.Errorf("cannot create config from perf event: %v", err) + + } + leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors) + if err != nil { + return nil, fmt.Errorf("cannot register perf event: %v", err) + } + // Clean memory allocated by C code. + C.free(unsafe.Pointer(config)) + } + } + return leaderFileDescriptors, nil +} + func readPerfEventAttr(name string, pfmGetOsEventEncoding func(string, unsafe.Pointer) error) (*unix.PerfEventAttr, error) { perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{}))) // Fill memory with 0 values. @@ -279,13 +298,13 @@ func (c *collector) registerEvent(event eventInfo, leaderFileDescriptors map[int setAttributes(event.config, event.isGroupLeader) for _, cpu := range c.onlineCPUs { - fd, err := unix.PerfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags) + fd, err := c.perfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags) if err != nil { - return nil, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err) + return leaderFileDescriptors, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err) } perfFile := os.NewFile(uintptr(fd), event.name) if perfFile == nil { - return nil, fmt.Errorf("unable to create os.File from file descriptor %#v", fd) + return leaderFileDescriptors, fmt.Errorf("unable to create os.File from file descriptor %#v", fd) } c.addEventFile(event.groupIndex, event.name, cpu, perfFile) @@ -333,6 +352,19 @@ func (c *collector) addEventFile(index int, name string, cpu int, perfFile *os.F } } +func (c *collector) deleteGroup(index int) { + for name, files := range c.cpuFiles[index].cpuFiles { + for cpu, file := range files { + klog.V(5).Infof("Closing perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) + err := file.Close() + if err != nil { + klog.Warningf("Unable to close perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) + } + } + } + delete(c.cpuFiles, index) +} + func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr { length := len(event.Config) @@ -369,17 +401,8 @@ func (c *collector) Destroy() { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() - for _, group := range c.cpuFiles { - for name, files := range group.cpuFiles { - for cpu, file := range files { - klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) - err := file.Close() - if err != nil { - klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) - } - } - delete(group.cpuFiles, name) - } + for i := range c.cpuFiles { + c.deleteGroup(i) } } diff --git a/perf/collector_libpfm_test.go b/perf/collector_libpfm_test.go index 7e484c538e..c1ef62a836 100644 --- a/perf/collector_libpfm_test.go +++ b/perf/collector_libpfm_test.go @@ -20,6 +20,8 @@ package perf import ( "bytes" "encoding/binary" + "io/ioutil" + "os" "testing" "unsafe" @@ -200,6 +202,34 @@ func TestNewCollector(t *testing.T) { assert.Same(t, &perfCollector.events.Core.CustomEvents[0], perfCollector.eventToCustomEvent[Event("event_2")]) } +func TestCollectorSetup(t *testing.T) { + path, err := ioutil.TempDir("", "cgroup") + assert.Nil(t, err) + defer func() { + err := os.RemoveAll(path) + assert.Nil(t, err) + }() + events := PerfEvents{ + Core: Events{ + Events: []Group{ + {[]Event{"cache-misses"}, false}, + {[]Event{"non-existing-event"}, false}, + }, + }, + } + c := newCollector(path, events, []int{0}, map[int]int{0: 0}) + c.perfEventOpen = func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { + return int(attr.Config), nil + } + c.ioctlSetInt = func(fd int, req uint, value int) error { + return nil + } + err = c.setup() + assert.Nil(t, err) + assert.Equal(t, 1, len(c.cpuFiles)) + assert.Equal(t, []string{"cache-misses"}, c.cpuFiles[0].names) +} + var readGroupPerfStatCases = []struct { test string file GroupReadFormat diff --git a/perf/uncore_libpfm.go b/perf/uncore_libpfm.go index fdd35a34b6..fcdd8fb2f7 100644 --- a/perf/uncore_libpfm.go +++ b/perf/uncore_libpfm.go @@ -158,6 +158,28 @@ func NewUncoreCollector(cgroupPath string, events PerfEvents, cpuToSocket map[in return collector } +func (c *uncoreCollector) createLeaderFileDescriptors(events []Event, groupIndex int, groupPMUs map[Event]uncorePMUs, + leaderFileDescriptors map[string]map[uint32]int) (map[string]map[uint32]int, error) { + var err error + for _, event := range events { + eventName, _ := parseEventName(string(event)) + customEvent, ok := c.eventToCustomEvent[event] + if ok { + err = c.setupRawEvent(customEvent, groupPMUs[event], groupIndex, leaderFileDescriptors) + } else { + err = c.setupEvent(eventName, groupPMUs[event], groupIndex, leaderFileDescriptors) + } + if err != nil { + break + } + } + if err != nil { + c.deleteGroup(groupIndex) + return nil, fmt.Errorf("cannot create config from perf event: %v", err) + } + return leaderFileDescriptors, nil +} + func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error { readUncorePMUs, err := getUncorePMUs(devicesPath) if err != nil { @@ -190,21 +212,11 @@ func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error { leaderFileDescriptors[pmu.name][cpu] = groupLeaderFileDescriptor } } - - for _, event := range group.events { - eventName, _ := parseEventName(string(event)) - customEvent, ok := c.eventToCustomEvent[event] - if ok { - err = c.setupRawEvent(customEvent, groupPMUs[event], i, leaderFileDescriptors) - } else { - err = c.setupEvent(eventName, groupPMUs[event], i, leaderFileDescriptors) - } - - if err != nil { - return err - } + leaderFileDescriptors, err = c.createLeaderFileDescriptors(group.events, i, groupPMUs, leaderFileDescriptors) + if err != nil { + klog.Error(err) + continue } - // Group is prepared so we should reset and enable counting. for _, pmuCPUs := range leaderFileDescriptors { for _, fd := range pmuCPUs { @@ -320,20 +332,8 @@ func (c *uncoreCollector) Destroy() { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() - for groupIndex, groupPMUs := range c.cpuFiles { - for pmu, group := range groupPMUs { - for name, cpus := range group.cpuFiles { - for cpu, file := range cpus { - klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) - err := file.Close() - if err != nil { - klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) - } - } - delete(group.cpuFiles, name) - } - delete(groupPMUs, pmu) - } + for groupIndex := range c.cpuFiles { + c.deleteGroup(groupIndex) delete(c.cpuFiles, groupIndex) } } @@ -475,6 +475,24 @@ func (c *uncoreCollector) setupRawEvent(event *CustomEvent, pmus uncorePMUs, gro return nil } +func (c *uncoreCollector) deleteGroup(groupIndex int) { + groupPMUs := c.cpuFiles[groupIndex] + for pmu, group := range groupPMUs { + for name, cpus := range group.cpuFiles { + for cpu, file := range cpus { + klog.V(5).Infof("Closing uncore perf event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + err := file.Close() + if err != nil { + klog.Warningf("Unable to close perf event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + } + } + delete(group.cpuFiles, name) + } + delete(groupPMUs, pmu) + } + delete(c.cpuFiles, groupIndex) +} + func readPerfUncoreStat(file readerCloser, group group, cpu int, pmu string, cpuToSocket map[int]int) ([]info.PerfUncoreStat, error) { values, err := getPerfValues(file, group) if err != nil { diff --git a/perf/uncore_libpfm_test.go b/perf/uncore_libpfm_test.go index d5b2ab8957..a8e991f6b2 100644 --- a/perf/uncore_libpfm_test.go +++ b/perf/uncore_libpfm_test.go @@ -114,6 +114,7 @@ func TestUncoreCollectorSetup(t *testing.T) { Uncore: Events{ Events: []Group{ {[]Event{"uncore_imc_1/cas_count_read"}, false}, + {[]Event{"uncore_imc_1/non_existing_event"}, false}, {[]Event{"uncore_imc_0/cas_count_write", "uncore_imc_0/cas_count_read"}, true}, }, CustomEvents: []CustomEvent{ @@ -133,6 +134,11 @@ func TestUncoreCollectorSetup(t *testing.T) { } err = collector.setup(events, path) + assert.Equal(t, []string{"uncore_imc_1/cas_count_read"}, + getMapKeys(collector.cpuFiles[0]["uncore_imc_1"].cpuFiles)) + assert.ElementsMatch(t, []string{"uncore_imc_0/cas_count_write", "uncore_imc_0/cas_count_read"}, + getMapKeys(collector.cpuFiles[2]["uncore_imc_0"].cpuFiles)) + // There are no errors. assert.Nil(t, err) } @@ -295,3 +301,11 @@ func TestReadPerfUncoreStat(t *testing.T) { assert.NoError(t, err) assert.Equal(t, expectedStat, stat) } + +func getMapKeys(someMap map[string]map[int]readerCloser) []string { + var keys []string + for key := range someMap { + keys = append(keys, key) + } + return keys +}