From 5f78d87298f07118ea6b534fc0f558c9946d71f5 Mon Sep 17 00:00:00 2001 From: "Wisniewski, Krzysztof2" Date: Wed, 23 Sep 2020 15:26:44 +0200 Subject: [PATCH 1/4] Continue counting other perf events group, even if there is an error in one Signed-off-by: Wisniewski, Krzysztof2 --- perf/collector_libpfm.go | 103 +++++++++++++++++++++------------- perf/collector_libpfm_test.go | 32 +++++++++++ perf/uncore_libpfm.go | 40 ++++++++----- perf/uncore_libpfm_test.go | 14 +++++ 4 files changed, 134 insertions(+), 55 deletions(-) diff --git a/perf/collector_libpfm.go b/perf/collector_libpfm.go index 09d271eb02..f820d498f0 100644 --- a/perf/collector_libpfm.go +++ b/perf/collector_libpfm.go @@ -46,6 +46,10 @@ type collector struct { onlineCPUs []int eventToCustomEvent map[Event]*CustomEvent uncore stats.Collector + + // Handle for mocking purposes. + perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) + ioctlSetInt func(fd int, req uint, value int) error } type group struct { @@ -75,7 +79,7 @@ func init() { } func newCollector(cgroupPath string, events PerfEvents, onlineCPUs []int, cpuToSocket map[int]int) *collector { - collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket)} + collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket), perfEventOpen: unix.PerfEventOpen, ioctlSetInt: unix.IoctlSetInt} mapEventsToCustomEvents(collector) return collector } @@ -184,44 +188,30 @@ func (c *collector) setup() error { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() cgroupFd := int(cgroup.Fd()) - for i, group := range c.events.Core.Events { + groupIndex := 0 + for _, group := range c.events.Core.Events { // CPUs file descriptors of group leader needed for perf_event_open. leaderFileDescriptors := make(map[int]int, len(c.onlineCPUs)) for _, cpu := range c.onlineCPUs { leaderFileDescriptors[cpu] = groupLeaderFileDescriptor } - for j, event := range group.events { - // First element is group leader. - isGroupLeader := j == 0 - customEvent, ok := c.eventToCustomEvent[event] - if ok { - config := c.createConfigFromRawEvent(customEvent) - leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors) - if err != nil { - return err - } - } else { - config, err := c.createConfigFromEvent(event) - if err != nil { - return err - } - leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors) - if err != nil { - return err - } - // Clean memory allocated by C code. - C.free(unsafe.Pointer(config)) - } + leaderFileDescriptors, err := c.createLeaderFileDescriptors(group.events, cgroupFd, groupIndex, leaderFileDescriptors) + if err != nil { + klog.Error(err) + c.deleteGroup(groupIndex) + continue + } else { + groupIndex++ } // Group is prepared so we should reset and enable counting. for _, fd := range leaderFileDescriptors { - err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0) + err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0) if err != nil { return err } - err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0) + err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0) if err != nil { return err } @@ -231,6 +221,35 @@ func (c *collector) setup() error { return nil } +func (c *collector) createLeaderFileDescriptors(events []Event, cgroupFd int, groupIndex int, leaderFileDescriptors map[int]int) (map[int]int, error) { + for j, event := range events { + // First element is group leader. + isGroupLeader := j == 0 + customEvent, ok := c.eventToCustomEvent[event] + var err error + if ok { + config := c.createConfigFromRawEvent(customEvent) + leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors) + if err != nil { + return nil, fmt.Errorf("cannot to register perf event: %v", err) + } + } else { + config, err := c.createConfigFromEvent(event) + if err != nil { + return nil, fmt.Errorf("cannot create config from perf event: %v", err) + + } + leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors) + if err != nil { + return nil, fmt.Errorf("cannot to register perf event: %v", err) + } + // Clean memory allocated by C code. + C.free(unsafe.Pointer(config)) + } + } + return leaderFileDescriptors, nil +} + func readPerfEventAttr(name string) (*unix.PerfEventAttr, error) { perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{}))) event := pfmPerfEncodeArgT{} @@ -269,13 +288,13 @@ func (c *collector) registerEvent(event eventInfo, leaderFileDescriptors map[int setAttributes(event.config, event.isGroupLeader) for _, cpu := range c.onlineCPUs { - fd, err := unix.PerfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags) + fd, err := c.perfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags) if err != nil { - return nil, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err) + return leaderFileDescriptors, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err) } perfFile := os.NewFile(uintptr(fd), event.name) if perfFile == nil { - return nil, fmt.Errorf("unable to create os.File from file descriptor %#v", fd) + return leaderFileDescriptors, fmt.Errorf("unable to create os.File from file descriptor %#v", fd) } c.addEventFile(event.groupIndex, event.name, cpu, perfFile) @@ -324,6 +343,19 @@ func (c *collector) addEventFile(index int, name string, cpu int, perfFile *os.F } } +func (c *collector) deleteGroup(index int) { + for name, files := range c.cpuFiles[index].cpuFiles { + for cpu, file := range files { + klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) + err := file.Close() + if err != nil { + klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) + } + } + } + delete(c.cpuFiles, index) +} + func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr { length := len(event.Config) @@ -360,17 +392,8 @@ func (c *collector) Destroy() { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() - for _, group := range c.cpuFiles { - for name, files := range group.cpuFiles { - for cpu, file := range files { - klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) - err := file.Close() - if err != nil { - klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) - } - } - delete(group.cpuFiles, name) - } + for i := range c.cpuFiles { + c.deleteGroup(i) } } diff --git a/perf/collector_libpfm_test.go b/perf/collector_libpfm_test.go index 4f43f0d682..c75194918c 100644 --- a/perf/collector_libpfm_test.go +++ b/perf/collector_libpfm_test.go @@ -20,8 +20,12 @@ package perf import ( "bytes" "encoding/binary" + "io/ioutil" + "os" "testing" + "golang.org/x/sys/unix" + "github.com/stretchr/testify/assert" info "github.com/google/cadvisor/info/v1" @@ -197,6 +201,34 @@ func TestNewCollector(t *testing.T) { assert.Same(t, &perfCollector.events.Core.CustomEvents[0], perfCollector.eventToCustomEvent[Event("event_2")]) } +func TestCollectorSetup(t *testing.T) { + path, err := ioutil.TempDir("", "cgroup") + assert.Nil(t, err) + defer func() { + err := os.RemoveAll(path) + assert.Nil(t, err) + }() + events := PerfEvents{ + Core: Events{ + Events: []Group{ + {[]Event{"cache-misses"}, false}, + {[]Event{"non-existing-event"}, false}, + }, + }, + } + c := newCollector(path, events, []int{0}, map[int]int{0: 0}) + c.perfEventOpen = func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { + return int(attr.Config), nil + } + c.ioctlSetInt = func(fd int, req uint, value int) error { + return nil + } + err = c.setup() + assert.Nil(t, err) + assert.Equal(t, 1, len(c.cpuFiles)) + assert.Equal(t, []string{"cache-misses"}, c.cpuFiles[0].names) +} + var readGroupPerfStatCases = []struct { test string file GroupReadFormat diff --git a/perf/uncore_libpfm.go b/perf/uncore_libpfm.go index 2e4cf20d53..ea85178acb 100644 --- a/perf/uncore_libpfm.go +++ b/perf/uncore_libpfm.go @@ -202,9 +202,14 @@ func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error { } if err != nil { - return err + klog.Errorf("cannot create config from perf event: %v", err) + break } } + if err != nil { + c.deleteGroup(i) + continue + } // Group is prepared so we should reset and enable counting. for _, pmuCPUs := range leaderFileDescriptors { @@ -322,20 +327,8 @@ func (c *uncoreCollector) Destroy() { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() - for groupIndex, groupPMUs := range c.cpuFiles { - for pmu, group := range groupPMUs { - for name, cpus := range group.cpuFiles { - for cpu, file := range cpus { - klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) - err := file.Close() - if err != nil { - klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) - } - } - delete(group.cpuFiles, name) - } - delete(groupPMUs, pmu) - } + for groupIndex, _ := range c.cpuFiles { + c.deleteGroup(groupIndex) delete(c.cpuFiles, groupIndex) } } @@ -478,6 +471,23 @@ func (c *uncoreCollector) setupRawEvent(event *CustomEvent, pmus uncorePMUs, gro return nil } +func (c *uncoreCollector) deleteGroup (groupIndex int) { + groupPMUs := c.cpuFiles[groupIndex] + for pmu, group := range groupPMUs { + for name, cpus := range group.cpuFiles { + for cpu, file := range cpus { + klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + err := file.Close() + if err != nil { + klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + } + } + delete(group.cpuFiles, name) + } + delete(groupPMUs, pmu) + } + delete(c.cpuFiles, groupIndex) +} func readPerfUncoreStat(file readerCloser, group group, cpu int, pmu string, cpuToSocket map[int]int) ([]info.PerfUncoreStat, error) { values, err := getPerfValues(file, group) diff --git a/perf/uncore_libpfm_test.go b/perf/uncore_libpfm_test.go index d5b2ab8957..a8e991f6b2 100644 --- a/perf/uncore_libpfm_test.go +++ b/perf/uncore_libpfm_test.go @@ -114,6 +114,7 @@ func TestUncoreCollectorSetup(t *testing.T) { Uncore: Events{ Events: []Group{ {[]Event{"uncore_imc_1/cas_count_read"}, false}, + {[]Event{"uncore_imc_1/non_existing_event"}, false}, {[]Event{"uncore_imc_0/cas_count_write", "uncore_imc_0/cas_count_read"}, true}, }, CustomEvents: []CustomEvent{ @@ -133,6 +134,11 @@ func TestUncoreCollectorSetup(t *testing.T) { } err = collector.setup(events, path) + assert.Equal(t, []string{"uncore_imc_1/cas_count_read"}, + getMapKeys(collector.cpuFiles[0]["uncore_imc_1"].cpuFiles)) + assert.ElementsMatch(t, []string{"uncore_imc_0/cas_count_write", "uncore_imc_0/cas_count_read"}, + getMapKeys(collector.cpuFiles[2]["uncore_imc_0"].cpuFiles)) + // There are no errors. assert.Nil(t, err) } @@ -295,3 +301,11 @@ func TestReadPerfUncoreStat(t *testing.T) { assert.NoError(t, err) assert.Equal(t, expectedStat, stat) } + +func getMapKeys(someMap map[string]map[int]readerCloser) []string { + var keys []string + for key := range someMap { + keys = append(keys, key) + } + return keys +} From cf09a324fad76a7562f0f395e7d344af1f230ec1 Mon Sep 17 00:00:00 2001 From: "Wisniewski, Krzysztof2" Date: Tue, 3 Nov 2020 10:41:41 +0100 Subject: [PATCH 2/4] Fix lint errors Signed-off-by: Wisniewski, Krzysztof2 --- perf/uncore_libpfm.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perf/uncore_libpfm.go b/perf/uncore_libpfm.go index ea85178acb..a6450fabe9 100644 --- a/perf/uncore_libpfm.go +++ b/perf/uncore_libpfm.go @@ -327,7 +327,7 @@ func (c *uncoreCollector) Destroy() { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() - for groupIndex, _ := range c.cpuFiles { + for groupIndex := range c.cpuFiles { c.deleteGroup(groupIndex) delete(c.cpuFiles, groupIndex) } @@ -471,7 +471,7 @@ func (c *uncoreCollector) setupRawEvent(event *CustomEvent, pmus uncorePMUs, gro return nil } -func (c *uncoreCollector) deleteGroup (groupIndex int) { +func (c *uncoreCollector) deleteGroup(groupIndex int) { groupPMUs := c.cpuFiles[groupIndex] for pmu, group := range groupPMUs { for name, cpus := range group.cpuFiles { From a1dee0124251c5ced63a232d4d1a94d9851fd664 Mon Sep 17 00:00:00 2001 From: "Wisniewski, Krzysztof2" Date: Tue, 10 Nov 2020 10:09:37 +0100 Subject: [PATCH 3/4] Remove redeclaration of unix package Signed-off-by: Wisniewski, Krzysztof2 --- perf/collector_libpfm.go | 10 ++++---- perf/collector_libpfm_test.go | 2 -- perf/uncore_libpfm.go | 48 +++++++++++++++++++++-------------- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/perf/collector_libpfm.go b/perf/collector_libpfm.go index 58a00dada5..a0f4b77631 100644 --- a/perf/collector_libpfm.go +++ b/perf/collector_libpfm.go @@ -199,7 +199,7 @@ func (c *collector) setup() error { leaderFileDescriptors, err := c.createLeaderFileDescriptors(group.events, cgroupFd, groupIndex, leaderFileDescriptors) if err != nil { - klog.Error(err) + klog.Errorf("Cannot count perf event group %v: %v", group.events, err) c.deleteGroup(groupIndex) continue } else { @@ -232,7 +232,7 @@ func (c *collector) createLeaderFileDescriptors(events []Event, cgroupFd int, gr config := c.createConfigFromRawEvent(customEvent) leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors) if err != nil { - return nil, fmt.Errorf("cannot to register perf event: %v", err) + return nil, fmt.Errorf("cannot register perf event: %v", err) } } else { config, err := c.createConfigFromEvent(event) @@ -242,7 +242,7 @@ func (c *collector) createLeaderFileDescriptors(events []Event, cgroupFd int, gr } leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors) if err != nil { - return nil, fmt.Errorf("cannot to register perf event: %v", err) + return nil, fmt.Errorf("cannot register perf event: %v", err) } // Clean memory allocated by C code. C.free(unsafe.Pointer(config)) @@ -355,10 +355,10 @@ func (c *collector) addEventFile(index int, name string, cpu int, perfFile *os.F func (c *collector) deleteGroup(index int) { for name, files := range c.cpuFiles[index].cpuFiles { for cpu, file := range files { - klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) + klog.V(5).Infof("Closing perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) err := file.Close() if err != nil { - klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) + klog.Warningf("Unable to close perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) } } } diff --git a/perf/collector_libpfm_test.go b/perf/collector_libpfm_test.go index 7dbf09f2ed..c1ef62a836 100644 --- a/perf/collector_libpfm_test.go +++ b/perf/collector_libpfm_test.go @@ -27,8 +27,6 @@ import ( "golang.org/x/sys/unix" - "golang.org/x/sys/unix" - "github.com/stretchr/testify/assert" info "github.com/google/cadvisor/info/v1" diff --git a/perf/uncore_libpfm.go b/perf/uncore_libpfm.go index 64c50063e7..8e756ca4cb 100644 --- a/perf/uncore_libpfm.go +++ b/perf/uncore_libpfm.go @@ -158,6 +158,30 @@ func NewUncoreCollector(cgroupPath string, events PerfEvents, cpuToSocket map[in return collector } +func (c *uncoreCollector) createLeaderFileDescriptors(events []Event, groupIndex int, groupPMUs map[Event]uncorePMUs, + leaderFileDescriptors map[string]map[uint32]int) (map[string]map[uint32]int, error) { + var err error + for _, event := range events { + eventName, _ := parseEventName(string(event)) + customEvent, ok := c.eventToCustomEvent[event] + if ok { + err = c.setupRawEvent(customEvent, groupPMUs[event], groupIndex, leaderFileDescriptors) + } else { + err = c.setupEvent(eventName, groupPMUs[event], groupIndex, leaderFileDescriptors) + } + + if err != nil { + klog.Errorf("cannot create config from perf event: %v", err) + break + } + } + if err != nil { + c.deleteGroup(groupIndex) + return nil, fmt.Errorf("cannot create config from perf event: %v", err) + } + return leaderFileDescriptors, nil +} + func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error { readUncorePMUs, err := getUncorePMUs(devicesPath) if err != nil { @@ -190,26 +214,11 @@ func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error { leaderFileDescriptors[pmu.name][cpu] = groupLeaderFileDescriptor } } - - for _, event := range group.events { - eventName, _ := parseEventName(string(event)) - customEvent, ok := c.eventToCustomEvent[event] - if ok { - err = c.setupRawEvent(customEvent, groupPMUs[event], i, leaderFileDescriptors) - } else { - err = c.setupEvent(eventName, groupPMUs[event], i, leaderFileDescriptors) - } - - if err != nil { - klog.Errorf("cannot create config from perf event: %v", err) - break - } - } + leaderFileDescriptors, err = c.createLeaderFileDescriptors(group.events, i, groupPMUs, leaderFileDescriptors) if err != nil { - c.deleteGroup(i) + klog.Error(err) continue } - // Group is prepared so we should reset and enable counting. for _, pmuCPUs := range leaderFileDescriptors { for _, fd := range pmuCPUs { @@ -467,15 +476,16 @@ func (c *uncoreCollector) setupRawEvent(event *CustomEvent, pmus uncorePMUs, gro return nil } + func (c *uncoreCollector) deleteGroup(groupIndex int) { groupPMUs := c.cpuFiles[groupIndex] for pmu, group := range groupPMUs { for name, cpus := range group.cpuFiles { for cpu, file := range cpus { - klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + klog.V(5).Infof("Closing uncore perf event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) err := file.Close() if err != nil { - klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + klog.Warningf("Unable to close perf event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) } } delete(group.cpuFiles, name) From 7260c5924b6da7d0f4c9a9504df9436760036694 Mon Sep 17 00:00:00 2001 From: "Wisniewski, Krzysztof2" Date: Wed, 28 Apr 2021 12:09:57 +0200 Subject: [PATCH 4/4] Remove duplicate of error log --- perf/uncore_libpfm.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/perf/uncore_libpfm.go b/perf/uncore_libpfm.go index 8e756ca4cb..fcdd8fb2f7 100644 --- a/perf/uncore_libpfm.go +++ b/perf/uncore_libpfm.go @@ -169,9 +169,7 @@ func (c *uncoreCollector) createLeaderFileDescriptors(events []Event, groupIndex } else { err = c.setupEvent(eventName, groupPMUs[event], groupIndex, leaderFileDescriptors) } - if err != nil { - klog.Errorf("cannot create config from perf event: %v", err) break } }