From f18002bbe421807ebb04b3ba5d7a1da8e0429a38 Mon Sep 17 00:00:00 2001 From: rtevans Date: Mon, 19 Feb 2018 13:33:29 -0600 Subject: [PATCH] skx updates --- tacc_stats/analysis/plot/heatmap.py | 2 +- tacc_stats/analysis/plot/masterplot.py | 69 ++++++++++++++++---------- tacc_stats/analysis/plot/plots.py | 6 +-- tacc_stats/site/machine/views.py | 12 ++--- 4 files changed, 54 insertions(+), 35 deletions(-) diff --git a/tacc_stats/analysis/plot/heatmap.py b/tacc_stats/analysis/plot/heatmap.py index 25222780..41aa0f70 100644 --- a/tacc_stats/analysis/plot/heatmap.py +++ b/tacc_stats/analysis/plot/heatmap.py @@ -43,7 +43,7 @@ def plot(self,jobid,job_data=None): var_cpi = tvar(host_cpi.values()) else: var_cpi= 0.0 - self.fig = Figure(figsize=(10,12),dpi=110) + self.fig = Figure(figsize=(10,15),dpi=110) self.ax=self.fig.add_subplot(1,1,1) ycore = arange(cpi.shape[0]+1) diff --git a/tacc_stats/analysis/plot/masterplot.py b/tacc_stats/analysis/plot/masterplot.py index 412f7949..b51c8b11 100644 --- a/tacc_stats/analysis/plot/masterplot.py +++ b/tacc_stats/analysis/plot/masterplot.py @@ -22,22 +22,24 @@ class MasterPlot(Plot): 'lnet', 'lnet', 'ib_ext','ib_ext','cpu','mem','mem','mem'], 'intel_snb' : ['intel_snb_imc', 'intel_snb_imc', 'intel_snb', 'lnet', 'lnet', 'ib_sw','ib_sw', - 'intel_snb', 'intel_snb', 'intel_snb'] + linux_types, - 'intel_hsw' : ['intel_hsw_imc', 'intel_hsw_imc', 'intel_hsw', + 'intel_snb', 'intel_snb', 'intel_snb', 'intel_snb', 'intel_snb'] + linux_types, + 'intel_hsw' : ['intel_hsw_imc', 'intel_hsw_imc', 'intel_hsw', 'intel_hsw', 'intel_hsw', 'lnet', 'lnet', 'ib_sw','ib_sw'] + linux_types, - 'intel_ivb' : ['intel_ivb_imc', 'intel_ivb_imc', 'intel_ivb', + 'intel_ivb' : ['intel_ivb_imc', 'intel_ivb_imc', 'intel_ivb', 'intel_ivb', 'intel_ivb', 'lnet', 'lnet', 'ib_sw','ib_sw', 'intel_ivb', 'intel_ivb', 'intel_ivb'] + linux_types, - 'intel_bdw' : ['intel_bdw_imc', 'intel_bdw_imc', 'intel_bdw', + 'intel_bdw' : ['intel_bdw_imc', 'intel_bdw_imc', 'intel_bdw', 'intel_bdw', 'intel_bdw', 'lnet', 'lnet', 'ib_sw','ib_sw', 'intel_bdw', 'intel_bdw', 'intel_bdw'] + linux_types, 'intel_skx' : ['intel_skx', 'intel_skx', 'intel_skx', 'intel_skx', 'intel_skx_imc', 'intel_skx_imc', - 'lnet', 'lnet', 'opa', 'opa'] + linux_types, + 'lnet', 'lnet', 'opa', 'opa', + 'intel_skx', 'intel_skx'] + linux_types, 'intel_knl' : ['intel_knl_mc_dclk', 'intel_knl_mc_dclk', 'intel_knl_edc_eclk', 'intel_knl_edc_uclk', 'intel_knl_edc_uclk', 'intel_knl_edc_eclk', - 'lnet', 'lnet', 'opa','opa'] + linux_types + 'lnet', 'lnet', 'opa','opa', + 'intel_knl', 'intel_knl'] + linux_types } @@ -66,22 +68,28 @@ class MasterPlot(Plot): 'intel_snb' : ['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes', 'SSE_DOUBLE_SCALAR', 'SSE_DOUBLE_PACKED', - 'SIMD_DOUBLE_256'] + linux_stats, + 'SIMD_DOUBLE_256', + 'CLOCKS_UNHALTED_CORE', 'CLOCKS_UNHALTED_REF'] + linux_stats, 'intel_hsw' : ['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', - 'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes'] + linux_stats, + 'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes', + 'CLOCKS_UNHALTED_CORE', 'CLOCKS_UNHALTED_REF'] + linux_stats, 'intel_bdw' : ['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', - 'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes'] + linux_stats, + 'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes', + 'CLOCKS_UNHALTED_CORE', 'CLOCKS_UNHALTED_REF'] + linux_stats, 'intel_ivb' : ['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes', 'SSE_DOUBLE_SCALAR', 'SSE_DOUBLE_PACKED', - 'SIMD_DOUBLE_256'] + linux_stats, + 'SIMD_DOUBLE_256', + 'CLOCKS_UNHALTED_CORE', 'CLOCKS_UNHALTED_REF'] + linux_stats, 'intel_knl' : ['CAS_READS', 'CAS_WRITES', 'RPQ_INSERTS', 'EDC_MISS_CLEAN', 'EDC_MISS_DIRTY', 'WPQ_INSERTS', - 'rx_bytes','tx_bytes', 'portRcvData','portXmitData'] + linux_stats, + 'rx_bytes','tx_bytes', 'portRcvData','portXmitData', + 'CLOCKS_UNHALTED_CORE', 'CLOCKS_UNHALTED_REF'] + linux_stats, 'intel_skx' : ['FP_ARITH_INST_RETIRED_SCALAR_DOUBLE', 'FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE', 'FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE', 'FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE', 'CAS_READS', 'CAS_WRITES', - 'rx_bytes','tx_bytes', 'portRcvData','portXmitData'] + linux_stats, + 'rx_bytes','tx_bytes', 'portRcvData','portXmitData', + 'CLOCKS_UNHALTED_CORE', 'CLOCKS_UNHALTED_REF'] + linux_stats, } @@ -92,12 +100,12 @@ def plot(self,jobid,job_data=None): if self.wide: self.fig = Figure(figsize=(15.5,12),dpi=110) - self.ax=self.fig.add_subplot(6,2,2) + self.ax=self.fig.add_subplot(7,2,2) cols = 2 shift = 2 else: - self.fig = Figure(figsize=(8,12),dpi=110) - self.ax=self.fig.add_subplot(6,1,1) + self.fig = Figure(figsize=(8,15),dpi=110) + self.ax=self.fig.add_subplot(7,1,1) cols = 1 shift = 1 if self.mode == 'hist': @@ -109,6 +117,9 @@ def plot(self,jobid,job_data=None): else: plot=self.plot_lines + nom_freq = {"intel_snb" : 2.7, "intel_ivb" : 2.8, "intel_hsw" : 2.3, "intel_bdw" : 2.6, "intel_knl" : 1.4, "intel_skx" : 2.1} + nom_freq = nom_freq[self.ts.pmc_type] + k1_tmp=self.k1[self.ts.pmc_type] k2_tmp=self.k2[self.ts.pmc_type] processor_schema = self.ts.j.schemas[self.ts.pmc_type] @@ -140,7 +151,7 @@ def plot(self,jobid,job_data=None): print("FLOP stats not available for JOBID",self.ts.j.id) raise plot_ctr += 1 - ax = self.fig.add_subplot(6,cols,plot_ctr*shift) + ax = self.fig.add_subplot(7,cols,plot_ctr*shift) for host_name in self.ts.j.hosts.keys(): flops = 0 if idx0: flops += 1*self.ts.assemble([idx0],host_name,0) @@ -167,7 +178,7 @@ def plot(self,jobid,job_data=None): idxs = [k2_tmp.index('RPQ_INSERTS'), -k2_tmp.index('EDC_MISS_CLEAN'), -k2_tmp.index('EDC_MISS_DIRTY'), k2_tmp.index('WPQ_INSERTS'), -k2_tmp.index('CAS_READS')] plot_ctr += 1 - plot(self.fig.add_subplot(6,cols,plot_ctr*shift), idxs, 3600., + plot(self.fig.add_subplot(7,cols,plot_ctr*shift), idxs, 3600., (2**30.0)/64., ylabel="MCDRAM BW [GB/s]") # Plot key 2 @@ -177,7 +188,7 @@ def plot(self,jobid,job_data=None): elif 'MEM_UNCORE_RETIRED_REMOTE_DRAM' in k2_tmp and 'MEM_UNCORE_RETIRED_LOCAL_DRAM' in k2_tmp: idxs = [k2_tmp.index('MEM_UNCORE_RETIRED_REMOTE_DRAM'), k2_tmp.index('MEM_UNCORE_RETIRED_LOCAL_DRAM')] plot_ctr += 1 - plot(self.fig.add_subplot(6,cols,plot_ctr*shift), idxs, 3600., + plot(self.fig.add_subplot(7,cols,plot_ctr*shift), idxs, 3600., 1.0/64.0*1024.*1024.*1024., ylabel='DRAM BW [GB/s]') except: print(self.ts.pmc_type + ' missing Memory Bandwidth plot' + ' for jobid ' + self.ts.j.id ) @@ -187,14 +198,14 @@ def plot(self,jobid,job_data=None): idx1=k2_tmp.index('FilePages') idx2=k2_tmp.index('Slab') plot_ctr += 1 - plot(self.fig.add_subplot(6,cols,plot_ctr*shift), [idx0,-idx1,-idx2], 3600.,2.**30.0, + plot(self.fig.add_subplot(7,cols,plot_ctr*shift), [idx0,-idx1,-idx2], 3600.,2.**30.0, ylabel='Memory Use [GB]',do_rate=False) # Plot lnet sum rate idx0=k1_tmp.index('lnet') idx1=idx0 + k1_tmp[idx0+1:].index('lnet') + 1 plot_ctr += 1 - plot(self.fig.add_subplot(6,cols,plot_ctr*shift), [idx0,idx1], 3600., 1024.**2, ylabel='Lustre BW [MB/s]') + plot(self.fig.add_subplot(7,cols,plot_ctr*shift), [idx0,idx1], 3600., 1024.**2, ylabel='Lustre BW [MB/s]') # Plot remaining IB sum rate if 'ib_ext' in self.ts.j.hosts.values()[0].stats: @@ -206,7 +217,7 @@ def plot(self,jobid,job_data=None): idx3=idx2 + k1_tmp[idx2+1:].index('ib_ext') + 1 try: plot_ctr += 1 - plot(self.fig.add_subplot(6,cols,plot_ctr*shift),[idx2,idx3],3600.,2.**20, + plot(self.fig.add_subplot(7,cols,plot_ctr*shift),[idx2,idx3],3600.,2.**20, ylabel='IB BW [MB/s]') except: pass FLITS_PER_MB = 125000 @@ -214,7 +225,7 @@ def plot(self,jobid,job_data=None): idx2=k2_tmp.index('portXmitData') idx3=k2_tmp.index('portRcvData') plot_ctr += 1 - plot(self.fig.add_subplot(6,cols,plot_ctr*shift),[idx2,idx3],3600.,FLITS_PER_MB, + plot(self.fig.add_subplot(7,cols,plot_ctr*shift),[idx2,idx3],3600.,FLITS_PER_MB, ylabel='OPA BW [MB/s]') #Plot CPU user time @@ -222,9 +233,17 @@ def plot(self,jobid,job_data=None): idle = [k2_tmp.index('iowait'), k2_tmp.index('idle'), k2_tmp.index('irq'), k2_tmp.index('softirq')] plot_ctr += 1 - self.plot_ratio(self.fig.add_subplot(6,cols,plot_ctr*shift), busy, busy + idle, 3600., 0.01, - xlabel='Time [hrs]', + self.plot_ratio(self.fig.add_subplot(7,cols,plot_ctr*shift), busy, busy + idle, 3600., 0.01, ylabel='Logical Core Use %') - + + #Plot CPU Frequency + cyc = [k2_tmp.index('CLOCKS_UNHALTED_CORE')] + cyc_ref = [k2_tmp.index('CLOCKS_UNHALTED_REF')] + plot_ctr += 1 + + self.plot_ratio(self.fig.add_subplot(7,cols,plot_ctr*shift), cyc, cyc_ref, 3600., 1/nom_freq, + xlabel='Time [hrs]', + ylabel='Active Freq [GHz]') + self.fig.subplots_adjust(hspace=0.35) self.output('master') diff --git a/tacc_stats/analysis/plot/plots.py b/tacc_stats/analysis/plot/plots.py index 819c5862..80635e66 100644 --- a/tacc_stats/analysis/plot/plots.py +++ b/tacc_stats/analysis/plot/plots.py @@ -81,12 +81,12 @@ def setlabels(self,ax,index,xlabel,ylabel,yscale): ax.set_ylabel('Total '+self.ts.label(self.ts.k1[index[0]], self.ts.k2[index[0]],yscale)+'/s' ) - def plot_ratio(self,ax,index1, index2, xscale=1.0,yscale=1.0,xlabel='',ylabel=''): + def plot_ratio(self,ax, index1, index2, xscale=1.0, yscale=1.0, xlabel='', ylabel=''): ax.hold=True for k in self.ts.j.hosts.keys(): - v1=self.ts.assemble(index1,k,0) - v2=self.ts.assemble(index2,k,0) + v1=self.ts.assemble(index1, k, 0) + v2=self.ts.assemble(index2, k, 0) val=numpy.divide(numpy.diff(v1),numpy.diff(v2)) ax.step(self.ts.t/xscale,numpy.append(val,[val[-1]])/yscale,where="post") diff --git a/tacc_stats/site/machine/views.py b/tacc_stats/site/machine/views.py index e7f8ffca..95317d02 100644 --- a/tacc_stats/site/machine/views.py +++ b/tacc_stats/site/machine/views.py @@ -272,17 +272,17 @@ def heat_map(request, pk): 'intel_skx' : ['intel_skx','intel_skx'], 'intel_pmc3' : ['intel_pmc3','intel_pmc3'] }, - k2={'intel_snb' : ['CLOCKS_UNHALTED_REF', + k2={'intel_snb' : ['CLOCKS_UNHALTED_CORE', 'INSTRUCTIONS_RETIRED'], - 'intel_ivb' : ['CLOCKS_UNHALTED_REF', + 'intel_ivb' : ['CLOCKS_UNHALTED_CORE', 'INSTRUCTIONS_RETIRED'], - 'intel_hsw' : ['CLOCKS_UNHALTED_REF', + 'intel_hsw' : ['CLOCKS_UNHALTED_CORE', 'INSTRUCTIONS_RETIRED'], - 'intel_skx' : ['CLOCKS_UNHALTED_REF', + 'intel_skx' : ['CLOCKS_UNHALTED_CORE', 'INSTRUCTIONS_RETIRED'], - 'intel_knl' : ['CLOCKS_UNHALTED_REF', + 'intel_knl' : ['CLOCKS_UNHALTED_CORE', 'INSTRUCTIONS_RETIRED'], - 'intel_pmc3' : ['CLOCKS_UNHALTED_REF', + 'intel_pmc3' : ['CLOCKS_UNHALTED_CORE', 'INSTRUCTIONS_RETIRED'] }, lariat_data="pass")