diff --git a/PAT-post-processing/cpu_module.py b/PAT-post-processing/cpu_module.py index 69eb899..b029d0b 100644 --- a/PAT-post-processing/cpu_module.py +++ b/PAT-post-processing/cpu_module.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # # Copyright (c) 2015, Intel Corporation # @@ -497,7 +497,7 @@ def write_excel(cluster, wb): def csv_writer(cluster, csv_path_cpu): """write data to a CSV file path""" - csv_file = open(csv_path_cpu, "wb") + csv_file = open(csv_path_cpu, "w") for node in cluster: if hasattr(node, 'cpu_obj'): node_data = node.cpu_obj.data_array diff --git a/PAT-post-processing/disk_module.py b/PAT-post-processing/disk_module.py index 349bf2a..b736b3b 100644 --- a/PAT-post-processing/disk_module.py +++ b/PAT-post-processing/disk_module.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # # Copyright (c) 2015, Intel Corporation # @@ -55,8 +55,9 @@ def extract_data(self): self.ts_index = self.title_line.index("TimeStamp") self.rkbps_index = self.title_line.index("rkB/s") self.wkbps_index = self.title_line.index("wkB/s") - self.await_index = self.title_line.index("await") - self.svctm_index = self.title_line.index("svctm") + self.r_await_index = self.title_line.index("r_await") + self.w_await_index = self.title_line.index("w_await") + self.util_index = self.title_line.index("%util") del self.data_array[0] self.time_stamp_array = [] @@ -68,15 +69,17 @@ def extract_data(self): self.wps = [] self.rkbps = [] self.wkbps = [] - self.await = [] - self.svctm = [] + self.r_await = [] + self.w_await = [] + self.util = [] for self.row in self.data_array: self.rps.append(float(self.row[self.rps_index])) self.wps.append(float(self.row[self.wps_index])) self.rkbps.append(float(self.row[self.rkbps_index])) self.wkbps.append(float(self.row[self.wkbps_index])) - self.await.append(float(self.row[self.await_index])) - self.svctm.append(float(self.row[self.svctm_index])) + self.r_await.append(float(self.row[self.r_await_index])) + 
self.w_await.append(float(self.row[self.w_await_index])) + self.util.append(float(self.row[self.util_index])) self.ts_sum = [] self.avg_ind = 0 @@ -105,11 +108,14 @@ def extract_data(self): self.rkbps_sum = self.get_sum(self.rkbps_index, self.rkbps) self.avg_array.append(self.rkbps_sum) - self.await_sum = self.get_sum(self.await_index, self.await) - self.avg_array.append(self.await_sum) + self.r_await_sum = self.get_sum(self.r_await_index, self.r_await) + self.avg_array.append(self.r_await_sum) - self.svctm_sum = self.get_sum(self.svctm_index, self.svctm) - self.avg_array.append(self.svctm_sum) + self.w_await_sum = self.get_sum(self.w_await_index, self.w_await) + self.avg_array.append(self.w_await_sum) + + self.util_sum = self.get_sum(self.util_index, self.util) + self.avg_array.append(self.util_sum) self.data_array.insert(0, self.title_line) return self.avg_array @@ -138,8 +144,9 @@ def get_avg_data(cluster, name_node): rps_dic = {} wkbps_dic = {} rkbps_dic = {} - await_dic = {} - svctm_dic = {} + r_await_dic = {} + w_await_dic = {} + util_dic = {} count_dic = {} for node in cluster: @@ -164,14 +171,18 @@ def get_avg_data(cluster, name_node): rkbps += node.disk_obj.avg_array[4][index] rkbps_dic.update(dict([(node.disk_obj.ts_sum[index], rkbps)])) - await = await_dic.get(node.disk_obj.ts_sum[index]) - await += node.disk_obj.avg_array[5][index] - await_dic.update(dict([(node.disk_obj.ts_sum[index], - await)])) - svctm = svctm_dic.get(node.disk_obj.ts_sum[index]) - svctm += node.disk_obj.avg_array[6][index] - svctm_dic.update(dict([(node.disk_obj.ts_sum[index], - svctm)])) + r_await = r_await_dic.get(node.disk_obj.ts_sum[index]) + r_await += node.disk_obj.avg_array[5][index] + r_await_dic.update(dict([(node.disk_obj.ts_sum[index], + r_await)])) + w_await = w_await_dic.get(node.disk_obj.ts_sum[index]) + w_await += node.disk_obj.avg_array[6][index] + w_await_dic.update(dict([(node.disk_obj.ts_sum[index], + w_await)])) + util = util_dic.get(node.disk_obj.ts_sum[index]) 
+ util += node.disk_obj.avg_array[7][index] + util_dic.update(dict([(node.disk_obj.ts_sum[index], + util)])) cnt = count_dic.get(node.disk_obj.ts_sum[index]) cnt += 1 count_dic.update(dict([(node.disk_obj.ts_sum[ @@ -185,10 +196,12 @@ def get_avg_data(cluster, name_node): index], node.disk_obj.avg_array[3][index])])) rkbps_dic.update(dict([(node.disk_obj.ts_sum[ index], node.disk_obj.avg_array[4][index])])) - await_dic.update(dict([(node.disk_obj.ts_sum[ + r_await_dic.update(dict([(node.disk_obj.ts_sum[ index], node.disk_obj.avg_array[5][index])])) - svctm_dic.update(dict([(node.disk_obj.ts_sum[ + w_await_dic.update(dict([(node.disk_obj.ts_sum[ index], node.disk_obj.avg_array[6][index])])) + util_dic.update(dict([(node.disk_obj.ts_sum[ + index], node.disk_obj.avg_array[7][index])])) count_dic.update(dict([(node.disk_obj.ts_sum[ index], 1)])) @@ -198,15 +211,17 @@ def get_avg_data(cluster, name_node): wps = wps_dic.values() rkbps = rkbps_dic.values() wkbps = wkbps_dic.values() - await = await_dic.values() - svctm = svctm_dic.values() + r_await = r_await_dic.values() + w_await = w_await_dic.values() + util = util_dic.values() count = count_dic.values() rps = [x for y, x in sorted(zip(ts, rps))] wps = [x for y, x in sorted(zip(ts, wps))] rkbps = [x for y, x in sorted(zip(ts, rkbps))] wkbps = [x for y, x in sorted(zip(ts, wkbps))] - await = [x for y, x in sorted(zip(ts, await))] - svctm = [x for y, x in sorted(zip(ts, svctm))] + r_await = [x for y, x in sorted(zip(ts, r_await))] + w_await = [x for y, x in sorted(zip(ts, w_await))] + util = [x for y, x in sorted(zip(ts, util))] count = [x for y, x in sorted(zip(ts, count))] ts = sorted(ts) @@ -218,11 +233,13 @@ def get_avg_data(cluster, name_node): wkbps[index] = row / count[index] for index, row in enumerate(rkbps): rkbps[index] = row / count[index] - for index, row in enumerate(await): - await[index] = row / count[index] - for index, row in enumerate(svctm): - svctm[index] = row / count[index] - avg_array = [ts, wps, 
rps, wkbps, rkbps, await, svctm] + for index, row in enumerate(r_await): + r_await[index] = row / count[index] + for index, row in enumerate(w_await): + w_await[index] = row / count[index] + for index, row in enumerate(util): + util[index] = row / count[index] + avg_array = [ts, wps, rps, wkbps, rkbps, r_await, w_await, util] return avg_array else: return None @@ -305,9 +322,11 @@ def plot_graph(data, pp, graph_title): fig.text(0.14, 0.89, fig_caption, fontsize=10, horizontalalignment='left', verticalalignment='top') # plot graphs - ax.plot(x, data[5], label='await', + ax.plot(x, data[5], label='r_await', color='#00297A', alpha=0.9, linewidth=0.5, rasterized=True) - ax.plot(x, data[6], label='svctm', + ax.plot(x, data[6], label='w_await', + color='#004f7a', alpha=0.9, linewidth=0.5, rasterized=True) + ax.plot(x, data[7], label='util', color='#800000', alpha=0.9, linewidth=0.5, rasterized=True) ax.fill_between(x, 0, data[5], facecolor='#00297A', alpha=0.45, linewidth=0.01, rasterized=True) @@ -351,16 +370,20 @@ def get_data_for_graph(data_array): for entry in data_array[4]: rkbps.append(float(entry)) new_rkbps = get_graph_mean(x, rkbps) - await = [] + r_await = [] for entry in data_array[5]: - await.append(float(entry)) - new_await = get_graph_mean(x, await) - svctm = [] + r_await.append(float(entry)) + new_r_await = get_graph_mean(x, r_await) + w_await = [] for entry in data_array[6]: - svctm.append(float(entry)) - new_svctm = get_graph_mean(x, svctm) - return [new_ts, new_wps, new_rps, new_wkbps, new_rkbps, new_await, - new_svctm], x + w_await.append(float(entry)) + new_w_await = get_graph_mean(x, w_await) + util = [] + for entry in data_array[7]: + util.append(float(entry)) + new_util = get_graph_mean(x, util) + return [new_ts, new_wps, new_rps, new_wkbps, new_rkbps, new_r_await, new_w_await, + new_util], x else: return data_array, x @@ -441,7 +464,7 @@ def write_excel(cluster, wb): def csv_writer(cluster, csv_path_disk): """write data to a CSV file path""" - 
csv_file = open(csv_path_disk, "wb") + csv_file = open(csv_path_disk, "w") for node in cluster: if hasattr(node, 'disk_obj'): node_data = node.disk_obj.data_array diff --git a/PAT-post-processing/memory_module.py b/PAT-post-processing/memory_module.py index 8f85d41..25ad372 100644 --- a/PAT-post-processing/memory_module.py +++ b/PAT-post-processing/memory_module.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # # Copyright (c) 2015, Intel Corporation # @@ -378,7 +378,7 @@ def write_excel(cluster, wb): def csv_writer(cluster, csv_path_memory): """write data to a CSV file path""" - csv_file = open(csv_path_memory, "wb") + csv_file = open(csv_path_memory, "w") for node in cluster: if hasattr(node, 'memory_obj'): node_data = node.memory_obj.data_array diff --git a/PAT-post-processing/net_module.py b/PAT-post-processing/net_module.py index d7bf6d0..05a524e 100644 --- a/PAT-post-processing/net_module.py +++ b/PAT-post-processing/net_module.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # # Copyright (c) 2015, Intel Corporation # @@ -316,7 +316,7 @@ def write_excel(cluster, wb): def csv_writer(cluster, csv_path_net): """write data to a CSV file path""" - csv_file = open(csv_path_net, "wb") + csv_file = open(csv_path_net, "w") for node in cluster: if hasattr(node, 'net_obj'): node_data = node.net_obj.data_array diff --git a/PAT-post-processing/pat-post-process.py b/PAT-post-processing/pat-post-process.py index 5077b32..8b4ca51 100755 --- a/PAT-post-processing/pat-post-process.py +++ b/PAT-post-processing/pat-post-process.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # # Copyright (c) 2015, Intel Corporation # @@ -41,12 +41,12 @@ cur_python_version = sys.version_info #platform.python_version() #cur_version[:2] will give you first two elements of the list -if cur_python_version[:2] >= (2,7): +if cur_python_version[:2] >= (3,8): found = True - print "---- You currently have Python " + sys.version + print("---- You currently have Python " + 
sys.version) else: found = False - print "---- Error, You need python 2.7.x+ and currently you have " + sys.version + print("---- Error, You need python 3.8.x+ and currently you have " + sys.version) #====================== matplotlib =========================== try: @@ -56,15 +56,15 @@ req_matplotlib_version = '1.3.1'.replace(".", "") if cur_matplotlib_version.isdigit() >= req_matplotlib_version.isdigit(): found = True - print "---- You currently have matplotlib " + matplotlib.__version__ + print("---- You currently have matplotlib " + matplotlib.__version__) else: found = False - print "---- Error, You need matplotlib 1.3.1+ and currently you have " + matplotlib.__version__ + print("---- Error, You need matplotlib 1.3.1+ and currently you have " + matplotlib.__version__) except ImportError: #handle exception found = False - print '---- missing dependency - python-matplotlib' + \ - '\n---- Please install python module - matplotlib' + print('---- missing dependency - python-matplotlib' + \ + '\n---- Please install python module - matplotlib') #===================== xlsxwriter =========================== try: @@ -73,24 +73,24 @@ req_xlsxwriter_version = '0.6.3'.replace(".", "") if cur_xlsxwriter_version.isdigit() >= req_xlsxwriter_version.isdigit(): found = True - print "---- You currently have xlsxwriter " + xlsxwriter.__version__ + print("---- You currently have xlsxwriter " + xlsxwriter.__version__) else: found = False - print "---- Error, You need xlsxwriter 0.6.3+ and currently you have " + xlsxwriter.__version__ + print("---- Error, You need xlsxwriter 0.6.3+ and currently you have " + xlsxwriter.__version__) except ImportError: #handle exception found = False - print '---- missing dependency - python-xlsxwriter' + \ - '\n---- Please install python module - xlsxwriter' + print('---- missing dependency - python-xlsxwriter' + \ + '\n---- Please install python module - xlsxwriter') #==================== starting stript ===================== if found is False: - 
print '---- Must use Python 2.7 or grater' + \ - '\n---- dependencies missing - exiting script >>>>>>>>>>>' + print('---- Must use Python 3.8 or grater' + \ + '\n---- dependencies missing - exiting script >>>>>>>>>>>') sys.exit() else: - print '---- You have all required dependencies' + \ - '\n---- PAT-post-processing script will start automatically' + print('---- You have all required dependencies' + \ + '\n---- PAT-post-processing script will start automatically') def get_dirpaths(directory): @@ -122,7 +122,7 @@ def __init__(self, node_folder_path): self.cpu_obj = cpu_module.Cpu(self.node_file_paths[0]) self.has_cpu = True else: - print 'file missing or empty: ', self.node_file_paths[0] + print('file missing or empty: ', self.node_file_paths[0]) self.has_cpu = False # file at location [1] is disk file @@ -130,14 +130,14 @@ def __init__(self, node_folder_path): self.node_file_paths[1]).st_size != 0: self.disk_obj = disk_module.Disk(self.node_file_paths[1]) else: - print 'file missing or empty: ', self.node_file_paths[1] + print('file missing or empty: ', self.node_file_paths[1]) # file at location [2] is net file if os.path.isfile(self.node_file_paths[2]) and os.stat( self.node_file_paths[2]).st_size != 0: self.net_obj = net_module.Net(self.node_file_paths[2]) else: - print 'file missing or empty: ', self.node_file_paths[2] + print('file missing or empty: ', self.node_file_paths[2]) # file at location [3] is perf file if os.path.isfile(self.node_file_paths[3]) and os.stat( @@ -149,14 +149,14 @@ def __init__(self, node_folder_path): self.perf_obj = perf_module.Perf(self.node_file_paths[3], None) else: - print 'file missing or empty: ', self.node_file_paths[3] + print('file missing or empty: ', self.node_file_paths[3]) # file at location [4] is memory file if os.path.isfile(self.node_file_paths[4]) and os.stat( self.node_file_paths[4]).st_size != 0: self.memory_obj = memory_module.Memory(self.node_file_paths[4]) else: - print 'file missing or empty: ', 
self.node_file_paths[4] + print('file missing or empty: ', self.node_file_paths[4]) def get_file_paths(self, node_folder_path): """generate file paths for raw files for a node""" @@ -274,24 +274,24 @@ def generate_output(cluster): name_node = root[4].text if en_pdf == 'yes': - print "----Rendering pdf", time.ctime(), "----" + print("----Rendering pdf", time.ctime(), "----") # global pdf file that will contain all charts pp = PdfPages(result_path + '/PAT-Result.pdf') - # print average cpu utilization graph to pdf + # plot average cpu utilization graph to pdf if en_avg_cpu == 'yes' or en_avg_cpu == 'Yes': cpu_data = cpu_module.get_avg_data(cluster, name_node) if cpu_data is not None: cpu_module.plot_graph(cpu_data, pp, 'All-nodes average') - # print average disk utilization graph to pdf + # plot average disk utilization graph to pdf if en_avg_disk == 'yes' or en_avg_disk == 'Yes': disk_data = disk_module.get_avg_data(cluster, name_node) if disk_data is not None: disk_module.plot_graph(disk_data, pp, 'All-nodes average') - # print average network utilization graph to pdf + # plot average network utilization graph to pdf if en_avg_net == 'yes' or en_avg_net == 'Yes': net_data = net_module.get_avg_data(cluster, name_node) if net_data is not None: @@ -312,13 +312,13 @@ def generate_output(cluster): "Avg Function", None, perf_list, "avg", cluster, name_node) - # print average memory utilization graph to pdf + # plot average memory utilization graph to pdf if en_avg_memory == 'yes' or en_avg_memory == 'Yes': memory_data = memory_module.get_avg_data(cluster, name_node) if memory_data is not None: memory_module.plot_graph(memory_data, pp, "All-nodes average") - # print data graphs for each individual node + # plot data graphs for each individual node for node in cluster: if en_all_cpu == 'yes' or en_all_cpu == 'Yes': if hasattr(node, 'cpu_obj'): @@ -331,9 +331,15 @@ def generate_output(cluster): disk_module.plot_graph( node.disk_obj.avg_array, pp, str(node_name)) if 
en_all_net == 'yes' or en_all_net == 'Yes': - node_name = node.net_obj.data_array[1][0] - net_module.plot_graph( - node.net_obj.avg_array, pp, str(node_name)) + if hasattr(node, 'net_obj'): + node_name = node.net_obj.data_array[1][0] + net_module.plot_graph( + node.net_obj.avg_array, pp, str(node_name)) + if en_all_memory == 'yes' or en_all_memory == 'Yes': + if hasattr(node, 'memory_obj'): + node_name = node.memory_obj.data_array[1][0] + memory_module.plot_graph( + node.memory_obj.avg_array, pp, str(node_name)) if en_all_perf == 'yes' or en_all_perf == 'Yes': if hasattr(node, 'perf_obj'): node_name = node.perf_obj.data_array[1][0] @@ -354,16 +360,17 @@ def generate_output(cluster): node.perf_obj.avg_array, metric_list, "node", None, None) if en_all_memory == 'yes' or en_all_memory == 'Yes': - node_name = node.memory_obj.data_array[1][0] - memory_module.plot_graph( - node.memory_obj.avg_array, pp, str(node_name)) - print "----Finished pdf", time.ctime(), "----" + if hasattr(node, 'memory_obj'): + node_name = node.memory_obj.data_array[1][0] + memory_module.plot_graph( + node.memory_obj.avg_array, pp, str(node_name)) + print("----Finished pdf", time.ctime(), "----") pp.close() if en_xl == 'yes': - print "----Generating Excel", time.ctime(), "----" + print("----Generating Excel", time.ctime(), "----") wb = xlsxwriter.Workbook(result_path + '/PAT-Result.xlsm') - print "----Generating CSV", time.ctime(), "----" + print("----Generating CSV", time.ctime(), "----") csv_path_cpu = result_path + "/CPU.csv" csv_path_disk = result_path + "/DISK.csv" csv_path_net = result_path + "/NET.csv" @@ -393,8 +400,8 @@ def generate_output(cluster): if en_memory_csv == 'yes' or en_memory_csv == 'Yes': memory_module.csv_writer(cluster, csv_path_memory) wb.add_vba_project('./vbaProject.bin') - print "----Finished Excel", time.ctime(), "----" - print "----Finished CSV", time.ctime(), "----" + print("----Finished Excel", time.ctime(), "----") + print("----Finished CSV", time.ctime(), "----") 
wb.close() @@ -406,9 +413,9 @@ def main(): config = ET.parse(config_file) root = config.getroot() - print "Started processing on", time.ctime() + print("Started processing on", time.ctime()) cluster = make_cluster(root[3].text) - print "Completed processing on", time.ctime() + print("Completed processing on", time.ctime()) generate_output(cluster) diff --git a/PAT-post-processing/pat_abc.py b/PAT-post-processing/pat_abc.py index 39dbe09..308a218 100644 --- a/PAT-post-processing/pat_abc.py +++ b/PAT-post-processing/pat_abc.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # # Copyright (c) 2015, Intel Corporation # @@ -27,7 +27,10 @@ import csv -from StringIO import StringIO +try: + from StringIO import StringIO ## for Python 2 +except ImportError: + from io import StringIO ## for Python 3 import fileinput import sys @@ -44,13 +47,14 @@ def get_data(self, file_path): line = line.replace(searchExp,replaceExp) sys.stdout.write(line) - self.file = open(file_path, 'r') + self.file = open(file_path, 'rt') self.arr = [] if 'HostName' in self.file.readline(): self.file.seek(0) self.reader = csv.reader(StringIO(self.file.readline()), delimiter=' ', skipinitialspace=True) - self.arr.append(self.reader.next()) + #self.arr.append(self.reader.next()) + self.arr.append(next(self.reader)) self.file.seek(0) for self.line in self.file: @@ -65,5 +69,5 @@ def get_data(self, file_path): def extract_data(self): """extract useful data from the raw data aray and return it""" - print "function: extract_data() not implemented" + print("function: extract_data() not implemented") return diff --git a/PAT-post-processing/perf_module.py b/PAT-post-processing/perf_module.py index 7c3add2..3630a73 100644 --- a/PAT-post-processing/perf_module.py +++ b/PAT-post-processing/perf_module.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # # Copyright (c) 2015, Intel Corporation # @@ -530,7 +530,7 @@ def write_excel(cluster, wb): def csv_writer(cluster, csv_path_perf): """write data to 
a CSV file path""" - csv_file = open(csv_path_perf, "wb") + csv_file = open(csv_path_perf, "w") for node in cluster: if hasattr(node, 'perf_obj'): diff --git a/README.md b/README.md index f74b22e..1dce09e 100644 --- a/README.md +++ b/README.md @@ -53,14 +53,14 @@ Figure 5: PAT Post-Processing architecture ## II. SOFTWARE REQUIREMENTS: - For collecting data - - Linux Sysstat (version 9.0.4 or newer) + - Linux sysstat (version 12.2.0 or newer) - gawk - perf - For postprocess data - MS Office 2010 or higher - - **Python 2.7.x+ (on the python2 series). Post-process script will NOT work on Python 3.x+** - - matplotlib 1.3.1+ **matplotlib 1.5.3 is the last one that post-process script will work on** - - xlsxwriter 0.6.3+ + - **Python 3.8.x+ (on the python3 series).** + - matplotlib 3.1.2+ + - XlsxWriter 3.0.2+ ## III. STEPS FOR INSTALLING DEPENDENCIES: @@ -85,7 +85,7 @@ Gawk, sysstat and perf should be installed by default on every modern full-fledg ## Python: -If you are working on Linux machine then by default you should have Python already install. Update to 2.7 if you have older version. +If you are working on a Linux machine then by default you should have Python already installed. Update to 3.8 if you have an older version. Make sure you are using the same version of Python as the OS is using. To check that @@ -94,7 +94,7 @@ Make sure you are using the same version of Python as the OS is using. To check 2. `python` will tell you which version of Python you are using and `which python` will tell you the location of Python you are using. You want 1. and 2. to be the same. ## pip: -If you have Python 2.7.9+ (on the python2 series) or Python 3.4+ then by default you ought to have pip already install. If not, then +If you have Python 3.8+ then by default you ought to have pip already installed. If not, then **_(recomended)_**