diff --git a/mimircache/CExtension/pyBindings/pyHeatmap.c b/mimircache/CExtension/pyBindings/pyHeatmap.c index 75369ff..d714bd2 100644 --- a/mimircache/CExtension/pyBindings/pyHeatmap.c +++ b/mimircache/CExtension/pyBindings/pyHeatmap.c @@ -788,7 +788,7 @@ static PyMethodDef c_heatmap_funcs[] = { METH_VARARGS | METH_KEYWORDS, "heatmap pixel computation"}, {"differential_heatmap_with_Optimal", (PyCFunction)differential_heatmap_with_Optimal, METH_VARARGS | METH_KEYWORDS, "differential heatmap pixel computation compared with Optimal"}, - {"diffHeatmap", (PyCFunction)differential_heatmap_py, + {"diff_heatmap", (PyCFunction)differential_heatmap_py, METH_VARARGS | METH_KEYWORDS, "differential heatmap pixel computation"}, {"heatmap_rd_distribution", (PyCFunction)heatmap_rd_distribution_py, METH_VARARGS | METH_KEYWORDS, "reuse distance distribution heatmap"}, @@ -798,7 +798,7 @@ static PyMethodDef c_heatmap_funcs[] = { METH_VARARGS | METH_KEYWORDS, "reuse distance distribution heatmap"}, {"heatmap_reuse_time_distribution", (PyCFunction)heatmap_rt_distribution_py, METH_VARARGS | METH_KEYWORDS, "reuse distance distribution heatmap"}, - {"getBreakpoints", (PyCFunction)heatmap_get_break_points, + {"get_breakpoints", (PyCFunction)heatmap_get_break_points, METH_VARARGS | METH_KEYWORDS, "generate virtual/real break points"}, {NULL, NULL, 0, NULL} }; diff --git a/mimircache/__init__.py b/mimircache/__init__.py index d61cd10..2cce017 100644 --- a/mimircache/__init__.py +++ b/mimircache/__init__.py @@ -18,13 +18,13 @@ sys.path.extend([CWD, os.path.join(CWD, "..")]) +from mimircache.const import * from mimircache.profiler.LRUProfiler import LRUProfiler as LRUProfiler from mimircache.profiler.cGeneralProfiler import cGeneralProfiler from mimircache.profiler.generalProfiler import generalProfiler as generalProfiler from mimircache.profiler.cHeatmap import cHeatmap from mimircache.profiler.heatmap import heatmap as heatmap from mimircache.top.cachecow import cachecow as cachecow -from 
mimircache.const import * from version import __version__ as __version__ diff --git a/mimircache/cache/Optimal.py b/mimircache/cache/Optimal.py index 12c06e9..956dcda 100644 --- a/mimircache/cache/Optimal.py +++ b/mimircache/cache/Optimal.py @@ -4,8 +4,8 @@ from mimircache.cache.abstractCache import cache from mimircache.const import CExtensionMode if CExtensionMode: - import mimircache.c_LRUProfiler as c_LRUProfiler - import mimircache.c_heatmap as c_heatmap + import mimircache.c_LRUProfiler + import mimircache.c_heatmap from heapdict import heapdict @@ -15,7 +15,7 @@ def __init__(self, cache_size, reader): # reader.reset() self.reader = reader self.reader.lock.acquire() - self.next_access = c_heatmap.get_next_access_dist(self.reader.cReader) + self.next_access = mimircache.c_heatmap.get_next_access_dist(self.reader.cReader) self.reader.lock.release() self.pq = heapdict() @@ -23,7 +23,7 @@ def __init__(self, cache_size, reader): self.ts = 0 def get_reversed_reuse_dist(self): - return c_LRUProfiler.get_reversed_reuse_dist(self.reader.cReader) + return mimircache.c_LRUProfiler.get_reversed_reuse_dist(self.reader.cReader) def checkElement(self, element): """ diff --git a/mimircache/cacheReader/abstractReader.py b/mimircache/cacheReader/abstractReader.py index 3505a5b..bf38c02 100644 --- a/mimircache/cacheReader/abstractReader.py +++ b/mimircache/cacheReader/abstractReader.py @@ -3,7 +3,9 @@ import os from multiprocessing import Lock from collections import defaultdict -import mimircache.c_cacheReader as c_cacheReader +from mimircache.const import CExtensionMode +if CExtensionMode: + import mimircache.c_cacheReader class cacheReaderAbstract(metaclass=abc.ABCMeta): @@ -44,7 +46,7 @@ def reset(self): self.counter = 0 self.trace_file.seek(0, 0) if self.cReader: - c_cacheReader.reset_reader(self.cReader) + mimircache.c_cacheReader.reset_reader(self.cReader) def get_num_of_req(self): """ @@ -58,7 +60,7 @@ def get_num_of_req(self): # clear before counting self.num_of_req = 
0 if self.cReader: - self.num_of_req = c_cacheReader.get_num_of_req(self.cReader) + self.num_of_req = mimircache.c_cacheReader.get_num_of_req(self.cReader) else: while self.read_one_element() is not None: self.num_of_req += 1 @@ -119,12 +121,12 @@ def close(self): if self.trace_file: self.trace_file.close() self.trace_file = None - if self.cReader and c_cacheReader is not None: - c_cacheReader.close_reader(self.cReader) + if self.cReader and mimircache.c_cacheReader is not None: + mimircache.c_cacheReader.close_reader(self.cReader) self.cReader = None except Exception as e: # return - print("Exception during close reader: {}, ccacheReader={}".format(e, c_cacheReader)) + print("Exception during close reader: {}, ccacheReader={}".format(e, mimircache.c_cacheReader)) @abc.abstractmethod def __next__(self): # Python 3 diff --git a/mimircache/cacheReader/binaryReader.py b/mimircache/cacheReader/binaryReader.py index d29a3a2..280b7ed 100644 --- a/mimircache/cacheReader/binaryReader.py +++ b/mimircache/cacheReader/binaryReader.py @@ -5,7 +5,9 @@ """ import io, os, struct -import mimircache.c_cacheReader as c_cacheReader +from mimircache.const import CExtensionMode +if CExtensionMode: + import mimircache.c_cacheReader from mimircache.cacheReader.abstractReader import cacheReaderAbstract @@ -44,7 +46,7 @@ def __init__(self, file_loc, init_params, data_type='c', if open_c_reader: # the data type here is not real data type, it will auto correct in C - self.cReader = c_cacheReader.setup_reader(file_loc, 'b', data_type=self.data_type, + self.cReader = mimircache.c_cacheReader.setup_reader(file_loc, 'b', data_type=self.data_type, block_unit_size=block_unit_size, disk_sector_size=disk_sector_size, init_params=init_params) diff --git a/mimircache/cacheReader/csvReader.py b/mimircache/cacheReader/csvReader.py index 296c201..5d94016 100644 --- a/mimircache/cacheReader/csvReader.py +++ b/mimircache/cacheReader/csvReader.py @@ -2,7 +2,7 @@ import string from mimircache.const import 
CExtensionMode if CExtensionMode: - import mimircache.c_cacheReader as c_cacheReader + import mimircache.c_cacheReader from mimircache.cacheReader.abstractReader import cacheReaderAbstract @@ -47,7 +47,7 @@ def __init__(self, file_loc, data_type='c', init_params=None, self.read_one_element() if open_c_reader: - self.cReader = c_cacheReader.setup_reader(file_loc, 'c', data_type=data_type, + self.cReader = mimircache.c_cacheReader.setup_reader(file_loc, 'c', data_type=data_type, block_unit_size=block_unit_size, disk_sector_size=disk_sector_size, init_params=init_params) diff --git a/mimircache/cacheReader/multiReader.py b/mimircache/cacheReader/multiReader.py index d6c3de6..82bf258 100644 --- a/mimircache/cacheReader/multiReader.py +++ b/mimircache/cacheReader/multiReader.py @@ -1,8 +1,6 @@ # coding=utf-8 -# from mimircache import -from heapdict import heapdict import heapq diff --git a/mimircache/cacheReader/plainReader.py b/mimircache/cacheReader/plainReader.py index 87ea470..cd28cb2 100644 --- a/mimircache/cacheReader/plainReader.py +++ b/mimircache/cacheReader/plainReader.py @@ -1,6 +1,8 @@ # coding=utf-8 from mimircache.cacheReader.abstractReader import cacheReaderAbstract -import mimircache.c_cacheReader as c_cacheReader +from mimircache.const import CExtensionMode +if CExtensionMode: + import mimircache.c_cacheReader class plainReader(cacheReaderAbstract): @@ -13,7 +15,7 @@ def __init__(self, file_loc, data_type='c', open_c_reader=True): super(plainReader, self).__init__(file_loc, data_type, 0, 0) self.trace_file = open(file_loc, 'r') if open_c_reader: - self.cReader = c_cacheReader.setup_reader(file_loc, 'p', data_type=data_type, block_unit_size=0) + self.cReader = mimircache.c_cacheReader.setup_reader(file_loc, 'p', data_type=data_type, block_unit_size=0) def read_one_element(self): """ diff --git a/mimircache/cacheReader/vscsiReader.py b/mimircache/cacheReader/vscsiReader.py index 38c68c1..54e934f 100644 --- a/mimircache/cacheReader/vscsiReader.py +++ 
b/mimircache/cacheReader/vscsiReader.py @@ -2,7 +2,9 @@ from mimircache.cacheReader.abstractReader import cacheReaderAbstract -import mimircache.c_cacheReader as c_cacheReader +from mimircache.const import CExtensionMode +if CExtensionMode: + import mimircache.c_cacheReader class vscsiReader(cacheReaderAbstract): @@ -25,7 +27,7 @@ def __init__(self, file_loc, data_type='l', super().__init__(file_loc, data_type='l', block_unit_size=block_unit_size, disk_sector_size=disk_sector_size) if open_c_reader: - self.cReader = c_cacheReader.setup_reader(file_loc, 'v', data_type=data_type, + self.cReader = mimircache.c_cacheReader.setup_reader(file_loc, 'v', data_type=data_type, block_unit_size=block_unit_size, disk_sector_size=disk_sector_size) self.support_size = True @@ -36,14 +38,14 @@ def __init__(self, file_loc, data_type='l', def reset(self): if self.cReader: - c_cacheReader.reset_reader(self.cReader) + mimircache.c_cacheReader.reset_reader(self.cReader) def read_one_element(self): """ read one request, return only block number :return: """ - r = c_cacheReader.read_one_element(self.cReader) + r = mimircache.c_cacheReader.read_one_element(self.cReader) if r and self.block_unit_size != 0 and self.disk_sector_size != 0: r = r * self.disk_sector_size // self.block_unit_size return r @@ -53,7 +55,7 @@ def read_time_request(self): return real_time information for the request in the form of (time, request) :return: """ - r = c_cacheReader.read_time_request(self.cReader) + r = mimircache.c_cacheReader.read_time_request(self.cReader) if r and self.block_unit_size != 0 and self.disk_sector_size != 0: r[1] = r[1] * self.disk_sector_size // self.block_unit_size return r @@ -63,7 +65,7 @@ def read_one_request_full_info(self): obtain more info for the request in the form of (time, request, size) :return: """ - r = c_cacheReader.read_one_request_full_info(self.cReader) + r = mimircache.c_cacheReader.read_one_request_full_info(self.cReader) if r and self.block_unit_size != 0 and 
self.disk_sector_size != 0: r = list(r) r[1] = r[1] * self.disk_sector_size // self.block_unit_size @@ -92,10 +94,10 @@ def get_timestamp_list(self): :return: """ ts_list = [] - r = c_cacheReader.read_time_request(self.cReader) + r = mimircache.c_cacheReader.read_time_request(self.cReader) while r: ts_list.append(r[0]) - r = c_cacheReader.read_time_request(self.cReader) + r = mimircache.c_cacheReader.read_time_request(self.cReader) return ts_list diff --git a/mimircache/const.py b/mimircache/const.py index d74213e..f50637e 100644 --- a/mimircache/const.py +++ b/mimircache/const.py @@ -17,11 +17,28 @@ DEFAULT_NUM_OF_THREADS = os.cpu_count() +failed_components = [] try: import mimircache.c_cacheReader except: + failed_components.append("cacheReader") +try: + import mimircache.c_LRUProfiler +except: + failed_components.append("LRUProfiler") +try: + import mimircache.c_generalProfiler +except: + failed_components.append("generalProfiler") +try: + import mimircache.c_heatmap +except: + failed_components.append("heatmap") + +if len(failed_components): CExtensionMode = False - print("C extension import failed, which will hurt performance by 10*", file=sys.stderr) + print("C extension {} import failed, which will hurt performance by 10*". 
+ format(", ".join(failed_components)), file=sys.stderr) from mimircache.cache.ARC import ARC diff --git a/mimircache/profiler/LRUProfiler.py b/mimircache/profiler/LRUProfiler.py index 54cc635..8bd81bd 100644 --- a/mimircache/profiler/LRUProfiler.py +++ b/mimircache/profiler/LRUProfiler.py @@ -11,7 +11,9 @@ import os import socket from mimircache.const import INTERNAL_USE -import mimircache.c_LRUProfiler as c_LRUProfiler +from mimircache.const import CExtensionMode +if CExtensionMode: + import mimircache.c_LRUProfiler from mimircache.cacheReader.binaryReader import binaryReader from mimircache.cacheReader.abstractReader import cacheReaderAbstract import matplotlib.pyplot as plt @@ -80,7 +82,7 @@ def save_reuse_dist(self, file_loc, rd_type): """ assert rd_type == 'rd' or rd_type == 'frd', \ "please provide a valid reuse distance type, currently support rd and frd" - c_LRUProfiler.save_reuse_dist(self.reader.cReader, file_loc, rd_type) + mimircache.c_LRUProfiler.save_reuse_dist(self.reader.cReader, file_loc, rd_type) def load_reuse_dist(self, file_loc, rd_type): """ @@ -96,7 +98,7 @@ def load_reuse_dist(self, file_loc, rd_type): "please provide a valid reuse distance type, currently support rd and frd" if not os.path.exists(file_loc): WARNING("pre-computed reuse distance file does not exist") - c_LRUProfiler.load_reuse_dist(self.reader.cReader, file_loc, rd_type) + mimircache.c_LRUProfiler.load_reuse_dist(self.reader.cReader, file_loc, rd_type) self.reader.already_load_rd = True def _del_reuse_dist_file(self): @@ -150,7 +152,7 @@ def get_hit_count(self, **kargs): print("not supported yet") return None else: - hit_count = c_LRUProfiler.get_hit_count_seq(self.reader.cReader, **kargs) + hit_count = mimircache.c_LRUProfiler.get_hit_count_seq(self.reader.cReader, **kargs) return hit_count def get_hit_ratio(self, **kwargs): @@ -170,10 +172,10 @@ def get_hit_ratio(self, **kwargs): kargs['end'] = kwargs['end'] if self.block_unit_size != 0 : - hit_ratio = 
c_LRUProfiler.get_hit_ratio_with_size(self.reader.cReader, + hit_ratio = mimircache.c_LRUProfiler.get_hit_ratio_with_size(self.reader.cReader, block_unit_size=self.block_unit_size, **kargs) else: - hit_ratio = c_LRUProfiler.get_hit_ratio_seq(self.reader.cReader, **kargs) + hit_ratio = mimircache.c_LRUProfiler.get_hit_ratio_seq(self.reader.cReader, **kargs) return hit_ratio @@ -202,7 +204,7 @@ def get_hit_ratio_shards(self, sample_ratio=0.01, **kwargs): print("not supported yet") return None else: - hit_ratio = c_LRUProfiler.get_hit_ratio_seq_shards(tempReader.cReader, sample_ratio=sample_ratio, + hit_ratio = mimircache.c_LRUProfiler.get_hit_ratio_seq_shards(tempReader.cReader, sample_ratio=sample_ratio, correction=correction, **kargs) return hit_ratio @@ -216,7 +218,7 @@ def get_miss_ratio(self, **kargs): print("not supported yet") return None else: - miss_ratio = c_LRUProfiler.get_miss_ratio_seq(self.reader.cReader, **kargs) + miss_ratio = mimircache.c_LRUProfiler.get_miss_ratio_seq(self.reader.cReader, **kargs) return miss_ratio def get_reuse_distance(self, **kargs): @@ -228,7 +230,7 @@ def get_reuse_distance(self, **kargs): if self.block_unit_size != 0: WARNING("reuse distance calculation does not support variable obj size, " "calculating without considering size") - rd = c_LRUProfiler.get_reuse_dist_seq(self.reader.cReader, **kargs) + rd = mimircache.c_LRUProfiler.get_reuse_dist_seq(self.reader.cReader, **kargs) return rd def get_future_reuse_distance(self, **kargs): @@ -240,7 +242,7 @@ def get_future_reuse_distance(self, **kargs): if self.block_unit_size != 0: WARNING("future reuse distance calculation does not support variable obj size, " "calculating without considering size") - frd = c_LRUProfiler.get_future_reuse_dist(self.reader.cReader, **kargs) + frd = mimircache.c_LRUProfiler.get_future_reuse_dist(self.reader.cReader, **kargs) return frd def plotMRC(self, figname="MRC.png", auto_resize=False, threshold=0.98, **kwargs): diff --git 
a/mimircache/profiler/cGeneralProfiler.py b/mimircache/profiler/cGeneralProfiler.py index f941c46..1b7f9c9 100644 --- a/mimircache/profiler/cGeneralProfiler.py +++ b/mimircache/profiler/cGeneralProfiler.py @@ -23,7 +23,9 @@ from mimircache.cacheReader.abstractReader import cacheReaderAbstract from mimircache.utils.printing import * -import mimircache.c_generalProfiler as c_generalProfiler +from mimircache.const import CExtensionMode +if CExtensionMode: + import mimircache.c_generalProfiler from mimircache.const import * @@ -87,14 +89,14 @@ def __init__(self, reader, need_convert = False break if need_convert: - self.prepare_file() + self._prepare_file() # this is for deprecated functions, as old version use hit rate instead of hit ratio self.get_hit_rate = self.get_hit_ratio self.get_miss_rate = self.get_miss_ratio - def prepare_file(self): + def _prepare_file(self): """ this is used when user passed in a customized reader, but customized reader is not supported in C backend @@ -129,7 +131,7 @@ def get_hit_count(self, **kwargs): if self.block_unit_size != 0: print("not supported yet") else: - return c_generalProfiler.get_hit_count(self.reader.cReader, self.cache_name, cache_size, + return mimircache.c_generalProfiler.get_hit_count(self.reader.cReader, self.cache_name, cache_size, self.bin_size, cache_params=self.cache_params, **sanity_kwargs) def get_hit_ratio(self, **kwargs): @@ -148,7 +150,7 @@ def get_hit_ratio(self, **kwargs): sanity_kwargs['end'] = kwargs['end'] # handles both withsize and no size, but currently only storage system trace are supported with size - return c_generalProfiler.get_hit_ratio(self.reader.cReader, self.cache_name, cache_size, + return mimircache.c_generalProfiler.get_hit_ratio(self.reader.cReader, self.cache_name, cache_size, bin_size, cache_params=self.cache_params, **sanity_kwargs) @@ -171,7 +173,7 @@ def get_miss_ratio(self, **kwargs): if self.block_unit_size != 0: print("not supported yet") else: - return 
c_generalProfiler.get_miss_ratio(self.reader.cReader, self.cache_name, cache_size, + return mimircache.c_generalProfiler.get_miss_ratio(self.reader.cReader, self.cache_name, cache_size, bin_size, cache_params=self.cache_params, **sanity_kwargs) def plotMRC(self, figname="MRC.png", **kwargs): diff --git a/mimircache/profiler/cHeatmap.py b/mimircache/profiler/cHeatmap.py index ce050ba..9593d4f 100644 --- a/mimircache/profiler/cHeatmap.py +++ b/mimircache/profiler/cHeatmap.py @@ -16,7 +16,9 @@ from matplotlib import pyplot as plt import matplotlib.ticker as ticker -import mimircache.c_heatmap as c_heatmap +from mimircache.const import CExtensionMode +if CExtensionMode: + import mimircache.c_heatmap from mimircache import const from mimircache.utils.printing import * @@ -25,7 +27,7 @@ class cHeatmap: def __init__(self): self.other_plot_kwargs = {} - def getBreakpoints(self, reader, mode, time_interval=-1, num_of_pixels=-1): + def get_breakpoints(self, reader, mode, time_interval=-1, num_of_pixels=-1): """ :param num_of_pixels: @@ -36,9 +38,9 @@ def getBreakpoints(self, reader, mode, time_interval=-1, num_of_pixels=-1): """ assert time_interval!=-1 or num_of_pixels!=-1, \ "please provide at least one parameter, time_interval or num_of_pixels" - return c_heatmap.getBreakpoints(reader.cReader, mode, - time_interval=time_interval, - num_of_pixels=num_of_pixels) + return mimircache.c_heatmap.get_breakpoints(reader.cReader, mode, + time_interval=time_interval, + num_of_pixels=num_of_pixels) def setPlotParams(self, axis, axis_type, **kwargs): log_base = 1 @@ -178,7 +180,7 @@ def heatmap(self, reader, mode, plot_type, algorithm="LRU", time_interval=-1, nu if algorithm.lower() in const.c_available_cache: - xydict = c_heatmap.heatmap(reader.cReader, mode, plot_type, + xydict = mimircache.c_heatmap.heatmap(reader.cReader, mode, plot_type, cache_size, algorithm, interval_hit_ratio=enable_ihr, decay_coefficient=decay_coefficient, @@ -226,7 +228,7 @@ def heatmap(self, reader, mode, 
plot_type, algorithm="LRU", time_interval=-1, nu elif plot_type == "cold_miss_count_start_time_end_time": - print("this plot is discontinued") + raise RuntimeError("this plot is deprecated") elif plot_type == "???": @@ -239,7 +241,7 @@ def heatmap(self, reader, mode, plot_type, algorithm="LRU", time_interval=-1, nu if not figname: figname = 'rd_distribution.png' - xydict, log_base = c_heatmap.heatmap_rd_distribution(reader.cReader, mode, + xydict, log_base = mimircache.c_heatmap.heatmap_rd_distribution(reader.cReader, mode, time_interval=time_interval, num_of_pixels=num_of_pixels, num_of_threads=num_of_threads) @@ -262,7 +264,7 @@ def heatmap(self, reader, mode, plot_type, algorithm="LRU", time_interval=-1, nu if not figname: figname = 'rd_distribution_CDF.png' - xydict, log_base = c_heatmap.heatmap_rd_distribution(reader.cReader, mode, + xydict, log_base = mimircache.c_heatmap.heatmap_rd_distribution(reader.cReader, mode, time_interval=time_interval, num_of_pixels=num_of_pixels, num_of_threads=num_of_threads, CDF=1) @@ -276,7 +278,7 @@ def heatmap(self, reader, mode, plot_type, algorithm="LRU", time_interval=-1, nu if not figname: figname = 'future_rd_distribution.png' - xydict, log_base = c_heatmap.heatmap_future_rd_distribution(reader.cReader, mode, + xydict, log_base = mimircache.c_heatmap.heatmap_future_rd_distribution(reader.cReader, mode, time_interval=time_interval, num_of_pixels=num_of_pixels, num_of_threads=num_of_threads) @@ -289,7 +291,7 @@ def heatmap(self, reader, mode, plot_type, algorithm="LRU", time_interval=-1, nu if not figname: figname = 'dist_distribution.png' - xydict, log_base = c_heatmap.heatmap_dist_distribution(reader.cReader, mode, + xydict, log_base = mimircache.c_heatmap.heatmap_dist_distribution(reader.cReader, mode, time_interval=time_interval, num_of_pixels=num_of_pixels, num_of_threads=num_of_threads) @@ -309,7 +311,7 @@ def heatmap(self, reader, mode, plot_type, algorithm="LRU", time_interval=-1, nu if not figname: figname = 
'rt_distribution.png' - xydict, log_base = c_heatmap.heatmap_reuse_time_distribution(reader.cReader, mode, + xydict, log_base = mimircache.c_heatmap.heatmap_reuse_time_distribution(reader.cReader, mode, time_interval=time_interval, num_of_pixels=num_of_pixels, num_of_threads=num_of_threads) @@ -387,14 +389,14 @@ def diffHeatmap(self, reader, mode, plot_type, algorithm1, if plot_type == "hit_ratio_start_time_end_time": assert cache_size != -1, "please provide cache_size for plotting hit_ratio_start_time_end_time" - xydict = c_heatmap.diffHeatmap(reader.cReader, mode, - plot_type, cache_size, - algorithm1, algorithm2, - time_interval=time_interval, - num_of_pixels=num_of_pixels, - cache_params1=cache_params1, - cache_params2=cache_params2, - num_of_threads=num_of_threads) + xydict = mimircache.c_heatmap.diff_heatmap(reader.cReader, mode, + plot_type, cache_size, + algorithm1, algorithm2, + time_interval=time_interval, + num_of_pixels=num_of_pixels, + cache_params1=cache_params1, + cache_params2=cache_params2, + num_of_threads=num_of_threads) text = " differential heatmap\n cache size: {},\n cache type: ({}-{})/{},\n" \ " time type: {},\n time interval: {},\n plot type: \n{}".format( diff --git a/mimircache/profiler/evictionStat.py b/mimircache/profiler/evictionStat.py index a957d94..41dd5a5 100644 --- a/mimircache/profiler/evictionStat.py +++ b/mimircache/profiler/evictionStat.py @@ -16,7 +16,9 @@ from matplotlib import pyplot as plt from matplotlib.ticker import MultipleLocator, FormatStrFormatter -import mimircache.c_eviction_stat as c_eviction_stat +from mimircache.const import CExtensionMode +if CExtensionMode: + import mimircache.c_eviction_stat from mimircache.const import * from mimircache.utils.printing import * from mimircache.utils.prepPlotParams import * @@ -45,10 +47,10 @@ def eviction_stat_reuse_dist_plot(reader, algorithm, cache_size, mode, time_inte assert alg=="Optimal", "Currently only Optimal is supported" # get reuse distance of evicted elements by 
given algorithm - rd_array = c_eviction_stat.get_stat(reader.cReader, algorithm=alg, cache_size=cache_size, stat_type="reuse_dist") + rd_array = mimircache.c_eviction_stat.get_stat(reader.cReader, algorithm=alg, cache_size=cache_size, stat_type="reuse_dist") # generate break points for bucketing the reuse_dist array - bp = cHeatmap().getBreakpoints(reader, mode, time_interval) + bp = cHeatmap().get_breakpoints(reader, mode, time_interval) pos = 1 count = 0 @@ -133,8 +135,8 @@ def eviction_stat_freq_plot(reader, algorithm, cache_size, mode, time_interval, if accumulative: stat_type = "accumulative_freq" assert alg=="Optimal", "Currently only Optimal is supported" - freq_array = c_eviction_stat.get_stat(reader.cReader, algorithm=alg, cache_size=cache_size, stat_type=stat_type) - bp = cHeatmap().getBreakpoints(reader, mode, time_interval) + freq_array = mimircache.c_eviction_stat.get_stat(reader.cReader, algorithm=alg, cache_size=cache_size, stat_type=stat_type) + bp = cHeatmap().get_breakpoints(reader, mode, time_interval) pos = 1 count = 0 diff --git a/mimircache/profiler/heatmap.py b/mimircache/profiler/heatmap.py index 076572d..765c561 100644 --- a/mimircache/profiler/heatmap.py +++ b/mimircache/profiler/heatmap.py @@ -26,7 +26,10 @@ from collections import deque from multiprocessing import Array, Process, Queue -import mimircache.c_heatmap as c_heatmap +# this should be replaced by pure python module +from mimircache.const import CExtensionMode +if CExtensionMode: + import mimircache.c_heatmap from mimircache.profiler.heatmap_subprocess import * from mimircache.utils.printing import * from mimircache.const import * @@ -81,9 +84,9 @@ def _prepare_reuse_distance_and_break_points(self, mode, reader, # check break points are loaded or not, if not need to calculate it if not break_points: - break_points = cHeatmap().getBreakpoints(reader, mode, - time_interval=time_interval, - num_of_pixels=num_of_pixels) + break_points = cHeatmap().get_breakpoints(reader, mode, + 
time_interval=time_interval, + num_of_pixels=num_of_pixels) if save: with open('temp/break_points_' + mode + str(time_interval) + '.dat', 'wb') as ifile: pickle.dump(break_points, ifile) @@ -135,7 +138,7 @@ def _prepare_multiprocess_params_LRU(self, mode, plot_type, break_points, **kwar lambda x, pos: '{:2.0f}%'.format(x * 100 / len(break_points))) kwargs_plot['title'] = "hit_ratio_start_time_end_time" - last_access = c_heatmap.get_last_access_dist(reader.cReader) + last_access = mimircache.c_heatmap.get_last_access_dist(reader.cReader) last_access_array = Array('l', len(last_access), lock=False) for i, j in enumerate(last_access): last_access_array[i] = j @@ -218,9 +221,9 @@ def calculate_heatmap_dat(self, reader, mode, plot_type, time_interval=-1, num_o # prepare break points if mode[0] == 'r' or mode[0] == 'v': - break_points = cHeatmap().getBreakpoints(reader, mode[0], - time_interval=time_interval, - num_of_pixels=num_of_pixels) + break_points = cHeatmap().get_breakpoints(reader, mode[0], + time_interval=time_interval, + num_of_pixels=num_of_pixels) else: raise RuntimeError("unrecognized mode, it can only be r or v") diff --git a/mimircache/profiler/twoDPlots.py b/mimircache/profiler/twoDPlots.py index f404985..b606b80 100644 --- a/mimircache/profiler/twoDPlots.py +++ b/mimircache/profiler/twoDPlots.py @@ -64,7 +64,7 @@ def request_rate_2d(reader, mode, time_interval, ticker.FuncFormatter(lambda x, pos: '{:2.0f}%'.format(x * 100 / len(break_points)))) assert mode == 'r' or mode == 'v', "currently only support mode r and v, what mode are you using?" 
- break_points = cHeatmap().getBreakpoints(reader, mode, time_interval) + break_points = cHeatmap().get_breakpoints(reader, mode, time_interval) l = [] for i in range(1, len(break_points)): @@ -95,7 +95,7 @@ def cold_miss_count_2d(reader, mode, time_interval, ticker.FuncFormatter(lambda x, pos: '{:2.0f}%'.format(x * 100 / len(break_points)))) assert mode == 'r' or mode == 'v', "currently only support mode r and v, what mode are you using?" - break_points = cHeatmap().getBreakpoints(reader, mode, time_interval) + break_points = cHeatmap().get_breakpoints(reader, mode, time_interval) cold_miss_list = [0] * (len(break_points) - 1) seen_set = set() @@ -134,7 +134,7 @@ def cold_miss_ratio_2d(reader, mode, time_interval, ticker.FuncFormatter(lambda x, pos: '{:2.0f}%'.format(x * 100 / len(break_points)))) assert mode == 'r' or mode == 'v', "currently only support mode r and v, unknown mode {}".format(mode) - break_points = cHeatmap().getBreakpoints(reader, mode, time_interval) + break_points = cHeatmap().get_breakpoints(reader, mode, time_interval) cold_miss_list = [0] * (len(break_points) - 1) seen_set = set() diff --git a/mimircache/top/cachecow.py b/mimircache/top/cachecow.py index 62da7cc..acb2e7d 100644 --- a/mimircache/top/cachecow.py +++ b/mimircache/top/cachecow.py @@ -1,10 +1,19 @@ # coding=utf-8 """ -this module offer the upper level API to user +this module offers the upper level API to the user, it currently supports four types of operations, + +* **trace loading** +* **trace information retrieving** +* **trace profiling** +* **plotting** + """ from matplotlib.ticker import FuncFormatter -import mimircache.c_heatmap as c_heatmap +from mimircache.const import CExtensionMode +if CExtensionMode: + import mimircache.c_heatmap + from mimircache.profiler.evictionStat import * from mimircache.profiler.twoDPlots import * from mimircache.utils.prepPlotParams import * @@ -13,7 +22,7 @@ class cachecow: - all = ("open", + __all__ = ["open", "csv", "vscsi", "binary", @@ -23,13 
+32,13 @@ class cachecow: "get_reuse_distance", "get_hit_ratio_dict", "heatmap", - "diffHeatmap", + "diff_heatmap", "twoDPlot", "eviction_plot", "plotHRCs", "plotMRCs", "characterize", - "close") + "close"] def __init__(self, **kwargs): self.reader = None @@ -38,20 +47,38 @@ def __init__(self, **kwargs): self.n_uniq_req = -1 self.cacheclass_mapping = {} - def open(self, file_path, trace_type="p", **kwargs): + def open(self, file_path, trace_type="p", data_type="c", **kwargs): """ - default this opens a plain text file, which contains a label each line - but it also supports only other type of trace by setting trace_type - the parameters for opening other trace type are the same as corresponding call - :param file_path: - :param trace_type: - :param kwargs: - :return: + + The default operation of this function opens a plain text trace, + the format of a plain text trace is such a file that each line contains a label. + + By changing trace type, it can be used for opening other types of trace, + supported trace types include + + ============== ========== =================== + trace_type file type require init_params + ============== ========== =================== + "p" plain text No + "c" csv Yes + "b" binary Yes + "v" vscsi No + ============== ========== =================== + + the effect of this is the same as calling corresponding functions (csv, binary, vscsi) + + :param file_path: the path to the data + :param trace_type: type of trace, "p" for plainText, "c" for csv, "v" for vscsi, "b" for binary + :param data_type: the type of request label, \ + can be either "c" for string or "l" for number (for example block IO LBA) + :param kwargs: parameters for opening the trace + :return: reader object """ + + if self.reader: + self.reader.close() if trace_type == "p": - if self.reader: - self.reader.close() - self.reader = plainReader(file_path, data_type=kwargs.get("data_type", "c")) + self.reader = plainReader(file_path, data_type=data_type) elif trace_type == "c": 
assert "init_params" in kwargs, "please provide init_params for csv trace" @@ -59,7 +86,7 @@ def open(self, file_path, trace_type="p", **kwargs): kwargs_new = {} kwargs_new.update(kwargs) del kwargs_new["init_params"] - self.csv(file_path, init_params, **kwargs_new) + self.csv(file_path, init_params, data_type=data_type, **kwargs_new) elif trace_type == 'b': assert "init_params" in kwargs, "please provide init_params for csv trace" @@ -67,7 +94,7 @@ def open(self, file_path, trace_type="p", **kwargs): kwargs_new = {} kwargs_new.update(kwargs) del kwargs_new["init_params"] - self.binary(file_path, init_params, **kwargs_new) + self.binary(file_path, init_params, data_type=data_type, **kwargs_new) elif trace_type == 'v': self.vscsi(file_path, **kwargs) @@ -77,72 +104,132 @@ def open(self, file_path, trace_type="p", **kwargs): return self.reader - def csv(self, file_path, init_params, data_type='c', block_unit_size=0, disk_sector_size=0): - """ - open a csv file - :param file_path: - :param init_params: params related to csv file, see csvReader for detail - :param data_type: can be either 'c' for string or 'l' for number (like block IO) - :param block_unit_size: the page size for a cache - :param disk_sector_size: the disk sector size of input file - :return: + def csv(self, file_path, init_params, data_type="c", + block_unit_size=0, disk_sector_size=0, **kwargs): + """ + open a csv trace, init_params is a dictionary specifying the specs of the csv file, + the possible keys are listed in the table below. + The column/field number begins from 1, so the first column(field) is 1, the second is 2, etc. 
+ + :param file_path: the path to the data + :param init_params: params related to csv file, see above or csvReader for details + :param data_type: the type of request label, \ + can be either "c" for string or "l" for number (for example block IO LBA) + :param block_unit_size: the block size for a cache, currently storage system only + :param disk_sector_size: the disk sector size of input file, storage system only + :return: reader object + + +------------------+-------------+--------------+---------------------+---------------------------------------------------+ + | Keyword Argument | file type | Value Type | Default Value | Description | + +==================+=============+==============+=====================+===================================================+ + | label | csv/ binary | int | this is required | the column of the label of the request | + +------------------+-------------+--------------+---------------------+---------------------------------------------------+ + | fmt | binary | string | this is required | fmt string of binary data, same as python struct | + +------------------+-------------+--------------+---------------------+---------------------------------------------------+ + | header | csv | True/False | False | whether csv data has header | + +------------------+-------------+--------------+---------------------+---------------------------------------------------+ + | delimiter | csv | char | "," | the delimiter separating fields in the csv file | + +------------------+-------------+--------------+---------------------+---------------------------------------------------+ + | real_time | csv/ binary | int | NA | the column of real time | + +------------------+-------------+--------------+---------------------+---------------------------------------------------+ + | op | csv/ binary | int | NA | the column of operation (read/write) | + 
+------------------+-------------+--------------+---------------------+---------------------------------------------------+ + | size | csv/ binary | int | NA | the column of block/request size | + +------------------+-------------+--------------+---------------------+---------------------------------------------------+ """ + if self.reader: self.reader.close() self.reader = csvReader(file_path, data_type=data_type, block_unit_size=block_unit_size, disk_sector_size=disk_sector_size, - init_params=init_params) + init_params=init_params, **kwargs) return self.reader - def binary(self, file_path, init_params, data_type='l', block_unit_size=0, disk_sector_size=0): + def binary(self, file_path, init_params, data_type='l', + block_unit_size=0, disk_sector_size=0, **kwargs): """ - open a binary file - :param file_path: - :param init_params: params related to csv file, see csvReader for detail - :param data_type: can be either 'c' for string or 'l' for number (like block IO) - :param block_unit_size: the page size for a cache - :param disk_sector_size: the disk sector size of input file - :return: + open a binary trace file, init_params see function csv + + :param file_path: the path to the data + :param init_params: params related to the spec of data, see above csv for details + :param data_type: the type of request label, \ + can be either "c" for string or "l" for number (for example block IO LBA) + :param block_unit_size: the block size for a cache, currently storage system only + :param disk_sector_size: the disk sector size of input file, storage system only + :return: reader object """ + if self.reader: self.reader.close() self.reader = binaryReader(file_path, data_type=data_type, block_unit_size=block_unit_size, disk_sector_size=disk_sector_size, - init_params=init_params) + init_params=init_params, **kwargs) return self.reader - def vscsi(self, file_path, data_type='l', block_unit_size=0, disk_sector_size=512): + def vscsi(self, file_path, block_unit_size=0, 
disk_sector_size=512, **kwargs): """ open vscsi trace file - :param file_path: - :param data_type: can be either 'c' for string or 'l' for number (like block IO) - :param block_unit_size: the page size for a cache - :param disk_sector_size: the disk sector size of input file - :return: + + :param file_path: the path to the data + :param block_unit_size: the block size for a cache, currently storage system only + :param disk_sector_size: the disk sector size of input file, storage system only + :return: reader object """ + if self.reader: self.reader.close() - self.reader = vscsiReader(file_path, data_type=data_type, + if "data_type" in kwargs: + del kwargs["data_type"] + self.reader = vscsiReader(file_path, data_type="l", block_unit_size=block_unit_size, - disk_sector_size=disk_sector_size) + disk_sector_size=disk_sector_size, **kwargs) return self.reader - def set_size(self, size): + + def reset(self): """ - set the size of cachecow - :param size: - :return: + reset cachecow to initial state, including + reset reader to the beginning of the trace + """ - raise RuntimeWarning("deprecated") - assert isinstance(size, int), "size can only be an integer" - self.cache_size = size + assert self.reader is not None, "reader is None, cannot reset" + self.reader.reset() + + + def close(self): + """ + close the reader opened in cachecow, and clean up in the future + """ + + if self.reader is not None: + self.reader.close() + self.reader = None + + + def stat(self): + """ + obtain the statistical information about the trace, including + + * number of requests + * number of uniq items + * cold miss ratio + * a list of top 10 popular in form of (obj, num of requests): + * number of obj/block accessed only once + * frequency mean + * time span + + :return: a string of the information above + """ + assert self.reader, "you haven't provided a data file" + return traceStat(self.reader).get_stat() + def num_of_req(self): """ - return the number of requests in the trace - :return: + + 
:return: the number of requests in the trace """ if self.n_req == -1: self.n_req = self.reader.get_num_of_req() @@ -150,8 +237,8 @@ def num_of_req(self): def num_of_uniq_req(self): """ - return the number of unique requests in the trace - :return: + + :return: the number of unique requests in the trace """ if self.n_uniq_req == -1: self.n_uniq_req = self.reader.get_num_of_uniq_req() @@ -159,20 +246,33 @@ def num_of_uniq_req(self): def get_reuse_distance(self): """ - return an array of reuse distance - :return: + + :return: an array of reuse distance """ return LRUProfiler(self.reader).get_reuse_distance() def get_hit_ratio_dict(self, algorithm, cache_size=-1, cache_params=None, bin_size=-1, use_general_profiler=False, **kwargs): """ - return an dict of hit ratio of given algorithms, mapping from cache_size -> hit ratio - :return: + + :param algorithm: cache replacement algorithms + :param cache_size: size of cache + :param cache_params: parameters passed to cache, some of the cache replacement algorithms require parameters, + for example LRU-K, SLRU + :param bin_size: if algorithm is not LRU, then the hit ratio will be calculated by simulating cache at + cache size [0, bin_size, bin_size*2 ... cache_size], this is not required for LRU + :param use_general_profiler: if algorithm is LRU and you don't want to use LRUProfiler, then set this to True, + possible reason for not using a LRUProfiler: 1. LRUProfiler is too slow for your large trace + because the algorithm is O(NlogN) and it uses single thread; 2. LRUProfiler has a bug (let me know if you found a bug). 
+ :param kwargs: other parameters including num_of_threads + :return: an dict of hit ratio of given algorithms, mapping from cache_size -> hit ratio """ + hit_ratio_dict = {} - p = self.profiler(algorithm, cache_params=cache_params, - cache_size=cache_size, bin_size=bin_size, + p = self.profiler(algorithm, + cache_params=cache_params, + cache_size=cache_size, + bin_size=bin_size, use_general_profiler=use_general_profiler, **kwargs) hr = p.get_hit_ratio(cache_size=cache_size) if isinstance(p, LRUProfiler): @@ -183,63 +283,80 @@ def get_hit_ratio_dict(self, algorithm, cache_size=-1, cache_params=None, bin_si hit_ratio_dict[i * p.bin_size] = hr[i] return hit_ratio_dict - def reset(self): - """ - reset reader to the beginning of the trace - :return: - """ - assert self.reader is not None, "reader is None, cannot reset" - self.reader.reset() - def _profiler_pre_check(self, **kwargs): + def profiler(self, algorithm, cache_params=None, cache_size=-1, bin_size=-1, + use_general_profiler=False, **kwargs): """ - check whether user has provided new cache size and data information - :param kwargs: - :return: + get a profiler instance, this should not be used by most users + + :param use_general_profiler: for LRU only, if it is true, then return a cGeneralProfiler for LRU, + otherwise, return a LRUProfiler for LRU. 
+ + Note: LRUProfiler does not require cache_size/bin_size params, + it does not sample thus provides a smooth curve, however, it is O(logN) at each step, + in contrast, cGeneralProfiler samples the curve, but use O(1) at each step + :param kwargs: num_of_threads + :return: a profiler instance """ - reader = None - if 'num_of_threads' in kwargs: - num_of_threads = kwargs['num_of_threads'] - elif 'num_of_thread' in kwargs: - num_of_threads = kwargs['num_of_thread'] - else: - num_of_threads = DEFAULT_NUM_OF_THREADS - - if 'data' in kwargs and 'dataType' in kwargs: - if kwargs['dataType'] == 'plain': - reader = plainReader(kwargs['data']) - if kwargs['dataType'] == 'csv': - assert 'column' in kwargs, "you didn't provide column number for csv reader" - reader = csvReader(kwargs['data'], kwargs['column']) - if kwargs['dataType'] == 'vscsi': - reader = vscsiReader(kwargs['data']) - elif 'reader' in kwargs: - reader = kwargs['reader'] + num_of_threads = kwargs.get("num_of_threads", DEFAULT_NUM_OF_THREADS) + assert self.reader is not None, "you haven't opened a trace yet" + + if algorithm.lower() == "lru" and not use_general_profiler: + profiler = LRUProfiler(self.reader, cache_size, cache_params) else: - reader = self.reader + assert cache_size != -1, "you didn't provide size for cache" + assert cache_size <= self.num_of_req(), "you cannot specify cache size({}) " \ + "larger than trace length({})".format(cache_size, + self.num_of_req()) + if isinstance(algorithm, str): + if algorithm.lower() in c_available_cache: + profiler = cGeneralProfiler(self.reader, cache_alg_mapping[algorithm.lower()], + cache_size, bin_size, + cache_params, num_of_threads) + else: + profiler = generalProfiler(self.reader, self.cacheclass_mapping[algorithm.lower()], + cache_size, bin_size, + cache_params, num_of_threads) + else: + profiler = generalProfiler(self.reader, algorithm, cache_size, bin_size, + cache_params, num_of_threads) - assert reader is not None, "you didn't provide a reader nor 
data (data file and data type)" - self.reader = reader + return profiler - return reader, num_of_threads - def heatmap(self, mode, plot_type, time_interval=-1, num_of_pixels=-1, + def heatmap(self, time_mode, plot_type, time_interval=-1, num_of_pixels=-1, algorithm="LRU", cache_params=None, cache_size=-1, **kwargs): """ + plot heatmaps, currently supports the following heatmaps - :param cache_size: - :param cache_params: - :param algorithm: - :param num_of_pixels: - :param time_interval: - :param plot_type: - :param mode: - :param kwargs: algorithm: - :return: + * hit_ratio_start_time_end_time + + * hit_ratio_start_time_cache_size (python only) + * avg_rd_start_time_end_time (python only) + * cold_miss_count_start_time_end_time (python only) + + * rd_distribution + * rd_distribution_CDF + * future_rd_distribution + * dist_distribution + * reuse_time_distribution + + :param time_mode: the type of time, can be "v" for virtual time, or "r" for real time + :param plot_type: the name of plot types, see above for plot types + :param time_interval: the time interval of one pixel + :param num_of_pixels: if you don't want to use time_interval, + you can also specify how many pixels you want in one dimension, + note this feature is not well tested + :param algorithm: what algorithm to use for plotting heatmap, + this is not required for distance related heatmap like rd_distribution + :param cache_params: parameters passed to cache, some of the cache replacement algorithms require parameters, + for example LRU-K, SLRU + :param cache_size: The size of cache, this is required only for *hit_ratio_start_time_end_time* + :param kwargs: other parameters for computation and plotting such as num_of_threads, figname """ - reader, num_of_threads = self._profiler_pre_check(**kwargs) + assert self.reader is not None, "you haven't opened a trace yet" assert cache_size <= self.num_of_req(), \ "you cannot specify cache size({}) larger than " \ "trace length({})".format(cache_size, 
self.num_of_req()) @@ -255,7 +372,7 @@ def heatmap(self, mode, plot_type, time_interval=-1, num_of_pixels=-1, else: hm = heatmap() - hm.heatmap(reader, mode, plot_type, + hm.heatmap(self.reader, time_mode, plot_type, time_interval=time_interval, num_of_pixels=num_of_pixels, cache_size=cache_size, @@ -263,36 +380,35 @@ def heatmap(self, mode, plot_type, time_interval=-1, num_of_pixels=-1, cache_params=cache_params, **kwargs) - def diffHeatmap(self, mode, plot_type, algorithm1, time_interval=-1, num_of_pixels=-1, - algorithm2="Optimal", cache_params1=None, cache_params2=None, cache_size=-1, **kwargs): - """ - alg2 - alg1 - :param cache_size: - :param cache_params2: - :param cache_params1: - :param algorithm2: - :param num_of_pixels: - :param time_interval: - :param algorithm1: - :param mode: - :param plot_type: - :param kwargs: - :return: - """ - figname = 'differential_heatmap.png' - if 'figname' in kwargs: - figname = kwargs['figname'] + def diff_heatmap(self, time_mode, plot_type, algorithm1, time_interval=-1, num_of_pixels=-1, + algorithm2="Optimal", cache_params1=None, cache_params2=None, cache_size=-1, **kwargs): + """ + Plot the differential heatmap between two algorithms by alg2 - alg1 + + :param time_mode: time mode, "v" for virtual time, "r" for real time + :param plot_type: same as the name in heatmap function + :param algorithm1: name of the first alg + :param time_interval: same as in heatmap + :param num_of_pixels: same as in heatmap + :param algorithm2: name of the second algorithm + :param cache_params1: parameters of the first algorithm + :param cache_params2: parameters of the second algorithm + :param kwargs: include num_of_threads + """ + + figname = kwargs.get("figname", 'differential_heatmap.png') + num_of_threads = kwargs.get("num_of_threads", DEFAULT_NUM_OF_THREADS) + assert self.reader is not None, "you haven't opened a trace yet" assert cache_size != -1, "you didn't provide size for cache" assert cache_size <= self.num_of_req(), \ "you 
cannot specify cache size({}) larger than " \ "trace length({})".format(cache_size, self.num_of_req()) - reader, num_of_threads = self._profiler_pre_check(**kwargs) if algorithm1.lower() in c_available_cache and algorithm2.lower() in c_available_cache: hm = cHeatmap() - hm.diffHeatmap(reader, mode, plot_type, + hm.diffHeatmap(self.reader, time_mode, plot_type, cache_size=cache_size, time_interval=time_interval, num_of_pixels=num_of_pixels, @@ -305,110 +421,86 @@ def diffHeatmap(self, mode, plot_type, algorithm1, time_interval=-1, num_of_pixe else: hm = heatmap() if algorithm1.lower() not in c_available_cache: - xydict1 = hm.calculate_heatmap_dat(reader, mode, plot_type, + xydict1 = hm.calculate_heatmap_dat(self.reader, time_mode, plot_type, time_interval=time_interval, cache_size=cache_size, algorithm=algorithm1, cache_params=cache_params1, **kwargs)[0] else: - xydict1 = c_heatmap.heatmap(reader.cReader, mode, plot_type, - cache_size=cache_size, - time_interval=time_interval, - algorithm=algorithm1, - cache_params=cache_params1, - num_of_threads=num_of_threads) + xydict1 = mimircache.c_heatmap.heatmap(self.reader.cReader, time_mode, plot_type, + cache_size=cache_size, + time_interval=time_interval, + algorithm=algorithm1, + cache_params=cache_params1, + num_of_threads=num_of_threads) if algorithm2.lower() not in c_available_cache: - xydict2 = hm.calculate_heatmap_dat(reader, mode, plot_type, + xydict2 = hm.calculate_heatmap_dat(self.reader, time_mode, plot_type, time_interval=time_interval, cache_size=cache_size, algorithm=algorithm2, cache_params=cache_params2, **kwargs)[0] else: - xydict2 = c_heatmap.heatmap(reader.cReader, mode, plot_type, - time_interval=time_interval, - cache_size=cache_size, - algorithm=algorithm2, - cache_params=cache_params2, - num_of_threads=num_of_threads) + xydict2 = mimircache.c_heatmap.heatmap(self.reader.cReader, time_mode, plot_type, + time_interval=time_interval, + cache_size=cache_size, + algorithm=algorithm2, + 
cache_params=cache_params2, + num_of_threads=num_of_threads) cHm = cHeatmap() text = " differential heatmap\n cache size: {},\n cache type: ({}-{})/{},\n" \ " time type: {},\n time interval: {},\n plot type: \n{}".format( - cache_size, algorithm2, algorithm1, algorithm1, mode, time_interval, plot_type) + cache_size, algorithm2, algorithm1, algorithm1, time_mode, time_interval, plot_type) x1, y1 = xydict1.shape x1 = int(x1 / 2.8) y1 /= 8 - if mode == 'r': - time_mode_string = "real" - elif mode == "v": - time_mode_string = "virtual" + if time_mode == 'r': + time_mode_string = "Real" + elif time_mode == "v": + time_mode_string = "Virtual" else: - raise RuntimeError("unknown time mode {}".format(mode)) + raise RuntimeError("unknown time time_mode {}".format(time_mode)) - cHm.setPlotParams('x', '{}_time'.format(time_mode_string), xydict=xydict1, - label='start time ({})'.format(time_mode_string), + cHm.setPlotParams('x', '{} Time'.format(time_mode_string), xydict=xydict1, + label='Start Time ({})'.format(time_mode_string), text=(x1, y1, text)) - cHm.setPlotParams('y', '{}_time'.format(time_mode_string), xydict=xydict1, - label='end time ({})'.format(time_mode_string), + cHm.setPlotParams('y', '{} Time'.format(time_mode_string), xydict=xydict1, + label='End Time ({})'.format(time_mode_string), fixed_range=(-1, 1)) np.seterr(divide='ignore', invalid='ignore') plot_dict = (xydict2 - xydict1) / xydict1 cHm.draw_heatmap(plot_dict, figname=figname) - def profiler(self, algorithm, cache_params=None, cache_size=-1, bin_size=-1, - use_general_profiler=False, **kwargs): - """ - profiler - :param cache_size: - :param cache_params: - :param algorithm: - :param use_general_profiler: for LRU only, if it is true, then return a cGeneralProfiler for LRU, - otherwise, return a LRUProfiler for LRU - Note: LRUProfiler does not require cache_size/bin_size params, - it does not sample thus provides a smooth curve, however, it is O(logN) at each step, - in constrast, cGeneralProfiler samples 
the curve, but use O(1) at each step - :param kwargs: - :return: - """ - reader, num_of_threads = self._profiler_pre_check(**kwargs) - profiler = None + def twoDPlot(self, plot_type, **kwargs): + """ + an aggregate function for all two dimensional plots printing except hit ratio curve - if algorithm.lower() == "lru" and not use_general_profiler: - profiler = LRUProfiler(reader, cache_size, cache_params) - else: - assert cache_size != -1, "you didn't provide size for cache" - assert cache_size <= self.num_of_req(), "you cannot specify cache size({}) " \ - "larger than trace length({})".format(cache_size, - self.num_of_req()) - if isinstance(algorithm, str): - if algorithm.lower() in c_available_cache: - profiler = cGeneralProfiler(reader, cache_alg_mapping[algorithm.lower()], - cache_size, bin_size, - cache_params, num_of_threads) - else: - profiler = generalProfiler(reader, self.cacheclass_mapping[algorithm.lower()], - cache_size, bin_size, - cache_params, num_of_threads) - else: - profiler = generalProfiler(reader, algorithm, cache_size, bin_size, - cache_params, num_of_threads) - return profiler + ======================== ============================ ================================================= + plot type required parameters Description + ======================== ============================ ================================================= + cold_miss_count time_mode, time_interval cold miss count VS time + cold_miss_ratio time_mode, time_interval cold miss ratio VS time + request_rate time_mode, time_interval num of requests VS time + popularity NA Percentage of obj VS frequency + rd_popularity NA Num of req VS reuse distance + rt_popularity NA Num of req VS reuse time + mapping NA mapping from original objID to sequential number + interval_hit_ratio cache_size hit ratio of interval VS time + ======================== ============================ ================================================= - def twoDPlot(self, plot_type, **kwargs): - """ - two 
dimensional plots - :param plot_type: - :param kwargs: - :return: + + :param plot_type: type of the plot, see above + :param kwargs: parameters related to plots, see twoDPlots module for detailed control over plots """ + kwargs["figname"] = kwargs.get("figname", "{}.png".format(plot_type)) if plot_type == 'cold_miss' or plot_type == "cold_miss_count": @@ -453,19 +545,14 @@ def twoDPlot(self, plot_type, **kwargs): else: WARNING("currently don't support your specified plot_type: " + str(plot_type)) + def evictionPlot(self, mode, time_interval, plot_type, algorithm, cache_size, cache_params=None, **kwargs): """ - plot eviction stat vs time, currently support - reuse_dist, freq, accumulative_freq - :param mode: - :param time_interval: - :param plot_type: - :param algorithm: - :param cache_size: - :param cache_params: - :param kwargs: - :return: + plot eviction stat vs time, currently support reuse_dist, freq, accumulative_freq + + This function is going to be deprecated """ + if plot_type == "reuse_dist": eviction_stat_reuse_dist_plot(self.reader, algorithm, cache_size, mode, time_interval, cache_params=cache_params, **kwargs) @@ -481,20 +568,28 @@ def evictionPlot(self, mode, time_interval, plot_type, algorithm, cache_size, ca plot_type, "reuse_dist, freq, accumulative_freq" )) + def plotHRCs(self, algorithm_list, cache_params=(), cache_size=-1, bin_size=-1, - auto_size=True, figname="HRC.png", **kwargs): + auto_resize=True, figname="HRC.png", **kwargs): """ + this function provides hit ratio curve plotting - :param algorithm_list: - :param cache_params: - :param cache_size: - :param bin_size: - :param auto_size: - :param figname: - :param kwargs: block_unit_size, num_of_threads, label, autosize_threshold, xlimit, ylimit, cache_unit_size + :param algorithm_list: a list of algorithm(s) + :param cache_params: the corresponding cache params for the algorithms, + use None for algorithms that don't require cache params, + if none of the alg requires cache params, you don't 
need to set this + :param cache_size: maximal size of cache, use -1 for max possible size + :param bin_size: bin size for non-LRU profiling + :param auto_resize: when using max possible size or specified cache size too large, + you will get a huge plateau at the end of hit ratio curve, + set auto_resize to True to cutoff most of the big plateau + :param kwargs: block_unit_size, num_of_threads, + auto_resize_threshold, xlimit, ylimit, cache_unit_size - :return: + save_gradually - save a figure everytime computation for one algorithm finishes, + + label - instead of using algorithm list as label, specified user-defined label, """ plot_dict = prepPlotParams("Hit Ratio Curve", "Cache Size (Items)", "Hit Ratio", figname, **kwargs) @@ -504,13 +599,13 @@ def plotHRCs(self, algorithm_list, cache_params=(), cache_unit_size = kwargs.get("cache_unit_size", 0) use_general_profiler = kwargs.get("use_general_profiler", False) save_gradually = kwargs.get("save_gradually", False) - threshold = kwargs.get('autosize_threshold', 0.98) + threshold = kwargs.get('auto_resize_threshold', 0.98) label = kwargs.get("label", algorithm_list) profiling_with_size = False LRU_HR = None - if cache_size == -1 and auto_size: + if cache_size == -1 and auto_resize: LRU_HR = LRUProfiler(self.reader).plotHRC(auto_resize=True, threshold=threshold, no_save=True) cache_size = len(LRU_HR) else: @@ -609,77 +704,20 @@ def plotHRCs(self, algorithm_list, cache_params=(), plt.clf() return hit_ratio_dict - def plotMRCs(self, algorithm_list, cache_params=None, cache_size=-1, bin_size=-1, auto_size=True, **kwargs): - """ - plot MRCs, not updated, might be deprecated - :param algorithm_list: - :param cache_params: - :param cache_size: - :param bin_size: - :param auto_size: - :param kwargs: - :return: - """ - raise RuntimeWarning("deprecated") - plot_dict = prepPlotParams("Miss Ratio Curve", "Cache Size(item)", "Miss Ratio", "MRC.png", **kwargs) - num_of_threads = 4 - if 'num_of_threads' in kwargs: - num_of_threads 
= kwargs['num_of_threads'] - if 'label' not in kwargs: - label = algorithm_list - else: - label = kwargs['label'] - - threshold = 0.98 - if 'autosize_threshold' in kwargs: - threshold = kwargs['autosize_threshold'] - ymin = 1 - - if auto_size: - cache_size = LRUProfiler(self.reader).plotMRC(auto_resize=True, threshold=threshold, no_save=True) - else: - assert cache_size < self.num_of_req(), "you cannot specify cache size larger than trace length" - - if bin_size == -1: - bin_size = cache_size // DEFAULT_BIN_NUM_PROFILER + 1 - for i in range(len(algorithm_list)): - alg = algorithm_list[i] - if cache_params and i < len(cache_params): - cache_param = cache_params[i] - else: - cache_param = None - profiler = self.profiler(alg, cache_param, cache_size, - bin_size=bin_size, num_of_threads=num_of_threads) - mr = profiler.get_miss_rate() - ymin = min(ymin, max(min(mr) - 0.02, 0)) - self.reader.reset() - # plt.xlim(0, cache_size) - if alg != "LRU": - plt.plot([i * bin_size for i in range(len(mr))], mr, label=label[i]) - else: - plt.plot(mr[:-2], label=label[i]) + def characterize(self, type, cache_size=-1): + """ + use this function to obtain a series of plots about your trace, the type includes - print("ymin = {}".format(ymin)) - if "ymin" in kwargs: - ymin = kwargs['ymin'] + * short - short run time, fewer plots with less accuracy + * medium + * long + * all - most of the available plots with high accuracy, notice it can take **LONG** time on big trace - plt.ylim(ymin=ymin) - plt.semilogy() - plt.legend(loc="best") - plt.xlabel(plot_dict['xlabel']) - plt.ylabel(plot_dict['ylabel']) - plt.title(plot_dict['title'], fontsize=18, color='black') - if not 'no_save' in kwargs or not kwargs['no_save']: - plt.savefig(plot_dict['figname'], dpi=600) - INFO("plot is saved at the same directory") - try: - plt.show() - except: - pass - plt.clf() + :param type: see above, options: short, medium, long, all + :param cache_size: estimated cache size for the trace, if -1, mimircache will 
estimate the cache size + """ - def characterize(self, type, cache_size=-1): # TODO: jason: allow one single function call to obtain the most useful information # and would be better to give time estimation while running @@ -708,7 +746,6 @@ def characterize(self, type, cache_size=-1): use_general_profiler=True, save_gradually=True) elif type == "medium": - # medium should support [ if trace_stat.time_span != 0: INFO("now begin to plot request rate curve") self.twoDPlot("request_rate", mode="r", time_interval=trace_stat.time_span//100) @@ -795,10 +832,6 @@ def characterize(self, type, cache_size=-1): cache_size=cache_size) - def stat(self): - assert self.reader, "you haven't provided a data file" - return traceStat(self.reader).get_stat() - def __len__(self): assert self.reader, "you haven't provided a data file" return len(self.reader) @@ -807,20 +840,8 @@ def __iter__(self): assert self.reader, "you haven't provided a data file" return self.reader - def next(self): - return self.__next__() - def __next__(self): # Python 3 return self.reader.next() def __del__(self): self.close() - - def close(self): - """ - close the reader opened in cachecow, and clean up in the future - :return: - """ - if self.reader is not None: - self.reader.close() - self.reader = None diff --git a/test/test_cHeatmap.py b/test/test_cHeatmap.py index ca5820c..ed96473 100644 --- a/test/test_cHeatmap.py +++ b/test/test_cHeatmap.py @@ -26,11 +26,11 @@ class cHeatmapTest(unittest.TestCase): def test1_vReader(self): reader = vscsiReader("{}/trace.vscsi".format(DAT_FOLDER)) cH = cHeatmap() - bpr = cH.getBreakpoints(reader, 'r', time_interval=1000000) + bpr = cH.get_breakpoints(reader, 'r', time_interval=1000000) self.assertEqual(bpr[10], 53) - bpr = cH.getBreakpoints(reader, 'r', num_of_pixels=1000) + bpr = cH.get_breakpoints(reader, 'r', num_of_pixels=1000) # print(bpr) - bpv = cH.getBreakpoints(reader, 'v', time_interval=1000) + bpv = cH.get_breakpoints(reader, 'v', time_interval=1000) 
self.assertEqual(bpv[10], 10000) cH.heatmap(reader, 'r', "hit_ratio_start_time_end_time", @@ -51,7 +51,7 @@ def test1_vReader(self): def test2_pReader(self): reader = plainReader("{}/trace.txt".format(DAT_FOLDER)) cH = cHeatmap() - bpv = cH.getBreakpoints(reader, 'v', time_interval=1000) + bpv = cH.get_breakpoints(reader, 'v', time_interval=1000) self.assertEqual(bpv[10], 10000) cH.heatmap(reader, 'v', "hit_ratio_start_time_end_time", @@ -74,7 +74,7 @@ def test3_cReader_v(self): reader = csvReader("{}/trace.csv".format(DAT_FOLDER), init_params={"header":True, "label":5}) cH = cHeatmap() - bpv = cH.getBreakpoints(reader, 'v', time_interval=1000) + bpv = cH.get_breakpoints(reader, 'v', time_interval=1000) self.assertEqual(bpv[10], 10000) cH.heatmap(reader, 'v', "hit_ratio_start_time_end_time", @@ -99,7 +99,7 @@ def test4_cReader_r(self): reader = csvReader("{}/trace.csv".format(DAT_FOLDER), init_params={"header":True, "label":5, 'real_time':2}) cH = cHeatmap() - bpr = cH.getBreakpoints(reader, 'r', time_interval=1000000) + bpr = cH.get_breakpoints(reader, 'r', time_interval=1000000) self.assertEqual(bpr[10], 53) cH.heatmap(reader, 'r', "hit_ratio_start_time_end_time", @@ -126,9 +126,9 @@ def test5_bReader(self): init_params={"label":6, "real_time":7, "fmt": "<3I2H2Q"}) cH = cHeatmap() - bpr = cH.getBreakpoints(reader, 'r', time_interval=1000000) + bpr = cH.get_breakpoints(reader, 'r', time_interval=1000000) self.assertEqual(bpr[10], 53) - bpv = cH.getBreakpoints(reader, 'v', time_interval=1000) + bpv = cH.get_breakpoints(reader, 'v', time_interval=1000) self.assertEqual(bpv[10], 10000) cH.heatmap(reader, 'r', "hit_ratio_start_time_end_time", diff --git a/test/test_cachecow.py b/test/test_cachecow.py index 3f2b189..201ca16 100644 --- a/test/test_cachecow.py +++ b/test/test_cachecow.py @@ -40,11 +40,11 @@ def test1(self): num_of_pixels=100, num_of_threads=8, cache_size=2000) c.heatmap('v', "rd_distribution", time_interval=1000, num_of_threads=8) - 
c.diffHeatmap(TIME_MODE, "hit_ratio_start_time_end_time", - time_interval=TIME_INTERVAL, - cache_size=CACHE_SIZE, - algorithm1="LRU", algorithm2="MRU", - cache_params2=None, num_of_threads=8) + c.diff_heatmap(TIME_MODE, "hit_ratio_start_time_end_time", + time_interval=TIME_INTERVAL, + cache_size=CACHE_SIZE, + algorithm1="LRU", algorithm2="MRU", + cache_params2=None, num_of_threads=8) c.twoDPlot("cold_miss_count", mode='v', time_interval=1000) c.twoDPlot("request_rate", mode='v', time_interval=1000)