-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathshrynk_all.py
102 lines (86 loc) · 2.7 KB
/
shrynk_all.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import os
import sys
import random
import warnings
# The last three command-line arguments are the integer weights that the
# banner below labels size, write and read (in that order). Validate them up
# front so a missing or non-numeric argument yields a usage message instead of
# an opaque ValueError from int() on the script name.
try:
    if len(sys.argv) < 4:
        raise ValueError("expected three weights")
    WEIGHTS = tuple(int(x) for x in sys.argv[-3:])
except ValueError:
    sys.exit(
        "usage: {} SIZE_WEIGHT WRITE_WEIGHT READ_WEIGHT (three integers)".format(
            sys.argv[0]
        )
    )

warnings.filterwarnings("ignore", category=DeprecationWarning)

# Silence stderr while the heavy imports and model training run. The devnull
# handle is intentionally left open: anything imported while silenced may have
# kept a reference to it, and closing it could break later writes.
sys.stderr = open(os.devnull, "w")  # silence stderr
try:
    import just
    import shrynk
    from shrynk.pandas import PandasCompressor
    from shrynk.json import JsonCompressor

    print()
    print(
        "===============================================================\n"
        "[shrynk=={}] | User Defined Weights: size={}, write={} read={}".format(
            shrynk.__version__, *WEIGHTS
        )
        + "\n==============================================================="
    )
    print()

    pdc = PandasCompressor("default")
    jc = JsonCompressor("json_default")

    print("Training RandomForest Model on packaged benchmark results...")
    print()

    pdc.train_model(*WEIGHTS)
    jc.train_model(*WEIGHTS)
finally:
    # Always restore stderr, even when an import or training fails; otherwise
    # the traceback of that failure would itself be written to devnull.
    sys.stderr = sys.__stderr__  # unsilence stderr

from colorama import Fore
def human_readable_size(size, decimal_places=3):
    """Format a byte count using decimal (base-1000) units.

    Args:
        size: Number of bytes (int or float).
        decimal_places: Digits shown after the decimal point.

    Returns:
        A string such as ``"1.500mb"``: the scaled value followed by one of
        ``b``, ``kb``, ``mb``, ``gb`` or ``tb``.
    """
    units = ('b', 'kb', 'mb', 'gb', 'tb')
    # Only scale down while a larger unit is still available; values of
    # 1000 tb and above stay expressed in tb instead of being divided once
    # more and mislabelled.
    for unit in units[:-1]:
        if size < 1000.0:
            break
        size /= 1000.0
    else:
        unit = units[-1]
    return f"{size:.{decimal_places}f}{unit}"
# Directory whose top-level *.json and *.csv files are run through the
# trained compressors.
dir_path = "/home/pascal/shrynk"
print("Processing:", dir_path)

# Running totals of the original and compressed sizes, in bytes.
old_total = 0
new_total = 0

if not dir_path.endswith("/"):
    dir_path += "/"

fnames = just.glob(dir_path + "*.json") + just.glob(dir_path + "*.csv")
random.shuffle(fnames)

for fname in fnames:
    print(fname)
    # Pick the compressor by file extension. A dedicated name is used so the
    # imported ``shrynk`` module is not rebound to a compressor instance.
    if fname.endswith(".json"):
        compressor = jc
        tp = "JSON"
    elif fname.endswith(".csv"):
        compressor = pdc
        tp = "CSV"

    old_size = os.path.getsize(fname)
    old_total += old_size

    data = compressor.load(fname)
    new_file = compressor.save(
        data, fname.replace("csv_", "").replace(".csv", "").replace(".json", "")
    )

    # The final extension of the written file names the chosen compression.
    comp = new_file.split(".")[-1]
    if comp == "None":
        # NOTE(review): presumably "None" means the data was kept
        # uncompressed -- confirm. It is counted at the original size.
        new_size = old_size
    else:
        new_size = os.path.getsize(new_file)
    new_total += new_size

    # The compressed file was only needed for measuring; remove it again.
    just.remove(new_file)

    # An empty input file has nothing to save; report 0% instead of dividing
    # by zero.
    improvement = 100 - int(new_size / old_size * 100) if old_size else 0
    if improvement > 50:
        improvement = Fore.GREEN + "{}%".format(improvement) + Fore.RESET
    elif improvement > 20:
        improvement = Fore.YELLOW + "{}%".format(improvement) + Fore.RESET
    else:
        improvement = Fore.RED + "{}%".format(improvement) + Fore.RESET
    improvement = improvement.rjust(13)

    print(
        "•",
        tp.rjust(4),
        "->",
        comp.rjust(5),
        "{} (deflated {})".format(human_readable_size(new_size, 0), improvement).rjust(34),
        new_file.replace(".shrynk", ""),
    )

# When no files matched (or all were empty) the total is 0 bytes; report 0%
# rather than raising ZeroDivisionError.
total_improvement = 100 - int(new_total / old_total * 100) if old_total else 0
print(
    "=" * 80
    + "\n"
    + "total shrynkage: {}% (from {} to {})".format(
        Fore.GREEN + "{}".format(total_improvement) + Fore.RESET,
        human_readable_size(old_total, 0),
        human_readable_size(new_total, 0),
    )
)