forked from polytech-nantes-puddi/tp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
makefile
59 lines (47 loc) · 1.32 KB
/
makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Every target in this makefile names a command, not a file to build. Declaring
# them phony keeps a stray file or directory with the same name (for example a
# file called `clean`) from making the target look up to date and being skipped.
.PHONY: all init activate part1 part1-download part1-unzip deactivate clean \
        mr-proper bonus bonus-download bonus-clean

# `all` has no prerequisites and no recipe, so building it does nothing.
all:
# One-time machine setup: installs OpenJDK 11 and the Python venv tooling through
# APT (requires sudo), then creates the `spark-env` virtual environment.
# `apt-get` is used instead of `apt` because the latter is meant for interactive
# use and warns that its command-line interface is not stable for scripts.
# The remaining steps are only printed: each recipe line runs in its own shell,
# so activating the environment here would not affect the caller's shell.
init:
	sudo apt-get update
	sudo apt-get install openjdk-11-jdk python3 python3-venv
	python3 -m venv spark-env
	@echo "Run :"
	@echo " source spark-env/bin/activate"
	@echo " pip install --upgrade pip"
	@echo " pip install pyspark"
# Print the command that activates the project's Python environment.
# The environment created by `init` (and removed by `mr-proper`) is the
# `spark-env` virtualenv, and `deactivate` prints the virtualenv command, so
# this prints the matching virtualenv activation rather than a conda one.
# Only a reminder: the command has to be run by the user in their own shell.
activate:
	@echo "Run :"
	@echo " source spark-env/bin/activate"
################
# Part 1
################
# Part 1 pipeline: download the archives, then extract them.
# The extraction is started from the recipe through a sub-make instead of being
# listed as a second prerequisite: sibling prerequisites have no ordering
# guarantee under `make -j`, and extraction must only begin once every download
# has finished. `$(MAKE)` (not a literal `make`) propagates -n and the jobserver.
part1: part1-download
	@$(MAKE) --no-print-directory part1-unzip
# How many archive names to take from the top of files.txt. Override on the
# command line, e.g. `make part1-download PART1_COUNT=25`.
PART1_COUNT ?= 10

# Fetch the first PART1_COUNT archives named in files.txt into dataset/zip/,
# skipping those that are already present.
#  - A missing files.txt is reported as an error instead of silently
#    downloading nothing.
#  - `set -e` makes a failed wget or mv abort the recipe; without it the loop's
#    exit status is only that of its last iteration, so failures went unnoticed.
#  - Each archive is first written to a hidden `.<name>.part` file and renamed
#    once complete, so an interrupted download is never mistaken for a finished
#    archive on the next run (and is not picked up by `ls` or a `*` glob).
part1-download:
	@test -f files.txt || { echo "files.txt not found" >&2; exit 1; }
	@mkdir -p dataset/zip
	@set -e; \
	for file in $$(head -n $(PART1_COUNT) files.txt); do \
		echo "$${file}"; \
		if [ -f "dataset/zip/$${file}" ]; then \
			echo "Nothing to do"; \
		else \
			wget "http://data.gdeltproject.org/events/$${file}" -O "dataset/zip/.$${file}.part"; \
			mv -f "dataset/zip/.$${file}.part" "dataset/zip/$${file}"; \
		fi; \
	done
# Extract every archive found in dataset/zip/ into dataset/raw. `unzip -n`
# never overwrites a file that already exists, so re-running is safe.
#  - Archives are enumerated with a shell glob rather than by parsing the output
#    of `ls`; when dataset/zip/ is missing or empty the unmatched pattern is
#    skipped by the `-e` test and nothing is run.
#  - `set -e` stops at the first archive unzip fails on; without it only the
#    status of the last iteration decided whether the recipe succeeded.
part1-unzip:
	@set -e; \
	for archive in dataset/zip/*; do \
		[ -e "$${archive}" ] || continue; \
		echo "$${archive##*/}"; \
		unzip -n "$${archive}" -d dataset/raw; \
	done
# Print a reminder of the command that leaves the active environment.
# Nothing is executed on the user's behalf; the two lines are only displayed.
deactivate:
	@printf '%s\n' "Run :" " deactivate"
# Delete the downloaded and generated data directories and the output folder.
# The `deactivate` prerequisite is built first, so its reminder is printed
# before anything is removed. $(RM) is make's built-in `rm -f`.
clean: deactivate
	$(RM) -r dataset/parquet
	$(RM) -r dataset/raw
	$(RM) -r dataset/zip
	$(RM) -r output/
# Full reset: after `clean` and `bonus-clean` have run, also remove the
# `spark-env` virtual environment directory. $(RM) is make's built-in `rm -f`.
mr-proper: clean bonus-clean
	$(RM) -r spark-env/
# Bonus part entry point: has no recipe of its own and only triggers
# `bonus-download`.
bonus: bonus-download
# Download the bonus CSV export into dataset/bonus/data.csv.
# wget's -O creates/truncates its output file before any data arrives, so a
# failed request would leave an empty or partial data.csv behind. The export is
# therefore written to a temporary `.part` file and only renamed to data.csv
# once wget has succeeded (make stops at the failing line otherwise).
bonus-download:
	mkdir -p dataset/bonus
	wget "https://data.nantesmetropole.fr/api/explore/v2.1/shared/datasets/224400028_suivi-qualite-eaux-superficielles-loire-atlantique-stations-departemen@loireatlantique/exports/csv?lang=fr&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B" -O dataset/bonus/data.csv.part
	mv -f dataset/bonus/data.csv.part dataset/bonus/data.csv
# Remove the directory holding the bonus dataset.
# $(RM) is make's built-in `rm -f`.
bonus-clean:
	$(RM) -r dataset/bonus