install_pyspark_conda_create_notebook.sh
62 lines (44 loc) · 1.41 KB
#!/bin/bash
# Install the pyspark conda environment for use with Data Flow in a Data
# Science notebook session, then generate a starter notebook
# (dataflow_notebook.ipynb) that imports ADS and loads the SparkMagic
# extension.
#
# Requires: /etc/profile.d/enableconda.sh, the `odsc` CLI, and a working
# `python` with nbformat installed.

# Abort on any command failure (e.g. a failed `odsc conda install`) and on
# failures inside pipelines. `-u` is intentionally omitted: conda's
# activation scripts reference unset variables and would trip it.
set -e -o pipefail

# Enable conda commands in this non-interactive shell.
source /etc/profile.d/enableconda.sh

readonly ENV_NAME=pyspark32_p38_cpu_v3
# Folder that holds the installed conda environments.
readonly ENV_FOLDER="$HOME/conda"

# Install the environment only if it is not already present.
if [[ -d "$ENV_FOLDER/$ENV_NAME" ]]; then
  echo "Conda environment '$ENV_NAME' found."
else
  echo "Conda environment '$ENV_NAME' not found, installing..."
  odsc conda install -s "$ENV_NAME"
fi

# Activate the environment so the notebook is generated against it.
conda activate "$ENV_FOLDER/$ENV_NAME"
echo "Conda environment '$ENV_NAME' is now activated."

# Write the notebook-builder script to a private temp file and make sure it
# is removed on every exit path.
pyscript=$(mktemp /tmp/pyscript.XXXXXX.py)
trap 'rm -f -- "$pyscript"' EXIT

# Quoted delimiter ('EOF'): the Python source is emitted literally, with no
# shell expansion. The former `os.system("source activate ...")` line was
# dropped — it ran `source` in a child /bin/sh (where it does not exist) and
# could never affect this process; the env is already activated above.
cat <<'EOF' > "$pyscript"
import nbformat as nbf
from nbformat.v4 import new_code_cell, new_notebook, new_markdown_cell

cells = [
    new_markdown_cell(source='import ADS and load the SparkMagic extension'),
    new_code_cell(
        source='import ads\nads.set_auth("resource_principal")\n%load_ext dataflow.magics',
        execution_count=1,
    ),
]
nb0 = new_notebook(cells=cells, metadata={'language': 'python'})

# Write as nbformat v4; the context manager guarantees the file is closed
# even if nbf.write raises.
with open('dataflow_notebook.ipynb', mode='w', encoding='utf-8') as f:
    nbf.write(nb0, f, 4)
EOF

python "$pyscript"