refactor: change spark testing (add spark_session as pytest fixture)
vorel99 committed Sep 17, 2023
1 parent 1f64447 · commit d6e6a2e
Showing 7 changed files with 18 additions and 25 deletions.
1 change: 0 additions & 1 deletion .github/workflows/tests.yml
@@ -220,7 +220,6 @@ jobs:
run: echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
- run: echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV
- run: make install
- run: make install-spark-ci
- run: pip install -r requirements-spark.txt # Make sure the proper version of pandas is install after everything
- run: make test_spark

9 changes: 1 addition & 8 deletions Makefile
@@ -17,7 +17,7 @@ test:
 	ydata_profiling -h

 test_spark:
-	pytest --spark_home=${SPARK_HOME} tests/backends/spark_backend/
+	pytest tests/backends/spark_backend/
 	ydata_profiling -h

 test_cov:
@@ -52,13 +52,6 @@ install_dev_spark:
 install:
 	pip install -e .[notebook]

-install-spark-ci:
-	sudo apt-get update
-	sudo apt-get -y install openjdk-8-jdk
-	curl https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
-		--output ${SPARK_DIRECTORY}/spark.tgz
-	cd ${SPARK_DIRECTORY} && tar -xvzf spark.tgz && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark
-
 # compile requirements files
 compile:
 	$(PYTHON) -m piptools compile -o requirements.txt pyproject.toml
1 change: 0 additions & 1 deletion pyproject.toml
@@ -76,7 +76,6 @@ dev = [
"pre-commit>=2.8.2",
"pytest",
"pytest-cov",
"pytest-spark",
"pyarrow",
"twine>=3.1.1",
"wheel",
5 changes: 0 additions & 5 deletions requirements-dev.txt
@@ -78,8 +78,6 @@ fastjsonschema==2.18.0
     # via nbformat
 filelock==3.12.4
     # via virtualenv
-findspark==2.0.1
-    # via pytest-spark
 fonttools==4.42.1
     # via matplotlib
 htmlmin==0.1.12
@@ -276,12 +274,9 @@ pytest==7.4.2
     # via
     #   nbval
     #   pytest-cov
-    #   pytest-spark
     #   ydata-profiling (pyproject.toml)
 pytest-cov==4.1.0
     # via ydata-profiling (pyproject.toml)
-pytest-spark==0.6.0
-    # via ydata-profiling (pyproject.toml)
 python-dateutil==2.8.2
     # via
     #   jupyter-client
5 changes: 0 additions & 5 deletions requirements-spark.txt
@@ -69,8 +69,6 @@ fastjsonschema==2.18.0
     # via nbformat
 filelock==3.12.4
     # via virtualenv
-findspark==2.0.1
-    # via pytest-spark
 fonttools==4.42.1
     # via matplotlib
 htmlmin==0.1.12
@@ -255,12 +253,9 @@ pytest==7.4.2
     # via
     #   nbval
     #   pytest-cov
-    #   pytest-spark
     #   ydata-profiling (pyproject.toml)
 pytest-cov==4.1.0
     # via ydata-profiling (pyproject.toml)
-pytest-spark==0.6.0
-    # via ydata-profiling (pyproject.toml)
 python-dateutil==2.8.2
     # via
     #   jupyter-client
5 changes: 0 additions & 5 deletions requirements-test.txt
@@ -69,8 +69,6 @@ fastjsonschema==2.18.0
     # via nbformat
 filelock==3.12.4
     # via virtualenv
-findspark==2.0.1
-    # via pytest-spark
 fonttools==4.42.1
     # via matplotlib
 htmlmin==0.1.12
@@ -251,12 +249,9 @@ pytest==7.4.2
     # via
     #   nbval
     #   pytest-cov
-    #   pytest-spark
     #   ydata-profiling (pyproject.toml)
 pytest-cov==4.1.0
     # via ydata-profiling (pyproject.toml)
-pytest-spark==0.6.0
-    # via ydata-profiling (pyproject.toml)
 python-dateutil==2.8.2
     # via
     #   jupyter-client
17 changes: 17 additions & 0 deletions tests/backends/spark_backend/conftest.py
@@ -0,0 +1,17 @@
+import pytest
+from pyspark.sql import SparkSession
+
+
+@pytest.fixture(scope="session")
+def spark_session():
+    spark = (
+        SparkSession.builder.master("local[1]")
+        .appName("local-tests")
+        .config("spark.executor.cores", "1")
+        .config("spark.executor.instances", "1")
+        .config("spark.sql.shuffle.partitions", "1")
+        .config("spark.driver.bindAddress", "127.0.0.1")
+        .getOrCreate()
+    )
+    yield spark
+    spark.stop()
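
With this fixture in place, any test under tests/backends/spark_backend/ can declare a spark_session argument and pytest injects the shared SparkSession from conftest.py: one single-core local session is created for the whole run and stopped at the end, replacing the pytest-spark plugin (and its --spark_home option). A minimal usage sketch — the module name, test function, and data below are illustrative, not part of this commit:

    # tests/backends/spark_backend/test_fixture_usage.py (hypothetical example)
    def test_row_count(spark_session):
        # pytest resolves spark_session from the session-scoped fixture in conftest.py
        df = spark_session.createDataFrame(
            [("a", 1), ("b", 2)],
            ["letter", "number"],
        )
        assert df.count() == 2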
