diff --git a/dev.ipynb b/dev.ipynb
index 7ecdd17..d0158ba 100644
--- a/dev.ipynb
+++ b/dev.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -16,11 +16,11 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "24/09/17 17:02:37 WARN Utils: Your hostname, codespaces-47d427 resolves to a loopback address: 127.0.0.1; using 10.0.2.223 instead (on interface eth0)\n",
-      "24/09/17 17:02:37 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n",
+      "24/09/20 09:32:52 WARN Utils: Your hostname, codespaces-2697a2 resolves to a loopback address: 127.0.0.1; using 10.0.1.75 instead (on interface eth0)\n",
+      "24/09/20 09:32:52 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n",
       "Setting default log level to \"WARN\".\n",
       "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
-      "24/09/17 17:02:38 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
+      "24/09/20 09:32:53 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
      ]
     }
    ],
@@ -29,7 +29,7 @@
     "from pyspark.sql import SparkSession\n",
     "from pyspark.sql import functions as F\n",
     "\n",
-    "from pysparky import functions_ext as F_\n",
+    "# from pysparky import functions_ext as F_\n",
     "from pysparky import spark_ext as se\n",
     "from pysparky import transformation_ext as te\n",
     "from pysparky import decorator\n",
@@ -41,6 +41,13 @@
     "\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": 3,
diff --git a/pysparky/functions/__init__.py b/pysparky/functions/__init__.py
new file mode 100644
index 0000000..fcc2407
--- /dev/null
+++ b/pysparky/functions/__init__.py
@@ -0,0 +1,2 @@
+from .general import *
+from .math_ import *
\ No newline at end of file
diff --git a/pysparky/functions_ext.py b/pysparky/functions/general.py
similarity index 100%
rename from pysparky/functions_ext.py
rename to pysparky/functions/general.py
diff --git a/pysparky/functions_math.py b/pysparky/functions/math_.py
similarity index 100%
rename from pysparky/functions_math.py
rename to pysparky/functions/math_.py
diff --git a/tests/test_functions_ext.py b/tests/test_functions_ext.py
index 5ba5b83..67f5f46 100644
--- a/tests/test_functions_ext.py
+++ b/tests/test_functions_ext.py
@@ -1,8 +1,8 @@
 import pytest
 from pyspark.sql import functions as F
 
-import pysparky.functions_ext as F_
-from pysparky.functions_ext import (chain, lower_, replace_strings_to_none,
+import pysparky.functions as F_
+from pysparky.functions.general import (chain, lower_, replace_strings_to_none,
                                     single_space_and_trim, startswiths)
 from pysparky.spark_ext import column_function
 
diff --git a/tests/test_functions_math.py b/tests/test_functions_math.py
index 02aeecf..1ea6f13 100644
--- a/tests/test_functions_math.py
+++ b/tests/test_functions_math.py
@@ -1,7 +1,8 @@
 import pytest
 from pyspark.sql import functions as F
 
-from pysparky import functions_math
+from pysparky.functions.math_ import haversine_distance
+from pysparky import functions as F_
 
 
 def test_haversine_distance(spark):
@@ -39,7 +40,7 @@ def haversine(lat1, lon1, lat2, lon2):
     target_value = (
         spark.range(1)
         .select(
-            functions_math.haversine_distance(
+            haversine_distance(
                 F.lit(52.1552), F.lit(5.3876), F.lit(59.9111), F.lit(10.7503)
             )
         )
@@ -47,6 +48,17 @@ def haversine(lat1, lon1, lat2, lon2):
     )
     assert distance_km_round4 == target_value
 
+    target_value2 = (
+        spark.range(1)
+        .select(
+            F_.haversine_distance(
+                F.lit(52.1552), F.lit(5.3876), F.lit(59.9111), F.lit(10.7503)
+            )
+        )
+        .collect()[0][0]
+    )
+    assert distance_km_round4 == target_value2
+
 
 if __name__ == "__main__":
     pytest.main([__file__])
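
The net effect of the rename plus the wildcard re-exports in `pysparky/functions/__init__.py` is that callers get three equivalent import routes. A minimal sketch, using only names that appear in the diff above:

```python
# Old flat-module imports, removed by this patch:
#   import pysparky.functions_ext as F_
#   from pysparky import functions_math

# New package-level alias; __init__.py re-exports everything from
# general.py and math_.py, so both modules' names live here too.
from pysparky import functions as F_

# Explicit submodule imports still work when a precise path is wanted.
from pysparky.functions.general import chain
from pysparky.functions.math_ import haversine_distance

# The wildcard re-export means both routes resolve to the same object,
# which is exactly what the target_value2 block in the test verifies.
assert F_.haversine_distance is haversine_distance
```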
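For context on what the test checks: the hunk headers show a local reference implementation (`def haversine(lat1, lon1, lat2, lon2):`) whose result is compared against the Spark column expression. A pure-Python sketch of the standard haversine formula, assuming Earth's mean radius of 6371 km; this is an illustration, not pysparky's actual Column-based implementation:

```python
import math

EARTH_RADIUS_KM = 6371.0  # assumed mean Earth radius

def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in km between two (lat, lon) points in degrees."""
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)
    # a = squared sine of half the central angle
    a = (math.sin(dphi / 2) ** 2
         + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2) ** 2)
    return 2 * EARTH_RADIUS_KM * math.asin(math.sqrt(a))

# The coordinates used in the test (roughly Utrecht to Oslo)
# give about 923.8 km with this radius.
print(round(haversine(52.1552, 5.3876, 59.9111, 10.7503), 4))
```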