Commit
adding separate functions folder
cenzwong committed Sep 20, 2024
1 parent 39d8b63 commit 11ab21a
Showing 6 changed files with 30 additions and 9 deletions.
17 changes: 12 additions & 5 deletions dev.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -16,11 +16,11 @@
"name": "stderr",
"output_type": "stream",
"text": [
"24/09/17 17:02:37 WARN Utils: Your hostname, codespaces-47d427 resolves to a loopback address: 127.0.0.1; using 10.0.2.223 instead (on interface eth0)\n",
"24/09/17 17:02:37 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n",
"24/09/20 09:32:52 WARN Utils: Your hostname, codespaces-2697a2 resolves to a loopback address: 127.0.0.1; using 10.0.1.75 instead (on interface eth0)\n",
"24/09/20 09:32:52 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n",
"Setting default log level to \"WARN\".\n",
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
"24/09/17 17:02:38 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
"24/09/20 09:32:53 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
]
}
],
@@ -29,7 +29,7 @@
"from pyspark.sql import SparkSession\n",
"from pyspark.sql import functions as F\n",
"\n",
"from pysparky import functions_ext as F_\n",
"# from pysparky import functions_ext as F_\n",
"from pysparky import spark_ext as se\n",
"from pysparky import transformation_ext as te\n",
"from pysparky import decorator\n",
@@ -41,6 +41,13 @@
"\n"
]
},
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": []
+ },
{
"cell_type": "code",
"execution_count": 3,
2 changes: 2 additions & 0 deletions pysparky/functions/__init__.py
@@ -0,0 +1,2 @@
+ from .general import *
+ from .math_ import *
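
The two star imports above re-export everything from the new general and math_ submodules at the package level, so callers can reach every function through a single pysparky.functions namespace or go straight to a submodule. A minimal sketch of the two import styles this enables, mirroring the updated tests below (the Spark session setup is an assumption for illustration, not part of this commit):

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F

    # Style 1: package-level alias, served by the star imports in __init__.py
    import pysparky.functions as F_
    # Style 2: direct submodule import
    from pysparky.functions.math_ import haversine_distance

    spark = SparkSession.builder.getOrCreate()  # assumed local session

    # Both calls resolve to the same function (Utrecht -> Oslo, as in the tests).
    df = spark.range(1).select(
        F_.haversine_distance(F.lit(52.1552), F.lit(5.3876), F.lit(59.9111), F.lit(10.7503))
    )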
pysparky/functions_ext.py → pysparky/functions/general.py
File renamed without changes.
pysparky/functions_math.py → pysparky/functions/math_.py
File renamed without changes.
4 changes: 2 additions & 2 deletions tests/test_functions_ext.py
@@ -1,8 +1,8 @@
import pytest
from pyspark.sql import functions as F

- import pysparky.functions_ext as F_
- from pysparky.functions_ext import (chain, lower_, replace_strings_to_none,
+ import pysparky.functions as F_
+ from pysparky.functions.general import (chain, lower_, replace_strings_to_none,
single_space_and_trim, startswiths)
from pysparky.spark_ext import column_function

16 changes: 14 additions & 2 deletions tests/test_functions_math.py
@@ -1,7 +1,8 @@
import pytest
from pyspark.sql import functions as F

- from pysparky import functions_math
+ from pysparky.functions.math_ import haversine_distance
+ from pysparky import functions as F_


def test_haversine_distance(spark):
@@ -39,14 +40,25 @@ def haversine(lat1, lon1, lat2, lon2):
    target_value = (
        spark.range(1)
        .select(
-           functions_math.haversine_distance(
+           haversine_distance(
                F.lit(52.1552), F.lit(5.3876), F.lit(59.9111), F.lit(10.7503)
            )
        )
        .collect()[0][0]
    )
    assert distance_km_round4 == target_value

+   target_value2 = (
+       spark.range(1)
+       .select(
+           F_.haversine_distance(
+               F.lit(52.1552), F.lit(5.3876), F.lit(59.9111), F.lit(10.7503)
+           )
+       )
+       .collect()[0][0]
+   )
+   assert distance_km_round4 == target_value2


if __name__ == "__main__":
    pytest.main([__file__])
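
For context, this test compares the Spark column result against a pure-Python haversine helper defined earlier in the file; only its signature, def haversine(lat1, lon1, lat2, lon2), is visible in the hunk header above. A minimal sketch of such a reference implementation, assuming the standard great-circle formula with a mean Earth radius of 6371 km (the actual helper body is not shown in this diff):

    import math

    def haversine(lat1, lon1, lat2, lon2):
        # Great-circle distance in kilometres between two (lat, lon) points.
        R = 6371.0  # mean Earth radius in km (assumed)
        phi1, phi2 = math.radians(lat1), math.radians(lat2)
        dphi = math.radians(lat2 - lat1)
        dlambda = math.radians(lon2 - lon1)
        a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2) ** 2
        return 2 * R * math.asin(math.sqrt(a))

    # Matches the literals used in the test (Utrecht -> Oslo), rounded to 4 d.p.
    distance_km_round4 = round(haversine(52.1552, 5.3876, 59.9111, 10.7503), 4)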
