diff --git a/CHANGELOG.md b/CHANGELOG.md
index cae5edf38e..a0ae7ed486 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,14 @@ Recommendation: for ease of reading, use the following order:
- Fixed
-->
+## [Unreleased]
+### Changed
+- Flight SQL protocol now fully supports authentication (anonymous and bearer token)
+- The `kamu notebook` command now defaults to the `DataFusion` engine for speed, but you can switch to Spark with the `--engine spark` argument
+- The `kamu notebook` command uses a new image based on the latest Jupyter and the new [`kamu-client-python`](https://github.com/kamu-data/kamu-client-python) library
+- The `kamu sql server` command interface changed to use `--engine datafusion/spark`, removing the `--flight-sql` flag
+- Examples in `examples/flight-sql/python` were updated for the new auth flow and now showcase the `kamu` Python library
+
## [0.215.1] - 2024-12-30
### Fixed
- GraphQL: in a multi-tenant workspace, `datasets.createEmpty` and `datasets.createFromSnapshot` mutations now return dataset aliases prefixed with account name.
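Editor's note: the auth changes above are exercised by the updated `examples/flight-sql/python` clients later in this diff. As a quick orientation, a minimal sketch using the `kamu` Python client, with the demo node URL taken from those examples; the bearer token value is a placeholder:

```python
import kamu

# Anonymous access works out of the box; the server assigns a session token
con = kamu.connect("grpc+tls://node.demo.kamu.dev:50050")

# Registered users can pass a bearer token instead (placeholder value)
# con = kamu.connect("grpc+tls://node.demo.kamu.dev:50050", token="<bearer-token>")

print(con.query("select 1 as value"))
```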
diff --git a/Cargo.lock b/Cargo.lock
index 8ddda3044b..6aeb4eb468 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5495,21 +5495,30 @@ version = "0.215.1"
dependencies = [
"arrow-flight",
"async-trait",
+ "base32",
"base64 0.22.1",
+ "bytes",
"chrono",
+ "database-common",
"datafusion",
"dill",
"futures",
+ "http 1.2.0",
+ "http-body 1.0.1",
"indoc 2.0.5",
+ "kamu-accounts",
"kamu-core",
"kamu-data-utils",
"like",
+ "mockall",
"prost",
+ "rand",
"test-log",
"time-source",
"tokio",
"tokio-stream",
"tonic",
+ "tower 0.5.2",
"tracing",
"tracing-subscriber",
"uuid",
diff --git a/examples/archive/commercial-fishing/.kamuconfig b/examples/archive/commercial-fishing/.kamuconfig
new file mode 100644
index 0000000000..40ca8eb190
--- /dev/null
+++ b/examples/archive/commercial-fishing/.kamuconfig
@@ -0,0 +1,14 @@
+kind: CLIConfig
+version: 1
+content:
+ users:
+ predefined:
+ - accountName: kamu
+ isAdmin: true
+ avatarUrl: https://avatars.githubusercontent.com/u/50896974?s=200&v=4
+ - accountName: acme.fishing.co
+ accountType: Organization
+ avatarUrl: https://cdn-icons-png.flaticon.com/512/1090/1090630.png
+ - accountName: globalfishingwatch.org
+ accountType: Organization
+ avatarUrl: https://cdn-icons-png.flaticon.com/512/744/744480.png
diff --git a/examples/archive/commercial-fishing/init.sh b/examples/archive/commercial-fishing/init.sh
index 341478c909..015eb505d6 100755
--- a/examples/archive/commercial-fishing/init.sh
+++ b/examples/archive/commercial-fishing/init.sh
@@ -4,6 +4,7 @@ set -e
KAMU_NODE_URL="odf+https://node.demo.kamu.dev/"
kamu init --multi-tenant --exists-ok
+cp -f .kamuconfig .kamu/
kamu --account acme.fishing.co pull "${KAMU_NODE_URL}acme.fishing.co/vessels.gps"
kamu --account acme.fishing.co pull "${KAMU_NODE_URL}acme.fishing.co/vessels.trawl"
diff --git a/examples/archive/commercial-fishing/analysis.ipynb b/examples/archive/commercial-fishing/notebook.ipynb
similarity index 83%
rename from examples/archive/commercial-fishing/analysis.ipynb
rename to examples/archive/commercial-fishing/notebook.ipynb
index 8c215f565a..b7c1cd4a32 100644
--- a/examples/archive/commercial-fishing/analysis.ipynb
+++ b/examples/archive/commercial-fishing/notebook.ipynb
@@ -13,15 +13,14 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "f0d0f5b5",
+ "id": "d5947b7d-4cfd-4a8b-b12f-c1a76402438b",
"metadata": {},
"outputs": [],
"source": [
- "%import_dataset acme.fishing.co/vessels.gps --alias gps\n",
- "%import_dataset acme.fishing.co/vessels.trawl --alias trawl\n",
- "%import_dataset acme.fishing.co/vessels.fuel --alias fuel\n",
- "%import_dataset acme.fishing.co/vessels.location-annotated --alias loc\n",
- "%import_dataset globalfishingwatch.org/protected-areas --alias areas"
+ "import kamu\n",
+ "import kamu.utils\n",
+ "\n",
+ "con = kamu.connect(engine=\"spark\")"
]
},
{
@@ -31,14 +30,12 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"import os\n",
"import pandas as pd\n",
"import plotly.graph_objects as go\n",
"import plotly.express as px\n",
"from mapboxgl.viz import *\n",
"from mapboxgl.utils import *\n",
- "from utils.plotting import *\n",
"\n",
"# Must be a public token, starting with `pk`\n",
"token = os.getenv('MAPBOX_ACCESS_TOKEN')\n",
@@ -68,7 +65,7 @@
" longitude,\n",
" latitude,\n",
" is_trawling\n",
- "from loc"
+ "from `acme.fishing.co/vessels.location-annotated`"
]
},
{
@@ -78,7 +75,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"fig = go.Figure()\n",
"\n",
"for vessel_name in gps['vessel_name'].unique():\n",
@@ -136,7 +132,7 @@
" date,\n",
" wdpa_pid,\n",
" gis_area\n",
- "from areas"
+ "from `globalfishingwatch.org/protected-areas`"
]
},
{
@@ -146,9 +142,8 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"viz = ChoroplethViz(\n",
- " df_to_geojson(areas),\n",
+ " kamu.utils.df_to_geojson(areas),\n",
" style=mapbox_style,\n",
" center=(2, 51),\n",
" zoom=5,\n",
@@ -186,7 +181,8 @@
" name,\n",
" gis_area,\n",
" geometry\n",
- "from areas where parent_iso in (\"NLD\", \"FRA\", \"DMK\", \"BEL\")"
+ "from `globalfishingwatch.org/protected-areas`\n",
+ "where parent_iso in (\"NLD\", \"FRA\", \"DMK\", \"BEL\")"
]
},
{
@@ -197,22 +193,27 @@
"outputs": [],
"source": [
"%%sql -o isect -q\n",
- "select\n",
- " gps.event_time,\n",
- " gps.vessel_name,\n",
- " gps.latitude,\n",
- " gps.longitude\n",
- "from (\n",
+ "with location_trawling as (\n",
" select\n",
" event_time, vessel_name, latitude, longitude, st_point(longitude, latitude) as geometry \n",
- " from loc where is_trawling = 1\n",
- ") gps,\n",
- "(\n",
+ " from `acme.fishing.co/vessels.location-annotated`\n",
+ " where is_trawling = 1\n",
+ "),\n",
+ "protected_areas as (\n",
" select\n",
" st_geomfromgeojson(geometry) as geometry\n",
- " from areas where parent_iso = \"NLD\"\n",
- ") areas\n",
- "where st_contains(areas.geometry, gps.geometry)"
+ " from `globalfishingwatch.org/protected-areas`\n",
+ " where parent_iso = \"NLD\"\n",
+ ")\n",
+ "select\n",
+ " loc.event_time,\n",
+ " loc.vessel_name,\n",
+ " loc.latitude,\n",
+ " loc.longitude\n",
+ "from\n",
+ " location_trawling as loc,\n",
+ " protected_areas as area\n",
+ "where st_contains(area.geometry, loc.geometry)"
]
},
{
@@ -222,8 +223,7 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
- "isect_areas_geojson = df_to_geojson(isect_areas)\n",
+ "isect_areas_geojson = kamu.utils.df_to_geojson(isect_areas)\n",
"\n",
"fig = go.Figure()\n",
"\n",
@@ -283,19 +283,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/examples/archive/commercial-fishing/utils/plotting.py b/examples/archive/commercial-fishing/utils/plotting.py
deleted file mode 100644
index ef857fe314..0000000000
--- a/examples/archive/commercial-fishing/utils/plotting.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import json
-
-def to_plain(v):
- if type(v) in [int, float, str]:
- return v
- else:
- return str(v)
-
-# For every row we first combine GeoJson geometry with other columns into a Feature object
-# Then we combine all Features into a FeatureCollection
-def df_to_geojson(df, geom='geometry', props=None):
- if props is None:
- props = [
- c for c in df.columns
- if c != geom
- ]
-
- return {
- "type": "FeatureCollection",
- "features": [
- {
- "type": "Feature",
- "geometry": json.loads(row[geom]),
- "properties": {p: to_plain(row[p]) for p in props}
- }
- for _, row in df.iterrows()
- ]
- }
\ No newline at end of file
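Editor's note: the deleted helper above now ships with the client library as `kamu.utils.df_to_geojson`, which the renamed notebook imports instead. A minimal equivalence sketch, assuming the library function keeps this helper's behavior of parsing a GeoJSON string column into Feature objects; the toy DataFrame is made up for illustration:

```python
import pandas as pd
import kamu.utils

# Toy frame shaped like the `%%sql -o` results in the notebook:
# a GeoJSON geometry string column plus ordinary property columns.
df = pd.DataFrame({
    "geometry": ['{"type": "Point", "coordinates": [3.5, 51.5]}'],
    "vessel_name": ["illustrative"],
})

# Previously: from utils.plotting import df_to_geojson
feature_collection = kamu.utils.df_to_geojson(df)
print(feature_collection["type"])  # FeatureCollection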
diff --git a/examples/archive/water-management/.kamuconfig b/examples/archive/water-management/.kamuconfig
new file mode 100644
index 0000000000..8b5a0dceca
--- /dev/null
+++ b/examples/archive/water-management/.kamuconfig
@@ -0,0 +1,12 @@
+kind: CLIConfig
+version: 1
+content:
+ users:
+ predefined:
+ - accountName: kamu
+ isAdmin: true
+ avatarUrl: https://avatars.githubusercontent.com/u/50896974?s=200&v=4
+ - accountName: rijkswaterstaat.nl
+ avatarUrl: https://www.shutterstock.com/image-vector/royal-exclusive-badge-logo-two-260nw-236025661.jpg
+ - accountName: deltares.nl
+ avatarUrl: https://avatars.githubusercontent.com/u/6613768?s=200&v=4
\ No newline at end of file
diff --git a/examples/archive/water-management/init.sh b/examples/archive/water-management/init.sh
index 366394ce0d..50c73b00ee 100755
--- a/examples/archive/water-management/init.sh
+++ b/examples/archive/water-management/init.sh
@@ -4,6 +4,7 @@ set -e
KAMU_NODE_URL="odf+https://node.demo.kamu.dev/"
kamu init --multi-tenant --exists-ok
+cp -f .kamuconfig .kamu/
kamu --account rijkswaterstaat.nl pull "${KAMU_NODE_URL}rijkswaterstaat.nl/stations"
kamu --account rijkswaterstaat.nl pull "${KAMU_NODE_URL}rijkswaterstaat.nl/measurements.boven-rijn"
diff --git a/examples/archive/water-management/analyze.ipynb b/examples/archive/water-management/notebook.ipynb
similarity index 83%
rename from examples/archive/water-management/analyze.ipynb
rename to examples/archive/water-management/notebook.ipynb
index fb5b4ad12f..71b28f10bd 100644
--- a/examples/archive/water-management/analyze.ipynb
+++ b/examples/archive/water-management/notebook.ipynb
@@ -3,35 +3,35 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "ab79739d",
+ "id": "4a15a1c9-7e81-4a17-8282-a32c06bdbaf4",
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
- "import os\n",
- "import numpy as np\n",
- "import xarray as xr\n",
- "import pandas as pd\n",
- "import geopandas as gpd\n",
- "import matplotlib.pyplot as plt\n",
- "import hvplot.pandas # noqa\n",
- "import hvplot.xarray # noqa\n",
- "from datetime import datetime\n",
- "from mapboxgl.utils import create_color_stops, create_numeric_stops, df_to_geojson\n",
- "from mapboxgl.viz import CircleViz"
+ "%load_ext kamu"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "d4774ffb",
+ "id": "1d60c661-237b-4850-bafe-705ac04bd7c9",
"metadata": {},
"outputs": [],
"source": [
- "%load_ext kamu\n",
+ "import kamu\n",
+ "\n",
+ "import os\n",
+ "import numpy as np\n",
+ "import xarray as xr\n",
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import hvplot.pandas\n",
+ "import hvplot.xarray\n",
+ "from datetime import datetime\n",
+ "from mapboxgl.utils import create_color_stops, create_numeric_stops, df_to_geojson\n",
+ "from mapboxgl.viz import CircleViz\n",
"\n",
- "%import_dataset rijkswaterstaat.nl/stations\n",
- "%import_dataset rijkswaterstaat.nl/measurements.boven-rijn"
+ "con = kamu.connect()"
]
},
{
@@ -42,7 +42,7 @@
"outputs": [],
"source": [
"%%sql\n",
- "select * from `rijkswaterstaat.nl/stations` limit 5"
+ "select * from 'rijkswaterstaat.nl/stations' limit 3"
]
},
{
@@ -53,7 +53,7 @@
"outputs": [],
"source": [
"%%sql\n",
- "select * from `rijkswaterstaat.nl/measurements.boven-rijn` limit 5"
+ "select * from 'rijkswaterstaat.nl/measurements.boven-rijn' limit 3"
]
},
{
@@ -65,16 +65,6 @@
"Here we show the dataset using a scatterplot. We select the 25th timestep, which corresponds to 2020-01-01 00:00 . We visualize the waterlevels for that timestep, using a scatterplot with 1 value per station."
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6c4470be",
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset deltares.nl/rhine-basin.netherlands"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -94,8 +84,8 @@
" waterlevel,\n",
" velocity,\n",
" discharge\n",
- "from `deltares.nl/rhine-basin.netherlands` m\n",
- "left join `rijkswaterstaat.nl/stations` s \n",
+ "from 'deltares.nl/rhine-basin.netherlands' as m\n",
+ "left join 'rijkswaterstaat.nl/stations' as s \n",
" on m.station_id = s.station_id"
]
},
@@ -106,7 +96,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"ds = df.set_index(['event_time', 'station_id']).to_xarray()\n",
"ds.plot.scatter(x='lon', y='lat', hue='waterlevel', edgecolors='none')"
]
@@ -118,7 +107,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"viz = CircleViz(\n",
" df_to_geojson(\n",
" df, properties=['station_id', 'station_name', 'waterlevel'], lat='lat', lon='lon', precision=3\n",
@@ -145,16 +133,6 @@
"# Simulating Predicted Water Levels"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f77e1cab",
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset deltares.nl/rhine-basin.netherlands.sim"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -171,8 +149,8 @@
" waterlevel,\n",
" velocity,\n",
" discharge,\n",
- " round((cast(sim_time as long) - cast(analysis_time as long))/3600) as lookahead\n",
- "from `deltares.nl/rhine-basin.netherlands.sim` m\n",
+ " round((cast(sim_time as bigint) - cast(analysis_time as bigint))/3600) as lookahead\n",
+ "from 'deltares.nl/rhine-basin.netherlands.sim' as m\n",
"where \n",
" station_id = 'BR_0863.00'\n",
" and waterlevel is not null\n",
@@ -186,7 +164,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"df2.hvplot.scatter(\n",
" x='sim_time', y=['waterlevel', 'velocity', 'discharge'], shared_axes=False, c='lookahead', \n",
" cmap='magma', s=2, height=300, width=800, subplots=True\n",
@@ -216,7 +193,7 @@
" analysis_time,\n",
" sim_time,\n",
" discharge\n",
- "from `deltares.nl/rhine-basin.netherlands.sim`\n",
+ "from 'deltares.nl/rhine-basin.netherlands.sim'\n",
"where station_id = 'BR_0863.00'\n",
"order by analysis_time, sim_time"
]
@@ -236,7 +213,7 @@
" sim_time,\n",
" waterlevel,\n",
" velocity\n",
- "from `deltares.nl/rhine-basin.netherlands.sim`\n",
+ "from 'deltares.nl/rhine-basin.netherlands.sim'\n",
"where station_id = 'WA_0913.00'\n",
"order by analysis_time, sim_time"
]
@@ -264,7 +241,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"lds = lobith.to_xarray()\n",
"tds = thiel.to_xarray()"
]
@@ -276,7 +252,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"fig, axes = plt.subplots(ncols=3, figsize=(13, 6))\n",
"axes[0].hist(lds.discharge.values.ravel())\n",
"axes[1].hist(tds.waterlevel.values.ravel())\n",
@@ -299,7 +274,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"fig, ax = plt.subplots(figsize=(13, 8))\n",
"ax.plot(lds.discharge.values.ravel(), tds.waterlevel.values.ravel(), 'k.', alpha=0.1)\n",
"ax.set_xlabel('Discharge @ Lobith [m3/s]')\n",
@@ -333,19 +307,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/examples/covid/Covid Using Graphs in Jupyter Notebook.ipynb b/examples/covid/notebook.ipynb
similarity index 53%
rename from examples/covid/Covid Using Graphs in Jupyter Notebook.ipynb
rename to examples/covid/notebook.ipynb
index 8968bbfb12..7bbe254d26 100644
--- a/examples/covid/Covid Using Graphs in Jupyter Notebook.ipynb
+++ b/examples/covid/notebook.ipynb
@@ -74,165 +74,171 @@
},
{
"cell_type": "markdown",
- "id": "c2c61eee",
+ "id": "ff03afd2-8f27-4b4b-bf37-98e4b8aee997",
"metadata": {},
"source": [
- "## Load Kamu Extension\n",
+ "## Connect to Kamu\n",
+ "First we need to import `kamu` library and create a connection to the server. We will let the library to figure out where to find the server, but you can connect to other nodes by providing a URL.\n",
+ "\n",
"
\n",
- "Start by loading kamu
Jupyter extension in your terminal:\n",
+ "\n",
+ "Connect to `kamu` server.\n",
+ "\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "28c1c94a",
- "metadata": {
- "scrolled": true
- },
+ "id": "77ff78d6-d674-41ac-a762-b43fd22d428d",
+ "metadata": {},
"outputs": [],
"source": [
- "%load_ext kamu"
+ "import kamu\n",
+ "\n",
+ "con = kamu.connect()"
]
},
{
"cell_type": "markdown",
- "id": "b4c3ad82",
+ "id": "2c8f5a2f-74c9-4476-a91e-792f45e542b1",
"metadata": {},
"source": [
- "## Import and Test Data\n",
- "\n",
- "Now it is time to start importing your Covid data by province. First import the data from the province of BC by using the command %import dataset
. An alias was created to make it easier to call this file.\n",
- "
"
+ "You can already query data using the connection object."
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "f665cd74",
- "metadata": {
- "scrolled": true
- },
+ "id": "144502a9-831f-4072-9fa6-b9add039c5d1",
+ "metadata": {},
"outputs": [],
"source": [
- "%import_dataset covid19.british-columbia.case-details --alias cases_bc"
+ "con.query(\"select 1 as value\")"
]
},
{
"cell_type": "markdown",
- "id": "64b3449f",
+ "id": "c2c61eee",
"metadata": {},
"source": [
- "\n",
- "To test if the data was loaded correctly a SQL querry is run.\n",
- "
"
+ "## Load Kamu Extension\n",
+ "To avoid typying `con.query(\"...\")` all the time let's load kamu
Jupyter extension."
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "cb505b23",
+ "id": "28c1c94a",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
- "%%sql\n",
- "SELECT * FROM cases_bc\n",
- "ORDER BY id DESC\n",
- "LIMIT 10"
+ "%load_ext kamu"
]
},
{
"cell_type": "markdown",
- "id": "665855a7",
+ "id": "b4c3ad82",
"metadata": {},
"source": [
- "\n",
- "Now it is time to import the rest of the Covid data files and create aliases for them\n",
- "
"
+ "The extension provides a convenient `%%sql` cell magic. Let's use it to look at the data from the province of BC."
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "50a8426b",
- "metadata": {},
+ "id": "cb505b23",
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [],
"source": [
- "%import_dataset covid19.ontario.case-details --alias cases_on\n",
- "%import_dataset covid19.alberta.case-details --alias cases_ab\n",
- "%import_dataset covid19.quebec.case-details --alias cases_qb"
+ "%%sql\n",
+ "select * from 'covid19.british-columbia.case-details' limit 3"
]
},
{
"cell_type": "markdown",
- "id": "7b1540af",
+ "id": "821818ad-90c8-4034-9b7f-6ac16ea6c48b",
"metadata": {},
"source": [
- "\n",
- "Time to test again if the data was imported correctly. You can test the Alberta files by changing cases_on
to cases_ab
. For Quebec change it to cases_qb
and id
to row_id
. \n",
- "
"
+ "## Explore Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "665855a7",
+ "metadata": {},
+ "source": [
+ "We can use the same approach to sample data from other provinces:"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "5c50f473",
+ "id": "599cac31-e7d7-4313-a768-dbb0d1c5fdae",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%%sql\n",
- "SELECT * FROM cases_on\n",
- "ORDER BY id DESC\n",
- "LIMIT 10"
+ "select * from 'covid19.alberta.case-details' limit 3"
]
},
{
- "cell_type": "markdown",
- "id": "314078ef",
- "metadata": {},
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ad28d886-cb48-40c9-b2b5-161644f304b3",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
"source": [
- "\n",
- "The next file that you import is case details for the four provinces combined. The file covid19.canada.case-details
uses an SQL query in the yaml file to combine that data so that you don't have to combine them with 'UNION ALL'.\n",
- "The SQL queries that harmonize the data of each province can be found in (province).case-details.hm.
If you open these yamls, there are queries that make the datasets be able to be compared without semantic differences between them. For example only two provinces have a 90+ whereas the other two has age ranges of 80+. Therefore, we need to switch the age ranges to 80+ to compare the data.\n",
- "
"
+ "%%sql\n",
+ "select * from 'covid19.ontario.case-details' limit 3"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "d8c32581",
- "metadata": {},
+ "id": "874863d9-e4af-41fc-9a90-a48fe8072a9d",
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [],
"source": [
- "%import_dataset covid19.canada.case-details --alias cases_four_provinces"
+ "%%sql\n",
+ "select * from 'covid19.quebec.case-details' limit 3"
]
},
{
"cell_type": "markdown",
- "id": "03b616c3",
+ "id": "314078ef",
"metadata": {},
"source": [
+ "Notice how data schemas and column semantics are slightly different between provinces. This makes pretty difficult to work with data across all provinces.\n",
+ "\n",
+ "To tackle that we have created several harmonization datasets `{province}.case-details.hm` that bring data from all provinces under a common format. The `covid19.canada.case-details` dataset then uses `UNION ALL` operation to derive a new pan-Canadian dataset.\n",
+ "\n",
"\n",
- "Again, test to see if the data worked by showing the last 10 data rows.\n",
- "
"
+ "Take a minute to study the definitions of these datasets.\n",
+ "\n",
+ "\n",
+ "Let's sample the pan-Canadian dataset now."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee795d89",
- "metadata": {
- "scrolled": false
- },
+ "metadata": {},
"outputs": [],
"source": [
"%%sql\n",
- "SELECT * FROM cases_four_provinces\n",
- "LIMIT 10"
+ "select * from 'covid19.canada.case-details' limit 3"
]
},
{
@@ -240,9 +246,7 @@
"id": "027313e1",
"metadata": {},
"source": [
- "\n",
- "To use this file, a SQL query is created to combine all of the cases by age group and by province\n",
- "
"
+ "Let's write a query that counts the number of cases by age group and by province."
]
},
{
@@ -253,10 +257,13 @@
"outputs": [],
"source": [
"%%sql -o age_cases\n",
- "SELECT province, age_group, COUNT(*) \n",
- "FROM cases_four_provinces\n",
- "GROUP BY province, age_group\n",
- "ORDER BY province, age_group;"
+ "select\n",
+ " province,\n",
+ " age_group,\n",
+ " count(*)\n",
+ "from 'covid19.canada.case-details'\n",
+ "group by province, age_group\n",
+ "order by province, age_group"
]
},
{
@@ -264,8 +271,7 @@
"id": "27e8856f",
"metadata": {},
"source": [
- "\n",
- " Through With plotly.express.pie
a pie chart can be created to compare the cases per province then per age group. As can bee seen over a third of Quebec's cases are unknow which is probably to to Quebec's strict privacy act laws that are part of the Act Respecting Access to Documents Held by Public Bodies and the Protection of Personal Information. These differences in law can cause errors when comparing data.
"
+ "We can use `plotly` to visualize this data as a pie chart."
]
},
{
@@ -275,9 +281,16 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local \n",
"import plotly.express \n",
- "plotly.express.pie(age_cases, values='count(1)', names='age_group', color='age_group', title='Cases by Age Group and Province', facet_col='province')"
+ "plotly.express.pie(age_cases, values='count(*)', names='age_group', color='age_group', title='Cases by Age Group and Province', facet_col='province')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "932dfe7c-1765-466b-b5bf-0b8f1fc7ff67",
+ "metadata": {},
+ "source": [
+ "As can bee seen over a third of Quebec's cases are unknow which is probably due to Quebec's strict privacy act laws that are part of the Act Respecting Access to Documents Held by Public Bodies and the Protection of Personal Information. These differences in law can cause errors when comparing data!"
]
},
{
@@ -285,9 +298,7 @@
"id": "fa22f18f",
"metadata": {},
"source": [
- "\n",
- "Another piece of data we can get from this yaml is gender. Therefore, a SQL query is created to combine all of the cases by gender and by province\n",
- "
"
+ "Now let's look at the distribution of cases by gender and by province"
]
},
{
@@ -300,20 +311,13 @@
"outputs": [],
"source": [
"%%sql -o total_cases\n",
- "SELECT province, gender, COUNT(*) \n",
- "FROM cases_four_provinces\n",
- "GROUP BY province, gender\n",
- "ORDER BY province, gender;"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "65858b3b",
- "metadata": {},
- "source": [
- "\n",
- " Through plotly.express.bar
a bar chart can be created to compare the cases per province then per gender (male, female, unspecified).\n",
- "
"
+ "select\n",
+ " province,\n",
+ " gender,\n",
+ " count(*)\n",
+ "from 'covid19.canada.case-details'\n",
+ "group by province, gender\n",
+ "order by province, gender"
]
},
{
@@ -323,9 +327,7 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local \n",
- "import plotly.express \n",
- "plotly.express.bar(total_cases, x='province', y='count(1)', color='gender', title='Cases per Gender')\n"
+ "plotly.express.bar(total_cases, x='province', y='count(*)', color='gender', title='Cases per Gender')\n"
]
},
{
@@ -333,9 +335,7 @@
"id": "f0c746b6",
"metadata": {},
"source": [
- "\n",
- " By looking through the data you can see that Quebec has a large amount of people who were classified as undefined. This is probably again due to Quebec's strict privacy laws.\n",
- "
"
+ "Here you can see that Quebec has a large amount of people who were classified as undefined. This is probably again due to Quebec's strict privacy laws."
]
},
{
@@ -343,29 +343,7 @@
"id": "d65f66df",
"metadata": {},
"source": [
- "\n",
- "The last dataset that we are importing is daily cases for the four provinces.\n",
- "
"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8b54ceff",
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset covid19.canada.daily-cases --alias daily_cases"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "44ff2b2a",
- "metadata": {},
- "source": [
- "\n",
- "Now test again to see if the data was succcesfully installed for this file.\n",
- "
"
+ "The last dataset that we will look at is daily cases aggregation for the four provinces."
]
},
{
@@ -376,7 +354,7 @@
"outputs": [],
"source": [
"%%sql -o daily_cases\n",
- "select * from daily_cases"
+ "select * from 'covid19.canada.daily-cases'"
]
},
{
@@ -384,9 +362,7 @@
"id": "083fecc2",
"metadata": {},
"source": [
- "\n",
- "The last step is to create a line plot graph to compare the different amount of cases per day by province.\n",
- "
"
+ "We can use it to create a line plot graph to compare the different amount of cases per day by province."
]
},
{
@@ -398,8 +374,6 @@
},
"outputs": [],
"source": [
- "%%local\n",
- "import plotly.express\n",
"plotly.express.line(daily_cases, x=\"reported_date\" , y=\"total_daily\", color=\"province\")"
]
},
@@ -408,14 +382,12 @@
"id": "6bd2a6b0",
"metadata": {},
"source": [
- "\n",
- "\n",
- "As seen in the graph above, the case data has multiple spikes, including two significant ones in Quebec from late December 2020 and early January 2021. As explained in [this data source issue](https://github.com/ccodwg/Covid19Canada/issues/44) these spikes don't reflect an actual surge in cases, but rather a **delay in data entry** due to the holidays and weekends, with cases being attributed to the day they are entered on instead of amending the past data for the days they were registered on. This issue makes data hard to work with, often requiring some \"smoothing\" to get approximate number of cases on a cetrain date.\n",
+ "As seen in the graph above, the case data has multiple spikes, including two extreme ones in Quebec from late December 2020 and early January 2021. As explained in [this data source issue](https://github.com/ccodwg/Covid19Canada/issues/44) these spikes don't reflect an actual surge in cases, but rather a **delay in data entry** due to the holidays and weekends, with cases being attributed to the day they are entered on instead of amending the past data for the days they were registered on. This issue makes data hard to work with, often requiring some \"smoothing\" to get approximate number of cases on a cetrain date.\n",
"\n",
"\n",
- "Kamu offers a combination of techniques like [watermarks](https://docs.kamu.dev/glossary/#watermark), explicit [retractions and corrections](https://docs.kamu.dev/glossary/#retractions-and-corrections) to automatically account for late arriving data and simultaneously provide **minimal latency** and **accuracy and consistency** of data. Continue to [other examples](https://docs.kamu.dev/cli/get-started/examples/) to learn more.\n",
+ "Kamu offers a combination of techniques like [watermarks](https://docs.kamu.dev/glossary/#watermark), explicit [retractions and corrections](https://docs.kamu.dev/glossary/#retractions-and-corrections) to automatically account for late arriving data and simultaneously provide **minimal latency** and **accuracy and consistency** of data.\n",
"\n",
- "
"
+ "Continue to [other examples](https://docs.kamu.dev/cli/get-started/examples/) to learn more!"
]
},
{
@@ -429,19 +401,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/examples/currency_conversion/rates.ipynb b/examples/currency_conversion/rates.ipynb
index 072c9e31df..6cbfedaa6b 100644
--- a/examples/currency_conversion/rates.ipynb
+++ b/examples/currency_conversion/rates.ipynb
@@ -7,7 +7,7 @@
"outputs": [],
"source": [
"%load_ext kamu\n",
- "# Loads kamu extension to use `import_dataset` command"
+ "# Loads kamu extension to use `%%sql` cell magic and auto-viz"
]
},
{
@@ -16,29 +16,10 @@
"metadata": {},
"outputs": [],
"source": [
- "%import_dataset ca.bankofcanada.exchange-rates.daily --alias rates\n",
- "# Imports dataset and gives it an *SQL table / PySpark* alias"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# By default all code is executed remotely via PySpark and has direct access to imported datasets\n",
- "rates.printSchema()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "# The above makes python code execute in a *local* notebook kernel \n",
- "print(\"This runs in the notebook\")"
+ "import kamu\n",
+ "\n",
+ "# Create onnection to kamu server\n",
+ "con = kamu.connect()"
]
},
{
@@ -50,7 +31,7 @@
"%%sql -o rates\n",
"-- We can run SQL queries directly\n",
"-- By adding `-o ` we can download the result into the local notebook as Pandas dataframe!\n",
- "select * from rates"
+ "select * from 'ca.bankofcanada.exchange-rates.daily'"
]
},
{
@@ -59,21 +40,9 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
- "# Local notebook now has `rates` variable\n",
"rates.info()"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset my.trading.transactions --alias transactions\n",
- "%import_dataset my.trading.transactions.cad --alias transactions_cad"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -81,7 +50,7 @@
"outputs": [],
"source": [
"%%sql -o tx\n",
- "-- Let's use SQL to shape the data via Spark and download the processed result into the notebook\n",
+ "-- Let's use SQL to shape the data and download the processed result into the notebook\n",
"select \n",
" offset,\n",
" system_time,\n",
@@ -92,7 +61,7 @@
" cast(price_cad as double) as price_cad,\n",
" cast(settlement_usd as double) as settlement_usd,\n",
" cast(settlement_cad as double) as settlement_cad\n",
- "from transactions_cad"
+ "from 'my.trading.transactions.cad'"
]
},
{
@@ -101,14 +70,13 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"import os\n",
"import numpy as np\n",
"import xarray as xr\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
- "import hvplot.pandas # noqa\n",
- "import hvplot.xarray # noqa"
+ "import hvplot.pandas\n",
+ "import hvplot.xarray"
]
},
{
@@ -117,7 +85,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"# Now we can visualize it!\n",
"rates.hvplot.line(\n",
" x=\"date\", \n",
@@ -135,7 +102,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"tx.hvplot.scatter(\n",
" x=\"event_time\", \n",
" y=[\"settlement_cad\"], \n",
@@ -156,19 +122,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/examples/flight-sql/python/client_flightsql_adbc.py b/examples/flight-sql/python/client_flightsql_adbc.py
index 96d979b885..afcb6787b4 100644
--- a/examples/flight-sql/python/client_flightsql_adbc.py
+++ b/examples/flight-sql/python/client_flightsql_adbc.py
@@ -2,34 +2,43 @@
import adbc_driver_flightsql.dbapi
import pandas
-# No-TLS local connection
-#
-# To test with local server use:
-# cd examples/reth-vs-snp500
-# kamu -vv sql server --flight-sql --port 50050 --address 0.0.0.0
-#
+# Secure remote connection
con = adbc_driver_flightsql.dbapi.connect(
- "grpc://localhost:50050",
+ "grpc+tls://node.demo.kamu.dev:50050",
db_kwargs={
- adbc_driver_manager.DatabaseOptions.USERNAME.value: "kamu",
- adbc_driver_manager.DatabaseOptions.PASSWORD.value: "kamu",
+    # Anonymous users have to authenticate using basic auth so they can be assigned a session token
+ adbc_driver_manager.DatabaseOptions.USERNAME.value: "anonymous",
+ adbc_driver_manager.DatabaseOptions.PASSWORD.value: "anonymous",
+    # Registered users can provide a bearer token directly
+ # adbc_driver_flightsql.DatabaseOptions.AUTHORIZATION_HEADER.value: "Bearer ",
},
autocommit=True,
)
-# Secure remote connection
+# No-TLS local connection
+#
+# To test with local server use:
+# cd examples/reth-vs-snp500
+# kamu -vv sql server --port 50050 --address 0.0.0.0
+#
# con = adbc_driver_flightsql.dbapi.connect(
-# "grpc+tls://node.demo.kamu.dev:50050",
+# "grpc://localhost:50050",
# db_kwargs={
-# adbc_driver_manager.DatabaseOptions.USERNAME.value: "kamu",
-# adbc_driver_manager.DatabaseOptions.PASSWORD.value: "kamu",
+#    # Anonymous users have to authenticate using basic auth so they can be assigned a session token
+# adbc_driver_manager.DatabaseOptions.USERNAME.value: "anonymous",
+# adbc_driver_manager.DatabaseOptions.PASSWORD.value: "anonymous",
+#    # Registered users can provide a bearer token directly
+# # adbc_driver_flightsql.DatabaseOptions.AUTHORIZATION_HEADER.value: "Bearer ",
# },
# autocommit=True,
# )
with con:
+ df = pandas.read_sql("select 1", con)
+ print(df)
+
df = pandas.read_sql("show tables", con)
print(df)
- df = pandas.read_sql("select * from 'co.alphavantage.tickers.daily.spy' limit 10", con)
+ df = pandas.read_sql("select * from 'kamu/co.alphavantage.tickers.daily.spy' limit 10", con)
print(df)
diff --git a/examples/flight-sql/python/client_flightsql_dbapi2.py b/examples/flight-sql/python/client_flightsql_dbapi2.py
index 68da8057a7..449bb8da4f 100644
--- a/examples/flight-sql/python/client_flightsql_dbapi2.py
+++ b/examples/flight-sql/python/client_flightsql_dbapi2.py
@@ -1,35 +1,45 @@
from flightsql import connect, FlightSQLClient
+# Secure remote connection
+client = FlightSQLClient(
+ host="node.demo.kamu.dev",
+ port=50050,
+    # Anonymous users have to authenticate using basic auth so they can be assigned a session token
+ user="anonymous",
+ password="anonymous",
+ # Registered users can provide a bearer token
+ # token="",
+)
+
# No-TLS local connection
#
# To test with local server use:
# cd examples/reth-vs-snp500
-# kamu -vv sql server --flight-sql --port 50050 --address 0.0.0.0
+# kamu -vv sql server --port 50050 --address 0.0.0.0
#
-client = FlightSQLClient(
- host='localhost',
- port=50050,
- user='kamu',
- password='kamu',
- insecure=True,
-)
-
-# Secure remote connection
# client = FlightSQLClient(
-# host='node.demo.kamu.dev',
+# host="localhost",
# port=50050,
-# user='kamu',
-# password='kamu',
+# insecure=True,
+#    # Anonymous users have to authenticate using basic auth so they can be assigned a session token
+# user="anonymous",
+# password="anonymous",
+# # Registered users can provide a bearer token
+# # token="",
# )
con = connect(client)
cursor = con.cursor()
+cursor.execute("select 1 as value")
+print("columns:", cursor.description)
+print("rows:", [r for r in cursor])
+
cursor.execute("show tables")
print("columns:", cursor.description)
print("rows:", [r for r in cursor])
-cursor.execute("select * from 'co.alphavantage.tickers.daily.spy' limit 10")
+cursor.execute("select * from 'kamu/co.alphavantage.tickers.daily.spy' limit 10")
print("columns:", cursor.description)
print("rows:", [r for r in cursor])
@@ -40,5 +50,5 @@
df = pandas.read_sql("show tables", con)
print(df)
-df = pandas.read_sql("select * from 'co.alphavantage.tickers.daily.spy' limit 10", con)
+df = pandas.read_sql("select * from 'kamu/co.alphavantage.tickers.daily.spy' limit 10", con)
print(df)
diff --git a/examples/flight-sql/python/client_flightsql_sqlalchemy.py b/examples/flight-sql/python/client_flightsql_sqlalchemy.py
index 11acc5bc67..a546ff4fef 100644
--- a/examples/flight-sql/python/client_flightsql_sqlalchemy.py
+++ b/examples/flight-sql/python/client_flightsql_sqlalchemy.py
@@ -2,24 +2,33 @@
import sqlalchemy
import pandas as pd
+# Secure remote connection
+engine = sqlalchemy.create_engine(
+    # Anonymous users have to authenticate using basic auth so they can be assigned a session token
+    "datafusion+flightsql://anonymous:anonymous@node.demo.kamu.dev:50050"
+    # Registered users can provide a bearer token directly
+ # "datafusion+flightsql://node.demo.kamu.dev:50050?token=kamu-token"
+)
+
# No-TLS local connection
#
# To test with local server use:
# cd examples/reth-vs-snp500
-# kamu -vv sql server --flight-sql --port 50050 --address 0.0.0.0
+# kamu -vv sql server --port 50050 --address 0.0.0.0
#
-engine = sqlalchemy.create_engine(
- "datafusion+flightsql://kamu:kamu@localhost:50050?insecure=True"
-)
-
-# Secure remote connection
# engine = sqlalchemy.create_engine(
-# "datafusion+flightsql://kamu:kamu@node.demo.kamu.dev:50050"
+#    # Anonymous users have to authenticate using basic auth so they can be assigned a session token
+#    "datafusion+flightsql://anonymous:anonymous@localhost:50050?insecure=True"
+#    # Registered users can provide a bearer token directly
+# # "datafusion+flightsql://localhost:50050?insecure=True&token=kamu-token"
# )
with engine.connect() as con:
+ df = pd.read_sql(sql="select 1 as value", con=con.connection)
+ print(df)
+
df = pd.read_sql(sql="show tables", con=con.connection)
print(df)
- df = pd.read_sql(sql="select * from 'co.alphavantage.tickers.daily.spy' limit 10", con=con.connection)
+ df = pd.read_sql(sql="select * from 'kamu/co.alphavantage.tickers.daily.spy' limit 10", con=con.connection)
print(df)
diff --git a/examples/flight-sql/python/client_kamu.py b/examples/flight-sql/python/client_kamu.py
new file mode 100644
index 0000000000..4fa1df8842
--- /dev/null
+++ b/examples/flight-sql/python/client_kamu.py
@@ -0,0 +1,32 @@
+import kamu
+
+# See more examples at: https://github.com/kamu-data/kamu-client-python
+
+# Secure remote connection
+con = kamu.connect(
+ "grpc+tls://node.demo.kamu.dev:50050",
+ # Registered users can provide a bearer token
+ # token="",
+)
+
+# No-TLS local connection
+#
+# To test with local server use:
+# cd examples/reth-vs-snp500
+# kamu -vv sql server --port 50050 --address 0.0.0.0
+#
+# con = kamu.connect(
+# "grpc://localhost:50050",
+# # Registered users can provide a bearer token
+# # token="",
+# )
+
+with con:
+ df = con.query("select 1 as value")
+ print(df)
+
+ df = con.query("show tables")
+ print(df)
+
+ df = con.query("select * from 'kamu/co.alphavantage.tickers.daily.spy' limit 10")
+ print(df)
diff --git a/examples/flight-sql/python/requirements.txt b/examples/flight-sql/python/requirements.txt
index a0b91edfe4..c38038faf7 100644
--- a/examples/flight-sql/python/requirements.txt
+++ b/examples/flight-sql/python/requirements.txt
@@ -1,6 +1,8 @@
adbc_driver_manager
adbc_driver_flightsql
flightsql-dbapi
+kamu
sqlalchemy
pandas
+pyarrow
jpype1
diff --git a/examples/flight-sql/python/shell.nix b/examples/flight-sql/python/shell.nix
new file mode 100644
index 0000000000..c3a543d7ac
--- /dev/null
+++ b/examples/flight-sql/python/shell.nix
@@ -0,0 +1,16 @@
+# Development shell for NixOS
+# Currently has to be kept in sync with `requirements.txt` manually
+let
+ pkgs = import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/nixos-unstable.tar.gz") {};
+in pkgs.mkShell {
+ packages = [
+ (pkgs.python3.withPackages (python-pkgs: with python-pkgs; [
+ # adbc_driver_manager
+ # adbc_driver_flightsql
+ # flightsql-dbapi
+ sqlalchemy
+ pandas
+ pyarrow
+ ]))
+ ];
+}
\ No newline at end of file
diff --git a/examples/housing_prices/ca.vancouver.opendata.property.tax-reports.yaml b/examples/housing_prices/ca.vancouver.opendata.property.tax-reports.yaml
index 8ae86cc6b7..24325fa46c 100644
--- a/examples/housing_prices/ca.vancouver.opendata.property.tax-reports.yaml
+++ b/examples/housing_prices/ca.vancouver.opendata.property.tax-reports.yaml
@@ -84,4 +84,4 @@ content:
- REPORT_YEAR
- PID
- kind: SetVocab
- eventTimeColumn: report_year
+ eventTimeColumn: REPORT_YEAR
diff --git a/examples/housing_prices/heatmap.ipynb b/examples/housing_prices/heatmap.ipynb
deleted file mode 100644
index 8761ea7ed9..0000000000
--- a/examples/housing_prices/heatmap.ipynb
+++ /dev/null
@@ -1,197 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%load_ext kamu"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset ca.vancouver.opendata.property.parcel-polygons --alias lots\n",
- "%import_dataset ca.vancouver.opendata.property.tax-reports --alias tax"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(lots.count())\n",
- "lots.printSchema()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(tax.count())\n",
- "tax.printSchema()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%sql\n",
- "select * from tax limit 10"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%sql\n",
- "CREATE OR REPLACE TEMP VIEW lot_tax AS (\n",
- "SELECT\n",
- " t.*,\n",
- " l.geometry\n",
- "FROM lots as l\n",
- "JOIN tax as t\n",
- "ON l.tax_coord = t.land_coordinate\n",
- "WHERE\n",
- " t.legal_type = 'LAND'\n",
- " AND t.tax_assessment_year = 2020\n",
- " AND t.current_land_value is not null\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%sql -o df\n",
- "SELECT\n",
- " land_coordinate,\n",
- " geometry,\n",
- " CAST(current_land_value AS DOUBLE) + CAST(current_improvement_value AS DOUBLE) AS current_total_value\n",
- "FROM lot_tax"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "import json\n",
- "\n",
- "# For every row we first combine GeoJson geometry with other columns into a Feature object\n",
- "# Then we combine all Features into a FeatureCollection\n",
- "def df_to_geojson(df, geom='geometry', props=None):\n",
- " if props is None:\n",
- " props = [\n",
- " c for c in df.columns\n",
- " if c != geom\n",
- " ]\n",
- " \n",
- " return {\n",
- " \"type\": \"FeatureCollection\",\n",
- " \"features\": [\n",
- " {\n",
- " \"type\": \"Feature\",\n",
- " \"geometry\": json.loads(row[geom]),\n",
- " \"properties\": {p: row[p] for p in props}\n",
- " }\n",
- " for _, row in df.iterrows()\n",
- " ]\n",
- " }"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "gj = df_to_geojson(df)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "import os\n",
- "from mapboxgl.viz import *\n",
- "from mapboxgl.utils import *\n",
- "\n",
- "# Must be a public token, starting with `pk`\n",
- "token = os.getenv('MAPBOX_ACCESS_TOKEN')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "scrolled": false
- },
- "outputs": [],
- "source": [
- "%%local\n",
- "viz = ChoroplethViz(\n",
- " gj,\n",
- " style='mapbox://styles/mapbox/dark-v10',\n",
- " center=(-123.1207, 49.2827),\n",
- " zoom=10,\n",
- " access_token=token,\n",
- " color_property='current_total_value',\n",
- " color_stops=create_color_stops([1000000, 2000000, 3000000, 5000000, 10000000], colors='YlOrRd'),\n",
- " color_default='rgb(158,202,195)',\n",
- " line_width=0,\n",
- " opacity=1.0,\n",
- " legend_layout='horizontal',\n",
- " legend_key_shape='bar',\n",
- " legend_key_borders_on=False)\n",
- "\n",
- "viz.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "PySpark",
- "language": "python",
- "name": "pysparkkernel"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "python",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/housing_prices/notebook.ipynb b/examples/housing_prices/notebook.ipynb
new file mode 100644
index 0000000000..3dbe6560a6
--- /dev/null
+++ b/examples/housing_prices/notebook.ipynb
@@ -0,0 +1,278 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "090c009f-34ae-4262-b9d7-36ca94d88baa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext kamu"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "213fc2aa-5f6f-48f9-87be-38e76ad731e5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import kamu\n",
+ "con = kamu.connect(engine=\"spark\", connection_params=dict(driver_memory=\"1000m\", executor_memory=\"2000m\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9654a8c1-4981-42a1-aef1-91dc36dded5c",
+ "metadata": {},
+ "source": [
+ "# Land value heatmap\n",
+ "Let's join land tax report records to their corresponding geographical boundaries and visualize their price on a map."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7086bb8c-08b8-4a40-b51d-d74165dcbb18",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "select * from `ca.vancouver.opendata.property.tax-reports` limit 3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eaa1ed5d-f65b-40dd-b749-1dc327b5677b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "select * from `ca.vancouver.opendata.property.parcel-polygons` limit 3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "36538d2f-04ee-4f43-ac14-de708a066175",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql -o heatmap -q\n",
+ "select\n",
+ " tax.report_year,\n",
+ " tax.pid,\n",
+ " tax.legal_type,\n",
+ " tax.zoning_district,\n",
+ " cast(tax.current_land_value as double) + cast(tax.current_improvement_value as double) as current_total_value,\n",
+ " polys.geometry\n",
+ "from `ca.vancouver.opendata.property.parcel-polygons` as polys\n",
+ "inner join `ca.vancouver.opendata.property.tax-reports` as tax\n",
+ " on tax.land_coordinate = polys.tax_coord\n",
+ "where\n",
+ " tax.legal_type = 'LAND'\n",
+ " and tax.tax_assessment_year = 2024\n",
+ " and tax.current_land_value is not null"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b54cbaf0-9a08-429d-8949-e9f884e13683",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import kamu.utils\n",
+ "\n",
+ "heatmap_gj = kamu.utils.df_to_geojson(heatmap)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f2500525-2095-4e7f-855b-830cd1c1d549",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import kamu.utils\n",
+ "from mapboxgl.viz import *\n",
+ "from mapboxgl.utils import *\n",
+ "\n",
+ "# Must be a public token, starting with `pk`\n",
+ "token = os.getenv('MAPBOX_ACCESS_TOKEN')\n",
+ "\n",
+ "viz = ChoroplethViz(\n",
+ " heatmap_gj,\n",
+ " style='mapbox://styles/mapbox/dark-v10',\n",
+ " center=(-123.1207, 49.2827),\n",
+ " zoom=10,\n",
+ " access_token=token,\n",
+ " color_property='current_total_value',\n",
+ " color_stops=create_color_stops([1000000, 2000000, 3000000, 5000000, 10000000], colors='YlOrRd'),\n",
+ " color_default='rgb(158,202,195)',\n",
+ " line_width=0,\n",
+ " opacity=1.0,\n",
+ " legend_layout='horizontal',\n",
+ " legend_key_shape='bar',\n",
+ " legend_key_borders_on=False)\n",
+ "\n",
+ "viz.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bcc55e34-4871-44fb-828d-364323f8f339",
+ "metadata": {},
+ "source": [
+ "# Spatial JOIN\n",
+ "We have two GIS datasets with outlines of every city block and geo boundaries of city neighbourhoods. Let's classify which neighbourhood every city block belongs to by joining two datasets using `st_contains` to test that a block polygon is fully contained within a neighbourhood polygon."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ba53b40c-7360-4fdc-9f37-175b433121c3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "select * from `ca.vancouver.opendata.property.block-outlines` limit 3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a464c56b-945a-4437-b880-e42c219a070a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "select * from `ca.vancouver.opendata.property.local-area-boundaries` limit 3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eea7076e-0362-403f-a15a-183664d85f89",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql -o blocks_by_hood -q\n",
+ "with blocks as (\n",
+ " select\n",
+ " st_geomfromgeojson(geometry) as geometry\n",
+ " from `ca.vancouver.opendata.property.block-outlines`\n",
+ "),\n",
+ "hoods as (\n",
+ " select\n",
+ " st_geomfromgeojson(geometry) as geometry,\n",
+ " name\n",
+ " from `ca.vancouver.opendata.property.local-area-boundaries`\n",
+ "),\n",
+ "blocks_by_hood as (\n",
+ " select hoods.name, blocks.geometry\n",
+ " from\n",
+ " blocks,\n",
+ " hoods\n",
+ " where st_intersects(blocks.geometry, hoods.geometry)\n",
+ ")\n",
+ "select\n",
+ " st_asgeojson(geometry) as geometry,\n",
+ " name,\n",
+ " -- calculating median value is left as an excercise :)\n",
+ " rand() as median_value\n",
+ "from blocks_by_hood"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d75afd5d-edbe-4d0b-8a5b-6c5b476a201d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import kamu.utils\n",
+ "\n",
+ "blocks_by_hood_gj = kamu.utils.df_to_geojson(blocks_by_hood)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1f52343f-2889-42a9-83fb-e4068398d267",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "viz = ChoroplethViz(\n",
+ " blocks_by_hood_gj,\n",
+ " style='mapbox://styles/mapbox/dark-v10',\n",
+ " center=(-123.1207, 49.2827),\n",
+ " zoom=10,\n",
+ " access_token=token,\n",
+ " color_property='median_value',\n",
+ " color_stops=create_color_stops([0.25, 0.5, 0.75, 1.0], colors='YlOrRd'),\n",
+ " line_stroke='solid',\n",
+ " line_width=0.1,\n",
+ " line_color='rgb(128,0,38)',\n",
+ " opacity=0.8,\n",
+ " legend_layout='horizontal',\n",
+ " legend_key_shape='bar',\n",
+ " legend_key_borders_on=False)\n",
+ "\n",
+ "viz.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "924d86c3-41b9-40a8-b2c1-ac70b601164e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# adjust view angle\n",
+ "viz.bearing = -15\n",
+ "viz.pitch = 45\n",
+ "\n",
+ "# add extrusion to viz using interpolation keyed on density in GeoJSON features\n",
+ "viz.height_property = 'median_value'\n",
+ "viz.height_stops = create_numeric_stops([0, 1], 0, 500)\n",
+ "viz.height_function_type = 'interpolate'\n",
+ "viz.opacity = 1\n",
+ "\n",
+ "# render again\n",
+ "viz.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f15c06e8-9b55-4ca6-a15a-b3f9d3321518",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/housing_prices/spatial_join.ipynb b/examples/housing_prices/spatial_join.ipynb
deleted file mode 100644
index 62fd4296d7..0000000000
--- a/examples/housing_prices/spatial_join.ipynb
+++ /dev/null
@@ -1,192 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%load_ext kamu"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset ca.vancouver.opendata.property.block-outlines --alias blocks\n",
- "%import_dataset ca.vancouver.opendata.property.local-area-boundaries --alias hoods"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "blocks.printSchema()\n",
- "hoods.printSchema()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%sql\n",
- "create or replace temp view blocks_by_hood as (\n",
- " select h.name, b.geometry\n",
- " from\n",
- " (select st_geomfromgeojson(geometry) as geometry from blocks) b,\n",
- " (select st_geomfromgeojson(geometry) as geometry, name from hoods) h\n",
- " where st_intersects(b.geometry, h.geometry)\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%sql -o df\n",
- "select\n",
- " st_asgeojson(geometry) as geometry, \n",
- " name, \n",
- " rand() as median_value\n",
- "from blocks_by_hood"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "import json\n",
- "\n",
- "# For every row we first combine GeoJson geometry with other columns into a Feature object\n",
- "# Then we combine all Features into a FeatureCollection\n",
- "def df_to_geojson(df, geom='geometry', props=None):\n",
- " if props is None:\n",
- " props = [\n",
- " c for c in df.columns\n",
- " if c != geom\n",
- " ]\n",
- " \n",
- " return {\n",
- " \"type\": \"FeatureCollection\",\n",
- " \"features\": [\n",
- " {\n",
- " \"type\": \"Feature\",\n",
- " \"geometry\": json.loads(row[geom]),\n",
- " \"properties\": {p: row[p] for p in props}\n",
- " }\n",
- " for _, row in df.iterrows()\n",
- " ]\n",
- " }"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "gj = df_to_geojson(df)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "import os\n",
- "from mapboxgl.viz import *\n",
- "from mapboxgl.utils import *\n",
- "\n",
- "# Must be a public token, starting with `pk`\n",
- "token = os.getenv('MAPBOX_ACCESS_TOKEN')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "viz = ChoroplethViz(\n",
- " gj,\n",
- " style='mapbox://styles/mapbox/dark-v10',\n",
- " center=(-123.1207, 49.2827),\n",
- " zoom=10,\n",
- " access_token=token,\n",
- " color_property='median_value',\n",
- " color_stops=create_color_stops([0.25, 0.5, 0.75, 1.0], colors='YlOrRd'),\n",
- " line_stroke='solid',\n",
- " line_width=0.1,\n",
- " line_color='rgb(128,0,38)',\n",
- " opacity=0.8,\n",
- " legend_layout='horizontal',\n",
- " legend_key_shape='bar',\n",
- " legend_key_borders_on=False)\n",
- "\n",
- "viz.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "# adjust view angle\n",
- "viz.bearing = -15\n",
- "viz.pitch = 45\n",
- "\n",
- "# add extrusion to viz using interpolation keyed on density in GeoJSON features\n",
- "viz.height_property = 'median_value'\n",
- "viz.height_stops = create_numeric_stops([0, 1], 0, 500)\n",
- "viz.height_function_type = 'interpolate'\n",
- "viz.opacity = 1\n",
- "\n",
- "# render again\n",
- "viz.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "PySpark",
- "language": "python",
- "name": "pysparkkernel"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "python",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/reth-vs-snp500/init-s3-all.sh b/examples/reth-vs-snp500/init-s3-all.sh
deleted file mode 100755
index bf2fe96112..0000000000
--- a/examples/reth-vs-snp500/init-s3-all.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/sh
-set -e
-
-S3_CONTRIB_URL="https://s3.us-west-2.amazonaws.com/datasets.kamu.dev/odf/v2/contrib/"
-S3_EXAMPLE_URL="https://s3.us-west-2.amazonaws.com/datasets.kamu.dev/odf/v2/example/"
-
-kamu init || true
-
-# Root
-kamu pull "${S3_CONTRIB_URL}net.rocketpool.reth.tokens-minted"
-kamu pull "${S3_CONTRIB_URL}net.rocketpool.reth.tokens-burned"
-kamu pull "${S3_CONTRIB_URL}com.cryptocompare.ohlcv.eth-usd"
-kamu pull "${S3_CONTRIB_URL}co.alphavantage.tickers.daily.spy"
-
-kamu pull "${S3_EXAMPLE_URL}account.transactions"
-kamu pull "${S3_EXAMPLE_URL}account.tokens.transfers"
-
-# Deriv
-kamu pull "${S3_EXAMPLE_URL}net.rocketpool.reth.mint-burn"
-kamu pull "${S3_EXAMPLE_URL}account.tokens.portfolio"
-kamu pull "${S3_EXAMPLE_URL}account.tokens.portfolio.market-value"
-kamu pull "${S3_EXAMPLE_URL}account.tokens.portfolio.usd"
-kamu pull "${S3_EXAMPLE_URL}account.whatif.reth-vs-snp500.market-value"
-kamu pull "${S3_EXAMPLE_URL}account.whatif.reth-vs-snp500.portfolio"
diff --git a/examples/reth-vs-snp500/init-s3-ipfs.sh b/examples/reth-vs-snp500/init-s3-ipfs.sh
deleted file mode 100755
index 84226e57c1..0000000000
--- a/examples/reth-vs-snp500/init-s3-ipfs.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/sh
-set -e
-
-S3_CONTRIB_URL="https://s3.us-west-2.amazonaws.com/datasets.kamu.dev/odf/v2/contrib/"
-S3_EXAMPLE_URL="https://s3.us-west-2.amazonaws.com/datasets.kamu.dev/odf/v2/example/"
-
-kamu init || true
-
-# Pull from S3 for speed but then alias to IPFS
-kamu pull "${S3_CONTRIB_URL}net.rocketpool.reth.tokens-minted" --no-alias
-kamu repo alias add --pull net.rocketpool.reth.tokens-minted "ipns://net.rocketpool.reth.tokens-minted.ipns.kamu.dev"
-
-kamu pull "${S3_CONTRIB_URL}net.rocketpool.reth.tokens-burned" --no-alias
-kamu repo alias add --pull net.rocketpool.reth.tokens-burned "ipns://net.rocketpool.reth.tokens-burned.ipns.kamu.dev"
-
-kamu pull "${S3_CONTRIB_URL}com.cryptocompare.ohlcv.eth-usd" --no-alias
-kamu repo alias add --pull com.cryptocompare.ohlcv.eth-usd "ipns://com.cryptocompare.ohlcv.eth-usd.ipns.kamu.dev"
-
-kamu pull "${S3_CONTRIB_URL}co.alphavantage.tickers.daily.spy" --no-alias
-kamu repo alias add --pull co.alphavantage.tickers.daily.spy "ipns://co.alphavantage.tickers.daily.spy.ipns.kamu.dev"
-
-kamu pull "${S3_EXAMPLE_URL}account.transactions" --no-alias
-kamu pull "${S3_EXAMPLE_URL}account.tokens.transfers" --no-alias
-
-kamu add -r .
diff --git a/examples/reth-vs-snp500/init-s3.sh b/examples/reth-vs-snp500/init-s3.sh
deleted file mode 100755
index 264ebd6084..0000000000
--- a/examples/reth-vs-snp500/init-s3.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/sh
-set -e
-
-S3_CONTRIB_URL="https://s3.us-west-2.amazonaws.com/datasets.kamu.dev/odf/v2/contrib/"
-S3_EXAMPLE_URL="https://s3.us-west-2.amazonaws.com/datasets.kamu.dev/odf/v2/example/"
-
-kamu init || true
-
-# Root
-kamu pull "${S3_CONTRIB_URL}net.rocketpool.reth.tokens-minted"
-kamu pull "${S3_CONTRIB_URL}net.rocketpool.reth.tokens-burned"
-kamu pull "${S3_CONTRIB_URL}com.cryptocompare.ohlcv.eth-usd"
-kamu pull "${S3_CONTRIB_URL}co.alphavantage.tickers.daily.spy"
-
-kamu pull "${S3_EXAMPLE_URL}account.transactions"
-kamu pull "${S3_EXAMPLE_URL}account.tokens.transfers"
-
-kamu add -r .
diff --git a/examples/reth-vs-snp500/analysis.ipynb b/examples/reth-vs-snp500/notebook.ipynb
similarity index 83%
rename from examples/reth-vs-snp500/analysis.ipynb
rename to examples/reth-vs-snp500/notebook.ipynb
index d9b3060559..836de66a6b 100644
--- a/examples/reth-vs-snp500/analysis.ipynb
+++ b/examples/reth-vs-snp500/notebook.ipynb
@@ -16,17 +16,7 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
- "import os\n",
- "import numpy as np\n",
- "import xarray as xr\n",
- "import pandas as pd\n",
- "import matplotlib.pyplot as plt\n",
- "import hvplot.pandas # noqa\n",
- "import hvplot.xarray # noqa\n",
- "import holoviews as hv\n",
- "from datetime import datetime\n",
- "pd.set_option('max_colwidth', None)"
+ "%load_ext kamu"
]
},
{
@@ -35,7 +25,9 @@
"metadata": {},
"outputs": [],
"source": [
- "%load_ext kamu"
+ "import kamu\n",
+ "\n",
+ "con = kamu.connect()"
]
},
{
@@ -44,36 +36,34 @@
"metadata": {},
"outputs": [],
"source": [
- "%import_dataset net.rocketpool.reth.mint-burn\n",
- "%import_dataset com.cryptocompare.ohlcv.eth-usd\n",
- "\n",
- "%import_dataset account.transactions\n",
- "%import_dataset account.tokens.transfers\n",
- "%import_dataset account.tokens.portfolio\n",
- "%import_dataset account.tokens.portfolio.market-value\n",
+ "import os\n",
+ "import numpy as np\n",
+ "import xarray as xr\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import hvplot.pandas\n",
+ "import hvplot.xarray\n",
+ "import holoviews as hv\n",
+ "from datetime import datetime\n",
"\n",
- "%import_dataset co.alphavantage.tickers.daily.spy\n",
- "%import_dataset account.whatif.reth-vs-snp500.portfolio\n",
- "%import_dataset account.whatif.reth-vs-snp500.market-value"
+ "pd.set_option('max_colwidth', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "scrolled": false
- },
+ "metadata": {},
"outputs": [],
"source": [
"%%sql -o reth_pool\n",
"select \n",
" event_time, \n",
" case \n",
- " when event_name = \"TokensMinted\" then \"Mint\"\n",
- " when event_name = \"TokensBurned\" then \"Burn\"\n",
+ " when event_name = 'TokensMinted' then 'Mint'\n",
+ " when event_name = 'TokensBurned' then 'Burn'\n",
" end as event_name, \n",
" avg(eth_amount / amount) as ratio \n",
- "from `net.rocketpool.reth.mint-burn` \n",
+ "from 'net.rocketpool.reth.mint-burn'\n",
"group by event_time, event_name\n",
"order by 1"
]
@@ -84,7 +74,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"reth_pool.hvplot.step(\n",
" x=\"event_time\",\n",
" by=\"event_name\",\n",
@@ -101,7 +90,7 @@
"outputs": [],
"source": [
"%%sql -o eth2usd\n",
- "select event_time, open, close from `com.cryptocompare.ohlcv.eth-usd` order by event_time"
+ "select event_time, open, close from 'com.cryptocompare.ohlcv.eth-usd' order by event_time"
]
},
{
@@ -110,7 +99,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"eth2usd.hvplot.step(x=\"event_time\", y=\"close\", height=500, width=800)"
]
},
@@ -121,7 +109,7 @@
"outputs": [],
"source": [
"%%sql -o portfolio\n",
- "select * from `account.tokens.portfolio` order by block_time"
+ "select * from 'account.tokens.portfolio' order by block_time"
]
},
{
@@ -130,7 +118,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"portfolio[\n",
" portfolio.token_symbol == \"rETH\"\n",
"].hvplot.scatter(\n",
@@ -148,7 +135,7 @@
"outputs": [],
"source": [
"%%sql -o reth_mv\n",
- "select * from `account.tokens.portfolio.market-value` order by event_time"
+ "select * from 'account.tokens.portfolio.market-value' order by event_time"
]
},
{
@@ -157,7 +144,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"reth_mv.hvplot.line(\n",
" x=\"event_time\", \n",
" y=[\"token_book_value_eth\", \"token_market_value_eth\"], \n",
@@ -174,7 +160,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"reth_mv.hvplot.line(\n",
" x=\"event_time\", \n",
" y=[\"token_book_value_eth_as_usd\", \"token_market_value_usd\"], \n",
@@ -192,7 +177,7 @@
"outputs": [],
"source": [
"%%sql -o spy_ticks\n",
- "select * from `co.alphavantage.tickers.daily.spy`"
+ "select * from 'co.alphavantage.tickers.daily.spy'"
]
},
{
@@ -201,7 +186,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"spy_ticks.hvplot.step(\n",
" x=\"event_time\", \n",
" y=[\"close\"],\n",
@@ -218,7 +202,7 @@
"outputs": [],
"source": [
"%%sql -o market_value -q\n",
- "select * from `account.tokens.portfolio.market-value`"
+ "select * from 'account.tokens.portfolio.market-value'"
]
},
{
@@ -228,7 +212,7 @@
"outputs": [],
"source": [
"%%sql -o alternative_market_value -q\n",
- "select * from `account.whatif.reth-vs-snp500.market-value`"
+ "select * from 'account.whatif.reth-vs-snp500.market-value'"
]
},
{
@@ -237,8 +221,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
- "\n",
"max_height = max(\n",
" alternative_market_value[\"alt_spy_market_value_usd\"].max(),\n",
" market_value[\"token_market_value_usd\"].max(),\n",
@@ -270,20 +252,6 @@
"# )"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -369,19 +337,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/examples/trading/trading.ipynb b/examples/trading/notebook.ipynb
similarity index 61%
rename from examples/trading/trading.ipynb
rename to examples/trading/notebook.ipynb
index ca3a33c7d9..c267bec220 100644
--- a/examples/trading/trading.ipynb
+++ b/examples/trading/notebook.ipynb
@@ -15,10 +15,9 @@
"metadata": {},
"outputs": [],
"source": [
- "%import_dataset com.yahoo.finance.tickers.daily --alias tickers\n",
- "%import_dataset my.trading.transactions --alias transactions\n",
- "%import_dataset my.trading.holdings --alias holdings\n",
- "%import_dataset my.trading.holdings.market-value --alias value"
+ "import kamu\n",
+ "\n",
+ "con = kamu.connect()"
]
},
{
@@ -28,7 +27,7 @@
"outputs": [],
"source": [
"%%sql\n",
- "select * from value"
+ "select * from 'my.trading.holdings.market-value'"
]
},
{
@@ -41,21 +40,23 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/images/Makefile b/images/Makefile
index a68675ea1d..9c7fecba1e 100644
--- a/images/Makefile
+++ b/images/Makefile
@@ -1,6 +1,6 @@
IMAGE_PLATFORMS = linux/amd64,linux/arm64
IMAGE_REPO = ghcr.io/kamu-data
-IMAGE_JUPYTER_TAG = 0.6.3
+IMAGE_JUPYTER_TAG = 0.7.0
KAMU_VERSION = $(shell cargo metadata --format-version 1 | jq -r '.packages[] | select( .name == "kamu") | .version')
diff --git a/images/demo/Dockerfile.jupyter b/images/demo/Dockerfile.jupyter
index ae291e7e3a..89fcfd3913 100644
--- a/images/demo/Dockerfile.jupyter
+++ b/images/demo/Dockerfile.jupyter
@@ -1,7 +1,7 @@
# Base image info: https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html
# Base image tags: https://quay.io/repository/jupyter/minimal-notebook
# Customization is based on: https://github.com/jupyter-incubator/sparkmagic/blob/master/Dockerfile.jupyter
-FROM quay.io/jupyter/minimal-notebook:2024-02-13
+FROM quay.io/jupyter/minimal-notebook:2024-12-09
ARG TARGETPLATFORM
ARG KAMU_VERSION
ARG dev_mode=false
@@ -10,15 +10,10 @@ ARG dev_mode=false
#########################################################################################
USER root
-# Podman
+# Podman & tools
# Source: https://github.com/containers/podman/blob/056f492f59c333d521ebbbe186abde0278e815db/contrib/podmanimage/stable/Dockerfile
RUN apt update && \
- apt -y install ca-certificates curl wget gnupg unzip jq && \
- . /etc/os-release && \
- echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_${VERSION_ID}/ /" | tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list && \
- curl -L "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_${VERSION_ID}/Release.key" | apt-key add - && \
- apt update && \
- apt -y install podman fuse-overlayfs && \
+ apt -y install ca-certificates curl wget gnupg unzip jq podman fuse-overlayfs && \
apt-get clean && rm -rf /var/lib/apt/lists /var/cache/apt/archives
COPY podman/containers.conf /etc/containers/containers.conf
@@ -47,15 +42,12 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2
# Sparkmagic and tools
-COPY jupyter/requirements/$TARGETPLATFORM/requirements.txt requirements.txt
-
-# TODO: Semi-permanent hack for `mapboxgl` package being broken in conda-forge
-# See: https://github.com/kamu-data/kamu-cli/issues/533
-RUN mamba install -y --file requirements.txt && \
- mamba uninstall mapboxgl && pip install --no-cache-dir mapboxgl && \
- mamba clean --all -f -y && \
- rm requirements.txt && \
- fix-permissions "${CONDA_DIR}" && \
+COPY jupyter/requirements/$TARGETPLATFORM/env.yaml env.yaml
+
+RUN mamba env update -y -f env.yaml && \
+ mamba clean --all -f -y && \
+ rm env.yaml && \
+ fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
@@ -70,15 +62,9 @@ COPY jupyter/.kamuconfig /.kamuconfig
#########################################################################################
USER $NB_USER
-COPY jupyter/kamu.py /opt/conda/lib/python3.11/site-packages/kamu.py
-COPY jupyter/sparkmagic.json /home/$NB_USER/.sparkmagic/config.json
-
-RUN jupyter nbextension enable --py --sys-prefix widgetsnbextension
-#RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/sparkkernel
-RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/pysparkkernel
-#RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/sparkrkernel
-RUN jupyter serverextension enable --py sparkmagic
+COPY jupyter/overrides.json /opt/conda/share/jupyter/lab/settings/overrides.json
+RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
#########################################################################################
USER root
@@ -91,6 +77,4 @@ RUN fix-permissions "/home/${NB_USER}"
#########################################################################################
USER $NB_USER
-# TODO: Remove show_banner option after Sparkmagic supports novebook >= 7.0.0
-# See: https://github.com/jupyter-incubator/sparkmagic/issues/885
-CMD ["jupyter", "notebook", "--ip", "0.0.0.0", "--port", "8080", "--NotebookApp.iopub_data_rate_limit=1e10", "--NotebookApp.show_banner=False"]
+CMD ["jupyter", "notebook", "--ip", "0.0.0.0", "--port", "8080", "--NotebookApp.iopub_data_rate_limit=1e10"]
diff --git a/images/demo/Makefile b/images/demo/Makefile
index c992364a90..afa1c4941e 100644
--- a/images/demo/Makefile
+++ b/images/demo/Makefile
@@ -1,7 +1,9 @@
IMAGE_PLATFORMS = linux/amd64,linux/arm64
IMAGE_REPO = ghcr.io/kamu-data
KAMU_VERSION = $(shell cargo metadata --format-version 1 | jq -r '.packages[] | select( .name == "kamu") | .version')
-DEMO_VERSION = 0.16.6
+
+# Keep in sync with versions of Jupyter and Minio in docker-compose.yml
+DEMO_VERSION = 0.17.0
#########################################################################################
diff --git a/images/demo/docker-compose.yml b/images/demo/docker-compose.yml
index f35a8795b4..9f9bf5fa03 100644
--- a/images/demo/docker-compose.yml
+++ b/images/demo/docker-compose.yml
@@ -5,7 +5,7 @@ networks:
services:
jupyter:
- image: ghcr.io/kamu-data/kamu-cli-demo-jupyter:0.16.6
+ image: ghcr.io/kamu-data/kamu-cli-demo-jupyter:0.17.0
# Unfortunately running podman within another container requires elevated permissions
privileged: true
command:
@@ -36,7 +36,7 @@ services:
- minio
minio:
- image: ghcr.io/kamu-data/kamu-cli-demo-minio:0.16.6
+ image: ghcr.io/kamu-data/kamu-cli-demo-minio:0.17.0
command:
- "server"
- "--address"
diff --git a/images/demo/jupyter/Makefile b/images/demo/jupyter/Makefile
index 4a36ef57ad..488ddb1ceb 100644
--- a/images/demo/jupyter/Makefile
+++ b/images/demo/jupyter/Makefile
@@ -1,5 +1,5 @@
PLATFORM=linux/amd64
-BASE_IMAGE:=quay.io/jupyter/minimal-notebook:2024-02-13
+BASE_IMAGE:=quay.io/jupyter/minimal-notebook:2024-12-09
# Requires QEMU
@@ -23,7 +23,22 @@ requirements-platform:
# Executed from inside the base image
+#
+# The stupidity of Python package management ecosystems is unbelievable. Jupyter images are
+# based on conda, but some of the packages we need are only installable via pip. We want the
+# environment to be reproducible, but the `dependencies.pip` section of `conda env export`
+# includes only **top-level** packages, ignoring all direct and transitive dependencies.
+#
+# To make the environment fully reproducible we have to resort to:
+# - Run `conda env export` to lock conda packages (and some of the pip packages)
+# - Strip the partial `pip` section from the conda env
+# - Run `pip freeze` to lock pip packages
+# - Filter conda-managed packages out of the `pip freeze` output
+# - Merge the rest into the `dependencies.pip` section of the `conda env export` (shape sketched below)
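+#
+# For illustration, the merged env.yaml ends up roughly of this shape (the long
+# package lists are elided here; see the checked-in env.yaml files for the real thing):
+#
+#   channels:
+#   - conda-forge
+#   dependencies:
+#   - python=3.12.8
+#   - ...
+#   - pip:
+#     - kamu==0.6.0
+#     - ...
+#   name: base
+#   prefix: /opt/conda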
.PHONY: requirements-install-freeze
requirements-install-freeze:
- mamba install -y --file requirements/$(PLATFORM)/requirements.in
- mamba list --export > requirements/$(PLATFORM)/requirements.txt
+ pip install -r requirements/$(PLATFORM)/requirements.in
+ pip freeze > requirements/$(PLATFORM)/requirements.txt
+ mamba env export --no-builds > requirements/$(PLATFORM)/env.yaml
+ python ./merge_requirements.py requirements/$(PLATFORM)/env.yaml requirements/$(PLATFORM)/requirements.txt
+ rm requirements/$(PLATFORM)/requirements.txt
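+
+# Example: refresh the arm64 lock from inside the base image
+# (PLATFORM defaults to linux/amd64, see top of this Makefile):
+#
+#   make requirements-install-freeze PLATFORM=linux/arm64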
diff --git a/images/demo/jupyter/kamu.py b/images/demo/jupyter/kamu.py
deleted file mode 100644
index b451f6dd14..0000000000
--- a/images/demo/jupyter/kamu.py
+++ /dev/null
@@ -1,282 +0,0 @@
-import os
-import re
-import json
-import time
-import socket
-import signal
-import subprocess
-from collections import namedtuple
-from IPython.core import magic_arguments
-from IPython.core.magic import line_magic, cell_magic, line_cell_magic, Magics, magics_class
-from IPython.display import clear_output
-
-
-SPARK_INIT_CODE = """
-spark.sparkContext._jvm.org.datasyslab.geosparksql.utils.GeoSparkSQLRegistrator.registerAll(sc._jvm.SQLContext(sc._jsc.sc()))
-"""
-
-
-SPARK_IMPORT_DATASET_CODE = """
-import os
-
-def resolve_dataset_ref(dataset_ref):
- if "/" not in dataset_ref:
- # Single-tenant
- data_path = os.path.join(dataset_ref, "data")
- if os.path.exists(data_path):
- return data_path
- else:
- # Multi-tenant
- # Assumptions:
- # - Layout of the data directory is `//info/alias`
- # - Alias file contains `/`
- account_name, dataset_name = dataset_ref.split("/", 1)
- if os.path.isdir(account_name):
- for dataset_id in os.listdir(account_name):
- alias_path = os.path.join(account_name, dataset_id, "info", "alias")
- if not os.path.exists(alias_path):
- continue
- with open(alias_path) as f:
- alias = f.read().strip()
- if alias != dataset_ref:
- continue
- return os.path.join(account_name, dataset_id, "data")
-
- raise Exception(f"Dataset {{dataset_ref}} not found")
-
-data_path = resolve_dataset_ref("{ref}")
-{alias} = spark.read.parquet(os.path.join(data_path, "*"))
-{alias}.createOrReplaceTempView("`{ref}`")
-{alias}.createOrReplaceTempView("{alias}")
-"""
-
-
-LIVY_START_TIMEOUT = 60
-LIVY_PIDFILE = os.path.expanduser("~/.local/kamu/livy.pid")
-LIVY_STDOUT = os.path.expanduser("~/.local/kamu/livy.out.txt")
-LIVY_STDERR = os.path.expanduser("~/.local/kamu/livy.err.txt")
-
-
-@magics_class
-class KamuMagics(Magics):
- @line_magic
- @magic_arguments.magic_arguments()
- @magic_arguments.argument(
- '--executor-instances',
- type=int,
- default=2,
- help='Number of executor instances to run'
- )
- def kamu(self, line):
- self._ensure_livy_is_running()
-
- args = magic_arguments.parse_argstring(self.kamu, line)
- code = SPARK_INIT_CODE
- self.shell.run_cell_magic('spark', '', code)
-
- @line_magic
- @magic_arguments.magic_arguments()
- @magic_arguments.argument('dataset_ref',
- nargs=1,
- help='Dataset to load'
- )
- @magic_arguments.argument('--alias',
- help='Also registers the dataset under provided alias'
- )
- def import_dataset(self, line):
- self._ensure_images()
- self._ensure_livy_is_running()
-
- args = magic_arguments.parse_argstring(self.import_dataset, line)
- dataset_ref = args.dataset_ref[0]
- if not args.alias:
- args.alias = re.sub(r"[\.\-/]", "_", dataset_ref)
- code = SPARK_IMPORT_DATASET_CODE.format(
- ref=dataset_ref,
- alias=args.alias,
- )
- self.shell.run_cell_magic('spark', '', code)
-
- def _ensure_livy_is_running(self):
- livy = LivyProcessHelper()
- procinfo = livy.get_proc_info(check_running=True)
- if procinfo is None:
- print("Starting Livy server")
- livy.start(timeout=LIVY_START_TIMEOUT)
- clear_output()
-
- def _ensure_images(self):
- out = subprocess.run(["kamu", "init", "--pull-images", "--list-only"], capture_output=True)
- assert out.returncode == 0, "Failed to list images from kamu"
- images = [
- img for img in out.stdout.decode("utf-8").split("\n")
- if "spark" in img
- ]
- assert len(images) > 0, "No images in output"
-
- touch_image_statuses = (
- subprocess.run(["podman", "inspect", img], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
- for img in images
- )
- images_pulled = all(
- status.returncode == 0
- for status in touch_image_statuses
- )
- if images_pulled:
- return
-
- print("First time run. Please wait while we pull the necessary images.")
- for image in images:
- print(f"Pulling: {image}")
- out = subprocess.run(["podman", "pull", image])
- assert out.returncode == 0, f"Failed to pull image: {image}"
-
- clear_output()
-
- @line_magic
- def stop_livy(self, line):
- livy = LivyProcessHelper()
- livy.stop()
-
-
-LivyProcInfo = namedtuple("LivyProcInfo", ["pid", "port"])
-
-
-class LivyProcessHelper:
- def __init__(self, pidfile=LIVY_PIDFILE):
- self._pidfile = pidfile
-
- def get_proc_info(self, check_running=True):
- if not os.path.exists(self._pidfile):
- return None
-
- with open(self._pidfile, 'r') as f:
- procinfo = LivyProcInfo(**json.load(f))
-
- if not check_running:
- return procinfo
-
- if not self.is_running(procinfo=procinfo):
- return None
-
- return procinfo
-
- def save_proc_info(self, procinfo):
- pi_dir, _ = os.path.split(self._pidfile)
- os.makedirs(pi_dir, exist_ok=True)
- with open(self._pidfile, "w") as f:
- json.dump(procinfo._asdict(), f)
-
- def is_running(self, procinfo=None):
- if procinfo is None:
- procinfo = self.get_proc_info(check_running=False)
- if procinfo is None:
- return False
-
- return (
- self.is_process_running(procinfo.pid) and
- self.is_port_open(procinfo.port)
- )
-
- def is_process_running(self, pid=None):
- if pid is None:
- procinfo = self.get_proc_info(check_running=False)
- if procinfo is None:
- return False
- pid = procinfo.pid
-
- try:
- os.kill(pid, 0)
- return True
- except OSError:
- return False
-
- def is_port_open(self, port=None):
- if port is None:
- procinfo = self.get_proc_info(check_running=False)
- if procinfo is None:
- return False
- port = procinfo.port
-
- try:
- s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- s.connect(("127.0.0.1", port))
- s.close()
- return True
- except socket.error:
- return False
-
- def start(self, timeout):
- if not self.is_in_workspace():
- raise Exception(
- "Current directory is not under kamu workspace. "
- "Create a workspace in the desired location by running `kamu init` in the terminal "
- "and place your notebook in that directory."
- )
-
- # TODO: Other ports are not supported due to podman running in host networking mode
- port = 8998
-
- out_dir, _ = os.path.split(LIVY_STDOUT)
- os.makedirs(out_dir, exist_ok=True)
-
- p = subprocess.Popen(
- ["/usr/local/bin/kamu", "sql", "server", "--livy", "--port", str(port)],
- stdout=open(LIVY_STDOUT, "w"),
- stderr=open(LIVY_STDERR, "w"),
- close_fds=True
- )
-
- deadline = time.time() + timeout
- while True:
- try:
- status = p.wait(1)
- raise Exception(
- f"Livy failed to start with status code: {status}\n"
- f"See logs for details:\n"
- f"- {LIVY_STDOUT}\n"
- f"- {LIVY_STDERR}"
- )
- except subprocess.TimeoutExpired:
- pass
-
- if self.is_port_open(port):
- break
-
- if time.time() >= deadline:
- p.send_signal(signal.SIGTERM)
- raise Exception(
- f"Livy failed to start within {timeout} seconds\n"
- f"See logs for details:\n"
- f"- {LIVY_STDOUT}\n"
- f"- {LIVY_STDERR}"
- )
-
- procinfo = LivyProcInfo(pid=p.pid, port=port)
- self.save_proc_info(procinfo)
- return procinfo
-
- def stop(self):
- procinfo = self.get_proc_info(check_running=False)
- if procinfo is None:
- return
-
- try:
- os.kill(procinfo.pid, signal.SIGTERM)
- print("Stopping Livy")
- except OSError:
- pass
-
- def is_in_workspace(self, cwd=None):
- p = subprocess.run(
- ["/usr/local/bin/kamu", "list"],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.DEVNULL,
- cwd=cwd,
- )
-
- return p.returncode == 0
-
-
-def load_ipython_extension(ipython):
- ipython.register_magics(KamuMagics)
diff --git a/images/demo/jupyter/merge_requirements.py b/images/demo/jupyter/merge_requirements.py
new file mode 100644
index 0000000000..6ff0b294e2
--- /dev/null
+++ b/images/demo/jupyter/merge_requirements.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
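+#
+# Helper for the `requirements-install-freeze` Makefile target: merges a
+# `pip freeze` lockfile into the `dependencies` section of a
+# `conda env export` file, rewriting the env file in place.
+#
+# Usage:
+#   python merge_requirements.py <env.yaml> <requirements.txt>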
+
+import sys
+import yaml
+
+env_path = sys.argv[1]
+req_path = sys.argv[2]
+
+# Read files
+with open(env_path) as f:
+ env = yaml.safe_load(f)
+
+with open(req_path) as f:
+ reqs = [r.strip() for r in f.readlines()]
+
+# Filter out pip packages from `conda env export`
+env['dependencies'] = [
+ dep for dep in env['dependencies']
+ if not isinstance(dep, dict) or 'pip' not in dep
+]
+
+# Filter out conda-managed packages, which `pip freeze` reports as local
+# `package @ file:///...` references into the conda package cache
+reqs = [r for r in reqs if '@ file://' not in r]
+
+# Merge into environment
+env['dependencies'].append({'pip': reqs})
+
+# Replace env file
+with open(env_path, 'w') as f:
+ yaml.safe_dump(env, f)
diff --git a/images/demo/jupyter/overrides.json b/images/demo/jupyter/overrides.json
new file mode 100644
index 0000000000..36a8a9e7b5
--- /dev/null
+++ b/images/demo/jupyter/overrides.json
@@ -0,0 +1,5 @@
+{
+ "@jupyterlab/apputils-extension:themes": {
+ "adaptive-theme": true
+ }
+}
\ No newline at end of file
diff --git a/images/demo/jupyter/requirements/linux/amd64/env.yaml b/images/demo/jupyter/requirements/linux/amd64/env.yaml
new file mode 100644
index 0000000000..42c02bd385
--- /dev/null
+++ b/images/demo/jupyter/requirements/linux/amd64/env.yaml
@@ -0,0 +1,274 @@
+channels:
+- conda-forge
+dependencies:
+- _libgcc_mutex=0.1
+- _openmp_mutex=4.5
+- alembic=1.14.0
+- annotated-types=0.7.0
+- anyio=4.7.0
+- archspec=0.2.3
+- argon2-cffi=23.1.0
+- argon2-cffi-bindings=21.2.0
+- arrow=1.3.0
+- asttokens=3.0.0
+- async-lru=2.0.4
+- async_generator=1.10
+- attrs=24.2.0
+- babel=2.16.0
+- beautifulsoup4=4.12.3
+- bleach=6.2.0
+- blinker=1.9.0
+- boltons=24.0.0
+- brotli-python=1.1.0
+- bzip2=1.0.8
+- c-ares=1.34.3
+- ca-certificates=2024.8.30
+- cached-property=1.5.2
+- cached_property=1.5.2
+- certifi=2024.8.30
+- certipy=0.2.1
+- cffi=1.17.1
+- charset-normalizer=3.4.0
+- colorama=0.4.6
+- comm=0.2.2
+- conda=24.11.0
+- conda-libmamba-solver=24.11.1
+- conda-package-handling=2.4.0
+- conda-package-streaming=0.11.0
+- cpp-expected=1.1.0
+- cryptography=44.0.0
+- debugpy=1.8.9
+- decorator=5.1.1
+- defusedxml=0.7.1
+- distro=1.9.0
+- entrypoints=0.4
+- exceptiongroup=1.2.2
+- executing=2.1.0
+- fmt=11.0.2
+- fqdn=1.5.1
+- frozendict=2.4.6
+- greenlet=3.1.1
+- h11=0.14.0
+- h2=4.1.0
+- hpack=4.0.0
+- httpcore=1.0.7
+- httpx=0.28.1
+- hyperframe=6.0.1
+- idna=3.10
+- importlib-metadata=8.5.0
+- importlib_resources=6.4.5
+- ipykernel=6.29.5
+- ipython=8.30.0
+- ipython_genutils=0.2.0
+- isoduration=20.11.0
+- jedi=0.19.2
+- jinja2=3.1.4
+- json5=0.10.0
+- jsonpatch=1.33
+- jsonpointer=3.0.0
+- jsonschema=4.23.0
+- jsonschema-specifications=2024.10.1
+- jsonschema-with-format-nongpl=4.23.0
+- jupyter-lsp=2.2.5
+- jupyter_client=8.6.3
+- jupyter_core=5.7.2
+- jupyter_events=0.10.0
+- jupyter_server=2.14.2
+- jupyter_server_terminals=0.5.3
+- jupyterhub-base=5.2.1
+- jupyterhub-singleuser=5.2.1
+- jupyterlab=4.3.2
+- jupyterlab_pygments=0.3.0
+- jupyterlab_server=2.27.3
+- keyutils=1.6.1
+- krb5=1.21.3
+- ld_impl_linux-64=2.43
+- libarchive=3.7.7
+- libcurl=8.10.1
+- libedit=3.1.20191231
+- libev=4.33
+- libexpat=2.6.4
+- libffi=3.4.2
+- libgcc=14.2.0
+- libgcc-ng=14.2.0
+- libgomp=14.2.0
+- libiconv=1.17
+- liblzma=5.6.3
+- libmamba=2.0.4
+- libmambapy=2.0.4
+- libnghttp2=1.64.0
+- libnsl=2.0.1
+- libsodium=1.0.20
+- libsolv=0.7.30
+- libsqlite=3.47.0
+- libssh2=1.11.1
+- libstdcxx=14.2.0
+- libstdcxx-ng=14.2.0
+- libuuid=2.38.1
+- libxcrypt=4.4.36
+- libxml2=2.13.5
+- libzlib=1.3.1
+- lz4-c=1.10.0
+- lzo=2.10
+- make=4.4.1
+- mako=1.3.8
+- mamba=2.0.4
+- markupsafe=3.0.2
+- matplotlib-inline=0.1.7
+- menuinst=2.2.0
+- mistune=3.0.2
+- nbclassic=1.1.0
+- nbclient=0.10.1
+- nbconvert-core=7.16.4
+- nbformat=5.10.4
+- ncurses=6.5
+- nest-asyncio=1.6.0
+- nlohmann_json=3.11.3
+- notebook=7.3.1
+- notebook-shim=0.2.4
+- oauthlib=3.2.2
+- openssl=3.4.0
+- overrides=7.7.0
+- packaging=24.2
+- pamela=1.2.0
+- pandocfilters=1.5.0
+- parso=0.8.4
+- pexpect=4.9.0
+- pickleshare=0.7.5
+- pip=24.3.1
+- pkgutil-resolve-name=1.3.10
+- platformdirs=4.3.6
+- pluggy=1.5.0
+- prometheus_client=0.21.1
+- prompt-toolkit=3.0.48
+- psutil=6.1.0
+- ptyprocess=0.7.0
+- pure_eval=0.2.3
+- pybind11-abi=4
+- pycosat=0.6.6
+- pycparser=2.22
+- pydantic=2.10.3
+- pydantic-core=2.27.1
+- pygments=2.18.0
+- pyjwt=2.10.1
+- pysocks=1.7.1
+- python=3.12.8
+- python-dateutil=2.9.0.post0
+- python-fastjsonschema=2.21.1
+- python-json-logger=2.0.7
+- python_abi=3.12
+- pytz=2024.2
+- pyyaml=6.0.2
+- pyzmq=26.2.0
+- readline=8.2
+- referencing=0.35.1
+- reproc=14.2.5.post0
+- reproc-cpp=14.2.5.post0
+- requests=2.32.3
+- rfc3339-validator=0.1.4
+- rfc3986-validator=0.1.1
+- rpds-py=0.22.3
+- ruamel.yaml=0.18.6
+- ruamel.yaml.clib=0.2.8
+- send2trash=1.8.3
+- setuptools=75.6.0
+- simdjson=3.10.1
+- six=1.17.0
+- sniffio=1.3.1
+- soupsieve=2.5
+- spdlog=1.14.1
+- sqlalchemy=2.0.36
+- stack_data=0.6.3
+- terminado=0.18.1
+- tinycss2=1.4.0
+- tk=8.6.13
+- tomli=2.2.1
+- tornado=6.4.2
+- tqdm=4.67.1
+- traitlets=5.14.3
+- truststore=0.10.0
+- types-python-dateutil=2.9.0.20241206
+- typing-extensions=4.12.2
+- typing_extensions=4.12.2
+- typing_utils=0.1.0
+- tzdata=2024b
+- uri-template=1.3.0
+- urllib3=2.2.3
+- wcwidth=0.2.13
+- webcolors=24.11.1
+- webencodings=0.5.1
+- websocket-client=1.8.0
+- wheel=0.45.1
+- yaml=0.2.5
+- yaml-cpp=0.8.0
+- zeromq=4.3.5
+- zipp=3.21.0
+- zstandard=0.23.0
+- zstd=1.5.6
+- pip:
+ - adbc-driver-flightsql==1.3.0
+ - adbc-driver-manager==1.3.0
+ - altair==5.5.0
+ - autovizwidget==0.22.0
+ - bokeh==3.6.2
+ - branca==0.8.1
+ - cftime==1.6.4.post1
+ - chroma-py==0.1.0.dev1
+ - click==8.1.8
+ - cloudpickle==3.1.0
+ - colorcet==3.1.0
+ - colour==0.1.5
+ - contourpy==1.3.1
+ - cycler==0.12.1
+ - dask==2024.12.1
+ - folium==0.19.2
+ - fonttools==4.55.3
+ - fsspec==2024.12.0
+ - geojson==3.2.0
+ - geopandas==1.0.1
+ - hdijupyterutils==0.22.0
+ - holoviews==1.20.0
+ - hvplot==0.11.2
+ - ipywidgets==8.1.5
+ - jupyter==1.1.1
+ - jupyter-console==6.6.3
+ - jupyterlab_widgets==3.0.13
+ - kamu==0.6.0
+ - kiwisolver==1.4.8
+ - linkify-it-py==2.0.3
+ - livy==0.8.0
+ - locket==1.0.0
+ - mapboxgl==0.10.2
+ - Markdown==3.7
+ - markdown-it-py==3.0.0
+ - matplotlib==3.10.0
+ - mdit-py-plugins==0.4.2
+ - mdurl==0.1.2
+ - narwhals==1.19.1
+ - netCDF4==1.7.2
+ - numpy==2.2.1
+ - pandas==2.2.3
+ - pandas-bokeh==0.5.5
+ - panel==1.5.5
+ - param==2.2.0
+ - partd==1.4.2
+ - pillow==11.0.0
+ - plotly==5.24.1
+ - pyarrow==18.1.0
+ - pyogrio==0.10.0
+ - pyparsing==3.2.0
+ - pyproj==3.7.0
+ - pyviz_comms==3.0.3
+ - setuptools==75.6.0
+ - shapely==2.0.6
+ - tenacity==9.0.0
+ - toolz==1.0.0
+ - tzdata==2024.2
+ - uc-micro-py==1.0.3
+ - wheel==0.45.1
+ - widgetsnbextension==4.0.13
+ - xarray==2024.11.0
+ - xyzservices==2024.9.0
+ - zstandard==0.23.0
+name: base
+prefix: /opt/conda
diff --git a/images/demo/jupyter/requirements/linux/amd64/requirements.in b/images/demo/jupyter/requirements/linux/amd64/requirements.in
index d7b27f8f59..68140ad718 100644
--- a/images/demo/jupyter/requirements/linux/amd64/requirements.in
+++ b/images/demo/jupyter/requirements/linux/amd64/requirements.in
@@ -1,21 +1,16 @@
-# TODO: Pinned due to sparkmagic installation issue
-# See: https://github.com/jupyter-incubator/sparkmagic/issues/825
-# See workaround applied in: https://github.com/jupyter-incubator/sparkmagic/blob/master/Dockerfile.jupyter
-notebook==6.5.5
+kamu[jupyter-autoviz,jupyter-sql,spark]
-sparkmagic
-
-pandas
+dask
geopandas
geojson
-xarray
netcdf4
-dask
+pandas
+xarray
+altair
bokeh
-hvplot
-pandas-bokeh
folium
-altair
+hvplot
mapboxgl
+pandas-bokeh
shapely
diff --git a/images/demo/jupyter/requirements/linux/amd64/requirements.txt b/images/demo/jupyter/requirements/linux/amd64/requirements.txt
deleted file mode 100644
index c24a4e3dcf..0000000000
--- a/images/demo/jupyter/requirements/linux/amd64/requirements.txt
+++ /dev/null
@@ -1,414 +0,0 @@
-# This file may be used to create an environment using:
-# $ conda create --name --file
-# platform: linux-64
-_libgcc_mutex=0.1=conda_forge
-_openmp_mutex=4.5=2_gnu
-alembic=1.13.1=pyhd8ed1ab_1
-altair=5.2.0=pyhd8ed1ab_0
-anyio=4.2.0=pyhd8ed1ab_0
-archspec=0.2.2=pyhd8ed1ab_0
-argon2-cffi=23.1.0=pyhd8ed1ab_0
-argon2-cffi-bindings=21.2.0=py311h459d7ec_4
-arrow=1.3.0=pyhd8ed1ab_0
-asttokens=2.4.1=pyhd8ed1ab_0
-async-lru=2.0.4=pyhd8ed1ab_0
-async_generator=1.10=py_0
-attrs=23.2.0=pyh71513ae_0
-autovizwidget=0.21.0=pyh1a96a4e_1
-aws-c-auth=0.7.16=h70caa3e_0
-aws-c-cal=0.6.9=h14ec70c_3
-aws-c-common=0.9.12=hd590300_0
-aws-c-compression=0.2.17=h572eabf_8
-aws-c-event-stream=0.4.2=h17cd1f3_0
-aws-c-http=0.8.0=hc6da83f_5
-aws-c-io=0.14.3=h3c8c088_1
-aws-c-mqtt=0.10.2=h0ef3971_0
-aws-c-s3=0.5.1=h2910485_1
-aws-c-sdkutils=0.1.14=h572eabf_0
-aws-checksums=0.1.17=h572eabf_7
-aws-crt-cpp=0.26.2=ha623a59_3
-aws-sdk-cpp=1.11.267=h0bb408c_0
-azure-core-cpp=1.10.3=h91d86a7_1
-azure-storage-blobs-cpp=12.10.0=h00ab1b0_0
-azure-storage-common-cpp=12.5.0=hb858b4b_2
-babel=2.14.0=pyhd8ed1ab_0
-beautifulsoup4=4.12.3=pyha770c72_0
-bleach=6.1.0=pyhd8ed1ab_0
-blinker=1.7.0=pyhd8ed1ab_0
-blosc=1.21.5=h0f2a231_0
-bokeh=3.3.4=pyhd8ed1ab_0
-boltons=23.1.1=pyhd8ed1ab_0
-branca=0.7.1=pyhd8ed1ab_0
-brotli=1.1.0=hd590300_1
-brotli-bin=1.1.0=hd590300_1
-brotli-python=1.1.0=py311hb755f60_1
-bzip2=1.0.8=hd590300_5
-c-ares=1.26.0=hd590300_0
-ca-certificates=2024.2.2=hbcca054_0
-cached-property=1.5.2=hd8ed1ab_1
-cached_property=1.5.2=pyha770c72_1
-cairo=1.18.0=h3faef2a_0
-certifi=2024.2.2=pyhd8ed1ab_0
-certipy=0.1.3=py_0
-cffi=1.16.0=py311hb3a22ac_0
-cfitsio=4.3.1=hbdc6101_0
-cftime=1.6.3=py311h1f0f07a_0
-charset-normalizer=3.3.2=pyhd8ed1ab_0
-chroma-py=0.1.0.dev1=py_0
-click=8.1.7=unix_pyh707e725_0
-click-plugins=1.1.1=py_0
-cligj=0.7.2=pyhd8ed1ab_1
-cloudpickle=3.0.0=pyhd8ed1ab_0
-colorama=0.4.6=pyhd8ed1ab_0
-colorcet=3.0.1=pyhd8ed1ab_0
-colour=0.1.5=pyhd8ed1ab_1
-comm=0.2.1=pyhd8ed1ab_0
-conda=23.11.0=py311h38be061_1
-conda-libmamba-solver=24.1.0=pyhd8ed1ab_0
-conda-package-handling=2.2.0=pyh38be061_0
-conda-package-streaming=0.9.0=pyhd8ed1ab_0
-configurable-http-proxy=4.6.1=h92b4e83_0
-contourpy=1.2.0=py311h9547e67_0
-cryptography=42.0.2=py311hcb13ee4_0
-cycler=0.12.1=pyhd8ed1ab_0
-cytoolz=0.12.3=py311h459d7ec_0
-dask=2024.2.0=pyhd8ed1ab_0
-dask-core=2024.2.0=pyhd8ed1ab_0
-debugpy=1.8.1=py311hb755f60_0
-decorator=5.1.1=pyhd8ed1ab_0
-defusedxml=0.7.1=pyhd8ed1ab_0
-distributed=2024.2.0=pyhd8ed1ab_0
-distro=1.9.0=pyhd8ed1ab_0
-entrypoints=0.4=pyhd8ed1ab_0
-exceptiongroup=1.2.0=pyhd8ed1ab_2
-executing=2.0.1=pyhd8ed1ab_0
-expat=2.5.0=hcb278e6_1
-fiona=1.9.5=py311hf8e0aa6_3
-fmt=10.2.1=h00ab1b0_0
-folium=0.15.1=pyhd8ed1ab_0
-font-ttf-dejavu-sans-mono=2.37=hab24e00_0
-font-ttf-inconsolata=3.000=h77eed37_0
-font-ttf-source-code-pro=2.038=h77eed37_0
-font-ttf-ubuntu=0.83=h77eed37_1
-fontconfig=2.14.2=h14ed4e7_0
-fonts-conda-ecosystem=1=0
-fonts-conda-forge=1=0
-fonttools=4.49.0=py311h459d7ec_0
-fqdn=1.5.1=pyhd8ed1ab_0
-freetype=2.12.1=h267a509_2
-freexl=2.0.0=h743c826_0
-fsspec=2024.2.0=pyhca7485f_0
-gdal=3.8.4=py311h8be719e_0
-geojson=3.1.0=pyhd8ed1ab_0
-geopandas=0.14.3=pyhd8ed1ab_0
-geopandas-base=0.14.3=pyha770c72_0
-geos=3.12.1=h59595ed_0
-geotiff=1.7.1=h6b2125f_15
-gettext=0.21.1=h27087fc_0
-gflags=2.2.2=he1b5a44_1004
-giflib=5.2.1=h0b41bf4_3
-glog=0.6.0=h6f12383_0
-greenlet=3.0.3=py311hb755f60_0
-h11=0.14.0=pyhd8ed1ab_0
-h2=4.1.0=pyhd8ed1ab_0
-hdf4=4.2.15=h2a13503_7
-hdf5=1.14.3=nompi_h4f84152_100
-hdijupyterutils=0.21.0=pyh1a96a4e_1
-holoviews=1.18.3=pyhd8ed1ab_0
-hpack=4.0.0=pyh9f0ad1d_0
-httpcore=1.0.2=pyhd8ed1ab_0
-httpx=0.26.0=pyhd8ed1ab_0
-hvplot=0.9.2=pyhd8ed1ab_0
-hyperframe=6.0.1=pyhd8ed1ab_0
-icu=73.2=h59595ed_0
-idna=3.6=pyhd8ed1ab_0
-importlib-metadata=7.0.1=pyha770c72_0
-importlib_metadata=7.0.1=hd8ed1ab_0
-importlib_resources=6.1.1=pyhd8ed1ab_0
-ipykernel=6.29.2=pyhd33586a_0
-ipython=8.21.0=pyh707e725_0
-ipython_genutils=0.2.0=py_1
-ipywidgets=8.1.2=pyhd8ed1ab_0
-isoduration=20.11.0=pyhd8ed1ab_0
-jedi=0.19.1=pyhd8ed1ab_0
-jinja2=3.1.3=pyhd8ed1ab_0
-joblib=1.3.2=pyhd8ed1ab_0
-json-c=0.17=h7ab15ed_0
-json5=0.9.14=pyhd8ed1ab_0
-jsonpatch=1.33=pyhd8ed1ab_0
-jsonpointer=2.4=py311h38be061_3
-jsonschema=4.21.1=pyhd8ed1ab_0
-jsonschema-specifications=2023.12.1=pyhd8ed1ab_0
-jsonschema-with-format-nongpl=4.21.1=pyhd8ed1ab_0
-jupyter=1.0.0=pyhd8ed1ab_10
-jupyter-lsp=2.2.2=pyhd8ed1ab_0
-jupyter_client=7.4.9=pyhd8ed1ab_0
-jupyter_console=6.6.3=pyhd8ed1ab_0
-jupyter_core=5.7.1=py311h38be061_0
-jupyter_events=0.9.0=pyhd8ed1ab_0
-jupyter_server=2.12.5=pyhd8ed1ab_0
-jupyter_server_terminals=0.5.2=pyhd8ed1ab_0
-jupyter_telemetry=0.1.0=pyhd8ed1ab_1
-jupyterhub=4.0.2=pyh31011fe_0
-jupyterhub-base=4.0.2=pyh31011fe_0
-jupyterlab=4.1.1=pyhd8ed1ab_0
-jupyterlab_pygments=0.3.0=pyhd8ed1ab_1
-jupyterlab_server=2.25.2=pyhd8ed1ab_0
-jupyterlab_widgets=3.0.10=pyhd8ed1ab_0
-kealib=1.5.3=h2f55d51_0
-keyutils=1.6.1=h166bdaf_0
-kiwisolver=1.4.5=py311h9547e67_1
-krb5=1.21.2=h659d440_0
-lcms2=2.16=hb7c19ff_0
-ld_impl_linux-64=2.40=h41732ed_0
-lerc=4.0.0=h27087fc_0
-libabseil=20230802.1=cxx17_h59595ed_0
-libaec=1.1.2=h59595ed_1
-libarchive=3.7.2=h2aa1ff5_1
-libarrow=15.0.0=h49c8883_4_cpu
-libarrow-acero=15.0.0=h59595ed_4_cpu
-libarrow-dataset=15.0.0=h59595ed_4_cpu
-libarrow-flight=15.0.0=hdc44a87_4_cpu
-libarrow-flight-sql=15.0.0=hfbc7f12_4_cpu
-libarrow-gandiva=15.0.0=h308e607_4_cpu
-libarrow-substrait=15.0.0=hfbc7f12_4_cpu
-libblas=3.9.0=21_linux64_openblas
-libboost-headers=1.84.0=ha770c72_1
-libbrotlicommon=1.1.0=hd590300_1
-libbrotlidec=1.1.0=hd590300_1
-libbrotlienc=1.1.0=hd590300_1
-libcblas=3.9.0=21_linux64_openblas
-libcrc32c=1.1.2=h9c3ff4c_0
-libcurl=8.5.0=hca28451_0
-libdeflate=1.19=hd590300_0
-libedit=3.1.20191231=he28a2e2_2
-libev=4.33=hd590300_2
-libevent=2.1.12=hf998b51_1
-libexpat=2.5.0=hcb278e6_1
-libffi=3.4.2=h7f98852_5
-libgcc-ng=13.2.0=h807b86a_5
-libgdal=3.8.4=h9323651_0
-libgfortran-ng=13.2.0=h69a702a_5
-libgfortran5=13.2.0=ha4646dd_5
-libglib=2.78.4=h783c2da_0
-libgomp=13.2.0=h807b86a_5
-libgoogle-cloud=2.12.0=hef10d8f_5
-libgrpc=1.60.1=h74775cd_0
-libiconv=1.17=hd590300_2
-libjpeg-turbo=3.0.0=hd590300_1
-libkml=1.3.0=h01aab08_1018
-liblapack=3.9.0=21_linux64_openblas
-libllvm15=15.0.7=hb3ce162_4
-libmamba=1.5.6=had39da4_0
-libmambapy=1.5.6=py311hf2555c7_0
-libnetcdf=4.9.2=nompi_h9612171_113
-libnghttp2=1.58.0=h47da74e_1
-libnl=3.9.0=hd590300_0
-libnsl=2.0.1=hd590300_0
-libnuma=2.0.16=h0b41bf4_1
-libopenblas=0.3.26=pthreads_h413a1c8_0
-libparquet=15.0.0=h352af49_4_cpu
-libpng=1.6.42=h2797004_0
-libpq=16.2=h33b98f1_0
-libprotobuf=4.25.1=hf27288f_2
-libre2-11=2023.06.02=h7a70373_0
-librttopo=1.1.0=h8917695_15
-libsodium=1.0.18=h36c2ea0_1
-libsolv=0.7.28=hfc55251_0
-libspatialindex=1.9.3=h9c3ff4c_4
-libspatialite=5.1.0=h7bd4643_4
-libsqlite=3.45.1=h2797004_0
-libssh2=1.11.0=h0841786_0
-libstdcxx-ng=13.2.0=h7e041cc_5
-libthrift=0.19.0=hb90f79a_1
-libtiff=4.6.0=ha9c0a0a_2
-libutf8proc=2.8.0=h166bdaf_0
-libuuid=2.38.1=h0b41bf4_0
-libuv=1.46.0=hd590300_0
-libwebp-base=1.3.2=hd590300_0
-libxcb=1.15=h0b41bf4_0
-libxcrypt=4.4.36=hd590300_1
-libxml2=2.12.5=h232c23b_0
-libzip=1.10.1=h2629f0a_3
-libzlib=1.2.13=hd590300_5
-linkify-it-py=2.0.3=pyhd8ed1ab_0
-locket=1.0.0=pyhd8ed1ab_0
-lz4=4.3.3=py311h38e4bf4_0
-lz4-c=1.9.4=hcb278e6_0
-lzo=2.10=h516909a_1000
-make=4.3=hd18ef5c_1
-mako=1.3.2=pyhd8ed1ab_0
-mamba=1.5.6=py311h3072747_0
-mapboxgl=0.10.2=py_1
-mapclassify=2.6.1=pyhd8ed1ab_0
-markdown=3.5.2=pyhd8ed1ab_0
-markdown-it-py=3.0.0=pyhd8ed1ab_0
-markupsafe=2.1.5=py311h459d7ec_0
-matplotlib-base=3.8.3=py311h54ef318_0
-matplotlib-inline=0.1.6=pyhd8ed1ab_0
-mdit-py-plugins=0.4.0=pyhd8ed1ab_0
-mdurl=0.1.2=pyhd8ed1ab_0
-menuinst=2.0.2=py311h38be061_0
-minizip=4.0.4=h0ab5242_0
-mistune=3.0.2=pyhd8ed1ab_0
-msgpack-python=1.0.7=py311h9547e67_0
-munkres=1.1.4=pyh9f0ad1d_0
-nbclassic=1.0.0=pyhb4ecaf3_1
-nbclient=0.8.0=pyhd8ed1ab_0
-nbconvert=7.16.0=pyhd8ed1ab_0
-nbconvert-core=7.16.0=pyhd8ed1ab_0
-nbconvert-pandoc=7.16.0=pyhd8ed1ab_0
-nbformat=5.9.2=pyhd8ed1ab_0
-ncurses=6.4=h59595ed_2
-nest-asyncio=1.6.0=pyhd8ed1ab_0
-netcdf4=1.6.5=nompi_py311he8ad708_100
-networkx=3.2.1=pyhd8ed1ab_0
-nodejs=20.9.0=hb753e55_0
-notebook=6.5.5=pyha770c72_0
-notebook-shim=0.2.3=pyhd8ed1ab_0
-nspr=4.35=h27087fc_0
-nss=3.98=h1d7d5a4_0
-numpy=1.26.4=py311h64a7726_0
-oauthlib=3.2.2=pyhd8ed1ab_0
-openjpeg=2.5.0=h488ebb8_3
-openssl=3.2.1=hd590300_0
-orc=1.9.2=h7829240_1
-overrides=7.7.0=pyhd8ed1ab_0
-packaging=23.2=pyhd8ed1ab_0
-pamela=1.1.0=pyh1a96a4e_0
-pandas=1.5.3=py311h2872171_1
-pandas-bokeh=0.5.5=pyhd8ed1ab_0
-pandoc=3.1.11.1=ha770c72_0
-pandocfilters=1.5.0=pyhd8ed1ab_0
-panel=1.3.8=pyhd8ed1ab_0
-param=2.0.2=pyhca7485f_0
-parso=0.8.3=pyhd8ed1ab_0
-partd=1.4.1=pyhd8ed1ab_0
-pcre2=10.42=hcad00b1_0
-pexpect=4.9.0=pyhd8ed1ab_0
-pickleshare=0.7.5=py_1003
-pillow=10.2.0=py311ha6c5da5_0
-pip=24.0=pyhd8ed1ab_0
-pixman=0.43.2=h59595ed_0
-pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1
-platformdirs=4.2.0=pyhd8ed1ab_0
-plotly=5.19.0=pyhd8ed1ab_0
-pluggy=1.4.0=pyhd8ed1ab_0
-poppler=24.02.0=h590f24d_0
-poppler-data=0.4.12=hd8ed1ab_0
-postgresql=16.2=h7387d8b_0
-proj=9.3.1=h1d62c97_0
-prometheus_client=0.19.0=pyhd8ed1ab_0
-prompt-toolkit=3.0.42=pyha770c72_0
-prompt_toolkit=3.0.42=hd8ed1ab_0
-psutil=5.9.8=py311h459d7ec_0
-pthread-stubs=0.4=h36c2ea0_1001
-ptyprocess=0.7.0=pyhd3deb0d_0
-pure_eval=0.2.2=pyhd8ed1ab_0
-pyarrow=15.0.0=py311h39c9aba_4_cpu
-pyarrow-hotfix=0.6=pyhd8ed1ab_0
-pybind11-abi=4=hd8ed1ab_3
-pycosat=0.6.6=py311h459d7ec_0
-pycparser=2.21=pyhd8ed1ab_0
-pyct=0.5.0=pyhd8ed1ab_0
-pycurl=7.45.1=py311hae980a4_3
-pygments=2.17.2=pyhd8ed1ab_0
-pyjwt=2.8.0=pyhd8ed1ab_1
-pyopenssl=24.0.0=pyhd8ed1ab_0
-pyparsing=3.1.1=pyhd8ed1ab_0
-pyproj=3.6.1=py311hca0b8b9_5
-pysocks=1.7.1=pyha2e5f31_6
-pyspnego=0.9.1=py311h459d7ec_2
-python=3.11.7=hab00c5b_1_cpython
-python-dateutil=2.8.2=pyhd8ed1ab_0
-python-fastjsonschema=2.19.1=pyhd8ed1ab_0
-python-json-logger=2.0.7=pyhd8ed1ab_0
-python_abi=3.11=4_cp311
-pytz=2024.1=pyhd8ed1ab_0
-pyviz_comms=3.0.0=pyhd8ed1ab_0
-pyyaml=6.0.1=py311h459d7ec_1
-pyzmq=24.0.1=py311ha4b6469_1
-qtconsole-base=5.5.1=pyha770c72_0
-qtpy=2.4.1=pyhd8ed1ab_0
-rdma-core=50.0=hd3aeb46_1
-re2=2023.06.02=h2873b5e_0
-readline=8.2=h8228510_1
-referencing=0.33.0=pyhd8ed1ab_0
-reproc=14.2.4.post0=hd590300_1
-reproc-cpp=14.2.4.post0=h59595ed_1
-requests=2.31.0=pyhd8ed1ab_0
-requests-kerberos=0.14.0=pyhd8ed1ab_1
-rfc3339-validator=0.1.4=pyhd8ed1ab_0
-rfc3986-validator=0.1.1=pyh9f0ad1d_0
-rpds-py=0.17.1=py311h46250e7_0
-rtree=1.2.0=py311h3bb2b0f_0
-ruamel.yaml=0.18.6=py311h459d7ec_0
-ruamel.yaml.clib=0.2.8=py311h459d7ec_0
-s2n=1.4.3=h06160fa_0
-scikit-learn=1.4.1.post1=py311hc009520_0
-scipy=1.12.0=py311h64a7726_2
-send2trash=1.8.2=pyh41d4057_0
-setuptools=69.0.3=pyhd8ed1ab_0
-shapely=2.0.3=py311h2032efe_0
-six=1.16.0=pyh6c4a22f_0
-snappy=1.1.10=h9fff704_0
-sniffio=1.3.0=pyhd8ed1ab_0
-sortedcontainers=2.4.0=pyhd8ed1ab_0
-soupsieve=2.5=pyhd8ed1ab_1
-sparkmagic=0.21.0=pyhd8ed1ab_1
-sqlalchemy=2.0.26=py311h459d7ec_0
-sqlite=3.45.1=h2c6b66d_0
-stack_data=0.6.2=pyhd8ed1ab_0
-tblib=3.0.0=pyhd8ed1ab_0
-tenacity=8.2.3=pyhd8ed1ab_0
-terminado=0.18.0=pyh0d859eb_0
-threadpoolctl=3.3.0=pyhc1e730c_0
-tiledb=2.20.0=h4386cac_0
-tinycss2=1.2.1=pyhd8ed1ab_0
-tk=8.6.13=noxft_h4845f30_101
-tomli=2.0.1=pyhd8ed1ab_0
-toolz=0.12.1=pyhd8ed1ab_0
-tornado=6.3.3=py311h459d7ec_1
-tqdm=4.66.2=pyhd8ed1ab_0
-traitlets=5.9.0=pyhd8ed1ab_0
-truststore=0.8.0=pyhd8ed1ab_0
-types-python-dateutil=2.8.19.20240106=pyhd8ed1ab_0
-typing-extensions=4.9.0=hd8ed1ab_0
-typing_extensions=4.9.0=pyha770c72_0
-typing_utils=0.1.0=pyhd8ed1ab_0
-tzcode=2024a=h3f72095_0
-tzdata=2024a=h0c530f3_0
-uc-micro-py=1.0.3=pyhd8ed1ab_0
-ucx=1.15.0=h75e419f_3
-uri-template=1.3.0=pyhd8ed1ab_0
-uriparser=0.9.7=hcb278e6_1
-urllib3=2.2.0=pyhd8ed1ab_0
-wcwidth=0.2.13=pyhd8ed1ab_0
-webcolors=1.13=pyhd8ed1ab_0
-webencodings=0.5.1=pyhd8ed1ab_2
-websocket-client=1.7.0=pyhd8ed1ab_0
-wheel=0.42.0=pyhd8ed1ab_0
-widgetsnbextension=4.0.10=pyhd8ed1ab_0
-xarray=2024.2.0=pyhd8ed1ab_0
-xerces-c=3.2.5=hac6953d_0
-xorg-kbproto=1.0.7=h7f98852_1002
-xorg-libice=1.1.1=hd590300_0
-xorg-libsm=1.2.4=h7391055_0
-xorg-libx11=1.8.7=h8ee46fc_0
-xorg-libxau=1.0.11=hd590300_0
-xorg-libxdmcp=1.1.3=h7f98852_0
-xorg-libxext=1.3.4=h0b41bf4_2
-xorg-libxrender=0.9.11=hd590300_0
-xorg-renderproto=0.11.1=h7f98852_1002
-xorg-xextproto=7.3.0=h0b41bf4_1003
-xorg-xproto=7.0.31=h7f98852_1007
-xyzservices=2023.10.1=pyhd8ed1ab_0
-xz=5.2.6=h166bdaf_0
-yaml=0.2.5=h7f98852_2
-yaml-cpp=0.8.0=h59595ed_0
-zeromq=4.3.5=h59595ed_0
-zict=3.0.0=pyhd8ed1ab_0
-zipp=3.17.0=pyhd8ed1ab_0
-zlib=1.2.13=hd590300_5
-zstandard=0.22.0=py311haa97af0_0
-zstd=1.5.5=hfc55251_0
diff --git a/images/demo/jupyter/requirements/linux/arm64/env.yaml b/images/demo/jupyter/requirements/linux/arm64/env.yaml
new file mode 100644
index 0000000000..1b0f42d63d
--- /dev/null
+++ b/images/demo/jupyter/requirements/linux/arm64/env.yaml
@@ -0,0 +1,274 @@
+channels:
+- conda-forge
+dependencies:
+- _openmp_mutex=4.5
+- alembic=1.14.0
+- annotated-types=0.7.0
+- anyio=4.7.0
+- archspec=0.2.3
+- argon2-cffi=23.1.0
+- argon2-cffi-bindings=21.2.0
+- arrow=1.3.0
+- asttokens=3.0.0
+- async-lru=2.0.4
+- async_generator=1.10
+- attrs=24.2.0
+- babel=2.16.0
+- beautifulsoup4=4.12.3
+- bleach=6.2.0
+- blinker=1.9.0
+- boltons=24.0.0
+- brotli-python=1.1.0
+- bzip2=1.0.8
+- c-ares=1.34.3
+- ca-certificates=2024.8.30
+- cached-property=1.5.2
+- cached_property=1.5.2
+- certifi=2024.8.30
+- certipy=0.2.1
+- cffi=1.17.1
+- charset-normalizer=3.4.0
+- colorama=0.4.6
+- comm=0.2.2
+- conda=24.11.0
+- conda-libmamba-solver=24.11.1
+- conda-package-handling=2.4.0
+- conda-package-streaming=0.11.0
+- cpp-expected=1.1.0
+- cryptography=44.0.0
+- debugpy=1.8.9
+- decorator=5.1.1
+- defusedxml=0.7.1
+- distro=1.9.0
+- entrypoints=0.4
+- exceptiongroup=1.2.2
+- executing=2.1.0
+- fmt=11.0.2
+- fqdn=1.5.1
+- frozendict=2.4.6
+- greenlet=3.1.1
+- h11=0.14.0
+- h2=4.1.0
+- hpack=4.0.0
+- httpcore=1.0.7
+- httpx=0.28.1
+- hyperframe=6.0.1
+- icu=75.1
+- idna=3.10
+- importlib-metadata=8.5.0
+- importlib_resources=6.4.5
+- ipykernel=6.29.5
+- ipython=8.30.0
+- ipython_genutils=0.2.0
+- isoduration=20.11.0
+- jedi=0.19.2
+- jinja2=3.1.4
+- json5=0.10.0
+- jsonpatch=1.33
+- jsonpointer=3.0.0
+- jsonschema=4.23.0
+- jsonschema-specifications=2024.10.1
+- jsonschema-with-format-nongpl=4.23.0
+- jupyter-lsp=2.2.5
+- jupyter_client=8.6.3
+- jupyter_core=5.7.2
+- jupyter_events=0.10.0
+- jupyter_server=2.14.2
+- jupyter_server_terminals=0.5.3
+- jupyterhub-base=5.2.1
+- jupyterhub-singleuser=5.2.1
+- jupyterlab=4.3.2
+- jupyterlab_pygments=0.3.0
+- jupyterlab_server=2.27.3
+- keyutils=1.6.1
+- krb5=1.21.3
+- ld_impl_linux-aarch64=2.43
+- libarchive=3.7.7
+- libcurl=8.10.1
+- libedit=3.1.20191231
+- libev=4.33
+- libexpat=2.6.4
+- libffi=3.4.2
+- libgcc=14.2.0
+- libgcc-ng=14.2.0
+- libgomp=14.2.0
+- libiconv=1.17
+- liblzma=5.6.3
+- libmamba=2.0.4
+- libmambapy=2.0.4
+- libnghttp2=1.64.0
+- libnsl=2.0.1
+- libsodium=1.0.20
+- libsolv=0.7.30
+- libsqlite=3.47.0
+- libssh2=1.11.1
+- libstdcxx=14.2.0
+- libstdcxx-ng=14.2.0
+- libuuid=2.38.1
+- libxcrypt=4.4.36
+- libxml2=2.13.5
+- libzlib=1.3.1
+- lz4-c=1.10.0
+- lzo=2.10
+- make=4.4.1
+- mako=1.3.8
+- mamba=2.0.4
+- markupsafe=3.0.2
+- matplotlib-inline=0.1.7
+- menuinst=2.2.0
+- mistune=3.0.2
+- nbclassic=1.1.0
+- nbclient=0.10.1
+- nbconvert-core=7.16.4
+- nbformat=5.10.4
+- ncurses=6.5
+- nest-asyncio=1.6.0
+- nlohmann_json=3.11.3
+- notebook=7.3.1
+- notebook-shim=0.2.4
+- oauthlib=3.2.2
+- openssl=3.4.0
+- overrides=7.7.0
+- packaging=24.2
+- pamela=1.2.0
+- pandocfilters=1.5.0
+- parso=0.8.4
+- pexpect=4.9.0
+- pickleshare=0.7.5
+- pip=24.3.1
+- pkgutil-resolve-name=1.3.10
+- platformdirs=4.3.6
+- pluggy=1.5.0
+- prometheus_client=0.21.1
+- prompt-toolkit=3.0.48
+- psutil=6.1.0
+- ptyprocess=0.7.0
+- pure_eval=0.2.3
+- pybind11-abi=4
+- pycosat=0.6.6
+- pycparser=2.22
+- pydantic=2.10.3
+- pydantic-core=2.27.1
+- pygments=2.18.0
+- pyjwt=2.10.1
+- pysocks=1.7.1
+- python=3.12.8
+- python-dateutil=2.9.0.post0
+- python-fastjsonschema=2.21.1
+- python-json-logger=2.0.7
+- python_abi=3.12
+- pytz=2024.2
+- pyyaml=6.0.2
+- pyzmq=26.2.0
+- readline=8.2
+- referencing=0.35.1
+- reproc=14.2.4.post0
+- reproc-cpp=14.2.4.post0
+- requests=2.32.3
+- rfc3339-validator=0.1.4
+- rfc3986-validator=0.1.1
+- rpds-py=0.22.3
+- ruamel.yaml=0.18.6
+- ruamel.yaml.clib=0.2.8
+- send2trash=1.8.3
+- setuptools=75.6.0
+- simdjson=3.10.1
+- six=1.17.0
+- sniffio=1.3.1
+- soupsieve=2.5
+- spdlog=1.14.1
+- sqlalchemy=2.0.36
+- stack_data=0.6.3
+- terminado=0.18.1
+- tinycss2=1.4.0
+- tk=8.6.13
+- tomli=2.2.1
+- tornado=6.4.2
+- tqdm=4.67.1
+- traitlets=5.14.3
+- truststore=0.10.0
+- types-python-dateutil=2.9.0.20241206
+- typing-extensions=4.12.2
+- typing_extensions=4.12.2
+- typing_utils=0.1.0
+- tzdata=2024b
+- uri-template=1.3.0
+- urllib3=2.2.3
+- wcwidth=0.2.13
+- webcolors=24.11.1
+- webencodings=0.5.1
+- websocket-client=1.8.0
+- wheel=0.45.1
+- yaml=0.2.5
+- yaml-cpp=0.8.0
+- zeromq=4.3.5
+- zipp=3.21.0
+- zstandard=0.23.0
+- zstd=1.5.6
+- pip:
+ - adbc-driver-flightsql==1.3.0
+ - adbc-driver-manager==1.3.0
+ - altair==5.5.0
+ - autovizwidget==0.22.0
+ - bokeh==3.6.2
+ - branca==0.8.1
+ - cftime==1.6.4.post1
+ - chroma-py==0.1.0.dev1
+ - click==8.1.8
+ - cloudpickle==3.1.0
+ - colorcet==3.1.0
+ - colour==0.1.5
+ - contourpy==1.3.1
+ - cycler==0.12.1
+ - dask==2024.12.1
+ - folium==0.19.2
+ - fonttools==4.55.3
+ - fsspec==2024.12.0
+ - geojson==3.2.0
+ - geopandas==1.0.1
+ - hdijupyterutils==0.22.0
+ - holoviews==1.20.0
+ - hvplot==0.11.2
+ - ipywidgets==8.1.5
+ - jupyter==1.1.1
+ - jupyter-console==6.6.3
+ - jupyterlab_widgets==3.0.13
+ - kamu==0.6.0
+ - kiwisolver==1.4.8
+ - linkify-it-py==2.0.3
+ - livy==0.8.0
+ - locket==1.0.0
+ - mapboxgl==0.10.2
+ - Markdown==3.7
+ - markdown-it-py==3.0.0
+ - matplotlib==3.10.0
+ - mdit-py-plugins==0.4.2
+ - mdurl==0.1.2
+ - narwhals==1.19.1
+ - netCDF4==1.7.2
+ - numpy==2.2.1
+ - pandas==2.2.3
+ - pandas-bokeh==0.5.5
+ - panel==1.5.5
+ - param==2.2.0
+ - partd==1.4.2
+ - pillow==11.0.0
+ - plotly==5.24.1
+ - pyarrow==18.1.0
+ - pyogrio==0.10.0
+ - pyparsing==3.2.0
+ - pyproj==3.7.0
+ - pyviz_comms==3.0.3
+ - setuptools==75.6.0
+ - shapely==2.0.6
+ - tenacity==9.0.0
+ - toolz==1.0.0
+ - tzdata==2024.2
+ - uc-micro-py==1.0.3
+ - wheel==0.45.1
+ - widgetsnbextension==4.0.13
+ - xarray==2024.11.0
+ - xyzservices==2024.9.0
+ - zstandard==0.23.0
+name: base
+prefix: /opt/conda
diff --git a/images/demo/jupyter/requirements/linux/arm64/requirements.in b/images/demo/jupyter/requirements/linux/arm64/requirements.in
index d7b27f8f59..68140ad718 100644
--- a/images/demo/jupyter/requirements/linux/arm64/requirements.in
+++ b/images/demo/jupyter/requirements/linux/arm64/requirements.in
@@ -1,21 +1,16 @@
-# TODO: Pinned due to sparkmagic installation issue
-# See: https://github.com/jupyter-incubator/sparkmagic/issues/825
-# See workaround applied in: https://github.com/jupyter-incubator/sparkmagic/blob/master/Dockerfile.jupyter
-notebook==6.5.5
+kamu[jupyter-autoviz,jupyter-sql,spark]
-sparkmagic
-
-pandas
+dask
geopandas
geojson
-xarray
netcdf4
-dask
+pandas
+xarray
+altair
bokeh
-hvplot
-pandas-bokeh
folium
-altair
+hvplot
mapboxgl
+pandas-bokeh
shapely
diff --git a/images/demo/jupyter/requirements/linux/arm64/requirements.txt b/images/demo/jupyter/requirements/linux/arm64/requirements.txt
deleted file mode 100644
index 832732c7db..0000000000
--- a/images/demo/jupyter/requirements/linux/arm64/requirements.txt
+++ /dev/null
@@ -1,411 +0,0 @@
-# This file may be used to create an environment using:
-# $ conda create --name --file
-# platform: linux-aarch64
-_openmp_mutex=4.5=2_gnu
-alembic=1.13.1=pyhd8ed1ab_1
-altair=5.2.0=pyhd8ed1ab_0
-anyio=4.2.0=pyhd8ed1ab_0
-archspec=0.2.2=pyhd8ed1ab_0
-argon2-cffi=23.1.0=pyhd8ed1ab_0
-argon2-cffi-bindings=21.2.0=py311hcd402e7_4
-arrow=1.3.0=pyhd8ed1ab_0
-asttokens=2.4.1=pyhd8ed1ab_0
-async-lru=2.0.4=pyhd8ed1ab_0
-async_generator=1.10=py_0
-attrs=23.2.0=pyh71513ae_0
-autovizwidget=0.21.0=pyh1a96a4e_1
-aws-c-auth=0.7.16=h570bf23_5
-aws-c-cal=0.6.10=h967b9ec_1
-aws-c-common=0.9.13=h31becfc_0
-aws-c-compression=0.2.18=h00d1b86_1
-aws-c-event-stream=0.4.2=h10e8a16_3
-aws-c-http=0.8.1=hf0788a4_4
-aws-c-io=0.14.4=h87c19fb_2
-aws-c-mqtt=0.10.2=he8e29e5_3
-aws-c-s3=0.5.1=h71a96cc_6
-aws-c-sdkutils=0.1.15=h00d1b86_1
-aws-checksums=0.1.18=h00d1b86_1
-aws-crt-cpp=0.26.2=h8568a09_5
-aws-sdk-cpp=1.11.267=hfce6cab_1
-azure-core-cpp=1.10.3=hcd87347_1
-azure-storage-blobs-cpp=12.10.0=h2a328a1_0
-azure-storage-common-cpp=12.5.0=hee0c750_2
-babel=2.14.0=pyhd8ed1ab_0
-beautifulsoup4=4.12.3=pyha770c72_0
-bleach=6.1.0=pyhd8ed1ab_0
-blinker=1.7.0=pyhd8ed1ab_0
-blosc=1.21.5=h2f3a684_0
-bokeh=3.3.4=pyhd8ed1ab_0
-boltons=23.1.1=pyhd8ed1ab_0
-branca=0.7.1=pyhd8ed1ab_0
-brotli=1.1.0=h31becfc_1
-brotli-bin=1.1.0=h31becfc_1
-brotli-python=1.1.0=py311h8715677_1
-bzip2=1.0.8=h31becfc_5
-c-ares=1.26.0=h31becfc_0
-ca-certificates=2024.2.2=hcefe29a_0
-cached-property=1.5.2=hd8ed1ab_1
-cached_property=1.5.2=pyha770c72_1
-cairo=1.18.0=ha13f110_0
-certifi=2024.2.2=pyhd8ed1ab_0
-certipy=0.1.3=py_0
-cffi=1.16.0=py311h7963103_0
-cfitsio=4.3.1=hf28c5f1_0
-cftime=1.6.3=py311hf13da56_0
-charset-normalizer=3.3.2=pyhd8ed1ab_0
-chroma-py=0.1.0.dev1=py_0
-click=8.1.7=unix_pyh707e725_0
-click-plugins=1.1.1=py_0
-cligj=0.7.2=pyhd8ed1ab_1
-cloudpickle=3.0.0=pyhd8ed1ab_0
-colorama=0.4.6=pyhd8ed1ab_0
-colorcet=3.0.1=pyhd8ed1ab_0
-colour=0.1.5=pyhd8ed1ab_1
-comm=0.2.1=pyhd8ed1ab_0
-conda=23.11.0=py311hec3470c_1
-conda-libmamba-solver=24.1.0=pyhd8ed1ab_0
-conda-package-handling=2.2.0=pyh38be061_0
-conda-package-streaming=0.9.0=pyhd8ed1ab_0
-configurable-http-proxy=4.6.1=h4e45a9e_0
-contourpy=1.2.0=py311h098ece5_0
-cryptography=42.0.2=py311h2245af3_0
-cycler=0.12.1=pyhd8ed1ab_0
-cytoolz=0.12.3=py311hc8f2f60_0
-dask=2024.2.0=pyhd8ed1ab_0
-dask-core=2024.2.0=pyhd8ed1ab_0
-debugpy=1.8.1=py311h8715677_0
-decorator=5.1.1=pyhd8ed1ab_0
-defusedxml=0.7.1=pyhd8ed1ab_0
-distributed=2024.2.0=pyhd8ed1ab_0
-distro=1.9.0=pyhd8ed1ab_0
-entrypoints=0.4=pyhd8ed1ab_0
-exceptiongroup=1.2.0=pyhd8ed1ab_2
-executing=2.0.1=pyhd8ed1ab_0
-expat=2.5.0=hd600fc2_1
-fiona=1.9.5=py311he15760a_3
-fmt=10.2.1=h2a328a1_0
-folium=0.15.1=pyhd8ed1ab_0
-font-ttf-dejavu-sans-mono=2.37=hab24e00_0
-font-ttf-inconsolata=3.000=h77eed37_0
-font-ttf-source-code-pro=2.038=h77eed37_0
-font-ttf-ubuntu=0.83=h77eed37_1
-fontconfig=2.14.2=ha9a116f_0
-fonts-conda-ecosystem=1=0
-fonts-conda-forge=1=0
-fonttools=4.49.0=py311hcd402e7_0
-fqdn=1.5.1=pyhd8ed1ab_0
-freetype=2.12.1=hf0a5ef3_2
-freexl=2.0.0=h5428426_0
-fsspec=2024.2.0=pyhca7485f_0
-gdal=3.8.4=py311h3b5b607_0
-geojson=3.1.0=pyhd8ed1ab_0
-geopandas=0.14.3=pyhd8ed1ab_0
-geopandas-base=0.14.3=pyha770c72_0
-geos=3.12.1=h2f0025b_0
-geotiff=1.7.1=h3e58e51_15
-gettext=0.21.1=ha18d298_0
-gflags=2.2.2=h54f1f3f_1004
-giflib=5.2.1=hb4cce97_3
-glog=0.6.0=h8ab10f1_0
-greenlet=3.0.3=py311h8715677_0
-h11=0.14.0=pyhd8ed1ab_0
-h2=4.1.0=pyhd8ed1ab_0
-hdf4=4.2.15=hb6ba311_7
-hdf5=1.14.3=nompi_ha486f32_100
-hdijupyterutils=0.21.0=pyh1a96a4e_1
-holoviews=1.18.3=pyhd8ed1ab_0
-hpack=4.0.0=pyh9f0ad1d_0
-httpcore=1.0.2=pyhd8ed1ab_0
-httpx=0.26.0=pyhd8ed1ab_0
-hvplot=0.9.2=pyhd8ed1ab_0
-hyperframe=6.0.1=pyhd8ed1ab_0
-icu=73.2=h787c7f5_0
-idna=3.6=pyhd8ed1ab_0
-importlib-metadata=7.0.1=pyha770c72_0
-importlib_metadata=7.0.1=hd8ed1ab_0
-importlib_resources=6.1.1=pyhd8ed1ab_0
-ipykernel=6.29.2=pyhd33586a_0
-ipython=8.21.0=pyh707e725_0
-ipython_genutils=0.2.0=py_1
-ipywidgets=8.1.2=pyhd8ed1ab_0
-isoduration=20.11.0=pyhd8ed1ab_0
-jedi=0.19.1=pyhd8ed1ab_0
-jinja2=3.1.3=pyhd8ed1ab_0
-joblib=1.3.2=pyhd8ed1ab_0
-json-c=0.17=h9d1147b_0
-json5=0.9.14=pyhd8ed1ab_0
-jsonpatch=1.33=pyhd8ed1ab_0
-jsonpointer=2.4=py311hec3470c_3
-jsonschema=4.21.1=pyhd8ed1ab_0
-jsonschema-specifications=2023.12.1=pyhd8ed1ab_0
-jsonschema-with-format-nongpl=4.21.1=pyhd8ed1ab_0
-jupyter=1.0.0=pyhd8ed1ab_10
-jupyter-lsp=2.2.2=pyhd8ed1ab_0
-jupyter_client=7.4.9=pyhd8ed1ab_0
-jupyter_console=6.6.3=pyhd8ed1ab_0
-jupyter_core=5.7.1=py311hec3470c_0
-jupyter_events=0.9.0=pyhd8ed1ab_0
-jupyter_server=2.12.5=pyhd8ed1ab_0
-jupyter_server_terminals=0.5.2=pyhd8ed1ab_0
-jupyter_telemetry=0.1.0=pyhd8ed1ab_1
-jupyterhub=4.0.2=pyh31011fe_0
-jupyterhub-base=4.0.2=pyh31011fe_0
-jupyterlab=4.1.1=pyhd8ed1ab_0
-jupyterlab_pygments=0.3.0=pyhd8ed1ab_1
-jupyterlab_server=2.25.2=pyhd8ed1ab_0
-jupyterlab_widgets=3.0.10=pyhd8ed1ab_0
-kealib=1.5.3=h4670d8b_0
-keyutils=1.6.1=h4e544f5_0
-kiwisolver=1.4.5=py311h0d5d7b0_1
-krb5=1.21.2=hc419048_0
-lcms2=2.16=h922389a_0
-ld_impl_linux-aarch64=2.40=h2d8c526_0
-lerc=4.0.0=h4de3ea5_0
-libabseil=20230802.1=cxx17_h2f0025b_0
-libaec=1.1.2=h2f0025b_1
-libarchive=3.7.2=hd2f85e0_1
-libarrow=15.0.0=h606a0d5_4_cpu
-libarrow-acero=15.0.0=h2f0025b_4_cpu
-libarrow-dataset=15.0.0=h2f0025b_4_cpu
-libarrow-flight=15.0.0=he69d72d_4_cpu
-libarrow-flight-sql=15.0.0=h1fc705f_4_cpu
-libarrow-gandiva=15.0.0=h90362dd_4_cpu
-libarrow-substrait=15.0.0=h0599332_4_cpu
-libblas=3.9.0=21_linuxaarch64_openblas
-libboost-headers=1.84.0=h8af1aa0_1
-libbrotlicommon=1.1.0=h31becfc_1
-libbrotlidec=1.1.0=h31becfc_1
-libbrotlienc=1.1.0=h31becfc_1
-libcblas=3.9.0=21_linuxaarch64_openblas
-libcrc32c=1.1.2=h01db608_0
-libcurl=8.5.0=h4e8248e_0
-libdeflate=1.19=h31becfc_0
-libedit=3.1.20191231=he28a2e2_2
-libev=4.33=h31becfc_2
-libevent=2.1.12=h4ba1bb4_1
-libexpat=2.5.0=hd600fc2_1
-libffi=3.4.2=h3557bc0_5
-libgcc-ng=13.2.0=hf8544c7_5
-libgdal=3.8.4=h79c3f81_0
-libgfortran-ng=13.2.0=he9431aa_5
-libgfortran5=13.2.0=h582850c_5
-libglib=2.78.4=h311d5f7_0
-libgomp=13.2.0=hf8544c7_5
-libgoogle-cloud=2.12.0=h3b99733_5
-libgrpc=1.60.1=heeb7df3_0
-libiconv=1.17=h31becfc_2
-libjpeg-turbo=3.0.0=h31becfc_1
-libkml=1.3.0=h7d16752_1018
-liblapack=3.9.0=21_linuxaarch64_openblas
-libllvm15=15.0.7=hb4f23b0_4
-libmamba=1.5.6=hea3be6c_0
-libmambapy=1.5.6=py311h765b69a_0
-libnetcdf=4.9.2=nompi_h33102a8_113
-libnghttp2=1.58.0=hb0e430d_1
-libnsl=2.0.1=h31becfc_0
-libnuma=2.0.16=hb4cce97_1
-libopenblas=0.3.26=pthreads_h5a5ec62_0
-libparquet=15.0.0=hb18b541_4_cpu
-libpng=1.6.42=h194ca79_0
-libpq=16.2=h58720eb_0
-libprotobuf=4.25.1=h87e877f_2
-libre2-11=2023.06.02=hf48c5ca_0
-librttopo=1.1.0=hd8968fb_15
-libsodium=1.0.18=hb9de7d4_1
-libsolv=0.7.28=hd84c7bf_0
-libspatialindex=1.9.3=h01db608_4
-libspatialite=5.1.0=h896d346_4
-libsqlite=3.45.1=h194ca79_0
-libssh2=1.11.0=h492db2e_0
-libstdcxx-ng=13.2.0=h9a76618_5
-libthrift=0.19.0=h043aeee_1
-libtiff=4.6.0=h1708d11_2
-libutf8proc=2.8.0=h4e544f5_0
-libuuid=2.38.1=hb4cce97_0
-libuv=1.46.0=h31becfc_0
-libwebp-base=1.3.2=h31becfc_0
-libxcb=1.15=h2a766a3_0
-libxcrypt=4.4.36=h31becfc_1
-libxml2=2.12.5=h3091e33_0
-libzip=1.10.1=h4156a30_3
-libzlib=1.2.13=h31becfc_5
-linkify-it-py=2.0.3=pyhd8ed1ab_0
-locket=1.0.0=pyhd8ed1ab_0
-lz4=4.3.3=py311h6a4b261_0
-lz4-c=1.9.4=hd600fc2_0
-lzo=2.10=h516909a_1000
-make=4.3=h309ac5b_1
-mako=1.3.2=pyhd8ed1ab_0
-mamba=1.5.6=py311hb6c5aa6_0
-mapboxgl=0.10.2=py_1
-mapclassify=2.6.1=pyhd8ed1ab_0
-markdown=3.5.2=pyhd8ed1ab_0
-markdown-it-py=3.0.0=pyhd8ed1ab_0
-markupsafe=2.1.5=py311hc8f2f60_0
-matplotlib-base=3.8.3=py311h1f11223_0
-matplotlib-inline=0.1.6=pyhd8ed1ab_0
-mdit-py-plugins=0.4.0=pyhd8ed1ab_0
-mdurl=0.1.2=pyhd8ed1ab_0
-menuinst=2.0.2=py311hec3470c_0
-minizip=4.0.4=hb75dd74_0
-mistune=3.0.2=pyhd8ed1ab_0
-msgpack-python=1.0.7=py311h0d5d7b0_0
-munkres=1.1.4=pyh9f0ad1d_0
-nbclassic=1.0.0=pyhb4ecaf3_1
-nbclient=0.8.0=pyhd8ed1ab_0
-nbconvert=7.16.0=pyhd8ed1ab_0
-nbconvert-core=7.16.0=pyhd8ed1ab_0
-nbconvert-pandoc=7.16.0=pyhd8ed1ab_0
-nbformat=5.9.2=pyhd8ed1ab_0
-ncurses=6.4=h0425590_2
-nest-asyncio=1.6.0=pyhd8ed1ab_0
-netcdf4=1.6.5=nompi_py311hcd50196_100
-networkx=3.2.1=pyhd8ed1ab_0
-nodejs=20.9.0=hc1f8a26_0
-notebook=6.5.5=pyha770c72_0
-notebook-shim=0.2.3=pyhd8ed1ab_0
-nspr=4.35=h4de3ea5_0
-nss=3.98=hc5a5cc2_0
-numpy=1.26.4=py311h69ead2a_0
-oauthlib=3.2.2=pyhd8ed1ab_0
-openjpeg=2.5.0=h0d9d63b_3
-openssl=3.2.1=h31becfc_0
-orc=1.9.2=h5960ff3_1
-overrides=7.7.0=pyhd8ed1ab_0
-packaging=23.2=pyhd8ed1ab_0
-pamela=1.1.0=pyh1a96a4e_0
-pandas=1.5.3=py311hff2c139_1
-pandas-bokeh=0.5.5=pyhd8ed1ab_0
-pandoc=3.1.11.1=h8af1aa0_0
-pandocfilters=1.5.0=pyhd8ed1ab_0
-panel=1.3.8=pyhd8ed1ab_0
-param=2.0.2=pyhca7485f_0
-parso=0.8.3=pyhd8ed1ab_0
-partd=1.4.1=pyhd8ed1ab_0
-pcre2=10.42=hd0f9c67_0
-pexpect=4.9.0=pyhd8ed1ab_0
-pickleshare=0.7.5=py_1003
-pillow=10.2.0=py311hbcc2232_0
-pip=24.0=pyhd8ed1ab_0
-pixman=0.43.2=h2f0025b_0
-pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1
-platformdirs=4.2.0=pyhd8ed1ab_0
-plotly=5.19.0=pyhd8ed1ab_0
-pluggy=1.4.0=pyhd8ed1ab_0
-poppler=24.02.0=h3cd87ed_0
-poppler-data=0.4.12=hd8ed1ab_0
-postgresql=16.2=he703394_0
-proj=9.3.1=h7b42f86_0
-prometheus_client=0.19.0=pyhd8ed1ab_0
-prompt-toolkit=3.0.42=pyha770c72_0
-prompt_toolkit=3.0.42=hd8ed1ab_0
-psutil=5.9.8=py311hcd402e7_0
-pthread-stubs=0.4=hb9de7d4_1001
-ptyprocess=0.7.0=pyhd3deb0d_0
-pure_eval=0.2.2=pyhd8ed1ab_0
-pyarrow=15.0.0=py311h1eb6f34_4_cpu
-pyarrow-hotfix=0.6=pyhd8ed1ab_0
-pybind11-abi=4=hd8ed1ab_3
-pycosat=0.6.6=py311hcd402e7_0
-pycparser=2.21=pyhd8ed1ab_0
-pyct=0.5.0=pyhd8ed1ab_0
-pycurl=7.45.1=py311h4769251_3
-pygments=2.17.2=pyhd8ed1ab_0
-pyjwt=2.8.0=pyhd8ed1ab_1
-pyopenssl=24.0.0=pyhd8ed1ab_0
-pyparsing=3.1.1=pyhd8ed1ab_0
-pyproj=3.6.1=py311ha6273e5_5
-pysocks=1.7.1=pyha2e5f31_6
-pyspnego=0.9.1=py311hcd402e7_2
-python=3.11.7=h43d1f9e_1_cpython
-python-dateutil=2.8.2=pyhd8ed1ab_0
-python-fastjsonschema=2.19.1=pyhd8ed1ab_0
-python-json-logger=2.0.7=pyhd8ed1ab_0
-python_abi=3.11=4_cp311
-pytz=2024.1=pyhd8ed1ab_0
-pyviz_comms=3.0.0=pyhd8ed1ab_0
-pyyaml=6.0.1=py311hcd402e7_1
-pyzmq=24.0.1=py311h22a2215_1
-qtconsole-base=5.5.1=pyha770c72_0
-qtpy=2.4.1=pyhd8ed1ab_0
-re2=2023.06.02=h887e66c_0
-readline=8.2=h8fc344f_1
-referencing=0.33.0=pyhd8ed1ab_0
-reproc=14.2.4.post0=h31becfc_1
-reproc-cpp=14.2.4.post0=h2f0025b_1
-requests=2.31.0=pyhd8ed1ab_0
-requests-kerberos=0.14.0=pyhd8ed1ab_1
-rfc3339-validator=0.1.4=pyhd8ed1ab_0
-rfc3986-validator=0.1.1=pyh9f0ad1d_0
-rpds-py=0.17.1=py311h32437ce_0
-rtree=1.2.0=py311h04fbf56_0
-ruamel.yaml=0.18.6=py311hcd402e7_0
-ruamel.yaml.clib=0.2.8=py311hcd402e7_0
-s2n=1.4.4=h5a25046_0
-scikit-learn=1.4.1.post1=py311hb93614b_0
-scipy=1.12.0=py311h69ead2a_2
-send2trash=1.8.2=pyh41d4057_0
-setuptools=69.0.3=pyhd8ed1ab_0
-shapely=2.0.3=py311hbbe59c9_0
-six=1.16.0=pyh6c4a22f_0
-snappy=1.1.10=he8610fa_0
-sniffio=1.3.0=pyhd8ed1ab_0
-sortedcontainers=2.4.0=pyhd8ed1ab_0
-soupsieve=2.5=pyhd8ed1ab_1
-sparkmagic=0.21.0=pyhd8ed1ab_1
-sqlalchemy=2.0.26=py311hc8f2f60_0
-sqlite=3.45.1=h3b3482f_0
-stack_data=0.6.2=pyhd8ed1ab_0
-tblib=3.0.0=pyhd8ed1ab_0
-tenacity=8.2.3=pyhd8ed1ab_0
-terminado=0.18.0=pyh0d859eb_0
-threadpoolctl=3.3.0=pyhc1e730c_0
-tiledb=2.20.0=hf61e980_0
-tinycss2=1.2.1=pyhd8ed1ab_0
-tk=8.6.13=h194ca79_0
-tomli=2.0.1=pyhd8ed1ab_0
-toolz=0.12.1=pyhd8ed1ab_0
-tornado=6.3.3=py311hc8f2f60_1
-tqdm=4.66.2=pyhd8ed1ab_0
-traitlets=5.9.0=pyhd8ed1ab_0
-truststore=0.8.0=pyhd8ed1ab_0
-types-python-dateutil=2.8.19.20240106=pyhd8ed1ab_0
-typing-extensions=4.9.0=hd8ed1ab_0
-typing_extensions=4.9.0=pyha770c72_0
-typing_utils=0.1.0=pyhd8ed1ab_0
-tzcode=2024a=h31becfc_0
-tzdata=2024a=h0c530f3_0
-uc-micro-py=1.0.3=pyhd8ed1ab_0
-ucx=1.15.0=hedb98eb_3
-uri-template=1.3.0=pyhd8ed1ab_0
-uriparser=0.9.7=hd600fc2_1
-urllib3=2.2.0=pyhd8ed1ab_0
-wcwidth=0.2.13=pyhd8ed1ab_0
-webcolors=1.13=pyhd8ed1ab_0
-webencodings=0.5.1=pyhd8ed1ab_2
-websocket-client=1.7.0=pyhd8ed1ab_0
-wheel=0.42.0=pyhd8ed1ab_0
-widgetsnbextension=4.0.10=pyhd8ed1ab_0
-xarray=2024.2.0=pyhd8ed1ab_0
-xerces-c=3.2.5=hf13c1fb_0
-xorg-kbproto=1.0.7=h3557bc0_1002
-xorg-libice=1.1.1=h7935292_0
-xorg-libsm=1.2.4=h5a01bc2_0
-xorg-libx11=1.8.7=h055a233_0
-xorg-libxau=1.0.11=h31becfc_0
-xorg-libxdmcp=1.1.3=h3557bc0_0
-xorg-libxext=1.3.4=h2a766a3_2
-xorg-libxrender=0.9.11=h7935292_0
-xorg-renderproto=0.11.1=h3557bc0_1002
-xorg-xextproto=7.3.0=h2a766a3_1003
-xorg-xproto=7.0.31=h3557bc0_1007
-xyzservices=2023.10.1=pyhd8ed1ab_0
-xz=5.2.6=h9cdd2b7_0
-yaml=0.2.5=hf897c2e_2
-yaml-cpp=0.8.0=h2f0025b_0
-zeromq=4.3.5=h2f0025b_0
-zict=3.0.0=pyhd8ed1ab_0
-zipp=3.17.0=pyhd8ed1ab_0
-zlib=1.2.13=h31becfc_5
-zstandard=0.22.0=py311hb827a26_0
-zstd=1.5.5=h4c53e97_0
diff --git a/images/demo/jupyter/sparkmagic.json b/images/demo/jupyter/sparkmagic.json
deleted file mode 100644
index c1849ef986..0000000000
--- a/images/demo/jupyter/sparkmagic.json
+++ /dev/null
@@ -1,70 +0,0 @@
-{
- "kernel_python_credentials": {
- "username": "",
- "password": "",
- "url": "http://localhost:8998",
- "auth": "None"
- },
- "kernel_scala_credentials": {
- "username": "",
- "password": "",
- "url": "http://localhost:8998",
- "auth": "None"
- },
- "kernel_r_credentials": {
- "username": "",
- "password": "",
- "url": "http://localhost:8998"
- },
- "logging_config": {
- "version": 1,
- "formatters": {
- "magicsFormatter": {
- "format": "%(asctime)s\t%(levelname)s\t%(message)s",
- "datefmt": ""
- }
- },
- "handlers": {
- "magicsHandler": {
- "class": "hdijupyterutils.filehandler.MagicsFileHandler",
- "formatter": "magicsFormatter",
- "home_path": "~/.sparkmagic"
- }
- },
- "loggers": {
- "magicsLogger": {
- "handlers": [
- "magicsHandler"
- ],
- "level": "DEBUG",
- "propagate": 0
- }
- }
- },
- "wait_for_idle_timeout_seconds": 15,
- "livy_session_startup_timeout_seconds": 60,
- "fatal_error_suggestion": "The code failed because of a fatal error:\n\t{}.\n\nSome things to try:\na) Make sure Spark has enough available resources for Jupyter to create a Spark context.\nb) Contact your Jupyter administrator to make sure the Spark magics library is configured correctly.\nc) Restart the kernel.",
- "ignore_ssl_errors": false,
- "session_configs": {
- "driverMemory": "1000M",
- "executorCores": 2
- },
- "use_auto_viz": true,
- "coerce_dataframe": true,
- "default_maxrows": 1000000,
- "pyspark_dataframe_encoding": "utf-8",
- "heartbeat_refresh_seconds": 30,
- "livy_server_heartbeat_timeout_seconds": 0,
- "heartbeat_retry_seconds": 10,
- "server_extension_default_kernel_name": "pysparkkernel",
- "custom_headers": {},
- "retry_policy": "configurable",
- "retry_seconds_to_sleep_list": [
- 0.2,
- 0.5,
- 1,
- 3,
- 5
- ],
- "configurable_retry_policy_max_retries": 8
-}
\ No newline at end of file
diff --git a/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/01 - Introduction.ipynb b/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/01 - Introduction.ipynb
index 66516c71e4..79a943335d 100644
--- a/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/01 - Introduction.ipynb
+++ b/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/01 - Introduction.ipynb
@@ -64,8 +64,9 @@
"\n",
"New to Jupyter?
\n",
"\n",
- "* Go back to the Jupyter's main tab that shows the list of files \n",
- "* In the top right corner click New -> Terminal\n",
+ "* Open the File menu at the top of the window\n",
+ "* Select New -> Terminal\n",
+ "* This will open a terminal in a new browser tab\n",
"* Now you can switch between the terminal tab and this lesson as you continue\n",
"\n",
" \n",
@@ -308,7 +309,7 @@
"source": [
"## Analyzing Data\n",
"\n",
- "Getting raw data in is just a small first step on our journey towards collaboration on data, but before we continue, let's take a quick break and see how you can analyze the data that we already have.\n",
+ "Getting raw data in is just a small first step on our journey towards collaboration on data, but before we continue, let's take a quick break and see what we can do with data we already have.\n",
"\n",
"### SQL Shell\n",
"\n",
@@ -347,13 +348,24 @@
"\n",
"### Notebooks\n",
"\n",
- "When you install `kamu` on your computer you can use `kamu notebook` command to start an integrated Jupyter \n",
- "Notebook environment, identical to the one you are currently using.\n",
+ "When you install Kamu CLI on your computer you can use `kamu notebook` command to start an integrated Jupyter \n",
+ "Notebook environment identical to the one you are currently using.\n",
"\n",
"Since we're already in the notebook environment - let's give this integration a try!\n",
"\n",
"\n",
- "Start by loading kamu
Jupyter extension:\n",
+ "Start by creating a connection to kamu
SQL server:\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "New to Jupyter?
\n",
+ "\n",
+ "Jupyter notebooks contain cells that are **executable**, so static text can me mixed with computations and data visualization.\n",
+ "\n",
+ "**You** are in control of what runs when, so you'll need to **select the code cell below** and then click the **\"Run\"** button on the top panel, or press `Shift + Enter`.\n",
+ "\n",
+ " \n",
"
"
]
},
@@ -363,28 +375,49 @@
"metadata": {},
"outputs": [],
"source": [
- "%load_ext kamu"
+ "import kamu\n",
+ "\n",
+ "con = kamu.connect(\"file://\")\n",
+ "print(\"Connected to kamu via\", con)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
- "\n",
- "New to Jupyter?
\n",
+ "Using `kamu` Python library we can connect to any remote kamu node by providing a URL.\n",
"\n",
- "Jupyter notebooks contain cells that are **executable**, so static text can me mixed with computations and data visualization.\n",
- "\n",
- "**You** are in control of what runs when, so you'll need to **select the code cell above** and then click the **\"Run\"** button on the top panel, or press `Shift + Enter`.\n",
+ "When URL is a local path - `kamu` library will automatically start an SQL server for that local workspace and connect to it. Super convenient!\n",
"\n",
- " \n",
- "
\n",
- "\n",
- "We can now import the dataset we have in our workspace into this notebook environment. We can also give it a less verbose alias.\n",
+ "We can now send SQL requests using `query(sql)` method. The result will be returned as Pandas DataFrame:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "con.query(\"select 1\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "con.query(\"select * from 'covid19.british-columbia.case-details' limit 3\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Writing `con.query(...)` many times can get old fast, but `kamu` Jupyter extension can help with that.\n",
"\n",
"\n",
- "Run the below to import the dataset (may take 15 or so seconds first time):\n",
+ "Load kamu
Jupyter extension:\n",
"
"
]
},
@@ -394,7 +427,14 @@
"metadata": {},
"outputs": [],
"source": [
- "%import_dataset covid19.british-columbia.case-details --alias cases_bc"
+ "%load_ext kamu"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The extension provides a very convenient `%%sql` cell magic:"
]
},
{
@@ -412,30 +452,25 @@
"metadata": {},
"outputs": [],
"source": [
- "cases_bc.printSchema()\n",
- "cases_bc.count()"
+ "%%sql\n",
+ "select count(*) from 'covid19.british-columbia.case-details'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "describe 'covid19.british-columbia.case-details'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
- "\n",
- "What did we just run?
\n",
- "\n",
- "The code you type into a regular cell is executed by [PySpark](https://spark.apache.org/docs/latest/api/python/) server that `kamu` runs when you are working with notebooks.\n",
- "\n",
- "So it's a Python code, but it is **executed remotely**, not in the notebook kernel. We will discuss benefits of this later.\n",
- "\n",
- " \n",
- "
\n",
- "\n",
- "You can use the `%%sql` cell command to run SQL queries on the imported datasets.\n",
- "\n",
- "\n",
- "To see a sample of data run:\n",
- "
"
+ "To see a sample of data run:"
]
},
{
@@ -445,26 +480,19 @@
"outputs": [],
"source": [
"%%sql\n",
- "select * from cases_bc \n",
+ "select\n",
+ " *\n",
+ "from 'covid19.british-columbia.case-details'\n",
"order by reported_date desc\n",
- "limit 5"
+ "limit 3"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
- "\n",
- "What did we just run?
\n",
- "\n",
- "Similarly to the PySpark code, the queries in `%%sql` cells are sent to and executed by the Spark SQL engine. The results are then returned back to the notebook kernel.\n",
- "\n",
- " \n",
- "
\n",
- "\n",
"\n",
- "Let's run this simple SQL query to build a histogram of cases by the age group:\n",
+ "Run this simple SQL query to count number of cases per age group:\n",
"
"
]
},
@@ -478,7 +506,7 @@
"select\n",
" age_group,\n",
" count(*) as case_count \n",
- "from cases_bc\n",
+ "from 'covid19.british-columbia.case-details'\n",
"group by age_group"
]
},
@@ -486,17 +514,23 @@
"cell_type": "markdown",
"metadata": {},
"source": [
+ "The `kamu` extension also provides a convenient auto-viz widget that you can use to quickly plot data in a data frame.\n",
+ "\n",
"\n",
" \n",
- "Once you get the results, try using the built-in data visualizer to plot the data as a **bar chart**\n",
+ "Once you get the results, try switching results view from \"Table\" to \"Bar\" tab and build a histogram.\n",
"\n",
"
\n",
"\n",
- "SQL is great for shaping and aggregating data, but for more advanced processing or visualizations you might need more tools. Using `-o ` parameter of the `%%sql` command we can ask for the result of a query to be returned into the notebook as **Pandas dataframe**.\n",
+ "Using `kamu` with Jupyter lets you offload complex computations to a selection of powerful SQL engines. It avoids having to download all data (which often may not fit into memory) into the notebook - instead you can shape and aggregate data on the SQL engine side and only download often much smaller results for the final visualization.\n",
+ "\n",
+ "Using `-o ` parameter of the `%%sql` cell magic we can save the result into a variable.\n",
+ "\n",
+ "When you expect a lot of data and don't want to display a table you can also use `-q` or `--quiet` flag.\n",
"\n",
"\n",
"\n",
- "Let's count the number of cases per day and pull the result from Spark into our notebook:\n",
+ "Let's count the number of cases per day and pull the result from SQL engine into our notebook:\n",
" \n",
"
"
]
@@ -507,11 +541,11 @@
"metadata": {},
"outputs": [],
"source": [
- "%%sql -o df\n",
+ "%%sql -o df -q\n",
"select\n",
" reported_date as date,\n",
" count(*) as case_count\n",
- "from cases_bc\n",
+ "from 'covid19.british-columbia.case-details'\n",
"group by date\n",
"order by date"
]
@@ -522,18 +556,12 @@
"source": [
"We now have a variable `df` containing the data as Pandas dataframe, and you are free to do with it anything you'd normally do in Jupyter.\n",
"\n",
- "\n",
- "\n",
- "Note that if you just type `df` in a cell - you will get an error. That's because by default this kernel executes operations in the remote PySpark environment. To access `df` you need to use `%%local` cell command which will execute code in this local Python kernel.\n",
- " \n",
- "
\n",
- "\n",
"This environment already comes with some popular plotting libraries pre-installed (like `plotly`, `bokeh`, `mapbox`, etc.), but if your favorite library is missing - you can always `pip install` it from the terminal.\n",
"\n",
"\n",
- " \n",
+ "\n",
"Let's do some basic plotting:\n",
- " \n",
+ "\n",
"
"
]
},
@@ -543,7 +571,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"import plotly.express as px\n",
"\n",
"fig = px.scatter(\n",
@@ -569,19 +596,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/02 - Collaboration.ipynb b/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/02 - Collaboration.ipynb
index 413414ea7e..29e60f8f59 100644
--- a/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/02 - Collaboration.ipynb
+++ b/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/02 - Collaboration.ipynb
@@ -517,19 +517,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/03 - Trust.ipynb b/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/03 - Trust.ipynb
index 362c194317..2d4aa14b2c 100644
--- a/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/03 - Trust.ipynb
+++ b/images/demo/user-home/01 - Kamu Basics (COVID-19 example)/03 - Trust.ipynb
@@ -257,19 +257,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/01 - Working with Web3 data.ipynb b/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/01 - Working with Web3 data.ipynb
index 6406cb3a24..4875c21afc 100644
--- a/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/01 - Working with Web3 data.ipynb
+++ b/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/01 - Working with Web3 data.ipynb
@@ -326,35 +326,17 @@
"Jupyter notebook you're using now runs either on our demo server (https://demo.kamu.dev) or can be launched with `kamu notebook` command in your own workspace when you have the tool installed.\n",
" \n",
"To start working with data:\n",
- "- First run `%load_ext kamu` to load our extension\n",
- "- Then use `%import_dataset dataset_name` to import datasets from your workspace\n",
+ "- Import `kamu` Python library\n",
+ "- Create a connection to the node\n",
+ "- Using `file://` as a URL will start and connec to a local SQL server\n",
+ "- (Optionally) Load Jupyter extension to enable `%%sql` cell magic\n",
"\n",
- "Above commands will start the Apache Spark SQL server in the background and connect to it.\n",
- " \n",
- "By default all code cells execute in PySpark environment, which is most of the time not what we want.\n",
- " \n",
- "Instead we use `%%sql` cells to run SQL queries in Spark. It's a great way to explore and shape your data.\n",
- " \n",
- "You can download the result of any SQL query into the notebook's Python process using `%%sql -o pandas_dataframe_variable -n records_limit`.\n",
- " \n",
- "You can then use `%%local` cells to execute Python code inside the notebook to further process or visualize the data.\n",
+ "The `%%sql` cells will execute queries in `kamu`'s powerful SQL engines and return the results as Pandas dataframe.\n",
" \n",
"\n",
""
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "import pandas as pd\n",
- "import hvplot.pandas\n",
- "pd.set_option('max_colwidth', None)"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -362,7 +344,10 @@
"outputs": [],
"source": [
"%load_ext kamu\n",
- "%import_dataset net.rocketpool.reth.mint-burn"
+ "import kamu\n",
+ "\n",
+ "con = kamu.connect(\"file://\")\n",
+ "print(\"Connected to kamu via\", con)"
]
},
{
@@ -372,7 +357,7 @@
"outputs": [],
"source": [
"%%sql\n",
- "select * from `net.rocketpool.reth.mint-burn` limit 5"
+ "select * from 'net.rocketpool.reth.mint-burn' limit 3"
]
},
{
@@ -382,17 +367,17 @@
"outputs": [],
"source": [
"%%sql -o reth_pool -q\n",
+ "--## The -o option above downloads the SQL query result into the notebook as Pandas dataframe\n",
+ "--## The -q flag skips displaying the data\n",
"\n",
- "--## The -o option above downloads the SQL query result\n",
- "--## into the local notebook as Pandas dataframe\n",
"select \n",
" event_time, \n",
" case \n",
- " when event_name = \"TokensMinted\" then \"Mint\"\n",
- " when event_name = \"TokensBurned\" then \"Burn\"\n",
+ " when event_name = 'TokensMinted' then 'Mint'\n",
+ " when event_name = 'TokensBurned' then 'Burn'\n",
" end as event_name, \n",
" avg(eth_amount / amount) as rate\n",
- "from `net.rocketpool.reth.mint-burn` \n",
+ "from \"net.rocketpool.reth.mint-burn\"\n",
"group by event_time, event_name\n",
"order by 1"
]
@@ -403,7 +388,10 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
+ "import pandas as pd\n",
+ "import hvplot.pandas\n",
+ "pd.set_option('max_colwidth', None)\n",
+ "\n",
"reth_pool.hvplot.step(\n",
" x=\"event_time\", \n",
" by=\"event_name\", \n",
@@ -448,15 +436,6 @@
""
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset com.cryptocompare.ohlcv.eth-usd"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -464,9 +443,9 @@
"outputs": [],
"source": [
"%%sql\n",
- "select * from `com.cryptocompare.ohlcv.eth-usd` \n",
+ "select * from \"com.cryptocompare.ohlcv.eth-usd\"\n",
"order by event_time desc \n",
- "limit 5"
+ "limit 3"
]
},
{
@@ -476,7 +455,12 @@
"outputs": [],
"source": [
"%%sql -o eth2usd -q\n",
- "select event_time, open, close from `com.cryptocompare.ohlcv.eth-usd` order by event_time"
+ "select\n",
+ " event_time,\n",
+ " open,\n",
+ " close\n",
+ "from \"com.cryptocompare.ohlcv.eth-usd\"\n",
+ "order by event_time"
]
},
{
@@ -485,7 +469,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"eth2usd.hvplot.line(\n",
" x=\"event_time\",\n",
" y=\"close\",\n",
@@ -553,15 +536,6 @@
""
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset account.tokens.transfers"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -569,9 +543,9 @@
"outputs": [],
"source": [
"%%sql\n",
- "select * from `account.tokens.transfers` \n",
+ "select * from \"account.tokens.transfers\"\n",
"order by block_number desc\n",
- "limit 5"
+ "limit 3"
]
},
{
@@ -582,9 +556,9 @@
"source": [
"%%sql\n",
"select\n",
- " token_name as `Token`, \n",
- " sum(abs(value) / pow(10, token_decimal)) as `Volume Traded` \n",
- "from `account.tokens.transfers`\n",
+ " token_name as 'Token', \n",
+ " sum(abs(cast(value as double)) / pow(10, cast(token_decimal as int))) as 'Volume Traded'\n",
+ "from \"account.tokens.transfers\"\n",
"group by 1"
]
},
@@ -605,15 +579,6 @@
"This is why we need the `account.transactions` dataset that contains all account transactions along with their `ETH` value."
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset account.transactions"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -622,9 +587,9 @@
"source": [
"%%sql\n",
"select *\n",
- "from `account.transactions` \n",
+ "from \"account.transactions\"\n",
"order by block_number desc\n",
- "limit 5"
+ "limit 3"
]
},
{
@@ -636,8 +601,8 @@
"%%sql -o transactions -q\n",
"select\n",
" *, \n",
- " value / pow(10, 18) as value_eth \n",
- "from `account.transactions` \n",
+ " cast(value as double) / pow(10, 18) as value_eth \n",
+ "from \"account.transactions\"\n",
"order by block_number desc"
]
},
@@ -647,8 +612,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
- "transactions\n",
"transactions.hvplot.scatter(\n",
" x=\"block_time\",\n",
" y=\"value_eth\",\n",
@@ -737,15 +700,6 @@
"In the next chapter we will explore why stream processing model is such a big deal."
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset account.tokens.portfolio"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -753,7 +707,7 @@
"outputs": [],
"source": [
"%%sql -o portfolio -q\n",
- "select * from `account.tokens.portfolio` "
+ "select * from \"account.tokens.portfolio\""
]
},
{
@@ -762,7 +716,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"portfolio[\n",
" portfolio.token_symbol == \"rETH\"\n",
"].hvplot.scatter(\n",
@@ -779,7 +732,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"r = portfolio[\n",
" portfolio.token_symbol == \"rETH\"\n",
"]\n",
@@ -957,15 +909,6 @@
"```"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset account.tokens.portfolio.market-value"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -973,7 +916,7 @@
"outputs": [],
"source": [
"%%sql -o market_value -q\n",
- "select * from `account.tokens.portfolio.market-value` "
+ "select * from \"account.tokens.portfolio.market-value\""
]
},
{
@@ -982,7 +925,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"market_value.hvplot.line(\n",
" x=\"event_time\", \n",
" y=[\"token_book_value_eth\", \"token_market_value_eth\"],\n",
@@ -1001,7 +943,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"market_value.hvplot.line(\n",
" x=\"event_time\",\n",
" y=[\"token_book_value_eth_as_usd\", \"token_market_value_usd\"],\n",
@@ -1054,19 +995,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/02 - Watermarks.ipynb b/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/02 - Watermarks.ipynb
index 74a1167578..25c868d13c 100644
--- a/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/02 - Watermarks.ipynb
+++ b/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/02 - Watermarks.ipynb
@@ -66,18 +66,6 @@
"And just to confirm:"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%local\n",
- "import pandas as pd\n",
- "import hvplot.pandas\n",
- "pd.set_option('max_colwidth', None)"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -85,9 +73,8 @@
"outputs": [],
"source": [
"%load_ext kamu\n",
- "%import_dataset net.rocketpool.reth.mint-burn\n",
- "%import_dataset account.tokens.portfolio.market-value\n",
- "%import_dataset account.tokens.portfolio"
+ "import kamu\n",
+ "con = kamu.connect(\"file://\")"
]
},
{
@@ -99,7 +86,7 @@
"%%sql\n",
"select \n",
" event_time, eth_amount, amount\n",
- "from `net.rocketpool.reth.mint-burn`\n",
+ "from \"net.rocketpool.reth.mint-burn\"\n",
"order by 1 desc\n",
"limit 1"
]
@@ -124,7 +111,7 @@
"%%sql\n",
"select\n",
" event_time, token_symbol, token_balance, token_market_value_eth, token_market_value_usd\n",
- "from `account.tokens.portfolio.market-value` \n",
+ "from \"account.tokens.portfolio.market-value\"\n",
"order by event_time desc\n",
"limit 1"
]
@@ -179,8 +166,8 @@
"--## so let's filter out all other types\n",
"reth_portfolio as (\n",
" select * \n",
- " from `account.tokens.portfolio`\n",
- " where token_symbol = \"rETH\"\n",
+ " from \"account.tokens.portfolio\"\n",
+ " where token_symbol = 'rETH'\n",
"),\n",
"\n",
"--## Join every exchange rate data point\n",
@@ -194,7 +181,7 @@
" pf.token_symbol,\n",
" pf.token_balance,\n",
" reth.eth_amount / reth.amount * pf.token_balance as token_market_value_eth\n",
- " from `net.rocketpool.reth.mint-burn` as reth\n",
+ " from \"net.rocketpool.reth.mint-burn\" as reth\n",
" join reth_portfolio as pf\n",
" on reth.event_time >= pf.block_time\n",
"),\n",
@@ -221,7 +208,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "There are multiple ways to perform a JOIN based on closest preceding timestamp, but all of them will give you the same result.\n",
+ "There are many ways to perform a JOIN based on closest preceding timestamp, but all of them will give you the same result.\n",
"\n",
"Let's compare this \"batch\" result to the \"streaming\" result we get from `kamu`:"
]
@@ -233,7 +220,7 @@
"outputs": [],
"source": [
"%%sql -o mv_streaming -q\n",
- "select * from `account.tokens.portfolio.market-value`"
+ "select * from \"account.tokens.portfolio.market-value\""
]
},
{
@@ -242,7 +229,10 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
+ "import pandas as pd\n",
+ "import hvplot.pandas\n",
+ "pd.set_option('max_colwidth', None)\n",
+ "\n",
"mv_batch.hvplot.line(\n",
" x=\"event_time\", \n",
" y=\"token_market_value_eth\",\n",
@@ -371,15 +361,6 @@
"Let's have a look now:"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset account.tokens.portfolio.market-value"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -387,7 +368,7 @@
"outputs": [],
"source": [
"%%sql -o mv_streaming -q\n",
- "select * from `account.tokens.portfolio.market-value`\n",
+ "select * from \"account.tokens.portfolio.market-value\"\n",
"order by event_time desc"
]
},
@@ -397,7 +378,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
"mv_batch.hvplot.line(\n",
" x=\"event_time\", \n",
" y=\"token_market_value_eth\",\n",
@@ -452,19 +432,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/03 - Alternative reality pipeline.ipynb b/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/03 - Alternative reality pipeline.ipynb
index 47ebe82399..dd4d1a064d 100644
--- a/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/03 - Alternative reality pipeline.ipynb
+++ b/images/demo/user-home/02 - Web3 Data (Ethereum trading example)/03 - Alternative reality pipeline.ipynb
@@ -84,21 +84,15 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
+ "%load_ext kamu\n",
+ "import kamu\n",
"import pandas as pd\n",
"import hvplot.pandas\n",
"import holoviews as hv\n",
- "pd.set_option('max_colwidth', None)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%load_ext kamu\n",
- "%import_dataset co.alphavantage.tickers.daily.spy"
+ "\n",
+ "pd.set_option('max_colwidth', None)\n",
+ "\n",
+ "con = kamu.connect(\"file://\")"
]
},
{
@@ -110,7 +104,7 @@
"%%sql\n",
"select \n",
" event_time, close\n",
- "from `co.alphavantage.tickers.daily.spy`\n",
+ "from \"co.alphavantage.tickers.daily.spy\"\n",
"where event_time > '2010-01-01'\n",
"\n",
"--## Switch to \"Area\" viz type to view results"
@@ -164,17 +158,6 @@
"Drum roll... Here comes the exciting part - let's compare the two investments!"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%import_dataset account.tokens.portfolio.usd\n",
- "%import_dataset account.tokens.portfolio.market-value\n",
- "%import_dataset account.whatif.reth-vs-snp500.market-value"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -182,8 +165,8 @@
"outputs": [],
"source": [
"%%sql -o portfolio -q\n",
- "select * from `account.tokens.portfolio.usd`\n",
- "where token_symbol = \"rETH\""
+ "select * from \"account.tokens.portfolio.usd\"\n",
+ "where token_symbol = 'rETH'"
]
},
{
@@ -193,7 +176,7 @@
"outputs": [],
"source": [
"%%sql -o market_value -q\n",
- "select * from `account.tokens.portfolio.market-value`"
+ "select * from \"account.tokens.portfolio.market-value\""
]
},
{
@@ -203,7 +186,7 @@
"outputs": [],
"source": [
"%%sql -o alternative_market_value -q\n",
- "select * from `account.whatif.reth-vs-snp500.market-value`"
+ "select * from \"account.whatif.reth-vs-snp500.market-value\""
]
},
{
@@ -212,8 +195,6 @@
"metadata": {},
"outputs": [],
"source": [
- "%%local\n",
- "\n",
"max_height = max(\n",
" alternative_market_value[\"alt_spy_market_value_usd\"].max(),\n",
" market_value[\"token_market_value_usd\"].max(),\n",
@@ -294,19 +275,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "PySpark",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "pysparkkernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
- "name": "python",
+ "name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
- "name": "pyspark",
- "pygments_lexer": "python3"
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
}
},
"nbformat": 4,
diff --git a/images/jupyter/Dockerfile b/images/jupyter/Dockerfile
index 1db823667d..661127342f 100644
--- a/images/jupyter/Dockerfile
+++ b/images/jupyter/Dockerfile
@@ -1,7 +1,7 @@
# Base image info: https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html
# Base image tags: https://quay.io/repository/jupyter/minimal-notebook
# Customization is based on: https://github.com/jupyter-incubator/sparkmagic/blob/master/Dockerfile.jupyter
-FROM quay.io/jupyter/minimal-notebook:2024-02-13
+FROM quay.io/jupyter/minimal-notebook:2024-12-09
ARG TARGETPLATFORM
@@ -9,33 +9,24 @@ ARG TARGETPLATFORM
USER root
RUN apt update && \
- apt -y install curl wget gnupg unzip jq && \
+ apt -y install netcat-traditional curl wget gnupg unzip jq && \
apt-get clean && rm -rf /var/lib/apt/lists /var/cache/apt/archives
-COPY requirements/$TARGETPLATFORM/requirements.txt requirements.txt
+COPY requirements/$TARGETPLATFORM/env.yaml env.yaml
-# TODO: Semi-permanent hack for `mapboxgl` package being broken in conda-forge
-# See: https://github.com/kamu-data/kamu-cli/issues/533
-RUN mamba install -y --file requirements.txt && \
- mamba uninstall mapboxgl && pip install --no-cache-dir mapboxgl && \
- mamba clean --all -f -y && \
- rm requirements.txt && \
- fix-permissions "${CONDA_DIR}" && \
+RUN mamba env update -y -f env.yaml && \
+ mamba clean --all -f -y && \
+ rm env.yaml && \
+ fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
#########################################################################################
USER $NB_USER
-COPY kamu.py /opt/conda/lib/python3.11/site-packages/kamu.py
-COPY sparkmagic.json /home/$NB_USER/.sparkmagic/config.json
-
-RUN jupyter nbextension enable --py --sys-prefix widgetsnbextension
-RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/sparkkernel
-RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/pysparkkernel
-RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/sparkrkernel
-RUN jupyter serverextension enable --py sparkmagic
+COPY overrides.json /opt/conda/share/jupyter/lab/settings/overrides.json
+RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
#########################################################################################
USER root
@@ -43,6 +34,4 @@ RUN fix-permissions "/home/${NB_USER}"
USER $NB_USER
-# TODO: Remove show_banner option after Sparkmagic supports novebook >= 7.0.0
-# See: https://github.com/jupyter-incubator/sparkmagic/issues/885
-CMD ["jupyter", "notebook", "--ip", "0.0.0.0", "--port", "8080", "--NotebookApp.show_banner=False"]
+CMD ["jupyter", "lab", "--ip", "0.0.0.0", "--port", "8080", "--NotebookApp.iopub_data_rate_limit=1e10"]
diff --git a/images/jupyter/Makefile b/images/jupyter/Makefile
index 4a36ef57ad..488ddb1ceb 100644
--- a/images/jupyter/Makefile
+++ b/images/jupyter/Makefile
@@ -1,5 +1,5 @@
PLATFORM=linux/amd64
-BASE_IMAGE:=quay.io/jupyter/minimal-notebook:2024-02-13
+BASE_IMAGE:=quay.io/jupyter/minimal-notebook:2024-12-09
# Requires QEMU
@@ -23,7 +23,22 @@ requirements-platform:
# Executed from inside the base image
+#
+# The stupidity of Python package management ecosystems is unbelievable. Jupyter images are
+# based on conda, but some packages we need are only installable by pip. We want to make the
+# environment reproducible, but `conda env export` includes only **top-level** packages in its
+# `dependencies.pip` section, ignoring all direct and transitive dependencies.
+#
+# To make the environment fully reproducible we have to resort to:
+# - Run `conda env export` to lock conda packages (and a partial list of pip packages)
+# - Strip the partial `pip` section from the conda env
+# - Run `pip freeze` to lock pip packages
+# - Filter out conda packages from the `pip freeze` output
+# - Merge the rest into the `dependencies.pip` section of the conda env
.PHONY: requirements-install-freeze
requirements-install-freeze:
- mamba install -y --file requirements/$(PLATFORM)/requirements.in
- mamba list --export > requirements/$(PLATFORM)/requirements.txt
+ pip install -r requirements/$(PLATFORM)/requirements.in
+ pip freeze > requirements/$(PLATFORM)/requirements.txt
+ mamba env export --no-builds > requirements/$(PLATFORM)/env.yaml
+ python ./merge_requirements.py requirements/$(PLATFORM)/env.yaml requirements/$(PLATFORM)/requirements.txt
+ rm requirements/$(PLATFORM)/requirements.txt
diff --git a/images/jupyter/merge_requirements.py b/images/jupyter/merge_requirements.py
new file mode 100644
index 0000000000..6ff0b294e2
--- /dev/null
+++ b/images/jupyter/merge_requirements.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+import sys
+import yaml
+
+env_path = sys.argv[1]
+req_path = sys.argv[2]
+
+# Read files
+with open(env_path) as f:
+ env = yaml.safe_load(f)
+
+with open(req_path) as f:
+ reqs = [r.strip() for r in f.readlines()]
+
+# Filter out pip packages from `conda env export`
+env['dependencies'] = [
+ dep for dep in env['dependencies']
+ if not isinstance(dep, dict) or 'pip' not in dep
+]
+
+# Filter conda packages from `pip freeze` output
+reqs = [r for r in reqs if '@ file://' not in r]
+
+# Merge into environment
+env['dependencies'].append({'pip': reqs})
+
+# Replace env file
+with open(env_path, 'w') as f:
+ yaml.safe_dump(env, f)
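
The key step is the `@ file://` filter: conda-installed packages appear in `pip freeze` output as local file references, so dropping them leaves only the genuinely pip-managed pins. A small illustration with hypothetical freeze output:

```python
# Illustration only -- hypothetical `pip freeze` lines:
freeze_output = [
    "pandas==2.2.3",
    "numpy @ file:///opt/conda/conda-bld/numpy_123/work",  # conda-managed
    "kamu==0.6.0",
]

# Same filter as in merge_requirements.py above
pip_deps = [r for r in freeze_output if "@ file://" not in r]
assert pip_deps == ["pandas==2.2.3", "kamu==0.6.0"]
```
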
diff --git a/images/jupyter/overrides.json b/images/jupyter/overrides.json
new file mode 100644
index 0000000000..36a8a9e7b5
--- /dev/null
+++ b/images/jupyter/overrides.json
@@ -0,0 +1,5 @@
+{
+ "@jupyterlab/apputils-extension:themes": {
+ "adaptive-theme": true
+ }
+}
\ No newline at end of file
diff --git a/images/jupyter/requirements/linux/amd64/env.yaml b/images/jupyter/requirements/linux/amd64/env.yaml
new file mode 100644
index 0000000000..42c02bd385
--- /dev/null
+++ b/images/jupyter/requirements/linux/amd64/env.yaml
@@ -0,0 +1,274 @@
+channels:
+- conda-forge
+dependencies:
+- _libgcc_mutex=0.1
+- _openmp_mutex=4.5
+- alembic=1.14.0
+- annotated-types=0.7.0
+- anyio=4.7.0
+- archspec=0.2.3
+- argon2-cffi=23.1.0
+- argon2-cffi-bindings=21.2.0
+- arrow=1.3.0
+- asttokens=3.0.0
+- async-lru=2.0.4
+- async_generator=1.10
+- attrs=24.2.0
+- babel=2.16.0
+- beautifulsoup4=4.12.3
+- bleach=6.2.0
+- blinker=1.9.0
+- boltons=24.0.0
+- brotli-python=1.1.0
+- bzip2=1.0.8
+- c-ares=1.34.3
+- ca-certificates=2024.8.30
+- cached-property=1.5.2
+- cached_property=1.5.2
+- certifi=2024.8.30
+- certipy=0.2.1
+- cffi=1.17.1
+- charset-normalizer=3.4.0
+- colorama=0.4.6
+- comm=0.2.2
+- conda=24.11.0
+- conda-libmamba-solver=24.11.1
+- conda-package-handling=2.4.0
+- conda-package-streaming=0.11.0
+- cpp-expected=1.1.0
+- cryptography=44.0.0
+- debugpy=1.8.9
+- decorator=5.1.1
+- defusedxml=0.7.1
+- distro=1.9.0
+- entrypoints=0.4
+- exceptiongroup=1.2.2
+- executing=2.1.0
+- fmt=11.0.2
+- fqdn=1.5.1
+- frozendict=2.4.6
+- greenlet=3.1.1
+- h11=0.14.0
+- h2=4.1.0
+- hpack=4.0.0
+- httpcore=1.0.7
+- httpx=0.28.1
+- hyperframe=6.0.1
+- idna=3.10
+- importlib-metadata=8.5.0
+- importlib_resources=6.4.5
+- ipykernel=6.29.5
+- ipython=8.30.0
+- ipython_genutils=0.2.0
+- isoduration=20.11.0
+- jedi=0.19.2
+- jinja2=3.1.4
+- json5=0.10.0
+- jsonpatch=1.33
+- jsonpointer=3.0.0
+- jsonschema=4.23.0
+- jsonschema-specifications=2024.10.1
+- jsonschema-with-format-nongpl=4.23.0
+- jupyter-lsp=2.2.5
+- jupyter_client=8.6.3
+- jupyter_core=5.7.2
+- jupyter_events=0.10.0
+- jupyter_server=2.14.2
+- jupyter_server_terminals=0.5.3
+- jupyterhub-base=5.2.1
+- jupyterhub-singleuser=5.2.1
+- jupyterlab=4.3.2
+- jupyterlab_pygments=0.3.0
+- jupyterlab_server=2.27.3
+- keyutils=1.6.1
+- krb5=1.21.3
+- ld_impl_linux-64=2.43
+- libarchive=3.7.7
+- libcurl=8.10.1
+- libedit=3.1.20191231
+- libev=4.33
+- libexpat=2.6.4
+- libffi=3.4.2
+- libgcc=14.2.0
+- libgcc-ng=14.2.0
+- libgomp=14.2.0
+- libiconv=1.17
+- liblzma=5.6.3
+- libmamba=2.0.4
+- libmambapy=2.0.4
+- libnghttp2=1.64.0
+- libnsl=2.0.1
+- libsodium=1.0.20
+- libsolv=0.7.30
+- libsqlite=3.47.0
+- libssh2=1.11.1
+- libstdcxx=14.2.0
+- libstdcxx-ng=14.2.0
+- libuuid=2.38.1
+- libxcrypt=4.4.36
+- libxml2=2.13.5
+- libzlib=1.3.1
+- lz4-c=1.10.0
+- lzo=2.10
+- make=4.4.1
+- mako=1.3.8
+- mamba=2.0.4
+- markupsafe=3.0.2
+- matplotlib-inline=0.1.7
+- menuinst=2.2.0
+- mistune=3.0.2
+- nbclassic=1.1.0
+- nbclient=0.10.1
+- nbconvert-core=7.16.4
+- nbformat=5.10.4
+- ncurses=6.5
+- nest-asyncio=1.6.0
+- nlohmann_json=3.11.3
+- notebook=7.3.1
+- notebook-shim=0.2.4
+- oauthlib=3.2.2
+- openssl=3.4.0
+- overrides=7.7.0
+- packaging=24.2
+- pamela=1.2.0
+- pandocfilters=1.5.0
+- parso=0.8.4
+- pexpect=4.9.0
+- pickleshare=0.7.5
+- pip=24.3.1
+- pkgutil-resolve-name=1.3.10
+- platformdirs=4.3.6
+- pluggy=1.5.0
+- prometheus_client=0.21.1
+- prompt-toolkit=3.0.48
+- psutil=6.1.0
+- ptyprocess=0.7.0
+- pure_eval=0.2.3
+- pybind11-abi=4
+- pycosat=0.6.6
+- pycparser=2.22
+- pydantic=2.10.3
+- pydantic-core=2.27.1
+- pygments=2.18.0
+- pyjwt=2.10.1
+- pysocks=1.7.1
+- python=3.12.8
+- python-dateutil=2.9.0.post0
+- python-fastjsonschema=2.21.1
+- python-json-logger=2.0.7
+- python_abi=3.12
+- pytz=2024.2
+- pyyaml=6.0.2
+- pyzmq=26.2.0
+- readline=8.2
+- referencing=0.35.1
+- reproc=14.2.5.post0
+- reproc-cpp=14.2.5.post0
+- requests=2.32.3
+- rfc3339-validator=0.1.4
+- rfc3986-validator=0.1.1
+- rpds-py=0.22.3
+- ruamel.yaml=0.18.6
+- ruamel.yaml.clib=0.2.8
+- send2trash=1.8.3
+- setuptools=75.6.0
+- simdjson=3.10.1
+- six=1.17.0
+- sniffio=1.3.1
+- soupsieve=2.5
+- spdlog=1.14.1
+- sqlalchemy=2.0.36
+- stack_data=0.6.3
+- terminado=0.18.1
+- tinycss2=1.4.0
+- tk=8.6.13
+- tomli=2.2.1
+- tornado=6.4.2
+- tqdm=4.67.1
+- traitlets=5.14.3
+- truststore=0.10.0
+- types-python-dateutil=2.9.0.20241206
+- typing-extensions=4.12.2
+- typing_extensions=4.12.2
+- typing_utils=0.1.0
+- tzdata=2024b
+- uri-template=1.3.0
+- urllib3=2.2.3
+- wcwidth=0.2.13
+- webcolors=24.11.1
+- webencodings=0.5.1
+- websocket-client=1.8.0
+- wheel=0.45.1
+- yaml=0.2.5
+- yaml-cpp=0.8.0
+- zeromq=4.3.5
+- zipp=3.21.0
+- zstandard=0.23.0
+- zstd=1.5.6
+- pip:
+ - adbc-driver-flightsql==1.3.0
+ - adbc-driver-manager==1.3.0
+ - altair==5.5.0
+ - autovizwidget==0.22.0
+ - bokeh==3.6.2
+ - branca==0.8.1
+ - cftime==1.6.4.post1
+ - chroma-py==0.1.0.dev1
+ - click==8.1.8
+ - cloudpickle==3.1.0
+ - colorcet==3.1.0
+ - colour==0.1.5
+ - contourpy==1.3.1
+ - cycler==0.12.1
+ - dask==2024.12.1
+ - folium==0.19.2
+ - fonttools==4.55.3
+ - fsspec==2024.12.0
+ - geojson==3.2.0
+ - geopandas==1.0.1
+ - hdijupyterutils==0.22.0
+ - holoviews==1.20.0
+ - hvplot==0.11.2
+ - ipywidgets==8.1.5
+ - jupyter==1.1.1
+ - jupyter-console==6.6.3
+ - jupyterlab_widgets==3.0.13
+ - kamu==0.6.0
+ - kiwisolver==1.4.8
+ - linkify-it-py==2.0.3
+ - livy==0.8.0
+ - locket==1.0.0
+ - mapboxgl==0.10.2
+ - Markdown==3.7
+ - markdown-it-py==3.0.0
+ - matplotlib==3.10.0
+ - mdit-py-plugins==0.4.2
+ - mdurl==0.1.2
+ - narwhals==1.19.1
+ - netCDF4==1.7.2
+ - numpy==2.2.1
+ - pandas==2.2.3
+ - pandas-bokeh==0.5.5
+ - panel==1.5.5
+ - param==2.2.0
+ - partd==1.4.2
+ - pillow==11.0.0
+ - plotly==5.24.1
+ - pyarrow==18.1.0
+ - pyogrio==0.10.0
+ - pyparsing==3.2.0
+ - pyproj==3.7.0
+ - pyviz_comms==3.0.3
+ - setuptools==75.6.0
+ - shapely==2.0.6
+ - tenacity==9.0.0
+ - toolz==1.0.0
+ - tzdata==2024.2
+ - uc-micro-py==1.0.3
+ - wheel==0.45.1
+ - widgetsnbextension==4.0.13
+ - xarray==2024.11.0
+ - xyzservices==2024.9.0
+ - zstandard==0.23.0
+name: base
+prefix: /opt/conda
diff --git a/images/jupyter/requirements/linux/amd64/requirements.in b/images/jupyter/requirements/linux/amd64/requirements.in
index d7b27f8f59..68140ad718 100644
--- a/images/jupyter/requirements/linux/amd64/requirements.in
+++ b/images/jupyter/requirements/linux/amd64/requirements.in
@@ -1,21 +1,16 @@
-# TODO: Pinned due to sparkmagic installation issue
-# See: https://github.com/jupyter-incubator/sparkmagic/issues/825
-# See workaround applied in: https://github.com/jupyter-incubator/sparkmagic/blob/master/Dockerfile.jupyter
-notebook==6.5.5
+kamu[jupyter-autoviz,jupyter-sql,spark]
-sparkmagic
-
-pandas
+dask
geopandas
geojson
-xarray
netcdf4
-dask
+pandas
+xarray
+altair
bokeh
-hvplot
-pandas-bokeh
folium
-altair
+hvplot
mapboxgl
+pandas-bokeh
shapely
diff --git a/images/jupyter/requirements/linux/amd64/requirements.txt b/images/jupyter/requirements/linux/amd64/requirements.txt
deleted file mode 100644
index c24a4e3dcf..0000000000
--- a/images/jupyter/requirements/linux/amd64/requirements.txt
+++ /dev/null
@@ -1,414 +0,0 @@
-# This file may be used to create an environment using:
-# $ conda create --name --file
-# platform: linux-64
-_libgcc_mutex=0.1=conda_forge
-_openmp_mutex=4.5=2_gnu
-alembic=1.13.1=pyhd8ed1ab_1
-altair=5.2.0=pyhd8ed1ab_0
-anyio=4.2.0=pyhd8ed1ab_0
-archspec=0.2.2=pyhd8ed1ab_0
-argon2-cffi=23.1.0=pyhd8ed1ab_0
-argon2-cffi-bindings=21.2.0=py311h459d7ec_4
-arrow=1.3.0=pyhd8ed1ab_0
-asttokens=2.4.1=pyhd8ed1ab_0
-async-lru=2.0.4=pyhd8ed1ab_0
-async_generator=1.10=py_0
-attrs=23.2.0=pyh71513ae_0
-autovizwidget=0.21.0=pyh1a96a4e_1
-aws-c-auth=0.7.16=h70caa3e_0
-aws-c-cal=0.6.9=h14ec70c_3
-aws-c-common=0.9.12=hd590300_0
-aws-c-compression=0.2.17=h572eabf_8
-aws-c-event-stream=0.4.2=h17cd1f3_0
-aws-c-http=0.8.0=hc6da83f_5
-aws-c-io=0.14.3=h3c8c088_1
-aws-c-mqtt=0.10.2=h0ef3971_0
-aws-c-s3=0.5.1=h2910485_1
-aws-c-sdkutils=0.1.14=h572eabf_0
-aws-checksums=0.1.17=h572eabf_7
-aws-crt-cpp=0.26.2=ha623a59_3
-aws-sdk-cpp=1.11.267=h0bb408c_0
-azure-core-cpp=1.10.3=h91d86a7_1
-azure-storage-blobs-cpp=12.10.0=h00ab1b0_0
-azure-storage-common-cpp=12.5.0=hb858b4b_2
-babel=2.14.0=pyhd8ed1ab_0
-beautifulsoup4=4.12.3=pyha770c72_0
-bleach=6.1.0=pyhd8ed1ab_0
-blinker=1.7.0=pyhd8ed1ab_0
-blosc=1.21.5=h0f2a231_0
-bokeh=3.3.4=pyhd8ed1ab_0
-boltons=23.1.1=pyhd8ed1ab_0
-branca=0.7.1=pyhd8ed1ab_0
-brotli=1.1.0=hd590300_1
-brotli-bin=1.1.0=hd590300_1
-brotli-python=1.1.0=py311hb755f60_1
-bzip2=1.0.8=hd590300_5
-c-ares=1.26.0=hd590300_0
-ca-certificates=2024.2.2=hbcca054_0
-cached-property=1.5.2=hd8ed1ab_1
-cached_property=1.5.2=pyha770c72_1
-cairo=1.18.0=h3faef2a_0
-certifi=2024.2.2=pyhd8ed1ab_0
-certipy=0.1.3=py_0
-cffi=1.16.0=py311hb3a22ac_0
-cfitsio=4.3.1=hbdc6101_0
-cftime=1.6.3=py311h1f0f07a_0
-charset-normalizer=3.3.2=pyhd8ed1ab_0
-chroma-py=0.1.0.dev1=py_0
-click=8.1.7=unix_pyh707e725_0
-click-plugins=1.1.1=py_0
-cligj=0.7.2=pyhd8ed1ab_1
-cloudpickle=3.0.0=pyhd8ed1ab_0
-colorama=0.4.6=pyhd8ed1ab_0
-colorcet=3.0.1=pyhd8ed1ab_0
-colour=0.1.5=pyhd8ed1ab_1
-comm=0.2.1=pyhd8ed1ab_0
-conda=23.11.0=py311h38be061_1
-conda-libmamba-solver=24.1.0=pyhd8ed1ab_0
-conda-package-handling=2.2.0=pyh38be061_0
-conda-package-streaming=0.9.0=pyhd8ed1ab_0
-configurable-http-proxy=4.6.1=h92b4e83_0
-contourpy=1.2.0=py311h9547e67_0
-cryptography=42.0.2=py311hcb13ee4_0
-cycler=0.12.1=pyhd8ed1ab_0
-cytoolz=0.12.3=py311h459d7ec_0
-dask=2024.2.0=pyhd8ed1ab_0
-dask-core=2024.2.0=pyhd8ed1ab_0
-debugpy=1.8.1=py311hb755f60_0
-decorator=5.1.1=pyhd8ed1ab_0
-defusedxml=0.7.1=pyhd8ed1ab_0
-distributed=2024.2.0=pyhd8ed1ab_0
-distro=1.9.0=pyhd8ed1ab_0
-entrypoints=0.4=pyhd8ed1ab_0
-exceptiongroup=1.2.0=pyhd8ed1ab_2
-executing=2.0.1=pyhd8ed1ab_0
-expat=2.5.0=hcb278e6_1
-fiona=1.9.5=py311hf8e0aa6_3
-fmt=10.2.1=h00ab1b0_0
-folium=0.15.1=pyhd8ed1ab_0
-font-ttf-dejavu-sans-mono=2.37=hab24e00_0
-font-ttf-inconsolata=3.000=h77eed37_0
-font-ttf-source-code-pro=2.038=h77eed37_0
-font-ttf-ubuntu=0.83=h77eed37_1
-fontconfig=2.14.2=h14ed4e7_0
-fonts-conda-ecosystem=1=0
-fonts-conda-forge=1=0
-fonttools=4.49.0=py311h459d7ec_0
-fqdn=1.5.1=pyhd8ed1ab_0
-freetype=2.12.1=h267a509_2
-freexl=2.0.0=h743c826_0
-fsspec=2024.2.0=pyhca7485f_0
-gdal=3.8.4=py311h8be719e_0
-geojson=3.1.0=pyhd8ed1ab_0
-geopandas=0.14.3=pyhd8ed1ab_0
-geopandas-base=0.14.3=pyha770c72_0
-geos=3.12.1=h59595ed_0
-geotiff=1.7.1=h6b2125f_15
-gettext=0.21.1=h27087fc_0
-gflags=2.2.2=he1b5a44_1004
-giflib=5.2.1=h0b41bf4_3
-glog=0.6.0=h6f12383_0
-greenlet=3.0.3=py311hb755f60_0
-h11=0.14.0=pyhd8ed1ab_0
-h2=4.1.0=pyhd8ed1ab_0
-hdf4=4.2.15=h2a13503_7
-hdf5=1.14.3=nompi_h4f84152_100
-hdijupyterutils=0.21.0=pyh1a96a4e_1
-holoviews=1.18.3=pyhd8ed1ab_0
-hpack=4.0.0=pyh9f0ad1d_0
-httpcore=1.0.2=pyhd8ed1ab_0
-httpx=0.26.0=pyhd8ed1ab_0
-hvplot=0.9.2=pyhd8ed1ab_0
-hyperframe=6.0.1=pyhd8ed1ab_0
-icu=73.2=h59595ed_0
-idna=3.6=pyhd8ed1ab_0
-importlib-metadata=7.0.1=pyha770c72_0
-importlib_metadata=7.0.1=hd8ed1ab_0
-importlib_resources=6.1.1=pyhd8ed1ab_0
-ipykernel=6.29.2=pyhd33586a_0
-ipython=8.21.0=pyh707e725_0
-ipython_genutils=0.2.0=py_1
-ipywidgets=8.1.2=pyhd8ed1ab_0
-isoduration=20.11.0=pyhd8ed1ab_0
-jedi=0.19.1=pyhd8ed1ab_0
-jinja2=3.1.3=pyhd8ed1ab_0
-joblib=1.3.2=pyhd8ed1ab_0
-json-c=0.17=h7ab15ed_0
-json5=0.9.14=pyhd8ed1ab_0
-jsonpatch=1.33=pyhd8ed1ab_0
-jsonpointer=2.4=py311h38be061_3
-jsonschema=4.21.1=pyhd8ed1ab_0
-jsonschema-specifications=2023.12.1=pyhd8ed1ab_0
-jsonschema-with-format-nongpl=4.21.1=pyhd8ed1ab_0
-jupyter=1.0.0=pyhd8ed1ab_10
-jupyter-lsp=2.2.2=pyhd8ed1ab_0
-jupyter_client=7.4.9=pyhd8ed1ab_0
-jupyter_console=6.6.3=pyhd8ed1ab_0
-jupyter_core=5.7.1=py311h38be061_0
-jupyter_events=0.9.0=pyhd8ed1ab_0
-jupyter_server=2.12.5=pyhd8ed1ab_0
-jupyter_server_terminals=0.5.2=pyhd8ed1ab_0
-jupyter_telemetry=0.1.0=pyhd8ed1ab_1
-jupyterhub=4.0.2=pyh31011fe_0
-jupyterhub-base=4.0.2=pyh31011fe_0
-jupyterlab=4.1.1=pyhd8ed1ab_0
-jupyterlab_pygments=0.3.0=pyhd8ed1ab_1
-jupyterlab_server=2.25.2=pyhd8ed1ab_0
-jupyterlab_widgets=3.0.10=pyhd8ed1ab_0
-kealib=1.5.3=h2f55d51_0
-keyutils=1.6.1=h166bdaf_0
-kiwisolver=1.4.5=py311h9547e67_1
-krb5=1.21.2=h659d440_0
-lcms2=2.16=hb7c19ff_0
-ld_impl_linux-64=2.40=h41732ed_0
-lerc=4.0.0=h27087fc_0
-libabseil=20230802.1=cxx17_h59595ed_0
-libaec=1.1.2=h59595ed_1
-libarchive=3.7.2=h2aa1ff5_1
-libarrow=15.0.0=h49c8883_4_cpu
-libarrow-acero=15.0.0=h59595ed_4_cpu
-libarrow-dataset=15.0.0=h59595ed_4_cpu
-libarrow-flight=15.0.0=hdc44a87_4_cpu
-libarrow-flight-sql=15.0.0=hfbc7f12_4_cpu
-libarrow-gandiva=15.0.0=h308e607_4_cpu
-libarrow-substrait=15.0.0=hfbc7f12_4_cpu
-libblas=3.9.0=21_linux64_openblas
-libboost-headers=1.84.0=ha770c72_1
-libbrotlicommon=1.1.0=hd590300_1
-libbrotlidec=1.1.0=hd590300_1
-libbrotlienc=1.1.0=hd590300_1
-libcblas=3.9.0=21_linux64_openblas
-libcrc32c=1.1.2=h9c3ff4c_0
-libcurl=8.5.0=hca28451_0
-libdeflate=1.19=hd590300_0
-libedit=3.1.20191231=he28a2e2_2
-libev=4.33=hd590300_2
-libevent=2.1.12=hf998b51_1
-libexpat=2.5.0=hcb278e6_1
-libffi=3.4.2=h7f98852_5
-libgcc-ng=13.2.0=h807b86a_5
-libgdal=3.8.4=h9323651_0
-libgfortran-ng=13.2.0=h69a702a_5
-libgfortran5=13.2.0=ha4646dd_5
-libglib=2.78.4=h783c2da_0
-libgomp=13.2.0=h807b86a_5
-libgoogle-cloud=2.12.0=hef10d8f_5
-libgrpc=1.60.1=h74775cd_0
-libiconv=1.17=hd590300_2
-libjpeg-turbo=3.0.0=hd590300_1
-libkml=1.3.0=h01aab08_1018
-liblapack=3.9.0=21_linux64_openblas
-libllvm15=15.0.7=hb3ce162_4
-libmamba=1.5.6=had39da4_0
-libmambapy=1.5.6=py311hf2555c7_0
-libnetcdf=4.9.2=nompi_h9612171_113
-libnghttp2=1.58.0=h47da74e_1
-libnl=3.9.0=hd590300_0
-libnsl=2.0.1=hd590300_0
-libnuma=2.0.16=h0b41bf4_1
-libopenblas=0.3.26=pthreads_h413a1c8_0
-libparquet=15.0.0=h352af49_4_cpu
-libpng=1.6.42=h2797004_0
-libpq=16.2=h33b98f1_0
-libprotobuf=4.25.1=hf27288f_2
-libre2-11=2023.06.02=h7a70373_0
-librttopo=1.1.0=h8917695_15
-libsodium=1.0.18=h36c2ea0_1
-libsolv=0.7.28=hfc55251_0
-libspatialindex=1.9.3=h9c3ff4c_4
-libspatialite=5.1.0=h7bd4643_4
-libsqlite=3.45.1=h2797004_0
-libssh2=1.11.0=h0841786_0
-libstdcxx-ng=13.2.0=h7e041cc_5
-libthrift=0.19.0=hb90f79a_1
-libtiff=4.6.0=ha9c0a0a_2
-libutf8proc=2.8.0=h166bdaf_0
-libuuid=2.38.1=h0b41bf4_0
-libuv=1.46.0=hd590300_0
-libwebp-base=1.3.2=hd590300_0
-libxcb=1.15=h0b41bf4_0
-libxcrypt=4.4.36=hd590300_1
-libxml2=2.12.5=h232c23b_0
-libzip=1.10.1=h2629f0a_3
-libzlib=1.2.13=hd590300_5
-linkify-it-py=2.0.3=pyhd8ed1ab_0
-locket=1.0.0=pyhd8ed1ab_0
-lz4=4.3.3=py311h38e4bf4_0
-lz4-c=1.9.4=hcb278e6_0
-lzo=2.10=h516909a_1000
-make=4.3=hd18ef5c_1
-mako=1.3.2=pyhd8ed1ab_0
-mamba=1.5.6=py311h3072747_0
-mapboxgl=0.10.2=py_1
-mapclassify=2.6.1=pyhd8ed1ab_0
-markdown=3.5.2=pyhd8ed1ab_0
-markdown-it-py=3.0.0=pyhd8ed1ab_0
-markupsafe=2.1.5=py311h459d7ec_0
-matplotlib-base=3.8.3=py311h54ef318_0
-matplotlib-inline=0.1.6=pyhd8ed1ab_0
-mdit-py-plugins=0.4.0=pyhd8ed1ab_0
-mdurl=0.1.2=pyhd8ed1ab_0
-menuinst=2.0.2=py311h38be061_0
-minizip=4.0.4=h0ab5242_0
-mistune=3.0.2=pyhd8ed1ab_0
-msgpack-python=1.0.7=py311h9547e67_0
-munkres=1.1.4=pyh9f0ad1d_0
-nbclassic=1.0.0=pyhb4ecaf3_1
-nbclient=0.8.0=pyhd8ed1ab_0
-nbconvert=7.16.0=pyhd8ed1ab_0
-nbconvert-core=7.16.0=pyhd8ed1ab_0
-nbconvert-pandoc=7.16.0=pyhd8ed1ab_0
-nbformat=5.9.2=pyhd8ed1ab_0
-ncurses=6.4=h59595ed_2
-nest-asyncio=1.6.0=pyhd8ed1ab_0
-netcdf4=1.6.5=nompi_py311he8ad708_100
-networkx=3.2.1=pyhd8ed1ab_0
-nodejs=20.9.0=hb753e55_0
-notebook=6.5.5=pyha770c72_0
-notebook-shim=0.2.3=pyhd8ed1ab_0
-nspr=4.35=h27087fc_0
-nss=3.98=h1d7d5a4_0
-numpy=1.26.4=py311h64a7726_0
-oauthlib=3.2.2=pyhd8ed1ab_0
-openjpeg=2.5.0=h488ebb8_3
-openssl=3.2.1=hd590300_0
-orc=1.9.2=h7829240_1
-overrides=7.7.0=pyhd8ed1ab_0
-packaging=23.2=pyhd8ed1ab_0
-pamela=1.1.0=pyh1a96a4e_0
-pandas=1.5.3=py311h2872171_1
-pandas-bokeh=0.5.5=pyhd8ed1ab_0
-pandoc=3.1.11.1=ha770c72_0
-pandocfilters=1.5.0=pyhd8ed1ab_0
-panel=1.3.8=pyhd8ed1ab_0
-param=2.0.2=pyhca7485f_0
-parso=0.8.3=pyhd8ed1ab_0
-partd=1.4.1=pyhd8ed1ab_0
-pcre2=10.42=hcad00b1_0
-pexpect=4.9.0=pyhd8ed1ab_0
-pickleshare=0.7.5=py_1003
-pillow=10.2.0=py311ha6c5da5_0
-pip=24.0=pyhd8ed1ab_0
-pixman=0.43.2=h59595ed_0
-pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1
-platformdirs=4.2.0=pyhd8ed1ab_0
-plotly=5.19.0=pyhd8ed1ab_0
-pluggy=1.4.0=pyhd8ed1ab_0
-poppler=24.02.0=h590f24d_0
-poppler-data=0.4.12=hd8ed1ab_0
-postgresql=16.2=h7387d8b_0
-proj=9.3.1=h1d62c97_0
-prometheus_client=0.19.0=pyhd8ed1ab_0
-prompt-toolkit=3.0.42=pyha770c72_0
-prompt_toolkit=3.0.42=hd8ed1ab_0
-psutil=5.9.8=py311h459d7ec_0
-pthread-stubs=0.4=h36c2ea0_1001
-ptyprocess=0.7.0=pyhd3deb0d_0
-pure_eval=0.2.2=pyhd8ed1ab_0
-pyarrow=15.0.0=py311h39c9aba_4_cpu
-pyarrow-hotfix=0.6=pyhd8ed1ab_0
-pybind11-abi=4=hd8ed1ab_3
-pycosat=0.6.6=py311h459d7ec_0
-pycparser=2.21=pyhd8ed1ab_0
-pyct=0.5.0=pyhd8ed1ab_0
-pycurl=7.45.1=py311hae980a4_3
-pygments=2.17.2=pyhd8ed1ab_0
-pyjwt=2.8.0=pyhd8ed1ab_1
-pyopenssl=24.0.0=pyhd8ed1ab_0
-pyparsing=3.1.1=pyhd8ed1ab_0
-pyproj=3.6.1=py311hca0b8b9_5
-pysocks=1.7.1=pyha2e5f31_6
-pyspnego=0.9.1=py311h459d7ec_2
-python=3.11.7=hab00c5b_1_cpython
-python-dateutil=2.8.2=pyhd8ed1ab_0
-python-fastjsonschema=2.19.1=pyhd8ed1ab_0
-python-json-logger=2.0.7=pyhd8ed1ab_0
-python_abi=3.11=4_cp311
-pytz=2024.1=pyhd8ed1ab_0
-pyviz_comms=3.0.0=pyhd8ed1ab_0
-pyyaml=6.0.1=py311h459d7ec_1
-pyzmq=24.0.1=py311ha4b6469_1
-qtconsole-base=5.5.1=pyha770c72_0
-qtpy=2.4.1=pyhd8ed1ab_0
-rdma-core=50.0=hd3aeb46_1
-re2=2023.06.02=h2873b5e_0
-readline=8.2=h8228510_1
-referencing=0.33.0=pyhd8ed1ab_0
-reproc=14.2.4.post0=hd590300_1
-reproc-cpp=14.2.4.post0=h59595ed_1
-requests=2.31.0=pyhd8ed1ab_0
-requests-kerberos=0.14.0=pyhd8ed1ab_1
-rfc3339-validator=0.1.4=pyhd8ed1ab_0
-rfc3986-validator=0.1.1=pyh9f0ad1d_0
-rpds-py=0.17.1=py311h46250e7_0
-rtree=1.2.0=py311h3bb2b0f_0
-ruamel.yaml=0.18.6=py311h459d7ec_0
-ruamel.yaml.clib=0.2.8=py311h459d7ec_0
-s2n=1.4.3=h06160fa_0
-scikit-learn=1.4.1.post1=py311hc009520_0
-scipy=1.12.0=py311h64a7726_2
-send2trash=1.8.2=pyh41d4057_0
-setuptools=69.0.3=pyhd8ed1ab_0
-shapely=2.0.3=py311h2032efe_0
-six=1.16.0=pyh6c4a22f_0
-snappy=1.1.10=h9fff704_0
-sniffio=1.3.0=pyhd8ed1ab_0
-sortedcontainers=2.4.0=pyhd8ed1ab_0
-soupsieve=2.5=pyhd8ed1ab_1
-sparkmagic=0.21.0=pyhd8ed1ab_1
-sqlalchemy=2.0.26=py311h459d7ec_0
-sqlite=3.45.1=h2c6b66d_0
-stack_data=0.6.2=pyhd8ed1ab_0
-tblib=3.0.0=pyhd8ed1ab_0
-tenacity=8.2.3=pyhd8ed1ab_0
-terminado=0.18.0=pyh0d859eb_0
-threadpoolctl=3.3.0=pyhc1e730c_0
-tiledb=2.20.0=h4386cac_0
-tinycss2=1.2.1=pyhd8ed1ab_0
-tk=8.6.13=noxft_h4845f30_101
-tomli=2.0.1=pyhd8ed1ab_0
-toolz=0.12.1=pyhd8ed1ab_0
-tornado=6.3.3=py311h459d7ec_1
-tqdm=4.66.2=pyhd8ed1ab_0
-traitlets=5.9.0=pyhd8ed1ab_0
-truststore=0.8.0=pyhd8ed1ab_0
-types-python-dateutil=2.8.19.20240106=pyhd8ed1ab_0
-typing-extensions=4.9.0=hd8ed1ab_0
-typing_extensions=4.9.0=pyha770c72_0
-typing_utils=0.1.0=pyhd8ed1ab_0
-tzcode=2024a=h3f72095_0
-tzdata=2024a=h0c530f3_0
-uc-micro-py=1.0.3=pyhd8ed1ab_0
-ucx=1.15.0=h75e419f_3
-uri-template=1.3.0=pyhd8ed1ab_0
-uriparser=0.9.7=hcb278e6_1
-urllib3=2.2.0=pyhd8ed1ab_0
-wcwidth=0.2.13=pyhd8ed1ab_0
-webcolors=1.13=pyhd8ed1ab_0
-webencodings=0.5.1=pyhd8ed1ab_2
-websocket-client=1.7.0=pyhd8ed1ab_0
-wheel=0.42.0=pyhd8ed1ab_0
-widgetsnbextension=4.0.10=pyhd8ed1ab_0
-xarray=2024.2.0=pyhd8ed1ab_0
-xerces-c=3.2.5=hac6953d_0
-xorg-kbproto=1.0.7=h7f98852_1002
-xorg-libice=1.1.1=hd590300_0
-xorg-libsm=1.2.4=h7391055_0
-xorg-libx11=1.8.7=h8ee46fc_0
-xorg-libxau=1.0.11=hd590300_0
-xorg-libxdmcp=1.1.3=h7f98852_0
-xorg-libxext=1.3.4=h0b41bf4_2
-xorg-libxrender=0.9.11=hd590300_0
-xorg-renderproto=0.11.1=h7f98852_1002
-xorg-xextproto=7.3.0=h0b41bf4_1003
-xorg-xproto=7.0.31=h7f98852_1007
-xyzservices=2023.10.1=pyhd8ed1ab_0
-xz=5.2.6=h166bdaf_0
-yaml=0.2.5=h7f98852_2
-yaml-cpp=0.8.0=h59595ed_0
-zeromq=4.3.5=h59595ed_0
-zict=3.0.0=pyhd8ed1ab_0
-zipp=3.17.0=pyhd8ed1ab_0
-zlib=1.2.13=hd590300_5
-zstandard=0.22.0=py311haa97af0_0
-zstd=1.5.5=hfc55251_0
diff --git a/images/jupyter/requirements/linux/arm64/env.yaml b/images/jupyter/requirements/linux/arm64/env.yaml
new file mode 100644
index 0000000000..1b0f42d63d
--- /dev/null
+++ b/images/jupyter/requirements/linux/arm64/env.yaml
@@ -0,0 +1,274 @@
+channels:
+- conda-forge
+dependencies:
+- _openmp_mutex=4.5
+- alembic=1.14.0
+- annotated-types=0.7.0
+- anyio=4.7.0
+- archspec=0.2.3
+- argon2-cffi=23.1.0
+- argon2-cffi-bindings=21.2.0
+- arrow=1.3.0
+- asttokens=3.0.0
+- async-lru=2.0.4
+- async_generator=1.10
+- attrs=24.2.0
+- babel=2.16.0
+- beautifulsoup4=4.12.3
+- bleach=6.2.0
+- blinker=1.9.0
+- boltons=24.0.0
+- brotli-python=1.1.0
+- bzip2=1.0.8
+- c-ares=1.34.3
+- ca-certificates=2024.8.30
+- cached-property=1.5.2
+- cached_property=1.5.2
+- certifi=2024.8.30
+- certipy=0.2.1
+- cffi=1.17.1
+- charset-normalizer=3.4.0
+- colorama=0.4.6
+- comm=0.2.2
+- conda=24.11.0
+- conda-libmamba-solver=24.11.1
+- conda-package-handling=2.4.0
+- conda-package-streaming=0.11.0
+- cpp-expected=1.1.0
+- cryptography=44.0.0
+- debugpy=1.8.9
+- decorator=5.1.1
+- defusedxml=0.7.1
+- distro=1.9.0
+- entrypoints=0.4
+- exceptiongroup=1.2.2
+- executing=2.1.0
+- fmt=11.0.2
+- fqdn=1.5.1
+- frozendict=2.4.6
+- greenlet=3.1.1
+- h11=0.14.0
+- h2=4.1.0
+- hpack=4.0.0
+- httpcore=1.0.7
+- httpx=0.28.1
+- hyperframe=6.0.1
+- icu=75.1
+- idna=3.10
+- importlib-metadata=8.5.0
+- importlib_resources=6.4.5
+- ipykernel=6.29.5
+- ipython=8.30.0
+- ipython_genutils=0.2.0
+- isoduration=20.11.0
+- jedi=0.19.2
+- jinja2=3.1.4
+- json5=0.10.0
+- jsonpatch=1.33
+- jsonpointer=3.0.0
+- jsonschema=4.23.0
+- jsonschema-specifications=2024.10.1
+- jsonschema-with-format-nongpl=4.23.0
+- jupyter-lsp=2.2.5
+- jupyter_client=8.6.3
+- jupyter_core=5.7.2
+- jupyter_events=0.10.0
+- jupyter_server=2.14.2
+- jupyter_server_terminals=0.5.3
+- jupyterhub-base=5.2.1
+- jupyterhub-singleuser=5.2.1
+- jupyterlab=4.3.2
+- jupyterlab_pygments=0.3.0
+- jupyterlab_server=2.27.3
+- keyutils=1.6.1
+- krb5=1.21.3
+- ld_impl_linux-aarch64=2.43
+- libarchive=3.7.7
+- libcurl=8.10.1
+- libedit=3.1.20191231
+- libev=4.33
+- libexpat=2.6.4
+- libffi=3.4.2
+- libgcc=14.2.0
+- libgcc-ng=14.2.0
+- libgomp=14.2.0
+- libiconv=1.17
+- liblzma=5.6.3
+- libmamba=2.0.4
+- libmambapy=2.0.4
+- libnghttp2=1.64.0
+- libnsl=2.0.1
+- libsodium=1.0.20
+- libsolv=0.7.30
+- libsqlite=3.47.0
+- libssh2=1.11.1
+- libstdcxx=14.2.0
+- libstdcxx-ng=14.2.0
+- libuuid=2.38.1
+- libxcrypt=4.4.36
+- libxml2=2.13.5
+- libzlib=1.3.1
+- lz4-c=1.10.0
+- lzo=2.10
+- make=4.4.1
+- mako=1.3.8
+- mamba=2.0.4
+- markupsafe=3.0.2
+- matplotlib-inline=0.1.7
+- menuinst=2.2.0
+- mistune=3.0.2
+- nbclassic=1.1.0
+- nbclient=0.10.1
+- nbconvert-core=7.16.4
+- nbformat=5.10.4
+- ncurses=6.5
+- nest-asyncio=1.6.0
+- nlohmann_json=3.11.3
+- notebook=7.3.1
+- notebook-shim=0.2.4
+- oauthlib=3.2.2
+- openssl=3.4.0
+- overrides=7.7.0
+- packaging=24.2
+- pamela=1.2.0
+- pandocfilters=1.5.0
+- parso=0.8.4
+- pexpect=4.9.0
+- pickleshare=0.7.5
+- pip=24.3.1
+- pkgutil-resolve-name=1.3.10
+- platformdirs=4.3.6
+- pluggy=1.5.0
+- prometheus_client=0.21.1
+- prompt-toolkit=3.0.48
+- psutil=6.1.0
+- ptyprocess=0.7.0
+- pure_eval=0.2.3
+- pybind11-abi=4
+- pycosat=0.6.6
+- pycparser=2.22
+- pydantic=2.10.3
+- pydantic-core=2.27.1
+- pygments=2.18.0
+- pyjwt=2.10.1
+- pysocks=1.7.1
+- python=3.12.8
+- python-dateutil=2.9.0.post0
+- python-fastjsonschema=2.21.1
+- python-json-logger=2.0.7
+- python_abi=3.12
+- pytz=2024.2
+- pyyaml=6.0.2
+- pyzmq=26.2.0
+- readline=8.2
+- referencing=0.35.1
+- reproc=14.2.4.post0
+- reproc-cpp=14.2.4.post0
+- requests=2.32.3
+- rfc3339-validator=0.1.4
+- rfc3986-validator=0.1.1
+- rpds-py=0.22.3
+- ruamel.yaml=0.18.6
+- ruamel.yaml.clib=0.2.8
+- send2trash=1.8.3
+- setuptools=75.6.0
+- simdjson=3.10.1
+- six=1.17.0
+- sniffio=1.3.1
+- soupsieve=2.5
+- spdlog=1.14.1
+- sqlalchemy=2.0.36
+- stack_data=0.6.3
+- terminado=0.18.1
+- tinycss2=1.4.0
+- tk=8.6.13
+- tomli=2.2.1
+- tornado=6.4.2
+- tqdm=4.67.1
+- traitlets=5.14.3
+- truststore=0.10.0
+- types-python-dateutil=2.9.0.20241206
+- typing-extensions=4.12.2
+- typing_extensions=4.12.2
+- typing_utils=0.1.0
+- tzdata=2024b
+- uri-template=1.3.0
+- urllib3=2.2.3
+- wcwidth=0.2.13
+- webcolors=24.11.1
+- webencodings=0.5.1
+- websocket-client=1.8.0
+- wheel=0.45.1
+- yaml=0.2.5
+- yaml-cpp=0.8.0
+- zeromq=4.3.5
+- zipp=3.21.0
+- zstandard=0.23.0
+- zstd=1.5.6
+- pip:
+ - adbc-driver-flightsql==1.3.0
+ - adbc-driver-manager==1.3.0
+ - altair==5.5.0
+ - autovizwidget==0.22.0
+ - bokeh==3.6.2
+ - branca==0.8.1
+ - cftime==1.6.4.post1
+ - chroma-py==0.1.0.dev1
+ - click==8.1.8
+ - cloudpickle==3.1.0
+ - colorcet==3.1.0
+ - colour==0.1.5
+ - contourpy==1.3.1
+ - cycler==0.12.1
+ - dask==2024.12.1
+ - folium==0.19.2
+ - fonttools==4.55.3
+ - fsspec==2024.12.0
+ - geojson==3.2.0
+ - geopandas==1.0.1
+ - hdijupyterutils==0.22.0
+ - holoviews==1.20.0
+ - hvplot==0.11.2
+ - ipywidgets==8.1.5
+ - jupyter==1.1.1
+ - jupyter-console==6.6.3
+ - jupyterlab_widgets==3.0.13
+ - kamu==0.6.0
+ - kiwisolver==1.4.8
+ - linkify-it-py==2.0.3
+ - livy==0.8.0
+ - locket==1.0.0
+ - mapboxgl==0.10.2
+ - Markdown==3.7
+ - markdown-it-py==3.0.0
+ - matplotlib==3.10.0
+ - mdit-py-plugins==0.4.2
+ - mdurl==0.1.2
+ - narwhals==1.19.1
+ - netCDF4==1.7.2
+ - numpy==2.2.1
+ - pandas==2.2.3
+ - pandas-bokeh==0.5.5
+ - panel==1.5.5
+ - param==2.2.0
+ - partd==1.4.2
+ - pillow==11.0.0
+ - plotly==5.24.1
+ - pyarrow==18.1.0
+ - pyogrio==0.10.0
+ - pyparsing==3.2.0
+ - pyproj==3.7.0
+ - pyviz_comms==3.0.3
+ - setuptools==75.6.0
+ - shapely==2.0.6
+ - tenacity==9.0.0
+ - toolz==1.0.0
+ - tzdata==2024.2
+ - uc-micro-py==1.0.3
+ - wheel==0.45.1
+ - widgetsnbextension==4.0.13
+ - xarray==2024.11.0
+ - xyzservices==2024.9.0
+ - zstandard==0.23.0
+name: base
+prefix: /opt/conda
diff --git a/images/jupyter/requirements/linux/arm64/requirements.in b/images/jupyter/requirements/linux/arm64/requirements.in
index d7b27f8f59..68140ad718 100644
--- a/images/jupyter/requirements/linux/arm64/requirements.in
+++ b/images/jupyter/requirements/linux/arm64/requirements.in
@@ -1,21 +1,16 @@
-# TODO: Pinned due to sparkmagic installation issue
-# See: https://github.com/jupyter-incubator/sparkmagic/issues/825
-# See workaround applied in: https://github.com/jupyter-incubator/sparkmagic/blob/master/Dockerfile.jupyter
-notebook==6.5.5
+kamu[jupyter-autoviz,jupyter-sql,spark]
-sparkmagic
-
-pandas
+dask
geopandas
geojson
-xarray
netcdf4
-dask
+pandas
+xarray
+altair
bokeh
-hvplot
-pandas-bokeh
folium
-altair
+hvplot
mapboxgl
+pandas-bokeh
shapely
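For use outside the prebuilt image, the first entry corresponds to installing the `kamu` client package with the same extras; presumably:

    pip install 'kamu[jupyter-autoviz,jupyter-sql,spark]'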
diff --git a/images/jupyter/requirements/linux/arm64/requirements.txt b/images/jupyter/requirements/linux/arm64/requirements.txt
deleted file mode 100644
index 832732c7db..0000000000
--- a/images/jupyter/requirements/linux/arm64/requirements.txt
+++ /dev/null
@@ -1,411 +0,0 @@
-# This file may be used to create an environment using:
-# $ conda create --name <env> --file <this file>
-# platform: linux-aarch64
-_openmp_mutex=4.5=2_gnu
-alembic=1.13.1=pyhd8ed1ab_1
-altair=5.2.0=pyhd8ed1ab_0
-anyio=4.2.0=pyhd8ed1ab_0
-archspec=0.2.2=pyhd8ed1ab_0
-argon2-cffi=23.1.0=pyhd8ed1ab_0
-argon2-cffi-bindings=21.2.0=py311hcd402e7_4
-arrow=1.3.0=pyhd8ed1ab_0
-asttokens=2.4.1=pyhd8ed1ab_0
-async-lru=2.0.4=pyhd8ed1ab_0
-async_generator=1.10=py_0
-attrs=23.2.0=pyh71513ae_0
-autovizwidget=0.21.0=pyh1a96a4e_1
-aws-c-auth=0.7.16=h570bf23_5
-aws-c-cal=0.6.10=h967b9ec_1
-aws-c-common=0.9.13=h31becfc_0
-aws-c-compression=0.2.18=h00d1b86_1
-aws-c-event-stream=0.4.2=h10e8a16_3
-aws-c-http=0.8.1=hf0788a4_4
-aws-c-io=0.14.4=h87c19fb_2
-aws-c-mqtt=0.10.2=he8e29e5_3
-aws-c-s3=0.5.1=h71a96cc_6
-aws-c-sdkutils=0.1.15=h00d1b86_1
-aws-checksums=0.1.18=h00d1b86_1
-aws-crt-cpp=0.26.2=h8568a09_5
-aws-sdk-cpp=1.11.267=hfce6cab_1
-azure-core-cpp=1.10.3=hcd87347_1
-azure-storage-blobs-cpp=12.10.0=h2a328a1_0
-azure-storage-common-cpp=12.5.0=hee0c750_2
-babel=2.14.0=pyhd8ed1ab_0
-beautifulsoup4=4.12.3=pyha770c72_0
-bleach=6.1.0=pyhd8ed1ab_0
-blinker=1.7.0=pyhd8ed1ab_0
-blosc=1.21.5=h2f3a684_0
-bokeh=3.3.4=pyhd8ed1ab_0
-boltons=23.1.1=pyhd8ed1ab_0
-branca=0.7.1=pyhd8ed1ab_0
-brotli=1.1.0=h31becfc_1
-brotli-bin=1.1.0=h31becfc_1
-brotli-python=1.1.0=py311h8715677_1
-bzip2=1.0.8=h31becfc_5
-c-ares=1.26.0=h31becfc_0
-ca-certificates=2024.2.2=hcefe29a_0
-cached-property=1.5.2=hd8ed1ab_1
-cached_property=1.5.2=pyha770c72_1
-cairo=1.18.0=ha13f110_0
-certifi=2024.2.2=pyhd8ed1ab_0
-certipy=0.1.3=py_0
-cffi=1.16.0=py311h7963103_0
-cfitsio=4.3.1=hf28c5f1_0
-cftime=1.6.3=py311hf13da56_0
-charset-normalizer=3.3.2=pyhd8ed1ab_0
-chroma-py=0.1.0.dev1=py_0
-click=8.1.7=unix_pyh707e725_0
-click-plugins=1.1.1=py_0
-cligj=0.7.2=pyhd8ed1ab_1
-cloudpickle=3.0.0=pyhd8ed1ab_0
-colorama=0.4.6=pyhd8ed1ab_0
-colorcet=3.0.1=pyhd8ed1ab_0
-colour=0.1.5=pyhd8ed1ab_1
-comm=0.2.1=pyhd8ed1ab_0
-conda=23.11.0=py311hec3470c_1
-conda-libmamba-solver=24.1.0=pyhd8ed1ab_0
-conda-package-handling=2.2.0=pyh38be061_0
-conda-package-streaming=0.9.0=pyhd8ed1ab_0
-configurable-http-proxy=4.6.1=h4e45a9e_0
-contourpy=1.2.0=py311h098ece5_0
-cryptography=42.0.2=py311h2245af3_0
-cycler=0.12.1=pyhd8ed1ab_0
-cytoolz=0.12.3=py311hc8f2f60_0
-dask=2024.2.0=pyhd8ed1ab_0
-dask-core=2024.2.0=pyhd8ed1ab_0
-debugpy=1.8.1=py311h8715677_0
-decorator=5.1.1=pyhd8ed1ab_0
-defusedxml=0.7.1=pyhd8ed1ab_0
-distributed=2024.2.0=pyhd8ed1ab_0
-distro=1.9.0=pyhd8ed1ab_0
-entrypoints=0.4=pyhd8ed1ab_0
-exceptiongroup=1.2.0=pyhd8ed1ab_2
-executing=2.0.1=pyhd8ed1ab_0
-expat=2.5.0=hd600fc2_1
-fiona=1.9.5=py311he15760a_3
-fmt=10.2.1=h2a328a1_0
-folium=0.15.1=pyhd8ed1ab_0
-font-ttf-dejavu-sans-mono=2.37=hab24e00_0
-font-ttf-inconsolata=3.000=h77eed37_0
-font-ttf-source-code-pro=2.038=h77eed37_0
-font-ttf-ubuntu=0.83=h77eed37_1
-fontconfig=2.14.2=ha9a116f_0
-fonts-conda-ecosystem=1=0
-fonts-conda-forge=1=0
-fonttools=4.49.0=py311hcd402e7_0
-fqdn=1.5.1=pyhd8ed1ab_0
-freetype=2.12.1=hf0a5ef3_2
-freexl=2.0.0=h5428426_0
-fsspec=2024.2.0=pyhca7485f_0
-gdal=3.8.4=py311h3b5b607_0
-geojson=3.1.0=pyhd8ed1ab_0
-geopandas=0.14.3=pyhd8ed1ab_0
-geopandas-base=0.14.3=pyha770c72_0
-geos=3.12.1=h2f0025b_0
-geotiff=1.7.1=h3e58e51_15
-gettext=0.21.1=ha18d298_0
-gflags=2.2.2=h54f1f3f_1004
-giflib=5.2.1=hb4cce97_3
-glog=0.6.0=h8ab10f1_0
-greenlet=3.0.3=py311h8715677_0
-h11=0.14.0=pyhd8ed1ab_0
-h2=4.1.0=pyhd8ed1ab_0
-hdf4=4.2.15=hb6ba311_7
-hdf5=1.14.3=nompi_ha486f32_100
-hdijupyterutils=0.21.0=pyh1a96a4e_1
-holoviews=1.18.3=pyhd8ed1ab_0
-hpack=4.0.0=pyh9f0ad1d_0
-httpcore=1.0.2=pyhd8ed1ab_0
-httpx=0.26.0=pyhd8ed1ab_0
-hvplot=0.9.2=pyhd8ed1ab_0
-hyperframe=6.0.1=pyhd8ed1ab_0
-icu=73.2=h787c7f5_0
-idna=3.6=pyhd8ed1ab_0
-importlib-metadata=7.0.1=pyha770c72_0
-importlib_metadata=7.0.1=hd8ed1ab_0
-importlib_resources=6.1.1=pyhd8ed1ab_0
-ipykernel=6.29.2=pyhd33586a_0
-ipython=8.21.0=pyh707e725_0
-ipython_genutils=0.2.0=py_1
-ipywidgets=8.1.2=pyhd8ed1ab_0
-isoduration=20.11.0=pyhd8ed1ab_0
-jedi=0.19.1=pyhd8ed1ab_0
-jinja2=3.1.3=pyhd8ed1ab_0
-joblib=1.3.2=pyhd8ed1ab_0
-json-c=0.17=h9d1147b_0
-json5=0.9.14=pyhd8ed1ab_0
-jsonpatch=1.33=pyhd8ed1ab_0
-jsonpointer=2.4=py311hec3470c_3
-jsonschema=4.21.1=pyhd8ed1ab_0
-jsonschema-specifications=2023.12.1=pyhd8ed1ab_0
-jsonschema-with-format-nongpl=4.21.1=pyhd8ed1ab_0
-jupyter=1.0.0=pyhd8ed1ab_10
-jupyter-lsp=2.2.2=pyhd8ed1ab_0
-jupyter_client=7.4.9=pyhd8ed1ab_0
-jupyter_console=6.6.3=pyhd8ed1ab_0
-jupyter_core=5.7.1=py311hec3470c_0
-jupyter_events=0.9.0=pyhd8ed1ab_0
-jupyter_server=2.12.5=pyhd8ed1ab_0
-jupyter_server_terminals=0.5.2=pyhd8ed1ab_0
-jupyter_telemetry=0.1.0=pyhd8ed1ab_1
-jupyterhub=4.0.2=pyh31011fe_0
-jupyterhub-base=4.0.2=pyh31011fe_0
-jupyterlab=4.1.1=pyhd8ed1ab_0
-jupyterlab_pygments=0.3.0=pyhd8ed1ab_1
-jupyterlab_server=2.25.2=pyhd8ed1ab_0
-jupyterlab_widgets=3.0.10=pyhd8ed1ab_0
-kealib=1.5.3=h4670d8b_0
-keyutils=1.6.1=h4e544f5_0
-kiwisolver=1.4.5=py311h0d5d7b0_1
-krb5=1.21.2=hc419048_0
-lcms2=2.16=h922389a_0
-ld_impl_linux-aarch64=2.40=h2d8c526_0
-lerc=4.0.0=h4de3ea5_0
-libabseil=20230802.1=cxx17_h2f0025b_0
-libaec=1.1.2=h2f0025b_1
-libarchive=3.7.2=hd2f85e0_1
-libarrow=15.0.0=h606a0d5_4_cpu
-libarrow-acero=15.0.0=h2f0025b_4_cpu
-libarrow-dataset=15.0.0=h2f0025b_4_cpu
-libarrow-flight=15.0.0=he69d72d_4_cpu
-libarrow-flight-sql=15.0.0=h1fc705f_4_cpu
-libarrow-gandiva=15.0.0=h90362dd_4_cpu
-libarrow-substrait=15.0.0=h0599332_4_cpu
-libblas=3.9.0=21_linuxaarch64_openblas
-libboost-headers=1.84.0=h8af1aa0_1
-libbrotlicommon=1.1.0=h31becfc_1
-libbrotlidec=1.1.0=h31becfc_1
-libbrotlienc=1.1.0=h31becfc_1
-libcblas=3.9.0=21_linuxaarch64_openblas
-libcrc32c=1.1.2=h01db608_0
-libcurl=8.5.0=h4e8248e_0
-libdeflate=1.19=h31becfc_0
-libedit=3.1.20191231=he28a2e2_2
-libev=4.33=h31becfc_2
-libevent=2.1.12=h4ba1bb4_1
-libexpat=2.5.0=hd600fc2_1
-libffi=3.4.2=h3557bc0_5
-libgcc-ng=13.2.0=hf8544c7_5
-libgdal=3.8.4=h79c3f81_0
-libgfortran-ng=13.2.0=he9431aa_5
-libgfortran5=13.2.0=h582850c_5
-libglib=2.78.4=h311d5f7_0
-libgomp=13.2.0=hf8544c7_5
-libgoogle-cloud=2.12.0=h3b99733_5
-libgrpc=1.60.1=heeb7df3_0
-libiconv=1.17=h31becfc_2
-libjpeg-turbo=3.0.0=h31becfc_1
-libkml=1.3.0=h7d16752_1018
-liblapack=3.9.0=21_linuxaarch64_openblas
-libllvm15=15.0.7=hb4f23b0_4
-libmamba=1.5.6=hea3be6c_0
-libmambapy=1.5.6=py311h765b69a_0
-libnetcdf=4.9.2=nompi_h33102a8_113
-libnghttp2=1.58.0=hb0e430d_1
-libnsl=2.0.1=h31becfc_0
-libnuma=2.0.16=hb4cce97_1
-libopenblas=0.3.26=pthreads_h5a5ec62_0
-libparquet=15.0.0=hb18b541_4_cpu
-libpng=1.6.42=h194ca79_0
-libpq=16.2=h58720eb_0
-libprotobuf=4.25.1=h87e877f_2
-libre2-11=2023.06.02=hf48c5ca_0
-librttopo=1.1.0=hd8968fb_15
-libsodium=1.0.18=hb9de7d4_1
-libsolv=0.7.28=hd84c7bf_0
-libspatialindex=1.9.3=h01db608_4
-libspatialite=5.1.0=h896d346_4
-libsqlite=3.45.1=h194ca79_0
-libssh2=1.11.0=h492db2e_0
-libstdcxx-ng=13.2.0=h9a76618_5
-libthrift=0.19.0=h043aeee_1
-libtiff=4.6.0=h1708d11_2
-libutf8proc=2.8.0=h4e544f5_0
-libuuid=2.38.1=hb4cce97_0
-libuv=1.46.0=h31becfc_0
-libwebp-base=1.3.2=h31becfc_0
-libxcb=1.15=h2a766a3_0
-libxcrypt=4.4.36=h31becfc_1
-libxml2=2.12.5=h3091e33_0
-libzip=1.10.1=h4156a30_3
-libzlib=1.2.13=h31becfc_5
-linkify-it-py=2.0.3=pyhd8ed1ab_0
-locket=1.0.0=pyhd8ed1ab_0
-lz4=4.3.3=py311h6a4b261_0
-lz4-c=1.9.4=hd600fc2_0
-lzo=2.10=h516909a_1000
-make=4.3=h309ac5b_1
-mako=1.3.2=pyhd8ed1ab_0
-mamba=1.5.6=py311hb6c5aa6_0
-mapboxgl=0.10.2=py_1
-mapclassify=2.6.1=pyhd8ed1ab_0
-markdown=3.5.2=pyhd8ed1ab_0
-markdown-it-py=3.0.0=pyhd8ed1ab_0
-markupsafe=2.1.5=py311hc8f2f60_0
-matplotlib-base=3.8.3=py311h1f11223_0
-matplotlib-inline=0.1.6=pyhd8ed1ab_0
-mdit-py-plugins=0.4.0=pyhd8ed1ab_0
-mdurl=0.1.2=pyhd8ed1ab_0
-menuinst=2.0.2=py311hec3470c_0
-minizip=4.0.4=hb75dd74_0
-mistune=3.0.2=pyhd8ed1ab_0
-msgpack-python=1.0.7=py311h0d5d7b0_0
-munkres=1.1.4=pyh9f0ad1d_0
-nbclassic=1.0.0=pyhb4ecaf3_1
-nbclient=0.8.0=pyhd8ed1ab_0
-nbconvert=7.16.0=pyhd8ed1ab_0
-nbconvert-core=7.16.0=pyhd8ed1ab_0
-nbconvert-pandoc=7.16.0=pyhd8ed1ab_0
-nbformat=5.9.2=pyhd8ed1ab_0
-ncurses=6.4=h0425590_2
-nest-asyncio=1.6.0=pyhd8ed1ab_0
-netcdf4=1.6.5=nompi_py311hcd50196_100
-networkx=3.2.1=pyhd8ed1ab_0
-nodejs=20.9.0=hc1f8a26_0
-notebook=6.5.5=pyha770c72_0
-notebook-shim=0.2.3=pyhd8ed1ab_0
-nspr=4.35=h4de3ea5_0
-nss=3.98=hc5a5cc2_0
-numpy=1.26.4=py311h69ead2a_0
-oauthlib=3.2.2=pyhd8ed1ab_0
-openjpeg=2.5.0=h0d9d63b_3
-openssl=3.2.1=h31becfc_0
-orc=1.9.2=h5960ff3_1
-overrides=7.7.0=pyhd8ed1ab_0
-packaging=23.2=pyhd8ed1ab_0
-pamela=1.1.0=pyh1a96a4e_0
-pandas=1.5.3=py311hff2c139_1
-pandas-bokeh=0.5.5=pyhd8ed1ab_0
-pandoc=3.1.11.1=h8af1aa0_0
-pandocfilters=1.5.0=pyhd8ed1ab_0
-panel=1.3.8=pyhd8ed1ab_0
-param=2.0.2=pyhca7485f_0
-parso=0.8.3=pyhd8ed1ab_0
-partd=1.4.1=pyhd8ed1ab_0
-pcre2=10.42=hd0f9c67_0
-pexpect=4.9.0=pyhd8ed1ab_0
-pickleshare=0.7.5=py_1003
-pillow=10.2.0=py311hbcc2232_0
-pip=24.0=pyhd8ed1ab_0
-pixman=0.43.2=h2f0025b_0
-pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1
-platformdirs=4.2.0=pyhd8ed1ab_0
-plotly=5.19.0=pyhd8ed1ab_0
-pluggy=1.4.0=pyhd8ed1ab_0
-poppler=24.02.0=h3cd87ed_0
-poppler-data=0.4.12=hd8ed1ab_0
-postgresql=16.2=he703394_0
-proj=9.3.1=h7b42f86_0
-prometheus_client=0.19.0=pyhd8ed1ab_0
-prompt-toolkit=3.0.42=pyha770c72_0
-prompt_toolkit=3.0.42=hd8ed1ab_0
-psutil=5.9.8=py311hcd402e7_0
-pthread-stubs=0.4=hb9de7d4_1001
-ptyprocess=0.7.0=pyhd3deb0d_0
-pure_eval=0.2.2=pyhd8ed1ab_0
-pyarrow=15.0.0=py311h1eb6f34_4_cpu
-pyarrow-hotfix=0.6=pyhd8ed1ab_0
-pybind11-abi=4=hd8ed1ab_3
-pycosat=0.6.6=py311hcd402e7_0
-pycparser=2.21=pyhd8ed1ab_0
-pyct=0.5.0=pyhd8ed1ab_0
-pycurl=7.45.1=py311h4769251_3
-pygments=2.17.2=pyhd8ed1ab_0
-pyjwt=2.8.0=pyhd8ed1ab_1
-pyopenssl=24.0.0=pyhd8ed1ab_0
-pyparsing=3.1.1=pyhd8ed1ab_0
-pyproj=3.6.1=py311ha6273e5_5
-pysocks=1.7.1=pyha2e5f31_6
-pyspnego=0.9.1=py311hcd402e7_2
-python=3.11.7=h43d1f9e_1_cpython
-python-dateutil=2.8.2=pyhd8ed1ab_0
-python-fastjsonschema=2.19.1=pyhd8ed1ab_0
-python-json-logger=2.0.7=pyhd8ed1ab_0
-python_abi=3.11=4_cp311
-pytz=2024.1=pyhd8ed1ab_0
-pyviz_comms=3.0.0=pyhd8ed1ab_0
-pyyaml=6.0.1=py311hcd402e7_1
-pyzmq=24.0.1=py311h22a2215_1
-qtconsole-base=5.5.1=pyha770c72_0
-qtpy=2.4.1=pyhd8ed1ab_0
-re2=2023.06.02=h887e66c_0
-readline=8.2=h8fc344f_1
-referencing=0.33.0=pyhd8ed1ab_0
-reproc=14.2.4.post0=h31becfc_1
-reproc-cpp=14.2.4.post0=h2f0025b_1
-requests=2.31.0=pyhd8ed1ab_0
-requests-kerberos=0.14.0=pyhd8ed1ab_1
-rfc3339-validator=0.1.4=pyhd8ed1ab_0
-rfc3986-validator=0.1.1=pyh9f0ad1d_0
-rpds-py=0.17.1=py311h32437ce_0
-rtree=1.2.0=py311h04fbf56_0
-ruamel.yaml=0.18.6=py311hcd402e7_0
-ruamel.yaml.clib=0.2.8=py311hcd402e7_0
-s2n=1.4.4=h5a25046_0
-scikit-learn=1.4.1.post1=py311hb93614b_0
-scipy=1.12.0=py311h69ead2a_2
-send2trash=1.8.2=pyh41d4057_0
-setuptools=69.0.3=pyhd8ed1ab_0
-shapely=2.0.3=py311hbbe59c9_0
-six=1.16.0=pyh6c4a22f_0
-snappy=1.1.10=he8610fa_0
-sniffio=1.3.0=pyhd8ed1ab_0
-sortedcontainers=2.4.0=pyhd8ed1ab_0
-soupsieve=2.5=pyhd8ed1ab_1
-sparkmagic=0.21.0=pyhd8ed1ab_1
-sqlalchemy=2.0.26=py311hc8f2f60_0
-sqlite=3.45.1=h3b3482f_0
-stack_data=0.6.2=pyhd8ed1ab_0
-tblib=3.0.0=pyhd8ed1ab_0
-tenacity=8.2.3=pyhd8ed1ab_0
-terminado=0.18.0=pyh0d859eb_0
-threadpoolctl=3.3.0=pyhc1e730c_0
-tiledb=2.20.0=hf61e980_0
-tinycss2=1.2.1=pyhd8ed1ab_0
-tk=8.6.13=h194ca79_0
-tomli=2.0.1=pyhd8ed1ab_0
-toolz=0.12.1=pyhd8ed1ab_0
-tornado=6.3.3=py311hc8f2f60_1
-tqdm=4.66.2=pyhd8ed1ab_0
-traitlets=5.9.0=pyhd8ed1ab_0
-truststore=0.8.0=pyhd8ed1ab_0
-types-python-dateutil=2.8.19.20240106=pyhd8ed1ab_0
-typing-extensions=4.9.0=hd8ed1ab_0
-typing_extensions=4.9.0=pyha770c72_0
-typing_utils=0.1.0=pyhd8ed1ab_0
-tzcode=2024a=h31becfc_0
-tzdata=2024a=h0c530f3_0
-uc-micro-py=1.0.3=pyhd8ed1ab_0
-ucx=1.15.0=hedb98eb_3
-uri-template=1.3.0=pyhd8ed1ab_0
-uriparser=0.9.7=hd600fc2_1
-urllib3=2.2.0=pyhd8ed1ab_0
-wcwidth=0.2.13=pyhd8ed1ab_0
-webcolors=1.13=pyhd8ed1ab_0
-webencodings=0.5.1=pyhd8ed1ab_2
-websocket-client=1.7.0=pyhd8ed1ab_0
-wheel=0.42.0=pyhd8ed1ab_0
-widgetsnbextension=4.0.10=pyhd8ed1ab_0
-xarray=2024.2.0=pyhd8ed1ab_0
-xerces-c=3.2.5=hf13c1fb_0
-xorg-kbproto=1.0.7=h3557bc0_1002
-xorg-libice=1.1.1=h7935292_0
-xorg-libsm=1.2.4=h5a01bc2_0
-xorg-libx11=1.8.7=h055a233_0
-xorg-libxau=1.0.11=h31becfc_0
-xorg-libxdmcp=1.1.3=h3557bc0_0
-xorg-libxext=1.3.4=h2a766a3_2
-xorg-libxrender=0.9.11=h7935292_0
-xorg-renderproto=0.11.1=h3557bc0_1002
-xorg-xextproto=7.3.0=h2a766a3_1003
-xorg-xproto=7.0.31=h3557bc0_1007
-xyzservices=2023.10.1=pyhd8ed1ab_0
-xz=5.2.6=h9cdd2b7_0
-yaml=0.2.5=hf897c2e_2
-yaml-cpp=0.8.0=h2f0025b_0
-zeromq=4.3.5=h2f0025b_0
-zict=3.0.0=pyhd8ed1ab_0
-zipp=3.17.0=pyhd8ed1ab_0
-zlib=1.2.13=h31becfc_5
-zstandard=0.22.0=py311hb827a26_0
-zstd=1.5.5=h4c53e97_0
diff --git a/images/jupyter/sparkmagic.json b/images/jupyter/sparkmagic.json
deleted file mode 100644
index 6f3078dbfd..0000000000
--- a/images/jupyter/sparkmagic.json
+++ /dev/null
@@ -1,70 +0,0 @@
-{
- "kernel_python_credentials": {
- "username": "",
- "password": "",
- "url": "http://kamu-livy:8998",
- "auth": "None"
- },
- "kernel_scala_credentials": {
- "username": "",
- "password": "",
- "url": "http://kamu-livy:8998",
- "auth": "None"
- },
- "kernel_r_credentials": {
- "username": "",
- "password": "",
- "url": "http://kamu-livy:8998"
- },
- "logging_config": {
- "version": 1,
- "formatters": {
- "magicsFormatter": {
- "format": "%(asctime)s\t%(levelname)s\t%(message)s",
- "datefmt": ""
- }
- },
- "handlers": {
- "magicsHandler": {
- "class": "hdijupyterutils.filehandler.MagicsFileHandler",
- "formatter": "magicsFormatter",
- "home_path": "~/.sparkmagic"
- }
- },
- "loggers": {
- "magicsLogger": {
- "handlers": [
- "magicsHandler"
- ],
- "level": "DEBUG",
- "propagate": 0
- }
- }
- },
- "wait_for_idle_timeout_seconds": 15,
- "livy_session_startup_timeout_seconds": 60,
- "fatal_error_suggestion": "The code failed because of a fatal error:\n\t{}.\n\nSome things to try:\na) Make sure Spark has enough available resources for Jupyter to create a Spark context.\nb) Contact your Jupyter administrator to make sure the Spark magics library is configured correctly.\nc) Restart the kernel.",
- "ignore_ssl_errors": false,
- "session_configs": {
- "driverMemory": "1000M",
- "executorCores": 2
- },
- "use_auto_viz": true,
- "coerce_dataframe": true,
- "default_maxrows": 1000000,
- "pyspark_dataframe_encoding": "utf-8",
- "heartbeat_refresh_seconds": 30,
- "livy_server_heartbeat_timeout_seconds": 0,
- "heartbeat_retry_seconds": 10,
- "server_extension_default_kernel_name": "pysparkkernel",
- "custom_headers": {},
- "retry_policy": "configurable",
- "retry_seconds_to_sleep_list": [
- 0.2,
- 0.5,
- 1,
- 3,
- 5
- ],
- "configurable_retry_policy_max_retries": 8
-}
\ No newline at end of file
diff --git a/resources/cli-reference.md b/resources/cli-reference.md
index 0ce96f5a46..270b5520f1 100644
--- a/resources/cli-reference.md
+++ b/resources/cli-reference.md
@@ -652,6 +652,10 @@ Starts the notebook server for exploring the data in the workspace
* `--address <ADDRESS>` — Expose HTTP server on specific network interface
* `--http-port <HTTP_PORT>` — Expose HTTP server on specific port
+* `--engine <ENGINE>` — Engine type to use for the notebook
+
+ Possible values: `datafusion`, `spark`
+
* `-e`, `--env <ENV>` — Propagate or set an environment variable in the notebook (e.g. `-e VAR` or `-e VAR=foo`)
This command will run the Jupyter server and the Spark engine connected together, letting you query data with SQL before pulling it into the notebook for final processing and visualization.
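The Spark-based setup described above remains available as an opt-in via the new flag; for example:

    kamu notebook --engine spark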
@@ -1076,7 +1080,7 @@ Executes an SQL query or drops you into an SQL shell
**Subcommands:**
-* `server` — Run JDBC server only
+* `server` — Runs an SQL engine in server mode
**Options:**
@@ -1145,16 +1149,38 @@ Note: Currently when connecting to a remote SQL kamu server you will need to man
## `kamu sql server`
-Run JDBC server only
+Runs an SQL engine in server mode
**Usage:** `kamu sql server [OPTIONS]`
**Options:**
-* `--address <ADDRESS>` — Expose JDBC server on specific network interface
-* `--port <PORT>` — Expose JDBC server on specific port
-* `--livy` — Run Livy server instead of Spark JDBC
-* `--flight-sql` — Run Flight SQL server instead of Spark JDBC
+* `--address <ADDRESS>` — Expose server on specific network interface
+* `--port <PORT>` — Expose server on specific port
+* `--engine <ENGINE>` — Engine type to use for this server
+
+ Possible values: `datafusion`, `spark`
+
+* `--livy` — Run Livy server instead of JDBC
+
+**Examples:**
+
+By default runs the DataFusion engine exposing the FlightSQL protocol:
+
+ kamu sql server
+
+To customize interface and port:
+
+ kamu sql server --address 0.0.0.0 --port 50050
+
+To run with Spark engine:
+
+ kamu sql server --engine spark
+
+By default Spark runs with the JDBC protocol; to instead run the Livy HTTP gateway:
+
+ kamu sql server --engine spark --livy
+
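These commands expose a standard FlightSQL endpoint, so any FlightSQL client can connect. A minimal sketch in Rust using `arrow-flight`'s `FlightSqlServiceClient`; the address, port, and the `anonymous`/empty credentials are illustrative assumptions, not part of this change:

    use arrow_flight::sql::client::FlightSqlServiceClient;
    use futures::TryStreamExt;
    use tonic::transport::Channel;

    #[tokio::main]
    async fn main() -> Result<(), Box<dyn std::error::Error>> {
        // Connect to a server started with e.g. `kamu sql server --port 50050`
        let channel = Channel::from_static("http://127.0.0.1:50050")
            .connect()
            .await?;
        let mut client = FlightSqlServiceClient::new(channel);

        // Basic-auth handshake; the client stores the returned bearer token
        // and attaches it to subsequent requests
        client.handshake("anonymous", "").await?;

        // Execute a query and stream back the resulting record batches
        let info = client.execute("SELECT 1 AS x".to_string(), None).await?;
        for endpoint in info.endpoint {
            let ticket = endpoint.ticket.ok_or("endpoint without ticket")?;
            let mut stream = client.do_get(ticket).await?;
            while let Some(batch) = stream.try_next().await? {
                println!("{batch:?}");
            }
        }
        Ok(())
    }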
diff --git a/src/adapter/flight-sql/Cargo.toml b/src/adapter/flight-sql/Cargo.toml
index aee8df5739..9fb9a13c6c 100644
--- a/src/adapter/flight-sql/Cargo.toml
+++ b/src/adapter/flight-sql/Cargo.toml
@@ -22,29 +22,39 @@ doctest = false
[dependencies]
+kamu-accounts = { workspace = true }
kamu-core = { workspace = true }
+database-common = { workspace = true }
time-source = { workspace = true }
arrow-flight = { version = "53", features = ["flight-sql-experimental"] }
async-trait = { version = "0.1", default-features = false }
+base32 = { version = "0.5", default-features = false }
base64 = { version = "0.22", default-features = false }
+bytes = { version = "1", default-features = false }
chrono = { version = "0.4", default-features = false }
datafusion = { version = "43", default-features = false }
dill = { version = "0.10", default-features = false }
futures = "0.3"
+http = { version = "1", default-features = false }
+http-body = { version = "1", default-features = false }
like = { version = "0.3", default-features = false }
prost = { version = "0.13", default-features = false }
+rand = { version = "0.8", default-features = false }
tokio = { version = "1", default-features = false, features = [] }
tonic = { version = "0.12", default-features = false }
+tower = { version = "0.5", default-features = false }
tracing = { version = "0.1", default-features = false }
uuid = { version = "1", default-features = false }
[dev-dependencies]
+kamu-accounts = { workspace = true, features = ["testing"] }
kamu-core = { workspace = true, features = ["testing"] }
kamu-data-utils = { workspace = true, features = ["testing"] }
indoc = "2"
+mockall = { version = "0.13", default-features = false }
test-log = { version = "0.2", features = ["trace"] }
tokio = { version = "1", default-features = false, features = [] }
tokio-stream = { version = "0.1", default-features = false, features = ["net"] }
diff --git a/src/adapter/flight-sql/src/auth_layer.rs b/src/adapter/flight-sql/src/auth_layer.rs
new file mode 100644
index 0000000000..43cf956ef9
--- /dev/null
+++ b/src/adapter/flight-sql/src/auth_layer.rs
@@ -0,0 +1,199 @@
+// Copyright Kamu Data, Inc. and contributors. All rights reserved.
+//
+// Use of this software is governed by the Business Source License
+// included in the LICENSE file.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0.
+
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+
+use database_common::DatabaseTransactionRunner;
+use futures::Future;
+use kamu_accounts::{
+ Account,
+ AnonymousAccountReason,
+ AuthenticationService,
+ CurrentAccountSubject,
+ GetAccountInfoError,
+};
+use tonic::body::BoxBody;
+use tonic::Status;
+use tower::{Layer, Service};
+
+use crate::SessionId;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+pub struct SessionAuthConfig {
+ pub allow_anonymous: bool,
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[derive(Debug, Clone)]
+pub struct AuthenticationLayer {}
+
+impl AuthenticationLayer {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl<Svc> Layer<Svc> for AuthenticationLayer {
+ type Service = AuthenticationMiddleware<Svc>;
+
+ fn layer(&self, inner: Svc) -> Self::Service {
+ AuthenticationMiddleware { inner }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[derive(Debug, Clone)]
+pub struct AuthenticationMiddleware<Svc> {
+ inner: Svc,
+}
+
+impl<Svc> AuthenticationMiddleware<Svc> {
+ fn extract_service_method<ReqBody>(request: &http::Request<ReqBody>) -> (String, String) {
+ let path = request.uri().path();
+ let mut parts = path.split('/').filter(|x| !x.is_empty());
+ let service = parts.next().unwrap_or_default();
+ let method = parts.next().unwrap_or_default();
+ (service.to_string(), method.to_string())
+ }
+
+ fn extract_bearer_token<ReqBody>(request: &http::Request<ReqBody>) -> Option<String> {
+ let auth = request.headers().get(http::header::AUTHORIZATION)?;
+ let auth = auth.to_str().ok()?;
+
+ if auth.starts_with("Bearer ") || auth.starts_with("bearer ") {
+ return Some(auth["Bearer ".len()..].to_string());
+ }
+
+ None
+ }
+
+ async fn get_account_by_token(
+ base_catalog: &dill::Catalog,
+ access_token: String,
+ ) -> Result<Account, GetAccountInfoError> {
+ use tracing::Instrument;
+
+ DatabaseTransactionRunner::new(base_catalog.clone())
+ .transactional_with(
+ |authentication_service: Arc<dyn AuthenticationService>| async move {
+ authentication_service.account_by_token(access_token).await
+ },
+ )
+ .instrument(tracing::debug_span!(
+ "AuthenticationMiddleware::current_account_subject"
+ ))
+ .await
+ }
+}
+
+impl<ReqBody, Svc> Service<http::Request<ReqBody>> for AuthenticationMiddleware<Svc>
+where
+ ReqBody: Send + 'static,
+ Svc: Service<http::Request<ReqBody>, Response = http::Response<BoxBody>>,
+ Svc: Clone + Send + 'static,
+ Svc::Future: Send + 'static,
+{
+ type Response = http::Response<BoxBody>;
+ type Error = Svc::Error;
+ type Future =
+ Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send + 'static>>;
+
+ fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
+ self.inner.poll_ready(cx)
+ }
+
+ fn call(&mut self, mut request: http::Request<ReqBody>) -> Self::Future {
+ // Inspired by https://github.com/maxcountryman/axum-login/blob/5239b38b2698a3db3f92075b6ad430aea79c215a/axum-login/src/auth.rs
+ // TODO: PERF: Is cloning a performance concern?
+ let mut inner = self.inner.clone();
+
+ Box::pin(async move {
+ let base_catalog = request
+ .extensions()
+ .get::<dill::Catalog>()
+ .expect("Catalog not found in request extensions");
+
+ let conf: Arc<SessionAuthConfig> = base_catalog.get_one().unwrap();
+
+ let token = Self::extract_bearer_token(&request);
+ let (service, method) = Self::extract_service_method(&request);
+
+ let subject = match &token {
+ None if conf.allow_anonymous
+ && service == "arrow.flight.protocol.FlightService"
+ && method == "Handshake" =>
+ {
+ CurrentAccountSubject::anonymous(
+ AnonymousAccountReason::NoAuthenticationProvided,
+ )
+ }
+ Some(token) if conf.allow_anonymous && token.starts_with("anon_") => {
+ // TODO: SEC: Anonymous session tokens have to be validated
+ CurrentAccountSubject::anonymous(
+ AnonymousAccountReason::NoAuthenticationProvided,
+ )
+ }
+ Some(token) => {
+ match Self::get_account_by_token(base_catalog, token.clone()).await {
+ Ok(account) => CurrentAccountSubject::logged(
+ account.id,
+ account.account_name,
+ account.is_admin,
+ ),
+ Err(e @ GetAccountInfoError::AccessToken(_)) => {
+ tracing::warn!("{e}");
+ return Ok(Status::unauthenticated(e.to_string()).into_http());
+ }
+ Err(e @ GetAccountInfoError::AccountUnresolved) => {
+ tracing::warn!("{e}");
+ return Ok(Status::unauthenticated(e.to_string()).into_http());
+ }
+ Err(e @ GetAccountInfoError::Internal(_)) => {
+ tracing::error!(
+ error = ?e,
+ error_msg = %e,
+ "Internal error during authentication",
+ );
+ return Ok(Status::internal("Internal error").into_http());
+ }
+ }
+ }
+ _ => {
+ // Disallow fully unauthorized access - anonymous users have to go through
+ // handshake procedure
+ return Ok(Status::unauthenticated(
+ "Unauthenticated access is not allowed. Provide a bearer token or use \
+ basic auth and handshake endpoint to login as anonymous.",
+ )
+ .into_http());
+ }
+ };
+
+ let session_id = token.map(SessionId);
+
+ tracing::debug!(?subject, ?session_id, "Authenticated request");
+
+ let mut derived_catalog_builder = dill::CatalogBuilder::new_chained(base_catalog);
+ if let Some(session_id) = session_id {
+ derived_catalog_builder.add_value(session_id);
+ }
+ derived_catalog_builder.add_value(subject);
+
+ let derived_catalog = derived_catalog_builder.build();
+ request.extensions_mut().insert(derived_catalog);
+
+ inner.call(request).await
+ })
+ }
+}
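Note that `extract_bearer_token` accepts both `Bearer ` and `bearer ` prefixes, which happen to share a length, so the single slice offset is safe. A self-contained sketch of that contract using `strip_prefix` as an equivalent formulation (test name and token value are made up):

    use http::header::AUTHORIZATION;

    // Hypothetical test illustrating the accepted header convention: both
    // "Bearer" and "bearer" prefixes yield the same token
    #[test]
    fn bearer_prefix_is_case_tolerant() {
        let request = http::Request::builder()
            .header(AUTHORIZATION, "bearer kamu-token-123")
            .body(())
            .unwrap();

        let auth = request
            .headers()
            .get(AUTHORIZATION)
            .and_then(|v| v.to_str().ok())
            .unwrap();

        // Equivalent to the fixed-offset slice in extract_bearer_token,
        // since both prefixes have the same length
        let token = auth
            .strip_prefix("Bearer ")
            .or_else(|| auth.strip_prefix("bearer "))
            .unwrap();

        assert_eq!(token, "kamu-token-123");
    }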
diff --git a/src/adapter/flight-sql/src/lib.rs b/src/adapter/flight-sql/src/lib.rs
index 60846817ce..29fe7db572 100644
--- a/src/adapter/flight-sql/src/lib.rs
+++ b/src/adapter/flight-sql/src/lib.rs
@@ -9,19 +9,25 @@
#![feature(lint_reasons)]
+mod auth_layer;
mod service;
-mod service_builder;
+mod service_wrapper;
mod session_auth;
+mod session_auth_anon;
+mod session_auth_bearer_only;
mod session_manager;
mod session_manager_caching;
mod session_manager_singleton;
+pub mod sql_info;
+mod types;
+pub use auth_layer::*;
pub use service::*;
-pub use service_builder::*;
+pub use service_wrapper::*;
pub use session_auth::*;
+pub use session_auth_anon::*;
+pub use session_auth_bearer_only::*;
pub use session_manager::*;
pub use session_manager_caching::*;
pub use session_manager_singleton::*;
-
-pub type SessionToken = String;
-pub type PlanToken = String;
+pub use types::*;
diff --git a/src/adapter/flight-sql/src/service.rs b/src/adapter/flight-sql/src/service.rs
index 4569dca3bb..56d2f0dd7b 100644
--- a/src/adapter/flight-sql/src/service.rs
+++ b/src/adapter/flight-sql/src/service.rs
@@ -73,67 +73,41 @@ use tonic::codegen::tokio_stream::Stream;
use tonic::metadata::MetadataValue;
use tonic::{Request, Response, Status, Streaming};
-use crate::{KamuFlightSqlServiceBuilder, PlanToken, SessionManager, SessionToken};
+use crate::{PlanId, SessionAuth, SessionManager};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
const TABLE_TYPES: [&str; 2] = ["TABLE", "VIEW"];
+const CLOSE_SESSION: &str = "CloseSession";
+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// KamuFlightSqlService
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
pub struct KamuFlightSqlService {
- sql_info: SqlInfoData,
+ sql_info: Arc<SqlInfoData>,
+ // LazyOnce ensures that these objects are instantiated once but only when they are needed -
+ // this is important because during some operations like `handshake` the `SessionId` is not
+ // available so an attempt to instantiate a `SessionManager` may fail
+ session_auth: LazyOnce<Arc<dyn SessionAuth>>,
+ session_manager: LazyOnce<Arc<dyn SessionManager>>,
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#[dill::component(pub)]
impl KamuFlightSqlService {
- pub fn builder() -> KamuFlightSqlServiceBuilder {
- KamuFlightSqlServiceBuilder::new()
- }
-
- pub(crate) fn new(sql_info: SqlInfoData) -> Self {
- Self { sql_info }
- }
-
- // This type is a singleton. For it to play nicely with DB transactions we
- // follow the same pattern as in Axum where middleware layers are responsible
- // for attaching the Catalog to incoming requests. Here we extract catalog from
- // the extensions to instantiate session manager.
- fn get_session_manager<T>(&self, req: &Request<T>) -> Result<Arc<dyn SessionManager>, Status> {
- let Some(catalog) = req.extensions().get::<dill::Catalog>() else {
- return Err(Status::internal("Catalog extension is not configured"));
- };
-
- catalog
- .get_one()
- .map_err(|_| Status::internal("Injection error"))
- }
-
- fn get_token<T>(&self, req: &Request<T>) -> Result<SessionToken, Status> {
- let auth = req
- .metadata()
- .get("authorization")
- .ok_or_else(|| Status::internal("No authorization header!"))?
- .to_str()
- .map_err(|e| Status::internal(format!("Error parsing header: {e}")))?
- .to_string();
-
- let Some(session_token) = auth.strip_prefix("Bearer ") else {
- return Err(Status::internal("Invalid auth header!"));
- };
-
- Ok(SessionToken::from(session_token))
- }
-
- async fn get_ctx<T>(&self, req: &Request<T>) -> Result<Arc<SessionContext>, Status> {
- let session_token = self.get_token(req)?;
-
- self.get_session_manager(req)?
- .get_context(&session_token.to_string())
- .await
+ pub fn new(
+ sql_info: Arc<SqlInfoData>,
+ session_auth: dill::Lazy<Arc<dyn SessionAuth>>,
+ session_manager: dill::Lazy<Arc<dyn SessionManager>>,
+ ) -> Self {
+ Self {
+ sql_info,
+ session_auth: LazyOnce::new(session_auth),
+ session_manager: LazyOnce::new(session_manager),
+ }
}
fn get_sql_info(
@@ -638,6 +612,11 @@ impl KamuFlightSqlService {
let stream = futures::stream::iter(flights.into_iter().map(Ok));
Ok(Response::new(Box::pin(stream)))
}
+
+ #[tracing::instrument(level = "debug", skip_all)]
+ async fn do_action_close_session(&self, _request: Request<Action>) -> Result<(), Status> {
+ self.session_manager.close_session().await
+ }
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -657,6 +636,7 @@ impl FlightSqlService for KamuFlightSqlService {
Response<Pin<Box<dyn Stream<Item = Result<HandshakeResponse, Status>> + Send>>>,
Status,
> {
+ use base64::engine::{GeneralPurpose, GeneralPurposeConfig};
use base64::Engine;
let basic = "Basic ";
@@ -672,7 +652,12 @@ impl FlightSqlService for KamuFlightSqlService {
)))?;
}
let base64 = &authorization[basic.len()..];
- let bytes = base64::engine::general_purpose::STANDARD
+ let b64engine = GeneralPurpose::new(
+ &base64::alphabet::STANDARD,
+ GeneralPurposeConfig::new()
+ .with_decode_padding_mode(base64::engine::DecodePaddingMode::Indifferent),
+ );
+ let bytes = b64engine
.decode(base64)
.map_err(|_| Status::invalid_argument("authorization not parsable"))?;
let str = String::from_utf8(bytes)
@@ -684,10 +669,7 @@ impl FlightSqlService for KamuFlightSqlService {
let username = parts[0];
let password = parts[1];
- let session_token = self
- .get_session_manager(&request)?
- .auth_basic(username, password)
- .await?;
+ let session_token = self.session_auth.auth_basic(username, password).await?;
let result = HandshakeResponse {
protocol_version: 0,
@@ -733,7 +715,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandStatementQuery,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let plan = Self::prepare_statement(&query.query, &ctx).await?;
let df = ctx
.execute_logical_plan(plan)
@@ -754,18 +736,14 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandPreparedStatementQuery,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
- let session_token = self.get_token(&request)?;
-
- let plan_token = PlanToken::from_utf8(query.prepared_statement_handle.to_vec())
- .map_err(|e| Status::internal(format!("Error decoding handle: {e}")))?;
-
- let session_manager = self.get_session_manager(&request)?;
+ let plan_id = PlanId(
+ String::from_utf8(query.prepared_statement_handle.to_vec())
+ .map_err(|e| Status::internal(format!("Error decoding handle: {e}")))?,
+ );
- let plan = session_manager
- .get_plan(&session_token, &plan_token)
- .await?;
+ let plan = self.session_manager.get_plan(&plan_id).await?;
- let ctx = session_manager.get_context(&session_token).await?;
+ let ctx = self.session_manager.get_context().await?;
let df = ctx
.execute_logical_plan(plan)
@@ -782,7 +760,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetCatalogs,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_catalogs(&ctx, &query, true)?;
self.record_batch_to_flight_info(&data, &query.as_any(), true)
}
@@ -793,7 +771,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetDbSchemas,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_schemas(&ctx, &query, true)?;
self.record_batch_to_flight_info(&data, &query.as_any(), true)
}
@@ -804,7 +782,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetTables,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_tables(ctx, &query, true).await?;
self.record_batch_to_flight_info(&data, &query.as_any(), true)
}
@@ -815,7 +793,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetTableTypes,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
- let _ctx = self.get_ctx(&request).await?;
+ let _ctx = self.session_manager.get_context().await?;
let data = self.get_table_types(true)?;
self.record_batch_to_flight_info(&data, &query.as_any(), true)
}
@@ -826,7 +804,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetSqlInfo,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
- let _ctx = self.get_ctx(&request).await?;
+ let _ctx = self.session_manager.get_context().await?;
let data = self.get_sql_info(&query, true)?;
self.record_batch_to_flight_info(&data, &query.as_any(), true)
}
@@ -837,7 +815,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetPrimaryKeys,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_primary_keys(&ctx, &query, true)?;
self.record_batch_to_flight_info(&data, &query.as_any(), true)
}
@@ -848,7 +826,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetExportedKeys,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_exported_keys(&ctx, &query, true)?;
self.record_batch_to_flight_info(&data, &query.as_any(), true)
}
@@ -859,7 +837,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetImportedKeys,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_imported_keys(&ctx, &query, true)?;
self.record_batch_to_flight_info(&data, &query.as_any(), true)
}
@@ -893,7 +871,7 @@ impl FlightSqlService for KamuFlightSqlService {
ticket: TicketStatementQuery,
request: Request<Ticket>,
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let query = CommandStatementQuery::decode(ticket.statement_handle)
.map_err(|e| Status::internal(format!("Invalid ticket: {e}")))?;
@@ -915,18 +893,14 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandPreparedStatementQuery,
request: Request<Ticket>,
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
- let session_token = self.get_token(&request)?;
-
- let plan_token = PlanToken::from_utf8(query.prepared_statement_handle.into())
- .map_err(|e| Status::internal(format!("Error decoding handle: {e}")))?;
-
- let session_manager = self.get_session_manager(&request)?;
+ let plan_id = PlanId(
+ String::from_utf8(query.prepared_statement_handle.to_vec())
+ .map_err(|e| Status::internal(format!("Error decoding handle: {e}")))?,
+ );
- let plan = session_manager
- .get_plan(&session_token, &plan_token)
- .await?;
+ let plan = self.session_manager.get_plan(&plan_id).await?;
- let ctx = session_manager.get_context(&session_token).await?;
+ let ctx = self.session_manager.get_context().await?;
let df = ctx
.execute_logical_plan(plan)
@@ -942,7 +916,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetCatalogs,
request: Request<Ticket>,
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_catalogs(&ctx, &query, false)?;
self.record_batch_to_stream(data)
}
@@ -953,7 +927,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetDbSchemas,
request: Request,
) -> Result::DoGetStream>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_schemas(&ctx, &query, false)?;
self.record_batch_to_stream(data)
}
@@ -964,7 +938,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetTables,
request: Request<Ticket>,
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_tables(ctx, &query, false).await?;
self.record_batch_to_stream(data)
}
@@ -975,7 +949,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetTableTypes,
request: Request<Ticket>,
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
- let _ctx = self.get_ctx(&request).await?;
+ let _ctx = self.session_manager.get_context().await?;
let data = self.get_table_types(false)?;
self.record_batch_to_stream(data)
}
@@ -986,7 +960,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetSqlInfo,
request: Request<Ticket>,
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
- let _ctx = self.get_ctx(&request).await?;
+ let _ctx = self.session_manager.get_context().await?;
let data = self.get_sql_info(&query, false)?;
self.record_batch_to_stream(data)
}
@@ -997,7 +971,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetPrimaryKeys,
request: Request<Ticket>,
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_primary_keys(&ctx, &query, false)?;
self.record_batch_to_stream(data)
}
@@ -1008,7 +982,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetExportedKeys,
request: Request<Ticket>,
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_exported_keys(&ctx, &query, false)?;
self.record_batch_to_stream(data)
}
@@ -1019,7 +993,7 @@ impl FlightSqlService for KamuFlightSqlService {
query: CommandGetImportedKeys,
request: Request<Ticket>,
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
- let ctx = self.get_ctx(&request).await?;
+ let ctx = self.session_manager.get_context().await?;
let data = self.get_imported_keys(&ctx, &query, false)?;
self.record_batch_to_stream(data)
}
@@ -1070,13 +1044,11 @@ impl FlightSqlService for KamuFlightSqlService {
query: ActionCreatePreparedStatementRequest,
request: Request<Action>,
) -> Result<ActionCreatePreparedStatementResult, Status> {
- let session_token = self.get_token(&request)?;
- let session_manager = self.get_session_manager(&request)?;
- let ctx = session_manager.get_context(&session_token).await?;
+ let ctx = self.session_manager.get_context().await?;
let plan = Self::prepare_statement(&query.query, &ctx).await?;
let schema_bytes = self.df_schema_to_arrow(plan.schema())?;
- let plan_token = session_manager.cache_plan(&session_token, plan).await?;
+ let plan_token = self.session_manager.cache_plan(plan).await?;
tracing::debug!(%plan_token, "Prepared statement");
@@ -1094,14 +1066,12 @@ impl FlightSqlService for KamuFlightSqlService {
query: ActionClosePreparedStatementRequest,
request: Request<Action>,
) -> Result<(), Status> {
- let session_token = self.get_token(&request)?;
-
- let plan_token = PlanToken::from_utf8(query.prepared_statement_handle.into())
- .map_err(|e| Status::internal(format!("Error decoding handle: {e}")))?;
+ let plan_id = PlanId(
+ String::from_utf8(query.prepared_statement_handle.to_vec())
+ .map_err(|e| Status::internal(format!("Error decoding handle: {e}")))?,
+ );
- self.get_session_manager(&request)?
- .remove_plan(&session_token, &plan_token)
- .await?;
+ self.session_manager.remove_plan(&plan_id).await?;
Ok(())
}
@@ -1196,6 +1166,50 @@ impl FlightSqlService for KamuFlightSqlService {
/// GetSqlInfo.
#[tracing::instrument(level = "debug", skip_all, fields(%id, ?result))]
async fn register_sql_info(&self, id: i32, result: &SqlInfo) {}
+
+ async fn do_action_fallback(
+ &self,
+ request: Request<Action>,
+ ) -> Result<Response<<Self as FlightService>::DoActionStream>, Status> {
+ // TODO: Base interface should handle CloseSession action
+ // See: https://github.com/apache/arrow-rs/issues/6516
+ if request.get_ref().r#type == CLOSE_SESSION {
+ self.do_action_close_session(request).await?;
+ Ok(Response::new(Box::pin(futures::stream::empty())))
+ } else {
+ Err(Status::invalid_argument(format!(
+ "do_action: The defined request is invalid: {:?}",
+ request.get_ref().r#type
+ )))
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// TODO: Consider upstreaming into `dill`
+// One downside to this type is that it panics on injection errors rather than
+// returning them
+struct LazyOnce<T> {
+ f: dill::Lazy<T>,
+ v: std::sync::OnceLock<T>,
+}
+
+impl<T> LazyOnce<T> {
+ pub fn new(f: dill::Lazy<T>) -> Self {
+ Self {
+ f,
+ v: std::sync::OnceLock::new(),
+ }
+ }
+}
+
+impl<T> std::ops::Deref for LazyOnce<T> {
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ self.v.get_or_init(|| self.f.get().unwrap())
+ }
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
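`LazyOnce` above is the usual `OnceLock` memoization pattern specialized to `dill::Lazy`. A dependency-free sketch of the same idea, with a plain factory closure standing in for `dill::Lazy` (names are made up):

    use std::sync::OnceLock;

    struct LazyCell<T, F: Fn() -> T> {
        f: F,
        v: OnceLock<T>,
    }

    impl<T, F: Fn() -> T> LazyCell<T, F> {
        fn new(f: F) -> Self {
            Self {
                f,
                v: OnceLock::new(),
            }
        }

        // First caller pays the construction cost; all later calls return
        // the cached value, mirroring LazyOnce::deref above
        fn get(&self) -> &T {
            self.v.get_or_init(&self.f)
        }
    }

    fn main() {
        let cell = LazyCell::new(|| {
            println!("constructed once");
            42
        });
        assert_eq!(*cell.get(), 42);
        assert_eq!(*cell.get(), 42); // "constructed once" is printed only once
    }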
diff --git a/src/adapter/flight-sql/src/service_builder.rs b/src/adapter/flight-sql/src/service_builder.rs
deleted file mode 100644
index 0d16da8a78..0000000000
--- a/src/adapter/flight-sql/src/service_builder.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright Kamu Data, Inc. and contributors. All rights reserved.
-//
-// Use of this software is governed by the Business Source License
-// included in the LICENSE file.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0.
-
-use arrow_flight::sql::metadata::SqlInfoDataBuilder;
-use arrow_flight::sql::{
- SqlInfo,
- SqlNullOrdering,
- SqlSupportedCaseSensitivity,
- SqlSupportedTransactions,
- SupportedSqlGrammar,
-};
-
-use crate::KamuFlightSqlService;
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-pub struct KamuFlightSqlServiceBuilder {
- sql_info: SqlInfoDataBuilder,
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-impl KamuFlightSqlServiceBuilder {
- pub fn new() -> Self {
- let sql_info = Self::default_sql_info();
-
- Self { sql_info }
- }
-
- pub fn build(self) -> KamuFlightSqlService {
- KamuFlightSqlService::new(self.sql_info.build().unwrap())
- }
-
- pub fn with_server_name(mut self, name: &str, version: &str) -> Self {
- self.sql_info.append(SqlInfo::FlightSqlServerName, name);
- self.sql_info
- .append(SqlInfo::FlightSqlServerVersion, version);
- self
- }
-
- // TODO: Revisit
- fn default_sql_info() -> SqlInfoDataBuilder {
- let mut builder = SqlInfoDataBuilder::new();
- // Server information
- builder.append(SqlInfo::FlightSqlServerName, "Unknown");
- builder.append(SqlInfo::FlightSqlServerVersion, "0.0.0");
- // 1.3 comes from https://github.com/apache/arrow/blob/f9324b79bf4fc1ec7e97b32e3cce16e75ef0f5e3/format/Schema.fbs#L24
- builder.append(SqlInfo::FlightSqlServerArrowVersion, "1.3");
- builder.append(SqlInfo::FlightSqlServerReadOnly, true);
- builder.append(SqlInfo::FlightSqlServerSql, true);
- builder.append(SqlInfo::FlightSqlServerSubstrait, false);
- builder.append(
- SqlInfo::FlightSqlServerTransaction,
- SqlSupportedTransactions::SqlTransactionUnspecified as i32,
- );
- // don't yet support `CancelQuery` action
- builder.append(SqlInfo::FlightSqlServerCancel, false);
- builder.append(SqlInfo::FlightSqlServerStatementTimeout, 0i32);
- builder.append(SqlInfo::FlightSqlServerTransactionTimeout, 0i32);
- // SQL syntax information
- builder.append(SqlInfo::SqlDdlCatalog, false);
- builder.append(SqlInfo::SqlDdlSchema, false);
- builder.append(SqlInfo::SqlDdlTable, false);
- builder.append(
- SqlInfo::SqlIdentifierCase,
- SqlSupportedCaseSensitivity::SqlCaseSensitivityLowercase as i32,
- );
- builder.append(SqlInfo::SqlIdentifierQuoteChar, r#"""#);
- builder.append(
- SqlInfo::SqlQuotedIdentifierCase,
- SqlSupportedCaseSensitivity::SqlCaseSensitivityCaseInsensitive as i32,
- );
- builder.append(SqlInfo::SqlAllTablesAreSelectable, true);
- builder.append(
- SqlInfo::SqlNullOrdering,
- SqlNullOrdering::SqlNullsSortedHigh as i32,
- );
- // builder.append(SqlInfo::SqlKeywords, SQL_INFO_SQL_KEYWORDS);
- // builder.append(SqlInfo::SqlNumericFunctions, SQL_INFO_NUMERIC_FUNCTIONS);
- // builder.append(SqlInfo::SqlStringFunctions, SQL_INFO_STRING_FUNCTIONS);
- // builder.append(SqlInfo::SqlSystemFunctions, SQL_INFO_SYSTEM_FUNCTIONS);
- // builder.append(SqlInfo::SqlDatetimeFunctions, SQL_INFO_DATE_TIME_FUNCTIONS);
- builder.append(SqlInfo::SqlSearchStringEscape, "\\");
- builder.append(SqlInfo::SqlExtraNameCharacters, "");
- builder.append(SqlInfo::SqlSupportsColumnAliasing, true);
- builder.append(SqlInfo::SqlNullPlusNullIsNull, true);
- // Skip SqlSupportsConvert (which is the map of the conversions that are
- // supported); .with_sql_info(SqlInfo::SqlSupportsConvert, TBD);
- builder.append(SqlInfo::SqlSupportsTableCorrelationNames, false);
- builder.append(SqlInfo::SqlSupportsDifferentTableCorrelationNames, false);
- builder.append(SqlInfo::SqlSupportsExpressionsInOrderBy, true);
- builder.append(SqlInfo::SqlSupportsOrderByUnrelated, true);
- builder.append(SqlInfo::SqlSupportedGroupBy, 3i32);
- builder.append(SqlInfo::SqlSupportsLikeEscapeClause, true);
- builder.append(SqlInfo::SqlSupportsNonNullableColumns, true);
- builder.append(
- SqlInfo::SqlSupportedGrammar,
- SupportedSqlGrammar::SqlCoreGrammar as i32,
- );
- // report we support all ansi 92
- builder.append(SqlInfo::SqlAnsi92SupportedLevel, 0b111_i32);
- builder.append(SqlInfo::SqlSupportsIntegrityEnhancementFacility, false);
- builder.append(SqlInfo::SqlOuterJoinsSupportLevel, 2i32);
- builder.append(SqlInfo::SqlSchemaTerm, "schema");
- builder.append(SqlInfo::SqlProcedureTerm, "procedure");
- builder.append(SqlInfo::SqlCatalogAtStart, false);
- builder.append(SqlInfo::SqlSchemasSupportedActions, 0i32);
- builder.append(SqlInfo::SqlCatalogsSupportedActions, 0i32);
- builder.append(SqlInfo::SqlSupportedPositionedCommands, 0i32);
- builder.append(SqlInfo::SqlSelectForUpdateSupported, false);
- builder.append(SqlInfo::SqlStoredProceduresSupported, false);
- builder.append(SqlInfo::SqlSupportedSubqueries, 15i32);
- builder.append(SqlInfo::SqlCorrelatedSubqueriesSupported, true);
- builder.append(SqlInfo::SqlSupportedUnions, 3i32);
- // For max lengths, report max arrow string length
- builder.append(SqlInfo::SqlMaxBinaryLiteralLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxCharLiteralLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxColumnNameLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxColumnsInGroupBy, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxColumnsInIndex, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxColumnsInOrderBy, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxColumnsInSelect, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxColumnsInTable, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxConnections, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxCursorNameLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxIndexLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlDbSchemaNameLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxProcedureNameLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxCatalogNameLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxRowSize, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxRowSizeIncludesBlobs, true);
- builder.append(SqlInfo::SqlMaxStatementLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxStatements, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxTableNameLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxTablesInSelect, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlMaxUsernameLength, i64::from(i32::MAX));
- builder.append(SqlInfo::SqlDefaultTransactionIsolation, 0i64);
- builder.append(SqlInfo::SqlTransactionsSupported, false);
- builder.append(SqlInfo::SqlSupportedTransactionsIsolationLevels, 0i32);
- builder.append(SqlInfo::SqlDataDefinitionCausesTransactionCommit, false);
- builder.append(SqlInfo::SqlDataDefinitionsInTransactionsIgnored, true);
- builder.append(SqlInfo::SqlSupportedResultSetTypes, 0i32);
- builder.append(
- SqlInfo::SqlSupportedConcurrenciesForResultSetUnspecified,
- 0i32,
- );
- builder.append(
- SqlInfo::SqlSupportedConcurrenciesForResultSetForwardOnly,
- 0i32,
- );
- builder.append(
- SqlInfo::SqlSupportedConcurrenciesForResultSetScrollSensitive,
- 0i32,
- );
- builder.append(
- SqlInfo::SqlSupportedConcurrenciesForResultSetScrollInsensitive,
- 0i32,
- );
- builder.append(SqlInfo::SqlBatchUpdatesSupported, false);
- builder.append(SqlInfo::SqlSavepointsSupported, false);
- builder.append(SqlInfo::SqlNamedParametersSupported, false);
- builder.append(SqlInfo::SqlLocatorsUpdateCopy, false);
- builder.append(SqlInfo::SqlStoredFunctionsUsingCallSyntaxSupported, false);
- builder
- }
-}
diff --git a/src/adapter/flight-sql/src/service_wrapper.rs b/src/adapter/flight-sql/src/service_wrapper.rs
new file mode 100644
index 0000000000..00014f93cd
--- /dev/null
+++ b/src/adapter/flight-sql/src/service_wrapper.rs
@@ -0,0 +1,598 @@
+// Copyright Kamu Data, Inc. and contributors. All rights reserved.
+//
+// Use of this software is governed by the Business Source License
+// included in the LICENSE file.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0.
+
+use std::pin::Pin;
+use std::sync::Arc;
+
+use arrow_flight::flight_service_server::FlightService;
+use arrow_flight::sql::server::{FlightSqlService, PeekableFlightDataStream};
+use arrow_flight::sql::{
+ ActionBeginSavepointRequest,
+ ActionBeginSavepointResult,
+ ActionBeginTransactionRequest,
+ ActionBeginTransactionResult,
+ ActionCancelQueryRequest,
+ ActionCancelQueryResult,
+ ActionClosePreparedStatementRequest,
+ ActionCreatePreparedStatementRequest,
+ ActionCreatePreparedStatementResult,
+ ActionCreatePreparedSubstraitPlanRequest,
+ ActionEndSavepointRequest,
+ ActionEndTransactionRequest,
+ CommandGetCatalogs,
+ CommandGetCrossReference,
+ CommandGetDbSchemas,
+ CommandGetExportedKeys,
+ CommandGetImportedKeys,
+ CommandGetPrimaryKeys,
+ CommandGetSqlInfo,
+ CommandGetTableTypes,
+ CommandGetTables,
+ CommandGetXdbcTypeInfo,
+ CommandPreparedStatementQuery,
+ CommandPreparedStatementUpdate,
+ CommandStatementQuery,
+ CommandStatementSubstraitPlan,
+ CommandStatementUpdate,
+ DoPutPreparedStatementResult,
+ SqlInfo,
+ TicketStatementQuery,
+};
+use arrow_flight::{
+ Action,
+ FlightDescriptor,
+ FlightInfo,
+ HandshakeRequest,
+ HandshakeResponse,
+ Ticket,
+};
+use tonic::codegen::tokio_stream::Stream;
+use tonic::{Request, Response, Status, Streaming};
+
+use crate::KamuFlightSqlService;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// This type is a singleton that is called by the gRPC server. For it to play
+// nicely with DB transactions we follow the same pattern as in Axum, where
+// middleware layers are responsible for attaching the Catalog to incoming
+// requests. This wrapper will extract the catalog from the request extensions
+// and instantiate the inner service in the request context.
+pub struct KamuFlightSqlServiceWrapper;
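A minimal sketch of the extraction step the comment describes — it assumes a middleware layer has already stored a `dill::Catalog` in the request extensions, and that resolving the inner service goes through `Catalog::get_one` (dill's usual resolution call); the helper name and error messages are illustrative, not the exact implementation:

```rust
use std::sync::Arc;

use tonic::{Request, Status};

use crate::KamuFlightSqlService;

// Hypothetical helper: pull the Catalog a middleware layer attached to the
// request and resolve the per-request inner service from it
fn inner_service<T>(request: &Request<T>) -> Result<Arc<KamuFlightSqlService>, Status> {
    let catalog = request
        .extensions()
        .get::<dill::Catalog>()
        .ok_or_else(|| Status::internal("Catalog extension is missing"))?;

    catalog
        .get_one::<KamuFlightSqlService>()
        .map_err(|e| Status::internal(format!("Injection error: {e}")))
}
```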
+
+impl KamuFlightSqlServiceWrapper {
+ async fn exec<T, R, F, Fut>(&self, mut request: Request<T>, f: F) -> Result<R, Status>
+ where
+ F: FnOnce(Request<T>, Arc<KamuFlightSqlService>) -> Fut,
+ Fut: std::future::Future<Output = Result<R, Status>>,