diff --git a/docs/src/python/user-guide/expressions/user-defined-functions.py b/docs/src/python/user-guide/expressions/user-defined-functions.py index e0658b2d36a4..c001acdfae09 100644 --- a/docs/src/python/user-guide/expressions/user-defined-functions.py +++ b/docs/src/python/user-guide/expressions/user-defined-functions.py @@ -16,7 +16,7 @@ # --8<-- [start:shift_map_batches] out = df.group_by("keys", maintain_order=True).agg( - pl.col("values").map_batches(lambda s: s.shift()).alias("shift_map_batches"), + pl.col("values").map_batches(lambda s: s.shift(), is_elementwise=True).alias("shift_map_batches"), pl.col("values").shift().alias("shift_expression"), ) print(out) @@ -25,7 +25,7 @@ # --8<-- [start:map_elements] out = df.group_by("keys", maintain_order=True).agg( - pl.col("values").map_elements(lambda s: s.shift()).alias("shift_map_elements"), + pl.col("values").map_elements(lambda s: s.shift(), return_dtype=pl.List(int)).alias("shift_map_elements"), pl.col("values").shift().alias("shift_expression"), ) print(out) diff --git a/docs/user-guide/expressions/user-defined-functions.md b/docs/user-guide/expressions/user-defined-functions.md index 882cc11c6ac1..bdec32ad6921 100644 --- a/docs/user-guide/expressions/user-defined-functions.md +++ b/docs/user-guide/expressions/user-defined-functions.md @@ -74,7 +74,9 @@ Let's try that out and see what we get: Ouch.. we clearly get the wrong results here. Group `"b"` even got a value from group `"a"` 😵. -This went horribly wrong, because the `map_batches` applies the function before we aggregate! So that means the whole column `[10, 7, 1`\] got shifted to `[null, 10, 7]` and was then aggregated. +This went horribly wrong because `map_batches` applied the function before aggregation, due to the `is_elementwise=True` parameter being provided. So that means the whole column `[10, 7, 1]` got shifted to `[null, 10, 7]` and was then aggregated. + +However, the good news is that the default value for `is_elementwise` is `False`. 
The trade-off is that, with `is_elementwise=False`, the function can no longer run in the streaming engine. So my advice is to never use `map_batches` in the `group_by` context unless you know you need it and know what you are doing.