diff --git a/src/awkward/_attrs.py b/src/awkward/_attrs.py new file mode 100644 index 0000000000..14a42549d2 --- /dev/null +++ b/src/awkward/_attrs.py @@ -0,0 +1,44 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE +from __future__ import annotations + +from collections.abc import Mapping + +from awkward._typing import Any, JSONMapping + + +def attrs_of_obj(obj, attrs: Mapping | None = None) -> Mapping | None: + from awkward.highlevel import Array, ArrayBuilder, Record + + if attrs is not None: + return attrs + elif isinstance(obj, (Array, Record, ArrayBuilder)): + return obj._attrs + else: + return None + + +def attrs_of(*arrays, attrs: Mapping | None = None) -> Mapping: + # An explicit 'attrs' always wins. + if attrs is not None: + return attrs + + copied = False + for x in reversed(arrays): + x_attrs = attrs_of_obj(x) + if x_attrs is None: + continue + if attrs is None: + attrs = x_attrs + elif attrs is x_attrs: + pass + elif not copied: + attrs = dict(attrs) + attrs.update(x_attrs) + copied = True + else: + attrs.update(x_attrs) + return attrs + + +def without_transient_attrs(attrs: dict[str, Any]) -> JSONMapping: + return {k: v for k, v in attrs.items() if not k.startswith("@")} diff --git a/src/awkward/_backends/dispatch.py b/src/awkward/_backends/dispatch.py index 724d0637f6..2031d8a001 100644 --- a/src/awkward/_backends/dispatch.py +++ b/src/awkward/_backends/dispatch.py @@ -89,7 +89,7 @@ def backend_of_obj(obj, default: D | Sentinel = UNSET) -> Backend | D: def backend_of( - *objects, default: D | Sentinel = UNSET, coerce_to_common: bool = False + *objects, default: D | Sentinel = UNSET, coerce_to_common: bool = True ) -> Backend | D: """ Args: @@ -116,9 +116,9 @@ def backend_of( return common_backend(unique_backends) else: raise ValueError( - "could not find singular backend for", - objects, - "and coercion is not permitted", + f"could not find singular backend for " + f"{', '.join(type(t).__name__ for t in 
objects)} " + f"and coercion is not permitted", ) diff --git a/src/awkward/_connect/numba/arrayview.py b/src/awkward/_connect/numba/arrayview.py index 1bc5482785..d51843cf0b 100644 --- a/src/awkward/_connect/numba/arrayview.py +++ b/src/awkward/_connect/numba/arrayview.py @@ -11,8 +11,9 @@ from numba.core.errors import NumbaTypeError import awkward as ak -from awkward._behavior import behavior_of, overlay_behavior -from awkward._layout import wrap_layout +from awkward._behavior import overlay_behavior +from awkward._layout import HighLevelContext, wrap_layout +from awkward._lookup import Lookup from awkward._nplikes.numpy_like import NumpyMetadata np = NumpyMetadata.instance() @@ -152,7 +153,17 @@ def to_numbatype(form): ########## Lookup -@numba.extending.typeof_impl.register(ak._lookup.Lookup) +class NumbaLookup(Lookup): + def __init__(self, layout, attrs, generator=None): + super().__init__(layout, generator=generator) + self._attrs = attrs + + @property + def attrs(self): + return self._attrs + + +@numba.extending.typeof_impl.register(NumbaLookup) def typeof_Lookup(obj, c): return LookupType() @@ -192,15 +203,21 @@ def unbox_Lookup(lookuptype, lookupobj, c): class ArrayView: @classmethod def fromarray(cls, array): - behavior = behavior_of(array) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext() as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + use_from_iter=False, + primitive_policy="error", + string_policy="error", + none_policy="error", + ) return ArrayView( to_numbatype(layout.form), - behavior, - ak._lookup.Lookup(layout), + ctx.behavior, + NumbaLookup(layout, ctx.attrs), 0, 0, len(layout), @@ -219,7 +236,7 @@ def __init__(self, type, behavior, lookup, pos, start, stop, fields): def toarray(self): layout = self.type.tolayout(self.lookup, self.pos, self.fields) sliced = layout._getitem_range(self.start, self.stop) - return 
wrap_layout(sliced, self.behavior) + return wrap_layout(sliced, behavior=self.behavior, attrs=self.lookup.attrs) @numba.extending.typeof_impl.register(ArrayView) @@ -579,20 +596,28 @@ def lower_iternext(context, builder, sig, args, result): class RecordView: @classmethod def fromrecord(cls, record): - behavior = behavior_of(record) - layout = ak.operations.to_layout( - record, allow_record=True, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext() as ctx: + layout = ctx.unwrap( + record, + allow_record=True, + allow_unknown=False, + use_from_iter=False, + primitive_policy="error", + string_policy="error", + none_policy="error", + ) + array_layout = layout.array + assert isinstance(layout, ak.record.Record) - arraylayout = layout.array + return RecordView( ArrayView( - to_numbatype(arraylayout.form), - behavior, - ak._lookup.Lookup(arraylayout), + to_numbatype(array_layout.form), + ctx.behavior, + NumbaLookup(array_layout, ctx.attrs), 0, 0, - len(arraylayout), + len(array_layout), (), ), layout.at, @@ -603,9 +628,11 @@ def __init__(self, arrayview, at): self.at = at def torecord(self): - arraylayout = self.arrayview.toarray().layout + array = self.arrayview.toarray() return wrap_layout( - ak.record.Record(arraylayout, self.at), self.arrayview.behavior + ak.record.Record(array.layout, self.at), + behavior=self.arrayview.behavior, + attrs=array.attrs, ) diff --git a/src/awkward/_connect/numba/builder.py b/src/awkward/_connect/numba/builder.py index 5c50e19f64..0a739d599b 100644 --- a/src/awkward/_connect/numba/builder.py +++ b/src/awkward/_connect/numba/builder.py @@ -37,13 +37,18 @@ def __init__(self, behavior): @numba.extending.register_model(ArrayBuilderType) class ArrayBuilderModel(numba.core.datamodel.models.StructModel): def __init__(self, dmm, fe_type): - members = [("rawptr", numba.types.voidptr), ("pyptr", numba.types.pyobject)] + members = [ + ("rawptr", numba.types.voidptr), + ("pyptr", numba.types.pyobject), + ("pyattrs", 
numba.types.pyobject), + ] super().__init__(dmm, fe_type, members) @numba.core.imputils.lower_constant(ArrayBuilderType) def lower_const_ArrayBuilder(context, builder, arraybuildertype, arraybuilder): layout = arraybuilder._layout + attrs = arraybuilder._attrs rawptr = context.get_constant(numba.intp, arraybuilder._layout._ptr) proxyout = context.make_helper(builder, arraybuildertype) proxyout.rawptr = builder.inttoptr( @@ -52,20 +57,26 @@ def lower_const_ArrayBuilder(context, builder, arraybuildertype, arraybuilder): proxyout.pyptr = context.add_dynamic_addr( builder, id(layout), info=str(type(layout)) ) + proxyout.pyattrs = context.add_dynamic_addr( + builder, id(attrs), info=str(type(attrs)) + ) return proxyout._getvalue() @numba.extending.unbox(ArrayBuilderType) def unbox_ArrayBuilder(arraybuildertype, arraybuilderobj, c): + attrs_obj = c.pyapi.object_getattr_string(arraybuilderobj, "_attrs") inner_obj = c.pyapi.object_getattr_string(arraybuilderobj, "_layout") rawptr_obj = c.pyapi.object_getattr_string(inner_obj, "_ptr") proxyout = c.context.make_helper(c.builder, arraybuildertype) proxyout.rawptr = c.pyapi.long_as_voidptr(rawptr_obj) proxyout.pyptr = inner_obj + proxyout.pyattrs = attrs_obj c.pyapi.decref(inner_obj) c.pyapi.decref(rawptr_obj) + c.pyapi.decref(attrs_obj) is_error = numba.core.cgutils.is_not_null(c.builder, c.pyapi.err_occurred()) return numba.extending.NativeValue(proxyout._getvalue(), is_error) @@ -90,8 +101,11 @@ def box_ArrayBuilder(arraybuildertype, arraybuilderval, c): proxyin = c.context.make_helper(c.builder, arraybuildertype, arraybuilderval) c.pyapi.incref(proxyin.pyptr) + attrs_obj = proxyin.pyattrs - out = c.pyapi.call_method(ArrayBuilder_obj, "_wrap", (proxyin.pyptr, behavior_obj)) + out = c.pyapi.call_method( + ArrayBuilder_obj, "_wrap", (proxyin.pyptr, behavior_obj, attrs_obj) + ) c.pyapi.decref(ArrayBuilder_obj) c.pyapi.decref(behavior_obj) diff --git a/src/awkward/_connect/numexpr.py b/src/awkward/_connect/numexpr.py index 
3bda451353..85ab566c8c 100644 --- a/src/awkward/_connect/numexpr.py +++ b/src/awkward/_connect/numexpr.py @@ -83,10 +83,7 @@ def evaluate( names, ex_uses_vml = numexpr.necompiler._names_cache[expr_key] arguments = getArguments(names, local_dict, global_dict) - arrays = [ - ak.operations.to_layout(x, allow_record=True, allow_unknown=True) - for x in arguments - ] + arrays = [ak.operations.to_layout(x, allow_unknown=True) for x in arguments] def action(inputs, **ignore): if all( @@ -131,10 +128,7 @@ def re_evaluate(local_dict=None): names = numexpr.necompiler._numexpr_last["argnames"] arguments = getArguments(names, local_dict) - arrays = [ - ak.operations.to_layout(x, allow_record=True, allow_unknown=True) - for x in arguments - ] + arrays = [ak.operations.to_layout(x, allow_unknown=True) for x in arguments] def action(inputs, **ignore): if all( diff --git a/src/awkward/_connect/numpy.py b/src/awkward/_connect/numpy.py index b4afbbf7c6..fb1af71f11 100644 --- a/src/awkward/_connect/numpy.py +++ b/src/awkward/_connect/numpy.py @@ -88,7 +88,14 @@ def _to_rectilinear(arg, backend: Backend): return arg -def array_function(func, types, args, kwargs: dict[str, Any], behavior: Mapping | None): +def array_function( + func, + types, + args, + kwargs: dict[str, Any], + behavior: Mapping | None, + attrs: Mapping[str, Any] | None = None, +): function = implemented.get(func) if function is not None: return function(*args, **kwargs) @@ -106,13 +113,13 @@ def array_function(func, types, args, kwargs: dict[str, Any], behavior: Mapping result, allow_record=True, allow_unknown=True, - allow_none=True, + none_policy="pass-through", regulararray=True, use_from_iter=True, primitive_policy="pass-through", string_policy="pass-through", ) - return wrap_layout(out, behavior=behavior, allow_other=True) + return wrap_layout(out, behavior=behavior, allow_other=True, attrs=attrs) def implements(numpy_function): @@ -152,7 +159,6 @@ def _array_ufunc_custom_cast(inputs, behavior: Mapping | None, 
backend): cast_fcn = find_custom_cast(x, behavior) maybe_layout = ak.operations.to_layout( x if cast_fcn is None else cast_fcn(x), - allow_record=True, allow_unknown=True, primitive_policy="pass-through", string_policy="pass-through", diff --git a/src/awkward/_layout.py b/src/awkward/_layout.py index 374441b176..1012a5d5e1 100644 --- a/src/awkward/_layout.py +++ b/src/awkward/_layout.py @@ -2,15 +2,21 @@ from __future__ import annotations -from collections.abc import Mapping - +from collections.abc import Mapping, Sequence +from typing import Literal + +from awkward._backends.backend import Backend +from awkward._backends.dispatch import ( + common_backend, + regularize_backend, +) from awkward._backends.numpy import NumpyBackend from awkward._behavior import behavior_of from awkward._nplikes.dispatch import nplike_of_obj from awkward._nplikes.jax import Jax from awkward._nplikes.numpy import Numpy from awkward._nplikes.numpy_like import NumpyMetadata -from awkward._typing import TYPE_CHECKING +from awkward._typing import TYPE_CHECKING, Any, NamedTuple, Self, TypeVar from awkward.errors import AxisError if TYPE_CHECKING: @@ -21,12 +27,184 @@ numpy_backend = NumpyBackend.instance() -def wrap_layout(content, behavior=None, highlevel=True, like=None, allow_other=False): +T = TypeVar("T") + +if TYPE_CHECKING: + from awkward.highlevel import Array + from awkward.highlevel import Record as HighLevelRecord + + +class HighLevelMetadata(NamedTuple): + backend: Backend + attrs: Mapping | None + behavior: Mapping | None + + +K = TypeVar("K") +V = TypeVar("V") + + +def merge_mappings( + mappings: Sequence[Mapping[K, V]], default: Mapping[K, V] | None = None +) -> Mapping[K, V]: + # TODO: add zero-copy optimisation + if len(mappings) == 0: + return default + elif len(mappings) == 1: + return mappings[0] + else: + return {k: v for mapping in mappings for k, v in mapping.items()} + + +class HighLevelContext: + def __init__( + self, behavior: Mapping | None = None, attrs: 
Mapping[str, Any] | None = None + ): + self._behavior = behavior + self._attrs = attrs + self._is_finalized = False + + self._attrs_from_objects = [] + self._behavior_from_objects = [] + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.finalize() + + def _ensure_finalized(self): + if not self._is_finalized: + raise RuntimeError("HighLevelContext has not yet been finalized") + + def _ensure_not_finalized(self): + if self._is_finalized: + raise RuntimeError("HighLevelContext has already been finalized") + + @property + def attrs(self) -> Mapping[str, Any] | None: + self._ensure_finalized() + return self._attrs + + @property + def behavior(self) -> Mapping | None: + self._ensure_finalized() + return self._behavior + + def finalize(self) -> Self: + self._ensure_not_finalized() + + if self._behavior is None: + # TODO: cleaner reverse + behavior = merge_mappings(self._behavior_from_objects[::-1], default=None) + else: + behavior = self._behavior + + if self._attrs is None: + attrs = merge_mappings(self._attrs_from_objects[::-1], default=None) + else: + attrs = self._attrs + + self._attrs = attrs + self._behavior = behavior + self._is_finalized = True + + return self + + def update(self, obj: T) -> T: + from awkward.highlevel import Array, ArrayBuilder, Record + + self._ensure_not_finalized() + + if isinstance(obj, (Array, Record, ArrayBuilder)): + if obj._attrs is not None: + self._attrs_from_objects.append(obj._attrs) + + if obj._behavior is not None: + self._behavior_from_objects.append(obj._behavior) + + return obj + + def unwrap( + self, + obj: Any, + *, + allow_record: bool = True, + allow_unknown: bool = False, + none_policy: Literal["error", "promote", "pass-through"] = "error", + primitive_policy: Literal["error", "promote", "pass-through"] = "promote", + string_policy: Literal[ + "error", "promote", "pass-through", "as-characters" + ] = "as-characters", + use_from_iter: bool = True, + regulararray: bool = True, + ) 
-> Any: + from awkward.operations.ak_to_layout import _impl as to_layout_impl + + self.update(obj) + + return to_layout_impl( + obj, + allow_record=allow_record, + allow_unknown=allow_unknown, + none_policy=none_policy, + use_from_iter=use_from_iter, + primitive_policy=primitive_policy, + string_policy=string_policy, + regulararray=regulararray, + ) + + def wrap( + self, obj: Any, *, highlevel: bool = True, allow_other: bool = False + ) -> Any: + self._ensure_finalized() + + return wrap_layout( + obj, + highlevel=highlevel, + attrs=self._attrs, + behavior=self._behavior, + allow_other=allow_other, + ) + + +def ensure_same_backend( + *layouts: Any, default_backend: str | Backend = "cpu" +) -> list[Any]: + """ + + Returns: + object: + """ + backends: set[Backend] = { + layout.backend for layout in layouts if hasattr(layout, "backend") + } + + backend: Backend + if len(backends) >= 1: + backend = common_backend(backends) + else: + backend = regularize_backend(default_backend) + + return [ + layout.to_backend(backend) if hasattr(layout, "to_backend") else layout + for layout in layouts + ] + + +def wrap_layout( + content: T, + behavior: Mapping | None = None, + highlevel: bool = True, + like: Any = None, + allow_other: bool = False, + attrs: Mapping | None = None, +) -> T | Array | HighLevelRecord: import awkward.highlevel from awkward.contents import Content from awkward.record import Record - assert content is None or isinstance(content, (Content, Record)) or allow_other + assert isinstance(content, (Content, Record)) or allow_other assert behavior is None or isinstance(behavior, Mapping) assert isinstance(highlevel, bool) if highlevel: @@ -34,11 +212,18 @@ def wrap_layout(content, behavior=None, highlevel=True, like=None, allow_other=F behavior = behavior_of(like) if isinstance(content, Content): - return awkward.highlevel.Array(content, behavior=behavior) + return awkward.highlevel.Array(content, behavior=behavior, attrs=attrs) elif isinstance(content, Record): - 
return awkward.highlevel.Record(content, behavior=behavior) + return awkward.highlevel.Record(content, behavior=behavior, attrs=attrs) + elif allow_other: + return content + else: + raise AssertionError - return content + elif isinstance(content, (Content, Record)) or allow_other: + return content + else: + raise AssertionError def maybe_highlevel_to_lowlevel(obj): diff --git a/src/awkward/_pickle.py b/src/awkward/_pickle.py index c0d65c8508..b36cc3e733 100644 --- a/src/awkward/_pickle.py +++ b/src/awkward/_pickle.py @@ -7,8 +7,10 @@ import sys import threading import warnings +from collections.abc import Mapping +from contextlib import contextmanager -from awkward._typing import Any, Protocol, runtime_checkable +from awkward._typing import TYPE_CHECKING, Any, JSONMapping, Protocol, runtime_checkable if sys.version_info < (3, 12): import importlib_metadata @@ -16,6 +18,11 @@ import importlib.metadata as importlib_metadata +if TYPE_CHECKING: + from awkward._nplikes.shape import ShapeItem + from awkward.highlevel import Array, Record + + @runtime_checkable class PickleReducer(Protocol): def __call__(self, obj: Any, protocol: int) -> tuple | NotImplemented: @@ -69,8 +76,72 @@ def get_custom_reducer() -> PickleReducer | None: return _plugin -def custom_reduce(obj, protocol) -> tuple | NotImplemented: - plugin = get_custom_reducer() - if plugin is None: +_DISABLE_CUSTOM_REDUCER = False + + +@contextmanager +def use_builtin_reducer(): + global _DISABLE_CUSTOM_REDUCER + old_value, _DISABLE_CUSTOM_REDUCER = _DISABLE_CUSTOM_REDUCER, True + try: + yield + finally: + _DISABLE_CUSTOM_REDUCER = old_value + + +def custom_reduce(obj, protocol: int) -> tuple | NotImplemented: + if (plugin := get_custom_reducer()) is None or _DISABLE_CUSTOM_REDUCER: return NotImplemented - return plugin(obj, protocol) + else: + return plugin(obj, protocol) + + +def unpickle_array_schema_1( + form_dict: dict, + length: ShapeItem, + container: Mapping[str, Any], + behavior: JSONMapping | None, + 
attrs: JSONMapping | None, +) -> Array: + from awkward.operations.ak_from_buffers import _impl + + return _impl( + form_dict, + length, + container, + backend="cpu", + behavior=behavior, + attrs=attrs, + highlevel=True, + buffer_key="{form_key}-{attribute}", + byteorder="<", + simplify=False, + ) + + +def unpickle_record_schema_1( + form_dict: dict, + length: ShapeItem, + container: Mapping[str, Any], + behavior: JSONMapping | None, + attrs: JSONMapping | None, + at: int, +) -> Record: + from awkward.highlevel import Record + from awkward.operations.ak_from_buffers import _impl + from awkward.record import Record as LowLevelRecord + + array_layout = _impl( + form_dict, + length, + container, + backend="cpu", + behavior=behavior, + attrs=attrs, + highlevel=False, + buffer_key="{form_key}-{attribute}", + byteorder="<", + simplify=False, + ) + layout = LowLevelRecord(array_layout, at) + return Record(layout, behavior=behavior, attrs=attrs) diff --git a/src/awkward/_slicing.py b/src/awkward/_slicing.py index ca301126da..bb266c3b53 100644 --- a/src/awkward/_slicing.py +++ b/src/awkward/_slicing.py @@ -284,7 +284,7 @@ def normalise_item(item, backend: Backend) -> SliceItem: item, allow_record=False, allow_unknown=False, - allow_none=False, + none_policy="error", regulararray=False, use_from_iter=False, primitive_policy="error", @@ -306,7 +306,7 @@ def normalise_item(item, backend: Backend) -> SliceItem: item, allow_record=False, allow_unknown=False, - allow_none=False, + none_policy="error", regulararray=False, use_from_iter=True, primitive_policy="error", diff --git a/src/awkward/contents/content.py b/src/awkward/contents/content.py index 0007d41f52..8be29bd077 100644 --- a/src/awkward/contents/content.py +++ b/src/awkward/contents/content.py @@ -691,7 +691,7 @@ def _getitem(self, where): where, allow_record=False, allow_unknown=False, - allow_none=False, + none_policy="error", regulararray=False, use_from_iter=False, primitive_policy="error", @@ -716,7 +716,7 @@ def 
_getitem(self, where): where, allow_record=False, allow_unknown=False, - allow_none=False, + none_policy="error", regulararray=False, use_from_iter=True, primitive_policy="error", diff --git a/src/awkward/forms/form.py b/src/awkward/forms/form.py index 9b65e97269..6cfa90a603 100644 --- a/src/awkward/forms/form.py +++ b/src/awkward/forms/form.py @@ -561,10 +561,18 @@ def length_zero_array( byteorder=ak._util.native_byteorder, highlevel=highlevel, behavior=behavior, + attrs=None, simplify=False, ) def length_one_array(self, *, backend=numpy_backend, highlevel=True, behavior=None): + if highlevel: + deprecate( + "The `highlevel=True` variant of `Form.length_zero_array` is now deprecated. " + "Please use `ak.Array(form.length_zero_array(...), behavior=...)` if an `ak.Array` is required.", + version="2.3.0", + ) + # The naive implementation of a length-1 array requires that we have a sufficiently # large buffer to be able to build _any_ subtree. def max_prefer_unknown(this: ShapeItem, that: ShapeItem) -> ShapeItem: @@ -662,6 +670,7 @@ def prepare(form, multiplier): byteorder=ak._util.native_byteorder, highlevel=highlevel, behavior=behavior, + attrs=None, simplify=False, ) diff --git a/src/awkward/highlevel.py b/src/awkward/highlevel.py index a83e5f59c8..2ebdebe4d3 100644 --- a/src/awkward/highlevel.py +++ b/src/awkward/highlevel.py @@ -19,6 +19,7 @@ import awkward as ak import awkward._connect.hist +from awkward._attrs import attrs_of, without_transient_attrs from awkward._backends.dispatch import register_backend_lookup_factory from awkward._backends.numpy import NumpyBackend from awkward._behavior import behavior_of, get_array_class, get_record_class @@ -26,10 +27,14 @@ from awkward._nplikes.numpy import Numpy from awkward._nplikes.numpy_like import NumpyMetadata from awkward._operators import NDArrayOperatorsMixin -from awkward._pickle import custom_reduce +from awkward._pickle import ( + custom_reduce, + unpickle_array_schema_1, + unpickle_record_schema_1, +) from 
awkward._prettyprint import Formatter from awkward._regularize import is_non_string_like_iterable -from awkward._typing import TypeVar +from awkward._typing import Any, TypeVar __all__ = ("Array", "ArrayBuilder", "Record") @@ -271,6 +276,7 @@ def __init__( with_name=None, check_valid=False, backend=None, + attrs=None, ): self._cpp_type = None if isinstance(data, ak.contents.Content): @@ -279,6 +285,7 @@ def __init__( elif isinstance(data, Array): layout = data._layout behavior = behavior_of(data, behavior=behavior) + attrs = attrs_of(data, attrs=attrs) elif isinstance(data, dict): fields = [] @@ -309,17 +316,21 @@ def __init__( if with_name is not None: layout = ak.operations.with_name( - layout, with_name, highlevel=False, behavior=behavior + layout, with_name, highlevel=False, behavior=behavior, attrs=attrs ) - if backend is not None and backend != ak.operations.backend(layout): + if not (backend is None or backend == layout.backend.name): layout = ak.operations.to_backend(layout, backend, highlevel=False) if behavior is not None and not isinstance(behavior, Mapping): - raise TypeError("behavior must be None or mapping") + raise TypeError("behavior must be None or a mapping") + + if attrs is not None and not isinstance(attrs, Mapping): + raise TypeError("attrs must be None or a mapping") self._layout = layout self._behavior = behavior + self._attrs = attrs docstr = layout.purelist_parameter("__doc__") if isinstance(docstr, str): @@ -341,6 +352,27 @@ def _update_class(self): self._numbaview = None self.__class__ = get_array_class(self._layout, self._behavior) + @property + def attrs(self) -> Mapping[str, Any]: + """ + The mutable mapping containing top-level metadata, which is serialised + with the array during pickling. + + Keys prefixed with `@` are identified as "transient" attributes + which are discarded prior to pickling, permitting the storage of + non-pickleable types. 
+ """ + if self._attrs is None: + self._attrs = {} + return self._attrs + + @attrs.setter + def attrs(self, value: Mapping[str, Any]): + if isinstance(value, Mapping): + self._attrs = value + else: + raise TypeError("attrs must be a mapping") + @property def layout(self): """ @@ -1100,7 +1132,12 @@ def __setitem__(self, where, what): raise TypeError("only fields may be assigned in-place (by field name)") self._layout = ak.operations.with_field( - self._layout, what, where, highlevel=False + self._layout, + what, + where, + highlevel=False, + attrs=self._attrs, + behavior=self._behavior, ) self._numbaview = None @@ -1132,7 +1169,11 @@ def __delitem__(self, where): raise TypeError("only fields may be removed in-place (by field name)") self._layout = ak.operations.ak_without_field._impl( - self._layout, where, highlevel=False, behavior=self._behavior + self._layout, + where, + highlevel=False, + behavior=self._behavior, + attrs=self._attrs, ) self._numbaview = None @@ -1476,7 +1517,7 @@ def __array_function__(self, func, types, args, kwargs): See also #__array_ufunc__. 
""" return ak._connect.numpy.array_function( - func, types, args, kwargs, behavior=self._behavior + func, types, args, kwargs, behavior=self._behavior, attrs=self._attrs ) @non_inspectable_property @@ -1518,7 +1559,19 @@ def __reduce_ex__(self, protocol: int) -> tuple: behavior = None else: behavior = self._behavior - return object.__new__, (Array,), (form.to_dict(), length, container, behavior) + + if self._attrs is None: + attrs = self._attrs + else: + attrs = without_transient_attrs(self._attrs) + + return unpickle_array_schema_1, ( + form.to_dict(), + length, + container, + behavior, + attrs, + ) def __setstate__(self, state): form, length, container, behavior, *_ = state @@ -1551,15 +1604,18 @@ def __setstate__(self, state): ) self._layout = layout self._behavior = behavior + self._attrs = None + self._update_class() def __copy__(self): - return Array(self._layout, behavior=self._behavior) + return Array(self._layout, behavior=self._behavior, attrs=self._attrs) def __deepcopy__(self, memo): return Array( copy.deepcopy(self._layout, memo), behavior=copy.deepcopy(self._behavior, memo), + attrs=copy.deepcopy(self._attrs, memo), ) def __bool__(self): @@ -1653,13 +1709,15 @@ def __init__( behavior=None, with_name=None, check_valid=False, - library=None, + backend=None, + attrs=None, ): if isinstance(data, ak.record.Record): layout = data elif isinstance(data, Record): layout = data._layout + attrs = data.attrs elif isinstance(data, str): layout = ak.operations.from_json(data, highlevel=False) @@ -1690,14 +1748,15 @@ def __init__( if with_name is not None: layout = ak.operations.with_name(layout, with_name, highlevel=False) - if library is not None and library != ak.operations.library(layout): - layout = ak.operations.to_library(layout, library, highlevel=False) + if not (backend is None or backend == layout.backend.name): + layout = ak.operations.to_backend(layout, backend, highlevel=False) if behavior is not None and not isinstance(behavior, Mapping): raise 
TypeError("behavior must be None or mapping") self._layout = layout self._behavior = behavior + self._attrs = attrs docstr = layout.purelist_parameter("__doc__") if isinstance(docstr, str): @@ -1717,6 +1776,27 @@ def _update_class(self): self._numbaview = None self.__class__ = get_record_class(self._layout, self._behavior) + @property + def attrs(self) -> Mapping[str, Any]: + """ + The mapping containing top-level metadata, which is serialised + with the record during pickling. + + Keys prefixed with `@` are identified as "transient" attributes + which are discarded prior to pickling, permitting the storage of + non-pickleable types. + """ + if self._attrs is None: + self._attrs = {} + return self._attrs + + @attrs.setter + def attrs(self, value: Mapping[str, Any]): + if isinstance(value, Mapping): + self._attrs = value + else: + raise TypeError("attrs must be a mapping") + @property def layout(self): """ @@ -1924,7 +2004,12 @@ def __setitem__(self, where, what): raise TypeError("only fields may be assigned in-place (by field name)") self._layout = ak.operations.ak_with_field._impl( - self._layout, what, where, highlevel=False, behavior=self._behavior + self._layout, + what, + where, + highlevel=False, + behavior=self._behavior, + attrs=self._attrs, ) self._numbaview = None @@ -1957,7 +2042,11 @@ def __delitem__(self, where): raise TypeError("only fields may be removed in-place (by field name)") self._layout = ak.operations.ak_without_field._impl( - self._layout, where, highlevel=False, behavior=self._behavior + self._layout, + where, + highlevel=False, + behavior=self._behavior, + attrs=self._attrs, ) self._numbaview = None @@ -2222,10 +2311,19 @@ def __reduce_ex__(self, protocol: int) -> tuple: behavior = None else: behavior = self._behavior - return ( - object.__new__, - (Record,), - (form.to_dict(), length, container, behavior, packed_layout.at), + + if self._attrs is None: + attrs = self._attrs + else: + attrs = without_transient_attrs(self._attrs) + + return 
unpickle_record_schema_1, ( + form.to_dict(), + length, + container, + behavior, + attrs, + packed_layout.at, ) def __setstate__(self, state): @@ -2242,15 +2340,18 @@ def __setstate__(self, state): layout = ak.record.Record(layout, at) self._layout = layout self._behavior = behavior + self._attrs = None + self._update_class() def __copy__(self): - return Record(self._layout, behavior=self._behavior) + return Record(self._layout, behavior=self._behavior, attrs=self._attrs) def __deepcopy__(self, memo): return Record( copy.deepcopy(self._layout, memo), behavior=copy.deepcopy(self._behavior, memo), + attrs=copy.deepcopy(self._attrs, memo), ) def __bool__(self): @@ -2392,15 +2493,16 @@ class ArrayBuilder(Sized): be considered the "least effort" approach. """ - def __init__(self, *, behavior=None, initial=1024, resize=8): + def __init__(self, *, behavior=None, attrs=None, initial=1024, resize=8): if behavior is not None and not isinstance(behavior, Mapping): raise TypeError("behavior must be None or mapping") self._layout = _ext.ArrayBuilder(initial=initial, resize=resize) self._behavior = behavior + self._attrs = attrs @classmethod - def _wrap(cls, layout, behavior=None): + def _wrap(cls, layout, behavior=None, attrs=None): """ Args: layout (`ak._ext.ArrayBuilder`): Low-level builder to wrap. @@ -2418,8 +2520,30 @@ def _wrap(cls, layout, behavior=None): out = cls.__new__(cls) out._layout = layout out._behavior = behavior + out._attrs = attrs return out + @property + def attrs(self) -> Mapping[str, Any]: + """ + The mapping containing top-level metadata, which is serialised + with the array during pickling. + + Keys prefixed with `@` are identified as "transient" attributes + which are discarded prior to pickling, permitting the storage of + non-pickleable types. 
+ """ + if self._attrs is None: + self._attrs = {} + return self._attrs + + @attrs.setter + def attrs(self, value: Mapping[str, Any]): + if isinstance(value, Mapping): + self._attrs = value + else: + raise TypeError("attrs must be a mapping") + @property def behavior(self): """ @@ -2602,20 +2726,20 @@ def snapshot(self): The currently accumulated data are *copied* into the new array. """ - formstr, length, container = self._layout.to_buffers() - form = ak.forms.from_json(formstr) - with ak._errors.OperationErrorContext("ak.ArrayBuilder.snapshot", [], {}): - return ak.operations.from_buffers( + formstr, length, container = self._layout.to_buffers() + form = ak.forms.from_json(formstr) + return ak.operations.ak_from_buffers._impl( form, length, container, buffer_key="{form_key}-{attribute}", backend="cpu", byteorder=ak._util.native_byteorder, - allow_noncanonical_form=True, + simplify=True, highlevel=True, behavior=self._behavior, + attrs=self._attrs, ) def null(self): diff --git a/src/awkward/operations/ak_all.py b/src/awkward/operations/ak_all.py index a007b49fd8..859bfd98cb 100644 --- a/src/awkward/operations/ak_all.py +++ b/src/awkward/operations/ak_all.py @@ -3,10 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -24,6 +23,7 @@ def all( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -44,6 +44,8 @@ def all( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. 
Returns True in each group of elements from `array` (many types supported, including all Awkward Arrays and Records) if all values are True; False @@ -61,15 +63,13 @@ def all( yield (array,) # Implementation - return _impl(array, axis, keepdims, mask_identity, highlevel, behavior) + return _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs) -def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") reducer = ak._reducers.All() out = ak._do.reduce( @@ -78,12 +78,9 @@ def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): axis=axis, mask=mask_identity, keepdims=keepdims, - behavior=behavior, + behavior=ctx.behavior, ) - if isinstance(out, (ak.contents.Content, ak.record.Record)): - return wrap_layout(out, behavior, highlevel) - else: - return out + return ctx.wrap(out, highlevel=highlevel, allow_other=True) @ak._connect.numpy.implements("all") diff --git a/src/awkward/operations/ak_almost_equal.py b/src/awkward/operations/ak_almost_equal.py index 19a0bb4db4..6f54d3a79b 100644 --- a/src/awkward/operations/ak_almost_equal.py +++ b/src/awkward/operations/ak_almost_equal.py @@ -2,7 +2,7 @@ from __future__ import annotations -from awkward._backends.dispatch import backend_of +from awkward._backends.dispatch import backend_of_obj from awkward._backends.numpy import NumpyBackend from awkward._behavior import behavior_of, get_array_class, get_record_class from awkward._dispatch import high_level_function @@ -56,8 +56,8 @@ def almost_equal( left_behavior = behavior_of(left) right_behavior = behavior_of(right) - left_backend = 
backend_of(left, default=cpu) - right_backend = backend_of(right, default=cpu) + left_backend = backend_of_obj(left, default=cpu) + right_backend = backend_of_obj(right, default=cpu) if left_backend is not right_backend: return False backend = left_backend diff --git a/src/awkward/operations/ak_any.py b/src/awkward/operations/ak_any.py index 70eb578aef..79c9cc6b83 100644 --- a/src/awkward/operations/ak_any.py +++ b/src/awkward/operations/ak_any.py @@ -3,10 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -24,6 +23,7 @@ def any( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -44,6 +44,8 @@ def any( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. 
Returns True in each group of elements from `array` (many types supported, including all Awkward Arrays and Records) if any values are True; False @@ -61,15 +63,13 @@ def any( yield (array,) # Implementation - return _impl(array, axis, keepdims, mask_identity, highlevel, behavior) + return _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs) -def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") reducer = ak._reducers.Any() out = ak._do.reduce( @@ -78,12 +78,9 @@ def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): axis=axis, mask=mask_identity, keepdims=keepdims, - behavior=behavior, + behavior=ctx.behavior, ) - if isinstance(out, (ak.contents.Content, ak.record.Record)): - return wrap_layout(out, behavior, highlevel) - else: - return out + return ctx.wrap(out, highlevel=highlevel, allow_other=True) @ak._connect.numpy.implements("any") diff --git a/src/awkward/operations/ak_argcartesian.py b/src/awkward/operations/ak_argcartesian.py index 1bc68bfcb8..44facda474 100644 --- a/src/awkward/operations/ak_argcartesian.py +++ b/src/awkward/operations/ak_argcartesian.py @@ -5,11 +5,8 @@ from collections.abc import Mapping import awkward as ak -from awkward._backends.dispatch import backend_of from awkward._backends.numpy import NumpyBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -29,11 +26,12 @@ def argcartesian( 
with_name=None, highlevel=True, behavior=None, + attrs=None, ): """ Args: - arrays (dict or iterable of arrays): Each value in this dict or iterable - can be any array-like data that #ak.to_layout recognizes. + arrays (mapping or sequence of arrays): Each value in this mapping or + sequence can be any array-like data that #ak.to_layout recognizes. axis (int): The dimension at which this operation is applied. The outermost dimension is `0`, followed by `1`, etc., and negative values count backward from the innermost: `-1` is the innermost @@ -54,6 +52,8 @@ def argcartesian( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Computes a Cartesian product (i.e. cross product) of data from a set of `arrays`, like #ak.cartesian, but returning integer indexes for @@ -103,37 +103,18 @@ def argcartesian( yield arrays # Implementation - return _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior) + return _impl( + arrays, axis, nested, parameters, with_name, highlevel, behavior, attrs + ) -def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): +def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior, attrs): axis = regularize_axis(axis) - if isinstance(arrays, dict): - behavior = behavior_of(*arrays.values(), behavior=behavior) - backend = backend_of(*arrays.values(), default=cpu, coerce_to_common=True) - layouts = { - n: ak._do.local_index( - ak.operations.to_layout( - x, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - axis, - ).to_backend(backend) - for n, x in arrays.items() - } + if isinstance(arrays, Mapping): + index_arrays = {n: ak.local_index(x, axis) for n, x in arrays.items()} else: - arrays = list(arrays) - behavior = behavior_of(*arrays, behavior=behavior) - backend = backend_of(*arrays, default=cpu, coerce_to_common=True) - 
layouts = [ - ak._do.local_index( - ak.operations.to_layout( - x, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - axis, - ).to_backend(backend) - for x in arrays - ] + index_arrays = [ak.local_index(x) for x in arrays] if with_name is not None: if parameters is None: @@ -142,13 +123,11 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): parameters = dict(parameters) parameters["__record__"] = with_name - result = ak.operations.cartesian( - layouts, + return ak.operations.cartesian( + index_arrays, axis=axis, nested=nested, parameters=parameters, - highlevel=False, + highlevel=highlevel, behavior=behavior, ) - - return wrap_layout(result, behavior, highlevel) diff --git a/src/awkward/operations/ak_argcombinations.py b/src/awkward/operations/ak_argcombinations.py index 28b133c5c8..98a2643855 100644 --- a/src/awkward/operations/ak_argcombinations.py +++ b/src/awkward/operations/ak_argcombinations.py @@ -4,7 +4,7 @@ import awkward as ak from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -25,6 +25,7 @@ def argcombinations( with_name=None, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -50,6 +51,8 @@ def argcombinations( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Computes a Cartesian product (i.e. 
cross product) of `array` with itself that is restricted to combinations sampled without replacement, @@ -74,11 +77,21 @@ def argcombinations( with_name, highlevel, behavior, + attrs, ) def _impl( - array, n, replacement, axis, fields, parameters, with_name, highlevel, behavior + array, + n, + replacement, + axis, + fields, + parameters, + with_name, + highlevel, + behavior, + attrs, ): axis = regularize_axis(axis) if parameters is None: @@ -91,12 +104,11 @@ def _impl( if axis < 0: raise ValueError("the 'axis' for argcombinations must be non-negative") else: - layout = ak._do.local_index( - ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - axis, - ) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ak._do.local_index( + ctx.unwrap(array, allow_record=False, primitive_policy="error"), + axis, + ) out = ak._do.combinations( layout, n, @@ -105,4 +117,4 @@ def _impl( fields=fields, parameters=parameters, ) - return wrap_layout(out, behavior, highlevel, like=array) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_argmax.py b/src/awkward/operations/ak_argmax.py index 8f013d2e57..a4dbe947bd 100644 --- a/src/awkward/operations/ak_argmax.py +++ b/src/awkward/operations/ak_argmax.py @@ -3,10 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -24,6 +23,7 @@ def argmax( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -44,6 +44,8 @@ def argmax( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. 
+ attrs (None or dict): Custom attributes for the output array, if + high-level. Returns the index position of the maximum value in each group of elements from `array` (many types supported, including all Awkward Arrays and @@ -68,7 +70,7 @@ def argmax( yield (array,) # Implementation - return _impl(array, axis, keepdims, mask_identity, highlevel, behavior) + return _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs) @high_level_function() @@ -80,6 +82,7 @@ def nanargmax( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -100,6 +103,8 @@ def nanargmax( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Like #ak.argmax, but treating NaN ("not a number") values as missing. @@ -116,21 +121,20 @@ def nanargmax( # Implementation return _impl( - ak.operations.ak_nan_to_none._impl(array, False, None), + ak.operations.ak_nan_to_none._impl(array, True, behavior, None), axis, keepdims, mask_identity, highlevel, behavior, + attrs, ) -def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") reducer = ak._reducers.ArgMax() out = ak._do.reduce( @@ -139,12 +143,9 @@ def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): axis=axis, mask=mask_identity, keepdims=keepdims, - behavior=behavior, + behavior=ctx.behavior, ) - if isinstance(out, (ak.contents.Content, ak.record.Record)): - return wrap_layout(out, behavior, highlevel) - else: - return out + 
return ctx.wrap(out, highlevel=highlevel, allow_other=True) @ak._connect.numpy.implements("argmax") diff --git a/src/awkward/operations/ak_argmin.py b/src/awkward/operations/ak_argmin.py index 95f4e10e47..7f21fb3aa8 100644 --- a/src/awkward/operations/ak_argmin.py +++ b/src/awkward/operations/ak_argmin.py @@ -3,10 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -24,6 +23,7 @@ def argmin( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -44,6 +44,8 @@ def argmin( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. 
Returns the index position of the minimum value in each group of elements from `array` (many types supported, including all Awkward Arrays and @@ -68,7 +70,7 @@ def argmin( yield (array,) # Implementation - return _impl(array, axis, keepdims, mask_identity, highlevel, behavior) + return _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs) @high_level_function() @@ -80,6 +82,7 @@ def nanargmin( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -115,21 +118,21 @@ def nanargmin( # Implementation return _impl( - ak.operations.ak_nan_to_none._impl(array, False, None), + ak.operations.ak_nan_to_none._impl(array, True, behavior, None), axis, keepdims, mask_identity, highlevel, behavior, + attrs, ) -def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") + reducer = ak._reducers.ArgMin() out = ak._do.reduce( @@ -138,12 +141,9 @@ def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): axis=axis, mask=mask_identity, keepdims=keepdims, - behavior=behavior, + behavior=ctx.behavior, ) - if isinstance(out, (ak.contents.Content, ak.record.Record)): - return wrap_layout(out, behavior, highlevel) - else: - return out + return ctx.wrap(out, highlevel=highlevel, allow_other=True) @ak._connect.numpy.implements("argmin") diff --git a/src/awkward/operations/ak_argsort.py b/src/awkward/operations/ak_argsort.py index 73f194471d..bade378b20 100644 --- a/src/awkward/operations/ak_argsort.py +++ b/src/awkward/operations/ak_argsort.py @@ -5,7 +5,7 @@ import awkward as ak from awkward._connect.numpy import 
UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -16,7 +16,14 @@ @high_level_function() def argsort( - array, axis=-1, *, ascending=True, stable=True, highlevel=True, behavior=None + array, + axis=-1, + *, + ascending=True, + stable=True, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -34,6 +41,8 @@ def argsort( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns an array of integer indexes that would sort the array if applied as an integer-array slice. @@ -57,16 +66,15 @@ def argsort( yield (array,) # Implementation - return _impl(array, axis, ascending, stable, highlevel, behavior) + return _impl(array, axis, ascending, stable, highlevel, behavior, attrs) -def _impl(array, axis, ascending, stable, highlevel, behavior): +def _impl(array, axis, ascending, stable, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") out = ak._do.argsort(layout, axis, ascending, stable) - return wrap_layout(out, behavior, highlevel, like=array) + return ctx.wrap(out, highlevel=highlevel) @ak._connect.numpy.implements("argsort") diff --git a/src/awkward/operations/ak_broadcast_arrays.py b/src/awkward/operations/ak_broadcast_arrays.py index 6a0be1db72..877c69f9c0 100644 --- a/src/awkward/operations/ak_broadcast_arrays.py +++ b/src/awkward/operations/ak_broadcast_arrays.py @@ -3,6 +3,7 @@ from __future__ import annotations import awkward as ak +from 
awkward._attrs import attrs_of_obj from awkward._backends.dispatch import backend_of from awkward._backends.numpy import NumpyBackend from awkward._behavior import behavior_of_obj @@ -26,6 +27,7 @@ def broadcast_arrays( right_broadcast=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -47,6 +49,8 @@ def broadcast_arrays( otherwise, return a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Like NumPy's [broadcast_arrays](https://docs.scipy.org/doc/numpy/reference/generated/numpy.broadcast_arrays.html) @@ -194,6 +198,7 @@ def broadcast_arrays( right_broadcast, highlevel, behavior, + attrs, ) @@ -205,19 +210,25 @@ def _impl( right_broadcast, highlevel, behavior, + attrs, ): # Need at least one array! if len(arrays) == 0: return [] - backend = backend_of(*arrays, default=cpu, coerce_to_common=True) - - inputs = [] - for x in arrays: - y = ak.operations.to_layout(x, allow_record=True, allow_unknown=True) - if not isinstance(y, (ak.contents.Content, ak.Record)): - y = ak.contents.NumpyArray(backend.nplike.asarray([y])) - inputs.append(y.to_backend(backend)) + backend = backend_of(*arrays, default=cpu) + + inputs = [ + ak.operations.to_layout( + x, + allow_record=True, + allow_unknown=True, + primitive_policy="promote", + string_policy="promote", + none_policy="promote", + ).to_backend(backend) + for x in arrays + ] def action(inputs, depth, **kwargs): # The depth limit is the depth at which we must return, i.e. 
@@ -243,11 +254,12 @@ def action(inputs, depth, **kwargs): assert isinstance(out, tuple) return [ wrap_layout( - content, - behavior=behavior_of_obj(array, behavior=behavior), + layout_out, + behavior=behavior_of_obj(array_in, behavior=behavior), highlevel=highlevel, + attrs=attrs_of_obj(array_in, attrs=attrs), ) - for content, array in zip(out, arrays) + for layout_out, array_in in zip(out, arrays) ] diff --git a/src/awkward/operations/ak_broadcast_fields.py b/src/awkward/operations/ak_broadcast_fields.py index 178ba4a49e..8036f8e81c 100644 --- a/src/awkward/operations/ak_broadcast_fields.py +++ b/src/awkward/operations/ak_broadcast_fields.py @@ -3,6 +3,7 @@ from __future__ import annotations import awkward as ak +from awkward._attrs import attrs_of_obj from awkward._backends.dispatch import backend_of from awkward._backends.numpy import NumpyBackend from awkward._behavior import behavior_of_obj @@ -15,7 +16,7 @@ @high_level_function() -def broadcast_fields(*arrays, highlevel=True, behavior=None): +def broadcast_fields(*arrays, highlevel=True, behavior=None, attrs=None): """ Args: arrays: Array-like data (anything #ak.to_layout recognizes). @@ -23,6 +24,8 @@ def broadcast_fields(*arrays, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Return a list of arrays whose types contain the same number of fields. 
Unlike #ak.broadcast_arrays, this function does not require record types to occur at the @@ -56,113 +59,134 @@ def broadcast_fields(*arrays, highlevel=True, behavior=None): yield arrays # Implementation - return _impl(arrays, highlevel, behavior) - - -def _impl(arrays, highlevel, behavior): - backend = backend_of(*arrays, default=cpu, coerce_to_common=True) - layouts = [ak.to_layout(x).to_backend(backend) for x in arrays] - - def identity(content): - return content - - def descend_to_record_or_identity(layout, pullback=identity): - assert layout is not None - if layout.is_record or layout.is_identity_like: - return pullback, layout - elif layout.is_option or layout.is_indexed or layout.is_list: - - def next_pull(content): - return pullback(layout.copy(content=content)) - - return descend_to_record_or_identity(layout.content, next_pull) - elif layout.is_leaf: - return pullback, layout - elif layout.is_union: - raise TypeError("unions are not supported") - else: - raise AssertionError("unexpected content type") - - # Like broadcast_and_apply, we want to walk into each layout, correct the structure, and then rebuilt the arrays - # We do this using "pull back" functions that accept a child content, and return the top-level layout. Unlike - # layout.copy, the pull-back functions can be arbitrarily deep: the closures maintain the structure of the array - def recurse(inputs): - # Descend to records, identities, or leaves - pullbacks, next_inputs = zip( - *[descend_to_record_or_identity(x) for x in inputs] + return _impl(arrays, highlevel, behavior, attrs) + + +def _identity(content): + return content + + +# A "pull-back" is a function that takes a leaf-node, and rebuilds the +# tree between the leaf and the caller. i.e., for some path X.Y.Z.LEAF, +# this function returns (f, LEAF) such that f(LEAF) = X.Y.Z.LEAF. +# Using pull-backs rather than recursive descent allows for the control +# flow to be implemented at the call-site rather than the leaves. 
+def _descend_to_record_or_leaf(layout, pullback=_identity): + assert layout is not None + if isinstance(layout, ak.record.Record): + return _descend_to_record_or_leaf( + layout.array, lambda x: ak.record.Record(x, layout.at) ) - # We can only work with all non-record, or all record/identity - if any(c.is_record for c in next_inputs): - if not all(c.is_record or c.is_identity_like for c in next_inputs): - raise AssertionError( - "if any inputs are records, all inputs must be records or identities" - ) - # With no records, we can exit here - else: - return [pull(layout) for pull, layout in zip(pullbacks, next_inputs)] - - # Broadcast the fields of only the records - records = [r for r in next_inputs if r.is_record] - all_fields = ak._util.unique_list( - [f for layout in records for f in layout.fields] + elif layout.is_record or layout.is_identity_like or layout.is_leaf: + return pullback, layout + elif layout.is_option or layout.is_indexed or layout.is_list: + + def next_pull(content): + return pullback(layout.copy(content=content)) + + return _descend_to_record_or_leaf(layout.content, next_pull) + elif layout.is_union: + raise TypeError("unions are not supported") + else: + raise AssertionError("unexpected content type") + + +# Like broadcast_and_apply, we want to walk into each layout, correct the structure, and then rebuilt the arrays +# We do this using "pull back" functions that accept a child content, and return the top-level layout. 
Unlike +# layout.copy, the pull-back functions can be arbitrarily deep: the closures maintain the structure of the array +def _recurse(inputs): + # Descend to records, identities, or leaves + pullbacks, next_inputs = zip(*[_descend_to_record_or_leaf(x) for x in inputs]) + # With no records, we can exit here + if not any(c.is_record for c in next_inputs): + return [pull(layout) for pull, layout in zip(pullbacks, next_inputs)] + # Otherwise, we can only work with all non-record, or all record/identity + elif not all(c.is_record or c.is_identity_like for c in next_inputs): + raise AssertionError( + "if any inputs are records, all inputs must be records or identities" ) - # Build a list of layouts for each field, i.e. [{x: aaaa, y: aaaa}, {x: bbbb, y: bbbb}] becomes - # [[aaaa, bbbb], [aaaa, bbbb]], where fields = [x, y] - # These layouts will be "broadcast" against each other, hence the per-field ordering - layouts_by_field = [] - for field in all_fields: - layouts_to_recurse = [] - layouts_for_field = [] - for layout in records: - if layout.has_field(field): - layouts_for_field.append(None) - layouts_to_recurse.append(layout.content(field)) - else: - layouts_for_field.append(layout.maybe_content(field)) - - # We only want to recurse into non-missing fields, so we build this list separately as a generator - recursed_field_layouts = iter(recurse(layouts_to_recurse)) - - # Now we build the final list of layouts for this field, choosing between the recursion result and the - # original layout according to whether the layout was recursed into - # The pattern here is that `layouts_for_field` maintains positional correspondence with the `records`, - # but uses `None` as a token for "recursed". 
In this case, we take the layout from `recursed_field_layouts` - # using the knowledge that `len(layouts_to_recurse)` corresponds to the number of `None`s - layouts_by_field.append( - [ - next(recursed_field_layouts) if layout is None else layout - for layout in layouts_for_field - ] - ) - - # Now we transpose the list-of-lists to group layouts by original record, instead of by the field - layouts_by_record = list(zip(*layouts_by_field)) - # Rebuild the original records with the new fields - next_records = iter( + # Broadcast the fields of only the records + next_records = [r for r in next_inputs if r.is_record] + all_fields = ak._util.unique_list( + [f for layout in next_records for f in layout.fields] + ) + + # Build a list of layouts for each field, i.e. [{x: aaaa, y: aaaa}, {x: bbbb, y: bbbb}] becomes + # [[aaaa, bbbb], [aaaa, bbbb]], where fields = [x, y] + # These layouts will be "broadcast" against each other, hence the per-field ordering + layouts_by_field = [] + for field in all_fields: + layouts_to_recurse = [] + layouts_for_field = [] + for layout in next_records: + if layout.has_field(field): + layouts_for_field.append(None) + layouts_to_recurse.append(layout.content(field)) + else: + layouts_for_field.append(layout.maybe_content(field)) + + # We only want to recurse into non-missing fields, so we build this list separately as a generator + recursed_field_layouts = iter(_recurse(layouts_to_recurse)) + + # Now we build the final list of layouts for this field, choosing between the recursion result and the + # original layout according to whether the layout was recursed into + # The pattern here is that `layouts_for_field` maintains positional correspondence with the `records`, + # but uses `None` as a token for "recursed". 
In this case, we take the layout from `recursed_field_layouts` + # using the knowledge that `len(layouts_to_recurse)` corresponds to the number of `None`s + layouts_by_field.append( [ - record.copy( - fields=all_fields, - contents=contents, - ) - for record, contents in zip(records, layouts_by_record) + next(recursed_field_layouts) if layout is None else layout + for layout in layouts_for_field ] ) - # Merge the records and identities - inner_layouts = [ - (layout if layout.is_identity_like else next(next_records)) - for layout in next_inputs + # Now we transpose the list-of-lists to group layouts by original record, instead of by the field + layouts_by_record = zip(*layouts_by_field) + # Rebuild the original records with the new fields + next_records = iter( + [ + record.copy( + fields=all_fields, + contents=contents, + ) + for record, contents in zip(next_records, layouts_by_record) ] + ) - # Rebuild the outermost layouts using pull-back functions - return [pull(layout) for pull, layout in zip(pullbacks, inner_layouts)] + # Merge the records and identities + inner_layouts = [ + (layout if layout.is_identity_like else next(next_records)) + for layout in next_inputs + ] + + # Rebuild the outermost layouts using pull-back functions + return [pull(layout) for pull, layout in zip(pullbacks, inner_layouts)] + + +def _impl(arrays, highlevel, behavior, attrs): + # Need at least one array! 
+ if len(arrays) == 0: + return [] + + backend = backend_of(*arrays, default=cpu) + layouts = [ak.to_layout(x, allow_record=True).to_backend(backend) for x in arrays] + + result_layouts = _recurse( + [ + record.array[record.at : record.at + 1] + if isinstance(record, ak.record.Record) + else record + for record in layouts + ] + ) return [ wrap_layout( - content, - behavior=behavior_of_obj(array, behavior=behavior), + layout_out, + behavior=behavior_of_obj(array_in, behavior=behavior), highlevel=highlevel, + attrs=attrs_of_obj(array_in, attrs=attrs), ) - for content, array in zip(recurse(layouts), arrays) + for layout_out, array_in in zip(result_layouts, arrays) ] diff --git a/src/awkward/operations/ak_cartesian.py b/src/awkward/operations/ak_cartesian.py index 513e324010..20d431cf7d 100644 --- a/src/awkward/operations/ak_cartesian.py +++ b/src/awkward/operations/ak_cartesian.py @@ -5,11 +5,9 @@ from collections.abc import Mapping import awkward as ak -from awkward._backends.dispatch import backend_of from awkward._backends.numpy import NumpyBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend, maybe_posaxis from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis from awkward.errors import AxisError @@ -30,11 +28,12 @@ def cartesian( with_name=None, highlevel=True, behavior=None, + attrs=None, ): """ Args: - arrays (dict or iterable of arrays): Each value in this dict or iterable - can be any array-like data that #ak.to_layout recognizes. + arrays (mapping or sequence of arrays): Each value in this mapping or + sequence can be any array-like data that #ak.to_layout recognizes. axis (int): The dimension at which this operation is applied. 
The outermost dimension is `0`, followed by `1`, etc., and negative values count backward from the innermost: `-1` is the innermost @@ -55,6 +54,8 @@ def cartesian( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Computes a Cartesian product (i.e. cross product) of data from a set of `arrays`. This operation creates records (if `arrays` is a dict) or tuples @@ -207,35 +208,32 @@ def cartesian( yield arrays # Implementation - return _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior) + return _impl( + arrays, axis, nested, parameters, with_name, highlevel, behavior, attrs + ) -def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): +def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior, attrs): axis = regularize_axis(axis) - if isinstance(arrays, dict): - backend = backend_of(*arrays.values(), default=cpu, coerce_to_common=True) - behavior = behavior_of(*arrays.values(), behavior=behavior) - array_layouts = { - name: ak.operations.to_layout( - layout, allow_record=False, allow_unknown=False - ).to_backend(backend) - for name, layout in arrays.items() - } - layouts = list(array_layouts.values()) - fields = list(array_layouts.keys()) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + if isinstance(arrays, Mapping): + layouts = ensure_same_backend( + *( + ctx.unwrap(x, allow_record=False, allow_unknown=False) + for x in arrays.values() + ) + ) + fields = list(arrays.keys()) + array_layouts = dict(zip(fields, layouts)) - else: - arrays = list(arrays) - backend = backend_of(*arrays, default=cpu, coerce_to_common=True) - behavior = behavior_of(*arrays, behavior=behavior) - array_layouts = [ - ak.operations.to_layout( - layout, allow_record=False, allow_unknown=False - ).to_backend(backend) - for layout in arrays - ] - layouts = array_layouts 
- fields = None + else: + layouts = array_layouts = ensure_same_backend( + *( + ctx.unwrap(x, allow_record=False, allow_unknown=False) + for x in arrays + ) + ) + fields = None if with_name is not None: if parameters is None: @@ -269,7 +267,7 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): else: nested = list(range(len(layouts))[:-1]) else: - if isinstance(array_layouts, dict): + if isinstance(array_layouts, Mapping): if any(not (isinstance(x, str) and x in array_layouts) for x in nested): raise ValueError( "the 'nested' parameter of cartesian must be dict keys " @@ -290,6 +288,7 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): "[0, len(arrays) - 1) for an iterable of arrays" ) + backend = next((layout.backend for layout in layouts), cpu) if posaxis == 0: if fields is not None: nested = [i for i, name in enumerate(fields) if name in nested] @@ -403,4 +402,4 @@ def apply_build_record(inputs, depth, **kwargs): result, axis=axis_to_flatten, highlevel=False, behavior=behavior ) - return wrap_layout(result, behavior, highlevel) + return ctx.wrap(result, highlevel=highlevel) diff --git a/src/awkward/operations/ak_categories.py b/src/awkward/operations/ak_categories.py index 909dae0584..cd7f6ccf4c 100644 --- a/src/awkward/operations/ak_categories.py +++ b/src/awkward/operations/ak_categories.py @@ -3,20 +3,23 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("categories",) @high_level_function() -def categories(array, highlevel=True): +def categories(array, highlevel=True, *, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). highlevel (bool): If True, return an #ak.Array; otherwise, return a low-level #ak.contents.Content subclass. 
+ behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. If the `array` is categorical (contains #ak.contents.IndexedArray or #ak.contents.IndexedOptionArray labeled with parameter @@ -28,24 +31,24 @@ def categories(array, highlevel=True): yield (array,) # Implementation - return _impl(array, highlevel) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel): - output = [None] +def _impl(array, highlevel, behavior, attrs): + output = None def action(layout, **kwargs): + nonlocal output + if layout.parameter("__array__") == "categorical": - output[0] = layout.content + output = layout.content return layout else: return None - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") ak._do.recursively_apply(layout, action) - return wrap_layout(output[0], behavior, highlevel) + return ctx.wrap(output, highlevel=highlevel) diff --git a/src/awkward/operations/ak_combinations.py b/src/awkward/operations/ak_combinations.py index 075bc0ebea..d22708cb4a 100644 --- a/src/awkward/operations/ak_combinations.py +++ b/src/awkward/operations/ak_combinations.py @@ -4,7 +4,7 @@ import awkward as ak from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -25,6 +25,7 @@ def combinations( with_name=None, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -50,6 +51,8 @@ def combinations( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. 
+ attrs (None or dict): Custom attributes for the output array, if + high-level. Computes a Cartesian product (i.e. cross product) of `array` with itself that is restricted to combinations sampled without replacement. If the @@ -195,23 +198,33 @@ def combinations( with_name, highlevel, behavior, + attrs, ) def _impl( - array, n, replacement, axis, fields, parameters, with_name, highlevel, behavior + array, + n, + replacement, + axis, + fields, + parameters, + with_name, + highlevel, + behavior, + attrs, ): axis = regularize_axis(axis) - if parameters is None: - parameters = {} + + if with_name is None: + pass + elif parameters is None: + parameters = {"__record__": with_name} else: - parameters = dict(parameters) - if with_name is not None: - parameters["__record__"] = with_name + parameters = {**parameters, "__record__": with_name} - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") out = ak._do.combinations( layout, n, @@ -220,4 +233,4 @@ def _impl( fields=fields, parameters=parameters, ) - return wrap_layout(out, behavior, highlevel, like=array) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_concatenate.py b/src/awkward/operations/ak_concatenate.py index b4f6bf831b..18143b6840 100644 --- a/src/awkward/operations/ak_concatenate.py +++ b/src/awkward/operations/ak_concatenate.py @@ -3,11 +3,10 @@ from __future__ import annotations import awkward as ak -from awkward._backends.dispatch import backend_of +from awkward._backends.dispatch import backend_of_obj from awkward._backends.numpy import NumpyBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend, maybe_posaxis 
from awkward._nplikes.numpy_like import NumpyMetadata from awkward._nplikes.shape import unknown_length from awkward._regularize import regularize_axis @@ -23,7 +22,9 @@ @ak._connect.numpy.implements("concatenate") @high_level_function() -def concatenate(arrays, axis=0, *, mergebool=True, highlevel=True, behavior=None): +def concatenate( + arrays, axis=0, *, mergebool=True, highlevel=True, behavior=None, attrs=None +): """ Args: arrays: Array-like data (anything #ak.to_layout recognizes). @@ -39,6 +40,8 @@ def concatenate(arrays, axis=0, *, mergebool=True, highlevel=True, behavior=None a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns an array with `arrays` concatenated. For `axis=0`, this means that one whole array follows another. For `axis=1`, it means that the `arrays` @@ -48,47 +51,72 @@ def concatenate(arrays, axis=0, *, mergebool=True, highlevel=True, behavior=None # Dispatch if ( # Is an array with a known backend - backend_of(arrays, default=None) is not None + backend_of_obj(arrays, default=None) is not None ): yield (arrays,) else: yield arrays # Implementation - return _impl(arrays, axis, mergebool, highlevel, behavior) + return _impl(arrays, axis, mergebool, highlevel, behavior, attrs) + + +def _merge_as_union( + contents: Sequence[Content], parameters=None +) -> ak.contents.UnionArray: + length = sum([c.length for c in contents]) + first = contents[0] + tags = ak.index.Index8.empty(length, first.backend.index_nplike) + index = ak.index.Index64.empty(length, first.backend.index_nplike) + + offset = 0 + for i, content in enumerate(contents): + content.backend.maybe_kernel_error( + content.backend["awkward_UnionArray_filltags_const", tags.dtype.type]( + tags.data, offset, content.length, i + ) + ) + content.backend.maybe_kernel_error( + content.backend["awkward_UnionArray_fillindex_count", 
index.dtype.type]( + index.data, offset, content.length + ) + ) + offset += content.length + + return ak.contents.UnionArray.simplified( + tags, index, contents, parameters=parameters + ) -def _impl(arrays, axis, mergebool, highlevel, behavior): +def _impl(arrays, axis, mergebool, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(*arrays, behavior=behavior) # Simple single-array, axis=0 fast-path if ( # Is an array with a known backend - backend_of(arrays, default=None) is not None + backend_of_obj(arrays, default=None) is not None ): # Convert the array to a layout object content = ak.operations.to_layout( - arrays, allow_record=False, allow_unknown=False, primitive_policy="error" + arrays, allow_record=False, primitive_policy="error" ) # Only handle concatenation along `axis=0` # Let ambiguous depth arrays fall through if maybe_posaxis(content, axis, 1) == 0: - return ak.operations.ak_flatten._impl(content, 1, highlevel, behavior) + return ak.operations.ak_flatten._impl(arrays, 1, highlevel, behavior, attrs) # Now that we're sure `arrays` is not a singular array - backend = backend_of(*arrays, default=cpu, coerce_to_common=True) - content_or_others = [ - x.to_backend(backend) if isinstance(x, ak.contents.Content) else x - for x in ( - ak.operations.to_layout( - x, - allow_record=axis != 0, - allow_unknown=False, - primitive_policy="pass-through", + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + content_or_others = ensure_same_backend( + *( + ctx.unwrap( + x, + allow_record=axis != 0, + allow_unknown=False, + primitive_policy="pass-through", + ) + for x in arrays ) - for x in arrays ) - ] contents = [x for x in content_or_others if isinstance(x, ak.contents.Content)] if len(contents) == 0: @@ -143,7 +171,7 @@ def _impl(arrays, axis, mergebool, highlevel, behavior): else: - def action(inputs, depth, **kwargs): + def action(inputs, depth, backend, **kwargs): if any( x.minmax_depth == (1, 1) for x in inputs @@ -303,31 
+331,4 @@ def action(inputs, depth, **kwargs): content_or_others, action, allow_records=True, right_broadcast=False )[0] - return wrap_layout(out, behavior, highlevel) - - -def _merge_as_union( - contents: Sequence[Content], parameters=None -) -> ak.contents.UnionArray: - length = sum([c.length for c in contents]) - first = contents[0] - tags = ak.index.Index8.empty(length, first.backend.index_nplike) - index = ak.index.Index64.empty(length, first.backend.index_nplike) - - offset = 0 - for i, content in enumerate(contents): - content.backend.maybe_kernel_error( - content.backend["awkward_UnionArray_filltags_const", tags.dtype.type]( - tags.data, offset, content.length, i - ) - ) - content.backend.maybe_kernel_error( - content.backend["awkward_UnionArray_fillindex_count", index.dtype.type]( - index.data, offset, content.length - ) - ) - offset += content.length - - return ak.contents.UnionArray.simplified( - tags, index, contents, parameters=parameters - ) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_corr.py b/src/awkward/operations/ak_corr.py index 253eeeb071..74d148831d 100644 --- a/src/awkward/operations/ak_corr.py +++ b/src/awkward/operations/ak_corr.py @@ -3,9 +3,12 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_highlevel_to_lowlevel, wrap_layout +from awkward._layout import ( + HighLevelContext, + ensure_same_backend, + maybe_highlevel_to_lowlevel, +) from awkward._nplikes import ufuncs from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -26,6 +29,7 @@ def corr( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -52,6 +56,8 @@ def corr( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. 
+ attrs (None or dict): Custom attributes for the output array, if + high-level. Computes the correlation of `x` and `y` (many types supported, including all Awkward Arrays and Records, must be broadcastable to each other). @@ -74,41 +80,51 @@ def corr( yield x, y, weight # Implementation - return _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior) + return _impl( + x, y, weight, axis, keepdims, mask_identity, highlevel, behavior, attrs + ) -def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(x, y, weight, behavior=behavior) - x = ak.highlevel.Array( - ak.operations.to_layout( - x, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - behavior=behavior, - ) - y = ak.highlevel.Array( - ak.operations.to_layout( - y, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - behavior=behavior, - ) - if weight is not None: - weight = ak.highlevel.Array( - ak.operations.to_layout( + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + x_layout, y_layout, weight_layout = ensure_same_backend( + ctx.unwrap(x, allow_record=False, primitive_policy="error"), + ctx.unwrap(y, allow_record=False, primitive_policy="error"), + ctx.unwrap( weight, allow_record=False, allow_unknown=False, primitive_policy="error", + none_policy="pass-through", ), - behavior=behavior, ) + x = ctx.wrap(x_layout) + y = ctx.wrap(y_layout) + weight = ctx.wrap(weight_layout, allow_other=True) + with np.errstate(invalid="ignore", divide="ignore"): xmean = ak.operations.ak_mean._impl( - x, weight, axis, False, mask_identity, highlevel=True, behavior=behavior + x, + weight, + axis, + False, + mask_identity, + highlevel=True, + behavior=ctx.behavior, + attrs=ctx.attrs, ) ymean = ak.operations.ak_mean._impl( - y, weight, axis, False, mask_identity, highlevel=True, behavior=behavior + 
y, + weight, + axis, + False, + mask_identity, + highlevel=True, + behavior=ctx.behavior, + attrs=ctx.attrs, ) xdiff = x - xmean ydiff = y - ymean @@ -119,7 +135,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwyy = ak.operations.ak_sum._impl( ydiff**2, @@ -127,7 +144,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwxy = ak.operations.ak_sum._impl( xdiff * ydiff, @@ -135,7 +153,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) else: sumwxx = ak.operations.ak_sum._impl( @@ -144,7 +163,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwyy = ak.operations.ak_sum._impl( (ydiff**2) * weight, @@ -152,7 +172,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwxy = ak.operations.ak_sum._impl( (xdiff * ydiff) * weight, @@ -160,11 +181,11 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) - return wrap_layout( + return ctx.wrap( maybe_highlevel_to_lowlevel(sumwxy / ufuncs.sqrt(sumwxx * sumwyy)), - behavior=behavior, highlevel=highlevel, allow_other=True, ) diff --git a/src/awkward/operations/ak_count.py b/src/awkward/operations/ak_count.py index dee302077c..85f43a27ee 100644 --- a/src/awkward/operations/ak_count.py +++ 
b/src/awkward/operations/ak_count.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -23,6 +22,7 @@ def count( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -43,6 +43,8 @@ def count( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Counts elements of `array` (many types supported, including all Awkward Arrays and Records). The identity of counting is `0` and it is @@ -103,15 +105,13 @@ def count( yield (array,) # Implementation - return _impl(array, axis, keepdims, mask_identity, highlevel, behavior) + return _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs) -def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") reducer = ak._reducers.Count() out = ak._do.reduce( @@ -120,9 +120,6 @@ def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): axis=axis, mask=mask_identity, keepdims=keepdims, - behavior=behavior, + behavior=ctx.behavior, ) - if isinstance(out, (ak.contents.Content, ak.record.Record)): - return wrap_layout(out, behavior, highlevel) - else: - return out + return ctx.wrap(out, highlevel=highlevel, 
allow_other=True) diff --git a/src/awkward/operations/ak_count_nonzero.py b/src/awkward/operations/ak_count_nonzero.py index 567a961686..919a6abf22 100644 --- a/src/awkward/operations/ak_count_nonzero.py +++ b/src/awkward/operations/ak_count_nonzero.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -23,6 +22,7 @@ def count_nonzero( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -43,6 +43,8 @@ def count_nonzero( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Counts nonzero elements of `array` (many types supported, including all Awkward Arrays and Records). 
The identity of counting is `0` and it is @@ -62,15 +64,13 @@ def count_nonzero( yield (array,) # Implementation - return _impl(array, axis, keepdims, mask_identity, highlevel, behavior) + return _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs) -def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") reducer = ak._reducers.CountNonzero() out = ak._do.reduce( @@ -79,12 +79,9 @@ def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): axis=axis, mask=mask_identity, keepdims=keepdims, - behavior=behavior, + behavior=ctx.behavior, ) - if isinstance(out, (ak.contents.Content, ak.record.Record)): - return wrap_layout(out, behavior, highlevel) - else: - return out + return ctx.wrap(out, highlevel=highlevel, allow_other=True) @ak._connect.numpy.implements("count_nonzero") diff --git a/src/awkward/operations/ak_covar.py b/src/awkward/operations/ak_covar.py index 48e87adf73..a070ac6895 100644 --- a/src/awkward/operations/ak_covar.py +++ b/src/awkward/operations/ak_covar.py @@ -3,9 +3,12 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_highlevel_to_lowlevel, wrap_layout +from awkward._layout import ( + HighLevelContext, + ensure_same_backend, + maybe_highlevel_to_lowlevel, +) from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -25,6 +28,7 @@ def covar( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ 
-51,6 +55,8 @@ def covar( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Computes the covariance of `x` and `y` (many types supported, including all Awkward Arrays and Records, must be broadcastable to each other). @@ -71,41 +77,51 @@ def covar( yield x, y, weight # Implementation - return _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior) + return _impl( + x, y, weight, axis, keepdims, mask_identity, highlevel, behavior, attrs + ) -def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(x, y, weight, behavior=behavior) - x = ak.highlevel.Array( - ak.operations.to_layout( - x, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - behavior=behavior, - ) - y = ak.highlevel.Array( - ak.operations.to_layout( - y, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - behavior=behavior, - ) - if weight is not None: - weight = ak.highlevel.Array( - ak.operations.to_layout( + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + x_layout, y_layout, weight_layout = ensure_same_backend( + ctx.unwrap(x, allow_record=False, primitive_policy="error"), + ctx.unwrap(y, allow_record=False, primitive_policy="error"), + ctx.unwrap( weight, allow_record=False, allow_unknown=False, primitive_policy="error", + none_policy="pass-through", ), - behavior=behavior, ) + x = ctx.wrap(x_layout) + y = ctx.wrap(y_layout) + weight = ctx.wrap(weight_layout, allow_other=True) + with np.errstate(invalid="ignore", divide="ignore"): xmean = ak.operations.ak_mean._impl( - x, weight, axis, False, mask_identity, highlevel=True, behavior=behavior + x, + weight, + axis, + False, + mask_identity, + highlevel=True, + 
behavior=None, + attrs=None, ) ymean = ak.operations.ak_mean._impl( - y, weight, axis, False, mask_identity, highlevel=True, behavior=behavior + y, + weight, + axis, + False, + mask_identity, + highlevel=True, + behavior=None, + attrs=None, ) if weight is None: sumw = ak.operations.ak_count._impl( @@ -114,7 +130,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=None, + attrs=None, ) sumwxy = ak.operations.ak_sum._impl( (x - xmean) * (y - ymean), @@ -122,7 +139,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=None, + attrs=None, ) else: sumw = ak.operations.ak_sum._impl( @@ -131,7 +149,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=None, + attrs=None, ) sumwxy = ak.operations.ak_sum._impl( (x - xmean) * (y - ymean) * weight, @@ -139,11 +158,11 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=None, + attrs=None, ) - return wrap_layout( + return ctx.wrap( maybe_highlevel_to_lowlevel(sumwxy / sumw), - behavior=behavior, highlevel=highlevel, allow_other=True, ) diff --git a/src/awkward/operations/ak_drop_none.py b/src/awkward/operations/ak_drop_none.py index 36d8ef3ab6..c6c06014db 100644 --- a/src/awkward/operations/ak_drop_none.py +++ b/src/awkward/operations/ak_drop_none.py @@ -4,7 +4,7 @@ import awkward as ak from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, maybe_posaxis from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis from awkward.errors import AxisError @@ -15,7 +15,7 @@ @high_level_function() 
-def drop_none(array, axis=None, highlevel=True, behavior=None): +def drop_none(array, axis=None, highlevel=True, behavior=None, attrs=None): """ Args: array: Data in which to remove Nones. @@ -29,6 +29,8 @@ def drop_none(array, axis=None, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Removes missing values (None) from a given array. @@ -51,21 +53,21 @@ def drop_none(array, axis=None, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, axis, highlevel, behavior) + return _impl(array, axis, highlevel, behavior, attrs) -def _impl(array, axis, highlevel, behavior): - axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) +def _drop_none_if_list(layout): + if layout.is_list: + # only drop nones at list level in the recursion; this way ListArray -> ListOffsetArray with unprojected optiontype -> avoid offset mismatch + return layout.drop_none() + else: + return layout - def drop_nones(layout, **kwargs): - if layout.is_list: - # only drop nones at list level in the recursion; this way ListArray -> ListOffsetArray with unprojected optiontype -> avoid offset mismatch - return layout.drop_none() - else: - return layout + +def _impl(array, axis, highlevel, behavior, attrs): + axis = regularize_axis(axis) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") if axis is None: # if the outer layout is_option, drop_nones without affecting offsets @@ -73,7 +75,7 @@ def drop_nones(layout, **kwargs): layout = layout.drop_none() def action(layout, continuation, **kwargs): - return drop_nones(continuation()) + return _drop_none_if_list(continuation()) else: max_axis = layout.branch_depth[1] - 
1 @@ -113,9 +115,9 @@ def action(layout, depth, **kwargs): return layout.drop_none() options = {"none_indexes": []} - out = ak._do.recursively_apply(layout, action, options) + out = ak._do.recursively_apply(layout, action, depth_context=options) if len(options["none_indexes"]) > 0: - out = ak._do.recursively_apply(out, recompute_offsets, options) + out = ak._do.recursively_apply(out, recompute_offsets, depth_context=options) - return wrap_layout(out, behavior, highlevel, like=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_enforce_type.py b/src/awkward/operations/ak_enforce_type.py index 2d5e3d2229..e207d46e47 100644 --- a/src/awkward/operations/ak_enforce_type.py +++ b/src/awkward/operations/ak_enforce_type.py @@ -7,7 +7,7 @@ import awkward as ak from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._nplikes.shape import unknown_length from awkward._parameters import type_parameters_equal @@ -20,7 +20,7 @@ @high_level_function() -def enforce_type(array, type, *, highlevel=True, behavior=None): +def enforce_type(array, type, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -29,6 +29,8 @@ def enforce_type(array, type, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns an array whose structure is modified to match the given type. 
@@ -227,40 +229,9 @@ def enforce_type(array, type, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, type, highlevel, behavior) - + return _impl(array, type, highlevel, behavior, attrs) -def _impl(array, type_, highlevel, behavior): - layout = ak.to_layout(array, allow_record=True) - - if isinstance(type_, str): - type_ = ak.types.from_datashape(type_, highlevel=False) - - if isinstance(type_, (ak.types.ArrayType, ak.types.ScalarType)): - raise TypeError( - "High-level type objects are not supported by this function. Instead, " - "a low-level type object (instance of ak.types.Type) should be used. " - "If you are using a high-level type `type` from another array (e.g. using `array.type`), " - "then the low-level type object can be found under `type.content`" - ) - # Ensure we re-wrap records! - if isinstance(layout, ak.record.Record): - out = ak.record.Record( - _enforce_type(layout.array[layout.at : layout.at + 1], type_), 0 - ) - else: - out = _enforce_type(layout, type_) - - return wrap_layout( - out, - like=array, - behavior=behavior, - highlevel=highlevel, - ) - - -# TODO: move this if it ends up being useful elsewhere def _layout_has_type(layout: ak.contents.Content, type_: ak.types.Type) -> bool: """ Args: @@ -333,6 +304,9 @@ def _layout_has_type(layout: ak.contents.Content, type_: ak.types.Type) -> bool: raise TypeError(layout) +# TODO: move this if it ends up being useful elsewhere + + # TODO: move this if it ends up being useful elsewhere class _TypeEnforceableResult(NamedTuple): is_enforceable: bool @@ -1279,3 +1253,29 @@ def _enforce_type( return _recurse_record_any(layout, type_) else: raise NotImplementedError(type(layout), type_) + + +def _impl(array, type_, highlevel, behavior, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=True) + + if isinstance(type_, str): + type_ = ak.types.from_datashape(type_, highlevel=False) + + if isinstance(type_, 
(ak.types.ArrayType, ak.types.ScalarType)): + raise TypeError( + "High-level type objects are not supported by this function. Instead, " + "a low-level type object (instance of ak.types.Type) should be used. " + "If you are using a high-level type `type` from another array (e.g. using `array.type`), " + "then the low-level type object can be found under `type.content`" + ) + + # Ensure we re-wrap records! + if isinstance(layout, ak.record.Record): + out = ak.record.Record( + _enforce_type(layout.array[layout.at : layout.at + 1], type_), 0 + ) + else: + out = _enforce_type(layout, type_) + + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_fields.py b/src/awkward/operations/ak_fields.py index 0f915bd49c..ff9304248a 100644 --- a/src/awkward/operations/ak_fields.py +++ b/src/awkward/operations/ak_fields.py @@ -36,7 +36,5 @@ def fields(array): def _impl(array): - layout = ak.operations.to_layout( - array, allow_record=True, allow_unknown=False, primitive_policy="error" - ) + layout = ak.operations.to_layout(array, allow_record=True, primitive_policy="error") return layout.fields.copy() diff --git a/src/awkward/operations/ak_fill_none.py b/src/awkward/operations/ak_fill_none.py index 410886a300..739bed11a7 100644 --- a/src/awkward/operations/ak_fill_none.py +++ b/src/awkward/operations/ak_fill_none.py @@ -3,11 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._backends.dispatch import backend_of from awkward._backends.numpy import NumpyBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend, maybe_posaxis from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis from awkward.errors import AxisError @@ -19,7 +17,7 @@ @high_level_function() -def fill_none(array, value, axis=-1, *, highlevel=True, 
behavior=None): +def fill_none(array, value, axis=-1, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -34,6 +32,8 @@ def fill_none(array, value, axis=-1, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces missing values (None) with a given `value`. @@ -67,49 +67,47 @@ def fill_none(array, value, axis=-1, *, highlevel=True, behavior=None): yield array, value # Implementation - return _impl(array, value, axis, highlevel, behavior) + return _impl(array, value, axis, highlevel, behavior, attrs) -def _impl(array, value, axis, highlevel, behavior): +def _impl(array, value, axis, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(array, value, behavior=behavior) - # Ensure a common backend exists - backend = backend_of(array, value, default=cpu) - arraylayout = ak.operations.to_layout( - array, allow_record=True, allow_unknown=False - ).to_backend(backend) - valuelayout = ak.operations.to_layout( - value, - allow_record=True, - allow_unknown=False, - use_from_iter=True, - primitive_policy="pass-through", - string_policy="pass-through", - ) - - if isinstance(valuelayout, ak.record.Record): - valuelayout = valuelayout.array[valuelayout.at : valuelayout.at + 1] - elif isinstance(valuelayout, ak.contents.Content): - valuelayout = valuelayout[np.newaxis, ...] 
+ + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + array_layout, value_layout = ensure_same_backend( + ctx.unwrap(array, allow_record=True, allow_unknown=False), + ctx.unwrap( + value, + allow_record=True, + allow_unknown=False, + use_from_iter=True, + primitive_policy="pass-through", + string_policy="pass-through", + ), + ) + + if isinstance(value_layout, ak.record.Record): + value_layout = value_layout.array[value_layout.at : value_layout.at + 1] + elif isinstance(value_layout, ak.contents.Content): + value_layout = value_layout[np.newaxis, ...] else: # Now that we know `valuelayout` isn't a low-level type, we must have scalars # Thus, we can safely promote these scalars to a layout without # adding a new axis - valuelayout = ak.operations.to_layout( + value_layout = ak.operations.to_layout( value, allow_record=True, allow_unknown=False, use_from_iter=True, primitive_policy="promote", string_policy="promote", - ) - valuelayout = valuelayout.to_backend(backend) + ).to_backend(array_layout.backend) if axis is None: def action(layout, continuation, **kwargs): if layout.is_option: - return ak._do.fill_none(continuation(), valuelayout) + return ak._do.fill_none(continuation(), value_layout) else: @@ -117,7 +115,7 @@ def action(layout, depth, **kwargs): posaxis = maybe_posaxis(layout, axis, depth) if posaxis is not None and posaxis + 1 == depth: if layout.is_option: - return ak._do.fill_none(layout, valuelayout) + return ak._do.fill_none(layout, value_layout) elif layout.is_union or layout.is_record or layout.is_indexed: return None else: @@ -128,5 +126,5 @@ def action(layout, depth, **kwargs): f"axis={axis} exceeds the depth of this array ({depth})" ) - out = ak._do.recursively_apply(arraylayout, action) - return wrap_layout(out, behavior, highlevel) + out = ak._do.recursively_apply(array_layout, action) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_firsts.py b/src/awkward/operations/ak_firsts.py index 
7c37ccd328..e58e8afa4d 100644 --- a/src/awkward/operations/ak_firsts.py +++ b/src/awkward/operations/ak_firsts.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, maybe_posaxis from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import is_integer, regularize_axis from awkward.errors import AxisError @@ -16,7 +15,7 @@ @high_level_function() -def firsts(array, axis=1, *, highlevel=True, behavior=None): +def firsts(array, axis=1, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -28,6 +27,8 @@ def firsts(array, axis=1, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Selects the first element of each non-empty list and inserts None for each empty list. 
@@ -51,13 +52,13 @@ def firsts(array, axis=1, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, axis, highlevel, behavior) + return _impl(array, axis, highlevel, behavior, attrs) -def _impl(array, axis, highlevel, behavior): +def _impl(array, axis, highlevel, behavior, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False) axis = regularize_axis(axis) - layout = ak.operations.to_layout(array) - behavior = behavior_of(array, behavior=behavior) if not is_integer(axis): raise TypeError(f"'axis' must be an integer, not {axis!r}") @@ -75,19 +76,17 @@ def _impl(array, axis, highlevel, behavior): else: - def action(layout, depth, depth_context, **kwargs): + def action(layout, depth, backend, **kwargs): posaxis = maybe_posaxis(layout, axis, depth) if posaxis == depth and layout.is_list: - nplike = layout._backend.index_nplike - # this is a copy of the raw array - index = starts = nplike.asarray( - layout.starts.raw(nplike), dtype=np.int64, copy=True + index = starts = backend.index_nplike.asarray( + layout.starts.data, dtype=np.int64, copy=True ) # this might be a view - stops = layout.stops.raw(nplike) + stops = layout.stops.data empties = starts == stops index[empties] = -1 @@ -103,4 +102,4 @@ def action(layout, depth, depth_context, **kwargs): out = ak._do.recursively_apply(layout, action, numpy_to_regular=True) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_flatten.py b/src/awkward/operations/ak_flatten.py index e3cc0b4fee..b246870463 100644 --- a/src/awkward/operations/ak_flatten.py +++ b/src/awkward/operations/ak_flatten.py @@ -4,7 +4,7 @@ import awkward as ak from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, maybe_posaxis from awkward._nplikes.numpy_like import NumpyMetadata from 
awkward._regularize import regularize_axis @@ -14,7 +14,7 @@ @high_level_function() -def flatten(array, axis=1, *, highlevel=True, behavior=None): +def flatten(array, axis=1, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -28,6 +28,8 @@ def flatten(array, axis=1, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns an array with one level of nesting removed by erasing the boundaries between consecutive lists. Since this operates on a level of @@ -167,14 +169,13 @@ def flatten(array, axis=1, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, axis, highlevel, behavior) + return _impl(array, axis, highlevel, behavior, attrs) -def _impl(array, axis, highlevel, behavior): +def _impl(array, axis, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") if axis is None: out = ak._do.remove_structure(layout, function_name="ak.flatten") @@ -182,17 +183,13 @@ def _impl(array, axis, highlevel, behavior): isinstance(x, ak.contents.Content) for x in out ) - result = ak._do.mergemany(out) - - return wrap_layout(result, behavior, highlevel) + out = ak._do.mergemany(out) elif axis == 0 or maybe_posaxis(layout, axis, 1) == 0: def apply(layout): - backend = layout.backend - if layout.is_unknown: - return apply(ak.contents.NumpyArray(backend.nplike.asarray([]))) + return apply(ak.contents.NumpyArray(layout.backend.nplike.asarray([]))) elif layout.is_indexed: return apply(layout.project()) @@ -204,20 +201,22 @@ def apply(layout): ): 
return layout - tags = backend.index_nplike.asarray(layout.tags.data) - index = backend.index_nplike.asarray( - backend.nplike.asarray(layout.index.data), copy=True + tags = layout.tags.data + index = layout.backend.index_nplike.asarray( + layout.index.data, copy=True + ) + big_mask = layout.backend.index_nplike.empty( + layout.index.length, dtype=np.bool_ ) - bigmask = backend.index_nplike.empty(len(index), dtype=np.bool_) for tag, content in enumerate(layout.contents): if content.is_option and not isinstance( content, ak.contents.UnmaskedArray ): - bigmask[:] = False - bigmask[tags == tag] = backend.index_nplike.asarray( - content.mask_as_bool(valid_when=False) + big_mask[:] = False + big_mask[tags == tag] = content.mask_as_bool( + valid_when=False ).view(np.bool_) - index[bigmask] = -1 + index[big_mask] = -1 good = index >= 0 return ak.contents.UnionArray( @@ -233,9 +232,6 @@ def apply(layout): return layout out = apply(layout) - - return wrap_layout(out, behavior, highlevel, like=array) - else: out = ak._do.flatten(layout, axis) - return wrap_layout(out, behavior, highlevel, like=array) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_from_arrow.py b/src/awkward/operations/ak_from_arrow.py index 4c31027201..460f09f5e4 100644 --- a/src/awkward/operations/ak_from_arrow.py +++ b/src/awkward/operations/ak_from_arrow.py @@ -4,7 +4,7 @@ import awkward as ak from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("from_arrow",) @@ -13,7 +13,9 @@ @high_level_function() -def from_arrow(array, *, generate_bitmasks=False, highlevel=True, behavior=None): +def from_arrow( + array, *, generate_bitmasks=False, highlevel=True, behavior=None, attrs=None +): """ Args: array (`pyarrow.Array`, `pyarrow.ChunkedArray`, `pyarrow.RecordBatch`, or `pyarrow.Table`): @@ -26,6 +28,8 @@ def from_arrow(array, *, 
generate_bitmasks=False, highlevel=True, behavior=None) a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Converts an Apache Arrow array into an Awkward Array. @@ -38,14 +42,16 @@ def from_arrow(array, *, generate_bitmasks=False, highlevel=True, behavior=None) See also #ak.to_arrow, #ak.to_arrow_table, #ak.from_parquet, #ak.from_arrow_schema. """ - return _impl(array, generate_bitmasks, highlevel, behavior) + return _impl(array, generate_bitmasks, highlevel, behavior, attrs) -def _impl(array, generate_bitmasks, highlevel, behavior): +def _impl(array, generate_bitmasks, highlevel, behavior, attrs): import awkward._connect.pyarrow pyarrow = awkward._connect.pyarrow.pyarrow + ctx = HighLevelContext(behavior=behavior, attrs=attrs).finalize() + out = awkward._connect.pyarrow.handle_arrow( array, generate_bitmasks=generate_bitmasks, pass_empty_field=True ) @@ -72,4 +78,4 @@ def remove_revertable(layout, **kwargs): ak._do.recursively_apply(out, remove_revertable) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_from_avro_file.py b/src/awkward/operations/ak_from_avro_file.py index 99c8277bb3..dcd49d8f6c 100644 --- a/src/awkward/operations/ak_from_avro_file.py +++ b/src/awkward/operations/ak_from_avro_file.py @@ -15,7 +15,13 @@ @high_level_function() def from_avro_file( - file, limit_entries=None, *, debug_forth=False, highlevel=True, behavior=None + file, + limit_entries=None, + *, + debug_forth=False, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -26,6 +32,8 @@ def from_avro_file( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Reads Avro files as Awkward Arrays. 
@@ -40,7 +48,7 @@ def from_avro_file( form, length, container = awkward._connect.avro.ReadAvroFT( opened_file, limit_entries, debug_forth ).outcontents - return _impl(form, length, container, highlevel, behavior) + return _impl(form, length, container, highlevel, behavior, attrs) else: if not hasattr(file, "read") or not hasattr(file, "seek"): @@ -51,10 +59,10 @@ def from_avro_file( form, length, container = awkward._connect.avro.ReadAvroFT( file, limit_entries, debug_forth ).outarr - return _impl(form, length, container, highlevel, behavior) + return _impl(form, length, container, highlevel, behavior, attrs) -def _impl(form, length, container, highlevel, behavior): +def _impl(form, length, container, highlevel, behavior, attrs): return ak.operations.ak_from_buffers._impl( form=form, length=length, @@ -65,4 +73,5 @@ def _impl(form, length, container, highlevel, behavior): highlevel=highlevel, behavior=behavior, simplify=True, + attrs=attrs, ) diff --git a/src/awkward/operations/ak_from_buffers.py b/src/awkward/operations/ak_from_buffers.py index 23fdf12cd7..eed547b60d 100644 --- a/src/awkward/operations/ak_from_buffers.py +++ b/src/awkward/operations/ak_from_buffers.py @@ -34,6 +34,7 @@ def from_buffers( allow_noncanonical_form=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -62,6 +63,8 @@ def from_buffers( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. 
Reconstitutes an Awkward Array from a Form, length, and a collection of memory buffers, so that data can be losslessly read from file formats and storage @@ -106,6 +109,7 @@ def from_buffers( byteorder, highlevel, behavior, + attrs, allow_noncanonical_form, ) @@ -119,6 +123,7 @@ def _impl( byteorder, highlevel, behavior, + attrs, simplify, ): backend = regularize_backend(backend) @@ -142,7 +147,8 @@ def _impl( getkey = regularize_buffer_key(buffer_key) out = _reconstitute(form, length, container, getkey, backend, byteorder, simplify) - return wrap_layout(out, behavior, highlevel) + + return wrap_layout(out, highlevel=highlevel, attrs=attrs, behavior=behavior) def _from_buffer( diff --git a/src/awkward/operations/ak_from_categorical.py b/src/awkward/operations/ak_from_categorical.py index 950e228eab..778bf4ca7f 100644 --- a/src/awkward/operations/ak_from_categorical.py +++ b/src/awkward/operations/ak_from_categorical.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("from_categorical",) @high_level_function() -def from_categorical(array, *, highlevel=True, behavior=None): +def from_categorical(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def from_categorical(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. This function replaces categorical data with non-categorical data (by removing the label that declares it as such). 
@@ -33,10 +34,10 @@ def from_categorical(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): def action(layout, **kwargs): if layout.parameter("__array__") == "categorical": out = ak.operations.with_parameter( @@ -47,12 +48,7 @@ def action(layout, **kwargs): else: return None - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") out = ak._do.recursively_apply(layout, action) - if highlevel: - return wrap_layout(out, behavior) - else: - return out + return ctx.wrap(out, highlevel=highlevel, allow_other=True) diff --git a/src/awkward/operations/ak_from_cupy.py b/src/awkward/operations/ak_from_cupy.py index d80a5b0688..47e1f46648 100644 --- a/src/awkward/operations/ak_from_cupy.py +++ b/src/awkward/operations/ak_from_cupy.py @@ -9,7 +9,7 @@ @high_level_function() -def from_cupy(array, *, regulararray=False, highlevel=True, behavior=None): +def from_cupy(array, *, regulararray=False, highlevel=True, behavior=None, attrs=None): """ Args: array (cp.ndarray): The CuPy array to convert into an Awkward Array. @@ -22,6 +22,8 @@ def from_cupy(array, *, regulararray=False, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Converts a CuPy array into an Awkward Array. 
diff --git a/src/awkward/operations/ak_from_dlpack.py b/src/awkward/operations/ak_from_dlpack.py index ad8b7c4337..15c75f45f1 100644 --- a/src/awkward/operations/ak_from_dlpack.py +++ b/src/awkward/operations/ak_from_dlpack.py @@ -14,7 +14,13 @@ @high_level_function() def from_dlpack( - array, *, prefer_cpu=True, regulararray=False, highlevel=True, behavior=None + array, + *, + prefer_cpu=True, + regulararray=False, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -31,6 +37,8 @@ def from_dlpack( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Converts a DLPack-aware array into an Awkward Array. diff --git a/src/awkward/operations/ak_from_feather.py b/src/awkward/operations/ak_from_feather.py index 67e13d2c6e..ccf1d3c111 100644 --- a/src/awkward/operations/ak_from_feather.py +++ b/src/awkward/operations/ak_from_feather.py @@ -18,6 +18,7 @@ def from_feather( generate_bitmasks=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -36,6 +37,8 @@ def from_feather( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Reads an Feather file as an Awkward Array (through pyarrow). 
@@ -47,12 +50,26 @@ def from_feather( """ return _impl( - path, columns, use_threads, memory_map, generate_bitmasks, highlevel, behavior + path, + columns, + use_threads, + memory_map, + generate_bitmasks, + highlevel, + behavior, + attrs, ) def _impl( - path, columns, use_threads, memory_map, generate_bitmasks, highlevel, behavior + path, + columns, + use_threads, + memory_map, + generate_bitmasks, + highlevel, + behavior, + attrs, ): import pyarrow.feather @@ -63,4 +80,5 @@ def _impl( generate_bitmasks, highlevel, behavior, + attrs, ) diff --git a/src/awkward/operations/ak_from_iter.py b/src/awkward/operations/ak_from_iter.py index 79d919e8e2..7f06e8cc14 100644 --- a/src/awkward/operations/ak_from_iter.py +++ b/src/awkward/operations/ak_from_iter.py @@ -22,6 +22,7 @@ def from_iter( allow_record=True, highlevel=True, behavior=None, + attrs=None, initial=1024, resize=8, ): @@ -35,6 +36,8 @@ def from_iter( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. initial (int): Initial size (in bytes) of buffers used by the `ak::ArrayBuilder`. resize (float): Resize multiplier for buffers used by the `ak::ArrayBuilder`; should be strictly greater than 1. @@ -64,10 +67,10 @@ def from_iter( See also #ak.to_list. 
""" - return _impl(iterable, highlevel, behavior, allow_record, initial, resize) + return _impl(iterable, highlevel, behavior, allow_record, initial, resize, attrs) -def _impl(iterable, highlevel, behavior, allow_record, initial, resize): +def _impl(iterable, highlevel, behavior, allow_record, initial, resize, attrs): if not isinstance(iterable, Iterable): raise TypeError( f"cannot produce an array from a non-iterable object ({type(iterable)!r})" @@ -82,6 +85,7 @@ def _impl(iterable, highlevel, behavior, allow_record, initial, resize): False, initial, resize, + attrs, )[0] else: raise ValueError( @@ -108,4 +112,5 @@ def _impl(iterable, highlevel, behavior, allow_record, initial, resize): highlevel=highlevel, behavior=behavior, simplify=True, + attrs=attrs, )[0] diff --git a/src/awkward/operations/ak_from_jax.py b/src/awkward/operations/ak_from_jax.py index 93a9e11bc7..aae1d31683 100644 --- a/src/awkward/operations/ak_from_jax.py +++ b/src/awkward/operations/ak_from_jax.py @@ -10,7 +10,7 @@ @high_level_function() -def from_jax(array, *, regulararray=False, highlevel=True, behavior=None): +def from_jax(array, *, regulararray=False, highlevel=True, behavior=None, attrs=None): """ Args: array (jax.numpy.DeviceArray): The JAX DeviceArray to convert into an Awkward Array. @@ -23,6 +23,8 @@ def from_jax(array, *, regulararray=False, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Converts a JAX DeviceArray array into an Awkward Array. 
diff --git a/src/awkward/operations/ak_from_json.py b/src/awkward/operations/ak_from_json.py index aa926e637f..5480d0952c 100644 --- a/src/awkward/operations/ak_from_json.py +++ b/src/awkward/operations/ak_from_json.py @@ -5,13 +5,15 @@ import json import pathlib from collections.abc import Iterable, Sized +from contextlib import nullcontext +from io import BytesIO from urllib.parse import urlparse from awkward_cpp.lib import _ext import awkward as ak from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, wrap_layout from awkward._nplikes.numpy import Numpy from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import is_integer @@ -37,6 +39,7 @@ def from_json( resize=8, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -74,6 +77,8 @@ def from_json( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Converts a JSON string into an Awkward Array. 
@@ -338,6 +343,7 @@ def from_json( resize, highlevel, behavior, + attrs, ) else: @@ -354,57 +360,28 @@ def from_json( resize, highlevel, behavior, + attrs, ) -class _BytesReader: - __slots__ = ("data", "current") - - def __init__(self, data): - self.data = data - self.current = 0 - - def read(self, num_bytes): - before = self.current - self.current += num_bytes - return self.data[before : self.current] - - def __enter__(self): - return self - - def __exit__(self, exception_type, exception_value, exception_traceback): - pass - - -class _NoContextManager: - def __init__(self, file): - self.file = file - - def __enter__(self): - return self.file - - def __exit__(self, exception_type, exception_value, exception_traceback): - pass - - def _get_reader(source): if isinstance(source, str): source = source.encode("utf8", errors="surrogateescape") if isinstance(source, bytes): - return lambda: _BytesReader(source) + return BytesIO(source) elif isinstance(source, pathlib.Path): parsed_url = urlparse(str(source)) if parsed_url.scheme == "" or parsed_url.netloc == "": - return lambda: open(source, "rb") # pylint: disable=R1732 + return open(source, "rb") else: import fsspec - return lambda: fsspec.open(source, "rb").open() + return fsspec.open(source, "rb").open() else: - return lambda: _NoContextManager(source) + return nullcontext(source) def _record_to_complex(layout, complex_record_fields): @@ -460,12 +437,14 @@ def _no_schema( resize, highlevel, behavior, + attrs, ): + ctx = HighLevelContext(behavior=behavior, attrs=attrs).finalize() builder = _ext.ArrayBuilder(initial=initial, resize=resize) read_one = not line_delimited - with _get_reader(source)() as obj: + with _get_reader(source) as obj: try: _ext.fromjsonobj( obj, @@ -490,10 +469,7 @@ def _no_schema( if read_one: layout = layout[0] - if highlevel and isinstance(layout, (ak.contents.Content, ak.record.Record)): - return wrap_layout(layout, behavior, highlevel) - else: - return layout + return ctx.wrap(layout, 
highlevel=highlevel, allow_other=True) def _yes_schema( @@ -509,6 +485,7 @@ def _yes_schema( resize, highlevel, behavior, + attrs, ): if isinstance(schema, (bytes, str)): schema = json.loads(schema) @@ -538,7 +515,7 @@ def _yes_schema( read_one = not line_delimited - with _get_reader(source)() as obj: + with _get_reader(source) as obj: try: length = _ext.fromjsonobj_schema( obj, @@ -563,10 +540,7 @@ def _yes_schema( if is_record and read_one: layout = layout[0] - if highlevel and isinstance(layout, (ak.contents.Content, ak.record.Record)): - return wrap_layout(layout, behavior, highlevel) - else: - return layout + return wrap_layout(layout, highlevel=highlevel, attrs=attrs, behavior=behavior) def _build_assembly(schema, container, instructions): diff --git a/src/awkward/operations/ak_from_numpy.py b/src/awkward/operations/ak_from_numpy.py index 6f534019fc..536accffa6 100644 --- a/src/awkward/operations/ak_from_numpy.py +++ b/src/awkward/operations/ak_from_numpy.py @@ -10,7 +10,13 @@ @high_level_function() def from_numpy( - array, *, regulararray=False, recordarray=True, highlevel=True, behavior=None + array, + *, + regulararray=False, + recordarray=True, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -30,6 +36,8 @@ def from_numpy( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Converts a NumPy array into an Awkward Array. diff --git a/src/awkward/operations/ak_from_parquet.py b/src/awkward/operations/ak_from_parquet.py index 02540fa46f..36b405b8c9 100644 --- a/src/awkward/operations/ak_from_parquet.py +++ b/src/awkward/operations/ak_from_parquet.py @@ -23,6 +23,7 @@ def from_parquet( generate_bitmasks=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -47,6 +48,8 @@ def from_parquet( a low-level #ak.contents.Content subclass. 
behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Reads data from a local or remote Parquet file or collection of files. @@ -77,6 +80,7 @@ def from_parquet( highlevel, behavior, fs, + attrs, ) @@ -204,6 +208,7 @@ def _load( highlevel, behavior, fs, + attrs, metadata=None, ): arrays = [] @@ -224,14 +229,18 @@ def _load( assert len(arrays) != 0 if len(arrays) == 1: - # make high-level - if isinstance(arrays[0], ak.record.Record): - return ak.Record(arrays[0]) - return wrap_layout(arrays[0], highlevel=highlevel, behavior=behavior) + return wrap_layout( + arrays[0], highlevel=highlevel, attrs=attrs, behavior=behavior + ) else: # TODO: if each array is a record? return ak.operations.ak_concatenate._impl( - arrays, axis=0, mergebool=True, highlevel=highlevel, behavior=behavior + arrays, + axis=0, + mergebool=True, + highlevel=highlevel, + behavior=behavior, + attrs=attrs, ) @@ -294,6 +303,7 @@ def _read_parquet_file( # why is high-level False here? False, None, + None, ) diff --git a/src/awkward/operations/ak_from_rdataframe.py b/src/awkward/operations/ak_from_rdataframe.py index 694dba54c5..37bae96117 100644 --- a/src/awkward/operations/ak_from_rdataframe.py +++ b/src/awkward/operations/ak_from_rdataframe.py @@ -21,6 +21,7 @@ def from_rdataframe( with_name=None, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -38,6 +39,8 @@ def from_rdataframe( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Converts ROOT RDataFrame columns into an Awkward Array. 
diff --git a/src/awkward/operations/ak_from_regular.py b/src/awkward/operations/ak_from_regular.py index 010da5bdae..b3f840ef31 100644 --- a/src/awkward/operations/ak_from_regular.py +++ b/src/awkward/operations/ak_from_regular.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, maybe_posaxis from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis from awkward.errors import AxisError @@ -16,7 +15,7 @@ @high_level_function() -def from_regular(array, axis=1, *, highlevel=True, behavior=None): +def from_regular(array, axis=1, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -29,6 +28,8 @@ def from_regular(array, axis=1, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Converts a regular axis into an irregular one. 
@@ -48,13 +49,13 @@ def from_regular(array, axis=1, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, axis, highlevel, behavior) + return _impl(array, axis, highlevel, behavior, attrs) -def _impl(array, axis, highlevel, behavior): +def _impl(array, axis, highlevel, behavior, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False) axis = regularize_axis(axis) - layout = ak.operations.to_layout(array) - behavior = behavior_of(array, behavior=behavior) if axis is None: @@ -82,4 +83,4 @@ def action(layout, depth, **kwargs): out = ak._do.recursively_apply(layout, action, numpy_to_regular=True) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_full_like.py b/src/awkward/operations/ak_full_like.py index bf87819f4b..66ce515f0a 100644 --- a/src/awkward/operations/ak_full_like.py +++ b/src/awkward/operations/ak_full_like.py @@ -3,10 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend from awkward._nplikes.numpy_like import NumpyMetadata from awkward._nplikes.typetracer import ensure_known_scalar from awkward.operations.ak_zeros_like import _ZEROS @@ -25,6 +24,7 @@ def full_like( including_unknown=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -38,6 +38,8 @@ def full_like( otherwise, return a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. This is the equivalent of NumPy's `np.full_like` for Awkward Arrays. 
@@ -88,14 +90,22 @@ def full_like( yield array, fill_value # Implementation - return _impl(array, fill_value, highlevel, behavior, dtype, including_unknown) + return _impl( + array, fill_value, highlevel, behavior, dtype, including_unknown, attrs + ) -def _impl(array, fill_value, highlevel, behavior, dtype, including_unknown): - behavior = behavior_of(array, behavior=behavior) - layout = ak.operations.to_layout( - array, allow_record=True, allow_unknown=False, primitive_policy="error" - ) +def _impl(array, fill_value, highlevel, behavior, dtype, including_unknown, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout, _ = ensure_same_backend( + ctx.unwrap(array, primitive_policy="error"), + ctx.unwrap( + fill_value, + primitive_policy="pass-through", + string_policy="pass-through", + allow_unknown=True, + ), + ) if dtype is not None: # In the case of strings and byte strings, @@ -198,7 +208,7 @@ def action(layout, backend, **kwargs): if dtype is not None: # Interpret strings as numeric/bool types result = ak.operations.strings_astype( - result, dtype, highlevel=highlevel, behavior=behavior + result, dtype, highlevel=False, behavior=behavior ) # Convert dtype result = ak.operations.values_astype( @@ -209,7 +219,7 @@ def action(layout, backend, **kwargs): return None out = ak._do.recursively_apply(layout, action) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) @ak._connect.numpy.implements("full_like") diff --git a/src/awkward/operations/ak_is_categorical.py b/src/awkward/operations/ak_is_categorical.py index 24fe20e0ae..44853e46ee 100644 --- a/src/awkward/operations/ak_is_categorical.py +++ b/src/awkward/operations/ak_is_categorical.py @@ -30,6 +30,6 @@ def is_categorical(array): def _impl(array): layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" + array, allow_record=False, primitive_policy="error" ) return 
layout.purelist_parameter("__array__") == "categorical" diff --git a/src/awkward/operations/ak_is_none.py b/src/awkward/operations/ak_is_none.py index 729c62387d..078c86bde6 100644 --- a/src/awkward/operations/ak_is_none.py +++ b/src/awkward/operations/ak_is_none.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, maybe_posaxis from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import is_integer, regularize_axis from awkward.errors import AxisError @@ -16,7 +15,7 @@ @high_level_function() -def is_none(array, axis=0, *, highlevel=True, behavior=None): +def is_none(array, axis=0, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -28,6 +27,8 @@ def is_none(array, axis=0, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns an array whose value is True where an element of `array` is None; False otherwise (at a given `axis` depth). 
@@ -36,18 +37,18 @@ def is_none(array, axis=0, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, axis, highlevel, behavior) + return _impl(array, axis, highlevel, behavior, attrs) -def _impl(array, axis, highlevel, behavior): +def _impl(array, axis, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout(array) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") if not is_integer(axis): raise TypeError(f"'axis' must be an integer, not {axis!r}") - def action(layout, depth, lateral_context, **kwargs): + def action(layout, depth, backend, lateral_context, **kwargs): posaxis = maybe_posaxis(layout, axis, depth) if posaxis is not None and posaxis + 1 == depth: @@ -58,9 +59,8 @@ def action(layout, depth, lateral_context, **kwargs): return ak.contents.NumpyArray(layout.mask_as_bool(valid_when=False)) else: - nplike = layout._backend.nplike return ak.contents.NumpyArray( - nplike.zeros(layout.length, dtype=np.bool_) + backend.nplike.zeros(layout.length, dtype=np.bool_) ) elif layout.is_leaf: @@ -68,4 +68,4 @@ def action(layout, depth, lateral_context, **kwargs): out = ak._do.recursively_apply(layout, action, numpy_to_regular=True) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_is_tuple.py b/src/awkward/operations/ak_is_tuple.py index 906033dd7c..0b13ba68c1 100644 --- a/src/awkward/operations/ak_is_tuple.py +++ b/src/awkward/operations/ak_is_tuple.py @@ -29,7 +29,7 @@ def _impl(array): array, allow_record=True, allow_unknown=False, - allow_none=False, + none_policy="error", regulararray=True, use_from_iter=True, primitive_policy="error", diff --git a/src/awkward/operations/ak_isclose.py b/src/awkward/operations/ak_isclose.py index 3efb161201..8797c36752 100644 --- 
a/src/awkward/operations/ak_isclose.py +++ b/src/awkward/operations/ak_isclose.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("isclose",) @@ -15,7 +14,15 @@ @high_level_function() def isclose( - a, b, rtol=1e-05, atol=1e-08, equal_nan=False, *, highlevel=True, behavior=None + a, + b, + rtol=1e-05, + atol=1e-08, + equal_nan=False, + *, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -29,6 +36,8 @@ def isclose( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Implements [np.isclose](https://numpy.org/doc/stable/reference/generated/numpy.isclose.html) for Awkward Arrays. 
@@ -37,12 +46,15 @@ def isclose( yield a, b # Implementation - return _impl(a, b, rtol, atol, equal_nan, highlevel, behavior) + return _impl(a, b, rtol, atol, equal_nan, highlevel, behavior, attrs) -def _impl(a, b, rtol, atol, equal_nan, highlevel, behavior): - one = ak.operations.to_layout(a) - two = ak.operations.to_layout(b) +def _impl(a, b, rtol, atol, equal_nan, highlevel, behavior, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layouts = ensure_same_backend( + ctx.unwrap(a, allow_record=False), + ctx.unwrap(b, allow_record=False), + ) def action(inputs, backend, **kwargs): if all(isinstance(x, ak.contents.NumpyArray) for x in inputs): @@ -58,11 +70,10 @@ def action(inputs, backend, **kwargs): ), ) - behavior = behavior_of(a, b, behavior=behavior) - out = ak._broadcasting.broadcast_and_apply([one, two], action) + out = ak._broadcasting.broadcast_and_apply(layouts, action) assert isinstance(out, tuple) and len(out) == 1 - return wrap_layout(out[0], behavior, highlevel) + return ctx.wrap(out[0], highlevel=highlevel) @ak._connect.numpy.implements("isclose") diff --git a/src/awkward/operations/ak_linear_fit.py b/src/awkward/operations/ak_linear_fit.py index 32420fb728..dc5234a7bf 100644 --- a/src/awkward/operations/ak_linear_fit.py +++ b/src/awkward/operations/ak_linear_fit.py @@ -3,11 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._backends.dispatch import backend_of from awkward._backends.numpy import NumpyBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend from awkward._nplikes import ufuncs from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -29,6 +27,7 @@ def linear_fit( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -55,6 +54,8 @@ def linear_fit( a low-level 
#ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Computes the linear fit of `y` with respect to `x` (many types supported, including all Awkward Arrays and Records, must be broadcastable to each @@ -90,33 +91,31 @@ def linear_fit( yield x, y, weight # Implementation - return _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior) + return _impl( + x, y, weight, axis, keepdims, mask_identity, highlevel, behavior, attrs + ) -def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(x, y, weight, behavior=behavior) - backend = backend_of(x, y, weight, coerce_to_common=True, default=cpu) - x = ak.highlevel.Array( - ak.operations.to_layout( - x, allow_record=False, allow_unknown=False, primitive_policy="error" - ).to_backend(backend), - behavior=behavior, - ) - y = ak.highlevel.Array( - ak.operations.to_layout( - y, allow_record=False, allow_unknown=False, primitive_policy="error" - ).to_backend(backend), - behavior=behavior, - ) - if weight is not None: - weight = ak.highlevel.Array( - ak.operations.to_layout( - weight, allow_record=False, allow_unknown=False - ).to_backend(backend), - behavior=behavior, + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + x_layout, y_layout, weight_layout = ensure_same_backend( + ctx.unwrap(x, allow_record=False, primitive_policy="error"), + ctx.unwrap(y, allow_record=False, primitive_policy="error"), + ctx.unwrap( + weight, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + none_policy="pass-through", + ), ) + x = ctx.wrap(x_layout) + y = ctx.wrap(y_layout) + weight = ctx.wrap(weight_layout, allow_other=True) + with np.errstate(invalid="ignore", divide="ignore"): if weight is None: 
sumw = ak.operations.ak_count._impl( @@ -125,7 +124,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwx = ak.operations.ak_sum._impl( x, @@ -133,7 +133,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwy = ak.operations.ak_sum._impl( y, @@ -141,7 +142,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwxx = ak.operations.ak_sum._impl( x**2, @@ -149,7 +151,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwxy = ak.operations.ak_sum._impl( x * y, @@ -157,7 +160,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) else: sumw = ak.operations.ak_sum._impl( @@ -166,7 +170,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwx = ak.operations.ak_sum._impl( x * weight, @@ -174,7 +179,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwy = ak.operations.ak_sum._impl( y * weight, @@ -182,7 +188,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwxx = 
ak.operations.ak_sum._impl( (x**2) * weight, @@ -190,7 +197,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwxy = ak.operations.ak_sum._impl( x * y * weight, @@ -198,7 +206,8 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) delta = (sumw * sumwxx) - (sumwx * sumwx) intercept = ((sumwxx * sumwy) - (sumwx * sumwxy)) / delta @@ -224,6 +233,4 @@ def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): if is_scalar: out = out[0] - return wrap_layout( - out, highlevel=highlevel, behavior=behavior, allow_other=is_scalar - ) + return ctx.wrap(out, highlevel=highlevel, allow_other=is_scalar) diff --git a/src/awkward/operations/ak_local_index.py b/src/awkward/operations/ak_local_index.py index acc157e883..2231ac229f 100644 --- a/src/awkward/operations/ak_local_index.py +++ b/src/awkward/operations/ak_local_index.py @@ -4,7 +4,7 @@ import awkward as ak from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -14,7 +14,7 @@ @high_level_function() -def local_index(array, axis=-1, *, highlevel=True, behavior=None): +def local_index(array, axis=-1, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -26,6 +26,8 @@ def local_index(array, axis=-1, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. 
For example, @@ -82,13 +84,12 @@ def local_index(array, axis=-1, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, axis, highlevel, behavior) + return _impl(array, axis, highlevel, behavior, attrs) -def _impl(array, axis, highlevel, behavior): +def _impl(array, axis, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=True, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") out = ak._do.local_index(layout, axis) - return wrap_layout(out, behavior, highlevel, like=array) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_mask.py b/src/awkward/operations/ak_mask.py index 691d28db11..54d9a5e04b 100644 --- a/src/awkward/operations/ak_mask.py +++ b/src/awkward/operations/ak_mask.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("mask",) @@ -14,7 +13,7 @@ @high_level_function() -def mask(array, mask, *, valid_when=True, highlevel=True, behavior=None): +def mask(array, mask, *, valid_when=True, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -27,6 +26,8 @@ def mask(array, mask, *, valid_when=True, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. 
Returns an array for which @@ -98,10 +99,10 @@ def mask(array, mask, *, valid_when=True, highlevel=True, behavior=None): yield array, mask # Implementation - return _impl(array, mask, valid_when, highlevel, behavior) + return _impl(array, mask, valid_when, highlevel, behavior, attrs) -def _impl(array, mask, valid_when, highlevel, behavior): +def _impl(array, mask, valid_when, highlevel, behavior, attrs): def action(inputs, backend, **kwargs): layoutarray, layoutmask = inputs if layoutmask.is_numpy: @@ -117,16 +118,14 @@ def action(inputs, backend, **kwargs): else: return None - layoutarray = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - layoutmask = ak.operations.to_layout( - mask, allow_record=False, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layouts = ensure_same_backend( + ctx.unwrap(array, allow_record=False, primitive_policy="error"), + ctx.unwrap(mask, allow_record=False, primitive_policy="error"), + ) - behavior = behavior_of(array, mask, behavior=behavior) out = ak._broadcasting.broadcast_and_apply( - [layoutarray, layoutmask], action, numpy_to_regular=True, right_broadcast=False + layouts, action, numpy_to_regular=True, right_broadcast=False ) assert isinstance(out, tuple) and len(out) == 1 - return wrap_layout(out[0], behavior, highlevel) + return ctx.wrap(out[0], highlevel=highlevel) diff --git a/src/awkward/operations/ak_max.py b/src/awkward/operations/ak_max.py index ecd67b7b9c..471101309d 100644 --- a/src/awkward/operations/ak_max.py +++ b/src/awkward/operations/ak_max.py @@ -3,10 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata 
from awkward._regularize import regularize_axis @@ -25,6 +24,7 @@ def max( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -49,6 +49,8 @@ def max( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns the maximum value in each group of elements from `array` (many types supported, including all Awkward Arrays and Records). The identity @@ -77,6 +79,7 @@ def max( mask_identity, highlevel, behavior, + attrs, ) @@ -90,6 +93,7 @@ def nanmax( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -133,15 +137,14 @@ def nanmax( mask_identity, highlevel, behavior, + attrs, ) -def _impl(array, axis, keepdims, initial, mask_identity, highlevel, behavior): +def _impl(array, axis, keepdims, initial, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") reducer = ak._reducers.Max(initial) out = ak._do.reduce( @@ -150,12 +153,9 @@ def _impl(array, axis, keepdims, initial, mask_identity, highlevel, behavior): axis=axis, mask=mask_identity, keepdims=keepdims, - behavior=behavior, + behavior=ctx.behavior, ) - if isinstance(out, (ak.contents.Content, ak.record.Record)): - return wrap_layout(out, behavior, highlevel) - else: - return out + return ctx.wrap(out, highlevel=highlevel, allow_other=True) @ak._connect.numpy.implements("amax") diff --git a/src/awkward/operations/ak_mean.py b/src/awkward/operations/ak_mean.py index 039e0f24a1..c40e9689a0 100644 --- a/src/awkward/operations/ak_mean.py +++ b/src/awkward/operations/ak_mean.py @@ -3,10 +3,14 @@ 
from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import maybe_highlevel_to_lowlevel, maybe_posaxis, wrap_layout +from awkward._layout import ( + HighLevelContext, + ensure_same_backend, + maybe_highlevel_to_lowlevel, + maybe_posaxis, +) from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -25,6 +29,7 @@ def mean( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -50,6 +55,8 @@ def mean( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Computes the mean in each group of elements from `x` (many types supported, including all Awkward Arrays and Records). The grouping @@ -96,7 +103,7 @@ def mean( yield x, weight # Implementation - return _impl(x, weight, axis, keepdims, mask_identity, highlevel, behavior) + return _impl(x, weight, axis, keepdims, mask_identity, highlevel, behavior, attrs) @high_level_function() @@ -109,6 +116,7 @@ def nanmean( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -134,6 +142,8 @@ def nanmean( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Like #ak.mean, but treating NaN ("not a number") values as missing. 
@@ -149,10 +159,10 @@ def nanmean( yield x, weight if weight is not None: - weight = ak.operations.ak_nan_to_none._impl(weight, False, behavior) + weight = ak.operations.ak_nan_to_none._impl(weight, True, behavior, attrs) return _impl( - ak.operations.ak_nan_to_none._impl(x, False, behavior), + ak.operations.ak_nan_to_none._impl(x, False, behavior, attrs), weight, axis, keepdims, @@ -162,26 +172,24 @@ def nanmean( ) -def _impl(x, weight, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(x, weight, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(x, weight, behavior=behavior) - x = ak.highlevel.Array( - ak.operations.to_layout( - x, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - behavior=behavior, - ) - if weight is not None: - weight = ak.highlevel.Array( - ak.operations.to_layout( + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + x_layout, weight_layout = ensure_same_backend( + ctx.unwrap(x, allow_record=False, primitive_policy="error"), + ctx.unwrap( weight, allow_record=False, allow_unknown=False, primitive_policy="error", + none_policy="pass-through", ), - behavior=behavior, ) + x = ctx.wrap(x_layout) + weight = ctx.wrap(weight_layout, allow_other=True) + with np.errstate(invalid="ignore", divide="ignore"): if weight is None: sumw = ak.operations.ak_count._impl( @@ -190,7 +198,8 @@ def _impl(x, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims=True, mask_identity=True, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwx = ak.operations.ak_sum._impl( x, @@ -198,7 +207,8 @@ def _impl(x, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims=True, mask_identity=True, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) else: sumw = ak.operations.ak_sum._impl( @@ -207,7 +217,8 @@ def _impl(x, weight, axis, keepdims, mask_identity, highlevel, 
behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwx = ak.operations.ak_sum._impl( x * weight, @@ -215,14 +226,20 @@ def _impl(x, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims=True, mask_identity=True, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) out = sumwx / sumw if not mask_identity: out = ak.operations.fill_none( - out, np.nan, axis=-1, behavior=behavior, highlevel=True + out, + np.nan, + axis=-1, + behavior=ctx.behavior, + attrs=ctx.attrs, + highlevel=True, ) if axis is None: @@ -232,11 +249,9 @@ def _impl(x, weight, axis, keepdims, mask_identity, highlevel, behavior): if not keepdims: posaxis = maybe_posaxis(out.layout, axis, 1) out = out[(slice(None, None),) * posaxis + (0,)] - return wrap_layout( - maybe_highlevel_to_lowlevel(out), - behavior=behavior, - highlevel=highlevel, - allow_other=True, + + return ctx.wrap( + maybe_highlevel_to_lowlevel(out), highlevel=highlevel, allow_other=True ) diff --git a/src/awkward/operations/ak_merge_option_of_records.py b/src/awkward/operations/ak_merge_option_of_records.py index b9c2adca20..80b6dfdfb2 100644 --- a/src/awkward/operations/ak_merge_option_of_records.py +++ b/src/awkward/operations/ak_merge_option_of_records.py @@ -4,9 +4,8 @@ import awkward as ak from awkward._backends.numpy import NumpyBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, maybe_posaxis from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis from awkward.errors import AxisError @@ -18,7 +17,9 @@ @high_level_function() -def merge_option_of_records(array, axis=-1, *, highlevel=True, behavior=None): +def merge_option_of_records( + array, axis=-1, *, highlevel=True, behavior=None, attrs=None +): """ Args: array: 
Array-like data (anything #ak.to_layout recognizes). @@ -30,6 +31,8 @@ def merge_option_of_records(array, axis=-1, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Simplifies options of records, e.g. @@ -44,13 +47,13 @@ def merge_option_of_records(array, axis=-1, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, axis, highlevel, behavior) + return _impl(array, axis, highlevel, behavior, attrs) -def _impl(array, axis, highlevel, behavior): +def _impl(array, axis, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(array, behavior=behavior) - layout = ak.to_layout(array, allow_record=False) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") # First, normalise type-invsible "index-of-records" to "record-of-index" def apply_displace_index(layout, backend, **kwargs): @@ -94,4 +97,4 @@ def apply(layout, depth, backend, **kwargs): ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_merge_union_of_records.py b/src/awkward/operations/ak_merge_union_of_records.py index c6670f362c..dd6dfe5b53 100644 --- a/src/awkward/operations/ak_merge_union_of_records.py +++ b/src/awkward/operations/ak_merge_union_of_records.py @@ -4,11 +4,9 @@ import awkward as ak from awkward._backends.numpy import NumpyBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout -from awkward._nplikes.array_like import ArrayLike -from awkward._nplikes.numpy_like import NumpyMetadata +from awkward._layout import 
HighLevelContext, maybe_posaxis +from awkward._nplikes.numpy_like import ArrayLike, NumpyMetadata from awkward._regularize import regularize_axis from awkward.errors import AxisError @@ -19,7 +17,9 @@ @high_level_function() -def merge_union_of_records(array, axis=-1, *, highlevel=True, behavior=None): +def merge_union_of_records( + array, axis=-1, *, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -31,6 +31,8 @@ def merge_union_of_records(array, axis=-1, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Simplifies unions of records, e.g. @@ -55,18 +57,18 @@ def merge_union_of_records(array, axis=-1, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, axis, highlevel, behavior) + return _impl(array, axis, highlevel, behavior, attrs) -def _impl(array, axis, highlevel, behavior): +def _impl(array, axis, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(array, behavior=behavior) - layout = ak.to_layout(array, allow_record=False) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") def invert_record_union( - tags: ArrayLike, index: ArrayLike, contents, *, backend + tags: ArrayLike, index: ArrayLike, contents ) -> ak.contents.RecordArray: - index_nplike = backend.index_nplike + index_nplike = layout.backend.index_nplike # First, create an ordered list containing the union of all fields seen_fields = set() all_fields = [] @@ -102,7 +104,7 @@ def invert_record_union( # Make the tagged content an option, growing by one to ensure we # have a known `None` value to index into tagged_content = field_contents[tag_for_missing] - indexedoption_index = 
backend.index_nplike.arange( + indexedoption_index = index_nplike.arange( tagged_content.length + 1, dtype=np.int64 ) indexedoption_index[ @@ -144,17 +146,17 @@ def invert_record_union( ) ) return ak.contents.RecordArray( - outer_field_contents, all_fields, backend=backend + outer_field_contents, all_fields, backend=layout.backend ) - def compact_option_index(index: ArrayLike, *, backend) -> ArrayLike: + def compact_option_index(index: ArrayLike) -> ArrayLike: # Find dense (outer) index into non-null items. # This is in trivial order: the re-arranging is done by the union (below) is_none = index < 0 - num_none = backend.index_nplike.count_nonzero(is_none) - dense_index = backend.index_nplike.empty(index.size, dtype=index.dtype) + num_none = layout.backend.index_nplike.count_nonzero(is_none) + dense_index = layout.backend.index_nplike.empty(index.size, dtype=index.dtype) dense_index[is_none] = -1 - dense_index[~is_none] = backend.index_nplike.arange( + dense_index[~is_none] = layout.backend.index_nplike.arange( index.size - num_none, dtype=index.dtype, ) @@ -214,9 +216,7 @@ def apply(layout, depth, backend, **kwargs): # This should have the same length as the original union, and its index should be "dense" # (contiguous, monotonic integers; or -1). Therefore, we can directly compute it from the "sparse" # tags index, which has the same length as the original union, and has only missing items set to -1. 
- outer_option_dense_index = compact_option_index( - next_tags_data_sparse, backend=backend - ) + outer_option_dense_index = compact_option_index(next_tags_data_sparse) # Ignore missing items for inner union, creating a dense array of tags next_tags_data = next_tags_data_sparse[next_tags_data_sparse >= 0] @@ -230,9 +230,7 @@ def apply(layout, depth, backend, **kwargs): # Return option around record of unions return ak.contents.IndexedOptionArray( ak.index.Index64(outer_option_dense_index), - invert_record_union( - next_tags_data, next_index_data, next_contents, backend=backend - ), + invert_record_union(next_tags_data, next_index_data, next_contents), ) # Any index types need to be re-written @@ -259,16 +257,13 @@ def apply(layout, depth, backend, **kwargs): next_contents.append(content) return invert_record_union( - layout.tags.data, next_index_data, next_contents, backend=backend + layout.tags.data, next_index_data, next_contents ) else: return invert_record_union( - layout.tags.data, - layout.index.data, - layout.contents, - backend=backend, + layout.tags.data, layout.index.data, layout.contents ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_min.py b/src/awkward/operations/ak_min.py index 7799201c2b..081bc91faf 100644 --- a/src/awkward/operations/ak_min.py +++ b/src/awkward/operations/ak_min.py @@ -3,10 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -25,6 +24,7 @@ def min( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -49,6 +49,8 @@ def 
min( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns the minimum value in each group of elements from `array` (many types supported, including all Awkward Arrays and Records). The identity @@ -77,6 +79,7 @@ def min( mask_identity, highlevel, behavior, + attrs, ) @@ -90,6 +93,7 @@ def nanmin( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -133,15 +137,14 @@ def nanmin( mask_identity, highlevel, behavior, + attrs, ) -def _impl(array, axis, keepdims, initial, mask_identity, highlevel, behavior): +def _impl(array, axis, keepdims, initial, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") reducer = ak._reducers.Min(initial) out = ak._do.reduce( @@ -150,12 +153,9 @@ def _impl(array, axis, keepdims, initial, mask_identity, highlevel, behavior): axis=axis, mask=mask_identity, keepdims=keepdims, - behavior=behavior, + behavior=ctx.behavior, ) - if isinstance(out, (ak.contents.Content, ak.record.Record)): - return wrap_layout(out, behavior, highlevel) - else: - return out + return ctx.wrap(out, highlevel=highlevel, allow_other=True) @ak._connect.numpy.implements("amin") diff --git a/src/awkward/operations/ak_moment.py b/src/awkward/operations/ak_moment.py index 0e09e15232..7cac2498ee 100644 --- a/src/awkward/operations/ak_moment.py +++ b/src/awkward/operations/ak_moment.py @@ -3,9 +3,12 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout 
import maybe_highlevel_to_lowlevel, wrap_layout +from awkward._layout import ( + HighLevelContext, + ensure_same_backend, + maybe_highlevel_to_lowlevel, +) from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -25,6 +28,7 @@ def moment( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -52,6 +56,8 @@ def moment( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Computes the `n`th moment in each group of elements from `x` (many types supported, including all Awkward Arrays and Records). The grouping @@ -75,29 +81,29 @@ def moment( yield x, weight # Implementation - return _impl(x, n, weight, axis, keepdims, mask_identity, highlevel, behavior) + return _impl( + x, n, weight, axis, keepdims, mask_identity, highlevel, behavior, attrs + ) -def _impl(x, n, weight, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(x, n, weight, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(x, weight, behavior=behavior) - x = ak.highlevel.Array( - ak.operations.to_layout( - x, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - behavior=behavior, - ) - if weight is not None: - weight = ak.highlevel.Array( - ak.operations.to_layout( + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + x_layout, weight_layout = ensure_same_backend( + ctx.unwrap(x, allow_record=False, primitive_policy="error"), + ctx.unwrap( weight, allow_record=False, allow_unknown=False, primitive_policy="error", + none_policy="pass-through", ), - behavior=behavior, ) + x = ctx.wrap(x_layout) + weight = ctx.wrap(weight_layout, allow_other=True) + with np.errstate(invalid="ignore", divide="ignore"): if weight is None: sumw = ak.operations.ak_count._impl( @@ -106,7 +112,8 @@ 
def _impl(x, n, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwxn = ak.operations.ak_sum._impl( x**n, @@ -114,7 +121,8 @@ def _impl(x, n, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) else: sumw = ak.operations.ak_sum._impl( @@ -123,7 +131,8 @@ def _impl(x, n, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) sumwxn = ak.operations.ak_sum._impl( (x * weight) ** n, @@ -131,11 +140,11 @@ def _impl(x, n, weight, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) - return wrap_layout( + return ctx.wrap( maybe_highlevel_to_lowlevel(sumwxn / sumw), - behavior=behavior, highlevel=highlevel, allow_other=True, ) diff --git a/src/awkward/operations/ak_nan_to_none.py b/src/awkward/operations/ak_nan_to_none.py index 04820ad35d..7dabbfe828 100644 --- a/src/awkward/operations/ak_nan_to_none.py +++ b/src/awkward/operations/ak_nan_to_none.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("nan_to_none",) @@ -14,7 +13,7 @@ @high_level_function() -def nan_to_none(array, *, highlevel=True, behavior=None): +def nan_to_none(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). 
@@ -22,6 +21,8 @@ def nan_to_none(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Converts NaN ("not a number") into None, i.e. missing values with option-type. @@ -31,33 +32,28 @@ def nan_to_none(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): - def action(layout, continuation, **kwargs): - if isinstance(layout, ak.contents.NumpyArray) and issubclass( - layout.dtype.type, np.floating - ): - mask = layout.backend.nplike.isnan(layout.data) +def _impl(array, highlevel, behavior, attrs): + def action(layout, continuation, backend, **kwargs): + if layout.is_numpy and np.issubdtype(layout.dtype, np.floating): + mask = backend.nplike.isnan(layout.data) return ak.contents.ByteMaskedArray( - ak.index.Index8(mask, nplike=layout.backend.index_nplike), + ak.index.Index8(mask, nplike=backend.index_nplike), layout, valid_when=False, ) elif (layout.is_option or layout.is_indexed) and ( - isinstance(layout.content, ak.contents.NumpyArray) - and issubclass(layout.content.dtype.type, np.floating) + layout.content.is_numpy and np.issubdtype(layout.content.dtype, np.floating) ): return continuation() else: return None - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") out = ak._do.recursively_apply(layout, action) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_nan_to_num.py 
b/src/awkward/operations/ak_nan_to_num.py index c1a9512206..4c7472a06f 100644 --- a/src/awkward/operations/ak_nan_to_num.py +++ b/src/awkward/operations/ak_nan_to_num.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("nan_to_num",) @@ -23,6 +22,7 @@ def nan_to_num( *, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -37,6 +37,8 @@ def nan_to_num( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Implements [np.nan_to_num](https://numpy.org/doc/stable/reference/generated/numpy.nan_to_num.html) for Awkward Arrays, which replaces NaN ("not a number") or infinity with specified values. 
@@ -47,47 +49,44 @@ def nan_to_num( yield (array,) # Implementation - return _impl(array, copy, nan, posinf, neginf, highlevel, behavior) - - -def _impl(array, copy, nan, posinf, neginf, highlevel, behavior): - behavior = behavior_of(array, behavior=behavior) + return _impl(array, copy, nan, posinf, neginf, highlevel, behavior, attrs) + + +def _impl(array, copy, nan, posinf, neginf, highlevel, behavior, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout, nan_layout, posinf_layout, neginf_layout = ensure_same_backend( + ctx.unwrap(array), + ctx.unwrap( + nan, + allow_unknown=False, + none_policy="pass-through", + primitive_policy="pass-through", + allow_record=False, + ), + ctx.unwrap( + posinf, + allow_unknown=False, + none_policy="pass-through", + primitive_policy="pass-through", + allow_record=False, + ), + ctx.unwrap( + neginf, + allow_unknown=False, + none_policy="pass-through", + primitive_policy="pass-through", + allow_record=False, + ), + ) broadcasting_ids = {} - broadcasting = [] - - layout = ak.operations.to_layout(array) - broadcasting.append(layout) - - nan_layout = ak.operations.to_layout( - nan, - allow_unknown=False, - allow_none=True, - primitive_policy="pass-through", - allow_record=False, - ) + broadcasting = [layout] if isinstance(nan_layout, ak.contents.Content): broadcasting_ids[id(nan)] = len(broadcasting) broadcasting.append(nan_layout) - - posinf_layout = ak.operations.to_layout( - posinf, - allow_unknown=False, - allow_none=True, - primitive_policy="pass-through", - allow_record=False, - ) if isinstance(posinf_layout, ak.contents.Content): broadcasting_ids[id(posinf)] = len(broadcasting) broadcasting.append(posinf_layout) - - neginf_layout = ak.operations.to_layout( - neginf, - allow_unknown=False, - allow_none=True, - primitive_policy="pass-through", - allow_record=False, - ) if isinstance(neginf_layout, ak.contents.Content): broadcasting_ids[id(neginf)] = len(broadcasting) broadcasting.append(neginf_layout) 
@@ -143,7 +142,7 @@ def action(inputs, backend, **kwargs): assert isinstance(out, tuple) and len(out) == 1 out = out[0] - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) @ak._connect.numpy.implements("nan_to_num") diff --git a/src/awkward/operations/ak_num.py b/src/awkward/operations/ak_num.py index cb7500244e..ad9b4e746c 100644 --- a/src/awkward/operations/ak_num.py +++ b/src/awkward/operations/ak_num.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, maybe_posaxis from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import is_integer, regularize_axis from awkward.errors import AxisError @@ -16,7 +15,7 @@ @high_level_function() -def num(array, axis=1, *, highlevel=True, behavior=None): +def num(array, axis=1, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -28,6 +27,8 @@ def num(array, axis=1, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns an array of integers specifying the number of elements at a particular level. 
@@ -79,13 +80,13 @@ def num(array, axis=1, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, axis, highlevel, behavior) + return _impl(array, axis, highlevel, behavior, attrs) -def _impl(array, axis, highlevel, behavior): +def _impl(array, axis, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout(array) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") if not is_integer(axis): raise TypeError(f"'axis' must be an integer, not {axis!r}") @@ -108,4 +109,4 @@ def action(layout, depth, **kwargs): out = ak._do.recursively_apply(layout, action, numpy_to_regular=True) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_ones_like.py b/src/awkward/operations/ak_ones_like.py index 4cae14ed71..27b46a7e00 100644 --- a/src/awkward/operations/ak_ones_like.py +++ b/src/awkward/operations/ak_ones_like.py @@ -14,7 +14,13 @@ @high_level_function() def ones_like( - array, *, dtype=None, including_unknown=False, highlevel=True, behavior=None + array, + *, + dtype=None, + including_unknown=False, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -27,6 +33,8 @@ def ones_like( otherwise, return a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. This is the equivalent of NumPy's `np.ones_like` for Awkward Arrays. 
@@ -39,12 +47,12 @@ def ones_like( yield (array,) # Implementation - return _impl(array, highlevel, behavior, dtype, including_unknown) + return _impl(array, highlevel, behavior, dtype, including_unknown, attrs) -def _impl(array, highlevel, behavior, dtype, including_unknown): +def _impl(array, highlevel, behavior, dtype, including_unknown, attrs): return ak.operations.ak_full_like._impl( - array, 1, highlevel, behavior, dtype, including_unknown + array, 1, highlevel, behavior, dtype, including_unknown, attrs ) diff --git a/src/awkward/operations/ak_pad_none.py b/src/awkward/operations/ak_pad_none.py index 62cd69b1e4..34355a8546 100644 --- a/src/awkward/operations/ak_pad_none.py +++ b/src/awkward/operations/ak_pad_none.py @@ -4,7 +4,7 @@ import awkward as ak from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -14,7 +14,9 @@ @high_level_function() -def pad_none(array, target, axis=1, *, clip=False, highlevel=True, behavior=None): +def pad_none( + array, target, axis=1, *, clip=False, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -33,6 +35,8 @@ def pad_none(array, target, axis=1, *, clip=False, highlevel=True, behavior=None a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Increase the lengths of lists to a target length by adding None values. 
@@ -105,14 +109,13 @@ def pad_none(array, target, axis=1, *, clip=False, highlevel=True, behavior=None yield (array,) # Implementation - return _impl(array, target, axis, clip, highlevel, behavior) + return _impl(array, target, axis, clip, highlevel, behavior, attrs) -def _impl(array, target, axis, clip, highlevel, behavior): +def _impl(array, target, axis, clip, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") out = ak._do.pad_none(layout, target, axis, clip=clip) - return wrap_layout(out, behavior, highlevel, like=array) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_prod.py b/src/awkward/operations/ak_prod.py index 915805da8c..cde898f174 100644 --- a/src/awkward/operations/ak_prod.py +++ b/src/awkward/operations/ak_prod.py @@ -3,10 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -24,6 +23,7 @@ def prod( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -44,6 +44,8 @@ def prod( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Multiplies elements of `array` (many types supported, including all Awkward Arrays and Records). 
The identity of multiplication is `1` and it @@ -61,7 +63,7 @@ def prod( yield (array,) # Implementation - return _impl(array, axis, keepdims, mask_identity, highlevel, behavior) + return _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs) @high_level_function() @@ -73,6 +75,7 @@ def nanprod( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -105,21 +108,20 @@ def nanprod( # Implementation return _impl( - ak.operations.ak_nan_to_none._impl(array, False, None), + ak.operations.ak_nan_to_none._impl(array, True, behavior, attrs), axis, keepdims, mask_identity, highlevel, behavior, + attrs, ) -def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") reducer = ak._reducers.Prod() out = ak._do.reduce( @@ -128,12 +130,9 @@ def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): axis=axis, mask=mask_identity, keepdims=keepdims, - behavior=behavior, + behavior=ctx.behavior, ) - if isinstance(out, (ak.contents.Content, ak.record.Record)): - return wrap_layout(out, behavior, highlevel) - else: - return out + return ctx.wrap(out, highlevel=highlevel, allow_other=True) @ak._connect.numpy.implements("prod") diff --git a/src/awkward/operations/ak_ptp.py b/src/awkward/operations/ak_ptp.py index 4f25e8dda8..56daaa6980 100644 --- a/src/awkward/operations/ak_ptp.py +++ b/src/awkward/operations/ak_ptp.py @@ -3,10 +3,13 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import 
high_level_function -from awkward._layout import maybe_highlevel_to_lowlevel, maybe_posaxis, wrap_layout +from awkward._layout import ( + HighLevelContext, + maybe_highlevel_to_lowlevel, + maybe_posaxis, +) from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -24,6 +27,7 @@ def ptp( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -75,15 +79,13 @@ def ptp( yield (array,) # Implementation - return _impl(array, axis, keepdims, mask_identity, highlevel, behavior) + return _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs) -def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(array, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(array, behavior=behavior) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") with np.errstate(invalid="ignore", divide="ignore"): maxi = ak.operations.ak_max._impl( @@ -93,7 +95,8 @@ def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): None, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) mini = ak.operations.ak_min._impl( layout, @@ -102,7 +105,8 @@ def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): None, True, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) out = maxi - mini @@ -111,7 +115,7 @@ def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): if not mask_identity: out = ak.operations.fill_none( - out, 0, axis=-1, behavior=behavior, highlevel=True + out, 0, axis=-1, behavior=ctx.behavior, attrs=ctx.attrs, highlevel=True ) if axis is None: @@ -122,11 +126,8 @@ def _impl(array, axis, keepdims, mask_identity, 
highlevel, behavior): posaxis = maybe_posaxis(out.layout, axis, 1) out = out[(slice(None, None),) * posaxis + (0,)] - return wrap_layout( - maybe_highlevel_to_lowlevel(out), - behavior=behavior, - highlevel=highlevel, - allow_other=True, + return ctx.wrap( + maybe_highlevel_to_lowlevel(out), highlevel=highlevel, allow_other=True ) diff --git a/src/awkward/operations/ak_ravel.py b/src/awkward/operations/ak_ravel.py index 25687c2eb8..66a3e3a55d 100644 --- a/src/awkward/operations/ak_ravel.py +++ b/src/awkward/operations/ak_ravel.py @@ -5,7 +5,7 @@ import awkward as ak from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("ravel",) @@ -14,7 +14,7 @@ @high_level_function() -def ravel(array, *, highlevel=True, behavior=None): +def ravel(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -22,6 +22,8 @@ def ravel(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns an array with all level of nesting removed by erasing the boundaries between consecutive lists. 
@@ -59,13 +61,12 @@ def ravel(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) +def _impl(array, highlevel, behavior, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") out = ak._do.remove_structure(layout, function_name="ak.ravel", drop_nones=False) assert isinstance(out, tuple) and all( @@ -74,7 +75,7 @@ def _impl(array, highlevel, behavior): result = ak._do.mergemany(out) - return wrap_layout(result, behavior, highlevel, like=array) + return ctx.wrap(result, highlevel=highlevel) @ak._connect.numpy.implements("ravel") diff --git a/src/awkward/operations/ak_run_lengths.py b/src/awkward/operations/ak_run_lengths.py index 7fa0fcafb4..ba5702ea54 100644 --- a/src/awkward/operations/ak_run_lengths.py +++ b/src/awkward/operations/ak_run_lengths.py @@ -3,11 +3,9 @@ from __future__ import annotations import awkward as ak -from awkward._backends.dispatch import backend_of from awkward._backends.numpy import NumpyBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._nplikes.shape import unknown_length @@ -18,7 +16,7 @@ @high_level_function() -def run_lengths(array, *, highlevel=True, behavior=None): +def run_lengths(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -26,6 +24,8 @@ def run_lengths(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. 
behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Computes the lengths of sequences of identical values at the deepest level of nesting, returning an array with the same structure but with `int64` type. @@ -96,18 +96,20 @@ def run_lengths(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): - backend = backend_of(array, default=cpu) +def _impl(array, highlevel, behavior, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") def lengths_of(data, offsets): + backend = layout.backend + if backend.nplike.is_own_array(data): size = data.size else: - layout = ak.to_layout(data) - size = layout.length + size = ak.to_layout(data).length if size is not unknown_length and size == 0: return backend.index_nplike.empty(0, dtype=np.int64), offsets @@ -125,7 +127,7 @@ def lengths_of(data, offsets): # boundary diff ^ # To consider only the interior boundaries, we ignore the start and end # offset values. These can be repeated with empty sublists, so we mask them out. 
- is_interior = backend.nplike.logical_and( + is_interior = backend.index_nplike.logical_and( 0 < offsets, offsets < backend.index_nplike.shape_item_as_index(size), ) @@ -223,10 +225,5 @@ def action(layout, **kwargs): else: return None - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) - out = ak._do.recursively_apply(layout, action) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_singletons.py b/src/awkward/operations/ak_singletons.py index 8e51641520..35f60d5c97 100644 --- a/src/awkward/operations/ak_singletons.py +++ b/src/awkward/operations/ak_singletons.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis, wrap_layout +from awkward._layout import HighLevelContext, maybe_posaxis from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import is_integer, regularize_axis from awkward.errors import AxisError @@ -16,7 +15,7 @@ @high_level_function() -def singletons(array, axis=0, *, highlevel=True, behavior=None): +def singletons(array, axis=0, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -28,6 +27,8 @@ def singletons(array, axis=0, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns a singleton list (length 1) wrapping each non-missing value and an empty list (length 0) in place of each missing value. 
@@ -51,18 +52,18 @@ def singletons(array, axis=0, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, axis, highlevel, behavior) + return _impl(array, axis, highlevel, behavior, attrs) -def _impl(array, axis, highlevel, behavior): +def _impl(array, axis, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout(array) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") if not is_integer(axis): raise TypeError(f"'axis' must be an integer, not {axis!r}") - def action(layout, depth, **kwargs): + def action(layout, depth, backend, **kwargs): posaxis = maybe_posaxis(layout, axis, depth) if posaxis is not None and posaxis + 1 == depth: @@ -70,12 +71,10 @@ def action(layout, depth, **kwargs): return None elif layout.is_option: - nplike = layout._backend.index_nplike - - offsets = nplike.empty(layout.length + 1, dtype=np.int64) + offsets = backend.index_nplike.empty(layout.length + 1, dtype=np.int64) offsets[0] = 0 - nplike.cumsum( + backend.index_nplike.cumsum( layout.mask_as_bool(valid_when=True), maybe_out=offsets[1:] ) @@ -91,4 +90,4 @@ def action(layout, depth, **kwargs): out = ak._do.recursively_apply(layout, action, numpy_to_regular=True) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_softmax.py b/src/awkward/operations/ak_softmax.py index dcab326395..5fdc787c2d 100644 --- a/src/awkward/operations/ak_softmax.py +++ b/src/awkward/operations/ak_softmax.py @@ -3,9 +3,11 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import maybe_highlevel_to_lowlevel, wrap_layout +from awkward._layout import ( + HighLevelContext, + maybe_highlevel_to_lowlevel, +) from 
awkward._nplikes import ufuncs from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -17,7 +19,14 @@ @high_level_function() def softmax( - x, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None + x, + axis=None, + *, + keepdims=False, + mask_identity=False, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -39,6 +48,8 @@ def softmax( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Computes the softmax in each group of elements from `x` (many types supported, including all Awkward Arrays and Records). The grouping @@ -59,18 +70,15 @@ def softmax( yield (x,) # Implementation - return _impl(x, axis, keepdims, mask_identity, highlevel, behavior) + return _impl(x, axis, keepdims, mask_identity, highlevel, behavior, attrs) -def _impl(x, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(x, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(x, behavior=behavior) - x = ak.highlevel.Array( - ak.operations.to_layout( - x, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - behavior=behavior, - ) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + x_layout = ctx.unwrap(x, allow_record=False, primitive_policy="error") + x = ctx.wrap(x_layout) with np.errstate(invalid="ignore", divide="ignore"): expx = ufuncs.exp(x) @@ -80,11 +88,11 @@ def _impl(x, axis, keepdims, mask_identity, highlevel, behavior): keepdims, mask_identity, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) - return wrap_layout( + return ctx.wrap( maybe_highlevel_to_lowlevel(expx / denom), - behavior=behavior, highlevel=highlevel, allow_other=True, ) diff --git a/src/awkward/operations/ak_sort.py 
b/src/awkward/operations/ak_sort.py index 6a4409010b..5e82e91604 100644 --- a/src/awkward/operations/ak_sort.py +++ b/src/awkward/operations/ak_sort.py @@ -5,7 +5,7 @@ import awkward as ak from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -15,7 +15,16 @@ @high_level_function() -def sort(array, axis=-1, *, ascending=True, stable=True, highlevel=True, behavior=None): +def sort( + array, + axis=-1, + *, + ascending=True, + stable=True, + highlevel=True, + behavior=None, + attrs=None, +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -32,6 +41,8 @@ def sort(array, axis=-1, *, ascending=True, stable=True, highlevel=True, behavio a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns a sorted array. 
@@ -44,16 +55,15 @@ def sort(array, axis=-1, *, ascending=True, stable=True, highlevel=True, behavio yield (array,) # Implementation - return _impl(array, axis, ascending, stable, highlevel, behavior) + return _impl(array, axis, ascending, stable, highlevel, behavior, attrs) -def _impl(array, axis, ascending, stable, highlevel, behavior): +def _impl(array, axis, ascending, stable, highlevel, behavior, attrs): axis = regularize_axis(axis) - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") out = ak._do.sort(layout, axis, ascending, stable) - return wrap_layout(out, behavior, highlevel, like=array) + return ctx.wrap(out, highlevel=highlevel) @ak._connect.numpy.implements("sort") diff --git a/src/awkward/operations/ak_std.py b/src/awkward/operations/ak_std.py index 949c99d1ad..0385032440 100644 --- a/src/awkward/operations/ak_std.py +++ b/src/awkward/operations/ak_std.py @@ -3,10 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import maybe_highlevel_to_lowlevel, maybe_posaxis, wrap_layout +from awkward._layout import ( + HighLevelContext, + ensure_same_backend, + maybe_highlevel_to_lowlevel, + maybe_posaxis, +) from awkward._nplikes import ufuncs from awkward._nplikes.numpy_like import NumpyMetadata from awkward._regularize import regularize_axis @@ -27,6 +31,7 @@ def std( mask_identity=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -55,6 +60,8 @@ def std( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. 
Computes the standard deviation in each group of elements from `x` (many types supported, including all Awkward Arrays and Records). The @@ -79,7 +86,9 @@ def std( yield x, weight # Implementation - return _impl(x, weight, ddof, axis, keepdims, mask_identity, highlevel, behavior) + return _impl( + x, weight, ddof, axis, keepdims, mask_identity, highlevel, behavior, attrs + ) @high_level_function() @@ -93,6 +102,7 @@ def nanstd( mask_identity=True, highlevel=True, behavior=None, + attrs=None, ): """ Args: @@ -121,6 +131,8 @@ def nanstd( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Like #ak.std, but treating NaN ("not a number") values as missing. @@ -137,40 +149,39 @@ def nanstd( # Implementation if weight is not None: - weight = ak.operations.ak_nan_to_none._impl(weight, False, behavior) + weight = ak.operations.ak_nan_to_none._impl(weight, True, behavior, attrs) return _impl( - ak.operations.ak_nan_to_none._impl(x, False, behavior), + ak.operations.ak_nan_to_none._impl(x, True, behavior, attrs), weight, ddof, axis, keepdims, mask_identity, - highlevel=highlevel, - behavior=behavior, + highlevel, + behavior, + attrs, ) -def _impl(x, weight, ddof, axis, keepdims, mask_identity, highlevel, behavior): +def _impl(x, weight, ddof, axis, keepdims, mask_identity, highlevel, behavior, attrs): axis = regularize_axis(axis) - behavior = behavior_of(x, weight, behavior=behavior) - x = ak.highlevel.Array( - ak.operations.to_layout( - x, allow_record=False, allow_unknown=False, primitive_policy="error" - ), - behavior=behavior, - ) - if weight is not None: - weight = ak.highlevel.Array( - ak.operations.to_layout( + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + x_layout, weight_layout = ensure_same_backend( + ctx.unwrap(x, allow_record=False, primitive_policy="error"), + ctx.unwrap( weight, 
allow_record=False, allow_unknown=False, primitive_policy="error", + none_policy="pass-through", ), - behavior=behavior, ) + x = ctx.wrap(x_layout) + weight = ctx.wrap(weight_layout, allow_other=True) + with np.errstate(invalid="ignore", divide="ignore"): out = ufuncs.sqrt( ak.operations.ak_var._impl( @@ -181,13 +192,19 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity, highlevel, behavior): keepdims=True, mask_identity=True, highlevel=True, - behavior=behavior, + behavior=ctx.behavior, + attrs=ctx.attrs, ) ) if not mask_identity: out = ak.operations.fill_none( - out, np.nan, axis=-1, behavior=behavior, highlevel=True + out, + np.nan, + axis=-1, + behavior=ctx.behavior, + highlevel=True, + attrs=ctx.attrs, ) if axis is None: @@ -198,11 +215,8 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity, highlevel, behavior): posaxis = maybe_posaxis(out.layout, axis, 1) out = out[(slice(None, None),) * posaxis + (0,)] - return wrap_layout( - maybe_highlevel_to_lowlevel(out), - behavior=behavior, - highlevel=highlevel, - allow_other=True, + return ctx.wrap( + maybe_highlevel_to_lowlevel(out), highlevel=highlevel, allow_other=True ) diff --git a/src/awkward/operations/ak_strings_astype.py b/src/awkward/operations/ak_strings_astype.py index 15e5f02c6f..b0834db3a6 100644 --- a/src/awkward/operations/ak_strings_astype.py +++ b/src/awkward/operations/ak_strings_astype.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy import Numpy from awkward._nplikes.numpy_like import NumpyMetadata @@ -16,7 +15,7 @@ @high_level_function() -def strings_astype(array, to, *, highlevel=True, behavior=None): +def strings_astype(array, to, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). 
@@ -25,6 +24,8 @@ def strings_astype(array, to, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Converts all strings in the array to a new type, leaving the structure untouched. @@ -53,12 +54,10 @@ def strings_astype(array, to, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, to, highlevel, behavior) - + return _impl(array, to, highlevel, behavior, attrs) -def _impl(array, to, highlevel, behavior): - to_dtype = np.dtype(to) +def _impl(array, to, highlevel, behavior, attrs): def action(layout, **kwargs): if layout.is_list and ( layout.parameter("__array__") == "string" @@ -75,15 +74,13 @@ def action(layout, **kwargs): npstrings[maskedarray.mask] = 0 npnumbers = numpy.astype( numpy.reshape(npstrings, (-1,)).view(" 1: return _impl( base, - _impl( - base[where[0]], - what, - where[1:], - highlevel, - behavior, - ), + _impl(base[where[0]], what, where[1:], highlevel, behavior, attrs), where[0], highlevel, behavior, + attrs, ) else: # If we have an iterable here, pull out the only ti if is_non_string_like_sequence(where): where = where[0] - behavior = behavior_of(base, what, behavior=behavior) - backend = backend_of(base, what, default=cpu, coerce_to_common=True) - - base = ak.operations.to_layout( - base, allow_record=True, allow_unknown=False - ).to_backend(backend) - what = ak.operations.to_layout( - what, - allow_record=True, - allow_unknown=False, - allow_none=True, - primitive_policy="pass-through", - string_policy="promote", - ) - if isinstance(what, (ak.contents.Content, ak.record.Record)): - what = what.to_backend(backend) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + base, what = ensure_same_backend( + ctx.unwrap(base, allow_record=True, primitive_policy="error"), + ctx.unwrap( + what, + allow_record=True, + 
allow_unknown=False, + none_policy="pass-through", + primitive_policy="pass-through", + string_policy="promote", + ), + ) keys = copy.copy(base.fields) if where in base.fields: @@ -172,4 +163,4 @@ def action(inputs, **kwargs): assert isinstance(out, tuple) and len(out) == 1 - return wrap_layout(out[0], behavior, highlevel) + return ctx.wrap(out[0], highlevel=highlevel) diff --git a/src/awkward/operations/ak_with_name.py b/src/awkward/operations/ak_with_name.py index 5ba1fbfe7d..34a9e1cb89 100644 --- a/src/awkward/operations/ak_with_name.py +++ b/src/awkward/operations/ak_with_name.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("with_name",) @@ -14,7 +13,7 @@ @high_level_function() -def with_name(array, name, *, highlevel=True, behavior=None): +def with_name(array, name, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -24,6 +23,8 @@ def with_name(array, name, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns an #ak.Array or #ak.Record (or low-level equivalent, if `highlevel=False`) with a new name. 
This function does not change the @@ -41,12 +42,12 @@ def with_name(array, name, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, name, highlevel, behavior) + return _impl(array, name, highlevel, behavior, attrs) -def _impl(array, name, highlevel, behavior): - behavior = behavior_of(array, behavior=behavior) - layout = ak.operations.to_layout(array) +def _impl(array, name, highlevel, behavior, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=True, primitive_policy="error") def action(layout, **ignore): if isinstance(layout, ak.contents.RecordArray): @@ -61,17 +62,4 @@ def action(layout, **ignore): out = ak._do.recursively_apply(layout, action) - def action2(layout, **ignore): - if layout.is_union: - return ak.contents.UnionArray.simplified( - layout._tags, - layout._index, - layout._contents, - parameters=layout._parameters, - ) - else: - return None - - out2 = ak._do.recursively_apply(out, action2) - - return wrap_layout(out2, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_with_parameter.py b/src/awkward/operations/ak_with_parameter.py index a58b37b585..9349fe3a1f 100644 --- a/src/awkward/operations/ak_with_parameter.py +++ b/src/awkward/operations/ak_with_parameter.py @@ -2,10 +2,8 @@ from __future__ import annotations -import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("with_parameter",) @@ -14,7 +12,9 @@ @high_level_function() -def with_parameter(array, parameter, value, *, highlevel=True, behavior=None): +def with_parameter( + array, parameter, value, *, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). 
@@ -24,6 +24,8 @@ def with_parameter(array, parameter, value, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. This function returns a new array with a parameter set on the outermost node of its #ak.Array.layout. @@ -38,15 +40,12 @@ def with_parameter(array, parameter, value, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, parameter, value, highlevel, behavior) + return _impl(array, parameter, value, highlevel, behavior, attrs) -def _impl(array, parameter, value, highlevel, behavior): - behavior = behavior_of(array, behavior=behavior) - layout = ak.operations.to_layout( - array, allow_record=True, allow_unknown=False, primitive_policy="error" - ) +def _impl(array, parameter, value, highlevel, behavior, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=True, primitive_policy="error") out = layout.with_parameter(parameter, value) - - return wrap_layout(out, behavior_of(array, behavior=behavior), highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_without_field.py b/src/awkward/operations/ak_without_field.py index f5b8034da2..2fc06da053 100644 --- a/src/awkward/operations/ak_without_field.py +++ b/src/awkward/operations/ak_without_field.py @@ -5,9 +5,8 @@ from collections.abc import Sequence import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("without_field",) @@ -16,7 +15,7 @@ @high_level_function() -def without_field(array, where, *, highlevel=True, behavior=None): +def without_field(array, where, *, highlevel=True, 
behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -27,6 +26,8 @@ def without_field(array, where, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns an #ak.Array or #ak.Record (or low-level equivalent, if `highlevel=False`) with an existing field removed. This function does not @@ -41,10 +42,10 @@ def without_field(array, where, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, where, highlevel, behavior) + return _impl(array, where, highlevel, behavior, attrs) -def _impl(base, where, highlevel, behavior): +def _impl(base, where, highlevel, behavior, attrs): if isinstance(where, str): where = [where] elif not (isinstance(where, Sequence) and all(isinstance(x, str) for x in where)): @@ -52,10 +53,8 @@ def _impl(base, where, highlevel, behavior): "Field names must be given as a single string, or a sequence of strings" ) - behavior = behavior_of(base, behavior=behavior) - base = ak.operations.to_layout( - base, allow_record=True, allow_unknown=False, primitive_policy="error" - ) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + base = ctx.unwrap(base, allow_record=True, primitive_policy="error") def action(layout, depth_context, **kwargs): if isinstance(layout, ak.contents.RecordArray): @@ -96,4 +95,4 @@ def action(layout, depth_context, **kwargs): return None out = ak._do.recursively_apply(base, action, depth_context={"where": where}) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_without_parameters.py b/src/awkward/operations/ak_without_parameters.py index 8f3567eb6c..f414915a45 100644 --- a/src/awkward/operations/ak_without_parameters.py +++ 
b/src/awkward/operations/ak_without_parameters.py @@ -3,9 +3,8 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("without_parameters",) @@ -14,7 +13,7 @@ @high_level_function() -def without_parameters(array, *, highlevel=True, behavior=None): +def without_parameters(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -22,6 +21,8 @@ def without_parameters(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. This function returns a new array without any parameters in its #ak.Array.layout, on nodes of any level of depth. 
@@ -33,19 +34,15 @@ def without_parameters(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): - behavior = behavior_of(array, behavior=behavior) - layout = ak.operations.to_layout( - array, allow_record=True, allow_unknown=False, primitive_policy="error" - ) +def _impl(array, highlevel, behavior, attrs): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array, allow_record=False, primitive_policy="error") out = ak._do.recursively_apply( - layout, - (lambda layout, **kwargs: None), - keep_parameters=False, + layout, lambda layout, **kwargs: None, keep_parameters=False ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/ak_zeros_like.py b/src/awkward/operations/ak_zeros_like.py index 97452c73eb..7db5fd88df 100644 --- a/src/awkward/operations/ak_zeros_like.py +++ b/src/awkward/operations/ak_zeros_like.py @@ -16,7 +16,13 @@ @high_level_function() def zeros_like( - array, *, dtype=None, including_unknown=False, highlevel=True, behavior=None + array, + *, + dtype=None, + including_unknown=False, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -29,6 +35,8 @@ def zeros_like( otherwise, return a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. This is the equivalent of NumPy's `np.zeros_like` for Awkward Arrays. 
@@ -41,17 +49,18 @@ def zeros_like( yield (array,) # Implementation - return _impl(array, highlevel, behavior, dtype, including_unknown) + return _impl(array, highlevel, behavior, dtype, including_unknown, attrs) -def _impl(array, highlevel, behavior, dtype, including_unknown): +def _impl(array, highlevel, behavior, dtype, including_unknown, attrs): if dtype is not None: return ak.operations.ak_full_like._impl( - array, 0, highlevel, behavior, dtype, including_unknown + array, 0, highlevel, behavior, dtype, including_unknown, attrs + ) + else: + return ak.operations.ak_full_like._impl( + array, _ZEROS, highlevel, behavior, dtype, including_unknown, attrs ) - return ak.operations.ak_full_like._impl( - array, _ZEROS, highlevel, behavior, dtype, including_unknown - ) @ak._connect.numpy.implements("zeros_like") diff --git a/src/awkward/operations/ak_zip.py b/src/awkward/operations/ak_zip.py index c84b40f631..68e38101ee 100644 --- a/src/awkward/operations/ak_zip.py +++ b/src/awkward/operations/ak_zip.py @@ -5,9 +5,8 @@ from collections.abc import Mapping import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("zip",) @@ -26,11 +25,12 @@ def zip( optiontype_outside_record=False, highlevel=True, behavior=None, + attrs=None, ): """ Args: - arrays (dict or iterable of arrays): Each value in this dict or iterable - can be any array-like data that #ak.to_layout recognizes. + arrays (mapping or sequence of arrays): Each value in this mapping or + sequence can be any array-like data that #ak.to_layout recognizes. depth_limit (None or int): If None, attempt to fully broadcast the `array` to all levels. If an int, limit the number of dimensions that get broadcasted. 
The minimum value is `1`, for no @@ -48,6 +48,8 @@ def zip( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Combines `arrays` into a single structure as the fields of a collection of records or the slots of a collection of tuples. If the `arrays` have @@ -155,6 +157,7 @@ def zip( optiontype_outside_record, highlevel, behavior, + attrs, ) @@ -167,32 +170,48 @@ def _impl( optiontype_outside_record, highlevel, behavior, + attrs, ): if depth_limit is not None and depth_limit <= 0: raise ValueError("depth_limit must be None or at least 1") + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + if isinstance(arrays, Mapping): + layouts = ensure_same_backend( + *( + ctx.unwrap( + x, + allow_record=False, + allow_unknown=False, + none_policy="pass-through", + primitive_policy="pass-through", + ) + for x in arrays.values() + ) + ) + fields = list(arrays.keys()) - if isinstance(arrays, Mapping): - behavior = behavior_of(*arrays.values(), behavior=behavior) - recordlookup = list(arrays) - layouts = [ - ak.operations.to_layout(x, primitive_policy="pass-through") - for x in arrays.values() - ] - - else: - arrays = list(arrays) - behavior = behavior_of(*arrays, behavior=behavior) - recordlookup = None - layouts = [ - ak.operations.to_layout(x, primitive_policy="pass-through") for x in arrays - ] + else: + layouts = ensure_same_backend( + *( + ctx.unwrap( + x, + allow_record=False, + allow_unknown=False, + none_policy="pass-through", + primitive_policy="pass-through", + ) + for x in arrays + ) + ) + fields = None # Promote any integers or records + backend = next((b.backend for b in layouts if hasattr(b, "backend")), "cpu") layout_is_content = [isinstance(x, ak.contents.Content) for x in layouts] layouts = [ x if isinstance(x, (ak.contents.Content, ak.record.Record)) - else ak.operations.to_layout(x, 
primitive_policy="promote") + else ak.operations.to_layout(x).to_backend(backend) for x in layouts ] @@ -205,14 +224,16 @@ def _impl( parameters = dict(parameters) parameters["__record__"] = with_name - def action(inputs, depth, **ignore): + def action(inputs, depth, backend, **ignore): if depth_limit == depth or all(x.purelist_depth == 1 for x in inputs): # If we want to zip after option types at this depth if optiontype_outside_record and any(x.is_option for x in inputs): return None return ( - ak.contents.RecordArray(inputs, recordlookup, parameters=parameters), + ak.contents.RecordArray( + inputs, fields, parameters=parameters, backend=backend + ), ) else: return None @@ -227,4 +248,4 @@ def action(inputs, depth, **ignore): out = out[0] assert isinstance(out, ak.record.Record) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_capitalize.py b/src/awkward/operations/str/akstr_capitalize.py index 9d540b9572..3f62b50e67 100644 --- a/src/awkward/operations/str/akstr_capitalize.py +++ b/src/awkward/operations/str/akstr_capitalize.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("capitalize",) @high_level_function(module="ak.str") -def capitalize(array, *, highlevel=True, behavior=None): +def capitalize(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def capitalize(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. 
Replaces any string-valued data with a capitalized version (correctly transforming Unicode characters), with the first character @@ -40,20 +41,22 @@ def capitalize(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.capitalize") - behavior = behavior_of(array, behavior=behavior) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_capitalize, pc.ascii_capitalize, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_center.py b/src/awkward/operations/str/akstr_center.py index 38d7d2262c..df98529a1b 100644 --- a/src/awkward/operations/str/akstr_center.py +++ b/src/awkward/operations/str/akstr_center.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("center",) @high_level_function(module="ak.str") -def center(array, width, padding=" ", *, highlevel=True, behavior=None): +def center(array, width, padding=" ", *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -22,6 +21,8 @@ def center(array, width, padding=" ", *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. 
+ attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string or bytestring-valued data with centered strings/bytestrings of a given `width`, padding both sides with the given @@ -46,20 +47,22 @@ def center(array, width, padding=" ", *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, width, padding, highlevel, behavior) + return _impl(array, width, padding, highlevel, behavior, attrs) -def _impl(array, width, padding, highlevel, behavior): +def _impl(array, width, padding, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.center") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("r") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_center, pc.ascii_center, width, padding, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_count_substring.py b/src/awkward/operations/str/akstr_count_substring.py index d0e05bae3e..5e9074e5a3 100644 --- a/src/awkward/operations/str/akstr_count_substring.py +++ b/src/awkward/operations/str/akstr_count_substring.py @@ -3,16 +3,15 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("count_substring",) @high_level_function(module="ak.str") def count_substring( - array, pattern, *, ignore_case=False, highlevel=True, behavior=None + array, pattern, *, ignore_case=False, highlevel=True, behavior=None, attrs=None ): """ Args: @@ -25,6 +24,8 @@ def count_substring( a low-level 
#ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Counts the number of occurrences of the given literal `pattern` in every string in `array`. Depending upon the value of `ignore_case`, the matching @@ -42,15 +43,22 @@ def count_substring( yield (array,) # Implementation - return _impl(array, pattern, ignore_case, highlevel, behavior) + return _impl(array, pattern, ignore_case, highlevel, behavior, attrs) -def _impl(array, pattern, ignore_case, highlevel, behavior): +def _impl(array, pattern, ignore_case, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.count_substring") - layout = ak.to_layout(array, allow_record=False) - behavior = behavior_of(array, behavior=behavior) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) apply = ak.operations.str._get_ufunc_action( pc.count_substring, pc.count_substring, @@ -60,4 +68,4 @@ def _impl(array, pattern, ignore_case, highlevel, behavior): ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_count_substring_regex.py b/src/awkward/operations/str/akstr_count_substring_regex.py index 5c7e2d47ef..0540d6818d 100644 --- a/src/awkward/operations/str/akstr_count_substring_regex.py +++ b/src/awkward/operations/str/akstr_count_substring_regex.py @@ -3,16 +3,15 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = 
("count_substring_regex",) @high_level_function(module="ak.str") def count_substring_regex( - array, pattern, *, ignore_case=False, highlevel=True, behavior=None + array, pattern, *, ignore_case=False, highlevel=True, behavior=None, attrs=None ): """ Args: @@ -25,6 +24,8 @@ def count_substring_regex( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Counts the number of occurrences of the given regular expression `pattern` in every string in `array`. Depending upon the value of `ignore_case`, the @@ -42,15 +43,22 @@ def count_substring_regex( yield (array,) # Implementation - return _impl(array, pattern, ignore_case, highlevel, behavior) + return _impl(array, pattern, ignore_case, highlevel, behavior, attrs) -def _impl(array, pattern, ignore_case, highlevel, behavior): +def _impl(array, pattern, ignore_case, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.count_substring_regex") - layout = ak.to_layout(array, allow_record=False) - behavior = behavior_of(array, behavior=behavior) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) apply = ak.operations.str._get_ufunc_action( pc.count_substring_regex, pc.count_substring_regex, @@ -60,4 +68,4 @@ def _impl(array, pattern, ignore_case, highlevel, behavior): ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_ends_with.py b/src/awkward/operations/str/akstr_ends_with.py index f96c15a42f..3bfe4072ea 100644 --- a/src/awkward/operations/str/akstr_ends_with.py +++ 
b/src/awkward/operations/str/akstr_ends_with.py @@ -3,15 +3,16 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("ends_with",) @high_level_function(module="ak.str") -def ends_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=None): +def ends_with( + array, pattern, *, ignore_case=False, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -23,6 +24,8 @@ def ends_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=Non a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns True for every string in `array` if it ends with the given literal suffix `pattern`. 
Depending upon the value of `ignore_case`, the matching @@ -38,15 +41,22 @@ def ends_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=Non yield (array,) # Implementation - return _impl(array, pattern, ignore_case, highlevel, behavior) + return _impl(array, pattern, ignore_case, highlevel, behavior, attrs) -def _impl(array, pattern, ignore_case, highlevel, behavior): +def _impl(array, pattern, ignore_case, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.ends_with") - layout = ak.to_layout(array, allow_record=False) - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.ends_with") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) apply = ak.operations.str._get_ufunc_action( pc.ends_with, pc.ends_with, @@ -55,4 +65,4 @@ def _impl(array, pattern, ignore_case, highlevel, behavior): pattern=pattern, ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_extract_regex.py b/src/awkward/operations/str/akstr_extract_regex.py index 1187ebcc5b..769974a1cd 100644 --- a/src/awkward/operations/str/akstr_extract_regex.py +++ b/src/awkward/operations/str/akstr_extract_regex.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("extract_regex",) @high_level_function(module="ak.str") -def extract_regex(array, pattern, *, highlevel=True, behavior=None): +def extract_regex(array, pattern, *, highlevel=True, behavior=None, attrs=None): """ Args: array:
Array-like data (anything #ak.to_layout recognizes). @@ -20,6 +19,8 @@ def extract_regex(array, pattern, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns None for every string in `array` if it does not match `pattern`; otherwise, a record whose fields are named capture groups and whose @@ -57,17 +58,19 @@ def extract_regex(array, pattern, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, pattern, highlevel, behavior) + return _impl(array, pattern, highlevel, behavior, attrs) -def _impl(array, pattern, highlevel, behavior): +def _impl(array, pattern, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.extract_regex") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.extract_regex") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.extract_regex, pc.extract_regex, @@ -78,4 +81,4 @@ def _impl(array, pattern, highlevel, behavior): ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_find_substring.py b/src/awkward/operations/str/akstr_find_substring.py index 2c598bbc96..b19059f308 100644 --- a/src/awkward/operations/str/akstr_find_substring.py +++ b/src/awkward/operations/str/akstr_find_substring.py @@ -3,15 +3,16 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("find_substring",)
@high_level_function(module="ak.str") -def find_substring(array, pattern, *, ignore_case=False, highlevel=True, behavior=None): +def find_substring( + array, pattern, *, ignore_case=False, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -23,6 +24,8 @@ def find_substring(array, pattern, *, ignore_case=False, highlevel=True, behavio a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns the index of the first occurrence of the given literal `pattern` for each string in `array`. If the literal pattern is not found inside the @@ -40,15 +43,22 @@ def find_substring(array, pattern, *, ignore_case=False, highlevel=True, behavio yield (array,) # Implementation - return _impl(array, pattern, ignore_case, highlevel, behavior) + return _impl(array, pattern, ignore_case, highlevel, behavior, attrs) -def _impl(array, pattern, ignore_case, highlevel, behavior): +def _impl(array, pattern, ignore_case, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.find_substring") - layout = ak.to_layout(array, allow_record=False) - behavior = behavior_of(array, behavior=behavior) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) apply = ak.operations.str._get_ufunc_action( pc.find_substring, pc.find_substring, @@ -57,4 +67,4 @@ def _impl(array, pattern, ignore_case, highlevel, behavior): pattern=pattern, ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_find_substring_regex.py 
b/src/awkward/operations/str/akstr_find_substring_regex.py index afa999ee99..1b630e2ea2 100644 --- a/src/awkward/operations/str/akstr_find_substring_regex.py +++ b/src/awkward/operations/str/akstr_find_substring_regex.py @@ -3,16 +3,15 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("find_substring_regex",) @high_level_function(module="ak.str") def find_substring_regex( - array, pattern, *, ignore_case=False, highlevel=True, behavior=None + array, pattern, *, ignore_case=False, highlevel=True, behavior=None, attrs=None ): """ Args: @@ -25,6 +24,8 @@ def find_substring_regex( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns the index of the first occurrence of the given regular expression `pattern` for each string in `array`. 
If the literal pattern is not found @@ -42,15 +43,22 @@ def find_substring_regex( yield (array,) # Implementation - return _impl(array, pattern, ignore_case, highlevel, behavior) + return _impl(array, pattern, ignore_case, highlevel, behavior, attrs) -def _impl(array, pattern, ignore_case, highlevel, behavior): +def _impl(array, pattern, ignore_case, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.find_substring_regex") - layout = ak.to_layout(array, allow_record=False) - behavior = behavior_of(array, behavior=behavior) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) apply = ak.operations.str._get_ufunc_action( pc.find_substring_regex, pc.find_substring_regex, @@ -59,4 +67,4 @@ def _impl(array, pattern, ignore_case, highlevel, behavior): pattern=pattern, ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_index_in.py b/src/awkward/operations/str/akstr_index_in.py index 02b2a84c70..11cbe09cef 100644 --- a/src/awkward/operations/str/akstr_index_in.py +++ b/src/awkward/operations/str/akstr_index_in.py @@ -3,21 +3,19 @@ from __future__ import annotations import awkward as ak -from awkward._backends.dispatch import backend_of -from awkward._backends.numpy import NumpyBackend from awkward._backends.typetracer import TypeTracerBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend __all__ = ("index_in",) -cpu = NumpyBackend.instance() typetracer = TypeTracerBackend.instance() @high_level_function(module="ak.str") -def index_in(array, 
value_set, *, skip_nones=False, highlevel=True, behavior=None): +def index_in( + array, value_set, *, skip_nones=False, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -29,6 +27,8 @@ def index_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=Non a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns the index of the first pattern in `value_set` that each string in `array` matches. If the string is not found within `value_set`, then the @@ -44,7 +44,7 @@ def index_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=Non yield (array, value_set) # Implementation - return _impl(array, value_set, skip_nones, highlevel, behavior) + return _impl(array, value_set, skip_nones, highlevel, behavior, attrs) def _is_maybe_optional_list_of_string(layout): @@ -56,17 +56,17 @@ def _is_maybe_optional_list_of_string(layout): return False -def _impl(array, value_set, skip_nones, highlevel, behavior): +def _impl(array, value_set, skip_nones, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute from awkward.operations.str import _apply_through_arrow pc = import_pyarrow_compute("ak.str.index_in") - behavior = behavior_of(array, value_set, behavior=behavior) - backend = backend_of(array, value_set, coerce_to_common=True, default=cpu) - - layout = ak.to_layout(array, allow_record=False).to_backend(backend) - value_set_layout = ak.to_layout(value_set, allow_record=False).to_backend(backend) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout, value_set_layout = ensure_same_backend( + ctx.unwrap(array, allow_record=False), + ctx.unwrap(value_set, allow_record=False), + ) if not _is_maybe_optional_list_of_string(value_set_layout): raise TypeError("`value_set` must be 1D array 
of (possibly missing) strings") @@ -84,4 +84,4 @@ def apply(layout, **kwargs): out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_alnum.py b/src/awkward/operations/str/akstr_is_alnum.py index 3f66490d4d..acfe71b590 100644 --- a/src/awkward/operations/str/akstr_is_alnum.py +++ b/src/awkward/operations/str/akstr_is_alnum.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_alnum",) @high_level_function(module="ak.str") -def is_alnum(array, *, highlevel=True, behavior=None): +def is_alnum(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def is_alnum(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with True if the string is non-empty and consists only of alphanumeric Unicode characters, False otherwise. 
@@ -39,20 +40,22 @@ def is_alnum(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_alnum") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_alnum") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_is_alnum, pc.ascii_is_alnum, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_alpha.py b/src/awkward/operations/str/akstr_is_alpha.py index ba49aa5687..bca754a613 100644 --- a/src/awkward/operations/str/akstr_is_alpha.py +++ b/src/awkward/operations/str/akstr_is_alpha.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_alpha",) @high_level_function(module="ak.str") -def is_alpha(array, *, highlevel=True, behavior=None): +def is_alpha(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def is_alpha(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level.
Replaces any string-valued data with True if the string is non-empty and consists only of alphabetic Unicode characters, False otherwise. @@ -39,20 +40,22 @@ def is_alpha(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_alpha") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_alpha") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_is_alpha, pc.ascii_is_alpha, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_ascii.py b/src/awkward/operations/str/akstr_is_ascii.py index 473c7c721f..8e2cbd5e75 100644 --- a/src/awkward/operations/str/akstr_is_ascii.py +++ b/src/awkward/operations/str/akstr_is_ascii.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_ascii",) @high_level_function(module="ak.str") -def is_ascii(array, *, highlevel=True, behavior=None): +def is_ascii(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def is_ascii(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level.
+ attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with True iff the string consists only of ASCII characters, False otherwise. @@ -39,20 +40,22 @@ def is_ascii(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_ascii") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_ascii") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.string_is_ascii, pc.string_is_ascii, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_decimal.py b/src/awkward/operations/str/akstr_is_decimal.py index e77e13ba0a..46e8746d6e 100644 --- a/src/awkward/operations/str/akstr_is_decimal.py +++ b/src/awkward/operations/str/akstr_is_decimal.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_decimal",) @high_level_function(module="ak.str") -def is_decimal(array, *, highlevel=True, behavior=None): +def is_decimal(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def is_decimal(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass.
behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with True if the string is non-empty and consists only of decimal Unicode characters, False otherwise. @@ -39,20 +40,22 @@ def is_decimal(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_decimal") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_decimal") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_is_decimal, pc.ascii_is_decimal, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_digit.py b/src/awkward/operations/str/akstr_is_digit.py index c3e34af90d..da1e605066 100644 --- a/src/awkward/operations/str/akstr_is_digit.py +++ b/src/awkward/operations/str/akstr_is_digit.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_digit",) @high_level_function(module="ak.str") -def is_digit(array, *, highlevel=True, behavior=None): +def is_digit(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes).
@@ -19,6 +18,8 @@ def is_digit(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with True if the string is non-empty and consists only of Unicode digits, False otherwise. @@ -41,20 +42,22 @@ def is_digit(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_digit") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_digit") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_is_digit, pc.utf8_is_digit, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_in.py b/src/awkward/operations/str/akstr_is_in.py index e2ef931c47..b44a698423 100644 --- a/src/awkward/operations/str/akstr_is_in.py +++ b/src/awkward/operations/str/akstr_is_in.py @@ -3,21 +3,19 @@ from __future__ import annotations import awkward as ak -from awkward._backends.dispatch import backend_of -from awkward._backends.numpy import NumpyBackend from awkward._backends.typetracer import TypeTracerBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend __all__ = ("is_in",) -cpu = NumpyBackend.instance() typetracer =
TypeTracerBackend.instance() @high_level_function(module="ak.str") -def is_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=None): +def is_in( + array, value_set, *, skip_nones=False, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -29,6 +27,8 @@ def is_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns True for each string in `array` if it matches any pattern in `value_set`; otherwise, returns False. @@ -43,7 +43,7 @@ def is_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=None): yield (array, value_set) # Implementation - return _impl(array, value_set, skip_nones, highlevel, behavior) + return _impl(array, value_set, skip_nones, highlevel, behavior, attrs) def _is_maybe_optional_list_of_string(layout): @@ -55,17 +55,17 @@ def _is_maybe_optional_list_of_string(layout): return False -def _impl(array, value_set, skip_nones, highlevel, behavior): +def _impl(array, value_set, skip_nones, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute from awkward.operations.str import _apply_through_arrow pc = import_pyarrow_compute("ak.str.is_in") - behavior = behavior_of(array, value_set, behavior=behavior) - backend = backend_of(array, value_set, coerce_to_common=True, default=cpu) - - layout = ak.to_layout(array, allow_record=False).to_backend(backend) - value_set_layout = ak.to_layout(value_set, allow_record=False).to_backend(backend) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout, value_set_layout = ensure_same_backend( + ctx.unwrap(array, allow_record=False), + ctx.unwrap(value_set, allow_record=False), + ) if not _is_maybe_optional_list_of_string(value_set_layout): raise 
TypeError("`value_set` must be 1D array of (possibly missing) strings") @@ -78,4 +78,4 @@ def apply(layout, **kwargs): out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_lower.py b/src/awkward/operations/str/akstr_is_lower.py index b5010f1592..2add221fe1 100644 --- a/src/awkward/operations/str/akstr_is_lower.py +++ b/src/awkward/operations/str/akstr_is_lower.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_lower",) @high_level_function(module="ak.str") -def is_lower(array, *, highlevel=True, behavior=None): +def is_lower(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def is_lower(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with True if the string is non-empty and consists only of lowercase Unicode characters, False otherwise. 
@@ -39,20 +40,22 @@ def is_lower(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_lower") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_lower") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_is_lower, pc.ascii_is_lower, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_numeric.py b/src/awkward/operations/str/akstr_is_numeric.py index abaffecd03..1a3e2696d0 100644 --- a/src/awkward/operations/str/akstr_is_numeric.py +++ b/src/awkward/operations/str/akstr_is_numeric.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_numeric",) @high_level_function(module="ak.str") -def is_numeric(array, *, highlevel=True, behavior=None): +def is_numeric(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def is_numeric(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level.
Replaces any string-valued data with True if the string is non-empty and consists only of numeric Unicode characters, False otherwise. @@ -41,20 +42,22 @@ def is_numeric(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_numeric") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_numeric") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_is_numeric, pc.utf8_is_numeric, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_printable.py b/src/awkward/operations/str/akstr_is_printable.py index 792bbeb878..11e8d3006b 100644 --- a/src/awkward/operations/str/akstr_is_printable.py +++ b/src/awkward/operations/str/akstr_is_printable.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_printable",) @high_level_function(module="ak.str") -def is_printable(array, *, highlevel=True, behavior=None): +def is_printable(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def is_printable(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level.
+ attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with True if the string is non-empty and consists only of printable Unicode characters, False otherwise. @@ -39,20 +40,22 @@ def is_printable(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_printable") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_printable") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_is_printable, pc.ascii_is_printable, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_space.py b/src/awkward/operations/str/akstr_is_space.py index c6218aa45b..d5bc716029 100644 --- a/src/awkward/operations/str/akstr_is_space.py +++ b/src/awkward/operations/str/akstr_is_space.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_space",) @high_level_function(module="ak.str") -def is_space(array, *, highlevel=True, behavior=None): +def is_space(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def is_space(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass.
behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with True if the string is non-empty and consists only of whitespace Unicode characters, False otherwise. @@ -39,20 +40,22 @@ def is_space(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_space") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_space") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_is_space, pc.ascii_is_space, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_title.py b/src/awkward/operations/str/akstr_is_title.py index 666ec3fd78..84de708a31 100644 --- a/src/awkward/operations/str/akstr_is_title.py +++ b/src/awkward/operations/str/akstr_is_title.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_title",) @high_level_function(module="ak.str") -def is_title(array, *, highlevel=True, behavior=None): +def is_title(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes).
@@ -19,6 +18,8 @@ def is_title(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with True if the string is title-cased, i.e. it has at least one cased character, each uppercase character follows @@ -43,20 +44,22 @@ def is_title(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_title") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_title") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_is_title, pc.ascii_is_title, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_is_upper.py b/src/awkward/operations/str/akstr_is_upper.py index ddab80bc17..cbbfc72547 100644 --- a/src/awkward/operations/str/akstr_is_upper.py +++ b/src/awkward/operations/str/akstr_is_upper.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("is_upper",) @high_level_function(module="ak.str") -def is_upper(array, *, highlevel=True, behavior=None): +def is_upper(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data
(anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def is_upper(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with True if the string is non-empty and consists only of uppercase Unicode characters, False otherwise. @@ -39,17 +40,19 @@ def is_upper(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.is_upper") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.is_upper") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_is_upper, pc.ascii_is_upper, @@ -58,4 +61,4 @@ def _impl(array, highlevel, behavior): ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_join.py b/src/awkward/operations/str/akstr_join.py index f07861e519..a5ab638ba5 100644 --- a/src/awkward/operations/str/akstr_join.py +++ b/src/awkward/operations/str/akstr_join.py @@ -3,21 +3,17 @@ from __future__ import annotations import awkward as ak -from awkward._backends.dispatch import backend_of -from awkward._backends.numpy import NumpyBackend from awkward._backends.typetracer import TypeTracerBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend
__all__ = ("join",) typetracer = TypeTracerBackend.instance() -cpu = NumpyBackend.instance() @high_level_function(module="ak.str") -def join(array, separator, *, highlevel=True, behavior=None): +def join(array, separator, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -28,6 +24,8 @@ def join(array, separator, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Concatenate the strings in `array`. The `separator` is inserted between each string. If array-like, `separator` is broadcast against `array` which @@ -42,10 +40,10 @@ def join(array, separator, *, highlevel=True, behavior=None): See also: #ak.str.join_element_wise. """ # Dispatch - yield (array, separator) + yield array, separator # Implementation - return _impl(array, separator, highlevel, behavior) + return _impl(array, separator, highlevel, behavior, attrs) def _is_maybe_optional_list_of_string(layout): @@ -57,17 +55,26 @@ def _is_maybe_optional_list_of_string(layout): return False -def _impl(array, separator, highlevel, behavior): +def _impl(array, separator, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - from awkward.operations.str import _apply_through_arrow pc = import_pyarrow_compute("ak.str.join") - behavior = behavior_of(array, separator, behavior=behavior) - backend = backend_of(array, separator, coerce_to_common=True, default=cpu) + from awkward.operations.str import _apply_through_arrow + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout, maybe_separator_layout = ensure_same_backend( + ctx.unwrap(array, allow_record=False, primitive_policy="error"), + ctx.unwrap( + separator, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + 
string_policy="pass-through", + ), + ) - layout = ak.to_layout(array, allow_record=False).to_backend(backend) - if isinstance(separator, (bytes, str)): + if isinstance(maybe_separator_layout, (bytes, str)): def apply_unary(layout, **kwargs): if not ( @@ -81,7 +88,7 @@ def apply_unary(layout, **kwargs): layout.copy( content=ak.contents.UnmaskedArray.simplified(layout.content) ), - separator, + maybe_separator_layout, # This kernel requires non-large string/bytestrings string_to32=True, bytestring_to32=True, @@ -89,9 +96,6 @@ def apply_unary(layout, **kwargs): out = ak._do.recursively_apply(layout, apply_unary) else: - separator_layout = ak.to_layout(separator, allow_record=False).to_backend( - backend - ) def apply_binary(layouts, **kwargs): if not ( @@ -120,7 +124,7 @@ def apply_binary(layouts, **kwargs): ) (out,) = ak._broadcasting.broadcast_and_apply( - (layout, separator_layout), apply_binary + (layout, maybe_separator_layout), apply_binary ) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_join_element_wise.py b/src/awkward/operations/str/akstr_join_element_wise.py index f62705351f..98f4e42f91 100644 --- a/src/awkward/operations/str/akstr_join_element_wise.py +++ b/src/awkward/operations/str/akstr_join_element_wise.py @@ -3,21 +3,17 @@ from __future__ import annotations import awkward as ak -from awkward._backends.dispatch import backend_of -from awkward._backends.numpy import NumpyBackend from awkward._backends.typetracer import TypeTracerBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend __all__ = ("join_element_wise",) -cpu = NumpyBackend.instance() typetracer = TypeTracerBackend.instance() @high_level_function(module="ak.str") -def join_element_wise(*arrays, highlevel=True, behavior=None): +def 
join_element_wise(*arrays, highlevel=True, behavior=None, attrs=None): """ Args: arrays: Array-like data (anything #ak.to_layout recognizes). @@ -25,6 +21,8 @@ def join_element_wise(*arrays, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Broadcasts and concatenates all but the last array of strings in `arrays`; the last is used as a separator. @@ -46,10 +44,10 @@ def join_element_wise(*arrays, highlevel=True, behavior=None): yield arrays # Implementation - return _impl(arrays, highlevel, behavior) + return _impl(arrays, highlevel, behavior, attrs) -def _impl(arrays, highlevel, behavior): +def _impl(arrays, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute from awkward.operations.str import _apply_through_arrow @@ -58,9 +56,8 @@ def _impl(arrays, highlevel, behavior): if len(arrays) < 1: raise TypeError("at least one array is required") - behavior = behavior_of(*arrays, behavior=behavior) - backend = backend_of(*arrays, coerce_to_common=True, default=cpu) - layouts = [ak.to_layout(x).to_backend(backend) for x in arrays] + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layouts = ensure_same_backend(*(ctx.unwrap(x) for x in arrays)) def action(layouts, **kwargs): if all( @@ -71,4 +68,4 @@ def action(layouts, **kwargs): (out,) = ak._broadcasting.broadcast_and_apply(layouts, action) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_length.py b/src/awkward/operations/str/akstr_length.py index df4517b1de..690ff15a5a 100644 --- a/src/awkward/operations/str/akstr_length.py +++ b/src/awkward/operations/str/akstr_length.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import 
behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("length",) @high_level_function(module="ak.str") -def length(array, *, highlevel=True, behavior=None): +def length(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def length(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with its length in Unicode characters (not its length in bytes). @@ -38,20 +39,22 @@ def length(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.length") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.length") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_length, pc.binary_length, bytestring_to_string=False ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_lower.py b/src/awkward/operations/str/akstr_lower.py index 3bcdfe2445..8c8dc44e3f 100644 --- a/src/awkward/operations/str/akstr_lower.py +++ b/src/awkward/operations/str/akstr_lower.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import
high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("lower",) @high_level_function(module="ak.str") -def lower(array, *, highlevel=True, behavior=None): +def lower(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def lower(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with a lowercase version (correctly transforming Unicode characters). @@ -38,20 +39,22 @@ def lower(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.lower") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.lower") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_lower, pc.ascii_lower, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_lpad.py b/src/awkward/operations/str/akstr_lpad.py index 79df8f0a81..8f0fe7f6e4 100644 --- a/src/awkward/operations/str/akstr_lpad.py +++ b/src/awkward/operations/str/akstr_lpad.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import
wrap_layout +from awkward._layout import HighLevelContext __all__ = ("lpad",) @high_level_function(module="ak.str") -def lpad(array, width, padding=" ", *, highlevel=True, behavior=None): +def lpad(array, width, padding=" ", *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -22,6 +21,8 @@ def lpad(array, width, padding=" ", *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string or bytestring-valued data with right-aligned strings/bytestrings of a given `width`, padding the left side with the @@ -46,20 +47,21 @@ def lpad(array, width, padding=" ", *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, width, padding, highlevel, behavior) + return _impl(array, width, padding, highlevel, behavior, attrs) -def _impl(array, width, padding, highlevel, behavior): +def _impl(array, width, padding, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.lpad") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.lpad") + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_lpad, pc.ascii_lpad, width, padding, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_ltrim.py b/src/awkward/operations/str/akstr_ltrim.py index dd115d9fa3..e6eaa4dcfa 100644 --- a/src/awkward/operations/str/akstr_ltrim.py +++ b/src/awkward/operations/str/akstr_ltrim.py @@ -3,15 +3,14 @@ from __future__ import annotations import
awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("ltrim",) @high_level_function(module="ak.str") -def ltrim(array, characters, *, highlevel=True, behavior=None): +def ltrim(array, characters, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -21,6 +20,8 @@ def ltrim(array, characters, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Removes any leading characters of `characters` from any string or bytestring-valued data. @@ -46,20 +47,22 @@ def ltrim(array, characters, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, characters, highlevel, behavior) + return _impl(array, characters, highlevel, behavior, attrs) -def _impl(array, characters, highlevel, behavior): +def _impl(array, characters, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.ltrim") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.ltrim") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_ltrim, pc.ascii_ltrim, characters, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_ltrim_whitespace.py b/src/awkward/operations/str/akstr_ltrim_whitespace.py index a29cd7d374..dea616a4c7 100644 --- a/src/awkward/operations/str/akstr_ltrim_whitespace.py +++
b/src/awkward/operations/str/akstr_ltrim_whitespace.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("ltrim_whitespace",) @high_level_function(module="ak.str") -def ltrim_whitespace(array, *, highlevel=True, behavior=None): +def ltrim_whitespace(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def ltrim_whitespace(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Removes any leading whitespace from any string or bytestring-valued data. @@ -37,17 +38,19 @@ def ltrim_whitespace(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.ltrim_whitespace") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.ltrim_whitespace") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_ltrim_whitespace, pc.ascii_ltrim_whitespace, @@ -55,4 +58,4 @@ def _impl(array, highlevel, behavior): ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_match_like.py b/src/awkward/operations/str/akstr_match_like.py index ac86c0a121..4680c85f62
100644 --- a/src/awkward/operations/str/akstr_match_like.py +++ b/src/awkward/operations/str/akstr_match_like.py @@ -3,15 +3,16 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("match_like",) @high_level_function(module="ak.str") -def match_like(array, pattern, *, ignore_case=False, highlevel=True, behavior=None): +def match_like( + array, pattern, *, ignore_case=False, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -23,6 +24,8 @@ def match_like(array, pattern, *, ignore_case=False, highlevel=True, behavior=No a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. For each string in the array, determine whether it matches the given SQL-style LIKE pattern, which obeys the following rules: @@ -43,15 +46,22 @@ def match_like(array, pattern, *, ignore_case=False, highlevel=True, behavior=No yield (array,) # Implementation - return _impl(array, pattern, ignore_case, highlevel, behavior) + return _impl(array, pattern, ignore_case, highlevel, behavior, attrs) -def _impl(array, pattern, ignore_case, highlevel, behavior): +def _impl(array, pattern, ignore_case, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.match_like") - layout = ak.to_layout(array, allow_record=False) - behavior = behavior_of(array, behavior=behavior) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) apply = ak.operations.str._get_ufunc_action( 
pc.match_like, pc.match_like, @@ -60,4 +70,4 @@ def _impl(array, pattern, ignore_case, highlevel, behavior): pattern=pattern, ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_match_substring.py b/src/awkward/operations/str/akstr_match_substring.py index 5f7641d1f2..fe01d2c17f 100644 --- a/src/awkward/operations/str/akstr_match_substring.py +++ b/src/awkward/operations/str/akstr_match_substring.py @@ -3,16 +3,15 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("match_substring",) @high_level_function(module="ak.str") def match_substring( - array, pattern, *, ignore_case=False, highlevel=True, behavior=None + array, pattern, *, ignore_case=False, highlevel=True, behavior=None, attrs=None ): """ Args: @@ -24,6 +23,8 @@ def match_substring( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. For each string in the array, determine whether it contains the given literal `pattern`. 
@@ -40,15 +41,22 @@ def match_substring( yield (array,) # Implementation - return _impl(array, pattern, ignore_case, highlevel, behavior) + return _impl(array, pattern, ignore_case, highlevel, behavior, attrs) -def _impl(array, pattern, ignore_case, highlevel, behavior): +def _impl(array, pattern, ignore_case, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.match_substring") - layout = ak.to_layout(array, allow_record=False) - behavior = behavior_of(array, behavior=behavior) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) apply = ak.operations.str._get_ufunc_action( pc.match_substring, pc.match_substring, @@ -57,4 +65,4 @@ def _impl(array, pattern, ignore_case, highlevel, behavior): pattern=pattern, ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_match_substring_regex.py b/src/awkward/operations/str/akstr_match_substring_regex.py index c46b25e574..908e33b8bb 100644 --- a/src/awkward/operations/str/akstr_match_substring_regex.py +++ b/src/awkward/operations/str/akstr_match_substring_regex.py @@ -3,16 +3,15 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("match_substring_regex",) @high_level_function(module="ak.str") def match_substring_regex( - array, pattern, *, ignore_case=False, highlevel=True, behavior=None + array, pattern, *, ignore_case=False, highlevel=True, behavior=None, attrs=None ): """ Args: @@ -24,6 +23,8 @@ def match_substring_regex( a low-level #ak.contents.Content 
subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. For each string in the array, determine whether any substring matches the given regular expression `pattern` @@ -40,16 +41,22 @@ def match_substring_regex( yield (array,) # Implementation - return _impl(array, pattern, ignore_case, highlevel, behavior) + return _impl(array, pattern, ignore_case, highlevel, behavior, attrs) -def _impl(array, pattern, ignore_case, highlevel, behavior): +def _impl(array, pattern, ignore_case, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.match_substring_regex") + pc = import_pyarrow_compute("ak.str.match_substring_regex") - layout = ak.to_layout(array, allow_record=False) - behavior = behavior_of(array, behavior=behavior) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) apply = ak.operations.str._get_ufunc_action( pc.match_substring_regex, pc.match_substring_regex, @@ -58,4 +65,4 @@ def _impl(array, pattern, ignore_case, highlevel, behavior): pattern=pattern, ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_repeat.py b/src/awkward/operations/str/akstr_repeat.py index eae9774245..de929c57b7 100644 --- a/src/awkward/operations/str/akstr_repeat.py +++ b/src/awkward/operations/str/akstr_repeat.py @@ -5,23 +5,19 @@ import numbers import awkward as ak -from awkward._backends.dispatch import backend_of -from awkward._backends.numpy import NumpyBackend from awkward._backends.typetracer import TypeTracerBackend -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from
awkward._layout import wrap_layout +from awkward._layout import HighLevelContext, ensure_same_backend from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("repeat",) -cpu = NumpyBackend.instance() typetracer = TypeTracerBackend.instance() np = NumpyMetadata.instance() @high_level_function(module="ak.str") -def repeat(array, num_repeats, *, highlevel=True, behavior=None): +def repeat(array, num_repeats, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -31,6 +27,8 @@ def repeat(array, num_repeats, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued or bytestring-valued data with the same value repeated `num_repeats` times, which can be a scalar integer or a @@ -46,25 +44,26 @@ def repeat(array, num_repeats, *, highlevel=True, behavior=None): on strings and bytestrings, respectively. 
""" # Dispatch - yield (array, num_repeats) + yield array, num_repeats # Implementation - return _impl(array, num_repeats, highlevel, behavior) + return _impl(array, num_repeats, highlevel, behavior, attrs) -def _impl(array, num_repeats, highlevel, behavior): +def _impl(array, num_repeats, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute from awkward.operations.str import _apply_through_arrow pc = import_pyarrow_compute("ak.str.repeat") - behavior = behavior_of(array, num_repeats, behavior=behavior) - backend = backend_of(array, num_repeats, coerce_to_common=True, default=cpu) - layout = ak.operations.to_layout(array).to_backend(backend) - - num_repeats_layout = ak.operations.to_layout(num_repeats, allow_unknown=True) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout, num_repeats_layout = ensure_same_backend( + ctx.unwrap(array, allow_record=False), + ctx.unwrap( + num_repeats, allow_record=False, primitive_policy="pass-through" + ), + ) if isinstance(num_repeats_layout, ak.contents.Content): - num_repeats_layout = num_repeats_layout.to_backend(backend) def action(inputs, **kwargs): if inputs[0].is_list and inputs[0].parameter("__array__") in ( @@ -99,4 +98,4 @@ def action(layout, **kwargs): out = ak._do.recursively_apply(layout, action) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_replace_slice.py b/src/awkward/operations/str/akstr_replace_slice.py index 2152ebfa79..c8e5cb98b4 100644 --- a/src/awkward/operations/str/akstr_replace_slice.py +++ b/src/awkward/operations/str/akstr_replace_slice.py @@ -3,15 +3,16 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("replace_slice",) @high_level_function(module="ak.str") 
-def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=None): +def replace_slice( + array, start, stop, replacement, *, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -22,6 +23,8 @@ def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=N a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces slices of any string or bytestring-valued data with `replacement` between `start` and `stop` indexes; `start` is inclusive and `stop` is @@ -46,20 +49,22 @@ def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=N yield (array,) # Implementation - return _impl(array, start, stop, replacement, highlevel, behavior) + return _impl(array, start, stop, replacement, highlevel, behavior, attrs) -def _impl(array, start, stop, replacement, highlevel, behavior): +def _impl(array, start, stop, replacement, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.replace_slice") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.replace_slice") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_replace_slice, pc.binary_replace_slice, start, stop, replacement ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_replace_substring.py b/src/awkward/operations/str/akstr_replace_substring.py index 69abb7b049..41f2869389 100644 --- a/src/awkward/operations/str/akstr_replace_substring.py +++ b/src/awkward/operations/str/akstr_replace_substring.py @@ -3,16 +3,22
@@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("replace_substring",) @high_level_function(module="ak.str") def replace_substring( - array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None + array, + pattern, + replacement, + *, + max_replacements=None, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -26,6 +32,8 @@ def replace_substring( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces non-overlapping subsequences of any string or bytestring-valued data that match a literal `pattern` with `replacement`. @@ -48,17 +56,21 @@ def replace_substring( yield (array,) # Implementation - return _impl(array, pattern, replacement, max_replacements, highlevel, behavior) + return _impl( + array, pattern, replacement, max_replacements, highlevel, behavior, attrs + ) -def _impl(array, pattern, replacement, max_replacements, highlevel, behavior): +def _impl(array, pattern, replacement, max_replacements, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.replace_substring") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.replace_substring") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.replace_substring, pc.replace_substring, @@ -68,4 +80,4 @@ def _impl(array, pattern, replacement, max_replacements, highlevel, behavior): ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff
--git a/src/awkward/operations/str/akstr_replace_substring_regex.py b/src/awkward/operations/str/akstr_replace_substring_regex.py index 60d391046c..cca19f1a28 100644 --- a/src/awkward/operations/str/akstr_replace_substring_regex.py +++ b/src/awkward/operations/str/akstr_replace_substring_regex.py @@ -3,16 +3,22 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("replace_substring_regex",) @high_level_function(module="ak.str") def replace_substring_regex( - array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None + array, + pattern, + replacement, + *, + max_replacements=None, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -26,6 +32,8 @@ def replace_substring_regex( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces non-overlapping subsequences of any string or bytestring-valued data that match a regular expression `pattern` with `replacement`. 
@@ -48,17 +56,21 @@ def replace_substring_regex( yield (array,) # Implementation - return _impl(array, pattern, replacement, max_replacements, highlevel, behavior) + return _impl( + array, pattern, replacement, max_replacements, highlevel, behavior, attrs + ) -def _impl(array, pattern, replacement, max_replacements, highlevel, behavior): +def _impl(array, pattern, replacement, max_replacements, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.replace_substring_regex") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.replace_substring_regex") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.replace_substring_regex, pc.replace_substring_regex, @@ -68,4 +80,4 @@ def _impl(array, pattern, replacement, max_replacements, highlevel, behavior): ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_reverse.py b/src/awkward/operations/str/akstr_reverse.py index ebb8d8c515..e29f59451a 100644 --- a/src/awkward/operations/str/akstr_reverse.py +++ b/src/awkward/operations/str/akstr_reverse.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("reverse",) @high_level_function(module="ak.str") -def reverse(array, *, highlevel=True, behavior=None): +def reverse(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def reverse(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass.
behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Reverses the order of Unicode characters in any string-valued data. (This function operates on Unicode codepoints, not grapheme clusters. @@ -40,20 +41,22 @@ def reverse(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.reverse") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.reverse") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_reverse, pc.binary_reverse, bytestring_to_string=False ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_rpad.py b/src/awkward/operations/str/akstr_rpad.py index 6dd20b54f8..6b6494c315 100644 --- a/src/awkward/operations/str/akstr_rpad.py +++ b/src/awkward/operations/str/akstr_rpad.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("rpad",) @high_level_function(module="ak.str") -def rpad(array, width, padding=" ", *, highlevel=True, behavior=None): +def rpad(array, width, padding=" ", *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes).
@@ -22,6 +21,8 @@ def rpad(array, width, padding=" ", *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string or bytestring-valued data with left-aligned strings/bytestrings of a given `width`, padding the right side with the @@ -46,20 +47,22 @@ def rpad(array, width, padding=" ", *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, width, padding, highlevel, behavior) + return _impl(array, width, padding, highlevel, behavior, attrs) -def _impl(array, width, padding, highlevel, behavior): +def _impl(array, width, padding, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.rpad") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.rpad") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_rpad, pc.ascii_rpad, width, padding, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_rtrim.py b/src/awkward/operations/str/akstr_rtrim.py index 53e01a5565..4d39f914b3 100644 --- a/src/awkward/operations/str/akstr_rtrim.py +++ b/src/awkward/operations/str/akstr_rtrim.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("rtrim",) @high_level_function(module="ak.str") -def rtrim(array, characters, *, highlevel=True, behavior=None): +def rtrim(array,
characters, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -20,6 +19,8 @@ def rtrim(array, characters, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Removes any trailing characters of `characters` from any string or bytestring-valued data. @@ -45,20 +46,22 @@ def rtrim(array, characters, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, characters, highlevel, behavior) + return _impl(array, characters, highlevel, behavior, attrs) -def _impl(array, characters, highlevel, behavior): +def _impl(array, characters, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.rtrim") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.rtrim") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_rtrim, pc.ascii_rtrim, characters, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_rtrim_whitespace.py b/src/awkward/operations/str/akstr_rtrim_whitespace.py index e9c3e9a2e7..103207d31c 100644 --- a/src/awkward/operations/str/akstr_rtrim_whitespace.py +++ b/src/awkward/operations/str/akstr_rtrim_whitespace.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("rtrim_whitespace",)
@high_level_function(module="ak.str") -def rtrim_whitespace(array, *, highlevel=True, behavior=None): +def rtrim_whitespace(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def rtrim_whitespace(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Removes any trailing whitespace from any string or bytestring-valued data. @@ -37,17 +38,19 @@ def rtrim_whitespace(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.rtrim_whitespace") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.rtrim_whitespace") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_rtrim_whitespace, pc.ascii_rtrim_whitespace, @@ -55,4 +58,4 @@ def _impl(array, highlevel, behavior): ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_slice.py b/src/awkward/operations/str/akstr_slice.py index f26aa00475..27c5b5063f 100644 --- a/src/awkward/operations/str/akstr_slice.py +++ b/src/awkward/operations/str/akstr_slice.py @@ -3,15 +3,16 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import
HighLevelContext __all__ = ("slice",) @high_level_function(module="ak.str") -def slice(array, start, stop=None, step=1, *, highlevel=True, behavior=None): +def slice( + array, start, stop=None, step=1, *, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -23,6 +24,8 @@ def slice(array, start, stop=None, step=1, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string or bytestring-valued data with a slice between `start` and `stop` indexes; `start` is inclusive and `stop` is exclusive and both @@ -45,17 +48,15 @@ def slice(array, start, stop=None, step=1, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, start, stop, step, highlevel, behavior) + return _impl(array, start, stop, step, highlevel, behavior, attrs) -def _impl(array, start, stop, step, highlevel, behavior): +def _impl(array, start, stop, step, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute from awkward.operations.str import _apply_through_arrow pc = import_pyarrow_compute("ak.str.slice") - behavior = behavior_of(array, behavior=behavior) - def action(layout, **absorb): if layout.is_list and layout.parameter("__array__") == "string": return _apply_through_arrow( @@ -65,6 +66,14 @@ def action(layout, **absorb): elif layout.is_list and layout.parameter("__array__") == "bytestring": return layout[:, start:stop:step] - out = ak._do.recursively_apply(ak.operations.to_layout(array), action) - - return wrap_layout(out, behavior, highlevel) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) + out = 
ak._do.recursively_apply(layout, action) + + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_split_pattern.py b/src/awkward/operations/str/akstr_split_pattern.py index 2af8e33ef8..07a086172c 100644 --- a/src/awkward/operations/str/akstr_split_pattern.py +++ b/src/awkward/operations/str/akstr_split_pattern.py @@ -3,16 +3,22 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("split_pattern",) @high_level_function(module="ak.str") def split_pattern( - array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None + array, + pattern, + *, + max_splits=None, + reverse=False, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -27,6 +33,8 @@ def split_pattern( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Splits any string or bytestring-valued data into a list of substrings according to the given separator. 
@@ -43,15 +51,22 @@ def split_pattern( yield (array,) # Implementation - return _impl(array, pattern, max_splits, reverse, highlevel, behavior) + return _impl(array, pattern, max_splits, reverse, highlevel, behavior, attrs) -def _impl(array, pattern, max_splits, reverse, highlevel, behavior): +def _impl(array, pattern, max_splits, reverse, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.split_pattern") - behavior = behavior_of(array, behavior=behavior) - layout = ak.to_layout(array) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) action = ak.operations.str._get_split_action( pc.split_pattern, @@ -63,4 +78,4 @@ def _impl(array, pattern, max_splits, reverse, highlevel, behavior): ) out = ak._do.recursively_apply(layout, action) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_split_pattern_regex.py b/src/awkward/operations/str/akstr_split_pattern_regex.py index f29d05886b..161fe29125 100644 --- a/src/awkward/operations/str/akstr_split_pattern_regex.py +++ b/src/awkward/operations/str/akstr_split_pattern_regex.py @@ -3,16 +3,22 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("split_pattern_regex",) @high_level_function(module="ak.str") def split_pattern_regex( - array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None + array, + pattern, + *, + max_splits=None, + reverse=False, + highlevel=True, + behavior=None, + attrs=None, ): """ Args: @@ -28,6 +34,8 @@ def split_pattern_regex( a low-level #ak.contents.Content subclass. 
behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Splits any string or bytestring-valued data into a list of substrings according to the given regular expression. @@ -44,15 +52,22 @@ def split_pattern_regex( yield (array,) # Implementation - return _impl(array, pattern, max_splits, reverse, highlevel, behavior) + return _impl(array, pattern, max_splits, reverse, highlevel, behavior, attrs) -def _impl(array, pattern, max_splits, reverse, highlevel, behavior): +def _impl(array, pattern, max_splits, reverse, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.split_pattern_regex") - behavior = behavior_of(array, behavior=behavior) - layout = ak.to_layout(array) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) if reverse: raise ValueError("Cannot split in reverse with regex") @@ -67,4 +82,4 @@ def _impl(array, pattern, max_splits, reverse, highlevel, behavior): ) out = ak._do.recursively_apply(layout, action) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_split_whitespace.py b/src/awkward/operations/str/akstr_split_whitespace.py index d4203c62b6..b1a35c886f 100644 --- a/src/awkward/operations/str/akstr_split_whitespace.py +++ b/src/awkward/operations/str/akstr_split_whitespace.py @@ -3,16 +3,15 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("split_whitespace",) @high_level_function(module="ak.str") def split_whitespace( - array, *, 
max_splits=None, reverse=False, highlevel=True, behavior=None + array, *, max_splits=None, reverse=False, highlevel=True, behavior=None, attrs=None ): """ Args: @@ -26,6 +25,8 @@ def split_whitespace( a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Splits any string or bytestring-valued data into a list of substrings according to any non-zero length sequence of @@ -52,15 +53,22 @@ def split_whitespace( yield (array,) # Implementation - return _impl(array, max_splits, reverse, highlevel, behavior) + return _impl(array, max_splits, reverse, highlevel, behavior, attrs) -def _impl(array, max_splits, reverse, highlevel, behavior): +def _impl(array, max_splits, reverse, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.split_whitespace") - behavior = behavior_of(array, behavior=behavior) - layout = ak.to_layout(array) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) action = ak.operations.str._get_split_action( pc.utf8_split_whitespace, @@ -71,4 +79,4 @@ def _impl(array, max_splits, reverse, highlevel, behavior): ) out = ak._do.recursively_apply(layout, action) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_starts_with.py b/src/awkward/operations/str/akstr_starts_with.py index c71efd6445..18d2c59046 100644 --- a/src/awkward/operations/str/akstr_starts_with.py +++ b/src/awkward/operations/str/akstr_starts_with.py @@ -3,15 +3,16 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout 
import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("starts_with",) @high_level_function(module="ak.str") -def starts_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=None): +def starts_with( + array, pattern, *, ignore_case=False, highlevel=True, behavior=None, attrs=None +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -23,6 +24,8 @@ def starts_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=N a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Returns True for every string in `array` if it starts with the given literal suffix `pattern`. Depending upon the value of `ignore_case`, the matching @@ -38,15 +41,22 @@ def starts_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=N yield (array,) # Implementation - return _impl(array, pattern, ignore_case, highlevel, behavior) + return _impl(array, pattern, ignore_case, highlevel, behavior, attrs) -def _impl(array, pattern, ignore_case, highlevel, behavior): +def _impl(array, pattern, ignore_case, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute pc = import_pyarrow_compute("ak.str.starts_with") - layout = ak.to_layout(array, allow_record=False) - behavior = behavior_of(array, behavior=behavior) + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap( + array, + allow_record=False, + allow_unknown=False, + primitive_policy="error", + string_policy="as-characters", + ) apply = ak.operations.str._get_ufunc_action( pc.starts_with, pc.starts_with, @@ -55,4 +65,4 @@ def _impl(array, pattern, ignore_case, highlevel, behavior): pattern=pattern, ) out = ak._do.recursively_apply(layout, apply) - return wrap_layout(out, highlevel=highlevel, behavior=behavior) + return ctx.wrap(out, highlevel=highlevel) 
diff --git a/src/awkward/operations/str/akstr_swapcase.py b/src/awkward/operations/str/akstr_swapcase.py index 4cb8ae8088..a3af83df86 100644 --- a/src/awkward/operations/str/akstr_swapcase.py +++ b/src/awkward/operations/str/akstr_swapcase.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("swapcase",) @high_level_function(module="ak.str") -def swapcase(array, *, highlevel=True, behavior=None): +def swapcase(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def swapcase(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with uppercase characters transformed to lowercase and vice-versa (correctly transforming Unicode characters). 
@@ -39,20 +40,22 @@ def swapcase(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.swapcase") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.swapcase") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_swapcase, pc.ascii_swapcase, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_title.py b/src/awkward/operations/str/akstr_title.py index 0d2ead1d65..8f4f4e06e9 100644 --- a/src/awkward/operations/str/akstr_title.py +++ b/src/awkward/operations/str/akstr_title.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("title",) @high_level_function(module="ak.str") -def title(array, *, highlevel=True, behavior=None): +def title(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def title(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with a titlecase version (correctly transforming Unicode characters). 
Each word in the output will start with @@ -41,20 +42,22 @@ def title(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.title") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.title") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_title, pc.ascii_title, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_to_categorical.py b/src/awkward/operations/str/akstr_to_categorical.py index fd3476513c..c835394124 100644 --- a/src/awkward/operations/str/akstr_to_categorical.py +++ b/src/awkward/operations/str/akstr_to_categorical.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("to_categorical",) @high_level_function(module="ak.str") -def to_categorical(array, *, highlevel=True, behavior=None): +def to_categorical(array, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def to_categorical(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. 
Returns a dictionary-encoded version of the given array of strings. Creates a categorical dataset, which has the following properties: @@ -49,15 +50,14 @@ def to_categorical(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute from awkward.operations.str import _apply_through_arrow pc = import_pyarrow_compute("ak.str.to_categorical") - behavior = behavior_of(array, behavior=behavior) def action(layout, **kwargs): if layout.is_list and layout.parameter("__array__") in {"string", "bytestring"}: @@ -65,6 +65,9 @@ def action(layout, **kwargs): pc.dictionary_encode, layout, expect_option_type=False ) - out = ak._do.recursively_apply(ak.operations.to_layout(array), action) + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) + + out = ak._do.recursively_apply(layout, action) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_trim.py b/src/awkward/operations/str/akstr_trim.py index 38f36f580a..7e29ec2f4e 100644 --- a/src/awkward/operations/str/akstr_trim.py +++ b/src/awkward/operations/str/akstr_trim.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("trim",) @high_level_function(module="ak.str") -def trim(array, characters, *, highlevel=True, behavior=None): +def trim(array, characters, *, highlevel=True, behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). 
@@ -21,6 +20,8 @@ def trim(array, characters, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Removes any leading or trailing characters of `characters` from any string or bytestring-valued data. @@ -46,20 +47,22 @@ def trim(array, characters, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, characters, highlevel, behavior) + return _impl(array, characters, highlevel, behavior, attrs) -def _impl(array, characters, highlevel, behavior): +def _impl(array, characters, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.trim") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.trim") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_trim, pc.ascii_trim, characters, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_trim_whitespace.py b/src/awkward/operations/str/akstr_trim_whitespace.py index 5cbba9c04f..99eb529e1a 100644 --- a/src/awkward/operations/str/akstr_trim_whitespace.py +++ b/src/awkward/operations/str/akstr_trim_whitespace.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("trim_whitespace",) @high_level_function(module="ak.str") -def trim_whitespace(array, *, highlevel=True, behavior=None): +def trim_whitespace(array, *, highlevel=True, 
behavior=None, attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def trim_whitespace(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Removes any leading or trailing whitespace from any string or bytestring-valued data. @@ -38,20 +39,22 @@ def trim_whitespace(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.trim_whitespace") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.trim_whitespace") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_trim_whitespace, pc.ascii_trim_whitespace, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/operations/str/akstr_upper.py b/src/awkward/operations/str/akstr_upper.py index e3a04c17ec..68e1acccb2 100644 --- a/src/awkward/operations/str/akstr_upper.py +++ b/src/awkward/operations/str/akstr_upper.py @@ -3,15 +3,14 @@ from __future__ import annotations import awkward as ak -from awkward._behavior import behavior_of from awkward._dispatch import high_level_function -from awkward._layout import wrap_layout +from awkward._layout import HighLevelContext __all__ = ("upper",) @high_level_function(module="ak.str") -def upper(array, *, highlevel=True, behavior=None): +def upper(array, *, highlevel=True, behavior=None, 
attrs=None): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -19,6 +18,8 @@ def upper(array, *, highlevel=True, behavior=None): a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. Replaces any string-valued data with an uppercase version (correctly transforming Unicode characters). @@ -39,20 +40,22 @@ def upper(array, *, highlevel=True, behavior=None): yield (array,) # Implementation - return _impl(array, highlevel, behavior) + return _impl(array, highlevel, behavior, attrs) -def _impl(array, highlevel, behavior): +def _impl(array, highlevel, behavior, attrs): from awkward._connect.pyarrow import import_pyarrow_compute - pc = import_pyarrow_compute("ak.str.upper") - behavior = behavior_of(array, behavior=behavior) + pc = import_pyarrow_compute("ak.str.upper") + + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + layout = ctx.unwrap(array) out = ak._do.recursively_apply( - ak.operations.to_layout(array), + layout, ak.operations.str._get_ufunc_action( pc.utf8_upper, pc.ascii_upper, bytestring_to_string=True ), ) - return wrap_layout(out, behavior, highlevel) + return ctx.wrap(out, highlevel=highlevel) diff --git a/src/awkward/typetracer.py b/src/awkward/typetracer.py index 100b89797b..69394857e8 100644 --- a/src/awkward/typetracer.py +++ b/src/awkward/typetracer.py @@ -43,7 +43,7 @@ def _length_0_1_if_typetracer(array, function, highlevel: bool, behavior) -> T: typetracer_backend = TypeTracerBackend.instance() - layout = to_layout(array, allow_unknown=False, primitive_policy="error") + layout = to_layout(array, primitive_policy="error") behavior = behavior_of(array, behavior=behavior) if layout.backend is typetracer_backend: @@ -97,7 +97,7 @@ def touch_data(array, *, highlevel: bool = True, behavior=None) -> T: Recursively touches the data and returns a shall copy of the given array. 
""" behavior = behavior_of(array, behavior=behavior) - layout = to_layout(array, allow_unknown=False, primitive_policy="error") + layout = to_layout(array, primitive_policy="error") _touch_data(layout) return wrap_layout(layout, behavior=behavior, highlevel=highlevel) diff --git a/tests/test_1240_v2_implementation_of_numba_1.py b/tests/test_1240_v2_implementation_of_numba_1.py index 1fba805b8c..84aff82361 100644 --- a/tests/test_1240_v2_implementation_of_numba_1.py +++ b/tests/test_1240_v2_implementation_of_numba_1.py @@ -17,11 +17,13 @@ ak_numba.register_and_check() +from awkward._connect.numba.arrayview import NumbaLookup # noqa: E402 + def roundtrip(layout): assert isinstance(layout, ak.contents.Content) - lookup = ak._lookup.Lookup(layout) + lookup = NumbaLookup(layout, None) assert isinstance(lookup, ak._lookup.Lookup) numbatype = ak_numba_arrayview.to_numbatype(layout.form) diff --git a/tests/test_1504_typetracer_like.py b/tests/test_1504_typetracer_like.py index edd9f82184..3b33831333 100644 --- a/tests/test_1504_typetracer_like.py +++ b/tests/test_1504_typetracer_like.py @@ -36,7 +36,7 @@ def test_zeros_like(dtype, like_dtype): @pytest.mark.parametrize("dtype", [np.float64, np.int64, np.uint8, None]) @pytest.mark.parametrize("like_dtype", [np.float64, np.int64, np.uint8, None]) -@pytest.mark.parametrize("value", [1.0, -20, np.iinfo(np.uint64).max]) +@pytest.mark.parametrize("value", [1.0, -20, np.iinfo(np.int64).max]) def test_full_like(dtype, like_dtype, value): array = ak.contents.numpyarray.NumpyArray( np.array([99, 88, 77, 66, 66], dtype=dtype) diff --git a/tests/test_1565_axis_wrap_if_negative_record.py b/tests/test_1565_axis_wrap_if_negative_record.py index f2cbf6d873..47930e3510 100644 --- a/tests/test_1565_axis_wrap_if_negative_record.py +++ b/tests/test_1565_axis_wrap_if_negative_record.py @@ -2,7 +2,6 @@ from __future__ import annotations -import numpy as np import pytest import awkward as ak @@ -26,8 +25,8 @@ def 
test_axis_wrap_if_negative_record_v2(): r = ak.Record(dict_cell_chain_field) - with pytest.raises(np.AxisError): - r = ak.operations.to_regular(r, 0) + with pytest.raises(TypeError, match="ak.Record objects are not allowed"): + ak.operations.to_regular(r, 0) list_cell_chain_field = [ [["TRA", "TRAV1", 15], ["TRB", "TRBV1", 12]], diff --git a/tests/test_2085_empty_if_typetracer.py b/tests/test_2085_empty_if_typetracer.py index 27a957952c..f31ba16f8d 100644 --- a/tests/test_2085_empty_if_typetracer.py +++ b/tests/test_2085_empty_if_typetracer.py @@ -56,6 +56,6 @@ def test_multiplier(regulararray): b = ak.from_numpy(a, regulararray=regulararray) assert str(b.type) == "2 * 3 * 5 * int64" - c = b.layout.form.length_one_array() + c = ak.Array(b.layout.form.length_one_array(highlevel=False)) assert str(c.type) == "1 * 3 * 5 * int64" assert c.tolist() == [[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]] diff --git a/tests/test_2646_from_parquet_highlevel.py b/tests/test_2646_from_parquet_highlevel.py index cbf377082a..960314643a 100644 --- a/tests/test_2646_from_parquet_highlevel.py +++ b/tests/test_2646_from_parquet_highlevel.py @@ -7,6 +7,7 @@ import awkward as ak pytest.importorskip("pyarrow") +pytest.importorskip("fsspec") def test(tmp_path): diff --git a/tests/test_2754_highlevel_behavior_missing.py b/tests/test_2754_highlevel_behavior_missing.py index 85c9c29b76..e6542955f0 100644 --- a/tests/test_2754_highlevel_behavior_missing.py +++ b/tests/test_2754_highlevel_behavior_missing.py @@ -2,6 +2,8 @@ from __future__ import annotations +from itertools import product + import pytest import awkward as ak @@ -12,37 +14,45 @@ @pytest.mark.parametrize( - "func", + ("func", "axis"), [ - ak.softmax, - ak.any, - ak.min, - ak.argmin, - ak.sum, - ak.ptp, - ak.std, - ak.count_nonzero, - lambda *args, **kwargs: ak.moment(*args, **kwargs, n=3), - ak.argmax, - ak.all, - ak.mean, - ak.max, - ak.prod, - ak.count, - ak.var, + pytest.param(ak.softmax, 0, marks=pytest.mark.xfail()), + 
pytest.param(ak.std, 0, marks=pytest.mark.xfail()), + pytest.param(ak.var, 0, marks=pytest.mark.xfail()), + (ak.softmax, 1), + (ak.std, 1), + (ak.var, 1), + *product( + ( + ak.any, + ak.min, + ak.argmin, + ak.sum, + ak.ptp, + ak.count_nonzero, + lambda *args, **kwargs: ak.moment(*args, **kwargs, n=3), + ak.argmax, + ak.all, + ak.mean, + ak.max, + ak.prod, + ak.count, + ), + ([0, 1]), + ), ], ) -def test_impl(func): +def test_reducers(axis, func): assert isinstance( - func([[1, 2, 3, 4], [5], [10]], axis=-1, highlevel=True), ak.Array + func([[1, 2, 3, 4], [5], [10]], axis=axis, highlevel=True), ak.Array ) assert isinstance( - func([[1, 2, 3, 4], [5], [10]], axis=-1, highlevel=False), ak.contents.Content + func([[1, 2, 3, 4], [5], [10]], axis=axis, highlevel=False), ak.contents.Content ) assert ( func( ak.Array([[1, 2, 3, 4], [5], [10]], behavior=behavior_1), - axis=-1, + axis=axis, highlevel=True, behavior=behavior_2, ).behavior @@ -51,7 +61,7 @@ def test_impl(func): assert ( func( ak.Array([[1, 2, 3, 4], [5], [10]], behavior=behavior_1), - axis=-1, + axis=axis, highlevel=True, ).behavior == behavior_1 diff --git a/tests/test_2757_attrs_metadata.py b/tests/test_2757_attrs_metadata.py new file mode 100644 index 0000000000..ab6fcf3776 --- /dev/null +++ b/tests/test_2757_attrs_metadata.py @@ -0,0 +1,332 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE +from __future__ import annotations + +import packaging.version +import pytest + +import awkward as ak +from awkward._pickle import use_builtin_reducer + + +@pytest.fixture +def array_pickler(): + import pickle + + with use_builtin_reducer(): + yield pickle + + +SOME_ATTRS = {"foo": "!SOME"} +OTHER_ATTRS = {"bar": "!OTHER", "foo": "!OTHER"} + + +def test_set_attrs(): + array = ak.Array([1, 2, 3]) + assert array.attrs == {} + + array.attrs = OTHER_ATTRS + assert array.attrs is OTHER_ATTRS + + with pytest.raises(TypeError): + array.attrs = "Hello world!" 
+ + +def test_serialise_with_transient_attrs(array_pickler): + attrs = {**SOME_ATTRS, "@transient_key": lambda: None} + array = ak.Array([1, 2, 3], attrs=attrs) + result = array_pickler.loads(array_pickler.dumps(array)) + assert result.attrs == SOME_ATTRS + + +def test_serialise_with_nonserialisable_attrs(array_pickler): + attrs = {**SOME_ATTRS, "non_transient_key": lambda: None} + array = ak.Array([1, 2, 3], attrs=attrs) + with pytest.raises(AttributeError, match=r"Can't pickle local object"): + array_pickler.loads(array_pickler.dumps(array)) + + +def test_transient_metadata_persists(): + attrs = {**SOME_ATTRS, "@transient_key": lambda: None} + array = ak.Array([[1, 2, 3]], attrs=attrs) + num = ak.num(array) + assert num.attrs is attrs + + +@pytest.mark.parametrize( + "func", + [ + ak.any, + ak.min, + ak.argmin, + ak.sum, + ak.ptp, + ak.count_nonzero, + lambda *args, **kwargs: ak.moment(*args, **kwargs, n=3), + ak.argmax, + ak.all, + ak.mean, + ak.max, + ak.prod, + ak.count, + ], +) +def test_single_arg_ops(func): + # Default no attrs + assert func([[1, 2, 3, 4], [5], [10]], axis=-1, highlevel=True).attrs == {} + # Carry from argument + assert ( + func([[1, 2, 3, 4], [5], [10]], axis=-1, highlevel=True, attrs=SOME_ATTRS).attrs + is SOME_ATTRS + ) + # Carry from outer array + array = ak.Array([[1, 2, 3, 4], [5], [10]], attrs=SOME_ATTRS) + assert func(array, axis=-1, highlevel=True).attrs is SOME_ATTRS + # Carry from argument exclusively + assert func(array, axis=-1, highlevel=True, attrs=OTHER_ATTRS).attrs is OTHER_ATTRS + + +@pytest.mark.parametrize( + "func", + [ + # Predicates + ak.str.is_alnum, + ak.str.is_alpha, + ak.str.is_ascii, + ak.str.is_decimal, + ak.str.is_digit, + ak.str.is_lower, + ak.str.is_numeric, + ak.str.is_printable, + ak.str.is_space, + ak.str.is_title, + ak.str.is_upper, + # Transforms + ak.str.capitalize, + ak.str.lower, + ak.str.upper, + ak.str.reverse, + ak.str.swapcase, + ak.str.title, + # Padding and trimming + ak.str.ltrim_whitespace, + 
ak.str.rtrim_whitespace, + ak.str.trim_whitespace, + ak.str.split_whitespace, + ], +) +def test_string_operations_unary(func): + pytest.importorskip("pyarrow") + # Default no attrs + assert ( + func( + [["hello", "world!"], [], ["it's a beautiful day!"]], + highlevel=True, + ).attrs + == {} + ) + # Carry from argument + assert ( + func( + [["hello", "world!"], [], ["it's a beautiful day!"]], + highlevel=True, + attrs=SOME_ATTRS, + ).attrs + is SOME_ATTRS + ) + # Carry from outer array + array = ak.Array( + [["hello", "world!"], [], ["it's a beautiful day!"]], attrs=SOME_ATTRS + ) + assert func(array, highlevel=True).attrs is SOME_ATTRS + # Carry from argument exclusively + assert func(array, highlevel=True, attrs=OTHER_ATTRS).attrs is OTHER_ATTRS + + +@pytest.mark.parametrize( + ("func", "arg"), + [ + # Padding and trimming + (ak.str.center, 10), + (ak.str.lpad, 10), + (ak.str.rpad, 10), + (ak.str.ltrim, "hell"), + (ak.str.rtrim, "hell"), + (ak.str.trim, "hell"), + # Containment + (ak.str.count_substring, "hello"), + (ak.str.count_substring_regex, "hello"), + (ak.str.starts_with, "hello"), + (ak.str.ends_with, "hello"), + (ak.str.find_substring, "hello"), + (ak.str.find_substring_regex, "hello"), + (ak.str.match_like, "hello"), + (ak.str.match_substring, "hello"), + (ak.str.match_substring_regex, "hello"), + # Slicing + (ak.str.extract_regex, "hello"), + ], +) +def test_string_operations_unary_with_arg(func, arg): + pytest.importorskip("pyarrow") + # Default no attrs + assert ( + func( + [["hello", "world!"], [], ["it's a beautiful day!"]], + arg, + highlevel=True, + ).attrs + == {} + ) + # Carry from argument + assert ( + func( + [["hello", "world!"], [], ["it's a beautiful day!"]], + arg, + highlevel=True, + attrs=SOME_ATTRS, + ).attrs + is SOME_ATTRS + ) + # Carry from outer array + array = ak.Array( + [["hello", "world!"], [], ["it's a beautiful day!"]], attrs=SOME_ATTRS + ) + assert func(array, arg, highlevel=True).attrs is SOME_ATTRS + # Carry from argument 
exclusively + assert func(array, arg, highlevel=True, attrs=OTHER_ATTRS).attrs is OTHER_ATTRS + + +def test_string_operations_unary_with_arg_slice(): + pyarrow = pytest.importorskip("pyarrow") + if packaging.version.Version(pyarrow.__version__) < packaging.version.Version("13"): + pytest.xfail("pyarrow<13 fails to perform this slice") + # Default no attrs + assert ( + ak.str.slice( + [["hello", "world!"], [], ["it's a beautiful day!"]], + 1, + highlevel=True, + ).attrs + == {} + ) + # Carry from argument + assert ( + ak.str.slice( + [["hello", "world!"], [], ["it's a beautiful day!"]], + 1, + highlevel=True, + attrs=SOME_ATTRS, + ).attrs + is SOME_ATTRS + ) + # Carry from outer array + array = ak.Array( + [["hello", "world!"], [], ["it's a beautiful day!"]], attrs=SOME_ATTRS + ) + assert ak.str.slice(array, 1, highlevel=True).attrs is SOME_ATTRS + # Carry from argument exclusively + assert ( + ak.str.slice(array, 1, highlevel=True, attrs=OTHER_ATTRS).attrs is OTHER_ATTRS + ) + + +@pytest.mark.parametrize( + "func", + [ + # Containment + ak.str.index_in, + ak.str.is_in, + # Splitting and joining + ak.str.join, + # This function is 1+ args, but we will test the binary variant + ak.str.join_element_wise, + ], +) +def test_string_operations_binary(func): + pytest.importorskip("pyarrow") + assert ( + func( + [["hello", "world!"], [], ["it's a beautiful day!"]], + ["hello"], + highlevel=True, + ).attrs + == {} + ) + assert ( + func( + [["hello", "world!"], [], ["it's a beautiful day!"]], + ["hello"], + highlevel=True, + attrs=SOME_ATTRS, + ).attrs + is SOME_ATTRS + ) + # Carry from first array + array = ak.Array( + [["hello", "world!"], [], ["it's a beautiful day!"]], attrs=SOME_ATTRS + ) + assert func(array, ["hello"], highlevel=True).attrs is SOME_ATTRS + + # Carry from second array + value_array = ak.Array(["hello"], attrs=OTHER_ATTRS) + assert ( + func( + [["hello", "world!"], [], ["it's a beautiful day!"]], + value_array, + highlevel=True, + ).attrs + is OTHER_ATTRS 
+ ) + # Carry from both arrays + assert func( + array, + value_array, + highlevel=True, + ).attrs == {**OTHER_ATTRS, **SOME_ATTRS} + + # Carry from argument + assert ( + func(array, value_array, highlevel=True, attrs=OTHER_ATTRS).attrs is OTHER_ATTRS + ) + + +def test_broadcasting_arrays(): + left = ak.Array([1, 2, 3], attrs=SOME_ATTRS) + right = ak.Array([1], attrs=OTHER_ATTRS) + + left_result, right_result = ak.broadcast_arrays(left, right) + assert left_result.attrs is SOME_ATTRS + assert right_result.attrs is OTHER_ATTRS + + +def test_broadcasting_fields(): + left = ak.Array([{"x": 1}, {"x": 2}], attrs=SOME_ATTRS) + right = ak.Array([{"y": 1}, {"y": 2}], attrs=OTHER_ATTRS) + + left_result, right_result = ak.broadcast_fields(left, right) + assert left_result.attrs is SOME_ATTRS + assert right_result.attrs is OTHER_ATTRS + + +def test_numba_arraybuilder(): + numba = pytest.importorskip("numba") + builder = ak.ArrayBuilder(attrs=SOME_ATTRS) + assert builder.attrs is SOME_ATTRS + + @numba.njit + def func(array): + return array + + assert func(builder).attrs is SOME_ATTRS + + +def test_numba_array(): + numba = pytest.importorskip("numba") + array = ak.Array([1, 2, 3], attrs=SOME_ATTRS) + assert array.attrs is SOME_ATTRS + + @numba.njit + def func(array): + return array + + assert func(array).attrs is SOME_ATTRS