diff --git a/src/awkward/contents/bitmaskedarray.py b/src/awkward/contents/bitmaskedarray.py index ae5e8e2fe5..2c26068d17 100644 --- a/src/awkward/contents/bitmaskedarray.py +++ b/src/awkward/contents/bitmaskedarray.py @@ -17,10 +17,22 @@ ) from awkward._regularize import is_integer, is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET from awkward.contents.bytemaskedarray import ByteMaskedArray -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.forms.bitmaskedform import BitMaskedForm from awkward.forms.form import Form from awkward.index import Index @@ -675,7 +687,14 @@ def _nbytes_part(self): def _pad_none(self, target, axis, depth, clip): return self.to_ByteMaskedArray()._pad_none(target, axis, depth, clip) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): return self.to_ByteMaskedArray()._to_arrow( pyarrow, mask_node, validbytes, length, options ) @@ -683,7 +702,9 @@ def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): def _to_backend_array(self, allow_missing, backend): return self.to_ByteMaskedArray()._to_backend_array(allow_missing, backend) - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: branch, depth = self.branch_depth if branch or options["drop_nones"] or depth > 1: return self.project()._remove_structure(backend, options) diff --git a/src/awkward/contents/bytemaskedarray.py b/src/awkward/contents/bytemaskedarray.py index ab82e40764..28c14a3170 100644 --- a/src/awkward/contents/bytemaskedarray.py +++ b/src/awkward/contents/bytemaskedarray.py @@ -19,9 +19,21 @@ ) from awkward._regularize import is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.errors import AxisError from awkward.forms.bytemaskedform import ByteMaskedForm from awkward.forms.form import Form @@ -1031,7 +1043,14 @@ def _pad_none(self, target, axis, depth, clip): parameters=self._parameters, ) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): this_validbytes = self.mask_as_bool(valid_when=True) return self._content._to_arrow( @@ -1045,7 +1064,9 @@ def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): def _to_backend_array(self, allow_missing, backend): return self.to_IndexedOptionArray64()._to_backend_array(allow_missing, backend) - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: branch, depth = self.branch_depth if branch or options["drop_nones"] or depth > 1: return self.project()._remove_structure(backend, options) diff --git a/src/awkward/contents/content.py b/src/awkward/contents/content.py index a41a2514d1..088f46760e 100644 --- a/src/awkward/contents/content.py +++ b/src/awkward/contents/content.py @@ -81,6 +81,26 @@ class RecursivelyApplyOptionsType(TypedDict): function_name: str | None +class RemoveStructureOptionsType(TypedDict): + flatten_records: bool + function_name: str + drop_nones: bool + keepdims: bool + allow_records: bool + list_to_regular: bool + + +class ToArrowOptionsType(TypedDict): + list_to32: bool + string_to32: bool + bytestring_to32: bool + emptyarray_to: np.dtype | None + categorical_as_dictionary: bool + extensionarray: bool + count_nulls: bool + record_is_scalar: bool + + class Content: is_numpy = False is_unknown = False @@ -1060,10 +1080,10 @@ def to_arrow( def _to_arrow( self, pyarrow: Any, - mask_node: Any, - validbytes: Any, + mask_node: Content | None, + validbytes: Content | None, length: int, - options: dict[str, Any], + options: ToArrowOptionsType, ): raise NotImplementedError @@ -1085,7 +1105,9 @@ def drop_none(self): def _drop_none(self) -> Content: raise NotImplementedError - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: raise NotImplementedError def _recursively_apply( @@ -1093,8 +1115,8 @@ def _recursively_apply( action: ActionType, behavior: dict | None, depth: int, - depth_context: dict | None, - lateral_context: dict | None, + depth_context: dict[str, Any] | None, + lateral_context: dict[str, Any] | None, options: RecursivelyApplyOptionsType, ) -> Content | None: raise NotImplementedError diff --git a/src/awkward/contents/emptyarray.py b/src/awkward/contents/emptyarray.py index 775d757f09..0172a9f567 100644 --- a/src/awkward/contents/emptyarray.py +++ b/src/awkward/contents/emptyarray.py @@ -14,9 +14,21 @@ from awkward._nplikes.shape import ShapeItem from awkward._regularize import is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.errors import AxisError from awkward.forms.emptyform import EmptyForm from awkward.forms.form import Form @@ -344,7 +356,14 @@ def _pad_none(self, target, axis, depth, clip): else: return self._pad_none_axis0(target, True) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): if options["emptyarray_to"] is None: return pyarrow.Array.from_buffers( ak._connect.pyarrow.to_awkwardarrow_type( @@ -373,7 +392,9 @@ def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): def _to_backend_array(self, allow_missing, backend): return backend.nplike.empty(0, dtype=np.float64) - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: return [self] def _recursively_apply( diff --git a/src/awkward/contents/indexedarray.py b/src/awkward/contents/indexedarray.py index d990dea323..cc2e60a57f 100644 --- a/src/awkward/contents/indexedarray.py +++ b/src/awkward/contents/indexedarray.py @@ -18,9 +18,21 @@ ) from awkward._regularize import is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.errors import AxisError from awkward.forms.form import Form from awkward.forms.indexedform import IndexedForm @@ -984,7 +996,14 @@ def _pad_none(self, target, axis, depth, clip): parameters=self._parameters, ) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): if ( not options["categorical_as_dictionary"] and self.parameter("__array__") == "categorical" @@ -1036,7 +1055,9 @@ def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): def _to_backend_array(self, allow_missing, backend): return self.project()._to_backend_array(allow_missing, backend) - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: return self.project()._remove_structure(backend, options) def _recursively_apply( diff --git a/src/awkward/contents/indexedoptionarray.py b/src/awkward/contents/indexedoptionarray.py index 429076affa..c5b5ddb82f 100644 --- a/src/awkward/contents/indexedoptionarray.py +++ b/src/awkward/contents/indexedoptionarray.py @@ -18,9 +18,21 @@ ) from awkward._regularize import is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.errors import AxisError from awkward.forms.form import Form from awkward.forms.indexedoptionform import IndexedOptionForm @@ -1517,7 +1529,14 @@ def _pad_none(self, target, axis, depth, clip): parameters=self._parameters, ) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): index = numpy.asarray(self._index.data, copy=True) this_validbytes = self.mask_as_bool(valid_when=True) index[~this_validbytes] = 0 @@ -1595,7 +1614,9 @@ def _to_backend_array(self, allow_missing, backend): else: return content - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: branch, depth = self.branch_depth if branch or options["drop_nones"] or depth > 1: return self.project()._remove_structure(backend, options) diff --git a/src/awkward/contents/listarray.py b/src/awkward/contents/listarray.py index 98f153925b..3cb33ce132 100644 --- a/src/awkward/contents/listarray.py +++ b/src/awkward/contents/listarray.py @@ -16,9 +16,21 @@ ) from awkward._regularize import is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.contents.listoffsetarray import ListOffsetArray from awkward.forms.form import Form from awkward.forms.listform import ListForm @@ -1476,7 +1488,14 @@ def _pad_none(self, target, axis, depth, clip): target, axis, depth, clip=True ) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): return self.to_ListOffsetArray64(False)._to_arrow( pyarrow, mask_node, validbytes, length, options ) @@ -1490,7 +1509,9 @@ def _to_backend_array(self, allow_missing, backend): else: return self.to_RegularArray()._to_backend_array(allow_missing, backend) - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: return self.to_ListOffsetArray64(False)._remove_structure(backend, options) def _drop_none(self) -> Content: diff --git a/src/awkward/contents/listoffsetarray.py b/src/awkward/contents/listoffsetarray.py index c6aa5f1d8f..33d77c99c8 100644 --- a/src/awkward/contents/listoffsetarray.py +++ b/src/awkward/contents/listoffsetarray.py @@ -16,9 +16,21 @@ ) from awkward._regularize import is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.errors import AxisError from awkward.forms.form import Form from awkward.forms.listoffsetform import ListOffsetForm @@ -1878,7 +1890,14 @@ def _pad_none(self, target, axis, depth, clip): parameters=self._parameters, ) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): is_string = self.parameter("__array__") == "string" is_bytestring = self.parameter("__array__") == "bytestring" if is_string: @@ -2059,7 +2078,9 @@ def _to_backend_array(self, allow_missing, backend): else: return self.to_RegularArray()._to_backend_array(allow_missing, backend) - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: if ( self.parameter("__array__") == "string" or self.parameter("__array__") == "bytestring" diff --git a/src/awkward/contents/numpyarray.py b/src/awkward/contents/numpyarray.py index 1b39cc822b..c42d93efa0 100644 --- a/src/awkward/contents/numpyarray.py +++ b/src/awkward/contents/numpyarray.py @@ -6,7 +6,7 @@ import awkward as ak from awkward._backends.backend import Backend -from awkward._backends.dispatch import backend_of +from awkward._backends.dispatch import backend_of_obj from awkward._backends.numpy import NumpyBackend from awkward._backends.typetracer import TypeTracerBackend from awkward._errors import deprecate @@ -23,9 +23,21 @@ ) from awkward._regularize import is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.errors import AxisError from awkward.forms.form import Form from awkward.forms.numpyform import NumpyForm @@ -100,7 +112,7 @@ def __getitem__(self, where): def __init__(self, data: ArrayLike, *, parameters=None, backend=None): if backend is None: - backend = backend_of(data, default=NumpyBackend.instance()) + backend = backend_of_obj(data, default=NumpyBackend.instance()) self._data = backend.nplike.asarray(data) @@ -1190,7 +1202,14 @@ def _pad_none(self, target, axis, depth, clip): def _nbytes_part(self): return self.data.nbytes - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): if self._data.ndim != 1: return self.to_RegularArray()._to_arrow( pyarrow, mask_node, validbytes, length, options @@ -1223,7 +1242,9 @@ def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): def _to_backend_array(self, allow_missing, backend): return to_nplike(self.data, backend.nplike, from_nplike=self._backend.nplike) - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: if options["keepdims"]: shape = (1,) * (self._data.ndim - 1) + (-1,) else: diff --git a/src/awkward/contents/recordarray.py b/src/awkward/contents/recordarray.py index 33eafa96c8..106e51af9b 100644 --- a/src/awkward/contents/recordarray.py +++ b/src/awkward/contents/recordarray.py @@ -19,9 +19,21 @@ type_parameters_equal, ) from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.errors import AxisError from awkward.forms.form import Form from awkward.forms.recordform import RecordForm @@ -1085,7 +1097,14 @@ def _pad_none(self, target, axis, depth, clip): backend=self._backend, ) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): values = [ (x if x.length == length else x[:length])._to_arrow( pyarrow, mask_node, validbytes, length, options @@ -1148,7 +1167,9 @@ def _to_backend_array(self, allow_missing, backend): return out - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: if options["flatten_records"]: out = [] for content in self._contents: diff --git a/src/awkward/contents/regulararray.py b/src/awkward/contents/regulararray.py index 1cae17d07f..eedfaaff70 100644 --- a/src/awkward/contents/regulararray.py +++ b/src/awkward/contents/regulararray.py @@ -17,9 +17,21 @@ ) from awkward._regularize import is_integer, is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.forms.form import Form from awkward.forms.regularform import RegularForm from awkward.index import Index @@ -1295,7 +1307,14 @@ def _to_backend_array(self, allow_missing, backend): shape, ) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): assert self._backend.nplike.known_data if self.parameter("__array__") == "string": @@ -1352,7 +1371,9 @@ def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): ), ) - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: if ( self.parameter("__array__") == "string" or self.parameter("__array__") == "bytestring" diff --git a/src/awkward/contents/unionarray.py b/src/awkward/contents/unionarray.py index 2d0647ce77..fbcde1f40a 100644 --- a/src/awkward/contents/unionarray.py +++ b/src/awkward/contents/unionarray.py @@ -16,9 +16,21 @@ from awkward._parameters import parameters_intersect, parameters_union from awkward._regularize import is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.errors import AxisError from awkward.forms.form import Form from awkward.forms.unionform import UnionForm @@ -1429,7 +1441,14 @@ def _pad_none(self, target, axis, depth, clip): parameters=self._parameters, ) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): nptags = self._tags.raw(numpy) npindex = self._index.raw(numpy) copied_index = False @@ -1512,7 +1531,9 @@ def _to_backend_array(self, allow_missing, backend): "Conversion of irreducible unions to backend arrays is not supported." ) - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: out = [] for i in range(len(self._contents)): index = self._index[self._tags.data == i] diff --git a/src/awkward/contents/unmaskedarray.py b/src/awkward/contents/unmaskedarray.py index 6e605917ae..090f8f1779 100644 --- a/src/awkward/contents/unmaskedarray.py +++ b/src/awkward/contents/unmaskedarray.py @@ -19,9 +19,21 @@ ) from awkward._regularize import is_integer_like from awkward._slicing import NO_HEAD -from awkward._typing import TYPE_CHECKING, Callable, Final, Self, SupportsIndex, final +from awkward._typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Self, + SupportsIndex, + final, +) from awkward._util import UNSET -from awkward.contents.content import Content +from awkward.contents.content import ( + Content, + RemoveStructureOptionsType, + ToArrowOptionsType, +) from awkward.errors import AxisError from awkward.forms.form import Form from awkward.forms.unmaskedform import UnmaskedForm @@ -473,7 +485,14 @@ def _pad_none(self, target, axis, depth, clip): parameters=self._parameters, ) - def _to_arrow(self, pyarrow, mask_node, validbytes, length, options): + def _to_arrow( + self, + pyarrow: Any, + mask_node: Content | None, + validbytes: Content | None, + length: int, + options: ToArrowOptionsType, + ): return self._content._to_arrow(pyarrow, self, None, length, options) def _to_backend_array(self, allow_missing, backend): @@ -483,7 +502,9 @@ def _to_backend_array(self, allow_missing, backend): else: return content - def _remove_structure(self, backend, options): + def _remove_structure( + self, backend: Backend, options: RemoveStructureOptionsType + ) -> list[Content]: branch, depth = self.branch_depth if branch or options["drop_nones"] or depth > 1: return self.project()._remove_structure(backend, options)