Skip to content

Commit

Permalink
fix: don't call asarray on Index objects internally
Browse files (browse the repository at this point in the history)
  • Loading branch information
agoose77 committed Oct 11, 2023
1 parent a6e426e commit 8ecc0c9
Show file tree
Hide file tree
Showing 10 changed files with 60 additions and 59 deletions.
2 changes: 1 addition & 1 deletion src/awkward/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def to_IndexedOptionArray64(self) -> IndexedOptionArray:
self._index, self._content, parameters=self._parameters
)

def mask_as_bool(self, valid_when=True):
def mask_as_bool(self, valid_when: bool = True) -> ArrayLike:
if valid_when:
return self._index.data >= 0
else:
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def to_BitMaskedArray(self, valid_when, lsb_order):
valid_when, lsb_order
)

def mask_as_bool(self, valid_when=True):
def mask_as_bool(self, valid_when: bool = True) -> ArrayLike:
if valid_when:
return self._index.raw(self._backend.index_nplike) >= 0
else:
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/unmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def to_BitMaskedArray(self, valid_when, lsb_order):
parameters=self._parameters,
)

def mask_as_bool(self, valid_when=True):
def mask_as_bool(self, valid_when: bool = True) -> ArrayLike:
if valid_when:
return self._backend.index_nplike.ones(self._content.length, dtype=np.bool_)
else:
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/operations/ak_flatten.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,9 @@ def apply(layout):
):
return layout

tags = backend.index_nplike.asarray(layout.tags)
tags = backend.index_nplike.asarray(layout.tags.data)
index = backend.index_nplike.asarray(
backend.nplike.asarray(layout.index), copy=True
backend.nplike.asarray(layout.index.data), copy=True
)
bigmask = backend.index_nplike.empty(len(index), dtype=np.bool_)
for tag, content in enumerate(layout.contents):
Expand Down
56 changes: 27 additions & 29 deletions src/awkward/operations/ak_merge_union_of_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ def invert_record_union(
# Build unions for each field
outer_field_contents = []
for field in all_fields:
field_tags = index_nplike.asarray(tags, copy=True)
field_index = index_nplike.asarray(index, copy=True)
field_tags = index_nplike.asarray(tags.data, copy=True)
field_index = index_nplike.asarray(index.data, copy=True)

# Build contents for union representing current field
field_contents = [c.content(field) for c in contents if c.has_field(field)]
Expand Down Expand Up @@ -170,30 +170,31 @@ def apply(layout, depth, backend, **kwargs):
# We'll rebuild the union to include only the non-null items.
inner_union_index_parts = []
next_contents = []
next_tags_sparse = backend.index_nplike.asarray(layout.tags, copy=True)
next_tags_data_sparse = backend.index_nplike.asarray(
layout.tags.data, copy=True
)
for tag, content in enumerate(layout.contents):
is_this_tag = backend.index_nplike.asarray(layout.tags) == tag
is_this_tag = layout.tags.data == tag

# Union arrays for this content
tag_index = backend.index_nplike.asarray(layout.index)[is_this_tag]
tag_index_data = layout.index.data[is_this_tag]

# For unmasked arrays, we can directly take the content
if isinstance(content, ak.contents.UnmaskedArray):
next_contents.append(content.content)
inner_union_index_parts.append(tag_index)
inner_union_index_parts.append(tag_index_data)
# Otherwise, we need to rebuild the index
elif content.is_option or content.is_indexed:
# Let's work with indexed option types for ease
if content.is_option:
content = content.to_IndexedOptionArray64()

# First, find the inner index that actually re-arranges the (non-null) items
content_index = backend.index_nplike.asarray(content.index)
merged_index = content_index[tag_index]
merged_index = content.index.data[tag_index_data]
is_non_null = merged_index >= 0
inner_union_index_parts.append(merged_index[is_non_null])
# Mask out tags of items that are missing
next_tags_sparse[is_this_tag] = backend.index_nplike.where(
next_tags_data_sparse[is_this_tag] = backend.index_nplike.where(
is_non_null, tag, -1
)

Expand All @@ -202,67 +203,64 @@ def apply(layout, depth, backend, **kwargs):
# Non-indexed/option types are trivially included as-is
else:
next_contents.append(content)
inner_union_index_parts.append(tag_index)
inner_union_index_parts.append(tag_index_data)

# We'll create an outermost indexed-option type, which re-instates the missing values.
# This should have the same length as the original union, and its index should be "dense"
# (contiguous, monotonic integers; or -1). Therefore, we can directly compute it from the "sparse"
# tags index, which has the same length as the original union, and has only missing items set to -1.
outer_option_dense_index = compact_option_index(
next_tags_sparse, backend=backend
next_tags_data_sparse, backend=backend
)

# Ignore missing items for inner union, creating a dense array of tags
next_tags = next_tags_sparse[next_tags_sparse >= 0]
next_tags_data = next_tags_data_sparse[next_tags_data_sparse >= 0]
# Build dense index from parts for each tag
next_index = backend.index_nplike.empty(next_tags.size, dtype=np.int64)
next_index_data = backend.index_nplike.empty(
next_tags_data.size, dtype=np.int64
)
for tag, content_index in enumerate(inner_union_index_parts):
next_index[next_tags == tag] = content_index
next_index_data[next_tags_data == tag] = content_index

# Return option around record of unions
return ak.contents.IndexedOptionArray(
ak.index.Index64(outer_option_dense_index),
invert_record_union(
next_tags, next_index, next_contents, backend=backend
next_tags_data, next_index_data, next_contents, backend=backend
),
)

# Any index types need to be re-written
elif any(x.is_indexed for x in layout.contents):
# We'll create an outermost indexed-option type, which re-instates the missing values
current_index = backend.index_nplike.asarray(layout.index)
next_index = backend.index_nplike.empty(
current_index.size, dtype=np.int64
next_index_data = backend.index_nplike.empty(
layout.index.length, dtype=np.int64
)

# We'll rebuild the union to include only the non-null items.
next_contents = []
for tag, content in enumerate(layout.contents):
is_this_tag = backend.index_nplike.asarray(layout.tags) == tag
is_this_tag = layout.tags.data == tag

# Rewrite union index of indexed types
if content.is_indexed:
content_index = backend.index_nplike.asarray(content.index)
next_index[is_this_tag] = content_index[
current_index[is_this_tag]
next_index_data[is_this_tag] = content.index.data[
content.index.data[is_this_tag]
]
next_contents.append(content.content)

else:
next_index[is_this_tag] = current_index[is_this_tag]
next_index_data[is_this_tag] = content.index.data[is_this_tag]
next_contents.append(content)

return invert_record_union(
backend.index_nplike.asarray(layout.tags),
next_index,
next_contents,
backend=backend,
layout.tags.data, next_index_data, next_contents, backend=backend
)

else:
return invert_record_union(
backend.index_nplike.asarray(layout.tags),
backend.index_nplike.asarray(layout.index),
layout.tags.data,
layout.index.data,
layout.contents,
backend=backend,
)
Expand Down
24 changes: 12 additions & 12 deletions src/awkward/operations/ak_run_lengths.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def action(layout, **kwargs):
elif not layout.is_numpy:
raise NotImplementedError("run_lengths on " + type(layout).__name__)

nextcontent, _ = lengths_of(backend.nplike.asarray(layout.data), None)
nextcontent, _ = lengths_of(layout.data, None)
return ak.contents.NumpyArray(nextcontent)

elif layout.branch_depth == (False, 2):
Expand All @@ -178,23 +178,25 @@ def action(layout, **kwargs):
# We also want to trim the _upper_ bound of content,
# so we manually convert the list type to zero-based
listoffsetarray = layout.to_ListOffsetArray64(False)
offsets = backend.index_nplike.asarray(listoffsetarray.offsets)
content = listoffsetarray.content[offsets[0] : offsets[-1]]
content = listoffsetarray.content[
listoffsetarray.offsets[0] : listoffsetarray.offsets[-1]
]

if content.is_indexed:
content = content.project()

offsets = listoffsetarray.offsets.data
nextcontent, nextoffsets = lengths_of(
ak.highlevel.Array(content), offsets - offsets[0]
)
return ak.contents.ListOffsetArray(
ak.index.Index64(nextoffsets),
ak.contents.NumpyArray(nextcontent),
ak.index.Index64(nextoffsets), ak.contents.NumpyArray(nextcontent)
)

listoffsetarray = layout.to_ListOffsetArray64(False)
offsets = backend.index_nplike.asarray(listoffsetarray.offsets)
content = listoffsetarray.content[offsets[0] : offsets[-1]]
content = listoffsetarray.content[
listoffsetarray.offsets[0] : listoffsetarray.offsets[-1]
]

if content.is_indexed:
content = content.project()
Expand All @@ -209,12 +211,10 @@ def action(layout, **kwargs):
+ type(content).__name__
)

nextcontent, nextoffsets = lengths_of(
backend.nplike.asarray(content.data), offsets - offsets[0]
)
offsets = listoffsetarray.offsets.data
nextcontent, nextoffsets = lengths_of(content.data, offsets - offsets[0])
return ak.contents.ListOffsetArray(
ak.index.Index64(nextoffsets),
ak.contents.NumpyArray(nextcontent),
ak.index.Index64(nextoffsets), ak.contents.NumpyArray(nextcontent)
)
else:
return None
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/operations/ak_to_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,13 @@ def action(layout, **kwargs):
mapping[i] = j

if layout.is_indexed and layout.is_option:
original_index = numpy.asarray(layout.index)
original_index = numpy.asarray(layout.index.data)
index = mapping[original_index]
index[original_index < 0] = -1
index = ak.index.Index64(index)

elif layout.is_indexed:
original_index = numpy.asarray(layout.index)
original_index = numpy.asarray(layout.index.data)
index = ak.index.Index64(mapping[original_index])

elif layout.is_option:
Expand Down
3 changes: 1 addition & 2 deletions src/awkward/operations/ak_to_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,7 @@ def recurse(layout, row_arrays, col_names):

elif layout.purelist_depth > 1:
offsets, flattened = layout._offsets_and_flattened(axis=1, depth=1)
offsets = numpy.asarray(offsets)
starts, stops = offsets[:-1], offsets[1:]
starts, stops = offsets.data[:-1], offsets.data[1:]
counts = stops - starts
if ak._util.win or ak._util.bits32:
counts = layout.backend.index_nplike.astype(counts, np.int32)
Expand Down
14 changes: 6 additions & 8 deletions src/awkward/operations/ak_unflatten.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,30 +188,28 @@ def apply(layout, depth, **kwargs):
if posaxis == depth and layout.is_list:
# We are one *above* the level where we want to apply this.
listoffsetarray = layout.to_ListOffsetArray64(True)
outeroffsets = backend.index_nplike.asarray(listoffsetarray.offsets)
outeroffsets = listoffsetarray.offsets

content = unflatten_this_layout(
listoffsetarray.content[: outeroffsets[-1]]
)
if isinstance(content, ak.contents.ByteMaskedArray):
inneroffsets = backend.index_nplike.asarray(content.content.offsets)
inneroffsets = content.content.offsets
elif isinstance(content, ak.contents.RegularArray):
inneroffsets = backend.index_nplike.asarray(
content.to_ListOffsetArray64(True).offsets
)
inneroffsets = content.to_ListOffsetArray64(True).offsets
else:
inneroffsets = backend.index_nplike.asarray(content.offsets)
inneroffsets = content.offsets

positions = (
backend.index_nplike.searchsorted(
inneroffsets, outeroffsets, side="right"
inneroffsets.data, outeroffsets, side="right"
)
- 1
)
if (
backend.index_nplike.known_data
and not backend.index_nplike.array_equal(
inneroffsets[positions], outeroffsets
inneroffsets.data[positions], outeroffsets
)
):
raise ValueError(
Expand Down
8 changes: 7 additions & 1 deletion tests/test_1850_bytemasked_array_to_bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@ def test():
result = layout.to_ByteMaskedArray(False)
assert layout.to_list() == [None, 1, None, 3, None]
assert result.to_list() == [None, 1, None, 3, None]
assert layout.backend.index_nplike.asarray(result.mask).tolist() == [1, 0, 1, 0, 1]
assert layout.backend.index_nplike.asarray(result.mask.data).tolist() == [
1,
0,
1,
0,
1,
]

# Check this works
layout.to_typetracer().to_ByteMaskedArray(False)

0 comments on commit 8ecc0c9

Please sign in to comment.