Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disallow cudf.Index accepting column in favor of ._from_column #16549

Merged: 12 commits merged on Aug 15, 2024
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ cdef object _process_metadata(object df,
if len(filtered_idx) > 0:
idx = cudf.concat(filtered_idx)
else:
idx = cudf.Index(cudf.core.column.column_empty(0))
idx = cudf.Index._from_column(cudf.core.column.column_empty(0))
else:
start = range_index_meta["start"] + skip_rows
stop = range_index_meta["stop"]
Expand All @@ -240,7 +240,7 @@ cdef object _process_metadata(object df,
index_data = df[index_col]
actual_index_names = list(index_col_names.values())
if len(index_data._data) == 1:
idx = cudf.Index(
idx = cudf.Index._from_column(
index_data._data.columns[0],
name=actual_index_names[0]
)
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,12 @@ cpdef generate_pandas_metadata(table, index):
materialize_index = False
if index is not False:
for level, name in enumerate(table._index.names):
if isinstance(table._index, cudf.core.multiindex.MultiIndex):
if isinstance(table._index, cudf.MultiIndex):
idx = table.index.get_level_values(level)
else:
idx = table.index

if isinstance(idx, cudf.core.index.RangeIndex):
if isinstance(idx, cudf.RangeIndex):
if index is None:
descr = {
"kind": "range",
Expand All @@ -110,7 +110,7 @@ cpdef generate_pandas_metadata(table, index):
else:
materialize_index = True
# When `index=True`, RangeIndex needs to be materialized.
materialized_idx = cudf.Index(idx._values, name=idx.name)
materialized_idx = idx._as_int_index()
descr = _index_level_name(
index_name=materialized_idx.name,
level=level,
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def _union_categoricals(
new_categories=sorted_categories
)

return cudf.Index(result_col)
return cudf.CategoricalIndex._from_column(result_col)


def is_bool_dtype(arr_or_dtype):
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1979,7 +1979,7 @@ def from_pandas(cls, index: pd.Index, nan_as_null=no_default):
name=index.name,
)
else:
return cudf.Index(
return cudf.Index._from_column(
column.as_column(index, nan_as_null=nan_as_null),
name=index.name,
)
Expand Down
6 changes: 4 additions & 2 deletions python/cudf/cudf/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np

from cudf.core.column import as_column
from cudf.core.index import RangeIndex, ensure_index
from cudf.core.index import Index, RangeIndex
from cudf.core.scalar import Scalar
from cudf.options import get_option
from cudf.utils.dtypes import can_convert_to_column
Expand Down Expand Up @@ -112,7 +112,9 @@ def factorize(values, sort=False, use_na_sentinel=True, size_hint=None):
dtype="int64" if get_option("mode.pandas_compatible") else None,
).values

return labels, cats.values if return_cupy_array else ensure_index(cats)
return labels, cats.values if return_cupy_array else Index._from_column(
cats
)


def _interpolation(column: ColumnBase, index: BaseIndex) -> ColumnBase:
Expand Down
8 changes: 5 additions & 3 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,11 +601,13 @@ def __setitem__(self, key, value):
to_add_categories = 0
else:
if cudf.api.types.is_scalar(value):
arr = [value]
arr = column.as_column(value, length=1, nan_as_null=False)
else:
arr = value
arr = column.as_column(value, nan_as_null=False)
to_add_categories = len(
cudf.Index(arr, nan_as_null=False).difference(self.categories)
cudf.Index._from_column(arr).difference(
cudf.Index._from_column(self.categories)
)
)

if to_add_categories > 0:
Expand Down
10 changes: 9 additions & 1 deletion python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,10 @@ def __contains__(self, item: ScalarLike) -> bool:
def time_unit(self) -> str:
return np.datetime_data(self.dtype)[0]

@property
def quarter(self) -> ColumnBase:
    """Return the column produced by ``libcudf.datetime.extract_quarter``.

    Presumably the calendar quarter of each value in this datetime
    column -- confirm the exact output dtype/range against libcudf.
    """
    return libcudf.datetime.extract_quarter(self)

@property
def year(self) -> ColumnBase:
    """Return the ``"year"`` datetime field, obtained via ``get_dt_field``."""
    return self.get_dt_field("year")
Expand Down Expand Up @@ -308,14 +312,18 @@ def is_quarter_start(self) -> ColumnBase:
@property
def is_year_end(self) -> ColumnBase:
    """Flag values whose day-of-year is the last day of their year.

    Compares ``day_of_year`` against 366 and against 365, then combines
    the two comparisons with ``copy_if_else`` using the leap-year mask
    (presumably taking the 366 comparison where the mask is true and the
    365 comparison otherwise -- confirm against libcudf). Nulls in the
    result are replaced with ``False``.
    """
    day_of_year = self.day_of_year
    # NOTE(review): the next two assignments are the removed and the added
    # side of the same diff hunk; the second one supersedes the first.
    leap_dates = libcudf.datetime.is_leap_year(self)
    leap_dates = self.is_leap_year

    # Candidate "last day" masks for leap and non-leap years respectively.
    leap = day_of_year == cudf.Scalar(366)
    non_leap = day_of_year == cudf.Scalar(365)
    return libcudf.copying.copy_if_else(leap, non_leap, leap_dates).fillna(
        False
    )

@property
def is_leap_year(self) -> ColumnBase:
    """Return the column produced by ``libcudf.datetime.is_leap_year``.

    Used by ``is_year_end`` as the mask that selects between the
    366-day and 365-day comparisons.
    """
    return libcudf.datetime.is_leap_year(self)

@property
def is_year_start(self) -> ColumnBase:
    """Flag values whose ``day_of_year`` equals 1.

    Nulls in the comparison result are replaced with ``False``.
    """
    return (self.day_of_year == 1).fillna(False)
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/column/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def _return_or_inplace(
"""
if inplace:
self._parent._mimic_inplace(
self._parent.__class__._from_data(
{self._parent.name: new_col}
type(self._parent)._from_column(
new_col, name=self._parent.name
),
inplace=True,
)
Expand All @@ -92,6 +92,6 @@ def _return_or_inplace(
index=self._parent.index if retain_index else None,
)
elif isinstance(self._parent, cudf.BaseIndex):
return cudf.Index(new_col, name=self._parent.name)
return cudf.Index._from_column(new_col, name=self._parent.name)
else:
return self._parent._mimic_inplace(new_col, inplace=False)
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -4693,7 +4693,7 @@ def character_tokenize(self) -> SeriesOrIndex:
result_col, name=self._parent.name, index=index
)
elif isinstance(self._parent, cudf.BaseIndex):
return cudf.Index(result_col, name=self._parent.name)
return cudf.Index._from_column(result_col, name=self._parent.name)
else:
return result_col

Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def cut(
)

# we return a categorical index, as we don't have a Categorical method
categorical_index = cudf.CategoricalIndex._from_data({None: col})
categorical_index = cudf.CategoricalIndex._from_column(col)

if isinstance(orig_x, (pd.Series, cudf.Series)):
# if we have a series input we return a series output
Expand Down
8 changes: 4 additions & 4 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def _getitem_tuple_arg(self, arg):
range(len(tmp_arg[0]))
)
},
index=cudf.Index(tmp_arg[0]),
index=cudf.Index._from_column(tmp_arg[0]),
)
columns_df[cantor_name] = column.as_column(
range(len(columns_df))
Expand Down Expand Up @@ -1758,7 +1758,7 @@ def _concat(
for cols in columns:
table_index = None
if 1 == first_data_column_position:
table_index = cudf.Index(cols[0])
table_index = cudf.Index._from_column(cols[0])
elif first_data_column_position > 1:
table_index = cudf.MultiIndex._from_data(
data=dict(
Expand Down Expand Up @@ -1810,7 +1810,7 @@ def _concat(
if not isinstance(out.index, MultiIndex) and isinstance(
out.index.dtype, cudf.CategoricalDtype
):
out = out.set_index(cudf.Index(out.index._values))
out = out.set_index(out.index)
for name, col in out._data.items():
out._data[name] = col._with_type_metadata(
tables[0]._data[name].dtype
Expand Down Expand Up @@ -3007,7 +3007,7 @@ def set_index(
and not isinstance(keys[0], (cudf.MultiIndex, pd.MultiIndex))
):
# Don't turn single level MultiIndex into an Index
idx = cudf.Index(data_to_add[0], name=names[0])
idx = cudf.Index._from_column(data_to_add[0], name=names[0])
else:
idx = MultiIndex._from_data(dict(enumerate(data_to_add)))
idx.names = names
Expand Down
14 changes: 9 additions & 5 deletions python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def __init__(self, categories=None, ordered: bool = False) -> None:
self._ordered = ordered

@property
def categories(self) -> "cudf.core.index.Index":
def categories(self) -> cudf.Index:
"""
An ``Index`` containing the unique categories allowed.

Expand All @@ -194,10 +194,12 @@ def categories(self) -> "cudf.core.index.Index":
Index(['b', 'a'], dtype='object')
"""
if self._categories is None:
return cudf.Index(
cudf.core.column.column_empty(0, dtype="object", masked=False)
col = cudf.core.column.column_empty(
0, dtype="object", masked=False
)
return cudf.Index(self._categories, copy=False)
else:
col = self._categories
return cudf.Index._from_column(col)

@property
def type(self):
Expand Down Expand Up @@ -259,7 +261,9 @@ def to_pandas(self) -> pd.CategoricalDtype:
categories = self._categories.to_pandas()
return pd.CategoricalDtype(categories=categories, ordered=self.ordered)

def _init_categories(self, categories: Any):
def _init_categories(
self, categories: Any
) -> cudf.core.column.ColumnBase | None:
if categories is None:
return categories
if len(categories) == 0 and not isinstance(
Expand Down
9 changes: 5 additions & 4 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,8 +403,7 @@ def indices(self) -> dict[ScalarLike, cp.ndarray]:
if len(group_keys) > 1:
index = cudf.MultiIndex.from_arrays(group_keys)
else:
(group_keys,) = group_keys
index = cudf.Index(group_keys)
index = cudf.Index._from_column(group_keys[0])
return dict(
zip(index.to_pandas(), cp.split(indices.values, offsets[1:-1]))
)
Expand Down Expand Up @@ -2572,7 +2571,7 @@ def _mimic_pandas_order(
# corresponding output rows in pandas, to do that here
# expand the result by reindexing.
ri = cudf.RangeIndex(0, len(self.obj))
result.index = cudf.Index(ordering)
result.index = cudf.Index._from_column(ordering)
# This reorders and expands
result = result.reindex(ri)
else:
Expand Down Expand Up @@ -3143,7 +3142,9 @@ def keys(self):
dict(zip(range(nkeys), self._key_columns))
)._set_names(self.names)
else:
return cudf.Index(self._key_columns[0], name=self.names[0])
return cudf.Index._from_column(
self._key_columns[0], name=self.names[0]
)

@property
def values(self) -> cudf.core.frame.Frame:
Expand Down
Loading
Loading