From 12f6a3c87e5171ee1633523690d7340b63568b00 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Wed, 15 May 2024 00:35:06 -0700 Subject: [PATCH] FIX-#7233: Display property name in default_to_pandas error messages (#7269) Signed-off-by: Jonathan Shi --- .../algebra/default2pandas/default.py | 11 ++++++-- .../storage_formats/pandas/test_internals.py | 28 +++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/modin/core/dataframe/algebra/default2pandas/default.py b/modin/core/dataframe/algebra/default2pandas/default.py index c5b80a6edd1..f27670ef2eb 100644 --- a/modin/core/dataframe/algebra/default2pandas/default.py +++ b/modin/core/dataframe/algebra/default2pandas/default.py @@ -94,8 +94,6 @@ def register(cls, func, obj_type=None, inplace=None, fn_name=None): Function that takes query compiler, does fallback to pandas and applies `func` to the casted to pandas frame or its property accesed by ``cls.frame_wrapper``. """ - fn_name = getattr(func, "__name__", str(func)) if fn_name is None else fn_name - if isinstance(func, str): if obj_type is None: obj_type = cls.DEFAULT_OBJECT_TYPE @@ -104,7 +102,16 @@ def register(cls, func, obj_type=None, inplace=None, fn_name=None): fn = func if type(fn) is property: + if fn_name is None and hasattr(fn, "fget"): + # When `fn` is a property, `str(fn)` will be something like + # "<property object at 0x...>". We instead check its `fget` method to get + # the name of the property. + # Note that this method is still imperfect because we cannot get the class name + # of the property. For example, we can only get "hour" from `Series.dt.hour`. 
+ fn_name = f"<property fget:{getattr(fn.fget, '__name__', 'noname')}>" fn = cls.build_property_wrapper(fn) + else: + fn_name = getattr(fn, "__name__", str(fn)) if fn_name is None else fn_name def applyier(df, *args, **kwargs): """ diff --git a/modin/tests/core/storage_formats/pandas/test_internals.py b/modin/tests/core/storage_formats/pandas/test_internals.py index 85f9886a6e7..bf95d52ae9d 100644 --- a/modin/tests/core/storage_formats/pandas/test_internals.py +++ b/modin/tests/core/storage_formats/pandas/test_internals.py @@ -30,6 +30,7 @@ context, ) from modin.core.dataframe.algebra import Fold +from modin.core.dataframe.algebra.default2pandas import DataFrameDefault from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe from modin.core.dataframe.pandas.dataframe.utils import ColumnInfo, ShuffleSortFunctions from modin.core.dataframe.pandas.metadata import ( @@ -2757,3 +2758,30 @@ def filter_modin_dataframe2(df): filtered_df = modin_df.filter_dataframe2() df_equals(filtered_df, expected_df) + + +def test_default_property_warning_name(): + # Test that when a property defaults to pandas, the raised warning mentions the full name of + # the pandas property rather than a hex address + + @property + def _test_default_property(df): + return "suspicious sentinel value" + + @property + def qc_test_default_property(qc): + return DataFrameDefault.register(_test_default_property)(qc) + + PandasQueryCompiler.qc_test_default_property = qc_test_default_property + + @property + def dataframe_test_default_property(df): + return df._query_compiler.qc_test_default_property + + pd.DataFrame.dataframe_test_default_property = dataframe_test_default_property + + with pytest.warns( + UserWarning, + match="<property fget:_test_default_property> is not currently supported", + ): + pd.DataFrame([[1]]).dataframe_test_default_property