Skip to content

Commit

Permalink
Fix modin import with mismatched pandas version (#267)
Browse files: browse the repository at this point in the history
* Fix modin import with mismatched pandas version

Signed-off-by: Antoni Baum <[email protected]>

* Fix

Signed-off-by: Antoni Baum <[email protected]>

* Lint

Signed-off-by: Antoni Baum <[email protected]>

---------

Signed-off-by: Antoni Baum <[email protected]>
  • Loading branch information
Yard1 authored Feb 14, 2023
1 parent dcdc4b7 commit f16521a
Showing 1 changed file with 26 additions and 7 deletions.
33 changes: 26 additions & 7 deletions xgboost_ray/data_sources/modin.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,6 +16,13 @@
import modin # noqa: F401
from modin.config.envvars import Engine
from packaging.version import Version
+from modin.pandas import ( # noqa: F401
+DataFrame as ModinDataFrame, # noqa: F401
+Series as ModinSeries # noqa: F401
+)
+from modin.distributed.dataframe.pandas import ( # noqa: F401
+unwrap_partitions # noqa: F401
+)
MODIN_INSTALLED = Version(modin.__version__) >= Version("0.9.0")

# Check if importing the Ray engine leads to errors
Expand All @@ -29,8 +36,11 @@ def _assert_modin_installed():
if not MODIN_INSTALLED:
raise RuntimeError(
"Tried to use Modin as a data source, but modin is not "
-"installed. This function shouldn't have been called. "
-"\nFIX THIS by installing modin: `pip install modin`. "
+"installed or it conflicts with the pandas version. "
+"This function shouldn't have been called. "
+"\nFIX THIS by installing modin: `pip install modin` "
+"and making sure that the installed pandas version is "
+"supported by modin."
"\nPlease also raise an issue on our GitHub: "
"https://github.com/ray-project/xgboost_ray as this part of "
"the code should not have been reached.")
Expand All @@ -53,8 +63,11 @@ def is_data_type(data: Any,
filetype: Optional[RayFileType] = None) -> bool:
if not MODIN_INSTALLED:
return False
-from modin.pandas import DataFrame as ModinDataFrame, \
-Series as ModinSeries
+# Has to be imported again.
+from modin.pandas import ( # noqa: F811
+DataFrame as ModinDataFrame, # noqa: F811
+Series as ModinSeries # noqa: F811
+)

return isinstance(data, (ModinDataFrame, ModinSeries))

Expand Down Expand Up @@ -87,8 +100,11 @@ def load_data(
@staticmethod
def convert_to_series(data: Any) -> pd.Series:
_assert_modin_installed()
-from modin.pandas import DataFrame as ModinDataFrame, \
-Series as ModinSeries
+# Has to be imported again.
+from modin.pandas import ( # noqa: F811
+DataFrame as ModinDataFrame, # noqa: F811
+Series as ModinSeries # noqa: F811
+)

if isinstance(data, ModinDataFrame):
return pd.Series(data._to_pandas().squeeze())
Expand All @@ -104,7 +120,10 @@ def get_actor_shards(
Tuple[Any, Optional[Dict[int, Any]]]:
_assert_modin_installed()

-from modin.distributed.dataframe.pandas import unwrap_partitions
+# Has to be imported again.
+from modin.distributed.dataframe.pandas import ( # noqa: F811
+unwrap_partitions # noqa: F811
+)

actor_rank_ips = get_actor_rank_ips(actors)

Expand Down

0 comments on commit f16521a

Please sign in to comment.