From 8d34dd75115c69f66a842f67f38b29ed9decd881 Mon Sep 17 00:00:00 2001 From: Doris Lee Date: Sat, 2 Apr 2022 15:22:12 -0700 Subject: [PATCH] FEAT-#474: basic working example with mock recommendation and display Signed-off-by: Doris Lee --- lux/__init__.py | 28 +- lux/core/__init__.py | 108 +-- lux/core/frame.py | 1728 ++++++++++++++++++----------------- lux/core/old_frame.py | 874 ++++++++++++++++++ test_nb/refactor test.ipynb | 152 +++ 5 files changed, 1982 insertions(+), 908 deletions(-) create mode 100644 lux/core/old_frame.py create mode 100644 test_nb/refactor test.ipynb diff --git a/lux/__init__.py b/lux/__init__.py index 63459e96..b864ac70 100644 --- a/lux/__init__.py +++ b/lux/__init__.py @@ -13,21 +13,21 @@ # limitations under the License. # Register the commonly used modules (similar to how pandas does it: https://github.com/pandas-dev/pandas/blob/master/pandas/__init__.py) -from lux.vis.Clause import Clause -from lux.core.frame import LuxDataFrame -from lux.core.sqltable import LuxSQLTable -from lux.core.joinedsqltable import JoinedSQLTable -from lux.utils.tracing_utils import LuxTracer -from ._version import __version__, version_info -from lux._config import config -from lux._config.config import warning_format -from lux.utils.debug_utils import debug_info, check_luxwidget_enabled +# from lux.vis.Clause import Clause +# from lux.core.frame import LuxDataFrame +# from lux.core.sqltable import LuxSQLTable +# from lux.core.joinedsqltable import JoinedSQLTable +# from lux.utils.tracing_utils import LuxTracer +# from ._version import __version__, version_info +# from lux._config import config +# from lux._config.config import warning_format +# from lux.utils.debug_utils import debug_info, check_luxwidget_enabled -from lux._config import Config +# from lux._config import Config -config = Config() +# config = Config() -from lux.action.default import register_default_actions +# from lux.action.default import register_default_actions -register_default_actions() 
-check_luxwidget_enabled() +# register_default_actions() +# check_luxwidget_enabled() diff --git a/lux/core/__init__.py b/lux/core/__init__.py index d01b1976..c79e9768 100644 --- a/lux/core/__init__.py +++ b/lux/core/__init__.py @@ -12,62 +12,62 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pandas as pd -from .frame import LuxDataFrame -from .groupby import LuxDataFrameGroupBy, LuxSeriesGroupBy -from .series import LuxSeries +# import pandas as pd +# from .frame import LuxDataFrame +# from .groupby import LuxDataFrameGroupBy, LuxSeriesGroupBy +# from .series import LuxSeries -global originalDF -# Keep variable scope of original pandas df -originalDF = pd.core.frame.DataFrame -originalSeries = pd.core.series.Series +# global originalDF +# # Keep variable scope of original pandas df +# originalDF = pd.core.frame.DataFrame +# originalSeries = pd.core.series.Series -def setOption(overridePandas=True): - if overridePandas: - pd.DataFrame = ( - pd.io.json._json.DataFrame - ) = ( - pd.io.sql.DataFrame - ) = ( - pd.io.excel.DataFrame - ) = ( - pd.io.formats.DataFrame - ) = ( - pd.io.sas.DataFrame - ) = ( - pd.io.clipboards.DataFrame - ) = ( - pd.io.common.DataFrame - ) = ( - pd.io.feather_format.DataFrame - ) = ( - pd.io.gbq.DataFrame - ) = ( - pd.io.html.DataFrame - ) = ( - pd.io.orc.DataFrame - ) = ( - pd.io.parquet.DataFrame - ) = ( - pd.io.pickle.DataFrame - ) = ( - pd.io.pytables.DataFrame - ) = ( - pd.io.spss.DataFrame - ) = ( - pd.io.stata.DataFrame - ) = pd.io.api.DataFrame = pd.core.frame.DataFrame = pd._testing.DataFrame = LuxDataFrame - if pd.__version__ < "1.3.0": - pd.io.parsers.DataFrame = LuxDataFrame - else: - pd.io.parsers.readers.DataFrame = LuxDataFrame - pd.Series = pd.core.series.Series = pd.core.groupby.ops.Series = pd._testing.Series = LuxSeries - pd.core.groupby.generic.DataFrameGroupBy = LuxDataFrameGroupBy - pd.core.groupby.generic.SeriesGroupBy = LuxSeriesGroupBy - else: - 
pd.DataFrame = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = originalDF - pd.Series = originalSeries +# def setOption(overridePandas=True): +# if overridePandas: +# pd.DataFrame = ( +# pd.io.json._json.DataFrame +# ) = ( +# pd.io.sql.DataFrame +# ) = ( +# pd.io.excel.DataFrame +# ) = ( +# pd.io.formats.DataFrame +# ) = ( +# pd.io.sas.DataFrame +# ) = ( +# pd.io.clipboards.DataFrame +# ) = ( +# pd.io.common.DataFrame +# ) = ( +# pd.io.feather_format.DataFrame +# ) = ( +# pd.io.gbq.DataFrame +# ) = ( +# pd.io.html.DataFrame +# ) = ( +# pd.io.orc.DataFrame +# ) = ( +# pd.io.parquet.DataFrame +# ) = ( +# pd.io.pickle.DataFrame +# ) = ( +# pd.io.pytables.DataFrame +# ) = ( +# pd.io.spss.DataFrame +# ) = ( +# pd.io.stata.DataFrame +# ) = pd.io.api.DataFrame = pd.core.frame.DataFrame = pd._testing.DataFrame = LuxDataFrame +# if pd.__version__ < "1.3.0": +# pd.io.parsers.DataFrame = LuxDataFrame +# else: +# pd.io.parsers.readers.DataFrame = LuxDataFrame +# pd.Series = pd.core.series.Series = pd.core.groupby.ops.Series = pd._testing.Series = LuxSeries +# pd.core.groupby.generic.DataFrameGroupBy = LuxDataFrameGroupBy +# pd.core.groupby.generic.SeriesGroupBy = LuxSeriesGroupBy +# else: +# pd.DataFrame = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = originalDF +# pd.Series = originalSeries -setOption(overridePandas=True) +# setOption(overridePandas=True) diff --git a/lux/core/frame.py b/lux/core/frame.py index 4d8da0e8..0df31091 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -13,862 +13,910 @@ # limitations under the License. 
import pandas as pd -from lux.core.series import LuxSeries +# from lux.core.series import LuxSeries from lux.vis.Clause import Clause -from lux.vis.Vis import Vis -from lux.vis.VisList import VisList -from lux.history.history import History -from lux.utils.date_utils import is_datetime_series -from lux.utils.message import Message -from lux.utils.utils import check_import_lux_widget +# from lux.vis.Vis import Vis +# from lux.vis.VisList import VisList +# from lux.history.history import History +# from lux.utils.date_utils import is_datetime_series +# from lux.utils.message import Message +# from lux.utils.utils import check_import_lux_widget from typing import Dict, Union, List, Callable -# from lux.executor.Executor import * import warnings import traceback import lux -class LuxDataFrame(pd.DataFrame): - """ - A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. - """ - - # MUST register here for new properties!! 
- _metadata = [ - "_intent", - "_inferred_intent", - "_data_type", - "unique_values", - "cardinality", - "_rec_info", - "_min_max", - "_current_vis", - "_widget", - "_recommendation", - "_prev", - "_history", - "_saved_export", - "_sampled", - "_toggle_pandas_display", - "_message", - "_pandas_only", - "pre_aggregated", - "_type_override", - ] - - def __init__(self, *args, **kw): - self._history = History() - self._intent = [] - self._inferred_intent = [] - self._recommendation = {} - self._saved_export = None - self._current_vis = [] - self._prev = None - self._widget = None - super(LuxDataFrame, self).__init__(*args, **kw) - - self.table_name = "" - if lux.config.SQLconnection == "": - from lux.executor.PandasExecutor import PandasExecutor - - lux.config.executor = PandasExecutor() - else: - from lux.executor.SQLExecutor import SQLExecutor - - # lux.config.executor = SQLExecutor() - - self._sampled = None - self._approx_sample = None - self._toggle_pandas_display = True - self._message = Message() - self._pandas_only = False - # Metadata - self._data_type = {} - self.unique_values = None - self.cardinality = None - self._min_max = None - self.pre_aggregated = None - self._type_override = {} - warnings.formatwarning = lux.warning_format - - @property - def _constructor(self): - return LuxDataFrame - - @property - def _constructor_sliced(self): - def f(*args, **kwargs): - s = LuxSeries(*args, **kwargs) - for attr in self._metadata: # propagate metadata - s.__dict__[attr] = getattr(self, attr, None) - return s - - return f - - @property - def history(self): - return self._history - - @property - def data_type(self): - if not self._data_type: - self.maintain_metadata() - return self._data_type - - def compute_metadata(self) -> None: - """ - Compute dataset metadata and statistics - """ - if len(self) > 0: - if lux.config.executor.name != "SQLExecutor": - lux.config.executor.compute_stats(self) - lux.config.executor.compute_dataset_metadata(self) - 
self._infer_structure() - self._metadata_fresh = True - - def maintain_metadata(self): - """ - Maintain dataset metadata and statistics (Compute only if needed) - """ - is_sql_tbl = lux.config.executor.name != "PandasExecutor" - - if lux.config.SQLconnection != "" and is_sql_tbl: - from lux.executor.SQLExecutor import SQLExecutor - - # lux.config.executor = SQLExecutor() - - # Check that metadata has not yet been computed - if lux.config.lazy_maintain: - # Check that metadata has not yet been computed - if not hasattr(self, "_metadata_fresh") or not self._metadata_fresh: - # only compute metadata information if the dataframe is non-empty - self.compute_metadata() - else: - self.compute_metadata() - - def expire_recs(self) -> None: - """ - Expires and resets all recommendations - """ - if lux.config.lazy_maintain: - self._recs_fresh = False - self._recommendation = {} - self._widget = None - self._rec_info = None - self._sampled = None - - def expire_metadata(self) -> None: - """ - Expire all saved metadata to trigger a recomputation the next time the data is required. 
- """ - if lux.config.lazy_maintain: - self._metadata_fresh = False - self._data_type = None - self.unique_values = None - self.cardinality = None - self._min_max = None - self.pre_aggregated = None - - ##################### - ## Override Pandas ## - ##################### - def __getattr__(self, name): - ret_value = super(LuxDataFrame, self).__getattr__(name) - self.expire_metadata() - self.expire_recs() - return ret_value - - def _set_axis(self, axis, labels): - super(LuxDataFrame, self)._set_axis(axis, labels) - self.expire_metadata() - self.expire_recs() - - def _update_inplace(self, *args, **kwargs): - super(LuxDataFrame, self)._update_inplace(*args, **kwargs) - self.expire_metadata() - self.expire_recs() - - def _set_item(self, key, value): - super(LuxDataFrame, self)._set_item(key, value) - self.expire_metadata() - self.expire_recs() - - def _infer_structure(self): - # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data - is_multi_index_flag = self.index.nlevels != 1 - not_int_index_flag = not pd.api.types.is_integer_dtype(self.index) - - is_sql_tbl = lux.config.executor.name != "PandasExecutor" - - small_df_flag = len(self) < 100 and is_sql_tbl - if self.pre_aggregated == None: - self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag - if "Number of Records" in self.columns: - self.pre_aggregated = True - self.pre_aggregated = "groupby" in [event.name for event in self.history] and not is_sql_tbl - - @property - def intent(self): - """ - Main function to set the intent of the dataframe. - The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. 
- - Parameters - ---------- - intent : List[str,Clause] - intent list, can be a mix of string shorthand or a lux.Clause object - - Notes - ----- - :doc:`../guide/intent` - """ - return self._intent - - @intent.setter - def intent(self, intent_input: Union[List[Union[str, Clause]], Vis]): - is_list_input = isinstance(intent_input, list) - is_vis_input = isinstance(intent_input, Vis) - if not (is_list_input or is_vis_input): - raise TypeError( - "Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object." - "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" - ) - if is_list_input: - self.set_intent(intent_input) - elif is_vis_input: - self.set_intent_as_vis(intent_input) - - def clear_intent(self): - self.intent = [] - self.expire_recs() - - def set_intent(self, intent: List[Union[str, Clause]]): - self.expire_recs() - self._intent = intent - self._parse_validate_compile_intent() - - def _parse_validate_compile_intent(self): - self.maintain_metadata() - from lux.processor.Parser import Parser - from lux.processor.Validator import Validator - - self._intent = Parser.parse(self._intent) - Validator.validate_intent(self._intent, self) - self.maintain_metadata() - from lux.processor.Compiler import Compiler - - self.current_vis = Compiler.compile_intent(self, self._intent) - - def copy_intent(self): - # creates a true copy of the dataframe's intent - output = [] - for clause in self._intent: - temp_clause = clause.copy_clause() - output.append(temp_clause) - return output - - def set_intent_as_vis(self, vis: Vis): - """ - Set intent of the dataframe based on the intent of a Vis - - Parameters - ---------- - vis : Vis - Input Vis object - """ - self.expire_recs() - self._intent = vis._inferred_intent - self._parse_validate_compile_intent() - - def set_data_type(self, types: dict): - """ - Set the data type for a particular attribute in the dataframe - overriding the automatically-detected type inferred by Lux - - 
Parameters - ---------- - types: dict - Dictionary that maps attribute/column name to a specified Lux Type. - Possible options: "nominal", "quantitative", "id", and "temporal". - - Example - ---------- - df = pd.read_csv("https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/absenteeism.csv") - df.set_data_type({"ID":"id", - "Reason for absence":"nominal"}) - """ - if self._type_override == None: - self._type_override = types - else: - self._type_override = {**self._type_override, **types} - - if not self.data_type: - self.maintain_metadata() - - for attr in types: - if types[attr] not in ["nominal", "quantitative", "id", "temporal"]: - raise ValueError( - f'Invalid data type option specified for {attr}. Please use one of the following supported types: ["nominal", "quantitative", "id", "temporal"]' - ) - self.data_type[attr] = types[attr] - - self.expire_recs() - - def to_pandas(self): - import lux.core - - return lux.core.originalDF(self, copy=False) - - @property - def recommendation(self): - if self._recommendation is not None and self._recommendation == {}: - from lux.processor.Compiler import Compiler - - self.maintain_metadata() - self.current_vis = Compiler.compile_intent(self, self._intent) - self.maintain_recs() - return self._recommendation - - @recommendation.setter - def recommendation(self, recommendation: Dict): - self._recommendation = recommendation - - @property - def current_vis(self): - from lux.processor.Validator import Validator - - # _parse_validate_compile_intent does not call executor, - # we only attach data to current vis when user request current_vis - valid_current_vis = ( - self._current_vis is not None - and len(self._current_vis) > 0 - and self._current_vis[0].data is None - and self._current_vis[0].intent - ) - if valid_current_vis and Validator.validate_intent(self._current_vis[0].intent, self): - lux.config.executor.execute(self._current_vis, self) - return self._current_vis - - @current_vis.setter - def 
current_vis(self, current_vis: Dict): - self._current_vis = current_vis - - def _append_rec(self, rec_infolist, recommendations: Dict): - if recommendations["collection"] is not None and len(recommendations["collection"]) > 0: - rec_infolist.append(recommendations) - - def show_all_column_vis(self): - if len(self.columns) > 1 and len(self.columns) < 4 and self.intent == [] or self.intent is None: - vis = Vis(list(self.columns), self) - if vis.mark != "": - vis._all_column = True - self.current_vis = VisList([vis]) - - def maintain_recs(self, is_series="DataFrame"): - # `rec_df` is the dataframe to generate the recommendations on - # check to see if globally defined actions have been registered/removed - if lux.config.update_actions["flag"] == True: - self._recs_fresh = False - show_prev = False # flag indicating whether rec_df is showing previous df or current self - - if self._prev is not None: - rec_df = self._prev - rec_df._message = Message() - rec_df.maintain_metadata() # the prev dataframe may not have been printed before - last_event = self.history._events[-1].name - rec_df._message.add( - f"Lux is visualizing the previous version of the dataframe before you applied {last_event}." - ) - show_prev = True - else: - rec_df = self - rec_df._message = Message() - # Add warning message if there exist ID fields - if len(rec_df) == 0: - rec_df._message.add(f"Lux cannot operate on an empty {is_series}.") - elif len(rec_df) < 5 and not rec_df.pre_aggregated: - rec_df._message.add( - f"The {is_series} is too small to visualize. To generate visualizations in Lux, the {is_series} must contain at least 5 rows." - ) - elif self.index.nlevels >= 2 or self.columns.nlevels >= 2: - rec_df._message.add( - f"Lux does not currently support visualizations in a {is_series} " - f"with hierarchical indexes.\n" - f"Please convert the {is_series} into a flat " - f"table via pandas.DataFrame.reset_index." 
- ) - else: - id_fields_str = "" - inverted_data_type = lux.config.executor.invert_data_type(rec_df.data_type) - if len(inverted_data_type["id"]) > 0: - for id_field in inverted_data_type["id"]: - id_fields_str += f"{id_field}, " - id_fields_str = id_fields_str[:-2] - rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") - - rec_df._prev = None # reset _prev - - # If lazy, check that recs has not yet been computed - lazy_but_not_computed = lux.config.lazy_maintain and ( - not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh - ) - eager = not lux.config.lazy_maintain - - # Check that recs has not yet been computed - if lazy_but_not_computed or eager: - is_sql_tbl = lux.config.executor.name == "SQLExecutor" - rec_infolist = [] - from lux.action.row_group import row_group - from lux.action.column_group import column_group - - # TODO: Rewrite these as register action inside default actions - if rec_df.pre_aggregated: - if rec_df.columns.name is not None: - rec_df._append_rec(rec_infolist, row_group(rec_df)) - rec_df._append_rec(rec_infolist, column_group(rec_df)) - elif not (len(rec_df) < 5 and not rec_df.pre_aggregated and not is_sql_tbl) and not ( - self.index.nlevels >= 2 or self.columns.nlevels >= 2 - ): - from lux.action.custom import custom_actions - - # generate vis from globally registered actions and append to dataframe - custom_action_collection = custom_actions(rec_df) - for rec in custom_action_collection: - rec_df._append_rec(rec_infolist, rec) - lux.config.update_actions["flag"] = False - - # Store _rec_info into a more user-friendly dictionary form - rec_df._recommendation = {} - for rec_info in rec_infolist: - action_type = rec_info["action"] - vlist = rec_info["collection"] - if len(vlist) > 0: - rec_df._recommendation[action_type] = vlist - rec_df._rec_info = rec_infolist - rec_df.show_all_column_vis() - if lux.config.render_widget: - self._widget = rec_df.render_widget() - # re-render widget for the current 
dataframe if previous rec is not recomputed - elif show_prev: - rec_df.show_all_column_vis() - if lux.config.render_widget: - self._widget = rec_df.render_widget() - self._recs_fresh = True - - ####################################################### - ############## LuxWidget Result Display ############### - ####################################################### - @property - def widget(self): - if self._widget: - return self._widget - - @property - def exported(self) -> Union[Dict[str, VisList], VisList]: - """ - Get selected visualizations as exported Vis List - - Notes - ----- - Convert the _selectedVisIdxs dictionary into a programmable VisList - Example _selectedVisIdxs : - - {'Correlation': [0, 2], 'Occurrence': [1]} - - indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the `Occurrence` tab is selected. - - Returns - ------- - Union[Dict[str,VisList], VisList] - When there are no exported vis, return empty list -> [] - When all the exported vis is from the same tab, return a VisList of selected visualizations. -> VisList(v1, v2...) - When the exported vis is from the different tabs, return a dictionary with the action name as key and selected visualizations in the VisList. -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..} - """ - if self.widget is None: - warnings.warn( - "\nNo widget attached to the dataframe." 
- "Please assign dataframe to an output variable.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", - stacklevel=2, - ) - return [] - exported_vis_lst = self._widget._selectedVisIdxs - exported_vis = [] - if exported_vis_lst == {}: - if self._saved_export: - return self._saved_export + +# pandas +DataFrame = pd.DataFrame +Series = pd.Series + + +# def _dataframe_constructor_sliced(self: DataFrame): +# def f(*args, **kwargs): +# s = Series(*args, **kwargs) +# for attr in self._metadata: # propagate metadata +# s.__dict__[attr] = getattr(self, attr, None) +# return s + +# return f + +# def _dataframe_constructor(self: DataFrame): +# def f(*args, **kwargs): +# s = DataFrame(*args, **kwargs) +# for attr in self._metadata: # propagate metadata +# s.__dict__[attr] = getattr(self, attr, None) +# return s +# return f + +def _mock_dataframe_display_(self: DataFrame): + if hasattr(self, "intent") and self.intent is not None and all(x in self for x in self.intent): + self[self.intent].plot() + else: + self.plot() +def _dataframe_ipython_display_(self: DataFrame): + from IPython.display import display + from IPython.display import clear_output + import ipywidgets as widgets + + try: + # if self._pandas_only: + # display(self.display_pandas()) + # self._pandas_only = False + # else: + # if not self.index.nlevels >= 2 or self.columns.nlevels >= 2: + # self.maintain_metadata() + + # if self._intent != [] and (not hasattr(self, "_compiled") or not self._compiled): + # from lux.processor.Compiler import Compiler + + # self.current_vis = Compiler.compile_intent(self, self._intent) + + # if lux.config.default_display == "lux": + # self._toggle_pandas_display = False + # else: + # self._toggle_pandas_display = True + + # # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) + # self.maintain_recs() + + # MOCK COMPUTED RECOMMENDATION + 
self.recommendation={'Correlation': []} + + # MOCK DISPLAY RECOMMENDATION + _mock_dataframe_display_(self) + except (KeyboardInterrupt, SystemExit): + raise + except Exception: + if lux.config.pandas_fallback: warnings.warn( - "\nNo visualization selected to export.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + "\nUnexpected error in rendering Lux widget and recommendations. " + "Falling back to Pandas display.\n" + "Please report the following issue on Github: https://github.com/lux-org/lux/issues \n", stacklevel=2, ) - return [] - if len(exported_vis_lst) == 1 and "currentVis" in exported_vis_lst: - return self.current_vis - elif len(exported_vis_lst) > 1: - exported_vis = {} - if "currentVis" in exported_vis_lst: - exported_vis["Current Vis"] = self.current_vis - for export_action in exported_vis_lst: - if export_action != "currentVis": - exported_vis[export_action] = VisList( - list( - map( - self._recommendation[export_action].__getitem__, - exported_vis_lst[export_action], - ) - ) - ) - return exported_vis - elif len(exported_vis_lst) == 1 and ("currentVis" not in exported_vis_lst): - export_action = list(exported_vis_lst.keys())[0] - exported_vis = VisList( - list( - map( - self._recommendation[export_action].__getitem__, - exported_vis_lst[export_action], - ) - ) - ) - self._saved_export = exported_vis - return exported_vis + warnings.warn(traceback.format_exc()) + display(self.display_pandas()) else: - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", - stacklevel=2, - ) - return [] - - def remove_deleted_recs(self, change): - for action in self._widget.deletedIndices: - deletedSoFar = 0 - for index in self._widget.deletedIndices[action]: - self._recommendation[action].remove_index(index - deletedSoFar) - deletedSoFar += 1 - - def set_intent_on_click(self, change): - from IPython.display 
import display, clear_output - from lux.processor.Compiler import Compiler + raise - intent_action = list(self._widget.selectedIntentIndex.keys())[0] - vis = self._recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] - self.set_intent_as_vis(vis) +def display_widget(self:DataFrame): + # Observers(callback_function, listen_to_this_variable) + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") - self.maintain_metadata() - self.current_vis = Compiler.compile_intent(self, self._intent) - self.maintain_recs() + button = widgets.Button( + description="Toggle Pandas/Lux", + layout=widgets.Layout(width="140px", top="5px"), + ) + self.output = widgets.Output() + display(button, self.output) + def on_button_clicked(b): with self.output: + if b: + self._toggle_pandas_display = not self._toggle_pandas_display clear_output() - display(self._widget) - - self._widget.observe(self.remove_deleted_recs, names="deletedIndices") - self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") - - def _ipython_display_(self): - from IPython.display import display - from IPython.display import clear_output - import ipywidgets as widgets - - try: - if self._pandas_only: - display(self.display_pandas()) - self._pandas_only = False - else: - if not self.index.nlevels >= 2 or self.columns.nlevels >= 2: - self.maintain_metadata() - - if self._intent != [] and (not hasattr(self, "_compiled") or not self._compiled): - from lux.processor.Compiler import Compiler - - self.current_vis = Compiler.compile_intent(self, self._intent) - - if lux.config.default_display == "lux": - self._toggle_pandas_display = False - else: - self._toggle_pandas_display = True - - # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) - self.maintain_recs() - - # 
Observers(callback_function, listen_to_this_variable) - self._widget.observe(self.remove_deleted_recs, names="deletedIndices") - self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") - - button = widgets.Button( - description="Toggle Pandas/Lux", - layout=widgets.Layout(width="140px", top="5px"), - ) - self.output = widgets.Output() - display(button, self.output) - - def on_button_clicked(b): - with self.output: - if b: - self._toggle_pandas_display = not self._toggle_pandas_display - clear_output() - if self._toggle_pandas_display: - display(self.display_pandas()) - else: - # b.layout.display = "none" - display(self._widget) - # b.layout.display = "inline-block" - - button.on_click(on_button_clicked) - on_button_clicked(None) - - except (KeyboardInterrupt, SystemExit): - raise - except Exception: - if lux.config.pandas_fallback: - warnings.warn( - "\nUnexpected error in rendering Lux widget and recommendations. " - "Falling back to Pandas display.\n" - "Please report the following issue on Github: https://github.com/lux-org/lux/issues \n", - stacklevel=2, - ) - warnings.warn(traceback.format_exc()) + if self._toggle_pandas_display: display(self.display_pandas()) else: - raise - - def display_pandas(self): - return self.to_pandas() - - def render_widget(self, renderer: str = "altair", input_current_vis=""): - """ - Generate a LuxWidget based on the LuxDataFrame - - Structure of widgetJSON: - - { - - 'current_vis': {}, - 'recommendation': [ - - { - - 'action': 'Correlation', - 'description': "some description", - 'vspec': [ - - {Vega-Lite spec for vis 1}, - {Vega-Lite spec for vis 2}, - ... - - ] - - }, - ... 
repeat for other actions - - ] - - } - - Parameters - ---------- - renderer : str, optional - Choice of visualization rendering library, by default "altair" - input_current_vis : lux.LuxDataFrame, optional - User-specified current vis to override default Current Vis, by default - - """ - check_import_lux_widget() - import luxwidget - - widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) - return luxwidget.LuxWidget( - currentVis=widgetJSON["current_vis"], - recommendations=widgetJSON["recommendation"], - intent=LuxDataFrame.intent_to_string(self._intent), - message=self._message.to_html(), - config={"plottingScale": lux.config.plotting_scale}, - ) - - @staticmethod - def intent_to_JSON(intent): - from lux.utils import utils - - filter_specs = utils.get_filter_specs(intent) - attrs_specs = utils.get_attrs_specs(intent) - - intent = {} - intent["attributes"] = [clause.attribute for clause in attrs_specs] - intent["filters"] = [clause.attribute for clause in filter_specs] - return intent - - @staticmethod - def intent_to_string(intent): - if intent: - return ", ".join([clause.to_string() for clause in intent]) - else: - return "" - - def to_JSON(self, rec_infolist, input_current_vis=""): - widget_spec = {} - if self.current_vis: - lux.config.executor.execute(self.current_vis, self) - widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON( - self.current_vis, input_current_vis - ) - else: - widget_spec["current_vis"] = {} - widget_spec["recommendation"] = [] - - # Recommended Collection - recCollection = LuxDataFrame.rec_to_JSON(rec_infolist) - widget_spec["recommendation"].extend(recCollection) - return widget_spec - - @staticmethod - def current_vis_to_JSON(vlist, input_current_vis=""): - current_vis_spec = {} - numVC = len(vlist) # number of visualizations in the vis list - if numVC == 1: - current_vis_spec = vlist[0].to_code(language=lux.config.plotting_backend, prettyOutput=False) - elif numVC > 1: - pass - if 
vlist[0]._all_column: - current_vis_spec["allcols"] = True - else: - current_vis_spec["allcols"] = False - return current_vis_spec - - @staticmethod - def rec_to_JSON(recs): - rec_lst = [] - import copy - - rec_copy = copy.deepcopy(recs) - for idx, rec in enumerate(rec_copy): - if len(rec["collection"]) > 0: - rec["vspec"] = [] - for vis in rec["collection"]: - chart = vis.to_code(language=lux.config.plotting_backend, prettyOutput=False) - rec["vspec"].append(chart) - rec_lst.append(rec) - # delete since not JSON serializable - del rec_lst[idx]["collection"] - return rec_lst - - def save_as_html(self, filename: str = "export.html", output=False): - """ - Save dataframe widget as static HTML file - - Parameters - ---------- - filename : str - Filename for the output HTML file - """ - - if self.widget is None: - self.maintain_metadata() - self.maintain_recs() - - from ipywidgets.embed import embed_data - - data = embed_data(views=[self.widget]) - - import json - - manager_state = json.dumps(data["manager_state"]) - widget_view = json.dumps(data["view_specs"][0]) - - # Separate out header since CSS file conflict with {} notation in Python format strings - header = """ - - - Lux Widget - - - - - - - - - - - """ - html_template = """ - - {header} - - - - - - - - - - - """ - - manager_state = json.dumps(data["manager_state"]) - widget_view = json.dumps(data["view_specs"][0]) - rendered_template = html_template.format( - header=header, manager_state=manager_state, widget_view=widget_view - ) - if output: - return rendered_template - else: - with open(filename, "w") as fp: - fp.write(rendered_template) - print(f"Saved HTML to {filename}") - - # Overridden Pandas Functions - def head(self, n: int = 5): - ret_val = super(LuxDataFrame, self).head(n) - ret_val._prev = self - ret_val._history.append_event("head", n=5) - return ret_val - - def tail(self, n: int = 5): - ret_val = super(LuxDataFrame, self).tail(n) - ret_val._prev = self - ret_val._history.append_event("tail", n=5) 
- return ret_val - - def groupby(self, *args, **kwargs): - history_flag = False - if "history" not in kwargs or ("history" in kwargs and kwargs["history"]): - history_flag = True - if "history" in kwargs: - del kwargs["history"] - groupby_obj = super(LuxDataFrame, self).groupby(*args, **kwargs) - for attr in self._metadata: - groupby_obj.__dict__[attr] = getattr(self, attr, None) - if history_flag: - groupby_obj._history = groupby_obj._history.copy() - groupby_obj._history.append_event("groupby", *args, **kwargs) - groupby_obj.pre_aggregated = True - return groupby_obj + # b.layout.display = "none" + display(self._widget) + # b.layout.display = "inline-block" + + button.on_click(on_button_clicked) + on_button_clicked(None) +# ------------------------------------------------------------------------------ +# Override Pandas +# ------------------------------------------------------------------------------ + +DataFrame._ipython_display_ = _dataframe_ipython_display_ +# DataFrame._constructor = _dataframe_constructor +# DataFrame._constructor_sliced = _dataframe_constructor_sliced +DataFrame._metadata = ["intent"] +# class LuxDataFrame(pd.DataFrame): +# """ +# A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. +# """ + +# # MUST register here for new properties!! 
+# _metadata = [ +# "_intent", +# "_inferred_intent", +# "_data_type", +# "unique_values", +# "cardinality", +# "_rec_info", +# "_min_max", +# "_current_vis", +# "_widget", +# "_recommendation", +# "_prev", +# "_history", +# "_saved_export", +# "_sampled", +# "_toggle_pandas_display", +# "_message", +# "_pandas_only", +# "pre_aggregated", +# "_type_override", +# ] + +# def __init__(self, *args, **kw): +# self._history = History() +# self._intent = [] +# self._inferred_intent = [] +# self._recommendation = {} +# self._saved_export = None +# self._current_vis = [] +# self._prev = None +# self._widget = None +# super(LuxDataFrame, self).__init__(*args, **kw) + +# self.table_name = "" +# if lux.config.SQLconnection == "": +# from lux.executor.PandasExecutor import PandasExecutor + +# lux.config.executor = PandasExecutor() +# else: +# from lux.executor.SQLExecutor import SQLExecutor + +# # lux.config.executor = SQLExecutor() + +# self._sampled = None +# self._approx_sample = None +# self._toggle_pandas_display = True +# self._message = Message() +# self._pandas_only = False +# # Metadata +# self._data_type = {} +# self.unique_values = None +# self.cardinality = None +# self._min_max = None +# self.pre_aggregated = None +# self._type_override = {} +# warnings.formatwarning = lux.warning_format + +# @property +# def _constructor(self): +# return LuxDataFrame + +# @property +# def _constructor_sliced(self): +# def f(*args, **kwargs): +# s = LuxSeries(*args, **kwargs) +# for attr in self._metadata: # propagate metadata +# s.__dict__[attr] = getattr(self, attr, None) +# return s + +# return f + +# @property +# def history(self): +# return self._history + +# @property +# def data_type(self): +# if not self._data_type: +# self.maintain_metadata() +# return self._data_type + + + + + + + + + +# def compute_metadata(self) -> None: +# """ +# Compute dataset metadata and statistics +# """ +# if len(self) > 0: +# if lux.config.executor.name != "SQLExecutor": +# 
lux.config.executor.compute_stats(self) +# lux.config.executor.compute_dataset_metadata(self) +# self._infer_structure() +# self._metadata_fresh = True + +# def maintain_metadata(self): +# """ +# Maintain dataset metadata and statistics (Compute only if needed) +# """ +# is_sql_tbl = lux.config.executor.name != "PandasExecutor" + +# if lux.config.SQLconnection != "" and is_sql_tbl: +# from lux.executor.SQLExecutor import SQLExecutor + +# # lux.config.executor = SQLExecutor() + +# # Check that metadata has not yet been computed +# if lux.config.lazy_maintain: +# # Check that metadata has not yet been computed +# if not hasattr(self, "_metadata_fresh") or not self._metadata_fresh: +# # only compute metadata information if the dataframe is non-empty +# self.compute_metadata() +# else: +# self.compute_metadata() + +# def expire_recs(self) -> None: +# """ +# Expires and resets all recommendations +# """ +# if lux.config.lazy_maintain: +# self._recs_fresh = False +# self._recommendation = {} +# self._widget = None +# self._rec_info = None +# self._sampled = None + +# def expire_metadata(self) -> None: +# """ +# Expire all saved metadata to trigger a recomputation the next time the data is required. 
+# """ +# if lux.config.lazy_maintain: +# self._metadata_fresh = False +# self._data_type = None +# self.unique_values = None +# self.cardinality = None +# self._min_max = None +# self.pre_aggregated = None + +# ##################### +# ## Override Pandas ## +# ##################### +# def __getattr__(self, name): +# ret_value = super(LuxDataFrame, self).__getattr__(name) +# self.expire_metadata() +# self.expire_recs() +# return ret_value + +# def _set_axis(self, axis, labels): +# super(LuxDataFrame, self)._set_axis(axis, labels) +# self.expire_metadata() +# self.expire_recs() + +# def _update_inplace(self, *args, **kwargs): +# super(LuxDataFrame, self)._update_inplace(*args, **kwargs) +# self.expire_metadata() +# self.expire_recs() + +# def _set_item(self, key, value): +# super(LuxDataFrame, self)._set_item(key, value) +# self.expire_metadata() +# self.expire_recs() + +# def _infer_structure(self): +# # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data +# is_multi_index_flag = self.index.nlevels != 1 +# not_int_index_flag = not pd.api.types.is_integer_dtype(self.index) + +# is_sql_tbl = lux.config.executor.name != "PandasExecutor" + +# small_df_flag = len(self) < 100 and is_sql_tbl +# if self.pre_aggregated == None: +# self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag +# if "Number of Records" in self.columns: +# self.pre_aggregated = True +# self.pre_aggregated = "groupby" in [event.name for event in self.history] and not is_sql_tbl + +# @property +# def intent(self): +# """ +# Main function to set the intent of the dataframe. +# The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. 
+ +# Parameters +# ---------- +# intent : List[str,Clause] +# intent list, can be a mix of string shorthand or a lux.Clause object + +# Notes +# ----- +# :doc:`../guide/intent` +# """ +# return self._intent + +# @intent.setter +# def intent(self, intent_input: Union[List[Union[str, Clause]], Vis]): +# is_list_input = isinstance(intent_input, list) +# is_vis_input = isinstance(intent_input, Vis) +# if not (is_list_input or is_vis_input): +# raise TypeError( +# "Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object." +# "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" +# ) +# if is_list_input: +# self.set_intent(intent_input) +# elif is_vis_input: +# self.set_intent_as_vis(intent_input) + +# def clear_intent(self): +# self.intent = [] +# self.expire_recs() + +# def set_intent(self, intent: List[Union[str, Clause]]): +# self.expire_recs() +# self._intent = intent +# self._parse_validate_compile_intent() + +# def _parse_validate_compile_intent(self): +# self.maintain_metadata() +# from lux.processor.Parser import Parser +# from lux.processor.Validator import Validator + +# self._intent = Parser.parse(self._intent) +# Validator.validate_intent(self._intent, self) +# self.maintain_metadata() +# from lux.processor.Compiler import Compiler + +# self.current_vis = Compiler.compile_intent(self, self._intent) + +# def copy_intent(self): +# # creates a true copy of the dataframe's intent +# output = [] +# for clause in self._intent: +# temp_clause = clause.copy_clause() +# output.append(temp_clause) +# return output + +# def set_intent_as_vis(self, vis: Vis): +# """ +# Set intent of the dataframe based on the intent of a Vis + +# Parameters +# ---------- +# vis : Vis +# Input Vis object +# """ +# self.expire_recs() +# self._intent = vis._inferred_intent +# self._parse_validate_compile_intent() + +# def set_data_type(self, types: dict): +# """ +# Set the data type for a particular attribute in the dataframe +# 
overriding the automatically-detected type inferred by Lux + +# Parameters +# ---------- +# types: dict +# Dictionary that maps attribute/column name to a specified Lux Type. +# Possible options: "nominal", "quantitative", "id", and "temporal". + +# Example +# ---------- +# df = pd.read_csv("https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/absenteeism.csv") +# df.set_data_type({"ID":"id", +# "Reason for absence":"nominal"}) +# """ +# if self._type_override == None: +# self._type_override = types +# else: +# self._type_override = {**self._type_override, **types} + +# if not self.data_type: +# self.maintain_metadata() + +# for attr in types: +# if types[attr] not in ["nominal", "quantitative", "id", "temporal"]: +# raise ValueError( +# f'Invalid data type option specified for {attr}. Please use one of the following supported types: ["nominal", "quantitative", "id", "temporal"]' +# ) +# self.data_type[attr] = types[attr] + +# self.expire_recs() + +# def to_pandas(self): +# import lux.core + +# return lux.core.originalDF(self, copy=False) + +# @property +# def recommendation(self): +# if self._recommendation is not None and self._recommendation == {}: +# from lux.processor.Compiler import Compiler + +# self.maintain_metadata() +# self.current_vis = Compiler.compile_intent(self, self._intent) +# self.maintain_recs() +# return self._recommendation + +# @recommendation.setter +# def recommendation(self, recommendation: Dict): +# self._recommendation = recommendation + +# @property +# def current_vis(self): +# from lux.processor.Validator import Validator + +# # _parse_validate_compile_intent does not call executor, +# # we only attach data to current vis when user request current_vis +# valid_current_vis = ( +# self._current_vis is not None +# and len(self._current_vis) > 0 +# and self._current_vis[0].data is None +# and self._current_vis[0].intent +# ) +# if valid_current_vis and Validator.validate_intent(self._current_vis[0].intent, self): +# 
lux.config.executor.execute(self._current_vis, self) +# return self._current_vis + +# @current_vis.setter +# def current_vis(self, current_vis: Dict): +# self._current_vis = current_vis + +# def _append_rec(self, rec_infolist, recommendations: Dict): +# if recommendations["collection"] is not None and len(recommendations["collection"]) > 0: +# rec_infolist.append(recommendations) + +# def show_all_column_vis(self): +# if len(self.columns) > 1 and len(self.columns) < 4 and self.intent == [] or self.intent is None: +# vis = Vis(list(self.columns), self) +# if vis.mark != "": +# vis._all_column = True +# self.current_vis = VisList([vis]) + +# def maintain_recs(self, is_series="DataFrame"): +# # `rec_df` is the dataframe to generate the recommendations on +# # check to see if globally defined actions have been registered/removed +# if lux.config.update_actions["flag"] == True: +# self._recs_fresh = False +# show_prev = False # flag indicating whether rec_df is showing previous df or current self + +# if self._prev is not None: +# rec_df = self._prev +# rec_df._message = Message() +# rec_df.maintain_metadata() # the prev dataframe may not have been printed before +# last_event = self.history._events[-1].name +# rec_df._message.add( +# f"Lux is visualizing the previous version of the dataframe before you applied {last_event}." +# ) +# show_prev = True +# else: +# rec_df = self +# rec_df._message = Message() +# # Add warning message if there exist ID fields +# if len(rec_df) == 0: +# rec_df._message.add(f"Lux cannot operate on an empty {is_series}.") +# elif len(rec_df) < 5 and not rec_df.pre_aggregated: +# rec_df._message.add( +# f"The {is_series} is too small to visualize. To generate visualizations in Lux, the {is_series} must contain at least 5 rows." 
+# ) +# elif self.index.nlevels >= 2 or self.columns.nlevels >= 2: +# rec_df._message.add( +# f"Lux does not currently support visualizations in a {is_series} " +# f"with hierarchical indexes.\n" +# f"Please convert the {is_series} into a flat " +# f"table via pandas.DataFrame.reset_index." +# ) +# else: +# id_fields_str = "" +# inverted_data_type = lux.config.executor.invert_data_type(rec_df.data_type) +# if len(inverted_data_type["id"]) > 0: +# for id_field in inverted_data_type["id"]: +# id_fields_str += f"{id_field}, " +# id_fields_str = id_fields_str[:-2] +# rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") + +# rec_df._prev = None # reset _prev + +# # If lazy, check that recs has not yet been computed +# lazy_but_not_computed = lux.config.lazy_maintain and ( +# not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh +# ) +# eager = not lux.config.lazy_maintain + +# # Check that recs has not yet been computed +# if lazy_but_not_computed or eager: +# is_sql_tbl = lux.config.executor.name == "SQLExecutor" +# rec_infolist = [] +# from lux.action.row_group import row_group +# from lux.action.column_group import column_group + +# # TODO: Rewrite these as register action inside default actions +# if rec_df.pre_aggregated: +# if rec_df.columns.name is not None: +# rec_df._append_rec(rec_infolist, row_group(rec_df)) +# rec_df._append_rec(rec_infolist, column_group(rec_df)) +# elif not (len(rec_df) < 5 and not rec_df.pre_aggregated and not is_sql_tbl) and not ( +# self.index.nlevels >= 2 or self.columns.nlevels >= 2 +# ): +# from lux.action.custom import custom_actions + +# # generate vis from globally registered actions and append to dataframe +# custom_action_collection = custom_actions(rec_df) +# for rec in custom_action_collection: +# rec_df._append_rec(rec_infolist, rec) +# lux.config.update_actions["flag"] = False + +# # Store _rec_info into a more user-friendly dictionary form +# rec_df._recommendation = {} +# for 
rec_info in rec_infolist: +# action_type = rec_info["action"] +# vlist = rec_info["collection"] +# if len(vlist) > 0: +# rec_df._recommendation[action_type] = vlist +# rec_df._rec_info = rec_infolist +# rec_df.show_all_column_vis() +# if lux.config.render_widget: +# self._widget = rec_df.render_widget() +# # re-render widget for the current dataframe if previous rec is not recomputed +# elif show_prev: +# rec_df.show_all_column_vis() +# if lux.config.render_widget: +# self._widget = rec_df.render_widget() +# self._recs_fresh = True + +# ####################################################### +# ############## LuxWidget Result Display ############### +# ####################################################### +# @property +# def widget(self): +# if self._widget: +# return self._widget + +# @property +# def exported(self) -> Union[Dict[str, VisList], VisList]: +# """ +# Get selected visualizations as exported Vis List + +# Notes +# ----- +# Convert the _selectedVisIdxs dictionary into a programmable VisList +# Example _selectedVisIdxs : + +# {'Correlation': [0, 2], 'Occurrence': [1]} + +# indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the `Occurrence` tab is selected. + +# Returns +# ------- +# Union[Dict[str,VisList], VisList] +# When there are no exported vis, return empty list -> [] +# When all the exported vis is from the same tab, return a VisList of selected visualizations. -> VisList(v1, v2...) +# When the exported vis is from the different tabs, return a dictionary with the action name as key and selected visualizations in the VisList. -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..} +# """ +# if self.widget is None: +# warnings.warn( +# "\nNo widget attached to the dataframe." 
+# "Please assign dataframe to an output variable.\n" +# "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", +# stacklevel=2, +# ) +# return [] +# exported_vis_lst = self._widget._selectedVisIdxs +# exported_vis = [] +# if exported_vis_lst == {}: +# if self._saved_export: +# return self._saved_export +# warnings.warn( +# "\nNo visualization selected to export.\n" +# "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", +# stacklevel=2, +# ) +# return [] +# if len(exported_vis_lst) == 1 and "currentVis" in exported_vis_lst: +# return self.current_vis +# elif len(exported_vis_lst) > 1: +# exported_vis = {} +# if "currentVis" in exported_vis_lst: +# exported_vis["Current Vis"] = self.current_vis +# for export_action in exported_vis_lst: +# if export_action != "currentVis": +# exported_vis[export_action] = VisList( +# list( +# map( +# self._recommendation[export_action].__getitem__, +# exported_vis_lst[export_action], +# ) +# ) +# ) +# return exported_vis +# elif len(exported_vis_lst) == 1 and ("currentVis" not in exported_vis_lst): +# export_action = list(exported_vis_lst.keys())[0] +# exported_vis = VisList( +# list( +# map( +# self._recommendation[export_action].__getitem__, +# exported_vis_lst[export_action], +# ) +# ) +# ) +# self._saved_export = exported_vis +# return exported_vis +# else: +# warnings.warn( +# "\nNo visualization selected to export.\n" +# "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", +# stacklevel=2, +# ) +# return [] + +# def remove_deleted_recs(self, change): +# for action in self._widget.deletedIndices: +# deletedSoFar = 0 +# for index in self._widget.deletedIndices[action]: +# self._recommendation[action].remove_index(index - deletedSoFar) +# deletedSoFar += 1 + +# def set_intent_on_click(self, change): +# from IPython.display import display, clear_output +# from lux.processor.Compiler import Compiler + +# 
intent_action = list(self._widget.selectedIntentIndex.keys())[0] +# vis = self._recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] +# self.set_intent_as_vis(vis) + +# self.maintain_metadata() +# self.current_vis = Compiler.compile_intent(self, self._intent) +# self.maintain_recs() + +# with self.output: +# clear_output() +# display(self._widget) + +# self._widget.observe(self.remove_deleted_recs, names="deletedIndices") +# self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") + +# def display_pandas(self): +# return self.to_pandas() + +# def render_widget(self, renderer: str = "altair", input_current_vis=""): +# """ +# Generate a LuxWidget based on the LuxDataFrame + +# Structure of widgetJSON: + +# { + +# 'current_vis': {}, +# 'recommendation': [ + +# { + +# 'action': 'Correlation', +# 'description': "some description", +# 'vspec': [ + +# {Vega-Lite spec for vis 1}, +# {Vega-Lite spec for vis 2}, +# ... + +# ] + +# }, +# ... repeat for other actions + +# ] + +# } + +# Parameters +# ---------- +# renderer : str, optional +# Choice of visualization rendering library, by default "altair" +# input_current_vis : lux.LuxDataFrame, optional +# User-specified current vis to override default Current Vis, by default + +# """ +# check_import_lux_widget() +# import luxwidget + +# widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) +# return luxwidget.LuxWidget( +# currentVis=widgetJSON["current_vis"], +# recommendations=widgetJSON["recommendation"], +# intent=LuxDataFrame.intent_to_string(self._intent), +# message=self._message.to_html(), +# config={"plottingScale": lux.config.plotting_scale}, +# ) + +# @staticmethod +# def intent_to_JSON(intent): +# from lux.utils import utils + +# filter_specs = utils.get_filter_specs(intent) +# attrs_specs = utils.get_attrs_specs(intent) + +# intent = {} +# intent["attributes"] = [clause.attribute for clause in attrs_specs] +# intent["filters"] = 
[clause.attribute for clause in filter_specs] +# return intent + +# @staticmethod +# def intent_to_string(intent): +# if intent: +# return ", ".join([clause.to_string() for clause in intent]) +# else: +# return "" + +# def to_JSON(self, rec_infolist, input_current_vis=""): +# widget_spec = {} +# if self.current_vis: +# lux.config.executor.execute(self.current_vis, self) +# widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON( +# self.current_vis, input_current_vis +# ) +# else: +# widget_spec["current_vis"] = {} +# widget_spec["recommendation"] = [] + +# # Recommended Collection +# recCollection = LuxDataFrame.rec_to_JSON(rec_infolist) +# widget_spec["recommendation"].extend(recCollection) +# return widget_spec + +# @staticmethod +# def current_vis_to_JSON(vlist, input_current_vis=""): +# current_vis_spec = {} +# numVC = len(vlist) # number of visualizations in the vis list +# if numVC == 1: +# current_vis_spec = vlist[0].to_code(language=lux.config.plotting_backend, prettyOutput=False) +# elif numVC > 1: +# pass +# if vlist[0]._all_column: +# current_vis_spec["allcols"] = True +# else: +# current_vis_spec["allcols"] = False +# return current_vis_spec + +# @staticmethod +# def rec_to_JSON(recs): +# rec_lst = [] +# import copy + +# rec_copy = copy.deepcopy(recs) +# for idx, rec in enumerate(rec_copy): +# if len(rec["collection"]) > 0: +# rec["vspec"] = [] +# for vis in rec["collection"]: +# chart = vis.to_code(language=lux.config.plotting_backend, prettyOutput=False) +# rec["vspec"].append(chart) +# rec_lst.append(rec) +# # delete since not JSON serializable +# del rec_lst[idx]["collection"] +# return rec_lst + +# def save_as_html(self, filename: str = "export.html", output=False): +# """ +# Save dataframe widget as static HTML file + +# Parameters +# ---------- +# filename : str +# Filename for the output HTML file +# """ + +# if self.widget is None: +# self.maintain_metadata() +# self.maintain_recs() + +# from ipywidgets.embed import embed_data + +# data 
= embed_data(views=[self.widget]) + +# import json + +# manager_state = json.dumps(data["manager_state"]) +# widget_view = json.dumps(data["view_specs"][0]) + +# # Separate out header since CSS file conflict with {} notation in Python format strings +# header = """ +# + +# Lux Widget +# +# +# +# + +# +# + +# +# +# """ +# html_template = """ +# +# {header} +# + +# + +# + +# + +# +# +# """ + +# manager_state = json.dumps(data["manager_state"]) +# widget_view = json.dumps(data["view_specs"][0]) +# rendered_template = html_template.format( +# header=header, manager_state=manager_state, widget_view=widget_view +# ) +# if output: +# return rendered_template +# else: +# with open(filename, "w") as fp: +# fp.write(rendered_template) +# print(f"Saved HTML to {filename}") + +# Overridden Pandas Functions +# def head(self, n: int = 5): +# ret_val = super(LuxDataFrame, self).head(n) +# ret_val._prev = self +# ret_val._history.append_event("head", n=5) +# return ret_val + +# def tail(self, n: int = 5): +# ret_val = super(LuxDataFrame, self).tail(n) +# ret_val._prev = self +# ret_val._history.append_event("tail", n=5) +# return ret_val + +# def groupby(self, *args, **kwargs): +# history_flag = False +# if "history" not in kwargs or ("history" in kwargs and kwargs["history"]): +# history_flag = True +# if "history" in kwargs: +# del kwargs["history"] +# groupby_obj = super(LuxDataFrame, self).groupby(*args, **kwargs) +# for attr in self._metadata: +# groupby_obj.__dict__[attr] = getattr(self, attr, None) +# if history_flag: +# groupby_obj._history = groupby_obj._history.copy() +# groupby_obj._history.append_event("groupby", *args, **kwargs) +# groupby_obj.pre_aggregated = True +# return groupby_obj diff --git a/lux/core/old_frame.py b/lux/core/old_frame.py new file mode 100644 index 00000000..4d8da0e8 --- /dev/null +++ b/lux/core/old_frame.py @@ -0,0 +1,874 @@ +# Copyright 2019-2020 The Lux Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas as pd +from lux.core.series import LuxSeries +from lux.vis.Clause import Clause +from lux.vis.Vis import Vis +from lux.vis.VisList import VisList +from lux.history.history import History +from lux.utils.date_utils import is_datetime_series +from lux.utils.message import Message +from lux.utils.utils import check_import_lux_widget +from typing import Dict, Union, List, Callable + +# from lux.executor.Executor import * +import warnings +import traceback +import lux + + +class LuxDataFrame(pd.DataFrame): + """ + A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. + """ + + # MUST register here for new properties!! 
+ _metadata = [ + "_intent", + "_inferred_intent", + "_data_type", + "unique_values", + "cardinality", + "_rec_info", + "_min_max", + "_current_vis", + "_widget", + "_recommendation", + "_prev", + "_history", + "_saved_export", + "_sampled", + "_toggle_pandas_display", + "_message", + "_pandas_only", + "pre_aggregated", + "_type_override", + ] + + def __init__(self, *args, **kw): + self._history = History() + self._intent = [] + self._inferred_intent = [] + self._recommendation = {} + self._saved_export = None + self._current_vis = [] + self._prev = None + self._widget = None + super(LuxDataFrame, self).__init__(*args, **kw) + + self.table_name = "" + if lux.config.SQLconnection == "": + from lux.executor.PandasExecutor import PandasExecutor + + lux.config.executor = PandasExecutor() + else: + from lux.executor.SQLExecutor import SQLExecutor + + # lux.config.executor = SQLExecutor() + + self._sampled = None + self._approx_sample = None + self._toggle_pandas_display = True + self._message = Message() + self._pandas_only = False + # Metadata + self._data_type = {} + self.unique_values = None + self.cardinality = None + self._min_max = None + self.pre_aggregated = None + self._type_override = {} + warnings.formatwarning = lux.warning_format + + @property + def _constructor(self): + return LuxDataFrame + + @property + def _constructor_sliced(self): + def f(*args, **kwargs): + s = LuxSeries(*args, **kwargs) + for attr in self._metadata: # propagate metadata + s.__dict__[attr] = getattr(self, attr, None) + return s + + return f + + @property + def history(self): + return self._history + + @property + def data_type(self): + if not self._data_type: + self.maintain_metadata() + return self._data_type + + def compute_metadata(self) -> None: + """ + Compute dataset metadata and statistics + """ + if len(self) > 0: + if lux.config.executor.name != "SQLExecutor": + lux.config.executor.compute_stats(self) + lux.config.executor.compute_dataset_metadata(self) + 
self._infer_structure() + self._metadata_fresh = True + + def maintain_metadata(self): + """ + Maintain dataset metadata and statistics (Compute only if needed) + """ + is_sql_tbl = lux.config.executor.name != "PandasExecutor" + + if lux.config.SQLconnection != "" and is_sql_tbl: + from lux.executor.SQLExecutor import SQLExecutor + + # lux.config.executor = SQLExecutor() + + # Check that metadata has not yet been computed + if lux.config.lazy_maintain: + # Check that metadata has not yet been computed + if not hasattr(self, "_metadata_fresh") or not self._metadata_fresh: + # only compute metadata information if the dataframe is non-empty + self.compute_metadata() + else: + self.compute_metadata() + + def expire_recs(self) -> None: + """ + Expires and resets all recommendations + """ + if lux.config.lazy_maintain: + self._recs_fresh = False + self._recommendation = {} + self._widget = None + self._rec_info = None + self._sampled = None + + def expire_metadata(self) -> None: + """ + Expire all saved metadata to trigger a recomputation the next time the data is required. 
+ """ + if lux.config.lazy_maintain: + self._metadata_fresh = False + self._data_type = None + self.unique_values = None + self.cardinality = None + self._min_max = None + self.pre_aggregated = None + + ##################### + ## Override Pandas ## + ##################### + def __getattr__(self, name): + ret_value = super(LuxDataFrame, self).__getattr__(name) + self.expire_metadata() + self.expire_recs() + return ret_value + + def _set_axis(self, axis, labels): + super(LuxDataFrame, self)._set_axis(axis, labels) + self.expire_metadata() + self.expire_recs() + + def _update_inplace(self, *args, **kwargs): + super(LuxDataFrame, self)._update_inplace(*args, **kwargs) + self.expire_metadata() + self.expire_recs() + + def _set_item(self, key, value): + super(LuxDataFrame, self)._set_item(key, value) + self.expire_metadata() + self.expire_recs() + + def _infer_structure(self): + # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data + is_multi_index_flag = self.index.nlevels != 1 + not_int_index_flag = not pd.api.types.is_integer_dtype(self.index) + + is_sql_tbl = lux.config.executor.name != "PandasExecutor" + + small_df_flag = len(self) < 100 and is_sql_tbl + if self.pre_aggregated == None: + self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag + if "Number of Records" in self.columns: + self.pre_aggregated = True + self.pre_aggregated = "groupby" in [event.name for event in self.history] and not is_sql_tbl + + @property + def intent(self): + """ + Main function to set the intent of the dataframe. + The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. 
+ + Parameters + ---------- + intent : List[str,Clause] + intent list, can be a mix of string shorthand or a lux.Clause object + + Notes + ----- + :doc:`../guide/intent` + """ + return self._intent + + @intent.setter + def intent(self, intent_input: Union[List[Union[str, Clause]], Vis]): + is_list_input = isinstance(intent_input, list) + is_vis_input = isinstance(intent_input, Vis) + if not (is_list_input or is_vis_input): + raise TypeError( + "Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object." + "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" + ) + if is_list_input: + self.set_intent(intent_input) + elif is_vis_input: + self.set_intent_as_vis(intent_input) + + def clear_intent(self): + self.intent = [] + self.expire_recs() + + def set_intent(self, intent: List[Union[str, Clause]]): + self.expire_recs() + self._intent = intent + self._parse_validate_compile_intent() + + def _parse_validate_compile_intent(self): + self.maintain_metadata() + from lux.processor.Parser import Parser + from lux.processor.Validator import Validator + + self._intent = Parser.parse(self._intent) + Validator.validate_intent(self._intent, self) + self.maintain_metadata() + from lux.processor.Compiler import Compiler + + self.current_vis = Compiler.compile_intent(self, self._intent) + + def copy_intent(self): + # creates a true copy of the dataframe's intent + output = [] + for clause in self._intent: + temp_clause = clause.copy_clause() + output.append(temp_clause) + return output + + def set_intent_as_vis(self, vis: Vis): + """ + Set intent of the dataframe based on the intent of a Vis + + Parameters + ---------- + vis : Vis + Input Vis object + """ + self.expire_recs() + self._intent = vis._inferred_intent + self._parse_validate_compile_intent() + + def set_data_type(self, types: dict): + """ + Set the data type for a particular attribute in the dataframe + overriding the automatically-detected type inferred by Lux + + 
Parameters + ---------- + types: dict + Dictionary that maps attribute/column name to a specified Lux Type. + Possible options: "nominal", "quantitative", "id", and "temporal". + + Example + ---------- + df = pd.read_csv("https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/absenteeism.csv") + df.set_data_type({"ID":"id", + "Reason for absence":"nominal"}) + """ + if self._type_override == None: + self._type_override = types + else: + self._type_override = {**self._type_override, **types} + + if not self.data_type: + self.maintain_metadata() + + for attr in types: + if types[attr] not in ["nominal", "quantitative", "id", "temporal"]: + raise ValueError( + f'Invalid data type option specified for {attr}. Please use one of the following supported types: ["nominal", "quantitative", "id", "temporal"]' + ) + self.data_type[attr] = types[attr] + + self.expire_recs() + + def to_pandas(self): + import lux.core + + return lux.core.originalDF(self, copy=False) + + @property + def recommendation(self): + if self._recommendation is not None and self._recommendation == {}: + from lux.processor.Compiler import Compiler + + self.maintain_metadata() + self.current_vis = Compiler.compile_intent(self, self._intent) + self.maintain_recs() + return self._recommendation + + @recommendation.setter + def recommendation(self, recommendation: Dict): + self._recommendation = recommendation + + @property + def current_vis(self): + from lux.processor.Validator import Validator + + # _parse_validate_compile_intent does not call executor, + # we only attach data to current vis when user request current_vis + valid_current_vis = ( + self._current_vis is not None + and len(self._current_vis) > 0 + and self._current_vis[0].data is None + and self._current_vis[0].intent + ) + if valid_current_vis and Validator.validate_intent(self._current_vis[0].intent, self): + lux.config.executor.execute(self._current_vis, self) + return self._current_vis + + @current_vis.setter + def 
def show_all_column_vis(self):
    """
    Use a single Vis over all columns as the current vis for small, intent-free dataframes.

    Applies only when the dataframe has 2 or 3 columns and no intent is set
    (``intent`` is ``[]`` or ``None``). If the resulting Vis has a non-empty
    ``mark``, it is flagged with ``_all_column = True`` and stored as
    ``self.current_vis`` wrapped in a VisList.
    """
    # The intent test is parenthesised on purpose: without the parentheses
    # ``and`` binds tighter than ``or``, so ``intent is None`` alone satisfied
    # the condition and the column-count guard was skipped.
    if 1 < len(self.columns) < 4 and (self.intent == [] or self.intent is None):
        vis = Vis(list(self.columns), self)
        if vis.mark != "":
            vis._all_column = True
            self.current_vis = VisList([vis])
+ ) + else: + id_fields_str = "" + inverted_data_type = lux.config.executor.invert_data_type(rec_df.data_type) + if len(inverted_data_type["id"]) > 0: + for id_field in inverted_data_type["id"]: + id_fields_str += f"{id_field}, " + id_fields_str = id_fields_str[:-2] + rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") + + rec_df._prev = None # reset _prev + + # If lazy, check that recs has not yet been computed + lazy_but_not_computed = lux.config.lazy_maintain and ( + not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh + ) + eager = not lux.config.lazy_maintain + + # Check that recs has not yet been computed + if lazy_but_not_computed or eager: + is_sql_tbl = lux.config.executor.name == "SQLExecutor" + rec_infolist = [] + from lux.action.row_group import row_group + from lux.action.column_group import column_group + + # TODO: Rewrite these as register action inside default actions + if rec_df.pre_aggregated: + if rec_df.columns.name is not None: + rec_df._append_rec(rec_infolist, row_group(rec_df)) + rec_df._append_rec(rec_infolist, column_group(rec_df)) + elif not (len(rec_df) < 5 and not rec_df.pre_aggregated and not is_sql_tbl) and not ( + self.index.nlevels >= 2 or self.columns.nlevels >= 2 + ): + from lux.action.custom import custom_actions + + # generate vis from globally registered actions and append to dataframe + custom_action_collection = custom_actions(rec_df) + for rec in custom_action_collection: + rec_df._append_rec(rec_infolist, rec) + lux.config.update_actions["flag"] = False + + # Store _rec_info into a more user-friendly dictionary form + rec_df._recommendation = {} + for rec_info in rec_infolist: + action_type = rec_info["action"] + vlist = rec_info["collection"] + if len(vlist) > 0: + rec_df._recommendation[action_type] = vlist + rec_df._rec_info = rec_infolist + rec_df.show_all_column_vis() + if lux.config.render_widget: + self._widget = rec_df.render_widget() + # re-render widget for the current 
@property
def widget(self):
    """
    Return the widget stored in ``self._widget``.

    Returns
    -------
    The stored widget when ``self._widget`` is truthy, otherwise ``None``.
    """
    if self._widget:
        return self._widget
    # Explicit so every path of the getter returns a value.
    return None
def remove_deleted_recs(self, change):
    """
    Drop the visualizations listed in ``self._widget.deletedIndices`` from the recommendations.

    Registered as an observer callback on the widget's ``deletedIndices`` trait.

    Parameters
    ----------
    change
        Notification payload passed by the observer; not used.
    """
    for action, indices in self._widget.deletedIndices.items():
        # Remove from the highest position down so that earlier removals never
        # shift the positions still to be removed. Unlike a running offset,
        # this is correct regardless of the order the indices arrive in.
        for index in sorted(set(indices), reverse=True):
            self._recommendation[action].remove_index(index)
list(self._widget.selectedIntentIndex.keys())[0] + vis = self._recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] + self.set_intent_as_vis(vis) + + self.maintain_metadata() + self.current_vis = Compiler.compile_intent(self, self._intent) + self.maintain_recs() + + with self.output: + clear_output() + display(self._widget) + + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") + + def _ipython_display_(self): + from IPython.display import display + from IPython.display import clear_output + import ipywidgets as widgets + + try: + if self._pandas_only: + display(self.display_pandas()) + self._pandas_only = False + else: + if not self.index.nlevels >= 2 or self.columns.nlevels >= 2: + self.maintain_metadata() + + if self._intent != [] and (not hasattr(self, "_compiled") or not self._compiled): + from lux.processor.Compiler import Compiler + + self.current_vis = Compiler.compile_intent(self, self._intent) + + if lux.config.default_display == "lux": + self._toggle_pandas_display = False + else: + self._toggle_pandas_display = True + + # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) + self.maintain_recs() + + # Observers(callback_function, listen_to_this_variable) + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") + + button = widgets.Button( + description="Toggle Pandas/Lux", + layout=widgets.Layout(width="140px", top="5px"), + ) + self.output = widgets.Output() + display(button, self.output) + + def on_button_clicked(b): + with self.output: + if b: + self._toggle_pandas_display = not self._toggle_pandas_display + clear_output() + if self._toggle_pandas_display: + display(self.display_pandas()) + else: + # b.layout.display = "none" + 
def render_widget(self, renderer: str = "altair", input_current_vis=""):
    """
    Build the LuxWidget that displays this dataframe's current vis and recommendations.

    The widget is populated from the dictionary produced by ``self.to_JSON``:

        {
            'current_vis': {},
            'recommendation': [
                {
                    'action': 'Correlation',
                    'description': "some description",
                    'vspec': [
                        {Vega-Lite spec for vis 1},
                        {Vega-Lite spec for vis 2},
                        ...
                    ]
                },
                ... repeat for other actions
            ]
        }

    Parameters
    ----------
    renderer : str, optional
        Choice of visualization rendering library, by default "altair".
        Not referenced in the body of this method.
    input_current_vis : optional
        User-specified current vis to override default Current Vis; forwarded
        to ``self.to_JSON``, by default ""

    Returns
    -------
    luxwidget.LuxWidget
        Widget built from the current vis, recommendations, intent string,
        message HTML and plotting scale.
    """
    check_import_lux_widget()
    import luxwidget

    spec = self.to_JSON(self._rec_info, input_current_vis=input_current_vis)
    widget_kwargs = {
        "currentVis": spec["current_vis"],
        "recommendations": spec["recommendation"],
        "intent": LuxDataFrame.intent_to_string(self._intent),
        "message": self._message.to_html(),
        "config": {"plottingScale": lux.config.plotting_scale},
    }
    return luxwidget.LuxWidget(**widget_kwargs)
@staticmethod
def rec_to_JSON(recs):
    """
    Convert recommendation records into JSON-serializable dictionaries.

    Parameters
    ----------
    recs : list of dict
        Records that each carry a "collection" entry of visualizations.
        The input is deep-copied and never modified.

    Returns
    -------
    list of dict
        One copy per record with a non-empty collection. Each copy gains a
        "vspec" list (one ``to_code`` result per visualization) and loses
        its "collection" entry, which is not JSON serializable.
    """
    import copy

    rec_lst = []
    for rec in copy.deepcopy(recs):
        # Take "collection" off the record itself. Deleting through
        # rec_lst[idx] used the position in the unfiltered input, which no
        # longer lines up with rec_lst once any empty record is skipped.
        collection = rec.pop("collection")
        if len(collection) > 0:
            rec["vspec"] = [
                vis.to_code(language=lux.config.plotting_backend, prettyOutput=False)
                for vis in collection
            ]
            rec_lst.append(rec)
    return rec_lst
def head(self, n: int = 5):
    """
    Return the first ``n`` rows, as computed by the parent class's ``head``.

    The result keeps a reference to this dataframe in ``_prev`` and logs a
    "head" event in its history.

    Parameters
    ----------
    n : int, optional
        Number of rows to select, by default 5
    """
    ret_val = super(LuxDataFrame, self).head(n)
    ret_val._prev = self
    # Log the value actually requested; this was previously hard-coded to 5.
    ret_val._history.append_event("head", n=n)
    return ret_val


def tail(self, n: int = 5):
    """
    Return the last ``n`` rows, as computed by the parent class's ``tail``.

    The result keeps a reference to this dataframe in ``_prev`` and logs a
    "tail" event in its history.

    Parameters
    ----------
    n : int, optional
        Number of rows to select, by default 5
    """
    ret_val = super(LuxDataFrame, self).tail(n)
    ret_val._prev = self
    # Log the value actually requested; this was previously hard-coded to 5.
    ret_val._history.append_event("tail", n=n)
    return ret_val
"/Users/dorislee/Desktop/Research/lux/dorisjlee_fork\n" + ] + }, + { + "data": { + "text/plain": [ + "['~/Desktop/Research/lux/Untitled Folder']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pushd ../dorisjlee_fork/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "oriental-security", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "from lux.core.frame import *" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "thick-hammer", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "valued-monroe", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "df = DataFrame(dict(x=np.arange(100)))\n", + "df[\"y\"] = df.x ** 2 + 1 + 200 * np.random.normal(size=df.x.shape)\n", + "df[\"z\"] = (100 - df.x) ** 2 + 1 + 200 * np.random.normal(size=df.x.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "moving-malpractice", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/dorislee/Desktop/Research/lux/dorisjlee_fork/lux/core/frame.py:86: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n", + " self.recommendation={'Correlation': []}\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "blind-typing", + "metadata": {}, + "outputs": [], + "source": [ + "df.intent=[\"y\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "banned-median", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}