diff --git a/gseapy/gsea.py b/gseapy/gsea.py index d36195d..9cd0009 100644 --- a/gseapy/gsea.py +++ b/gseapy/gsea.py @@ -75,7 +75,6 @@ def load_data(self) -> Tuple[pd.DataFrame, Dict]: """pre-processed the data frame.new filtering methods will be implement here.""" exprs = self._load_data(self.data) exprs = self._check_data(exprs) - print(exprs) exprs, cls_dict = self._filter_data(exprs) return exprs, cls_dict @@ -110,7 +109,6 @@ def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame: else: df_std = df.groupby(by=cls_dict, axis=1).std(numeric_only=True, ddof=0) - print(df) # remove rows that are all zeros ! df = df.loc[df.abs().sum(axis=1) > 0, :] # remove rows that std are zeros for sample size >= 3 in each group diff --git a/gseapy/plot.py b/gseapy/plot.py index 7602b85..d86955c 100644 --- a/gseapy/plot.py +++ b/gseapy/plot.py @@ -660,7 +660,7 @@ def isfloat(self, x): def process(self, df: pd.DataFrame): # check if any values in `df[colname]` can't be coerced to floats can_be_coerced = df[self.colname].map(self.isfloat).sum() - if can_be_coerced < len(df): + if can_be_coerced < df.shape[0]: msg = "some value in %s could not be typecast to `float`" % self.colname raise ValueError(msg) # subset @@ -669,7 +669,7 @@ def process(self, df: pd.DataFrame): mask.loc[:] = True df = df.loc[mask] - if len(df) < 1: + if df.shape[0] < 1: msg = "Warning: No enrich terms when cutoff = %s" % self.thresh raise ValueError(msg) self.cbar_title = self.colname @@ -683,8 +683,13 @@ def process(self, df: pd.DataFrame): "NOM p-val": "Pval", "FDR q-val": "FDR", } + ## impute the 0s in pval, fdr for visualization purpose if self.colname in ["Adjusted P-value", "P-value", "NOM p-val", "FDR q-val"]: - # get top_terms + # if all values are zeros, raise error + if not all(df[self.colname].abs() > 0): + raise ValueError( + f"Can not detetermine colormap. All values in {self.colname} are 0s" + ) df = df.sort_values(by=self.colname) df[self.colname].replace( 0, method="bfill", inplace=True @@ -696,6 +701,7 @@ def process(self, df: pd.DataFrame): # get top terms; sort ascending if (self.x is not None) and (self.x in df.columns): + # if x is numeric column # get top term of each group df = ( df.groupby(self.x)