Skip to content

Commit

Permalink
when geneid is index, gene name and ranking values are columns #250
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhuoqing Fang authored and Zhuoqing Fang committed Dec 16, 2024
1 parent 7d16d76 commit c53a4c2
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions gseapy/gsea.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,11 +472,15 @@ def _load_ranking(self, rank_metric: pd.DataFrame) -> pd.Series:
"""
# load data
# sort ranking values from high to low
rnk_cols = rank_metric.columns
# in case the input has more than two columns, only select the last two
# this happens when the gene id is the index, while gene name and ranking value are columns
if len(rnk_cols) > 2:
rank_metric = rank_metric.iloc[:, -2:]
rnk_cols = rank_metric.columns
# if not ranking.is_monotonic_decreasing:
# ranking = ranking.sort_values(ascending=self.ascending)
rank_metric.sort_values(by=rnk_cols[1], ascending=self.ascending, inplace=True)
rank_metric.sort_values(by=rnk_cols[-1], ascending=self.ascending, inplace=True)
# drop na values
if rank_metric.isnull().any(axis=1).sum() > 0:
self._logger.warning(
Expand Down Expand Up @@ -518,12 +522,6 @@ def load_ranking(self):
parse rnk input
"""
rank_metric = self._load_data(self.rnk) # gene id is the first column
# only two column dataframe is accepted
if rank_metric.shape[1] > 2:
raise ValueError(
"Input gene rankings should be a two column dataframe, "
+ "with the first column as gene names and the second column as prerank values."
)
if rank_metric.select_dtypes(np.number).shape[1] == 1:
# return series
return self._load_ranking(rank_metric)
Expand Down

0 comments on commit c53a4c2

Please sign in to comment.