Skip to content

Commit

Permalink
black applied.
Browse files (browse the repository at this point in the history)
  • Loading branch information
ypriverol committed Nov 11, 2024
1 parent c11734d commit 7b3fe9d
Show file tree
Hide file tree
Showing 9 changed files with 34 additions and 22 deletions.
14 changes: 9 additions & 5 deletions quantmsio/commands/maxquant_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,20 @@ def convert_maxquant_feature(
output_path = output_folder + "/" + filename
if not partitions:
MQ.write_feature_to_file(
evidence_path=evidence_file, sdrf_path=sdrf_file, output_path=output_path, chunksize=chunksize, protein_file=protein_file
evidence_path=evidence_file,
sdrf_path=sdrf_file,
output_path=output_path,
chunksize=chunksize,
protein_file=protein_file,
)
else:
partitions = partitions.split(",")
MQ.write_features_to_file(
evidence_path=evidence_file,
evidence_path=evidence_file,
sdrf_path=sdrf_file,
output_folder = output_folder,
output_folder=output_folder,
filename=filename,
partitions=partitions,
chunksize=chunksize,
protein_file=protein_file
)
protein_file=protein_file,
)
6 changes: 3 additions & 3 deletions quantmsio/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@
"Modified sequence": "peptidoform",
"Raw file": "reference_file_name",
"Score": "andromeda_score",
"Delta score": "andromeda_delta_score",
"PIF": "parent_ion_score"
"Delta score": "andromeda_delta_score",
"PIF": "parent_ion_score",
}

MAXQUANT_FEATURE_MAP = {
Expand All @@ -92,7 +92,7 @@
"Raw file": "reference_file_name",
"Score": "andromeda_score",
"Delta score": "andromeda_delta_score",
"PIF": "parent_ion_score",
"PIF": "parent_ion_score",
"Reverse": "is_decoy",
"m/z": "observed_mz",
"Calibrated retention time": "rt",
Expand Down
4 changes: 2 additions & 2 deletions quantmsio/core/diann.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def write_features_to_file(
output_folder: str,
filename: str,
partitions: list,
file_num:int = 50,
file_num: int = 50,
protein_file=None,
):
pqwriters = {}
Expand All @@ -261,4 +261,4 @@ def write_features_to_file(
feature = Feature.transform_feature(df)
pqwriters = save_slice_file(feature, pqwriters, output_folder, key, filename)
close_file(pqwriters=pqwriters)
self.destroy_duckdb_database()
self.destroy_duckdb_database()
2 changes: 1 addition & 1 deletion quantmsio/core/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pyarrow as pa
import pyarrow.parquet as pq
from quantmsio.operate.tools import get_ahocorasick, get_protein_accession
from quantmsio.utils.file_utils import extract_protein_list,save_slice_file, close_file
from quantmsio.utils.file_utils import extract_protein_list, save_slice_file, close_file
from quantmsio.core.mztab import MzTab
from quantmsio.core.psm import Psm
from quantmsio.core.sdrf import SDRFHandler
Expand Down
15 changes: 10 additions & 5 deletions quantmsio/core/maxquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class MaxQuant:
def __init__(self):
pass

def iter_batch(self, file_path: str, label: str = "feature", chunksize: int = 100000, protein_str:str = None):
def iter_batch(self, file_path: str, label: str = "feature", chunksize: int = 100000, protein_str: str = None):
self.mods_map = self.get_mods_map(file_path)
self._automaton = get_ahocorasick(self.mods_map)
col_df = pd.read_csv(file_path, sep="\t", nrows=1)
Expand Down Expand Up @@ -252,7 +252,7 @@ def main_operate(self, df: pd.DataFrame):
{"score_name": "andromeda_score", "score_value": row["andromeda_score"]},
{"score_name": "andromeda_delta_score", "score_value": row["andromeda_delta_score"]},
],
axis=1
axis=1,
)
df.loc[:, "cv_params"] = df["parent_ion_score"].apply(
lambda socre: [{"cv_name": "parent_ion_score", "cv_value": str(socre)}]
Expand Down Expand Up @@ -298,7 +298,12 @@ def _init_sdrf(self, sdrf_path: str):
self._sample_map = Sdrf.get_sample_map_run()

def write_feature_to_file(
self, evidence_path: str, sdrf_path: str, output_path: str, chunksize: int = 1000000, protein_file=None,
self,
evidence_path: str,
sdrf_path: str,
output_path: str,
chunksize: int = 1000000,
protein_file=None,
):
self._init_sdrf(sdrf_path)
pqwriter = None
Expand All @@ -313,7 +318,7 @@ def write_feature_to_file(

def write_features_to_file(
self,
evidence_path: str,
evidence_path: str,
sdrf_path: str,
output_folder: str,
filename: str,
Expand All @@ -331,4 +336,4 @@ def write_features_to_file(
for key, df in Feature.slice(report, partitions):
feature = Feature.transform_feature(df)
pqwriters = save_slice_file(feature, pqwriters, output_folder, key, filename)
close_file(pqwriters=pqwriters)
close_file(pqwriters=pqwriters)
1 change: 1 addition & 0 deletions quantmsio/operate/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from quantmsio.utils.pride_utils import get_unanimous_name
from quantmsio.utils.file_utils import load_de_or_ae, save_slice_file, save_file, close_file


def init_save_info(parquet_path: str):
pqwriters = {}
pqwriter_no_part = None
Expand Down
5 changes: 4 additions & 1 deletion quantmsio/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def calculate_buffer_size(file_path: str) -> int:

return min(int(total_memory * fraction_of_memory), max_buffer_size, file_size)


def save_slice_file(parquet_table, pqwriters, output_folder, partitions, filename):
folder = [output_folder] + [str(col) for col in partitions]
folder = os.path.join(*folder)
Expand All @@ -95,6 +96,7 @@ def save_slice_file(parquet_table, pqwriters, output_folder, partitions, filenam
pqwriters[partitions].write_table(parquet_table)
return pqwriters


def save_file(parquet_table, pqwriter, output_folder, filename):
if not os.path.exists(output_folder):
os.makedirs(output_folder, exist_ok=True)
Expand All @@ -104,9 +106,10 @@ def save_file(parquet_table, pqwriter, output_folder, filename):
pqwriter.write_table(parquet_table)
return pqwriter


def close_file(pqwriters: dict = None, pqwriter: object = None):
if pqwriter:
pqwriter.close()
else:
for pqwriter in pqwriters.values():
pqwriter.close()
pqwriter.close()
6 changes: 3 additions & 3 deletions tests/test_diann.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_transform_feature(self, test_data):
D.add_additional_msg(report)
Feature.convert_to_parquet_format(report)
Feature.transform_feature(report)

@data(*test_datas)
def test_transform_features(self, test_data):
report_file = datafile(test_data[0])
Expand All @@ -36,5 +36,5 @@ def test_transform_features(self, test_data):
for report in D.main_report_df(0.05, mzml, 2):
D.add_additional_msg(report)
Feature.convert_to_parquet_format(report)
for _, df in Feature.slice(report, ["reference_file_name","precursor_charge"]):
Feature.transform_feature(df)
for _, df in Feature.slice(report, ["reference_file_name", "precursor_charge"]):
Feature.transform_feature(df)
3 changes: 1 addition & 2 deletions tests/test_maxquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,5 @@ def test_transform_features(self, test_data):
for report in M.iter_batch(evidence_file, chunksize=500000):
M.transform_feature(report)
Feature.convert_to_parquet_format(report)
for _, df in Feature.slice(report, ["reference_file_name","precursor_charge"]):
for _, df in Feature.slice(report, ["reference_file_name", "precursor_charge"]):
Feature.transform_feature(df)

0 comments on commit 7b3fe9d

Please sign in to comment.