Skip to content

Commit

Permalink
Merge with PR #1 and #2
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG committed Jan 15, 2020
2 parents 3fc7c32 + d60072f commit f51ba37
Showing 1 changed file with 20 additions and 7 deletions.
27 changes: 20 additions & 7 deletions src/binning.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def read_spectra(self,mzml_file,scan_list):
if spectrum['ms level'] == 2 and 'm/z array' in spectrum:
precursor_mz = spectrum['precursorList']['precursor'][0]['selectedIonList']['selectedIon'][0]['selected ion m/z']
precursor_charge = spectrum['precursorList']['precursor'][0]['selectedIonList']['selectedIon'][0]['charge state']
print(f"INFO: Reading {scan}. Precursor m/z = {precursor_mz}")
print(f"INFO: Reading {scan}. Precursor m/z = {precursor_mz}. n peaks={len(spectrum['m/z array'])}")
peaklist = {
'm/z array': spectrum['m/z array'],
'intensity array': spectrum['intensity array'],
Expand Down Expand Up @@ -163,7 +163,7 @@ def read_spectra_clustered_mgf(self, clustered_mgf_file):
return clusters


def combine_bin_mean(self, peaklists, minimum=100, maximum=2000, binsize=0.02):
def combine_bin_mean(self, peaklists, minimum=100, maximum=2000, binsize=0.02, apply_peak_quorum=True):

array_size = int( (maximum - minimum ) / binsize ) + 1
merged_spectrum = { 'minimum': minimum, 'maximum': maximum, 'binsize': binsize }
Expand All @@ -173,6 +173,11 @@ def combine_bin_mean(self, peaklists, minimum=100, maximum=2000, binsize=0.02):
merged_spectrum['precursor_mzs'] = []
merged_spectrum['precursor_charges'] = []

#### Determine how many peaks need to be present to keep a final peak
peak_quorum = 1
if apply_peak_quorum is True:
peak_quorum = int(len(peaklists) * 0.25) + 1

for peaklist in peaklists:
#### Convert the peak lists to np arrays
intensity_array = np.asarray(peaklist['intensity array'])
Expand All @@ -187,6 +192,8 @@ def combine_bin_mean(self, peaklists, minimum=100, maximum=2000, binsize=0.02):

merged_spectrum['n_peaks'][bin_array] += 1
merged_spectrum['intensities'][bin_array] += intensity_array
merged_spectrum['mzs'][bin_array] += mz_array

merged_spectrum['precursor_mzs'].append(peaklist['precursor mz'])
merged_spectrum['precursor_charges'].append(peaklist['precursor charge'])

Expand All @@ -195,15 +202,21 @@ def combine_bin_mean(self, peaklists, minimum=100, maximum=2000, binsize=0.02):
assert all(x == charges[0] for x in charges), "Not all precursor charges in cluster are equal"

# Take the mean of all peaks per bin
merged_spectrum['intensities'][merged_spectrum['intensities'] == 0] = np.nan
merged_spectrum['intensities'][merged_spectrum['n_peaks'] < peak_quorum] = np.nan
merged_spectrum['intensities'] = np.divide(merged_spectrum['intensities'], merged_spectrum['n_peaks'])

# Only return non-zero intensity bins
nan_mask = ~np.isnan(merged_spectrum['intensities'])
merged_spectrum['intensities'] = merged_spectrum['intensities'][nan_mask]
merged_spectrum['mzs'] = np.arange(
minimum + (binsize / 2), maximum + binsize, binsize, dtype=np.int32
)[nan_mask]

#### EWD Changed this from just the bin size computation to taking the mean of mz values in the bin
#merged_spectrum['mzs'] = np.arange(
# minimum + (binsize / 2), maximum + binsize, binsize, dtype=np.int32
#)[nan_mask]
merged_spectrum['mzs'][merged_spectrum['mzs'] == 0] = np.nan
merged_spectrum['mzs'] = np.divide(merged_spectrum['mzs'], merged_spectrum['n_peaks'])
merged_spectrum['mzs'] = merged_spectrum['mzs'][nan_mask]

merged_spectrum['precursor_mz'] = np.mean(merged_spectrum['precursor_mzs'])
merged_spectrum['precursor_charge'] = charges[0]

Expand All @@ -220,7 +233,7 @@ def write_spectrum(self, spectra, mgf_file):
TITLE={i}
PEPMASS={spectrum['precursor_mz']}
CHARGE={spectrum['precursor_charge']}+
"""
"""
for mz, intensity in zip(spectrum['mzs'], spectrum['intensities']):
if not np.isnan(intensity):
mgf_tmp += f"{mz} {intensity}\n"
Expand Down

0 comments on commit f51ba37

Please sign in to comment.