Skip to content

Commit

Permalink
Merge pull request #2 from mlocardpaulet/TopPICv1.3.3
Browse files Browse the repository at this point in the history
compatibility TopPICv1.3.3
  • Loading branch information
mlocardpaulet authored Jul 3, 2020
2 parents de34856 + 085a54a commit 15d1c64
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 26 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ Figures/
*.Rproj
files/test/TopPic3
test/
files/test/TopPicv133


11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ The fact that you are presently reading this means that you have had knowledge o

## Version history:

*V2.0: version corresponding to the paper.
*V2.1: November 2019 - Make the MS/MS visualisation compatible with the new file formats of TopPic and Proteome Discoverer.
In this new version, the intensity of the precursor for the MSMS that gave the protein ID is not provided on hovering because this information is missing from the new outputs of Proteome Discoverer

* V2.0: version corresponding to the paper.

* V2.1: November 2019 - Make the MS/MS visualisation compatible with the new file formats of TopPic and Proteome Discoverer.
In this new version, the intensity of the precursor for the MS/MS that gave the protein ID is not shown on hovering, because this information is missing from the new outputs of Proteome Discoverer.

* V2.2: July 2020 - adapt to new TopPIC input format for msalign files.

74 changes: 51 additions & 23 deletions app.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,58 +85,88 @@ ThresholdCleaning <- function(l, threshold) {
}

TopPicMS1Parsing <- function(fname) {
  # Parse a TopPIC MS1 msalign file into a table in the style of RoWinPro
  # tables for use in VisioProt.
  #
  # Args:
  #   fname: path to the file to parse.
  #
  # Returns:
  #   A data.frame with the columns RT (minutes), Mass, intensity, charge and
  #   SCANs, restricted to ions with a charge of at least 5 and sorted by
  #   decreasing intensity. The charge and SCANs columns are set to NA before
  #   returning, for the functions to come (thresholding, renaming).
  #
  # Raises:
  #   An error when the number of SCANS= or RETENTION_TIME= lines does not
  #   match the number of "BEGIN IONS" markers.
  cat("== Start parsing TopPIC data MS1 ==\n")
  allData <- readLines(fname)

  # The file header ends at the second "Parameters" banner. When no such
  # banner exists nothing is stripped: the parsing below is keyed on line
  # content, not on fixed offsets, so a missing header is harmless.
  header_end <- which(grepl("^[^P]+Parameters ", allData, perl = TRUE))[2]
  if (!is.na(header_end)) {
    allData <- allData[-seq_len(header_end)]
  }

  rep_ions_entries <- which(allData == "BEGIN IONS")
  n_spectra <- length(rep_ions_entries)
  SCANs <- gsub("SCANS=", "", allData[grepl("^SCANS", allData)])
  RT <- gsub("RETENTION_TIME=", "", allData[grepl("^RETENTION_TIME", allData)])
  if (length(SCANs) != n_spectra || length(RT) != n_spectra) {
    stop(
      "Malformed msalign file: ", n_spectra, " spectra but ",
      length(SCANs), " SCANS and ", length(RT), " RETENTION_TIME entries.",
      call. = FALSE
    )
  }

  # Ion lines are the lines starting with a digit that belong to a spectrum.
  # Counting them per spectrum directly (instead of assuming every spectrum
  # has as many metadata lines as the first one) keeps the scan/RT labels
  # aligned even when spectra carry different numbers of metadata lines.
  spectrum_index <- cumsum(allData == "BEGIN IONS")
  is_ion <- substr(allData, 1, 1) %in% as.character(0:9) & spectrum_index > 0
  ions_per_scan <- tabulate(spectrum_index[is_ion], nbins = n_spectra)

  if (any(is_ion)) {
    # The trailing newline guarantees that fread treats its input as text,
    # even when there is a single ion line.
    ions <- fread(
      paste0(paste(allData[is_ion], collapse = "\n"), "\n"),
      sep = "\t", header = FALSE, data.table = FALSE
    )
  } else {
    ions <- data.frame(V1 = numeric(0), V2 = numeric(0), V3 = numeric(0))
  }
  names(ions) <- c("Mass", "intensity", "charge")

  dat <- data.frame(
    RT = rep(RT, ions_per_scan),
    Mass = ions$Mass,
    intensity = ions$intensity,
    charge = ions$charge,
    SCANs = rep(SCANs, ions_per_scan),
    stringsAsFactors = FALSE
  )

  # Keep only the ions >= 5+
  dat <- dat[!is.na(dat$charge) & dat$charge >= 5, , drop = FALSE]
  # Change from seconds to minutes
  dat$RT <- as.numeric(dat$RT) / 60
  # Keep only the 100% highest intensities
  dat <- dat[order(dat$intensity, decreasing = TRUE), , drop = FALSE]
  dat <- dat[!is.na(dat$intensity), , drop = FALSE]
  # seq_len() avoids selecting a bogus all-NA row when no ion is left.
  thresh <- floor(1 * nrow(dat))
  dat <- dat[seq_len(thresh), , drop = FALSE]

  # For the functions to come (thresholding, renaming):
  dat[, 4] <- rep(NA, nrow(dat))
  dat[, 5] <- rep(NA, nrow(dat))
  cat("== End parsing TopPIC MS1 ==\n\n")
  dat
}

TopPicMS2Parsing <- function(fname) {
  # Parse a TopPIC MS2 msalign file into a table in the style of RoWinPro
  # tables for use in VisioProt.
  #
  # Args:
  #   fname: path to the file to parse.
  #
  # Returns:
  #   A data.frame with one row per spectrum and the columns RT (numeric,
  #   minutes), Mass, intensity and Scan (the last three as character strings
  #   taken from the PRECURSOR_MASS, PRECURSOR_INTENSITY and SCANS lines).
  #
  # Raises:
  #   An error when the four fields are not found the same number of times.
  cat("== Start parsing TopPIC data MS2 ==\n")
  allData <- readLines(fname)

  # The file header ends at the second "Parameters" banner. When no such
  # banner exists nothing is stripped: fields are located by their key, not
  # by fixed offsets, so a missing header is harmless.
  header_end <- which(grepl("^[^P]+Parameters ", allData, perl = TRUE))[2]
  if (!is.na(header_end)) {
    allData <- allData[-seq_len(header_end)]
  }

  # Values of every "KEY=value" line for the given key, in file order.
  field_values <- function(key) {
    pattern <- paste0("^", key, "=")
    sub(pattern, "", allData[grepl(pattern, allData)])
  }
  SCANs <- field_values("SCANS")
  RT <- field_values("RETENTION_TIME")
  Mass <- field_values("PRECURSOR_MASS")
  intensity <- field_values("PRECURSOR_INTENSITY")

  # data.frame() would silently recycle a shorter field and misalign the
  # spectra, so reject inconsistent files explicitly.
  field_counts <- c(length(RT), length(Mass), length(intensity), length(SCANs))
  if (length(unique(field_counts)) != 1L) {
    stop(
      "Malformed msalign file: found ", field_counts[1], " RETENTION_TIME, ",
      field_counts[2], " PRECURSOR_MASS, ", field_counts[3],
      " PRECURSOR_INTENSITY and ", field_counts[4], " SCANS entries.",
      call. = FALSE
    )
  }

  dat <- data.frame(
    "RT" = RT, "Mass" = Mass, "intensity" = intensity, "Scan" = SCANs,
    stringsAsFactors = FALSE
  )
  # Change from seconds to minutes
  dat$RT <- as.numeric(dat$RT) / 60
  cat("== End parsing TopPIC MS2 ==\n\n")
  dat
}

TopPicIDParsing <- function(fname) {
  # Parse a TopPIC identification output file into a table in the style of
  # RoWinPro tables for use in VisioProt.
  #
  # Args:
  #   fname: path to the file to parse.
  #
  # Returns:
  #   A data.frame read from the tab-separated table that follows the file
  #   header, with every "#" removed from the column names.
  #
  # Raises:
  #   An error when nothing but blank lines follows the header.
  cat("== Start parsing TopPIC data ID ==\n")
  allData <- readLines(fname)

  # The file header ends at the second "Parameters" banner. When no such
  # banner exists nothing is stripped; presumably the table then starts at
  # the top of the file -- TODO confirm against header-less TopPIC outputs.
  header_end <- which(grepl("^[^P]+Parameters ", allData, perl = TRUE))[2]
  if (!is.na(header_end)) {
    allData <- allData[-seq_len(header_end)]
  }

  # Skip blank lines so that the "#" clean-up below hits the column header
  # row and not an empty separator line.
  non_blank <- which(nzchar(trimws(allData)))
  if (length(non_blank) == 0L) {
    stop("No identification table found in TopPIC file: ", fname,
         call. = FALSE)
  }
  allData <- allData[non_blank[1]:length(allData)]
  allData[1] <- gsub("#", "", allData[1])

  # The trailing newline guarantees that fread treats its input as text,
  # even when the table is reduced to its header row.
  dat <- fread(
    paste0(paste(allData, collapse = "\n"), "\n"),
    sep = "\t", header = TRUE, stringsAsFactors = FALSE, data.table = FALSE
  )
  cat("== End parsing TopPIC ID ==\n\n")
  dat
}

############################################################################

# App:
Expand Down Expand Up @@ -335,7 +365,9 @@ ui <- fluidPage(
# Footer
tabsetPanel(
tabPanel(
HTML('<footer><font size="0.8">copyright 2017 - CNRS - All rights reserved - VisioProt-MS V2.1</font></footer>')

HTML('<footer><font size="0.8">copyright 2017 - CNRS - All rights reserved - VisioProt-MS V2.2</font></footer>')

)
)
)
Expand Down Expand Up @@ -838,11 +870,7 @@ server <- function(input, output, clientData, session) {
validate(
need(grepl("_ms2.OUTPUT_TABLE", InputFilesMS2TP()$IDfile$name, fixed = T) | grepl("_ms2_toppic", InputFilesMS2TP()$IDfile$name, fixed = T), "Error in file format for plotting ID data.\nYou have to upload the \"_ms2.OUTPUT_TABLE\", or \"_ms2_toppic\" output file from TopPic associated with the deconvoluted MS2 weights uploaded as \"input file for MS2\".")
)
allData <- readLines(InputFilesMS2TP()$IDfile$datapath)
allData <- allData[-(1:23)]
allData[1] <- gsub("#", "", allData[1])
IDTP <- fread(paste(allData, collapse = "\n"), sep = "\t", header = T, stringsAsFactors = F)
class(IDTP) <- "data.frame"
IDTP <- TopPicIDParsing(InputFilesMS2TP()$IDfile$datapath)
MS2TP <- TopPicMS2Parsing(InputFilesMS2TP()$MS2file$datapath)
names(IDTP)[names(IDTP) == "Spectrum ID"] <- "Scan"
dat <- merge(MS2TP, IDTP, by = "Scan", all = T)
Expand Down

0 comments on commit 15d1c64

Please sign in to comment.