# Extract the delta mass attached to each residue of ProForma-like sequences.
#
# Every upper-case letter in `x` is treated as one residue. A residue may be
# followed by a bracketed modification such as "[+15.995]" or "[U:+15.995]";
# the signed number inside the brackets is that residue's delta mass.
#
# @param x `character`, sequences to parse.
# @return A `list` with one `double` vector per element of `x`. Each vector
#   holds one value per matched residue: the delta mass, or 0 if the residue
#   carries no numeric modification.
.proforma_delta_masses <- function(x) {
    rx <- gregexpr(
        pattern = paste0(
            # one residue; this group's name is not used by the code below
            "(?<AminoAcid>[A-Z])",
            # optional opening bracket with an optional one-letter prefix
            "(?:\\[[GMURX]?:?)?",
            # the signed mass; must be a *named* group because it is looked
            # up by name in the capture attributes below
            "(?<DeltaMass>[+-][0-9.]+)?",
            # optional closing bracket
            "(?:\\])?"
        ),
        text = x,
        perl = TRUE
    )
    mapply(
        function(sequence, start, matched_length) {
            # An unmatched optional group yields an empty substring, which
            # as.double() turns into NA; those residues get a mass of 0.
            mod <- as.double(
                substring(sequence, start, start + matched_length - 1L)
            )
            mod[is.na(mod)] <- 0
            mod
        },
        sequence = x,
        start = lapply(
            rx, function(r) attr(r, "capture.start")[, "DeltaMass"]
        ),
        matched_length = lapply(
            rx, function(r) attr(r, "capture.length")[, "DeltaMass"]
        ),
        SIMPLIFY = FALSE,
        USE.NAMES = FALSE
    )
}