Skip to content

Commit

Permalink
updated README.md
Browse files Browse the repository at this point in the history
  • Loading branch information
knausb committed Jul 26, 2016
2 parents ff27d4d + 7d098f6 commit 429b570
Show file tree
Hide file tree
Showing 35 changed files with 620 additions and 95 deletions.
28 changes: 26 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,37 @@
# Sample .travis.yml for R projects

language: r
r:
- release
# - devel

cache: packages
warnings_are_errors: true
sudo: required
sudo: false

# https://github.com/jimhester/rio/blob/9026180fd731aaed34f21cba3b5e207124618e5b/.travis.yml#L8-L20

addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-4.9
- g++-4.9

before_install: |
mkdir ~/.R
cat <<EOF > ~/.R/Makevars
CXX1X=g++-4.9
CXX1XSTD=-std=c++11
r_github_packages:
- jimhester/covr
# - jimhester/covr
# - ./travis-tool.sh install_github knausb/vcfR

r_packages:
- covr

after_success:
- Rscript -e 'library(covr);coveralls()'

Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: Facilitates easy manipulation of variant call format (VCF) data.
Once processing is complete data may be written to a VCF file (*.vcf.gz).
It also may be converted into other popular R objects (e.g., genlight, DNAbin).
VcfR provides a link between VCF data and familiar R software.
Version: 1.1.0
Version: 1.2.0
Authors@R: c(person(c('Brian', 'J.'), 'Knaus', role = c('cre', 'aut'),
email = '[email protected]'),
person(c('Niklaus', 'J.'), 'Grunwald', role = 'aut',
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export(heatmap.bp)
export(is.biallelic)
export(is.het)
export(is.polymorphic)
export(is_het)
export(maf)
export(masker)
export(null.plot)
Expand Down
11 changes: 10 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@

# vcfR 1.2.0
Released on CRAN 2016-07-25.

* `vcfR2genind()` greps genotypes containing a missing allele ('.') and sets them to NA.
* dplyr v0.5.0 broke some vcfR2tidy functionality. This functionality should be fixed in this release.
* `is_het()` rapidly identifies heterozygotes.
* `extract.info()` scores missing elements as NA.


# vcfR 1.1.0
Submitted to CRAN 2016-05-25.
Released on CRAN 2016-05-26.

This release includes the incorporation of suggestions made by reviewers of the manuscript submitted to Molecular Ecology Resources.

Expand All @@ -24,6 +32,7 @@ This release includes the incorporation of suggestions made by reviewers of the
* `read.vcfR()` now handles tilde expansion.
* `addID()` populates the missing values in the ID column of VCF data by concatenating the chromosome and position.


# vcfR 1.0.0
Released on CRAN 2016-02-22.
This release was used to prepare the manuscript that was submitted to Molecular Ecology Resources.
Expand Down
4 changes: 2 additions & 2 deletions R/AllClass.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ setOldClass("DNAbin")
#'
#' The \strong{vcf} slot is an object of class vcfR \code{\link{vcfR-class}}.
#'
#' The \strong{ann} slot is a data.frame containing \href{http://www.sequenceontology.org/gff3.shtml}{gff format} data.
#' The \strong{ann} slot is a data.frame containing \href{https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md}{gff format} data.
#' When this slot is not populated it has nrows equal to zero.
#'
#' The \strong{var.info} slot contains a data.frame containing information about variants.
Expand All @@ -130,7 +130,7 @@ setOldClass("DNAbin")
#'
#' @seealso \code{\link{vcfR-class}}, \code{\link[ape]{DNAbin}},
#' \href{http://www.1000genomes.org/wiki/analysis/variant\%20call\%20format/vcf-variant-call-format-version-41}{vcf format},
#' \href{http://www.sequenceontology.org/gff3.shtml}{gff3 format}
#' \href{https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md}{gff3 format}
#'
#'
#' @import methods
Expand Down
10 changes: 10 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@ gt_to_popsum <- function(var_info, gt) {
.Call('vcfR_gt_to_popsum', PACKAGE = 'vcfR', var_info, gt)
}

#' @rdname is_het
#' @name is_het
#'
#' @export
is_het <- function(x, na_is_false = TRUE) {
  # Thin R-level wrapper: both arguments are handed, unchanged, to the
  # compiled routine 'vcfR_is_het' (looked up in the vcfR package) and
  # whatever that routine returns is returned to the caller.
  .Call("vcfR_is_het", x, na_is_false, PACKAGE = "vcfR")
}

NM2winNM <- function(x, pos, maxbp, winsize = 100L) {
.Call('vcfR_NM2winNM', PACKAGE = 'vcfR', x, pos, maxbp, winsize)
}
Expand Down
18 changes: 14 additions & 4 deletions R/addID.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,26 @@
#' @description
#' Populate the ID column of VCF data by concatenating the chromosome, position and optionally an index.
#'
#' @param x an object of class vcfR or chromR
#' @param x an object of class vcfR or chromR.
#' @param sep a character string to separate the terms.
#'
#' @details
#' Variant callers typically leave the ID column empty in VCF data.
#' However, the VCF data may potentially include variants with IDs as well as variants without.
#' This function populates the missing elements by concatenating the chromosome and position.
#' When this concatenation results in non-unique names, an index is added to force uniqueness.
#'
#'
#' @examples
#' data(vcfR_test)
#' head(vcfR_test)
#' vcfR_test <- addID(vcfR_test)
#' head(vcfR_test)
#'
#'
#' @export
addID <- function(x){
#'
addID <- function(x, sep="_"){
if( class(x) == 'chromR' ){
ID <- x@vcf@fix[,'ID']
CHROM <- x@vcf@fix[,'CHROM']
Expand All @@ -28,9 +38,9 @@ addID <- function(x){
}

if( sum(!is.na(ID)) < length(ID) ){
ID[ is.na(ID) ] <- paste( CHROM[ is.na(ID) ], POS[ is.na(ID) ], sep="_" )
ID[ is.na(ID) ] <- paste( CHROM[ is.na(ID) ], POS[ is.na(ID) ], sep=sep )
if( length(unique(ID)) < length(ID) ){
ID <- paste( ID, 1:length(ID), sep="_" )
ID <- paste( ID, 1:length(ID), sep=sep )
}
}

Expand Down
3 changes: 3 additions & 0 deletions R/chromo_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ chromo <- function( chrom,
}
graphics::boxplot( x = drlist1$dmat[,-1],
ylim = bdim,
xaxt = "n",
yaxt = "n",
col = drlist1$bwcol
)
Expand Down Expand Up @@ -205,6 +206,7 @@ chromo <- function( chrom,
}
graphics::boxplot( x = drlist2$dmat[,-1],
ylim = bdim,
xaxt = "n",
yaxt = "n",
col = drlist2$bwcol
)
Expand Down Expand Up @@ -234,6 +236,7 @@ chromo <- function( chrom,
}
graphics::boxplot( x = drlist3$dmat[,-1],
ylim = bdim,
xaxt = "n",
yaxt = "n",
col = drlist3$bwcol
)
Expand Down
2 changes: 1 addition & 1 deletion R/create_chromR.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
#' \code{\link{vcfR-class}},
#' \code{\link[ape]{DNAbin}},
#' \href{http://www.1000genomes.org/wiki/analysis/variant\%20call\%20format/vcf-variant-call-format-version-41}{vcf format},
#' \href{http://www.sequenceontology.org/gff3.shtml}{gff3 format}
#' \href{https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md}{gff3 format}
#'
#' @examples
#' library(vcfR)
Expand Down
21 changes: 14 additions & 7 deletions R/extract_gt.R
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,20 @@ extract.info <- function(x, element, as.numeric=FALSE, mask=FALSE){
stop("Expecting an object of class vcfR or chromR.")
}

values <- unlist(
lapply(strsplit(unlist(
# lapply(strsplit([email protected]$INFO, split=";"),
lapply(strsplit(x@fix[,'INFO'], split=";"),
function(x){grep(paste("^", element, "=", sep=""), x, value=TRUE)})),
split="="), function(x){x[2]})
)
# values <- unlist(
# lapply(strsplit(unlist(
# lapply(strsplit(x@fix[,'INFO'], split=";"),
# function(x){grep(paste("^", element, "=", sep=""), x, value=TRUE)})),
# split="="), function(x){x[2]})
# )

values <- strsplit(x@fix[,'INFO'], split=";")
values <- lapply(values, function(x){grep(paste("^", element, "=", sep=""), x, value=TRUE)})
values <- lapply(values, function(x){ unlist( strsplit(x, split="=") ) })
values <- lapply(values, function(x){x[2]})
values <- lapply(values, function(x){ if(is.null(x)){NA}else{x} })
values <- unlist(values)


if(as.numeric == TRUE){
values <- as.numeric(values)
Expand Down
13 changes: 8 additions & 5 deletions R/heatmap_bp.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,14 @@ heatmap.bp <- function(x, cbarplot = TRUE, rbarplot = TRUE,
graphics::text(min(rowSums(x, na.rm=na.rm), na.rm=na.rm), c(1:nrow(x))-0.5, rownames(x), adj=c(0.0,0.5), srt=0)
}
#
graphics::barplot(rep(1, times=length(col.ramp)), space=0, border=NA, horiz = TRUE,
col = col.ramp,
axes=FALSE)
graphics::text(0.5, 5, "Low", col="#FFFFFF")
graphics::text(0.5, 95, "High", col="#FFFFFF")
mp <- graphics::barplot(rep(1, times=length(col.ramp)), space=0, border=NA, horiz = TRUE,
col = col.ramp, axes=FALSE)
# graphics::text(0.5, 5, "Low", col="#FFFFFF")
# graphics::text(0.5, 95, "High", col="#FFFFFF")
if ( mp[nrow(mp),1] - mp[1,1] >= 1 ){
graphics::text(0.5, mp[1,1], "Low", col="#FFFFFF")
graphics::text(0.5, mp[nrow(mp),1], "High", col="#FFFFFF")
}
}
graphics::par(mfrow=c(1,1))
graphics::par(mar=c(5,4,4,2))
Expand Down
10 changes: 9 additions & 1 deletion R/io_vcfR.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@
#' If the parameter 'mask' is set to TRUE and the object is of class chromR (which has a mask slot), this mask is used to subset the data.
#' If an index is supplied as 'mask', then this index is used, and recycled as necessary, to subset the data.
#'
#' Because vcfR provides the opportunity to manipulate VCF data, it also provides the opportunity for the user to create invalid VCF files.
#' If there is a question regarding the validity of a file you have created, one option is the \href{https://vcftools.github.io/perl_module.html#vcf-validator}{VCF validator} from VCFtools.
#'
#'
#' @return read.vcfR returns an object of class \code{\link{vcfR-class}}.
#' See the \strong{vignette:} \code{vignette('vcf_data')}.
#' The function write.vcf creates a gzipped VCF file.
Expand Down Expand Up @@ -148,7 +152,11 @@ write.vcf <- function(x, file = "", mask = FALSE, APPEND = FALSE){

if(APPEND == FALSE){
gz <- gzfile(file, "w")
write(x@meta, gz)

if( length(x@meta) > 0 ){
write(x@meta, gz)
}

header <- c(colnames(x@fix), colnames(x@gt))
header[1] <- "#CHROM"
header <- paste(header, collapse="\t")
Expand Down
5 changes: 5 additions & 0 deletions R/vcfR_conversion.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@
vcfR2genind <- function(x, sep="[|/]") {
locNames <- x@fix[,'ID']
x <- extract.gt(x)
x[grep('.', x, fixed = TRUE)] <- NA
# x[grep('\\.', x)] <- NA
# x[x == "./."] <- NA
# x[x == ".|."] <- NA

# x <- adegenet::df2genind(t(x), sep=sep)
if( requireNamespace('adegenet') ){
x <- adegenet::df2genind(t(x), sep=sep)
Expand Down
Loading

0 comments on commit 429b570

Please sign in to comment.