diff --git a/docs/MANUAL.html b/docs/MANUAL.html index 6829d86..f5a7e98 100644 --- a/docs/MANUAL.html +++ b/docs/MANUAL.html @@ -154,6 +154,7 @@
To download and install any one of these, use the --download-library
switch, e.g.:
kraken-build --download-library bacteria --db $DBNAME
diff --git a/docs/MANUAL.markdown b/docs/MANUAL.markdown
index 903a1ea..b274643 100644
--- a/docs/MANUAL.markdown
+++ b/docs/MANUAL.markdown
@@ -365,6 +365,7 @@ To build a custom database:
- plasmids: RefSeq plasmid sequences
- viruses: RefSeq complete viral genomes
- human: GRCh38 human genome
+ - mouse: GRCm38 mouse genome
To download and install any one of these, use the `--download-library`
switch, e.g.:
diff --git a/scripts/download_genomic_library.sh b/scripts/download_genomic_library.sh
index b1a7f13..82fc425 100755
--- a/scripts/download_genomic_library.sh
+++ b/scripts/download_genomic_library.sh
@@ -23,6 +23,7 @@
# plasmids - NCBI RefSeq plasmid sequences
# viruses - NCBI RefSeq complete viral DNA and RNA genomes
# human - NCBI RefSeq GRCh38 human reference genome
+# mouse - NCBI RefSeq GRCm38 mouse reference genome
set -u # Protect against uninitialized vars.
set -e # Stop on error
@@ -101,7 +102,7 @@ case "$1" in
do
wget --spider --no-remove-listing $FTP_SERVER/genomes/H_sapiens/$directory/
file=$(perl -nle '/^-/ and /\b(hs_ref_GRCh\S+\.fa\.gz)\s*$/ and print $1' .listing)
- [ -z "$file" ] && exit 1
+ [ -z "$file" ] && exit 1
rm .listing
wget $FTP_SERVER/genomes/H_sapiens/$directory/$file
gunzip "$file"
@@ -112,8 +113,35 @@ case "$1" in
echo "Skipping download of human genome, already downloaded here."
fi
;;
+ "mouse")
+ mkdir -p "$LIBRARY_DIR/Mouse"
+ cd "$LIBRARY_DIR/Mouse"
+ if [ ! -e "lib.complete" ]
+ then
+ # get list of CHR_* directories (wget --no-remove-listing keeps the FTP listing in .listing)
+ wget --spider --no-remove-listing $FTP_SERVER/genomes/M_musculus/
+ directories=$(perl -nle '/^d/ and /(CHR_\w+)\s*$/ and print $1' .listing)
+ rm .listing
+
+ # For each CHR_* directory, get mm_ref_GRCm* fasta gzip file name (abort if none found), d/l, and unzip
+ for directory in $directories
+ do
+ wget --spider --no-remove-listing $FTP_SERVER/genomes/M_musculus/$directory/
+ file=$(perl -nle '/^-/ and /\b(mm_ref_GRCm\S+\.fa\.gz)\s*$/ and print $1' .listing)
+ [ -z "$file" ] && exit 1
+ rm .listing
+ wget $FTP_SERVER/genomes/M_musculus/$directory/$file
+ gunzip "$file"
+ done
+
+ touch "lib.complete"
+ else
+ echo "Skipping download of mouse genome, already downloaded here."
+ fi
+ ;;
*)
echo "Unsupported library. Valid options are: "
- echo " bacteria plasmids virus human"
+ echo " bacteria plasmids viruses human mouse"
;;
+
esac
diff --git a/scripts/kraken-build b/scripts/kraken-build
index bf36ae6..fd219f9 100755
--- a/scripts/kraken-build
+++ b/scripts/kraken-build
@@ -40,7 +40,7 @@ my $DEF_MINIMIZER_LEN = 15;
my $DEF_KMER_LEN = 31;
my $DEF_THREAD_CT = 1;
-my @VALID_LIBRARY_TYPES = qw/bacteria plasmids viruses human/;
+my @VALID_LIBRARY_TYPES = qw/bacteria plasmids viruses human mouse/;
# Option/task option variables
my (
@@ -200,7 +200,7 @@ Task options (exactly one must be selected):
--download-taxonomy Download NCBI taxonomic information
--download-library TYPE Download partial library
(TYPE = one of "bacteria", "plasmids",
- "viruses", "human")
+ "viruses", "human","mouse")
--add-to-library FILE Add FILE to library
--build Create DB from library
(requires taxonomy d/l'ed and at least one file