From fae0fb58ec103c13c13026f1850a9ff0a24fdb17 Mon Sep 17 00:00:00 2001 From: Daniel Cameron Date: Tue, 20 Oct 2020 12:33:59 +1100 Subject: [PATCH] #410 added locking to setupreference step switched to always creating {REFERENCE}.dict during sequence dictionary check --- scripts/gridss.sh | 39 ++++++++++++++----- .../cmdline/ReferenceCommandLineProgram.java | 4 +- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/scripts/gridss.sh b/scripts/gridss.sh index 260619b45..6a236240a 100644 --- a/scripts/gridss.sh +++ b/scripts/gridss.sh @@ -518,23 +518,44 @@ if [[ "$useproperpair" == "true" ]] ; then fi if [[ $do_setupreference == true ]] ; then + lock_file=$reference.tmp.gridsslock if [[ ! -f ${reference}.fai ]] && [[ ! -f $(basename $reference .fa).fai ]] && [[ ! -f $(basename $reference .fasta).fai ]] ; then - write_status "Running samtools faidx (once-off setup for reference genome)" - $timecmd samtools faidx $reference 1>&2 2>> $logfile + if mkdir $lock_file ; then + write_status "Running samtools faidx (once-off setup for reference genome)" + $timecmd samtools faidx $reference 1>&2 2>> $logfile + rmdir $lock_file + else + write_status "Multiple instances of setupreference cannot be run concurrently. Aborting due to existence of $lock_file" + exit $EX_CANTCREAT + fi fi if [[ ! -f ${reference}.bwt ]] ; then - write_status "Running bwa index (once-off setup for reference genome)" - $timecmd bwa index $reference 1>&2 2>> $logfile + if mkdir $lock_file ; then + write_status "Running bwa index (once-off setup for reference genome)" + $timecmd bwa index $reference 1>&2 2>> $logfile + rmdir $lock_file + else + write_status "Multiple instances of setupreference cannot be run concurrently. Aborting due to existence of $lock_file" + exit $EX_CANTCREAT + fi fi #if [[ ! -f ${reference}.idx ]] ; then # write_status "Running minimap2 index (once-off setup for reference genome)" # $timecmd minimap2 -d ${reference}.idx ${reference} 1>&2 2>> $logfile #fi - write_status "Running PrepareReference (once-off setup for reference genome)" - $timecmd java -Xmx4g $jvm_args \ - -cp $gridss_jar gridss.PrepareReference \ - REFERENCE_SEQUENCE=$reference \ - 1>&2 2>> $logfile + if [[ ! -f ${reference}.gridsscache ]] || [[ ! -f ${reference}.img ]] || [[ ! -f ${reference}.dict ]] ; then + if mkdir $lock_file ; then + write_status "Running PrepareReference (once-off setup for reference genome)" + $timecmd java -Xmx4g $jvm_args \ + -cp $gridss_jar gridss.PrepareReference \ + REFERENCE_SEQUENCE=$reference \ + 1>&2 2>> $logfile + rmdir $lock_file + else + write_status "Multiple instances of setupreference cannot be run concurrently. Aborting due to existence of $lock_file" + exit $EX_CANTCREAT + fi + fi fi if [[ $do_preprocess == true ]] ; then diff --git a/src/main/java/gridss/cmdline/ReferenceCommandLineProgram.java b/src/main/java/gridss/cmdline/ReferenceCommandLineProgram.java index 3837c32ed..20d9a6619 100644 --- a/src/main/java/gridss/cmdline/ReferenceCommandLineProgram.java +++ b/src/main/java/gridss/cmdline/ReferenceCommandLineProgram.java @@ -48,9 +48,9 @@ public ReferenceLookup getReference() { public static boolean ensureSequenceDictionary(File referenceFile) { try (ReferenceSequenceFile rsf = new FastaSequenceFile(referenceFile, false)) { Path path = referenceFile.toPath().toAbsolutePath(); - if (rsf.getSequenceDictionary() == null) { + Path dictPath = path.resolveSibling(path.getFileName().toString() + FileExtensions.DICT); + if (!dictPath.toFile().exists()) { log.info("Attempting to create sequence dictionary for " + referenceFile); - Path dictPath = path.resolveSibling(path.getFileName().toString() + FileExtensions.DICT); CommandLineProgramHelper cmd = new CommandLineProgramHelper(new picard.sam.CreateSequenceDictionary()); cmd.addArg("OUTPUT", dictPath.toFile()); cmd.addArg("R", referenceFile.getPath());