From 193d0d47b8dc304d61cdd13d3563a60d0b467979 Mon Sep 17 00:00:00 2001 From: satoken Date: Sat, 15 Sep 2012 23:44:13 +0900 Subject: [PATCH] * Option '--ipknot' sets optimized parameters for IPknot decoding. --- src/cmdline.c | 103 +++++++++++++++++++++++++++++++------------------- src/cmdline.h | 9 ++++- src/dafs.cpp | 76 ++++++++++++++++++++----------------- src/dafs.ggo | 6 ++- 4 files changed, 118 insertions(+), 76 deletions(-) diff --git a/src/cmdline.c b/src/cmdline.c index 94ae27a..20c5e06 100644 --- a/src/cmdline.c +++ b/src/cmdline.c @@ -1,7 +1,7 @@ /* File autogenerated by gengetopt version 2.22.5 generated with the following command: - gengetopt --unamed + gengetopt -u The developers of gengetopt consider the fixed text that goes in all gengetopt output files to be in the public domain: @@ -32,32 +32,33 @@ const char *gengetopt_args_info_usage = "Usage: " CMDLINE_PARSER_PACKAGE " [OPTI const char *gengetopt_args_info_description = ""; const char *gengetopt_args_info_full_help[] = { - " -h, --help Print help and exit", - " --full-help Print help, including hidden options, and exit", - " -V, --version Print version and exit", - " -r, --refinement=INT The number of iteration of the iterative refinment \n (default=`0')", - " -w, --weight=FLOAT Weight of the expected accuracy score for secondary \n structures (default=`4.0')", - " --eta=FLOAT Initial step width for the subgradient optimization \n (default=`0.5')", - " -m, --max-iter=INT The maximum number of iteration of the subgradient \n optimization (default=`600')", - " -f, --fourway-pct=FLOAT Weight of four-way PCT (default=`0.0')", - " -v, --verbose=INT The level of verbose outputs (default=`0')", + " -h, --help Print help and exit", + " --full-help Print help, including hidden options, and exit", + " -V, --version Print version and exit", + " -r, --refinement=INT The number of iteration of the iterative refinment \n (default=`0')", + " -w, --weight=FLOAT Weight of the expected accuracy score for \n 
secondary structures (default=`4.0')", + " --eta=FLOAT Initial step width for the subgradient \n optimization (default=`0.5')", + " -m, --max-iter=INT The maximum number of iteration of the subgradient \n optimization (default=`600')", + " -f, --fourway-pct=FLOAT Weight of four-way PCT (default=`0.0')", + " -v, --verbose=INT The level of verbose outputs (default=`0')", "\nOptions for alignments:", - " -a, --align-model=STRING Alignment model for calcualating matching \n probablities (possible values=\"CONTRAlign\", \n \"ProbCons\" default=`ProbCons')", - " -p, --align-pct=FLOAT Weight of PCT for matching probabilities \n (default=`0.25')", - " -u, --align-th=FLOAT Threshold for matching probabilities \n (default=`0.01')", - " --align-aux=FILENAME Load matching probability matrices from FILENAME", + " -a, --align-model=STRING Alignment model for calcualating matching \n probablities (possible values=\"CONTRAlign\", \n \"ProbCons\" default=`ProbCons')", + " -p, --align-pct=FLOAT Weight of PCT for matching probabilities \n (default=`0.25')", + " -u, --align-th=FLOAT Threshold for matching probabilities \n (default=`0.01')", + " --align-aux=FILENAME Load matching probability matrices from FILENAME", "\nOptions for folding:", - " -s, --fold-model=STRING Folding model for calculating base-pairing \n probablities (possible values=\"Boltzmann\", \n \"Vienna\", \"CONTRAfold\" default=`Boltzmann')", - " -q, --fold-pct=FLOAT Weight of PCT for base-pairing probabilities \n (default=`0.25')", - " -t, --fold-th=FLOAT Threshold for base-pairing probabilities \n (default=`0.2')", - " -g, --gamma=FLOAT Specify the threshold for base-pairing \n probabilities by 1/(gamma+1))", - " --no-alifold No use of RNAalifold for calculating base-pairing \n probabilities (default=off)", - " -T, --fold-th1=FLOAT Threshold for base-pairing probabilities of the \n conclusive common secondary structures", - " -G, --gamma1=FLOAT Specify the threshold for base-pairing \n probabilities of the 
conclusive common secondary \n structuresby 1/(gamma+1))", - " --ipknot Use IPknot decoding (default=off)", - " --bp-update Use the iterative update of BPs (default=off)", - " --bp-update1 Use the iterative update of BPs for the final \n prediction (default=off)", - " --fold-aux=FILENAME Load base-pairing probability matrices from \n FILENAME", + " -s, --fold-model=STRING Folding model for calculating base-pairing \n probablities (possible values=\"Boltzmann\", \n \"Vienna\", \"CONTRAfold\" default=`Boltzmann')", + " --fold-decoder=STRING Decoder for common secondary structure prediction \n (possible values=\"Nussinov\", \"IPknot\" \n default=`Nussinov')", + " -q, --fold-pct=FLOAT Weight of PCT for base-pairing probabilities \n (default=`0.25')", + " -t, --fold-th=FLOAT Threshold for base-pairing probabilities \n (default=`0.2')", + " -g, --gamma=FLOAT Specify the threshold for base-pairing \n probabilities by 1/(gamma+1))", + " --no-alifold No use of RNAalifold for calculating base-pairing \n probabilities (default=off)", + " -T, --fold-th1=FLOAT Threshold for base-pairing probabilities of the \n conclusive common secondary structures", + " -G, --gamma1=FLOAT Specify the threshold for base-pairing \n probabilities of the conclusive common secondary \n structuresby 1/(gamma+1))", + " --ipknot Set optimized parameters for IPknot decoding \n (--fold-decoder=IPknot -g4,8 -G2,4 --bp-update1) \n (default=off)", + " --bp-update Use the iterative update of BPs (default=off)", + " --bp-update1 Use the iterative update of BPs for the final \n prediction (default=off)", + " --fold-aux=FILENAME Load base-pairing probability matrices from \n FILENAME", 0 }; @@ -75,9 +76,9 @@ init_help_array(void) gengetopt_args_info_help[8] = gengetopt_args_info_full_help[12]; gengetopt_args_info_help[9] = gengetopt_args_info_full_help[14]; gengetopt_args_info_help[10] = gengetopt_args_info_full_help[15]; - gengetopt_args_info_help[11] = gengetopt_args_info_full_help[17]; - 
gengetopt_args_info_help[12] = gengetopt_args_info_full_help[20]; - gengetopt_args_info_help[13] = gengetopt_args_info_full_help[22]; + gengetopt_args_info_help[11] = gengetopt_args_info_full_help[18]; + gengetopt_args_info_help[12] = gengetopt_args_info_full_help[21]; + gengetopt_args_info_help[13] = gengetopt_args_info_full_help[23]; gengetopt_args_info_help[14] = 0; } @@ -105,6 +106,7 @@ cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *pro const char *cmdline_parser_align_model_values[] = {"CONTRAlign", "ProbCons", 0}; /*< Possible values for align-model. */ const char *cmdline_parser_fold_model_values[] = {"Boltzmann", "Vienna", "CONTRAfold", 0}; /*< Possible values for fold-model. */ +const char *cmdline_parser_fold_decoder_values[] = {"Nussinov", "IPknot", 0}; /*< Possible values for fold-decoder. */ static char * gengetopt_strdup (const char *s); @@ -126,6 +128,7 @@ void clear_given (struct gengetopt_args_info *args_info) args_info->align_th_given = 0 ; args_info->align_aux_given = 0 ; args_info->fold_model_given = 0 ; + args_info->fold_decoder_given = 0 ; args_info->fold_pct_given = 0 ; args_info->fold_th_given = 0 ; args_info->gamma_given = 0 ; @@ -164,6 +167,8 @@ void clear_args (struct gengetopt_args_info *args_info) args_info->align_aux_orig = NULL; args_info->fold_model_arg = gengetopt_strdup ("Boltzmann"); args_info->fold_model_orig = NULL; + args_info->fold_decoder_arg = gengetopt_strdup ("Nussinov"); + args_info->fold_decoder_orig = NULL; args_info->fold_pct_arg = 0.25; args_info->fold_pct_orig = NULL; args_info->fold_th_arg = NULL; @@ -202,24 +207,25 @@ void init_args_info(struct gengetopt_args_info *args_info) args_info->align_th_help = gengetopt_args_info_full_help[12] ; args_info->align_aux_help = gengetopt_args_info_full_help[13] ; args_info->fold_model_help = gengetopt_args_info_full_help[15] ; - args_info->fold_pct_help = gengetopt_args_info_full_help[16] ; - args_info->fold_th_help = 
gengetopt_args_info_full_help[17] ; + args_info->fold_decoder_help = gengetopt_args_info_full_help[16] ; + args_info->fold_pct_help = gengetopt_args_info_full_help[17] ; + args_info->fold_th_help = gengetopt_args_info_full_help[18] ; args_info->fold_th_min = 0; args_info->fold_th_max = 0; - args_info->gamma_help = gengetopt_args_info_full_help[18] ; + args_info->gamma_help = gengetopt_args_info_full_help[19] ; args_info->gamma_min = 0; args_info->gamma_max = 0; - args_info->no_alifold_help = gengetopt_args_info_full_help[19] ; - args_info->fold_th1_help = gengetopt_args_info_full_help[20] ; + args_info->no_alifold_help = gengetopt_args_info_full_help[20] ; + args_info->fold_th1_help = gengetopt_args_info_full_help[21] ; args_info->fold_th1_min = 0; args_info->fold_th1_max = 0; - args_info->gamma1_help = gengetopt_args_info_full_help[21] ; + args_info->gamma1_help = gengetopt_args_info_full_help[22] ; args_info->gamma1_min = 0; args_info->gamma1_max = 0; - args_info->ipknot_help = gengetopt_args_info_full_help[22] ; - args_info->bp_update_help = gengetopt_args_info_full_help[23] ; - args_info->bp_update1_help = gengetopt_args_info_full_help[24] ; - args_info->fold_aux_help = gengetopt_args_info_full_help[25] ; + args_info->ipknot_help = gengetopt_args_info_full_help[23] ; + args_info->bp_update_help = gengetopt_args_info_full_help[24] ; + args_info->bp_update1_help = gengetopt_args_info_full_help[25] ; + args_info->fold_aux_help = gengetopt_args_info_full_help[26] ; } @@ -372,6 +378,8 @@ cmdline_parser_release (struct gengetopt_args_info *args_info) free_string_field (&(args_info->align_aux_orig)); free_string_field (&(args_info->fold_model_arg)); free_string_field (&(args_info->fold_model_orig)); + free_string_field (&(args_info->fold_decoder_arg)); + free_string_field (&(args_info->fold_decoder_orig)); free_string_field (&(args_info->fold_pct_orig)); free_multiple_field (args_info->fold_th_given, (void *)(args_info->fold_th_arg), &(args_info->fold_th_orig)); 
args_info->fold_th_arg = 0; @@ -495,6 +503,8 @@ cmdline_parser_dump(FILE *outfile, struct gengetopt_args_info *args_info) write_into_file(outfile, "align-aux", args_info->align_aux_orig, 0); if (args_info->fold_model_given) write_into_file(outfile, "fold-model", args_info->fold_model_orig, cmdline_parser_fold_model_values); + if (args_info->fold_decoder_given) + write_into_file(outfile, "fold-decoder", args_info->fold_decoder_orig, cmdline_parser_fold_decoder_values); if (args_info->fold_pct_given) write_into_file(outfile, "fold-pct", args_info->fold_pct_orig, 0); write_multiple_into_file(outfile, args_info->fold_th_given, "fold-th", args_info->fold_th_orig, 0); @@ -1109,6 +1119,7 @@ cmdline_parser_internal ( { "align-th", 1, NULL, 'u' }, { "align-aux", 1, NULL, 0 }, { "fold-model", 1, NULL, 's' }, + { "fold-decoder", 1, NULL, 0 }, { "fold-pct", 1, NULL, 'q' }, { "fold-th", 1, NULL, 't' }, { "gamma", 1, NULL, 'g' }, @@ -1329,6 +1340,20 @@ cmdline_parser_internal ( additional_error)) goto failure; + } + /* Decoder for common secondary structure prediction. */ + else if (strcmp (long_options[option_index].name, "fold-decoder") == 0) + { + + + if (update_arg( (void *)&(args_info->fold_decoder_arg), + &(args_info->fold_decoder_orig), &(args_info->fold_decoder_given), + &(local_args_info.fold_decoder_given), optarg, cmdline_parser_fold_decoder_values, "Nussinov", ARG_STRING, + check_ambiguity, override, 0, 0, + "fold-decoder", '-', + additional_error)) + goto failure; + } /* No use of RNAalifold for calculating base-pairing probabilities. */ else if (strcmp (long_options[option_index].name, "no-alifold") == 0) @@ -1342,7 +1367,7 @@ cmdline_parser_internal ( goto failure; } - /* Use IPknot decoding. */ + /* Set optimized parameters for IPknot decoding (--fold-decoder=IPknot -g4,8 -G2,4 --bp-update1). 
*/ else if (strcmp (long_options[option_index].name, "ipknot") == 0) { diff --git a/src/cmdline.h b/src/cmdline.h index 19caee8..33ea9fa 100644 --- a/src/cmdline.h +++ b/src/cmdline.h @@ -77,6 +77,9 @@ struct gengetopt_args_info char * fold_model_arg; /**< @brief Folding model for calculating base-pairing probablities (default='Boltzmann'). */ char * fold_model_orig; /**< @brief Folding model for calculating base-pairing probablities original value given at command line. */ const char *fold_model_help; /**< @brief Folding model for calculating base-pairing probablities help description. */ + char * fold_decoder_arg; /**< @brief Decoder for common secondary structure prediction (default='Nussinov'). */ + char * fold_decoder_orig; /**< @brief Decoder for common secondary structure prediction original value given at command line. */ + const char *fold_decoder_help; /**< @brief Decoder for common secondary structure prediction help description. */ float fold_pct_arg; /**< @brief Weight of PCT for base-pairing probabilities (default='0.25'). */ char * fold_pct_orig; /**< @brief Weight of PCT for base-pairing probabilities original value given at command line. */ const char *fold_pct_help; /**< @brief Weight of PCT for base-pairing probabilities help description. */ @@ -102,8 +105,8 @@ struct gengetopt_args_info unsigned int gamma1_min; /**< @brief Specify the threshold for base-pairing probabilities of the conclusive common secondary structuresby 1/(gamma+1))'s minimum occurreces */ unsigned int gamma1_max; /**< @brief Specify the threshold for base-pairing probabilities of the conclusive common secondary structuresby 1/(gamma+1))'s maximum occurreces */ const char *gamma1_help; /**< @brief Specify the threshold for base-pairing probabilities of the conclusive common secondary structuresby 1/(gamma+1)) help description. */ - int ipknot_flag; /**< @brief Use IPknot decoding (default=off). */ - const char *ipknot_help; /**< @brief Use IPknot decoding help description. 
*/ + int ipknot_flag; /**< @brief Set optimized parameters for IPknot decoding (--fold-decoder=IPknot -g4,8 -G2,4 --bp-update1) (default=off). */ + const char *ipknot_help; /**< @brief Set optimized parameters for IPknot decoding (--fold-decoder=IPknot -g4,8 -G2,4 --bp-update1) help description. */ int bp_update_flag; /**< @brief Use the iterative update of BPs (default=off). */ const char *bp_update_help; /**< @brief Use the iterative update of BPs help description. */ int bp_update1_flag; /**< @brief Use the iterative update of BPs for the final prediction (default=off). */ @@ -126,6 +129,7 @@ struct gengetopt_args_info unsigned int align_th_given ; /**< @brief Whether align-th was given. */ unsigned int align_aux_given ; /**< @brief Whether align-aux was given. */ unsigned int fold_model_given ; /**< @brief Whether fold-model was given. */ + unsigned int fold_decoder_given ; /**< @brief Whether fold-decoder was given. */ unsigned int fold_pct_given ; /**< @brief Whether fold-pct was given. */ unsigned int fold_th_given ; /**< @brief Whether fold-th was given. */ unsigned int gamma_given ; /**< @brief Whether gamma was given. */ @@ -268,6 +272,7 @@ int cmdline_parser_required (struct gengetopt_args_info *args_info, extern const char *cmdline_parser_align_model_values[]; /**< @brief Possible values for align-model. */ extern const char *cmdline_parser_fold_model_values[]; /**< @brief Possible values for fold-model. */ +extern const char *cmdline_parser_fold_decoder_values[]; /**< @brief Possible values for fold-decoder. 
*/ #ifdef __cplusplus diff --git a/src/dafs.cpp b/src/dafs.cpp index 3e57f39..206189a 100644 --- a/src/dafs.cpp +++ b/src/dafs.cpp @@ -1545,58 +1545,66 @@ parse_options(int& argc, char**& argv) s_model_ = new CONTRAfold(CUTOFF); assert(s_model_!=NULL); - if (args_info.fold_th_given==0) - { - th_s_.resize(1); - th_s_[0] = args_info.fold_th_arg[0]; - } - else + VF th_s1; + if (args_info.fold_th_given) { th_s_.resize(args_info.fold_th_given); std::copy(args_info.fold_th_arg, args_info.fold_th_arg+th_s_.size(), th_s_.begin()); } - if (args_info.gamma_given!=0) + else if (args_info.gamma_given) { th_s_.resize(args_info.gamma_given); for (uint i=0; i!=th_s_.size(); ++i) th_s_[i] = 1.0/(1.0+args_info.gamma_arg[i]); } + else if (args_info.ipknot_flag) + { + th_s_.resize(2); + th_s_[0] = 1.0/(1.0+4.0); + th_s_[1] = 1.0/(1.0+8.0); + } + else + { + th_s_.resize(1); + th_s_[0] = args_info.fold_th_arg[0]; + } - if (args_info.ipknot_flag==0) + if (args_info.fold_th1_given) { - s_decoder_ = new SparseNussinov(w_, th_s_[0]); - if (args_info.fold_th1_given) - s_decoder1_ = new SparseNussinov(w_, args_info.fold_th1_arg[0]); - else if (args_info.gamma1_given) - s_decoder1_ = new SparseNussinov(w_, 1.0/(1.0+args_info.gamma1_arg[0])); - else - s_decoder1_ = new SparseNussinov(w_, th_s_[0]); + th_s1.resize(args_info.fold_th1_given); + std::copy(args_info.fold_th1_arg, args_info.fold_th1_arg+args_info.fold_th1_given, th_s1.begin()); + } + else if (args_info.gamma1_given) + { + th_s1.resize(args_info.gamma1_given); + for (uint i=0; i!=th_s1.size(); ++i) + th_s1[i] = 1.0/(1.0+args_info.gamma1_arg[i]); + } + else if (args_info.ipknot_flag) + { + th_s1.resize(2); + th_s1[0] = 1.0/(1.0+2.0); + th_s1[1] = 1.0/(1.0+4.0); } else + { + th_s1=th_s_; + } + + if (strcasecmp(args_info.fold_decoder_arg, "IPknot")==0 || args_info.ipknot_flag) { s_decoder_ = new IPknot(w_, th_s_); - if (args_info.fold_th1_given) - { - VF th(args_info.fold_th1_given); - std::copy(args_info.fold_th1_arg, 
args_info.fold_th1_arg+args_info.fold_th1_given, th.begin()); - s_decoder1_ = new IPknot(w_, th); - } - else if (args_info.gamma1_given) - { - VF th(args_info.gamma1_given); - for (uint i=0; i!=th.size(); ++i) - th[i] = 1.0/(1.0+args_info.gamma1_arg[i]); - s_decoder1_ = new IPknot(w_, th); - } - else - s_decoder1_ = new IPknot(w_, th_s_); + s_decoder1_ = new IPknot(w_, th_s1); } + else if (strcasecmp(args_info.fold_decoder_arg, "Nussinov")==0) + { + s_decoder_ = new SparseNussinov(w_, th_s_[0]); + s_decoder1_ = new SparseNussinov(w_, th_s1[0]); + } + assert(s_decoder_!=NULL); use_bp_update_ = args_info.bp_update_flag!=0; - if (args_info.bp_update1_given) - use_bp_update1_ = args_info.bp_update1_flag!=0; - else - use_bp_update1_ = use_bp_update_; + use_bp_update1_ = (args_info.bp_update1_flag!=0) ^ (args_info.ipknot_flag!=0); /* XOR: true when exactly one of --bp-update1 / --ipknot is given, so --ipknot alone enables it and adding --bp-update1 turns it back off */ if (args_info.inputs_num==0) { diff --git a/src/dafs.ggo b/src/dafs.ggo index b14c717..13259f4 100644 --- a/src/dafs.ggo +++ b/src/dafs.ggo @@ -1,5 +1,7 @@ # command line options +args "-u" + purpose "DAFS: dual decomposition for simultaneous aligning and folding RNA sequences." 
option "refinement" r "The number of iteration of the iterative refinment" @@ -34,6 +36,8 @@ section "Options for folding" option "fold-model" s "Folding model for calculating base-pairing probablities" values="Boltzmann","Vienna","CONTRAfold" default="Boltzmann" optional +option "fold-decoder" - "Decoder for common secondary structure prediction" + values="Nussinov","IPknot" default="Nussinov" optional hidden option "fold-pct" q "Weight of PCT for base-pairing probabilities" float default="0.25" optional hidden option "fold-th" t "Threshold for base-pairing probabilities" @@ -46,7 +50,7 @@ option "fold-th1" T "Threshold for base-pairing probabilities of the conclusive float optional multiple option "gamma1" G "Specify the threshold for base-pairing probabilities of the conclusive common secondary structuresby 1/(gamma+1))" float optional hidden multiple -option "ipknot" - "Use IPknot decoding" +option "ipknot" - "Set optimized parameters for IPknot decoding (--fold-decoder=IPknot -g4,8 -G2,4 --bp-update1)" flag off option "bp-update" - "Use the iterative update of BPs" flag off hidden