From 193d0d47b8dc304d61cdd13d3563a60d0b467979 Mon Sep 17 00:00:00 2001
From: satoken <devnull@localhost>
Date: Sat, 15 Sep 2012 23:44:13 +0900
Subject: [PATCH] * Option '--ipknot' sets optimized parameters for IPknot
 decoding.

---
 src/cmdline.c | 103 +++++++++++++++++++++++++++++++-------------------
 src/cmdline.h |   9 ++++-
 src/dafs.cpp  |  76 ++++++++++++++++++++-----------------
 src/dafs.ggo  |   6 ++-
 4 files changed, 118 insertions(+), 76 deletions(-)

diff --git a/src/cmdline.c b/src/cmdline.c
index 94ae27a..20c5e06 100644
--- a/src/cmdline.c
+++ b/src/cmdline.c
@@ -1,7 +1,7 @@
 /*
   File autogenerated by gengetopt version 2.22.5
   generated with the following command:
-  gengetopt --unamed 
+  gengetopt -u
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
@@ -32,32 +32,33 @@ const char *gengetopt_args_info_usage = "Usage: " CMDLINE_PARSER_PACKAGE " [OPTI
 const char *gengetopt_args_info_description = "";
 
 const char *gengetopt_args_info_full_help[] = {
-  "  -h, --help                Print help and exit",
-  "      --full-help           Print help, including hidden options, and exit",
-  "  -V, --version             Print version and exit",
-  "  -r, --refinement=INT      The number of iteration of the iterative refinment  \n                              (default=`0')",
-  "  -w, --weight=FLOAT        Weight of the expected accuracy score for secondary \n                              structures  (default=`4.0')",
-  "      --eta=FLOAT           Initial step width for the subgradient optimization \n                               (default=`0.5')",
-  "  -m, --max-iter=INT        The maximum number of iteration of the subgradient \n                              optimization  (default=`600')",
-  "  -f, --fourway-pct=FLOAT   Weight of four-way PCT  (default=`0.0')",
-  "  -v, --verbose=INT         The level of verbose outputs  (default=`0')",
+  "  -h, --help                 Print help and exit",
+  "      --full-help            Print help, including hidden options, and exit",
+  "  -V, --version              Print version and exit",
+  "  -r, --refinement=INT       The number of iteration of the iterative refinment \n                                (default=`0')",
+  "  -w, --weight=FLOAT         Weight of the expected accuracy score for \n                               secondary structures  (default=`4.0')",
+  "      --eta=FLOAT            Initial step width for the subgradient \n                               optimization  (default=`0.5')",
+  "  -m, --max-iter=INT         The maximum number of iteration of the subgradient \n                               optimization  (default=`600')",
+  "  -f, --fourway-pct=FLOAT    Weight of four-way PCT  (default=`0.0')",
+  "  -v, --verbose=INT          The level of verbose outputs  (default=`0')",
   "\nOptions for alignments:",
-  "  -a, --align-model=STRING  Alignment model for calcualating matching \n                              probablities  (possible values=\"CONTRAlign\", \n                              \"ProbCons\" default=`ProbCons')",
-  "  -p, --align-pct=FLOAT     Weight of PCT for matching probabilities  \n                              (default=`0.25')",
-  "  -u, --align-th=FLOAT      Threshold for matching probabilities  \n                              (default=`0.01')",
-  "      --align-aux=FILENAME  Load matching probability matrices from FILENAME",
+  "  -a, --align-model=STRING   Alignment model for calcualating matching \n                               probablities  (possible values=\"CONTRAlign\", \n                               \"ProbCons\" default=`ProbCons')",
+  "  -p, --align-pct=FLOAT      Weight of PCT for matching probabilities  \n                               (default=`0.25')",
+  "  -u, --align-th=FLOAT       Threshold for matching probabilities  \n                               (default=`0.01')",
+  "      --align-aux=FILENAME   Load matching probability matrices from FILENAME",
   "\nOptions for folding:",
-  "  -s, --fold-model=STRING   Folding model for calculating base-pairing \n                              probablities  (possible values=\"Boltzmann\", \n                              \"Vienna\", \"CONTRAfold\" default=`Boltzmann')",
-  "  -q, --fold-pct=FLOAT      Weight of PCT for base-pairing probabilities  \n                              (default=`0.25')",
-  "  -t, --fold-th=FLOAT       Threshold for base-pairing probabilities  \n                              (default=`0.2')",
-  "  -g, --gamma=FLOAT         Specify the threshold for base-pairing \n                              probabilities by 1/(gamma+1))",
-  "      --no-alifold          No use of RNAalifold for calculating base-pairing \n                              probabilities  (default=off)",
-  "  -T, --fold-th1=FLOAT      Threshold for base-pairing probabilities of the \n                              conclusive common secondary structures",
-  "  -G, --gamma1=FLOAT        Specify the threshold for base-pairing \n                              probabilities of the conclusive common secondary \n                              structuresby 1/(gamma+1))",
-  "      --ipknot              Use IPknot decoding  (default=off)",
-  "      --bp-update           Use the iterative update of BPs  (default=off)",
-  "      --bp-update1          Use the iterative update of BPs for the final \n                              prediction  (default=off)",
-  "      --fold-aux=FILENAME   Load base-pairing probability matrices from \n                              FILENAME",
+  "  -s, --fold-model=STRING    Folding model for calculating base-pairing \n                               probablities  (possible values=\"Boltzmann\", \n                               \"Vienna\", \"CONTRAfold\" default=`Boltzmann')",
+  "      --fold-decoder=STRING  Decoder for common secondary structure prediction  \n                               (possible values=\"Nussinov\", \"IPknot\" \n                               default=`Nussinov')",
+  "  -q, --fold-pct=FLOAT       Weight of PCT for base-pairing probabilities  \n                               (default=`0.25')",
+  "  -t, --fold-th=FLOAT        Threshold for base-pairing probabilities  \n                               (default=`0.2')",
+  "  -g, --gamma=FLOAT          Specify the threshold for base-pairing \n                               probabilities by 1/(gamma+1))",
+  "      --no-alifold           No use of RNAalifold for calculating base-pairing \n                               probabilities  (default=off)",
+  "  -T, --fold-th1=FLOAT       Threshold for base-pairing probabilities of the \n                               conclusive common secondary structures",
+  "  -G, --gamma1=FLOAT         Specify the threshold for base-pairing \n                               probabilities of the conclusive common secondary \n                               structuresby 1/(gamma+1))",
+  "      --ipknot               Set optimized parameters for IPknot decoding \n                               (--fold-decoder=IPknot -g4,8 -G2,4 --bp-update1) \n                                (default=off)",
+  "      --bp-update            Use the iterative update of BPs  (default=off)",
+  "      --bp-update1           Use the iterative update of BPs for the final \n                               prediction  (default=off)",
+  "      --fold-aux=FILENAME    Load base-pairing probability matrices from \n                               FILENAME",
     0
 };
 
@@ -75,9 +76,9 @@ init_help_array(void)
   gengetopt_args_info_help[8] = gengetopt_args_info_full_help[12];
   gengetopt_args_info_help[9] = gengetopt_args_info_full_help[14];
   gengetopt_args_info_help[10] = gengetopt_args_info_full_help[15];
-  gengetopt_args_info_help[11] = gengetopt_args_info_full_help[17];
-  gengetopt_args_info_help[12] = gengetopt_args_info_full_help[20];
-  gengetopt_args_info_help[13] = gengetopt_args_info_full_help[22];
+  gengetopt_args_info_help[11] = gengetopt_args_info_full_help[18];
+  gengetopt_args_info_help[12] = gengetopt_args_info_full_help[21];
+  gengetopt_args_info_help[13] = gengetopt_args_info_full_help[23];
   gengetopt_args_info_help[14] = 0; 
   
 }
@@ -105,6 +106,7 @@ cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *pro
 
 const char *cmdline_parser_align_model_values[] = {"CONTRAlign", "ProbCons", 0}; /*< Possible values for align-model. */
 const char *cmdline_parser_fold_model_values[] = {"Boltzmann", "Vienna", "CONTRAfold", 0}; /*< Possible values for fold-model. */
+const char *cmdline_parser_fold_decoder_values[] = {"Nussinov", "IPknot", 0}; /*< Possible values for fold-decoder. */
 
 static char *
 gengetopt_strdup (const char *s);
@@ -126,6 +128,7 @@ void clear_given (struct gengetopt_args_info *args_info)
   args_info->align_th_given = 0 ;
   args_info->align_aux_given = 0 ;
   args_info->fold_model_given = 0 ;
+  args_info->fold_decoder_given = 0 ;
   args_info->fold_pct_given = 0 ;
   args_info->fold_th_given = 0 ;
   args_info->gamma_given = 0 ;
@@ -164,6 +167,8 @@ void clear_args (struct gengetopt_args_info *args_info)
   args_info->align_aux_orig = NULL;
   args_info->fold_model_arg = gengetopt_strdup ("Boltzmann");
   args_info->fold_model_orig = NULL;
+  args_info->fold_decoder_arg = gengetopt_strdup ("Nussinov");
+  args_info->fold_decoder_orig = NULL;
   args_info->fold_pct_arg = 0.25;
   args_info->fold_pct_orig = NULL;
   args_info->fold_th_arg = NULL;
@@ -202,24 +207,25 @@ void init_args_info(struct gengetopt_args_info *args_info)
   args_info->align_th_help = gengetopt_args_info_full_help[12] ;
   args_info->align_aux_help = gengetopt_args_info_full_help[13] ;
   args_info->fold_model_help = gengetopt_args_info_full_help[15] ;
-  args_info->fold_pct_help = gengetopt_args_info_full_help[16] ;
-  args_info->fold_th_help = gengetopt_args_info_full_help[17] ;
+  args_info->fold_decoder_help = gengetopt_args_info_full_help[16] ;
+  args_info->fold_pct_help = gengetopt_args_info_full_help[17] ;
+  args_info->fold_th_help = gengetopt_args_info_full_help[18] ;
   args_info->fold_th_min = 0;
   args_info->fold_th_max = 0;
-  args_info->gamma_help = gengetopt_args_info_full_help[18] ;
+  args_info->gamma_help = gengetopt_args_info_full_help[19] ;
   args_info->gamma_min = 0;
   args_info->gamma_max = 0;
-  args_info->no_alifold_help = gengetopt_args_info_full_help[19] ;
-  args_info->fold_th1_help = gengetopt_args_info_full_help[20] ;
+  args_info->no_alifold_help = gengetopt_args_info_full_help[20] ;
+  args_info->fold_th1_help = gengetopt_args_info_full_help[21] ;
   args_info->fold_th1_min = 0;
   args_info->fold_th1_max = 0;
-  args_info->gamma1_help = gengetopt_args_info_full_help[21] ;
+  args_info->gamma1_help = gengetopt_args_info_full_help[22] ;
   args_info->gamma1_min = 0;
   args_info->gamma1_max = 0;
-  args_info->ipknot_help = gengetopt_args_info_full_help[22] ;
-  args_info->bp_update_help = gengetopt_args_info_full_help[23] ;
-  args_info->bp_update1_help = gengetopt_args_info_full_help[24] ;
-  args_info->fold_aux_help = gengetopt_args_info_full_help[25] ;
+  args_info->ipknot_help = gengetopt_args_info_full_help[23] ;
+  args_info->bp_update_help = gengetopt_args_info_full_help[24] ;
+  args_info->bp_update1_help = gengetopt_args_info_full_help[25] ;
+  args_info->fold_aux_help = gengetopt_args_info_full_help[26] ;
   
 }
 
@@ -372,6 +378,8 @@ cmdline_parser_release (struct gengetopt_args_info *args_info)
   free_string_field (&(args_info->align_aux_orig));
   free_string_field (&(args_info->fold_model_arg));
   free_string_field (&(args_info->fold_model_orig));
+  free_string_field (&(args_info->fold_decoder_arg));
+  free_string_field (&(args_info->fold_decoder_orig));
   free_string_field (&(args_info->fold_pct_orig));
   free_multiple_field (args_info->fold_th_given, (void *)(args_info->fold_th_arg), &(args_info->fold_th_orig));
   args_info->fold_th_arg = 0;
@@ -495,6 +503,8 @@ cmdline_parser_dump(FILE *outfile, struct gengetopt_args_info *args_info)
     write_into_file(outfile, "align-aux", args_info->align_aux_orig, 0);
   if (args_info->fold_model_given)
     write_into_file(outfile, "fold-model", args_info->fold_model_orig, cmdline_parser_fold_model_values);
+  if (args_info->fold_decoder_given)
+    write_into_file(outfile, "fold-decoder", args_info->fold_decoder_orig, cmdline_parser_fold_decoder_values);
   if (args_info->fold_pct_given)
     write_into_file(outfile, "fold-pct", args_info->fold_pct_orig, 0);
   write_multiple_into_file(outfile, args_info->fold_th_given, "fold-th", args_info->fold_th_orig, 0);
@@ -1109,6 +1119,7 @@ cmdline_parser_internal (
         { "align-th",	1, NULL, 'u' },
         { "align-aux",	1, NULL, 0 },
         { "fold-model",	1, NULL, 's' },
+        { "fold-decoder",	1, NULL, 0 },
         { "fold-pct",	1, NULL, 'q' },
         { "fold-th",	1, NULL, 't' },
         { "gamma",	1, NULL, 'g' },
@@ -1329,6 +1340,20 @@ cmdline_parser_internal (
                 additional_error))
               goto failure;
           
+          }
+          /* Decoder for common secondary structure prediction.  */
+          else if (strcmp (long_options[option_index].name, "fold-decoder") == 0)
+          {
+          
+          
+            if (update_arg( (void *)&(args_info->fold_decoder_arg), 
+                 &(args_info->fold_decoder_orig), &(args_info->fold_decoder_given),
+                &(local_args_info.fold_decoder_given), optarg, cmdline_parser_fold_decoder_values, "Nussinov", ARG_STRING,
+                check_ambiguity, override, 0, 0,
+                "fold-decoder", '-',
+                additional_error))
+              goto failure;
+          
           }
           /* No use of RNAalifold for calculating base-pairing probabilities.  */
           else if (strcmp (long_options[option_index].name, "no-alifold") == 0)
@@ -1342,7 +1367,7 @@ cmdline_parser_internal (
               goto failure;
           
           }
-          /* Use IPknot decoding.  */
+          /* Set optimized parameters for IPknot decoding (--fold-decoder=IPknot -g4,8 -G2,4 --bp-update1).  */
           else if (strcmp (long_options[option_index].name, "ipknot") == 0)
           {
           
diff --git a/src/cmdline.h b/src/cmdline.h
index 19caee8..33ea9fa 100644
--- a/src/cmdline.h
+++ b/src/cmdline.h
@@ -77,6 +77,9 @@ struct gengetopt_args_info
   char * fold_model_arg;	/**< @brief Folding model for calculating base-pairing probablities (default='Boltzmann').  */
   char * fold_model_orig;	/**< @brief Folding model for calculating base-pairing probablities original value given at command line.  */
   const char *fold_model_help; /**< @brief Folding model for calculating base-pairing probablities help description.  */
+  char * fold_decoder_arg;	/**< @brief Decoder for common secondary structure prediction (default='Nussinov').  */
+  char * fold_decoder_orig;	/**< @brief Decoder for common secondary structure prediction original value given at command line.  */
+  const char *fold_decoder_help; /**< @brief Decoder for common secondary structure prediction help description.  */
   float fold_pct_arg;	/**< @brief Weight of PCT for base-pairing probabilities (default='0.25').  */
   char * fold_pct_orig;	/**< @brief Weight of PCT for base-pairing probabilities original value given at command line.  */
   const char *fold_pct_help; /**< @brief Weight of PCT for base-pairing probabilities help description.  */
@@ -102,8 +105,8 @@ struct gengetopt_args_info
   unsigned int gamma1_min; /**< @brief Specify the threshold for base-pairing probabilities of the conclusive common secondary structuresby 1/(gamma+1))'s minimum occurreces */
   unsigned int gamma1_max; /**< @brief Specify the threshold for base-pairing probabilities of the conclusive common secondary structuresby 1/(gamma+1))'s maximum occurreces */
   const char *gamma1_help; /**< @brief Specify the threshold for base-pairing probabilities of the conclusive common secondary structuresby 1/(gamma+1)) help description.  */
-  int ipknot_flag;	/**< @brief Use IPknot decoding (default=off).  */
-  const char *ipknot_help; /**< @brief Use IPknot decoding help description.  */
+  int ipknot_flag;	/**< @brief Set optimized parameters for IPknot decoding (--fold-decoder=IPknot -g4,8 -G2,4 --bp-update1) (default=off).  */
+  const char *ipknot_help; /**< @brief Set optimized parameters for IPknot decoding (--fold-decoder=IPknot -g4,8 -G2,4 --bp-update1) help description.  */
   int bp_update_flag;	/**< @brief Use the iterative update of BPs (default=off).  */
   const char *bp_update_help; /**< @brief Use the iterative update of BPs help description.  */
   int bp_update1_flag;	/**< @brief Use the iterative update of BPs for the final prediction (default=off).  */
@@ -126,6 +129,7 @@ struct gengetopt_args_info
   unsigned int align_th_given ;	/**< @brief Whether align-th was given.  */
   unsigned int align_aux_given ;	/**< @brief Whether align-aux was given.  */
   unsigned int fold_model_given ;	/**< @brief Whether fold-model was given.  */
+  unsigned int fold_decoder_given ;	/**< @brief Whether fold-decoder was given.  */
   unsigned int fold_pct_given ;	/**< @brief Whether fold-pct was given.  */
   unsigned int fold_th_given ;	/**< @brief Whether fold-th was given.  */
   unsigned int gamma_given ;	/**< @brief Whether gamma was given.  */
@@ -268,6 +272,7 @@ int cmdline_parser_required (struct gengetopt_args_info *args_info,
 
 extern const char *cmdline_parser_align_model_values[];  /**< @brief Possible values for align-model. */
 extern const char *cmdline_parser_fold_model_values[];  /**< @brief Possible values for fold-model. */
+extern const char *cmdline_parser_fold_decoder_values[];  /**< @brief Possible values for fold-decoder. */
 
 
 #ifdef __cplusplus
diff --git a/src/dafs.cpp b/src/dafs.cpp
index 3e57f39..206189a 100644
--- a/src/dafs.cpp
+++ b/src/dafs.cpp
@@ -1545,58 +1545,66 @@ parse_options(int& argc, char**& argv)
     s_model_ = new CONTRAfold(CUTOFF);
   assert(s_model_!=NULL);
 
-  if (args_info.fold_th_given==0)
-  {
-    th_s_.resize(1);
-    th_s_[0] = args_info.fold_th_arg[0];
-  }
-  else
+  VF th_s1;
+  if (args_info.fold_th_given)
   {
     th_s_.resize(args_info.fold_th_given);
     std::copy(args_info.fold_th_arg, args_info.fold_th_arg+th_s_.size(), th_s_.begin());
   }
-  if (args_info.gamma_given!=0)
+  else if (args_info.gamma_given)
   {
     th_s_.resize(args_info.gamma_given);
     for (uint i=0; i!=th_s_.size(); ++i)
       th_s_[i] = 1.0/(1.0+args_info.gamma_arg[i]);
   }
+  else if (args_info.ipknot_flag)
+  {
+    th_s_.resize(2);
+    th_s_[0] = 1.0/(1.0+4.0);
+    th_s_[1] = 1.0/(1.0+8.0);
+  }
+  else
+  {
+    th_s_.resize(1);
+    th_s_[0] = args_info.fold_th_arg[0];
+  }
 
-  if (args_info.ipknot_flag==0)
+  if (args_info.fold_th1_given)
   {
-    s_decoder_ = new SparseNussinov(w_, th_s_[0]);
-    if (args_info.fold_th1_given)
-      s_decoder1_ = new SparseNussinov(w_, args_info.fold_th1_arg[0]);
-    else if (args_info.gamma1_given)
-      s_decoder1_ = new SparseNussinov(w_, 1.0/(1.0+args_info.gamma1_arg[0]));
-    else
-      s_decoder1_ = new SparseNussinov(w_, th_s_[0]);
+    th_s1.resize(args_info.fold_th1_given);
+    std::copy(args_info.fold_th1_arg, args_info.fold_th1_arg+args_info.fold_th1_given, th_s1.begin());
+  }
+  else if (args_info.gamma1_given)
+  {
+    th_s1.resize(args_info.gamma1_given);
+    for (uint i=0; i!=th_s1.size(); ++i)
+      th_s1[i] = 1.0/(1.0+args_info.gamma1_arg[i]);
+  }
+  else if (args_info.ipknot_flag)
+  {
+    th_s1.resize(2);
+    th_s1[0] = 1.0/(1.0+2.0);
+    th_s1[1] = 1.0/(1.0+4.0);
   }
   else
+  {
+    th_s1=th_s_;
+  }
+
+  if (strcasecmp(args_info.fold_decoder_arg, "IPknot")==0 || args_info.ipknot_flag)
   {
     s_decoder_ = new IPknot(w_, th_s_);
-    if (args_info.fold_th1_given)
-    {
-      VF th(args_info.fold_th1_given);
-      std::copy(args_info.fold_th1_arg, args_info.fold_th1_arg+args_info.fold_th1_given, th.begin());
-      s_decoder1_ = new IPknot(w_, th);
-    }
-    else if (args_info.gamma1_given)
-    {
-      VF th(args_info.gamma1_given);
-      for (uint i=0; i!=th.size(); ++i)
-        th[i] = 1.0/(1.0+args_info.gamma1_arg[i]);
-      s_decoder1_ = new IPknot(w_, th);
-    }
-    else
-      s_decoder1_ = new IPknot(w_, th_s_);
+    s_decoder1_ = new IPknot(w_, th_s1);
   }
+  else if (strcasecmp(args_info.fold_decoder_arg, "Nussinov")==0)
+  {
+    s_decoder_ = new SparseNussinov(w_, th_s_[0]);
+    s_decoder1_ = new SparseNussinov(w_, th_s1[0]);
+  }
+  assert(s_decoder_!=NULL);
 
   use_bp_update_ = args_info.bp_update_flag!=0;
-  if (args_info.bp_update1_given)
-    use_bp_update1_ = args_info.bp_update1_flag!=0;
-  else
-    use_bp_update1_ = use_bp_update_;
+  use_bp_update1_ = (args_info.bp_update1_flag!=0) ^ (args_info.ipknot_flag!=0); // XOR: on when exactly one of --bp-update1 / --ipknot is given. NOTE(review): giving both yields off -- confirm this toggle is intended.
 
   if (args_info.inputs_num==0)
   {
diff --git a/src/dafs.ggo b/src/dafs.ggo
index b14c717..13259f4 100644
--- a/src/dafs.ggo
+++ b/src/dafs.ggo
@@ -1,5 +1,7 @@
 # command line options
 
+args "-u"
+
 purpose "DAFS: dual decomposition for simultaneous aligning and folding RNA sequences."
 
 option "refinement"	r "The number of iteration of the iterative refinment"
@@ -34,6 +36,8 @@ section "Options for folding"
 
 option "fold-model"	s "Folding model for calculating base-pairing probablities"
        			  values="Boltzmann","Vienna","CONTRAfold" default="Boltzmann" optional
+option "fold-decoder"   - "Decoder for common secondary structure prediction"
+                          values="Nussinov","IPknot" default="Nussinov" optional hidden
 option "fold-pct"	q "Weight of PCT for base-pairing probabilities"
        			  float default="0.25" optional hidden
 option "fold-th"	t "Threshold for base-pairing probabilities"
@@ -46,7 +50,7 @@ option "fold-th1"	T "Threshold for base-pairing probabilities of the conclusive
        			  float optional multiple
 option "gamma1"		G "Specify the threshold for base-pairing probabilities of the conclusive common secondary structuresby 1/(gamma+1))"
        			  float optional hidden multiple
-option "ipknot"		- "Use IPknot decoding"
+option "ipknot"		- "Set optimized parameters for IPknot decoding (--fold-decoder=IPknot -g4,8 -G2,4 --bp-update1)"
        			  flag off
 option "bp-update"	- "Use the iterative update of BPs"
        			  flag off hidden