<?xml version = "1.0" encoding = "ISO-8859-1" ?>
<!DOCTYPE pise PUBLIC "pise.dtd" "pise2.dtd"  >
<!--
Probcons Written by Chuong Do

PROBCONS comes with ABSOLUTELY NO WARRANTY.  This is free software, and
you are welcome to redistribute it under certain conditions.  See the
file COPYING.txt for details.

Usage:
       probcons [OPTION]... [MFAFILE]...

Description:
       Align sequences in MFAFILE(s) and print result to standard output

       -clustalw
              use CLUSTALW output format instead of MFA

       -c, -consistency REPS
              use 0 <= REPS <= 5 (default: 2) passes of consistency transformation

       -ir, -iterative-refinement REPS
              use 0 <= REPS <= 1000 (default: 100) passes of iterative-refinement

       -pre, -pre-training REPS
              use 0 <= REPS <= 20 (default: 0) rounds of pretraining

       -pairs
              generate all-pairs pairwise alignments

       -viterbi
              use Viterbi algorithm to generate all pairs (automatically enables -pairs)

       -v, -verbose
              report progress while aligning (default: off)

       -annot FILENAME
              write annotation for multiple alignment to FILENAME

       -t, -train FILENAME
              compute EM transition probabilities, store in FILENAME (default: no training)

       -e, -emissions
              also reestimate emission probabilities (default: off)

       -p, -paramfile FILENAME
              read parameters from FILENAME (default: )

       -a, -alignment-order
              print sequences in alignment order rather than input order (default: off)
              
              
PROBCONS version 1.12 - align multiple protein sequences and print to standard output
Written by Chuong Do

PROBCONS comes with ABSOLUTELY NO WARRANTY.  This is free software, and
you are welcome to redistribute it under certain conditions.  See the
file COPYING.txt for details.

Usage:
       probcons [OPTION]... [MFAFILE]...

Description:
       Align sequences in MFAFILE(s) and print result to standard output

       -clustalw
              use CLUSTALW output format instead of MFA

       -c, -*consistency REPS
              use 0 <= REPS <= 5 (default: 2) passes of consistency transformation

       -ir, -*iterative-refinement REPS
              use 0 <= REPS <= 1000 (default: 100) passes of iterative-refinement

       -pre, -*pre-training REPS
              use 0 <= REPS <= 20 (default: 0) rounds of pretraining

       -pairs
              generate all-pairs pairwise alignments

       -viterbi
              use Viterbi algorithm to generate all pairs (automatically enables -pairs)

       -v, -*verbose
              report progress while aligning (default: off)

       -annot FILENAME
              write annotation for multiple alignment to FILENAME

       -t, -*train FILENAME
              compute EM transition probabilities, store in FILENAME (default: no training)

       -e, -*emissions
              also reestimate emission probabilities (default: off)

       -p, -*paramfile FILENAME
              read parameters from FILENAME (default: )

       -a, -*alignment-order
              print sequences in alignment order rather than input order (default: off)
-->
<!--
2009/01/27 - Rahul Suri

Design decisions made in the course of wrapping ProbCons/ProbConsRNA into this 
Pise XML file are documented below.

* ProbCons-1.12 and ProbConsRNA were unified into a single interface; selection 
between the 2 packages is based on user selection of input type (amino or 
nucleic acid).

* Rather than automatically enabling "-pairs" when "-viterbi" is selected, 
"-pairs" is listed as a precondition of "-viterbi."  This results in a minor 
change for the user, but makes things easier to express in Pise XML.

* The "-verbose" option is not supported, as it does not make much sense in the 
context of the CIPRES portal workflow.

* Only 1 input file can be passed to the program, even during calls specifying 
the "-train" option.  This is a limitation of the current CIPRES architecture.

* The "-emissions" and "-alignment-order" options are listed in the ProbCons 
1.12 usage message, but not in the most-recent user's manual (v 1.08).  Lacking 
detailed descriptions to convey to users, these options were dropped from the 
interface.

* The "-matrixfile" option appears in the user's manual (v 1.08), but not in 
the ProbCons 1.12 usage message.  I assumed it was dropped between versions and 
omitted it from the interface.
-->
<pise>
  <!-- Tool metadata: title, version, description, authors, citation,
       categories and documentation link. -->
  <head>
    <title>ProbCons</title>
    <version>1.12</version>
    <description>Probabilistic Consistency-based Multiple Alignment of Amino/Nucleic Acid Sequences</description>
    <authors>C.Do, M.Brudno, S.Batzoglou</authors>
    <reference>Do, C.B., Mahabhashyam, M.S.P., Brudno, M., and Batzoglou, S. 2005. PROBCONS: Probabilistic Consistency-based Multiple Sequence Alignment. Genome Research 15: 330-340.</reference>
    <category>Nucleic Acid Sequence,Protein Sequence,Phylogeny / Alignment</category>
    <doclink>http://probcons.stanford.edu/</doclink>
  </head>

  <!-- Command name for this tool description. -->
  <command>probcons</command>

  <parameters>	  
	  <!-- Hidden parameter: default scheduler settings (mem=2G) for the
	       scheduler.conf paramfile; applies while more_memory is off. -->
		<parameter ishidden="1" type="String">
			<name>scheduler_input</name>
			<attributes>
				<paramfile>scheduler.conf</paramfile>
				<precond>
					<language>perl</language>
					<code>!$more_memory</code>
				</precond>
				<format>
					<language>perl</language>
					<code>
						"ChargeFactor=1.0\\n" .
						"nodes=1\\n" .
						"mem=2G\\n" .
						"node_exclusive=0\\n" .
						"threads_per_process=1\\n"
					</code>
				</format>
			</attributes>
		</parameter>
		
				<!-- Hidden parameter: scheduler settings with mem=8G for the scheduler.conf paramfile; applies when more_memory is on and more_memory2 is not. -->
		<parameter ishidden="1" type="String">
			<name>scheduler_input2</name>
			<attributes>
				<paramfile>scheduler.conf</paramfile>
				<precond>
					<language>perl</language>
					<code>$more_memory &amp;&amp; !$more_memory2</code>
				</precond>
				<format>
					<language>perl</language>
					<code>
						"ChargeFactor=1.0\\n" .
						"nodes=1\\n" .
						"mem=8G\\n" .
						"node_exclusive=0\\n" .
						"threads_per_process=1\\n"
					</code>
				</format>
			</attributes>
		</parameter>
		
	<!-- Hidden parameter: scheduler settings with mem=64G for the scheduler.conf paramfile; applies when more_memory2 is on.
	     NOTE(review): the only more_memory2 parameter visible in this file is commented out, so this precondition may never hold; confirm. -->
		<parameter ishidden="1" type="String">
			<name>scheduler_input3</name>
			<attributes>
				<paramfile>scheduler.conf</paramfile>
				<precond>
					<language>perl</language>
					<code>$more_memory2</code>
				</precond>
				<format>
					<language>perl</language>
					<code>
						"ChargeFactor=1.0\\n" .
						"nodes=1\\n" .
						"mem=64G\\n" .
						"node_exclusive=0\\n" .
						"threads_per_process=1\\n"
					</code>
				</format>
			</attributes>
		</parameter>
		
<!-- Hidden parameter whose format yields the absolute path of the probcons
     executable: /expanse/projects/ngbt/opt/expanse/probcons/1.12/probcons -->
		<parameter ishidden="1" type="String">
			<name>invocation_string</name>
			<attributes>
				<format>
					<language>perl</language>
					<code>"/expanse/projects/ngbt/opt/expanse/probcons/1.12/probcons"</code>
				</format>
			</attributes>
		</parameter>
		
  <!-- Maximum run time in hours. The value is formatted as "runhours=VALUE"
       for the scheduler.conf paramfile. The range check below accepts
       0.1 to 168.0 hours, and the error message now states the same upper
       bound as the check and the help text (it previously said 72.0). -->
  <parameter type="Float" issimple="1" ismandatory="1">
    <name>runtime</name>
    <attributes>
      <group>1</group>
      <paramfile>scheduler.conf</paramfile>
      <prompt>Maximum Hours to Run (click here for help setting this correctly)</prompt>
      <vdef><value>1.0</value></vdef>
      <comment>
        <value>
          Estimate the maximum time your job will need to run (up to 168  hrs).  Your job will be killed if it doesn't finish within the time you specify, however jobs with shorter maximum run times are often scheduled sooner than longer jobs.
        </value>
      </comment>
      <ctrls>
        <!-- Each ctrl code is an error condition: the message is shown when it is true. -->
        <ctrl>
          <message>Maximum Hours to Run must be between 0.1 - 168.0.</message>
          <language>perl</language>
          <code>$runtime &lt; 0.1 || $runtime &gt; 168.0</code>
        </ctrl>
        <ctrl>
          <message>Please set a value for the runtime</message>
          <language>perl</language>
          <code>!defined $runtime </code>
        </ctrl>
      </ctrls>
      <format>
        <language>perl</language>
        <code>"runhours=$value\\n"</code>
      </format>
      <warns>
        <!-- Exactly one of the two warnings applies, depending on the more_memory switch. -->
        <warn>
          <message>This job will run on one core as configured. If it runs for the entire time, it will consume 1 x $runtime cpu hours</message>
          <language>perl</language>
          <code>!$more_memory</code>
        </warn>
        <warn>
          <message>This job will run on four cores as configured. If it runs for the entire time, it will consume 4 x $runtime cpu hours</message>
          <language>perl</language>
          <code>$more_memory</code>
        </warn>
      </warns>
    </attributes>
  </parameter>
	  
<!-- Mandatory input file, selected outside the parameters GUI. It uses the
     file name input.fasta, which the format appends to the command line. -->
    <parameter type="InFile" isinput="1" issimple="1" ismandatory="1">
      <name>infile</name>
      <attributes>
        <prompt>Sequences File (FASTA format)</prompt>
        <filenames>input.fasta</filenames>
        <format>
          <language>perl</language>
          <code>" input.fasta"</code>
        </format>
        <group>98</group>
      </attributes>
    </parameter>

    <!-- Sequence type selector. It has no format element, so it contributes
         nothing to the command line.
         NOTE(review): the help text mentions nucleic acid input, but the
         value list only offers aminoAcid; confirm which is intended. -->
    <parameter type="Excl" issimple="1" ismandatory="1">
      <name>sequenceType</name>
      <attributes>
        <prompt>Data Type</prompt>
        <vdef>
          <value>aminoAcid</value>
        </vdef>
        <vlist>
          <value>aminoAcid</value>
          <label>Amino Acid</label>
        </vlist>
        <group>0</group>
        <comment>
          <value>Amino acid inputs are processed by ProbCons version 1.12; nucleic acid inputs are</value>
          <value>processed by ProbConsRNA.</value>
        </comment>
      </attributes>
    </parameter>

    <!-- Output format selector. The default (MFA) adds nothing to the command
         line; any other defined value adds " -clustalw". -->
    <parameter type="Excl" issimple="1" ismandatory="1">
      <name>outputFormat</name>
      <attributes>
        <prompt>Output file format</prompt>
        <vlist>
          <value>MFA</value>
          <label>Multi-FASTA</label>
          <value>clustal</value>
          <label>ClustalW</label>
        </vlist>
        <vdef>
          <value>MFA</value>
        </vdef>
        <format>
          <language>perl</language>
          <code>(defined $value &amp;&amp; $value ne $vdef) ? " -clustalw" : ""</code>
        </format>
        <group>10</group>
        <comment>
          <value>For detailed descriptions of the Multi-FASTA and ClustalW formats, please consult</value>
          <value>the ProbCons User Manual at http://probcons.stanford.edu/.</value>
        </comment>
      </attributes>
    </parameter>
    
   <!-- Switch (off by default) read by the scheduler_input preconditions and
        by the runtime warnings. It has no format element of its own. -->
    <parameter type="Switch" issimple="1" ismandatory="1">
      <name>more_memory</name>
      <attributes>
        <prompt>I need more memory</prompt>
        <vdef>
          <value>0</value>
        </vdef>
      </attributes>
    </parameter>
    
  <!--     <parameter issimple="1" ismandatory = "1" type = "Switch">
      <name>more_memory2</name>
      <attributes>
        <prompt>I need max memory</prompt>
		<vdef>
			<value>0</value>
		</vdef> 
      </attributes>
    </parameter>-->

    <!-- Multi-FASTA result: when outputFormat is MFA, standard output is
         redirected to output.mfa. -->
    <parameter type="OutFile">
      <name>outputMFAFile</name>
      <attributes>
        <filenames>output.mfa</filenames>
        <format>
          <language>perl</language>
          <code>" &gt; output.mfa"</code>
        </format>
        <precond>
          <language>perl</language>
          <code>defined $outputFormat &amp;&amp; $outputFormat eq "MFA"</code>
        </precond>
        <group>99</group>
      </attributes>
    </parameter>

    <!-- Pairwise Multi-FASTA results: matches *.fasta files when outputFormat
         is MFA and the pairwise switch is on. -->
    <parameter type="Results">
      <name>outputMFAPairwise</name>
      <attributes>
        <filenames>*.fasta</filenames>
        <precond>
          <language>perl</language>
          <code>defined $outputFormat &amp;&amp; $outputFormat eq "MFA" &amp;&amp; $pairwise</code>
        </precond>
      </attributes>
    </parameter>

    <!-- ClustalW result: when outputFormat is clustal, standard output is
         redirected to output.clustal. -->
    <parameter type="OutFile">
      <name>outputClustalWFile</name>
      <attributes>
        <filenames>output.clustal</filenames>
        <format>
          <language>perl</language>
          <code>" &gt; output.clustal"</code>
        </format>
        <precond>
          <language>perl</language>
          <code>defined $outputFormat &amp;&amp; $outputFormat eq "clustal"</code>
        </precond>
        <group>99</group>
      </attributes>
    </parameter>

    <!-- Pairwise ClustalW results: matches *.aln files when outputFormat is
         clustal and the pairwise switch is on. -->
    <parameter type="Results">
      <name>outputClustalWPairwise</name>
      <attributes>
        <filenames>*.aln</filenames>
        <precond>
          <language>perl</language>
          <code>defined $outputFormat &amp;&amp; $outputFormat eq "clustal" &amp;&amp; $pairwise</code>
        </precond>
      </attributes>
    </parameter>

    <!-- Number of consistency transformation passes (choices 0 to 5,
         default 2). " -c VALUE" is added only when the value differs from
         the default. -->
    <parameter type="Excl" issimple="0" ismandatory="1">
      <name>numConsistencyReps</name>
      <attributes>
        <prompt>Number of passes of consistency transformation (--consistency)</prompt>
        <vlist>
          <value>0</value>
          <label>0</label>
          <value>1</value>
          <label>1</label>
          <value>2</value>
          <label>2</label>
          <value>3</value>
          <label>3</label>
          <value>4</value>
          <label>4</label>
          <value>5</value>
          <label>5</label>
        </vlist>
        <vdef>
          <value>2</value>
        </vdef>
        <format>
          <language>perl</language>
          <code>(defined $value &amp;&amp; $value ne $vdef) ? " -c $value" : ""</code>
        </format>
        <group>12</group>
        <comment>
          <value>Each pass applies one round of the consistency transformation on the set of</value>
          <value>sequences.  The consistency transformation is described in detail in the</value>
          <value>publication.  In each round, the aligner computes the consistency transformation</value>
          <value>for each pair of sequences using all other sequences.  The aligner then updates</value>
          <value>the posterior probability matrices of the pairwise alignments.</value>
        </comment>
      </attributes>
    </parameter>

    <!-- Number of iterative refinement passes (default 100). " -ir VALUE" is
         added only for a defined value within 0..1000 that differs from the
         default; the ctrl rejects values outside that range. -->
    <parameter type="Integer" issimple="0" ismandatory="0">
      <name>numRefinementReps</name>
      <attributes>
        <prompt>Number of passes of iterative refinement, up to 1000 (--iterative-refinement)</prompt>
        <format>
          <language>perl</language>
          <code>(defined $value &amp;&amp; $value &gt; -1 &amp;&amp; $value &lt; 1001 &amp;&amp; $value ne $vdef) ? " -ir $value" : ""</code>
        </format>
        <vdef>
          <value>100</value>
        </vdef>
        <ctrls>
          <ctrl>
            <message>Values for "--iterative-refinement" must be between 0 and 1000, inclusive.</message>
            <language>perl</language>
            <code>$value &lt; 0 || $value &gt; 1000</code>
          </ctrl>
        </ctrls>
        <group>13</group>
        <comment>
          <value>This specifies the number of iterations of iterative refinement to be performed.</value>
          <value>In each stage of iterative refinement, the set of sequences in the alignment is</value>
          <value>randomly partitioned into two groups.  After projecting the alignments to these</value>
          <value>groups, the two groups are realigned, resulting in an alignment whose objective</value>
          <value>score is guaranteed to be at least that of the original alignment.</value>
        </comment>
      </attributes>
    </parameter>

    <!-- Number of pretraining rounds (default 0). " -pre VALUE" is added only
         for a defined value within 0..20 that differs from the default; the
         ctrl rejects values outside that range. -->
    <parameter type="Integer" issimple="0" ismandatory="0">
      <name>numPretrainingReps</name>
      <attributes>
        <prompt>Number of rounds of pretraining, up to 20 (--pre-training)</prompt>
        <format>
          <language>perl</language>
          <code>(defined $value &amp;&amp; $value &gt; -1 &amp;&amp; $value &lt; 21 &amp;&amp; $value ne $vdef) ? " -pre $value" : ""</code>
        </format>
        <vdef>
          <value>0</value>
        </vdef>
        <ctrls>
          <ctrl>
            <message>Values for "--pre-training" must be between 0 and 20, inclusive.</message>
            <language>perl</language>
            <code>$value &lt; 0 || $value &gt; 20</code>
          </ctrl>
        </ctrls>
        <group>14</group>
        <comment>
          <value>This specifies the number of rounds of EM to be applied on the set of sequences</value>
          <value>being aligned.  This option is used in case the default parameters are not</value>
          <value>appropriate for the particular sequences being aligned; in general, this option</value>
          <value>is not recommended as it may lead to unstable alignment parameters.</value>
        </comment>
      </attributes>
    </parameter>

    <!-- Switch (off by default) that adds " -pairs" to the command line. It is
         also read by the viterbi and pairwise-results preconditions. -->
    <parameter type="Switch" issimple="0" ismandatory="0">
      <name>pairwise</name>
      <attributes>
        <prompt>Generate only pairwise alignments? (-pairs)</prompt>
        <format>
          <language>perl</language>
          <code>$value ? " -pairs" : ""</code>
        </format>
        <vdef>
          <value>0</value>
        </vdef>
        <group>15</group>
        <comment>
          <value>When this option is selected, PROBCONS generates all pairs pairwise maximum</value>
          <value>expected accuracy alignments using the posterior matrices without generating a</value>
          <value>full multiple alignment.  The names of the files are based on the header comments</value>
          <value>for each of the sequences in the original input file with .fasta appended.  When</value>
          <value>the clustalw output option is selected, then .aln is used as a suffix instead.</value>
        </comment>
      </attributes>
    </parameter>

    <!-- Switch (off by default) that adds " -viterbi" to the command line.
         Its precondition requires the pairwise switch to be on. -->
    <parameter type="Switch" issimple="0" ismandatory="0">
      <name>viterbi</name>
      <attributes>
        <prompt>Use Viterbi decoding (-viterbi)</prompt>
        <format>
          <language>perl</language>
          <code>$value ? " -viterbi" : ""</code>
        </format>
        <vdef>
          <value>0</value>
        </vdef>
        <precond>
          <language>perl</language>
          <code>$pairwise</code>
        </precond>
        <group>16</group>
        <comment>
          <value>Generates all-pairs pairwise alignments using the Viterbi algorithm.  Note that</value>
          <value>this option requires the -pairs option to be enabled.  This option is not</value>
          <value>recommended but is available for comparison to the maximum expected accuracy</value>
          <value>alignments.</value>
        </comment>
      </attributes>
    </parameter>

    <!-- Switch (off by default) that adds " -annot output.annotations" to the
         command line. -->
    <parameter type="Switch" issimple="0" ismandatory="0">
      <name>writeAnnotation</name>
      <attributes>
        <prompt>Write annotation for multiple alignment (-annot)</prompt>
        <format>
          <language>perl</language>
          <code>$value ? " -annot output.annotations" : ""</code>
        </format>
        <vdef>
          <value>0</value>
        </vdef>
        <group>17</group>
        <comment>
          <value>Turning on this option causes the program to write quality scores for columns in</value>
          <value>the produced alignment to a file called output.annotations.  The quality score</value>
          <value>for each column of the alignment is given on a separate line and is an integer</value>
          <value>between 0 and 100 inclusive, representing the expected percentage of correct</value>
          <value>pairwise matches in the column.  Columns containing only one non-gap character</value>
          <value>automatically have quality score 0.</value>
        </comment>
      </attributes>
    </parameter>

    <!-- Results entry for output.annotations; applies only when the
         writeAnnotation switch is on. -->
    <parameter type="Results">
      <name>annotationResults</name>
      <attributes>
        <filenames>output.annotations</filenames>
        <precond>
          <language>perl</language>
          <code>$writeAnnotation</code>
        </precond>
      </attributes>
    </parameter>

    <!-- Switch (off by default) that adds the long-form train option with the
         fixed file name trained.params to the command line. -->
    <parameter type="Switch" issimple="0" ismandatory="0">
      <name>writeTraining</name>
      <attributes>
        <prompt>Write EM transition probabilities (--train)</prompt>
        <format>
          <language>perl</language>
          <code>$value ? " --train trained.params" : ""</code>
        </format>
        <vdef>
          <value>0</value>
        </vdef>
        <group>18</group>
        <comment>
          <value>This option is used to train the aligner using a set of sequences.  The test</value>
          <value>sequences are read from the specified input file.  This performs exactly one</value>
          <value>round of EM training on the sequences; multiple calls to PROBCONS are needed in</value>
          <value>order to obtain convergence.  The training parameters are written to a file</value>
          <value>called trained.params as three lines:</value>
          <value>    initMatchProb initInsertXProb initInsertYProb</value>
          <value>    startInsertXProb startInsertYProb</value>
          <value>    extendInsertXProb extendInsertYProb</value>
        </comment>
      </attributes>
    </parameter>

    <!-- Results entry for trained.params; applies only when the writeTraining
         switch is on. -->
    <parameter type="Results">
      <name>trainingResults</name>
      <attributes>
        <filenames>trained.params</filenames>
        <precond>
          <language>perl</language>
          <code>$writeTraining</code>
        </precond>
      </attributes>
    </parameter>

    <!-- Optional parameter file input. When a value is defined, the long-form
         paramfile option is added with the fixed file name input.params. -->
    <parameter type="InFile" issimple="0" ismandatory="0">
      <name>paramsFile</name>
      <attributes>
        <prompt>Trained ProbCons parameter file (--paramfile)</prompt>
        <filenames>input.params</filenames>
        <format>
          <language>perl</language>
          <code>defined $value ? " --paramfile input.params" : ""</code>
        </format>
        <group>19</group>
        <comment>
          <value>Reads initial/final and transition probabilities from a user-specified file.</value>
          <value>This file should specify the initial/final probabilities and transition</value>
          <value>probabilities for the HMM model used by the aligner.  The HMM model consists of</value>
          <value>a Match state, an Insert X state, and an Insert Y state, and is described in</value>
          <value>more detail in the publication.  The file format consists of three lines,</value>
          <value> containing:</value>
          <value>    initMatchProb initInsertXProb initInsertYProb</value>
          <value>    startInsertXProb startInsertYProb</value>
          <value>    extendInsertXProb extendInsertYProb</value>
        </comment>
      </attributes>
    </parameter>
  </parameters>
</pise>
