<?xml version="1.0" encoding="ISO-8859-1" ?>
<!DOCTYPE pise PUBLIC "pise.dtd" "pise2.dtd">

<pise>

  <!-- Descriptive metadata for this tool definition: title, PHYLIP release,
       summary, author, citations, category and documentation link. -->
  <head>
    <title>Seqboot</title>
    <version>Phylip 3.66</version>
    <description>Bootstrap, Jackknife, or Permutation Resampling</description>
    <authors>Felsenstein</authors>
    <reference>Felsenstein, J.  1993.  PHYLIP (Phylogeny Inference Package) version 3.5c. Distributed by the author.  Department of Genetics, University of Washington, Seattle.</reference>
    <reference>Felsenstein, J.  1989.  PHYLIP -- Phylogeny Inference Package (Version 3.2). Cladistics  5: 164-166.</reference>
    <category>Phylogeny / Alignment</category>
    <doclink>http://bioweb.pasteur.fr/docs/gensoft-evol.html#PHYLIP</doclink>
  </head>

  <!-- Program name this interface definition describes. -->
  <command>seqboot</command>

  <parameters>
  
  <!-- scheduler file -->
		<!-- Hidden parameter: always writes four fixed scheduler settings
		     (ChargeFactor, nodes, node_exclusive, threads_per_process)
		     into scheduler.conf. -->
		<parameter type="String" ishidden="1">
			<name>scheduler_input</name>
			<attributes>
				<paramfile>scheduler.conf</paramfile>
				<format>
					<language>perl</language>
					<!-- One Perl string per setting, concatenated with ".". -->
					<code>
						"ChargeFactor=1.0\\n" .
						"nodes=1\\n" .
						"node_exclusive=0\\n" .
						"threads_per_process=1\\n"
					</code>
				</format>
			</attributes>
		</parameter>
		
  <!-- Mandatory run-time limit in hours (default 1.0). The value is written
       to scheduler.conf as "runhours=<value>". -->
  <parameter type="Float" issimple="1" ismandatory="1">
    <name>runtime</name>
    <attributes>
      <group>1</group>
      <paramfile>scheduler.conf</paramfile>
      <prompt>Maximum Hours to Run (click here for help setting this correctly)</prompt>
      <vdef><value>1.0</value></vdef>
      <comment>
        <value>
				Estimate the maximum time your job will need to run (up to 72 hrs).  Your job will be killed if it doesn't finish within the time you specify, however jobs with shorter maximum run times are often scheduled sooner than longer jobs.
        </value>
      </comment>
      <!-- Each ctrl's code is an error condition: when it is true the
           accompanying message is reported. -->
      <ctrls>
        <!-- Out-of-range check: below 0.1 or above 72.0 hours. -->
        <ctrl>
          <message>Maximum Hours to Run must be between 0.1 - 72.0.</message>
          <language>perl</language>
          <code>$runtime &lt; 0.1 || $runtime &gt; 72.0</code>
        </ctrl>
        <!-- Missing-value check. -->
        <ctrl>
          <message>Please set a value for the runtime</message>
          <language>perl</language>
          <code>!defined $runtime </code>
        </ctrl>
      </ctrls>
      <format>
        <language>perl</language>
        <code>"runhours=$value\\n"</code>
      </format>
    </attributes>
  </parameter>

    <!-- Hidden command parameter: produces the invocation that feeds the
         generated "params" file to seqboot on standard input. -->
    <parameter iscommand="1" ishidden="1" type="String">
      <name>seqboot</name>
      <attributes>
        <format>
          <language>perl</language>
          <code> "seqboot &lt; params" </code>
        </format>
        <group>0</group>
      </attributes>
    </parameter>

	<!-- <parameter ismandatory="1" issimple="1"  type="InFile"> -->
    <!-- Mandatory alignment input. The format code symlinks the fixed name
         infile.phylip (see filenames) to "infile"; group -5 is the lowest
         group in this file, presumably so the link is created before the
         seqboot command itself (confirm against the framework's ordering). -->
    <parameter ismandatory="1" issimple="1" isinput="1" type="InFile">
      <name>infile</name>
      <attributes>
        <prompt>Alignment File</prompt>
        <format>
          <language>perl</language>
          <!-- <code>"ln -s $infile infile; "</code> -->
          <code>"ln -s infile.phylip infile; "</code>
        </format>
        <group>-5</group>
        <!-- NOTE(review): 12 is presumably the framework's sequence-format
             code for PHYLIP; verify against the format table. -->
        <seqfmt>
          <value>12</value>
        </seqfmt>
        <!-- Added for Swami-->
        <filenames>infile.phylip</filenames>
        <!-- -->
      </attributes>
    </parameter>
	  
	  
	<!-- Added for Swami -->
	 <!-- Switch telling whether the input alignment is interleaved
	      (default: yes). When switched off, an "I" line is written to
	      params; when on, nothing is written.
	      Fix: the emitted string previously ended in a stray "$" ("I\\n$"),
	      which would have been written to params after the newline and
	      prepended to the next answer line. -->
	 <parameter issimple="1" type="Switch">
	   <name>interleaved</name>
	   <attributes>
	     <prompt>Input Sequences Interleaved</prompt>
	     <format>
	       <language>perl</language>
	       <code>($value)? "" : "I\\n"</code>
	     </format>
	     <group>1</group>
	     <vdef><value>1</value></vdef>
	     <paramfile>params</paramfile>
	   </attributes>
	 </parameter>
    <!-- -->
<!-- Mandatory exclusive choice of input data kind (default: sequence).
     Depending on the choice, zero to three "D" lines are written to params
     (see flist). -->
<parameter ismandatory="1" issimple="1" type="Excl">
  <name>data_type</name>
  <attributes>
    <prompt>Data type (D)</prompt>
    <vdef><value>sequence</value></vdef>
    <group>1</group>
    <!-- Selectable values, each followed by its display label. -->
    <vlist>
      <value>sequence</value>
      <label>Molecular sequences</label>
      <value>morph</value>
      <label>Discrete Morphology</label>
      <value>rest</value>
      <label>Restriction Sites</label>
      <value>freq</value>
      <label>Gene Frequencies</label>
    </vlist>
    <!-- Text written to params for each value. -->
    <flist>
      <value>sequence</value>
      <code>""</code>
      <value>morph</value>
      <code>"D\\n"</code>
      <value>rest</value>
      <code>"D\\nD\\n"</code>
      <value>freq</value>
      <code>"D\\nD\\nD\\n"</code>
    </flist>
    <paramfile>params</paramfile>
  </attributes>
</parameter>

<!-- Mandatory exclusive choice of resampling method (default: bootstrap).
     The flist writes zero, one or two "J" lines to params depending on the
     choice. -->
<parameter ismandatory="1" issimple="1" type="Excl">
  <name>method</name>
  <attributes>
    <prompt>Resampling methods (J)</prompt>
    <!-- NOTE(review): both a format and an flist are declared here;
         presumably the flist is what is actually used for an Excl
         parameter. Confirm against the framework. -->
    <format>
      <language>perl</language>
      <code>"$value"</code>
		<!--  <code>$format</code>
			        $format is not a parameter defined in this file
			 my guess it is a mistake to $value		 -->
    </format>
    <vdef><value>bootstrap</value></vdef>
    <group>1</group>
    <!-- Selectable values, each followed by its display label. -->
    <vlist>
      <value>bootstrap</value>
      <label>Bootstrap</label>
      <value>jackknife</value>
      <label>Delete-half jackknife</label>
      <value>permute</value>
      <label>Permute species for each character</label>
    </vlist>
    <!-- Text written to params for each value. -->
    <flist>
      <value>permute</value>
      <code>"J\\nJ\\n"</code>
      <value>bootstrap</value>
      <code>""</code>
      <value>jackknife</value>
      <code>"J\\n"</code>
    </flist>
    <comment>
      <value>1. The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data.</value>
      <value>2. Delete-half-jackknifing. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986).</value>
      <value>3. Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just a pair of sibling species).</value>
    </comment>
    <paramfile>params</paramfile>
  </attributes>
</parameter>

<!-- Mandatory random number seed, written on its own line to params.
     Fix: the prompt states the seed must be odd, but nothing enforced it.
     A ctrl now rejects non-positive or even seeds up front. -->
<parameter ismandatory="1" issimple="1" type="Integer">
  <name>seed</name>
  <attributes>
    <prompt>Random number seed (must be odd)</prompt>
    <format>
      <language>perl</language>
      <code>"$value\\n"</code>
    </format>
    <!--<group>1010</group>-->
    <!-- NOTE(review): group 99 is the highest group in this file, presumably
         so the seed is the last line written to params; confirm ordering. -->
    <group>99</group>
    <!-- The code is an error condition: true means the value is rejected
         and the message is shown. -->
    <ctrls>
      <ctrl>
        <message>Random number seed must be a positive odd integer</message>
        <language>perl</language>
        <code>$seed &lt;= 0 || ($seed % 2) == 0</code>
      </ctrl>
    </ctrls>
    <paramfile>params</paramfile>
  </attributes>
</parameter>

<!-- Optional number of replicates (default 100). "R" followed by the value
     is written to params only when the value is non-zero and differs from
     the default; otherwise nothing is written. -->
<parameter type="Integer">
  <name>replicates</name>
  <attributes>
    <prompt>How many replicates (R)</prompt>
    <format>
      <language>perl</language>
      <code>($value &amp;&amp; $value != $vdef)? "R\\n$value\\n" : ""</code>
    </format>
    <vdef><value>100</value></vdef>
    <group>1</group>
    <!-- Error condition: more than 1000 replicates requested. -->
    <ctrls>
      <ctrl>
        <message>this server allows no more than 1000 replicates</message>
        <language>perl</language>
        <code>$replicates &gt; 1000</code>
      </ctrl>
    </ctrls>
    <paramfile>params</paramfile>
  </attributes>
</parameter>

<!-- Option group that applies only when data_type is "freq". -->
<parameter type="Paragraph">
  <paragraph>
    <name>freq_opt</name>
    <prompt>Genes Frequencies options</prompt>
    <precond>
      <language>perl</language>
      <code>$data_type eq "freq"</code>
    </precond>
    <parameters>

      <!-- Switch (default off): when on, an "A" line is written to params. -->
      <parameter type="Switch">
        <name>alleles</name>
        <attributes>
          <prompt>All alleles present at each locus (default: no, one absent at each locus) (A)</prompt>
          <format>
            <language>perl</language>
            <code>($value)? "A\\n" : ""</code>
          </format>
          <vdef><value>0</value></vdef>
          <group>1</group>
          <!-- Same condition as the enclosing paragraph. -->
          <precond>
            <language>perl</language>
            <code>$data_type eq "freq"</code>
          </precond>
          <paramfile>params</paramfile>
        </attributes>
      </parameter>

    </parameters>
  </paragraph>
</parameter>
	  
	<!-- Added for Swami -->  

	 <!-- Optional percentage of characters to sample. "%" followed by the
	      value is written to params only when a value is given and it is
	      below 100; otherwise nothing is written. -->
	 <parameter type="Float">
	   <name>percentage</name>
	   <attributes>
	     <prompt>what fraction of the characters</prompt>
	     <comment>
			<value> The % option allows the user control over what fraction of the characters are sampled in the bootstrap 
			and jackknife methods. Normally the bootstrap samples a number of times equal to the number of characters, 
			and the jackknife samples half that number. This option permits you to specify a smaller fraction of characters 
			to be sampled. Note that doing so is "statistically incorrect", but it is available here for whatever other 
			purposes you may have in mind. Note that the fraction you will be asked to enter is the fraction of characters sampled, 
			not the fraction left out. If you specify 100 as the fraction of sites retained and are using the jackknife, 
			the data set will simply be rewritten. Note (as mentioned below) that this can 
			be used together with the W (Weights) option to rewrite a data set while omitting a particular set of sites. 
			</value>
	     </comment>
	     <format>
	       <language>perl</language>
	       <code>(defined $value &amp;&amp; $value &lt; 100)? "%\\n$value\\n" : ""</code>
	     </format>
	     <group>1</group>
	     <paramfile>params</paramfile>
	   </attributes>
	 </parameter>
	    
	 <!-- Optional block length for the block bootstrap. When a value is
	      given, "B" followed by the value is written to params.
	      Fix: the format previously emitted "B" unconditionally, so an unset
	      value would have produced "B" plus an empty answer line. It is now
	      guarded with "defined $value", matching the percentage parameter. -->
	 <parameter type="Integer">
	   <name>Bbootstrap</name>
	   <attributes>
	     <prompt>Block Bootstrap</prompt>
	     <comment>
			<value> The B option selects the Block Bootstrap. When you select option B the program will ask you to enter the block length. 
				When the block length is 1, this means that we are doing regular bootstrapping rather than block-bootstrapping.   
			</value>
	     </comment>
	     <format>
	       <language>perl</language>
	       <code>(defined $value)? "B\\n$value\\n" : ""</code>
	     </format>
	     <group>1</group>
	     <paramfile>params</paramfile>
	   </attributes>
	 </parameter>
	  
	  <!-- Switch (default off): when on, a "W" line is written to params.
	       Fix: the emitted string previously ended in a stray "$" ("W\\n$"),
	       which would have been written to params after the newline and
	       prepended to the next answer line. -->
	  <parameter type="Switch">
	    <name>weights</name>
	    <attributes>
			<prompt>Weights</prompt>
			<comment>
			<value> The W (Weights) option allows weights to be read from a file whose default name is "weights". 
				The weights follow the format described in the Phylip main documentation file. Weights can only be 0 or 1, 
				and act to select the characters (or sites) that will be used in the resampling, 
				the others being ignored and always omitted from the output data sets. 
				If you use W together with the S (just weights) option, you write a file of weights 
				(whose default name is "outweights"). In that file, any character whose original weight is 0 will have weight 0,
				the other weights varying according to the resampling. Note that if you write out data sets rather than weights 
				(not using the S option), this output weights file is not written, as the characters are written different numbers of 
				times in the data output file Note that with restriction sites, the weights are not used by some of the programs. 
				Writing out files of weights will not be helpful with those programs. For the moment, with all gene frequencies programs 
				the weights are also not used.    
			</value>
			</comment>
			<format>
			  <language>perl</language>
			  <code>($value)? "W\\n" : ""</code>
			</format>
			<group>1</group>
			<vdef><value>0</value></vdef>
			<paramfile>params</paramfile>
		</attributes>
	  </parameter>
	  
	<!-- Input file for the weights option. Only applies when the "weights"
	     switch is on; contributes nothing to the command line (empty format)
	     and uses the fixed file name "weights". -->
	<parameter type="InFile">
		<name>weights_file</name>
		<attributes>
			<prompt>Weights Input File</prompt>
			<format>
				<language>perl</language>
				<code>""</code>
			</format>
			<group>1</group>
			<precond>
				<language>perl</language>
				<code>$weights</code>
			</precond>
			<filenames>weights</filenames>
		</attributes>
	</parameter>
	  
	  <!-- Switch (default off): when on, a "C" line is written to params.
	       Fix: the emitted string previously ended in a stray "$" ("C\\n$"),
	       which would have been written to params after the newline and
	       prepended to the next answer line. -->
	  <parameter type="Switch">
	    <name>categories</name>
	    <attributes>
			<prompt>Categories</prompt>
			<comment>
			<value> The C (Categories) option can be used with molecular sequence programs to allow assignment of sites or amino acid
				 positions to user-defined rate categories. The assignment of rates to sites is then made by reading a file whose default name is "categories". 
				 It should contain a string of digits 1 through 9. A new line or a blank can occur after any character in this string. 
				 Thus the categories file might look like this:</value>
					<value>122231111122411155</value>
					<value>1155333333444</value>
				<value>The only use of the Categories information in Seqboot is that they are sampled along with the sites (or amino acid positions)
				 and are written out onto a file whose default name is "outcategories", which has one set of categories
				 information for each bootstrap or jackknife replicate.</value>	
			</comment>
			<format>
			  <language>perl</language>
			  <code>($value)? "C\\n" : ""</code>
			</format>
			<group>1</group>
			<vdef><value>0</value></vdef>
			<paramfile>params</paramfile>
		</attributes>
	  </parameter>
	  
	<!-- Input file for the categories option. Only applies when the
	     "categories" switch is on; contributes nothing to the command line
	     (empty format) and uses the fixed file name "categories". -->
	<parameter type="InFile">
		<name>categories_file</name>
		<attributes>
			<prompt>Categories Input File</prompt>
			<format>
				<language>perl</language>
				<code>""</code>
			</format>
			<group>1</group>
			<precond>
				<language>perl</language>
				<code>$categories</code>
			</precond>
			<filenames>categories</filenames>
		</attributes>
	</parameter>
	
	<!-- Switch (default off): when on, an "S" line is written to params.
	     Fix: the emitted string previously ended in a stray "$" ("S\\n$"),
	     which would have been written to params after the newline and
	     prepended to the next answer line. -->
	<parameter type="Switch">
	    <name>multiple_weights</name>
	    <attributes>
			<prompt>Produce multiple weights file</prompt>
			<comment> <value>
				The S option is a particularly important one. 
				It is used whether to produce multiple output files or 
				multiple weights. If your data set is large, a file with (say) 1000 
				such data sets can be very large and may use up too much space on your system. 
				If you choose the S option, the program will instead produce a weights file with 
				multiple sets of weights. The default name of this file is "outweights". 
				Except for some programs that cannot handle multiple sets of weights, 
				PHYLIP programs have an M (multiple data sets) option that asks the user
			    whether to use multiple data sets or multiple sets of weights. 
				If the latter is selected when running those programs, 
				they read one data set, but analyze it multiple times, 
				each time reading a new set of weights. 
				As both bootstrapping and jackknifing can be thought of as reweighting 
				the characters, this accomplishes the same thing (the multiple weights 
				option is not available for the various kinds of permutation). 
				As the file with multiple sets of weights is much smaller than a file 
				with multiple data sets, this can be an attractive way to save file space. 
				When multiple sets of weights is chosen, they reflect the sampling as well as any 
				set of weights that was read in, so that you can use Seqboot's W option as well. 
   			</value>
			</comment>
				     <comment> <value>&#169;Copyright 1980-2007. University of Washington.</value></comment>
			<format>
			  <language>perl</language>
			  <code>($value)? "S\\n" : ""</code>
			</format>
			<group>1</group>
			<vdef><value>0</value></vdef>
			<paramfile>params</paramfile>
		</attributes>
		
	  </parameter>
	
	<!-- Output file declaration for "outweights". Only applies when the
	     "multiple_weights" switch is on; contributes nothing to the command
	     line (empty format). -->
	<parameter type="OutFile">
		<name>outweights</name>
		<attributes>
			<format>
				<language>perl</language>
				<code>""</code>
			</format>
			<group>1</group>
			<precond>
				<language>perl</language>
				<code>$multiple_weights</code>
			</precond>
			<filenames>outweights</filenames>
		</attributes>
	</parameter>
	  
	<!-- -->
	
     

   <!-- Option group that applies only when data_type is "rest". -->
   <parameter type="Paragraph">
     <paragraph>
       <name>rest_opt</name>
       <prompt>Restriction enzymes options</prompt>
       <precond>
         <language>perl</language>
         <code>$data_type eq "rest"</code>
       </precond>
       <parameters>

         <!-- Switch (default on): an "E" line is written to params only
              when the switch is turned OFF (note the negated test). -->
         <parameter type="Switch">
           <name>enzymes_nb</name>
           <attributes>
             <prompt>Number of enzymes: not present in input file (E)</prompt>
             <format>
               <language>perl</language>
               <code>(! $value)? "E\\n" : ""</code>
             </format>
             <vdef><value>1</value></vdef>
             <group>1</group>
             <!-- Same condition as the enclosing paragraph. -->
             <precond>
               <language>perl</language>
               <code>$data_type eq "rest"</code>
             </precond>
             <paramfile>params</paramfile>
           </attributes>
         </parameter>

       </parameters>
     </paragraph>
   </parameter>

      <!-- Result declaration: the "*" pattern matches every file. -->
      <parameter type="Results">
        <name>all_results</name>
        <attributes>
          <filenames>*</filenames>
        </attributes>
      </parameter>

<!-- Swami: "params" is a temporary file, not a real result file;
	we could return it to users, but it is worthless to them -->
 
<!--
   <parameter type="Results">
	<name>params</name>
	<attributes>
	  <filenames>params</filenames>
	</attributes>
   </parameter>
   --> 

      <!-- Hidden parameter: always writes a "y" line to params.
           NOTE(review): presumably the answer that accepts the menu
           settings; group 90 sits between the option groups and the seed
           (group 99). Confirm against the seqboot prompts. -->
      <parameter ishidden="1" type="String">
        <name>confirm</name>
        <attributes>
          <format>
            <language>perl</language>
            <code>"y\\n"</code>
          </format>
          <!--<group>1000</group>-->
          <group>90</group>
          <paramfile>params</paramfile>
        </attributes>
      </parameter>

      <!-- Hidden parameter: always writes a "0" line to params.
           NOTE(review): presumably the menu option that selects the
           terminal type; confirm against the seqboot menu. -->
      <parameter ishidden="1" type="String">
        <name>terminal_type</name>
        <attributes>
          <format>
            <language>perl</language>
            <code>"0\\n"</code>
          </format>
          <!-- <group>-1</group> -->
          <group>0</group>
          <paramfile>params</paramfile>
        </attributes>
      </parameter>

   </parameters>
</pise>
