<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE pise PUBLIC "pise2.dtd" "pise2.dtd">
<pise>
	<head>
		<!-- Tool metadata: title, version, description, authorship and citation. -->
		<title>Divvier on XSEDE</title>
		<version>1.01</version>
		<description>Address MSA uncertainty and error with software run on XSEDE</description>
		<authors>R.H. Ali, M. Bogusz, S. Whelan</authors>
		<reference>Ali RH, Bogusz M, Whelan S. Identifying Clusters of High Confidence Homologies in Multiple Sequence Alignments. Mol Biol Evol. 2019 Oct 1;36(10):2340-2351. doi: 10.1093/molbev/msz142. PMID: 31209473; PMCID: PMC6933875.</reference>
		<category>Phylogeny / </category>
<!--		<doclink>http://icwww.epfl.ch/~stamatak/index-Dateien/countManual7.0.0.php</doclink> -->
	</head>
	<!-- Command name for this tool definition. -->
	<command>divvier_xsede</command>
	
<parameters>
<!--          

Clustering options:
        -divvy       : do standard divvying (DEFAULT)
        -partial     : do partial filtering by testing removal of individual characters
        -thresh X    : set the threshold for divvying to X (DEFAULT divvying = 0.801; partial = 0.774)

Approximation options:
        -approx X    : minimum number of characters tested in a split during divvying (DEFAULT X = 10)
        -checksplits : go through sequence and ensure there's a pair for every split. Can be slow
        -HMMapprox   : Do the pairHMM bounding approximation (DEFAULT)
        -HMMexact    : Do the full pairHMM and ignore bounding

Output options:
        -mincol X    : Minimum number of characters in a column to output when divvying/filtering (DEFAULT X = 2)
        -divvygap    : Output a gap instead of the static * character so divvied MSAs can be used in phylogeny program
 -->

<!-- Hidden parameters-->

<!-- Invoke the run  --> 
					<parameter type="String" ishidden="1">
						<name>invoke_run</name>
						<attributes>
							<!-- Hidden parameter: the format code is a constant string holding the path of the divvier executable. -->
							<format>
								<language>perl</language>
								<code>"/expanse/projects/ngbt/opt/expanse/divvier/Divvier/divvier"</code>
							</format>
							<!-- Group 0 is the lowest group number in this file; presumably it places the executable first on the command line (confirm against the PISE conventions). -->
							<group>0</group>
						</attributes>
					</parameter>
		
<!-- Specify the input file -->	
					<parameter type="InFile" isinput="1" ismandatory="1" issimple="1">
						<name>infile</name>
						<attributes>
							<!-- Mandatory input alignment. The format code yields the fixed name "infile.txt", the same name listed in filenames below. -->
							<prompt>Sequences File (relaxed phylip format) (-in)</prompt>
							<format>
								<language>perl</language>
								<code>"infile.txt"</code>
							</format>
							<!-- Group 99 is the highest group number in this file; presumably it places the input file last on the command line (confirm against the PISE conventions). -->
							<group>99</group>
							<filenames>infile.txt</filenames>
						</attributes>
					</parameter>
		
<!-- runs are on a single node -->		
	<!-- scheduler file -->
		<parameter ishidden="1" type="String">
			<name>scheduler_input</name>
			<attributes>
				<!-- Hidden parameter whose format output is associated with scheduler.conf through paramfile.
				     The settings request one node, 7G of memory, a non-exclusive node and one thread per process. -->
				<paramfile>scheduler.conf</paramfile>
				<format>
					<language>perl</language>
					<code>
						"ChargeFactor=1.0\\n" .
						"nodes=1\\n" .
						"mem=7G\\n" .
						"node_exclusive=0\\n" .
						"threads_per_process=1\\n"
					</code>
				</format>
			</attributes>
		</parameter>
 						
<!-- return results -->
					<parameter type="Results">
						<name>all_outputfiles</name>
						<attributes>
							<!-- The wildcard pattern matches every file name, so all files are listed as results. -->
							<filenames>*</filenames>
						</attributes>
					</parameter>
	
<!-- Parameters with visible controls start here -->
<!-- Begin Simple Parameters -->
<!-- Set Max hours -->
		<parameter type="Float" issimple="1" ismandatory="1">
			<name>runtime</name>
			<attributes>
				<!-- Maximum wall-clock hours for the job. The format output ("runhours=...") is associated with scheduler.conf through paramfile. -->
				<group>1</group>
				<paramfile>scheduler.conf</paramfile>
				<prompt>Maximum Hours to Run (click here for help setting this correctly)</prompt>
				<vdef>
					<value>0.25</value>
				</vdef>
				<format>
					<language>perl</language>
					<code>"runhours=$value\\n"</code>
				</format>
				<!-- Each ctrl reports its message when its code evaluates to true. -->
				<ctrls>
					<ctrl>
						<!-- 168 itself is accepted, so the message says "must not exceed" rather than "less than". -->
						<message>Maximum Hours to Run must not exceed 168</message>
						<language>perl</language>
						<code>$runtime &gt; 168</code>
					</ctrl>
					<ctrl>
						<!-- Rejects zero as well as negative values, matching the message. The defined() guard leaves an empty field to the dedicated check below. -->
						<message>Maximum Hours to Run must be greater than 0</message>
						<language>perl</language>
						<code>(defined $runtime) &amp;&amp; ($runtime &lt;= 0)</code>
					</ctrl>
					<ctrl>
						<message>Please Enter a Value for Maximum Hours to Run</message>
						<language>perl</language>
						<code>!defined $runtime </code>
					</ctrl>
				</ctrls>
<!-- added by mamiller to provide feedback on number of cpu hrs to be consumed -->
				<warns>
					<warn>
						<message>The job will run on 1 processor as configured. If it runs for the entire configured time, it will consume 1 x $runtime cpu hours</message>
						<language>perl</language>
						<code>$runtime &gt; 0</code>
					</warn>
				</warns>
				<comment>
<value>Estimate the maximum time your job will need to run. We recommend testing initially with a &lt; 0.5hr test run because jobs set for 0.5 h or less dependably run immediately in the "debug" queue.
Once you are sure the configuration is correct, you then increase the time. The reason is that jobs &gt; 0.5 h are submitted to the "normal" queue, where jobs configured for 1 or a few hours may
run sooner than jobs configured for the full 168 hours.
</value>
				</comment>
			</attributes>
		</parameter>
		
<!--    -divvy       : do standard divvying (DEFAULT)
        -partial     : do partial filtering by testing removal of individual characters
        -thresh X    : set the threshold for divvying to X (DEFAULT divvying = 0.801; partial = 0.774)

Approximation options:
        -approx X    : minimum number of characters tested in a split during divvying (DEFAULT X = 10)
        -checksplits : go through sequence and ensure there's a pair for every split. Can be slow
        -HMMapprox   : Do the pairHMM bounding approximation (DEFAULT)
        -HMMexact    : Do the full pairHMM and ignore bounding

Output options:
        -mincol X    : Minimum number of characters in a column to output when divvying/filtering (DEFAULT X = 2)
        -divvygap    : Output a gap instead of the static * character so divvied MSAs can be used in phylogeny program -->					
					<parameter type="Excl" ismandatory="1" issimple="1">
						<name>divv_orpartial</name>
						<attributes>
							<!-- Exclusive choice between the two algorithms. The format code prefixes the chosen value with a single dash, giving "-divvy" or "-partial". -->
							<group>2</group>
							<prompt>Select the algorithm</prompt>
							<vlist>
								<value>divvy</value>
								<label>Divvy</label>
								<value>partial</value>
								<label>Partial</label>
							</vlist>
							<format>
								<language>perl</language>
								<code>"-$value"</code>
							</format>
							<!-- Default selection is standard divvying. -->
							<vdef>
								<value>divvy</value>
							</vdef>
							<ctrls>
								<ctrl>
									<message>Please select an algorithm: divvy or partial</message>
									<language>perl</language>
									<code>!defined $divv_orpartial</code>
								</ctrl>
							</ctrls>
							<comment>
								<value>
This selects the algorithm, -divvy do standard divvying (DEFAULT); -partial: do partial filtering by testing removal of individual characters					
								</value>
							</comment>
						</attributes>
					</parameter>

<!-- -thresh X    : set the threshold for divvying to X (DEFAULT divvying = 0.801; partial = 0.774) -->
		<parameter ishidden="0" issimple="1" type="Float">
			<name>divvy_threshold</name>
			<attributes>
				<!-- Threshold used when the divvy algorithm is selected. The prompt names the single-dash flag that the format code emits. -->
				<prompt>Set the divvy threshold (-thresh)</prompt>
				<precond>
					<language>perl</language>
					<code>$divv_orpartial eq "divvy" </code>
				</precond>
				<format>
					<language>perl</language>
					<!-- The flag is emitted only when the value differs from the default (string comparison). -->
					<code>$divvy_threshold ne $vdef ? "-thresh $value" : ""</code>
				</format>
				<vdef>
					<value>0.801</value>
				</vdef>
				<group>6</group>
			</attributes>
		</parameter>
		
		<parameter ishidden="0" issimple="1" type="Float">
			<name>partial_threshold</name>
			<attributes>
				<!-- Threshold used when the partial algorithm is selected. -->
				<prompt>Set the partial threshold (-thresh)</prompt>
				<precond>
					<language>perl</language>
					<code>$divv_orpartial eq "partial" </code>
				</precond>
				<format>
					<language>perl</language>
					<!-- The option list in this file documents the flag with a single dash, as does divvy_threshold.
					     The flag is emitted only when the value differs from the default (string comparison). -->
					<code>$partial_threshold ne $vdef ? "-thresh $value" : ""</code>
				</format>
				<vdef>
					<value>0.774</value>
				</vdef>
				<group>6</group>
			</attributes>
		</parameter>
		
<!--  Approximation options:
        -approx X    : minimum number of characters tested in a split during divvying (DEFAULT X = 10)
        -checksplits : go through sequence and ensure there's a pair for every split. Can be slow
        -HMMapprox   : Do the pairHMM bounding approximation (DEFAULT)
        -HMMexact    : Do the full pairHMM and ignore bounding           -->
		<parameter issimple="1" type="Integer">
			<name>specify_minchars</name>
			<attributes>
				<!-- Minimum number of characters tested in a split; default 10. -->
				<prompt>Minimum number of characters tested in a split (-approx)</prompt>
				<format>
					<language>perl</language>
					<!-- Guarded with defined(), as specify_mincolumns does, so an empty field cannot produce a dangling "-approx" with no argument. -->
					<code>(defined $value) ? "-approx $value" : ""</code>
				</format>
				<vdef>
					<value>10</value>
				</vdef>
				<group>7</group>
			</attributes>
		</parameter>

<!--  go through sequence and ensure there's a pair for every split.          -->
		<parameter type="Switch" issimple="1" ishidden="0">
			<name>specify_checksplit</name>
			<attributes>
				<!-- Optional switch, off by default; when set, the format code adds "-checksplits". -->
				<prompt>Ensure there is a pair for every split (-checksplits)</prompt>
				<format>
					<language>perl</language>
					<code>$value ? "-checksplits" : ""</code>
				</format>
				<vdef>
					<value>0</value>
				</vdef>
				<comment>
					<value>Go through sequence and ensure there's a pair for every split. This can be slow</value>
				</comment>
				<group>8</group>
			</attributes>
		</parameter>
		
<!-- -HMMexact    : Do the full pairHMM and ignore bounding   -->
		<parameter type="Switch" issimple="1">
			<name>specify_HMMexact</name>
			<attributes>
				<!-- Optional switch, off by default; when set, the format code adds "-HMMexact". Nothing is added when it is off. -->
				<prompt>Do the full pairHMM and ignore bounding (-HMMexact)</prompt>
				<format>
					<language>perl</language>
					<code>$value ? "-HMMexact" : ""</code>
				</format>
				<vdef>
					<value>0</value>
				</vdef>
				<comment>
					<value>Check this to get -HMMexact: Do the full pairHMM and ignore bounding; uncheck to get the default, -HMMapprox: Do the pairHMM bounding approximation (DEFAULT)</value>
				</comment>
				<group>9</group>
			</attributes>
		</parameter>

<!--    Output options:
        -mincol X    : Minimum number of characters in a column to output when divvying/filtering (DEFAULT X = 2)
        -divvygap    : Output a gap instead of the static * character so divvied MSAs can be used in phylogeny program -->					
					
  					<parameter type="Integer" issimple="1">
						<name>specify_mincolumns</name>
						<attributes>
							<!-- Minimum characters per output column; default 2. The flag is emitted whenever a value is defined. -->
							<prompt>Minimum number of characters in a column to output when divvying/filtering (-mincol)</prompt>
							<format>
								<language>perl</language>
								<code>(defined $specify_mincolumns) ? "-mincol $specify_mincolumns" : ""</code>
							</format>
							<vdef>
								<value>2</value>
							</vdef>
							<group>10</group>
						</attributes>
					</parameter>
		
					<parameter type="Switch" issimple="1">
						<name>output_gap</name>
						<attributes>
							<!-- Switch that is on by default in this interface; when set, the format code adds "-divvygap". -->
							<prompt>Output a gap instead of the static * character (-divvygap)</prompt>
							<format>
								<language>perl</language>
								<code>($output_gap) ? "-divvygap" : ""</code>
							</format>
							<vdef>
								<value>1</value>
							</vdef>
							<group>11</group>
						</attributes>
					</parameter>
			
	</parameters>
</pise>

