<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE pise PUBLIC "pise2.dtd" "pise2.dtd" >
<pise>
    <head>
        <!-- Tool identity and citation metadata. -->
        <title>GATK on XSEDE</title>
        <!-- NOTE(review): the sample commands in this file's header comment point at GenomeAnalysisTK/3.6 while this says 3.5; confirm the deployed version. -->
        <version>3.5</version>
        <description>Variant Discovery in High-Throughput Sequencing Data</description>
        <authors>McKenna A, Hanna M, Banks E, Sivachenko A, Cibulskis K, Kernytsky A, Garimella K, Altshuler D, Gabriel S, Daly M, DePristo MA</authors>
        <reference>The Genome Analysis Toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data McKenna A, Hanna M, Banks E, Sivachenko A, Cibulskis K, Kernytsky A, Garimella K, Altshuler D, Gabriel S, Daly M, DePristo MA, 2010 Genome Research 20:1297-303 </reference>
        <category>Phylogeny / Alignment</category>
        <!-- Both documentation links are currently empty placeholders. -->
        <doclink></doclink>
        <doclink></doclink>
    </head>
	
	<!-- Tool name for this interface. The hidden gatk_invoke_command parameter emits 'gatk_xsede.expanse', which is presumably the wrapper that actually runs (confirm). -->
	<command>gatk_xsede</command>
	
<!-- ***********************************  created 10/25/2016 or thereabouts by mamiller ****************************************************************

Here are the different commands that can be invoked using gatk. They are executed in this order: gatk_realignertarget, gatk_indelrealigner and gatk_gvcf.  A single interface similar to what you did for samtools would work well.

gatk_realignertarget command
java -jar /opt/biotools/GenomeAnalysisTK/3.6/GenomeAnalysisTK.jar -T RealignerTargetCreator -R ref.fasta -I A1.bam.sorted_marked_readgroups.bam -o A1.bam.sorted_marked_readgroups_realign.intervals _*filter_mismatching_base_and_quals

gatk_indelrealigner command
java -jar /opt/biotools/GenomeAnalysisTK/3.6/GenomeAnalysisTK.jar -I A1.bam.sorted_marked_readgroups.bam -R ref.fasta -T IndelRealigner -targetIntervals A1.bam.sorted_marked_readgroups_realign.intervals -o A1.bam.sorted_marked_readgroups_realign.bam

gatk_gvcf command
java -jar /opt/biotools/GenomeAnalysisTK/3.6/GenomeAnalysisTK.jar -T HaplotypeCaller -R /ref.fasta -I A1.bam.sorted_marked_readgroups_realign.bam 
-o A1.bam.sorted_marked_readgroups_realign.g.vcf -stand_call_conf 30.0 -stand_emit_conf 30.0 _*variant_index_type LINEAR _*variant_index_parameter 128000 _*sample_name A1 _*emitRefConfidence GVCF _*filter_mismatching_base_and_quals _*minPruning 2 _*sample_ploidy 2
  -->

<parameters> 
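<!-- Invocation wrapper. NOTE(review): this comment used to name gatk_xsede.centos7, but the code below emits gatk_xsede.expanse. -->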
		<parameter type="String" ishidden="1">
			<name>gatk_invoke_command</name>
			<attributes>
				<!-- Hidden and unconditional (no precond): always contributes the literal below. -->
				<format>
					<language>perl</language>
					<code>'gatk_xsede.expanse' </code>
				</format>
			</attributes>
		</parameter>

        <parameter type="InFile" isinput="1">
            <name>infile</name>
            <attributes>
                <!-- Main input, staged as ref.fasta. The reference is the main input because all three gatk command strings in this file pass -R ref.fasta. -->
                <prompt>Input reference fasta file</prompt>
                <filenames>ref.fasta</filenames>
            </attributes>
        </parameter>

		<parameter type="String" ishidden="1">
			<name>gatk_realalignertarget_command</name>
			<attributes>
				<!-- Step 1 of 3 (RealignerTargetCreator). Emitted only when the gatk_real switch is set. -->
				<precond>
					<language>perl</language>
					<code>$gatk_real</code>
				</precond>
				<format>
					<language>perl</language>
					<!-- Inputs: ref.fasta and A1.bam.sorted_marked_readgroups.bam.
					     Output: A1.bam.sorted_marked_readgroups_realign.intervals.
					     The trailing shell AND operator is appended only when a later step
					     (gatk_indelreal or gatk_gvcf) is also selected; otherwise the command
					     line would end in a dangling operator. -->
					<code>($gatk_indelreal || $gatk_gvcf) ? 'gatk -T RealignerTargetCreator -R ref.fasta -I A1.bam.sorted_marked_readgroups.bam -o A1.bam.sorted_marked_readgroups_realign.intervals --filter_mismatching_base_and_quals &amp;&amp; ' : 'gatk -T RealignerTargetCreator -R ref.fasta -I A1.bam.sorted_marked_readgroups.bam -o A1.bam.sorted_marked_readgroups_realign.intervals --filter_mismatching_base_and_quals' </code>
				</format>
			</attributes>
		</parameter>
		
		<parameter type="String" ishidden="1">
			<name>gatk_indelrealalignertarget_command</name>
			<attributes>
				<!-- Step 2 of 3 (IndelRealigner). Emitted only when the gatk_indelreal switch is set. -->
				<precond>
					<language>perl</language>
					<code>$gatk_indelreal</code>
				</precond>
				<format>
					<language>perl</language>
					<!-- Inputs: A1.bam.sorted_marked_readgroups.bam, ref.fasta and
					     A1.bam.sorted_marked_readgroups_realign.intervals (the step 1 output).
					     Output: A1.bam.sorted_marked_readgroups_realign.bam.
					     The trailing shell AND operator is appended only when step 3 (gatk_gvcf)
					     follows; otherwise the command line would end in a dangling operator. -->
					<code>$gatk_gvcf ? 'gatk -I A1.bam.sorted_marked_readgroups.bam -R ref.fasta -T IndelRealigner -targetIntervals A1.bam.sorted_marked_readgroups_realign.intervals -o A1.bam.sorted_marked_readgroups_realign.bam &amp;&amp; ' : 'gatk -I A1.bam.sorted_marked_readgroups.bam -R ref.fasta -T IndelRealigner -targetIntervals A1.bam.sorted_marked_readgroups_realign.intervals -o A1.bam.sorted_marked_readgroups_realign.bam'</code>
				</format>
			</attributes>
		</parameter>
		
		<parameter type="String" ishidden="1">
			<name>gatk_gvcf_command</name>
			<attributes>
				<!-- Step 3 of 3 (HaplotypeCaller in GVCF mode). Emitted only when the gatk_gvcf switch is set. -->
				<precond>
					<language>perl</language>
					<code>$gatk_gvcf</code>
				</precond>
				<format>
					<language>perl</language>
					<!-- Inputs: ref.fasta and A1.bam.sorted_marked_readgroups_realign.bam (the step 2 output).
					     Output: A1.bam.sorted_marked_readgroups_realign.g.vcf.
					     Last command in the chain, so no trailing shell operator. -->
					<code>'gatk -T HaplotypeCaller -R ref.fasta -I A1.bam.sorted_marked_readgroups_realign.bam -o A1.bam.sorted_marked_readgroups_realign.g.vcf -stand_call_conf 30.0 -stand_emit_conf 30.0 --variant_index_type LINEAR --variant_index_parameter 128000 --emitRefConfidence GVCF --filter_mismatching_base_and_quals --minPruning 2 --sample_ploidy 2'</code>
				</format>
			</attributes>
		</parameter>
		
		<parameter type="String" ishidden="1">
			<name>gatk_scheduler</name>
			<attributes>
				<!-- Fixed resource request written to scheduler.conf: 12 threads per process, non-exclusive node, 23G memory, 1 node. -->
				<paramfile>scheduler.conf</paramfile>
				<format>
					<language>perl</language>
					<code>
									"threads_per_process=12\\n" .
									"node_exclusive=0\\n" .
									"mem=23G\\n" .
									"nodes=1\\n"
								</code>
				</format>
				<group>0</group>
			</attributes>
		</parameter>

		<parameter type="Results">
			<name>Fastaout</name>
			<attributes>
				<!-- NOTE(review): none of the gatk command strings in this file writes output.fasta; this may be a leftover from a template. Confirm before relying on it. -->
				<prompt>fasta output</prompt>
				<filenames>output.fasta</filenames>
			</attributes>
		</parameter>

<!-- Return absolutely everything: the "*" pattern matches every file. -->
		<parameter type="Results">
			<name>all_output</name>
			<attributes>
				<!-- The prompt previously read "fasta output", copied from Fastaout; this parameter returns all files, not only fasta. -->
				<prompt>All output files</prompt>
				<filenames>*</filenames>
			</attributes>
		</parameter>
	
<!-- Visible parameters -->
<!-- Parameters with visible controls start here -->
		<parameter type="Float" ismandatory="1" issimple="1">
			<name>runtime</name>
			<attributes>
				<!-- Wall-clock limit in hours, written to scheduler.conf as a runhours= line. Default is 0.25. -->
				<group>1</group>
				<paramfile>scheduler.conf</paramfile>
				<prompt>Maximum Hours to Run (click here for help setting this correctly)</prompt>
				<format>
					<language>perl</language>
					<code>"runhours=$value\\n"</code>
				</format>
				<vdef>
					<value>0.25</value>
				</vdef>
				<!-- Each ctrl reports its message when its code is true. Both bounds are inclusive
				     (exactly 168 and exactly 0.1 are accepted), and the messages say so. -->
				<ctrls>
					<ctrl>
						<message>Maximum Hours to Run must be 168 or less</message>
						<language>perl</language>
						<code>$runtime &gt; 168.0</code>
					</ctrl>
					<ctrl>
						<message>Maximum Hours to Run must be at least 0.1</message>
						<language>perl</language>
						<code>$runtime &lt; 0.1</code>
					</ctrl>
				</ctrls>
				<!-- The 12 in the warning matches threads_per_process=12 in the gatk_scheduler parameter. -->
				<warns>
					<warn>
						<message>The job will run on 12 processors as configured. If it runs for the entire configured time, it will consume 12 x $runtime cpu hours</message>
						<language>perl</language>
						<code>$runtime ne 0 </code>
					</warn>
				</warns>
				<comment>
<value>Estimate the maximum time your job will need to run. We recommend testing initially with a &lt; 0.5hr test run because jobs set for 0.5 h or less dependably run immediately in the "debug" queue. 
Once you are sure the configuration is correct, you then increase the time. The reason is that jobs &gt; 0.5 h are submitted to the "normal" queue, where jobs configured for 1 or a few hours may
run sooner than jobs configured for the full 168 hours. 
</value>
				</comment>
			</attributes>
		</parameter>

		<parameter type="Switch" ismandatory="1">
			<name>gatk_real</name>
			<attributes>
				<!-- Toggles step 1: this value is the precond of gatk_realalignertarget_command. On by default. -->
				<prompt>Run realignertarget command</prompt>
				<vdef>
					<value>1</value>
				</vdef>
			</attributes>
		</parameter>
			
        
        <parameter type="Switch" ismandatory="1">
            <name>gatk_indelreal</name>
            <attributes>
                <!-- Toggles step 2: this value is the precond of gatk_indelrealalignertarget_command. On by default. -->
                <prompt>Run indelrealignertarget command</prompt>
                <vdef>
                    <value>1</value>
                </vdef>
            </attributes>
        </parameter>
        
        <parameter type="Switch" ismandatory="1">
            <name>gatk_gvcf</name>
            <attributes>
                <!-- Toggles step 3: this value is the precond of gatk_gvcf_command. On by default. -->
                <prompt>Run GVCF command</prompt>
                <vdef>
                    <value>1</value>
                </vdef>
                <!-- Disabled warning, kept for reference. It refers to $bam_sort and $sam_to_bam,
                     which are not defined anywhere in this file.
                <warns>
                    <warn>
                        <message>This configuration requires BAM.sort file for input</message>
                        <language>perl</language>
                        <code>!$bam_sort &amp;&amp; !$sam_to_bam</code>
                    </warn>
                </warns> -->
            </attributes>
        </parameter>
        
        <parameter type="InFile" ismandatory="1">
            <name>gatkref_dict</name>
            <attributes>
                <!-- Always required; staged as ref.dict alongside ref.fasta. -->
                <prompt>Select the .dict file (for reference file in main input)</prompt>
                <filenames>ref.dict</filenames>
                <comment>
                    <value>This is the dictionary file for the ref fasta file</value>
                </comment>
            </attributes>
        </parameter>
        
        <parameter type="InFile" ismandatory="1">
            <name>gatkref_fai</name>
            <attributes>
                <!-- Always required; staged as ref.fasta.fai alongside ref.fasta. -->
                <prompt>Select the .fai file (reference file in main input)</prompt>
                <filenames>ref.fasta.fai</filenames>
                <comment>
                    <value>This is the fasta index file for the ref fasta file</value>
                </comment>
            </attributes>
        </parameter>
		
		 <parameter type="InFile" ismandatory="1">
			<name>gatk_sortedbam</name>
			<attributes>
				<!-- Staged under the fixed name that the step 1 and step 2 command strings pass to -I. -->
				<prompt>Select the sorted .bam file (reference file in main input)</prompt>
				<filenames>A1.bam.sorted_marked_readgroups.bam</filenames>
				<comment>
					<value>This is the sorted marked readgroups bam file</value>
				</comment>
			</attributes>
		</parameter>
		
		<parameter type="InFile" ismandatory="1">
			<name>gatk_sortedbamindex</name>
			<attributes>
				<!-- Index for A1.bam.sorted_marked_readgroups.bam, the BAM read by steps 1 and 2. -->
				<prompt>Select the sorted .bam index file, .bai (reference file in main input)</prompt>
				<filenames>A1.bam.sorted_marked_readgroups.bai</filenames>
				<comment>
					<value>This is the sorted marked readgroups bam index file produced by Picard.</value>
				</comment>
			</attributes>
		</parameter>
		
		<!-- Intervals file for runs that start at step 2. When step 1 runs it writes this file itself,
		     so the upload applies only when gatk_indelreal is set and gatk_real is not. It is mandatory
		     in that case because the step 2 command string reads it via -targetIntervals. -->
		<parameter type="InFile" ismandatory="1">
			<name>indelrealign_inf</name>
			<attributes>
				<prompt>Select the indel realign .intervals file</prompt>
				<precond>
					<language>perl</language>
					<code>$gatk_indelreal &amp;&amp; !$gatk_real</code>
				</precond>
				<filenames>A1.bam.sorted_marked_readgroups_realign.intervals</filenames>
				<comment>
<value>This is the sorted marked readgroups realign .intervals file</value>
				</comment>
			</attributes>
		</parameter>
		
		<!-- Realigned BAM for runs that execute step 3 only. It is mandatory in that case because the
		     step 3 command string reads it via -I and no earlier step is there to produce it. -->
		<parameter type="InFile" ismandatory="1">
			<name>vcf_inf</name>
			<attributes>
				<prompt>Select the indel realign .bam file</prompt>
				<precond>
					<language>perl</language>
					<code>$gatk_gvcf &amp;&amp; !$gatk_indelreal &amp;&amp; !$gatk_real</code>
				</precond>
				<filenames>A1.bam.sorted_marked_readgroups_realign.bam</filenames>
				<comment>
<value>This is the sorted marked readgroups realign file for the third (vcf) step</value>
				</comment>
			</attributes>
		</parameter>
		
		<parameter type="InFile" ismandatory="1">
			<name>gatk_sortedbamindex2</name>
			<attributes>
				<!-- Index for the realigned BAM. Per the precond, requested only for step-3-only runs (gatk_gvcf set, gatk_real and gatk_indelreal both unset). -->
				<prompt>Select the sorted .bam index file, .bai (for runs with step 3 only)</prompt>
				<precond>
					<language>perl</language>
					<code>$gatk_gvcf &amp;&amp; !$gatk_indelreal &amp;&amp; !$gatk_real</code>
				</precond>
				<filenames>A1.bam.sorted_marked_readgroups_realign.bai</filenames>
				<comment>
					<value>This is the sorted marked readgroups bam index file produced by Picard, or some other tool for step 3 only runs.</value>
				</comment>
			</attributes>
		</parameter>
        		
 </parameters> 
</pise>


