<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE pise PUBLIC "pise2.dtd" "pise2.dtd" >
<pise>
    <head>
        <!-- Descriptive metadata for this tool definition: title, version, description, credits and category. -->
        <title>MetaSpades/MetaMarc Workflow on XSEDE</title>
        <version>3.13.0</version>
        <description>Metaspades Markov Model Search for antibiotic resistance genes in environmental samples</description>
        <authors>Nurk, S., Bankevich, A., Antipov, D., Gurevich, A. A., Korobeynikov, A., Lapidus, A., Prjibelski, A. D., Pyshkin, A., Sirotkin, A., Sirotkin, Y., Stepanauskas, R., Clingenpeel, S. R., Woyke, T., McLean, J. S., Lasken, R., Tesler, G., Alekseyev, M. A., and Pevzner, P. A.</authors>
        <reference>Nurk, S., Bankevich, A., Antipov, D., Gurevich, A. A., Korobeynikov, A., Lapidus, A., Prjibelski, A. D., Pyshkin, A., Sirotkin, A., Sirotkin, Y., Stepanauskas, R., Clingenpeel, S. R., Woyke, T., McLean, J. S., Lasken, R., Tesler, G., Alekseyev, M. A., and Pevzner, P. A. (2013) Assembling single-cell genomes and mini-metagenomes from chimeric MDA products. J Comput Biol 20, 714-737. 10.1089/cmb.2013.0084</reference>
        <category>Sequence Assembly</category>
    </head>

	<!-- Command name of this tool definition (presumably the identifier the gateway uses for this interface; confirm against the tool registry). -->
	<command>metaspades_metamarc_xsede</command>
	
<!-- ********************************** created 10/21/2020 or thereabouts by mamiller ****************************************************************
-*meta   (same as metaspades.py)
    This flag is recommended when assembling metagenomic data sets (runs metaSPAdes, see paper for more 
    details). Currently metaSPAdes supports only a single short-read library which has to be paired-end 
    (we hope to remove this restriction soon). In addition, you can provide long reads 
    (e.g. using -*pacbio or -*nanopore options), but hybrid assembly for metagenomes remains an 
    experimental pipeline and optimal performance is not guaranteed. It does not support careful 
    mode (mismatch correction is not available). In addition, you cannot specify coverage cutoff for 
    metaSPAdes. Note that metaSPAdes might be very sensitive to presence of the technical sequences 
    remaining in the data (most notably adapter readthroughs), please run quality control and 
    pre-process your data accordingly. 
    -->
    
<parameters> 
<!-- hidden parameters --> 
    <parameter type="String" ismandatory="1" ishidden="1">
        <!-- Hidden, mandatory fragment: emits the wrapper name with group 1, i.e. ahead of the spades.py call (group 2). -->
        <name>invocation</name>
        <attributes>
            <format>
                <language>perl</language>
                <code>"metamarc_wrapper_expanse"</code>
            </format>
            <group>1</group>
        </attributes>
    </parameter>
      
    <parameter type="String" ishidden="1">
        <!-- Hidden scheduler settings written to scheduler.conf when fewer than 24 threads are requested:
             non-exclusive node, one node, threads_per_process equal to num_threads. -->
        <name>metamarc_scheduler</name>
        <attributes>
            <paramfile>scheduler.conf</paramfile>
            <precond>
                <language>perl</language>
                <code>$num_threads &lt; 24</code>
            </precond>
            <format>
                <language>perl</language>
                <!-- The memory request scales with the thread count: int(num_threads * 248/32) G. -->
                <code>
                    "threads_per_process=$num_threads\\n" .
                    "node_exclusive=0\\n" .
                    "mem=" . (int($num_threads*(248/32))) . "G\\n" .
                    "nodes=1\\n"
                </code>
            </format>
        </attributes>
    </parameter>
		
		<parameter type="String" ishidden="1">
			<!-- Hidden scheduler settings written to scheduler.conf when 24 or more threads are requested:
			     the request is fixed at 24 threads per process on one exclusive node. -->
			<name>metamarc_scheduler2</name>
			<attributes>
				<paramfile>scheduler.conf</paramfile>
				<precond>
					<language>perl</language>
					<code>$num_threads &gt; 23</code>
				</precond>
				<format>
					<language>perl</language>
					<!-- Memory is computed for the fixed 24 threads: int(24 * 248/32) G. -->
					<code>
						"threads_per_process=24\\n" .
						"mem=" . (int(24*(248/32))) . "G\\n" .
						"node_exclusive=1\\n" .
						"nodes=1\\n"
					</code>
				</format>
			</attributes>
		</parameter>

<!-- this starts the spades command line; the input file name can't be changed here -->
    <parameter type="Sequence" isinput="1" issimple="1">
        <!-- First read file. Opens the spades.py call (group 2) with the -1 option and the fixed
             file name inputfastq_R1.sub.fq.gz, which is also the name declared in filenames.
             NOTE(review): -1 is emitted whether or not paired_end is set; confirm this is intended. -->
        <name>inputfile1_fastq</name>
        <attributes>
            <prompt>First fastq file</prompt>
            <filenames>inputfastq_R1.sub.fq.gz</filenames>
            <format>
                <language>perl</language>
                <code>"spades.py -1 inputfastq_R1.sub.fq.gz"</code>
            </format>
            <group>2</group>
        </attributes>
    </parameter>
        
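 <!-- this selects metaSPAdes mode (the meta flag) and passes the user-chosen thread count (-t $num_threads). It is group 16, so it follows the -o option (group 15) and precedes the -k option (group 18) -->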
    <parameter type="String" ismandatory="1" ishidden="1">
        <!-- Hidden, mandatory fragment: adds the meta flag and the thread count taken from num_threads (group 16). -->
        <name>command_line2</name>
        <attributes>
            <format>
                <language>perl</language>
                <code>"--meta -t $num_threads"</code>
            </format>
            <group>16</group>
        </attributes>
    </parameter>
    
<!-- this creates the directory required by metamarc. It uses a user-entered name for the directory -->
 	<parameter type="String" issimple="0" ishidden="1">
 		<!-- Hidden fragment (group 44), emitted only when run_metamarc is set: creates the Meta-MARC
 		     output directory once the spades step has succeeded.
 		     The directory name must match the -o value built in metamarc_filedirnames
 		     (metamarc_ followed by name_spadesoutputdir); previously the bare name was created,
 		     which is not the directory mmarc is told to write to.
 		     mkdir -p is used so an already existing directory does not break the command chain. -->
 		<name>metaspades_metamarc_workflow1</name>
 		<attributes>
 			<precond>
 				<language>perl</language>
 				<code>$run_metamarc</code>
 			</precond>
 			<format>
 				<language>perl</language>
 				<code>"&amp;&amp; mkdir -p metamarc_$name_spadesoutputdir"</code>
 			</format>
 			<vdef>
 				<value>0</value>
 			</vdef>
 			<group>44</group>
 		</attributes>
 	</parameter>
				
<!--this line is hidden, and it begins the command line for metamarc in Datalign: mmarc -i spades_test_{file}/contigs.fasta -o {file} -f {file}_output -t 24 -->
<!-- the user provides the root of the output directory name; the spades contigs are read from spades_{name}/contigs.fasta -->
     <parameter ishidden="1" type="String">
         <!-- Hidden fragment (group 45), emitted only when run_metamarc is set: starts the mmarc call
              through the wrapper, reading contigs.fasta from the spades output directory. -->
         <name>metaspades_metamarc_workflow2</name>
         <attributes>
             <precond>
                 <language>perl</language>
                 <code>$run_metamarc</code>
             </precond>
             <format>
                 <language>perl</language>
                 <code>"&amp;&amp; metamarc_wrapper_expanse mmarc -i spades_$name_spadesoutputdir/contigs.fasta"</code>
             </format>
             <vdef>
                 <value>0</value>
             </vdef>
             <group>45</group>
         </attributes>
     </parameter>
          
<!-- end of hidden parameters -->
	
<!-- Visible parameters -->
<!-- Parameters with visible controls start here -->
		<parameter type="Float" issimple="1" ismandatory="1">
			<!-- Maximum run time in hours, written to scheduler.conf as runhours=VALUE.
			     The ctrls reject values above 168 or below 0.1, so both bounds themselves are accepted;
			     the messages are worded to match. The warning no longer states a fixed 24 processors,
			     because the thread count is chosen by the user in num_threads. -->
			<name>runtime</name>
			<attributes>
				<group>1</group>
				<paramfile>scheduler.conf</paramfile>
				<prompt>Maximum Hours to Run (click here for help setting this correctly)</prompt>
				<vdef>
					<value>12</value>
				</vdef>
				<comment>
<value>Estimate the maximum time your job will need to run. We recommend testing initially with a &lt; 0.5hr test run because jobs set for 0.5 h or less dependably run immediately in the "debug" queue. 
Once you are sure the configuration is correct, you then increase the time. The reason is that jobs &gt; 0.5 h are submitted to the "normal" queue, where jobs configured for 1 or a few hours may
run sooner than jobs configured for the full 168 hours. 
</value>
				</comment>
				<ctrls>
					<ctrl>
						<message>Maximum Hours to Run must not exceed 168</message>
						<language>perl</language>
						<code>$runtime &gt; 168.0</code>
					</ctrl>
					<ctrl>
						<message>Maximum Hours to Run must be at least 0.1</message>
						<language>perl</language>
						<code>$runtime &lt; 0.1</code>
					</ctrl>
				</ctrls>
				<warns>
					<warn>
						<message>The job will run on the number of threads you request (the scheduler request is capped at 24 threads). If it runs for the entire configured time, it will consume up to (threads) x $runtime cpu hours</message>
						<language>perl</language>
						<code>$runtime ne 0 </code>
					</warn>
				</warns>
				<format>
					<language>perl</language>
					<code>"runhours=$value\\n"</code>
				</format>
			</attributes>
		</parameter>

<!-- the user specifies the number of threads; the value is passed to spades (-t), to mmarc (-t), and is used to build the scheduler configuration -->    
    <parameter issimple="1" type="Integer">
        <!-- Thread count chosen by the user (default 4). It has no format of its own; the value is
             consumed by the scheduler parameters, by the spades -t option and by the mmarc -t option.
             The ctrl rejects values below 1, which would otherwise produce a zero memory request
             and an invalid -t argument. -->
        <name>num_threads</name>
        <attributes>
            <prompt>How many threads?</prompt>
            <vdef>
                <value>4</value>
            </vdef>
            <ctrls>
                <ctrl>
                    <message>The number of threads must be at least 1</message>
                    <language>perl</language>
                    <code>$num_threads &lt; 1</code>
                </ctrl>
            </ctrls>
        </attributes>
    </parameter>
     
<!-- the user specifies whether or not the files are paired end reads. This is the only supported type until more are needed -->    
    <parameter type="Switch" issimple="1">
        <!-- Switch, on by default. It emits nothing itself; inputfastq2 uses it as its precondition. -->
        <name>paired_end</name>
        <attributes>
            <prompt>Files are paired end reads.</prompt>
            <vdef>
                <value>1</value>
            </vdef>
            <group>3</group>
        </attributes>
    </parameter>

<!-- The user specifies the second set of paired end reads. Must be gzipped fastq files -->    
    <parameter issimple="1" ismandatory="1" type="InFile">
        <!-- Second read file, requested only when paired_end is set. Adds the -2 option with the
             fixed file name inputfastq_R2.sub.fq.gz, which is also the name declared in filenames.
             The attributes previously carried two group elements (10 and 5); a single one is kept.
             Either value sorts between the first input (group 2) and the -o option (group 15),
             so the position on the command line is unchanged. -->
        <name>inputfastq2</name>
        <attributes>
            <prompt>Input fastq file2 (gzipped)</prompt>
            <filenames>inputfastq_R2.sub.fq.gz</filenames>
            <precond>
                <language>perl</language>
                <code>$paired_end</code>
            </precond>
            <format>
                <language>perl</language>
                <code>"-2 inputfastq_R2.sub.fq.gz"</code>
            </format>
            <ctrls>
                <ctrl>
                    <message>Please select the second fastq file</message>
                    <language>perl</language>
                    <code>$paired_end &amp;&amp; !defined $inputfastq2</code>
                </ctrl>
            </ctrls>
            <group>5</group>
        </attributes>
    </parameter>

<!-- The user specifies the name of the output directory for spades results
spades_{file} -->
     <parameter issimple="1" ismandatory="1" type="String">
         <!-- User-chosen name (default: output). Emits -o spades_NAME (group 15); the same value is
              also interpolated into the mkdir and mmarc fragments of the Meta-MARC step.
              Because the text ends up on a shell command line, the ctrl restricts it to letters,
              digits, underscore, dot and hyphen, and rejects an empty value. -->
         <name>name_spadesoutputdir</name>
         <attributes>
             <prompt>Name for the spades output directory</prompt>
             <format>
                 <language>perl</language>
                 <code>"-o spades_$name_spadesoutputdir"</code>
             </format>
             <vdef>
                 <value>output</value>
             </vdef>
             <group>15</group>
             <ctrls>
                 <ctrl>
                     <message>The output directory name may contain only letters, digits, underscore, dot and hyphen</message>
                     <language>perl</language>
                     <code>$name_spadesoutputdir !~ /^[A-Za-z0-9_.-]+$/</code>
                 </ctrl>
             </ctrls>
         </attributes>
     </parameter>
    
 <!-- User can specify the kmer values, but these are recommended: -k 21,33,55,77,99,127 -->
      <parameter issimple="1" ismandatory="1" type="String">
          <!-- k-mer sizes passed to spades as -k VALUE (group 18); default 21,33,55,77,99,127.
               The first ctrl rejects a missing value. The second requires a comma-separated list
               of integers, so free text cannot reach the shell command line. -->
          <name>specify_kmer</name>
          <attributes>
              <prompt>Specify the kmer values for meta-spades</prompt>
              <format>
                  <language>perl</language>
                  <code>"-k $value"</code>
              </format>
              <vdef>
                  <value>21,33,55,77,99,127</value>
              </vdef>
              <group>18</group>
              <ctrls>
                  <ctrl>
                      <message>Please specify the kmer values for meta-spades</message>
                      <language>perl</language>
                      <code>!defined $specify_kmer</code>
                  </ctrl>
                  <ctrl>
                      <message>The kmer values must be a comma-separated list of integers without spaces, for example 21,33,55</message>
                      <language>perl</language>
                      <code>defined $specify_kmer &amp;&amp; $specify_kmer !~ /^[0-9]+(,[0-9]+)*$/</code>
                  </ctrl>
              </ctrls>
          </attributes>
      </parameter>

<!-- If metamarc is to be run, this switch must be set to 1. -->    
    <parameter type="Switch" issimple="1">
        <!-- Switch, on by default. It is the precondition of the hidden mkdir and mmarc fragments
             and of the Meta-MARC output file name parameter. -->
        <name>run_metamarc</name>
        <attributes>
            <prompt>Run Meta-Marc after meta-Spades</prompt>
            <vdef>
                <value>1</value>
            </vdef>
        </attributes>
    </parameter>
    
<!-- this lets the user configure metamarc parameters --> 
 <!-- set metamarc_dirname 
 				<parameter type="String">
 					<name>metamarc_dirname</name>
					<attributes>
						<prompt>Enter a name for the metamarc output directory (-o)</prompt>
						<precond>
							<language>perl</language>
							<code>$run_metamarc</code>
						</precond>
 						<vdef>
 							<value>metamarc_results</value>
 						</vdef>
						<group>46</group>
					</attributes>
				</parameter> -->
				
 <!-- set metamarc_outfilename -->
 				<parameter issimple="1" type="String">
 					<!-- Output file name for mmarc (default: metamarc_output), shown only when run_metamarc is set.
 					     It has no format of its own; metamarc_filedirnames interpolates it after -f.
 					     The ctrl rejects an empty value and anything other than letters, digits, underscore,
 					     dot and hyphen, and requires a leading letter or digit so the value cannot be read
 					     as a command-line option. -->
 					<name>metamarc_filename</name>
 					<attributes>
 						<prompt>Enter a name for the metamarc output file (-f)</prompt>
 						<precond>
 							<language>perl</language>
 							<code>$run_metamarc</code>
 						</precond>
 						<vdef>
 							<value>metamarc_output</value>
 						</vdef>
 						<group>46</group>
 						<ctrls>
 							<ctrl>
 								<message>The metamarc output file name must start with a letter or digit and may contain only letters, digits, underscore, dot and hyphen</message>
 								<language>perl</language>
 								<code>$run_metamarc &amp;&amp; $metamarc_filename !~ /^[A-Za-z0-9][A-Za-z0-9_.-]*$/</code>
 							</ctrl>
 						</ctrls>
 					</attributes>
 				</parameter>

<!-- /projects/ps-ngbt/opt/comet/meta-marc/1.0/bin/mmarc -i spades_{file}/contigs.fasta -o {file} -f {file}_output -t 24  --> 
<parameter type="String" ishidden="1" ismandatory="1">
    <!-- Hidden fragment (group 46), emitted only when run_metamarc is set: completes the mmarc call
         with the output directory (metamarc_ followed by name_spadesoutputdir), the output file
         name from metamarc_filename and the thread count from num_threads. -->
    <name>metamarc_filedirnames</name>
    <attributes>
        <precond>
            <language>perl</language>
            <code>$run_metamarc</code>
        </precond>
        <format>
            <language>perl</language>
            <code>"-o metamarc_$name_spadesoutputdir -f $metamarc_filename -t $num_threads"</code>
        </format>
        <group>46</group>
    </attributes>
</parameter>

	 	<parameter type="Results">
	 		<!-- Results declaration using the wildcard pattern *, i.e. every file matches. -->
	 		<name>all_outputfiles</name>
	 		<attributes>
	 			<filenames>*</filenames>
	 		</attributes>
	 	</parameter>
			
</parameters>
</pise>