#!/usr/bin/env nextflow


/* 
 * Define the pipeline parameters
 *
 */

// Pipeline version (interpolated into the start-up banner below)
version = '0.1'

// Defaults for the two flags tested right after the banner:
// params.help stops the run, params.resume guards against the `--resume` typo.
params.help            = false
params.resume          = false

// Start-up banner: prints the logo, the pipeline version and the value of every
// parameter read by this script, so that each run log records its configuration.
log.info """

╔╦╗┌─┐┌─┐┌┬┐┌─┐┬─┐  ┌─┐┌─┐  ╔═╗╔═╗╦═╗╔═╗╔═╗
║║║├─┤└─┐ │ ├┤ ├┬┘  │ │├┤   ╠═╝║ ║╠╦╝║╣ ╚═╗
╩ ╩┴ ┴└─┘ ┴ └─┘┴└─  └─┘└    ╩  ╚═╝╩╚═╚═╝╚═╝
                                                                                       
====================================================
BIOCORE@CRG Preprocessing of Nanopore direct RNA - N F  ~  version ${version}
====================================================

kit                       : ${params.kit}
flowcell                  : ${params.flowcell}
fast5                     : ${params.fast5}
reference                 : ${params.reference}
annotation                : ${params.annotation}

ref_type                  : ${params.ref_type}
seq_type                  : ${params.seq_type}

output                    : ${params.output}
qualityqc                 : ${params.qualityqc}
granularity               : ${params.granularity}

basecaller                : ${params.basecaller}
basecaller_opt            : ${params.basecaller_opt}
GPU                       : ${params.GPU}
demultiplexing            : ${params.demultiplexing} 
demultiplexing_opt        : ${params.demultiplexing_opt} 
demulti_fast5		      : ${params.demulti_fast5}

filter                    : ${params.filter}
filter_opt                : ${params.filter_opt}
mapper                    : ${params.mapper}
mapper_opt                : ${params.mapper_opt}
map_type                  : ${params.map_type}

counter                   : ${params.counter}
counter_opt               : ${params.counter_opt}

downsampling			  : ${params.downsampling}

variant_caller            : ${params.variant_caller}
variant_opt               : ${params.variant_opt}

email                     : ${params.email}
"""

// Help and avoiding typos
// --help: the banner above has already been printed, so simply stop here.
if (params.help) exit 1
// `--resume` (double dash) only sets params.resume; stop so the user can correct the command line.
if (params.resume) exit 1, "Are you making the classical --resume typo? Be careful!!!! ;)"
// NOTE(review): this only has an effect if params can still be reassigned at this point — confirm.
if (params.granularity == "") params.granularity = 1000000000

// check multi5 and GPU usage. GPU maybe can be removed as param if there is a way to detect it
// Only the literal values "ON" and "OFF" are accepted.
if (params.GPU != "ON" && params.GPU != "OFF") exit 1, "Please specify ON or OFF if GPU processors are available"

// check sequence type parameter (currently disabled, kept for reference)
//if (params.seq_type != "gDNA" && params.seq_type != "cDNA" && params.seq_type != "RNA") exit 1, "Please specify the sequence type as RNA, cDNA or gDNA"
//if (params.seq_type == "gDNA") { 
//    log.info "seqType is gDNA: map_type is set to 'unspliced'"
//	params.map_type = "unspliced"
//} else {
//    log.info "seqType is ${params.seq_type}: map_type is ${params.map_type}"
//}

// The mapping type is always validated, whatever the sequence type.
if (params.map_type != "unspliced" && params.map_type != "spliced") exit 1, "Mapping type NOT supported! Please choose either 'spliced' or 'unspliced'"

// check input files
// The reference and the MultiQC config are mandatory: the run stops if either is missing.
reference = file(params.reference)
if( !reference.exists() ) exit 1, "Missing reference file: ${reference}!"
config_report = file("$baseDir/config.yaml")
if( !config_report.exists() ) exit 1, "Missing config.yaml file!"
// The logo and the deeplexicon folder are resolved relative to the script folder; their existence is not checked here.
logo = file("$baseDir/../docs/logo_small.png")
deeplexicon_folder = file("$baseDir/deeplexicon/")


// Short aliases for the tool choices and their extra options, used by the processes below.
basecaller   		= params.basecaller
basecaller_opt  	= params.basecaller_opt
demultiplexer 		= params.demultiplexing
demultiplexer_opt   = params.demultiplexing_opt
mapper      		= params.mapper
mapper_opt   		= params.mapper_opt
counter_opt   		= params.counter_opt 


// Output folders (all created under params.output by the publishDir directives)
outputFastq    = "${params.output}/fastq_files"
outputFast5    = "${params.output}/fast5_files"
outputQual     = "${params.output}/QC_files"
outputMultiQC  = "${params.output}/report"
outputMapping  = "${params.output}/alignment"
outputCRAM     = "${params.output}/cram_files"
outputCounts   = "${params.output}/counts"
outputVars     = "${params.output}/variants"
outputAssigned = "${params.output}/assigned"
// Location of the final MultiQC report; used below to back up a report left by a previous run.
outputReport   = file("${outputMultiQC}/multiqc_report.html")

/*
* Keep the MultiQC report of a previous run, if any, by renaming it to
* multiqc_report.html.old inside the report folder.
*/
if( outputReport.exists() ) {
  log.info "Moving old report multiqc_report.html to multiqc_report.html.old"
  outputReport.moveTo("${outputMultiQC}/multiqc_report.html.old")
}

/*
 * Creates the channels that emits fast5 files
 */
// The run aborts when the params.fast5 pattern matches nothing.
// fast5_4_testing and fast5_4_granularity are consumed further down;
// fast5_4_name and fast5_4_variant are not read anywhere in the visible script.
Channel
    .fromPath( params.fast5)                                             
    .ifEmpty { error "Cannot find any file matching: ${params.fast5}" }
    .into {fast5_4_name; fast5_4_testing; fast5_4_granularity; fast5_4_variant}

/*
 * Get the name from the folder
 */
// The run name is the path element just before the file pattern, e.g. "my_run" for "./my_run/*.fast5".
folder_info = params.fast5.tokenize("/")
// Without a folder component the negative index below would fail with an opaque exception.
if (folder_info.size() < 2) exit 1, "Cannot derive the folder name from '${params.fast5}': please specify the fast5 files as <folder>/<pattern>"
folder_name = folder_info[-2]

// Check config file for consistency
//if (demultiplexer == "guppy" && params.barcodekit == "") 
//	exit 1, "Demultiplexing with guppy needs the definition of the barcodekit parameter. Exiting"
//if (basecaller != "guppy" && demultiplexer == "guppy") 
//	exit 1, "Demultiplexing with guppy can be performed ONLY when the basecaller is guppy too. Exiting"
//if (basecaller == "guppy" && demultiplexer == "guppy") 
//	log.info "Performing basecalling and demultiplexing at the same time with Guppy."

/*
* This is default value in case guppy will be used for RNA demultiplexing
*/
// NOTE(review): params.barcodekit is not read anywhere else in the visible script
// (the guppy commands hard-code their barcode kit) — confirm it is still needed.
params.barcodekit = ""
// An empty demultiplexing parameter means "no demultiplexing".
if (demultiplexer == "") {
	demultiplexer = "OFF"
}

/*
* if (demultiplexer != "OFF" && demultiplexer != "deeplexicon")
* exit 1, "Demultiplexing of RNA can be performed only with deeplexicon. Current value is ${demultiplexer}"
*/

// params.GPU is validated above as either "ON" or "OFF", so the test must use "ON"
// (the former comparison against "YES" could never be true).
if (params.GPU == "ON" && basecaller != "guppy")
	exit 1, "GPU can be used only with GUPPY basecaller!"

// For a genome reference the annotation may be left empty, but when given it must exist.
if (params.ref_type == "genome" && params.annotation != "") {
	annotation = file(params.annotation)
	if( !annotation.exists() ) exit 1, "Missing annotation file: ${params.annotation}!"
}


/*
 * Runs fast5_type.py on the first fast5 file only; its standard output is
 * parsed downstream as an integer flag (0 is reported as single fast5).
 */
process testInput {
    tag { "${probe}" }

    input:
    file(probe) from fast5_4_testing.first()

    output:
    stdout into multi5_type

    script:
    """
    fast5_type.py ${probe}
    """
}

// The output of testInput is parsed as an integer and copied to four channels:
// 0 is reported as single fast5 files, any other value as multi fast5 files.
// NOTE(review): multi5_type_for_demultiplexing is not consumed in the visible script.
multi5_type.map {  it.trim().toInteger() }.into{multi5_type_for_msg; multi5_type_for_bc; multi5_type_for_granularity; multi5_type_for_demultiplexing}
multi5_type_for_msg.map{it == 0 ? "Single Fast5 files detected!": "MultiFast5 files detected!" }.println()

// if you are using GPU analyse the whole dataset, otherwise make batch of 4,000 sequences if they are single fast5
// or single batches of multi fast5 sequences 
// it[0] is the fast5 type flag, it[1..-1] are all the collected fast5 files.
// NOTE(review): params.GPU is validated above as "ON"/"OFF", so the == "YES" branch looks
// unreachable and every run takes the collate() path; that branch would also pass a bare
// number to flatMap() instead of a list of files — confirm the intended GPU behaviour.
multi5_type_for_granularity.merge(fast5_4_granularity.collect()).map{
	(params.GPU == "YES" ? params.granularity  : (it[0] == 0 ? it[1..-1].collate(4000) : it[1..-1].collate(1)) )
}.flatMap().set{fast5_batches}

// create a map id batch -> list of files
// num_batch is a running counter: the first batch gets id 0.
def num_batch = -1
fast5_batches.map { 
    num_batch++
    [num_batch, it]
}.into{ fast5_4_basecall; fast5_4_demulti}

/*
*  Perform base calling using albacore or guppy on raw fast5 files
*/

// Prints basecaller information to the console (echo true) before the run:
// for guppy, its version and the workflows matching the chosen flowcell and kit.
// NOTE(review): no script is defined when the basecaller is neither albacore nor guppy.
process logBaseCalling {
    label (params.GPU == "ON" ? 'basecall_gpus': 'basecall_cpus')
    echo true
    
    script:
    if (basecaller == "albacore") {
 	    """
 	    echo "no"
 	    #export PYTHONPATH=$baseDir/bin/albacore:\$PYTHONPATH
		#read_fast5_basecaller.py 
        """
   } else if (basecaller == "guppy"){
		"""
		echo '*********************************'
		guppy_basecaller --version
		guppy_basecaller --print_workflows | grep ${params.flowcell} | grep ${params.kit}
		echo '*********************************'
		"""
   }
}

/*
 * Basecalls one batch of fast5 files.
 *   input : [batch id, fast5 files] and the fast5 type flag (0 = single, otherwise multi fast5)
 *   output: basecalled fast5 files (optional), gzipped fastq file(s) named <batch id>.*.gz,
 *           and the sequencing summary (optional)
 * The command depends on the basecaller (albacore / guppy) and, for guppy, on whether
 * guppy also acts as demultiplexer. With any other basecaller and no demultiplexing the
 * sequences are just extracted from the fast5 files with fast5_to_fastq.py.
 */
process baseCalling {
    tag {"${basecaller}-${folder_name}-${idfile}"}

    label (params.GPU == "ON" ? 'basecall_gpus': 'basecall_cpus')

    // move basecalled output fast5 files, unless deeplexicon is used:
    // in that case they are also read by the fast5 extraction process
    if(demultiplexer != "deeplexicon") {
        publishDir outputFast5, pattern: "*_out/workspace/*.fast5",  mode: 'move', saveAs: { file -> "${file.split('\\/')[-1]}"  }
    }

    input:
    set idfile, file(fast5) from fast5_4_basecall
    val (multi5) from multi5_type_for_bc

    output:
    file ("${idfile}_out/workspace/*.fast5") optional true into fast5_files_for_demultiplexing
    set idfile, file ("${idfile}.*.gz") into fastq_files_for_demultiplexing, demulti_log
    file ("${idfile}_out/sequencing_summary.txt") into seq_summaries optional true

    script:
    // conversion command if input is RNA - have to check if this is really needed
    // (replaces U with T on the sequence line of each fastq record)
    def RNA_conv_cmd = ""
    def demulti_cmd = ""
    def infolder = "./"
    if (params.seq_type == "RNA") {	RNA_conv_cmd = " | awk '{if (NR%4==2) gsub(\"U\",\"T\"); print}' " }
    if (basecaller == "albacore") {
        // in case input files are multi fast5 convert them in single fast5 since albacore is not able to deal with multi fast5
        if (multi5 == 1) {
            demulti_cmd = "mkdir demulti_tmp; mkdir demulti; multi_to_single_fast5 -i ${infolder} -s demulti_tmp -t ${task.cpus}; mv demulti_tmp/*/*.fast5 demulti; rm -fr demulti_tmp"
            infolder = "demulti"
        }
        // The basecalled single fast5 files are packed back into one multi fast5 file per batch.
        // The `[ -d demulti ]` test needs spaces around the brackets, otherwise the shell looks
        // for a command named "[-d" and the temporary folder is never removed.
        """
        ${demulti_cmd}
        export PYTHONPATH=$baseDir/bin/albacore:\$PYTHONPATH
        read_fast5_basecaller.py ${basecaller_opt} --flowcell \"${params.flowcell}\" --kit \"${params.kit}\" --output_format fastq,fast5 \
            --worker_threads ${task.cpus} -s ./${idfile}_out --disable_filtering --input ${infolder};
        cat ${idfile}_out/workspace/*.fastq ${RNA_conv_cmd} >> ${idfile}.fastq
        rm ${idfile}_out/workspace/*.fastq
        gzip ${idfile}.fastq
        mkdir single_basecallings
        mv ${idfile}_out/workspace/*/*.fast5 single_basecallings
        mkdir temp_multi
        single_to_multi_fast5 -i single_basecallings -s temp_multi -t ${task.cpus}
        mv temp_multi/batch_0.fast5 ./${idfile}_out/workspace/batch_${idfile}.fast5
        if [ -d demulti ]; then rm -fr demulti; fi
        rm -fr single_basecallings temp_multi
        """
   } else if (basecaller == "guppy"){
        def multi_cmd = ""
        def gpu_cmd = ""
        def gpu_prefix = ""
        if (params.GPU == "ON") {
            gpu_prefix = 'export LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/.singularity.d/libs"'
            //gpu_cmd = '-x "cuda:0"'
            gpu_cmd = '-x auto'
        }
        // in case input files are single fast5 group them in multifast5 at the end
        if (multi5 == 0) {
            multi_cmd = "mkdir single_basecallings temp_multi; mv *_out/workspace/*.fast5 single_basecallings; single_to_multi_fast5 -i single_basecallings -s temp_multi -t ${task.cpus}; mv temp_multi/batch_0.fast5 ./${idfile}_out/workspace/batch_${idfile}.fast5; rm -fr temp_multi single_basecallings"
        }
        // Different command line in case guppy is also demultiplexer:
        // one fastq file per barcode folder plus one for the unclassified reads
        if (demultiplexer == "guppy") {
            """
                ${gpu_prefix}
                guppy_basecaller ${gpu_cmd} ${basecaller_opt} ${demultiplexer_opt} --flowcell ${params.flowcell} --kit ${params.kit} --barcode_kits EXP-NBD104 --trim_barcodes  --fast5_out -i ${infolder} --save_path ./${idfile}_out --cpu_threads_per_caller 1 --gpu_runners_per_device 1 --num_callers ${task.cpus} 
                cd ${idfile}_out; 
                if [ -d barcode01 ]; then
                    for d in barcode*; do echo \$d; cat \$d/*.fastq ${RNA_conv_cmd} > ../${idfile}.\$d.fastq; done;
                fi
                cat unclassified/*.fastq ${RNA_conv_cmd} > ../${idfile}.unclassified.fastq; cd ../
                for i in *.fastq; do gzip \$i; done
                ${multi_cmd}
            """
        }
        else if (demultiplexer == "guppy-readucks") {
            // same as above, with the barcoding threads set explicitly
            """
                ${gpu_prefix}
                guppy_basecaller ${gpu_cmd} ${basecaller_opt} ${demultiplexer_opt} --flowcell ${params.flowcell} --kit ${params.kit} --num_barcode_threads ${task.cpus} --barcode_kits EXP-NBD104 --trim_barcodes  --fast5_out -i ${infolder} --save_path ./${idfile}_out --gpu_runners_per_device 1 --cpu_threads_per_caller 1  --num_callers ${task.cpus} 
                cd ${idfile}_out; 
                if [ -d barcode01 ]; then
                    for d in barcode*; do echo \$d; cat \$d/*.fastq ${RNA_conv_cmd} > ../${idfile}.\$d.fastq; done;
                fi
                cat unclassified/*.fastq ${RNA_conv_cmd} > ../${idfile}.unclassified.fastq; cd ../
                for i in *.fastq; do gzip \$i; done
                ${multi_cmd}
            """
        }
        else  {
            // basecalling only: a single fastq file per batch
            """
            ${gpu_prefix}
            guppy_basecaller ${gpu_cmd} --flowcell ${params.flowcell} --kit ${params.kit} --fast5_out ${basecaller_opt} -i ${infolder} --save_path ./${idfile}_out --cpu_threads_per_caller 1 --gpu_runners_per_device 1 --num_callers  ${task.cpus} 
            cat ${idfile}_out/*.fastq ${RNA_conv_cmd} >> ${idfile}.fastq
            rm ${idfile}_out/*.fastq
            gzip ${idfile}.fastq
            ${multi_cmd}
            """
        }
   } else if (demultiplexer == "OFF") {
        // no basecaller: extract the sequences already stored in the fast5 files
        """
        fast5_to_fastq.py ${fast5}; mv *.fastq ${idfile}.fastq; gzip ${idfile}.fastq
        """
   }
}

//demulti_log.println()

/*
*  Perform demultiplexing (optional) using deeplexicon on the raw fast5 files,
*  then split the basecalled fastq (and optionally fast5) files accordingly
*/
if(demultiplexer == "deeplexicon") {
	/*
	 * Runs deeplexicon on one batch of raw fast5 files and writes the
	 * assignment table <batch id>_demux.tsv, emitted on two channels
	 * (with the batch id for the fastq extraction, without it for the fast5 one).
	 */
	process demultiplexing_with_deeplexicon {
		label 'demulti'
		tag {"${demultiplexer}-${idfile}"}

		input:
		set idfile, file(fast5) from fast5_4_demulti
		val (multi5) from multi5_type_for_bc
		file(deeplexicon_folder)

		output:
		set idfile, file ("${idfile}_demux.tsv") into demux_for_fastq_extraction
		file ("${idfile}_demux.tsv") into demux_for_fast5_extraction

		script:
		// single (flag 0) or multi fast5 input mode
		def executable = (multi5 == 0) ? 'deeplexicon.py -f single' : 'deeplexicon.py -f multi'
		// this specific model is run with a different caller script
		if (demultiplexer_opt.contains("pAmps-final-actrun_newdata_nanopore_UResNet20v2_model.030.h5")) {
			executable = 'cmd_line_deeplexicon_caller_2019_09_12.py'
		}
		"""
		    ln -s ${deeplexicon_folder}/* .
            ${executable} -p ./ ${demultiplexer_opt}  -b 4000 -v > ${idfile}_demux.tsv
 		"""
	}
	
	/*
	 * Splits the basecalled fastq of each batch according to its demultiplexing
	 * table (the two are joined on the batch id) and gzips the resulting files.
	 */
	process extracting_demultiplexed_fastq {
		tag {"${demultiplexer}"}
		label 'basecall_cpus'

		input:
		set idfile, file(demux_table), file(basecalled) from demux_for_fastq_extraction.join(fastq_files_for_demultiplexing)

		output:
		set idfile, file ("*.fastq.gz") into fastq_for_filtering

		script:
		"""
		extract_sequence_from_fastq.py ${demux_table} ${basecalled}
		for i in *.fastq; do gzip \$i; done
		"""
	}
	
	// Splits the basecalled fast5 files by barcode (only when params.demulti_fast5 is "ON").
	// All demultiplexing tables and all fast5 files are staged together in one task and
	// every file left in the work folder is published to the fast5 output folder.
	process extracting_demultiplexed_fast5 {

		label 'basecall_cpus'
   	    tag { demultiplexer }  
		publishDir outputFast5,  mode: 'copy'
	
		when: 
		params.demulti_fast5 == "ON"
		
		input:
    	file("demux_*") from demux_for_fast5_extraction.collect()
    	file("*") from fast5_files_for_demultiplexing.collect()

		output:
        file("*")
        
		// Steps: merge the tables without their header lines, write one list per value of
		// column 3 holding the values of column 2, then run fast5_subset once per list.
		script:
		"""
		cat demux_* | grep -v ReadID >> dem.files
		awk '{print \$2 > \$3".list" }' dem.files
		for i in *.list; do mkdir `basename \$i .list`; fast5_subset --input ./ --save_path `basename \$i .list`/ --read_id_list \$i --batch_size 4000 -t ${task.cpus}; done 
 		rm *.list
 		rm */filename_mapping.txt
 		rm dem.files 
 		"""
	} 
	
	
} else {
	// Without deeplexicon the basecalled fastq files go straight to the filtering step.
	fastq_files_for_demultiplexing.set{ fastq_for_filtering}
}

/*
*  Perform filtering (optional) using nanofilt on fastq files
*/
if (params.filter == "nanofilt") {
	/*
	 * Filters one fastq file at a time with NanoFilt: X.fastq.gz becomes X-filt.fastq.gz
	 */
	process filtering {
		tag {"${params.filter}-${fastq_file}".replace('.fastq.gz', '')}
		label 'big_cpus'

		input:
		set idfile, file(fastq_file) from fastq_for_filtering.transpose()

		output:
		set idfile, file("*-filt.fastq.gz") into fastq_for_next_step

		script:
		def filtered = "${fastq_file}".replace(".fastq.gz", "-filt.fastq.gz")
		"""
			zcat ${fastq_file} | NanoFilt ${params.filter_opt} | gzip > ${filtered}
		"""
	}
} else {
	// No filtering: forward the fastq files, one [id, file] pair per file
	fastq_for_filtering.transpose().set{fastq_for_next_step}
}


// Re-key every fastq file by sample name so that the batches can be concatenated:
//  - with demultiplexing: <folder_name>.<third-last dot-separated part of the file name>,
//    e.g. the barcode in <batch>.<barcode>.fastq.gz
//  - without demultiplexing: <folder_name>
// The variables are declared with `def` to keep them local to the closure; undeclared
// variables in a closure are shared script-level globals.
fastq_for_next_step.map{
	def filepath = it[1]
	if (demultiplexer != "OFF") {
		def fileparts = filepath.getName().tokenize(".")
		["${folder_name}.${fileparts[-3]}", filepath]
	} else {
		["${folder_name}", filepath]
	}
}.groupTuple().set{fastq_files_for_grouping}

/*
*  Concatenate FastQ files
*/
// Concatenates all the gzipped fastq files of one sample into <sample id>.fq.gz,
// which is published to the fastq output folder and sent to fastQC, mapping and variants.
process concatenateFastQFiles {
    tag {idfile} 

	publishDir outputFastq, pattern: "*.fq.gz",  mode: 'copy'

    input:
    set idfile, file(fastq_files) from fastq_files_for_grouping

    output:
    set idfile, file("${idfile}.fq.gz") into fastq_files_for_fastqc, fastq_files_for_mapping, fastq_files_for_variants

    // The staged files are picked up with a glob rather than through ${fastq_files}.
    script:
    """
		cat *.fastq.gz >> ${idfile}.fq.gz
    """
}

/*
*  Perform QC on fast5 files
*/

// Merges the sequencing summaries of all batches into final_summary.stats and runs
// MinIONQC.R on it. Failures are ignored (errorStrategy 'ignore').
process QC {
    tag {folder_name}  
    label 'big_cpus'
    publishDir outputQual, mode: 'copy'
    errorStrategy 'ignore'
    
    input:
    file("summaries_*") from seq_summaries.collect()

    output:
    file ("${folder_name}_QC") into QC_folders
    file ("final_summary.stats") into stat_for_variants

    // If a file named exactly "summaries_" exists it is linked as is; otherwise the header
    // is taken from summaries_1 and the lines not containing "filename" of every summary are appended.
    script:
    """
      if [ -f "summaries_" ]; then
	  ln -s summaries_ final_summary.stats
	  else 
		  head -n 1 summaries_1 > final_summary.stats
	      for i in summaries_*; do grep -v "filename" \$i >> final_summary.stats; done
	  fi
      MinIONQC.R -i final_summary.stats -o ${folder_name}_QC -q ${params.qualityqc} -p ${task.cpus}
    """
}

/*
*  Perform fastQC on fastq files
*/

// Runs FastQC on the concatenated fastq file of each sample; only the HTML
// reports are published, all the FastQC outputs go to the MultiQC report.
process fastQC {
    label 'big_cpus'
    tag { idfile }
    publishDir outputQual, pattern: "*_fastqc.html", mode: 'copy'

    input:
    set idfile, file(reads) from fastq_files_for_fastqc

    output:
    file ("*_fastqc.*") into fastqc_for_multiqc

    script:
    """
     fastqc ${reads} -t ${task.cpus}
    """
}

/*
*  Perform mapping and sorting
*/
// Aligns the reads of each sample with the chosen mapper (minimap2, graphmap2 or graphmap),
// keeps the mapped reads only (-F4), then sorts and indexes the alignment.
// With any other mapper value nothing is produced (all the outputs are optional).
process mapping {
    tag {"${mapper}-${idfile}"}
    label 'big_mem_cpus'
    publishDir outputMapping, mode: 'copy'

    input:
    file(reference)
    set idfile, file (fastq_file) from fastq_files_for_mapping

    output:
    set idfile, file("${idfile}.${mapper}.sorted.bam") optional true into aligned_reads, aligned_reads_for_QC, aligned_reads_for_QC2, aligned_reads_for_counts
    set idfile, mapper, file("${idfile}.${mapper}.sorted.bam"), file("${idfile}.${mapper}.sorted.bam.bai") optional true  into aligned_reads_for_crams
    set idfile, file("${idfile}.${mapper}.sorted.bam"), file("${idfile}.${mapper}.sorted.bam.bai") optional true  into aligned_reads_for_vars
    file("${idfile}.${mapper}.sorted.bam*") optional true

    script:
    def spliced = (params.map_type == "spliced")
    def sorted_bam = "${idfile}.${mapper}.sorted.bam"
    // aligner command line writing SAM to stdout; stays null for an unsupported mapper
    def align_cmd = null
    if (mapper == "minimap2") {
        def preset = spliced ? "-ax splice -k14" : "-ax map-ont"
        align_cmd = "minimap2 -t ${task.cpus} ${preset} ${mapper_opt}  -uf ${reference} ${fastq_file}"
    }
    else if (mapper == "graphmap2") {
        def preset = spliced ? "-x rnaseq" : ""
        align_cmd = "graphmap2 align -t ${task.cpus} -r ${reference} ${preset} ${mapper_opt}  -d ${fastq_file} "
    }
    else if (mapper == "graphmap") {
        align_cmd = "graphmap align -t ${task.cpus} ${mapper_opt} -r ${reference} -d ${fastq_file} "
    }

    if (align_cmd == null) {
        """
        echo "nothing to do!"
        """
    }
    else {
        """
        ${align_cmd} | samtools view -@ ${task.cpus} -F4 -hSb - > reads.mapped.bam
        samtools sort -@ ${task.cpus} -o ${sorted_bam} reads.mapped.bam
        samtools index -@ ${task.cpus} ${sorted_bam}
        rm reads.mapped.bam
        """
    }
}

/*
*  Convert the sorted alignments to CRAM (optionally downsampling them first)
*/
// Converts each sorted BAM to an indexed CRAM against an uncompressed copy (or link) of the
// reference. When params.downsampling is set, that percentage of the reads is sampled first.
process cram_conversion {
    tag {"${mapper}-${idfile}"}
    label 'big_mem_cpus'
    publishDir outputCRAM, mode: 'copy'

    input:
    file(reference)
    set idfile, val(mapper), file(aln), file(index) from aligned_reads_for_crams

    output:
    file("${idfile}.${mapper}.sorted.cram*") optional true

    script:
    def cram = "${idfile}.${mapper}.sorted.cram"
    def prepare_ref = unzipCmd(reference, "myreference.fasta")
    def remove_ref = "rm myreference.fasta"
    def subsample = (params.downsampling != "")
    // downsampling is given as a percentage, samtools expects a fraction
    def fraction = subsample ? params.downsampling/100 : null
    def downcmd = subsample ? "samtools view -@ ${task.cpus} -bs ${fraction} ${aln} > subsample.bam" : ""
    def source_bam = subsample ? "subsample.bam" : aln
    def cleancmd = subsample ? "rm subsample.bam" : ""
    """
        ${downcmd}
        ${prepare_ref}
        samtools view  -@ ${task.cpus} -C ${source_bam} -T  myreference.fasta -o ${cram}
        samtools index -@ ${task.cpus} ${cram}
        ${cleancmd}
        ${remove_ref}
    """
}


/*
*  Perform counting (optional)
*/

if ( params.counter == "YES") {
	/*
	 * Counts the reads of each sample:
	 *  - transcriptome reference: NanoCount, plus a one-line <id>.stats file (sample id and sum
	 *    of the third column of the count table) and the read-to-transcript table <id>.assigned
	 *  - genome reference: htseq-count on the annotation, plus the read-to-feature table <id>.assigned
	 * NOTE(review): no script is defined for any other value of params.ref_type.
	 */
	process counting {
		tag {"${idfile}"}
		publishDir outputCounts, pattern: "*.count", mode: 'copy'
		publishDir outputAssigned, pattern: "*.assigned", mode: 'copy'

		input:
		set idfile, file(bamfile) from aligned_reads_for_counts

		output:
		file("${idfile}.count") into read_counts
		file("${idfile}.stats") optional true into count_stats
		file("${idfile}.assigned") optional true

		script:
		if (params.ref_type == "transcriptome") {
			// The extra options are passed once (they used to be repeated) and the sample id is
			// printed directly: stripping ".count" with sed also removed any other match of
			// that pattern inside the sample name.
			"""
			NanoCount -i ${bamfile} -o ${idfile}.count ${counter_opt};
			awk '{sum+=\$3}END{print "${idfile}\t"sum}' ${idfile}.count > ${idfile}.stats
			samtools view -F 256 ${bamfile} |cut -f 1,3 > ${idfile}.assigned
			"""
		} else if (params.ref_type == "genome") {
			// a gzipped annotation is read through a process substitution
			def anno = unzipBash("${params.annotation}") 
			"""
			samtools view ${bamfile} |htseq-count -f sam - ${anno} ${counter_opt} -o ${idfile}.sam > ${idfile}.count
			awk '{gsub(/XF:Z:/,"",\$NF); print \$1"\t"\$NF}' ${idfile}.sam |grep -v '__' > ${idfile}.assigned
			rm ${idfile}.sam
			"""		
		}
	}

	/*
	*  Join the count statistics of all samples into a single table for MultiQC
	*/
	// Writes counts_mqc.txt: a header block followed by the content of every *.stats file.
	// The escaped single quotes in the script close and reopen the shell quoting,
	// so the quotes around table and Counts do not appear in the written file.
	process joinCountQCs {
   
		input:
		file "*" from count_stats.collect()

		output:
		file("counts_mqc.txt") into count_repo_for_multiQC
	
		script:
		"""
	   echo '# id: NanoCount
	# plot_type: \'table\'
	# section_name: Read counts 
	File name	\'Counts\' ' > counts_mqc.txt 
		cat *.stats  >> counts_mqc.txt 
		"""
	}
} else {
   // Counting disabled: empty placeholders, since both channels are mixed into the report inputs below.
   read_counts = Channel.empty()
   count_repo_for_multiQC = Channel.empty()
}


/*
*  Perform alnQC 
*/
// Computes the alignment statistics of one BAM file with bam2stats.py.
process alnQC {
    tag { bamid }

    input:
    set bamid, file(bam) from aligned_reads_for_QC

    output:
    file "${bamid}.stat" into single_alnQC_outs

    script:
    """
    bam2stats.py ${bam} > ${bamid}.stat
    """
}

/*
*  Join alnQC 
*/
// Joins the per-sample alignment statistics into alnQC_mqc.txt: a header block, the first
// line of the concatenated files with the # characters removed, then every line without a #.
process joinAlnQCs {
   
    input:
    file "alnqc_*" from single_alnQC_outs.collect()

    output:
    file("alnQC_mqc.txt") into alnQC_for_multiQC
    
    script:
    """
    echo '# id: alnQC
# plot_type: \'table\'
# section_name: \'Alignment QC\' ' > alnQC_mqc.txt
    cat alnqc_* | head -n 1| sed s@#@@g >> alnQC_mqc.txt
    cat alnqc_* | grep -v "#" >> alnQC_mqc.txt
    """
}

/*
*  Perform alnQC2 
*/

// Runs NanoPlot on one BAM file and assembles four of its plots into a single
// 2x2 picture for MultiQC. Failures are ignored and both outputs are optional.
process alnQC2 {
    tag { bamid }
    label 'big_cpus'
    errorStrategy 'ignore'
    publishDir outputQual, pattern: "*_plot/*", mode: 'copy'

    input:
    set bamid, file(bamfile) from aligned_reads_for_QC2

    output:
    file("*_plot/*") optional true
    file("${bamid}_stats_mqc.png") optional true into qc2_for_multiqc

    script:
    def plot_dir = "${bamid}_plot"
    """
    NanoPlot --bam ${bamfile} -o ${plot_dir} --maxlength 5000 -t ${task.cpus}
    mkdir tmp_dir
    for plot in PercentIdentityvsAverageBaseQuality_kde LengthvsQualityScatterPlot_dot HistogramReadlength Weighted_HistogramReadlength; do
        cp ${plot_dir}/\${plot}.png tmp_dir
    done
    gm montage tmp_dir/*.png -tile 2x2 -geometry 800x800 ${bamid}_stats_mqc.png
    rm -fr tmp_dir
    """
}

// Everything that goes into the MultiQC report is gathered in one channel.
QC_folders.mix(fastqc_for_multiqc,qc2_for_multiqc,read_counts,count_repo_for_multiQC,alnQC_for_multiQC).set{files_for_report}

/*
*  Perform viral variant call (experimental)
*/

if ( params.variant_caller == "YES" && params.seq_type != "RNA") {

	/*
	 * Calls variants with medaka_variant on each indexed alignment, against a real
	 * uncompressed copy of the reference, and keeps the most recent out/round_*.vcf file.
	 */
	process variant_calling {
		tag {"${idfile}"}
		label 'big_cpus'
		publishDir outputVars, pattern: "*.vcf", mode: 'copy'

		input:
		set idfile, file(bamfile), file(bai) from aligned_reads_for_vars
		file(reference)

		output:
		file("*.vcf")

		script:
		def prepare_ref = unzipCmd(reference, "myreference.fasta", "yes")
		"""
			${prepare_ref}
			medaka_variant ${params.variant_opt} -i ${bamfile} -f myreference.fasta -d -t ${task.cpus} -o ./out
			mv `ls -t out/round_*.vcf| head -n1 ` .
			rm myreference.fasta
		"""
	}
}


/*
*  Perform multiQC report
*/
// Builds the final MultiQC report from all the collected QC files, using the
// pipeline config.yaml; the logo is staged next to the inputs.
process multiQC {
    publishDir outputMultiQC, mode: 'copy'
   
    input:
    file(logo)
    file(config_report)
    file("*") from files_for_report.collect()
    
    output:
    file("multiqc_report.html") into multiQC 
    
    script:
    """
     multiqc -c ${config_report} .
    """
}


// E-mail notification: skipped when no address (or the placeholder one) is given.
// An unset (null) address is treated like an empty one.
if (!params.email || params.email == "yourmail@yourdomain") { 
    log.info 'Skipping the email\n'
}
else {
    log.info "Sending the email to ${params.email}\n"

    workflow.onComplete {

    def msg = """\
        Pipeline execution summary
        ---------------------------
        Completed at: ${workflow.complete}
        Duration    : ${workflow.duration}
        Success     : ${workflow.success}
        workDir     : ${workflow.workDir}
        exit status : ${workflow.exitStatus}
        Error report: ${workflow.errorReport ?: '-'}
        """
        .stripIndent()

        // Attach the MultiQC report only if it exists: a failed run may not have produced it,
        // and the summary should still be delivered in that case.
        def mail = [to: params.email, subject: "Master of Pore execution", body: msg]
        def report_path = "${outputMultiQC}/multiqc_report.html"
        if (file(report_path).exists()) mail.attach = report_path
        sendMail(mail)
    }
}

// Console summary printed at the end of every run.
workflow.onComplete {
    def status = workflow.success ? 'OK' : 'failed'
    def summary = [
        "Pipeline BIOCORE@CRG Master of Pore completed!",
        "Started at  ${workflow.start}",
        "Finished at ${workflow.complete}",
        "Time elapsed: ${workflow.duration}",
        "Execution status: ${status}"
    ]
    summary.each { println it }
}

// Returns the shell command that makes `filename` available as `unzippedname`:
//  - extension "gz": decompress it with zcat (whatever the value of `copy`)
//  - otherwise     : symbolic link, or a real copy when `copy` is not empty
// `filename` must provide getExtension() (e.g. a file object).
def unzipCmd(filename, unzippedname, copy="") { 
    if (filename.getExtension() == "gz") {
        return "zcat ${filename} > ${unzippedname}"
    }
    def action = (copy != "") ? "cp" : "ln -s"
    return "${action} ${filename} ${unzippedname}"
}


// Returns the text to use as a file argument in a bash command line:
// a process substitution streaming the decompressed content when the name
// ends with ".gz", the name itself otherwise.
// Names shorter than three characters (including the empty string) are returned
// unchanged instead of raising an index error.
def unzipBash(filename) { 
    def name = filename.toString()
    return name.endsWith(".gz") ? "<(zcat ${filename})" : name
}