if GENOMETOPREVIOUS is not None:
	rule mergePreviousPhaseTmsWithGencode:
		input:
			previous=lambda wildcards: GENOMETOPREVIOUS[CAPDESIGNTOGENOME[wildcards.capDesign]],
			annot="annotations/simplified/{capDesign}.gencode.collapsed.simplified_biotypes.gtf"
		threads:1
		conda: "envs/xtools_env.yml"
		output: "mappings/nonAnchoredMergeReads/previous/{capDesign}.previous.tmerge.all.gff.gz"
		shell:
			'''
uuid=$(uuidgen)
uuidM=$(uuidgen)
uuidTmpOut=$(uuidgen)

cat {input.previous} {input.annot} | skipcomments | sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  | tmerge --exonOverhangTolerance {config[exonOverhangTolerance]} - |sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  > {config[TMPDIR]}/$uuidM

bedtools intersect -s -wao -a {config[TMPDIR]}/$uuidM -b {config[TMPDIR]}/$uuidM |awk '$1 !~ /ERCC/'| buildLoci.pl - |sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  | gzip> {config[TMPDIR]}/$uuidTmpOut
mv {config[TMPDIR]}/$uuidTmpOut {output}

			'''
rule mergePreviousPhaseTmsWithGencodeBiotypes:
	input:
		clsGencode="mappings/nonAnchoredMergeReads/previous/{capDesign}.previous.tmerge.all.gff.gz",
		gencode="annotations/simplified/{capDesign}.gencode.collapsed.simplified_biotypes.gtf"
	output: "mappings/nonAnchoredMergeReads/previous+biotypes/{capDesign}.previous.tmerge.all.gff.gz"
	shell:
		'''
uuid=$(uuidgen)
uuidTmpOut=$(uuidgen)
zcat  {input.clsGencode} > {config[TMPDIR]}/$uuid
mergeToRef.pl {input.gencode} {config[TMPDIR]}/$uuid | sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  |gzip > {config[TMPDIR]}/$uuidTmpOut
mv {config[TMPDIR]}/$uuidTmpOut {output}

		'''


if GENOMETOPREVIOUS is not None: 
	rule mergeCurrentPreviousPhaseTmsWithGencode:
		input:
			current=lambda wildcards: expand("mappings/nonAnchoredMergeReads/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.HiSS.tmerge.min{minReadSupport}reads.splicing_status:all.endSupport:all.gff.gz", filtered_product_merge, techname=wildcards.techname, corrLevel=wildcards.corrLevel, capDesign=wildcards.capDesign, sizeFrac=wildcards.sizeFrac, barcodes=wildcards.barcodes, minReadSupport=wildcards.minReadSupport),
			previous=lambda wildcards: GENOMETOPREVIOUS[CAPDESIGNTOGENOME[wildcards.capDesign]],
			annot="annotations/simplified/{capDesign}.gencode.collapsed.simplified_biotypes.gtf"
		threads:1
		conda: "envs/xtools_env.yml"
		output: "mappings/nonAnchoredMergeReads/mergeWithPrevious/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.HiSS.tmerge.min{minReadSupport}reads.endSupport:all.gff.gz"
		shell:
			'''
uuid=$(uuidgen)
uuidM=$(uuidgen)
uuidTmpOut=$(uuidgen)

zcat -f {input.previous} {input.annot} {input.current} | skipcomments | sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  | tmerge --exonOverhangTolerance {config[exonOverhangTolerance]} - |sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  > {config[TMPDIR]}/$uuidM
bedtools intersect -s -wao -a {config[TMPDIR]}/$uuidM -b {config[TMPDIR]}/$uuidM | awk '$1 !~ /ERCC/'| buildLoci.pl - |sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  | gzip> {config[TMPDIR]}/$uuidTmpOut
mv {config[TMPDIR]}/$uuidTmpOut {output}

			'''

rule mergeCurrentPreviousPhaseTmsWithGencodeBiotypes:
	input:
		clsGencode="mappings/nonAnchoredMergeReads/mergeWithPrevious/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.HiSS.tmerge.min{minReadSupport}reads.endSupport:all.gff.gz",
		gencode="annotations/simplified/{capDesign}.gencode.collapsed.simplified_biotypes.gtf"
	output: "mappings/nonAnchoredMergeReads/mergeWithPrevious+biotypes/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.HiSS.tmerge.min{minReadSupport}reads.endSupport:all.gff.gz"
	shell:
		'''
uuid=$(uuidgen)
uuidTmpOut=$(uuidgen)
zcat  {input.clsGencode} > {config[TMPDIR]}/$uuid
mergeToRef.pl {input.gencode} {config[TMPDIR]}/$uuid | sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  |gzip > {config[TMPDIR]}/$uuidTmpOut
mv {config[TMPDIR]}/$uuidTmpOut {output}

		'''


rule getGencodeSupportedEnds:
	input:
		tm="annotations/simplified/{capDesign}.gencode.collapsed.simplified_biotypes.gtf",
		cagePeaks=lambda wildcards: GENOMETOCAGEPEAKS[CAPDESIGNTOGENOME[wildcards.capDesign]],
		genome = lambda wildcards: config["GENOMESDIR"] + CAPDESIGNTOGENOME[wildcards.capDesign] + ".sorted.genome",
		PAS=lambda wildcards: GENOMETOPAS[CAPDESIGNTOGENOME[wildcards.capDesign]],

	output:"mappings/nonAnchoredMergeReads/mergeWithPrevious/gencode/{capDesign}.gencode.cage+PASsupported.gff.gz"
	conda: "envs/xtools_env.yml"
	shell:
		'''
uuid5pEnds=$(uuidgen)
uuidTmpOut=$(uuidgen)
uuid2=$(uuidgen)
cat {input.genome} | cut -f1 | sort -T {config[TMPDIR]}  |uniq > {config[TMPDIR]}/$uuid2

cat {input.tm} | fgrep -w -f {config[TMPDIR]}/$uuid2 | gff2bed_full.pl - |extractTranscriptEndsFromBed12.pl 5 |sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n > {config[TMPDIR]}/$uuid5pEnds
uuid3pEnds=$(uuidgen)
cat {input.tm} | fgrep -w -f {config[TMPDIR]}/$uuid2|gff2bed_full.pl - |extractTranscriptEndsFromBed12.pl 3 |sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n > {config[TMPDIR]}/$uuid3pEnds

uuidCageSupported=$(uuidgen)
cat {config[TMPDIR]}/$uuid5pEnds | sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n  | bedtools slop -s -l 50 -r 50 -i stdin -g {input.genome} | bedtools intersect -u -s -a stdin -b {input.cagePeaks} | cut -f4  |sort -T {config[TMPDIR]} |uniq > {config[TMPDIR]}/$uuidCageSupported

uuidPASsupported=$(uuidgen)
cat {config[TMPDIR]}/$uuid3pEnds | sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n  | bedtools slop -s -l 50 -r -10 -i stdin -g {input.genome} | bedtools intersect -u -s -a stdin -b {input.PAS} |cut -f4 |sort -T {config[TMPDIR]} |uniq > {config[TMPDIR]}/$uuidPASsupported

uuidcagePASsupported=$(uuidgen)
comm -1 -2 {config[TMPDIR]}/$uuidCageSupported {config[TMPDIR]}/$uuidPASsupported |sort -T {config[TMPDIR]} |uniq | awk '{{print "transcript_id \\""$1"\\";"}}'> {config[TMPDIR]}/$uuidcagePASsupported

fgrep -w -f {config[TMPDIR]}/$uuidcagePASsupported {input.tm} |sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  | gzip> {config[TMPDIR]}/$uuidTmpOut
mv {config[TMPDIR]}/$uuidTmpOut {output}

		'''

rule getCurrentPreviousPhaseTmsWithGencodeSupportedEnds:
	input:
		tm="mappings/nonAnchoredMergeReads/mergeWithPrevious+biotypes/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.HiSS.tmerge.min{minReadSupport}reads.endSupport:all.gff.gz",
		cagePeaks=lambda wildcards: GENOMETOCAGEPEAKS[CAPDESIGNTOGENOME[wildcards.capDesign]],
		genome = lambda wildcards: config["GENOMESDIR"] + CAPDESIGNTOGENOME[wildcards.capDesign] + ".sorted.genome",
		PAS=lambda wildcards: GENOMETOPAS[CAPDESIGNTOGENOME[wildcards.capDesign]],

	output:"mappings/nonAnchoredMergeReads/mergeWithPrevious/cls/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.HiSS.tmerge.min{minReadSupport}reads.cage+PASsupported.gff.gz"
	conda: "envs/xtools_env.yml"
	shell:
		'''
uuid5pEnds=$(uuidgen)
uuidChrList=$(uuidgen)
uuidTmpOut=$(uuidgen)

cut -f1 {input.genome} |sort|uniq > {config[TMPDIR]}/$uuidChrList
zcat {input.tm} | fgrep -w -f {config[TMPDIR]}/$uuidChrList |gff2bed_full.pl - |extractTranscriptEndsFromBed12.pl 5 |sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n > {config[TMPDIR]}/$uuid5pEnds
uuid3pEnds=$(uuidgen)
zcat {input.tm} | fgrep -w -f {config[TMPDIR]}/$uuidChrList |gff2bed_full.pl - |extractTranscriptEndsFromBed12.pl 3 |sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n > {config[TMPDIR]}/$uuid3pEnds

uuidCageSupported=$(uuidgen)
cat {config[TMPDIR]}/$uuid5pEnds | sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n  | bedtools slop -s -l 50 -r 50 -i stdin -g {input.genome} | bedtools intersect -u -s -a stdin -b {input.cagePeaks} | cut -f4  |sort -T {config[TMPDIR]} |uniq > {config[TMPDIR]}/$uuidCageSupported

uuidPASsupported=$(uuidgen)
cat {config[TMPDIR]}/$uuid3pEnds | sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n  | bedtools slop -s -l 50 -r -10 -i stdin -g {input.genome} | bedtools intersect -u -s -a stdin -b {input.PAS} |cut -f4 |sort -T {config[TMPDIR]} |uniq > {config[TMPDIR]}/$uuidPASsupported

uuidcagePASsupported=$(uuidgen)
comm -1 -2 {config[TMPDIR]}/$uuidCageSupported {config[TMPDIR]}/$uuidPASsupported |sort -T {config[TMPDIR]} |uniq | awk '{{print "transcript_id \\""$1"\\";"}}'> {config[TMPDIR]}/$uuidcagePASsupported

zcat {input.tm} | fgrep -w -f {config[TMPDIR]}/$uuidcagePASsupported - |sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  | gzip> {config[TMPDIR]}/$uuidTmpOut
mv {config[TMPDIR]}/$uuidTmpOut {output}

		'''


rule getPreviousPhaseTmsWithGencodeSupportedEnds:
	input:
		tm="mappings/nonAnchoredMergeReads/previous+biotypes/{capDesign}.previous.tmerge.all.gff.gz",
		cagePeaks=lambda wildcards: GENOMETOCAGEPEAKS[CAPDESIGNTOGENOME[wildcards.capDesign]],
		genome = lambda wildcards: config["GENOMESDIR"] + CAPDESIGNTOGENOME[wildcards.capDesign] + ".sorted.genome",
		PAS=lambda wildcards: GENOMETOPAS[CAPDESIGNTOGENOME[wildcards.capDesign]],

	output:"mappings/nonAnchoredMergeReads/previous/{capDesign}.previous.tmerge.cage+PASsupported.gff.gz"
	conda: "envs/xtools_env.yml"
	shell:
		'''
uuid5pEnds=$(uuidgen)
uuidTmpOut=$(uuidgen)
uuid2=$(uuidgen)
cat {input.genome} | cut -f1 | sort -T {config[TMPDIR]}  |uniq > {config[TMPDIR]}/$uuid2

zcat {input.tm} | fgrep -w -f {config[TMPDIR]}/$uuid2|gff2bed_full.pl - |extractTranscriptEndsFromBed12.pl 5 |sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n > {config[TMPDIR]}/$uuid5pEnds
uuid3pEnds=$(uuidgen)
zcat {input.tm} | fgrep -w -f {config[TMPDIR]}/$uuid2|gff2bed_full.pl - |extractTranscriptEndsFromBed12.pl 3 |sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n > {config[TMPDIR]}/$uuid3pEnds


uuidCageSupported=$(uuidgen)
cat {config[TMPDIR]}/$uuid5pEnds | sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n  | bedtools slop -s -l 50 -r 50 -i stdin -g {input.genome} | bedtools intersect -u -s -a stdin -b {input.cagePeaks} | cut -f4  |sort -T {config[TMPDIR]} |uniq > {config[TMPDIR]}/$uuidCageSupported

uuidPASsupported=$(uuidgen)
cat {config[TMPDIR]}/$uuid3pEnds | sort -T {config[TMPDIR]}  -k1,1 -k2,2n -k3,3n  | bedtools slop -s -l 50 -r -10 -i stdin -g {input.genome} | bedtools intersect -u -s -a stdin -b {input.PAS} |cut -f4 |sort -T {config[TMPDIR]} |uniq > {config[TMPDIR]}/$uuidPASsupported

uuidcagePASsupported=$(uuidgen)
comm -1 -2 {config[TMPDIR]}/$uuidCageSupported {config[TMPDIR]}/$uuidPASsupported |sort -T {config[TMPDIR]} |uniq | awk '{{print "transcript_id \\""$1"\\";"}}'> {config[TMPDIR]}/$uuidcagePASsupported

zcat {input.tm} | fgrep -w -f {config[TMPDIR]}/$uuidcagePASsupported - |sort -T {config[TMPDIR]}  -k1,1 -k4,4n -k5,5n  | gzip> {config[TMPDIR]}/$uuidTmpOut
mv {config[TMPDIR]}/$uuidTmpOut {output}

		'''


rule getFlLocusStats:
	input:
		gencode="annotations/simplified/{capDesign}.gencode.collapsed.simplified_biotypes.gtf",
		clsGencode="mappings/nonAnchoredMergeReads/mergeWithPrevious+biotypes/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.HiSS.tmerge.min{minReadSupport}reads.endSupport:all.gff.gz",
		gencodeFL="mappings/nonAnchoredMergeReads/mergeWithPrevious/gencode/{capDesign}.gencode.cage+PASsupported.gff.gz",
		previous="mappings/nonAnchoredMergeReads/previous+biotypes/{capDesign}.previous.tmerge.all.gff.gz",
		previousFL="mappings/nonAnchoredMergeReads/previous/{capDesign}.previous.tmerge.cage+PASsupported.gff.gz",
		clsGencodeFL="mappings/nonAnchoredMergeReads/mergeWithPrevious/cls/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.HiSS.tmerge.min{minReadSupport}reads.cage+PASsupported.gff.gz"
	output: config["STATSDATADIR"] + "tmp/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.min{minReadSupport}reads.FLloci.stats.tsv"
	shell:
		'''
#PCG stats
gencodePcg=$(cat {input.gencode} | fgrep "gene_type \\"protein_coding\\";" | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
gencodePcgFL=$(zcat {input.gencodeFL} | fgrep "gene_type \\"protein_coding\\";" | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
clsGencodePcg=$(zcat {input.clsGencode} | fgrep "gene_type \\"protein_coding\\";" | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
clsGencodePcgFL=$(zcat {input.clsGencodeFL} | fgrep "gene_type \\"protein_coding\\";" | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
previousPcg=$(zcat {input.previous} | fgrep "gene_type \\"protein_coding\\";" | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
previousPcgFL=$(zcat {input.previousFL} | fgrep "gene_type \\"protein_coding\\";" | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)

#lncRNA stats
uuidgencodeLnc=$(uuidgen)
uuidgencodeLncFL=$(uuidgen)
uuidclsGencodeLnc=$(uuidgen)
uuidclsGencodeLncFL=$(uuidgen)
uuidPreviousLnc=$(uuidgen)
uuidPreviousLncFL=$(uuidgen)
uuidTmpOut=$(uuidgen)

cat {input.gencode} | fgrep "gene_type \\"lncRNA\\";" |awk '$3=="exon"' > {config[TMPDIR]}/$uuidgencodeLnc
zcat {input.gencodeFL} | fgrep "gene_type \\"lncRNA\\";" |awk '$3=="exon"'> {config[TMPDIR]}/$uuidgencodeLncFL
zcat {input.clsGencode} | fgrep "gene_type \\"lncRNA\\";" |awk '$3=="exon"'> {config[TMPDIR]}/$uuidclsGencodeLnc
zcat {input.clsGencodeFL} | fgrep "gene_type \\"lncRNA\\";" |awk '$3=="exon"'> {config[TMPDIR]}/$uuidclsGencodeLncFL
zcat {input.previous} | fgrep "gene_type \\"lncRNA\\";" |awk '$3=="exon"'> {config[TMPDIR]}/$uuidPreviousLnc
zcat {input.previousFL} | fgrep "gene_type \\"lncRNA\\";" |awk '$3=="exon"'> {config[TMPDIR]}/$uuidPreviousLncFL


gencodeLncSpliced=$(cat {config[TMPDIR]}/$uuidgencodeLnc | extractGffAttributeValue.pl transcript_id |sort -T {config[TMPDIR]} |uniq -d | fgrep -w -f - {config[TMPDIR]}/$uuidgencodeLnc | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
gencodeLncFLSpliced=$(cat {config[TMPDIR]}/$uuidgencodeLncFL| extractGffAttributeValue.pl transcript_id |sort -T {config[TMPDIR]} |uniq -d | fgrep -w -f - {config[TMPDIR]}/$uuidgencodeLncFL | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
clsGencodeLncSpliced=$(cat {config[TMPDIR]}/$uuidclsGencodeLnc | extractGffAttributeValue.pl transcript_id |sort -T {config[TMPDIR]} |uniq -d | fgrep -w -f - {config[TMPDIR]}/$uuidclsGencodeLnc | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
clsGencodeLncFLSpliced=$(cat {config[TMPDIR]}/$uuidclsGencodeLncFL | extractGffAttributeValue.pl transcript_id |sort -T {config[TMPDIR]} |uniq -d | fgrep -w -f - {config[TMPDIR]}/$uuidclsGencodeLncFL | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
previousLncSpliced=$(cat {config[TMPDIR]}/$uuidPreviousLnc | extractGffAttributeValue.pl transcript_id |sort -T {config[TMPDIR]} |uniq -d | fgrep -w -f - {config[TMPDIR]}/$uuidPreviousLnc | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
previousLncFLSpliced=$(cat {config[TMPDIR]}/$uuidPreviousLncFL | extractGffAttributeValue.pl transcript_id |sort -T {config[TMPDIR]} |uniq -d | fgrep -w -f - {config[TMPDIR]}/$uuidPreviousLncFL | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)

#clsGencodeLncKnownSpliced=$(cat {config[TMPDIR]}/$uuidclsGencodeLnc | fgrep "gene_ref_status \\"known\\";" |extractGffAttributeValue.pl transcript_id |sort -T {config[TMPDIR]} |uniq -d | fgrep -w -f - {config[TMPDIR]}/$uuidclsGencodeLnc | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)
#clsGencodeLncFLKnownSpliced=$(cat {config[TMPDIR]}/$uuidclsGencodeLncFL | fgrep "gene_ref_status \\"known\\";" | extractGffAttributeValue.pl transcript_id |sort -T {config[TMPDIR]} |uniq -d | fgrep -w -f - {config[TMPDIR]}/$uuidclsGencodeLncFL | extractGffAttributeValue.pl gene_id|sort -T {config[TMPDIR]} |uniq|wc -l)


echo -e "{wildcards.techname}Corr{wildcards.corrLevel}\t{wildcards.capDesign}\t{wildcards.sizeFrac}\t{wildcards.barcodes}\t$gencodePcg\t$gencodePcgFL\t$clsGencodePcg\t$clsGencodePcgFL\t$gencodeLncSpliced\t$gencodeLncFLSpliced\t$clsGencodeLncSpliced\t$clsGencodeLncFLSpliced\t$previousLncSpliced\t$previousLncFLSpliced\t$previousPcg\t$previousPcgFL"  > {config[TMPDIR]}/$uuidTmpOut
mv {config[TMPDIR]}/$uuidTmpOut {output}


		'''


rule aggFlLocusStats:
	input: lambda wildcards: expand(config["STATSDATADIR"] + "tmp/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.min{minReadSupport}reads.FLloci.stats.tsv", techname='allSeqTechs', corrLevel=FINALCORRECTIONLEVELS, capDesign=CAPDESIGNSplusMERGED, sizeFrac=SIZEFRACS, barcodes='allTissues', minReadSupport=wildcards.minReadSupport)
	output: config["STATSDATADIR"] + "all.min{minReadSupport}reads.FLloci.stats.tsv"
	shell:
		'''
uuidTmpOut=$(uuidgen)
echo -e "seqTech\tcorrectionLevel\tcapDesign\tsizeFrac\ttissue\tannotation_set\tbiotype\tcategory\tcount\tpercent" > {config[TMPDIR]}/$uuidTmpOut

cat {input} | awk '{{print $1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE\\tprotein-coding\\tnon-FL\\t"$5-$6"\\t"($5-$6)/$5"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE\\tprotein-coding\\tFL\\t"$6"\\t"$6/$5"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE+previous+current\\tprotein-coding\\tnon-FL\\t"$7-$8"\\t"($7-$8)/$7"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE+previous+current\\tprotein-coding\\tFL\\t"$8"\\t"$8/$7"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE\\tlncRNA\\tnon-FL\\t"$9-$10"\\t"($9-$10)/$9"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE\\tlncRNA\\tFL\\t"$10"\\t"$10/$9"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE+previous+current\\tlncRNA\\tnon-FL\\t"$11-$12"\\t"($11-$12)/$11"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE+previous+current\\tlncRNA\\tFL\\t"$12"\\t"$12/$11"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE+previous\\tlncRNA\\tnon-FL\\t"$13-$14"\\t"($13-$14)/$13"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE+previous\\tlncRNA\\tFL\\t"$14"\\t"$14/$13"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE+previous\\tprotein-coding\\tnon-FL\\t"$15-$16"\\t"($15-$16)/$15"\\n"$1"\\t"$2"\\t"$3"\\t"$4"\\tGENCODE+previous\\tprotein-coding\\tFL\\t"$16"\\t"$16/$15}}' | sed 's/Corr0/\tNo/' | sed 's/Corr{lastK}/\tYes/' | sort -T {config[TMPDIR]}  >> {config[TMPDIR]}/$uuidTmpOut
mv {config[TMPDIR]}/$uuidTmpOut {output}
		'''

rule plotFlLocusGencodeOnlyStats:
	input: config["STATSDATADIR"] + "all.min{minReadSupport}reads.FLloci.stats.tsv"
	output: returnPlotFilenames(config["PLOTSDIR"] + "FLloci.gencodeOnly.stats/{techname}/Corr{corrLevel}/{capDesign}/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.min{minReadSupport}reads.FLloci.gencodeOnly.stats")
	conda: "envs/R_env.yml"
	params:
		filterDat=lambda wildcards: merge_figures_params(wildcards.capDesign, wildcards.sizeFrac, wildcards.barcodes, wildcards.corrLevel, wildcards.techname)
	shell:
		'''
echo "
library(ggplot2)
library(cowplot)
library(plyr)
library(scales)
library(gridExtra)
library(grid)
library(ggplotify)

dat <- read.table('{input}', header=T, as.is=T, sep='\\t')
{params.filterDat[10]}
{params.filterDat[0]}
{params.filterDat[1]}
{params.filterDat[2]}
{params.filterDat[3]}
{params.filterDat[4]}
{params.filterDat[5]}
{params.filterDat[8]}


dat <- subset(dat, annotation_set=='GENCODE')

dat\$category<-factor(dat\$category, ordered=TRUE, levels=rev(c('FL', 'non-FL')))

dat\$annotbiotype <- paste(sep='', dat\$biotype, '\\n(', dat\$annotation_set, ')')
hXyPlot = hXyPlot +1
wXyPlot = wXyPlot +0.5

plotBase <- \\"p <- ggplot(dat[order(dat\$category), ], aes(x=factor(annotbiotype), y=count, fill=category)) +
geom_bar(stat='identity') +
ylab('# gene loci') +
scale_y_continuous(labels=comma)+
scale_fill_manual (values=c('FL'='#C453C4', 'non-FL'='#a6a6a6'))+
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab('Biotype (Annotation set)') +
guides(fill = guide_legend(title='Category'))+
geom_text(position = 'stack', size=geom_textSize, aes(x = factor(annotbiotype), y = count, label = paste(sep='',percent(round(percent, digits=2)),' / ','(',comma(count),')'), hjust = 0.5, vjust = 1))+
{GGPLOT_PUB_QUALITY} + \\"

{params.filterDat[12]}

save_plot('{output[0]}', legendOnly, base_width=wLegendOnly, base_height=hLegendOnly)
save_plot('{output[1]}', legendOnly, base_width=wLegendOnly, base_height=hLegendOnly)

save_plot('{output[2]}', pXy, base_width=wXyPlot, base_height=hXyPlot)
save_plot('{output[3]}', pXy, base_width=wXyPlot, base_height=hXyPlot)

save_plot('{output[4]}', pXyNoLegend, base_width=wXyNoLegendPlot, base_height=hXyNoLegendPlot)
save_plot('{output[5]}', pXyNoLegend, base_width=wXyNoLegendPlot, base_height=hXyNoLegendPlot)

save_plot('{output[6]}', pYx, base_width=wYxPlot, base_height=hYxPlot)
save_plot('{output[7]}', pYx, base_width=wYxPlot, base_height=hYxPlot)

save_plot('{output[8]}', pYxNoLegend, base_width=wYxNoLegendPlot, base_height=hYxNoLegendPlot)
save_plot('{output[9]}', pYxNoLegend, base_width=wYxNoLegendPlot, base_height=hYxNoLegendPlot)


" > $(dirname {output[0]})/$(basename {output[0]} .legendOnly.png).r
cat $(dirname {output[0]})/$(basename {output[0]} .legendOnly.png).r | R --slave

		'''


rule plotFlLocusStats:
	input: config["STATSDATADIR"] + "all.min{minReadSupport}reads.FLloci.stats.tsv"
	output: returnPlotFilenames(config["PLOTSDIR"] + "FLloci.stats/{techname}/Corr{corrLevel}/{capDesign}/{techname}Corr{corrLevel}_{capDesign}_{sizeFrac}_{barcodes}.min{minReadSupport}reads.FLloci.stats")
	conda: "envs/R_env.yml"
	params:
		filterDat=lambda wildcards: merge_figures_params(wildcards.capDesign, wildcards.sizeFrac, wildcards.barcodes, wildcards.corrLevel, wildcards.techname)
	shell:
		'''
echo "
library(ggplot2)
library(cowplot)
library(plyr)
library(scales)
library(gridExtra)
library(grid)
library(ggplotify)

dat <- read.table('{input}', header=T, as.is=T, sep='\\t')
{params.filterDat[10]}
{params.filterDat[0]}
{params.filterDat[1]}
{params.filterDat[2]}
{params.filterDat[3]}
{params.filterDat[4]}
{params.filterDat[5]}
{params.filterDat[8]}


dat\$category<-factor(dat\$category, ordered=TRUE, levels=rev(c('FL', 'non-FL')))

dat\$annotbiotype <- paste(sep='', dat\$biotype, '\\n(', dat\$annotation_set, ')')

hXyPlot = hXyPlot +1
wXyPlot = wXyPlot +2.5

plotBase <- \\"p <- ggplot(dat[order(dat\$category), ], aes(x=factor(annotbiotype), y=count, fill=category)) +
geom_bar(stat='identity') +
ylab('# gene loci') +
scale_y_continuous(labels=comma)+
scale_fill_manual (values=c('FL'='#C453C4', 'non-FL'='#a6a6a6'))+
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab('Biotype (Annotation set)') +
guides(fill = guide_legend(title='Category'))+
geom_text(position = 'stack', size=geom_textSize, aes(x = factor(annotbiotype), y = count, label = paste(sep='',percent(round(percent, digits=2)),' ','(',comma(count),')'), hjust = 0.5, vjust = 1))+
{GGPLOT_PUB_QUALITY}+ \\"

{params.filterDat[12]}

save_plot('{output[0]}', legendOnly, base_width=wLegendOnly, base_height=hLegendOnly)
save_plot('{output[1]}', legendOnly, base_width=wLegendOnly, base_height=hLegendOnly)

save_plot('{output[2]}', pXy, base_width=wXyPlot, base_height=hXyPlot)
save_plot('{output[3]}', pXy, base_width=wXyPlot, base_height=hXyPlot)

save_plot('{output[4]}', pXyNoLegend, base_width=wXyNoLegendPlot, base_height=hXyNoLegendPlot)
save_plot('{output[5]}', pXyNoLegend, base_width=wXyNoLegendPlot, base_height=hXyNoLegendPlot)

save_plot('{output[6]}', pYx, base_width=wYxPlot, base_height=hYxPlot)
save_plot('{output[7]}', pYx, base_width=wYxPlot, base_height=hYxPlot)

save_plot('{output[8]}', pYxNoLegend, base_width=wYxNoLegendPlot, base_height=hYxNoLegendPlot)
save_plot('{output[9]}', pYxNoLegend, base_width=wYxNoLegendPlot, base_height=hYxNoLegendPlot)


" > $(dirname {output[0]})/$(basename {output[0]} .legendOnly.png).r

cat $(dirname {output[0]})/$(basename {output[0]} .legendOnly.png).r | R --slave


		'''