from os import listdir
from os.path import isfile, splitext

# Default target rule: requests the mapping statistics spreadsheet, one BAM
# file per key of config['entries'] (in sorted order), and the report data
# file.
rule all:
    input:
        "mapping/stats/mapping_stats.xlsx",
        expand("mapping/{sample}.bam", sample=sorted(config['entries'])),
        ".report/data/bowtie2_data.js"


# Convert the tab-separated mapping statistics into an Excel workbook.
#
# The helper script receives the TSV, the workbook path and the plot
# directory. The two SVG plots are declared as outputs although only their
# directory is passed on the command line.
# NOTE(review): presumably the script writes both SVG files into plot_dir
# under exactly these names -- confirm against the script.
rule mapping_stats_xlsx:
    input:
        "mapping/stats/mapping_stats.tsv"
    output:
        xlsx="mapping/stats/mapping_stats.xlsx",
        plot_alignment_absolute="mapping/stats/alignment_stats.svg",
        plot_alignment_relative="mapping/stats/alignment_stats_relative.svg"
    params:
        plot_dir="mapping/stats/"
    group: "bowtie2_statistics"
    conda: "../lib/conda_env.yaml"
    shell:
        "python3 lib/se_mapping_stats_tsv_to_xlsx.py"
        " {input} {output.xlsx} {params.plot_dir}"


# Collect the bowtie2 alignment summaries of all samples into one TSV table.
#
# One row is written per log file. The sample name is the log's basename
# with the "bowtie2_mapping." prefix and the ".log" suffix removed. The first
# line of each log is skipped; from every following line either the
# "<count> (<percent>%)" pair or the leading "<percent>%" value is kept, and
# the values are joined with tabs.
# NOTE(review): this assumes the log holds nothing but bowtie2's alignment
# summary; extra lines (e.g. warnings) would shift the columns.
#
# Escaping: the command is a regular (non-raw) Python string that Snakemake
# additionally runs through str.format().
#   * Backslashes meant for sed are doubled so Python passes them through
#     without relying on invalid escape sequences.
#   * \t and \n are deliberately left single: Python turns them into literal
#     tab/newline characters before the shell sees the command.
#   * Braces meant for the shell or sed are doubled ({{ and }}).
rule mapping_stats_tsv:
    input:
        expand("mapping/logs/bowtie2_mapping.{name}.log", name=config['entries'].keys())
    output:
        "mapping/stats/mapping_stats.tsv"
    group:
        "bowtie2_statistics"
    shell:
        """
        echo "sample\treads\treads[%]\taligned_0_times\taligned_0_times[%]\taligned_1_time\taligned_1_time[%]\taligned_more_than_1_times\taligned_more_than_1_times[%]\toverall_alignment_rate" > {output}
        for i in {input}; do
            sample=$(basename "$i");
            sample=${{sample%.log}};
            sample=${{sample#bowtie2_mapping.}};
            printf '%s\t' "$sample" >> {output};
            tail -n+2 "$i" |
                sed -e 's/^[^0-9]*\\([0-9]\\{{1,\\}}\\) (\\([0-9.]\\{{1,\\}}\\)%).*/\\1\t\\2/' |
                sed -e 's/^\\(.*\\)%.*$/\\1/' |
                tr '\n' '\t' |
                sed -e 's/\t$//' >> {output};
            echo >> {output};
        done;
        """

# Build the bowtie2 index for the reference genome with bowtie2-build.
#
# The index prefix is the genome FASTA path without its extension. Only the
# first index file (<prefix>.1.bt2) is declared as output. The tool's stdout
# and stderr are shown on the console and copied to the log via tee.
# NOTE(review): %%GENOME_FASTA%% looks like a template placeholder that is
# substituted before Snakemake reads this file -- confirm.
rule bowtie2_index:
    input:
        genome="%%GENOME_FASTA%%"
    output:
        splitext("%%GENOME_FASTA%%")[0] + ".1.bt2"
    params:
        prefix=splitext("%%GENOME_FASTA%%")[0]
    log: "mapping/logs/bowtie2_index.log"
    threads: 8
    group: "bowtie2_index"
    conda: "../lib/conda_env.yaml"
    shell:
        "bowtie2-build --threads {threads} {input.genome} {params.prefix} 2>&1 |tee {log}"


# Align the preprocessed single-end reads of one sample with bowtie2.
#
# The genome FASTA and the first index file are listed as inputs so that the
# index is built first; the command itself only uses the index prefix. The
# SAM output is marked temp(). bowtie2's stdout and stderr are shown on the
# console and copied to the per-sample log via tee.
# NOTE(review): %%ADDITIONAL_BOWTIE2_OPTIONS%%, %%ALIGNMENT_TYPE%% and
# %%GENOME_FASTA%% look like template placeholders that are substituted
# before Snakemake reads this file -- confirm.
rule bowtie2_mapping:
    input:
        genome="%%GENOME_FASTA%%",
        genome_index=splitext("%%GENOME_FASTA%%")[0] + ".1.bt2",
        reads="preprocessing/{name}.fastq.gz"
    output:
        temp("mapping/sam/{name}.sam")
    params:
        prefix=splitext("%%GENOME_FASTA%%")[0]
    log: "mapping/logs/bowtie2_mapping.{name}.log"
    threads: 2
    group: "bowtie2_mapping"
    conda: "../lib/conda_env.yaml"
    shell:
        "bowtie2 %%ADDITIONAL_BOWTIE2_OPTIONS%% -p {threads} --mm %%ALIGNMENT_TYPE%%"
        " -x {params.prefix} -U {input.reads} -S {output} 2>&1 |tee {log}"


# Split one SAM file into sorted BAM files for mapped and unmapped reads.
#
#   1. Reads without the "unmapped" flag (-F 4) are sorted into {sample}.bam.
#   2. That BAM is indexed with a CSI index (samtools index -c).
#   3. Reads with the "unmapped" flag (-f 4) are sorted into
#      unmapped/{sample}_unmapped.bam.
#
# All three steps are chained with && so that the first failing step
# determines the job's exit status. A ';' after the index step would let the
# shell carry on when the first view|sort pipeline fails, because errexit
# does not fire for a non-final member of an && list; the job status would
# then come from step 3 alone.
rule sam_to_bam:
    input:
        "mapping/sam/{sample}.sam"
    output:
        bam="mapping/{sample}.bam",
        csi="mapping/{sample}.bam.csi",
        bam_unmapped="mapping/unmapped/{sample}_unmapped.bam"
    conda:
        "../lib/conda_env.yaml"
    group:
        "bowtie2_mapping"
    threads:
        2
    shell:
        "samtools view -F 4 -Shb {input} | samtools sort -@ {threads} -o {output.bam} - && "
        "samtools index -c {output.bam} && "
        "samtools view -f 4 -Shb {input} | samtools sort -@ {threads} -o {output.bam_unmapped} -"

# Write the settings used for the mapping step to mapping/settings.yaml.
#
# The file records the alignment preset, the bowtie2 version (taken from the
# first line of `bowtie2 --help`), a fixed use_shared_memory flag and the
# additional bowtie2 options.
# NOTE(review): %%ALIGNMENT_TYPE%% and %%ADDITIONAL_BOWTIE2_OPTIONS%% look
# like template placeholders that are substituted before Snakemake reads
# this file -- confirm.
rule write_settings:
    output:
        settings="mapping/settings.yaml"
    conda:
        "../lib/conda_env.yaml"
    group:
        "bowtie2_report"
    # `set +e` disables errexit for the whole script.
    # NOTE(review): presumably so that a failing version lookup does not
    # abort the job; it also means a failing redirect is not fatal.
    #
    # The command is a regular (non-raw) Python string, so backslashes meant
    # for sed and for the shell are doubled.
    shell:
        """
        set +e
        echo 'used_mapping_preset: "%%ALIGNMENT_TYPE%%"' > {output.settings};
        bw2_help=$(bowtie2 --help)
        bw2_version=$(echo "$bw2_help" | head -n1 | sed 's/.*version \\(.*\\) by.*/\\1/g')
        echo "bowtie2_version: \\"$bw2_version\\"" >> {output.settings};
        echo "use_shared_memory: 'true'" >> {output.settings};
        echo 'additional_parameters: "%%ADDITIONAL_BOWTIE2_OPTIONS%%"' >> {output.settings};
        """

# Assemble the bowtie2 section of the report.
#
# The helper script turns the statistics TSV and the settings file into the
# report data file. The static HTML/JS/CSS module files are then copied from
# lib/report/, and both alignment plots are copied into the module's image
# directory. All steps are chained with &&, so the command stops at the
# first failing step.
rule generate_report_data:
    input:
        stats_tsv="mapping/stats/mapping_stats.tsv",
        images=[
            "mapping/stats/alignment_stats.svg",
            "mapping/stats/alignment_stats_relative.svg"
        ],
        settings="mapping/settings.yaml"
    output:
        bw2_data=".report/data/bowtie2_data.js",
        bw2_html=".report/modules/bowtie2.html",
        bw2_js=".report/js/modules/bowtie2.js",
        bw2_css=".report/css/modules/bowtie2.css",
        bw2_images=directory(".report/img/modules/bowtie2/")
    group: "bowtie2_report"
    conda: "../lib/conda_env.yaml"
    shell:
        "python3 lib/generate_report_data.py"
        " --stats {input.stats_tsv}"
        " --output {output.bw2_data}"
        " --settings {input.settings}"
        " && cp lib/report/bowtie2.html {output.bw2_html}"
        " && cp lib/report/bowtie2.js {output.bw2_js}"
        " && cp lib/report/bowtie2.css {output.bw2_css}"
        " &&mkdir -p {output.bw2_images}"
        " && cp {input.images} {output.bw2_images}"
