from os import listdir
from os.path import isfile, dirname, basename

def getSampleName(file_name):
    """Return the sample whose main reads file matches ``file_name``.

    Walks ``config['entries']`` in iteration order and returns the key of the
    first entry for which ``file_name`` is a substring of the base name of
    ``entry['main']['reads']``. Directory components of the reads path are
    ignored for the comparison.

    Args:
        file_name: File name (or a fragment of one) to look up.

    Returns:
        The matching sample name, or ``None`` if no entry matches. In the
        latter case a message is printed to stdout.
    """
    for sample_name, seq_run in config['entries'].items():
        # Use the `basename` imported at the top of the file instead of
        # `os.path.basename`: `os` itself is not imported explicitly here.
        if file_name in basename(seq_run['main']['reads']):
            return sample_name
    print('Cannot find {} in groups file'.format(file_name))
    return None

def getSymlinkPath(file_name, basedir):
    """Build ``<basedir>/<sample>_trimmed.fq`` for the sample owning ``file_name``.

    The sample name is resolved with ``getSampleName``; whatever that call
    returns is formatted into the file name as-is (so a ``None`` result ends
    up as the text ``None`` in the path).

    Args:
        file_name: Reads file name used to look up the sample.
        basedir: Directory the resulting path is placed under.

    Returns:
        The joined path as a string.
    """
    sample = getSampleName(file_name)
    link_name = "{sample_name}_trimmed.fq".format(sample_name=sample)
    return os.path.join(basedir, link_name)

# Default target rule: requests the trim_galore report page. That file is an
# output of `generate_report`, so asking for it pulls in the rules that
# `generate_report` depends on.
rule all:
    input:
        ".report/modules/trimgalore.html"


# Runs trim_galore on the 'main' reads file of one sample, tees its console
# output into the rule log, and renames the trimming report to a
# sample-based file name.
#
# NOTE(review): the %%NAME%% tokens are presumably placeholders replaced by an
# external templating step before Snakemake parses this file — confirm.
rule trimgalore:
    # Reads path is looked up in the config entry of the requested sample.
    input:
        reads=lambda wildcard: config['entries'][wildcard.sample]['main']['reads']
    # `reads_forward` is never named in the shell command; presumably
    # trim_galore creates it from --basename/--gzip/--output_dir (TODO confirm).
    # `stats_forward` is produced by the final `mv` below.
    output:
        reads_forward="preprocessing/trim_galore/{sample}_trimmed.fq.gz",
        stats_forward="preprocessing/trim_galore/{sample}_trimming_report.txt",
    conda:
        "../lib/conda_env.yaml"
    # stats_forward_raw: base name of the input reads file plus
    #   "_trimming_report.txt" — presumably the name trim_galore gives its
    #   report; it is the source of the `mv` in the shell command.
    # adapter: as literally written the condition is a non-empty string and so
    #   always truthy; presumably, after placeholder substitution, an empty
    #   adapter value makes this evaluate to "" (TODO confirm).
    params:
        output_dir="preprocessing/trim_galore/",
        stats_forward_raw=lambda wildcard: basename(config['entries'][wildcard.sample]['main']['reads']) + "_trimming_report.txt",
        adapter=lambda wildcard: "--adapter %%ADAPTER%%" if "%%ADAPTER%%" else ""
    group:
        "trim_galore"
    log:
        "preprocessing/trim_galore/trimming_stats/{sample}.txt"
    threads:
        1
    # The adjacent string literals are concatenated into one command line:
    #   1. trim_galore with stdout and stderr piped through `tee` into {log};
    #   2. two `echo` lines announcing the rename;
    #   3. `mv` of the raw report to {output.stats_forward}.
    # The `mv` source directory is hard-coded and matches params.output_dir.
    shell:
        "trim_galore %%ADDITIONAL_PARAMETER%% --quality %%QUALITY_THRESHOLD%% %%PHRED_SCORE_TYPE%% --length %%MIN_LENGTH%% {params.adapter} "
        "--cores {threads} --gzip --basename {wildcards.sample} --output_dir {params.output_dir} {input.reads} 2>&1 |"
        "tee {log};"
        "echo \"Renaming Trim-Galore reports:\";"
        "echo \"preprocessing/trim_galore/{params.stats_forward_raw} -> {output.stats_forward}\";"
        "mv \"preprocessing/trim_galore/{params.stats_forward_raw}\" \"{output.stats_forward}\";"

# Exposes the trimmed reads of a sample as preprocessing/{sample}.fastq.gz by
# creating a hard link (`ln` without -s) to the trim_galore output.
#
# NOTE(review): the rule name says "paired", but only a single reads file is
# declared as input. No rule visible in this file requests the output, so it
# is presumably consumed elsewhere — confirm.
rule collect_paired_reads_zipped:
    input:
        reads_forward="preprocessing/trim_galore/{sample}_trimmed.fq.gz",
    output:
        "preprocessing/{sample}.fastq.gz"
    shell:
        "ln ./{input} {output}"

# Writes a tab-separated index of the per-sample trimming reports: a header
# line "Sample<TAB>Forward" followed by one line per sample giving the sample
# name and the path {target_dir}/{sample}_trimming_report.txt.
rule stats_file_list_for_report:
    # Depends on the trimming report of every sample in config['entries'],
    # in sorted key order.
    input:
        stats=expand("preprocessing/trim_galore/{sample}_trimming_report.txt", sample=sorted(config['entries'].keys()))
    # Declared with temp(), i.e. as a temporary output.
    output:
        temp("preprocessing/trim_galore/stats_file.tsv")
    # `samples` is the same sorted key list used for `input.stats`; the shell
    # loop iterates over it and rebuilds the report paths from `target_dir`.
    params:
        target_dir="preprocessing/trim_galore",
        samples=sorted(config['entries'].keys())
    group:
        "trim_galore_report"
    # "\t" sits in a regular (non-raw) string literal, so it is already a tab
    # character when the command reaches the shell. `${{sample}}` uses doubled
    # braces so that the shell receives `${sample}` (the loop variable) rather
    # than Snakemake trying to format it.
    shell:
        "echo -e \"Sample\tForward\" > {output};"
        "for sample in {params.samples}; do echo -e \"${{sample}}\t{params.target_dir}/${{sample}}_trimming_report.txt\" >> {output}; done"

# Builds the trim_galore section of the report: runs
# lib/generate_report_data.py on the stats index to produce the data file,
# then copies the HTML and JS module files into the .report tree.
rule generate_report:
    # Index file written by `stats_file_list_for_report`.
    input:
        stats="preprocessing/trim_galore/stats_file.tsv"
    output:
        trim_galore_data=".report/data/trimgalore_data.js",
        trim_galore_html=".report/modules/trimgalore.html",
        trim_galore_js=".report/js/modules/trimgalore.js",
    conda:
        "../lib/conda_env.yaml"
    group:
        "trim_galore_report"
    # The three steps are chained with `&&`, so each one runs only if the
    # previous one succeeded.
    # NOTE(review): the `lib/...` paths in the command are relative and carry
    # no `../` prefix, unlike the conda env path above — confirm they resolve
    # against the intended directory.
    shell:
        "python3 lib/generate_report_data.py --stats {input.stats} --output {output.trim_galore_data} && "
        "cp lib/report/trimgalore.html {output.trim_galore_html} && "
        "cp lib/report/trimgalore.js {output.trim_galore_js}"
