#vim: set syntax=python

__author__ = "Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"

"""
A CRISPR/Cas9 analysis workflow using MAGeCK and VISPR.
"""


# Load the workflow settings from config.yaml; Snakemake exposes them as the
# global ``config`` dictionary that the rules in this file read.
configfile: "config.yaml"


import yaml
from vispr.workflow import postprocess_config


# Hand the loaded config to VISPR's workflow helper before any rule reads it.
# The return value is discarded, so any adjustments are presumably made in
# place -- TODO confirm against vispr.workflow.
postprocess_config(config)


# Aggregate target: requests one VISPR config file per entry in
# config["experiments"]. As the first rule declared in this file it is what
# Snakemake builds when no explicit target is given.
rule all:
    input:
        [
            "results/{experiment}.vispr.yaml".format(experiment=name)
            for name in config["experiments"]
        ]


def _fastqc_reads(wildcards):
    """Return whatever config["samples"] stores for the requested sample."""
    return config["samples"][wildcards.sample]


# Run FastQC for one sample and keep only the raw fastqc_data.txt report.
#
# The shell command first removes stale *.zip archives from the sample's QC
# directory, runs fastqc (stderr goes to the log), and then streams
# fastqc_data.txt out of the produced archive into the declared output.
#
# NOTE(review): the final step globs {params.dir}/*.zip. If a sample maps to
# several FASTQ files, fastqc would presumably leave several archives there
# and the unzip call would receive more than one -- confirm this is intended.
rule fastqc:
    input:
        _fastqc_reads
    output:
        "results/qc/{sample}/fastqc_data.txt"
    params:
        dir="results/qc/{sample}"
    log:
        "logs/fastqc/{sample}.log"
    shell:
        "rm -f {params.dir}/*.zip; "
        "fastqc --extract -o {params.dir} {input} 2> {log} && "
        "unzip -p {params.dir}/*.zip '*/fastqc_data.txt' > {output}"


# Count sgRNA reads of all samples in a single `mageck count` run.
#
# `input.fastq` lists every FASTQ file so that Snakemake tracks them as
# dependencies, but it is NOT what is passed on the command line: Snakemake
# flattens it into one space-separated list, which loses the grouping of
# replicates per sample. MAGeCK expects one --fastq argument per sample label,
# with the replicates of a sample joined by commas, so the command uses
# `params.fastqs` instead. Labels and FASTQ groups are both derived from
# iterating config["samples"], hence they stay in the same order.
rule mageck_count:
    input:
        fastq=config["samples"].values(),
        library=config["library"]
    output:
        "results/count/all.count.txt",
        "results/count/all.count.median_normalized.txt",
        "results/count/all.countsummary.txt"
    params:
        labels=",".join(config["samples"]),
        # A sample may be given as a single path (str) or a list of replicate
        # paths; only the latter needs comma-joining.
        fastqs=" ".join(
            replicates if isinstance(replicates, str) else ",".join(replicates)
            for replicates in config["samples"].values()
        ),
        prefix="results/count/all"
    log:
        "logs/mageck/count/all.log"
    shell:
        "mageck count --output-prefix {params.prefix} "
        "--list-seq {input.library} "
        "--fastq {params.fastqs} --sample-label {params.labels} "
        "--trim-5 {config[sgrnas][trim-5]} --sgrna-len {config[sgrnas][len]} 2> {log}"


def _mageck_test_labels(group):
    """Build a params function for one side of an experiment.

    The returned function looks up the experiment named by the ``experiment``
    wildcard in config["experiments"] and comma-joins the entries stored
    under ``group`` (used here with "treatment" and "control").
    """
    def joined_labels(wildcards):
        experiment = config["experiments"][wildcards.experiment]
        return ",".join(experiment[group])
    return joined_labels


# Run `mageck test` for one experiment on the shared count table, comparing
# the experiment's treatment entries against its control entries. stderr of
# the tool is redirected to the per-experiment log file.
rule mageck_test:
    input:
        "results/count/all.count.txt"
    output:
        "results/test/{experiment}.gene_summary.txt",
        "results/test/{experiment}.sgrna_summary.txt"
    params:
        prefix="results/test/{experiment}",
        treatment=_mageck_test_labels("treatment"),
        control=_mageck_test_labels("control")
    log:
        "logs/mageck/test/{experiment}.log"
    shell:
        "mageck test --output-prefix {params.prefix} "
        "--count-table {input} --treatment-id {params.treatment} "
        "--control-id {params.control} 2> {log}"


# Write the VISPR configuration (YAML) for one experiment.
#
# All file references stored in the YAML are expressed relative to the
# "results" directory, which is also where the YAML file itself is written.
# The "controls" (targets) and "info" (sgrnas) entries are copied from the
# workflow config only when present there.
#
# NOTE(review): `os` is not imported in the visible part of this file;
# presumably it is available through the namespace Snakemake executes the
# Snakefile in -- confirm.
rule vispr:
    input:
        results="results/test/{experiment}.gene_summary.txt",
        counts="results/count/all.count.median_normalized.txt",
        mapstats="results/count/all.countsummary.txt",
        fastqc=expand("results/qc/{sample}/fastqc_data.txt", sample=config["samples"])
    output:
        "results/{experiment}.vispr.yaml"
    run:
        def relative_to_results(path):
            return os.path.relpath(path, "results")
        species = config["species"]
        assembly = config["assembly"]
        target_section = {
            "results": relative_to_results(input.results),
            "genes": config["targets"]["genes"],
        }
        if "controls" in config["targets"]:
            target_section["controls"] = config["targets"]["controls"]
        sgrna_section = {
            "counts": relative_to_results(input.counts),
            "mapstats": relative_to_results(input.mapstats),
        }
        if "info" in config["sgrnas"]:
            sgrna_section["info"] = config["sgrnas"]["info"]
        # input.fastqc was expanded over config["samples"], so both iterate
        # in the same order and can be paired up positionally.
        fastqc_section = {}
        for sample, report in zip(config["samples"], input.fastqc):
            fastqc_section[sample] = relative_to_results(report)
        vispr_config = {
            "experiment": wildcards.experiment,
            "species": species,
            "assembly": assembly,
            "targets": target_section,
            "sgrnas": sgrna_section,
            "fastqc": fastqc_section,
        }
        with open(output[0], "w") as handle:
            yaml.dump(vispr_config, handle, default_flow_style=False)
