#!/usr/bin/env python

# Copyright (c) 2014. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Script to generate epitope predictions from somatic cancer variants
and (optionally) tumor RNA-seq data.

Example usage:
    topiary \
        -mhc-pan
        --mhc-alleles-file HLA.txt -
        --vcf somatic.vcf
        --rna-gene-fpkm-file genes.fpkm_tracking
        --rna-transcript-fpkm-file isoforms.fpkm_tracking
        --filter-ic50 500
        --filter-percentile 2
        --output results.csv
"""

from topiary import (
    arg_parser,
    mhc_binding_predictor_from_args,
    variant_collection_from_args,
    MutantEpitopePredictor,
    epitopes_to_dataframe,
)
from topiary.rna import (
    load_cufflinks_dataframe,
    load_cufflinks_fpkm_dict,
    remap_novel_gene_expression_onto_ensembl_ids,
)

# Parsed command-line options, stored at module level; main() reads this
# namespace directly instead of taking it as a parameter.
# NOTE(review): this runs at import time, so importing the module (rather
# than executing it) also triggers argument parsing -- presumably of
# sys.argv if arg_parser is a standard argparse parser; confirm before
# reusing main() from other code.
args = arg_parser.parse_args()

def _gene_expression_dict_from_args(args):
    """
    Load the gene-level expression values selected by the parsed arguments.

    Returns None when `args.rna_gene_fpkm_file` is not set. Otherwise the
    file is loaded with `load_cufflinks_dataframe` and the result is either
    whatever `remap_novel_gene_expression_onto_ensembl_ids` returns (when
    `args.rna_remap_novel_genes_onto_ensembl` is set) or a dictionary mapping
    each row's `id` to its `fpkm`.
    """
    if not args.rna_gene_fpkm_file:
        return None

    gene_expression_df = load_cufflinks_dataframe(args.rna_gene_fpkm_file)
    if args.rna_remap_novel_genes_onto_ensembl:
        return remap_novel_gene_expression_onto_ensembl_ids(
            gene_expression_df)

    # Pair the two columns directly rather than walking iterrows(), which
    # builds a full Series per row only to read two fields from it. As with
    # the row-wise version, a repeated id keeps its last fpkm value.
    return dict(zip(
        gene_expression_df["id"].tolist(),
        gene_expression_df["fpkm"].tolist()))


def _transcript_expression_dict_from_args(args):
    """
    Load the transcript-level expression values selected by the parsed
    arguments.

    Returns None when `args.rna_transcript_fpkm_file` is not set, otherwise
    the result of `load_cufflinks_fpkm_dict` for that file.
    """
    if not args.rna_transcript_fpkm_file:
        return None
    return load_cufflinks_fpkm_dict(args.rna_transcript_fpkm_file)


def main():
    """
    Predict mutant epitopes for the variants named on the command line.

    Reads the module-level `args` namespace, echoes it to stdout, builds an
    MHC binding predictor and a `MutantEpitopePredictor` from it, loads the
    variants and any optional gene/transcript expression data, and runs the
    prediction. The resulting table is written to `args.output_csv` and/or
    `args.output_html` when those are set, and is always printed to stdout.
    """
    print("Topiary commandline arguments:")
    print(args)

    mhc_model = mhc_binding_predictor_from_args(args)
    predictor = MutantEpitopePredictor(
        mhc_model=mhc_model,
        padding_around_mutation=args.padding_around_mutation,
        ic50_cutoff=args.ic50_cutoff,
        percentile_cutoff=args.percentile_cutoff,
        keep_wildtype_epitopes=args.keep_wildtype_epitopes)
    variants = variant_collection_from_args(args)

    # Either dictionary may be None, meaning that kind of expression data
    # was not supplied; both are passed through unchanged below.
    gene_expression_dict = _gene_expression_dict_from_args(args)
    transcript_expression_dict = _transcript_expression_dict_from_args(args)

    epitopes = predictor.epitopes_from_variants(
        variants,
        gene_expression_dict=gene_expression_dict,
        gene_expression_threshold=args.rna_min_gene_expression,
        transcript_expression_dict=transcript_expression_dict,
        transcript_expression_threshold=args.rna_min_transcript_expression,
        # The command-line flag is phrased as "skip errors", the predictor
        # option as "raise on error", hence the negation.
        raise_on_variant_effect_error=not args.skip_variant_errors)

    df = epitopes_to_dataframe(
        epitopes,
        gene_expression_dict=gene_expression_dict,
        transcript_expression_dict=transcript_expression_dict)
    if args.output_csv:
        df.to_csv(args.output_csv, index=True, index_label="#")
    if args.output_html:
        df.to_html(args.output_html, index=True)
    print(df)

# Run the pipeline only when this file is executed as a script; a plain
# import defines main() without calling it.
if __name__ == "__main__":
    main()
