#!/usr/bin/env python

import logging

from pyroe import make_splici_txome
from pyroe import fetch_processed_quant
from pyroe import convert
from pyroe import id_to_name
from pyroe import output_formats

if __name__ == "__main__":
    # Command-line entry point: build the argument parser with one sub-parser
    # per subcommand (make-splici, fetch-quant, id-to-name, convert), parse
    # sys.argv, and dispatch to the matching pyroe function.
    import argparse
    import sys
    from argparse import RawTextHelpFormatter
    from pyroe import __version__

    # ------------------------------------------------------------------
    # Top-level parser
    # ------------------------------------------------------------------
    parser = argparse.ArgumentParser(
        description="The pyroe package provides useful functions to support alevin-fry ecosystem.",
        prog="pyroe",
    )
    parser.add_argument(
        "-v", "--version", action="version", version=f"pyroe {__version__}"
    )
    # The selected subcommand name is stored in ``args.command``; it stays
    # ``None`` when the user supplies no subcommand.
    subparsers = parser.add_subparsers(
        title="subcommands",
        dest="command",
        description="valid subcommands",
        help="additional help",
    )

    # ------------------------------------------------------------------
    # make-splici
    # ------------------------------------------------------------------
    parser_makeSplici = subparsers.add_parser(
        "make-splici", help="Make splici reference"
    )
    parser_makeSplici.add_argument(
        "genome_path",
        metavar="genome-path",
        type=str,
        help="The path to a genome fasta file.",
    )
    parser_makeSplici.add_argument(
        "gtf_path", metavar="gtf-path", type=str, help="The path to a gtf file."
    )
    parser_makeSplici.add_argument(
        "read_length",
        metavar="read-length",
        type=int,
        help="The read length of the single-cell experiment being processed (determines flank size).",
    )
    parser_makeSplici.add_argument(
        "output_dir",
        metavar="output-dir",
        type=str,
        help="The output directory where splici reference files will be written.",
    )
    parser_makeSplici.add_argument(
        "--filename-prefix",
        type=str,
        default="splici",
        help="The file name prefix of the generated output files.",
    )
    parser_makeSplici.add_argument(
        "--flank-trim-length",
        type=int,
        default=5,
        help="Determines the amount subtracted from the read length to get the flank length.",
    )
    parser_makeSplici.add_argument(
        "--extra-spliced",
        type=str,
        help="The path to an extra spliced sequence fasta file.",
    )
    parser_makeSplici.add_argument(
        "--extra-unspliced",
        type=str,
        help="The path to an extra unspliced sequence fasta file.",
    )
    parser_makeSplici.add_argument(
        "--bt-path",
        type=str,
        default="bedtools",
        help="The path to bedtools v2.30.0 or greater.",
    )
    parser_makeSplici.add_argument(
        "--no-bt",
        action="store_true",
        help="A flag indicates whether bedtools will be used for generating splici reference files.",
    )
    parser_makeSplici.add_argument(
        "--dedup-seqs",
        action="store_true",
        help="A flag indicates whether identical sequences will be deduplicated.",
    )
    parser_makeSplici.add_argument(
        "--no-flanking-merge",
        action="store_true",
        help="A flag indicates whether flank lengths will be considered when merging introns.",
    )

    # ------------------------------------------------------------------
    # fetch-quant
    # ------------------------------------------------------------------
    # Calling fetch_processed_quant() without arguments yields the table of
    # available datasets; its "dataset_name" column is listed, numbered from
    # 1, in the epilog of the fetch-quant help.
    # NOTE(review): this call runs on every invocation, not only for
    # fetch-quant -- confirm it is cheap and does not need network access.
    available_datasets = fetch_processed_quant()
    dataset_lines = [
        f"{idx}. {dataset_name}"
        for idx, dataset_name in enumerate(
            available_datasets["dataset_name"].tolist(), start=1
        )
    ]
    epilog = "\n".join(
        ["Index of the available datasets:", "\n".join(dataset_lines)]
    )

    # RawTextHelpFormatter keeps the newlines of the epilog so that every
    # dataset is shown on its own line.
    parser_fetchQuant = subparsers.add_parser(
        "fetch-quant",
        help="Fetch processed quant results",
        epilog=epilog,
        formatter_class=RawTextHelpFormatter,
    )
    parser_fetchQuant.add_argument(
        "dataset_ids",
        metavar="dataset-ids",
        nargs="+",
        type=int,
        help="The ids of the datasets to fetch",
    )
    parser_fetchQuant.add_argument(
        "--fetch-dir",
        type=str,
        default="processed_quant",
        help="The path to a directory for storing fetched datasets.",
    )
    parser_fetchQuant.add_argument(
        "--force",
        action="store_true",
        help="A flag indicates whether existing datasets will be redownloaded by force.",
    )
    parser_fetchQuant.add_argument(
        "--delete-tar",
        action="store_true",
        help="A flag indicates whether fetched tar files stored in the quant_tar directory under the provided fetch_dir should be deleted.",
    )
    parser_fetchQuant.add_argument(
        "--quiet",
        action="store_true",
        help="A flag indicates whether help messaged should not be printed.",
    )

    # ------------------------------------------------------------------
    # id-to-name
    # ------------------------------------------------------------------
    parser_id_to_name = subparsers.add_parser(
        "id-to-name", help="Generate a gene id to gene name mapping file from a GTF."
    )
    parser_id_to_name.add_argument("gtf_file", help="The GTF input file.")
    parser_id_to_name.add_argument(
        "output", help="The path to where the output tsv file will be written."
    )
    parser_id_to_name.add_argument(
        "--format",
        help="The input format of the file (must be either GTF or GFF3). This will be inferred from the filename, but if that fails it can be provided explicitly.",
        default=None,
    )

    # ------------------------------------------------------------------
    # convert
    # ------------------------------------------------------------------
    # The supported output formats are only interpolated into the help text;
    # the value of --output-format is not validated by the parser.
    out_formats = output_formats()
    parser_convert = subparsers.add_parser(
        "convert", help="Convert alevin-fry quantification result to another format."
    )
    parser_convert.add_argument(
        "quant_dir",
        metavar="quant_dir",
        type=str,
        help="The input quantification directory containing the matrix to be converted.",
    )
    parser_convert.add_argument(
        "output",
        help="The output name where the quantification matrix should be written. For `csvs` output format, this will be a directory. For all others, it will be a file.",
    )
    parser_convert.add_argument(
        "--output-structure",
        help="The structure that U,S and A counts should occupy in the output matrix.",
    )
    parser_convert.add_argument(
        "--output-format",
        default="h5ad",
        help=f"The format in which the output should be written, one of {out_formats}.",
    )
    parser_convert.add_argument(
        "--geneid-to-name",
        type=str,
        required=False,
        help="A 2 column tab-separated list of gene ID to gene name mappings. Providing this file will project gene IDs to gene names in the output.",
    )

    logging.basicConfig(level=logging.INFO)

    # ------------------------------------------------------------------
    # Parse the command line and dispatch on the chosen subcommand
    # ------------------------------------------------------------------
    args = parser.parse_args()
    if args.command == "make-splici":
        make_splici_txome(
            genome_path=args.genome_path,
            gtf_path=args.gtf_path,
            read_length=args.read_length,
            output_dir=args.output_dir,
            flank_trim_length=args.flank_trim_length,
            filename_prefix=args.filename_prefix,
            extra_spliced=args.extra_spliced,
            extra_unspliced=args.extra_unspliced,
            dedup_seqs=args.dedup_seqs,
            no_bt=args.no_bt,
            bt_path=args.bt_path,
            no_flanking_merge=args.no_flanking_merge,
        )
    elif args.command == "fetch-quant":
        fetch_processed_quant(
            dataset_ids=args.dataset_ids,
            fetch_dir=args.fetch_dir,
            force=args.force,
            delete_tar=args.delete_tar,
            quiet=args.quiet,
        )
    elif args.command == "convert":
        # convert() receives the whole parsed namespace.
        convert(args)
    elif args.command == "id-to-name":
        # id_to_name() receives the whole parsed namespace.
        id_to_name(args)
    else:
        # No subcommand given. print_help() writes the help text itself and
        # returns None, so it must not be wrapped in print() (that would emit
        # a stray "None" line after the help).
        parser.print_help()
        sys.exit(1)
