#!/usr/bin/env python

# CoreTracker Copyright (C) 2016  Emmanuel Noutahi
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from Bio import SeqIO
from coretracker.coreutils import CoreFile
from collections import defaultdict as ddict
import warnings
import os
import argparse
# Install a global "ignore" filter: no warning is shown after this point.
warnings.filterwarnings("ignore")

# Formats that SeqRep reads and writes through Bio.SeqIO. The 'core' format
# is not listed here because it is handled separately through CoreFile.
poss_format = ('fasta', 'nexus', 'stockholm', 'clustal')


class SeqRep:
    """Load sequences from input files and export them in another format.

    The loaded data lives in ``self.seqlist``: for the SeqIO formats listed
    in ``poss_format`` it is a dict mapping a name derived from each input
    file to the list of records read from that file; for the 'core' format
    it is the ``CoreFile`` object built from the single input file.
    """

    def __init__(self, filelist, fmt):
        """Read every file of *filelist*, all expected to be in format *fmt*.

        Args:
            filelist: list of input file paths.
            fmt: 'core' or one of the formats in ``poss_format``.

        Raises:
            ValueError: if *fmt* is not a supported format, or if *fmt* is
                'core' and *filelist* does not hold exactly one file.
        """
        self.seqlist = ddict(list)
        if fmt == 'core':
            self._core_parser(filelist)
        elif fmt in poss_format:
            # in this case load it with SeqIO
            self._parser(filelist, fmt)
        else:
            # Fail loudly rather than silently keeping an empty container
            raise ValueError("Unsupported input format: %s" % fmt)

    def export_as(self, outfmt, outdir, outfile="out.core", reverse=False):
        """Write the loaded sequences in format *outfmt* under *outdir*.

        With ``outfmt == 'core'`` everything goes into the single file
        *outfile*. Otherwise one file named ``<key>.<outfmt>`` is written
        for each non-empty entry of the sequence mapping.

        Args:
            outfmt: 'core' or one of the formats in ``poss_format``.
            outdir: output directory, created if missing; an empty or
                ``None`` value means paths are relative to the current
                directory.
            outfile: file name used only when *outfmt* is 'core'.
            reverse: when true, regroup the records by their own name
                before writing (see ``__convert_to``).

        Raises:
            ValueError: if *outfmt* is not supported, or if SeqIO refuses
                to write the records in the requested format.
        """
        # Validate first so that nothing is created for a bad request
        if outfmt != 'core' and outfmt not in poss_format:
            raise ValueError("Unsupported output format: %s" % outfmt)

        # os.path.join does not accept None; treat it like an empty path
        outdir = outdir or ""
        if outdir and not os.path.exists(outdir):
            os.makedirs(outdir)

        seqlist = self.seqlist
        if reverse:
            seqlist = self.__convert_to(seqlist)

        if outfmt == 'core':
            CoreFile.write_corefile(
                seqlist, os.path.join(outdir, outfile))
            return

        for spec, recs in seqlist.items():
            if len(recs) == 0:
                continue
            target = os.path.join(outdir, spec + "." + outfmt)
            try:
                SeqIO.write(recs, target, outfmt)
            except ValueError as err:
                # Keep the historical hint but do not hide the real cause
                raise ValueError("Expect aligned sequences for "
                                 "the following format: %s . Try with(out) --order_reverse"
                                 " (original error: %s)"
                                 % (", ".join(poss_format), err))

    def _parser(self, flist, fmt):
        """Read each file of *flist* with SeqIO and group records per file.

        Records are stored under the file's base name cut at its first
        dot, and each record's ``name`` is set to its ``id``.
        """
        for f in flist:
            fname = os.path.basename(f).split('.', 1)[0]
            for seqrec in SeqIO.parse(f, fmt):
                seqrec.name = seqrec.id
                self.seqlist[fname].append(seqrec)

    def _core_parser(self, flist):
        """Load the single core file of *flist* through ``CoreFile``.

        Raises:
            ValueError: if *flist* does not contain exactly one path.
        """
        if len(flist) != 1:
            raise ValueError(
                "Only one core file should be converted into fasta format")
        self.seqlist = CoreFile(flist[0])

    def __convert_to(self, indict):
        """Swap the two naming levels of *indict*.

        *indict* maps a key to a list of records. The result maps each
        record's original ``name`` to the records carrying that name, and
        every record gets the key it was stored under as its new ``id``
        and ``name``. The description is prefixed with ``source=``.

        Note that the records are modified in place.
        """
        spec_genomes = ddict(list)
        for (gene, speclist) in indict.items():
            for seqrec in speclist:
                specname = seqrec.name
                seqrec.id = gene
                seqrec.name = gene
                seqrec.description = "source=" + seqrec.description
                spec_genomes[specname].append(seqrec)
        return spec_genomes


if __name__ == '__main__':
    # Command-line entry point: parse the options, load the input files and
    # write them back in the requested output format.

    fmt_choices = poss_format + ('core',)
    # argument parser
    parser = argparse.ArgumentParser(
        description='Break a core file into msa files or merge msa files into one core file.')

    parser.add_argument('--infmt', dest='infmt', required=True,
                        choices=fmt_choices, help="Input format")
    # The option is required, so it takes no default value
    parser.add_argument('--outfmt', dest='outfmt', required=True,
                        choices=fmt_choices, help="Output file format")
    parser.add_argument('--outdir', dest="outdir",
                        default="", help="Working directory")
    parser.add_argument('--outfile', dest="outfile",
                        help="OutFile name for core file format file")
    parser.add_argument('seqs', metavar='infile1',
                        nargs='+', help='List of input files')
    parser.add_argument('--order_reverse', dest='reverse',
                        action='store_true', help="Reverse the name order while saving")

    options = parser.parse_args()
    if options.outfmt != 'core' and options.outfile:
        parser.error('--outfile can only be set when --outfmt=core')
    elif not options.outfile:
        options.outfile = "converted_seq"

    # SeqRep accepts exactly one core file; report a violation as a usage
    # error here instead of letting it surface as a traceback.
    if options.infmt == 'core' and len(options.seqs) != 1:
        parser.error(
            "Only one core file should be converted into fasta format")

    seqreader = SeqRep(options.seqs, options.infmt)
    seqreader.export_as(options.outfmt, options.outdir,
                        options.outfile, options.reverse)
