#!/usr/bin/env python3

import alv
import argparse
import sys
from traceback import print_last

from alv.version import __version__
from alv.alignmentterminal import AlignmentTerminal
import alv.io as io
from alv.exceptions import AlvPossibleFormatError, AlvEmptyAlignment

def list_genetic_codes():
    '''
    Print the numbered list of genetic codes selectable with --code.

    The output is a blank line, a short introduction, one line per
    genetic code ("   <number>. <name>"), and two closing blank lines.
    '''
    intro = [
        'The genetic codes are implemented using BioPython, which reflect what is ',
        'available at https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi .',
        '',
        'The following is the list, with numbers, of known genetic codes:',
    ]
    # (NCBI table number, description) in presentation order.
    known_codes = (
        (1, 'The Standard Code'),
        (2, 'The Vertebrate Mitochondrial Code'),
        (3, 'The Yeast Mitochondrial Code'),
        (4, 'The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code'),
        (5, 'The Invertebrate Mitochondrial Code'),
        (6, 'The Ciliate, Dasycladacean and Hexamita Nuclear Code'),
        (9, 'The Echinoderm and Flatworm Mitochondrial Code'),
        (10, 'The Euplotid Nuclear Code'),
        (11, 'The Bacterial, Archaeal and Plant Plastid Code'),
        (12, 'The Alternative Yeast Nuclear Code'),
        (13, 'The Ascidian Mitochondrial Code'),
        (14, 'The Alternative Flatworm Mitochondrial Code'),
        (16, 'Chlorophycean Mitochondrial Code'),
        (21, 'Trematode Mitochondrial Code'),
        (22, 'Scenedesmus obliquus Mitochondrial Code'),
        (23, 'Thraustochytrium Mitochondrial Code'),
        (24, 'Pterobranchia Mitochondrial Code'),
        (25, 'Candidate Division SR1 and Gracilibacteria Code'),
        (26, 'Pachysolen tannophilus Nuclear Code'),
        (27, 'Karyorelict Nuclear'),
        (28, 'Condylostoma Nuclear'),
        (29, 'Mesodinium Nuclear'),
        (30, 'Peritrich Nuclear'),
        (31, 'Blastocrithidia Nuclear'),
    )
    body = intro + ['   %d. %s' % entry for entry in known_codes]
    # Leading and trailing newline frame the text; print() appends one more.
    print('\n' + '\n'.join(body) + '\n')

def setup_argument_parsing():
    '''
    Create and return the argument parser for the command line.

    Note that the arguments are not parsed here: the caller gets the
    argparse.ArgumentParser instance and is expected to call parse_args()
    on it (and may use the parser's exit() and print_usage() methods).
    '''
    ap = argparse.ArgumentParser()
    # The infile is optional at parse time, since some options do not need any input.
    ap.add_argument('infile', nargs='?', help="The infile is the path to a file, or '-' if reading from stdin.")
    ap.add_argument('--version', action='version', version='%(prog)s ' + __version__)
    ap.add_argument('-f', '--format', choices=['guess', 'fasta', 'clustal', 'phylip', 'stockholm'], default='guess',
                    help="Specify what file format to assume. Be specific if the file is not recognized automatically. When reading from stdin, the format is always guessed to be FASTA. Default: %(default)s")
    ap.add_argument('-t', '--type', choices=['aa', 'dna', 'rna', 'codon', 'guess'], default='guess',
                    help="Specify what sequence type to assume. Coding DNA/RNA is assumed with the 'codon' option. Guessing the type only chooses between 'aa' and 'dna', but assumes the standard genetic code.  Default: %(default)s")
    ap.add_argument('-c', '--color-scheme', choices=['clustal', 'taylor', 'hydrophobicity'], default='clustal',
                    help='Color scheme for AA and coding DNA/RNA. The clustal coloring scheme is an approximation of the original, due to the limited color choices for consoles. The "hydrophobicity" gives red to hydrophobic, blue to polar, and green to charged residues.  Default: %(default)s')
    ap.add_argument('--code', choices=[1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 16, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], type=int, default=1,
                    help="Genetic code to use, based on NCBI's code list. Show alternatives with the --list-codes option. Default: %(default)s.")
    ap.add_argument('-lc', '--list-codes', action='store_true',
                    help="List the available genetic codes and exit.")
    ap.add_argument('-i', '--info', action='store_true',
                    help="Append basic information about the alignment at the end.")
    ap.add_argument('-j', '--just-info', action='store_true',
                    help="Write basic information about the alignment and exit.")
    ap.add_argument('--method', action='store_true',
                    help="Write a suggested text to add to a methods section.")
    ap.add_argument('-w', '--width', type=int, default=0,
                    help='Width of alignment blocks. Defaults to terminal width minus accession width, essentially.')
    ap.add_argument('-k', '--keep-colors-when-redirecting', action='store_true',
                    help="Do not strip colors when redirecting to stdout, or similar. In particular useful with the command 'less -R'.")

    # Options for changing sequence order
    ordering_args = ap.add_argument_group('Sequence ordering')
    ordering_args.add_argument('-s', '--sorting', choices=['infile', 'alpha'], default='infile',
                    help="Sort the sequences as given in the infile or alphabetically (by accession). Default: %(default)s")
    ordering_args.add_argument('-si', '--sort-by-id', metavar='ACCESSION', type=str,
                    help='Sort the output alignment by similarity (percent identity) to named sequence. Overrides -s.')
    ordering_args.add_argument('-so', '--sorting-order', metavar='ACCESSIONS', type=str,
                    help='Comma-separated list of accessions. Sequences will be presented in this order. Also note that one can choose which sequences to present with this option. Overrides -s and -si.')

    # Options for limiting colorization
    # (Help strings are %-formatted by argparse, hence "percent" rather than a literal percent sign.)
    restriction_args = ap.add_argument_group('Restricting colorization')
    restriction_args.add_argument('--majority', action='store_true',
                                  help='Only color those columns where the most common amino acid is found in 50 percent of sequences.')
    restriction_args.add_argument('--no-indels', action='store_true',
                                  help='Only color columns without indels.')

    # Options for removing parts of the accession
    accession_args = ap.add_argument_group('Accession trimming')
    accession_args.add_argument('-as', '--acc-substring', nargs=2, metavar='INT', type=int,
                                help="Specify what substring of an accession to keep. '-as 10 15' discards all but position 10 to 14 in any accession.")
    accession_args.add_argument('-aa', '--acc-abbreviate', type=int, metavar='N',
                                help="Keep only the first N and last N characters of the accession")

    return ap
# Feature to add:
#    ap.add_argument('-p', '--prefix', type=int, default=0, help='Number of characters to remove from the beginning of the accession.')


def input_and_option_adaption(args):
    '''
    Read data, and handle some of the program options.

    The input format is args.format unless that is 'guess'. When guessing,
    stdin (infile '-') is assumed to be FASTA, and a named file has its
    format guessed by io.guess_format.

    Return a pair of an alv.Alignment and an alv.Painter instance.

    Exits on error, after writing a message to stderr, with status
       1 if the input format cannot be guessed,
       2 on a ValueError while reading the input,
       3 on any other exception while reading the input,
       4 if the infile is not found.
    '''
    try:
        if args.format != 'guess':
            file_format = args.format
        elif args.infile == '-':
            file_format = 'fasta'    # Hard guess, because a bit complicated when reading from pipe (sys.stdin)
        else:
            file_format = io.guess_format(args.infile)
        alignment, painter = io.read_alignment(args.infile, args.type, file_format, args.color_scheme, args.code)
        return alignment, painter

    except AlvPossibleFormatError:
        print('alv: cannot guess the format of input.', file=sys.stderr)
        sys.exit(1)
    except FileNotFoundError:
        # Diagnostics belong on stderr, like all other error messages here,
        # so that they do not end up in redirected or piped output.
        print("alv: file '" + args.infile + "' not found.", file=sys.stderr)
        sys.exit(4)
    except ValueError as e:
        # Bio.AlignIO uses ValueError for a number of reading problems
        reason = str(e)
        if reason == 'No records found in handle':
            msg = 'probably wrong input format in ' + args.infile + '. Try option -f.'
        elif reason == 'Sequences must all be the same length':
            msg = 'unequal sequence lengths. Maybe not aligned input in ' + args.infile + '?'
        else:
            msg = reason
        print('alv error:', msg, file=sys.stderr)
        sys.exit(2)
    except Exception as e:
        print('alv bug: Unknown error when reading input.', file=sys.stderr)
        # Show what actually went wrong, so that the problem can be reported.
        print('  ' + type(e).__name__ + ':', e, file=sys.stderr)
        sys.exit(3)

    

def main():
    '''
    Command-line entry point.

    Parses the command line, serves the options that need no input
    (--list-codes, --method), reads the alignment, optionally shortens
    the accessions, and finally writes the alignment to the terminal.
    All exits go through the parser's exit() method.
    '''
    parser = setup_argument_parsing()
    args = parser.parse_args()

    # Requests that can be answered without an infile
    if args.list_codes:
        list_genetic_codes()
        parser.exit()

    if args.method:
        print('Alignments were viewed using alv (github.com/arvestad/alv).')
        parser.exit()

    # Nothing more to do without input: show the usage line and stop
    if not args.infile:
        parser.print_usage()
        parser.exit()

    alignment, painter = input_and_option_adaption(args)

    # Summary only, no alignment output
    if args.just_info:
        io.output_al_info(alignment)
        parser.exit()

    # Accession shortening: -as takes precedence over -aa
    if args.acc_substring:
        first, last = args.acc_substring
        if first < 0 or last <= first:
            print("alv: bad indices for option '-as'!", file=sys.stderr)
            parser.exit(5)
        alignment.trim_accessions(first, last)
    elif args.acc_abbreviate:
        alignment.abbreviate_accessions(args.acc_abbreviate)

    # Configure the painter before creating the terminal, then write the output
    painter.set_options(args)
    terminal = AlignmentTerminal(args)
    try:
        terminal.output_alignment(alignment, painter, args)
        if args.info:
            io.output_al_info(alignment)
    except AlvEmptyAlignment:
        print('alv: input contains no sequence data?', file=sys.stderr)
        parser.exit(4)
    except BrokenPipeError:
        # The reader of our output went away (e.g. a pager was closed).
        # That is not an error: just end the program quietly.
        sys.stderr.close()  # Without this line, python will sometimes give an error
        parser.exit()
    # Any other exception propagates to the caller; no catch-all handler is installed.
    

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
    

