#!/usr/bin/env python
import sys
import os
import logging
import coloredlogs

import click
import argparse
import yaml
import json

import numpy as np

from grass import processLabels
from grass import iterGrass

# Shared click context settings: "-h" is accepted in addition to "--help".
CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help']}

def which(program):
    """Locate an executable, similar to the shell's ``which`` command.

    If ``program`` contains a directory component it is checked exactly as
    given. Otherwise each directory listed in the ``PATH`` environment
    variable is searched in order.

    Args:
        program: A bare command name or a path to a candidate executable.

    Returns:
        The path of the first candidate that is a regular file with execute
        permission, or ``None`` when there is no such file.
    """
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    dirname, _ = os.path.split(program)
    if dirname:
        # An explicit path is never looked up on PATH.
        return program if is_exe(program) else None

    # A missing PATH variable used to raise KeyError here. Fall back to
    # os.defpath, the default search path the exec*p* family uses when the
    # environment has no PATH.
    search_path = os.environ.get("PATH", os.defpath)
    for entry in search_path.split(os.pathsep):
        # Some environments wrap PATH entries in double quotes.
        candidate = os.path.join(entry.strip('"'), program)
        if is_exe(candidate):
            return candidate

    return None

def _fail(message):
    """Log ``message`` at CRITICAL level and exit the process with status 1."""
    logging.critical(message)
    sys.exit(1)


def _loadConfig(config):
    """Read and validate the YAML experiment configuration at path ``config``.

    The file must be a mapping with an "outdir" entry and exactly one of
    "fasta" or "labels". Any violation is logged and terminates the process
    with exit status 1.

    Returns:
        A tuple ``(outdir, faFiles, labelFiles, runBLAST)``. ``runBLAST`` is
        True when "fasta" was given, in which case ``labelFiles`` is empty;
        otherwise ``faFiles`` is empty.
    """
    with open(config, 'r') as yamCfg:
        # NOTE: this used to be yaml.load(yamCfg). Without an explicit Loader
        # that call can construct arbitrary Python objects from a crafted
        # file, and recent PyYAML releases reject it outright. safe_load only
        # builds plain data, which is all a configuration file needs.
        cfg = yaml.safe_load(yamCfg)

    # An empty file parses to None and a top-level list to a list; reject
    # both up front rather than failing on the membership tests below.
    if not isinstance(cfg, dict):
        _fail("Your configuration file must contain a YAML mapping!")

    hasFasta = 'fasta' in cfg
    hasLabels = 'labels' in cfg
    if not hasFasta and not hasLabels:
        _fail("Your configuration file must contain a \"fasta\" or \"labels\" entry!")
    if hasFasta and hasLabels:
        _fail("Provide either FASTA files to run BLAST or labels file in the required formats")
    if 'outdir' not in cfg:
        _fail("Your configuration file must contain an \"outdir\" entry!")

    # Expected ordering (not verified here): FASTA file 1 is TS and file 2 is
    # AS; label files are given as [TS->ASdb, AS->TSdb].
    faFiles = []
    labelFiles = []
    if hasFasta:
        faFiles = cfg['fasta']
        # The first two entries are indexed when BLAST is launched.
        if not isinstance(faFiles, (list, tuple)) or len(faFiles) < 2:
            _fail("The \"fasta\" entry must list two FASTA files!")
    else:
        labelFiles = cfg['labels']

    return cfg['outdir'], faFiles, labelFiles, hasFasta


def _writeJuntoConfig(juntoConfigFile, keys):
    """Write the junto configuration: the ``keys`` paths plus fixed settings."""
    with open(juntoConfigFile, "w") as configFile:
        # items() rather than the Python-2-only iterkeys().
        for name, value in keys.items():
            configFile.write(name + " = " + value + "\n")
        configFile.write("data_format = edge_factored\n")
        configFile.write("iters = 1\n")
        configFile.write("prune_threshold = 0\n")
        configFile.write("algo = adsorption\n")


def _writeClusterStats(clustFile, summaryFile):
    """Summarise cluster sizes from ``clustFile`` as JSON in ``summaryFile``.

    Each line of ``clustFile`` is one cluster and its size is the number of
    whitespace-separated tokens on that line. If the file has no lines, the
    min/max/mean entries are written as null and the count as 0.
    """
    with open(clustFile) as ifile:
        sizes = np.array([len(line.split()) for line in ifile], dtype=int)

    if sizes.size:
        # Convert NumPy scalars to native numbers: the json module cannot
        # serialise NumPy integer types on Python 3.
        stats = {
            'min clust size': int(sizes.min()),
            'max clust size': int(sizes.max()),
            'mean clust size': float(sizes.mean()),
            'num clusts': int(sizes.size),
        }
    else:
        logging.warning("No clusters found in %s", clustFile)
        stats = {
            'min clust size': None,
            'max clust size': None,
            'mean clust size': None,
            'num clusts': 0,
        }

    with open(summaryFile, 'w') as ofile:
        json.dump(stats, ofile)


def _removeQuietly(path):
    """Delete ``path``; a failure is logged as a warning, never raised."""
    try:
        os.remove(path)
    except OSError as err:
        logging.warning("Could not remove %s: %s", path, err)


# click uses the docstring below as the command's --help description, so it
# is deliberately short and user-facing.
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option('--config', required=True, help='Config file describing the experimental setup')
def processInput(config):
    """Run the GRASS pipeline described by a YAML configuration file.

    The configuration must contain an "outdir" entry and exactly one of
    "fasta" (two FASTA files to run BLAST on) or "labels" (label files used
    instead of running BLAST).
    """
    from subprocess import call

    logging.basicConfig(level=logging.INFO)
    coloredlogs.install(level='INFO')

    # Fail early when an external tool that is launched later is missing.
    requiredTools = (
        ("junto", "Junto library not added to PATH. Install junto."),
        ("mcl", "MCL not added to PATH. Install MCL."),
        ("RapClust", "RapClust not added to PATH. Install RapClust."),
    )
    for program, message in requiredTools:
        if which(program) is None:
            _fail(message)

    outdir, faFiles, labelFiles, runBLAST = _loadConfig(config)

    if os.path.exists(outdir):
        if not os.path.isdir(outdir):
            _fail("The output directory already exists, and is not a directory!")
    else:
        os.makedirs(outdir)
    # From here on the output directory exists.

    def inOutdir(name):
        return os.path.join(outdir, name)

    # The junto configuration is written before any tool runs.
    juntoConfigFile = inOutdir("junto.config")
    keys = {
        "graph_file": inOutdir("grassGraph.txt"),
        "seed_file": inOutdir("seed.txt"),
        "output_file": inOutdir("tempOutput"),
    }
    _writeJuntoConfig(juntoConfigFile, keys)

    finalLabelFile = inOutdir("seedLabels.txt")

    logging.info("Running RapClust on the input first")
    if call(["RapClust", "--config", config]) != 0:
        _fail("RapClust exited with an error; aborting.")

    if runBLAST:
        logging.info("Running BLAST on the input FASTA files.")
        labelFiles = processLabels.runBLAST(faFiles[0], faFiles[1], outdir)
    logging.info("Processing the BLAST output.")
    processLabels.genFinalLabels(keys, labelFiles, finalLabelFile, outdir)

    iterGrass.run(keys, finalLabelFile, juntoConfigFile, outdir)

    clustFile = inOutdir("grass.mag.clust")
    logging.info("Clustering multiple alignment graph after running GRASS")
    if call(["mcl", keys["graph_file"], "--abc", "-o", clustFile]) != 0:
        _fail("mcl exited with an error; aborting.")

    iterGrass.flattenClusters(clustFile, inOutdir("grass.mag.flat.clust"))

    _writeClusterStats(clustFile, inOutdir("grass.stats.json"))

    # Best-effort cleanup of intermediate files. The seed file is kept, but
    # under its final name.
    _removeQuietly(inOutdir("mag.clust"))
    _removeQuietly(inOutdir("mag.flat.clust"))
    _removeQuietly(inOutdir("mag.net"))
    try:
        os.rename(keys["seed_file"], inOutdir("finalLabels.txt"))
    except OSError as err:
        logging.warning("Could not rename %s: %s", keys["seed_file"], err)
    _removeQuietly(keys["output_file"])
    _removeQuietly(juntoConfigFile)

# Script entry point: run the command only when executed directly, not when
# the module is imported.
if __name__ == '__main__':
    processInput()