#!/usr/bin/env python
# vim: fileencoding=utf-8 tw=100 expandtab ts=4 sw=4 :
#
# Good Morning Planet Website
# (c) 2012 ActivKonnect
# Rémy Sanchez <remy.sanchez@activkonnect.com>

from __future__ import unicode_literals

import argparse
from pickle import PickleError
import sys
import os
import codecs
from gibi.io import FrenchNormalizer
from gibi.matrix import Matrix


def fail(message):
    """Write *message*, prefixed with "Fail: ", to standard error and exit with status 1."""
    report = 'Fail: {}\n'.format(message)
    sys.stderr.write(report)
    raise SystemExit(1)

def _build_parser():
    """Build the command line parser with its "analyze" and "generate" sub-commands."""
    parser = argparse.ArgumentParser()
    # `dest` records the chosen sub-command so dispatching does not have to probe the namespace
    # for option names.
    subparsers = parser.add_subparsers(dest='action', help='action')
    # Python 3 treats sub-commands as optional by default, which made a bare invocation exit
    # silently without doing anything. Setting the attribute works on Python 2 and 3 alike.
    subparsers.required = True

    parser_analyze = subparsers.add_parser('analyze', help='analyzes a text corpus, and dumps it '
                                                           'into a file for later use')
    parser_generate = subparsers.add_parser('generate', help='generates words from the previously '
                                                             'analyzed corpus file')

    parser_analyze.add_argument('corpus', help='path to the file containing the corpus')
    parser_analyze.add_argument('output', help='write the output matrix to this path')
    parser_analyze.add_argument('--overwrite', '-o', help='overwrites the output file if it '
                                                          'exists', action='store_true')
    parser_analyze.add_argument('--tail', '-t', type=int, default=3,
                                help='length of the tail generate (default: 3)')

    parser_generate.add_argument('matrix', help='matrix file generated by the "analyze" command')
    parser_generate.add_argument('-n', type=int, default=10, help='number of words to generate '
                                                                  '(default: 10)')

    return parser


def _analyze(args):
    """Feed the corpus file into a new matrix and dump that matrix to the output file.

    Exits through fail() when the output exists and --overwrite was not given, when the corpus
    cannot be read or is not valid UTF-8, or when the output cannot be written.
    """
    # Refuse to clobber an existing file before doing any work.
    if not args.overwrite and os.path.exists(args.output):
        fail('output file "{}" exists and will not be overwritten'.format(args.output))

    matrix = Matrix(args.tail)

    try:
        with codecs.open(args.corpus, 'r', encoding='utf-8') as corpus:
            matrix.feed(FrenchNormalizer(corpus))
    except IOError:
        fail('could not open corpus text "{}"'.format(args.corpus))
    except UnicodeDecodeError:
        # The corpus is decoded as UTF-8 while it is being read; report a bad encoding as a
        # regular failure instead of a traceback.
        fail('corpus text "{}" is not valid UTF-8'.format(args.corpus))

    try:
        with open(args.output, 'wb') as output:
            matrix.dump(output)
    except IOError:
        fail('could not write output to "{}"'.format(args.output))


def _generate(args):
    """Load a matrix file and print args.n generated words, one per line.

    Exits through fail() when the matrix file cannot be read or has an invalid format.
    """
    matrix = Matrix()

    # NOTE(review): the PickleError handler suggests Matrix.load() unpickles the file; if so,
    # only matrix files from a trusted source should be loaded -- confirm against gibi.matrix.
    try:
        with open(args.matrix, 'rb') as dump:
            matrix.load(dump)
    except IOError:
        fail('could not read matrix file "{}"'.format(args.matrix))
    except (PickleError, EOFError):
        # An empty or truncated pickle raises EOFError, which is not a PickleError.
        fail('matrix file "{}" is in an invalid format'.format(args.matrix))

    for _ in range(args.n):
        print(matrix.make_word())


def main():
    """Parse the command line and run the selected sub-command."""
    args = _build_parser().parse_args()

    if args.action == 'analyze':
        _analyze(args)
    elif args.action == 'generate':
        _generate(args)


if __name__ == '__main__':
    main()
