#!/usr/bin/python
#
# Copyright (C) 2014 LIGO Scientific Collaboration
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#

"""
Add xml files to a SQLite database and simplify the output to remove redundant
or unused entries (basically a combination of ligolw_sqlite and
ligolw_cbc_dbsimplify)
"""

import pycbc
import pycbc.version
__author__  = "Ian Harry <ian.harry@ligo.org>"
__version__ = pycbc.version.git_verbose_msg
__date__    = pycbc.version.date
__program__ = "pycbc_sqlite_simplify"

import argparse
import os,sys
import urlparse, urllib
import sqlite3
from glue.lal import CacheEntry
from glue.ligolw import ligolw
from glue.ligolw import dbtables
from glue.ligolw import lsctables
from glue.ligolw.utils import process
from glue.ligolw.utils import local_path_from_url
from glue.ligolw.utils import ligolw_sqlite
from pylal import ligolw_sqlutils as sqlutils

# so they can be inserted into a database
dbtables.ligolwtypes.ToPyType["ilwd:char"] = unicode


def cleanup_tables(connection, vacuum=True, verbose=False, debug=False):
    """
    Function to remove duplicate entries and compress.
    """
    # FIXME: Leaving the debug=False in here so this can be altered when logging
    #        module is added.
    # Cleaning up the veto_definer and segments tables
    sqlutils.simplify_segments_tbls(connection, verbose=verbose, debug=False)
    sqlutils.simplify_vetodef_tbl(connection, verbose=verbose, debug=False)

    # Make process_id map from info in the process & process_params tables
    sqlutils.get_process_info(connection, verbose=verbose, debug=False)

    sqlutils.simplify_timeslide_tbl(connection, verbose=verbose, debug=False)
    sqlutils.simplify_sim_tbls(connection, verbose=verbose, debug=False)

    # Remove empty space & defragment database
    sqlutils.vacuum_database(connection, vacuum=vacuum, verbose=verbose)

    # Cleaning up the coinc_definer & experiment tables
    sqlutils.simplify_coincdef_tbl(connection, verbose=verbose, debug=False)
    sqlutils.simplify_expr_tbl(connection, verbose=verbose, debug=False)
    sqlutils.simplify_exprsumm_tbl(connection, verbose=verbose, debug=False)

    # Cleaning up the summary tables
    sqlutils.simplify_summ_tbls(connection, verbose=verbose, debug=False)
    # Update process_ids in the sngl-ifo trigger tables tables
    sqlutils.update_pid_in_snglstbls(connection, verbose=verbose, debug=False)
    # Cleaning up the process & process_params tables
    sqlutils.simplify_proc_tbls(connection, verbose=verbose, debug=False)

    # Remove empty space & defragment database
    sqlutils.vacuum_database(connection, vacuum=vacuum, verbose=verbose)
    connection.commit()


# Command line parsing

# FIXME: Move verbose to loggin module
_desc = __doc__[1:]
parser = argparse.ArgumentParser(description=_desc)
parser.add_argument('--version', action='version', version=__version__)
parser.add_argument('-o', '--output-file', action="store", required=True,
                    metavar="OUTFILENAME", help="""The name of the SQLite output
                    file. If this exists it will be overwritten.""")
parser.add_argument('-p', '--preserve-ids', action="store_true",
                    default=False, help="""Preserve row IDs from the XML in the
                    database.  The default is to assign new IDs to prevent
                    collisisions.  Inserts will fail if collisions occur.""")
parser.add_argument('-t', '--tmp-space', action="store", default=None,
                    metavar='PATH', help="""Path to a directory suitable for use
                    as a work area while manipulating the database file.  The
                    database file will be worked on in this directory, and then
                    moved to the final location when complete.  This option is
                    intended to improve performance when running in a networked
                    environment, where there might be a local disk with higher
                    bandwidth than is available to the filesystem on which the
                    final output will reside.""")
parser.add_argument('input_files', nargs='+', action="store",
                    metavar="INPUT_FILE", help="""List of files to add to
                    SQLITE database.""")
parser.add_argument('-i', '--injection-file', action="store", default=None,
                    metavar="FILE", help="""Injection file to add to the
                    database and add in the experiment tables.""")
parser.add_argument('-s', '--simulation-tag', action="store", default=None,
                    metavar="TAG", help="""Identifying string used to
                    distinguish the injections provided here from injections
                    provided elsewhere once they are combined. Required if
                    injection-file is given.""")
parser.add_argument('--vacuum', action="store_true", default=False, help="""If
                    turned on, will vacuum the database before saving.
                    This cleans any fragmentation and removes empty space
                    left behind by all the DELETEs, making the output
                    database smaller and more efficient.
                    WARNING: Since this requires rebuilding the entire
                    database, this can take awhile for larger files.""")
parser.add_argument('-v', '--verbose', action="store_true", default=False,
                    help="Be verbose.")

args = parser.parse_args()

if args.injection_file and not args.simulation_tag:
    parser.error("--injection-file required --simulation-tag.")

#
# DB content handler
#


class ContentHandler(ligolw.LIGOLWContentHandler):
        pass

dbtables.use_in(ContentHandler)

target = dbtables.get_connection_filename(args.output_file, 
              tmp_path=args.tmp_space, replace_file=True, verbose=args.verbose)
connection = sqlite3.connect(target, timeout=10)
ContentHandler.connection = connection
crs = connection.cursor()

# Insert files
for n, path in enumerate(args.input_files):
    path = os.path.abspath(path)
    if args.verbose:
        print >>sys.stderr, "%d/%d:" % (n + 1, len(args.input_files))
    if path.endswith(".sqlite") or path.endswith(".sql"):
        source_filename = dbtables.get_connection_filename(path,
                                 tmp_path=args.tmp_space, verbose=args.verbose)
        if args.verbose:
            print >>sys.stderr, "reading '%s' ..." % source_filename
        xmldoc = dbtables.get_xml(sqlite3.connect(source_filename))
        ligolw_sqlite.insert_from_xmldoc(ContentHandler.connection, xmldoc,
                preserve_ids=args.preserve_ids, verbose=args.verbose)
        xmldoc.unlink()
        dbtables.discard_connection_filename(path, source_filename,
                                                          verbose=args.verbose)
    else:
        fileUrl = urlparse.urljoin('file:', urllib.pathname2url(path))
        ligolw_sqlite.insert_from_url(fileUrl,
                  contenthandler=ContentHandler,
                  preserve_ids=args.preserve_ids, verbose=args.verbose)
    dbtables.build_indexes(ContentHandler.connection, args.verbose)
    if not ((n+1)%20):
        # Do cleanup as we go to avoid file getting to big.
        cleanup_tables(connection, vacuum=args.vacuum, verbose=args.verbose)

# Add injection file if present
if args.injection_file:
    sim_table = "sim_inspiral"
    # FIXME: Remove this if mapping is done differently?
    # get sim table
    sim_table = sqlutils.validate_option( sim_table )

    # Insert the injection file to the databse
    ligolw_sqlite.insert_from_urls([args.injection_file], ContentHandler,
                                   preserve_ids=False, verbose=args.verbose)

    if args.verbose:
        print >> sys.stderr, "Updating the experiment summary table..."
    # the process_id of the added injection file should be whatever the
    # largest process_id is in the sim_inspiral table
    # FIXME: !!! So if there is more than one process_id, what happens?
    sqlquery = ''.join(["""
        UPDATE
            experiment_summary
        SET
            sim_proc_id = (
                SELECT DISTINCT 
                    process_id 
                FROM 
                    """, sim_table, """ 
                ORDER BY 
                    process_id 
                DESC LIMIT 1 ) 
        WHERE 
            sim_proc_id IS NULL
        """])
    crs.execute( sqlquery )

    # Need to get the process ID
    sqlquery = '''SELECT
                    sim_proc_id
                FROM
                    experiment_summary
                WHERE
                    datatype == "simulation"
                ORDER BY
                    sim_proc_id
                DESC LIMIT 1'''
    procId = crs.execute(sqlquery).fetchall()[0][0]

    # Ensure that all sims in the injection file now have the new process ID.
    sqlquery = ''.join(["""
        UPDATE
            sim_inspiral
        SET
            process_id = ?
        """])
    crs.execute(sqlquery, (procId,))

    # FIXME: This really doesn't belong in the process_params table!
    # Set the --user-tag entry of the process params inspinj table
    
    # First need to check if a user-tag is already set
    sqlquery = '''SELECT
                      value
                  FROM
                      process_params
                  WHERE
                      program == "inspinj" AND
                      process_id == ? AND
                      (param == "--userTag" OR param == "-userTag")'''
    orig_tag = crs.execute(sqlquery, (procId,)).fetchall()

    if orig_tag:
        # If it exists update it
        sqlquery = '''UPDATE
                          process_params
                      SET
                          value = ?
                      WHERE
                          program == "inspinj" AND
                          process_id == ? AND
                          (param == "--userTag" OR param == "-userTag")'''
        crs.execute(sqlquery, (args.simulation_tag, procId))
    else:
        # Else set it
        sqlquery = '''INSERT INTO
                          process_params
                          (program, process_id, param, type, value)
                      VALUES (?, ?, ?, ?, ?)'''
        crs.execute(sqlquery, ("inspinj", procId,
                                          "--userTag", "string",
                                          args.simulation_tag))

cleanup_tables(connection, vacuum=args.vacuum, verbose=args.verbose)

ContentHandler.connection.commit()
ContentHandler.connection.close()

# Move database to final location
dbtables.put_connection_filename(args.output_file, target, verbose=args.verbose)

if args.verbose:
    print >>sys.stderr, "Done."

