#!/usr/bin/env python
#
# Copyright (C) 2011--2013 Kipp Cannon, Chad Hanna, Drew Keppel
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

"""
Compute FAR and FAP distributions from the likelihood CCDFs.
"""

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#

import pycbc
import pycbc.version
__author__  = "Chad Hanna <chad.hanna@ligo.org>, Kipp Cannon <kipp.cannon@ligo.org>, Ian Harry <ian.harry@ligo.org>"
__version__ = pycbc.version.git_verbose_msg
__date__    = pycbc.version.date
__program__ = "pycbc_compute_far_from_snr_chisq_histograms"

import argparse
import sqlite3
sqlite3.enable_callback_tracebacks(True)
import sys


from glue.ligolw import ligolw
from glue.ligolw import dbtables
from glue.ligolw import lsctables
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.ligolw.utils import search_summary as ligolw_search_summary
from pylal import ligolw_thinca


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
    """
    Parse the command line, return options and check for consistency among the
    options.
    """
    _desc = __doc__[1:]
    parser = argparse.ArgumentParser(description=_desc)
    parser.add_argument('--version', action='version', version=__version__)
    parser.add_argument("--background-bins-file", metavar="filename",
                        required=True,
                        help="""Set the name of the xml file containing the
                                marginalized likelihood (required).""")
    parser.add_argument("--tmp-space", metavar="dir",
                help = "Set the name of the tmp space if working with sqlite.")
    parser.add_argument("--non-injection-db", metavar="filename", required=True,
                        help="""Provide the name of a database from a
                                non-injection run.""")
    parser.add_argument("--input-database", metavar="filename", default=None,
                      help="""Provide the name of a database for which to
                              compute FARs from.
                              Databases are assumed to be over the same
                              time period as the non injection database using
                              the same templates. If not the results will be
                              nonsense. If not given, will default to the
                              non-injection-db""")
    parser.add_argument("--output-database", metavar="filename",
                        required=True,
                        help="""Provide the name of the file to which to write
                                the database with FAR values calculated.""")
    parser.add_argument("--background-bins-out-file", metavar="filename",
                        default=None,
                        help="""If given, this is where the parameter and
                                ranking statistic distribution xml file with
                                zero-lag counts replaced with
                                count-above-threshold will be written""")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="Be verbose.")
    args = parser.parse_args()

    if not args.input_database:
        args.input_database = args.non_injection_db

    return args


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


#
# Parse command line
#


args = parse_command_line()

from gstlal import far


#
# Retrieve distribution data
#


coinc_params_distributions, ranking_data, seglists = far.parse_likelihood_control_doc(ligolw_utils.load_filename(args.background_bins_file, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = args.verbose))
if coinc_params_distributions is None:
    raise ValueError("\"%s\" does not contain event parameter PDFs" % args.background_bins_file)
if ranking_data is None:
    raise ValueError("\"%s\" does not contain likelihood ratio PDFs" % args.background_bins_file)
# FIXME:  need to do this to get the combined PDFs populated.  the XML file
# contains both bin counts and PDFs for all instrument sets but none for
# the "combined" set.  the bin counts for the combined set are populated by
# the .from_xml() method but the PDFs are built in the .finish() method.
# because all the other PDFs come out of the file we normally would not
# need to invoke the .finish() method here at all.  look into getting the
# combined PDFs built by the .from_xml() method as well.  NOTE: that method
# can't just call .finish() itself because that's a huge waste of time when
# many files need to be read and summed.
ranking_data.finish(verbose = args.verbose)

#
# Count the number of above-threshold events
#

if args.verbose:
    print >>sys.stderr, "beginning count of above-threshold events"

count_above_threshold = far.CountAboveThreshold()

#
# get working copy of database.
#

working_filename = dbtables.get_connection_filename(args.non_injection_db, tmp_path=args.tmp_space, verbose=args.verbose)
connection = sqlite3.connect(working_filename)

#
# update counts
#

xmldoc = dbtables.get_xml(connection)
coinc_def_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(ligolw_thinca.InspiralCoincDef.search, ligolw_thinca.InspiralCoincDef.search_coinc_type, create_new = False)
xmldoc.unlink()
count_above_threshold.update(connection, coinc_def_id, ranking_data.likelihood_ratio_threshold)

#
# done
#

connection.close()
dbtables.discard_connection_filename(args.non_injection_db, working_filename, verbose = args.verbose)

coinc_params_distributions.count_above_threshold = count_above_threshold
# make sure instrument combination probabilities reflect new counts
coinc_params_distributions.finish(verbose = args.verbose)

if args.verbose:
	print >>sys.stderr, "number of above-threshold events:  %s" % ", ".join("%s=%d" % (",".join(sorted(instruments)), count) for instruments, count in count_above_threshold.items())


#
# Initialize the FAP & FAR assignment machine
#


fapfar = far.FAPFAR(ranking_data.background_likelihood_pdfs, coinc_params_distributions.count_above_threshold, threshold = ranking_data.likelihood_ratio_threshold, livetime = far.get_live_time(seglists))


#
# Iterate over databases
#


if args.verbose:
	print >>sys.stderr, "assigning FAPs and FARs"

#
# get working copy of database
#

working_filename = dbtables.get_connection_filename(args.input_database, tmp_path=args.tmp_space, verbose=args.verbose)
connection = sqlite3.connect(working_filename)

#
# record our passage
#

xmldoc = dbtables.get_xml(connection)
process = ligolw_process.register_to_xmldoc(xmldoc, u"pycbc_compute_far_from_snr_chisq_histograms", {})

#
# assign FAPs and FARs
#

fapfar.assign_faps(connection)
fapfar.assign_fars(connection)

#
# done, save file
#

ligolw_process.set_process_end_time(process)
connection.cursor().execute("UPDATE process SET end_time = ? WHERE process_id == ?", (process.end_time, process.process_id))

connection.commit()
connection.close()
dbtables.put_connection_filename(args.output_database, working_filename, verbose = args.verbose)

if args.verbose:
	print >>sys.stderr, "FAP and FAR assignment complete"


#
# Rewrite parameter and ranking statistic distribution file but with
# zero-lag counts replaced with count-above-threshold.
#

if args.background_bins_out_file:
    xmldoc = ligolw.Document()
    xmldoc.appendChild(ligolw.LIGO_LW())
    process = ligolw_process.register_to_xmldoc(xmldoc, u"pycbc_compute_far_from_snr_chisq_histograms", {})
    search_summary = ligolw_search_summary.append_search_summary(xmldoc, process, ifos = seglists.keys(), inseg = seglists.extent_all(), outseg = seglists.extent_all())
    far.gen_likelihood_control_doc(xmldoc, process, coinc_params_distributions, ranking_data, seglists)
    ligolw_process.set_process_end_time(process)

    ligolw_utils.write_filename(xmldoc, args.background_bins_out_file, gz = args.background_bins_out_file.endswith(".gz"), verbose = args.verbose)

if args.verbose:
	print >>sys.stderr, "done"
