#!python

# Copyright (C) 2017 Scott Coughlin
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

import os
from sqlalchemy.engine import create_engine
import pandas as pd
import optparse

__author__ = "Scott Coughlin <scott.coughlin@ligo.org>"
__version__ = 1.0

# =============================================================================
#
#                               DEFINITIONS
#
# =============================================================================

def parse_commandline():
    """@parse the options given on the command-line.
    """
    parser = optparse.OptionParser(usage=__doc__,version=__version__)

    opts, args = parser.parse_args()


    return opts

# =============================================================================
#
#                               Text for Sphinx
#
# =============================================================================
BeginningText = """
.. _DBs:

###########################
The many DBs of Gravity Spy
###########################

This page is generated by ``gravityspyDBs``.

DB overview
===========
Databases for which you have selection privileges is obviously depedent on your
own postgresSQL user privileges. Here we discuss all the possible DBs

For LIGO users please see `Gravity Spy Authentication <https://secrets.ligo.org/secrets/144/>`_ for information concerning authentication to access certain Gravity Spy DBs.
"""


TrainingSetTableSubHeader = """
In order to obtain the training set described here: https://dcc.ligo.org/LIGO-P1700227

There are three ways.

* you can go here: https://ldvw.ligo.caltech.edu/ldvw/gspySearch

    * Under Pipeline select gravityspy and under Status select Training. After this, select csv download. Unfortunately this will only get you the metadata for the training set but will eventually provide that information so I wanted to point it out. It is aesthetically the best way to access Gravity Spy data in general. You can also perform a search and look at the images themselves instead of downloading a csv file.

* Second, using gwpy https://gwpy.github.io/, the preferred detchar software utility.

.. code-block:: bash

   $ pip install gwpy sqlalchemy psycopg2 pandas git+https://github.com/duncanmmacleod/ligo.org.git
   $ ipython

    >>> from gwpy.table import EventTable
    >>> trainingset = EventTable.fetch('gravityspy','trainingset')
    >>> trainingset.download(nproc=8, TrainingSet=1, LabelledSamples=1)



* Third, copying the path laid out here @ LHO cluster
    * `/home/scott.coughlin/public_html/GravitySpy/TrainingSet2/H1L1`. I recommend this only in the short term because it will be deprecated very soon and the preferred way will be using the software above because it is robust, has correctly labelled samples, and will be automatically updated to reflect when the trainingset changes/improves. In this folder there are 22 folders that all contain the images for that class.
"""

classificationsdevTableSubHeader = """
This table contains information about classification performed by users after the addition of the two new classes. Therefore, columns such as `annotations_value_choiceINT` = 3 being a `Blip` is true in this DB but not in classifications. Versioning controls for this type of thing will be implemented in the near future
"""

glitchesTableSubHeader = """
This table contains information about classification performed by users after the addition of the two new classes. Therefore, columns such as `annotations_value_choiceINT` = 3 being a `Blip` is true in this DB but not in classifications. Versioning controls for this type of thing will be implemented in the near future
"""

goldenimagesTableSubHeader = """
This table contains information about which subjects are considered golden and there assosciated label 
"""

userStatusTableSubHeader = """
This table contains information about what level agiven user should be on.
"""

TableSubHeaderDict = {'trainingsetv1d1' : TrainingSetTableSubHeader,
                      'classificationsdev' : classificationsdevTableSubHeader,
                      'glitches' : glitchesTableSubHeader,
                      'goldenimages' : goldenimagesTableSubHeader,
                      'userStatus' : userStatusTableSubHeader,
                     }

# =============================================================================
#
#                               Main
#
# =============================================================================
opts = parse_commandline()

engine = create_engine('postgresql://{0}:{1}@gravityspy.ciera.northwestern.edu:5432/gravityspy'.format(os.environ['GRAVITYSPY_DATABASE_USER'],os.environ['GRAVITYSPY_DATABASE_PASSWD']))

with open('DBs/index.rst', 'w') as f:
    f.write(BeginningText)
    for table in sorted(engine.table_names()):
        if table not in ['goldenimages', 'classificationsdev', 'userStatus', 'glitches', 'trainingsetv1d1']:
            continue

        TableHeader = """
{0}
{1}\n
""".format(table, '=' * len(table))
        tableDF = pd.read_sql("SELECT * FROM \"{0}\" LIMIT 1".format(table), engine)
        f.write(TableHeader)
        f.write(TableSubHeaderDict[table] + '\n')
        for icol in sorted(tableDF.columns):
            idtype = tableDF[icol].dtype
            f.write('* `{0}` : {1}\n'.format(icol, idtype))
            # Find all unique values for strings.
            if icol not in ['uniqueID', 'Filename1', 'Filename2', 'Filename3', 'Filename4', 'imgUrl1', 'imgUrl2', 'imgUrl3', 'imgUrl4']:
                if pd.read_sql("SELECT DISTINCT \"{0}\" FROM \"{1}\"  WHERE \"{0}\" IS NOT NULL LIMIT 1 ".format(icol, table), engine)[icol].dtype == 'object': 
                    uniqueDF = pd.read_sql("SELECT DISTINCT \"{0}\" FROM \"{1}\" WHERE \"{0}\" IS NOT NULL".format(icol, table), engine)
                    # Filter none entries
                    f.write('    * {0}\n'.format(' '.join(sorted(uniqueDF[icol].unique()))))
    f.close()
