#!python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import os
import sys
import datetime

import boto
from absl import app
from absl import flags

import bqtools

# Command line flag definitions for the sync script; values are read through FLAGS in main().
FLAGS = flags.FLAGS

# --- Which datasets to copy and where to copy them to -------------------------------------
flags.DEFINE_list('src_project_datasets', [],
                    """A comma separated list of projects and datasets to copy to the destination 
projects and datasets. Each item in list is project followed by a period '.' and then the dataset. 
The list in this argument MUST contain same number as in the dst_project_datasets argument""")

flags.DEFINE_list('dst_project_datasets', [],
                    """A comma separated list of projects and datasets to copy into. These maybe 
in a different region if they are then the src_bucket and dst_bucket MUST be specified.""")

# --- Staging buckets used when source and destination are in different regions ------------
flags.DEFINE_string('src_bucket', None,
                    """If the source set of datasets and region are different to the destination 
                    datasets region data
will be extracted from source dataset into this bucket meaning it MUST be in the same region as 
the source datasets when
copying across regions.""")
flags.DEFINE_string('dst_bucket', None,
                    """If the destination set is different to the source the data extracted into 
src_bucket will rewritten to this destination bucket and then loaded into the destination datasets.""")

# --- What kind of objects/metadata to copy -------------------------------------------------
flags.DEFINE_bool('copy_data', True,
                  """Default is True bqsync will copy data if set to False will only copy 
schemas""")

flags.DEFINE_bool('copy_views', True,
                  """Default is True bqsync will attempt to adjust views (i.e. change projects 
and dataset names in the views) such they work in the new region. If views cannot be created it 
will skip the view and move to the next logging an error. If set to False views are not recreated.""")

flags.DEFINE_bool('copy_access', True,"""Copy access permissions note this has to interact with 
copy views generally if you are not copying views then only set this if no authorised views. 
Suggest if you do have these run with nocopy_data and set access and then run copying data wit
nocopy_access.""")

# The help text here used to be a verbatim copy of the copy_views help; it now describes this
# flag. NOTE(review): wording is derived from the flag name - confirm against bqtools.
flags.DEFINE_bool('remove_deleted_tables', True,
                  """Default is True bqsync will delete tables in the destination datasets that 
no longer exist in the source datasets. If set to False such tables are left in place.""")

# --- How thoroughly to compare data and which tables/views to include ----------------------
flags.DEFINE_integer('check_depth', -1,
                  """The amount of checking that data is identcal
-2 - Check rowcounts only
-1 - Add in hash check on key times i.e. modified, updated,created, cluster fields
 0 - All level 0 root fields
 1 - All fields one below root fields etc.""")
flags.DEFINE_list('table_or_views_to_copy', [".*"],
                    """A comma separated list of regular expressions to match tables names against. 
For example ^a.*,^c.* would copy only views and tables starting wth a and c default is .* 
(copy everythng).""")

flags.DEFINE_list('table_or_views_to_exclude', [],
                    """A comma separated list of regular expressions to match tables names against. 
For example ^a.*,^c.* would exclude views and tables starting wth a and c default is empty list [] 
(exclude nothing).""")

# --- Day partition comparison window --------------------------------------------------------
flags.DEFINE_integer('days_before_latest_day', None,
                  """The number of days to check if not set  unlimted. if 7 will check between start date
minus 7 days. If 0 will just check today any negative values below 0 are not allowed""")

flags.DEFINE_string('latest_date', None,
                    """The day to end day partition comparison default is None i.e. end at todays date
latest date MUST be in YYYY-MM-DD format i.e. 2020-12-30""")

flags.DEFINE_bool('do_day_partition_deep_check', False,
                  """If set to True day partition data checks are run (this can be expensive).
default is False this will not do a check unless number of bytes differ and mod time of source > 
destination tables modification time.""")

def _validate_flags():
    """Check the parsed command line flags before any work is started.

    Explicit exceptions are raised instead of ``assert`` so the checks still
    run when Python is started with ``-O``.

    Raises:
        app.UsageError: if a required flag is missing or a value is out of range.
    """
    if not FLAGS.src_project_datasets:
        raise app.UsageError("Source project.dataset(s) must be set use --help for help")

    if not FLAGS.dst_project_datasets:
        raise app.UsageError("Destination project.dataset(s) must be set use --help for help")

    # The flag help requires the two lists to pair up one to one.
    if len(FLAGS.src_project_datasets) != len(FLAGS.dst_project_datasets):
        raise app.UsageError("src_project_datasets and dst_project_datasets MUST contain the "
                             "same number of items")

    if not FLAGS.table_or_views_to_copy:
        raise app.UsageError("table_or_view_to_copy MUST have at least 1 item to filter on")

    if FLAGS.days_before_latest_day is not None and FLAGS.days_before_latest_day < 0:
        raise app.UsageError("Days before latest date has to be  0 or greater")

    if FLAGS.check_depth < -2:
        raise app.UsageError("Check depth for data comparison MUST be >= -2 see --help "
                             "for details")


def _parse_latest_date(date_text):
    """Convert the --latest_date value into a datetime.

    Args:
        date_text: the flag value, either None or a string in YYYY-MM-DD format.

    Returns:
        None when date_text is None, otherwise the parsed datetime.datetime.

    Exits the process with status -255 (after printing a message) when the
    text does not match the expected format.
    """
    if date_text is None:
        return None

    try:
        return datetime.datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        print("Incorrect date format, {} should be YYYY-MM-DD".format(date_text))
        sys.exit(-255)


def _configure_network_from_boto():
    """Export CA bundle and proxy settings found in the boto config as environment variables.

    Sets REQUESTS_CA_BUNDLE when a non "system" ca_certificates_file is configured and
    HTTP_PROXY / HTTPS_PROXY when a proxy host is configured.
    """
    # deal with if we have proxies, intercept certificates etc
    ca_certificates_file = boto.config.get('Boto', 'ca_certificates_file', "system")
    if ca_certificates_file != "system":
        os.environ["REQUESTS_CA_BUNDLE"] = ca_certificates_file

    proxy_host = boto.config.get('Boto', 'proxy', None)
    if proxy_host is None:
        # Without a host there is no usable proxy URL, even if credentials are configured.
        return

    proxy_port = boto.config.getint('Boto', 'proxy_port', 0)
    proxy_user = boto.config.get('Boto', 'proxy_user', None)
    if proxy_user is not None:
        proxy = "http://{}:{}@{}:{}".format(proxy_user,
                                            boto.config.get('Boto', 'proxy_pass', None),
                                            proxy_host,
                                            proxy_port)
    else:
        proxy = "http://{}:{}".format(proxy_host, proxy_port)

    os.environ["HTTP_PROXY"] = proxy
    os.environ["HTTPS_PROXY"] = proxy


def main(argv):
    """Run a dataset sync as described by the command line flags.

    Validates the flags, applies proxy/certificate settings from the boto
    config, builds a bqtools.MultiBQSyncCoordinator, runs its sync() and exits
    the process with 0 on success or -1 when any of the coordinator's failure
    counters is above zero.

    Args:
        argv: positional command line arguments handed over by absl (unused).

    Raises:
        app.UsageError: if the flags fail validation.
    """
    del argv  # Unused.

    # validate pre-conditions
    _validate_flags()
    latest_date = _parse_latest_date(FLAGS.latest_date)

    _configure_network_from_boto()

    # set up sync
    # NOTE(review): FLAGS.copy_access is defined by this script but is not forwarded here;
    # confirm whether MultiBQSyncCoordinator accepts it before wiring it in.
    multi_bq_copy = bqtools.MultiBQSyncCoordinator(
        FLAGS.src_project_datasets,
        FLAGS.dst_project_datasets,
        FLAGS.src_bucket,
        FLAGS.dst_bucket,
        FLAGS.remove_deleted_tables,
        FLAGS.copy_data,
        FLAGS.copy_views,
        check_depth=FLAGS.check_depth,
        table_view_filter=FLAGS.table_or_views_to_copy,
        table_or_views_to_exclude=FLAGS.table_or_views_to_exclude,
        latest_date=latest_date,
        days_before_latest_day=FLAGS.days_before_latest_day,
        day_partition_deep_check=FLAGS.do_day_partition_deep_check)

    # tweak logging
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    multi_bq_copy.logger = logging

    # actually do the sync
    multi_bq_copy.sync()

    failure_counts = (multi_bq_copy.tables_failed_sync,
                      multi_bq_copy.views_failed_sync,
                      multi_bq_copy.extract_fails,
                      multi_bq_copy.load_fails,
                      multi_bq_copy.copy_fails)

    # sys.exit is used rather than the site-provided exit(), which is not always available.
    sys.exit(-1 if any(count > 0 for count in failure_counts) else 0)


# Script entry point: hand main to absl's app.run when executed directly.
if __name__ == "__main__":
    app.run(main)
