#!/usr/bin/env python

import boto.s3
import sys
import os
import logging

from umobj.utils import umobj_logging, umobj_init, \
    umobj_get_bucket_key_pair_from_string
from umobj.obj import Obj
from umobj.options import umobj_parser, get_logging_level
from umobj.md5 import compute_key_md5, compute_file_md5
from umobj.verify import load_bag_checksums, get_bag_checksumming_algorithm


def compare_bucket_to_directory(bucket, directory, prefix=None):
    """Compare the files under a local directory with the keys of a bucket.

    Each file found below ``directory`` is mapped to a key name: its path
    relative to ``directory``, with ``prefix`` prepended verbatim when one
    is given.  For every such key the function reports on stdout when

    * the key does not exist in ``bucket``;
    * the checksum from ``compute_file_md5`` differs from the one returned
      by ``compute_key_md5``.

    Finally, every key listed in the bucket (restricted to ``prefix``) that
    was not matched by a local file is reported as well.

    Args:
        bucket: Bucket object.  It must provide ``list(prefix=...)`` and
            ``name`` and is handed to ``boto.s3.key.Key`` and
            ``compute_key_md5``.
        directory: Path of the local directory to walk.
        prefix: Optional string used to restrict the bucket listing and
            prepended as-is to every relative path (no separator is added
            here).

    Returns:
        None.  When ``directory`` is not an existing directory an error is
        logged and nothing is compared.
    """
    if not os.path.isdir(directory):
        logging.error("The directory %s doesn't exist", directory)
        return

    # Names ending with the path separator are skipped so that they are not
    # reported as missing locally.
    unmatched_keys = set(
        k.name for k in bucket.list(prefix=prefix)
        if not k.name.endswith(os.path.sep))

    # NOTE: print(...) is always called with one parenthesised argument so
    # the line means the same as a Python 2 print statement and as a
    # Python 3 function call.
    for dirpath, _dirnames, filenames in os.walk(directory):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            # format path to look like key
            key_name = os.path.relpath(path, directory)
            if prefix:
                key_name = prefix + key_name

            if not boto.s3.key.Key(bucket, key_name).exists():
                print("The key %s does not exist in the bucket %s"
                      % (key_name, bucket.name))
                continue

            # Only hash the local file once we know there is a remote key
            # to compare it against.
            file_md5 = compute_file_md5(path)
            key_md5 = compute_key_md5(bucket, key_name)
            if file_md5 != key_md5:
                print("[Local %s: %s] != [Bucket %s: %s]"
                      % (key_name, file_md5, key_name, key_md5))

            # discard() instead of del: a key that exists but was absent
            # from the earlier listing must not abort the comparison.
            unmatched_keys.discard(key_name)

    # Sorted so the report is stable from run to run.
    for leftover_key in sorted(unmatched_keys):
        print("The key %s was in bucket %s but not found locally"
              % (leftover_key, bucket.name))


def directory_prefix(key_name):
    """Return ``key_name`` in a form usable as a directory-like key prefix.

    A non-empty name that does not already end with ``os.path.sep`` gets
    the separator appended, so that ``prefix + relative_path`` yields
    ``a/b/file`` rather than ``a/bfile``.  Empty or ``None`` names are
    returned unchanged.
    """
    if key_name and not key_name.endswith(os.path.sep):
        return key_name + os.path.sep
    return key_name


def main():
    """Command-line entry point.

    Depending on the parsed arguments this either

    * verifies a bag: compares each checksum loaded by
      ``load_bag_checksums`` with the value ``compute_key_md5`` returns
      for the corresponding key,
    * compares a local directory with the bucket via
      ``compare_bucket_to_directory``, or
    * prints the value of ``compute_key_md5`` for a single key.

    The process exits with status 1 when the object store cannot be
    reached or the bucket cannot be accessed.
    """
    umobj_init()

    # NOTE(review): this description looks copied from a listing tool and
    # does not describe what the script does -- confirm and reword.
    description = 'List Object(s)'
    parser = umobj_parser(description=description)
    parser.add_s3path(number='+', help='BUCKET:KEY')
    parser.add_verify_bag()
    parser.add_directory()
    args = parser.parse_args()

    umobj_logging(get_logging_level(args))  # set up logging

    logging.info("Running %s", sys.argv)

    if not Obj.connect(host=args.host,
                       port=args.port,
                       access_key=args.access_key,
                       secret_key=args.secret_key):
        logging.error('Unable to contact object store.')
        sys.exit(1)

    # Only the first BUCKET:KEY argument is used.
    s3path = args.S3PATH[0]
    directory = args.directory
    bucket_name, key_name = umobj_get_bucket_key_pair_from_string(s3path)

    try:
        bucket = Obj.conn.get_bucket(bucket_name)
    except boto.exception.S3ResponseError as e:
        logging.error("Can not access bucket %s, %s.",
                      bucket_name, e.error_code)
        sys.exit(1)

    if args.verify_bag:  # comparing/verifying a bagit archive
        intact = True
        logging.debug("Verifying the integrity of bag %s in bucket %s.",
                      key_name, bucket_name)
        entries = load_bag_checksums(bucket_name, key_name)
        algo = get_bag_checksumming_algorithm(bucket_name, key_name)
        for entry in entries:
            entry_path = '%s/%s' % (key_name, entry)
            # NOTE(review): the expected value is looked up under ``algo``
            # but always compared with compute_key_md5 -- presumably algo
            # is md5 in practice; confirm.
            expected_md5 = entries[entry][algo]
            actual_md5 = compute_key_md5(bucket, entry_path)
            if expected_md5 != actual_md5:
                logging.warning('key %s differs, actual: %s, expected: %s',
                                entry_path, actual_md5, expected_md5)
                intact = False
        if intact:
            logging.info('Successfully verified the integrity of the archive.')
        else:
            # NOTE(review): the exit status stays 0 here; consider exiting
            # non-zero so callers can detect a failed verification.
            logging.error('The integrity of this bag is compromised')
    elif directory:
        # Make sure a nested prefix such as "a/b" also ends with the
        # separator before it is glued onto the relative file paths.
        compare_bucket_to_directory(bucket, directory,
                                    prefix=directory_prefix(key_name))
    else:  # computing md5 for a single key
        md5 = compute_key_md5(bucket, key_name)
        if md5 is not None:
            # Parenthesised single argument: valid as a Python 2 print
            # statement and as a Python 3 function call.
            print(md5)


if __name__ == "__main__":
    main()
