#!/usr/bin/python3

#
# TODO: 
# forced anchors (done?) and self-hash anchors
# do not download self-hash file if it's in hashdb or in pre-packing dir
# 
# heuristics?
#
# rehash local files (if found xxx.tar.gz, hash it, store inside archive)
# add .hashget-restore files to archive (not store it in root of archive)
# use rhash for faster hashing
#
#

import argparse
import os
import urllib.parse
import requests
import sys
import time
import logging
import shutil
import tempfile

# from requests.packages.urllib3.util.retry import Retry
# from requests.adapters import HTTPAdapter

import hashget
import hashget.hashdb
import hashget.filepool

from hashget.utils import kmgt
import hashget.utils
import hashget.globlist
from hashget.debian import debsubmit
import hashget.operations

# 1 MiB expressed in bytes; no code visible in this file references it.
BUF_SIZE = 1024 * 1024

# Module-level dict, empty at import time; no code visible in this file uses it.
repo = {}

# Module-wide logger: stays None until main() assigns the 'hashget' logger.
log = None


def download_file(url, prefix="/tmp", headers=None):
    """Download ``url`` into the directory ``prefix`` and return the local path.

    The local file name is whatever follows the last ``/`` in ``url``.

    :param url: URL fetched with an HTTP GET request
    :param prefix: directory the downloaded file is written into
    :param headers: optional dict of extra HTTP request headers
    :return: path of the written file, or None when the response status is not 200
    """
    headers = headers or dict()

    chunk_size = 1024*1024
    basename = url.split('/')[-1]
    local_filename = os.path.join(prefix, basename)

    # With stream=True the connection stays checked out until the response is
    # closed; the context manager closes it on every exit path, including the
    # early return below.
    with requests.get(url, stream=True, headers=headers) as r:
        if r.status_code != 200:
            return None

        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)

    return local_filename


def deb2url(root, project, basename):
    """Build the URL of a package file below ``root`` from its base name.

    The relative path is ``<project>/name/<shard dirs>/<basename>``. For names
    starting with ``lib`` the shard directories are ``lib``, the 4th character
    and the 5th character; for all other names they are the 1st and the 2nd
    character.

    :param root: base URL; it is combined with ``project`` via ``urljoin``
    :param project: project name, used as the first path component
    :param basename: file name of the package
    :return: the resulting URL as a string
    :raises IndexError: if ``basename`` is too short to provide the shard characters
    """
    if basename.startswith('lib'):
        shards = (basename[0:3], basename[3], basename[4])
    else:
        shards = (basename[0], basename[1])

    relpath = os.path.join(project, 'name', *shards, basename)

    # The first join yields ".../<project>" without a trailing slash, so the
    # second join replaces that last segment with relpath, which itself starts
    # with <project>.
    base = urllib.parse.urljoin(root, project)
    return urllib.parse.urljoin(base, relpath)




def get_by_sig(hashdb, sigtype, sig, outdir=None):
    """Download the package registered under a signature and copy it to ``outdir``.

    :param hashdb: hashdb client; queried with ``sig2hp(sigtype, sig, remote=True)``
    :param sigtype: signature type (main() passes ``'deb'``)
    :param sig: signature value to look up
    :param outdir: directory the package file is copied into; None means the
                   current directory
    :return: path of the copied file, or None if the signature is not in hashdb
    """
    if outdir is None:
        # The declared default previously reached os.path.join() and raised TypeError.
        outdir = '.'

    try:
        hp = hashdb.sig2hp(sigtype, sig, remote=True)
    except KeyError:
        log.error("sig {} not found in hashdb".format(sig))
        return None

    p = hashget.package.Package(url=hp.url, log=log)
    p.download()
    dst = os.path.join(outdir, p.basename)
    shutil.copy(p.path, dst)
    log.info(dst)
    return dst


def get_by_hashspec(hashdb, hashspec, outdir):
    """
    Download the package that hashdb lists for ``hashspec`` and copy the matching
    file into ``outdir``.

    If the downloaded package file itself matches ``hashspec`` it is copied as-is;
    otherwise the file is looked up through the package's ``hash2path()``.

    :param hashdb: hashdb client; queried with ``hash2hp(hashspec)``
    :param hashspec: hash specification such as ``sha256:aabb...``
    :param outdir: directory the resulting file is copied into
    :return: path of the copied file, or None if nothing was found
    """

    hplist = hashdb.hash2hp(hashspec)
    if not hplist:
        log.error("{} not found in hashdb".format(hashspec))
        return None

    # Only the first candidate returned by hashdb is tried.
    hp = hplist[0]

    p = hashget.package.Package(url=hp.url, log=log)
    p.download()
    if p.hashes.match_hashspec(hashspec):
        dst = os.path.join(outdir, p.basename)
        shutil.copy(p.path, dst)
        log.info(dst)
        return dst

    src = p.hash2path(hashspec)
    try:
        if src is None:
            log.error('Not found {} in {}. Try --recursive'.format(hashspec, p.url))
            return None
        dst = os.path.join(outdir, os.path.basename(src))
        shutil.copy(src, dst)
    finally:
        # Run cleanup() on every path once hash2path() has returned, not only
        # after a successful copy.
        p.cleanup()

    log.info(dst)
    # Return the destination, consistent with the direct-match branch above.
    return dst

def _build_parser():
    """Build the argparse parser describing every hashget command line option."""
    def_anchsz = 100*1024
    def_filesz = 1024
    def_hashserver = list(('https://hashdb.okerr.com/hashdb/',))
    def_project = None
    def_excludefile = None
#    def_excludefile = os.path.expanduser("~/hashget-exclude")
    def_skip = []
    def_exclude_dirs = []
    def_exclude_files = []
    def_sleep = 2
    def_outdir = '.'
    def_target = 'auto'
    def_heuristics = ['all']
    def_hashdb = ['all']
    def_pool = os.getenv('HASHGET_POOL', None)

    # default path
    if os.getuid() == 0:
        # root
        def_hashdb_path = '/var/cache/hashget/hashdb'
    else:
        # usual user
        def_hashdb_path = os.path.expanduser("~/.hashget/hashdb")

    parser = argparse.ArgumentParser(description='HashGet ver {} deduplication and compression tool'.
                                     format(hashget.__version__))

    g = parser.add_argument_group('Packing/unpacking')
    g.add_argument('--pack', default=None, metavar='DIR',
                   help='Make .tar.gz of dir into -f file (set of --index, --prepack and then tar -czf .. -X)')
    g.add_argument('--prepack', default=None, metavar='DIR', help='prepare DIR for hash-tarring')
    g.add_argument('--postunpack', '-u', default=None, metavar='DIR', help='post-unpack')

    g = parser.add_argument_group('Fetching packages and files')
    g.add_argument('--get', default=None, metavar='HASHSPEC', help='get file by hash')
    # NOTE(review): --fetch is parsed, but main() has no branch that acts on it.
    g.add_argument('--fetch', default=None, help='fetch .deb file by basename or hash specification (sha256:aabbcc...)')

    g = parser.add_argument_group('Local HashDB commands')
    g.add_argument('--submit', default=None, metavar='URL', help='submit URL to --project')
    g.add_argument('--index', default=None, metavar='DIR', help='index files in DIR')
    g.add_argument('--debsubmit', default=None, metavar='package.deb', help='submit local .deb file')

    g = parser.add_argument_group('Indexing options')
    g.add_argument('--anchsz', type=int, default=def_anchsz, help='min size of anchors ({})'.format(def_anchsz))
    g.add_argument('--filesz', type=int, default=def_filesz, help='min size of files ({})'.format(def_filesz))
    g.add_argument('--expires', default=None, type=hashget.utils.str2dt, help='expiration date YYYY-MM-DD')
    g.add_argument('--project', '-p', default=def_project, help='project name ({})'.format(def_project))
    g.add_argument('--sleep', type=int, default=def_sleep, help='delay ({}s)'.format(def_sleep))
    g.add_argument('--fanchor', nargs='+', metavar='FILEGLOB', default=list(),
                   help='forced anchor glob, e.g. --fanchor ".*Makefile" etc/passwd')
    g.add_argument('--heuristics', nargs='*', metavar='HEURISTIC NAME', default=def_heuristics,
                   help='list of heuristics')

    g = parser.add_argument_group('Other options')
    g.add_argument('--hashserver', nargs='*', default=def_hashserver, help='hashserver URL ({})'.format(def_hashserver))
    g.add_argument('--hashdb_path', default=def_hashdb_path, help='path to local hashdb directory ({})'.format(def_hashdb_path))
    g.add_argument('--hashdb', default=def_hashdb, nargs='*', metavar='PROJECT',
                   help='Use only these hashdb. {}'.format(def_hashdb))
    g.add_argument('-X', '--exclude-from', metavar='FILENAME', dest='excludefile', default=def_excludefile,
                   help='Exclude file (for -X tar option) ({})'.format(def_excludefile))
    g.add_argument('--etag', default=False, action='store_true', help='verify HTTP E-Tag when reuse cached files')
    g.add_argument('--outdir', '-o', default=def_outdir, help='dir where to store --get files ({})'.format(def_outdir))
    g.add_argument('--file', '-f', default=None, help='filename (for --submit and --pack)')
    g.add_argument('--user', default=False, action='store_true',
                   help='user mode (for --postunpack). do not set owner for files')
    g.add_argument('--pull', dest='pull', default=False, action='store_true',
                   help='pull anchors from hashserver (slower)')
    g.add_argument('--recursive', default=False, action='store_true', help='recursive unpacking (slower).')
    g.add_argument('-z', default=False, action='store_true', help='gzip tarred archive')
    g.add_argument('--sig', nargs='+', default=list(), metavar='SIG', help='Signature for --submit. format: SIGTYPE:SIGNATURE')
    g.add_argument('--pool', default=def_pool, metavar='PATH', help='Pool with package files ({})'.format(def_pool))

    g = parser.add_argument_group('Target directory specification')
    #g.add_argument('--target', '-t', default=def_target,
    #               help='Target specification (files/debian/auto) ({})'.format(def_target))
    g.add_argument('--exclude', nargs='+', metavar='PATH', default=def_exclude_dirs,
                   help='Do not include anything under PATH. Relative to --prepack/--pack')
    #g.add_argument('--exclude-file', nargs='+', metavar='PATH', default=def_exclude_files,
    #               help='Same as --exclude, but keeps all directories')
    g.add_argument('--skip', nargs='*', default=def_skip,
                   help='Do not try to deduplicate these files/dirs ({}). Relative to --prepack/--pack. '
                        '(will be included into archive as-is, saves time.)'.format(def_skip))

    g = parser.add_argument_group('Logging, output')
    g.add_argument('--logfile', default=None, help='log file name')
    g.add_argument('--version', default=False, action='store_true', help='just print version ({})'.format(hashget.__version__))
    g.add_argument('-v', dest='verbose', default=False, action='store_true', help='verbose mode')
    g.add_argument('-q', dest='quiet', default=False, action='store_true', help='quiet mode')

    return parser


def _configure_logging(verbose, quiet, logfile):
    """Return the 'hashget' logger writing to stderr and, optionally, to ``logfile``."""
    # quiet wins over verbose
    if quiet:
        loglevel = logging.ERROR
    elif verbose:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO

    logger = logging.getLogger('hashget')
    logger.setLevel(loglevel)
    logstdout = logging.StreamHandler(stream=sys.stderr)
    logstdout.setFormatter(logging.Formatter('%(message)s', '%Y-%m-%d %H:%M:%S'))
    logger.addHandler(logstdout)
    if logfile:
        fh = logging.FileHandler(logfile)
        fh.setFormatter(logging.Formatter('%(asctime)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
        logger.addHandler(fh)
    return logger


def _make_pool(location):
    """Pick the file pool class for ``location``: HTTP(S) URL, directory path, or none."""
    if not location:
        return hashget.filepool.NullFilePool()
    if location.startswith(('http://', 'https://')):
        return hashget.filepool.HttpFilePool(url=location)
    return hashget.filepool.DirFilePool(path=location)


def _parse_signatures(sigspecs):
    """Convert ``['SIGTYPE:SIGNATURE', ...]`` into a ``{sigtype: signature}`` dict.

    :raises ValueError: if an item contains no ``:`` separator
    """
    signatures = dict()
    for sigspec in sigspecs:
        sigtype, sep, signature = sigspec.partition(':')
        if not sep:
            raise ValueError('bad --sig value {!r}, expected SIGTYPE:SIGNATURE'.format(sigspec))
        signatures[sigtype] = signature
    return signatures


def main():
    """Command line entry point.

    Parses the command line, sets up the module-level ``log``, the file pool,
    the hashdb client and the anchor list, then runs every requested action in
    this fixed order: --index, --prepack, --postunpack, --get, --submit,
    --debsubmit, --pack. Actions are independent ``if`` branches, so several
    may run in one invocation.

    Exits with status 0 after printing the version for --version, and with
    status 1 when --submit is used without --project or with a malformed --sig.
    """

    global log

    args = _build_parser().parse_args()

    if args.version:
        print('HashGet version {}'.format(hashget.__version__))
        sys.exit(0)

    log = _configure_logging(args.verbose, args.quiet, args.logfile)

    pool = _make_pool(args.pool)

    hashdb = hashget.hashdb.HashDBClient(path=args.hashdb_path, enabled_hashdb=args.hashdb)

    anchors = hashget.AnchorList(args.anchsz)
    for fa in args.fanchor:
        anchors.add_fanchor(fa)

    for url in args.hashserver:
        hashdb.add_hashserver(url)

    hashget.cacheget.opt_verify_etag = args.etag

    # Options
    hashget.package.opt_recursive = args.recursive

    if args.index:
        hashget.operations.index(
            hashdb=hashdb,
            root=args.index,
            anchors=anchors,
            filesz=args.filesz,
            heuristics=args.heuristics,
            pool=pool,
            pull=args.pull)

    if args.prepack:

        path = args.prepack
        gl = hashget.globlist.GlobList(root=path)
        for skip in args.skip + args.exclude:
            gl.add_relpath(skip)

        if args.excludefile is None:
            # mkstemp creates the file atomically; the deprecated mktemp only
            # returned a name that another process could claim first.
            fd, excludefile = tempfile.mkstemp(prefix='hashget-exclude-')
            os.close(fd)
            log.info('exclude file: -X {}'.format(excludefile))
        else:
            excludefile = args.excludefile

        rfile = hashget.operations.prepare(path,
                hashdb=hashdb,
                anchors=anchors,
                filesz=args.filesz,
                skip=gl,
                excludefile=excludefile,
                expires=args.expires
                )
        print("Saved:", rfile)

    if args.postunpack:
        hashget.operations.postunpack(
            args.postunpack, usermode=args.user, recursive=args.recursive, pool=pool)

    if args.get:
        if args.get.startswith('sha256:'):
            get_by_hashspec(hashdb, args.get, args.outdir)
        else:
            # anything else is treated as a deb signature; a trailing ".deb" is dropped
            if args.get.endswith('.deb'):
                sig = args.get[:-4]
            else:
                sig = args.get
            get_by_sig(hashdb, 'deb', sig, args.outdir)

    if args.submit:
        try:
            signatures = _parse_signatures(args.sig)
        except ValueError as e:
            log.error(str(e))
            sys.exit(1)

        if not args.project:
            log.error('need --project when --submit')
            sys.exit(1)

        hashget.submiturl.submit_url(
            hashdb=hashdb,
            url=args.submit,
            file=args.file,
            project=args.project,
            anchors=anchors,
            signatures=signatures,
            filesz=args.filesz,
            pool=pool,
            # argparse already applied type=str2dt (or left the None default),
            # so the value is passed through as for --prepack and --pack.
            expires=args.expires)

    if args.debsubmit:
        debsubmit(hashdb, args.debsubmit, anchors)

    if args.pack:
        hashget.operations.pack(
            hashdb=hashdb,
            root=args.pack,
            file=args.file,
            zip = args.z,
            exclude=args.exclude,
            skip=args.skip,
            anchors=anchors,
            filesz=args.filesz,
            heuristics=args.heuristics,
            pool=pool,
            pull=args.pull,
            expires=args.expires)

# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
