#!/usr/bin/env python
# Copyright (c) 2022 Matthew Wall
"""Utility for managing translated strings in skins."""

# FIXME: improve the dotted notation to find pgettext instances

import sys
import glob
import os
from optparse import OptionParser
import re
import configobj

# Version string printed by main() when --version is given.
__version__ = '0.2'

# Usage text passed to OptionParser in main().
usagestr = """Usage: i18n-report --skin=PATH_TO_SKIN [--help] [--version]

Examples:
  i18n-report --skin=PATH_TO_SKIN
  i18n-report --skin=PATH_TO_SKIN --action translations --lang=XX
  i18n-report --skin=PATH_TO_SKIN --action strings

  Utility to manage translated strings in WeeWX skins."""

def main():
    """Parse the command line and print the requested i18n reports.

    The skin directory given by --skin must contain a skin.conf and a lang
    directory with an en.conf, which is used as the reference language file.
    Depending on --action, the following reports are printed:

      languages    - list the language files found in the lang directory
      translations - for each language file (or only the one named by
                     --lang), list the entries found in only one of en.conf
                     and that file
      strings      - list strings in en.conf that are not used by the skin,
                     and strings used by the skin that are not in en.conf
      all          - all of the above (the default)

    This function always terminates the process via sys.exit().
    """
    parser = OptionParser(usage=usagestr)
    parser.add_option("--version", action="store_true", dest="version",
                      help="Display version then exit")
    parser.add_option("--skin", dest="skin_dir", type=str, metavar="SKIN",
                      help="Specify the path to the desired skin")
    parser.add_option("--lang", type=str, metavar="LANGUAGE",
                      help="Specify two-letter language identifier")
    parser.add_option("--action", type=str, metavar="ACTION", default='all',
                      help="Options include: all, languages, translations, strings")

    options, args = parser.parse_args()

    if options.version:
        print(__version__)
        sys.exit(0)

    # figure out which skin we should look at
    skin_dir = options.skin_dir
    if not skin_dir:
        print("No skin specified")
        sys.exit(1)
    # drop any trailing slashes so that the paths built below look clean
    skin_dir = skin_dir.rstrip('/')
    if not os.path.isdir(skin_dir):
        print("No skin found at %s" % skin_dir)
        sys.exit(1)
    print("i18n translation report for skin: %s" % skin_dir)

    # check for the skin config file
    skin_conf = "%s/skin.conf" % skin_dir
    if not os.path.isfile(skin_conf):
        print("No skin configuration found at %s" % skin_conf)
        sys.exit(0)

    # check for the lang files in the specified skin
    lang_dir = "%s/lang" % skin_dir
    if not os.path.isdir(lang_dir):
        print("No language directory found at %s" % lang_dir)
        sys.exit(0)
    en_conf = "%s/en.conf" % lang_dir
    if not os.path.isfile(en_conf):
        print("No en.conf found at %s" % en_conf)
        sys.exit(0)

    # reject a mistyped action instead of silently reporting nothing
    action = options.action
    if action not in ('all', 'languages', 'translations', 'strings'):
        print("Unknown action '%s'" % action)
        sys.exit(1)

    # list all the lang files that we find.  sorted so that the report is
    # the same from one run to the next.
    if action in ('all', 'languages'):
        print("language files found:")
        for f in sorted(glob.glob("%s/*.conf" % lang_dir)):
            print("  %s" % f)

    # report any untranslated strings.  if a lang was specified, only report
    # about that language.  otherwise report on every language that is found.
    if action in ('all', 'translations'):
        lang = options.lang
        if lang:
            lang_conf = "%s/%s.conf" % (lang_dir, lang)
            # without this check a bad --lang value ends in a traceback
            if not os.path.isfile(lang_conf):
                print("No language file found at %s" % lang_conf)
                sys.exit(1)
            confs = [lang_conf]
        else:
            confs = sorted(glob.glob("%s/*.conf" % lang_dir))
        results = dict()
        for f in confs:
            # en.conf is the reference, so there is no point comparing it
            # with itself
            if f == en_conf:
                continue
            a, b = compare_files(en_conf, f)
            if a or b:
                results[f] = dict()
                if a:
                    results[f]["found only in %s:" % en_conf] = a
                if b:
                    results[f]["found only in %s:" % f] = b
        prettyp(results)

    # report any mismatched strings, i.e., strings that are used in the skin
    # but not enumerated in the base language file en.conf.  the strings in
    # the skin could come from gettext() invocations or plot labels.
    #
    # TODO: report *all* strings, not just those in gettext - this might not
    #       be feasible, since html/xml and other formats might use strings
    #       that should not be part of translation.  of course if we find
    #       strings within xml delimiters we could ignore those...
    if action in ('all', 'strings'):
        known_strings = set(read_texts(en_conf))
        ext_list = ['tmpl', 'inc']
        str_list = set(get_gettext_strings(skin_dir, ext_list))
        str_list.update(read_image_labels(skin_conf))
        # strings in en.conf that are not in any skin files
        unused = known_strings - str_list
        # strings in skin files that are not in en.conf
        unlisted = str_list - known_strings
        if unused:
            print("strings in en.conf but not found in the skin:")
            for x in unused:
                print("  %s" % x)
        if unlisted:
            print("strings in skin but not found in en.conf:")
            for x in unlisted:
                print("  %s" % x)

    sys.exit(0)


def compare_files(fn1, fn2):
    """Return the entries that are present in only one of two config files.

    Both files are parsed with configobj as utf-8 and handed to
    compare_dicts().  The result is a tuple (a_only, b_only) of dictionaries
    keyed by label: a_only holds the entries collected for fn1 only, b_only
    those collected for fn2 only.  Entries that exist in both files with
    different values are collected too, but are not returned.
    """
    first, second = [
        configobj.ConfigObj(path, file_error=True,
                            encoding='utf-8', default_encoding='utf-8')
        for path in (fn1, fn2)
    ]
    only_first = {}   # {label: value in fn1, ...}
    only_second = {}  # {label: value in fn2, ...}
    changed = {}      # {label: (value in fn1, value in fn2), ...}
    compare_dicts('', first, second, only_first, only_second, changed)
    return only_first, only_second

def compare_dicts(section_name, a, b, a_only, b_only, diffs):
    """Recursively compare two configuration sections.

    section_name: dotted label of the section being compared, or '' for the
        top level.  It is used as the prefix of every label that is recorded.
    a, b: the sections to compare.  Each must provide 'sections' and
        'scalars' name lists and item access by name, as configobj sections
        do.  Either may be None, meaning that the section does not exist on
        that side; it is then treated as empty.
    a_only: dict that receives {label: value} for scalars found only in a
    b_only: dict that receives {label: value} for scalars found only in b
    diffs: dict that receives {label: (a_value, b_value)} for scalars that
        are found in both but have different values

    Nothing is returned; the three result dictionaries are updated in place.
    """
    def names(section, attr):
        # a missing counterpart is compared as if it were an empty section
        return list(getattr(section, attr)) if section is not None else []

    def label_for(name):
        return "%s.%s" % (section_name, name) if section_name else name

    a_sections = names(a, 'sections')
    b_sections = names(b, 'sections')
    a_scalars = names(a, 'scalars')
    b_scalars = names(b, 'scalars')

    # sections in a: compare against the matching section in b, if any
    for x in a_sections:
        other = b[x] if x in b_sections else None
        compare_dicts(label_for(x), a[x], other, a_only, b_only, diffs)

    # sections that exist only in b still have to be reported
    for x in b_sections:
        if x not in a_sections:
            compare_dicts(label_for(x), None, b[x], a_only, b_only, diffs)

    b_scalar_names = set(b_scalars)
    for x in a_scalars:
        label = label_for(x)
        if x in b_scalar_names:
            if a[x] != b[x]:
                diffs[label] = (a[x], b[x])
        else:
            a_only[label] = a[x]

    a_scalar_names = set(a_scalars)
    for x in b_scalars:
        if x not in a_scalar_names:
            b_only[label_for(x)] = b[x]

def prettyp(d, indent=0):
    """Print a possibly nested dictionary as an indented outline.

    A key whose value is a dictionary is printed on a line of its own,
    followed by the contents of that dictionary one level deeper.  Any other
    entry is printed as key=value.  Each level is indented by two spaces,
    starting at the level given by indent.
    """
    pad = '  ' * indent
    for name, entry in d.items():
        if not isinstance(entry, dict):
            print(pad + "%s=%s" % (name, entry))
            continue
        print(pad + str(name))
        prettyp(entry, indent + 1)

# $pgettext("context", "string") - the second group is the string
_PGETTEXT_RE = re.compile(r'\$pgettext\(\s*\"([^\"]*)\",\s*\"([^\"]*)\"\s*\)')
# $gettext("string") or $gettext('string')
_GETTEXT_RE = re.compile(r'\$gettext\(\s*[\'\"]([^\)]*)[\'\"]\s*\)')


def get_gettext_strings(dir_name, ext_list, string_list=None):
    """Get all the gettext strings from a skin.

    Walks dir_name recursively and scans, line by line, every file whose
    name ends with one of the extensions in ext_list (given without the
    leading dot) for $gettext(...) and $pgettext(...) invocations.

    dir_name: directory to search
    ext_list: file extensions to examine, e.g. ['tmpl', 'inc']
    string_list: optional set to which the strings are added.  If it is not
        supplied, a new set is created for each call.

    Returns the set of strings that were found.  For $pgettext only the
    string is recorded, not the context.
    """
    # create the set here rather than as a default argument, otherwise every
    # call would share (and keep adding to) one and the same set
    if string_list is None:
        string_list = set()
    suffixes = tuple(".%s" % e for e in ext_list)
    for f in os.listdir(dir_name):
        fn = os.path.join(dir_name, f)
        if os.path.isfile(fn):
            if not f.endswith(suffixes):
                continue
            with open(fn) as template:
                for line in template:
                    for m in _PGETTEXT_RE.findall(line):
                        string_list.add(m[1])
                    for m in _GETTEXT_RE.findall(line):
                        string_list.add(m)
        elif os.path.isdir(fn):
            get_gettext_strings(fn, ext_list, string_list)

    return string_list

def read_texts(fn):
    """
    return list of the string names found in the Texts section of a file.

    The names of the scalars directly in Texts come first, followed by the
    names of the scalars in each subsection of Texts.  Only one level of
    subsections is examined.  If the file has no Texts section, the list is
    empty.

    NOT IMPLEMENTED:
    return set of strings from Texts section.  format using dotted notation
    e.g., Text.name = value, or Text.Specialization.name = value
    """
    cfg_dict = configobj.ConfigObj(fn, file_error=True,
                                   encoding='utf-8', default_encoding='utf-8')
    texts = cfg_dict.get('Texts')
    # configobj sections are dictionaries.  anything else means that there
    # is no usable Texts section in this file.
    if not isinstance(texts, dict):
        return []
    # copy the names, so that the list owned by the parsed configuration is
    # not modified when the subsection names are added
    str_list = list(texts.scalars)
    for x in texts.sections:
        str_list.extend(texts[x].scalars)
    return str_list

#    texts = cfg_dict.get('Texts', {})
#    str_list = ['Texts.%s' % s for s in texts.scalars]
#    for x in texts.sections:
#        [str_list.append("Texts.%s.%s" % (x, s)) for s in texts[x].scalars]
#    return str_list

def read_image_labels(fn):
    """Return list of the plot labels defined for the image generator.

    Reads the ImageGenerator section of the configuration file fn and
    collects the 'label' of every series of every plot in the day_images,
    week_images, month_images and year_images subsections.  A series that
    has no label is skipped.
    """
    config = configobj.ConfigObj(fn, file_error=True,
                                 encoding='utf-8', default_encoding='utf-8')
    generator = config.get('ImageGenerator', {})
    # FIXME: this assumes that the images will be defined using the standard
    # pattern for images.  anything other than xxx_images will not be found.
    labels = []
    for period in ('day', 'week', 'month', 'year'):
        plots = generator.get("%s_images" % period, {})
        if plots:
            for plot_name in plots.sections:
                plot = plots[plot_name]
                for series_name in plot.sections:
                    series = plot[series_name]
                    if 'label' in series.scalars:
                        labels.append(series['label'])
    return labels

# only run the report when executed as a script.  main() parses the command
# line and calls sys.exit(), which must not happen on a plain import.
if __name__ == "__main__":
    main()
