#!/usr/bin/python3
# -*- coding: utf-8 -*-

#  Copyright © 2015  B. Clausius <barcc@gmx.de>
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.


import sys, os
import unicodedata

import polib

# The override table lives in an optional module next to this script.
# If that module cannot be imported, version 0 stands for "no overrides".
try:
    from checkpo_override import version
except ImportError:
    version = 0
if version > 0:
    from checkpo_override import overrides

# Resolve the directory from the absolute script path.  With a relative
# __file__ that has no directory part, os.path.dirname() returns '' and plain
# string concatenation would point at '/checkpo_override.py' in the
# filesystem root instead of the file next to the script.
override_filename = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'checkpo_override.py')

# Values used for keys that an override entry does not specify.
override_defaults = dict(rpat=None, extrachars='', transchars=())

def overrides_to_internal():
    """Convert the loaded override list into the internal lookup table.

    Reads the module-level names version, overrides, override_filename and
    override_defaults.

    Returns a dict that maps ``(lang, msgid, msgstr)`` to the tuple
    ``(opat, rpat, extrachars, transchars, 'unused')``.  Keys missing from an
    override entry (rpat, extrachars, transchars) are filled in from
    override_defaults.  For version 0 the dict is empty.

    Raises Exception if the override file exists although version is 0, or
    if the version is neither 0 nor 3.  Raises KeyError if an entry lacks
    one of the mandatory keys lang, msgid, msgstr or opat.
    """
    if version == 0:
        if os.path.exists(override_filename):
            raise Exception('{!r} exists, but version is 0'.format(override_filename))
        # Must be a dict, not a list: the table is used with .get(), item
        # assignment, del and .items() elsewhere in this file.
        return {}
    if version != 3:
        raise Exception('unknown version {} in {!r}'.format(version, override_filename))

    def to_key(item):
        return item['lang'], item['msgid'], item['msgstr']

    def to_val(item):
        optional = tuple(item.get(key, override_defaults[key])
                         for key in ('rpat', 'extrachars', 'transchars'))
        # The trailing 'unused' marks an override that has not been matched
        # with a catalog entry yet.
        return (item['opat'],) + optional + ('unused',)

    return {to_key(item): to_val(item) for item in overrides}
# Rebind the module-level name: from here on `overrides` is the internal
# lookup table built by overrides_to_internal(), not the imported list.
overrides = overrides_to_internal()

def write_overrides(overrides):
    """Write the override table to override_filename as a Python module.

    overrides maps ``(lang, msgid, msgstr)`` to the tuple
    ``(opat, rpat, extrachars, transchars, linenum)``; linenum is either an
    int or a marker string such as 'unused'.

    The generated text consists of ``version = 3`` and a list named
    ``overrides`` with one ``dict(...)`` call per entry.  Entries are sorted
    by language, then numbered entries by line number, then entries with a
    marker string.  Values equal to the corresponding override_defaults
    value are left out.  The file is written UTF-8 encoded.
    """
    from io import StringIO
    from pprint import pformat

    def sort_key(record):
        lang, linenum, _msgid, msgstr, *_rest = record
        assert type(lang) is str, record
        if isinstance(linenum, int):
            assert type(linenum) is int, record
            # '' sorts before every marker string, so numbered entries of a
            # language come first.
            return lang, '', linenum, msgstr
        assert type(linenum) is str, record
        return lang, linenum, 0, msgstr

    def write_field(name, value):
        text = pformat(value, indent=1, width=100)
        # Align continuation lines of a wrapped value behind "name=".
        text = ('\n' + ' ' * (len(name) + 7)).join(text.splitlines())
        print('      ' + name + '=' + text + ',', file=out)

    records = sorted(
        ((lang, linenum, msgid, msgstr, opat, rpat, extrachars, transchars)
         for (lang, msgid, msgstr), (opat, rpat, extrachars, transchars, linenum)
         in overrides.items()),
        key=sort_key)

    out = StringIO()
    print('version = 3', file=out)
    print('overrides = [', file=out)
    for lang, linenum, msgid, msgstr, opat, rpat, extrachars, transchars in records:
        # {!r} leaves ints unchanged but quotes marker strings; a bare
        # `linenum=unused` would make the generated module fail on import.
        print('dict( lang={!r}, linenum={!r},'.format(lang, linenum), file=out)
        fields = (('msgid ', msgid), ('msgstr', msgstr), ('opat', opat),
                  ('rpat', rpat), ('extrachars', extrachars), ('transchars', transchars))
        for name, value in fields:
            if name in override_defaults and override_defaults[name] == value:
                continue
            write_field(name, value)
        print('    ),', file=out)
    print(']', file=out)
    print(file=out)

    with open(override_filename, 'wt', encoding='utf-8') as overridefile:
        overridefile.write(out.getvalue())
        
        
# Characters that ischar() accepts in addition to letters of the Unicode
# categories Ll and Lu.
extra_chars = "-,;'/"
# Accepted substitutions for all languages, see startswith_pattern(): the
# first character of an item is the pattern from the msgid, the remaining
# characters are the text accepted in its place in the translation.
trans_chars = ('…...',':.',':;','"«','"»')
# Per language: additional characters accepted by ischarsp().
extra_chars_lang = {'ast': "¡", 'bn': '/', 'es': '¡´', 'fr': '\xa0’', 'gl': '\u0303', 'he': '\u05be\u05f3\u200f',
                    'ja': '\u30fc\u3001', 'km': '\xa0\u200b', 'lo': '\u0ec6', 'ru': '—', 'uk': '’—', 'uz': '‘ʻ’',
                    'zh_CN': '\uff0c\uff1b', 'zh_TW': '\uff0c\uff1b',}
# Per language: additional substitutions in the same format as trans_chars.
trans_chars_lang = {'bn': ('.\u0964',),
                    'cs': ('×x',), 'de': ('"„', '"“'), 'fr': ('×x',), 'gl': ('×x',), 'he': ('"„', '"“'),
                    'it': ('–-', '×x'), 'ja': ('!\uff01',), 'km': ('.\u17d4',), 'ms': ('–-', '×x'),
                    'pl': ('×x',), 'ru': ('–—',), 'uk': ('–—',),
                    'zh_CN': (':\uff1a', '!\uff01', '.\u3002', ':\uff0c', '"“', '"”'),
                    'zh_TW': (':\uff1a', '.\u3002', ),
                    }
# Per language: additional Unicode general categories (as returned by
# unicodedata.category()) whose characters are accepted by ischarsp().
extra_ucats_lang = {'bn': ('Lo', 'Mc', 'Mn'), 'he': ('Lo',), 'ja': ('Lo',), 'km': ('Lo', 'Mc', 'Mn'),
                    'lo' : ('Lo', 'Mn'), 'te': ('Lo', 'Mc', 'Mn'),
                    'th': ('Lo', 'Mn'), 'zh_CN': ('Lo',), 'zh_TW': ('Lo',),
                    }
#TODO: special checks for msgids like data/applications/pybik.desktop.in.h:4


class MsgidError(Exception):
    """Problem found in a msgid; constructed as MsgidError(reason, msgid)."""

    def __str__(self):
        reason = self.args[0]
        msgid = self.args[1]
        return 'pot: {} in {!r}'.format(reason, msgid)
class MsgError(Exception):
    """Problem found in a catalog entry; constructed as MsgError(lang, reason, text)."""

    def __str__(self):
        lang = self.args[0]
        reason = self.args[1]
        text = self.args[2]
        return '{}: {} in {!r}'.format(lang, reason, text)
        
def ischar(lang, c):
    """Return True if c is a lowercase/uppercase letter or one of extra_chars.

    lang is accepted for symmetry with ischarsp() but is not used here.
    """
    if unicodedata.category(c) in ('Ll', 'Lu'):
        return True
    return c in extra_chars
    
def ischarsp(lang, c, extrachars):
    """Return True if c may appear as ordinary text in a translation for lang.

    Accepted are a space, any character in extrachars, anything ischar()
    accepts, and characters allowed for the language through
    extra_ucats_lang (by Unicode category) or extra_chars_lang.
    """
    if c == ' ' or c in extrachars or ischar(lang, c):
        return True
    if unicodedata.category(c) in extra_ucats_lang.get(lang, ()):
        return True
    return c in extra_chars_lang.get(lang, '')
    
def extract_pattern(lang, msgid):
    """Return the tuple of non-text fragments found in msgid, in order.

    msgid is scanned character by character.  The fragments are:

    * ``{{`` and ``{}`` (a ``{`` directly followed by ``{`` or ``}``),
    * complete ``{...}`` fields, nested braces are counted,
    * complete ``<...>`` spans,
    * runs of the digits 0-9,
    * runs of spaces/newlines that start at the beginning of msgid or
      contain a newline, together with ``*`` characters inside such a run,
    * a run of spaces/newlines that is still pending at the end of msgid,
    * every other single character for which ischar() is false.

    Raises MsgidError for a newline inside ``{...}`` or ``<...>`` and for
    a ``{`` or ``<`` that is not closed at the end of msgid.
    """
    def ep_gen():
        # Scanner state; at most one of bracestr/link/digits/spaces is
        # being collected at any time.
        maybebrace = False  # previous character was '{', kind not yet known
        nbraces = 0         # nesting depth while inside a {...} field
        bracestr = ''       # text of the {...} field collected so far
        atstart = True      # no non-blank character seen yet on this line
        spaces = ''         # pending run of spaces/newlines (and '*' at line start)
        link = ''           # text of the <...> span collected so far
        digits = ''         # pending run of digits
        
        c = None
        imsgid = iter(msgid)
        for c in imsgid:
            if maybebrace:
                # Decide what the '{' seen in the previous step starts.
                maybebrace = False
                if c == '{':
                    yield '{{'
                elif c == '}':
                    yield '{}'
                else:
                    nbraces = 1
                    bracestr = '{' + c
                continue
            elif nbraces:
                # Inside a field: collect until the matching '}'.
                if c == '\n': raise MsgidError('newline inside {}', msgid)
                if c == '{':  nbraces += 1
                if c == '}':  nbraces -= 1
                bracestr += c
                if not nbraces:
                    yield bracestr
                    bracestr = ''
                continue
            elif link:
                # Inside <...>: collect until '>'.
                if c == '\n': raise MsgidError('newline inside <>', msgid)
                link += c
                if c == '>':
                    yield link
                    link = ''
                continue
            elif digits:
                if c in '0123456789':
                    digits += c
                    continue
                # The number ended; c itself is handled by the code below.
                yield digits
                digits = ''
            elif spaces:
                if c == ' ':
                    spaces += c
                    continue
                elif atstart and c == '*':
                    spaces += c
                    continue
                elif c == '\n':
                    atstart = True
                    spaces += c
                    continue
                else:
                    # The run ended.  It is only part of the pattern if it
                    # began at the start of msgid or contains a newline;
                    # otherwise it is dropped.  c is handled below.
                    if atstart:
                        yield spaces
                        atstart = False
                    spaces = ''
            # No fragment is pending here: classify the current character.
            if c == '\n':
                atstart = True
                spaces = c
                continue
            if c == ' ':
                spaces = c
                continue
            elif c == '{':
                maybebrace = True
            elif c == '<':
                link = c
            elif c in '0123456789':
                digits = c
            elif not ischar(lang, c):
                yield c
            atstart = False
        # End of msgid: report unterminated constructs, flush pending runs.
        if maybebrace:  raise MsgidError('unclosed { at end', msgid)
        elif nbraces:   raise MsgidError('unclosed { at end', msgid)
        elif link:      raise MsgidError('unclosed < at end', msgid)
        elif digits:    yield digits
        elif spaces:    yield spaces
    return tuple(ep_gen())
    
def startswith_pattern(lang, string, pattern, replacechars):
    """Return the text at the start of string that stands for pattern, or None.

    If string starts with pattern itself, pattern is returned.  Otherwise
    the substitution tables are searched in this order: replacechars, the
    entries of trans_chars_lang for lang, then trans_chars.  In each item
    the first element is the original and the remaining elements, joined,
    are the accepted replacement; the first replacement that string starts
    with is returned.
    """
    if string.startswith(pattern):
        return pattern

    def substitutions():
        # Lazy on purpose: a later table is only looked at when the earlier
        # ones did not produce a match.
        yield from replacechars
        yield from trans_chars_lang.get(lang, ())
        yield from trans_chars

    for original, *rest in substitutions():
        replacement = ''.join(rest)
        if original == pattern and string.startswith(replacement):
            return replacement
    return None
    
def match_pattern(lang, entry, patterns, msgstr):
    """Check that msgstr contains the fragments of patterns one after another.

    Before, between and after the fragments only characters accepted by
    ischarsp() may occur.  Every problem is printed as a line of the form
    ``<linenum>:error: ...`` and the entry is stored in the module-level
    overrides table under the key ``(lang, entry.msgid, msgstr)``.

    lang     -- language code of the translation
    entry    -- catalog entry; its msgid and linenum attributes are read
    patterns -- fragments extracted from the msgid
    msgstr   -- translated string to check
    """
    msgrest = msgstr
    assert type(msgrest) is str, type(msgrest)
    # Look up an override for exactly this translation; the default tuple
    # (linenum 'standard') means that none is stored.
    opatterns, rpatterns, extrachars, replacechars, linenum = overrides.get(
                            (lang, entry.msgid, msgstr), (patterns, None, '', (), 'standard'))
    if opatterns != patterns:
        # The pattern recorded in the override differs from the current one.
        print('{}:error: pattern {} changed to {}'.format(entry.linenum, opatterns, patterns))
        opatterns = patterns
    if linenum == 'unused':
        # First use of an override that came from overrides_to_internal():
        # drop it if it changes nothing, otherwise record the line number.
        if (rpatterns is None or opatterns == rpatterns) and not extrachars and replacechars == ():
            del overrides[(lang, entry.msgid, msgstr)]
        else:
            overrides[(lang, entry.msgid, msgstr)] = opatterns, rpatterns, extrachars, replacechars, entry.linenum
    else:
        assert linenum == 'standard' or linenum == entry.linenum, (linenum, entry.linenum)
    if rpatterns is not None:
        # The override supplies the pattern to match instead.
        patterns = rpatterns
    for i, pattern in enumerate(patterns):
        # Skip plain text until the fragment (or an accepted substitution)
        # is found at the start of the remaining string.
        while msgrest:
            rpattern = startswith_pattern(lang, msgrest, pattern, replacechars)
            if rpattern is not None:
                msgrest = msgrest[len(rpattern):]
                break
            else:
                c = msgrest[0]
                if not ischarsp(lang, c, extrachars):
                    overrides[(lang, entry.msgid, msgstr)] = opatterns, rpatterns, extrachars, replacechars, entry.linenum
                    print('{}:error: found {!r} ({} {}) while searching for pattern[{}] of {!r}'.format(
                            entry.linenum, c, hex(ord(c)), unicodedata.category(c), i, patterns))
                    break
                msgrest = msgrest[1:]
        else:
            # Reached only when msgrest is empty and the loop did not break;
            # the fragment is then only acceptable if it matches the empty
            # remainder.
            rpattern = startswith_pattern(lang, msgrest, pattern, replacechars)
            if rpattern is None:
                overrides[(lang, entry.msgid, msgstr)] = opatterns, rpatterns, extrachars, replacechars, entry.linenum
                print('{}:error: pattern[{}] {!r} not found'.format(entry.linenum, i, pattern))
    # Whatever remains after the last fragment must be plain text.
    while msgrest:
        c = msgrest[0]
        if not ischarsp(lang, c, extrachars):
            overrides[(lang, entry.msgid, msgstr)] = opatterns, rpatterns, extrachars, replacechars, entry.linenum
            print('{}:error: found {!r} ({} {}) while searching for end'.format(
                    entry.linenum, c, hex(ord(c)), unicodedata.category(c)))
            break
        msgrest = msgrest[1:]
        
def check_postring(lang, entry):
    """Check the translation(s) of one catalog entry against its msgid.

    Entries for which entry.translated() is false are skipped.  For a plural
    entry every string in entry.msgstr_plural is checked, otherwise
    entry.msgstr is checked.

    Raises MsgError if msgid and msgid_plural yield different patterns, if a
    plural entry has a non-empty msgstr, or if a singular entry has a
    non-empty msgstr_plural.
    """
    if not entry.translated():
        return
    pattern = extract_pattern(lang, entry.msgid)

    if not entry.msgid_plural:
        # Singular entry.
        if entry.msgstr_plural:
            raise MsgError(lang, 'msgstr_plural not empty for singular', entry.msgstr_plural)
        match_pattern(lang, entry, pattern, entry.msgstr)
        return

    # Plural entry: both msgids must produce the same pattern.
    if pattern != extract_pattern(lang, entry.msgid_plural):
        raise MsgError(lang, 'msgid != msgid_plural', entry.msgid)
    if entry.msgstr:
        raise MsgError(lang, 'msgstr not empty for plural', entry.msgstr)
    for translation in entry.msgstr_plural.values():
        match_pattern(lang, entry, pattern, translation)
        
        
def usage():
    """Print a short description of the command line options."""
    prog = os.path.basename(__file__)
    text = (
        'usage:',
        '  {} -h'.format(prog),
        '  {}'.format(prog),
        '      print errors about: {}'.format(override_filename),
        '  {} -u'.format(prog),
        '      write/update {}'.format(override_filename),
    )
    for line in text:
        print(line)
    
def main():
    """Check every ``*.po`` file in the directory ``po`` and report problems.

    Arguments are taken from sys.argv: ``-h`` prints the usage and returns,
    ``-u`` writes the override file after all files were checked; any other
    argument prints a message and the usage and returns.

    The language code passed to check_postring() is the file name without
    its extension.  A MsgidError or MsgError raised while checking is
    printed and ends the run without writing the override file.
    """
    update = False
    for arg in sys.argv[1:]:
        if arg == '-h':
            usage()
            return
        if arg != '-u':
            print('unknown argument:', arg)
            usage()
            return
        update = True

    try:
        for filename in sorted(os.listdir('po')):
            if not filename.endswith('.po'):
                continue
            print('processing', filename)
            lang = os.path.splitext(filename)[0]
            for entry in polib.pofile(os.path.join('po', filename)):
                check_postring(lang, entry)
    # Catch the errors this script raises itself.  The clause used to name
    # an undefined class, so any exception raised in the loop above ended
    # in a NameError instead of the intended message.
    except (MsgidError, MsgError) as e:
        print(e)
        return
    if update:
        write_overrides(overrides)
        
        
# Run the checks only when this file is executed as a script.
if __name__ == '__main__':
    main()
    

