413 lines
16 KiB
Python
Executable File
413 lines
16 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# Version 1.0.2 - Build 43
|
|
#############################################################################
|
|
# Change log:
|
|
# 7 May 2019, ondratu , Initial
|
|
# 13 June 2019, 3d-gussner, Fix length false positives
|
|
# 14 Sep. 2019, 3d-gussner, Prepare adding new language
|
|
# 18 Sep. 2020, 3d-gussner, Fix execution of lang-check.py
|
|
# 2 Apr. 2021, 3d-gussner, Fix and improve text wrap
|
|
# 22 Apr. 2021, DRracer , add English source to output
|
|
# 23 Apr. 2021, wavexx , improve
|
|
# 24 Apr. 2021, wavexx , improve
|
|
# 26 Apr. 2021, wavexx , add character ruler
|
|
# 21 Dec. 2021, 3d-gussner, Prepare more community languages
|
|
# Swedish
|
|
# Danish
|
|
# Slovenian
|
|
# Hungarian
|
|
# Luxembourgish
|
|
# Croatian
|
|
# 3 Jan. 2022, 3d-gussner, Prepare Lithuanian
|
|
# 7 Jan. 2022, 3d-gussner, Check for Syntax errors and exit with error
|
|
# , add Build number 'git rev-list --count HEAD lang-check.py'
|
|
# 30 Jan. 2022, 3d-gussner, Add arguments. Requested by @AttilaSVK
|
|
# --information == output all source and translated messages
|
|
# --import-check == used by `lang-import.sh` to verify
|
|
# newly import `lang_en_??.txt` files
|
|
# 14 Mar. 2022, 3d-gussner, Check if translation isn't equal to origin
|
|
#############################################################################
|
|
|
|
"""Check PO files for formatting errors."""
|
|
from argparse import ArgumentParser
|
|
from sys import stdout, stderr, exit
|
|
import codecs
|
|
import polib
|
|
import textwrap
|
|
import re
|
|
import os
|
|
|
|
from lib import charset as cs
|
|
from lib.io import load_map
|
|
import enum
|
|
|
|
# Emit ANSI colours when stdout is a terminal whose TERM is not "dumb",
# or when the NO_COLOR environment variable is exactly "0".
COLORIZE = (
    (stdout.isatty() and os.environ.get("TERM", "dumb") != "dumb")
    or os.environ.get('NO_COLOR') == "0"
)

# Column count used when an entry defines none, and upper bound for "c=".
LCD_WIDTH = 20

# GitHub workflow-command output is enabled by GH_ANNOTATIONS=1.
GH_ANNOTATIONS = os.environ.get('GH_ANNOTATIONS') == "1"

# File name reported in annotations; main() overwrites it with the PO path.
CURRENT_PO = "Unknown file"

# Number of error annotations emitted so far (updated by gh_annotate()).
GH_ERR_COUNT = 0
|
|
|
|
class AN_TYPE(enum.Enum):
    """Annotation severities.

    Each member is declared as a ``(prefix, print_fmt)`` pair: ``prefix`` is
    the keyword used in the ``::<prefix> file=...`` annotation line and
    ``print_fmt`` is the tag used for the plain console message.  Member
    values are assigned automatically as 1, 2, 3, ... in declaration order.
    """

    def __new__(cls, *args, **kwds):
        # Number the members sequentially instead of using the tuple as value.
        member = object.__new__(cls)
        member._value_ = len(cls.__members__) + 1
        return member

    def __init__(self, prefix, print_fmt):
        self.prefix = prefix
        self.print_fmt = print_fmt

    ERROR = "error", "[E]"
    WARNING = "warning", "[W]"
    NOTICE = "notice", "[S]"
|
|
|
|
def color_maybe(color_attr, text):
    """Wrap *text* in the ANSI colour *color_attr* if COLORIZE is set.

    When COLORIZE is false the text is returned unchanged.
    """
    if not COLORIZE:
        return text
    return ''.join(('\033[0;', str(color_attr), 'm', text, '\033[0m'))
|
|
|
|
def red(text):
    """Return *text* passed through color_maybe() with ANSI attribute 31 (red)."""
    return color_maybe(31, text)


def green(text):
    """Return *text* passed through color_maybe() with ANSI attribute 32 (green)."""
    return color_maybe(32, text)


def yellow(text):
    """Return *text* passed through color_maybe() with ANSI attribute 33 (yellow)."""
    return color_maybe(33, text)


def cyan(text):
    """Return *text* passed through color_maybe() with ANSI attribute 36 (cyan)."""
    return color_maybe(36, text)
|
|
|
|
|
|
def print_wrapped(wrapped_text, rows, cols):
    """Print wrapped rows between ``|`` bars, each prefixed by its row number.

    A plain string is treated as a single row.  Every row is padded to
    *cols* characters; the number of a row beyond *rows* is printed in red.
    """
    lines = [wrapped_text] if type(wrapped_text) is str else wrapped_text
    row_template = ' {} |{:' + str(cols) + 's}|'
    for number, content in enumerate(lines, start=1):
        label = str(number).rjust(3)
        if number > rows:
            # this row does not fit in the defined number of rows
            label = red(label)
        print(row_template.format(label, content))
|
|
|
|
def print_truncated(text, cols):
    """Print *text* inside a *cols*-wide ``|...|`` box.

    Shorter text is padded with spaces; anything past *cols* characters is
    printed in red after the closing bar.
    """
    if len(text) > cols:
        visible, overflow = text[:cols], red(text[cols:])
    else:
        visible, overflow = text.ljust(cols), ''
    print(''.join((' |', visible, '|', overflow)))
|
|
|
|
def print_ruler(spc, cols):
    """Print a cyan ruler of subscript digits, indented by *spc* spaces.

    The ruler repeats ``₀…₉`` four times (40 characters) and is cut to
    *cols* characters.
    """
    indent = ' ' * spc
    digits = '₀₁₂₃₄₅₆₇₈₉' * 4
    print(indent + cyan(digits[:cols]))
|
|
|
|
def print_source_translation(source, translation, wrapped_source, wrapped_translation, rows, cols):
    """Print a ruler and preview of the source text, then of the translation.

    For single-row definitions the raw strings are shown truncated to
    *cols*; otherwise the pre-wrapped rows are shown.  A blank line is
    printed at the end.
    """
    single_row = rows == 1
    ruler_indent = 4 if single_row else 6
    sections = (
        (' source text:', source, wrapped_source),
        (' translated text:', translation, wrapped_translation),
    )
    for heading, plain, wrapped in sections:
        print(heading)
        print_ruler(ruler_indent, cols)
        if single_row:
            print_truncated(plain, cols)
        else:
            print_wrapped(wrapped, rows, cols)
    print()
|
|
|
|
def highlight_trailing_white(text):
    """Replace the final trailing space with a visible ``·`` marker.

    Args:
        text: a string, or a list of strings (wrapped rows).

    Returns:
        For a string, the string with its last trailing space replaced.
        For a list, a shallow copy whose last element has been processed;
        the input list is not modified and an empty list yields an empty
        list.
    """
    if isinstance(text, str):
        return re.sub(r' $', '·', text)
    ret = text[:]
    if ret:
        # only the last row can carry the trailing whitespace
        ret[-1] = highlight_trailing_white(ret[-1])
    return ret
|
|
|
|
def wrap_text(text, cols):
    """Wrap *text* to rows of at most *cols* characters.

    Each newline-separated line is wrapped on its own with textwrap; lines
    that wrap to nothing (blank lines) contribute no row.  textwrap drops
    trailing whitespace, so the trailing whitespace of the whole text (as
    counted by ``str.rstrip``) is added back, as spaces, to the last row.

    Args:
        text: the message to wrap.
        cols: maximum row width.

    Returns:
        The list of wrapped rows (empty when the text has no content).
    """
    wrapper = textwrap.TextWrapper(width=cols)
    ret = []
    for line in text.split('\n'):
        # wrap each input line in text individually
        ret.extend(wrapper.wrap(line))

    # add back trailing whitespace, once, on the final row only
    trailing = len(text) - len(text.rstrip())
    if ret and trailing:
        ret[-1] += ' ' * trailing
    return ret
|
|
|
|
def ign_char_first(c):
    """Return True if *c* is alphanumeric or one of ``%`` and ``?``."""
    if c.isalnum():
        return True
    return c in ('%', '?')
|
|
|
|
def ign_char_last(c):
    """Return True if *c* is alphanumeric or one of ``.`` and ``'``."""
    if c.isalnum():
        return True
    return c in ('.', "'")
|
|
|
|
# print_anyway is used to reduce code duplication.
# Specifically, when gh_annotate() has all the information needed to construct
# the "normal" console message as well, that message is built and printed there.
|
|
|
|
def gh_annotate(an_type, start_line, message, end_line=None, print_anyway=False):
    """Report *message* for a line (or line range) of the current PO file.

    With GH_ANNOTATIONS enabled, a ``::<prefix> file=...,line=...::<message>``
    line is printed and GH_ERR_COUNT is incremented for errors.  Otherwise
    nothing is printed unless *print_anyway* is true, in which case a
    coloured ``<tag>: <message> on line N`` console message is printed
    (red for errors, yellow for everything else).

    Args:
        an_type: AN_TYPE member giving the severity.
        start_line: first line the message refers to.
        message: text of the report.
        end_line: optional last line of the range.
        print_anyway: also produce the console message when annotations
            are disabled.
    """
    global GH_ERR_COUNT

    is_error = an_type == AN_TYPE.ERROR
    has_range = end_line is not None

    if GH_ANNOTATIONS:
        if has_range:
            location = "line={},endLine={}".format(start_line, end_line)
        else:
            location = "line={}".format(start_line)
        print("::{} file={},{}::{}".format(an_type.prefix, CURRENT_PO, location, message))
        if is_error:
            GH_ERR_COUNT += 1
        return

    if not print_anyway:
        return

    if has_range:
        where = "lines {}-{}".format(start_line, end_line)
    else:
        where = "line {}".format(start_line)
    paint = red if is_error else yellow
    print(paint("{}: {} on {}".format(an_type.print_fmt, message, where)))
|
|
|
|
|
|
def check_translation(entry, msgids, is_pot, no_warning, no_suggest, warn_empty, warn_same, information, shorter):
    """Check one catalogue entry against its display definition.

    The first line of the entry comment is the metadata: it must start with
    ``MSG`` and may be followed by ``c=<cols>`` and ``r=<rows>`` tokens.
    Errors and warnings are counted; notices/suggestions are only reported.

    Args:
        entry: catalogue entry providing ``obsolete``, ``comment``,
            ``msgid``, ``msgstr`` and ``linenum``.
        msgids: collection of known source strings, or None to treat every
            entry as known.
        is_pot: stop after the source-text checks (no translation checks).
        no_warning: suppress the checks reported as optional warnings.
        no_suggest: suppress the suggestion notices.
        warn_empty: check metadata and empty translations even for entries
            whose msgid is not in *msgids*.
        warn_same: also report one-word translations equal to the source.
        information: print the entry when no error/warning was counted.
        shorter: report translations using at least two rows fewer than
            defined.

    Returns:
        True if the entry is obsolete or no error/warning was counted,
        False otherwise.
    """

    # do not check obsolete/deleted entries
    if entry.obsolete:
        return True

    # fetch/decode entry for easy access
    meta = entry.comment.split('\n', 1)[0]
    source = entry.msgid
    translation = entry.msgstr
    line = entry.linenum
    known_msgid = msgids is None or source in msgids
    errors = 0

    # Check comment syntax (non-empty and include a MSG id)
    if known_msgid or warn_empty:
        if not meta:
            gh_annotate(AN_TYPE.ERROR, line, "Translation missing comment metadata", None, True)
            return False
        if not meta.startswith('MSG'):
            gh_annotate(AN_TYPE.ERROR, line, "Critical Syntax Error: comment doesn't start with MSG", None, True)
            print(red(" comment: " + meta))
            return False

    # Check if columns and rows are defined
    tokens = meta.split(' ')
    cols = None
    rows = None
    for item in tokens[1:]:
        try:
            # a token without exactly one '=' fails to unpack -> ValueError
            key, val = item.split('=')
            if key == 'c':
                cols = int(val)
            elif key == 'r':
                rows = int(val)
            else:
                raise ValueError
        except ValueError:
            gh_annotate(AN_TYPE.ERROR, line, "Invalid display definition", None, True)
            print(red(" definition: " + meta))
            return False

    if not cols:
        if not no_warning and known_msgid and not rows:
            errors += 1
            gh_annotate(AN_TYPE.WARNING, line, "No usable display definition", None, True)
        # probably fullscreen, guess from the message length to continue checking
        cols = LCD_WIDTH
    if cols > LCD_WIDTH:
        errors += 1
        gh_annotate(AN_TYPE.WARNING, line, "Invalid column count", None, True)
    if not rows:
        rows = 1
    elif rows > 1 and cols != LCD_WIDTH:
        errors += 1
        gh_annotate(AN_TYPE.WARNING, line, "Multiple rows with odd number of columns", None, True)

    # Check if translation contains unsupported characters
    invalid_char = cs.translation_check(cs.unicode_to_source(translation))
    if invalid_char is not None:
        gh_annotate(AN_TYPE.ERROR, line,
                    "Critical syntax: Unhandled char {} found".format(repr(invalid_char)), None, True)
        print(red(' translation: ' + translation))
        return False

    # Pre-process the translation to translated characters for a correct preview and length check
    translation = cs.trans_replace(translation)

    wrapped_source = wrap_text(source, cols)
    rows_count_source = len(wrapped_source)
    wrapped_translation = wrap_text(translation, cols)
    rows_count_translation = len(wrapped_translation)

    # Incorrect number of rows/cols on the definition
    if rows == 1 and (len(source) > cols or rows_count_source > rows):
        errors += 1
        gh_annotate(AN_TYPE.WARNING, line,
                    "Source text longer than {} cols as defined".format(cols), None, True)
        print_ruler(4, cols)
        print_truncated(source, cols)
        print()
    elif rows_count_source > rows:
        errors += 1
        gh_annotate(AN_TYPE.WARNING, line,
                    "Source text longer than {} rows as defined".format(rows), None, True)
        print_ruler(6, cols)
        print_wrapped(wrapped_source, rows, cols)
        print()

    # All further checks are against the translation
    if is_pot:
        return (errors == 0)

    # Missing translation
    if len(translation) == 0 and (warn_empty or (not no_warning and known_msgid)):
        errors += 1
        if rows == 1:
            gh_annotate(AN_TYPE.WARNING, line, "Empty translation for \"{}\"".format(source), line + rows, True)
        else:
            gh_annotate(AN_TYPE.WARNING, line, "Empty translation", line + rows, True)
            print_ruler(6, cols)
            print_wrapped(wrapped_source, rows, cols)
            print()

    # Check for translation length too long
    if (rows_count_translation > rows) or (rows == 1 and len(translation) > cols):
        errors += 1
        gh_annotate(AN_TYPE.ERROR, line, "Text is longer than definition", line + rows)
        print(red('[E]: Text is longer than definition on line %d: cols=%d rows=%d (rows diff=%d)'
                  % (line, cols, rows, rows_count_translation-rows)))
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)

    # Check for translation length shorter
    if shorter and (rows_count_translation < rows-1):
        gh_annotate(AN_TYPE.NOTICE, line, "Text is shorter than definition", line + rows)
        print(yellow('[S]: Text is shorter than definition on line %d: cols=%d rows=%d (rows diff=%d)'
                     % (line, cols, rows, rows_count_translation-rows)))
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)

    # Different count of % sequences
    if source.count('%') != translation.count('%') and len(translation) > 0:
        errors += 1
        gh_annotate(AN_TYPE.ERROR, line, "Unequal count of % escapes", None, True)
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)

    # Different first/last character
    if not no_suggest and len(source) > 0 and len(translation) > 0:
        # slicing (instead of indexing) keeps whitespace-only text from raising
        source_end = source.rstrip()[-1:]
        translation_end = translation.rstrip()[-1:]
        start_diff = not (ign_char_first(source[0]) and ign_char_first(translation[0])) and source[0] != translation[0]
        end_diff = not (ign_char_last(source_end) and ign_char_last(translation_end)) and source_end != translation_end
        if start_diff or end_diff:
            if start_diff:
                gh_annotate(AN_TYPE.NOTICE, line,
                            "Differing first punctuation character: ({} => {})".format(source[0], translation[0]),
                            None, True)
            if end_diff:
                # report the characters that were actually compared
                gh_annotate(AN_TYPE.NOTICE, line,
                            "Differing last punctuation character: ({} => {})".format(source_end, translation_end),
                            None, True)
            print_source_translation(source, translation,
                                     wrapped_source, wrapped_translation,
                                     rows, cols)
    if not no_suggest and source == translation and (warn_same or len(source.split(' ', 1)) > 1):
        gh_annotate(AN_TYPE.NOTICE, line, "Translation same as original text", None, True)
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)

    # Short translation
    if not no_suggest and len(source) > 0 and len(translation) > 0:
        if len(translation.rstrip()) < len(source.rstrip()) / 2:
            gh_annotate(AN_TYPE.NOTICE, line, "Short translation", None, True)
            print_source_translation(source, translation,
                                     wrapped_source, wrapped_translation,
                                     rows, cols)

    # Incorrect trailing whitespace in translation
    if not no_warning and len(translation) > 0 and \
       (source.rstrip() == source or (rows == 1 and len(source) == cols)) and \
       translation.rstrip() != translation and \
       (rows > 1 or len(translation) != len(source)):
        errors += 1
        gh_annotate(AN_TYPE.WARNING, line, "Incorrect trailing whitespace for translation", None, True)
        source = highlight_trailing_white(source)
        translation = highlight_trailing_white(translation)
        if wrapped_translation:
            # a whitespace-only translation wraps to no rows at all
            wrapped_translation = highlight_trailing_white(wrapped_translation)
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)

    # show the information
    if information and errors == 0:
        print(green('[I]: %s' % (meta)))
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)
    return (errors == 0)
|
|
|
|
|
|
def main():
    """Parse the command line, check every entry of the PO file and return the exit status.

    Returns:
        1 if the PO file does not exist.  Otherwise, with GH_ANNOTATIONS
        enabled, 1 if at least one error annotation was emitted and 0 if
        not (warnings and notices alone do not fail); without it, 0 if
        every entry passed check_translation() and 1 otherwise.
    """
    global CURRENT_PO

    parser = ArgumentParser(description=__doc__)
    parser.add_argument("po", help="PO file to check")
    parser.add_argument(
        "--no-warning", action="store_true",
        help="Disable warnings")
    parser.add_argument(
        "--no-suggest", action="store_true",
        help="Disable suggestions")
    parser.add_argument(
        "--errors-only", action="store_true",
        help="Only check errors")
    parser.add_argument(
        "--pot", action="store_true",
        help="Do not check translations")
    parser.add_argument(
        "--information", action="store_true",
        help="Output all translations")
    parser.add_argument("--map",
        help="Provide a map file to suppress warnings about unused translations")
    parser.add_argument(
        "--warn-empty", action="store_true",
        help="Warn about empty definitions and translations even if unused")
    parser.add_argument(
        "--warn-same", action="store_true",
        help="Warn about one-word translations which are identical to the source")
    parser.add_argument(
        "--shorter", action="store_true",
        help="Show message if it is shorter than expected.")

    # load the translations
    args = parser.parse_args()
    if not os.path.isfile(args.po):
        print("{}: file does not exist or is not a regular file".format(args.po), file=stderr)
        return 1

    if args.errors_only:
        # --errors-only is shorthand for --no-warning --no-suggest
        args.no_warning = True
        args.no_suggest = True

    # load the symbol map to suppress empty (but unused) translation warnings
    msgids = None
    if args.map:
        msgids = set()
        for sym in load_map(args.map):
            if isinstance(sym['data'], bytes):
                msgid = cs.source_to_unicode(codecs.decode(sym['data'], 'unicode_escape', 'strict'))
                msgids.add(msgid)

    # annotations emitted while checking refer to this file
    CURRENT_PO = args.po

    # check each translation in turn
    status = True
    for translation in polib.pofile(args.po):
        # always run the check so every entry gets reported
        if not check_translation(translation, msgids, args.pot, args.no_warning, args.no_suggest,
                                 args.warn_empty, args.warn_same, args.information, args.shorter):
            status = False

    if GH_ANNOTATIONS:
        # Do not cause a failure if only warnings or notices.
        return 1 if GH_ERR_COUNT > 0 else 0
    return 0 if status else 1
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    exit(exit_status)
|