Prusa-Firmware/lang/lib/charset.py

172 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from .FontGen import CUSTOM_CHARS, INVERSE_CUSTOM_CHARS
# Characters to be remapped prior to the source-encoding transformation.
# This transformation is applied to the translation before it is converted to the final encoding,
# and maps UTF-8 to UTF-8. It replaces symbols that are unavailable in the source encoding with
# a close representation that is available.
# sources
# https://en.wikipedia.org/wiki/Czech_orthography
# https://en.wikipedia.org/wiki/German_orthography
# https://en.wikipedia.org/wiki/French_orthography
# https://en.wikipedia.org/wiki/Spanish_orthography
# https://en.wikipedia.org/wiki/Italian_orthography
# https://en.wikipedia.org/wiki/Polish_alphabet
# https://en.wikipedia.org/wiki/Dutch_orthography
# https://en.wikipedia.org/wiki/Romanian_alphabet
# https://en.wikipedia.org/wiki/Hungarian_alphabet
# https://en.wikipedia.org/wiki/Gaj%27s_Latin_alphabet
# https://en.wikipedia.org/wiki/Slovak_orthography
# https://en.wikipedia.org/wiki/Swedish_alphabet
# https://en.wikipedia.org/wiki/Norwegian_orthography
# Substitution table consumed by trans_replace(): each key is replaced by its value.
# Commented-out entries are disabled mappings kept for reference; the trailing tag
# lists the languages the character is used in.
# Keys must never be empty: str.replace('', x) inserts x between every character.
TRANS_CHARS = {
    # 'á': 'a',  # cz,fr,es,hu,sk
    # 'Á': 'A',  # cz,fr,hu,sk
    # 'à': 'a',  # fr,it
    'À': 'à',  # fr,it
    # 'â': 'a',  # fr,ro
    'Â': 'â',  # ro
    # 'Ä': 'ä',  # de,sv,no,sk
    # 'å': 'a',  # sv,no
    # 'Å': 'A',  # sv,no
    # 'æ': 'ä',  # sv,no
    # 'ą': 'a',  # pl
    # 'Ą': 'A',  # pl
    # 'ă': 'a',  # ro - a-breve
    # 'Ă': 'A',  # ro - A-breve
    'ǎ': 'ă',  # ro - a-caron
    'Ǎ': 'Ă',  # ro - A-caron
    # 'ć': 'c',  # pl,hr
    'Ć': 'ć',  # pl,hr
    'ç': 'c',  # fr,nl
    'Ç': 'C',  # fr,nl
    # 'č': 'c',  # cz,hr,sk
    # 'Č': 'č',  # cz,hr,sk
    # 'ď': 'd',  # cz,sk
    'Ď': 'ď',  # cz,sk
    'đ': 'd',  # hr
    'Đ': 'D',  # hr
    # 'é': 'e',  # cz,fr,es,it,nl,hu,sk
    # 'É': 'E',  # cz,fr,it,hu,sk
    # 'è': 'e',  # fr,it,nl
    'È': 'è',  # fr,it
    # 'ê': 'e',  # fr,nl
    'Ê': 'ê',  # fr
    # 'ě': 'e',  # cz
    'Ě': 'ě',  # cz
    # 'ë': 'e',  # fr
    # 'ę': 'e',  # pl
    # 'Ę': 'ę',  # pl
    # 'í': 'i',  # cz,es,it,sk
    # 'Í': 'í',  # cz,it,sk
    'ì': 'i',
    'Ì': 'I',
    # 'î': 'i',  # fr,ro
    # 'Î': 'I',  # ro
    # 'ĺ': 'l',  # sk
    'Ĺ': 'ĺ',  # sk
    # 'ł': 'l',  # pl
    # 'Ł': 'L',  # pl
    # 'ľ': 'l',  # sk
    # 'Ľ': 'L',  # sk
    # 'ń': 'n',  # pl
    'Ń': 'ń',  # pl
    # 'ň': 'n',  # cz,sk
    'Ň': 'ň',  # cz,sk
    'ñ': 'n',  # es,nl
    # 'ó': 'o',  # cz,es,pl,hu,sk
    # 'Ó': 'ó',  # cz,pl,hu,sk
    # 'ò': 'o',  # it
    'Ò': 'ò',  # it
    # 'ô': 'o',  # fr,nl,sk
    'Ô': 'ô',  # sk
    'œ': 'o',  # fr
    # NOTE(review): this entry was a second, identical 'œ' key. The upper-case
    # ligature is assumed to be the intent, following the neighbouring
    # lower/upper pairs - confirm.
    'Œ': 'O',  # fr
    # 'ø': 'ö',  # sv,no
    # 'Ø': 'ø',  # sv,no
    # 'Ö': 'ö',  # de,sv,no,hu
    # 'ő': 'o',  # hu
    'Ő': 'ő',  # hu
    'ŕ': 'r',  # sk
    'Ŕ': 'R',  # sk
    # 'ř': 'r',  # cz
    # 'Ř': 'ř',  # cz
    # 'ś': 's',  # pl
    # 'Ś': 'ś',  # pl
    # 'š': 's',  # cz,hr,sk
    # 'Š': 'š',  # cz,hr,sk
    # 'ș': 's',  # ro - s-comma
    # 'Ș': 'ș',  # ro - S-comma
    'ş': 'ș',  # ro - s-cedilla
    'Ş': 'Ș',  # ro - S-cedilla
    # 'ß': 'ss',  # de
    # NOTE(review): the key of this entry was an empty string. Capital sharp s
    # (U+1E9E) is assumed, matching the lower/upper pairing used elsewhere in
    # this table - confirm.
    'ẞ': 'ß',  # de
    # 'ť': 't',  # cz,sk
    'Ť': 'ť',  # cz,sk
    # 'ț': 't',  # ro - t-comma
    'Ț': 'ț',  # ro - T-comma
    'ţ': 'ț',  # ro - t-cedilla
    'Ţ': 'Ț',  # ro - T-cedilla
    # 'ú': 'u',  # cz,es,hu,sk
    'Ú': 'ú',  # cz,hu,sk
    'ù': 'u',  # it
    'Ù': 'U',  # it
    'û': 'u',  # fr
    'Û': 'U',  # fr
    # 'Ü': 'ü',  # de,hu
    # 'ů': 'u',  # cz
    'Ů': 'ů',  # cz
    # 'ű': 'u',  # hu
    'Ű': 'ű',  # hu
    # 'ý': 'y',  # cz,sk
    # 'Ý': 'ý',  # cz,sk
    'ÿ': 'y',  # fr
    'Ÿ': 'y',  # fr
    # 'ź': 'z',  # pl
    'Ź': 'ź',  # pl
    # 'ž': 'z',  # cz,hr,sk
    # 'Ž': 'ž',  # cz,hr,sk
    # 'ż': 'z',  # pl
    'Ż': 'ż',  # pl
    '¿': '',  # es
    '¡': '',  # es
    # NOTE(review): the key of this entry was an empty string. The typographic
    # apostrophe (U+2019) is assumed - confirm.
    '’': '\'',
}
def _character_check(buf, valid_chars):
    """Return the first character of *buf* that is not allowed, or None.

    A character is allowed when it is printable ASCII or is contained in
    *valid_chars*.
    """
    offenders = (
        ch
        for ch in buf
        if not (ch.isascii() and ch.isprintable()) and ch not in valid_chars
    )
    # The generator is lazy, so scanning stops at the first offending character.
    return next(offenders, None)
def source_check(buf):
    """Return the first disallowed character of *buf*, or None.

    Allowed characters are printable ASCII, the newline, and every value of
    CUSTOM_CHARS.
    """
    allowed = {'\n'}.union(CUSTOM_CHARS.values())
    return _character_check(buf, allowed)
def translation_check(buf):
    """Return the first disallowed character of *buf*, or None.

    Allowed characters are printable ASCII, the newline, and every key of
    CUSTOM_CHARS.
    """
    allowed = {'\n'}.union(CUSTOM_CHARS.keys())
    return _character_check(buf, allowed)
def trans_replace(buf):
    """Apply every TRANS_CHARS substitution to *buf* and return the result.

    The substitutions are applied one after another, in the table's
    insertion order.
    """
    for src, dst in TRANS_CHARS.items():
        # An empty search string would make str.replace() insert `dst` between
        # every character of the text, so such entries are ignored.
        if not src:
            continue
        buf = buf.replace(src, dst)
    return buf
def source_to_unicode(buf):
    """Convert *buf* by mapping its characters through CUSTOM_CHARS.

    trans_replace() is applied first; afterwards every character that is a
    key of CUSTOM_CHARS is swapped for the mapped value and all other
    characters are kept unchanged.
    """
    remapped = trans_replace(buf)
    return ''.join(CUSTOM_CHARS.get(ch, ch) for ch in remapped)
def unicode_to_source(buf):
    """Convert *buf* by mapping its characters through INVERSE_CUSTOM_CHARS.

    trans_replace() is applied first; afterwards every character that is a
    key of INVERSE_CUSTOM_CHARS is swapped for the mapped value and all other
    characters are kept unchanged.
    """
    remapped = trans_replace(buf)
    return ''.join(INVERSE_CUSTOM_CHARS.get(ch, ch) for ch in remapped)