# /// script
# requires-python = ">=3.11"
# dependencies = [
# "babel",
# ]
# ///
# SPDX-FileCopyrightText: © 2025 Olivier Meunier <olivier@neokraft.net>
#
# SPDX-License-Identifier: AGPL-3.0-only
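#
# Usage sketch (the file name "messages.py" and the "uv" runner are
# assumptions, not confirmed by this repository; any PEP 723-aware runner
# that reads the inline metadata above will do):
#
#   uv run messages.py extract    # write translations/messages.pot
#   uv run messages.py update     # refresh translations/<code>/messages.po
#   uv run messages.py generate   # write translated markdown under src/<code>/
#   uv run messages.py check      # validate catalogs, exit 1 on errors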
import os
import sys
from argparse import ArgumentParser
from operator import itemgetter
from pathlib import Path

from babel.messages.catalog import Catalog, Locale
from babel.messages.extract import extract_from_file
from babel.messages.pofile import read_po, write_po

# Percentage of translated content under which a translation won't be loaded.
COMPLETION_CUTOFF = 0.90
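# For example (illustrative numbers only): with a 200-string template, a
# locale with 180 usable translations (90%) is generated, while one with
# 178 (89%) is skipped. "Usable" here means non-fuzzy, non-empty and
# different from the template string, as counted in generate() below.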
HERE = Path(__file__).parent
ROOT = HERE / "src"
CATALOG_HEADER = """\
# Translations template for PROJECT.
# SPDX-FileCopyrightText: © YEAR Readeck <translate@readeck.com>
#
# SPDX-License-Identifier: AGPL-3.0-only
#"""
CATALOG_OPTIONS = {
    "header_comment": CATALOG_HEADER,
    "project": "Readeck User Documentation",
    "version": "1.0.0",
    "copyright_holder": "Readeck",
    "msgid_bugs_address": "translate@readeck.com",
    "last_translator": "Readeck <translate@readeck.com>",
    "language_team": "Readeck <translate@readeck.com>",
}


def extract_blocks(fileobj, keywords, comment_tags, options):
    """Babel extraction method: yield one ``[lineno, funcname, message,
    comments]`` token per block of consecutive non-blank lines."""
    token = None
    messages = []

    for lineno, text in enumerate(fileobj):
        lineno = lineno + 1
        if token is None:
            token = [lineno, "", [], []]
            messages = []

        if text.strip() != b"":
            messages.append(text.decode("utf-8").rstrip())
        else:
            if len(messages) > 0:
                token[2] = "\n".join(messages)
                yield token
            token = None
            messages = []

    if token is not None and len(messages) > 0:
        token[2] = "\n".join(messages)
        yield token
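
# A small illustration (hypothetical input, not taken from the repository):
# for a file containing
#
#     First paragraph
#     still the first paragraph
#
#     Second paragraph
#
# extract_blocks yields two tokens,
#
#     [1, "", "First paragraph\nstill the first paragraph", []]
#     [4, "", "Second paragraph", []]
#
# which extract_from_file() turns into the (lineno, message, comments,
# context) tuples consumed by extract() below.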


def po2text(catalog: Catalog, destdir: Path):
    """Write one text file per source file referenced by ``catalog`` into
    ``destdir``, falling back to the msgid for fuzzy or empty translations.
    Yields each written path."""
    os.makedirs(destdir, exist_ok=True)
    files = {}

    for m in catalog._messages.values():
        for x in m.locations:
            name = Path(x[0]).name
            files.setdefault(name, [])
            msg = m.string
            if m.fuzzy or msg.strip() == "":
                msg = m.id
            files[name].append((x[1], msg))

    for k in files:
        files[k] = sorted(files[k], key=itemgetter(0))

    for k, messages in files.items():
        dest = destdir / k
        with dest.open("w") as fp:
            for x in messages:
                fp.write(x[1])
                fp.write("\n\n")
        yield dest
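
# Illustration (file names are hypothetical): for a catalog whose messages
# carry locations ("en/index.md", 1) and ("en/index.md", 5), po2text() writes
# destdir/index.md with the two blocks sorted by line number, separated by
# blank lines, and falling back to the original msgid for fuzzy or empty
# translations.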


def extract(_):
    """Extract translatable blocks from the English markdown sources into
    translations/messages.pot."""
    template = Catalog(**CATALOG_OPTIONS)

    for f in (ROOT / "en").rglob("*.md"):
        for lineno, message, comments, context in extract_from_file(
            extract_blocks,
            f,
        ):
            template.add(
                message,
                None,
                [(str(f.relative_to(ROOT)), lineno)],
                auto_comments=comments,
                context=context,
            )

    translations = HERE / "translations"
    dest = translations / "messages.pot"
    with dest.open("wb") as fp:
        write_po(
            fp,
            template,
            width=None,
            sort_by_file=True,
            include_lineno=True,
            ignore_obsolete=True,
        )

    print(f"{dest} written")


def update(_):
    """Create or update translations/<code>/messages.po from the POT template."""
    translations = HERE / "translations"
    with (translations / "messages.pot").open("rb") as fp:
        template = read_po(fp)

    dirs = [x for x in translations.iterdir() if x.is_dir()]
    for p in dirs:
        locale = Locale.parse(p.name, sep="-")
        po_file = p / "messages.po"
        if po_file.exists():
            with po_file.open("rb") as fp:
                catalog = read_po(fp, locale=locale, domain=po_file.name)
        else:
            catalog = Catalog(
                **CATALOG_OPTIONS,
                locale=locale,
                domain=po_file.name,
            )

        catalog.update(template)
        with po_file.open("wb") as fp:
            write_po(
                fp,
                catalog,
                width=None,
                sort_by_file=True,
                include_lineno=True,
                include_previous=False,
            )

        print(f"{po_file} written")


def generate(_):
    """Write translated markdown files under src/<code>/ for every catalog
    whose completion reaches COMPLETION_CUTOFF."""
    translations = HERE / "translations"
    po_files = translations.glob("*/messages.po")

    with (translations / "messages.pot").open("rb") as fp:
        template = read_po(fp)
    total_strings = len(template)

    for po_file in sorted(po_files):
        code = po_file.parent.name
        if code == "en":
            continue

        # Write markdown files
        with po_file.open("rb") as fp:
            catalog = read_po(fp)

        nb_translated = 0
        for k, m in catalog._messages.items():
            tm = template._messages[k]
            if m.fuzzy:
                continue
            if tm.string == m.string:
                continue
            if isinstance(m.string, str) and m.string.strip() == "":
                continue
            if isinstance(m.string, tuple) and any([x.strip() == "" for x in m.string]):
                continue
            nb_translated += 1

        pct = float(nb_translated / total_strings)
        count_info = "{:>4}/{:<4} {:>4}%".format(
            nb_translated, total_strings, round(pct * 100)
        )

        if round(pct, 2) < COMPLETION_CUTOFF:
            print("[-] {:8} {}".format(code, count_info))
            continue

        destdir = HERE / "src" / po_file.parent.name
        os.makedirs(destdir, exist_ok=True)

        nb_files = 0
        for _ in po2text(catalog, destdir):
            nb_files += 1

        print("[+] {:8} {} -- {}/".format(code, count_info, destdir.relative_to(HERE)))


def check(_):
    """Run Babel's catalog checks on every messages.po file and exit with
    status 1 when any catalog has errors."""
    translations = HERE / "translations"
    po_files = translations.glob("*/messages.po")
    has_errors = False

    for filename in po_files:
        code = filename.parent.name
        if code == "en":
            continue

        with filename.open("rb") as fp:
            catalog = read_po(fp)

        errors = list(catalog.check())
        if len(errors) == 0:
            print(f"[OK] {code}")
        else:
            has_errors = True
            print(f"[ERRORS] {code}")
            for [m, e] in errors:
                print(f" - #{m.lineno} - {m.id}")
                for x in e:
                    print(f" - {str(x)}")

    sys.exit(has_errors and 1 or 0)


def main():
    parser = ArgumentParser()
    subparsers = parser.add_subparsers(required=True)

    p_extract = subparsers.add_parser("extract", help="Extract messages")
    p_extract.set_defaults(func=extract)

    p_update = subparsers.add_parser("update", help="Update strings")
    p_update.set_defaults(func=update)

    p_generate = subparsers.add_parser("generate", help="Generate markdown files")
    p_generate.set_defaults(func=generate)

    p_check = subparsers.add_parser("check", help="Check translation files")
    p_check.set_defaults(func=check)

    args = parser.parse_args()
    args.func(args)


if __name__ == "__main__":
    main()