Skip to content

Commit 6e7754e

Browse files
committed
Count translators with Python and polib, remove aliases and look for new ones
1 parent 2732110 commit 6e7754e

File tree

2 files changed

+35
-11
lines changed

2 files changed

+35
-11
lines changed

.github/workflows/update-lint-and-build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
with:
2020
python-version: 3
2121
- run: sudo apt-get install -y gettext
22-
- run: pip install requests cogapp
22+
- run: pip install requests cogapp polib
2323
- uses: actions/checkout@master
2424
with:
2525
ref: ${{ matrix.version }}

manage_translation.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,19 @@
1212
# * regenerate_tx_config: recreate configuration for all resources.
1313

1414
from argparse import ArgumentParser
15-
from collections import Counter
1615
import os
1716
from dataclasses import dataclass
17+
from difflib import SequenceMatcher
18+
from itertools import combinations
19+
from pathlib import Path
1820
from re import match
19-
from subprocess import call, run
21+
from subprocess import call
2022
import sys
2123
from typing import Self, Callable
2224
from urllib.parse import urlparse, parse_qs
2325

26+
from polib import pofile
27+
2428
LANGUAGE = 'pl'
2529

2630

@@ -168,14 +172,34 @@ def progress_from_resources(resources: list[ResourceLanguageStatistics], filter_
168172

169173

170174
def get_number_of_translators() -> int:
    """Return how many distinct translators are credited in the PO files.

    The credits are gathered by ``_fetch_translators``, known duplicate
    identities are collapsed by ``_remove_aliases``, and
    ``_check_for_new_aliases`` then verifies that no two remaining credits
    look like the same person.

    Returns:
        The number of credits left after alias removal.

    Raises:
        ValueError: propagated from ``_check_for_new_aliases`` when two
            credits are suspiciously similar.
    """
    # The helpers work on one shared set: _remove_aliases mutates it in place,
    # so the similarity check and the final count both see the cleaned data.
    translators = _fetch_translators()
    _remove_aliases(translators)
    _check_for_new_aliases(translators)
    return len(translators)
179+
180+
181+
def _fetch_translators() -> set[str]:
    """Collect the translator credits found in every ``*.po`` file.

    Each PO file under the current working directory (searched recursively)
    is opened with ``polib.pofile``. In its header, every line after the
    ``Translators:`` line that has the form ``<credit>, <year>`` contributes
    ``<credit>`` to the result.

    NOTE(review): this relies on ``POFile.header`` yielding the leading
    comment block with the ``# `` markers already stripped, so that one line
    equals ``Translators:`` exactly -- confirm against the polib docs.

    Returns:
        The set of distinct credit strings (the year is discarded).
    """
    translators = set()
    for file in Path().rglob('*.po'):
        header = pofile(os.fsdecode(file)).header.splitlines()
        try:
            first_record = header.index('Translators:') + 1
        except ValueError:
            # A file without a credits section simply has nobody to count;
            # it must not abort the whole scan.
            continue
        for translator_record in header[first_record:]:
            # Split on the LAST ", " so a credit that itself contains a comma
            # stays intact; lines without the separator (blank or unrelated
            # header lines) are not credit records and are skipped.
            translator, separator, _year = translator_record.rpartition(', ')
            if separator:
                translators.add(translator)
    return translators
189+
190+
191+
def _remove_aliases(translators: set[str]) -> None:
    """Collapse known duplicate identities into their canonical credit.

    Mutates ``translators`` in place. For every known ``(alias, main)`` pair,
    an ``alias`` entry is replaced by ``main``, so the person is counted
    exactly once whether the set held the alias, the canonical credit, or
    both.

    Args:
        translators: credit strings as produced by ``_fetch_translators``.
    """
    aliases = (
        ("m_aciek <maciej.olko@gmail.com>", "Maciej Olko <maciej.olko@gmail.com>"),
    )
    for alias, main in aliases:
        # Guard instead of remove()/assert: the alias may be absent from the
        # scanned files, and an assert would vanish under ``python -O``.
        if alias in translators:
            translators.discard(alias)
            # Keep the person in the count even when only the alias appeared.
            translators.add(main)
195+
196+
197+
def _check_for_new_aliases(translators: set[str], threshold: float = 0.64) -> None:
    """Fail when two credits look like the same person under different names.

    Every unordered pair of credits is compared with
    ``difflib.SequenceMatcher``, treating the characters ``<``, ``>`` and
    ``@`` as junk. The first pair whose similarity ratio exceeds
    ``threshold`` aborts the check.

    Args:
        translators: credit strings to compare pairwise.
        threshold: similarity ratio above which a pair is reported.

    Raises:
        ValueError: naming the offending pair and its ratio.
    """
    # Sort first: set iteration order varies between interpreter runs and
    # SequenceMatcher.ratio() can depend on argument order, so without a
    # fixed order a borderline pair could pass or fail at random.
    for pair in combinations(sorted(translators), 2):
        if (ratio := SequenceMatcher(lambda x: x in '<>@', *pair).ratio()) > threshold:
            raise ValueError(
                f"{pair} are similar ({ratio:.3f}). Please add them to aliases list or bump the limit."
            )
179203

180204

181205
def language_switcher(entry: ResourceLanguageStatistics) -> bool:

0 commit comments

Comments
 (0)