From dad0cbcc5363895ce110d35d9a4c723238e5e1d6 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Holcroft Date: Sep 09 2023 19:41:44 +0000 Subject: website: display errors different types of errors can be seen while processing the stats let's share this information on the website, to make debugging easier also took the opportunity to have the release define on all hugo content --- diff --git a/build_website.py b/build_website.py index 6eb47f3..5df4da4 100755 --- a/build_website.py +++ b/build_website.py @@ -156,7 +156,7 @@ def main(): # prevent containers and alternative names to be included if code in cldr_territories_info.keys(): package_statistics_file = os.path.join(static_territories_folder, code, "_index.md") - generate_territory_index(package_statistics_file, cldr_territories[code], code, cldr_territories_info.get(code, {}), cldr_version) + generate_territory_index(args.results, package_statistics_file, cldr_territories[code], code, cldr_territories_info.get(code, {}), cldr_version) log.info("Copy translation memories") languages = [ @@ -171,7 +171,7 @@ def main(): log.info("done") -def generate_static_pages_langs(results: str, code: str, content: dict, destination_file: str, territories: list[str], tm_folder: str, static_tm_folder: str) -> None: +def generate_static_pages_langs(release: str, code: str, content: dict, destination_file: str, territories: list[str], tm_folder: str, static_tm_folder: str) -> None: """ Aggregate info and call language template """ log = logging.getLogger("buildWebsite.generate_static_pages_langs") data = content @@ -180,21 +180,29 @@ def generate_static_pages_langs(results: str, code: str, content: dict, destinat ) data["lang_name_local"] = langtable.language_name(languageId=code) data["scripts"] = langtable.list_scripts(languageId=code) - data["results"] = results + data["release"] = release data["lang_code"] = code data["now"] = datetime.datetime.utcnow() data["files"] = defaultdict(dict) - data["files"]["compendium"]["url"] = 
f"/{results}/{code}.po.gz" + data["files"]["compendium"]["url"] = f"/{release}/{code}.po.gz" data["files"]["compendium"]["size"] = os.path.getsize(os.path.join(tm_folder, f"{code}.po.gz")) - data["files"]["terminology"]["url"] = f"/{results}/{code}.terminology.po.gz" + data["files"]["terminology"]["url"] = f"/{release}/{code}.terminology.po.gz" data["files"]["terminology"]["size"] = os.path.getsize(os.path.join(tm_folder, f"{code}.terminology.po.gz")) - data["files"]["tmx"]["url"] = f"/{results}/{code}.tmx.gz" + data["files"]["tmx"]["url"] = f"/{release}/{code}.tmx.gz" data["files"]["tmx"]["size"] = os.path.getsize(os.path.join(tm_folder, f"{code}.tmx.gz")) - data["files"]["csv"]["url"] = f"/{results}/{code}.csv" + data["files"]["csv"]["url"] = f"/{release}/{code}.csv" data["files"]["csv"]["size"] = os.path.getsize(os.path.join(static_tm_folder, f"{code}.csv")) if len(territories) > 0: data["territories"] = territories + data["could_not_process_count"] = sum(value["could_not_process"] == 1 for value in data["po"]) + data["polib_error_count"] = sum(value["polib_error"] != "" for value in data["po"]) + + # remove local path + for file in data["po"]: + path = f"./results/{release}/packages/{file['package']}/" + file["filename"] = file["filename"].replace(path, " ") + # sort content data["packages"] = sorted(data["packages"].items(), key=lambda x: x[1]['progress'], reverse=True) @@ -205,7 +213,7 @@ def generate_static_pages_packages(release: str, package: str, statistics: dict, """ Aggregate info and call package template """ log = logging.getLogger("buildWebsite.generate_static_pages_packages") data = statistics - data["results"] = release + data["release"] = release data["package"] = package data["now"] = datetime.datetime.utcnow() @@ -214,6 +222,10 @@ def generate_static_pages_packages(release: str, package: str, statistics: dict, data["stats"] = {} data["stats"]["languages"] = {} + # in some rare cases, a package may have no file + if "po" not in statistics.keys(): 
+ data["po"] = {} + if "error" in data["stats"]["languages"].keys(): data["started_languages"] = len(data["stats"]["languages"]) - 1 data["no_languages"] = len(data["stats"]["languages"]["error"]["filename"].split("./")) - 1 @@ -221,11 +233,19 @@ def generate_static_pages_packages(release: str, package: str, statistics: dict, data["started_languages"] = len(data["stats"]["languages"]) data["no_languages"] = 0 + data["could_not_process_count"] = sum(data["po"][value]["could_not_process"] == 1 for value in data["po"]) + data["polib_error_count"] = sum(data["po"][value]["polib_error"] != "" for value in data["po"]) + # remove local path + path = f"./results/{release}/packages/{package}/" + for lang in data["stats"]["languages"].keys(): - path = f"./results/{release}/packages/{package}/" data["stats"]["languages"][lang]["filename"] = data["stats"]["languages"][lang]["filename"].replace(path, " ") + for file, stats in data["po"].copy().items(): + data["po"][file.replace(path, " ")] = stats + del data["po"][file] + # sort content data["stats"]["languages"] = sorted(data["stats"]["languages"].items(), key=lambda x: x[1]['progress'], reverse=True) @@ -255,18 +275,19 @@ def generate_package_index(distribution: str, destination_file: str) -> None: """ Aggregate info and call package index template """ log = logging.getLogger("buildWebsite.generate_package_index") data = dict() - data["distribution"] = distribution + data["release"] = distribution data["now"] = datetime.datetime.utcnow() apply_jinja_template(data, destination_file, "_index.package.md") -def generate_territory_index(destination_file: str, name: list[str], code: str, data: dict, cldr_version: str) -> None: +def generate_territory_index(release: str, destination_file: str, name: list[str], code: str, data: dict, cldr_version: str) -> None: """ Aggregate info and call territory index template """ log = logging.getLogger("buildWebsite.generate_package_index") data["name"] = name data["code"] = code 
data["cldr_version"] = cldr_version + data["release"] = release apply_jinja_template(data, destination_file, "_index.territory.md") diff --git a/templates/_index.language.md b/templates/_index.language.md index 75291bc..5ad6d2a 100644 --- a/templates/_index.language.md +++ b/templates/_index.language.md @@ -1,5 +1,6 @@ --- title: "Languages for {{ release }}" +release: {{ release }} date: {{ now }} layout: "list_languages" --- \ No newline at end of file diff --git a/templates/_index.package.md b/templates/_index.package.md index 0c1f25b..3560416 100644 --- a/templates/_index.package.md +++ b/templates/_index.package.md @@ -1,7 +1,8 @@ --- -title: "Packages for {{ distribution }}" +title: "Packages for {{ release }}" date: {{ now }} layout: "list_packages" +release: {{ release }} --- This listing aims at making it easy to find packages with files for which no languages could be deducted. \ No newline at end of file diff --git a/templates/_index.release.md b/templates/_index.release.md index d083669..2a51ce4 100644 --- a/templates/_index.release.md +++ b/templates/_index.release.md @@ -2,6 +2,7 @@ title: "Statistics for {{ release }}" date: {{ now }} layout: "release" +release: {{ release }} --- Fedora {{ release }}: diff --git a/templates/_index.territory.md b/templates/_index.territory.md index ad6e959..6cce853 100644 --- a/templates/_index.territory.md +++ b/templates/_index.territory.md @@ -1,5 +1,6 @@ --- title: "{{ code }} {{ name }}" +release: {{ release }} --- Data coming from Unicode consortium (CLDR {{ cldr_version }}): diff --git a/templates/language.md b/templates/language.md index 32fb6a0..bd3042e 100644 --- a/templates/language.md +++ b/templates/language.md @@ -1,28 +1,30 @@ --- -title: "{{ lang_code }}-{{ lang_name_en }} ({{ lang_name_local }})" +title: "{{ lang_code }}-{{ lang_name_en }} ({{ lang_name_local }}) - translation progress for {{ release }}" date: {{ now }} code: {{ lang_code }} name_english: {{ lang_name_en }} name_local: {{ 
lang_name_local }} progress: {{ '{:.2f}'.format(progress) }} progress_d: {{ '{:.2f}'.format(progress_d) }} -release: {{ results }} +release: {{ release }} {%- if territories %} territories: {%- for territory in territories %} - {{ territory }} {%- endfor %} {%- endif %} +polib_error_count: {{ polib_error_count }} +could_not_process_count: {{ could_not_process_count }} --- -Language progress for {{ lang_name_en }} ({{ lang_code }}) in Fedora {{ results }} is: +Language progress for {{ lang_name_en }} ({{ lang_code }}) in Fedora {{ release }} is: * {{ '{:.2f}'.format(progress) }}% when we only look on started packages for this language. -* {{ '{:.2f}'.format(progress_d) }}% when we compare to every single translatable string in Fedora {{ results }}. +* {{ '{:.2f}'.format(progress_d) }}% when we compare to every single translatable string in Fedora {{ release }}. Possible scripts are: {% for script in scripts -%}{{ script }} {%- endfor %} -* Total translatable string in Fedora {{ results }}: {{ totalsourcewords_d }} +* Total translatable string in Fedora {{ release }}: {{ totalsourcewords_d }} * Source words to translate in started packages: {{ totalsourcewordssum }} * Translated words: {{ translatedsourcewords }} @@ -37,7 +39,7 @@ Packages: | Name | Translated words | Total source words | Progress (%) | Language teams | |------|------------------:|-------------------:|-------------:|----------------| {% for package, stats in packages %} -{%- set output = " | [" ~ package ~ "]({{< ref \"/" ~ results ~ "/package/" ~ package ~ ".md\" >}})" -%} +{%- set output = " | [" ~ package ~ "]({{< ref \"/" ~ release ~ "/package/" ~ package ~ ".md\" >}})" -%} {%- set output = output ~ " | " ~ stats.translatedsourcewords -%} {%- set output = output ~ " | " ~ stats.totalsourcewordssum -%} {%- set output = output ~ " | " ~ '{:.1f}'.format(stats.progress) -%} @@ -45,4 +47,43 @@ Packages: {%- set output = output ~ " | " -%} {{ output }} {% endfor -%} -{.sortable} \ No newline at end of 
file +{.sortable} + +{% if polib_error_count > 0 or could_not_process_count > 0 %} +# Errors on PO files + +{% if polib_error_count > 0 %} +## Error with polib +We use the po metadata to get the language code and the team, but sometimes it fails, here are the files. + +| Package | Lang from file path/name | polib error | Team | +|---------|--------------------------|-------------|------| +{% for file in po if file.polib_error != "" -%} +{%- set output = " | [" ~ file.package ~ "]({{< ref \"/" ~ release ~ "/package/" ~ file.package ~ ".md\" >}})" -%} +{%- set output = output ~ " | " ~ file.filename -%} +{%- set output = output ~ " | " ~ file.polib_error -%} +{%- set output = output ~ " | " ~ file.metadata_language_team -%} +{%- set output = output ~ " | " -%} +{{ output }} +{% endfor -%} +{.sortable} +{% endif %} + +{% if could_not_process_count > 0 %} +## Error with calcstats + +We use the calcstats from translate toolkit to get the translation progress, but sometimes it fails, here are the files: + +| Package | Lang from file path/name | Team | +|---------|--------------------------|------| +{% for file in po if file.could_not_process == 1 -%} +{%- set output = " | [" ~ file.package ~ "]({{< ref \"/" ~ release ~ "/package/" ~ file.package ~ ".md\" >}})" -%} +{%- set output = output ~ " | " ~ file.filename -%} +{%- set output = output ~ " | " ~ file.metadata_language_team -%} +{%- set output = output ~ " | " -%} +{{ output }} +{% endfor -%} +{.sortable} +{% endif %} + +{% endif %} \ No newline at end of file diff --git a/templates/package.md b/templates/package.md index 33ecfb0..b6abc6d 100644 --- a/templates/package.md +++ b/templates/package.md @@ -1,19 +1,25 @@ --- -title: "{{ package }}" +title: "{{ package }} - translation progress for {{ release }}" +package: {{ package }} date: {{ now }} started_languages: {{ started_languages }} no_languages: {{ no_languages }} +polib_error_count: {{ polib_error_count }} +could_not_process_count: {{ could_not_process_count 
}} +release: {{ release }} --- The package {{ package }}: -* represents {{ totalsourcewords }} source words to be translated, -* is translated into {{ stats.languages|length }} languages in Fedora {{ results }}, -* contains {{ no_languages }} files for which no languages could be deducted. +* represents {{ totalsourcewords }} source words to be translated +* is translated into {{ stats.languages|length }} languages in Fedora {{ release }} +{% if no_languages > 0 %}* no languages could be deducted for {{ no_languages }} files {% endif %} +{% if polib_error_count > 0 %}* polib could not open {{ polib_error_count }} files to extract metadata{% endif %} +{% if could_not_process_count > 0 %}* contains {{ could_not_process_count }} po stats could not be processes{% endif %} | Language | Translated words | Total source words | Progress (%) | Files | |----------|-----------------:|-------------------:|-------------:|-------| {% for lang, stat in stats.languages -%} -{%- set output = " | [" ~ lang ~ "]({{< ref \"/" ~ results ~ "/language/" ~ lang ~ ".md\" >}})" -%} +{%- set output = " | [" ~ lang ~ "]({{< ref \"/" ~ release ~ "/language/" ~ lang ~ ".md\" >}})" -%} {%- set output = output ~ " | " ~ stat.translatedsourcewords -%} {%- set output = output ~ " | " ~ stat.totalsourcewordssum -%} {%- set output = output ~ " | " ~ '{:.1f}'.format(stat.progress) -%} @@ -21,4 +27,59 @@ The package {{ package }}: {%- set output = output ~ " | " -%} {{ output }} {% endfor -%} -{.sortable} \ No newline at end of file +{.sortable} + +{% if polib_error_count > 0 or could_not_process_count > 0 or no_languages > 0 %} +# Errors on PO files +{% if no_languages > 0 %} +## Errors on language deduction +It were not possible to properly deduct the language code for the following files. 
+ +| Lang from file path/name | Lang from file path/name | team from po metadata | polib error | +|--------------------------|--------------------------|-----------------------|-------------| +{% for file in po if po[file].lang_code_chosen == "error" -%} +{%- set output = " | " ~ file -%} +{%- set output = output ~ " | " ~ po[file].lang_in_path -%} +{%- set output = output ~ " | " ~ po[file].metadata_language_team -%} +{%- set output = output ~ " | " ~ po[file].polib_error -%} +{%- set output = output ~ " | " -%} +{{ output }} +{% endfor -%} +{.sortable} +{% endif %} + +{% if polib_error_count > 0 %} +## Error with polib +We use the po metadata to get the language code and the team, but sometimes it fails, here are the files. + +| Lang from file path/name | Lang code chosen | polib error | Team | +|--------------------------|------------------|-------------|------| +{% for file in po if po[file].polib_error != "" -%} +{%- set output = output ~ " | " ~ file -%} +{%- set output = output ~ " | " ~ po[file].lang_code_chosen -%} +{%- set output = output ~ " | " ~ po[file].polib_error -%} +{%- set output = output ~ " | " ~ po[file].metadata_language_team -%} +{%- set output = output ~ " | " -%} +{{ output }} +{% endfor -%} +{.sortable} +{% endif %} + +{% if could_not_process_count > 0 %} +## Error with calcstats + +We use the calcstats from translate toolkit to get the translation progress, but sometimes it fails, here are the files: + +| Package | Lang code chosen | Team | +|---------|------------------|------| +{% for file in po if po[file].could_not_process == 1 -%} +{%- set output = output ~ " | " ~ file -%} +{%- set output = output ~ " | " ~ po[file].lang_code_chosen -%} +{%- set output = output ~ " | " ~ po[file].metadata_language_team -%} +{%- set output = output ~ " | " -%} +{{ output }} +{% endfor -%} +{.sortable} +{% endif %} + +{% endif %} \ No newline at end of file diff --git a/todo.md b/todo.md index e2e843a..632c09f 100644 --- a/todo.md +++ b/todo.md @@ 
-11,14 +11,9 @@ direct call to: # build_tm.py -move error detection (check_lang) into %language%/stats.json and display erros -move error files into %language%/stats.json and make these accessible via website remove terminology (someone who wants it can do it locally) # website -list why we could not deduct error files - -# global - - +territory: table per spoken languages +language: table across releases \ No newline at end of file diff --git a/website/themes/beautifulhugo/layouts/_default/list_languages.html b/website/themes/beautifulhugo/layouts/_default/list_languages.html index 4e862b4..19d365b 100644 --- a/website/themes/beautifulhugo/layouts/_default/list_languages.html +++ b/website/themes/beautifulhugo/layouts/_default/list_languages.html @@ -5,21 +5,35 @@
{{ .Content }} - +

+ Column explanation: +

+

- + - + + + + {{ range sort .Pages "Title" "asc" }} + + + {{ end }}
Click on column headers to sort values
codeCode English name Local nameProgress (%)Progress vs started packages (%)Progress vs release (%)File parsing errorsPolib errors
{{ .Params.code }} {{ .Params.name_english }} {{ .Params.name_local }}{{ .Params.progress }} {{ .Params.progress_d }}{{ .Params.could_not_process_count }}{{ .Params.polib_error_count }}
diff --git a/website/themes/beautifulhugo/layouts/_default/list_packages.html b/website/themes/beautifulhugo/layouts/_default/list_packages.html index 8276d0f..0ff2f9f 100644 --- a/website/themes/beautifulhugo/layouts/_default/list_packages.html +++ b/website/themes/beautifulhugo/layouts/_default/list_packages.html @@ -11,19 +11,26 @@
  • Package: package name in Fedora operating system
  • Languages: number of identified languages
  • No language: number of translation files for which no language could be deduced
  • +
  • File parsing errors: files for which PO stats could not be extracted
  • +
  • Polib errors: polib could not open the file to extract metadata
  • + + + {{ range sort .Pages "Title" "asc" }} - + + + {{ end }}
    Click on column headers to sort values
    Package Language No languageFile parsing errorsPolib errors
    {{ .Title }}{{ .Params.package }} {{ .Params.started_languages }} {{ .Params.no_languages }}{{ .Params.could_not_process_count }}{{ .Params.polib_error_count }}