#2 rework of src-to-pot.sh
Closed 4 years ago by jibecfed. Opened 4 years ago by jibecfed.

file added
+71
@@ -0,0 +1,71 @@ 

+ #!/usr/bin/env python3

+ """Calls `./src-to-pot.sh` for all source repos"""

+ 

+ import argparse

+ import os

+ import urllib.request

+ import subprocess

+ import shlex

+ import yaml

+ 

+ 

+ def main():

+     """Handle params"""

+     parser = argparse.ArgumentParser(

+         description="Calls `./po-to-src.sh` for all source repos")

+     parser.add_argument("output_dir", help="Output directory")

+     args = parser.parse_args()

+ 

+     output_dir = args.output_dir

+ 

+     os.makedirs(output_dir, exist_ok=True)

+ 

+     output_dir = os.path.abspath(output_dir)

+     parse_yml(output_dir)

+ 

+ def call_podman_and_src_to_po(output_dir, doc):

+     command = "podman run --rm -it -v {o}:/output:z ".format(o=output_dir)

+     command += "-v {c}/src-to-pot.sh:/scripts/src-to-pot.sh:z ".format(c=os.getcwd())

+     command += "asamalik/fedora-docs-translations /scripts/src-to-pot.sh {d}".format(d=doc)

+     print(command)

+     args = shlex.split(command)

+     subprocess.run(args, check=True)

+ 

+ 

+ def parse_yml(output_dir):

+     """List repositories to convert to pot from antora yaml file"""

+ 

+     # download site.yml

+     urllib.request.urlretrieve(

+         "https://pagure.io/fedora-docs/docs-fp-o/raw/master/f/site.yml", "site.yml"

+     )

+ 

+     # Read site.yml

+     with open("site.yml", 'r') as stream:

+         data_loaded = yaml.load(stream, Loader=yaml.SafeLoader)

+ 

+     # Parse site.yml

+     for source in data_loaded['content']['sources']:

+         url = source['url']

+ 

+         if 'start_path' in source:

+             src_basedir = source['start_path']

+             doc = "--clone-target-repo --src-basedir \"{bd}\" {url} /output".format(

+                 bd=src_basedir, url=url)

+ 

+         elif 'branches' in source:

+             for branch in source['branches']:

+                 src_branch = branch

+                 doc = "--clone-target-repo --src-branch \"{br}\" {url} /output".format(

+                     br=src_branch, url=url)

+         else:

+             doc = "--clone-target-repo {url} /output".format(url=url)

+ 

+         call_podman_and_src_to_po(output_dir, doc)

+ 

+     # Remove site.yml

+     os.remove("site.yml")

+ 

+ 

+ if __name__ == '__main__':

+     main()

file modified
+124 -102
@@ -1,5 +1,5 @@ 

  #!/bin/bash

- 

+ #set -x

  usage () {

      echo "Converting an Antora source repo into a POT repo for transations"

      echo ""
@@ -23,7 +23,6 @@ 

      echo "  --clone-target-repo         Clones the PO/POT repo into the output directory"

  }

  

- 

  while true ; do

      case $1 in

          --src-branch )
@@ -79,14 +78,9 @@ 

  fi

  

  # TARGET_DIR need to exist

- if [ -d "$output_dir" ]; then

-     pushd $output_dir > /dev/null

-     output_dir=$(pwd)

-     popd > /dev/null

- else

-     echo "Error. The specified output directory doesn't exist"

-     exit 1

- fi

+ mkdir -p "$output_dir"

+ source_dir="$output_dir/_sources"

+ mkdir -p "$source_dir"

  

  if [ -z "$src_branch" ]; then

      src_branch="master"
@@ -96,132 +90,160 @@ 

      src_basedir=""

  fi

  

- tmp_english_src=$(mktemp -d)

+ # tmp_english_src=$(mktemp -d)

+ tmp_english_src=$(echo "$src_repo-$src_repo" | sed "s/[\/:]/-/g")

  

- if ! git clone -b $src_branch $src_repo $tmp_english_src ; then

+ if [ -d "$source_dir/$tmp_english_src" ]; then

+     pushd "$source_dir/$tmp_english_src" > /dev/null || exit

+     git pull

+     popd > /dev/null || exit

+ elif ! git clone -b $src_branch "$src_repo" "$source_dir/$tmp_english_src" ; then

      echo ""

      echo "Error cloning the specified git repository."

      rm -rf "$tmp_english_src"

      exit 1

  fi

  

- # Make sure the basedir exists

- if [ ! -d "$tmp_english_src/$src_basedir" ]; then

-     echo ""

-     echo "Error. The specified basedir doesn't exist in the git repository."

-     rm -rf "$tmp_english_src"

-     exit 1

- fi

- 

- # Make sure it's an Antora source

- if [ ! -f "$tmp_english_src/$src_basedir/antora.yml" ]; then

-     echo ""

-     echo "Error. There is no Antora source. (no antora.yml found)"

-     rm -rf "$tmp_english_src"

-     exit 1

- fi

+ # Validate the Antora source tree before any conversion starts.

+ # Globals:   module (read) - name of the Antora module to convert

+ # Arguments: $1 - base directory of the Antora component

+ # Outputs:   a description of the failed check on stdout

+ # Exits the whole script with status 1 when a check fails. The body is a

+ # { } group rather than a ( ) subshell so that `exit` is not confined to

+ # a subshell whose status the caller ignores.

+ check_dir() {

+     local basedir=$1

  

+     # Make sure the basedir exists

+     if [ ! -d "$basedir" ]; then

+         echo ""

+         echo "Error. The specified basedir doesn't exist in the git repository."

+         exit 1

+     fi

  

+     # Make sure it's an Antora source

+     if [ ! -f "$basedir/antora.yml" ]; then

+         echo ""

+         echo "Error. There is no Antora source. (no antora.yml found)"

+         exit 1

+     fi

  

+     # check if module dir exists

+     if [ ! -d "$basedir/modules/$module" ]; then

+         echo ""

+         echo "Error. The module you have specified doesn't exist."

+         echo ""

+         echo "There are:"

+         # list relative to basedir: the script no longer changes into it

+         ls -1 "$basedir/modules"

+         echo ""

+         echo "Tip: You don't have to specify a module if there is just one"

+         exit 1

+     fi

+ }

+ basedir="$source_dir/$tmp_english_src/$src_basedir"

  

  # Get component name from the antora.yml

- component=$(sed -n 's/^name: *\"*\([[:alnum:]_-]*\).*/\1/p' antora.yml)

+ component=$(sed -n 's/^name: *\"*\([[:alnum:]_-]*\).*/\1/p' "$basedir/antora.yml")

  

  # Get version name from the antora.yml

- version=$(sed -n 's/^version: *\"*\([[:alnum:]_-]*\).*/\1/p' antora.yml)

+ version=$(sed -n 's/^version: *\"*\([[:alnum:]_-]*\).*/\1/p' "$basedir/antora.yml")

+ 

  

  # I also need a module name. Two ways how to get it:

- #   1) exactly one module exists under ./module/

+ #   1) use the module from site.yml if existing

+ #   2) use the module from antora.yml

  #   2) the user specified it using --module MODULE and it exists

  # In case there are multiple modules under ./modules/

  # and the user hasn't specified one, throw an error.

- if [ ! $(ls -1 ./modules | wc -l) = 1 ] && [ -z $module ]; then

-     echo ""

-     echo "Error. This repository contains multiple antora modules,"

-     echo "and the script doesn't know which one to choose."

-     echo ""

-     echo "There are:"

-     ls -1 ./modules

-     echo ""

-     echo "Choose one and specify it using --module MODULE"

-     rm -rf "$tmp_english_src"

-     exit 1

- fi

  

- if [ -z $module ]; then

-     module=$(ls ./modules)

+ if [ -z "$module" ] || [ "$module" = "" ]; then

+     if [ -s "$basedir/site.yml" ] ; then

+         module=$(sed -n 's/.*start_page: *\"*\([[:alnum:]_-]*\).*/\1/p' "$basedir/site.yml")

+         module=$(ls $basedir/modules)

+     fi

+ 

+     # documentation-contributors-guide site.yml contains a site.yml but multiple repo

+     if [[ "$module" =~ $'\r' ]] || [[ "$module" =~ $'\n' ]]; then

+         module=""

+     fi

  fi

  

- if [ ! -d "modules/$module" ]; then

-     echo ""

-     echo "Error. The module you have specified doesn't exist."

-     echo ""

-     echo "There are:"

-     ls -1 ./modules

-     echo ""

-     echo "Tip: You don't have to specify a module if there is just one"

-     rm -rf "$tmp_english_src"

-     exit 1

+ if [ -z "$module" ] || [ "$module" = "" ]; then

+     module=$(sed -n 's/^start_page: *\"*\([[:alnum:]_-]*\).*/\1/p' "$basedir/antora.yml")

+     if [ "$module" = "" ]; then

+         # TODO: https://pagure.io/fedora-docs/docs-fp-o/ in pages/homepage

+         module=$(sed -n 's/^start-page: *\"*\([[:alnum:]_-]*\).*/\1/p' "$basedir/antora.yml")

+     fi

+ 

+     if [ "$module" = "" ]; then

+         # TODO: https://pagure.io/fedora-docs/release-docs-home/tree/f26

+         module="ROOT"

+     fi

  fi

  

- popd > /dev/null

+ check_dir "$basedir"

  

- if [ $module = "ROOT" ]; then

+ if [ "$module" = "ROOT" ]; then

      target_repo_name="$component"

  else

      target_repo_name="$component-$module"

  fi

  

- if [ "$clone_target_repo_name" ]; then

-     if [ -d "$output_dir/$target_repo_name" ]; then

-         echo ""

-         echo "Error. You chose to clone the target repo,"

-         echo "but the directory '$target_repo_name' already exist"

-         echo "under '$output_dir'."

-         echo ""

-         echo "Either delete it, or run this command without"

-         echo "the --clone-target-repo option"

-         rm -rf "$tmp_english_src"

+ 

+ echo -ne "Source: $src_repo basedir:$src_basedir branch:$src_branch is using this repository for pot/po files:https://pagure.io/fedora-l10n/$target_repo_name\n" >> "$output_dir/log.txt"

+ 

+ get_url="https://pagure.io/fedora-l10n/$target_repo_name.git"

+ push_url="ssh://git@pagure.io/fedora-l10n/$target_repo_name.git"

+ 

+ if [ -d "$output_dir/$target_repo_name" ]; then

+     pushd "$output_dir/$target_repo_name" > /dev/null || exit

+ 

+     push_url_current=$(git config --get remote.origin.pushurl)

+     get_url_current=$(git remote get-url origin)

+     popd > /dev/null || exit

+ 

+     if [ "$get_url" != "$get_url_current" ] ; then

+         echo "Git repository already exists"

+         echo "But the fetch remote url doesn't match $get_url"

+         echo "Current value is: $get_url_current"

          exit 1

      fi

  

-     # if I run this in a container that doesn't have push permissions, I need to be able to clone it

-     # while still configuring the right URL for push later

-     if ! git clone \

-                 --config "remote.origin.pushurl=ssh://git@pagure.io/fedora-l10n/$target_repo_name.git" \

-                 "https://pagure.io/fedora-l10n/$target_repo_name.git" "$output_dir/$target_repo_name" ; then

-         echo ""

-         echo "Error cloning the target repository."

-         echo "It might not exist. It should be here:"

-         echo "https://pagure.io/fedora-l10n/$target_repo_name"

-         rm -rf "$tmp_english_src"

+     if [ "$push_url" != "$push_url_current" ] ; then

+         echo "Git repository already exists"

+         echo "But the push remote url doesn't match $push_url"

+         echo "Current value is: $push_url_current"

          exit 1

      fi

+ 

+     pushd "$output_dir/$target_repo_name" > /dev/null || exit

+     git pull

+     popd > /dev/null || exit

+ 

+ # if I run this in a container that doesn't have push permissions, I need to be able to clone it

+ # while still configuring the right URL for push later

+ elif ! git clone \

+     --config "remote.origin.pushurl=$push_url" "$get_url" "$output_dir/$target_repo_name" ; then

+     echo ""

+     echo "Error cloning the target repository."

+     echo "It might not exist. It should be here:"

+     echo "https://pagure.io/fedora-l10n/$target_repo_name"

+     exit 1

  fi

-     

  

  module_dir="$output_dir/$target_repo_name/pot/$version"

  

  

  # Confert all .adoc files to .pot

- pushd "$tmp_english_src/$src_basedir/modules/$module" > /dev/null

+ pushd "$basedir/modules/$module" > /dev/null || exit

  

- for adoc_file in $(find . -name '*.adoc'); do

-     pot_file="$module_dir/$(dirname $adoc_file)/$(basename -s .adoc $adoc_file).pot"

+ while IFS= read -r -d '' adoc_file

+ do

+     pot_file=$module_dir/$(dirname "$adoc_file")/$(basename -s .adoc "$adoc_file").pot

  

      # generate new POT files

  

      if ! po4a-gettextize \

                  --format asciidoc \

-                 --option debug split_attributelist \

                  --master "$adoc_file" \

                  --master-charset "UTF-8" \

                  --localized-charset "UTF-8" \

                  --po "$pot_file" ; then

          echo ""

          echo "Error converting an adoc file: $adoc_file"

-         rm -rf "$tmp_english_src"

          exit 1

      fi

  
@@ -229,50 +251,50 @@ 

  

      if [ -d "$output_dir/$target_repo_name/po" ] ; then

  

-         for lang in $(ls "$output_dir/$target_repo_name/po"); do

- 

-             po_file="$output_dir/$target_repo_name/po/$lang/$version/$(dirname $adoc_file)/$(basename -s .adoc $adoc_file).po"

+         for lang in "$output_dir/$target_repo_name/po"/*

+         do

+             [[ -d "$output_dir/$target_repo_name/po/$lang" ]] || break  # handle the case of no *.wav files

+             po_file="$output_dir/$target_repo_name/po/$lang/$version/$(dirname "$adoc_file")/$(basename -s .adoc "$adoc_file").po"

+ 

+             # before the translation platform sends it, the language may not

+             # exists, most of the time because the branch exists in EN

+             # but not for the target language

+             if [ ! -e "$po_file" ] ; then

+                 echo "missing po file: $po_file"

+                 break

+             fi

  

              # po4a-updatepo would be angry otherwise

-             sed -i 's/Content-Type: text\/plain; charset=CHARSET/Content-Type: text\/plain; charset=UTF-8/g' $po_file

+             sed -i 's/Content-Type: text\/plain; charset=CHARSET/Content-Type: text\/plain; charset=UTF-8/g' "$po_file"

  

              if ! po4a-updatepo \

                          --format asciidoc \

-                         --option debug split_attributelist \

                          --master "$adoc_file" \

                          --master-charset "UTF-8" \

                          --po "$po_file" ; then

                  echo ""

                  echo "Error updating $lang PO file for: $adoc_file"

-                 #rm -rf "$tmp_english_src"

                  #exit 1

              fi

          done

      fi

- done

+ done <   <(find . -name '*.adoc' -print0)

  

- popd > /dev/null

+ popd > /dev/null || exit

  

  

  # Convert the antora.yml to antora.yml.pot

  # if this is the main module

- if [ $module = "ROOT" ]; then

-     pushd "$tmp_english_src/$src_basedir" > /dev/null

+ if [ "$module" = "ROOT" ]; then

+     pushd "$basedir" > /dev/null || exit

      if ! po4a-gettextize \

-                 --format asciidoc \

-                 --option debug split_attributelist \

+                 --format yaml \

                  --master "antora.yml" \

                  --master-charset "UTF-8" \

                  --po "$module_dir/antora.yml.pot" ; then

          echo ""

          echo "Error converting the antora.yml file"

-         rm -rf "$tmp_english_src"

          exit 1

      fi

-     popd > /dev/null

+     popd > /dev/null || exit

  fi

- 

- 

- rm -rf $tmp_english_src

- 

- echo -ne "Source: $src_repo basedir:$src_basedir branch:$src_branch is using this repository for pot/po files:https://pagure.io/fedora-l10n/$target_repo_name\n" >> "$output_dir/log.txt" 

\ No newline at end of file

This includes the work from https://pagure.io/fedora-docs/translations-scripts/pull-request/1 (which is the refactoring of all-src-to-pot.sh into all-src-to-pot.py) and adds a rework of src-to-pot.sh.

Done on src-to-pot.sh:

  • add local storage of source repositories (folder name sucks, but it works)
  • add local storage of translated-source repositories
  • add generic handling of languages
  • bring support for file names with spaces
  • removes debug attribute of po4a
  • fix bug: antora.yml is now converted with the po4a yaml format instead of asciidoc
  • applies the suggestions of shellcheck: https://www.shellcheck.net

To be tested:

  • does the cache work correctly when multiple docs use the same repository but different branches
  • confirm the module deduction is correct

To be discussed:

  • should we store source repositories in /output folder?
  • should we write a python script instead of bash?
  • related to the last point: what kind of log would we like?

Pull-Request has been closed by jibecfed

4 years ago
Metadata