PR#839: [WIP] feat: Add language filter script - fedora-websites

fedora-websites

#839 [WIP] feat: Add language filter script

Closed 5 years ago by ryanlerch. Opened 6 years ago by sijis.

sijis/fedora-websites feature/filter_languages into master

feat: Add language filter script

Sijis Aviles • 6 years ago

b38c4aa

tools/translated-languages.py

file added

+84

		`@@ -0,0 +1,84 @@`
		`+ """https://tagoh.bitbucket.io/zanata-js/module-zanata_project-Project.html#stats"""`
		`+ import xml.etree.ElementTree as ET`
		`+ import requests`
		`+`
		`+`
		def call_api(url):
		    """Send an authenticated GET request to *url* and return the body text.

		    The request carries the placeholder credentials below and the
		    ``word`` / ``detail`` query flags.

		    Args:
		        url: Absolute URL of the REST endpoint to query.

		    Returns:
		        The response body decoded as text.

		    Raises:
		        requests.HTTPError: If the server answers with a 4xx/5xx status.
		        requests.Timeout: If the server does not answer within 30 seconds.
		    """
		    headers = {
		        'X-Auth-User': 'your_username',
		        'X-Auth-Token': 'your_token',
		        'Content-Type': 'application/json',
		    }
		    querystring = {
		        'word': True,
		        'detail': False,
		    }
		    # requests waits indefinitely unless a timeout is given; bound the wait
		    # so an unresponsive server cannot hang the script.
		    r = requests.get(url, headers=headers, params=querystring, timeout=30)
		    # Fail here on an HTTP error instead of returning an error page that the
		    # caller would then try to parse as data.
		    r.raise_for_status()
		    return r.text
		`+`
		`+`
		def get_translations(domain='getfedora.org'):
		    """Fetch the raw statistics payload for one iteration of ``fedora-web``.

		    Args:
		        domain: Identifier substituted at the end of the statistics URL.

		    Returns:
		        Whatever ``call_api`` returns for that URL, unmodified.
		    """
		    stats_url = 'https://fedora.zanata.org/rest/stats/proj/fedora-web/iter/{0}'.format(domain)
		    return call_api(stats_url)
		`+`
		def get_domains():
		    """Fetch the ``fedora-web`` project description and return it parsed.

		    Returns:
		        The result of ``read_xml`` applied to the body that ``call_api``
		        returns for the project URL.
		    """
		    project_url = 'https://fedora.zanata.org/rest/projects/p/fedora-web'
		    return read_xml(call_api(project_url))
		`+`
		`+`
		def read_xml(results=None):
		    """Parse an XML document and return its root element.

		    Args:
		        results: The XML document as a string. When ``None`` (the default)
		            the document is read from ``results.xml`` in the current
		            working directory instead.

		    Returns:
		        The root ``xml.etree.ElementTree.Element`` of the parsed document.

		    Raises:
		        xml.etree.ElementTree.ParseError: If the document is empty or
		            malformed.
		        OSError: If ``results`` is ``None`` and ``results.xml`` cannot be
		            opened.
		    """
		    # Compare against None explicitly: an empty response string must be
		    # reported as a parse error, not silently replaced by the local file.
		    if results is not None:
		        return ET.fromstring(results)
		    return ET.parse('results.xml').getroot()
		`+`
		`+`
		def parse_xml(root):
		    """Collect the word-based statistics records found under *root*.

		    Each child of ``root[1]`` whose ``unit`` attribute is ``'WORD'`` is
		    copied into a dict and extended with a ``'percentage'`` key: the
		    rounded share of ``translated`` relative to ``total``.

		    Args:
		        root: Parsed XML root element.
		            NOTE(review): assumes the second child of ``root`` is the
		            container of the per-locale stat elements; confirm against
		            the API response.

		    Returns:
		        A list of dicts, one per ``WORD`` record, holding the element's
		        attributes plus ``'percentage'`` (an int from 0 to 100).

		    Raises:
		        IndexError: If ``root`` has fewer than two children.
		        KeyError: If a ``WORD`` record lacks ``translated`` or ``total``.
		        ValueError: If ``translated`` or ``total`` is not numeric.
		    """
		    results = []
		    stats = root[1]
		    for stat in stats:
		        # Work on a copy so the parsed tree is not modified as a side effect.
		        record = dict(stat.attrib)
		        if record.get('unit') != 'WORD':
		            continue
		        translated, total = float(record['translated']), float(record['total'])
		        # A record with nothing to translate has total == 0; report 0%
		        # instead of raising ZeroDivisionError.
		        percentage = (translated / total) * 100 if total else 0.0
		        record['percentage'] = round(percentage)
		        results.append(record)
		    return results
		`+`
		`+`
		def filter_translations(translations, min_percent=25.0):
		    """Keep the records whose ``'percentage'`` is at least *min_percent*.

		    Args:
		        translations: Iterable of mappings, each with a ``'percentage'`` key.
		        min_percent: Inclusive lower bound a record must reach to be kept.

		    Returns:
		        A new list with the qualifying records, in their original order.

		    Raises:
		        KeyError: If a record has no ``'percentage'`` key.
		    """
		    return [
		        record for record in translations
		        if record['percentage'] >= min_percent
		    ]
		`+`
		`+`
		def parse_domains(root):
		    """Return the ``id`` attribute of every child of ``root[5]``.

		    Args:
		        root: Parsed XML root element.
		            NOTE(review): assumes the sixth child of ``root`` is the
		            container listing the project's iterations; confirm against
		            the API response.

		    Returns:
		        A list of ``id`` strings in document order.

		    Raises:
		        IndexError: If ``root`` has fewer than six children.
		        KeyError: If a listed element has no ``id`` attribute.
		    """
		    domains = root[5]
		    return [domain.attrib['id'] for domain in domains]
		`+`
		`+`
		if __name__ == '__main__':
		    # List the identifiers published for the project, then print, for each
		    # one, the statistics records that pass the default filter threshold.
		    domain_ids = parse_domains(get_domains())

		    print('Domains: %s' % domain_ids)
		    for domain_id in domain_ids:
		        raw_stats = get_translations(domain=domain_id)
		        stats_root = read_xml(results=raw_stats)
		        records = parse_xml(stats_root)
		        print(filter_translations(records))