96 lines
3.6 KiB
Python
96 lines
3.6 KiB
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
import argparse
|
||
|
import csv
|
||
|
import io
|
||
|
import os
|
||
|
from ast import literal_eval
|
||
|
from pprint import pprint
|
||
|
|
||
|
# GIS CSV exports can contain extra-large fields; raise the csv module's
# per-field size cap to the maximum so csv.reader() does not raise on them.
csv.field_size_limit(2147483647)

# Strings that do not appear verbatim anywhere in the code (they are built
# at runtime), so the source scan below can never match them; they are
# carried over from the old translation file explicitly at the end of main().
extras = (
    'Enter a number between %(min)g and %(max)g',
    'Enter a number greater than or equal to %(min)g',
    'Enter a number less than or equal to %(max)g',
    'Enter an integer between %(min)g and %(max)g',
    'Enter an integer greater than or equal to %(min)g',
    'Enter an integer less than or equal to %(max)g',
)
|
||
|
|
||
|
def get_file_contents(filename):
    """Return the text of *filename*, decoded as UTF-8 or, failing that, Latin-1."""
    with open(filename, 'rb') as stream:
        raw_bytes = stream.read()
    for encoding in ('utf-8', 'latin-1'):
        try:
            return raw_bytes.decode(encoding)
        except UnicodeDecodeError:
            # Try the next candidate; Latin-1 maps every byte value,
            # so the final attempt cannot fail.
            continue
|
||
|
|
||
|
def get_csv_contents(filename):
    """Return every field of the CSV file *filename* as one flat list."""
    buffer = io.StringIO(get_file_contents(filename))
    return [field for record in csv.reader(buffer) for field in record]
|
||
|
|
||
|
def main(args):
    """Merge translations from an old Sahana Eden instance into a new language file.

    args.langfile  -- new translation file (with possibly missing strings)
    args.web2pydir -- path to the web2py root directory

    Strings translated in the old instance but absent from the new file are
    kept only if they still appear somewhere in the web2py source tree.
    The merged dictionary is written to a file named after args.langfile's
    basename in the current working directory.
    """
    basename = os.path.basename(args.langfile)

    # Load existing translations from the current (old) Sahana Eden instance.
    # The language files are Python dict literals; they declare utf-8 coding,
    # so read them as UTF-8 explicitly rather than with the locale default.
    with open(os.path.join(args.web2pydir, 'applications/eden/languages', basename),
              encoding='utf-8') as f:
        old_translations = literal_eval(f.read())

    # Load translations produced by sahana-lang-convert.py.
    with open(args.langfile, encoding='utf-8') as f:
        translations = literal_eval(f.read())

    # Strings the old instance translated but the new file does not contain.
    missing_translations = {key: value for key, value in old_translations.items()
                            if key not in translations}

    for root, dirs, files in os.walk(args.web2pydir):
        # Skip "languages" directories, which already contain translations.
        if 'languages' in dirs:
            dirs.remove('languages')
        for file in files:
            extension = os.path.splitext(file)[1].lower()
            filename = os.path.join(root, file)

            if extension in ('.py', '.html', '.js'):
                try:
                    file_contents = get_file_contents(filename)
                except UnicodeDecodeError:
                    continue
                # Naively search for the quoted string in source/template files.
                for key, value in missing_translations.copy().items():
                    if f"'{key}'" in file_contents or f'"{key}"' in file_contents:
                        translations[key] = value
                        del missing_translations[key]

            elif extension == '.csv':
                try:
                    # A set makes the per-key membership test below O(1)
                    # instead of scanning the whole field list each time.
                    csv_fields = set(get_csv_contents(filename))
                except UnicodeDecodeError:
                    continue
                # Naively search for the full string among the CSV fields.
                for key, value in missing_translations.copy().items():
                    if key in csv_fields:
                        translations[key] = value
                        del missing_translations[key]

    # Add the extra translations which are never matched verbatim.
    # Guard the lookup: the old language file may not contain every extra
    # string, and a bare old_translations[key] would raise KeyError.
    for key in extras:
        if key not in translations and key in old_translations:
            translations[key] = old_translations[key]

    # Write the updated translation file with the encoding its header declares.
    with open(basename, 'w', encoding='utf-8') as langfile:
        print('# -*- coding: utf-8 -*-', file=langfile)
        # width=8192 keeps each key/value pair on a single line.
        pprint(translations, langfile, 0, 8192)
|
||
|
|
||
|
if __name__ == '__main__':
    # Command-line entry point: two positional arguments, then delegate to main().
    argument_parser = argparse.ArgumentParser(
        description='Spotter Cluster Sahana Eden translation comparator')
    argument_parser.add_argument(
        'langfile',
        help='New translation file (with possibly missing strings).')
    argument_parser.add_argument(
        'web2pydir',
        help='Path to Web2py root directory.')
    main(argument_parser.parse_args())
|