Files
homebox/.github/scripts/update_language_names.py
2025-12-28 21:43:52 +00:00

350 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Script to automatically update language names in the English translation file.
Queries Weblate for translation completion and language names.
Only adds languages with >=80% completion to en.json.
"""
import json
import logging
import sys
from pathlib import Path
from typing import Dict, List, Optional
import requests
from babel import Locale, UnknownLocaleError
LOCALES_DIR = Path('frontend/locales')
EN_JSON_PATH = LOCALES_DIR / 'en.json'
WEBLATE_API_URL = 'https://translate.sysadminsmedia.com/api'
WEBLATE_PROJECT = 'homebox'
WEBLATE_COMPONENT = 'frontend'
COMPLETION_THRESHOLD = 80.0 # Minimum completion percentage to include language
TIMEOUT = 10 # seconds
def setup_logging():
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s %(levelname)s: %(message)s'
)
def get_locale_files() -> List[str]:
"""Get all locale codes from JSON files in the locales directory."""
if not LOCALES_DIR.exists():
logging.error("Locales directory not found: %s", LOCALES_DIR)
return []
locale_codes = []
for file in sorted(LOCALES_DIR.glob('*.json')):
# Extract locale code from filename (e.g., "en.json" -> "en")
locale_code = file.stem
# Validate locale code format - should not contain dots
if '.' not in locale_code:
locale_codes.append(locale_code)
else:
logging.warning("Skipping invalid locale code: %s", locale_code)
logging.info("Found %d locale files", len(locale_codes))
return sorted(locale_codes)
def fetch_weblate_translations() -> Optional[Dict[str, Dict]]:
"""
Fetch translation statistics from Weblate API.
Returns:
Dict mapping locale code to translation data (percent, name, native_name)
or None if API is unavailable
"""
url = f"{WEBLATE_API_URL}/components/{WEBLATE_PROJECT}/{WEBLATE_COMPONENT}/translations/"
try:
# Weblate API may require pagination
translations = {}
page_url = url
while page_url:
logging.info("Fetching translations from Weblate: %s", page_url)
resp = requests.get(page_url, timeout=TIMEOUT)
if resp.status_code != 200:
logging.warning("Weblate API returned status %d", resp.status_code)
return None
data = resp.json()
for trans in data.get('results', []):
# Weblate uses underscores, we use hyphens
locale_code = trans.get('language_code', '').replace('_', '-')
percent = trans.get('translated_percent', 0.0)
lang_info = trans.get('language', {})
english_name = lang_info.get('name', '')
native_name = lang_info.get('native', '')
translations[locale_code] = {
'percent': percent,
'english_name': english_name,
'native_name': native_name
}
# Check for next page
page_url = data.get('next')
logging.info("Fetched %d translations from Weblate", len(translations))
return translations
except requests.exceptions.RequestException as e:
logging.warning("Failed to fetch from Weblate API: %s", e)
return None
except Exception as e:
logging.error("Unexpected error fetching Weblate data: %s", e)
return None
def get_language_name_from_babel(locale_code: str) -> Optional[str]:
"""
Get the language name using Babel in format "English (Native)".
Special handling for variants that need disambiguation (Portuguese, Chinese).
Args:
locale_code: Language/locale code (e.g., 'en', 'pt-BR', 'zh-CN')
Returns:
Language name in format "English (Native)" or None if cannot parse
"""
try:
# Special handling for ar-AA (non-standard code, use standard 'ar')
if locale_code == 'ar-AA':
locale = Locale.parse('ar')
else:
# Parse locale code using Babel
locale = Locale.parse(locale_code.replace('-', '_'))
# Get English display name
english_name = locale.get_display_name('en')
# Get native display name
native_name = locale.get_display_name(locale)
if not english_name:
return None
# Special handling for Portuguese variants (distinguish Brazil vs Portugal)
if locale_code == 'pt-BR':
native_base = native_name.split('(')[0].strip() if '(' in native_name else native_name
return f"Portuguese — Brazil ({native_base})"
elif locale_code == 'pt-PT':
native_base = native_name.split('(')[0].strip() if '(' in native_name else native_name
return f"Portuguese — Portugal ({native_base})"
# Special handling for Chinese variants (distinguish Simplified/Traditional and regions)
if locale_code == 'zh-CN':
native_base = native_name.split('(')[0].strip() if '(' in native_name else native_name
return f"Chinese — Simplified ({native_base})"
elif locale_code == 'zh-TW':
native_base = native_name.split('(')[0].strip() if '(' in native_name else native_name
return f"Chinese — Traditional ({native_base})"
elif locale_code == 'zh-HK':
native_base = native_name.split('(')[0].strip() if '(' in native_name else native_name
return f"Chinese — Hong Kong ({native_base})"
elif locale_code == 'zh-MO':
native_base = native_name.split('(')[0].strip() if '(' in native_name else native_name
return f"Chinese — Macau ({native_base})"
# Format: "English (Native)" if native name differs and is available
if native_name and native_name != english_name:
# Clean up nested parentheses for complex locales
if '(' in english_name and '(' in native_name:
# For cases like "Japanese (Japan) (日本語 (日本))"
# Simplify to "Japanese (日本語)"
english_base = english_name.split('(')[0].strip()
native_base = native_name.split('(')[0].strip()
return f"{english_base} ({native_base})"
else:
return f"{english_name} ({native_name})"
else:
return english_name
except (UnknownLocaleError, ValueError) as e:
logging.debug("Could not parse locale '%s' with Babel: %s", locale_code, e)
return None
def get_language_name(locale_code: str, weblate_data: Optional[Dict] = None) -> Optional[str]:
"""
Get the display name for a locale code.
Priority: Weblate API > Babel > None
Args:
locale_code: Language/locale code (e.g., 'en', 'pt-BR', 'zh-CN')
weblate_data: Translation data from Weblate (if available)
Returns:
Language name in format "English (Native)" or None if invalid
"""
# Validate locale code format
if '.' in locale_code or locale_code.startswith('languages.'):
logging.error("Invalid locale code format: %s", locale_code)
return None
# Try Weblate first
if weblate_data and locale_code in weblate_data:
english_name = weblate_data[locale_code].get('english_name', '')
native_name = weblate_data[locale_code].get('native_name', '')
if english_name:
# Format: "English (Native)" if both names available and different
if native_name and native_name != english_name:
return f"{english_name} ({native_name})"
else:
return english_name
# Fallback to Babel
babel_name = get_language_name_from_babel(locale_code)
if babel_name:
return babel_name
# If all else fails, return None (don't guess)
logging.warning("Could not determine language name for: %s", locale_code)
return None
def load_en_json() -> dict:
"""Load the English translation JSON file."""
if not EN_JSON_PATH.exists():
logging.error("English translation file not found: %s", EN_JSON_PATH)
return {}
try:
with EN_JSON_PATH.open('r', encoding='utf-8') as f:
return json.load(f)
except (IOError, json.JSONDecodeError) as e:
logging.error("Failed to load %s: %s", EN_JSON_PATH, e)
return {}
def save_en_json(data: dict):
"""Save the English translation JSON file."""
try:
with EN_JSON_PATH.open('w', encoding='utf-8') as f:
# Use 4-space indentation to match existing file format
json.dump(data, f, ensure_ascii=False, indent=4)
# Add newline at end of file
f.write('\n')
logging.info("Saved updated en.json")
except IOError as e:
logging.error("Failed to save %s: %s", EN_JSON_PATH, e)
sys.exit(1)
def update_language_names(en_data: dict, locale_codes: List[str], weblate_data: Optional[Dict] = None) -> bool:
"""
Update the languages section in en.json.
- Add new languages with >=80% completion (from Weblate) or that exist as locale files
- Never remove existing entries (even if completion drops below 80%)
Args:
en_data: The parsed en.json data
locale_codes: List of all locale codes from files
weblate_data: Translation data from Weblate (if available)
Returns:
True if changes were made, False otherwise
"""
# Ensure languages section exists
if 'languages' not in en_data:
en_data['languages'] = {}
logging.info("Created 'languages' section in en.json")
languages = en_data['languages']
original_languages = languages.copy()
# Process each locale file
added_count = 0
skipped_count = 0
for locale_code in locale_codes:
# Skip if already in languages (never remove existing entries)
if locale_code in languages:
continue
# Check Weblate completion threshold if data available
if weblate_data and locale_code in weblate_data:
percent = weblate_data[locale_code].get('percent', 0.0)
if percent < COMPLETION_THRESHOLD:
logging.info("Skipping %s: %.1f%% completion (threshold: %.1f%%)",
locale_code, percent, COMPLETION_THRESHOLD)
skipped_count += 1
continue
else:
logging.info("Including %s: %.1f%% completion", locale_code, percent)
else:
# If Weblate data not available, include locale file but log warning
logging.info("Including %s: Weblate data not available, locale file exists", locale_code)
# Get language name
language_name = get_language_name(locale_code, weblate_data)
if language_name:
languages[locale_code] = language_name
logging.info("Added language: %s = %s", locale_code, language_name)
added_count += 1
else:
logging.warning("Skipping %s: could not determine language name", locale_code)
skipped_count += 1
# Sort languages alphabetically by key
en_data['languages'] = dict(sorted(languages.items()))
# Check if anything changed
changed = (original_languages != en_data['languages'])
if changed:
logging.info("Updated %d language names, skipped %d", added_count, skipped_count)
else:
logging.info("All languages already present, no changes needed")
return changed
def main():
setup_logging()
logging.info("🔄 Starting language names update")
# Get all locale files
locale_codes = get_locale_files()
if not locale_codes:
logging.error("No locale files found")
sys.exit(1)
# Load English translation file
en_data = load_en_json()
if not en_data:
logging.error("Failed to load English translation file")
sys.exit(1)
# Fetch Weblate translation statistics
weblate_data = fetch_weblate_translations()
if weblate_data:
logging.info("Successfully fetched Weblate data for %d languages", len(weblate_data))
else:
logging.warning("Weblate data not available, proceeding with locale files only")
# Update language names
changed = update_language_names(en_data, locale_codes, weblate_data)
if changed:
save_en_json(en_data)
logging.info("✅ Language names updated successfully")
else:
logging.info("✅ No updates needed, en.json is already up-to-date")
sys.exit(0)
if __name__ == "__main__":
main()