fix: Address feedback - add Weblate API, native names, validation, and completion threshold

Co-authored-by: katosdev <7927609+katosdev@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2025-12-28 21:27:06 +00:00
parent 153ecd1094
commit f3e817e139
4 changed files with 220 additions and 72 deletions

Binary file not shown.

View File

@@ -1,24 +1,26 @@
#!/usr/bin/env python3
"""
Script to automatically update language names in the English translation file.
Scans for locale files and ensures all languages are present in en.json.
Queries Weblate for translation completion and language names.
Only adds languages with >=80% completion to en.json.
"""
import json
import logging
import os
import sys
from pathlib import Path
from typing import Dict, List
from typing import Dict, List, Optional, Tuple
import requests
from babel import Locale, UnknownLocaleError
LOCALES_DIR = Path('frontend/locales')
EN_JSON_PATH = LOCALES_DIR / 'en.json'
# Mapping for special/custom locale codes that don't follow standard BCP 47
CUSTOM_LOCALE_MAPPINGS = {
'ar-AA': ('ar', 'Arabic'), # Generic Arabic
'en': ('en', 'English'),
}
WEBLATE_API_URL = 'https://translate.sysadminsmedia.com/api'
WEBLATE_PROJECT = 'homebox'
WEBLATE_COMPONENT = 'frontend'
COMPLETION_THRESHOLD = 80.0 # Minimum completion percentage to include language
TIMEOUT = 10 # seconds
def setup_logging():
@@ -38,44 +40,153 @@ def get_locale_files() -> List[str]:
for file in sorted(LOCALES_DIR.glob('*.json')):
# Extract locale code from filename (e.g., "en.json" -> "en")
locale_code = file.stem
locale_codes.append(locale_code)
# Validate locale code format - should not contain dots
if '.' not in locale_code:
locale_codes.append(locale_code)
else:
logging.warning("Skipping invalid locale code: %s", locale_code)
logging.info("Found %d locale files", len(locale_codes))
return sorted(locale_codes)
def get_language_name(locale_code: str) -> str:
def fetch_weblate_translations() -> Optional[Dict[str, Dict]]:
"""
Get the English display name for a locale code.
Fetch translation statistics from Weblate API.
Returns:
Dict mapping locale code to translation data (percent, name, native_name)
or None if API is unavailable
"""
url = f"{WEBLATE_API_URL}/components/{WEBLATE_PROJECT}/{WEBLATE_COMPONENT}/translations/"
try:
# Weblate API may require pagination
translations = {}
page_url = url
while page_url:
logging.info("Fetching translations from Weblate: %s", page_url)
resp = requests.get(page_url, timeout=TIMEOUT)
if resp.status_code != 200:
logging.warning("Weblate API returned status %d", resp.status_code)
return None
data = resp.json()
for trans in data.get('results', []):
# Weblate uses underscores, we use hyphens
locale_code = trans.get('language_code', '').replace('_', '-')
percent = trans.get('translated_percent', 0.0)
lang_info = trans.get('language', {})
english_name = lang_info.get('name', '')
native_name = lang_info.get('native', '')
translations[locale_code] = {
'percent': percent,
'english_name': english_name,
'native_name': native_name
}
# Check for next page
page_url = data.get('next')
logging.info("Fetched %d translations from Weblate", len(translations))
return translations
except requests.exceptions.RequestException as e:
logging.warning("Failed to fetch from Weblate API: %s", e)
return None
except Exception as e:
logging.error("Unexpected error fetching Weblate data: %s", e)
return None
def get_language_name_from_babel(locale_code: str) -> Optional[str]:
"""
Get the language name using Babel in format "English (Native)".
Args:
locale_code: Language/locale code (e.g., 'en', 'pt-BR', 'zh-CN')
Returns:
English display name for the language
Language name in format "English (Native)" or None if cannot parse
"""
# Check custom mappings first
if locale_code in CUSTOM_LOCALE_MAPPINGS:
_, name = CUSTOM_LOCALE_MAPPINGS[locale_code]
return name
try:
# Parse locale code using Babel
locale = Locale.parse(locale_code.replace('-', '_'))
# Special handling for ar-AA (non-standard code, use standard 'ar')
if locale_code == 'ar-AA':
locale = Locale.parse('ar')
else:
# Parse locale code using Babel
locale = Locale.parse(locale_code.replace('-', '_'))
# Get English display name
display_name = locale.get_display_name('en')
english_name = locale.get_display_name('en')
if not display_name:
# Fallback to language name if full display name not available
display_name = locale.english_name
# Get native display name
native_name = locale.get_display_name(locale)
return display_name
if not english_name:
return None
# Format: "English (Native)" if native name differs and is available
if native_name and native_name != english_name:
# Clean up nested parentheses for complex locales
if '(' in english_name and '(' in native_name:
# For cases like "Japanese (Japan) (日本語 (日本))"
# Simplify to "Japanese (日本語)"
english_base = english_name.split('(')[0].strip()
native_base = native_name.split('(')[0].strip()
return f"{english_base} ({native_base})"
else:
return f"{english_name} ({native_name})"
else:
return english_name
except (UnknownLocaleError, ValueError) as e:
logging.warning("Could not parse locale '%s': %s", locale_code, e)
# Fallback: capitalize the locale code
return locale_code.replace('-', ' ').replace('_', ' ').title()
logging.debug("Could not parse locale '%s' with Babel: %s", locale_code, e)
return None
def get_language_name(locale_code: str, weblate_data: Optional[Dict] = None) -> Optional[str]:
"""
Get the display name for a locale code.
Priority: Weblate API > Babel > None
Args:
locale_code: Language/locale code (e.g., 'en', 'pt-BR', 'zh-CN')
weblate_data: Translation data from Weblate (if available)
Returns:
Language name in format "English (Native)" or None if invalid
"""
# Validate locale code format
if '.' in locale_code or locale_code.startswith('languages.'):
logging.error("Invalid locale code format: %s", locale_code)
return None
# Try Weblate first
if weblate_data and locale_code in weblate_data:
english_name = weblate_data[locale_code].get('english_name', '')
native_name = weblate_data[locale_code].get('native_name', '')
if english_name:
# Format: "English (Native)" if both names available and different
if native_name and native_name != english_name:
return f"{english_name} ({native_name})"
else:
return english_name
# Fallback to Babel
babel_name = get_language_name_from_babel(locale_code)
if babel_name:
return babel_name
# If all else fails, return None (don't guess)
logging.warning("Could not determine language name for: %s", locale_code)
return None
def load_en_json() -> dict:
@@ -106,13 +217,16 @@ def save_en_json(data: dict):
sys.exit(1)
def update_language_names(en_data: dict, locale_codes: List[str]) -> bool:
def update_language_names(en_data: dict, locale_codes: List[str], weblate_data: Optional[Dict] = None) -> bool:
"""
Update the languages section in en.json with all locale codes.
Update the languages section in en.json.
- Add new languages with >=80% completion (from Weblate) or that exist as locale files
- Never remove existing entries (even if completion drops below 80%)
Args:
en_data: The parsed en.json data
locale_codes: List of all locale codes from files
weblate_data: Translation data from Weblate (if available)
Returns:
True if changes were made, False otherwise
@@ -125,14 +239,40 @@ def update_language_names(en_data: dict, locale_codes: List[str]) -> bool:
languages = en_data['languages']
original_languages = languages.copy()
# Add any missing languages
# Process each locale file
added_count = 0
skipped_count = 0
for locale_code in locale_codes:
if locale_code not in languages:
language_name = get_language_name(locale_code)
# Skip if already in languages (never remove existing entries)
if locale_code in languages:
continue
# Check Weblate completion threshold if data available
if weblate_data and locale_code in weblate_data:
percent = weblate_data[locale_code].get('percent', 0.0)
if percent < COMPLETION_THRESHOLD:
logging.info("Skipping %s: %.1f%% completion (threshold: %.1f%%)",
locale_code, percent, COMPLETION_THRESHOLD)
skipped_count += 1
continue
else:
logging.info("Including %s: %.1f%% completion", locale_code, percent)
else:
# If Weblate data not available, include locale file but log warning
logging.info("Including %s: Weblate data not available, locale file exists", locale_code)
# Get language name
language_name = get_language_name(locale_code, weblate_data)
if language_name:
languages[locale_code] = language_name
logging.info("Added language: %s = %s", locale_code, language_name)
added_count += 1
else:
logging.warning("Skipping %s: could not determine language name", locale_code)
skipped_count += 1
# Sort languages alphabetically by key
en_data['languages'] = dict(sorted(languages.items()))
@@ -141,7 +281,7 @@ def update_language_names(en_data: dict, locale_codes: List[str]) -> bool:
changed = (original_languages != en_data['languages'])
if changed:
logging.info("Updated %d language names", added_count)
logging.info("Updated %d language names, skipped %d", added_count, skipped_count)
else:
logging.info("All languages already present, no changes needed")
@@ -164,8 +304,15 @@ def main():
logging.error("Failed to load English translation file")
sys.exit(1)
# Fetch Weblate translation statistics
weblate_data = fetch_weblate_translations()
if weblate_data:
logging.info("Successfully fetched Weblate data for %d languages", len(weblate_data))
else:
logging.warning("Weblate data not available, proceeding with locale files only")
# Update language names
changed = update_language_names(en_data, locale_codes)
changed = update_language_names(en_data, locale_codes, weblate_data)
if changed:
save_en_json(en_data)