1
0
Fork 0
forked from len0rd/rockbox
foxbox/tools/updatelang
Solomon Peachy a88ef80560 lang: Complain if there are multiple target matches for a given string
The tooling will always use the *final* match, which may or may not be
what is desired.  Treat this as a bug, and complain appropriately.

However, there is a special case.  The RTC set screen uses strings that
include the device button names. There should be an entry for the
specific device, but if not, we wanted to fall back to the string
specified by the 'rtc' feature flag as opposed to falling back to the
default, empty string.

To still support this, add a special "FALLBACK" value; If we end up
using this for a device, the tooling will treat this as a bug, and
complain accordingly.

This should fix FS#13615 and FS#13616, and may introduce build failures
on targets that are missing appropriate entries.  We'll see.

Change-Id: Ie78bb247f968e19d450a0fbf6e1177b6d01126a1
2025-05-04 08:53:22 -04:00

601 lines
17 KiB
Perl
Executable file

#!/usr/bin/perl -s -w
# __________ __ ___.
# Open \______ \ ____ ____ | | _\_ |__ _______ ___
# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
# \/ \/ \/ \/ \/
#
# Copyright (C) 2020-2024 Solomon Peachy
#
use utf8;
use File::Basename;
use Unicode::Normalize;
use open qw( :std :encoding(UTF-8) );
binmode(STDOUT, ":encoding(UTF-8)");
# Return a copy of the given string with leading and trailing whitespace
# removed and every tab character deleted (including tabs in the middle).
sub trim {
    my ($text) = @_;
    # Alias $_ to the local copy so all three edits apply to it in turn.
    for ($text) {
        s/^\s+//;
        s/\s+$//;
        tr/\t//d;
    }
    return $text;
}
# Parse a language file into a hash of phrases.
#
# Argument:
#   $filename - path of the .lang file to read.
#
# Returns a hash (as a list) keyed by phrase id.  Each value is a hash
# reference holding 'notes' (initially ""), 'new' (initially 0) and one
# nested hash per section that was seen: 'phrase' (field name => value, e.g.
# id/desc/user) and 'source' / 'dest' / 'voice' (target name => string).
# Two bookkeeping keys are added as well:
#   'HEADER' - array ref of the comment lines found outside of any phrase
#   'ORDER'  - array ref of the phrase ids in the order they appear
#
# Dies if the file cannot be opened.
sub parselangfile {
    my ($filename) = @_;
    my %phrases;
    my @order;
    my %empty = ( #'phrase' => {},
                  #'source' => {},
                  #'dest' => {},
                  #'voice' => {},
                  'notes' => "",
                  'new' => 0
                );
    my %thisphrase = %empty;

    # Three-argument open with a lexical handle: the filename can never be
    # interpreted as a mode or pipe, and no global FH is clobbered.  The
    # default encoding layers from the 'open' pragma still apply.
    open(my $in, '<', $filename) || die("Can't open $filename: $!");
    my @lines = <$in>;
    close($in);

    my $pos = 'lang';   # Current section: lang (outside a phrase), phrase, source, dest or voice
    my $id = '';        # Id of the phrase currently being parsed
    my @comments;

    foreach my $line (@lines) {
        $line = trim($line);

        if ($line =~ /^ *###/) {
            # Filter out warnings from prior runs
            next;
        } elsif ($line =~ /^ *#/) {
            # comments are ignored, but retained when they sit outside a phrase
            push(@comments, "$line\n") if ($pos eq 'lang');
            next;
        } elsif ($pos eq 'phrase' && $line =~ /^([^:]+): ?(.*)$/) {
            # "field: value" line directly inside <phrase>
            $thisphrase{$pos}->{$1} = $2;
            if ($1 eq 'id') {
                push(@order, $2);
                $id = $2;
            }
        } elsif ($pos ne 'phrase' && $line =~ /^([^:]+): ?\"?([^\"]*)\"?$/) {
            # "target[,target...]: "string"" line inside a section
            my @targets = split(',', $1);
            my $w;
            if ($id ne 'VOICE_PAUSE') {
                $w = trim($2);
            } else {
                # VOICE_PAUSE keeps its whitespace verbatim
                $w = $2;
            }
            $w = NFD($w);  # Unicode decompose (once, not once per target)

            foreach (@targets) {
                my $target = trim($_);
                # Convert some obsolete keys
                if ($target eq "swcodec") {
                    $target = "*";
                } elsif ($target eq "lcd_bitmap") {
                    $target = "*";
                } elsif ($target eq "recording_swcodec") {
                    $target = "recording";
#               } elsif ($id =~ /USB_MODE/ && $target =~ /ibassodx/) {
#                   $target = "*";
                }
                $thisphrase{$pos}->{$target} = $w;
            }
        }

        # Section open/close tags drive the parser state.
        if ($line eq '</voice>' ||
            $line eq '</dest>' ||
            $line eq '</source>' ||
            $line eq '<phrase>') {
            $pos = 'phrase';
        } elsif ($line eq '</phrase>') {
            # Store a copy of the finished phrase and reset for the next one
            my %copy = %thisphrase;
            $phrases{$id} = \%copy;
            %thisphrase = %empty;
            $pos = 'lang';
            $id = '';
        } elsif ($line eq '<source>') {
            $pos = 'source';
        } elsif ($line eq '<dest>') {
            $pos = 'dest';
        } elsif ($line eq '<voice>') {
            $pos = 'voice';
        }
    }

    $phrases{'HEADER'} = \@comments;
    $phrases{'ORDER'} = \@order;
    return %phrases;
}
# Collapse a target => string map so that targets sharing the same string
# are merged into one comma-separated key.
#
# Takes the map as a flat key/value list and returns a hash (as a list)
# whose keys are the joined target names (in sorted order) and whose values
# are the strings.  The '*' fallback entry is never merged with anything
# and is always carried over as its own key.
sub combinetgts {
    my (%tgtmap) = (@_);
    my %targets_for;

    # Group the target names by the string they map to; walking the targets
    # in sorted order keeps every group sorted too.
    foreach my $tgt (sort(keys(%tgtmap))) {
        next if ($tgt eq '*'); # Do not combine anything with fallback
        push(@{$targets_for{$tgtmap{$tgt}}}, $tgt);
    }

    # The fallback was skipped above, so seed the result with it.
    my %combined = ('*' => $tgtmap{'*'});
    while (my ($str, $names) = each(%targets_for)) {
        $combined{join(',', @{$names})} = $str;
    }

    return %combined;
}
# Reduce a printf-style string to just its format specifiers.
#
# Everything outside a format is dropped.  A '%' opens a format; the
# characters that follow are collected until one of s S d D u U x X z Z or
# another '%' closes it.  A closing '%' (i.e. a literal "%%") contributes
# nothing to the result.  Returns the collected characters as one string.
sub reduceformat($) {
    my ($in) = @_;
    my $specs = "";
    my $active = 0;

    foreach my $ch (split(//, $in)) {
        if (!$active) {
            # Only a '%' is interesting outside of a format
            $active = 1 if ($ch eq '%');
            next;
        }

        # Inside a format: keep everything except a literal %
        $specs .= $ch unless ($ch eq '%');

        # A conversion character (or the second % of "%%") ends the format
        $active = 0 if ($ch =~ /[sSdDuUxXzZ%]/);
    }

    return $specs;
}
##################
# Exactly three arguments are required: the master (English) language file,
# the language file to update, and the output file ('-' for stdout).
if($#ARGV != 2) {
    print "Usage: [ENGLISHORDER=1] updatelang <english.lang> <otherlang> <outfile|->\n";
    exit;
}

# Parse master file
my %english = parselangfile($ARGV[0]);
my @englishorder = @{$english{'ORDER'}};

# Parse secondary file
my %lang = parselangfile($ARGV[1]);
my @langorder = @{$lang{'ORDER'}};
my @langheader = @{$lang{'HEADER'}};

# Clean up: from here on both hashes hold nothing but phrases
delete $english{'ORDER'};
delete $english{'HEADER'};
delete $lang{'ORDER'};
delete $lang{'HEADER'};

# Extract language names (file basename up to the first '.')
my ($f1) = split(/\./, basename($ARGV[0]));
my ($f2) = split(/\./, basename($ARGV[1]));

# Read in ignore list, which lives next to this script
my $igname = dirname($0) . "/langignorelist.txt";
# Three-argument open with a lexical handle, and report why it failed.
open(my $igfh, '<', $igname) || die("Can't open $igname: $!");
my @ignorelist = <$igfh>;
close($igfh);
chomp(@ignorelist);  # one phrase id per entry, without the line ending
# Return 0 if the given phrase id is on the ignore list, 1 otherwise.
sub not_ignorelist {
    my ($key) = @_;

    foreach my $entry (@ignorelist) {
        # $entry aliases the list element, so the line ending is stripped
        # from the stored entry itself.
        chomp($entry);
        return 0 if ($entry eq $key);
    }

    return 1;
}
undef $igname;

# Do we care about notes?  Not when the master language is checked against
# itself.
my $printnotes = ($f1 eq $f2) ? 0 : 1;

# Ignore duplicates for sub-languages, i.e. when the master language name
# occurs anywhere inside the other language's name.
my $ignoredups = (index($f2, $f1) > -1) ? 1 : 0;

# work out the missing phrases: start with every English id and cross off
# the ones the language file has.  Phrases that English no longer knows are
# dropped from the language on the way.
my %missing = map { $_ => 1 } @englishorder;
my @missingorder;

foreach my $id (@langorder) {
    if (defined($english{$id})) {
        delete $missing{$id};
    } else {
        delete($lang{$id});
#       print "#!! '$id' no longer needed\n";
    }
}

# Keep the missing ids in English order
@missingorder = grep { defined($missing{$_}) } @englishorder;

# And add them to the phrase list.
foreach my $id (@missingorder) {
#   print "#!! '$id' missing\n";
    push(@langorder, $id);

    # Note that this shares the English phrase data rather than copying it.
    $lang{$id} = $english{$id};

    if ($id eq 'VOICE_LANG_NAME') {
        # The spoken language name must not be inherited from English
        $lang{$id}{'voice'}{'*'} = "";
        $lang{$id}{'notes'} .= "### The phrase '$id' is missing entirely, please fill out\n";
    } else {
        $lang{$id}{'notes'} .= "### The phrase '$id' is missing entirely, copying from english!\n";
    }

    $lang{$id}{'new'} = 1;
}

undef @missingorder;
undef %missing;
# Sanity-check a few things: the 'desc' and 'user' fields of every phrase
# must agree with English.  Differences are recorded in the phrase's notes
# and the English value is adopted.
foreach my $id (@langorder) {
    if (!defined($english{$id})) {
        next;
    }

    my %ep = %{$english{$id}{'phrase'}};
    my %lp = %{$lang{$id}{'phrase'}};

    if ($lp{'desc'} ne $ep{'desc'} || $ep{'desc'} eq 'deprecated') {
        if ($ep{'desc'} eq 'deprecated') {
            # Nuke all deprecated targets; just copy from English
#           print "#!! '$id' deprecated, deleting\n";
            $lang{$id} = $english{$id};
        } else {
            $lang{$id}{'notes'} .= "### The 'desc' field for '$id' differs from English!\n";
            $lang{$id}{'notes'} .= "#### the previously used desc is commented below:\n";
            $lang{$id}{'notes'} .= "##### desc: $lp{desc}\n";
            $lang{$id}{'phrase'}{'desc'} = $english{$id}{'phrase'}{'desc'};
#           print "#!! '$id' changed description\n";
        }
    }

    # A phrase without a 'user' field in English belongs to 'core'.  The
    # default is applied to the English side so the comparison below is
    # meaningful and an undefined value is never written back.
    my $euser = $ep{'user'};
    if (!defined($euser) || length($euser) == 0) {
        $euser = 'core';
    }

    if (!defined($lp{'user'}) || $lp{'user'} ne $euser) {
        # Show an empty value rather than interpolating undef
        my $previous = defined($lp{'user'}) ? $lp{'user'} : '';
        $lang{$id}{'notes'} .= "### The 'user' field for '$id' differs from English!\n";
        $lang{$id}{'notes'} .= "#### the previously used user is commented below:\n";
        $lang{$id}{'notes'} .= "##### user: $previous\n";
        $lang{$id}{'phrase'}{'user'} = $euser;
#       print "#!! '$id' changed user\n";
    }
}
# Check sources: the <source> section of every phrase must mirror English
# exactly.  Targets unknown to English are removed, missing or differing
# targets are replaced by the English string, and each change is recorded
# in the phrase's notes.
foreach my $id (@langorder) {
    if (!defined($english{$id})) {
        next;
    }

    my %ep = %{$english{$id}{'source'}};
    my %lp;

    # %lp is a snapshot of the language's sources as read from the file
    if (defined($lang{$id}{'source'})) {
        %lp = %{$lang{$id}{'source'}};
    } else {
        %lp = ();
    }

    foreach my $tgt (keys(%lp)) {
        if (!defined($ep{$tgt})) {
            # Delete any targets that have been nuked in master
            delete($lang{$id}{'source'}{$tgt});
        }
    }

    foreach my $tgt (keys(%ep)) {
        if (!defined($lp{$tgt})) {
            # If it doesn't exist in the language, copy it from English
            if ($ep{$tgt} ne 'none' && $ep{$tgt} ne '' ) {
                $lang{$id}{'notes'} .= "### The <source> section for '$id:$tgt' is missing! Copying from english!\n";
#               print "#!! '$id:$tgt' source missing\n";
            }
            $lang{$id}{'source'}{$tgt} = $english{$id}{'source'}{$tgt};
        } elsif ($lp{$tgt} ne $ep{$tgt}) {
            # If the source string differs, complain, and copy from English.
            # The note shows the string the language file had until now, as
            # the new English one ends up in <source> anyway.
            $lang{$id}{'notes'} .= "### The <source> section for '$id:$tgt' differs from English!\n";
            $lang{$id}{'notes'} .= "#### the previously used one is commented below:\n";
            $lang{$id}{'notes'} .= "##### $lp{$tgt}\n";
#           print "#!! '$id:$tgt' source changed ('$lp{$tgt}' vs '$ep{$tgt}')\n";
            $lang{$id}{'source'}{$tgt} = $english{$id}{'source'}{$tgt};
        }
    }
}
# Check dests: validate the translated <dest> strings of every phrase
# against English.  Targets unknown to English are removed; missing, 'none'
# or wrongly blank/non-blank strings are replaced by the English one; and
# strings identical to English, with bad format specifiers or with
# suspicious characters are flagged.  Every finding is appended to the
# phrase's notes.
foreach my $id (@langorder) {
    if (!defined($english{$id})) {
        next;
    }

    my %ep = %{$english{$id}{'dest'}};
    my %lp;

    # %lp is a snapshot of the language's dests as read from the file; the
    # corrections below go into $lang{$id}{'dest'} and do not update it.
    if (defined($lang{$id}{'dest'})) {
        %lp = %{$lang{$id}{'dest'}};
    } else {
        %lp = ();
    }

    foreach my $tgt (keys(%lp)) {
        if (!defined($ep{$tgt})) {
            # Delete any targets that have been nuked in master
            delete($lang{$id}{'dest'}{$tgt});
        }
    }

    foreach my $tgt (keys(%ep)) {
        if (!defined($lp{$tgt}) || ($lp{$tgt} eq 'none')) {
            # If it doesn't exist in the language, copy it from English
            if ($ep{$tgt} ne 'none' && $ep{$tgt} ne '' ) {
                $lang{$id}{'notes'} .= "### The <dest> section for '$id:$tgt' is missing! Copying from english!\n";
#               print "#!! '$id:$tgt' dest missing\n";
            }
            $lang{$id}{'dest'}{$tgt} = $english{$id}{'dest'}{$tgt};
        } elsif ($lp{$tgt} ne $ep{$tgt}) {
            # The strings differ, which is expected for a translation; only
            # a blank/non-blank mismatch is a problem.
            if ($lp{$tgt} eq '' && $ep{$tgt} ne '') {
                $lang{$id}{'notes'} .= "### The <dest> section for '$id:$tgt' is blank! Copying from english!\n";
#               print "#!! '$id:$tgt' dest is blank ('$lp{$tgt}' vs '$ep{$tgt}')\n";
                $lang{$id}{'dest'}{$tgt} = $english{$id}{'dest'}{$tgt};
            } elsif ($lp{$tgt} ne '' && $ep{$tgt} eq '') {
                # It should be kept blank!  Record the string that is being
                # thrown away so that it can be recovered.
                $lang{$id}{'notes'} .= "### The <dest> section for '$id:$tgt' is not blank!\n";
                $lang{$id}{'notes'} .= "#### the previously used one is commented below:\n";
                $lang{$id}{'notes'} .= "##### $lp{$tgt}\n";
#               print "#!! '$id:$tgt' dest not blank ('$lp{$tgt}' vs '$ep{$tgt}')\n";
                $lang{$id}{'dest'}{$tgt} = $english{$id}{'dest'}{$tgt};
            }
        } elsif ($lp{$tgt} ne 'none' && $lp{$tgt} ne '' && not_ignorelist($id) && !$lang{$id}{'new'} && !$ignoredups) {
            $lang{$id}{'notes'} .= "### The <dest> section for '$id:$tgt' is identical to english! (correct or prefix with ~)\n";
#           print "#!! '$id:$tgt' dest identical ('$lp{$tgt}')\n";
        }

        if ($id eq 'LANG_VOICED_DATE_FORMAT') {
            # Only the characters Y, A, m, d (and ~) are accepted here.  A
            # target the language did not have counts as an empty string.
            my $sane = defined($lp{$tgt}) ? $lp{$tgt} : '';
            $sane =~ s/^~?(.*)/$1/; # Strip off leading ~ if it's there as it's not a legal character for the format.
            $sane =~ tr/YAmd~//d;
            if (length($sane) != 0) {
                $lang{$id}{'notes'} .= "### The <dest> section for '$id:$tgt' has illegal characters! Restoring from English!\n";
                $lang{$id}{'notes'} .= "#### the previously used one is commented below:\n";
                $lang{$id}{'notes'} .= "##### $lang{$id}{dest}{$tgt}\n";
                $lang{$id}{'dest'}{$tgt} = $english{$id}{'dest'}{$tgt};
            }
        }

        # The format specifiers must match those of English
        my $count1 = $ep{$tgt} =~ tr/%//;
        my $count2 = 0;
        if (defined($lp{$tgt})) {
            $count2 = $lp{$tgt} =~ tr/%//;
        }
        if ($count1 || $count2) {
            my $fmt1 = reduceformat($ep{$tgt});
            my $fmt2 = "";
            if ($count2) {
                $fmt2 = reduceformat($lp{$tgt});
            }
            if ($fmt1 ne $fmt2) {
                $lang{$id}{'notes'} .= "### The <dest> section for '$id:$tgt' has incorrect format specifiers! Copying from English!\n";
                $lang{$id}{'notes'} .= "#### the previously used one is commented below:\n";
                $lang{$id}{'notes'} .= "##### $lang{$id}{dest}{$tgt}\n";
                $lang{$id}{'dest'}{$tgt} = $english{$id}{'dest'}{$tgt};
#               print "#!! '$id:$tgt' dest does not match src format args: '$fmt1' vs '$fmt2'\n";
            }
        }

        # Finally look at the string that will actually be written
        my $sane = $lang{$id}{'dest'}{$tgt};
        $sane =~ s/^~?(.*)/$1/; # Strip off leading ~ if it's there as it's not a legal character otherwise
        if ($sane =~ tr/"~<>//) {
            # If it has suspicious characters that are not allowed
            $lang{$id}{'notes'} .= "### The <dest> section for '$id:$tgt' has some suspicious characters (eg \",~,<,>), please double-check!\n";
#           print "#!! '$id:$tgt' suspicious characters\n";
        }
    }
}
# Check voices: validate the <voice> strings of every phrase against
# English.  Targets unknown to English are removed; missing, 'none' or
# blank strings are filled from the translated <dest> or from English;
# strings that must be blank are cleared; and strings identical to English,
# containing suspicious characters or ellipses are flagged.  Every finding
# is appended to the phrase's notes.
foreach my $id (@langorder) {
    if (!defined($english{$id})) {
        next;
    }

    my %ep = %{$english{$id}{'voice'}};
    my %lp;

    # %lp is a snapshot of the language's voices as read from the file; the
    # corrections below go into $lang{$id}{'voice'} and do not update it.
    if (defined($lang{$id}{'voice'})) {
        %lp = %{$lang{$id}{'voice'}};
    } else {
        %lp = ();
    }

    foreach my $tgt (keys(%lp)) {
        if (!defined($ep{$tgt})) {
            # Delete any targets that have been nuked in master
            delete($lang{$id}{'voice'}{$tgt});
        }
    }

    foreach my $tgt (keys(%ep)) {
        if (!defined($lp{$tgt}) || ($lp{$tgt} eq 'none')) {
            # If it doesn't exist in the language, copy it from English
            if ($ep{$tgt} ne 'none' && $ep{$tgt} ne '' ) {
                $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' is missing! Copying from english!\n";
#               print "#!! '$id:$tgt' voice missing\n";
            }
            $lang{$id}{'voice'}{$tgt} = $english{$id}{'voice'}{$tgt};
        } elsif ($lp{$tgt} ne $ep{$tgt}) {
            if ($lp{$tgt} eq '' && $ep{$tgt} ne '') {
                # If the lang voice string is blank, complain and copy from translation
#               print "#!! '$id:$tgt' voice is blank ('$lp{$tgt}' vs '$ep{$tgt}')\n";
                if ($lang{$id}{'dest'}{$tgt} ne '' &&
                    $lang{$id}{'dest'}{$tgt} ne $english{$id}{'dest'}{$tgt}) {
                    # A real translation exists in <dest>, so voice that
                    $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' is blank! Copying from translated <dest>!\n";
                    $lang{$id}{'voice'}{$tgt} = $lang{$id}{'dest'}{$tgt};
                } elsif ($id eq 'VOICE_LANG_NAME') {
                    # The language name is never taken from English
                    $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' is blank! Please fill out!\n";
                } else {
                    $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' is blank! Copying from english!\n";
                    $lang{$id}{'voice'}{$tgt} = $english{$id}{'voice'}{$tgt};
                }
            } elsif ($lp{$tgt} ne '' && $ep{$tgt} eq '') {
                if ($id ne 'VOICE_NUMERIC_TENS_SWAP_SEPARATOR') {
                    # If it's not blank, clear it and complain!  Record the
                    # string that is being thrown away so that it can be
                    # recovered.
                    $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' is not blank!\n";
                    $lang{$id}{'notes'} .= "#### the previously used one is commented below:\n";
                    $lang{$id}{'notes'} .= "##### $lp{$tgt}\n";
#                   print "#!! '$id:$tgt' voice not blank ('$lp{$tgt}' vs '$ep{$tgt}')\n";
                    $lang{$id}{'voice'}{$tgt} = $english{$id}{'voice'}{$tgt};
                }
            }
        } elsif ($lp{$tgt} ne 'none' && $lp{$tgt} ne '' && not_ignorelist($id) && !$lang{$id}{'new'} && !$ignoredups) {
#           print "#!! '$id:$tgt' voice identical ('$lp{$tgt}')\n";
            if ($lang{$id}{'dest'}{$tgt} ne '' &&
                $lang{$id}{'dest'}{$tgt} ne $english{$id}{'dest'}{$tgt} &&
                $lang{$id}{'dest'}{$tgt} ne "~$english{$id}{dest}{$tgt}") {
                # <dest> holds a real translation, so voice that
                $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' is identical to english, copying translated <dest>\n";
                $lang{$id}{'voice'}{$tgt} = $lang{$id}{'dest'}{$tgt};
            } else {
                $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' is identical to english! (correct or prefix with ~)\n";
            }
        }

        # Finally look at the string that will actually be written
        my $sane = $lang{$id}{'voice'}{$tgt};
        $sane =~ s/^~?(.*)/$1/; # Strip off leading ~ if it's there as it's not a legal character otherwise
        if ($sane =~ tr/%"~:\[\]<>{}\|//) {
            # Suspicious characters that are not typically voiced..
            $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' has some suspicious characters (eg %,\",~,:,<,>,[,],{,},|), please correct!\n";
#           print "#!! '$id:$tgt' suspicious characters\n";
        }
        if ($lang{$id}{'voice'}{$tgt} =~ /\.\.\./) {
            # Ellipses should not be in voice strings
            $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' has ellipses (...), please remove!\n";
#           print "#!! '$id:$tgt' ellipses\n";
        }
    }
}
########## Write new language file

# Print one <source>, <dest> or <voice> section of a phrase.
#   $out     - filehandle to print to
#   $name    - section name (used for the opening and closing tag)
#   $strings - hash ref of target => string for that section
# Targets sharing a string are merged onto one line, strings are
# recomposed (NFC), 'none' is printed unquoted and 'FALLBACK' placeholder
# entries are not written out at all.
sub write_section {
    my ($out, $name, $strings) = @_;
    my %merged = combinetgts(%{$strings});

    print $out " <$name>\n";
    foreach my $tgt (sort(keys(%merged))) {
        my $w = NFC($merged{$tgt});
        next if ($w eq 'FALLBACK');
        if ($w eq 'none') {
            print $out " $tgt: $w\n";
        } else {
            print $out " $tgt: \"$w\"\n";
        }
    }
    print $out " </$name>\n";
}

# Output goes to the named file, or to stdout for '-'.
my $fh;
if ($ARGV[2] ne '-') {
    # Three-argument open with a lexical handle, and report why it failed.
    open($fh, '>', $ARGV[2]) || die("Can't open $ARGV[2]: $!");
} else {
    $fh = *STDOUT;
}

# The header comments of the language file come first
foreach (@langheader) {
    print $fh $_;
}

# Phrases are written in the language's own order unless the ENGLISHORDER
# environment variable asks for the order of the English file.
my @finalorder;
if ($ENV{'ENGLISHORDER'}) {
    @finalorder = @englishorder;
} else {
    @finalorder = @langorder;
}

foreach my $id (@finalorder) {
    if (!defined($english{$id})) {
        next;
    }

    my %lp;

    # phrase
    %lp = %{$lang{$id}{'phrase'}};

    # Drop all deprecated phrases?
#   next if ($lp{'desc'} eq 'deprecated');

    # Notes collected by the checks go right above the phrase
    if (length($lang{$id}{'notes'}) && $printnotes) {
        print $fh "$lang{$id}{notes}";
    }
    print $fh "<phrase>\n";
    print $fh " id: $lp{id}\n";
    if ($lp{'desc'} ne '') {
        print $fh " desc: $lp{desc}\n";
    } else {
        print $fh " desc:\n";
    }
    print $fh " user: $lp{user}\n";

    write_section($fh, 'source', $lang{$id}{'source'});
    write_section($fh, 'dest', $lang{$id}{'dest'});
    write_section($fh, 'voice', $lang{$id}{'voice'});

    # FiN
    print $fh "</phrase>\n";
}

if ($ARGV[2] ne '-') {
    # Buffered write errors (e.g. a full disk) only surface on close
    close($fh) || die("Can't write $ARGV[2]: $!");
}