1
0
Fork 0
forked from len0rd/rockbox

updatelang: Normalize all strings in our lang files to NFC form.

Now, no matter how [de]normalized the input strings are, we will
normalize them to the best of our ability in the output we write.

This adds a dependency on Perl's Unicode::Normalize.

Change-Id: I13e275692ea33a463b19f3a499ea06ce1acbb44a
This commit is contained in:
Solomon Peachy 2024-10-13 09:01:20 -04:00
parent c354e0bd1f
commit eb2d596d72

View file

@ -6,11 +6,15 @@
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
# \/ \/ \/ \/ \/ # \/ \/ \/ \/ \/
# #
# Copyright (C) 2020 Solomon Peachy # Copyright (C) 2020-2024 Solomon Peachy
# #
use utf8; use utf8;
use File::Basename; use File::Basename;
use Unicode::Normalize;
use open qw( :std :encoding(UTF-8) );
binmode(STDOUT, ":encoding(UTF-8)");
sub trim { sub trim {
my ($string) = @_; my ($string) = @_;
@ -72,6 +76,7 @@ sub parselangfile {
# $l = "*"; # $l = "*";
} }
$w = NFD($w); # Unicode decompose
$thisphrase{$pos}->{$l} = $w; $thisphrase{$pos}->{$l} = $w;
} }
} }
@ -538,10 +543,11 @@ foreach my $id (@finalorder) {
%lp = combinetgts(%{$lang{$id}{'source'}}); %lp = combinetgts(%{$lang{$id}{'source'}});
print $fh " <source>\n"; print $fh " <source>\n";
foreach my $tgt (sort(keys(%lp))) { foreach my $tgt (sort(keys(%lp))) {
if ($lp{$tgt} eq 'none') { my $w = NFC($lp{$tgt});
print $fh " $tgt: $lp{$tgt}\n"; if ($w eq 'none') {
print $fh " $tgt: $w\n";
} else { } else {
print $fh " $tgt: \"$lp{$tgt}\"\n"; print $fh " $tgt: \"$w\"\n";
} }
} }
print $fh " </source>\n"; print $fh " </source>\n";
@ -550,10 +556,11 @@ foreach my $id (@finalorder) {
%lp = combinetgts(%{$lang{$id}{'dest'}}); %lp = combinetgts(%{$lang{$id}{'dest'}});
print $fh " <dest>\n"; print $fh " <dest>\n";
foreach my $tgt (sort(keys(%lp))) { foreach my $tgt (sort(keys(%lp))) {
if ($lp{$tgt} eq 'none') { my $w = NFC($lp{$tgt});
print $fh " $tgt: $lp{$tgt}\n"; if ($w eq 'none') {
print $fh " $tgt: $w\n";
} else { } else {
print $fh " $tgt: \"$lp{$tgt}\"\n"; print $fh " $tgt: \"$w\"\n";
} }
} }
print $fh " </dest>\n"; print $fh " </dest>\n";
@ -562,10 +569,11 @@ foreach my $id (@finalorder) {
%lp = combinetgts(%{$lang{$id}{'voice'}}); %lp = combinetgts(%{$lang{$id}{'voice'}});
print $fh " <voice>\n"; print $fh " <voice>\n";
foreach my $tgt (sort(keys(%lp))) { foreach my $tgt (sort(keys(%lp))) {
if ($lp{$tgt} eq 'none') { my $w = NFC($lp{$tgt});
print $fh " $tgt: $lp{$tgt}\n"; if ($w eq 'none') {
print $fh " $tgt: $w\n";
} else { } else {
print $fh " $tgt: \"$lp{$tgt}\"\n"; print $fh " $tgt: \"$w\"\n";
} }
} }
print $fh " </voice>\n"; print $fh " </voice>\n";