mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-10-14 02:27:39 -04:00
FS#11913: Separate TTS correction expressions into separate file.
voice.pl will now read the TTS correction expressions from a file tools/voice-corrections.txt which includes regular expressions for adjusting the string. This makes it easier to adjust the corrections and allows integrating them into tools like Rockbox Utility. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29500 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
1f77d091a5
commit
7ad78222c4
2 changed files with 129 additions and 99 deletions
92
tools/voice-corrections.txt
Normal file
92
tools/voice-corrections.txt
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
__________ __ ___.
|
||||||
|
Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||||
|
Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||||
|
Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||||
|
Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||||
|
\/ \/ \/ \/ \/
|
||||||
|
$Id$
|
||||||
|
|
||||||
|
|
||||||
|
Voice string corrections for voice.pl to support TTS engines.
|
||||||
|
The list items are separated by the separator that is defined by the first
|
||||||
|
character on the line. If the first character is whitespace, the line will
|
||||||
|
be treated as a comment.
|
||||||
|
|
||||||
|
Format:
|
||||||
|
/language/engine/vendor/string/replacement/modifiers
|
||||||
|
|
||||||
|
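Where / is the separator; language, engine, vendor and string are Perl regexes, and replacement is the substitution text (it may refer to captures such as $1).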
|
||||||
|
Empty lines and lines starting with a whitespace are ignored, for all other
|
||||||
|
lines the first character will become the separator.
|
||||||
|
|
||||||
|
General for all engines and languages
|
||||||
|
|
||||||
|
/.*/.*/.*/USB/U S B/g
|
||||||
|
/.*/.*/.*/ID3/I D 3/g
|
||||||
|
English
|
||||||
|
/english/(sapi|festival)/.*/plugin(s?)/plug-in$1/ig
|
||||||
|
/english/festival/.*//\ba\b/ay/ig
|
||||||
|
/english/festival/.*//$/./
|
||||||
|
|
||||||
|
German (deutsch)
|
||||||
|
|
||||||
|
/deutsch/.*/.*/alkaline/alkalein/ig
|
||||||
|
/deutsch/.*/.*/byte(s?)/beit$1/ig
|
||||||
|
/deutsch/.*/.*/clip(s?)/klipp$1/ig
|
||||||
|
/deutsch/.*/.*/\bcover/kawwer/ig
|
||||||
|
/deutsch/.*/.*/cuesheet/kjuschiet/ig
|
||||||
|
/deutsch/.*/.*/dither/didder/ig
|
||||||
|
/deutsch/.*/.*/equalizer/iquileiser/ig
|
||||||
|
/deutsch/.*/.*/\bflash\b/fläsh/ig
|
||||||
|
/deutsch/.*/.*/\bfirmware(s?)\b/firmwer$1/ig
|
||||||
|
/deutsch/.*/.*/\bI D 3 tag\b/I D 3 täg/ig
|
||||||
|
/deutsch/.*/.*/\bloudness\b/laudness/ig
|
||||||
|
/deutsch/.*/.*/\bunicode\b/unikod/ig
|
||||||
|
/deutsch/sapi/AT&T Labs/alphabet/alfabet/ig;
|
||||||
|
/deutsch/sapi/AT&T Labs/ampere/amper/ig;
|
||||||
|
/deutsch/sapi/AT&T Labs/\bdezibel\b/de-zibell/ig;
|
||||||
|
/deutsch/sapi/AT&T Labs/diddering/didde-ring/ig;
|
||||||
|
/deutsch/sapi/AT&T Labs/energie\b/ener-gie/ig;
|
||||||
|
/deutsch/sapi/AT&T Labs/\Blauf\b/-lauf/ig;
|
||||||
|
/deutsch/sapi/AT&T Labs/\bnumerisch\b/numehrisch/ig;
|
||||||
|
|
||||||
|
Swedish (svenska)
|
||||||
|
for all swedish engines (e.g. for english words)
|
||||||
|
|
||||||
|
/svenska/.*/.*/kilobyte/kilobajt/ig
|
||||||
|
/svenska/.*/.*/megabyte/megabajt/ig
|
||||||
|
/svenska/.*/.*/gigabyte/gigabajt/ig
|
||||||
|
/svenska/.*/.*/\bloudness\b/laudness/ig
|
||||||
|
/svenska/espeak/.*/ampere/ampär/ig
|
||||||
|
/svenska/espeak/.*/bokmärken/bok-märken/ig
|
||||||
|
/svenska/espeak/.*/generella/schenerella/ig
|
||||||
|
/svenska/espeak/.*/dithering/diddering/ig
|
||||||
|
/svenska/espeak/.*/\bunicode\b/jynikod/ig
|
||||||
|
/svenska/espeak/.*/uttoning/utoning/ig
|
||||||
|
/svenska/espeak/.*/procent/pro-cent/ig
|
||||||
|
/svenska/espeak/.*/spellistor/spelistor/ig
|
||||||
|
/svenska/espeak/.*/cuesheet/qjyschiit/ig
|
||||||
|
|
||||||
|
Italian (italiano)
|
||||||
|
for all italian engines (e.g. for english words)
|
||||||
|
|
||||||
|
/italiano/.*/.*/Replaygain/Ripleyghein/ig
|
||||||
|
/italiano/.*/.*/Crossfade/Crossfeid/ig
|
||||||
|
/italiano/.*/.*/beep/Bip/ig
|
||||||
|
/italiano/.*/.*/cuesheet/chiushit/ig
|
||||||
|
/italiano/.*/.*/fade/feid/ig
|
||||||
|
/italiano/.*/.*/Crossfeed/crossfid/ig
|
||||||
|
/italiano/.*/.*/Cache/chash/ig
|
||||||
|
/italiano/.*/.*/\bfirmware(s?)\b/firmuer$1/ig
|
||||||
|
/italiano/.*/.*/\bFile(s?)\b/fail$1/ig
|
||||||
|
/italiano/.*/.*/\bloudness\b/laudness/ig
|
||||||
|
/italiano/.*/.*/\bunicode\b/unikod/ig
|
||||||
|
/italiano/.*/.*/Playlist/pleylist/ig
|
||||||
|
/italiano/.*/.*/WavPack/wave pak/ig
|
||||||
|
/italiano/.*/.*/BITRATE/bit reit/ig
|
||||||
|
/italiano/.*/.*/Codepage/cod page/ig
|
||||||
|
/italiano/.*/.*/PCM Wave/pcm Ue'iv/ig
|
||||||
|
/italiano/sapi/Loquendo/Inizializza/inizializa/ig
|
||||||
|
/italiano/sapi/ScanSoft, Inc/V/v/ig
|
||||||
|
/italiano/sapi/ScanSoft, Inc/X/x/ig
|
||||||
|
/italiano/sapi/ScanSoft, Inc/stop/stohp/ig
|
136
tools/voice.pl
136
tools/voice.pl
|
@ -128,106 +128,12 @@ sub correct_string {
|
||||||
our $verbose;
|
our $verbose;
|
||||||
my ($string, $language, $tts_object) = @_;
|
my ($string, $language, $tts_object) = @_;
|
||||||
my $orig = $string;
|
my $orig = $string;
|
||||||
switch($language) {
|
my $corrections = $tts_object->{"corrections"};
|
||||||
# General for all engines and languages
|
|
||||||
$string =~ s/USB/U S B/g;
|
|
||||||
$string =~ s/ID3/I D 3/g;
|
|
||||||
|
|
||||||
case "english" {
|
foreach (@$corrections) {
|
||||||
switch($$tts_object{"name"}) {
|
my $r = "s" . $_->{separator} . $_->{search} . $_->{separator}
|
||||||
case ["sapi","festival"] {
|
. $_->{replace} . $_->{separator} . $_->{modifier};
|
||||||
$string =~ s/plugin(s?)/plug-in$1/ig; next
|
eval ('$string =~' . "$r;");
|
||||||
}
|
|
||||||
case "festival" {
|
|
||||||
$string =~ s/\ba\b/ay/ig;
|
|
||||||
$string =~ s/$/./;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case "deutsch" {
|
|
||||||
# for all german engines (e.g. for english words)
|
|
||||||
$string =~ s/alkaline/alkalein/ig;
|
|
||||||
$string =~ s/byte(s?)/beit$1/ig;
|
|
||||||
$string =~ s/clip(s?)/klipp$1/ig;
|
|
||||||
$string =~ s/\bcover/kawwer/ig;
|
|
||||||
$string =~ s/cuesheet/kjuschiet/ig;
|
|
||||||
$string =~ s/dither/didder/ig;
|
|
||||||
$string =~ s/equalizer/iquileiser/ig;
|
|
||||||
$string =~ s/\bflash\b/fläsh/ig;
|
|
||||||
$string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
|
|
||||||
$string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
|
|
||||||
$string =~ s/\bloudness\b/laudness/ig;
|
|
||||||
$string =~ s/\bunicode\b/unikod/ig;
|
|
||||||
switch($$tts_object{"name"}) {
|
|
||||||
case "sapi" { # just for SAPI
|
|
||||||
switch($$tts_object{"vendor"}) {
|
|
||||||
case "AT&T Labs" {
|
|
||||||
$string =~ s/alphabet/alfabet/ig;
|
|
||||||
$string =~ s/ampere/amper/ig;
|
|
||||||
$string =~ s/\bdezibel\b/de-zibell/ig;
|
|
||||||
$string =~ s/diddering/didde-ring/ig;
|
|
||||||
$string =~ s/energie\b/ener-gie/ig;
|
|
||||||
$string =~ s/\Blauf\b/-lauf/ig;
|
|
||||||
$string =~ s/\bnumerisch\b/numehrisch/ig;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case "svenska" {
|
|
||||||
# for all swedish engines (e.g. for english words)
|
|
||||||
$string =~ s/kilobyte/kilobajt/ig;
|
|
||||||
$string =~ s/megabyte/megabajt/ig;
|
|
||||||
$string =~ s/gigabyte/gigabajt/ig;
|
|
||||||
$string =~ s/\bloudness\b/laudness/ig;
|
|
||||||
|
|
||||||
switch($$tts_object{"name"}) {
|
|
||||||
case "espeak" { # just for eSpeak
|
|
||||||
$string =~ s/ampere/ampär/ig;
|
|
||||||
$string =~ s/bokmärken/bok-märken/ig;
|
|
||||||
$string =~ s/generella/schenerella/ig;
|
|
||||||
$string =~ s/dithering/diddering/ig;
|
|
||||||
$string =~ s/\bunicode\b/jynikod/ig;
|
|
||||||
$string =~ s/uttoning/utoning/ig;
|
|
||||||
$string =~ s/procent/pro-cent/ig;
|
|
||||||
$string =~ s/spellistor/spelistor/ig;
|
|
||||||
$string =~ s/cuesheet/qjyschiit/ig;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case "italiano" {
|
|
||||||
# for all italian engines (e.g. for english words)
|
|
||||||
$string =~ s/Replaygain/Ripleyghein/ig;
|
|
||||||
$string =~ s/Crossfade/Crossfeid/ig;
|
|
||||||
$string =~ s/beep/Bip/ig;
|
|
||||||
$string =~ s/cuesheet/chiushit/ig;
|
|
||||||
$string =~ s/fade/feid/ig;
|
|
||||||
$string =~ s/Crossfeed/crossfid/ig;
|
|
||||||
$string =~ s/Cache/chash/ig;
|
|
||||||
$string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
|
|
||||||
$string =~ s/\bFile(s?)\b/fail$1/ig;
|
|
||||||
$string =~ s/\bloudness\b/laudness/ig;
|
|
||||||
$string =~ s/\bunicode\b/unikod/ig;
|
|
||||||
$string =~ s/Playlist/pleylist/ig;
|
|
||||||
$string =~ s/WavPack/wave pak/ig;
|
|
||||||
$string =~ s/BITRATE/bit reit/ig;
|
|
||||||
$string =~ s/Codepage/cod page/ig;
|
|
||||||
$string =~ s/PCM Wave/pcm Ue'iv/ig;
|
|
||||||
switch($$tts_object{"name"}) {
|
|
||||||
case "sapi" { # just for SAPI
|
|
||||||
switch($$tts_object{"vendor"}) {
|
|
||||||
case "Loquendo" {
|
|
||||||
$string =~ s/Inizializza/inizializa/ig;
|
|
||||||
}
|
|
||||||
case "ScanSoft, Inc" {
|
|
||||||
$string =~ s/V/v/ig;
|
|
||||||
$string =~ s/X/x/ig;
|
|
||||||
$string =~ s/stop/stohp/ig;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if ($orig ne $string) {
|
if ($orig ne $string) {
|
||||||
printf("%s -> %s\n", $orig, $string) if $verbose;
|
printf("%s -> %s\n", $orig, $string) if $verbose;
|
||||||
|
@ -331,6 +237,7 @@ sub generateclips {
|
||||||
my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
|
my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
|
||||||
my $english = dirname($0) . '/../apps/lang/english.lang';
|
my $english = dirname($0) . '/../apps/lang/english.lang';
|
||||||
my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
|
my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
|
||||||
|
my $correctionsfile = dirname($0) . '/voice-corrections.txt';
|
||||||
my $id = '';
|
my $id = '';
|
||||||
my $voice = '';
|
my $voice = '';
|
||||||
my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
|
my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
|
||||||
|
@ -340,6 +247,37 @@ sub generateclips {
|
||||||
local $| = 1; # make progress indicator work reliably
|
local $| = 1; # make progress indicator work reliably
|
||||||
|
|
||||||
my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
|
my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
|
||||||
|
# add string corrections to tts_object.
|
||||||
|
my @corrects = ();
|
||||||
|
open(VOICEREGEXP, "<$correctionsfile") or die "Can't open corrections file!\n";
|
||||||
|
while(<VOICEREGEXP>) {
|
||||||
|
# get first character of line
|
||||||
|
my $line = $_;
|
||||||
|
my $separator = substr($_, 0, 1);
|
||||||
|
if($separator =~ m/\s+/) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
chomp($line);
|
||||||
|
$line =~ s/^.//g; # remove separator at beginning
|
||||||
|
my ($lang, $engine, $vendor, $search, $replace, $modifier) = split(/$separator/, $line);
|
||||||
|
|
||||||
|
# does language match?
|
||||||
|
if($language !~ m/$lang/) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
if($$tts_object{"name"} !~ m/$engine/) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
my $v = $$tts_object{"vendor"} || ""; # vendor might be empty in $tts_object
|
||||||
|
if($v !~ m/$vendor/) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
push @corrects, {separator => $separator, search => $search, replace => $replace, modifier => $modifier};
|
||||||
|
|
||||||
|
}
|
||||||
|
close(VOICEREGEXP);
|
||||||
|
$tts_object->{corrections} = [@corrects];
|
||||||
|
|
||||||
print("Generating voice clips");
|
print("Generating voice clips");
|
||||||
print("\n") if $verbose;
|
print("\n") if $verbose;
|
||||||
for (`$cmd`) {
|
for (`$cmd`) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue