voice: Add support for Google Translate's speech synthesizer

Uses the 'gtts-cli' command line client.  Supports a wide variety of
languages, including all "Complete" and "Good" Rockbox translations.

Additional changes:

 * voice synth script can accept pre-encoded mp3 files
 * Move language->synth options mapping into the voice script
 * Additional cleanups

Change-Id: I9523e2bca87cbcee2d8c4111f9892e8e458c7419
This commit is contained in:
Solomon Peachy 2020-07-08 19:05:09 -04:00
parent 5e98eba8ab
commit 2c3399537c
2 changed files with 93 additions and 51 deletions

51
tools/configure vendored
View file

@ -1111,23 +1111,7 @@ voiceconfig () {
fi fi
if [ -n "`findtool festival`" ]; then if [ -n "`findtool festival`" ]; then
FESTIVAL="(F)estival " FESTIVAL="(F)estival "
case "$thislang" in
"italiano")
FESTIVAL_OPTS="--language italian"
;;
"espanol")
FESTIVAL_OPTS="--language spanish"
;;
"finnish")
FESTIVAL_OPTS="--language finnish"
;;
"czech")
FESTIVAL_OPTS="--language czech"
;;
*)
FESTIVAL_OPTS="" FESTIVAL_OPTS=""
;;
esac
DEFAULT_TTS="festival" DEFAULT_TTS="festival"
DEFAULT_TTS_OPTS=$FESTIVAL_OPTS DEFAULT_TTS_OPTS=$FESTIVAL_OPTS
DEFAULT_NOISEFLOOR="500" DEFAULT_NOISEFLOOR="500"
@ -1149,6 +1133,23 @@ voiceconfig () {
DEFAULT_NOISEFLOOR="500" DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="w" DEFAULT_CHOICE="w"
fi fi
# Allow SAPI if Windows is in use
if [ -n "`findtool winver`" ]; then
SAPI="(S)API "
SAPI_OPTS=""
DEFAULT_TTS="sapi"
DEFAULT_TTS_OPTS=$SAPI_OPTS
DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="S"
fi
if [ -n "`findtool gtts-cli`" ]; then
GTTS="(g)tts "
GTTS_OPTS=""
DEFAULT_TTS="gtts"
DEFAULT_TTS_OPTS=$GTTS_OPTS
DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="g"
fi
if [ -n "`findtool rbspeak`" ]; then if [ -n "`findtool rbspeak`" ]; then
RBSPEAK="(O)ther " RBSPEAK="(O)ther "
RBSPEAK_OPTS="" RBSPEAK_OPTS=""
@ -1157,17 +1158,8 @@ voiceconfig () {
DEFAULT_NOISEFLOOR="500" DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="O" DEFAULT_CHOICE="O"
fi fi
# Allow SAPI if Windows is in use
if [ -n "`findtool winver`" ]; then
SAPI="(S)API "
SAPI_OPTS=""
DEFAULT_TTS="sapi"
DEFAULT_TTS_OPTS=$SAPI_OPTS
DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="s"
fi
if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ]; then if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ] && [ "$RBSPEAK" = "$GTTS" ] ; then
echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files" echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files"
exit 3 exit 3
fi fi
@ -1175,7 +1167,7 @@ voiceconfig () {
if [ "$ARG_TTS" ]; then if [ "$ARG_TTS" ]; then
option=$ARG_TTS option=$ARG_TTS
else else
echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${RBSPEAK}(${DEFAULT_CHOICE})?" echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${GTTS}${RBSPEAK}(${DEFAULT_CHOICE})?"
option=`input` option=`input`
if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi
advopts="$advopts --tts=$option" advopts="$advopts --tts=$option"
@ -1211,6 +1203,11 @@ voiceconfig () {
NOISEFLOOR="500" NOISEFLOOR="500"
TTS_OPTS=$SWIFT_OPTS TTS_OPTS=$SWIFT_OPTS
;; ;;
[Gg])
    # Google Translate TTS, driven through the gtts-cli command line client.
    # The bracket expression must be closed ("[Gg]") so that both "g" and "G"
    # select this engine.
    TTS_ENGINE="gtts"
    NOISEFLOOR="500"
    TTS_OPTS=$GTTS_OPTS
    ;;
[Oo]) [Oo])
TTS_ENGINE="rbspeak" TTS_ENGINE="rbspeak"
NOISEFLOOR="500" NOISEFLOOR="500"

View file

@ -67,12 +67,39 @@ USAGE
; ;
} }
# Maps a Rockbox language name to the language name passed to Festival
# (init_tts turns a match into a "-l <language>" option).  Languages
# without an entry get no extra language option.
#
# This has to be a parenthesised list: curly braces would create an
# anonymous hash reference, leaving the hash with a single bogus key so
# that no language lookup could ever succeed.
my %festival_lang_map = (
    'english'    => 'english',
    'english-us' => 'english',
    'espanol'    => 'spanish',
    #'finnish'   => 'finnish',
    #'italiano'  => 'italian',
    #'czech'     => 'czech',
    #'welsh'     => 'welsh',
);
# Maps a Rockbox language name to the language code passed to gtts-cli
# (init_tts turns a match into a "-l <code>" option).  Languages without
# an entry get no extra language option.
#
# This has to be a parenthesised list: curly braces would create an
# anonymous hash reference, leaving the hash with a single bogus key so
# that no language lookup could ever succeed.
my %gtts_lang_map = (
    'english'    => 'en-gb',  # Always first, it's the golden master
    'deutsch'    => 'de',
    'english-us' => 'en-us',
    'francais'   => 'fr-fr',
    'greek'      => 'el',     # ISO 639-1 code for Greek is "el", not "gr"
    'italiano'   => 'it',
    'norsk'      => 'no',
    'polski'     => 'pl',
    'russian'    => 'ru',
    'slovak'     => 'sk',
    'srpski'     => 'sr',
);
# Initialize TTS engine. May return an object or value which will be passed # Initialize TTS engine. May return an object or value which will be passed
# to voicestring and shutdown_tts # to voicestring and shutdown_tts
sub init_tts { sub init_tts {
our $verbose; our $verbose;
my ($tts_engine, $tts_engine_opts, $language) = @_; my ($tts_engine, $tts_engine_opts, $language) = @_;
my %ret = ("name" => $tts_engine); my %ret = ("name" => $tts_engine);
$ret{"format"} = 'wav';
$ret{"ttsoptions"} = "";
# Don't use given/when here - it's not compatible with old perl versions # Don't use given/when here - it's not compatible with old perl versions
if ($tts_engine eq 'festival') { if ($tts_engine eq 'festival') {
print("> festival $tts_engine_opts --server\n") if $verbose; print("> festival $tts_engine_opts --server\n") if $verbose;
@ -81,8 +108,10 @@ sub init_tts {
$SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
$SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
$ret{"pid"} = $pid; $ret{"pid"} = $pid;
if (defined($festival_lang_map{$language})) {
$ret{"ttsoptions"} = "-l $festival_lang_map{$language} ";
} }
elsif ($tts_engine eq 'sapi') { } elsif ($tts_engine eq 'sapi') {
my $toolsdir = dirname($0); my $toolsdir = dirname($0);
my $path = `cygpath $toolsdir -a -w`; my $path = `cygpath $toolsdir -a -w`;
chomp($path); chomp($path);
@ -102,6 +131,11 @@ sub init_tts {
"stdin" => *CMD_IN, "stdin" => *CMD_IN,
"stdout" => *CMD_OUT, "stdout" => *CMD_OUT,
"vendor" => $vendor); "vendor" => $vendor);
} elsif ($tts_engine eq 'gtts') {
$ret{"format"} = 'mp3';
if (defined($gtts_lang_map{$language})) {
$ret{"ttsoptions"} = "-l $gtts_lang_map{$language} ";
}
} }
return \%ret; return \%ret;
} }
@ -143,6 +177,9 @@ sub voicestring {
my ($string, $output, $tts_engine_opts, $tts_object) = @_; my ($string, $output, $tts_engine_opts, $tts_object) = @_;
my $cmd; my $cmd;
my $name = $$tts_object{'name'}; my $name = $$tts_object{'name'};
$tts_engine_opts .= $$tts_object{"ttsoptions"};
printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose; printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose;
if ($name eq 'festival') { if ($name eq 'festival') {
# festival_client lies to us, so we have to do awful soul-eating # festival_client lies to us, so we have to do awful soul-eating
@ -167,7 +204,7 @@ sub voicestring {
elsif ($name eq 'flite') { elsif ($name eq 'flite') {
$cmd = "flite $tts_engine_opts -t \"$string\" \"$output\""; $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
print("> $cmd\n") if $verbose; print("> $cmd\n") if $verbose;
`$cmd`; system($cmd);
} }
elsif ($name eq 'espeak') { elsif ($name eq 'espeak') {
$cmd = "espeak $tts_engine_opts -w \"$output\""; $cmd = "espeak $tts_engine_opts -w \"$output\"";
@ -193,11 +230,14 @@ sub voicestring {
close(RBSPEAK); close(RBSPEAK);
} }
elsif ($name eq 'mimic') { elsif ($name eq 'mimic') {
$cmd = "mimic $tts_engine_opts -o $output"; $cmd = "mimic $tts_engine_opts -o $output -t \"$string\" ";
print("> $cmd\n") if $verbose; print("> $cmd\n") if $verbose;
open (MIMIC, "| $cmd"); system($cmd);
print MIMIC $string . "\n"; }
close(MIMIC); elsif ($name eq 'gtts') {
$cmd = "gtts-cli $tts_engine_opts -o $output \"$string\"";
print("> $cmd\n") if $verbose;
system($cmd);
} }
} }
@ -326,14 +366,19 @@ sub generateclips {
if ($id eq "VOICE_PAUSE") { if ($id eq "VOICE_PAUSE") {
print("Use distributed $wav\n") if $verbose; print("Use distributed $wav\n") if $verbose;
copy(dirname($0)."/VOICE_PAUSE.wav", $wav); copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
} } else {
else {
voicestring($voice, $wav, $tts_engine_opts, $tts_object); voicestring($voice, $wav, $tts_engine_opts, $tts_object);
if ($tts_object->{'format'} eq "wav") {
wavtrim($wav, 500, $tts_object); wavtrim($wav, 500, $tts_object);
# 500 seems to be a reasonable default for now # 500 seems to be a reasonable default for now
} }
}
if ($tts_object->{'format'} eq "wav" || $id eq "VOICE_PAUSE") {
encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object); encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
} else {
copy($wav, $mp3);
}
synchronize($tts_object); synchronize($tts_object);
if (defined($ENV{'POOL'})) { if (defined($ENV{'POOL'})) {
copy($mp3, $pool_file); copy($mp3, $pool_file);