voice: Add support for Google Translate's speech synthesizer

Uses the 'gtts-cli' command line client.  Supports a wide variety of
languages, including all "Complete" and "Good" Rockbox translations.

Additional changes:

 * Voice synth script can accept pre-encoded mp3 files
 * Move language->synth options mapping into the voice script
 * Additional cleanups

Change-Id: I9523e2bca87cbcee2d8c4111f9892e8e458c7419
This commit is contained in:
Solomon Peachy 2020-07-08 19:05:09 -04:00
parent 5e98eba8ab
commit 2c3399537c
2 changed files with 93 additions and 51 deletions

53
tools/configure vendored
View file

@ -1111,23 +1111,7 @@ voiceconfig () {
fi fi
if [ -n "`findtool festival`" ]; then if [ -n "`findtool festival`" ]; then
FESTIVAL="(F)estival " FESTIVAL="(F)estival "
case "$thislang" in FESTIVAL_OPTS=""
"italiano")
FESTIVAL_OPTS="--language italian"
;;
"espanol")
FESTIVAL_OPTS="--language spanish"
;;
"finnish")
FESTIVAL_OPTS="--language finnish"
;;
"czech")
FESTIVAL_OPTS="--language czech"
;;
*)
FESTIVAL_OPTS=""
;;
esac
DEFAULT_TTS="festival" DEFAULT_TTS="festival"
DEFAULT_TTS_OPTS=$FESTIVAL_OPTS DEFAULT_TTS_OPTS=$FESTIVAL_OPTS
DEFAULT_NOISEFLOOR="500" DEFAULT_NOISEFLOOR="500"
@ -1149,6 +1133,23 @@ voiceconfig () {
DEFAULT_NOISEFLOOR="500" DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="w" DEFAULT_CHOICE="w"
fi fi
# Allow SAPI if Windows is in use
if [ -n "`findtool winver`" ]; then
SAPI="(S)API "
SAPI_OPTS=""
DEFAULT_TTS="sapi"
DEFAULT_TTS_OPTS=$SAPI_OPTS
DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="S"
fi
if [ -n "`findtool gtts-cli`" ]; then
GTTS="(g)tts "
GTTS_OPTS=""
DEFAULT_TTS="gtts"
DEFAULT_TTS_OPTS=$GTTS_OPTS
DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="g"
fi
if [ -n "`findtool rbspeak`" ]; then if [ -n "`findtool rbspeak`" ]; then
RBSPEAK="(O)ther " RBSPEAK="(O)ther "
RBSPEAK_OPTS="" RBSPEAK_OPTS=""
@ -1157,17 +1158,8 @@ voiceconfig () {
DEFAULT_NOISEFLOOR="500" DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="O" DEFAULT_CHOICE="O"
fi fi
# Allow SAPI if Windows is in use
if [ -n "`findtool winver`" ]; then
SAPI="(S)API "
SAPI_OPTS=""
DEFAULT_TTS="sapi"
DEFAULT_TTS_OPTS=$SAPI_OPTS
DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="s"
fi
if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ]; then if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ] && [ "$RBSPEAK" = "$GTTS" ] ; then
echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files" echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files"
exit 3 exit 3
fi fi
@ -1175,7 +1167,7 @@ voiceconfig () {
if [ "$ARG_TTS" ]; then if [ "$ARG_TTS" ]; then
option=$ARG_TTS option=$ARG_TTS
else else
echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${RBSPEAK}(${DEFAULT_CHOICE})?" echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${GTTS}${RBSPEAK}(${DEFAULT_CHOICE})?"
option=`input` option=`input`
if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi
advopts="$advopts --tts=$option" advopts="$advopts --tts=$option"
@ -1211,6 +1203,11 @@ voiceconfig () {
NOISEFLOOR="500" NOISEFLOOR="500"
TTS_OPTS=$SWIFT_OPTS TTS_OPTS=$SWIFT_OPTS
;; ;;
# gtts selected: accept either case of the letter, like the sibling arms do.
# The bracket expression must be closed ("[Gg]") before the ")" that ends
# the case pattern.
[Gg])
    TTS_ENGINE="gtts"
    NOISEFLOOR="500"
    TTS_OPTS=$GTTS_OPTS
    ;;
[Oo]) [Oo])
TTS_ENGINE="rbspeak" TTS_ENGINE="rbspeak"
NOISEFLOOR="500" NOISEFLOOR="500"

View file

@ -5,7 +5,7 @@
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
# \/ \/ \/ \/ \/ # \/ \/ \/ \/ \/
# $Id$ # $Id$
# #
# Copyright (C) 2007 Jonas Häggqvist # Copyright (C) 2007 Jonas Häggqvist
# #
@ -33,46 +33,73 @@ sub printusage {
Usage: voice.pl [options] [path to dir] Usage: voice.pl [options] [path to dir]
-V -V
Create voice file. You must also specify -t and -l. Create voice file. You must also specify -t and -l.
-C -C
Create .talk clips. Create .talk clips.
-t=<target> -t=<target>
Specify which target you want to build voicefile for. Must include Specify which target you want to build voicefile for. Must include
any features that target supports. any features that target supports.
-i=<target_id> -i=<target_id>
Numeric target id. Needed for voice building. Numeric target id. Needed for voice building.
-l=<language> -l=<language>
Specify which language you want to build. Without .lang extension. Specify which language you want to build. Without .lang extension.
-e=<encoder> -e=<encoder>
Which encoder to use for voice strings Which encoder to use for voice strings
-E=<encoder options> -E=<encoder options>
Which encoder options to use when compressing voice strings. Enclose Which encoder options to use when compressing voice strings. Enclose
in double quotes if the options include spaces. in double quotes if the options include spaces.
-s=<TTS engine> -s=<TTS engine>
Which TTS engine to use. Which TTS engine to use.
-S=<TTS engine options> -S=<TTS engine options>
Options to pass to the TTS engine. Enclose in double quotes if the Options to pass to the TTS engine. Enclose in double quotes if the
options include spaces. options include spaces.
-v -v
Be verbose Be verbose
USAGE USAGE
; ;
} }
# Maps the $language value given to init_tts to the language name passed to
# festival as "-l <name>".  Languages without an entry get no language option.
#
# This must be a parenthesised list: initialising a hash from "{ ... }"
# stores a single anonymous-hash reference as the only key, so every lookup
# would come back undefined.
#
# The commented-out entries are carried over unchanged and remain disabled.
my %festival_lang_map = (
    'english' => 'english',
    'english-us' => 'english',
    'espanol' => 'spanish',
    #'finnish' => 'finnish'
    #'italiano' => 'italian',
    #'czech' => 'czech',
    #'welsh' => 'welsh'
);
# Maps the $language value given to init_tts to the language code passed to
# gtts-cli as "-l <code>".  Languages without an entry get no language option.
#
# This must be a parenthesised list: initialising a hash from "{ ... }"
# stores a single anonymous-hash reference as the only key, so every lookup
# would come back undefined.
my %gtts_lang_map = (
    'english' => 'en-gb',  # Always first, it's the golden master
    'deutsch' => 'de',
    'english-us' => 'en-us',
    'francais' => 'fr-fr',
    'greek' => 'el',       # ISO 639-1 code for Greek ('gr' is the country code)
    'italiano' => 'it',
    'norsk' => 'no',
    'polski' => 'pl',
    'russian' => 'ru',
    'slovak' => 'sk',
    'srpski' => 'sr',
);
# Initialize TTS engine. May return an object or value which will be passed # Initialize TTS engine. May return an object or value which will be passed
# to voicestring and shutdown_tts # to voicestring and shutdown_tts
sub init_tts { sub init_tts {
our $verbose; our $verbose;
my ($tts_engine, $tts_engine_opts, $language) = @_; my ($tts_engine, $tts_engine_opts, $language) = @_;
my %ret = ("name" => $tts_engine); my %ret = ("name" => $tts_engine);
$ret{"format"} = 'wav';
$ret{"ttsoptions"} = "";
# Don't use given/when here - it's not compatible with old perl versions # Don't use given/when here - it's not compatible with old perl versions
if ($tts_engine eq 'festival') { if ($tts_engine eq 'festival') {
print("> festival $tts_engine_opts --server\n") if $verbose; print("> festival $tts_engine_opts --server\n") if $verbose;
@ -81,8 +108,10 @@ sub init_tts {
$SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
$SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
$ret{"pid"} = $pid; $ret{"pid"} = $pid;
} if (defined($festival_lang_map{$language})) {
elsif ($tts_engine eq 'sapi') { $ret{"ttsoptions"} = "-l $festival_lang_map{$language} ";
}
} elsif ($tts_engine eq 'sapi') {
my $toolsdir = dirname($0); my $toolsdir = dirname($0);
my $path = `cygpath $toolsdir -a -w`; my $path = `cygpath $toolsdir -a -w`;
chomp($path); chomp($path);
@ -102,6 +131,11 @@ sub init_tts {
"stdin" => *CMD_IN, "stdin" => *CMD_IN,
"stdout" => *CMD_OUT, "stdout" => *CMD_OUT,
"vendor" => $vendor); "vendor" => $vendor);
} elsif ($tts_engine eq 'gtts') {
$ret{"format"} = 'mp3';
if (defined($gtts_lang_map{$language})) {
$ret{"ttsoptions"} = "-l $gtts_lang_map{$language} ";
}
} }
return \%ret; return \%ret;
} }
@ -143,6 +177,9 @@ sub voicestring {
my ($string, $output, $tts_engine_opts, $tts_object) = @_; my ($string, $output, $tts_engine_opts, $tts_object) = @_;
my $cmd; my $cmd;
my $name = $$tts_object{'name'}; my $name = $$tts_object{'name'};
$tts_engine_opts .= $$tts_object{"ttsoptions"};
printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose; printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose;
if ($name eq 'festival') { if ($name eq 'festival') {
# festival_client lies to us, so we have to do awful soul-eating # festival_client lies to us, so we have to do awful soul-eating
@ -167,7 +204,7 @@ sub voicestring {
elsif ($name eq 'flite') { elsif ($name eq 'flite') {
$cmd = "flite $tts_engine_opts -t \"$string\" \"$output\""; $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
print("> $cmd\n") if $verbose; print("> $cmd\n") if $verbose;
`$cmd`; system($cmd);
} }
elsif ($name eq 'espeak') { elsif ($name eq 'espeak') {
$cmd = "espeak $tts_engine_opts -w \"$output\""; $cmd = "espeak $tts_engine_opts -w \"$output\"";
@ -193,11 +230,14 @@ sub voicestring {
close(RBSPEAK); close(RBSPEAK);
} }
elsif ($name eq 'mimic') { elsif ($name eq 'mimic') {
$cmd = "mimic $tts_engine_opts -o $output"; $cmd = "mimic $tts_engine_opts -o $output -t \"$string\" ";
print("> $cmd\n") if $verbose; print("> $cmd\n") if $verbose;
open (MIMIC, "| $cmd"); system($cmd);
print MIMIC $string . "\n"; }
close(MIMIC); elsif ($name eq 'gtts') {
$cmd = "gtts-cli $tts_engine_opts -o $output \"$string\"";
print("> $cmd\n") if $verbose;
system($cmd);
} }
} }
@ -326,17 +366,22 @@ sub generateclips {
if ($id eq "VOICE_PAUSE") { if ($id eq "VOICE_PAUSE") {
print("Use distributed $wav\n") if $verbose; print("Use distributed $wav\n") if $verbose;
copy(dirname($0)."/VOICE_PAUSE.wav", $wav); copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
} else {
voicestring($voice, $wav, $tts_engine_opts, $tts_object);
if ($tts_object->{'format'} eq "wav") {
wavtrim($wav, 500, $tts_object);
# 500 seems to be a reasonable default for now
}
} }
else { if ($tts_object->{'format'} eq "wav" || $id eq "VOICE_PAUSE") {
voicestring($voice, $wav, $tts_engine_opts, $tts_object); encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
wavtrim($wav, 500, $tts_object); } else {
# 500 seems to be a reasonable default for now copy($wav, $mp3);
} }
encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
synchronize($tts_object); synchronize($tts_object);
if (defined($ENV{'POOL'})) { if (defined($ENV{'POOL'})) {
copy($mp3, $pool_file); copy($mp3, $pool_file);
} }
unlink($wav); unlink($wav);
} }