mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-11-09 21:22:39 -05:00
voice: Add support for Google Translate's speech synthesizer
Uses the 'gtts-cli' command line client. Supports a wide variety of languages, including all "Complete" and "Good" Rockbox translations.

Additional changes:
* voice synth script can accept pre-encoded mp3 files
* Move language->synth options mapping into the voice script
* Additional cleanups

Change-Id: I9523e2bca87cbcee2d8c4111f9892e8e458c7419
This commit is contained in:
parent
5e98eba8ab
commit
2c3399537c
2 changed files with 93 additions and 51 deletions
53
tools/configure
vendored
53
tools/configure
vendored
|
|
@ -1111,23 +1111,7 @@ voiceconfig () {
|
||||||
fi
|
fi
|
||||||
if [ -n "`findtool festival`" ]; then
|
if [ -n "`findtool festival`" ]; then
|
||||||
FESTIVAL="(F)estival "
|
FESTIVAL="(F)estival "
|
||||||
case "$thislang" in
|
FESTIVAL_OPTS=""
|
||||||
"italiano")
|
|
||||||
FESTIVAL_OPTS="--language italian"
|
|
||||||
;;
|
|
||||||
"espanol")
|
|
||||||
FESTIVAL_OPTS="--language spanish"
|
|
||||||
;;
|
|
||||||
"finnish")
|
|
||||||
FESTIVAL_OPTS="--language finnish"
|
|
||||||
;;
|
|
||||||
"czech")
|
|
||||||
FESTIVAL_OPTS="--language czech"
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
FESTIVAL_OPTS=""
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
DEFAULT_TTS="festival"
|
DEFAULT_TTS="festival"
|
||||||
DEFAULT_TTS_OPTS=$FESTIVAL_OPTS
|
DEFAULT_TTS_OPTS=$FESTIVAL_OPTS
|
||||||
DEFAULT_NOISEFLOOR="500"
|
DEFAULT_NOISEFLOOR="500"
|
||||||
|
|
@ -1149,6 +1133,23 @@ voiceconfig () {
|
||||||
DEFAULT_NOISEFLOOR="500"
|
DEFAULT_NOISEFLOOR="500"
|
||||||
DEFAULT_CHOICE="w"
|
DEFAULT_CHOICE="w"
|
||||||
fi
|
fi
|
||||||
|
# Allow SAPI if Windows is in use
|
||||||
|
if [ -n "`findtool winver`" ]; then
|
||||||
|
SAPI="(S)API "
|
||||||
|
SAPI_OPTS=""
|
||||||
|
DEFAULT_TTS="sapi"
|
||||||
|
DEFAULT_TTS_OPTS=$SAPI_OPTS
|
||||||
|
DEFAULT_NOISEFLOOR="500"
|
||||||
|
DEFAULT_CHOICE="S"
|
||||||
|
fi
|
||||||
|
if [ -n "`findtool gtts-cli`" ]; then
|
||||||
|
GTTS="(g)tts "
|
||||||
|
GTTS_OPTS=""
|
||||||
|
DEFAULT_TTS="gtts"
|
||||||
|
DEFAULT_TTS_OPTS=$GTTS_OPTS
|
||||||
|
DEFAULT_NOISEFLOOR="500"
|
||||||
|
DEFAULT_CHOICE="g"
|
||||||
|
fi
|
||||||
if [ -n "`findtool rbspeak`" ]; then
|
if [ -n "`findtool rbspeak`" ]; then
|
||||||
RBSPEAK="(O)ther "
|
RBSPEAK="(O)ther "
|
||||||
RBSPEAK_OPTS=""
|
RBSPEAK_OPTS=""
|
||||||
|
|
@ -1157,17 +1158,8 @@ voiceconfig () {
|
||||||
DEFAULT_NOISEFLOOR="500"
|
DEFAULT_NOISEFLOOR="500"
|
||||||
DEFAULT_CHOICE="O"
|
DEFAULT_CHOICE="O"
|
||||||
fi
|
fi
|
||||||
# Allow SAPI if Windows is in use
|
|
||||||
if [ -n "`findtool winver`" ]; then
|
|
||||||
SAPI="(S)API "
|
|
||||||
SAPI_OPTS=""
|
|
||||||
DEFAULT_TTS="sapi"
|
|
||||||
DEFAULT_TTS_OPTS=$SAPI_OPTS
|
|
||||||
DEFAULT_NOISEFLOOR="500"
|
|
||||||
DEFAULT_CHOICE="s"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ]; then
|
if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ] && [ "$RBSPEAK" = "$GTTS" ] ; then
|
||||||
echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files"
|
echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files"
|
||||||
exit 3
|
exit 3
|
||||||
fi
|
fi
|
||||||
|
|
@ -1175,7 +1167,7 @@ voiceconfig () {
|
||||||
if [ "$ARG_TTS" ]; then
|
if [ "$ARG_TTS" ]; then
|
||||||
option=$ARG_TTS
|
option=$ARG_TTS
|
||||||
else
|
else
|
||||||
echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${RBSPEAK}(${DEFAULT_CHOICE})?"
|
echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${GTTS}${RBSPEAK}(${DEFAULT_CHOICE})?"
|
||||||
option=`input`
|
option=`input`
|
||||||
if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi
|
if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi
|
||||||
advopts="$advopts --tts=$option"
|
advopts="$advopts --tts=$option"
|
||||||
|
|
@ -1211,6 +1203,11 @@ voiceconfig () {
|
||||||
NOISEFLOOR="500"
|
NOISEFLOOR="500"
|
||||||
TTS_OPTS=$SWIFT_OPTS
|
TTS_OPTS=$SWIFT_OPTS
|
||||||
;;
|
;;
|
||||||
|
[Gg)
|
||||||
|
TTS_ENGINE="gtts"
|
||||||
|
NOISEFLOOR="500"
|
||||||
|
TTS_OPTS=$GTTS_OPTS
|
||||||
|
;;
|
||||||
[Oo])
|
[Oo])
|
||||||
TTS_ENGINE="rbspeak"
|
TTS_ENGINE="rbspeak"
|
||||||
NOISEFLOOR="500"
|
NOISEFLOOR="500"
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@
|
||||||
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||||
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||||
# \/ \/ \/ \/ \/
|
# \/ \/ \/ \/ \/
|
||||||
# $Id$
|
# $Id$
|
||||||
#
|
#
|
||||||
# Copyright (C) 2007 Jonas Häggqvist
|
# Copyright (C) 2007 Jonas Häggqvist
|
||||||
#
|
#
|
||||||
|
|
@ -33,46 +33,73 @@ sub printusage {
|
||||||
Usage: voice.pl [options] [path to dir]
|
Usage: voice.pl [options] [path to dir]
|
||||||
-V
|
-V
|
||||||
Create voice file. You must also specify -t and -l.
|
Create voice file. You must also specify -t and -l.
|
||||||
|
|
||||||
-C
|
-C
|
||||||
Create .talk clips.
|
Create .talk clips.
|
||||||
|
|
||||||
-t=<target>
|
-t=<target>
|
||||||
Specify which target you want to build voicefile for. Must include
|
Specify which target you want to build voicefile for. Must include
|
||||||
any features that target supports.
|
any features that target supports.
|
||||||
|
|
||||||
-i=<target_id>
|
-i=<target_id>
|
||||||
Numeric target id. Needed for voice building.
|
Numeric target id. Needed for voice building.
|
||||||
|
|
||||||
-l=<language>
|
-l=<language>
|
||||||
Specify which language you want to build. Without .lang extension.
|
Specify which language you want to build. Without .lang extension.
|
||||||
|
|
||||||
-e=<encoder>
|
-e=<encoder>
|
||||||
Which encoder to use for voice strings
|
Which encoder to use for voice strings
|
||||||
|
|
||||||
-E=<encoder options>
|
-E=<encoder options>
|
||||||
Which encoder options to use when compressing voice strings. Enclose
|
Which encoder options to use when compressing voice strings. Enclose
|
||||||
in double quotes if the options include spaces.
|
in double quotes if the options include spaces.
|
||||||
|
|
||||||
-s=<TTS engine>
|
-s=<TTS engine>
|
||||||
Which TTS engine to use.
|
Which TTS engine to use.
|
||||||
|
|
||||||
-S=<TTS engine options>
|
-S=<TTS engine options>
|
||||||
Options to pass to the TTS engine. Enclose in double quotes if the
|
Options to pass to the TTS engine. Enclose in double quotes if the
|
||||||
options include spaces.
|
options include spaces.
|
||||||
|
|
||||||
-v
|
-v
|
||||||
Be verbose
|
Be verbose
|
||||||
USAGE
|
USAGE
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Maps the $language value given to init_tts to the language name that is
# passed to Festival as "-l <name>".  Languages without an entry get no
# extra language option.
#
# The initialiser must be a parenthesised list: assigning a { ... } hash
# reference to a %hash stores a single stringified-reference key, so every
# lookup would come back undefined.
my %festival_lang_map = (
    'english'    => 'english',
    'english-us' => 'english',
    'espanol'    => 'spanish',
    #'finnish'   => 'finnish',
    #'italiano'  => 'italian',
    #'czech'     => 'czech',
    #'welsh'     => 'welsh',
);
|
||||||
|
|
||||||
|
# Maps the $language value given to init_tts to the language code that is
# passed to gtts-cli as "-l <code>".  Languages without an entry get no
# extra language option.
#
# The initialiser must be a parenthesised list: assigning a { ... } hash
# reference to a %hash stores a single stringified-reference key, so every
# lookup would come back undefined.
my %gtts_lang_map = (
    'english'    => 'en-gb',  # Always first, it's the golden master
    'deutsch'    => 'de',
    'english-us' => 'en-us',
    'francais'   => 'fr-fr',
    'greek'      => 'el',     # ISO 639-1 language code; 'gr' is the country code
    'italiano'   => 'it',
    'norsk'      => 'no',
    'polski'     => 'pl',
    'russian'    => 'ru',
    'slovak'     => 'sk',
    'srpski'     => 'sr',
);
|
||||||
|
|
||||||
# Initialize TTS engine. May return an object or value which will be passed
|
# Initialize TTS engine. May return an object or value which will be passed
|
||||||
# to voicestring and shutdown_tts
|
# to voicestring and shutdown_tts
|
||||||
sub init_tts {
|
sub init_tts {
|
||||||
our $verbose;
|
our $verbose;
|
||||||
my ($tts_engine, $tts_engine_opts, $language) = @_;
|
my ($tts_engine, $tts_engine_opts, $language) = @_;
|
||||||
my %ret = ("name" => $tts_engine);
|
my %ret = ("name" => $tts_engine);
|
||||||
|
$ret{"format"} = 'wav';
|
||||||
|
$ret{"ttsoptions"} = "";
|
||||||
|
|
||||||
# Don't use given/when here - it's not compatible with old perl versions
|
# Don't use given/when here - it's not compatible with old perl versions
|
||||||
if ($tts_engine eq 'festival') {
|
if ($tts_engine eq 'festival') {
|
||||||
print("> festival $tts_engine_opts --server\n") if $verbose;
|
print("> festival $tts_engine_opts --server\n") if $verbose;
|
||||||
|
|
@ -81,8 +108,10 @@ sub init_tts {
|
||||||
$SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
|
$SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
|
||||||
$SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
|
$SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
|
||||||
$ret{"pid"} = $pid;
|
$ret{"pid"} = $pid;
|
||||||
}
|
if (defined($festival_lang_map{$language})) {
|
||||||
elsif ($tts_engine eq 'sapi') {
|
$ret{"ttsoptions"} = "-l $festival_lang_map{$language} ";
|
||||||
|
}
|
||||||
|
} elsif ($tts_engine eq 'sapi') {
|
||||||
my $toolsdir = dirname($0);
|
my $toolsdir = dirname($0);
|
||||||
my $path = `cygpath $toolsdir -a -w`;
|
my $path = `cygpath $toolsdir -a -w`;
|
||||||
chomp($path);
|
chomp($path);
|
||||||
|
|
@ -102,6 +131,11 @@ sub init_tts {
|
||||||
"stdin" => *CMD_IN,
|
"stdin" => *CMD_IN,
|
||||||
"stdout" => *CMD_OUT,
|
"stdout" => *CMD_OUT,
|
||||||
"vendor" => $vendor);
|
"vendor" => $vendor);
|
||||||
|
} elsif ($tts_engine eq 'gtts') {
|
||||||
|
$ret{"format"} = 'mp3';
|
||||||
|
if (defined($gtts_lang_map{$language})) {
|
||||||
|
$ret{"ttsoptions"} = "-l $gtts_lang_map{$language} ";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return \%ret;
|
return \%ret;
|
||||||
}
|
}
|
||||||
|
|
@ -143,6 +177,9 @@ sub voicestring {
|
||||||
my ($string, $output, $tts_engine_opts, $tts_object) = @_;
|
my ($string, $output, $tts_engine_opts, $tts_object) = @_;
|
||||||
my $cmd;
|
my $cmd;
|
||||||
my $name = $$tts_object{'name'};
|
my $name = $$tts_object{'name'};
|
||||||
|
|
||||||
|
$tts_engine_opts .= $$tts_object{"ttsoptions"};
|
||||||
|
|
||||||
printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose;
|
printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose;
|
||||||
if ($name eq 'festival') {
|
if ($name eq 'festival') {
|
||||||
# festival_client lies to us, so we have to do awful soul-eating
|
# festival_client lies to us, so we have to do awful soul-eating
|
||||||
|
|
@ -167,7 +204,7 @@ sub voicestring {
|
||||||
elsif ($name eq 'flite') {
|
elsif ($name eq 'flite') {
|
||||||
$cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
|
$cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
|
||||||
print("> $cmd\n") if $verbose;
|
print("> $cmd\n") if $verbose;
|
||||||
`$cmd`;
|
system($cmd);
|
||||||
}
|
}
|
||||||
elsif ($name eq 'espeak') {
|
elsif ($name eq 'espeak') {
|
||||||
$cmd = "espeak $tts_engine_opts -w \"$output\"";
|
$cmd = "espeak $tts_engine_opts -w \"$output\"";
|
||||||
|
|
@ -193,11 +230,14 @@ sub voicestring {
|
||||||
close(RBSPEAK);
|
close(RBSPEAK);
|
||||||
}
|
}
|
||||||
elsif ($name eq 'mimic') {
|
elsif ($name eq 'mimic') {
|
||||||
$cmd = "mimic $tts_engine_opts -o $output";
|
$cmd = "mimic $tts_engine_opts -o $output -t \"$string\" ";
|
||||||
print("> $cmd\n") if $verbose;
|
print("> $cmd\n") if $verbose;
|
||||||
open (MIMIC, "| $cmd");
|
system($cmd);
|
||||||
print MIMIC $string . "\n";
|
}
|
||||||
close(MIMIC);
|
elsif ($name eq 'gtts') {
|
||||||
|
$cmd = "gtts-cli $tts_engine_opts -o $output \"$string\"";
|
||||||
|
print("> $cmd\n") if $verbose;
|
||||||
|
system($cmd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -326,17 +366,22 @@ sub generateclips {
|
||||||
if ($id eq "VOICE_PAUSE") {
|
if ($id eq "VOICE_PAUSE") {
|
||||||
print("Use distributed $wav\n") if $verbose;
|
print("Use distributed $wav\n") if $verbose;
|
||||||
copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
|
copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
|
||||||
|
} else {
|
||||||
|
voicestring($voice, $wav, $tts_engine_opts, $tts_object);
|
||||||
|
if ($tts_object->{'format'} eq "wav") {
|
||||||
|
wavtrim($wav, 500, $tts_object);
|
||||||
|
# 500 seems to be a reasonable default for now
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
if ($tts_object->{'format'} eq "wav" || $id eq "VOICE_PAUSE") {
|
||||||
voicestring($voice, $wav, $tts_engine_opts, $tts_object);
|
encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
|
||||||
wavtrim($wav, 500, $tts_object);
|
} else {
|
||||||
# 500 seems to be a reasonable default for now
|
copy($wav, $mp3);
|
||||||
}
|
}
|
||||||
|
|
||||||
encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
|
|
||||||
synchronize($tts_object);
|
synchronize($tts_object);
|
||||||
if (defined($ENV{'POOL'})) {
|
if (defined($ENV{'POOL'})) {
|
||||||
copy($mp3, $pool_file);
|
copy($mp3, $pool_file);
|
||||||
}
|
}
|
||||||
unlink($wav);
|
unlink($wav);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue