1
0
Fork 0
forked from len0rd/rockbox

voice: Add support for the Piper TTS engine

https://github.com/rhasspy/piper

Piper is a high-quality, offline, neural-network-based TTS engine with good language coverage.

Note that you have to download the Piper voice models (*.onnx files) manually
and set the PIPER_MODEL_DIR environment variable to the directory that contains
them.  The configure script then lets you choose among the models it finds
there and remembers your choice.

Change-Id: I8eba9fcf78b51b01b89491539aac3e423cc42f16
This commit is contained in:
Solomon Peachy 2024-04-19 21:53:43 -04:00
parent 418a5acea0
commit e8a51569ad
2 changed files with 147 additions and 51 deletions

52
tools/configure vendored
View file

@ -1159,6 +1159,13 @@ voiceconfig () {
DEFAULT_TTS_OPTS=$GTTS_OPTS
DEFAULT_CHOICE="g"
fi
if [ -n "`findtool piper`" ]; then
PIPER="(p)iper "
PIPER_OPTS=""
DEFAULT_TTS="piper"
DEFAULT_TTS_OPTS=$PIPER_OPTS
DEFAULT_CHOICE="p"
fi
if [ -n "`findtool rbspeak`" ]; then
RBSPEAK="(O)ther "
RBSPEAK_OPTS=""
@ -1167,15 +1174,15 @@ voiceconfig () {
DEFAULT_CHOICE="O"
fi
if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC" ] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$GTTS" ] && [ "$GTTS" = "$RBSPEAK" ] ; then
echo "You need Festival, eSpeak, Mimic, Flite, gtts, or rbspeak in your path, or SAPI available to build voice files"
if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC" ] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$GTTS" ] && [ "$GTTS" = "$PIPER" ] && [ "$PIPER" = "$RBSPEAK" ] ; then
echo "You need Festival, eSpeak, Mimic, Flite, piper, gtts, or rbspeak in your path, or SAPI available to build voice files"
exit 3
fi
if [ "$ARG_TTS" ]; then
option=$ARG_TTS
else
echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${GTTS}${RBSPEAK}(${DEFAULT_CHOICE})?"
echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${GTTS}${RBSPEAK}${PIPER}(${DEFAULT_CHOICE})?"
option=`input`
if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi
advopts="$advopts --tts=$option"
@ -1209,6 +1216,10 @@ voiceconfig () {
TTS_ENGINE="gtts"
TTS_OPTS=$GTTS_OPTS
;;
[Pp]|piper)
TTS_ENGINE="piper"
TTS_OPTS=$PIPER_OPTS
;;
[Oo]|rbspeak)
TTS_ENGINE="rbspeak"
TTS_OPTS=$RBSPEAK_OPTS
@ -1247,6 +1258,39 @@ voiceconfig () {
advopts="$advopts --voice=$CHOICE"
echo "Festival voice set to $TTS_FESTIVAL_VOICE"
echo "(voice_$TTS_FESTIVAL_VOICE)" > festival-prolog.scm
elif [ "$TTS_ENGINE" = "piper" ]; then
if [ -z "$PIPER_MODEL_DIR" ]; then
echo "Please set PIPER_MODEL_DIR!";
exit 1
fi
models=`(cd $PIPER_MODEL_DIR ; ls -1 *onnx)`
for model in $models; do
PIPER_MODEL="$model" # Default
break;
done
if [ "$ARG_VOICE" ]; then
CHOICE=$ARG_VOICE
else
i=1
for model in $models; do
printf "%3d. %s\n" "$i" "$model"
i=`expr $i + 1`
done
printf "Please select which piper model to use (default is $PIPER_MODEL): "
CHOICE=`input`
fi
i=1
for model in $models; do
if [ "$i" = "$CHOICE" -o "$model" = "$CHOICE" ]; then
PIPER_MODEL="$model"
break;
fi
i=`expr $i + 1`
done
TTS_OPTS="$TTS_OPTS --model $PIPER_MODEL_DIR/$PIPER_MODEL"
advopts="$advopts --voice=$PIPER_MODEL"
echo "Piper model set to $PIPER_MODEL"
elif [ "$TTS_ENGINE" = "mimic" ]; then
voicelist=`mimic -lv | cut -d':' -f2`
for voice in $voicelist; do
@ -1268,6 +1312,7 @@ voiceconfig () {
for voice in $voicelist; do
if [ "$i" = "$CHOICE" -o "$voice" = "$CHOICE" ]; then
TTS_MIMIC_VOICE="$voice"
break
fi
i=`expr $i + 1`
done
@ -4756,6 +4801,7 @@ export ANDROID_NDK_PATH=${ANDROID_NDK_PATH}
export ANDROID_SDK_PATH=${ANDROID_SDK_PATH}
export ANDROID_PLATFORM_VERSION=${ANDROID_PLATFORM_VERSION}
export TOOLSET=${toolset}
export PIPER_MODEL_DIR=${PIPER_MODEL_DIR}
$CCACHE_ARG
CONFIGURE_OPTIONS=${cmdline}

View file

@ -42,7 +42,8 @@ Usage: voice.pl [options] [path to dir]
Specify which target you want to build voicefile for. Must include
any features that target supports.
-f=<file> Use existing voiceids file
-f=<file>
Use existing voiceids file
-i=<target_id>
Numeric target id. Needed for voice building.
@ -64,7 +65,8 @@ Usage: voice.pl [options] [path to dir]
Options to pass to the TTS engine. Enclose in double quotes if the
options include spaces.
-F Force the file to be regenerated even if present
-F
Force the file to be regenerated even if present
-v
Be verbose
@ -84,7 +86,7 @@ my %festival_lang_map = (
my %gtts_lang_map = (
'english' => '-l en -t co.uk', # Always first, it's the golden master
'czech' => '-l cs', # not supported
'czech' => '-l cs',
'dansk' => '-l da',
'deutsch' => '-l de',
'english-us' => '-l en -t us',
@ -104,26 +106,47 @@ my %gtts_lang_map = (
);
# Per-language eSpeak settings, keyed by Rockbox language name.  init_tts
# looks the current language up here and puts the value into the per-language
# TTS options, unless the caller-supplied engine options already match /-v/.
#
# NOTE(review): every key below is listed twice -- first with a bare voice
# code ('en-gb'), then with a complete option string ('-ven-gb -k 5').  When a
# list is assigned to a Perl hash the later pair for a key wins, so the
# '-v...' values are the effective ones.  The first group looks like the
# removed side of a diff hunk shown without +/- markers -- confirm against the
# real voice.pl before relying on it.
my %espeak_lang_map = (
# First group: bare eSpeak voice codes ('nederlands' appears twice here).
'english' => 'en-gb', # Always first, it's the golden master
'czech' => 'cs',
'dansk' => 'da',
'deutsch' => 'de',
'english-us' => 'en-us',
'espanol' => 'es',
'francais' => 'fr-fr',
'greek' => 'el',
'nederlands' => 'nl',
'magyar' => 'hu',
'italiano' => 'it',
'japanese' => 'ja',
'nederlands' => 'nl',
'norsk' => 'no',
'polski' => 'pl',
'russian' => 'ru',
'slovak' => 'sk',
'srpski' => 'sr',
'svenska' => 'sv',
'turkce' => 'tr',
# Second group: complete option strings; these override the entries above.
'english' => '-ven-gb -k 5', # Always first, it's the golden master
'czech' => '-vcs',
'dansk' => '-vda',
'deutsch' => '-vde',
'english-us' => '-ven-us -k 5',
'espanol' => '-ves',
'francais' => '-vfr-fr',
'greek' => '-vel',
'magyar' => '-vhu',
'italiano' => '-vit',
'japanese' => '-vja',
'nederlands' => '-vnl',
'norsk' => '-vno',
'polski' => '-vpl',
'russian' => '-vru',
'slovak' => '-vsk',
'srpski' => '-vsr',
'svenska' => '-vsv',
'turkce' => '-vtr',
);
# Default Piper voice model file (*.onnx) for each Rockbox language name.
# init_tts joins the value onto $ENV{PIPER_MODEL_DIR} to build a
# "--model <dir>/<file>" option, but only when the caller-supplied engine
# options do not already contain --model.  A language with no entry here gets
# no default model.
my %piper_lang_map = (
'english' => 'en_GB-cori-high.onnx', # Always first, it's the golden master
'czech' => 'cs_CZ-jirka-medium.onnx',
'dansk' => 'da_DK-talesyntese-medium.onnx',
'deutsch' => 'de_DE-thorsten-high.onnx',
'english-us' => 'en_US-libritts-high.onnx',
'espanol' => 'es_ES-sharvard-medium.onnx',
'francais' => 'fr_FR-siwis-medium.onnx',
'greek' => 'el_GR-rapunzelina-low.onnx',
# 'magyar' => '-vhu', # NOTE(review): disabled; '-vhu' is an eSpeak flag carried over from %espeak_lang_map, not a Piper model file
'italiano' => 'it_IT-riccardo-x_low.onnx',
# 'japanese' => '-vja', # NOTE(review): disabled; '-vja' is an eSpeak flag carried over from %espeak_lang_map, not a Piper model file
'nederlands' => 'nl_NL-mls-medium.onnx',
'norsk' => 'no_NO-talesyntese-medium.onnx',
'polski' => 'pl_PL-gosia-medium.onnx',
'russian' => 'ru_RU-irina-medium.onnx',
'slovak' => 'sk_SK-lili-medium.onnx',
'srpski' => 'sr_RS-serbski_institut-medium.onnx',
'svenska' => 'sv_SE-nst-medium.onnx',
'turkce' => 'tr_TR-fettah-medium.onnx',
);
my $trim_thresh = 500; # Trim silence if over this, in ms
@ -141,6 +164,7 @@ sub init_tts {
# Don't use given/when here - it's not compatible with old perl versions
if ($tts_engine eq 'festival') {
print("> festival $tts_engine_opts --server\n") if $verbose;
# Open command, and filehandles for STDIN, STDOUT, STDERR
my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
my $dummy = *FESTIVAL_SERVER; #suppress warning
$SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
@ -149,6 +173,21 @@ sub init_tts {
if (defined($festival_lang_map{$language}) && $tts_engine_opts !~ /--language/) {
$ret{"ttsoptions"} = "--language $festival_lang_map{$language} ";
}
} elsif ($tts_engine eq 'piper') {
my $cmd = "piper $tts_engine_opts --json-input";
print("> $cmd\n") if $verbose;
my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd);
$SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
$SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
$ret{"pid"} = $pid;
binmode(*CMD_IN, ':encoding(utf8)');
binmode(*CMD_OUT, ':encoding(utf8)');
binmode(*CMD_ERR, ':encoding(utf8)');
if (defined($piper_lang_map{$language}) && $tts_engine_opts !~ /--model/) {
die("Need PIPER_MODEL_DIR\n") if (!defined($ENV{'PIPER_MODEL_DIR'}));
$ret{"ttsoptions"} = "--model $ENV{PIPER_MODEL_DIR}/$piper_lang_map{$language} ";
}
} elsif ($tts_engine eq 'sapi') {
my $toolsdir = dirname($0);
my $path = `cygpath $toolsdir -a -w`;
@ -176,7 +215,7 @@ sub init_tts {
}
} elsif ($tts_engine eq 'espeak' || $tts_engine eq 'espeak-ng') {
if (defined($espeak_lang_map{$language}) && $tts_engine_opts !~ /-v/) {
$ret{"ttsoptions"} = "-v$espeak_lang_map{$language} ";
$ret{"ttsoptions"} = " $espeak_lang_map{$language} ";
}
}
@ -190,6 +229,10 @@ sub shutdown_tts {
# Send SIGTERM to festival server
kill TERM => $$tts_object{"pid"};
}
elsif ($$tts_object{'name'} eq 'piper') {
# Send SIGTERM to piper
kill TERM => $$tts_object{"pid"};
}
elsif ($$tts_object{'name'} eq 'sapi') {
print({$$tts_object{"stdin"}} "QUIT\r\n");
close($$tts_object{"stdin"});
@ -244,6 +287,13 @@ sub voicestring {
close(CMD_OUT);
close(CMD_ERR);
}
elsif ($name eq 'piper') {
$cmd = "{ \"text\": \"$string\", \"output_file\": \"$output\" }";
print(">> $cmd\n") if $verbose;
print(CMD_IN "$cmd\n");
my $res = <CMD_OUT>;
$res = <CMD_ERR>;
}
elsif ($name eq 'flite') {
$cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
print("> $cmd\n") if $verbose;
@ -469,7 +519,6 @@ sub generateclips {
print("\n");
unlink($updfile) if (-f $updfile);
shutdown_tts($tts_object);
}
# Assemble the voicefile
@ -608,6 +657,7 @@ if ($V == 1) {
defined($t) ? $t : "unknown",
$l, $e, $E, $s, $S);
generateclips($l, $t, $e, $E, $tts_object, $S, $f);
shutdown_tts($tts_object);
createvoice($l, $i, $f);
deleteencs();
} elsif ($C) {