1
0
Fork 0
forked from len0rd/rockbox

voice: Add support for the Piper TTS engine

https://github.com/rhasspy/piper

Piper is a high-quality, offline, neural-network-based TTS engine with good language coverage.

Note that you have to download the Piper voice models manually and set
PIPER_MODEL_DIR to the directory that contains them.  The configure script
will let you choose from the available models and remember your choices.

Change-Id: I8eba9fcf78b51b01b89491539aac3e423cc42f16
This commit is contained in:
Solomon Peachy 2024-04-19 21:53:43 -04:00
parent 418a5acea0
commit e8a51569ad
2 changed files with 147 additions and 51 deletions

View file

@ -42,7 +42,8 @@ Usage: voice.pl [options] [path to dir]
Specify which target you want to build voicefile for. Must include
any features that target supports.
-f=<file> Use existing voiceids file
-f=<file>
Use existing voiceids file
-i=<target_id>
Numeric target id. Needed for voice building.
@ -64,7 +65,8 @@ Usage: voice.pl [options] [path to dir]
Options to pass to the TTS engine. Enclose in double quotes if the
options include spaces.
-F Force the file to be regenerated even if present
-F
Force the file to be regenerated even if present
-v
Be verbose
@ -73,57 +75,78 @@ USAGE
}
my %festival_lang_map = (
'english' => 'english',
'english-us' => 'english',
'espanol' => 'spanish',
#'finnish' => 'finnish'
#'italiano' => 'italian',
#'czech' => 'czech',
#'welsh' => 'welsh'
'english' => 'english',
'english-us' => 'english',
'espanol' => 'spanish',
#'finnish' => 'finnish'
#'italiano' => 'italian',
#'czech' => 'czech',
#'welsh' => 'welsh'
);
my %gtts_lang_map = (
'english' => '-l en -t co.uk', # Always first, it's the golden master
'czech' => '-l cs', # not supported
'dansk' => '-l da',
'deutsch' => '-l de',
'english-us' => '-l en -t us',
'espanol' => '-l es',
'francais' => '-l fr',
'greek' => '-l el',
'magyar' => '-l hu',
'italiano' => '-l it',
'nederlands' => '-l nl',
'norsk' => '-l no',
'polski' => '-l pl',
'russian' => '-l ru',
'slovak' => '-l sk',
'srpski' => '-l sr',
'svenska' => '-l sv',
'turkce' => '-l tr',
'czech' => '-l cs',
'dansk' => '-l da',
'deutsch' => '-l de',
'english-us' => '-l en -t us',
'espanol' => '-l es',
'francais' => '-l fr',
'greek' => '-l el',
'magyar' => '-l hu',
'italiano' => '-l it',
'nederlands' => '-l nl',
'norsk' => '-l no',
'polski' => '-l pl',
'russian' => '-l ru',
'slovak' => '-l sk',
'srpski' => '-l sr',
'svenska' => '-l sv',
'turkce' => '-l tr',
);
my %espeak_lang_map = (
'english' => 'en-gb', # Always first, it's the golden master
'czech' => 'cs',
'dansk' => 'da',
'deutsch' => 'de',
'english-us' => 'en-us',
'espanol' => 'es',
'francais' => 'fr-fr',
'greek' => 'el',
'nederlands' => 'nl',
'magyar' => 'hu',
'italiano' => 'it',
'japanese' => 'ja',
'nederlands' => 'nl',
'norsk' => 'no',
'polski' => 'pl',
'russian' => 'ru',
'slovak' => 'sk',
'srpski' => 'sr',
'svenska' => 'sv',
'turkce' => 'tr',
'english' => '-ven-gb -k 5', # Always first, it's the golden master
'czech' => '-vcs',
'dansk' => '-vda',
'deutsch' => '-vde',
'english-us' => '-ven-us -k 5',
'espanol' => '-ves',
'francais' => '-vfr-fr',
'greek' => '-vel',
'magyar' => '-vhu',
'italiano' => '-vit',
'japanese' => '-vja',
'nederlands' => '-vnl',
'norsk' => '-vno',
'polski' => '-vpl',
'russian' => '-vru',
'slovak' => '-vsk',
'srpski' => '-vsr',
'svenska' => '-vsv',
'turkce' => '-vtr',
);
# Language name => Piper voice model file name (.onnx).
#
# init_tts() appends the value to $ENV{PIPER_MODEL_DIR} to build the default
# "--model <dir>/<file>" option, but only when the user-supplied engine
# options do not already contain --model.  A language without an entry here
# gets no default model from this table.
my %piper_lang_map = (
'english' => 'en_GB-cori-high.onnx', # Always first, it's the golden master
'czech' => 'cs_CZ-jirka-medium.onnx',
'dansk' => 'da_DK-talesyntese-medium.onnx',
'deutsch' => 'de_DE-thorsten-high.onnx',
'english-us' => 'en_US-libritts-high.onnx',
'espanol' => 'es_ES-sharvard-medium.onnx',
'francais' => 'fr_FR-siwis-medium.onnx',
'greek' => 'el_GR-rapunzelina-low.onnx',
# NOTE(review): disabled entry; the value is an espeak-style "-v" flag, not a
# model file name -- looks like a placeholder, replace it before enabling.
# 'magyar' => '-vhu',
'italiano' => 'it_IT-riccardo-x_low.onnx',
# NOTE(review): disabled entry; same espeak-style placeholder value as above.
# 'japanese' => '-vja',
'nederlands' => 'nl_NL-mls-medium.onnx',
'norsk' => 'no_NO-talesyntese-medium.onnx',
'polski' => 'pl_PL-gosia-medium.onnx',
'russian' => 'ru_RU-irina-medium.onnx',
'slovak' => 'sk_SK-lili-medium.onnx',
'srpski' => 'sr_RS-serbski_institut-medium.onnx',
'svenska' => 'sv_SE-nst-medium.onnx',
'turkce' => 'tr_TR-fettah-medium.onnx',
);
my $trim_thresh = 500; # Trim silence if over this, in ms
@ -141,6 +164,7 @@ sub init_tts {
# Don't use given/when here - it's not compatible with old perl versions
if ($tts_engine eq 'festival') {
print("> festival $tts_engine_opts --server\n") if $verbose;
# Open command, and filehandles for STDIN, STDOUT, STDERR
my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
my $dummy = *FESTIVAL_SERVER; #suppress warning
$SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
@ -149,6 +173,21 @@ sub init_tts {
if (defined($festival_lang_map{$language}) && $tts_engine_opts !~ /--language/) {
$ret{"ttsoptions"} = "--language $festival_lang_map{$language} ";
}
} elsif ($tts_engine eq 'piper') {
my $cmd = "piper $tts_engine_opts --json-input";
print("> $cmd\n") if $verbose;
my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd);
$SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
$SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
$ret{"pid"} = $pid;
binmode(*CMD_IN, ':encoding(utf8)');
binmode(*CMD_OUT, ':encoding(utf8)');
binmode(*CMD_ERR, ':encoding(utf8)');
if (defined($piper_lang_map{$language}) && $tts_engine_opts !~ /--model/) {
die("Need PIPER_MODEL_DIR\n") if (!defined($ENV{'PIPER_MODEL_DIR'}));
$ret{"ttsoptions"} = "--model $ENV{PIPER_MODEL_DIR}/$piper_lang_map{$language} ";
}
} elsif ($tts_engine eq 'sapi') {
my $toolsdir = dirname($0);
my $path = `cygpath $toolsdir -a -w`;
@ -176,7 +215,7 @@ sub init_tts {
}
} elsif ($tts_engine eq 'espeak' || $tts_engine eq 'espeak-ng') {
if (defined($espeak_lang_map{$language}) && $tts_engine_opts !~ /-v/) {
$ret{"ttsoptions"} = "-v$espeak_lang_map{$language} ";
$ret{"ttsoptions"} = " $espeak_lang_map{$language} ";
}
}
@ -190,6 +229,10 @@ sub shutdown_tts {
# Send SIGTERM to festival server
kill TERM => $$tts_object{"pid"};
}
elsif ($$tts_object{'name'} eq 'piper') {
# Send SIGTERM to piper
kill TERM => $$tts_object{"pid"};
}
elsif ($$tts_object{'name'} eq 'sapi') {
print({$$tts_object{"stdin"}} "QUIT\r\n");
close($$tts_object{"stdin"});
@ -244,6 +287,13 @@ sub voicestring {
close(CMD_OUT);
close(CMD_ERR);
}
elsif ($name eq 'piper') {
$cmd = "{ \"text\": \"$string\", \"output_file\": \"$output\" }";
print(">> $cmd\n") if $verbose;
print(CMD_IN "$cmd\n");
my $res = <CMD_OUT>;
$res = <CMD_ERR>;
}
elsif ($name eq 'flite') {
$cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
print("> $cmd\n") if $verbose;
@ -469,7 +519,6 @@ sub generateclips {
print("\n");
unlink($updfile) if (-f $updfile);
shutdown_tts($tts_object);
}
# Assemble the voicefile
@ -608,6 +657,7 @@ if ($V == 1) {
defined($t) ? $t : "unknown",
$l, $e, $E, $s, $S);
generateclips($l, $t, $e, $E, $tts_object, $S, $f);
shutdown_tts($tts_object);
createvoice($l, $i, $f);
deleteencs();
} elsif ($C) {