1
0
Fork 0
forked from len0rd/rockbox

Voice file generation: * Significant speedup of SAPI5 voice generation by running lame and wavtrim from inside the VB script instead of the Perl script, avoiding the large overhead of process creation within cygwin. Added proper synchronisation between the Perl script and the VB script, as the pipes are buffered. * Make wavtrim work as intended (the threshold wasn't passed). * Set correct SVN properties for the VB script.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14562 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2007-09-01 08:38:10 +00:00
parent 80e91c1af3
commit 080522f917
2 changed files with 110 additions and 56 deletions

View file

@ -20,11 +20,13 @@
'To be done: 'To be done:
' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed) ' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed)
' - Voice specific replacements/corrections for pronounciation (this should be at a higher level really)
Option Explicit
Const SSFMCreateForWrite = 3 Const SSFMCreateForWrite = 3
Const SPSF_8kHz16BitMono = 6 ' Audio formats for SAPI5 filestream object
Const SPSF_8kHz16BitMono = 6
Const SPSF_11kHz16BitMono = 10 Const SPSF_11kHz16BitMono = 10
Const SPSF_12kHz16BitMono = 14 Const SPSF_12kHz16BitMono = 14
Const SPSF_16kHz16BitMono = 18 Const SPSF_16kHz16BitMono = 18
@ -34,34 +36,59 @@ Const SPSF_32kHz16BitMono = 30
Const SPSF_44kHz16BitMono = 34 Const SPSF_44kHz16BitMono = 34
Const SPSF_48kHz16BitMono = 38 Const SPSF_48kHz16BitMono = 38
Dim oSpVoice, oSpFS, nAudioFormat, sText, sOutputFile Dim oShell, oEnv
Dim oSpVoice, oSpFS ' SAPI5 voice and filestream
Dim aLine, aData ' used in command reading
Dim nAudioFormat
Dim bVerbose
On Error Resume Next
nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings: nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings:
'- for AT&T natural voices, use SPSF_32kHz16BitMono '- for AT&T natural voices, use SPSF_32kHz16BitMono
'- for MS voices, use SPSF_22kHz16BitMono '- for MS voices, use SPSF_22kHz16BitMono
Set oShell = CreateObject("WScript.Shell")
Set oEnv = oShell.Environment("Process")
bVerbose = (oEnv("V") <> "")
Set oSpVoice = CreateObject("SAPI.SpVoice") Set oSpVoice = CreateObject("SAPI.SpVoice")
If Err.Number <> 0 Then If Err.Number <> 0 Then
WScript.Echo "Error - could not get SpVoice object. " & _ WScript.StdErr.WriteLine "Error - could not get SpVoice object. " & _
"SAPI 5 not installed?" "SAPI 5 not installed?"
Err.Clear Err.Clear
WScript.Quit 1 WScript.Quit 1
End If End If
While 1 > 0 Set oSpFS = CreateObject("SAPI.SpFileStream")
sText = WScript.StdIn.ReadLine oSpFS.Format.Type = nAudioFormat
sOutputFile = WScript.StdIn.ReadLine
If sOutputFile = "" Then On Error Goto 0
Set oSpFS = Nothing
Set oSpVoice = Nothing Do
Set oArgs = Nothing aLine = Split(WScript.StdIn.ReadLine, vbTab, 2)
WScript.Quit 0 If Err.Number <> 0 Then
WScript.StdErr.WriteLine "Error " & Err.Number & ": " & Err.Description
WScript.Quit 1
End If End If
' WScript.Echo "Saying " + sText + " in " + sOutputFile Select Case aLine(0) ' command
Set oSpFS = CreateObject("SAPI.SpFileStream") Case "SPEAK"
oSpFS.Format.Type = nAudioFormat aData = Split(aLine(1), vbTab, 2)
oSpFS.Open sOutputFile, SSFMCreateForWrite, False If bVerbose Then WScript.StdErr.WriteLine "Saying " & aData(1) _
Set oSpVoice.AudioOutputStream = oSpFS & " in " & aData(0)
oSpVoice.Speak sText oSpFS.Open aData(0), SSFMCreateForWrite, false
oSpFS.Close Set oSpVoice.AudioOutputStream = oSpFS
Wend oSpVoice.Speak aData(1)
oSpFS.Close
Case "EXEC"
If bVerbose Then WScript.StdErr.WriteLine "> " & aLine(1)
oShell.Run aLine(1), 0, true
Case "SYNC"
If bVerbose Then WScript.StdErr.WriteLine "Syncing"
WScript.StdOut.WriteLine aLine(1) ' Just echo what was passed
Case "QUIT"
If bVerbose Then WScript.StdErr.WriteLine "Quitting"
WScript.Quit 0
End Select
Loop

View file

@ -21,6 +21,7 @@ use File::Basename;
use File::Copy; use File::Copy;
use Switch; use Switch;
use vars qw($V $C $t $l $e $E $s $S $i $v); use vars qw($V $C $t $l $e $E $s $S $i $v);
use IPC::Open2;
use IPC::Open3; use IPC::Open3;
use Digest::MD5 qw(md5_hex); use Digest::MD5 qw(md5_hex);
@ -69,43 +70,44 @@ USAGE
sub init_tts { sub init_tts {
our $verbose; our $verbose;
my ($tts_engine, $tts_engine_opts, $language) = @_; my ($tts_engine, $tts_engine_opts, $language) = @_;
my $ret = undef; my %ret = ("name" => $tts_engine);
switch($tts_engine) { switch($tts_engine) {
case "festival" { case "festival" {
print("> festival $tts_engine_opts --server\n") if $verbose; print("> festival $tts_engine_opts --server\n") if $verbose;
my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1"); my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
$ret = *FESTIVAL_SERVER; my $dummy = *FESTIVAL_SERVER; #suppress warning
$ret = $pid;
$SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
$SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
$ret{"pid"} = $pid;
} }
case "sapi5" { case "sapi5" {
my $toolsdir = dirname($0); my $toolsdir = dirname($0);
my $path = `cygpath $toolsdir -a -w`; my $path = `cygpath $toolsdir -a -w`;
chomp($path); chomp($path);
$path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts"; $path = $path . '\\';
$path =~ s/\\/\\\\/g; my $cmd = $path . "sapi5_voice_new.vbs $language $tts_engine_opts";
print("> cscript /B $path\n") if $verbose; $cmd =~ s/\\/\\\\/g;
my $pid = open(F, "| cscript /B $path"); print("> cscript //nologo $cmd\n") if $verbose;
$ret = *F; my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
$SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
$SIG{KILL} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
%ret = (%ret, "stdin" => *CMD_IN, "stdout" => *CMD_OUT, "toolspath" => $path);
} }
} }
return $ret; return \%ret;
} }
# Shutdown TTS engine if necessary. # Shutdown TTS engine if necessary.
sub shutdown_tts { sub shutdown_tts {
my ($tts_engine, $tts_object) = @_; my ($tts_object) = @_;
switch($tts_engine) { switch($$tts_object{"name"}) {
case "festival" { case "festival" {
# Send SIGTERM to festival server # Send SIGTERM to festival server
kill TERM => $tts_object; kill TERM => $$tts_object{"pid"};
} }
case "sapi5" { case "sapi5" {
print($tts_object "\r\n\r\n"); print({$$tts_object{"stdin"}} "QUIT\r\n");
close($tts_object); close($$tts_object{"stdin"});
} }
} }
} }
@ -113,14 +115,14 @@ sub shutdown_tts {
# Apply corrections to a voice-string to make it sound better # Apply corrections to a voice-string to make it sound better
sub correct_string { sub correct_string {
our $verbose; our $verbose;
my ($string, $language, $tts_engine) = @_; my ($string, $language, $tts_object) = @_;
my $orig = $string; my $orig = $string;
switch($language) { switch($language) {
# General for all engines and languages (perhaps - just an example) # General for all engines and languages (perhaps - just an example)
$string =~ s/USB/U S B/; $string =~ s/USB/U S B/;
case ("deutsch") { case ("deutsch") {
switch($tts_engine) { switch($$tts_object{"name"}) {
$string =~ s/alphabet/alfabet/; $string =~ s/alphabet/alfabet/;
$string =~ s/alkaline/alkalein/; $string =~ s/alkaline/alkalein/;
$string =~ s/ampere/amper/; $string =~ s/ampere/amper/;
@ -146,10 +148,10 @@ sub correct_string {
# Produce a wav file of the text given # Produce a wav file of the text given
sub voicestring { sub voicestring {
our $verbose; our $verbose;
my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_; my ($string, $output, $tts_engine_opts, $tts_object) = @_;
my $cmd; my $cmd;
printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose; printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
switch($tts_engine) { switch($$tts_object{"name"}) {
case "festival" { case "festival" {
# festival_client lies to us, so we have to do awful soul-eating # festival_client lies to us, so we have to do awful soul-eating
# work with IPC::open3() # work with IPC::open3()
@ -180,15 +182,31 @@ sub voicestring {
close(ESPEAK); close(ESPEAK);
} }
case "sapi5" { case "sapi5" {
print($tts_object sprintf("%s\r\n%s\r\n", $string, $output)); print({$$tts_object{"stdin"}} sprintf("SPEAK\t%s\t%s\r\n", $output, $string));
} }
} }
} }
# trim leading / trailing silence from the clip
sub wavtrim {
our $verbose;
my ($file, $threshold, $tts_object) = @_;
printf("Trim \"%s\"\n", $file) if $verbose;
if ($$tts_object{"name"} eq "sapi5") {
my $cmd = $$tts_object{"toolspath"}."wavtrim $file $threshold";
print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
}
else {
my $cmd = dirname($0) . "/wavtrim $file $threshold";
print("> $cmd\n") if $verbose;
`$cmd`;
}
}
# Encode a wav file into the given destination file # Encode a wav file into the given destination file
sub encodewav { sub encodewav {
our $verbose; our $verbose;
my ($input, $output, $encoder, $encoder_opts) = @_; my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
my $cmd = ''; my $cmd = '';
printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose; printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
switch ($encoder) { switch ($encoder) {
@ -202,16 +220,23 @@ sub encodewav {
$cmd = "speexenc $encoder_opts \"$input\" \"$output\""; $cmd = "speexenc $encoder_opts \"$input\" \"$output\"";
} }
} }
print("> $cmd\n") if $verbose; if ($$tts_object{"name"} eq "sapi5") {
`$cmd`; print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
}
else {
print("> $cmd\n") if $verbose;
`$cmd`;
}
} }
sub wavtrim { # synchronize the clip generation / processing if it's running in another process
our $verbose; sub synchronize {
my ($file) = @_; my ($tts_object) = @_;
my $cmd = dirname($0) . "/wavtrim \"$file\""; if ($$tts_object{"name"} eq "sapi5") {
print("> $cmd\n") if $verbose; print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
`$cmd`; my $wait = readline($$tts_object{"stdout"});
#ignore what's actually returned
}
} }
# Run genlang and create voice clips for each string # Run genlang and create voice clips for each string
@ -267,11 +292,13 @@ sub generateclips {
copy(dirname($0)."/VOICE_PAUSE.wav", $wav); copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
} }
else { else {
voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object); voicestring($voice, $wav, $tts_engine_opts, $tts_object);
wavtrim($wav, 500); # 500 seems to be a reasonable default for now wavtrim($wav, 500, $tts_object);
# 500 seems to be a reasonable default for now
} }
encodewav($wav, $mp3, $encoder, $encoder_opts); encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
synchronize($tts_object);
if (defined($ENV{'POOL'})) { if (defined($ENV{'POOL'})) {
copy($mp3, $pool_file); copy($mp3, $pool_file);
} }
@ -284,7 +311,7 @@ sub generateclips {
} }
print("\n"); print("\n");
close(VOICEFONTIDS); close(VOICEFONTIDS);
shutdown_tts($tts_engine, $tts_object); shutdown_tts($tts_object);
} }
# Assemble the voicefile # Assemble the voicefile