diff --git a/tools/sapi5_voice_new.vbs b/tools/sapi5_voice_new.vbs index 96c6e2a720..f6abcf7d0b 100755 --- a/tools/sapi5_voice_new.vbs +++ b/tools/sapi5_voice_new.vbs @@ -20,11 +20,13 @@ 'To be done: ' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed) -' - Voice specific replacements/corrections for pronounciation (this should be at a higher level really) + +Option Explicit Const SSFMCreateForWrite = 3 -Const SPSF_8kHz16BitMono = 6 +' Audio formats for SAPI5 filestream object +Const SPSF_8kHz16BitMono = 6 Const SPSF_11kHz16BitMono = 10 Const SPSF_12kHz16BitMono = 14 Const SPSF_16kHz16BitMono = 18 @@ -34,34 +36,59 @@ Const SPSF_32kHz16BitMono = 30 Const SPSF_44kHz16BitMono = 34 Const SPSF_48kHz16BitMono = 38 -Dim oSpVoice, oSpFS, nAudioFormat, sText, sOutputFile +Dim oShell, oEnv +Dim oSpVoice, oSpFS ' SAPI5 voice and filestream +Dim aLine, aData ' used in command reading +Dim nAudioFormat +Dim bVerbose + + +On Error Resume Next nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings: '- for AT&T natural voices, use SPSF_32kHz16BitMono '- for MS voices, use SPSF_22kHz16BitMono +Set oShell = CreateObject("WScript.Shell") +Set oEnv = oShell.Environment("Process") +bVerbose = (oEnv("V") <> "") + Set oSpVoice = CreateObject("SAPI.SpVoice") If Err.Number <> 0 Then - WScript.Echo "Error - could not get SpVoice object. " & _ - "SAPI 5 not installed?" + WScript.StdErr.WriteLine "Error - could not get SpVoice object. " & _ + "SAPI 5 not installed?" Err.Clear WScript.Quit 1 End If -While 1 > 0 - sText = WScript.StdIn.ReadLine - sOutputFile = WScript.StdIn.ReadLine - If sOutputFile = "" Then - Set oSpFS = Nothing - Set oSpVoice = Nothing - Set oArgs = Nothing - WScript.Quit 0 +Set oSpFS = CreateObject("SAPI.SpFileStream") +oSpFS.Format.Type = nAudioFormat + +On Error Goto 0 + +Do + aLine = Split(WScript.StdIn.ReadLine, vbTab, 2) + If Err.Number <> 0 Then + WScript.StdErr.WriteLine "Error " & Err.Number & ": " & Err.Description + WScript.Quit 1 End If - ' WScript.Echo "Saying " + sText + " in " + sOutputFile - Set oSpFS = CreateObject("SAPI.SpFileStream") - oSpFS.Format.Type = nAudioFormat - oSpFS.Open sOutputFile, SSFMCreateForWrite, False - Set oSpVoice.AudioOutputStream = oSpFS - oSpVoice.Speak sText - oSpFS.Close -Wend + Select Case aLine(0) ' command + Case "SPEAK" + aData = Split(aLine(1), vbTab, 2) + If bVerbose Then WScript.StdErr.WriteLine "Saying " & aData(1) _ + & " in " & aData(0) + oSpFS.Open aData(0), SSFMCreateForWrite, false + Set oSpVoice.AudioOutputStream = oSpFS + oSpVoice.Speak aData(1) + oSpFS.Close + Case "EXEC" + If bVerbose Then WScript.StdErr.WriteLine "> " & aLine(1) + oShell.Run aLine(1), 0, true + Case "SYNC" + If bVerbose Then WScript.StdErr.WriteLine "Syncing" + WScript.StdOut.WriteLine aLine(1) ' Just echo what was passed + Case "QUIT" + If bVerbose Then WScript.StdErr.WriteLine "Quitting" + WScript.Quit 0 + End Select +Loop diff --git a/tools/voice.pl b/tools/voice.pl index 109451f82e..88f3ba4744 100755 --- a/tools/voice.pl +++ b/tools/voice.pl @@ -21,6 +21,7 @@ use File::Basename; use File::Copy; use Switch; use vars qw($V $C $t $l $e $E $s $S $i $v); +use IPC::Open2; use IPC::Open3; use Digest::MD5 qw(md5_hex); @@ -69,43 +70,44 @@ USAGE sub init_tts { our $verbose; my ($tts_engine, $tts_engine_opts, $language) = @_; - my $ret = undef; + my %ret = ("name" => $tts_engine); switch($tts_engine) { case "festival" { print("> festival $tts_engine_opts --server\n") if $verbose; my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1"); - $ret = *FESTIVAL_SERVER; - $ret = $pid; + my $dummy = *FESTIVAL_SERVER; #suppress warning $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; + $ret{"pid"} = $pid; } case "sapi5" { my $toolsdir = dirname($0); my $path = `cygpath $toolsdir -a -w`; chomp($path); - $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts"; - $path =~ s/\\/\\\\/g; - print("> cscript /B $path\n") if $verbose; - my $pid = open(F, "| cscript /B $path"); - $ret = *F; - $SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; - $SIG{KILL} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; + $path = $path . '\\'; + my $cmd = $path . "sapi5_voice_new.vbs $language $tts_engine_opts"; + $cmd =~ s/\\/\\\\/g; + print("> cscript //nologo $cmd\n") if $verbose; + my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd"); + $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); }; + $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); }; + %ret = (%ret, "stdin" => *CMD_IN, "stdout" => *CMD_OUT, "toolspath" => $path); } } - return $ret; + return \%ret; } # Shutdown TTS engine if necessary. sub shutdown_tts { - my ($tts_engine, $tts_object) = @_; - switch($tts_engine) { + my ($tts_object) = @_; + switch($$tts_object{"name"}) { case "festival" { # Send SIGTERM to festival server - kill TERM => $tts_object; + kill TERM => $$tts_object{"pid"}; } case "sapi5" { - print($tts_object "\r\n\r\n"); - close($tts_object); + print({$$tts_object{"stdin"}} "QUIT\r\n"); + close($$tts_object{"stdin"}); } } } @@ -113,14 +115,14 @@ sub shutdown_tts { # Apply corrections to a voice-string to make it sound better sub correct_string { our $verbose; - my ($string, $language, $tts_engine) = @_; + my ($string, $language, $tts_object) = @_; my $orig = $string; switch($language) { # General for all engines and languages (perhaps - just an example) $string =~ s/USB/U S B/; case ("deutsch") { - switch($tts_engine) { + switch($$tts_object{"name"}) { $string =~ s/alphabet/alfabet/; $string =~ s/alkaline/alkalein/; $string =~ s/ampere/amper/; @@ -146,10 +148,10 @@ sub correct_string { # Produce a wav file of the text given sub voicestring { our $verbose; - my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_; + my ($string, $output, $tts_engine_opts, $tts_object) = @_; my $cmd; - printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose; - switch($tts_engine) { + printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose; + switch($$tts_object{"name"}) { case "festival" { # festival_client lies to us, so we have to do awful soul-eating # work with IPC::open3() @@ -180,15 +182,31 @@ sub voicestring { close(ESPEAK); } case "sapi5" { - print($tts_object sprintf("%s\r\n%s\r\n", $string, $output)); + print({$$tts_object{"stdin"}} sprintf("SPEAK\t%s\t%s\r\n", $output, $string)); } } } +# trim leading / trailing silence from the clip +sub wavtrim { + our $verbose; + my ($file, $threshold, $tts_object) = @_; + printf("Trim \"%s\"\n", $file) if $verbose; + if ($$tts_object{"name"} eq "sapi5") { + my $cmd = $$tts_object{"toolspath"}."wavtrim $file $threshold"; + print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd)); + } + else { + my $cmd = dirname($0) . "/wavtrim $file $threshold"; + print("> $cmd\n") if $verbose; + `$cmd`; + } +} + # Encode a wav file into the given destination file sub encodewav { our $verbose; - my ($input, $output, $encoder, $encoder_opts) = @_; + my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_; my $cmd = ''; printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose; switch ($encoder) { @@ -202,16 +220,23 @@ sub encodewav { $cmd = "speexenc $encoder_opts \"$input\" \"$output\""; } } - print("> $cmd\n") if $verbose; - `$cmd`; + if ($$tts_object{"name"} eq "sapi5") { + print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd)); + } + else { + print("> $cmd\n") if $verbose; + `$cmd`; + } } -sub wavtrim { - our $verbose; - my ($file) = @_; - my $cmd = dirname($0) . "/wavtrim \"$file\""; - print("> $cmd\n") if $verbose; - `$cmd`; +# synchronize the clip generation / processing if it's running in another process +sub synchronize { + my ($tts_object) = @_; + if ($$tts_object{"name"} eq "sapi5") { + print({$$tts_object{"stdin"}} "SYNC\t42\r\n"); + my $wait = readline($$tts_object{"stdout"}); + #ignore what's actually returned + } } # Run genlang and create voice clips for each string @@ -267,11 +292,13 @@ sub generateclips { copy(dirname($0)."/VOICE_PAUSE.wav", $wav); } else { - voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object); - wavtrim($wav, 500); # 500 seems to be a reasonable default for now + voicestring($voice, $wav, $tts_engine_opts, $tts_object); + wavtrim($wav, 500, $tts_object); + # 500 seems to be a reasonable default for now } - encodewav($wav, $mp3, $encoder, $encoder_opts); + encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object); + synchronize($tts_object); if (defined($ENV{'POOL'})) { copy($mp3, $pool_file); } @@ -284,7 +311,7 @@ sub generateclips { } print("\n"); close(VOICEFONTIDS); - shutdown_tts($tts_engine, $tts_object); + shutdown_tts($tts_object); } # Assemble the voicefile