Add support for OS X' Speech Synthesis Manager TTS system.

This adds basic support for using the OS X' TTS system. The current
implementation does only support selecting the voice itself, no further
settings (like speed pitch / speed) adjustments are implemented. As OS X' TTS
system wants the strings to get spoken in 8 bit encoding problems with locale
combinations are possible. For this better error handling in the rbutil TTS is
needed. The voice test button in the configuration dialog reacts pretty slow
due to the way its speaking is done. This can get changed but also requires
adjustments in the rbutil TTS system.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24979 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Dominik Riebeling 2010-02-28 20:19:54 +00:00
parent c1689ca80d
commit 495edfb7b9
4 changed files with 494 additions and 1 deletions

View file

@ -22,6 +22,9 @@
#include "ttsfestival.h"
#include "ttssapi.h"
#include "ttsexes.h"
#if defined(Q_OS_MACX)
#include "ttscarbon.h"
#endif
// list of tts names and identifiers
QMap<QString,QString> TTSBase::ttsList;
@ -43,6 +46,9 @@ void TTSBase::initTTSList()
#if defined(Q_OS_LINUX)
ttsList["festival"] = "Festival TTS Engine";
#endif
#if defined(Q_OS_MACX)
ttsList["carbon"] = "OS X System Engine";
#endif
}
// function to get a specific encoder
@ -65,6 +71,14 @@ TTSBase* TTSBase::getTTS(QObject* parent,QString ttsName)
return tts;
}
else
#endif
#if defined(Q_OS_MACX)
if(ttsName == "carbon")
{
tts = new TTSCarbon(parent);
return tts;
}
else
#endif
if (true) // fix for OS other than WIN or LINUX
{

View file

@ -0,0 +1,405 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
*
* Copyright (C) 2010 by Dominik Riebeling
* $Id$
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <QtCore>
#include "ttsbase.h"
#include "ttscarbon.h"
#include "encttssettings.h"
#include "rbsettings.h"
#include <CoreFoundation/CoreFoundation.h>
#include <Carbon/Carbon.h>
#include <unistd.h>
#include <sys/stat.h>
#include <inttypes.h>
TTSCarbon::TTSCarbon(QObject* parent) : TTSBase(parent)
{
}
bool TTSCarbon::configOk()
{
return true;
}
bool TTSCarbon::start(QString *errStr)
{
(void)errStr;
VoiceSpec vspec;
VoiceSpec* vspecref;
VoiceDescription vdesc;
OSErr error;
QString selectedVoice
= RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
SInt16 numVoices;
SInt16 voiceIndex;
error = CountVoices(&numVoices);
for(voiceIndex = 1; voiceIndex < numVoices; ++voiceIndex) {
error = GetIndVoice(voiceIndex, &vspec);
error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
// name is pascal string, i.e. the first byte is the length.
QString name = QString::fromLocal8Bit((const char*)&vdesc.name[1],
vdesc.name[0]);
if(name == selectedVoice) {
vspecref = &vspec;
if(vdesc.script != -1)
m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
else
m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
break;
}
}
if(voiceIndex == numVoices) {
// voice not found. Add user notification here and proceed with
// system default voice.
qDebug() << "selected voice not found, using system default!";
vspecref = NULL;
GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
if(vdesc.script != -1)
m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
else
m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
}
error = NewSpeechChannel(vspecref, &m_channel);
//SetSpeechInfo(channel, soSpeechDoneCallBack, speechDone);
return (error == 0) ? true : false;
}
bool TTSCarbon::stop(void)
{
DisposeSpeechChannel(m_channel);
return true;
}
void TTSCarbon::generateSettings(void)
{
QStringList voiceNames;
QString systemVoice;
SInt16 numVoices;
OSErr error;
VoiceSpec vspec;
VoiceDescription vdesc;
// get system voice
error = GetVoiceDescription(NULL, &vdesc, sizeof(vdesc));
systemVoice
= QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
// get list of all voices
CountVoices(&numVoices);
for(SInt16 i = 1; i < numVoices; ++i) {
error = GetIndVoice(i, &vspec);
error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
// name is pascal string, i.e. the first byte is the length.
QString name
= QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
voiceNames.append(name.trimmed());
}
// voice
EncTtsSetting* setting;
QString voice
= RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
if(voice.isEmpty())
voice = systemVoice;
setting = new EncTtsSetting(this, EncTtsSetting::eSTRINGLIST,
tr("Voice:"), voice, voiceNames, EncTtsSetting::eNOBTN);
insertSetting(ConfigVoice, setting);
}
void TTSCarbon::saveSettings(void)
{
// save settings in user config
RbSettings::setSubValue("carbon", RbSettings::TtsVoice,
getSetting(ConfigVoice)->current().toString());
RbSettings::sync();
}
/** @brief create wav file from text using the selected TTS voice.
*/
TTSStatus TTSCarbon::voice(QString text, QString wavfile, QString* errStr)
{
TTSStatus status = NoError;
OSErr error;
QString aifffile = wavfile + ".aiff";
// FIXME: find out why we need to do this.
// Create a local copy of the temporary file filename.
// Not doing so causes weird issues (path contains trailing spaces)
unsigned int len = aifffile.size() + 1;
char* tmpfile = (char*)malloc(len * sizeof(char));
strncpy(tmpfile, aifffile.toLocal8Bit().constData(), len);
CFStringRef tmpfileref = CFStringCreateWithCString(kCFAllocatorDefault,
tmpfile, kCFStringEncodingUTF8);
CFURLRef urlref = CFURLCreateWithFileSystemPath(kCFAllocatorDefault,
tmpfileref, kCFURLPOSIXPathStyle, false);
SetSpeechInfo(m_channel, soOutputToFileWithCFURL, urlref);
// speak it.
// Convert the string to the encoding requested by the voice. Do this
// via CFString, as this allows to directly use the destination encoding
// as CFString uses the same values as the voice.
// allocate enough space to allow storing the string in a 2 byte encoding
unsigned int textlen = 2 * text.length() + 1;
char* textbuf = (char*)calloc(textlen, sizeof(char));
char* utf8data = (char*)text.toUtf8().constData();
int utf8bytes = text.toUtf8().size();
CFStringRef cfstring = CFStringCreateWithBytes(kCFAllocatorDefault,
(UInt8*)utf8data, utf8bytes,
kCFStringEncodingUTF8, (Boolean)false);
CFIndex usedBuf = 0;
CFRange range;
range.location = 0; // character in string to start.
range.length = text.length(); // number of _characters_ in string
// FIXME: check if converting between encodings was lossless.
CFStringGetBytes(cfstring, range, m_voiceScript, ' ',
false, (UInt8*)textbuf, textlen, &usedBuf);
error = SpeakText(m_channel, textbuf, (unsigned long)usedBuf);
while(SpeechBusy()) {
// FIXME: add small delay here to make calls less frequent
QCoreApplication::processEvents();
}
if(error != 0) {
*errStr = tr("Could not voice string");
status = FatalError;
}
free(textbuf);
CFRelease(cfstring);
// convert the temporary aiff file to wav
if(status == NoError
&& convertAiffToWav(tmpfile, wavfile.toLocal8Bit().constData()) != 0) {
*errStr = tr("Could not convert intermediate file");
status = FatalError;
}
// remove temporary aiff file
unlink(tmpfile);
free(tmpfile);
return status;
}
unsigned long TTSCarbon::be2u32(unsigned char* buf)
{
return (buf[0]&0xff)<<24 | (buf[1]&0xff)<<16 | (buf[2]&0xff)<<8 | (buf[3]&0xff);
}
unsigned long TTSCarbon::be2u16(unsigned char* buf)
{
return buf[1]&0xff | (buf[0]&0xff)<<8;
}
unsigned char* TTSCarbon::u32tobuf(unsigned char* buf, uint32_t val)
{
buf[0] = val & 0xff;
buf[1] = (val>> 8) & 0xff;
buf[2] = (val>>16) & 0xff;
buf[3] = (val>>24) & 0xff;
return buf;
}
unsigned char* TTSCarbon::u16tobuf(unsigned char* buf, uint16_t val)
{
buf[0] = val & 0xff;
buf[1] = (val>> 8) & 0xff;
return buf;
}
/** @brief convert 80 bit extended ("long double") to int.
* This is simplified to handle the usual audio sample rates. Everything else
* might break. If the value isn't supported it will return 0.
* Conversion taken from Rockbox aiff codec.
*/
unsigned int TTSCarbon::extended2int(unsigned char* buf)
{
unsigned int result = 0;
/* value negative? */
if(buf[0] & 0x80)
return 0;
/* check exponent. Int can handle up to 2^31. */
int exponent = buf[0] << 8 | buf[1];
if(exponent < 0x4000 || exponent > (0x4000 + 30))
return 0;
result = ((buf[2]<<24) | (buf[3]<<16) | (buf[4]<<8) | buf[5]) + 1;
result >>= (16 + 14 - buf[1]);
return result;
}
/** @brief Convert aiff file to wav. Returns 0 on success.
*/
int TTSCarbon::convertAiffToWav(const char* aiff, const char* wav)
{
struct commchunk {
unsigned long chunksize;
unsigned short channels;
unsigned long frames;
unsigned short size;
int rate;
};
struct ssndchunk {
unsigned long chunksize;
unsigned long offset;
unsigned long blocksize;
};
FILE* in;
FILE* out;
unsigned char obuf[4];
unsigned char* buf;
/* minimum file size for a valid aiff file is 46 bytes:
* - FORM chunk: 12 bytes
* - COMM chunk: 18 bytes
* - SSND chunk: 16 bytes (with no actual data)
*/
struct stat filestat;
stat(aiff, &filestat);
if(filestat.st_size < 46)
return -1;
/* read input file into memory */
buf = (unsigned char*)malloc(filestat.st_size * sizeof(unsigned char));
if(!buf) /* error out if malloc() failed */
return -1;
in = fopen(aiff, "rb");
if(fread(buf, 1, filestat.st_size, in) < filestat.st_size) {
printf("could not read file: not enought bytes read\n");
fclose(in);
return -1;
}
fclose(in);
/* check input file format */
if(memcmp(buf, "FORM", 4) | memcmp(&buf[8], "AIFF", 4)) {
printf("No valid AIFF header found.\n");
free(buf);
return -1;
}
/* read COMM chunk */
unsigned char* commstart = &buf[12];
struct commchunk comm;
if(memcmp(commstart, "COMM", 4)) {
printf("COMM chunk not at beginning.\n");
free(buf);
return -1;
}
comm.chunksize = be2u32(&commstart[4]);
comm.channels = be2u16(&commstart[8]);
comm.frames = be2u32(&commstart[10]);
comm.size = be2u16(&commstart[14]);
comm.rate = extended2int(&commstart[16]);
/* find SSND as next chunk */
unsigned char* ssndstart = commstart + 8 + comm.chunksize;
while(memcmp(ssndstart, "SSND", 4) && ssndstart < (buf + filestat.st_size)) {
printf("Skipping chunk.\n");
ssndstart += be2u32(&ssndstart[4]) + 8;
}
if(ssndstart > (buf + filestat.st_size)) {
free(buf);
return -1;
}
struct ssndchunk ssnd;
ssnd.chunksize = be2u32(&ssndstart[4]);
ssnd.offset = be2u32(&ssndstart[8]);
ssnd.blocksize = be2u32(&ssndstart[12]);
/* Calculate the total length of the resulting RIFF chunk.
* The length is given by frames * samples * bytes/sample.
* We need to add:
* - 16 bytes: fmt chunk header
* - 8 bytes: data chunk header
* - 4 bytes: wave chunk identifier
*/
out = fopen(wav, "wb+");
/* write the wav header */
unsigned short blocksize = comm.channels * (comm.size >> 3);
unsigned long rifflen = blocksize * comm.frames + 28;
fwrite("RIFF", 1, 4, out);
fwrite(u32tobuf(obuf, rifflen), 1, 4, out);
fwrite("WAVE", 1, 4, out);
/* write the fmt chunk and chunk size (always 16) */
/* write fmt chunk header:
* header, size (always 0x10, format code (always 0x0001)
*/
fwrite("fmt \x10\x00\x00\x00\x01\x00", 1, 10, out);
/* number of channels (2 bytes) */
fwrite(u16tobuf(obuf, comm.channels), 1, 2, out);
/* sampling rate (4 bytes) */
fwrite(u32tobuf(obuf, comm.rate), 1, 4, out);
/* data rate, i.e. bytes/sec */
fwrite(u32tobuf(obuf, comm.rate * blocksize), 1, 4, out);
/* data block size */
fwrite(u16tobuf(obuf, blocksize), 1, 2, out);
/* bits per sample */
fwrite(u16tobuf(obuf, comm.size), 1, 2, out);
/* write the data chunk */
/* chunk id */
fwrite("data", 1, 4, out);
/* chunk size: 4 bytes. */
unsigned long cs = blocksize * comm.frames;
fwrite(u32tobuf(obuf, cs), 1, 4, out);
/* write data */
unsigned char* data = ssndstart;
unsigned long pos = ssnd.chunksize;
/* byteswap if samples are 16 bit */
if(comm.size == 16) {
while(pos) {
obuf[1] = *data++ & 0xff;
obuf[0] = *data++ & 0xff;
fwrite(obuf, 1, 2, out);
pos -= 2;
}
}
/* 8 bit samples have need no conversion so we can bulk copy.
* Everything that is not 16 bit is considered 8. */
else {
fwrite(data, 1, pos, out);
}
/* number of bytes has to be even, even if chunksize is not. */
if(cs % 2) {
fwrite(obuf, 1, 1, out);
}
fclose(out);
free(buf);
return 0;
}

View file

@ -0,0 +1,72 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
*
* Copyright (C) 2010 by Dominik Riebeling
* $Id$
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#ifndef TTSCARBON_H
#define TTSCARBON_H
#include <QtCore>
#include "ttsbase.h"
#include <Carbon/Carbon.h>
#include <inttypes.h>
class TTSCarbon : public TTSBase
{
Q_OBJECT
//! Enum to identify the settings
enum ConfigValuesCarbon
{
ConfigVoice
};
public:
TTSCarbon(QObject *parent = NULL);
//! Child class should generate a clip
TTSStatus voice(QString text,QString wavfile, QString* errStr);
//! Child class should do startup
bool start(QString *errStr);
//! child class should stop
bool stop() ;
// configuration
//! Child class should return true, when configuration is good
bool configOk();
//! Child class should generate and insertSetting(..) its settings
void generateSettings();
//! Chlid class should commit the Settings to permanent storage
void saveSettings();
private:
SpeechChannel m_channel;
CFStringBuiltInEncodings m_voiceScript;
unsigned long be2u32(unsigned char* buf);
unsigned long be2u16(unsigned char* buf);
unsigned char* u32tobuf(unsigned char* buf, uint32_t val);
unsigned char* u16tobuf(unsigned char* buf, uint16_t val);
unsigned int extended2int(unsigned char* buf);
int convertAiffToWav(const char* aiff, const char* wav);
protected:
// static QMap<QString,QString> ttsList;
};
#endif // TTSCARBON_H

View file

@ -272,11 +272,13 @@ unix:static {
}
macx {
SOURCES += base/ttscarbon.cpp
HEADERS += base/ttscarbon.h
QMAKE_MAC_SDK=/Developer/SDKs/MacOSX10.4u.sdk
QMAKE_LFLAGS_PPC=-mmacosx-version-min=10.4 -arch ppc
QMAKE_LFLAGS_X86=-mmacosx-version-min=10.4 -arch i386
CONFIG+=x86 ppc
LIBS += -L/usr/local/lib -framework IOKit -framework CoreFoundation -lz
LIBS += -L/usr/local/lib -framework IOKit -framework CoreFoundation -framework Carbon -lz
INCLUDEPATH += /usr/local/include
QMAKE_INFO_PLIST = Info.plist
RC_FILE = icons/rbutilqt.icns