1. The code below comes from http://blog.csdn.net/zmdsjtu/article/details/53114690; many thanks to the original author! It uses the Intel RealSense SDK speech-synthesis module (the pxc* headers) and plays the synthesized audio through the Windows waveOut API.
#include <Windows.h>
#include <vector>
#include "pxcspeechsynthesis.h"
#include "pxcsession.h"
#pragma comment(lib, "winmm.lib") // the waveOut* playback functions need winmm.lib
class VoiceOut {
protected:
    enum { buffering = 3 };          // number of audio buffers kept in flight
    WAVEHDR m_headers[buffering];
    PXCAudio::AudioData m_data[buffering];
    PXCAudio* m_samples[buffering];
    // poor man's autogrowing bytestream
    std::vector<byte> m_wavefile;
    int m_nsamples;
    HWAVEOUT m_hwo;
    WAVEFORMATEX m_wfx;
public:
    VoiceOut(PXCSpeechSynthesis::ProfileInfo *pinfo) {
        m_nsamples = 0;
        m_hwo = 0;
        // Describe the 16-bit PCM format reported by the TTS profile and open the default output device.
        memset(&m_wfx, 0, sizeof(m_wfx));
        m_wfx.wFormatTag = WAVE_FORMAT_PCM;
        m_wfx.nSamplesPerSec = pinfo->outputs.sampleRate;
        m_wfx.wBitsPerSample = 16;
        m_wfx.nChannels = pinfo->outputs.nchannels;
        m_wfx.nBlockAlign = (m_wfx.wBitsPerSample / 8) * m_wfx.nChannels;
        m_wfx.nAvgBytesPerSec = m_wfx.nBlockAlign * m_wfx.nSamplesPerSec;
        waveOutOpen(&m_hwo, WAVE_MAPPER, &m_wfx, 0, 0, CALLBACK_NULL);
    }
    void RenderAudio(PXCAudio *audio) {
        int k = (m_nsamples % buffering);
        // Once all slots have been used, wait for the oldest buffer to finish playing before reusing its slot.
        if (m_nsamples++ >= buffering) {
            while (waveOutUnprepareHeader(m_hwo, &m_headers[k], sizeof(WAVEHDR)) == WAVERR_STILLPLAYING)
                Sleep(10);
            m_samples[k]->ReleaseAccess(&m_data[k]);
            m_samples[k]->Release();
        }
        audio->AddRef();
        m_samples[k] = audio;
        if (m_samples[k]->AcquireAccess(PXCAudio::ACCESS_READ, PXCAudio::AUDIO_FORMAT_PCM, &m_data[k]) >= PXC_STATUS_NO_ERROR) {
            memset(&m_headers[k], 0, sizeof(WAVEHDR));
            m_headers[k].dwBufferLength = m_data[k].dataSize * 2; // dataSize counts 16-bit samples, the header wants bytes
            m_headers[k].lpData = (LPSTR)m_data[k].dataPtr;
            waveOutPrepareHeader(m_hwo, &m_headers[k], sizeof(WAVEHDR));
            waveOutWrite(m_hwo, &m_headers[k], sizeof(WAVEHDR));
        }
    }
    ~VoiceOut(void) {
        if (!m_hwo || m_nsamples <= 0) return;
        // Release every buffer that is still queued. If fewer than 'buffering' buffers were ever
        // submitted, the start index goes negative, so skip those slots (the original sample's Bug 57423).
        for (int i = m_nsamples - buffering; i < m_nsamples; i++) {
            if (i < 0) continue;
            int k = (i % buffering);
            while (waveOutUnprepareHeader(m_hwo, &m_headers[k], sizeof(WAVEHDR)) == WAVERR_STILLPLAYING)
                Sleep(10);
            m_samples[k]->ReleaseAccess(&m_data[k]);
            m_samples[k]->Release();
        }
        waveOutClose(m_hwo);
    }
};
int main() {
    PXCSpeechSynthesis *tts = 0;
    PXCSession *session = PXCSession::CreateInstance();
    session->CreateImpl<PXCSpeechSynthesis>(&tts);
    PXCSpeechSynthesis::ProfileInfo pinfo;
    tts->QueryProfile(0, &pinfo);
    pinfo.language = PXCSpeechSynthesis::LANGUAGE_CN_CHINESE;
    tts->SetProfile(&pinfo);
    // Synthesize the text string
    tts->BuildSentence(1, L"终于成功了");
    // Retrieve the synthesized speech
    int nbuffers = tts->QueryBufferNum(1);
    VoiceOut vo(&pinfo);
    for (int i = 0; i < nbuffers; i++) {
        PXCAudio *audio = tts->QueryBuffer(1, i);
        // send audio to the audio output device
        vo.RenderAudio(audio);
    }
    // Clean up
    tts->ReleaseSentence(1);
    system("pause");
}
For now this has not run successfully, for reasons I don't yet understand (a null-pointer crash)!
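A guess at the cause: session->CreateImpl<PXCSpeechSynthesis>() can fail and leave tts null (for instance when the RealSense speech module is not installed), and the code above dereferences tts without checking. Below is a minimal diagnostic sketch, assuming only the same RealSense SDK headers already used above, that checks each status before dereferencing so the first failing step is reported:

#include <iostream>
#include "pxcsession.h"
#include "pxcspeechsynthesis.h"

int main() {
    PXCSession *session = PXCSession::CreateInstance();
    if (!session) {
        std::cout << "PXCSession::CreateInstance returned null (RealSense runtime missing?)" << std::endl;
        return 1;
    }
    PXCSpeechSynthesis *tts = nullptr;
    pxcStatus sts = session->CreateImpl<PXCSpeechSynthesis>(&tts);
    if (sts < PXC_STATUS_NO_ERROR || !tts) {
        std::cout << "CreateImpl<PXCSpeechSynthesis> failed, status " << sts
                  << " (speech synthesis module not installed?)" << std::endl;
        session->Release();
        return 1;
    }
    std::cout << "Speech synthesis module created; the TTS code above should be safe to run." << std::endl;
    tts->Release();
    session->Release();
    return 0;
}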
2. A simple TTS program using the Microsoft Speech API (SAPI); a small string-only variant is sketched after the listing.
References:
http://blog.csdn.net/lihn1987/article/details/65445938
http://blog.csdn.net/doraemon___/article/details/64158227
/*
Created by Jinhua Zhao,2017.09.15.
Contact:3210513029@qq.com
*/
#include <iostream>
#include <sapi.h>

int main()
{
    ISpVoice *pVoice = nullptr;
    // Initialize COM on this thread.
    if (FAILED(::CoInitialize(NULL))) return -1;
    // Get the ISpVoice interface.
    HRESULT hr = CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice, reinterpret_cast<void **>(&pVoice));
    if (SUCCEEDED(hr))
    {
        pVoice->SetVolume((USHORT)100);  // volume range is 0..100
        pVoice->SetRate(0);              // 0 = default speaking rate
        // SPF_IS_FILENAME: speak the contents of the given text file.
        hr = pVoice->Speak(L"C:/opencv/1.txt", SPF_IS_FILENAME, nullptr);
        pVoice->Release();
        pVoice = nullptr;
    }
    ::CoUninitialize();
    return 0;
}
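For reference, a minimal variant (not taken from the referenced posts; the sentence is just a placeholder) that speaks a literal string instead of a text file. It queues the utterance with SPF_ASYNC and then blocks on ISpVoice::WaitUntilDone until playback finishes:

#include <sapi.h>

int main()
{
    if (FAILED(::CoInitialize(NULL))) return -1;
    ISpVoice *pVoice = nullptr;
    HRESULT hr = CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL,
                                  IID_ISpVoice, reinterpret_cast<void **>(&pVoice));
    if (SUCCEEDED(hr))
    {
        // Queue the utterance asynchronously, then wait for playback to complete.
        pVoice->Speak(L"Text to speech test.", SPF_ASYNC, nullptr);
        pVoice->WaitUntilDone(INFINITE);
        pVoice->Release();
    }
    ::CoUninitialize();
    return 0;
}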
3. Calling Microsoft (SAPI) speech recognition; a short sketch for reading the recognition confidence follows the listing:
/*
Created by Jinhua Zhao,2017.09.15.
Contact:3210513029@qq.com
*/
#include <Windows.h>
#include <sapi.h>
#include <iostream>
#include <atlbase.h>
#include <sphelper.h>
#include <string>

// Block until the recognition context delivers a result, then hand it back with an extra reference.
inline HRESULT BlockForResult(ISpRecoContext* pRecoCtxt, ISpRecoResult** ppResult)
{
    HRESULT hr = S_OK;
    CSpEvent event;
    // GetFrom returns S_FALSE while no event is queued, so wait on the notify event and try again.
    while (SUCCEEDED(hr) && SUCCEEDED(hr = event.GetFrom(pRecoCtxt)) && hr == S_FALSE)
    {
        hr = pRecoCtxt->WaitForNotifyEvent(INFINITE);
    }
    *ppResult = event.RecoResult();
    if (*ppResult)
    {
        (*ppResult)->AddRef();
    }
    return hr;
}
int main()
{
    HRESULT hr = E_FAIL;
    bool fUseTTS = true; // Turn TTS playback on or off.
    bool fReplay = true; // Turn audio replay on or off.
    if (SUCCEEDED(hr = ::CoInitialize(NULL)))
    {
        {
            CComPtr<ISpRecoContext> cpRecoCtxt;
            CComPtr<ISpRecoGrammar> cpGrammar;
            CComPtr<ISpVoice> cpVoice;
            // Create a recognition context on the shared recognizer and grab its voice for playback.
            hr = cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext);
            if (SUCCEEDED(hr)) { hr = cpRecoCtxt->GetVoice(&cpVoice); }
            hr = cpRecoCtxt->SetNotifyWin32Event();                                         // notify via a Win32 event (consumed by WaitForNotifyEvent)
            hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION)); // only raise completed-recognition events
            hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL);                // keep the audio so it can be replayed
            hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar);
            hr = cpGrammar->LoadDictation(NULL, SPLO_STATIC);
            hr = cpGrammar->SetDictationState(SPRS_ACTIVE);
            if (cpRecoCtxt && cpVoice)
            {
                USES_CONVERSION;
                CComPtr<ISpRecoResult> cpResult;
                std::cout << "Please speak!" << std::endl;
                while (SUCCEEDED(hr = BlockForResult(cpRecoCtxt, &cpResult)))
                {
                    // Pause dictation while the result is spoken back, then reactivate it below.
                    cpGrammar->SetDictationState(SPRS_INACTIVE);
                    CSpDynamicString dstrText;
                    if (SUCCEEDED(cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, nullptr)))
                    {
                        std::cout << "I heard:" << W2A(dstrText) << std::endl;
                        if (fUseTTS)
                        {
                            cpVoice->Speak(L"I heard:", SPF_ASYNC, nullptr);
                            cpVoice->Speak(dstrText, SPF_ASYNC, nullptr);
                        }
                        if (fReplay)
                        {
                            if (fUseTTS)
                                cpVoice->Speak(L"when you said", SPF_ASYNC, nullptr);
                            else
                                std::cout << "when you said" << std::endl;
                            // Replay the retained audio of the recognized phrase.
                            cpResult->SpeakAudio(NULL, 0, NULL, NULL);
                        }
                        cpResult.Release();
                    }
                    cpGrammar->SetDictationState(SPRS_ACTIVE);
                }
            }
        }
        ::CoUninitialize();
    }
    return hr;
}
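As a small follow-up sketch (my addition, not part of the sample above): the result object also exposes how confident the engine was. ISpRecoResult::GetPhrase returns an SPPHRASE whose top-level rule carries a confidence value; a hypothetical helper like PrintConfidence below could be called right after GetText in the loop above.

#include <sapi.h>
#include <iostream>

// Report the engine's confidence in a recognition result.
void PrintConfidence(ISpRecoResult *pResult)
{
    SPPHRASE *pPhrase = nullptr;
    if (SUCCEEDED(pResult->GetPhrase(&pPhrase)) && pPhrase)
    {
        // Rule.Confidence is SP_LOW_CONFIDENCE, SP_NORMAL_CONFIDENCE or SP_HIGH_CONFIDENCE;
        // SREngineConfidence is the engine-specific floating-point score.
        std::cout << "Confidence: " << static_cast<int>(pPhrase->Rule.Confidence)
                  << " (engine score " << pPhrase->Rule.SREngineConfidence << ")" << std::endl;
        ::CoTaskMemFree(pPhrase); // GetPhrase allocates the phrase with CoTaskMemAlloc
    }
}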