前言
由于近期需要做一些语音合成的工作,需要对语音合成的数据进行实时播放,于是到网上找了一些资料,并参考MSDN的相关说明,写下了如下一个播放PCM数据流的类,多说无益,直接上码:
头文件 pcmspeaker.h
#pragma once
#include <Windows.h>
#include "mmsystem.h"
#pragma comment(lib, "winmm.lib")
#define DEF_MAX_BUFFER_SIZE (1024 * 16)
#define DEF_MAX_BUFFER_COUNT 16
// Plays raw PCM data through the Windows waveOut API, using a fixed pool of
// buffers that the object allocates in its constructor and frees in its
// destructor.
class CPcmSpeaker
{
public:
    // bufferSize: size in bytes of each internal buffer; also the largest
    //             chunk handed to the device in one write.
    // bufferCnt:  number of internal buffers in the pool.
    CPcmSpeaker(int bufferSize = DEF_MAX_BUFFER_SIZE, int bufferCnt = DEF_MAX_BUFFER_COUNT);
    ~CPcmSpeaker();

    // Opens the output device for the given PCM format.
    // Returns 0 on success (or when the device is already open), -1 on failure.
    int init(int channels, int samplePerSec, int bitsPerSample);

    // Queues PCM data for playback, splitting it into buffer-sized chunks.
    // Waits up to `timeout` milliseconds for a free buffer when the pool is full.
    // Returns 0 on success and a negative value on failure or timeout.
    int toSpeaker(const void *data, int len, int timeout = INFINITE);

    // Resets the output device and unprepares the wave headers. Returns 0.
    int clearPcmData();

private:
    // Not copyable: the object owns heap buffers, an event handle and a
    // critical section, so a member-wise copy would release them twice.
    // Declared but intentionally left undefined.
    CPcmSpeaker(const CPcmSpeaker &);
    CPcmSpeaker &operator=(const CPcmSpeaker &);

    // One pool slot: the wave header handed to the device plus its sample storage.
    typedef struct
    {
        WAVEHDR header;
        char *data;
    }WaveHeadandData;

    int m_maxBufferSize;             // size in bytes of each slot's data buffer
    int m_maxBufferCnt;              // number of slots in m_headAndDatas
    WaveHeadandData *m_headAndDatas; // the buffer pool

    // Device callback; signals m_hBufferEvent when a buffer finishes playing.
    // NOTE(review): the documented waveOutProc prototype uses DWORD_PTR for the
    // last three parameters. With DWORD the instance pointer does not fit on
    // 64-bit builds; fixing it requires changing the out-of-class definition
    // in the same step.
    static void CALLBACK waveOutProc(HWAVEOUT hwo, UINT uMsg, DWORD dwInstance, DWORD dwParam1, DWORD dwParam2);

    // Copies one chunk into a free slot and submits it to the device.
    int writeToWave(const void *data, int len);

    // Submits one chunk, waiting for a free slot when the pool is full.
    int pcmtoWave(const void *data, int len, int timeout = INFINITE);

    // Shared state
    WAVEFORMATEX m_waveFormat;                  // format passed to waveOutOpen
    HWAVEOUT m_hWaveOut;                        // waveOut device handle, NULL until init() succeeds
    HANDLE m_hBufferEvent;                      // signaled when a buffer finishes playing
    CRITICAL_SECTION m_BufferOpCriticalSection; // guards operations on the buffer pool
};
实现文件 pcmspeaker.cpp
#include "PcmSpeaker.h"
// Sets up the synchronization objects and allocates the buffer pool.
// The output device itself is not opened here; that happens in init().
CPcmSpeaker::CPcmSpeaker(int bufferSize, int bufferCnt)
{
    m_hWaveOut = NULL;

    // Auto-reset event, initially non-signaled.
    m_hBufferEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
    InitializeCriticalSection(&m_BufferOpCriticalSection);

    // Allocate the pool: one wave header plus one data buffer per slot.
    m_headAndDatas = new WaveHeadandData[bufferCnt];
    for (int slot = 0; slot < bufferCnt; ++slot)
    {
        WaveHeadandData &entry = m_headAndDatas[slot];
        memset(&entry.header, 0, sizeof(WAVEHDR));
        // A slot flagged WHDR_DONE is treated as free for writing.
        entry.header.dwFlags = WHDR_DONE;
        entry.data = new char[bufferSize];
    }

    m_maxBufferCnt = bufferCnt;
    m_maxBufferSize = bufferSize;
}
// Tears the speaker down: resets and closes the output device if one was
// opened, releases the event and the critical section, then frees the pool.
CPcmSpeaker::~CPcmSpeaker()
{
    if (NULL != m_hWaveOut)
    {
        // Reset the device and release prepared headers before closing it.
        clearPcmData();
        waveOutClose(m_hWaveOut);
        m_hWaveOut = NULL;
    }

    CloseHandle(m_hBufferEvent);
    DeleteCriticalSection(&m_BufferOpCriticalSection);

    // Free each slot's sample storage, then the slot array itself.
    WaveHeadandData *const poolEnd = m_headAndDatas + m_maxBufferCnt;
    for (WaveHeadandData *slot = m_headAndDatas; slot < poolEnd; ++slot)
    {
        delete[] slot->data;
    }
    delete[] m_headAndDatas;
}
int CPcmSpeaker::init(int channels, int samplePerSec, int bitsPerSample)
{
if (m_hWaveOut != NULL) {
return 0;// 已经进行了初始化
}
// 第一步: 获取waveformat信息
m_waveFormat.wFormatTag = WAVE_FORMAT_PCM;
m_waveFormat.nChannels = channels;
m_waveFormat.wBitsPerSample = bitsPerSample;
m_waveFormat.nSamplesPerSec = samplePerSec;
m_waveFormat.nBlockAlign =
m_waveFormat.nChannels * m_waveFormat.wBitsPerSample / 8;
m_waveFormat.nAvgBytesPerSec =
m_waveFormat.nSamplesPerSec * m_waveFormat.nBlockAlign;
m_waveFormat.cbSize = sizeof(m_waveFormat);
MMRESULT ret = waveOutOpen(NULL, WAVE_MAPPER, &m_waveFormat,
NULL, NULL, WAVE_FORMAT_QUERY);
if (MMSYSERR_NOERROR != ret) {
return -1;
}
// 第二步: 获取WAVEOUT句柄
ret = waveOutOpen(&m_hWaveOut, WAVE_MAPPER, &m_waveFormat,
(DWORD_PTR)waveOutProc, (DWORD_PTR)this, CALLBACK_FUNCTION);
if (MMSYSERR_NOERROR != ret) {
return -1;
}
return 0;
}
// Device callback registered with waveOutOpen. On WOM_DONE (a buffer finished
// playing) it signals m_hBufferEvent so that a writer waiting for a free
// buffer can retry. WOM_OPEN and WOM_CLOSE need no handling.
//
// NOTE(review): the documented waveOutProc prototype uses DWORD_PTR for the
// last three parameters. With DWORD the instance pointer does not fit on
// 64-bit builds; fixing it requires changing the in-class declaration in the
// same step.
void CALLBACK CPcmSpeaker::waveOutProc(HWAVEOUT hwo, UINT uMsg, DWORD dwInstance, DWORD dwParam1, DWORD dwParam2)
{
    CPcmSpeaker *render = (CPcmSpeaker *)dwInstance;
    if (render == NULL)
        return;

    switch (uMsg)
    {
    case WOM_DONE:
        // Deliberately does NOT take m_BufferOpCriticalSection: other threads
        // hold that lock while calling waveOut* functions, and blocking on it
        // inside the callback can deadlock. SetEvent needs no extra locking.
        SetEvent(render->m_hBufferEvent);
        break;
    case WOM_CLOSE:
    case WOM_OPEN:
    default:
        break;
    }
}
int CPcmSpeaker::clearPcmData()
{
if (m_hWaveOut != NULL)
{
EnterCriticalSection(&m_BufferOpCriticalSection);
for (int i = 0; i < m_maxBufferCnt; i++)
{
if (m_headAndDatas[i].header.dwFlags & WHDR_PREPARED) //有数据被Prepered
waveOutUnprepareHeader(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR));
}
waveOutReset(m_hWaveOut);
LeaveCriticalSection(&m_BufferOpCriticalSection);
}
return 0;
}
int CPcmSpeaker::writeToWave(const void *data, int len)
{
MMRESULT mmres;
int i;
EnterCriticalSection(&m_BufferOpCriticalSection);
for (i = 0; i < m_maxBufferCnt; i++)
if (m_headAndDatas[i].header.dwFlags & WHDR_DONE)
{
//查看是否需要释放之前已经Prepared资源
if (m_headAndDatas[i].header.dwFlags & WHDR_PREPARED) //有数据被Prepered
waveOutUnprepareHeader(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR));
//写入新的数据到音频缓冲区
memcpy(m_headAndDatas[i].data, data, len);
m_headAndDatas[i].header.lpData = m_headAndDatas[i].data;
m_headAndDatas[i].header.dwBufferLength = len;
m_headAndDatas[i].header.dwFlags = 0;
mmres = waveOutPrepareHeader(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR));
if (MMSYSERR_NOERROR == mmres)
mmres = waveOutWrite(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR));
break;
}
LeaveCriticalSection(&m_BufferOpCriticalSection);
if (i == m_maxBufferCnt)
return -2;
return (mmres == MMSYSERR_NOERROR) ? 0 : -1;
}
//添加PCM音频数据,等待播放
int CPcmSpeaker::pcmtoWave(const void *data, int len, int timeout)
{
int res;
if (len > m_maxBufferSize)
return -1;
res = writeToWave(data, len);
//缓冲区已满,需要等待
if (res == -2)
{
if (WAIT_OBJECT_0 == WaitForSingleObject(m_hBufferEvent, timeout))
res = writeToWave(data, len);
}
return res;
}
int CPcmSpeaker::toSpeaker(const void *data, int len, int timeout)
{
int res;
int n, l, ptr;
//对大数据做分段处理
n = len / m_maxBufferSize;
l = len % m_maxBufferSize;
ptr = 0;
for (int i = 0; i < n; i++)
{
res = pcmtoWave(((char *)data) + ptr, m_maxBufferSize, timeout);
ptr += m_maxBufferSize;
if (res != 0)
return -1;
}
return pcmtoWave(((char *)data) + ptr, l, timeout);
}
用法
用法非常简单,如下:
- 定义实例:
CPcmSpeaker ps;
- 初始化参数:
ps.init(1, 16000, 16);
三个参数分别为:通道数,采样速率,单次采样数据位。
- 填PCM数据到喇叭:
ps.toSpeaker(data, data_len);
两个参数分别为PCM数据指针和数据长度。
额外说明
微软的这个Waveform相关的函数,感觉比较原始,用的时候,需要如下注意事项:
- 如MSDN所言,
waveOutProc
中不能调用任何Waveform
相关函数,原文如下:
Applications should not call any system-defined functions from inside a callback function, except for
EnterCriticalSection
,LeaveCriticalSection
,midiOutLongMsg
,midiOutShortMsg
,OutputDebugString
,PostMessage
,PostThreadMessage
,SetEvent
,timeGetSystemTime
,timeGetTime
,timeKillEvent
, and timeSetEvent
. Calling other wave functions will cause deadlock.
CPcmSpeaker
的构造函数定义为:CPcmSpeaker(int bufferSize = DEF_MAX_BUFFER_SIZE, int bufferCnt = DEF_MAX_BUFFER_COUNT)
,其有两个有默认值的参数,分别为每次写入系统音频缓冲区的数据的最大大小,以及CPcmSpeaker
自己的缓冲区个数,bufferCnt
不要太小(最好大于2,根据具体的情况设置大小,建议值为32),否则会出现卡顿现象。toSpeaker
函数,带有第三个参数(默认为INFINITE
)timeout
,表示函数调用超时时间。换句话说,该函数在某种程度上是阻塞式的,即,如果写入的太快,使得系统来不及播放数据,导致CPcmSpeaker类内部的缓冲区已经满了,那么toSpeaker
函数将会等待有新的缓冲区数据被播放后,腾出空间后,才返回,当然如果你不想死等,可以设置一个超时值,超时后,也会返回。