// Namespaces
using System;
using Whisper.net.Internals.Native;
using Whisper.net.SamplingStrategy;
namespace Whisper.net;
// Common builder methods
/// <summary>
/// Fluent builder that accumulates settings on a single <see cref="WhisperProcessorOptions"/>
/// instance and then creates a <see cref="WhisperProcessor"/> from it.
/// </summary>
/// <remarks>
/// Except for the two sampling-strategy methods (which return a strategy builder), every
/// configuration method returns this builder so that calls can be chained. The precise effect
/// of each option is decided by the processor / native library, not by this class; remarks
/// marked "presumably" are assumptions to confirm against that code.
/// </remarks>
public class WhisperProcessorBuilder
{
    // The one options object every configuration method mutates; handed to the processor in Build().
    private readonly WhisperProcessorOptions whisperProcessorOptions;

    // Native API wrapper forwarded unchanged to the processor in Build().
    private readonly INativeWhisper nativeWhisper;

    // Fallback pool used by WithStringPool() when the caller does not pass one.
    private readonly IStringPool stringPool;

    /// <summary>
    /// Creates a builder whose options are bound to the given native context handle.
    /// </summary>
    /// <param name="context">Handle stored in the options' <c>ContextHandle</c>.</param>
    /// <param name="nativeWhisper">Native API wrapper passed to the processor by <see cref="Build"/>.</param>
    /// <param name="stringPool">Pool used by <see cref="WithStringPool"/> when no pool is supplied.</param>
    internal WhisperProcessorBuilder(nint context, INativeWhisper nativeWhisper, IStringPool stringPool)
    {
        this.nativeWhisper = nativeWhisper;
        this.stringPool = stringPool;
        whisperProcessorOptions = new WhisperProcessorOptions
        {
            ContextHandle = context
        };
    }

    /// <summary>
    /// Stores <paramref name="threads"/> in the <c>Threads</c> option
    /// (presumably the number of CPU threads used for inference).
    /// </summary>
    /// <param name="threads">Value to store; not validated here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithThreads(int threads)
    {
        whisperProcessorOptions.Threads = threads;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="maxLastTextTokens"/> in the <c>MaxLastTextTokens</c> option
    /// (presumably a cap on how many tokens of preceding text are used as context).
    /// </summary>
    /// <param name="maxLastTextTokens">Value to store; not validated here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithMaxLastTextTokens(int maxLastTextTokens)
    {
        whisperProcessorOptions.MaxLastTextTokens = maxLastTextTokens;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="offset"/> in the <c>Offset</c> option
    /// (presumably the position in the audio at which processing starts).
    /// </summary>
    /// <param name="offset">Value to store.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithOffset(TimeSpan offset)
    {
        whisperProcessorOptions.Offset = offset;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="duration"/> in the <c>Duration</c> option
    /// (presumably the length of audio to process).
    /// </summary>
    /// <param name="duration">Value to store.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithDuration(TimeSpan duration)
    {
        whisperProcessorOptions.Duration = duration;
        return this;
    }

    /// <summary>
    /// Sets the <c>Translate</c> option to <c>true</c>
    /// (presumably requests translation of the recognized text into English).
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithTranslate()
    {
        whisperProcessorOptions.Translate = true;
        return this;
    }

    /// <summary>
    /// Sets the <c>NoContext</c> option to <c>true</c>
    /// (presumably stops earlier output from being used as context for later decoding).
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithNoContext()
    {
        whisperProcessorOptions.NoContext = true;
        return this;
    }

    /// <summary>
    /// Sets the <c>SingleSegment</c> option to <c>true</c>
    /// (presumably forces the output into a single segment).
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithSingleSegment()
    {
        whisperProcessorOptions.SingleSegment = true;
        return this;
    }

    /// <summary>
    /// Sets the <c>PrintSpecialTokens</c> option to <c>true</c>.
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithPrintSpecialTokens()
    {
        whisperProcessorOptions.PrintSpecialTokens = true;
        return this;
    }

    /// <summary>
    /// Sets the <c>PrintProgress</c> option to <c>true</c>.
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithPrintProgress()
    {
        whisperProcessorOptions.PrintProgress = true;
        return this;
    }

    /// <summary>
    /// Sets the <c>PrintResults</c> option to <c>true</c>.
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithPrintResults()
    {
        whisperProcessorOptions.PrintResults = true;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="printTimestamps"/> in the <c>PrintTimestamps</c> option.
    /// </summary>
    /// <param name="printTimestamps">Value to store; defaults to <c>true</c>.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithPrintTimestamps(bool printTimestamps = true)
    {
        whisperProcessorOptions.PrintTimestamps = printTimestamps;
        return this;
    }

    /// <summary>
    /// Sets the <c>UseTokenTimestamps</c> option to <c>true</c>
    /// (presumably enables per-token timestamps; see also
    /// <see cref="WithTokenTimestampsThreshold"/>).
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithTokenTimestamps()
    {
        whisperProcessorOptions.UseTokenTimestamps = true;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="tokenTimestampsThreshold"/> in the
    /// <c>TokenTimestampsThreshold</c> option.
    /// </summary>
    /// <param name="tokenTimestampsThreshold">Value to store; not range-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithTokenTimestampsThreshold(float tokenTimestampsThreshold)
    {
        whisperProcessorOptions.TokenTimestampsThreshold = tokenTimestampsThreshold;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="tokenTimestampsSumThreshold"/> in the
    /// <c>TokenTimestampsSumThreshold</c> option.
    /// </summary>
    /// <param name="tokenTimestampsSumThreshold">Value to store; not range-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithTokenTimestampsSumThreshold(float tokenTimestampsSumThreshold)
    {
        whisperProcessorOptions.TokenTimestampsSumThreshold = tokenTimestampsSumThreshold;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="maxSegmentLength"/> in the <c>MaxSegmentLength</c> option.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the unit is not established by this file. An earlier comment here said
    /// milliseconds, while whisper.cpp documents its segment-length limit in characters —
    /// confirm against the processor before relying on either.
    /// </remarks>
    /// <param name="maxSegmentLength">Value to store; not validated here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithMaxSegmentLength(int maxSegmentLength)
    {
        whisperProcessorOptions.MaxSegmentLength = maxSegmentLength;
        return this;
    }

    /// <summary>
    /// Sets the <c>SplitOnWord</c> option to <c>true</c>
    /// (presumably makes segment splitting happen on word boundaries).
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder SplitOnWord()
    {
        whisperProcessorOptions.SplitOnWord = true;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="maxTokensPerSegment"/> in the <c>MaxTokensPerSegment</c> option.
    /// </summary>
    /// <param name="maxTokensPerSegment">Value to store; not validated here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithMaxTokensPerSegment(int maxTokensPerSegment)
    {
        whisperProcessorOptions.MaxTokensPerSegment = maxTokensPerSegment;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="audioContextSize"/> in the <c>AudioContextSize</c> option.
    /// </summary>
    /// <param name="audioContextSize">Value to store; not validated here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithAudioContextSize(int audioContextSize)
    {
        whisperProcessorOptions.AudioContextSize = audioContextSize;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="regex"/> in the <c>SuppressRegex</c> option
    /// (presumably a pattern for output that should be suppressed).
    /// </summary>
    /// <param name="regex">Pattern text to store; it is not compiled or validated here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithSuppressRegex(string regex)
    {
        whisperProcessorOptions.SuppressRegex = regex;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="prompt"/> in the <c>Prompt</c> option
    /// (presumably initial text that biases recognition, e.g. towards domain vocabulary).
    /// </summary>
    /// <param name="prompt">Prompt text to store.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithPrompt(string prompt)
    {
        whisperProcessorOptions.Prompt = prompt;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="language"/> in the <c>Language</c> option.
    /// </summary>
    /// <param name="language">
    /// Language identifier to store (presumably a short language code such as "zh");
    /// not validated here.
    /// </param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithLanguage(string language)
    {
        whisperProcessorOptions.Language = language;
        return this;
    }

    /// <summary>
    /// Sets the <c>Language</c> option to the empty string, overwriting any value set by
    /// <see cref="WithLanguage"/> (presumably the processor treats an empty language as
    /// "detect automatically").
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithLanguageDetection()
    {
        whisperProcessorOptions.Language = string.Empty;
        return this;
    }

    /// <summary>
    /// Sets the <c>SuppressBlank</c> option to <c>false</c>.
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithoutSuppressBlank()
    {
        whisperProcessorOptions.SuppressBlank = false;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="temperature"/> in the <c>Temperature</c> option
    /// (presumably the sampling temperature).
    /// </summary>
    /// <param name="temperature">Value to store; not range-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithTemperature(float temperature)
    {
        whisperProcessorOptions.Temperature = temperature;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="maxInitialTs"/> in the <c>MaxInitialTs</c> option.
    /// </summary>
    /// <param name="maxInitialTs">Value to store; not range-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithMaxInitialTs(float maxInitialTs)
    {
        whisperProcessorOptions.MaxInitialTs = maxInitialTs;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="lengthPenalty"/> in the <c>LengthPenalty</c> option.
    /// </summary>
    /// <param name="lengthPenalty">Value to store; not range-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithLengthPenalty(float lengthPenalty)
    {
        whisperProcessorOptions.LengthPenalty = lengthPenalty;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="temperature"/> in the <c>TemperatureInc</c> option. Note that this
    /// is a different option from the one set by <see cref="WithTemperature"/>, despite the
    /// shared parameter name (presumably the temperature increment applied on decoding retries).
    /// </summary>
    /// <param name="temperature">Value to store in <c>TemperatureInc</c>; not range-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithTemperatureInc(float temperature)
    {
        whisperProcessorOptions.TemperatureInc = temperature;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="entropyThreshold"/> in the <c>EntropyThreshold</c> option.
    /// </summary>
    /// <param name="entropyThreshold">Value to store; not range-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithEntropyThreshold(float entropyThreshold)
    {
        whisperProcessorOptions.EntropyThreshold = entropyThreshold;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="logProbThreshold"/> in the <c>LogProbThreshold</c> option.
    /// </summary>
    /// <param name="logProbThreshold">Value to store; not range-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithLogProbThreshold(float logProbThreshold)
    {
        whisperProcessorOptions.LogProbThreshold = logProbThreshold;
        return this;
    }

    /// <summary>
    /// Stores <paramref name="noSpeechThreshold"/> in the <c>NoSpeechThreshold</c> option.
    /// </summary>
    /// <param name="noSpeechThreshold">Value to store; not range-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithNoSpeechThreshold(float noSpeechThreshold)
    {
        whisperProcessorOptions.NoSpeechThreshold = noSpeechThreshold;
        return this;
    }

    /// <summary>
    /// Appends <paramref name="segmentEventHandler"/> to the options' segment handler
    /// collection. Each call adds another entry; earlier handlers are kept.
    /// </summary>
    /// <param name="segmentEventHandler">Handler to add; not null-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithSegmentEventHandler(OnSegmentEventHandler segmentEventHandler)
    {
        whisperProcessorOptions.OnSegmentEventHandlers.Add(segmentEventHandler);
        return this;
    }

    /// <summary>
    /// Appends <paramref name="progressHandler"/> to the options' progress handler
    /// collection. Each call adds another entry; earlier handlers are kept.
    /// </summary>
    /// <param name="progressHandler">Handler to add; not null-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithProgressHandler(OnProgressHandler progressHandler)
    {
        whisperProcessorOptions.OnProgressHandlers.Add(progressHandler);
        return this;
    }

    /// <summary>
    /// Appends <paramref name="encoderBeginEventHandler"/> to the options' encoder-begin
    /// handler collection. Each call adds another entry; earlier handlers are kept.
    /// </summary>
    /// <param name="encoderBeginEventHandler">Handler to add; not null-checked here.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithEncoderBeginHandler(OnEncoderBeginEventHandler encoderBeginEventHandler)
    {
        whisperProcessorOptions.OnEncoderBeginEventHandlers.Add(encoderBeginEventHandler);
        return this;
    }

    /// <summary>
    /// Sets the <c>StringPool</c> option to <paramref name="stringPool"/>, or to the pool this
    /// builder was constructed with when the argument is <c>null</c>.
    /// </summary>
    /// <param name="stringPool">Pool to use, or <c>null</c> to use the builder's own pool.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithStringPool(IStringPool? stringPool = null)
    {
        // The parameter shadows the field of the same name, hence the explicit this-qualifier.
        var selectedPool = stringPool ?? this.stringPool;
        whisperProcessorOptions.StringPool = selectedPool;
        return this;
    }

    /// <summary>
    /// Sets the <c>StringPool</c> option to <c>null</c>, undoing any earlier
    /// <see cref="WithStringPool"/> call.
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithoutStringPool()
    {
        whisperProcessorOptions.StringPool = null;
        return this;
    }

    /// <summary>
    /// Creates a new <see cref="GreedySamplingStrategy"/>, stores it as the options'
    /// <c>SamplingStrategy</c> (replacing any previous one), and returns a builder for
    /// configuring that strategy.
    /// </summary>
    /// <returns>
    /// A strategy builder constructed from this builder and the new strategy instance.
    /// </returns>
    public IWhisperSamplingStrategyBuilder WithGreedySamplingStrategy()
    {
        var strategy = new GreedySamplingStrategy();
        whisperProcessorOptions.SamplingStrategy = strategy;
        return new GreedySamplingStrategyBuilder(this, strategy);
    }

    /// <summary>
    /// Creates a new <see cref="BeamSearchSamplingStrategy"/>, stores it as the options'
    /// <c>SamplingStrategy</c> (replacing any previous one), and returns a builder for
    /// configuring that strategy.
    /// </summary>
    /// <returns>
    /// A strategy builder constructed from this builder and the new strategy instance.
    /// </returns>
    public IWhisperSamplingStrategyBuilder WithBeamSearchSamplingStrategy()
    {
        var strategy = new BeamSearchSamplingStrategy();
        whisperProcessorOptions.SamplingStrategy = strategy;
        return new BeamSearchSamplingStrategyBuilder(this, strategy);
    }

    /// <summary>
    /// Sets the <c>ComputeProbabilities</c> option to <c>true</c>.
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithProbabilities()
    {
        whisperProcessorOptions.ComputeProbabilities = true;
        return this;
    }

    /// <summary>
    /// Stores the three OpenVINO-related values in the options. All three are written on every
    /// call, so passing <c>null</c> clears a previously stored value.
    /// </summary>
    /// <param name="openVinoEncoderPath">Stored in <c>OpenVinoModelPath</c>.</param>
    /// <param name="openVinoDevice">Stored in <c>OpenVinoDevice</c>.</param>
    /// <param name="openVinoCachePath">Stored in <c>OpenVinoCacheDir</c>.</param>
    /// <returns>This builder, for chaining.</returns>
    public WhisperProcessorBuilder WithOpenVinoEncoder(string? openVinoEncoderPath, string? openVinoDevice, string? openVinoCachePath)
    {
        whisperProcessorOptions.OpenVinoModelPath = openVinoEncoderPath;
        whisperProcessorOptions.OpenVinoDevice = openVinoDevice;
        whisperProcessorOptions.OpenVinoCacheDir = openVinoCachePath;
        return this;
    }

    /// <summary>
    /// Creates a <see cref="WhisperProcessor"/> from the accumulated options and the native API
    /// wrapper supplied at construction.
    /// </summary>
    /// <remarks>
    /// No validation is performed here, and the builder's own options instance is passed
    /// directly (no copy is made in this method), so later calls on this builder mutate the
    /// same object the processor was given.
    /// </remarks>
    /// <returns>A new processor instance.</returns>
    public WhisperProcessor Build() => new WhisperProcessor(whisperProcessorOptions, nativeWhisper);
}