Whisper.net.WhisperProcessorBuilder类中的一些方法

命名空间

using System;

using Whisper.net.Internals.Native;

using Whisper.net.SamplingStrategy;

namespace Whisper.net;

常用方法

public class WhisperProcessorBuilder

{

// Options instance that the builder methods mutate and that Build() passes to the processor.
private readonly WhisperProcessorOptions whisperProcessorOptions;

// Native binding supplied at construction and forwarded to the WhisperProcessor in Build().
private readonly INativeWhisper nativeWhisper;

// Pool supplied at construction; used as the fallback value in WithStringPool().
private readonly IStringPool stringPool;

/// <summary>
/// Creates a builder whose options start with <c>ContextHandle</c> set to
/// <paramref name="context"/>.
/// </summary>
/// <param name="context">Handle stored in the options' <c>ContextHandle</c>.</param>
/// <param name="nativeWhisper">Native binding later handed to the processor by <c>Build()</c>.</param>
/// <param name="stringPool">Pool kept as the fallback for <c>WithStringPool()</c>.</param>
internal WhisperProcessorBuilder(nint context, INativeWhisper nativeWhisper, IStringPool stringPool)
{
    this.nativeWhisper = nativeWhisper;
    this.stringPool = stringPool;
    whisperProcessorOptions = new WhisperProcessorOptions { ContextHandle = context };
}

/// <summary>
/// Assigns <paramref name="threads"/> to the <c>Threads</c> option.
/// </summary>
/// <remarks>
/// Original note: the number of CPU threads used for inference. How the value
/// is consumed is not visible in this class.
/// </remarks>
/// <param name="threads">Value stored in <c>Threads</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithThreads(int threads)
{
    whisperProcessorOptions.Threads = threads;
    return this;
}

/// <summary>
/// Assigns <paramref name="maxLastTextTokens"/> to the <c>MaxLastTextTokens</c> option.
/// </summary>
/// <remarks>
/// Original note: caps how many preceding text tokens the model refers back to.
/// NOTE(review): the original comment also cited a default of about 128; that
/// default is not established anywhere in this file — confirm before relying on it.
/// </remarks>
/// <param name="maxLastTextTokens">Value stored in <c>MaxLastTextTokens</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithMaxLastTextTokens(int maxLastTextTokens)
{
    whisperProcessorOptions.MaxLastTextTokens = maxLastTextTokens;
    return this;
}

/// <summary>
/// Assigns <paramref name="offset"/> to the <c>Offset</c> option.
/// </summary>
/// <remarks>
/// Original note: the start offset into the audio, i.e. how much of the
/// beginning is skipped.
/// </remarks>
/// <param name="offset">Value stored in <c>Offset</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithOffset(TimeSpan offset)
{
    whisperProcessorOptions.Offset = offset;
    return this;
}

/// <summary>
/// Assigns <paramref name="duration"/> to the <c>Duration</c> option.
/// </summary>
/// <remarks>
/// Original note: limits how much audio is processed, presumably counted from
/// the configured offset — confirm against the consumer of the options.
/// </remarks>
/// <param name="duration">Value stored in <c>Duration</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithDuration(TimeSpan duration)
{
    whisperProcessorOptions.Duration = duration;
    return this;
}

/// <summary>
/// Sets the <c>Translate</c> option to <c>true</c>.
/// </summary>
/// <remarks>
/// Original note: turns on translation of the recognized text into English.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithTranslate()
{
    whisperProcessorOptions.Translate = true;
    return this;
}

/// <summary>
/// Sets the <c>NoContext</c> option to <c>true</c>.
/// </summary>
/// <remarks>
/// Original note: disables reuse of prior context, trading long-text coherence
/// for responsiveness. That trade-off is the original author's description and
/// is not demonstrated by this class.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithNoContext()
{
    whisperProcessorOptions.NoContext = true;
    return this;
}

/// <summary>
/// Sets the <c>SingleSegment</c> option to <c>true</c>.
/// </summary>
/// <remarks>
/// Original note: forces the whole audio to be handled as one segment, which
/// the original author suggests for short utterances.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithSingleSegment()
{
    whisperProcessorOptions.SingleSegment = true;
    return this;
}

/// <summary>
/// Sets the <c>PrintSpecialTokens</c> option to <c>true</c>.
/// </summary>
/// <remarks>
/// Original note: prints special control tokens (for example the no-speech
/// marker), intended for debugging.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithPrintSpecialTokens()
{
    whisperProcessorOptions.PrintSpecialTokens = true;
    return this;
}

/// <summary>
/// Sets the <c>PrintProgress</c> option to <c>true</c>.
/// </summary>
/// <remarks>
/// Original note: progress is written to the console while processing. Where
/// the output actually goes is decided outside this class.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithPrintProgress()
{
    whisperProcessorOptions.PrintProgress = true;
    return this;
}

/// <summary>
/// Sets the <c>PrintResults</c> option to <c>true</c>.
/// </summary>
/// <remarks>
/// Original note: recognition results are written to the console.
/// NOTE(review): the original comment claimed this is already enabled by
/// default; the default is not visible in this file — confirm.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithPrintResults()
{
    whisperProcessorOptions.PrintResults = true;
    return this;
}

/// <summary>
/// Assigns <paramref name="printTimestamps"/> to the <c>PrintTimestamps</c> option.
/// </summary>
/// <remarks>
/// Original note: controls whether timestamps are included in console output.
/// </remarks>
/// <param name="printTimestamps">Value stored in <c>PrintTimestamps</c>; defaults to <c>true</c>.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithPrintTimestamps(bool printTimestamps = true)
{
    whisperProcessorOptions.PrintTimestamps = printTimestamps;
    return this;
}

/// <summary>
/// Sets the <c>UseTokenTimestamps</c> option to <c>true</c>.
/// </summary>
/// <remarks>
/// Original note: enables token-level timestamps and is meant to be combined
/// with <c>WithTokenTimestampsThreshold</c>. This method does not enforce
/// that pairing.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithTokenTimestamps()
{
    whisperProcessorOptions.UseTokenTimestamps = true;
    return this;
}

/// <summary>
/// Assigns <paramref name="tokenTimestampsThreshold"/> to the
/// <c>TokenTimestampsThreshold</c> option.
/// </summary>
/// <remarks>
/// Original note: a confidence threshold for token timestamps.
/// NOTE(review): the original comment gave a 0-1 range; no range check exists
/// in this method — confirm the valid range against the consumer.
/// </remarks>
/// <param name="tokenTimestampsThreshold">Value stored in <c>TokenTimestampsThreshold</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithTokenTimestampsThreshold(float tokenTimestampsThreshold)
{
    whisperProcessorOptions.TokenTimestampsThreshold = tokenTimestampsThreshold;
    return this;
}

/// <summary>
/// Assigns <paramref name="tokenTimestampsSumThreshold"/> to the
/// <c>TokenTimestampsSumThreshold</c> option.
/// </summary>
/// <remarks>
/// Original note: a cumulative-probability threshold applied when timestamps
/// span several tokens. The exact use is not visible in this class.
/// </remarks>
/// <param name="tokenTimestampsSumThreshold">Value stored in <c>TokenTimestampsSumThreshold</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithTokenTimestampsSumThreshold(float tokenTimestampsSumThreshold)
{
    whisperProcessorOptions.TokenTimestampsSumThreshold = tokenTimestampsSumThreshold;
    return this;
}

/// <summary>
/// Assigns <paramref name="maxSegmentLength"/> to the <c>MaxSegmentLength</c> option.
/// </summary>
/// <remarks>
/// Original note: the maximum length of one segment, beyond which output is split.
/// NOTE(review): the original comment stated the unit is milliseconds; the
/// unit is not established by this file — confirm before relying on it.
/// </remarks>
/// <param name="maxSegmentLength">Value stored in <c>MaxSegmentLength</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithMaxSegmentLength(int maxSegmentLength)
{
    whisperProcessorOptions.MaxSegmentLength = maxSegmentLength;
    return this;
}

/// <summary>
/// Sets the <c>SplitOnWord</c> option to <c>true</c>.
/// </summary>
/// <remarks>
/// Original note: segments are split on word boundaries rather than inside a word.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder SplitOnWord()
{
    whisperProcessorOptions.SplitOnWord = true;
    return this;
}

/// <summary>
/// Assigns <paramref name="maxTokensPerSegment"/> to the <c>MaxTokensPerSegment</c> option.
/// </summary>
/// <remarks>
/// Original note: the maximum number of tokens in one segment, after which a
/// new segment is forced.
/// </remarks>
/// <param name="maxTokensPerSegment">Value stored in <c>MaxTokensPerSegment</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithMaxTokensPerSegment(int maxTokensPerSegment)
{
    whisperProcessorOptions.MaxTokensPerSegment = maxTokensPerSegment;
    return this;
}

/// <summary>
/// Assigns <paramref name="audioContextSize"/> to the <c>AudioContextSize</c> option.
/// </summary>
/// <remarks>
/// Original note: configures the size of the audio context window. Its effect
/// on recognition is not visible in this class.
/// </remarks>
/// <param name="audioContextSize">Value stored in <c>AudioContextSize</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithAudioContextSize(int audioContextSize)
{
    whisperProcessorOptions.AudioContextSize = audioContextSize;
    return this;
}

/// <summary>
/// Assigns <paramref name="regex"/> to the <c>SuppressRegex</c> option.
/// </summary>
/// <remarks>
/// Original note: a regular expression used to suppress matching content in
/// the output. The pattern is stored as given; it is not compiled or checked here.
/// </remarks>
/// <param name="regex">Pattern text stored in <c>SuppressRegex</c>.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithSuppressRegex(string regex)
{
    whisperProcessorOptions.SuppressRegex = regex;
    return this;
}

/// <summary>
/// Assigns <paramref name="prompt"/> to the <c>Prompt</c> option.
/// </summary>
/// <remarks>
/// Original note: a context prompt intended to help recognition of specific
/// terminology.
/// </remarks>
/// <param name="prompt">Text stored in <c>Prompt</c>.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithPrompt(string prompt)
{
    whisperProcessorOptions.Prompt = prompt;
    return this;
}

/// <summary>
/// Assigns <paramref name="language"/> to the <c>Language</c> option.
/// </summary>
/// <remarks>
/// Original note: expects a language code such as "zh". The value is stored
/// as given; no format check happens here.
/// </remarks>
/// <param name="language">Text stored in <c>Language</c>.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithLanguage(string language)
{
    whisperProcessorOptions.Language = language;
    return this;
}

/// <summary>
/// Sets the <c>Language</c> option to the empty string.
/// </summary>
/// <remarks>
/// Original note: this enables automatic language detection. Presumably the
/// consumer of the options treats an empty language that way — confirm there.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithLanguageDetection()
{
    whisperProcessorOptions.Language = string.Empty;
    return this;
}

/// <summary>
/// Sets the <c>SuppressBlank</c> option to <c>false</c>.
/// </summary>
/// <remarks>
/// Original note: turns off blank suppression so results for blank audio are kept.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithoutSuppressBlank()
{
    whisperProcessorOptions.SuppressBlank = false;
    return this;
}

/// <summary>
/// Assigns <paramref name="temperature"/> to the <c>Temperature</c> option.
/// </summary>
/// <remarks>
/// Original note: adjusts sampling randomness.
/// NOTE(review): the original comment gave a 0-1 range; no range check exists
/// in this method — confirm the valid range against the consumer.
/// </remarks>
/// <param name="temperature">Value stored in <c>Temperature</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithTemperature(float temperature)
{
    whisperProcessorOptions.Temperature = temperature;
    return this;
}

/// <summary>
/// Assigns <paramref name="maxInitialTs"/> to the <c>MaxInitialTs</c> option.
/// </summary>
/// <remarks>
/// Original note: bounds the initial timestamp. The unit and effect are not
/// visible in this class.
/// </remarks>
/// <param name="maxInitialTs">Value stored in <c>MaxInitialTs</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithMaxInitialTs(float maxInitialTs)
{
    whisperProcessorOptions.MaxInitialTs = maxInitialTs;
    return this;
}

/// <summary>
/// Assigns <paramref name="lengthPenalty"/> to the <c>LengthPenalty</c> option.
/// </summary>
/// <remarks>
/// Original note: a length-penalty factor for generated output.
/// NOTE(review): the original comment said values above 1 favour longer text;
/// that is not established by this file — confirm.
/// </remarks>
/// <param name="lengthPenalty">Value stored in <c>LengthPenalty</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithLengthPenalty(float lengthPenalty)
{
    whisperProcessorOptions.LengthPenalty = lengthPenalty;
    return this;
}

/// <summary>
/// Assigns <paramref name="temperature"/> to the <c>TemperatureInc</c> option.
/// </summary>
/// <remarks>
/// Note that the parameter is named <c>temperature</c> but it is written to
/// <c>TemperatureInc</c>, not to <c>Temperature</c>. Presumably this is a
/// temperature increment — confirm against the consumer of the options.
/// </remarks>
/// <param name="temperature">Value stored in <c>TemperatureInc</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithTemperatureInc(float temperature)
{
    whisperProcessorOptions.TemperatureInc = temperature;
    return this;
}

/// <summary>
/// Assigns <paramref name="entropyThreshold"/> to the <c>EntropyThreshold</c> option.
/// </summary>
/// <remarks>
/// Original note: an entropy threshold used to filter out ambiguous results.
/// How the threshold is applied is not visible in this class.
/// </remarks>
/// <param name="entropyThreshold">Value stored in <c>EntropyThreshold</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithEntropyThreshold(float entropyThreshold)
{
    whisperProcessorOptions.EntropyThreshold = entropyThreshold;
    return this;
}

/// <summary>
/// Assigns <paramref name="logProbThreshold"/> to the <c>LogProbThreshold</c> option.
/// </summary>
/// <remarks>
/// Original note: a log-probability threshold. How the threshold is applied
/// is not visible in this class.
/// </remarks>
/// <param name="logProbThreshold">Value stored in <c>LogProbThreshold</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithLogProbThreshold(float logProbThreshold)
{
    whisperProcessorOptions.LogProbThreshold = logProbThreshold;
    return this;
}

/// <summary>
/// Assigns <paramref name="noSpeechThreshold"/> to the <c>NoSpeechThreshold</c> option.
/// </summary>
/// <remarks>
/// Original note: a threshold for no-speech detection.
/// NOTE(review): the original comment gave a 0-1 range and a direction for
/// the comparison; neither is established by this file — confirm.
/// </remarks>
/// <param name="noSpeechThreshold">Value stored in <c>NoSpeechThreshold</c>; not validated here.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithNoSpeechThreshold(float noSpeechThreshold)
{
    whisperProcessorOptions.NoSpeechThreshold = noSpeechThreshold;
    return this;
}

/// <summary>
/// Appends <paramref name="segmentEventHandler"/> to the options'
/// <c>OnSegmentEventHandlers</c> collection.
/// </summary>
/// <remarks>
/// Original note: the handler is a callback for completed segments. Each call
/// adds another handler; existing ones are kept.
/// </remarks>
/// <param name="segmentEventHandler">Handler to register; must not be <c>null</c>.</param>
/// <returns>This builder, for call chaining.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="segmentEventHandler"/> is <c>null</c>.
/// </exception>
public WhisperProcessorBuilder WithSegmentEventHandler(OnSegmentEventHandler segmentEventHandler)
{
    // Reject null here: a null entry in the list would only fail later, when
    // the handlers are invoked, far from the call that introduced it.
    if (segmentEventHandler is null)
    {
        throw new ArgumentNullException(nameof(segmentEventHandler));
    }

    whisperProcessorOptions.OnSegmentEventHandlers.Add(segmentEventHandler);
    return this;
}

/// <summary>
/// Appends <paramref name="progressHandler"/> to the options'
/// <c>OnProgressHandlers</c> collection.
/// </summary>
/// <remarks>
/// Original note: the handler receives progress updates, e.g. to drive a
/// progress bar. NOTE(review): the original comment described the value as
/// 0-1; the scale is not established by this file — confirm.
/// </remarks>
/// <param name="progressHandler">Handler to register; must not be <c>null</c>.</param>
/// <returns>This builder, for call chaining.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="progressHandler"/> is <c>null</c>.
/// </exception>
public WhisperProcessorBuilder WithProgressHandler(OnProgressHandler progressHandler)
{
    // Reject null here: a null entry in the list would only fail later, when
    // the handlers are invoked, far from the call that introduced it.
    if (progressHandler is null)
    {
        throw new ArgumentNullException(nameof(progressHandler));
    }

    whisperProcessorOptions.OnProgressHandlers.Add(progressHandler);
    return this;
}

/// <summary>
/// Appends <paramref name="encoderBeginEventHandler"/> to the options'
/// <c>OnEncoderBeginEventHandlers</c> collection.
/// </summary>
/// <remarks>
/// Original note: the handler runs before the encoder starts and can abort
/// processing by returning <c>false</c>. That contract belongs to the delegate
/// type and its caller, neither of which is visible in this file.
/// </remarks>
/// <param name="encoderBeginEventHandler">Handler to register; must not be <c>null</c>.</param>
/// <returns>This builder, for call chaining.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="encoderBeginEventHandler"/> is <c>null</c>.
/// </exception>
public WhisperProcessorBuilder WithEncoderBeginHandler(OnEncoderBeginEventHandler encoderBeginEventHandler)
{
    // Reject null here: a null entry in the list would only fail later, when
    // the handlers are invoked, far from the call that introduced it.
    if (encoderBeginEventHandler is null)
    {
        throw new ArgumentNullException(nameof(encoderBeginEventHandler));
    }

    whisperProcessorOptions.OnEncoderBeginEventHandlers.Add(encoderBeginEventHandler);
    return this;
}

/// <summary>
/// Sets the <c>StringPool</c> option to <paramref name="stringPool"/>, or to
/// the pool this builder was constructed with when the argument is <c>null</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the original comment claimed pooling is on by default; the
/// default value of <c>StringPool</c> is not visible in this file — confirm.
/// </remarks>
/// <param name="stringPool">Pool to use; <c>null</c> selects the builder's own pool.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithStringPool(IStringPool? stringPool = null)
{
    whisperProcessorOptions.StringPool = stringPool ?? this.stringPool;
    return this;
}

/// <summary>
/// Sets the <c>StringPool</c> option to <c>null</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the original comment described this as trading memory for
/// thread safety; nothing in this file demonstrates that — confirm.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithoutStringPool()
{
    whisperProcessorOptions.StringPool = null;
    return this;
}

/// <summary>
/// Creates a new <c>GreedySamplingStrategy</c>, stores it in the
/// <c>SamplingStrategy</c> option, and returns a strategy builder wrapping
/// both this builder and the new strategy.
/// </summary>
/// <remarks>
/// NOTE(review): the original comment called greedy sampling the default;
/// the default strategy is not visible in this file — confirm.
/// </remarks>
/// <returns>A <c>GreedySamplingStrategyBuilder</c> for the newly created strategy.</returns>
public IWhisperSamplingStrategyBuilder WithGreedySamplingStrategy()
{
    var strategy = new GreedySamplingStrategy();
    whisperProcessorOptions.SamplingStrategy = strategy;
    return new GreedySamplingStrategyBuilder(this, strategy);
}

/// <summary>
/// Creates a new <c>BeamSearchSamplingStrategy</c>, stores it in the
/// <c>SamplingStrategy</c> option, and returns a strategy builder wrapping
/// both this builder and the new strategy.
/// </summary>
/// <remarks>
/// Original note: beam search explores several candidate paths; the claimed
/// quality benefit is the original author's description.
/// </remarks>
/// <returns>A <c>BeamSearchSamplingStrategyBuilder</c> for the newly created strategy.</returns>
public IWhisperSamplingStrategyBuilder WithBeamSearchSamplingStrategy()
{
    var strategy = new BeamSearchSamplingStrategy();
    whisperProcessorOptions.SamplingStrategy = strategy;
    return new BeamSearchSamplingStrategyBuilder(this, strategy);
}

/// <summary>
/// Sets the <c>ComputeProbabilities</c> option to <c>true</c>.
/// </summary>
/// <remarks>
/// Original note: makes token probabilities available, intended for debugging.
/// What exactly is computed is decided by the consumer of the options.
/// </remarks>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithProbabilities()
{
    whisperProcessorOptions.ComputeProbabilities = true;
    return this;
}

/// <summary>
/// Stores the three OpenVINO-related values in the options:
/// <c>OpenVinoModelPath</c>, <c>OpenVinoDevice</c> and <c>OpenVinoCacheDir</c>.
/// </summary>
/// <remarks>
/// Original note: enables an OpenVINO-accelerated encoder where supported.
/// This method only records the values; all three may be <c>null</c>.
/// </remarks>
/// <param name="openVinoEncoderPath">Value stored in <c>OpenVinoModelPath</c>.</param>
/// <param name="openVinoDevice">Value stored in <c>OpenVinoDevice</c>.</param>
/// <param name="openVinoCachePath">Value stored in <c>OpenVinoCacheDir</c>.</param>
/// <returns>This builder, for call chaining.</returns>
public WhisperProcessorBuilder WithOpenVinoEncoder(string? openVinoEncoderPath, string? openVinoDevice, string? openVinoCachePath)
{
    whisperProcessorOptions.OpenVinoModelPath = openVinoEncoderPath;
    whisperProcessorOptions.OpenVinoDevice = openVinoDevice;
    whisperProcessorOptions.OpenVinoCacheDir = openVinoCachePath;
    return this;
}

/// <summary>
/// Creates a <c>WhisperProcessor</c> from the accumulated options and the
/// native binding given to this builder.
/// </summary>
/// <remarks>
/// NOTE(review): the original comment said this validates the configuration
/// and yields an immutable processor. This method performs no validation
/// itself, and it passes the builder's own options instance (not a copy) to
/// the processor.
/// </remarks>
/// <returns>A new <c>WhisperProcessor</c>.</returns>
public WhisperProcessor Build()
    => new WhisperProcessor(whisperProcessorOptions, nativeWhisper);

Whisper.net.WhisperProcessorBuilder类中的一些方法

推荐阅读更多精彩内容