Whisper.net.WhisperProcessorBuilder类中的一些方法

命名空间

using System;

using Whisper.net.Internals.Native;

using Whisper.net.SamplingStrategy;

namespace Whisper.net;

常用方法

/// <summary>
/// Fluent builder that accumulates settings in a single <see cref="WhisperProcessorOptions"/>
/// instance and finally creates a <see cref="WhisperProcessor"/> from it.
/// </summary>
/// <remarks>
/// Every configuration method writes to the same options object. All of them return this
/// builder for chaining, except the two sampling-strategy methods, which return a dedicated
/// strategy builder. How each option is interpreted is decided by the processor / native
/// library and is not visible in this class.
/// </remarks>
public class WhisperProcessorBuilder
{
    private readonly WhisperProcessorOptions processorOptions;
    private readonly INativeWhisper whisperNative;
    private readonly IStringPool defaultStringPool;

    /// <summary>
    /// Creates a builder whose options are bound to the given context handle.
    /// </summary>
    /// <param name="context">Handle stored in the options as <c>ContextHandle</c>.</param>
    /// <param name="nativeWhisper">Native binding handed to the processor in <see cref="Build"/>.</param>
    /// <param name="stringPool">Pool used by <see cref="WithStringPool"/> when no pool is supplied.</param>
    internal WhisperProcessorBuilder(nint context, INativeWhisper nativeWhisper, IStringPool stringPool)
    {
        processorOptions = new WhisperProcessorOptions { ContextHandle = context };
        whisperNative = nativeWhisper;
        defaultStringPool = stringPool;
    }

    /// <summary>Sets the <c>Threads</c> option (thread count used for processing).</summary>
    /// <param name="threads">Value stored as-is; no range check is done here.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithThreads(int threads)
    {
        processorOptions.Threads = threads;
        return this;
    }

    /// <summary>Sets the <c>MaxLastTextTokens</c> option.</summary>
    /// <param name="maxLastTextTokens">
    /// Presumably the number of preceding text tokens kept as context — confirm against the native library.
    /// </param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithMaxLastTextTokens(int maxLastTextTokens)
    {
        processorOptions.MaxLastTextTokens = maxLastTextTokens;
        return this;
    }

    /// <summary>Sets the <c>Offset</c> option (start offset into the audio).</summary>
    /// <param name="offset">Offset to store.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithOffset(TimeSpan offset)
    {
        processorOptions.Offset = offset;
        return this;
    }

    /// <summary>Sets the <c>Duration</c> option (how much audio to process).</summary>
    /// <param name="duration">Duration to store.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithDuration(TimeSpan duration)
    {
        processorOptions.Duration = duration;
        return this;
    }

    /// <summary>Turns the <c>Translate</c> option on.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithTranslate()
    {
        processorOptions.Translate = true;
        return this;
    }

    /// <summary>Turns the <c>NoContext</c> option on.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithNoContext()
    {
        processorOptions.NoContext = true;
        return this;
    }

    /// <summary>Turns the <c>SingleSegment</c> option on.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithSingleSegment()
    {
        processorOptions.SingleSegment = true;
        return this;
    }

    /// <summary>Turns the <c>PrintSpecialTokens</c> option on.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithPrintSpecialTokens()
    {
        processorOptions.PrintSpecialTokens = true;
        return this;
    }

    /// <summary>Turns the <c>PrintProgress</c> option on.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithPrintProgress()
    {
        processorOptions.PrintProgress = true;
        return this;
    }

    /// <summary>Turns the <c>PrintResults</c> option on.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithPrintResults()
    {
        processorOptions.PrintResults = true;
        return this;
    }

    /// <summary>Sets the <c>PrintTimestamps</c> option.</summary>
    /// <param name="printTimestamps">Value to store; defaults to <c>true</c>.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithPrintTimestamps(bool printTimestamps = true)
    {
        processorOptions.PrintTimestamps = printTimestamps;
        return this;
    }

    /// <summary>Turns the <c>UseTokenTimestamps</c> option on.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithTokenTimestamps()
    {
        processorOptions.UseTokenTimestamps = true;
        return this;
    }

    /// <summary>Sets the <c>TokenTimestampsThreshold</c> option.</summary>
    /// <param name="tokenTimestampsThreshold">
    /// Threshold stored as-is. NOTE(review): valid range and exact meaning come from the native library.
    /// </param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithTokenTimestampsThreshold(float tokenTimestampsThreshold)
    {
        processorOptions.TokenTimestampsThreshold = tokenTimestampsThreshold;
        return this;
    }

    /// <summary>Sets the <c>TokenTimestampsSumThreshold</c> option.</summary>
    /// <param name="tokenTimestampsSumThreshold">Threshold stored as-is.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithTokenTimestampsSumThreshold(float tokenTimestampsSumThreshold)
    {
        processorOptions.TokenTimestampsSumThreshold = tokenTimestampsSumThreshold;
        return this;
    }

    /// <summary>Sets the <c>MaxSegmentLength</c> option.</summary>
    /// <param name="maxSegmentLength">
    /// Maximum segment length. NOTE(review): the unit is defined by the native library and is
    /// not visible here — confirm before documenting it for callers.
    /// </param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithMaxSegmentLength(int maxSegmentLength)
    {
        processorOptions.MaxSegmentLength = maxSegmentLength;
        return this;
    }

    /// <summary>Turns the <c>SplitOnWord</c> option on.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder SplitOnWord()
    {
        processorOptions.SplitOnWord = true;
        return this;
    }

    /// <summary>Sets the <c>MaxTokensPerSegment</c> option.</summary>
    /// <param name="maxTokensPerSegment">Token limit stored as-is.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithMaxTokensPerSegment(int maxTokensPerSegment)
    {
        processorOptions.MaxTokensPerSegment = maxTokensPerSegment;
        return this;
    }

    /// <summary>Sets the <c>AudioContextSize</c> option.</summary>
    /// <param name="audioContextSize">Size stored as-is.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithAudioContextSize(int audioContextSize)
    {
        processorOptions.AudioContextSize = audioContextSize;
        return this;
    }

    /// <summary>Sets the <c>SuppressRegex</c> option.</summary>
    /// <param name="regex">
    /// Pattern stored as-is; it is not parsed or validated here. Presumably used to suppress
    /// matching output — confirm against the native library.
    /// </param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithSuppressRegex(string regex)
    {
        processorOptions.SuppressRegex = regex;
        return this;
    }

    /// <summary>Sets the <c>Prompt</c> option (initial prompt text).</summary>
    /// <param name="prompt">Prompt text stored as-is.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithPrompt(string prompt)
    {
        processorOptions.Prompt = prompt;
        return this;
    }

    /// <summary>Sets the <c>Language</c> option.</summary>
    /// <param name="language">Language identifier (for example <c>"zh"</c>); stored without validation.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithLanguage(string language)
    {
        processorOptions.Language = language;
        return this;
    }

    /// <summary>
    /// Requests language detection by setting the <c>Language</c> option to an empty string.
    /// </summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithLanguageDetection()
    {
        processorOptions.Language = string.Empty;
        return this;
    }

    /// <summary>Turns the <c>SuppressBlank</c> option off.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithoutSuppressBlank()
    {
        processorOptions.SuppressBlank = false;
        return this;
    }

    /// <summary>Sets the <c>Temperature</c> option (sampling temperature).</summary>
    /// <param name="temperature">Value stored as-is; no range check is done here.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithTemperature(float temperature)
    {
        processorOptions.Temperature = temperature;
        return this;
    }

    /// <summary>Sets the <c>MaxInitialTs</c> option.</summary>
    /// <param name="maxInitialTs">Value stored as-is.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithMaxInitialTs(float maxInitialTs)
    {
        processorOptions.MaxInitialTs = maxInitialTs;
        return this;
    }

    /// <summary>Sets the <c>LengthPenalty</c> option.</summary>
    /// <param name="lengthPenalty">Value stored as-is.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithLengthPenalty(float lengthPenalty)
    {
        processorOptions.LengthPenalty = lengthPenalty;
        return this;
    }

    /// <summary>Sets the <c>TemperatureInc</c> option.</summary>
    /// <param name="temperature">
    /// Value stored in <c>TemperatureInc</c>. Presumably a temperature increment applied on
    /// retries — confirm against the native library.
    /// </param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithTemperatureInc(float temperature)
    {
        processorOptions.TemperatureInc = temperature;
        return this;
    }

    /// <summary>Sets the <c>EntropyThreshold</c> option.</summary>
    /// <param name="entropyThreshold">Threshold stored as-is.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithEntropyThreshold(float entropyThreshold)
    {
        processorOptions.EntropyThreshold = entropyThreshold;
        return this;
    }

    /// <summary>Sets the <c>LogProbThreshold</c> option.</summary>
    /// <param name="logProbThreshold">Threshold stored as-is.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithLogProbThreshold(float logProbThreshold)
    {
        processorOptions.LogProbThreshold = logProbThreshold;
        return this;
    }

    /// <summary>Sets the <c>NoSpeechThreshold</c> option.</summary>
    /// <param name="noSpeechThreshold">
    /// Threshold stored as-is. NOTE(review): which side of the threshold counts as speech is
    /// decided by the native library — confirm.
    /// </param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithNoSpeechThreshold(float noSpeechThreshold)
    {
        processorOptions.NoSpeechThreshold = noSpeechThreshold;
        return this;
    }

    /// <summary>Appends a handler to the options' <c>OnSegmentEventHandlers</c> collection.</summary>
    /// <param name="segmentEventHandler">Handler to add; it is not null-checked here.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithSegmentEventHandler(OnSegmentEventHandler segmentEventHandler)
    {
        processorOptions.OnSegmentEventHandlers.Add(segmentEventHandler);
        return this;
    }

    /// <summary>Appends a handler to the options' <c>OnProgressHandlers</c> collection.</summary>
    /// <param name="progressHandler">Handler to add; it is not null-checked here.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithProgressHandler(OnProgressHandler progressHandler)
    {
        processorOptions.OnProgressHandlers.Add(progressHandler);
        return this;
    }

    /// <summary>Appends a handler to the options' <c>OnEncoderBeginEventHandlers</c> collection.</summary>
    /// <param name="encoderBeginEventHandler">Handler to add; it is not null-checked here.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithEncoderBeginHandler(OnEncoderBeginEventHandler encoderBeginEventHandler)
    {
        processorOptions.OnEncoderBeginEventHandlers.Add(encoderBeginEventHandler);
        return this;
    }

    /// <summary>Sets the <c>StringPool</c> option.</summary>
    /// <param name="stringPool">
    /// Pool to use. When <c>null</c> (the default), the pool given to this builder's constructor is used.
    /// </param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithStringPool(IStringPool? stringPool = null)
    {
        processorOptions.StringPool = stringPool ?? defaultStringPool;
        return this;
    }

    /// <summary>Clears the <c>StringPool</c> option by setting it to <c>null</c>.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithoutStringPool()
    {
        processorOptions.StringPool = null;
        return this;
    }

    /// <summary>
    /// Creates a new <see cref="GreedySamplingStrategy"/>, stores it as the <c>SamplingStrategy</c>
    /// option and returns a builder for configuring it.
    /// </summary>
    /// <returns>A <see cref="GreedySamplingStrategyBuilder"/> wrapping this builder and the new strategy.</returns>
    public IWhisperSamplingStrategyBuilder WithGreedySamplingStrategy()
    {
        var strategy = new GreedySamplingStrategy();
        processorOptions.SamplingStrategy = strategy;
        return new GreedySamplingStrategyBuilder(this, strategy);
    }

    /// <summary>
    /// Creates a new <see cref="BeamSearchSamplingStrategy"/>, stores it as the <c>SamplingStrategy</c>
    /// option and returns a builder for configuring it.
    /// </summary>
    /// <returns>A <see cref="BeamSearchSamplingStrategyBuilder"/> wrapping this builder and the new strategy.</returns>
    public IWhisperSamplingStrategyBuilder WithBeamSearchSamplingStrategy()
    {
        var strategy = new BeamSearchSamplingStrategy();
        processorOptions.SamplingStrategy = strategy;
        return new BeamSearchSamplingStrategyBuilder(this, strategy);
    }

    /// <summary>Turns the <c>ComputeProbabilities</c> option on.</summary>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithProbabilities()
    {
        processorOptions.ComputeProbabilities = true;
        return this;
    }

    /// <summary>Stores the three OpenVINO-related options.</summary>
    /// <param name="openVinoEncoderPath">Stored as <c>OpenVinoModelPath</c>; may be <c>null</c>.</param>
    /// <param name="openVinoDevice">Stored as <c>OpenVinoDevice</c>; may be <c>null</c>.</param>
    /// <param name="openVinoCachePath">Stored as <c>OpenVinoCacheDir</c>; may be <c>null</c>.</param>
    /// <returns>This builder.</returns>
    public WhisperProcessorBuilder WithOpenVinoEncoder(string? openVinoEncoderPath, string? openVinoDevice, string? openVinoCachePath)
    {
        processorOptions.OpenVinoModelPath = openVinoEncoderPath;
        processorOptions.OpenVinoDevice = openVinoDevice;
        processorOptions.OpenVinoCacheDir = openVinoCachePath;
        return this;
    }

    /// <summary>
    /// Creates a <see cref="WhisperProcessor"/> from the accumulated options and the native binding.
    /// </summary>
    /// <remarks>No validation of the options is performed in this method.</remarks>
    /// <returns>The new processor.</returns>
    public WhisperProcessor Build() => new WhisperProcessor(processorOptions, whisperNative);
}

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容