帧长 (frame length): frame_len
帧移 (frame shift): frame_shift
重叠部分长度 (overlap length): frame_overlap = frame_len - frame_shift
/*
* Cut data length of frame_overlap+buffer_shift+frame_overlap
* If no enough data, zeros are filled in.
* The data in fram_overlap just participate in model forward
* All we care about is the data length of buffer_shift in middle
* */
int input_size = new_input_data.size();
int frame_overlap = _opts.frame_len - _opts.frame_shift;
int buffer_shift = _opts.sample_frequency * _opts.slice_second;
for (int start = 0; start < input_size; start += buffer_shift) {
int end = std::min(start + buffer_shift, input_size);
int buffer_size = frame_overlap + (end - start) + frame_overlap;
int start_pad = std::max(start - frame_overlap, 0);
int end_pad = std::min(end + frame_overlap, input_size);
int start_buffer = frame_overlap - (start - start_pad);
int copy_buffer_len = end_pad - start_pad;
std::vector<float> input_buffer(buffer_size, 0);
memcpy(input_buffer.data() + start_buffer,
new_input_data.data() + start_pad,
copy_buffer_len * sizeof(float));
std::vector<int64_t> input_dims = {1, (int64_t)input_buffer.size()};
std::vector<std::vector<BaseFloat>> output_buffer;
if (_inference_utils->run(input_buffer, input_dims, output_buffer)
!= 0) {
IDEC_WARN << "Fail to get denoised data";
return -1;
}
if (start == 0) {
output_data.resize(output_buffer.size());
for (size_t i = 0; i < output_data.size(); i++) {
output_data[i].reserve(input_size);
}
}
for (size_t i = 0; i < output_data.size(); i++) {
IDEC_ASSERT(output_buffer[i].size() == buffer_size);
output_data[i].insert(
output_data[i].end(),
output_buffer[i].data() + frame_overlap,
output_buffer[i].data() + frame_overlap + (end - start));
}
}