功能
剔除一段语音中的静音(无人声)片段,只保留有语音的部分
代码详解
from scipy
.ndimage
.morphology
import binary_dilation
import librosa
import numpy
as np
import struct
import librosa
.display
import webrtcvad
import soundfile
as sf
# Largest magnitude of a signed 16-bit PCM sample; used to scale the float
# waveform before handing it to the VAD.
int16_max = (2 ** 15) - 1

# webrtcvad only accepts 16-bit mono PCM at 8/16/32/48 kHz, cut into frames of
# exactly 10, 20 or 30 ms.
sampling_rate = 16000  # Hz
vad_window_length = 30  # ms

# Input
wav, source_sr = librosa.load("chunk123.wav", sr=None)
if source_sr != sampling_rate:
    # The VAD call below is told the audio is 16 kHz, so a file recorded at
    # any other rate must be resampled first or the frame sizes are wrong.
    wav = librosa.resample(wav, orig_sr=source_sr, target_sr=sampling_rate)

# Samples in one VAD frame. The `// 1000` converts milliseconds to seconds;
# without it the "window" would be 30 s long and webrtcvad rejects the frame.
samples_per_window = (vad_window_length * sampling_rate) // 1000

# Drop the tail so the waveform is a whole number of VAD frames.
wav = wav[:len(wav) - (len(wav) % samples_per_window)]

# Convert the float waveform to 16-bit mono PCM bytes. Clipping keeps samples
# outside [-1, 1] (e.g. resampling overshoot) from wrapping around in int16.
# tobytes() yields the same native-endian layout as struct.pack("%dh", ...).
pcm_wave = np.round(np.clip(wav, -1.0, 1.0) * int16_max).astype(np.int16).tobytes()

# Voice activity detection: one boolean per frame. Each sample occupies two
# bytes in pcm_wave, hence the `* 2` on the slice bounds.
vad = webrtcvad.Vad(mode=3)
voice_flags = np.array([
    vad.is_speech(
        pcm_wave[window_start * 2:(window_start + samples_per_window) * 2],
        sample_rate=sampling_rate,
    )
    for window_start in range(0, len(wav), samples_per_window)
])
def moving_average(array, width):
    """Return the centred moving average of a 1-D array.

    The input is zero-padded with ``(width - 1) // 2`` samples on the left
    and ``width // 2`` samples on the right, so the result has the same
    length as ``array``; positions near the edges are averaged against the
    padding zeros.

    Args:
        array: 1-D sequence of numbers or booleans.
        width: Size of the averaging window, at least 1.

    Returns:
        A float ``numpy.ndarray`` with ``len(array)`` elements.

    Raises:
        ValueError: If ``width`` is smaller than 1.
    """
    if width < 1:
        raise ValueError("width must be a positive integer")

    array_padded = np.concatenate(
        (np.zeros((width - 1) // 2), array, np.zeros(width // 2))
    )
    # Running-sum trick: after the subtraction, ret[i] holds the sum of the
    # `width` padded samples ending at index i.
    ret = np.cumsum(array_padded, dtype=float)
    ret[width:] = ret[width:] - ret[:-width]
    # The first width - 1 entries are partial sums, not full windows.
    return ret[width - 1:] / width
# Smooth the per-window voice decisions with an 8-window moving average, then
# round back to a boolean mask. The builtin `bool` is used because the
# `np.bool` alias was removed in NumPy 1.24.
audio_mask = moving_average(voice_flags, 8)
audio_mask = np.round(audio_mask).astype(bool)

# Dilate the voiced regions with a 7-window structuring element so that short
# pauses between voiced windows are kept rather than cut out.
audio_mask = binary_dilation(audio_mask, np.ones(6 + 1))

# Expand the mask from one flag per window to one flag per sample.
audio_mask = np.repeat(audio_mask, samples_per_window)

# Keep only the samples that fall inside voiced windows.
res = wav[audio_mask]

sf.write("1234.wav", res.astype(np.float32), 16000, subtype='PCM_24')
转载请注明原文地址:https://tech.qufami.com/read-7293.html