# main.py
def plot_waveform(filename: str, y: np.ndarray, sr: int) -> None:
    '''
    Draw the time-domain waveform of an audio signal and save it as a figure.

    Args:
        `filename`: filename of the output figure, resolved relative to `fig_path`
        `y`: time series of the audio signal
        `sr`: sample rate of the audio signal
    '''
    # Turn every sample index into a time value by dividing by the sample rate.
    t = np.arange(y.shape[0]) / sr

    fig_time_path = fig_path / filename
    utils.plot_time_domain(fig_time_path, t, y)
    print(f'Output figure to "{fig_time_path}".')
# main.py
def create_spectrogram(y: np.ndarray, n_window: int) -> Tuple[np.ndarray, np.ndarray]:
    '''
    Create the spectrogram of the audio signal.

    The signal is cut into windows of `n_window` samples whose starts are
    `n_window // 2` samples apart. Each window is weighted by `hamming()`,
    zero-padded to `n_fft = utils.next_pow2(n_window)` samples and passed to
    `fft`; only the magnitude of the first `n_fft // 2` bins is kept.

    Args:
        `y`: time series of the audio signal
        `n_window`: the number of samples used in each window

    Returns:
        `i_starts`: the starting indices of each window
        `spec`: the spectrum of frequencies of the audio signal as it varies
            with time, as `10 * log10(magnitude)`; rows are frequency bins and
            columns are windows
    '''
    n_samples = y.shape[0]

    # Neighbouring windows overlap by half. The hop is clamped to at least 1
    # so that np.arange never receives a zero step when n_window < 2.
    hop = max(1, n_window // 2)
    i_starts = np.arange(0, n_samples, hop, dtype=int)
    # Keep only the windows that fit entirely inside the signal. The bound is
    # inclusive: a window ending exactly on the last sample is complete.
    i_starts = i_starts[i_starts + n_window <= n_samples]

    n_fft = utils.next_pow2(n_window)
    zero_padding = np.zeros(n_fft - n_window)
    # The window function does not depend on the position, so build it once.
    window = hamming(n_window)

    spec = np.array([
        np.abs(fft(np.concatenate((window * y[i:i + n_window], zero_padding)))[:n_fft // 2])
        for i in i_starts
    ])

    # Rescale the absolute value of the spectrogram. The machine epsilon keeps
    # log10 away from zero for silent bins.
    spec = 10 * np.log10(spec.T + np.finfo(float).eps)
    return i_starts, spec
# windows.py
def hamming(m: int) -> np.ndarray:
    '''
    Build the Hamming window.

    Args:
        `m`: number of points in the output window

    Returns:
        The Hamming window of size `m`. An empty array is returned when
        `m < 1`, and a single `1.0` when `m == 1`.
    '''
    # Degenerate sizes are handled first so the division by (m - 1) is safe.
    if m < 1:
        return np.array([])
    if m == 1:
        return np.ones(1)

    k = np.arange(m)
    phase = 2 * np.pi * k / (m - 1)
    return 0.54 - 0.46 * np.cos(phase)
# main.py
fig_path = Path('assets/spectrogram/dev_set')


def plot_spectrogram(filename: str, i_starts: np.ndarray, spec: np.ndarray, sr: int) -> None:
    '''
    Save the spectrogram as a figure whose axes carry time and frequency labels.

    Args:
        `filename`: filename of the output figure, resolved relative to `fig_path`
        `i_starts`: the starting indices of each window
        `spec`: the spectrogram to plot
        `sr`: sample rate
    '''
    n_bins = spec.shape[0]
    n_frames = spec.shape[1]

    # x axis: ten evenly spaced ticks across the columns, labelled with the
    # time obtained by dividing the sample index by the sample rate.
    xticks = np.linspace(0, n_frames, 10)
    time_marks = np.linspace(0, i_starts[-1] / sr, 10)
    xlabels = [f'{i:4.2f}' for i in time_marks]

    # y axis: ten evenly spaced ticks across the rows, labelled with the
    # value returned by fft_freq(), floored to an integer.
    yticks = np.linspace(0, n_bins, 10)
    ylabels = np.floor(fft_freq(n_bins, sr, yticks)).astype(int)

    fig_spec_path = fig_path / filename
    # NOTE(review): `n_window` is not a parameter of this function; it is
    # presumably a module-level name defined elsewhere - confirm.
    utils.plot_spectrogram(fig_spec_path, spec, xticks, xlabels, yticks, ylabels, n_window)
    print(f'Output figure to "{fig_spec_path}".')
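这里主要是要将 x 轴和 y 轴恢复成正确的时间和频率单位。目前 x 轴上的值是各个窗口起始采样点的索引,因此将其除以采样率 sr,就得到以秒为单位的正确时间了。y 轴上的值则需要通过函数 fft_freq() 从 FFT 结果的下标转换为其所表示的频率,其实现就是乘上采样率 sr 再除以 FFT 处理的信号长度 n_fft,而 n_fft 可以通过 FFT 结果的长度 spec.shape[0] 得到(注意 create_spectrogram() 只保留了前 n_fft // 2 个频点,因此 spec.shape[0] 等于 n_fft // 2)。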
# utils.py
def plot_spectrogram(
    output_path,
    spec: np.ndarray,
    xticks: np.ndarray,
    xlabels: np.ndarray,
    yticks: np.ndarray,
    ylabels: np.ndarray,
    n_fft: int,
) -> None:
    '''
    Plot the spectrogram of a wave and save it to `output_path`.

    Args:
        `output_path`: path to the output figure
        `spec`: the spectrogram to plot
        `xticks`: tick locations of the x-axis
        `xlabels`: tick labels of the x-axis
        `yticks`: tick locations of the y-axis
        `ylabels`: tick labels of the y-axis
        `n_fft`: the number of samples for the FFT; shown in the figure title
            as the window size
    '''
    fig = plt.figure()
    try:
        plt.title(f'Spectrogram ({n_fft} window size, hamming window)')
        plt.xticks(xticks, xlabels)
        plt.xlabel('Time / s')
        plt.yticks(yticks, ylabels)
        plt.ylabel('Frequency / Hz')
        # origin='lower' puts row 0 of `spec` at the bottom of the image.
        plt.imshow(spec, origin='lower', aspect='auto')
        plt.colorbar(use_gridspec=True)
        plt.tight_layout()
        plt.savefig(output_path)
    finally:
        # pyplot keeps every figure it creates until it is closed explicitly.
        # Close it here so repeated calls do not accumulate open figures,
        # even if plotting or saving fails.
        plt.close(fig)