1. Mel-Frequency Cepstral Coefficients (MFCC)

$$Mel(f) = 2595 \cdot \log_{10}\left(1 + \frac{f}{700}\right)$$

import numpy as np
import matplotlib.pyplot as plt

# Evaluate the Hz -> mel mapping at every integer frequency from 0 to 8000.
x = np.arange(8001)
y = np.log10(x / 700 + 1) * 2595

# Draw the curve on the current axes.
mel_axes = plt.gca()
mel_axes.plot(x, y, color="blue", linewidth=3)

mel_axes.set_xlabel("f", fontsize=17)
mel_axes.set_ylabel("Mel(f)", fontsize=17)

# Clamp both axes to the sampled range so the curve spans the whole plot area.
mel_axes.set_xlim(0, x[-1])
mel_axes.set_ylim(0, y[-1])

plt.savefig("mel_f.png", dpi=500)

2. Workflow

import numpy as np
import scipy.io.wavfile
from matplotlib import pyplot as plt
from scipy.fftpack import dct

# Raw data: load the recording and keep only the first 3.5 s of samples.
sample_rate, signal = scipy.io.wavfile.read("OSR_us_000_0010_8k.wav")
original_signal = signal[:int(sample_rate * 3.5)]

# Sample indices for the full recording and for the 3.5 s excerpt;
# dividing them by sample_rate below turns them into time in seconds.
signal_num = np.arange(len(signal))
sample_num = np.arange(len(original_signal))

# Figure 01: full recording with the excerpt overlaid (top panel),
# and the excerpt on its own (bottom panel).
plt.figure(figsize=(11, 7), dpi=500)

top_axes = plt.subplot(211)
top_axes.plot(signal_num / sample_rate, signal, color="black")
top_axes.plot(sample_num / sample_rate, original_signal, color="blue")
top_axes.set_ylabel("Amplitude")
top_axes.set_title("signal of Voice")

bottom_axes = plt.subplot(212)
bottom_axes.plot(sample_num / sample_rate, original_signal, color="blue")
bottom_axes.set_xlabel("Time (sec)")
bottom_axes.set_ylabel("Amplitude")
bottom_axes.set_title("3.5s signal of Voice ")

plt.savefig("mfcc_01.png")

References

- https://haythamfayek.com/2016/04/21/speech-processing-for-machine-learning.html
- https://zhuanlan.zhihu.com/p/88625876