%load_ext autoreload
%autoreload 2
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
from scipy.io import wavfile
from mfcc import *


# --- Analysis parameters shared by the cells below ---
win = 2048          # Passed to stft/istft and get_mel_filterbank as the window size
hop = 512           # Passed to stft/istft as the hop between windows
K = win // 2 + 1    # Number of frequency rows kept from the STFT
# NOTE: sr is rebound further down when the wav file is read; the filterbank
# plotted here is laid out using this hard-coded value.
sr = 44100
min_freq = 80       # Lower edge of the filterbank, as shown in the title (hz)
max_freq = 16000    # Upper edge of the filterbank, as shown in the title (hz)
n_bins = 200        # Number of mel bins requested from get_mel_filterbank

# Frequency associated with each of the K rows, used as the x-axis below
freqs = np.arange(K) * sr / win
M = get_mel_filterbank(K, win, sr, min_freq, max_freq, n_bins)

# Draw every filter (one curve per row of M) against frequency
plt.figure(figsize=(10, 4))
plt.plot(freqs, M.T)
plt.xlabel("Frequency (Hz)")
plt.title(
    "Normalized Mel Filterbank: {} Bins from {}hz - {}hz".format(
        n_bins, min_freq, max_freq
    )
)

Text(0.5, 1.0, 'Normalized Mel Filterbank: 200 Bins from 80hz - 16000hz')


# Load the clip. NOTE: this rebinds sr to whatever rate the file reports.
sr, x = wavfile.read("doves.wav")
# Magnitude of the STFT, restricted to its first K rows
S = np.abs(stft(x, win, hop))[0:K, :]
# Apply the filterbank to every frame to get the mel spectrogram
MS = M.dot(S)

# Side by side: the filterbank, the spectrogram, and their product
plt.figure(figsize=(12, 4))

plt.subplot(1, 3, 1)
plt.imshow(amplitude_to_db(M), cmap="magma", aspect="auto")
plt.ylim(0, M.shape[0])  # Put row 0 at the bottom of the image
plt.title("Mel Filterbank ({} x {})".format(*M.shape))

plt.subplot(1, 3, 2)
plt.imshow(amplitude_to_db(S), cmap="magma", aspect="auto")
plt.ylim(0, S.shape[0])
plt.title("Spectrogram in dB ({} x {})".format(*S.shape))

plt.subplot(1, 3, 3)
plt.imshow(amplitude_to_db(MS), cmap="magma", aspect="auto")
plt.ylim(0, MS.shape[0])
plt.title("Mel Specrogram ({} x {})".format(*MS.shape))

Text(0.5, 1.0, 'Mel Specrogram (200 x 859)')


def invert_MS(M, MS, win, hop):
    """
    Turn a mel spectrogram back into something istft can invert, by
    back-projecting it through the filterbank and attaching random phases.

    Parameters
    ----------
    M: 2D array
        Mel filterbank. Its rows must line up with the rows of MS, since
        the back-projection is M.T.dot(MS)
    MS: 2D array
        Mel spectrogram; the second axis indexes frames
    win: int
        Number of rows in the full spectrum handed to istft
    hop: int
        Hop length, forwarded unchanged to istft

    Returns
    -------
    Whatever istft(spectrum, win, hop) returns (presumably the time-domain
    audio samples -- istft is defined outside this file)
    """
    # Back-project the mel bins onto the linear-frequency rows
    mag = (M.T).dot(MS)
    # Take the row count from the back-projection itself rather than from a
    # notebook-level spectrogram, so the function only depends on its arguments
    n_freqs = mag.shape[0]

    # Rows from n_freqs up to win stay zero until the mirroring step below
    spec = np.zeros((win, MS.shape[1]), dtype=complex)
    spec[0:n_freqs, :] = mag

    # The mel spectrogram carries no phase, so give every entry a random one
    spec *= np.exp(1j*2*np.pi*np.random.rand(spec.shape[0], spec.shape[1]))

    # Add the conjugate of row (win - i) to row i for every i >= 1, which
    # makes rows i and (win - i) complex conjugates of each other. Row 0 is
    # left as it is.
    mirrored = np.zeros_like(spec)
    mirrored[0:-1, :] = np.conj(spec[1::, :])
    spec = spec + mirrored[::-1, :]

    return istft(spec, win, hop)


ipd.Audio(data=x, rate=sr)  # Play the unmodified clip for reference


# Back-project the mel spectrogram through the filterbank (for plotting only)
SInv = M.T.dot(MS)
# Resynthesize audio from the mel spectrogram
y = invert_MS(M, MS, win, hop)

# Original spectrogram (left) next to the back-projected one (middle)
plt.figure(figsize=(12, 4))

plt.subplot(1, 3, 1)
plt.imshow(amplitude_to_db(S), cmap="magma", aspect="auto")
plt.ylim(0, S.shape[0])

plt.subplot(1, 3, 2)
plt.imshow(amplitude_to_db(SInv), cmap="magma", aspect="auto")
plt.ylim(0, S.shape[0])

ipd.Audio(data=y, rate=sr)


# Rebuild the filterbank with 40 bins and recompute the mel spectrogram.
# The module-level n_bins variable is not changed by this call.
M = get_mel_filterbank(K, win, sr, min_freq, max_freq, n_bins=40)
MS = M.dot(S)

# Side by side: the filterbank, the spectrogram, and their product
plt.figure(figsize=(12, 4))

plt.subplot(1, 3, 1)
plt.imshow(amplitude_to_db(M), cmap="magma", aspect="auto")
plt.ylim(0, M.shape[0])
plt.title("Mel Filterbank ({} x {})".format(*M.shape))

plt.subplot(1, 3, 2)
plt.imshow(amplitude_to_db(S), cmap="magma", aspect="auto")
plt.ylim(0, S.shape[0])
plt.title("Spectrogram in dB ({} x {})".format(*S.shape))

plt.subplot(1, 3, 3)
# Interpolation is switched off for this panel only
plt.imshow(
    amplitude_to_db(MS), cmap="magma", aspect="auto", interpolation="none"
)
plt.ylim(0, MS.shape[0])
plt.title("Mel Specrogram ({} x {})".format(*MS.shape))

Text(0.5, 1.0, 'Mel Specrogram (40 x 859)')


# Back-project the 40-bin mel spectrogram through the filterbank (for plotting)
SInv = M.T.dot(MS)
# Resynthesize audio from the mel spectrogram
y = invert_MS(M, MS, win, hop)

# Original spectrogram (left) next to the back-projected one (middle)
plt.figure(figsize=(12, 4))

plt.subplot(1, 3, 1)
plt.imshow(amplitude_to_db(S), cmap="magma", aspect="auto")
plt.ylim(0, S.shape[0])

plt.subplot(1, 3, 2)
plt.imshow(amplitude_to_db(SInv), cmap="magma", aspect="auto")
plt.ylim(0, S.shape[0])

ipd.Audio(data=y, rate=sr)


# Rebuild the filterbank with 20 bins and recompute the mel spectrogram.
# The module-level n_bins variable is not changed by this call.
M = get_mel_filterbank(K, win, sr, min_freq, max_freq, n_bins=20)
MS = M.dot(S)

# Side by side: the filterbank, the spectrogram, and their product
plt.figure(figsize=(12, 4))

plt.subplot(1, 3, 1)
plt.imshow(amplitude_to_db(M), cmap="magma", aspect="auto")
plt.ylim(0, M.shape[0])
plt.title("Mel Filterbank ({} x {})".format(*M.shape))

plt.subplot(1, 3, 2)
plt.imshow(amplitude_to_db(S), cmap="magma", aspect="auto")
plt.ylim(0, S.shape[0])
plt.title("Spectrogram in dB ({} x {})".format(*S.shape))

plt.subplot(1, 3, 3)
# Interpolation is switched off for this panel only
plt.imshow(
    amplitude_to_db(MS), cmap="magma", aspect="auto", interpolation="none"
)
plt.ylim(0, MS.shape[0])
plt.title("Mel Specrogram ({} x {})".format(*MS.shape))

Text(0.5, 1.0, 'Mel Specrogram (20 x 859)')


# Back-project the 20-bin mel spectrogram through the filterbank (for plotting)
SInv = M.T.dot(MS)
# Resynthesize audio from the mel spectrogram
y = invert_MS(M, MS, win, hop)

# Original spectrogram (left) next to the back-projected one (middle)
plt.figure(figsize=(12, 4))

plt.subplot(1, 3, 1)
plt.imshow(amplitude_to_db(S), cmap="magma", aspect="auto")
plt.ylim(0, S.shape[0])

plt.subplot(1, 3, 2)
plt.imshow(amplitude_to_db(SInv), cmap="magma", aspect="auto")
plt.ylim(0, S.shape[0])

ipd.Audio(data=y, rate=sr)

Sonifying Mel Spectrograms

Chris Tralie

40 bins in the Mel-Spectrogram

20 bins in the Mel-Spectrogram