Audio Novelty Functions

Chris Tralie

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import librosa.display
# Read the recording: x holds the audio samples and sr the sample rate
# that librosa reports for them
x, sr = librosa.load("beatles.wav")

# STFT settings passed to librosa below: hop between windows and FFT length
hop_length = 512
win = 4096

# Magnitude of the short-time Fourier transform, then converted to decibels
# measured relative to the largest magnitude in the spectrogram
S = np.abs(librosa.stft(x, n_fft=win, hop_length=hop_length))
Sdb = librosa.amplitude_to_db(S, ref=np.max)

# Axis limits for the image: one hop per column horizontally, 0..sr/2 vertically
duration_sec = hop_length * S.shape[1] / sr
max_freq = sr / 2

fig, ax = plt.subplots(figsize=(10, 5))
# The extent labels the top edge 0 and the bottom edge sr/2 (imshow draws
# row 0 at the top by default); flipping the axis then puts 0 at the bottom
ax.imshow(Sdb, aspect='auto', extent=(0, duration_sec, max_freq, 0))
ax.invert_yaxis()
ax.set_xlabel("Time (Sec)")
ax.set_ylabel("Frequency (hz)")

# Last expression of the cell: an inline player for the loaded audio
ipd.Audio(x, rate=sr)
Out[2]:

Version 1: Pure Python Version with Nested Loops

In [5]:
# Size of the dB spectrogram, which is indexed below as Sdb[row, column]:
# M rows (frequency indices) by N columns (time window indices)
M, N = Sdb.shape

# Time stamp, in seconds, attached to each pair of neighbouring columns
times = np.arange(N-1)*hop_length/sr

# One novelty value per pair of adjacent columns, filled in by the loops below
novfn = np.zeros(N-1)
for col in range(N-1):
    # Walk down the rows one entry at a time, accumulating the absolute
    # change between this column and the one after it
    total = 0
    for row in range(M):
        total = total + np.abs(Sdb[row, col+1] - Sdb[row, col])
    novfn[col] = total

fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(times, novfn)
ax.set_xlabel("Time (Seconds)")
Out[5]:
Text(0.5, 0, 'Time (Seconds)')

Version 2: Using a column slice to eliminate the inner loop

In [6]:
# Size of the dB spectrogram: M rows (frequency indices) by
# N columns (time window indices)
M, N = Sdb.shape

# Time stamp, in seconds, attached to each pair of neighbouring columns
times = np.arange(N-1)*hop_length/sr

# One novelty value per pair of adjacent columns
novfn = np.zeros(N-1)
for col in range(N-1):
    # Subtracting whole columns at once replaces the explicit loop over rows
    column_change = Sdb[:, col+1] - Sdb[:, col]
    novfn[col] = np.abs(column_change).sum()

fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(times, novfn)
ax.set_xlabel("Time (Seconds)")
Out[6]:
Text(0.5, 0, 'Time (Seconds)')

Version 3: Eliminate all loops with 2D slices and sums

In [14]:
M = Sdb.shape[0] # How many rows I have (frequency indices)
N = Sdb.shape[1] # How many columns I have (time window indices)
times = np.arange(N-1)*hop_length/sr

# Subtract the spectrogram from a copy of itself shifted by one column, so
# diff[:, j] = |Sdb[:, j+1] - Sdb[:, j]| for every j at once.  No
# pre-allocation is needed: both arrays are produced whole by numpy.
diff = np.abs(Sdb[:, 1:] - Sdb[:, :-1])
# Collapse the frequency axis, leaving one novelty value per column pair
novfn = np.sum(diff, axis=0)

# Each image column spans one hop, so the right edge of the time axis is
# (number of columns) * hop_length / sr.  diff has N-1 columns, one fewer
# than Sdb, and therefore needs its own, slightly shorter extent.
spec_extent = (0, hop_length*N/sr, sr/2, 0)
diff_extent = (0, hop_length*(N-1)/sr, sr/2, 0)

fig, (ax_spec, ax_diff, ax_nov) = plt.subplots(3, 1, figsize=(10, 15))

# Top panel: the dB spectrogram; middle panel: its column-to-column change
for ax, image, extent in ((ax_spec, Sdb, spec_extent), (ax_diff, diff, diff_extent)):
    shown = ax.imshow(image, aspect='auto', extent=extent)
    fig.colorbar(shown, ax=ax)
    # The extent labels the top edge 0; flipping puts 0 at the bottom
    ax.invert_yaxis()
    ax.set_xlabel("Time (Sec)")
    ax.set_ylabel("Frequency (hz)")

# Bottom panel: the novelty curve obtained by summing diff over frequency
ax_nov.plot(times, novfn)
ax_nov.set_xlabel("Time (Seconds)")
Out[14]:
Text(0.5, 0, 'Time (Seconds)')
In [ ]: