Audio Onsets And Sonification

Chris Tralie

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import librosa.display
# Analysis parameters reused by later cells
hop_length = 512  # Samples between the starts of adjacent STFT windows
win = 4096        # Samples per STFT window (passed as n_fft)

# Audio samples and the sample rate they were loaded at
x, sr = librosa.load("beatles.wav")

# Magnitude of the STFT (phase discarded), then the same values in dB
# measured relative to the largest magnitude
S = np.abs(librosa.stft(x, n_fft=win, hop_length=hop_length))
Sdb = librosa.amplitude_to_db(S, ref=np.max)

# Last expression of the cell: an inline player for the original audio
ipd.Audio(x, rate=sr)
Out[1]:

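To build an audio novelty function, we take the difference between adjacent columns (time windows) of the dB spectrogram and sum those differences over all frequencies. We actually only want to pick up on frequencies that increase in amplitude, so we zero out all of the frequency indices that decrease in value so that they're not included in the sum. This leads to a much clearer audio novelty function.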

In [2]:
# Audio novelty function: for each pair of adjacent STFT windows, add up how
# much the dB level rose in every frequency index, ignoring indices that fell.
M, N = Sdb.shape  # M rows (frequency indices), N columns (time window indices)
times = np.arange(N-1)*hop_length/sr  # Time (sec) assigned to each column difference
diff = Sdb[:, 1:] - Sdb[:, :-1]  # Column j holds window j+1 minus window j
diff[diff < 0] = 0  # Cut out the differences that are less than 0
novfn = np.sum(diff, axis=0)  # One value per column of diff (N-1 in total)

# Right-hand edge of each image in seconds. diff has N-1 columns, so its time
# extent ends one hop earlier than the spectrogram's.
spec_end = hop_length*N/sr
diff_end = hop_length*(N-1)/sr

fig, (ax_spec, ax_diff, ax_nov) = plt.subplots(3, 1, figsize=(10, 15))

# origin='lower' draws row 0 (the lowest frequency index) at the bottom, so the
# frequency axis runs upward from 0 to sr/2 without a separate axis inversion.
im_spec = ax_spec.imshow(Sdb, aspect='auto', origin='lower',
                         extent=(0, spec_end, 0, sr/2))
fig.colorbar(im_spec, ax=ax_spec)
ax_spec.set_xlabel("Time (Sec)")
ax_spec.set_ylabel("Frequency (hz)")

im_diff = ax_diff.imshow(diff, aspect='auto', origin='lower',
                         extent=(0, diff_end, 0, sr/2))
fig.colorbar(im_diff, ax=ax_diff)
ax_diff.set_xlabel("Time (Sec)")
ax_diff.set_ylabel("Frequency (hz)")

ax_nov.plot(times, novfn)
ax_nov.set_xlabel("Time (Seconds)")
ax_nov.set_ylabel("Audio novelty")

# Render the figure explicitly so the cell does not echo a Text(...) repr
plt.show()
Out[2]:
Text(0.5, 0, 'Time (Seconds)')

We can "sonify" the audio novelty function by using it as an amplitude envelope for random noise: the noise gets louder wherever the novelty function is large, so we can hear what it actually picks up on.

In [3]:
def sonify_novfn(novfn, hop_length, rng=None):
    """
    Turn a novelty function into audio by using it as the amplitude
    envelope of Gaussian noise.

    Each novelty value scales its own run of hop_length consecutive noise
    samples, so the output has len(novfn)*hop_length samples.

    Parameters
    ----------
    novfn: array-like of length N
        1-D sequence of novelty values, one per hop
    hop_length: int
        Number of audio samples that each novelty value covers
    rng: numpy.random.Generator, optional
        Source of the noise.  Pass a seeded generator to get the same
        output on every call.  If None, the noise is drawn from NumPy's
        global random state via np.random.randn

    Returns
    -------
    ndarray(N*hop_length)
        Noise samples scaled by the novelty function
    """
    novfn = np.asarray(novfn, dtype=float)
    n_samples = len(novfn)*hop_length
    if rng is None:
        noise = np.random.randn(n_samples)
    else:
        noise = rng.standard_normal(n_samples)
    # Hold each novelty value for hop_length samples, then apply the whole
    # envelope in one vectorized multiply instead of looping over hops
    return noise*np.repeat(novfn, hop_length)
In [4]:
# Noise whose amplitude follows the novelty function, one hop at a time
y = sonify_novfn(novfn=novfn, hop_length=hop_length)
# Last expression of the cell: an inline player at the audio's sample rate
ipd.Audio(data=y, rate=sr)
Out[4]:
In [ ]: