import librosa
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd


# Load the recording as a sample array `y` plus its sample rate `sr`.
# No `sr` argument is passed, so librosa's default loading rate applies.
y, sr = librosa.load("voice.wav")
# Trailing expression: in a notebook this renders an audio player for the clip.
ipd.Audio(y, rate=sr)


# Framing parameters in samples. hop_length is shared by the STFT and the
# pitch tracker below so that both produce frames at the same rate.
hop_length = 512
win_length = 2048
# Magnitude spectrogram: np.abs discards the STFT phase.
S = np.abs(librosa.stft(y, hop_length=hop_length, win_length=win_length))

# pYIN fundamental-frequency track, searched between 80 Hz and 2000 Hz.
# Later cells test the track for NaN, i.e. frames without a pitch estimate
# are expected to come back as NaN.
maxfreqs, voiced_flag, voiced_probs = librosa.pyin(y, fmin=80, fmax=2000, sr=sr, hop_length=hop_length)
# Start time in seconds of each pitch frame (frame index * hop / sample rate).
times = np.arange(len(maxfreqs))*hop_length/sr

# Spectrogram in dB relative to its own peak. `extent` labels the x axis in
# seconds and the y axis in Hz, with 0 Hz on the edge where row 0 is drawn.
plt.imshow(librosa.amplitude_to_db(S,ref=np.max), cmap='magma', extent=(times[0], times[-1], sr//2, 0), aspect='auto')
# Restrict the view to 0-5000 Hz, with 0 Hz at the bottom of the axes.
plt.ylim([0, 5000])
# Overlay the estimated f0 track on top of the spectrogram.
plt.plot(times, maxfreqs, linewidth=2, color='cyan')
plt.xlabel("Time (Sec)")
plt.ylabel("Frequency (hz)")

Text(0, 0.5, 'Frequency (hz)')


def sonify_f0(maxfreqs, hop_length, sr):
    """
    Synthesize a tone whose pitch follows a frame-wise frequency track.

    Every frame's frequency is held for hop_length samples, the resulting
    instantaneous frequency is integrated into a phase (in cycles), and the
    cosine of that phase is shaped with a signed square root.

    Parameters
    ----------
    maxfreqs : array-like of float
        One frequency in Hz per frame.  NaN entries are treated as 0 Hz,
        so the phase, and therefore the output value, is held constant
        across those frames.
    hop_length : int
        Number of output samples generated per frame.
    sr : int
        Sample rate in Hz of the synthesized signal.

    Returns
    -------
    ndarray
        Signal of length len(maxfreqs)*hop_length with values in [-1, 1].
    """
    f0 = np.asarray(maxfreqs, dtype=float)
    # Frames without a pitch estimate contribute 0 Hz (no phase advance)
    f0 = np.where(np.isnan(f0), 0.0, f0)
    # Instantaneous frequency: hold each frame's value for hop_length samples
    df = np.repeat(f0, hop_length)

    # Integrate instantaneous frequency to get the phase in cycles
    f = np.cumsum(df)/sr
    x = np.cos(2*np.pi*f)
    # The signed square root keeps each sample's sign while raising small
    # magnitudes, which distorts the pure cosine
    x = np.sign(x)*np.abs(x)**0.5
    return x

# Synthesize the raw (unrounded) pitch track so it can be auditioned.
x = sonify_f0(maxfreqs, hop_length, sr)
# Trailing expression: renders an audio player in a notebook.
ipd.Audio(x, rate=sr)


# Snap each f0 estimate to the nearest note of the 12-tone scale built on
# 440 Hz: convert Hz to a semitone offset from 440 Hz, round it, and convert
# back to Hz.  NaN entries stay NaN through every step.
p = 12*np.log2(maxfreqs/440)
p = np.round(p) # Round each note number to the nearest integer
maxfreqs_round = 440*(2**(p/12))

# Plot the original and the note-quantized tracks against frame index.
plt.plot(maxfreqs)
plt.plot(maxfreqs_round)
plt.legend(["Original f0", "Rounded f0"])

# Synthesize the quantized track so it can be compared by ear.
x = sonify_f0(maxfreqs_round, hop_length, sr)
ipd.Audio(x, rate=sr)


from spectrogramtools import *

def do_autotune(S, maxfreqs, target):
    """
    Shift the pitch of every spectrogram frame toward a target frequency.

    For frame j the frequency axis is rescaled by target[j]/maxfreqs[j], so
    the content found at bin k/ratio in the input lands at bin k in the
    output.  The rescaled magnitudes are then passed to griffinLimInverse
    together with the module-level win_length and hop_length.

    Parameters
    ----------
    S : ndarray, shape (n_bins, n_frames)
        Spectrogram; only the magnitude of each entry is used.
    maxfreqs : array-like of float
        Estimated fundamental frequency of each frame.
    target : array-like of float
        Desired fundamental frequency of each frame, in the same units as
        maxfreqs.

    Returns
    -------
    Whatever griffinLimInverse returns for the shifted spectrogram
    (presumably the reconstructed audio samples -- confirm against
    spectrogramtools).

    Notes
    -----
    Frames whose ratio is NaN, infinite, zero or negative are left unshifted
    (ratio 1).  This covers frames without a pitch estimate as well as zero
    entries in maxfreqs or target, which would otherwise put NaN/inf into
    the interpolation positions.
    """
    # Division by zero is expected here and handled by the mask below
    with np.errstate(divide='ignore', invalid='ignore'):
        ratios = np.asarray(target, dtype=float)/np.asarray(maxfreqs, dtype=float)
        unusable = ~np.isfinite(ratios) | (ratios <= 0)
    ratios[unusable] = 1
    S2 = np.zeros_like(S)
    freqs = np.arange(S.shape[0])
    for j in range(S.shape[1]):
        # Positions in the input frame to read each output bin from
        new_freqs = freqs/ratios[j]
        sj = np.abs(S[:, j])
        # np.interp clamps positions past the last bin to the last bin's value
        S2[:, j] = np.interp(new_freqs, freqs, sj)
    return griffinLimInverse(S2, win_length, hop_length)

# Autotune: pull every frame's pitch to its nearest-note frequency.
yshift = do_autotune(S, maxfreqs, maxfreqs_round)
# Trailing expression: renders an audio player in a notebook.
ipd.Audio(yshift, rate=sr)

Iteration 1 of 10
Iteration 2 of 10
Iteration 3 of 10
Iteration 4 of 10
Iteration 5 of 10
Iteration 6 of 10
Iteration 7 of 10
Iteration 8 of 10
Iteration 9 of 10
Iteration 10 of 10


# Constant 440 Hz target for every frame, i.e. a monotone.  Frames where
# maxfreqs is NaN are left unshifted by do_autotune.
target = 440*np.ones(len(maxfreqs))
yshift = do_autotune(S, maxfreqs, target)
# Trailing expression: renders an audio player in a notebook.
ipd.Audio(yshift, rate=sr)

Iteration 1 of 10
Iteration 2 of 10
Iteration 3 of 10
Iteration 4 of 10
Iteration 5 of 10
Iteration 6 of 10
Iteration 7 of 10
Iteration 8 of 10
Iteration 9 of 10
Iteration 10 of 10


# Same monotone experiment with a lower constant target of 330 Hz.
target = 330*np.ones(len(maxfreqs))
yshift = do_autotune(S, maxfreqs, target)
# Trailing expression: renders an audio player in a notebook.
ipd.Audio(yshift, rate=sr)

Iteration 1 of 10
Iteration 2 of 10
Iteration 3 of 10
Iteration 4 of 10
Iteration 5 of 10
Iteration 6 of 10
Iteration 7 of 10
Iteration 8 of 10
Iteration 9 of 10
Iteration 10 of 10

Loading / pYIN¶

Sonifying Instantaneous Frequency¶

Rounding Frequency To Nearest Note, Sonifying Again¶

$f = 440 \times 2^{p/12}$¶

$ (f/440) = 2^{p/12}$¶

$ \log_2(f/440) = p / 12$¶

$ p = 12 \log_2(f/440) $¶

Autotuning¶