In [3]:
%matplotlib inline
import seaborn
import numpy, scipy, matplotlib.pyplot as plt, IPython.display as ipd
import librosa, librosa.display
# Default size for every figure drawn in this notebook.
plt.rcParams.update({'figure.figsize': (14, 5)})

Harmonic-Percussive Source Separation

Load two files: one harmonic, and one percussive.

In [24]:
# Harmonic example: read only the first 7 seconds, and pass sr=None so the
# clip keeps the sampling rate stored in the file (returned as sr_h).
xh, sr_h = librosa.load(
    'audio/prelude_cmaj.wav',
    sr=None,
    duration=7,
)
In [25]:
# Play the harmonic clip at the sampling rate returned when it was loaded.
# The bare name `sr` is not assigned in any visible cell, so relying on it
# only works with leftover kernel state; use sr_h instead.
ipd.Audio(xh, rate=sr_h)
Out[25]:
In [26]:
# Percussive example: read only the first 7 seconds, and pass sr=None so the
# clip keeps the sampling rate stored in the file (returned as sr_p).
xp, sr_p = librosa.load(
    'audio/125_bounce.wav',
    sr=None,
    duration=7,
)
In [27]:
# Play the percussive clip at the sampling rate returned when it was loaded.
# The bare name `sr` is not assigned in any visible cell, so relying on it
# only works with leftover kernel state; use sr_p instead.
ipd.Audio(xp, rate=sr_p)
Out[27]:
In [28]:
# Show the sample counts of both clips; they must match for the
# element-wise sum used to build the mixture.
# Formatting the text before printing keeps the output identical whether
# print is a statement (Python 2) or a function (Python 3).
print('{} {}'.format(len(xh), len(xp)))
154350 154350
In [29]:
# Show the sampling rates of both clips; mixing them sample-by-sample only
# makes sense when the two rates agree.
# Formatting the text before printing keeps the output identical whether
# print is a statement (Python 2) or a function (Python 3).
print('{} {}'.format(sr_h, sr_p))
22050 22050

Add the two signals together, and rescale:

In [30]:
# Scale each clip so its largest sample is 1, then sum them sample-by-sample.
x = xh / numpy.max(xh) + xp / numpy.max(xp)

# Rescale the mixture so that its largest sample is 0.5.
x = 0.5 * x / numpy.max(x)
In [32]:
# Sanity check: the largest sample of the rescaled mixture.
numpy.max(x)
Out[32]:
0.5

Listen to the combined audio signal:

In [34]:
# Audio player for the mixture (both clips reported the same rate, so sr_h
# describes the sum as well).
ipd.Audio(data=x, rate=sr_h)
Out[34]:

Compute the STFT:

In [35]:
# Short-time Fourier transform of the mixture, using librosa's default
# analysis parameters.
X = librosa.stft(y=x)

Take the log-amplitude for display purposes:

In [36]:
# X holds the complex STFT, while amplitude_to_db is defined on magnitudes.
# Taking the absolute value explicitly yields the same dB values and avoids
# the "phase information will be discarded" warning that newer librosa
# releases emit for complex input.
Xmag = librosa.amplitude_to_db(numpy.abs(X))

Display the log-magnitude spectrogram:

In [38]:
# Draw the mixture's dB spectrogram: time on the horizontal axis, log-spaced
# frequency on the vertical axis.
librosa.display.specshow(
    Xmag,
    y_axis='log',
    x_axis='time',
    sr=sr_h,
)
# Add the colour scale for the plot drawn above.
plt.colorbar()
Out[38]:
<matplotlib.colorbar.Colorbar at 0x1139b14d0>

Perform harmonic-percussive source separation:

In [39]:
# Split the mixture's STFT into a harmonic part (H) and a percussive part (P).
(H, P) = librosa.decompose.hpss(S=X)

Compute the log-amplitudes of the outputs:

In [41]:
# H and P come from decomposing the complex STFT, while amplitude_to_db is
# defined on magnitudes. Taking the absolute value explicitly yields the same
# dB values and avoids the "phase information will be discarded" warning that
# newer librosa releases emit for complex input.
Hmag = librosa.amplitude_to_db(numpy.abs(H))
Pmag = librosa.amplitude_to_db(numpy.abs(P))

Display each output:

In [42]:
# Draw the harmonic component's dB spectrogram: time on the horizontal axis,
# log-spaced frequency on the vertical axis.
librosa.display.specshow(
    Hmag,
    y_axis='log',
    x_axis='time',
    sr=sr_h,
)
# Add the colour scale for the plot drawn above.
plt.colorbar()
Out[42]:
<matplotlib.colorbar.Colorbar at 0x1139c2410>
In [43]:
# Draw the percussive component's dB spectrogram: time on the horizontal
# axis, log-spaced frequency on the vertical axis.
librosa.display.specshow(
    Pmag,
    y_axis='log',
    x_axis='time',
    sr=sr_p,
)
# Add the colour scale for the plot drawn above.
plt.colorbar()
Out[43]:
<matplotlib.colorbar.Colorbar at 0x1134df950>

Transform the harmonic output back to the time domain:

In [44]:
# Inverse STFT: turn the harmonic spectrogram back into a waveform.
h = librosa.istft(stft_matrix=H)

Listen to the harmonic output:

In [45]:
# Audio player for the separated harmonic signal.
ipd.Audio(data=h, rate=sr_h)
Out[45]:

Transform the percussive output back to the time domain:

In [46]:
# Inverse STFT: turn the percussive spectrogram back into a waveform.
p = librosa.istft(stft_matrix=P)

Listen to the percussive output:

In [47]:
# Audio player for the separated percussive signal.
ipd.Audio(data=p, rate=sr_p)
Out[47]: