import urllib

import IPython.display
import librosa
import matplotlib.pyplot as plt
import numpy
import scipy
import sklearn
import sklearn.decomposition
import sklearn.preprocessing

import stanford_mir
%matplotlib inline
plt.rcParams['figure.figsize'] = (14, 5)
Download a file:
# Fetch the example recording and store it under the same name in the
# current working directory.
filename = '125_bounce.wav'
url = 'http://audio.musicinformationretrieval.com/'
# urlretrieve moved to urllib.request in Python 3; resolve whichever exists
# so the cell runs on both major versions.
try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2
urlretrieve(url + filename, filename=filename)
Load a file:
# Read the downloaded file; librosa.load returns the signal and its sampling
# rate, bound here to `x` and `fs`.
x, fs = librosa.load(path=filename)
Listen to the signal:
# Build an audio widget from the raw samples; as the last expression of the
# cell it is what the notebook displays.
IPython.display.Audio(data=x, rate=fs)
Compute some features (MFCCs):
# Compute MFCCs of the signal. The audio is passed as `y=` because recent
# librosa releases no longer accept it positionally; the keyword form works
# on older releases as well.
# NOTE(review): X is presumably (n_mfcc, n_frames) -- confirm from the
# printed shape.
X = librosa.feature.mfcc(y=x, sr=fs)
# print() with a single argument behaves identically on Python 2 and 3.
print(X.shape)
Scale the features to have zero mean and unit variance:
# Standardize each feature to zero mean and unit variance.
# scale() works along axis 0 by default, which would normalize every column
# of X on its own. X.T is what gets passed to PCA as (samples, features), so
# the features are the rows of X and the statistics must be taken along
# axis=1.
X = sklearn.preprocessing.scale(X, axis=1)
# Sanity check shown as the cell output: the overall mean should be ~0.
X.mean()
Create a PCA model object.
# Configure (but do not yet fit) a PCA estimator that keeps two components
# and whitens its output.
model = sklearn.decomposition.PCA(
    whiten=True,
    n_components=2,
)
Apply PCA to the scaled features:
# Fit the PCA model on the transposed feature matrix, then project that same
# data onto the fitted components. fit() returns the estimator itself, so
# the two steps are chained.
Y = model.fit(X.T).transform(X.T)
# print() with a single argument behaves identically on Python 2 and 3.
print(Y.shape)
Let's see how many principal components were returned:
# Display the shape of the fitted model's `components_` array.
# NOTE(review): presumably (n_components, n_features) -- confirm from the
# cell output.
model.components_.shape
Plot the top two principal components for each data point:
# Scatter the first column of Y against the second, one marker per row.
plt.scatter(x=Y[:, 0], y=Y[:, 1])