import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
%matplotlib inline
# Seed NumPy's global random generator so every run samples the same data.
np.random.seed(999)
print(f'NumPy version {np.__version__}')
print(f'pandas version {pd.__version__}')

# Size of the synthetic data set: samples per class and features per sample.
num_examples = 10
num_features = 2

# Draw one Gaussian cluster per class (standard deviation 0.2), centred at
# 0.25 for class 0 and 0.75 for class 1, and stack them row-wise. The class-0
# cluster is sampled first, so the random stream is consumed in that order.
X = np.vstack([
    np.random.normal(loc=center, scale=0.2, size=(num_examples, num_features))
    for center in (0.25, 0.75)
])
# Labels follow the row order of X: num_examples zeros, then num_examples ones.
y = np.repeat([0, 1], num_examples)
# Colour used to draw each class label in the scatter plots.
color_map = {0: 'blue', 1: 'red'}
# One colour per training row, looked up from that row's label in y.
color = [color_map[label] for label in y]

# Scatter plot of the training set: first feature on the x-axis, second on
# the y-axis, each point coloured by its class.
plt.scatter(X[:, 0], X[:, 1], color=color)
plt.gca().set(title='Training data', xlabel='X1', ylabel='X2')
plt.show()
# Build a 5-nearest-neighbours classifier and train it on (X, y).
# fit() returns the estimator itself, so construction and training chain.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X, y)
# Single query point at (X1, X2) = (0.5, 0.5).
X_test = [0.5, 0.5]

# Draw the query point in black on top of the colour-coded training data.
plt.scatter(X[:, 0], X[:, 1], color=color)
plt.scatter(*X_test, color='black')
plt.gca().set(title='Training data & new data point', xlabel='X1', ylabel='X2')
plt.show()

# The point is wrapped in a list so it is passed to the model as a one-row batch.
print('Predicted Class:\n', knn_model.predict([X_test]))
print(
    'Predicted probability distributions over all classes:\n',
    knn_model.predict_proba([X_test]),
)
# Batch of three query points, one per row; columns are (X1, X2).
X_test = np.asarray([[0.5, 0.5], [0.75, 0.6], [0.25, 0.3]])

# Draw all query points in black on top of the colour-coded training data.
plt.scatter(X[:, 0], X[:, 1], color=color)
plt.scatter(X_test[:, 0], X_test[:, 1], color='black')
# Plural title: this figure shows several query points, not a single one.
plt.title('Training data & new data points')
plt.xlabel('X1')
plt.ylabel('X2')
plt.show()

# One predicted label and one row of class probabilities per query point.
print('Predicted classes:\n', knn_model.predict(X_test))
print('\nPredicted probability distributions over all classes:\n', knn_model.predict_proba(X_test))