%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Load the fruit measurements; pd.read_table parses the file as
# tab-delimited text by default.
fruits = pd.read_table('/home/Fruits/data.txt')

# Preview the first five rows (rendered table follows).
fruits.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | fruit_label | fruit_name | mass | width | height |
|---|---|---|---|---|---|
| 0 | 1 | apple | 192 | 8.4 | 7.3 |
| 1 | 1 | apple | 180 | 8.0 | 6.8 |
| 2 | 1 | apple | 176 | 7.4 | 7.2 |
| 3 | 2 | Mandarin | 86 | 6.2 | 4.7 |
| 4 | 2 | Mandarin | 84 | 6.0 | 4.6 |
# Map each numeric fruit_label to its fruit_name so predictions can be
# shown as readable names.
# The mapping is built from (label, name) pairs taken from the same row,
# keeping the first name seen for each label. Zipping two independent
# unique() arrays would silently misalign if a label ever appeared with
# more than one name (or a name with more than one label).
look_up_fruit_name = (
    fruits[['fruit_label', 'fruit_name']]
    .drop_duplicates('fruit_label')
    .set_index('fruit_label')['fruit_name']
    .to_dict()
)

look_up_fruit_name
# Recorded output:
# {1: 'apple',
#  2: 'Mandarin',
#  3: 'Braeburn',
#  4: 'Golden',
#  5: 'Cripps',
#  6: 'Lane',
#  7: 'Morrisons',
#  8: 'Turkey',
#  9: 'Belsan',
#  10: 'lemons'}
# Features (mass, width, height) and the target label column.
X = fruits[['mass', 'width', 'height']]
y = fruits['fruit_label']

# Hold out a test split (train_test_split's default test_size is 25% of rows).
# random_state pins the shuffle so the split, and every score derived from
# it, is identical after Restart Kernel -> Run All.
# NOTE: the outputs recorded in this notebook came from an unseeded split,
# so re-running will produce different (but now repeatable) numbers.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# k-nearest-neighbours classifier voting over the 5 closest training samples.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
# Recorded output (repr of the fitted estimator):
# KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
#            metric_params=None, n_jobs=1, n_neighbors=5, p=2,
#            weights='uniform')
# Mean accuracy of the fitted classifier on the held-out test split.
knn.score(X_test, y_test)
# Recorded output:
# 0.4666666666666667
# Classify one unseen fruit: mass=20, width=4.5, height=5.5.
# The sample is wrapped in a one-row DataFrame that reuses the training
# column names; scikit-learn releases that track feature names warn when a
# model fitted on a DataFrame is handed a bare nested list.
sample = pd.DataFrame([[20, 4.5, 5.5]], columns=X.columns)
fruit_prediction = knn.predict(sample)
print(fruit_prediction[0])

# Translate the predicted numeric label into the fruit name.
look_up_fruit_name[fruit_prediction[0]]
# Recorded output:
# 2
# 'Mandarin'
# Classify a second unseen fruit: mass=100, width=6.5, height=8.5.
# The sample is wrapped in a one-row DataFrame that reuses the training
# column names; scikit-learn releases that track feature names warn when a
# model fitted on a DataFrame is handed a bare nested list.
sample = pd.DataFrame([[100, 6.5, 8.5]], columns=X.columns)
fruit_prediction = knn.predict(sample)
print(fruit_prediction[0])

# Translate the predicted numeric label into the fruit name.
look_up_fruit_name[fruit_prediction[0]]
# Recorded output:
# 10
# 'lemons'
# Per-class precision / recall / F1 for the classifier on the held-out split.
# `classification_report` is imported with the other dependencies at the top
# of the notebook.
pred = knn.predict(X_test)
print(classification_report(y_test, pred))
# Recorded output:
#              precision    recall  f1-score   support
#
#           1       0.00      0.00      0.00         1
#           2       1.00      1.00      1.00         2
#           3       0.00      0.00      0.00         1
#           4       0.00      0.00      0.00         1
#           5       0.00      0.00      0.00         1
#           7       0.00      0.00      0.00         2
#           8       1.00      0.33      0.50         3
#           9       0.00      0.00      0.00         0
#          10       1.00      1.00      1.00         4
#
# avg / total       0.60      0.47      0.50        15
#
# Recorded warnings:
# /opt/conda/lib/python3.6/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
#   'precision', 'predicted', average, warn_for)
# /opt/conda/lib/python3.6/site-packages/sklearn/metrics/classification.py:1137: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.
#   'recall', 'true', average, warn_for)
# Sweep the neighbour count k = 1..19 and record test-set accuracy for each,
# so the effect of k can be plotted.
k_range = range(1, 20)
score = []
for k in k_range:
    # NOTE: this rebinds `knn`; after the loop it refers to the k=19 model,
    # not the k=5 model fitted earlier.
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    score.append(knn.score(X_test, y_test))
# Scatter plot of test accuracy against the neighbour count k.
plt.figure()
plt.xlabel('k')
plt.ylabel('accuracy')
plt.scatter(k_range, score)
# Fixed tick positions so the k axis reads in steps of five.
plt.xticks([0, 5, 10, 15, 20])
# Recorded output:
# <IPython.core.display.Javascript object>
# ([<matplotlib.axis.XTick at 0x7f242d31f6a0>,
#   <matplotlib.axis.XTick at 0x7f242d38ef98>,
#   <matplotlib.axis.XTick at 0x7f242d38ee80>,
#   <matplotlib.axis.XTick at 0x7f242d33ca58>,
#   <matplotlib.axis.XTick at 0x7f242d33cf28>],
#  <a list of 5 Text xticklabel objects>)