-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathUnsupervised-Learning.py
More file actions
50 lines (44 loc) · 1.55 KB
/
Copy pathUnsupervised-Learning.py
File metadata and controls
50 lines (44 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pandas as pd
from sklearn.cluster import KMeans
# Load the mall-customers CSV and cluster its rows on two numeric columns.
dataset = pd.read_csv(
    'https://storage.googleapis.com/dqlab-dataset/pythonTutorial/mall_customers.csv'
)
# Only these two columns are used as clustering features.
feature_columns = ['annual_income', 'spending_score']
X = dataset[feature_columns]
# A fixed random_state keeps the centroid initialisation reproducible.
cluster_model = KMeans(n_clusters=5, random_state=24)
# Fit the model, then read the per-row cluster index off the fitted estimator.
cluster_model.fit(X)
labels = cluster_model.labels_
# Plotting library used for the charts in this script.
import matplotlib.pyplot as plt

# Switch from DataFrame to a plain array so columns can be sliced by position.
X = X.values

# Column 0 feeds the x axis, column 1 the y axis.
xs, ys = X[:, 0], X[:, 1]

# Cluster centres of the fitted model, split into the same two coordinates.
centroids = cluster_model.cluster_centers_
centroids_x, centroids_y = centroids[:, 0], centroids[:, 1]

# Data points coloured by cluster label; half-transparent so overlaps stay visible.
plt.scatter(xs, ys, c=labels, alpha=0.5)
# Centroids drawn on top as diamond markers.
plt.scatter(centroids_x, centroids_y, marker='D', s=50)

plt.title('K Means Clustering', fontsize=20)
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.show()
# Elbow method: plot the K-Means inertia against the number of clusters so the
# "elbow" (where adding clusters stops paying off) can be read from the chart.

# Candidate cluster counts, defined once and shared by the fitting step and the
# x axis so the two can never drift apart.
k_values = range(1, 10)

# Fit one throwaway model per k and keep only its inertia. Using a temporary
# estimator (rather than rebinding `cluster_model`) leaves the already fitted
# 5-cluster model intact for any later use.
inertia = [
    KMeans(n_clusters=k, random_state=24).fit(X).inertia_
    for k in k_values
]

# Inertia plot
plt.plot(k_values, inertia)
plt.title('The Elbow Method - Inertia plot', fontsize=20)
plt.xlabel('No. of Clusters')
plt.ylabel('Inertia')
plt.show()