import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Load the customer table and echo its first rows as a quick sanity check.
dataset_customers = pd.read_csv('/content/Mall_Customers.csv')
print(dataset_customers.head())

# Clustering features: the columns at positions 3 and 4 of the CSV.
# NOTE(review): the plot labels further down treat these as annual income
# and spending score -- confirm against the CSV header.
feature_columns = [3, 4]
X = dataset_customers.iloc[:, feature_columns].values
# Elbow method: fit KMeans once per candidate cluster count and record the
# model's inertia (sum of squared distances of samples to their closest
# centroid). The "elbow" of the resulting curve suggests a cluster count.
cluster_counts = range(1, 11)  # single source for both the fits and the x-axis
min_dis_sum = []
for k in cluster_counts:
    model = KMeans(n_clusters=k, init='k-means++', random_state=23)
    model.fit(X)
    min_dis_sum.append(model.inertia_)

plt.plot(cluster_counts, min_dis_sum)
plt.title('The Elbow point graph')
plt.xlabel('Number of clusters')
plt.ylabel('Within Clusters Sum of squares ')
plt.show()
# Final clustering: one colour per cluster, so the colour list also fixes the
# number of clusters (5).
# NOTE(review): 5 was presumably read off the elbow plot -- confirm.
cluster_colors = ['green', 'yellow', 'cyan', 'violet', 'blue']
model = KMeans(n_clusters=len(cluster_colors), init='k-means++', random_state=0)
Y = model.fit_predict(X)  # cluster label (0..4) for every row of X

plt.figure(figsize=(8, 8))
# Draw each cluster's points in its own colour; labels read "cluster 1".."cluster 5".
for cluster_id, color in enumerate(cluster_colors):
    members = X[Y == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=50, c=color,
                label=f'cluster {cluster_id + 1}')

# Overlay the fitted centroids, larger and in red.
centroids = model.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], s=100, c='red', label='Centroids')

plt.title('Customer Groups')
plt.xlabel('Annual Income')
plt.ylabel('Spending score')
plt.legend()  # without this the label= arguments above are never displayed
plt.show()