Skip to content

Commit d2abe2e

Browse files
authored
Add files via upload
1 parent 8084479 commit d2abe2e

File tree

1 file changed

+115
-0
lines changed

1 file changed

+115
-0
lines changed
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sun Aug 27 14:58:11 2017
4+
5+
@author: hp pc
6+
"""
7+
8+
import scipy.io as sio
9+
import random
10+
import numpy as np
11+
import matplotlib.pyplot as plt
12+
import pandas as pd
13+
import copy
14+
15+
# Load the MATLAB file and pull out the array stored under the key 'h'.
a = sio.loadmat('data.mat')
b = a['h']

# Wrap the array in a DataFrame whose two columns are named 'x' and 'y'.
df = pd.DataFrame(b, columns=['x', 'y'])

# Cluster label (1..6) -> matplotlib colour code.
colmap = dict(zip(range(1, 7), 'rgbmyk'))

# One-row frames with a column per cluster count '2'..'6', initialised to 0;
# error_calc writes its per-metric totals into them.
euc_error = pd.DataFrame({str(n): [0] for n in range(2, 7)})
cos_error = pd.DataFrame({str(n): [0] for n in range(2, 7)})
23+
24+
def plotting(df, centroids, colmap):
    """Scatter-plot the data points and the centroids, then show the figure.

    Points are drawn semi-transparent with a black edge, coloured by the
    per-row value in ``df['color']``. Each centroid is drawn on top in
    ``colmap[label]``. The axes are fixed to x in [-1, 5] and y in [-2, 3].

    Parameters
    ----------
    df : DataFrame with 'x', 'y' and 'color' columns.
    centroids : dict mapping a cluster label to its ``[x, y]`` position.
    colmap : dict mapping a cluster label to a matplotlib colour.
    """
    plt.scatter(df['x'], df['y'], color=df['color'], alpha=0.2, edgecolor='k')
    for label, position in centroids.items():
        plt.scatter(*position, color=colmap[label])
    plt.xlim(-1, 5)
    plt.ylim(-2, 3)
    plt.show()
32+
33+
34+
def dist_calc(x1, x2, y1, y2, n):
    """Return the distance between the points (x1, y1) and (x2, y2).

    Note the argument order: both x coordinates come first, then both y
    coordinates. The arithmetic is element-wise, so array-like inputs
    (e.g. pandas Series) yield an array-like result.

    Parameters
    ----------
    x1, x2 : x coordinates of the first and second point.
    y1, y2 : y coordinates of the first and second point.
    n : metric selector.
        0 -> Euclidean distance,
        1 -> cosine distance (1 minus the cosine similarity of the vectors),
        anything else -> 0 is returned.
    """
    if n == 0:
        dx = x1 - x2
        dy = y1 - y2
        return (dx**2 + dy**2)**0.5
    if n == 1:
        dot = x1*x2 + y1*y2
        norm_product = np.sqrt((x1**2 + y1**2)*(x2**2 + y2**2))
        return 1 - (dot/norm_product)
    # Unknown selector: the distance defaults to zero.
    return 0
41+
42+
def init_centroid(k):
    """Create ``k`` reproducible random starting centroids.

    Parameters
    ----------
    k : number of centroids to create.

    Returns
    -------
    dict mapping the labels 1..k to ``[x, y]`` lists, with x drawn uniformly
    from [-0.3, 4.0) and y from [-1.4, 2.0). The dict is also printed.
    """
    # Seed and sample from the same generator. Previously NumPy was seeded
    # while the values were drawn from the stdlib ``random`` module, so the
    # seed had no effect and every run started from different centroids.
    np.random.seed(200)
    centroids = {
        label: [
            float(np.random.uniform(-0.3, 4.0)),
            float(np.random.uniform(-1.4, 2.0)),
        ]
        for label in range(1, k + 1)
    }
    print(centroids)
    return centroids
50+
51+
def first_assignment(df, centroids, colmap, dist_type):
    """Assign every row of ``df`` to its nearest centroid.

    The following columns are added to (or overwritten on) ``df`` in place:

    * ``distance_from_<label>`` for each centroid label - distance of the
      row's (x, y) point to that centroid, computed by ``dist_calc``;
    * ``closest`` - label of the centroid with the smallest distance;
    * ``color``   - ``colmap[closest]``.

    Parameters
    ----------
    df : DataFrame with 'x' and 'y' columns.
    centroids : dict mapping a cluster label to its ``[x, y]`` position.
    colmap : dict mapping a cluster label to a colour.
    dist_type : metric selector forwarded to ``dist_calc``.

    Returns
    -------
    The same ``df`` object, with the columns above filled in.
    """
    # Remember which label each distance column belongs to. This replaces
    # parsing the label back out of the column name with
    # ``str.lstrip('distance_from_')``, which strips a *set of characters*
    # rather than a prefix and only worked by accident for digit labels.
    column_to_label = {}
    for label, position in centroids.items():
        column = 'distance_from_{}'.format(label)
        df[column] = dist_calc(df['x'], position[0], df['y'], position[1], dist_type)
        column_to_label[column] = label

    nearest_column = df.loc[:, list(column_to_label)].idxmin(axis=1)
    df['closest'] = nearest_column.map(column_to_label)
    # A lambda (not ``.map(colmap)``) so a label missing from colmap still
    # raises KeyError instead of silently producing NaN.
    df['color'] = df['closest'].map(lambda label: colmap[label])
    return df
60+
61+
def update_centroid(df, centroids):
    """Move each centroid to the mean of the points assigned to it.

    Parameters
    ----------
    df : DataFrame with 'x', 'y' and 'closest' columns, where 'closest'
        holds the label of the centroid each row is assigned to.
    centroids : dict mapping a cluster label to its ``[x, y]`` list.

    Returns
    -------
    The same ``centroids`` dict, updated in place.

    A centroid with no assigned rows keeps its previous position. Taking the
    mean of an empty selection would turn it into NaN, after which it could
    never attract points again nor compare equal to itself in a convergence
    check.
    """
    for label, position in centroids.items():
        # Select the cluster's rows once and reuse them for both coordinates.
        members = df[df['closest'] == label]
        if members.empty:
            continue
        position[0] = np.mean(members['x'])
        position[1] = np.mean(members['y'])
    return centroids
66+
67+
def clustering(df, k, colmap, dist_type, max_iter=300):
    """Run the k-means loop until the centroids stop moving.

    Each pass assigns the rows of ``df`` to their nearest centroid
    (``first_assignment``) and then recomputes the centroids
    (``update_centroid``). The centroids before and after each pass, and the
    pass number, are printed.

    Parameters
    ----------
    df : DataFrame with 'x' and 'y' columns.
    k : number of clusters.
    colmap : dict mapping a cluster label to a colour.
    dist_type : metric selector forwarded to ``first_assignment``.
    max_iter : upper bound on the number of passes, so the loop always
        terminates even if the centroids never compare equal (for example
        when one of them is NaN). With ``max_iter <= 0`` the initial
        centroids are returned without any assignment pass.

    Returns
    -------
    dict mapping each cluster label to its final ``[x, y]`` position.
    """
    centroids = init_centroid(k)
    for iteration in range(1, max_iter + 1):
        # Snapshot by value: update_centroid mutates the dict it is given,
        # so a plain reference would always compare equal to the updated
        # centroids and end the loop after a single pass.
        prev_set = copy.deepcopy(centroids)
        print(prev_set)
        df = first_assignment(df, centroids, colmap, dist_type)
        centroids = update_centroid(df, centroids)
        print(centroids)
        print("Iteration" + str(iteration))
        if prev_set == centroids:
            break
    return centroids
81+
82+
def error_calc(df):
    """Plot the total clustering error against the number of clusters.

    For every k in 2..6 the points are clustered twice, once per metric
    understood by ``dist_calc`` (0 = Euclidean, 1 = cosine). The error for a
    run is the sum over all rows of the distance to the nearest centroid.
    Each total is stored in the module-level ``euc_error`` / ``cos_error``
    frame under the column named after k, printed, and plotted as one curve
    per metric.

    The previous version never re-clustered per k, summed the same distance
    columns for both metrics, and accumulated into the module-level frames
    across calls, so both curves were identical and independent of k.

    Parameters
    ----------
    df : DataFrame with 'x' and 'y' columns. It is not modified; every run
        works on a copy of those two columns.

    Returns
    -------
    The ``df`` that was passed in.
    """
    k_values = [2, 3, 4, 5, 6]
    e_euc = []
    e_cos = []

    for k in k_values:
        for metric, results, series in ((0, euc_error, e_euc), (1, cos_error, e_cos)):
            work = df[['x', 'y']].copy()
            centroids = clustering(work, k, colmap, metric)
            # Re-assign against the final centroids so the distance columns
            # are guaranteed to refer to the positions that were returned.
            work = first_assignment(work, centroids, colmap, metric)
            distance_cols = ['distance_from_{}'.format(label) for label in centroids]
            total = work[distance_cols].min(axis=1).sum()
            # Assign rather than accumulate, so repeated calls do not add up.
            results['{}'.format(k)] = total
            series.append(total)
            print(results['{}'.format(k)])

    plt.plot(k_values, e_euc, label='Euclidean')
    plt.plot(k_values, e_cos, label='Cosine')
    plt.xlabel('Number of Clusters')
    plt.ylabel('Error')
    plt.title('Error variation with number of clusters')
    plt.legend()
    return df
102+
103+
def k_means(df, k, colmap, dist_type):
    """Cluster ``df`` into ``k`` groups and plot the outcome.

    Runs ``clustering`` on ``df`` and hands the resulting centroids,
    together with ``df`` and ``colmap``, to ``plotting``. Returns nothing.

    Parameters
    ----------
    df : DataFrame with 'x' and 'y' columns.
    k : number of clusters.
    colmap : dict mapping a cluster label to a colour.
    dist_type : metric selector forwarded to ``clustering``.
    """
    plotting(df, clustering(df, k, colmap, dist_type), colmap)
106+
107+
108+
# Run configuration: number of clusters and the metric selector passed on to
# dist_calc (0 = Euclidean, 1 = cosine).
k, dist_type = 4, 1

# Cluster the loaded points and display the result.
k_means(df, k, colmap, dist_type)

# Optional error-curve plot, currently disabled.
#df = error_calc(df)
114+
115+

0 commit comments

Comments
 (0)