1+ # -*- coding: utf-8 -*-
2+ """
3+ Created on Sun Aug 27 14:58:11 2017
4+
5+ @author: hp pc
6+ """
7+
8+ import scipy .io as sio
9+ import random
10+ import numpy as np
11+ import matplotlib .pyplot as plt
12+ import pandas as pd
13+ import copy
14+
# Read the sample points stored under key 'h' of data.mat and expose them as
# a frame with columns 'x' and 'y' (the two-name column list requires the
# array to have exactly two columns).
a = sio.loadmat('data.mat')
b = a['h']
df = pd.DataFrame(b, columns=['x', 'y'])

# Matplotlib colour code for each cluster label 1..6.
colmap = dict(zip(range(1, 7), 'rgbmyk'))

# One-row, zero-initialised tables with a column per cluster count '2'..'6';
# error_calc adds into them.
euc_error = pd.DataFrame({str(count): [0] for count in range(2, 7)})
cos_error = pd.DataFrame({str(count): [0] for count in range(2, 7)})
23+
def plotting(df, centroids, colmap):
    """Scatter the data points in their cluster colours and overlay centroids.

    df        -- frame providing the 'x', 'y' and 'color' columns
    centroids -- mapping of cluster label to coordinates; each value is
                 unpacked positionally into plt.scatter
    colmap    -- mapping of cluster label to a matplotlib colour
    """
    point_style = {'color': df['color'], 'alpha': 0.2, 'edgecolor': 'k'}
    plt.scatter(df['x'], df['y'], **point_style)

    for label, position in centroids.items():
        plt.scatter(*position, color=colmap[label])

    # Fixed viewport for the figure.
    plt.xlim(-1, 5)
    plt.ylim(-2, 3)
    plt.show()
32+
33+
def dist_calc(x1, x2, y1, y2, n):
    """Return the distance between the points (x1, y1) and (x2, y2).

    Note the argument order: both x coordinates come first, then both y
    coordinates. The inputs are only combined with arithmetic operators and
    np.sqrt, so anything supporting those (scalars, arrays, pandas columns)
    can be passed.

    n selects the metric:
        0 -- Euclidean distance
        1 -- cosine distance, i.e. 1 - cosine similarity of the two points
             treated as vectors from the origin

    Raises ValueError for any other n, rather than reporting a distance of 0
    that would make every point look equally close to every centroid.
    """
    if n == 0:
        return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
    if n == 1:
        dot = x1 * x2 + y1 * y2
        # Product of the two vector lengths; it is zero when either point is
        # the origin, in which case the division below is undefined.
        norms = np.sqrt((x1 ** 2 + y1 ** 2) * (x2 ** 2 + y2 ** 2))
        return 1 - (dot / norms)
    raise ValueError(
        'unknown distance type {!r}: expected 0 (Euclidean) or 1 (cosine)'.format(n)
    )
41+
def init_centroid(k):
    """Return k reproducible random starting centroids.

    The result maps the labels 1..k to [x, y] lists, with x drawn uniformly
    from -0.3..4.0 and y from -1.4..2.0. The NumPy global generator is
    re-seeded with 200 on every call, so repeated calls with the same k
    return the same centroids. The mapping is also printed.
    """
    np.random.seed(200)
    # Sample from NumPy's generator so the seed above actually takes effect;
    # the stdlib `random` module has independent state that it does not touch.
    centroids = {
        label: [
            float(np.random.uniform(-0.3, 4.0)),
            float(np.random.uniform(-1.4, 2.0)),
        ]
        for label in range(1, k + 1)
    }
    print(centroids)
    return centroids
50+
def first_assignment(df, centroids, colmap, dist_type):
    """Assign every point in df to its nearest centroid.

    Adds (or overwrites) these columns on df in place and returns df:
        distance_from_<label> -- distance from each point to that centroid,
                                 computed by dist_calc with dist_type
        closest               -- label of the centroid with the smallest
                                 distance
        color                 -- colmap entry for that label

    centroids maps a label to a sequence whose first two items are the
    centroid's x and y. A label missing from colmap raises KeyError.
    """
    column_for = {
        label: 'distance_from_{}'.format(label) for label in centroids
    }
    for label, column in column_for.items():
        centre = centroids[label]
        df[column] = dist_calc(df['x'], centre[0], df['y'], centre[1], dist_type)

    # Translate the winning column name back to its label through an explicit
    # mapping. Parsing the name with str.lstrip is fragile because lstrip
    # removes a *set of characters*, not a prefix.
    label_for = {column: label for label, column in column_for.items()}
    nearest_column = df.loc[:, list(column_for.values())].idxmin(axis=1)
    df['closest'] = nearest_column.map(label_for)
    df['color'] = df['closest'].map(lambda label: colmap[label])
    return df
60+
def update_centroid(df, centroids):
    """Move each centroid to the mean position of the points assigned to it.

    df must provide 'x', 'y' and 'closest' columns, where 'closest' holds the
    label of the centroid each point belongs to. centroids maps a label to a
    mutable [x, y] pair; the pairs are updated in place and the same mapping
    is returned.

    A centroid with no assigned points keeps its current position: the mean
    of an empty selection is NaN, and a NaN centroid can never attract points
    again.
    """
    for label in centroids:
        members = df[df['closest'] == label]
        if members.empty:
            continue
        centroids[label][0] = float(members['x'].mean())
        centroids[label][1] = float(members['y'].mean())
    return centroids
66+
def _centroids_unchanged(before, after):
    """Return True when both mappings hold the same coordinates per label.

    Two NaN coordinates are treated as equal so that a centroid stuck at NaN
    cannot keep the convergence test from ever succeeding.
    """
    if before.keys() != after.keys():
        return False
    for label, old in before.items():
        old_xy = np.asarray(old, dtype=float)
        new_xy = np.asarray(after[label], dtype=float)
        same = (old_xy == new_xy) | (np.isnan(old_xy) & np.isnan(new_xy))
        if not same.all():
            return False
    return True


def clustering(df, k, colmap, dist_type, max_iter=300):
    """Run k-means on df and return the final centroids.

    Starting from init_centroid(k), repeats the assign / update steps until
    an update leaves every centroid where it was, or until max_iter rounds
    have run. df receives the columns written by first_assignment. The
    centroids before and after each round and the round number are printed.

    df        -- frame of points, passed to first_assignment
    k         -- number of clusters
    colmap    -- mapping of cluster label to colour, passed to first_assignment
    dist_type -- metric selector, passed to first_assignment
    max_iter  -- upper bound on the number of rounds (default 300); with a
                 value below 1 no round runs and the initial centroids are
                 returned
    """
    centroids = init_centroid(k)
    for iteration in range(1, max_iter + 1):
        # Snapshot by value: update_centroid edits the centroid lists in
        # place and hands back the same dict, so a plain alias would always
        # compare equal and stop the loop after a single round.
        prev_set = copy.deepcopy(centroids)
        print(prev_set)
        df = first_assignment(df, centroids, colmap, dist_type)
        centroids = update_centroid(df, centroids)
        print(centroids)
        print("Iteration" + str(iteration))
        if _centroids_unchanged(prev_set, centroids):
            break
    return centroids
81+
def error_calc(df):
    """Accumulate the clustering error of df and plot it against k.

    The error is the sum, over all rows, of the value in the row's
    'distance_from_<closest>' column, i.e. each point's distance to the
    centroid it is currently assigned to. That total is added to every
    column ('2'..'6') of the module-level euc_error and cos_error tables,
    each updated column is printed, and both tables are plotted against the
    cluster counts 2..6. Returns df unchanged.

    Because the tables are added to, calling this more than once keeps
    increasing them.

    NOTE(review): nothing here re-clusters df per cluster count or per
    distance metric, so every column of both tables receives the same total
    and the two plotted curves are flat and identical. If the goal is a
    per-k / per-metric comparison, the clustering has to be re-run for each
    combination before summing -- confirm the intended design.
    """
    closest = df['closest']

    # Sum one cluster at a time with a boolean mask. This replaces a per-row
    # df[column][j] lookup, which is label-based (so it only works on a
    # default 0..n-1 index) and was repeated for every cluster count.
    total = 0
    for label in closest.unique():
        column = 'distance_from_{}'.format(label)
        total += df.loc[closest == label, column].sum()

    k = [2, 3, 4, 5, 6]
    e_euc = []
    e_cos = []
    for n_clusters in k:
        column = '{}'.format(n_clusters)
        euc_error[column] += total
        cos_error[column] += total
        e_euc.append(euc_error[column])
        print(euc_error[column])
        e_cos.append(cos_error[column])
        print(cos_error[column])

    plt.plot(k, e_euc)
    plt.plot(k, e_cos)
    plt.xlabel('Number of Clusters')
    plt.ylabel('Error')
    plt.title('Error variation with number of clusters')
    return df
102+
def k_means(df, k, colmap, dist_type):
    """Cluster df into k groups, then plot the points and final centroids.

    All four arguments are forwarded to clustering; df and colmap are also
    passed to plotting together with the centroids clustering returns.
    """
    plotting(df, clustering(df, k, colmap, dist_type), colmap)
106+
107+
# Run configuration: number of clusters, and the metric selector understood
# by dist_calc (0 = Euclidean, 1 = cosine distance).
k, dist_type = 4, 1

k_means(df, k, colmap, dist_type)
112+
113+ #df = error_calc(df)
114+
115+
0 commit comments