Skip to content

Commit 3172571

Browse files
committed
Fix
1 parent ebe9707 commit 3172571

File tree

8 files changed

+292
-0
lines changed

8 files changed

+292
-0
lines changed

kMeans/centroidfile_1d.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
1
2+
3
-2.04 KB
Binary file not shown.

kMeans/datafile_1d.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2
2+
3
3+
4
4+
10
5+
11
6+
12
7+
20
8+
25
9+
30
-2 KB
Binary file not shown.

kMeans/iris.data.txt

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
5.1,3.5,1.4,0.2,Iris-setosa
2+
4.9,3.0,1.4,0.2,Iris-setosa
3+
4.7,3.2,1.3,0.2,Iris-setosa
4+
4.6,3.1,1.5,0.2,Iris-setosa
5+
5.0,3.6,1.4,0.2,Iris-setosa
6+
5.4,3.9,1.7,0.4,Iris-setosa
7+
4.6,3.4,1.4,0.3,Iris-setosa
8+
5.0,3.4,1.5,0.2,Iris-setosa
9+
4.4,2.9,1.4,0.2,Iris-setosa
10+
4.9,3.1,1.5,0.1,Iris-setosa
11+
5.4,3.7,1.5,0.2,Iris-setosa
12+
4.8,3.4,1.6,0.2,Iris-setosa
13+
4.8,3.0,1.4,0.1,Iris-setosa
14+
4.3,3.0,1.1,0.1,Iris-setosa
15+
5.8,4.0,1.2,0.2,Iris-setosa
16+
5.7,4.4,1.5,0.4,Iris-setosa
17+
5.4,3.9,1.3,0.4,Iris-setosa
18+
5.1,3.5,1.4,0.3,Iris-setosa
19+
5.7,3.8,1.7,0.3,Iris-setosa
20+
5.1,3.8,1.5,0.3,Iris-setosa
21+
5.4,3.4,1.7,0.2,Iris-setosa
22+
5.1,3.7,1.5,0.4,Iris-setosa
23+
4.6,3.6,1.0,0.2,Iris-setosa
24+
5.1,3.3,1.7,0.5,Iris-setosa
25+
4.8,3.4,1.9,0.2,Iris-setosa
26+
5.0,3.0,1.6,0.2,Iris-setosa
27+
5.0,3.4,1.6,0.4,Iris-setosa
28+
5.2,3.5,1.5,0.2,Iris-setosa
29+
5.2,3.4,1.4,0.2,Iris-setosa
30+
4.7,3.2,1.6,0.2,Iris-setosa
31+
4.8,3.1,1.6,0.2,Iris-setosa
32+
5.4,3.4,1.5,0.4,Iris-setosa
33+
5.2,4.1,1.5,0.1,Iris-setosa
34+
5.5,4.2,1.4,0.2,Iris-setosa
35+
4.9,3.1,1.5,0.1,Iris-setosa
36+
5.0,3.2,1.2,0.2,Iris-setosa
37+
5.5,3.5,1.3,0.2,Iris-setosa
38+
4.9,3.1,1.5,0.1,Iris-setosa
39+
4.4,3.0,1.3,0.2,Iris-setosa
40+
5.1,3.4,1.5,0.2,Iris-setosa
41+
5.0,3.5,1.3,0.3,Iris-setosa
42+
4.5,2.3,1.3,0.3,Iris-setosa
43+
4.4,3.2,1.3,0.2,Iris-setosa
44+
5.0,3.5,1.6,0.6,Iris-setosa
45+
5.1,3.8,1.9,0.4,Iris-setosa
46+
4.8,3.0,1.4,0.3,Iris-setosa
47+
5.1,3.8,1.6,0.2,Iris-setosa
48+
4.6,3.2,1.4,0.2,Iris-setosa
49+
5.3,3.7,1.5,0.2,Iris-setosa
50+
5.0,3.3,1.4,0.2,Iris-setosa
51+
7.0,3.2,4.7,1.4,Iris-versicolor
52+
6.4,3.2,4.5,1.5,Iris-versicolor
53+
6.9,3.1,4.9,1.5,Iris-versicolor
54+
5.5,2.3,4.0,1.3,Iris-versicolor
55+
6.5,2.8,4.6,1.5,Iris-versicolor
56+
5.7,2.8,4.5,1.3,Iris-versicolor
57+
6.3,3.3,4.7,1.6,Iris-versicolor
58+
4.9,2.4,3.3,1.0,Iris-versicolor
59+
6.6,2.9,4.6,1.3,Iris-versicolor
60+
5.2,2.7,3.9,1.4,Iris-versicolor
61+
5.0,2.0,3.5,1.0,Iris-versicolor
62+
5.9,3.0,4.2,1.5,Iris-versicolor
63+
6.0,2.2,4.0,1.0,Iris-versicolor
64+
6.1,2.9,4.7,1.4,Iris-versicolor
65+
5.6,2.9,3.6,1.3,Iris-versicolor
66+
6.7,3.1,4.4,1.4,Iris-versicolor
67+
5.6,3.0,4.5,1.5,Iris-versicolor
68+
5.8,2.7,4.1,1.0,Iris-versicolor
69+
6.2,2.2,4.5,1.5,Iris-versicolor
70+
5.6,2.5,3.9,1.1,Iris-versicolor
71+
5.9,3.2,4.8,1.8,Iris-versicolor
72+
6.1,2.8,4.0,1.3,Iris-versicolor
73+
6.3,2.5,4.9,1.5,Iris-versicolor
74+
6.1,2.8,4.7,1.2,Iris-versicolor
75+
6.4,2.9,4.3,1.3,Iris-versicolor
76+
6.6,3.0,4.4,1.4,Iris-versicolor
77+
6.8,2.8,4.8,1.4,Iris-versicolor
78+
6.7,3.0,5.0,1.7,Iris-versicolor
79+
6.0,2.9,4.5,1.5,Iris-versicolor
80+
5.7,2.6,3.5,1.0,Iris-versicolor
81+
5.5,2.4,3.8,1.1,Iris-versicolor
82+
5.5,2.4,3.7,1.0,Iris-versicolor
83+
5.8,2.7,3.9,1.2,Iris-versicolor
84+
6.0,2.7,5.1,1.6,Iris-versicolor
85+
5.4,3.0,4.5,1.5,Iris-versicolor
86+
6.0,3.4,4.5,1.6,Iris-versicolor
87+
6.7,3.1,4.7,1.5,Iris-versicolor
88+
6.3,2.3,4.4,1.3,Iris-versicolor
89+
5.6,3.0,4.1,1.3,Iris-versicolor
90+
5.5,2.5,4.0,1.3,Iris-versicolor
91+
5.5,2.6,4.4,1.2,Iris-versicolor
92+
6.1,3.0,4.6,1.4,Iris-versicolor
93+
5.8,2.6,4.0,1.2,Iris-versicolor
94+
5.0,2.3,3.3,1.0,Iris-versicolor
95+
5.6,2.7,4.2,1.3,Iris-versicolor
96+
5.7,3.0,4.2,1.2,Iris-versicolor
97+
5.7,2.9,4.2,1.3,Iris-versicolor
98+
6.2,2.9,4.3,1.3,Iris-versicolor
99+
5.1,2.5,3.0,1.1,Iris-versicolor
100+
5.7,2.8,4.1,1.3,Iris-versicolor
101+
6.3,3.3,6.0,2.5,Iris-virginica
102+
5.8,2.7,5.1,1.9,Iris-virginica
103+
7.1,3.0,5.9,2.1,Iris-virginica
104+
6.3,2.9,5.6,1.8,Iris-virginica
105+
6.5,3.0,5.8,2.2,Iris-virginica
106+
7.6,3.0,6.6,2.1,Iris-virginica
107+
4.9,2.5,4.5,1.7,Iris-virginica
108+
7.3,2.9,6.3,1.8,Iris-virginica
109+
6.7,2.5,5.8,1.8,Iris-virginica
110+
7.2,3.6,6.1,2.5,Iris-virginica
111+
6.5,3.2,5.1,2.0,Iris-virginica
112+
6.4,2.7,5.3,1.9,Iris-virginica
113+
6.8,3.0,5.5,2.1,Iris-virginica
114+
5.7,2.5,5.0,2.0,Iris-virginica
115+
5.8,2.8,5.1,2.4,Iris-virginica
116+
6.4,3.2,5.3,2.3,Iris-virginica
117+
6.5,3.0,5.5,1.8,Iris-virginica
118+
7.7,3.8,6.7,2.2,Iris-virginica
119+
7.7,2.6,6.9,2.3,Iris-virginica
120+
6.0,2.2,5.0,1.5,Iris-virginica
121+
6.9,3.2,5.7,2.3,Iris-virginica
122+
5.6,2.8,4.9,2.0,Iris-virginica
123+
7.7,2.8,6.7,2.0,Iris-virginica
124+
6.3,2.7,4.9,1.8,Iris-virginica
125+
6.7,3.3,5.7,2.1,Iris-virginica
126+
7.2,3.2,6.0,1.8,Iris-virginica
127+
6.2,2.8,4.8,1.8,Iris-virginica
128+
6.1,3.0,4.9,1.8,Iris-virginica
129+
6.4,2.8,5.6,2.1,Iris-virginica
130+
7.2,3.0,5.8,1.6,Iris-virginica
131+
7.4,2.8,6.1,1.9,Iris-virginica
132+
7.9,3.8,6.4,2.0,Iris-virginica
133+
6.4,2.8,5.6,2.2,Iris-virginica
134+
6.3,2.8,5.1,1.5,Iris-virginica
135+
6.1,2.6,5.6,1.4,Iris-virginica
136+
7.7,3.0,6.1,2.3,Iris-virginica
137+
6.3,3.4,5.6,2.4,Iris-virginica
138+
6.4,3.1,5.5,1.8,Iris-virginica
139+
6.0,3.0,4.8,1.8,Iris-virginica
140+
6.9,3.1,5.4,2.1,Iris-virginica
141+
6.7,3.1,5.6,2.4,Iris-virginica
142+
6.9,3.1,5.1,2.3,Iris-virginica
143+
5.8,2.7,5.1,1.9,Iris-virginica
144+
6.8,3.2,5.9,2.3,Iris-virginica
145+
6.7,3.3,5.7,2.5,Iris-virginica
146+
6.7,3.0,5.2,2.3,Iris-virginica
147+
6.3,2.5,5.0,1.9,Iris-virginica
148+
6.5,3.0,5.2,2.0,Iris-virginica
149+
6.2,3.4,5.4,2.3,Iris-virginica
150+
5.9,3.0,5.1,1.8,Iris-virginica
-1.98 KB
Binary file not shown.

kMeans/kMeans.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# Tyler Phillips
2+
# CSCI57300 Data Mining
3+
# k-Means
4+
5+
import sys
6+
import numpy as np
7+
8+
def kMeans(D, k, mu=None, eps=0.0001):
    """Cluster the rows of D into k groups with Lloyd's k-means algorithm.

    Args:
        D: n x d data matrix (a 1-D sequence is treated as n x 1).
        k: Number of clusters.
        mu: Optional k x d matrix of initial centroids.  When it is None or
            contains only zeros, k distinct rows of D are drawn at random
            and used as the initial centroids.  The array passed in is
            never modified; a float copy is used internally.
        eps: Convergence tolerance on the Frobenius norm of the change in
            the centroid matrix between consecutive iterations.

    Returns:
        A tuple ``(C, labels, mu, n_iter)`` where ``C`` is a list of k lists
        holding the data rows of each cluster, ``labels`` is a list of k
        lists holding the 1-based row indices of each cluster, ``mu`` is the
        final k x d centroid matrix and ``n_iter`` is the number of
        iterations that were run.
    """
    D = np.asarray(D, dtype=float)
    if D.ndim == 1:
        D = D.reshape(-1, 1)
    n, d = D.shape

    if mu is None or not np.any(mu):
        # No preset centroids: seed with k distinct, randomly chosen rows.
        seed_rows = np.random.choice(n, size=k, replace=False)
        mu = D[seed_rows, :].copy()
    else:
        # Work on a float copy so the caller's array is left untouched and
        # integer centroid arrays do not truncate the computed means.
        mu = np.array(mu, dtype=float).reshape(k, d)

    # The previous centroids start at zero, so the first convergence check
    # compares the updated centroids against the origin.
    prev_mu = np.zeros((k, d))
    n_iter = 1

    while True:
        # Cluster assignment step: distance from every point to every
        # centroid, then pick the nearest.  argmin returns the first
        # minimum, so ties go to the lowest-numbered centroid.
        dists = np.linalg.norm(D[:, np.newaxis, :] - mu[np.newaxis, :, :],
                               axis=2)
        nearest = np.argmin(dists, axis=1)

        C = [list(D[nearest == j]) for j in range(k)]
        labels = [(np.flatnonzero(nearest == j) + 1).tolist()
                  for j in range(k)]

        # Centroid update step: every non-empty cluster (including a
        # single-member one) moves its centroid to the mean of its
        # members; an empty cluster keeps its previous centroid.
        for j, members in enumerate(C):
            if members:
                mu[j, :] = np.mean(members, axis=0)

        # Check for convergence
        if np.linalg.norm(mu - prev_mu) <= eps:
            return C, labels, mu, n_iter

        n_iter += 1
        prev_mu = mu.copy()
70+
71+
def SSE(C, mu):
    """Return the sum of squared Euclidean distances from points to mu.

    Args:
        C: Sequence of data points belonging to one cluster (a list of
            length-d rows or an m x d array).
        mu: The centroid the points are measured against (length d).

    Returns:
        The sum over all points c in C of ||c - mu||^2, or 0.0 when C
        is empty.
    """
    if len(C) == 0:
        return 0.0
    # Sum the squared residuals directly; taking the norm and squaring it
    # again would only add a sqrt round trip per point.
    residuals = np.asarray(C, dtype=float) - np.asarray(mu, dtype=float)
    return np.sum(residuals ** 2)
77+
78+
79+
80+
81+
82+
# Command-line driver.
# Usage: python kMeans.py <datafile> <k> [centroidfile]
#   datafile     - comma-delimited numeric data, one point per line
#   k            - number of clusters
#   centroidfile - optional 1-based row ids of the initial centroids

# Echo the raw argument list
print(str(sys.argv))

# Get number of arguments
argc = len(sys.argv)

# Print error if not enough arguments
if argc < 3:
    sys.exit("Datafile and k arguments are required!")

# Read in D data matrix.  The Iris files carry a class-name string in the
# fifth column, so only the four numeric columns are loaded for them.  The
# check is made on the bare file name so a directory prefix does not
# defeat it.
data_path = sys.argv[1]
data_name = data_path.replace("\\", "/").rsplit("/", 1)[-1]
if data_name in ("iris.data.txt", "iris.txt"):
    D = np.loadtxt(data_path, delimiter=',', usecols=(0, 1, 2, 3))
else:
    D = np.loadtxt(data_path, delimiter=',')
if D.ndim < 2:
    # One value per line loads as a 1-D (or 0-d) array; make it n x 1.
    D = D.reshape(-1, 1)

# Read in k centroid count
k = int(sys.argv[2])

# Read in mu centroid id list if given.  An all-zero mu tells kMeans to
# pick the initial centroids at random.
mu = np.zeros((k, D.shape[1]))
mu_list = None
if argc > 3:
    # atleast_1d/ravel keep a file holding a single id (0-d array)
    # iterable and flatten any row/column layout into a flat id list.
    mu_list = np.atleast_1d(
        np.loadtxt(sys.argv[3], dtype=int, delimiter=',')).ravel()
    if mu_list.size != k:
        sys.exit("Centroid file must list exactly k row ids!")
    for i, row_id in enumerate(mu_list):
        # Ids in the file are 1-based row numbers of the data file.
        mu[i, :] = D[row_id - 1, :]

C, labels, mu, n_iter = kMeans(D, k, mu)
total_sse = 0

# Print input information
n, d = D.shape
print("Number of Datapoints: n=" + str(n))
print("Number of Dimensions: d=" + str(d))
print("Number of Clusters: k=" + str(k))
print("\n")

# Print results
print("Convergence after " + str(n_iter) + " iterations:")
for i in range(k):
    print("-----Cluster " + str(i) + "---------------")
    print(str(len(C[i])) + " elements:" + str(labels[i]))
    print("mu_" + str(i) + ":" + str(mu[i, :]))
    # Compute the cluster SSE once and reuse it for the running total.
    cluster_sse = SSE(C[i], mu[i, :])
    print("SSE_" + str(i) + ":" + str(cluster_sse))
    total_sse = total_sse + cluster_sse
    # NOTE(review): the original indentation was lost, so it is unclear
    # whether this separator belongs inside or after the loop - confirm.
    print("\n")
print("Total SSE :" + str(total_sse))

kMeans/kMeans.py - Shortcut.lnk

-1.46 KB
Binary file not shown.

0 commit comments

Comments
 (0)