Skip to content

Commit aa48612

Browse files
committed
PCA and LDA
1 parent 51a9e14 commit aa48612

File tree

4 files changed

+390
-5
lines changed

4 files changed

+390
-5
lines changed

DimensionalityReduction/LDA/LDA.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Tyler Phillips
2+
# CSCI57300 Data Mining
3+
# Linear Discriminant Analysis (LDA)
4+
5+
import numpy as np
6+
import sys
7+
8+
# LDA function
9+
# Args:
10+
# D - nxd data matrix (last column is class labels)
11+
# num_components - parameter to set number of output dimensions
12+
def LDA(D, num_components):
    """Project labelled data onto its leading linear discriminant directions.

    Args:
        D: n x (d+1) array-like. The first d columns are the features and
            the last column holds the class label of each row. Labels may
            be any values; they are not required to be 0..c-1.
        num_components: number of discriminant directions to keep. It is
            used as a slice bound, so a value larger than d simply keeps
            all d directions.

    Returns:
        Real-valued n x min(num_components, d) array: the (uncentered)
        feature matrix multiplied by the eigenvectors of S^-1 B that have
        the largest eigenvalues, where S is the within-class scatter and B
        the between-class scatter.

    Raises:
        numpy.linalg.LinAlgError: if the within-class scatter matrix is
            singular.
    """
    D = np.asarray(D)

    # Split labels from features
    Y = D[:, -1]
    X = D[:, :-1]

    # Use the labels actually present instead of assuming 0..c-1; selecting
    # with `Y == i` for i in range(c) yields empty classes (and NaN means)
    # whenever the labels are not zero-based consecutive integers.
    labels = np.unique(Y)
    c = labels.shape[0]
    subsets = [X[Y == label] for label in labels]

    # Per-class means, one row per class
    mu_c = np.array([subset.mean(axis=0) for subset in subsets])

    # Mean of the class means (unweighted by class size)
    mu = mu_c.mean(axis=0)

    # Between-class scatter: average outer product of the class-mean offsets
    offsets = mu_c - mu
    B = (offsets.T @ offsets) / c

    # Within-class scatter: sum of the scatter matrices of the centered classes
    S = np.zeros((X.shape[1], X.shape[1]))
    for subset, mean in zip(subsets, mu_c):
        centered = subset - mean
        S = S + centered.T @ centered

    # Eigen-decomposition of S^-1 B; solving the linear system avoids forming
    # the explicit inverse.
    w, v = np.linalg.eig(np.linalg.solve(S, B))

    # S^-1 B is not symmetric, so eig may hand back complex arrays whose
    # imaginary parts are pure round-off; keep the real parts so the sort is
    # well defined and the projection stays real.
    w = np.real(w)
    v = np.real(v)

    # Order eigenvectors by descending eigenvalue and keep the dominant ones
    order = np.argsort(w)[::-1]
    v = v[:, order][:, :num_components]

    # Project data into the eigenvector basis subspace
    return X @ v
70+
71+
72+
# Echo the raw command line (already a string, so print it directly)
argv = str(sys.argv)
print(argv)

# Argument count, script name included
argc = len(sys.argv)

# Bail out unless both the data file and the component count were given
if argc <= 2:
    sys.exit("Datafile and k arguments are required!")

# Load the comma-separated data matrix D (last column: class labels)
D = np.loadtxt(sys.argv[1], delimiter=",")

# Number of output dimensions requested on the command line
num_components = int(sys.argv[2])

# Run LDA and keep the projected data
A = LDA(D, num_components)
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
5.1,3.5,1.4,0.2,0
2+
4.9,3.0,1.4,0.2,0
3+
4.7,3.2,1.3,0.2,0
4+
4.6,3.1,1.5,0.2,0
5+
5.0,3.6,1.4,0.2,0
6+
5.4,3.9,1.7,0.4,0
7+
4.6,3.4,1.4,0.3,0
8+
5.0,3.4,1.5,0.2,0
9+
4.4,2.9,1.4,0.2,0
10+
4.9,3.1,1.5,0.1,0
11+
5.4,3.7,1.5,0.2,0
12+
4.8,3.4,1.6,0.2,0
13+
4.8,3.0,1.4,0.1,0
14+
4.3,3.0,1.1,0.1,0
15+
5.8,4.0,1.2,0.2,0
16+
5.7,4.4,1.5,0.4,0
17+
5.4,3.9,1.3,0.4,0
18+
5.1,3.5,1.4,0.3,0
19+
5.7,3.8,1.7,0.3,0
20+
5.1,3.8,1.5,0.3,0
21+
5.4,3.4,1.7,0.2,0
22+
5.1,3.7,1.5,0.4,0
23+
4.6,3.6,1.0,0.2,0
24+
5.1,3.3,1.7,0.5,0
25+
4.8,3.4,1.9,0.2,0
26+
5.0,3.0,1.6,0.2,0
27+
5.0,3.4,1.6,0.4,0
28+
5.2,3.5,1.5,0.2,0
29+
5.2,3.4,1.4,0.2,0
30+
4.7,3.2,1.6,0.2,0
31+
4.8,3.1,1.6,0.2,0
32+
5.4,3.4,1.5,0.4,0
33+
5.2,4.1,1.5,0.1,0
34+
5.5,4.2,1.4,0.2,0
35+
4.9,3.1,1.5,0.1,0
36+
5.0,3.2,1.2,0.2,0
37+
5.5,3.5,1.3,0.2,0
38+
4.9,3.1,1.5,0.1,0
39+
4.4,3.0,1.3,0.2,0
40+
5.1,3.4,1.5,0.2,0
41+
5.0,3.5,1.3,0.3,0
42+
4.5,2.3,1.3,0.3,0
43+
4.4,3.2,1.3,0.2,0
44+
5.0,3.5,1.6,0.6,0
45+
5.1,3.8,1.9,0.4,0
46+
4.8,3.0,1.4,0.3,0
47+
5.1,3.8,1.6,0.2,0
48+
4.6,3.2,1.4,0.2,0
49+
5.3,3.7,1.5,0.2,0
50+
5.0,3.3,1.4,0.2,0
51+
7.0,3.2,4.7,1.4,1
52+
6.4,3.2,4.5,1.5,1
53+
6.9,3.1,4.9,1.5,1
54+
5.5,2.3,4.0,1.3,1
55+
6.5,2.8,4.6,1.5,1
56+
5.7,2.8,4.5,1.3,1
57+
6.3,3.3,4.7,1.6,1
58+
4.9,2.4,3.3,1.0,1
59+
6.6,2.9,4.6,1.3,1
60+
5.2,2.7,3.9,1.4,1
61+
5.0,2.0,3.5,1.0,1
62+
5.9,3.0,4.2,1.5,1
63+
6.0,2.2,4.0,1.0,1
64+
6.1,2.9,4.7,1.4,1
65+
5.6,2.9,3.6,1.3,1
66+
6.7,3.1,4.4,1.4,1
67+
5.6,3.0,4.5,1.5,1
68+
5.8,2.7,4.1,1.0,1
69+
6.2,2.2,4.5,1.5,1
70+
5.6,2.5,3.9,1.1,1
71+
5.9,3.2,4.8,1.8,1
72+
6.1,2.8,4.0,1.3,1
73+
6.3,2.5,4.9,1.5,1
74+
6.1,2.8,4.7,1.2,1
75+
6.4,2.9,4.3,1.3,1
76+
6.6,3.0,4.4,1.4,1
77+
6.8,2.8,4.8,1.4,1
78+
6.7,3.0,5.0,1.7,1
79+
6.0,2.9,4.5,1.5,1
80+
5.7,2.6,3.5,1.0,1
81+
5.5,2.4,3.8,1.1,1
82+
5.5,2.4,3.7,1.0,1
83+
5.8,2.7,3.9,1.2,1
84+
6.0,2.7,5.1,1.6,1
85+
5.4,3.0,4.5,1.5,1
86+
6.0,3.4,4.5,1.6,1
87+
6.7,3.1,4.7,1.5,1
88+
6.3,2.3,4.4,1.3,1
89+
5.6,3.0,4.1,1.3,1
90+
5.5,2.5,4.0,1.3,1
91+
5.5,2.6,4.4,1.2,1
92+
6.1,3.0,4.6,1.4,1
93+
5.8,2.6,4.0,1.2,1
94+
5.0,2.3,3.3,1.0,1
95+
5.6,2.7,4.2,1.3,1
96+
5.7,3.0,4.2,1.2,1
97+
5.7,2.9,4.2,1.3,1
98+
6.2,2.9,4.3,1.3,1
99+
5.1,2.5,3.0,1.1,1
100+
5.7,2.8,4.1,1.3,1
101+
6.3,3.3,6.0,2.5,2
102+
5.8,2.7,5.1,1.9,2
103+
7.1,3.0,5.9,2.1,2
104+
6.3,2.9,5.6,1.8,2
105+
6.5,3.0,5.8,2.2,2
106+
7.6,3.0,6.6,2.1,2
107+
4.9,2.5,4.5,1.7,2
108+
7.3,2.9,6.3,1.8,2
109+
6.7,2.5,5.8,1.8,2
110+
7.2,3.6,6.1,2.5,2
111+
6.5,3.2,5.1,2.0,2
112+
6.4,2.7,5.3,1.9,2
113+
6.8,3.0,5.5,2.1,2
114+
5.7,2.5,5.0,2.0,2
115+
5.8,2.8,5.1,2.4,2
116+
6.4,3.2,5.3,2.3,2
117+
6.5,3.0,5.5,1.8,2
118+
7.7,3.8,6.7,2.2,2
119+
7.7,2.6,6.9,2.3,2
120+
6.0,2.2,5.0,1.5,2
121+
6.9,3.2,5.7,2.3,2
122+
5.6,2.8,4.9,2.0,2
123+
7.7,2.8,6.7,2.0,2
124+
6.3,2.7,4.9,1.8,2
125+
6.7,3.3,5.7,2.1,2
126+
7.2,3.2,6.0,1.8,2
127+
6.2,2.8,4.8,1.8,2
128+
6.1,3.0,4.9,1.8,2
129+
6.4,2.8,5.6,2.1,2
130+
7.2,3.0,5.8,1.6,2
131+
7.4,2.8,6.1,1.9,2
132+
7.9,3.8,6.4,2.0,2
133+
6.4,2.8,5.6,2.2,2
134+
6.3,2.8,5.1,1.5,2
135+
6.1,2.6,5.6,1.4,2
136+
7.7,3.0,6.1,2.3,2
137+
6.3,3.4,5.6,2.4,2
138+
6.4,3.1,5.5,1.8,2
139+
6.0,3.0,4.8,1.8,2
140+
6.9,3.1,5.4,2.1,2
141+
6.7,3.1,5.6,2.4,2
142+
6.9,3.1,5.1,2.3,2
143+
5.8,2.7,5.1,1.9,2
144+
6.8,3.2,5.9,2.3,2
145+
6.7,3.3,5.7,2.5,2
146+
6.7,3.0,5.2,2.3,2
147+
6.3,2.5,5.0,1.9,2
148+
6.5,3.0,5.2,2.0,2
149+
6.2,3.4,5.4,2.3,2
150+
5.9,3.0,5.1,1.8,2
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
5.1,3.5,0
2+
4.9,3.0,0
3+
4.7,3.2,0
4+
4.6,3.1,0
5+
5.0,3.6,0
6+
5.4,3.9,0
7+
4.6,3.4,0
8+
5.0,3.4,0
9+
4.4,2.9,0
10+
4.9,3.1,0
11+
5.4,3.7,0
12+
4.8,3.4,0
13+
4.8,3.0,0
14+
4.3,3.0,0
15+
5.8,4.0,0
16+
5.7,4.4,0
17+
5.4,3.9,0
18+
5.1,3.5,0
19+
5.7,3.8,0
20+
5.1,3.8,0
21+
5.4,3.4,0
22+
5.1,3.7,0
23+
4.6,3.6,0
24+
5.1,3.3,0
25+
4.8,3.4,0
26+
5.0,3.0,0
27+
5.0,3.4,0
28+
5.2,3.5,0
29+
5.2,3.4,0
30+
4.7,3.2,0
31+
4.8,3.1,0
32+
5.4,3.4,0
33+
5.2,4.1,0
34+
5.5,4.2,0
35+
4.9,3.1,0
36+
5.0,3.2,0
37+
5.5,3.5,0
38+
4.9,3.1,0
39+
4.4,3.0,0
40+
5.1,3.4,0
41+
5.0,3.5,0
42+
4.5,2.3,0
43+
4.4,3.2,0
44+
5.0,3.5,0
45+
5.1,3.8,0
46+
4.8,3.0,0
47+
5.1,3.8,0
48+
4.6,3.2,0
49+
5.3,3.7,0
50+
5.0,3.3,0
51+
7.0,3.2,1
52+
6.4,3.2,1
53+
6.9,3.1,1
54+
5.5,2.3,1
55+
6.5,2.8,1
56+
5.7,2.8,1
57+
6.3,3.3,1
58+
4.9,2.4,1
59+
6.6,2.9,1
60+
5.2,2.7,1
61+
5.0,2.0,1
62+
5.9,3.0,1
63+
6.0,2.2,1
64+
6.1,2.9,1
65+
5.6,2.9,1
66+
6.7,3.1,1
67+
5.6,3.0,1
68+
5.8,2.7,1
69+
6.2,2.2,1
70+
5.6,2.5,1
71+
5.9,3.2,1
72+
6.1,2.8,1
73+
6.3,2.5,1
74+
6.1,2.8,1
75+
6.4,2.9,1
76+
6.6,3.0,1
77+
6.8,2.8,1
78+
6.7,3.0,1
79+
6.0,2.9,1
80+
5.7,2.6,1
81+
5.5,2.4,1
82+
5.5,2.4,1
83+
5.8,2.7,1
84+
6.0,2.7,1
85+
5.4,3.0,1
86+
6.0,3.4,1
87+
6.7,3.1,1
88+
6.3,2.3,1
89+
5.6,3.0,1
90+
5.5,2.5,1
91+
5.5,2.6,1
92+
6.1,3.0,1
93+
5.8,2.6,1
94+
5.0,2.3,1
95+
5.6,2.7,1
96+
5.7,3.0,1
97+
5.7,2.9,1
98+
6.2,2.9,1
99+
5.1,2.5,1
100+
5.7,2.8,1
101+
6.3,3.3,2
102+
5.8,2.7,2
103+
7.1,3.0,2
104+
6.3,2.9,2
105+
6.5,3.0,2
106+
7.6,3.0,2
107+
4.9,2.5,2
108+
7.3,2.9,2
109+
6.7,2.5,2
110+
7.2,3.6,2
111+
6.5,3.2,2
112+
6.4,2.7,2
113+
6.8,3.0,2
114+
5.7,2.5,2
115+
5.8,2.8,2
116+
6.4,3.2,2
117+
6.5,3.0,2
118+
7.7,3.8,2
119+
7.7,2.6,2
120+
6.0,2.2,2
121+
6.9,3.2,2
122+
5.6,2.8,2
123+
7.7,2.8,2
124+
6.3,2.7,2
125+
6.7,3.3,2
126+
7.2,3.2,2
127+
6.2,2.8,2
128+
6.1,3.0,2
129+
6.4,2.8,2
130+
7.2,3.0,2
131+
7.4,2.8,2
132+
7.9,3.8,2
133+
6.4,2.8,2
134+
6.3,2.8,2
135+
6.1,2.6,2
136+
7.7,3.0,2
137+
6.3,3.4,2
138+
6.4,3.1,2
139+
6.0,3.0,2
140+
6.9,3.1,2
141+
6.7,3.1,2
142+
6.9,3.1,2
143+
5.8,2.7,2
144+
6.8,3.2,2
145+
6.7,3.3,2
146+
6.7,3.0,2
147+
6.3,2.5,2
148+
6.5,3.0,2
149+
6.2,3.4,2
150+
5.9,3.0,2

0 commit comments

Comments
 (0)