Skip to content

Commit 870a358

Browse files
committed
Begin work on email dataset loading
1 parent 06836dd commit 870a358

File tree

1 file changed

+44
-4
lines changed

1 file changed

+44
-4
lines changed

part2/experiments.py

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,32 @@
11
#!/usr/bin/env python3
22

33
import argparse
4+
import os
5+
import glob
46
from time import time
57

68
from sklearn import svm
79
from sklearn.linear_model import Perceptron
810

911

1012
def main():
    """Load the email datasets, train an SVM classifier and print its predictions.

    The dataset root is taken from the parsed command-line arguments and is
    expected to contain a ``training`` and a ``test`` sub-directory.
    """
    parsed_args = parse_command_line_arguments()

    # Resolve the root to an absolute path once, so that both loads below
    # keep working even if the working directory changes between them.
    data_path = os.path.abspath(parsed_args.path)
    training_emails, training_labels = load_dataset(
        os.path.join(data_path, "training"))
    # test_labels is not used yet; only the predictions are printed below.
    test_emails, test_labels = load_dataset(os.path.join(data_path, "test"))

    print("Training Emails data:")
    print(training_emails)
    print()
    print("Training Emails labels:")
    print(training_labels)
    print()

    svm_classifier = train_svm_classifier(training_emails, training_labels)

    results = svm_classifier.predict(test_emails)
    print(results)
1330

1431

1532
def parse_command_line_arguments():
@@ -22,8 +39,31 @@ def parse_command_line_arguments():
2239
return parser.parse_args()
2340

2441

25-
def load_dataset(dataPath):
26-
print(dataPath)
42+
def load_dataset(data_path):
    """Label every entry of the directory ``data_path`` as spam or ham.

    An entry whose name starts with ``"sp"`` is labelled ``"spam"``; every
    other entry is labelled ``"ham"``. Entries are visited in sorted name
    order so the result is deterministic across runs and platforms.

    The directory is listed directly instead of changing into it, so the
    process working directory is left untouched and later relative paths
    keep resolving as the caller expects.

    Returns:
        A tuple ``(email_data, email_labels)``. ``email_data`` is currently
        always an empty list (no email content is read yet);
        ``email_labels`` holds one label per directory entry.

    Raises:
        OSError: if ``data_path`` cannot be listed.
    """
    print("Loading dataset at " + data_path)

    # TODO: populate with the per-email content/features; nothing is read yet.
    email_data = []
    email_labels = [
        "spam" if name.startswith("sp") else "ham"
        for name in sorted(os.listdir(data_path))
    ]

    print("Loaded " + str(len(email_labels)) + " emails")

    return email_data, email_labels
58+
59+
60+
def train_svm_classifier(emails, labels):
    """Fit a support-vector classifier on ``emails`` and ``labels``.

    The classifier is built with ``gamma='scale'``, ``C=1`` and verbose
    output enabled, then fitted on the given data.

    Returns:
        The fitted ``svm.SVC`` instance.
    """
    svc_options = {
        "gamma": 'scale',
        "C": 1,
        "verbose": True,
    }
    model = svm.SVC(**svc_options)
    model.fit(emails, labels)
    return model
2767

2868

2969
if __name__ == "__main__":

0 commit comments

Comments
 (0)