11#!/usr/bin/env python3
22
33import argparse
4+ import os
5+ import glob
46from time import time
57
68from sklearn import svm
79from sklearn .linear_model import Perceptron
810
911
def main():
    """Load the training and test email sets, train an SVM, and print predictions.

    The dataset root is taken from the parsed command-line arguments and is
    expected to contain ``training`` and ``test`` sub-directories.
    """
    parsed_args = parse_command_line_arguments()

    # Resolve the root to an absolute path once, so both sub-directory paths
    # stay valid even if the working directory changes between the two loads.
    data_path = os.path.abspath(parsed_args.path)
    training_emails, training_labels = load_dataset(
        os.path.join(data_path, "training"))
    test_emails, test_labels = load_dataset(os.path.join(data_path, "test"))

    print("Training Emails data:")
    print(training_emails)
    print()
    print("Training Emails labels:")
    print(training_labels)
    print()

    # NOTE(review): load_dataset currently returns an empty feature list, so
    # training presumably cannot succeed until feature extraction is added.
    svm_classifier = train_svm_classifier(training_emails, training_labels)

    results = svm_classifier.predict(test_emails)
    print(results)
1330
1431
1532def parse_command_line_arguments ():
@@ -22,8 +39,31 @@ def parse_command_line_arguments():
2239 return parser .parse_args ()
2340
2441
def load_dataset(data_path):
    """Label every entry found in ``data_path`` as spam or ham.

    An entry whose name starts with ``"sp"`` is labelled ``"spam"``; every
    other entry is labelled ``"ham"``. Entries are visited in sorted name
    order so the label sequence is reproducible across runs and platforms.

    The directory is listed directly rather than via ``os.chdir``, so the
    process working directory is left untouched and later relative paths
    keep resolving.

    Args:
        data_path: Directory containing one entry per email.

    Returns:
        A ``(email_data, email_labels)`` tuple. ``email_data`` is currently
        always an empty list (no file contents are read yet);
        ``email_labels`` holds one label per directory entry.

    Raises:
        OSError: If ``data_path`` cannot be listed (e.g. it does not exist).
    """
    print("Loading dataset at " + data_path)

    # TODO: populate email_data with features extracted from each file.
    email_data = []
    email_labels = [
        "spam" if name.startswith("sp") else "ham"
        for name in sorted(os.listdir(data_path))
    ]

    print("Loaded " + str(len(email_labels)) + " emails")

    return email_data, email_labels
58+
59+
def train_svm_classifier(emails, labels):
    """Fit a support-vector classifier on the given samples and return it.

    Args:
        emails: Training samples passed straight to ``SVC.fit``.
        labels: Target label for each sample, in the same order.

    Returns:
        The fitted ``svm.SVC`` instance.
    """
    # Hyper-parameters are collected in one place; verbose output is enabled
    # so the solver reports its progress while fitting.
    settings = {"gamma": 'scale', "C": 1, "verbose": True}
    model = svm.SVC(**settings)
    model.fit(emails, labels)
    return model
2767
2868
2969if __name__ == "__main__" :
0 commit comments