11"""
2- Implementation of sequential minimal optimization (SMO) for support vector machines
3- (SVM).
2+ Sequential minimal optimization (SMO) for support vector machines (SVM)
43
5- Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
6- programming (QP) problem that arises during the training of support vector
7- machines.
8- It was invented by John Platt in 1998.
4+ Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
5+ programming (QP) problem that arises during the training of SVMs. It was invented by
6+ John Platt in 1998.
97
108Input:
119 0: type: numpy.ndarray.
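
At its core, SMO repeatedly picks one pair of Lagrange multipliers and solves the
resulting two-variable QP in closed form. A minimal standalone sketch of that per-pair
update, following Platt (1998); the function and variable names here are illustrative,
not taken from this file:

    import numpy as np

    def smo_pair_update(a1, a2, y1, y2, e1, e2, k11, k22, k12, c):
        """One clipped SMO step for the multiplier pair (a1, a2)."""
        # Feasible interval [low, high] for the new a2, from 0 <= alpha <= C
        # and the linear constraint y1 * a1 + y2 * a2 = constant.
        if y1 != y2:
            low, high = max(0.0, a2 - a1), min(c, c + a2 - a1)
        else:
            low, high = max(0.0, a2 + a1 - c), min(c, a2 + a1)
        # eta is the curvature of the objective along the chosen direction.
        eta = k11 + k22 - 2.0 * k12
        if eta <= 0.0:
            # Degenerate case; Platt evaluates the interval endpoints instead.
            return a1, a2
        a2_new = float(np.clip(a2 + y2 * (e1 - e2) / eta, low, high))
        # Adjust a1 so the equality constraint stays satisfied.
        a1_new = a1 + y1 * y2 * (a2 - a2_new)
        return a1_new, a2_new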
@@ -124,8 +122,7 @@ def fit(self):
             b_old = self._b
             self._b = b
 
-            # 4: update error value,here we only calculate those non-bound samples'
-            # error
+            # 4: update error, here we only calculate the error for non-bound samples
             self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
             for s in self.unbound:
                 if s in (i1, i2):
@@ -136,7 +133,7 @@ def fit(self):
                         + (self._b - b_old)
                     )
 
-            # if i1 or i2 is non-bound,update there error value to zero
+            # if i1 or i2 is non-bound, update their error value to zero
             if self._is_unbound(i1):
                 self._error[i1] = 0
             if self._is_unbound(i2):
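
For reference, the increment this loop applies to each cached non-bound error is
Platt's standard bookkeeping step:

    E_k <- E_k + y1*(a1_new - a1)*K(x1, xk) + y2*(a2_new - a2)*K(x2, xk) + (b_new - b_old)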
@@ -161,7 +158,7 @@ def predict(self, test_samples, classify=True):
             results.append(result)
         return np.array(results)
 
-    # Check if alpha violate KKT condition
+    # Check if alpha violates the KKT condition
     def _check_obey_kkt(self, index):
         alphas = self.alphas
         tol = self._tol
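
The KKT conditions that _check_obey_kkt tests (up to the tolerance tol) are the
standard optimality conditions of the soft-margin dual:

    alpha_i = 0      =>  y_i * g(x_i) >= 1
    0 < alpha_i < C  =>  y_i * g(x_i) == 1
    alpha_i = C      =>  y_i * g(x_i) <= 1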
@@ -172,20 +169,19 @@ def _check_obey_kkt(self, index):
 
     # Get value calculated from kernel function
     def _k(self, i1, i2):
-        # for test samples,use Kernel function
+        # for test samples, use kernel function
         if isinstance(i2, np.ndarray):
             return self.Kernel(self.samples[i1], i2)
-        # for train samples,Kernel values have been saved in matrix
+        # for training samples, kernel values have been saved in matrix
         else:
             return self._K_matrix[i1, i2]
 
-    # Get sample's error
+    # Get error for sample
     def _e(self, index):
         """
         Two cases:
-        1:Sample[index] is non-bound,Fetch error from list: _error
-        2:sample[index] is bound,Use predicted value deduct true value: g(xi) - yi
-
+        1: Sample[index] is non-bound, fetch error from list: _error
+        2: sample[index] is bound, use predicted value minus true value: g(xi) - yi
         """
         # get from error data
         if self._is_unbound(index):
@@ -196,7 +192,7 @@ def _e(self, index):
             yi = self.tags[index]
             return gx - yi
 
-    # Calculate Kernel matrix of all possible i1,i2 , saving time
+    # Calculate kernel matrix of all possible i1, i2, saving time
     def _calculate_k_matrix(self):
         k_matrix = np.zeros([self.length, self.length])
         for i in self._all_samples:
@@ -206,7 +202,7 @@ def _calculate_k_matrix(self):
                 )
         return k_matrix
 
-    # Predict test sample's tag
+    # Predict tag for test sample
     def _predict(self, sample):
         k = self._k
         predicted_value = (
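
The value _predict assembles is the SVM decision function, and the error cached by
_e is its deviation from the true tag:

    g(x) = sum_j alpha_j * y_j * K(x_j, x) + b
    E_i  = g(x_i) - y_i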
@@ -222,30 +218,31 @@ def _predict(self, sample):
 
     # Choose alpha1 and alpha2
     def _choose_alphas(self):
-        locis = yield from self._choose_a1()
-        if not locis:
+        loci = yield from self._choose_a1()
+        if not loci:
             return None
-        return locis
+        return loci
 
     def _choose_a1(self):
         """
-        Choose first alpha;steps:
-        1:First loop over all sample
-        2:Second loop over all non-bound samples till all non-bound samples does not
-        voilate kkt condition.
-        3:Repeat this two process endlessly,till all samples does not voilate kkt
-        condition samples after first loop.
+        Choose first alpha
+        Steps:
+        1: First loop over all samples
+        2: Second loop over all non-bound samples until no non-bound samples violate
+        the KKT condition.
+        3: Repeat these two processes until no samples violate the KKT condition
+        after the first loop.
         """
         while True:
             all_not_obey = True
             # all sample
-            print("scanning all sample!")
+            print("Scanning all samples!")
             for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]:
                 all_not_obey = False
                 yield from self._choose_a2(i1)
 
             # non-bound sample
-            print("scanning non-bound sample!")
+            print("Scanning non-bound samples!")
             while True:
                 not_obey = True
                 for i1 in [
@@ -256,20 +253,21 @@ def _choose_a1(self):
                     not_obey = False
                     yield from self._choose_a2(i1)
                 if not_obey:
-                    print("all non-bound samples fit the KKT condition!")
+                    print("All non-bound samples satisfy the KKT condition!")
                     break
             if all_not_obey:
-                print("all samples fit the KKT condition! Optimization done!")
+                print("All samples satisfy the KKT condition!")
                 break
         return False
 
     def _choose_a2(self, i1):
         """
-        Choose the second alpha by using heuristic algorithm;steps:
-        1: Choose alpha2 which gets the maximum step size (|E1 - E2|).
-        2: Start in a random point,loop over all non-bound samples till alpha1 and
+        Choose the second alpha using a heuristic algorithm
+        Steps:
+        1: Choose alpha2 that maximizes the step size (|E1 - E2|).
+        2: Start in a random point, loop over all non-bound samples till alpha1 and
         alpha2 are optimized.
-        3: Start in a random point,loop over all samples till alpha1 and alpha2 are
+        3: Start in a random point, loop over all samples till alpha1 and alpha2 are
         optimized.
         """
         self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
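
Alpha selection is structured as nested generators: _choose_a1 forwards candidate
pairs produced by _choose_a2 via yield from, and the training loop pulls them one at
a time. A toy of the same control flow (illustrative only, not code from this file):

    def inner(i):
        for j in range(2):
            yield (i, j)  # one candidate pair

    def outer():
        for i in range(2):
            yield from inner(i)  # forward each pair to the caller unchanged

    gen = outer()
    print(next(gen))  # (0, 0); the training loop would advance the real generator this way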
@@ -306,7 +304,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         if i1 == i2:
             return None, None
 
-        # calculate L and H  which bound the new alpha2
+        # calculate L and H which bound the new alpha2
         s = y1 * y2
         if s == -1:
             l, h = max(0.0, a2 - a1), min(self._c, self._c + a2 - a1)  # noqa: E741
@@ -320,7 +318,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         k22 = k(i2, i2)
         k12 = k(i1, i2)
 
-        # select the new alpha2 which could get the minimal objectives
+        # select the new alpha2 which could achieve the minimal objectives
         if (eta := k11 + k22 - 2.0 * k12) > 0.0:
             a2_new_unc = a2 + (y2 * (e1 - e2)) / eta
             # a2_new has a boundary
@@ -335,7 +333,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
             l1 = a1 + s * (a2 - l)
             h1 = a1 + s * (a2 - h)
 
-            # way 1
+            # Method 1
             f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2)
             f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2)
             ol = (
@@ -353,9 +351,8 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
                 + s * h * h1 * k(i1, i2)
             )
             """
-            # way 2
-            Use objective function check which alpha2 new could get the minimal
-            objectives
+            Method 2: Use objective function to check which alpha2_new could achieve
+            the minimal objectives
             """
             if ol < (oh - self._eps):
                 a2_new = l
@@ -375,7 +372,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
 
         return a1_new, a2_new
 
-    # Normalise data using min_max way
+    # Normalize data using min-max method
     def _norm(self, data):
         if self._init:
             self._min = np.min(data, axis=0)
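
The min-max normalization referenced here is the usual column-wise rescaling into
[0, 1], with the minima (and corresponding ranges) remembered from the first call so
that later samples are scaled with the same training statistics:

    x_scaled = (x - x_min) / (x_max - x_min)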
@@ -424,7 +421,7 @@ def _rbf(self, v1, v2):
 
     def _check(self):
         if self._kernel == self._rbf and self.gamma < 0:
-            raise ValueError("gamma value must greater than 0")
+            raise ValueError("gamma value must be non-negative")
 
     def _get_kernel(self, kernel_name):
         maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf}
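
For the RBF case, _rbf computes the usual Gaussian kernel, which is why _check
insists that gamma is non-negative:

    K(x, y) = exp(-gamma * ||x - y||^2)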
@@ -444,27 +441,27 @@ def call_func(*args, **kwargs):
         start_time = time.time()
         func(*args, **kwargs)
         end_time = time.time()
-        print(f"smo algorithm cost {end_time - start_time} seconds")
+        print(f"SMO algorithm cost {end_time - start_time} seconds")
 
     return call_func
 
 
 @count_time
-def test_cancel_data():
-    print("Hello!\nStart test svm by smo algorithm!")
+def test_cancer_data():
+    print("Hello!\nStart test SVM using the SMO algorithm!")
     # 0: download dataset and load into pandas' dataframe
-    if not os.path.exists(r"cancel_data.csv"):
+    if not os.path.exists(r"cancer_data.csv"):
         request = urllib.request.Request(  # noqa: S310
             CANCER_DATASET_URL,
             headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"},
         )
         response = urllib.request.urlopen(request)  # noqa: S310
         content = response.read().decode("utf-8")
-        with open(r"cancel_data.csv", "w") as f:
+        with open(r"cancer_data.csv", "w") as f:
             f.write(content)
 
     data = pd.read_csv(
-        "cancel_data.csv",
+        "cancer_data.csv",
         header=None,
         dtype={0: str},  # Assuming the first column contains string data
     )
@@ -479,14 +476,14 @@ def test_cancel_data():
     train_data, test_data = samples[:328, :], samples[328:, :]
     test_tags, test_samples = test_data[:, 0], test_data[:, 1:]
 
-    # 3: choose kernel function,and set initial alphas to zero(optional)
-    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+    # 3: choose kernel function, and set initial alphas to zero (optional)
+    my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
     al = np.zeros(train_data.shape[0])
 
     # 4: calculating best alphas using SMO algorithm and predict test_data samples
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         alpha_list=al,
         cost=0.4,
         b=0.0,
@@ -501,30 +498,30 @@ def test_cancel_data():
     for i in range(test_tags.shape[0]):
         if test_tags[i] == predict[i]:
             score += 1
-    print(f"\nall: {test_num}\nright: {score}\nfalse: {test_num - score}")
+    print(f"\nAll: {test_num}\nCorrect: {score}\nIncorrect: {test_num - score}")
     print(f"Rough Accuracy: {score / test_tags.shape[0]}")
 
 
 def test_demonstration():
     # change stdout
-    print("\nStart plot,please wait!!!")
+    print("\nStarting plot, please wait!")
     sys.stdout = open(os.devnull, "w")
 
     ax1 = plt.subplot2grid((2, 2), (0, 0))
     ax2 = plt.subplot2grid((2, 2), (0, 1))
     ax3 = plt.subplot2grid((2, 2), (1, 0))
     ax4 = plt.subplot2grid((2, 2), (1, 1))
-    ax1.set_title("linear svm, cost: 0.1")
+    ax1.set_title("Linear SVM, cost = 0.1")
     test_linear_kernel(ax1, cost=0.1)
-    ax2.set_title("linear svm, cost: 500")
+    ax2.set_title("Linear SVM, cost = 500")
     test_linear_kernel(ax2, cost=500)
-    ax3.set_title("rbf kernel svm, cost: 0.1")
+    ax3.set_title("RBF kernel SVM, cost = 0.1")
     test_rbf_kernel(ax3, cost=0.1)
-    ax4.set_title("rbf kernel svm, cost: 500")
+    ax4.set_title("RBF kernel SVM, cost = 500")
     test_rbf_kernel(ax4, cost=500)
 
     sys.stdout = sys.__stdout__
-    print("Plot done!!!")
+    print("Plot done!")
 
 
 def test_linear_kernel(ax, cost):
@@ -535,10 +532,10 @@ def test_linear_kernel(ax, cost):
     scaler = StandardScaler()
     train_x_scaled = scaler.fit_transform(train_x, train_y)
     train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-    mykernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
+    my_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         cost=cost,
         tolerance=0.001,
         auto_norm=False,
@@ -555,10 +552,10 @@ def test_rbf_kernel(ax, cost):
     scaler = StandardScaler()
     train_x_scaled = scaler.fit_transform(train_x, train_y)
     train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+    my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         cost=cost,
         tolerance=0.001,
         auto_norm=False,
@@ -571,11 +568,11 @@ def plot_partition_boundary(
     model, train_data, ax, resolution=100, colors=("b", "k", "r")
 ):
     """
-    We can not get the optimum w of our kernel svm model which is different from linear
-    svm. For this reason, we generate randomly distributed points with high desity and
-    prediced values of these points are calculated by using our trained model. Then we
-    could use this prediced values to draw contour map.
-    And this contour map can represent svm's partition boundary.
+    We cannot get the optimal w of our kernel SVM model, which is different from a
+    linear SVM. For this reason, we generate randomly distributed points with high
+    density, and predicted values of these points are calculated using our trained
+    model. Then we could use these predicted values to draw a contour map, and this
+    contour map represents the SVM's partition boundary.
     """
     train_data_x = train_data[:, 1]
     train_data_y = train_data[:, 2]
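
A minimal sketch of the grid-and-contour idea this docstring describes, assuming only
a model with the predict(test_samples, classify=...) method seen earlier in this diff
(the helper name and plotting details are illustrative):

    import numpy as np
    import matplotlib.pyplot as plt

    def plot_boundary_sketch(model, xs, ys, ax, resolution=100):
        # Dense grid covering the range of the training points.
        gx, gy = np.meshgrid(
            np.linspace(xs.min(), xs.max(), resolution),
            np.linspace(ys.min(), ys.max(), resolution),
        )
        grid = np.c_[gx.ravel(), gy.ravel()]
        # Raw decision values at every grid point, reshaped back onto the grid.
        z = model.predict(grid, classify=False).reshape(gx.shape)
        # The zero level set approximates the partition boundary.
        ax.contour(gx, gy, z, levels=[-1.0, 0.0, 1.0], colors=("b", "k", "r"))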
@@ -620,6 +617,6 @@ def plot_partition_boundary(
 
 
 if __name__ == "__main__":
-    test_cancel_data()
+    test_cancer_data()
     test_demonstration()
     plt.show()