44Reference: https://en.wikipedia.org/wiki/AdaBoost 
55
66>>> import numpy as np 
7- >>> X  = np.array([[0, 0], [1, 1], [1, 0], [0, 1]]) 
8- >>> y  = np.array([0, 1, 1, 0]) 
7+ >>> features  = np.array([[0, 0], [1, 1], [1, 0], [0, 1]]) 
8+ >>> labels  = np.array([0, 1, 1, 0]) 
99>>> clf = AdaBoost(n_estimators=5) 
10- >>> clf.fit(X, y ) 
10+ >>> clf.fit(features, labels ) 
1111>>> clf.predict(np.array([[0, 0], [1, 1]])) 
1212array([0, 1]) 
1313""" 
@@ -26,65 +26,65 @@ def __init__(self, n_estimators: int = 50) -> None:
2626 self .alphas : List [float ] =  [] # Weights for each weak learner 
2727 self .models : List [Dict [str , Any ]] =  [] # List of weak learners (stumps) 
2828
def fit(self, feature_matrix: np.ndarray, target: np.ndarray) -> None:
    """Fit the AdaBoost ensemble of decision stumps.

    Any previously fitted learners are discarded. ``self.n_estimators``
    stumps are then trained one after another, each on the sample weights
    left behind by its predecessor.

    Args:
        feature_matrix: (n_samples, n_features) feature matrix.
        target: (n_samples,) labels (0 or 1).

    Raises:
        ValueError: If ``feature_matrix`` is not 2-D, holds no samples or
            no features, or ``target`` is not one label per sample.
    """
    feature_matrix = np.asarray(feature_matrix)
    target = np.asarray(target)
    if feature_matrix.ndim != 2:
        raise ValueError(
            "feature_matrix must be 2-D with shape (n_samples, n_features), "
            f"got shape {feature_matrix.shape}"
        )
    if feature_matrix.size == 0:
        # Without samples or features no stump can be built.
        raise ValueError(
            "feature_matrix must contain at least one sample and one feature"
        )
    n_samples = feature_matrix.shape[0]
    if target.shape != (n_samples,):
        # A (1,) or (n_samples, 1) target would otherwise broadcast silently
        # in the weight update instead of failing.
        raise ValueError(
            f"target must have shape ({n_samples},), got shape {target.shape}"
        )

    sample_weights = np.ones(n_samples) / n_samples  # Start from uniform weights
    self.models = []
    self.alphas = []
    y_signed = np.where(target == 0, -1, 1)  # Convert labels to -1, 1
    for _ in range(self.n_estimators):
        # Train a decision stump with weighted samples
        stump = self._build_stump(feature_matrix, y_signed, sample_weights)
        pred = stump["pred"]
        err = stump["error"]
        # Learner weight; the 1e-10 keeps the ratio finite when err is 0.
        alpha = 0.5 * np.log((1 - err) / (err + 1e-10))
        # y_signed * pred is +1 where the stump is right and -1 where it is
        # wrong, so misclassified samples gain weight for the next round.
        sample_weights *= np.exp(-alpha * y_signed * pred)
        sample_weights /= np.sum(sample_weights)
        self.models.append(stump)
        self.alphas.append(alpha)
5252
def predict(self, feature_matrix: np.ndarray) -> np.ndarray:
    """Predict class labels for samples in feature_matrix.

    Each stored stump votes -1 or +1 per sample and the votes are summed,
    weighted by the matching entry of ``self.alphas``. A non-negative total
    maps to label 1, so a tie (including the all-zero total produced when
    no learners are stored) is reported as class 1.

    Args:
        feature_matrix: (n_samples, n_features) feature matrix
    Returns:
        (n_samples,) predicted labels (0 or 1)

    >>> import numpy as np
    >>> features = np.array([[0, 0], [1, 1], [1, 0], [0, 1]])
    >>> labels = np.array([0, 1, 1, 0])
    >>> clf = AdaBoost(n_estimators=5)
    >>> clf.fit(features, labels)
    >>> clf.predict(np.array([[0, 0], [1, 1]]))
    array([0, 1])
    """
    clf_preds = np.zeros(feature_matrix.shape[0])
    for alpha, stump in zip(self.alphas, self.models):
        pred = self._stump_predict(
            feature_matrix,
            stump["feature"],
            stump["threshold"],
            stump["polarity"],
        )
        clf_preds += alpha * pred
    return np.where(clf_preds >= 0, 1, 0)
7474
7575 def  _build_stump (
76-  self , X : np .ndarray , y : np .ndarray , w : np .ndarray 
76+  self , feature_matrix : np .ndarray , target_signed : np .ndarray , sample_weights : np .ndarray 
7777 ) ->  Dict [str , Any ]:
7878 """Find the best decision stump for current weights.""" 
79-  n_samples , n_features  =  X .shape 
79+  n_samples , n_features  =  feature_matrix .shape 
8080 min_error  =  float ("inf" )
8181 best_stump : Dict [str , Any ] =  {}
8282 for  feature  in  range (n_features ):
83-  thresholds  =  np .unique (X [:, feature ])
83+  thresholds  =  np .unique (feature_matrix [:, feature ])
8484 for  threshold  in  thresholds :
8585 for  polarity  in  [1 , - 1 ]:
86-  pred  =  self ._stump_predict (X , feature , threshold , polarity )
87-  error  =  np .sum (w  *  (pred  !=  y ))
86+  pred  =  self ._stump_predict (feature_matrix , feature , threshold , polarity )
87+  error  =  np .sum (sample_weights  *  (pred  !=  target_signed ))
8888 if  error  <  min_error :
8989 min_error  =  error 
9090 best_stump  =  {
@@ -97,12 +97,12 @@ def _build_stump(
9797 return  best_stump 
9898
9999 def  _stump_predict (
100-  self , X : np .ndarray , feature : int , threshold : float , polarity : int 
100+  self , feature_matrix : np .ndarray , feature : int , threshold : float , polarity : int 
101101 ) ->  np .ndarray :
102102 """Predict using a single decision stump.""" 
103-  pred  =  np .ones (X .shape [0 ])
103+  pred  =  np .ones (feature_matrix .shape [0 ])
104104 if  polarity  ==  1 :
105-  pred [X [:, feature ] <  threshold ] =  - 1 
105+  pred [feature_matrix [:, feature ] <  threshold ] =  - 1 
106106 else :
107-  pred [X [:, feature ] >  threshold ] =  - 1 
107+  pred [feature_matrix [:, feature ] >  threshold ] =  - 1 
108108 return  pred 
0 commit comments