Skip to content

Commit 6233abc

Browse files
committed
Fixing and updating the AdaBoost algorithm
1 parent 6f801ac commit 6233abc

File tree

1 file changed

+14
-21
lines changed

1 file changed

+14
-21
lines changed

machine_learning/adaboost.py

Lines changed: 14 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
"""
1414

1515
import numpy as np
16-
from typing import Any, Dict, List
16+
from typing import Any
1717

1818

1919
class AdaBoost:
@@ -23,28 +23,25 @@ def __init__(self, n_estimators: int = 50) -> None:
2323
n_estimators: Number of boosting rounds.
2424
"""
2525
self.n_estimators: int = n_estimators
26-
self.alphas: List[float] = [] # Weights for each weak learner
27-
self.models: List[Dict[str, Any]] = [] # List of weak learners (stumps)
26+
self.alphas: list[float] = [] # Weights for each weak learner
27+
self.models: list[dict[str, Any]] = [] # List of weak learners (stumps)
2828

2929
def fit(self, feature_matrix: np.ndarray, target: np.ndarray) -> None:
3030
"""Fit AdaBoost model.
3131
Args:
3232
feature_matrix: (n_samples, n_features) feature matrix
3333
target: (n_samples,) labels (0 or 1)
3434
"""
35-
n_samples, n_features = feature_matrix.shape
36-
sample_weights = np.ones(n_samples) / n_samples # Initialize sample weights
35+
n_samples, _ = feature_matrix.shape
36+
sample_weights = np.ones(n_samples) / n_samples
3737
self.models = []
3838
self.alphas = []
39-
y_signed = np.where(target == 0, -1, 1) # Convert labels to -1, 1
39+
y_signed = np.where(target == 0, -1, 1)
4040
for _ in range(self.n_estimators):
41-
# Train a decision stump with weighted samples
4241
stump = self._build_stump(feature_matrix, y_signed, sample_weights)
4342
pred = stump["pred"]
4443
err = stump["error"]
45-
# Compute alpha (learner weight)
4644
alpha = 0.5 * np.log((1 - err) / (err + 1e-10))
47-
# Update sample weights
4845
sample_weights *= np.exp(-alpha * y_signed * pred)
4946
sample_weights /= np.sum(sample_weights)
5047
self.models.append(stump)
@@ -56,13 +53,6 @@ def predict(self, feature_matrix: np.ndarray) -> np.ndarray:
5653
feature_matrix: (n_samples, n_features) feature matrix
5754
Returns:
5855
(n_samples,) predicted labels (0 or 1)
59-
>>> import numpy as np
60-
>>> features = np.array([[0, 0], [1, 1], [1, 0], [0, 1]])
61-
>>> labels = np.array([0, 1, 1, 0])
62-
>>> clf = AdaBoost(n_estimators=5)
63-
>>> clf.fit(features, labels)
64-
>>> clf.predict(np.array([[0, 0], [1, 1]]))
65-
array([0, 1])
6656
"""
6757
clf_preds = np.zeros(feature_matrix.shape[0])
6858
for alpha, stump in zip(self.alphas, self.models):
@@ -73,12 +63,15 @@ def predict(self, feature_matrix: np.ndarray) -> np.ndarray:
7363
return np.where(clf_preds >= 0, 1, 0)
7464

7565
def _build_stump(
76-
self, feature_matrix: np.ndarray, target_signed: np.ndarray, sample_weights: np.ndarray
77-
) -> Dict[str, Any]:
66+
self,
67+
feature_matrix: np.ndarray,
68+
target_signed: np.ndarray,
69+
sample_weights: np.ndarray,
70+
) -> dict[str, Any]:
7871
"""Find the best decision stump for current weights."""
79-
n_samples, n_features = feature_matrix.shape
72+
_, n_features = feature_matrix.shape
8073
min_error = float("inf")
81-
best_stump: Dict[str, Any] = {}
74+
best_stump: dict[str, Any] = {}
8275
for feature in range(n_features):
8376
thresholds = np.unique(feature_matrix[:, feature])
8477
for threshold in thresholds:
@@ -105,4 +98,4 @@ def _stump_predict(
10598
pred[feature_matrix[:, feature] < threshold] = -1
10699
else:
107100
pred[feature_matrix[:, feature] > threshold] = -1
108-
return pred
101+
return pred

0 commit comments

Comments
 (0)