Skip to content

Commit 6f801ac

Browse files
committed
Added Adaboost algorithm
1 parent 851be66 commit 6f801ac

File tree

1 file changed

+29
-29
lines changed

1 file changed

+29
-29
lines changed

machine_learning/adaboost.py

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
Reference: https://en.wikipedia.org/wiki/AdaBoost
55
66
>>> import numpy as np
7-
>>> X = np.array([[0, 0], [1, 1], [1, 0], [0, 1]])
8-
>>> y = np.array([0, 1, 1, 0])
7+
>>> features = np.array([[0, 0], [1, 1], [1, 0], [0, 1]])
8+
>>> labels = np.array([0, 1, 1, 0])
99
>>> clf = AdaBoost(n_estimators=5)
10-
>>> clf.fit(X, y)
10+
>>> clf.fit(features, labels)
1111
>>> clf.predict(np.array([[0, 0], [1, 1]]))
1212
array([0, 1])
1313
"""
@@ -26,65 +26,65 @@ def __init__(self, n_estimators: int = 50) -> None:
2626
self.alphas: List[float] = [] # Weights for each weak learner
2727
self.models: List[Dict[str, Any]] = [] # List of weak learners (stumps)
2828

29-
def fit(self, feature_matrix: np.ndarray, target: np.ndarray) -> None:
    """Fit the AdaBoost ensemble of decision stumps.

    Runs ``self.n_estimators`` boosting rounds. Each round asks
    ``self._build_stump`` for the best stump under the current sample
    weights, derives the learner weight (alpha) from the stump's weighted
    error, and re-weights the samples before the next round.

    Args:
        feature_matrix: (n_samples, n_features) feature matrix
        target: (n_samples,) labels (0 or 1)

    Raises:
        ValueError: if feature_matrix contains no samples.
    """
    # Unpacking also rejects inputs that are not 2-D.
    n_samples, _ = feature_matrix.shape
    if n_samples == 0:
        raise ValueError("feature_matrix must contain at least one sample")

    # Bound on the weighted error: keeps alpha finite at both extremes.
    eps = 1e-10

    sample_weights = np.ones(n_samples) / n_samples  # Start from uniform weights
    self.models = []
    self.alphas = []
    y_signed = np.where(target == 0, -1, 1)  # Convert labels to -1, 1

    for _ in range(self.n_estimators):
        # Train a decision stump with weighted samples
        stump = self._build_stump(feature_matrix, y_signed, sample_weights)
        pred = stump["pred"]

        # An error of exactly 0 or 1 would make the log ratio +/-inf; an
        # infinite alpha turns the weights into NaN and breaks later rounds.
        err = min(max(stump["error"], eps), 1 - eps)

        # Compute alpha (learner weight)
        alpha = 0.5 * np.log((1 - err) / err)

        # Update sample weights and renormalise them to sum to 1
        sample_weights *= np.exp(-alpha * y_signed * pred)
        sample_weights /= np.sum(sample_weights)

        self.models.append(stump)
        self.alphas.append(alpha)
5252

53-
def predict(self, feature_matrix: np.ndarray) -> np.ndarray:
    """Return the ensemble's class label for every row of feature_matrix.

    Every stored stump casts a -1/+1 vote per sample through
    ``self._stump_predict``; votes are scaled by the stump's alpha and
    summed. A non-negative total maps to label 1, a negative total to 0.

    Args:
        feature_matrix: (n_samples, n_features) feature matrix

    Returns:
        (n_samples,) predicted labels (0 or 1)

    >>> import numpy as np
    >>> features = np.array([[0, 0], [1, 1], [1, 0], [0, 1]])
    >>> labels = np.array([0, 1, 1, 0])
    >>> clf = AdaBoost(n_estimators=5)
    >>> clf.fit(features, labels)
    >>> clf.predict(np.array([[0, 0], [1, 1]]))
    array([0, 1])
    """
    n_rows = feature_matrix.shape[0]
    weighted_votes = np.zeros(n_rows)

    for learner_weight, learner in zip(self.alphas, self.models):
        votes = self._stump_predict(
            feature_matrix,
            learner["feature"],
            learner["threshold"],
            learner["polarity"],
        )
        weighted_votes += learner_weight * votes

    # A total of exactly zero (including "no stumps stored") resolves to 1.
    is_positive = weighted_votes >= 0
    return np.where(is_positive, 1, 0)
7474

7575
def _build_stump(
76-
self, X: np.ndarray, y: np.ndarray, w: np.ndarray
76+
self, feature_matrix: np.ndarray, target_signed: np.ndarray, sample_weights: np.ndarray
7777
) -> Dict[str, Any]:
7878
"""Find the best decision stump for current weights."""
79-
n_samples, n_features = X.shape
79+
n_samples, n_features = feature_matrix.shape
8080
min_error = float("inf")
8181
best_stump: Dict[str, Any] = {}
8282
for feature in range(n_features):
83-
thresholds = np.unique(X[:, feature])
83+
thresholds = np.unique(feature_matrix[:, feature])
8484
for threshold in thresholds:
8585
for polarity in [1, -1]:
86-
pred = self._stump_predict(X, feature, threshold, polarity)
87-
error = np.sum(w * (pred != y))
86+
pred = self._stump_predict(feature_matrix, feature, threshold, polarity)
87+
error = np.sum(sample_weights * (pred != target_signed))
8888
if error < min_error:
8989
min_error = error
9090
best_stump = {
@@ -97,12 +97,12 @@ def _build_stump(
9797
return best_stump
9898

9999
def _stump_predict(
    self, feature_matrix: np.ndarray, feature: int, threshold: float, polarity: int
) -> np.ndarray:
    """Classify every sample with one threshold test on a single column.

    Args:
        feature_matrix: 2-D array; only column ``feature`` is read
        feature: index of the column to compare against the threshold
        threshold: cut point for the comparison
        polarity: 1 labels values strictly below the threshold as -1;
            any other value labels values strictly above it as -1

    Returns:
        Float array with one entry per row: -1.0 on the selected side of
        the threshold and 1.0 everywhere else (including equality).
    """
    column = feature_matrix[:, feature]
    if polarity == 1:
        on_negative_side = column < threshold
    else:
        on_negative_side = column > threshold
    return np.where(on_negative_side, -1.0, 1.0)

0 commit comments

Comments
 (0)