AdaBoost
import numpy as np

# AdaBoost for binary classification
# Assumes data X (shape N x M) with labels y in {-1, +1}
# The weak learner is a decision stump trained on weighted samples
def decision_stump_ada(X, y, weights):
    best_feature, best_threshold = None, None
    best_error = float('inf')
    N, M = X.shape  # N samples, M features
    for feature in range(M):
        thresholds = np.unique(X[:, feature])
        for threshold in thresholds:
            # Prediction rule: predict -1 below the threshold, else +1
            predictions = np.ones(N)
            predictions[X[:, feature] < threshold] = -1
            # Weighted error: total weight of misclassified samples
            error = np.sum(weights[predictions != y])
            if error < best_error:
                best_error = error
                best_feature = feature
                best_threshold = threshold
    # Return a simple predictor based on the best feature and threshold
    def stump_predict(X):
        predictions = np.ones(X.shape[0])
        predictions[X[:, best_feature] < best_threshold] = -1
        return predictions
    return stump_predict
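# Worked example (a minimal sketch; the numbers are hypothetical): with
# uniform weights w = [0.25, 0.25, 0.25, 0.25], a single feature column
# [1, 2, 3, 4], and labels [-1, -1, 1, 1], the threshold 3 predicts -1
# for x < 3 and +1 otherwise, classifying every sample correctly, so its
# weighted error is 0 and the stump selects feature 0 with threshold 3.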
def AdaBoost(X, y, T):
    N = len(y)          # Number of samples
    w = np.ones(N) / N  # Initial uniform weights
    models = []         # To store weak learners
    model_weights = []  # To store model weights (alphas)
    for t in range(T):  # Iterate T boosting rounds
        # Train the weak learner (decision stump) on the weighted samples
        model = decision_stump_ada(X, y, w)
        predictions = model(X)
        # Weighted error rate and model weight; the epsilon guards
        # against division by zero / log(0) when the stump is perfect
        err = np.sum(w * (predictions != y)) / np.sum(w)
        alpha = 0.5 * np.log((1 - err + 1e-10) / (err + 1e-10))
        # Update weights: misclassified samples (y * prediction = -1)
        # are up-weighted, correctly classified ones down-weighted
        w = w * np.exp(-alpha * y * predictions)
        w /= np.sum(w)  # Normalize weights to sum to 1
        models.append(model)
        model_weights.append(alpha)
    # Final model: a function that takes a weighted vote of the weak learners
    def final_model(X):
        final_predictions = np.zeros(X.shape[0])
        for model, alpha in zip(models, model_weights):
            final_predictions += alpha * model(X)
        return np.sign(final_predictions)
    return final_model
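# Example usage (a minimal sketch on synthetic data; the dataset, seed,
# and T = 20 rounds below are illustrative assumptions, not part of the
# original listing):
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 2))
    # Label is +1 when the two features sum to a positive value, else -1
    y = np.where(X[:, 0] + X[:, 1] > 0, 1, -1)
    clf = AdaBoost(X, y, T=20)
    print("training accuracy:", np.mean(clf(X) == y))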