Gradient boost
# Gradient boosting for regression
# Assume data X (n_samples x n_features) with continuous labels y
# A simple decision stump fitted to the residuals serves as the weak learner
import numpy as np

def simple_decision_stump_grad(X, residuals):
    best_feature = 0  # Arbitrarily pick the first feature for simplicity
    best_threshold = np.median(X[:, best_feature])  # Use the median as a simple threshold
    # Each leaf predicts the mean residual of the samples that fall into it
    left_mean = np.mean(residuals[X[:, best_feature] < best_threshold])
    right_mean = np.mean(residuals[X[:, best_feature] >= best_threshold])

    # Predictor based on the simple split
    def stump_predict(X):
        return np.where(X[:, best_feature] < best_threshold, left_mean, right_mean)

    return stump_predict

def GradientBoost(X, y, T):
    # Initial prediction: the mean of the labels (the optimal constant under squared loss)
    F0 = np.mean(y)
    models = []
    current = np.full(len(y), F0)  # running ensemble prediction on the training data

    for t in range(T):  # Iterate T times
        # Residuals play the role of the negative gradient of the squared loss
        residuals = y - current
        # Train a new stump on the residuals and add it to the ensemble
        model = simple_decision_stump_grad(X, residuals)
        models.append(model)
        current += model(X)

    # Final model: a function that sums the constant baseline and all stump corrections
    def final_model(X):
        return F0 + np.sum([m(X) for m in models], axis=0)

    return final_model
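
A minimal usage sketch on synthetic data follows; the sine target, the noise level, and T = 50 rounds are illustrative assumptions rather than part of the original snippet.

# Minimal usage sketch; the synthetic data and T = 50 are illustrative assumptions
rng = np.random.default_rng(0)
X = rng.uniform(-3.0, 3.0, size=(200, 1))          # 200 samples, 1 feature
y = np.sin(X[:, 0]) + 0.1 * rng.normal(size=200)   # noisy continuous labels

boosted = GradientBoost(X, y, T=50)
predictions = boosted(X)
print("train MSE:", np.mean((y - predictions) ** 2))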