# k-Nearest Neighbors (KNN)

import numpy as np
from collections import Counter

def euclidean_distance(x1, x2):
    """
    Compute the Euclidean distance between two points.

    Args:
    - x1: First point (array-like of numbers, e.g. list, tuple or ndarray)
    - x2: Second point, broadcast-compatible with x1

    Returns:
    - The square root of the sum of squared coordinate differences
    """
    # Convert to float before subtracting: this lets plain Python sequences
    # be used directly and prevents wraparound when the inputs are unsigned
    # integer arrays (e.g. uint8), where `x1 - x2` would otherwise overflow.
    a = np.asarray(x1, dtype=float)
    b = np.asarray(x2, dtype=float)
    return np.sqrt(np.sum((a - b) ** 2))

def knn(X_train, y_train, X_test, k=3):
    """
    k-Nearest Neighbor algorithm.

    Each test point is assigned the most common label among its k closest
    training points, using Euclidean distance. If fewer than k training
    points exist, all of them vote.

    Args:
    - X_train: Training data features, one sample per entry along the
      first axis
    - y_train: Training data labels, indexable by training sample position
    - X_test: Test data (single data point or a collection of data points).
      An input with one dimension fewer than X_train is treated as a
      single data point.
    - k: Number of nearest neighbors to consider (must be at least 1)

    Returns:
    - predictions: A list with the predicted label for each test point
      (an empty list when X_test is empty)

    Raises:
    - ValueError: If k is less than 1, X_train has no samples, or the
      feature counts of X_train and X_test differ
    """
    # Work in float so that list inputs are accepted and unsigned integer
    # features (e.g. uint8) cannot wrap around during subtraction.
    train = np.asarray(X_train, dtype=float)
    test = np.asarray(X_test, dtype=float)

    if test.size == 0:
        return []
    if k < 1:
        raise ValueError("k must be at least 1")
    if train.ndim == 0 or train.shape[0] == 0:
        raise ValueError("X_train must contain at least one sample")

    # A test input with one dimension fewer than the training data is a
    # single data point; give it a leading sample axis.
    if test.ndim == train.ndim - 1:
        test = test[np.newaxis, ...]

    # Flatten every sample to a feature vector so scalar, vector and
    # multi-dimensional samples are all handled the same way.
    train = train.reshape(train.shape[0], -1)
    test = test.reshape(test.shape[0], -1)
    if train.shape[1] != test.shape[1]:
        raise ValueError("X_train and X_test must have the same number of features")

    predictions = []
    for point in test:
        # Distances from this test point to every training point at once
        distances = np.sqrt(((train - point) ** 2).sum(axis=1))

        # Stable sort: equidistant neighbors keep their training order,
        # so results are deterministic.
        nearest = np.argsort(distances, kind="stable")[:k]

        # Majority vote; Counter breaks ties in favor of the label seen
        # first, i.e. the one belonging to the closer neighbor.
        votes = Counter(y_train[i] for i in nearest)
        predictions.append(votes.most_common(1)[0][0])

    return predictions

# Last update: March 21, 2024