机器学习基础算法实现

1. 线性回归 (Linear Regression)

class LinearRegression {
  private weights: number[] = [];
  private bias: number = 0;
  private learningRate: number;
  private epochs: number;

  constructor(learningRate: number = 0.01, epochs: number = 1000) {
    this.learningRate = learningRate;
    this.epochs = epochs;
  }

  // 训练模型
  fit(X: number[][], y: number[]): void {
    const n = X.length;
    const features = X[0].length;

    // 初始化权重
    this.weights = new Array(features).fill(0);
    this.bias = 0;

    // 梯度下降
    for (let epoch = 0; epoch < this.epochs; epoch++) {
      let dw = new Array(features).fill(0);
      let db = 0;

      // 计算梯度
      for (let i = 0; i < n; i++) {
        const prediction = this.predictSingle(X[i]);
        const error = prediction - y[i];

        for (let j = 0; j < features; j++) {
          dw[j] += error * X[i][j];
        }
        db += error;
      }

      // 更新权重
      for (let j = 0; j < features; j++) {
        this.weights[j] -= (this.learningRate * dw[j]) / n;
      }
      this.bias -= (this.learningRate * db) / n;
    }
  }

  // 预测单个样本
  private predictSingle(x: number[]): number {
    let prediction = this.bias;
    for (let i = 0; i < x.length; i++) {
      prediction += this.weights[i] * x[i];
    }
    return prediction;
  }

  // 预测多个样本
  predict(X: number[][]): number[] {
    return X.map((x) => this.predictSingle(x));
  }

  // 获取权重
  getWeights(): number[] {
    return [...this.weights];
  }

  // 获取偏置
  getBias(): number {
    return this.bias;
  }

  // 计算均方误差
  meanSquaredError(X: number[][], y: number[]): number {
    const predictions = this.predict(X);
    let mse = 0;

    for (let i = 0; i < predictions.length; i++) {
      const error = predictions[i] - y[i];
      mse += error * error;
    }

    return mse / predictions.length;
  }
}

// 正则化线性回归 (Ridge Regression)
class RidgeRegression extends LinearRegression {
  private lambda: number;

  constructor(learningRate: number = 0.01, epochs: number = 1000, lambda: number = 0.1) {
    super(learningRate, epochs);
    this.lambda = lambda;
  }

  fit(X: number[][], y: number[]): void {
    const n = X.length;
    const features = X[0].length;

    // 初始化权重
    this.weights = new Array(features).fill(0);
    this.bias = 0;

    // 带正则化的梯度下降
    for (let epoch = 0; epoch < this.epochs; epoch++) {
      let dw = new Array(features).fill(0);
      let db = 0;

      // 计算梯度
      for (let i = 0; i < n; i++) {
        const prediction = this.predictSingle(X[i]);
        const error = prediction - y[i];

        for (let j = 0; j < features; j++) {
          dw[j] += error * X[i][j] + this.lambda * this.weights[j];
        }
        db += error;
      }

      // 更新权重
      for (let j = 0; j < features; j++) {
        this.weights[j] -= (this.learningRate * dw[j]) / n;
      }
      this.bias -= (this.learningRate * db) / n;
    }
  }
}

// 使用示例
function linearRegressionExample() {
  // 简单数据集：y = 2*x1 + 3*x2 + 1
  const X = [
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5],
    [5, 6],
  ];
  const y = [8, 13, 18, 23, 28]; // 2*1+3*2+1=8, 2*2+3*3+1=13, etc.

  const lr = new LinearRegression(0.01, 1000);
  lr.fit(X, y);

  const predictions = lr.predict(X);
  console.log('Predictions:', predictions);
  console.log('Weights:', lr.getWeights());
  console.log('Bias:', lr.getBias());
  console.log('MSE:', lr.meanSquaredError(X, y));
}

2. K-means 聚类 (K-means Clustering)

class KMeans {
  private k: number;
  private maxIterations: number;
  private centroids: number[][] = [];
  private clusters: number[][][] = [];

  constructor(k: number, maxIterations: number = 100) {
    this.k = k;
    this.maxIterations = maxIterations;
  }

  // 训练模型
  fit(X: number[][]): void {
    const n = X.length;
    const dimensions = X[0].length;

    // 随机初始化质心
    this.centroids = [];
    const indices = new Set<number>();
    while (this.centroids.length < this.k) {
      const randomIndex = Math.floor(Math.random() * n);
      if (!indices.has(randomIndex)) {
        indices.add(randomIndex);
        this.centroids.push([...X[randomIndex]]);
      }
    }

    for (let iteration = 0; iteration < this.maxIterations; iteration++) {
      // 分配样本到最近的质心
      this.clusters = Array.from({ length: this.k }, () => []);
      for (const point of X) {
        const clusterIndex = this.getNearestCentroid(point);
        this.clusters[clusterIndex].push(point);
      }

      // 更新质心
      const newCentroids = [];
      for (const cluster of this.clusters) {
        if (cluster.length > 0) {
          const centroid = new Array(dimensions).fill(0);
          for (const point of cluster) {
            for (let d = 0; d < dimensions; d++) {
              centroid[d] += point[d];
            }
          }
          for (let d = 0; d < dimensions; d++) {
            centroid[d] /= cluster.length;
          }
          newCentroids.push(centroid);
        } else {
          // 如果簇为空，保持原质心
          newCentroids.push([...this.centroids[newCentroids.length]]);
        }
      }

      // 检查收敛
      let converged = true;
      for (let i = 0; i < this.k; i++) {
        if (!this.arraysEqual(this.centroids[i], newCentroids[i])) {
          converged = false;
          break;
        }
      }

      this.centroids = newCentroids;

      if (converged) {
        break;
      }
    }
  }

  // 预测样本所属簇
  predict(X: number[][]): number[] {
    return X.map((point) => this.getNearestCentroid(point));
  }

  // 获取最近质心的索引
  private getNearestCentroid(point: number[]): number {
    let minDistance = Infinity;
    let nearestIndex = 0;

    for (let i = 0; i < this.centroids.length; i++) {
      const distance = this.euclideanDistance(point, this.centroids[i]);
      if (distance < minDistance) {
        minDistance = distance;
        nearestIndex = i;
      }
    }

    return nearestIndex;
  }

  // 欧几里得距离
  private euclideanDistance(a: number[], b: number[]): number {
    let sum = 0;
    for (let i = 0; i < a.length; i++) {
      const diff = a[i] - b[i];
      sum += diff * diff;
    }
    return Math.sqrt(sum);
  }

  // 数组相等检查
  private arraysEqual(a: number[], b: number[]): boolean {
    if (a.length !== b.length) return false;
    for (let i = 0; i < a.length; i++) {
      if (Math.abs(a[i] - b[i]) > 1e-6) return false;
    }
    return true;
  }

  // 获取质心
  getCentroids(): number[][] {
    return this.centroids.map((centroid) => [...centroid]);
  }

  // 获取簇
  getClusters(): number[][][] {
    return this.clusters.map((cluster) => cluster.map((point) => [...point]));
  }

  // 计算聚类评估指标 (轮廓系数)
  silhouetteScore(X: number[][], labels: number[]): number {
    const n = X.length;
    let totalScore = 0;

    for (let i = 0; i < n; i++) {
      const cluster = labels[i];
      const point = X[i];

      // 计算同簇平均距离 (a)
      let a = 0;
      let countA = 0;
      for (let j = 0; j < n; j++) {
        if (labels[j] === cluster && i !== j) {
          a += this.euclideanDistance(point, X[j]);
          countA++;
        }
      }
      a = countA > 0 ? a / countA : 0;

      // 计算最近异簇平均距离 (b)
      let b = Infinity;
      for (let c = 0; c < this.k; c++) {
        if (c !== cluster) {
          let distance = 0;
          let countB = 0;
          for (let j = 0; j < n; j++) {
            if (labels[j] === c) {
              distance += this.euclideanDistance(point, X[j]);
              countB++;
            }
          }
          if (countB > 0) {
            distance /= countB;
            b = Math.min(b, distance);
          }
        }
      }

      // 计算轮廓系数
      const score = (b - a) / Math.max(a, b);
      totalScore += isNaN(score) ? 0 : score;
    }

    return totalScore / n;
  }
}

// 使用示例
function kmeansExample() {
  // 生成测试数据
  const X = [
    [1, 2],
    [1.5, 1.8],
    [5, 8],
    [8, 8],
    [1, 0.6],
    [9, 11],
  ];

  const kmeans = new KMeans(2);
  kmeans.fit(X);

  const labels = kmeans.predict(X);
  const centroids = kmeans.getCentroids();

  console.log('Labels:', labels);
  console.log('Centroids:', centroids);
  console.log('Silhouette Score:', kmeans.silhouetteScore(X, labels));
}

3. 决策树 (Decision Tree)

interface TreeNode {
  feature?: number;
  threshold?: number;
  left?: TreeNode;
  right?: TreeNode;
  value?: number;
  isLeaf: boolean;
}

class DecisionTreeRegressor {
  private root: TreeNode | null = null;
  private maxDepth: number;
  private minSamplesSplit: number;

  constructor(maxDepth: number = 10, minSamplesSplit: number = 2) {
    this.maxDepth = maxDepth;
    this.minSamplesSplit = minSamplesSplit;
  }

  // 训练模型
  fit(X: number[][], y: number[]): void {
    this.root = this.buildTree(X, y, 0);
  }

  private buildTree(X: number[][], y: number[], depth: number): TreeNode {
    const n = X.length;

    // 检查停止条件
    if (depth >= this.maxDepth || n < this.minSamplesSplit || this.allSame(y)) {
      return {
        value: this.mean(y),
        isLeaf: true,
      };
    }

    // 找到最佳分割
    const { feature, threshold } = this.findBestSplit(X, y);

    if (feature === -1) {
      return {
        value: this.mean(y),
        isLeaf: true,
      };
    }

    // 分割数据
    const leftIndices: number[] = [];
    const rightIndices: number[] = [];

    for (let i = 0; i < n; i++) {
      if (X[i][feature] <= threshold) {
        leftIndices.push(i);
      } else {
        rightIndices.push(i);
      }
    }

    const leftX = leftIndices.map((i) => X[i]);
    const leftY = leftIndices.map((i) => y[i]);
    const rightX = rightIndices.map((i) => X[i]);
    const rightY = rightIndices.map((i) => y[i]);

    return {
      feature,
      threshold,
      left: this.buildTree(leftX, leftY, depth + 1),
      right: this.buildTree(rightX, rightY, depth + 1),
      isLeaf: false,
    };
  }

  private findBestSplit(X: number[][], y: number[]): { feature: number; threshold: number } {
    const n = X.length;
    const features = X[0].length;
    let bestFeature = -1;
    let bestThreshold = 0;
    let bestScore = Infinity;

    for (let feature = 0; feature < features; feature++) {
      const values = X.map((row) => row[feature]).sort((a, b) => a - b);

      for (let i = 0; i < n - 1; i++) {
        const threshold = (values[i] + values[i + 1]) / 2;
        const score = this.calculateSplitScore(X, y, feature, threshold);

        if (score < bestScore) {
          bestScore = score;
          bestFeature = feature;
          bestThreshold = threshold;
        }
      }
    }

    return { feature: bestFeature, threshold: bestThreshold };
  }

  private calculateSplitScore(X: number[][], y: number[], feature: number, threshold: number): number {
    const leftY: number[] = [];
    const rightY: number[] = [];

    for (let i = 0; i < X.length; i++) {
      if (X[i][feature] <= threshold) {
        leftY.push(y[i]);
      } else {
        rightY.push(y[i]);
      }
    }

    if (leftY.length === 0 || rightY.length === 0) {
      return Infinity;
    }

    // 使用方差作为评分标准
    const leftVariance = this.variance(leftY);
    const rightVariance = this.variance(rightY);
    const totalVariance = (leftY.length * leftVariance + rightY.length * rightVariance) / y.length;

    return totalVariance;
  }

  private variance(values: number[]): number {
    const mean = this.mean(values);
    let sum = 0;
    for (const value of values) {
      const diff = value - mean;
      sum += diff * diff;
    }
    return sum / values.length;
  }

  private mean(values: number[]): number {
    return values.reduce((sum, val) => sum + val, 0) / values.length;
  }

  private allSame(values: number[]): boolean {
    return values.every((val) => val === values[0]);
  }

  // 预测
  predict(X: number[][]): number[] {
    return X.map((row) => this.predictSingle(row));
  }

  private predictSingle(row: number[]): number {
    let node = this.root;

    while (node && !node.isLeaf) {
      if (row[node.feature!] <= node.threshold!) {
        node = node.left!;
      } else {
        node = node.right!;
      }
    }

    return node?.value || 0;
  }

  // 计算均方误差
  meanSquaredError(X: number[][], y: number[]): number {
    const predictions = this.predict(X);
    let mse = 0;

    for (let i = 0; i < predictions.length; i++) {
      const error = predictions[i] - y[i];
      mse += error * error;
    }

    return mse / predictions.length;
  }
}

// 使用示例
function decisionTreeExample() {
  // 简单数据集
  const X = [
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5],
    [5, 6],
    [6, 7],
  ];
  const y = [3, 5, 7, 9, 11, 13]; // y ≈ x1 + x2

  const dt = new DecisionTreeRegressor(5, 2);
  dt.fit(X, y);

  const predictions = dt.predict(X);
  console.log('Predictions:', predictions);
  console.log('MSE:', dt.meanSquaredError(X, y));
}

4. 朴素贝叶斯 (Naive Bayes)

class GaussianNaiveBayes {
  private classes: Set<number> = new Set();
  private classPriors: Map<number, number> = new Map();
  private classMeans: Map<number, number[]> = new Map();
  private classVariances: Map<number, number[]> = new Map();

  // 训练模型
  fit(X: number[][], y: number[]): void {
    const n = X.length;
    const features = X[0].length;

    // 收集类别信息
    for (const label of y) {
      this.classes.add(label);
    }

    // 计算先验概率和条件概率参数
    for (const classLabel of this.classes) {
      const classData = X.filter((_, i) => y[i] === classLabel);
      const classCount = classData.length;

      // 先验概率
      this.classPriors.set(classLabel, classCount / n);

      // 计算均值
      const means = new Array(features).fill(0);
      for (const sample of classData) {
        for (let f = 0; f < features; f++) {
          means[f] += sample[f];
        }
      }
      for (let f = 0; f < features; f++) {
        means[f] /= classCount;
      }
      this.classMeans.set(classLabel, means);

      // 计算方差
      const variances = new Array(features).fill(0);
      for (const sample of classData) {
        for (let f = 0; f < features; f++) {
          const diff = sample[f] - means[f];
          variances[f] += diff * diff;
        }
      }
      for (let f = 0; f < features; f++) {
        variances[f] /= classCount;
        // 避免方差为0
        if (variances[f] === 0) variances[f] = 1e-6;
      }
      this.classVariances.set(classLabel, variances);
    }
  }

  // 预测
  predict(X: number[][]): number[] {
    return X.map((sample) => this.predictSingle(sample));
  }

  private predictSingle(sample: number[]): number {
    let bestClass = -1;
    let bestProb = -Infinity;

    for (const classLabel of this.classes) {
      const prior = this.classPriors.get(classLabel)!;
      const means = this.classMeans.get(classLabel)!;
      const variances = this.classVariances.get(classLabel)!;

      // 计算后验概率 (对数形式避免数值下溢)
      let logProb = Math.log(prior);

      for (let f = 0; f < sample.length; f++) {
        const diff = sample[f] - means[f];
        const variance = variances[f];
        logProb += -0.5 * Math.log(2 * Math.PI * variance) - (diff * diff) / (2 * variance);
      }

      if (logProb > bestProb) {
        bestProb = logProb;
        bestClass = classLabel;
      }
    }

    return bestClass;
  }

  // 计算准确率
  accuracy(X: number[][], y: number[]): number {
    const predictions = this.predict(X);
    let correct = 0;

    for (let i = 0; i < predictions.length; i++) {
      if (predictions[i] === y[i]) {
        correct++;
      }
    }

    return correct / predictions.length;
  }
}

// 使用示例
function naiveBayesExample() {
  // 生成测试数据 (两个高斯分布)
  const X: number[][] = [];
  const y: number[] = [];

  // 类 0: 均值 [2, 2], 方差 [1, 1]
  for (let i = 0; i < 50; i++) {
    X.push([2 + Math.random() * 2 - 1, 2 + Math.random() * 2 - 1]);
    y.push(0);
  }

  // 类 1: 均值 [6, 6], 方差 [1, 1]
  for (let i = 0; i < 50; i++) {
    X.push([6 + Math.random() * 2 - 1, 6 + Math.random() * 2 - 1]);
    y.push(1);
  }

  const nb = new GaussianNaiveBayes();
  nb.fit(X, y);

  const predictions = nb.predict(X);
  console.log('Accuracy:', nb.accuracy(X, y));
}

5. 实现要点

线性回归使用梯度下降最小化均方误差。
K-means 通过迭代更新质心进行聚类。
决策树使用递归分割构建树结构。
朴素贝叶斯基于贝叶斯定理和特征独立性假设。

算法知识库：机器学习基础算法实现

机器学习基础算法实现

1. 线性回归 (Linear Regression)

2. K-means 聚类 (K-means Clustering)

3. 决策树 (Decision Tree)

4. 朴素贝叶斯 (Naive Bayes)

5. 实现要点