算法2026-05-14·12 分钟
算法知识库:机器学习基础算法实现
JavaScript/TypeScript 实现基础机器学习算法,如线性回归、K-means聚类、决策树等。
机器学习基础算法实现
1. 线性回归 (Linear Regression)
ts
class LinearRegression {
private weights: number[] = [];
private bias: number = 0;
private learningRate: number;
private epochs: number;
constructor(learningRate: number = 0.01, epochs: number = 1000) {
this.learningRate = learningRate;
this.epochs = epochs;
}
// 训练模型
fit(X: number[][], y: number[]): void {
const n = X.length;
const features = X[0].length;
// 初始化权重
this.weights = new Array(features).fill(0);
this.bias = 0;
// 梯度下降
for (let epoch = 0; epoch < this.epochs; epoch++) {
let dw = new Array(features).fill(0);
let db = 0;
// 计算梯度
for (let i = 0; i < n; i++) {
const prediction = this.predictSingle(X[i]);
const error = prediction - y[i];
for (let j = 0; j < features; j++) {
dw[j] += error * X[i][j];
}
db += error;
}
// 更新权重
for (let j = 0; j < features; j++) {
this.weights[j] -= (this.learningRate * dw[j]) / n;
}
this.bias -= (this.learningRate * db) / n;
}
}
// 预测单个样本
private predictSingle(x: number[]): number {
let prediction = this.bias;
for (let i = 0; i < x.length; i++) {
prediction += this.weights[i] * x[i];
}
return prediction;
}
// 预测多个样本
predict(X: number[][]): number[] {
return X.map((x) => this.predictSingle(x));
}
// 获取权重
getWeights(): number[] {
return [...this.weights];
}
// 获取偏置
getBias(): number {
return this.bias;
}
// 计算均方误差
meanSquaredError(X: number[][], y: number[]): number {
const predictions = this.predict(X);
let mse = 0;
for (let i = 0; i < predictions.length; i++) {
const error = predictions[i] - y[i];
mse += error * error;
}
return mse / predictions.length;
}
}
// 正则化线性回归 (Ridge Regression)
class RidgeRegression extends LinearRegression {
private lambda: number;
constructor(learningRate: number = 0.01, epochs: number = 1000, lambda: number = 0.1) {
super(learningRate, epochs);
this.lambda = lambda;
}
fit(X: number[][], y: number[]): void {
const n = X.length;
const features = X[0].length;
// 初始化权重
this.weights = new Array(features).fill(0);
this.bias = 0;
// 带正则化的梯度下降
for (let epoch = 0; epoch < this.epochs; epoch++) {
let dw = new Array(features).fill(0);
let db = 0;
// 计算梯度
for (let i = 0; i < n; i++) {
const prediction = this.predictSingle(X[i]);
const error = prediction - y[i];
for (let j = 0; j < features; j++) {
dw[j] += error * X[i][j] + this.lambda * this.weights[j];
}
db += error;
}
// 更新权重
for (let j = 0; j < features; j++) {
this.weights[j] -= (this.learningRate * dw[j]) / n;
}
this.bias -= (this.learningRate * db) / n;
}
}
}
// 使用示例
function linearRegressionExample() {
// 简单数据集:y = 2*x1 + 3*x2 + 1
const X = [
[1, 2],
[2, 3],
[3, 4],
[4, 5],
[5, 6],
];
const y = [8, 13, 18, 23, 28]; // 2*1+3*2+1=8, 2*2+3*3+1=13, etc.
const lr = new LinearRegression(0.01, 1000);
lr.fit(X, y);
const predictions = lr.predict(X);
console.log('Predictions:', predictions);
console.log('Weights:', lr.getWeights());
console.log('Bias:', lr.getBias());
console.log('MSE:', lr.meanSquaredError(X, y));
}2. K-means 聚类 (K-means Clustering)
ts
class KMeans {
private k: number;
private maxIterations: number;
private centroids: number[][] = [];
private clusters: number[][][] = [];
constructor(k: number, maxIterations: number = 100) {
this.k = k;
this.maxIterations = maxIterations;
}
// 训练模型
fit(X: number[][]): void {
const n = X.length;
const dimensions = X[0].length;
// 随机初始化质心
this.centroids = [];
const indices = new Set<number>();
while (this.centroids.length < this.k) {
const randomIndex = Math.floor(Math.random() * n);
if (!indices.has(randomIndex)) {
indices.add(randomIndex);
this.centroids.push([...X[randomIndex]]);
}
}
for (let iteration = 0; iteration < this.maxIterations; iteration++) {
// 分配样本到最近的质心
this.clusters = Array.from({ length: this.k }, () => []);
for (const point of X) {
const clusterIndex = this.getNearestCentroid(point);
this.clusters[clusterIndex].push(point);
}
// 更新质心
const newCentroids = [];
for (const cluster of this.clusters) {
if (cluster.length > 0) {
const centroid = new Array(dimensions).fill(0);
for (const point of cluster) {
for (let d = 0; d < dimensions; d++) {
centroid[d] += point[d];
}
}
for (let d = 0; d < dimensions; d++) {
centroid[d] /= cluster.length;
}
newCentroids.push(centroid);
} else {
// 如果簇为空,保持原质心
newCentroids.push([...this.centroids[newCentroids.length]]);
}
}
// 检查收敛
let converged = true;
for (let i = 0; i < this.k; i++) {
if (!this.arraysEqual(this.centroids[i], newCentroids[i])) {
converged = false;
break;
}
}
this.centroids = newCentroids;
if (converged) {
break;
}
}
}
// 预测样本所属簇
predict(X: number[][]): number[] {
return X.map((point) => this.getNearestCentroid(point));
}
// 获取最近质心的索引
private getNearestCentroid(point: number[]): number {
let minDistance = Infinity;
let nearestIndex = 0;
for (let i = 0; i < this.centroids.length; i++) {
const distance = this.euclideanDistance(point, this.centroids[i]);
if (distance < minDistance) {
minDistance = distance;
nearestIndex = i;
}
}
return nearestIndex;
}
// 欧几里得距离
private euclideanDistance(a: number[], b: number[]): number {
let sum = 0;
for (let i = 0; i < a.length; i++) {
const diff = a[i] - b[i];
sum += diff * diff;
}
return Math.sqrt(sum);
}
// 数组相等检查
private arraysEqual(a: number[], b: number[]): boolean {
if (a.length !== b.length) return false;
for (let i = 0; i < a.length; i++) {
if (Math.abs(a[i] - b[i]) > 1e-6) return false;
}
return true;
}
// 获取质心
getCentroids(): number[][] {
return this.centroids.map((centroid) => [...centroid]);
}
// 获取簇
getClusters(): number[][][] {
return this.clusters.map((cluster) => cluster.map((point) => [...point]));
}
// 计算聚类评估指标 (轮廓系数)
silhouetteScore(X: number[][], labels: number[]): number {
const n = X.length;
let totalScore = 0;
for (let i = 0; i < n; i++) {
const cluster = labels[i];
const point = X[i];
// 计算同簇平均距离 (a)
let a = 0;
let countA = 0;
for (let j = 0; j < n; j++) {
if (labels[j] === cluster && i !== j) {
a += this.euclideanDistance(point, X[j]);
countA++;
}
}
a = countA > 0 ? a / countA : 0;
// 计算最近异簇平均距离 (b)
let b = Infinity;
for (let c = 0; c < this.k; c++) {
if (c !== cluster) {
let distance = 0;
let countB = 0;
for (let j = 0; j < n; j++) {
if (labels[j] === c) {
distance += this.euclideanDistance(point, X[j]);
countB++;
}
}
if (countB > 0) {
distance /= countB;
b = Math.min(b, distance);
}
}
}
// 计算轮廓系数
const score = (b - a) / Math.max(a, b);
totalScore += isNaN(score) ? 0 : score;
}
return totalScore / n;
}
}
// 使用示例
function kmeansExample() {
// 生成测试数据
const X = [
[1, 2],
[1.5, 1.8],
[5, 8],
[8, 8],
[1, 0.6],
[9, 11],
];
const kmeans = new KMeans(2);
kmeans.fit(X);
const labels = kmeans.predict(X);
const centroids = kmeans.getCentroids();
console.log('Labels:', labels);
console.log('Centroids:', centroids);
console.log('Silhouette Score:', kmeans.silhouetteScore(X, labels));
}3. 决策树 (Decision Tree)
ts
interface TreeNode {
feature?: number;
threshold?: number;
left?: TreeNode;
right?: TreeNode;
value?: number;
isLeaf: boolean;
}
class DecisionTreeRegressor {
private root: TreeNode | null = null;
private maxDepth: number;
private minSamplesSplit: number;
constructor(maxDepth: number = 10, minSamplesSplit: number = 2) {
this.maxDepth = maxDepth;
this.minSamplesSplit = minSamplesSplit;
}
// 训练模型
fit(X: number[][], y: number[]): void {
this.root = this.buildTree(X, y, 0);
}
private buildTree(X: number[][], y: number[], depth: number): TreeNode {
const n = X.length;
// 检查停止条件
if (depth >= this.maxDepth || n < this.minSamplesSplit || this.allSame(y)) {
return {
value: this.mean(y),
isLeaf: true,
};
}
// 找到最佳分割
const { feature, threshold } = this.findBestSplit(X, y);
if (feature === -1) {
return {
value: this.mean(y),
isLeaf: true,
};
}
// 分割数据
const leftIndices: number[] = [];
const rightIndices: number[] = [];
for (let i = 0; i < n; i++) {
if (X[i][feature] <= threshold) {
leftIndices.push(i);
} else {
rightIndices.push(i);
}
}
const leftX = leftIndices.map((i) => X[i]);
const leftY = leftIndices.map((i) => y[i]);
const rightX = rightIndices.map((i) => X[i]);
const rightY = rightIndices.map((i) => y[i]);
return {
feature,
threshold,
left: this.buildTree(leftX, leftY, depth + 1),
right: this.buildTree(rightX, rightY, depth + 1),
isLeaf: false,
};
}
private findBestSplit(X: number[][], y: number[]): { feature: number; threshold: number } {
const n = X.length;
const features = X[0].length;
let bestFeature = -1;
let bestThreshold = 0;
let bestScore = Infinity;
for (let feature = 0; feature < features; feature++) {
const values = X.map((row) => row[feature]).sort((a, b) => a - b);
for (let i = 0; i < n - 1; i++) {
const threshold = (values[i] + values[i + 1]) / 2;
const score = this.calculateSplitScore(X, y, feature, threshold);
if (score < bestScore) {
bestScore = score;
bestFeature = feature;
bestThreshold = threshold;
}
}
}
return { feature: bestFeature, threshold: bestThreshold };
}
private calculateSplitScore(X: number[][], y: number[], feature: number, threshold: number): number {
const leftY: number[] = [];
const rightY: number[] = [];
for (let i = 0; i < X.length; i++) {
if (X[i][feature] <= threshold) {
leftY.push(y[i]);
} else {
rightY.push(y[i]);
}
}
if (leftY.length === 0 || rightY.length === 0) {
return Infinity;
}
// 使用方差作为评分标准
const leftVariance = this.variance(leftY);
const rightVariance = this.variance(rightY);
const totalVariance = (leftY.length * leftVariance + rightY.length * rightVariance) / y.length;
return totalVariance;
}
private variance(values: number[]): number {
const mean = this.mean(values);
let sum = 0;
for (const value of values) {
const diff = value - mean;
sum += diff * diff;
}
return sum / values.length;
}
private mean(values: number[]): number {
return values.reduce((sum, val) => sum + val, 0) / values.length;
}
private allSame(values: number[]): boolean {
return values.every((val) => val === values[0]);
}
// 预测
predict(X: number[][]): number[] {
return X.map((row) => this.predictSingle(row));
}
private predictSingle(row: number[]): number {
let node = this.root;
while (node && !node.isLeaf) {
if (row[node.feature!] <= node.threshold!) {
node = node.left!;
} else {
node = node.right!;
}
}
return node?.value || 0;
}
// 计算均方误差
meanSquaredError(X: number[][], y: number[]): number {
const predictions = this.predict(X);
let mse = 0;
for (let i = 0; i < predictions.length; i++) {
const error = predictions[i] - y[i];
mse += error * error;
}
return mse / predictions.length;
}
}
// 使用示例
function decisionTreeExample() {
// 简单数据集
const X = [
[1, 2],
[2, 3],
[3, 4],
[4, 5],
[5, 6],
[6, 7],
];
const y = [3, 5, 7, 9, 11, 13]; // y ≈ x1 + x2
const dt = new DecisionTreeRegressor(5, 2);
dt.fit(X, y);
const predictions = dt.predict(X);
console.log('Predictions:', predictions);
console.log('MSE:', dt.meanSquaredError(X, y));
}4. 朴素贝叶斯 (Naive Bayes)
ts
class GaussianNaiveBayes {
private classes: Set<number> = new Set();
private classPriors: Map<number, number> = new Map();
private classMeans: Map<number, number[]> = new Map();
private classVariances: Map<number, number[]> = new Map();
// 训练模型
fit(X: number[][], y: number[]): void {
const n = X.length;
const features = X[0].length;
// 收集类别信息
for (const label of y) {
this.classes.add(label);
}
// 计算先验概率和条件概率参数
for (const classLabel of this.classes) {
const classData = X.filter((_, i) => y[i] === classLabel);
const classCount = classData.length;
// 先验概率
this.classPriors.set(classLabel, classCount / n);
// 计算均值
const means = new Array(features).fill(0);
for (const sample of classData) {
for (let f = 0; f < features; f++) {
means[f] += sample[f];
}
}
for (let f = 0; f < features; f++) {
means[f] /= classCount;
}
this.classMeans.set(classLabel, means);
// 计算方差
const variances = new Array(features).fill(0);
for (const sample of classData) {
for (let f = 0; f < features; f++) {
const diff = sample[f] - means[f];
variances[f] += diff * diff;
}
}
for (let f = 0; f < features; f++) {
variances[f] /= classCount;
// 避免方差为0
if (variances[f] === 0) variances[f] = 1e-6;
}
this.classVariances.set(classLabel, variances);
}
}
// 预测
predict(X: number[][]): number[] {
return X.map((sample) => this.predictSingle(sample));
}
private predictSingle(sample: number[]): number {
let bestClass = -1;
let bestProb = -Infinity;
for (const classLabel of this.classes) {
const prior = this.classPriors.get(classLabel)!;
const means = this.classMeans.get(classLabel)!;
const variances = this.classVariances.get(classLabel)!;
// 计算后验概率 (对数形式避免数值下溢)
let logProb = Math.log(prior);
for (let f = 0; f < sample.length; f++) {
const diff = sample[f] - means[f];
const variance = variances[f];
logProb += -0.5 * Math.log(2 * Math.PI * variance) - (diff * diff) / (2 * variance);
}
if (logProb > bestProb) {
bestProb = logProb;
bestClass = classLabel;
}
}
return bestClass;
}
// 计算准确率
accuracy(X: number[][], y: number[]): number {
const predictions = this.predict(X);
let correct = 0;
for (let i = 0; i < predictions.length; i++) {
if (predictions[i] === y[i]) {
correct++;
}
}
return correct / predictions.length;
}
}
// 使用示例
function naiveBayesExample() {
// 生成测试数据 (两个高斯分布)
const X: number[][] = [];
const y: number[] = [];
// 类 0: 均值 [2, 2], 方差 [1, 1]
for (let i = 0; i < 50; i++) {
X.push([2 + Math.random() * 2 - 1, 2 + Math.random() * 2 - 1]);
y.push(0);
}
// 类 1: 均值 [6, 6], 方差 [1, 1]
for (let i = 0; i < 50; i++) {
X.push([6 + Math.random() * 2 - 1, 6 + Math.random() * 2 - 1]);
y.push(1);
}
const nb = new GaussianNaiveBayes();
nb.fit(X, y);
const predictions = nb.predict(X);
console.log('Accuracy:', nb.accuracy(X, y));
}5. 实现要点
- 线性回归使用梯度下降最小化均方误差。
- K-means 通过迭代更新质心进行聚类。
- 决策树使用递归分割构建树结构。
- 朴素贝叶斯基于贝叶斯定理和特征独立性假设。
算法机器学习JavaScript