SorryToPerson logo
返回
算法2026-05-12·11 分钟

算法知识库:字符串算法实现

JavaScript/TypeScript 实现字符串处理中的经典算法,如模式匹配、字符串搜索、文本处理等。

字符串算法实现

1. KMP 算法

ts
// 计算前缀表
function computePrefixTable(pattern: string): number[] {
  const m = pattern.length;
  const prefixTable = new Array(m).fill(0);
  let j = 0;

  for (let i = 1; i < m; i++) {
    while (j > 0 && pattern[i] !== pattern[j]) {
      j = prefixTable[j - 1];
    }

    if (pattern[i] === pattern[j]) {
      j++;
    }

    prefixTable[i] = j;
  }

  return prefixTable;
}

// KMP 字符串匹配
function kmpSearch(text: string, pattern: string): number[] {
  const n = text.length;
  const m = pattern.length;
  const prefixTable = computePrefixTable(pattern);
  const matches: number[] = [];

  let j = 0; // 模式串指针

  for (let i = 0; i < n; i++) {
    while (j > 0 && text[i] !== pattern[j]) {
      j = prefixTable[j - 1];
    }

    if (text[i] === pattern[j]) {
      j++;
    }

    if (j === m) {
      matches.push(i - m + 1); // 找到匹配
      j = prefixTable[j - 1]; // 继续搜索下一个匹配
    }
  }

  return matches;
}

// 使用示例
function kmpExample() {
  const text = 'ABABDABACDABABCABAB';
  const pattern = 'ABABCABAB';

  const matches = kmpSearch(text, pattern);
  console.log('Pattern found at positions:', matches);
}

2. Boyer-Moore 算法

ts
// 构建坏字符表
function buildBadCharTable(pattern: string): Map<string, number> {
  const badChar = new Map<string, number>();
  const m = pattern.length;

  for (let i = 0; i < m - 1; i++) {
    badChar.set(pattern[i], m - 1 - i);
  }

  return badChar;
}

// 构建好后缀表
function buildGoodSuffixTable(pattern: string): number[] {
  const m = pattern.length;
  const goodSuffix = new Array(m + 1).fill(m);
  const border = new Array(m + 1).fill(0);

  let i = m;
  let j = m + 1;

  border[i] = j;

  while (i > 0) {
    while (j <= m && pattern[i - 1] !== pattern[j - 1]) {
      if (goodSuffix[j] === m) {
        goodSuffix[j] = j - i;
      }
      j = border[j];
    }

    i--;
    j--;
    border[i] = j;
  }

  // 第二遍
  j = border[0];
  for (let i = 0; i <= m; i++) {
    if (goodSuffix[i] === m) {
      goodSuffix[i] = j;
    }
    if (i === j) {
      j = border[j];
    }
  }

  return goodSuffix;
}

// Boyer-Moore 字符串匹配
function boyerMooreSearch(text: string, pattern: string): number[] {
  const n = text.length;
  const m = pattern.length;
  const badChar = buildBadCharTable(pattern);
  const goodSuffix = buildGoodSuffixTable(pattern);
  const matches: number[] = [];

  let i = 0;
  while (i <= n - m) {
    let j = m - 1;

    // 从右向左比较
    while (j >= 0 && pattern[j] === text[i + j]) {
      j--;
    }

    if (j < 0) {
      matches.push(i);
      i += goodSuffix[0];
    } else {
      // 计算移动距离
      const badCharShift = badChar.get(text[i + j]) || m;
      const goodSuffixShift = goodSuffix[j + 1];
      i += Math.max(badCharShift, goodSuffixShift);
    }
  }

  return matches;
}

3. Rabin-Karp 算法

ts
// Rabin-Karp 字符串匹配
function rabinKarpSearch(text: string, pattern: string, prime: number = 101): number[] {
  const n = text.length;
  const m = pattern.length;
  const matches: number[] = [];

  if (m > n) return matches;

  // 计算模式串的哈希值
  let patternHash = 0;
  let textHash = 0;
  let h = 1;

  // 计算 h = d^(m-1) % prime
  for (let i = 0; i < m - 1; i++) {
    h = (h * 256) % prime;
  }

  // 计算初始哈希值
  for (let i = 0; i < m; i++) {
    patternHash = (256 * patternHash + pattern.charCodeAt(i)) % prime;
    textHash = (256 * textHash + text.charCodeAt(i)) % prime;
  }

  // 滑动窗口
  for (let i = 0; i <= n - m; i++) {
    // 检查哈希值
    if (patternHash === textHash) {
      // 验证字符匹配
      let match = true;
      for (let j = 0; j < m; j++) {
        if (text[i + j] !== pattern[j]) {
          match = false;
          break;
        }
      }

      if (match) {
        matches.push(i);
      }
    }

    // 计算下一个窗口的哈希值
    if (i < n - m) {
      textHash = (256 * (textHash - text.charCodeAt(i) * h) + text.charCodeAt(i + m)) % prime;

      // 确保哈希值为正
      if (textHash < 0) {
        textHash += prime;
      }
    }
  }

  return matches;
}

// 多模式匹配的 Rabin-Karp
function multiPatternRabinKarp(text: string, patterns: string[]): Map<string, number[]> {
  const result = new Map<string, number[]>();
  const prime = 101;

  for (const pattern of patterns) {
    const matches = rabinKarpSearch(text, pattern, prime);
    result.set(pattern, matches);
  }

  return result;
}

4. Aho-Corasick 自动机

ts
class AhoCorasickNode {
  children: Map<string, AhoCorasickNode> = new Map();
  fail: AhoCorasickNode | null = null;
  output: string[] = [];
  isEndOfWord: boolean = false;
}

class AhoCorasick {
  private root: AhoCorasickNode = new AhoCorasickNode();

  // 构建 Trie 树
  buildTrie(patterns: string[]): void {
    for (const pattern of patterns) {
      let node = this.root;

      for (const char of pattern) {
        if (!node.children.has(char)) {
          node.children.set(char, new AhoCorasickNode());
        }
        node = node.children.get(char)!;
      }

      node.isEndOfWord = true;
      node.output.push(pattern);
    }
  }

  // 构建失败指针
  buildFailureLinks(): void {
    const queue: AhoCorasickNode[] = [];

    // 初始化根节点的所有子节点
    for (const child of this.root.children.values()) {
      child.fail = this.root;
      queue.push(child);
    }

    while (queue.length > 0) {
      const current = queue.shift()!;

      for (const [char, child] of current.children) {
        let failNode = current.fail;

        while (failNode && !failNode.children.has(char)) {
          failNode = failNode.fail;
        }

        if (failNode) {
          child.fail = failNode.children.get(char)!;
          child.output.push(...child.fail.output);
        } else {
          child.fail = this.root;
        }

        queue.push(child);
      }
    }
  }

  // 搜索所有模式
  search(text: string): Map<string, number[]> {
    const result = new Map<string, number[]>();
    let node = this.root;

    for (let i = 0; i < text.length; i++) {
      const char = text[i];

      while (node && !node.children.has(char)) {
        node = node.fail;
      }

      if (!node) {
        node = this.root;
        continue;
      }

      node = node.children.get(char)!;

      // 收集所有匹配的模式
      for (const pattern of node.output) {
        if (!result.has(pattern)) {
          result.set(pattern, []);
        }
        result.get(pattern)!.push(i - pattern.length + 1);
      }
    }

    return result;
  }

  // 初始化自动机
  build(patterns: string[]): void {
    this.buildTrie(patterns);
    this.buildFailureLinks();
  }
}

// 使用示例
function ahoCorasickExample() {
  const patterns = ['he', 'she', 'his', 'hers'];
  const text = 'ushers';

  const ac = new AhoCorasick();
  ac.build(patterns);

  const matches = ac.search(text);
  console.log('Matches:', matches);
}

5. 最长公共子串

ts
// 动态规划求最长公共子串
function longestCommonSubstring(s1: string, s2: string): { substring: string; length: number } {
  const m = s1.length;
  const n = s2.length;
  const dp: number[][] = Array(m + 1)
    .fill(0)
    .map(() => Array(n + 1).fill(0));

  let maxLength = 0;
  let endIndex = 0;

  for (let i = 1; i <= m; i++) {
    for (let j = 1; j <= n; j++) {
      if (s1[i - 1] === s2[j - 1]) {
        dp[i][j] = dp[i - 1][j - 1] + 1;

        if (dp[i][j] > maxLength) {
          maxLength = dp[i][j];
          endIndex = i - 1;
        }
      }
    }
  }

  const substring = s1.substring(endIndex - maxLength + 1, endIndex + 1);
  return { substring, length: maxLength };
}

// 最长公共子序列
function longestCommonSubsequence(s1: string, s2: string): { sequence: string; length: number } {
  const m = s1.length;
  const n = s2.length;
  const dp: number[][] = Array(m + 1)
    .fill(0)
    .map(() => Array(n + 1).fill(0));

  // 填充 DP 表
  for (let i = 1; i <= m; i++) {
    for (let j = 1; j <= n; j++) {
      if (s1[i - 1] === s2[j - 1]) {
        dp[i][j] = dp[i - 1][j - 1] + 1;
      } else {
        dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
      }
    }
  }

  // 重建 LCS
  let sequence = '';
  let i = m;
  let j = n;

  while (i > 0 && j > 0) {
    if (s1[i - 1] === s2[j - 1]) {
      sequence = s1[i - 1] + sequence;
      i--;
      j--;
    } else if (dp[i - 1][j] > dp[i][j - 1]) {
      i--;
    } else {
      j--;
    }
  }

  return { sequence, length: dp[m][n] };
}

6. 编辑距离

ts
// Levenshtein 距离(编辑距离)
function levenshteinDistance(s1: string, s2: string): number {
  const m = s1.length;
  const n = s2.length;
  const dp: number[][] = Array(m + 1)
    .fill(0)
    .map(() => Array(n + 1).fill(0));

  // 初始化第一行和第一列
  for (let i = 0; i <= m; i++) {
    dp[i][0] = i;
  }
  for (let j = 0; j <= n; j++) {
    dp[0][j] = j;
  }

  // 填充 DP 表
  for (let i = 1; i <= m; i++) {
    for (let j = 1; j <= n; j++) {
      if (s1[i - 1] === s2[j - 1]) {
        dp[i][j] = dp[i - 1][j - 1];
      } else {
        dp[i][j] = Math.min(
          dp[i - 1][j] + 1, // 删除
          dp[i][j - 1] + 1, // 插入
          dp[i - 1][j - 1] + 1, // 替换
        );
      }
    }
  }

  return dp[m][n];
}

// Damerau-Levenshtein 距离(允许相邻字符交换)
function damerauLevenshteinDistance(s1: string, s2: string): number {
  const m = s1.length;
  const n = s2.length;
  const dp: number[][] = Array(m + 1)
    .fill(0)
    .map(() => Array(n + 1).fill(0));

  // 初始化
  for (let i = 0; i <= m; i++) {
    dp[i][0] = i;
  }
  for (let j = 0; j <= n; j++) {
    dp[0][j] = j;
  }

  for (let i = 1; i <= m; i++) {
    for (let j = 1; j <= n; j++) {
      let cost = s1[i - 1] === s2[j - 1] ? 0 : 1;

      dp[i][j] = Math.min(
        dp[i - 1][j] + 1, // 删除
        dp[i][j - 1] + 1, // 插入
        dp[i - 1][j - 1] + cost, // 替换
      );

      // 检查相邻字符交换
      if (i > 1 && j > 1 && s1[i - 1] === s2[j - 2] && s1[i - 2] === s2[j - 1]) {
        dp[i][j] = Math.min(dp[i][j], dp[i - 2][j - 2] + cost);
      }
    }
  }

  return dp[m][n];
}

7. 实现要点

  • KMP 算法通过前缀表避免重复比较,时间复杂度 O(n+m)。
  • Boyer-Moore 算法使用坏字符和好后缀规则,平均性能优秀。
  • Rabin-Karp 使用滚动哈希进行快速匹配。
  • Aho-Corasick 支持多模式匹配,适合关键词过滤。
  • 最长公共子串/子序列使用动态规划解决。
  • 编辑距离度量字符串相似性,有多种变体。
算法字符串JavaScript