算法2026-05-12·11 分钟
算法知识库:字符串算法实现
JavaScript/TypeScript 实现字符串处理中的经典算法,如模式匹配、字符串搜索、文本处理等。
字符串算法实现
1. KMP 算法
ts
// 计算前缀表
function computePrefixTable(pattern: string): number[] {
const m = pattern.length;
const prefixTable = new Array(m).fill(0);
let j = 0;
for (let i = 1; i < m; i++) {
while (j > 0 && pattern[i] !== pattern[j]) {
j = prefixTable[j - 1];
}
if (pattern[i] === pattern[j]) {
j++;
}
prefixTable[i] = j;
}
return prefixTable;
}
// KMP 字符串匹配
function kmpSearch(text: string, pattern: string): number[] {
const n = text.length;
const m = pattern.length;
const prefixTable = computePrefixTable(pattern);
const matches: number[] = [];
let j = 0; // 模式串指针
for (let i = 0; i < n; i++) {
while (j > 0 && text[i] !== pattern[j]) {
j = prefixTable[j - 1];
}
if (text[i] === pattern[j]) {
j++;
}
if (j === m) {
matches.push(i - m + 1); // 找到匹配
j = prefixTable[j - 1]; // 继续搜索下一个匹配
}
}
return matches;
}
// 使用示例
function kmpExample() {
const text = 'ABABDABACDABABCABAB';
const pattern = 'ABABCABAB';
const matches = kmpSearch(text, pattern);
console.log('Pattern found at positions:', matches);
}2. Boyer-Moore 算法
ts
// 构建坏字符表
function buildBadCharTable(pattern: string): Map<string, number> {
const badChar = new Map<string, number>();
const m = pattern.length;
for (let i = 0; i < m - 1; i++) {
badChar.set(pattern[i], m - 1 - i);
}
return badChar;
}
// 构建好后缀表
function buildGoodSuffixTable(pattern: string): number[] {
const m = pattern.length;
const goodSuffix = new Array(m + 1).fill(m);
const border = new Array(m + 1).fill(0);
let i = m;
let j = m + 1;
border[i] = j;
while (i > 0) {
while (j <= m && pattern[i - 1] !== pattern[j - 1]) {
if (goodSuffix[j] === m) {
goodSuffix[j] = j - i;
}
j = border[j];
}
i--;
j--;
border[i] = j;
}
// 第二遍
j = border[0];
for (let i = 0; i <= m; i++) {
if (goodSuffix[i] === m) {
goodSuffix[i] = j;
}
if (i === j) {
j = border[j];
}
}
return goodSuffix;
}
// Boyer-Moore 字符串匹配
function boyerMooreSearch(text: string, pattern: string): number[] {
const n = text.length;
const m = pattern.length;
const badChar = buildBadCharTable(pattern);
const goodSuffix = buildGoodSuffixTable(pattern);
const matches: number[] = [];
let i = 0;
while (i <= n - m) {
let j = m - 1;
// 从右向左比较
while (j >= 0 && pattern[j] === text[i + j]) {
j--;
}
if (j < 0) {
matches.push(i);
i += goodSuffix[0];
} else {
// 计算移动距离
const badCharShift = badChar.get(text[i + j]) || m;
const goodSuffixShift = goodSuffix[j + 1];
i += Math.max(badCharShift, goodSuffixShift);
}
}
return matches;
}3. Rabin-Karp 算法
ts
// Rabin-Karp 字符串匹配
function rabinKarpSearch(text: string, pattern: string, prime: number = 101): number[] {
const n = text.length;
const m = pattern.length;
const matches: number[] = [];
if (m > n) return matches;
// 计算模式串的哈希值
let patternHash = 0;
let textHash = 0;
let h = 1;
// 计算 h = d^(m-1) % prime
for (let i = 0; i < m - 1; i++) {
h = (h * 256) % prime;
}
// 计算初始哈希值
for (let i = 0; i < m; i++) {
patternHash = (256 * patternHash + pattern.charCodeAt(i)) % prime;
textHash = (256 * textHash + text.charCodeAt(i)) % prime;
}
// 滑动窗口
for (let i = 0; i <= n - m; i++) {
// 检查哈希值
if (patternHash === textHash) {
// 验证字符匹配
let match = true;
for (let j = 0; j < m; j++) {
if (text[i + j] !== pattern[j]) {
match = false;
break;
}
}
if (match) {
matches.push(i);
}
}
// 计算下一个窗口的哈希值
if (i < n - m) {
textHash = (256 * (textHash - text.charCodeAt(i) * h) + text.charCodeAt(i + m)) % prime;
// 确保哈希值为正
if (textHash < 0) {
textHash += prime;
}
}
}
return matches;
}
// 多模式匹配的 Rabin-Karp
function multiPatternRabinKarp(text: string, patterns: string[]): Map<string, number[]> {
const result = new Map<string, number[]>();
const prime = 101;
for (const pattern of patterns) {
const matches = rabinKarpSearch(text, pattern, prime);
result.set(pattern, matches);
}
return result;
}4. Aho-Corasick 自动机
ts
class AhoCorasickNode {
children: Map<string, AhoCorasickNode> = new Map();
fail: AhoCorasickNode | null = null;
output: string[] = [];
isEndOfWord: boolean = false;
}
class AhoCorasick {
private root: AhoCorasickNode = new AhoCorasickNode();
// 构建 Trie 树
buildTrie(patterns: string[]): void {
for (const pattern of patterns) {
let node = this.root;
for (const char of pattern) {
if (!node.children.has(char)) {
node.children.set(char, new AhoCorasickNode());
}
node = node.children.get(char)!;
}
node.isEndOfWord = true;
node.output.push(pattern);
}
}
// 构建失败指针
buildFailureLinks(): void {
const queue: AhoCorasickNode[] = [];
// 初始化根节点的所有子节点
for (const child of this.root.children.values()) {
child.fail = this.root;
queue.push(child);
}
while (queue.length > 0) {
const current = queue.shift()!;
for (const [char, child] of current.children) {
let failNode = current.fail;
while (failNode && !failNode.children.has(char)) {
failNode = failNode.fail;
}
if (failNode) {
child.fail = failNode.children.get(char)!;
child.output.push(...child.fail.output);
} else {
child.fail = this.root;
}
queue.push(child);
}
}
}
// 搜索所有模式
search(text: string): Map<string, number[]> {
const result = new Map<string, number[]>();
let node = this.root;
for (let i = 0; i < text.length; i++) {
const char = text[i];
while (node && !node.children.has(char)) {
node = node.fail;
}
if (!node) {
node = this.root;
continue;
}
node = node.children.get(char)!;
// 收集所有匹配的模式
for (const pattern of node.output) {
if (!result.has(pattern)) {
result.set(pattern, []);
}
result.get(pattern)!.push(i - pattern.length + 1);
}
}
return result;
}
// 初始化自动机
build(patterns: string[]): void {
this.buildTrie(patterns);
this.buildFailureLinks();
}
}
// 使用示例
function ahoCorasickExample() {
const patterns = ['he', 'she', 'his', 'hers'];
const text = 'ushers';
const ac = new AhoCorasick();
ac.build(patterns);
const matches = ac.search(text);
console.log('Matches:', matches);
}5. 最长公共子串
ts
// 动态规划求最长公共子串
function longestCommonSubstring(s1: string, s2: string): { substring: string; length: number } {
const m = s1.length;
const n = s2.length;
const dp: number[][] = Array(m + 1)
.fill(0)
.map(() => Array(n + 1).fill(0));
let maxLength = 0;
let endIndex = 0;
for (let i = 1; i <= m; i++) {
for (let j = 1; j <= n; j++) {
if (s1[i - 1] === s2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
if (dp[i][j] > maxLength) {
maxLength = dp[i][j];
endIndex = i - 1;
}
}
}
}
const substring = s1.substring(endIndex - maxLength + 1, endIndex + 1);
return { substring, length: maxLength };
}
// 最长公共子序列
function longestCommonSubsequence(s1: string, s2: string): { sequence: string; length: number } {
const m = s1.length;
const n = s2.length;
const dp: number[][] = Array(m + 1)
.fill(0)
.map(() => Array(n + 1).fill(0));
// 填充 DP 表
for (let i = 1; i <= m; i++) {
for (let j = 1; j <= n; j++) {
if (s1[i - 1] === s2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
}
}
}
// 重建 LCS
let sequence = '';
let i = m;
let j = n;
while (i > 0 && j > 0) {
if (s1[i - 1] === s2[j - 1]) {
sequence = s1[i - 1] + sequence;
i--;
j--;
} else if (dp[i - 1][j] > dp[i][j - 1]) {
i--;
} else {
j--;
}
}
return { sequence, length: dp[m][n] };
}6. 编辑距离
ts
// Levenshtein 距离(编辑距离)
function levenshteinDistance(s1: string, s2: string): number {
const m = s1.length;
const n = s2.length;
const dp: number[][] = Array(m + 1)
.fill(0)
.map(() => Array(n + 1).fill(0));
// 初始化第一行和第一列
for (let i = 0; i <= m; i++) {
dp[i][0] = i;
}
for (let j = 0; j <= n; j++) {
dp[0][j] = j;
}
// 填充 DP 表
for (let i = 1; i <= m; i++) {
for (let j = 1; j <= n; j++) {
if (s1[i - 1] === s2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1];
} else {
dp[i][j] = Math.min(
dp[i - 1][j] + 1, // 删除
dp[i][j - 1] + 1, // 插入
dp[i - 1][j - 1] + 1, // 替换
);
}
}
}
return dp[m][n];
}
// Damerau-Levenshtein 距离(允许相邻字符交换)
function damerauLevenshteinDistance(s1: string, s2: string): number {
const m = s1.length;
const n = s2.length;
const dp: number[][] = Array(m + 1)
.fill(0)
.map(() => Array(n + 1).fill(0));
// 初始化
for (let i = 0; i <= m; i++) {
dp[i][0] = i;
}
for (let j = 0; j <= n; j++) {
dp[0][j] = j;
}
for (let i = 1; i <= m; i++) {
for (let j = 1; j <= n; j++) {
let cost = s1[i - 1] === s2[j - 1] ? 0 : 1;
dp[i][j] = Math.min(
dp[i - 1][j] + 1, // 删除
dp[i][j - 1] + 1, // 插入
dp[i - 1][j - 1] + cost, // 替换
);
// 检查相邻字符交换
if (i > 1 && j > 1 && s1[i - 1] === s2[j - 2] && s1[i - 2] === s2[j - 1]) {
dp[i][j] = Math.min(dp[i][j], dp[i - 2][j - 2] + cost);
}
}
}
return dp[m][n];
}7. 实现要点
- KMP 算法通过前缀表避免重复比较,时间复杂度 O(n+m)。
- Boyer-Moore 算法使用坏字符和好后缀规则,平均性能优秀。
- Rabin-Karp 使用滚动哈希进行快速匹配。
- Aho-Corasick 支持多模式匹配,适合关键词过滤。
- 最长公共子串/子序列使用动态规划解决。
- 编辑距离度量字符串相似性,有多种变体。
算法字符串JavaScript