算法2026-05-02·11 分钟
算法知识库:生物信息学算法实现
JavaScript/TypeScript 实现生物信息学基础算法,如序列比对、基因组组装等。
生物信息学算法实现
1. Needleman-Wunsch 全局序列比对
ts
function needlemanWunsch(seq1: string, seq2: string, match: number = 1, mismatch: number = -1, gap: number = -2): { score: number; alignment: [string, string] } {
const m = seq1.length;
const n = seq2.length;
// 初始化得分矩阵
const score = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0));
// 初始化第一行和第一列
for (let i = 1; i <= m; i += 1) {
score[i][0] = score[i - 1][0] + gap;
}
for (let j = 1; j <= n; j += 1) {
score[0][j] = score[0][j - 1] + gap;
}
// 填充得分矩阵
for (let i = 1; i <= m; i += 1) {
for (let j = 1; j <= n; j += 1) {
const matchScore = seq1[i - 1] === seq2[j - 1] ? match : mismatch;
const diagonal = score[i - 1][j - 1] + matchScore;
const up = score[i - 1][j] + gap;
const left = score[i][j - 1] + gap;
score[i][j] = Math.max(diagonal, up, left);
}
}
// 回溯构造比对
const alignment1: string[] = [];
const alignment2: string[] = [];
let i = m;
let j = n;
while (i > 0 || j > 0) {
if (i > 0 && j > 0 && score[i][j] === score[i - 1][j - 1] + (seq1[i - 1] === seq2[j - 1] ? match : mismatch)) {
alignment1.unshift(seq1[i - 1]);
alignment2.unshift(seq2[j - 1]);
i -= 1;
j -= 1;
} else if (i > 0 && score[i][j] === score[i - 1][j] + gap) {
alignment1.unshift(seq1[i - 1]);
alignment2.unshift('-');
i -= 1;
} else {
alignment1.unshift('-');
alignment2.unshift(seq2[j - 1]);
j -= 1;
}
}
return {
score: score[m][n],
alignment: [alignment1.join(''), alignment2.join('')],
};
}2. Smith-Waterman 局部序列比对
ts
function smithWaterman(seq1: string, seq2: string, match: number = 1, mismatch: number = -1, gap: number = -2): { score: number; alignment: [string, string]; startPositions: [number, number] } {
const m = seq1.length;
const n = seq2.length;
const score = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0));
let maxScore = 0;
let maxI = 0;
let maxJ = 0;
// 填充得分矩阵
for (let i = 1; i <= m; i += 1) {
for (let j = 1; j <= n; j += 1) {
const matchScore = seq1[i - 1] === seq2[j - 1] ? match : mismatch;
const diagonal = score[i - 1][j - 1] + matchScore;
const up = score[i - 1][j] + gap;
const left = score[i][j - 1] + gap;
score[i][j] = Math.max(0, diagonal, up, left);
if (score[i][j] > maxScore) {
maxScore = score[i][j];
maxI = i;
maxJ = j;
}
}
}
// 从最大得分位置回溯
const alignment1: string[] = [];
const alignment2: string[] = [];
let i = maxI;
let j = maxJ;
while (i > 0 && j > 0 && score[i][j] > 0) {
if (score[i][j] === score[i - 1][j - 1] + (seq1[i - 1] === seq2[j - 1] ? match : mismatch)) {
alignment1.unshift(seq1[i - 1]);
alignment2.unshift(seq2[j - 1]);
i -= 1;
j -= 1;
} else if (score[i][j] === score[i - 1][j] + gap) {
alignment1.unshift(seq1[i - 1]);
alignment2.unshift('-');
i -= 1;
} else if (score[i][j] === score[i][j - 1] + gap) {
alignment1.unshift('-');
alignment2.unshift(seq2[j - 1]);
j -= 1;
} else {
break;
}
}
return {
score: maxScore,
alignment: [alignment1.join(''), alignment2.join('')],
startPositions: [i, j],
};
}3. KMP 字符串匹配 (用于序列搜索)
ts
function computePrefixFunction(pattern: string): number[] {
const m = pattern.length;
const pi = new Array(m).fill(0);
let k = 0;
for (let i = 1; i < m; i += 1) {
while (k > 0 && pattern[k] !== pattern[i]) {
k = pi[k - 1];
}
if (pattern[k] === pattern[i]) {
k += 1;
}
pi[i] = k;
}
return pi;
}
function kmpSearch(text: string, pattern: string): number[] {
const n = text.length;
const m = pattern.length;
const pi = computePrefixFunction(pattern);
const matches: number[] = [];
let q = 0; // 匹配的字符数
for (let i = 0; i < n; i += 1) {
while (q > 0 && pattern[q] !== text[i]) {
q = pi[q - 1];
}
if (pattern[q] === text[i]) {
q += 1;
}
if (q === m) {
matches.push(i - m + 1);
q = pi[q - 1];
}
}
return matches;
}4. 动态规划序列比对 (简化版)
ts
function sequenceAlignment(seq1: string, seq2: string): { score: number; alignedSeq1: string; alignedSeq2: string } {
const m = seq1.length;
const n = seq2.length;
// 得分矩阵
const dp = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0));
// 初始化
for (let i = 1; i <= m; i += 1) {
dp[i][0] = dp[i - 1][0] - 1; // 缺口惩罚
}
for (let j = 1; j <= n; j += 1) {
dp[0][j] = dp[0][j - 1] - 1;
}
// 填充 DP 表
for (let i = 1; i <= m; i += 1) {
for (let j = 1; j <= n; j += 1) {
const match = seq1[i - 1] === seq2[j - 1] ? 1 : -1;
const diagonal = dp[i - 1][j - 1] + match;
const up = dp[i - 1][j] - 1;
const left = dp[i][j - 1] - 1;
dp[i][j] = Math.max(diagonal, up, left);
}
}
// 回溯构造比对
let alignedSeq1 = '';
let alignedSeq2 = '';
let i = m;
let j = n;
while (i > 0 || j > 0) {
if (i > 0 && j > 0 && dp[i][j] === dp[i - 1][j - 1] + (seq1[i - 1] === seq2[j - 1] ? 1 : -1)) {
alignedSeq1 = seq1[i - 1] + alignedSeq1;
alignedSeq2 = seq2[j - 1] + alignedSeq2;
i -= 1;
j -= 1;
} else if (i > 0 && dp[i][j] === dp[i - 1][j] - 1) {
alignedSeq1 = seq1[i - 1] + alignedSeq1;
alignedSeq2 = '-' + alignedSeq2;
i -= 1;
} else {
alignedSeq1 = '-' + alignedSeq1;
alignedSeq2 = seq2[j - 1] + alignedSeq2;
j -= 1;
}
}
return {
score: dp[m][n],
alignedSeq1,
alignedSeq2,
};
}5. 基因组组装 (重叠-布局-共识简化版)
ts
class GenomeAssembler {
private reads: string[];
constructor(reads: string[]) {
this.reads = reads;
}
assemble(): string {
if (this.reads.length === 0) return '';
let currentContig = this.reads[0];
const remainingReads = [...this.reads.slice(1)];
while (remainingReads.length > 0) {
let bestOverlap = -1;
let bestReadIndex = -1;
let bestPosition = -1; // 0: 前缀, 1: 后缀
for (let i = 0; i < remainingReads.length; i += 1) {
const read = remainingReads[i];
// 检查前缀重叠
const prefixOverlap = this.findOverlap(currentContig, read, true);
if (prefixOverlap > bestOverlap) {
bestOverlap = prefixOverlap;
bestReadIndex = i;
bestPosition = 0;
}
// 检查后缀重叠
const suffixOverlap = this.findOverlap(currentContig, read, false);
if (suffixOverlap > bestOverlap) {
bestOverlap = suffixOverlap;
bestReadIndex = i;
bestPosition = 1;
}
}
if (bestOverlap < 10) break; // 最小重叠长度
const bestRead = remainingReads[bestReadIndex];
if (bestPosition === 0) {
// 添加到开头
currentContig = bestRead + currentContig.slice(bestOverlap);
} else {
// 添加到结尾
currentContig = currentContig.slice(0, currentContig.length - bestOverlap) + bestRead;
}
remainingReads.splice(bestReadIndex, 1);
}
return currentContig;
}
private findOverlap(seq1: string, seq2: string, seq1IsPrefix: boolean): number {
const [shorter, longer] = seq1IsPrefix ? [seq2, seq1] : [seq1, seq2];
const maxOverlap = Math.min(shorter.length, longer.length);
for (let overlap = maxOverlap; overlap >= 1; overlap -= 1) {
const shorterSuffix = shorter.slice(shorter.length - overlap);
const longerPrefix = longer.slice(0, overlap);
if (shorterSuffix === longerPrefix) {
return overlap;
}
}
return 0;
}
}6. 实现要点
- Needleman-Wunsch 用于全局比对。
- Smith-Waterman 用于局部比对。
- KMP 提供高效的序列搜索。
- 基因组组装处理重叠片段。
算法生物信息学JavaScript