算法2026-05-13·10 分钟
算法知识库:压缩算法实现
JavaScript/TypeScript 实现经典数据压缩算法,如霍夫曼编码、LZW压缩、游程编码等。
压缩算法实现
1. 霍夫曼编码 (Huffman Coding)
ts
class HuffmanNode {
char: string | null;
frequency: number;
left: HuffmanNode | null;
right: HuffmanNode | null;
constructor(char: string | null, frequency: number) {
this.char = char;
this.frequency = frequency;
this.left = null;
this.right = null;
}
}
class HuffmanCoding {
private root: HuffmanNode | null = null;
private codes: Map<string, string> = new Map();
// 构建霍夫曼树
buildTree(text: string): void {
// 计算字符频率
const frequencyMap = new Map<string, number>();
for (const char of text) {
frequencyMap.set(char, (frequencyMap.get(char) || 0) + 1);
}
// 创建叶节点
const nodes: HuffmanNode[] = [];
for (const [char, freq] of frequencyMap) {
nodes.push(new HuffmanNode(char, freq));
}
// 构建树
while (nodes.length > 1) {
// 排序节点
nodes.sort((a, b) => a.frequency - b.frequency);
// 取出两个最小频率的节点
const left = nodes.shift()!;
const right = nodes.shift()!;
// 创建父节点
const parent = new HuffmanNode(null, left.frequency + right.frequency);
parent.left = left;
parent.right = right;
nodes.push(parent);
}
this.root = nodes[0];
this.buildCodes(this.root, '');
}
// 生成编码表
private buildCodes(node: HuffmanNode | null, code: string): void {
if (!node) return;
if (node.char !== null) {
this.codes.set(node.char, code);
return;
}
this.buildCodes(node.left, code + '0');
this.buildCodes(node.right, code + '1');
}
// 编码文本
encode(text: string): string {
let encoded = '';
for (const char of text) {
encoded += this.codes.get(char) || '';
}
return encoded;
}
// 解码文本
decode(encodedText: string): string {
let decoded = '';
let current = this.root;
for (const bit of encodedText) {
if (!current) break;
if (bit === '0') {
current = current.left;
} else {
current = current.right;
}
if (current && current.char !== null) {
decoded += current.char;
current = this.root;
}
}
return decoded;
}
// 获取编码表
getCodes(): Map<string, string> {
return new Map(this.codes);
}
// 计算压缩率
getCompressionRatio(originalText: string, encodedText: string): number {
const originalBits = originalText.length * 8; // 假设每个字符8位
const encodedBits = encodedText.length;
return encodedBits / originalBits;
}
}
// 使用示例
function huffmanExample() {
const text = 'this is an example for huffman encoding';
const huffman = new HuffmanCoding();
huffman.buildTree(text);
const encoded = huffman.encode(text);
const decoded = huffman.decode(encoded);
console.log('Original:', text);
console.log('Encoded:', encoded);
console.log('Decoded:', decoded);
console.log('Codes:', huffman.getCodes());
console.log('Compression ratio:', huffman.getCompressionRatio(text, encoded));
}2. LZW 压缩算法
ts
class LZWCompression {
private dictionary: Map<string, number> = new Map();
private reverseDictionary: Map<number, string> = new Map();
private nextCode: number;
constructor() {
this.initializeDictionary();
}
private initializeDictionary(): void {
this.dictionary.clear();
this.reverseDictionary.clear();
this.nextCode = 0;
// 初始化ASCII字符
for (let i = 0; i < 256; i++) {
const char = String.fromCharCode(i);
this.dictionary.set(char, this.nextCode);
this.reverseDictionary.set(this.nextCode, char);
this.nextCode++;
}
}
// 压缩
compress(input: string): number[] {
this.initializeDictionary();
const result: number[] = [];
let current = '';
for (const char of input) {
const next = current + char;
if (this.dictionary.has(next)) {
current = next;
} else {
// 输出当前字符串的编码
result.push(this.dictionary.get(current)!);
// 将新字符串加入字典
this.dictionary.set(next, this.nextCode);
this.reverseDictionary.set(this.nextCode, next);
this.nextCode++;
current = char;
}
}
// 输出最后的字符串
if (current.length > 0) {
result.push(this.dictionary.get(current)!);
}
return result;
}
// 解压
decompress(compressed: number[]): string {
this.initializeDictionary();
let result = '';
let previousCode = compressed[0];
let previousString = this.reverseDictionary.get(previousCode)!;
result += previousString;
for (let i = 1; i < compressed.length; i++) {
const currentCode = compressed[i];
let currentString: string;
if (this.reverseDictionary.has(currentCode)) {
currentString = this.reverseDictionary.get(currentCode)!;
} else if (currentCode === this.nextCode) {
// 特殊情况:当前编码等于下一个可用编码
currentString = previousString + previousString[0];
} else {
throw new Error('Invalid compressed data');
}
result += currentString;
// 添加新字符串到字典
const newString = previousString + currentString[0];
this.dictionary.set(newString, this.nextCode);
this.reverseDictionary.set(this.nextCode, newString);
this.nextCode++;
previousString = currentString;
}
return result;
}
// 将压缩结果转换为字符串(用于存储)
compressToString(input: string): string {
const compressed = this.compress(input);
return compressed.map((code) => String.fromCharCode(code)).join('');
}
// 从字符串解压
decompressFromString(compressedStr: string): string {
const compressed = compressedStr.split('').map((char) => char.charCodeAt(0));
return this.decompress(compressed);
}
}
// 使用示例
function lzwExample() {
const text = 'TOBEORNOTTOBEORTOBEORNOT';
const lzw = new LZWCompression();
const compressed = lzw.compress(text);
const decompressed = lzw.decompress(compressed);
console.log('Original:', text);
console.log('Compressed:', compressed);
console.log('Decompressed:', decompressed);
console.log('Compression ratio:', compressed.length / text.length);
}3. 游程编码 (Run-Length Encoding)
ts
// 基本游程编码
function runLengthEncode(input: string): string {
if (input.length === 0) return '';
let result = '';
let count = 1;
let currentChar = input[0];
for (let i = 1; i < input.length; i++) {
if (input[i] === currentChar) {
count++;
} else {
result += count.toString() + currentChar;
currentChar = input[i];
count = 1;
}
}
// 处理最后一个字符
result += count.toString() + currentChar;
return result;
}
// 游程解码
function runLengthDecode(encoded: string): string {
let result = '';
let i = 0;
while (i < encoded.length) {
// 解析计数
let countStr = '';
while (i < encoded.length && /\d/.test(encoded[i])) {
countStr += encoded[i];
i++;
}
const count = parseInt(countStr);
if (i < encoded.length) {
const char = encoded[i];
result += char.repeat(count);
i++;
}
}
return result;
}
// 改进的游程编码(处理数字字符)
function runLengthEncodeImproved(input: string): string {
if (input.length === 0) return '';
let result = '';
let count = 1;
let currentChar = input[0];
for (let i = 1; i < input.length; i++) {
if (input[i] === currentChar && count < 255) {
// 限制计数避免溢出
count++;
} else {
result += String.fromCharCode(count) + currentChar;
currentChar = input[i];
count = 1;
}
}
// 处理最后一个字符
result += String.fromCharCode(count) + currentChar;
return result;
}
function runLengthDecodeImproved(encoded: string): string {
let result = '';
for (let i = 0; i < encoded.length; i += 2) {
const count = encoded.charCodeAt(i);
const char = encoded[i + 1];
result += char.repeat(count);
}
return result;
}
// 游程编码变体:只编码重复字符
function runLengthEncodeSelective(input: string, minRepeat: number = 3): string {
let result = '';
let i = 0;
while (i < input.length) {
let count = 1;
let j = i + 1;
// 查找连续重复字符
while (j < input.length && input[j] === input[i]) {
count++;
j++;
}
if (count >= minRepeat) {
// 编码重复序列
result += input[i] + count.toString() + input[i];
i = j;
} else {
// 不编码,直接复制
result += input[i];
i++;
}
}
return result;
}
function runLengthDecodeSelective(encoded: string): string {
let result = '';
let i = 0;
while (i < encoded.length) {
const char = encoded[i];
i++;
// 检查是否是编码序列
if (i + 1 < encoded.length && /\d/.test(encoded[i]) && encoded[i + 1] === char) {
const count = parseInt(encoded[i]);
result += char.repeat(count);
i += 2;
} else {
result += char;
}
}
return result;
}4. LZ77 压缩算法
ts
interface LZ77Token {
offset: number;
length: number;
nextChar: string;
}
class LZ77Compression {
private windowSize: number;
private bufferSize: number;
constructor(windowSize: number = 4096, bufferSize: number = 16) {
this.windowSize = windowSize;
this.bufferSize = bufferSize;
}
// 压缩
compress(input: string): LZ77Token[] {
const tokens: LZ77Token[] = [];
let position = 0;
while (position < input.length) {
const match = this.findLongestMatch(input, position);
if (match.length > 0) {
// 找到匹配,使用匹配信息
tokens.push({
offset: match.offset,
length: match.length,
nextChar: input[position + match.length] || '',
});
position += match.length + 1;
} else {
// 没有找到匹配,直接输出字符
tokens.push({
offset: 0,
length: 0,
nextChar: input[position],
});
position++;
}
}
return tokens;
}
private findLongestMatch(input: string, position: number): { offset: number; length: number } {
let bestMatch = { offset: 0, length: 0 };
const start = Math.max(0, position - this.windowSize);
for (let i = start; i < position; i++) {
let length = 0;
while (length < this.bufferSize && position + length < input.length && input[i + length] === input[position + length]) {
length++;
}
if (length > bestMatch.length) {
bestMatch = {
offset: position - i,
length: length,
};
}
}
return bestMatch;
}
// 解压
decompress(tokens: LZ77Token[]): string {
let result = '';
for (const token of tokens) {
if (token.offset === 0 && token.length === 0) {
// 直接字符
result += token.nextChar;
} else {
// 复制匹配字符串
const start = result.length - token.offset;
for (let i = 0; i < token.length; i++) {
result += result[start + i];
}
result += token.nextChar;
}
}
return result;
}
// 将压缩结果转换为字符串
compressToString(input: string): string {
const tokens = this.compress(input);
let result = '';
for (const token of tokens) {
result += String.fromCharCode(token.offset >> 8);
result += String.fromCharCode(token.offset & 0xff);
result += String.fromCharCode(token.length);
result += token.nextChar;
}
return result;
}
// 从字符串解压
decompressFromString(compressed: string): string {
const tokens: LZ77Token[] = [];
let i = 0;
while (i < compressed.length) {
const offset = (compressed.charCodeAt(i) << 8) | compressed.charCodeAt(i + 1);
const length = compressed.charCodeAt(i + 2);
const nextChar = compressed[i + 3];
tokens.push({ offset, length, nextChar });
i += 4;
}
return this.decompress(tokens);
}
}
// 使用示例
function lz77Example() {
const text = 'ABRACADABRA';
const lz77 = new LZ77Compression(10, 4);
const compressed = lz77.compress(text);
const decompressed = lz77.decompress(compressed);
console.log('Original:', text);
console.log('Compressed tokens:', compressed);
console.log('Decompressed:', decompressed);
}5. 实现要点
- 霍夫曼编码根据字符频率构建最优前缀码。
- LZW 使用字典动态构建编码表。
- 游程编码适合有大量连续重复字符的数据。
- LZ77 通过滑动窗口查找重复字符串。
算法压缩JavaScript