SorryToPerson logo
返回
算法2026-05-13·10 分钟

算法知识库:压缩算法实现

JavaScript/TypeScript 实现经典数据压缩算法,如霍夫曼编码、LZW压缩、游程编码等。

压缩算法实现

1. 霍夫曼编码 (Huffman Coding)

ts
class HuffmanNode {
  char: string | null;
  frequency: number;
  left: HuffmanNode | null;
  right: HuffmanNode | null;

  constructor(char: string | null, frequency: number) {
    this.char = char;
    this.frequency = frequency;
    this.left = null;
    this.right = null;
  }
}

class HuffmanCoding {
  private root: HuffmanNode | null = null;
  private codes: Map<string, string> = new Map();

  // 构建霍夫曼树
  buildTree(text: string): void {
    // 计算字符频率
    const frequencyMap = new Map<string, number>();
    for (const char of text) {
      frequencyMap.set(char, (frequencyMap.get(char) || 0) + 1);
    }

    // 创建叶节点
    const nodes: HuffmanNode[] = [];
    for (const [char, freq] of frequencyMap) {
      nodes.push(new HuffmanNode(char, freq));
    }

    // 构建树
    while (nodes.length > 1) {
      // 排序节点
      nodes.sort((a, b) => a.frequency - b.frequency);

      // 取出两个最小频率的节点
      const left = nodes.shift()!;
      const right = nodes.shift()!;

      // 创建父节点
      const parent = new HuffmanNode(null, left.frequency + right.frequency);
      parent.left = left;
      parent.right = right;

      nodes.push(parent);
    }

    this.root = nodes[0];
    this.buildCodes(this.root, '');
  }

  // 生成编码表
  private buildCodes(node: HuffmanNode | null, code: string): void {
    if (!node) return;

    if (node.char !== null) {
      this.codes.set(node.char, code);
      return;
    }

    this.buildCodes(node.left, code + '0');
    this.buildCodes(node.right, code + '1');
  }

  // 编码文本
  encode(text: string): string {
    let encoded = '';
    for (const char of text) {
      encoded += this.codes.get(char) || '';
    }
    return encoded;
  }

  // 解码文本
  decode(encodedText: string): string {
    let decoded = '';
    let current = this.root;

    for (const bit of encodedText) {
      if (!current) break;

      if (bit === '0') {
        current = current.left;
      } else {
        current = current.right;
      }

      if (current && current.char !== null) {
        decoded += current.char;
        current = this.root;
      }
    }

    return decoded;
  }

  // 获取编码表
  getCodes(): Map<string, string> {
    return new Map(this.codes);
  }

  // 计算压缩率
  getCompressionRatio(originalText: string, encodedText: string): number {
    const originalBits = originalText.length * 8; // 假设每个字符8位
    const encodedBits = encodedText.length;
    return encodedBits / originalBits;
  }
}

// 使用示例
function huffmanExample() {
  const text = 'this is an example for huffman encoding';
  const huffman = new HuffmanCoding();

  huffman.buildTree(text);
  const encoded = huffman.encode(text);
  const decoded = huffman.decode(encoded);

  console.log('Original:', text);
  console.log('Encoded:', encoded);
  console.log('Decoded:', decoded);
  console.log('Codes:', huffman.getCodes());
  console.log('Compression ratio:', huffman.getCompressionRatio(text, encoded));
}

2. LZW 压缩算法

ts
class LZWCompression {
  private dictionary: Map<string, number> = new Map();
  private reverseDictionary: Map<number, string> = new Map();
  private nextCode: number;

  constructor() {
    this.initializeDictionary();
  }

  private initializeDictionary(): void {
    this.dictionary.clear();
    this.reverseDictionary.clear();
    this.nextCode = 0;

    // 初始化ASCII字符
    for (let i = 0; i < 256; i++) {
      const char = String.fromCharCode(i);
      this.dictionary.set(char, this.nextCode);
      this.reverseDictionary.set(this.nextCode, char);
      this.nextCode++;
    }
  }

  // 压缩
  compress(input: string): number[] {
    this.initializeDictionary();
    const result: number[] = [];
    let current = '';

    for (const char of input) {
      const next = current + char;

      if (this.dictionary.has(next)) {
        current = next;
      } else {
        // 输出当前字符串的编码
        result.push(this.dictionary.get(current)!);

        // 将新字符串加入字典
        this.dictionary.set(next, this.nextCode);
        this.reverseDictionary.set(this.nextCode, next);
        this.nextCode++;

        current = char;
      }
    }

    // 输出最后的字符串
    if (current.length > 0) {
      result.push(this.dictionary.get(current)!);
    }

    return result;
  }

  // 解压
  decompress(compressed: number[]): string {
    this.initializeDictionary();
    let result = '';
    let previousCode = compressed[0];
    let previousString = this.reverseDictionary.get(previousCode)!;
    result += previousString;

    for (let i = 1; i < compressed.length; i++) {
      const currentCode = compressed[i];
      let currentString: string;

      if (this.reverseDictionary.has(currentCode)) {
        currentString = this.reverseDictionary.get(currentCode)!;
      } else if (currentCode === this.nextCode) {
        // 特殊情况:当前编码等于下一个可用编码
        currentString = previousString + previousString[0];
      } else {
        throw new Error('Invalid compressed data');
      }

      result += currentString;

      // 添加新字符串到字典
      const newString = previousString + currentString[0];
      this.dictionary.set(newString, this.nextCode);
      this.reverseDictionary.set(this.nextCode, newString);
      this.nextCode++;

      previousString = currentString;
    }

    return result;
  }

  // 将压缩结果转换为字符串(用于存储)
  compressToString(input: string): string {
    const compressed = this.compress(input);
    return compressed.map((code) => String.fromCharCode(code)).join('');
  }

  // 从字符串解压
  decompressFromString(compressedStr: string): string {
    const compressed = compressedStr.split('').map((char) => char.charCodeAt(0));
    return this.decompress(compressed);
  }
}

// 使用示例
function lzwExample() {
  const text = 'TOBEORNOTTOBEORTOBEORNOT';
  const lzw = new LZWCompression();

  const compressed = lzw.compress(text);
  const decompressed = lzw.decompress(compressed);

  console.log('Original:', text);
  console.log('Compressed:', compressed);
  console.log('Decompressed:', decompressed);
  console.log('Compression ratio:', compressed.length / text.length);
}

3. 游程编码 (Run-Length Encoding)

ts
// 基本游程编码
function runLengthEncode(input: string): string {
  if (input.length === 0) return '';

  let result = '';
  let count = 1;
  let currentChar = input[0];

  for (let i = 1; i < input.length; i++) {
    if (input[i] === currentChar) {
      count++;
    } else {
      result += count.toString() + currentChar;
      currentChar = input[i];
      count = 1;
    }
  }

  // 处理最后一个字符
  result += count.toString() + currentChar;

  return result;
}

// 游程解码
function runLengthDecode(encoded: string): string {
  let result = '';
  let i = 0;

  while (i < encoded.length) {
    // 解析计数
    let countStr = '';
    while (i < encoded.length && /\d/.test(encoded[i])) {
      countStr += encoded[i];
      i++;
    }

    const count = parseInt(countStr);
    if (i < encoded.length) {
      const char = encoded[i];
      result += char.repeat(count);
      i++;
    }
  }

  return result;
}

// 改进的游程编码(处理数字字符)
function runLengthEncodeImproved(input: string): string {
  if (input.length === 0) return '';

  let result = '';
  let count = 1;
  let currentChar = input[0];

  for (let i = 1; i < input.length; i++) {
    if (input[i] === currentChar && count < 255) {
      // 限制计数避免溢出
      count++;
    } else {
      result += String.fromCharCode(count) + currentChar;
      currentChar = input[i];
      count = 1;
    }
  }

  // 处理最后一个字符
  result += String.fromCharCode(count) + currentChar;

  return result;
}

function runLengthDecodeImproved(encoded: string): string {
  let result = '';
  for (let i = 0; i < encoded.length; i += 2) {
    const count = encoded.charCodeAt(i);
    const char = encoded[i + 1];
    result += char.repeat(count);
  }
  return result;
}

// 游程编码变体:只编码重复字符
function runLengthEncodeSelective(input: string, minRepeat: number = 3): string {
  let result = '';
  let i = 0;

  while (i < input.length) {
    let count = 1;
    let j = i + 1;

    // 查找连续重复字符
    while (j < input.length && input[j] === input[i]) {
      count++;
      j++;
    }

    if (count >= minRepeat) {
      // 编码重复序列
      result += input[i] + count.toString() + input[i];
      i = j;
    } else {
      // 不编码,直接复制
      result += input[i];
      i++;
    }
  }

  return result;
}

function runLengthDecodeSelective(encoded: string): string {
  let result = '';
  let i = 0;

  while (i < encoded.length) {
    const char = encoded[i];
    i++;

    // 检查是否是编码序列
    if (i + 1 < encoded.length && /\d/.test(encoded[i]) && encoded[i + 1] === char) {
      const count = parseInt(encoded[i]);
      result += char.repeat(count);
      i += 2;
    } else {
      result += char;
    }
  }

  return result;
}

4. LZ77 压缩算法

ts
interface LZ77Token {
  offset: number;
  length: number;
  nextChar: string;
}

class LZ77Compression {
  private windowSize: number;
  private bufferSize: number;

  constructor(windowSize: number = 4096, bufferSize: number = 16) {
    this.windowSize = windowSize;
    this.bufferSize = bufferSize;
  }

  // 压缩
  compress(input: string): LZ77Token[] {
    const tokens: LZ77Token[] = [];
    let position = 0;

    while (position < input.length) {
      const match = this.findLongestMatch(input, position);

      if (match.length > 0) {
        // 找到匹配,使用匹配信息
        tokens.push({
          offset: match.offset,
          length: match.length,
          nextChar: input[position + match.length] || '',
        });
        position += match.length + 1;
      } else {
        // 没有找到匹配,直接输出字符
        tokens.push({
          offset: 0,
          length: 0,
          nextChar: input[position],
        });
        position++;
      }
    }

    return tokens;
  }

  private findLongestMatch(input: string, position: number): { offset: number; length: number } {
    let bestMatch = { offset: 0, length: 0 };
    const start = Math.max(0, position - this.windowSize);

    for (let i = start; i < position; i++) {
      let length = 0;
      while (length < this.bufferSize && position + length < input.length && input[i + length] === input[position + length]) {
        length++;
      }

      if (length > bestMatch.length) {
        bestMatch = {
          offset: position - i,
          length: length,
        };
      }
    }

    return bestMatch;
  }

  // 解压
  decompress(tokens: LZ77Token[]): string {
    let result = '';

    for (const token of tokens) {
      if (token.offset === 0 && token.length === 0) {
        // 直接字符
        result += token.nextChar;
      } else {
        // 复制匹配字符串
        const start = result.length - token.offset;
        for (let i = 0; i < token.length; i++) {
          result += result[start + i];
        }
        result += token.nextChar;
      }
    }

    return result;
  }

  // 将压缩结果转换为字符串
  compressToString(input: string): string {
    const tokens = this.compress(input);
    let result = '';

    for (const token of tokens) {
      result += String.fromCharCode(token.offset >> 8);
      result += String.fromCharCode(token.offset & 0xff);
      result += String.fromCharCode(token.length);
      result += token.nextChar;
    }

    return result;
  }

  // 从字符串解压
  decompressFromString(compressed: string): string {
    const tokens: LZ77Token[] = [];
    let i = 0;

    while (i < compressed.length) {
      const offset = (compressed.charCodeAt(i) << 8) | compressed.charCodeAt(i + 1);
      const length = compressed.charCodeAt(i + 2);
      const nextChar = compressed[i + 3];

      tokens.push({ offset, length, nextChar });
      i += 4;
    }

    return this.decompress(tokens);
  }
}

// 使用示例
function lz77Example() {
  const text = 'ABRACADABRA';
  const lz77 = new LZ77Compression(10, 4);

  const compressed = lz77.compress(text);
  const decompressed = lz77.decompress(compressed);

  console.log('Original:', text);
  console.log('Compressed tokens:', compressed);
  console.log('Decompressed:', decompressed);
}

5. 实现要点

  • 霍夫曼编码根据字符频率构建最优前缀码。
  • LZW 使用字典动态构建编码表。
  • 游程编码适合有大量连续重复字符的数据。
  • LZ77 通过滑动窗口查找重复字符串。
算法压缩JavaScript