压缩算法实现

1. 游程编码 (Run-Length Encoding)

function runLengthEncode(str: string): string {
  if (!str) return '';

  let result = '';
  let count = 1;

  for (let i = 1; i < str.length; i += 1) {
    if (str[i] === str[i - 1]) {
      count += 1;
    } else {
      result += count + str[i - 1];
      count = 1;
    }
  }

  result += count + str[str.length - 1];
  return result;
}

function runLengthDecode(encoded: string): string {
  let result = '';
  let i = 0;

  while (i < encoded.length) {
    let count = 0;
    while (i < encoded.length && /\d/.test(encoded[i])) {
      count = count * 10 + parseInt(encoded[i]);
      i += 1;
    }
    const char = encoded[i];
    result += char.repeat(count);
    i += 1;
  }

  return result;
}

2. 霍夫曼编码

class HuffmanNode {
  char?: string;
  freq: number;
  left?: HuffmanNode;
  right?: HuffmanNode;

  constructor(char?: string, freq: number = 0) {
    this.char = char;
    this.freq = freq;
  }
}

function buildHuffmanTree(text: string): HuffmanNode {
  const freqMap = new Map<string, number>();
  for (const char of text) {
    freqMap.set(char, (freqMap.get(char) || 0) + 1);
  }

  const priorityQueue: HuffmanNode[] = [];
  for (const [char, freq] of freqMap) {
    priorityQueue.push(new HuffmanNode(char, freq));
  }

  priorityQueue.sort((a, b) => a.freq - b.freq);

  while (priorityQueue.length > 1) {
    const left = priorityQueue.shift()!;
    const right = priorityQueue.shift()!;
    const merged = new HuffmanNode(undefined, left.freq + right.freq);
    merged.left = left;
    merged.right = right;
    priorityQueue.push(merged);
    priorityQueue.sort((a, b) => a.freq - b.freq);
  }

  return priorityQueue[0];
}

function buildHuffmanCodes(node: HuffmanNode, currentCode: string, codes: Map<string, string>): void {
  if (!node) return;

  if (node.char) {
    codes.set(node.char, currentCode);
    return;
  }

  buildHuffmanCodes(node.left!, currentCode + '0', codes);
  buildHuffmanCodes(node.right!, currentCode + '1', codes);
}

function huffmanEncode(text: string): { encoded: string; codes: Map<string, string> } {
  if (!text) return { encoded: '', codes: new Map() };

  const root = buildHuffmanTree(text);
  const codes = new Map<string, string>();
  buildHuffmanCodes(root, '', codes);

  let encoded = '';
  for (const char of text) {
    encoded += codes.get(char);
  }

  return { encoded, codes };
}

function huffmanDecode(encoded: string, codes: Map<string, string>): string {
  const reverseCodes = new Map<string, string>();
  for (const [char, code] of codes) {
    reverseCodes.set(code, char);
  }

  let result = '';
  let currentCode = '';

  for (const bit of encoded) {
    currentCode += bit;
    if (reverseCodes.has(currentCode)) {
      result += reverseCodes.get(currentCode);
      currentCode = '';
    }
  }

  return result;
}

3. LZW 压缩

function lzwCompress(input: string): number[] {
  const dictionary = new Map<string, number>();
  for (let i = 0; i < 256; i += 1) {
    dictionary.set(String.fromCharCode(i), i);
  }

  let result: number[] = [];
  let current = '';
  let nextCode = 256;

  for (const char of input) {
    const combined = current + char;
    if (dictionary.has(combined)) {
      current = combined;
    } else {
      result.push(dictionary.get(current)!);
      dictionary.set(combined, nextCode);
      nextCode += 1;
      current = char;
    }
  }

  if (current) {
    result.push(dictionary.get(current)!);
  }

  return result;
}

function lzwDecompress(compressed: number[]): string {
  const dictionary = new Map<number, string>();
  for (let i = 0; i < 256; i += 1) {
    dictionary.set(i, String.fromCharCode(i));
  }

  let result = '';
  let previous = String.fromCharCode(compressed[0]);
  result += previous;
  let nextCode = 256;

  for (let i = 1; i < compressed.length; i += 1) {
    let current: string;
    if (dictionary.has(compressed[i])) {
      current = dictionary.get(compressed[i])!;
    } else if (compressed[i] === nextCode) {
      current = previous + previous[0];
    } else {
      throw new Error('Invalid compressed data');
    }

    result += current;
    dictionary.set(nextCode, previous + current[0]);
    nextCode += 1;
    previous = current;
  }

  return result;
}

4. 实现要点

游程编码适合重复字符多的数据。
霍夫曼编码基于字符频率构建最优前缀码。
LZW 使用字典动态构建编码。
压缩效果取决于数据特征。

算法知识库：压缩算法实现

压缩算法实现

1. 游程编码 (Run-Length Encoding)

2. 霍夫曼编码

3. LZW 压缩

4. 实现要点