awkrs 0.4.14

Awk implementation in Rust with broad CLI compatibility, parallel records, and experimental Cranelift JIT
Documentation
# Huffman coding — build the prefix-code tree from character frequencies in
# the input text, emit each character's code, then encode + round-trip decode.
# Input: a single line of text.
# Output:
#   the code table (char freq code), one row per distinct char, sorted by char;
#   ENC: <bitstring>
#   DEC: <recovered text>
#   COMPRESSION: <orig_bits>/<enc_bits>
#
# Tie-break for the priority queue: (freq asc, then lex-min representative
# char asc) so output is deterministic regardless of build order.
# Tree stored as parallel arrays:
#   ch[i]   leaf char (empty for internal nodes)
#   fr[i]   frequency
#   lo[i]   leftmost char in subtree (tie-break)
#   lc[i]   left child id
#   rc[i]   right child id

function extract_min(   best, key, i) {
  best = 0; key = ""
  for (i in active) {
    k = sprintf("%012d %s %d", fr[i], lo[i], i)
    if (best == 0 || k < key) { best = i; key = k }
  }
  delete active[best]
  return best
}

function assign_codes(node, prefix) {
  if (lc[node] == 0 && rc[node] == 0) {
    code[ch[node]] = (prefix == "" ? "0" : prefix)   # single-char text edge case
    return
  }
  if (lc[node] != 0) assign_codes(lc[node], prefix "0")
  if (rc[node] != 0) assign_codes(rc[node], prefix "1")
}

NR == 1 {
  text = $0
  for (i = 1; i <= length(text); i++) {
    c = substr(text, i, 1)
    freq[c]++
  }

  nn = 0
  for (c in freq) {
    nn++
    ch[nn] = c; fr[nn] = freq[c]; lo[nn] = c
    lc[nn] = 0; rc[nn] = 0
    active[nn] = 1
  }

  while (1) {
    cnt = 0
    for (i in active) cnt++
    if (cnt <= 1) break
    a = extract_min()
    b = extract_min()
    nn++
    ch[nn] = ""; fr[nn] = fr[a] + fr[b]
    lo[nn] = (lo[a] < lo[b]) ? lo[a] : lo[b]
    lc[nn] = a; rc[nn] = b
    active[nn] = 1
  }

  for (i in active) root = i
  assign_codes(root, "")

  # Emit code table sorted by char.
  PROCINFO["sorted_in"] = "@ind_str_asc"
  for (c in freq) printf "char=%s  freq=%d  code=%s\n", c, freq[c], code[c]

  # Encode.
  enc = ""
  for (i = 1; i <= length(text); i++) enc = enc code[substr(text, i, 1)]
  printf "ENC: %s\n", enc

  # Decode by walking the tree.
  dec = ""; cur = root
  for (i = 1; i <= length(enc); i++) {
    bit = substr(enc, i, 1)
    cur = (bit == "0") ? lc[cur] : rc[cur]
    if (lc[cur] == 0 && rc[cur] == 0) {
      dec = dec ch[cur]
      cur = root
    }
  }
  printf "DEC: %s\n", dec

  printf "COMPRESSION: %d/%d\n", length(text) * 8, length(enc)
  exit 0
}