awkrs 0.4.13

Awk implementation in Rust with broad CLI compatibility, parallel records, and experimental Cranelift JIT
Documentation
# Z-function in O(n): z[i] = length of the longest substring starting at i
# that matches a prefix of s. Used for linear-time pattern matching.
#
# Input lines:  "<string>"
# Output:       "<string>: z=[z1,z2,...]"
#               If the string contains '#' followed by "PAT:" then split into
#               pattern + text and report match positions:
#                 "MATCHES: pos1 pos2 ..."

function z_array(s,   n, i, l, r, k) {
  n = length(s)
  delete z
  z[1] = n
  l = 0; r = 0
  for (i = 2; i <= n; i++) {
    if (i <= r) z[i] = (r - i + 1 < z[i - l + 1]) ? r - i + 1 : z[i - l + 1]
    else        z[i] = 0
    while (i + z[i] <= n && substr(s, z[i] + 1, 1) == substr(s, i + z[i], 1)) z[i]++
    if (i + z[i] - 1 > r) { l = i; r = i + z[i] - 1 }
  }
  return n
}

function show_z(s,   n, i, out, sep) {
  n = z_array(s)
  out = ""; sep = ""
  for (i = 1; i <= n; i++) { out = out sep z[i]; sep = "," }
  printf "%s: z=[%s]\n", s, out
}

function find_matches(pat, txt,   sep_pos, n, m, s, i, hits, sep) {
  m = length(pat)
  s = pat "#" txt
  n = z_array(s)
  hits = ""; sep = ""
  for (i = m + 2; i <= n; i++) {
    if (z[i] >= m) {
      hits = hits sep (i - m - 1)
      sep = " "
    }
  }
  return hits == "" ? "NONE" : hits
}

NF == 0 { next }
{
  line = $0
  if (index(line, " in ")) {
    pat = $1
    sub(/^[^ ]+ in /, "", line)
    txt = line
    show_z(pat)
    printf "MATCHES of %s in %s: %s\n", pat, txt, find_matches(pat, txt)
    next
  }
  show_z(line)
}