from __future__ import annotations
import os, re, sys
from pathlib import Path
# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parent.parent
# Directory globbed by main() for the Rust module sources (*.rs).
RS_DIR = ROOT.joinpath("src", "ported", "modules")
# Directory where main() looks up the C file that shares a Rust file's stem.
C_DIR = ROOT.joinpath("src", "zsh", "Src", "Modules")
# Identifiers treated as noise: toks() drops them from token sets and
# index_c() refuses to treat them as C function names.
COMMON = {
    # C keywords, storage classes and qualifiers
    "if", "for", "while", "switch", "return", "else", "do", "sizeof",
    "static", "extern", "struct", "union", "enum", "typedef", "const",
    "volatile", "inline", "register", "auto", "goto", "break", "continue",
    "case", "default", "NULL",
    # C types and common constants
    "void", "int", "char", "long", "short", "unsigned", "signed", "float",
    "double", "size_t", "ssize_t", "FILE", "TRUE", "FALSE",
    # Rust keywords
    "let", "mut", "fn", "loop", "match", "in", "as", "ref", "pub", "self",
    "Self",
    # Rust prelude names and common types
    "Some", "None", "Ok", "Err", "true", "false", "String", "str", "i32",
    "u32", "i64", "u64", "usize", "isize", "Vec", "HashMap", "Option",
    "Result", "Box",
    # Ubiquitous Rust method names
    "to_string", "unwrap", "clone", "push", "len", "is_empty", "into",
    "from",
    # Generic parameter and local-variable names
    "args", "arg", "name", "ret", "val", "s", "p", "i", "j", "k", "n", "m",
    "x", "y",
}
# An identifier-shaped word: a letter or underscore, then letters, digits
# or underscores.
TOK = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")


def toks(line: str) -> set[str]:
    """Return the set of distinctive identifiers occurring in *line*.

    Every match of ``TOK`` is considered; words of one or two characters and
    words listed in ``COMMON`` are discarded.  Despite the parameter name the
    text may span several lines.
    """
    kept = set()
    for word in TOK.findall(line):
        if len(word) <= 2 or word in COMMON:
            continue
        kept.add(word)
    return kept
# Start of a C function header: an identifier at the very beginning of the
# line, optionally followed by whitespace, then "(".
RE_C_FN = re.compile(r"^([A-Za-z_][A-Za-z0-9_]*)\s*\(")


def _c_block_end(lines, opener):
    """Return the index of the line on which the block opened on *opener* closes.

    Braces are counted character by character starting at the beginning of
    line *opener*; nothing is done to skip braces inside strings or comments.
    The result is the line where a ``}`` brings the depth back to zero.  If
    the depth is zero at the end of a line without such a ``}``, or the input
    runs out first, *opener* itself is returned.
    """
    depth = 0
    for row in range(opener, len(lines)):
        for ch in lines[row]:
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    return row
        if depth == 0:
            return opener
    return opener


def index_c(c_path: Path) -> dict[str, dict]:
    """Index the function definitions found in the C file *c_path*.

    A definition is recognised by a line that starts at column 0 (and not
    with ``/``, ``*`` or ``#``), matches ``RE_C_FN`` with a name outside
    ``COMMON``, and is followed within that line or the next five by a line
    containing ``{`` with no ``;`` before it.

    Returns a dict mapping each function name to
    ``{"start": ..., "end": ..., "body_toks": ...}`` where ``start`` and
    ``end`` are 1-based line numbers of the header line and the closing line,
    and ``body_toks`` is ``toks()`` of the lines strictly between the opening
    and closing lines.  When a name occurs more than once the first
    definition wins.  A missing file yields an empty dict.
    """
    index: dict[str, dict] = {}
    if not c_path.exists():
        return index

    lines = c_path.read_text(errors="replace").splitlines()
    total = len(lines)
    pos = 0
    while pos < total:
        head = lines[pos]
        hit = None
        if head and not head[0].isspace() and not head.startswith(("/", "*", "#")):
            hit = RE_C_FN.match(head)
        if hit is None or hit.group(1) in COMMON:
            pos += 1
            continue

        # The opening brace has to show up on the header line or one of the
        # five lines after it, with no ";" earlier on that same line.
        opener = next(
            (
                row
                for row in range(pos, min(pos + 6, total))
                if "{" in lines[row] and ";" not in lines[row].split("{", 1)[0]
            ),
            -1,
        )
        if opener < 0:
            pos += 1
            continue

        closer = _c_block_end(lines, opener)
        body = "\n".join(lines[opener + 1:closer])
        # setdefault keeps the first definition seen for a given name.
        index.setdefault(
            hit.group(1),
            {"start": pos + 1, "end": closer + 1, "body_toks": toks(body)},
        )
        # Resume after the block so nothing inside the body is re-examined.
        pos = closer + 1
    return index
# Start of a Rust `fn` item.  Group 1 is the leading whitespace (the indent),
# group 2 the function name.  Any number of `pub...`, `async`, `unsafe` and
# `extern "..."` prefixes may precede the `fn` keyword.
RE_RS_FN_SIG = re.compile(r"^(\s*)(?:pub\S*\s+|async\s+|unsafe\s+|extern\s+\"[^\"]*\"\s+)*fn\s+([A-Za-z_][A-Za-z0-9_]*)\b")
# "Port of X" / "Ports of X" / "Ported of X" / "Porting of X" (any letter
# case), with X optionally opened by a backtick.  Group 1 is X.
RE_PORT_DOC = re.compile(r"Port(?:s|ed|ing)?\s+of\s+`?([A-Za-z_][A-Za-z0-9_]*)`?", re.IGNORECASE)
# A line-comment tag of the form `// C: name(` or `// c: name(`.  Group 1 is
# the name.
RE_C_TAG = re.compile(r"//\s*[Cc]:\s*([A-Za-z_][A-Za-z0-9_]*)\s*\(")
# Marker text of the "adhoc implementation" warning comment (any letter case).
RE_WARN = re.compile(r"WARNING:\s*THIS IS ADHOC IMPLEMENTATION", re.IGNORECASE)
def _rs_body_open(lines, start, max_lines):
    """Find the ``{`` that opens the body of the fn starting on ``lines[start]``.

    At most *max_lines* lines are examined.  Returns ``(row, col)`` of the
    first ``{``, or ``None`` when a ``;`` outside parentheses and square
    brackets comes first (a declaration without a body, e.g. a trait method)
    or when no ``{`` is found in the window.
    """
    nest = 0
    for row in range(start, min(start + max_lines, len(lines))):
        for col, ch in enumerate(lines[row]):
            if ch == "{":
                return row, col
            if ch in "([":
                nest += 1
            elif ch in ")]":
                nest = max(nest - 1, 0)
            elif ch == ";" and nest == 0:
                # A ";" nested in (...) or [...] belongs to a type such as
                # `[u8; 4]`; only a top-level one ends the declaration.
                return None
    return None


def find_rs_fns(text: str, *, max_sig_lines: int = 64):
    """Yield every Rust function in *text* that has a body.

    Each item is ``(sig_line, body_open, body_close, name, indent)``:
    0-based indices into ``text.splitlines()`` of the line matching
    ``RE_RS_FN_SIG``, the line holding the body's opening ``{`` and the line
    holding its matching ``}``, followed by the function name and the
    signature's leading whitespace.

    Scanning resumes after the closing line, so functions nested inside a
    yielded body are not reported separately.  Declarations that end in ``;``
    before any ``{`` are skipped.  *max_sig_lines* bounds how many lines,
    starting at the signature line, are searched for the opening brace.

    Braces are counted textually; braces inside string/char literals or
    comments are not recognised as such.  If a body is still open at the end
    of the input the generator stops without yielding it.
    """
    lines = text.splitlines()
    total = len(lines)
    i = 0
    while i < total:
        sig = RE_RS_FN_SIG.match(lines[i])
        opener = _rs_body_open(lines, i, max_sig_lines) if sig else None
        if opener is None:
            i += 1
            continue

        open_row, open_col = opener
        depth = 0
        close_row = None
        for row in range(open_row, total):
            # Start counting at the opening brace itself so that nothing in
            # front of it can unbalance the depth.
            segment = lines[row][open_col:] if row == open_row else lines[row]
            for ch in segment:
                if ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
                    if depth == 0:
                        close_row = row
                        break
            if close_row is not None:
                break

        if close_row is None:
            # Unbalanced up to end of input: nothing further can be matched.
            return

        yield (i, open_row, close_row, sig.group(2), sig.group(1))
        i = close_row + 1
def candidate_c_names(rs_name: str) -> list[str]:
    """List the C function names that *rs_name* could correspond to.

    The Rust name itself always comes first, followed by these rewrites in
    order (each only when its prefix applies):

    * ``builtin_X``  -> ``bin_X`` and ``X``
    * ``bin_zX`` / ``builtin_zX`` (X starting lowercase) -> ``bin_X``
    * ``zX`` (X starting lowercase) -> ``X``
    * ``get_X`` -> ``getX``, ``set_X`` -> ``setX``, ``do_X`` -> ``X``

    Duplicates are removed, keeping the first occurrence.
    """
    guesses = [rs_name]

    if rs_name.startswith("builtin_"):
        tail = rs_name[len("builtin_"):]
        guesses.extend((f"bin_{tail}", tail))

    z_builtin = re.match(r"^(bin|builtin)_z([a-z][a-zA-Z0-9_]*)$", rs_name)
    if z_builtin:
        guesses.append(f"bin_{z_builtin.group(2)}")

    if len(rs_name) > 1 and rs_name[0] == "z" and rs_name[1].islower():
        guesses.append(rs_name[1:])

    # prefix on the Rust side -> what replaces it on the C side
    for prefix, replacement in (("get_", "get"), ("set_", "set"), ("do_", "")):
        if rs_name.startswith(prefix):
            guesses.append(replacement + rs_name[len(prefix):])

    # dict keys keep insertion order, which de-duplicates while preserving
    # the priority order built above.
    return list(dict.fromkeys(guesses))
def lookup_existing_doc(lines: list[str], sig_line: int) -> tuple[str | None, bool]:
    """Inspect the comment/attribute lines directly above ``lines[sig_line]``.

    Walks upwards from the line before *sig_line* for as long as each line,
    ignoring leading whitespace, starts with a comment marker (``//``,
    ``/*``, ``*``) or an attribute (``#[``).

    Returns ``(name, has_warn)``: *name* is the identifier captured by
    ``RE_PORT_DOC`` or, failing that on the same line, ``RE_C_TAG`` on the
    first such line met while walking up (``None`` if there is none), and
    *has_warn* tells whether any visited line matches ``RE_WARN``.
    """
    leaders = ("///", "//!", "//", "#[", "/*", "*", "*/")
    found = None
    warned = False
    for row in range(sig_line - 1, -1, -1):
        raw = lines[row]
        if not raw.lstrip().startswith(leaders):
            break
        if RE_WARN.search(raw):
            warned = True
        if not found:
            hit = RE_PORT_DOC.search(raw) or RE_C_TAG.search(raw)
            if hit:
                found = hit.group(1)
    return found, warned
def best_body_match(rs_body_toks: set[str], c_idx: dict[str, dict]) -> tuple[str | None, int]:
    """Pick the C function whose body shares the most tokens with a Rust body.

    *c_idx* maps C function names to dicts carrying a ``"body_toks"`` set.
    Returns ``(name, shared_count)`` for the entry with the largest
    intersection with *rs_body_toks*; on a tie the entry that comes first in
    *c_idx* wins.  Returns ``(None, 0)`` when *rs_body_toks* is empty or no
    entry shares any token.
    """
    if not rs_body_toks:
        return None, 0

    def shared(c_name):
        c_toks = c_idx[c_name]["body_toks"]
        return len(rs_body_toks & c_toks) if c_toks else 0

    # max() returns the first of several equally good entries.
    winner = max(c_idx, key=shared, default=None)
    if winner is None:
        return None, 0
    top = shared(winner)
    if top == 0:
        return None, 0
    return winner, top
def annotate(rs_path: Path, c_path: Path, c_idx: dict[str, dict]) -> dict:
    """Insert a provenance doc comment above each undocumented fn in *rs_path*.

    For every function reported by ``find_rs_fns`` that does not already
    carry a "Port of ..." / "// C: ..." reference or the adhoc warning (see
    ``lookup_existing_doc``), a C counterpart is looked up in *c_idx*:

    1. by name, trying ``candidate_c_names`` in order;
    2. otherwise, when *c_idx* has at most 80 entries, by body-token overlap
       (``best_body_match``), accepted only with at least 4 shared tokens.

    A match produces ``/// Port of `name()` from `<C file>:<line>`.``; no
    match produces the two-line "WARNING: THIS IS ADHOC IMPLEMENTATION"
    comment.  The comment is placed above any ``///`` doc lines and ``#[``
    attributes that already sit directly on top of the signature.

    The file is read and written as UTF-8 and is only rewritten when at least
    one comment was inserted.

    Returns ``{"matched": ..., "adhoc": ..., "skipped": ...}`` with the
    number of functions in each category.
    """
    # Rust source files are UTF-8 regardless of the platform's locale.
    text = rs_path.read_text(encoding="utf-8")
    lines = text.splitlines()
    c_basename = c_path.relative_to(ROOT).as_posix().replace("src/zsh/Src/", "Src/")
    matched = adhoc = skipped = 0
    insertions: list[tuple[int, list[str]]] = []

    for sig_line, body_open, body_close, rs_name, indent in find_rs_fns(text):
        existing_name, has_warn = lookup_existing_doc(lines, sig_line)
        if has_warn or existing_name:
            skipped += 1
            continue

        c_name = next(
            (cand for cand in candidate_c_names(rs_name) if cand in c_idx),
            None,
        )
        if c_name is None and len(c_idx) <= 80:
            rs_body_toks = toks("\n".join(lines[body_open + 1:body_close]))
            best, score = best_body_match(rs_body_toks, c_idx)
            if best and score >= 4:
                c_name = best

        if c_name is not None:
            lineno = c_idx[c_name]["start"]
            doc = [
                f"{indent}/// Port of `{c_name}()` from `{c_basename}:{lineno}`.",
            ]
            matched += 1
        else:
            doc = [
                f"{indent}/// WARNING: THIS IS ADHOC IMPLEMENTATION AND NOT A FAITHFUL PORT",
                f"{indent}/// of any function in `{c_basename}`.",
            ]
            adhoc += 1

        # Climb over the function's own outer docs and attributes.  Inner doc
        # lines (`//!`) are deliberately not climbed: they document the
        # enclosing module, and an outer `///` placed above them is rejected
        # by rustc.
        ins = sig_line
        while ins > 0 and lines[ins - 1].lstrip().startswith(("///", "#[")):
            ins -= 1
        insertions.append((ins, doc))

    if insertions:
        # Apply bottom-up so earlier insertion indices stay valid.
        for ins, doc in sorted(insertions, key=lambda item: -item[0]):
            lines[ins:ins] = doc
        trailer = "\n" if text.endswith("\n") else ""
        rs_path.write_text("\n".join(lines) + trailer, encoding="utf-8")

    return {"matched": matched, "adhoc": adhoc, "skipped": skipped}
def main() -> int:
    """Annotate every Rust module file and print per-file and total counts.

    All ``*.rs`` files in ``RS_DIR`` except ``mod.rs`` are processed in sorted
    order; when command-line arguments are given, only files whose stem is
    among them are kept.  Each file is paired with ``C_DIR/<stem>.c``.  One
    summary line per file and a final total are written to stderr.

    Returns 0.
    """
    wanted = set(sys.argv[1:])
    files = sorted(p for p in RS_DIR.glob("*.rs") if p.name != "mod.rs")
    if wanted:
        files = [p for p in files if p.stem in wanted]

    totals = dict.fromkeys(("matched", "adhoc", "skipped"), 0)
    for rs in files:
        c_file = C_DIR / f"{rs.stem}.c"
        c_idx = index_c(c_file)
        stats = annotate(rs, c_file, c_idx)
        for key, count in stats.items():
            totals[key] += count
        print(
            f"{rs.name:<24} matched:{stats['matched']:4} adhoc:{stats['adhoc']:4} "
            f"already-doc:{stats['skipped']:4} c-fns:{len(c_idx):4}",
            file=sys.stderr,
        )

    print(
        f"\nTOTAL matched:{totals['matched']} adhoc:{totals['adhoc']} "
        f"already-doc:{totals['skipped']}",
        file=sys.stderr,
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())