from __future__ import annotations
import re, sys
from pathlib import Path
# Directory two levels above this script file; every other path hangs off it.
ROOT = Path(__file__).resolve().parent.parent
# Directory globbed for ``*.rs`` files by ``main``.
RS_DIR = ROOT.joinpath("src", "ported", "modules")
# Directory searched for the ``.c`` file sharing a Rust file's stem.
C_DIR = ROOT.joinpath("src", "zsh", "Src", "Modules")
# C words that carry no matching signal: they appear in almost every function,
# so sharing one says nothing about two lines corresponding to each other.
C_KW = {
    # control flow
    "if", "else", "for", "while", "do", "switch", "case", "default",
    "break", "continue", "goto", "return",
    # storage classes, qualifiers and operators
    "static", "extern", "register", "auto", "const", "volatile", "inline",
    "typedef", "sizeof",
    # aggregates
    "struct", "union", "enum",
    # ubiquitous type names
    "void", "int", "char", "long", "short", "unsigned", "signed", "float",
    "double", "size_t", "ssize_t", "FILE",
    # ubiquitous constants
    "NULL", "TRUE", "FALSE",
}

# The Rust counterpart of the list above.
RS_KW = {
    # keywords
    "if", "else", "let", "mut", "fn", "return", "for", "while", "loop",
    "match", "in", "as", "ref", "pub", "self", "Self",
    # enum variants and literals
    "Some", "None", "Ok", "Err", "true", "false",
    # ubiquitous type names
    "String", "str", "i32", "u32", "i64", "usize", "Vec", "HashMap",
    # ubiquitous method names
    "to_string", "unwrap", "clone",
}

# Everything ignored on either side of a comparison.
COMMON = C_KW.union(RS_KW)

# One identifier-shaped word (same lexical shape in C and in Rust).
TOK = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
def toks(line: str) -> set[str]:
    """Return the distinctive identifiers that occur in *line*.

    A word is kept when it is longer than two characters and is not listed in
    ``COMMON``; duplicates collapse because the result is a set.
    """
    kept: set[str] = set()
    for word in TOK.findall(line):
        if len(word) > 2 and word not in COMMON:
            kept.add(word)
    return kept
# A candidate definition line: an identifier in column 0 directly followed by
# "(" (only whitespace may sit between the two).
RE_C_FN = re.compile(r"^([A-Za-z_][A-Za-z0-9_]*)\s*\(")


def _c_code_chars(lines, start):
    """Yield ``(line_index, char)`` for the code characters of ``lines[start:]``.

    Text inside ``/* */`` and ``//`` comments and inside string or character
    literals is skipped, so a brace written as ``'{'`` or ``"}"`` is never
    reported.  Scanning assumes ``lines[start]`` begins outside any comment.
    """
    in_comment = False
    for j in range(start, len(lines)):
        line = lines[j]
        # A C literal cannot span lines without a backslash continuation, so
        # the quote state is reset per line; this also keeps a stray
        # apostrophe (e.g. in ``#error`` text) from poisoning later lines.
        quote = ""
        k, end = 0, len(line)
        while k < end:
            ch = line[k]
            if in_comment:
                if line.startswith("*/", k):
                    in_comment = False
                    k += 2
                else:
                    k += 1
            elif quote:
                if ch == "\\":
                    k += 2  # skip the escaped character as well
                else:
                    if ch == quote:
                        quote = ""
                    k += 1
            elif line.startswith("/*", k):
                in_comment = True
                k += 2
            elif line.startswith("//", k):
                break
            elif ch in "\"'":
                quote = ch
                k += 1
            else:
                yield j, ch
                k += 1


def _is_declaration(lines, start, limit):
    """Tell whether the call-like text on ``lines[start]`` ends in ``;``.

    Returns True when the parenthesised list opened on ``lines[start]`` closes
    before line ``limit`` and the next non-blank code character is ``;`` --
    i.e. a prototype or macro invocation rather than a function definition.
    """
    depth = 0
    closed = False
    for j, ch in _c_code_chars(lines, start):
        if j >= limit:
            break
        if closed:
            if ch.isspace():
                continue
            return ch == ";"
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            closed = depth == 0
    return False


def _matching_brace_line(lines, blk_open):
    """Return the index of the line holding the brace that closes the block
    opened on ``lines[blk_open]``, or ``blk_open`` itself when the braces
    never balance before the end of the file."""
    depth = 0
    for j, ch in _c_code_chars(lines, blk_open):
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return j
    return blk_open


def index_c(c_path: Path) -> dict[str, dict]:
    """Index the function definitions found in the C file *c_path*.

    A definition is recognised by a line matching ``RE_C_FN`` whose name is
    not in ``C_KW``, followed within six lines (that line included) by a line
    containing ``{`` with no ``;`` in front of it.

    Returns a mapping from function name to a dict with:

    * ``"start"`` -- 1-based line number of the line carrying the name;
    * ``"end"`` -- 1-based line number of the closing brace;
    * ``"body"`` -- ``(1-based line number, text)`` pairs for the lines
      strictly between the opening-brace line and the closing-brace line.

    Only the first definition of a name is kept.  A missing file yields an
    empty mapping.  When braces never balance (for instance because
    preprocessor branches open a block twice) the entry is still recorded,
    with an empty body.
    """
    out: dict[str, dict] = {}
    if not c_path.exists():
        return out
    # Decode deterministically instead of with the locale codec, and split on
    # "\n" only: str.splitlines() also breaks at form feed, NEL and friends,
    # which would make the recorded numbers drift from the real line numbers.
    lines = c_path.read_text(encoding="utf-8", errors="replace").split("\n")
    n = len(lines)
    i = 0
    while i < n:
        m = RE_C_FN.match(lines[i])
        if m is None or m.group(1) in C_KW:
            i += 1
            continue
        window = min(i + 6, n)
        blk_open = -1
        # A prototype such as ``foo(int a);`` must not borrow the body of the
        # function that happens to follow it.
        if not _is_declaration(lines, i, window):
            for j in range(i, window):
                head, brace, _ = lines[j].partition("{")
                if brace and ";" not in head:
                    blk_open = j
                    break
        if blk_open < 0:
            i += 1
            continue
        end = _matching_brace_line(lines, blk_open)
        name = m.group(1)
        if name not in out:
            out[name] = {
                "start": i + 1,
                "end": end + 1,
                "body": [(k + 1, lines[k]) for k in range(blk_open + 1, end)],
            }
        # Resume after the body so nested call lines are not mistaken for
        # definitions.
        i = end + 1
    return out
# First line of a Rust function item.  Group 1 is the leading indentation,
# group 2 the function name.  The optional qualifiers are accepted in the
# order the Rust grammar fixes: visibility, `const`, `async`, `unsafe`,
# `extern` with an optional ABI string.  (`const fn` and ABI-less `extern fn`
# used to be missed, so such functions were never annotated.)
RE_RS_FN_SIG = re.compile(
    r'^(\s*)'
    r'(?:pub(?:\([^)]*\))?\s+)?'
    r'(?:const\s+)?'
    r'(?:async\s+)?'
    r'(?:unsafe\s+)?'
    r'(?:extern\s+(?:"[^"]+"\s+)?)?'
    r'fn\s+([A-Za-z_][A-Za-z0-9_]*)\b'
)
# "Port of `name`", "Ported of name", ... (case-insensitive).  Group 1 is the
# identifier that follows, without the optional backticks.
RE_PORT_DOC = re.compile(r"Port(?:s|ed|ing)?\s+of\s+`?([A-Za-z_][A-Za-z0-9_]*)`?", re.IGNORECASE)
def candidate_c_names(rs_name: str) -> list[str]:
    """List the C function names a Rust function called *rs_name* may map to.

    The Rust name itself always comes first.  It is followed, in this order,
    by whichever of these rewrites apply:

    * ``builtin_X``  -> ``bin_X`` and ``X``;
    * ``bin_zX`` / ``builtin_zX`` (``X`` starting with a lowercase ASCII
      letter) -> ``bin_X``;
    * ``zX`` (``X`` starting with a lowercase letter) -> ``X``.
    """
    builtin_prefix = "builtin_"
    names = [rs_name]

    if rs_name.startswith(builtin_prefix):
        stem = rs_name[len(builtin_prefix):]
        names.extend([f"bin_{stem}", stem])

    z_prefixed = re.match(r"^(bin|builtin)_z([a-z][a-zA-Z0-9_]*)$", rs_name)
    if z_prefixed is not None:
        names.append("bin_" + z_prefixed.group(2))

    # Slices keep the one-character name "z" from raising.
    if rs_name[:1] == "z" and rs_name[1:2].islower():
        names.append(rs_name[1:])

    return names
def _rs_code_chars(lines, start):
    """Yield ``(line_index, char)`` for the code characters of ``lines[start:]``.

    Skipped, so that their braces and semicolons are never reported:
    ``//`` comments, nested ``/* */`` comments, ``"..."`` strings (which may
    span lines), raw strings ``r"..."`` / ``r#"..."#`` and character literals.
    Lifetime and label ticks (``'a``) are dropped; the name after them is
    yielded as ordinary code.  Scanning assumes ``lines[start]`` begins
    outside any comment or literal.
    """
    block_depth = 0   # nesting level of /* */ comments
    in_str = False    # inside an ordinary "..." literal
    raw_hashes = -1   # >= 0 while inside a raw string with that many '#'
    for j in range(start, len(lines)):
        line = lines[j]
        k, end = 0, len(line)
        while k < end:
            ch = line[k]
            if block_depth:
                if line.startswith("*/", k):
                    block_depth -= 1
                    k += 2
                elif line.startswith("/*", k):
                    block_depth += 1
                    k += 2
                else:
                    k += 1
            elif raw_hashes >= 0:
                # No escapes in raw strings: only `"` plus the same number of
                # hashes terminates one.
                if ch == '"' and line.startswith("#" * raw_hashes, k + 1):
                    k += 1 + raw_hashes
                    raw_hashes = -1
                else:
                    k += 1
            elif in_str:
                if ch == "\\":
                    k += 2  # skip the escaped character as well
                else:
                    in_str = ch != '"'
                    k += 1
            elif line.startswith("//", k):
                break
            elif line.startswith("/*", k):
                block_depth = 1
                k += 2
            elif ch == '"':
                in_str = True
                k += 1
            elif ch == "'":
                if line.startswith("\\", k + 1):
                    # Escaped character literal such as '\n', '\'' or '\u{7b}'.
                    close = line.find("'", k + 3)
                    k = end if close < 0 else close + 1
                elif line.startswith("'", k + 2):
                    k += 3  # plain character literal such as '{'
                else:
                    k += 1  # lifetime or loop label
            else:
                if ch == "r":
                    h = k + 1
                    while h < end and line[h] == "#":
                        h += 1
                    if h < end and line[h] == '"':
                        raw_hashes = h - k - 1
                        k = h + 1
                        continue
                yield j, ch
                k += 1


def find_rs_fns(text: str) -> list[tuple[int, int, int, str, str]]:
    """Locate the Rust functions in *text* that have a body.

    Returns one tuple per function, in source order:
    ``(sig_line, body_open, body_close, name, indent)`` where the first three
    are 0-based indices into ``text.splitlines()`` -- the line matching
    ``RE_RS_FN_SIG``, the line with the body's opening brace and the line with
    its closing brace -- and ``indent`` is the signature's leading whitespace.

    Functions nested inside a reported body are not reported separately,
    because scanning resumes on the line after ``body_close``.  Declarations
    without a body (``fn f(&self);`` in a trait or ``extern`` block) are
    skipped instead of being paired with the next function's braces.  If a
    body is still open at the end of the text, scanning stops and the
    functions found so far are returned.
    """
    lines = text.splitlines()
    found: list[tuple[int, int, int, str, str]] = []
    i = 0
    while i < len(lines):
        sig = RE_RS_FN_SIG.match(lines[i])
        if sig is None:
            i += 1
            continue
        body_open = body_close = -1
        brace_depth = 0
        group_depth = 0   # () and [] nesting inside the signature
        bodiless = False
        for j, ch in _rs_code_chars(lines, i):
            if ch == "{":
                if body_open < 0:
                    body_open = j
                brace_depth += 1
            elif ch == "}":
                if body_open < 0:
                    # The enclosing block closed before any body opened.
                    bodiless = True
                    break
                brace_depth -= 1
                if brace_depth == 0:
                    body_close = j
                    break
            elif body_open < 0:
                if ch in "([":
                    group_depth += 1
                elif ch in ")]":
                    group_depth -= 1
                elif ch == ";" and group_depth <= 0:
                    # A ';' outside `[T; N]` ends a body-less declaration.
                    bodiless = True
                    break
        if bodiless:
            i += 1
            continue
        if body_close < 0:
            break
        found.append((i, body_open, body_close, sig.group(2), sig.group(1)))
        i = body_close + 1
    return found
# Column at which the trailing ``// c:N`` tag starts on lines shorter than it.
PAD_COL = 80


def add_tag(line: str, lineno: int) -> str:
    """Return *line* with a trailing ``// c:<lineno>`` comment appended.

    The line is returned untouched when it is blank, when it already carries
    a ``// c:`` / ``//c:`` tag, or when its first non-blank characters are
    ``//``, ``/*``, ``*`` or ``*/``.  The ``*`` test is meant for comment
    continuation lines, but it also skips any statement that starts with an
    asterisk.

    Otherwise trailing whitespace is dropped and the tag is placed at column
    ``PAD_COL``; a line already that long gets the tag after a single space.
    """
    body = line.strip()
    # "//" already covers the "///" and "//!" doc-comment forms.
    is_comment = body.startswith(("//", "/*", "*", "*/"))
    is_tagged = "// c:" in line or "//c:" in line
    if not body or is_comment or is_tagged:
        return line

    code = line.rstrip()
    if len(code) >= PAD_COL:
        return f"{code} // c:{lineno}"
    return f"{code.ljust(PAD_COL)}// c:{lineno}"
def best_match_lineno(rs_line: str, body: list[tuple[int,str]], default: int) -> int:
    """Pick the C line that shares the most identifiers with *rs_line*.

    *body* holds ``(line number, text)`` pairs.  Each text is tokenised with
    ``toks`` and scored by how many tokens it has in common with *rs_line*;
    the line number of the highest score is returned, the earliest line
    winning a tie.  *default* is returned when *rs_line* has no tokens or no
    line shares any.
    """
    wanted = toks(rs_line)
    if not wanted:
        return default

    top_line, top_hits = default, 0
    for c_lineno, c_text in body:
        # A line without tokens scores 0 and so can never beat top_hits.
        hits = len(wanted.intersection(toks(c_text)))
        if hits > top_hits:
            top_line, top_hits = c_lineno, hits
    return top_line
def annotate_file(rs_path: Path, c_path: Path) -> tuple[int, int, int]:
    """Annotate the Rust file *rs_path* with references into *c_path*.

    For every Rust function whose name is also a function in the C index:

    * each body line accepted by ``add_tag`` gets a ``// c:N`` tag, ``N``
      being the best-matching C body line (the C function's first line when
      nothing matches);
    * a ``/// Port of `name()` from `file:line`.`` doc line is inserted above
      the function's existing doc comments and attributes, unless that run
      already names the function in a "Port of" phrase or already cites the
      C function's start line.

    The file is rewritten in place, and only when something was added.
    Returns ``(doc lines inserted, body lines tagged, functions without a C
    counterpart)``.

    NOTE(review): the C lookup uses the exact Rust name; ``candidate_c_names``
    is defined in this file but never consulted -- confirm that is intended.
    """
    # Read and write as UTF-8 explicitly rather than with the locale codec.
    text = rs_path.read_text(encoding="utf-8")
    c_idx = index_c(c_path)
    fns = find_rs_fns(text)
    if not fns:
        return (0, 0, 0)

    try:
        cfile_rel = c_path.relative_to(ROOT).as_posix().replace("src/zsh/Src/", "Src/")
    except ValueError:
        # The C file lives outside the repository: cite it as given.
        cfile_rel = c_path.as_posix()

    # Must split exactly like find_rs_fns does; its indices refer to this list.
    lines = text.splitlines()
    fn_count = 0
    line_tags = 0
    fn_skip = 0
    # Bottom-up, so inserting a doc line never shifts an index still to be used.
    for sig_line, body_open, body_close, name, indent in reversed(fns):
        c_fn = c_idx.get(name)
        if not c_fn:
            fn_skip += 1
            continue
        c_start = c_fn["start"]
        c_body = c_fn["body"]

        for k in range(body_open + 1, body_close):
            new = add_tag(lines[k], best_match_lineno(lines[k], c_body, c_start))
            if new != lines[k]:
                lines[k] = new
                line_tags += 1

        # Walk up over the doc comments / attributes attached to the function.
        ins = sig_line
        while ins > 0 and lines[ins - 1].lstrip().startswith(("///", "//!", "#[")):
            ins -= 1

        # `(?!\d)` keeps "c:12" from matching inside "c:123".
        c_ref = re.compile(rf"c:{c_start}(?!\d)")
        already = False
        for k in range(ins, sig_line):
            # RE_PORT_DOC also recognises the "Port of `name()`" line written
            # below, so a re-run after the C line numbers moved does not
            # stack a second doc line on top of the first.
            if any(m.group(1) == name for m in RE_PORT_DOC.finditer(lines[k])):
                already = True
                break
            if c_ref.search(lines[k]):
                already = True
                break
        if not already:
            lines.insert(ins, f"{indent}/// Port of `{name}()` from `{cfile_rel}:{c_start}`.")
            fn_count += 1

    if fn_count or line_tags:
        # Leave files with nothing to add untouched (content and mtime).
        rs_path.write_text(
            "\n".join(lines) + ("\n" if text.endswith("\n") else ""),
            encoding="utf-8",
        )
    return (fn_count, line_tags, fn_skip)
def main() -> int:
    """Annotate every ``*.rs`` file in ``RS_DIR`` except ``mod.rs``.

    Command-line arguments, when given, restrict the run to the files whose
    stem (file name without ``.rs``) equals one of them; an argument that
    matches no file is reported on stderr instead of being ignored silently.
    Each Rust file is paired with the C file of the same stem in ``C_DIR``.
    One summary line per file and a grand total go to stderr.

    Returns 0, which becomes the process exit status.
    """
    only = set(sys.argv[1:])
    files = sorted(p for p in RS_DIR.glob("*.rs") if p.name != "mod.rs")
    if only:
        for missing in sorted(only - {p.stem for p in files}):
            print(f"warning: no Rust module named {missing!r} in {RS_DIR}", file=sys.stderr)
        files = [p for p in files if p.stem in only]

    total_fns = total_tags = total_skip = 0
    for rs in files:
        c = C_DIR / f"{rs.stem}.c"
        a, b, s = annotate_file(rs, c)
        total_fns += a
        total_tags += b
        total_skip += s
        print(f"{rs.name:<24} fns:{a:4} tags:{b:6} unmatched:{s:4}", file=sys.stderr)
    print(f"\nTOTAL fns:{total_fns} tags:{total_tags} unmatched:{total_skip}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())