import os
import re
import sys
# Flipped to True by main() when --features-serde is passed; while False,
# classify() skips snippets that mention serde_json.
_SERDE_ENABLED = False
# Snippet mentions "synta" as a whole word, in any letter case.
SYNTA_RE = re.compile(r"\bsynta\b", re.IGNORECASE)
# Snippet contains the literal inner attribute `#![no_std]`.
NOSTD_RE = re.compile(r"#!\[no_std\]")
# Snippet mentions `serde_json` as a whole word.
SERDE_RE = re.compile(r"\bserde_json\b")
# Snippet has a `use` of a `der::`, `asn1_rs::`, `yasna` or `asn1::` path,
# either at the start of a line or preceded by a non-word character.
FOREIGN_RE = re.compile(
r"(?:^|\W)use\s+der\s*::"
r"|(?:^|\W)use\s+asn1_rs\s*::"
r"|(?:^|\W)use\s+yasna\b"
r"|(?:^|\W)use\s+asn1\s*::",
re.MULTILINE,
)
# Snippet declares `fn main()` (whitespace-tolerant, empty parameter list only).
MAIN_RE = re.compile(r"\bfn\s+main\s*\(\s*\)")
# Matches a line that starts at column 0 with a Rust item declaration:
# optional `pub` / `pub(...)`, `async`, `unsafe` and `extern "ABI"` qualifiers
# followed by an item keyword.  classify() uses it to tell snippets that must
# be emitted at file scope from those that can be wrapped in a function body.
TOPLEVEL_RE = re.compile(
    r"^(?:pub(?:\s*\([^)]*\))?\s+)?"
    r"(?:async\s+)?(?:unsafe\s+)?"
    r'(?:extern\s+"[^"]+"\s+)?'
    # `macro_rules!` ends in a non-word character, so a trailing \b would only
    # match when a word character follows the `!` directly (never the case
    # for the usual `macro_rules! name`).  Keep it outside the \b group.
    r"(?:(?:fn|struct|enum|impl|type|const|static|trait)\b|macro_rules!)",
    re.MULTILINE,
)
# Fence annotations that make classify() return "skip_annotated".
SKIP_ANNOTATIONS = frozenset({"ignore", "compile_fail"})
# Rust header placed at the top of every generated file by the wrap_* helpers:
# silences unused/dead-code lints and glob-imports `synta`.
PREAMBLE = """\
// Auto-generated wrapper for Synta documentation snippet.
#![allow(
unused_imports,
unused_variables,
unused_mut,
dead_code,
unused_must_use,
unreachable_code,
unused_assignments,
)]
use synta::*;
"""
# Rust `let` bindings emitted at the start of the generated
# `_synta_doc_sample` function, ahead of the fragment's own nested scope, so
# fragments can refer to `data`, `decoder`, `encoder` and `integer`.
FRAGMENT_VARS = """\
// ── pre-declared identifiers for doc fragment compilation ──
let data: Vec<u8> = vec![0x02, 0x01, 0x2a];
let mut decoder = synta::Decoder::new(data.as_slice(), synta::Encoding::Der);
let mut encoder = synta::Encoder::new(synta::Encoding::Der);
let integer = synta::Integer::from(0i64);
"""
# Return type of the generated `_synta_doc_sample` function.
_HARNESS_RETURN = "std::result::Result<(), Box<dyn std::error::Error>>"
def classify(code: str, annotation: str) -> str:
    """Decide how a Rust documentation snippet should be handled.

    Args:
        code: The raw snippet text taken from between the code fences.
        annotation: Whatever followed ``rust,`` on the opening fence (empty
            string when absent).  It may hold several comma-separated
            attributes, e.g. ``"no_run,ignore"``.

    Returns:
        One of the ``skip_*`` reasons (``skip_annotated``, ``skip_nonsynta``,
        ``skip_nostd``, ``skip_foreign``, ``skip_serde``) or a wrap kind
        (``program``, ``toplevel``, ``fragment``).  The checks run in exactly
        that order and the first hit wins.
    """
    # A fence can carry several attributes separated by commas; any one of
    # them being a skip annotation is enough to skip the snippet.
    attributes = {part.strip() for part in annotation.split(",")}
    if not SKIP_ANNOTATIONS.isdisjoint(attributes):
        return "skip_annotated"
    if not SYNTA_RE.search(code):
        return "skip_nonsynta"
    if NOSTD_RE.search(code):
        return "skip_nostd"
    if FOREIGN_RE.search(code):
        return "skip_foreign"
    if SERDE_RE.search(code) and not _SERDE_ENABLED:
        return "skip_serde"
    if MAIN_RE.search(code):
        return "program"
    if TOPLEVEL_RE.search(code):
        return "toplevel"
    return "fragment"
def wrap_program(code: str) -> str:
    """Wrap a snippet that defines its own ``fn main()``.

    Every ``fn main()`` is renamed to ``fn _synta_doc_main()`` and the result
    is placed after the shared preamble.
    """
    main_decl = re.compile(r"\bfn\s+main\s*\(\s*\)")
    body = main_decl.sub("fn _synta_doc_main()", code)
    return f"{PREAMBLE}\n{body}\n"
def wrap_toplevel(code: str) -> str:
    """Wrap a snippet made of top-level items: preamble, blank line, code."""
    return "".join((PREAMBLE, "\n", code, "\n"))
def wrap_fragment(code: str) -> str:
    """Wrap a statement-level snippet inside a generated Rust function.

    The snippet is prefixed line by line, placed in a nested block after the
    pre-declared helper variables, and the function ends with ``Ok(())``.
    """
    body_lines = [f" {row}" for row in code.splitlines()]
    pieces = [
        PREAMBLE,
        f"\nfn _synta_doc_sample() -> {_HARNESS_RETURN} {{\n",
        FRAGMENT_VARS,
        " // ── fragment (nested scope allows re-declaration) ──\n",
        " {\n",
        "\n".join(body_lines),
        "\n",
        " }\n",
        " Ok(())\n",
        "}\n",
    ]
    return "".join(pieces)
def wrap(code: str, kind: str) -> str:
    """Dispatch to the wrapper for *kind*; unknown kinds are treated as fragments."""
    wrappers = {
        "program": wrap_program,
        "toplevel": wrap_toplevel,
    }
    chosen = wrappers.get(kind, wrap_fragment)
    return chosen(code)
def wrap_combined(prev_codes: list[str], cur_code: str, cur_kind: str) -> str:
    """Wrap a snippet together with earlier snippets from the same document.

    The preamble comes first, then the earlier snippets joined by newlines,
    then the current snippet wrapped according to *cur_kind* (without a
    second preamble).
    """
    header = "".join((PREAMBLE, "\n", "\n".join(prev_codes), "\n"))

    if cur_kind != "fragment":
        if cur_kind == "program":
            # Rename the entry point of the current snippet.
            body = re.compile(r"\bfn\s+main\s*\(\s*\)").sub(
                "fn _synta_doc_main()", cur_code
            )
        else:
            body = cur_code
        return f"{header}\n{body}\n"

    body_lines = [f" {row}" for row in cur_code.splitlines()]
    pieces = [
        header,
        f"\nfn _synta_doc_sample() -> {_HARNESS_RETURN} {{\n",
        FRAGMENT_VARS,
        " // ── fragment (nested scope allows re-declaration) ──\n",
        " {\n",
        "\n".join(body_lines),
        "\n",
        " }\n",
        " Ok(())\n",
        "}\n",
    ]
    return "".join(pieces)
# Opening fence of a Rust code block: ```rust or ```rust,<attributes>.
_RUST_FENCE_RE = re.compile(r"^```rust(?:,(\S+))?\s*$", re.IGNORECASE)


def _usage_exit() -> None:
    """Print the command-line usage to stderr and exit with status 1."""
    print(
        f"Usage: {sys.argv[0]} [--features-serde] <work_dir> [file.md ...]",
        file=sys.stderr,
    )
    sys.exit(1)


def _iter_rust_blocks(md_path: str, lines: list[str]):
    """Yield ``(annotation, start_line, code)`` for each Rust block in *lines*.

    ``annotation`` is the lower-cased text after ``rust,`` on the opening
    fence (or ``""``), ``start_line`` is the 1-based line number of the first
    line inside the fence, and ``code`` is the block body.  Empty blocks are
    not yielded.  A block still open at end of file is dropped, with a
    warning on stderr.
    """
    annotation = ""
    start_line = 0
    buf = None  # None while outside a fence, a list of lines while inside
    for lineno, line in enumerate(lines, 1):
        if buf is None:
            m = _RUST_FENCE_RE.match(line)
            if m:
                annotation = (m.group(1) or "").lower()
                start_line = lineno + 1
                buf = []
        elif line.startswith("```"):
            if buf:
                yield annotation, start_line, "".join(buf)
            buf = None
        else:
            buf.append(line)
    if buf is not None:
        print(
            f"warning: unterminated rust code block in {md_path} "
            f"(opened at line {start_line - 1})",
            file=sys.stderr,
        )


def main() -> None:
    """Extract Rust snippets from Markdown files and write wrapped sources.

    Command line: ``[--features-serde] <work_dir> [file.md ...]``.

    For every non-empty Rust block the raw text goes to
    ``<work_dir>/raw/NNNNN.rs``.  Blocks that classify() does not skip are
    also wrapped into ``<work_dir>/src/NNNNN.rs`` and, when earlier
    toplevel/program blocks exist in the same file, into
    ``<work_dir>/src/NNNNN.combined.rs``.  Each wrapped block gets one
    tab-separated row in ``<work_dir>/manifest.tsv``.  Finally the total and
    skipped block counts are printed to stdout, separated by a tab.
    """
    global _SERDE_ENABLED
    args = sys.argv[1:]
    if args and args[0] == "--features-serde":
        _SERDE_ENABLED = True
        args = args[1:]
    if not args:
        _usage_exit()

    work_dir, md_files = args[0], args[1:]
    raw_dir = os.path.join(work_dir, "raw")
    src_dir = os.path.join(work_dir, "src")
    os.makedirs(raw_dir, exist_ok=True)
    os.makedirs(src_dir, exist_ok=True)

    ext = "rs"
    manifest_rows: list[str] = []
    block_n = 0
    skip_n = 0
    # Per Markdown file: toplevel/program snippets seen so far, used as
    # context for the ".combined" variant of later snippets.
    file_toplevel_history: dict[str, list[str]] = {}

    for md_path in md_files:
        try:
            with open(md_path, encoding="utf-8") as fh:
                lines = fh.readlines()
        except OSError as exc:
            print(f"warning: cannot read {md_path}: {exc}", file=sys.stderr)
            continue

        for annotation, start_line, code in _iter_rust_blocks(md_path, lines):
            block_n += 1
            raw_path = os.path.join(raw_dir, f"{block_n:05d}.{ext}")
            with open(raw_path, "w", encoding="utf-8") as fh:
                fh.write(code)

            kind = classify(code, annotation)
            if kind.startswith("skip"):
                # Skipped blocks keep their raw file but get no manifest row.
                skip_n += 1
                continue

            src_path = os.path.join(src_dir, f"{block_n:05d}.{ext}")
            with open(src_path, "w", encoding="utf-8") as fh:
                fh.write(wrap(code, kind))

            combined_path = ""
            prev = file_toplevel_history.get(md_path, [])
            if prev:
                combined_path = os.path.join(
                    src_dir, f"{block_n:05d}.combined.{ext}"
                )
                with open(combined_path, "w", encoding="utf-8") as fh:
                    fh.write(wrap_combined(prev, code, kind))

            # Record after writing so a block is never combined with itself.
            if kind in ("toplevel", "program"):
                file_toplevel_history.setdefault(md_path, []).append(code)

            manifest_rows.append(
                "\t".join(
                    [
                        md_path,
                        str(start_line),
                        "rust",
                        src_path,
                        kind,
                        raw_path,
                        combined_path,
                    ]
                )
            )

    manifest_path = os.path.join(work_dir, "manifest.tsv")
    with open(manifest_path, "w", encoding="utf-8") as fh:
        fh.writelines(f"{row}\n" for row in manifest_rows)
    print(f"{block_n}\t{skip_n}")
# Run the extractor only when executed as a script, not when imported.
if __name__ == "__main__":
    main()