import os
import re
import sys
# Whole-word, case-insensitive mention of "synta" anywhere in a snippet.
SYNTA_RE = re.compile(r"\bsynta\b", flags=re.IGNORECASE)
# A line that consists solely of "..." (surrounding whitespace allowed).
ELLIPSIS_RE = re.compile(r"^\s*\.\.\.\s*$", flags=re.MULTILINE)
# An `if __name__ == "__main__"` test, written with either quote style.
MAIN_RE = re.compile(r'if\s+__name__\s*==\s*["\']__main__["\']')
# Fence annotations (the part after "python,") that mark a block as skipped.
SKIP_ANNOTATIONS = frozenset(("ignore", "notest"))
# Header that wrap() puts in front of snippets that are neither a full
# program nor a stub.
PREAMBLE = (
    "# Auto-generated wrapper for Synta Python documentation snippet.\n"
    "# Validated with: python3 -m py_compile (syntax check only; synta not imported)\n"
    "import synta # noqa: F401\n"
)
def classify(code: str, annotation: str) -> str:
    """Decide how a documentation snippet should be handled.

    Args:
        code: Text of the snippet.
        annotation: Lower-cased fence annotation, or "" when there is none.

    Returns:
        "skip_annotated" when the annotation is in SKIP_ANNOTATIONS,
        "skip_nonsynta" when the code never mentions synta,
        "stub" when some line is a bare "...",
        "program" when the code has a ``__main__`` guard,
        otherwise "fragment". The checks are applied in that order.
    """
    if annotation in SKIP_ANNOTATIONS:
        return "skip_annotated"
    if SYNTA_RE.search(code) is None:
        return "skip_nonsynta"
    # Order matters: a snippet with both "..." and a __main__ guard is a stub.
    for pattern, label in ((ELLIPSIS_RE, "stub"), (MAIN_RE, "program")):
        if pattern.search(code) is not None:
            return label
    return "fragment"
def wrap(code: str, kind: str) -> str:
    """Return the text written to the wrapped source file for a snippet.

    Args:
        code: Text of the snippet.
        kind: Classification of the snippet.

    Returns:
        For "program" and "stub" snippets, a one-line header comment followed
        by the code and a newline. For any other kind, PREAMBLE, a blank line,
        the code and a newline.
    """
    if kind not in ("program", "stub"):
        return f"{PREAMBLE}\n{code}\n"
    header = "# Auto-generated wrapper for Synta Python documentation snippet.\n"
    return "".join((header, code, "\n"))
# Opening fence of a Python block: "```python" or "```python,<annotation>".
FENCE_OPEN_RE = re.compile(r"^```python(?:,(\S+))?\s*$", re.IGNORECASE)


def main() -> None:
    """Extract fenced Python blocks from Markdown files into a work directory.

    Command line: ``<prog> <work_dir> [file.md ...]``.

    Every non-empty fenced ``python`` block is numbered and written verbatim
    to ``<work_dir>/raw/NNNNN.py``. Blocks whose classification does not start
    with "skip" are also written, wrapped, to ``<work_dir>/src/NNNNN.py`` and
    recorded as one tab-separated row of ``<work_dir>/manifest.tsv``:
    markdown path, line number of the first code line, "python", wrapped
    path, kind, raw path. At the end the total block count and the skipped
    count are printed to stdout, separated by a tab.

    Files that cannot be read or decoded as UTF-8, and fences still open at
    the end of a file, produce a warning on stderr and are otherwise ignored.

    Raises:
        SystemExit: with status 1 when no arguments are given.
    """
    args = sys.argv[1:]
    if not args:
        print(
            f"Usage: {sys.argv[0]} <work_dir> [file.md ...]",
            file=sys.stderr,
        )
        sys.exit(1)

    work_dir, md_files = args[0], args[1:]
    raw_dir = os.path.join(work_dir, "raw")
    src_dir = os.path.join(work_dir, "src")
    os.makedirs(raw_dir, exist_ok=True)
    os.makedirs(src_dir, exist_ok=True)

    manifest_rows: list[str] = []
    block_n = 0
    skip_n = 0

    for md_path in md_files:
        try:
            with open(md_path, encoding="utf-8") as fh:
                lines = fh.readlines()
        except (OSError, UnicodeDecodeError) as exc:
            # UnicodeDecodeError is not an OSError; without it one badly
            # encoded file would abort the whole run.
            print(f"warning: cannot read {md_path}: {exc}", file=sys.stderr)
            continue

        in_block = False
        annotation = ""
        start_line = 0
        buf: list[str] = []

        for lineno, line in enumerate(lines, 1):
            if not in_block:
                m = FENCE_OPEN_RE.match(line)
                if m:
                    annotation = (m.group(1) or "").lower()
                    in_block = True
                    # The code starts on the line after the opening fence.
                    start_line = lineno + 1
                    buf = []
                continue

            if not line.startswith("```"):
                buf.append(line)
                continue

            # Closing fence reached.
            in_block = False
            if not buf:
                # Empty blocks are neither numbered nor written.
                continue

            block_n += 1
            code = "".join(buf)
            buf = []

            raw_path = os.path.join(raw_dir, f"{block_n:05d}.py")
            with open(raw_path, "w", encoding="utf-8") as fh:
                fh.write(code)

            kind = classify(code, annotation)
            if kind.startswith("skip"):
                skip_n += 1
                continue

            src_path = os.path.join(src_dir, f"{block_n:05d}.py")
            with open(src_path, "w", encoding="utf-8") as fh:
                fh.write(wrap(code, kind))
            manifest_rows.append(
                "\t".join(
                    [
                        md_path,
                        str(start_line),
                        "python",
                        src_path,
                        kind,
                        raw_path,
                    ]
                )
            )

        if in_block:
            # The fence never closed; its content is dropped, so say so.
            print(
                f"warning: unterminated python block in {md_path} "
                f"(opened at line {start_line - 1})",
                file=sys.stderr,
            )

    manifest_path = os.path.join(work_dir, "manifest.tsv")
    with open(manifest_path, "w", encoding="utf-8") as fh:
        fh.write("\n".join(manifest_rows))
        if manifest_rows:
            fh.write("\n")

    print(f"{block_n}\t{skip_n}")


if __name__ == "__main__":
    main()