from __future__ import annotations
import argparse
import re
import subprocess
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent
MARKDOWNLINT_BASE = 53
ALLOWED_OTHER_TOOL_COUNTS = {MARKDOWNLINT_BASE, 46, 38}
DOC_FILES = [
"README.md",
"docs/index.md",
"docs/rules.md",
"docs/comparison.md",
"docs/markdownlint-comparison.md",
"docs/mdformat-comparison.md",
"docs/getting-started/quickstart.md",
]
RULES_REFERENCE = "docs/rules.md"
RULES_TABLE_ROW = re.compile(r"\|\s*\[(MD\d{3})\]\(")
ANY_SENTINEL = re.compile(r"<!-- (RULE_\w+) -->[^\n<]*<!-- /\1 -->")
RULE_COUNT_PHRASE = re.compile(r"\b(\d+)\s+(?:(?:lint|linting)\s+)?rules?\b")
def registry_rule_ids() -> list[str]:
try:
out = subprocess.run(
["cargo", "run", "-q", "--bin", "rumdl", "--", "rule"],
cwd=ROOT,
capture_output=True,
text=True,
check=True,
).stdout
except subprocess.CalledProcessError as exc:
sys.exit(f"error: `rumdl rule` failed:\n{exc.stderr}")
ids = sorted(set(re.findall(r"\bMD\d{3}\b", out)))
if not ids:
sys.exit("error: no rule ids parsed from `rumdl rule` output")
return ids
def expected_values(ids: list[str]) -> dict[str, str]:
total = len(ids)
return {
"RULE_COUNT": str(total),
"RULE_COUNT_ADDITIONAL": str(total - MARKDOWNLINT_BASE),
"RULE_MAX": max(ids),
}
def sentinel_pattern(name: str) -> re.Pattern[str]:
return re.compile(
rf"(<!-- {name} -->)([^\n<]*)(<!-- /{name} -->)",
)
def check_sentinels(values: dict[str, str], root: Path = ROOT) -> list[str]:
drift: list[str] = []
for rel in DOC_FILES:
path = root / rel
content = path.read_text()
for name, expected in values.items():
for m in sentinel_pattern(name).finditer(content):
found = m.group(2)
if found != expected:
line_no = content.count("\n", 0, m.start()) + 1
drift.append(
f" {rel}:{line_no}: {name} is {found!r}, expected {expected!r}"
)
return drift
def write_sentinels(values: dict[str, str], root: Path = ROOT) -> list[str]:
changed: list[str] = []
for rel in DOC_FILES:
path = root / rel
original = path.read_text()
content = original
for name, expected in values.items():
content = sentinel_pattern(name).sub(
lambda m, e=expected: m.group(1) + e + m.group(3),
content,
)
if content != original:
path.write_text(content)
changed.append(rel)
return changed
def check_rules_table(ids: list[str], root: Path = ROOT) -> list[str]:
content = (root / RULES_REFERENCE).read_text()
seen = set(RULES_TABLE_ROW.findall(content))
registry = set(ids)
problems: list[str] = []
missing = sorted(registry - seen)
extra = sorted(seen - registry)
if missing:
problems.append(
f" {RULES_REFERENCE}: missing table rows for {', '.join(missing)}"
)
if extra:
problems.append(
f" {RULES_REFERENCE}: table rows for nonexistent rules {', '.join(extra)}"
)
return problems
def check_no_unwrapped_counts(root: Path = ROOT) -> list[str]:
problems: list[str] = []
for rel in DOC_FILES:
content = (root / rel).read_text()
masked = ANY_SENTINEL.sub(lambda m: " " * len(m.group(0)), content)
for m in RULE_COUNT_PHRASE.finditer(masked):
count = int(m.group(1))
if count in ALLOWED_OTHER_TOOL_COUNTS:
continue
line_no = masked.count("\n", 0, m.start()) + 1
problems.append(
f" {rel}:{line_no}: unwrapped rule count {m.group(0)!r}; "
f"wrap the number in a <!-- RULE_COUNT --> sentinel"
)
return problems
def main() -> int:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--write",
action="store_true",
help="rewrite sentinel values from the registry instead of checking",
)
args = parser.parse_args()
ids = registry_rule_ids()
values = expected_values(ids)
if args.write:
changed = write_sentinels(values)
if changed:
print("Updated rule-count sentinels in:")
for rel in changed:
print(f" {rel}")
else:
print("Rule-count sentinels already in sync.")
residual = check_rules_table(ids) + check_no_unwrapped_counts()
if residual:
print(
f"\nResidual drift that --write cannot fix (total {values['RULE_COUNT']} rules):",
file=sys.stderr,
)
for line in residual:
print(line, file=sys.stderr)
return 1
return 0
problems = (
check_sentinels(values) + check_rules_table(ids) + check_no_unwrapped_counts()
)
if problems:
print(
f"Rule-doc drift detected. Registry has {values['RULE_COUNT']} rules "
f"({values['RULE_COUNT_ADDITIONAL']} beyond markdownlint, "
f"max {values['RULE_MAX']}):\n",
file=sys.stderr,
)
for line in problems:
print(line, file=sys.stderr)
print(
"\nRun `make sync-rule-docs` to update the count sentinels. Add or "
"remove docs/rules.md table rows by hand to resolve table drift.",
file=sys.stderr,
)
return 1
print(
f"Rule docs in sync: {values['RULE_COUNT']} rules "
f"({values['RULE_COUNT_ADDITIONAL']} beyond markdownlint, max {values['RULE_MAX']})."
)
return 0
if __name__ == "__main__":
raise SystemExit(main())