import argparse
import re
import sys
from pathlib import Path
# HTML comments that delimit the generated TOC block inside a Markdown file.
# _update_toc() replaces everything from START_MARKER through END_MARKER.
START_MARKER = "<!-- START doctoc generated TOC please keep comment here to allow auto update -->"
END_MARKER = "<!-- END doctoc generated TOC please keep comment here to allow auto update -->"
# Notice emitted directly after START_MARKER in every generated block.
DONT_EDIT = "<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->"

# Heading titles (compared stripped and lowercased) that never get a TOC entry.
_SKIP_TITLES = frozenset(("table of contents", "contents"))

# A Markdown file is skipped when any component of its repo-relative path
# is one of these names.
_EXCLUDED_DIRS = frozenset(
    (
        "target",
        ".cargo",
        ".claude",
        "bench-data",
        ".pytest_cache",
        "vendor",
    )
)

# File names that are skipped wherever they live.
_EXCLUDED_FILENAMES = frozenset(("CHANGELOG.md",))

# Repo-relative POSIX paths; the path itself and everything below it is skipped.
_EXCLUDED_PREFIXES = (
    "tests/vectors/cryptography",
    "tests/vectors/dilithium-certificates",
    "tests/vectors/kyber-certificates",
    "tests/vectors/mozilla-ca",
    "tests/vectors/ccadb",
    "docs",
)
def _find_repo_root() -> Path:
    """Locate the repository root, identified by a ``Cargo.toml`` file.

    The directory three levels above this script is tried first.  Failing
    that, the current working directory and each of its ancestors are
    probed, nearest first.  The filesystem root itself is never probed.

    Returns:
        The first directory found that contains ``Cargo.toml``, or the
        current working directory when none does.
    """
    manifest = 'Cargo.toml'

    script_guess = Path(__file__).resolve().parent.parent.parent
    if (script_guess / manifest).exists():
        return script_guess

    here = Path.cwd().resolve()
    # ``parents`` ends with the filesystem root; drop it so it is not probed.
    for directory in (here, *here.parents)[:-1]:
        if (directory / manifest).exists():
            return directory

    return Path.cwd()
def _is_excluded(path: Path, repo_root: Path) -> bool:
    """Report whether *path* should be left out of the workspace-wide scan.

    Args:
        path: Candidate Markdown file.
        repo_root: Directory that the exclusion rules are relative to.

    Returns:
        True when the file name is in ``_EXCLUDED_FILENAMES``, when any
        component of the repo-relative path is in ``_EXCLUDED_DIRS``, or
        when the repo-relative path equals or lies below one of
        ``_EXCLUDED_PREFIXES``.  A path outside *repo_root* is only subject
        to the file-name rule.
    """
    if path.name in _EXCLUDED_FILENAMES:
        return True

    try:
        relative = path.relative_to(repo_root)
    except ValueError:
        # Not under repo_root: the directory and prefix rules do not apply.
        return False

    if not _EXCLUDED_DIRS.isdisjoint(relative.parts):
        return True

    posix = relative.as_posix()
    for prefix in _EXCLUDED_PREFIXES:
        # Require a '/' after the prefix so "docs" does not match "docs2".
        if posix == prefix or posix.startswith(prefix + '/'):
            return True
    return False
def collect_files(
explicit: list[str],
docs_dir: str | None,
repo_root: Path,
) -> list[Path]:
if explicit:
paths = []
for f in explicit:
p = Path(f)
if not p.is_file():
print(f"error: {f}: not found", file=sys.stderr)
sys.exit(1)
paths.append(p)
return paths
if docs_dir:
base = Path(docs_dir)
if not base.is_dir():
print(f"error: --docs-dir {docs_dir!r}: not a directory", file=sys.stderr)
sys.exit(1)
return sorted(base.rglob('*.md'))
return sorted(
p for p in repo_root.rglob('*.md')
if not _is_excluded(p, repo_root)
)
def make_anchor(title: str) -> str:
    """Convert a Markdown heading title into its anchor slug.

    Inline markup (links, emphasis, HTML tags) is reduced to its visible
    text, the result is lowercased, every character that is not a word
    character, whitespace or hyphen is dropped, whitespace runs become a
    single hyphen and hyphen runs are collapsed.

    The contents of inline code spans are taken literally: emphasis and
    HTML-tag stripping are not applied inside them, so identifiers such as
    ```` `my_func_name` ```` or ```` `Vec<u8>` ```` keep their underscores
    and angle-bracket contents.  Underscores only count as emphasis when
    they sit at word boundaries, so ``snake_case`` names in plain text are
    preserved as well.

    Args:
        title: Heading text without the leading ``#`` characters.

    Returns:
        The slug, without a leading ``#``.
    """
    # Code spans are parked behind NUL-delimited placeholders while markup
    # is stripped, so make sure the input cannot already contain one.
    title = title.replace('\x00', '')

    code_spans: list[str] = []

    def _stash_code(match: re.Match[str]) -> str:
        code_spans.append(match.group(1))
        return f'\x00{len(code_spans) - 1}\x00'

    title = re.sub(r'`([^`]*)`', _stash_code, title)
    # [text](url) -> text
    title = re.sub(r'\[([^\]]*)\]\([^)]*\)', r'\1', title)
    # *x*, **x**, ***x*** -> x
    title = re.sub(r'\*{1,3}([^*]+)\*{1,3}', r'\1', title)
    # _x_, __x__, ___x___ -> x, but only when the delimiters are not glued
    # to letters/digits (intraword underscores are not emphasis).
    title = re.sub(
        r'(?<![^\W_])(_{1,3})(?=\S)(.+?)(?<=\S)\1(?![^\W_])',
        r'\2',
        title,
    )
    title = re.sub(r'<[^>]+>', '', title)
    # Put the literal code-span text back (callable repl: no escape handling).
    title = re.sub(
        r'\x00(\d+)\x00',
        lambda m: code_spans[int(m.group(1))],
        title,
    )

    title = title.lower()
    title = re.sub(r'[^\w\s-]', '', title)
    title = re.sub(r'\s+', '-', title.strip())
    title = re.sub(r'-+', '-', title)
    return title
def extract_headings(text: str) -> list[tuple[int, str]]:
    """Return the ATX headings of a Markdown document, in document order.

    Lines inside fenced code blocks are ignored.  Fences follow the
    CommonMark rules that matter here:

    * a fence is a run of three or more backticks or tildes;
    * the info string after an opening *backtick* fence may not contain a
      backtick (otherwise the line is inline code, not a fence);
    * a closing fence uses the same character, is at least as long as the
      opening fence, and is followed only by whitespace.

    Leading whitespace is ignored when detecting fences, but a heading must
    start in column 0.

    Args:
        text: Full Markdown source.

    Returns:
        ``(level, title)`` pairs where *level* is the number of ``#``
        characters (1-6) and *title* has surrounding whitespace and any
        closing ``#`` sequence removed.
    """
    fence_re = re.compile(r'^(`{3,}|~{3,})(.*)$')
    heading_re = re.compile(r'^(#{1,6})\s+(.+?)(?:\s+#+)?\s*$')

    headings: list[tuple[int, str]] = []
    fence_char = ''  # '' means "not inside a fenced block"
    fence_len = 0

    for line in text.splitlines():
        fence_m = fence_re.match(line.strip())

        if fence_char:
            # Inside a block only a bare, long-enough fence of the same
            # character ends it; everything else is code.
            if (
                fence_m
                and fence_m.group(1)[0] == fence_char
                and len(fence_m.group(1)) >= fence_len
                and not fence_m.group(2).strip()
            ):
                fence_char = ''
                fence_len = 0
            continue

        if fence_m:
            run, info = fence_m.groups()
            if run[0] == '~' or '`' not in info:
                fence_char = run[0]
                fence_len = len(run)
                continue
            # Backticks in the info string: this is inline code such as
            # "```x``` text", so the line is ordinary content.

        heading_m = heading_re.match(line)
        if heading_m:
            headings.append((len(heading_m.group(1)), heading_m.group(2).strip()))

    return headings
def generate_toc_lines(headings: list[tuple[int, str]]) -> list[str]:
    """Render headings as nested Markdown list items linking to their anchors.

    A level-1 heading in first position (the document title) and any heading
    whose title is in ``_SKIP_TITLES`` get no entry.  The shallowest
    remaining level is rendered without indentation; each deeper level is
    indented by two more spaces, the minimum Markdown needs to nest an item
    under a ``- `` parent.

    Repeated anchors are numbered ``slug``, ``slug-1``, ``slug-2``, ... in
    document order.  The numbering counts *every* heading, including the
    ones that get no entry, because those headings still occupy a slug in
    the rendered page.

    Args:
        headings: ``(level, title)`` pairs in document order.

    Returns:
        One ``- [title](#anchor)`` line per listed heading; empty when
        nothing is left to list.
    """
    if not headings:
        return []

    seen: dict[str, int] = {}
    entries: list[tuple[int, str, str]] = []
    for index, (level, title) in enumerate(headings):
        anchor = make_anchor(title)
        if anchor in seen:
            seen[anchor] += 1
            anchor = f"{anchor}-{seen[anchor]}"
        else:
            seen[anchor] = 0

        # The anchor above is reserved even for headings that are not listed.
        if index == 0 and level == 1:
            continue
        if title.strip().lower() in _SKIP_TITLES:
            continue
        entries.append((level, title, anchor))

    if not entries:
        return []

    min_level = min(level for level, _, _ in entries)
    return [
        f"{'  ' * (level - min_level)}- [{title}](#{anchor})"
        for level, title, anchor in entries
    ]
def build_toc_block(toc_lines: list[str]) -> str:
    """Wrap rendered TOC entries in the marker comments and banner line.

    Args:
        toc_lines: TOC list items, one per line.

    Returns:
        The full block from ``START_MARKER`` through ``END_MARKER``, joined
        with newlines and without a trailing newline.
    """
    header = [
        START_MARKER,
        DONT_EDIT,
        '**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)*',
        '',
    ]
    footer = ['', END_MARKER]
    return '\n'.join(header + list(toc_lines) + footer)
def _insert_toc(text: str, toc_block: str) -> str:
    """Insert *toc_block* into a document that has no TOC markers yet.

    The block goes after the first level-1 heading and any blank lines that
    follow it, or at the very top when the document has no level-1 heading.
    Lines inside fenced code blocks are never mistaken for that heading, so
    a ``# comment`` in a shell snippet cannot pull the TOC into the snippet.

    Args:
        text: Original Markdown source.
        toc_block: Block produced by ``build_toc_block``.

    Returns:
        The document with the block and one blank line inserted.  Lines are
        re-joined with ``\\n``; a trailing newline is kept if *text* had one.
    """
    lines = text.splitlines()
    insert_pos = 0
    fence_char = ''  # '' means "not inside a fenced block"
    fence_len = 0

    for i, line in enumerate(lines):
        fence_m = re.match(r'^(`{3,}|~{3,})(.*)$', line.strip())

        if fence_char:
            # Only a bare fence of the same character, at least as long as
            # the opener, closes the block.
            if (
                fence_m
                and fence_m.group(1)[0] == fence_char
                and len(fence_m.group(1)) >= fence_len
                and not fence_m.group(2).strip()
            ):
                fence_char = ''
                fence_len = 0
            continue

        # A backtick fence whose info string contains a backtick is inline
        # code, not a fence opener.
        if fence_m and (fence_m.group(1)[0] == '~' or '`' not in fence_m.group(2)):
            fence_char = fence_m.group(1)[0]
            fence_len = len(fence_m.group(1))
            continue

        if re.match(r'^#\s+', line):
            insert_pos = i + 1
            # Keep the blank line(s) after the title above the TOC.
            while insert_pos < len(lines) and not lines[insert_pos].strip():
                insert_pos += 1
            break

    new_lines = lines[:insert_pos] + [toc_block, ''] + lines[insert_pos:]
    result = '\n'.join(new_lines)
    if text.endswith('\n'):
        result += '\n'
    return result
def _update_toc(text: str, toc_block: str) -> str:
    """Replace every existing marker-delimited TOC block with *toc_block*.

    Args:
        text: Markdown source containing ``START_MARKER`` ... ``END_MARKER``.
        toc_block: Replacement block, inserted verbatim.

    Returns:
        The updated text; unchanged when no complete marker pair is found.
    """
    pattern = re.compile(
        re.escape(START_MARKER) + r'.*?' + re.escape(END_MARKER),
        re.DOTALL,
    )
    # Use a callable so the block is inserted literally.  Passed as a string
    # it would be parsed as a replacement template, and a backslash in any
    # heading title (e.g. "\d" or "\1") would raise re.error or be rewritten.
    return pattern.sub(lambda _match: toc_block, text)
def process_file(path: Path, *, check: bool, verbose: bool) -> bool:
    """Insert or refresh the TOC of one Markdown file.

    Args:
        path: File to process; read and written as UTF-8.
        check: When true, only report an outdated TOC and never write.
        verbose: When true, also print a status line for files that are
            skipped or already up to date.

    Returns:
        True when the file was rewritten (or, in check mode, would be);
        False when it has no listable headings, is already up to date, or
        carries a START marker without a matching END marker.
    """
    text = path.read_text(encoding='utf-8')
    toc_lines = generate_toc_lines(extract_headings(text))
    if not toc_lines:
        if verbose:
            print(f" {path}: no headings — skipped")
        return False

    toc_block = build_toc_block(toc_lines)

    start = text.find(START_MARKER)
    if start == -1:
        new_text = _insert_toc(text, toc_block)
    elif text.find(END_MARKER, start + len(START_MARKER)) == -1:
        # Without the END marker the replacement would silently do nothing
        # and the file would be reported as up to date.  Say so instead;
        # the return value stays the same as before.
        print(
            f" {path}: warning: START marker has no matching END marker"
            " — TOC left unchanged",
            file=sys.stderr,
        )
        return False
    else:
        new_text = _update_toc(text, toc_block)

    if new_text == text:
        if verbose:
            print(f" {path}: up to date")
        return False

    if check:
        print(f" {path}: TOC outdated (run update-toc.py to fix)")
        return True

    path.write_text(new_text, encoding='utf-8')
    print(f" {path}: updated")
    return True
def main() -> None:
    """Command-line entry point: parse options, process files, print a summary.

    Exits with status 1 when no Markdown files are selected, or when
    ``--check`` is given and at least one TOC is outdated.
    """
    usage_examples = (
        'Examples:\n'
        ' %(prog)s # update all workspace .md files\n'
        ' %(prog)s docs/*.md README.md # update specific files\n'
        ' %(prog)s --docs-dir docs/ # update files in one directory\n'
        ' %(prog)s --check # CI check — exit 1 if any TOC outdated\n'
        ' %(prog)s -v docs/C_API.md # verbose: show status for every file'
    )
    cli = argparse.ArgumentParser(
        description='Insert or update a doctoc-compatible TOC in Markdown files.',
        epilog=usage_examples,
        # Keep the hand-formatted examples exactly as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        'files',
        nargs='*',
        metavar='FILE',
        help='Markdown files to process (default: all workspace .md files)',
    )
    cli.add_argument(
        '--docs-dir',
        metavar='DIR',
        help='Search for .md files in DIR instead of the whole workspace',
    )
    cli.add_argument(
        '--check',
        action='store_true',
        help='Exit 1 if any TOC is outdated; do not write files',
    )
    cli.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Print a status line for every file, including up-to-date ones',
    )
    opts = cli.parse_args()

    root = _find_repo_root()
    targets = collect_files(opts.files, opts.docs_dir, root)
    if not targets:
        print("error: no Markdown files found", file=sys.stderr)
        sys.exit(1)

    changed = 0
    unchanged = 0
    without_toc = 0
    for md_path in targets:
        # Pre-scan so files with nothing to list are counted as skipped
        # rather than as up to date.
        content = md_path.read_text(encoding='utf-8')
        if generate_toc_lines(extract_headings(content)):
            if process_file(md_path, check=opts.check, verbose=opts.verbose):
                changed += 1
            else:
                unchanged += 1
        else:
            without_toc += 1
            if opts.verbose:
                print(f" {md_path}: no headings — skipped")

    check_failed = bool(opts.check and changed)
    suffix = " (--check mode, no files written)" if check_failed else ""
    print(
        f"\nResults: {changed} updated, {unchanged} up to date, {without_toc} skipped"
        + suffix
    )
    if check_failed:
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()