from __future__ import annotations
import re
import subprocess
import sys
# A line containing this marker is never reported, whatever else it contains.
EXEMPT_MARKER = "noqa: process-label"

# (label, compiled regex) pairs. Every scanned line is searched with each
# regex in turn; the label is what gets shown in the failure report.
PATTERNS = [
    (label, re.compile(source))
    for label, source in (
        # Literal "bd-" followed by exactly four lowercase alphanumerics.
        ("bead id", r"\bbd-[a-z0-9]{4}\b"),
        # The letters "PR" in any case, an optional hash sign, then digits.
        ("PR reference", r"(?i)\bPR\s*#?\s*\d+\b"),
        # A hash sign plus digits, unless the character before it is a word
        # character, "&", another hash sign, or "/" (presumably so that HTML
        # numeric entities and URL fragments are left alone).
        ("issue/PR number", r"(?<![\w&#/])#\d+\b"),
        # A temporal cue word followed by a dotted major.minor number.
        (
            "release-context cue",
            r"(?i)\b(?:since|as of|flipped in)\s+(?:version\s+)?\d+\.\d+\b",
        ),
    )
]

# A tracked path is scanned when it starts with one of these prefixes ...
INCLUDE_PREFIXES = (
    "src/",
    "src-python/src/",
    "mrrc/",
    "tests/",
    "docs/",
    ".github/",
)
# ... or is exactly one of these files.
INCLUDE_FILES = ("README.md",)

# Exclusions are checked before inclusions, so they win for paths that would
# otherwise be selected by INCLUDE_PREFIXES / INCLUDE_FILES.
EXCLUDE_PREFIXES = ("docs/history/", "docs/design/")
EXCLUDE_FILES = ("CHANGELOG.md",)
def tracked_files() -> list[str]:
    """Return the git-tracked paths that the lint should scan.

    Runs ``git ls-files`` in the current working directory and keeps a path
    when it is listed in ``INCLUDE_FILES`` or starts with one of
    ``INCLUDE_PREFIXES``, unless it is listed in ``EXCLUDE_FILES`` or starts
    with one of ``EXCLUDE_PREFIXES`` (exclusions are checked first).

    Returns:
        The selected paths, in the order git reports them.

    Raises:
        subprocess.CalledProcessError: if ``git ls-files`` exits non-zero.
    """
    # -z makes git emit NUL-terminated paths verbatim. Without it, paths that
    # contain non-ASCII bytes, quotes, tabs or backslashes are printed
    # C-quoted (wrapped in double quotes with escapes), so they would never
    # match an include prefix and would silently escape the lint.
    listing = subprocess.run(
        ["git", "ls-files", "-z"], capture_output=True, text=True, check=True
    ).stdout
    return [
        path
        for path in listing.split("\0")
        # The output ends with a NUL, so the final split element is empty.
        if path
        and not (path in EXCLUDE_FILES or path.startswith(EXCLUDE_PREFIXES))
        and (path in INCLUDE_FILES or path.startswith(INCLUDE_PREFIXES))
    ]
def main() -> int:
    """Scan the selected tracked files and report embedded process labels.

    Every line of every path returned by ``tracked_files()`` is searched with
    each regex in ``PATTERNS``; lines containing ``EXEMPT_MARKER`` are
    skipped. At most one hit per pattern per line is recorded (the first
    match of that pattern on the line).

    Returns:
        0 when nothing matched (a success line is printed to stdout),
        otherwise 1 after a report of every hit has been written to stderr.
    """
    offenders = []
    for path in tracked_files():
        try:
            with open(path, encoding="utf-8") as fh:
                lines = fh.readlines()
        except (UnicodeDecodeError, FileNotFoundError, IsADirectoryError):
            # Best-effort scan: skip files that are not UTF-8 text, tracked
            # paths missing from the working tree, and tracked entries that
            # are directories on disk (submodule gitlinks, symlinks to
            # directories) instead of crashing the whole lint run.
            continue
        for lineno, line in enumerate(lines, 1):
            if EXEMPT_MARKER in line:
                continue
            for label, pattern in PATTERNS:
                m = pattern.search(line)
                if m:
                    offenders.append(
                        (path, lineno, label, m.group(0), line.rstrip())
                    )

    if not offenders:
        print("✓ process-label lint passed")
        return 0

    # NOTE(review): the message below mentions "phases", but none of the
    # patterns visible in this file checks for them -- confirm the wording.
    print("✗ process-label lint failed: persistent artifacts must not embed "
          "process labels (bead IDs, PR/issue numbers, phases, version-tagged "
          "claims).", file=sys.stderr)
    print(f" Put provenance in git/CHANGELOG, or add '{EXEMPT_MARKER}' to a "
          "line that is a genuine reference.\n", file=sys.stderr)
    for path, lineno, label, match, text in offenders:
        # Keep each echoed source line to at most 100 characters.
        snippet = text if len(text) <= 100 else text[:97] + "..."
        print(f" {path}:{lineno}: [{label}] {match!r}", file=sys.stderr)
        print(f" {snippet.strip()}", file=sys.stderr)
    return 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())