from __future__ import annotations
import argparse
import re
import sys
from pathlib import Path
# Any level-2 heading whose title starts with a bracketed label,
# e.g. "## [Unreleased]" or "## [1.2.3] - 2024-01-01".
_VERSION_HEADING_RE = re.compile(r"^## \[")
# A release heading; group 1 captures the bracketed label, which must start
# with X.Y.Z and may carry any suffix up to the closing "]".
_VERSION_RE = re.compile(r"^## \[(\d+\.\d+\.\d+[^\]]*)\]")
# A Markdown link reference definition line ("[label]: target");
# group 1 captures the label.
_LINK_DEF_RE = re.compile(r"^\[([^\]]+)\]:\s+\S+")
# Archive directory used when the caller supplies none; it is joined onto the
# directory that contains the changelog.
_DEFAULT_ARCHIVE_DIR = "docs/archive/changelog"
def _minor_key(version: str) -> str:
parts = version.split(".")
if len(parts) < 2:
msg = f"Expected a version with at least two components (X.Y), got: {version!r}"
raise ValueError(msg)
return f"{parts[0]}.{parts[1]}"
def _version_sort_key(label: str) -> tuple[bool, tuple[int, ...]]:
parts = label.split(".")
try:
nums = tuple(int(p) for p in parts)
except ValueError:
return (True, ())
return (False, nums)
def _extract_link_defs(text: str) -> tuple[str, dict[str, str]]:
    """Strip the trailing run of link reference definitions from *text*.

    Scanning starts at the last line and moves upwards, consuming link
    definition lines and blank lines until the first line that is neither.

    Returns:
        A ``(cleaned, link_defs)`` pair. ``cleaned`` is the text above the
        consumed run, ending in exactly one newline. ``link_defs`` maps each
        lower-cased label to its full definition line; when a label occurs
        more than once in the run, the occurrence nearest the top wins.
    """
    body = text.rstrip("\n").split("\n")
    found: dict[str, str] = {}
    keep = len(body)  # number of leading lines that stay in the cleaned text
    for candidate in reversed(body):
        match = _LINK_DEF_RE.match(candidate)
        if match is not None:
            found[match.group(1).lower()] = candidate
        elif candidate.strip():
            # First real content line above the definitions: stop here.
            break
        keep -= 1
    return "\n".join(body[:keep]).rstrip("\n") + "\n", found
def parse_changelog(text: str) -> tuple[str, str, list[tuple[str, str]]]:
    """Split changelog text into a preamble, the Unreleased section and releases.

    A section starts at a ``## [`` heading that either contains
    ``Unreleased`` or carries a label beginning with ``X.Y.Z``, and runs up to
    the next such heading or the end of the text.

    Any other ``## [...]`` heading is NOT treated as a boundary: its lines stay
    attached to whatever precedes it (the preamble or the previous section).
    This guarantees that every input line ends up in one of the returned
    pieces, so a caller that rewrites the changelog from them cannot lose a
    section merely because its heading label is not a recognised version.

    Returns:
        ``(preamble, unreleased, version_blocks)`` where ``preamble`` is the
        text before the first recognised heading, ``unreleased`` is the
        Unreleased section (``""`` if absent; the last one wins if repeated)
        and ``version_blocks`` is a list of ``(version, section_text)`` pairs
        in file order. When no recognised heading exists the result is
        ``(text, "", [])``.
    """
    lines = text.split("\n")

    # (line index, version label) per recognised heading; the label is None
    # for the Unreleased heading.
    boundaries: list[tuple[int, str | None]] = []
    for index, line in enumerate(lines):
        if not _VERSION_HEADING_RE.match(line):
            continue
        if "Unreleased" in line:
            boundaries.append((index, None))
            continue
        match = _VERSION_RE.match(line)
        if match:
            boundaries.append((index, match.group(1)))
        # Otherwise: unrecognised label. Leave it inside the current section.

    if not boundaries:
        return text, "", []

    preamble = "\n".join(lines[: boundaries[0][0]])
    unreleased = ""
    version_blocks: list[tuple[str, str]] = []
    for position, (start, version) in enumerate(boundaries):
        if position + 1 < len(boundaries):
            end = boundaries[position + 1][0]
        else:
            end = len(lines)
        block = "\n".join(lines[start:end])
        if version is None:
            unreleased = block
        else:
            version_blocks.append((version, block))
    return preamble, unreleased, version_blocks
def group_by_minor(
    version_blocks: list[tuple[str, str]],
) -> dict[str, list[tuple[str, str]]]:
    """Bucket ``(version, block)`` pairs by their ``X.Y`` minor series.

    Both the dictionary keys and the entries inside each bucket keep the order
    in which they first appear in *version_blocks*.
    """
    by_minor: dict[str, list[tuple[str, str]]] = {}
    for version, body in version_blocks:
        series = _minor_key(version)
        if series not in by_minor:
            by_minor[series] = []
        by_minor[series].append((version, body))
    return by_minor
def _format_link_defs(link_defs: dict[str, str], labels: set[str]) -> str:
    """Render the link definitions whose label is in *labels*, one per line.

    Definitions are ordered by ``_version_sort_key`` in descending order, which
    places non-numeric labels first and then numeric versions from highest to
    lowest. Returns ``""`` when nothing matches.
    """
    ordered_labels = sorted(link_defs, key=_version_sort_key, reverse=True)
    selected = [link_defs[name] for name in ordered_labels if name in labels]
    # Joining an empty list already yields "", so no special case is needed.
    return "\n".join(selected)
def write_archive(
    archive_dir: Path,
    minor: str,
    blocks: list[tuple[str, str]],
    link_defs: dict[str, str] | None = None,
) -> Path:
    """Write the archive file for one minor series and return its path.

    The file is ``<archive_dir>/<minor>.md``; *archive_dir* is created (with
    parents) when missing. The content is a ``# Changelog - <minor>.x`` title
    followed by every block in the given order. When *link_defs* is supplied,
    the definitions whose label equals one of the block versions (lower-cased)
    are appended after a blank line. The file always ends with exactly one
    newline.
    """
    archive_dir.mkdir(parents=True, exist_ok=True)

    sections = [f"# Changelog - {minor}.x\n"]
    sections.extend(body for _version, body in blocks)
    content = "\n".join(sections)

    if link_defs:
        wanted = {version.lower() for version, _body in blocks}
        footer = _format_link_defs(link_defs, wanted)
        if footer:
            content = "\n\n".join((content.rstrip("\n"), footer))

    destination = archive_dir / f"{minor}.md"
    destination.write_text(content.rstrip("\n") + "\n", encoding="utf-8")
    return destination
def build_root(
    preamble: str,
    unreleased: str,
    active_blocks: list[tuple[str, str]],
    archived_minors: list[str],
    archive_dir_rel: str,
) -> str:
    """Assemble the text of the trimmed root changelog.

    The pieces are joined with newlines in this order: *preamble*, the
    Unreleased section (when non-empty), every active block, and (when
    *archived_minors* is non-empty) an ``## Archives`` section with one
    bullet link per minor pointing at ``<archive_dir_rel>/<minor>.md``.
    The result ends with exactly one newline.
    """
    sections: list[str] = [preamble]
    if unreleased:
        sections.append(unreleased)
    sections.extend(body for _version, body in active_blocks)

    if archived_minors:
        index_lines = [
            "## Archives\n",
            "Older releases are archived by minor series:\n",
        ]
        for series in archived_minors:
            index_lines.append(f"- [{series}.x]({archive_dir_rel}/{series}.md)")
        index_lines.append("")
        sections.append("\n".join(index_lines))

    return "\n".join(sections).rstrip("\n") + "\n"
# Heading and bullet format of the archive index that build_root() appends to
# the root changelog, e.g. "- [1.1.x](docs/archive/changelog/1.1.md)".
_ARCHIVE_INDEX_HEADING = "## Archives"
_ARCHIVE_INDEX_ENTRY_RE = re.compile(r"^- \[(\d+\.\d+)\.x\]\(")


def _split_archive_index(text: str) -> tuple[str, list[str]]:
    """Separate a previously generated ``## Archives`` index from *text*.

    The index is only recognised when it is the last level-2 section of *text*
    and contains at least one ``- [X.Y.x](...)`` bullet; otherwise *text* is
    returned unchanged together with an empty list.

    Returns:
        ``(text_without_index, minors)`` where ``minors`` lists the ``X.Y``
        keys found in the index, in file order and without duplicates.
    """
    lines = text.split("\n")
    start = None
    for index, line in enumerate(lines):
        if line.rstrip() == _ARCHIVE_INDEX_HEADING:
            start = index
        elif line.startswith("## "):
            # Another section follows, so that heading was not the trailing index.
            start = None
    if start is None:
        return text, []

    minors: list[str] = []
    for line in lines[start + 1 :]:
        match = _ARCHIVE_INDEX_ENTRY_RE.match(line)
        if match and match.group(1) not in minors:
            minors.append(match.group(1))
    if not minors:
        # Not a generated index (no bullets): leave the user's section alone.
        return text, []
    return "\n".join(lines[:start]).rstrip("\n") + "\n", minors


def archive_changelog(
    changelog_path: Path,
    archive_dir: Path | None = None,
) -> None:
    """Move every minor series except the first one in the file into archives.

    The first release heading in the changelog determines the active minor
    series. Every other series is written to ``<archive_dir>/<X.Y>.md`` and
    the root changelog is rewritten to hold the preamble, the Unreleased
    section, the active series and an ``## Archives`` index. The file is left
    untouched when it has no release sections or only one minor series.

    Safe to re-run on its own output:

    * An ``## Archives`` index from an earlier run is lifted out before
      parsing, so it is not copied into an archive file, and its entries are
      merged into the regenerated index instead of being replaced.
    * Link targets in the index always use forward slashes.
    * Trailing link definitions stay in the root changelog unless they belong
      to a version that was moved to an archive in this run.

    Args:
        changelog_path: The changelog file to read and rewrite.
        archive_dir: Output directory; defaults to ``_DEFAULT_ARCHIVE_DIR``
            under the directory containing *changelog_path*.
    """
    if archive_dir is None:
        archive_dir = changelog_path.parent / _DEFAULT_ARCHIVE_DIR

    text = changelog_path.read_text(encoding="utf-8")
    text, link_defs = _extract_link_defs(text)
    # The generated index does not match the "## [" heading pattern; without
    # this step it would be swallowed by the last release section.
    text, previously_archived = _split_archive_index(text)
    preamble, unreleased, version_blocks = parse_changelog(text)
    if not version_blocks:
        return

    groups = group_by_minor(version_blocks)
    # Dict order is file order, so the first key is the newest (active) series.
    active_minor, *newly_archived = groups
    if not newly_archived:
        return
    for minor in newly_archived:
        write_archive(archive_dir, minor, groups[minor], link_defs)

    # as_posix(): Markdown link targets need "/" even on Windows.
    try:
        archive_dir_rel = archive_dir.relative_to(changelog_path.parent).as_posix()
    except ValueError:
        archive_dir_rel = archive_dir.as_posix()

    all_archived = list(dict.fromkeys([*newly_archived, *previously_archived]))
    root_text = build_root(
        preamble,
        unreleased,
        groups[active_minor],
        sorted(all_archived, key=_version_sort_key, reverse=True),
        archive_dir_rel,
    )

    if link_defs:
        moved = {ver.lower() for minor in newly_archived for ver, _ in groups[minor]}
        # Keep everything that did not move, including non-version labels.
        labels = set(link_defs) - moved
        if not unreleased:
            labels.discard("unreleased")
        defs_text = _format_link_defs(link_defs, labels)
        if defs_text:
            root_text = root_text.rstrip("\n") + "\n\n" + defs_text + "\n"

    changelog_path.write_text(root_text, encoding="utf-8")
def main() -> None:
    """Command-line entry point: parse arguments and archive the changelog.

    Prints an error to stderr and exits with status 1 when the changelog path
    is not an existing file.
    """
    cli = argparse.ArgumentParser(
        prog="archive-changelog",
        description="Archive completed minor series from CHANGELOG.md.",
    )
    cli.add_argument(
        "path",
        nargs="?",
        default="CHANGELOG.md",
        help="Path to CHANGELOG.md (default: CHANGELOG.md)",
    )
    cli.add_argument(
        "--archive-dir",
        default=None,
        help=f"Archive output directory (default: {_DEFAULT_ARCHIVE_DIR})",
    )
    options = cli.parse_args()

    source = Path(options.path)
    if not source.is_file():
        print(f"Error: {source} not found", file=sys.stderr)
        sys.exit(1)

    # An omitted (or empty) --archive-dir defers to archive_changelog's default.
    target = None
    if options.archive_dir:
        target = Path(options.archive_dir)
    archive_changelog(source, target)


if __name__ == "__main__":
    main()