from __future__ import annotations
import argparse
import logging
import re
import subprocess
import sys
from pathlib import Path
from subprocess_utils import (
ExecutableNotFoundError,
run_git_command,
run_git_command_with_input,
)
# Size threshold, in bytes, for a tag annotation. create_tag() compares the
# UTF-8 length of the changelog section against it and switches to a short
# reference message when the section is larger.
_GITHUB_TAG_ANNOTATION_LIMIT = 125_000
# ANSI escape sequences used to colour terminal output.
_GREEN = "\033[0;32m"
_BLUE = "\033[0;34m"
_YELLOW = "\033[1;33m"  # bold yellow
_RESET = "\033[0m"  # resets all attributes back to the terminal default
# Module-level logger; logging itself is configured in main().
log = logging.getLogger(__name__)
# A decimal number without leading zeros, restricted to ASCII digits. ``\d``
# is deliberately avoided: in a ``str`` pattern it also matches non-ASCII
# Unicode digits, which are not valid in a SemVer string.
_NUM_ID = r"(?:0|[1-9][0-9]*)"
# An alphanumeric pre-release identifier: ASCII letters, digits and hyphens,
# containing at least one non-digit (enforced by the lookahead) so that purely
# numeric identifiers must go through ``_NUM_ID`` and its leading-zero rule.
_ALNUM_ID = r"(?:(?=[0-9A-Za-z-]*[A-Za-z-])[0-9A-Za-z-]+)"
# "v" + MAJOR.MINOR.PATCH, optional "-pre.release" and optional "+build.meta".
# The three capture groups are major, minor and patch. ``\Z`` (not ``$``)
# anchors the end so that a trailing newline is not silently accepted.
_SEMVER_RE = re.compile(
    r"^v"
    rf"({_NUM_ID})\."
    rf"({_NUM_ID})\."
    rf"({_NUM_ID})"
    rf"(?:-(?:{_NUM_ID}|{_ALNUM_ID})"
    rf"(?:\.(?:{_NUM_ID}|{_ALNUM_ID}))*"
    r")?"
    r"(?:\+[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*)?\Z"
)


def validate_semver(tag_version: str) -> None:
    """Check that *tag_version* is a ``v``-prefixed SemVer string.

    Args:
        tag_version: Candidate tag name, e.g. ``v1.2.3`` or ``v1.2.3-rc.1``.

    Raises:
        ValueError: If the whole string does not match the SemVer pattern.
    """
    # fullmatch() requires the entire string to be consumed, so stray leading
    # or trailing characters (including a final newline) are rejected.
    if _SEMVER_RE.fullmatch(tag_version) is None:
        msg = f"Tag version should follow SemVer format 'vX.Y.Z' (e.g., v0.3.5, v1.2.3-rc.1). Got: {tag_version}"
        raise ValueError(msg)
def parse_version(tag_version: str) -> str:
    """Return *tag_version* without a single leading ``v``, if it has one."""
    if tag_version.startswith("v"):
        return tag_version[1:]
    return tag_version
def find_changelog(start: Path | None = None) -> Path:
    """Locate ``CHANGELOG.md`` in *start* or in its parent directory.

    Args:
        start: Directory to search first; the current working directory is
            used when it is omitted.

    Returns:
        Path of the first ``CHANGELOG.md`` that exists as a regular file.

    Raises:
        FileNotFoundError: If neither directory contains the file.
    """
    search_root = Path.cwd() if not start else start
    for directory in (search_root, search_root.parent):
        changelog = directory / "CHANGELOG.md"
        if changelog.is_file():
            return changelog
    raise FileNotFoundError("CHANGELOG.md not found in current directory or parent directory.")
def _archive_path_for_version(changelog: Path, version: str) -> Path | None:
    """Return the archived changelog file for *version*'s ``MAJOR.MINOR`` series.

    The archive is looked up at ``docs/archive/changelog/<major>.<minor>.md``
    next to *changelog*.

    Returns:
        The archive path when it exists as a file; ``None`` when *version* has
        fewer than two dot-separated components or the file is absent.
    """
    components = version.split(".")
    if len(components) >= 2:
        major, minor = components[:2]
        archived = changelog.parent / "docs" / "archive" / "changelog" / f"{major}.{minor}.md"
        if archived.is_file():
            return archived
    return None
def _extract_section_from_file(path: Path, version: str) -> str | None:
    """Return the body of the ``##`` section for *version* found in *path*.

    The body is every line between the matching level-2 heading and the next
    level-2 heading (or end of file), with leading and trailing blank lines
    removed.

    Returns:
        The section text, or ``None`` when no heading matches or the section
        contains only blank lines.
    """
    matches_version = _version_header_re(version).match
    captured: list[str] = []
    found = False
    for raw_line in path.read_text(encoding="utf-8").split("\n"):
        if re.match(r"^##\s", raw_line) is None:
            # Ordinary line: keep it only once we are inside the section.
            if found:
                captured.append(raw_line)
            continue
        if found:
            # The next level-2 heading closes the section.
            break
        if matches_version(raw_line):
            found = True
    if not found:
        return None
    # Drop blank padding at both ends of the captured body.
    while captured and not captured[0].strip():
        del captured[0]
    while captured and not captured[-1].strip():
        captured.pop()
    text = "\n".join(captured)
    return text if text.strip() else None
def extract_changelog_section(changelog: Path, version: str) -> tuple[str, Path]:
    """Find the changelog section for *version*.

    The main *changelog* is searched first. The per-minor archive file is
    consulted only when the main file yields nothing.

    Returns:
        A ``(section_text, source_file)`` pair.

    Raises:
        LookupError: If neither file contains a non-empty section.
    """
    text = _extract_section_from_file(changelog, version)
    if text:
        return text, changelog
    archived = _archive_path_for_version(changelog, version)
    text = _extract_section_from_file(archived, version) if archived else None
    if text:
        return text, archived
    raise LookupError(
        f"No changelog section found for version {version}. Expected a heading like: ## [{version}] - YYYY-MM-DD"
    )
def _tag_exists(tag_version: str) -> bool:
    """Report whether ``refs/tags/<tag_version>`` resolves in the repository."""
    ref = f"refs/tags/{tag_version}"
    try:
        run_git_command(["rev-parse", "-q", "--verify", ref])
    except subprocess.CalledProcessError:
        # A failing ``git rev-parse --verify`` is treated as "no such tag".
        return False
    return True
def _delete_tag(tag_version: str) -> None:
    """Delete the tag *tag_version* by running ``git tag -d``."""
    delete_args = ["tag", "-d", tag_version]
    run_git_command(delete_args)
def _get_repo_url() -> str:
    """Return a browsable URL for the ``origin`` remote.

    GitHub remotes in scp-like (``git@github.com:owner/repo``), ``https://``
    and ``ssh://`` form are normalised to ``https://github.com/<owner>/<repo>``.
    Any other remote is returned unchanged, unless it carries a user-info
    part (``user@`` or ``user:password@``).

    Returns:
        The normalised GitHub URL, or the raw remote URL for other hosts.

    Raises:
        ValueError: If a non-GitHub remote appears to embed credentials. The
            user-info part is redacted in the message so secrets are not
            echoed to the terminal or logs.
    """
    # NOTE(review): assumes run_git_command returns an object whose ``stdout``
    # is text, as the ``.strip()`` and regex use below require.
    result = run_git_command(["remote", "get-url", "origin"])
    raw = result.stdout.strip()
    patterns = [
        r"^git@github\.com:(?P<slug>[^/]+/[^/]+?)(?:\.git)?/?$",
        r"^https://github\.com/(?P<slug>[^/]+/[^/]+?)(?:\.git)?/?$",
        r"^ssh://git@github\.com[:/](?P<slug>[^/]+/[^/]+?)(?:\.git)?/?$",
    ]
    for pat in patterns:
        m = re.match(pat, raw)
        if m:
            return f"https://github.com/{m.group('slug')}"
    # ``://user@`` also covers ``://user:password@`` because the character
    # class admits ":"; the second test catches scp-like ``user@host:path``.
    if re.search(r"://[^/@]+@", raw) or re.match(r"[^@]+@", raw):
        # Never echo the user-info part: it may be a password or access token.
        redacted, hits = re.subn(
            r"^((?:[A-Za-z][A-Za-z0-9+.-]*://)?)[^/]*@",
            r"\1<redacted>@",
            raw,
            count=1,
        )
        shown = redacted if hits else "<redacted>"
        msg = f"Remote URL appears to contain credentials; cannot use as a public URL: {shown}"
        raise ValueError(msg)
    return raw
def _version_header_re(version: str) -> re.Pattern[str]:
    """Compile a pattern matching the level-2 markdown heading for *version*.

    The heading may wrap the version in square brackets and prefix it with
    ``v``. The version must be followed by end of line, whitespace or ``(``,
    so that ``1.2.3`` does not match a heading for ``1.2.30``.
    """
    escaped = re.escape(version)
    return re.compile(r"^##\s*\[?v?" + escaped + r"\]?(?:$|\s|\()")
def _heading_to_anchor(heading_line: str) -> str:
    """Convert a ``## ...`` markdown heading line into a URL fragment slug.

    Link and bracket markup is unwrapped to its text, the result is
    lower-cased, characters other than ``a-z``, ``0-9``, whitespace and ``-``
    are dropped, and each run of whitespace becomes a single ``-``.
    """
    text = heading_line.removeprefix("## ").strip()
    # Unwrap ``[label](target)`` links first, then any remaining ``[label]``.
    for markup in (r"\[([^\]]+)\]\([^)]+\)", r"\[([^\]]+)\]"):
        text = re.sub(markup, r"\1", text)
    slug = re.sub(r"[^a-z0-9\s-]", "", text.lower())
    return re.sub(r"\s+", "-", slug)
def _find_anchor_in_file(path: Path, version: str) -> str | None:
    """Return the anchor slug of the first heading for *version* in *path*.

    Returns:
        The slug, or ``None`` when no heading matches or the file cannot be
        read (``OSError`` is deliberately treated as "not found").
    """
    matches_version = _version_header_re(version).match
    try:
        text = path.read_text(encoding="utf-8")
    except OSError:
        return None
    heading = next((ln for ln in text.splitlines() if matches_version(ln)), None)
    return None if heading is None else _heading_to_anchor(heading)
def _github_anchor(changelog: Path, version: str) -> str:
    """Return the URL fragment for *version*'s changelog heading.

    The main changelog is checked first, then the archive file for the
    version's minor series. When neither contains a matching heading, a slug
    built from ``v<version>`` (lower-cased, restricted to ``a-z0-9-``) is used.
    """
    found = _find_anchor_in_file(changelog, version)
    if not found:
        archived = _archive_path_for_version(changelog, version)
        if archived:
            found = _find_anchor_in_file(archived, version)
    if found:
        return found
    fallback = f"v{version}".lower()
    return re.sub(r"[^a-z0-9-]", "", fallback)
def create_tag(tag_version: str, *, force: bool = False) -> None:
    """Create an annotated git tag whose message comes from the changelog.

    The tag message is the changelog section for the version. When that
    section's UTF-8 size exceeds ``_GITHUB_TAG_ANNOTATION_LIMIT`` a short
    message linking to the changelog file on the remote is used instead.
    Progress and follow-up instructions are printed to stdout.

    Args:
        tag_version: Tag name in ``vX.Y.Z`` SemVer form.
        force: Replace the tag when it already exists instead of aborting.

    Raises:
        ValueError: If *tag_version* is not valid SemVer, or the remote URL
            cannot be used as a public URL.
        FileNotFoundError: If no ``CHANGELOG.md`` is found.
        LookupError: If the changelog has no section for the version.
        subprocess.CalledProcessError: If a git command fails.
        SystemExit: With status 1 when the tag exists and *force* is false.
    """
    validate_semver(tag_version)
    version = parse_version(tag_version)

    tag_existed = _tag_exists(tag_version)
    if tag_existed and not force:
        print(f"{_YELLOW}Tag '{tag_version}' already exists.{_RESET}", file=sys.stderr)
        print(f"Use --force to recreate, or delete manually: git tag -d {tag_version}", file=sys.stderr)
        sys.exit(1)

    changelog = find_changelog()
    section, source = extract_changelog_section(changelog, version)
    section_bytes = len(section.encode("utf-8"))

    if section_bytes > _GITHUB_TAG_ANNOTATION_LIMIT:
        print(f"{_YELLOW}⚠ Changelog section ({section_bytes:,} bytes) exceeds GitHub's tag limit ({_GITHUB_TAG_ANNOTATION_LIMIT:,} bytes){_RESET}")
        anchor = _github_anchor(changelog, version)
        repo_url = _get_repo_url()
        try:
            source_rel = source.relative_to(changelog.parent)
        except ValueError:
            source_rel = source
        tag_message = (
            f"Version {version}\n\n"
            f"This release contains extensive changes. See full changelog:\n"
            # as_posix(): URLs always use forward slashes, even where the
            # platform's native path separator is a backslash.
            f"<{repo_url}/blob/{tag_version}/{source_rel.as_posix()}#{anchor}>\n\n"
            f"For detailed release notes, refer to {source_rel} in the repository.\n"
        )
        is_truncated = True
        print(f"{_BLUE}→ Creating annotated tag with CHANGELOG.md reference{_RESET}")
    else:
        tag_message = section
        is_truncated = False
        print(f"{_BLUE}Tag message preview ({section_bytes:,} bytes):{_RESET}")
        section_lines = section.split("\n")
        print("----------------------------------------")
        print("\n".join(section_lines[:20]))
        if len(section_lines) > 20:
            print("... (truncated for preview)")
        print("----------------------------------------")

    tag_args = ["tag", "-a", tag_version, "-F", "-"]
    if tag_existed:
        # Reaching this point with an existing tag implies force=True. Let git
        # replace the tag in one step rather than deleting it first, so the
        # old tag survives if creating the new annotation fails.
        print(f"{_BLUE}Replacing existing tag '{tag_version}'...{_RESET}")
        tag_args.insert(1, "--force")

    label = "reference" if is_truncated else "full changelog"
    print(f"{_BLUE}Creating annotated tag '{tag_version}' with {label} content...{_RESET}")
    # "-F -" makes git read the tag message from standard input.
    run_git_command_with_input(tag_args, input_data=tag_message)
    print(f"{_GREEN}✓ Successfully created tag '{tag_version}'{_RESET}")

    print()
    print("Next steps:")
    if force:
        print(f" 1. Force-push the tag: {_BLUE}git push --force origin {tag_version}{_RESET}")
    else:
        print(f" 1. Push the tag: {_BLUE}git push origin {tag_version}{_RESET}")
    print(f" 2. Create GitHub release: {_BLUE}gh release create {tag_version} --notes-from-tag{_RESET}")
    if is_truncated:
        print(f"\n{_YELLOW}Note: Tag annotation references CHANGELOG.md due to size (>125KB).{_RESET}")
def main() -> None:
    """Command-line entry point: parse arguments and create the tag.

    Expected failures from ``create_tag`` are reported on stderr as
    ``Error: ...`` and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(
        prog="tag-release",
        description="Create an annotated git tag from a CHANGELOG.md section.",
    )
    cli.add_argument("version", help="Tag version (e.g. v1.2.3)")
    cli.add_argument("--force", action="store_true", help="Recreate tag if it already exists")
    cli.add_argument("--debug", action="store_true", help="Enable debug logging")
    options = cli.parse_args()

    level = logging.DEBUG if options.debug else logging.WARNING
    logging.basicConfig(level=level, format="%(levelname)s: %(message)s")

    # Failures reported as a one-line message instead of a traceback.
    handled = (
        ValueError,
        FileNotFoundError,
        LookupError,
        ExecutableNotFoundError,
        subprocess.CalledProcessError,
    )
    try:
        create_tag(options.version, force=options.force)
    except handled as exc:
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)
# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()