import os
import re
import argparse
import asyncio
import httpx
from pathlib import Path
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeElapsedColumn
# Shared rich console used for every user-facing message in this script.
console = Console()

# --- Command-line interface -------------------------------------------------
parser = argparse.ArgumentParser(
    description="Export GitHub issues (and comments) as Markdown files."
)
parser.add_argument("repo", help="GitHub repository (e.g. 'librosa/librosa')")
parser.add_argument("-t", "--token", help="GitHub token or use GITHUB_TOKEN env var")
parser.add_argument(
    "-s",
    "--state",
    choices=["open", "closed", "all"],
    default="open",
    help="Which issues to fetch",
)
parser.add_argument("-o", "--out", default="issues_export", help="Directory to write .md files")
parser.add_argument("-nc", "--no-comments", action="store_true", help="Skip downloading comments")
parser.add_argument("--filename-format", default="issue_{number:05}.md", help="Output filename pattern")
args = parser.parse_args()

# --- Resolved settings ------------------------------------------------------
REPO = args.repo
# An explicit --token wins; otherwise fall back to the GITHUB_TOKEN env var.
TOKEN = args.token or os.getenv("GITHUB_TOKEN")
OUTPUT_DIR = Path(args.out)
INCLUDE_COMMENTS = not args.no_comments

# --- Fail fast on unusable input --------------------------------------------
# SystemExit is raised directly instead of calling exit(): that helper is
# injected by the ``site`` module for interactive sessions and is not
# guaranteed to exist (e.g. ``python -S`` or frozen builds).
if "/" not in REPO:
    console.print("[bold red]Error:[/bold red] Repository must be in format 'owner/repo'")
    raise SystemExit(1)
if not TOKEN:
    console.print("[bold red]Error:[/bold red] GitHub token is required (--token or $GITHUB_TOKEN)")
    raise SystemExit(1)

# Headers sent with every API request made through the shared HTTP client.
HEADERS = {
    "Authorization": f"token {TOKEN}",
    "Accept": "application/vnd.github.v3+json",
    "User-Agent": "GitHubIssueExporter",
}
def slugify(text: str) -> str:
    """Turn ``text`` into a short, lowercase, hyphen-separated slug.

    Every run of characters that is neither a word character nor a hyphen
    is collapsed into a single ``-``. The result is cut to 40 characters
    and then stripped of leading/trailing hyphens.
    """
    lowered = text.lower()
    hyphenated = re.sub(r"[^\w\-]+", "-", lowered)
    # Truncate first, then strip, so a cut that lands on a hyphen is cleaned up.
    shortened = hyphenated[:40]
    return shortened.strip("-")
async def fetch_issues(client, repo: str, state: str = "open"):
    """Download every issue of ``repo`` in the given ``state``, page by page.

    Pages of 100 items are requested until an empty page comes back.
    Entries carrying a ``"pull_request"`` key are dropped, so only plain
    issues are returned.

    Args:
        client: HTTP client with an awaitable ``get(url, params=...)``; the
            script passes an ``httpx.AsyncClient``.
        repo: Repository in ``owner/repo`` form.
        state: Issue state filter forwarded to the API.

    Returns:
        A list of issue dicts exactly as decoded from the API's JSON.

    Raises:
        SystemExit: With code 1 when the repository is not found (HTTP 404)
            or the API answers with any other non-200 status.
    """
    url = f"https://api.github.com/repos/{repo}/issues"
    issues, page = [], 1
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TimeElapsedColumn(),
        transient=True,
    ) as progress:
        # The number of pages is not known up front, so the task is created
        # un-started.
        task = progress.add_task("[cyan]Fetching issues...", start=False)
        while True:
            response = await client.get(
                url,
                params={"state": state, "page": page, "per_page": 100},
            )
            if response.status_code == 404:
                console.print(f"[red]Repository not found:[/red] {repo}")
                raise SystemExit(1)
            if response.status_code != 200:
                # Error payloads are JSON objects, not lists. Treating one as
                # a page of issues would add its keys to the result and keep
                # requesting further pages forever.
                console.print(
                    f"[red]GitHub API request failed (HTTP {response.status_code}) for:[/red] {repo}"
                )
                raise SystemExit(1)
            data = response.json()
            if not data:
                break
            issues.extend(item for item in data if "pull_request" not in item)
            page += 1
            progress.update(task, advance=1)
        progress.start_task(task)
    return issues
async def fetch_comments(client, repo: str, issue_number: int):
    """Download all comments of one issue, following pagination.

    Comments are requested 100 per page until a page comes back shorter
    than that, so long threads are not truncated to the first page.

    Args:
        client: HTTP client with an awaitable ``get(url, params=...)``; the
            script passes an ``httpx.AsyncClient``.
        repo: Repository in ``owner/repo`` form.
        issue_number: Number of the issue whose comments are wanted.

    Returns:
        A list of comment dicts as decoded from the API's JSON. Fetching is
        best-effort: a non-200 response stops the download and whatever was
        collected so far is returned (an empty list if the first request
        fails).
    """
    url = f"https://api.github.com/repos/{repo}/issues/{issue_number}/comments"
    per_page = 100
    comments, page = [], 1
    while True:
        response = await client.get(url, params={"per_page": per_page, "page": page})
        if response.status_code != 200:
            # Best-effort: a failed page must not abort the whole export.
            break
        batch = response.json()
        comments.extend(batch)
        # A short (or empty) page means there is nothing left to fetch.
        if len(batch) < per_page:
            break
        page += 1
    return comments
def write_issue_md(issue: dict, comments: list, out_dir: Path, filename_format: str):
    """Render one issue and its comments to a Markdown file in ``out_dir``.

    Args:
        issue: Issue dict; ``number`` and ``title`` are required, while
            ``user``, ``labels`` and ``body`` may be missing or null.
        comments: Comment dicts for the issue; may be empty.
        out_dir: Existing directory the file is written into.
        filename_format: ``str.format`` pattern that receives ``number`` and
            ``slug`` (the slugified title).
    """
    number = issue["number"]
    title = issue["title"]
    # ``user`` can be null in API payloads (e.g. a deleted account), so fall
    # back to the "ghost" placeholder name instead of crashing.
    user = (issue.get("user") or {}).get("login", "ghost")
    labels = ", ".join(label["name"] for label in issue.get("labels", []))
    body = issue.get("body") or "*No description provided.*"

    md = [
        f"# Issue #{number}: {title}",
        f"**Author**: {user}",
        f"**Labels**: {labels if labels else 'None'}",
        "",
        "---",
        "",
        body.strip(),
        "",
        "---",
    ]

    if comments:
        md.append("## Comments")
        for comment in comments:
            author = (comment.get("user") or {}).get("login", "ghost")
            # A comment body may be absent or null; render it as empty text.
            content = comment.get("body") or ""
            md.append(f"**{author}**:\n\n{content.strip()}\n")

    filename = filename_format.format(number=number, slug=slugify(title))
    (out_dir / filename).write_text("\n".join(md), encoding="utf-8")
def write_index(issues: list, out_dir: Path, filename_format: str, filename: str = "index.md"):
    """Write a Markdown index linking to every exported issue file.

    Issues are listed in ascending order of their number. Each entry links
    to the file name produced by ``filename_format`` for that issue.

    Args:
        issues: Issue dicts; ``number`` and ``title`` are required, while
            ``user`` and ``labels`` may be missing or null/empty.
        out_dir: Existing directory the index is written into.
        filename_format: ``str.format`` pattern that receives ``number`` and
            ``slug``; it must match the pattern used to write the issue
            files for the links to resolve.
        filename: Name of the index file inside ``out_dir``.
    """
    lines = ["# GitHub Issue Index", ""]
    for issue in sorted(issues, key=lambda item: item["number"]):
        number = issue["number"]
        title = issue["title"]
        # ``user`` can be null in API payloads (e.g. a deleted account), so
        # fall back to the "ghost" placeholder name instead of crashing.
        user = (issue.get("user") or {}).get("login", "ghost")
        labels = ", ".join(label["name"] for label in issue.get("labels", []))
        md_filename = filename_format.format(number=number, slug=slugify(title))

        entry = f"- [#{number} - {title}](./{md_filename}) by **{user}**"
        # Only append the label suffix when there are labels, so unlabeled
        # entries do not end with a stray trailing space.
        if labels:
            entry += f" [Labels: {labels}]"
        lines.append(entry)

    (out_dir / filename).write_text("\n".join(lines), encoding="utf-8")
async def main():
    """Export the configured repository's issues to Markdown files.

    Reads the module-level settings (``REPO``, ``args``, ``OUTPUT_DIR``,
    ``INCLUDE_COMMENTS``, ``HEADERS``), fetches the issues, then fetches
    comments and writes one file per issue, and finally writes the index.
    Returns early, without writing any files, when no issues are found.
    """
    # Upper bound on simultaneous comment requests. Launching one request per
    # issue all at once can exhaust the client's connection pool (requests
    # waiting for a connection are subject to the same 10 s timeout) and is
    # unfriendly to the API on repositories with many issues.
    batch_size = 20

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    async with httpx.AsyncClient(headers=HEADERS, timeout=10) as client:
        issues = await fetch_issues(client, REPO, args.state)
        if not issues:
            console.print("[yellow]No issues found.[/yellow]")
            return
        console.print(f"[green]Saving {len(issues)} issues to '{OUTPUT_DIR}'...[/green]")
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TimeElapsedColumn(),
        ) as progress:
            task = progress.add_task("[cyan]Exporting issues...", total=len(issues))
            for start in range(0, len(issues), batch_size):
                batch = issues[start:start + batch_size]
                if INCLUDE_COMMENTS:
                    batch_comments = await asyncio.gather(
                        *(fetch_comments(client, REPO, issue["number"]) for issue in batch)
                    )
                else:
                    batch_comments = [[] for _ in batch]
                # Write each batch as soon as its comments arrive so the
                # progress bar advances during the download phase.
                for issue, comments in zip(batch, batch_comments):
                    write_issue_md(issue, comments, OUTPUT_DIR, args.filename_format)
                    progress.update(task, advance=1)
    write_index(issues, OUTPUT_DIR, args.filename_format)
    console.print(f"[bold green]Done! Exported {len(issues)} issue(s).[/bold green]")
if __name__ == "__main__":
    # Script entry point: run the async exporter to completion.
    asyncio.run(main())