import os
import sys
import argparse
import logging
import json
from github import Github
from github.PullRequest import PullRequest
import anthropic
import fnmatch
import base64
from typing import List, Dict, Any, Optional
# Configure the root logger once at import time: INFO and above, with every
# record stamped with its time, logger name, and severity.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-wide logger used by every class and function in this script.
logger = logging.getLogger("goosebot")
class FileFilterConfig:
    """Select which changed files get reviewed, using comma-separated globs.

    A file is reviewed when it matches no blacklist pattern and at least one
    whitelist pattern. Patterns are matched with ``fnmatch.fnmatch`` against
    the full filename string passed in.
    """

    def __init__(self, whitelist_patterns: str = "*", blacklist_patterns: str = ""):
        """Parse the two comma-separated pattern lists.

        Args:
            whitelist_patterns: Globs a file must match to be reviewed. An
                empty or all-blank value falls back to ``"*"``.
            blacklist_patterns: Globs that exclude a file from review. An
                empty value excludes nothing.
        """
        self.whitelist_patterns = self._split_patterns(whitelist_patterns) or ["*"]
        self.blacklist_patterns = self._split_patterns(blacklist_patterns)

    @staticmethod
    def _split_patterns(raw: str) -> List[str]:
        """Split a comma-separated list, trimming blanks and dropping empties."""
        # "*.rs, *.md" is a natural way to write the setting; without the
        # strip the second pattern would be " *.md" and could never match.
        return [piece.strip() for piece in (raw or "").split(",") if piece.strip()]

    @classmethod
    def from_env(cls) -> 'FileFilterConfig':
        """Build a filter from PR_REVIEW_WHITELIST / PR_REVIEW_BLACKLIST.

        Unset variables default to ``"*"`` (review everything) and ``""``
        (exclude nothing) respectively.
        """
        return cls(
            whitelist_patterns=os.environ.get("PR_REVIEW_WHITELIST", "*"),
            blacklist_patterns=os.environ.get("PR_REVIEW_BLACKLIST", ""),
        )

    def should_review_file(self, filename: str) -> bool:
        """Return True when *filename* passes the blacklist and the whitelist.

        The blacklist is consulted first, so a file matching both lists is
        skipped.
        """
        for pattern in self.blacklist_patterns:
            if fnmatch.fnmatch(filename, pattern):
                logger.debug(f"Skipping {filename} (matches blacklist pattern {pattern})")
                return False
        for pattern in self.whitelist_patterns:
            if fnmatch.fnmatch(filename, pattern):
                logger.debug(f"Including {filename} (matches whitelist pattern {pattern})")
                return True
        logger.debug(f"Skipping {filename} (doesn't match any whitelist pattern)")
        return False
class TokenUsageTracker:
    """Track cumulative API token usage against a fixed budget."""

    def __init__(self, budget_limit: int = 100000):
        """Start with zero usage and the given total token budget."""
        self.budget_limit = budget_limit
        self.current_usage = 0

    def can_process(self, estimated_tokens: int) -> bool:
        """Return True when *estimated_tokens* more would stay within budget."""
        return self.current_usage + estimated_tokens <= self.budget_limit

    def _usage_percent(self) -> float:
        """Return usage as a percentage of the budget, safe for a zero budget."""
        # A non-positive budget has no meaningful ratio; dividing would raise
        # ZeroDivisionError for a budget of 0 and abort the caller mid-review.
        if self.budget_limit <= 0:
            return float("inf") if self.current_usage > 0 else 0.0
        return self.current_usage / self.budget_limit * 100

    def record_usage(self, prompt_tokens: int, completion_tokens: int) -> int:
        """Add one API call's tokens to the running total and log the result.

        Args:
            prompt_tokens: Input tokens consumed by the call.
            completion_tokens: Output tokens produced by the call.

        Returns:
            The cumulative usage after this call.
        """
        usage = prompt_tokens + completion_tokens
        self.current_usage += usage
        logger.info(f"API call used {prompt_tokens} prompt tokens + {completion_tokens} completion tokens = {usage} total")
        logger.info(f"Total usage: {self.current_usage}/{self.budget_limit} tokens ({self._usage_percent():.1f}%)")
        return self.current_usage
def gather_project_context() -> str:
    """Concatenate the memory-bank markdown files into one context string.

    Each file that exists under ``memory-bank/`` (relative to the current
    working directory) contributes a ``## <filename>`` heading followed by
    its content and a blank line. Missing files are logged and skipped.

    Returns:
        The combined context, or an empty string when no file was found.
    """
    context_files = [
        "projectbrief.md",
        "productContext.md",
        "systemPatterns.md",
        "techContext.md",
        "activeContext.md",
        "progress.md",
    ]
    sections = []
    for filename in context_files:
        path = f"memory-bank/{filename}"
        if not os.path.exists(path):
            logger.warning(f"Context file {path} not found")
            continue
        logger.info(f"Loading context from {path}")
        # Read as UTF-8 explicitly: the platform default encoding can differ
        # and would fail or mis-decode on non-ASCII text in the markdown.
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
        sections.append(f"## {filename}\n{content}\n\n")
    context = "".join(sections)
    if not context:
        logger.warning("No context files found in memory-bank/")
    return context
def load_prompt_template(scope: str, version: str = "v1") -> str:
    """Read the prompt template for a review scope and prompt version.

    The template is loaded from ``.github/prompts/<version>/<scope>_review.md``
    relative to the current working directory.

    Args:
        scope: Review scope used to build the template filename.
        version: Prompt version directory.

    Returns:
        The raw template text.

    Raises:
        SystemExit: With status 1 when the template file does not exist.
    """
    template_path = f".github/prompts/{version}/{scope}_review.md"
    try:
        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        with open(template_path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        logger.error(f"Prompt template not found: {template_path}")
        sys.exit(1)
def get_pull_request(repo, pr_number: int) -> PullRequest:
    """Look up pull request *pr_number* on *repo*.

    Any failure of the lookup is logged and ends the process with exit
    status 1.
    """
    try:
        pull = repo.get_pull(pr_number)
    except Exception as err:
        logger.error(f"Failed to get PR #{pr_number}: {err}")
        sys.exit(1)
    return pull
def get_pr_details(pr: PullRequest) -> Dict[str, Any]:
    """Summarise a pull request as a plain dictionary.

    Returns:
        A dict with keys ``title``, ``description`` (empty string when the
        body is missing), ``files_changed`` (one dict per changed file with
        ``filename``, ``status``, ``additions``, ``deletions``, ``changes``)
        and ``author`` (``"Unknown"`` when the PR has no user).
    """
    logger.info(f"Getting details for PR #{pr.number}: {pr.title}")

    changed = []
    try:
        # Append one record at a time so that files collected before a
        # failure are still returned.
        for entry in pr.get_files():
            record = {
                'filename': entry.filename,
                'status': entry.status,
                'additions': entry.additions,
                'deletions': entry.deletions,
                'changes': entry.changes,
            }
            changed.append(record)
    except Exception as err:
        logger.error(f"Failed to get files changed: {err}")

    details = {'title': pr.title}
    details['description'] = pr.body or ""
    details['files_changed'] = changed
    if pr.user:
        details['author'] = pr.user.login
    else:
        details['author'] = "Unknown"
    return details
def filter_relevant_files(files_changed: List[Dict[str, Any]], file_filter: FileFilterConfig) -> List[Dict[str, Any]]:
    """Keep only the file records whose ``filename`` passes *file_filter*.

    The input order is preserved and the input list is not modified.
    """
    kept = [
        record
        for record in files_changed
        if file_filter.should_review_file(record['filename'])
    ]
    logger.info(f"Filtered {len(files_changed)} files to {len(kept)} relevant files")
    return kept
def get_file_content(repo, file_path: str, ref: str = "main") -> Optional[str]:
    """Fetch *file_path* at *ref* from *repo* and return it as text.

    The ``content`` attribute of the fetched object is base64-decoded and
    then decoded as UTF-8.

    Returns:
        The file text, or None when fetching or decoding fails (the failure
        is logged as a warning).
    """
    try:
        blob = repo.get_contents(file_path, ref=ref)
        raw_bytes = base64.b64decode(blob.content)
        return raw_bytes.decode('utf-8')
    except Exception as err:
        logger.warning(f"Failed to get content for {file_path}: {err}")
        return None
def call_anthropic_api(prompt: str, token_tracker: TokenUsageTracker, max_tokens: int = 4000) -> Dict[str, Any]:
    """Send *prompt* to the Anthropic Messages API and return the reply text.

    Configuration is read from the environment: ``ANTHROPIC_API_KEY``
    (required), ``ANTHROPIC_API_URL`` (optional custom base URL) and
    ``ANTHROPIC_MODEL`` (optional model override).

    Args:
        prompt: The user message to send.
        token_tracker: Budget tracker consulted before the call and updated
            with the reported usage afterwards.
        max_tokens: Maximum completion tokens to request.

    Returns:
        A dict with keys ``content``, ``prompt_tokens`` and
        ``completion_tokens``. When the budget would be exceeded or the API
        call fails, ``content`` is a message starting with ``"Error:"`` and
        both token counts are 0.

    Raises:
        SystemExit: With status 1 when ``ANTHROPIC_API_KEY`` is unset or blank.
    """
    logger.info("Checking API environment variables:")
    logger.info(f"ANTHROPIC_API_KEY present in environment: {'ANTHROPIC_API_KEY' in os.environ}")
    logger.info(f"ANTHROPIC_API_URL present in environment: {'ANTHROPIC_API_URL' in os.environ}")

    api_key = os.environ.get("ANTHROPIC_API_KEY")
    # Only the length is logged. Never log any part of the key itself:
    # CI logs are widely readable and even a prefix is secret material.
    logger.info(f"ANTHROPIC_API_KEY length: {len(api_key or '')}")
    if api_key is None or api_key.strip() == "":
        logger.error("ANTHROPIC_API_KEY environment variable not set or is empty")
        logger.info("Please ensure the secret is set correctly in GitHub repository settings")
        logger.info("See: https://github.com/tag1consulting/goose/settings/secrets/actions")
        sys.exit(1)
    api_url = os.environ.get("ANTHROPIC_API_URL")

    # Rough heuristic: about 1.3 tokens per whitespace-separated word. The
    # requested completion size is reserved up front as well.
    estimated_tokens = len(prompt.split()) * 1.3
    if not token_tracker.can_process(estimated_tokens + max_tokens):
        logger.error(f"Token budget exceeded. Estimated prompt: {estimated_tokens}, response: {max_tokens}")
        return {
            "content": "Error: Token budget exceeded. Unable to complete review.",
            "prompt_tokens": 0,
            "completion_tokens": 0,
        }

    try:
        logger.info("Calling Anthropic API...")
        client_kwargs = {"api_key": api_key}
        if api_url:
            logger.info(f"Using custom API URL: {api_url}")
            client_kwargs["base_url"] = api_url
        client = anthropic.Anthropic(**client_kwargs)
        model = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-20250514")
        logger.info(f"Using model: {model}")
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system="You are GooseBot, an AI assistant that helps with code reviews for the Goose load testing framework. Be concise and helpful.",
            messages=[
                {"role": "user", "content": prompt}
            ],
        )
        # Record the usage reported by the API rather than the estimate.
        prompt_tokens = response.usage.input_tokens
        completion_tokens = response.usage.output_tokens
        token_tracker.record_usage(prompt_tokens, completion_tokens)
        return {
            "content": response.content[0].text,
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
        }
    except Exception as e:
        # Best effort: report the failure as content so the caller can still
        # post something useful on the PR.
        logger.error(f"Error calling Anthropic API: {e}")
        return {
            "content": f"Error: Failed to get response from Anthropic API: {e}",
            "prompt_tokens": 0,
            "completion_tokens": 0,
        }
def format_files_changed_summary(files: List[Dict[str, Any]]) -> str:
    """Render file records as a markdown bullet list, one line per file.

    Each line reads ``- <filename> (<status>, +<additions>, -<deletions>)``
    and ends with a newline; an empty input yields an empty string.
    """
    bullet_lines = (
        f"- {entry['filename']} ({entry['status']}, +{entry['additions']}, -{entry['deletions']})\n"
        for entry in files
    )
    return "".join(bullet_lines)
def post_pr_comment(pr: PullRequest, comment_text: str) -> bool:
    """Post *comment_text* as an issue comment on *pr*.

    Returns:
        True when the comment was created, False when posting failed (the
        failure is logged, not raised).
    """
    try:
        pr.create_issue_comment(comment_text)
        logger.info(f"Posted comment on PR #{pr.number}")
    except Exception as err:
        logger.error(f"Failed to post comment: {err}")
        return False
    return True
def main():
    """Command-line entry point: review one pull request and comment on it.

    Parses ``--pr``, ``--scope``, ``--debug`` and ``--version``, reads
    ``GITHUB_TOKEN``, ``GITHUB_REPOSITORY`` and ``TOKEN_BUDGET`` from the
    environment, builds the prompt from the PR details and project context,
    calls the Anthropic API, and posts the result as a PR comment.

    Raises:
        SystemExit: With status 1 when ``GITHUB_TOKEN`` is missing or an
            unexpected error occurs during the review.
    """
    parser = argparse.ArgumentParser(description="GooseBot PR Review")
    parser.add_argument("--pr", type=int, required=True, help="PR number to review")
    parser.add_argument("--scope", type=str, default="clarity", help="Review scope (e.g., clarity)")
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")
    parser.add_argument("--version", type=str, default="v1", help="Prompt version to use")
    args = parser.parse_args()
    if args.debug:
        logger.setLevel(logging.DEBUG)
    logger.info(f"Starting GooseBot review for PR #{args.pr} (scope: {args.scope}, version: {args.version})")

    token_tracker = TokenUsageTracker(budget_limit=int(os.environ.get("TOKEN_BUDGET", "100000")))
    file_filter = FileFilterConfig.from_env()
    github_token = os.environ.get("GITHUB_TOKEN")
    if not github_token:
        logger.error("GITHUB_TOKEN environment variable not set")
        sys.exit(1)

    try:
        repo_name = os.environ.get("GITHUB_REPOSITORY", "tag1consulting/goose")
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        logger.info(f"Connected to GitHub repository: {repo.full_name}")

        pr = get_pull_request(repo, args.pr)
        pr_details = get_pr_details(pr)
        relevant_files = filter_relevant_files(pr_details['files_changed'], file_filter)
        if not relevant_files:
            logger.warning("No relevant files found to review")
            post_pr_comment(pr, "## GooseBot PR Review\n\nNo relevant files found to review based on current filter settings.")
            return

        files_changed_summary = format_files_changed_summary(relevant_files)
        project_context = gather_project_context()
        prompt_template = load_prompt_template(args.scope, args.version)
        prompt = prompt_template.format(
            project_context=project_context,
            pr_title=pr_details['title'],
            pr_description=pr_details['description'],
            files_changed=files_changed_summary,
        )

        response = call_anthropic_api(prompt, token_tracker)
        review_text = response["content"]
        # call_anthropic_api reports its failures as text beginning with
        # "Error:". Match that prefix only: a substring search for "error"
        # would misclassify any genuine review that merely mentions errors.
        if review_text.startswith("Error:"):
            logger.error(f"API returned an error: {review_text}")
            post_pr_comment(pr, f"## GooseBot Error\n\n{review_text}")
        else:
            logger.info("Successfully generated review content")
            post_pr_comment(pr, review_text)
    except Exception as e:
        # Top-level boundary: log and fail the job rather than dump a traceback.
        logger.error(f"Unexpected error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()