goose 0.18.1

A load testing framework inspired by Locust.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
#!/usr/bin/env python3
"""
GooseBot - AI Code Review Bot for Goose Load Testing Framework

This script fetches PR details, gathers project context from memory-bank,
sends this information to the Anthropic API, and posts the review as a comment.
"""

import os
import sys
import argparse
import logging
import json
from github import Github
from github.PullRequest import PullRequest
import anthropic
import fnmatch
import base64
from typing import List, Dict, Any, Optional

# Set up root logging once at import time; every message carries a timestamp,
# the logger name and the severity.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-wide logger shared by all functions in this script.
logger = logging.getLogger("goosebot")

class FileFilterConfig:
    """
    Configuration for filtering files that should be reviewed.

    Patterns are shell-style globs matched with ``fnmatch.fnmatch`` against the
    filename exactly as it is passed in. A blacklist match always wins over a
    whitelist match.
    """

    def __init__(self, whitelist_patterns: str = "*", blacklist_patterns: str = ""):
        """
        Initialize file filter configuration.

        Whitespace around each pattern is ignored and empty entries are dropped,
        so ``"*.rs, *.md"`` and ``"*.rs,*.md,"`` both yield ``["*.rs", "*.md"]``.

        Args:
            whitelist_patterns: Comma-separated glob patterns for files to include.
                An empty (or whitespace-only) value means "include everything".
            blacklist_patterns: Comma-separated glob patterns for files to exclude
        """
        # Fall back to "*" when nothing usable remains after parsing
        self.whitelist_patterns = self._parse_patterns(whitelist_patterns) or ["*"]
        self.blacklist_patterns = self._parse_patterns(blacklist_patterns)

    @staticmethod
    def _parse_patterns(raw: str) -> List[str]:
        """Split a comma-separated pattern string into stripped, non-empty patterns."""
        if not raw:
            return []
        stripped = (pattern.strip() for pattern in raw.split(","))
        return [pattern for pattern in stripped if pattern]

    @classmethod
    def from_env(cls) -> 'FileFilterConfig':
        """Create a FileFilterConfig from PR_REVIEW_WHITELIST / PR_REVIEW_BLACKLIST."""
        return cls(
            whitelist_patterns=os.environ.get("PR_REVIEW_WHITELIST", "*"),
            blacklist_patterns=os.environ.get("PR_REVIEW_BLACKLIST", "")
        )

    def should_review_file(self, filename: str) -> bool:
        """
        Determine if a file should be reviewed based on patterns.

        Args:
            filename: The name of the file to check

        Returns:
            True if the file should be reviewed, False otherwise
        """
        # Check blacklist first (takes precedence)
        for pattern in self.blacklist_patterns:
            if fnmatch.fnmatch(filename, pattern):
                logger.debug(f"Skipping {filename} (matches blacklist pattern {pattern})")
                return False

        # Then check if matches any whitelist pattern
        for pattern in self.whitelist_patterns:
            if fnmatch.fnmatch(filename, pattern):
                logger.debug(f"Including {filename} (matches whitelist pattern {pattern})")
                return True

        # If no whitelist patterns match, exclude the file
        logger.debug(f"Skipping {filename} (doesn't match any whitelist pattern)")
        return False

class TokenUsageTracker:
    """
    Track token usage for Anthropic API calls.

    Keeps a running total of tokens consumed and compares it against a fixed
    budget so callers can refuse requests that would exceed it.
    """

    def __init__(self, budget_limit: int = 100000):
        """
        Initialize token usage tracking.

        Args:
            budget_limit: Maximum tokens to use across all API calls
        """
        self.budget_limit = budget_limit
        self.current_usage = 0

    def can_process(self, estimated_tokens: int) -> bool:
        """
        Check if there's enough budget for the estimated token usage.

        Args:
            estimated_tokens: Estimated tokens for the next API call

        Returns:
            True if the estimated usage is within budget, False otherwise
        """
        return self.current_usage + estimated_tokens <= self.budget_limit

    def record_usage(self, prompt_tokens: int, completion_tokens: int) -> int:
        """
        Record tokens used from an API call.

        Args:
            prompt_tokens: Number of tokens in the prompt
            completion_tokens: Number of tokens in the completion

        Returns:
            Updated total token usage
        """
        usage = prompt_tokens + completion_tokens
        self.current_usage += usage
        logger.info(f"API call used {prompt_tokens} prompt tokens + {completion_tokens} completion tokens = {usage} total")

        # A zero (or negative) budget has no meaningful percentage; avoid dividing by it
        if self.budget_limit > 0:
            percent = f"{(self.current_usage/self.budget_limit)*100:.1f}%"
        else:
            percent = "n/a"
        logger.info(f"Total usage: {self.current_usage}/{self.budget_limit} tokens ({percent})")
        return self.current_usage

def gather_project_context() -> str:
    """
    Read memory-bank files to provide context to the AI.

    Each file that exists under ``memory-bank/`` (relative to the current
    working directory) is read as UTF-8 and emitted as a ``## <filename>``
    section. Missing files are logged and skipped.

    Returns:
        String containing content from memory-bank files, in priority order;
        empty if none of the files exist
    """
    # Priority order for files
    context_files = [
        "projectbrief.md",
        "productContext.md",
        "systemPatterns.md",
        "techContext.md",
        "activeContext.md",
        "progress.md"
    ]

    sections = []
    for filename in context_files:
        path = f"memory-bank/{filename}"
        if os.path.exists(path):
            logger.info(f"Loading context from {path}")
            # Explicit encoding: do not depend on the runner's locale for Markdown files
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
            sections.append(f"## {filename}\n{content}\n\n")
        else:
            logger.warning(f"Context file {path} not found")

    context = "".join(sections)

    if not context:
        logger.warning("No context files found in memory-bank/")

    return context

def load_prompt_template(scope: str, version: str = "v1") -> str:
    """
    Load the prompt template for the specified scope and version.

    The template is read as UTF-8 from
    ``.github/prompts/<version>/<scope>_review.md`` relative to the current
    working directory.

    Args:
        scope: The review scope (e.g., 'clarity')
        version: The prompt version to use

    Returns:
        The prompt template as a string

    Raises:
        SystemExit: With status 1 if the template file does not exist
    """
    template_path = f".github/prompts/{version}/{scope}_review.md"

    try:
        # Explicit encoding: do not depend on the runner's locale for Markdown files
        with open(template_path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        logger.error(f"Prompt template not found: {template_path}")
        sys.exit(1)

def get_pull_request(repo, pr_number: int) -> PullRequest:
    """
    Look up a pull request by number, terminating the process on failure.

    Args:
        repo: GitHub repository object exposing ``get_pull``
        pr_number: Pull request number

    Returns:
        GitHub PullRequest object returned by ``repo.get_pull``
    """
    try:
        pull = repo.get_pull(pr_number)
    except Exception as err:
        # Nothing useful can be done without the PR, so log and exit with status 1
        logger.error(f"Failed to get PR #{pr_number}: {err}")
        sys.exit(1)
    else:
        return pull

def get_pr_details(pr: PullRequest) -> Dict[str, Any]:
    """
    Collect the fields of a pull request that the review prompt needs.

    Args:
        pr: GitHub PullRequest object

    Returns:
        Dictionary with keys 'title', 'description' (empty string when the PR
        has no body), 'files_changed' (one dict per changed file) and 'author'
        ("Unknown" when the PR has no user)
    """
    logger.info(f"Getting details for PR #{pr.number}: {pr.title}")

    changed_entries = []
    try:
        # Append one entry at a time so files gathered before a failure are kept
        for changed in pr.get_files():
            entry = {}
            entry['filename'] = changed.filename
            entry['status'] = changed.status  # added, modified, removed
            entry['additions'] = changed.additions
            entry['deletions'] = changed.deletions
            entry['changes'] = changed.changes
            changed_entries.append(entry)
    except Exception as err:
        logger.error(f"Failed to get files changed: {err}")

    details = {'title': pr.title}
    details['description'] = pr.body or ""
    details['files_changed'] = changed_entries
    details['author'] = pr.user.login if pr.user else "Unknown"
    return details

def filter_relevant_files(files_changed: List[Dict[str, Any]], file_filter: FileFilterConfig) -> List[Dict[str, Any]]:
    """
    Keep only the changed files that the filter accepts.

    Args:
        files_changed: List of file change dictionaries (each has a 'filename' key)
        file_filter: FileFilterConfig object deciding which filenames to keep

    Returns:
        New list with the accepted file change dictionaries, in their original order
    """
    kept = [
        entry
        for entry in files_changed
        if file_filter.should_review_file(entry['filename'])
    ]

    logger.info(f"Filtered {len(files_changed)} files to {len(kept)} relevant files")
    return kept

def get_file_content(repo, file_path: str, ref: str = "main") -> Optional[str]:
    """
    Fetch a file from the repository and return it as text.

    Args:
        repo: GitHub repository object exposing ``get_contents``
        file_path: Path to the file
        ref: Branch or commit reference

    Returns:
        The base64-decoded, UTF-8-decoded file content, or None if anything
        fails while fetching or decoding it
    """
    try:
        blob = repo.get_contents(file_path, ref=ref)
        raw_bytes = base64.b64decode(blob.content)
        return raw_bytes.decode('utf-8')
    except Exception as err:
        # Best effort: any failure is logged and reported to the caller as None
        logger.warning(f"Failed to get content for {file_path}: {err}")
    return None

def call_anthropic_api(prompt: str, token_tracker: TokenUsageTracker, max_tokens: int = 4000) -> Dict[str, Any]:
    """
    Send prompt to Anthropic API and get response.

    Configuration is read from the environment: ANTHROPIC_API_KEY (required),
    ANTHROPIC_API_URL (optional custom base URL) and ANTHROPIC_MODEL (optional
    model override).

    Args:
        prompt: The prompt to send
        token_tracker: TokenUsageTracker to record usage
        max_tokens: Maximum tokens to generate

    Returns:
        Dictionary with keys "content", "prompt_tokens" and "completion_tokens".
        When the budget would be exceeded or the API call fails, "content" is a
        message starting with "Error:" and both token counts are 0.

    Raises:
        SystemExit: With status 1 if ANTHROPIC_API_KEY is missing or blank
    """
    # Surrounding whitespace (e.g. a trailing newline from a pasted secret) is
    # never part of a key, so drop it before use.
    api_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()

    # Environment debugging: report presence and length only. No part of the
    # key itself is written to the log.
    logger.info("Checking API environment variables:")
    logger.info(f"ANTHROPIC_API_KEY present in environment: {'ANTHROPIC_API_KEY' in os.environ}")
    logger.info(f"ANTHROPIC_API_URL present in environment: {'ANTHROPIC_API_URL' in os.environ}")
    logger.info(f"ANTHROPIC_API_KEY length: {len(api_key)}")

    # The key must exist AND have a non-empty value
    if not api_key:
        logger.error("ANTHROPIC_API_KEY environment variable not set or is empty")
        logger.info("Please ensure the secret is set correctly in GitHub repository settings")
        logger.info("See: https://github.com/tag1consulting/goose/settings/secrets/actions")
        sys.exit(1)

    # Get custom API URL if set
    api_url = os.environ.get("ANTHROPIC_API_URL")

    # Estimate token count (rough approximation: ~1.3 tokens per whitespace-separated word)
    estimated_tokens = len(prompt.split()) * 1.3

    # Refuse up front if prompt plus the largest possible response would exceed the budget
    if not token_tracker.can_process(estimated_tokens + max_tokens):
        logger.error(f"Token budget exceeded. Estimated prompt: {estimated_tokens}, response: {max_tokens}")
        return {
            "content": "Error: Token budget exceeded. Unable to complete review.",
            "prompt_tokens": 0,
            "completion_tokens": 0
        }

    try:
        logger.info("Calling Anthropic API...")

        # Configure client with custom URL if provided
        client_kwargs = {"api_key": api_key}
        if api_url:
            logger.info(f"Using custom API URL: {api_url}")
            client_kwargs["base_url"] = api_url

        client = anthropic.Anthropic(**client_kwargs)

        # Use Claude Sonnet 4 model (configurable via environment variable)
        model = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-20250514")
        logger.info(f"Using model: {model}")

        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system="You are GooseBot, an AI assistant that helps with code reviews for the Goose load testing framework. Be concise and helpful.",
            messages=[
                {"role": "user", "content": prompt}
            ]
        )

        # Use the token counts reported by the API, not the estimate above
        prompt_tokens = response.usage.input_tokens
        completion_tokens = response.usage.output_tokens

        token_tracker.record_usage(prompt_tokens, completion_tokens)

        return {
            "content": response.content[0].text,
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens
        }
    except Exception as e:
        # Any failure (network, auth, unexpected response shape) becomes an
        # "Error:" result so the caller can still report it on the PR.
        logger.error(f"Error calling Anthropic API: {e}")
        return {
            "content": f"Error: Failed to get response from Anthropic API: {e}",
            "prompt_tokens": 0,
            "completion_tokens": 0
        }

def format_files_changed_summary(files: List[Dict[str, Any]]) -> str:
    """
    Render the changed files as a bullet list for the prompt.

    Args:
        files: List of file change dictionaries with 'filename', 'status',
            'additions' and 'deletions' keys

    Returns:
        One "- <filename> (<status>, +<additions>, -<deletions>)" line per file,
        each terminated by a newline; empty string for an empty list
    """
    bullet_lines = [
        f"- {entry['filename']} ({entry['status']}, +{entry['additions']}, -{entry['deletions']})\n"
        for entry in files
    ]
    return "".join(bullet_lines)

def post_pr_comment(pr: PullRequest, comment_text: str) -> bool:
    """
    Add an issue comment to a pull request.

    Args:
        pr: GitHub PullRequest object
        comment_text: Comment text to post

    Returns:
        True if the comment was created and logged, False if anything raised
    """
    posted = False
    try:
        pr.create_issue_comment(comment_text)
        logger.info(f"Posted comment on PR #{pr.number}")
        posted = True
    except Exception as err:
        # Failures are reported through the return value rather than raised
        logger.error(f"Failed to post comment: {err}")
    return posted

def main():
    """
    Main function to run the GooseBot review process.

    Parses the command line, connects to GitHub, filters the PR's changed
    files, builds the prompt from the template and memory-bank context, asks
    the Anthropic API for a review and posts the result as a PR comment.

    Environment variables read here: TOKEN_BUDGET, GITHUB_TOKEN and
    GITHUB_REPOSITORY. Exits with status 1 on missing/invalid configuration or
    on any unexpected error.
    """
    parser = argparse.ArgumentParser(description="GooseBot PR Review")
    parser.add_argument("--pr", type=int, required=True, help="PR number to review")
    parser.add_argument("--scope", type=str, default="clarity", help="Review scope (e.g., clarity)")
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")
    parser.add_argument("--version", type=str, default="v1", help="Prompt version to use")
    args = parser.parse_args()

    if args.debug:
        logger.setLevel(logging.DEBUG)

    logger.info(f"Starting GooseBot review for PR #{args.pr} (scope: {args.scope}, version: {args.version})")

    # Initialize token tracker. `or` (rather than a .get() default) also covers a
    # variable that is set but empty, which would otherwise crash int().
    raw_budget = os.environ.get("TOKEN_BUDGET") or "100000"
    try:
        budget_limit = int(raw_budget)
    except ValueError:
        logger.error(f"TOKEN_BUDGET must be an integer, got {raw_budget!r}")
        sys.exit(1)
    token_tracker = TokenUsageTracker(budget_limit=budget_limit)

    # Initialize file filter from environment
    file_filter = FileFilterConfig.from_env()

    # Initialize GitHub API client
    github_token = os.environ.get("GITHUB_TOKEN")
    if not github_token:
        logger.error("GITHUB_TOKEN environment variable not set")
        sys.exit(1)

    try:
        # Get repository (an empty GITHUB_REPOSITORY falls back to the default too)
        repo_name = os.environ.get("GITHUB_REPOSITORY") or "tag1consulting/goose"
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        logger.info(f"Connected to GitHub repository: {repo.full_name}")

        # Get PR details
        pr = get_pull_request(repo, args.pr)
        pr_details = get_pr_details(pr)

        # Filter relevant files
        relevant_files = filter_relevant_files(pr_details['files_changed'], file_filter)

        if not relevant_files:
            logger.warning("No relevant files found to review")
            post_pr_comment(pr, "## GooseBot PR Review\n\nNo relevant files found to review based on current filter settings.")
            return

        # Generate files changed summary
        files_changed_summary = format_files_changed_summary(relevant_files)

        # Gather project context from memory-bank
        project_context = gather_project_context()

        # Load prompt template
        prompt_template = load_prompt_template(args.scope, args.version)

        # Format the prompt
        prompt = prompt_template.format(
            project_context=project_context,
            pr_title=pr_details['title'],
            pr_description=pr_details['description'],
            files_changed=files_changed_summary
        )

        # Call Anthropic API
        response = call_anthropic_api(prompt, token_tracker)

        # call_anthropic_api reports failures as content starting with "Error:" and
        # zero token counts. Searching for the word "error" anywhere would wrongly
        # flag genuine reviews that merely discuss errors.
        api_failed = (
            response["content"].startswith("Error:")
            and response["prompt_tokens"] == 0
            and response["completion_tokens"] == 0
        )

        # Post comment with review
        if api_failed:
            logger.error(f"API returned an error: {response['content']}")
            post_pr_comment(pr, f"## GooseBot Error\n\n{response['content']}")
        else:
            logger.info("Successfully generated review content")
            post_pr_comment(pr, response["content"])

    except Exception as e:
        # Top-level boundary: log and signal failure to the CI job
        logger.error(f"Unexpected error: {e}")
        sys.exit(1)