aptu_core/github/
issues.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! GitHub issue operations for the triage command.
4//!
5//! Provides functionality to parse issue URLs, fetch issue details,
6//! and post triage comments.
7
8use anyhow::{Context, Result};
9use backon::Retryable;
10use octocrab::Octocrab;
11use serde::{Deserialize, Serialize};
12use tracing::{debug, instrument};
13
14use crate::ai::types::{IssueComment, IssueDetails, RepoIssueContext};
15use crate::retry::retry_backoff;
16
/// A GitHub issue without labels (untriaged).
///
/// Derives `Serialize`/`Deserialize` without any field renames, so the field
/// names below are also the serialized keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UntriagedIssue {
    /// Issue number.
    pub number: u64,
    /// Issue title.
    pub title: String,
    /// Creation timestamp, stored as a string (ISO 8601).
    pub created_at: String,
    /// Issue URL.
    pub url: String,
}
29
/// A single entry in a Git tree response.
///
/// Instances are the elements of [`GitTreeResponse::tree`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitTreeEntry {
    /// File path relative to repository root.
    pub path: String,
    /// Type of entry: "blob" (file) or "tree" (directory).
    ///
    /// Serialized under the key `type`; the trailing underscore in the field
    /// name only avoids the Rust keyword.
    #[serde(rename = "type")]
    pub type_: String,
    /// File mode (e.g., "100644" for regular files).
    pub mode: String,
    /// SHA-1 hash of the entry.
    pub sha: String,
}
43
/// Response from GitHub Git Trees API.
///
/// Only the fields this module needs are modelled: the entry list and the
/// truncation flag.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitTreeResponse {
    /// List of entries in the tree.
    pub tree: Vec<GitTreeEntry>,
    /// Whether the tree is truncated (too many entries).
    pub truncated: bool,
}
52
53/// Parses an owner/repo string to extract owner and repo.
54///
55/// Validates format: exactly one `/`, non-empty parts.
56///
57/// # Errors
58///
59/// Returns an error if the format is invalid.
60pub fn parse_owner_repo(s: &str) -> Result<(String, String)> {
61    let parts: Vec<&str> = s.split('/').collect();
62    if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
63        anyhow::bail!(
64            "Invalid owner/repo format.\n\
65             Expected: owner/repo\n\
66             Got: {s}"
67        );
68    }
69    Ok((parts[0].to_string(), parts[1].to_string()))
70}
71
72/// Parses a GitHub issue reference in multiple formats.
73///
74/// Supports:
75/// - Full URL: `https://github.com/owner/repo/issues/123`
76/// - Short form: `owner/repo#123`
77/// - Bare number: `123` (requires `repo_context`)
78///
79/// # Arguments
80///
81/// * `input` - The issue reference to parse
82/// * `repo_context` - Optional repository context for bare numbers (e.g., "owner/repo")
83///
84/// # Errors
85///
86/// Returns an error if the format is invalid or bare number is used without context.
87pub fn parse_issue_reference(
88    input: &str,
89    repo_context: Option<&str>,
90) -> Result<(String, String, u64)> {
91    let input = input.trim();
92
93    // Try full URL first
94    if input.starts_with("https://") || input.starts_with("http://") {
95        // Remove trailing fragments and query params
96        let clean_url = input.split('#').next().unwrap_or(input);
97        let clean_url = clean_url.split('?').next().unwrap_or(clean_url);
98
99        // Parse the URL path
100        let parts: Vec<&str> = clean_url.trim_end_matches('/').split('/').collect();
101
102        // Expected: ["https:", "", "github.com", "owner", "repo", "issues", "123"]
103        if parts.len() < 7 {
104            anyhow::bail!(
105                "Invalid GitHub issue URL format.\n\
106                 Expected: https://github.com/owner/repo/issues/123\n\
107                 Got: {input}"
108            );
109        }
110
111        // Verify it's a github.com URL
112        if !parts[2].contains("github.com") {
113            anyhow::bail!(
114                "URL must be a GitHub issue URL.\n\
115                 Expected: https://github.com/owner/repo/issues/123\n\
116                 Got: {input}"
117            );
118        }
119
120        // Verify it's an issues path
121        if parts[5] != "issues" {
122            anyhow::bail!(
123                "URL must point to a GitHub issue.\n\
124                 Expected: https://github.com/owner/repo/issues/123\n\
125                 Got: {input}"
126            );
127        }
128
129        let owner = parts[3].to_string();
130        let repo = parts[4].to_string();
131        let number: u64 = parts[6].parse().with_context(|| {
132            format!(
133                "Invalid issue number '{}' in URL.\n\
134                 Expected a numeric issue number.",
135                parts[6]
136            )
137        })?;
138
139        debug!(owner = %owner, repo = %repo, number = number, "Parsed issue URL");
140        return Ok((owner, repo, number));
141    }
142
143    // Try short form: owner/repo#123
144    if let Some(hash_pos) = input.find('#') {
145        let owner_repo_part = &input[..hash_pos];
146        let number_part = &input[hash_pos + 1..];
147
148        let (owner, repo) = parse_owner_repo(owner_repo_part)?;
149        let number: u64 = number_part.parse().with_context(|| {
150            format!(
151                "Invalid issue number '{number_part}' in short form.\n\
152                 Expected: owner/repo#123\n\
153                 Got: {input}"
154            )
155        })?;
156
157        debug!(owner = %owner, repo = %repo, number = number, "Parsed short-form issue reference");
158        return Ok((owner, repo, number));
159    }
160
161    // Try bare number: 123 (requires repo_context)
162    if let Ok(number) = input.parse::<u64>() {
163        let repo_context = repo_context.ok_or_else(|| {
164            anyhow::anyhow!(
165                "Bare issue number requires repository context.\n\
166                 Use one of:\n\
167                 - Full URL: https://github.com/owner/repo/issues/123\n\
168                 - Short form: owner/repo#123\n\
169                 - Bare number with --repo flag: 123 --repo owner/repo\n\
170                 Got: {input}"
171            )
172        })?;
173
174        let (owner, repo) = parse_owner_repo(repo_context)?;
175        debug!(owner = %owner, repo = %repo, number = number, "Parsed bare issue number");
176        return Ok((owner, repo, number));
177    }
178
179    // If we get here, it's an invalid format
180    anyhow::bail!(
181        "Invalid issue reference format.\n\
182         Expected one of:\n\
183         - Full URL: https://github.com/owner/repo/issues/123\n\
184         - Short form: owner/repo#123\n\
185         - Bare number with --repo flag: 123 --repo owner/repo\n\
186         Got: {input}"
187    );
188}
189
190/// Fetches issue details including comments from GitHub.
191///
192/// # Errors
193///
194/// Returns an error if the API request fails or the issue is not found.
195#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
196pub async fn fetch_issue_with_comments(
197    client: &Octocrab,
198    owner: &str,
199    repo: &str,
200    number: u64,
201) -> Result<IssueDetails> {
202    debug!("Fetching issue details");
203
204    // Fetch the issue with retry logic
205    let issue = (|| async {
206        client
207            .issues(owner, repo)
208            .get(number)
209            .await
210            .map_err(|e| anyhow::anyhow!(e))
211    })
212    .retry(retry_backoff())
213    .notify(|err, dur| {
214        tracing::warn!(
215            error = %err,
216            retry_after = ?dur,
217            "Retrying fetch_issue_with_comments (issue fetch)"
218        );
219    })
220    .await
221    .with_context(|| format!("Failed to fetch issue #{number} from {owner}/{repo}"))?;
222
223    // Fetch comments (limited to first page) with retry logic
224    let comments_page = (|| async {
225        client
226            .issues(owner, repo)
227            .list_comments(number)
228            .per_page(5)
229            .send()
230            .await
231            .map_err(|e| anyhow::anyhow!(e))
232    })
233    .retry(retry_backoff())
234    .notify(|err, dur| {
235        tracing::warn!(
236            error = %err,
237            retry_after = ?dur,
238            "Retrying fetch_issue_with_comments (comments fetch)"
239        );
240    })
241    .await
242    .with_context(|| format!("Failed to fetch comments for issue #{number}"))?;
243
244    // Convert to our types
245    let labels: Vec<String> = issue.labels.iter().map(|l| l.name.clone()).collect();
246
247    let comments: Vec<IssueComment> = comments_page
248        .items
249        .iter()
250        .map(|c| IssueComment {
251            author: c.user.login.clone(),
252            body: c.body.clone().unwrap_or_default(),
253        })
254        .collect();
255
256    let issue_url = issue.html_url.to_string();
257
258    let details = IssueDetails::builder()
259        .owner(owner.to_string())
260        .repo(repo.to_string())
261        .number(number)
262        .title(issue.title)
263        .body(issue.body.unwrap_or_default())
264        .labels(labels)
265        .comments(comments)
266        .url(issue_url)
267        .build();
268
269    debug!(
270        labels = details.labels.len(),
271        comments = details.comments.len(),
272        "Fetched issue details"
273    );
274
275    Ok(details)
276}
277
/// Extracts up to five keywords from an issue title.
///
/// The title is lowercased and split on every non-alphanumeric character.
/// Empty fragments and common English stop words are dropped, and the first
/// five remaining words are returned in their original order.
///
/// # Arguments
///
/// * `title` - Issue title to extract keywords from
///
/// # Returns
///
/// Vector of lowercase keywords (max 5), excluding stop words.
pub fn extract_keywords(title: &str) -> Vec<String> {
    const MAX_KEYWORDS: usize = 5;
    const STOP_WORDS: &[&str] = &[
        "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "he", "in", "is",
        "it", "its", "of", "on", "or", "that", "the", "to", "was", "will", "with",
    ];

    let lowered = title.to_lowercase();
    let mut keywords = Vec::new();

    for token in lowered.split(|c: char| !c.is_alphanumeric()) {
        if token.is_empty() || STOP_WORDS.contains(&token) {
            continue;
        }
        keywords.push(token.to_string());
        // Stop as soon as the cap is reached; later words are ignored.
        if keywords.len() == MAX_KEYWORDS {
            break;
        }
    }

    keywords
}
307
308/// Searches for related issues in a repository based on title keywords.
309///
310/// Extracts keywords from the issue title and searches the repository
311/// for matching issues. Returns up to 20 results, excluding the specified issue.
312///
313/// # Arguments
314///
315/// * `client` - Authenticated Octocrab client
316/// * `owner` - Repository owner
317/// * `repo` - Repository name
318/// * `title` - Issue title to extract keywords from
319/// * `exclude_number` - Issue number to exclude from results
320///
321/// # Errors
322///
323/// Returns an error if the search API request fails.
324#[instrument(skip(client), fields(owner = %owner, repo = %repo, exclude_number = %exclude_number))]
325pub async fn search_related_issues(
326    client: &Octocrab,
327    owner: &str,
328    repo: &str,
329    title: &str,
330    exclude_number: u64,
331) -> Result<Vec<RepoIssueContext>> {
332    let keywords = extract_keywords(title);
333
334    if keywords.is_empty() {
335        debug!("No keywords extracted from title");
336        return Ok(Vec::new());
337    }
338
339    // Build search query: keyword1 keyword2 ... repo:owner/repo is:issue
340    let query = format!("{} repo:{}/{} is:issue", keywords.join(" "), owner, repo);
341
342    debug!(query = %query, "Searching for related issues");
343
344    // Search for issues with retry logic
345    let search_result = (|| async {
346        client
347            .search()
348            .issues_and_pull_requests(&query)
349            .per_page(20)
350            .send()
351            .await
352            .map_err(|e| anyhow::anyhow!(e))
353    })
354    .retry(retry_backoff())
355    .notify(|err, dur| {
356        tracing::warn!(
357            error = %err,
358            retry_after = ?dur,
359            "Retrying search_related_issues"
360        );
361    })
362    .await
363    .with_context(|| format!("Failed to search for related issues in {owner}/{repo}"))?;
364
365    // Convert to our context type
366    let related: Vec<RepoIssueContext> = search_result
367        .items
368        .iter()
369        .filter_map(|item| {
370            // Only include issues (not PRs)
371            if item.pull_request.is_some() {
372                return None;
373            }
374
375            // Exclude the issue being triaged
376            if item.number == exclude_number {
377                return None;
378            }
379
380            Some(RepoIssueContext {
381                number: item.number,
382                title: item.title.clone(),
383                labels: item.labels.iter().map(|l| l.name.clone()).collect(),
384                state: format!("{:?}", item.state).to_lowercase(),
385            })
386        })
387        .collect();
388
389    debug!(count = related.len(), "Found related issues");
390
391    Ok(related)
392}
393
394/// Posts a triage comment to a GitHub issue.
395///
396/// # Returns
397///
398/// The URL of the created comment.
399///
400/// # Errors
401///
402/// Returns an error if the API request fails.
403#[instrument(skip(client, body), fields(owner = %owner, repo = %repo, number = number))]
404pub async fn post_comment(
405    client: &Octocrab,
406    owner: &str,
407    repo: &str,
408    number: u64,
409    body: &str,
410) -> Result<String> {
411    debug!("Posting triage comment");
412
413    let comment = client
414        .issues(owner, repo)
415        .create_comment(number, body)
416        .await
417        .with_context(|| format!("Failed to post comment to issue #{number}"))?;
418
419    let comment_url = comment.html_url.to_string();
420
421    debug!(url = %comment_url, "Comment posted successfully");
422
423    Ok(comment_url)
424}
425
426/// Creates a new GitHub issue.
427///
428/// Posts a new issue with the given title and body to the repository.
429/// Returns the issue URL and issue number.
430///
431/// # Arguments
432///
433/// * `client` - Authenticated Octocrab client
434/// * `owner` - Repository owner
435/// * `repo` - Repository name
436/// * `title` - Issue title
437/// * `body` - Issue body (markdown)
438///
439/// # Errors
440///
441/// Returns an error if the GitHub API call fails.
442#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
443pub async fn create_issue(
444    client: &Octocrab,
445    owner: &str,
446    repo: &str,
447    title: &str,
448    body: &str,
449) -> Result<(String, u64)> {
450    debug!("Creating GitHub issue");
451
452    let issue = client
453        .issues(owner, repo)
454        .create(title)
455        .body(body)
456        .send()
457        .await
458        .with_context(|| format!("Failed to create issue in {owner}/{repo}"))?;
459
460    let issue_url = issue.html_url.to_string();
461    let issue_number = issue.number;
462
463    debug!(number = issue_number, url = %issue_url, "Issue created successfully");
464
465    Ok((issue_url, issue_number))
466}
467
/// Result of applying labels and milestone to an issue.
///
/// Produced by `update_issue_labels_and_milestone`.
#[derive(Debug, Clone)]
pub struct ApplyResult {
    /// Labels that were successfully applied.
    ///
    /// As filled in by `update_issue_labels_and_milestone`, this is the merged
    /// list: the issue's existing labels plus the validated suggestions.
    pub applied_labels: Vec<String>,
    /// Milestone that was successfully applied, if any.
    pub applied_milestone: Option<String>,
    /// Warnings about labels or milestones that could not be applied.
    pub warnings: Vec<String>,
}
478
/// Combines an issue's current labels with suggested ones without removing anything.
///
/// Rules:
/// - Every existing label is kept, in its original order and spelling.
/// - A priority label is a two-character label of the form `p<digit>`
///   (case-insensitive). If the issue already has one, suggested priority
///   labels are ignored.
/// - A suggested label is appended only if no label already in the result
///   matches it case-insensitively.
///
/// # Arguments
///
/// * `existing_labels` - Labels currently on the issue
/// * `suggested_labels` - Labels suggested by AI
///
/// # Returns
///
/// Existing labels followed by the accepted suggestions, in suggestion order.
fn merge_labels(existing_labels: &[String], suggested_labels: &[String]) -> Vec<String> {
    // True for labels whose lowercase form is exactly `p` followed by one ASCII digit.
    fn is_priority_label(label: &str) -> bool {
        let lowered = label.to_lowercase();
        let mut chars = lowered.chars();
        lowered.len() == 2
            && chars.next() == Some('p')
            && chars.next().is_some_and(|c| c.is_ascii_digit())
    }

    let existing_has_priority = existing_labels
        .iter()
        .any(|label| is_priority_label(label));

    // Lowercased names of everything already in `merged`, for duplicate checks.
    let mut seen: std::collections::HashSet<String> = existing_labels
        .iter()
        .map(|label| label.to_lowercase())
        .collect();

    let mut merged = existing_labels.to_vec();

    for candidate in suggested_labels {
        // Never override a priority that is already set on the issue.
        if existing_has_priority && is_priority_label(candidate) {
            continue;
        }

        // `insert` returns false when the lowercase name is already present.
        if seen.insert(candidate.to_lowercase()) {
            merged.push(candidate.clone());
        }
    }

    merged
}
531
532/// Updates an issue with labels and milestone.
533///
534/// Applies labels additively by merging existing and suggested labels.
535/// Validates suggestions against available options before applying.
536/// Returns what was actually applied and any warnings.
537///
538/// # Errors
539///
540/// Returns an error if the GitHub API call fails.
541#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
542#[allow(clippy::too_many_arguments)]
543pub async fn update_issue_labels_and_milestone(
544    client: &Octocrab,
545    owner: &str,
546    repo: &str,
547    number: u64,
548    existing_labels: &[String],
549    suggested_labels: &[String],
550    existing_milestone: Option<&str>,
551    suggested_milestone: Option<&str>,
552    available_labels: &[crate::ai::types::RepoLabel],
553    available_milestones: &[crate::ai::types::RepoMilestone],
554) -> Result<ApplyResult> {
555    debug!("Updating issue with labels and milestone");
556
557    let mut warnings = Vec::new();
558
559    // Validate and collect labels
560    let available_label_names: std::collections::HashSet<_> =
561        available_labels.iter().map(|l| l.name.as_str()).collect();
562
563    // Validate suggested labels
564    let mut valid_suggested = Vec::new();
565    for label in suggested_labels {
566        if available_label_names.contains(label.as_str()) {
567            valid_suggested.push(label.clone());
568        } else {
569            warnings.push(format!("Label '{label}' not found in repository"));
570        }
571    }
572
573    // Merge existing and suggested labels additively
574    let applied_labels = merge_labels(existing_labels, &valid_suggested);
575
576    // Validate and find milestone (only set if issue has no existing milestone)
577    let applied_milestone = if existing_milestone.is_none() {
578        if let Some(milestone_title) = suggested_milestone {
579            if let Some(milestone) = available_milestones
580                .iter()
581                .find(|m| m.title == milestone_title)
582            {
583                Some(milestone.title.clone())
584            } else {
585                warnings.push(format!(
586                    "Milestone '{milestone_title}' not found in repository"
587                ));
588                None
589            }
590        } else {
591            None
592        }
593    } else {
594        None
595    };
596
597    // Apply updates to the issue
598    let issues_handler = client.issues(owner, repo);
599    let mut update_builder = issues_handler.update(number);
600
601    if !applied_labels.is_empty() {
602        update_builder = update_builder.labels(&applied_labels);
603    }
604
605    #[allow(clippy::collapsible_if)]
606    if let Some(milestone_title) = &applied_milestone {
607        if let Some(milestone) = available_milestones
608            .iter()
609            .find(|m| &m.title == milestone_title)
610        {
611            update_builder = update_builder.milestone(milestone.number);
612        }
613    }
614
615    update_builder
616        .send()
617        .await
618        .with_context(|| format!("Failed to update issue #{number}"))?;
619
620    debug!(
621        labels = ?applied_labels,
622        milestone = ?applied_milestone,
623        warnings = ?warnings,
624        "Issue updated successfully"
625    );
626
627    Ok(ApplyResult {
628        applied_labels,
629        applied_milestone,
630        warnings,
631    })
632}
633
/// Priority labels that should be included first in tiered filtering.
/// These labels are most actionable for issue triage.
///
/// `filter_labels_by_relevance` compares repository label names against this
/// list case-insensitively; the order of entries here does not affect the
/// order of its output.
const PRIORITY_LABELS: &[&str] = &[
    "bug",
    "enhancement",
    "documentation",
    "good first issue",
    "help wanted",
    "question",
    "feature",
    "fix",
    "breaking",
    "security",
    "performance",
    "breaking-change",
];
650
651/// Filters labels using tiered selection: priority labels first, then remaining labels.
652///
653/// Implements two-tier filtering:
654/// - Tier 1: Priority labels (case-insensitive matching)
655/// - Tier 2: Remaining labels to fill up to `max_labels`
656///
657/// This ensures the AI sees the most actionable labels regardless of repository size.
658///
659/// # Arguments
660///
661/// * `labels` - List of available labels from the repository
662/// * `max_labels` - Maximum number of labels to return
663///
664/// # Returns
665///
666/// Filtered list of labels with priority labels first.
667#[must_use]
668pub fn filter_labels_by_relevance(
669    labels: &[crate::ai::types::RepoLabel],
670    max_labels: usize,
671) -> Vec<crate::ai::types::RepoLabel> {
672    if labels.is_empty() || max_labels == 0 {
673        return Vec::new();
674    }
675
676    let mut priority_labels = Vec::new();
677    let mut other_labels = Vec::new();
678
679    // Separate labels into priority and other
680    for label in labels {
681        let label_lower = label.name.to_lowercase();
682        let is_priority = PRIORITY_LABELS
683            .iter()
684            .any(|&p| label_lower == p.to_lowercase());
685
686        if is_priority {
687            priority_labels.push(label.clone());
688        } else {
689            other_labels.push(label.clone());
690        }
691    }
692
693    // Combine: priority labels first, then fill remaining slots with other labels
694    let mut result = priority_labels;
695    let remaining_slots = max_labels.saturating_sub(result.len());
696    result.extend(other_labels.into_iter().take(remaining_slots));
697
698    // Limit to max_labels
699    result.truncate(max_labels);
700    result
701}
702
/// Patterns for directories/files to completely exclude from tree filtering.
/// Based on GitHub Linguist vendor.yml and common build artifacts.
///
/// Consumed by `filter_tree_by_relevance`: a path that matches any of these
/// directory patterns is never returned. Each pattern ends with `/`.
const EXCLUDE_PATTERNS: &[&str] = &[
    "node_modules/",
    "vendor/",
    "dist/",
    "build/",
    "target/",
    ".git/",
    "cache/",
    "docs/",
    "examples/",
];
716
/// Patterns for directories to deprioritize but not exclude.
/// These contain test/benchmark code less relevant to issue triage.
///
/// In `filter_tree_by_relevance`, matching paths are left out of the general
/// "other files" tier but can still be selected through a keyword match or
/// as a language entry point.
const DEPRIORITIZE_PATTERNS: &[&str] = &[
    "test/",
    "tests/",
    "spec/",
    "bench/",
    "eval/",
    "fixtures/",
    "mocks/",
];
728
/// Lists the conventional entry-point file names for a language.
///
/// The language name is matched case-insensitively. Languages without an
/// entry in the table yield an empty list.
fn entry_point_patterns(language: &str) -> Vec<&'static str> {
    let patterns: &[&'static str] = match language.to_lowercase().as_str() {
        "rust" => &["lib.rs", "mod.rs", "main.rs"],
        "python" => &["__init__.py"],
        "javascript" | "typescript" => &["index.ts", "index.js"],
        "java" => &["Main.java"],
        "go" => &["main.go"],
        "c#" | "csharp" => &["Program.cs"],
        _ => &[],
    };

    patterns.to_vec()
}
742
/// Returns the file extensions (without the leading dot) commonly used by a language.
///
/// The language name is matched case-insensitively. Unknown languages yield
/// an empty list, which callers in this file treat as "do not filter by
/// extension".
fn get_extensions_for_language(language: &str) -> Vec<&'static str> {
    let extensions: &[&'static str] = match language.to_lowercase().as_str() {
        "rust" => &["rs"],
        "python" => &["py"],
        "javascript" | "typescript" => &["js", "ts", "jsx", "tsx"],
        "java" => &["java"],
        "c" => &["c", "h"],
        "c++" | "cpp" => &["cpp", "cc", "cxx", "h", "hpp"],
        "c#" | "csharp" => &["cs"],
        "go" => &["go"],
        "ruby" => &["rb"],
        "php" => &["php"],
        "swift" => &["swift"],
        "kotlin" => &["kt"],
        "scala" => &["scala"],
        "r" => &["r"],
        "shell" | "bash" => &["sh", "bash"],
        "html" => &["html", "htm"],
        "css" => &["css", "scss", "sass"],
        "json" => &["json"],
        "yaml" | "yml" => &["yaml", "yml"],
        "toml" => &["toml"],
        "xml" => &["xml"],
        "markdown" => &["md"],
        _ => &[],
    };

    extensions.to_vec()
}
771
772/// Filters repository tree entries by language-specific extensions.
773///
774/// Removes common non-source directories and limits results to 50 paths.
775/// Prioritizes shallow paths (fewer `/` characters).
776/// This is a legacy function kept for backward compatibility with existing tests.
777///
778/// # Arguments
779///
780/// * `entries` - Raw tree entries from GitHub API
781/// * `language` - Repository primary language for extension filtering
782///
783/// # Returns
784///
785/// Filtered and sorted list of file paths (max 50).
786#[allow(dead_code)]
787fn filter_tree_by_language(entries: &[GitTreeEntry], language: &str) -> Vec<String> {
788    let extensions = get_extensions_for_language(language);
789    let exclude_dirs = [
790        "node_modules/",
791        "target/",
792        "dist/",
793        "build/",
794        ".git/",
795        "vendor/",
796        "test",
797        "spec",
798        "mock",
799        "fixture",
800    ];
801
802    let mut filtered: Vec<String> = entries
803        .iter()
804        .filter(|entry| {
805            // Only include files (blobs), not directories
806            if entry.type_ != "blob" {
807                return false;
808            }
809
810            // Exclude paths containing excluded directories
811            if exclude_dirs.iter().any(|dir| entry.path.contains(dir)) {
812                return false;
813            }
814
815            // Filter by extension if language is recognized
816            if extensions.is_empty() {
817                // If language not recognized, include all files
818                true
819            } else {
820                extensions.iter().any(|ext| entry.path.ends_with(ext))
821            }
822        })
823        .map(|e| e.path.clone())
824        .collect();
825
826    // Sort by path depth (fewer slashes first), then alphabetically
827    filtered.sort_by(|a, b| {
828        let depth_a = a.matches('/').count();
829        let depth_b = b.matches('/').count();
830        if depth_a == depth_b {
831            a.cmp(b)
832        } else {
833            depth_a.cmp(&depth_b)
834        }
835    });
836
837    // Limit to 50 paths
838    filtered.truncate(50);
839    filtered
840}
841
842/// Filters repository tree entries by relevance using tiered keyword matching.
843///
844/// Implements three-tier filtering:
845/// - Tier 1: Files matching keywords (max 35)
846/// - Tier 2: Language entry points (max 10)
847/// - Tier 3: Other relevant files (max 15)
848///
849/// Removes common non-source directories and limits results to 60 paths.
850///
851/// # Arguments
852///
853/// * `entries` - Raw tree entries from GitHub API
854/// * `language` - Repository primary language for extension filtering
855/// * `keywords` - Optional keywords extracted from issue title for relevance matching
856///
857/// # Returns
858///
859/// Filtered and sorted list of file paths (max 60).
860fn filter_tree_by_relevance(
861    entries: &[GitTreeEntry],
862    language: &str,
863    keywords: &[String],
864) -> Vec<String> {
865    let extensions = get_extensions_for_language(language);
866    let entry_points = entry_point_patterns(language);
867
868    // Filter to valid source files
869    let candidates: Vec<String> = entries
870        .iter()
871        .filter(|entry| {
872            // Only include files (blobs), not directories
873            if entry.type_ != "blob" {
874                return false;
875            }
876
877            // Exclude paths containing excluded directories
878            if EXCLUDE_PATTERNS.iter().any(|dir| entry.path.contains(dir)) {
879                return false;
880            }
881
882            // Filter by extension if language is recognized
883            if extensions.is_empty() {
884                // If language not recognized, include all files
885                true
886            } else {
887                extensions.iter().any(|ext| entry.path.ends_with(ext))
888            }
889        })
890        .map(|e| e.path.clone())
891        .collect();
892
893    // Tier 1: Files matching keywords (max 35)
894    let mut tier1: Vec<String> = Vec::new();
895    let mut remaining: Vec<String> = Vec::new();
896
897    for path in candidates {
898        let path_lower = path.to_lowercase();
899        let matches_keyword = keywords.iter().any(|kw| path_lower.contains(kw));
900
901        if matches_keyword && tier1.len() < 35 {
902            tier1.push(path);
903        } else {
904            remaining.push(path);
905        }
906    }
907
908    // Tier 2: Entry point files (max 10)
909    let mut tier2: Vec<String> = Vec::new();
910    let mut tier3_candidates: Vec<String> = Vec::new();
911
912    for path in remaining {
913        let is_entry_point = entry_points.iter().any(|ep| path.ends_with(ep));
914        let is_deprioritized = DEPRIORITIZE_PATTERNS.iter().any(|dp| path.contains(dp));
915
916        if is_entry_point && tier2.len() < 10 {
917            tier2.push(path);
918        } else if !is_deprioritized {
919            tier3_candidates.push(path);
920        }
921    }
922
923    // Tier 3: Other relevant files (max 15)
924    let mut tier3: Vec<String> = tier3_candidates.into_iter().take(15).collect();
925
926    // Combine and sort by depth within each tier
927    let mut result = tier1;
928    result.append(&mut tier2);
929    result.append(&mut tier3);
930
931    // Sort by path depth (fewer slashes first), then alphabetically
932    result.sort_by(|a, b| {
933        let depth_a = a.matches('/').count();
934        let depth_b = b.matches('/').count();
935        if depth_a == depth_b {
936            a.cmp(b)
937        } else {
938            depth_a.cmp(&depth_b)
939        }
940    });
941
942    // Limit to 60 paths
943    result.truncate(60);
944    result
945}
946
947/// Fetches the repository file tree from GitHub.
948///
949/// Attempts to fetch from the default branch (main, then master).
950/// Returns filtered list of source file paths based on repository language and optional keywords.
951///
952/// # Arguments
953///
954/// * `client` - Authenticated Octocrab client
955/// * `owner` - Repository owner
956/// * `repo` - Repository name
957/// * `language` - Repository primary language for filtering
958/// * `keywords` - Optional keywords extracted from issue title for relevance matching
959///
960/// # Errors
961///
962/// Returns an error if the API request fails (but not if tree is unavailable).
963#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
964pub async fn fetch_repo_tree(
965    client: &Octocrab,
966    owner: &str,
967    repo: &str,
968    language: &str,
969    keywords: &[String],
970) -> Result<Vec<String>> {
971    debug!("Fetching repository tree");
972
973    // Try main branch first, then master
974    let branches = ["main", "master"];
975    let mut tree_response: Option<GitTreeResponse> = None;
976
977    for branch in &branches {
978        let route = format!("/repos/{owner}/{repo}/git/trees/{branch}?recursive=1");
979        let result = (|| async {
980            client
981                .get::<GitTreeResponse, _, _>(&route, None::<&()>)
982                .await
983                .map_err(|e| anyhow::anyhow!(e))
984        })
985        .retry(retry_backoff())
986        .notify(|err, dur| {
987            tracing::warn!(
988                error = %err,
989                retry_after = ?dur,
990                branch = %branch,
991                "Retrying fetch_repo_tree"
992            );
993        })
994        .await;
995
996        match result {
997            Ok(response) => {
998                tree_response = Some(response);
999                debug!(branch = %branch, "Fetched tree from branch");
1000                break;
1001            }
1002            Err(e) => {
1003                debug!(branch = %branch, error = %e, "Failed to fetch tree from branch");
1004            }
1005        }
1006    }
1007
1008    let response =
1009        tree_response.context("Failed to fetch repository tree from main or master branch")?;
1010
1011    let filtered = filter_tree_by_relevance(&response.tree, language, keywords);
1012    debug!(count = filtered.len(), "Filtered tree entries");
1013
1014    Ok(filtered)
1015}
1016
1017/// Fetches issues needing triage from a specific repository.
1018///
1019/// In default mode (force=false), returns issues that are either unlabeled OR missing a milestone.
1020/// In force mode (force=true), returns ALL open issues with no filtering.
1021///
1022/// # Arguments
1023///
1024/// * `client` - The Octocrab GitHub client
1025/// * `owner` - Repository owner
1026/// * `repo` - Repository name
1027/// * `since` - Optional RFC3339 timestamp to filter issues created after this date (client-side filtering)
1028/// * `force` - If true, return all open issues; if false, filter to unlabeled or milestone-missing issues
1029///
1030/// # Errors
1031///
1032/// Returns an error if the REST API request fails.
1033#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
1034pub async fn fetch_issues_needing_triage(
1035    client: &Octocrab,
1036    owner: &str,
1037    repo: &str,
1038    since: Option<&str>,
1039    force: bool,
1040) -> Result<Vec<UntriagedIssue>> {
1041    debug!("Fetching issues needing triage");
1042
1043    let issues_page: octocrab::Page<octocrab::models::issues::Issue> = client
1044        .issues(owner, repo)
1045        .list()
1046        .state(octocrab::params::State::Open)
1047        .per_page(100)
1048        .send()
1049        .await
1050        .context("Failed to fetch issues from repository")?;
1051
1052    let total_issues = issues_page.items.len();
1053
1054    let mut issues_needing_triage: Vec<UntriagedIssue> = issues_page
1055        .items
1056        .into_iter()
1057        .filter(|issue| {
1058            if force {
1059                true
1060            } else {
1061                issue.labels.is_empty() || issue.milestone.is_none()
1062            }
1063        })
1064        .map(|issue| UntriagedIssue {
1065            number: issue.number,
1066            title: issue.title,
1067            created_at: issue.created_at.to_rfc3339(),
1068            url: issue.html_url.to_string(),
1069        })
1070        .collect();
1071
1072    if let Some(since_date) = since
1073        && let Ok(since_timestamp) = chrono::DateTime::parse_from_rfc3339(since_date)
1074    {
1075        issues_needing_triage.retain(|issue| {
1076            if let Ok(created_at) = chrono::DateTime::parse_from_rfc3339(&issue.created_at) {
1077                created_at >= since_timestamp
1078            } else {
1079                true
1080            }
1081        });
1082    }
1083
1084    debug!(
1085        total_issues = total_issues,
1086        issues_needing_triage_count = issues_needing_triage.len(),
1087        "Fetched issues needing triage"
1088    );
1089
1090    Ok(issues_needing_triage)
1091}
1092
#[cfg(test)]
mod fetch_issues_needing_triage_tests {
    /// Mirrors the per-issue predicate used by `fetch_issues_needing_triage`: force mode
    /// keeps everything, otherwise an issue is kept when it has no labels or no milestone.
    fn passes_filter(labels_empty: bool, milestone_none: bool, force: bool) -> bool {
        force || labels_empty || milestone_none
    }

    #[test]
    fn filter_logic_unlabeled_default_mode() {
        // Missing labels alone is sufficient, whether or not a milestone is set.
        assert!(passes_filter(true, true, false));
        assert!(passes_filter(true, false, false));
    }

    #[test]
    fn filter_logic_labeled_default_mode() {
        // A labeled issue is still kept as long as it has no milestone.
        assert!(passes_filter(false, true, false));
    }

    #[test]
    fn filter_logic_missing_milestone_default_mode() {
        // A missing milestone alone is sufficient, regardless of labels.
        assert!(passes_filter(false, true, false));
        assert!(passes_filter(true, true, false));
    }

    #[test]
    fn filter_logic_force_mode_returns_all() {
        // Force mode ignores both conditions, including the fully triaged case.
        for labels_empty in [false, true] {
            for milestone_none in [false, true] {
                assert!(passes_filter(labels_empty, milestone_none, true));
            }
        }
    }

    #[test]
    fn filter_logic_fully_triaged_default_mode_excluded() {
        // Labeled and milestoned: the only combination dropped in default mode.
        assert!(!passes_filter(false, false, false));
    }
}
1170
#[cfg(test)]
mod tree_tests {
    use super::*;

    /// Builds a tree entry from borrowed parts so each test only spells out what matters.
    fn entry(path: &str, kind: &str, mode: &str, sha: &str) -> GitTreeEntry {
        GitTreeEntry {
            path: path.to_string(),
            type_: kind.to_string(),
            mode: mode.to_string(),
            sha: sha.to_string(),
        }
    }

    /// Builds a regular-file entry (`blob`, mode `100644`).
    fn blob(path: &str, sha: &str) -> GitTreeEntry {
        entry(path, "blob", "100644", sha)
    }

    #[test]
    fn filter_tree_by_relevance_keyword_matching() {
        let entries = vec![
            blob("src/parser.rs", "abc123"),
            blob("src/main.rs", "def456"),
            blob("src/utils.rs", "ghi789"),
        ];
        let keywords = vec!["parser".to_string()];

        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);

        assert!(filtered.contains(&"src/parser.rs".to_string()));
    }

    #[test]
    fn filter_tree_by_relevance_entry_points() {
        let entries = vec![blob("src/lib.rs", "abc123"), blob("src/utils.rs", "def456")];
        let keywords: Vec<String> = Vec::new();

        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);

        assert!(filtered.contains(&"src/lib.rs".to_string()));
    }

    #[test]
    fn filter_tree_by_relevance_excludes_tests() {
        let entries = vec![
            blob("src/main.rs", "abc123"),
            blob("tests/integration_test.rs", "def456"),
        ];
        let keywords: Vec<String> = Vec::new();

        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);

        assert!(!filtered.contains(&"tests/integration_test.rs".to_string()));
        assert!(filtered.contains(&"src/main.rs".to_string()));
    }

    #[test]
    fn filter_tree_excludes_node_modules() {
        let entries = vec![
            blob("src/main.rs", "abc123"),
            blob("node_modules/package/index.js", "def456"),
        ];

        let filtered = filter_tree_by_language(&entries, "rust");

        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0], "src/main.rs");
    }

    #[test]
    fn filter_tree_excludes_directories() {
        let entries = vec![
            blob("src/main.rs", "abc123"),
            entry("src/lib", "tree", "040000", "def456"),
        ];

        let filtered = filter_tree_by_language(&entries, "rust");

        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0], "src/main.rs");
    }

    #[test]
    fn filter_tree_sorts_by_depth() {
        // Deliberately listed deepest-first so the expected order differs from the input.
        let entries = vec![
            blob("a/b/c/d.rs", "abc123"),
            blob("a/b.rs", "def456"),
            blob("main.rs", "ghi789"),
        ];

        let filtered = filter_tree_by_language(&entries, "rust");

        for (idx, expected) in ["main.rs", "a/b.rs", "a/b/c/d.rs"].iter().enumerate() {
            assert_eq!(filtered[idx], *expected);
        }
    }

    #[test]
    fn filter_tree_limits_to_50() {
        let mut entries: Vec<GitTreeEntry> = Vec::with_capacity(100);
        for i in 0..100 {
            entries.push(blob(&format!("file{i}.rs"), &format!("sha{i}")));
        }

        let filtered = filter_tree_by_language(&entries, "rust");

        assert_eq!(filtered.len(), 50);
    }

    #[test]
    fn filter_tree_by_language_rust() {
        let entries = vec![blob("src/main.rs", "abc123"), blob("src/lib.py", "def456")];

        let filtered = filter_tree_by_language(&entries, "rust");

        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0], "src/main.rs");
    }

    #[test]
    fn filter_tree_by_language_python() {
        let entries = vec![blob("main.py", "abc123"), blob("lib.rs", "def456")];

        let filtered = filter_tree_by_language(&entries, "python");

        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0], "main.py");
    }

    #[test]
    fn get_extensions_for_language_rust() {
        assert_eq!(get_extensions_for_language("rust"), vec!["rs"]);
    }

    #[test]
    fn get_extensions_for_language_javascript() {
        let exts = get_extensions_for_language("javascript");
        for expected in ["js", "ts", "jsx", "tsx"] {
            assert!(exts.contains(&expected));
        }
    }

    #[test]
    fn get_extensions_for_language_unknown() {
        assert!(get_extensions_for_language("unknown_language").is_empty());
    }
}
1401
#[cfg(test)]
mod merge_labels_tests {
    use super::*;

    /// Turns a list of string literals into the owned label names `merge_labels` takes.
    fn names(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|name| (*name).to_string()).collect()
    }

    #[test]
    fn preserves_existing_and_adds_new() {
        let merged = merge_labels(&names(&["bug", "enhancement"]), &names(&["documentation"]));

        assert_eq!(merged.len(), 3);
        for expected in ["bug", "enhancement", "documentation"] {
            assert!(merged.contains(&expected.to_string()));
        }
    }

    #[test]
    fn deduplicates_case_insensitive() {
        let merged = merge_labels(&names(&["Bug"]), &names(&["bug", "enhancement"]));

        assert_eq!(merged.len(), 2);
        // The existing spelling wins over the suggested lowercase duplicate.
        assert!(merged.contains(&"Bug".to_string()));
        assert!(merged.contains(&"enhancement".to_string()));
    }

    #[test]
    fn skips_priority_when_existing_has_one() {
        // An uppercase P1 already exists and p2 is suggested: P1 stays, p2 is dropped,
        // and the unrelated "bug" label is still added.
        let merged = merge_labels(&names(&["P1"]), &names(&["p2", "bug"]));

        assert_eq!(merged.len(), 2);
        assert!(merged.contains(&"P1".to_string()));
        assert!(merged.contains(&"bug".to_string()));
        assert!(!merged.contains(&"p2".to_string()));
    }

    #[test]
    fn handles_empty_inputs() {
        // Nothing existing: every suggestion passes through.
        let only_suggested = merge_labels(&[], &names(&["bug", "p1"]));
        assert_eq!(only_suggested.len(), 2);

        // Nothing suggested: the existing labels are kept as they are.
        let only_existing = merge_labels(&names(&["bug"]), &[]);
        assert_eq!(only_existing.len(), 1);
        assert!(only_existing.contains(&"bug".to_string()));
    }
}
1451
#[cfg(test)]
mod label_tests {
    use super::*;
    use crate::ai::types::RepoLabel;

    /// Builds a repository label from string literals.
    fn label(name: &str, color: &str, description: &str) -> RepoLabel {
        RepoLabel {
            name: name.to_string(),
            color: color.to_string(),
            description: description.to_string(),
        }
    }

    #[test]
    fn filter_labels_empty_input() {
        let labels: Vec<RepoLabel> = Vec::new();
        assert!(filter_labels_by_relevance(&labels, 30).is_empty());
    }

    #[test]
    fn filter_labels_zero_max() {
        let labels = vec![label("bug", "ff0000", "Bug report")];
        assert!(filter_labels_by_relevance(&labels, 0).is_empty());
    }

    #[test]
    fn filter_labels_priority_first() {
        let labels = vec![
            label("documentation", "0075ca", "Documentation"),
            label("other", "cccccc", "Other"),
            label("bug", "ff0000", "Bug"),
        ];

        let filtered = filter_labels_by_relevance(&labels, 30);

        assert_eq!(filtered.len(), 3);
        for (idx, expected) in ["documentation", "bug", "other"].iter().enumerate() {
            assert_eq!(filtered[idx].name, *expected);
        }
    }

    #[test]
    fn filter_labels_case_insensitive() {
        let labels = vec![
            label("Bug", "ff0000", "Bug"),
            label("ENHANCEMENT", "a2eeef", "Enhancement"),
        ];

        let filtered = filter_labels_by_relevance(&labels, 30);

        assert_eq!(filtered.len(), 2);
        assert_eq!(filtered[0].name, "Bug");
        assert_eq!(filtered[1].name, "ENHANCEMENT");
    }

    #[test]
    fn filter_labels_over_limit_with_priorities() {
        // Twenty filler labels come first; the two well-known ones are appended last.
        let mut labels: Vec<RepoLabel> = (0..20)
            .map(|i| label(&format!("label{i}"), "cccccc", &format!("Label {i}")))
            .collect();
        labels.push(label("bug", "ff0000", "Bug"));
        labels.push(label("enhancement", "a2eeef", "Enhancement"));

        let filtered = filter_labels_by_relevance(&labels, 10);

        assert_eq!(filtered.len(), 10);
        assert_eq!(filtered[0].name, "bug");
        assert_eq!(filtered[1].name, "enhancement");
    }
}
1547
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `reference` and asserts that it resolves to issue 5836 of `block/goose`.
    fn assert_resolves_to_goose_5836(reference: &str, context: Option<&str>) {
        let (owner, repo, number) = parse_issue_reference(reference, context).unwrap();
        assert_eq!(owner, "block");
        assert_eq!(repo, "goose");
        assert_eq!(number, 5836);
    }

    /// Asserts that parsing `reference` fails with a message containing `expected`.
    fn assert_rejected_with(reference: &str, context: Option<&str>, expected: &str) {
        let result = parse_issue_reference(reference, context);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains(expected));
    }

    #[test]
    fn parse_reference_full_url() {
        assert_resolves_to_goose_5836("https://github.com/block/goose/issues/5836", None);
    }

    #[test]
    fn parse_reference_short_form() {
        assert_resolves_to_goose_5836("block/goose#5836", None);
    }

    #[test]
    fn parse_reference_short_form_with_context() {
        // The explicit owner/repo in the reference takes precedence over the context.
        assert_resolves_to_goose_5836("block/goose#5836", Some("astral-sh/ruff"));
    }

    #[test]
    fn parse_reference_bare_number_with_context() {
        assert_resolves_to_goose_5836("5836", Some("block/goose"));
    }

    #[test]
    fn parse_reference_bare_number_without_context() {
        assert_rejected_with(
            "5836",
            None,
            "Bare issue number requires repository context",
        );
    }

    #[test]
    fn parse_reference_invalid_short_form_missing_slash() {
        assert_rejected_with("owner#123", None, "Invalid owner/repo format");
    }

    #[test]
    fn parse_reference_invalid_short_form_extra_slash() {
        assert_rejected_with("owner/repo/extra#123", None, "Invalid owner/repo format");
    }

    #[test]
    fn parse_reference_invalid_bare_number() {
        assert_rejected_with(
            "abc",
            Some("block/goose"),
            "Invalid issue reference format",
        );
    }

    #[test]
    fn parse_reference_whitespace_trimming() {
        assert_resolves_to_goose_5836("  block/goose#5836  ", None);
    }

    #[test]
    fn parse_reference_bare_number_whitespace() {
        assert_resolves_to_goose_5836("  5836  ", Some("block/goose"));
    }

    #[test]
    fn extract_keywords_filters_stop_words() {
        let keywords = extract_keywords("The issue is about a bug in the CLI");

        for stop_word in ["the", "is", "a"] {
            assert!(!keywords.contains(&stop_word.to_string()));
        }
        for kept in ["issue", "bug", "cli"] {
            assert!(keywords.contains(&kept.to_string()));
        }
    }

    #[test]
    fn extract_keywords_limits_to_five() {
        let keywords = extract_keywords("one two three four five six seven eight nine ten");
        assert_eq!(keywords.len(), 5);
    }

    #[test]
    fn extract_keywords_empty_title() {
        // A title made up only of stop words leaves nothing behind.
        let keywords = extract_keywords("the a an and or");
        assert!(keywords.is_empty());
    }

    #[test]
    fn extract_keywords_lowercase_conversion() {
        let keywords = extract_keywords("CLI Bug FIX");
        for keyword in &keywords {
            assert!(keyword.chars().all(char::is_lowercase));
        }
    }
}