aptu_core/github/
issues.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! GitHub issue operations for the triage command.
4//!
5//! Provides functionality to parse issue URLs, fetch issue details,
6//! and post triage comments.
7
8use anyhow::{Context, Result};
9use backon::Retryable;
10use octocrab::Octocrab;
11use serde::{Deserialize, Serialize};
12use tracing::{debug, instrument};
13
14use crate::ai::types::{IssueComment, IssueDetails, RepoIssueContext};
15use crate::retry::retry_backoff;
16
17/// A GitHub issue without labels (untriaged).
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct UntriagedIssue {
20    /// Issue number.
21    pub number: u64,
22    /// Issue title.
23    pub title: String,
24    /// Creation timestamp (ISO 8601).
25    pub created_at: String,
26    /// Issue URL.
27    pub url: String,
28}
29
30/// A single entry in a Git tree response.
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct GitTreeEntry {
33    /// File path relative to repository root.
34    pub path: String,
35    /// Type of entry: "blob" (file) or "tree" (directory).
36    #[serde(rename = "type")]
37    pub type_: String,
38    /// File mode (e.g., "100644" for regular files).
39    pub mode: String,
40    /// SHA-1 hash of the entry.
41    pub sha: String,
42}
43
44/// Response from GitHub Git Trees API.
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct GitTreeResponse {
47    /// List of entries in the tree.
48    pub tree: Vec<GitTreeEntry>,
49    /// Whether the tree is truncated (too many entries).
50    pub truncated: bool,
51}
52
53/// Parses an owner/repo string to extract owner and repo.
54///
55/// Validates format: exactly one `/`, non-empty parts.
56///
57/// # Errors
58///
59/// Returns an error if the format is invalid.
60pub fn parse_owner_repo(s: &str) -> Result<(String, String)> {
61    let parts: Vec<&str> = s.split('/').collect();
62    if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
63        anyhow::bail!(
64            "Invalid owner/repo format.\n\
65             Expected: owner/repo\n\
66             Got: {s}"
67        );
68    }
69    Ok((parts[0].to_string(), parts[1].to_string()))
70}
71
72/// Parses a GitHub issue reference in multiple formats.
73///
74/// Supports:
75/// - Full URL: `https://github.com/owner/repo/issues/123`
76/// - Short form: `owner/repo#123`
77/// - Bare number: `123` (requires `repo_context`)
78///
79/// # Arguments
80///
81/// * `input` - The issue reference to parse
82/// * `repo_context` - Optional repository context for bare numbers (e.g., "owner/repo")
83///
84/// # Errors
85///
86/// Returns an error if the format is invalid or bare number is used without context.
87pub fn parse_issue_reference(
88    input: &str,
89    repo_context: Option<&str>,
90) -> Result<(String, String, u64)> {
91    let input = input.trim();
92
93    // Try full URL first
94    if input.starts_with("https://") || input.starts_with("http://") {
95        // Remove trailing fragments and query params
96        let clean_url = input.split('#').next().unwrap_or(input);
97        let clean_url = clean_url.split('?').next().unwrap_or(clean_url);
98
99        // Parse the URL path
100        let parts: Vec<&str> = clean_url.trim_end_matches('/').split('/').collect();
101
102        // Expected: ["https:", "", "github.com", "owner", "repo", "issues", "123"]
103        if parts.len() < 7 {
104            anyhow::bail!(
105                "Invalid GitHub issue URL format.\n\
106                 Expected: https://github.com/owner/repo/issues/123\n\
107                 Got: {input}"
108            );
109        }
110
111        // Verify it's a github.com URL
112        if !parts[2].contains("github.com") {
113            anyhow::bail!(
114                "URL must be a GitHub issue URL.\n\
115                 Expected: https://github.com/owner/repo/issues/123\n\
116                 Got: {input}"
117            );
118        }
119
120        // Verify it's an issues path
121        if parts[5] != "issues" {
122            anyhow::bail!(
123                "URL must point to a GitHub issue.\n\
124                 Expected: https://github.com/owner/repo/issues/123\n\
125                 Got: {input}"
126            );
127        }
128
129        let owner = parts[3].to_string();
130        let repo = parts[4].to_string();
131        let number: u64 = parts[6].parse().with_context(|| {
132            format!(
133                "Invalid issue number '{}' in URL.\n\
134                 Expected a numeric issue number.",
135                parts[6]
136            )
137        })?;
138
139        debug!(owner = %owner, repo = %repo, number = number, "Parsed issue URL");
140        return Ok((owner, repo, number));
141    }
142
143    // Try short form: owner/repo#123
144    if let Some(hash_pos) = input.find('#') {
145        let owner_repo_part = &input[..hash_pos];
146        let number_part = &input[hash_pos + 1..];
147
148        let (owner, repo) = parse_owner_repo(owner_repo_part)?;
149        let number: u64 = number_part.parse().with_context(|| {
150            format!(
151                "Invalid issue number '{number_part}' in short form.\n\
152                 Expected: owner/repo#123\n\
153                 Got: {input}"
154            )
155        })?;
156
157        debug!(owner = %owner, repo = %repo, number = number, "Parsed short-form issue reference");
158        return Ok((owner, repo, number));
159    }
160
161    // Try bare number: 123 (requires repo_context)
162    if let Ok(number) = input.parse::<u64>() {
163        let repo_context = repo_context.ok_or_else(|| {
164            anyhow::anyhow!(
165                "Bare issue number requires repository context.\n\
166                 Use one of:\n\
167                 - Full URL: https://github.com/owner/repo/issues/123\n\
168                 - Short form: owner/repo#123\n\
169                 - Bare number with --repo flag: 123 --repo owner/repo\n\
170                 Got: {input}"
171            )
172        })?;
173
174        let (owner, repo) = parse_owner_repo(repo_context)?;
175        debug!(owner = %owner, repo = %repo, number = number, "Parsed bare issue number");
176        return Ok((owner, repo, number));
177    }
178
179    // If we get here, it's an invalid format
180    anyhow::bail!(
181        "Invalid issue reference format.\n\
182         Expected one of:\n\
183         - Full URL: https://github.com/owner/repo/issues/123\n\
184         - Short form: owner/repo#123\n\
185         - Bare number with --repo flag: 123 --repo owner/repo\n\
186         Got: {input}"
187    );
188}
189
190/// Fetches issue details including comments from GitHub.
191///
192/// # Errors
193///
194/// Returns an error if the API request fails or the issue is not found.
195#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
196pub async fn fetch_issue_with_comments(
197    client: &Octocrab,
198    owner: &str,
199    repo: &str,
200    number: u64,
201) -> Result<IssueDetails> {
202    debug!("Fetching issue details");
203
204    // Fetch the issue with retry logic
205    let issue = (|| async {
206        client
207            .issues(owner, repo)
208            .get(number)
209            .await
210            .map_err(|e| anyhow::anyhow!(e))
211    })
212    .retry(retry_backoff())
213    .notify(|err, dur| {
214        tracing::warn!(
215            error = %err,
216            retry_after = ?dur,
217            "Retrying fetch_issue_with_comments (issue fetch)"
218        );
219    })
220    .await
221    .with_context(|| format!("Failed to fetch issue #{number} from {owner}/{repo}"))?;
222
223    // Fetch comments (limited to first page) with retry logic
224    let comments_page = (|| async {
225        client
226            .issues(owner, repo)
227            .list_comments(number)
228            .per_page(5)
229            .send()
230            .await
231            .map_err(|e| anyhow::anyhow!(e))
232    })
233    .retry(retry_backoff())
234    .notify(|err, dur| {
235        tracing::warn!(
236            error = %err,
237            retry_after = ?dur,
238            "Retrying fetch_issue_with_comments (comments fetch)"
239        );
240    })
241    .await
242    .with_context(|| format!("Failed to fetch comments for issue #{number}"))?;
243
244    // Convert to our types
245    let labels: Vec<String> = issue.labels.iter().map(|l| l.name.clone()).collect();
246
247    let comments: Vec<IssueComment> = comments_page
248        .items
249        .iter()
250        .map(|c| IssueComment {
251            author: c.user.login.clone(),
252            body: c.body.clone().unwrap_or_default(),
253        })
254        .collect();
255
256    let issue_url = issue.html_url.to_string();
257
258    let details = IssueDetails::builder()
259        .owner(owner.to_string())
260        .repo(repo.to_string())
261        .number(number)
262        .title(issue.title)
263        .body(issue.body.unwrap_or_default())
264        .labels(labels)
265        .comments(comments)
266        .url(issue_url)
267        .build();
268
269    debug!(
270        labels = details.labels.len(),
271        comments = details.comments.len(),
272        "Fetched issue details"
273    );
274
275    Ok(details)
276}
277
/// Extracts keywords from an issue title for search and relevance matching.
///
/// The title is lowercased and split on every non-alphanumeric character. Empty
/// fragments and common stop words are dropped, and only the first 5 remaining
/// words are kept.
///
/// # Arguments
///
/// * `title` - Issue title to extract keywords from
///
/// # Returns
///
/// Vector of lowercase keywords (max 5), excluding stop words.
pub fn extract_keywords(title: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "he", "in", "is",
        "it", "its", "of", "on", "or", "that", "the", "to", "was", "will", "with",
    ];
    const MAX_KEYWORDS: usize = 5;

    let lowered = title.to_lowercase();
    let mut keywords = Vec::new();

    for token in lowered.split(|ch: char| !ch.is_alphanumeric()) {
        // Stop as soon as the quota is filled.
        if keywords.len() == MAX_KEYWORDS {
            break;
        }
        if token.is_empty() || STOP_WORDS.contains(&token) {
            continue;
        }
        keywords.push(token.to_string());
    }

    keywords
}
307
308/// Searches for related issues in a repository based on title keywords.
309///
310/// Extracts keywords from the issue title and searches the repository
311/// for matching issues. Returns up to 20 results, excluding the specified issue.
312///
313/// # Arguments
314///
315/// * `client` - Authenticated Octocrab client
316/// * `owner` - Repository owner
317/// * `repo` - Repository name
318/// * `title` - Issue title to extract keywords from
319/// * `exclude_number` - Issue number to exclude from results
320///
321/// # Errors
322///
323/// Returns an error if the search API request fails.
324#[instrument(skip(client), fields(owner = %owner, repo = %repo, exclude_number = %exclude_number))]
325pub async fn search_related_issues(
326    client: &Octocrab,
327    owner: &str,
328    repo: &str,
329    title: &str,
330    exclude_number: u64,
331) -> Result<Vec<RepoIssueContext>> {
332    let keywords = extract_keywords(title);
333
334    if keywords.is_empty() {
335        debug!("No keywords extracted from title");
336        return Ok(Vec::new());
337    }
338
339    // Build search query: keyword1 keyword2 ... repo:owner/repo is:issue
340    let query = format!("{} repo:{}/{} is:issue", keywords.join(" "), owner, repo);
341
342    debug!(query = %query, "Searching for related issues");
343
344    // Search for issues with retry logic
345    let search_result = (|| async {
346        client
347            .search()
348            .issues_and_pull_requests(&query)
349            .per_page(20)
350            .send()
351            .await
352            .map_err(|e| anyhow::anyhow!(e))
353    })
354    .retry(retry_backoff())
355    .notify(|err, dur| {
356        tracing::warn!(
357            error = %err,
358            retry_after = ?dur,
359            "Retrying search_related_issues"
360        );
361    })
362    .await
363    .with_context(|| format!("Failed to search for related issues in {owner}/{repo}"))?;
364
365    // Convert to our context type
366    let related: Vec<RepoIssueContext> = search_result
367        .items
368        .iter()
369        .filter_map(|item| {
370            // Only include issues (not PRs)
371            if item.pull_request.is_some() {
372                return None;
373            }
374
375            // Exclude the issue being triaged
376            if item.number == exclude_number {
377                return None;
378            }
379
380            Some(RepoIssueContext {
381                number: item.number,
382                title: item.title.clone(),
383                labels: item.labels.iter().map(|l| l.name.clone()).collect(),
384                state: format!("{:?}", item.state).to_lowercase(),
385            })
386        })
387        .collect();
388
389    debug!(count = related.len(), "Found related issues");
390
391    Ok(related)
392}
393
394/// Posts a triage comment to a GitHub issue.
395///
396/// # Returns
397///
398/// The URL of the created comment.
399///
400/// # Errors
401///
402/// Returns an error if the API request fails.
403#[instrument(skip(client, body), fields(owner = %owner, repo = %repo, number = number))]
404pub async fn post_comment(
405    client: &Octocrab,
406    owner: &str,
407    repo: &str,
408    number: u64,
409    body: &str,
410) -> Result<String> {
411    debug!("Posting triage comment");
412
413    let comment = client
414        .issues(owner, repo)
415        .create_comment(number, body)
416        .await
417        .with_context(|| format!("Failed to post comment to issue #{number}"))?;
418
419    let comment_url = comment.html_url.to_string();
420
421    debug!(url = %comment_url, "Comment posted successfully");
422
423    Ok(comment_url)
424}
425
426/// Creates a new GitHub issue.
427///
428/// Posts a new issue with the given title and body to the repository.
429/// Returns the issue URL and issue number.
430///
431/// # Arguments
432///
433/// * `client` - Authenticated Octocrab client
434/// * `owner` - Repository owner
435/// * `repo` - Repository name
436/// * `title` - Issue title
437/// * `body` - Issue body (markdown)
438///
439/// # Errors
440///
441/// Returns an error if the GitHub API call fails.
442#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
443pub async fn create_issue(
444    client: &Octocrab,
445    owner: &str,
446    repo: &str,
447    title: &str,
448    body: &str,
449) -> Result<(String, u64)> {
450    debug!("Creating GitHub issue");
451
452    let issue = client
453        .issues(owner, repo)
454        .create(title)
455        .body(body)
456        .send()
457        .await
458        .with_context(|| format!("Failed to create issue in {owner}/{repo}"))?;
459
460    let issue_url = issue.html_url.to_string();
461    let issue_number = issue.number;
462
463    debug!(number = issue_number, url = %issue_url, "Issue created successfully");
464
465    Ok((issue_url, issue_number))
466}
467
/// Outcome of applying labels and a milestone to an issue.
#[derive(Clone, Debug)]
pub struct ApplyResult {
    /// Labels that were successfully applied.
    pub applied_labels: Vec<String>,
    /// Title of the milestone that was applied, or `None` if none was.
    pub applied_milestone: Option<String>,
    /// Messages describing labels or milestones that could not be applied.
    pub warnings: Vec<String>,
}
478
/// Labels that should only be applied by maintainers, not by AI suggestions.
const MAINTAINER_ONLY_LABELS: &[&str] = &["good first issue", "help wanted"];

/// Merges existing and suggested labels additively.
///
/// Implements additive label merging with priority label handling:
/// - If existing labels contain a priority label (`p` followed by one digit, any case),
///   AI-suggested priority labels are skipped
/// - Suggested labels listed in `MAINTAINER_ONLY_LABELS` are always skipped
///   (ASCII case-insensitive)
/// - Remaining labels are merged with case-insensitive deduplication
/// - All existing labels are preserved, in their original order and spelling
///
/// # Arguments
///
/// * `existing_labels` - Labels currently on the issue
/// * `suggested_labels` - Labels suggested by AI
///
/// # Returns
///
/// Existing labels followed by the accepted suggestions, in suggestion order.
fn merge_labels(existing_labels: &[String], suggested_labels: &[String]) -> Vec<String> {
    /// True for a priority label: exactly `p`/`P` followed by a single ASCII digit.
    fn is_priority(label: &str) -> bool {
        matches!(label.as_bytes(), [b'p' | b'P', digit] if digit.is_ascii_digit())
    }

    let has_priority = existing_labels.iter().any(|label| is_priority(label));

    // Lowercased form of every label already in `merged`, so each suggestion is
    // checked for duplicates with one lookup instead of a rescan.
    let mut seen: std::collections::HashSet<String> =
        existing_labels.iter().map(|label| label.to_lowercase()).collect();
    let mut merged = existing_labels.to_vec();

    for suggested in suggested_labels {
        // Never override a priority a human already chose.
        if has_priority && is_priority(suggested) {
            continue;
        }

        // Skip maintainer-only labels
        if MAINTAINER_ONLY_LABELS
            .iter()
            .any(|reserved| reserved.eq_ignore_ascii_case(suggested))
        {
            continue;
        }

        // `insert` returns false when an equal (case-insensitive) label is already present.
        if seen.insert(suggested.to_lowercase()) {
            merged.push(suggested.clone());
        }
    }

    merged
}
542
543/// Updates an issue with labels and milestone.
544///
545/// Applies labels additively by merging existing and suggested labels.
546/// Validates suggestions against available options before applying.
547/// Returns what was actually applied and any warnings.
548///
549/// # Errors
550///
551/// Returns an error if the GitHub API call fails.
552#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
553#[allow(clippy::too_many_arguments)]
554pub async fn update_issue_labels_and_milestone(
555    client: &Octocrab,
556    owner: &str,
557    repo: &str,
558    number: u64,
559    existing_labels: &[String],
560    suggested_labels: &[String],
561    existing_milestone: Option<&str>,
562    suggested_milestone: Option<&str>,
563    available_labels: &[crate::ai::types::RepoLabel],
564    available_milestones: &[crate::ai::types::RepoMilestone],
565) -> Result<ApplyResult> {
566    debug!("Updating issue with labels and milestone");
567
568    let mut warnings = Vec::new();
569
570    // Validate and collect labels
571    let available_label_names: std::collections::HashSet<_> =
572        available_labels.iter().map(|l| l.name.as_str()).collect();
573
574    // Validate suggested labels
575    let mut valid_suggested = Vec::new();
576    for label in suggested_labels {
577        if available_label_names.contains(label.as_str()) {
578            valid_suggested.push(label.clone());
579        } else {
580            warnings.push(format!("Label '{label}' not found in repository"));
581        }
582    }
583
584    // Merge existing and suggested labels additively
585    let applied_labels = merge_labels(existing_labels, &valid_suggested);
586
587    // Validate and find milestone (only set if issue has no existing milestone)
588    let applied_milestone = if existing_milestone.is_none() {
589        if let Some(milestone_title) = suggested_milestone {
590            if let Some(milestone) = available_milestones
591                .iter()
592                .find(|m| m.title == milestone_title)
593            {
594                Some(milestone.title.clone())
595            } else {
596                warnings.push(format!(
597                    "Milestone '{milestone_title}' not found in repository"
598                ));
599                None
600            }
601        } else {
602            None
603        }
604    } else {
605        None
606    };
607
608    // Apply updates to the issue
609    let issues_handler = client.issues(owner, repo);
610    let mut update_builder = issues_handler.update(number);
611
612    if !applied_labels.is_empty() {
613        update_builder = update_builder.labels(&applied_labels);
614    }
615
616    #[allow(clippy::collapsible_if)]
617    if let Some(milestone_title) = &applied_milestone {
618        if let Some(milestone) = available_milestones
619            .iter()
620            .find(|m| &m.title == milestone_title)
621        {
622            update_builder = update_builder.milestone(milestone.number);
623        }
624    }
625
626    update_builder
627        .send()
628        .await
629        .with_context(|| format!("Failed to update issue #{number}"))?;
630
631    debug!(
632        labels = ?applied_labels,
633        milestone = ?applied_milestone,
634        warnings = ?warnings,
635        "Issue updated successfully"
636    );
637
638    Ok(ApplyResult {
639        applied_labels,
640        applied_milestone,
641        warnings,
642    })
643}
644
645/// Apply labels to an issue or PR by number.
646///
647/// Simplified label-only application function for PRs (no milestone, no merge logic).
648/// Returns an error if the GitHub API call fails.
649#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
650pub async fn apply_labels_to_number(
651    client: &Octocrab,
652    owner: &str,
653    repo: &str,
654    number: u64,
655    labels: &[String],
656) -> Result<Vec<String>> {
657    debug!("Applying labels to issue/PR");
658
659    if labels.is_empty() {
660        debug!("No labels to apply");
661        return Ok(Vec::new());
662    }
663
664    let route = format!("repos/{owner}/{repo}/issues/{number}/labels");
665    let payload = serde_json::json!({ "labels": labels });
666
667    client
668        .post::<_, ()>(route, Some(&payload))
669        .await
670        .with_context(|| {
671            format!(
672                "Failed to apply labels to issue/PR #{number} in {owner}/{repo}. \
673                     Check that you have write access to the repository."
674            )
675        })?;
676
677    debug!(labels = ?labels, "Labels applied successfully");
678
679    Ok(labels.to_vec())
680}
681
/// Label names that tiered label filtering places ahead of all others.
/// They are considered the most actionable ones for issue triage.
const PRIORITY_LABELS: &[&str] = &[
    // General issue kinds
    "bug",
    "enhancement",
    "documentation",
    // Contributor-facing labels
    "good first issue",
    "help wanted",
    "question",
    // Change categories
    "feature",
    "fix",
    "breaking",
    "security",
    "performance",
    "breaking-change",
];
698
699/// Filters labels using tiered selection: priority labels first, then remaining labels.
700///
701/// Implements two-tier filtering:
702/// - Tier 1: Priority labels (case-insensitive matching)
703/// - Tier 2: Remaining labels to fill up to `max_labels`
704///
705/// This ensures the AI sees the most actionable labels regardless of repository size.
706///
707/// # Arguments
708///
709/// * `labels` - List of available labels from the repository
710/// * `max_labels` - Maximum number of labels to return
711///
712/// # Returns
713///
714/// Filtered list of labels with priority labels first.
715#[must_use]
716pub fn filter_labels_by_relevance(
717    labels: &[crate::ai::types::RepoLabel],
718    max_labels: usize,
719) -> Vec<crate::ai::types::RepoLabel> {
720    if labels.is_empty() || max_labels == 0 {
721        return Vec::new();
722    }
723
724    let mut priority_labels = Vec::new();
725    let mut other_labels = Vec::new();
726
727    // Separate labels into priority and other
728    for label in labels {
729        let label_lower = label.name.to_lowercase();
730        let is_priority = PRIORITY_LABELS
731            .iter()
732            .any(|&p| label_lower == p.to_lowercase());
733
734        if is_priority {
735            priority_labels.push(label.clone());
736        } else {
737            other_labels.push(label.clone());
738        }
739    }
740
741    // Combine: priority labels first, then fill remaining slots with other labels
742    let mut result = priority_labels;
743    let remaining_slots = max_labels.saturating_sub(result.len());
744    result.extend(other_labels.into_iter().take(remaining_slots));
745
746    // Limit to max_labels
747    result.truncate(max_labels);
748    result
749}
750
/// Directory patterns whose contents are left out of tree filtering entirely.
/// Based on GitHub Linguist vendor.yml and common build artifacts.
const EXCLUDE_PATTERNS: &[&str] = &[
    // Third-party dependencies
    "node_modules/",
    "vendor/",
    // Build output
    "dist/",
    "build/",
    "target/",
    // Tooling and caches
    ".git/",
    "cache/",
    // Non-source material
    "docs/",
    "examples/",
];
764
/// Directory patterns that are ranked lower rather than excluded outright.
/// They hold test/benchmark code, which is less relevant to issue triage.
const DEPRIORITIZE_PATTERNS: &[&str] = &[
    // Tests
    "test/",
    "tests/",
    "spec/",
    // Benchmarks and evaluations
    "bench/",
    "eval/",
    // Test data
    "fixtures/",
    "mocks/",
];
776
/// Lists the conventional entry-point file names for a language.
///
/// The language name is matched case-insensitively; an unrecognized language yields
/// an empty list. Entry points are prioritized because they often contain the main logic.
fn entry_point_patterns(language: &str) -> Vec<&'static str> {
    let normalized = language.to_lowercase();

    let patterns: &[&'static str] = match normalized.as_str() {
        "rust" => &["lib.rs", "mod.rs", "main.rs"],
        "python" => &["__init__.py"],
        "javascript" | "typescript" => &["index.ts", "index.js"],
        "java" => &["Main.java"],
        "go" => &["main.go"],
        "c#" | "csharp" => &["Program.cs"],
        _ => &[],
    };

    patterns.to_vec()
}
790
/// Looks up the common file extensions (without the leading dot) for a language.
///
/// The language name is matched case-insensitively; an unrecognized language yields
/// an empty list.
fn get_extensions_for_language(language: &str) -> Vec<&'static str> {
    let normalized = language.to_lowercase();

    let extensions: &[&'static str] = match normalized.as_str() {
        "rust" => &["rs"],
        "python" => &["py"],
        "javascript" | "typescript" => &["js", "ts", "jsx", "tsx"],
        "java" => &["java"],
        "c" => &["c", "h"],
        "c++" | "cpp" => &["cpp", "cc", "cxx", "h", "hpp"],
        "c#" | "csharp" => &["cs"],
        "go" => &["go"],
        "ruby" => &["rb"],
        "php" => &["php"],
        "swift" => &["swift"],
        "kotlin" => &["kt"],
        "scala" => &["scala"],
        "r" => &["r"],
        "shell" | "bash" => &["sh", "bash"],
        "html" => &["html", "htm"],
        "css" => &["css", "scss", "sass"],
        "json" => &["json"],
        "yaml" | "yml" => &["yaml", "yml"],
        "toml" => &["toml"],
        "xml" => &["xml"],
        "markdown" => &["md"],
        _ => &[],
    };

    extensions.to_vec()
}
819
820/// Filters repository tree entries by language-specific extensions.
821///
822/// Removes common non-source directories and limits results to 50 paths.
823/// Prioritizes shallow paths (fewer `/` characters).
824/// This is a legacy function kept for backward compatibility with existing tests.
825///
826/// # Arguments
827///
828/// * `entries` - Raw tree entries from GitHub API
829/// * `language` - Repository primary language for extension filtering
830///
831/// # Returns
832///
833/// Filtered and sorted list of file paths (max 50).
834#[allow(dead_code)]
835fn filter_tree_by_language(entries: &[GitTreeEntry], language: &str) -> Vec<String> {
836    let extensions = get_extensions_for_language(language);
837    let exclude_dirs = [
838        "node_modules/",
839        "target/",
840        "dist/",
841        "build/",
842        ".git/",
843        "vendor/",
844        "test",
845        "spec",
846        "mock",
847        "fixture",
848    ];
849
850    let mut filtered: Vec<String> = entries
851        .iter()
852        .filter(|entry| {
853            // Only include files (blobs), not directories
854            if entry.type_ != "blob" {
855                return false;
856            }
857
858            // Exclude paths containing excluded directories
859            if exclude_dirs.iter().any(|dir| entry.path.contains(dir)) {
860                return false;
861            }
862
863            // Filter by extension if language is recognized
864            if extensions.is_empty() {
865                // If language not recognized, include all files
866                true
867            } else {
868                extensions.iter().any(|ext| entry.path.ends_with(ext))
869            }
870        })
871        .map(|e| e.path.clone())
872        .collect();
873
874    // Sort by path depth (fewer slashes first), then alphabetically
875    filtered.sort_by(|a, b| {
876        let depth_a = a.matches('/').count();
877        let depth_b = b.matches('/').count();
878        if depth_a == depth_b {
879            a.cmp(b)
880        } else {
881            depth_a.cmp(&depth_b)
882        }
883    });
884
885    // Limit to 50 paths
886    filtered.truncate(50);
887    filtered
888}
889
890/// Filters repository tree entries by relevance using tiered keyword matching.
891///
892/// Implements three-tier filtering:
893/// - Tier 1: Files matching keywords (max 35)
894/// - Tier 2: Language entry points (max 10)
895/// - Tier 3: Other relevant files (max 15)
896///
897/// Removes common non-source directories and limits results to 60 paths.
898///
899/// # Arguments
900///
901/// * `entries` - Raw tree entries from GitHub API
902/// * `language` - Repository primary language for extension filtering
903/// * `keywords` - Optional keywords extracted from issue title for relevance matching
904///
905/// # Returns
906///
907/// Filtered and sorted list of file paths (max 60).
908fn filter_tree_by_relevance(
909    entries: &[GitTreeEntry],
910    language: &str,
911    keywords: &[String],
912) -> Vec<String> {
913    let extensions = get_extensions_for_language(language);
914    let entry_points = entry_point_patterns(language);
915
916    // Filter to valid source files
917    let candidates: Vec<String> = entries
918        .iter()
919        .filter(|entry| {
920            // Only include files (blobs), not directories
921            if entry.type_ != "blob" {
922                return false;
923            }
924
925            // Exclude paths containing excluded directories
926            if EXCLUDE_PATTERNS.iter().any(|dir| entry.path.contains(dir)) {
927                return false;
928            }
929
930            // Filter by extension if language is recognized
931            if extensions.is_empty() {
932                // If language not recognized, include all files
933                true
934            } else {
935                extensions.iter().any(|ext| entry.path.ends_with(ext))
936            }
937        })
938        .map(|e| e.path.clone())
939        .collect();
940
941    // Tier 1: Files matching keywords (max 35)
942    let mut tier1: Vec<String> = Vec::new();
943    let mut remaining: Vec<String> = Vec::new();
944
945    for path in candidates {
946        let path_lower = path.to_lowercase();
947        let matches_keyword = keywords.iter().any(|kw| path_lower.contains(kw));
948
949        if matches_keyword && tier1.len() < 35 {
950            tier1.push(path);
951        } else {
952            remaining.push(path);
953        }
954    }
955
956    // Tier 2: Entry point files (max 10)
957    let mut tier2: Vec<String> = Vec::new();
958    let mut tier3_candidates: Vec<String> = Vec::new();
959
960    for path in remaining {
961        let is_entry_point = entry_points.iter().any(|ep| path.ends_with(ep));
962        let is_deprioritized = DEPRIORITIZE_PATTERNS.iter().any(|dp| path.contains(dp));
963
964        if is_entry_point && tier2.len() < 10 {
965            tier2.push(path);
966        } else if !is_deprioritized {
967            tier3_candidates.push(path);
968        }
969    }
970
971    // Tier 3: Other relevant files (max 15)
972    let mut tier3: Vec<String> = tier3_candidates.into_iter().take(15).collect();
973
974    // Combine and sort by depth within each tier
975    let mut result = tier1;
976    result.append(&mut tier2);
977    result.append(&mut tier3);
978
979    // Sort by path depth (fewer slashes first), then alphabetically
980    result.sort_by(|a, b| {
981        let depth_a = a.matches('/').count();
982        let depth_b = b.matches('/').count();
983        if depth_a == depth_b {
984            a.cmp(b)
985        } else {
986            depth_a.cmp(&depth_b)
987        }
988    });
989
990    // Limit to 60 paths
991    result.truncate(60);
992    result
993}
994
995/// Fetches the repository file tree from GitHub.
996///
997/// Attempts to fetch from the default branch (main, then master).
998/// Returns filtered list of source file paths based on repository language and optional keywords.
999///
1000/// # Arguments
1001///
1002/// * `client` - Authenticated Octocrab client
1003/// * `owner` - Repository owner
1004/// * `repo` - Repository name
1005/// * `language` - Repository primary language for filtering
1006/// * `keywords` - Optional keywords extracted from issue title for relevance matching
1007///
1008/// # Errors
1009///
1010/// Returns an error if the API request fails (but not if tree is unavailable).
1011#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
1012pub async fn fetch_repo_tree(
1013    client: &Octocrab,
1014    owner: &str,
1015    repo: &str,
1016    language: &str,
1017    keywords: &[String],
1018) -> Result<Vec<String>> {
1019    debug!("Fetching repository tree");
1020
1021    // Try main branch first, then master
1022    let branches = ["main", "master"];
1023    let mut tree_response: Option<GitTreeResponse> = None;
1024
1025    for branch in &branches {
1026        let route = format!("/repos/{owner}/{repo}/git/trees/{branch}?recursive=1");
1027        let result = (|| async {
1028            client
1029                .get::<GitTreeResponse, _, _>(&route, None::<&()>)
1030                .await
1031                .map_err(|e| anyhow::anyhow!(e))
1032        })
1033        .retry(retry_backoff())
1034        .notify(|err, dur| {
1035            tracing::warn!(
1036                error = %err,
1037                retry_after = ?dur,
1038                branch = %branch,
1039                "Retrying fetch_repo_tree"
1040            );
1041        })
1042        .await;
1043
1044        match result {
1045            Ok(response) => {
1046                tree_response = Some(response);
1047                debug!(branch = %branch, "Fetched tree from branch");
1048                break;
1049            }
1050            Err(e) => {
1051                debug!(branch = %branch, error = %e, "Failed to fetch tree from branch");
1052            }
1053        }
1054    }
1055
1056    let response =
1057        tree_response.context("Failed to fetch repository tree from main or master branch")?;
1058
1059    let filtered = filter_tree_by_relevance(&response.tree, language, keywords);
1060    debug!(count = filtered.len(), "Filtered tree entries");
1061
1062    Ok(filtered)
1063}
1064
1065/// Fetches issues needing triage from a specific repository.
1066///
1067/// In default mode (force=false), returns issues that are either unlabeled OR missing a milestone.
1068/// In force mode (force=true), returns ALL open issues with no filtering.
1069///
1070/// # Arguments
1071///
1072/// * `client` - The Octocrab GitHub client
1073/// * `owner` - Repository owner
1074/// * `repo` - Repository name
1075/// * `since` - Optional RFC3339 timestamp to filter issues created after this date (client-side filtering)
1076/// * `force` - If true, return all issues in the specified state; if false, filter to unlabeled or milestone-missing issues
1077/// * `state` - Issue state filter (Open, Closed, or All)
1078///
1079/// # Errors
1080///
1081/// Returns an error if the REST API request fails.
1082#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
1083pub async fn fetch_issues_needing_triage(
1084    client: &Octocrab,
1085    owner: &str,
1086    repo: &str,
1087    since: Option<&str>,
1088    force: bool,
1089    state: octocrab::params::State,
1090) -> Result<Vec<UntriagedIssue>> {
1091    debug!("Fetching issues needing triage");
1092
1093    let issues_page: octocrab::Page<octocrab::models::issues::Issue> = client
1094        .issues(owner, repo)
1095        .list()
1096        .state(state)
1097        .per_page(100)
1098        .send()
1099        .await
1100        .context("Failed to fetch issues from repository")?;
1101
1102    let total_issues = issues_page.items.len();
1103
1104    let mut issues_needing_triage: Vec<UntriagedIssue> = issues_page
1105        .items
1106        .into_iter()
1107        .filter(|issue| {
1108            if force {
1109                true
1110            } else {
1111                issue.labels.is_empty() || issue.milestone.is_none()
1112            }
1113        })
1114        .map(|issue| UntriagedIssue {
1115            number: issue.number,
1116            title: issue.title,
1117            created_at: issue.created_at.to_rfc3339(),
1118            url: issue.html_url.to_string(),
1119        })
1120        .collect();
1121
1122    if let Some(since_date) = since
1123        && let Ok(since_timestamp) = chrono::DateTime::parse_from_rfc3339(since_date)
1124    {
1125        issues_needing_triage.retain(|issue| {
1126            if let Ok(created_at) = chrono::DateTime::parse_from_rfc3339(&issue.created_at) {
1127                created_at >= since_timestamp
1128            } else {
1129                true
1130            }
1131        });
1132    }
1133
1134    debug!(
1135        total_issues = total_issues,
1136        issues_needing_triage_count = issues_needing_triage.len(),
1137        "Fetched issues needing triage"
1138    );
1139
1140    Ok(issues_needing_triage)
1141}
1142
#[cfg(test)]
mod fetch_issues_needing_triage_tests {
    /// Local copy of the selection rule applied by `fetch_issues_needing_triage`:
    /// force mode keeps everything, otherwise an issue is kept when it has no
    /// labels or no milestone.
    fn passes_filter(labels_empty: bool, milestone_none: bool, force: bool) -> bool {
        force || labels_empty || milestone_none
    }

    #[test]
    fn filter_logic_unlabeled_default_mode() {
        // Missing labels alone is enough, whether or not a milestone is set.
        assert!(passes_filter(true, false, false));
        assert!(passes_filter(true, true, false));
    }

    #[test]
    fn filter_logic_labeled_default_mode() {
        // A labeled issue is only selected while its milestone is missing.
        assert!(passes_filter(false, true, false));
        assert!(!passes_filter(false, false, false));
    }

    #[test]
    fn filter_logic_missing_milestone_default_mode() {
        // A missing milestone alone is enough, whether or not labels are set.
        assert!(passes_filter(false, true, false));
        assert!(passes_filter(true, true, false));
    }

    #[test]
    fn filter_logic_force_mode_returns_all() {
        // Force mode bypasses the label/milestone checks for every combination.
        for labels_empty in [false, true] {
            for milestone_none in [false, true] {
                assert!(passes_filter(labels_empty, milestone_none, true));
            }
        }
    }

    #[test]
    fn filter_logic_fully_triaged_default_mode_excluded() {
        // Labeled and assigned to a milestone: nothing left to triage.
        assert!(!passes_filter(false, false, false));
    }
}
1220
#[cfg(test)]
mod tree_tests {
    use super::*;

    /// Builds a tree entry with every field spelled out.
    fn entry(path: &str, kind: &str, mode: &str, sha: &str) -> GitTreeEntry {
        GitTreeEntry {
            path: path.to_owned(),
            type_: kind.to_owned(),
            mode: mode.to_owned(),
            sha: sha.to_owned(),
        }
    }

    /// Builds a regular-file (`blob`, mode `100644`) tree entry.
    fn blob(path: &str, sha: &str) -> GitTreeEntry {
        entry(path, "blob", "100644", sha)
    }

    /// Returns true when `paths` contains exactly `wanted`.
    fn has_path(paths: &[String], wanted: &str) -> bool {
        paths.iter().any(|p| p == wanted)
    }

    #[test]
    fn filter_tree_by_relevance_keyword_matching() {
        let entries = [
            blob("src/parser.rs", "abc123"),
            blob("src/main.rs", "def456"),
            blob("src/utils.rs", "ghi789"),
        ];
        let keywords = ["parser".to_owned()];

        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);

        assert!(has_path(&filtered, "src/parser.rs"));
    }

    #[test]
    fn filter_tree_by_relevance_entry_points() {
        let entries = [blob("src/lib.rs", "abc123"), blob("src/utils.rs", "def456")];

        let filtered = filter_tree_by_relevance(&entries, "rust", &[]);

        assert!(has_path(&filtered, "src/lib.rs"));
    }

    #[test]
    fn filter_tree_by_relevance_excludes_tests() {
        let entries = [
            blob("src/main.rs", "abc123"),
            blob("tests/integration_test.rs", "def456"),
        ];

        let filtered = filter_tree_by_relevance(&entries, "rust", &[]);

        assert!(!has_path(&filtered, "tests/integration_test.rs"));
        assert!(has_path(&filtered, "src/main.rs"));
    }

    #[test]
    fn filter_tree_excludes_node_modules() {
        let entries = [
            blob("src/main.rs", "abc123"),
            blob("node_modules/package/index.js", "def456"),
        ];

        let filtered = filter_tree_by_language(&entries, "rust");

        assert_eq!(filtered, ["src/main.rs"]);
    }

    #[test]
    fn filter_tree_excludes_directories() {
        let entries = [
            blob("src/main.rs", "abc123"),
            entry("src/lib", "tree", "040000", "def456"),
        ];

        let filtered = filter_tree_by_language(&entries, "rust");

        assert_eq!(filtered, ["src/main.rs"]);
    }

    #[test]
    fn filter_tree_sorts_by_depth() {
        // Deliberately listed deepest-first so the ordering has to come from the filter.
        let entries = [
            blob("a/b/c/d.rs", "abc123"),
            blob("a/b.rs", "def456"),
            blob("main.rs", "ghi789"),
        ];

        let filtered = filter_tree_by_language(&entries, "rust");

        for (index, expected) in ["main.rs", "a/b.rs", "a/b/c/d.rs"].into_iter().enumerate() {
            assert_eq!(filtered[index], expected);
        }
    }

    #[test]
    fn filter_tree_limits_to_50() {
        let mut entries = Vec::with_capacity(100);
        for i in 0..100 {
            entries.push(blob(&format!("file{i}.rs"), &format!("sha{i}")));
        }

        let filtered = filter_tree_by_language(&entries, "rust");

        assert_eq!(filtered.len(), 50);
    }

    #[test]
    fn filter_tree_by_language_rust() {
        let entries = [blob("src/main.rs", "abc123"), blob("src/lib.py", "def456")];

        let filtered = filter_tree_by_language(&entries, "rust");

        assert_eq!(filtered, ["src/main.rs"]);
    }

    #[test]
    fn filter_tree_by_language_python() {
        let entries = [blob("main.py", "abc123"), blob("lib.rs", "def456")];

        let filtered = filter_tree_by_language(&entries, "python");

        assert_eq!(filtered, ["main.py"]);
    }

    #[test]
    fn get_extensions_for_language_rust() {
        assert_eq!(get_extensions_for_language("rust"), vec!["rs"]);
    }

    #[test]
    fn get_extensions_for_language_javascript() {
        let exts = get_extensions_for_language("javascript");
        for expected in ["js", "ts", "jsx", "tsx"] {
            assert!(exts.contains(&expected));
        }
    }

    #[test]
    fn get_extensions_for_language_unknown() {
        assert!(get_extensions_for_language("unknown_language").is_empty());
    }
}
1451
#[cfg(test)]
mod merge_labels_tests {
    use super::*;

    /// Converts string literals into the owned label names `merge_labels` takes.
    fn owned(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| (*name).to_owned()).collect()
    }

    /// Returns true when `labels` contains exactly `name` (case-sensitive).
    fn has(labels: &[String], name: &str) -> bool {
        labels.iter().any(|label| label == name)
    }

    #[test]
    fn preserves_existing_and_adds_new() {
        let merged = merge_labels(&owned(&["bug", "enhancement"]), &owned(&["documentation"]));

        assert_eq!(merged.len(), 3);
        for expected in ["bug", "enhancement", "documentation"] {
            assert!(has(&merged, expected));
        }
    }

    #[test]
    fn deduplicates_case_insensitive() {
        let merged = merge_labels(&owned(&["Bug"]), &owned(&["bug", "enhancement"]));

        assert_eq!(merged.len(), 2);
        assert!(has(&merged, "Bug"));
        assert!(has(&merged, "enhancement"));
    }

    #[test]
    fn skips_priority_when_existing_has_one() {
        // An uppercase P1 is already present: the suggested p2 must be
        // dropped while the unrelated "bug" label is still added.
        let merged = merge_labels(&owned(&["P1"]), &owned(&["p2", "bug"]));

        assert_eq!(merged.len(), 2);
        assert!(has(&merged, "P1"));
        assert!(has(&merged, "bug"));
        assert!(!has(&merged, "p2"));
    }

    #[test]
    fn handles_empty_inputs() {
        // Nothing existing: the suggestions are taken as-is.
        let from_suggestions = merge_labels(&[], &owned(&["bug", "p1"]));
        assert_eq!(from_suggestions.len(), 2);

        // Nothing suggested: the existing labels survive unchanged.
        let from_existing = merge_labels(&owned(&["bug"]), &[]);
        assert_eq!(from_existing.len(), 1);
        assert!(has(&from_existing, "bug"));
    }

    #[test]
    fn filters_maintainer_only_labels() {
        let suggested = owned(&["good first issue", "help wanted", "bug"]);

        let merged = merge_labels(&[], &suggested);

        assert_eq!(merged.len(), 1);
        assert!(has(&merged, "bug"));
        assert!(!has(&merged, "good first issue"));
        assert!(!has(&merged, "help wanted"));
    }

    #[test]
    fn filters_maintainer_only_case_insensitive() {
        let suggested = owned(&["Good First Issue", "HELP WANTED", "enhancement"]);

        let merged = merge_labels(&[], &suggested);

        assert_eq!(merged.len(), 1);
        assert!(has(&merged, "enhancement"));
        assert!(!has(&merged, "Good First Issue"));
        assert!(!has(&merged, "HELP WANTED"));
    }
}
1531
#[cfg(test)]
mod label_tests {
    use super::*;

    /// Builds a repository label fixture from string literals.
    fn label(name: &str, color: &str, description: &str) -> crate::ai::types::RepoLabel {
        crate::ai::types::RepoLabel {
            name: name.to_owned(),
            color: color.to_owned(),
            description: description.to_owned(),
        }
    }

    #[test]
    fn filter_labels_empty_input() {
        let labels: Vec<crate::ai::types::RepoLabel> = Vec::new();

        assert!(filter_labels_by_relevance(&labels, 30).is_empty());
    }

    #[test]
    fn filter_labels_zero_max() {
        let labels = vec![label("bug", "ff0000", "Bug report")];

        assert!(filter_labels_by_relevance(&labels, 0).is_empty());
    }

    #[test]
    fn filter_labels_priority_first() {
        let labels = vec![
            label("documentation", "0075ca", "Documentation"),
            label("other", "cccccc", "Other"),
            label("bug", "ff0000", "Bug"),
        ];

        let filtered = filter_labels_by_relevance(&labels, 30);

        let names: Vec<&str> = filtered.iter().map(|l| l.name.as_str()).collect();
        assert_eq!(names, ["documentation", "bug", "other"]);
    }

    #[test]
    fn filter_labels_case_insensitive() {
        let labels = vec![
            label("Bug", "ff0000", "Bug"),
            label("ENHANCEMENT", "a2eeef", "Enhancement"),
        ];

        let filtered = filter_labels_by_relevance(&labels, 30);

        let names: Vec<&str> = filtered.iter().map(|l| l.name.as_str()).collect();
        assert_eq!(names, ["Bug", "ENHANCEMENT"]);
    }

    #[test]
    fn filter_labels_over_limit_with_priorities() {
        // Twenty filler labels first, with the two priority labels at the very end.
        let mut labels: Vec<crate::ai::types::RepoLabel> = (0..20)
            .map(|i| label(&format!("label{i}"), "cccccc", &format!("Label {i}")))
            .collect();
        labels.push(label("bug", "ff0000", "Bug"));
        labels.push(label("enhancement", "a2eeef", "Enhancement"));

        let filtered = filter_labels_by_relevance(&labels, 10);

        assert_eq!(filtered.len(), 10);
        let leading: Vec<&str> = filtered
            .iter()
            .take(2)
            .map(|l| l.name.as_str())
            .collect();
        assert_eq!(leading, ["bug", "enhancement"]);
    }
}
1627
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `reference` resolves to issue 5836 of `block/goose`.
    fn assert_block_goose_5836(reference: &str, repo_context: Option<&str>) {
        let (owner, repo, number) = parse_issue_reference(reference, repo_context).unwrap();
        assert_eq!(owner, "block");
        assert_eq!(repo, "goose");
        assert_eq!(number, 5836);
    }

    /// Asserts that parsing `reference` fails with a message containing `needle`.
    fn assert_parse_error(reference: &str, repo_context: Option<&str>, needle: &str) {
        let error = parse_issue_reference(reference, repo_context).unwrap_err();
        assert!(error.to_string().contains(needle));
    }

    #[test]
    fn parse_reference_full_url() {
        assert_block_goose_5836("https://github.com/block/goose/issues/5836", None);
    }

    #[test]
    fn parse_reference_short_form() {
        assert_block_goose_5836("block/goose#5836", None);
    }

    #[test]
    fn parse_reference_short_form_with_context() {
        // An explicit owner/repo in the reference wins over the context.
        assert_block_goose_5836("block/goose#5836", Some("astral-sh/ruff"));
    }

    #[test]
    fn parse_reference_bare_number_with_context() {
        assert_block_goose_5836("5836", Some("block/goose"));
    }

    #[test]
    fn parse_reference_bare_number_without_context() {
        assert_parse_error(
            "5836",
            None,
            "Bare issue number requires repository context",
        );
    }

    #[test]
    fn parse_reference_invalid_short_form_missing_slash() {
        assert_parse_error("owner#123", None, "Invalid owner/repo format");
    }

    #[test]
    fn parse_reference_invalid_short_form_extra_slash() {
        assert_parse_error("owner/repo/extra#123", None, "Invalid owner/repo format");
    }

    #[test]
    fn parse_reference_invalid_bare_number() {
        assert_parse_error(
            "abc",
            Some("block/goose"),
            "Invalid issue reference format",
        );
    }

    #[test]
    fn parse_reference_whitespace_trimming() {
        assert_block_goose_5836("  block/goose#5836  ", None);
    }

    #[test]
    fn parse_reference_bare_number_whitespace() {
        assert_block_goose_5836("  5836  ", Some("block/goose"));
    }

    #[test]
    fn extract_keywords_filters_stop_words() {
        let keywords = extract_keywords("The issue is about a bug in the CLI");

        for stop_word in ["the", "is", "a"] {
            assert!(!keywords.iter().any(|k| k == stop_word));
        }
        for kept in ["issue", "bug", "cli"] {
            assert!(keywords.iter().any(|k| k == kept));
        }
    }

    #[test]
    fn extract_keywords_limits_to_five() {
        let keywords = extract_keywords("one two three four five six seven eight nine ten");

        assert_eq!(keywords.len(), 5);
    }

    #[test]
    fn extract_keywords_empty_title() {
        // Every word in this title is a stop word.
        assert!(extract_keywords("the a an and or").is_empty());
    }

    #[test]
    fn extract_keywords_lowercase_conversion() {
        let keywords = extract_keywords("CLI Bug FIX");

        for keyword in &keywords {
            for ch in keyword.chars() {
                assert!(ch.is_lowercase());
            }
        }
    }
}