aptu_core/github/
issues.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! GitHub issue operations for the triage command.
4//!
5//! Provides functionality to parse issue URLs, fetch issue details,
6//! and post triage comments.
7
8use anyhow::{Context, Result};
9use backon::Retryable;
10use octocrab::Octocrab;
11use serde::{Deserialize, Serialize};
12use tracing::{debug, instrument};
13
14use super::{ReferenceKind, parse_github_reference};
15use crate::ai::types::{IssueComment, IssueDetails, RepoIssueContext};
16use crate::retry::retry_backoff;
17use crate::utils::is_priority_label;
18
19/// A GitHub issue without labels (untriaged).
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct UntriagedIssue {
22    /// Issue number.
23    pub number: u64,
24    /// Issue title.
25    pub title: String,
26    /// Creation timestamp (ISO 8601).
27    pub created_at: String,
28    /// Issue URL.
29    pub url: String,
30}
31
32/// A single entry in a Git tree response.
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct GitTreeEntry {
35    /// File path relative to repository root.
36    pub path: String,
37    /// Type of entry: "blob" (file) or "tree" (directory).
38    #[serde(rename = "type")]
39    pub type_: String,
40    /// File mode (e.g., "100644" for regular files).
41    pub mode: String,
42    /// SHA-1 hash of the entry.
43    pub sha: String,
44}
45
46/// Response from GitHub Git Trees API.
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct GitTreeResponse {
49    /// List of entries in the tree.
50    pub tree: Vec<GitTreeEntry>,
51    /// Whether the tree is truncated (too many entries).
52    pub truncated: bool,
53}
54
55/// Parses an owner/repo string to extract owner and repo.
56///
57/// Validates format: exactly one `/`, non-empty parts.
58///
59/// # Errors
60///
61/// Returns an error if the format is invalid.
62pub fn parse_owner_repo(s: &str) -> Result<(String, String)> {
63    let parts: Vec<&str> = s.split('/').collect();
64    if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
65        anyhow::bail!(
66            "Invalid owner/repo format.\n\
67             Expected: owner/repo\n\
68             Got: {s}"
69        );
70    }
71    Ok((parts[0].to_string(), parts[1].to_string()))
72}
73
74/// Parses a GitHub issue reference in multiple formats.
75///
76/// Supports:
77/// - Full URL: `https://github.com/owner/repo/issues/123`
78/// - Short form: `owner/repo#123`
79/// - Bare number: `123` (requires `repo_context`)
80///
81/// # Arguments
82///
83/// * `input` - The issue reference to parse
84/// * `repo_context` - Optional repository context for bare numbers (e.g., "owner/repo")
85///
86/// # Errors
87///
88/// Returns an error if the format is invalid or bare number is used without context.
89pub fn parse_issue_reference(
90    input: &str,
91    repo_context: Option<&str>,
92) -> Result<(String, String, u64)> {
93    parse_github_reference(ReferenceKind::Issue, input, repo_context)
94}
95
96/// Fetches issue details including comments from GitHub.
97///
98/// # Errors
99///
100/// Returns an error if the API request fails or the issue is not found.
101#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
102pub async fn fetch_issue_with_comments(
103    client: &Octocrab,
104    owner: &str,
105    repo: &str,
106    number: u64,
107) -> Result<IssueDetails> {
108    debug!("Fetching issue details");
109
110    // Fetch the issue with retry logic
111    let issue = (|| async {
112        client
113            .issues(owner, repo)
114            .get(number)
115            .await
116            .map_err(|e| anyhow::anyhow!(e))
117    })
118    .retry(retry_backoff())
119    .notify(|err, dur| {
120        tracing::warn!(
121            error = %err,
122            retry_after = ?dur,
123            "Retrying fetch_issue_with_comments (issue fetch)"
124        );
125    })
126    .await
127    .with_context(|| format!("Failed to fetch issue #{number} from {owner}/{repo}"))?;
128
129    // Fetch comments (limited to first page) with retry logic
130    let comments_page = (|| async {
131        client
132            .issues(owner, repo)
133            .list_comments(number)
134            .per_page(5)
135            .send()
136            .await
137            .map_err(|e| anyhow::anyhow!(e))
138    })
139    .retry(retry_backoff())
140    .notify(|err, dur| {
141        tracing::warn!(
142            error = %err,
143            retry_after = ?dur,
144            "Retrying fetch_issue_with_comments (comments fetch)"
145        );
146    })
147    .await
148    .with_context(|| format!("Failed to fetch comments for issue #{number}"))?;
149
150    // Convert to our types
151    let labels: Vec<String> = issue.labels.iter().map(|l| l.name.clone()).collect();
152
153    let comments: Vec<IssueComment> = comments_page
154        .items
155        .iter()
156        .map(|c| IssueComment {
157            author: c.user.login.clone(),
158            body: c.body.clone().unwrap_or_default(),
159        })
160        .collect();
161
162    let issue_url = issue.html_url.to_string();
163
164    let details = IssueDetails::builder()
165        .owner(owner.to_string())
166        .repo(repo.to_string())
167        .number(number)
168        .title(issue.title)
169        .body(issue.body.unwrap_or_default())
170        .labels(labels)
171        .comments(comments)
172        .url(issue_url)
173        .build();
174
175    debug!(
176        labels = details.labels.len(),
177        comments = details.comments.len(),
178        "Fetched issue details"
179    );
180
181    Ok(details)
182}
183
/// Extracts up to five keywords from an issue title.
///
/// The title is lowercased and split on every non-alphanumeric character.
/// Empty fragments and common English stop words are dropped, and the first
/// five remaining words are returned in their original order. Duplicates are
/// kept.
///
/// # Arguments
///
/// * `title` - Issue title to extract keywords from
///
/// # Returns
///
/// Vector of lowercase keywords (max 5), excluding stop words.
pub fn extract_keywords(title: &str) -> Vec<String> {
    const MAX_KEYWORDS: usize = 5;
    const STOP_WORDS: &[&str] = &[
        "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "he", "in", "is",
        "it", "its", "of", "on", "or", "that", "the", "to", "was", "will", "with",
    ];

    let lowered = title.to_lowercase();
    let mut keywords = Vec::new();

    for token in lowered.split(|c: char| !c.is_alphanumeric()) {
        if token.is_empty() || STOP_WORDS.contains(&token) {
            continue;
        }

        keywords.push(token.to_owned());
        if keywords.len() == MAX_KEYWORDS {
            break;
        }
    }

    keywords
}
213
214/// Searches for related issues in a repository based on title keywords.
215///
216/// Extracts keywords from the issue title and searches the repository
217/// for matching issues. Returns up to 20 results, excluding the specified issue.
218///
219/// # Arguments
220///
221/// * `client` - Authenticated Octocrab client
222/// * `owner` - Repository owner
223/// * `repo` - Repository name
224/// * `title` - Issue title to extract keywords from
225/// * `exclude_number` - Issue number to exclude from results
226///
227/// # Errors
228///
229/// Returns an error if the search API request fails.
230#[instrument(skip(client), fields(owner = %owner, repo = %repo, exclude_number = %exclude_number))]
231pub async fn search_related_issues(
232    client: &Octocrab,
233    owner: &str,
234    repo: &str,
235    title: &str,
236    exclude_number: u64,
237) -> Result<Vec<RepoIssueContext>> {
238    let keywords = extract_keywords(title);
239
240    if keywords.is_empty() {
241        debug!("No keywords extracted from title");
242        return Ok(Vec::new());
243    }
244
245    // Build search query: keyword1 keyword2 ... repo:owner/repo is:issue
246    let query = format!("{} repo:{}/{} is:issue", keywords.join(" "), owner, repo);
247
248    debug!(query = %query, "Searching for related issues");
249
250    // Search for issues with retry logic
251    let search_result = (|| async {
252        client
253            .search()
254            .issues_and_pull_requests(&query)
255            .per_page(20)
256            .send()
257            .await
258            .map_err(|e| anyhow::anyhow!(e))
259    })
260    .retry(retry_backoff())
261    .notify(|err, dur| {
262        tracing::warn!(
263            error = %err,
264            retry_after = ?dur,
265            "Retrying search_related_issues"
266        );
267    })
268    .await
269    .with_context(|| format!("Failed to search for related issues in {owner}/{repo}"))?;
270
271    // Convert to our context type
272    let related: Vec<RepoIssueContext> = search_result
273        .items
274        .iter()
275        .filter_map(|item| {
276            // Only include issues (not PRs)
277            if item.pull_request.is_some() {
278                return None;
279            }
280
281            // Exclude the issue being triaged
282            if item.number == exclude_number {
283                return None;
284            }
285
286            Some(RepoIssueContext {
287                number: item.number,
288                title: item.title.clone(),
289                labels: item.labels.iter().map(|l| l.name.clone()).collect(),
290                state: format!("{:?}", item.state).to_lowercase(),
291            })
292        })
293        .collect();
294
295    debug!(count = related.len(), "Found related issues");
296
297    Ok(related)
298}
299
300/// Posts a triage comment to a GitHub issue.
301///
302/// # Returns
303///
304/// The URL of the created comment.
305///
306/// # Errors
307///
308/// Returns an error if the API request fails.
309#[instrument(skip(client, body), fields(owner = %owner, repo = %repo, number = number))]
310pub async fn post_comment(
311    client: &Octocrab,
312    owner: &str,
313    repo: &str,
314    number: u64,
315    body: &str,
316) -> Result<String> {
317    debug!("Posting triage comment");
318
319    let comment = client
320        .issues(owner, repo)
321        .create_comment(number, body)
322        .await
323        .with_context(|| format!("Failed to post comment to issue #{number}"))?;
324
325    let comment_url = comment.html_url.to_string();
326
327    debug!(url = %comment_url, "Comment posted successfully");
328
329    Ok(comment_url)
330}
331
332/// Creates a new GitHub issue.
333///
334/// Posts a new issue with the given title and body to the repository.
335/// Returns the issue URL and issue number.
336///
337/// # Arguments
338///
339/// * `client` - Authenticated Octocrab client
340/// * `owner` - Repository owner
341/// * `repo` - Repository name
342/// * `title` - Issue title
343/// * `body` - Issue body (markdown)
344///
345/// # Errors
346///
347/// Returns an error if the GitHub API call fails.
348#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
349pub async fn create_issue(
350    client: &Octocrab,
351    owner: &str,
352    repo: &str,
353    title: &str,
354    body: &str,
355) -> Result<(String, u64)> {
356    debug!("Creating GitHub issue");
357
358    let issue = client
359        .issues(owner, repo)
360        .create(title)
361        .body(body)
362        .send()
363        .await
364        .with_context(|| format!("Failed to create issue in {owner}/{repo}"))?;
365
366    let issue_url = issue.html_url.to_string();
367    let issue_number = issue.number;
368
369    debug!(number = issue_number, url = %issue_url, "Issue created successfully");
370
371    Ok((issue_url, issue_number))
372}
373
/// Outcome of applying labels and a milestone to an issue.
#[derive(Debug, Clone)]
pub struct ApplyResult {
    /// Labels sent to GitHub for the issue.
    ///
    /// As filled in by `update_issue_labels_and_milestone`, this is the merged
    /// set: the issue's existing labels plus the accepted suggestions.
    pub applied_labels: Vec<String>,
    /// Title of the milestone that was set, or `None` when none was applied.
    pub applied_milestone: Option<String>,
    /// Human-readable notes about suggestions that could not be applied.
    pub warnings: Vec<String>,
}
384
385/// Merges existing and suggested labels additively.
386/// Labels that should only be applied by maintainers, not by AI suggestions
387const MAINTAINER_ONLY_LABELS: &[&str] = &["good first issue", "help wanted"];
388
389///
390/// Implements additive label merging with priority label handling:
391/// - If existing labels contain a priority label (p[0-9]), skip AI-suggested priority labels
392/// - Merge remaining labels with case-insensitive deduplication
393/// - Preserve all existing labels
394///
395/// # Arguments
396///
397/// * `existing_labels` - Labels currently on the issue
398/// * `suggested_labels` - Labels suggested by AI
399///
400/// # Returns
401///
402/// Merged label list with duplicates removed (case-insensitive)
403fn merge_labels(existing_labels: &[String], suggested_labels: &[String]) -> Vec<String> {
404    // Check if existing labels contain a priority label
405    let has_priority = existing_labels.iter().any(|label| is_priority_label(label));
406
407    // Start with existing labels
408    let mut merged = existing_labels.to_vec();
409
410    // Add suggested labels, filtering out priority labels if existing has one
411    for suggested in suggested_labels {
412        // Skip priority labels if existing already has one
413        if is_priority_label(suggested) && has_priority {
414            continue;
415        }
416
417        // Skip maintainer-only labels
418        if MAINTAINER_ONLY_LABELS
419            .iter()
420            .any(|&m| m.eq_ignore_ascii_case(suggested))
421        {
422            continue;
423        }
424
425        // Add if not already present (case-insensitive check)
426        if !merged
427            .iter()
428            .any(|l| l.to_lowercase() == suggested.to_lowercase())
429        {
430            merged.push(suggested.clone());
431        }
432    }
433
434    merged
435}
436
437/// Updates an issue with labels and milestone.
438///
439/// Applies labels additively by merging existing and suggested labels.
440/// Validates suggestions against available options before applying.
441/// Returns what was actually applied and any warnings.
442///
443/// # Errors
444///
445/// Returns an error if the GitHub API call fails.
446#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
447#[allow(clippy::too_many_arguments)]
448pub async fn update_issue_labels_and_milestone(
449    client: &Octocrab,
450    owner: &str,
451    repo: &str,
452    number: u64,
453    existing_labels: &[String],
454    suggested_labels: &[String],
455    existing_milestone: Option<&str>,
456    suggested_milestone: Option<&str>,
457    available_labels: &[crate::ai::types::RepoLabel],
458    available_milestones: &[crate::ai::types::RepoMilestone],
459) -> Result<ApplyResult> {
460    debug!("Updating issue with labels and milestone");
461
462    let mut warnings = Vec::new();
463
464    // Validate and collect labels
465    let available_label_names: std::collections::HashSet<_> =
466        available_labels.iter().map(|l| l.name.as_str()).collect();
467
468    // Validate suggested labels
469    let mut valid_suggested = Vec::new();
470    for label in suggested_labels {
471        if available_label_names.contains(label.as_str()) {
472            valid_suggested.push(label.clone());
473        } else {
474            warnings.push(format!("Label '{label}' not found in repository"));
475        }
476    }
477
478    // Merge existing and suggested labels additively
479    let applied_labels = merge_labels(existing_labels, &valid_suggested);
480
481    // Validate and find milestone (only set if issue has no existing milestone)
482    let applied_milestone = if existing_milestone.is_none() {
483        if let Some(milestone_title) = suggested_milestone {
484            if let Some(milestone) = available_milestones
485                .iter()
486                .find(|m| m.title == milestone_title)
487            {
488                Some(milestone.title.clone())
489            } else {
490                warnings.push(format!(
491                    "Milestone '{milestone_title}' not found in repository"
492                ));
493                None
494            }
495        } else {
496            None
497        }
498    } else {
499        None
500    };
501
502    // Apply updates to the issue
503    let issues_handler = client.issues(owner, repo);
504    let mut update_builder = issues_handler.update(number);
505
506    if !applied_labels.is_empty() {
507        update_builder = update_builder.labels(&applied_labels);
508    }
509
510    #[allow(clippy::collapsible_if)]
511    if let Some(milestone_title) = &applied_milestone {
512        if let Some(milestone) = available_milestones
513            .iter()
514            .find(|m| &m.title == milestone_title)
515        {
516            update_builder = update_builder.milestone(milestone.number);
517        }
518    }
519
520    update_builder
521        .send()
522        .await
523        .with_context(|| format!("Failed to update issue #{number}"))?;
524
525    debug!(
526        labels = ?applied_labels,
527        milestone = ?applied_milestone,
528        warnings = ?warnings,
529        "Issue updated successfully"
530    );
531
532    Ok(ApplyResult {
533        applied_labels,
534        applied_milestone,
535        warnings,
536    })
537}
538
539/// Apply labels to an issue or PR by number.
540///
541/// Simplified label-only application function for PRs (no milestone, no merge logic).
542/// Returns an error if the GitHub API call fails.
543#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
544pub async fn apply_labels_to_number(
545    client: &Octocrab,
546    owner: &str,
547    repo: &str,
548    number: u64,
549    labels: &[String],
550) -> Result<Vec<String>> {
551    debug!("Applying labels to issue/PR");
552
553    if labels.is_empty() {
554        debug!("No labels to apply");
555        return Ok(Vec::new());
556    }
557
558    let route = format!("/repos/{owner}/{repo}/issues/{number}/labels");
559    let payload = serde_json::json!({ "labels": labels });
560
561    client
562        .post::<_, serde_json::Value>(route, Some(&payload))
563        .await
564        .with_context(|| {
565            format!(
566                "Failed to apply labels to issue/PR #{number} in {owner}/{repo}. \
567                     Check that you have write access to the repository."
568            )
569        })?;
570
571    debug!(labels = ?labels, "Labels applied successfully");
572
573    Ok(labels.to_vec())
574}
575
/// Label names that tiered label filtering places ahead of all others.
///
/// Chosen as the labels most actionable for issue triage. Entries are
/// lowercase; `filter_labels_by_relevance` compares them case-insensitively.
const PRIORITY_LABELS: &[&str] = &[
    "bug",
    "enhancement",
    "documentation",
    "good first issue",
    "help wanted",
    "question",
    "feature",
    "fix",
    "breaking",
    "security",
    "performance",
    "breaking-change",
];
592
593/// Filters labels using tiered selection: priority labels first, then remaining labels.
594///
595/// Implements two-tier filtering:
596/// - Tier 1: Priority labels (case-insensitive matching)
597/// - Tier 2: Remaining labels to fill up to `max_labels`
598///
599/// This ensures the AI sees the most actionable labels regardless of repository size.
600///
601/// # Arguments
602///
603/// * `labels` - List of available labels from the repository
604/// * `max_labels` - Maximum number of labels to return
605///
606/// # Returns
607///
608/// Filtered list of labels with priority labels first.
609#[must_use]
610pub fn filter_labels_by_relevance(
611    labels: &[crate::ai::types::RepoLabel],
612    max_labels: usize,
613) -> Vec<crate::ai::types::RepoLabel> {
614    if labels.is_empty() || max_labels == 0 {
615        return Vec::new();
616    }
617
618    let mut priority_labels = Vec::new();
619    let mut other_labels = Vec::new();
620
621    // Separate labels into priority and other
622    for label in labels {
623        let label_lower = label.name.to_lowercase();
624        let is_priority = PRIORITY_LABELS
625            .iter()
626            .any(|&p| label_lower == p.to_lowercase());
627
628        if is_priority {
629            priority_labels.push(label.clone());
630        } else {
631            other_labels.push(label.clone());
632        }
633    }
634
635    // Combine: priority labels first, then fill remaining slots with other labels
636    let mut result = priority_labels;
637    let remaining_slots = max_labels.saturating_sub(result.len());
638    result.extend(other_labels.into_iter().take(remaining_slots));
639
640    // Limit to max_labels
641    result.truncate(max_labels);
642    result
643}
644
/// Directory patterns whose files are dropped entirely by tree filtering.
///
/// Based on GitHub Linguist vendor.yml and common build artifacts; also
/// covers documentation and example directories.
const EXCLUDE_PATTERNS: &[&str] = &[
    "node_modules/",
    "vendor/",
    "dist/",
    "build/",
    "target/",
    ".git/",
    "cache/",
    "docs/",
    "examples/",
];

/// Directory patterns for test and benchmark code, which is less relevant to
/// issue triage.
///
/// Files under these directories are not used as generic filler in tree
/// filtering; they can still be selected through a keyword match or as a
/// language entry point.
const DEPRIORITIZE_PATTERNS: &[&str] = &[
    "test/",
    "tests/",
    "spec/",
    "bench/",
    "eval/",
    "fixtures/",
    "mocks/",
];
670
/// Lists the conventional entry-point file names for a language.
///
/// The language name is matched case-insensitively. These files are
/// prioritized because they often contain the main logic. An unrecognized
/// language yields an empty list.
fn entry_point_patterns(language: &str) -> Vec<&'static str> {
    let normalized = language.to_lowercase();

    let names: &[&'static str] = match normalized.as_str() {
        "rust" => &["lib.rs", "mod.rs", "main.rs"],
        "python" => &["__init__.py"],
        "javascript" | "typescript" => &["index.ts", "index.js"],
        "java" => &["Main.java"],
        "go" => &["main.go"],
        "c#" | "csharp" => &["Program.cs"],
        _ => &[],
    };

    names.to_vec()
}
684
/// Looks up the usual source-file extensions (without the dot) for a language.
///
/// The language name is matched case-insensitively; an unrecognized language
/// yields an empty list.
fn get_extensions_for_language(language: &str) -> Vec<&'static str> {
    let normalized = language.to_lowercase();

    let extensions: &[&'static str] = match normalized.as_str() {
        "rust" => &["rs"],
        "python" => &["py"],
        "javascript" | "typescript" => &["js", "ts", "jsx", "tsx"],
        "java" => &["java"],
        "c" => &["c", "h"],
        "c++" | "cpp" => &["cpp", "cc", "cxx", "h", "hpp"],
        "c#" | "csharp" => &["cs"],
        "go" => &["go"],
        "ruby" => &["rb"],
        "php" => &["php"],
        "swift" => &["swift"],
        "kotlin" => &["kt"],
        "scala" => &["scala"],
        "r" => &["r"],
        "shell" | "bash" => &["sh", "bash"],
        "html" => &["html", "htm"],
        "css" => &["css", "scss", "sass"],
        "json" => &["json"],
        "yaml" | "yml" => &["yaml", "yml"],
        "toml" => &["toml"],
        "xml" => &["xml"],
        "markdown" => &["md"],
        _ => &[],
    };

    extensions.to_vec()
}
713
714/// Filters repository tree entries by relevance using tiered keyword matching.
715///
716/// Implements three-tier filtering:
717/// - Tier 1: Files matching keywords (max 35)
718/// - Tier 2: Language entry points (max 10)
719/// - Tier 3: Other relevant files (max 15)
720///
721/// Removes common non-source directories and limits results to 60 paths.
722///
723/// # Arguments
724///
725/// * `entries` - Raw tree entries from GitHub API
726/// * `language` - Repository primary language for extension filtering
727/// * `keywords` - Optional keywords extracted from issue title for relevance matching
728///
729/// # Returns
730///
731/// Filtered and sorted list of file paths (max 60).
732fn filter_tree_by_relevance(
733    entries: &[GitTreeEntry],
734    language: &str,
735    keywords: &[String],
736) -> Vec<String> {
737    let extensions = get_extensions_for_language(language);
738    let entry_points = entry_point_patterns(language);
739
740    // Filter to valid source files
741    let candidates: Vec<String> = entries
742        .iter()
743        .filter(|entry| {
744            // Only include files (blobs), not directories
745            if entry.type_ != "blob" {
746                return false;
747            }
748
749            // Exclude paths containing excluded directories
750            if EXCLUDE_PATTERNS.iter().any(|dir| entry.path.contains(dir)) {
751                return false;
752            }
753
754            // Filter by extension if language is recognized
755            if extensions.is_empty() {
756                // If language not recognized, include all files
757                true
758            } else {
759                extensions.iter().any(|ext| entry.path.ends_with(ext))
760            }
761        })
762        .map(|e| e.path.clone())
763        .collect();
764
765    // Tier 1: Files matching keywords (max 35)
766    let mut tier1: Vec<String> = Vec::new();
767    let mut remaining: Vec<String> = Vec::new();
768
769    for path in candidates {
770        let path_lower = path.to_lowercase();
771        let matches_keyword = keywords.iter().any(|kw| path_lower.contains(kw));
772
773        if matches_keyword && tier1.len() < 35 {
774            tier1.push(path);
775        } else {
776            remaining.push(path);
777        }
778    }
779
780    // Tier 2: Entry point files (max 10)
781    let mut tier2: Vec<String> = Vec::new();
782    let mut tier3_candidates: Vec<String> = Vec::new();
783
784    for path in remaining {
785        let is_entry_point = entry_points.iter().any(|ep| path.ends_with(ep));
786        let is_deprioritized = DEPRIORITIZE_PATTERNS.iter().any(|dp| path.contains(dp));
787
788        if is_entry_point && tier2.len() < 10 {
789            tier2.push(path);
790        } else if !is_deprioritized {
791            tier3_candidates.push(path);
792        }
793    }
794
795    // Tier 3: Other relevant files (max 15)
796    let mut tier3: Vec<String> = tier3_candidates.into_iter().take(15).collect();
797
798    // Combine and sort by depth within each tier
799    let mut result = tier1;
800    result.append(&mut tier2);
801    result.append(&mut tier3);
802
803    // Sort by path depth (fewer slashes first), then alphabetically
804    result.sort_by(|a, b| {
805        let depth_a = a.matches('/').count();
806        let depth_b = b.matches('/').count();
807        if depth_a == depth_b {
808            a.cmp(b)
809        } else {
810            depth_a.cmp(&depth_b)
811        }
812    });
813
814    // Limit to 60 paths
815    result.truncate(60);
816    result
817}
818
819/// Fetches the repository file tree from GitHub.
820///
821/// Attempts to fetch from the default branch (main, then master).
822/// Returns filtered list of source file paths based on repository language and optional keywords.
823///
824/// # Arguments
825///
826/// * `client` - Authenticated Octocrab client
827/// * `owner` - Repository owner
828/// * `repo` - Repository name
829/// * `language` - Repository primary language for filtering
830/// * `keywords` - Optional keywords extracted from issue title for relevance matching
831///
832/// # Errors
833///
834/// Returns an error if the API request fails (but not if tree is unavailable).
835#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
836pub async fn fetch_repo_tree(
837    client: &Octocrab,
838    owner: &str,
839    repo: &str,
840    language: &str,
841    keywords: &[String],
842) -> Result<Vec<String>> {
843    debug!("Fetching repository tree");
844
845    // Try main branch first, then master
846    let branches = ["main", "master"];
847    let mut tree_response: Option<GitTreeResponse> = None;
848
849    for branch in &branches {
850        let route = format!("/repos/{owner}/{repo}/git/trees/{branch}?recursive=1");
851        let result = (|| async {
852            client
853                .get::<GitTreeResponse, _, _>(&route, None::<&()>)
854                .await
855                .map_err(|e| anyhow::anyhow!(e))
856        })
857        .retry(retry_backoff())
858        .notify(|err, dur| {
859            tracing::warn!(
860                error = %err,
861                retry_after = ?dur,
862                branch = %branch,
863                "Retrying fetch_repo_tree"
864            );
865        })
866        .await;
867
868        match result {
869            Ok(response) => {
870                tree_response = Some(response);
871                debug!(branch = %branch, "Fetched tree from branch");
872                break;
873            }
874            Err(e) => {
875                debug!(branch = %branch, error = %e, "Failed to fetch tree from branch");
876            }
877        }
878    }
879
880    let response =
881        tree_response.context("Failed to fetch repository tree from main or master branch")?;
882
883    let filtered = filter_tree_by_relevance(&response.tree, language, keywords);
884    debug!(count = filtered.len(), "Filtered tree entries");
885
886    Ok(filtered)
887}
888
889/// Fetches issues needing triage from a specific repository.
890///
891/// In default mode (force=false), returns issues that are either unlabeled OR missing a milestone.
892/// In force mode (force=true), returns ALL open issues with no filtering.
893///
894/// # Arguments
895///
896/// * `client` - The Octocrab GitHub client
897/// * `owner` - Repository owner
898/// * `repo` - Repository name
899/// * `since` - Optional RFC3339 timestamp to filter issues created after this date (client-side filtering)
900/// * `force` - If true, return all issues in the specified state; if false, filter to unlabeled or milestone-missing issues
901/// * `state` - Issue state filter (Open, Closed, or All)
902///
903/// # Errors
904///
905/// Returns an error if the REST API request fails.
906#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
907pub async fn fetch_issues_needing_triage(
908    client: &Octocrab,
909    owner: &str,
910    repo: &str,
911    since: Option<&str>,
912    force: bool,
913    state: octocrab::params::State,
914) -> Result<Vec<UntriagedIssue>> {
915    debug!("Fetching issues needing triage");
916
917    let issues_page: octocrab::Page<octocrab::models::issues::Issue> = client
918        .issues(owner, repo)
919        .list()
920        .state(state)
921        .per_page(100)
922        .send()
923        .await
924        .context("Failed to fetch issues from repository")?;
925
926    let total_issues = issues_page.items.len();
927
928    let mut issues_needing_triage: Vec<UntriagedIssue> = issues_page
929        .items
930        .into_iter()
931        .filter(|issue| {
932            if force {
933                true
934            } else {
935                issue.labels.is_empty() || issue.milestone.is_none()
936            }
937        })
938        .map(|issue| UntriagedIssue {
939            number: issue.number,
940            title: issue.title,
941            created_at: issue.created_at.to_rfc3339(),
942            url: issue.html_url.to_string(),
943        })
944        .collect();
945
946    if let Some(since_date) = since
947        && let Ok(since_timestamp) = chrono::DateTime::parse_from_rfc3339(since_date)
948    {
949        issues_needing_triage.retain(|issue| {
950            if let Ok(created_at) = chrono::DateTime::parse_from_rfc3339(&issue.created_at) {
951                created_at >= since_timestamp
952            } else {
953                true
954            }
955        });
956    }
957
958    debug!(
959        total_issues = total_issues,
960        issues_needing_triage_count = issues_needing_triage.len(),
961        "Fetched issues needing triage"
962    );
963
964    Ok(issues_needing_triage)
965}
966
#[cfg(test)]
mod fetch_issues_needing_triage_tests {
    /// Mirrors the per-issue predicate used by `fetch_issues_needing_triage`:
    /// `force` selects everything, otherwise an issue is selected when it has
    /// no labels or no milestone. Keep this in sync with that function.
    fn needs_triage(labels_empty: bool, milestone_none: bool, force: bool) -> bool {
        force || labels_empty || milestone_none
    }

    #[test]
    fn filter_logic_unlabeled_default_mode() {
        // Unlabeled but with a milestone: the label condition alone must be
        // enough to select the issue.
        assert!(needs_triage(true, false, false));
        // Unlabeled and without a milestone is selected as well.
        assert!(needs_triage(true, true, false));
    }

    #[test]
    fn filter_logic_labeled_default_mode() {
        // Having labels does not exempt an issue that still lacks a milestone.
        assert!(needs_triage(false, true, false));
    }

    #[test]
    fn filter_logic_missing_milestone_default_mode() {
        // Same inputs as the labeled case, viewed from the milestone side: a
        // missing milestone alone is enough to select the issue.
        assert!(needs_triage(false, true, false));
    }

    #[test]
    fn filter_logic_force_mode_returns_all() {
        // A fully triaged issue is still returned when `force` is set.
        assert!(needs_triage(false, false, true));
    }

    #[test]
    fn filter_logic_fully_triaged_default_mode_excluded() {
        // Labeled, has a milestone, no `force`: nothing left to triage.
        assert!(!needs_triage(false, false, false));
    }
}
1044
#[cfg(test)]
mod tree_tests {
    use super::*;

    /// Builds a regular-file tree entry (`blob`, mode `100644`) for `path`.
    fn blob(path: &str, sha: &str) -> GitTreeEntry {
        GitTreeEntry {
            path: String::from(path),
            type_: String::from("blob"),
            mode: String::from("100644"),
            sha: String::from(sha),
        }
    }

    #[test]
    fn filter_tree_by_relevance_keyword_matching() {
        let entries = vec![
            blob("src/parser.rs", "abc123"),
            blob("src/main.rs", "def456"),
            blob("src/utils.rs", "ghi789"),
        ];
        let keywords = vec![String::from("parser")];

        // The file whose path matches the keyword must be selected.
        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);
        assert!(filtered.contains(&String::from("src/parser.rs")));
    }

    #[test]
    fn filter_tree_by_relevance_entry_points() {
        let entries = vec![
            blob("src/lib.rs", "abc123"),
            blob("src/utils.rs", "def456"),
        ];
        let keywords: Vec<String> = Vec::new();

        // With no keywords, `src/lib.rs` must still be selected.
        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);
        assert!(filtered.contains(&String::from("src/lib.rs")));
    }

    #[test]
    fn filter_tree_by_relevance_excludes_tests() {
        let entries = vec![
            blob("src/main.rs", "abc123"),
            blob("tests/integration_test.rs", "def456"),
        ];
        let keywords: Vec<String> = Vec::new();

        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);
        let test_file = String::from("tests/integration_test.rs");
        let main_file = String::from("src/main.rs");
        assert!(!filtered.contains(&test_file));
        assert!(filtered.contains(&main_file));
    }

    #[test]
    fn get_extensions_for_language_rust() {
        assert_eq!(get_extensions_for_language("rust"), vec!["rs"]);
    }

    #[test]
    fn get_extensions_for_language_javascript() {
        let exts = get_extensions_for_language("javascript");
        for expected in ["js", "ts", "jsx", "tsx"] {
            assert!(exts.contains(&expected));
        }
    }

    #[test]
    fn get_extensions_for_language_unknown() {
        assert!(get_extensions_for_language("unknown_language").is_empty());
    }
}
1143
#[cfg(test)]
mod merge_labels_tests {
    use super::*;

    /// Turns a list of string literals into the owned label vector that
    /// `merge_labels` is called with.
    fn labels(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| (*name).to_string()).collect()
    }

    #[test]
    fn preserves_existing_and_adds_new() {
        let merged = merge_labels(&labels(&["bug", "enhancement"]), &labels(&["documentation"]));
        assert_eq!(merged.len(), 3);
        for expected in ["bug", "enhancement", "documentation"] {
            assert!(merged.contains(&expected.to_string()));
        }
    }

    #[test]
    fn deduplicates_case_insensitive() {
        let merged = merge_labels(&labels(&["Bug"]), &labels(&["bug", "enhancement"]));
        assert_eq!(merged.len(), 2);
        for expected in ["Bug", "enhancement"] {
            assert!(merged.contains(&expected.to_string()));
        }
    }

    #[test]
    fn skips_priority_when_existing_has_one() {
        // An uppercase P1 already exists, so the suggested p2 is dropped while
        // the non-priority suggestion is still added.
        let merged = merge_labels(&labels(&["P1"]), &labels(&["p2", "bug"]));
        assert_eq!(merged.len(), 2);
        for expected in ["P1", "bug"] {
            assert!(merged.contains(&expected.to_string()));
        }
        assert!(!merged.contains(&String::from("p2")));
    }

    #[test]
    fn handles_empty_inputs() {
        // Nothing existing: both suggestions come through.
        let from_suggested = merge_labels(&[], &labels(&["bug", "p1"]));
        assert_eq!(from_suggested.len(), 2);

        // Nothing suggested: the existing label is kept as-is.
        let from_existing = merge_labels(&labels(&["bug"]), &[]);
        assert_eq!(from_existing.len(), 1);
        assert!(from_existing.contains(&String::from("bug")));
    }

    #[test]
    fn filters_maintainer_only_labels() {
        let suggested = labels(&["good first issue", "help wanted", "bug"]);
        let merged = merge_labels(&labels(&[]), &suggested);
        assert_eq!(merged.len(), 1);
        assert!(merged.contains(&String::from("bug")));
        for rejected in ["good first issue", "help wanted"] {
            assert!(!merged.contains(&rejected.to_string()));
        }
    }

    #[test]
    fn filters_maintainer_only_case_insensitive() {
        let suggested = labels(&["Good First Issue", "HELP WANTED", "enhancement"]);
        let merged = merge_labels(&labels(&[]), &suggested);
        assert_eq!(merged.len(), 1);
        assert!(merged.contains(&String::from("enhancement")));
        for rejected in ["Good First Issue", "HELP WANTED"] {
            assert!(!merged.contains(&rejected.to_string()));
        }
    }

    #[test]
    fn skips_priority_prefix_when_existing_has_one() {
        // "priority: high" already exists, so "priority: medium" is dropped
        // while the non-priority suggestion is still added.
        let merged = merge_labels(
            &labels(&["priority: high"]),
            &labels(&["priority: medium", "bug"]),
        );
        assert_eq!(merged.len(), 2);
        for expected in ["priority: high", "bug"] {
            assert!(merged.contains(&expected.to_string()));
        }
        assert!(!merged.contains(&String::from("priority: medium")));
    }

    #[test]
    fn skips_mixed_priority_formats_when_existing_has_one() {
        // An existing "p1" also blocks a suggestion in the "priority: ..."
        // format; the non-priority suggestion is still added.
        let merged = merge_labels(&labels(&["p1"]), &labels(&["priority: high", "bug"]));
        assert_eq!(merged.len(), 2);
        for expected in ["p1", "bug"] {
            assert!(merged.contains(&expected.to_string()));
        }
        assert!(!merged.contains(&String::from("priority: high")));
    }
}
1247
#[cfg(test)]
mod label_tests {
    use super::*;

    /// Builds a `RepoLabel` fixture from string literals.
    fn repo_label(name: &str, color: &str, description: &str) -> crate::ai::types::RepoLabel {
        crate::ai::types::RepoLabel {
            name: String::from(name),
            color: String::from(color),
            description: String::from(description),
        }
    }

    #[test]
    fn filter_labels_empty_input() {
        let labels: Vec<crate::ai::types::RepoLabel> = Vec::new();
        assert!(filter_labels_by_relevance(&labels, 30).is_empty());
    }

    #[test]
    fn filter_labels_zero_max() {
        let labels = vec![repo_label("bug", "ff0000", "Bug report")];
        assert!(filter_labels_by_relevance(&labels, 0).is_empty());
    }

    #[test]
    fn filter_labels_priority_first() {
        let labels = vec![
            repo_label("documentation", "0075ca", "Documentation"),
            repo_label("other", "cccccc", "Other"),
            repo_label("bug", "ff0000", "Bug"),
        ];

        let filtered = filter_labels_by_relevance(&labels, 30);
        let names: Vec<&str> = filtered.iter().map(|label| label.name.as_str()).collect();
        // Checks both the count and the resulting order in one comparison.
        assert_eq!(names, ["documentation", "bug", "other"]);
    }

    #[test]
    fn filter_labels_case_insensitive() {
        let labels = vec![
            repo_label("Bug", "ff0000", "Bug"),
            repo_label("ENHANCEMENT", "a2eeef", "Enhancement"),
        ];

        let filtered = filter_labels_by_relevance(&labels, 30);
        let names: Vec<&str> = filtered.iter().map(|label| label.name.as_str()).collect();
        assert_eq!(names, ["Bug", "ENHANCEMENT"]);
    }

    #[test]
    fn filter_labels_over_limit_with_priorities() {
        // Twenty generic labels first, then the two that are expected to be
        // moved to the front, for 22 labels against a limit of 10.
        let mut labels: Vec<crate::ai::types::RepoLabel> = (0..20)
            .map(|i| repo_label(&format!("label{i}"), "cccccc", &format!("Label {i}")))
            .collect();
        labels.extend([
            repo_label("bug", "ff0000", "Bug"),
            repo_label("enhancement", "a2eeef", "Enhancement"),
        ]);

        let filtered = filter_labels_by_relevance(&labels, 10);
        let names: Vec<&str> = filtered.iter().map(|label| label.name.as_str()).collect();
        assert_eq!(names.len(), 10);
        assert_eq!(names[..2], ["bug", "enhancement"]);
    }
}
1343
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test only: confirms parse_issue_reference hands a full issue URL to
    // the shared parser. The exhaustive parsing cases are in github/mod.rs.
    #[test]
    fn parse_issue_reference_delegates_to_shared() {
        let url = "https://github.com/block/goose/issues/5836";
        let parsed = parse_issue_reference(url, None).unwrap();
        assert_eq!(parsed.0, "block");
        assert_eq!(parsed.1, "goose");
        assert_eq!(parsed.2, 5836);
    }

    #[test]
    fn extract_keywords_filters_stop_words() {
        let keywords = extract_keywords("The issue is about a bug in the CLI");

        for stop_word in ["the", "is", "a"] {
            assert!(!keywords.contains(&stop_word.to_string()));
        }
        for kept in ["issue", "bug", "cli"] {
            assert!(keywords.contains(&kept.to_string()));
        }
    }

    #[test]
    fn extract_keywords_limits_to_five() {
        // Ten candidate words in, five keywords out.
        let keywords = extract_keywords("one two three four five six seven eight nine ten");
        assert_eq!(keywords.len(), 5);
    }

    #[test]
    fn extract_keywords_empty_title() {
        // A title made up solely of stop words yields no keywords.
        assert!(extract_keywords("the a an and or").is_empty());
    }

    #[test]
    fn extract_keywords_lowercase_conversion() {
        let keywords = extract_keywords("CLI Bug FIX");
        for keyword in keywords.iter() {
            for ch in keyword.chars() {
                assert!(ch.is_lowercase());
            }
        }
    }
}
1391}