Skip to main content

aptu_core/github/
issues.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! GitHub issue operations for the triage command.
4//!
5//! Provides functionality to parse issue URLs, fetch issue details,
6//! and post triage comments.
7
8use anyhow::{Context, Result};
9use backon::Retryable;
10use octocrab::Octocrab;
11use serde::{Deserialize, Serialize};
12use tracing::{debug, instrument};
13
14use super::{ReferenceKind, parse_github_reference};
15use crate::ai::types::{IssueComment, IssueDetails, RepoIssueContext};
16use crate::retry::retry_backoff;
17use crate::utils::is_priority_label;
18
/// A GitHub issue without labels (untriaged).
///
/// Lightweight, serializable summary of an issue; this is the element type
/// returned by `fetch_issues_needing_triage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UntriagedIssue {
    /// Issue number.
    pub number: u64,
    /// Issue title.
    pub title: String,
    /// Creation timestamp (ISO 8601).
    pub created_at: String,
    /// Issue URL.
    pub url: String,
}
31
/// A single entry in a Git tree response.
///
/// Deserialized as an element of [`GitTreeResponse::tree`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitTreeEntry {
    /// File path relative to repository root.
    pub path: String,
    /// Type of entry: "blob" (file) or "tree" (directory).
    ///
    /// `type` is a Rust keyword, so the field is named `type_` and mapped
    /// to the JSON key `type` through the serde rename.
    #[serde(rename = "type")]
    pub type_: String,
    /// File mode (e.g., "100644" for regular files).
    pub mode: String,
    /// SHA-1 hash of the entry.
    pub sha: String,
}
45
/// Response from GitHub Git Trees API.
///
/// This is the shape `fetch_repo_tree` deserializes from
/// `/repos/{owner}/{repo}/git/trees/{branch}?recursive=1`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitTreeResponse {
    /// List of entries in the tree.
    pub tree: Vec<GitTreeEntry>,
    /// Whether the tree is truncated (too many entries).
    pub truncated: bool,
}
54
55/// Parses an owner/repo string to extract owner and repo.
56///
57/// Validates format: exactly one `/`, non-empty parts.
58///
59/// # Errors
60///
61/// Returns an error if the format is invalid.
62pub fn parse_owner_repo(s: &str) -> Result<(String, String)> {
63    let parts: Vec<&str> = s.split('/').collect();
64    if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
65        anyhow::bail!(
66            "Invalid owner/repo format.\n\
67             Expected: owner/repo\n\
68             Got: {s}"
69        );
70    }
71    Ok((parts[0].to_string(), parts[1].to_string()))
72}
73
74/// Parses a GitHub issue reference in multiple formats.
75///
76/// Supports:
77/// - Full URL: `https://github.com/owner/repo/issues/123`
78/// - Short form: `owner/repo#123`
79/// - Bare number: `123` (requires `repo_context`)
80///
81/// # Arguments
82///
83/// * `input` - The issue reference to parse
84/// * `repo_context` - Optional repository context for bare numbers (e.g., "owner/repo")
85///
86/// # Errors
87///
88/// Returns an error if the format is invalid or bare number is used without context.
89pub fn parse_issue_reference(
90    input: &str,
91    repo_context: Option<&str>,
92) -> Result<(String, String, u64)> {
93    parse_github_reference(ReferenceKind::Issue, input, repo_context)
94}
95
96/// Fetches issue details including comments from GitHub.
97///
98/// # Errors
99///
100/// Returns an error if the API request fails or the issue is not found.
101#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
102pub async fn fetch_issue_with_comments(
103    client: &Octocrab,
104    owner: &str,
105    repo: &str,
106    number: u64,
107) -> Result<IssueDetails> {
108    debug!("Fetching issue details");
109
110    // Fetch the issue with retry logic
111    let issue = (|| async {
112        client
113            .issues(owner, repo)
114            .get(number)
115            .await
116            .map_err(|e| anyhow::anyhow!(e))
117    })
118    .retry(retry_backoff())
119    .notify(|err, dur| {
120        tracing::warn!(
121            error = %err,
122            retry_after = ?dur,
123            "Retrying fetch_issue_with_comments (issue fetch)"
124        );
125    })
126    .await
127    .with_context(|| format!("Failed to fetch issue #{number} from {owner}/{repo}"))?;
128
129    // Fetch comments (limited to first page) with retry logic
130    let comments_page = (|| async {
131        client
132            .issues(owner, repo)
133            .list_comments(number)
134            .per_page(5)
135            .send()
136            .await
137            .map_err(|e| anyhow::anyhow!(e))
138    })
139    .retry(retry_backoff())
140    .notify(|err, dur| {
141        tracing::warn!(
142            error = %err,
143            retry_after = ?dur,
144            "Retrying fetch_issue_with_comments (comments fetch)"
145        );
146    })
147    .await
148    .with_context(|| format!("Failed to fetch comments for issue #{number}"))?;
149
150    // Convert to our types
151    let labels: Vec<String> = issue.labels.iter().map(|l| l.name.clone()).collect();
152
153    let comments: Vec<IssueComment> = comments_page
154        .items
155        .iter()
156        .map(|c| IssueComment {
157            id: c.id.0,
158            author: c.user.login.clone(),
159            body: c.body.clone().unwrap_or_default(),
160        })
161        .collect();
162
163    let issue_url = issue.html_url.to_string();
164
165    let details = IssueDetails::builder()
166        .owner(owner.to_string())
167        .repo(repo.to_string())
168        .number(number)
169        .title(issue.title)
170        .body(issue.body.unwrap_or_default())
171        .labels(labels)
172        .comments(comments)
173        .url(issue_url)
174        .build();
175
176    debug!(
177        labels = details.labels.len(),
178        comments = details.comments.len(),
179        "Fetched issue details"
180    );
181
182    Ok(details)
183}
184
/// Extracts keywords from an issue title for search and relevance matching.
///
/// The title is lowercased and split on every non-alphanumeric character.
/// Empty fragments and common English stop words are dropped, and only the
/// first 5 remaining words are kept, in their original order.
///
/// # Arguments
///
/// * `title` - Issue title to extract keywords from
///
/// # Returns
///
/// Vector of lowercase keywords (max 5), excluding stop words.
pub fn extract_keywords(title: &str) -> Vec<String> {
    const MAX_KEYWORDS: usize = 5;
    const STOP_WORDS: &[&str] = &[
        "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "he", "in", "is",
        "it", "its", "of", "on", "or", "that", "the", "to", "was", "will", "with",
    ];

    let lowered = title.to_lowercase();
    let mut keywords = Vec::new();

    for token in lowered.split(|c: char| !c.is_alphanumeric()) {
        if keywords.len() == MAX_KEYWORDS {
            break;
        }
        // Consecutive separators produce empty fragments; skip those and stop words.
        if token.is_empty() || STOP_WORDS.contains(&token) {
            continue;
        }
        keywords.push(token.to_string());
    }

    keywords
}
214
215/// Searches for related issues in a repository based on title keywords.
216///
217/// Extracts keywords from the issue title and searches the repository
218/// for matching issues. Returns up to 20 results, excluding the specified issue.
219///
220/// # Arguments
221///
222/// * `client` - Authenticated Octocrab client
223/// * `owner` - Repository owner
224/// * `repo` - Repository name
225/// * `title` - Issue title to extract keywords from
226/// * `exclude_number` - Issue number to exclude from results
227///
228/// # Errors
229///
230/// Returns an error if the search API request fails.
231#[instrument(skip(client), fields(owner = %owner, repo = %repo, exclude_number = %exclude_number))]
232pub async fn search_related_issues(
233    client: &Octocrab,
234    owner: &str,
235    repo: &str,
236    title: &str,
237    exclude_number: u64,
238) -> Result<Vec<RepoIssueContext>> {
239    let keywords = extract_keywords(title);
240
241    if keywords.is_empty() {
242        debug!("No keywords extracted from title");
243        return Ok(Vec::new());
244    }
245
246    // Build search query: keyword1 keyword2 ... repo:owner/repo is:issue
247    let query = format!("{} repo:{}/{} is:issue", keywords.join(" "), owner, repo);
248
249    debug!(query = %query, "Searching for related issues");
250
251    // Search for issues with retry logic
252    let search_result = (|| async {
253        client
254            .search()
255            .issues_and_pull_requests(&query)
256            .per_page(20)
257            .send()
258            .await
259            .map_err(|e| anyhow::anyhow!(e))
260    })
261    .retry(retry_backoff())
262    .notify(|err, dur| {
263        tracing::warn!(
264            error = %err,
265            retry_after = ?dur,
266            "Retrying search_related_issues"
267        );
268    })
269    .await
270    .with_context(|| format!("Failed to search for related issues in {owner}/{repo}"))?;
271
272    // Convert to our context type
273    let related: Vec<RepoIssueContext> = search_result
274        .items
275        .iter()
276        .filter_map(|item| {
277            // Only include issues (not PRs)
278            if item.pull_request.is_some() {
279                return None;
280            }
281
282            // Exclude the issue being triaged
283            if item.number == exclude_number {
284                return None;
285            }
286
287            Some(RepoIssueContext {
288                number: item.number,
289                title: item.title.clone(),
290                labels: item.labels.iter().map(|l| l.name.clone()).collect(),
291                state: format!("{:?}", item.state).to_lowercase(),
292            })
293        })
294        .collect();
295
296    debug!(count = related.len(), "Found related issues");
297
298    Ok(related)
299}
300
301/// Posts a triage comment to a GitHub issue.
302///
303/// # Returns
304///
305/// The URL of the created comment.
306///
307/// # Errors
308///
309/// Returns an error if the API request fails.
310#[instrument(skip(client, body), fields(owner = %owner, repo = %repo, number = number))]
311pub async fn post_comment(
312    client: &Octocrab,
313    owner: &str,
314    repo: &str,
315    number: u64,
316    body: &str,
317) -> Result<String> {
318    debug!("Posting triage comment");
319
320    let comment = client
321        .issues(owner, repo)
322        .create_comment(number, body)
323        .await
324        .with_context(|| format!("Failed to post comment to issue #{number}"))?;
325
326    let comment_url = comment.html_url.to_string();
327
328    debug!(url = %comment_url, "Comment posted successfully");
329
330    Ok(comment_url)
331}
332
333/// Deletes a comment from a GitHub issue.
334///
335/// # Errors
336///
337/// Returns an error if the API request fails. 404 errors (comment not found)
338/// are treated as success (idempotent).
339#[instrument(skip(client), fields(owner = %owner, repo = %repo, comment_id = comment_id))]
340pub async fn delete_issue_comment(
341    client: &Octocrab,
342    owner: &str,
343    repo: &str,
344    comment_id: u64,
345) -> Result<()> {
346    debug!("Deleting issue comment");
347
348    let route = format!("/repos/{owner}/{repo}/issues/comments/{comment_id}");
349
350    // Use generic delete method; needs explicit empty object body type
351    let empty_body = serde_json::json!({});
352    let result: std::result::Result<serde_json::Value, _> =
353        client.delete(&route, Some(&empty_body)).await;
354
355    match result {
356        Ok(_) => {
357            debug!("Comment deleted successfully");
358            Ok(())
359        }
360        Err(e)
361            if let octocrab::Error::GitHub { source, .. } = &e
362                && source.status_code.as_u16() == 404 =>
363        {
364            debug!("Comment already deleted (404); treating as success");
365            Ok(())
366        }
367        Err(e) => Err(e).with_context(|| format!("Failed to delete comment #{comment_id}")),
368    }
369}
370
371/// Removes a label from a GitHub issue.
372///
373/// # Errors
374///
375/// Returns an error if the API request fails. 404 errors (label not found)
376/// are treated as success (idempotent).
377#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number, label = label))]
378pub async fn remove_issue_label(
379    client: &Octocrab,
380    owner: &str,
381    repo: &str,
382    number: u64,
383    label: &str,
384) -> Result<()> {
385    debug!("Removing label from issue");
386
387    // URL-encode label name using percent-encoding (handle spaces, special chars)
388    let encoded_label =
389        percent_encoding::percent_encode(label.as_bytes(), percent_encoding::NON_ALPHANUMERIC)
390            .to_string();
391    let route = format!("/repos/{owner}/{repo}/issues/{number}/labels/{encoded_label}");
392
393    // Use generic delete method; needs explicit empty object body type
394    let empty_body = serde_json::json!({});
395    let result: std::result::Result<serde_json::Value, _> =
396        client.delete(&route, Some(&empty_body)).await;
397
398    match result {
399        Ok(_) => {
400            debug!("Label removed successfully");
401            Ok(())
402        }
403        Err(e)
404            if let octocrab::Error::GitHub { source, .. } = &e
405                && source.status_code.as_u16() == 404 =>
406        {
407            debug!("Label not found (404); treating as success");
408            Ok(())
409        }
410        Err(e) => {
411            Err(e).with_context(|| format!("Failed to remove label '{label}' from issue #{number}"))
412        }
413    }
414}
415
416/// Creates a new GitHub issue.
417///
418/// Posts a new issue with the given title and body to the repository.
419/// Returns the issue URL and issue number.
420///
421/// # Arguments
422///
423/// * `client` - Authenticated Octocrab client
424/// * `owner` - Repository owner
425/// * `repo` - Repository name
426/// * `title` - Issue title
427/// * `body` - Issue body (markdown)
428///
429/// # Errors
430///
431/// Returns an error if the GitHub API call fails.
432#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
433pub async fn create_issue(
434    client: &Octocrab,
435    owner: &str,
436    repo: &str,
437    title: &str,
438    body: &str,
439) -> Result<(String, u64)> {
440    debug!("Creating GitHub issue");
441
442    let issue = client
443        .issues(owner, repo)
444        .create(title)
445        .body(body)
446        .send()
447        .await
448        .with_context(|| format!("Failed to create issue in {owner}/{repo}"))?;
449
450    let issue_url = issue.html_url.to_string();
451    let issue_number = issue.number;
452
453    debug!(number = issue_number, url = %issue_url, "Issue created successfully");
454
455    Ok((issue_url, issue_number))
456}
457
/// Result of applying labels and milestone to an issue.
///
/// Returned by `update_issue_labels_and_milestone`.
#[derive(Debug, Clone)]
pub struct ApplyResult {
    /// Labels that were successfully applied.
    pub applied_labels: Vec<String>,
    /// Milestone that was successfully applied, if any.
    pub applied_milestone: Option<String>,
    /// Warnings about labels or milestones that could not be applied.
    pub warnings: Vec<String>,
}
468
469/// Merges existing and suggested labels additively.
470/// Labels that should only be applied by maintainers, not by AI suggestions
471const MAINTAINER_ONLY_LABELS: &[&str] = &["good first issue", "help wanted"];
472
473///
474/// Implements additive label merging with priority label handling:
475/// - If existing labels contain a priority label (p[0-9]), skip AI-suggested priority labels
476/// - Merge remaining labels with case-insensitive deduplication
477/// - Preserve all existing labels
478///
479/// # Arguments
480///
481/// * `existing_labels` - Labels currently on the issue
482/// * `suggested_labels` - Labels suggested by AI
483///
484/// # Returns
485///
486/// Merged label list with duplicates removed (case-insensitive)
487fn merge_labels(existing_labels: &[String], suggested_labels: &[String]) -> Vec<String> {
488    // Check if existing labels contain a priority label
489    let has_priority = existing_labels.iter().any(|label| is_priority_label(label));
490
491    // Start with existing labels
492    let mut merged = existing_labels.to_vec();
493
494    // Add suggested labels, filtering out priority labels if existing has one
495    for suggested in suggested_labels {
496        // Skip priority labels if existing already has one
497        if is_priority_label(suggested) && has_priority {
498            continue;
499        }
500
501        // Skip maintainer-only labels
502        if MAINTAINER_ONLY_LABELS
503            .iter()
504            .any(|&m| m.eq_ignore_ascii_case(suggested))
505        {
506            continue;
507        }
508
509        // Add if not already present (case-insensitive check)
510        if !merged
511            .iter()
512            .any(|l| l.to_lowercase() == suggested.to_lowercase())
513        {
514            merged.push(suggested.clone());
515        }
516    }
517
518    merged
519}
520
521/// Updates an issue with labels and milestone.
522///
523/// Applies labels additively by merging existing and suggested labels.
524/// Validates suggestions against available options before applying.
525/// Returns what was actually applied and any warnings.
526///
527/// # Errors
528///
529/// Returns an error if the GitHub API call fails.
530#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
531#[allow(clippy::too_many_arguments)]
532pub async fn update_issue_labels_and_milestone(
533    client: &Octocrab,
534    owner: &str,
535    repo: &str,
536    number: u64,
537    existing_labels: &[String],
538    suggested_labels: &[String],
539    existing_milestone: Option<&str>,
540    suggested_milestone: Option<&str>,
541    available_labels: &[crate::ai::types::RepoLabel],
542    available_milestones: &[crate::ai::types::RepoMilestone],
543) -> Result<ApplyResult> {
544    debug!("Updating issue with labels and milestone");
545
546    let mut warnings = Vec::new();
547
548    // Validate and collect labels
549    let available_label_names: std::collections::HashSet<_> =
550        available_labels.iter().map(|l| l.name.as_str()).collect();
551
552    // Validate suggested labels
553    let mut valid_suggested = Vec::new();
554    for label in suggested_labels {
555        if available_label_names.contains(label.as_str()) {
556            valid_suggested.push(label.clone());
557        } else {
558            warnings.push(format!("Label '{label}' not found in repository"));
559        }
560    }
561
562    // Merge existing and suggested labels additively
563    let applied_labels = merge_labels(existing_labels, &valid_suggested);
564
565    // Validate and find milestone (only set if issue has no existing milestone)
566    let applied_milestone = if existing_milestone.is_none() {
567        if let Some(milestone_title) = suggested_milestone {
568            if let Some(milestone) = available_milestones
569                .iter()
570                .find(|m| m.title == milestone_title)
571            {
572                Some(milestone.title.clone())
573            } else {
574                warnings.push(format!(
575                    "Milestone '{milestone_title}' not found in repository"
576                ));
577                None
578            }
579        } else {
580            None
581        }
582    } else {
583        None
584    };
585
586    // Apply updates to the issue
587    let issues_handler = client.issues(owner, repo);
588    let mut update_builder = issues_handler.update(number);
589
590    if !applied_labels.is_empty() {
591        update_builder = update_builder.labels(&applied_labels);
592    }
593
594    #[allow(clippy::collapsible_if)]
595    if let Some(milestone_title) = &applied_milestone {
596        if let Some(milestone) = available_milestones
597            .iter()
598            .find(|m| &m.title == milestone_title)
599        {
600            update_builder = update_builder.milestone(milestone.number);
601        }
602    }
603
604    update_builder
605        .send()
606        .await
607        .with_context(|| format!("Failed to update issue #{number}"))?;
608
609    debug!(
610        labels = ?applied_labels,
611        milestone = ?applied_milestone,
612        warnings = ?warnings,
613        "Issue updated successfully"
614    );
615
616    Ok(ApplyResult {
617        applied_labels,
618        applied_milestone,
619        warnings,
620    })
621}
622
623/// Apply labels to an issue or PR by number.
624///
625/// Simplified label-only application function for PRs (no milestone, no merge logic).
626/// Returns an error if the GitHub API call fails.
627#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
628pub async fn apply_labels_to_number(
629    client: &Octocrab,
630    owner: &str,
631    repo: &str,
632    number: u64,
633    labels: &[String],
634) -> Result<Vec<String>> {
635    debug!("Applying labels to issue/PR");
636
637    if labels.is_empty() {
638        debug!("No labels to apply");
639        return Ok(Vec::new());
640    }
641
642    let route = format!("/repos/{owner}/{repo}/issues/{number}/labels");
643    let payload = serde_json::json!({ "labels": labels });
644
645    client
646        .post::<_, serde_json::Value>(route, Some(&payload))
647        .await
648        .with_context(|| {
649            format!(
650                "Failed to apply labels to issue/PR #{number} in {owner}/{repo}. \
651                     Check that you have write access to the repository."
652            )
653        })?;
654
655    debug!(labels = ?labels, "Labels applied successfully");
656
657    Ok(labels.to_vec())
658}
659
660/// Priority labels that should be included first in tiered filtering.
661/// These labels are most actionable for issue triage.
662const PRIORITY_LABELS: &[&str] = &[
663    "bug",
664    "enhancement",
665    "documentation",
666    "good first issue",
667    "help wanted",
668    "question",
669    "feature",
670    "fix",
671    "breaking",
672    "security",
673    "performance",
674    "breaking-change",
675];
676
677/// Filters labels using tiered selection: priority labels first, then remaining labels.
678///
679/// Implements two-tier filtering:
680/// - Tier 1: Priority labels (case-insensitive matching)
681/// - Tier 2: Remaining labels to fill up to `max_labels`
682///
683/// This ensures the AI sees the most actionable labels regardless of repository size.
684///
685/// # Arguments
686///
687/// * `labels` - List of available labels from the repository
688/// * `max_labels` - Maximum number of labels to return
689///
690/// # Returns
691///
692/// Filtered list of labels with priority labels first.
693#[must_use]
694pub fn filter_labels_by_relevance(
695    labels: &[crate::ai::types::RepoLabel],
696    max_labels: usize,
697) -> Vec<crate::ai::types::RepoLabel> {
698    if labels.is_empty() || max_labels == 0 {
699        return Vec::new();
700    }
701
702    let mut priority_labels = Vec::new();
703    let mut other_labels = Vec::new();
704
705    // Separate labels into priority and other
706    for label in labels {
707        let label_lower = label.name.to_lowercase();
708        let is_priority = PRIORITY_LABELS
709            .iter()
710            .any(|&p| label_lower == p.to_lowercase());
711
712        if is_priority {
713            priority_labels.push(label.clone());
714        } else {
715            other_labels.push(label.clone());
716        }
717    }
718
719    // Combine: priority labels first, then fill remaining slots with other labels
720    let mut result = priority_labels;
721    let remaining_slots = max_labels.saturating_sub(result.len());
722    result.extend(other_labels.into_iter().take(remaining_slots));
723
724    // Limit to max_labels
725    result.truncate(max_labels);
726    result
727}
728
/// Patterns for directories/files to completely exclude from tree filtering.
/// Based on GitHub Linguist vendor.yml and common build artifacts.
///
/// Each entry is a directory name with a trailing `/`; it is consulted by
/// `filter_tree_by_relevance` when selecting candidate files.
const EXCLUDE_PATTERNS: &[&str] = &[
    "node_modules/",
    "vendor/",
    "dist/",
    "build/",
    "target/",
    ".git/",
    "cache/",
    "docs/",
    "examples/",
];

/// Patterns for directories to deprioritize but not exclude.
/// These contain test/benchmark code less relevant to issue triage.
///
/// Each entry is a directory name with a trailing `/`. In
/// `filter_tree_by_relevance`, files under these directories are kept out of
/// the lowest tier but can still be selected by the higher tiers.
const DEPRIORITIZE_PATTERNS: &[&str] = &[
    "test/",
    "tests/",
    "spec/",
    "bench/",
    "eval/",
    "fixtures/",
    "mocks/",
];
754
/// Lists the conventional entry-point file names for a language.
///
/// The language name is matched case-insensitively. Files with these names
/// are prioritized as they often contain the main logic. Languages without a
/// known entry point yield an empty list.
fn entry_point_patterns(language: &str) -> Vec<&'static str> {
    let normalized = language.to_lowercase();

    let patterns: &[&'static str] = match normalized.as_str() {
        "rust" => &["lib.rs", "mod.rs", "main.rs"],
        "python" => &["__init__.py"],
        "javascript" | "typescript" => &["index.ts", "index.js"],
        "java" => &["Main.java"],
        "go" => &["main.go"],
        "c#" | "csharp" => &["Program.cs"],
        _ => &[],
    };

    patterns.to_vec()
}
768
769/// Maps programming languages to their common file extensions.
770fn get_extensions_for_language(language: &str) -> Vec<&'static str> {
771    match language.to_lowercase().as_str() {
772        "rust" => vec!["rs"],
773        "python" => vec!["py"],
774        "javascript" | "typescript" => vec!["js", "ts", "jsx", "tsx"],
775        "java" => vec!["java"],
776        "c" => vec!["c", "h"],
777        "c++" | "cpp" => vec!["cpp", "cc", "cxx", "h", "hpp"],
778        "c#" | "csharp" => vec!["cs"],
779        "go" => vec!["go"],
780        "ruby" => vec!["rb"],
781        "php" => vec!["php"],
782        "swift" => vec!["swift"],
783        "kotlin" => vec!["kt"],
784        "scala" => vec!["scala"],
785        "r" => vec!["r"],
786        "shell" | "bash" => vec!["sh", "bash"],
787        "html" => vec!["html", "htm"],
788        "css" => vec!["css", "scss", "sass"],
789        "json" => vec!["json"],
790        "yaml" | "yml" => vec!["yaml", "yml"],
791        "toml" => vec!["toml"],
792        "xml" => vec!["xml"],
793        "markdown" => vec!["md"],
794        _ => vec![],
795    }
796}
797
798/// Filters repository tree entries by relevance using tiered keyword matching.
799///
800/// Implements three-tier filtering:
801/// - Tier 1: Files matching keywords (max 35)
802/// - Tier 2: Language entry points (max 10)
803/// - Tier 3: Other relevant files (max 15)
804///
805/// Removes common non-source directories and limits results to 60 paths.
806///
807/// # Arguments
808///
809/// * `entries` - Raw tree entries from GitHub API
810/// * `language` - Repository primary language for extension filtering
811/// * `keywords` - Optional keywords extracted from issue title for relevance matching
812///
813/// # Returns
814///
815/// Filtered and sorted list of file paths (max 60).
816fn filter_tree_by_relevance(
817    entries: &[GitTreeEntry],
818    language: &str,
819    keywords: &[String],
820) -> Vec<String> {
821    let extensions = get_extensions_for_language(language);
822    let entry_points = entry_point_patterns(language);
823
824    // Filter to valid source files
825    let candidates: Vec<String> = entries
826        .iter()
827        .filter(|entry| {
828            // Only include files (blobs), not directories
829            if entry.type_ != "blob" {
830                return false;
831            }
832
833            // Exclude paths containing excluded directories
834            if EXCLUDE_PATTERNS.iter().any(|dir| entry.path.contains(dir)) {
835                return false;
836            }
837
838            // Filter by extension if language is recognized
839            if extensions.is_empty() {
840                // If language not recognized, include all files
841                true
842            } else {
843                extensions.iter().any(|ext| entry.path.ends_with(ext))
844            }
845        })
846        .map(|e| e.path.clone())
847        .collect();
848
849    // Tier 1: Files matching keywords (max 35)
850    let mut tier1: Vec<String> = Vec::new();
851    let mut remaining: Vec<String> = Vec::new();
852
853    for path in candidates {
854        let path_lower = path.to_lowercase();
855        let matches_keyword = keywords.iter().any(|kw| path_lower.contains(kw));
856
857        if matches_keyword && tier1.len() < 35 {
858            tier1.push(path);
859        } else {
860            remaining.push(path);
861        }
862    }
863
864    // Tier 2: Entry point files (max 10)
865    let mut tier2: Vec<String> = Vec::new();
866    let mut tier3_candidates: Vec<String> = Vec::new();
867
868    for path in remaining {
869        let is_entry_point = entry_points.iter().any(|ep| path.ends_with(ep));
870        let is_deprioritized = DEPRIORITIZE_PATTERNS.iter().any(|dp| path.contains(dp));
871
872        if is_entry_point && tier2.len() < 10 {
873            tier2.push(path);
874        } else if !is_deprioritized {
875            tier3_candidates.push(path);
876        }
877    }
878
879    // Tier 3: Other relevant files (max 15)
880    let mut tier3: Vec<String> = tier3_candidates.into_iter().take(15).collect();
881
882    // Combine and sort by depth within each tier
883    let mut result = tier1;
884    result.append(&mut tier2);
885    result.append(&mut tier3);
886
887    // Sort by path depth (fewer slashes first), then alphabetically
888    result.sort_by(|a, b| {
889        let depth_a = a.matches('/').count();
890        let depth_b = b.matches('/').count();
891        if depth_a == depth_b {
892            a.cmp(b)
893        } else {
894            depth_a.cmp(&depth_b)
895        }
896    });
897
898    // Limit to 60 paths
899    result.truncate(60);
900    result
901}
902
903/// Fetches the repository file tree from GitHub.
904///
905/// Attempts to fetch from the default branch (main, then master).
906/// Returns filtered list of source file paths based on repository language and optional keywords.
907///
908/// # Arguments
909///
910/// * `client` - Authenticated Octocrab client
911/// * `owner` - Repository owner
912/// * `repo` - Repository name
913/// * `language` - Repository primary language for filtering
914/// * `keywords` - Optional keywords extracted from issue title for relevance matching
915///
916/// # Errors
917///
918/// Returns an error if the API request fails (but not if tree is unavailable).
919#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
920pub async fn fetch_repo_tree(
921    client: &Octocrab,
922    owner: &str,
923    repo: &str,
924    language: &str,
925    keywords: &[String],
926) -> Result<Vec<String>> {
927    debug!("Fetching repository tree");
928
929    // Try main branch first, then master
930    let branches = ["main", "master"];
931    let mut tree_response: Option<GitTreeResponse> = None;
932
933    for branch in &branches {
934        let route = format!("/repos/{owner}/{repo}/git/trees/{branch}?recursive=1");
935        let result = (|| async {
936            client
937                .get::<GitTreeResponse, _, _>(&route, None::<&()>)
938                .await
939                .map_err(|e| anyhow::anyhow!(e))
940        })
941        .retry(retry_backoff())
942        .notify(|err, dur| {
943            tracing::warn!(
944                error = %err,
945                retry_after = ?dur,
946                branch = %branch,
947                "Retrying fetch_repo_tree"
948            );
949        })
950        .await;
951
952        match result {
953            Ok(response) => {
954                tree_response = Some(response);
955                debug!(branch = %branch, "Fetched tree from branch");
956                break;
957            }
958            Err(e) => {
959                debug!(branch = %branch, error = %e, "Failed to fetch tree from branch");
960            }
961        }
962    }
963
964    let response =
965        tree_response.context("Failed to fetch repository tree from main or master branch")?;
966
967    let filtered = filter_tree_by_relevance(&response.tree, language, keywords);
968    debug!(count = filtered.len(), "Filtered tree entries");
969
970    Ok(filtered)
971}
972
973/// Fetches issues needing triage from a specific repository.
974///
975/// In default mode (force=false), returns issues that are either unlabeled OR missing a milestone.
976/// In force mode (force=true), returns ALL open issues with no filtering.
977///
978/// # Arguments
979///
980/// * `client` - The Octocrab GitHub client
981/// * `owner` - Repository owner
982/// * `repo` - Repository name
983/// * `since` - Optional RFC3339 timestamp to filter issues created after this date (client-side filtering)
984/// * `force` - If true, return all issues in the specified state; if false, filter to unlabeled or milestone-missing issues
985/// * `state` - Issue state filter (Open, Closed, or All)
986///
987/// # Errors
988///
989/// Returns an error if the REST API request fails.
990#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
991pub async fn fetch_issues_needing_triage(
992    client: &Octocrab,
993    owner: &str,
994    repo: &str,
995    since: Option<&str>,
996    force: bool,
997    state: octocrab::params::State,
998) -> Result<Vec<UntriagedIssue>> {
999    debug!("Fetching issues needing triage");
1000
1001    let issues_page: octocrab::Page<octocrab::models::issues::Issue> = client
1002        .issues(owner, repo)
1003        .list()
1004        .state(state)
1005        .per_page(100)
1006        .send()
1007        .await
1008        .context("Failed to fetch issues from repository")?;
1009
1010    let total_issues = issues_page.items.len();
1011
1012    let mut issues_needing_triage: Vec<UntriagedIssue> = issues_page
1013        .items
1014        .into_iter()
1015        .filter(|issue| {
1016            if force {
1017                true
1018            } else {
1019                issue.labels.is_empty() || issue.milestone.is_none()
1020            }
1021        })
1022        .map(|issue| UntriagedIssue {
1023            number: issue.number,
1024            title: issue.title,
1025            created_at: issue.created_at.to_rfc3339(),
1026            url: issue.html_url.to_string(),
1027        })
1028        .collect();
1029
1030    if let Some(since_date) = since
1031        && let Ok(since_timestamp) = chrono::DateTime::parse_from_rfc3339(since_date)
1032    {
1033        issues_needing_triage.retain(|issue| {
1034            if let Ok(created_at) = chrono::DateTime::parse_from_rfc3339(&issue.created_at) {
1035                created_at >= since_timestamp
1036            } else {
1037                true
1038            }
1039        });
1040    }
1041
1042    debug!(
1043        total_issues = total_issues,
1044        issues_needing_triage_count = issues_needing_triage.len(),
1045        "Fetched issues needing triage"
1046    );
1047
1048    Ok(issues_needing_triage)
1049}
1050
#[cfg(test)]
mod fetch_issues_needing_triage_tests {
    /// Local copy of the selection predicate used by `fetch_issues_needing_triage`:
    /// force mode keeps everything, otherwise an issue is kept when it has no labels
    /// or no milestone. Keep this in sync with the production filter.
    fn passes_filter(labels_empty: bool, milestone_none: bool, force: bool) -> bool {
        force || labels_empty || milestone_none
    }

    /// Unlabeled and without a milestone: kept in default mode.
    #[test]
    fn filter_logic_unlabeled_default_mode() {
        assert!(passes_filter(true, true, false));
    }

    /// Unlabeled but already assigned a milestone: the missing labels alone are
    /// enough to keep it. This combination was previously not covered.
    #[test]
    fn filter_logic_unlabeled_with_milestone_default_mode() {
        assert!(passes_filter(true, false, false));
    }

    /// Labeled but without a milestone: still kept in default mode.
    #[test]
    fn filter_logic_labeled_default_mode() {
        assert!(passes_filter(false, true, false));
    }

    /// A missing milestone alone is enough to keep a labeled issue.
    #[test]
    fn filter_logic_missing_milestone_default_mode() {
        assert!(passes_filter(false, true, false));
    }

    /// Force mode keeps even fully triaged issues.
    #[test]
    fn filter_logic_force_mode_returns_all() {
        assert!(passes_filter(false, false, true));
    }

    /// Labeled and with a milestone: dropped in default mode.
    #[test]
    fn filter_logic_fully_triaged_default_mode_excluded() {
        assert!(!passes_filter(false, false, false));
    }
}
1128
#[cfg(test)]
mod tree_tests {
    use super::*;

    /// Builds a regular-file (`blob`, mode `100644`) tree entry for `path` with the given SHA.
    fn blob(path: &str, sha: &str) -> GitTreeEntry {
        GitTreeEntry {
            path: String::from(path),
            type_: String::from("blob"),
            mode: String::from("100644"),
            sha: String::from(sha),
        }
    }

    /// A file whose path contains a keyword is part of the filtered result.
    #[test]
    fn filter_tree_by_relevance_keyword_matching() {
        let entries = vec![
            blob("src/parser.rs", "abc123"),
            blob("src/main.rs", "def456"),
            blob("src/utils.rs", "ghi789"),
        ];
        let keywords = vec![String::from("parser")];

        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);

        assert!(filtered.iter().any(|path| path == "src/parser.rs"));
    }

    /// With no keywords, the `src/lib.rs` entry point is still returned.
    #[test]
    fn filter_tree_by_relevance_entry_points() {
        let entries = vec![
            blob("src/lib.rs", "abc123"),
            blob("src/utils.rs", "def456"),
        ];
        let keywords: Vec<String> = Vec::new();

        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);

        assert!(filtered.iter().any(|path| path == "src/lib.rs"));
    }

    /// Files under `tests/` are dropped while regular sources are kept.
    #[test]
    fn filter_tree_by_relevance_excludes_tests() {
        let entries = vec![
            blob("src/main.rs", "abc123"),
            blob("tests/integration_test.rs", "def456"),
        ];
        let keywords: Vec<String> = Vec::new();

        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);

        assert!(filtered.iter().all(|path| path != "tests/integration_test.rs"));
        assert!(filtered.iter().any(|path| path == "src/main.rs"));
    }

    /// Rust maps to exactly the `rs` extension.
    #[test]
    fn get_extensions_for_language_rust() {
        let exts = get_extensions_for_language("rust");
        assert_eq!(exts, vec!["rs"]);
    }

    /// JavaScript covers both the JS and TS extension families.
    #[test]
    fn get_extensions_for_language_javascript() {
        let exts = get_extensions_for_language("javascript");
        for expected in ["js", "ts", "jsx", "tsx"] {
            assert!(exts.contains(&expected));
        }
    }

    /// An unrecognised language yields no extensions.
    #[test]
    fn get_extensions_for_language_unknown() {
        let exts = get_extensions_for_language("unknown_language");
        assert!(exts.is_empty());
    }
}
1227
#[cfg(test)]
mod merge_labels_tests {
    use super::*;

    /// Converts string literals into the owned label list that `merge_labels` takes.
    fn owned(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| (*name).to_string()).collect()
    }

    /// Existing labels are kept and new suggestions are appended.
    #[test]
    fn preserves_existing_and_adds_new() {
        let merged = merge_labels(&owned(&["bug", "enhancement"]), &owned(&["documentation"]));

        assert_eq!(merged.len(), 3);
        for expected in ["bug", "enhancement", "documentation"] {
            assert!(merged.contains(&expected.to_string()));
        }
    }

    /// A suggestion differing only by case from an existing label is not added again.
    #[test]
    fn deduplicates_case_insensitive() {
        let merged = merge_labels(&owned(&["Bug"]), &owned(&["bug", "enhancement"]));

        assert_eq!(merged.len(), 2);
        for expected in ["Bug", "enhancement"] {
            assert!(merged.contains(&expected.to_string()));
        }
    }

    /// P1 (uppercase) exists and p2 is suggested: P1 stays, p2 is skipped, bug is added.
    #[test]
    fn skips_priority_when_existing_has_one() {
        let merged = merge_labels(&owned(&["P1"]), &owned(&["p2", "bug"]));

        assert_eq!(merged.len(), 2);
        for expected in ["P1", "bug"] {
            assert!(merged.contains(&expected.to_string()));
        }
        assert!(!merged.contains(&"p2".to_string()));
    }

    /// Either side may be empty.
    #[test]
    fn handles_empty_inputs() {
        // No existing labels: the suggestions pass through.
        let from_suggestions = merge_labels(&[], &owned(&["bug", "p1"]));
        assert_eq!(from_suggestions.len(), 2);

        // No suggestions: the existing labels are preserved.
        let from_existing = merge_labels(&owned(&["bug"]), &[]);
        assert_eq!(from_existing.len(), 1);
        assert!(from_existing.contains(&"bug".to_string()));
    }

    /// Maintainer-only labels are never taken from suggestions.
    #[test]
    fn filters_maintainer_only_labels() {
        let merged = merge_labels(&[], &owned(&["good first issue", "help wanted", "bug"]));

        assert_eq!(merged.len(), 1);
        assert!(merged.contains(&"bug".to_string()));
        for rejected in ["good first issue", "help wanted"] {
            assert!(!merged.contains(&rejected.to_string()));
        }
    }

    /// Maintainer-only filtering ignores letter case.
    #[test]
    fn filters_maintainer_only_case_insensitive() {
        let merged = merge_labels(
            &[],
            &owned(&["Good First Issue", "HELP WANTED", "enhancement"]),
        );

        assert_eq!(merged.len(), 1);
        assert!(merged.contains(&"enhancement".to_string()));
        for rejected in ["Good First Issue", "HELP WANTED"] {
            assert!(!merged.contains(&rejected.to_string()));
        }
    }

    /// `priority: high` exists and `priority: medium` is suggested: the existing one
    /// stays, the suggested priority is skipped, bug is added.
    #[test]
    fn skips_priority_prefix_when_existing_has_one() {
        let merged = merge_labels(
            &owned(&["priority: high"]),
            &owned(&["priority: medium", "bug"]),
        );

        assert_eq!(merged.len(), 2);
        for expected in ["priority: high", "bug"] {
            assert!(merged.contains(&expected.to_string()));
        }
        assert!(!merged.contains(&"priority: medium".to_string()));
    }

    /// p1 exists and `priority: high` is suggested: p1 stays, the differently formatted
    /// priority is skipped, bug is added.
    #[test]
    fn skips_mixed_priority_formats_when_existing_has_one() {
        let merged = merge_labels(&owned(&["p1"]), &owned(&["priority: high", "bug"]));

        assert_eq!(merged.len(), 2);
        for expected in ["p1", "bug"] {
            assert!(merged.contains(&expected.to_string()));
        }
        assert!(!merged.contains(&"priority: high".to_string()));
    }
}
1331
#[cfg(test)]
mod label_tests {
    use super::*;

    /// Builds a `RepoLabel` fixture from string literals.
    fn label(name: &str, color: &str, description: &str) -> crate::ai::types::RepoLabel {
        crate::ai::types::RepoLabel {
            name: name.to_owned(),
            color: color.to_owned(),
            description: description.to_owned(),
        }
    }

    /// No labels in, no labels out.
    #[test]
    fn filter_labels_empty_input() {
        let labels: Vec<crate::ai::types::RepoLabel> = Vec::new();
        assert!(filter_labels_by_relevance(&labels, 30).is_empty());
    }

    /// A maximum of zero yields an empty result even when labels exist.
    #[test]
    fn filter_labels_zero_max() {
        let labels = vec![label("bug", "ff0000", "Bug report")];
        assert!(filter_labels_by_relevance(&labels, 0).is_empty());
    }

    /// `documentation` and `bug` are ordered ahead of the `other` label.
    #[test]
    fn filter_labels_priority_first() {
        let labels = vec![
            label("documentation", "0075ca", "Documentation"),
            label("other", "cccccc", "Other"),
            label("bug", "ff0000", "Bug"),
        ];

        let filtered = filter_labels_by_relevance(&labels, 30);

        let expected_order = ["documentation", "bug", "other"];
        assert_eq!(filtered.len(), expected_order.len());
        for (position, expected) in expected_order.iter().enumerate() {
            assert_eq!(filtered[position].name, *expected);
        }
    }

    /// Label names keep their original casing and order in the output.
    #[test]
    fn filter_labels_case_insensitive() {
        let labels = vec![
            label("Bug", "ff0000", "Bug"),
            label("ENHANCEMENT", "a2eeef", "Enhancement"),
        ];

        let filtered = filter_labels_by_relevance(&labels, 30);

        let expected_order = ["Bug", "ENHANCEMENT"];
        assert_eq!(filtered.len(), expected_order.len());
        for (position, expected) in expected_order.iter().enumerate() {
            assert_eq!(filtered[position].name, *expected);
        }
    }

    /// With more labels than the limit, `bug` and `enhancement` still come first.
    #[test]
    fn filter_labels_over_limit_with_priorities() {
        // Twenty filler labels followed by the two that must lead the result.
        let mut labels: Vec<_> = (0..20)
            .map(|i| label(&format!("label{i}"), "cccccc", &format!("Label {i}")))
            .collect();
        labels.push(label("bug", "ff0000", "Bug"));
        labels.push(label("enhancement", "a2eeef", "Enhancement"));

        let filtered = filter_labels_by_relevance(&labels, 10);

        assert_eq!(filtered.len(), 10);
        assert_eq!(filtered[0].name, "bug");
        assert_eq!(filtered[1].name, "enhancement");
    }
}
1427
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test only: checks that parse_issue_reference delegates correctly.
    // The comprehensive parsing tests are in github/mod.rs.
    #[test]
    fn parse_issue_reference_delegates_to_shared() {
        let url = "https://github.com/block/goose/issues/5836";
        let (owner, repo, number) = parse_issue_reference(url, None).unwrap();

        assert_eq!(owner, "block");
        assert_eq!(repo, "goose");
        assert_eq!(number, 5836);
    }

    /// Stop words are removed; the remaining words are kept in lowercase.
    #[test]
    fn extract_keywords_filters_stop_words() {
        let keywords = extract_keywords("The issue is about a bug in the CLI");

        for stop_word in ["the", "is", "a"] {
            assert!(!keywords.contains(&stop_word.to_string()));
        }
        for kept in ["issue", "bug", "cli"] {
            assert!(keywords.contains(&kept.to_string()));
        }
    }

    /// Exactly five keywords come back from a ten-word title.
    #[test]
    fn extract_keywords_limits_to_five() {
        let keywords = extract_keywords("one two three four five six seven eight nine ten");
        assert_eq!(keywords.len(), 5);
    }

    /// A title made only of stop words yields no keywords.
    #[test]
    fn extract_keywords_empty_title() {
        let keywords = extract_keywords("the a an and or");
        assert!(keywords.is_empty());
    }

    /// Every character of every returned keyword is lowercase.
    #[test]
    fn extract_keywords_lowercase_conversion() {
        let keywords = extract_keywords("CLI Bug FIX");

        for keyword in keywords.iter() {
            for ch in keyword.chars() {
                assert!(ch.is_lowercase());
            }
        }
    }
}