Skip to main content

aptu_core/github/
issues.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! GitHub issue operations for the triage command.
4//!
5//! Provides functionality to parse issue URLs, fetch issue details,
6//! and post triage comments.
7
8use anyhow::{Context, Result};
9#[cfg(not(target_arch = "wasm32"))]
10use backon::Retryable;
11#[cfg(not(target_arch = "wasm32"))]
12use octocrab::Octocrab;
13use serde::{Deserialize, Serialize};
14use tracing::{debug, instrument};
15
16use super::{ReferenceKind, parse_github_reference};
17use crate::ai::types::{IssueComment, IssueDetails, RepoIssueContext};
18#[cfg(not(target_arch = "wasm32"))]
19use crate::retry::retry_backoff;
20use crate::utils::is_priority_label;
21
22/// A GitHub issue without labels (untriaged).
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct UntriagedIssue {
25    /// Issue number.
26    pub number: u64,
27    /// Issue title.
28    pub title: String,
29    /// Creation timestamp (ISO 8601).
30    pub created_at: String,
31    /// Issue URL.
32    pub url: String,
33}
34
35/// A single entry in a Git tree response.
36#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct GitTreeEntry {
38    /// File path relative to repository root.
39    pub path: String,
40    /// Type of entry: "blob" (file) or "tree" (directory).
41    #[serde(rename = "type")]
42    pub type_: String,
43    /// File mode (e.g., "100644" for regular files).
44    pub mode: String,
45    /// SHA-1 hash of the entry.
46    pub sha: String,
47}
48
49/// Response from GitHub Git Trees API.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct GitTreeResponse {
52    /// List of entries in the tree.
53    pub tree: Vec<GitTreeEntry>,
54    /// Whether the tree is truncated (too many entries).
55    pub truncated: bool,
56}
57
58/// Parses an owner/repo string to extract owner and repo.
59///
60/// Validates format: exactly one `/`, non-empty parts.
61///
62/// # Errors
63///
64/// Returns an error if the format is invalid.
65pub fn parse_owner_repo(s: &str) -> Result<(String, String)> {
66    let parts: Vec<&str> = s.split('/').collect();
67    if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
68        anyhow::bail!(
69            "Invalid owner/repo format.\n\
70             Expected: owner/repo\n\
71             Got: {s}"
72        );
73    }
74    Ok((parts[0].to_string(), parts[1].to_string()))
75}
76
77/// Parses a GitHub issue reference in multiple formats.
78///
79/// Supports:
80/// - Full URL: `https://github.com/owner/repo/issues/123`
81/// - Short form: `owner/repo#123`
82/// - Bare number: `123` (requires `repo_context`)
83///
84/// # Arguments
85///
86/// * `input` - The issue reference to parse
87/// * `repo_context` - Optional repository context for bare numbers (e.g., "owner/repo")
88///
89/// # Errors
90///
91/// Returns an error if the format is invalid or bare number is used without context.
92pub fn parse_issue_reference(
93    input: &str,
94    repo_context: Option<&str>,
95) -> Result<(String, String, u64)> {
96    parse_github_reference(ReferenceKind::Issue, input, repo_context)
97}
98
99/// Fetches issue details including comments from GitHub.
100///
101/// # Errors
102///
103/// Returns an error if the API request fails or the issue is not found.
104#[cfg(not(target_arch = "wasm32"))]
105#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
106pub async fn fetch_issue_with_comments(
107    client: &Octocrab,
108    owner: &str,
109    repo: &str,
110    number: u64,
111) -> Result<IssueDetails> {
112    debug!("Fetching issue details");
113
114    // Fetch the issue with retry logic
115    let issue = (|| async {
116        client
117            .issues(owner, repo)
118            .get(number)
119            .await
120            .map_err(|e| anyhow::anyhow!(e))
121    })
122    .retry(retry_backoff())
123    .notify(|err, dur| {
124        tracing::warn!(
125            error = %err,
126            retry_after = ?dur,
127            "Retrying fetch_issue_with_comments (issue fetch)"
128        );
129    })
130    .await
131    .with_context(|| format!("Failed to fetch issue #{number} from {owner}/{repo}"))?;
132
133    // Fetch comments (limited to first page) with retry logic
134    let comments_page = (|| async {
135        client
136            .issues(owner, repo)
137            .list_comments(number)
138            .per_page(5)
139            .send()
140            .await
141            .map_err(|e| anyhow::anyhow!(e))
142    })
143    .retry(retry_backoff())
144    .notify(|err, dur| {
145        tracing::warn!(
146            error = %err,
147            retry_after = ?dur,
148            "Retrying fetch_issue_with_comments (comments fetch)"
149        );
150    })
151    .await
152    .with_context(|| format!("Failed to fetch comments for issue #{number}"))?;
153
154    // Convert to our types
155    let labels: Vec<String> = issue.labels.iter().map(|l| l.name.clone()).collect();
156
157    let comments: Vec<IssueComment> = comments_page
158        .items
159        .iter()
160        .map(|c| IssueComment {
161            id: c.id.0,
162            author: c.user.login.clone(),
163            body: c.body.clone().unwrap_or_default(),
164        })
165        .collect();
166
167    let issue_url = issue.html_url.to_string();
168
169    let details = IssueDetails::builder()
170        .owner(owner.to_string())
171        .repo(repo.to_string())
172        .number(number)
173        .title(issue.title)
174        .body(issue.body.unwrap_or_default())
175        .labels(labels)
176        .comments(comments)
177        .url(issue_url)
178        .build();
179
180    debug!(
181        labels = details.labels.len(),
182        comments = details.comments.len(),
183        "Fetched issue details"
184    );
185
186    Ok(details)
187}
188
/// Extracts up to five significant keywords from an issue title.
///
/// The title is lowercased and split on every non-alphanumeric character.
/// Empty fragments and common English stop words are dropped, and only the
/// first five remaining words are kept, in their original order. The result
/// is used both for issue search and for relevance matching of repository files.
///
/// # Arguments
///
/// * `title` - Issue title to extract keywords from
///
/// # Returns
///
/// Vector of lowercase keywords (max 5), excluding stop words.
pub fn extract_keywords(title: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "he", "in", "is",
        "it", "its", "of", "on", "or", "that", "the", "to", "was", "will", "with",
    ];
    const MAX_KEYWORDS: usize = 5;

    let lowered = title.to_lowercase();
    let mut keywords = Vec::new();

    for token in lowered.split(|ch: char| !ch.is_alphanumeric()) {
        if keywords.len() == MAX_KEYWORDS {
            break;
        }
        if token.is_empty() || STOP_WORDS.contains(&token) {
            continue;
        }
        keywords.push(token.to_owned());
    }

    keywords
}
218
219/// Searches for related issues in a repository based on title keywords.
220///
221/// Extracts keywords from the issue title and searches the repository
222/// for matching issues. Returns up to 20 results, excluding the specified issue.
223///
224/// # Arguments
225///
226/// * `client` - Authenticated Octocrab client
227/// * `owner` - Repository owner
228/// * `repo` - Repository name
229/// * `title` - Issue title to extract keywords from
230/// * `exclude_number` - Issue number to exclude from results
231///
232/// # Errors
233///
234/// Returns an error if the search API request fails.
235#[cfg(not(target_arch = "wasm32"))]
236#[instrument(skip(client), fields(owner = %owner, repo = %repo, exclude_number = %exclude_number))]
237pub async fn search_related_issues(
238    client: &Octocrab,
239    owner: &str,
240    repo: &str,
241    title: &str,
242    exclude_number: u64,
243) -> Result<Vec<RepoIssueContext>> {
244    let keywords = extract_keywords(title);
245
246    if keywords.is_empty() {
247        debug!("No keywords extracted from title");
248        return Ok(Vec::new());
249    }
250
251    // Build search query: keyword1 keyword2 ... repo:owner/repo is:issue
252    let query = format!("{} repo:{}/{} is:issue", keywords.join(" "), owner, repo);
253
254    debug!(query = %query, "Searching for related issues");
255
256    // Search for issues with retry logic
257    let search_result = (|| async {
258        client
259            .search()
260            .issues_and_pull_requests(&query)
261            .per_page(20)
262            .send()
263            .await
264            .map_err(|e| anyhow::anyhow!(e))
265    })
266    .retry(retry_backoff())
267    .notify(|err, dur| {
268        tracing::warn!(
269            error = %err,
270            retry_after = ?dur,
271            "Retrying search_related_issues"
272        );
273    })
274    .await
275    .with_context(|| format!("Failed to search for related issues in {owner}/{repo}"))?;
276
277    // Convert to our context type
278    let related: Vec<RepoIssueContext> = search_result
279        .items
280        .iter()
281        .filter_map(|item| {
282            // Only include issues (not PRs)
283            if item.pull_request.is_some() {
284                return None;
285            }
286
287            // Exclude the issue being triaged
288            if item.number == exclude_number {
289                return None;
290            }
291
292            Some(RepoIssueContext {
293                number: item.number,
294                title: item.title.clone(),
295                labels: item.labels.iter().map(|l| l.name.clone()).collect(),
296                state: format!("{:?}", item.state).to_lowercase(),
297            })
298        })
299        .collect();
300
301    debug!(count = related.len(), "Found related issues");
302
303    Ok(related)
304}
305
306/// Posts a triage comment to a GitHub issue.
307///
308/// # Returns
309///
310/// The URL of the created comment.
311///
312/// # Errors
313///
314/// Returns an error if the API request fails.
315#[cfg(not(target_arch = "wasm32"))]
316#[instrument(skip(client, body), fields(owner = %owner, repo = %repo, number = number))]
317pub async fn post_comment(
318    client: &Octocrab,
319    owner: &str,
320    repo: &str,
321    number: u64,
322    body: &str,
323) -> Result<String> {
324    debug!("Posting triage comment");
325
326    let comment = client
327        .issues(owner, repo)
328        .create_comment(number, body)
329        .await
330        .with_context(|| format!("Failed to post comment to issue #{number}"))?;
331
332    let comment_url = comment.html_url.to_string();
333
334    debug!(url = %comment_url, "Comment posted successfully");
335
336    Ok(comment_url)
337}
338
339/// Deletes a comment from a GitHub issue.
340///
341/// # Errors
342///
343/// Returns an error if the API request fails. 404 errors (comment not found)
344/// are treated as success (idempotent).
345#[cfg(not(target_arch = "wasm32"))]
346#[instrument(skip(client), fields(owner = %owner, repo = %repo, comment_id = comment_id))]
347pub async fn delete_issue_comment(
348    client: &Octocrab,
349    owner: &str,
350    repo: &str,
351    comment_id: u64,
352) -> Result<()> {
353    debug!("Deleting issue comment");
354
355    let route = format!("/repos/{owner}/{repo}/issues/comments/{comment_id}");
356
357    // Use generic delete method; needs explicit empty object body type
358    let empty_body = serde_json::json!({});
359    let result: std::result::Result<serde_json::Value, _> =
360        client.delete(&route, Some(&empty_body)).await;
361
362    match result {
363        Ok(_) => {
364            debug!("Comment deleted successfully");
365            Ok(())
366        }
367        Err(e)
368            if let octocrab::Error::GitHub { source, .. } = &e
369                && source.status_code.as_u16() == 404 =>
370        {
371            debug!("Comment already deleted (404); treating as success");
372            Ok(())
373        }
374        Err(e) => Err(e).with_context(|| format!("Failed to delete comment #{comment_id}")),
375    }
376}
377
378/// Removes a label from a GitHub issue.
379///
380/// # Errors
381///
382/// Returns an error if the API request fails. 404 errors (label not found)
383/// are treated as success (idempotent).
384#[cfg(not(target_arch = "wasm32"))]
385#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number, label = label))]
386pub async fn remove_issue_label(
387    client: &Octocrab,
388    owner: &str,
389    repo: &str,
390    number: u64,
391    label: &str,
392) -> Result<()> {
393    debug!("Removing label from issue");
394
395    // URL-encode label name using percent-encoding (handle spaces, special chars)
396    let encoded_label =
397        percent_encoding::percent_encode(label.as_bytes(), percent_encoding::NON_ALPHANUMERIC)
398            .to_string();
399    let route = format!("/repos/{owner}/{repo}/issues/{number}/labels/{encoded_label}");
400
401    // Use generic delete method; needs explicit empty object body type
402    let empty_body = serde_json::json!({});
403    let result: std::result::Result<serde_json::Value, _> =
404        client.delete(&route, Some(&empty_body)).await;
405
406    match result {
407        Ok(_) => {
408            debug!("Label removed successfully");
409            Ok(())
410        }
411        Err(e)
412            if let octocrab::Error::GitHub { source, .. } = &e
413                && source.status_code.as_u16() == 404 =>
414        {
415            debug!("Label not found (404); treating as success");
416            Ok(())
417        }
418        Err(e) => {
419            Err(e).with_context(|| format!("Failed to remove label '{label}' from issue #{number}"))
420        }
421    }
422}
423
424/// Creates a new GitHub issue.
425///
426/// Posts a new issue with the given title and body to the repository.
427/// Returns the issue URL and issue number.
428///
429/// # Arguments
430///
431/// * `client` - Authenticated Octocrab client
432/// * `owner` - Repository owner
433/// * `repo` - Repository name
434/// * `title` - Issue title
435/// * `body` - Issue body (markdown)
436///
437/// # Errors
438///
439/// Returns an error if the GitHub API call fails.
440#[cfg(not(target_arch = "wasm32"))]
441#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
442pub async fn create_issue(
443    client: &Octocrab,
444    owner: &str,
445    repo: &str,
446    title: &str,
447    body: &str,
448) -> Result<(String, u64)> {
449    debug!("Creating GitHub issue");
450
451    let issue = Box::pin(client.issues(owner, repo).create(title).body(body).send())
452        .await
453        .with_context(|| format!("Failed to create issue in {owner}/{repo}"))?;
454
455    let issue_url = issue.html_url.to_string();
456    let issue_number = issue.number;
457
458    debug!(number = issue_number, url = %issue_url, "Issue created successfully");
459
460    Ok((issue_url, issue_number))
461}
462
/// Outcome of applying labels and a milestone to an issue.
#[derive(Clone, Debug)]
pub struct ApplyResult {
    /// Labels sent with the update.
    pub applied_labels: Vec<String>,
    /// Title of the milestone that was set, if one was.
    pub applied_milestone: Option<String>,
    /// Messages about labels or milestones that could not be applied.
    pub warnings: Vec<String>,
}
473
474/// Merges existing and suggested labels additively.
475/// Labels that should only be applied by maintainers, not by AI suggestions
476const MAINTAINER_ONLY_LABELS: &[&str] = &["good first issue", "help wanted"];
477
478///
479/// Implements additive label merging with priority label handling:
480/// - If existing labels contain a priority label (p[0-9]), skip AI-suggested priority labels
481/// - Merge remaining labels with case-insensitive deduplication
482/// - Preserve all existing labels
483///
484/// # Arguments
485///
486/// * `existing_labels` - Labels currently on the issue
487/// * `suggested_labels` - Labels suggested by AI
488///
489/// # Returns
490///
491/// Merged label list with duplicates removed (case-insensitive)
492fn merge_labels(existing_labels: &[String], suggested_labels: &[String]) -> Vec<String> {
493    // Check if existing labels contain a priority label
494    let has_priority = existing_labels.iter().any(|label| is_priority_label(label));
495
496    // Start with existing labels
497    let mut merged = existing_labels.to_vec();
498
499    // Add suggested labels, filtering out priority labels if existing has one
500    for suggested in suggested_labels {
501        // Skip priority labels if existing already has one
502        if is_priority_label(suggested) && has_priority {
503            continue;
504        }
505
506        // Skip maintainer-only labels
507        if MAINTAINER_ONLY_LABELS
508            .iter()
509            .any(|&m| m.eq_ignore_ascii_case(suggested))
510        {
511            continue;
512        }
513
514        // Add if not already present (case-insensitive check)
515        if !merged
516            .iter()
517            .any(|l| l.to_lowercase() == suggested.to_lowercase())
518        {
519            merged.push(suggested.clone());
520        }
521    }
522
523    merged
524}
525
526/// Updates an issue with labels and milestone.
527///
528/// Applies labels additively by merging existing and suggested labels.
529/// Validates suggestions against available options before applying.
530/// Returns what was actually applied and any warnings.
531///
532/// # Errors
533///
534/// Returns an error if the GitHub API call fails.
535#[cfg(not(target_arch = "wasm32"))]
536#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
537#[allow(clippy::too_many_arguments)]
538pub async fn update_issue_labels_and_milestone(
539    client: &Octocrab,
540    owner: &str,
541    repo: &str,
542    number: u64,
543    existing_labels: &[String],
544    suggested_labels: &[String],
545    existing_milestone: Option<&str>,
546    suggested_milestone: Option<&str>,
547    available_labels: &[crate::ai::types::RepoLabel],
548    available_milestones: &[crate::ai::types::RepoMilestone],
549) -> Result<ApplyResult> {
550    debug!("Updating issue with labels and milestone");
551
552    let mut warnings = Vec::new();
553
554    // Validate and collect labels
555    let available_label_names: std::collections::HashSet<_> =
556        available_labels.iter().map(|l| l.name.as_str()).collect();
557
558    // Validate suggested labels
559    let mut valid_suggested = Vec::new();
560    for label in suggested_labels {
561        if available_label_names.contains(label.as_str()) {
562            valid_suggested.push(label.clone());
563        } else {
564            warnings.push(format!("Label '{label}' not found in repository"));
565        }
566    }
567
568    // Merge existing and suggested labels additively
569    let applied_labels = merge_labels(existing_labels, &valid_suggested);
570
571    // Validate and find milestone (only set if issue has no existing milestone)
572    let applied_milestone = if existing_milestone.is_none() {
573        if let Some(milestone_title) = suggested_milestone {
574            if let Some(milestone) = available_milestones
575                .iter()
576                .find(|m| m.title == milestone_title)
577            {
578                Some(milestone.title.clone())
579            } else {
580                warnings.push(format!(
581                    "Milestone '{milestone_title}' not found in repository"
582                ));
583                None
584            }
585        } else {
586            None
587        }
588    } else {
589        None
590    };
591
592    // Apply updates to the issue
593    let issues_handler = client.issues(owner, repo);
594    let mut update_builder = issues_handler.update(number);
595
596    if !applied_labels.is_empty() {
597        update_builder = update_builder.labels(&applied_labels);
598    }
599
600    #[allow(clippy::collapsible_if)]
601    if let Some(milestone_title) = &applied_milestone {
602        if let Some(milestone) = available_milestones
603            .iter()
604            .find(|m| &m.title == milestone_title)
605        {
606            update_builder = update_builder.milestone(milestone.number);
607        }
608    }
609
610    update_builder
611        .send()
612        .await
613        .with_context(|| format!("Failed to update issue #{number}"))?;
614
615    debug!(
616        labels = ?applied_labels,
617        milestone = ?applied_milestone,
618        warnings = ?warnings,
619        "Issue updated successfully"
620    );
621
622    Ok(ApplyResult {
623        applied_labels,
624        applied_milestone,
625        warnings,
626    })
627}
628
629/// Apply labels to an issue or PR by number.
630///
631/// Simplified label-only application function for PRs (no milestone, no merge logic).
632/// Returns an error if the GitHub API call fails.
633#[cfg(not(target_arch = "wasm32"))]
634#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
635pub async fn apply_labels_to_number(
636    client: &Octocrab,
637    owner: &str,
638    repo: &str,
639    number: u64,
640    labels: &[String],
641) -> Result<Vec<String>> {
642    debug!("Applying labels to issue/PR");
643
644    if labels.is_empty() {
645        debug!("No labels to apply");
646        return Ok(Vec::new());
647    }
648
649    let route = format!("/repos/{owner}/{repo}/issues/{number}/labels");
650    let payload = serde_json::json!({ "labels": labels });
651
652    client
653        .post::<_, serde_json::Value>(route, Some(&payload))
654        .await
655        .with_context(|| {
656            format!(
657                "Failed to apply labels to issue/PR #{number} in {owner}/{repo}. \
658                     Check that you have write access to the repository."
659            )
660        })?;
661
662    debug!(labels = ?labels, "Labels applied successfully");
663
664    Ok(labels.to_vec())
665}
666
667/// Priority labels that should be included first in tiered filtering.
668/// These labels are most actionable for issue triage.
669const PRIORITY_LABELS: &[&str] = &[
670    "bug",
671    "enhancement",
672    "documentation",
673    "good first issue",
674    "help wanted",
675    "question",
676    "feature",
677    "fix",
678    "breaking",
679    "security",
680    "performance",
681    "breaking-change",
682];
683
684/// Filters labels using tiered selection: priority labels first, then remaining labels.
685///
686/// Implements two-tier filtering:
687/// - Tier 1: Priority labels (case-insensitive matching)
688/// - Tier 2: Remaining labels to fill up to `max_labels`
689///
690/// This ensures the AI sees the most actionable labels regardless of repository size.
691///
692/// # Arguments
693///
694/// * `labels` - List of available labels from the repository
695/// * `max_labels` - Maximum number of labels to return
696///
697/// # Returns
698///
699/// Filtered list of labels with priority labels first.
700#[must_use]
701pub fn filter_labels_by_relevance(
702    labels: &[crate::ai::types::RepoLabel],
703    max_labels: usize,
704) -> Vec<crate::ai::types::RepoLabel> {
705    if labels.is_empty() || max_labels == 0 {
706        return Vec::new();
707    }
708
709    let mut priority_labels = Vec::new();
710    let mut other_labels = Vec::new();
711
712    // Separate labels into priority and other
713    for label in labels {
714        let label_lower = label.name.to_lowercase();
715        let is_priority = PRIORITY_LABELS
716            .iter()
717            .any(|&p| label_lower == p.to_lowercase());
718
719        if is_priority {
720            priority_labels.push(label.clone());
721        } else {
722            other_labels.push(label.clone());
723        }
724    }
725
726    // Combine: priority labels first, then fill remaining slots with other labels
727    let mut result = priority_labels;
728    let remaining_slots = max_labels.saturating_sub(result.len());
729    result.extend(other_labels.into_iter().take(remaining_slots));
730
731    // Limit to max_labels
732    result.truncate(max_labels);
733    result
734}
735
/// Directory patterns whose contents are removed entirely during tree filtering.
/// Based on GitHub Linguist vendor.yml and common build artifacts.
const EXCLUDE_PATTERNS: &[&str] = &[
    "node_modules/", "vendor/", "dist/", "build/", "target/", ".git/", "cache/", "docs/",
    "examples/",
];

/// Directory patterns for test/benchmark code that is less relevant to issue triage.
/// Paths matching these are not removed up front; they are only kept out of
/// the final "other files" tier of tree filtering.
const DEPRIORITIZE_PATTERNS: &[&str] = &[
    "test/", "tests/", "spec/", "bench/", "eval/", "fixtures/", "mocks/",
];
761
/// Lists the conventional entry-point file names for a language.
///
/// The language name is matched case-insensitively; an unrecognized language
/// yields an empty list.
fn entry_point_patterns(language: &str) -> Vec<&'static str> {
    let file_names: &[&'static str] = match language.to_lowercase().as_str() {
        "rust" => &["lib.rs", "mod.rs", "main.rs"],
        "python" => &["__init__.py"],
        "javascript" | "typescript" => &["index.ts", "index.js"],
        "java" => &["Main.java"],
        "go" => &["main.go"],
        "c#" | "csharp" => &["Program.cs"],
        _ => &[],
    };
    file_names.to_vec()
}
775
/// Returns the common file extensions (without the dot) for a language.
///
/// The language name is matched case-insensitively; an unrecognized language
/// yields an empty list.
fn get_extensions_for_language(language: &str) -> Vec<&'static str> {
    let extensions: &[&'static str] = match language.to_lowercase().as_str() {
        "rust" => &["rs"],
        "python" => &["py"],
        "javascript" | "typescript" => &["js", "ts", "jsx", "tsx"],
        "java" => &["java"],
        "c" => &["c", "h"],
        "c++" | "cpp" => &["cpp", "cc", "cxx", "h", "hpp"],
        "c#" | "csharp" => &["cs"],
        "go" => &["go"],
        "ruby" => &["rb"],
        "php" => &["php"],
        "swift" => &["swift"],
        "kotlin" => &["kt"],
        "scala" => &["scala"],
        "r" => &["r"],
        "shell" | "bash" => &["sh", "bash"],
        "html" => &["html", "htm"],
        "css" => &["css", "scss", "sass"],
        "json" => &["json"],
        "yaml" | "yml" => &["yaml", "yml"],
        "toml" => &["toml"],
        "xml" => &["xml"],
        "markdown" => &["md"],
        _ => &[],
    };
    extensions.to_vec()
}
804
805/// Filters repository tree entries by relevance using tiered keyword matching.
806///
807/// Implements three-tier filtering:
808/// - Tier 1: Files matching keywords (max 35)
809/// - Tier 2: Language entry points (max 10)
810/// - Tier 3: Other relevant files (max 15)
811///
812/// Removes common non-source directories and limits results to 60 paths.
813///
814/// # Arguments
815///
816/// * `entries` - Raw tree entries from GitHub API
817/// * `language` - Repository primary language for extension filtering
818/// * `keywords` - Optional keywords extracted from issue title for relevance matching
819///
820/// # Returns
821///
822/// Filtered and sorted list of file paths (max 60).
823fn filter_tree_by_relevance(
824    entries: &[GitTreeEntry],
825    language: &str,
826    keywords: &[String],
827) -> Vec<String> {
828    let extensions = get_extensions_for_language(language);
829    let entry_points = entry_point_patterns(language);
830
831    // Filter to valid source files
832    let candidates: Vec<String> = entries
833        .iter()
834        .filter(|entry| {
835            // Only include files (blobs), not directories
836            if entry.type_ != "blob" {
837                return false;
838            }
839
840            // Exclude paths containing excluded directories
841            if EXCLUDE_PATTERNS.iter().any(|dir| entry.path.contains(dir)) {
842                return false;
843            }
844
845            // Filter by extension if language is recognized
846            if extensions.is_empty() {
847                // If language not recognized, include all files
848                true
849            } else {
850                extensions.iter().any(|ext| entry.path.ends_with(ext))
851            }
852        })
853        .map(|e| e.path.clone())
854        .collect();
855
856    // Tier 1: Files matching keywords (max 35)
857    let mut tier1: Vec<String> = Vec::new();
858    let mut remaining: Vec<String> = Vec::new();
859
860    for path in candidates {
861        let path_lower = path.to_lowercase();
862        let matches_keyword = keywords.iter().any(|kw| path_lower.contains(kw));
863
864        if matches_keyword && tier1.len() < 35 {
865            tier1.push(path);
866        } else {
867            remaining.push(path);
868        }
869    }
870
871    // Tier 2: Entry point files (max 10)
872    let mut tier2: Vec<String> = Vec::new();
873    let mut tier3_candidates: Vec<String> = Vec::new();
874
875    for path in remaining {
876        let is_entry_point = entry_points.iter().any(|ep| path.ends_with(ep));
877        let is_deprioritized = DEPRIORITIZE_PATTERNS.iter().any(|dp| path.contains(dp));
878
879        if is_entry_point && tier2.len() < 10 {
880            tier2.push(path);
881        } else if !is_deprioritized {
882            tier3_candidates.push(path);
883        }
884    }
885
886    // Tier 3: Other relevant files (max 15)
887    let mut tier3: Vec<String> = tier3_candidates.into_iter().take(15).collect();
888
889    // Combine and sort by depth within each tier
890    let mut result = tier1;
891    result.append(&mut tier2);
892    result.append(&mut tier3);
893
894    // Sort by path depth (fewer slashes first), then alphabetically
895    result.sort_by(|a, b| {
896        let depth_a = a.matches('/').count();
897        let depth_b = b.matches('/').count();
898        if depth_a == depth_b {
899            a.cmp(b)
900        } else {
901            depth_a.cmp(&depth_b)
902        }
903    });
904
905    // Limit to 60 paths
906    result.truncate(60);
907    result
908}
909
910/// Fetches the repository file tree from GitHub.
911///
912/// Attempts to fetch from the default branch (main, then master).
913/// Returns filtered list of source file paths based on repository language and optional keywords.
914///
915/// # Arguments
916///
917/// * `client` - Authenticated Octocrab client
918/// * `owner` - Repository owner
919/// * `repo` - Repository name
920/// * `language` - Repository primary language for filtering
921/// * `keywords` - Optional keywords extracted from issue title for relevance matching
922///
923/// # Errors
924///
925/// Returns an error if the API request fails (but not if tree is unavailable).
926#[cfg(not(target_arch = "wasm32"))]
927#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
928pub async fn fetch_repo_tree(
929    client: &Octocrab,
930    owner: &str,
931    repo: &str,
932    language: &str,
933    keywords: &[String],
934) -> Result<Vec<String>> {
935    debug!("Fetching repository tree");
936
937    // Try main branch first, then master
938    let branches = ["main", "master"];
939    let mut tree_response: Option<GitTreeResponse> = None;
940
941    for branch in &branches {
942        let route = format!("/repos/{owner}/{repo}/git/trees/{branch}?recursive=1");
943        let result = (|| async {
944            client
945                .get::<GitTreeResponse, _, _>(&route, None::<&()>)
946                .await
947                .map_err(|e| anyhow::anyhow!(e))
948        })
949        .retry(retry_backoff())
950        .notify(|err, dur| {
951            tracing::warn!(
952                error = %err,
953                retry_after = ?dur,
954                branch = %branch,
955                "Retrying fetch_repo_tree"
956            );
957        })
958        .await;
959
960        match result {
961            Ok(response) => {
962                tree_response = Some(response);
963                debug!(branch = %branch, "Fetched tree from branch");
964                break;
965            }
966            Err(e) => {
967                debug!(branch = %branch, error = %e, "Failed to fetch tree from branch");
968            }
969        }
970    }
971
972    let response =
973        tree_response.context("Failed to fetch repository tree from main or master branch")?;
974
975    let filtered = filter_tree_by_relevance(&response.tree, language, keywords);
976    debug!(count = filtered.len(), "Filtered tree entries");
977
978    Ok(filtered)
979}
980
981/// Fetches issues needing triage from a specific repository.
982///
983/// In default mode (force=false), returns issues that are either unlabeled OR missing a milestone.
984/// In force mode (force=true), returns ALL open issues with no filtering.
985///
986/// # Arguments
987///
988/// * `client` - The Octocrab GitHub client
989/// * `owner` - Repository owner
990/// * `repo` - Repository name
991/// * `since` - Optional RFC3339 timestamp to filter issues created after this date (client-side filtering)
992/// * `force` - If true, return all issues in the specified state; if false, filter to unlabeled or milestone-missing issues
993/// * `state` - Issue state filter (Open, Closed, or All)
994///
995/// # Errors
996///
997/// Returns an error if the REST API request fails.
998#[cfg(not(target_arch = "wasm32"))]
999#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
1000pub async fn fetch_issues_needing_triage(
1001    client: &Octocrab,
1002    owner: &str,
1003    repo: &str,
1004    since: Option<&str>,
1005    force: bool,
1006    state: octocrab::params::State,
1007) -> Result<Vec<UntriagedIssue>> {
1008    debug!("Fetching issues needing triage");
1009
1010    let issues_page: octocrab::Page<octocrab::models::issues::Issue> = client
1011        .issues(owner, repo)
1012        .list()
1013        .state(state)
1014        .per_page(100)
1015        .send()
1016        .await
1017        .context("Failed to fetch issues from repository")?;
1018
1019    let total_issues = issues_page.items.len();
1020
1021    let mut issues_needing_triage: Vec<UntriagedIssue> = issues_page
1022        .items
1023        .into_iter()
1024        .filter(|issue| {
1025            if force {
1026                true
1027            } else {
1028                issue.labels.is_empty() || issue.milestone.is_none()
1029            }
1030        })
1031        .map(|issue| UntriagedIssue {
1032            number: issue.number,
1033            title: issue.title,
1034            created_at: issue.created_at.to_rfc3339(),
1035            url: issue.html_url.to_string(),
1036        })
1037        .collect();
1038
1039    if let Some(since_date) = since
1040        && let Ok(since_timestamp) = chrono::DateTime::parse_from_rfc3339(since_date)
1041    {
1042        issues_needing_triage.retain(|issue| {
1043            if let Ok(created_at) = chrono::DateTime::parse_from_rfc3339(&issue.created_at) {
1044                created_at >= since_timestamp
1045            } else {
1046                true
1047            }
1048        });
1049    }
1050
1051    debug!(
1052        total_issues = total_issues,
1053        issues_needing_triage_count = issues_needing_triage.len(),
1054        "Fetched issues needing triage"
1055    );
1056
1057    Ok(issues_needing_triage)
1058}
1059
#[cfg(test)]
mod fetch_issues_needing_triage_tests {
    /// Local copy of the triage predicate exercised by these tests: force mode keeps
    /// everything, otherwise an issue is kept when it has no labels or no milestone.
    fn passes_filter(labels_empty: bool, milestone_none: bool, force: bool) -> bool {
        force || labels_empty || milestone_none
    }

    #[test]
    fn filter_logic_unlabeled_default_mode() {
        // No labels, no milestone, default mode: kept.
        assert!(passes_filter(true, true, false));
    }

    #[test]
    fn filter_logic_labeled_default_mode() {
        // Labeled but without a milestone, default mode: kept.
        assert!(passes_filter(false, true, false));
    }

    #[test]
    fn filter_logic_missing_milestone_default_mode() {
        // Missing milestone alone is enough in default mode: kept.
        assert!(passes_filter(false, true, false));
    }

    #[test]
    fn filter_logic_force_mode_returns_all() {
        // Fully triaged issue, but force mode bypasses the filter: kept.
        assert!(passes_filter(false, false, true));
    }

    #[test]
    fn filter_logic_fully_triaged_default_mode_excluded() {
        // Labeled and milestoned in default mode: excluded.
        assert!(!passes_filter(false, false, false));
    }
}
1137
#[cfg(test)]
mod tree_tests {
    use super::*;

    /// Builds a regular-file tree entry (`blob`, mode `100644`) for `path`.
    fn blob(path: &str, sha: &str) -> GitTreeEntry {
        GitTreeEntry {
            path: path.to_owned(),
            type_: "blob".to_owned(),
            mode: "100644".to_owned(),
            sha: sha.to_owned(),
        }
    }

    #[test]
    fn filter_tree_by_relevance_keyword_matching() {
        let entries = vec![
            blob("src/parser.rs", "abc123"),
            blob("src/main.rs", "def456"),
            blob("src/utils.rs", "ghi789"),
        ];
        let keywords = vec![String::from("parser")];

        let selected = filter_tree_by_relevance(&entries, "rust", &keywords);

        // The file whose path matches the keyword must be present.
        assert!(selected.contains(&String::from("src/parser.rs")));
    }

    #[test]
    fn filter_tree_by_relevance_entry_points() {
        let entries = vec![
            blob("src/lib.rs", "abc123"),
            blob("src/utils.rs", "def456"),
        ];
        let keywords: Vec<String> = Vec::new();

        let selected = filter_tree_by_relevance(&entries, "rust", &keywords);

        // With no keywords, the crate entry point must still be selected.
        assert!(selected.contains(&String::from("src/lib.rs")));
    }

    #[test]
    fn filter_tree_by_relevance_excludes_tests() {
        let entries = vec![
            blob("src/main.rs", "abc123"),
            blob("tests/integration_test.rs", "def456"),
        ];
        let keywords: Vec<String> = Vec::new();

        let selected = filter_tree_by_relevance(&entries, "rust", &keywords);

        assert!(!selected.contains(&String::from("tests/integration_test.rs")));
        assert!(selected.contains(&String::from("src/main.rs")));
    }

    #[test]
    fn get_extensions_for_language_rust() {
        let extensions = get_extensions_for_language("rust");
        assert_eq!(extensions, vec!["rs"]);
    }

    #[test]
    fn get_extensions_for_language_javascript() {
        let extensions = get_extensions_for_language("javascript");
        for expected in ["js", "ts", "jsx", "tsx"] {
            assert!(extensions.contains(&expected));
        }
    }

    #[test]
    fn get_extensions_for_language_unknown() {
        let extensions = get_extensions_for_language("unknown_language");
        assert!(extensions.is_empty());
    }
}
1236
#[cfg(test)]
mod merge_labels_tests {
    use super::*;

    /// Turns a list of string literals into the owned label list `merge_labels` takes.
    fn labels(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| String::from(*name)).collect()
    }

    #[test]
    fn preserves_existing_and_adds_new() {
        let current = labels(&["bug", "enhancement"]);
        let proposed = labels(&["documentation"]);

        let merged = merge_labels(&current, &proposed);

        assert_eq!(merged.len(), 3);
        assert!(merged.contains(&String::from("bug")));
        assert!(merged.contains(&String::from("enhancement")));
        assert!(merged.contains(&String::from("documentation")));
    }

    #[test]
    fn deduplicates_case_insensitive() {
        let current = labels(&["Bug"]);
        let proposed = labels(&["bug", "enhancement"]);

        let merged = merge_labels(&current, &proposed);

        // The existing spelling wins; the differently-cased duplicate is not added.
        assert_eq!(merged.len(), 2);
        assert!(merged.contains(&String::from("Bug")));
        assert!(merged.contains(&String::from("enhancement")));
    }

    #[test]
    fn skips_priority_when_existing_has_one() {
        // P1 (uppercase) exists, p2 suggested - should keep P1, skip p2, add bug
        let current = labels(&["P1"]);
        let proposed = labels(&["p2", "bug"]);

        let merged = merge_labels(&current, &proposed);

        assert_eq!(merged.len(), 2);
        assert!(merged.contains(&String::from("P1")));
        assert!(merged.contains(&String::from("bug")));
        assert!(!merged.contains(&String::from("p2")));
    }

    #[test]
    fn handles_empty_inputs() {
        // Empty existing: suggested labels pass through
        let only_suggested = merge_labels(&[], &[String::from("bug"), String::from("p1")]);
        assert_eq!(only_suggested.len(), 2);

        // Empty suggested: existing labels preserved
        let only_existing = merge_labels(&[String::from("bug")], &[]);
        assert_eq!(only_existing.len(), 1);
        assert!(only_existing.contains(&String::from("bug")));
    }

    #[test]
    fn filters_maintainer_only_labels() {
        let current: Vec<String> = Vec::new();
        let proposed = labels(&["good first issue", "help wanted", "bug"]);

        let merged = merge_labels(&current, &proposed);

        assert_eq!(merged.len(), 1);
        assert!(merged.contains(&String::from("bug")));
        assert!(!merged.contains(&String::from("good first issue")));
        assert!(!merged.contains(&String::from("help wanted")));
    }

    #[test]
    fn filters_maintainer_only_case_insensitive() {
        let current: Vec<String> = Vec::new();
        let proposed = labels(&["Good First Issue", "HELP WANTED", "enhancement"]);

        let merged = merge_labels(&current, &proposed);

        assert_eq!(merged.len(), 1);
        assert!(merged.contains(&String::from("enhancement")));
        assert!(!merged.contains(&String::from("Good First Issue")));
        assert!(!merged.contains(&String::from("HELP WANTED")));
    }

    #[test]
    fn skips_priority_prefix_when_existing_has_one() {
        // priority: high exists, priority: medium suggested - should keep priority: high,
        // skip priority: medium, add bug
        let current = labels(&["priority: high"]);
        let proposed = labels(&["priority: medium", "bug"]);

        let merged = merge_labels(&current, &proposed);

        assert_eq!(merged.len(), 2);
        assert!(merged.contains(&String::from("priority: high")));
        assert!(merged.contains(&String::from("bug")));
        assert!(!merged.contains(&String::from("priority: medium")));
    }

    #[test]
    fn skips_mixed_priority_formats_when_existing_has_one() {
        // p1 exists, priority: high suggested - should keep p1, skip priority: high, add bug
        let current = labels(&["p1"]);
        let proposed = labels(&["priority: high", "bug"]);

        let merged = merge_labels(&current, &proposed);

        assert_eq!(merged.len(), 2);
        assert!(merged.contains(&String::from("p1")));
        assert!(merged.contains(&String::from("bug")));
        assert!(!merged.contains(&String::from("priority: high")));
    }
}
1341#[cfg(test)]
1342mod label_tests {
1343    use super::*;
1344
1345    #[test]
1346    fn filter_labels_empty_input() {
1347        let labels = vec![];
1348        let filtered = filter_labels_by_relevance(&labels, 30);
1349        assert!(filtered.is_empty());
1350    }
1351
1352    #[test]
1353    fn filter_labels_zero_max() {
1354        let labels = vec![crate::ai::types::RepoLabel {
1355            name: "bug".to_string(),
1356            color: "ff0000".to_string(),
1357            description: "Bug report".to_string(),
1358        }];
1359        let filtered = filter_labels_by_relevance(&labels, 0);
1360        assert!(filtered.is_empty());
1361    }
1362
1363    #[test]
1364    fn filter_labels_priority_first() {
1365        let labels = vec![
1366            crate::ai::types::RepoLabel {
1367                name: "documentation".to_string(),
1368                color: "0075ca".to_string(),
1369                description: "Documentation".to_string(),
1370            },
1371            crate::ai::types::RepoLabel {
1372                name: "other".to_string(),
1373                color: "cccccc".to_string(),
1374                description: "Other".to_string(),
1375            },
1376            crate::ai::types::RepoLabel {
1377                name: "bug".to_string(),
1378                color: "ff0000".to_string(),
1379                description: "Bug".to_string(),
1380            },
1381        ];
1382        let filtered = filter_labels_by_relevance(&labels, 30);
1383        assert_eq!(filtered.len(), 3);
1384        assert_eq!(filtered[0].name, "documentation");
1385        assert_eq!(filtered[1].name, "bug");
1386        assert_eq!(filtered[2].name, "other");
1387    }
1388
1389    #[test]
1390    fn filter_labels_case_insensitive() {
1391        let labels = vec![
1392            crate::ai::types::RepoLabel {
1393                name: "Bug".to_string(),
1394                color: "ff0000".to_string(),
1395                description: "Bug".to_string(),
1396            },
1397            crate::ai::types::RepoLabel {
1398                name: "ENHANCEMENT".to_string(),
1399                color: "a2eeef".to_string(),
1400                description: "Enhancement".to_string(),
1401            },
1402        ];
1403        let filtered = filter_labels_by_relevance(&labels, 30);
1404        assert_eq!(filtered.len(), 2);
1405        assert_eq!(filtered[0].name, "Bug");
1406        assert_eq!(filtered[1].name, "ENHANCEMENT");
1407    }
1408
1409    #[test]
1410    fn filter_labels_over_limit_with_priorities() {
1411        let mut labels = vec![];
1412        for i in 0..20 {
1413            labels.push(crate::ai::types::RepoLabel {
1414                name: format!("label{i}"),
1415                color: "cccccc".to_string(),
1416                description: format!("Label {i}"),
1417            });
1418        }
1419        labels.push(crate::ai::types::RepoLabel {
1420            name: "bug".to_string(),
1421            color: "ff0000".to_string(),
1422            description: "Bug".to_string(),
1423        });
1424        labels.push(crate::ai::types::RepoLabel {
1425            name: "enhancement".to_string(),
1426            color: "a2eeef".to_string(),
1427            description: "Enhancement".to_string(),
1428        });
1429
1430        let filtered = filter_labels_by_relevance(&labels, 10);
1431        assert_eq!(filtered.len(), 10);
1432        assert_eq!(filtered[0].name, "bug");
1433        assert_eq!(filtered[1].name, "enhancement");
1434    }
1435}
1436
1437#[cfg(test)]
1438mod tests {
1439    use super::*;
1440
1441    // Smoke test to verify parse_issue_reference delegates correctly.
1442    // Comprehensive parsing tests are in github/mod.rs.
1443    #[test]
1444    fn parse_issue_reference_delegates_to_shared() {
1445        let (owner, repo, number) =
1446            parse_issue_reference("https://github.com/block/goose/issues/5836", None).unwrap();
1447        assert_eq!(owner, "block");
1448        assert_eq!(repo, "goose");
1449        assert_eq!(number, 5836);
1450    }
1451
1452    #[test]
1453    fn extract_keywords_filters_stop_words() {
1454        let title = "The issue is about a bug in the CLI";
1455        let keywords = extract_keywords(title);
1456        assert!(!keywords.contains(&"the".to_string()));
1457        assert!(!keywords.contains(&"is".to_string()));
1458        assert!(!keywords.contains(&"a".to_string()));
1459        assert!(keywords.contains(&"issue".to_string()));
1460        assert!(keywords.contains(&"bug".to_string()));
1461        assert!(keywords.contains(&"cli".to_string()));
1462    }
1463
1464    #[test]
1465    fn extract_keywords_limits_to_five() {
1466        let title = "one two three four five six seven eight nine ten";
1467        let keywords = extract_keywords(title);
1468        assert_eq!(keywords.len(), 5);
1469    }
1470
1471    #[test]
1472    fn extract_keywords_empty_title() {
1473        let title = "the a an and or";
1474        let keywords = extract_keywords(title);
1475        assert!(keywords.is_empty());
1476    }
1477
1478    #[test]
1479    fn extract_keywords_lowercase_conversion() {
1480        let title = "CLI Bug FIX";
1481        let keywords = extract_keywords(title);
1482        assert!(keywords.iter().all(|k| k.chars().all(char::is_lowercase)));
1483    }
1484}