aptu_core/github/
issues.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! GitHub issue operations for the triage command.
4//!
5//! Provides functionality to parse issue URLs, fetch issue details,
6//! and post triage comments.
7
8use anyhow::{Context, Result};
9use octocrab::Octocrab;
10use serde::{Deserialize, Serialize};
11use tracing::{debug, instrument};
12
13use crate::ai::types::{IssueComment, IssueDetails, RepoIssueContext};
14
15/// A GitHub issue without labels (untriaged).
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct UntriagedIssue {
18    /// Issue number.
19    pub number: u64,
20    /// Issue title.
21    pub title: String,
22    /// Creation timestamp (ISO 8601).
23    pub created_at: String,
24    /// Issue URL.
25    pub url: String,
26}
27
28/// A single entry in a Git tree response.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct GitTreeEntry {
31    /// File path relative to repository root.
32    pub path: String,
33    /// Type of entry: "blob" (file) or "tree" (directory).
34    #[serde(rename = "type")]
35    pub type_: String,
36    /// File mode (e.g., "100644" for regular files).
37    pub mode: String,
38    /// SHA-1 hash of the entry.
39    pub sha: String,
40}
41
42/// Response from GitHub Git Trees API.
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct GitTreeResponse {
45    /// List of entries in the tree.
46    pub tree: Vec<GitTreeEntry>,
47    /// Whether the tree is truncated (too many entries).
48    pub truncated: bool,
49}
50
51/// Parses an owner/repo string to extract owner and repo.
52///
53/// Validates format: exactly one `/`, non-empty parts.
54///
55/// # Errors
56///
57/// Returns an error if the format is invalid.
58pub fn parse_owner_repo(s: &str) -> Result<(String, String)> {
59    let parts: Vec<&str> = s.split('/').collect();
60    if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
61        anyhow::bail!(
62            "Invalid owner/repo format.\n\
63             Expected: owner/repo\n\
64             Got: {s}"
65        );
66    }
67    Ok((parts[0].to_string(), parts[1].to_string()))
68}
69
70/// Parses a GitHub issue reference in multiple formats.
71///
72/// Supports:
73/// - Full URL: `https://github.com/owner/repo/issues/123`
74/// - Short form: `owner/repo#123`
75/// - Bare number: `123` (requires `repo_context`)
76///
77/// # Arguments
78///
79/// * `input` - The issue reference to parse
80/// * `repo_context` - Optional repository context for bare numbers (e.g., "owner/repo")
81///
82/// # Errors
83///
84/// Returns an error if the format is invalid or bare number is used without context.
85pub fn parse_issue_reference(
86    input: &str,
87    repo_context: Option<&str>,
88) -> Result<(String, String, u64)> {
89    let input = input.trim();
90
91    // Try full URL first
92    if input.starts_with("https://") || input.starts_with("http://") {
93        // Remove trailing fragments and query params
94        let clean_url = input.split('#').next().unwrap_or(input);
95        let clean_url = clean_url.split('?').next().unwrap_or(clean_url);
96
97        // Parse the URL path
98        let parts: Vec<&str> = clean_url.trim_end_matches('/').split('/').collect();
99
100        // Expected: ["https:", "", "github.com", "owner", "repo", "issues", "123"]
101        if parts.len() < 7 {
102            anyhow::bail!(
103                "Invalid GitHub issue URL format.\n\
104                 Expected: https://github.com/owner/repo/issues/123\n\
105                 Got: {input}"
106            );
107        }
108
109        // Verify it's a github.com URL
110        if !parts[2].contains("github.com") {
111            anyhow::bail!(
112                "URL must be a GitHub issue URL.\n\
113                 Expected: https://github.com/owner/repo/issues/123\n\
114                 Got: {input}"
115            );
116        }
117
118        // Verify it's an issues path
119        if parts[5] != "issues" {
120            anyhow::bail!(
121                "URL must point to a GitHub issue.\n\
122                 Expected: https://github.com/owner/repo/issues/123\n\
123                 Got: {input}"
124            );
125        }
126
127        let owner = parts[3].to_string();
128        let repo = parts[4].to_string();
129        let number: u64 = parts[6].parse().with_context(|| {
130            format!(
131                "Invalid issue number '{}' in URL.\n\
132                 Expected a numeric issue number.",
133                parts[6]
134            )
135        })?;
136
137        debug!(owner = %owner, repo = %repo, number = number, "Parsed issue URL");
138        return Ok((owner, repo, number));
139    }
140
141    // Try short form: owner/repo#123
142    if let Some(hash_pos) = input.find('#') {
143        let owner_repo_part = &input[..hash_pos];
144        let number_part = &input[hash_pos + 1..];
145
146        let (owner, repo) = parse_owner_repo(owner_repo_part)?;
147        let number: u64 = number_part.parse().with_context(|| {
148            format!(
149                "Invalid issue number '{number_part}' in short form.\n\
150                 Expected: owner/repo#123\n\
151                 Got: {input}"
152            )
153        })?;
154
155        debug!(owner = %owner, repo = %repo, number = number, "Parsed short-form issue reference");
156        return Ok((owner, repo, number));
157    }
158
159    // Try bare number: 123 (requires repo_context)
160    if let Ok(number) = input.parse::<u64>() {
161        let repo_context = repo_context.ok_or_else(|| {
162            anyhow::anyhow!(
163                "Bare issue number requires repository context.\n\
164                 Use one of:\n\
165                 - Full URL: https://github.com/owner/repo/issues/123\n\
166                 - Short form: owner/repo#123\n\
167                 - Bare number with --repo flag: 123 --repo owner/repo\n\
168                 Got: {input}"
169            )
170        })?;
171
172        let (owner, repo) = parse_owner_repo(repo_context)?;
173        debug!(owner = %owner, repo = %repo, number = number, "Parsed bare issue number");
174        return Ok((owner, repo, number));
175    }
176
177    // If we get here, it's an invalid format
178    anyhow::bail!(
179        "Invalid issue reference format.\n\
180         Expected one of:\n\
181         - Full URL: https://github.com/owner/repo/issues/123\n\
182         - Short form: owner/repo#123\n\
183         - Bare number with --repo flag: 123 --repo owner/repo\n\
184         Got: {input}"
185    );
186}
187
188/// Fetches issue details including comments from GitHub.
189///
190/// # Errors
191///
192/// Returns an error if the API request fails or the issue is not found.
193#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
194pub async fn fetch_issue_with_comments(
195    client: &Octocrab,
196    owner: &str,
197    repo: &str,
198    number: u64,
199) -> Result<IssueDetails> {
200    debug!("Fetching issue details");
201
202    // Fetch the issue
203    let issue = client
204        .issues(owner, repo)
205        .get(number)
206        .await
207        .with_context(|| format!("Failed to fetch issue #{number} from {owner}/{repo}"))?;
208
209    // Fetch comments (limited to first page)
210    let comments_page = client
211        .issues(owner, repo)
212        .list_comments(number)
213        .per_page(5)
214        .send()
215        .await
216        .with_context(|| format!("Failed to fetch comments for issue #{number}"))?;
217
218    // Convert to our types
219    let labels: Vec<String> = issue.labels.iter().map(|l| l.name.clone()).collect();
220
221    let comments: Vec<IssueComment> = comments_page
222        .items
223        .iter()
224        .map(|c| IssueComment {
225            author: c.user.login.clone(),
226            body: c.body.clone().unwrap_or_default(),
227        })
228        .collect();
229
230    let issue_url = issue.html_url.to_string();
231
232    let details = IssueDetails {
233        owner: owner.to_string(),
234        repo: repo.to_string(),
235        number,
236        title: issue.title,
237        body: issue.body.unwrap_or_default(),
238        labels,
239        comments,
240        url: issue_url,
241        repo_context: Vec::new(),
242        repo_tree: Vec::new(),
243        available_labels: Vec::new(),
244        available_milestones: Vec::new(),
245        viewer_permission: None,
246    };
247
248    debug!(
249        labels = details.labels.len(),
250        comments = details.comments.len(),
251        "Fetched issue details"
252    );
253
254    Ok(details)
255}
256
/// Extracts keywords from an issue title for search and relevance matching.
///
/// The title is lowercased and split on every non-alphanumeric character.
/// Empty fragments, common English stop words and words that were already
/// collected are skipped, so a repeated word does not use up one of the
/// available slots. Keywords keep the order of their first appearance.
///
/// # Arguments
///
/// * `title` - Issue title to extract keywords from
///
/// # Returns
///
/// Vector of distinct lowercase keywords (max 5), excluding stop words.
pub fn extract_keywords(title: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "he", "in", "is",
        "it", "its", "of", "on", "or", "that", "the", "to", "was", "will", "with",
    ];
    const MAX_KEYWORDS: usize = 5;

    let lowered = title.to_lowercase();
    let mut keywords: Vec<String> = Vec::with_capacity(MAX_KEYWORDS);

    for word in lowered.split(|c: char| !c.is_alphanumeric()) {
        if word.is_empty() || STOP_WORDS.contains(&word) {
            continue;
        }
        // At most five entries, so a linear scan is cheaper than a hash set.
        if keywords.iter().any(|seen| seen == word) {
            continue;
        }
        keywords.push(word.to_string());
        if keywords.len() == MAX_KEYWORDS {
            break;
        }
    }

    keywords
}
286
287/// Searches for related issues in a repository based on title keywords.
288///
289/// Extracts keywords from the issue title and searches the repository
290/// for matching issues. Returns up to 20 results, excluding the specified issue.
291///
292/// # Arguments
293///
294/// * `client` - Authenticated Octocrab client
295/// * `owner` - Repository owner
296/// * `repo` - Repository name
297/// * `title` - Issue title to extract keywords from
298/// * `exclude_number` - Issue number to exclude from results
299///
300/// # Errors
301///
302/// Returns an error if the search API request fails.
303#[instrument(skip(client), fields(owner = %owner, repo = %repo, exclude_number = %exclude_number))]
304pub async fn search_related_issues(
305    client: &Octocrab,
306    owner: &str,
307    repo: &str,
308    title: &str,
309    exclude_number: u64,
310) -> Result<Vec<RepoIssueContext>> {
311    let keywords = extract_keywords(title);
312
313    if keywords.is_empty() {
314        debug!("No keywords extracted from title");
315        return Ok(Vec::new());
316    }
317
318    // Build search query: keyword1 keyword2 ... repo:owner/repo is:issue
319    let query = format!("{} repo:{}/{} is:issue", keywords.join(" "), owner, repo);
320
321    debug!(query = %query, "Searching for related issues");
322
323    // Search for issues
324    let search_result = client
325        .search()
326        .issues_and_pull_requests(&query)
327        .per_page(20)
328        .send()
329        .await
330        .with_context(|| format!("Failed to search for related issues in {owner}/{repo}"))?;
331
332    // Convert to our context type
333    let related: Vec<RepoIssueContext> = search_result
334        .items
335        .iter()
336        .filter_map(|item| {
337            // Only include issues (not PRs)
338            if item.pull_request.is_some() {
339                return None;
340            }
341
342            // Exclude the issue being triaged
343            if item.number == exclude_number {
344                return None;
345            }
346
347            Some(RepoIssueContext {
348                number: item.number,
349                title: item.title.clone(),
350                labels: item.labels.iter().map(|l| l.name.clone()).collect(),
351                state: format!("{:?}", item.state).to_lowercase(),
352            })
353        })
354        .collect();
355
356    debug!(count = related.len(), "Found related issues");
357
358    Ok(related)
359}
360
361/// Posts a triage comment to a GitHub issue.
362///
363/// # Returns
364///
365/// The URL of the created comment.
366///
367/// # Errors
368///
369/// Returns an error if the API request fails.
370#[instrument(skip(client, body), fields(owner = %owner, repo = %repo, number = number))]
371pub async fn post_comment(
372    client: &Octocrab,
373    owner: &str,
374    repo: &str,
375    number: u64,
376    body: &str,
377) -> Result<String> {
378    debug!("Posting triage comment");
379
380    let comment = client
381        .issues(owner, repo)
382        .create_comment(number, body)
383        .await
384        .with_context(|| format!("Failed to post comment to issue #{number}"))?;
385
386    let comment_url = comment.html_url.to_string();
387
388    debug!(url = %comment_url, "Comment posted successfully");
389
390    Ok(comment_url)
391}
392
393/// Creates a new GitHub issue.
394///
395/// Posts a new issue with the given title and body to the repository.
396/// Returns the issue URL and issue number.
397///
398/// # Arguments
399///
400/// * `client` - Authenticated Octocrab client
401/// * `owner` - Repository owner
402/// * `repo` - Repository name
403/// * `title` - Issue title
404/// * `body` - Issue body (markdown)
405///
406/// # Errors
407///
408/// Returns an error if the GitHub API call fails.
409#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
410pub async fn create_issue(
411    client: &Octocrab,
412    owner: &str,
413    repo: &str,
414    title: &str,
415    body: &str,
416) -> Result<(String, u64)> {
417    debug!("Creating GitHub issue");
418
419    let issue = client
420        .issues(owner, repo)
421        .create(title)
422        .body(body)
423        .send()
424        .await
425        .with_context(|| format!("Failed to create issue in {owner}/{repo}"))?;
426
427    let issue_url = issue.html_url.to_string();
428    let issue_number = issue.number;
429
430    debug!(number = issue_number, url = %issue_url, "Issue created successfully");
431
432    Ok((issue_url, issue_number))
433}
434
/// Outcome of trying to apply suggested labels and a milestone to an issue.
#[derive(Clone, Debug)]
pub struct ApplyResult {
    /// Labels that were applied to the issue.
    pub applied_labels: Vec<String>,
    /// Title of the milestone that was applied, or `None` if none was.
    pub applied_milestone: Option<String>,
    /// One message per suggested label or milestone that could not be applied.
    pub warnings: Vec<String>,
}
445
446/// Updates an issue with labels and milestone.
447///
448/// Validates suggested labels and milestone against available options before applying.
449/// Returns what was actually applied and any warnings.
450///
451/// # Errors
452///
453/// Returns an error if the GitHub API call fails.
454#[instrument(skip(client), fields(owner = %owner, repo = %repo, number = number))]
455#[allow(clippy::too_many_arguments)]
456pub async fn update_issue_labels_and_milestone(
457    client: &Octocrab,
458    owner: &str,
459    repo: &str,
460    number: u64,
461    suggested_labels: &[String],
462    suggested_milestone: Option<&str>,
463    available_labels: &[crate::ai::types::RepoLabel],
464    available_milestones: &[crate::ai::types::RepoMilestone],
465) -> Result<ApplyResult> {
466    debug!("Updating issue with labels and milestone");
467
468    let mut applied_labels = Vec::new();
469    let mut warnings = Vec::new();
470
471    // Validate and collect labels
472    let available_label_names: std::collections::HashSet<_> =
473        available_labels.iter().map(|l| l.name.as_str()).collect();
474
475    for label in suggested_labels {
476        if available_label_names.contains(label.as_str()) {
477            applied_labels.push(label.clone());
478        } else {
479            warnings.push(format!("Label '{label}' not found in repository"));
480        }
481    }
482
483    // Validate and find milestone
484    let mut applied_milestone = None;
485    if let Some(milestone_title) = suggested_milestone {
486        if let Some(milestone) = available_milestones
487            .iter()
488            .find(|m| m.title == milestone_title)
489        {
490            applied_milestone = Some(milestone.title.clone());
491        } else {
492            warnings.push(format!(
493                "Milestone '{milestone_title}' not found in repository"
494            ));
495        }
496    }
497
498    // Apply updates to the issue
499    let issues_handler = client.issues(owner, repo);
500    let mut update_builder = issues_handler.update(number);
501
502    if !applied_labels.is_empty() {
503        update_builder = update_builder.labels(&applied_labels);
504    }
505
506    #[allow(clippy::collapsible_if)]
507    if let Some(milestone_title) = &applied_milestone {
508        if let Some(milestone) = available_milestones
509            .iter()
510            .find(|m| &m.title == milestone_title)
511        {
512            update_builder = update_builder.milestone(milestone.number);
513        }
514    }
515
516    update_builder
517        .send()
518        .await
519        .with_context(|| format!("Failed to update issue #{number}"))?;
520
521    debug!(
522        labels = ?applied_labels,
523        milestone = ?applied_milestone,
524        warnings = ?warnings,
525        "Issue updated successfully"
526    );
527
528    Ok(ApplyResult {
529        applied_labels,
530        applied_milestone,
531        warnings,
532    })
533}
534
/// Directory patterns whose contents are dropped entirely during tree filtering.
///
/// Per the original author's note, the list is based on GitHub Linguist's
/// `vendor.yml` plus common build-artifact directories. Entries are kept in
/// alphabetical order; the order carries no meaning.
const EXCLUDE_PATTERNS: &[&str] = &[
    ".git/",
    "build/",
    "cache/",
    "dist/",
    "docs/",
    "examples/",
    "node_modules/",
    "target/",
    "vendor/",
];
548
/// Directory patterns for test and benchmark code, which is treated as less
/// relevant to issue triage than regular sources.
///
/// Unlike the excluded directories, files under these can still be selected
/// by the relevance filter. Entries are kept in alphabetical order; the order
/// carries no meaning.
const DEPRIORITIZE_PATTERNS: &[&str] = &[
    "bench/",
    "eval/",
    "fixtures/",
    "mocks/",
    "spec/",
    "test/",
    "tests/",
];
560
/// Lists the conventional entry-point file names for a language.
///
/// The language name is matched case-insensitively. Languages without a
/// known entry point yield an empty list.
fn entry_point_patterns(language: &str) -> Vec<&'static str> {
    let normalized = language.to_lowercase();
    let names: &'static [&'static str] = match normalized.as_str() {
        "rust" => &["lib.rs", "mod.rs", "main.rs"],
        "python" => &["__init__.py"],
        "javascript" | "typescript" => &["index.ts", "index.js"],
        "java" => &["Main.java"],
        "go" => &["main.go"],
        "c#" | "csharp" => &["Program.cs"],
        _ => &[],
    };
    names.to_vec()
}
574
/// Looks up the file extensions commonly used by a programming language.
///
/// The language name is matched case-insensitively against a fixed table of
/// names and aliases. Extensions are returned without a leading dot; an
/// unrecognized language yields an empty list.
fn get_extensions_for_language(language: &str) -> Vec<&'static str> {
    // (accepted language names, extensions) pairs.
    const EXTENSION_TABLE: &[(&[&str], &[&str])] = &[
        (&["rust"], &["rs"]),
        (&["python"], &["py"]),
        (&["javascript", "typescript"], &["js", "ts", "jsx", "tsx"]),
        (&["java"], &["java"]),
        (&["c"], &["c", "h"]),
        (&["c++", "cpp"], &["cpp", "cc", "cxx", "h", "hpp"]),
        (&["c#", "csharp"], &["cs"]),
        (&["go"], &["go"]),
        (&["ruby"], &["rb"]),
        (&["php"], &["php"]),
        (&["swift"], &["swift"]),
        (&["kotlin"], &["kt"]),
        (&["scala"], &["scala"]),
        (&["r"], &["r"]),
        (&["shell", "bash"], &["sh", "bash"]),
        (&["html"], &["html", "htm"]),
        (&["css"], &["css", "scss", "sass"]),
        (&["json"], &["json"]),
        (&["yaml", "yml"], &["yaml", "yml"]),
        (&["toml"], &["toml"]),
        (&["xml"], &["xml"]),
        (&["markdown"], &["md"]),
    ];

    let wanted = language.to_lowercase();
    EXTENSION_TABLE
        .iter()
        .find(|row| row.0.iter().any(|name| *name == wanted))
        .map(|row| row.1.to_vec())
        .unwrap_or_default()
}
603
604/// Filters repository tree entries by language-specific extensions.
605///
606/// Removes common non-source directories and limits results to 50 paths.
607/// Prioritizes shallow paths (fewer `/` characters).
608/// This is a legacy function kept for backward compatibility with existing tests.
609///
610/// # Arguments
611///
612/// * `entries` - Raw tree entries from GitHub API
613/// * `language` - Repository primary language for extension filtering
614///
615/// # Returns
616///
617/// Filtered and sorted list of file paths (max 50).
618#[allow(dead_code)]
619fn filter_tree_by_language(entries: &[GitTreeEntry], language: &str) -> Vec<String> {
620    let extensions = get_extensions_for_language(language);
621    let exclude_dirs = [
622        "node_modules/",
623        "target/",
624        "dist/",
625        "build/",
626        ".git/",
627        "vendor/",
628        "test",
629        "spec",
630        "mock",
631        "fixture",
632    ];
633
634    let mut filtered: Vec<String> = entries
635        .iter()
636        .filter(|entry| {
637            // Only include files (blobs), not directories
638            if entry.type_ != "blob" {
639                return false;
640            }
641
642            // Exclude paths containing excluded directories
643            if exclude_dirs.iter().any(|dir| entry.path.contains(dir)) {
644                return false;
645            }
646
647            // Filter by extension if language is recognized
648            if extensions.is_empty() {
649                // If language not recognized, include all files
650                true
651            } else {
652                extensions.iter().any(|ext| entry.path.ends_with(ext))
653            }
654        })
655        .map(|e| e.path.clone())
656        .collect();
657
658    // Sort by path depth (fewer slashes first), then alphabetically
659    filtered.sort_by(|a, b| {
660        let depth_a = a.matches('/').count();
661        let depth_b = b.matches('/').count();
662        if depth_a == depth_b {
663            a.cmp(b)
664        } else {
665            depth_a.cmp(&depth_b)
666        }
667    });
668
669    // Limit to 50 paths
670    filtered.truncate(50);
671    filtered
672}
673
674/// Filters repository tree entries by relevance using tiered keyword matching.
675///
676/// Implements three-tier filtering:
677/// - Tier 1: Files matching keywords (max 35)
678/// - Tier 2: Language entry points (max 10)
679/// - Tier 3: Other relevant files (max 15)
680///
681/// Removes common non-source directories and limits results to 60 paths.
682///
683/// # Arguments
684///
685/// * `entries` - Raw tree entries from GitHub API
686/// * `language` - Repository primary language for extension filtering
687/// * `keywords` - Optional keywords extracted from issue title for relevance matching
688///
689/// # Returns
690///
691/// Filtered and sorted list of file paths (max 60).
692fn filter_tree_by_relevance(
693    entries: &[GitTreeEntry],
694    language: &str,
695    keywords: &[String],
696) -> Vec<String> {
697    let extensions = get_extensions_for_language(language);
698    let entry_points = entry_point_patterns(language);
699
700    // Filter to valid source files
701    let candidates: Vec<String> = entries
702        .iter()
703        .filter(|entry| {
704            // Only include files (blobs), not directories
705            if entry.type_ != "blob" {
706                return false;
707            }
708
709            // Exclude paths containing excluded directories
710            if EXCLUDE_PATTERNS.iter().any(|dir| entry.path.contains(dir)) {
711                return false;
712            }
713
714            // Filter by extension if language is recognized
715            if extensions.is_empty() {
716                // If language not recognized, include all files
717                true
718            } else {
719                extensions.iter().any(|ext| entry.path.ends_with(ext))
720            }
721        })
722        .map(|e| e.path.clone())
723        .collect();
724
725    // Tier 1: Files matching keywords (max 35)
726    let mut tier1: Vec<String> = Vec::new();
727    let mut remaining: Vec<String> = Vec::new();
728
729    for path in candidates {
730        let path_lower = path.to_lowercase();
731        let matches_keyword = keywords.iter().any(|kw| path_lower.contains(kw));
732
733        if matches_keyword && tier1.len() < 35 {
734            tier1.push(path);
735        } else {
736            remaining.push(path);
737        }
738    }
739
740    // Tier 2: Entry point files (max 10)
741    let mut tier2: Vec<String> = Vec::new();
742    let mut tier3_candidates: Vec<String> = Vec::new();
743
744    for path in remaining {
745        let is_entry_point = entry_points.iter().any(|ep| path.ends_with(ep));
746        let is_deprioritized = DEPRIORITIZE_PATTERNS.iter().any(|dp| path.contains(dp));
747
748        if is_entry_point && tier2.len() < 10 {
749            tier2.push(path);
750        } else if !is_deprioritized {
751            tier3_candidates.push(path);
752        }
753    }
754
755    // Tier 3: Other relevant files (max 15)
756    let mut tier3: Vec<String> = tier3_candidates.into_iter().take(15).collect();
757
758    // Combine and sort by depth within each tier
759    let mut result = tier1;
760    result.append(&mut tier2);
761    result.append(&mut tier3);
762
763    // Sort by path depth (fewer slashes first), then alphabetically
764    result.sort_by(|a, b| {
765        let depth_a = a.matches('/').count();
766        let depth_b = b.matches('/').count();
767        if depth_a == depth_b {
768            a.cmp(b)
769        } else {
770            depth_a.cmp(&depth_b)
771        }
772    });
773
774    // Limit to 60 paths
775    result.truncate(60);
776    result
777}
778
779/// Fetches the repository file tree from GitHub.
780///
781/// Attempts to fetch from the default branch (main, then master).
782/// Returns filtered list of source file paths based on repository language and optional keywords.
783///
784/// # Arguments
785///
786/// * `client` - Authenticated Octocrab client
787/// * `owner` - Repository owner
788/// * `repo` - Repository name
789/// * `language` - Repository primary language for filtering
790/// * `keywords` - Optional keywords extracted from issue title for relevance matching
791///
792/// # Errors
793///
794/// Returns an error if the API request fails (but not if tree is unavailable).
795#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
796pub async fn fetch_repo_tree(
797    client: &Octocrab,
798    owner: &str,
799    repo: &str,
800    language: &str,
801    keywords: &[String],
802) -> Result<Vec<String>> {
803    debug!("Fetching repository tree");
804
805    // Try main branch first, then master
806    let branches = ["main", "master"];
807    let mut tree_response: Option<GitTreeResponse> = None;
808
809    for branch in &branches {
810        let route = format!("/repos/{owner}/{repo}/git/trees/{branch}?recursive=1");
811        match client
812            .get::<GitTreeResponse, _, _>(&route, None::<&()>)
813            .await
814        {
815            Ok(response) => {
816                tree_response = Some(response);
817                debug!(branch = %branch, "Fetched tree from branch");
818                break;
819            }
820            Err(e) => {
821                debug!(branch = %branch, error = %e, "Failed to fetch tree from branch");
822            }
823        }
824    }
825
826    let response =
827        tree_response.context("Failed to fetch repository tree from main or master branch")?;
828
829    let filtered = filter_tree_by_relevance(&response.tree, language, keywords);
830    debug!(count = filtered.len(), "Filtered tree entries");
831
832    Ok(filtered)
833}
834
835/// Fetches untriaged issues (those without any labels) from a specific repository.
836///
837/// # Arguments
838///
839/// * `client` - The Octocrab GitHub client
840/// * `owner` - Repository owner
841/// * `repo` - Repository name
842/// * `since` - Optional RFC3339 timestamp to filter issues created after this date (client-side filtering)
843///
844/// # Errors
845///
846/// Returns an error if the REST API request fails.
847#[instrument(skip(client), fields(owner = %owner, repo = %repo))]
848pub async fn fetch_untriaged_issues(
849    client: &Octocrab,
850    owner: &str,
851    repo: &str,
852    since: Option<&str>,
853) -> Result<Vec<UntriagedIssue>> {
854    debug!("Fetching untriaged issues");
855
856    let issues_page: octocrab::Page<octocrab::models::issues::Issue> = client
857        .issues(owner, repo)
858        .list()
859        .state(octocrab::params::State::Open)
860        .per_page(100)
861        .send()
862        .await
863        .context("Failed to fetch issues from repository")?;
864
865    let total_issues = issues_page.items.len();
866
867    let mut untriaged: Vec<UntriagedIssue> = issues_page
868        .items
869        .into_iter()
870        .filter(|issue| issue.labels.is_empty())
871        .map(|issue| UntriagedIssue {
872            number: issue.number,
873            title: issue.title,
874            created_at: issue.created_at.to_rfc3339(),
875            url: issue.html_url.to_string(),
876        })
877        .collect();
878
879    if let Some(since_date) = since
880        && let Ok(since_timestamp) = chrono::DateTime::parse_from_rfc3339(since_date)
881    {
882        untriaged.retain(|issue| {
883            if let Ok(created_at) = chrono::DateTime::parse_from_rfc3339(&issue.created_at) {
884                created_at >= since_timestamp
885            } else {
886                true
887            }
888        });
889    }
890
891    debug!(
892        total_issues = total_issues,
893        untriaged_count = untriaged.len(),
894        "Fetched untriaged issues"
895    );
896
897    Ok(untriaged)
898}
899
#[cfg(test)]
mod tree_tests {
    //! Tests for the repository-tree filtering helpers.

    use super::*;

    /// Builds a regular-file tree entry (`type` = `"blob"`, mode `100644`).
    fn blob(path: &str, sha: &str) -> GitTreeEntry {
        GitTreeEntry {
            path: path.to_string(),
            type_: "blob".to_string(),
            mode: "100644".to_string(),
            sha: sha.to_string(),
        }
    }

    /// Builds a directory tree entry (`type` = `"tree"`, mode `040000`).
    fn dir(path: &str, sha: &str) -> GitTreeEntry {
        GitTreeEntry {
            path: path.to_string(),
            type_: "tree".to_string(),
            mode: "040000".to_string(),
            sha: sha.to_string(),
        }
    }

    /// A file whose path contains one of the keywords is part of the result.
    #[test]
    fn filter_tree_by_relevance_keyword_matching() {
        let entries = vec![
            blob("src/parser.rs", "abc123"),
            blob("src/main.rs", "def456"),
            blob("src/utils.rs", "ghi789"),
        ];

        let keywords = vec!["parser".to_string()];
        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);
        assert!(filtered.contains(&"src/parser.rs".to_string()));
    }

    /// Without any keywords, `src/lib.rs` is still part of the result
    /// (presumably because it is treated as an entry point).
    #[test]
    fn filter_tree_by_relevance_entry_points() {
        let entries = vec![
            blob("src/lib.rs", "abc123"),
            blob("src/utils.rs", "def456"),
        ];

        let keywords: Vec<String> = Vec::new();
        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);
        assert!(filtered.contains(&"src/lib.rs".to_string()));
    }

    /// Files under `tests/` are dropped while regular sources are kept.
    #[test]
    fn filter_tree_by_relevance_excludes_tests() {
        let entries = vec![
            blob("src/main.rs", "abc123"),
            blob("tests/integration_test.rs", "def456"),
        ];

        let keywords: Vec<String> = Vec::new();
        let filtered = filter_tree_by_relevance(&entries, "rust", &keywords);
        assert!(!filtered.contains(&"tests/integration_test.rs".to_string()));
        assert!(filtered.contains(&"src/main.rs".to_string()));
    }

    /// Paths under `node_modules/` never make it into the result.
    #[test]
    fn filter_tree_excludes_node_modules() {
        let entries = vec![
            blob("src/main.rs", "abc123"),
            blob("node_modules/package/index.js", "def456"),
        ];

        let filtered = filter_tree_by_language(&entries, "rust");
        // Comparing the whole vector checks length and content at once and
        // prints the full result on failure.
        assert_eq!(filtered, ["src/main.rs"]);
    }

    /// Directory (`"tree"`) entries are skipped; only files are returned.
    #[test]
    fn filter_tree_excludes_directories() {
        let entries = vec![blob("src/main.rs", "abc123"), dir("src/lib", "def456")];

        let filtered = filter_tree_by_language(&entries, "rust");
        assert_eq!(filtered, ["src/main.rs"]);
    }

    /// Results are ordered from the shallowest path to the deepest one.
    #[test]
    fn filter_tree_sorts_by_depth() {
        // Deliberately supplied deepest-first so the ordering is observable.
        let entries = vec![
            blob("a/b/c/d.rs", "abc123"),
            blob("a/b.rs", "def456"),
            blob("main.rs", "ghi789"),
        ];

        let filtered = filter_tree_by_language(&entries, "rust");
        // Whole-vector comparison: a short result fails with a readable diff
        // instead of an index-out-of-bounds panic.
        assert_eq!(filtered, ["main.rs", "a/b.rs", "a/b/c/d.rs"]);
    }

    /// At most 50 paths are returned even when more files match.
    #[test]
    fn filter_tree_limits_to_50() {
        let entries: Vec<GitTreeEntry> = (0..100)
            .map(|i| blob(&format!("file{i}.rs"), &format!("sha{i}")))
            .collect();

        let filtered = filter_tree_by_language(&entries, "rust");
        assert_eq!(filtered.len(), 50);
    }

    /// For `"rust"`, `.rs` files are kept and `.py` files are dropped.
    #[test]
    fn filter_tree_by_language_rust() {
        let entries = vec![
            blob("src/main.rs", "abc123"),
            blob("src/lib.py", "def456"),
        ];

        let filtered = filter_tree_by_language(&entries, "rust");
        assert_eq!(filtered, ["src/main.rs"]);
    }

    /// For `"python"`, `.py` files are kept and `.rs` files are dropped.
    #[test]
    fn filter_tree_by_language_python() {
        let entries = vec![blob("main.py", "abc123"), blob("lib.rs", "def456")];

        let filtered = filter_tree_by_language(&entries, "python");
        assert_eq!(filtered, ["main.py"]);
    }

    /// Rust maps to exactly the `rs` extension.
    #[test]
    fn get_extensions_for_language_rust() {
        let exts = get_extensions_for_language("rust");
        assert_eq!(exts, vec!["rs"]);
    }

    /// JavaScript covers the JS/TS family of extensions.
    #[test]
    fn get_extensions_for_language_javascript() {
        let exts = get_extensions_for_language("javascript");
        assert!(exts.contains(&"js"));
        assert!(exts.contains(&"ts"));
        assert!(exts.contains(&"jsx"));
        assert!(exts.contains(&"tsx"));
    }

    /// An unrecognised language yields no extensions.
    #[test]
    fn get_extensions_for_language_unknown() {
        let exts = get_extensions_for_language("unknown_language");
        assert!(exts.is_empty());
    }
}
1130
#[cfg(test)]
mod tests {
    //! Tests for issue-reference parsing and title keyword extraction.

    use super::*;

    /// Parses `reference` (with the optional `owner/repo` context) and asserts
    /// that it resolves to issue 5836 of `block/goose`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test.
    #[track_caller]
    fn assert_resolves_to_goose_5836(reference: &str, repo_context: Option<&str>) {
        let (owner, repo, number) = parse_issue_reference(reference, repo_context).unwrap();
        assert_eq!(owner, "block");
        assert_eq!(repo, "goose");
        assert_eq!(number, 5836);
    }

    /// Asserts that parsing `reference` fails and that the error's display
    /// text contains `expected`.
    #[track_caller]
    fn assert_rejected_with(reference: &str, repo_context: Option<&str>, expected: &str) {
        match parse_issue_reference(reference, repo_context) {
            Ok(_) => panic!("expected `{reference}` to be rejected, but it parsed"),
            Err(err) => {
                let message = err.to_string();
                assert!(
                    message.contains(expected),
                    "error `{message}` does not contain `{expected}`"
                );
            }
        }
    }

    /// A full GitHub issue URL is accepted without repository context.
    #[test]
    fn parse_reference_full_url() {
        assert_resolves_to_goose_5836("https://github.com/block/goose/issues/5836", None);
    }

    /// The `owner/repo#number` short form is accepted without context.
    #[test]
    fn parse_reference_short_form() {
        assert_resolves_to_goose_5836("block/goose#5836", None);
    }

    /// An explicit `owner/repo#number` wins over the supplied context.
    #[test]
    fn parse_reference_short_form_with_context() {
        assert_resolves_to_goose_5836("block/goose#5836", Some("astral-sh/ruff"));
    }

    /// A bare number is resolved against the supplied repository context.
    #[test]
    fn parse_reference_bare_number_with_context() {
        assert_resolves_to_goose_5836("5836", Some("block/goose"));
    }

    /// A bare number without repository context is an error.
    #[test]
    fn parse_reference_bare_number_without_context() {
        assert_rejected_with(
            "5836",
            None,
            "Bare issue number requires repository context",
        );
    }

    /// A short form lacking the `/` separator is rejected.
    #[test]
    fn parse_reference_invalid_short_form_missing_slash() {
        assert_rejected_with("owner#123", None, "Invalid owner/repo format");
    }

    /// A short form with more than one `/` is rejected.
    #[test]
    fn parse_reference_invalid_short_form_extra_slash() {
        assert_rejected_with("owner/repo/extra#123", None, "Invalid owner/repo format");
    }

    /// Non-numeric input is rejected even when context is available.
    #[test]
    fn parse_reference_invalid_bare_number() {
        assert_rejected_with(
            "abc",
            Some("block/goose"),
            "Invalid issue reference format",
        );
    }

    /// Surrounding whitespace around a short-form reference is ignored.
    #[test]
    fn parse_reference_whitespace_trimming() {
        assert_resolves_to_goose_5836("  block/goose#5836  ", None);
    }

    /// Surrounding whitespace around a bare number is ignored.
    #[test]
    fn parse_reference_bare_number_whitespace() {
        assert_resolves_to_goose_5836("  5836  ", Some("block/goose"));
    }

    /// Stop words are removed while meaningful words survive, lowercased.
    #[test]
    fn extract_keywords_filters_stop_words() {
        let keywords = extract_keywords("The issue is about a bug in the CLI");

        for stop_word in ["the", "is", "a"] {
            assert!(
                !keywords.contains(&stop_word.to_string()),
                "stop word `{stop_word}` should have been removed"
            );
        }
        for kept in ["issue", "bug", "cli"] {
            assert!(
                keywords.contains(&kept.to_string()),
                "keyword `{kept}` should have been kept"
            );
        }
    }

    /// No more than five keywords are returned.
    #[test]
    fn extract_keywords_limits_to_five() {
        let keywords = extract_keywords("one two three four five six seven eight nine ten");
        assert_eq!(keywords.len(), 5);
    }

    /// A title consisting solely of stop words produces no keywords.
    #[test]
    fn extract_keywords_empty_title() {
        let keywords = extract_keywords("the a an and or");
        assert!(keywords.is_empty());
    }

    /// Every returned keyword is fully lowercase.
    #[test]
    fn extract_keywords_lowercase_conversion() {
        let keywords = extract_keywords("CLI Bug FIX");
        // `Iterator::all` is true for an empty iterator, so guard against the
        // casing check passing without having inspected any keyword.
        assert!(
            !keywords.is_empty(),
            "expected at least one keyword to check casing on"
        );
        assert!(keywords.iter().all(|k| k.chars().all(char::is_lowercase)));
    }
}