Skip to main content

aptu_core/github/
instructions.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Repository instructions fetching for PR review context.
4//!
5//! Fetches AGENTS.md or .github/instructions/pr-review.md from a repository
6//! to inject as context into PR review prompts.
7
8use tracing::instrument;
9
10/// Fetches repository instructions for PR review context.
11///
12/// Attempts to fetch instructions from the repository in the following order:
13/// 1. If `override_path` is provided, fetch only from that path
14/// 2. Otherwise, try "AGENTS.md" then ".github/instructions/pr-review.md"
15///
16/// Returns `None` if:
17/// - Neither file exists (when no override)
18/// - File content is empty
19/// - Any error occurs during fetching
20///
21/// The returned content:
22/// - Has YAML frontmatter stripped (leading `---\n...---\n` block)
23/// - Is truncated to 1500 characters maximum
24///
25/// # Arguments
26///
27/// * `client` - Octocrab GitHub API client
28/// * `owner` - Repository owner
29/// * `repo` - Repository name
30/// * `head_sha` - Commit SHA to fetch from
31/// * `override_path` - Optional path to fetch instead of default paths
32#[cfg(not(target_arch = "wasm32"))]
33#[instrument(skip(client), fields(owner = %owner, repo = %repo, head_sha = %head_sha))]
34pub async fn fetch_repo_instructions(
35    client: &octocrab::Octocrab,
36    owner: &str,
37    repo: &str,
38    head_sha: &str,
39    override_path: Option<&str>,
40    max_chars: usize,
41) -> Option<String> {
42    let paths = if let Some(path) = override_path {
43        vec![path.to_string()]
44    } else {
45        vec![
46            "AGENTS.md".to_string(),
47            ".github/instructions/pr-review.md".to_string(),
48        ]
49    };
50
51    for path in paths {
52        match fetch_file_content(client, owner, repo, &path, head_sha).await {
53            Some(content) => {
54                if !content.is_empty() {
55                    let stripped = strip_yaml_frontmatter(&content);
56                    let truncated = truncate_to_chars(&stripped, max_chars);
57                    tracing::debug!(
58                        file = %path,
59                        chars = truncated.len(),
60                        "Fetched repo instructions"
61                    );
62                    return Some(truncated);
63                }
64            }
65            None => {
66                tracing::debug!(file = %path, "Instructions file not found or error fetching");
67            }
68        }
69    }
70
71    tracing::debug!("No instructions file found");
72    None
73}
74
75/// Fetches a single file's content from the repository.
76///
77/// Returns `None` on any error (404, decode failure, etc.).
78#[cfg(not(target_arch = "wasm32"))]
79async fn fetch_file_content(
80    client: &octocrab::Octocrab,
81    owner: &str,
82    repo: &str,
83    filename: &str,
84    head_sha: &str,
85) -> Option<String> {
86    match client
87        .repos(owner, repo)
88        .get_content()
89        .path(filename)
90        .r#ref(head_sha)
91        .send()
92        .await
93    {
94        Ok(content) => {
95            // Try to decode the first item (should be the file, not a directory listing)
96            if let Some(item) = content.items.first() {
97                if let Some(decoded) = item.decoded_content() {
98                    return Some(decoded);
99                }
100                tracing::debug!(
101                    path = filename,
102                    "failed to decode instructions file content"
103                );
104                return None;
105            }
106            None
107        }
108        Err(e) => {
109            tracing::debug!(error = %e, path = filename, "failed to fetch instructions file");
110            None
111        }
112    }
113}
114
/// Strips YAML frontmatter from content.
///
/// Frontmatter is recognized only when the very first line is exactly `---`.
/// Everything up to and including the next line that is exactly `---` is
/// removed and the remainder is returned.
///
/// Handles LF (`\n`) and CRLF (`\r\n`) line endings, including a mix of both,
/// an empty frontmatter block (`---\n---\n`), and a closing delimiter that is
/// the last line of the input without a trailing newline.
///
/// If the content does not start with a delimiter line, or no closing
/// delimiter line exists, the content is returned unchanged.
fn strip_yaml_frontmatter(content: &str) -> String {
    // Only strip if content begins with a frontmatter delimiter line.
    let after_open = if let Some(rest) = content.strip_prefix("---\n") {
        rest
    } else if let Some(rest) = content.strip_prefix("---\r\n") {
        rest
    } else {
        return content.to_string();
    };

    // Walk line by line, tracking the byte offset just past each line so the
    // body can be sliced out once the closing delimiter line is found.
    let mut consumed = 0;
    for line in after_open.split_inclusive('\n') {
        consumed += line.len();
        let text = line.trim_end_matches('\n').trim_end_matches('\r');
        if text == "---" {
            // `consumed` always sits on a line boundary, hence a char boundary.
            return after_open[consumed..].to_string();
        }
    }

    // No closing delimiter found; treat entire content as body.
    content.to_string()
}
140
/// Returns the leading `max_chars` characters of `content` as an owned string.
///
/// The limit counts `char`s rather than bytes, so multi-byte characters are
/// never split. Content with at most `max_chars` characters is returned whole.
fn truncate_to_chars(content: &str, max_chars: usize) -> String {
    // The byte offset of the first character past the limit, if there is one,
    // is exactly where the kept prefix ends.
    match content.char_indices().nth(max_chars) {
        Some((cut, _)) => content[..cut].to_owned(),
        None => content.to_owned(),
    }
}
145
/// Unit tests for the pure string helpers (frontmatter stripping and
/// character-based truncation). The network-facing functions are not covered here.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_strip_yaml_frontmatter_with_frontmatter() {
        let content = "---\ntitle: Test\nauthor: Me\n---\nActual content here";
        let result = strip_yaml_frontmatter(content);
        assert_eq!(result, "Actual content here");
    }

    #[test]
    fn test_strip_yaml_frontmatter_with_crlf_frontmatter() {
        // CRLF handling is part of the documented contract, so pin it down.
        let content = "---\r\ntitle: Test\r\n---\r\nActual content here";
        let result = strip_yaml_frontmatter(content);
        assert_eq!(result, "Actual content here");
    }

    #[test]
    fn test_strip_yaml_frontmatter_without_frontmatter() {
        let content = "Just plain content";
        let result = strip_yaml_frontmatter(content);
        assert_eq!(result, "Just plain content");
    }

    #[test]
    fn test_strip_yaml_frontmatter_no_closing() {
        let content = "---\ntitle: Test\nNo closing marker";
        let result = strip_yaml_frontmatter(content);
        // If no closing delimiter found, treat entire content as body (no frontmatter)
        assert_eq!(result, "---\ntitle: Test\nNo closing marker");
    }

    #[test]
    fn test_truncate_to_chars() {
        let content = "0123456789";
        let result = truncate_to_chars(content, 5);
        assert_eq!(result, "01234");
    }

    #[test]
    fn test_truncate_to_chars_longer_than_max() {
        // Here the limit exceeds the content length, so nothing is cut.
        let content = "short";
        let result = truncate_to_chars(content, 100);
        assert_eq!(result, "short");
    }

    #[test]
    fn test_truncate_to_chars_unicode() {
        // The emoji is a single char but several bytes; it must survive intact.
        let content = "hello 🌍 world";
        let result = truncate_to_chars(content, 8);
        assert_eq!(result, "hello 🌍 ");
    }

    #[test]
    fn test_truncate_to_chars_zero_limit() {
        let result = truncate_to_chars("anything", 0);
        assert_eq!(result, "");
    }

    #[test]
    fn test_truncate_to_chars_empty_input() {
        let result = truncate_to_chars("", 10);
        assert_eq!(result, "");
    }
}