Skip to main content

oxi_agent/tools/
github_search.rs

1use super::http_client::shared_http_client;
2use super::search_cache::{SearchCache, SearchResult};
/// GitHub search tool — search GitHub repositories via the GitHub REST API.
///
/// Features:
/// - Search repositories by topic, language, stars, etc.
/// - Sort by stars, forks, or recently updated
/// - Optional GitHub token for higher rate limits (via the GITHUB_SEARCH_TOKEN, GITHUB_TOKEN, or GH_TOKEN env var)
/// - Structured JSON results — no HTML scraping
/// - Result caching with the shared SearchCache
11use super::{AgentTool, AgentToolResult, ToolContext, ToolError};
12use async_trait::async_trait;
13use serde::Deserialize;
14use serde_json::{json, Value};
15use std::sync::Arc;
16use tokio::sync::oneshot;
17
/// Number of results requested when the caller does not supply a limit.
const DEFAULT_MAX_RESULTS: usize = 10;

/// Upper bound applied to any caller-supplied limit. This is lower than the
/// 100-per-page maximum of the GitHub API.
const MAX_RESULTS: usize = 30;
23
24// ── GitHub API response types ─────────────────────────────────────
25
26/// Top-level GitHub search response.
27#[derive(Debug, Deserialize)]
28
29struct GitHubSearchResponse {
30    total_count: u64,
31    _incomplete_results: bool,
32    items: Vec<GitHubRepo>,
33}
34
35/// A single repository from GitHub search.
36#[derive(Debug, Deserialize)]
37struct GitHubRepo {
38    full_name: String,
39    html_url: String,
40    description: Option<String>,
41    language: Option<String>,
42    stargazers_count: u64,
43    forks_count: u64,
44    open_issues_count: u64,
45    updated_at: String,
46
47    _archived: bool,
48    topics: Vec<String>,
49    license: Option<GitHubLicense>,
50}
51
52#[derive(Debug, Deserialize)]
53struct GitHubLicense {
54    spdx_id: Option<String>,
55    name: Option<String>,
56}
57
58// ── GitHub search result (our public type) ────────────────────────
59
60/// A single GitHub repository result.
61#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
62pub struct GitHubSearchResult {
63    /// Full repo name (e.g. "owner/repo").
64    pub full_name: String,
65    /// Repository URL.
66    pub url: String,
67    /// Repository description.
68    pub description: String,
69    /// Primary programming language.
70    pub language: String,
71    /// Star count.
72    pub stars: u64,
73    /// Fork count.
74    pub forks: u64,
75    /// Open issues count.
76    pub open_issues: u64,
77    /// Last update timestamp.
78    pub updated_at: String,
79    /// Repository topics/tags.
80    pub topics: Vec<String>,
81    /// License name.
82    pub license: String,
83}
84
85impl From<&GitHubSearchResult> for SearchResult {
86    fn from(r: &GitHubSearchResult) -> Self {
87        SearchResult {
88            title: r.full_name.clone(),
89            url: r.url.clone(),
90            snippet: r.description.clone(),
91            engines: vec!["GitHub".to_string()],
92            score: r.stars as f64,
93        }
94    }
95}
96
97// ── API call ──────────────────────────────────────────────────────
98
/// Resolve a GitHub API token from the environment.
///
/// The following variables are consulted in priority order and the first one
/// holding a usable value wins:
///
/// 1. `GITHUB_SEARCH_TOKEN` (explicit for this tool)
/// 2. `GITHUB_TOKEN`
/// 3. `GH_TOKEN`
///
/// A variable that is unset, not valid Unicode, empty, or whitespace-only is
/// skipped so that it cannot shadow a valid lower-priority variable or yield
/// an empty `Authorization` header. Surrounding whitespace is trimmed from
/// the returned token.
///
/// Returns `None` when no variable provides a usable token.
fn resolve_github_token() -> Option<String> {
    const TOKEN_VARS: [&str; 3] = ["GITHUB_SEARCH_TOKEN", "GITHUB_TOKEN", "GH_TOKEN"];

    TOKEN_VARS.iter().find_map(|&name| {
        let raw = std::env::var(name).ok()?;
        let token = raw.trim();
        if token.is_empty() {
            None
        } else {
            Some(token.to_string())
        }
    })
}
107
108/// Search GitHub repositories via the REST API.
109async fn search_github_repos(
110    query: &str,
111    sort: &str,
112    order: &str,
113    limit: usize,
114    language: Option<&str>,
115) -> Result<(u64, Vec<GitHubSearchResult>), ToolError> {
116    let mut url = format!(
117        "https://api.github.com/search/repositories?q={}&sort={}&order={}&per_page={}",
118        urlencoding(query),
119        sort,
120        order,
121        limit.min(MAX_RESULTS),
122    );
123
124    // Add language filter if specified
125    if let Some(lang) = language {
126        // Append language:xxx to the query
127        url = format!(
128            "https://api.github.com/search/repositories?q={}+language%3A{}&sort={}&order={}&per_page={}",
129            urlencoding(query),
130            urlencoding(lang),
131            sort,
132            order,
133            limit.min(MAX_RESULTS),
134        );
135    }
136
137    let mut builder = shared_http_client()
138        .get(&url)
139        .header("Accept", "application/vnd.github.v3+json")
140        .header("User-Agent", "oxi-agent");
141
142    // Attach token if available (raises rate limit from 10/min to 5000/hr)
143    if let Some(token) = resolve_github_token() {
144        builder = builder.header("Authorization", format!("Bearer {}", token));
145    }
146
147    let response = builder
148        .send()
149        .await
150        .map_err(|e| format!("GitHub API request failed: {}", e))?;
151
152    let status = response.status();
153    if status.as_u16() == 403 {
154        let body = response.text().await.unwrap_or_default();
155        return Err(format!(
156            "GitHub API rate limit exceeded. Set GITHUB_TOKEN env var for higher limits. Body: {}",
157            body.chars().take(200).collect::<String>()
158        ));
159    }
160    if !status.is_success() {
161        let body = response.text().await.unwrap_or_default();
162        return Err(format!(
163            "GitHub API returned status {}: {}",
164            status,
165            body.chars().take(300).collect::<String>()
166        ));
167    }
168
169    let search_response: GitHubSearchResponse = response
170        .json()
171        .await
172        .map_err(|e| format!("Failed to parse GitHub response: {}", e))?;
173
174    let results = search_response
175        .items
176        .into_iter()
177        .map(|repo| GitHubSearchResult {
178            full_name: repo.full_name,
179            url: repo.html_url,
180            description: repo.description.unwrap_or_default(),
181            language: repo.language.unwrap_or_default(),
182            stars: repo.stargazers_count,
183            forks: repo.forks_count,
184            open_issues: repo.open_issues_count,
185            updated_at: repo.updated_at,
186            topics: repo.topics,
187            license: repo
188                .license
189                .and_then(|l| l.spdx_id.or(l.name))
190                .unwrap_or_default(),
191        })
192        .collect();
193
194    Ok((search_response.total_count, results))
195}
196
/// Percent-encode a string for use inside a URL query parameter.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are copied unchanged; every
/// other byte of the UTF-8 representation is written as `%XX` with upper-case
/// hexadecimal digits.
fn urlencoding(s: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    // Worst case every byte expands to three characters.
    let mut encoded = String::with_capacity(s.len() * 3);
    for &b in s.as_bytes() {
        let unreserved = b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_' | b'.' | b'~');
        if unreserved {
            encoded.push(char::from(b));
            continue;
        }
        encoded.push('%');
        encoded.push(char::from(HEX_DIGITS[usize::from(b >> 4)]));
        encoded.push(char::from(HEX_DIGITS[usize::from(b & 0x0F)]));
    }
    encoded
}
213
214// ── Formatting ────────────────────────────────────────────────────
215
216/// Format GitHub search results for display.
217fn format_github_results(total: u64, results: &[GitHubSearchResult]) -> String {
218    if results.is_empty() {
219        return "No repositories found.".to_string();
220    }
221
222    let mut output = format!(
223        "Found {} repositories (showing {}):\n\n",
224        total,
225        results.len()
226    );
227
228    for (i, r) in results.iter().enumerate() {
229        let stars = if r.stars >= 1000 {
230            format!("{:.1}k", r.stars as f64 / 1000.0)
231        } else {
232            r.stars.to_string()
233        };
234
235        let desc = if r.description.chars().count() > 150 {
236            let truncated: String = r.description.chars().take(150).collect();
237            format!("{}...", truncated)
238        } else {
239            r.description.clone()
240        };
241
242        output.push_str(&format!(
243            "{}. **{}** ⭐{}\n   {}\n   {} {} | 🔀 {} forks | 📦 {} issues\n   Updated: {}\n",
244            i + 1,
245            r.full_name,
246            stars,
247            r.url,
248            desc,
249            if r.language.is_empty() {
250                "Unknown".to_string()
251            } else {
252                r.language.clone()
253            },
254            r.forks,
255            r.open_issues,
256            &r.updated_at[..10], // Just the date part
257        ));
258
259        if !r.topics.is_empty() {
260            output.push_str(&format!("   Topics: {}\n", r.topics.join(", ")));
261        }
262
263        if !r.license.is_empty() {
264            output.push_str(&format!("   License: {}\n", r.license));
265        }
266
267        output.push('\n');
268    }
269
270    output
271}
272
273// ── GitHubSearchTool ──────────────────────────────────────────────
274
275/// GitHub repository search tool using the GitHub REST API.
276pub struct GitHubSearchTool {
277    cache: Arc<SearchCache>,
278}
279
280impl GitHubSearchTool {
281    /// Create a new GitHubSearchTool with the given search cache.
282    pub fn new(cache: Arc<SearchCache>) -> Self {
283        Self { cache }
284    }
285}
286
287#[async_trait]
288impl AgentTool for GitHubSearchTool {
289    fn name(&self) -> &str {
290        "github_search"
291    }
292
293    fn label(&self) -> &str {
294        "GitHub Search"
295    }
296
297    fn description(&self) -> &str {
298        "Search GitHub repositories by query. Returns repos with stars, forks, language, description, and topics. Supports sorting by stars, forks, or recently updated. No API key required (set GITHUB_TOKEN for higher rate limits)."
299    }
300
301    fn parameters_schema(&self) -> Value {
302        json!({
303            "type": "object",
304            "properties": {
305                "query": {
306                    "type": "string",
307                    "description": "Search query (e.g. 'rust web framework', 'machine learning', 'owner:mariozechner')"
308                },
309                "sort": {
310                    "type": "string",
311                    "description": "Sort results by: 'stars' (default), 'forks', or 'updated'",
312                    "enum": ["stars", "forks", "updated"],
313                    "default": "stars"
314                },
315                "order": {
316                    "type": "string",
317                    "description": "Sort order: 'desc' (default) or 'asc'",
318                    "enum": ["desc", "asc"],
319                    "default": "desc"
320                },
321                "language": {
322                    "type": "string",
323                    "description": "Filter by programming language (e.g. 'rust', 'python', 'typescript')"
324                },
325                "limit": {
326                    "type": "integer",
327                    "description": "Maximum number of results to return (default: 10, max: 30)",
328                    "default": 10
329                }
330            },
331            "required": ["query"]
332        })
333    }
334
335    async fn execute(
336        &self,
337        _tool_call_id: &str,
338        params: Value,
339        _signal: Option<oneshot::Receiver<()>>,
340        _ctx: &ToolContext,
341    ) -> Result<AgentToolResult, ToolError> {
342        let query = params["query"]
343            .as_str()
344            .ok_or_else(|| "Missing required parameter: query".to_string())?;
345
346        let sort = params["sort"].as_str().unwrap_or("stars");
347        let sort = match sort {
348            "forks" | "updated" => sort,
349            _ => "stars",
350        };
351
352        let order = params["order"].as_str().unwrap_or("desc");
353        let order = match order {
354            "asc" => "asc",
355            _ => "desc",
356        };
357
358        let language = params["language"].as_str();
359
360        let limit = params["limit"]
361            .as_u64()
362            .unwrap_or(DEFAULT_MAX_RESULTS as u64)
363            .min(MAX_RESULTS as u64) as usize;
364
365        let (total, results) = search_github_repos(query, sort, order, limit, language).await?;
366
367        if results.is_empty() {
368            return Ok(AgentToolResult::success(format!(
369                "No GitHub repositories found for: {}",
370                query
371            )));
372        }
373
374        // Cache results
375        let search_id = self.cache.insert(
376            &format!("github:{}", query),
377            results.iter().map(|r| r.into()).collect(),
378        );
379
380        let output = format_github_results(total, &results);
381
382        let results_json: Vec<Value> = results
383            .iter()
384            .map(|r| {
385                json!({
386                    "full_name": r.full_name,
387                    "url": r.url,
388                    "description": r.description,
389                    "language": r.language,
390                    "stars": r.stars,
391                    "forks": r.forks,
392                    "open_issues": r.open_issues,
393                    "updated_at": r.updated_at,
394                    "topics": r.topics,
395                    "license": r.license
396                })
397            })
398            .collect();
399
400        Ok(AgentToolResult::success(output).with_metadata(json!({
401            "results": results_json,
402            "query": query,
403            "searchId": search_id,
404            "totalCount": total,
405            "resultCount": results.len()
406        })))
407    }
408}
409
410// ── Tests ─────────────────────────────────────────────────────────
411
#[cfg(test)]
mod tests {
    use super::*;

    /// Reserved characters are percent-encoded with upper-case hex digits.
    #[test]
    fn test_urlencoding() {
        assert_eq!(urlencoding("hello world"), "hello%20world");
        assert_eq!(urlencoding("rust&cargo"), "rust%26cargo");
        assert_eq!(urlencoding("c++"), "c%2B%2B");
    }

    /// An empty result list yields the fixed "nothing found" message.
    #[test]
    fn test_format_github_results_empty() {
        assert_eq!(format_github_results(0, &[]), "No repositories found.");
    }

    /// A fully populated result shows name, abbreviated stars, language and topics.
    #[test]
    fn test_format_github_results() {
        let results = vec![GitHubSearchResult {
            full_name: "rust-lang/rust".to_string(),
            url: "https://github.com/rust-lang/rust".to_string(),
            description: "Empowering everyone to build reliable and efficient software."
                .to_string(),
            language: "Rust".to_string(),
            stars: 95000,
            forks: 12000,
            open_issues: 9000,
            updated_at: "2026-05-08T12:00:00Z".to_string(),
            topics: vec!["programming-language".to_string(), "systems".to_string()],
            license: "MIT/Apache-2.0".to_string(),
        }];
        let formatted = format_github_results(1, &results);
        assert!(formatted.contains("**rust-lang/rust**"));
        assert!(formatted.contains("95.0k"));
        assert!(formatted.contains("Rust"));
        assert!(formatted.contains("Topics: programming-language, systems"));
    }

    /// Star counts below 1000 are printed verbatim, without the `k` suffix.
    #[test]
    fn test_format_stars_under_1k() {
        let results = vec![GitHubSearchResult {
            full_name: "test/repo".to_string(),
            url: "https://github.com/test/repo".to_string(),
            description: "A test".to_string(),
            language: "Python".to_string(),
            stars: 500,
            forks: 20,
            open_issues: 3,
            updated_at: "2026-05-01T00:00:00Z".to_string(),
            topics: vec![],
            license: String::new(),
        }];
        let formatted = format_github_results(1, &results);
        assert!(formatted.contains("⭐500"));
    }

    /// The parameter schema is an object that requires `query`.
    #[test]
    fn test_schema() {
        let cache = Arc::new(SearchCache::new());
        let tool = GitHubSearchTool::new(cache);
        let schema = tool.parameters_schema();
        assert_eq!(schema["type"], "object");
        assert!(schema["properties"]["query"].is_object());
        assert!(schema["properties"]["sort"].is_object());
        assert!(schema["properties"]["language"].is_object());
        assert!(schema["required"]
            .as_array()
            .unwrap()
            .contains(&json!("query")));
    }

    /// Integration test against the real GitHub API.
    ///
    /// Ignored by default because it needs network access and is subject to
    /// rate limiting; run it explicitly with `cargo test -- --ignored`. When
    /// it is run, a failed request fails the test instead of being silently
    /// accepted, so request and parsing regressions are actually caught.
    #[tokio::test]
    #[ignore = "requires network access to api.github.com"]
    async fn test_github_search_live() {
        let (total, results) =
            search_github_repos("rust web framework", "stars", "desc", 3, None)
                .await
                .expect("live GitHub search failed");
        assert!(total > 0);
        assert!(!results.is_empty());
        assert!(results[0].stars > 0);
        assert!(results[0].url.starts_with("https://github.com/"));
    }
}