Skip to main content

aptu_core/repos/
discovery.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Repository discovery via GitHub Search API.
4//!
5//! Searches GitHub for welcoming repositories using the REST Search API via Octocrab.
6//! Results are scored client-side based on stars, activity, and other signals.
7//! Supports caching with configurable TTL.
8
9use chrono::{Duration, Utc};
10use serde::{Deserialize, Serialize};
11use tracing::{debug, instrument};
12
13use crate::cache::FileCache;
14use crate::config::load_config;
15use crate::error::AptuError;
16use crate::github::auth::create_client_with_token;
17use secrecy::SecretString;
18
/// A repository returned by GitHub search, paired with its client-side score.
///
/// Values are built by [`search_repositories`] from the search response. The
/// type is serializable so that result lists can be stored in and restored
/// from the discovery cache.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscoveredRepo {
    /// Login of the repository owner (user or organization).
    pub owner: String,
    /// Repository name, without the owner prefix.
    pub name: String,
    /// Primary programming language, if the search result reported one.
    pub language: Option<String>,
    /// Short description, if the repository has one.
    pub description: Option<String>,
    /// Number of stars (0 when the search result carried no star count).
    pub stars: u32,
    /// Repository URL, taken from the search result's `html_url`.
    pub url: String,
    /// Relevance score (0-100), as computed by [`score_repo`].
    pub score: u32,
}
37
38impl DiscoveredRepo {
39    /// Returns the full repository name in "owner/name" format.
40    #[must_use]
41    pub fn full_name(&self) -> String {
42        format!("{}/{}", self.owner, self.name)
43    }
44}
45
/// Criteria controlling which repositories discovery searches for and returns.
///
/// The [`Default`] implementation applies no language restriction, requires
/// at least 10 stars, and returns at most 20 results.
#[derive(Debug, Clone)]
pub struct DiscoveryFilter {
    /// Programming language to filter by (e.g., "Rust", "Python").
    /// `None` searches across all languages.
    pub language: Option<String>,
    /// Minimum number of stars (inclusive).
    pub min_stars: u32,
    /// Maximum number of results to return.
    pub limit: u32,
}
56
57impl Default for DiscoveryFilter {
58    fn default() -> Self {
59        Self {
60            language: None,
61            min_stars: 10,
62            limit: 20,
63        }
64    }
65}
66
67/// Score a repository based on various signals.
68///
69/// Scoring factors:
70/// - Stars (0-50 points): logarithmic scale, capped at 50
71/// - Language match (0-30 points): exact match gets full points
72/// - Description presence (0-20 points): repositories with descriptions score higher
73///
74/// # Arguments
75///
76/// * `repo` - The repository to score
77/// * `filter` - The discovery filter (for language matching)
78///
79/// # Returns
80///
81/// A score from 0-100.
82#[must_use]
83pub fn score_repo(repo: &octocrab::models::Repository, filter: &DiscoveryFilter) -> u32 {
84    let mut score = 0u32;
85
86    // Stars: logarithmic scale (0-50 points)
87    let stars = f64::from(repo.stargazers_count.unwrap_or(0));
88    let star_score = if stars > 0.0 {
89        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
90        let score_val = ((stars.ln() + 1.0) / 10.0 * 50.0).min(50.0) as u32;
91        score_val
92    } else {
93        0
94    };
95    score += star_score;
96
97    // Language match (0-30 points)
98    if let Some(ref filter_lang) = filter.language
99        && let Some(ref repo_lang) = repo.language
100        && let Some(lang_str) = repo_lang.as_str()
101        && lang_str.to_lowercase() == filter_lang.to_lowercase()
102    {
103        score += 30;
104    }
105
106    // Description presence (0-20 points)
107    if repo.description.is_some() && !repo.description.as_ref().unwrap().is_empty() {
108        score += 20;
109    }
110
111    score.min(100)
112}
113
114use std::fmt::Write as FmtWrite;
115
116/// Build a GitHub search query from filter parameters.
117///
118/// Constructs a query string suitable for GitHub's REST Search API.
119/// Includes filters for:
120/// - Good first issue labels
121/// - Help wanted labels
122/// - Active repositories (pushed in last 30 days)
123/// - Minimum stars
124/// - Language (if specified)
125///
126/// # Arguments
127///
128/// * `filter` - The discovery filter
129///
130/// # Returns
131///
132/// A GitHub search query string using repository search qualifiers.
133/// Searches for repositories with open good-first-issue labeled issues,
134/// pushed within the last 30 days, meeting minimum star count and language criteria.
135#[must_use]
136pub fn build_search_query(filter: &DiscoveryFilter) -> String {
137    let mut query = String::from("good-first-issues:>0");
138
139    // Calculate date 30 days ago from now
140    let thirty_days_ago = Utc::now() - Duration::days(30);
141    let date_str = thirty_days_ago.format("%Y-%m-%d").to_string();
142    let _ = write!(query, " pushed:>{date_str}");
143
144    let _ = write!(query, " stars:>={}", filter.min_stars);
145
146    if let Some(ref lang) = filter.language {
147        let _ = write!(query, " language:{lang}");
148    }
149
150    query
151}
152
153/// Search for repositories matching the discovery filter.
154///
155/// Uses GitHub's REST Search API via Octocrab to find repositories.
156/// Results are scored client-side and sorted by score descending.
157/// Supports caching with configurable TTL.
158///
159/// # Arguments
160///
161/// * `token` - GitHub API token
162/// * `filter` - Discovery filter (language, `min_stars`, limit)
163///
164/// # Returns
165///
166/// A vector of discovered repositories, sorted by score.
167///
168/// # Errors
169///
170/// Returns an error if:
171/// - GitHub API call fails
172/// - Response parsing fails
173#[instrument(skip(token), fields(language = ?filter.language, min_stars = filter.min_stars, limit = filter.limit))]
174pub async fn search_repositories(
175    token: &SecretString,
176    filter: &DiscoveryFilter,
177) -> crate::Result<Vec<DiscoveredRepo>> {
178    // Check cache first
179    let cache_key = format!(
180        "discovered_repos_{}_{}_{}",
181        filter.language.as_deref().unwrap_or("any"),
182        filter.min_stars,
183        filter.limit
184    );
185
186    let config = load_config()?;
187    let ttl = Duration::hours(config.cache.repo_ttl_hours);
188
189    let cache: crate::cache::FileCacheImpl<Vec<DiscoveredRepo>> =
190        crate::cache::FileCacheImpl::new("discovery", ttl);
191    if let Ok(Some(repos)) = cache.get(&cache_key) {
192        debug!("Using cached discovered repositories");
193        return Ok(repos);
194    }
195
196    // Create GitHub client
197    let client = create_client_with_token(token).map_err(|e| AptuError::GitHub {
198        message: format!("Failed to create GitHub client: {e}"),
199    })?;
200
201    // Build search query
202    let query = build_search_query(filter);
203    debug!("Searching with query: {}", query);
204
205    // Execute search with retry logic
206    let repos = client
207        .search()
208        .repositories(&query)
209        .per_page(100)
210        .send()
211        .await
212        .map_err(|e| AptuError::GitHub {
213            message: format!("Failed to search repositories: {e}"),
214        })?;
215
216    // Score and sort results
217    let mut discovered: Vec<DiscoveredRepo> = repos
218        .items
219        .into_iter()
220        .filter_map(|repo| {
221            let score = score_repo(&repo, filter);
222            let url = repo.html_url.as_ref().map(ToString::to_string)?;
223            let language = repo
224                .language
225                .as_ref()
226                .and_then(|v| v.as_str())
227                .map(ToString::to_string);
228
229            Some(DiscoveredRepo {
230                owner: repo
231                    .owner
232                    .as_ref()
233                    .map(|o| o.login.clone())
234                    .unwrap_or_default(),
235                name: repo.name.clone(),
236                language,
237                description: repo.description.clone(),
238                stars: repo.stargazers_count.unwrap_or(0),
239                url,
240                score,
241            })
242        })
243        .collect();
244
245    // Sort by score descending, then by stars descending
246    discovered.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| b.stars.cmp(&a.stars)));
247
248    // Limit results
249    discovered.truncate(filter.limit as usize);
250
251    // Cache the results
252    let _ = cache.set(&cache_key, &discovered);
253
254    debug!(
255        "Found and cached {} discovered repositories",
256        discovered.len()
257    );
258    Ok(discovered)
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a filter so each test only spells out the values it cares about.
    fn filter_with(language: Option<&str>, min_stars: u32, limit: u32) -> DiscoveryFilter {
        DiscoveryFilter {
            language: language.map(str::to_owned),
            min_stars,
            limit,
        }
    }

    /// Without a language the query carries the fixed qualifiers and no `language:` part.
    #[test]
    fn build_search_query_basic() {
        let query = build_search_query(&filter_with(None, 10, 20));

        for expected in ["good-first-issues:>0", "pushed:>", "stars:>=10"] {
            assert!(query.contains(expected));
        }
        assert!(!query.contains("language:"));
    }

    /// A language filter adds a `language:` qualifier next to the others.
    #[test]
    fn build_search_query_with_language() {
        let query = build_search_query(&filter_with(Some("Rust"), 50, 10));

        for expected in ["good-first-issues:>0", "language:Rust", "stars:>=50"] {
            assert!(query.contains(expected));
        }
    }

    /// `full_name` renders the repository as `owner/name`.
    #[test]
    fn discovered_repo_full_name() {
        let owner = String::from("owner");
        let name = String::from("repo");
        let repo = DiscoveredRepo {
            url: String::from("https://github.com/owner/repo"),
            language: Some(String::from("Rust")),
            description: Some(String::from("Test")),
            stars: 100,
            score: 75,
            owner,
            name,
        };

        assert_eq!(repo.full_name(), "owner/repo");
    }
}