github_rust/github/
search.rs

1use crate::github::client::GitHubClient;
2use crate::github::types::*;
3use crate::{config::*, error::*};
4use chrono::{Duration, Utc};
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
/// Validates and sanitizes a language parameter for GitHub search.
///
/// Surrounding whitespace is trimmed first. The remaining text is accepted only if it
///
/// - is non-empty,
/// - consists solely of alphanumeric characters, spaces, `-`, `+`, `#` and `.`
///   (enough for names such as "C++", "C#", "F#", "Objective-C", "Visual Basic .NET"),
/// - contains no search qualifier such as `repo:` or `stars:`, and
/// - contains no boolean operator (`AND`, `OR`, `NOT`), matched case-insensitively
///   as a whole word at any position, including the very first or last word.
///
/// Returns `None` if the language is invalid, `Some(sanitized)` otherwise.
fn validate_language(language: &str) -> Option<String> {
    /// Search qualifiers that must never appear inside a language value.
    const QUALIFIERS: &[&str] = &[
        "repo:",
        "user:",
        "org:",
        "in:",
        "size:",
        "fork:",
        "stars:",
        "pushed:",
        "created:",
        "updated:",
        "language:",
        "topic:",
        "license:",
        "is:",
        "has:",
        "good-first-issues:",
        "help-wanted-issues:",
        "archived:",
        "mirror:",
        "template:",
        "sort:",
    ];

    let trimmed = language.trim();

    // Empty language is invalid
    if trimmed.is_empty() {
        return None;
    }

    // Allow-list of characters; everything else (quotes, colons, tabs, ...) is rejected.
    let has_only_allowed_chars = trimmed
        .chars()
        .all(|c| c.is_alphanumeric() || matches!(c, ' ' | '-' | '+' | '#' | '.'));
    if !has_only_allowed_chars {
        return None;
    }

    let lower = trimmed.to_lowercase();

    // Defence in depth: ':' is already excluded by the allow-list above, so this only
    // becomes reachable if that list is ever relaxed.
    if QUALIFIERS.iter().any(|&qualifier| lower.contains(qualifier)) {
        return None;
    }

    // Compare whole words instead of searching for " or " etc.: after trimming, an
    // operator at the start or end of the input has no surrounding space on one side
    // and would slip past a substring check.
    if lower
        .split_whitespace()
        .any(|word| matches!(word, "or" | "and" | "not"))
    {
        return None;
    }

    Some(trimmed.to_string())
}
65
66/// Repository data from GitHub search API.
67///
68/// A lighter-weight repository struct returned by search operations,
69/// containing the most commonly needed fields.
70#[derive(Deserialize, Serialize, Clone, Default, Debug)]
71pub struct SearchRepository {
72    /// GitHub's internal ID for the repository
73    pub id: String,
74    /// Repository name (without owner)
75    pub name: String,
76    /// Full repository name in "owner/repo" format
77    #[serde(rename = "nameWithOwner")]
78    pub name_with_owner: String,
79    /// Repository description
80    pub description: Option<String>,
81    /// GitHub URL for the repository
82    pub url: String,
83    /// Number of stars
84    #[serde(rename = "stargazerCount")]
85    pub stargazer_count: u32,
86    /// Number of forks
87    #[serde(rename = "forkCount")]
88    pub fork_count: u32,
89    /// ISO 8601 timestamp when repository was created
90    #[serde(rename = "createdAt")]
91    pub created_at: String,
92    /// ISO 8601 timestamp of last update
93    #[serde(rename = "updatedAt")]
94    pub updated_at: String,
95    /// ISO 8601 timestamp of last push
96    #[serde(rename = "pushedAt")]
97    pub pushed_at: Option<String>,
98    /// Primary programming language
99    #[serde(rename = "primaryLanguage")]
100    pub primary_language: Option<Language>,
101    /// License information
102    #[serde(rename = "licenseInfo")]
103    pub license_info: Option<License>,
104    /// Repository topics/tags
105    #[serde(rename = "repositoryTopics")]
106    pub repository_topics: TopicConnection,
107}
108
109impl SearchRepository {
110    /// Returns the primary language name, or None if not set.
111    #[must_use]
112    pub fn language(&self) -> Option<&str> {
113        self.primary_language.as_ref().map(|l| l.name.as_str())
114    }
115
116    /// Returns the license name, or None if not set.
117    #[must_use]
118    pub fn license(&self) -> Option<&str> {
119        self.license_info.as_ref().map(|l| l.name.as_str())
120    }
121
122    /// Returns the SPDX license identifier, or None if not available.
123    #[must_use]
124    pub fn license_spdx(&self) -> Option<&str> {
125        self.license_info
126            .as_ref()
127            .and_then(|l| l.spdx_id.as_deref())
128    }
129
130    /// Returns a list of topic names.
131    #[must_use]
132    pub fn topics(&self) -> Vec<&str> {
133        self.repository_topics
134            .edges
135            .iter()
136            .map(|e| e.node.topic.name.as_str())
137            .collect()
138    }
139
140    /// Returns the owner part of name_with_owner.
141    #[must_use]
142    pub fn owner(&self) -> &str {
143        self.name_with_owner
144            .split('/')
145            .next()
146            .unwrap_or(&self.name_with_owner)
147    }
148}
149
/// Shape of the `data` object in the GraphQL response of the repository search.
#[derive(Deserialize)]
struct SearchResult {
    /// The `search` connection holding the matched repositories and paging state.
    search: SearchConnection,
}
154
155#[derive(Deserialize)]
156struct PageInfo {
157    #[serde(rename = "hasNextPage")]
158    has_next_page: bool,
159    #[serde(rename = "endCursor")]
160    end_cursor: Option<String>,
161}
162
163#[derive(Deserialize)]
164struct SearchConnection {
165    #[serde(rename = "repositoryCount")]
166    #[allow(dead_code)]
167    repository_count: u32,
168    #[serde(rename = "pageInfo")]
169    page_info: PageInfo,
170    edges: Vec<SearchEdge>,
171}
172
/// One edge of the search connection, wrapping a single repository node.
#[derive(Deserialize)]
struct SearchEdge {
    /// The repository this edge points to.
    node: SearchRepository,
}
177
178/// Search for repositories created in the last N days with minimum stars.
179pub async fn search_repositories(
180    client: &GitHubClient,
181    days_back: u32,
182    limit: usize,
183    language: Option<&str>,
184    min_stars: u32,
185) -> Result<Vec<SearchRepository>> {
186    let now = Utc::now();
187    let days_ago = now - Duration::days(days_back as i64);
188    let date_filter = days_ago.format("%Y-%m-%d").to_string();
189
190    let mut query_parts = vec![
191        format!("created:>{}", date_filter),
192        format!("stars:>={}", min_stars),
193        "is:public".to_string(),
194        "sort:stars-desc".to_string(),
195    ];
196
197    if let Some(lang) = language {
198        if let Some(validated_lang) = validate_language(lang) {
199            query_parts.push(format!("language:{}", validated_lang));
200        } else {
201            return Err(GitHubError::InvalidInput(format!(
202                "Invalid language parameter: '{}'. Language must contain only alphanumeric characters, spaces, hyphens, plus signs, hash, or dots.",
203                lang
204            )));
205        }
206    }
207
208    let query_string = query_parts.join(" ");
209    tracing::debug!("GitHub search query: {}", query_string);
210
211    let mut all_repositories = Vec::new();
212    let mut after_cursor: Option<String> = None;
213    let max_total = limit.min(1000);
214
215    loop {
216        let mut variables = HashMap::new();
217        variables.insert(
218            "queryString".to_string(),
219            serde_json::Value::String(query_string.clone()),
220        );
221        variables.insert(
222            "first".to_string(),
223            serde_json::Value::Number(serde_json::Number::from(100)),
224        );
225
226        if let Some(cursor) = &after_cursor {
227            variables.insert(
228                "after".to_string(),
229                serde_json::Value::String(cursor.clone()),
230            );
231        } else {
232            variables.insert("after".to_string(), serde_json::Value::Null);
233        }
234
235        let graphql_query: GraphQLQuery<HashMap<String, serde_json::Value>> = GraphQLQuery {
236            query: GRAPHQL_SEARCH_REPOSITORIES_QUERY.to_string(),
237            variables,
238        };
239
240        let response = client
241            .client()
242            .post(GITHUB_GRAPHQL_URL)
243            .json(&graphql_query)
244            .send()
245            .await?;
246
247        let status = response.status();
248        if !status.is_success() {
249            let error_text = response.text().await.unwrap_or_default();
250            return match status.as_u16() {
251                401 => Err(GitHubError::AuthenticationError(
252                    "Invalid or missing GitHub token".to_string(),
253                )),
254                403 => Err(GitHubError::RateLimitError(
255                    "GraphQL API rate limit exceeded".to_string(),
256                )),
257                451 => Err(GitHubError::DmcaBlockedError(
258                    "Search blocked for legal reasons".to_string(),
259                )),
260                _ => Err(GitHubError::ApiError {
261                    status: status.as_u16(),
262                    message: error_text,
263                }),
264            };
265        }
266
267        let graphql_response: GraphQLResponse<SearchResult> = response.json().await?;
268
269        if let Some(errors) = graphql_response.errors {
270            let error_message = errors
271                .into_iter()
272                .map(|e| e.message)
273                .collect::<Vec<_>>()
274                .join(", ");
275            return Err(GitHubError::ApiError {
276                status: 200,
277                message: error_message,
278            });
279        }
280
281        match graphql_response.data {
282            Some(data) => {
283                let page_repositories: Vec<SearchRepository> = data
284                    .search
285                    .edges
286                    .into_iter()
287                    .map(|edge| edge.node)
288                    .collect();
289
290                all_repositories.extend(page_repositories);
291
292                if data.search.page_info.has_next_page && all_repositories.len() < max_total {
293                    after_cursor = data.search.page_info.end_cursor;
294                } else {
295                    break;
296                }
297            }
298            None => {
299                return Err(GitHubError::ParseError(
300                    "No data in GraphQL response".to_string(),
301                ));
302            }
303        }
304    }
305
306    all_repositories.truncate(max_total);
307    Ok(all_repositories)
308}
309
#[cfg(test)]
mod tests {
    use super::*;

    /// A default-constructed repository has zeroed counters and no description.
    #[test]
    fn test_search_repository_default() {
        let SearchRepository {
            stargazer_count,
            fork_count,
            description,
            ..
        } = SearchRepository::default();

        assert_eq!(stargazer_count, 0);
        assert_eq!(fork_count, 0);
        assert!(description.is_none());
    }
}