1use crate::github::client::GitHubClient;
2use crate::github::types::*;
3use crate::{config::*, error::*};
4use chrono::{Duration, Utc};
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
/// Checks a caller-supplied language name before it is spliced into a search query.
///
/// The input is trimmed first. `None` is returned when the trimmed value
///
/// * is empty,
/// * contains any character other than an alphanumeric, a space, `-`, `+`,
///   `#` or `.`, or
/// * contains (compared in lowercase) one of the deny-listed search
///   qualifiers or boolean operators.
///
/// Otherwise the trimmed value is returned with its original casing.
fn validate_language(language: &str) -> Option<String> {
    // Search qualifiers and boolean operators that must never appear inside a
    // language value. Every qualifier entry contains ':', which the character
    // check below already rejects, so in practice only the operator entries
    // can still match by the time this list is consulted.
    const DENYLIST: &[&str] = &[
        "repo:",
        "user:",
        "org:",
        "in:",
        "size:",
        "fork:",
        "stars:",
        "pushed:",
        "created:",
        "updated:",
        "language:",
        "topic:",
        "license:",
        "is:",
        "has:",
        "good-first-issues:",
        "help-wanted-issues:",
        "archived:",
        "mirror:",
        "template:",
        "sort:",
        " or ",
        " and ",
        " not ",
    ];

    let candidate = language.trim();
    if candidate.is_empty() {
        return None;
    }

    let is_allowed = |c: char| c.is_alphanumeric() || matches!(c, ' ' | '-' | '+' | '#' | '.');
    if candidate.chars().any(|c| !is_allowed(c)) {
        return None;
    }

    // Matching is case-insensitive: compare against a lowercase copy.
    let folded = candidate.to_lowercase();
    if DENYLIST.iter().any(|&needle| folded.contains(needle)) {
        return None;
    }

    Some(candidate.to_string())
}
65
66#[derive(Deserialize, Serialize, Clone, Default, Debug)]
71pub struct SearchRepository {
72 pub id: String,
74 pub name: String,
76 #[serde(rename = "nameWithOwner")]
78 pub name_with_owner: String,
79 pub description: Option<String>,
81 pub url: String,
83 #[serde(rename = "stargazerCount")]
85 pub stargazer_count: u32,
86 #[serde(rename = "forkCount")]
88 pub fork_count: u32,
89 #[serde(rename = "createdAt")]
91 pub created_at: String,
92 #[serde(rename = "updatedAt")]
94 pub updated_at: String,
95 #[serde(rename = "pushedAt")]
97 pub pushed_at: Option<String>,
98 #[serde(rename = "primaryLanguage")]
100 pub primary_language: Option<Language>,
101 #[serde(rename = "licenseInfo")]
103 pub license_info: Option<License>,
104 #[serde(rename = "repositoryTopics")]
106 pub repository_topics: TopicConnection,
107}
108
109impl SearchRepository {
110 #[must_use]
112 pub fn language(&self) -> Option<&str> {
113 self.primary_language.as_ref().map(|l| l.name.as_str())
114 }
115
116 #[must_use]
118 pub fn license(&self) -> Option<&str> {
119 self.license_info.as_ref().map(|l| l.name.as_str())
120 }
121
122 #[must_use]
124 pub fn license_spdx(&self) -> Option<&str> {
125 self.license_info
126 .as_ref()
127 .and_then(|l| l.spdx_id.as_deref())
128 }
129
130 #[must_use]
132 pub fn topics(&self) -> Vec<&str> {
133 self.repository_topics
134 .edges
135 .iter()
136 .map(|e| e.node.topic.name.as_str())
137 .collect()
138 }
139
140 #[must_use]
142 pub fn owner(&self) -> &str {
143 self.name_with_owner
144 .split('/')
145 .next()
146 .unwrap_or(&self.name_with_owner)
147 }
148}
149
/// Top-level `data` payload of a search response: a single `search` connection.
#[derive(Deserialize)]
struct SearchResult {
    search: SearchConnection,
}
154
155#[derive(Deserialize)]
156struct PageInfo {
157 #[serde(rename = "hasNextPage")]
158 has_next_page: bool,
159 #[serde(rename = "endCursor")]
160 end_cursor: Option<String>,
161}
162
163#[derive(Deserialize)]
164struct SearchConnection {
165 #[serde(rename = "repositoryCount")]
166 #[allow(dead_code)]
167 repository_count: u32,
168 #[serde(rename = "pageInfo")]
169 page_info: PageInfo,
170 edges: Vec<SearchEdge>,
171}
172
/// One edge of the search connection, wrapping a single repository node.
#[derive(Deserialize)]
struct SearchEdge {
    node: SearchRepository,
}
177
178pub async fn search_repositories(
180 client: &GitHubClient,
181 days_back: u32,
182 limit: usize,
183 language: Option<&str>,
184 min_stars: u32,
185) -> Result<Vec<SearchRepository>> {
186 let now = Utc::now();
187 let days_ago = now - Duration::days(days_back as i64);
188 let date_filter = days_ago.format("%Y-%m-%d").to_string();
189
190 let mut query_parts = vec![
191 format!("created:>{}", date_filter),
192 format!("stars:>={}", min_stars),
193 "is:public".to_string(),
194 "sort:stars-desc".to_string(),
195 ];
196
197 if let Some(lang) = language {
198 if let Some(validated_lang) = validate_language(lang) {
199 query_parts.push(format!("language:{}", validated_lang));
200 } else {
201 return Err(GitHubError::InvalidInput(format!(
202 "Invalid language parameter: '{}'. Language must contain only alphanumeric characters, spaces, hyphens, plus signs, hash, or dots.",
203 lang
204 )));
205 }
206 }
207
208 let query_string = query_parts.join(" ");
209 tracing::debug!("GitHub search query: {}", query_string);
210
211 let mut all_repositories = Vec::new();
212 let mut after_cursor: Option<String> = None;
213 let max_total = limit.min(1000);
214
215 loop {
216 let mut variables = HashMap::new();
217 variables.insert(
218 "queryString".to_string(),
219 serde_json::Value::String(query_string.clone()),
220 );
221 variables.insert(
222 "first".to_string(),
223 serde_json::Value::Number(serde_json::Number::from(100)),
224 );
225
226 if let Some(cursor) = &after_cursor {
227 variables.insert(
228 "after".to_string(),
229 serde_json::Value::String(cursor.clone()),
230 );
231 } else {
232 variables.insert("after".to_string(), serde_json::Value::Null);
233 }
234
235 let graphql_query: GraphQLQuery<HashMap<String, serde_json::Value>> = GraphQLQuery {
236 query: GRAPHQL_SEARCH_REPOSITORIES_QUERY.to_string(),
237 variables,
238 };
239
240 let response = client
241 .client()
242 .post(GITHUB_GRAPHQL_URL)
243 .json(&graphql_query)
244 .send()
245 .await?;
246
247 let status = response.status();
248 if !status.is_success() {
249 let error_text = response.text().await.unwrap_or_default();
250 return match status.as_u16() {
251 401 => Err(GitHubError::AuthenticationError(
252 "Invalid or missing GitHub token".to_string(),
253 )),
254 403 => Err(GitHubError::RateLimitError(
255 "GraphQL API rate limit exceeded".to_string(),
256 )),
257 451 => Err(GitHubError::DmcaBlockedError(
258 "Search blocked for legal reasons".to_string(),
259 )),
260 _ => Err(GitHubError::ApiError {
261 status: status.as_u16(),
262 message: error_text,
263 }),
264 };
265 }
266
267 let graphql_response: GraphQLResponse<SearchResult> = response.json().await?;
268
269 if let Some(errors) = graphql_response.errors {
270 let error_message = errors
271 .into_iter()
272 .map(|e| e.message)
273 .collect::<Vec<_>>()
274 .join(", ");
275 return Err(GitHubError::ApiError {
276 status: 200,
277 message: error_message,
278 });
279 }
280
281 match graphql_response.data {
282 Some(data) => {
283 let page_repositories: Vec<SearchRepository> = data
284 .search
285 .edges
286 .into_iter()
287 .map(|edge| edge.node)
288 .collect();
289
290 all_repositories.extend(page_repositories);
291
292 if data.search.page_info.has_next_page && all_repositories.len() < max_total {
293 after_cursor = data.search.page_info.end_cursor;
294 } else {
295 break;
296 }
297 }
298 None => {
299 return Err(GitHubError::ParseError(
300 "No data in GraphQL response".to_string(),
301 ));
302 }
303 }
304 }
305
306 all_repositories.truncate(max_total);
307 Ok(all_repositories)
308}
309
#[cfg(test)]
mod tests {
    use super::*;

    /// A default-constructed repository has zero stars, zero forks and no description.
    #[test]
    fn test_search_repository_default() {
        let SearchRepository {
            stargazer_count,
            fork_count,
            description,
            ..
        } = SearchRepository::default();

        assert_eq!(stargazer_count, 0);
        assert_eq!(fork_count, 0);
        assert!(description.is_none());
    }
}