1use anyhow::{anyhow, Result};
5use chrono::{DateTime, Utc};
6use octocrab::Octocrab;
7use serde::{Deserialize, Serialize};
8use tracing::{debug, info};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct RepoInfo {
13 pub name: String,
14 pub full_name: String,
15 pub description: Option<String>,
16 pub language: Option<String>,
17 pub stars: u32,
18 pub default_branch: String,
19 pub updated_at: DateTime<Utc>,
20}
21
22pub struct GitHubMiner {
25 client: Octocrab,
26}
27
28impl GitHubMiner {
29 pub fn new(token: Option<String>) -> Self {
45 let client = if let Some(token) = token {
46 debug!("Initializing GitHub client with authentication");
47 Octocrab::builder()
48 .personal_token(token)
49 .build()
50 .expect("Failed to build Octocrab client")
51 } else {
52 debug!("Initializing GitHub client without authentication");
53 Octocrab::builder()
54 .build()
55 .expect("Failed to build Octocrab client")
56 };
57
58 Self { client }
59 }
60
61 pub async fn fetch_organization_repos(&self, org_name: &str) -> Result<Vec<RepoInfo>> {
84 if org_name.trim().is_empty() {
86 return Err(anyhow!("Organization name cannot be empty"));
87 }
88
89 info!("Fetching repositories for organization: {}", org_name);
90
91 let repos = self
93 .client
94 .orgs(org_name)
95 .list_repos()
96 .send()
97 .await
98 .map_err(|e| anyhow!("Failed to fetch repositories for {}: {}", org_name, e))?;
99
100 debug!("Found {} repositories for {}", repos.items.len(), org_name);
101
102 let repo_infos: Vec<RepoInfo> = repos
104 .items
105 .into_iter()
106 .map(|repo| RepoInfo {
107 name: repo.name,
108 full_name: repo.full_name.unwrap_or_default(),
109 description: repo.description,
110 language: repo.language.and_then(|v| v.as_str().map(String::from)),
111 stars: repo.stargazers_count.unwrap_or(0),
112 default_branch: repo.default_branch.unwrap_or_else(|| "main".to_string()),
113 updated_at: repo.updated_at.unwrap_or_else(Utc::now),
114 })
115 .collect();
116
117 info!(
118 "Successfully fetched {} repositories for {}",
119 repo_infos.len(),
120 org_name
121 );
122
123 Ok(repo_infos)
124 }
125
126 pub fn filter_by_date(repos: Vec<RepoInfo>, since: DateTime<Utc>) -> Vec<RepoInfo> {
135 repos
136 .into_iter()
137 .filter(|repo| repo.updated_at >= since)
138 .collect()
139 }
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal `RepoInfo` fixture: `full_name` is `org/<name>`, the
    /// optional fields are `None`, `stars` is `0` and the default branch is
    /// `"main"`. Tests override individual fields with struct-update syntax.
    fn sample_repo(name: &str, updated_at: DateTime<Utc>) -> RepoInfo {
        RepoInfo {
            name: name.to_string(),
            full_name: format!("org/{}", name),
            description: None,
            language: None,
            stars: 0,
            default_branch: "main".to_string(),
            updated_at,
        }
    }

    // The construction tests run under `#[tokio::test]`.
    // NOTE(review): presumably because building the client needs a Tokio
    // runtime -- confirm against the octocrab version in use.
    #[tokio::test]
    async fn test_github_miner_creation() {
        let _miner = GitHubMiner::new(None);
        let _miner_with_token = GitHubMiner::new(Some("test_token".to_string()));
    }

    #[tokio::test]
    async fn test_empty_org_name_validation() {
        let miner = GitHubMiner::new(None);
        let result = miner.fetch_organization_repos("").await;

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("cannot be empty"));
    }

    #[tokio::test]
    async fn test_whitespace_org_name_validation() {
        let miner = GitHubMiner::new(None);
        let result = miner.fetch_organization_repos(" ").await;

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("cannot be empty"));
    }

    #[test]
    fn test_repo_info_structure() {
        let now = Utc::now();
        let repo = RepoInfo {
            name: "test-repo".to_string(),
            full_name: "owner/test-repo".to_string(),
            description: Some("A test repository".to_string()),
            language: Some("Rust".to_string()),
            stars: 42,
            default_branch: "main".to_string(),
            updated_at: now,
        };

        assert_eq!(repo.name, "test-repo");
        assert_eq!(repo.full_name, "owner/test-repo");
        assert_eq!(repo.description, Some("A test repository".to_string()));
        assert_eq!(repo.language, Some("Rust".to_string()));
        assert_eq!(repo.stars, 42);
        assert_eq!(repo.default_branch, "main");
        assert_eq!(repo.updated_at, now);
    }

    #[test]
    fn test_repo_info_serialization() {
        let repo = RepoInfo {
            full_name: "owner/test".to_string(),
            ..sample_repo("test", Utc::now())
        };

        let json = serde_json::to_string(&repo).unwrap();
        let deserialized: RepoInfo = serde_json::from_str(&json).unwrap();

        assert_eq!(repo.name, deserialized.name);
        assert_eq!(repo.full_name, deserialized.full_name);
        assert_eq!(repo.description, deserialized.description);
        assert_eq!(repo.language, deserialized.language);
        assert_eq!(repo.stars, deserialized.stars);
        assert_eq!(repo.default_branch, deserialized.default_branch);
    }

    #[test]
    fn test_filter_by_date_includes_recent() {
        let now = Utc::now();
        let yesterday = now - chrono::Duration::days(1);
        let last_week = now - chrono::Duration::days(7);

        let repos = vec![sample_repo("recent", now), sample_repo("old", last_week)];

        let filtered = GitHubMiner::filter_by_date(repos, yesterday);
        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0].name, "recent");
    }

    #[test]
    fn test_filter_by_date_excludes_old() {
        let now = Utc::now();
        let two_days_ago = now - chrono::Duration::days(2);
        let one_week_ago = now - chrono::Duration::days(7);

        let repos = vec![sample_repo("old", one_week_ago)];

        let filtered = GitHubMiner::filter_by_date(repos, two_days_ago);
        assert_eq!(filtered.len(), 0);
    }

    #[test]
    fn test_filter_by_date_empty_input() {
        let now = Utc::now();
        let repos: Vec<RepoInfo> = vec![];

        let filtered = GitHubMiner::filter_by_date(repos, now);
        assert_eq!(filtered.len(), 0);
    }

    #[test]
    fn test_filter_by_date_exact_match() {
        let now = Utc::now();

        let repos = vec![sample_repo("exact", now)];

        // The cutoff is inclusive: a repository updated exactly at `since` is kept.
        let filtered = GitHubMiner::filter_by_date(repos, now);
        assert_eq!(filtered.len(), 1);
    }

    #[test]
    fn test_repo_info_with_all_fields() {
        let repo = RepoInfo {
            full_name: "owner/full-repo".to_string(),
            description: Some("Complete description".to_string()),
            language: Some("Python".to_string()),
            stars: 1000,
            default_branch: "develop".to_string(),
            ..sample_repo("full-repo", Utc::now())
        };

        assert!(repo.description.is_some());
        assert!(repo.language.is_some());
        assert!(repo.stars > 0);
    }

    #[test]
    fn test_repo_info_minimal_fields() {
        let repo = sample_repo("minimal", Utc::now());

        assert!(repo.description.is_none());
        assert!(repo.language.is_none());
        assert_eq!(repo.stars, 0);
    }

    #[test]
    fn test_filter_by_date_multiple_repos() {
        let now = Utc::now();
        let cutoff = now - chrono::Duration::days(3);

        let repos = vec![
            sample_repo("repo1", now),
            sample_repo("repo2", now - chrono::Duration::days(2)),
            sample_repo("repo3", now - chrono::Duration::days(5)),
        ];

        let filtered = GitHubMiner::filter_by_date(repos, cutoff);
        assert_eq!(filtered.len(), 2);
        assert_eq!(filtered[0].name, "repo1");
        assert_eq!(filtered[1].name, "repo2");
    }

    #[test]
    fn test_repo_info_clone() {
        let original = RepoInfo {
            description: Some("desc".to_string()),
            language: Some("Rust".to_string()),
            stars: 100,
            ..sample_repo("test", Utc::now())
        };

        let cloned = original.clone();

        assert_eq!(original.name, cloned.name);
        assert_eq!(original.full_name, cloned.full_name);
        assert_eq!(original.description, cloned.description);
        assert_eq!(original.language, cloned.language);
        assert_eq!(original.stars, cloned.stars);
        assert_eq!(original.default_branch, cloned.default_branch);
        assert_eq!(original.updated_at, cloned.updated_at);
    }

    #[test]
    fn test_repo_info_debug_format() {
        let repo = RepoInfo {
            full_name: "owner/test-repo".to_string(),
            description: Some("Test description".to_string()),
            language: Some("Rust".to_string()),
            stars: 42,
            ..sample_repo("test-repo", Utc::now())
        };

        let debug_str = format!("{:?}", repo);
        assert!(debug_str.contains("test-repo"));
        assert!(debug_str.contains("owner/test-repo"));
        assert!(debug_str.contains("42"));
    }

    #[test]
    fn test_repo_info_with_empty_strings() {
        let repo = RepoInfo {
            name: "".to_string(),
            full_name: "".to_string(),
            description: Some("".to_string()),
            language: Some("".to_string()),
            stars: 0,
            default_branch: "".to_string(),
            updated_at: Utc::now(),
        };

        assert_eq!(repo.name, "");
        assert_eq!(repo.full_name, "");
        assert_eq!(repo.description, Some("".to_string()));
        assert_eq!(repo.language, Some("".to_string()));
    }

    #[test]
    fn test_repo_info_with_high_stars() {
        let repo = RepoInfo {
            stars: 999999,
            ..sample_repo("popular", Utc::now())
        };

        assert_eq!(repo.stars, 999999);
    }

    #[test]
    fn test_repo_info_with_different_branches() {
        let now = Utc::now();

        let main_repo = sample_repo("main-branch", now);
        let master_repo = RepoInfo {
            default_branch: "master".to_string(),
            ..sample_repo("master-branch", now)
        };
        let develop_repo = RepoInfo {
            default_branch: "develop".to_string(),
            ..sample_repo("develop-branch", now)
        };

        assert_eq!(main_repo.default_branch, "main");
        assert_eq!(master_repo.default_branch, "master");
        assert_eq!(develop_repo.default_branch, "develop");
    }

    #[test]
    fn test_filter_by_date_preserves_order() {
        let now = Utc::now();
        let cutoff = now - chrono::Duration::days(10);

        // Deliberately not sorted by date: the filter must keep input order.
        let repos = vec![
            sample_repo("first", now),
            sample_repo("second", now - chrono::Duration::days(5)),
            sample_repo("third", now - chrono::Duration::days(3)),
        ];

        let filtered = GitHubMiner::filter_by_date(repos, cutoff);

        assert_eq!(filtered.len(), 3);
        assert_eq!(filtered[0].name, "first");
        assert_eq!(filtered[1].name, "second");
        assert_eq!(filtered[2].name, "third");
    }

    #[test]
    fn test_repo_info_with_long_description() {
        let long_desc = "A".repeat(1000);

        let repo = RepoInfo {
            description: Some(long_desc.clone()),
            ..sample_repo("described", Utc::now())
        };

        assert_eq!(repo.description, Some(long_desc));
    }

    #[test]
    fn test_filter_by_date_with_future_date() {
        let now = Utc::now();
        let future = now + chrono::Duration::days(365);

        let repos = vec![sample_repo("current", now)];

        let filtered = GitHubMiner::filter_by_date(repos, future);
        assert_eq!(filtered.len(), 0);
    }

    #[test]
    fn test_repo_info_deserialization() {
        let json = r#"{
            "name": "test-repo",
            "full_name": "owner/test-repo",
            "description": "Test description",
            "language": "Rust",
            "stars": 123,
            "default_branch": "main",
            "updated_at": "2024-01-01T00:00:00Z"
        }"#;

        let repo: RepoInfo = serde_json::from_str(json).unwrap();
        let expected_updated_at: DateTime<Utc> = "2024-01-01T00:00:00Z".parse().unwrap();

        assert_eq!(repo.name, "test-repo");
        assert_eq!(repo.full_name, "owner/test-repo");
        assert_eq!(repo.description, Some("Test description".to_string()));
        assert_eq!(repo.language, Some("Rust".to_string()));
        assert_eq!(repo.stars, 123);
        assert_eq!(repo.default_branch, "main");
        assert_eq!(repo.updated_at, expected_updated_at);
    }

    #[test]
    fn test_repo_info_with_special_characters() {
        let repo = RepoInfo {
            description: Some("Description with émojis 🚀 and special chars: <>&\"'".to_string()),
            language: Some("C++".to_string()),
            ..sample_repo("repo-with_special.chars", Utc::now())
        };

        assert!(repo.description.unwrap().contains("🚀"));
        assert_eq!(repo.language.unwrap(), "C++");
    }

    #[test]
    fn test_multiple_languages() {
        let now = Utc::now();

        let languages = [
            "Rust",
            "Python",
            "JavaScript",
            "Go",
            "TypeScript",
            "C++",
            "Java",
        ];

        for lang in languages {
            let repo = RepoInfo {
                language: Some(lang.to_string()),
                ..sample_repo(&format!("{}-repo", lang.to_lowercase()), now)
            };

            assert_eq!(repo.language, Some(lang.to_string()));
        }
    }

    #[tokio::test]
    async fn test_github_miner_with_empty_token() {
        let _miner = GitHubMiner::new(Some("".to_string()));
    }

    #[test]
    fn test_filter_by_date_all_old() {
        let now = Utc::now();
        let very_old = now - chrono::Duration::days(365);

        let repos = vec![
            sample_repo("old1", very_old),
            sample_repo("old2", very_old - chrono::Duration::days(30)),
        ];

        let filtered = GitHubMiner::filter_by_date(repos, now);
        assert_eq!(filtered.len(), 0);
    }
}