1use anyhow::{Context, Result, anyhow};
7use chrono::{DateTime, NaiveDate, Utc};
8use reqwest::blocking::Client;
9use serde::de::DeserializeOwned;
10use serde::{Deserialize, Serialize};
11use shiplog_cache::ApiCache;
12use shiplog_cache::CacheKey;
13use shiplog_coverage::{day_windows, month_windows, week_windows, window_len_days};
14use shiplog_ids::{EventId, RunId};
15use shiplog_ports::{IngestOutput, Ingestor};
16use shiplog_schema::coverage::{Completeness, CoverageManifest, CoverageSlice, TimeWindow};
17use shiplog_schema::event::{
18 Actor, EventEnvelope, EventKind, EventPayload, Link, PullRequestEvent, PullRequestState,
19 RepoRef, RepoVisibility, ReviewEvent, SourceRef, SourceSystem,
20};
21use std::path::PathBuf;
22use std::thread::sleep;
23use std::time::Duration;
24use url::Url;
25
26#[derive(Debug)]
27pub struct GithubIngestor {
28 pub user: String,
29 pub since: NaiveDate,
30 pub until: NaiveDate,
31 pub mode: String,
33 pub include_reviews: bool,
34 pub fetch_details: bool,
35 pub throttle_ms: u64,
36 pub token: Option<String>,
37 pub api_base: String,
39 pub cache: Option<ApiCache>,
41}
42
43impl GithubIngestor {
44 pub fn new(user: String, since: NaiveDate, until: NaiveDate) -> Self {
62 Self {
63 user,
64 since,
65 until,
66 mode: "merged".to_string(),
67 include_reviews: false,
68 fetch_details: true,
69 throttle_ms: 0,
70 token: None,
71 api_base: "https://api.github.com".to_string(),
72 cache: None,
73 }
74 }
75
76 pub fn with_cache(mut self, cache_dir: impl Into<PathBuf>) -> Result<Self> {
92 let cache_path = cache_dir.into().join("github-api-cache.db");
93 if let Some(parent) = cache_path.parent() {
94 std::fs::create_dir_all(parent)
95 .with_context(|| format!("create GitHub cache directory {parent:?}"))?;
96 }
97 let cache = ApiCache::open(&cache_path)
98 .with_context(|| format!("open GitHub API cache at {cache_path:?}"))?;
99 self.cache = Some(cache);
100 Ok(self)
101 }
102
103 pub fn with_in_memory_cache(mut self) -> Result<Self> {
119 let cache = ApiCache::open_in_memory().context("open in-memory API cache")?;
120 self.cache = Some(cache);
121 Ok(self)
122 }
123
124 fn html_base_url(&self) -> String {
125 if let Ok(u) = Url::parse(&self.api_base) {
126 let scheme = u.scheme();
127 if let Some(host) = u.host_str() {
128 if host == "api.github.com" {
129 return "https://github.com".to_string();
130 }
131 let port_suffix = u.port().map(|p| format!(":{p}")).unwrap_or_default();
132 return format!("{scheme}://{host}{port_suffix}");
133 }
134 }
135 "https://github.com".to_string()
136 }
137
138 #[mutants::skip]
139 fn client(&self) -> Result<Client> {
140 Client::builder()
141 .user_agent(concat!("shiplog/", env!("CARGO_PKG_VERSION")))
142 .build()
143 .context("build reqwest client")
144 }
145
146 #[mutants::skip]
147 fn api_url(&self, path: &str) -> String {
148 format!("{}{}", self.api_base.trim_end_matches('/'), path)
149 }
150
151 #[mutants::skip]
152 fn throttle(&self) {
153 if self.throttle_ms > 0 {
154 sleep(Duration::from_millis(self.throttle_ms));
155 }
156 }
157
158 #[mutants::skip]
159 fn get_json<T: DeserializeOwned>(
160 &self,
161 client: &Client,
162 url: &str,
163 params: &[(&str, String)],
164 ) -> Result<T> {
165 let request_url = build_url_with_params(url, params)?;
166 let request_url_for_err = request_url.as_str().to_string();
167
168 let mut req = client
169 .get(request_url)
170 .header("Accept", "application/vnd.github+json");
171 req = req.header("X-GitHub-Api-Version", "2022-11-28");
172 if let Some(t) = &self.token {
173 req = req.bearer_auth(t);
174 }
175 let resp = req
176 .send()
177 .with_context(|| format!("GET {request_url_for_err}"))?;
178 self.throttle();
179
180 if !resp.status().is_success() {
181 let status = resp.status();
182 let body = resp.text().unwrap_or_default();
183 return Err(anyhow!("GitHub API error {status}: {body}"));
184 }
185
186 resp.json::<T>()
187 .with_context(|| format!("parse json from {request_url_for_err}"))
188 }
189}
190
191impl Ingestor for GithubIngestor {
192 #[mutants::skip]
193 fn ingest(&self) -> Result<IngestOutput> {
194 if self.since >= self.until {
195 return Err(anyhow!("since must be < until"));
196 }
197
198 let client = self.client().context("create GitHub API client")?;
199 let run_id = RunId::now("shiplog");
200 let mut slices: Vec<CoverageSlice> = Vec::new();
201 let mut warnings: Vec<String> = Vec::new();
202 let mut completeness = Completeness::Complete;
203
204 let mut events: Vec<EventEnvelope> = Vec::new();
205
206 let pr_query_builder = |w: &TimeWindow| self.build_pr_query(w);
208 let (pr_items, pr_slices, pr_partial) =
209 self.collect_search_items(&client, pr_query_builder, self.since, self.until, "prs")?;
210 slices.extend(pr_slices);
211 if pr_partial {
212 completeness = Completeness::Partial;
213 }
214
215 events.extend(self.items_to_pr_events(&client, pr_items)?);
216
217 if self.include_reviews {
219 warnings.push("Reviews are collected via search + per-PR review fetch; treat as best-effort coverage.".to_string());
220 let review_query_builder = |w: &TimeWindow| self.build_reviewed_query(w);
221 let (review_items, review_slices, review_partial) = self.collect_search_items(
222 &client,
223 review_query_builder,
224 self.since,
225 self.until,
226 "reviews",
227 )?;
228 slices.extend(review_slices);
229 if review_partial {
230 completeness = Completeness::Partial;
231 }
232 events.extend(self.items_to_review_events(&client, review_items)?);
233 }
234
235 events.sort_by_key(|e| e.occurred_at);
237
238 let cov = CoverageManifest {
239 run_id,
240 generated_at: Utc::now(),
241 user: self.user.clone(),
242 window: TimeWindow {
243 since: self.since,
244 until: self.until,
245 },
246 mode: self.mode.clone(),
247 sources: vec!["github".to_string()],
248 slices,
249 warnings,
250 completeness,
251 };
252
253 Ok(IngestOutput {
254 events,
255 coverage: cov,
256 })
257 }
258}
259
260impl GithubIngestor {
261 fn build_pr_query(&self, w: &TimeWindow) -> String {
262 let (start, end) = github_inclusive_range(w);
263 match self.mode.as_str() {
264 "created" => format!("is:pr author:{} created:{}..{}", self.user, start, end),
265 _ => format!(
266 "is:pr is:merged author:{} merged:{}..{}",
267 self.user, start, end
268 ),
269 }
270 }
271
272 fn build_reviewed_query(&self, w: &TimeWindow) -> String {
273 let (start, end) = github_inclusive_range(w);
276 format!("is:pr reviewed-by:{} updated:{}..{}", self.user, start, end)
277 }
278
279 #[mutants::skip]
286 fn collect_search_items<F>(
287 &self,
288 client: &Client,
289 make_query: F,
290 since: NaiveDate,
291 until: NaiveDate,
292 label: &str,
293 ) -> Result<(Vec<SearchIssueItem>, Vec<CoverageSlice>, bool)>
294 where
295 F: Fn(&TimeWindow) -> String,
296 {
297 let mut slices: Vec<CoverageSlice> = Vec::new();
298 let mut items: Vec<SearchIssueItem> = Vec::new();
299 let mut partial = false;
300
301 for w in month_windows(since, until) {
302 let (mut i, mut s, p) =
303 self.collect_window(client, &make_query, &w, Granularity::Month, label)?;
304 items.append(&mut i);
305 slices.append(&mut s);
306 partial |= p;
307 }
308
309 Ok((items, slices, partial))
310 }
311
312 #[mutants::skip]
313 fn collect_window<F>(
314 &self,
315 client: &Client,
316 make_query: &F,
317 window: &TimeWindow,
318 gran: Granularity,
319 label: &str,
320 ) -> Result<(Vec<SearchIssueItem>, Vec<CoverageSlice>, bool)>
321 where
322 F: Fn(&TimeWindow) -> String,
323 {
324 if window.since >= window.until {
325 return Ok((vec![], vec![], false));
326 }
327
328 let query = make_query(window);
329 let (meta_total, meta_incomplete) = self.search_meta(client, &query)?;
330 let mut slices = vec![CoverageSlice {
331 window: window.clone(),
332 query: query.clone(),
333 total_count: meta_total,
334 fetched: 0,
335 incomplete_results: Some(meta_incomplete),
336 notes: vec![format!("probe:{label}")],
337 }];
338
339 let need_subdivide = meta_total > 1000 || meta_incomplete;
341 let can_subdivide = gran != Granularity::Day && window_len_days(window) > 1;
342
343 if need_subdivide && can_subdivide {
344 slices[0].notes.push(format!(
345 "subdivide:{}",
346 if meta_total > 1000 {
347 "cap"
348 } else {
349 "incomplete"
350 }
351 ));
352
353 let mut out_items = Vec::new();
354 let mut out_slices = slices;
355 let mut partial = false;
356
357 let subs = match gran {
358 Granularity::Month => week_windows(window.since, window.until),
359 Granularity::Week => day_windows(window.since, window.until),
360 Granularity::Day => vec![],
361 };
362
363 for sub in subs {
364 let (mut i, mut s, p) =
365 self.collect_window(client, make_query, &sub, gran.next(), label)?;
366 out_items.append(&mut i);
367 out_slices.append(&mut s);
368 partial |= p;
369 }
370 return Ok((out_items, out_slices, partial));
371 }
372
373 let mut partial = false;
375 if meta_total > 1000 || meta_incomplete {
376 partial = true;
377 slices[0]
378 .notes
379 .push("partial:unresolvable_at_this_granularity".to_string());
380 }
381
382 let fetched_items = self.fetch_all_search_items(client, &query)?;
383 let fetched = fetched_items.len() as u64;
384
385 slices.push(CoverageSlice {
387 window: window.clone(),
388 query: query.clone(),
389 total_count: meta_total,
390 fetched,
391 incomplete_results: Some(meta_incomplete),
392 notes: vec![format!("fetch:{label}")],
393 });
394
395 Ok((fetched_items, slices, partial))
396 }
397
398 #[mutants::skip]
399 fn search_meta(&self, client: &Client, q: &str) -> Result<(u64, bool)> {
400 let url = self.api_url("/search/issues");
401 let resp: SearchResponse<SearchIssueItem> = self.get_json(
402 client,
403 &url,
404 &[
405 ("q", q.to_string()),
406 ("per_page", "1".to_string()),
407 ("page", "1".to_string()),
408 ],
409 )?;
410 Ok((resp.total_count, resp.incomplete_results))
411 }
412
413 #[mutants::skip]
414 fn fetch_all_search_items(&self, client: &Client, q: &str) -> Result<Vec<SearchIssueItem>> {
415 let url = self.api_url("/search/issues");
416 let mut out: Vec<SearchIssueItem> = Vec::new();
417 let per_page = 100;
418 let max_pages = 10; for page in 1..=max_pages {
420 let resp: SearchResponse<SearchIssueItem> = self.get_json(
421 client,
422 &url,
423 &[
424 ("q", q.to_string()),
425 ("per_page", per_page.to_string()),
426 ("page", page.to_string()),
427 ],
428 )?;
429 let items_len = resp.items.len();
430 out.extend(resp.items);
431 if out.len() as u64 >= resp.total_count.min(1000) {
432 break;
433 }
434 if items_len < per_page {
435 break;
436 }
437 }
438 Ok(out)
439 }
440
441 #[mutants::skip]
442 fn items_to_pr_events(
443 &self,
444 client: &Client,
445 items: Vec<SearchIssueItem>,
446 ) -> Result<Vec<EventEnvelope>> {
447 let mut out = Vec::new();
448 for item in items {
449 if let Some(pr_ref) = &item.pull_request {
450 let html_base = self.html_base_url();
451 let (repo_full_name, repo_html_url) =
452 repo_from_repo_url(&item.repository_url, &html_base);
453
454 let (title, created_at, merged_at, additions, deletions, changed_files, visibility) =
455 if self.fetch_details {
456 match self.fetch_pr_details(client, &pr_ref.url) {
457 Ok(d) => {
458 let vis = if d.base.repo.private_field {
459 RepoVisibility::Private
460 } else {
461 RepoVisibility::Public
462 };
463 (
464 d.title,
465 d.created_at,
466 d.merged_at,
467 Some(d.additions),
468 Some(d.deletions),
469 Some(d.changed_files),
470 vis,
471 )
472 }
473 Err(_) => {
474 (
476 item.title.clone(),
477 item.created_at.unwrap_or_else(Utc::now),
478 None,
479 None,
480 None,
481 None,
482 RepoVisibility::Unknown,
483 )
484 }
485 }
486 } else {
487 (
488 item.title.clone(),
489 item.created_at.unwrap_or_else(Utc::now),
490 None,
491 None,
492 None,
493 None,
494 RepoVisibility::Unknown,
495 )
496 };
497
498 let occurred_at = match self.mode.as_str() {
499 "created" => created_at,
500 _ => merged_at.unwrap_or(created_at),
501 };
502
503 let state = if merged_at.is_some() {
504 PullRequestState::Merged
505 } else {
506 PullRequestState::Unknown
507 };
508
509 let id = EventId::from_parts([
510 "github",
511 "pr",
512 &repo_full_name,
513 &item.number.to_string(),
514 ]);
515
516 let ev = EventEnvelope {
517 id,
518 kind: EventKind::PullRequest,
519 occurred_at,
520 actor: Actor {
521 login: self.user.clone(),
522 id: None,
523 },
524 repo: RepoRef {
525 full_name: repo_full_name,
526 html_url: Some(repo_html_url),
527 visibility,
528 },
529 payload: EventPayload::PullRequest(PullRequestEvent {
530 number: item.number,
531 title,
532 state,
533 created_at,
534 merged_at,
535 additions,
536 deletions,
537 changed_files,
538 touched_paths_hint: vec![],
539 window: None,
540 }),
541 tags: vec![],
542 links: vec![Link {
543 label: "pr".into(),
544 url: item.html_url.clone(),
545 }],
546 source: SourceRef {
547 system: SourceSystem::Github,
548 url: Some(pr_ref.url.clone()),
549 opaque_id: Some(item.id.to_string()),
550 },
551 };
552
553 out.push(ev);
554 }
555 }
556 Ok(out)
557 }
558
559 #[mutants::skip]
560 fn items_to_review_events(
561 &self,
562 client: &Client,
563 items: Vec<SearchIssueItem>,
564 ) -> Result<Vec<EventEnvelope>> {
565 let mut out = Vec::new();
566 for item in items {
567 let Some(pr_ref) = &item.pull_request else {
568 continue;
569 };
570 let html_base = self.html_base_url();
571 let (repo_full_name, repo_html_url) =
572 repo_from_repo_url(&item.repository_url, &html_base);
573
574 let reviews = self.fetch_pr_reviews(client, &pr_ref.url)?;
576 for r in reviews {
577 if r.user.login != self.user {
578 continue;
579 }
580 let submitted = match r.submitted_at {
581 Some(s) => s,
582 None => continue,
583 };
584 let submitted_date = submitted.date_naive();
585 if submitted_date < self.since || submitted_date >= self.until {
586 continue;
587 }
588
589 let id = EventId::from_parts([
590 "github",
591 "review",
592 &repo_full_name,
593 &item.number.to_string(),
594 &r.id.to_string(),
595 ]);
596
597 let ev = EventEnvelope {
598 id,
599 kind: EventKind::Review,
600 occurred_at: submitted,
601 actor: Actor {
602 login: self.user.clone(),
603 id: None,
604 },
605 repo: RepoRef {
606 full_name: repo_full_name.clone(),
607 html_url: Some(repo_html_url.clone()),
608 visibility: RepoVisibility::Unknown,
609 },
610 payload: EventPayload::Review(ReviewEvent {
611 pull_number: item.number,
612 pull_title: item.title.clone(),
613 submitted_at: submitted,
614 state: r.state,
615 window: None,
616 }),
617 tags: vec![],
618 links: vec![Link {
619 label: "pr".into(),
620 url: item.html_url.clone(),
621 }],
622 source: SourceRef {
623 system: SourceSystem::Github,
624 url: Some(pr_ref.url.clone()),
625 opaque_id: Some(r.id.to_string()),
626 },
627 };
628
629 out.push(ev);
630 }
631 }
632 Ok(out)
633 }
634
635 #[mutants::skip]
636 fn fetch_pr_details(&self, client: &Client, pr_api_url: &str) -> Result<PullRequestDetails> {
637 let cache_key = CacheKey::pr_details(pr_api_url);
639 #[allow(clippy::collapsible_if)]
640 if let Some(ref cache) = self.cache {
641 if let Some(cached) = cache.get::<PullRequestDetails>(&cache_key)? {
642 return Ok(cached);
643 }
644 }
645
646 let details: PullRequestDetails = self.get_json(client, pr_api_url, &[])?;
648
649 if let Some(ref cache) = self.cache {
651 cache.set(&cache_key, &details)?;
652 }
653
654 Ok(details)
655 }
656
657 #[mutants::skip]
658 fn fetch_pr_reviews(
659 &self,
660 client: &Client,
661 pr_api_url: &str,
662 ) -> Result<Vec<PullRequestReview>> {
663 let url = format!("{pr_api_url}/reviews");
664 let mut out = Vec::new();
665 let per_page = 100;
666 for page in 1..=10 {
667 let cache_key = CacheKey::pr_reviews(pr_api_url, page);
668
669 let page_reviews: Vec<PullRequestReview> = if let Some(ref cache) = self.cache {
671 if let Some(cached) = cache.get::<Vec<PullRequestReview>>(&cache_key)? {
672 cached
673 } else {
674 let reviews: Vec<PullRequestReview> = self.get_json(
676 client,
677 &url,
678 &[
679 ("per_page", per_page.to_string()),
680 ("page", page.to_string()),
681 ],
682 )?;
683 cache.set(&cache_key, &reviews)?;
685 reviews
686 }
687 } else {
688 self.get_json(
690 client,
691 &url,
692 &[
693 ("per_page", per_page.to_string()),
694 ("page", page.to_string()),
695 ],
696 )?
697 };
698
699 let n = page_reviews.len();
700 out.extend(page_reviews);
701 if n < per_page {
702 break;
703 }
704 }
705 Ok(out)
706 }
707}
708
/// Size of the time window used for one search query.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Granularity {
    Month,
    Week,
    Day,
}

impl Granularity {
    /// Returns the next finer granularity; `Day` is the finest and maps to itself.
    fn next(&self) -> Granularity {
        if matches!(self, Granularity::Month) {
            Granularity::Week
        } else {
            Granularity::Day
        }
    }
}
725
726fn github_inclusive_range(w: &TimeWindow) -> (String, String) {
727 let start = w.since.format("%Y-%m-%d").to_string();
728 let end_date = w.until.pred_opt().unwrap_or(w.until);
729 let end = end_date.format("%Y-%m-%d").to_string();
730 (start, end)
731}
732
733fn build_url_with_params(base: &str, params: &[(&str, String)]) -> Result<Url> {
734 let mut url = Url::parse(base).with_context(|| format!("parse url {base}"))?;
735 if !params.is_empty() {
736 let mut query = url.query_pairs_mut();
737 for (k, v) in params {
738 query.append_pair(k, v);
739 }
740 }
741 Ok(url)
742}
743
744fn repo_from_repo_url(repo_api_url: &str, html_base: &str) -> (String, String) {
745 #[allow(clippy::collapsible_if)]
746 if let Ok(u) = Url::parse(repo_api_url) {
747 if let Some(segs) = u.path_segments() {
748 let v: Vec<&str> = segs.collect();
749 if v.len() >= 3 && v[0] == "repos" {
750 let owner = v[1];
751 let repo = v[2];
752 let full = format!("{}/{}", owner, repo);
753 let html = format!("{}/{}/{}", html_base.trim_end_matches('/'), owner, repo);
754 return (full, html);
755 }
756 }
757 }
758 ("unknown/unknown".to_string(), html_base.to_string())
759}
760
761#[derive(Debug, Deserialize)]
763struct SearchResponse<T> {
764 total_count: u64,
765 incomplete_results: bool,
766 items: Vec<T>,
767}
768
769#[derive(Debug, Deserialize)]
770struct SearchIssueItem {
771 id: u64,
772 number: u64,
773 title: String,
774 html_url: String,
775 repository_url: String,
776 pull_request: Option<SearchPullRequestRef>,
777
778 created_at: Option<DateTime<Utc>>,
780}
781
782#[derive(Debug, Deserialize)]
783struct SearchPullRequestRef {
784 url: String,
785}
786
787#[derive(Debug, Deserialize, Serialize, Clone)]
788struct PullRequestDetails {
789 title: String,
790 created_at: DateTime<Utc>,
791 merged_at: Option<DateTime<Utc>>,
792 additions: u64,
793 deletions: u64,
794 changed_files: u64,
795 base: PullBase,
796}
797
798#[derive(Debug, Deserialize, Serialize, Clone)]
799struct PullBase {
800 repo: PullRepo,
801}
802
803#[derive(Debug, Deserialize, Serialize, Clone)]
804struct PullRepo {
805 full_name: String,
806 html_url: String,
807 #[serde(rename = "private")]
808 private_field: bool,
809}
810
811#[derive(Debug, Deserialize, Serialize, Clone)]
812struct PullRequestReview {
813 id: u64,
814 state: String,
815 submitted_at: Option<DateTime<Utc>>,
816 user: ReviewUser,
817}
818
819#[derive(Debug, Deserialize, Serialize, Clone)]
820struct ReviewUser {
821 login: String,
822}
823
824#[cfg(test)]
825mod tests {
826 use super::*;
827 use proptest::prelude::*;
828
829 fn make_ingestor(user: &str) -> GithubIngestor {
832 GithubIngestor::new(
833 user.to_string(),
834 NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
835 NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
836 )
837 }
838
839 fn make_search_item(number: u64, repo: &str, with_pr: bool) -> SearchIssueItem {
840 SearchIssueItem {
841 id: number * 100,
842 number,
843 title: format!("PR #{number}"),
844 html_url: format!("https://github.com/{repo}/pull/{number}"),
845 repository_url: format!("https://api.github.com/repos/{repo}"),
846 pull_request: if with_pr {
847 Some(SearchPullRequestRef {
848 url: format!("https://api.github.com/repos/{repo}/pulls/{number}"),
849 })
850 } else {
851 None
852 },
853 created_at: Some(Utc::now()),
854 }
855 }
856
/// `with_cache` must create a missing (nested) cache directory and the cache file.
#[test]
fn with_cache_creates_missing_directory() {
    let temp = tempfile::tempdir().unwrap();
    let cache_dir = temp.path().join("nested").join("cache");

    let ing = make_ingestor("octocat").with_cache(&cache_dir).unwrap();

    assert!(ing.cache.is_some());
    let db_file = cache_dir.join("github-api-cache.db");
    assert!(db_file.exists());
}
875
/// Query values are percent-encoded in the URL and decode back unchanged, in order.
#[test]
fn build_url_with_params_encodes_query_values() {
    let params = [
        ("q", "is:pr is:merged author:octocat".to_string()),
        ("per_page", "1".to_string()),
    ];
    let url = build_url_with_params("https://api.github.com/search/issues", &params).unwrap();

    assert!(!url.as_str().contains(' '), "URL should be percent-encoded");

    let decoded: Vec<(String, String)> = url
        .query_pairs()
        .map(|(k, v)| (k.to_string(), v.to_string()))
        .collect();
    let expected = vec![
        (
            "q".to_string(),
            "is:pr is:merged author:octocat".to_string(),
        ),
        ("per_page".to_string(), "1".to_string()),
    ];
    assert_eq!(decoded, expected);
}
904
/// The exclusive `until` date becomes the previous day in the inclusive range.
#[test]
fn github_inclusive_range_uses_exclusive_until_date() {
    let day = |y: i32, m: u32, d: u32| NaiveDate::from_ymd_opt(y, m, d).unwrap();
    let window = TimeWindow {
        since: day(2025, 1, 1),
        until: day(2025, 2, 1),
    };

    let range = github_inclusive_range(&window);
    assert_eq!(range.0, "2025-01-01");
    assert_eq!(range.1, "2025-01-31");
}
916
/// The public API host maps to github.com; an enterprise host maps to its own origin.
#[test]
fn html_base_url_maps_public_and_ghes_hosts() {
    let mut ing = make_ingestor("octocat");
    let cases = [
        ("https://api.github.com", "https://github.com"),
        (
            "https://github.enterprise.local/api/v3",
            "https://github.enterprise.local",
        ),
    ];

    for (api_base, expected) in cases {
        ing.api_base = api_base.to_string();
        assert_eq!(ing.html_base_url(), expected);
    }
}
930
/// Merged and created modes produce different, non-empty queries for the same author.
#[test]
fn build_pr_query_merged_and_created_modes() {
    let day = |y: i32, m: u32, d: u32| NaiveDate::from_ymd_opt(y, m, d).unwrap();
    let mut ing = make_ingestor("octocat");
    let w = TimeWindow {
        since: day(2025, 1, 1),
        until: day(2025, 2, 1),
    };
    let mut query_for = |mode: &str| {
        ing.mode = mode.to_string();
        ing.build_pr_query(&w)
    };

    let merged_q = query_for("merged");
    assert!(!merged_q.is_empty());
    assert!(merged_q.contains("is:merged"));
    assert!(merged_q.contains("author:octocat"));

    let created_q = query_for("created");
    assert!(!created_q.is_empty());
    assert!(created_q.contains("created:"));
    assert!(created_q.contains("author:octocat"));

    assert_ne!(merged_q, created_q);
}
958
/// The review query names the reviewer and restricts the search to pull requests.
#[test]
fn build_reviewed_query_contains_user() {
    let day = |y: i32, m: u32, d: u32| NaiveDate::from_ymd_opt(y, m, d).unwrap();
    let w = TimeWindow {
        since: day(2025, 1, 1),
        until: day(2025, 2, 1),
    };

    let q = make_ingestor("octocat").build_reviewed_query(&w);
    assert!(!q.is_empty());
    assert!(q.contains("reviewed-by:octocat"));
    assert!(q.contains("is:pr"));
}
976
/// Unparsable URLs and non-`repos` paths both yield the fallback values.
#[test]
fn repo_from_repo_url_invalid_url_returns_fallback() {
    let unparsable = repo_from_repo_url("not-a-url-at-all", "https://github.com");
    assert_eq!(unparsable.0, "unknown/unknown");
    assert_eq!(unparsable.1, "https://github.com");

    let wrong_path =
        repo_from_repo_url("https://api.github.com/users/octocat", "https://github.com");
    assert_eq!(wrong_path.0, "unknown/unknown");
}
988
/// A well-formed repository URL is split into name and HTML URL; garbage falls back.
#[test]
fn repo_from_repo_url_extracts_or_falls_back() {
    let html_base = "https://github.com";

    let extracted = repo_from_repo_url("https://api.github.com/repos/owner/repo", html_base);
    assert_eq!(extracted.0, "owner/repo");
    assert_eq!(extracted.1, "https://github.com/owner/repo");

    let fallback = repo_from_repo_url("not-a-url", html_base);
    assert_eq!(fallback.0, "unknown/unknown");
    assert_eq!(fallback.1, "https://github.com");
}
1002
/// Each granularity steps to the expected finer one.
#[test]
fn granularity_next_transitions() {
    let transitions = [
        (Granularity::Month, Granularity::Week),
        (Granularity::Week, Granularity::Day),
        (Granularity::Day, Granularity::Day),
    ];
    for (from, to) in transitions {
        assert_eq!(from.next(), to);
    }
}
1013
/// Stepping from `Day` any number of times stays at `Day`.
#[test]
fn granularity_day_is_fixed_point() {
    let once = Granularity::Day.next();
    assert_eq!(once, Granularity::Day);
    let twice = once.next();
    assert_eq!(twice, Granularity::Day);
}
1020
/// `new` stores the user and applies the documented defaults to every other setting.
#[test]
fn new_defaults_are_correct() {
    let ing = make_ingestor("alice");
    let GithubIngestor {
        user,
        mode,
        include_reviews,
        fetch_details,
        throttle_ms,
        token,
        api_base,
        cache,
        ..
    } = &ing;

    assert_eq!(user, "alice");
    assert_eq!(mode, "merged");
    assert!(!*include_reviews);
    assert!(*fetch_details);
    assert_eq!(*throttle_ms, 0);
    assert!(token.is_none());
    assert_eq!(api_base, "https://api.github.com");
    assert!(cache.is_none());
}
1035
/// `with_in_memory_cache` succeeds and leaves a cache attached.
#[test]
fn with_in_memory_cache_sets_cache() {
    let built = make_ingestor("bob").with_in_memory_cache();
    let ing = built.unwrap();
    assert!(ing.cache.is_some());
}
1043
/// The path is appended directly to the default API base.
#[test]
fn api_url_concatenates_path() {
    let url = make_ingestor("octocat").api_url("/search/issues");
    assert_eq!(url, "https://api.github.com/search/issues");
}
1054
/// A trailing slash on the API base does not produce a double slash.
#[test]
fn api_url_strips_trailing_slash() {
    let mut ing = make_ingestor("octocat");
    ing.api_base = String::from("https://ghes.local/api/v3/");

    let url = ing.api_url("/search/issues");
    assert_eq!(url, "https://ghes.local/api/v3/search/issues");
}
1064
/// An explicit port on the API base is preserved in the HTML base.
#[test]
fn html_base_url_with_port() {
    let mut ing = make_ingestor("octocat");
    ing.api_base = String::from("https://ghes.local:8443/api/v3");

    let html_base = ing.html_base_url();
    assert_eq!(html_base, "https://ghes.local:8443");
}
1073
/// An unparsable API base falls back to the public GitHub web URL.
#[test]
fn html_base_url_invalid_url_falls_back() {
    let mut ing = make_ingestor("octocat");
    ing.api_base = String::from("not-a-valid-url");

    let html_base = ing.html_base_url();
    assert_eq!(html_base, "https://github.com");
}
1080
/// The scheme of the API base (here plain `http`) carries over to the HTML base.
#[test]
fn html_base_url_http_scheme() {
    let mut ing = make_ingestor("octocat");
    ing.api_base = String::from("http://internal-ghes.corp/api/v3");

    let html_base = ing.html_base_url();
    assert_eq!(html_base, "http://internal-ghes.corp");
}
1087
/// A one-day window yields the same date for start and end.
#[test]
fn github_inclusive_range_single_day_window() {
    let day = |y: i32, m: u32, d: u32| NaiveDate::from_ymd_opt(y, m, d).unwrap();
    let window = TimeWindow {
        since: day(2025, 3, 15),
        until: day(2025, 3, 16),
    };

    let range = github_inclusive_range(&window);
    assert_eq!(range.0, "2025-03-15");
    assert_eq!(range.1, "2025-03-15");
}
1100
/// An `until` of January 1st ends the range on December 31st of the previous year.
#[test]
fn github_inclusive_range_year_boundary() {
    let day = |y: i32, m: u32, d: u32| NaiveDate::from_ymd_opt(y, m, d).unwrap();
    let window = TimeWindow {
        since: day(2024, 12, 1),
        until: day(2025, 1, 1),
    };

    let range = github_inclusive_range(&window);
    assert_eq!(range.0, "2024-12-01");
    assert_eq!(range.1, "2024-12-31");
}
1111
/// With `since == until` the end is still the day before `until`.
#[test]
fn github_inclusive_range_same_day_uses_pred() {
    let same_day = NaiveDate::from_ymd_opt(2025, 6, 1).unwrap();
    let window = TimeWindow {
        since: same_day,
        until: same_day,
    };

    let range = github_inclusive_range(&window);
    assert_eq!(range.0, "2025-06-01");
    assert_eq!(range.1, "2025-05-31");
}
1123
/// Without parameters the URL is returned unchanged (no trailing `?`).
#[test]
fn build_url_with_params_empty_params() {
    let base = "https://api.github.com/search/issues";
    let url = build_url_with_params(base, &[]).unwrap();
    assert_eq!(url.as_str(), "https://api.github.com/search/issues");
}
1131
/// Reserved characters in a value survive an encode/decode round trip.
#[test]
fn build_url_with_params_special_characters() {
    let params = [(
        "q",
        "author:user+name with spaces&special=chars".to_string(),
    )];
    let url = build_url_with_params("https://api.github.com/search/issues", &params).unwrap();
    assert!(!url.as_str().contains(' '));

    let val: String = url
        .query_pairs()
        .find_map(|(k, v)| (k == "q").then(|| v.into_owned()))
        .unwrap();
    assert_eq!(val, "author:user+name with spaces&special=chars");
}
1152
/// An invalid base URL is reported as an error.
#[test]
fn build_url_with_params_invalid_base_url_errors() {
    assert!(build_url_with_params("not a url", &[]).is_err());
}
1158
/// Pins current behaviour: a path that has `/api/v3` before `repos` does not
/// match and yields the fallback.
#[test]
fn repo_from_repo_url_ghes_url() {
    let html_base = "https://ghes.corp";
    let result = repo_from_repo_url("https://ghes.corp/api/v3/repos/myorg/myrepo", html_base);
    assert_eq!(result.0, "unknown/unknown");
    assert_eq!(result.1, "https://ghes.corp");
}
1177
/// Three segments are not enough when the first one is not `repos`.
#[test]
fn repo_from_repo_url_three_plus_segments_wrong_prefix_falls_back() {
    let html_base = "https://github.com";
    let result = repo_from_repo_url("https://api.github.com/users/octocat/repos", html_base);
    assert_eq!(result.0, "unknown/unknown");
    assert_eq!(result.1, "https://github.com");
}
1189
/// A `repos` path that lacks the repository segment falls back.
#[test]
fn repo_from_repo_url_exactly_two_segments_repos_prefix_falls_back() {
    let html_base = "https://github.com";
    let result = repo_from_repo_url("https://api.github.com/repos/owner-only", html_base);
    assert_eq!(result.0, "unknown/unknown");
    assert_eq!(result.1, "https://github.com");
}
1201
/// A trailing slash on the HTML base does not produce a double slash.
#[test]
fn repo_from_repo_url_trailing_slash_in_html_base() {
    let result = repo_from_repo_url(
        "https://api.github.com/repos/owner/repo",
        "https://github.com/",
    );
    assert_eq!(result.0, "owner/repo");
    assert_eq!(result.1, "https://github.com/owner/repo");
}
1211
/// Segments after `repos/<owner>/<repo>` are ignored.
#[test]
fn repo_from_repo_url_extra_path_segments() {
    let html_base = "https://github.com";
    let result = repo_from_repo_url("https://api.github.com/repos/org/project/pulls", html_base);
    assert_eq!(result.0, "org/project");
    assert_eq!(result.1, "https://github.com/org/project");
}
1222
/// An empty repository URL yields the fallback.
#[test]
fn repo_from_repo_url_empty_string() {
    let result = repo_from_repo_url("", "https://github.com");
    assert_eq!(result.0, "unknown/unknown");
    assert_eq!(result.1, "https://github.com");
}
1229
/// The query's date range ends on the day before the window's `until`.
#[test]
fn build_pr_query_uses_inclusive_range() {
    let day = |y: i32, m: u32, d: u32| NaiveDate::from_ymd_opt(y, m, d).unwrap();
    let w = TimeWindow {
        since: day(2025, 3, 1),
        until: day(2025, 3, 15),
    };

    let q = make_ingestor("alice").build_pr_query(&w);
    assert!(q.contains("2025-03-01..2025-03-14"), "got: {q}");
}
1243
/// Any mode other than `"created"` produces the merged-PR query.
#[test]
fn build_pr_query_unknown_mode_defaults_to_merged() {
    let day = |y: i32, m: u32, d: u32| NaiveDate::from_ymd_opt(y, m, d).unwrap();
    let mut ing = make_ingestor("octocat");
    ing.mode = String::from("unknown_mode");
    let w = TimeWindow {
        since: day(2025, 1, 1),
        until: day(2025, 2, 1),
    };

    let q = ing.build_pr_query(&w);
    assert!(
        q.contains("is:merged"),
        "unknown mode should fall through to merged"
    );
}
1258
/// The review query filters by the `updated:` date qualifier and names the reviewer.
#[test]
fn build_reviewed_query_uses_updated_qualifier() {
    let day = |y: i32, m: u32, d: u32| NaiveDate::from_ymd_opt(y, m, d).unwrap();
    let w = TimeWindow {
        since: day(2025, 6, 1),
        until: day(2025, 7, 1),
    };

    let q = make_ingestor("reviewer").build_reviewed_query(&w);
    assert!(
        q.contains("updated:"),
        "review query should use updated: qualifier"
    );
    assert!(q.contains("reviewed-by:reviewer"));
}
1275
/// A full search response with one pull-request hit deserializes field by field.
#[test]
fn search_response_deserializes_from_json() {
    let json = r#"{
        "total_count": 42,
        "incomplete_results": false,
        "items": [
            {
                "id": 1001,
                "number": 123,
                "title": "Fix bug",
                "html_url": "https://github.com/owner/repo/pull/123",
                "repository_url": "https://api.github.com/repos/owner/repo",
                "pull_request": { "url": "https://api.github.com/repos/owner/repo/pulls/123" },
                "created_at": "2025-01-15T10:30:00Z"
            }
        ]
    }"#;

    let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
    assert_eq!(resp.total_count, 42);
    assert!(!resp.incomplete_results);
    assert_eq!(resp.items.len(), 1);

    let first = &resp.items[0];
    assert_eq!(first.number, 123);
    assert_eq!(first.title, "Fix bug");
    assert!(first.pull_request.is_some());
}
1304
1305 #[test]
1306 fn search_response_deserializes_without_pull_request() {
1307 let json = r#"{
1308 "total_count": 1,
1309 "incomplete_results": true,
1310 "items": [
1311 {
1312 "id": 2002,
1313 "number": 456,
1314 "title": "Issue only",
1315 "html_url": "https://github.com/owner/repo/issues/456",
1316 "repository_url": "https://api.github.com/repos/owner/repo",
1317 "created_at": null
1318 }
1319 ]
1320 }"#;
1321
1322 let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1323 assert!(resp.incomplete_results);
1324 assert!(resp.items[0].pull_request.is_none());
1325 assert!(resp.items[0].created_at.is_none());
1326 }
1327
1328 #[test]
1329 fn search_response_empty_items() {
1330 let json = r#"{"total_count": 0, "incomplete_results": false, "items": []}"#;
1331 let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1332 assert_eq!(resp.total_count, 0);
1333 assert!(resp.items.is_empty());
1334 }
1335
1336 #[test]
1339 fn pr_details_deserializes_from_json() {
1340 let json = r#"{
1341 "title": "Add feature",
1342 "created_at": "2025-01-10T08:00:00Z",
1343 "merged_at": "2025-01-12T14:30:00Z",
1344 "additions": 150,
1345 "deletions": 30,
1346 "changed_files": 5,
1347 "base": {
1348 "repo": {
1349 "full_name": "owner/repo",
1350 "html_url": "https://github.com/owner/repo",
1351 "private": false
1352 }
1353 }
1354 }"#;
1355
1356 let details: PullRequestDetails = serde_json::from_str(json).unwrap();
1357 assert_eq!(details.title, "Add feature");
1358 assert!(details.merged_at.is_some());
1359 assert_eq!(details.additions, 150);
1360 assert_eq!(details.deletions, 30);
1361 assert_eq!(details.changed_files, 5);
1362 assert!(!details.base.repo.private_field);
1363 assert_eq!(details.base.repo.full_name, "owner/repo");
1364 }
1365
1366 #[test]
1367 fn pr_details_private_repo() {
1368 let json = r#"{
1369 "title": "Secret fix",
1370 "created_at": "2025-01-10T08:00:00Z",
1371 "merged_at": null,
1372 "additions": 10,
1373 "deletions": 5,
1374 "changed_files": 1,
1375 "base": {
1376 "repo": {
1377 "full_name": "corp/secret",
1378 "html_url": "https://github.com/corp/secret",
1379 "private": true
1380 }
1381 }
1382 }"#;
1383
1384 let details: PullRequestDetails = serde_json::from_str(json).unwrap();
1385 assert!(details.base.repo.private_field);
1386 assert!(details.merged_at.is_none());
1387 }
1388
1389 #[test]
1392 fn pr_review_deserializes_from_json() {
1393 let json = r#"{
1394 "id": 99001,
1395 "state": "APPROVED",
1396 "submitted_at": "2025-02-01T12:00:00Z",
1397 "user": { "login": "reviewer42" }
1398 }"#;
1399
1400 let review: PullRequestReview = serde_json::from_str(json).unwrap();
1401 assert_eq!(review.id, 99001);
1402 assert_eq!(review.state, "APPROVED");
1403 assert!(review.submitted_at.is_some());
1404 assert_eq!(review.user.login, "reviewer42");
1405 }
1406
1407 #[test]
1408 fn pr_review_with_null_submitted_at() {
1409 let json = r#"{
1410 "id": 99002,
1411 "state": "PENDING",
1412 "submitted_at": null,
1413 "user": { "login": "pending-reviewer" }
1414 }"#;
1415
1416 let review: PullRequestReview = serde_json::from_str(json).unwrap();
1417 assert!(review.submitted_at.is_none());
1418 }
1419
    /// End-to-end conversion check driven by GitHub-shaped JSON payloads.
    ///
    /// Deserializes a search response, a pull request details object, and a
    /// list of reviews from payloads that carry many fields beyond the ones
    /// asserted here, stores the details and reviews in an in-memory cache,
    /// and then checks the events returned by `items_to_pr_events` and
    /// `items_to_review_events`.
    #[test]
    fn recorded_github_search_payload_deserializes_and_converts() {
        // Search response with a single item that carries a `pull_request` reference.
        let search_payload = serde_json::json!({
            "total_count": 1,
            "incomplete_results": false,
            "items": [
                {
                    "url": "https://api.github.com/repos/octocat/Hello-World/issues/1347",
                    "repository_url": "https://api.github.com/repos/octocat/Hello-World",
                    "labels_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347/labels{/name}",
                    "comments_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347/comments",
                    "events_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347/events",
                    "html_url": "https://github.com/octocat/Hello-World/pull/1347",
                    "id": 1000001347,
                    "node_id": "PR_kwDOABCD",
                    "number": 1347,
                    "state": "closed",
                    "title": "Reduce deploy rollback toil",
                    "user": {
                        "login": "alice",
                        "id": 100,
                        "node_id": "MDQ6VXNlcjEwMA==",
                        "avatar_url": "https://github.com/images/error/alice_happy.gif",
                        "gravatar_id": "",
                        "url": "https://api.github.com/users/alice",
                        "html_url": "https://github.com/alice",
                        "type": "User",
                        "site_admin": false
                    },
                    "labels": [
                        {
                            "id": 208045946,
                            "node_id": "MDU6TGFiZWwyMDgwNDU5NDY=",
                            "url": "https://api.github.com/repos/octocat/Hello-World/labels/reliability",
                            "name": "reliability",
                            "description": "Reliability work",
                            "color": "0e8a16",
                            "default": false
                        }
                    ],
                    "pull_request": {
                        "url": "https://api.github.com/repos/octocat/Hello-World/pulls/1347",
                        "html_url": "https://github.com/octocat/Hello-World/pull/1347",
                        "diff_url": "https://github.com/octocat/Hello-World/pull/1347.diff",
                        "patch_url": "https://github.com/octocat/Hello-World/pull/1347.patch"
                    },
                    "closed_at": "2025-01-18T16:00:00Z",
                    "created_at": "2025-01-10T09:00:00Z",
                    "updated_at": "2025-01-18T16:00:00Z",
                    "author_association": "MEMBER",
                    "score": 1.0
                }
            ]
        });
        // Pull request details. `base.repo.private` is true while
        // `head.repo.private` is false; the assertions below expect the
        // resulting event to report Private visibility.
        let details_payload = serde_json::json!({
            "url": "https://api.github.com/repos/octocat/Hello-World/pulls/1347",
            "id": 2000001347,
            "node_id": "PR_kwDOABCD",
            "html_url": "https://github.com/octocat/Hello-World/pull/1347",
            "diff_url": "https://github.com/octocat/Hello-World/pull/1347.diff",
            "patch_url": "https://github.com/octocat/Hello-World/pull/1347.patch",
            "issue_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347",
            "number": 1347,
            "state": "closed",
            "locked": false,
            "title": "Reduce deploy rollback toil",
            "user": { "login": "alice", "id": 100 },
            "body": "Add preflight checks and rollback runbook links.",
            "created_at": "2025-01-10T09:00:00Z",
            "updated_at": "2025-01-18T16:00:00Z",
            "closed_at": "2025-01-18T16:00:00Z",
            "merged_at": "2025-01-18T16:00:00Z",
            "merge_commit_sha": "ecdd80bb57125d7ba9641ffaa4d7d2c19d3f3091",
            "assignee": null,
            "assignees": [],
            "requested_reviewers": [],
            "requested_teams": [],
            "labels": [],
            "head": {
                "label": "alice:rollback-preflight",
                "ref": "rollback-preflight",
                "sha": "bbcd538c8e72b8c175046e27cc8f907076331401",
                "user": { "login": "alice", "id": 100 },
                "repo": {
                    "id": 1296269,
                    "name": "Hello-World",
                    "full_name": "octocat/Hello-World",
                    "private": false,
                    "html_url": "https://github.com/octocat/Hello-World"
                }
            },
            "base": {
                "label": "octocat:main",
                "ref": "main",
                "sha": "bbcd538c8e72b8c175046e27cc8f907076331402",
                "user": { "login": "octocat", "id": 1 },
                "repo": {
                    "id": 1296269,
                    "name": "Hello-World",
                    "full_name": "octocat/Hello-World",
                    "private": true,
                    "html_url": "https://github.com/octocat/Hello-World"
                }
            },
            "draft": false,
            "merged": true,
            "mergeable": true,
            "rebaseable": true,
            "mergeable_state": "clean",
            "merged_by": { "login": "octocat", "id": 1 },
            "comments": 2,
            "review_comments": 1,
            "commits": 3,
            "additions": 144,
            "deletions": 18,
            "changed_files": 6
        });
        // Three reviews: APPROVED by alice, COMMENTED by bob, and a PENDING
        // one by alice whose `submitted_at` is null.
        let reviews_payload = serde_json::json!([
            {
                "id": 99001,
                "node_id": "MDE3OlB1bGxSZXF1ZXN0UmV2aWV3OTkwMDE=",
                "user": {
                    "login": "alice",
                    "id": 100,
                    "node_id": "MDQ6VXNlcjEwMA==",
                    "avatar_url": "https://github.com/images/error/alice_happy.gif",
                    "gravatar_id": "",
                    "url": "https://api.github.com/users/alice",
                    "html_url": "https://github.com/alice",
                    "type": "User",
                    "site_admin": false
                },
                "body": "Verified the rollback path.",
                "state": "APPROVED",
                "html_url": "https://github.com/octocat/Hello-World/pull/1347#pullrequestreview-99001",
                "pull_request_url": "https://api.github.com/repos/octocat/Hello-World/pulls/1347",
                "_links": {
                    "html": { "href": "https://github.com/octocat/Hello-World/pull/1347#pullrequestreview-99001" },
                    "pull_request": { "href": "https://api.github.com/repos/octocat/Hello-World/pulls/1347" }
                },
                "submitted_at": "2025-01-18T15:30:00Z",
                "commit_id": "ecdd80bb57125d7ba9641ffaa4d7d2c19d3f3091",
                "author_association": "MEMBER"
            },
            {
                "id": 99002,
                "user": { "login": "bob" },
                "state": "COMMENTED",
                "submitted_at": "2025-01-18T15:45:00Z"
            },
            {
                "id": 99003,
                "user": { "login": "alice" },
                "state": "PENDING",
                "submitted_at": null
            }
        ]);

        // The search payload is cloned because it is deserialized a second
        // time further down for the review conversion.
        let search: SearchResponse<SearchIssueItem> =
            serde_json::from_value(search_payload.clone()).unwrap();
        assert_eq!(search.total_count, 1);
        assert!(!search.incomplete_results);
        assert_eq!(
            search.items[0].repository_url,
            "https://api.github.com/repos/octocat/Hello-World"
        );
        let pr_url = search.items[0].pull_request.as_ref().unwrap().url.clone();

        let details: PullRequestDetails = serde_json::from_value(details_payload).unwrap();
        assert_eq!(details.additions, 144);
        assert_eq!(details.deletions, 18);
        assert_eq!(details.changed_files, 6);
        assert!(details.base.repo.private_field);

        // Store the details in the in-memory cache under the PR URL key.
        // NOTE(review): presumably this lets `items_to_pr_events` resolve the
        // details from the cache instead of the network — confirm.
        let mut ing = make_ingestor("alice").with_in_memory_cache().unwrap();
        ing.mode = "merged".to_string();
        ing.cache
            .as_ref()
            .unwrap()
            .set(&CacheKey::pr_details(&pr_url), &details)
            .unwrap();

        let client = Client::new();
        let pr_events = ing.items_to_pr_events(&client, search.items).unwrap();
        assert_eq!(pr_events.len(), 1);
        let pr_event = &pr_events[0];
        assert_eq!(pr_event.kind, EventKind::PullRequest);
        assert_eq!(pr_event.actor.login, "alice");
        assert_eq!(pr_event.repo.full_name, "octocat/Hello-World");
        assert_eq!(pr_event.repo.visibility, RepoVisibility::Private);
        assert_eq!(pr_event.source.system, SourceSystem::Github);
        assert_eq!(pr_event.source.url.as_deref(), Some(pr_url.as_str()));
        // The opaque id is expected to be the search item's `id`, not the
        // details object's `id` (2000001347).
        assert_eq!(pr_event.source.opaque_id.as_deref(), Some("1000001347"));

        if let EventPayload::PullRequest(pr) = &pr_event.payload {
            assert_eq!(pr.number, 1347);
            assert_eq!(pr.title, "Reduce deploy rollback toil");
            assert_eq!(pr.state, PullRequestState::Merged);
            assert_eq!(
                pr.merged_at,
                Some("2025-01-18T16:00:00Z".parse::<DateTime<Utc>>().unwrap())
            );
            assert_eq!(pr.additions, Some(144));
            assert_eq!(pr.deletions, Some(18));
            assert_eq!(pr.changed_files, Some(6));
        } else {
            panic!("expected PullRequest payload");
        }

        // Store the reviews in the cache for the same PR URL.
        // NOTE(review): the second argument `1` is presumably a page number —
        // confirm against `CacheKey::pr_reviews`.
        let reviews: Vec<PullRequestReview> = serde_json::from_value(reviews_payload).unwrap();
        ing.cache
            .as_ref()
            .unwrap()
            .set(&CacheKey::pr_reviews(&pr_url, 1), &reviews)
            .unwrap();
        let review_search: SearchResponse<SearchIssueItem> =
            serde_json::from_value(search_payload).unwrap();

        // Exactly one of the three reviews is expected to become an event:
        // id 99001. NOTE(review): presumably bob's review is dropped for the
        // author and the PENDING one for its null `submitted_at` — confirm
        // against `items_to_review_events`.
        let review_events = ing
            .items_to_review_events(&client, review_search.items)
            .unwrap();
        assert_eq!(review_events.len(), 1);
        let review_event = &review_events[0];
        assert_eq!(review_event.kind, EventKind::Review);
        assert_eq!(review_event.actor.login, "alice");
        assert_eq!(review_event.repo.full_name, "octocat/Hello-World");
        assert_eq!(review_event.source.url.as_deref(), Some(pr_url.as_str()));
        assert_eq!(review_event.source.opaque_id.as_deref(), Some("99001"));

        if let EventPayload::Review(review) = &review_event.payload {
            assert_eq!(review.pull_number, 1347);
            assert_eq!(review.pull_title, "Reduce deploy rollback toil");
            assert_eq!(review.state, "APPROVED");
            assert_eq!(
                review.submitted_at,
                "2025-01-18T15:30:00Z".parse::<DateTime<Utc>>().unwrap()
            );
        } else {
            panic!("expected Review payload");
        }
    }
1661
1662 #[test]
1665 fn items_to_pr_events_without_details_produces_events() {
1666 let mut ing = make_ingestor("alice");
1667 ing.fetch_details = false;
1668
1669 let client = Client::new();
1670 let items = vec![
1671 make_search_item(10, "org/repo-a", true),
1672 make_search_item(20, "org/repo-b", true),
1673 ];
1674
1675 let events = ing.items_to_pr_events(&client, items).unwrap();
1676 assert_eq!(events.len(), 2);
1677
1678 assert_eq!(events[0].kind, EventKind::PullRequest);
1679 assert_eq!(events[0].actor.login, "alice");
1680 assert_eq!(events[0].repo.full_name, "org/repo-a");
1681 assert_eq!(events[0].links.len(), 1);
1682 assert_eq!(events[0].links[0].label, "pr");
1683
1684 assert_eq!(events[1].repo.full_name, "org/repo-b");
1685 }
1686
1687 #[test]
1688 fn items_to_pr_events_skips_items_without_pr_ref() {
1689 let mut ing = make_ingestor("bob");
1690 ing.fetch_details = false;
1691
1692 let client = Client::new();
1693 let items = vec![
1694 make_search_item(1, "org/repo", true),
1695 make_search_item(2, "org/repo", false), make_search_item(3, "org/repo", true),
1697 ];
1698
1699 let events = ing.items_to_pr_events(&client, items).unwrap();
1700 assert_eq!(
1701 events.len(),
1702 2,
1703 "items without pull_request should be skipped"
1704 );
1705 }
1706
1707 #[test]
1708 fn items_to_pr_events_empty_input() {
1709 let mut ing = make_ingestor("carol");
1710 ing.fetch_details = false;
1711 let client = Client::new();
1712 let events = ing.items_to_pr_events(&client, vec![]).unwrap();
1713 assert!(events.is_empty());
1714 }
1715
1716 #[test]
1717 fn items_to_pr_events_sets_source_system() {
1718 let mut ing = make_ingestor("dave");
1719 ing.fetch_details = false;
1720
1721 let client = Client::new();
1722 let items = vec![make_search_item(42, "org/repo", true)];
1723 let events = ing.items_to_pr_events(&client, items).unwrap();
1724
1725 assert_eq!(events[0].source.system, SourceSystem::Github);
1726 assert!(events[0].source.url.is_some());
1727 assert!(events[0].source.opaque_id.is_some());
1728 }
1729
1730 #[test]
1731 fn items_to_pr_events_merged_mode_uses_created_at_as_occurred() {
1732 let mut ing = make_ingestor("eve");
1733 ing.fetch_details = false;
1734 ing.mode = "merged".to_string();
1735
1736 let client = Client::new();
1737 let mut item = make_search_item(1, "org/repo", true);
1738 let created = DateTime::parse_from_rfc3339("2025-03-15T10:00:00Z")
1739 .unwrap()
1740 .with_timezone(&Utc);
1741 item.created_at = Some(created);
1742
1743 let events = ing.items_to_pr_events(&client, vec![item]).unwrap();
1744 assert_eq!(events[0].occurred_at, created);
1746 }
1747
1748 #[test]
1749 fn items_to_pr_events_created_mode_uses_created_at() {
1750 let mut ing = make_ingestor("frank");
1751 ing.fetch_details = false;
1752 ing.mode = "created".to_string();
1753
1754 let client = Client::new();
1755 let mut item = make_search_item(1, "org/repo", true);
1756 let created = DateTime::parse_from_rfc3339("2025-04-01T12:00:00Z")
1757 .unwrap()
1758 .with_timezone(&Utc);
1759 item.created_at = Some(created);
1760
1761 let events = ing.items_to_pr_events(&client, vec![item]).unwrap();
1762 assert_eq!(events[0].occurred_at, created);
1763 }
1764
1765 #[test]
1766 fn items_to_pr_events_without_details_has_unknown_visibility() {
1767 let mut ing = make_ingestor("grace");
1768 ing.fetch_details = false;
1769
1770 let client = Client::new();
1771 let items = vec![make_search_item(1, "org/repo", true)];
1772 let events = ing.items_to_pr_events(&client, items).unwrap();
1773
1774 assert_eq!(events[0].repo.visibility, RepoVisibility::Unknown);
1775 }
1776
1777 #[test]
1778 fn items_to_pr_events_without_details_state_is_unknown() {
1779 let mut ing = make_ingestor("heidi");
1780 ing.fetch_details = false;
1781
1782 let client = Client::new();
1783 let items = vec![make_search_item(1, "org/repo", true)];
1784 let events = ing.items_to_pr_events(&client, items).unwrap();
1785
1786 if let EventPayload::PullRequest(ref pr) = events[0].payload {
1787 assert_eq!(pr.state, PullRequestState::Unknown);
1788 assert!(pr.merged_at.is_none());
1789 assert!(pr.additions.is_none());
1790 assert!(pr.deletions.is_none());
1791 assert!(pr.changed_files.is_none());
1792 } else {
1793 panic!("expected PullRequest payload");
1794 }
1795 }
1796
1797 #[test]
1798 fn items_to_pr_events_deterministic_ids() {
1799 let mut ing = make_ingestor("ivan");
1800 ing.fetch_details = false;
1801
1802 let client = Client::new();
1803 let items1 = vec![make_search_item(42, "org/repo", true)];
1804 let items2 = vec![make_search_item(42, "org/repo", true)];
1805
1806 let events1 = ing.items_to_pr_events(&client, items1).unwrap();
1807 let events2 = ing.items_to_pr_events(&client, items2).unwrap();
1808 assert_eq!(
1809 events1[0].id, events2[0].id,
1810 "same inputs should produce same event ID"
1811 );
1812 }
1813
1814 #[test]
1815 fn items_to_pr_events_different_prs_get_different_ids() {
1816 let mut ing = make_ingestor("judy");
1817 ing.fetch_details = false;
1818
1819 let client = Client::new();
1820 let items = vec![
1821 make_search_item(1, "org/repo", true),
1822 make_search_item(2, "org/repo", true),
1823 ];
1824
1825 let events = ing.items_to_pr_events(&client, items).unwrap();
1826 assert_ne!(events[0].id, events[1].id);
1827 }
1828
1829 #[test]
1832 fn items_to_review_events_skips_items_without_pr_ref() {
1833 let ing = make_ingestor("reviewer");
1834 let client = Client::new();
1835
1836 let items = vec![make_search_item(1, "org/repo", false)];
1839
1840 let events = ing.items_to_review_events(&client, items).unwrap();
1841 assert!(events.is_empty());
1842 }
1843
1844 #[test]
1847 fn ingest_rejects_since_equals_until() {
1848 let date = NaiveDate::from_ymd_opt(2025, 6, 1).unwrap();
1849 let ing = GithubIngestor::new("user".to_string(), date, date);
1850 let err = ing.ingest().unwrap_err();
1851 assert!(
1852 err.to_string().contains("since must be < until"),
1853 "got: {err}"
1854 );
1855 }
1856
1857 #[test]
1858 fn ingest_rejects_since_after_until() {
1859 let ing = GithubIngestor::new(
1860 "user".to_string(),
1861 NaiveDate::from_ymd_opt(2025, 6, 15).unwrap(),
1862 NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1863 );
1864 let err = ing.ingest().unwrap_err();
1865 assert!(err.to_string().contains("since must be < until"));
1866 }
1867
1868 #[test]
1871 fn with_cache_then_in_memory_cache_overrides() {
1872 let temp = tempfile::tempdir().unwrap();
1873 let ing = make_ingestor("octocat")
1874 .with_cache(temp.path())
1875 .unwrap()
1876 .with_in_memory_cache()
1877 .unwrap();
1878 assert!(ing.cache.is_some());
1879 }
1880
1881 #[test]
1882 fn multiple_with_cache_calls_succeed() {
1883 let temp1 = tempfile::tempdir().unwrap();
1884 let temp2 = tempfile::tempdir().unwrap();
1885 let ing = make_ingestor("octocat")
1886 .with_cache(temp1.path())
1887 .unwrap()
1888 .with_cache(temp2.path())
1889 .unwrap();
1890 assert!(ing.cache.is_some());
1891 }
1892
1893 fn arb_naive_date() -> impl Strategy<Value = NaiveDate> {
1896 (2000i32..2030, 1u32..13, 1u32..29)
1897 .prop_map(|(y, m, d)| NaiveDate::from_ymd_opt(y, m, d).unwrap())
1898 }
1899
1900 fn arb_time_window() -> impl Strategy<Value = TimeWindow> {
1901 (arb_naive_date(), 1u32..366).prop_map(|(since, delta)| {
1902 let until = since + chrono::Duration::days(delta as i64);
1903 TimeWindow { since, until }
1904 })
1905 }
1906
1907 proptest! {
1908 #[test]
1909 fn prop_github_inclusive_range_start_lte_end(w in arb_time_window()) {
1910 let (start, end) = github_inclusive_range(&w);
1911 prop_assert!(start <= end, "start={start} > end={end}");
1912 }
1913
1914 #[test]
1915 fn prop_github_inclusive_range_start_matches_since(w in arb_time_window()) {
1916 let (start, _) = github_inclusive_range(&w);
1917 let expected = w.since.format("%Y-%m-%d").to_string();
1918 prop_assert_eq!(start, expected);
1919 }
1920
1921 #[test]
1922 fn prop_github_inclusive_range_end_is_until_minus_one(w in arb_time_window()) {
1923 let (_, end) = github_inclusive_range(&w);
1924 let expected_date = w.until.pred_opt().unwrap_or(w.until);
1925 let expected = expected_date.format("%Y-%m-%d").to_string();
1926 prop_assert_eq!(end, expected);
1927 }
1928
1929 #[test]
1930 fn prop_build_url_with_params_produces_valid_url(
1931 key in "[a-z]{1,10}",
1932 val in "[a-zA-Z0-9 ]{0,50}",
1933 ) {
1934 let result = build_url_with_params(
1935 "https://api.github.com/search/issues",
1936 &[(&key, val.clone())],
1937 );
1938 prop_assert!(result.is_ok());
1939 let url = result.unwrap();
1940 prop_assert!(!url.as_str().contains(' '));
1942 let found: String = url.query_pairs()
1944 .find(|(k, _)| k.as_ref() == key)
1945 .map(|(_, v)| v.into_owned())
1946 .unwrap();
1947 prop_assert_eq!(found, val);
1948 }
1949
1950 #[test]
1951 fn prop_repo_from_repo_url_never_panics(
1952 owner in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,19}",
1953 repo in "[a-zA-Z0-9][a-zA-Z0-9_.-]{0,29}",
1954 ) {
1955 let api_url = format!("https://api.github.com/repos/{}/{}", owner, repo);
1956 let (full, html) = repo_from_repo_url(&api_url, "https://github.com");
1957 let expected_prefix = format!("{}/", owner);
1958 prop_assert!(full.starts_with(&expected_prefix));
1959 prop_assert!(html.starts_with("https://github.com/"));
1960 }
1961
1962 #[test]
1963 fn prop_repo_from_repo_url_arbitrary_strings_never_panic(
1964 s in ".*",
1965 ) {
1966 let _ = repo_from_repo_url(&s, "https://github.com");
1968 }
1969
1970 #[test]
1971 fn prop_build_pr_query_contains_user(
1972 user in "[a-zA-Z][a-zA-Z0-9-]{0,19}",
1973 ) {
1974 let ing = GithubIngestor::new(
1975 user.clone(),
1976 NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1977 NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1978 );
1979 let w = TimeWindow {
1980 since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1981 until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1982 };
1983 let q = ing.build_pr_query(&w);
1984 let expected_author = format!("author:{}", user);
1985 prop_assert!(q.contains(&expected_author));
1986 prop_assert!(q.contains("is:pr"));
1987 }
1988
1989 #[test]
1990 fn prop_build_reviewed_query_contains_user(
1991 user in "[a-zA-Z][a-zA-Z0-9-]{0,19}",
1992 ) {
1993 let ing = GithubIngestor::new(
1994 user.clone(),
1995 NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1996 NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1997 );
1998 let w = TimeWindow {
1999 since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
2000 until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
2001 };
2002 let q = ing.build_reviewed_query(&w);
2003 let expected_reviewer = format!("reviewed-by:{}", user);
2004 prop_assert!(q.contains(&expected_reviewer));
2005 }
2006
2007 #[test]
2008 fn prop_api_url_preserves_path(
2009 segment in "[a-z]{1,15}",
2010 ) {
2011 let ing = make_ingestor("test");
2012 let path = format!("/{}", segment);
2013 let url = ing.api_url(&path);
2014 prop_assert!(url.ends_with(&path));
2015 prop_assert!(url.starts_with("https://api.github.com"));
2016 }
2017
2018 #[test]
2019 fn prop_html_base_url_always_returns_valid_string(
2020 base in "(https?://[a-z]{3,15}\\.[a-z]{2,5}(/[a-z]+)*)",
2021 ) {
2022 let mut ing = make_ingestor("test");
2023 ing.api_base = base;
2024 let result = ing.html_base_url();
2025 prop_assert!(!result.is_empty());
2026 prop_assert!(result.starts_with("http"));
2027 }
2028 }
2029}