1use anyhow::{Context, Result, anyhow};
7use chrono::{DateTime, NaiveDate, Utc};
8use reqwest::blocking::Client;
9use serde::de::DeserializeOwned;
10use serde::{Deserialize, Serialize};
11use shiplog_cache::{ApiCache, CacheKey, CacheLookup};
12use shiplog_coverage::{day_windows, month_windows, week_windows, window_len_days};
13use shiplog_ids::{EventId, RunId};
14use shiplog_ports::{IngestOutput, Ingestor};
15use shiplog_schema::coverage::{Completeness, CoverageManifest, CoverageSlice, TimeWindow};
16use shiplog_schema::event::{
17 Actor, EventEnvelope, EventKind, EventPayload, Link, PullRequestEvent, PullRequestState,
18 RepoRef, RepoVisibility, ReviewEvent, SourceRef, SourceSystem,
19};
20use shiplog_schema::freshness::{FreshnessStatus, SourceFreshness};
21use std::path::PathBuf;
22use std::sync::atomic::{AtomicU64, Ordering};
23use std::thread::sleep;
24use std::time::Duration;
25use url::Url;
26
/// Ingestor that collects a user's pull requests (and optionally reviews)
/// from the GitHub REST API for the `since..until` window.
#[derive(Debug)]
pub struct GithubIngestor {
    /// GitHub login used in the `author:` / `reviewed-by:` search qualifiers
    /// and recorded as the actor of every emitted event.
    pub user: String,
    /// First day of the ingest window.
    pub since: NaiveDate,
    /// End of the ingest window (exclusive); `ingest` rejects `since >= until`.
    pub until: NaiveDate,
    /// PR selection mode: `"created"` searches by creation date, any other
    /// value searches merged PRs by merge date.
    pub mode: String,
    /// When `true`, `ingest` also collects review events.
    pub include_reviews: bool,
    /// When `true`, each PR is enriched with a per-PR details request.
    pub fetch_details: bool,
    /// Milliseconds to sleep after each HTTP request; `0` disables the pause.
    pub throttle_ms: u64,
    /// Optional token sent as a bearer credential with every request.
    pub token: Option<String>,
    /// Base URL of the REST API; `new` sets it to `https://api.github.com`.
    pub api_base: String,
    /// Optional response cache consulted by the PR-details and PR-reviews fetches.
    pub cache: Option<ApiCache>,
    /// Count of cache lookups answered from the cache (fresh or stale).
    cache_hits: AtomicU64,
    /// Count of cache lookups that found nothing.
    cache_misses: AtomicU64,
    /// Count of cache lookups answered from an expired entry.
    cache_stale_hits: AtomicU64,
}
57
58impl GithubIngestor {
59 pub fn new(user: String, since: NaiveDate, until: NaiveDate) -> Self {
77 Self {
78 user,
79 since,
80 until,
81 mode: "merged".to_string(),
82 include_reviews: false,
83 fetch_details: true,
84 throttle_ms: 0,
85 token: None,
86 api_base: "https://api.github.com".to_string(),
87 cache: None,
88 cache_hits: AtomicU64::new(0),
89 cache_misses: AtomicU64::new(0),
90 cache_stale_hits: AtomicU64::new(0),
91 }
92 }
93
94 pub fn with_cache(mut self, cache_dir: impl Into<PathBuf>) -> Result<Self> {
110 let cache_path = cache_dir.into().join("github-api-cache.db");
111 if let Some(parent) = cache_path.parent() {
112 std::fs::create_dir_all(parent)
113 .with_context(|| format!("create GitHub cache directory {parent:?}"))?;
114 }
115 let cache = ApiCache::open(&cache_path)
116 .with_context(|| format!("open GitHub API cache at {cache_path:?}"))?;
117 self.cache = Some(cache);
118 Ok(self)
119 }
120
121 pub fn with_in_memory_cache(mut self) -> Result<Self> {
137 let cache = ApiCache::open_in_memory().context("open in-memory API cache")?;
138 self.cache = Some(cache);
139 Ok(self)
140 }
141
142 fn html_base_url(&self) -> String {
143 if let Ok(u) = Url::parse(&self.api_base) {
144 let scheme = u.scheme();
145 if let Some(host) = u.host_str() {
146 if host == "api.github.com" {
147 return "https://github.com".to_string();
148 }
149 let port_suffix = u.port().map(|p| format!(":{p}")).unwrap_or_default();
150 return format!("{scheme}://{host}{port_suffix}");
151 }
152 }
153 "https://github.com".to_string()
154 }
155
156 #[mutants::skip]
157 fn client(&self) -> Result<Client> {
158 Client::builder()
159 .user_agent(concat!("shiplog/", env!("CARGO_PKG_VERSION")))
160 .build()
161 .context("build reqwest client")
162 }
163
164 #[mutants::skip]
165 fn api_url(&self, path: &str) -> String {
166 format!("{}{}", self.api_base.trim_end_matches('/'), path)
167 }
168
169 #[mutants::skip]
170 fn throttle(&self) {
171 if self.throttle_ms > 0 {
172 sleep(Duration::from_millis(self.throttle_ms));
173 }
174 }
175
176 #[mutants::skip]
177 fn get_json<T: DeserializeOwned>(
178 &self,
179 client: &Client,
180 url: &str,
181 params: &[(&str, String)],
182 ) -> Result<T> {
183 let request_url = build_url_with_params(url, params)?;
184 let request_url_for_err = request_url.as_str().to_string();
185
186 let mut req = client
187 .get(request_url)
188 .header("Accept", "application/vnd.github+json");
189 req = req.header("X-GitHub-Api-Version", "2022-11-28");
190 if let Some(t) = &self.token {
191 req = req.bearer_auth(t);
192 }
193 let resp = req
194 .send()
195 .with_context(|| format!("GET {request_url_for_err}"))?;
196 self.throttle();
197
198 if !resp.status().is_success() {
199 let status = resp.status();
200 let body = resp.text().unwrap_or_default();
201 return Err(anyhow!("GitHub API error {status}: {body}"));
202 }
203
204 resp.json::<T>()
205 .with_context(|| format!("parse json from {request_url_for_err}"))
206 }
207}
208
209impl Ingestor for GithubIngestor {
210 #[mutants::skip]
211 fn ingest(&self) -> Result<IngestOutput> {
212 if self.since >= self.until {
213 return Err(anyhow!("since must be < until"));
214 }
215
216 let client = self.client().context("create GitHub API client")?;
217 let run_id = RunId::now("shiplog");
218 let mut slices: Vec<CoverageSlice> = Vec::new();
219 let mut warnings: Vec<String> = Vec::new();
220 let mut completeness = Completeness::Complete;
221
222 let mut events: Vec<EventEnvelope> = Vec::new();
223
224 let pr_query_builder = |w: &TimeWindow| self.build_pr_query(w);
226 let (pr_items, pr_slices, pr_partial) =
227 self.collect_search_items(&client, pr_query_builder, self.since, self.until, "prs")?;
228 slices.extend(pr_slices);
229 if pr_partial {
230 completeness = Completeness::Partial;
231 }
232
233 events.extend(self.items_to_pr_events(&client, pr_items)?);
234
235 if self.include_reviews {
237 warnings.push("Reviews are collected via search + per-PR review fetch; treat as best-effort coverage.".to_string());
238 let review_query_builder = |w: &TimeWindow| self.build_reviewed_query(w);
239 let (review_items, review_slices, review_partial) = self.collect_search_items(
240 &client,
241 review_query_builder,
242 self.since,
243 self.until,
244 "reviews",
245 )?;
246 slices.extend(review_slices);
247 if review_partial {
248 completeness = Completeness::Partial;
249 }
250 events.extend(self.items_to_review_events(&client, review_items)?);
251 }
252
253 events.sort_by_key(|e| e.occurred_at);
255
256 let fetched_at = Utc::now();
257 let cov = CoverageManifest {
258 run_id,
259 generated_at: fetched_at,
260 user: self.user.clone(),
261 window: TimeWindow {
262 since: self.since,
263 until: self.until,
264 },
265 mode: self.mode.clone(),
266 sources: vec!["github".to_string()],
267 slices,
268 warnings,
269 completeness,
270 };
271
272 let hits = self.cache_hits.load(Ordering::Relaxed);
281 let misses = self.cache_misses.load(Ordering::Relaxed);
282 let stale_hits = self.cache_stale_hits.load(Ordering::Relaxed);
283 let (status, reason) =
284 github_freshness_status(self.cache.is_some(), hits, misses, stale_hits);
285 let freshness = vec![SourceFreshness {
286 source: "github".to_string(),
287 status,
288 cache_hits: hits,
289 cache_misses: misses,
290 fetched_at: Some(fetched_at),
291 reason,
292 }];
293
294 Ok(IngestOutput {
295 events,
296 coverage: cov,
297 freshness,
298 })
299 }
300}
301
302impl GithubIngestor {
303 fn build_pr_query(&self, w: &TimeWindow) -> String {
304 let (start, end) = github_inclusive_range(w);
305 match self.mode.as_str() {
306 "created" => format!("is:pr author:{} created:{}..{}", self.user, start, end),
307 _ => format!(
308 "is:pr is:merged author:{} merged:{}..{}",
309 self.user, start, end
310 ),
311 }
312 }
313
314 fn build_reviewed_query(&self, w: &TimeWindow) -> String {
315 let (start, end) = github_inclusive_range(w);
318 format!("is:pr reviewed-by:{} updated:{}..{}", self.user, start, end)
319 }
320
321 #[mutants::skip]
328 fn collect_search_items<F>(
329 &self,
330 client: &Client,
331 make_query: F,
332 since: NaiveDate,
333 until: NaiveDate,
334 label: &str,
335 ) -> Result<(Vec<SearchIssueItem>, Vec<CoverageSlice>, bool)>
336 where
337 F: Fn(&TimeWindow) -> String,
338 {
339 let mut slices: Vec<CoverageSlice> = Vec::new();
340 let mut items: Vec<SearchIssueItem> = Vec::new();
341 let mut partial = false;
342
343 for w in month_windows(since, until) {
344 let (mut i, mut s, p) =
345 self.collect_window(client, &make_query, &w, Granularity::Month, label)?;
346 items.append(&mut i);
347 slices.append(&mut s);
348 partial |= p;
349 }
350
351 Ok((items, slices, partial))
352 }
353
354 #[mutants::skip]
355 fn collect_window<F>(
356 &self,
357 client: &Client,
358 make_query: &F,
359 window: &TimeWindow,
360 gran: Granularity,
361 label: &str,
362 ) -> Result<(Vec<SearchIssueItem>, Vec<CoverageSlice>, bool)>
363 where
364 F: Fn(&TimeWindow) -> String,
365 {
366 if window.since >= window.until {
367 return Ok((vec![], vec![], false));
368 }
369
370 let query = make_query(window);
371 let (meta_total, meta_incomplete) = self.search_meta(client, &query)?;
372 let mut slices = vec![CoverageSlice {
373 window: window.clone(),
374 query: query.clone(),
375 total_count: meta_total,
376 fetched: 0,
377 incomplete_results: Some(meta_incomplete),
378 notes: vec![format!("probe:{label}")],
379 }];
380
381 let need_subdivide = meta_total > 1000 || meta_incomplete;
383 let can_subdivide = gran != Granularity::Day && window_len_days(window) > 1;
384
385 if need_subdivide && can_subdivide {
386 slices[0].notes.push(format!(
387 "subdivide:{}",
388 if meta_total > 1000 {
389 "cap"
390 } else {
391 "incomplete"
392 }
393 ));
394
395 let mut out_items = Vec::new();
396 let mut out_slices = slices;
397 let mut partial = false;
398
399 let subs = match gran {
400 Granularity::Month => week_windows(window.since, window.until),
401 Granularity::Week => day_windows(window.since, window.until),
402 Granularity::Day => vec![],
403 };
404
405 for sub in subs {
406 let (mut i, mut s, p) =
407 self.collect_window(client, make_query, &sub, gran.next(), label)?;
408 out_items.append(&mut i);
409 out_slices.append(&mut s);
410 partial |= p;
411 }
412 return Ok((out_items, out_slices, partial));
413 }
414
415 let mut partial = false;
417 if meta_total > 1000 || meta_incomplete {
418 partial = true;
419 slices[0]
420 .notes
421 .push("partial:unresolvable_at_this_granularity".to_string());
422 }
423
424 let fetched_items = self.fetch_all_search_items(client, &query)?;
425 let fetched = fetched_items.len() as u64;
426
427 slices.push(CoverageSlice {
429 window: window.clone(),
430 query: query.clone(),
431 total_count: meta_total,
432 fetched,
433 incomplete_results: Some(meta_incomplete),
434 notes: vec![format!("fetch:{label}")],
435 });
436
437 Ok((fetched_items, slices, partial))
438 }
439
440 #[mutants::skip]
441 fn search_meta(&self, client: &Client, q: &str) -> Result<(u64, bool)> {
442 let url = self.api_url("/search/issues");
443 let resp: SearchResponse<SearchIssueItem> = self.get_json(
444 client,
445 &url,
446 &[
447 ("q", q.to_string()),
448 ("per_page", "1".to_string()),
449 ("page", "1".to_string()),
450 ],
451 )?;
452 Ok((resp.total_count, resp.incomplete_results))
453 }
454
455 #[mutants::skip]
456 fn fetch_all_search_items(&self, client: &Client, q: &str) -> Result<Vec<SearchIssueItem>> {
457 let url = self.api_url("/search/issues");
458 let mut out: Vec<SearchIssueItem> = Vec::new();
459 let per_page = 100;
460 let max_pages = 10; for page in 1..=max_pages {
462 let resp: SearchResponse<SearchIssueItem> = self.get_json(
463 client,
464 &url,
465 &[
466 ("q", q.to_string()),
467 ("per_page", per_page.to_string()),
468 ("page", page.to_string()),
469 ],
470 )?;
471 let items_len = resp.items.len();
472 out.extend(resp.items);
473 if out.len() as u64 >= resp.total_count.min(1000) {
474 break;
475 }
476 if items_len < per_page {
477 break;
478 }
479 }
480 Ok(out)
481 }
482
483 #[mutants::skip]
484 fn items_to_pr_events(
485 &self,
486 client: &Client,
487 items: Vec<SearchIssueItem>,
488 ) -> Result<Vec<EventEnvelope>> {
489 let mut out = Vec::new();
490 for item in items {
491 if let Some(pr_ref) = &item.pull_request {
492 let html_base = self.html_base_url();
493 let (repo_full_name, repo_html_url) =
494 repo_from_repo_url(&item.repository_url, &html_base);
495
496 let (title, created_at, merged_at, additions, deletions, changed_files, visibility) =
497 if self.fetch_details {
498 match self.fetch_pr_details(client, &pr_ref.url) {
499 Ok(d) => {
500 let vis = if d.base.repo.private_field {
501 RepoVisibility::Private
502 } else {
503 RepoVisibility::Public
504 };
505 (
506 d.title,
507 d.created_at,
508 d.merged_at,
509 Some(d.additions),
510 Some(d.deletions),
511 Some(d.changed_files),
512 vis,
513 )
514 }
515 Err(_) => {
516 (
518 item.title.clone(),
519 item.created_at.unwrap_or_else(Utc::now),
520 None,
521 None,
522 None,
523 None,
524 RepoVisibility::Unknown,
525 )
526 }
527 }
528 } else {
529 (
530 item.title.clone(),
531 item.created_at.unwrap_or_else(Utc::now),
532 None,
533 None,
534 None,
535 None,
536 RepoVisibility::Unknown,
537 )
538 };
539
540 let occurred_at = match self.mode.as_str() {
541 "created" => created_at,
542 _ => merged_at.unwrap_or(created_at),
543 };
544
545 let state = if merged_at.is_some() {
546 PullRequestState::Merged
547 } else {
548 PullRequestState::Unknown
549 };
550
551 let id = EventId::from_parts([
552 "github",
553 "pr",
554 &repo_full_name,
555 &item.number.to_string(),
556 ]);
557
558 let ev = EventEnvelope {
559 id,
560 kind: EventKind::PullRequest,
561 occurred_at,
562 actor: Actor {
563 login: self.user.clone(),
564 id: None,
565 },
566 repo: RepoRef {
567 full_name: repo_full_name,
568 html_url: Some(repo_html_url),
569 visibility,
570 },
571 payload: EventPayload::PullRequest(PullRequestEvent {
572 number: item.number,
573 title,
574 state,
575 created_at,
576 merged_at,
577 additions,
578 deletions,
579 changed_files,
580 touched_paths_hint: vec![],
581 window: None,
582 }),
583 tags: vec![],
584 links: vec![Link {
585 label: "pr".into(),
586 url: item.html_url.clone(),
587 }],
588 source: SourceRef {
589 system: SourceSystem::Github,
590 url: Some(pr_ref.url.clone()),
591 opaque_id: Some(item.id.to_string()),
592 },
593 };
594
595 out.push(ev);
596 }
597 }
598 Ok(out)
599 }
600
601 #[mutants::skip]
602 fn items_to_review_events(
603 &self,
604 client: &Client,
605 items: Vec<SearchIssueItem>,
606 ) -> Result<Vec<EventEnvelope>> {
607 let mut out = Vec::new();
608 for item in items {
609 let Some(pr_ref) = &item.pull_request else {
610 continue;
611 };
612 let html_base = self.html_base_url();
613 let (repo_full_name, repo_html_url) =
614 repo_from_repo_url(&item.repository_url, &html_base);
615
616 let reviews = self.fetch_pr_reviews(client, &pr_ref.url)?;
618 for r in reviews {
619 if r.user.login != self.user {
620 continue;
621 }
622 let submitted = match r.submitted_at {
623 Some(s) => s,
624 None => continue,
625 };
626 let submitted_date = submitted.date_naive();
627 if submitted_date < self.since || submitted_date >= self.until {
628 continue;
629 }
630
631 let id = EventId::from_parts([
632 "github",
633 "review",
634 &repo_full_name,
635 &item.number.to_string(),
636 &r.id.to_string(),
637 ]);
638
639 let ev = EventEnvelope {
640 id,
641 kind: EventKind::Review,
642 occurred_at: submitted,
643 actor: Actor {
644 login: self.user.clone(),
645 id: None,
646 },
647 repo: RepoRef {
648 full_name: repo_full_name.clone(),
649 html_url: Some(repo_html_url.clone()),
650 visibility: RepoVisibility::Unknown,
651 },
652 payload: EventPayload::Review(ReviewEvent {
653 pull_number: item.number,
654 pull_title: item.title.clone(),
655 submitted_at: submitted,
656 state: r.state,
657 window: None,
658 }),
659 tags: vec![],
660 links: vec![Link {
661 label: "pr".into(),
662 url: item.html_url.clone(),
663 }],
664 source: SourceRef {
665 system: SourceSystem::Github,
666 url: Some(pr_ref.url.clone()),
667 opaque_id: Some(r.id.to_string()),
668 },
669 };
670
671 out.push(ev);
672 }
673 }
674 Ok(out)
675 }
676
677 #[mutants::skip]
678 fn fetch_pr_details(&self, client: &Client, pr_api_url: &str) -> Result<PullRequestDetails> {
679 let cache_key = CacheKey::pr_details(pr_api_url);
681 if let Some(ref cache) = self.cache {
682 match cache.lookup::<PullRequestDetails>(&cache_key)? {
683 CacheLookup::Fresh(cached) => {
684 self.cache_hits.fetch_add(1, Ordering::Relaxed);
685 return Ok(cached);
686 }
687 CacheLookup::Stale(cached) => {
688 self.cache_hits.fetch_add(1, Ordering::Relaxed);
689 self.cache_stale_hits.fetch_add(1, Ordering::Relaxed);
690 return Ok(cached);
691 }
692 CacheLookup::Miss => {
693 self.cache_misses.fetch_add(1, Ordering::Relaxed);
694 }
695 }
696 }
697
698 let details: PullRequestDetails = self.get_json(client, pr_api_url, &[])?;
700
701 if let Some(ref cache) = self.cache {
703 cache.set(&cache_key, &details)?;
704 }
705
706 Ok(details)
707 }
708
709 #[mutants::skip]
710 fn fetch_pr_reviews(
711 &self,
712 client: &Client,
713 pr_api_url: &str,
714 ) -> Result<Vec<PullRequestReview>> {
715 let url = format!("{pr_api_url}/reviews");
716 let mut out = Vec::new();
717 let per_page = 100;
718 for page in 1..=10 {
719 let cache_key = CacheKey::pr_reviews(pr_api_url, page);
720
721 let page_reviews: Vec<PullRequestReview> = if let Some(ref cache) = self.cache {
723 match cache.lookup::<Vec<PullRequestReview>>(&cache_key)? {
724 CacheLookup::Fresh(cached) => {
725 self.cache_hits.fetch_add(1, Ordering::Relaxed);
726 cached
727 }
728 CacheLookup::Stale(cached) => {
729 self.cache_hits.fetch_add(1, Ordering::Relaxed);
730 self.cache_stale_hits.fetch_add(1, Ordering::Relaxed);
731 cached
732 }
733 CacheLookup::Miss => {
734 self.cache_misses.fetch_add(1, Ordering::Relaxed);
735 let reviews: Vec<PullRequestReview> = self.get_json(
737 client,
738 &url,
739 &[
740 ("per_page", per_page.to_string()),
741 ("page", page.to_string()),
742 ],
743 )?;
744 cache.set(&cache_key, &reviews)?;
746 reviews
747 }
748 }
749 } else {
750 self.get_json(
752 client,
753 &url,
754 &[
755 ("per_page", per_page.to_string()),
756 ("page", page.to_string()),
757 ],
758 )?
759 };
760
761 let n = page_reviews.len();
762 out.extend(page_reviews);
763 if n < per_page {
764 break;
765 }
766 }
767 Ok(out)
768 }
769}
770
/// Size of the search window used while subdividing a query, from coarsest
/// (`Month`) to finest (`Day`).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Granularity {
    Month,
    Week,
    Day,
}

impl Granularity {
    /// Returns the next finer granularity; `Day` is the finest and maps to itself.
    fn next(&self) -> Granularity {
        match *self {
            Self::Month => Self::Week,
            Self::Week | Self::Day => Self::Day,
        }
    }
}
787
788fn github_inclusive_range(w: &TimeWindow) -> (String, String) {
789 let start = w.since.format("%Y-%m-%d").to_string();
790 let end_date = w.until.pred_opt().unwrap_or(w.until);
791 let end = end_date.format("%Y-%m-%d").to_string();
792 (start, end)
793}
794
795fn build_url_with_params(base: &str, params: &[(&str, String)]) -> Result<Url> {
796 let mut url = Url::parse(base).with_context(|| format!("parse url {base}"))?;
797 if !params.is_empty() {
798 let mut query = url.query_pairs_mut();
799 for (k, v) in params {
800 query.append_pair(k, v);
801 }
802 }
803 Ok(url)
804}
805
806fn github_freshness_status(
807 cache_configured: bool,
808 cache_hits: u64,
809 cache_misses: u64,
810 cache_stale_hits: u64,
811) -> (FreshnessStatus, Option<String>) {
812 if cache_configured && cache_stale_hits > 0 {
813 return (
814 FreshnessStatus::Stale,
815 Some("one or more expired cache entries were used".to_string()),
816 );
817 }
818 if cache_configured && cache_hits > 0 && cache_misses == 0 {
819 return (FreshnessStatus::Cached, None);
820 }
821
822 (FreshnessStatus::Fresh, None)
823}
824
825fn repo_from_repo_url(repo_api_url: &str, html_base: &str) -> (String, String) {
826 #[expect(clippy::collapsible_if, reason = "policy:clippy-0002")]
827 if let Ok(u) = Url::parse(repo_api_url) {
828 if let Some(segs) = u.path_segments() {
829 let v: Vec<&str> = segs.collect();
830 if v.len() >= 3 && v[0] == "repos" {
831 let owner = v[1];
832 let repo = v[2];
833 let full = format!("{}/{}", owner, repo);
834 let html = format!("{}/{}/{}", html_base.trim_end_matches('/'), owner, repo);
835 return (full, html);
836 }
837 }
838 }
839 ("unknown/unknown".to_string(), html_base.to_string())
840}
841
/// Response envelope deserialized from the `/search/issues` endpoint.
#[derive(Debug, Deserialize)]
struct SearchResponse<T> {
    /// Total number of matches reported for the query.
    total_count: u64,
    /// Flag reported by the API; a `true` value triggers window subdivision
    /// in `collect_window`.
    incomplete_results: bool,
    /// Items contained in this page of results.
    items: Vec<T>,
}
849
/// One item of a `/search/issues` response.
#[derive(Debug, Deserialize)]
struct SearchIssueItem {
    /// Identifier of the item; stored as the opaque source id of PR events.
    id: u64,
    /// Issue / pull-request number within its repository.
    number: u64,
    /// Title of the item.
    title: String,
    /// Web URL of the item; used as the event's `pr` link.
    html_url: String,
    /// API URL of the owning repository; parsed by `repo_from_repo_url`.
    repository_url: String,
    /// Pull-request reference; items without one are skipped by the converters.
    pull_request: Option<SearchPullRequestRef>,

    /// Creation time, when present in the response.
    created_at: Option<DateTime<Utc>>,
}
862
/// Pull-request reference embedded in a search item.
#[derive(Debug, Deserialize)]
struct SearchPullRequestRef {
    /// API URL of the pull request; used to fetch its details and reviews.
    url: String,
}
867
/// Pull-request details fetched from the PR's API URL; serializable so the
/// value can be stored in the API cache.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct PullRequestDetails {
    /// Title of the pull request.
    title: String,
    /// Creation time of the pull request.
    created_at: DateTime<Utc>,
    /// Merge time; `None` when the response carries no merge time.
    merged_at: Option<DateTime<Utc>>,
    /// Number of added lines.
    additions: u64,
    /// Number of deleted lines.
    deletions: u64,
    /// Number of changed files.
    changed_files: u64,
    /// Base of the pull request; its repository supplies the visibility.
    base: PullBase,
}
878
/// The `base` object of a pull-request details response.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct PullBase {
    /// Repository the pull request targets.
    repo: PullRepo,
}
883
/// Repository object nested in a pull request's `base`.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct PullRepo {
    /// Repository name as reported by the API in `full_name`.
    full_name: String,
    /// Web URL of the repository.
    html_url: String,
    /// Value of the JSON field `private`; `true` maps to
    /// `RepoVisibility::Private`, `false` to `RepoVisibility::Public`.
    #[serde(rename = "private")]
    private_field: bool,
}
891
/// One review from a pull request's `/reviews` listing; serializable so pages
/// of reviews can be stored in the API cache.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct PullRequestReview {
    /// Identifier of the review; part of the review event id.
    id: u64,
    /// Review state string, passed through unchanged to the review event.
    state: String,
    /// Submission time; reviews without one are skipped.
    submitted_at: Option<DateTime<Utc>>,
    /// Author of the review.
    user: ReviewUser,
}
899
/// Author object of a pull-request review.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct ReviewUser {
    /// Login of the reviewer; compared against the ingestor's `user`.
    login: String,
}
904
905#[cfg(test)]
906mod tests {
907 use super::*;
908 use chrono::TimeZone;
909 use proptest::prelude::*;
910 use std::io::{ErrorKind, Read, Write};
911 use std::net::{TcpListener, TcpStream};
912 use std::sync::{Arc, Mutex};
913 use std::thread::{self, JoinHandle};
914 use std::time::{Duration as StdDuration, Instant};
915
916 fn make_ingestor(user: &str) -> GithubIngestor {
919 GithubIngestor::new(
920 user.to_string(),
921 NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
922 NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
923 )
924 }
925
926 fn make_search_item(number: u64, repo: &str, with_pr: bool) -> SearchIssueItem {
927 SearchIssueItem {
928 id: number * 100,
929 number,
930 title: format!("PR #{number}"),
931 html_url: format!("https://github.com/{repo}/pull/{number}"),
932 repository_url: format!("https://api.github.com/repos/{repo}"),
933 pull_request: if with_pr {
934 Some(SearchPullRequestRef {
935 url: format!("https://api.github.com/repos/{repo}/pulls/{number}"),
936 })
937 } else {
938 None
939 },
940 created_at: Some(Utc::now()),
941 }
942 }
943
944 #[test]
947 fn with_cache_creates_missing_directory() {
948 let temp = tempfile::tempdir().unwrap();
949 let cache_dir = temp.path().join("nested").join("cache");
950
951 let ing = GithubIngestor::new(
952 "octocat".to_string(),
953 NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
954 NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
955 )
956 .with_cache(&cache_dir)
957 .unwrap();
958
959 assert!(ing.cache.is_some());
960 assert!(cache_dir.join("github-api-cache.db").exists());
961 }
962
963 #[test]
964 fn build_url_with_params_encodes_query_values() {
965 let url = build_url_with_params(
966 "https://api.github.com/search/issues",
967 &[
968 ("q", "is:pr is:merged author:octocat".to_string()),
969 ("per_page", "1".to_string()),
970 ],
971 )
972 .unwrap();
973
974 assert!(!url.as_str().contains(' '), "URL should be percent-encoded");
975
976 let pairs: Vec<(String, String)> = url
977 .query_pairs()
978 .map(|(k, v)| (k.into_owned(), v.into_owned()))
979 .collect();
980 assert_eq!(
981 pairs,
982 vec![
983 (
984 "q".to_string(),
985 "is:pr is:merged author:octocat".to_string()
986 ),
987 ("per_page".to_string(), "1".to_string()),
988 ]
989 );
990 }
991
992 #[test]
993 fn github_inclusive_range_uses_exclusive_until_date() {
994 let window = TimeWindow {
995 since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
996 until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
997 };
998
999 let (start, end) = github_inclusive_range(&window);
1000 assert_eq!(start, "2025-01-01");
1001 assert_eq!(end, "2025-01-31");
1002 }
1003
1004 #[test]
1005 fn html_base_url_maps_public_and_ghes_hosts() {
1006 let mut ing = GithubIngestor::new(
1007 "octocat".to_string(),
1008 NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1009 NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1010 );
1011 ing.api_base = "https://api.github.com".to_string();
1012 assert_eq!(ing.html_base_url(), "https://github.com");
1013
1014 ing.api_base = "https://github.enterprise.local/api/v3".to_string();
1015 assert_eq!(ing.html_base_url(), "https://github.enterprise.local");
1016 }
1017
1018 #[test]
1019 fn build_pr_query_merged_and_created_modes() {
1020 let mut ing = GithubIngestor::new(
1021 "octocat".to_string(),
1022 NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1023 NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1024 );
1025 let w = TimeWindow {
1026 since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1027 until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1028 };
1029
1030 ing.mode = "merged".to_string();
1031 let merged_q = ing.build_pr_query(&w);
1032 assert!(!merged_q.is_empty());
1033 assert!(merged_q.contains("is:merged"));
1034 assert!(merged_q.contains("author:octocat"));
1035
1036 ing.mode = "created".to_string();
1037 let created_q = ing.build_pr_query(&w);
1038 assert!(!created_q.is_empty());
1039 assert!(created_q.contains("created:"));
1040 assert!(created_q.contains("author:octocat"));
1041
1042 assert_ne!(merged_q, created_q);
1044 }
1045
1046 #[test]
1047 fn build_reviewed_query_contains_user() {
1048 let ing = GithubIngestor::new(
1049 "octocat".to_string(),
1050 NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1051 NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1052 );
1053 let w = TimeWindow {
1054 since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1055 until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1056 };
1057
1058 let q = ing.build_reviewed_query(&w);
1059 assert!(!q.is_empty());
1060 assert!(q.contains("reviewed-by:octocat"));
1061 assert!(q.contains("is:pr"));
1062 }
1063
1064 #[test]
1065 fn repo_from_repo_url_invalid_url_returns_fallback() {
1066 let (full, html) = repo_from_repo_url("not-a-url-at-all", "https://github.com");
1067 assert_eq!(full, "unknown/unknown");
1068 assert_eq!(html, "https://github.com");
1069
1070 let (full2, _) =
1072 repo_from_repo_url("https://api.github.com/users/octocat", "https://github.com");
1073 assert_eq!(full2, "unknown/unknown");
1074 }
1075
1076 #[test]
1077 fn repo_from_repo_url_extracts_or_falls_back() {
1078 let (full, html) = repo_from_repo_url(
1079 "https://api.github.com/repos/owner/repo",
1080 "https://github.com",
1081 );
1082 assert_eq!(full, "owner/repo");
1083 assert_eq!(html, "https://github.com/owner/repo");
1084
1085 let (full_fallback, html_fallback) = repo_from_repo_url("not-a-url", "https://github.com");
1086 assert_eq!(full_fallback, "unknown/unknown");
1087 assert_eq!(html_fallback, "https://github.com");
1088 }
1089
1090 #[test]
1095 fn granularity_next_transitions() {
1096 assert_eq!(Granularity::Month.next(), Granularity::Week);
1097 assert_eq!(Granularity::Week.next(), Granularity::Day);
1098 assert_eq!(Granularity::Day.next(), Granularity::Day);
1099 }
1100
1101 #[test]
1102 fn granularity_day_is_fixed_point() {
1103 let g = Granularity::Day;
1104 assert_eq!(g.next(), Granularity::Day);
1105 assert_eq!(g.next().next(), Granularity::Day);
1106 }
1107
1108 #[test]
1111 fn new_defaults_are_correct() {
1112 let ing = make_ingestor("alice");
1113 assert_eq!(ing.user, "alice");
1114 assert_eq!(ing.mode, "merged");
1115 assert!(!ing.include_reviews);
1116 assert!(ing.fetch_details);
1117 assert_eq!(ing.throttle_ms, 0);
1118 assert!(ing.token.is_none());
1119 assert_eq!(ing.api_base, "https://api.github.com");
1120 assert!(ing.cache.is_none());
1121 }
1122
1123 #[test]
1126 fn with_in_memory_cache_sets_cache() {
1127 let ing = make_ingestor("bob").with_in_memory_cache().unwrap();
1128 assert!(ing.cache.is_some());
1129 }
1130
1131 #[test]
1141 fn freshness_counters_start_at_zero() -> anyhow::Result<()> {
1142 let ing = make_ingestor("octocat").with_in_memory_cache()?;
1143 assert_eq!(ing.cache_hits.load(Ordering::Relaxed), 0);
1144 assert_eq!(ing.cache_misses.load(Ordering::Relaxed), 0);
1145 assert_eq!(ing.cache_stale_hits.load(Ordering::Relaxed), 0);
1146 Ok(())
1147 }
1148
1149 #[test]
1150 fn freshness_status_cached_when_only_hits_observed() -> anyhow::Result<()> {
1151 let ing = make_ingestor("octocat").with_in_memory_cache()?;
1153 ing.cache_hits.fetch_add(3, Ordering::Relaxed);
1154 let hits = ing.cache_hits.load(Ordering::Relaxed);
1155 let misses = ing.cache_misses.load(Ordering::Relaxed);
1156 let stale_hits = ing.cache_stale_hits.load(Ordering::Relaxed);
1157 let (status, reason) =
1158 github_freshness_status(ing.cache.is_some(), hits, misses, stale_hits);
1159 assert!(matches!(status, FreshnessStatus::Cached));
1160 assert!(reason.is_none());
1161 Ok(())
1162 }
1163
1164 #[test]
1165 fn freshness_status_fresh_when_any_miss_observed() -> anyhow::Result<()> {
1166 let ing = make_ingestor("octocat").with_in_memory_cache()?;
1168 ing.cache_hits.fetch_add(2, Ordering::Relaxed);
1169 ing.cache_misses.fetch_add(1, Ordering::Relaxed);
1170 let hits = ing.cache_hits.load(Ordering::Relaxed);
1171 let misses = ing.cache_misses.load(Ordering::Relaxed);
1172 let stale_hits = ing.cache_stale_hits.load(Ordering::Relaxed);
1173 let (status, reason) =
1174 github_freshness_status(ing.cache.is_some(), hits, misses, stale_hits);
1175 assert!(matches!(status, FreshnessStatus::Fresh));
1176 assert!(reason.is_none());
1177 Ok(())
1178 }
1179
1180 #[test]
1181 fn freshness_status_stale_when_stale_hit_observed() -> anyhow::Result<()> {
1182 let ing = make_ingestor("octocat").with_in_memory_cache()?;
1185 ing.cache_hits.fetch_add(2, Ordering::Relaxed);
1186 ing.cache_misses.fetch_add(1, Ordering::Relaxed);
1187 ing.cache_stale_hits.fetch_add(1, Ordering::Relaxed);
1188 let hits = ing.cache_hits.load(Ordering::Relaxed);
1189 let misses = ing.cache_misses.load(Ordering::Relaxed);
1190 let stale_hits = ing.cache_stale_hits.load(Ordering::Relaxed);
1191 let (status, reason) =
1192 github_freshness_status(ing.cache.is_some(), hits, misses, stale_hits);
1193 assert!(matches!(status, FreshnessStatus::Stale));
1194 assert_eq!(
1195 reason.as_deref(),
1196 Some("one or more expired cache entries were used")
1197 );
1198 Ok(())
1199 }
1200
1201 #[test]
1202 fn freshness_status_fresh_when_no_cache_configured() {
1203 let ing = make_ingestor("octocat");
1206 let hits = ing.cache_hits.load(Ordering::Relaxed);
1207 let misses = ing.cache_misses.load(Ordering::Relaxed);
1208 let stale_hits = ing.cache_stale_hits.load(Ordering::Relaxed);
1209 let (status, reason) =
1210 github_freshness_status(ing.cache.is_some(), hits, misses, stale_hits);
1211 assert!(matches!(status, FreshnessStatus::Fresh));
1212 assert!(reason.is_none());
1213 }
1214
1215 fn make_pr_details() -> anyhow::Result<PullRequestDetails> {
1238 let ts = Utc
1239 .with_ymd_and_hms(2025, 5, 1, 12, 0, 0)
1240 .single()
1241 .ok_or_else(|| anyhow!("Utc.with_ymd_and_hms returned an ambiguous timestamp"))?;
1242 Ok(PullRequestDetails {
1243 title: "warm-rerun fixture".into(),
1244 created_at: ts,
1245 merged_at: Some(ts),
1246 additions: 10,
1247 deletions: 2,
1248 changed_files: 3,
1249 base: PullBase {
1250 repo: PullRepo {
1251 full_name: "acme/widgets".into(),
1252 html_url: "https://github.com/acme/widgets".into(),
1253 private_field: false,
1254 },
1255 },
1256 })
1257 }
1258
1259 fn no_op_client() -> Result<Client> {
1260 Client::builder()
1265 .user_agent("shiplog-warm-rerun-test")
1266 .build()
1267 .map_err(Into::into)
1268 }
1269
1270 #[test]
1271 fn warm_rerun_fetch_pr_details_records_cache_hit_without_network() -> anyhow::Result<()> {
1272 let ing = make_ingestor("octocat").with_in_memory_cache()?;
1273 let url = "https://api.github.com/repos/acme/widgets/pulls/1";
1274
1275 let key = CacheKey::pr_details(url);
1279 let seeded = make_pr_details()?;
1280 let cache = ing
1281 .cache
1282 .as_ref()
1283 .ok_or_else(|| anyhow!("in-memory cache was just attached"))?;
1284 cache.set(&key, &seeded)?;
1285
1286 let client = no_op_client()?;
1290 let got = ing.fetch_pr_details(&client, url)?;
1291 assert_eq!(got.title, seeded.title);
1292 assert_eq!(ing.cache_hits.load(Ordering::Relaxed), 1);
1293 assert_eq!(ing.cache_misses.load(Ordering::Relaxed), 0);
1294
1295 let got_again = ing.fetch_pr_details(&client, url)?;
1301 assert_eq!(got_again.title, seeded.title);
1302 assert_eq!(ing.cache_hits.load(Ordering::Relaxed), 2);
1303 assert_eq!(ing.cache_misses.load(Ordering::Relaxed), 0);
1304
1305 let hits = ing.cache_hits.load(Ordering::Relaxed);
1309 let misses = ing.cache_misses.load(Ordering::Relaxed);
1310 let stale_hits = ing.cache_stale_hits.load(Ordering::Relaxed);
1311 let (status, reason) =
1312 github_freshness_status(ing.cache.is_some(), hits, misses, stale_hits);
1313 assert!(
1314 matches!(status, FreshnessStatus::Cached),
1315 "warm rerun with fully populated cache must derive Cached, got {status:?}"
1316 );
1317 assert!(reason.is_none());
1318 assert_eq!(status.as_label(), "cached");
1319 Ok(())
1320 }
1321
1322 #[test]
1323 fn expired_cache_entry_records_stale_hit_without_network() -> anyhow::Result<()> {
1324 let ing = make_ingestor("octocat").with_in_memory_cache()?;
1325 let url = "https://api.github.com/repos/acme/widgets/pulls/3";
1326 let key = CacheKey::pr_details(url);
1327 let seeded = make_pr_details()?;
1328 let cache = ing
1329 .cache
1330 .as_ref()
1331 .ok_or_else(|| anyhow!("in-memory cache was just attached"))?;
1332 cache.set_with_ttl(&key, &seeded, chrono::Duration::seconds(-1))?;
1333
1334 let client = no_op_client()?;
1335 let got = ing.fetch_pr_details(&client, url)?;
1336 assert_eq!(got.title, seeded.title);
1337 assert_eq!(ing.cache_hits.load(Ordering::Relaxed), 1);
1338 assert_eq!(ing.cache_misses.load(Ordering::Relaxed), 0);
1339 assert_eq!(ing.cache_stale_hits.load(Ordering::Relaxed), 1);
1340
1341 let (status, reason) = github_freshness_status(
1342 ing.cache.is_some(),
1343 ing.cache_hits.load(Ordering::Relaxed),
1344 ing.cache_misses.load(Ordering::Relaxed),
1345 ing.cache_stale_hits.load(Ordering::Relaxed),
1346 );
1347 assert!(matches!(status, FreshnessStatus::Stale));
1348 assert_eq!(
1349 reason.as_deref(),
1350 Some("one or more expired cache entries were used")
1351 );
1352 assert_eq!(status.as_label(), "stale");
1353 Ok(())
1354 }
1355
    /// Local HTTP fixture server that replays recorded GitHub responses on a
    /// background thread and logs the request line of every request it serves.
    struct RecordedGithubServer {
        /// `http://<addr>` of the bound listener; used as the ingestor's API base.
        base_url: String,
        /// Request lines recorded by the server thread, in the order served.
        requests: Arc<Mutex<Vec<String>>>,
        /// Join handle of the server thread; taken (set to `None`) by `finish`.
        handle: Option<JoinHandle<anyhow::Result<()>>>,
    }
1361
1362 impl RecordedGithubServer {
1363 fn start(expected_requests: usize) -> anyhow::Result<Self> {
1364 let listener = TcpListener::bind("127.0.0.1:0").context("bind fixture server")?;
1365 listener
1366 .set_nonblocking(true)
1367 .context("set fixture server nonblocking")?;
1368 let base_url = format!("http://{}", listener.local_addr()?);
1369 let requests = Arc::new(Mutex::new(Vec::new()));
1370 let thread_requests = Arc::clone(&requests);
1371 let thread_base_url = base_url.clone();
1372 let handle = thread::spawn(move || {
1373 replay_github_fixtures(
1374 listener,
1375 &thread_base_url,
1376 thread_requests,
1377 expected_requests,
1378 )
1379 });
1380
1381 Ok(Self {
1382 base_url,
1383 requests,
1384 handle: Some(handle),
1385 })
1386 }
1387
1388 fn base_url(&self) -> String {
1389 self.base_url.clone()
1390 }
1391
1392 fn finish(mut self) -> anyhow::Result<Vec<String>> {
1393 if let Some(handle) = self.handle.take() {
1394 handle
1395 .join()
1396 .map_err(|_| anyhow!("recorded fixture server thread panicked"))??;
1397 }
1398 self.requests
1399 .lock()
1400 .map_err(|_| anyhow!("recorded fixture request log was poisoned"))
1401 .map(|requests| requests.clone())
1402 }
1403 }
1404
1405 fn replay_github_fixtures(
1406 listener: TcpListener,
1407 base_url: &str,
1408 requests: Arc<Mutex<Vec<String>>>,
1409 expected_requests: usize,
1410 ) -> anyhow::Result<()> {
1411 let deadline = Instant::now() + StdDuration::from_secs(10);
1412
1413 while fixture_request_count(&requests)? < expected_requests {
1414 match listener.accept() {
1415 Ok((mut stream, _peer)) => {
1416 let request_line = handle_recorded_github_request(&mut stream, base_url)?;
1417 requests
1418 .lock()
1419 .map_err(|_| anyhow!("recorded fixture request log was poisoned"))?
1420 .push(request_line);
1421 }
1422 Err(err) if err.kind() == ErrorKind::WouldBlock => {
1423 if Instant::now() > deadline {
1424 return Err(anyhow!(
1425 "recorded fixture server expected {expected_requests} requests, saw {}",
1426 fixture_request_count(&requests)?
1427 ));
1428 }
1429 thread::sleep(StdDuration::from_millis(10));
1430 }
1431 Err(err) => return Err(err).context("accept recorded GitHub fixture request"),
1432 }
1433 }
1434
1435 Ok(())
1436 }
1437
1438 fn fixture_request_count(requests: &Arc<Mutex<Vec<String>>>) -> anyhow::Result<usize> {
1439 requests
1440 .lock()
1441 .map_err(|_| anyhow!("recorded fixture request log was poisoned"))
1442 .map(|requests| requests.len())
1443 }
1444
1445 fn handle_recorded_github_request(
1446 stream: &mut TcpStream,
1447 base_url: &str,
1448 ) -> anyhow::Result<String> {
1449 let mut buf = [0_u8; 4096];
1450 let mut received = Vec::new();
1451 loop {
1452 let n = stream
1453 .read(&mut buf)
1454 .context("read recorded GitHub fixture request")?;
1455 if n == 0 {
1456 break;
1457 }
1458 received.extend_from_slice(&buf[..n]);
1459 if received.windows(4).any(|window| window == b"\r\n\r\n") {
1460 break;
1461 }
1462 if received.len() > 64 * 1024 {
1463 return Err(anyhow!("recorded GitHub fixture request was too large"));
1464 }
1465 }
1466
1467 let request = String::from_utf8_lossy(&received);
1468 let request_line = request
1469 .lines()
1470 .next()
1471 .ok_or_else(|| anyhow!("recorded GitHub fixture request had no request line"))?
1472 .to_string();
1473 let target = request_line
1474 .split_whitespace()
1475 .nth(1)
1476 .ok_or_else(|| anyhow!("recorded GitHub fixture request had no target"))?;
1477 let (status, body) = recorded_github_fixture_response(target, base_url);
1478 let response = format!(
1479 "HTTP/1.1 {status}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}",
1480 body.len()
1481 );
1482 stream
1483 .write_all(response.as_bytes())
1484 .context("write recorded GitHub fixture response")?;
1485 stream
1486 .flush()
1487 .context("flush recorded GitHub fixture response")?;
1488 Ok(request_line)
1489 }
1490
1491 fn recorded_github_fixture_response(target: &str, base_url: &str) -> (&'static str, String) {
1492 let body = if target.starts_with("/search/issues?")
1493 && target_has_query_param(target, "per_page", "1")
1494 {
1495 include_str!("../tests/fixtures/github-warm-rerun/search_meta.json")
1496 } else if target.starts_with("/search/issues?")
1497 && target_has_query_param(target, "per_page", "100")
1498 {
1499 include_str!("../tests/fixtures/github-warm-rerun/search_items.json")
1500 } else if target == "/repos/acme/widgets/pulls/1" {
1501 include_str!("../tests/fixtures/github-warm-rerun/pr_details.json")
1502 } else {
1503 r#"{"message":"unexpected recorded fixture request"}"#
1504 };
1505 let status = if body.contains("unexpected recorded fixture request") {
1506 "404 Not Found"
1507 } else {
1508 "200 OK"
1509 };
1510 (status, body.replace("__API_BASE__", base_url))
1511 }
1512
1513 fn target_has_query_param(target: &str, key: &str, value: &str) -> bool {
1514 target
1515 .split_once('?')
1516 .map(|(_path, query)| {
1517 query.split('&').any(|pair| {
1518 pair.split_once('=')
1519 .map(|(k, v)| k == key && v == value)
1520 .unwrap_or(false)
1521 })
1522 })
1523 .unwrap_or(false)
1524 }
1525
    /// End-to-end check against the local fixture server: a cold ingest followed
    /// by a warm ingest sharing the same on-disk cache directory.
    ///
    /// The cold run must report `Fresh` with one cache miss; the warm run must
    /// report `Cached` with one cache hit. Across both runs the server must see
    /// four search requests but only one PR-details request.
    #[test]
    fn recorded_http_fixtures_prove_full_fresh_then_cached_ingest() -> anyhow::Result<()> {
        // Five requests in total: the four search calls and the single details
        // call asserted at the end of the test.
        let server = RecordedGithubServer::start(5)?;
        let cache_dir = tempfile::tempdir().context("create fixture cache dir")?;

        // Cold run: nothing cached yet for this directory.
        let mut cold = make_ingestor("octocat").with_cache(cache_dir.path())?;
        cold.api_base = server.base_url();
        let cold_output = cold.ingest()?;
        assert_eq!(cold_output.events.len(), 1);
        let cold_freshness = cold_output
            .freshness
            .first()
            .ok_or_else(|| anyhow!("cold fixture ingest did not emit source freshness"))?;
        assert!(
            matches!(cold_freshness.status, FreshnessStatus::Fresh),
            "first recorded fixture run should be fresh, got {}",
            cold_freshness.status.as_label()
        );
        assert_eq!(cold_freshness.cache_hits, 0);
        assert_eq!(cold_freshness.cache_misses, 1);

        // Warm run: a new ingestor pointed at the same cache directory.
        let mut warm = make_ingestor("octocat").with_cache(cache_dir.path())?;
        warm.api_base = server.base_url();
        let warm_output = warm.ingest()?;
        assert_eq!(warm_output.events.len(), 1);
        let warm_freshness = warm_output
            .freshness
            .first()
            .ok_or_else(|| anyhow!("warm fixture ingest did not emit source freshness"))?;
        assert!(
            matches!(warm_freshness.status, FreshnessStatus::Cached),
            "second recorded fixture run should be cached, got {}",
            warm_freshness.status.as_label()
        );
        assert_eq!(warm_freshness.cache_hits, 1);
        assert_eq!(warm_freshness.cache_misses, 0);

        // Joining the server also surfaces any error raised on its thread.
        let requests = server.finish()?;
        let search_requests = requests
            .iter()
            .filter(|line| line.contains("/search/issues?"))
            .count();
        let detail_requests = requests
            .iter()
            .filter(|line| line.contains("/repos/acme/widgets/pulls/1"))
            .count();
        assert_eq!(search_requests, 4);
        assert_eq!(
            detail_requests, 1,
            "warm run must serve PR details from cache instead of replaying HTTP"
        );
        Ok(())
    }
1579
1580 #[test]
1581 fn warm_rerun_cache_primitive_round_trips_miss_then_hit() -> anyhow::Result<()> {
1582 let ing = make_ingestor("octocat").with_in_memory_cache()?;
1592 let cache = ing
1593 .cache
1594 .as_ref()
1595 .ok_or_else(|| anyhow!("in-memory cache was just attached"))?;
1596 let key = CacheKey::pr_details("https://api.github.com/repos/acme/widgets/pulls/2");
1597
1598 let cold: Option<PullRequestDetails> = cache.get(&key)?;
1599 assert!(
1600 cold.is_none(),
1601 "first lookup against an empty cache must miss"
1602 );
1603
1604 let value = make_pr_details()?;
1605 cache.set(&key, &value)?;
1606 let warm: Option<PullRequestDetails> = cache.get(&key)?;
1607 let warm = warm.ok_or_else(|| anyhow!("second lookup after set must hit"))?;
1608 assert_eq!(warm.title, value.title);
1609 Ok(())
1610 }
1611
1612 #[test]
1615 fn api_url_concatenates_path() {
1616 let ing = make_ingestor("octocat");
1617 assert_eq!(
1618 ing.api_url("/search/issues"),
1619 "https://api.github.com/search/issues"
1620 );
1621 }
1622
1623 #[test]
1624 fn api_url_strips_trailing_slash() {
1625 let mut ing = make_ingestor("octocat");
1626 ing.api_base = "https://ghes.local/api/v3/".to_string();
1627 assert_eq!(
1628 ing.api_url("/search/issues"),
1629 "https://ghes.local/api/v3/search/issues"
1630 );
1631 }
1632
1633 #[test]
1636 fn html_base_url_with_port() {
1637 let mut ing = make_ingestor("octocat");
1638 ing.api_base = "https://ghes.local:8443/api/v3".to_string();
1639 assert_eq!(ing.html_base_url(), "https://ghes.local:8443");
1640 }
1641
1642 #[test]
1643 fn html_base_url_invalid_url_falls_back() {
1644 let mut ing = make_ingestor("octocat");
1645 ing.api_base = "not-a-valid-url".to_string();
1646 assert_eq!(ing.html_base_url(), "https://github.com");
1647 }
1648
1649 #[test]
1650 fn html_base_url_http_scheme() {
1651 let mut ing = make_ingestor("octocat");
1652 ing.api_base = "http://internal-ghes.corp/api/v3".to_string();
1653 assert_eq!(ing.html_base_url(), "http://internal-ghes.corp");
1654 }
1655
1656 #[test]
1659 fn github_inclusive_range_single_day_window() {
1660 let window = TimeWindow {
1661 since: NaiveDate::from_ymd_opt(2025, 3, 15).unwrap(),
1662 until: NaiveDate::from_ymd_opt(2025, 3, 16).unwrap(),
1663 };
1664 let (start, end) = github_inclusive_range(&window);
1665 assert_eq!(start, "2025-03-15");
1666 assert_eq!(end, "2025-03-15");
1667 }
1668
1669 #[test]
1670 fn github_inclusive_range_year_boundary() {
1671 let window = TimeWindow {
1672 since: NaiveDate::from_ymd_opt(2024, 12, 1).unwrap(),
1673 until: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1674 };
1675 let (start, end) = github_inclusive_range(&window);
1676 assert_eq!(start, "2024-12-01");
1677 assert_eq!(end, "2024-12-31");
1678 }
1679
1680 #[test]
1681 fn github_inclusive_range_same_day_uses_pred() {
1682 let window = TimeWindow {
1684 since: NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1685 until: NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1686 };
1687 let (start, end) = github_inclusive_range(&window);
1688 assert_eq!(start, "2025-06-01");
1689 assert_eq!(end, "2025-05-31");
1690 }
1691
1692 #[test]
1695 fn build_url_with_params_empty_params() {
1696 let url = build_url_with_params("https://api.github.com/search/issues", &[]).unwrap();
1697 assert_eq!(url.as_str(), "https://api.github.com/search/issues");
1698 }
1699
1700 #[test]
1701 fn build_url_with_params_special_characters() {
1702 let url = build_url_with_params(
1703 "https://api.github.com/search/issues",
1704 &[(
1705 "q",
1706 "author:user+name with spaces&special=chars".to_string(),
1707 )],
1708 )
1709 .unwrap();
1710 assert!(!url.as_str().contains(' '));
1712 let val: String = url
1714 .query_pairs()
1715 .find(|(k, _)| k == "q")
1716 .map(|(_, v)| v.into_owned())
1717 .unwrap();
1718 assert_eq!(val, "author:user+name with spaces&special=chars");
1719 }
1720
1721 #[test]
1722 fn build_url_with_params_invalid_base_url_errors() {
1723 let result = build_url_with_params("not a url", &[]);
1724 assert!(result.is_err());
1725 }
1726
1727 #[test]
1730 fn repo_from_repo_url_ghes_url() {
1731 let (full, html) = repo_from_repo_url(
1737 "https://ghes.corp/api/v3/repos/myorg/myrepo",
1738 "https://ghes.corp",
1739 );
1740 assert_eq!(full, "unknown/unknown");
1743 assert_eq!(html, "https://ghes.corp");
1744 }
1745
1746 #[test]
1747 fn repo_from_repo_url_three_plus_segments_wrong_prefix_falls_back() {
1748 let (full, html) = repo_from_repo_url(
1751 "https://api.github.com/users/octocat/repos",
1752 "https://github.com",
1753 );
1754 assert_eq!(full, "unknown/unknown");
1755 assert_eq!(html, "https://github.com");
1756 }
1757
1758 #[test]
1759 fn repo_from_repo_url_exactly_two_segments_repos_prefix_falls_back() {
1760 let (full, html) = repo_from_repo_url(
1763 "https://api.github.com/repos/owner-only",
1764 "https://github.com",
1765 );
1766 assert_eq!(full, "unknown/unknown");
1767 assert_eq!(html, "https://github.com");
1768 }
1769
1770 #[test]
1771 fn repo_from_repo_url_trailing_slash_in_html_base() {
1772 let (full, html) = repo_from_repo_url(
1773 "https://api.github.com/repos/owner/repo",
1774 "https://github.com/",
1775 );
1776 assert_eq!(full, "owner/repo");
1777 assert_eq!(html, "https://github.com/owner/repo");
1778 }
1779
1780 #[test]
1781 fn repo_from_repo_url_extra_path_segments() {
1782 let (full, html) = repo_from_repo_url(
1784 "https://api.github.com/repos/org/project/pulls",
1785 "https://github.com",
1786 );
1787 assert_eq!(full, "org/project");
1788 assert_eq!(html, "https://github.com/org/project");
1789 }
1790
1791 #[test]
1792 fn repo_from_repo_url_empty_string() {
1793 let (full, html) = repo_from_repo_url("", "https://github.com");
1794 assert_eq!(full, "unknown/unknown");
1795 assert_eq!(html, "https://github.com");
1796 }
1797
1798 #[test]
1801 fn build_pr_query_uses_inclusive_range() {
1802 let ing = make_ingestor("alice");
1803 let w = TimeWindow {
1804 since: NaiveDate::from_ymd_opt(2025, 3, 1).unwrap(),
1805 until: NaiveDate::from_ymd_opt(2025, 3, 15).unwrap(),
1806 };
1807 let q = ing.build_pr_query(&w);
1808 assert!(q.contains("2025-03-01..2025-03-14"), "got: {q}");
1810 }
1811
1812 #[test]
1813 fn build_pr_query_unknown_mode_defaults_to_merged() {
1814 let mut ing = make_ingestor("octocat");
1815 ing.mode = "unknown_mode".to_string();
1816 let w = TimeWindow {
1817 since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1818 until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1819 };
1820 let q = ing.build_pr_query(&w);
1821 assert!(
1822 q.contains("is:merged"),
1823 "unknown mode should fall through to merged"
1824 );
1825 }
1826
1827 #[test]
1830 fn build_reviewed_query_uses_updated_qualifier() {
1831 let ing = make_ingestor("reviewer");
1832 let w = TimeWindow {
1833 since: NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1834 until: NaiveDate::from_ymd_opt(2025, 7, 1).unwrap(),
1835 };
1836 let q = ing.build_reviewed_query(&w);
1837 assert!(
1838 q.contains("updated:"),
1839 "review query should use updated: qualifier"
1840 );
1841 assert!(q.contains("reviewed-by:reviewer"));
1842 }
1843
1844 #[test]
1847 fn search_response_deserializes_from_json() {
1848 let json = r#"{
1849 "total_count": 42,
1850 "incomplete_results": false,
1851 "items": [
1852 {
1853 "id": 1001,
1854 "number": 123,
1855 "title": "Fix bug",
1856 "html_url": "https://github.com/owner/repo/pull/123",
1857 "repository_url": "https://api.github.com/repos/owner/repo",
1858 "pull_request": { "url": "https://api.github.com/repos/owner/repo/pulls/123" },
1859 "created_at": "2025-01-15T10:30:00Z"
1860 }
1861 ]
1862 }"#;
1863
1864 let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1865 assert_eq!(resp.total_count, 42);
1866 assert!(!resp.incomplete_results);
1867 assert_eq!(resp.items.len(), 1);
1868 assert_eq!(resp.items[0].number, 123);
1869 assert_eq!(resp.items[0].title, "Fix bug");
1870 assert!(resp.items[0].pull_request.is_some());
1871 }
1872
1873 #[test]
1874 fn search_response_deserializes_without_pull_request() {
1875 let json = r#"{
1876 "total_count": 1,
1877 "incomplete_results": true,
1878 "items": [
1879 {
1880 "id": 2002,
1881 "number": 456,
1882 "title": "Issue only",
1883 "html_url": "https://github.com/owner/repo/issues/456",
1884 "repository_url": "https://api.github.com/repos/owner/repo",
1885 "created_at": null
1886 }
1887 ]
1888 }"#;
1889
1890 let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1891 assert!(resp.incomplete_results);
1892 assert!(resp.items[0].pull_request.is_none());
1893 assert!(resp.items[0].created_at.is_none());
1894 }
1895
1896 #[test]
1897 fn search_response_empty_items() {
1898 let json = r#"{"total_count": 0, "incomplete_results": false, "items": []}"#;
1899 let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1900 assert_eq!(resp.total_count, 0);
1901 assert!(resp.items.is_empty());
1902 }
1903
1904 #[test]
1907 fn pr_details_deserializes_from_json() {
1908 let json = r#"{
1909 "title": "Add feature",
1910 "created_at": "2025-01-10T08:00:00Z",
1911 "merged_at": "2025-01-12T14:30:00Z",
1912 "additions": 150,
1913 "deletions": 30,
1914 "changed_files": 5,
1915 "base": {
1916 "repo": {
1917 "full_name": "owner/repo",
1918 "html_url": "https://github.com/owner/repo",
1919 "private": false
1920 }
1921 }
1922 }"#;
1923
1924 let details: PullRequestDetails = serde_json::from_str(json).unwrap();
1925 assert_eq!(details.title, "Add feature");
1926 assert!(details.merged_at.is_some());
1927 assert_eq!(details.additions, 150);
1928 assert_eq!(details.deletions, 30);
1929 assert_eq!(details.changed_files, 5);
1930 assert!(!details.base.repo.private_field);
1931 assert_eq!(details.base.repo.full_name, "owner/repo");
1932 }
1933
1934 #[test]
1935 fn pr_details_private_repo() {
1936 let json = r#"{
1937 "title": "Secret fix",
1938 "created_at": "2025-01-10T08:00:00Z",
1939 "merged_at": null,
1940 "additions": 10,
1941 "deletions": 5,
1942 "changed_files": 1,
1943 "base": {
1944 "repo": {
1945 "full_name": "corp/secret",
1946 "html_url": "https://github.com/corp/secret",
1947 "private": true
1948 }
1949 }
1950 }"#;
1951
1952 let details: PullRequestDetails = serde_json::from_str(json).unwrap();
1953 assert!(details.base.repo.private_field);
1954 assert!(details.merged_at.is_none());
1955 }
1956
1957 #[test]
1960 fn pr_review_deserializes_from_json() {
1961 let json = r#"{
1962 "id": 99001,
1963 "state": "APPROVED",
1964 "submitted_at": "2025-02-01T12:00:00Z",
1965 "user": { "login": "reviewer42" }
1966 }"#;
1967
1968 let review: PullRequestReview = serde_json::from_str(json).unwrap();
1969 assert_eq!(review.id, 99001);
1970 assert_eq!(review.state, "APPROVED");
1971 assert!(review.submitted_at.is_some());
1972 assert_eq!(review.user.login, "reviewer42");
1973 }
1974
1975 #[test]
1976 fn pr_review_with_null_submitted_at() {
1977 let json = r#"{
1978 "id": 99002,
1979 "state": "PENDING",
1980 "submitted_at": null,
1981 "user": { "login": "pending-reviewer" }
1982 }"#;
1983
1984 let review: PullRequestReview = serde_json::from_str(json).unwrap();
1985 assert!(review.submitted_at.is_none());
1986 }
1987
    /// Runs realistic GitHub search, PR-details and reviews payloads through
    /// deserialization and event conversion.
    ///
    /// The payloads carry many fields beyond the ones the test asserts on. The
    /// PR details and reviews are placed in the in-memory cache under the keys
    /// derived from the search item's pull-request URL before the conversion
    /// functions are called.
    #[test]
    fn recorded_github_search_payload_deserializes_and_converts() {
        // Search result containing a single closed pull request.
        let search_payload = serde_json::json!({
            "total_count": 1,
            "incomplete_results": false,
            "items": [
                {
                    "url": "https://api.github.com/repos/octocat/Hello-World/issues/1347",
                    "repository_url": "https://api.github.com/repos/octocat/Hello-World",
                    "labels_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347/labels{/name}",
                    "comments_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347/comments",
                    "events_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347/events",
                    "html_url": "https://github.com/octocat/Hello-World/pull/1347",
                    "id": 1000001347,
                    "node_id": "PR_kwDOABCD",
                    "number": 1347,
                    "state": "closed",
                    "title": "Reduce deploy rollback toil",
                    "user": {
                        "login": "alice",
                        "id": 100,
                        "node_id": "MDQ6VXNlcjEwMA==",
                        "avatar_url": "https://github.com/images/error/alice_happy.gif",
                        "gravatar_id": "",
                        "url": "https://api.github.com/users/alice",
                        "html_url": "https://github.com/alice",
                        "type": "User",
                        "site_admin": false
                    },
                    "labels": [
                        {
                            "id": 208045946,
                            "node_id": "MDU6TGFiZWwyMDgwNDU5NDY=",
                            "url": "https://api.github.com/repos/octocat/Hello-World/labels/reliability",
                            "name": "reliability",
                            "description": "Reliability work",
                            "color": "0e8a16",
                            "default": false
                        }
                    ],
                    "pull_request": {
                        "url": "https://api.github.com/repos/octocat/Hello-World/pulls/1347",
                        "html_url": "https://github.com/octocat/Hello-World/pull/1347",
                        "diff_url": "https://github.com/octocat/Hello-World/pull/1347.diff",
                        "patch_url": "https://github.com/octocat/Hello-World/pull/1347.patch"
                    },
                    "closed_at": "2025-01-18T16:00:00Z",
                    "created_at": "2025-01-10T09:00:00Z",
                    "updated_at": "2025-01-18T16:00:00Z",
                    "author_association": "MEMBER",
                    "score": 1.0
                }
            ]
        });
        // Details for the same pull request; note that only the base repo is private.
        let details_payload = serde_json::json!({
            "url": "https://api.github.com/repos/octocat/Hello-World/pulls/1347",
            "id": 2000001347,
            "node_id": "PR_kwDOABCD",
            "html_url": "https://github.com/octocat/Hello-World/pull/1347",
            "diff_url": "https://github.com/octocat/Hello-World/pull/1347.diff",
            "patch_url": "https://github.com/octocat/Hello-World/pull/1347.patch",
            "issue_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347",
            "number": 1347,
            "state": "closed",
            "locked": false,
            "title": "Reduce deploy rollback toil",
            "user": { "login": "alice", "id": 100 },
            "body": "Add preflight checks and rollback runbook links.",
            "created_at": "2025-01-10T09:00:00Z",
            "updated_at": "2025-01-18T16:00:00Z",
            "closed_at": "2025-01-18T16:00:00Z",
            "merged_at": "2025-01-18T16:00:00Z",
            "merge_commit_sha": "ecdd80bb57125d7ba9641ffaa4d7d2c19d3f3091",
            "assignee": null,
            "assignees": [],
            "requested_reviewers": [],
            "requested_teams": [],
            "labels": [],
            "head": {
                "label": "alice:rollback-preflight",
                "ref": "rollback-preflight",
                "sha": "bbcd538c8e72b8c175046e27cc8f907076331401",
                "user": { "login": "alice", "id": 100 },
                "repo": {
                    "id": 1296269,
                    "name": "Hello-World",
                    "full_name": "octocat/Hello-World",
                    "private": false,
                    "html_url": "https://github.com/octocat/Hello-World"
                }
            },
            "base": {
                "label": "octocat:main",
                "ref": "main",
                "sha": "bbcd538c8e72b8c175046e27cc8f907076331402",
                "user": { "login": "octocat", "id": 1 },
                "repo": {
                    "id": 1296269,
                    "name": "Hello-World",
                    "full_name": "octocat/Hello-World",
                    "private": true,
                    "html_url": "https://github.com/octocat/Hello-World"
                }
            },
            "draft": false,
            "merged": true,
            "mergeable": true,
            "rebaseable": true,
            "mergeable_state": "clean",
            "merged_by": { "login": "octocat", "id": 1 },
            "comments": 2,
            "review_comments": 1,
            "commits": 3,
            "additions": 144,
            "deletions": 18,
            "changed_files": 6
        });
        // Three reviews: an approval by alice, a comment by bob, and a pending
        // review by alice with no submission time.
        let reviews_payload = serde_json::json!([
            {
                "id": 99001,
                "node_id": "MDE3OlB1bGxSZXF1ZXN0UmV2aWV3OTkwMDE=",
                "user": {
                    "login": "alice",
                    "id": 100,
                    "node_id": "MDQ6VXNlcjEwMA==",
                    "avatar_url": "https://github.com/images/error/alice_happy.gif",
                    "gravatar_id": "",
                    "url": "https://api.github.com/users/alice",
                    "html_url": "https://github.com/alice",
                    "type": "User",
                    "site_admin": false
                },
                "body": "Verified the rollback path.",
                "state": "APPROVED",
                "html_url": "https://github.com/octocat/Hello-World/pull/1347#pullrequestreview-99001",
                "pull_request_url": "https://api.github.com/repos/octocat/Hello-World/pulls/1347",
                "_links": {
                    "html": { "href": "https://github.com/octocat/Hello-World/pull/1347#pullrequestreview-99001" },
                    "pull_request": { "href": "https://api.github.com/repos/octocat/Hello-World/pulls/1347" }
                },
                "submitted_at": "2025-01-18T15:30:00Z",
                "commit_id": "ecdd80bb57125d7ba9641ffaa4d7d2c19d3f3091",
                "author_association": "MEMBER"
            },
            {
                "id": 99002,
                "user": { "login": "bob" },
                "state": "COMMENTED",
                "submitted_at": "2025-01-18T15:45:00Z"
            },
            {
                "id": 99003,
                "user": { "login": "alice" },
                "state": "PENDING",
                "submitted_at": null
            }
        ]);

        // The search payload is cloned because it is deserialized a second time
        // further down for the review conversion.
        let search: SearchResponse<SearchIssueItem> =
            serde_json::from_value(search_payload.clone()).unwrap();
        assert_eq!(search.total_count, 1);
        assert!(!search.incomplete_results);
        assert_eq!(
            search.items[0].repository_url,
            "https://api.github.com/repos/octocat/Hello-World"
        );
        let pr_url = search.items[0].pull_request.as_ref().unwrap().url.clone();

        let details: PullRequestDetails = serde_json::from_value(details_payload).unwrap();
        assert_eq!(details.additions, 144);
        assert_eq!(details.deletions, 18);
        assert_eq!(details.changed_files, 6);
        assert!(details.base.repo.private_field);

        // Seed the cache with the details under the PR URL's cache key before
        // converting the search items.
        let mut ing = make_ingestor("alice").with_in_memory_cache().unwrap();
        ing.mode = "merged".to_string();
        ing.cache
            .as_ref()
            .unwrap()
            .set(&CacheKey::pr_details(&pr_url), &details)
            .unwrap();

        let client = Client::new();
        let pr_events = ing.items_to_pr_events(&client, search.items).unwrap();
        assert_eq!(pr_events.len(), 1);
        let pr_event = &pr_events[0];
        assert_eq!(pr_event.kind, EventKind::PullRequest);
        assert_eq!(pr_event.actor.login, "alice");
        assert_eq!(pr_event.repo.full_name, "octocat/Hello-World");
        assert_eq!(pr_event.repo.visibility, RepoVisibility::Private);
        assert_eq!(pr_event.source.system, SourceSystem::Github);
        assert_eq!(pr_event.source.url.as_deref(), Some(pr_url.as_str()));
        // The opaque id is the search item's `id`, not the details payload's `id`.
        assert_eq!(pr_event.source.opaque_id.as_deref(), Some("1000001347"));

        if let EventPayload::PullRequest(pr) = &pr_event.payload {
            assert_eq!(pr.number, 1347);
            assert_eq!(pr.title, "Reduce deploy rollback toil");
            assert_eq!(pr.state, PullRequestState::Merged);
            assert_eq!(
                pr.merged_at,
                Some("2025-01-18T16:00:00Z".parse::<DateTime<Utc>>().unwrap())
            );
            assert_eq!(pr.additions, Some(144));
            assert_eq!(pr.deletions, Some(18));
            assert_eq!(pr.changed_files, Some(6));
        } else {
            panic!("expected PullRequest payload");
        }

        // Seed the reviews under the reviews cache key built from the PR URL and `1`.
        let reviews: Vec<PullRequestReview> = serde_json::from_value(reviews_payload).unwrap();
        ing.cache
            .as_ref()
            .unwrap()
            .set(&CacheKey::pr_reviews(&pr_url, 1), &reviews)
            .unwrap();
        let review_search: SearchResponse<SearchIssueItem> =
            serde_json::from_value(search_payload).unwrap();

        // Of the three seeded reviews only one event is expected: alice's
        // submitted approval (id 99001).
        let review_events = ing
            .items_to_review_events(&client, review_search.items)
            .unwrap();
        assert_eq!(review_events.len(), 1);
        let review_event = &review_events[0];
        assert_eq!(review_event.kind, EventKind::Review);
        assert_eq!(review_event.actor.login, "alice");
        assert_eq!(review_event.repo.full_name, "octocat/Hello-World");
        assert_eq!(review_event.source.url.as_deref(), Some(pr_url.as_str()));
        assert_eq!(review_event.source.opaque_id.as_deref(), Some("99001"));

        if let EventPayload::Review(review) = &review_event.payload {
            assert_eq!(review.pull_number, 1347);
            assert_eq!(review.pull_title, "Reduce deploy rollback toil");
            assert_eq!(review.state, "APPROVED");
            assert_eq!(
                review.submitted_at,
                "2025-01-18T15:30:00Z".parse::<DateTime<Utc>>().unwrap()
            );
        } else {
            panic!("expected Review payload");
        }
    }
2229
2230 #[test]
2233 fn items_to_pr_events_without_details_produces_events() {
2234 let mut ing = make_ingestor("alice");
2235 ing.fetch_details = false;
2236
2237 let client = Client::new();
2238 let items = vec![
2239 make_search_item(10, "org/repo-a", true),
2240 make_search_item(20, "org/repo-b", true),
2241 ];
2242
2243 let events = ing.items_to_pr_events(&client, items).unwrap();
2244 assert_eq!(events.len(), 2);
2245
2246 assert_eq!(events[0].kind, EventKind::PullRequest);
2247 assert_eq!(events[0].actor.login, "alice");
2248 assert_eq!(events[0].repo.full_name, "org/repo-a");
2249 assert_eq!(events[0].links.len(), 1);
2250 assert_eq!(events[0].links[0].label, "pr");
2251
2252 assert_eq!(events[1].repo.full_name, "org/repo-b");
2253 }
2254
2255 #[test]
2256 fn items_to_pr_events_skips_items_without_pr_ref() {
2257 let mut ing = make_ingestor("bob");
2258 ing.fetch_details = false;
2259
2260 let client = Client::new();
2261 let items = vec![
2262 make_search_item(1, "org/repo", true),
2263 make_search_item(2, "org/repo", false), make_search_item(3, "org/repo", true),
2265 ];
2266
2267 let events = ing.items_to_pr_events(&client, items).unwrap();
2268 assert_eq!(
2269 events.len(),
2270 2,
2271 "items without pull_request should be skipped"
2272 );
2273 }
2274
2275 #[test]
2276 fn items_to_pr_events_empty_input() {
2277 let mut ing = make_ingestor("carol");
2278 ing.fetch_details = false;
2279 let client = Client::new();
2280 let events = ing.items_to_pr_events(&client, vec![]).unwrap();
2281 assert!(events.is_empty());
2282 }
2283
2284 #[test]
2285 fn items_to_pr_events_sets_source_system() {
2286 let mut ing = make_ingestor("dave");
2287 ing.fetch_details = false;
2288
2289 let client = Client::new();
2290 let items = vec![make_search_item(42, "org/repo", true)];
2291 let events = ing.items_to_pr_events(&client, items).unwrap();
2292
2293 assert_eq!(events[0].source.system, SourceSystem::Github);
2294 assert!(events[0].source.url.is_some());
2295 assert!(events[0].source.opaque_id.is_some());
2296 }
2297
2298 #[test]
2299 fn items_to_pr_events_merged_mode_uses_created_at_as_occurred() {
2300 let mut ing = make_ingestor("eve");
2301 ing.fetch_details = false;
2302 ing.mode = "merged".to_string();
2303
2304 let client = Client::new();
2305 let mut item = make_search_item(1, "org/repo", true);
2306 let created = DateTime::parse_from_rfc3339("2025-03-15T10:00:00Z")
2307 .unwrap()
2308 .with_timezone(&Utc);
2309 item.created_at = Some(created);
2310
2311 let events = ing.items_to_pr_events(&client, vec![item]).unwrap();
2312 assert_eq!(events[0].occurred_at, created);
2314 }
2315
/// In "created" mode the event's `occurred_at` equals the search item's
/// `created_at`.
#[test]
fn items_to_pr_events_created_mode_uses_created_at() {
    let mut ingestor = make_ingestor("frank");
    ingestor.mode = String::from("created");
    ingestor.fetch_details = false;

    // Pin a known creation timestamp on the search item.
    let created_at = DateTime::parse_from_rfc3339("2025-04-01T12:00:00Z")
        .unwrap()
        .with_timezone(&Utc);
    let mut search_hit = make_search_item(1, "org/repo", true);
    search_hit.created_at = Some(created_at);

    let http = Client::new();
    let produced = ingestor
        .items_to_pr_events(&http, vec![search_hit])
        .unwrap();
    assert_eq!(produced[0].occurred_at, created_at);
}
2332
/// With `fetch_details` off, the repo visibility on the event is reported as
/// `Unknown`.
#[test]
fn items_to_pr_events_without_details_has_unknown_visibility() {
    let http = Client::new();
    let mut ingestor = make_ingestor("grace");
    ingestor.fetch_details = false;

    let produced = ingestor
        .items_to_pr_events(&http, vec![make_search_item(1, "org/repo", true)])
        .unwrap();

    let repo = &produced[0].repo;
    assert_eq!(repo.visibility, RepoVisibility::Unknown);
}
2344
/// With `fetch_details` off, the PR payload has an `Unknown` state and none of
/// the detail-only fields (merge time, additions, deletions, changed files).
#[test]
fn items_to_pr_events_without_details_state_is_unknown() {
    let http = Client::new();
    let mut ingestor = make_ingestor("heidi");
    ingestor.fetch_details = false;

    let search_hits = vec![make_search_item(1, "org/repo", true)];
    let produced = ingestor.items_to_pr_events(&http, search_hits).unwrap();

    match &produced[0].payload {
        EventPayload::PullRequest(pr) => {
            assert_eq!(pr.state, PullRequestState::Unknown);
            assert!(pr.merged_at.is_none());
            assert!(pr.additions.is_none());
            assert!(pr.deletions.is_none());
            assert!(pr.changed_files.is_none());
        }
        // Any other payload kind means the conversion built the wrong event.
        _ => panic!("expected PullRequest payload"),
    }
}
2364
/// Converting two identical search items yields the same event id both times.
#[test]
fn items_to_pr_events_deterministic_ids() {
    let mut ingestor = make_ingestor("ivan");
    ingestor.fetch_details = false;
    let http = Client::new();

    // Two independently built but identical single-item batches.
    let build_batch = || vec![make_search_item(42, "org/repo", true)];
    let first_batch = build_batch();
    let second_batch = build_batch();

    let first_run = ingestor.items_to_pr_events(&http, first_batch).unwrap();
    let second_run = ingestor.items_to_pr_events(&http, second_batch).unwrap();
    assert_eq!(
        first_run[0].id, second_run[0].id,
        "same inputs should produce same event ID"
    );
}
2381
/// Two items in the same repo that differ in their first constructor argument
/// must receive distinct event ids.
#[test]
fn items_to_pr_events_different_prs_get_different_ids() {
    let http = Client::new();
    let mut ingestor = make_ingestor("judy");
    ingestor.fetch_details = false;

    let first_hit = make_search_item(1, "org/repo", true);
    let second_hit = make_search_item(2, "org/repo", true);

    let produced = ingestor
        .items_to_pr_events(&http, vec![first_hit, second_hit])
        .unwrap();
    assert_ne!(produced[0].id, produced[1].id);
}
2396
/// A search item built without a pull-request reference produces no review
/// events.
#[test]
fn items_to_review_events_skips_items_without_pr_ref() {
    let http = Client::new();
    let ingestor = make_ingestor("reviewer");

    let search_hits = vec![make_search_item(1, "org/repo", false)];

    let produced = ingestor
        .items_to_review_events(&http, search_hits)
        .unwrap();
    assert!(produced.is_empty());
}
2411
/// `ingest` must fail when `since` and `until` are the same day.
#[test]
fn ingest_rejects_since_equals_until() {
    let same_day = NaiveDate::from_ymd_opt(2025, 6, 1).unwrap();
    let ingestor = GithubIngestor::new(String::from("user"), same_day, same_day);

    let err = ingestor.ingest().unwrap_err();
    let rendered = err.to_string();
    assert!(rendered.contains("since must be < until"), "got: {err}");
}
2424
/// `ingest` must fail when `since` lies after `until`.
#[test]
fn ingest_rejects_since_after_until() {
    let ing = GithubIngestor::new(
        "user".to_string(),
        NaiveDate::from_ymd_opt(2025, 6, 15).unwrap(),
        NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
    );
    let err = ing.ingest().unwrap_err();
    // Print the actual error on failure so a changed or unrelated error
    // message can be diagnosed straight from the test output.
    assert!(
        err.to_string().contains("since must be < until"),
        "got: {err}"
    );
}
2435
/// Configuring an on-disk cache and then an in-memory cache succeeds and
/// leaves a cache configured. Only presence is checked; the active backend is
/// not inspected.
#[test]
fn with_cache_then_in_memory_cache_overrides() {
    let cache_dir = tempfile::tempdir().unwrap();

    let disk_backed = make_ingestor("octocat")
        .with_cache(cache_dir.path())
        .unwrap();
    let memory_backed = disk_backed.with_in_memory_cache().unwrap();

    assert!(memory_backed.cache.is_some());
}
2448
/// Calling `with_cache` twice with different directories succeeds and leaves
/// a cache configured.
#[test]
fn multiple_with_cache_calls_succeed() {
    let first_dir = tempfile::tempdir().unwrap();
    let second_dir = tempfile::tempdir().unwrap();

    let once = make_ingestor("octocat")
        .with_cache(first_dir.path())
        .unwrap();
    let twice = once.with_cache(second_dir.path()).unwrap();

    assert!(twice.cache.is_some());
}
2460
2461 fn arb_naive_date() -> impl Strategy<Value = NaiveDate> {
2464 (2000i32..2030, 1u32..13, 1u32..29)
2465 .prop_map(|(y, m, d)| NaiveDate::from_ymd_opt(y, m, d).unwrap())
2466 }
2467
2468 fn arb_time_window() -> impl Strategy<Value = TimeWindow> {
2469 (arb_naive_date(), 1u32..366).prop_map(|(since, delta)| {
2470 let until = since + chrono::Duration::days(delta as i64);
2471 TimeWindow { since, until }
2472 })
2473 }
2474
2475 proptest! {
2476 #[test]
2477 fn prop_github_inclusive_range_start_lte_end(w in arb_time_window()) {
2478 let (start, end) = github_inclusive_range(&w);
2479 prop_assert!(start <= end, "start={start} > end={end}");
2480 }
2481
2482 #[test]
2483 fn prop_github_inclusive_range_start_matches_since(w in arb_time_window()) {
2484 let (start, _) = github_inclusive_range(&w);
2485 let expected = w.since.format("%Y-%m-%d").to_string();
2486 prop_assert_eq!(start, expected);
2487 }
2488
2489 #[test]
2490 fn prop_github_inclusive_range_end_is_until_minus_one(w in arb_time_window()) {
2491 let (_, end) = github_inclusive_range(&w);
2492 let expected_date = w.until.pred_opt().unwrap_or(w.until);
2493 let expected = expected_date.format("%Y-%m-%d").to_string();
2494 prop_assert_eq!(end, expected);
2495 }
2496
2497 #[test]
2498 fn prop_build_url_with_params_produces_valid_url(
2499 key in "[a-z]{1,10}",
2500 val in "[a-zA-Z0-9 ]{0,50}",
2501 ) {
2502 let result = build_url_with_params(
2503 "https://api.github.com/search/issues",
2504 &[(&key, val.clone())],
2505 );
2506 prop_assert!(result.is_ok());
2507 let url = result.unwrap();
2508 prop_assert!(!url.as_str().contains(' '));
2510 let found: String = url.query_pairs()
2512 .find(|(k, _)| k.as_ref() == key)
2513 .map(|(_, v)| v.into_owned())
2514 .unwrap();
2515 prop_assert_eq!(found, val);
2516 }
2517
2518 #[test]
2519 fn prop_repo_from_repo_url_never_panics(
2520 owner in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,19}",
2521 repo in "[a-zA-Z0-9][a-zA-Z0-9_.-]{0,29}",
2522 ) {
2523 let api_url = format!("https://api.github.com/repos/{}/{}", owner, repo);
2524 let (full, html) = repo_from_repo_url(&api_url, "https://github.com");
2525 let expected_prefix = format!("{}/", owner);
2526 prop_assert!(full.starts_with(&expected_prefix));
2527 prop_assert!(html.starts_with("https://github.com/"));
2528 }
2529
2530 #[test]
2531 fn prop_repo_from_repo_url_arbitrary_strings_never_panic(
2532 s in ".*",
2533 ) {
2534 let _ = repo_from_repo_url(&s, "https://github.com");
2536 }
2537
2538 #[test]
2539 fn prop_build_pr_query_contains_user(
2540 user in "[a-zA-Z][a-zA-Z0-9-]{0,19}",
2541 ) {
2542 let ing = GithubIngestor::new(
2543 user.clone(),
2544 NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
2545 NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
2546 );
2547 let w = TimeWindow {
2548 since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
2549 until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
2550 };
2551 let q = ing.build_pr_query(&w);
2552 let expected_author = format!("author:{}", user);
2553 prop_assert!(q.contains(&expected_author));
2554 prop_assert!(q.contains("is:pr"));
2555 }
2556
2557 #[test]
2558 fn prop_build_reviewed_query_contains_user(
2559 user in "[a-zA-Z][a-zA-Z0-9-]{0,19}",
2560 ) {
2561 let ing = GithubIngestor::new(
2562 user.clone(),
2563 NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
2564 NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
2565 );
2566 let w = TimeWindow {
2567 since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
2568 until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
2569 };
2570 let q = ing.build_reviewed_query(&w);
2571 let expected_reviewer = format!("reviewed-by:{}", user);
2572 prop_assert!(q.contains(&expected_reviewer));
2573 }
2574
2575 #[test]
2576 fn prop_api_url_preserves_path(
2577 segment in "[a-z]{1,15}",
2578 ) {
2579 let ing = make_ingestor("test");
2580 let path = format!("/{}", segment);
2581 let url = ing.api_url(&path);
2582 prop_assert!(url.ends_with(&path));
2583 prop_assert!(url.starts_with("https://api.github.com"));
2584 }
2585
2586 #[test]
2587 fn prop_html_base_url_always_returns_valid_string(
2588 base in "(https?://[a-z]{3,15}\\.[a-z]{2,5}(/[a-z]+)*)",
2589 ) {
2590 let mut ing = make_ingestor("test");
2591 ing.api_base = base;
2592 let result = ing.html_base_url();
2593 prop_assert!(!result.is_empty());
2594 prop_assert!(result.starts_with("http"));
2595 }
2596 }
2597}