Skip to main content

shiplog_ingest_github/
lib.rs

1//! GitHub API ingestor with adaptive date slicing and cache support.
2//!
3//! Collects PR/review events, tracks coverage slices, and marks partial
4//! completeness when search caps or incomplete API responses are detected.
5
6use anyhow::{Context, Result, anyhow};
7use chrono::{DateTime, NaiveDate, Utc};
8use reqwest::blocking::Client;
9use serde::de::DeserializeOwned;
10use serde::{Deserialize, Serialize};
11use shiplog_cache::{ApiCache, CacheKey, CacheLookup};
12use shiplog_coverage::{day_windows, month_windows, week_windows, window_len_days};
13use shiplog_ids::{EventId, RunId};
14use shiplog_ports::{IngestOutput, Ingestor};
15use shiplog_schema::coverage::{Completeness, CoverageManifest, CoverageSlice, TimeWindow};
16use shiplog_schema::event::{
17    Actor, EventEnvelope, EventKind, EventPayload, Link, PullRequestEvent, PullRequestState,
18    RepoRef, RepoVisibility, ReviewEvent, SourceRef, SourceSystem,
19};
20use shiplog_schema::freshness::{FreshnessStatus, SourceFreshness};
21use std::path::PathBuf;
22use std::sync::atomic::{AtomicU64, Ordering};
23use std::thread::sleep;
24use std::time::Duration;
25use url::Url;
26
27#[derive(Debug)]
28pub struct GithubIngestor {
29    pub user: String,
30    pub since: NaiveDate,
31    pub until: NaiveDate,
32    /// "merged" or "created"
33    pub mode: String,
34    pub include_reviews: bool,
35    pub fetch_details: bool,
36    pub throttle_ms: u64,
37    pub token: Option<String>,
38    /// GitHub API base URL (for GHES). Default: <https://api.github.com>
39    pub api_base: String,
40    /// Optional cache for API responses
41    pub cache: Option<ApiCache>,
42    /// Adapter-local cache hit counter for the most recent (or
43    /// in-progress) `ingest()` call. Incremented every time
44    /// `self.cache.get(...)` returns `Some(_)`. Reported in the
45    /// per-source [`SourceFreshness`] receipt at end of ingest. Private
46    /// because the counter is a run-scoped diagnostic, not a public API.
47    cache_hits: AtomicU64,
48    /// Adapter-local cache miss counter. Incremented every time
49    /// `self.cache.get(...)` returns `None` and triggers a fresh fetch.
50    /// Equivalent to "live API calls performed under the cache". See
51    /// [`Self::cache_hits`] above.
52    cache_misses: AtomicU64,
53    /// Adapter-local stale-hit counter. Incremented when `ApiCache::lookup`
54    /// returns an expired row that this adapter uses.
55    cache_stale_hits: AtomicU64,
56}
57
58impl GithubIngestor {
59    /// Create a new GitHub ingestor for the given user and date range.
60    ///
61    /// Defaults to `merged` mode with no reviews, no cache, and no throttle.
62    ///
63    /// # Examples
64    ///
65    /// ```
66    /// use shiplog_ingest_github::GithubIngestor;
67    /// use chrono::NaiveDate;
68    ///
69    /// let ingestor = GithubIngestor::new(
70    ///     "octocat".into(),
71    ///     NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
72    ///     NaiveDate::from_ymd_opt(2025, 4, 1).unwrap(),
73    /// );
74    /// assert_eq!(ingestor.mode, "merged");
75    /// ```
76    pub fn new(user: String, since: NaiveDate, until: NaiveDate) -> Self {
77        Self {
78            user,
79            since,
80            until,
81            mode: "merged".to_string(),
82            include_reviews: false,
83            fetch_details: true,
84            throttle_ms: 0,
85            token: None,
86            api_base: "https://api.github.com".to_string(),
87            cache: None,
88            cache_hits: AtomicU64::new(0),
89            cache_misses: AtomicU64::new(0),
90            cache_stale_hits: AtomicU64::new(0),
91        }
92    }
93
94    /// Enable caching with the given cache directory.
95    ///
96    /// # Examples
97    ///
98    /// ```rust,no_run
99    /// use shiplog_ingest_github::GithubIngestor;
100    /// use chrono::NaiveDate;
101    ///
102    /// let ingestor = GithubIngestor::new(
103    ///     "octocat".into(),
104    ///     NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
105    ///     NaiveDate::from_ymd_opt(2025, 4, 1).unwrap(),
106    /// ).with_cache("./cache")?;
107    /// # Ok::<(), anyhow::Error>(())
108    /// ```
109    pub fn with_cache(mut self, cache_dir: impl Into<PathBuf>) -> Result<Self> {
110        let cache_path = cache_dir.into().join("github-api-cache.db");
111        if let Some(parent) = cache_path.parent() {
112            std::fs::create_dir_all(parent)
113                .with_context(|| format!("create GitHub cache directory {parent:?}"))?;
114        }
115        let cache = ApiCache::open(&cache_path)
116            .with_context(|| format!("open GitHub API cache at {cache_path:?}"))?;
117        self.cache = Some(cache);
118        Ok(self)
119    }
120
121    /// Enable in-memory caching (useful for testing).
122    ///
123    /// # Examples
124    ///
125    /// ```
126    /// use shiplog_ingest_github::GithubIngestor;
127    /// use chrono::NaiveDate;
128    ///
129    /// let ingestor = GithubIngestor::new(
130    ///     "octocat".into(),
131    ///     NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
132    ///     NaiveDate::from_ymd_opt(2025, 4, 1).unwrap(),
133    /// ).with_in_memory_cache().unwrap();
134    /// assert!(ingestor.cache.is_some());
135    /// ```
136    pub fn with_in_memory_cache(mut self) -> Result<Self> {
137        let cache = ApiCache::open_in_memory().context("open in-memory API cache")?;
138        self.cache = Some(cache);
139        Ok(self)
140    }
141
142    fn html_base_url(&self) -> String {
143        if let Ok(u) = Url::parse(&self.api_base) {
144            let scheme = u.scheme();
145            if let Some(host) = u.host_str() {
146                if host == "api.github.com" {
147                    return "https://github.com".to_string();
148                }
149                let port_suffix = u.port().map(|p| format!(":{p}")).unwrap_or_default();
150                return format!("{scheme}://{host}{port_suffix}");
151            }
152        }
153        "https://github.com".to_string()
154    }
155
156    #[mutants::skip]
157    fn client(&self) -> Result<Client> {
158        Client::builder()
159            .user_agent(concat!("shiplog/", env!("CARGO_PKG_VERSION")))
160            .build()
161            .context("build reqwest client")
162    }
163
164    #[mutants::skip]
165    fn api_url(&self, path: &str) -> String {
166        format!("{}{}", self.api_base.trim_end_matches('/'), path)
167    }
168
169    #[mutants::skip]
170    fn throttle(&self) {
171        if self.throttle_ms > 0 {
172            sleep(Duration::from_millis(self.throttle_ms));
173        }
174    }
175
176    #[mutants::skip]
177    fn get_json<T: DeserializeOwned>(
178        &self,
179        client: &Client,
180        url: &str,
181        params: &[(&str, String)],
182    ) -> Result<T> {
183        let request_url = build_url_with_params(url, params)?;
184        let request_url_for_err = request_url.as_str().to_string();
185
186        let mut req = client
187            .get(request_url)
188            .header("Accept", "application/vnd.github+json");
189        req = req.header("X-GitHub-Api-Version", "2022-11-28");
190        if let Some(t) = &self.token {
191            req = req.bearer_auth(t);
192        }
193        let resp = req
194            .send()
195            .with_context(|| format!("GET {request_url_for_err}"))?;
196        self.throttle();
197
198        if !resp.status().is_success() {
199            let status = resp.status();
200            let body = resp.text().unwrap_or_default();
201            return Err(anyhow!("GitHub API error {status}: {body}"));
202        }
203
204        resp.json::<T>()
205            .with_context(|| format!("parse json from {request_url_for_err}"))
206    }
207}
208
209impl Ingestor for GithubIngestor {
210    #[mutants::skip]
211    fn ingest(&self) -> Result<IngestOutput> {
212        if self.since >= self.until {
213            return Err(anyhow!("since must be < until"));
214        }
215
216        let client = self.client().context("create GitHub API client")?;
217        let run_id = RunId::now("shiplog");
218        let mut slices: Vec<CoverageSlice> = Vec::new();
219        let mut warnings: Vec<String> = Vec::new();
220        let mut completeness = Completeness::Complete;
221
222        let mut events: Vec<EventEnvelope> = Vec::new();
223
224        // PRs authored
225        let pr_query_builder = |w: &TimeWindow| self.build_pr_query(w);
226        let (pr_items, pr_slices, pr_partial) =
227            self.collect_search_items(&client, pr_query_builder, self.since, self.until, "prs")?;
228        slices.extend(pr_slices);
229        if pr_partial {
230            completeness = Completeness::Partial;
231        }
232
233        events.extend(self.items_to_pr_events(&client, pr_items)?);
234
235        // Reviews authored (best-effort)
236        if self.include_reviews {
237            warnings.push("Reviews are collected via search + per-PR review fetch; treat as best-effort coverage.".to_string());
238            let review_query_builder = |w: &TimeWindow| self.build_reviewed_query(w);
239            let (review_items, review_slices, review_partial) = self.collect_search_items(
240                &client,
241                review_query_builder,
242                self.since,
243                self.until,
244                "reviews",
245            )?;
246            slices.extend(review_slices);
247            if review_partial {
248                completeness = Completeness::Partial;
249            }
250            events.extend(self.items_to_review_events(&client, review_items)?);
251        }
252
253        // Sort for stable output
254        events.sort_by_key(|e| e.occurred_at);
255
256        let fetched_at = Utc::now();
257        let cov = CoverageManifest {
258            run_id,
259            generated_at: fetched_at,
260            user: self.user.clone(),
261            window: TimeWindow {
262                since: self.since,
263                until: self.until,
264            },
265            mode: self.mode.clone(),
266            sources: vec!["github".to_string()],
267            slices,
268            warnings,
269            completeness,
270        };
271
272        // Snapshot the run's cache counters and derive freshness status.
273        // Status rules:
274        //   - cache configured and >0 hits and 0 misses => Cached.
275        //   - cache configured and >=1 miss (regardless of hits) => Fresh.
276        //   - cache configured and 0 hits + 0 misses => Fresh (the search
277        //     phase always runs uncached, so the adapter performed live
278        //     work even if `fetch_details=false` skipped the detail leg).
279        //   - cache not configured => Fresh.
280        let hits = self.cache_hits.load(Ordering::Relaxed);
281        let misses = self.cache_misses.load(Ordering::Relaxed);
282        let stale_hits = self.cache_stale_hits.load(Ordering::Relaxed);
283        let (status, reason) =
284            github_freshness_status(self.cache.is_some(), hits, misses, stale_hits);
285        let freshness = vec![SourceFreshness {
286            source: "github".to_string(),
287            status,
288            cache_hits: hits,
289            cache_misses: misses,
290            fetched_at: Some(fetched_at),
291            reason,
292        }];
293
294        Ok(IngestOutput {
295            events,
296            coverage: cov,
297            freshness,
298        })
299    }
300}
301
302impl GithubIngestor {
303    fn build_pr_query(&self, w: &TimeWindow) -> String {
304        let (start, end) = github_inclusive_range(w);
305        match self.mode.as_str() {
306            "created" => format!("is:pr author:{} created:{}..{}", self.user, start, end),
307            _ => format!(
308                "is:pr is:merged author:{} merged:{}..{}",
309                self.user, start, end
310            ),
311        }
312    }
313
314    fn build_reviewed_query(&self, w: &TimeWindow) -> String {
315        // GitHub does not expose review submission time in search qualifiers.
316        // We use `updated:` to find candidate PRs, then filter reviews by submitted_at.
317        let (start, end) = github_inclusive_range(w);
318        format!("is:pr reviewed-by:{} updated:{}..{}", self.user, start, end)
319    }
320
321    /// Collect search items for a date range, adaptively slicing to avoid the 1000-result cap.
322    ///
323    /// Returns:
324    /// - items
325    /// - coverage slices
326    /// - whether coverage is partial
327    #[mutants::skip]
328    fn collect_search_items<F>(
329        &self,
330        client: &Client,
331        make_query: F,
332        since: NaiveDate,
333        until: NaiveDate,
334        label: &str,
335    ) -> Result<(Vec<SearchIssueItem>, Vec<CoverageSlice>, bool)>
336    where
337        F: Fn(&TimeWindow) -> String,
338    {
339        let mut slices: Vec<CoverageSlice> = Vec::new();
340        let mut items: Vec<SearchIssueItem> = Vec::new();
341        let mut partial = false;
342
343        for w in month_windows(since, until) {
344            let (mut i, mut s, p) =
345                self.collect_window(client, &make_query, &w, Granularity::Month, label)?;
346            items.append(&mut i);
347            slices.append(&mut s);
348            partial |= p;
349        }
350
351        Ok((items, slices, partial))
352    }
353
354    #[mutants::skip]
355    fn collect_window<F>(
356        &self,
357        client: &Client,
358        make_query: &F,
359        window: &TimeWindow,
360        gran: Granularity,
361        label: &str,
362    ) -> Result<(Vec<SearchIssueItem>, Vec<CoverageSlice>, bool)>
363    where
364        F: Fn(&TimeWindow) -> String,
365    {
366        if window.since >= window.until {
367            return Ok((vec![], vec![], false));
368        }
369
370        let query = make_query(window);
371        let (meta_total, meta_incomplete) = self.search_meta(client, &query)?;
372        let mut slices = vec![CoverageSlice {
373            window: window.clone(),
374            query: query.clone(),
375            total_count: meta_total,
376            fetched: 0,
377            incomplete_results: Some(meta_incomplete),
378            notes: vec![format!("probe:{label}")],
379        }];
380
381        // Decide if we need to subdivide
382        let need_subdivide = meta_total > 1000 || meta_incomplete;
383        let can_subdivide = gran != Granularity::Day && window_len_days(window) > 1;
384
385        if need_subdivide && can_subdivide {
386            slices[0].notes.push(format!(
387                "subdivide:{}",
388                if meta_total > 1000 {
389                    "cap"
390                } else {
391                    "incomplete"
392                }
393            ));
394
395            let mut out_items = Vec::new();
396            let mut out_slices = slices;
397            let mut partial = false;
398
399            let subs = match gran {
400                Granularity::Month => week_windows(window.since, window.until),
401                Granularity::Week => day_windows(window.since, window.until),
402                Granularity::Day => vec![],
403            };
404
405            for sub in subs {
406                let (mut i, mut s, p) =
407                    self.collect_window(client, make_query, &sub, gran.next(), label)?;
408                out_items.append(&mut i);
409                out_slices.append(&mut s);
410                partial |= p;
411            }
412            return Ok((out_items, out_slices, partial));
413        }
414
415        // Day-level overflow: can't subdivide further. We'll still fetch up to the API cap.
416        let mut partial = false;
417        if meta_total > 1000 || meta_incomplete {
418            partial = true;
419            slices[0]
420                .notes
421                .push("partial:unresolvable_at_this_granularity".to_string());
422        }
423
424        let fetched_items = self.fetch_all_search_items(client, &query)?;
425        let fetched = fetched_items.len() as u64;
426
427        // Record a fetch slice (separate from the probe for clarity)
428        slices.push(CoverageSlice {
429            window: window.clone(),
430            query: query.clone(),
431            total_count: meta_total,
432            fetched,
433            incomplete_results: Some(meta_incomplete),
434            notes: vec![format!("fetch:{label}")],
435        });
436
437        Ok((fetched_items, slices, partial))
438    }
439
440    #[mutants::skip]
441    fn search_meta(&self, client: &Client, q: &str) -> Result<(u64, bool)> {
442        let url = self.api_url("/search/issues");
443        let resp: SearchResponse<SearchIssueItem> = self.get_json(
444            client,
445            &url,
446            &[
447                ("q", q.to_string()),
448                ("per_page", "1".to_string()),
449                ("page", "1".to_string()),
450            ],
451        )?;
452        Ok((resp.total_count, resp.incomplete_results))
453    }
454
455    #[mutants::skip]
456    fn fetch_all_search_items(&self, client: &Client, q: &str) -> Result<Vec<SearchIssueItem>> {
457        let url = self.api_url("/search/issues");
458        let mut out: Vec<SearchIssueItem> = Vec::new();
459        let per_page = 100;
460        let max_pages = 10; // 1000 cap
461        for page in 1..=max_pages {
462            let resp: SearchResponse<SearchIssueItem> = self.get_json(
463                client,
464                &url,
465                &[
466                    ("q", q.to_string()),
467                    ("per_page", per_page.to_string()),
468                    ("page", page.to_string()),
469                ],
470            )?;
471            let items_len = resp.items.len();
472            out.extend(resp.items);
473            if out.len() as u64 >= resp.total_count.min(1000) {
474                break;
475            }
476            if items_len < per_page {
477                break;
478            }
479        }
480        Ok(out)
481    }
482
483    #[mutants::skip]
484    fn items_to_pr_events(
485        &self,
486        client: &Client,
487        items: Vec<SearchIssueItem>,
488    ) -> Result<Vec<EventEnvelope>> {
489        let mut out = Vec::new();
490        for item in items {
491            if let Some(pr_ref) = &item.pull_request {
492                let html_base = self.html_base_url();
493                let (repo_full_name, repo_html_url) =
494                    repo_from_repo_url(&item.repository_url, &html_base);
495
496                let (title, created_at, merged_at, additions, deletions, changed_files, visibility) =
497                    if self.fetch_details {
498                        match self.fetch_pr_details(client, &pr_ref.url) {
499                            Ok(d) => {
500                                let vis = if d.base.repo.private_field {
501                                    RepoVisibility::Private
502                                } else {
503                                    RepoVisibility::Public
504                                };
505                                (
506                                    d.title,
507                                    d.created_at,
508                                    d.merged_at,
509                                    Some(d.additions),
510                                    Some(d.deletions),
511                                    Some(d.changed_files),
512                                    vis,
513                                )
514                            }
515                            Err(_) => {
516                                // If details fail, fall back to search fields.
517                                (
518                                    item.title.clone(),
519                                    item.created_at.unwrap_or_else(Utc::now),
520                                    None,
521                                    None,
522                                    None,
523                                    None,
524                                    RepoVisibility::Unknown,
525                                )
526                            }
527                        }
528                    } else {
529                        (
530                            item.title.clone(),
531                            item.created_at.unwrap_or_else(Utc::now),
532                            None,
533                            None,
534                            None,
535                            None,
536                            RepoVisibility::Unknown,
537                        )
538                    };
539
540                let occurred_at = match self.mode.as_str() {
541                    "created" => created_at,
542                    _ => merged_at.unwrap_or(created_at),
543                };
544
545                let state = if merged_at.is_some() {
546                    PullRequestState::Merged
547                } else {
548                    PullRequestState::Unknown
549                };
550
551                let id = EventId::from_parts([
552                    "github",
553                    "pr",
554                    &repo_full_name,
555                    &item.number.to_string(),
556                ]);
557
558                let ev = EventEnvelope {
559                    id,
560                    kind: EventKind::PullRequest,
561                    occurred_at,
562                    actor: Actor {
563                        login: self.user.clone(),
564                        id: None,
565                    },
566                    repo: RepoRef {
567                        full_name: repo_full_name,
568                        html_url: Some(repo_html_url),
569                        visibility,
570                    },
571                    payload: EventPayload::PullRequest(PullRequestEvent {
572                        number: item.number,
573                        title,
574                        state,
575                        created_at,
576                        merged_at,
577                        additions,
578                        deletions,
579                        changed_files,
580                        touched_paths_hint: vec![],
581                        window: None,
582                    }),
583                    tags: vec![],
584                    links: vec![Link {
585                        label: "pr".into(),
586                        url: item.html_url.clone(),
587                    }],
588                    source: SourceRef {
589                        system: SourceSystem::Github,
590                        url: Some(pr_ref.url.clone()),
591                        opaque_id: Some(item.id.to_string()),
592                    },
593                };
594
595                out.push(ev);
596            }
597        }
598        Ok(out)
599    }
600
601    #[mutants::skip]
602    fn items_to_review_events(
603        &self,
604        client: &Client,
605        items: Vec<SearchIssueItem>,
606    ) -> Result<Vec<EventEnvelope>> {
607        let mut out = Vec::new();
608        for item in items {
609            let Some(pr_ref) = &item.pull_request else {
610                continue;
611            };
612            let html_base = self.html_base_url();
613            let (repo_full_name, repo_html_url) =
614                repo_from_repo_url(&item.repository_url, &html_base);
615
616            // Fetch reviews for this PR and filter by author + date window.
617            let reviews = self.fetch_pr_reviews(client, &pr_ref.url)?;
618            for r in reviews {
619                if r.user.login != self.user {
620                    continue;
621                }
622                let submitted = match r.submitted_at {
623                    Some(s) => s,
624                    None => continue,
625                };
626                let submitted_date = submitted.date_naive();
627                if submitted_date < self.since || submitted_date >= self.until {
628                    continue;
629                }
630
631                let id = EventId::from_parts([
632                    "github",
633                    "review",
634                    &repo_full_name,
635                    &item.number.to_string(),
636                    &r.id.to_string(),
637                ]);
638
639                let ev = EventEnvelope {
640                    id,
641                    kind: EventKind::Review,
642                    occurred_at: submitted,
643                    actor: Actor {
644                        login: self.user.clone(),
645                        id: None,
646                    },
647                    repo: RepoRef {
648                        full_name: repo_full_name.clone(),
649                        html_url: Some(repo_html_url.clone()),
650                        visibility: RepoVisibility::Unknown,
651                    },
652                    payload: EventPayload::Review(ReviewEvent {
653                        pull_number: item.number,
654                        pull_title: item.title.clone(),
655                        submitted_at: submitted,
656                        state: r.state,
657                        window: None,
658                    }),
659                    tags: vec![],
660                    links: vec![Link {
661                        label: "pr".into(),
662                        url: item.html_url.clone(),
663                    }],
664                    source: SourceRef {
665                        system: SourceSystem::Github,
666                        url: Some(pr_ref.url.clone()),
667                        opaque_id: Some(r.id.to_string()),
668                    },
669                };
670
671                out.push(ev);
672            }
673        }
674        Ok(out)
675    }
676
677    #[mutants::skip]
678    fn fetch_pr_details(&self, client: &Client, pr_api_url: &str) -> Result<PullRequestDetails> {
679        // Check cache first
680        let cache_key = CacheKey::pr_details(pr_api_url);
681        if let Some(ref cache) = self.cache {
682            match cache.lookup::<PullRequestDetails>(&cache_key)? {
683                CacheLookup::Fresh(cached) => {
684                    self.cache_hits.fetch_add(1, Ordering::Relaxed);
685                    return Ok(cached);
686                }
687                CacheLookup::Stale(cached) => {
688                    self.cache_hits.fetch_add(1, Ordering::Relaxed);
689                    self.cache_stale_hits.fetch_add(1, Ordering::Relaxed);
690                    return Ok(cached);
691                }
692                CacheLookup::Miss => {
693                    self.cache_misses.fetch_add(1, Ordering::Relaxed);
694                }
695            }
696        }
697
698        // Fetch from API
699        let details: PullRequestDetails = self.get_json(client, pr_api_url, &[])?;
700
701        // Store in cache
702        if let Some(ref cache) = self.cache {
703            cache.set(&cache_key, &details)?;
704        }
705
706        Ok(details)
707    }
708
709    #[mutants::skip]
710    fn fetch_pr_reviews(
711        &self,
712        client: &Client,
713        pr_api_url: &str,
714    ) -> Result<Vec<PullRequestReview>> {
715        let url = format!("{pr_api_url}/reviews");
716        let mut out = Vec::new();
717        let per_page = 100;
718        for page in 1..=10 {
719            let cache_key = CacheKey::pr_reviews(pr_api_url, page);
720
721            // Try to get from cache first
722            let page_reviews: Vec<PullRequestReview> = if let Some(ref cache) = self.cache {
723                match cache.lookup::<Vec<PullRequestReview>>(&cache_key)? {
724                    CacheLookup::Fresh(cached) => {
725                        self.cache_hits.fetch_add(1, Ordering::Relaxed);
726                        cached
727                    }
728                    CacheLookup::Stale(cached) => {
729                        self.cache_hits.fetch_add(1, Ordering::Relaxed);
730                        self.cache_stale_hits.fetch_add(1, Ordering::Relaxed);
731                        cached
732                    }
733                    CacheLookup::Miss => {
734                        self.cache_misses.fetch_add(1, Ordering::Relaxed);
735                        // Not in cache, fetch from API
736                        let reviews: Vec<PullRequestReview> = self.get_json(
737                            client,
738                            &url,
739                            &[
740                                ("per_page", per_page.to_string()),
741                                ("page", page.to_string()),
742                            ],
743                        )?;
744                        // Store in cache
745                        cache.set(&cache_key, &reviews)?;
746                        reviews
747                    }
748                }
749            } else {
750                // No cache configured, fetch directly
751                self.get_json(
752                    client,
753                    &url,
754                    &[
755                        ("per_page", per_page.to_string()),
756                        ("page", page.to_string()),
757                    ],
758                )?
759            };
760
761            let n = page_reviews.len();
762            out.extend(page_reviews);
763            if n < per_page {
764                break;
765            }
766        }
767        Ok(out)
768    }
769}
770
/// Size class of a search window, from coarsest to finest.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Granularity {
    Month,
    Week,
    Day,
}

impl Granularity {
    /// The next finer granularity; `Day` is the finest and maps to itself.
    fn next(&self) -> Granularity {
        match *self {
            Self::Month => Self::Week,
            Self::Week | Self::Day => Self::Day,
        }
    }
}
787
788fn github_inclusive_range(w: &TimeWindow) -> (String, String) {
789    let start = w.since.format("%Y-%m-%d").to_string();
790    let end_date = w.until.pred_opt().unwrap_or(w.until);
791    let end = end_date.format("%Y-%m-%d").to_string();
792    (start, end)
793}
794
795fn build_url_with_params(base: &str, params: &[(&str, String)]) -> Result<Url> {
796    let mut url = Url::parse(base).with_context(|| format!("parse url {base}"))?;
797    if !params.is_empty() {
798        let mut query = url.query_pairs_mut();
799        for (k, v) in params {
800            query.append_pair(k, v);
801        }
802    }
803    Ok(url)
804}
805
806fn github_freshness_status(
807    cache_configured: bool,
808    cache_hits: u64,
809    cache_misses: u64,
810    cache_stale_hits: u64,
811) -> (FreshnessStatus, Option<String>) {
812    if cache_configured && cache_stale_hits > 0 {
813        return (
814            FreshnessStatus::Stale,
815            Some("one or more expired cache entries were used".to_string()),
816        );
817    }
818    if cache_configured && cache_hits > 0 && cache_misses == 0 {
819        return (FreshnessStatus::Cached, None);
820    }
821
822    (FreshnessStatus::Fresh, None)
823}
824
825fn repo_from_repo_url(repo_api_url: &str, html_base: &str) -> (String, String) {
826    #[expect(clippy::collapsible_if, reason = "policy:clippy-0002")]
827    if let Ok(u) = Url::parse(repo_api_url) {
828        if let Some(segs) = u.path_segments() {
829            let v: Vec<&str> = segs.collect();
830            if v.len() >= 3 && v[0] == "repos" {
831                let owner = v[1];
832                let repo = v[2];
833                let full = format!("{}/{}", owner, repo);
834                let html = format!("{}/{}/{}", html_base.trim_end_matches('/'), owner, repo);
835                return (full, html);
836            }
837        }
838    }
839    ("unknown/unknown".to_string(), html_base.to_string())
840}
841
/// GitHub search response envelope.
///
/// Generic over `T`, the type each entry of `items` is deserialized into.
#[derive(Debug, Deserialize)]
struct SearchResponse<T> {
    /// Value of the response's `total_count` field.
    total_count: u64,
    /// Value of the response's `incomplete_results` field.
    incomplete_results: bool,
    /// Entries of the response's `items` array.
    items: Vec<T>,
}
849
/// One entry of a search result, deserialized from the fields listed below.
#[derive(Debug, Deserialize)]
struct SearchIssueItem {
    /// Value of the item's `id` field.
    id: u64,
    /// Value of the item's `number` field.
    number: u64,
    /// Value of the item's `title` field.
    title: String,
    /// Value of the item's `html_url` field.
    html_url: String,
    /// Value of the item's `repository_url` field.
    repository_url: String,
    /// Pull-request reference, when the item carries a `pull_request` object.
    pull_request: Option<SearchPullRequestRef>,

    // Search returns these for issues; for PR queries they are present and useful.
    /// Value of the item's `created_at` field, when present.
    created_at: Option<DateTime<Utc>>,
}
862
/// The `pull_request` object nested inside a [`SearchIssueItem`].
#[derive(Debug, Deserialize)]
struct SearchPullRequestRef {
    /// Value of the object's `url` field.
    url: String,
}
867
/// Pull-request detail record.
///
/// Derives both `Deserialize` and `Serialize`, so it can be read from JSON
/// and written back out.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct PullRequestDetails {
    /// Value of the `title` field.
    title: String,
    /// Value of the `created_at` field.
    created_at: DateTime<Utc>,
    /// Value of the `merged_at` field; `None` when it is absent or null.
    merged_at: Option<DateTime<Utc>>,
    /// Value of the `additions` field.
    additions: u64,
    /// Value of the `deletions` field.
    deletions: u64,
    /// Value of the `changed_files` field.
    changed_files: u64,
    /// The nested `base` object.
    base: PullBase,
}
878
/// The `base` object nested inside [`PullRequestDetails`].
#[derive(Debug, Deserialize, Serialize, Clone)]
struct PullBase {
    /// The nested `repo` object.
    repo: PullRepo,
}
883
/// The `repo` object nested inside [`PullBase`].
#[derive(Debug, Deserialize, Serialize, Clone)]
struct PullRepo {
    /// Value of the `full_name` field.
    full_name: String,
    /// Value of the `html_url` field.
    html_url: String,
    /// Value of the JSON key `private`; the Rust field uses a different name
    /// and is mapped through `serde(rename)`.
    #[serde(rename = "private")]
    private_field: bool,
}
891
/// A single pull-request review record.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct PullRequestReview {
    /// Value of the `id` field.
    id: u64,
    /// Value of the `state` field, kept as the raw string.
    state: String,
    /// Value of the `submitted_at` field; `None` when it is absent or null.
    submitted_at: Option<DateTime<Utc>>,
    /// The nested `user` object.
    user: ReviewUser,
}
899
/// The `user` object nested inside [`PullRequestReview`].
#[derive(Debug, Deserialize, Serialize, Clone)]
struct ReviewUser {
    /// Value of the `login` field.
    login: String,
}
904
905#[cfg(test)]
906mod tests {
907    use super::*;
908    use chrono::TimeZone;
909    use proptest::prelude::*;
910    use std::io::{ErrorKind, Read, Write};
911    use std::net::{TcpListener, TcpStream};
912    use std::sync::{Arc, Mutex};
913    use std::thread::{self, JoinHandle};
914    use std::time::{Duration as StdDuration, Instant};
915
916    // ── helpers ──────────────────────────────────────────────────────────
917
918    fn make_ingestor(user: &str) -> GithubIngestor {
919        GithubIngestor::new(
920            user.to_string(),
921            NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
922            NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
923        )
924    }
925
926    fn make_search_item(number: u64, repo: &str, with_pr: bool) -> SearchIssueItem {
927        SearchIssueItem {
928            id: number * 100,
929            number,
930            title: format!("PR #{number}"),
931            html_url: format!("https://github.com/{repo}/pull/{number}"),
932            repository_url: format!("https://api.github.com/repos/{repo}"),
933            pull_request: if with_pr {
934                Some(SearchPullRequestRef {
935                    url: format!("https://api.github.com/repos/{repo}/pulls/{number}"),
936                })
937            } else {
938                None
939            },
940            created_at: Some(Utc::now()),
941        }
942    }
943
944    // ── existing tests (preserved) ──────────────────────────────────────
945
946    #[test]
947    fn with_cache_creates_missing_directory() {
948        let temp = tempfile::tempdir().unwrap();
949        let cache_dir = temp.path().join("nested").join("cache");
950
951        let ing = GithubIngestor::new(
952            "octocat".to_string(),
953            NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
954            NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
955        )
956        .with_cache(&cache_dir)
957        .unwrap();
958
959        assert!(ing.cache.is_some());
960        assert!(cache_dir.join("github-api-cache.db").exists());
961    }
962
963    #[test]
964    fn build_url_with_params_encodes_query_values() {
965        let url = build_url_with_params(
966            "https://api.github.com/search/issues",
967            &[
968                ("q", "is:pr is:merged author:octocat".to_string()),
969                ("per_page", "1".to_string()),
970            ],
971        )
972        .unwrap();
973
974        assert!(!url.as_str().contains(' '), "URL should be percent-encoded");
975
976        let pairs: Vec<(String, String)> = url
977            .query_pairs()
978            .map(|(k, v)| (k.into_owned(), v.into_owned()))
979            .collect();
980        assert_eq!(
981            pairs,
982            vec![
983                (
984                    "q".to_string(),
985                    "is:pr is:merged author:octocat".to_string()
986                ),
987                ("per_page".to_string(), "1".to_string()),
988            ]
989        );
990    }
991
992    #[test]
993    fn github_inclusive_range_uses_exclusive_until_date() {
994        let window = TimeWindow {
995            since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
996            until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
997        };
998
999        let (start, end) = github_inclusive_range(&window);
1000        assert_eq!(start, "2025-01-01");
1001        assert_eq!(end, "2025-01-31");
1002    }
1003
1004    #[test]
1005    fn html_base_url_maps_public_and_ghes_hosts() {
1006        let mut ing = GithubIngestor::new(
1007            "octocat".to_string(),
1008            NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1009            NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1010        );
1011        ing.api_base = "https://api.github.com".to_string();
1012        assert_eq!(ing.html_base_url(), "https://github.com");
1013
1014        ing.api_base = "https://github.enterprise.local/api/v3".to_string();
1015        assert_eq!(ing.html_base_url(), "https://github.enterprise.local");
1016    }
1017
1018    #[test]
1019    fn build_pr_query_merged_and_created_modes() {
1020        let mut ing = GithubIngestor::new(
1021            "octocat".to_string(),
1022            NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1023            NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1024        );
1025        let w = TimeWindow {
1026            since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1027            until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1028        };
1029
1030        ing.mode = "merged".to_string();
1031        let merged_q = ing.build_pr_query(&w);
1032        assert!(!merged_q.is_empty());
1033        assert!(merged_q.contains("is:merged"));
1034        assert!(merged_q.contains("author:octocat"));
1035
1036        ing.mode = "created".to_string();
1037        let created_q = ing.build_pr_query(&w);
1038        assert!(!created_q.is_empty());
1039        assert!(created_q.contains("created:"));
1040        assert!(created_q.contains("author:octocat"));
1041
1042        // The two queries should be different
1043        assert_ne!(merged_q, created_q);
1044    }
1045
1046    #[test]
1047    fn build_reviewed_query_contains_user() {
1048        let ing = GithubIngestor::new(
1049            "octocat".to_string(),
1050            NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1051            NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1052        );
1053        let w = TimeWindow {
1054            since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1055            until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1056        };
1057
1058        let q = ing.build_reviewed_query(&w);
1059        assert!(!q.is_empty());
1060        assert!(q.contains("reviewed-by:octocat"));
1061        assert!(q.contains("is:pr"));
1062    }
1063
1064    #[test]
1065    fn repo_from_repo_url_invalid_url_returns_fallback() {
1066        let (full, html) = repo_from_repo_url("not-a-url-at-all", "https://github.com");
1067        assert_eq!(full, "unknown/unknown");
1068        assert_eq!(html, "https://github.com");
1069
1070        // URL with wrong path structure
1071        let (full2, _) =
1072            repo_from_repo_url("https://api.github.com/users/octocat", "https://github.com");
1073        assert_eq!(full2, "unknown/unknown");
1074    }
1075
1076    #[test]
1077    fn repo_from_repo_url_extracts_or_falls_back() {
1078        let (full, html) = repo_from_repo_url(
1079            "https://api.github.com/repos/owner/repo",
1080            "https://github.com",
1081        );
1082        assert_eq!(full, "owner/repo");
1083        assert_eq!(html, "https://github.com/owner/repo");
1084
1085        let (full_fallback, html_fallback) = repo_from_repo_url("not-a-url", "https://github.com");
1086        assert_eq!(full_fallback, "unknown/unknown");
1087        assert_eq!(html_fallback, "https://github.com");
1088    }
1089
1090    // ── new unit tests ──────────────────────────────────────────────────
1091
1092    // -- Granularity --
1093
1094    #[test]
1095    fn granularity_next_transitions() {
1096        assert_eq!(Granularity::Month.next(), Granularity::Week);
1097        assert_eq!(Granularity::Week.next(), Granularity::Day);
1098        assert_eq!(Granularity::Day.next(), Granularity::Day);
1099    }
1100
1101    #[test]
1102    fn granularity_day_is_fixed_point() {
1103        let g = Granularity::Day;
1104        assert_eq!(g.next(), Granularity::Day);
1105        assert_eq!(g.next().next(), Granularity::Day);
1106    }
1107
1108    // -- GithubIngestor::new defaults --
1109
1110    #[test]
1111    fn new_defaults_are_correct() {
1112        let ing = make_ingestor("alice");
1113        assert_eq!(ing.user, "alice");
1114        assert_eq!(ing.mode, "merged");
1115        assert!(!ing.include_reviews);
1116        assert!(ing.fetch_details);
1117        assert_eq!(ing.throttle_ms, 0);
1118        assert!(ing.token.is_none());
1119        assert_eq!(ing.api_base, "https://api.github.com");
1120        assert!(ing.cache.is_none());
1121    }
1122
1123    // -- with_in_memory_cache --
1124
1125    #[test]
1126    fn with_in_memory_cache_sets_cache() {
1127        let ing = make_ingestor("bob").with_in_memory_cache().unwrap();
1128        assert!(ing.cache.is_some());
1129    }
1130
    // -- cache hit/miss counters --
    //
    // Exercises the freshness-attribution wiring without involving the
    // network: the counters must start at zero, and
    // `github_freshness_status` is checked by bumping the counters by
    // hand. `fetch_pr_details` is the canonical cache-aware path; its
    // cache-hit and stale-hit behavior (verified by storing a sentinel
    // value in the cache manually) and the cold-then-warm transition
    // are covered by the warm-rerun and recorded-fixture tests further
    // down in this module.
1139
1140    #[test]
1141    fn freshness_counters_start_at_zero() -> anyhow::Result<()> {
1142        let ing = make_ingestor("octocat").with_in_memory_cache()?;
1143        assert_eq!(ing.cache_hits.load(Ordering::Relaxed), 0);
1144        assert_eq!(ing.cache_misses.load(Ordering::Relaxed), 0);
1145        assert_eq!(ing.cache_stale_hits.load(Ordering::Relaxed), 0);
1146        Ok(())
1147    }
1148
1149    #[test]
1150    fn freshness_status_cached_when_only_hits_observed() -> anyhow::Result<()> {
1151        // Status rule: cache present, hits > 0, misses == 0 => Cached.
1152        let ing = make_ingestor("octocat").with_in_memory_cache()?;
1153        ing.cache_hits.fetch_add(3, Ordering::Relaxed);
1154        let hits = ing.cache_hits.load(Ordering::Relaxed);
1155        let misses = ing.cache_misses.load(Ordering::Relaxed);
1156        let stale_hits = ing.cache_stale_hits.load(Ordering::Relaxed);
1157        let (status, reason) =
1158            github_freshness_status(ing.cache.is_some(), hits, misses, stale_hits);
1159        assert!(matches!(status, FreshnessStatus::Cached));
1160        assert!(reason.is_none());
1161        Ok(())
1162    }
1163
1164    #[test]
1165    fn freshness_status_fresh_when_any_miss_observed() -> anyhow::Result<()> {
1166        // Status rule: cache present, any miss => Fresh.
1167        let ing = make_ingestor("octocat").with_in_memory_cache()?;
1168        ing.cache_hits.fetch_add(2, Ordering::Relaxed);
1169        ing.cache_misses.fetch_add(1, Ordering::Relaxed);
1170        let hits = ing.cache_hits.load(Ordering::Relaxed);
1171        let misses = ing.cache_misses.load(Ordering::Relaxed);
1172        let stale_hits = ing.cache_stale_hits.load(Ordering::Relaxed);
1173        let (status, reason) =
1174            github_freshness_status(ing.cache.is_some(), hits, misses, stale_hits);
1175        assert!(matches!(status, FreshnessStatus::Fresh));
1176        assert!(reason.is_none());
1177        Ok(())
1178    }
1179
1180    #[test]
1181    fn freshness_status_stale_when_stale_hit_observed() -> anyhow::Result<()> {
1182        // Status rule: any stale row used by the adapter makes the
1183        // source stale, even if other cache lookups hit or miss.
1184        let ing = make_ingestor("octocat").with_in_memory_cache()?;
1185        ing.cache_hits.fetch_add(2, Ordering::Relaxed);
1186        ing.cache_misses.fetch_add(1, Ordering::Relaxed);
1187        ing.cache_stale_hits.fetch_add(1, Ordering::Relaxed);
1188        let hits = ing.cache_hits.load(Ordering::Relaxed);
1189        let misses = ing.cache_misses.load(Ordering::Relaxed);
1190        let stale_hits = ing.cache_stale_hits.load(Ordering::Relaxed);
1191        let (status, reason) =
1192            github_freshness_status(ing.cache.is_some(), hits, misses, stale_hits);
1193        assert!(matches!(status, FreshnessStatus::Stale));
1194        assert_eq!(
1195            reason.as_deref(),
1196            Some("one or more expired cache entries were used")
1197        );
1198        Ok(())
1199    }
1200
1201    #[test]
1202    fn freshness_status_fresh_when_no_cache_configured() {
1203        // Status rule: no cache => Fresh regardless of counters (which
1204        // are unreachable without a cache anyway).
1205        let ing = make_ingestor("octocat");
1206        let hits = ing.cache_hits.load(Ordering::Relaxed);
1207        let misses = ing.cache_misses.load(Ordering::Relaxed);
1208        let stale_hits = ing.cache_stale_hits.load(Ordering::Relaxed);
1209        let (status, reason) =
1210            github_freshness_status(ing.cache.is_some(), hits, misses, stale_hits);
1211        assert!(matches!(status, FreshnessStatus::Fresh));
1212        assert!(reason.is_none());
1213    }
1214
    // -- warm-rerun cache behavior --
    //
    // These tests pin the contract that a second `shiplog intake` run
    // against a populated cache reports `cached` freshness. The
    // unit-level tests exercise `fetch_pr_details` directly because
    // (a) it is the cache-aware entry point the rest of the adapter
    // funnels through, and (b) its cache-hit branch returns before any
    // HTTP work — so a test can pre-seed the cache at the canonical
    // key, hand `fetch_pr_details` a constructed `Client` that is never
    // actually used, and observe the counter / status derivation that
    // the intake report's `source_freshness` block reads from. The
    // miss-then-fresh transition is also pinned at the cache primitive
    // layer (cache.get returns None → set value → cache.get returns
    // Some).
    //
    // The miss branch of `fetch_pr_details` requires a working HTTP
    // endpoint. It is covered by the recorded-fixtures harness below
    // (`RecordedGithubServer`), which
    // `recorded_http_fixtures_prove_full_fresh_then_cached_ingest` uses
    // to drive a full fresh-then-cached round trip through `ingest()`
    // and assert on `IngestOutput.freshness`.
1236
1237    fn make_pr_details() -> anyhow::Result<PullRequestDetails> {
1238        let ts = Utc
1239            .with_ymd_and_hms(2025, 5, 1, 12, 0, 0)
1240            .single()
1241            .ok_or_else(|| anyhow!("Utc.with_ymd_and_hms returned an ambiguous timestamp"))?;
1242        Ok(PullRequestDetails {
1243            title: "warm-rerun fixture".into(),
1244            created_at: ts,
1245            merged_at: Some(ts),
1246            additions: 10,
1247            deletions: 2,
1248            changed_files: 3,
1249            base: PullBase {
1250                repo: PullRepo {
1251                    full_name: "acme/widgets".into(),
1252                    html_url: "https://github.com/acme/widgets".into(),
1253                    private_field: false,
1254                },
1255            },
1256        })
1257    }
1258
1259    fn no_op_client() -> Result<Client> {
1260        // The cache-hit branch of fetch_pr_details returns before
1261        // touching the client, so the configured user-agent is the
1262        // only thing we need to make this a well-formed Client.
1263        // No network call is performed in any of the tests below.
1264        Client::builder()
1265            .user_agent("shiplog-warm-rerun-test")
1266            .build()
1267            .map_err(Into::into)
1268    }
1269
1270    #[test]
1271    fn warm_rerun_fetch_pr_details_records_cache_hit_without_network() -> anyhow::Result<()> {
1272        let ing = make_ingestor("octocat").with_in_memory_cache()?;
1273        let url = "https://api.github.com/repos/acme/widgets/pulls/1";
1274
1275        // Simulate "the first intake run already populated this entry"
1276        // by writing to the cache directly using the same canonical
1277        // CacheKey the fetch path constructs.
1278        let key = CacheKey::pr_details(url);
1279        let seeded = make_pr_details()?;
1280        let cache = ing
1281            .cache
1282            .as_ref()
1283            .ok_or_else(|| anyhow!("in-memory cache was just attached"))?;
1284        cache.set(&key, &seeded)?;
1285
1286        // First lookup on the SECOND intake run hits the cache. The
1287        // hit branch in fetch_pr_details returns before calling
1288        // get_json, so the no-op client is never invoked.
1289        let client = no_op_client()?;
1290        let got = ing.fetch_pr_details(&client, url)?;
1291        assert_eq!(got.title, seeded.title);
1292        assert_eq!(ing.cache_hits.load(Ordering::Relaxed), 1);
1293        assert_eq!(ing.cache_misses.load(Ordering::Relaxed), 0);
1294
1295        // Second lookup in the same run hits again (a real run can
1296        // request the same PR twice if details and reviews both refer
1297        // to it, depending on configuration). Counter must keep
1298        // climbing; the "no misses" invariant is what makes the status
1299        // Cached.
1300        let got_again = ing.fetch_pr_details(&client, url)?;
1301        assert_eq!(got_again.title, seeded.title);
1302        assert_eq!(ing.cache_hits.load(Ordering::Relaxed), 2);
1303        assert_eq!(ing.cache_misses.load(Ordering::Relaxed), 0);
1304
1305        // Status derivation mirrors the rule baked into
1306        // GithubIngestor::ingest: cache present, any hit, zero miss
1307        // => Cached.
1308        let hits = ing.cache_hits.load(Ordering::Relaxed);
1309        let misses = ing.cache_misses.load(Ordering::Relaxed);
1310        let stale_hits = ing.cache_stale_hits.load(Ordering::Relaxed);
1311        let (status, reason) =
1312            github_freshness_status(ing.cache.is_some(), hits, misses, stale_hits);
1313        assert!(
1314            matches!(status, FreshnessStatus::Cached),
1315            "warm rerun with fully populated cache must derive Cached, got {status:?}"
1316        );
1317        assert!(reason.is_none());
1318        assert_eq!(status.as_label(), "cached");
1319        Ok(())
1320    }
1321
1322    #[test]
1323    fn expired_cache_entry_records_stale_hit_without_network() -> anyhow::Result<()> {
1324        let ing = make_ingestor("octocat").with_in_memory_cache()?;
1325        let url = "https://api.github.com/repos/acme/widgets/pulls/3";
1326        let key = CacheKey::pr_details(url);
1327        let seeded = make_pr_details()?;
1328        let cache = ing
1329            .cache
1330            .as_ref()
1331            .ok_or_else(|| anyhow!("in-memory cache was just attached"))?;
1332        cache.set_with_ttl(&key, &seeded, chrono::Duration::seconds(-1))?;
1333
1334        let client = no_op_client()?;
1335        let got = ing.fetch_pr_details(&client, url)?;
1336        assert_eq!(got.title, seeded.title);
1337        assert_eq!(ing.cache_hits.load(Ordering::Relaxed), 1);
1338        assert_eq!(ing.cache_misses.load(Ordering::Relaxed), 0);
1339        assert_eq!(ing.cache_stale_hits.load(Ordering::Relaxed), 1);
1340
1341        let (status, reason) = github_freshness_status(
1342            ing.cache.is_some(),
1343            ing.cache_hits.load(Ordering::Relaxed),
1344            ing.cache_misses.load(Ordering::Relaxed),
1345            ing.cache_stale_hits.load(Ordering::Relaxed),
1346        );
1347        assert!(matches!(status, FreshnessStatus::Stale));
1348        assert_eq!(
1349            reason.as_deref(),
1350            Some("one or more expired cache entries were used")
1351        );
1352        assert_eq!(status.as_label(), "stale");
1353        Ok(())
1354    }
1355
    /// Handle to a local fixture server that replays recorded responses on a
    /// background thread.
    struct RecordedGithubServer {
        /// `http://<addr>` of the bound listener.
        base_url: String,
        /// Request lines logged by the server thread, shared with this handle.
        requests: Arc<Mutex<Vec<String>>>,
        /// Join handle of the server thread; taken (set to `None`) by `finish`.
        handle: Option<JoinHandle<anyhow::Result<()>>>,
    }
1361
1362    impl RecordedGithubServer {
1363        fn start(expected_requests: usize) -> anyhow::Result<Self> {
1364            let listener = TcpListener::bind("127.0.0.1:0").context("bind fixture server")?;
1365            listener
1366                .set_nonblocking(true)
1367                .context("set fixture server nonblocking")?;
1368            let base_url = format!("http://{}", listener.local_addr()?);
1369            let requests = Arc::new(Mutex::new(Vec::new()));
1370            let thread_requests = Arc::clone(&requests);
1371            let thread_base_url = base_url.clone();
1372            let handle = thread::spawn(move || {
1373                replay_github_fixtures(
1374                    listener,
1375                    &thread_base_url,
1376                    thread_requests,
1377                    expected_requests,
1378                )
1379            });
1380
1381            Ok(Self {
1382                base_url,
1383                requests,
1384                handle: Some(handle),
1385            })
1386        }
1387
1388        fn base_url(&self) -> String {
1389            self.base_url.clone()
1390        }
1391
1392        fn finish(mut self) -> anyhow::Result<Vec<String>> {
1393            if let Some(handle) = self.handle.take() {
1394                handle
1395                    .join()
1396                    .map_err(|_| anyhow!("recorded fixture server thread panicked"))??;
1397            }
1398            self.requests
1399                .lock()
1400                .map_err(|_| anyhow!("recorded fixture request log was poisoned"))
1401                .map(|requests| requests.clone())
1402        }
1403    }
1404
1405    fn replay_github_fixtures(
1406        listener: TcpListener,
1407        base_url: &str,
1408        requests: Arc<Mutex<Vec<String>>>,
1409        expected_requests: usize,
1410    ) -> anyhow::Result<()> {
1411        let deadline = Instant::now() + StdDuration::from_secs(10);
1412
1413        while fixture_request_count(&requests)? < expected_requests {
1414            match listener.accept() {
1415                Ok((mut stream, _peer)) => {
1416                    let request_line = handle_recorded_github_request(&mut stream, base_url)?;
1417                    requests
1418                        .lock()
1419                        .map_err(|_| anyhow!("recorded fixture request log was poisoned"))?
1420                        .push(request_line);
1421                }
1422                Err(err) if err.kind() == ErrorKind::WouldBlock => {
1423                    if Instant::now() > deadline {
1424                        return Err(anyhow!(
1425                            "recorded fixture server expected {expected_requests} requests, saw {}",
1426                            fixture_request_count(&requests)?
1427                        ));
1428                    }
1429                    thread::sleep(StdDuration::from_millis(10));
1430                }
1431                Err(err) => return Err(err).context("accept recorded GitHub fixture request"),
1432            }
1433        }
1434
1435        Ok(())
1436    }
1437
1438    fn fixture_request_count(requests: &Arc<Mutex<Vec<String>>>) -> anyhow::Result<usize> {
1439        requests
1440            .lock()
1441            .map_err(|_| anyhow!("recorded fixture request log was poisoned"))
1442            .map(|requests| requests.len())
1443    }
1444
1445    fn handle_recorded_github_request(
1446        stream: &mut TcpStream,
1447        base_url: &str,
1448    ) -> anyhow::Result<String> {
1449        let mut buf = [0_u8; 4096];
1450        let mut received = Vec::new();
1451        loop {
1452            let n = stream
1453                .read(&mut buf)
1454                .context("read recorded GitHub fixture request")?;
1455            if n == 0 {
1456                break;
1457            }
1458            received.extend_from_slice(&buf[..n]);
1459            if received.windows(4).any(|window| window == b"\r\n\r\n") {
1460                break;
1461            }
1462            if received.len() > 64 * 1024 {
1463                return Err(anyhow!("recorded GitHub fixture request was too large"));
1464            }
1465        }
1466
1467        let request = String::from_utf8_lossy(&received);
1468        let request_line = request
1469            .lines()
1470            .next()
1471            .ok_or_else(|| anyhow!("recorded GitHub fixture request had no request line"))?
1472            .to_string();
1473        let target = request_line
1474            .split_whitespace()
1475            .nth(1)
1476            .ok_or_else(|| anyhow!("recorded GitHub fixture request had no target"))?;
1477        let (status, body) = recorded_github_fixture_response(target, base_url);
1478        let response = format!(
1479            "HTTP/1.1 {status}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}",
1480            body.len()
1481        );
1482        stream
1483            .write_all(response.as_bytes())
1484            .context("write recorded GitHub fixture response")?;
1485        stream
1486            .flush()
1487            .context("flush recorded GitHub fixture response")?;
1488        Ok(request_line)
1489    }
1490
1491    fn recorded_github_fixture_response(target: &str, base_url: &str) -> (&'static str, String) {
1492        let body = if target.starts_with("/search/issues?")
1493            && target_has_query_param(target, "per_page", "1")
1494        {
1495            include_str!("../tests/fixtures/github-warm-rerun/search_meta.json")
1496        } else if target.starts_with("/search/issues?")
1497            && target_has_query_param(target, "per_page", "100")
1498        {
1499            include_str!("../tests/fixtures/github-warm-rerun/search_items.json")
1500        } else if target == "/repos/acme/widgets/pulls/1" {
1501            include_str!("../tests/fixtures/github-warm-rerun/pr_details.json")
1502        } else {
1503            r#"{"message":"unexpected recorded fixture request"}"#
1504        };
1505        let status = if body.contains("unexpected recorded fixture request") {
1506            "404 Not Found"
1507        } else {
1508            "200 OK"
1509        };
1510        (status, body.replace("__API_BASE__", base_url))
1511    }
1512
1513    fn target_has_query_param(target: &str, key: &str, value: &str) -> bool {
1514        target
1515            .split_once('?')
1516            .map(|(_path, query)| {
1517                query.split('&').any(|pair| {
1518                    pair.split_once('=')
1519                        .map(|(k, v)| k == key && v == value)
1520                        .unwrap_or(false)
1521                })
1522            })
1523            .unwrap_or(false)
1524    }
1525
    /// Drives `ingest()` twice against the local fixture server with a
    /// shared on-disk cache directory: the first (cold) run must report
    /// `Fresh` with one cache miss, the second (warm) run must report
    /// `Cached` with one cache hit and no repeated PR-details request.
    #[test]
    fn recorded_http_fixtures_prove_full_fresh_then_cached_ingest() -> anyhow::Result<()> {
        // Five requests in total: the four search requests and the single
        // PR-details request asserted at the end of this test.
        let server = RecordedGithubServer::start(5)?;
        let cache_dir = tempfile::tempdir().context("create fixture cache dir")?;

        // Cold run: nothing is cached yet.
        let mut cold = make_ingestor("octocat").with_cache(cache_dir.path())?;
        cold.api_base = server.base_url();
        let cold_output = cold.ingest()?;
        assert_eq!(cold_output.events.len(), 1);
        let cold_freshness = cold_output
            .freshness
            .first()
            .ok_or_else(|| anyhow!("cold fixture ingest did not emit source freshness"))?;
        assert!(
            matches!(cold_freshness.status, FreshnessStatus::Fresh),
            "first recorded fixture run should be fresh, got {}",
            cold_freshness.status.as_label()
        );
        assert_eq!(cold_freshness.cache_hits, 0);
        assert_eq!(cold_freshness.cache_misses, 1);

        // Warm run: a new ingestor pointed at the same cache directory.
        let mut warm = make_ingestor("octocat").with_cache(cache_dir.path())?;
        warm.api_base = server.base_url();
        let warm_output = warm.ingest()?;
        assert_eq!(warm_output.events.len(), 1);
        let warm_freshness = warm_output
            .freshness
            .first()
            .ok_or_else(|| anyhow!("warm fixture ingest did not emit source freshness"))?;
        assert!(
            matches!(warm_freshness.status, FreshnessStatus::Cached),
            "second recorded fixture run should be cached, got {}",
            warm_freshness.status.as_label()
        );
        assert_eq!(warm_freshness.cache_hits, 1);
        assert_eq!(warm_freshness.cache_misses, 0);

        // Joining the server also surfaces any error raised on its thread.
        let requests = server.finish()?;
        let search_requests = requests
            .iter()
            .filter(|line| line.contains("/search/issues?"))
            .count();
        let detail_requests = requests
            .iter()
            .filter(|line| line.contains("/repos/acme/widgets/pulls/1"))
            .count();
        // Search requests reach the server on both runs; details only once.
        assert_eq!(search_requests, 4);
        assert_eq!(
            detail_requests, 1,
            "warm run must serve PR details from cache instead of replaying HTTP"
        );
        Ok(())
    }
1579
1580    #[test]
1581    fn warm_rerun_cache_primitive_round_trips_miss_then_hit() -> anyhow::Result<()> {
1582        // Pin the fresh-vs-cached primitive on the cache itself: a
1583        // first lookup against an empty cache returns None (so the
1584        // adapter would increment cache_misses and fetch live), and a
1585        // subsequent lookup after `set` returns the seeded value (so
1586        // the adapter would increment cache_hits). This is what backs
1587        // the warm-rerun status transition; the assertions above prove
1588        // the adapter increments correctly when fed a hit, and these
1589        // assertions prove the cache layer itself behaves the way the
1590        // adapter assumes.
1591        let ing = make_ingestor("octocat").with_in_memory_cache()?;
1592        let cache = ing
1593            .cache
1594            .as_ref()
1595            .ok_or_else(|| anyhow!("in-memory cache was just attached"))?;
1596        let key = CacheKey::pr_details("https://api.github.com/repos/acme/widgets/pulls/2");
1597
1598        let cold: Option<PullRequestDetails> = cache.get(&key)?;
1599        assert!(
1600            cold.is_none(),
1601            "first lookup against an empty cache must miss"
1602        );
1603
1604        let value = make_pr_details()?;
1605        cache.set(&key, &value)?;
1606        let warm: Option<PullRequestDetails> = cache.get(&key)?;
1607        let warm = warm.ok_or_else(|| anyhow!("second lookup after set must hit"))?;
1608        assert_eq!(warm.title, value.title);
1609        Ok(())
1610    }
1611
1612    // -- api_url --
1613
1614    #[test]
1615    fn api_url_concatenates_path() {
1616        let ing = make_ingestor("octocat");
1617        assert_eq!(
1618            ing.api_url("/search/issues"),
1619            "https://api.github.com/search/issues"
1620        );
1621    }
1622
1623    #[test]
1624    fn api_url_strips_trailing_slash() {
1625        let mut ing = make_ingestor("octocat");
1626        ing.api_base = "https://ghes.local/api/v3/".to_string();
1627        assert_eq!(
1628            ing.api_url("/search/issues"),
1629            "https://ghes.local/api/v3/search/issues"
1630        );
1631    }
1632
1633    // -- html_base_url edge cases --
1634
1635    #[test]
1636    fn html_base_url_with_port() {
1637        let mut ing = make_ingestor("octocat");
1638        ing.api_base = "https://ghes.local:8443/api/v3".to_string();
1639        assert_eq!(ing.html_base_url(), "https://ghes.local:8443");
1640    }
1641
1642    #[test]
1643    fn html_base_url_invalid_url_falls_back() {
1644        let mut ing = make_ingestor("octocat");
1645        ing.api_base = "not-a-valid-url".to_string();
1646        assert_eq!(ing.html_base_url(), "https://github.com");
1647    }
1648
1649    #[test]
1650    fn html_base_url_http_scheme() {
1651        let mut ing = make_ingestor("octocat");
1652        ing.api_base = "http://internal-ghes.corp/api/v3".to_string();
1653        assert_eq!(ing.html_base_url(), "http://internal-ghes.corp");
1654    }
1655
1656    // -- github_inclusive_range edge cases --
1657
1658    #[test]
1659    fn github_inclusive_range_single_day_window() {
1660        let window = TimeWindow {
1661            since: NaiveDate::from_ymd_opt(2025, 3, 15).unwrap(),
1662            until: NaiveDate::from_ymd_opt(2025, 3, 16).unwrap(),
1663        };
1664        let (start, end) = github_inclusive_range(&window);
1665        assert_eq!(start, "2025-03-15");
1666        assert_eq!(end, "2025-03-15");
1667    }
1668
1669    #[test]
1670    fn github_inclusive_range_year_boundary() {
1671        let window = TimeWindow {
1672            since: NaiveDate::from_ymd_opt(2024, 12, 1).unwrap(),
1673            until: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1674        };
1675        let (start, end) = github_inclusive_range(&window);
1676        assert_eq!(start, "2024-12-01");
1677        assert_eq!(end, "2024-12-31");
1678    }
1679
1680    #[test]
1681    fn github_inclusive_range_same_day_uses_pred() {
1682        // When since == until, pred_opt gives previous day
1683        let window = TimeWindow {
1684            since: NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1685            until: NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1686        };
1687        let (start, end) = github_inclusive_range(&window);
1688        assert_eq!(start, "2025-06-01");
1689        assert_eq!(end, "2025-05-31");
1690    }
1691
1692    // -- build_url_with_params edge cases --
1693
1694    #[test]
1695    fn build_url_with_params_empty_params() {
1696        let url = build_url_with_params("https://api.github.com/search/issues", &[]).unwrap();
1697        assert_eq!(url.as_str(), "https://api.github.com/search/issues");
1698    }
1699
1700    #[test]
1701    fn build_url_with_params_special_characters() {
1702        let url = build_url_with_params(
1703            "https://api.github.com/search/issues",
1704            &[(
1705                "q",
1706                "author:user+name with spaces&special=chars".to_string(),
1707            )],
1708        )
1709        .unwrap();
1710        // Should not contain raw spaces
1711        assert!(!url.as_str().contains(' '));
1712        // Should roundtrip the value
1713        let val: String = url
1714            .query_pairs()
1715            .find(|(k, _)| k == "q")
1716            .map(|(_, v)| v.into_owned())
1717            .unwrap();
1718        assert_eq!(val, "author:user+name with spaces&special=chars");
1719    }
1720
1721    #[test]
1722    fn build_url_with_params_invalid_base_url_errors() {
1723        let result = build_url_with_params("not a url", &[]);
1724        assert!(result.is_err());
1725    }
1726
1727    // -- repo_from_repo_url edge cases --
1728
1729    #[test]
1730    fn repo_from_repo_url_ghes_url() {
1731        // GHES API URLs have /api/v3/repos/owner/repo — the function looks for
1732        // the /repos/ segment, so the path must contain "repos" at position [0].
1733        // Standard GHES URLs: the path_segments include ["api","v3","repos","owner","repo"].
1734        // The function only matches when v[0] == "repos", so GHES-style deep paths
1735        // don't match and fall back to unknown.
1736        let (full, html) = repo_from_repo_url(
1737            "https://ghes.corp/api/v3/repos/myorg/myrepo",
1738            "https://ghes.corp",
1739        );
1740        // The function requires path segment [0] == "repos", but GHES has api/v3/repos,
1741        // so segment[0] == "api". This correctly falls back.
1742        assert_eq!(full, "unknown/unknown");
1743        assert_eq!(html, "https://ghes.corp");
1744    }
1745
1746    #[test]
1747    fn repo_from_repo_url_three_plus_segments_wrong_prefix_falls_back() {
1748        // 3+ segments but v[0] != "repos" → must fall back.
1749        // Kills && → || mutation: with ||, v.len()>=3 alone would enter the block.
1750        let (full, html) = repo_from_repo_url(
1751            "https://api.github.com/users/octocat/repos",
1752            "https://github.com",
1753        );
1754        assert_eq!(full, "unknown/unknown");
1755        assert_eq!(html, "https://github.com");
1756    }
1757
1758    #[test]
1759    fn repo_from_repo_url_exactly_two_segments_repos_prefix_falls_back() {
1760        // v[0] == "repos" but only 2 segments → must fall back.
1761        // Kills && → || mutation: with ||, v[0]=="repos" alone would enter the block.
1762        let (full, html) = repo_from_repo_url(
1763            "https://api.github.com/repos/owner-only",
1764            "https://github.com",
1765        );
1766        assert_eq!(full, "unknown/unknown");
1767        assert_eq!(html, "https://github.com");
1768    }
1769
1770    #[test]
1771    fn repo_from_repo_url_trailing_slash_in_html_base() {
1772        let (full, html) = repo_from_repo_url(
1773            "https://api.github.com/repos/owner/repo",
1774            "https://github.com/",
1775        );
1776        assert_eq!(full, "owner/repo");
1777        assert_eq!(html, "https://github.com/owner/repo");
1778    }
1779
1780    #[test]
1781    fn repo_from_repo_url_extra_path_segments() {
1782        // URL with more path segments after owner/repo (e.g. /repos/owner/repo/pulls)
1783        let (full, html) = repo_from_repo_url(
1784            "https://api.github.com/repos/org/project/pulls",
1785            "https://github.com",
1786        );
1787        assert_eq!(full, "org/project");
1788        assert_eq!(html, "https://github.com/org/project");
1789    }
1790
1791    #[test]
1792    fn repo_from_repo_url_empty_string() {
1793        let (full, html) = repo_from_repo_url("", "https://github.com");
1794        assert_eq!(full, "unknown/unknown");
1795        assert_eq!(html, "https://github.com");
1796    }
1797
1798    // -- build_pr_query date range formatting --
1799
1800    #[test]
1801    fn build_pr_query_uses_inclusive_range() {
1802        let ing = make_ingestor("alice");
1803        let w = TimeWindow {
1804            since: NaiveDate::from_ymd_opt(2025, 3, 1).unwrap(),
1805            until: NaiveDate::from_ymd_opt(2025, 3, 15).unwrap(),
1806        };
1807        let q = ing.build_pr_query(&w);
1808        // Merged query should use the inclusive end date (2025-03-14)
1809        assert!(q.contains("2025-03-01..2025-03-14"), "got: {q}");
1810    }
1811
1812    #[test]
1813    fn build_pr_query_unknown_mode_defaults_to_merged() {
1814        let mut ing = make_ingestor("octocat");
1815        ing.mode = "unknown_mode".to_string();
1816        let w = TimeWindow {
1817            since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1818            until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1819        };
1820        let q = ing.build_pr_query(&w);
1821        assert!(
1822            q.contains("is:merged"),
1823            "unknown mode should fall through to merged"
1824        );
1825    }
1826
1827    // -- build_reviewed_query format --
1828
1829    #[test]
1830    fn build_reviewed_query_uses_updated_qualifier() {
1831        let ing = make_ingestor("reviewer");
1832        let w = TimeWindow {
1833            since: NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1834            until: NaiveDate::from_ymd_opt(2025, 7, 1).unwrap(),
1835        };
1836        let q = ing.build_reviewed_query(&w);
1837        assert!(
1838            q.contains("updated:"),
1839            "review query should use updated: qualifier"
1840        );
1841        assert!(q.contains("reviewed-by:reviewer"));
1842    }
1843
1844    // -- SearchResponse deserialization --
1845
1846    #[test]
1847    fn search_response_deserializes_from_json() {
1848        let json = r#"{
1849            "total_count": 42,
1850            "incomplete_results": false,
1851            "items": [
1852                {
1853                    "id": 1001,
1854                    "number": 123,
1855                    "title": "Fix bug",
1856                    "html_url": "https://github.com/owner/repo/pull/123",
1857                    "repository_url": "https://api.github.com/repos/owner/repo",
1858                    "pull_request": { "url": "https://api.github.com/repos/owner/repo/pulls/123" },
1859                    "created_at": "2025-01-15T10:30:00Z"
1860                }
1861            ]
1862        }"#;
1863
1864        let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1865        assert_eq!(resp.total_count, 42);
1866        assert!(!resp.incomplete_results);
1867        assert_eq!(resp.items.len(), 1);
1868        assert_eq!(resp.items[0].number, 123);
1869        assert_eq!(resp.items[0].title, "Fix bug");
1870        assert!(resp.items[0].pull_request.is_some());
1871    }
1872
1873    #[test]
1874    fn search_response_deserializes_without_pull_request() {
1875        let json = r#"{
1876            "total_count": 1,
1877            "incomplete_results": true,
1878            "items": [
1879                {
1880                    "id": 2002,
1881                    "number": 456,
1882                    "title": "Issue only",
1883                    "html_url": "https://github.com/owner/repo/issues/456",
1884                    "repository_url": "https://api.github.com/repos/owner/repo",
1885                    "created_at": null
1886                }
1887            ]
1888        }"#;
1889
1890        let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1891        assert!(resp.incomplete_results);
1892        assert!(resp.items[0].pull_request.is_none());
1893        assert!(resp.items[0].created_at.is_none());
1894    }
1895
1896    #[test]
1897    fn search_response_empty_items() {
1898        let json = r#"{"total_count": 0, "incomplete_results": false, "items": []}"#;
1899        let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1900        assert_eq!(resp.total_count, 0);
1901        assert!(resp.items.is_empty());
1902    }
1903
1904    // -- PullRequestDetails deserialization --
1905
1906    #[test]
1907    fn pr_details_deserializes_from_json() {
1908        let json = r#"{
1909            "title": "Add feature",
1910            "created_at": "2025-01-10T08:00:00Z",
1911            "merged_at": "2025-01-12T14:30:00Z",
1912            "additions": 150,
1913            "deletions": 30,
1914            "changed_files": 5,
1915            "base": {
1916                "repo": {
1917                    "full_name": "owner/repo",
1918                    "html_url": "https://github.com/owner/repo",
1919                    "private": false
1920                }
1921            }
1922        }"#;
1923
1924        let details: PullRequestDetails = serde_json::from_str(json).unwrap();
1925        assert_eq!(details.title, "Add feature");
1926        assert!(details.merged_at.is_some());
1927        assert_eq!(details.additions, 150);
1928        assert_eq!(details.deletions, 30);
1929        assert_eq!(details.changed_files, 5);
1930        assert!(!details.base.repo.private_field);
1931        assert_eq!(details.base.repo.full_name, "owner/repo");
1932    }
1933
1934    #[test]
1935    fn pr_details_private_repo() {
1936        let json = r#"{
1937            "title": "Secret fix",
1938            "created_at": "2025-01-10T08:00:00Z",
1939            "merged_at": null,
1940            "additions": 10,
1941            "deletions": 5,
1942            "changed_files": 1,
1943            "base": {
1944                "repo": {
1945                    "full_name": "corp/secret",
1946                    "html_url": "https://github.com/corp/secret",
1947                    "private": true
1948                }
1949            }
1950        }"#;
1951
1952        let details: PullRequestDetails = serde_json::from_str(json).unwrap();
1953        assert!(details.base.repo.private_field);
1954        assert!(details.merged_at.is_none());
1955    }
1956
1957    // -- PullRequestReview deserialization --
1958
1959    #[test]
1960    fn pr_review_deserializes_from_json() {
1961        let json = r#"{
1962            "id": 99001,
1963            "state": "APPROVED",
1964            "submitted_at": "2025-02-01T12:00:00Z",
1965            "user": { "login": "reviewer42" }
1966        }"#;
1967
1968        let review: PullRequestReview = serde_json::from_str(json).unwrap();
1969        assert_eq!(review.id, 99001);
1970        assert_eq!(review.state, "APPROVED");
1971        assert!(review.submitted_at.is_some());
1972        assert_eq!(review.user.login, "reviewer42");
1973    }
1974
1975    #[test]
1976    fn pr_review_with_null_submitted_at() {
1977        let json = r#"{
1978            "id": 99002,
1979            "state": "PENDING",
1980            "submitted_at": null,
1981            "user": { "login": "pending-reviewer" }
1982        }"#;
1983
1984        let review: PullRequestReview = serde_json::from_str(json).unwrap();
1985        assert!(review.submitted_at.is_none());
1986    }
1987
    /// End-to-end fixture test: full-shape GitHub payloads (search result,
    /// PR details, review list) are deserialized into the adapter's structs
    /// and then converted into PR and review events.
    ///
    /// PR details and reviews are placed in the in-memory cache before each
    /// conversion call; presumably this is what lets the conversions run
    /// without real HTTP traffic — confirm against `items_to_pr_events` /
    /// `items_to_review_events`.
    #[test]
    fn recorded_github_search_payload_deserializes_and_converts() {
        // Search API response with one PR item. It carries many fields
        // (labels, user, score, ...) beyond the handful the tests in this
        // file read from `SearchIssueItem`.
        let search_payload = serde_json::json!({
            "total_count": 1,
            "incomplete_results": false,
            "items": [
                {
                    "url": "https://api.github.com/repos/octocat/Hello-World/issues/1347",
                    "repository_url": "https://api.github.com/repos/octocat/Hello-World",
                    "labels_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347/labels{/name}",
                    "comments_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347/comments",
                    "events_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347/events",
                    "html_url": "https://github.com/octocat/Hello-World/pull/1347",
                    "id": 1000001347,
                    "node_id": "PR_kwDOABCD",
                    "number": 1347,
                    "state": "closed",
                    "title": "Reduce deploy rollback toil",
                    "user": {
                        "login": "alice",
                        "id": 100,
                        "node_id": "MDQ6VXNlcjEwMA==",
                        "avatar_url": "https://github.com/images/error/alice_happy.gif",
                        "gravatar_id": "",
                        "url": "https://api.github.com/users/alice",
                        "html_url": "https://github.com/alice",
                        "type": "User",
                        "site_admin": false
                    },
                    "labels": [
                        {
                            "id": 208045946,
                            "node_id": "MDU6TGFiZWwyMDgwNDU5NDY=",
                            "url": "https://api.github.com/repos/octocat/Hello-World/labels/reliability",
                            "name": "reliability",
                            "description": "Reliability work",
                            "color": "0e8a16",
                            "default": false
                        }
                    ],
                    "pull_request": {
                        "url": "https://api.github.com/repos/octocat/Hello-World/pulls/1347",
                        "html_url": "https://github.com/octocat/Hello-World/pull/1347",
                        "diff_url": "https://github.com/octocat/Hello-World/pull/1347.diff",
                        "patch_url": "https://github.com/octocat/Hello-World/pull/1347.patch"
                    },
                    "closed_at": "2025-01-18T16:00:00Z",
                    "created_at": "2025-01-10T09:00:00Z",
                    "updated_at": "2025-01-18T16:00:00Z",
                    "author_association": "MEMBER",
                    "score": 1.0
                }
            ]
        });
        // Pull-request detail response for the same PR. Note that
        // `head.repo.private` is false while `base.repo.private` is true;
        // the assertions below expect the private (base) value.
        let details_payload = serde_json::json!({
            "url": "https://api.github.com/repos/octocat/Hello-World/pulls/1347",
            "id": 2000001347,
            "node_id": "PR_kwDOABCD",
            "html_url": "https://github.com/octocat/Hello-World/pull/1347",
            "diff_url": "https://github.com/octocat/Hello-World/pull/1347.diff",
            "patch_url": "https://github.com/octocat/Hello-World/pull/1347.patch",
            "issue_url": "https://api.github.com/repos/octocat/Hello-World/issues/1347",
            "number": 1347,
            "state": "closed",
            "locked": false,
            "title": "Reduce deploy rollback toil",
            "user": { "login": "alice", "id": 100 },
            "body": "Add preflight checks and rollback runbook links.",
            "created_at": "2025-01-10T09:00:00Z",
            "updated_at": "2025-01-18T16:00:00Z",
            "closed_at": "2025-01-18T16:00:00Z",
            "merged_at": "2025-01-18T16:00:00Z",
            "merge_commit_sha": "ecdd80bb57125d7ba9641ffaa4d7d2c19d3f3091",
            "assignee": null,
            "assignees": [],
            "requested_reviewers": [],
            "requested_teams": [],
            "labels": [],
            "head": {
                "label": "alice:rollback-preflight",
                "ref": "rollback-preflight",
                "sha": "bbcd538c8e72b8c175046e27cc8f907076331401",
                "user": { "login": "alice", "id": 100 },
                "repo": {
                    "id": 1296269,
                    "name": "Hello-World",
                    "full_name": "octocat/Hello-World",
                    "private": false,
                    "html_url": "https://github.com/octocat/Hello-World"
                }
            },
            "base": {
                "label": "octocat:main",
                "ref": "main",
                "sha": "bbcd538c8e72b8c175046e27cc8f907076331402",
                "user": { "login": "octocat", "id": 1 },
                "repo": {
                    "id": 1296269,
                    "name": "Hello-World",
                    "full_name": "octocat/Hello-World",
                    "private": true,
                    "html_url": "https://github.com/octocat/Hello-World"
                }
            },
            "draft": false,
            "merged": true,
            "mergeable": true,
            "rebaseable": true,
            "mergeable_state": "clean",
            "merged_by": { "login": "octocat", "id": 1 },
            "comments": 2,
            "review_comments": 1,
            "commits": 3,
            "additions": 144,
            "deletions": 18,
            "changed_files": 6
        });
        // Review list with three entries: an APPROVED review by alice (full
        // field set), a COMMENTED review by bob, and a PENDING review by
        // alice whose `submitted_at` is null.
        let reviews_payload = serde_json::json!([
            {
                "id": 99001,
                "node_id": "MDE3OlB1bGxSZXF1ZXN0UmV2aWV3OTkwMDE=",
                "user": {
                    "login": "alice",
                    "id": 100,
                    "node_id": "MDQ6VXNlcjEwMA==",
                    "avatar_url": "https://github.com/images/error/alice_happy.gif",
                    "gravatar_id": "",
                    "url": "https://api.github.com/users/alice",
                    "html_url": "https://github.com/alice",
                    "type": "User",
                    "site_admin": false
                },
                "body": "Verified the rollback path.",
                "state": "APPROVED",
                "html_url": "https://github.com/octocat/Hello-World/pull/1347#pullrequestreview-99001",
                "pull_request_url": "https://api.github.com/repos/octocat/Hello-World/pulls/1347",
                "_links": {
                    "html": { "href": "https://github.com/octocat/Hello-World/pull/1347#pullrequestreview-99001" },
                    "pull_request": { "href": "https://api.github.com/repos/octocat/Hello-World/pulls/1347" }
                },
                "submitted_at": "2025-01-18T15:30:00Z",
                "commit_id": "ecdd80bb57125d7ba9641ffaa4d7d2c19d3f3091",
                "author_association": "MEMBER"
            },
            {
                "id": 99002,
                "user": { "login": "bob" },
                "state": "COMMENTED",
                "submitted_at": "2025-01-18T15:45:00Z"
            },
            {
                "id": 99003,
                "user": { "login": "alice" },
                "state": "PENDING",
                "submitted_at": null
            }
        ]);

        // Step 1: the search payload deserializes. It is cloned because the
        // same payload is deserialized a second time for the review pass.
        let search: SearchResponse<SearchIssueItem> =
            serde_json::from_value(search_payload.clone()).unwrap();
        assert_eq!(search.total_count, 1);
        assert!(!search.incomplete_results);
        assert_eq!(
            search.items[0].repository_url,
            "https://api.github.com/repos/octocat/Hello-World"
        );
        // The PR API URL from the search item is reused for both cache keys
        // and for the `source.url` assertions below.
        let pr_url = search.items[0].pull_request.as_ref().unwrap().url.clone();

        // Step 2: the details payload deserializes.
        let details: PullRequestDetails = serde_json::from_value(details_payload).unwrap();
        assert_eq!(details.additions, 144);
        assert_eq!(details.deletions, 18);
        assert_eq!(details.changed_files, 6);
        assert!(details.base.repo.private_field);

        // Step 3: seed the in-memory cache with the details under the PR
        // URL before converting the search items into PR events.
        let mut ing = make_ingestor("alice").with_in_memory_cache().unwrap();
        ing.mode = "merged".to_string();
        ing.cache
            .as_ref()
            .unwrap()
            .set(&CacheKey::pr_details(&pr_url), &details)
            .unwrap();

        let client = Client::new();
        let pr_events = ing.items_to_pr_events(&client, search.items).unwrap();
        assert_eq!(pr_events.len(), 1);
        let pr_event = &pr_events[0];
        assert_eq!(pr_event.kind, EventKind::PullRequest);
        assert_eq!(pr_event.actor.login, "alice");
        assert_eq!(pr_event.repo.full_name, "octocat/Hello-World");
        assert_eq!(pr_event.repo.visibility, RepoVisibility::Private);
        assert_eq!(pr_event.source.system, SourceSystem::Github);
        assert_eq!(pr_event.source.url.as_deref(), Some(pr_url.as_str()));
        // Matches the search item's `id` (1000001347), not the details
        // payload's `id` (2000001347).
        assert_eq!(pr_event.source.opaque_id.as_deref(), Some("1000001347"));

        // The payload fields must match the values in `details_payload`.
        if let EventPayload::PullRequest(pr) = &pr_event.payload {
            assert_eq!(pr.number, 1347);
            assert_eq!(pr.title, "Reduce deploy rollback toil");
            assert_eq!(pr.state, PullRequestState::Merged);
            assert_eq!(
                pr.merged_at,
                Some("2025-01-18T16:00:00Z".parse::<DateTime<Utc>>().unwrap())
            );
            assert_eq!(pr.additions, Some(144));
            assert_eq!(pr.deletions, Some(18));
            assert_eq!(pr.changed_files, Some(6));
        } else {
            panic!("expected PullRequest payload");
        }

        // Step 4: seed the cache with the review list, then convert the
        // same search payload into review events.
        // NOTE(review): the `1` passed to `pr_reviews` is presumably a page
        // number — confirm against `CacheKey::pr_reviews`.
        let reviews: Vec<PullRequestReview> = serde_json::from_value(reviews_payload).unwrap();
        ing.cache
            .as_ref()
            .unwrap()
            .set(&CacheKey::pr_reviews(&pr_url, 1), &reviews)
            .unwrap();
        let review_search: SearchResponse<SearchIssueItem> =
            serde_json::from_value(search_payload).unwrap();

        let review_events = ing
            .items_to_review_events(&client, review_search.items)
            .unwrap();
        // Only one of the three reviews becomes an event, and its id
        // (99001) is alice's APPROVED review. Presumably bob's is dropped
        // for not being by the ingest user and the PENDING one for having
        // no `submitted_at` — confirm against `items_to_review_events`.
        assert_eq!(review_events.len(), 1);
        let review_event = &review_events[0];
        assert_eq!(review_event.kind, EventKind::Review);
        assert_eq!(review_event.actor.login, "alice");
        assert_eq!(review_event.repo.full_name, "octocat/Hello-World");
        assert_eq!(review_event.source.url.as_deref(), Some(pr_url.as_str()));
        assert_eq!(review_event.source.opaque_id.as_deref(), Some("99001"));

        if let EventPayload::Review(review) = &review_event.payload {
            assert_eq!(review.pull_number, 1347);
            assert_eq!(review.pull_title, "Reduce deploy rollback toil");
            assert_eq!(review.state, "APPROVED");
            assert_eq!(
                review.submitted_at,
                "2025-01-18T15:30:00Z".parse::<DateTime<Utc>>().unwrap()
            );
        } else {
            panic!("expected Review payload");
        }
    }
2229
2230    // -- items_to_pr_events (no network, fetch_details=false) --
2231
2232    #[test]
2233    fn items_to_pr_events_without_details_produces_events() {
2234        let mut ing = make_ingestor("alice");
2235        ing.fetch_details = false;
2236
2237        let client = Client::new();
2238        let items = vec![
2239            make_search_item(10, "org/repo-a", true),
2240            make_search_item(20, "org/repo-b", true),
2241        ];
2242
2243        let events = ing.items_to_pr_events(&client, items).unwrap();
2244        assert_eq!(events.len(), 2);
2245
2246        assert_eq!(events[0].kind, EventKind::PullRequest);
2247        assert_eq!(events[0].actor.login, "alice");
2248        assert_eq!(events[0].repo.full_name, "org/repo-a");
2249        assert_eq!(events[0].links.len(), 1);
2250        assert_eq!(events[0].links[0].label, "pr");
2251
2252        assert_eq!(events[1].repo.full_name, "org/repo-b");
2253    }
2254
2255    #[test]
2256    fn items_to_pr_events_skips_items_without_pr_ref() {
2257        let mut ing = make_ingestor("bob");
2258        ing.fetch_details = false;
2259
2260        let client = Client::new();
2261        let items = vec![
2262            make_search_item(1, "org/repo", true),
2263            make_search_item(2, "org/repo", false), // no pull_request ref
2264            make_search_item(3, "org/repo", true),
2265        ];
2266
2267        let events = ing.items_to_pr_events(&client, items).unwrap();
2268        assert_eq!(
2269            events.len(),
2270            2,
2271            "items without pull_request should be skipped"
2272        );
2273    }
2274
2275    #[test]
2276    fn items_to_pr_events_empty_input() {
2277        let mut ing = make_ingestor("carol");
2278        ing.fetch_details = false;
2279        let client = Client::new();
2280        let events = ing.items_to_pr_events(&client, vec![]).unwrap();
2281        assert!(events.is_empty());
2282    }
2283
2284    #[test]
2285    fn items_to_pr_events_sets_source_system() {
2286        let mut ing = make_ingestor("dave");
2287        ing.fetch_details = false;
2288
2289        let client = Client::new();
2290        let items = vec![make_search_item(42, "org/repo", true)];
2291        let events = ing.items_to_pr_events(&client, items).unwrap();
2292
2293        assert_eq!(events[0].source.system, SourceSystem::Github);
2294        assert!(events[0].source.url.is_some());
2295        assert!(events[0].source.opaque_id.is_some());
2296    }
2297
2298    #[test]
2299    fn items_to_pr_events_merged_mode_uses_created_at_as_occurred() {
2300        let mut ing = make_ingestor("eve");
2301        ing.fetch_details = false;
2302        ing.mode = "merged".to_string();
2303
2304        let client = Client::new();
2305        let mut item = make_search_item(1, "org/repo", true);
2306        let created = DateTime::parse_from_rfc3339("2025-03-15T10:00:00Z")
2307            .unwrap()
2308            .with_timezone(&Utc);
2309        item.created_at = Some(created);
2310
2311        let events = ing.items_to_pr_events(&client, vec![item]).unwrap();
2312        // Without details, merged_at is None, so occurred_at falls back to created_at
2313        assert_eq!(events[0].occurred_at, created);
2314    }
2315
2316    #[test]
2317    fn items_to_pr_events_created_mode_uses_created_at() {
2318        let mut ing = make_ingestor("frank");
2319        ing.fetch_details = false;
2320        ing.mode = "created".to_string();
2321
2322        let client = Client::new();
2323        let mut item = make_search_item(1, "org/repo", true);
2324        let created = DateTime::parse_from_rfc3339("2025-04-01T12:00:00Z")
2325            .unwrap()
2326            .with_timezone(&Utc);
2327        item.created_at = Some(created);
2328
2329        let events = ing.items_to_pr_events(&client, vec![item]).unwrap();
2330        assert_eq!(events[0].occurred_at, created);
2331    }
2332
2333    #[test]
2334    fn items_to_pr_events_without_details_has_unknown_visibility() {
2335        let mut ing = make_ingestor("grace");
2336        ing.fetch_details = false;
2337
2338        let client = Client::new();
2339        let items = vec![make_search_item(1, "org/repo", true)];
2340        let events = ing.items_to_pr_events(&client, items).unwrap();
2341
2342        assert_eq!(events[0].repo.visibility, RepoVisibility::Unknown);
2343    }
2344
2345    #[test]
2346    fn items_to_pr_events_without_details_state_is_unknown() {
2347        let mut ing = make_ingestor("heidi");
2348        ing.fetch_details = false;
2349
2350        let client = Client::new();
2351        let items = vec![make_search_item(1, "org/repo", true)];
2352        let events = ing.items_to_pr_events(&client, items).unwrap();
2353
2354        if let EventPayload::PullRequest(ref pr) = events[0].payload {
2355            assert_eq!(pr.state, PullRequestState::Unknown);
2356            assert!(pr.merged_at.is_none());
2357            assert!(pr.additions.is_none());
2358            assert!(pr.deletions.is_none());
2359            assert!(pr.changed_files.is_none());
2360        } else {
2361            panic!("expected PullRequest payload");
2362        }
2363    }
2364
2365    #[test]
2366    fn items_to_pr_events_deterministic_ids() {
2367        let mut ing = make_ingestor("ivan");
2368        ing.fetch_details = false;
2369
2370        let client = Client::new();
2371        let items1 = vec![make_search_item(42, "org/repo", true)];
2372        let items2 = vec![make_search_item(42, "org/repo", true)];
2373
2374        let events1 = ing.items_to_pr_events(&client, items1).unwrap();
2375        let events2 = ing.items_to_pr_events(&client, items2).unwrap();
2376        assert_eq!(
2377            events1[0].id, events2[0].id,
2378            "same inputs should produce same event ID"
2379        );
2380    }
2381
2382    #[test]
2383    fn items_to_pr_events_different_prs_get_different_ids() {
2384        let mut ing = make_ingestor("judy");
2385        ing.fetch_details = false;
2386
2387        let client = Client::new();
2388        let items = vec![
2389            make_search_item(1, "org/repo", true),
2390            make_search_item(2, "org/repo", true),
2391        ];
2392
2393        let events = ing.items_to_pr_events(&client, items).unwrap();
2394        assert_ne!(events[0].id, events[1].id);
2395    }
2396
2397    // -- items_to_review_events (no-network partial) --
2398
2399    #[test]
2400    fn items_to_review_events_skips_items_without_pr_ref() {
2401        let ing = make_ingestor("reviewer");
2402        let client = Client::new();
2403
2404        // Item without pull_request ref should be silently skipped.
2405        // (fetch_pr_reviews would fail, but we never reach it.)
2406        let items = vec![make_search_item(1, "org/repo", false)];
2407
2408        let events = ing.items_to_review_events(&client, items).unwrap();
2409        assert!(events.is_empty());
2410    }
2411
2412    // -- ingest error handling --
2413
2414    #[test]
2415    fn ingest_rejects_since_equals_until() {
2416        let date = NaiveDate::from_ymd_opt(2025, 6, 1).unwrap();
2417        let ing = GithubIngestor::new("user".to_string(), date, date);
2418        let err = ing.ingest().unwrap_err();
2419        assert!(
2420            err.to_string().contains("since must be < until"),
2421            "got: {err}"
2422        );
2423    }
2424
2425    #[test]
2426    fn ingest_rejects_since_after_until() {
2427        let ing = GithubIngestor::new(
2428            "user".to_string(),
2429            NaiveDate::from_ymd_opt(2025, 6, 15).unwrap(),
2430            NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
2431        );
2432        let err = ing.ingest().unwrap_err();
2433        assert!(err.to_string().contains("since must be < until"));
2434    }
2435
2436    // -- cache integration --
2437
2438    #[test]
2439    fn with_cache_then_in_memory_cache_overrides() {
2440        let temp = tempfile::tempdir().unwrap();
2441        let ing = make_ingestor("octocat")
2442            .with_cache(temp.path())
2443            .unwrap()
2444            .with_in_memory_cache()
2445            .unwrap();
2446        assert!(ing.cache.is_some());
2447    }
2448
2449    #[test]
2450    fn multiple_with_cache_calls_succeed() {
2451        let temp1 = tempfile::tempdir().unwrap();
2452        let temp2 = tempfile::tempdir().unwrap();
2453        let ing = make_ingestor("octocat")
2454            .with_cache(temp1.path())
2455            .unwrap()
2456            .with_cache(temp2.path())
2457            .unwrap();
2458        assert!(ing.cache.is_some());
2459    }
2460
2461    // ── property tests ──────────────────────────────────────────────────
2462
2463    fn arb_naive_date() -> impl Strategy<Value = NaiveDate> {
2464        (2000i32..2030, 1u32..13, 1u32..29)
2465            .prop_map(|(y, m, d)| NaiveDate::from_ymd_opt(y, m, d).unwrap())
2466    }
2467
2468    fn arb_time_window() -> impl Strategy<Value = TimeWindow> {
2469        (arb_naive_date(), 1u32..366).prop_map(|(since, delta)| {
2470            let until = since + chrono::Duration::days(delta as i64);
2471            TimeWindow { since, until }
2472        })
2473    }
2474
2475    proptest! {
2476        #[test]
2477        fn prop_github_inclusive_range_start_lte_end(w in arb_time_window()) {
2478            let (start, end) = github_inclusive_range(&w);
2479            prop_assert!(start <= end, "start={start} > end={end}");
2480        }
2481
2482        #[test]
2483        fn prop_github_inclusive_range_start_matches_since(w in arb_time_window()) {
2484            let (start, _) = github_inclusive_range(&w);
2485            let expected = w.since.format("%Y-%m-%d").to_string();
2486            prop_assert_eq!(start, expected);
2487        }
2488
2489        #[test]
2490        fn prop_github_inclusive_range_end_is_until_minus_one(w in arb_time_window()) {
2491            let (_, end) = github_inclusive_range(&w);
2492            let expected_date = w.until.pred_opt().unwrap_or(w.until);
2493            let expected = expected_date.format("%Y-%m-%d").to_string();
2494            prop_assert_eq!(end, expected);
2495        }
2496
2497        #[test]
2498        fn prop_build_url_with_params_produces_valid_url(
2499            key in "[a-z]{1,10}",
2500            val in "[a-zA-Z0-9 ]{0,50}",
2501        ) {
2502            let result = build_url_with_params(
2503                "https://api.github.com/search/issues",
2504                &[(&key, val.clone())],
2505            );
2506            prop_assert!(result.is_ok());
2507            let url = result.unwrap();
2508            // URL should not contain raw spaces
2509            prop_assert!(!url.as_str().contains(' '));
2510            // Value should roundtrip
2511            let found: String = url.query_pairs()
2512                .find(|(k, _)| k.as_ref() == key)
2513                .map(|(_, v)| v.into_owned())
2514                .unwrap();
2515            prop_assert_eq!(found, val);
2516        }
2517
2518        #[test]
2519        fn prop_repo_from_repo_url_never_panics(
2520            owner in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,19}",
2521            repo in "[a-zA-Z0-9][a-zA-Z0-9_.-]{0,29}",
2522        ) {
2523            let api_url = format!("https://api.github.com/repos/{}/{}", owner, repo);
2524            let (full, html) = repo_from_repo_url(&api_url, "https://github.com");
2525            let expected_prefix = format!("{}/", owner);
2526            prop_assert!(full.starts_with(&expected_prefix));
2527            prop_assert!(html.starts_with("https://github.com/"));
2528        }
2529
2530        #[test]
2531        fn prop_repo_from_repo_url_arbitrary_strings_never_panic(
2532            s in ".*",
2533        ) {
2534            // Should never panic, even with garbage input
2535            let _ = repo_from_repo_url(&s, "https://github.com");
2536        }
2537
2538        #[test]
2539        fn prop_build_pr_query_contains_user(
2540            user in "[a-zA-Z][a-zA-Z0-9-]{0,19}",
2541        ) {
2542            let ing = GithubIngestor::new(
2543                user.clone(),
2544                NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
2545                NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
2546            );
2547            let w = TimeWindow {
2548                since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
2549                until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
2550            };
2551            let q = ing.build_pr_query(&w);
2552            let expected_author = format!("author:{}", user);
2553            prop_assert!(q.contains(&expected_author));
2554            prop_assert!(q.contains("is:pr"));
2555        }
2556
2557        #[test]
2558        fn prop_build_reviewed_query_contains_user(
2559            user in "[a-zA-Z][a-zA-Z0-9-]{0,19}",
2560        ) {
2561            let ing = GithubIngestor::new(
2562                user.clone(),
2563                NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
2564                NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
2565            );
2566            let w = TimeWindow {
2567                since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
2568                until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
2569            };
2570            let q = ing.build_reviewed_query(&w);
2571            let expected_reviewer = format!("reviewed-by:{}", user);
2572            prop_assert!(q.contains(&expected_reviewer));
2573        }
2574
2575        #[test]
2576        fn prop_api_url_preserves_path(
2577            segment in "[a-z]{1,15}",
2578        ) {
2579            let ing = make_ingestor("test");
2580            let path = format!("/{}", segment);
2581            let url = ing.api_url(&path);
2582            prop_assert!(url.ends_with(&path));
2583            prop_assert!(url.starts_with("https://api.github.com"));
2584        }
2585
2586        #[test]
2587        fn prop_html_base_url_always_returns_valid_string(
2588            base in "(https?://[a-z]{3,15}\\.[a-z]{2,5}(/[a-z]+)*)",
2589        ) {
2590            let mut ing = make_ingestor("test");
2591            ing.api_base = base;
2592            let result = ing.html_base_url();
2593            prop_assert!(!result.is_empty());
2594            prop_assert!(result.starts_with("http"));
2595        }
2596    }
2597}