Skip to main content

shiplog_ingest_github/
lib.rs

1//! GitHub API ingestor with adaptive date slicing and cache support.
2//!
3//! Collects PR/review events, tracks coverage slices, and marks partial
4//! completeness when search caps or incomplete API responses are detected.
5
6use anyhow::{Context, Result, anyhow};
7use chrono::{DateTime, NaiveDate, Utc};
8use reqwest::blocking::Client;
9use serde::de::DeserializeOwned;
10use serde::{Deserialize, Serialize};
11use shiplog_cache::ApiCache;
12use shiplog_cache::CacheKey;
13use shiplog_coverage::{day_windows, month_windows, week_windows, window_len_days};
14use shiplog_ids::{EventId, RunId};
15use shiplog_ports::{IngestOutput, Ingestor};
16use shiplog_schema::coverage::{Completeness, CoverageManifest, CoverageSlice, TimeWindow};
17use shiplog_schema::event::{
18    Actor, EventEnvelope, EventKind, EventPayload, Link, PullRequestEvent, PullRequestState,
19    RepoRef, RepoVisibility, ReviewEvent, SourceRef, SourceSystem,
20};
21use std::path::PathBuf;
22use std::thread::sleep;
23use std::time::Duration;
24use url::Url;
25
26#[derive(Debug)]
27pub struct GithubIngestor {
28    pub user: String,
29    pub since: NaiveDate,
30    pub until: NaiveDate,
31    /// "merged" or "created"
32    pub mode: String,
33    pub include_reviews: bool,
34    pub fetch_details: bool,
35    pub throttle_ms: u64,
36    pub token: Option<String>,
37    /// GitHub API base URL (for GHES). Default: <https://api.github.com>
38    pub api_base: String,
39    /// Optional cache for API responses
40    pub cache: Option<ApiCache>,
41}
42
43impl GithubIngestor {
44    /// Create a new GitHub ingestor for the given user and date range.
45    ///
46    /// Defaults to `merged` mode with no reviews, no cache, and no throttle.
47    ///
48    /// # Examples
49    ///
50    /// ```
51    /// use shiplog_ingest_github::GithubIngestor;
52    /// use chrono::NaiveDate;
53    ///
54    /// let ingestor = GithubIngestor::new(
55    ///     "octocat".into(),
56    ///     NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
57    ///     NaiveDate::from_ymd_opt(2025, 4, 1).unwrap(),
58    /// );
59    /// assert_eq!(ingestor.mode, "merged");
60    /// ```
61    pub fn new(user: String, since: NaiveDate, until: NaiveDate) -> Self {
62        Self {
63            user,
64            since,
65            until,
66            mode: "merged".to_string(),
67            include_reviews: false,
68            fetch_details: true,
69            throttle_ms: 0,
70            token: None,
71            api_base: "https://api.github.com".to_string(),
72            cache: None,
73        }
74    }
75
76    /// Enable caching with the given cache directory.
77    ///
78    /// # Examples
79    ///
80    /// ```rust,no_run
81    /// use shiplog_ingest_github::GithubIngestor;
82    /// use chrono::NaiveDate;
83    ///
84    /// let ingestor = GithubIngestor::new(
85    ///     "octocat".into(),
86    ///     NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
87    ///     NaiveDate::from_ymd_opt(2025, 4, 1).unwrap(),
88    /// ).with_cache("./cache")?;
89    /// # Ok::<(), anyhow::Error>(())
90    /// ```
91    pub fn with_cache(mut self, cache_dir: impl Into<PathBuf>) -> Result<Self> {
92        let cache_path = cache_dir.into().join("github-api-cache.db");
93        if let Some(parent) = cache_path.parent() {
94            std::fs::create_dir_all(parent)
95                .with_context(|| format!("create GitHub cache directory {parent:?}"))?;
96        }
97        let cache = ApiCache::open(&cache_path)
98            .with_context(|| format!("open GitHub API cache at {cache_path:?}"))?;
99        self.cache = Some(cache);
100        Ok(self)
101    }
102
103    /// Enable in-memory caching (useful for testing).
104    ///
105    /// # Examples
106    ///
107    /// ```
108    /// use shiplog_ingest_github::GithubIngestor;
109    /// use chrono::NaiveDate;
110    ///
111    /// let ingestor = GithubIngestor::new(
112    ///     "octocat".into(),
113    ///     NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
114    ///     NaiveDate::from_ymd_opt(2025, 4, 1).unwrap(),
115    /// ).with_in_memory_cache().unwrap();
116    /// assert!(ingestor.cache.is_some());
117    /// ```
118    pub fn with_in_memory_cache(mut self) -> Result<Self> {
119        let cache = ApiCache::open_in_memory().context("open in-memory API cache")?;
120        self.cache = Some(cache);
121        Ok(self)
122    }
123
124    fn html_base_url(&self) -> String {
125        if let Ok(u) = Url::parse(&self.api_base) {
126            let scheme = u.scheme();
127            if let Some(host) = u.host_str() {
128                if host == "api.github.com" {
129                    return "https://github.com".to_string();
130                }
131                let port_suffix = u.port().map(|p| format!(":{p}")).unwrap_or_default();
132                return format!("{scheme}://{host}{port_suffix}");
133            }
134        }
135        "https://github.com".to_string()
136    }
137
138    #[mutants::skip]
139    fn client(&self) -> Result<Client> {
140        Client::builder()
141            .user_agent(concat!("shiplog/", env!("CARGO_PKG_VERSION")))
142            .build()
143            .context("build reqwest client")
144    }
145
146    #[mutants::skip]
147    fn api_url(&self, path: &str) -> String {
148        format!("{}{}", self.api_base.trim_end_matches('/'), path)
149    }
150
151    #[mutants::skip]
152    fn throttle(&self) {
153        if self.throttle_ms > 0 {
154            sleep(Duration::from_millis(self.throttle_ms));
155        }
156    }
157
158    #[mutants::skip]
159    fn get_json<T: DeserializeOwned>(
160        &self,
161        client: &Client,
162        url: &str,
163        params: &[(&str, String)],
164    ) -> Result<T> {
165        let request_url = build_url_with_params(url, params)?;
166        let request_url_for_err = request_url.as_str().to_string();
167
168        let mut req = client
169            .get(request_url)
170            .header("Accept", "application/vnd.github+json");
171        req = req.header("X-GitHub-Api-Version", "2022-11-28");
172        if let Some(t) = &self.token {
173            req = req.bearer_auth(t);
174        }
175        let resp = req
176            .send()
177            .with_context(|| format!("GET {request_url_for_err}"))?;
178        self.throttle();
179
180        if !resp.status().is_success() {
181            let status = resp.status();
182            let body = resp.text().unwrap_or_default();
183            return Err(anyhow!("GitHub API error {status}: {body}"));
184        }
185
186        resp.json::<T>()
187            .with_context(|| format!("parse json from {request_url_for_err}"))
188    }
189}
190
191impl Ingestor for GithubIngestor {
192    #[mutants::skip]
193    fn ingest(&self) -> Result<IngestOutput> {
194        if self.since >= self.until {
195            return Err(anyhow!("since must be < until"));
196        }
197
198        let client = self.client().context("create GitHub API client")?;
199        let run_id = RunId::now("shiplog");
200        let mut slices: Vec<CoverageSlice> = Vec::new();
201        let mut warnings: Vec<String> = Vec::new();
202        let mut completeness = Completeness::Complete;
203
204        let mut events: Vec<EventEnvelope> = Vec::new();
205
206        // PRs authored
207        let pr_query_builder = |w: &TimeWindow| self.build_pr_query(w);
208        let (pr_items, pr_slices, pr_partial) =
209            self.collect_search_items(&client, pr_query_builder, self.since, self.until, "prs")?;
210        slices.extend(pr_slices);
211        if pr_partial {
212            completeness = Completeness::Partial;
213        }
214
215        events.extend(self.items_to_pr_events(&client, pr_items)?);
216
217        // Reviews authored (best-effort)
218        if self.include_reviews {
219            warnings.push("Reviews are collected via search + per-PR review fetch; treat as best-effort coverage.".to_string());
220            let review_query_builder = |w: &TimeWindow| self.build_reviewed_query(w);
221            let (review_items, review_slices, review_partial) = self.collect_search_items(
222                &client,
223                review_query_builder,
224                self.since,
225                self.until,
226                "reviews",
227            )?;
228            slices.extend(review_slices);
229            if review_partial {
230                completeness = Completeness::Partial;
231            }
232            events.extend(self.items_to_review_events(&client, review_items)?);
233        }
234
235        // Sort for stable output
236        events.sort_by_key(|e| e.occurred_at);
237
238        let cov = CoverageManifest {
239            run_id,
240            generated_at: Utc::now(),
241            user: self.user.clone(),
242            window: TimeWindow {
243                since: self.since,
244                until: self.until,
245            },
246            mode: self.mode.clone(),
247            sources: vec!["github".to_string()],
248            slices,
249            warnings,
250            completeness,
251        };
252
253        Ok(IngestOutput {
254            events,
255            coverage: cov,
256        })
257    }
258}
259
260impl GithubIngestor {
261    fn build_pr_query(&self, w: &TimeWindow) -> String {
262        let (start, end) = github_inclusive_range(w);
263        match self.mode.as_str() {
264            "created" => format!("is:pr author:{} created:{}..{}", self.user, start, end),
265            _ => format!(
266                "is:pr is:merged author:{} merged:{}..{}",
267                self.user, start, end
268            ),
269        }
270    }
271
272    fn build_reviewed_query(&self, w: &TimeWindow) -> String {
273        // GitHub does not expose review submission time in search qualifiers.
274        // We use `updated:` to find candidate PRs, then filter reviews by submitted_at.
275        let (start, end) = github_inclusive_range(w);
276        format!("is:pr reviewed-by:{} updated:{}..{}", self.user, start, end)
277    }
278
279    /// Collect search items for a date range, adaptively slicing to avoid the 1000-result cap.
280    ///
281    /// Returns:
282    /// - items
283    /// - coverage slices
284    /// - whether coverage is partial
285    #[mutants::skip]
286    fn collect_search_items<F>(
287        &self,
288        client: &Client,
289        make_query: F,
290        since: NaiveDate,
291        until: NaiveDate,
292        label: &str,
293    ) -> Result<(Vec<SearchIssueItem>, Vec<CoverageSlice>, bool)>
294    where
295        F: Fn(&TimeWindow) -> String,
296    {
297        let mut slices: Vec<CoverageSlice> = Vec::new();
298        let mut items: Vec<SearchIssueItem> = Vec::new();
299        let mut partial = false;
300
301        for w in month_windows(since, until) {
302            let (mut i, mut s, p) =
303                self.collect_window(client, &make_query, &w, Granularity::Month, label)?;
304            items.append(&mut i);
305            slices.append(&mut s);
306            partial |= p;
307        }
308
309        Ok((items, slices, partial))
310    }
311
312    #[mutants::skip]
313    fn collect_window<F>(
314        &self,
315        client: &Client,
316        make_query: &F,
317        window: &TimeWindow,
318        gran: Granularity,
319        label: &str,
320    ) -> Result<(Vec<SearchIssueItem>, Vec<CoverageSlice>, bool)>
321    where
322        F: Fn(&TimeWindow) -> String,
323    {
324        if window.since >= window.until {
325            return Ok((vec![], vec![], false));
326        }
327
328        let query = make_query(window);
329        let (meta_total, meta_incomplete) = self.search_meta(client, &query)?;
330        let mut slices = vec![CoverageSlice {
331            window: window.clone(),
332            query: query.clone(),
333            total_count: meta_total,
334            fetched: 0,
335            incomplete_results: Some(meta_incomplete),
336            notes: vec![format!("probe:{label}")],
337        }];
338
339        // Decide if we need to subdivide
340        let need_subdivide = meta_total > 1000 || meta_incomplete;
341        let can_subdivide = gran != Granularity::Day && window_len_days(window) > 1;
342
343        if need_subdivide && can_subdivide {
344            slices[0].notes.push(format!(
345                "subdivide:{}",
346                if meta_total > 1000 {
347                    "cap"
348                } else {
349                    "incomplete"
350                }
351            ));
352
353            let mut out_items = Vec::new();
354            let mut out_slices = slices;
355            let mut partial = false;
356
357            let subs = match gran {
358                Granularity::Month => week_windows(window.since, window.until),
359                Granularity::Week => day_windows(window.since, window.until),
360                Granularity::Day => vec![],
361            };
362
363            for sub in subs {
364                let (mut i, mut s, p) =
365                    self.collect_window(client, make_query, &sub, gran.next(), label)?;
366                out_items.append(&mut i);
367                out_slices.append(&mut s);
368                partial |= p;
369            }
370            return Ok((out_items, out_slices, partial));
371        }
372
373        // Day-level overflow: can't subdivide further. We'll still fetch up to the API cap.
374        let mut partial = false;
375        if meta_total > 1000 || meta_incomplete {
376            partial = true;
377            slices[0]
378                .notes
379                .push("partial:unresolvable_at_this_granularity".to_string());
380        }
381
382        let fetched_items = self.fetch_all_search_items(client, &query)?;
383        let fetched = fetched_items.len() as u64;
384
385        // Record a fetch slice (separate from the probe for clarity)
386        slices.push(CoverageSlice {
387            window: window.clone(),
388            query: query.clone(),
389            total_count: meta_total,
390            fetched,
391            incomplete_results: Some(meta_incomplete),
392            notes: vec![format!("fetch:{label}")],
393        });
394
395        Ok((fetched_items, slices, partial))
396    }
397
398    #[mutants::skip]
399    fn search_meta(&self, client: &Client, q: &str) -> Result<(u64, bool)> {
400        let url = self.api_url("/search/issues");
401        let resp: SearchResponse<SearchIssueItem> = self.get_json(
402            client,
403            &url,
404            &[
405                ("q", q.to_string()),
406                ("per_page", "1".to_string()),
407                ("page", "1".to_string()),
408            ],
409        )?;
410        Ok((resp.total_count, resp.incomplete_results))
411    }
412
413    #[mutants::skip]
414    fn fetch_all_search_items(&self, client: &Client, q: &str) -> Result<Vec<SearchIssueItem>> {
415        let url = self.api_url("/search/issues");
416        let mut out: Vec<SearchIssueItem> = Vec::new();
417        let per_page = 100;
418        let max_pages = 10; // 1000 cap
419        for page in 1..=max_pages {
420            let resp: SearchResponse<SearchIssueItem> = self.get_json(
421                client,
422                &url,
423                &[
424                    ("q", q.to_string()),
425                    ("per_page", per_page.to_string()),
426                    ("page", page.to_string()),
427                ],
428            )?;
429            let items_len = resp.items.len();
430            out.extend(resp.items);
431            if out.len() as u64 >= resp.total_count.min(1000) {
432                break;
433            }
434            if items_len < per_page {
435                break;
436            }
437        }
438        Ok(out)
439    }
440
441    #[mutants::skip]
442    fn items_to_pr_events(
443        &self,
444        client: &Client,
445        items: Vec<SearchIssueItem>,
446    ) -> Result<Vec<EventEnvelope>> {
447        let mut out = Vec::new();
448        for item in items {
449            if let Some(pr_ref) = &item.pull_request {
450                let html_base = self.html_base_url();
451                let (repo_full_name, repo_html_url) =
452                    repo_from_repo_url(&item.repository_url, &html_base);
453
454                let (title, created_at, merged_at, additions, deletions, changed_files, visibility) =
455                    if self.fetch_details {
456                        match self.fetch_pr_details(client, &pr_ref.url) {
457                            Ok(d) => {
458                                let vis = if d.base.repo.private_field {
459                                    RepoVisibility::Private
460                                } else {
461                                    RepoVisibility::Public
462                                };
463                                (
464                                    d.title,
465                                    d.created_at,
466                                    d.merged_at,
467                                    Some(d.additions),
468                                    Some(d.deletions),
469                                    Some(d.changed_files),
470                                    vis,
471                                )
472                            }
473                            Err(_) => {
474                                // If details fail, fall back to search fields.
475                                (
476                                    item.title.clone(),
477                                    item.created_at.unwrap_or_else(Utc::now),
478                                    None,
479                                    None,
480                                    None,
481                                    None,
482                                    RepoVisibility::Unknown,
483                                )
484                            }
485                        }
486                    } else {
487                        (
488                            item.title.clone(),
489                            item.created_at.unwrap_or_else(Utc::now),
490                            None,
491                            None,
492                            None,
493                            None,
494                            RepoVisibility::Unknown,
495                        )
496                    };
497
498                let occurred_at = match self.mode.as_str() {
499                    "created" => created_at,
500                    _ => merged_at.unwrap_or(created_at),
501                };
502
503                let state = if merged_at.is_some() {
504                    PullRequestState::Merged
505                } else {
506                    PullRequestState::Unknown
507                };
508
509                let id = EventId::from_parts([
510                    "github",
511                    "pr",
512                    &repo_full_name,
513                    &item.number.to_string(),
514                ]);
515
516                let ev = EventEnvelope {
517                    id,
518                    kind: EventKind::PullRequest,
519                    occurred_at,
520                    actor: Actor {
521                        login: self.user.clone(),
522                        id: None,
523                    },
524                    repo: RepoRef {
525                        full_name: repo_full_name,
526                        html_url: Some(repo_html_url),
527                        visibility,
528                    },
529                    payload: EventPayload::PullRequest(PullRequestEvent {
530                        number: item.number,
531                        title,
532                        state,
533                        created_at,
534                        merged_at,
535                        additions,
536                        deletions,
537                        changed_files,
538                        touched_paths_hint: vec![],
539                        window: None,
540                    }),
541                    tags: vec![],
542                    links: vec![Link {
543                        label: "pr".into(),
544                        url: item.html_url.clone(),
545                    }],
546                    source: SourceRef {
547                        system: SourceSystem::Github,
548                        url: Some(pr_ref.url.clone()),
549                        opaque_id: Some(item.id.to_string()),
550                    },
551                };
552
553                out.push(ev);
554            }
555        }
556        Ok(out)
557    }
558
559    #[mutants::skip]
560    fn items_to_review_events(
561        &self,
562        client: &Client,
563        items: Vec<SearchIssueItem>,
564    ) -> Result<Vec<EventEnvelope>> {
565        let mut out = Vec::new();
566        for item in items {
567            let Some(pr_ref) = &item.pull_request else {
568                continue;
569            };
570            let html_base = self.html_base_url();
571            let (repo_full_name, repo_html_url) =
572                repo_from_repo_url(&item.repository_url, &html_base);
573
574            // Fetch reviews for this PR and filter by author + date window.
575            let reviews = self.fetch_pr_reviews(client, &pr_ref.url)?;
576            for r in reviews {
577                if r.user.login != self.user {
578                    continue;
579                }
580                let submitted = match r.submitted_at {
581                    Some(s) => s,
582                    None => continue,
583                };
584                let submitted_date = submitted.date_naive();
585                if submitted_date < self.since || submitted_date >= self.until {
586                    continue;
587                }
588
589                let id = EventId::from_parts([
590                    "github",
591                    "review",
592                    &repo_full_name,
593                    &item.number.to_string(),
594                    &r.id.to_string(),
595                ]);
596
597                let ev = EventEnvelope {
598                    id,
599                    kind: EventKind::Review,
600                    occurred_at: submitted,
601                    actor: Actor {
602                        login: self.user.clone(),
603                        id: None,
604                    },
605                    repo: RepoRef {
606                        full_name: repo_full_name.clone(),
607                        html_url: Some(repo_html_url.clone()),
608                        visibility: RepoVisibility::Unknown,
609                    },
610                    payload: EventPayload::Review(ReviewEvent {
611                        pull_number: item.number,
612                        pull_title: item.title.clone(),
613                        submitted_at: submitted,
614                        state: r.state,
615                        window: None,
616                    }),
617                    tags: vec![],
618                    links: vec![Link {
619                        label: "pr".into(),
620                        url: item.html_url.clone(),
621                    }],
622                    source: SourceRef {
623                        system: SourceSystem::Github,
624                        url: Some(pr_ref.url.clone()),
625                        opaque_id: Some(r.id.to_string()),
626                    },
627                };
628
629                out.push(ev);
630            }
631        }
632        Ok(out)
633    }
634
635    #[mutants::skip]
636    fn fetch_pr_details(&self, client: &Client, pr_api_url: &str) -> Result<PullRequestDetails> {
637        // Check cache first
638        let cache_key = CacheKey::pr_details(pr_api_url);
639        #[allow(clippy::collapsible_if)]
640        if let Some(ref cache) = self.cache {
641            if let Some(cached) = cache.get::<PullRequestDetails>(&cache_key)? {
642                return Ok(cached);
643            }
644        }
645
646        // Fetch from API
647        let details: PullRequestDetails = self.get_json(client, pr_api_url, &[])?;
648
649        // Store in cache
650        if let Some(ref cache) = self.cache {
651            cache.set(&cache_key, &details)?;
652        }
653
654        Ok(details)
655    }
656
657    #[mutants::skip]
658    fn fetch_pr_reviews(
659        &self,
660        client: &Client,
661        pr_api_url: &str,
662    ) -> Result<Vec<PullRequestReview>> {
663        let url = format!("{pr_api_url}/reviews");
664        let mut out = Vec::new();
665        let per_page = 100;
666        for page in 1..=10 {
667            let cache_key = CacheKey::pr_reviews(pr_api_url, page);
668
669            // Try to get from cache first
670            let page_reviews: Vec<PullRequestReview> = if let Some(ref cache) = self.cache {
671                if let Some(cached) = cache.get::<Vec<PullRequestReview>>(&cache_key)? {
672                    cached
673                } else {
674                    // Not in cache, fetch from API
675                    let reviews: Vec<PullRequestReview> = self.get_json(
676                        client,
677                        &url,
678                        &[
679                            ("per_page", per_page.to_string()),
680                            ("page", page.to_string()),
681                        ],
682                    )?;
683                    // Store in cache
684                    cache.set(&cache_key, &reviews)?;
685                    reviews
686                }
687            } else {
688                // No cache configured, fetch directly
689                self.get_json(
690                    client,
691                    &url,
692                    &[
693                        ("per_page", per_page.to_string()),
694                        ("page", page.to_string()),
695                    ],
696                )?
697            };
698
699            let n = page_reviews.len();
700            out.extend(page_reviews);
701            if n < per_page {
702                break;
703            }
704        }
705        Ok(out)
706    }
707}
708
/// Size of the date windows used while slicing a search, coarsest first.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Granularity {
    /// Month-sized windows (the starting granularity).
    Month,
    /// Week-sized windows.
    Week,
    /// Day-sized windows (the finest granularity).
    Day,
}

impl Granularity {
    /// Returns the next finer granularity; `Day` maps to itself.
    fn next(&self) -> Granularity {
        use Granularity::{Day, Month, Week};
        match *self {
            Month => Week,
            Week | Day => Day,
        }
    }
}
725
726fn github_inclusive_range(w: &TimeWindow) -> (String, String) {
727    let start = w.since.format("%Y-%m-%d").to_string();
728    let end_date = w.until.pred_opt().unwrap_or(w.until);
729    let end = end_date.format("%Y-%m-%d").to_string();
730    (start, end)
731}
732
733fn build_url_with_params(base: &str, params: &[(&str, String)]) -> Result<Url> {
734    let mut url = Url::parse(base).with_context(|| format!("parse url {base}"))?;
735    if !params.is_empty() {
736        let mut query = url.query_pairs_mut();
737        for (k, v) in params {
738            query.append_pair(k, v);
739        }
740    }
741    Ok(url)
742}
743
744fn repo_from_repo_url(repo_api_url: &str, html_base: &str) -> (String, String) {
745    #[allow(clippy::collapsible_if)]
746    if let Ok(u) = Url::parse(repo_api_url) {
747        if let Some(segs) = u.path_segments() {
748            let v: Vec<&str> = segs.collect();
749            if v.len() >= 3 && v[0] == "repos" {
750                let owner = v[1];
751                let repo = v[2];
752                let full = format!("{}/{}", owner, repo);
753                let html = format!("{}/{}/{}", html_base.trim_end_matches('/'), owner, repo);
754                return (full, html);
755            }
756        }
757    }
758    ("unknown/unknown".to_string(), html_base.to_string())
759}
760
761/// GitHub search response envelope.
762#[derive(Debug, Deserialize)]
763struct SearchResponse<T> {
764    total_count: u64,
765    incomplete_results: bool,
766    items: Vec<T>,
767}
768
769#[derive(Debug, Deserialize)]
770struct SearchIssueItem {
771    id: u64,
772    number: u64,
773    title: String,
774    html_url: String,
775    repository_url: String,
776    pull_request: Option<SearchPullRequestRef>,
777
778    // Search returns these for issues; for PR queries they are present and useful.
779    created_at: Option<DateTime<Utc>>,
780}
781
782#[derive(Debug, Deserialize)]
783struct SearchPullRequestRef {
784    url: String,
785}
786
787#[derive(Debug, Deserialize, Serialize, Clone)]
788struct PullRequestDetails {
789    title: String,
790    created_at: DateTime<Utc>,
791    merged_at: Option<DateTime<Utc>>,
792    additions: u64,
793    deletions: u64,
794    changed_files: u64,
795    base: PullBase,
796}
797
798#[derive(Debug, Deserialize, Serialize, Clone)]
799struct PullBase {
800    repo: PullRepo,
801}
802
803#[derive(Debug, Deserialize, Serialize, Clone)]
804struct PullRepo {
805    full_name: String,
806    html_url: String,
807    #[serde(rename = "private")]
808    private_field: bool,
809}
810
811#[derive(Debug, Deserialize, Serialize, Clone)]
812struct PullRequestReview {
813    id: u64,
814    state: String,
815    submitted_at: Option<DateTime<Utc>>,
816    user: ReviewUser,
817}
818
819#[derive(Debug, Deserialize, Serialize, Clone)]
820struct ReviewUser {
821    login: String,
822}
823
824#[cfg(test)]
825mod tests {
826    use super::*;
827    use proptest::prelude::*;
828
829    // ── helpers ──────────────────────────────────────────────────────────
830
831    fn make_ingestor(user: &str) -> GithubIngestor {
832        GithubIngestor::new(
833            user.to_string(),
834            NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
835            NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
836        )
837    }
838
839    fn make_search_item(number: u64, repo: &str, with_pr: bool) -> SearchIssueItem {
840        SearchIssueItem {
841            id: number * 100,
842            number,
843            title: format!("PR #{number}"),
844            html_url: format!("https://github.com/{repo}/pull/{number}"),
845            repository_url: format!("https://api.github.com/repos/{repo}"),
846            pull_request: if with_pr {
847                Some(SearchPullRequestRef {
848                    url: format!("https://api.github.com/repos/{repo}/pulls/{number}"),
849                })
850            } else {
851                None
852            },
853            created_at: Some(Utc::now()),
854        }
855    }
856
857    // ── existing tests (preserved) ──────────────────────────────────────
858
859    #[test]
860    fn with_cache_creates_missing_directory() {
861        let temp = tempfile::tempdir().unwrap();
862        let cache_dir = temp.path().join("nested").join("cache");
863
864        let ing = GithubIngestor::new(
865            "octocat".to_string(),
866            NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
867            NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
868        )
869        .with_cache(&cache_dir)
870        .unwrap();
871
872        assert!(ing.cache.is_some());
873        assert!(cache_dir.join("github-api-cache.db").exists());
874    }
875
876    #[test]
877    fn build_url_with_params_encodes_query_values() {
878        let url = build_url_with_params(
879            "https://api.github.com/search/issues",
880            &[
881                ("q", "is:pr is:merged author:octocat".to_string()),
882                ("per_page", "1".to_string()),
883            ],
884        )
885        .unwrap();
886
887        assert!(!url.as_str().contains(' '), "URL should be percent-encoded");
888
889        let pairs: Vec<(String, String)> = url
890            .query_pairs()
891            .map(|(k, v)| (k.into_owned(), v.into_owned()))
892            .collect();
893        assert_eq!(
894            pairs,
895            vec![
896                (
897                    "q".to_string(),
898                    "is:pr is:merged author:octocat".to_string()
899                ),
900                ("per_page".to_string(), "1".to_string()),
901            ]
902        );
903    }
904
905    #[test]
906    fn github_inclusive_range_uses_exclusive_until_date() {
907        let window = TimeWindow {
908            since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
909            until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
910        };
911
912        let (start, end) = github_inclusive_range(&window);
913        assert_eq!(start, "2025-01-01");
914        assert_eq!(end, "2025-01-31");
915    }
916
917    #[test]
918    fn html_base_url_maps_public_and_ghes_hosts() {
919        let mut ing = GithubIngestor::new(
920            "octocat".to_string(),
921            NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
922            NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
923        );
924        ing.api_base = "https://api.github.com".to_string();
925        assert_eq!(ing.html_base_url(), "https://github.com");
926
927        ing.api_base = "https://github.enterprise.local/api/v3".to_string();
928        assert_eq!(ing.html_base_url(), "https://github.enterprise.local");
929    }
930
931    #[test]
932    fn build_pr_query_merged_and_created_modes() {
933        let mut ing = GithubIngestor::new(
934            "octocat".to_string(),
935            NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
936            NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
937        );
938        let w = TimeWindow {
939            since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
940            until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
941        };
942
943        ing.mode = "merged".to_string();
944        let merged_q = ing.build_pr_query(&w);
945        assert!(!merged_q.is_empty());
946        assert!(merged_q.contains("is:merged"));
947        assert!(merged_q.contains("author:octocat"));
948
949        ing.mode = "created".to_string();
950        let created_q = ing.build_pr_query(&w);
951        assert!(!created_q.is_empty());
952        assert!(created_q.contains("created:"));
953        assert!(created_q.contains("author:octocat"));
954
955        // The two queries should be different
956        assert_ne!(merged_q, created_q);
957    }
958
959    #[test]
960    fn build_reviewed_query_contains_user() {
961        let ing = GithubIngestor::new(
962            "octocat".to_string(),
963            NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
964            NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
965        );
966        let w = TimeWindow {
967            since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
968            until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
969        };
970
971        let q = ing.build_reviewed_query(&w);
972        assert!(!q.is_empty());
973        assert!(q.contains("reviewed-by:octocat"));
974        assert!(q.contains("is:pr"));
975    }
976
977    #[test]
978    fn repo_from_repo_url_invalid_url_returns_fallback() {
979        let (full, html) = repo_from_repo_url("not-a-url-at-all", "https://github.com");
980        assert_eq!(full, "unknown/unknown");
981        assert_eq!(html, "https://github.com");
982
983        // URL with wrong path structure
984        let (full2, _) =
985            repo_from_repo_url("https://api.github.com/users/octocat", "https://github.com");
986        assert_eq!(full2, "unknown/unknown");
987    }
988
989    #[test]
990    fn repo_from_repo_url_extracts_or_falls_back() {
991        let (full, html) = repo_from_repo_url(
992            "https://api.github.com/repos/owner/repo",
993            "https://github.com",
994        );
995        assert_eq!(full, "owner/repo");
996        assert_eq!(html, "https://github.com/owner/repo");
997
998        let (full_fallback, html_fallback) = repo_from_repo_url("not-a-url", "https://github.com");
999        assert_eq!(full_fallback, "unknown/unknown");
1000        assert_eq!(html_fallback, "https://github.com");
1001    }
1002
1003    // ── new unit tests ──────────────────────────────────────────────────
1004
1005    // -- Granularity --
1006
1007    #[test]
1008    fn granularity_next_transitions() {
1009        assert_eq!(Granularity::Month.next(), Granularity::Week);
1010        assert_eq!(Granularity::Week.next(), Granularity::Day);
1011        assert_eq!(Granularity::Day.next(), Granularity::Day);
1012    }
1013
1014    #[test]
1015    fn granularity_day_is_fixed_point() {
1016        let g = Granularity::Day;
1017        assert_eq!(g.next(), Granularity::Day);
1018        assert_eq!(g.next().next(), Granularity::Day);
1019    }
1020
1021    // -- GithubIngestor::new defaults --
1022
1023    #[test]
1024    fn new_defaults_are_correct() {
1025        let ing = make_ingestor("alice");
1026        assert_eq!(ing.user, "alice");
1027        assert_eq!(ing.mode, "merged");
1028        assert!(!ing.include_reviews);
1029        assert!(ing.fetch_details);
1030        assert_eq!(ing.throttle_ms, 0);
1031        assert!(ing.token.is_none());
1032        assert_eq!(ing.api_base, "https://api.github.com");
1033        assert!(ing.cache.is_none());
1034    }
1035
1036    // -- with_in_memory_cache --
1037
1038    #[test]
1039    fn with_in_memory_cache_sets_cache() {
1040        let ing = make_ingestor("bob").with_in_memory_cache().unwrap();
1041        assert!(ing.cache.is_some());
1042    }
1043
1044    // -- api_url --
1045
1046    #[test]
1047    fn api_url_concatenates_path() {
1048        let ing = make_ingestor("octocat");
1049        assert_eq!(
1050            ing.api_url("/search/issues"),
1051            "https://api.github.com/search/issues"
1052        );
1053    }
1054
1055    #[test]
1056    fn api_url_strips_trailing_slash() {
1057        let mut ing = make_ingestor("octocat");
1058        ing.api_base = "https://ghes.local/api/v3/".to_string();
1059        assert_eq!(
1060            ing.api_url("/search/issues"),
1061            "https://ghes.local/api/v3/search/issues"
1062        );
1063    }
1064
1065    // -- html_base_url edge cases --
1066
1067    #[test]
1068    fn html_base_url_with_port() {
1069        let mut ing = make_ingestor("octocat");
1070        ing.api_base = "https://ghes.local:8443/api/v3".to_string();
1071        assert_eq!(ing.html_base_url(), "https://ghes.local:8443");
1072    }
1073
1074    #[test]
1075    fn html_base_url_invalid_url_falls_back() {
1076        let mut ing = make_ingestor("octocat");
1077        ing.api_base = "not-a-valid-url".to_string();
1078        assert_eq!(ing.html_base_url(), "https://github.com");
1079    }
1080
1081    #[test]
1082    fn html_base_url_http_scheme() {
1083        let mut ing = make_ingestor("octocat");
1084        ing.api_base = "http://internal-ghes.corp/api/v3".to_string();
1085        assert_eq!(ing.html_base_url(), "http://internal-ghes.corp");
1086    }
1087
1088    // -- github_inclusive_range edge cases --
1089
1090    #[test]
1091    fn github_inclusive_range_single_day_window() {
1092        let window = TimeWindow {
1093            since: NaiveDate::from_ymd_opt(2025, 3, 15).unwrap(),
1094            until: NaiveDate::from_ymd_opt(2025, 3, 16).unwrap(),
1095        };
1096        let (start, end) = github_inclusive_range(&window);
1097        assert_eq!(start, "2025-03-15");
1098        assert_eq!(end, "2025-03-15");
1099    }
1100
1101    #[test]
1102    fn github_inclusive_range_year_boundary() {
1103        let window = TimeWindow {
1104            since: NaiveDate::from_ymd_opt(2024, 12, 1).unwrap(),
1105            until: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1106        };
1107        let (start, end) = github_inclusive_range(&window);
1108        assert_eq!(start, "2024-12-01");
1109        assert_eq!(end, "2024-12-31");
1110    }
1111
1112    #[test]
1113    fn github_inclusive_range_same_day_uses_pred() {
1114        // When since == until, pred_opt gives previous day
1115        let window = TimeWindow {
1116            since: NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1117            until: NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1118        };
1119        let (start, end) = github_inclusive_range(&window);
1120        assert_eq!(start, "2025-06-01");
1121        assert_eq!(end, "2025-05-31");
1122    }
1123
1124    // -- build_url_with_params edge cases --
1125
1126    #[test]
1127    fn build_url_with_params_empty_params() {
1128        let url = build_url_with_params("https://api.github.com/search/issues", &[]).unwrap();
1129        assert_eq!(url.as_str(), "https://api.github.com/search/issues");
1130    }
1131
1132    #[test]
1133    fn build_url_with_params_special_characters() {
1134        let url = build_url_with_params(
1135            "https://api.github.com/search/issues",
1136            &[(
1137                "q",
1138                "author:user+name with spaces&special=chars".to_string(),
1139            )],
1140        )
1141        .unwrap();
1142        // Should not contain raw spaces
1143        assert!(!url.as_str().contains(' '));
1144        // Should roundtrip the value
1145        let val: String = url
1146            .query_pairs()
1147            .find(|(k, _)| k == "q")
1148            .map(|(_, v)| v.into_owned())
1149            .unwrap();
1150        assert_eq!(val, "author:user+name with spaces&special=chars");
1151    }
1152
1153    #[test]
1154    fn build_url_with_params_invalid_base_url_errors() {
1155        let result = build_url_with_params("not a url", &[]);
1156        assert!(result.is_err());
1157    }
1158
1159    // -- repo_from_repo_url edge cases --
1160
1161    #[test]
1162    fn repo_from_repo_url_ghes_url() {
1163        // GHES API URLs have /api/v3/repos/owner/repo — the function looks for
1164        // the /repos/ segment, so the path must contain "repos" at position [0].
1165        // Standard GHES URLs: the path_segments include ["api","v3","repos","owner","repo"].
1166        // The function only matches when v[0] == "repos", so GHES-style deep paths
1167        // don't match and fall back to unknown.
1168        let (full, html) = repo_from_repo_url(
1169            "https://ghes.corp/api/v3/repos/myorg/myrepo",
1170            "https://ghes.corp",
1171        );
1172        // The function requires path segment [0] == "repos", but GHES has api/v3/repos,
1173        // so segment[0] == "api". This correctly falls back.
1174        assert_eq!(full, "unknown/unknown");
1175        assert_eq!(html, "https://ghes.corp");
1176    }
1177
1178    #[test]
1179    fn repo_from_repo_url_three_plus_segments_wrong_prefix_falls_back() {
1180        // 3+ segments but v[0] != "repos" → must fall back.
1181        // Kills && → || mutation: with ||, v.len()>=3 alone would enter the block.
1182        let (full, html) = repo_from_repo_url(
1183            "https://api.github.com/users/octocat/repos",
1184            "https://github.com",
1185        );
1186        assert_eq!(full, "unknown/unknown");
1187        assert_eq!(html, "https://github.com");
1188    }
1189
1190    #[test]
1191    fn repo_from_repo_url_exactly_two_segments_repos_prefix_falls_back() {
1192        // v[0] == "repos" but only 2 segments → must fall back.
1193        // Kills && → || mutation: with ||, v[0]=="repos" alone would enter the block.
1194        let (full, html) = repo_from_repo_url(
1195            "https://api.github.com/repos/owner-only",
1196            "https://github.com",
1197        );
1198        assert_eq!(full, "unknown/unknown");
1199        assert_eq!(html, "https://github.com");
1200    }
1201
1202    #[test]
1203    fn repo_from_repo_url_trailing_slash_in_html_base() {
1204        let (full, html) = repo_from_repo_url(
1205            "https://api.github.com/repos/owner/repo",
1206            "https://github.com/",
1207        );
1208        assert_eq!(full, "owner/repo");
1209        assert_eq!(html, "https://github.com/owner/repo");
1210    }
1211
1212    #[test]
1213    fn repo_from_repo_url_extra_path_segments() {
1214        // URL with more path segments after owner/repo (e.g. /repos/owner/repo/pulls)
1215        let (full, html) = repo_from_repo_url(
1216            "https://api.github.com/repos/org/project/pulls",
1217            "https://github.com",
1218        );
1219        assert_eq!(full, "org/project");
1220        assert_eq!(html, "https://github.com/org/project");
1221    }
1222
1223    #[test]
1224    fn repo_from_repo_url_empty_string() {
1225        let (full, html) = repo_from_repo_url("", "https://github.com");
1226        assert_eq!(full, "unknown/unknown");
1227        assert_eq!(html, "https://github.com");
1228    }
1229
1230    // -- build_pr_query date range formatting --
1231
1232    #[test]
1233    fn build_pr_query_uses_inclusive_range() {
1234        let ing = make_ingestor("alice");
1235        let w = TimeWindow {
1236            since: NaiveDate::from_ymd_opt(2025, 3, 1).unwrap(),
1237            until: NaiveDate::from_ymd_opt(2025, 3, 15).unwrap(),
1238        };
1239        let q = ing.build_pr_query(&w);
1240        // Merged query should use the inclusive end date (2025-03-14)
1241        assert!(q.contains("2025-03-01..2025-03-14"), "got: {q}");
1242    }
1243
1244    #[test]
1245    fn build_pr_query_unknown_mode_defaults_to_merged() {
1246        let mut ing = make_ingestor("octocat");
1247        ing.mode = "unknown_mode".to_string();
1248        let w = TimeWindow {
1249            since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1250            until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1251        };
1252        let q = ing.build_pr_query(&w);
1253        assert!(
1254            q.contains("is:merged"),
1255            "unknown mode should fall through to merged"
1256        );
1257    }
1258
1259    // -- build_reviewed_query format --
1260
1261    #[test]
1262    fn build_reviewed_query_uses_updated_qualifier() {
1263        let ing = make_ingestor("reviewer");
1264        let w = TimeWindow {
1265            since: NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1266            until: NaiveDate::from_ymd_opt(2025, 7, 1).unwrap(),
1267        };
1268        let q = ing.build_reviewed_query(&w);
1269        assert!(
1270            q.contains("updated:"),
1271            "review query should use updated: qualifier"
1272        );
1273        assert!(q.contains("reviewed-by:reviewer"));
1274    }
1275
1276    // -- SearchResponse deserialization --
1277
1278    #[test]
1279    fn search_response_deserializes_from_json() {
1280        let json = r#"{
1281            "total_count": 42,
1282            "incomplete_results": false,
1283            "items": [
1284                {
1285                    "id": 1001,
1286                    "number": 123,
1287                    "title": "Fix bug",
1288                    "html_url": "https://github.com/owner/repo/pull/123",
1289                    "repository_url": "https://api.github.com/repos/owner/repo",
1290                    "pull_request": { "url": "https://api.github.com/repos/owner/repo/pulls/123" },
1291                    "created_at": "2025-01-15T10:30:00Z"
1292                }
1293            ]
1294        }"#;
1295
1296        let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1297        assert_eq!(resp.total_count, 42);
1298        assert!(!resp.incomplete_results);
1299        assert_eq!(resp.items.len(), 1);
1300        assert_eq!(resp.items[0].number, 123);
1301        assert_eq!(resp.items[0].title, "Fix bug");
1302        assert!(resp.items[0].pull_request.is_some());
1303    }
1304
1305    #[test]
1306    fn search_response_deserializes_without_pull_request() {
1307        let json = r#"{
1308            "total_count": 1,
1309            "incomplete_results": true,
1310            "items": [
1311                {
1312                    "id": 2002,
1313                    "number": 456,
1314                    "title": "Issue only",
1315                    "html_url": "https://github.com/owner/repo/issues/456",
1316                    "repository_url": "https://api.github.com/repos/owner/repo",
1317                    "created_at": null
1318                }
1319            ]
1320        }"#;
1321
1322        let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1323        assert!(resp.incomplete_results);
1324        assert!(resp.items[0].pull_request.is_none());
1325        assert!(resp.items[0].created_at.is_none());
1326    }
1327
1328    #[test]
1329    fn search_response_empty_items() {
1330        let json = r#"{"total_count": 0, "incomplete_results": false, "items": []}"#;
1331        let resp: SearchResponse<SearchIssueItem> = serde_json::from_str(json).unwrap();
1332        assert_eq!(resp.total_count, 0);
1333        assert!(resp.items.is_empty());
1334    }
1335
1336    // -- PullRequestDetails deserialization --
1337
1338    #[test]
1339    fn pr_details_deserializes_from_json() {
1340        let json = r#"{
1341            "title": "Add feature",
1342            "created_at": "2025-01-10T08:00:00Z",
1343            "merged_at": "2025-01-12T14:30:00Z",
1344            "additions": 150,
1345            "deletions": 30,
1346            "changed_files": 5,
1347            "base": {
1348                "repo": {
1349                    "full_name": "owner/repo",
1350                    "html_url": "https://github.com/owner/repo",
1351                    "private": false
1352                }
1353            }
1354        }"#;
1355
1356        let details: PullRequestDetails = serde_json::from_str(json).unwrap();
1357        assert_eq!(details.title, "Add feature");
1358        assert!(details.merged_at.is_some());
1359        assert_eq!(details.additions, 150);
1360        assert_eq!(details.deletions, 30);
1361        assert_eq!(details.changed_files, 5);
1362        assert!(!details.base.repo.private_field);
1363        assert_eq!(details.base.repo.full_name, "owner/repo");
1364    }
1365
1366    #[test]
1367    fn pr_details_private_repo() {
1368        let json = r#"{
1369            "title": "Secret fix",
1370            "created_at": "2025-01-10T08:00:00Z",
1371            "merged_at": null,
1372            "additions": 10,
1373            "deletions": 5,
1374            "changed_files": 1,
1375            "base": {
1376                "repo": {
1377                    "full_name": "corp/secret",
1378                    "html_url": "https://github.com/corp/secret",
1379                    "private": true
1380                }
1381            }
1382        }"#;
1383
1384        let details: PullRequestDetails = serde_json::from_str(json).unwrap();
1385        assert!(details.base.repo.private_field);
1386        assert!(details.merged_at.is_none());
1387    }
1388
1389    // -- PullRequestReview deserialization --
1390
1391    #[test]
1392    fn pr_review_deserializes_from_json() {
1393        let json = r#"{
1394            "id": 99001,
1395            "state": "APPROVED",
1396            "submitted_at": "2025-02-01T12:00:00Z",
1397            "user": { "login": "reviewer42" }
1398        }"#;
1399
1400        let review: PullRequestReview = serde_json::from_str(json).unwrap();
1401        assert_eq!(review.id, 99001);
1402        assert_eq!(review.state, "APPROVED");
1403        assert!(review.submitted_at.is_some());
1404        assert_eq!(review.user.login, "reviewer42");
1405    }
1406
1407    #[test]
1408    fn pr_review_with_null_submitted_at() {
1409        let json = r#"{
1410            "id": 99002,
1411            "state": "PENDING",
1412            "submitted_at": null,
1413            "user": { "login": "pending-reviewer" }
1414        }"#;
1415
1416        let review: PullRequestReview = serde_json::from_str(json).unwrap();
1417        assert!(review.submitted_at.is_none());
1418    }
1419
1420    // -- items_to_pr_events (no network, fetch_details=false) --
1421
1422    #[test]
1423    fn items_to_pr_events_without_details_produces_events() {
1424        let mut ing = make_ingestor("alice");
1425        ing.fetch_details = false;
1426
1427        let client = Client::new();
1428        let items = vec![
1429            make_search_item(10, "org/repo-a", true),
1430            make_search_item(20, "org/repo-b", true),
1431        ];
1432
1433        let events = ing.items_to_pr_events(&client, items).unwrap();
1434        assert_eq!(events.len(), 2);
1435
1436        assert_eq!(events[0].kind, EventKind::PullRequest);
1437        assert_eq!(events[0].actor.login, "alice");
1438        assert_eq!(events[0].repo.full_name, "org/repo-a");
1439        assert_eq!(events[0].links.len(), 1);
1440        assert_eq!(events[0].links[0].label, "pr");
1441
1442        assert_eq!(events[1].repo.full_name, "org/repo-b");
1443    }
1444
1445    #[test]
1446    fn items_to_pr_events_skips_items_without_pr_ref() {
1447        let mut ing = make_ingestor("bob");
1448        ing.fetch_details = false;
1449
1450        let client = Client::new();
1451        let items = vec![
1452            make_search_item(1, "org/repo", true),
1453            make_search_item(2, "org/repo", false), // no pull_request ref
1454            make_search_item(3, "org/repo", true),
1455        ];
1456
1457        let events = ing.items_to_pr_events(&client, items).unwrap();
1458        assert_eq!(
1459            events.len(),
1460            2,
1461            "items without pull_request should be skipped"
1462        );
1463    }
1464
1465    #[test]
1466    fn items_to_pr_events_empty_input() {
1467        let mut ing = make_ingestor("carol");
1468        ing.fetch_details = false;
1469        let client = Client::new();
1470        let events = ing.items_to_pr_events(&client, vec![]).unwrap();
1471        assert!(events.is_empty());
1472    }
1473
1474    #[test]
1475    fn items_to_pr_events_sets_source_system() {
1476        let mut ing = make_ingestor("dave");
1477        ing.fetch_details = false;
1478
1479        let client = Client::new();
1480        let items = vec![make_search_item(42, "org/repo", true)];
1481        let events = ing.items_to_pr_events(&client, items).unwrap();
1482
1483        assert_eq!(events[0].source.system, SourceSystem::Github);
1484        assert!(events[0].source.url.is_some());
1485        assert!(events[0].source.opaque_id.is_some());
1486    }
1487
1488    #[test]
1489    fn items_to_pr_events_merged_mode_uses_created_at_as_occurred() {
1490        let mut ing = make_ingestor("eve");
1491        ing.fetch_details = false;
1492        ing.mode = "merged".to_string();
1493
1494        let client = Client::new();
1495        let mut item = make_search_item(1, "org/repo", true);
1496        let created = DateTime::parse_from_rfc3339("2025-03-15T10:00:00Z")
1497            .unwrap()
1498            .with_timezone(&Utc);
1499        item.created_at = Some(created);
1500
1501        let events = ing.items_to_pr_events(&client, vec![item]).unwrap();
1502        // Without details, merged_at is None, so occurred_at falls back to created_at
1503        assert_eq!(events[0].occurred_at, created);
1504    }
1505
1506    #[test]
1507    fn items_to_pr_events_created_mode_uses_created_at() {
1508        let mut ing = make_ingestor("frank");
1509        ing.fetch_details = false;
1510        ing.mode = "created".to_string();
1511
1512        let client = Client::new();
1513        let mut item = make_search_item(1, "org/repo", true);
1514        let created = DateTime::parse_from_rfc3339("2025-04-01T12:00:00Z")
1515            .unwrap()
1516            .with_timezone(&Utc);
1517        item.created_at = Some(created);
1518
1519        let events = ing.items_to_pr_events(&client, vec![item]).unwrap();
1520        assert_eq!(events[0].occurred_at, created);
1521    }
1522
1523    #[test]
1524    fn items_to_pr_events_without_details_has_unknown_visibility() {
1525        let mut ing = make_ingestor("grace");
1526        ing.fetch_details = false;
1527
1528        let client = Client::new();
1529        let items = vec![make_search_item(1, "org/repo", true)];
1530        let events = ing.items_to_pr_events(&client, items).unwrap();
1531
1532        assert_eq!(events[0].repo.visibility, RepoVisibility::Unknown);
1533    }
1534
1535    #[test]
1536    fn items_to_pr_events_without_details_state_is_unknown() {
1537        let mut ing = make_ingestor("heidi");
1538        ing.fetch_details = false;
1539
1540        let client = Client::new();
1541        let items = vec![make_search_item(1, "org/repo", true)];
1542        let events = ing.items_to_pr_events(&client, items).unwrap();
1543
1544        if let EventPayload::PullRequest(ref pr) = events[0].payload {
1545            assert_eq!(pr.state, PullRequestState::Unknown);
1546            assert!(pr.merged_at.is_none());
1547            assert!(pr.additions.is_none());
1548            assert!(pr.deletions.is_none());
1549            assert!(pr.changed_files.is_none());
1550        } else {
1551            panic!("expected PullRequest payload");
1552        }
1553    }
1554
1555    #[test]
1556    fn items_to_pr_events_deterministic_ids() {
1557        let mut ing = make_ingestor("ivan");
1558        ing.fetch_details = false;
1559
1560        let client = Client::new();
1561        let items1 = vec![make_search_item(42, "org/repo", true)];
1562        let items2 = vec![make_search_item(42, "org/repo", true)];
1563
1564        let events1 = ing.items_to_pr_events(&client, items1).unwrap();
1565        let events2 = ing.items_to_pr_events(&client, items2).unwrap();
1566        assert_eq!(
1567            events1[0].id, events2[0].id,
1568            "same inputs should produce same event ID"
1569        );
1570    }
1571
1572    #[test]
1573    fn items_to_pr_events_different_prs_get_different_ids() {
1574        let mut ing = make_ingestor("judy");
1575        ing.fetch_details = false;
1576
1577        let client = Client::new();
1578        let items = vec![
1579            make_search_item(1, "org/repo", true),
1580            make_search_item(2, "org/repo", true),
1581        ];
1582
1583        let events = ing.items_to_pr_events(&client, items).unwrap();
1584        assert_ne!(events[0].id, events[1].id);
1585    }
1586
1587    // -- items_to_review_events (no-network partial) --
1588
1589    #[test]
1590    fn items_to_review_events_skips_items_without_pr_ref() {
1591        let ing = make_ingestor("reviewer");
1592        let client = Client::new();
1593
1594        // Item without pull_request ref should be silently skipped.
1595        // (fetch_pr_reviews would fail, but we never reach it.)
1596        let items = vec![make_search_item(1, "org/repo", false)];
1597
1598        let events = ing.items_to_review_events(&client, items).unwrap();
1599        assert!(events.is_empty());
1600    }
1601
1602    // -- ingest error handling --
1603
1604    #[test]
1605    fn ingest_rejects_since_equals_until() {
1606        let date = NaiveDate::from_ymd_opt(2025, 6, 1).unwrap();
1607        let ing = GithubIngestor::new("user".to_string(), date, date);
1608        let err = ing.ingest().unwrap_err();
1609        assert!(
1610            err.to_string().contains("since must be < until"),
1611            "got: {err}"
1612        );
1613    }
1614
1615    #[test]
1616    fn ingest_rejects_since_after_until() {
1617        let ing = GithubIngestor::new(
1618            "user".to_string(),
1619            NaiveDate::from_ymd_opt(2025, 6, 15).unwrap(),
1620            NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(),
1621        );
1622        let err = ing.ingest().unwrap_err();
1623        assert!(err.to_string().contains("since must be < until"));
1624    }
1625
1626    // -- cache integration --
1627
1628    #[test]
1629    fn with_cache_then_in_memory_cache_overrides() {
1630        let temp = tempfile::tempdir().unwrap();
1631        let ing = make_ingestor("octocat")
1632            .with_cache(temp.path())
1633            .unwrap()
1634            .with_in_memory_cache()
1635            .unwrap();
1636        assert!(ing.cache.is_some());
1637    }
1638
1639    #[test]
1640    fn multiple_with_cache_calls_succeed() {
1641        let temp1 = tempfile::tempdir().unwrap();
1642        let temp2 = tempfile::tempdir().unwrap();
1643        let ing = make_ingestor("octocat")
1644            .with_cache(temp1.path())
1645            .unwrap()
1646            .with_cache(temp2.path())
1647            .unwrap();
1648        assert!(ing.cache.is_some());
1649    }
1650
1651    // ── property tests ──────────────────────────────────────────────────
1652
1653    fn arb_naive_date() -> impl Strategy<Value = NaiveDate> {
1654        (2000i32..2030, 1u32..13, 1u32..29)
1655            .prop_map(|(y, m, d)| NaiveDate::from_ymd_opt(y, m, d).unwrap())
1656    }
1657
1658    fn arb_time_window() -> impl Strategy<Value = TimeWindow> {
1659        (arb_naive_date(), 1u32..366).prop_map(|(since, delta)| {
1660            let until = since + chrono::Duration::days(delta as i64);
1661            TimeWindow { since, until }
1662        })
1663    }
1664
1665    proptest! {
1666        #[test]
1667        fn prop_github_inclusive_range_start_lte_end(w in arb_time_window()) {
1668            let (start, end) = github_inclusive_range(&w);
1669            prop_assert!(start <= end, "start={start} > end={end}");
1670        }
1671
1672        #[test]
1673        fn prop_github_inclusive_range_start_matches_since(w in arb_time_window()) {
1674            let (start, _) = github_inclusive_range(&w);
1675            let expected = w.since.format("%Y-%m-%d").to_string();
1676            prop_assert_eq!(start, expected);
1677        }
1678
1679        #[test]
1680        fn prop_github_inclusive_range_end_is_until_minus_one(w in arb_time_window()) {
1681            let (_, end) = github_inclusive_range(&w);
1682            let expected_date = w.until.pred_opt().unwrap_or(w.until);
1683            let expected = expected_date.format("%Y-%m-%d").to_string();
1684            prop_assert_eq!(end, expected);
1685        }
1686
1687        #[test]
1688        fn prop_build_url_with_params_produces_valid_url(
1689            key in "[a-z]{1,10}",
1690            val in "[a-zA-Z0-9 ]{0,50}",
1691        ) {
1692            let result = build_url_with_params(
1693                "https://api.github.com/search/issues",
1694                &[(&key, val.clone())],
1695            );
1696            prop_assert!(result.is_ok());
1697            let url = result.unwrap();
1698            // URL should not contain raw spaces
1699            prop_assert!(!url.as_str().contains(' '));
1700            // Value should roundtrip
1701            let found: String = url.query_pairs()
1702                .find(|(k, _)| k.as_ref() == key)
1703                .map(|(_, v)| v.into_owned())
1704                .unwrap();
1705            prop_assert_eq!(found, val);
1706        }
1707
1708        #[test]
1709        fn prop_repo_from_repo_url_never_panics(
1710            owner in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,19}",
1711            repo in "[a-zA-Z0-9][a-zA-Z0-9_.-]{0,29}",
1712        ) {
1713            let api_url = format!("https://api.github.com/repos/{}/{}", owner, repo);
1714            let (full, html) = repo_from_repo_url(&api_url, "https://github.com");
1715            let expected_prefix = format!("{}/", owner);
1716            prop_assert!(full.starts_with(&expected_prefix));
1717            prop_assert!(html.starts_with("https://github.com/"));
1718        }
1719
1720        #[test]
1721        fn prop_repo_from_repo_url_arbitrary_strings_never_panic(
1722            s in ".*",
1723        ) {
1724            // Should never panic, even with garbage input
1725            let _ = repo_from_repo_url(&s, "https://github.com");
1726        }
1727
1728        #[test]
1729        fn prop_build_pr_query_contains_user(
1730            user in "[a-zA-Z][a-zA-Z0-9-]{0,19}",
1731        ) {
1732            let ing = GithubIngestor::new(
1733                user.clone(),
1734                NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1735                NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1736            );
1737            let w = TimeWindow {
1738                since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1739                until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1740            };
1741            let q = ing.build_pr_query(&w);
1742            let expected_author = format!("author:{}", user);
1743            prop_assert!(q.contains(&expected_author));
1744            prop_assert!(q.contains("is:pr"));
1745        }
1746
1747        #[test]
1748        fn prop_build_reviewed_query_contains_user(
1749            user in "[a-zA-Z][a-zA-Z0-9-]{0,19}",
1750        ) {
1751            let ing = GithubIngestor::new(
1752                user.clone(),
1753                NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1754                NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1755            );
1756            let w = TimeWindow {
1757                since: NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
1758                until: NaiveDate::from_ymd_opt(2025, 2, 1).unwrap(),
1759            };
1760            let q = ing.build_reviewed_query(&w);
1761            let expected_reviewer = format!("reviewed-by:{}", user);
1762            prop_assert!(q.contains(&expected_reviewer));
1763        }
1764
1765        #[test]
1766        fn prop_api_url_preserves_path(
1767            segment in "[a-z]{1,15}",
1768        ) {
1769            let ing = make_ingestor("test");
1770            let path = format!("/{}", segment);
1771            let url = ing.api_url(&path);
1772            prop_assert!(url.ends_with(&path));
1773            prop_assert!(url.starts_with("https://api.github.com"));
1774        }
1775
1776        #[test]
1777        fn prop_html_base_url_always_returns_valid_string(
1778            base in "(https?://[a-z]{3,15}\\.[a-z]{2,5}(/[a-z]+)*)",
1779        ) {
1780            let mut ing = make_ingestor("test");
1781            ing.api_base = base;
1782            let result = ing.html_base_url();
1783            prop_assert!(!result.is_empty());
1784            prop_assert!(result.starts_with("http"));
1785        }
1786    }
1787}