Skip to main content

shiplog_ingest_git/
lib.rs

1//! Local git repository ingestor.
2//!
3//! Collects commit history from local git repositories without
4//! requiring GitHub API access or authentication.
5
6use anyhow::{Context, Result, anyhow};
7use chrono::{DateTime, NaiveDate, Utc};
8use git2::{Repository, Time};
9use shiplog_ids::{EventId, RunId};
10use shiplog_ports::{IngestOutput, Ingestor};
11use shiplog_schema::coverage::{Completeness, CoverageManifest, CoverageSlice, TimeWindow};
12use shiplog_schema::event::{
13    Actor, EventEnvelope, EventKind, EventPayload, PullRequestEvent, PullRequestState, RepoRef,
14    RepoVisibility, SourceRef, SourceSystem,
15};
16use shiplog_schema::freshness::{FreshnessStatus, SourceFreshness};
17use std::path::{Path, PathBuf};
18
19/// Local git repository ingestor.
20///
21/// Collects commit history from a local git repository and converts it
22/// into shiplog events.
23#[derive(Debug)]
24pub struct LocalGitIngestor {
25    /// Path to the git repository
26    pub repo_path: PathBuf,
27    /// Start date for commit collection
28    pub since: NaiveDate,
29    /// End date for commit collection
30    pub until: NaiveDate,
31    /// Optional author email filter
32    pub author: Option<String>,
33    /// Include merge commits in the output
34    pub include_merges: bool,
35}
36
37impl LocalGitIngestor {
38    /// Create a new local git ingestor.
39    pub fn new(repo_path: impl AsRef<Path>, since: NaiveDate, until: NaiveDate) -> Self {
40        Self {
41            repo_path: repo_path.as_ref().to_path_buf(),
42            since,
43            until,
44            author: None,
45            include_merges: false,
46        }
47    }
48
49    /// Filter commits by author email.
50    pub fn with_author(mut self, author: impl Into<String>) -> Self {
51        self.author = Some(author.into());
52        self
53    }
54
55    /// Include merge commits in the output.
56    pub fn with_merges(mut self, include: bool) -> Self {
57        self.include_merges = include;
58        self
59    }
60
61    /// Open the git repository.
62    fn open_repo(&self) -> Result<Repository> {
63        let path = &self.repo_path;
64        if !path.exists() {
65            return Err(anyhow!("Path does not exist: {}", path.display()));
66        }
67
68        Repository::open(path)
69            .with_context(|| format!("Failed to open git repository at {}", path.display()))
70    }
71
72    /// Get the repository name from the git config.
73    #[mutants::skip]
74    fn get_repo_name(&self, repo: &Repository) -> Result<String> {
75        // Try to get the remote URL and extract the repo name
76        if let Ok(remote) = repo.find_remote("origin")
77            && let Some(url) = remote.url()
78        {
79            // Extract repo name from URL like:
80            // https://github.com/owner/repo.git
81            // git@github.com:owner/repo.git
82            if let Some(name) = url.split('/').next_back() {
83                return Ok(name.trim_end_matches(".git").to_string());
84            }
85        }
86
87        // Fallback to directory name
88        self.repo_path
89            .file_name()
90            .and_then(|n| n.to_str())
91            .map(|s| s.to_string())
92            .ok_or_else(|| anyhow!("Could not determine repository name"))
93    }
94
95    /// Convert git2 Time to DateTime<Utc>.
96    fn git_time_to_datetime(time: &Time) -> DateTime<Utc> {
97        DateTime::from_timestamp(time.seconds(), 0)
98            .unwrap_or_else(|| DateTime::from_timestamp(0, 0).unwrap())
99    }
100
101    /// Check if a commit is within the date range.
102    fn is_in_date_range(&self, commit_time: &DateTime<Utc>) -> bool {
103        let commit_date = commit_time.date_naive();
104        commit_date >= self.since && commit_date <= self.until
105    }
106
107    /// Check if a commit matches the author filter.
108    fn matches_author(&self, commit: &git2::Commit) -> bool {
109        if let Some(ref author_email) = self.author {
110            let author = commit.author();
111            let email_matches = author
112                .email()
113                .map(|e| e.to_lowercase() == author_email.to_lowercase())
114                .unwrap_or(false);
115            let name_matches = author
116                .name()
117                .map(|n| n.to_lowercase() == author_email.to_lowercase())
118                .unwrap_or(false);
119            email_matches || name_matches
120        } else {
121            true
122        }
123    }
124
125    /// Check if a commit is a merge commit.
126    fn is_merge_commit(commit: &git2::Commit) -> bool {
127        commit.parent_count() > 1
128    }
129
130    /// Create an event envelope from a git commit.
131    #[mutants::skip]
132    fn commit_to_event(
133        &self,
134        commit: &git2::Commit,
135        repo_name: &str,
136        _run_id: &RunId,
137    ) -> Result<EventEnvelope> {
138        let commit_time = Self::git_time_to_datetime(&commit.time());
139        let commit_hash = commit.id().to_string();
140
141        // Extract first line of commit message as title
142        let title = commit.summary().unwrap_or("<no message>").to_string();
143
144        let author = commit.author();
145        let author_name = author.name().unwrap_or("Unknown").to_string();
146        let author_email = author.email().unwrap_or("").to_string();
147
148        // Create actor (use email as login if name is not available)
149        let actor_login = if !author_email.is_empty() {
150            author_email.clone()
151        } else {
152            author_name.clone()
153        };
154
155        // Create event ID from commit hash
156        let event_id = EventId::from_parts(["local_git", &commit_hash]);
157
158        // Create source reference
159        let source = SourceRef {
160            system: SourceSystem::LocalGit,
161            url: None,
162            opaque_id: Some(commit_hash),
163        };
164
165        // Create repository reference
166        let repo = RepoRef {
167            full_name: repo_name.to_string(),
168            html_url: None,
169            visibility: RepoVisibility::Unknown,
170        };
171
172        // Create actor
173        let actor = Actor {
174            login: actor_login,
175            id: None,
176        };
177
178        // Create pull request event (using commit as proxy for PR)
179        // This is a simplification - in a real implementation, we might
180        // want to distinguish between regular commits and PR commits
181        let payload = EventPayload::PullRequest(PullRequestEvent {
182            number: 0, // No PR number for local commits
183            title,
184            state: PullRequestState::Merged, // All commits are "merged" in this context
185            created_at: commit_time,
186            merged_at: Some(commit_time),
187            additions: None,
188            deletions: None,
189            changed_files: None,
190            touched_paths_hint: vec![],
191            window: Some(TimeWindow {
192                since: self.since,
193                until: self.until,
194            }),
195        });
196
197        // Create links (could include link to commit if remote URL is available)
198        let links = vec![];
199
200        Ok(EventEnvelope {
201            id: event_id,
202            kind: EventKind::PullRequest,
203            occurred_at: commit_time,
204            actor,
205            repo,
206            payload,
207            tags: vec![],
208            links,
209            source,
210        })
211    }
212
213    /// Collect commits from the repository.
214    #[mutants::skip]
215    fn collect_commits(&self, repo: &Repository, run_id: &RunId) -> Result<Vec<EventEnvelope>> {
216        let mut events = Vec::new();
217        let repo_name = self.get_repo_name(repo)?;
218
219        // Get the HEAD reference
220        let head = repo.head().context("Failed to get HEAD reference")?;
221        let head_commit = head.peel_to_commit().context("Failed to peel to commit")?;
222
223        // Walk the commit history
224        let mut revwalk = repo.revwalk().context("Failed to create revwalk")?;
225        revwalk
226            .push(head_commit.id())
227            .context("Failed to push HEAD to revwalk")?;
228
229        for commit_id in revwalk {
230            let commit_id = commit_id.context("Failed to get commit id")?;
231            let commit = repo
232                .find_commit(commit_id)
233                .context("Failed to find commit")?;
234
235            let commit_time = Self::git_time_to_datetime(&commit.time());
236
237            // Stop if we've gone past the since date
238            if commit_time.date_naive() < self.since {
239                break;
240            }
241
242            // Check if commit is in date range
243            if !self.is_in_date_range(&commit_time) {
244                continue;
245            }
246
247            // Check author filter
248            if !self.matches_author(&commit) {
249                continue;
250            }
251
252            // Check merge commit filter
253            if !self.include_merges && Self::is_merge_commit(&commit) {
254                continue;
255            }
256
257            // Convert commit to event
258            match self.commit_to_event(&commit, &repo_name, run_id) {
259                Ok(event) => events.push(event),
260                Err(e) => {
261                    // Log error but continue processing other commits
262                    eprintln!("Warning: Failed to convert commit to event: {}", e);
263                }
264            }
265        }
266
267        // Sort events by timestamp (newest first)
268        events.sort_by_key(|e| std::cmp::Reverse(e.occurred_at));
269
270        Ok(events)
271    }
272}
273
274impl Ingestor for LocalGitIngestor {
275    fn ingest(&self) -> Result<IngestOutput> {
276        if self.since >= self.until {
277            return Err(anyhow!("since must be < until"));
278        }
279
280        let repo = self.open_repo()?;
281        let run_id = RunId::now("shiplog");
282
283        // Collect commits
284        let events = self.collect_commits(&repo, &run_id)?;
285
286        // Create coverage slice
287        let coverage_slice = CoverageSlice {
288            window: TimeWindow {
289                since: self.since,
290                until: self.until,
291            },
292            query: format!("local_git:{}", self.repo_path.display()),
293            total_count: events.len() as u64,
294            fetched: events.len() as u64,
295            incomplete_results: Some(false),
296            notes: vec![],
297        };
298
299        // Create coverage manifest
300        let fetched_at = Utc::now();
301        let coverage = CoverageManifest {
302            run_id: run_id.clone(),
303            generated_at: fetched_at,
304            user: "local".to_string(),
305            window: TimeWindow {
306                since: self.since,
307                until: self.until,
308            },
309            mode: "local".to_string(),
310            sources: vec!["local_git".to_string()],
311            slices: vec![coverage_slice],
312            warnings: vec![],
313            completeness: Completeness::Complete,
314        };
315
316        let freshness = vec![SourceFreshness {
317            source: "local_git".to_string(),
318            status: FreshnessStatus::Fresh,
319            cache_hits: 0,
320            cache_misses: 0,
321            fetched_at: Some(fetched_at),
322            reason: None,
323        }];
324
325        Ok(IngestOutput {
326            events,
327            coverage,
328            freshness,
329        })
330    }
331}
332
#[cfg(test)]
mod tests {
    use super::*;
    use git2::Signature;
    use proptest::prelude::*;
    use tempfile::TempDir;

    // ── Fixture helpers ─────────────────────────────────────────────────

    /// Shorthand for a calendar date that is known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Working-tree directory of a fixture repository (the parent of `.git`).
    fn workdir_of(repo: &Repository) -> PathBuf {
        repo.path().parent().unwrap().to_path_buf()
    }

    /// The commit `HEAD` currently points at.
    fn head_commit(repo: &Repository) -> git2::Commit<'_> {
        repo.head().unwrap().peel_to_commit().unwrap()
    }

    /// Ingestor over `/tmp` covering all of 2025, for tests that never open
    /// the repository.
    fn detached_ingestor() -> LocalGitIngestor {
        LocalGitIngestor::new("/tmp", ymd(2025, 1, 1), ymd(2025, 12, 31))
    }

    /// Ingestor over `repo` whose window (2020 through 2030) contains every
    /// fixture commit.
    fn wide_ingestor(repo: &Repository) -> LocalGitIngestor {
        LocalGitIngestor::new(workdir_of(repo), ymd(2020, 1, 1), ymd(2030, 12, 31))
    }

    /// Commit `tree_id` with `who` as both author and committer, on top of
    /// the commits named by `parent_ids`.
    fn commit_tree(
        repo: &Repository,
        update_ref: Option<&str>,
        who: &Signature<'_>,
        tree_id: git2::Oid,
        message: &str,
        parent_ids: &[git2::Oid],
    ) -> Result<git2::Oid> {
        let tree = repo.find_tree(tree_id)?;
        let parents = parent_ids
            .iter()
            .map(|id| repo.find_commit(*id))
            .collect::<std::result::Result<Vec<_>, _>>()?;
        let parent_refs: Vec<&git2::Commit<'_>> = parents.iter().collect();
        Ok(repo.commit(update_ref, who, who, message, &tree, &parent_refs)?)
    }

    /// Create a repo with two linear commits by "Test User".
    fn create_test_repo() -> Result<(TempDir, Repository)> {
        let dir = TempDir::new()?;
        let repo = Repository::init(dir.path())?;

        // Configure the identity used by `repo.signature()`.
        let mut config = repo.config()?;
        config.set_str("user.name", "Test User")?;
        config.set_str("user.email", "test@example.com")?;

        let sig = repo.signature()?;
        let tree_id = repo.index()?.write_tree()?;

        let first = commit_tree(&repo, Some("HEAD"), &sig, tree_id, "Initial commit", &[])?;
        commit_tree(&repo, Some("HEAD"), &sig, tree_id, "Second commit", &[first])?;

        Ok((dir, repo))
    }

    /// Create a repo with commits from multiple authors and a merge commit.
    fn create_multi_author_repo() -> Result<(TempDir, Repository)> {
        let dir = TempDir::new()?;
        let repo = Repository::init(dir.path())?;

        let mut config = repo.config()?;
        config.set_str("user.name", "Alice")?;
        config.set_str("user.email", "alice@example.com")?;

        let alice = Signature::now("Alice", "alice@example.com")?;
        let bob = Signature::now("Bob", "bob@example.com")?;

        let tree_id = repo.index()?.write_tree()?;
        let on_head = Some("HEAD");

        // Linear history: Alice, Bob, Alice.
        let c1 = commit_tree(&repo, on_head, &alice, tree_id, "Alice initial", &[])?;
        let c2 = commit_tree(&repo, on_head, &bob, tree_id, "Bob feature work", &[c1])?;
        let c3 = commit_tree(&repo, on_head, &alice, tree_id, "Alice second commit", &[c2])?;

        // Side branch commit that does not move HEAD.
        let branch = commit_tree(&repo, None, &bob, tree_id, "Bob branch commit", &[c3])?;

        // Merge commit (two parents).
        commit_tree(
            &repo,
            on_head,
            &alice,
            tree_id,
            "Merge branch into main",
            &[c3, branch],
        )?;

        Ok((dir, repo))
    }

    // ── Unit tests ──────────────────────────────────────────────────────

    #[test]
    fn test_open_repo() {
        let (_dir, repo) = create_test_repo().unwrap();
        let ingestor =
            LocalGitIngestor::new(workdir_of(&repo), ymd(2025, 1, 1), ymd(2025, 12, 31));

        assert!(ingestor.open_repo().is_ok());
    }

    #[test]
    fn test_open_nonexistent_repo() {
        let ingestor =
            LocalGitIngestor::new("/nonexistent/path", ymd(2025, 1, 1), ymd(2025, 12, 31));

        assert!(ingestor.open_repo().is_err());
    }

    #[test]
    fn test_git_time_to_datetime() {
        // 2024-01-01 00:00:00 UTC
        let time = Time::new(1704067200, 0);
        let dt = LocalGitIngestor::git_time_to_datetime(&time);
        assert_eq!(dt.timestamp(), 1704067200);
    }

    #[test]
    fn test_is_in_date_range() {
        let ingestor = LocalGitIngestor::new("/tmp", ymd(2025, 1, 1), ymd(2025, 1, 31));

        let before = DateTime::from_timestamp(1733011200, 0).unwrap(); // 2024-12-01
        let inside = DateTime::from_timestamp(1735689600, 0).unwrap(); // 2025-01-01
        let after = DateTime::from_timestamp(1738368000, 0).unwrap(); // 2025-02-01

        assert!(!ingestor.is_in_date_range(&before));
        assert!(ingestor.is_in_date_range(&inside));
        assert!(!ingestor.is_in_date_range(&after));
    }

    #[test]
    fn test_matches_author() {
        let (_dir, repo) = create_test_repo().unwrap();
        let commit = head_commit(&repo);

        let matching = detached_ingestor().with_author("test@example.com");
        assert!(matching.matches_author(&commit));

        let other = detached_ingestor().with_author("other@example.com");
        assert!(!other.matches_author(&commit));
    }

    #[test]
    fn test_is_merge_commit() {
        let (_dir, repo) = create_test_repo().unwrap();
        let commit = head_commit(&repo);

        // Regular commit with one parent
        assert!(!LocalGitIngestor::is_merge_commit(&commit));
    }

    #[test]
    fn test_ingest() {
        let (_dir, repo) = create_test_repo().unwrap();
        let ingestor =
            LocalGitIngestor::new(workdir_of(&repo), ymd(2024, 1, 1), ymd(2030, 12, 31));

        let result = ingestor.ingest();
        assert!(result.is_ok());

        let output = result.unwrap();
        assert!(!output.events.is_empty());
        assert_eq!(output.coverage.slices.len(), 1);
        assert_eq!(output.coverage.sources, vec!["local_git"]);
    }

    #[test]
    fn test_ingest_with_author_filter() {
        let (_dir, repo) = create_test_repo().unwrap();
        let ingestor =
            LocalGitIngestor::new(workdir_of(&repo), ymd(2024, 1, 1), ymd(2030, 12, 31))
                .with_author("test@example.com");

        let result = ingestor.ingest();
        assert!(result.is_ok());

        let output = result.unwrap();
        assert!(!output.events.is_empty());
    }

    #[test]
    fn test_ingest_invalid_date_range() {
        let (_dir, repo) = create_test_repo().unwrap();
        // `since` lies after `until`.
        let ingestor =
            LocalGitIngestor::new(workdir_of(&repo), ymd(2025, 12, 31), ymd(2025, 1, 1));

        assert!(ingestor.ingest().is_err());
    }

    // ── Property tests ──────────────────────────────────────────────────

    proptest! {
        #[test]
        fn git_time_to_datetime_always_valid(secs in 0i64..=4_102_444_800i64) {
            let dt = LocalGitIngestor::git_time_to_datetime(&Time::new(secs, 0));
            prop_assert_eq!(dt.timestamp(), secs);
        }

        #[test]
        fn git_time_to_datetime_negative_yields_epoch(secs in i64::MIN..0i64) {
            let dt = LocalGitIngestor::git_time_to_datetime(&Time::new(secs, 0));
            // Negative timestamps either map correctly or fall back to epoch 0
            let stamp = dt.timestamp();
            prop_assert!(stamp == secs || stamp == 0);
        }

        #[test]
        fn is_in_date_range_boundary_inclusive(
            day_offset in 0u32..365,
        ) {
            let since = ymd(2025, 1, 1);
            let until = ymd(2025, 12, 31);
            let ingestor = LocalGitIngestor::new("/tmp", since, until);

            let test_date = since + chrono::Duration::days(day_offset as i64);
            // Build a DateTime at midnight UTC on that date
            let dt = test_date.and_hms_opt(0, 0, 0).unwrap().and_utc();

            if test_date >= since && test_date <= until {
                prop_assert!(ingestor.is_in_date_range(&dt));
            } else {
                prop_assert!(!ingestor.is_in_date_range(&dt));
            }
        }

        #[test]
        fn builder_preserves_author(author in "[a-z]+@[a-z]+\\.[a-z]+") {
            let ingestor = detached_ingestor().with_author(&author);
            prop_assert_eq!(ingestor.author.as_deref(), Some(author.as_str()));
        }

        #[test]
        fn builder_preserves_merges(flag in proptest::bool::ANY) {
            let ingestor = detached_ingestor().with_merges(flag);
            prop_assert_eq!(ingestor.include_merges, flag);
        }
    }

    // ── Integration tests with fixture repos ────────────────────────────

    #[test]
    fn ingest_author_filter_isolates_single_author() {
        let (_dir, repo) = create_multi_author_repo().unwrap();

        // Only Alice's commits
        let alice_out = wide_ingestor(&repo)
            .with_author("alice@example.com")
            .ingest()
            .unwrap();

        // Only Bob's commits
        let bob_out = wide_ingestor(&repo)
            .with_author("bob@example.com")
            .ingest()
            .unwrap();

        // All commits (no author filter)
        let all_out = wide_ingestor(&repo).with_merges(true).ingest().unwrap();

        assert!(!alice_out.events.is_empty());
        assert!(!bob_out.events.is_empty());
        // Author-filtered sets should be subsets of all events
        assert!(alice_out.events.len() + bob_out.events.len() <= all_out.events.len());
    }

    #[test]
    fn author_matching_is_case_insensitive() {
        let (_dir, repo) = create_test_repo().unwrap();
        let commit = head_commit(&repo);

        let upper = detached_ingestor().with_author("TEST@EXAMPLE.COM");
        assert!(upper.matches_author(&commit));

        let mixed = detached_ingestor().with_author("Test@Example.Com");
        assert!(mixed.matches_author(&commit));
    }

    #[test]
    fn author_matching_by_name() {
        let (_dir, repo) = create_test_repo().unwrap();
        let commit = head_commit(&repo);

        // Match by name instead of email
        let by_name = detached_ingestor().with_author("Test User");
        assert!(by_name.matches_author(&commit));
    }

    #[test]
    fn no_author_filter_matches_all() {
        let (_dir, repo) = create_test_repo().unwrap();
        let commit = head_commit(&repo);

        // No author set → matches everything
        assert!(detached_ingestor().matches_author(&commit));
    }

    #[test]
    fn merge_commit_detected_in_multi_author_repo() {
        let (_dir, repo) = create_multi_author_repo().unwrap();
        let commit = head_commit(&repo);

        // The HEAD in multi_author_repo is the merge commit
        assert!(LocalGitIngestor::is_merge_commit(&commit));
    }

    #[test]
    fn ingest_excludes_merges_by_default() {
        let (_dir, repo) = create_multi_author_repo().unwrap();

        let no_merge_out = wide_ingestor(&repo).ingest().unwrap();
        let with_merge_out = wide_ingestor(&repo).with_merges(true).ingest().unwrap();

        // Including merges should produce at least one more event
        assert!(with_merge_out.events.len() > no_merge_out.events.len());
    }

    #[test]
    fn ingest_narrow_date_range_filters_correctly() {
        let (_dir, repo) = create_test_repo().unwrap();

        // Use a date range far in the past — no commits should match
        let ingestor = LocalGitIngestor::new(workdir_of(&repo), ymd(2000, 1, 1), ymd(2000, 1, 2));
        let output = ingestor.ingest().unwrap();

        assert!(output.events.is_empty());
        assert_eq!(output.coverage.slices[0].total_count, 0);
    }

    #[test]
    fn ingest_nonexistent_author_yields_empty() {
        let (_dir, repo) = create_test_repo().unwrap();

        let output = wide_ingestor(&repo)
            .with_author("nobody@nowhere.com")
            .ingest()
            .unwrap();

        assert!(output.events.is_empty());
    }

    #[test]
    fn coverage_manifest_populated_correctly() {
        let (_dir, repo) = create_test_repo().unwrap();

        let since = ymd(2020, 1, 1);
        let until = ymd(2030, 12, 31);
        let output = LocalGitIngestor::new(workdir_of(&repo), since, until)
            .ingest()
            .unwrap();

        let coverage = &output.coverage;
        assert_eq!(coverage.window.since, since);
        assert_eq!(coverage.window.until, until);
        assert_eq!(coverage.user, "local");
        assert_eq!(coverage.mode, "local");
        assert_eq!(coverage.sources, vec!["local_git"]);
        assert_eq!(coverage.slices.len(), 1);

        let slice = &coverage.slices[0];
        assert_eq!(slice.total_count, slice.fetched);
        assert_eq!(slice.total_count, output.events.len() as u64);
        assert_eq!(slice.incomplete_results, Some(false));
    }

    #[test]
    fn events_sorted_newest_first() {
        let (_dir, repo) = create_test_repo().unwrap();
        let output = wide_ingestor(&repo).ingest().unwrap();

        for pair in output.events.windows(2) {
            assert!(pair[0].occurred_at >= pair[1].occurred_at);
        }
    }

    #[test]
    fn all_events_have_local_git_source() {
        let (_dir, repo) = create_test_repo().unwrap();
        let output = wide_ingestor(&repo).ingest().unwrap();

        for event in &output.events {
            assert_eq!(event.source.system, SourceSystem::LocalGit);
            assert!(event.source.opaque_id.is_some());
            assert_eq!(event.kind, EventKind::PullRequest);
        }
    }

    // ── Error handling tests ────────────────────────────────────────────

    #[test]
    fn ingest_equal_dates_errors() {
        let (_dir, repo) = create_test_repo().unwrap();

        let same_date = ymd(2025, 6, 15);
        let ingestor = LocalGitIngestor::new(workdir_of(&repo), same_date, same_date);

        let err = ingestor.ingest().unwrap_err();
        assert!(err.to_string().contains("since must be < until"));
    }

    #[test]
    fn open_path_exists_but_not_a_repo() {
        let dir = TempDir::new().unwrap();
        let ingestor = LocalGitIngestor::new(dir.path(), ymd(2025, 1, 1), ymd(2025, 12, 31));

        let err = ingestor.open_repo().err().expect("expected an error");
        assert!(err.to_string().contains("Failed to open git repository"));
    }

    #[test]
    fn git_time_to_datetime_at_epoch() {
        let dt = LocalGitIngestor::git_time_to_datetime(&Time::new(0, 0));
        assert_eq!(dt.timestamp(), 0);
    }
}