lore_cli/git/
mod.rs

1//! Git integration.
2//!
3//! Provides git repository discovery, commit information retrieval,
4//! and auto-linking confidence scoring. Used by the link command and
5//! auto-linking features.
6
7use anyhow::{Context, Result};
8use chrono::{DateTime, TimeZone, Utc};
9use std::collections::HashSet;
10use std::path::Path;
11
12/// Retrieves information about a git repository.
13///
14/// Discovers the repository containing the given path and extracts
15/// branch, commit, and remote information.
16///
17/// # Errors
18///
19/// Returns an error if the path is not inside a git repository.
20pub fn repo_info(path: &Path) -> Result<RepoInfo> {
21    let repo = git2::Repository::discover(path).context("Not a git repository")?;
22
23    let head = repo.head().ok();
24    let branch = head
25        .as_ref()
26        .and_then(|h| h.shorthand())
27        .map(|s| s.to_string());
28
29    let commit_sha = head
30        .and_then(|h| h.peel_to_commit().ok())
31        .map(|c| c.id().to_string());
32
33    let remote_url = repo
34        .find_remote("origin")
35        .ok()
36        .and_then(|r| r.url().map(|s| s.to_string()));
37
38    let workdir = repo
39        .workdir()
40        .map(|p| p.to_string_lossy().to_string())
41        .unwrap_or_default();
42
43    Ok(RepoInfo {
44        path: workdir,
45        branch,
46        commit_sha,
47        remote_url,
48    })
49}
50
/// Information about a git repository.
///
/// Contains the current state of a repository including branch,
/// HEAD commit, and remote URL. The type is cheap to clone and supports
/// equality comparison so snapshots can be stored and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepoInfo {
    /// Absolute path to the repository working directory.
    /// Currently used for session filtering by working directory.
    /// As populated by `repo_info`, this is an empty string when the
    /// repository has no working directory.
    #[allow(dead_code)]
    pub path: String,
    /// Current branch name, if HEAD points to a branch.
    /// Used for branch-based session matching in auto-linking.
    #[allow(dead_code)]
    pub branch: Option<String>,
    /// SHA of the current HEAD commit, if HEAD resolves to a commit.
    pub commit_sha: Option<String>,
    /// URL of the "origin" remote, if configured.
    /// Reserved for future remote-based features.
    #[allow(dead_code)]
    pub remote_url: Option<String>,
}
72
73/// Information about a specific git commit.
74///
75/// Contains the SHA, timestamp, branch, and author information.
76#[derive(Debug)]
77pub struct CommitInfo {
78    /// Full SHA of the commit.
79    pub sha: String,
80    /// When the commit was authored.
81    pub timestamp: DateTime<Utc>,
82    /// Branch name the commit is on (if determinable).
83    pub branch: Option<String>,
84    /// Commit message summary (first line).
85    pub summary: String,
86}
87
/// Calculates a confidence score for auto-linking a session to a commit.
///
/// The score is the sum of four independently bounded components:
///
/// - Branch match (20%): session and commit are on the same branch.
/// - File overlap (up to 40%): the fraction of `commit_files` that also
///   appear in `session_files`. Duplicate entries in `session_files` do not
///   count more than once, so this component never exceeds its weight.
/// - Time proximity (up to 30%): decays linearly to zero over 30 minutes.
/// - Recent activity bonus (10%): awarded when the commit is less than
///   5 minutes away.
///
/// `time_diff_minutes` is treated as a distance: only its magnitude matters,
/// so a negative value scores the same as the positive value of equal size.
///
/// Returns a value between 0.0 and 1.0.
pub fn calculate_link_confidence(
    session_branch: Option<&str>,
    session_files: &[String],
    commit_branch: &str,
    commit_files: &[String],
    time_diff_minutes: i64,
) -> f64 {
    const BRANCH_WEIGHT: f64 = 0.2;
    const FILE_WEIGHT: f64 = 0.4;
    const TIME_WEIGHT: f64 = 0.3;
    const RECENT_BONUS: f64 = 0.1;
    const DECAY_WINDOW_MINUTES: u64 = 30;
    const RECENT_WINDOW_MINUTES: u64 = 5;

    let mut score = 0.0;

    // Branch match
    if session_branch == Some(commit_branch) {
        score += BRANCH_WEIGHT;
    }

    // File overlap: count commit files that the session touched. Counting
    // from the commit side keeps the ratio within [0, 1] even when the
    // session list contains repeated paths.
    if !commit_files.is_empty() {
        let session_set: HashSet<&str> = session_files.iter().map(String::as_str).collect();
        let matched = commit_files
            .iter()
            .filter(|file| session_set.contains(file.as_str()))
            .count();
        score += FILE_WEIGHT * (matched as f64 / commit_files.len() as f64);
    }

    // Use the magnitude of the difference. `unsigned_abs` cannot overflow,
    // unlike `abs` on `i64::MIN`.
    let elapsed_minutes = time_diff_minutes.unsigned_abs();

    // Time proximity (linear decay over the window)
    if elapsed_minutes < DECAY_WINDOW_MINUTES {
        score += TIME_WEIGHT * (1.0 - (elapsed_minutes as f64 / DECAY_WINDOW_MINUTES as f64));
    }

    // Recent activity bonus
    if elapsed_minutes < RECENT_WINDOW_MINUTES {
        score += RECENT_BONUS;
    }

    score.min(1.0)
}
134
135/// Retrieves all commits in a repository made within a time range.
136///
137/// Walks the commit history from all local branches and collects all commits
138/// whose timestamps fall between `after` and `before` (inclusive on both ends).
139/// Commits are returned in reverse chronological order (newest first).
140///
141/// This is equivalent to `git log --all --after=X --before=Y`.
142///
143/// This is used for auto-linking sessions to commits made during the
144/// session's time window.
145///
146/// # Arguments
147///
148/// * `repo_path` - A path inside the git repository
149/// * `after` - Only include commits at or after this time
150/// * `before` - Only include commits at or before this time
151///
152/// # Errors
153///
154/// Returns an error if the repository cannot be found or the commit
155/// history cannot be walked.
156pub fn get_commits_in_time_range(
157    repo_path: &Path,
158    after: DateTime<Utc>,
159    before: DateTime<Utc>,
160) -> Result<Vec<CommitInfo>> {
161    let repo = git2::Repository::discover(repo_path).context("Not a git repository")?;
162
163    let mut revwalk = repo.revwalk().context("Could not create revision walker")?;
164
165    // Push all local branch refs to walk commits from all branches
166    for branch_result in repo.branches(Some(git2::BranchType::Local))? {
167        let (branch, _) = branch_result?;
168        if let Some(reference) = branch.get().target() {
169            revwalk.push(reference)?;
170        }
171    }
172
173    // Also push HEAD to handle detached HEAD state (common in CI environments).
174    // This is idempotent - if HEAD points to a branch we already pushed, the
175    // seen_shas HashSet will deduplicate commits.
176    if let Ok(head) = repo.head() {
177        if let Ok(commit) = head.peel_to_commit() {
178            revwalk.push(commit.id())?;
179        }
180    }
181
182    revwalk.set_sorting(git2::Sort::TIME)?;
183
184    let after_secs = after.timestamp();
185    let before_secs = before.timestamp();
186
187    let mut commits = Vec::new();
188    let mut seen_shas: HashSet<String> = HashSet::new();
189
190    for oid_result in revwalk {
191        let oid = oid_result.context("Error walking commits")?;
192        let commit = repo.find_commit(oid).context("Could not find commit")?;
193
194        let sha = commit.id().to_string();
195
196        // Skip already-seen commits (same commit reachable from multiple branches)
197        if seen_shas.contains(&sha) {
198            continue;
199        }
200        seen_shas.insert(sha.clone());
201
202        let commit_time = commit.time().seconds();
203
204        // Skip commits outside the time window
205        // Note: we cannot do early exit because commits from different branches
206        // may interleave in the time-sorted order
207        if commit_time < after_secs || commit_time > before_secs {
208            continue;
209        }
210
211        let timestamp = Utc
212            .timestamp_opt(commit_time, 0)
213            .single()
214            .unwrap_or_else(Utc::now);
215
216        // Try to determine branch name by checking if HEAD points to this commit
217        let branch = repo.head().ok().and_then(|h| {
218            if h.peel_to_commit().ok()?.id() == commit.id() {
219                h.shorthand().map(|s| s.to_string())
220            } else {
221                None
222            }
223        });
224
225        let summary = commit.summary().unwrap_or("").to_string();
226
227        commits.push(CommitInfo {
228            sha,
229            timestamp,
230            branch,
231            summary,
232        });
233    }
234
235    Ok(commits)
236}
237
238/// Retrieves information about a specific commit.
239///
240/// Resolves the commit reference (SHA, HEAD, branch name, etc.) and returns
241/// details including timestamp, branch, and summary.
242///
243/// # Errors
244///
245/// Returns an error if the repository cannot be found or the commit
246/// reference cannot be resolved.
247pub fn get_commit_info(repo_path: &Path, commit_ref: &str) -> Result<CommitInfo> {
248    let repo = git2::Repository::discover(repo_path).context("Not a git repository")?;
249
250    // Resolve the reference to a commit
251    let obj = repo
252        .revparse_single(commit_ref)
253        .with_context(|| format!("Could not resolve commit reference: {commit_ref}"))?;
254
255    let commit = obj
256        .peel_to_commit()
257        .with_context(|| format!("Reference is not a commit: {commit_ref}"))?;
258
259    let sha = commit.id().to_string();
260
261    // Convert git timestamp to chrono DateTime
262    let git_time = commit.time();
263    let timestamp = Utc
264        .timestamp_opt(git_time.seconds(), 0)
265        .single()
266        .unwrap_or_else(Utc::now);
267
268    // Try to get the branch name (check if HEAD points to this commit)
269    let branch = repo.head().ok().and_then(|h| {
270        if h.peel_to_commit().ok()?.id() == commit.id() {
271            h.shorthand().map(|s| s.to_string())
272        } else {
273            None
274        }
275    });
276
277    let summary = commit.summary().unwrap_or("").to_string();
278
279    Ok(CommitInfo {
280        sha,
281        timestamp,
282        branch,
283        summary,
284    })
285}
286
287/// Resolves a git reference (SHA, HEAD, branch name, etc.) to a full commit SHA.
288///
289/// Supports:
290/// - Full and partial SHAs
291/// - HEAD and HEAD~N syntax
292/// - Branch names
293/// - Tag names
294///
295/// # Arguments
296///
297/// * `repo_path` - A path inside the git repository
298/// * `reference` - The git reference to resolve (SHA, HEAD, branch, tag, etc.)
299///
300/// # Errors
301///
302/// Returns an error if the repository cannot be found or the reference
303/// cannot be resolved to a valid commit.
304pub fn resolve_commit_ref(repo_path: &Path, reference: &str) -> Result<String> {
305    let repo = git2::Repository::discover(repo_path).context("Not a git repository")?;
306
307    // Resolve the reference to a commit
308    let obj = repo
309        .revparse_single(reference)
310        .with_context(|| format!("Could not resolve reference: {reference}"))?;
311
312    let commit = obj
313        .peel_to_commit()
314        .with_context(|| format!("Reference is not a commit: {reference}"))?;
315
316    Ok(commit.id().to_string())
317}
318
319/// Retrieves the list of files changed in a commit.
320///
321/// Returns the file paths relative to the repository root for all files
322/// that were added, modified, or deleted in the commit.
323///
324/// # Errors
325///
326/// Returns an error if the repository cannot be found or the commit
327/// reference cannot be resolved.
328pub fn get_commit_files(repo_path: &Path, commit_ref: &str) -> Result<Vec<String>> {
329    let repo = git2::Repository::discover(repo_path).context("Not a git repository")?;
330
331    // Resolve the reference to a commit
332    let obj = repo
333        .revparse_single(commit_ref)
334        .with_context(|| format!("Could not resolve commit reference: {commit_ref}"))?;
335
336    let commit = obj
337        .peel_to_commit()
338        .with_context(|| format!("Reference is not a commit: {commit_ref}"))?;
339
340    let tree = commit.tree().context("Could not get commit tree")?;
341
342    // Get the parent tree (or empty tree for initial commit)
343    let parent_tree = commit.parent(0).ok().and_then(|p| p.tree().ok());
344
345    let diff = repo
346        .diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)
347        .context("Could not compute diff")?;
348
349    let mut files = Vec::new();
350
351    diff.foreach(
352        &mut |delta, _| {
353            // Get the new file path (or old path for deletions)
354            let path = delta.new_file().path().or_else(|| delta.old_file().path());
355
356            if let Some(p) = path {
357                files.push(p.to_string_lossy().to_string());
358            }
359            true
360        },
361        None,
362        None,
363        None,
364    )
365    .context("Could not iterate diff")?;
366
367    Ok(files)
368}
369
#[cfg(test)]
mod tests {
    use super::*;

    /// Directory of the crate under test. The git-backed tests below assume
    /// it lives inside a git checkout (the lore repository itself).
    fn crate_dir() -> &'static std::path::Path {
        std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
    }

    /// Builds an owned list of file paths from string literals.
    fn file_list(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    // ==================== calculate_link_confidence Tests ====================

    #[test]
    fn test_calculate_link_confidence_full_match() {
        let touched = file_list(&["src/main.rs", "src/lib.rs"]);
        let committed = file_list(&["src/main.rs", "src/lib.rs"]);

        // Commit made 2 minutes ago.
        let score = calculate_link_confidence(Some("main"), &touched, "main", &committed, 2);

        // Expected breakdown:
        //   branch match    0.2
        //   file overlap    0.4 (100% overlap)
        //   time proximity  0.3 * (1 - 2/30) = 0.28
        //   recent bonus    0.1 (within 5 min)
        //   total           0.98
        assert!(
            score > 0.9,
            "Full match should have high confidence: {score}"
        );
    }

    #[test]
    fn test_calculate_link_confidence_no_match() {
        let touched = file_list(&["other.rs"]);
        let committed = file_list(&["src/main.rs"]);

        // Commit made 60 minutes ago on a different branch.
        let score = calculate_link_confidence(Some("feature"), &touched, "main", &committed, 60);

        // Every component contributes zero: different branch, no shared
        // files, outside the 30 minute decay window and the 5 minute bonus.
        assert!(score < 0.1, "No match should have low confidence: {score}");
    }

    #[test]
    fn test_calculate_link_confidence_partial_overlap() {
        let touched = file_list(&["src/main.rs", "src/lib.rs", "other.rs"]);
        let committed = file_list(&["src/main.rs", "src/lib.rs"]);

        // Commit made 15 minutes ago.
        let score = calculate_link_confidence(Some("main"), &touched, "main", &committed, 15);

        // Expected breakdown:
        //   branch match    0.2
        //   file overlap    0.4 (100% of commit files are in session files)
        //   time proximity  0.3 * (1 - 15/30) = 0.15
        //   recent bonus    0 (> 5 min)
        //   total           0.75
        assert!(
            score > 0.7 && score < 0.8,
            "Partial match should have medium-high confidence: {score}"
        );
    }

    #[test]
    fn test_calculate_link_confidence_time_decay() {
        let touched = file_list(&["src/main.rs"]);
        let committed = file_list(&["src/main.rs"]);

        let score_recent = calculate_link_confidence(Some("main"), &touched, "main", &committed, 1);
        let score_old = calculate_link_confidence(Some("main"), &touched, "main", &committed, 25);

        assert!(
            score_recent > score_old,
            "Recent commits should score higher: {score_recent} vs {score_old}"
        );
    }

    #[test]
    fn test_calculate_link_confidence_caps_at_one() {
        let touched = file_list(&["a.rs", "b.rs"]);
        let committed = file_list(&["a.rs"]);

        let score = calculate_link_confidence(Some("main"), &touched, "main", &committed, 0);

        assert!(score <= 1.0, "Score should be capped at 1.0: {score}");
    }

    #[test]
    fn test_calculate_link_confidence_empty_files() {
        let touched: Vec<String> = Vec::new();
        let committed: Vec<String> = Vec::new();

        let score = calculate_link_confidence(Some("main"), &touched, "main", &committed, 5);

        // Must not panic; branch and time components still contribute.
        assert!(score > 0.0, "Should handle empty files gracefully: {score}");
    }

    // ==================== resolve_commit_ref Tests ====================

    #[test]
    fn test_resolve_commit_ref_with_head() {
        // HEAD should always resolve to a valid SHA.
        let resolved = resolve_commit_ref(crate_dir(), "HEAD");
        assert!(resolved.is_ok(), "HEAD should resolve: {:?}", resolved.err());

        let sha = resolved.unwrap();
        // A full SHA is 40 hex characters.
        assert_eq!(sha.len(), 40, "SHA should be 40 characters: {sha}");
        assert!(
            sha.chars().all(|c| c.is_ascii_hexdigit()),
            "SHA should be hex: {sha}"
        );
    }

    #[test]
    fn test_resolve_commit_ref_with_head_tilde() {
        // HEAD~1 only exists when the repository has at least two commits,
        // so a failure here is tolerated (e.g. a fresh single-commit repo).
        if let Ok(parent_sha) = resolve_commit_ref(crate_dir(), "HEAD~1") {
            assert_eq!(parent_sha.len(), 40, "SHA should be 40 characters");

            // The parent must not be the same commit as HEAD.
            let head_sha = resolve_commit_ref(crate_dir(), "HEAD").unwrap();
            assert_ne!(parent_sha, head_sha, "HEAD~1 should differ from HEAD");
        }
    }

    #[test]
    fn test_resolve_commit_ref_with_full_sha() {
        // Obtain HEAD's SHA, then feed the full SHA back in.
        let head_sha = resolve_commit_ref(crate_dir(), "HEAD").unwrap();

        let round_trip = resolve_commit_ref(crate_dir(), &head_sha);
        assert!(
            round_trip.is_ok(),
            "Full SHA should resolve: {:?}",
            round_trip.err()
        );

        assert_eq!(
            round_trip.unwrap(),
            head_sha,
            "Resolved SHA should match input"
        );
    }

    #[test]
    fn test_resolve_commit_ref_with_partial_sha() {
        let head_sha = resolve_commit_ref(crate_dir(), "HEAD").unwrap();

        // The first 7 characters are the common short SHA length.
        let abbreviated = &head_sha[..7];
        let expanded = resolve_commit_ref(crate_dir(), abbreviated);
        assert!(
            expanded.is_ok(),
            "Partial SHA should resolve: {:?}",
            expanded.err()
        );

        assert_eq!(
            expanded.unwrap(),
            head_sha,
            "Resolved SHA should be full SHA"
        );
    }

    #[test]
    fn test_resolve_commit_ref_invalid_reference() {
        // This reference should not exist.
        let outcome = resolve_commit_ref(crate_dir(), "nonexistent-branch-xyz123");
        assert!(outcome.is_err(), "Invalid reference should fail");
    }

    #[test]
    fn test_resolve_commit_ref_not_a_repo() {
        // /tmp should not be a git repository.
        let outcome = resolve_commit_ref(std::path::Path::new("/tmp"), "HEAD");
        assert!(outcome.is_err(), "Non-repo path should fail");
    }

    // ==================== get_commits_in_time_range Tests ====================

    #[test]
    fn test_get_commits_in_time_range_returns_recent_commits() {
        // Use HEAD's own timestamp to build a window that must contain it.
        let head_info = get_commit_info(crate_dir(), "HEAD").expect("Should get HEAD commit info");

        // One hour either side of the HEAD commit.
        let window_start = head_info.timestamp - chrono::Duration::hours(1);
        let window_end = head_info.timestamp + chrono::Duration::hours(1);

        let outcome = get_commits_in_time_range(crate_dir(), window_start, window_end);
        assert!(
            outcome.is_ok(),
            "Should get commits in time range: {:?}",
            outcome.err()
        );

        let commits = outcome.unwrap();
        assert!(
            !commits.is_empty(),
            "Should find at least HEAD commit in time range"
        );

        // HEAD itself must be among the results.
        let has_head = commits.iter().any(|c| c.sha == head_info.sha);
        assert!(has_head, "HEAD commit should be in results");
    }

    #[test]
    fn test_get_commits_in_time_range_empty_for_future() {
        // A window that lies entirely in the future.
        let now = Utc::now();
        let window_start = now + chrono::Duration::days(365);
        let window_end = now + chrono::Duration::days(366);

        let outcome = get_commits_in_time_range(crate_dir(), window_start, window_end);
        assert!(outcome.is_ok(), "Should succeed even with future dates");

        assert!(
            outcome.unwrap().is_empty(),
            "Should find no commits in future time range"
        );
    }

    #[test]
    fn test_get_commits_in_time_range_empty_for_distant_past() {
        // A window from before git was invented.
        let window_start = Utc.with_ymd_and_hms(1990, 1, 1, 0, 0, 0).unwrap();
        let window_end = Utc.with_ymd_and_hms(1990, 1, 2, 0, 0, 0).unwrap();

        let outcome = get_commits_in_time_range(crate_dir(), window_start, window_end);
        assert!(outcome.is_ok(), "Should succeed even with past dates");

        assert!(
            outcome.unwrap().is_empty(),
            "Should find no commits in distant past time range"
        );
    }

    #[test]
    fn test_get_commits_in_time_range_not_a_repo() {
        // /tmp should not be a git repository.
        let window_start = Utc::now() - chrono::Duration::hours(1);
        let window_end = Utc::now();

        let outcome =
            get_commits_in_time_range(std::path::Path::new("/tmp"), window_start, window_end);
        assert!(outcome.is_err(), "Non-repo path should fail");
    }

    #[test]
    fn test_get_commits_in_time_range_commit_info_complete() {
        // Build a two-second window centred on the HEAD commit.
        let head_info = get_commit_info(crate_dir(), "HEAD").expect("Should get HEAD commit info");

        let window_start = head_info.timestamp - chrono::Duration::seconds(1);
        let window_end = head_info.timestamp + chrono::Duration::seconds(1);

        let commits = get_commits_in_time_range(crate_dir(), window_start, window_end)
            .expect("Should get commits");

        // Locate the HEAD commit among the results.
        let found = commits.iter().find(|c| c.sha == head_info.sha);
        assert!(found.is_some(), "HEAD commit should be in results");
        let head_commit = found.unwrap();

        // Every field that can be checked should be fully populated.
        assert_eq!(head_commit.sha.len(), 40, "SHA should be 40 characters");
        assert!(
            head_commit.sha.chars().all(|c| c.is_ascii_hexdigit()),
            "SHA should be hex"
        );
        assert_eq!(
            head_commit.timestamp, head_info.timestamp,
            "Timestamp should match"
        );
        // The summary is deliberately not checked: a commit may legitimately
        // have an empty message.
    }

    #[test]
    fn test_get_commits_in_time_range_sorted_by_time() {
        // A wide window so that several commits are likely to be returned.
        let window_end = Utc::now();
        let window_start = window_end - chrono::Duration::days(30);

        // Skip silently when the repository cannot be read.
        let commits = match get_commits_in_time_range(crate_dir(), window_start, window_end) {
            Ok(found) => found,
            Err(_) => return,
        };

        // Ordering can only be checked with at least two commits.
        if commits.len() < 2 {
            return;
        }

        // Each commit must be at least as new as the one that follows it.
        for pair in commits.windows(2) {
            let (newer, older) = (&pair[0], &pair[1]);
            assert!(
                newer.timestamp >= older.timestamp,
                "Commits should be sorted newest first: {} >= {}",
                newer.timestamp,
                older.timestamp
            );
        }
    }
}
723}