lore_cli/git/
mod.rs

1//! Git integration.
2//!
3//! Provides git repository discovery, commit information retrieval,
4//! and auto-linking confidence scoring. Used by the link command and
5//! auto-linking features.
6
7use anyhow::{Context, Result};
8use chrono::{DateTime, TimeZone, Utc};
9use std::path::Path;
10
11/// Retrieves information about a git repository.
12///
13/// Discovers the repository containing the given path and extracts
14/// branch, commit, and remote information.
15///
16/// # Errors
17///
18/// Returns an error if the path is not inside a git repository.
19pub fn repo_info(path: &Path) -> Result<RepoInfo> {
20    let repo = git2::Repository::discover(path).context("Not a git repository")?;
21
22    let head = repo.head().ok();
23    let branch = head
24        .as_ref()
25        .and_then(|h| h.shorthand())
26        .map(|s| s.to_string());
27
28    let commit_sha = head
29        .and_then(|h| h.peel_to_commit().ok())
30        .map(|c| c.id().to_string());
31
32    let remote_url = repo
33        .find_remote("origin")
34        .ok()
35        .and_then(|r| r.url().map(|s| s.to_string()));
36
37    let workdir = repo
38        .workdir()
39        .map(|p| p.to_string_lossy().to_string())
40        .unwrap_or_default();
41
42    Ok(RepoInfo {
43        path: workdir,
44        branch,
45        commit_sha,
46        remote_url,
47    })
48}
49
/// Information about a git repository.
///
/// Contains the current state of a repository including branch,
/// HEAD commit, and remote URL. The type is a plain value: it can be
/// cloned and compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepoInfo {
    /// Path to the repository working directory.
    /// Empty when the repository has no working directory.
    /// Currently used for session filtering by working directory.
    #[allow(dead_code)]
    pub path: String,
    /// Current branch name, if HEAD points to a branch.
    /// Used for branch-based session matching in auto-linking.
    #[allow(dead_code)]
    pub branch: Option<String>,
    /// SHA of the current HEAD commit, if HEAD resolves to a commit.
    pub commit_sha: Option<String>,
    /// URL of the "origin" remote, if configured.
    /// Reserved for future remote-based features.
    #[allow(dead_code)]
    pub remote_url: Option<String>,
}
71
72/// Information about a specific git commit.
73///
74/// Contains the SHA, timestamp, branch, and author information.
75#[derive(Debug)]
76pub struct CommitInfo {
77    /// Full SHA of the commit.
78    pub sha: String,
79    /// When the commit was authored.
80    pub timestamp: DateTime<Utc>,
81    /// Branch name the commit is on (if determinable).
82    pub branch: Option<String>,
83    /// Commit message summary (first line).
84    pub summary: String,
85}
86
/// Calculates a confidence score for auto-linking a session to a commit.
///
/// The score is based on multiple factors:
/// - Branch match (20%): Session and commit are on the same branch
/// - File overlap (40%): Proportion of commit files mentioned in the session
/// - Time proximity (30%): Decays linearly to zero over 30 minutes
/// - Recent activity bonus (10%): Extra weight for commits within 5 minutes
///
/// `time_diff_minutes` is treated as a distance: its sign is ignored, so a
/// commit 10 minutes before or 10 minutes after the session scores the same.
/// Duplicate entries in `session_files` do not inflate the file overlap.
///
/// Returns a value between 0.0 and 1.0.
pub fn calculate_link_confidence(
    session_branch: Option<&str>,
    session_files: &[String],
    commit_branch: &str,
    commit_files: &[String],
    time_diff_minutes: i64,
) -> f64 {
    const BRANCH_WEIGHT: f64 = 0.2;
    const FILE_WEIGHT: f64 = 0.4;
    const TIME_WEIGHT: f64 = 0.3;
    const RECENT_BONUS: f64 = 0.1;
    const DECAY_WINDOW_MINUTES: u64 = 30;
    const RECENT_WINDOW_MINUTES: u64 = 5;

    let mut score = 0.0;

    // Branch match
    if session_branch == Some(commit_branch) {
        score += BRANCH_WEIGHT;
    }

    // File overlap: share of the commit's files that the session mentions.
    // Counting over the commit files (not the session files) keeps the ratio
    // within 0..=1 even when the session lists a file more than once.
    if !commit_files.is_empty() {
        let mentioned: std::collections::HashSet<&str> =
            session_files.iter().map(String::as_str).collect();
        let matched = commit_files
            .iter()
            .filter(|file| mentioned.contains(file.as_str()))
            .count();
        score += FILE_WEIGHT * (matched as f64 / commit_files.len() as f64);
    }

    // Use the absolute distance so a negative difference cannot push the
    // time term above its weight (unsigned_abs also handles i64::MIN).
    let elapsed_minutes = time_diff_minutes.unsigned_abs();

    // Time proximity (linear decay over the window)
    if elapsed_minutes < DECAY_WINDOW_MINUTES {
        score += TIME_WEIGHT * (1.0 - elapsed_minutes as f64 / DECAY_WINDOW_MINUTES as f64);
    }

    // Recent activity bonus
    if elapsed_minutes < RECENT_WINDOW_MINUTES {
        score += RECENT_BONUS;
    }

    score.clamp(0.0, 1.0)
}
133
134/// Retrieves information about a specific commit.
135///
136/// Resolves the commit reference (SHA, HEAD, branch name, etc.) and returns
137/// details including timestamp, branch, and summary.
138///
139/// # Errors
140///
141/// Returns an error if the repository cannot be found or the commit
142/// reference cannot be resolved.
143pub fn get_commit_info(repo_path: &Path, commit_ref: &str) -> Result<CommitInfo> {
144    let repo = git2::Repository::discover(repo_path).context("Not a git repository")?;
145
146    // Resolve the reference to a commit
147    let obj = repo
148        .revparse_single(commit_ref)
149        .with_context(|| format!("Could not resolve commit reference: {commit_ref}"))?;
150
151    let commit = obj
152        .peel_to_commit()
153        .with_context(|| format!("Reference is not a commit: {commit_ref}"))?;
154
155    let sha = commit.id().to_string();
156
157    // Convert git timestamp to chrono DateTime
158    let git_time = commit.time();
159    let timestamp = Utc
160        .timestamp_opt(git_time.seconds(), 0)
161        .single()
162        .unwrap_or_else(Utc::now);
163
164    // Try to get the branch name (check if HEAD points to this commit)
165    let branch = repo.head().ok().and_then(|h| {
166        if h.peel_to_commit().ok()?.id() == commit.id() {
167            h.shorthand().map(|s| s.to_string())
168        } else {
169            None
170        }
171    });
172
173    let summary = commit.summary().unwrap_or("").to_string();
174
175    Ok(CommitInfo {
176        sha,
177        timestamp,
178        branch,
179        summary,
180    })
181}
182
183/// Resolves a git reference (SHA, HEAD, branch name, etc.) to a full commit SHA.
184///
185/// Supports:
186/// - Full and partial SHAs
187/// - HEAD and HEAD~N syntax
188/// - Branch names
189/// - Tag names
190///
191/// # Arguments
192///
193/// * `repo_path` - A path inside the git repository
194/// * `reference` - The git reference to resolve (SHA, HEAD, branch, tag, etc.)
195///
196/// # Errors
197///
198/// Returns an error if the repository cannot be found or the reference
199/// cannot be resolved to a valid commit.
200pub fn resolve_commit_ref(repo_path: &Path, reference: &str) -> Result<String> {
201    let repo = git2::Repository::discover(repo_path).context("Not a git repository")?;
202
203    // Resolve the reference to a commit
204    let obj = repo
205        .revparse_single(reference)
206        .with_context(|| format!("Could not resolve reference: {reference}"))?;
207
208    let commit = obj
209        .peel_to_commit()
210        .with_context(|| format!("Reference is not a commit: {reference}"))?;
211
212    Ok(commit.id().to_string())
213}
214
215/// Retrieves the list of files changed in a commit.
216///
217/// Returns the file paths relative to the repository root for all files
218/// that were added, modified, or deleted in the commit.
219///
220/// # Errors
221///
222/// Returns an error if the repository cannot be found or the commit
223/// reference cannot be resolved.
224pub fn get_commit_files(repo_path: &Path, commit_ref: &str) -> Result<Vec<String>> {
225    let repo = git2::Repository::discover(repo_path).context("Not a git repository")?;
226
227    // Resolve the reference to a commit
228    let obj = repo
229        .revparse_single(commit_ref)
230        .with_context(|| format!("Could not resolve commit reference: {commit_ref}"))?;
231
232    let commit = obj
233        .peel_to_commit()
234        .with_context(|| format!("Reference is not a commit: {commit_ref}"))?;
235
236    let tree = commit.tree().context("Could not get commit tree")?;
237
238    // Get the parent tree (or empty tree for initial commit)
239    let parent_tree = commit.parent(0).ok().and_then(|p| p.tree().ok());
240
241    let diff = repo
242        .diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)
243        .context("Could not compute diff")?;
244
245    let mut files = Vec::new();
246
247    diff.foreach(
248        &mut |delta, _| {
249            // Get the new file path (or old path for deletions)
250            let path = delta.new_file().path().or_else(|| delta.old_file().path());
251
252            if let Some(p) = path {
253                files.push(p.to_string_lossy().to_string());
254            }
255            true
256        },
257        None,
258        None,
259        None,
260    )
261    .context("Could not iterate diff")?;
262
263    Ok(files)
264}
265
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned file list from string literals.
    fn files(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    /// Asserts that a score equals the documented total, allowing for
    /// floating-point rounding.
    fn assert_score(actual: f64, expected: f64, what: &str) {
        assert!(
            (actual - expected).abs() < 1e-9,
            "{what}: expected {expected}, got {actual}"
        );
    }

    /// Directory of the crate under test. The repository-based tests below
    /// assume it lives inside a git checkout.
    fn crate_dir() -> &'static std::path::Path {
        std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
    }

    // ==================== calculate_link_confidence Tests ====================

    #[test]
    fn test_calculate_link_confidence_full_match() {
        let session_files = files(&["src/main.rs", "src/lib.rs"]);
        let commit_files = files(&["src/main.rs", "src/lib.rs"]);

        let score = calculate_link_confidence(
            Some("main"),
            &session_files,
            "main",
            &commit_files,
            2, // 2 minutes ago
        );

        // Branch match: 0.2
        // File overlap: 0.4 (100% overlap)
        // Time proximity: 0.3 * (1 - 2/30) = 0.28
        // Recent bonus: 0.1 (within 5 min)
        // Total: 0.98
        assert_score(score, 0.98, "Full match should have high confidence");
    }

    #[test]
    fn test_calculate_link_confidence_no_match() {
        let session_files = files(&["other.rs"]);
        let commit_files = files(&["src/main.rs"]);

        let score = calculate_link_confidence(
            Some("feature"),
            &session_files,
            "main",
            &commit_files,
            60, // 60 minutes ago
        );

        // Branch match: 0 (different)
        // File overlap: 0 (no overlap)
        // Time proximity: 0 (> 30 min)
        // Recent bonus: 0 (> 5 min)
        // Total: 0
        assert_score(score, 0.0, "No match should have zero confidence");
    }

    #[test]
    fn test_calculate_link_confidence_partial_overlap() {
        let session_files = files(&["src/main.rs", "src/lib.rs", "other.rs"]);
        let commit_files = files(&["src/main.rs", "src/lib.rs"]);

        let score = calculate_link_confidence(
            Some("main"),
            &session_files,
            "main",
            &commit_files,
            15, // 15 minutes ago
        );

        // Branch match: 0.2
        // File overlap: 0.4 (100% of commit files are in session files)
        // Time proximity: 0.3 * (1 - 15/30) = 0.15
        // Recent bonus: 0 (> 5 min)
        // Total: 0.75
        assert_score(
            score,
            0.75,
            "Partial match should have medium-high confidence",
        );
    }

    #[test]
    fn test_calculate_link_confidence_time_decay() {
        let session_files = files(&["src/main.rs"]);
        let commit_files = files(&["src/main.rs"]);

        let score_recent =
            calculate_link_confidence(Some("main"), &session_files, "main", &commit_files, 1);

        let score_old =
            calculate_link_confidence(Some("main"), &session_files, "main", &commit_files, 25);

        assert!(
            score_recent > score_old,
            "Recent commits should score higher: {score_recent} vs {score_old}"
        );
    }

    #[test]
    fn test_calculate_link_confidence_caps_at_one() {
        let session_files = files(&["a.rs", "b.rs"]);
        let commit_files = files(&["a.rs"]);

        let score =
            calculate_link_confidence(Some("main"), &session_files, "main", &commit_files, 0);

        assert!(score <= 1.0, "Score should be capped at 1.0: {score}");
    }

    #[test]
    fn test_calculate_link_confidence_empty_files() {
        let session_files: Vec<String> = vec![];
        let commit_files: Vec<String> = vec![];

        let score =
            calculate_link_confidence(Some("main"), &session_files, "main", &commit_files, 5);

        // Should not panic and should give branch + time score:
        // Branch match: 0.2
        // Time proximity: 0.3 * (1 - 5/30) = 0.25
        // Total: 0.45
        assert_score(score, 0.45, "Should handle empty files gracefully");
    }

    // ==================== resolve_commit_ref Tests ====================

    #[test]
    fn test_resolve_commit_ref_with_head() {
        // This test runs in the lore repository itself
        let repo_path = crate_dir();

        // HEAD should always resolve to a valid SHA
        let result = resolve_commit_ref(repo_path, "HEAD");
        assert!(result.is_ok(), "HEAD should resolve: {:?}", result.err());

        let sha = result.unwrap();
        // SHA should be 40 hex characters
        assert_eq!(sha.len(), 40, "SHA should be 40 characters: {sha}");
        assert!(
            sha.chars().all(|c| c.is_ascii_hexdigit()),
            "SHA should be hex: {sha}"
        );
    }

    #[test]
    fn test_resolve_commit_ref_with_head_tilde() {
        let repo_path = crate_dir();

        // HEAD~1 should resolve if there are at least 2 commits
        // This may fail in a fresh repo with only one commit
        let result = resolve_commit_ref(repo_path, "HEAD~1");

        // If the repo has multiple commits, this should succeed
        if let Ok(sha) = result {
            assert_eq!(sha.len(), 40, "SHA should be 40 characters");

            // Should be different from HEAD
            let head_sha = resolve_commit_ref(repo_path, "HEAD").unwrap();
            assert_ne!(sha, head_sha, "HEAD~1 should differ from HEAD");
        }
        // If it fails, that's acceptable for a repo with one commit
    }

    #[test]
    fn test_resolve_commit_ref_with_full_sha() {
        let repo_path = crate_dir();

        // First get HEAD's SHA
        let head_sha = resolve_commit_ref(repo_path, "HEAD").unwrap();

        // Now resolve using the full SHA
        let result = resolve_commit_ref(repo_path, &head_sha);
        assert!(
            result.is_ok(),
            "Full SHA should resolve: {:?}",
            result.err()
        );

        let resolved = result.unwrap();
        assert_eq!(resolved, head_sha, "Resolved SHA should match input");
    }

    #[test]
    fn test_resolve_commit_ref_with_partial_sha() {
        let repo_path = crate_dir();

        // First get HEAD's SHA
        let head_sha = resolve_commit_ref(repo_path, "HEAD").unwrap();

        // Try resolving with first 7 characters (common short SHA length)
        let short_sha = &head_sha[..7];
        let result = resolve_commit_ref(repo_path, short_sha);
        assert!(
            result.is_ok(),
            "Partial SHA should resolve: {:?}",
            result.err()
        );

        let resolved = result.unwrap();
        assert_eq!(resolved, head_sha, "Resolved SHA should be full SHA");
    }

    #[test]
    fn test_resolve_commit_ref_invalid_reference() {
        let repo_path = crate_dir();

        // This reference should not exist
        let result = resolve_commit_ref(repo_path, "nonexistent-branch-xyz123");
        assert!(result.is_err(), "Invalid reference should fail");
    }

    #[test]
    fn test_resolve_commit_ref_not_a_repo() {
        // Use the platform's temp directory instead of a hard-coded "/tmp" so
        // the test also works where that path does not exist. The temp
        // directory is assumed not to be inside a git repository.
        let outside_repo = std::env::temp_dir();
        let result = resolve_commit_ref(&outside_repo, "HEAD");
        assert!(result.is_err(), "Non-repo path should fail");
    }
}