infiniloom_engine/
git.rs

1//! Git integration for diff/log analysis
2//!
3//! Provides integration with Git for:
4//! - Getting changed files between commits
5//! - Extracting commit history
6//! - Blame information for file importance
7
8use std::path::Path;
9use std::process::Command;
10use thiserror::Error;
11
/// Handle to a Git working tree on disk.
///
/// Created with [`GitRepo::open`]. Every query shells out to the `git`
/// executable, using the stored directory as the working directory.
#[derive(Debug, Clone)]
pub struct GitRepo {
    /// Directory the git commands run in, kept as a lossily converted string.
    path: String,
}
16
/// A single commit as reported by `git log`.
///
/// All fields are plain strings taken verbatim from git's output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Commit {
    /// Full commit hash (`%H`).
    pub hash: String,
    /// Abbreviated commit hash (`%h`).
    pub short_hash: String,
    /// Author name (`%an`).
    pub author: String,
    /// Author e-mail address (`%ae`).
    pub email: String,
    /// Author date (`%ad`), requested with `--date=short`.
    pub date: String,
    /// Subject line of the commit message (`%s`).
    pub message: String,
}
27
28/// A file changed in a commit
29#[derive(Debug, Clone)]
30pub struct ChangedFile {
31    /// Current path (or new path for renames)
32    pub path: String,
33    /// Original path for renamed/copied files (None for add/modify/delete)
34    pub old_path: Option<String>,
35    pub status: FileStatus,
36    pub additions: u32,
37    pub deletions: u32,
38}
39
/// Kind of change git reports for a file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileStatus {
    /// Status letter `A`.
    Added,
    /// Status letter `M`.
    Modified,
    /// Status letter `D`.
    Deleted,
    /// Status letter `R`.
    Renamed,
    /// Status letter `C`.
    Copied,
    /// Any status letter not listed above.
    Unknown,
}

impl FileStatus {
    /// Translate a single git status letter into a [`FileStatus`].
    ///
    /// The comparison is case-sensitive; every letter other than
    /// `A`, `M`, `D`, `R` and `C` maps to [`FileStatus::Unknown`].
    fn from_char(c: char) -> Self {
        // Lookup table of the letters this module understands.
        const CODES: [(char, FileStatus); 5] = [
            ('A', FileStatus::Added),
            ('M', FileStatus::Modified),
            ('D', FileStatus::Deleted),
            ('R', FileStatus::Renamed),
            ('C', FileStatus::Copied),
        ];

        CODES
            .iter()
            .find(|(code, _)| *code == c)
            .map_or(Self::Unknown, |&(_, status)| status)
    }
}
63
/// Blame entry for a line
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlameLine {
    /// Commit hash the line is attributed to.
    pub commit: String,
    /// Author name recorded for that commit.
    pub author: String,
    /// Author date as a `YYYY-MM-DD` string.
    pub date: String,
    /// Line number within the blamed file.
    pub line_number: u32,
}
72
/// Errors returned by the git integration in this module.
#[derive(Debug, Error)]
pub enum GitError {
    /// The directory passed to `GitRepo::open` has no `.git` entry.
    #[error("Not a git repository")]
    NotAGitRepo,
    /// The `git` process could not be started or exited unsuccessfully.
    /// The payload is the spawn error text or git's stderr output.
    #[error("Git command failed: {0}")]
    CommandFailed(String),
    /// Git ran, but its output could not be used (for example stdout was not
    /// valid UTF-8, or no commit was found where one was expected).
    #[error("Parse error: {0}")]
    ParseError(String),
}
83
84impl GitRepo {
85    /// Open a git repository
86    pub fn open(path: &Path) -> Result<Self, GitError> {
87        let git_dir = path.join(".git");
88        if !git_dir.exists() {
89            return Err(GitError::NotAGitRepo);
90        }
91
92        Ok(Self { path: path.to_string_lossy().to_string() })
93    }
94
95    /// Check if path is a git repository
96    pub fn is_git_repo(path: &Path) -> bool {
97        path.join(".git").exists()
98    }
99
100    /// Get current branch name
101    pub fn current_branch(&self) -> Result<String, GitError> {
102        let output = self.run_git(&["rev-parse", "--abbrev-ref", "HEAD"])?;
103        Ok(output.trim().to_owned())
104    }
105
106    /// Get current commit hash
107    pub fn current_commit(&self) -> Result<String, GitError> {
108        let output = self.run_git(&["rev-parse", "HEAD"])?;
109        Ok(output.trim().to_owned())
110    }
111
112    /// Get short commit hash
113    pub fn short_hash(&self, commit: &str) -> Result<String, GitError> {
114        let output = self.run_git(&["rev-parse", "--short", commit])?;
115        Ok(output.trim().to_owned())
116    }
117
118    /// Get files changed between two commits
119    pub fn diff_files(&self, from: &str, to: &str) -> Result<Vec<ChangedFile>, GitError> {
120        // First get file status with --name-status (shows A/M/D/R/C status)
121        let status_output = self.run_git(&["diff", "--name-status", from, to])?;
122
123        // Then get line counts with --numstat (shows additions/deletions)
124        let numstat_output = self.run_git(&["diff", "--numstat", from, to])?;
125
126        // Build a map of path -> (additions, deletions) from numstat
127        let mut stats: std::collections::HashMap<String, (u32, u32)> =
128            std::collections::HashMap::new();
129        for line in numstat_output.lines() {
130            if line.is_empty() {
131                continue;
132            }
133            let parts: Vec<&str> = line.split('\t').collect();
134            if parts.len() >= 3 {
135                // numstat format: additions<TAB>deletions<TAB>path
136                // Binary files show "-" for additions/deletions
137                let add = parts[0].parse::<u32>().unwrap_or(0);
138                let del = parts[1].parse::<u32>().unwrap_or(0);
139                let path = parts[2..].join("\t");
140                stats.insert(path, (add, del));
141            }
142        }
143
144        let mut files = Vec::new();
145
146        // Parse name-status output
147        for line in status_output.lines() {
148            if line.is_empty() {
149                continue;
150            }
151
152            let parts: Vec<&str> = line.split('\t').collect();
153            if parts.is_empty() {
154                continue;
155            }
156
157            let status_str = parts[0];
158            let first_char = status_str.chars().next().unwrap_or(' ');
159            let status = FileStatus::from_char(first_char);
160
161            // Handle renamed/copied files: R100 or C100 followed by old_path and new_path
162            let (path, old_path) = if (first_char == 'R' || first_char == 'C') && parts.len() >= 3 {
163                // For renames: parts[1] = old_path, parts[2] = new_path
164                (parts[2].to_owned(), Some(parts[1].to_owned()))
165            } else if parts.len() >= 2 {
166                // For other statuses: parts[1] is the path
167                (parts[1].to_owned(), None)
168            } else {
169                continue;
170            };
171
172            // Look up line statistics
173            let (additions, deletions) = stats.get(&path).copied().unwrap_or((0, 0));
174
175            files.push(ChangedFile { path, old_path, status, additions, deletions });
176        }
177
178        Ok(files)
179    }
180
181    /// Get files changed in working tree
182    ///
183    /// Returns both staged and unstaged changes. For renames, the `old_path`
184    /// field contains the original filename.
185    pub fn status(&self) -> Result<Vec<ChangedFile>, GitError> {
186        let output = self.run_git(&["status", "--porcelain"])?;
187
188        let mut files = Vec::new();
189
190        for line in output.lines() {
191            if line.len() < 3 {
192                continue;
193            }
194
195            // Git status --porcelain format: XY filename
196            // X = staged status, Y = unstaged status
197            let staged_char = line.chars().next().unwrap_or(' ');
198            let unstaged_char = line.chars().nth(1).unwrap_or(' ');
199            let path_part = &line[3..];
200
201            // Determine the effective status (prefer staged, then unstaged)
202            let (status, status_char) = if staged_char != ' ' && staged_char != '?' {
203                // Has staged changes
204                (
205                    match staged_char {
206                        'A' => FileStatus::Added,
207                        'M' => FileStatus::Modified,
208                        'D' => FileStatus::Deleted,
209                        'R' => FileStatus::Renamed,
210                        'C' => FileStatus::Copied,
211                        _ => FileStatus::Unknown,
212                    },
213                    staged_char,
214                )
215            } else {
216                // Only unstaged changes
217                (
218                    match unstaged_char {
219                        '?' | 'A' => FileStatus::Added,
220                        'M' => FileStatus::Modified,
221                        'D' => FileStatus::Deleted,
222                        'R' => FileStatus::Renamed,
223                        _ => FileStatus::Unknown,
224                    },
225                    unstaged_char,
226                )
227            };
228
229            // Handle renames: format is "old_path -> new_path"
230            let (path, old_path) = if status_char == 'R' || status_char == 'C' {
231                if let Some(arrow_pos) = path_part.find(" -> ") {
232                    let old = path_part[..arrow_pos].to_owned();
233                    let new = path_part[arrow_pos + 4..].to_owned();
234                    (new, Some(old))
235                } else {
236                    (path_part.to_owned(), None)
237                }
238            } else {
239                (path_part.to_owned(), None)
240            };
241
242            files.push(ChangedFile { path, old_path, status, additions: 0, deletions: 0 });
243        }
244
245        Ok(files)
246    }
247
248    /// Get recent commits
249    pub fn log(&self, count: usize) -> Result<Vec<Commit>, GitError> {
250        let output = self.run_git(&[
251            "log",
252            &format!("-{}", count),
253            "--format=%H%n%h%n%an%n%ae%n%ad%n%s%n---COMMIT---",
254            "--date=short",
255        ])?;
256
257        let mut commits = Vec::new();
258        let mut lines = output.lines().peekable();
259
260        while lines.peek().is_some() {
261            let hash = lines.next().unwrap_or("").to_owned();
262            if hash.is_empty() {
263                continue;
264            }
265
266            let short_hash = lines.next().unwrap_or("").to_owned();
267            let author = lines.next().unwrap_or("").to_owned();
268            let email = lines.next().unwrap_or("").to_owned();
269            let date = lines.next().unwrap_or("").to_owned();
270            let message = lines.next().unwrap_or("").to_owned();
271
272            // Skip separator
273            while lines.peek().map(|l| *l != "---COMMIT---").unwrap_or(false) {
274                lines.next();
275            }
276            lines.next(); // Skip the separator
277
278            commits.push(Commit { hash, short_hash, author, email, date, message });
279        }
280
281        Ok(commits)
282    }
283
284    /// Get commits that modified a specific file
285    pub fn file_log(&self, path: &str, count: usize) -> Result<Vec<Commit>, GitError> {
286        let output = self.run_git(&[
287            "log",
288            &format!("-{}", count),
289            "--format=%H%n%h%n%an%n%ae%n%ad%n%s%n---COMMIT---",
290            "--date=short",
291            "--follow",
292            "--",
293            path,
294        ])?;
295
296        let mut commits = Vec::new();
297        let commit_blocks: Vec<&str> = output.split("---COMMIT---").collect();
298
299        for block in commit_blocks {
300            let lines: Vec<&str> = block.lines().filter(|l| !l.is_empty()).collect();
301            if lines.len() < 6 {
302                continue;
303            }
304
305            commits.push(Commit {
306                hash: lines[0].to_owned(),
307                short_hash: lines[1].to_owned(),
308                author: lines[2].to_owned(),
309                email: lines[3].to_owned(),
310                date: lines[4].to_owned(),
311                message: lines[5].to_owned(),
312            });
313        }
314
315        Ok(commits)
316    }
317
318    /// Get blame information for a file
319    pub fn blame(&self, path: &str) -> Result<Vec<BlameLine>, GitError> {
320        let output = self.run_git(&["blame", "--porcelain", path])?;
321
322        let mut lines = Vec::new();
323        let mut current_commit = String::new();
324        let mut current_author = String::new();
325        let mut current_date = String::new();
326        let mut line_number = 0u32;
327
328        for line in output.lines() {
329            if line.starts_with('\t') {
330                // This is the actual line content, create blame entry
331                lines.push(BlameLine {
332                    commit: current_commit.clone(),
333                    author: current_author.clone(),
334                    date: current_date.clone(),
335                    line_number,
336                });
337            } else if line.len() >= 40 && line.chars().take(40).all(|c| c.is_ascii_hexdigit()) {
338                // New commit hash line
339                let parts: Vec<&str> = line.split_whitespace().collect();
340                if !parts.is_empty() {
341                    current_commit = parts[0][..8.min(parts[0].len())].to_string();
342                    if parts.len() >= 3 {
343                        line_number = parts[2].parse().unwrap_or(0);
344                    }
345                }
346            } else if let Some(author) = line.strip_prefix("author ") {
347                current_author = author.to_owned();
348            } else if let Some(time) = line.strip_prefix("author-time ") {
349                // Convert Unix timestamp to date
350                if let Ok(ts) = time.parse::<i64>() {
351                    current_date = format_timestamp(ts);
352                }
353            }
354        }
355
356        Ok(lines)
357    }
358
359    /// Get list of files tracked by git
360    pub fn ls_files(&self) -> Result<Vec<String>, GitError> {
361        let output = self.run_git(&["ls-files"])?;
362        Ok(output.lines().map(String::from).collect())
363    }
364
365    /// Get diff content between two commits for a file
366    pub fn diff_content(&self, from: &str, to: &str, path: &str) -> Result<String, GitError> {
367        self.run_git(&["diff", from, to, "--", path])
368    }
369
370    /// Get diff content for uncommitted changes (working tree vs HEAD)
371    /// Includes both staged and unstaged changes.
372    pub fn uncommitted_diff(&self, path: &str) -> Result<String, GitError> {
373        // Get both staged and unstaged changes combined
374        self.run_git(&["diff", "HEAD", "--", path])
375    }
376
377    /// Get diff content for all uncommitted changes
378    /// Returns combined diff for all changed files.
379    pub fn all_uncommitted_diffs(&self) -> Result<String, GitError> {
380        self.run_git(&["diff", "HEAD"])
381    }
382
383    /// Check if a file has uncommitted changes
384    pub fn has_changes(&self, path: &str) -> Result<bool, GitError> {
385        let output = self.run_git(&["status", "--porcelain", "--", path])?;
386        Ok(!output.trim().is_empty())
387    }
388
389    /// Get the commit where a file was last modified
390    pub fn last_modified_commit(&self, path: &str) -> Result<Commit, GitError> {
391        let commits = self.file_log(path, 1)?;
392        commits
393            .into_iter()
394            .next()
395            .ok_or_else(|| GitError::ParseError("No commits found".to_owned()))
396    }
397
398    /// Calculate file importance based on recent changes
399    pub fn file_change_frequency(&self, path: &str, days: u32) -> Result<u32, GitError> {
400        let output = self.run_git(&[
401            "log",
402            &format!("--since={} days ago", days),
403            "--oneline",
404            "--follow",
405            "--",
406            path,
407        ])?;
408
409        Ok(output.lines().count() as u32)
410    }
411
412    /// Run a git command and return output
413    fn run_git(&self, args: &[&str]) -> Result<String, GitError> {
414        let output = Command::new("git")
415            .current_dir(&self.path)
416            .args(args)
417            .output()
418            .map_err(|e| GitError::CommandFailed(e.to_string()))?;
419
420        if !output.status.success() {
421            let stderr = String::from_utf8_lossy(&output.stderr);
422            return Err(GitError::CommandFailed(stderr.to_string()));
423        }
424
425        String::from_utf8(output.stdout).map_err(|e| GitError::ParseError(e.to_string()))
426    }
427}
428
/// Format Unix timestamp as YYYY-MM-DD
///
/// The timestamp is interpreted as seconds since 1970-01-01T00:00:00 UTC and
/// converted with proleptic Gregorian calendar rules; the time of day is
/// discarded. Timestamps before the epoch are supported: the day is found
/// by flooring division, so `-1` is `1969-12-31`.
///
/// The work is bounded regardless of the magnitude of `ts`: whole 400-year
/// calendar cycles are removed arithmetically before the per-year walk.
fn format_timestamp(ts: i64) -> String {
    const SECS_PER_DAY: i64 = 86_400;
    // Every run of 400 consecutive Gregorian years has this many days, and
    // the leap-year pattern repeats with the same period.
    const DAYS_PER_400_YEARS: i64 = 146_097;

    // Euclidean division floors toward negative infinity, which is what a
    // calendar needs for pre-1970 timestamps.
    let days_since_epoch = ts.div_euclid(SECS_PER_DAY);
    let cycles = days_since_epoch.div_euclid(DAYS_PER_400_YEARS);
    let mut remaining_days = days_since_epoch.rem_euclid(DAYS_PER_400_YEARS);
    let mut year = 1970 + 400 * cycles;

    // At most 400 iterations because `remaining_days` < DAYS_PER_400_YEARS.
    loop {
        let days_in_year = if is_leap_year(year) { 366 } else { 365 };
        if remaining_days < days_in_year {
            break;
        }
        remaining_days -= days_in_year;
        year += 1;
    }

    let days_in_months = if is_leap_year(year) {
        [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    } else {
        [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    };

    let mut month = 1;
    for days in days_in_months {
        if remaining_days < days {
            break;
        }
        remaining_days -= days;
        month += 1;
    }

    // `remaining_days` is now the zero-based day within the month.
    let day = remaining_days + 1;

    format!("{:04}-{:02}-{:02}", year, month, day)
}

/// Gregorian leap-year rule: divisible by 4, except century years that are
/// not divisible by 400.
fn is_leap_year(year: i64) -> bool {
    (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)
}
471
#[cfg(test)]
// Tests build owned strings from literals with `.to_string()` for brevity.
#[allow(clippy::str_to_string)]
mod tests {
    use super::*;
    use std::process::Command;
    use tempfile::TempDir;

    /// Run `git <args>` inside `dir`, panicking with git's stderr on failure
    /// so a broken fixture is reported where it happens instead of as a
    /// confusing assertion later in the test.
    fn git(dir: &std::path::Path, args: &[&str]) {
        let output = Command::new("git")
            .current_dir(dir)
            .args(args)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
    }

    /// Create a temporary repository containing one committed file, `test.txt`.
    fn init_test_repo() -> TempDir {
        let temp = TempDir::new().unwrap();

        // Initialize git repo
        git(temp.path(), &["init"]);

        // Configure git (repository-local, so the user's settings are untouched)
        git(temp.path(), &["config", "user.email", "test@test.com"]);
        git(temp.path(), &["config", "user.name", "Test"]);
        // Do not depend on a signing setup from the user's global config.
        git(temp.path(), &["config", "commit.gpgsign", "false"]);

        // Create a file and commit
        std::fs::write(temp.path().join("test.txt"), "hello").unwrap();
        git(temp.path(), &["add", "."]);
        git(temp.path(), &["commit", "-m", "Initial commit"]);

        temp
    }

    #[test]
    fn test_open_repo() {
        let temp = init_test_repo();
        let repo = GitRepo::open(temp.path());
        assert!(repo.is_ok());
    }

    #[test]
    fn test_not_a_repo() {
        let temp = TempDir::new().unwrap();
        let repo = GitRepo::open(temp.path());
        assert!(matches!(repo, Err(GitError::NotAGitRepo)));
    }

    #[test]
    fn test_current_branch() {
        let temp = init_test_repo();
        let repo = GitRepo::open(temp.path()).unwrap();
        let branch = repo.current_branch().unwrap();
        // Branch could be "main" or "master" depending on git config
        assert!(!branch.is_empty());
    }

    #[test]
    fn test_log() {
        let temp = init_test_repo();
        let repo = GitRepo::open(temp.path()).unwrap();
        let commits = repo.log(10).unwrap();
        assert!(!commits.is_empty());
        assert_eq!(commits[0].message, "Initial commit");
    }

    #[test]
    fn test_ls_files() {
        let temp = init_test_repo();
        let repo = GitRepo::open(temp.path()).unwrap();
        let files = repo.ls_files().unwrap();
        assert!(files.contains(&"test.txt".to_string()));
    }

    #[test]
    fn test_format_timestamp() {
        // 2024-01-01 00:00:00 UTC
        let ts = 1704067200;
        let date = format_timestamp(ts);
        assert_eq!(date, "2024-01-01");
    }
}
567}