Skip to main content

infiniloom_engine/
git.rs

//! Git integration for diff/log analysis
//!
//! Provides integration with Git for:
//! - Getting changed files between commits
//! - Extracting commit history
//! - Blame information for file importance
7
8use std::path::Path;
9use std::process::{Command, Stdio};
10use std::time::{Duration, Instant};
11use thiserror::Error;
12
/// Time budget applied to each git invocation unless overridden (30 seconds).
const DEFAULT_GIT_TIMEOUT: Duration = Duration::from_secs(30);

/// Handle to a git working tree on disk.
///
/// Operations are carried out by spawning the `git` executable with `path`
/// as its working directory.
pub struct GitRepo {
    /// Repository root, stored as a string (converted lossily from the
    /// `Path` given to `open`).
    path: String,
    /// Longest time a single git command may run before it is killed.
    timeout: Duration,
}
21
/// One entry of commit history as reported by `git log`.
#[derive(Clone, Debug)]
pub struct Commit {
    /// Full commit hash (`%H`).
    pub hash: String,
    /// Abbreviated commit hash (`%h`).
    pub short_hash: String,
    /// Author name (`%an`).
    pub author: String,
    /// Author e-mail address (`%ae`).
    pub email: String,
    /// Author date (`%ad`); the log helpers in this file request `--date=short`.
    pub date: String,
    /// Subject line of the commit message (`%s`).
    pub message: String,
}
32
33/// A file changed in a commit
34#[derive(Debug, Clone)]
35pub struct ChangedFile {
36    /// Current path (or new path for renames)
37    pub path: String,
38    /// Original path for renamed/copied files (None for add/modify/delete)
39    pub old_path: Option<String>,
40    pub status: FileStatus,
41    pub additions: u32,
42    pub deletions: u32,
43}
44
/// Kind of change git reported for a file.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FileStatus {
    Added,
    Modified,
    Deleted,
    Renamed,
    Copied,
    Unknown,
}

impl FileStatus {
    /// Translate a git status letter (`A`, `M`, `D`, `R`, `C`) into a variant.
    ///
    /// Any other character maps to [`FileStatus::Unknown`].
    fn from_char(c: char) -> Self {
        const LETTERS: [(char, FileStatus); 5] = [
            ('A', FileStatus::Added),
            ('M', FileStatus::Modified),
            ('D', FileStatus::Deleted),
            ('R', FileStatus::Renamed),
            ('C', FileStatus::Copied),
        ];

        LETTERS
            .iter()
            .find(|(letter, _)| *letter == c)
            .map_or(FileStatus::Unknown, |&(_, status)| status)
    }
}
68
/// Attribution of one line of a file, as produced by `GitRepo::blame`.
#[derive(Clone, Debug)]
pub struct BlameLine {
    /// Commit that last touched the line (`blame` stores the first 8 hash characters).
    pub commit: String,
    /// Author name taken from the porcelain `author` header.
    pub author: String,
    /// Author date formatted as `YYYY-MM-DD`.
    pub date: String,
    /// Line number taken from the blame header for this line.
    pub line_number: u32,
}
77
/// Classification of a single line inside a diff hunk.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DiffLineType {
    /// Line exists only in the new version
    Add,
    /// Line exists only in the old version
    Remove,
    /// Line is unchanged and shown for context
    Context,
}

impl DiffLineType {
    /// Lower-case name of the variant: `"add"`, `"remove"` or `"context"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            DiffLineType::Context => "context",
            DiffLineType::Remove => "remove",
            DiffLineType::Add => "add",
        }
    }
}
99
100/// A single line change within a diff hunk
101#[derive(Debug, Clone)]
102pub struct DiffLine {
103    /// Type of change: add, remove, or context
104    pub change_type: DiffLineType,
105    /// Line number in the old file (None for additions)
106    pub old_line: Option<u32>,
107    /// Line number in the new file (None for deletions)
108    pub new_line: Option<u32>,
109    /// The actual line content (without +/- prefix)
110    pub content: String,
111}
112
113/// A diff hunk representing a contiguous block of changes
114#[derive(Debug, Clone)]
115pub struct DiffHunk {
116    /// File path this hunk belongs to (relative to repo root)
117    pub file: String,
118    /// Starting line in the old file
119    pub old_start: u32,
120    /// Number of lines in the old file section
121    pub old_count: u32,
122    /// Starting line in the new file
123    pub new_start: u32,
124    /// Number of lines in the new file section
125    pub new_count: u32,
126    /// Header line (e.g., "@@ -1,5 +1,7 @@ function name")
127    pub header: String,
128    /// Individual line changes within this hunk
129    pub lines: Vec<DiffLine>,
130}
131
132/// Git errors
133#[derive(Debug, Error)]
134pub enum GitError {
135    #[error("Not a git repository")]
136    NotAGitRepo,
137    #[error("Git command failed: {0}")]
138    CommandFailed(String),
139    #[error("Git command timed out after {0:?}: {1}")]
140    Timeout(Duration, String),
141    #[error("Parse error: {0}")]
142    ParseError(String),
143}
144
145impl GitRepo {
146    /// Open a git repository with the default timeout (30 seconds)
147    pub fn open(path: &Path) -> Result<Self, GitError> {
148        let git_dir = path.join(".git");
149        if !git_dir.exists() {
150            return Err(GitError::NotAGitRepo);
151        }
152
153        Ok(Self { path: path.to_string_lossy().to_string(), timeout: DEFAULT_GIT_TIMEOUT })
154    }
155
156    /// Set a custom timeout for git operations
157    ///
158    /// Returns `self` for builder-style chaining.
159    ///
160    /// # Example
161    /// ```ignore
162    /// let repo = GitRepo::open(Path::new("."))?.with_timeout(Duration::from_secs(60));
163    /// ```
164    #[must_use]
165    pub fn with_timeout(mut self, timeout: Duration) -> Self {
166        self.timeout = timeout;
167        self
168    }
169
170    /// Check if path is a git repository
171    pub fn is_git_repo(path: &Path) -> bool {
172        path.join(".git").exists()
173    }
174
175    /// Get current branch name
176    pub fn current_branch(&self) -> Result<String, GitError> {
177        let output = self.run_git(&["rev-parse", "--abbrev-ref", "HEAD"])?;
178        Ok(output.trim().to_owned())
179    }
180
181    /// Get current commit hash
182    pub fn current_commit(&self) -> Result<String, GitError> {
183        let output = self.run_git(&["rev-parse", "HEAD"])?;
184        Ok(output.trim().to_owned())
185    }
186
187    /// Get short commit hash
188    pub fn short_hash(&self, commit: &str) -> Result<String, GitError> {
189        let output = self.run_git(&["rev-parse", "--short", commit])?;
190        Ok(output.trim().to_owned())
191    }
192
193    /// Get files changed between two commits
194    pub fn diff_files(&self, from: &str, to: &str) -> Result<Vec<ChangedFile>, GitError> {
195        // First get file status with --name-status (shows A/M/D/R/C status)
196        let status_output = self.run_git(&["diff", "--name-status", from, to])?;
197
198        // Then get line counts with --numstat (shows additions/deletions)
199        let numstat_output = self.run_git(&["diff", "--numstat", from, to])?;
200
201        // Build a map of path -> (additions, deletions) from numstat
202        let mut stats: std::collections::HashMap<String, (u32, u32)> =
203            std::collections::HashMap::new();
204        for line in numstat_output.lines() {
205            if line.is_empty() {
206                continue;
207            }
208            let parts: Vec<&str> = line.split('\t').collect();
209            if parts.len() >= 3 {
210                // numstat format: additions<TAB>deletions<TAB>path
211                // Binary files show "-" for additions/deletions
212                let add = parts[0].parse::<u32>().unwrap_or(0);
213                let del = parts[1].parse::<u32>().unwrap_or(0);
214                let path = parts[2..].join("\t");
215                stats.insert(path, (add, del));
216            }
217        }
218
219        let mut files = Vec::new();
220
221        // Parse name-status output
222        for line in status_output.lines() {
223            if line.is_empty() {
224                continue;
225            }
226
227            let parts: Vec<&str> = line.split('\t').collect();
228            if parts.is_empty() {
229                continue;
230            }
231
232            let status_str = parts[0];
233            let first_char = status_str.chars().next().unwrap_or(' ');
234            let status = FileStatus::from_char(first_char);
235
236            // Handle renamed/copied files: R100 or C100 followed by old_path and new_path
237            let (path, old_path) = if (first_char == 'R' || first_char == 'C') && parts.len() >= 3 {
238                // For renames: parts[1] = old_path, parts[2] = new_path
239                (parts[2].to_owned(), Some(parts[1].to_owned()))
240            } else if parts.len() >= 2 {
241                // For other statuses: parts[1] is the path
242                (parts[1].to_owned(), None)
243            } else {
244                continue;
245            };
246
247            // Look up line statistics
248            let (additions, deletions) = stats.get(&path).copied().unwrap_or((0, 0));
249
250            files.push(ChangedFile { path, old_path, status, additions, deletions });
251        }
252
253        Ok(files)
254    }
255
256    /// Get files changed in working tree
257    ///
258    /// Returns both staged and unstaged changes. For renames, the `old_path`
259    /// field contains the original filename.
260    pub fn status(&self) -> Result<Vec<ChangedFile>, GitError> {
261        let output = self.run_git(&["status", "--porcelain"])?;
262
263        let mut files = Vec::new();
264
265        for line in output.lines() {
266            if line.len() < 3 {
267                continue;
268            }
269
270            // Git status --porcelain format: XY filename
271            // X = staged status, Y = unstaged status
272            let staged_char = line.chars().next().unwrap_or(' ');
273            let unstaged_char = line.chars().nth(1).unwrap_or(' ');
274            let path_part = &line[3..];
275
276            // Determine the effective status (prefer staged, then unstaged)
277            let (status, status_char) = if staged_char != ' ' && staged_char != '?' {
278                // Has staged changes
279                (
280                    match staged_char {
281                        'A' => FileStatus::Added,
282                        'M' => FileStatus::Modified,
283                        'D' => FileStatus::Deleted,
284                        'R' => FileStatus::Renamed,
285                        'C' => FileStatus::Copied,
286                        _ => FileStatus::Unknown,
287                    },
288                    staged_char,
289                )
290            } else {
291                // Only unstaged changes
292                (
293                    match unstaged_char {
294                        '?' | 'A' => FileStatus::Added,
295                        'M' => FileStatus::Modified,
296                        'D' => FileStatus::Deleted,
297                        'R' => FileStatus::Renamed,
298                        _ => FileStatus::Unknown,
299                    },
300                    unstaged_char,
301                )
302            };
303
304            // Handle renames: format is "old_path -> new_path"
305            let (path, old_path) = if status_char == 'R' || status_char == 'C' {
306                if let Some(arrow_pos) = path_part.find(" -> ") {
307                    let old = path_part[..arrow_pos].to_owned();
308                    let new = path_part[arrow_pos + 4..].to_owned();
309                    (new, Some(old))
310                } else {
311                    (path_part.to_owned(), None)
312                }
313            } else {
314                (path_part.to_owned(), None)
315            };
316
317            files.push(ChangedFile { path, old_path, status, additions: 0, deletions: 0 });
318        }
319
320        Ok(files)
321    }
322
323    /// Get recent commits
324    pub fn log(&self, count: usize) -> Result<Vec<Commit>, GitError> {
325        let output = self.run_git(&[
326            "log",
327            &format!("-{}", count),
328            "--format=%H%n%h%n%an%n%ae%n%ad%n%s%n---COMMIT---",
329            "--date=short",
330        ])?;
331
332        let mut commits = Vec::new();
333        let mut lines = output.lines().peekable();
334
335        while lines.peek().is_some() {
336            let hash = lines.next().unwrap_or("").to_owned();
337            if hash.is_empty() {
338                continue;
339            }
340
341            let short_hash = lines.next().unwrap_or("").to_owned();
342            let author = lines.next().unwrap_or("").to_owned();
343            let email = lines.next().unwrap_or("").to_owned();
344            let date = lines.next().unwrap_or("").to_owned();
345            let message = lines.next().unwrap_or("").to_owned();
346
347            // Skip separator
348            while lines.peek().is_some_and(|l| *l != "---COMMIT---") {
349                lines.next();
350            }
351            lines.next(); // Skip the separator
352
353            commits.push(Commit { hash, short_hash, author, email, date, message });
354        }
355
356        Ok(commits)
357    }
358
359    /// Get commits that modified a specific file
360    pub fn file_log(&self, path: &str, count: usize) -> Result<Vec<Commit>, GitError> {
361        let output = self.run_git(&[
362            "log",
363            &format!("-{}", count),
364            "--format=%H%n%h%n%an%n%ae%n%ad%n%s%n---COMMIT---",
365            "--date=short",
366            "--follow",
367            "--",
368            path,
369        ])?;
370
371        let mut commits = Vec::new();
372        let commit_blocks: Vec<&str> = output.split("---COMMIT---").collect();
373
374        for block in commit_blocks {
375            let lines: Vec<&str> = block.lines().filter(|l| !l.is_empty()).collect();
376            if lines.len() < 6 {
377                continue;
378            }
379
380            commits.push(Commit {
381                hash: lines[0].to_owned(),
382                short_hash: lines[1].to_owned(),
383                author: lines[2].to_owned(),
384                email: lines[3].to_owned(),
385                date: lines[4].to_owned(),
386                message: lines[5].to_owned(),
387            });
388        }
389
390        Ok(commits)
391    }
392
393    /// Get blame information for a file
394    pub fn blame(&self, path: &str) -> Result<Vec<BlameLine>, GitError> {
395        let output = self.run_git(&["blame", "--porcelain", path])?;
396
397        let mut lines = Vec::new();
398        let mut current_commit = String::new();
399        let mut current_author = String::new();
400        let mut current_date = String::new();
401        let mut line_number = 0u32;
402
403        for line in output.lines() {
404            if line.starts_with('\t') {
405                // This is the actual line content, create blame entry
406                lines.push(BlameLine {
407                    commit: current_commit.clone(),
408                    author: current_author.clone(),
409                    date: current_date.clone(),
410                    line_number,
411                });
412            } else if line.len() >= 40 && line.chars().take(40).all(|c| c.is_ascii_hexdigit()) {
413                // New commit hash line
414                let parts: Vec<&str> = line.split_whitespace().collect();
415                if !parts.is_empty() {
416                    current_commit = parts[0][..8.min(parts[0].len())].to_string();
417                    if parts.len() >= 3 {
418                        line_number = parts[2].parse().unwrap_or(0);
419                    }
420                }
421            } else if let Some(author) = line.strip_prefix("author ") {
422                current_author = author.to_owned();
423            } else if let Some(time) = line.strip_prefix("author-time ") {
424                // Convert Unix timestamp to date
425                if let Ok(ts) = time.parse::<i64>() {
426                    current_date = format_timestamp(ts);
427                }
428            }
429        }
430
431        Ok(lines)
432    }
433
434    /// Get list of files tracked by git
435    pub fn ls_files(&self) -> Result<Vec<String>, GitError> {
436        let output = self.run_git(&["ls-files"])?;
437        Ok(output.lines().map(String::from).collect())
438    }
439
440    /// Get diff content between two commits for a file
441    pub fn diff_content(&self, from: &str, to: &str, path: &str) -> Result<String, GitError> {
442        self.run_git(&["diff", from, to, "--", path])
443    }
444
445    /// Get diff content for uncommitted changes (working tree vs HEAD)
446    /// Includes both staged and unstaged changes.
447    pub fn uncommitted_diff(&self, path: &str) -> Result<String, GitError> {
448        // Get both staged and unstaged changes combined
449        self.run_git(&["diff", "HEAD", "--", path])
450    }
451
452    /// Get diff content for all uncommitted changes
453    /// Returns combined diff for all changed files.
454    pub fn all_uncommitted_diffs(&self) -> Result<String, GitError> {
455        self.run_git(&["diff", "HEAD"])
456    }
457
458    /// Check if a file has uncommitted changes
459    pub fn has_changes(&self, path: &str) -> Result<bool, GitError> {
460        let output = self.run_git(&["status", "--porcelain", "--", path])?;
461        Ok(!output.trim().is_empty())
462    }
463
464    /// Get the commit where a file was last modified
465    pub fn last_modified_commit(&self, path: &str) -> Result<Commit, GitError> {
466        let commits = self.file_log(path, 1)?;
467        commits
468            .into_iter()
469            .next()
470            .ok_or_else(|| GitError::ParseError("No commits found".to_owned()))
471    }
472
473    /// Calculate file importance based on recent changes
474    pub fn file_change_frequency(&self, path: &str, days: u32) -> Result<u32, GitError> {
475        let output = self.run_git(&[
476            "log",
477            &format!("--since={} days ago", days),
478            "--oneline",
479            "--follow",
480            "--",
481            path,
482        ])?;
483
484        Ok(output.lines().count() as u32)
485    }
486
487    /// Get file content at a specific git ref (commit, branch, tag)
488    ///
489    /// Uses `git show <ref>:<path>` to retrieve file content at that revision.
490    ///
491    /// # Arguments
492    /// * `path` - File path relative to repository root
493    /// * `git_ref` - Git ref (commit hash, branch name, tag, HEAD~n, etc.)
494    ///
495    /// # Returns
496    /// File content as string, or error if file doesn't exist at that ref
497    ///
498    /// # Example
499    /// ```ignore
500    /// let repo = GitRepo::open(Path::new("."))?;
501    /// let content = repo.file_at_ref("src/main.rs", "HEAD~5")?;
502    /// ```
503    pub fn file_at_ref(&self, path: &str, git_ref: &str) -> Result<String, GitError> {
504        self.run_git(&["show", &format!("{}:{}", git_ref, path)])
505    }
506
507    /// Parse diff between two refs into structured hunks
508    ///
509    /// Returns detailed hunk information including line numbers for each change.
510    ///
511    /// # Arguments
512    /// * `from_ref` - Starting ref (e.g., "main", "HEAD~5", commit hash)
513    /// * `to_ref` - Ending ref (e.g., "HEAD", "feature-branch")
514    /// * `path` - Optional file path to filter to a single file
515    ///
516    /// # Returns
517    /// Vec of DiffHunk with structured line-level information
518    pub fn diff_hunks(
519        &self,
520        from_ref: &str,
521        to_ref: &str,
522        path: Option<&str>,
523    ) -> Result<Vec<DiffHunk>, GitError> {
524        let output = match path {
525            Some(p) => self.run_git(&["diff", "-U3", from_ref, to_ref, "--", p])?,
526            None => self.run_git(&["diff", "-U3", from_ref, to_ref])?,
527        };
528
529        parse_diff_hunks(&output)
530    }
531
532    /// Parse uncommitted changes (working tree vs HEAD) into structured hunks
533    ///
534    /// # Arguments
535    /// * `path` - Optional file path to filter to a single file
536    ///
537    /// # Returns
538    /// Vec of DiffHunk for uncommitted changes
539    pub fn uncommitted_hunks(&self, path: Option<&str>) -> Result<Vec<DiffHunk>, GitError> {
540        let output = match path {
541            Some(p) => self.run_git(&["diff", "-U3", "HEAD", "--", p])?,
542            None => self.run_git(&["diff", "-U3", "HEAD"])?,
543        };
544
545        parse_diff_hunks(&output)
546    }
547
548    /// Parse staged changes into structured hunks
549    ///
550    /// # Arguments
551    /// * `path` - Optional file path to filter to a single file
552    ///
553    /// # Returns
554    /// Vec of DiffHunk for staged changes only
555    pub fn staged_hunks(&self, path: Option<&str>) -> Result<Vec<DiffHunk>, GitError> {
556        let output = match path {
557            Some(p) => self.run_git(&["diff", "-U3", "--staged", "--", p])?,
558            None => self.run_git(&["diff", "-U3", "--staged"])?,
559        };
560
561        parse_diff_hunks(&output)
562    }
563
564    /// Run a git command and return output
565    ///
566    /// Uses `spawn()` + `try_wait()` polling with a deadline instead of blocking
567    /// `.output()`, so that hung git processes are killed after `self.timeout`.
568    fn run_git(&self, args: &[&str]) -> Result<String, GitError> {
569        let mut child = Command::new("git")
570            .current_dir(&self.path)
571            .args(args)
572            .stdout(Stdio::piped())
573            .stderr(Stdio::piped())
574            .spawn()
575            .map_err(|e| GitError::CommandFailed(e.to_string()))?;
576
577        let deadline = Instant::now() + self.timeout;
578        loop {
579            match child.try_wait() {
580                Ok(Some(_status)) => {
581                    // Process has exited; collect output
582                    let output = child
583                        .wait_with_output()
584                        .map_err(|e| GitError::CommandFailed(e.to_string()))?;
585                    if !output.status.success() {
586                        let stderr = String::from_utf8_lossy(&output.stderr);
587                        return Err(GitError::CommandFailed(stderr.to_string()));
588                    }
589                    return String::from_utf8(output.stdout)
590                        .map_err(|e| GitError::ParseError(e.to_string()));
591                },
592                Ok(None) => {
593                    // Process still running — check deadline
594                    if Instant::now() > deadline {
595                        let _kill = child.kill();
596                        let _reap = child.wait();
597                        return Err(GitError::Timeout(
598                            self.timeout,
599                            format!("git {}", args.join(" ")),
600                        ));
601                    }
602                    std::thread::sleep(Duration::from_millis(50));
603                },
604                Err(e) => return Err(GitError::CommandFailed(e.to_string())),
605            }
606        }
607    }
608}
609
/// Format a Unix timestamp (seconds since 1970-01-01T00:00:00Z) as `YYYY-MM-DD`.
///
/// The date is computed in UTC with the proleptic Gregorian calendar and
/// without any time-zone or leap-second handling. Timestamps before the epoch
/// are supported: the day is found with floor division, so `-1` is
/// `1969-12-31`.
///
/// The year is located by stepping one year at a time from 1970, so the cost
/// grows with the distance from the epoch.
fn format_timestamp(ts: i64) -> String {
    const SECS_PER_DAY: i64 = 86_400;

    let days_in_year = |year: i64| -> i64 {
        if is_leap_year(year) {
            366
        } else {
            365
        }
    };

    // Floor division keeps pre-epoch instants on the correct (earlier) day.
    let mut remaining_days = ts.div_euclid(SECS_PER_DAY);
    let mut year: i64 = 1970;

    // Before the epoch: step back whole years until the offset is non-negative.
    while remaining_days < 0 {
        year -= 1;
        remaining_days += days_in_year(year);
    }
    // At or after the epoch: step forward while a whole year still fits.
    loop {
        let length = days_in_year(year);
        if remaining_days < length {
            break;
        }
        remaining_days -= length;
        year += 1;
    }

    let february = if is_leap_year(year) { 29 } else { 28 };
    let month_lengths: [i64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];

    let mut month = 1;
    for length in month_lengths {
        if remaining_days < length {
            break;
        }
        remaining_days -= length;
        month += 1;
    }

    // `remaining_days` is now the zero-based day within the month.
    let day = remaining_days + 1;

    format!("{:04}-{:02}-{:02}", year, month, day)
}

/// Gregorian leap-year rule: divisible by 4, except centuries not divisible by 400.
fn is_leap_year(year: i64) -> bool {
    (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)
}
652
653/// Parse unified diff output into structured hunks
654///
655/// Handles the standard unified diff format with hunk headers like:
656/// `@@ -start,count +start,count @@ optional context`
657fn parse_diff_hunks(diff_output: &str) -> Result<Vec<DiffHunk>, GitError> {
658    let mut hunks = Vec::new();
659    let mut current_hunk: Option<DiffHunk> = None;
660    let mut current_file = String::new();
661    let mut old_line = 0u32;
662    let mut new_line = 0u32;
663
664    for line in diff_output.lines() {
665        // Reset file tracking when we see a new diff header
666        if line.starts_with("diff --git") {
667            // Save previous hunk if exists before starting new file
668            if let Some(hunk) = current_hunk.take() {
669                hunks.push(hunk);
670            }
671            current_file = String::new();
672            continue;
673        }
674        // Track file from "--- a/path" lines (old file path)
675        if let Some(path) = line.strip_prefix("--- a/") {
676            current_file = path.to_owned();
677            continue;
678        }
679        // Track file from "+++ b/path" lines (new file path - prefer this)
680        if let Some(path) = line.strip_prefix("+++ b/") {
681            current_file = path.to_owned();
682            continue;
683        }
684        // Handle /dev/null for new or deleted files
685        if line.starts_with("--- /dev/null") || line.starts_with("+++ /dev/null") {
686            continue;
687        }
688
689        // Check for hunk header: @@ -old_start,old_count +new_start,new_count @@ context
690        if line.starts_with("@@") {
691            // Save previous hunk if exists
692            if let Some(hunk) = current_hunk.take() {
693                hunks.push(hunk);
694            }
695
696            // Parse hunk header
697            if let Some((old_start, old_count, new_start, new_count)) = parse_hunk_header(line) {
698                old_line = old_start;
699                new_line = new_start;
700
701                current_hunk = Some(DiffHunk {
702                    file: current_file.clone(),
703                    old_start,
704                    old_count,
705                    new_start,
706                    new_count,
707                    header: line.to_owned(),
708                    lines: Vec::new(),
709                });
710            }
711        } else if let Some(ref mut hunk) = current_hunk {
712            // Parse line within a hunk
713            if let Some(first_char) = line.chars().next() {
714                let (change_type, content) = match first_char {
715                    '+' => (DiffLineType::Add, line[1..].to_owned()),
716                    '-' => (DiffLineType::Remove, line[1..].to_owned()),
717                    ' ' => (DiffLineType::Context, line[1..].to_owned()),
718                    '\\' => continue, // "\ No newline at end of file"
719                    _ => continue,    // Skip diff headers (diff --git, index, ---, +++)
720                };
721
722                let (old_ln, new_ln) = match change_type {
723                    DiffLineType::Add => {
724                        let nl = new_line;
725                        new_line += 1;
726                        (None, Some(nl))
727                    },
728                    DiffLineType::Remove => {
729                        let ol = old_line;
730                        old_line += 1;
731                        (Some(ol), None)
732                    },
733                    DiffLineType::Context => {
734                        let ol = old_line;
735                        let nl = new_line;
736                        old_line += 1;
737                        new_line += 1;
738                        (Some(ol), Some(nl))
739                    },
740                };
741
742                hunk.lines.push(DiffLine {
743                    change_type,
744                    old_line: old_ln,
745                    new_line: new_ln,
746                    content,
747                });
748            }
749        }
750    }
751
752    // Push final hunk
753    if let Some(hunk) = current_hunk {
754        hunks.push(hunk);
755    }
756
757    Ok(hunks)
758}
759
/// Extract `(old_start, old_count, new_start, new_count)` from a hunk header.
///
/// Expected shape: `@@ -old_start,old_count +new_start,new_count @@ optional_context`.
/// A missing count means 1 (e.g., `@@ -5 +5,2 @@`). Returns `None` when the
/// line does not have that shape.
fn parse_hunk_header(header: &str) -> Option<(u32, u32, u32, u32)> {
    // Isolate the text between the opening and closing "@@".
    let after_open = header.strip_prefix("@@")?;
    let (ranges, _trailing) = after_open.split_once("@@")?;

    // First token is the old range ("-start[,count]"), second the new one
    // ("+start[,count]"); anything further is ignored.
    let mut tokens = ranges.split_whitespace();
    let old_spec = tokens.next()?;
    let new_spec = tokens.next()?;

    let (old_start, old_count) = parse_range(old_spec.strip_prefix('-')?)?;
    let (new_start, new_count) = parse_range(new_spec.strip_prefix('+')?)?;

    Some((old_start, old_count, new_start, new_count))
}

/// Split a range such as "5,3" or "5" into `(start, count)`; the count is 1 when omitted.
fn parse_range(range: &str) -> Option<(u32, u32)> {
    match range.split_once(',') {
        Some((start, count)) => {
            let start = start.parse::<u32>().ok()?;
            let count = count.parse::<u32>().ok()?;
            Some((start, count))
        },
        None => range.parse::<u32>().ok().map(|start| (start, 1)),
    }
}
797
798#[cfg(test)]
799#[allow(clippy::str_to_string)]
800mod tests {
801    use super::*;
802    use std::process::Command;
803    use tempfile::TempDir;
804
805    fn init_test_repo() -> TempDir {
806        let temp = TempDir::new().unwrap();
807
808        // Initialize git repo
809        Command::new("git")
810            .current_dir(temp.path())
811            .args(["init"])
812            .output()
813            .unwrap();
814
815        // Configure git
816        Command::new("git")
817            .current_dir(temp.path())
818            .args(["config", "user.email", "test@test.com"])
819            .output()
820            .unwrap();
821
822        Command::new("git")
823            .current_dir(temp.path())
824            .args(["config", "user.name", "Test"])
825            .output()
826            .unwrap();
827
828        // Create a file and commit
829        std::fs::write(temp.path().join("test.txt"), "hello").unwrap();
830
831        Command::new("git")
832            .current_dir(temp.path())
833            .args(["add", "."])
834            .output()
835            .unwrap();
836
837        Command::new("git")
838            .current_dir(temp.path())
839            .args(["commit", "-m", "Initial commit"])
840            .output()
841            .unwrap();
842
843        temp
844    }
845
846    #[test]
847    fn test_open_repo() {
848        let temp = init_test_repo();
849        let repo = GitRepo::open(temp.path());
850        assert!(repo.is_ok());
851    }
852
853    #[test]
854    fn test_not_a_repo() {
855        let temp = TempDir::new().unwrap();
856        let repo = GitRepo::open(temp.path());
857        assert!(matches!(repo, Err(GitError::NotAGitRepo)));
858    }
859
860    #[test]
861    fn test_current_branch() {
862        let temp = init_test_repo();
863        let repo = GitRepo::open(temp.path()).unwrap();
864        let branch = repo.current_branch().unwrap();
865        // Branch could be "main" or "master" depending on git config
866        assert!(!branch.is_empty());
867    }
868
869    #[test]
870    fn test_log() {
871        let temp = init_test_repo();
872        let repo = GitRepo::open(temp.path()).unwrap();
873        let commits = repo.log(10).unwrap();
874        assert!(!commits.is_empty());
875        assert_eq!(commits[0].message, "Initial commit");
876    }
877
878    #[test]
879    fn test_ls_files() {
880        let temp = init_test_repo();
881        let repo = GitRepo::open(temp.path()).unwrap();
882        let files = repo.ls_files().unwrap();
883        assert!(files.contains(&"test.txt".to_string()));
884    }
885
886    #[test]
887    fn test_format_timestamp() {
888        // 2024-01-01 00:00:00 UTC
889        let ts = 1704067200;
890        let date = format_timestamp(ts);
891        assert_eq!(date, "2024-01-01");
892    }
893
894    #[test]
895    fn test_file_at_ref() {
896        let temp = init_test_repo();
897        let repo = GitRepo::open(temp.path()).unwrap();
898
899        // Get file content at HEAD
900        let content = repo.file_at_ref("test.txt", "HEAD").unwrap();
901        assert_eq!(content.trim(), "hello");
902
903        // Modify the file and commit
904        std::fs::write(temp.path().join("test.txt"), "world").unwrap();
905        Command::new("git")
906            .current_dir(temp.path())
907            .args(["add", "."])
908            .output()
909            .unwrap();
910        Command::new("git")
911            .current_dir(temp.path())
912            .args(["commit", "-m", "Update"])
913            .output()
914            .unwrap();
915
916        // Check current HEAD has new content
917        let new_content = repo.file_at_ref("test.txt", "HEAD").unwrap();
918        assert_eq!(new_content.trim(), "world");
919
920        // Check HEAD~1 still has old content
921        let old_content = repo.file_at_ref("test.txt", "HEAD~1").unwrap();
922        assert_eq!(old_content.trim(), "hello");
923    }
924
925    #[test]
926    fn test_parse_hunk_header() {
927        // Standard case
928        let result = parse_hunk_header("@@ -1,5 +1,7 @@ fn main()");
929        assert_eq!(result, Some((1, 5, 1, 7)));
930
931        // No count (defaults to 1)
932        let result = parse_hunk_header("@@ -1 +1 @@");
933        assert_eq!(result, Some((1, 1, 1, 1)));
934
935        // Mixed
936        let result = parse_hunk_header("@@ -10,3 +15 @@");
937        assert_eq!(result, Some((10, 3, 15, 1)));
938
939        // Invalid
940        let result = parse_hunk_header("not a header");
941        assert_eq!(result, None);
942    }
943
944    #[test]
945    fn test_parse_diff_hunks() {
946        let diff = r#"diff --git a/test.txt b/test.txt
947index abc123..def456 100644
948--- a/test.txt
949+++ b/test.txt
950@@ -1,3 +1,4 @@
951 line 1
952-old line 2
953+new line 2
954+added line
955 line 3
956"#;
957
958        let hunks = parse_diff_hunks(diff).unwrap();
959        assert_eq!(hunks.len(), 1);
960
961        let hunk = &hunks[0];
962        assert_eq!(hunk.old_start, 1);
963        assert_eq!(hunk.old_count, 3);
964        assert_eq!(hunk.new_start, 1);
965        assert_eq!(hunk.new_count, 4);
966        assert_eq!(hunk.lines.len(), 5);
967
968        // Check line types
969        assert_eq!(hunk.lines[0].change_type, DiffLineType::Context);
970        assert_eq!(hunk.lines[1].change_type, DiffLineType::Remove);
971        assert_eq!(hunk.lines[2].change_type, DiffLineType::Add);
972        assert_eq!(hunk.lines[3].change_type, DiffLineType::Add);
973        assert_eq!(hunk.lines[4].change_type, DiffLineType::Context);
974
975        // Check line numbers
976        assert_eq!(hunk.lines[0].old_line, Some(1));
977        assert_eq!(hunk.lines[0].new_line, Some(1));
978        assert_eq!(hunk.lines[1].old_line, Some(2));
979        assert_eq!(hunk.lines[1].new_line, None);
980        assert_eq!(hunk.lines[2].old_line, None);
981        assert_eq!(hunk.lines[2].new_line, Some(2));
982    }
983
984    #[test]
985    fn test_diff_hunks() {
986        let temp = init_test_repo();
987        let repo = GitRepo::open(temp.path()).unwrap();
988
989        // Modify file and commit
990        std::fs::write(temp.path().join("test.txt"), "hello\nworld\n").unwrap();
991        Command::new("git")
992            .current_dir(temp.path())
993            .args(["add", "."])
994            .output()
995            .unwrap();
996        Command::new("git")
997            .current_dir(temp.path())
998            .args(["commit", "-m", "Add world"])
999            .output()
1000            .unwrap();
1001
1002        // Get hunks between commits
1003        let hunks = repo.diff_hunks("HEAD~1", "HEAD", Some("test.txt")).unwrap();
1004        assert!(!hunks.is_empty());
1005
1006        // Verify we got structured data
1007        let hunk = &hunks[0];
1008        assert!(hunk.old_start > 0);
1009        assert!(!hunk.header.is_empty());
1010    }
1011
1012    #[test]
1013    fn test_uncommitted_hunks() {
1014        let temp = init_test_repo();
1015        let repo = GitRepo::open(temp.path()).unwrap();
1016
1017        // Make uncommitted change
1018        std::fs::write(temp.path().join("test.txt"), "modified content").unwrap();
1019
1020        let hunks = repo.uncommitted_hunks(Some("test.txt")).unwrap();
1021        assert!(!hunks.is_empty());
1022
1023        // Should have some changes
1024        let total_changes: usize = hunks.iter().map(|h| h.lines.len()).sum();
1025        assert!(total_changes > 0);
1026    }
1027}