ralph_workflow/git_helpers/
repo.rs

1//! Basic git repository operations.
2//!
3//! Provides fundamental git operations used throughout the application:
4//!
5//! - Repository detection and root path resolution
6//! - Working tree status snapshots (porcelain format)
7//! - Staging and committing changes
8//! - Diff generation for commit messages
9//! - Automated commit message generation and committing
10//!
11//! Operations use libgit2 directly to avoid CLI dependencies and work
12//! even when git is not installed.
13
14use std::io;
15use std::path::PathBuf;
16
17use super::identity::GitIdentity;
18
/// How aggressively a diff was shortened before being handed to a reviewer.
///
/// The variants are ordered from "nothing removed" to "almost everything
/// removed"; the selected variant determines which exploration instructions
/// accompany the review prompt.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum DiffTruncationLevel {
    /// The complete diff is present; nothing was removed.
    #[default]
    Full,
    /// Only the highest-priority files are shown; the reviewer is told to
    /// explore the rest.
    Abbreviated,
    /// Diff bodies were dropped and only the changed paths are listed.
    FileList,
    /// Even the path list had to be shortened; the reviewer must discover the
    /// remaining files.
    FileListAbbreviated,
}
35
36/// The result of diff truncation for review purposes.
37///
38/// Contains both the potentially-truncated content and metadata about
39/// what truncation was applied, along with version context information.
40#[derive(Debug, Clone, PartialEq, Eq)]
41pub struct DiffReviewContent {
42    /// The content to include in the review prompt
43    pub content: String,
44    /// The level of truncation applied
45    pub truncation_level: DiffTruncationLevel,
46    /// Total number of files in the full diff (for context in messages)
47    pub total_file_count: usize,
48    /// Number of files shown in the abbreviated content (if applicable)
49    pub shown_file_count: Option<usize>,
50    /// The OID (commit SHA) that this diff is compared against (baseline)
51    pub baseline_oid: Option<String>,
52    /// Short form (first 8 chars) of the baseline OID for display
53    pub baseline_short: Option<String>,
54    /// Description of what the baseline represents (e.g., "review_baseline", "start_commit")
55    pub baseline_description: String,
56}
57
58impl DiffReviewContent {
59    /// Generate a human-readable header describing the diff's version context.
60    ///
61    /// This header is meant to be included at the beginning of the diff content
62    /// to provide clarity about what state of the code the diff represents.
63    ///
64    /// # Returns
65    ///
66    /// A formatted string like:
67    /// ```text
68    /// Diff Context: Compared against review_baseline abc12345
69    /// Current state: Working directory (includes unstaged changes)
70    /// ```
71    ///
72    /// If no baseline information is available, returns a generic message.
73    pub fn format_context_header(&self) -> String {
74        let mut lines = Vec::new();
75
76        if let Some(short) = &self.baseline_short {
77            lines.push(format!(
78                "Diff Context: Compared against {} {}",
79                self.baseline_description, short
80            ));
81        } else {
82            lines.push("Diff Context: Version information not available".to_string());
83        }
84
85        // Add information about truncation if applicable
86        match self.truncation_level {
87            DiffTruncationLevel::Full => {
88                // No truncation - full diff
89            }
90            DiffTruncationLevel::Abbreviated => {
91                lines.push(format!(
92                    "Note: Diff abbreviated - {}/{} files shown",
93                    self.shown_file_count.unwrap_or(0),
94                    self.total_file_count
95                ));
96            }
97            DiffTruncationLevel::FileList => {
98                lines.push(format!(
99                    "Note: Only file list shown - {} files changed",
100                    self.total_file_count
101                ));
102            }
103            DiffTruncationLevel::FileListAbbreviated => {
104                lines.push(format!(
105                    "Note: File list abbreviated - {}/{} files shown",
106                    self.shown_file_count.unwrap_or(0),
107                    self.total_file_count
108                ));
109            }
110        }
111
112        if lines.is_empty() {
113            String::new()
114        } else {
115            format!("{}\n", lines.join("\n"))
116        }
117    }
118}
119
120/// Convert git2 error to `io::Error`.
121fn git2_to_io_error(err: &git2::Error) -> io::Error {
122    io::Error::other(err.to_string())
123}
124
125/// Check if we're in a git repository.
126pub fn require_git_repo() -> io::Result<()> {
127    git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
128    Ok(())
129}
130
131/// Get the git repository root.
132pub fn get_repo_root() -> io::Result<PathBuf> {
133    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
134    repo.workdir()
135        .map(PathBuf::from)
136        .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "No workdir for repository"))
137}
138
139/// Get the git hooks directory path.
140///
141/// Returns the path to the hooks directory inside .git (or the equivalent
142/// for worktrees and other configurations).
143pub fn get_hooks_dir() -> io::Result<PathBuf> {
144    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
145    Ok(repo.path().join("hooks"))
146}
147
148/// Get a snapshot of the current git status.
149///
150/// Returns status in porcelain format (similar to `git status --porcelain=v1`).
151pub fn git_snapshot() -> io::Result<String> {
152    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
153
154    let mut opts = git2::StatusOptions::new();
155    opts.include_untracked(true).recurse_untracked_dirs(true);
156    let statuses = repo
157        .statuses(Some(&mut opts))
158        .map_err(|e| git2_to_io_error(&e))?;
159
160    let mut result = String::new();
161    for entry in statuses.iter() {
162        let status = entry.status();
163        let path = entry.path().unwrap_or("").to_string();
164
165        // Convert git2 status to porcelain format
166        // Untracked files are represented as "??" in porcelain v1.
167        if status.contains(git2::Status::WT_NEW) {
168            result.push('?');
169            result.push('?');
170            result.push(' ');
171            result.push_str(&path);
172            result.push('\n');
173            continue;
174        }
175
176        // Index status
177        let index_status = if status.contains(git2::Status::INDEX_NEW) {
178            'A'
179        } else if status.contains(git2::Status::INDEX_MODIFIED) {
180            'M'
181        } else if status.contains(git2::Status::INDEX_DELETED) {
182            'D'
183        } else if status.contains(git2::Status::INDEX_RENAMED) {
184            'R'
185        } else if status.contains(git2::Status::INDEX_TYPECHANGE) {
186            'T'
187        } else {
188            ' '
189        };
190
191        // Worktree status
192        let wt_status = if status.contains(git2::Status::WT_MODIFIED) {
193            'M'
194        } else if status.contains(git2::Status::WT_DELETED) {
195            'D'
196        } else if status.contains(git2::Status::WT_RENAMED) {
197            'R'
198        } else if status.contains(git2::Status::WT_TYPECHANGE) {
199            'T'
200        } else {
201            ' '
202        };
203
204        result.push(index_status);
205        result.push(wt_status);
206        result.push(' ');
207        result.push_str(&path);
208        result.push('\n');
209    }
210
211    Ok(result)
212}
213
214/// Get the diff of all changes (unstaged and staged).
215///
216/// Returns a formatted diff string suitable for LLM analysis.
217/// This is similar to `git diff HEAD`.
218///
219/// Handles the case of an empty repository (no commits yet) by
220/// diffing against an empty tree using a read-only approach.
221pub fn git_diff() -> io::Result<String> {
222    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
223
224    // Try to get HEAD tree
225    let head_tree = match repo.head() {
226        Ok(head) => Some(head.peel_to_tree().map_err(|e| git2_to_io_error(&e))?),
227        Err(ref e) if e.code() == git2::ErrorCode::UnbornBranch => {
228            // No commits yet - we need to show all untracked files as new files
229            // Since there's no HEAD, we diff an empty tree against the workdir
230
231            // Create a diff with an empty tree (no parent tree)
232            // This is a read-only operation that doesn't modify the index
233            let mut diff_opts = git2::DiffOptions::new();
234            diff_opts.include_untracked(true);
235            diff_opts.recurse_untracked_dirs(true);
236
237            let diff = repo
238                .diff_tree_to_workdir_with_index(None, Some(&mut diff_opts))
239                .map_err(|e| git2_to_io_error(&e))?;
240
241            let mut result = Vec::new();
242            diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
243                result.extend_from_slice(line.content());
244                true
245            })
246            .map_err(|e| git2_to_io_error(&e))?;
247
248            return Ok(String::from_utf8_lossy(&result).to_string());
249        }
250        Err(e) => return Err(git2_to_io_error(&e)),
251    };
252
253    // For repos with commits, diff HEAD against working tree
254    // This includes both staged and unstaged changes
255    let mut diff_opts = git2::DiffOptions::new();
256    diff_opts.include_untracked(true);
257    diff_opts.recurse_untracked_dirs(true);
258
259    let diff = repo
260        .diff_tree_to_workdir_with_index(head_tree.as_ref(), Some(&mut diff_opts))
261        .map_err(|e| git2_to_io_error(&e))?;
262
263    // Generate diff text
264    let mut result = Vec::new();
265    diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
266        result.extend_from_slice(line.content());
267        true
268    })
269    .map_err(|e| git2_to_io_error(&e))?;
270
271    Ok(String::from_utf8_lossy(&result).to_string())
272}
273
274/// Truncate a diff for review using progressive fallback strategy.
275///
276/// This function implements a multi-level truncation approach:
277/// 1. If diff fits within `max_full_diff_size`, return as-is
278/// 2. If diff is too large, semantically truncate with file prioritization
279/// 3. If even abbreviated diff is too large, return just file paths
280/// 4. If file list is too large, return abbreviated file list
281///
282/// When truncation occurs, the returned content includes clear markers
283/// and the truncation level indicates what instructions should be shown
284/// to the reviewer agent about exploring the full diff themselves.
285///
286/// # Warning Behavior
287///
288/// This function does not print warnings directly. Callers should check the
289/// return value's boolean flag and log appropriate warnings if truncation occurred.
290///
291/// # Truncation Behavior
292///
293/// When a diff exceeds `MAX_DIFF_SIZE_HARD`, it is truncated and a warning marker
294/// is placed **before** the diff content (not after). This ensures the LLM reviewer
295/// is immediately aware that the context is incomplete before analyzing the diff.
296///
297/// # Arguments
298///
299/// * `diff` - The full git diff
300/// * `max_full_diff_size` - Maximum size for full diff (default: 100KB)
301/// * `max_abbreviated_size` - Maximum size for abbreviated diff (default: 50KB)
302/// * `max_file_list_size` - Maximum size for file list (default: 10KB)
303///
304/// # Returns
305///
306/// A `DiffReviewContent` struct containing the truncated content and metadata.
307pub fn truncate_diff_for_review(
308    diff: String,
309    max_full_diff_size: usize,
310    max_abbreviated_size: usize,
311    max_file_list_size: usize,
312) -> DiffReviewContent {
313    let diff_size = diff.len();
314
315    // Level 1: Full diff fits
316    // Parse file count for consistent metadata even when returning early
317    let files = parse_diff_to_files(&diff);
318    let total_file_count = files.len();
319
320    if diff_size <= max_full_diff_size {
321        return DiffReviewContent {
322            content: diff,
323            truncation_level: DiffTruncationLevel::Full,
324            total_file_count,
325            shown_file_count: None,
326            baseline_oid: None,
327            baseline_short: None,
328            baseline_description: String::new(),
329        };
330    }
331
332    // Level 2: Abbreviated diff with semantic prioritization
333    let abbreviated = truncate_diff_semantically(&diff, &files, max_abbreviated_size);
334    let abbreviated_size = abbreviated.content.len();
335
336    if abbreviated_size <= max_abbreviated_size {
337        return abbreviated;
338    }
339
340    // Level 3: File list only
341    let file_list = build_file_list(&files);
342    let file_list_size = file_list.content.len();
343
344    if file_list_size <= max_file_list_size {
345        return file_list;
346    }
347
348    // Level 4: Abbreviated file list
349    abbreviate_file_list(&files, max_file_list_size, total_file_count)
350}
351
/// One file's section of a unified diff.
#[derive(Clone, Debug, Default)]
struct DiffFile {
    /// Path taken from the `diff --git` header (empty if it could not be parsed).
    path: String,
    /// Selection priority; larger values are kept first when truncating.
    priority: i32,
    /// Every line of this file's diff, header line included, without newlines.
    lines: Vec<String>,
}
362
/// Assign a priority score to a file path for truncation selection.
///
/// Higher scores are kept first when a diff has to be shortened. Matching is
/// case-insensitive and the **first** rule that applies wins, in this order:
///
/// | Rule                                              | Score |
/// |---------------------------------------------------|-------|
/// | path contains `src/` and has the `.rs` extension  | 100   |
/// | path contains `src/`                              | 80    |
/// | path contains `test`                              | 40    |
/// | extension is `.toml` or `.json`                   | 60    |
/// | path contains `doc`, or extension is `.md`        | 20    |
/// | anything else                                     | 50    |
///
/// Because of that ordering, `tests/Cargo.toml` scores 40, not 60.
// NOTE(review): the `test`/`doc` rules are plain substring checks, so paths
// such as `latest.txt` or `dockerfile` match them too — confirm whether
// that is intended before tightening.
fn prioritize_file_path(path: &str) -> i32 {
    use std::path::Path;

    let path_lower = path.to_lowercase();

    // ASCII-case-insensitive comparison of the final extension.
    let has_ext = |ext: &str| -> bool {
        Path::new(path)
            .extension()
            .and_then(std::ffi::OsStr::to_str)
            .is_some_and(|e| e.eq_ignore_ascii_case(ext))
    };

    if path_lower.contains("src/") {
        // Source code: Rust sources first, then everything else under src/.
        if has_ext("rs") {
            100
        } else {
            80
        }
    } else if path_lower.contains("test") {
        40
    } else if has_ext("toml") || has_ext("json") {
        // Covers Cargo.toml, package.json, tsconfig.json and other manifests.
        60
    } else if path_lower.contains("doc") || has_ext("md") {
        20
    } else {
        50
    }
}
420
421/// Parse a git diff into individual file blocks.
422fn parse_diff_to_files(diff: &str) -> Vec<DiffFile> {
423    let mut files: Vec<DiffFile> = Vec::new();
424    let mut current_file = DiffFile::default();
425    let mut in_file = false;
426
427    for line in diff.lines() {
428        if line.starts_with("diff --git ") {
429            if in_file && !current_file.lines.is_empty() {
430                files.push(std::mem::take(&mut current_file));
431            }
432            in_file = true;
433            current_file.lines.push(line.to_string());
434
435            if let Some(path) = line.split(" b/").nth(1) {
436                current_file.path = path.to_string();
437                current_file.priority = prioritize_file_path(path);
438            }
439        } else if in_file {
440            current_file.lines.push(line.to_string());
441        }
442    }
443
444    if in_file && !current_file.lines.is_empty() {
445        files.push(current_file);
446    }
447
448    files
449}
450
451/// Semantically truncate diff with file prioritization.
452fn truncate_diff_semantically(
453    _diff: &str,
454    files: &[DiffFile],
455    max_size: usize,
456) -> DiffReviewContent {
457    // Sort by priority, greedily select files that fit
458    let mut sorted_files = files.to_vec();
459    sorted_files.sort_by_key(|f: &DiffFile| std::cmp::Reverse(f.priority));
460
461    let mut selected_files = Vec::new();
462    let mut current_size = 0;
463
464    for file in &sorted_files {
465        let file_size: usize = file.lines.iter().map(|l| l.len() + 1).sum();
466
467        if current_size + file_size <= max_size {
468            current_size += file_size;
469            selected_files.push(file.clone());
470        } else if current_size > 0 {
471            break;
472        } else {
473            // Even the first file is too large, take part of it
474            let truncated_lines = truncate_lines_to_fit(&file.lines, max_size);
475            selected_files.push(DiffFile {
476                path: file.path.clone(),
477                priority: file.priority,
478                lines: truncated_lines,
479            });
480            break;
481        }
482    }
483
484    let shown_count = selected_files.len();
485    let omitted_count = files.len().saturating_sub(shown_count);
486
487    let mut result = String::new();
488    if omitted_count > 0 {
489        use std::fmt::Write;
490        let _ = write!(
491            result,
492            "[DIFF TRUNCATED: Showing {shown_count} of {} files. You MUST explore the full diff using git commands to review properly.]\n\n",
493            files.len()
494        );
495    }
496
497    for file in &selected_files {
498        for line in &file.lines {
499            result.push_str(line);
500            result.push('\n');
501        }
502    }
503
504    DiffReviewContent {
505        content: result,
506        truncation_level: DiffTruncationLevel::Abbreviated,
507        total_file_count: files.len(),
508        shown_file_count: Some(shown_count),
509        baseline_oid: None,
510        baseline_short: None,
511        baseline_description: String::new(),
512    }
513}
514
515/// Build a file list from diff files.
516fn build_file_list(files: &[DiffFile]) -> DiffReviewContent {
517    let mut result = String::from(
518        "[FULL DIFF TOO LARGE - Showing file list only. You MUST explore each file's diff using git commands.]\n\n"
519    );
520    result.push_str("FILES CHANGED (you must explore each file's diff):\n");
521
522    for file in files {
523        if !file.path.is_empty() {
524            result.push_str("  - ");
525            result.push_str(&file.path);
526            result.push('\n');
527        }
528    }
529
530    DiffReviewContent {
531        content: result,
532        truncation_level: DiffTruncationLevel::FileList,
533        total_file_count: files.len(),
534        shown_file_count: Some(files.len()),
535        baseline_oid: None,
536        baseline_short: None,
537        baseline_description: String::new(),
538    }
539}
540
541/// Abbreviate a file list that's too large.
542fn abbreviate_file_list(
543    files: &[DiffFile],
544    max_size: usize,
545    total_count: usize,
546) -> DiffReviewContent {
547    let mut result = String::from(
548        "[FILE LIST TOO LARGE - You MUST explore the repository to find all changed files.]\n\n",
549    );
550
551    // Calculate how many files we can show
552    let mut size_so_far = result.len();
553    let mut shown_count = 0;
554
555    result.push_str("SAMPLE OF CHANGED FILES (explore to find all):\n");
556
557    for file in files {
558        let line = format!("  - {}\n", file.path);
559        if size_so_far + line.len() > max_size {
560            break;
561        }
562        result.push_str(&line);
563        size_so_far += line.len();
564        shown_count += 1;
565    }
566
567    let omitted = total_count.saturating_sub(shown_count);
568    if omitted > 0 {
569        use std::fmt::Write;
570        let _ = write!(
571            result,
572            "\n... and {} more files (explore to find all)\n",
573            omitted
574        );
575    }
576
577    DiffReviewContent {
578        content: result,
579        truncation_level: DiffTruncationLevel::FileListAbbreviated,
580        total_file_count: total_count,
581        shown_file_count: Some(shown_count),
582        baseline_oid: None,
583        baseline_short: None,
584        baseline_description: String::new(),
585    }
586}
587
/// Truncate a slice of lines to fit within a maximum size.
///
/// This is the fallback for when even a single file is too large. Leading
/// lines are kept for as long as their cumulative size — each line counted
/// with one byte for its newline — stays within `max_size`.
///
/// When at least one line had to be dropped, ` [truncated...]` is appended to
/// the last kept line. The marker is not counted against `max_size`. If every
/// line fits, the lines are returned unchanged. If nothing fits, the result
/// is empty.
fn truncate_lines_to_fit(lines: &[String], max_size: usize) -> Vec<String> {
    let mut used = 0usize;
    let mut kept: Vec<String> = lines
        .iter()
        .take_while(|line| {
            let next = used + line.len() + 1; // +1 for newline
            if next <= max_size {
                used = next;
                true
            } else {
                false
            }
        })
        .cloned()
        .collect();

    // Mark the cut only when something was actually cut.
    if kept.len() < lines.len() {
        if let Some(last) = kept.last_mut() {
            last.push_str(" [truncated...]");
        }
    }

    kept
}
613
614fn index_has_changes_to_commit(repo: &git2::Repository, index: &git2::Index) -> io::Result<bool> {
615    match repo.head() {
616        Ok(head) => {
617            let head_tree = head.peel_to_tree().map_err(|e| git2_to_io_error(&e))?;
618            let diff = repo
619                .diff_tree_to_index(Some(&head_tree), Some(index), None)
620                .map_err(|e| git2_to_io_error(&e))?;
621            Ok(diff.deltas().len() > 0)
622        }
623        Err(ref e) if e.code() == git2::ErrorCode::UnbornBranch => Ok(!index.is_empty()),
624        Err(e) => Err(git2_to_io_error(&e)),
625    }
626}
627
/// Tell whether `path` is an internal artifact that must never be staged.
///
/// Matches the `.no_agent_commit` marker file, and the `.agent` and `.git`
/// directories together with everything beneath them. The comparison is made
/// on the lossy string form of the path, using `/` as the separator.
fn is_internal_agent_artifact(path: &std::path::Path) -> bool {
    let text = path.to_string_lossy();
    if text == ".no_agent_commit" {
        return true;
    }

    // A directory matches as itself or as a `<dir>/...` prefix, never as a
    // longer name that merely starts with it (e.g. `.gitignore`).
    [".agent", ".git"].iter().any(|dir| {
        text.strip_prefix(*dir)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
    })
}
636
637/// Stage all changes.
638///
639/// Similar to `git add -A`.
640///
641/// # Returns
642///
643/// Returns `Ok(true)` if files were successfully staged, `Ok(false)` if there
644/// were no files to stage, or an error if staging failed.
645pub fn git_add_all() -> io::Result<bool> {
646    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
647
648    let mut index = repo.index().map_err(|e| git2_to_io_error(&e))?;
649
650    // Stage deletions (equivalent to `git add -A` behavior).
651    // libgit2's `add_all` doesn't automatically remove deleted paths.
652    let mut status_opts = git2::StatusOptions::new();
653    status_opts
654        .include_untracked(true)
655        .recurse_untracked_dirs(true)
656        .include_ignored(false);
657    let statuses = repo
658        .statuses(Some(&mut status_opts))
659        .map_err(|e| git2_to_io_error(&e))?;
660    for entry in statuses.iter() {
661        if entry.status().contains(git2::Status::WT_DELETED) {
662            if let Some(path) = entry.path() {
663                index
664                    .remove_path(std::path::Path::new(path))
665                    .map_err(|e| git2_to_io_error(&e))?;
666            }
667        }
668    }
669
670    // Add all files (staged, unstaged, and untracked)
671    // Note: add_all() is required here, not update_all(), to include untracked files
672    let mut filter_cb = |path: &std::path::Path, _matched: &[u8]| -> i32 {
673        // Return 0 to add the file, non-zero to skip.
674        // We skip (return 1) internal agent artifacts to avoid committing them.
675        i32::from(is_internal_agent_artifact(path))
676    };
677    index
678        .add_all(
679            vec!["."],
680            git2::IndexAddOption::DEFAULT,
681            Some(&mut filter_cb),
682        )
683        .map_err(|e| git2_to_io_error(&e))?;
684
685    index.write().map_err(|e| git2_to_io_error(&e))?;
686
687    // Return true if staging produced something commit-worthy.
688    index_has_changes_to_commit(&repo, &index)
689}
690
691/// Resolve git commit identity with the full priority chain.
692///
693/// This function implements the identity resolution priority chain:
694/// 1. Git config (via libgit2's `repo.signature()`) - primary source
695/// 2. Provided name/email parameters (from Ralph config, CLI args, or env vars)
696/// 3. Environment variables (`RALPH_GIT_USER_NAME`, `RALPH_GIT_USER_EMAIL`)
697/// 4. Ralph config file values (passed through)
698/// 5. System username + derived email
699/// 6. Default values ("Ralph Workflow", "ralph@localhost")
700///
701/// Partial overrides are supported: CLI args/env vars/config can override
702/// individual fields (name or email) from git config.
703///
704/// # Arguments
705///
706/// * `repo` - The git repository (for git config)
707/// * `provided_name` - Optional name from Ralph config or CLI
708/// * `provided_email` - Optional email from Ralph config or CLI
709///
710/// # Returns
711///
712/// Returns `GitIdentity` with the resolved name and email.
713fn resolve_commit_identity(
714    repo: &git2::Repository,
715    provided_name: Option<&str>,
716    provided_email: Option<&str>,
717) -> GitIdentity {
718    use super::identity::{default_identity, fallback_email, fallback_username};
719
720    // Priority 1: Git config (via libgit2) - primary source
721    let mut name = String::new();
722    let mut email = String::new();
723    let mut has_git_config = false;
724
725    if let Ok(sig) = repo.signature() {
726        let git_name = sig.name().unwrap_or("");
727        let git_email = sig.email().unwrap_or("");
728        if !git_name.is_empty() && !git_email.is_empty() {
729            name = git_name.to_string();
730            email = git_email.to_string();
731            has_git_config = true;
732        }
733    }
734
735    // Priority order (standard git behavior):
736    // 1. Git config (local .git/config, then global ~/.gitconfig) - primary source
737    // 2. Provided args (provided_name/provided_email) - from Ralph config or CLI override
738    // 3. Env vars (RALPH_GIT_USER_NAME/EMAIL) - fallback if above are missing
739    //
740    // This matches standard git behavior where git config is authoritative.
741    let env_name = std::env::var("RALPH_GIT_USER_NAME").ok();
742    let env_email = std::env::var("RALPH_GIT_USER_EMAIL").ok();
743
744    // Apply in priority order: git config > provided args > env vars
745    // Git config takes highest priority (standard git behavior)
746    let final_name = if has_git_config && !name.is_empty() {
747        name.as_str()
748    } else {
749        provided_name
750            .filter(|s| !s.is_empty())
751            .or(env_name.as_deref())
752            .filter(|s| !s.is_empty())
753            .unwrap_or("")
754    };
755
756    let final_email = if has_git_config && !email.is_empty() {
757        email.as_str()
758    } else {
759        provided_email
760            .filter(|s| !s.is_empty())
761            .or(env_email.as_deref())
762            .filter(|s| !s.is_empty())
763            .unwrap_or("")
764    };
765
766    // If we have both name and email from git config + overrides, use them
767    if !final_name.is_empty() && !final_email.is_empty() {
768        let identity = GitIdentity::new(final_name.to_string(), final_email.to_string());
769        if identity.validate().is_ok() {
770            return identity;
771        }
772    }
773
774    // Priority 5: System username + derived email
775    let username = fallback_username();
776    let system_email = fallback_email(&username);
777    let identity = GitIdentity::new(
778        if final_name.is_empty() {
779            username
780        } else {
781            final_name.to_string()
782        },
783        if final_email.is_empty() {
784            system_email
785        } else {
786            final_email.to_string()
787        },
788    );
789
790    if identity.validate().is_ok() {
791        return identity;
792    }
793
794    // Priority 6: Default values (last resort)
795    default_identity()
796}
797
798/// Create a commit.
799///
800/// Similar to `git commit -m <message>`.
801///
802/// Handles both initial commits (no HEAD yet) and subsequent commits.
803///
804/// # Identity Resolution
805///
806/// The git commit identity (name and email) is resolved using the following priority:
807/// 1. Git config (via libgit2) - primary source
808/// 2. Provided `git_user_name` and `git_user_email` parameters (overrides)
809/// 3. Environment variables (`RALPH_GIT_USER_NAME`, `RALPH_GIT_USER_EMAIL`)
810/// 4. Ralph config file (read by caller, passed as parameters)
811/// 5. System username + derived email (sane fallback)
812/// 6. Default values ("Ralph Workflow", "ralph@localhost") - last resort
813///
814/// Partial overrides are supported: CLI args/env vars/config can override individual
815/// fields (name or email) from git config.
816///
817/// # Arguments
818///
819/// * `message` - The commit message
820/// * `git_user_name` - Optional git user name (overrides git config)
821/// * `git_user_email` - Optional git user email (overrides git config)
822///
823/// # Returns
824///
825/// Returns `Ok(Some(oid))` with the commit OID if successful, `Ok(None)` if the
826/// OID is zero (no commit created), or an error if the operation failed.
827pub fn git_commit(
828    message: &str,
829    git_user_name: Option<&str>,
830    git_user_email: Option<&str>,
831) -> io::Result<Option<git2::Oid>> {
832    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
833
834    // Get the index
835    let mut index = repo.index().map_err(|e| git2_to_io_error(&e))?;
836
837    // Don't create empty commits: if the index matches HEAD (or is empty on an unborn branch),
838    // there's nothing to commit.
839    if !index_has_changes_to_commit(&repo, &index)? {
840        return Ok(None);
841    }
842
843    // Get the tree from the index
844    let tree_oid = index.write_tree().map_err(|e| git2_to_io_error(&e))?;
845
846    let tree = repo.find_tree(tree_oid).map_err(|e| git2_to_io_error(&e))?;
847
848    // Resolve git identity using the identity resolution system.
849    // This implements the full priority chain with proper fallbacks.
850    let GitIdentity { name, email } = resolve_commit_identity(&repo, git_user_name, git_user_email);
851
852    // Debug logging: identity resolution source
853    // Only log if RALPH_DEBUG or similar debug mode is enabled
854    if std::env::var("RALPH_DEBUG").is_ok() {
855        let identity_source = if git_user_name.is_some() || git_user_email.is_some() {
856            "CLI/config override"
857        } else if std::env::var("RALPH_GIT_USER_NAME").is_ok()
858            || std::env::var("RALPH_GIT_USER_EMAIL").is_ok()
859        {
860            "environment variable"
861        } else if repo.signature().is_ok() {
862            "git config"
863        } else {
864            "system/default"
865        };
866        eprintln!("Git identity: {name} <{email}> (source: {identity_source})");
867    }
868
869    // Create the signature with the resolved identity
870    let sig = git2::Signature::now(&name, &email).map_err(|e| git2_to_io_error(&e))?;
871
872    let oid = match repo.head() {
873        Ok(head) => {
874            // Normal commit: has a parent
875            let head_commit = head.peel_to_commit().map_err(|e| git2_to_io_error(&e))?;
876            repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&head_commit])
877        }
878        Err(ref e) if e.code() == git2::ErrorCode::UnbornBranch => {
879            // Initial commit: no parents, but verify tree is not empty
880            // An empty tree can happen in edge cases where files were staged and then removed
881            let mut has_entries = false;
882            tree.walk(git2::TreeWalkMode::PreOrder, |_, _| {
883                has_entries = true;
884                1 // Stop iteration after first entry
885            })
886            .ok(); // Ignore errors, we just want to know if there's at least one entry
887
888            if !has_entries {
889                // Tree is empty, return None instead of creating empty commit
890                return Ok(None);
891            }
892            repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
893        }
894        Err(e) => return Err(git2_to_io_error(&e)),
895    }
896    .map_err(|e| git2_to_io_error(&e))?;
897
898    Ok(Some(oid))
899}
900
901/// Generate a diff from a specific starting commit.
902///
903/// Takes a starting commit OID and generates a diff between that commit
904/// and the current working tree. Returns a formatted diff string suitable
905/// for LLM analysis.
906///
907/// # Arguments
908///
909/// * `start_oid` - The OID of the commit to diff from
910///
911/// # Returns
912///
913/// Returns a formatted diff string, or an error if:
914/// - The repository cannot be opened
915/// - The starting commit cannot be found
916/// - The diff cannot be generated
917pub fn git_diff_from(start_oid: &str) -> io::Result<String> {
918    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
919
920    // Parse the starting OID
921    let oid = git2::Oid::from_str(start_oid).map_err(|_| {
922        io::Error::new(
923            io::ErrorKind::InvalidInput,
924            format!("Invalid commit OID: {start_oid}"),
925        )
926    })?;
927
928    // Find the starting commit
929    let start_commit = repo.find_commit(oid).map_err(|e| git2_to_io_error(&e))?;
930    let start_tree = start_commit.tree().map_err(|e| git2_to_io_error(&e))?;
931
932    // Diff between start commit and current working tree, including staged + unstaged
933    // changes and untracked files.
934    let mut diff_opts = git2::DiffOptions::new();
935    diff_opts.include_untracked(true);
936    diff_opts.recurse_untracked_dirs(true);
937
938    let diff = repo
939        .diff_tree_to_workdir_with_index(Some(&start_tree), Some(&mut diff_opts))
940        .map_err(|e| git2_to_io_error(&e))?;
941
942    // Generate diff text
943    let mut result = Vec::new();
944    diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
945        result.extend_from_slice(line.content());
946        true
947    })
948    .map_err(|e| git2_to_io_error(&e))?;
949
950    Ok(String::from_utf8_lossy(&result).to_string())
951}
952
953fn git_diff_from_empty_tree(repo: &git2::Repository) -> io::Result<String> {
954    let mut diff_opts = git2::DiffOptions::new();
955    diff_opts.include_untracked(true);
956    diff_opts.recurse_untracked_dirs(true);
957
958    let diff = repo
959        .diff_tree_to_workdir_with_index(None, Some(&mut diff_opts))
960        .map_err(|e| git2_to_io_error(&e))?;
961
962    let mut result = Vec::new();
963    diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
964        result.extend_from_slice(line.content());
965        true
966    })
967    .map_err(|e| git2_to_io_error(&e))?;
968
969    Ok(String::from_utf8_lossy(&result).to_string())
970}
971
972/// Get the git diff from the starting commit.
973///
974/// Uses the saved starting commit from `.agent/start_commit` to generate
975/// an incremental diff. Falls back to diffing from HEAD if no start commit
976/// file exists.
977///
978/// # Returns
979///
980/// Returns a formatted diff string, or an error if:
981/// - The diff cannot be generated
982/// - The starting commit file exists but is invalid
983pub fn get_git_diff_from_start() -> io::Result<String> {
984    use crate::git_helpers::start_commit::{load_start_point, save_start_commit, StartPoint};
985
986    // Ensure a valid starting point exists. This is expected to persist across runs,
987    // but we also repair missing/corrupt files opportunistically for robustness.
988    save_start_commit()?;
989
990    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
991
992    match load_start_point()? {
993        StartPoint::Commit(oid) => git_diff_from(&oid.to_string()),
994        StartPoint::EmptyRepo => git_diff_from_empty_tree(&repo),
995    }
996}
997
/// Result of commit operation with fallback.
///
/// This is the fallback-aware version of `CommitResult`.
///
/// Exactly one of three outcomes is represented: a commit was created,
/// nothing was committed, or the attempt failed. The failure case carries
/// a plain message string rather than a structured error value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommitResultFallback {
    /// A commit was successfully created with the given OID.
    Success(git2::Oid),
    /// No commit was created because there were no meaningful changes.
    NoChanges,
    /// The commit operation failed with an error message.
    Failed(String),
}
1010
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_git_diff_returns_string() {
        // Smoke test: the outcome depends on the git state of the environment,
        // so we only verify that the call completes without panicking.
        let _ = git_diff();
    }

    #[test]
    fn test_require_git_repo() {
        // Smoke test: succeeds inside a git repo and fails otherwise. The test
        // environment varies, so neither outcome is asserted.
        let _ = require_git_repo();
    }

    #[test]
    fn test_get_repo_root() {
        // Only validate the result when we are actually inside a git repo.
        if let Ok(path) = get_repo_root() {
            // The path should exist and be a directory
            assert!(path.exists());
            assert!(path.is_dir());
            // Should contain a .git directory or be inside one
            let git_dir = path.join(".git");
            assert!(git_dir.exists() || path.ancestors().any(|p| p.join(".git").exists()));
        }
    }

    #[test]
    fn test_git_diff_from_returns_result() {
        // An invalid OID must be reported as an error (as must running outside a repo).
        let result = git_diff_from("invalid_oid_that_does_not_exist");
        assert!(result.is_err());
    }

    #[test]
    fn test_git_snapshot_returns_result() {
        // Smoke test: environment-dependent, only checks the call does not panic.
        let _ = git_snapshot();
    }

    #[test]
    fn test_git_add_all_returns_result() {
        // Smoke test: environment-dependent, only checks the call does not panic.
        // NOTE(review): presumably this stages changes in whatever repository the
        // tests run in - confirm that side effect is acceptable.
        let _ = git_add_all();
    }

    #[test]
    fn test_get_git_diff_from_start_returns_result() {
        // Smoke test: the function saves/loads a start point and diffs against it,
        // so the outcome depends on the environment. Only checks it does not panic.
        let _ = get_git_diff_from_start();
    }

    #[test]
    fn test_truncate_diff_for_review_full() {
        // Small diffs should not be truncated
        let diff = "diff --git a/file.rs b/file.rs\n+ new line\n- old line";
        let result = truncate_diff_for_review(diff.to_string(), 10_000, 5_000, 1_000);
        assert_eq!(result.truncation_level, DiffTruncationLevel::Full);
        // total_file_count is parsed for consistent metadata
        assert_eq!(result.total_file_count, 1);
        assert_eq!(result.shown_file_count, None);
    }

    #[test]
    fn test_truncate_diff_for_review_abbreviated() {
        // Create a diff with multiple files that will exceed max_full_diff_size
        let mut diff = String::new();
        for i in 0..20 {
            diff.push_str(&format!("diff --git a/file{i}.rs b/file{i}.rs\n"));
            diff.push_str("index abc123..def456 100644\n");
            diff.push_str(&format!("--- a/file{i}.rs\n"));
            diff.push_str(&format!("+++ b/file{i}.rs\n"));
            for j in 0..100 {
                diff.push_str(&format!("+line {j} in file {i}\n"));
                diff.push_str(&format!("-line {j} in file {i}\n"));
            }
        }

        let result = truncate_diff_for_review(diff, 1_000, 5_000, 1_000);
        assert_eq!(result.truncation_level, DiffTruncationLevel::Abbreviated);
        assert!(result.shown_file_count.unwrap_or(0) < result.total_file_count);
        assert!(result.content.contains("TRUNCATED") || result.content.contains("truncated"));
    }

    #[test]
    fn test_prioritize_file_path() {
        // Source files get highest priority
        assert!(prioritize_file_path("src/main.rs") > prioritize_file_path("tests/test.rs"));
        assert!(prioritize_file_path("src/lib.rs") > prioritize_file_path("README.md"));

        // Tests get lower priority than src
        assert!(prioritize_file_path("src/main.rs") > prioritize_file_path("test/test.rs"));

        // Config files get medium priority
        assert!(prioritize_file_path("Cargo.toml") > prioritize_file_path("docs/guide.md"));

        // Docs get lowest priority
        assert!(prioritize_file_path("README.md") < prioritize_file_path("src/main.rs"));
    }

    #[test]
    fn test_truncate_diff_keeps_high_priority_files() {
        let diff = "diff --git a/README.md b/README.md\n\
            +doc change\n\
            diff --git a/src/main.rs b/src/main.rs\n\
            +important change\n\
            diff --git a/tests/test.rs b/tests/test.rs\n\
            +test change\n";

        // With a very small limit, should keep src/main.rs first due to priority
        let result = truncate_diff_for_review(diff.to_string(), 50, 100, 1_000);

        // Should include the high priority src file
        assert!(result.content.contains("src/main.rs") || result.content.contains("file list"));
    }

    #[test]
    fn test_diff_review_content_default_truncation_level() {
        // Test that DiffTruncationLevel::Full is the default
        assert_eq!(DiffTruncationLevel::default(), DiffTruncationLevel::Full);
    }

    #[test]
    fn test_exploration_instruction_helper() {
        // Use the local helper function instead of importing from guided module
        // Test Full level - should return empty string
        let full_content = DiffReviewContent {
            content: "some diff".to_string(),
            truncation_level: DiffTruncationLevel::Full,
            total_file_count: 5,
            shown_file_count: None,
            baseline_oid: None,
            baseline_short: None,
            baseline_description: String::new(),
        };
        let instruction = build_exploration_instruction_for_test(&full_content);
        assert!(instruction.is_empty());

        // Test Abbreviated level - should have instruction
        let abbreviated_content = DiffReviewContent {
            content: "truncated diff".to_string(),
            truncation_level: DiffTruncationLevel::Abbreviated,
            total_file_count: 10,
            shown_file_count: Some(3),
            baseline_oid: None,
            baseline_short: None,
            baseline_description: String::new(),
        };
        let instruction = build_exploration_instruction_for_test(&abbreviated_content);
        assert!(instruction.contains("ABBREVIATED"));
        assert!(instruction.contains("3/10"));

        // Test FileList level - should have instruction
        let file_list_content = DiffReviewContent {
            content: "files list".to_string(),
            truncation_level: DiffTruncationLevel::FileList,
            total_file_count: 50,
            shown_file_count: Some(50),
            baseline_oid: None,
            baseline_short: None,
            baseline_description: String::new(),
        };
        let instruction = build_exploration_instruction_for_test(&file_list_content);
        assert!(instruction.contains("FILE LIST ONLY"));
        assert!(instruction.contains("50 files changed"));

        // Test FileListAbbreviated level - should have instruction
        let abbreviated_list_content = DiffReviewContent {
            content: "abbreviated file list".to_string(),
            truncation_level: DiffTruncationLevel::FileListAbbreviated,
            total_file_count: 200,
            shown_file_count: Some(10),
            baseline_oid: None,
            baseline_short: None,
            baseline_description: String::new(),
        };
        let instruction = build_exploration_instruction_for_test(&abbreviated_list_content);
        assert!(instruction.contains("FILE LIST ABBREVIATED"));
        assert!(instruction.contains("10/200"));
    }

    /// Helper function for testing exploration instruction generation.
    ///
    /// Maps each truncation level to the bracketed instruction text expected for it;
    /// `Full` yields an empty string. A missing `shown_file_count` is rendered as 0.
    fn build_exploration_instruction_for_test(diff_content: &DiffReviewContent) -> String {
        match diff_content.truncation_level {
            DiffTruncationLevel::Full => String::new(),
            DiffTruncationLevel::Abbreviated => format!(
                "[DIFF ABBREVIATED: {}/{} files shown. You MUST explore the full diff using 'git diff HEAD' to review properly.]",
                diff_content.shown_file_count.unwrap_or(0),
                diff_content.total_file_count
            ),
            DiffTruncationLevel::FileList => format!(
                "[FILE LIST ONLY: {} files changed. You MUST explore each file's diff using 'git diff HEAD -- <file>' to review properly.]",
                diff_content.total_file_count
            ),
            DiffTruncationLevel::FileListAbbreviated => format!(
                "[FILE LIST ABBREVIATED: {}/{} files shown. You MUST run 'git status' to find all files and explore their diffs.]",
                diff_content.shown_file_count.unwrap_or(0),
                diff_content.total_file_count
            ),
        }
    }
}