context_creator/utils/
git.rs

1//! Git utilities for executing git commands and parsing output
2
3use anyhow::{anyhow, Result};
4use git2::{Repository, Sort};
5use std::path::{Path, PathBuf};
6use std::process::Command;
7use tracing::{debug, trace, warn};
8
/// Aggregate line-change statistics for a diff between two git references.
///
/// The [`Default`] value is the all-zero statistic, i.e. "nothing changed".
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DiffStats {
    /// Number of files that appear in the diff.
    pub files_changed: usize,
    /// Total number of inserted lines, summed over all files.
    pub insertions: usize,
    /// Total number of deleted lines, summed over all files.
    pub deletions: usize,
}
16
/// Summary of a single commit as shown in a file's git history.
#[derive(Debug, Clone)]
pub struct CommitInfo {
    /// Commit message text. `get_file_git_context_with_depth` stores only the
    /// first line of the message here, or `"<no message>"` when none is available.
    pub message: String,
    /// Name of the commit author. `get_file_git_context_with_depth` falls back to
    /// `"Unknown"` when the name cannot be read.
    pub author: String,
}
23
/// Git context for a file: the recent commits that touched it.
#[derive(Debug, Clone)]
pub struct GitContext {
    /// Commits that touched the file, in the order the history walk produced them.
    pub recent_commits: Vec<CommitInfo>,
}
29
30/// Validate that a git reference looks safe and reasonable
31fn validate_git_reference(git_ref: &str) -> Result<()> {
32    // Basic validation to prevent command injection
33    if git_ref.is_empty() {
34        return Err(anyhow!("Git reference cannot be empty"));
35    }
36
37    // Check for dangerous characters that could be used for command injection
38    let dangerous_chars = [';', '&', '|', '`', '$', '(', ')', '\n', '\r'];
39    for &ch in &dangerous_chars {
40        if git_ref.contains(ch) {
41            return Err(anyhow!("Invalid character in git reference: '{}'", ch));
42        }
43    }
44
45    // Additional length check to prevent extremely long inputs
46    if git_ref.len() > 256 {
47        return Err(anyhow!("Git reference too long"));
48    }
49
50    Ok(())
51}
52
/// Reduce raw git stderr output to a message that is safe to show to callers.
///
/// Every line containing `fatal:` is dropped, unless it also contains
/// `unknown revision`. The surviving lines are joined with `\n` and prefixed
/// with `Git error: `; if nothing survives, a generic
/// `Invalid git reference` message is returned instead.
fn sanitize_git_error(error_output: &str) -> String {
    let mut kept = String::new();
    let mut needs_separator = false;

    for line in error_output.lines() {
        // Fatal lines may embed local paths; only the "unknown revision" one is kept.
        if line.contains("fatal:") && !line.contains("unknown revision") {
            continue;
        }
        if needs_separator {
            kept.push('\n');
        }
        kept.push_str(line);
        needs_separator = true;
    }

    match kept.as_str() {
        "" => "Invalid git reference".to_string(),
        sanitized => format!("Git error: {sanitized}"),
    }
}
68
69/// Validate that a file path is safe (no directory traversal)
70fn validate_file_path(path: &str) -> Result<PathBuf> {
71    if path.contains("..") || path.starts_with('/') {
72        return Err(anyhow!("Unsafe file path detected: {}", path));
73    }
74    Ok(PathBuf::from(path))
75}
76
/// Report whether `path` directly contains a `.git` entry.
///
/// Only `path` itself is inspected; parent directories are not searched.
pub fn is_git_repository<P: AsRef<Path>>(path: P) -> bool {
    let mut marker = path.as_ref().to_path_buf();
    marker.push(".git");
    marker.exists()
}
82
83/// Get the list of files changed between two git references
84pub fn get_changed_files<P: AsRef<Path>>(
85    repo_path: P,
86    from: &str,
87    to: &str,
88) -> Result<Vec<PathBuf>> {
89    // Validate git references to prevent command injection
90    validate_git_reference(from)?;
91    validate_git_reference(to)?;
92
93    let output = Command::new("git")
94        .args(["diff", "--name-only", from, to])
95        .current_dir(repo_path.as_ref())
96        .output()
97        .map_err(|e| anyhow!("Failed to execute git command: {}", e))?;
98
99    if !output.status.success() {
100        let stderr = String::from_utf8_lossy(&output.stderr);
101        return Err(anyhow!("{}", sanitize_git_error(&stderr)));
102    }
103
104    let stdout = String::from_utf8_lossy(&output.stdout);
105    let mut files = Vec::new();
106
107    for line in stdout.lines() {
108        let line = line.trim();
109        if !line.is_empty() {
110            // Validate each file path to prevent path traversal
111            let safe_path = validate_file_path(line)?;
112            files.push(repo_path.as_ref().join(safe_path));
113        }
114    }
115
116    Ok(files)
117}
118
119/// Get diff statistics between two git references
120pub fn get_diff_stats<P: AsRef<Path>>(repo_path: P, from: &str, to: &str) -> Result<DiffStats> {
121    // Validate git references to prevent command injection
122    validate_git_reference(from)?;
123    validate_git_reference(to)?;
124
125    let output = Command::new("git")
126        .args(["diff", "--numstat", from, to])
127        .current_dir(repo_path.as_ref())
128        .output()
129        .map_err(|e| anyhow!("Failed to execute git command: {}", e))?;
130
131    if !output.status.success() {
132        let stderr = String::from_utf8_lossy(&output.stderr);
133        return Err(anyhow!("{}", sanitize_git_error(&stderr)));
134    }
135
136    let stdout = String::from_utf8_lossy(&output.stdout);
137    let mut stats = DiffStats {
138        files_changed: 0,
139        insertions: 0,
140        deletions: 0,
141    };
142
143    for line in stdout.lines() {
144        let line = line.trim();
145        if line.is_empty() {
146            continue;
147        }
148
149        let parts: Vec<&str> = line.split_whitespace().collect();
150        if parts.len() >= 2 {
151            stats.files_changed += 1;
152
153            // Parse insertions (first column)
154            if let Ok(insertions) = parts[0].parse::<usize>() {
155                stats.insertions += insertions;
156            }
157
158            // Parse deletions (second column)
159            if let Ok(deletions) = parts[1].parse::<usize>() {
160                stats.deletions += deletions;
161            }
162        }
163    }
164
165    Ok(stats)
166}
167
168/// Get the root directory of the git repository
169pub fn get_repository_root<P: AsRef<Path>>(path: P) -> Result<PathBuf> {
170    let output = Command::new("git")
171        .args(["rev-parse", "--show-toplevel"])
172        .current_dir(path.as_ref())
173        .output()
174        .map_err(|e| anyhow!("Failed to execute git command: {}", e))?;
175
176    if !output.status.success() {
177        let stderr = String::from_utf8_lossy(&output.stderr);
178        return Err(anyhow!("{}", sanitize_git_error(&stderr)));
179    }
180
181    let stdout = String::from_utf8_lossy(&output.stdout);
182    let root_path = stdout.trim();
183
184    Ok(PathBuf::from(root_path))
185}
186
187/// Get git context (recent commits) for a specific file
188pub fn get_file_git_context<P: AsRef<Path>>(repo_path: P, file_path: P) -> Option<GitContext> {
189    get_file_git_context_with_depth(repo_path, file_path, 3)
190}
191
192/// Get git context (recent commits) for a specific file with configurable depth
193pub fn get_file_git_context_with_depth<P: AsRef<Path>>(
194    repo_path: P,
195    file_path: P,
196    max_commits: usize,
197) -> Option<GitContext> {
198    let repo_path_str = repo_path.as_ref().display();
199    let file_path_str = file_path.as_ref().display();
200
201    trace!(
202        "Getting git context for file: {} in repo: {}",
203        file_path_str,
204        repo_path_str
205    );
206
207    // First, try to discover the actual repository root
208    let repo = match Repository::discover(repo_path.as_ref()) {
209        Ok(r) => {
210            debug!(
211                "Successfully discovered git repository at: {}",
212                repo_path_str
213            );
214            r
215        }
216        Err(e) => {
217            debug!(
218                "Failed to discover git repository at {}: {}",
219                repo_path_str, e
220            );
221            return None;
222        }
223    };
224
225    // Get the repository root path
226    let repo_root = match repo.workdir() {
227        Some(root) => {
228            trace!("Repository workdir: {}", root.display());
229            root
230        }
231        None => {
232            warn!("Repository has no working directory (bare repository)");
233            return None;
234        }
235    };
236
237    // Get the relative path from repo root
238    let file_canonical = match file_path.as_ref().canonicalize() {
239        Ok(path) => path,
240        Err(e) => {
241            debug!("Failed to canonicalize file path {}: {}", file_path_str, e);
242            return None;
243        }
244    };
245
246    let repo_canonical = match repo_root.canonicalize() {
247        Ok(path) => path,
248        Err(e) => {
249            warn!(
250                "Failed to canonicalize repository path {}: {}",
251                repo_root.display(),
252                e
253            );
254            return None;
255        }
256    };
257
258    let relative_path = match file_canonical.strip_prefix(repo_canonical) {
259        Ok(path) => {
260            trace!("Relative path in repository: {}", path.display());
261            path
262        }
263        Err(e) => {
264            debug!(
265                "File {} is not within repository {}: {}",
266                file_path_str,
267                repo_root.display(),
268                e
269            );
270            return None;
271        }
272    };
273
274    // Create a revwalk starting from HEAD
275    let mut revwalk = match repo.revwalk() {
276        Ok(walk) => {
277            trace!("Created revwalk for repository");
278            walk
279        }
280        Err(e) => {
281            warn!("Failed to create revwalk: {}", e);
282            return None;
283        }
284    };
285
286    // Configure sorting
287    if let Err(e) = revwalk.set_sorting(Sort::TIME) {
288        warn!("Failed to set revwalk sorting: {}", e);
289        return None;
290    }
291
292    if let Err(e) = revwalk.push_head() {
293        debug!(
294            "Failed to push HEAD to revwalk (repository may be empty): {}",
295            e
296        );
297        return None;
298    }
299
300    let mut commits = Vec::new();
301    let mut commits_processed = 0;
302
303    trace!(
304        "Walking through commits to find those affecting file: {}",
305        relative_path.display()
306    );
307
308    // Walk through commits
309    for oid_result in revwalk {
310        if commits.len() >= max_commits {
311            trace!("Reached maximum commit limit of {}", max_commits);
312            break;
313        }
314
315        let oid = match oid_result {
316            Ok(o) => o,
317            Err(e) => {
318                debug!("Failed to get commit OID: {}", e);
319                continue;
320            }
321        };
322
323        let commit = match repo.find_commit(oid) {
324            Ok(c) => c,
325            Err(e) => {
326                debug!("Failed to find commit {}: {}", oid, e);
327                continue;
328            }
329        };
330
331        commits_processed += 1;
332
333        // Check if this commit touches our file
334        let touches_file = if let Ok(parent) = commit.parent(0) {
335            let parent_tree = match parent.tree() {
336                Ok(tree) => tree,
337                Err(e) => {
338                    debug!("Failed to get parent tree: {}", e);
339                    continue;
340                }
341            };
342            let commit_tree = match commit.tree() {
343                Ok(tree) => tree,
344                Err(e) => {
345                    debug!("Failed to get commit tree: {}", e);
346                    continue;
347                }
348            };
349            let diff = match repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None) {
350                Ok(diff) => diff,
351                Err(e) => {
352                    debug!("Failed to create diff: {}", e);
353                    continue;
354                }
355            };
356
357            diff.deltas().any(|delta| {
358                delta.old_file().path() == Some(relative_path)
359                    || delta.new_file().path() == Some(relative_path)
360            })
361        } else {
362            // First commit - check if file exists
363            let tree = match commit.tree() {
364                Ok(tree) => tree,
365                Err(e) => {
366                    debug!("Failed to get tree for root commit: {}", e);
367                    continue;
368                }
369            };
370            tree.get_path(relative_path).is_ok()
371        };
372
373        if touches_file {
374            let message = commit
375                .message()
376                .unwrap_or("<no message>")
377                .lines()
378                .next()
379                .unwrap_or("<no message>")
380                .to_string();
381            let author = commit.author().name().unwrap_or("Unknown").to_string();
382
383            trace!("Found relevant commit: {} by {}", message, author);
384            commits.push(CommitInfo { message, author });
385        }
386    }
387
388    debug!(
389        "Processed {} commits, found {} relevant commits for file {}",
390        commits_processed,
391        commits.len(),
392        relative_path.display()
393    );
394
395    if commits.is_empty() {
396        debug!("No git history found for file: {}", file_path_str);
397        None
398    } else {
399        trace!("Returning git context with {} commits", commits.len());
400        Some(GitContext {
401            recent_commits: commits,
402        })
403    }
404}
405
406/// Format git context as markdown string
407pub fn format_git_context_to_markdown(git_context: &GitContext) -> String {
408    if git_context.recent_commits.is_empty() {
409        return String::new();
410    }
411
412    let mut output = String::new();
413    output.push('\n');
414    output.push_str("Git history:\n");
415
416    for (i, commit) in git_context.recent_commits.iter().enumerate().take(3) {
417        if i > 0 {
418            output.push('\n');
419        }
420        output.push_str(&format!(
421            "  - {} by {}",
422            commit.message.trim(),
423            commit.author
424        ));
425    }
426    output.push('\n');
427
428    output
429}