Skip to main content

keyhog_sources/git/
diff.rs

1//! Git diff source: scans only added/modified lines from `git diff`, ideal for
2//! CI/CD pre-commit hooks that should only flag new secrets.
3
4use keyhog_core::{Chunk, ChunkMetadata, Source, SourceError};
5use std::io::BufRead;
6use std::path::{Path, PathBuf};
7use std::process::Command;
8
/// Scans only the ADDED lines between two git refs.
///
/// Runs `git diff` in unified format and keeps the lines that start with `+`,
/// so content that already existed at the base ref is never reported again.
/// Useful for CI/CD pre-commit hooks and PR checks.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::Source;
/// use keyhog_sources::GitDiffSource;
/// use std::path::PathBuf;
///
/// let source = GitDiffSource::new(PathBuf::from("."), "main").with_head_ref("HEAD");
/// assert_eq!(source.name(), "git-diff");
/// ```
#[derive(Debug, Clone)]
pub struct GitDiffSource {
    /// Location of the repository handed to `git -C`.
    repo_path: PathBuf,
    /// Ref on the "old" side of the diff.
    base_ref: String,
    /// Ref on the "new" side of the diff; `None` means `HEAD`.
    head_ref: Option<String>,
}
28
29impl GitDiffSource {
30    /// Create a new diff source comparing `base_ref` to HEAD.
31    ///
32    /// # Examples
33    ///
34    /// ```rust
35    /// use keyhog_core::Source;
36    /// use keyhog_sources::GitDiffSource;
37    /// use std::path::PathBuf;
38    ///
39    /// let source = GitDiffSource::new(PathBuf::from("."), "origin/main");
40    /// assert_eq!(source.name(), "git-diff");
41    /// ```
42    pub fn new(repo_path: PathBuf, base_ref: impl Into<String>) -> Self {
43        Self {
44            repo_path,
45            base_ref: base_ref.into(),
46            head_ref: None,
47        }
48    }
49
50    /// Set a specific head ref to compare against (defaults to HEAD).
51    ///
52    /// # Examples
53    ///
54    /// ```rust
55    /// use keyhog_core::Source;
56    /// use keyhog_sources::GitDiffSource;
57    /// use std::path::PathBuf;
58    ///
59    /// let source = GitDiffSource::new(PathBuf::from("."), "main").with_head_ref("feature");
60    /// assert_eq!(source.name(), "git-diff");
61    /// ```
62    pub fn with_head_ref(mut self, head_ref: impl Into<String>) -> Self {
63        self.head_ref = Some(head_ref.into());
64        self
65    }
66}
67
68impl Source for GitDiffSource {
69    fn name(&self) -> &str {
70        "git-diff"
71    }
72
73    fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_> {
74        match stream_added_lines(&self.repo_path, &self.base_ref, self.head_ref.as_deref()) {
75            Ok(iter) => Box::new(iter),
76            Err(e) => Box::new(std::iter::once(Err(e))),
77        }
78    }
79    fn as_any(&self) -> &dyn std::any::Any {
80        self
81    }
82}
83
84/// Stream only ADDED lines from git diff output.
85fn stream_added_lines(
86    repo_path: &Path,
87    base_ref: &str,
88    head_ref: Option<&str>,
89) -> Result<impl Iterator<Item = Result<Chunk, SourceError>>, SourceError> {
90    let base_ref = super::validate_ref_name(base_ref)?;
91    let head_ref = super::validate_ref_name(head_ref.unwrap_or("HEAD"))?;
92    let repo_root = super::canonical_repo_root(repo_path)?;
93    let repo_arg = super::validate_repo_path(&repo_root)?;
94
95    // Verify the refs exist first
96    super::verify_ref(&repo_arg, &base_ref)?;
97    super::verify_ref(&repo_arg, &head_ref)?;
98    let base_commit = super::get_commit_hash(&repo_arg, &base_ref)?;
99    let head_commit = super::get_commit_hash(&repo_arg, &head_ref)?;
100
101    // Run git diff to get unified diff output
102    let mut command = Command::new(super::git_bin()?);
103    command.args([
104        "-C",
105        &repo_arg,
106        "diff",
107        "-U0",
108        "--end-of-options",
109        &base_commit,
110        &head_commit,
111    ]);
112
113    command.stdout(std::process::Stdio::piped());
114    command.stderr(std::process::Stdio::piped());
115
116    let mut child = command.spawn().map_err(SourceError::Io)?;
117    let stdout = child
118        .stdout
119        .take()
120        .ok_or_else(|| SourceError::Io(std::io::Error::other("missing stdout")))?;
121    let mut reader = std::io::BufReader::new(stdout).lines();
122
123    // Get commit info for metadata
124    let author = super::get_commit_author(&repo_arg, &head_commit)?;
125    let date = super::get_commit_date(&repo_arg, &head_commit)?;
126
127    let mut current_path: Option<String> = None;
128    let mut current_content = String::new();
129    let mut in_hunk = false;
130    let mut done = false;
131
132    Ok(std::iter::from_fn(move || {
133        if done {
134            return None;
135        }
136
137        loop {
138            let line = match reader.next() {
139                Some(Ok(l)) => l,
140                Some(Err(e)) => {
141                    done = true;
142                    return Some(Err(SourceError::Io(e)));
143                }
144                None => {
145                    done = true;
146                    if let Some(ref path) = current_path {
147                        if !current_content.trim().is_empty() {
148                            return Some(Ok(Chunk {
149                                data: current_content.trim().to_string().into(),
150                                metadata: ChunkMetadata {
151                                    base_offset: 0,
152                                    source_type: "git-diff".into(),
153                                    path: Some(path.clone()),
154                                    commit: Some(head_commit.clone()),
155                                    author: Some(author.clone()),
156                                    date: Some(date.clone()),
157                                                                    mtime_ns: None,
158                                    size_bytes: None,
159},
160                            }));
161                        }
162                    }
163                    return None;
164                }
165            };
166
167            if line.starts_with("diff --git ") {
168                let prev_path = current_path.take();
169                let prev_content = std::mem::take(&mut current_content);
170
171                in_hunk = false;
172
173                if let Some(path) = prev_path {
174                    if !prev_content.trim().is_empty() {
175                        return Some(Ok(Chunk {
176                            data: prev_content.trim().to_string().into(),
177                            metadata: ChunkMetadata {
178                                base_offset: 0,
179                                source_type: "git-diff".into(),
180                                path: Some(path),
181                                commit: Some(head_commit.clone()),
182                                author: Some(author.clone()),
183                                date: Some(date.clone()),
184                                                            mtime_ns: None,
185                                size_bytes: None,
186},
187                        }));
188                    }
189                }
190                continue;
191            }
192
193            if line.starts_with("deleted file mode") {
194                current_path = None;
195                continue;
196            }
197
198            if line.starts_with("new file mode")
199                || line.starts_with("index ")
200                || line.starts_with("--- ")
201            {
202                continue;
203            }
204
205            if let Some(path_part) = line.strip_prefix("+++ b/") {
206                current_path = Some(path_part.trim().to_string());
207                continue;
208            }
209
210            if line.starts_with("@@") && line.contains("@@") {
211                in_hunk = true;
212                continue;
213            }
214
215            if in_hunk && line.starts_with('+') && !line.starts_with("+++") {
216                current_content.push_str(&line[1..]);
217                current_content.push('\n');
218            }
219
220            if current_content.len() > 10 * 1024 * 1024 {
221                if let Some(ref path) = current_path {
222                    if !current_content.trim().is_empty() {
223                        let chunk_content = current_content.trim().to_string();
224                        current_content = String::new();
225                        return Some(Ok(Chunk {
226                            data: chunk_content.into(),
227                            metadata: ChunkMetadata {
228                                base_offset: 0,
229                                source_type: "git-diff".into(),
230                                path: Some(path.clone()),
231                                commit: Some(head_commit.clone()),
232                                author: Some(author.clone()),
233                                date: Some(date.clone()),
234                                                            mtime_ns: None,
235                                size_bytes: None,
236},
237                        }));
238                    }
239                }
240            }
241        }
242    }))
243}