keyhog_sources/git/
diff.rs1use keyhog_core::{Chunk, ChunkMetadata, Source, SourceError};
5use std::io::BufRead;
6use std::path::{Path, PathBuf};
7use std::process::Command;
8
/// A [`Source`] that scans the lines added between two git revisions.
///
/// Construct it with [`GitDiffSource::new`] and optionally pick the newer side
/// of the comparison with [`GitDiffSource::with_head_ref`].
#[derive(Debug, Clone)]
pub struct GitDiffSource {
    /// Location of the repository handed to `git -C`.
    repo_path: PathBuf,
    /// Revision the diff starts from (the "old" side).
    base_ref: String,
    /// Revision the diff ends at (the "new" side); `HEAD` is used when `None`.
    head_ref: Option<String>,
}
28
29impl GitDiffSource {
30 pub fn new(repo_path: PathBuf, base_ref: impl Into<String>) -> Self {
43 Self {
44 repo_path,
45 base_ref: base_ref.into(),
46 head_ref: None,
47 }
48 }
49
50 pub fn with_head_ref(mut self, head_ref: impl Into<String>) -> Self {
63 self.head_ref = Some(head_ref.into());
64 self
65 }
66}
67
68impl Source for GitDiffSource {
69 fn name(&self) -> &str {
70 "git-diff"
71 }
72
73 fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_> {
74 match stream_added_lines(&self.repo_path, &self.base_ref, self.head_ref.as_deref()) {
75 Ok(iter) => Box::new(iter),
76 Err(e) => Box::new(std::iter::once(Err(e))),
77 }
78 }
79 fn as_any(&self) -> &dyn std::any::Any {
80 self
81 }
82}
83
84fn stream_added_lines(
86 repo_path: &Path,
87 base_ref: &str,
88 head_ref: Option<&str>,
89) -> Result<impl Iterator<Item = Result<Chunk, SourceError>>, SourceError> {
90 let base_ref = super::validate_ref_name(base_ref)?;
91 let head_ref = super::validate_ref_name(head_ref.unwrap_or("HEAD"))?;
92 let repo_root = super::canonical_repo_root(repo_path)?;
93 let repo_arg = super::validate_repo_path(&repo_root)?;
94
95 super::verify_ref(&repo_arg, &base_ref)?;
97 super::verify_ref(&repo_arg, &head_ref)?;
98 let base_commit = super::get_commit_hash(&repo_arg, &base_ref)?;
99 let head_commit = super::get_commit_hash(&repo_arg, &head_ref)?;
100
101 let mut command = Command::new(super::git_bin()?);
103 command.args([
104 "-C",
105 &repo_arg,
106 "diff",
107 "-U0",
108 "--end-of-options",
109 &base_commit,
110 &head_commit,
111 ]);
112
113 command.stdout(std::process::Stdio::piped());
114 command.stderr(std::process::Stdio::piped());
115
116 let mut child = command.spawn().map_err(SourceError::Io)?;
117 let stdout = child
118 .stdout
119 .take()
120 .ok_or_else(|| SourceError::Io(std::io::Error::other("missing stdout")))?;
121 let mut reader = std::io::BufReader::new(stdout).lines();
122
123 let author = super::get_commit_author(&repo_arg, &head_commit)?;
125 let date = super::get_commit_date(&repo_arg, &head_commit)?;
126
127 let mut current_path: Option<String> = None;
128 let mut current_content = String::new();
129 let mut in_hunk = false;
130 let mut done = false;
131
132 Ok(std::iter::from_fn(move || {
133 if done {
134 return None;
135 }
136
137 loop {
138 let line = match reader.next() {
139 Some(Ok(l)) => l,
140 Some(Err(e)) => {
141 done = true;
142 return Some(Err(SourceError::Io(e)));
143 }
144 None => {
145 done = true;
146 if let Some(ref path) = current_path {
147 if !current_content.trim().is_empty() {
148 return Some(Ok(Chunk {
149 data: current_content.trim().to_string().into(),
150 metadata: ChunkMetadata {
151 base_offset: 0,
152 source_type: "git-diff".into(),
153 path: Some(path.clone()),
154 commit: Some(head_commit.clone()),
155 author: Some(author.clone()),
156 date: Some(date.clone()),
157 mtime_ns: None,
158 size_bytes: None,
159},
160 }));
161 }
162 }
163 return None;
164 }
165 };
166
167 if line.starts_with("diff --git ") {
168 let prev_path = current_path.take();
169 let prev_content = std::mem::take(&mut current_content);
170
171 in_hunk = false;
172
173 if let Some(path) = prev_path {
174 if !prev_content.trim().is_empty() {
175 return Some(Ok(Chunk {
176 data: prev_content.trim().to_string().into(),
177 metadata: ChunkMetadata {
178 base_offset: 0,
179 source_type: "git-diff".into(),
180 path: Some(path),
181 commit: Some(head_commit.clone()),
182 author: Some(author.clone()),
183 date: Some(date.clone()),
184 mtime_ns: None,
185 size_bytes: None,
186},
187 }));
188 }
189 }
190 continue;
191 }
192
193 if line.starts_with("deleted file mode") {
194 current_path = None;
195 continue;
196 }
197
198 if line.starts_with("new file mode")
199 || line.starts_with("index ")
200 || line.starts_with("--- ")
201 {
202 continue;
203 }
204
205 if let Some(path_part) = line.strip_prefix("+++ b/") {
206 current_path = Some(path_part.trim().to_string());
207 continue;
208 }
209
210 if line.starts_with("@@") && line.contains("@@") {
211 in_hunk = true;
212 continue;
213 }
214
215 if in_hunk && line.starts_with('+') && !line.starts_with("+++") {
216 current_content.push_str(&line[1..]);
217 current_content.push('\n');
218 }
219
220 if current_content.len() > 10 * 1024 * 1024 {
221 if let Some(ref path) = current_path {
222 if !current_content.trim().is_empty() {
223 let chunk_content = current_content.trim().to_string();
224 current_content = String::new();
225 return Some(Ok(Chunk {
226 data: chunk_content.into(),
227 metadata: ChunkMetadata {
228 base_offset: 0,
229 source_type: "git-diff".into(),
230 path: Some(path.clone()),
231 commit: Some(head_commit.clone()),
232 author: Some(author.clone()),
233 date: Some(date.clone()),
234 mtime_ns: None,
235 size_bytes: None,
236},
237 }));
238 }
239 }
240 }
241 }
242 }))
243}