1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::path::PathBuf;
4use std::sync::{Arc, Mutex};
5
6use eyre::{Context, OptionExt};
7use itertools::Itertools;
8use scm_record::helpers::make_binary_description;
9use scm_record::{ChangeType, File, FileMode, Section, SectionChangedLine};
10
11use super::{MaybeZeroOid, Repo};
12
13pub struct Diff<'repo> {
15 pub(super) inner: git2::Diff<'repo>,
16}
17
18impl Diff<'_> {
19 pub fn short_stats(&self) -> eyre::Result<String> {
21 let stats = self.inner.stats()?;
22 let buf = stats.to_buf(git2::DiffStatsFormat::SHORT, usize::MAX)?;
23 buf.as_str()
24 .ok_or_eyre("converting buf to str")
25 .map(|s| s.trim().to_string())
26 }
27}
28
/// The line ranges of a single diff hunk, as reported by the hunk callback
/// of the diff iteration.
///
/// The `*_start` fields are 1-based line numbers; a start of `0` together
/// with a line count of `0` denotes an empty range on that side.
///
/// The type is a plain bundle of integers, so it is `Copy`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct GitHunk {
    /// First line of the hunk in the old version of the file.
    old_start: usize,
    /// Number of lines the hunk covers in the old version of the file.
    old_lines: usize,
    /// First line of the hunk in the new version of the file.
    new_start: usize,
    /// Number of lines the hunk covers in the new version of the file.
    new_lines: usize,
}
36
37pub fn summarize_diff_for_temporary_commit(diff: &Diff) -> eyre::Result<String> {
39 let stats = diff.inner.stats()?;
44 let filename_or_count = if stats.files_changed() == 1 {
45 let mut filename = None;
46
47 let _ = diff.inner.foreach(
50 &mut |delta: git2::DiffDelta, _| {
51 let relevant_path = delta
52 .old_file()
53 .path()
54 .or(delta.new_file().path())
55 .unwrap_or_else(|| unreachable!("diff should have contained at least 1 file"));
56 filename = Some(format!("{}", relevant_path.display()));
57 false
58 },
59 None,
60 None,
61 None,
62 );
63
64 filename.unwrap_or_else(|| unreachable!("file name should have been initialized"))
65 } else {
66 format!("{} files", stats.files_changed())
67 };
68
69 let ins_del = match (stats.insertions(), stats.deletions()) {
70 (0, 0) => unreachable!("empty diff"),
71 (i, 0) => format!("+{i}"),
72 (0, d) => format!("-{d}"),
73 (i, d) => format!("+{i}/-{d}"),
74 };
75
76 Ok(format!("{filename_or_count} ({ins_del})"))
77}
78
79pub fn process_diff_for_record(repo: &Repo, diff: &Diff) -> eyre::Result<Vec<File<'static>>> {
81 let Diff { inner: diff } = diff;
82
83 #[derive(Clone, Debug)]
84 enum DeltaFileContent {
85 Hunks(Vec<GitHunk>),
86 Binary {
87 old_num_bytes: u64,
88 new_num_bytes: u64,
89 },
90 }
91
92 #[derive(Clone, Debug)]
93 struct Delta {
94 old_oid: git2::Oid,
95 old_file_mode: git2::FileMode,
96 new_oid: git2::Oid,
97 new_file_mode: git2::FileMode,
98 content: DeltaFileContent,
99 }
100 let deltas: Arc<Mutex<HashMap<PathBuf, Delta>>> = Default::default();
101 diff.foreach(
102 &mut |delta, _| {
103 let mut deltas = deltas.lock().unwrap();
104 let old_file = delta.old_file().path().unwrap().into();
105 let new_file = delta.new_file().path().unwrap().into();
106 let delta = Delta {
107 old_oid: delta.old_file().id(),
108 old_file_mode: delta.old_file().mode(),
109 new_oid: delta.new_file().id(),
110 new_file_mode: delta.new_file().mode(),
111 content: DeltaFileContent::Hunks(Default::default()),
112 };
113 deltas.insert(old_file, delta.clone());
114 deltas.insert(new_file, delta);
115 true
116 },
117 Some(&mut |delta, _| {
118 let mut deltas = deltas.lock().unwrap();
119
120 let old_file = delta.old_file().path().unwrap().into();
121 let new_file = delta.new_file().path().unwrap().into();
122 let delta = Delta {
123 old_oid: delta.old_file().id(),
124 old_file_mode: delta.old_file().mode(),
125 new_oid: delta.new_file().id(),
126 new_file_mode: delta.new_file().mode(),
127 content: DeltaFileContent::Binary {
128 old_num_bytes: delta.old_file().size(),
129 new_num_bytes: delta.new_file().size(),
130 },
131 };
132 deltas.insert(old_file, delta.clone());
133 deltas.insert(new_file, delta);
134 true
135 }),
136 Some(&mut |delta, hunk| {
137 let path = delta.new_file().path().unwrap();
138 let mut deltas = deltas.lock().unwrap();
139 match &mut deltas.get_mut(path).unwrap().content {
140 DeltaFileContent::Hunks(hunks) => {
141 hunks.push(GitHunk {
142 old_start: hunk.old_start().try_into().unwrap(),
143 old_lines: hunk.old_lines().try_into().unwrap(),
144 new_start: hunk.new_start().try_into().unwrap(),
145 new_lines: hunk.new_lines().try_into().unwrap(),
146 });
147 }
148 DeltaFileContent::Binary { .. } => {
149 panic!("File {path:?} got a hunk callback, but it was a binary file")
150 }
151 }
152 true
153 }),
154 None,
155 )
156 .wrap_err("Iterating over diff deltas")?;
157
158 let deltas = std::mem::take(&mut *deltas.lock().unwrap());
159 let mut result = Vec::new();
160 for (path, delta) in deltas {
161 let Delta {
162 old_oid,
163 old_file_mode,
164 new_oid,
165 new_file_mode,
166 content,
167 } = delta;
168 let old_file_mode = u32::from(old_file_mode);
169 let old_file_mode = FileMode::try_from(old_file_mode).unwrap();
170 let new_file_mode = u32::from(new_file_mode);
171 let new_file_mode = FileMode::try_from(new_file_mode).unwrap();
172
173 if new_oid.is_zero() {
174 result.push(File {
175 old_path: None,
176 path: Cow::Owned(path),
177 file_mode: old_file_mode,
178 sections: vec![Section::FileMode {
179 is_checked: false,
180 mode: FileMode::Absent,
181 }],
182 });
183 continue;
184 }
185
186 let hunks = match content {
187 DeltaFileContent::Binary {
188 old_num_bytes,
189 new_num_bytes,
190 } => {
191 result.push(File {
192 old_path: None,
193 path: Cow::Owned(path),
194 file_mode: old_file_mode,
195 sections: vec![Section::Binary {
196 is_checked: false,
197 old_description: Some(Cow::Owned(make_binary_description(
198 &old_oid.to_string(),
199 old_num_bytes,
200 ))),
201 new_description: Some(Cow::Owned(make_binary_description(
202 &new_oid.to_string(),
203 new_num_bytes,
204 ))),
205 }],
206 });
207 continue;
208 }
209 DeltaFileContent::Hunks(mut hunks) => {
210 hunks.sort_by_key(|hunk| (hunk.old_start, hunk.old_lines));
211 hunks
212 }
213 };
214
215 enum BlobContents {
216 Absent,
217 Binary(u64),
218 Text(Vec<String>),
219 }
220 let get_lines_from_blob = |oid| -> eyre::Result<BlobContents> {
221 let oid = MaybeZeroOid::from(oid);
222 match oid {
223 MaybeZeroOid::Zero => Ok(BlobContents::Absent),
224 MaybeZeroOid::NonZero(oid) => {
225 let blob = repo.find_blob_or_fail(oid)?;
226 let num_bytes = blob.size();
227 if blob.is_binary() {
228 return Ok(BlobContents::Binary(num_bytes));
229 }
230
231 let contents = blob.get_content();
232 let contents = match std::str::from_utf8(contents) {
233 Ok(contents) => contents,
234 Err(_) => {
235 return Ok(BlobContents::Binary(num_bytes));
236 }
237 };
238
239 let lines: Vec<String> = contents
240 .split_inclusive('\n')
241 .map(|line| line.to_owned())
242 .collect();
243 Ok(BlobContents::Text(lines))
244 }
245 }
246 };
247
248 match repo.inner.blob_path(&path) {
251 Ok(_) => {}
252 Err(err) if err.code() == git2::ErrorCode::NotFound => {}
253 Err(err) => return Err(err.into()),
254 }
255 let before_lines = get_lines_from_blob(old_oid)?;
256 let after_lines = get_lines_from_blob(new_oid)?;
257
258 let mut unchanged_hunk_line_idx = 0;
259 let mut file_sections = Vec::new();
260 for hunk in hunks {
261 #[derive(Debug)]
262 enum Lines<'a> {
263 Lines(&'a [String]),
264 BinaryDescription(String),
265 }
266 let empty_lines: Vec<String> = Default::default();
267 let before_lines = match &before_lines {
268 BlobContents::Absent => Lines::Lines(&empty_lines),
269 BlobContents::Text(before_lines) => Lines::Lines(before_lines),
270 BlobContents::Binary(num_bytes) => Lines::BinaryDescription(
271 make_binary_description(&old_oid.to_string(), *num_bytes),
272 ),
273 };
274 let after_lines = match &after_lines {
275 BlobContents::Absent => Lines::Lines(Default::default()),
276 BlobContents::Text(after_lines) => Lines::Lines(after_lines),
277 BlobContents::Binary(num_bytes) => Lines::BinaryDescription(
278 make_binary_description(&new_oid.to_string(), *num_bytes),
279 ),
280 };
281
282 let (before_lines, after_lines) = match (before_lines, after_lines) {
283 (Lines::Lines(before_lines), Lines::Lines(after_lines)) => {
284 (before_lines, after_lines)
285 }
286 (Lines::BinaryDescription(_), Lines::Lines(after_lines)) => {
287 (Default::default(), after_lines)
288 }
289 (Lines::Lines(_), Lines::BinaryDescription(new_description)) => {
290 file_sections.push(Section::Binary {
291 is_checked: false,
292 old_description: None,
293 new_description: Some(Cow::Owned(new_description)),
294 });
295 continue;
296 }
297 (
298 Lines::BinaryDescription(old_description),
299 Lines::BinaryDescription(new_description),
300 ) => {
301 file_sections.push(Section::Binary {
302 is_checked: false,
303 old_description: Some(Cow::Owned(old_description)),
304 new_description: Some(Cow::Owned(new_description)),
305 });
306 continue;
307 }
308 };
309
310 let GitHunk {
311 old_start,
312 old_lines,
313 new_start,
314 new_lines,
315 } = hunk;
316
317 let (old_start, old_is_empty) = if old_start == 0 && old_lines == 0 {
319 (0, true)
320 } else {
321 assert!(old_start > 0);
322 (old_start - 1, false)
323 };
324 let new_start = if new_start == 0 && new_lines == 0 {
325 0
326 } else {
327 assert!(new_start > 0);
328 new_start - 1
329 };
330
331 if unchanged_hunk_line_idx <= old_start {
334 let end = if old_lines == 0 && !old_is_empty {
335 old_start + 1
339 } else {
340 old_start
341 };
342 file_sections.push(Section::Unchanged {
343 lines: before_lines[unchanged_hunk_line_idx..end]
344 .iter()
345 .cloned()
346 .map(Cow::Owned)
347 .collect_vec(),
348 });
349 unchanged_hunk_line_idx = end + old_lines;
350 }
351
352 let before_idx_start = old_start;
353 let before_idx_end = before_idx_start + old_lines;
354 assert!(
355 before_idx_end <= before_lines.len(),
356 "before_idx_end {end} was not in range [0, {len}): {hunk:?}, path: {path:?}; lines {start}-... are: {lines:?}",
357 start = before_idx_start,
358 end = before_idx_end,
359 len = before_lines.len(),
360 hunk = hunk,
361 path = path,
362 lines = &before_lines[before_idx_start..],
363 );
364 let before_section_lines = before_lines[before_idx_start..before_idx_end]
365 .iter()
366 .cloned()
367 .map(|before_line| SectionChangedLine {
368 is_checked: false,
369 change_type: ChangeType::Removed,
370 line: Cow::Owned(before_line),
371 })
372 .collect_vec();
373
374 let after_idx_start = new_start;
375 let after_idx_end = after_idx_start + new_lines;
376 assert!(
377 after_idx_end <= after_lines.len(),
378 "after_idx_end {end} was not in range [0, {len}): {hunk:?}, path: {path:?}; lines {start}-... are: {lines:?}",
379 start = after_idx_start,
380 end = after_idx_end,
381 len = after_lines.len(),
382 hunk = hunk,
383 path = path,
384 lines = &after_lines[after_idx_start..],
385 );
386 let after_section_lines = after_lines[after_idx_start..after_idx_end]
387 .iter()
388 .cloned()
389 .map(|after_line| SectionChangedLine {
390 is_checked: false,
391 change_type: ChangeType::Added,
392 line: Cow::Owned(after_line),
393 })
394 .collect_vec();
395
396 if !(before_section_lines.is_empty() && after_section_lines.is_empty()) {
397 file_sections.push(Section::Changed {
398 lines: before_section_lines
399 .into_iter()
400 .chain(after_section_lines)
401 .collect(),
402 });
403 }
404 }
405
406 if let BlobContents::Text(before_lines) = before_lines {
407 if unchanged_hunk_line_idx < before_lines.len() {
408 file_sections.push(Section::Unchanged {
409 lines: before_lines[unchanged_hunk_line_idx..]
410 .iter()
411 .cloned()
412 .map(Cow::Owned)
413 .collect(),
414 });
415 }
416 }
417
418 let file_mode_section = if old_file_mode != new_file_mode {
419 vec![Section::FileMode {
420 is_checked: false,
421 mode: new_file_mode,
422 }]
423 } else {
424 vec![]
425 };
426 result.push(File {
427 old_path: None,
428 path: Cow::Owned(path),
429 file_mode: old_file_mode,
430 sections: [file_mode_section, file_sections].concat().to_vec(),
431 });
432 }
433
434 result.sort_by_cached_key(|file| file.path.clone().into_owned());
435 Ok(result)
436}