Skip to main content

branchless/git/
diff.rs

1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::path::PathBuf;
4use std::sync::{Arc, Mutex};
5
6use eyre::{Context, OptionExt};
7use itertools::Itertools;
8use scm_record::helpers::make_binary_description;
9use scm_record::{ChangeType, File, FileMode, Section, SectionChangedLine};
10
11use super::{MaybeZeroOid, Repo};
12
13/// A diff between two trees/commits.
14pub struct Diff<'repo> {
15    pub(super) inner: git2::Diff<'repo>,
16}
17
18impl Diff<'_> {
19    /// Summarize this diff into a single line "short" format.
20    pub fn short_stats(&self) -> eyre::Result<String> {
21        let stats = self.inner.stats()?;
22        let buf = stats.to_buf(git2::DiffStatsFormat::SHORT, usize::MAX)?;
23        buf.as_str()
24            .ok_or_eyre("converting buf to str")
25            .map(|s| s.trim().to_string())
26    }
27}
28
/// Line ranges covered by a single diff hunk, in the old and the new version
/// of a file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct GitHunk {
    /// Starting line of the hunk in the old version of the file.
    old_start: usize,
    /// Number of lines the hunk spans in the old version of the file.
    old_lines: usize,
    /// Starting line of the hunk in the new version of the file.
    new_start: usize,
    /// Number of lines the hunk spans in the new version of the file.
    new_lines: usize,
}
36
37/// Summarize a diff for use as part of a temporary commit message.
38pub fn summarize_diff_for_temporary_commit(diff: &Diff) -> eyre::Result<String> {
39    // this returns something like `1 file changed, 1 deletion(-)`
40    // diff.short_stats()
41
42    // this builds something like `test2.txt (-1)` or `2 files (+1/-2)`
43    let stats = diff.inner.stats()?;
44    let filename_or_count = if stats.files_changed() == 1 {
45        let mut filename = None;
46
47        // returning false in the closure terminates iteration, but that also
48        // returns an Err, so catch and ignore it
49        let _ = diff.inner.foreach(
50            &mut |delta: git2::DiffDelta, _| {
51                let relevant_path = delta
52                    .old_file()
53                    .path()
54                    .or(delta.new_file().path())
55                    .unwrap_or_else(|| unreachable!("diff should have contained at least 1 file"));
56                filename = Some(format!("{}", relevant_path.display()));
57                false
58            },
59            None,
60            None,
61            None,
62        );
63
64        filename.unwrap_or_else(|| unreachable!("file name should have been initialized"))
65    } else {
66        format!("{} files", stats.files_changed())
67    };
68
69    let ins_del = match (stats.insertions(), stats.deletions()) {
70        (0, 0) => unreachable!("empty diff"),
71        (i, 0) => format!("+{i}"),
72        (0, d) => format!("-{d}"),
73        (i, d) => format!("+{i}/-{d}"),
74    };
75
76    Ok(format!("{filename_or_count} ({ins_del})"))
77}
78
79/// Calculate the diff between the index and the working copy.
80pub fn process_diff_for_record(repo: &Repo, diff: &Diff) -> eyre::Result<Vec<File<'static>>> {
81    let Diff { inner: diff } = diff;
82
83    #[derive(Clone, Debug)]
84    enum DeltaFileContent {
85        Hunks(Vec<GitHunk>),
86        Binary {
87            old_num_bytes: u64,
88            new_num_bytes: u64,
89        },
90    }
91
92    #[derive(Clone, Debug)]
93    struct Delta {
94        old_oid: git2::Oid,
95        old_file_mode: git2::FileMode,
96        new_oid: git2::Oid,
97        new_file_mode: git2::FileMode,
98        content: DeltaFileContent,
99    }
100    let deltas: Arc<Mutex<HashMap<PathBuf, Delta>>> = Default::default();
101    diff.foreach(
102        &mut |delta, _| {
103            let mut deltas = deltas.lock().unwrap();
104            let old_file = delta.old_file().path().unwrap().into();
105            let new_file = delta.new_file().path().unwrap().into();
106            let delta = Delta {
107                old_oid: delta.old_file().id(),
108                old_file_mode: delta.old_file().mode(),
109                new_oid: delta.new_file().id(),
110                new_file_mode: delta.new_file().mode(),
111                content: DeltaFileContent::Hunks(Default::default()),
112            };
113            deltas.insert(old_file, delta.clone());
114            deltas.insert(new_file, delta);
115            true
116        },
117        Some(&mut |delta, _| {
118            let mut deltas = deltas.lock().unwrap();
119
120            let old_file = delta.old_file().path().unwrap().into();
121            let new_file = delta.new_file().path().unwrap().into();
122            let delta = Delta {
123                old_oid: delta.old_file().id(),
124                old_file_mode: delta.old_file().mode(),
125                new_oid: delta.new_file().id(),
126                new_file_mode: delta.new_file().mode(),
127                content: DeltaFileContent::Binary {
128                    old_num_bytes: delta.old_file().size(),
129                    new_num_bytes: delta.new_file().size(),
130                },
131            };
132            deltas.insert(old_file, delta.clone());
133            deltas.insert(new_file, delta);
134            true
135        }),
136        Some(&mut |delta, hunk| {
137            let path = delta.new_file().path().unwrap();
138            let mut deltas = deltas.lock().unwrap();
139            match &mut deltas.get_mut(path).unwrap().content {
140                DeltaFileContent::Hunks(hunks) => {
141                    hunks.push(GitHunk {
142                        old_start: hunk.old_start().try_into().unwrap(),
143                        old_lines: hunk.old_lines().try_into().unwrap(),
144                        new_start: hunk.new_start().try_into().unwrap(),
145                        new_lines: hunk.new_lines().try_into().unwrap(),
146                    });
147                }
148                DeltaFileContent::Binary { .. } => {
149                    panic!("File {path:?} got a hunk callback, but it was a binary file")
150                }
151            }
152            true
153        }),
154        None,
155    )
156    .wrap_err("Iterating over diff deltas")?;
157
158    let deltas = std::mem::take(&mut *deltas.lock().unwrap());
159    let mut result = Vec::new();
160    for (path, delta) in deltas {
161        let Delta {
162            old_oid,
163            old_file_mode,
164            new_oid,
165            new_file_mode,
166            content,
167        } = delta;
168        let old_file_mode = u32::from(old_file_mode);
169        let old_file_mode = FileMode::try_from(old_file_mode).unwrap();
170        let new_file_mode = u32::from(new_file_mode);
171        let new_file_mode = FileMode::try_from(new_file_mode).unwrap();
172
173        if new_oid.is_zero() {
174            result.push(File {
175                old_path: None,
176                path: Cow::Owned(path),
177                file_mode: old_file_mode,
178                sections: vec![Section::FileMode {
179                    is_checked: false,
180                    mode: FileMode::Absent,
181                }],
182            });
183            continue;
184        }
185
186        let hunks = match content {
187            DeltaFileContent::Binary {
188                old_num_bytes,
189                new_num_bytes,
190            } => {
191                result.push(File {
192                    old_path: None,
193                    path: Cow::Owned(path),
194                    file_mode: old_file_mode,
195                    sections: vec![Section::Binary {
196                        is_checked: false,
197                        old_description: Some(Cow::Owned(make_binary_description(
198                            &old_oid.to_string(),
199                            old_num_bytes,
200                        ))),
201                        new_description: Some(Cow::Owned(make_binary_description(
202                            &new_oid.to_string(),
203                            new_num_bytes,
204                        ))),
205                    }],
206                });
207                continue;
208            }
209            DeltaFileContent::Hunks(mut hunks) => {
210                hunks.sort_by_key(|hunk| (hunk.old_start, hunk.old_lines));
211                hunks
212            }
213        };
214
215        enum BlobContents {
216            Absent,
217            Binary(u64),
218            Text(Vec<String>),
219        }
220        let get_lines_from_blob = |oid| -> eyre::Result<BlobContents> {
221            let oid = MaybeZeroOid::from(oid);
222            match oid {
223                MaybeZeroOid::Zero => Ok(BlobContents::Absent),
224                MaybeZeroOid::NonZero(oid) => {
225                    let blob = repo.find_blob_or_fail(oid)?;
226                    let num_bytes = blob.size();
227                    if blob.is_binary() {
228                        return Ok(BlobContents::Binary(num_bytes));
229                    }
230
231                    let contents = blob.get_content();
232                    let contents = match std::str::from_utf8(contents) {
233                        Ok(contents) => contents,
234                        Err(_) => {
235                            return Ok(BlobContents::Binary(num_bytes));
236                        }
237                    };
238
239                    let lines: Vec<String> = contents
240                        .split_inclusive('\n')
241                        .map(|line| line.to_owned())
242                        .collect();
243                    Ok(BlobContents::Text(lines))
244                }
245            }
246        };
247
248        // FIXME: should we rely on the caller to add the file contents to
249        // the ODB?
250        match repo.inner.blob_path(&path) {
251            Ok(_) => {}
252            Err(err) if err.code() == git2::ErrorCode::NotFound => {}
253            Err(err) => return Err(err.into()),
254        }
255        let before_lines = get_lines_from_blob(old_oid)?;
256        let after_lines = get_lines_from_blob(new_oid)?;
257
258        let mut unchanged_hunk_line_idx = 0;
259        let mut file_sections = Vec::new();
260        for hunk in hunks {
261            #[derive(Debug)]
262            enum Lines<'a> {
263                Lines(&'a [String]),
264                BinaryDescription(String),
265            }
266            let empty_lines: Vec<String> = Default::default();
267            let before_lines = match &before_lines {
268                BlobContents::Absent => Lines::Lines(&empty_lines),
269                BlobContents::Text(before_lines) => Lines::Lines(before_lines),
270                BlobContents::Binary(num_bytes) => Lines::BinaryDescription(
271                    make_binary_description(&old_oid.to_string(), *num_bytes),
272                ),
273            };
274            let after_lines = match &after_lines {
275                BlobContents::Absent => Lines::Lines(Default::default()),
276                BlobContents::Text(after_lines) => Lines::Lines(after_lines),
277                BlobContents::Binary(num_bytes) => Lines::BinaryDescription(
278                    make_binary_description(&new_oid.to_string(), *num_bytes),
279                ),
280            };
281
282            let (before_lines, after_lines) = match (before_lines, after_lines) {
283                (Lines::Lines(before_lines), Lines::Lines(after_lines)) => {
284                    (before_lines, after_lines)
285                }
286                (Lines::BinaryDescription(_), Lines::Lines(after_lines)) => {
287                    (Default::default(), after_lines)
288                }
289                (Lines::Lines(_), Lines::BinaryDescription(new_description)) => {
290                    file_sections.push(Section::Binary {
291                        is_checked: false,
292                        old_description: None,
293                        new_description: Some(Cow::Owned(new_description)),
294                    });
295                    continue;
296                }
297                (
298                    Lines::BinaryDescription(old_description),
299                    Lines::BinaryDescription(new_description),
300                ) => {
301                    file_sections.push(Section::Binary {
302                        is_checked: false,
303                        old_description: Some(Cow::Owned(old_description)),
304                        new_description: Some(Cow::Owned(new_description)),
305                    });
306                    continue;
307                }
308            };
309
310            let GitHunk {
311                old_start,
312                old_lines,
313                new_start,
314                new_lines,
315            } = hunk;
316
317            // The line numbers are one-indexed.
318            let (old_start, old_is_empty) = if old_start == 0 && old_lines == 0 {
319                (0, true)
320            } else {
321                assert!(old_start > 0);
322                (old_start - 1, false)
323            };
324            let new_start = if new_start == 0 && new_lines == 0 {
325                0
326            } else {
327                assert!(new_start > 0);
328                new_start - 1
329            };
330
331            // If we're starting a new hunk, first paste in any unchanged
332            // lines since the last hunk (from the old version of the file).
333            if unchanged_hunk_line_idx <= old_start {
334                let end = if old_lines == 0 && !old_is_empty {
335                    // Insertions are indicated with `old_lines == 0`, but in
336                    // those cases, the inserted line is *after* the provided
337                    // line number.
338                    old_start + 1
339                } else {
340                    old_start
341                };
342                file_sections.push(Section::Unchanged {
343                    lines: before_lines[unchanged_hunk_line_idx..end]
344                        .iter()
345                        .cloned()
346                        .map(Cow::Owned)
347                        .collect_vec(),
348                });
349                unchanged_hunk_line_idx = end + old_lines;
350            }
351
352            let before_idx_start = old_start;
353            let before_idx_end = before_idx_start + old_lines;
354            assert!(
355                before_idx_end <= before_lines.len(),
356                "before_idx_end {end} was not in range [0, {len}): {hunk:?}, path: {path:?}; lines {start}-... are: {lines:?}",
357                start = before_idx_start,
358                end = before_idx_end,
359                len = before_lines.len(),
360                hunk = hunk,
361                path = path,
362                lines = &before_lines[before_idx_start..],
363            );
364            let before_section_lines = before_lines[before_idx_start..before_idx_end]
365                .iter()
366                .cloned()
367                .map(|before_line| SectionChangedLine {
368                    is_checked: false,
369                    change_type: ChangeType::Removed,
370                    line: Cow::Owned(before_line),
371                })
372                .collect_vec();
373
374            let after_idx_start = new_start;
375            let after_idx_end = after_idx_start + new_lines;
376            assert!(
377                after_idx_end <= after_lines.len(),
378                "after_idx_end {end} was not in range [0, {len}): {hunk:?}, path: {path:?}; lines {start}-... are: {lines:?}",
379                start = after_idx_start,
380                end = after_idx_end,
381                len = after_lines.len(),
382                hunk = hunk,
383                path = path,
384                lines = &after_lines[after_idx_start..],
385            );
386            let after_section_lines = after_lines[after_idx_start..after_idx_end]
387                .iter()
388                .cloned()
389                .map(|after_line| SectionChangedLine {
390                    is_checked: false,
391                    change_type: ChangeType::Added,
392                    line: Cow::Owned(after_line),
393                })
394                .collect_vec();
395
396            if !(before_section_lines.is_empty() && after_section_lines.is_empty()) {
397                file_sections.push(Section::Changed {
398                    lines: before_section_lines
399                        .into_iter()
400                        .chain(after_section_lines)
401                        .collect(),
402                });
403            }
404        }
405
406        if let BlobContents::Text(before_lines) = before_lines {
407            if unchanged_hunk_line_idx < before_lines.len() {
408                file_sections.push(Section::Unchanged {
409                    lines: before_lines[unchanged_hunk_line_idx..]
410                        .iter()
411                        .cloned()
412                        .map(Cow::Owned)
413                        .collect(),
414                });
415            }
416        }
417
418        let file_mode_section = if old_file_mode != new_file_mode {
419            vec![Section::FileMode {
420                is_checked: false,
421                mode: new_file_mode,
422            }]
423        } else {
424            vec![]
425        };
426        result.push(File {
427            old_path: None,
428            path: Cow::Owned(path),
429            file_mode: old_file_mode,
430            sections: [file_mode_section, file_sections].concat().to_vec(),
431        });
432    }
433
434    result.sort_by_cached_key(|file| file.path.clone().into_owned());
435    Ok(result)
436}