branchless/git/
diff.rs

1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::path::PathBuf;
4use std::sync::{Arc, Mutex};
5
6use eyre::Context;
7use itertools::Itertools;
8use scm_record::helpers::make_binary_description;
9use scm_record::{ChangeType, File, FileMode, Section, SectionChangedLine};
10
11use super::{MaybeZeroOid, Repo};
12
13/// A diff between two trees/commits.
14pub struct Diff<'repo> {
15    pub(super) inner: git2::Diff<'repo>,
16}
17
/// The line ranges covered by a single diff hunk, as reported by libgit2.
///
/// The values are stored exactly as reported; `process_diff_for_record`
/// treats the start positions as one-indexed and treats a start of `0`
/// together with a length of `0` as an empty side.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct GitHunk {
    /// Starting line of the hunk in the old version of the file.
    old_start: usize,
    /// Number of lines the hunk covers in the old version of the file.
    old_lines: usize,
    /// Starting line of the hunk in the new version of the file.
    new_start: usize,
    /// Number of lines the hunk covers in the new version of the file.
    new_lines: usize,
}
25
26/// Calculate the diff between the index and the working copy.
27pub fn process_diff_for_record(repo: &Repo, diff: &Diff) -> eyre::Result<Vec<File<'static>>> {
28    let Diff { inner: diff } = diff;
29
30    #[derive(Clone, Debug)]
31    enum DeltaFileContent {
32        Hunks(Vec<GitHunk>),
33        Binary {
34            old_num_bytes: u64,
35            new_num_bytes: u64,
36        },
37    }
38
39    #[derive(Clone, Debug)]
40    struct Delta {
41        old_oid: git2::Oid,
42        old_file_mode: git2::FileMode,
43        new_oid: git2::Oid,
44        new_file_mode: git2::FileMode,
45        content: DeltaFileContent,
46    }
47    let deltas: Arc<Mutex<HashMap<PathBuf, Delta>>> = Default::default();
48    diff.foreach(
49        &mut |delta, _| {
50            let mut deltas = deltas.lock().unwrap();
51            let old_file = delta.old_file().path().unwrap().into();
52            let new_file = delta.new_file().path().unwrap().into();
53            let delta = Delta {
54                old_oid: delta.old_file().id(),
55                old_file_mode: delta.old_file().mode(),
56                new_oid: delta.new_file().id(),
57                new_file_mode: delta.new_file().mode(),
58                content: DeltaFileContent::Hunks(Default::default()),
59            };
60            deltas.insert(old_file, delta.clone());
61            deltas.insert(new_file, delta);
62            true
63        },
64        Some(&mut |delta, _| {
65            let mut deltas = deltas.lock().unwrap();
66
67            let old_file = delta.old_file().path().unwrap().into();
68            let new_file = delta.new_file().path().unwrap().into();
69            let delta = Delta {
70                old_oid: delta.old_file().id(),
71                old_file_mode: delta.old_file().mode(),
72                new_oid: delta.new_file().id(),
73                new_file_mode: delta.new_file().mode(),
74                content: DeltaFileContent::Binary {
75                    old_num_bytes: delta.old_file().size(),
76                    new_num_bytes: delta.new_file().size(),
77                },
78            };
79            deltas.insert(old_file, delta.clone());
80            deltas.insert(new_file, delta);
81            true
82        }),
83        Some(&mut |delta, hunk| {
84            let path = delta.new_file().path().unwrap();
85            let mut deltas = deltas.lock().unwrap();
86            match &mut deltas.get_mut(path).unwrap().content {
87                DeltaFileContent::Hunks(hunks) => {
88                    hunks.push(GitHunk {
89                        old_start: hunk.old_start().try_into().unwrap(),
90                        old_lines: hunk.old_lines().try_into().unwrap(),
91                        new_start: hunk.new_start().try_into().unwrap(),
92                        new_lines: hunk.new_lines().try_into().unwrap(),
93                    });
94                }
95                DeltaFileContent::Binary { .. } => {
96                    panic!("File {path:?} got a hunk callback, but it was a binary file")
97                }
98            }
99            true
100        }),
101        None,
102    )
103    .wrap_err("Iterating over diff deltas")?;
104
105    let deltas = std::mem::take(&mut *deltas.lock().unwrap());
106    let mut result = Vec::new();
107    for (path, delta) in deltas {
108        let Delta {
109            old_oid,
110            old_file_mode,
111            new_oid,
112            new_file_mode,
113            content,
114        } = delta;
115        let old_file_mode = u32::from(old_file_mode);
116        let old_file_mode = FileMode::try_from(old_file_mode).unwrap();
117        let new_file_mode = u32::from(new_file_mode);
118        let new_file_mode = FileMode::try_from(new_file_mode).unwrap();
119
120        if new_oid.is_zero() {
121            result.push(File {
122                old_path: None,
123                path: Cow::Owned(path),
124                file_mode: Some(old_file_mode),
125                sections: vec![Section::FileMode {
126                    is_checked: false,
127                    before: old_file_mode,
128                    after: FileMode::absent(),
129                }],
130            });
131            continue;
132        }
133
134        let hunks = match content {
135            DeltaFileContent::Binary {
136                old_num_bytes,
137                new_num_bytes,
138            } => {
139                result.push(File {
140                    old_path: None,
141                    path: Cow::Owned(path),
142                    file_mode: Some(old_file_mode),
143                    sections: vec![Section::Binary {
144                        is_checked: false,
145                        old_description: Some(Cow::Owned(make_binary_description(
146                            &old_oid.to_string(),
147                            old_num_bytes,
148                        ))),
149                        new_description: Some(Cow::Owned(make_binary_description(
150                            &new_oid.to_string(),
151                            new_num_bytes,
152                        ))),
153                    }],
154                });
155                continue;
156            }
157            DeltaFileContent::Hunks(mut hunks) => {
158                hunks.sort_by_key(|hunk| (hunk.old_start, hunk.old_lines));
159                hunks
160            }
161        };
162
163        enum BlobContents {
164            Absent,
165            Binary(u64),
166            Text(Vec<String>),
167        }
168        let get_lines_from_blob = |oid| -> eyre::Result<BlobContents> {
169            let oid = MaybeZeroOid::from(oid);
170            match oid {
171                MaybeZeroOid::Zero => Ok(BlobContents::Absent),
172                MaybeZeroOid::NonZero(oid) => {
173                    let blob = repo.find_blob_or_fail(oid)?;
174                    let num_bytes = blob.size();
175                    if blob.is_binary() {
176                        return Ok(BlobContents::Binary(num_bytes));
177                    }
178
179                    let contents = blob.get_content();
180                    let contents = match std::str::from_utf8(contents) {
181                        Ok(contents) => contents,
182                        Err(_) => {
183                            return Ok(BlobContents::Binary(num_bytes));
184                        }
185                    };
186
187                    let lines: Vec<String> = contents
188                        .split_inclusive('\n')
189                        .map(|line| line.to_owned())
190                        .collect();
191                    Ok(BlobContents::Text(lines))
192                }
193            }
194        };
195
196        // FIXME: should we rely on the caller to add the file contents to
197        // the ODB?
198        match repo.inner.blob_path(&path) {
199            Ok(_) => {}
200            Err(err) if err.code() == git2::ErrorCode::NotFound => {}
201            Err(err) => return Err(err.into()),
202        }
203        let before_lines = get_lines_from_blob(old_oid)?;
204        let after_lines = get_lines_from_blob(new_oid)?;
205
206        let mut unchanged_hunk_line_idx = 0;
207        let mut file_sections = Vec::new();
208        for hunk in hunks {
209            #[derive(Debug)]
210            enum Lines<'a> {
211                Lines(&'a [String]),
212                BinaryDescription(String),
213            }
214            let empty_lines: Vec<String> = Default::default();
215            let before_lines = match &before_lines {
216                BlobContents::Absent => Lines::Lines(&empty_lines),
217                BlobContents::Text(before_lines) => Lines::Lines(before_lines),
218                BlobContents::Binary(num_bytes) => Lines::BinaryDescription(
219                    make_binary_description(&old_oid.to_string(), *num_bytes),
220                ),
221            };
222            let after_lines = match &after_lines {
223                BlobContents::Absent => Lines::Lines(Default::default()),
224                BlobContents::Text(after_lines) => Lines::Lines(after_lines),
225                BlobContents::Binary(num_bytes) => Lines::BinaryDescription(
226                    make_binary_description(&new_oid.to_string(), *num_bytes),
227                ),
228            };
229
230            let (before_lines, after_lines) = match (before_lines, after_lines) {
231                (Lines::Lines(before_lines), Lines::Lines(after_lines)) => {
232                    (before_lines, after_lines)
233                }
234                (Lines::BinaryDescription(_), Lines::Lines(after_lines)) => {
235                    (Default::default(), after_lines)
236                }
237                (Lines::Lines(_), Lines::BinaryDescription(new_description)) => {
238                    file_sections.push(Section::Binary {
239                        is_checked: false,
240                        old_description: None,
241                        new_description: Some(Cow::Owned(new_description)),
242                    });
243                    continue;
244                }
245                (
246                    Lines::BinaryDescription(old_description),
247                    Lines::BinaryDescription(new_description),
248                ) => {
249                    file_sections.push(Section::Binary {
250                        is_checked: false,
251                        old_description: Some(Cow::Owned(old_description)),
252                        new_description: Some(Cow::Owned(new_description)),
253                    });
254                    continue;
255                }
256            };
257
258            let GitHunk {
259                old_start,
260                old_lines,
261                new_start,
262                new_lines,
263            } = hunk;
264
265            // The line numbers are one-indexed.
266            let (old_start, old_is_empty) = if old_start == 0 && old_lines == 0 {
267                (0, true)
268            } else {
269                assert!(old_start > 0);
270                (old_start - 1, false)
271            };
272            let new_start = if new_start == 0 && new_lines == 0 {
273                0
274            } else {
275                assert!(new_start > 0);
276                new_start - 1
277            };
278
279            // If we're starting a new hunk, first paste in any unchanged
280            // lines since the last hunk (from the old version of the file).
281            if unchanged_hunk_line_idx <= old_start {
282                let end = if old_lines == 0 && !old_is_empty {
283                    // Insertions are indicated with `old_lines == 0`, but in
284                    // those cases, the inserted line is *after* the provided
285                    // line number.
286                    old_start + 1
287                } else {
288                    old_start
289                };
290                file_sections.push(Section::Unchanged {
291                    lines: before_lines[unchanged_hunk_line_idx..end]
292                        .iter()
293                        .cloned()
294                        .map(Cow::Owned)
295                        .collect_vec(),
296                });
297                unchanged_hunk_line_idx = end + old_lines;
298            }
299
300            let before_idx_start = old_start;
301            let before_idx_end = before_idx_start + old_lines;
302            assert!(
303            before_idx_end <= before_lines.len(),
304                "before_idx_end {end} was not in range [0, {len}): {hunk:?}, path: {path:?}; lines {start}-... are: {lines:?}",
305                start = before_idx_start,
306                end = before_idx_end,
307                len = before_lines.len(),
308                hunk = hunk,
309                path = path,
310                lines = &before_lines[before_idx_start..],
311            );
312            let before_section_lines = before_lines[before_idx_start..before_idx_end]
313                .iter()
314                .cloned()
315                .map(|before_line| SectionChangedLine {
316                    is_checked: false,
317                    change_type: ChangeType::Removed,
318                    line: Cow::Owned(before_line),
319                })
320                .collect_vec();
321
322            let after_idx_start = new_start;
323            let after_idx_end = after_idx_start + new_lines;
324            assert!(
325                after_idx_end <= after_lines.len(),
326                "after_idx_end {end} was not in range [0, {len}): {hunk:?}, path: {path:?}; lines {start}-... are: {lines:?}",
327                start = after_idx_start,
328                end = after_idx_end,
329                len = after_lines.len(),
330                hunk = hunk,
331                path = path,
332                lines  = &after_lines[after_idx_start..],
333            );
334            let after_section_lines = after_lines[after_idx_start..after_idx_end]
335                .iter()
336                .cloned()
337                .map(|after_line| SectionChangedLine {
338                    is_checked: false,
339                    change_type: ChangeType::Added,
340                    line: Cow::Owned(after_line),
341                })
342                .collect_vec();
343
344            if !(before_section_lines.is_empty() && after_section_lines.is_empty()) {
345                file_sections.push(Section::Changed {
346                    lines: before_section_lines
347                        .into_iter()
348                        .chain(after_section_lines)
349                        .collect(),
350                });
351            }
352        }
353
354        if let BlobContents::Text(before_lines) = before_lines {
355            if unchanged_hunk_line_idx < before_lines.len() {
356                file_sections.push(Section::Unchanged {
357                    lines: before_lines[unchanged_hunk_line_idx..]
358                        .iter()
359                        .cloned()
360                        .map(Cow::Owned)
361                        .collect(),
362                });
363            }
364        }
365
366        let file_mode_section = if old_file_mode != new_file_mode {
367            vec![Section::FileMode {
368                is_checked: false,
369                before: old_file_mode,
370                after: new_file_mode,
371            }]
372        } else {
373            vec![]
374        };
375        result.push(File {
376            old_path: None,
377            path: Cow::Owned(path),
378            file_mode: Some(old_file_mode),
379            sections: [file_mode_section, file_sections].concat().to_vec(),
380        });
381    }
382
383    result.sort_by_cached_key(|file| file.path.clone().into_owned());
384    Ok(result)
385}