1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::path::PathBuf;
4use std::sync::{Arc, Mutex};
5
6use eyre::Context;
7use itertools::Itertools;
8use scm_record::helpers::make_binary_description;
9use scm_record::{ChangeType, File, FileMode, Section, SectionChangedLine};
10
11use super::{MaybeZeroOid, Repo};
12
13pub struct Diff<'repo> {
15 pub(super) inner: git2::Diff<'repo>,
16}
17
/// Line ranges of a single diff hunk, as reported by the hunk callback of the
/// diff iteration in this file.
///
/// The `*_start` values are stored exactly as reported (the consumer in this
/// file treats them as 1-based, with `0` reserved for an empty side).
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
struct GitHunk {
    /// First line of the hunk on the old side.
    old_start: usize,
    /// Number of lines the hunk spans on the old side.
    old_lines: usize,
    /// First line of the hunk on the new side.
    new_start: usize,
    /// Number of lines the hunk spans on the new side.
    new_lines: usize,
}
25
26pub fn process_diff_for_record(repo: &Repo, diff: &Diff) -> eyre::Result<Vec<File<'static>>> {
28 let Diff { inner: diff } = diff;
29
30 #[derive(Clone, Debug)]
31 enum DeltaFileContent {
32 Hunks(Vec<GitHunk>),
33 Binary {
34 old_num_bytes: u64,
35 new_num_bytes: u64,
36 },
37 }
38
39 #[derive(Clone, Debug)]
40 struct Delta {
41 old_oid: git2::Oid,
42 old_file_mode: git2::FileMode,
43 new_oid: git2::Oid,
44 new_file_mode: git2::FileMode,
45 content: DeltaFileContent,
46 }
47 let deltas: Arc<Mutex<HashMap<PathBuf, Delta>>> = Default::default();
48 diff.foreach(
49 &mut |delta, _| {
50 let mut deltas = deltas.lock().unwrap();
51 let old_file = delta.old_file().path().unwrap().into();
52 let new_file = delta.new_file().path().unwrap().into();
53 let delta = Delta {
54 old_oid: delta.old_file().id(),
55 old_file_mode: delta.old_file().mode(),
56 new_oid: delta.new_file().id(),
57 new_file_mode: delta.new_file().mode(),
58 content: DeltaFileContent::Hunks(Default::default()),
59 };
60 deltas.insert(old_file, delta.clone());
61 deltas.insert(new_file, delta);
62 true
63 },
64 Some(&mut |delta, _| {
65 let mut deltas = deltas.lock().unwrap();
66
67 let old_file = delta.old_file().path().unwrap().into();
68 let new_file = delta.new_file().path().unwrap().into();
69 let delta = Delta {
70 old_oid: delta.old_file().id(),
71 old_file_mode: delta.old_file().mode(),
72 new_oid: delta.new_file().id(),
73 new_file_mode: delta.new_file().mode(),
74 content: DeltaFileContent::Binary {
75 old_num_bytes: delta.old_file().size(),
76 new_num_bytes: delta.new_file().size(),
77 },
78 };
79 deltas.insert(old_file, delta.clone());
80 deltas.insert(new_file, delta);
81 true
82 }),
83 Some(&mut |delta, hunk| {
84 let path = delta.new_file().path().unwrap();
85 let mut deltas = deltas.lock().unwrap();
86 match &mut deltas.get_mut(path).unwrap().content {
87 DeltaFileContent::Hunks(hunks) => {
88 hunks.push(GitHunk {
89 old_start: hunk.old_start().try_into().unwrap(),
90 old_lines: hunk.old_lines().try_into().unwrap(),
91 new_start: hunk.new_start().try_into().unwrap(),
92 new_lines: hunk.new_lines().try_into().unwrap(),
93 });
94 }
95 DeltaFileContent::Binary { .. } => {
96 panic!("File {path:?} got a hunk callback, but it was a binary file")
97 }
98 }
99 true
100 }),
101 None,
102 )
103 .wrap_err("Iterating over diff deltas")?;
104
105 let deltas = std::mem::take(&mut *deltas.lock().unwrap());
106 let mut result = Vec::new();
107 for (path, delta) in deltas {
108 let Delta {
109 old_oid,
110 old_file_mode,
111 new_oid,
112 new_file_mode,
113 content,
114 } = delta;
115 let old_file_mode = u32::from(old_file_mode);
116 let old_file_mode = FileMode::try_from(old_file_mode).unwrap();
117 let new_file_mode = u32::from(new_file_mode);
118 let new_file_mode = FileMode::try_from(new_file_mode).unwrap();
119
120 if new_oid.is_zero() {
121 result.push(File {
122 old_path: None,
123 path: Cow::Owned(path),
124 file_mode: Some(old_file_mode),
125 sections: vec![Section::FileMode {
126 is_checked: false,
127 before: old_file_mode,
128 after: FileMode::absent(),
129 }],
130 });
131 continue;
132 }
133
134 let hunks = match content {
135 DeltaFileContent::Binary {
136 old_num_bytes,
137 new_num_bytes,
138 } => {
139 result.push(File {
140 old_path: None,
141 path: Cow::Owned(path),
142 file_mode: Some(old_file_mode),
143 sections: vec![Section::Binary {
144 is_checked: false,
145 old_description: Some(Cow::Owned(make_binary_description(
146 &old_oid.to_string(),
147 old_num_bytes,
148 ))),
149 new_description: Some(Cow::Owned(make_binary_description(
150 &new_oid.to_string(),
151 new_num_bytes,
152 ))),
153 }],
154 });
155 continue;
156 }
157 DeltaFileContent::Hunks(mut hunks) => {
158 hunks.sort_by_key(|hunk| (hunk.old_start, hunk.old_lines));
159 hunks
160 }
161 };
162
163 enum BlobContents {
164 Absent,
165 Binary(u64),
166 Text(Vec<String>),
167 }
168 let get_lines_from_blob = |oid| -> eyre::Result<BlobContents> {
169 let oid = MaybeZeroOid::from(oid);
170 match oid {
171 MaybeZeroOid::Zero => Ok(BlobContents::Absent),
172 MaybeZeroOid::NonZero(oid) => {
173 let blob = repo.find_blob_or_fail(oid)?;
174 let num_bytes = blob.size();
175 if blob.is_binary() {
176 return Ok(BlobContents::Binary(num_bytes));
177 }
178
179 let contents = blob.get_content();
180 let contents = match std::str::from_utf8(contents) {
181 Ok(contents) => contents,
182 Err(_) => {
183 return Ok(BlobContents::Binary(num_bytes));
184 }
185 };
186
187 let lines: Vec<String> = contents
188 .split_inclusive('\n')
189 .map(|line| line.to_owned())
190 .collect();
191 Ok(BlobContents::Text(lines))
192 }
193 }
194 };
195
196 match repo.inner.blob_path(&path) {
199 Ok(_) => {}
200 Err(err) if err.code() == git2::ErrorCode::NotFound => {}
201 Err(err) => return Err(err.into()),
202 }
203 let before_lines = get_lines_from_blob(old_oid)?;
204 let after_lines = get_lines_from_blob(new_oid)?;
205
206 let mut unchanged_hunk_line_idx = 0;
207 let mut file_sections = Vec::new();
208 for hunk in hunks {
209 #[derive(Debug)]
210 enum Lines<'a> {
211 Lines(&'a [String]),
212 BinaryDescription(String),
213 }
214 let empty_lines: Vec<String> = Default::default();
215 let before_lines = match &before_lines {
216 BlobContents::Absent => Lines::Lines(&empty_lines),
217 BlobContents::Text(before_lines) => Lines::Lines(before_lines),
218 BlobContents::Binary(num_bytes) => Lines::BinaryDescription(
219 make_binary_description(&old_oid.to_string(), *num_bytes),
220 ),
221 };
222 let after_lines = match &after_lines {
223 BlobContents::Absent => Lines::Lines(Default::default()),
224 BlobContents::Text(after_lines) => Lines::Lines(after_lines),
225 BlobContents::Binary(num_bytes) => Lines::BinaryDescription(
226 make_binary_description(&new_oid.to_string(), *num_bytes),
227 ),
228 };
229
230 let (before_lines, after_lines) = match (before_lines, after_lines) {
231 (Lines::Lines(before_lines), Lines::Lines(after_lines)) => {
232 (before_lines, after_lines)
233 }
234 (Lines::BinaryDescription(_), Lines::Lines(after_lines)) => {
235 (Default::default(), after_lines)
236 }
237 (Lines::Lines(_), Lines::BinaryDescription(new_description)) => {
238 file_sections.push(Section::Binary {
239 is_checked: false,
240 old_description: None,
241 new_description: Some(Cow::Owned(new_description)),
242 });
243 continue;
244 }
245 (
246 Lines::BinaryDescription(old_description),
247 Lines::BinaryDescription(new_description),
248 ) => {
249 file_sections.push(Section::Binary {
250 is_checked: false,
251 old_description: Some(Cow::Owned(old_description)),
252 new_description: Some(Cow::Owned(new_description)),
253 });
254 continue;
255 }
256 };
257
258 let GitHunk {
259 old_start,
260 old_lines,
261 new_start,
262 new_lines,
263 } = hunk;
264
265 let (old_start, old_is_empty) = if old_start == 0 && old_lines == 0 {
267 (0, true)
268 } else {
269 assert!(old_start > 0);
270 (old_start - 1, false)
271 };
272 let new_start = if new_start == 0 && new_lines == 0 {
273 0
274 } else {
275 assert!(new_start > 0);
276 new_start - 1
277 };
278
279 if unchanged_hunk_line_idx <= old_start {
282 let end = if old_lines == 0 && !old_is_empty {
283 old_start + 1
287 } else {
288 old_start
289 };
290 file_sections.push(Section::Unchanged {
291 lines: before_lines[unchanged_hunk_line_idx..end]
292 .iter()
293 .cloned()
294 .map(Cow::Owned)
295 .collect_vec(),
296 });
297 unchanged_hunk_line_idx = end + old_lines;
298 }
299
300 let before_idx_start = old_start;
301 let before_idx_end = before_idx_start + old_lines;
302 assert!(
303 before_idx_end <= before_lines.len(),
304 "before_idx_end {end} was not in range [0, {len}): {hunk:?}, path: {path:?}; lines {start}-... are: {lines:?}",
305 start = before_idx_start,
306 end = before_idx_end,
307 len = before_lines.len(),
308 hunk = hunk,
309 path = path,
310 lines = &before_lines[before_idx_start..],
311 );
312 let before_section_lines = before_lines[before_idx_start..before_idx_end]
313 .iter()
314 .cloned()
315 .map(|before_line| SectionChangedLine {
316 is_checked: false,
317 change_type: ChangeType::Removed,
318 line: Cow::Owned(before_line),
319 })
320 .collect_vec();
321
322 let after_idx_start = new_start;
323 let after_idx_end = after_idx_start + new_lines;
324 assert!(
325 after_idx_end <= after_lines.len(),
326 "after_idx_end {end} was not in range [0, {len}): {hunk:?}, path: {path:?}; lines {start}-... are: {lines:?}",
327 start = after_idx_start,
328 end = after_idx_end,
329 len = after_lines.len(),
330 hunk = hunk,
331 path = path,
332 lines = &after_lines[after_idx_start..],
333 );
334 let after_section_lines = after_lines[after_idx_start..after_idx_end]
335 .iter()
336 .cloned()
337 .map(|after_line| SectionChangedLine {
338 is_checked: false,
339 change_type: ChangeType::Added,
340 line: Cow::Owned(after_line),
341 })
342 .collect_vec();
343
344 if !(before_section_lines.is_empty() && after_section_lines.is_empty()) {
345 file_sections.push(Section::Changed {
346 lines: before_section_lines
347 .into_iter()
348 .chain(after_section_lines)
349 .collect(),
350 });
351 }
352 }
353
354 if let BlobContents::Text(before_lines) = before_lines {
355 if unchanged_hunk_line_idx < before_lines.len() {
356 file_sections.push(Section::Unchanged {
357 lines: before_lines[unchanged_hunk_line_idx..]
358 .iter()
359 .cloned()
360 .map(Cow::Owned)
361 .collect(),
362 });
363 }
364 }
365
366 let file_mode_section = if old_file_mode != new_file_mode {
367 vec![Section::FileMode {
368 is_checked: false,
369 before: old_file_mode,
370 after: new_file_mode,
371 }]
372 } else {
373 vec![]
374 };
375 result.push(File {
376 old_path: None,
377 path: Cow::Owned(path),
378 file_mode: Some(old_file_mode),
379 sections: [file_mode_section, file_sections].concat().to_vec(),
380 });
381 }
382
383 result.sort_by_cached_key(|file| file.path.clone().into_owned());
384 Ok(result)
385}