1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// Modified for gitoxide from the upstream imara-diff crate.
// Upstream source: git cat-file -p 32d1e45d3df061e6ccba6db7fdce92db29e345d8:src/postprocess.rs
use crate::intern::{InternedInput, Token};
use crate::slider_heuristic::SliderHeuristic;
use crate::util::{find_hunk_end, find_hunk_start};
use crate::{Diff, Hunk};
impl Diff {
/// Postprocesses the diff using explicit token sequences and a custom heuristic.
///
/// This is a lower-level method that works directly with token sequences rather than
/// an `InternedInput`. Use [`postprocess_with_heuristic`](Self::postprocess_with_heuristic)
/// for a more convenient API.
///
/// # Parameters
///
/// * `before` - The token sequence from the first file, before changes
/// * `after` - The token sequence from the second file, after changes
/// * `heuristic` - The slider heuristic to use for positioning hunks
pub fn postprocess_with(&mut self, before: &[Token], after: &[Token], mut heuristic: impl SliderHeuristic) {
Postprocessor {
added: &mut self.added,
removed: &mut self.removed,
tokens: after,
hunk: Hunk {
before: 0..0,
after: 0..0,
},
heuristic: &mut heuristic,
}
.run();
Postprocessor {
added: &mut self.removed,
removed: &mut self.added,
tokens: before,
hunk: Hunk {
before: 0..0,
after: 0..0,
},
heuristic: &mut heuristic,
}
.run()
}
/// Postprocesses the diff using an `InternedInput` and a custom heuristic.
///
/// This is a convenience wrapper around [`postprocess_with`](Self::postprocess_with)
/// that extracts the token sequences from the input automatically.
///
/// # Parameters
///
/// * `input` - The interned input containing the token sequences
/// * `heuristic` - The slider heuristic to use for positioning hunks
pub fn postprocess_with_heuristic<T>(&mut self, input: &InternedInput<T>, heuristic: impl SliderHeuristic) {
self.postprocess_with(&input.before, &input.after, heuristic);
}
}
/// Internal state for postprocessing a diff to improve readability.
struct Postprocessor<'a, H> {
/// The mutable array tracking which tokens were added.
added: &'a mut [bool],
/// The immutable array tracking which tokens were removed.
removed: &'a [bool],
/// The token sequence being processed.
tokens: &'a [Token],
/// The current hunk being processed in the iteration.
hunk: Hunk,
/// The heuristic used to determine optimal hunk positions.
heuristic: &'a mut H,
}
impl<H: SliderHeuristic> Postprocessor<'_, H> {
fn run(mut self) {
loop {
// find a hunk
if !self.hunk.next_hunk(self.removed, self.added) {
break;
}
let mut earliest_end;
let mut is_modification;
loop {
// move hunk up as far as possible to possibly merge it with other hunks
// and discover if there are other possible positions
while self.slide_up() {}
earliest_end = self.hunk.after.end;
is_modification = self.hunk.before.start != self.hunk.before.end;
let hunk_size_unexpanded = self.hunk.after.len();
// move hunk down as far as possible (and merge with other hunks it if
// possible) sliding down is often the most preferred position
while self.slide_down() {
is_modification |= self.hunk.before.start != self.hunk.before.end;
}
// if this hunk was merged with another hunk while sliding down we might
// be able to slide up more otherwise we are done
if hunk_size_unexpanded == self.hunk.after.len() {
break;
}
}
if self.hunk.after.end == earliest_end {
continue;
}
if is_modification {
// hunk can be moved and there is a removed hunk in the same region
// move the hunk so it align with the other hunk to produce a single
// MODIFIED hunk instead of two separate ADDED/REMOVED hunks
while self.hunk.before.start == self.hunk.before.end {
let success = self.slide_up();
debug_assert!(success);
}
} else {
let slider_end = self
.heuristic
.best_slider_end(self.tokens, self.hunk.after.clone(), earliest_end);
for _ in slider_end..self.hunk.after.end {
let success = self.slide_up();
debug_assert!(success);
}
}
}
}
/// Slides a hunk down by one token/line, potentially merging it with a subsequent hunk.
fn slide_down(&mut self) -> bool {
let Some(&next_token) = self.tokens.get(self.hunk.after.end as usize) else {
return false;
};
if self.tokens[self.hunk.after.start as usize] != next_token {
return false;
}
self.added[self.hunk.after.start as usize] = false;
self.added[self.hunk.after.end as usize] = true;
self.hunk.after.start += 1;
self.hunk.after.end = find_hunk_end(self.added, self.hunk.after.end);
// move the end of the remove range one down to keep the unchanged lines aligned
self.hunk.before.start = self.hunk.before.end + 1;
self.hunk.before.end = find_hunk_end(self.removed, self.hunk.before.start);
true
}
/// Slides a hunk up by one token/line, potentially merging it with a previous hunk.
fn slide_up(&mut self) -> bool {
if self.hunk.after.start == 0 {
return false;
}
if self.tokens[self.hunk.after.start as usize - 1] != self.tokens[self.hunk.after.end as usize - 1] {
return false;
}
self.added[self.hunk.after.start as usize - 1] = true;
self.added[self.hunk.after.end as usize - 1] = false;
self.hunk.after.end -= 1;
self.hunk.after.start = find_hunk_start(self.added, self.hunk.after.start - 1);
// move the start of the remove range one up to keep the unchanged lines aligned
self.hunk.before.end = self.hunk.before.start - 1;
self.hunk.before.start = find_hunk_start(self.removed, self.hunk.before.start - 1);
true
}
}