Skip to main content

grit_lib/
diff_moved.rs

1//! Move detection for `git diff --color-moved` (port of git/diff.c
2//! `add_lines_to_move_detection` + `mark_color_as_moved` + `dim_moved_lines`).
3//!
4//! Operates on the already-rendered *plain* unified diff text (one or more files
5//! of `diff --git` headers, `@@` hunk headers, and `+`/`-`/context body lines).
6//! It identifies blocks of moved code and returns, for every line of the input
7//! patch, a [`MovedClass`] telling the colorizer which color slot to use.
8
9/// `--color-moved` mode.
10#[derive(Clone, Copy, PartialEq, Eq, Debug)]
11pub enum ColorMovedMode {
12    /// No move detection.
13    No,
14    /// `plain`: all moved lines use the moved color, no alternation.
15    Plain,
16    /// `blocks`: alternate-by-block detection but no alternate color.
17    Blocks,
18    /// `zebra`: alternate colors between adjacent moved blocks.
19    Zebra,
20    /// `dimmed-zebra`: like zebra but dim the interior of blocks.
21    ZebraDim,
22}
23
24impl ColorMovedMode {
25    /// Parse a `--color-moved[=<mode>]` value. `default` maps to `Zebra`.
26    /// Returns `None` for an unrecognized mode.
27    #[must_use]
28    pub fn parse(arg: &str) -> Option<Self> {
29        match arg {
30            "no" | "false" | "off" => Some(Self::No),
31            "plain" => Some(Self::Plain),
32            "blocks" => Some(Self::Blocks),
33            "zebra" | "default" | "true" | "on" => Some(Self::Zebra),
34            "dimmed-zebra" | "dimmed_zebra" => Some(Self::ZebraDim),
35            _ => None,
36        }
37    }
38}
39
40// Whitespace handling bits for move detection (mirror git's XDF_* + the
41// allow-indentation-change bit).
42pub const MOVED_WS_IGNORE_ALL_SPACE: u32 = 1;
43pub const MOVED_WS_IGNORE_SPACE_CHANGE: u32 = 2;
44pub const MOVED_WS_IGNORE_SPACE_AT_EOL: u32 = 4;
45pub const MOVED_WS_ALLOW_INDENTATION_CHANGE: u32 = 8;
46pub const MOVED_WS_ERROR: u32 = 16;
47
48const WS_FLAGS_MASK: u32 =
49    MOVED_WS_IGNORE_ALL_SPACE | MOVED_WS_IGNORE_SPACE_CHANGE | MOVED_WS_IGNORE_SPACE_AT_EOL;
50
51/// Parse a comma-separated `--color-moved-ws=<modes>` value into the bit flags.
52/// Sets [`MOVED_WS_ERROR`] for unknown modes or the illegal
53/// allow-indentation-change + other-ws combination (git errors out then).
54#[must_use]
55pub fn parse_color_moved_ws(arg: &str) -> u32 {
56    let mut ret = 0u32;
57    for raw in arg.split(',') {
58        let part = raw.trim();
59        if part.is_empty() {
60            continue;
61        }
62        match part {
63            "no" => ret = 0,
64            "ignore-space-change" => ret |= MOVED_WS_IGNORE_SPACE_CHANGE,
65            "ignore-space-at-eol" => ret |= MOVED_WS_IGNORE_SPACE_AT_EOL,
66            "ignore-all-space" => ret |= MOVED_WS_IGNORE_ALL_SPACE,
67            "allow-indentation-change" => ret |= MOVED_WS_ALLOW_INDENTATION_CHANGE,
68            _ => ret |= MOVED_WS_ERROR,
69        }
70    }
71    if (ret & MOVED_WS_ALLOW_INDENTATION_CHANGE) != 0 && (ret & WS_FLAGS_MASK) != 0 {
72        ret |= MOVED_WS_ERROR;
73    }
74    ret
75}
76
77/// The color class assigned to one emitted line after move detection.
78#[derive(Clone, Copy, PartialEq, Eq, Debug)]
79pub enum MovedClass {
80    /// Not a moved line: use the normal old/new/context color.
81    None,
82    /// Moved, primary color (`oldMoved` / `newMoved`).
83    Moved,
84    /// Moved, alternate color (`oldMovedAlternative` / `newMovedAlternative`).
85    MovedAlt,
86    /// Moved, primary dim (`oldMovedDimmed` / `newMovedDimmed`).
87    MovedDim,
88    /// Moved, alternate dim (`oldMovedAlternativeDimmed` / ...).
89    MovedAltDim,
90}
91
92const INDENT_BLANKLINE: i64 = i64::MIN;
93
94#[derive(Clone)]
95struct Symbol {
96    /// Index into the original patch-line list.
97    line_index: usize,
98    /// `+` (true) or `-` (false).
99    is_plus: bool,
100    /// Raw body of the line (without the leading +/-, with trailing newline stripped).
101    body: String,
102    /// Interned content id (whitespace-normalized per ws flags).
103    id: usize,
104    /// Visual indent width, or [`INDENT_BLANKLINE`] for blank lines.
105    indent_width: i64,
106    /// Resulting move class.
107    class: MovedClass,
108    /// Whether this line is part of a moved block at all.
109    moved: bool,
110    /// Whether the alternate flag is set.
111    alt: bool,
112    /// Whether the line was dimmed (uninteresting interior).
113    uninteresting: bool,
114}
115
116/// Normalize a line for interning / comparison according to the ws flags
117/// (mirrors xdiff_compare_lines / xdiff_hash_string semantics enough for move
118/// detection: the comparison only needs a canonical key per ws mode).
119fn normalize_key(body: &str, flags: u32) -> String {
120    let ws = flags & WS_FLAGS_MASK;
121    if ws == 0 {
122        return body.to_owned();
123    }
124    if ws & MOVED_WS_IGNORE_ALL_SPACE != 0 {
125        return body.chars().filter(|c| !c.is_whitespace()).collect();
126    }
127    if ws & MOVED_WS_IGNORE_SPACE_CHANGE != 0 {
128        // Collapse runs of whitespace to a single space and trim trailing ws.
129        let mut out = String::with_capacity(body.len());
130        let mut in_ws = false;
131        for c in body.chars() {
132            if c.is_whitespace() {
133                in_ws = true;
134            } else {
135                if in_ws && !out.is_empty() {
136                    out.push(' ');
137                }
138                in_ws = false;
139                out.push(c);
140            }
141        }
142        return out;
143    }
144    if ws & MOVED_WS_IGNORE_SPACE_AT_EOL != 0 {
145        return body.trim_end().to_owned();
146    }
147    body.to_owned()
148}
149
150/// Visual indent width + blankness, mirroring git's `fill_es_indent_data`
151/// (8-wide tab stops, like git's default `WS_TAB_WIDTH`).
152fn fill_indent(body: &str) -> (usize, i64) {
153    let bytes = body.as_bytes();
154    let len = bytes.len();
155    let mut off = 0usize;
156    // skip \f \v \r at start of indentation
157    while off < len
158        && (bytes[off] == b'\x0c'
159            || bytes[off] == b'\x0b'
160            || (off + 1 < len && bytes[off] == b'\r'))
161    {
162        off += 1;
163    }
164    let tab_width = 8i64;
165    let mut width = 0i64;
166    loop {
167        if off < len && bytes[off] == b' ' {
168            width += 1;
169            off += 1;
170        } else if off < len && bytes[off] == b'\t' {
171            width += tab_width - (width % tab_width);
172            off += 1;
173            while off < len && bytes[off] == b'\t' {
174                width += tab_width;
175                off += 1;
176            }
177        } else {
178            break;
179        }
180    }
181    // is the line blank?
182    let mut i = off;
183    while i < len && (bytes[i] as char).is_whitespace() {
184        i += 1;
185    }
186    if i == len {
187        (len, INDENT_BLANKLINE)
188    } else {
189        (off, width)
190    }
191}
192
193struct MovedBlock {
194    /// Index into `syms` of the matched entry on the previous line of this block.
195    match_idx: usize,
196    wsd: i64,
197}
198
199/// Run move detection over a plain multi-file unified diff. Returns a vector
200/// parallel to the patch's lines (`patch.split_inclusive('\n')`) giving each
201/// line's [`MovedClass`]; non-+/- lines are always [`MovedClass::None`].
202#[must_use]
203pub fn detect_moved_lines(patch: &str, mode: ColorMovedMode, ws_flags: u32) -> Vec<MovedClass> {
204    let lines: Vec<&str> = patch.split_inclusive('\n').collect();
205    let mut classes = vec![MovedClass::None; lines.len()];
206    if mode == ColorMovedMode::No {
207        return classes;
208    }
209    let allow_indent = ws_flags & MOVED_WS_ALLOW_INDENTATION_CHANGE != 0;
210
211    // Build the emitted-symbol list: only +/- body lines inside hunks, resetting
212    // at any non +/- line (so consecutive runs map to next_line chains).
213    let mut syms: Vec<Symbol> = Vec::new();
214    let mut in_hunk = false;
215    let mut interner: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
216    let mut next_id = 0usize;
217    for (idx, &raw) in lines.iter().enumerate() {
218        let line = raw.strip_suffix('\n').unwrap_or(raw);
219        if line.starts_with("@@") {
220            in_hunk = true;
221            continue;
222        }
223        if line.starts_with("diff --git")
224            || line.starts_with("--- ")
225            || line.starts_with("+++ ")
226            || line.starts_with("index ")
227            || line.starts_with("new file")
228            || line.starts_with("deleted file")
229            || line.starts_with("old mode")
230            || line.starts_with("new mode")
231            || line.starts_with("similarity")
232            || line.starts_with("dissimilarity")
233            || line.starts_with("rename ")
234            || line.starts_with("copy ")
235            || line.starts_with("Binary files")
236            || line.starts_with('\\')
237        {
238            in_hunk = false;
239            continue;
240        }
241        if !in_hunk {
242            continue;
243        }
244        let (is_plus, body) = if let Some(rest) = line.strip_prefix('+') {
245            (true, rest)
246        } else if let Some(rest) = line.strip_prefix('-') {
247            (false, rest)
248        } else {
249            // context (or blank line rendered as a bare newline)
250            continue;
251        };
252
253        let (indent_off, indent_width) = if allow_indent {
254            fill_indent(body)
255        } else {
256            (0, 0)
257        };
258        // With allow-indentation-change, Git interns/compares from the indent
259        // offset (`l->line + l->indent_off`), so leading indentation does not
260        // affect the content id; the indent delta is checked separately.
261        let key = if allow_indent {
262            normalize_key(&body[indent_off.min(body.len())..], ws_flags)
263        } else {
264            normalize_key(body, ws_flags)
265        };
266        let id = *interner.entry(key).or_insert_with(|| {
267            let v = next_id;
268            next_id += 1;
269            v
270        });
271        syms.push(Symbol {
272            line_index: idx,
273            is_plus,
274            body: body.to_owned(),
275            id,
276            indent_width,
277            class: MovedClass::None,
278            moved: false,
279            alt: false,
280            uninteresting: false,
281        });
282    }
283
284    if syms.is_empty() {
285        return classes;
286    }
287
288    // next_line chains: for each symbol, the next symbol of the same sign that is
289    // immediately contiguous in the emitted stream. We approximate git's
290    // prev_line linkage by detecting contiguity through line_index adjacency
291    // within the same +/- run.
292    let n = syms.len();
293    let mut next_line: Vec<Option<usize>> = vec![None; n];
294    for i in 1..n {
295        // Contiguous if the previous symbol's patch line is immediately before
296        // this one and has the same sign (git resets prev_line on any non +/-).
297        if syms[i].line_index == syms[i - 1].line_index + 1
298            && syms[i].is_plus == syms[i - 1].is_plus
299        {
300            next_line[i - 1] = Some(i);
301        }
302    }
303
304    // Match lists per id: add[id] / del[id] hold symbol indices (in order, but we
305    // build the head-insertion lists like git so iteration matches).
306    let mut add_head: Vec<Option<usize>> = vec![None; next_id];
307    let mut del_head: Vec<Option<usize>> = vec![None; next_id];
308    let mut next_match: Vec<Option<usize>> = vec![None; n];
309    for i in 0..n {
310        let id = syms[i].id;
311        if syms[i].is_plus {
312            next_match[i] = add_head[id];
313            add_head[id] = Some(i);
314        } else {
315            next_match[i] = del_head[id];
316            del_head[id] = Some(i);
317        }
318    }
319    // git inserts at head, so the first-added ends up last; iterating next_match
320    // from head visits most-recent-first. git builds the same way, so keep it.
321
322    let min_alnum = 20usize; // COLOR_MOVED_MIN_ALNUM_COUNT
323
324    // adjust_last_block: returns whether the block (the last `block_length`
325    // symbols ending just before position `pos`) has enough alnum chars; if not,
326    // clears the moved/alt flags on those lines.
327    let adjust_last_block = |syms: &mut [Symbol], pos: usize, block_length: usize| -> bool {
328        if mode == ColorMovedMode::Plain {
329            return block_length != 0;
330        }
331        if block_length == 0 {
332            return false;
333        }
334        let mut alnum = 0usize;
335        for i in 1..=block_length {
336            let s = &syms[pos - i];
337            for c in s.body.chars() {
338                if c.is_alphanumeric() {
339                    alnum += 1;
340                    if alnum >= min_alnum {
341                        return true;
342                    }
343                }
344            }
345        }
346        for i in 1..=block_length {
347            syms[pos - i].moved = false;
348            syms[pos - i].alt = false;
349        }
350        false
351    };
352
353    // cmp_in_block_with_wsd: 0 == match, 1 == no match.
354    let cmp_in_block = |cur: &Symbol, l: &Symbol, pmb_wsd: &mut i64| -> bool {
355        if cur.id != l.id {
356            return false;
357        }
358        let a_width = cur.indent_width;
359        if a_width == INDENT_BLANKLINE {
360            return true;
361        }
362        let delta = l.indent_width - a_width;
363        if *pmb_wsd == INDENT_BLANKLINE {
364            *pmb_wsd = delta;
365        }
366        delta == *pmb_wsd
367    };
368
369    // pmb_advance_or_null: advance each potential block to its next line if it
370    // still matches; drop the rest.
371    let pmb_advance = |syms: &[Symbol], pmb: &mut Vec<MovedBlock>, l_idx: usize| {
372        let l = &syms[l_idx];
373        let mut kept: Vec<MovedBlock> = Vec::with_capacity(pmb.len());
374        for b in pmb.iter() {
375            let cur = next_line[b.match_idx];
376            let matched = if let Some(cur_idx) = cur {
377                if allow_indent {
378                    let mut wsd = b.wsd;
379                    let m = cmp_in_block(&syms[cur_idx], l, &mut wsd);
380                    if m {
381                        kept.push(MovedBlock {
382                            match_idx: cur_idx,
383                            wsd,
384                        });
385                    }
386                    m
387                } else {
388                    let m = syms[cur_idx].id == l.id;
389                    if m {
390                        kept.push(MovedBlock {
391                            match_idx: cur_idx,
392                            wsd: b.wsd,
393                        });
394                    }
395                    m
396                }
397            } else {
398                false
399            };
400            let _ = matched;
401        }
402        *pmb = kept;
403    };
404
405    // mark_color_as_moved (faithful port of git/diff.c). We emulate git's
406    // `for (n = 0; n < nr; n++)` loop: a "rewind" sets `nn -= block_length`
407    // here and the loop epilogue still does `nn += 1`, giving git's net
408    // `nn -= block_length - 1` (re-examine from the block's 2nd line).
409    let mut pmb: Vec<MovedBlock> = Vec::new();
410    let mut flipped_block = 0i32;
411    let mut block_length = 0usize;
412    // moved_symbol: Some(true)=plus, Some(false)=minus, None=neither
413    let mut moved_symbol: Option<bool> = None;
414
415    let mut nn = 0usize;
416    // Safety cap: the rewind logic strictly shrinks the re-examined block each
417    // time, so this terminates, but guard against any pathological input causing
418    // an unbounded loop (rewinds re-visit lines, so bound generously).
419    let max_iters = n.saturating_mul(n).saturating_add(n).saturating_add(16);
420    let mut iters = 0usize;
421    // True once a barrier (a non-`+/-` line, i.e. a gap in line_index, or the
422    // very start) has been processed for the line at `nn`. Git resets
423    // `flipped_block` on every non-`+/-` symbol (`switch ... default`), which our
424    // `+/-`-only `syms` list must emulate at line_index gaps.
425    while nn < n {
426        iters += 1;
427        if iters > max_iters {
428            break;
429        }
430        // Emulate git's non-`+/-` line handling: when a context line separated
431        // this symbol from the previous one (gap in line_index), reset
432        // `flipped_block` and finalize any open block before processing.
433        let barrier = nn == 0 || syms[nn].line_index != syms[nn - 1].line_index + 1;
434        if barrier {
435            if !pmb.is_empty() {
436                adjust_last_block(&mut syms, nn, block_length);
437                pmb.clear();
438                block_length = 0;
439            }
440            flipped_block = 0;
441            moved_symbol = None;
442        }
443        // match for this line: a plus matches dels, a minus matches adds.
444        let mut match_idx: Option<usize> = if syms[nn].is_plus {
445            del_head[syms[nn].id]
446        } else {
447            add_head[syms[nn].id]
448        };
449
450        let cur_sym = syms[nn].is_plus;
451        if !pmb.is_empty() && (match_idx.is_none() || Some(cur_sym) != moved_symbol) {
452            if !adjust_last_block(&mut syms, nn, block_length) && block_length > 1 {
453                match_idx = None;
454                nn -= block_length;
455            }
456            pmb.clear();
457            block_length = 0;
458            flipped_block = 0;
459        }
460
461        let Some(match_start) = match_idx else {
462            moved_symbol = None;
463            nn += 1;
464            continue;
465        };
466
467        if mode == ColorMovedMode::Plain {
468            syms[nn].moved = true;
469            nn += 1;
470            continue;
471        }
472
473        pmb_advance(&syms, &mut pmb, nn);
474
475        if pmb.is_empty() {
476            let contiguous = adjust_last_block(&mut syms, nn, block_length);
477
478            if !contiguous && block_length > 1 {
479                // Rewind in case there is another match starting at the second
480                // line of the block. pmb stays empty (no fill).
481                nn -= block_length;
482            } else {
483                // fill_potential_moved_blocks: set up pmb from all matches.
484                let mut m = Some(match_start);
485                pmb.clear();
486                while let Some(mi) = m {
487                    let wsd = if allow_indent {
488                        compute_ws_delta(&syms[nn], &syms[mi])
489                    } else {
490                        0
491                    };
492                    pmb.push(MovedBlock { match_idx: mi, wsd });
493                    m = next_match[mi];
494                }
495            }
496
497            if contiguous && !pmb.is_empty() && moved_symbol == Some(syms[nn].is_plus) {
498                flipped_block = (flipped_block + 1) % 2;
499            } else {
500                flipped_block = 0;
501            }
502
503            moved_symbol = if !pmb.is_empty() {
504                Some(syms[nn].is_plus)
505            } else {
506                None
507            };
508
509            block_length = 0;
510        }
511
512        if !pmb.is_empty() {
513            block_length += 1;
514            syms[nn].moved = true;
515            if flipped_block != 0 && mode != ColorMovedMode::Blocks {
516                syms[nn].alt = true;
517            }
518        }
519
520        nn += 1;
521    }
522    adjust_last_block(&mut syms, n, block_length);
523
524    // dim_moved_lines (dimmed-zebra only)
525    if mode == ColorMovedMode::ZebraDim {
526        dim_moved_lines(&mut syms);
527    }
528
529    // Resolve classes.
530    for s in &mut syms {
531        if !s.moved {
532            s.class = MovedClass::None;
533        } else {
534            s.class = match (s.alt, s.uninteresting) {
535                (true, true) => MovedClass::MovedAltDim,
536                (true, false) => MovedClass::MovedAlt,
537                (false, true) => MovedClass::MovedDim,
538                (false, false) => MovedClass::Moved,
539            };
540        }
541    }
542
543    for s in &syms {
544        classes[s.line_index] = s.class;
545    }
546    classes
547}
548
549fn compute_ws_delta(a: &Symbol, b: &Symbol) -> i64 {
550    if a.indent_width == INDENT_BLANKLINE && b.indent_width == INDENT_BLANKLINE {
551        return INDENT_BLANKLINE;
552    }
553    a.indent_width - b.indent_width
554}
555
556fn dim_moved_lines(syms: &mut [Symbol]) {
557    let n = syms.len();
558    for i in 0..n {
559        if !syms[i].moved {
560            continue;
561        }
562        // prev/next are only "real" if they are +/- (they always are in syms,
563        // but a gap in line_index means a context line separated them).
564        let prev = if i > 0 && syms[i - 1].line_index + 1 == syms[i].line_index {
565            Some(i - 1)
566        } else {
567            None
568        };
569        let next = if i + 1 < n && syms[i].line_index + 1 == syms[i + 1].line_index {
570            Some(i + 1)
571        } else {
572            None
573        };
574
575        let zebra_mask = |s: &Symbol| -> (bool, bool) { (s.moved, s.alt) };
576        let cur_mask = zebra_mask(&syms[i]);
577
578        // Inside a block? prev and next share the same moved+alt mask.
579        if let (Some(p), Some(nx)) = (prev, next) {
580            if zebra_mask(&syms[p]) == cur_mask && zebra_mask(&syms[nx]) == cur_mask {
581                syms[i].uninteresting = true;
582                continue;
583            }
584        }
585        // Interesting bound checks.
586        if let Some(p) = prev {
587            if syms[p].moved && syms[p].alt != syms[i].alt {
588                continue;
589            }
590        }
591        if let Some(nx) = next {
592            if syms[nx].moved && syms[nx].alt != syms[i].alt {
593                continue;
594            }
595        }
596        syms[i].uninteresting = true;
597    }
598}