Skip to main content

sley_diff_format/
words.rs

1//! Word-diff rendering (`--word-diff=plain|porcelain|color`, `--color-words`)
2//! and the diff color palette, ported from upstream `diff.c`
3//! (`diff_words_styles`, `fn_out_diff_words_aux`, `find_word_boundaries`,
4//! `diff_words_fill`, `emit_hunk_header`).
5
6use sley_config::GitConfig;
7use sley_grep::Regex;
8
/// ANSI palette for colored diff output. Each slot holds the escape sequence
/// (empty when color is disabled), mirroring `diff_get_color`.
///
/// `Default` yields the all-empty (colorless) palette; [`DiffColors::enabled`]
/// builds the colored one, where each slot except `reset` can be overridden
/// through `color.diff.<slot>`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct DiffColors {
    /// `color.diff.meta`; bold (`\x1b[1m`) by default when enabled.
    pub meta: String,
    /// `color.diff.frag`; cyan (`\x1b[36m`) by default when enabled.
    pub frag: String,
    /// `color.diff.func`; empty by default.
    pub func: String,
    /// `color.diff.old`; red (`\x1b[31m`) by default when enabled.
    pub old: String,
    /// `color.diff.new`; green (`\x1b[32m`) by default when enabled.
    pub new: String,
    /// `color.diff.context`; empty by default.
    pub context: String,
    /// Sequence that ends a colored span; `\x1b[m` when enabled. Not
    /// configurable.
    pub reset: String,
    /// `color.diff.whitespace` — the highlight for whitespace errors
    /// (`--ws-error-highlight`). Default `\x1b[41m` (red background) when
    /// enabled.
    pub whitespace: String,
    /// `color.diff.oldMoved`; bold magenta (`\x1b[1;35m`) by default.
    pub old_moved: String,
    /// `color.diff.oldMovedAlternative`; bold blue (`\x1b[1;34m`) by default.
    pub old_moved_alt: String,
    /// `color.diff.oldMovedDimmed`; dim (`\x1b[2m`) by default.
    pub old_moved_dim: String,
    /// `color.diff.oldMovedAlternativeDimmed`; dim italic (`\x1b[2;3m`) by
    /// default.
    pub old_moved_alt_dim: String,
    /// `color.diff.newMoved`; bold cyan (`\x1b[1;36m`) by default.
    pub new_moved: String,
    /// `color.diff.newMovedAlternative`; bold yellow (`\x1b[1;33m`) by default.
    pub new_moved_alt: String,
    /// `color.diff.newMovedDimmed`; dim (`\x1b[2m`) by default.
    pub new_moved_dim: String,
    /// `color.diff.newMovedAlternativeDimmed`; dim italic (`\x1b[2;3m`) by
    /// default.
    pub new_moved_alt_dim: String,
}
32
33impl DiffColors {
34    /// The default enabled palette: meta=bold, frag=cyan, old=red, new=green,
35    /// func/context unset, overridden by `color.diff.<slot>` with the legacy
36    /// `diff.color.<slot>` spelling as a fallback.
37    pub fn enabled(config: Option<&GitConfig>) -> Self {
38        let lookup = |slot: &str, default: &str| -> String {
39            let value = config.and_then(|config| {
40                config
41                    .get("color", Some("diff"), slot)
42                    .or_else(|| config.get("diff", Some("color"), slot))
43            });
44            match value {
45                Some(name) => parse_color_value(name).unwrap_or_else(|| default.to_string()),
46                None => default.to_string(),
47            }
48        };
49        Self {
50            meta: lookup("meta", "\x1b[1m"),
51            frag: lookup("frag", "\x1b[36m"),
52            func: lookup("func", ""),
53            old: lookup("old", "\x1b[31m"),
54            new: lookup("new", "\x1b[32m"),
55            context: lookup("context", ""),
56            reset: "\x1b[m".to_string(),
57            // git's GIT_COLOR_REVERSE for whitespace by default; the test
58            // decoder names this red-background span `<BRED>`.
59            whitespace: lookup("whitespace", "\x1b[41m"),
60            old_moved: lookup("oldMoved", "\x1b[1;35m"),
61            old_moved_alt: lookup("oldMovedAlternative", "\x1b[1;34m"),
62            old_moved_dim: lookup("oldMovedDimmed", "\x1b[2m"),
63            old_moved_alt_dim: lookup("oldMovedAlternativeDimmed", "\x1b[2;3m"),
64            new_moved: lookup("newMoved", "\x1b[1;36m"),
65            new_moved_alt: lookup("newMovedAlternative", "\x1b[1;33m"),
66            new_moved_dim: lookup("newMovedDimmed", "\x1b[2m"),
67            new_moved_alt_dim: lookup("newMovedAlternativeDimmed", "\x1b[2;3m"),
68        }
69    }
70}
71
/// Translate a git color specification ("red", "bold", "green dim", ...)
/// into an ANSI escape sequence.
///
/// Words are whitespace-separated. Attribute words (`bold`, `dim`, `italic`,
/// `ul`, `blink`, `reverse`) are emitted first in the order given, followed
/// by the foreground and then the background color. The first color word is
/// the foreground and any later one the background; `normal` claims the
/// foreground slot without setting a color. `reset` short-circuits to
/// `\x1b[m`.
///
/// Returns `None` for any word outside this small vocabulary so the caller
/// can keep its default, and `Some("")` when nothing needs to be emitted.
pub fn parse_color_value(value: &str) -> Option<String> {
    // SGR parameter for an attribute word.
    fn attribute_code(word: &str) -> Option<u8> {
        match word {
            "bold" => Some(1),
            "dim" => Some(2),
            "italic" => Some(3),
            "ul" => Some(4),
            "blink" => Some(5),
            "reverse" => Some(7),
            _ => None,
        }
    }

    // Offset of a basic color word within the 30..=37 / 40..=47 SGR ranges.
    fn color_offset(word: &str) -> Option<u8> {
        const NAMES: [&str; 8] = [
            "black", "red", "green", "yellow", "blue", "magenta", "cyan", "white",
        ];
        (0u8..)
            .zip(NAMES.iter())
            .find(|&(_, &name)| name == word)
            .map(|(offset, _)| offset)
    }

    let mut foreground: Option<u8> = None;
    let mut background: Option<u8> = None;
    let mut foreground_claimed = false;
    let mut params: Vec<String> = Vec::new();

    for token in value.split_ascii_whitespace() {
        if token == "reset" {
            return Some("\x1b[m".to_string());
        }
        if token == "normal" {
            foreground_claimed = true;
            continue;
        }
        if let Some(attribute) = attribute_code(token) {
            params.push(attribute.to_string());
            continue;
        }
        // Anything that is neither an attribute nor a color is unsupported.
        let offset = color_offset(token)?;
        if foreground_claimed {
            background = Some(offset);
        } else {
            foreground = Some(offset);
            foreground_claimed = true;
        }
    }

    params.extend(foreground.map(|offset| (30 + offset).to_string()));
    params.extend(background.map(|offset| (40 + offset).to_string()));

    if params.is_empty() {
        Some(String::new())
    } else {
        Some(format!("\x1b[{}m", params.join(";")))
    }
}
129
/// Append `line` to `out` wrapped in `color` ... `reset`, mirroring
/// `emit_line_0`.
///
/// At most one trailing `\n` is treated as the line terminator and is written
/// after `reset`, so the escape sequences never straddle a line break. A line
/// whose text is empty (nothing but the optional newline) is appended as-is
/// with no escape sequences at all.
pub fn push_colored_line(out: &mut Vec<u8>, color: &str, reset: &str, line: &[u8]) {
    let text_len = if line.ends_with(b"\n") {
        line.len() - 1
    } else {
        line.len()
    };
    let (text, terminator) = line.split_at(text_len);

    if !text.is_empty() {
        // Empty `color`/`reset` strings simply contribute no bytes.
        out.extend_from_slice(color.as_bytes());
        out.extend_from_slice(text);
        out.extend_from_slice(reset.as_bytes());
    }
    out.extend_from_slice(terminator);
}
152
/// Output flavor for word-diff rendering (`--word-diff=plain|porcelain|color`),
/// selecting the markers that `WordDiffConfig::style` puts around words.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WordDiffMode {
    /// Changed words are wrapped inline as `{+added+}` and `[-removed-]`.
    Plain,
    /// Each run is written on its own line prefixed with `+`, `-`, or a
    /// space, and every newline of the content is written as a `~` line.
    Porcelain,
    /// No textual markers; changed words are distinguished by color only.
    Color,
}
159
/// Word-diff configuration for one file pair: the rendering mode, the
/// compiled word regex (None = whitespace tokenization), and the palette.
pub struct WordDiffConfig<'a> {
    /// Selects which prefix/suffix/newline markers are used for words.
    pub mode: WordDiffMode,
    /// Pattern used to tokenize text into words; `None` means a word is a
    /// run of non-whitespace bytes.
    pub regex: Option<&'a Regex>,
    /// Palette supplying the `new`, `old`, `context`, and `reset` sequences.
    pub colors: &'a DiffColors,
}
167
/// How one class of text (added, removed, or context) is decorated when
/// `write_word_helper` emits it.
struct StyleElem<'a> {
    /// Marker written immediately before each non-empty segment.
    prefix: &'a str,
    /// Marker written immediately after each non-empty segment.
    suffix: &'a str,
    /// Escape sequence written before `prefix`. When it is non-empty,
    /// `\x1b[m` is written after `suffix`; when empty, no escape sequences
    /// are emitted at all.
    color: &'a str,
}
173
/// The complete decoration set for one `WordDiffMode`, as produced by
/// `WordDiffConfig::style`.
struct WordStyle<'a> {
    /// Decoration for text present only on the plus side.
    new_word: StyleElem<'a>,
    /// Decoration for text present only on the minus side.
    old_word: StyleElem<'a>,
    /// Decoration for unchanged plus-side text emitted between changes.
    ctx: StyleElem<'a>,
    /// String written in place of each `\n` found inside an emitted range.
    newline: &'a str,
}
180
181impl<'a> WordDiffConfig<'a> {
182    fn style(&self) -> WordStyle<'a> {
183        let colors = self.colors;
184        match self.mode {
185            WordDiffMode::Porcelain => WordStyle {
186                new_word: StyleElem {
187                    prefix: "+",
188                    suffix: "\n",
189                    color: &colors.new,
190                },
191                old_word: StyleElem {
192                    prefix: "-",
193                    suffix: "\n",
194                    color: &colors.old,
195                },
196                ctx: StyleElem {
197                    prefix: " ",
198                    suffix: "\n",
199                    color: &colors.context,
200                },
201                newline: "~\n",
202            },
203            WordDiffMode::Plain => WordStyle {
204                new_word: StyleElem {
205                    prefix: "{+",
206                    suffix: "+}",
207                    color: &colors.new,
208                },
209                old_word: StyleElem {
210                    prefix: "[-",
211                    suffix: "-]",
212                    color: &colors.old,
213                },
214                ctx: StyleElem {
215                    prefix: "",
216                    suffix: "",
217                    color: &colors.context,
218                },
219                newline: "\n",
220            },
221            WordDiffMode::Color => WordStyle {
222                new_word: StyleElem {
223                    prefix: "",
224                    suffix: "",
225                    color: &colors.new,
226                },
227                old_word: StyleElem {
228                    prefix: "",
229                    suffix: "",
230                    color: &colors.old,
231                },
232                ctx: StyleElem {
233                    prefix: "",
234                    suffix: "",
235                    color: &colors.context,
236                },
237                newline: "\n",
238            },
239        }
240    }
241}
242
243/// Port of `fn_out_diff_words_write_helper`: emit `buf` (a byte range of the
244/// original minus/plus text, possibly spanning newlines) one line segment at
245/// a time, wrapping each non-empty segment in the style element and emitting
246/// the style's newline string between segments.
247fn write_word_helper(out: &mut Vec<u8>, elem: &StyleElem<'_>, newline: &str, buf: &[u8]) {
248    let mut rest = buf;
249    loop {
250        let split = rest.iter().position(|&b| b == b'\n');
251        let segment = match split {
252            Some(at) => &rest[..at],
253            None => rest,
254        };
255        if !segment.is_empty() {
256            let colored = !elem.color.is_empty();
257            if colored {
258                out.extend_from_slice(elem.color.as_bytes());
259            }
260            out.extend_from_slice(elem.prefix.as_bytes());
261            out.extend_from_slice(segment);
262            out.extend_from_slice(elem.suffix.as_bytes());
263            if colored {
264                out.extend_from_slice(b"\x1b[m");
265            }
266        }
267        let Some(at) = split else { break };
268        out.extend_from_slice(newline.as_bytes());
269        rest = &rest[at + 1..];
270        if rest.is_empty() {
271            break;
272        }
273    }
274}
275
/// One tokenized word: its byte span in the original buffer.
struct WordSpan {
    /// Offset of the word's first byte (inclusive).
    begin: usize,
    /// Offset one past the word's last byte (exclusive); the word is
    /// `buffer[begin..end]`.
    end: usize,
}
281
282/// Port of `find_word_boundaries` + `diff_words_fill`: split `text` into
283/// words per `regex` (None = whitespace-separated runs), returning the
284/// original spans. Zero-length regex matches skip one byte, exactly like the
285/// `(*begin)++` upstream.
286fn split_words(text: &[u8], regex: Option<&Regex>) -> Vec<WordSpan> {
287    let mut words = Vec::new();
288    let mut begin = 0usize;
289    while begin < text.len() {
290        match regex {
291            Some(regex) => {
292                let Some((so, eo)) = regex.find_longest_alternative(&text[begin..]) else {
293                    break;
294                };
295                let match_bytes = &text[begin + so..begin + eo];
296                let end = match match_bytes.iter().position(|&b| b == b'\n') {
297                    Some(at) => begin + so + at,
298                    None => begin + eo,
299                };
300                let start = begin + so;
301                if start == end {
302                    begin = start + 1;
303                    continue;
304                }
305                words.push(WordSpan { begin: start, end });
306                begin = end;
307            }
308            None => {
309                while begin < text.len() && is_xdl_space(text[begin]) {
310                    begin += 1;
311                }
312                if begin >= text.len() {
313                    break;
314                }
315                let mut end = begin + 1;
316                while end < text.len() && !is_xdl_space(text[end]) {
317                    end += 1;
318                }
319                words.push(WordSpan { begin, end });
320                begin = end;
321            }
322        }
323    }
324    words
325}
326
/// Whether `byte` separates words when no word regex is configured: space
/// plus the contiguous control range tab, line feed, vertical tab, form
/// feed, and carriage return (`0x09..=0x0d`).
///
/// NOTE(review): this is the C-locale `isspace` set. Git's own `isspace`
/// table may exclude vertical tab and form feed — confirm which set the
/// port is meant to follow.
fn is_xdl_space(byte: u8) -> bool {
    byte == b' ' || (b'\t'..=b'\r').contains(&byte)
}
330
/// The per-hunk word-diff renderer state: accumulated minus/plus text.
///
/// Removed and added line contents are appended as they are seen and are
/// word-diffed together when the buffers are flushed.
#[derive(Debug)]
pub struct WordDiffBuffers {
    /// Concatenated content of the removed lines seen since the last flush.
    minus: Vec<u8>,
    /// Concatenated content of the added lines seen since the last flush.
    plus: Vec<u8>,
}
336
337impl Default for WordDiffBuffers {
338    fn default() -> Self {
339        Self::new()
340    }
341}
342
343impl WordDiffBuffers {
344    pub fn new() -> Self {
345        Self {
346            minus: Vec::new(),
347            plus: Vec::new(),
348        }
349    }
350
351    /// Append one removed line's content (prefix already stripped).
352    pub fn push_minus(&mut self, content: &[u8]) {
353        self.minus.extend_from_slice(content);
354    }
355
356    /// Append one added line's content (prefix already stripped).
357    pub fn push_plus(&mut self, content: &[u8]) {
358        self.plus.extend_from_slice(content);
359    }
360
361    /// Port of `diff_words_show`: word-diff the accumulated buffers into
362    /// `out` and reset them.
363    pub fn flush(&mut self, out: &mut Vec<u8>, config: &WordDiffConfig<'_>) {
364        if self.minus.is_empty() && self.plus.is_empty() {
365            return;
366        }
367        let style = config.style();
368        // Special case: only removal.
369        if self.plus.is_empty() {
370            write_word_helper(out, &style.old_word, style.newline, &self.minus);
371            self.minus.clear();
372            return;
373        }
374        let minus_words = split_words(&self.minus, config.regex);
375        let plus_words = split_words(&self.plus, config.regex);
376        // Word-level diff: each word becomes one "line" for the line differ.
377        let minus_lines: Vec<sley_diff_merge::DiffLine<'_>> = minus_words
378            .iter()
379            .map(|span| sley_diff_merge::DiffLine {
380                content: &self.minus[span.begin..span.end],
381                has_newline: true,
382            })
383            .collect();
384        let plus_lines: Vec<sley_diff_merge::DiffLine<'_>> = plus_words
385            .iter()
386            .map(|span| sley_diff_merge::DiffLine {
387                content: &self.plus[span.begin..span.end],
388                has_newline: true,
389            })
390            .collect();
391        let ops = sley_diff_merge::myers_diff_lines(&minus_lines, &plus_lines);
392
393        // Walk the edit script as (minus_first, minus_len, plus_first,
394        // plus_len) changes, mirroring fn_out_diff_words_aux.
395        let mut current_plus = 0usize; // byte offset into self.plus
396        let mut minus_idx = 0usize;
397        let mut plus_idx = 0usize;
398        let mut pending_del = 0usize;
399        let mut pending_ins = 0usize;
400        let emit_change = |out: &mut Vec<u8>,
401                           minus_first: usize,
402                           minus_len: usize,
403                           plus_first: usize,
404                           plus_len: usize,
405                           current_plus: &mut usize| {
406            let (minus_begin, minus_end) = if minus_len > 0 {
407                (
408                    minus_words[minus_first].begin,
409                    minus_words[minus_first + minus_len - 1].end,
410                )
411            } else {
412                let anchor = if minus_first == 0 {
413                    0
414                } else {
415                    minus_words[minus_first - 1].end
416                };
417                (anchor, anchor)
418            };
419            let (plus_begin, plus_end) = if plus_len > 0 {
420                (
421                    plus_words[plus_first].begin,
422                    plus_words[plus_first + plus_len - 1].end,
423                )
424            } else {
425                let anchor = if plus_first == 0 {
426                    0
427                } else {
428                    plus_words[plus_first - 1].end
429                };
430                (anchor, anchor)
431            };
432            if *current_plus != plus_begin {
433                write_word_helper(
434                    out,
435                    &style.ctx,
436                    style.newline,
437                    &self.plus[*current_plus..plus_begin],
438                );
439            }
440            if minus_begin != minus_end {
441                write_word_helper(
442                    out,
443                    &style.old_word,
444                    style.newline,
445                    &self.minus[minus_begin..minus_end],
446                );
447            }
448            if plus_begin != plus_end {
449                write_word_helper(
450                    out,
451                    &style.new_word,
452                    style.newline,
453                    &self.plus[plus_begin..plus_end],
454                );
455            }
456            *current_plus = plus_end;
457        };
458        for op in ops {
459            match op {
460                sley_diff_merge::DiffOp::Delete(n) => pending_del += n,
461                sley_diff_merge::DiffOp::Insert(n) => pending_ins += n,
462                sley_diff_merge::DiffOp::Equal(n) => {
463                    if pending_del > 0 || pending_ins > 0 {
464                        emit_change(
465                            out,
466                            minus_idx,
467                            pending_del,
468                            plus_idx,
469                            pending_ins,
470                            &mut current_plus,
471                        );
472                        minus_idx += pending_del;
473                        plus_idx += pending_ins;
474                        pending_del = 0;
475                        pending_ins = 0;
476                    }
477                    minus_idx += n;
478                    plus_idx += n;
479                }
480            }
481        }
482        if pending_del > 0 || pending_ins > 0 {
483            emit_change(
484                out,
485                minus_idx,
486                pending_del,
487                plus_idx,
488                pending_ins,
489                &mut current_plus,
490            );
491        }
492        if current_plus != self.plus.len() {
493            write_word_helper(out, &style.ctx, style.newline, &self.plus[current_plus..]);
494        }
495        self.minus.clear();
496        self.plus.clear();
497    }
498
499    /// Emit a context line in word-diff mode (after flushing): porcelain
500    /// keeps the ` ` prefix and appends `~`; plain/color drop the prefix.
501    pub fn emit_context_line(out: &mut Vec<u8>, config: &WordDiffConfig<'_>, content: &[u8]) {
502        let colors = config.colors;
503        match config.mode {
504            WordDiffMode::Porcelain => {
505                let mut line = Vec::with_capacity(content.len() + 1);
506                line.push(b' ');
507                line.extend_from_slice(content);
508                if !line.ends_with(b"\n") {
509                    line.push(b'\n');
510                }
511                push_colored_line(out, &colors.context, &colors.reset, &line);
512                out.extend_from_slice(b"~\n");
513            }
514            WordDiffMode::Plain | WordDiffMode::Color => {
515                let mut line = content.to_vec();
516                if !line.ends_with(b"\n") {
517                    line.push(b'\n');
518                }
519                push_colored_line(out, &colors.context, &colors.reset, &line);
520            }
521        }
522    }
523}