Skip to main content

sley_diff_merge/
render.rs

1//! Unified-diff / patch RENDERER: turn a computed file diff (the old/new
2//! blob contents) into the textual unified-diff hunk body git's `diff.c`
3//! emit path produces (`emit_diff_symbol` / `fn_out_consume`).
4//!
5//! This is the byte-for-byte port of git's hunk emitter: `@@ -os,oc +ns,nc @@
6//! <heading>` hunk headers, the `+`/`-`/context lines, and the
7//! `\ No newline at end of file` marker. It owns hunk *grouping* (combining
8//! changes whose context windows overlap, `xdl_get_hunk`'s `distance >
9//! max_common` break) and hunk *range* computation, then emits each hunk.
10//!
11//! What this module deliberately does NOT own (those stay with the caller,
12//! which has the repository/userdiff/config context):
13//!
14//! * **The per-file metainfo header** (`diff --git`, `index`, `---`/`+++`,
15//!   mode/similarity lines). That is repository- and option-shaped; the
16//!   renderer only produces the hunk body that follows it.
17//! * **Funcname section-heading resolution.** The caller supplies a
18//!   [`HeadingFn`] closure that, given a candidate line, returns its section
19//!   heading (git's `def_ff` default heuristic or a userdiff `xfuncname`
20//!   pattern). The renderer does the *scan upward* for the nearest heading
21//!   line; the caller only classifies a single line.
22//! * **Word-diff body rendering.** When [`HunkRenderOptions::word_diff`] is
23//!   set, the renderer delegates each hunk's body to a [`HunkWordDiff`] hook,
24//!   which the caller implements over its own word-diff machinery.
25//!
26//! The seams keep the byte-shaping (ranges, headers, prefixes, no-newline
27//! markers, color spans) here — the part every diff-emitting command used to
28//! re-derive — while leaving the repository-coupled concerns in the consumer.
29
30use crate::{DiffLine, DiffOp, myers_diff_lines, split_lines};
31
/// Number of context lines placed around each change when the caller does
/// not ask for anything else (the equivalent of git's `-U3`).
pub const DEFAULT_CONTEXT: usize = 3;
34
/// Origin of one emitted diff line, i.e. which prefix character it carries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineKind {
    /// A line shared by both sides; rendered with a leading space.
    Context,
    /// A line that exists only in the old content; rendered with `-`.
    Delete,
    /// A line that exists only in the new content; rendered with `+`.
    Insert,
}
45
46/// One line of the unified diff, with its origin and 0-based positions in the
47/// old/new files (used to compute hunk ranges and feed the word-diff hook).
48#[derive(Clone, Copy)]
49pub struct TaggedLine<'a> {
50    /// Whether the line is context / a deletion / an insertion.
51    pub kind: LineKind,
52    /// The raw line bytes, including the trailing `\n` when present.
53    pub content: &'a [u8],
54    /// 0-based index of this line on the old side.
55    pub old_index: usize,
56    /// 0-based index of this line on the new side.
57    pub new_index: usize,
58}
59
/// ANSI color palette for a unified diff, mirroring git's `diff_get_color`
/// slots. Each field is the raw escape sequence (empty string = no color).
///
/// The renderer only consults the slots it paints in the hunk body; the
/// per-file metainfo slot (`meta`) lives with the caller's header emitter and
/// is intentionally absent here.
///
/// `RenderColors::default()` is the all-empty palette: every slot is the
/// empty string, so no escape bytes are written for any span. Combine it with
/// struct-update syntax to set only the slots you care about.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct RenderColors<'a> {
    /// `color.diff.frag` — the `@@ .. @@` span.
    pub frag: &'a str,
    /// `color.diff.func` — the section heading after the frag.
    pub func: &'a str,
    /// `color.diff.old` — removed (`-`) lines.
    pub old: &'a str,
    /// `color.diff.new` — added (`+`) lines.
    pub new: &'a str,
    /// `color.diff.context` — context (` `) lines and the no-newline marker.
    pub context: &'a str,
    /// The reset sequence terminating each colored span.
    pub reset: &'a str,
    /// `color.diff.whitespace` — the highlight for whitespace errors
    /// (`--ws-error-highlight`).
    pub whitespace: &'a str,
}
84
/// Caller-supplied classifier deciding whether a single line is a section
/// heading.
///
/// The closure receives one candidate line and answers `Some(heading)` if
/// that line should be shown after the `@@ .. @@` fragment (git's `def_ff`
/// default heuristic, or a userdiff `xfuncname` match), `None` if it is not a
/// heading. Walking upward from the hunk to find the nearest heading is the
/// renderer's job; the closure is asked about one line at a time, which lets
/// the caller keep its userdiff-driver / config lookup outside this crate.
pub type HeadingFn<'a> = dyn FnMut(&[u8]) -> Option<Vec<u8>> + 'a;
93
/// Caller-implemented hook that takes over a hunk's body when `--word-diff`
/// is in effect.
///
/// The renderer walks the hunk's tagged lines in order (mirroring the
/// `diff_words` branch of git's `fn_out_consume`): removed lines are handed to
/// [`push_minus`](HunkWordDiff::push_minus), added lines to
/// [`push_plus`](HunkWordDiff::push_plus), and every context line first
/// triggers a [`flush`](HunkWordDiff::flush) and is then passed to
/// [`emit_context_line`](HunkWordDiff::emit_context_line). Word-level
/// rendering and its color spans are entirely the implementor's business, so
/// the word-diff machinery stays on the consumer side.
pub trait HunkWordDiff {
    /// Queue the content of one removed line until the next flush.
    fn push_minus(&mut self, content: &[u8]);
    /// Queue the content of one added line until the next flush.
    fn push_plus(&mut self, content: &[u8]);
    /// Write the word diff of the queued minus/plus content to `out`, then
    /// clear both queues.
    fn flush(&mut self, out: &mut Vec<u8>);
    /// Write a single context line to `out` in the `--word-diff` context
    /// style.
    fn emit_context_line(&mut self, out: &mut Vec<u8>, content: &[u8]);
}
112
113/// Hunk-shaping and styling options for [`render_hunks`].
114///
115/// Lifetimes are split so the funcname / word-diff hooks can be borrowed
116/// mutably while `colors` is borrowed shared.
117pub struct HunkRenderOptions<'a, 'h> {
118    /// Lines of context around each change (`-U<n>`, default
119    /// [`DEFAULT_CONTEXT`]).
120    pub context: usize,
121    /// Extra inter-hunk merging distance (`--inter-hunk-context`).
122    pub interhunk: usize,
123    /// Per-line section-heading classifier; `None` emits headerless hunks.
124    pub heading: Option<&'a mut HeadingFn<'h>>,
125    /// ANSI palette when color output is enabled.
126    pub colors: Option<RenderColors<'a>>,
127    /// Word-diff body hook (replaces the `+`/`-` line bodies of each hunk).
128    pub word_diff: Option<&'a mut dyn HunkWordDiff>,
129    /// `--ws-error-highlight` configuration: when set and colors are on, the
130    /// renderer paints whitespace errors on the selected line kinds with
131    /// `colors.whitespace` (git's `emit_line_ws_markup`). `None` disables it.
132    pub ws_error: Option<WsErrorHighlight>,
133}
134
135/// Which line kinds get whitespace-error highlighting, plus the rule to check
136/// against. git's `--ws-error-highlight` defaults to highlighting only new
137/// (`+`) lines.
138#[derive(Clone, Copy)]
139pub struct WsErrorHighlight {
140    /// The resolved whitespace rule to check each line against.
141    pub rule: crate::ws::WsRule,
142    /// Highlight errors on removed (`-`) lines.
143    pub old: bool,
144    /// Highlight errors on added (`+`) lines.
145    pub new: bool,
146    /// Highlight errors on context (` `) lines.
147    pub context: bool,
148}
149
150impl Default for HunkRenderOptions<'_, '_> {
151    fn default() -> Self {
152        Self {
153            context: DEFAULT_CONTEXT,
154            interhunk: 0,
155            heading: None,
156            colors: None,
157            word_diff: None,
158            ws_error: None,
159        }
160    }
161}
162
163/// Render the unified-diff hunk body for a single file change into `out`.
164///
165/// `old_content` / `new_content` are the full blob contents (`None` for an
166/// absent side — a created or deleted file). The function computes the
167/// line-level Myers diff, groups changes into hunks with `options.context`
168/// lines of surrounding context (merging nearby groups per
169/// `options.interhunk`), and emits each hunk: the `@@` header (with git's
170/// section heading), then the context / `-` / `+` lines including
171/// `\ No newline at end of file` markers.
172///
173/// Nothing is written when the contents are identical (no changed lines).
174/// This is the body *after* the per-file metainfo header the caller emits.
175pub fn render_hunks(
176    out: &mut Vec<u8>,
177    old_content: Option<&[u8]>,
178    new_content: Option<&[u8]>,
179    options: &mut HunkRenderOptions<'_, '_>,
180) {
181    let old = split_lines(old_content.unwrap_or_default());
182    let new = split_lines(new_content.unwrap_or_default());
183    let ops = myers_diff_lines(&old, &new);
184
185    // Flatten the edit script into a tagged line stream carrying old/new
186    // positions.
187    let mut tagged: Vec<TaggedLine<'_>> = Vec::new();
188    let mut old_idx = 0usize;
189    let mut new_idx = 0usize;
190    for op in ops {
191        match op {
192            DiffOp::Equal(n) => {
193                for _ in 0..n {
194                    tagged.push(TaggedLine {
195                        kind: LineKind::Context,
196                        content: old[old_idx].content,
197                        old_index: old_idx,
198                        new_index: new_idx,
199                    });
200                    old_idx += 1;
201                    new_idx += 1;
202                }
203            }
204            DiffOp::Delete(n) => {
205                for _ in 0..n {
206                    tagged.push(TaggedLine {
207                        kind: LineKind::Delete,
208                        content: old[old_idx].content,
209                        old_index: old_idx,
210                        new_index: new_idx,
211                    });
212                    old_idx += 1;
213                }
214            }
215            DiffOp::Insert(n) => {
216                for _ in 0..n {
217                    tagged.push(TaggedLine {
218                        kind: LineKind::Insert,
219                        content: new[new_idx].content,
220                        old_index: old_idx,
221                        new_index: new_idx,
222                    });
223                    new_idx += 1;
224                }
225            }
226        }
227    }
228
229    // Indices of changed (non-context) lines.
230    let change_positions: Vec<usize> = tagged
231        .iter()
232        .enumerate()
233        .filter(|(_, line)| line.kind != LineKind::Context)
234        .map(|(idx, _)| idx)
235        .collect();
236    if change_positions.is_empty() {
237        return;
238    }
239
240    // Group changes whose context windows overlap into single hunks.
241    let mut groups: Vec<(usize, usize)> = Vec::new();
242    let mut group_start = change_positions[0];
243    let mut group_end = change_positions[0];
244    for &pos in &change_positions[1..] {
245        // Two change runs merge when at most 2*context (+ interhunk) equal
246        // lines separate them, mirroring xdl_get_hunk's `distance >
247        // max_common` break (the position gap counts the separating equal
248        // lines plus one, so adjacent delete/insert runs always merge).
249        if pos - group_end <= 2 * options.context + options.interhunk + 1 {
250            group_end = pos;
251        } else {
252            groups.push((group_start, group_end));
253            group_start = pos;
254            group_end = pos;
255        }
256    }
257    groups.push((group_start, group_end));
258
259    for (first_change, last_change) in groups {
260        let hunk_start = first_change.saturating_sub(options.context);
261        let hunk_end = (last_change + options.context + 1).min(tagged.len());
262        render_one_hunk(out, &tagged, &old, hunk_start, hunk_end, options);
263    }
264}
265
266/// Emit a single hunk covering `tagged[start..end]`: the `@@ -os,oc +ns,nc @@
267/// <heading>` header followed by the context/`-`/`+` lines, including the
268/// `\ No newline at end of file` markers.
269fn render_one_hunk(
270    out: &mut Vec<u8>,
271    tagged: &[TaggedLine<'_>],
272    old_lines: &[DiffLine<'_>],
273    start: usize,
274    end: usize,
275    options: &mut HunkRenderOptions<'_, '_>,
276) {
277    let slice = &tagged[start..end];
278    let mut old_count = 0usize;
279    let mut new_count = 0usize;
280    for line in slice {
281        match line.kind {
282            LineKind::Context => {
283                old_count += 1;
284                new_count += 1;
285            }
286            LineKind::Delete => old_count += 1,
287            LineKind::Insert => new_count += 1,
288        }
289    }
290    // 1-based starting line numbers; an empty side starts at 0.
291    let old_start = if old_count == 0 {
292        slice.first().map(|line| line.old_index).unwrap_or(0)
293    } else {
294        slice
295            .iter()
296            .find(|line| line.kind != LineKind::Insert)
297            .map(|line| line.old_index + 1)
298            .unwrap_or(1)
299    };
300    let new_start = if new_count == 0 {
301        slice.first().map(|line| line.new_index).unwrap_or(0)
302    } else {
303        slice
304            .iter()
305            .find(|line| line.kind != LineKind::Delete)
306            .map(|line| line.new_index + 1)
307            .unwrap_or(1)
308    };
309
310    let heading = hunk_section_heading(
311        old_lines,
312        slice.first().map(|line| line.old_index),
313        options.heading.as_deref_mut(),
314    );
315    let frag = format!(
316        "@@ -{} +{} @@",
317        format_hunk_range(old_start, old_count),
318        format_hunk_range(new_start, new_count)
319    );
320    match options.colors {
321        // Port of emit_hunk_header: the "@@ .. @@" span in the frag color,
322        // the separating blank in the context color, the heading in the func
323        // color (each reset-terminated).
324        Some(colors) => {
325            out.extend_from_slice(colors.frag.as_bytes());
326            out.extend_from_slice(frag.as_bytes());
327            out.extend_from_slice(colors.reset.as_bytes());
328            if let Some(heading) = &heading {
329                out.extend_from_slice(colors.context.as_bytes());
330                out.push(b' ');
331                out.extend_from_slice(colors.reset.as_bytes());
332                out.extend_from_slice(colors.func.as_bytes());
333                out.extend_from_slice(heading);
334                out.extend_from_slice(colors.reset.as_bytes());
335            }
336            out.push(b'\n');
337        }
338        None => {
339            out.extend_from_slice(frag.as_bytes());
340            if let Some(heading) = &heading {
341                out.push(b' ');
342                out.extend_from_slice(heading);
343            }
344            out.push(b'\n');
345        }
346    }
347
348    if let Some(word_diff) = options.word_diff.as_deref_mut() {
349        // Word-diff rendering: minus/plus runs accumulate and flush at
350        // context lines (fn_out_consume's diff_words branch); the
351        // "\ No newline" markers are eaten.
352        for line in slice {
353            match line.kind {
354                LineKind::Delete => word_diff.push_minus(line.content),
355                LineKind::Insert => word_diff.push_plus(line.content),
356                LineKind::Context => {
357                    word_diff.flush(out);
358                    word_diff.emit_context_line(out, line.content);
359                }
360            }
361        }
362        word_diff.flush(out);
363        return;
364    }
365
366    for line in slice {
367        let prefix = match line.kind {
368            LineKind::Context => b' ',
369            LineKind::Delete => b'-',
370            LineKind::Insert => b'+',
371        };
372        match options.colors {
373            Some(colors) => {
374                // Whitespace-error highlighting applies to the selected line
375                // kinds (default: new lines only).
376                let ws_rule = options.ws_error.and_then(|ws| {
377                    let enabled = match line.kind {
378                        LineKind::Context => ws.context,
379                        LineKind::Delete => ws.old,
380                        LineKind::Insert => ws.new,
381                    };
382                    enabled.then_some(ws.rule)
383                });
384                write_patch_line_colored(out, prefix, line.content, colors, ws_rule);
385            }
386            None => write_patch_line(out, prefix, line.content),
387        }
388    }
389}
390
/// Render one side of an `@@` header as `start,count`, except that a count
/// of exactly 1 is left implicit, as git does (`+5`, not `+5,1`).
fn format_hunk_range(start: usize, count: usize) -> String {
    match count {
        1 => start.to_string(),
        _ => format!("{start},{count}"),
    }
}
400
401/// git's section heading for a hunk: the nearest line *before* the hunk's
402/// first line accepted by the caller's `heading` classifier. Headings are
403/// produced by the classifier (already capped/trimmed by the caller's
404/// userdiff machinery). Returns `None` when no such line precedes the hunk or
405/// no classifier was supplied.
406fn hunk_section_heading(
407    old_lines: &[DiffLine<'_>],
408    first_old_index: Option<usize>,
409    mut heading: Option<&mut HeadingFn<'_>>,
410) -> Option<Vec<u8>> {
411    let first = first_old_index?;
412    let classifier = heading.as_mut()?;
413    // Scan upward from the line just above the hunk.
414    for idx in (0..first).rev() {
415        if let Some(found) = classifier(old_lines[idx].content) {
416            return Some(found);
417        }
418    }
419    None
420}
421
/// Append one uncolored diff line to `out`: the `prefix` marker byte followed
/// by the raw `line` bytes. If `line` does not end in LF, a newline and the
/// `\ No newline at end of file` note line are appended after it.
fn write_patch_line(out: &mut Vec<u8>, prefix: u8, line: &[u8]) {
    out.push(prefix);
    out.extend_from_slice(line);
    match line.last() {
        Some(b'\n') => {}
        _ => out.extend_from_slice(b"\n\\ No newline at end of file\n"),
    }
}
432
433/// [`write_patch_line`] in color, optionally painting whitespace errors.
434///
435/// When `ws_rule` is `Some`, the line body is emitted through
436/// [`crate::ws::ws_check_emit`] (git's `emit_line_ws_markup` highlighted
437/// branch): the sign is painted in the line color, then the body's non-error
438/// segments in the line color and its whitespace-error segments in
439/// `colors.whitespace`. A clean line produces no whitespace spans, so it stays
440/// visually plain.
441///
442/// When `ws_rule` is `None`, context/old lines paint the sign and body in one
443/// span; new lines paint the sign and body as separate spans (the default
444/// `ws-error-highlight` path with no rule).
445fn write_patch_line_colored(
446    out: &mut Vec<u8>,
447    prefix: u8,
448    line: &[u8],
449    colors: RenderColors<'_>,
450    ws_rule: Option<crate::ws::WsRule>,
451) {
452    let (body, terminated) = match line.split_last() {
453        Some((b'\n', body)) => (body, true),
454        _ => (line, false),
455    };
456    let color = match prefix {
457        b'-' => colors.old,
458        b'+' => colors.new,
459        _ => colors.context,
460    };
461
462    if let Some(rule) = ws_rule {
463        // Sign in the line color, then the body through ws_check_emit (no
464        // trailing newline in `body`, so the emit's own LF handling is inert).
465        out.extend_from_slice(color.as_bytes());
466        out.push(prefix);
467        out.extend_from_slice(colors.reset.as_bytes());
468        let emit_colors = crate::ws::WsEmitColors {
469            set: color,
470            reset: colors.reset,
471            ws: colors.whitespace,
472        };
473        crate::ws::ws_check_emit(body, rule, out, &emit_colors);
474        out.push(b'\n');
475        if !terminated {
476            out.extend_from_slice(colors.context.as_bytes());
477            out.extend_from_slice(b"\\ No newline at end of file");
478            out.extend_from_slice(colors.reset.as_bytes());
479            out.push(b'\n');
480        }
481        return;
482    }
483
484    if prefix == b'+' {
485        out.extend_from_slice(color.as_bytes());
486        out.push(prefix);
487        out.extend_from_slice(colors.reset.as_bytes());
488        if !body.is_empty() {
489            out.extend_from_slice(color.as_bytes());
490            out.extend_from_slice(body);
491            out.extend_from_slice(colors.reset.as_bytes());
492        }
493    } else {
494        out.extend_from_slice(color.as_bytes());
495        out.push(prefix);
496        out.extend_from_slice(body);
497        out.extend_from_slice(colors.reset.as_bytes());
498    }
499    out.push(b'\n');
500    if !terminated {
501        out.extend_from_slice(colors.context.as_bytes());
502        out.extend_from_slice(b"\\ No newline at end of file");
503        out.extend_from_slice(colors.reset.as_bytes());
504        out.push(b'\n');
505    }
506}
507
#[cfg(test)]
mod tests {
    use super::*;

    /// Render `old` -> `new` with default options (3 lines of context, no
    /// color, no heading classifier) and return the produced bytes.
    fn render_plain(old: Option<&[u8]>, new: Option<&[u8]>) -> Vec<u8> {
        let mut rendered = Vec::new();
        render_hunks(
            &mut rendered,
            old,
            new,
            &mut HunkRenderOptions::default(),
        );
        rendered
    }

    #[test]
    fn identical_content_renders_nothing() {
        let same: &[u8] = b"a\nb\n";
        assert!(render_plain(Some(same), Some(same)).is_empty());
    }

    #[test]
    fn single_line_change_basic_hunk() {
        let rendered = render_plain(
            Some(b"alpha\nbeta\ngamma\n"),
            Some(b"alpha\nBETA\ngamma\n"),
        );
        assert_eq!(rendered, b"@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n");
    }

    #[test]
    fn count_omitted_when_one() {
        // One line replaced by one line: both ranges drop the `,1`.
        let rendered = render_plain(Some(b"old\n"), Some(b"new\n"));
        assert_eq!(rendered, b"@@ -1 +1 @@\n-old\n+new\n");
    }

    #[test]
    fn no_newline_marker_on_old_side() {
        let rendered = render_plain(Some(b"only line no newline"), None);
        assert_eq!(
            rendered,
            b"@@ -1 +0,0 @@\n-only line no newline\n\\ No newline at end of file\n",
        );
    }

    #[test]
    fn no_newline_marker_on_new_side() {
        let rendered = render_plain(Some(b"beta\n"), Some(b"beta-notail"));
        assert_eq!(
            rendered,
            b"@@ -1 +1 @@\n-beta\n+beta-notail\n\\ No newline at end of file\n",
        );
    }

    #[test]
    fn pure_insertion_into_empty() {
        let rendered = render_plain(None, Some(b"x\ny\n"));
        assert_eq!(rendered, b"@@ -0,0 +1,2 @@\n+x\n+y\n");
    }

    #[test]
    fn distant_changes_split_into_two_hunks() {
        // First and last line change; the eight untouched lines between them
        // exceed what two 3-line context windows can bridge.
        let old: &[u8] = b"a\nb\nc\nd\ne\nf\ng\nh\ni\nj\n";
        let new: &[u8] = b"A\nb\nc\nd\ne\nf\ng\nh\ni\nJ\n";
        let text = String::from_utf8(render_plain(Some(old), Some(new)))
            .expect("rendered output is valid UTF-8");
        let hunk_headers = text.matches("@@ ").count();
        assert_eq!(hunk_headers, 2, "expected two hunks: {text}");
    }

    #[test]
    fn heading_callback_supplies_section() {
        // The edit sits deep enough inside `fn foo()` that the signature line
        // lies above the hunk, which is the only place the heading scan looks
        // (an edit reaching line 1 would rightly get no heading).
        let old: &[u8] =
            b"fn foo() {\n    a\n    b\n    c\n    d\n    e\n    f\n    g\n}\n";
        let new: &[u8] =
            b"fn foo() {\n    a\n    b\n    c\n    d\n    CHANGED\n    f\n    g\n}\n";

        // Crude def_ff stand-in: a line is a heading when it begins with an
        // ASCII letter; the heading is the line minus its trailing LF.
        let mut heading_fn = |line: &[u8]| -> Option<Vec<u8>> {
            match line.first() {
                Some(byte) if byte.is_ascii_alphabetic() => {
                    Some(line.strip_suffix(b"\n").unwrap_or(line).to_vec())
                }
                _ => None,
            }
        };
        let mut options = HunkRenderOptions {
            heading: Some(&mut heading_fn),
            ..Default::default()
        };

        let mut rendered = Vec::new();
        render_hunks(&mut rendered, Some(old), Some(new), &mut options);
        let text = String::from_utf8(rendered).expect("rendered output is valid UTF-8");
        assert!(
            text.starts_with("@@ -3,7 +3,7 @@ fn foo() {\n"),
            "expected funcname heading: {text}",
        );
    }
}