sley_diff_merge/render.rs
1//! Unified-diff / patch RENDERER: turn a computed file diff (the old/new
2//! blob contents) into the textual unified-diff hunk body git's `diff.c`
3//! emit path produces (`emit_diff_symbol` / `fn_out_consume`).
4//!
5//! This is the byte-for-byte port of git's hunk emitter: `@@ -os,oc +ns,nc @@
6//! <heading>` hunk headers, the `+`/`-`/context lines, and the
7//! `\ No newline at end of file` marker. It owns hunk *grouping* (combining
8//! changes whose context windows overlap, `xdl_get_hunk`'s `distance >
9//! max_common` break) and hunk *range* computation, then emits each hunk.
10//!
11//! What this module deliberately does NOT own (those stay with the caller,
12//! which has the repository/userdiff/config context):
13//!
14//! * **The per-file metainfo header** (`diff --git`, `index`, `---`/`+++`,
15//! mode/similarity lines). That is repository- and option-shaped; the
16//! renderer only produces the hunk body that follows it.
17//! * **Funcname section-heading resolution.** The caller supplies a
18//! [`HeadingFn`] closure that, given a candidate line, returns its section
19//! heading (git's `def_ff` default heuristic or a userdiff `xfuncname`
20//! pattern). The renderer does the *scan upward* for the nearest heading
21//! line; the caller only classifies a single line.
22//! * **Word-diff body rendering.** When [`HunkRenderOptions::word_diff`] is
23//! set, the renderer delegates each hunk's body to a [`HunkWordDiff`] hook,
24//! which the caller implements over its own word-diff machinery.
25//!
26//! The seams keep the byte-shaping (ranges, headers, prefixes, no-newline
27//! markers, color spans) here — the part every diff-emitting command used to
28//! re-derive — while leaving the repository-coupled concerns in the consumer.
29
30use crate::{DiffLine, DiffOp, myers_diff_lines, split_lines};
31
/// git's default hunk context (`-U3`): the number of unchanged lines kept on
/// each side of a change. This is the value the `Default` impl of
/// [`HunkRenderOptions`] uses for its `context` field.
pub const DEFAULT_CONTEXT: usize = 3;
34
/// The per-line origin marker for an emitted diff line.
///
/// Each variant maps to one of the three unified-diff line prefixes
/// (` `, `-`, `+`) and decides which side(s) of the hunk range the line
/// counts toward.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum LineKind {
    /// An unchanged (` `) line, present on both sides; it counts toward both
    /// the old and the new line count of a hunk.
    Context,
    /// A removed (`-`) line, present only on the old side.
    Delete,
    /// An added (`+`) line, present only on the new side.
    Insert,
}
45
46/// One line of the unified diff, with its origin and 0-based positions in the
47/// old/new files (used to compute hunk ranges and feed the word-diff hook).
48#[derive(Clone, Copy)]
49pub struct TaggedLine<'a> {
50 /// Whether the line is context / a deletion / an insertion.
51 pub kind: LineKind,
52 /// The raw line bytes, including the trailing `\n` when present.
53 pub content: &'a [u8],
54 /// 0-based index of this line on the old side.
55 pub old_index: usize,
56 /// 0-based index of this line on the new side.
57 pub new_index: usize,
58}
59
/// ANSI color palette for a unified diff, mirroring git's `diff_get_color`
/// slots. Each field is the raw escape sequence (empty string = no color).
///
/// The renderer only consults the slots it paints in the hunk body; the
/// per-file metainfo slot (`meta`) lives with the caller's header emitter and
/// is intentionally absent here.
///
/// The palette is a plain bundle of borrowed strings, so it is cheap to copy,
/// printable for diagnostics, and comparable in tests.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RenderColors<'a> {
    /// `color.diff.frag` — the `@@ .. @@` span.
    pub frag: &'a str,
    /// `color.diff.func` — the section heading after the frag.
    pub func: &'a str,
    /// `color.diff.old` — removed (`-`) lines.
    pub old: &'a str,
    /// `color.diff.new` — added (`+`) lines.
    pub new: &'a str,
    /// `color.diff.context` — context (` `) lines and the no-newline marker.
    pub context: &'a str,
    /// The reset sequence terminating each colored span.
    pub reset: &'a str,
    /// `color.diff.whitespace` — the highlight for whitespace errors
    /// (`--ws-error-highlight`).
    pub whitespace: &'a str,
}
84
/// Resolve the section heading for one candidate line.
///
/// Returns `Some(heading)` when `line` is a heading line (git's `def_ff`
/// default heuristic or a userdiff `xfuncname` match) and `None` otherwise.
/// The renderer scans upward from each hunk's first line and uses the first
/// `Some` it finds — the caller only has to classify a single line, so it can
/// keep its userdiff-driver / config resolution out of this crate.
///
/// The returned bytes are written verbatim after the `@@ .. @@` span and the
/// renderer appends the line terminator itself, so a heading should not carry
/// a trailing newline.
pub type HeadingFn<'a> = dyn FnMut(&[u8]) -> Option<Vec<u8>> + 'a;
93
/// A hook that renders a single hunk's body when `--word-diff` is active.
///
/// The renderer feeds the hunk's tagged lines through this in order
/// (`fn_out_consume`'s `diff_words` branch): each removed line is pushed to
/// the minus buffer, each added line to the plus buffer, and a context line
/// flushes the accumulated word diff before emitting the context line itself.
/// The implementor owns the actual word-level rendering and color spans; this
/// keeps the word-diff machinery in the consumer.
///
/// The renderer still writes the `@@` hunk header itself; only the lines
/// below it are routed through this hook.
pub trait HunkWordDiff {
    /// Buffer one removed line's content for the next word-diff flush.
    fn push_minus(&mut self, content: &[u8]);
    /// Buffer one added line's content for the next word-diff flush.
    fn push_plus(&mut self, content: &[u8]);
    /// Word-diff the accumulated minus/plus buffers into `out` and reset them.
    ///
    /// Called before every context line and once more after the hunk's last
    /// line, so it may be invoked with nothing buffered.
    fn flush(&mut self, out: &mut Vec<u8>);
    /// Emit one context line (the `--word-diff` context style).
    ///
    /// `content` is the raw line as stored in the diff, including its
    /// trailing `\n` when the source line had one.
    fn emit_context_line(&mut self, out: &mut Vec<u8>, content: &[u8]);
}
112
/// Hunk-shaping and styling options for [`render_hunks`].
///
/// Lifetimes are split so the funcname / word-diff hooks can be borrowed
/// mutably while `colors` is borrowed shared.
///
/// The `Default` impl gives [`DEFAULT_CONTEXT`] lines of context, no extra
/// inter-hunk distance, and every optional hook/palette disabled.
pub struct HunkRenderOptions<'a, 'h> {
    /// Lines of context around each change (`-U<n>`, default
    /// [`DEFAULT_CONTEXT`]).
    pub context: usize,
    /// Extra inter-hunk merging distance (`--inter-hunk-context`); added on
    /// top of `2 * context` when deciding whether two changes share a hunk.
    pub interhunk: usize,
    /// Per-line section-heading classifier; `None` emits headerless hunks.
    pub heading: Option<&'a mut HeadingFn<'h>>,
    /// ANSI palette when color output is enabled.
    pub colors: Option<RenderColors<'a>>,
    /// Word-diff body hook (replaces the `+`/`-` line bodies of each hunk).
    /// When set, the hunk body is produced entirely by the hook, so
    /// `ws_error` and the per-line colors are not applied to it.
    pub word_diff: Option<&'a mut dyn HunkWordDiff>,
    /// `--ws-error-highlight` configuration: when set and colors are on, the
    /// renderer paints whitespace errors on the selected line kinds with
    /// `colors.whitespace` (git's `emit_line_ws_markup`). `None` disables it.
    pub ws_error: Option<WsErrorHighlight>,
}
134
/// Which line kinds get whitespace-error highlighting, plus the rule to check
/// against. git's `--ws-error-highlight` defaults to highlighting only new
/// (`+`) lines.
///
/// The renderer only consults this when a color palette is configured; with
/// colors off the flags have no effect.
#[derive(Clone, Copy)]
pub struct WsErrorHighlight {
    /// The resolved whitespace rule to check each line against.
    pub rule: crate::ws::WsRule,
    /// Highlight errors on removed (`-`) lines.
    pub old: bool,
    /// Highlight errors on added (`+`) lines.
    pub new: bool,
    /// Highlight errors on context (` `) lines.
    pub context: bool,
}
149
150impl Default for HunkRenderOptions<'_, '_> {
151 fn default() -> Self {
152 Self {
153 context: DEFAULT_CONTEXT,
154 interhunk: 0,
155 heading: None,
156 colors: None,
157 word_diff: None,
158 ws_error: None,
159 }
160 }
161}
162
163/// Render the unified-diff hunk body for a single file change into `out`.
164///
165/// `old_content` / `new_content` are the full blob contents (`None` for an
166/// absent side — a created or deleted file). The function computes the
167/// line-level Myers diff, groups changes into hunks with `options.context`
168/// lines of surrounding context (merging nearby groups per
169/// `options.interhunk`), and emits each hunk: the `@@` header (with git's
170/// section heading), then the context / `-` / `+` lines including
171/// `\ No newline at end of file` markers.
172///
173/// Nothing is written when the contents are identical (no changed lines).
174/// This is the body *after* the per-file metainfo header the caller emits.
175pub fn render_hunks(
176 out: &mut Vec<u8>,
177 old_content: Option<&[u8]>,
178 new_content: Option<&[u8]>,
179 options: &mut HunkRenderOptions<'_, '_>,
180) {
181 let old = split_lines(old_content.unwrap_or_default());
182 let new = split_lines(new_content.unwrap_or_default());
183 let ops = myers_diff_lines(&old, &new);
184
185 // Flatten the edit script into a tagged line stream carrying old/new
186 // positions.
187 let mut tagged: Vec<TaggedLine<'_>> = Vec::new();
188 let mut old_idx = 0usize;
189 let mut new_idx = 0usize;
190 for op in ops {
191 match op {
192 DiffOp::Equal(n) => {
193 for _ in 0..n {
194 tagged.push(TaggedLine {
195 kind: LineKind::Context,
196 content: old[old_idx].content,
197 old_index: old_idx,
198 new_index: new_idx,
199 });
200 old_idx += 1;
201 new_idx += 1;
202 }
203 }
204 DiffOp::Delete(n) => {
205 for _ in 0..n {
206 tagged.push(TaggedLine {
207 kind: LineKind::Delete,
208 content: old[old_idx].content,
209 old_index: old_idx,
210 new_index: new_idx,
211 });
212 old_idx += 1;
213 }
214 }
215 DiffOp::Insert(n) => {
216 for _ in 0..n {
217 tagged.push(TaggedLine {
218 kind: LineKind::Insert,
219 content: new[new_idx].content,
220 old_index: old_idx,
221 new_index: new_idx,
222 });
223 new_idx += 1;
224 }
225 }
226 }
227 }
228
229 // Indices of changed (non-context) lines.
230 let change_positions: Vec<usize> = tagged
231 .iter()
232 .enumerate()
233 .filter(|(_, line)| line.kind != LineKind::Context)
234 .map(|(idx, _)| idx)
235 .collect();
236 if change_positions.is_empty() {
237 return;
238 }
239
240 // Group changes whose context windows overlap into single hunks.
241 let mut groups: Vec<(usize, usize)> = Vec::new();
242 let mut group_start = change_positions[0];
243 let mut group_end = change_positions[0];
244 for &pos in &change_positions[1..] {
245 // Two change runs merge when at most 2*context (+ interhunk) equal
246 // lines separate them, mirroring xdl_get_hunk's `distance >
247 // max_common` break (the position gap counts the separating equal
248 // lines plus one, so adjacent delete/insert runs always merge).
249 if pos - group_end <= 2 * options.context + options.interhunk + 1 {
250 group_end = pos;
251 } else {
252 groups.push((group_start, group_end));
253 group_start = pos;
254 group_end = pos;
255 }
256 }
257 groups.push((group_start, group_end));
258
259 for (first_change, last_change) in groups {
260 let hunk_start = first_change.saturating_sub(options.context);
261 let hunk_end = (last_change + options.context + 1).min(tagged.len());
262 render_one_hunk(out, &tagged, &old, hunk_start, hunk_end, options);
263 }
264}
265
266/// Emit a single hunk covering `tagged[start..end]`: the `@@ -os,oc +ns,nc @@
267/// <heading>` header followed by the context/`-`/`+` lines, including the
268/// `\ No newline at end of file` markers.
269fn render_one_hunk(
270 out: &mut Vec<u8>,
271 tagged: &[TaggedLine<'_>],
272 old_lines: &[DiffLine<'_>],
273 start: usize,
274 end: usize,
275 options: &mut HunkRenderOptions<'_, '_>,
276) {
277 let slice = &tagged[start..end];
278 let mut old_count = 0usize;
279 let mut new_count = 0usize;
280 for line in slice {
281 match line.kind {
282 LineKind::Context => {
283 old_count += 1;
284 new_count += 1;
285 }
286 LineKind::Delete => old_count += 1,
287 LineKind::Insert => new_count += 1,
288 }
289 }
290 // 1-based starting line numbers; an empty side starts at 0.
291 let old_start = if old_count == 0 {
292 slice.first().map(|line| line.old_index).unwrap_or(0)
293 } else {
294 slice
295 .iter()
296 .find(|line| line.kind != LineKind::Insert)
297 .map(|line| line.old_index + 1)
298 .unwrap_or(1)
299 };
300 let new_start = if new_count == 0 {
301 slice.first().map(|line| line.new_index).unwrap_or(0)
302 } else {
303 slice
304 .iter()
305 .find(|line| line.kind != LineKind::Delete)
306 .map(|line| line.new_index + 1)
307 .unwrap_or(1)
308 };
309
310 let heading = hunk_section_heading(
311 old_lines,
312 slice.first().map(|line| line.old_index),
313 options.heading.as_deref_mut(),
314 );
315 let frag = format!(
316 "@@ -{} +{} @@",
317 format_hunk_range(old_start, old_count),
318 format_hunk_range(new_start, new_count)
319 );
320 match options.colors {
321 // Port of emit_hunk_header: the "@@ .. @@" span in the frag color,
322 // the separating blank in the context color, the heading in the func
323 // color (each reset-terminated).
324 Some(colors) => {
325 out.extend_from_slice(colors.frag.as_bytes());
326 out.extend_from_slice(frag.as_bytes());
327 out.extend_from_slice(colors.reset.as_bytes());
328 if let Some(heading) = &heading {
329 out.extend_from_slice(colors.context.as_bytes());
330 out.push(b' ');
331 out.extend_from_slice(colors.reset.as_bytes());
332 out.extend_from_slice(colors.func.as_bytes());
333 out.extend_from_slice(heading);
334 out.extend_from_slice(colors.reset.as_bytes());
335 }
336 out.push(b'\n');
337 }
338 None => {
339 out.extend_from_slice(frag.as_bytes());
340 if let Some(heading) = &heading {
341 out.push(b' ');
342 out.extend_from_slice(heading);
343 }
344 out.push(b'\n');
345 }
346 }
347
348 if let Some(word_diff) = options.word_diff.as_deref_mut() {
349 // Word-diff rendering: minus/plus runs accumulate and flush at
350 // context lines (fn_out_consume's diff_words branch); the
351 // "\ No newline" markers are eaten.
352 for line in slice {
353 match line.kind {
354 LineKind::Delete => word_diff.push_minus(line.content),
355 LineKind::Insert => word_diff.push_plus(line.content),
356 LineKind::Context => {
357 word_diff.flush(out);
358 word_diff.emit_context_line(out, line.content);
359 }
360 }
361 }
362 word_diff.flush(out);
363 return;
364 }
365
366 for line in slice {
367 let prefix = match line.kind {
368 LineKind::Context => b' ',
369 LineKind::Delete => b'-',
370 LineKind::Insert => b'+',
371 };
372 match options.colors {
373 Some(colors) => {
374 // Whitespace-error highlighting applies to the selected line
375 // kinds (default: new lines only).
376 let ws_rule = options.ws_error.and_then(|ws| {
377 let enabled = match line.kind {
378 LineKind::Context => ws.context,
379 LineKind::Delete => ws.old,
380 LineKind::Insert => ws.new,
381 };
382 enabled.then_some(ws.rule)
383 });
384 write_patch_line_colored(out, prefix, line.content, colors, ws_rule);
385 }
386 None => write_patch_line(out, prefix, line.content),
387 }
388 }
389}
390
/// Render one side of an `@@` header as `start,count`, collapsing to just
/// `start` when the side spans exactly one line (git prints `+5`, never
/// `+5,1`). A count of zero is kept explicit (`0,0`).
fn format_hunk_range(start: usize, count: usize) -> String {
    let mut range = start.to_string();
    if count != 1 {
        range.push(',');
        range.push_str(&count.to_string());
    }
    range
}
400
401/// git's section heading for a hunk: the nearest line *before* the hunk's
402/// first line accepted by the caller's `heading` classifier. Headings are
403/// produced by the classifier (already capped/trimmed by the caller's
404/// userdiff machinery). Returns `None` when no such line precedes the hunk or
405/// no classifier was supplied.
406fn hunk_section_heading(
407 old_lines: &[DiffLine<'_>],
408 first_old_index: Option<usize>,
409 mut heading: Option<&mut HeadingFn<'_>>,
410) -> Option<Vec<u8>> {
411 let first = first_old_index?;
412 let classifier = heading.as_mut()?;
413 // Scan upward from the line just above the hunk.
414 for idx in (0..first).rev() {
415 if let Some(found) = classifier(old_lines[idx].content) {
416 return Some(found);
417 }
418 }
419 None
420}
421
/// Append one uncolored diff line: the `prefix` marker followed by the raw
/// line bytes. A line that does not end in LF gets a line break plus git's
/// `\ No newline at end of file` note appended after it.
fn write_patch_line(out: &mut Vec<u8>, prefix: u8, line: &[u8]) {
    const MISSING_LF_NOTE: &[u8] = b"\n\\ No newline at end of file\n";

    out.push(prefix);
    out.extend_from_slice(line);
    let terminated = matches!(line.last(), Some(b'\n'));
    if !terminated {
        out.extend_from_slice(MISSING_LF_NOTE);
    }
}
432
433/// [`write_patch_line`] in color, optionally painting whitespace errors.
434///
435/// When `ws_rule` is `Some`, the line body is emitted through
436/// [`crate::ws::ws_check_emit`] (git's `emit_line_ws_markup` highlighted
437/// branch): the sign is painted in the line color, then the body's non-error
438/// segments in the line color and its whitespace-error segments in
439/// `colors.whitespace`. A clean line produces no whitespace spans, so it stays
440/// visually plain.
441///
442/// When `ws_rule` is `None`, context/old lines paint the sign and body in one
443/// span; new lines paint the sign and body as separate spans (the default
444/// `ws-error-highlight` path with no rule).
445fn write_patch_line_colored(
446 out: &mut Vec<u8>,
447 prefix: u8,
448 line: &[u8],
449 colors: RenderColors<'_>,
450 ws_rule: Option<crate::ws::WsRule>,
451) {
452 let (body, terminated) = match line.split_last() {
453 Some((b'\n', body)) => (body, true),
454 _ => (line, false),
455 };
456 let color = match prefix {
457 b'-' => colors.old,
458 b'+' => colors.new,
459 _ => colors.context,
460 };
461
462 if let Some(rule) = ws_rule {
463 // Sign in the line color, then the body through ws_check_emit (no
464 // trailing newline in `body`, so the emit's own LF handling is inert).
465 out.extend_from_slice(color.as_bytes());
466 out.push(prefix);
467 out.extend_from_slice(colors.reset.as_bytes());
468 let emit_colors = crate::ws::WsEmitColors {
469 set: color,
470 reset: colors.reset,
471 ws: colors.whitespace,
472 };
473 crate::ws::ws_check_emit(body, rule, out, &emit_colors);
474 out.push(b'\n');
475 if !terminated {
476 out.extend_from_slice(colors.context.as_bytes());
477 out.extend_from_slice(b"\\ No newline at end of file");
478 out.extend_from_slice(colors.reset.as_bytes());
479 out.push(b'\n');
480 }
481 return;
482 }
483
484 if prefix == b'+' {
485 out.extend_from_slice(color.as_bytes());
486 out.push(prefix);
487 out.extend_from_slice(colors.reset.as_bytes());
488 if !body.is_empty() {
489 out.extend_from_slice(color.as_bytes());
490 out.extend_from_slice(body);
491 out.extend_from_slice(colors.reset.as_bytes());
492 }
493 } else {
494 out.extend_from_slice(color.as_bytes());
495 out.push(prefix);
496 out.extend_from_slice(body);
497 out.extend_from_slice(colors.reset.as_bytes());
498 }
499 out.push(b'\n');
500 if !terminated {
501 out.extend_from_slice(colors.context.as_bytes());
502 out.extend_from_slice(b"\\ No newline at end of file");
503 out.extend_from_slice(colors.reset.as_bytes());
504 out.push(b'\n');
505 }
506}
507
#[cfg(test)]
mod tests {
    use super::*;

    /// Render `old` → `new` with the default options (three lines of context,
    /// no color, no heading classifier) and hand back the produced bytes.
    fn render_plain(old: Option<&[u8]>, new: Option<&[u8]>) -> Vec<u8> {
        let mut options = HunkRenderOptions::default();
        let mut rendered = Vec::new();
        render_hunks(&mut rendered, old, new, &mut options);
        rendered
    }

    #[test]
    fn identical_content_renders_nothing() {
        let same: &[u8] = b"a\nb\n";
        let rendered = render_plain(Some(same), Some(same));
        assert!(rendered.is_empty());
    }

    #[test]
    fn single_line_change_basic_hunk() {
        let before: &[u8] = b"alpha\nbeta\ngamma\n";
        let after: &[u8] = b"alpha\nBETA\ngamma\n";
        let expected: &[u8] = b"@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n";
        assert_eq!(render_plain(Some(before), Some(after)), expected);
    }

    #[test]
    fn count_omitted_when_one() {
        // One line replaced by one line: both ranges print as `1`, not `1,1`.
        let expected: &[u8] = b"@@ -1 +1 @@\n-old\n+new\n";
        assert_eq!(render_plain(Some(b"old\n"), Some(b"new\n")), expected);
    }

    #[test]
    fn no_newline_marker_on_old_side() {
        let expected: &[u8] =
            b"@@ -1 +0,0 @@\n-only line no newline\n\\ No newline at end of file\n";
        assert_eq!(render_plain(Some(b"only line no newline"), None), expected);
    }

    #[test]
    fn no_newline_marker_on_new_side() {
        let expected: &[u8] =
            b"@@ -1 +1 @@\n-beta\n+beta-notail\n\\ No newline at end of file\n";
        assert_eq!(render_plain(Some(b"beta\n"), Some(b"beta-notail")), expected);
    }

    #[test]
    fn pure_insertion_into_empty() {
        let expected: &[u8] = b"@@ -0,0 +1,2 @@\n+x\n+y\n";
        assert_eq!(render_plain(None, Some(b"x\ny\n")), expected);
    }

    #[test]
    fn distant_changes_split_into_two_hunks() {
        // First and last line change; the eight untouched lines in between
        // exceed the 2*3 merge window, so each change gets its own hunk.
        let before: &[u8] = b"a\nb\nc\nd\ne\nf\ng\nh\ni\nj\n";
        let after: &[u8] = b"A\nb\nc\nd\ne\nf\ng\nh\ni\nJ\n";
        let rendered = render_plain(Some(before), Some(after));
        let text = String::from_utf8(rendered).expect("rendered output is valid UTF-8");
        let hunk_headers = text.matches("@@ ").count();
        assert_eq!(hunk_headers, 2, "expected two hunks: {text}");
    }

    #[test]
    fn heading_callback_supplies_section() {
        // The edit sits deep enough inside `fn foo()` that the function line
        // lies above the hunk. Only lines *before* the hunk's first line are
        // candidates, so an edit reaching line 1 would yield no heading.
        let before: &[u8] =
            b"fn foo() {\n a\n b\n c\n d\n e\n f\n g\n}\n";
        let after: &[u8] =
            b"fn foo() {\n a\n b\n c\n d\n CHANGED\n f\n g\n}\n";
        // Crude def_ff stand-in: a line is a heading iff it starts with an
        // ASCII letter; the heading is the line minus its newline.
        let mut classify = |line: &[u8]| -> Option<Vec<u8>> {
            let starts_with_letter = line.first().is_some_and(u8::is_ascii_alphabetic);
            starts_with_letter.then(|| line.strip_suffix(b"\n").unwrap_or(line).to_vec())
        };
        let mut options = HunkRenderOptions {
            heading: Some(&mut classify),
            ..Default::default()
        };
        let mut rendered = Vec::new();
        render_hunks(&mut rendered, Some(before), Some(after), &mut options);
        let text = String::from_utf8(rendered).expect("rendered output is valid UTF-8");
        assert!(
            text.starts_with("@@ -3,7 +3,7 @@ fn foo() {\n"),
            "expected funcname heading: {text}",
        );
    }
}