sley_diff_merge/
render.rs

1//! Unified-diff / patch RENDERER: turn a computed file diff (the old/new
2//! blob contents) into the textual unified-diff hunk body git's `diff.c`
3//! emit path produces (`emit_diff_symbol` / `fn_out_consume`).
4//!
5//! This is the byte-for-byte port of git's hunk emitter: `@@ -os,oc +ns,nc @@
6//! <heading>` hunk headers, the `+`/`-`/context lines, and the
7//! `\ No newline at end of file` marker. It owns hunk *grouping* (combining
8//! changes whose context windows overlap, `xdl_get_hunk`'s `distance >
9//! max_common` break) and hunk *range* computation, then emits each hunk.
10//!
11//! What this module deliberately does NOT own (those stay with the caller,
12//! which has the repository/userdiff/config context):
13//!
14//! * **The per-file metainfo header** (`diff --git`, `index`, `---`/`+++`,
15//!   mode/similarity lines). That is repository- and option-shaped; the
16//!   renderer only produces the hunk body that follows it.
17//! * **Funcname section-heading resolution.** The caller supplies a
18//!   [`HeadingFn`] closure that, given a candidate line, returns its section
19//!   heading (git's `def_ff` default heuristic or a userdiff `xfuncname`
20//!   pattern). The renderer does the *scan upward* for the nearest heading
21//!   line; the caller only classifies a single line.
22//! * **Word-diff body rendering.** When [`HunkRenderOptions::word_diff`] is
23//!   set, the renderer delegates each hunk's body to a [`HunkWordDiff`] hook,
24//!   which the caller implements over its own word-diff machinery.
25//!
26//! The seams keep the byte-shaping (ranges, headers, prefixes, no-newline
27//! markers, color spans) here — the part every diff-emitting command used to
28//! re-derive — while leaving the repository-coupled concerns in the consumer.
29
30use crate::{
31    DiffAlgorithm, DiffLine, DiffOp, WsIgnore, line_is_blank, myers_diff_lines_ws,
32    patience_diff_lines_anchored, split_lines,
33};
34use std::collections::HashMap;
35
36/// git's default hunk context (`-U3`).
37pub const DEFAULT_CONTEXT: usize = 3;
38const FUNCTION_CONTEXT_FLAG: usize = 1usize << (usize::BITS - 1);
39const CONTEXT_VALUE_MASK: usize = !FUNCTION_CONTEXT_FLAG;
40
41/// Encode `-W` / `--function-context` into the context field without changing
42/// the option shape used by the existing renderer call sites.
43pub fn enable_function_context(context: usize) -> usize {
44    (context & CONTEXT_VALUE_MASK) | FUNCTION_CONTEXT_FLAG
45}
46
47fn decode_context(context: usize) -> (usize, bool) {
48    (
49        context & CONTEXT_VALUE_MASK,
50        context & FUNCTION_CONTEXT_FLAG != 0,
51    )
52}
53
54fn replace_context_value(encoded: usize, context: usize) -> usize {
55    (encoded & !CONTEXT_VALUE_MASK) | (context & CONTEXT_VALUE_MASK)
56}
57
58/// The per-line origin marker for an emitted diff line.
59#[derive(Clone, Copy, PartialEq, Eq, Debug)]
60pub enum LineKind {
61    /// An unchanged (` `) line, present on both sides.
62    Context,
63    /// A removed (`-`) line, present only on the old side.
64    Delete,
65    /// An added (`+`) line, present only on the new side.
66    Insert,
67}
68
69/// One line of the unified diff, with its origin and 0-based positions in the
70/// old/new files (used to compute hunk ranges and feed the word-diff hook).
71#[derive(Clone, Copy)]
72pub struct TaggedLine<'a> {
73    /// Whether the line is context / a deletion / an insertion.
74    pub kind: LineKind,
75    /// The raw line bytes, including the trailing `\n` when present.
76    pub content: &'a [u8],
77    /// 0-based index of this line on the old side.
78    pub old_index: usize,
79    /// 0-based index of this line on the new side.
80    pub new_index: usize,
81}
82
83/// ANSI color palette for a unified diff, mirroring git's `diff_get_color`
84/// slots. Each field is the raw escape sequence (empty string = no color).
85///
86/// The renderer only consults the slots it paints in the hunk body; the
87/// per-file metainfo slot (`meta`) lives with the caller's header emitter and
88/// is intentionally absent here.
89#[derive(Clone, Copy)]
90pub struct RenderColors<'a> {
91    /// `color.diff.frag` — the `@@ .. @@` span.
92    pub frag: &'a str,
93    /// `color.diff.func` — the section heading after the frag.
94    pub func: &'a str,
95    /// `color.diff.old` — removed (`-`) lines.
96    pub old: &'a str,
97    /// `color.diff.new` — added (`+`) lines.
98    pub new: &'a str,
99    /// `color.diff.context` — context (` `) lines and the no-newline marker.
100    pub context: &'a str,
101    /// The reset sequence terminating each colored span.
102    pub reset: &'a str,
103    /// `color.diff.whitespace` — the highlight for whitespace errors
104    /// (`--ws-error-highlight`).
105    pub whitespace: &'a str,
106    /// `color.diff.oldMoved` — removed lines detected as moved.
107    pub old_moved: &'a str,
108    /// `color.diff.oldMovedAlternative` — alternate removed moved block.
109    pub old_moved_alt: &'a str,
110    /// `color.diff.oldMovedDimmed` — uninteresting removed moved line.
111    pub old_moved_dim: &'a str,
112    /// `color.diff.oldMovedAlternativeDimmed` — alternate dimmed removed line.
113    pub old_moved_alt_dim: &'a str,
114    /// `color.diff.newMoved` — added lines detected as moved.
115    pub new_moved: &'a str,
116    /// `color.diff.newMovedAlternative` — alternate added moved block.
117    pub new_moved_alt: &'a str,
118    /// `color.diff.newMovedDimmed` — uninteresting added moved line.
119    pub new_moved_dim: &'a str,
120    /// `color.diff.newMovedAlternativeDimmed` — alternate dimmed added line.
121    pub new_moved_alt_dim: &'a str,
122}
123
124/// `--color-moved=<mode>` hunk-body mode.
125#[derive(Clone, Copy, Debug, PartialEq, Eq)]
126pub enum ColorMovedMode {
127    /// Mark every matching `+`/`-` line.
128    Plain,
129    /// Mark moved blocks, without alternating colors.
130    Blocks,
131    /// Mark moved blocks, alternating adjacent blocks.
132    Zebra,
133    /// Like zebra, but dim interior lines.
134    DimmedZebra,
135}
136
137/// `--color-moved-ws=<mode>` comparison mode.
138#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
139pub struct ColorMovedWs {
140    /// Whitespace-ignore flags used when interning moved lines.
141    pub ignore: WsIgnore,
142    /// Allow a constant indentation delta across a moved block.
143    pub allow_indentation_change: bool,
144}
145
146/// Moved-code coloring configuration.
147#[derive(Clone, Copy, Debug, PartialEq, Eq)]
148pub struct ColorMoved {
149    /// Which moved-code coloring style to use.
150    pub mode: ColorMovedMode,
151    /// Whitespace handling for moved-code matching.
152    pub ws: ColorMovedWs,
153}
154
155/// Resolve the section heading for one candidate line.
156///
157/// Returns `Some(heading)` when `line` is a heading line (git's `def_ff`
158/// default heuristic or a userdiff `xfuncname` match) and `None` otherwise.
159/// The renderer scans upward from each hunk's first line and uses the first
160/// `Some` it finds — the caller only has to classify a single line, so it can
161/// keep its userdiff-driver / config resolution out of this crate.
162pub type HeadingFn<'a> = dyn FnMut(&[u8]) -> Option<Vec<u8>> + 'a;
163
164/// A hook that renders a single hunk's body when `--word-diff` is active.
165///
166/// The renderer feeds the hunk's tagged lines through this in order
167/// (`fn_out_consume`'s `diff_words` branch): each removed line is pushed to
168/// the minus buffer, each added line to the plus buffer, and a context line
169/// flushes the accumulated word diff before emitting the context line itself.
170/// The implementor owns the actual word-level rendering and color spans; this
171/// keeps the word-diff machinery in the consumer.
172pub trait HunkWordDiff {
173    /// Buffer one removed line's content for the next word-diff flush.
174    fn push_minus(&mut self, content: &[u8]);
175    /// Buffer one added line's content for the next word-diff flush.
176    fn push_plus(&mut self, content: &[u8]);
177    /// Word-diff the accumulated minus/plus buffers into `out` and reset them.
178    fn flush(&mut self, out: &mut Vec<u8>);
179    /// Emit one context line (the `--word-diff` context style).
180    fn emit_context_line(&mut self, out: &mut Vec<u8>, content: &[u8]);
181}
182
183/// Hunk-shaping and styling options for [`render_hunks`].
184///
185/// Lifetimes are split so the funcname / word-diff hooks can be borrowed
186/// mutably while `colors` is borrowed shared.
187pub struct HunkRenderOptions<'a, 'h> {
188    /// Lines of context around each change (`-U<n>`, default
189    /// [`DEFAULT_CONTEXT`]).
190    pub context: usize,
191    /// Extra inter-hunk merging distance (`--inter-hunk-context`).
192    pub interhunk: usize,
193    /// Per-line section-heading classifier; `None` emits headerless hunks.
194    pub heading: Option<&'a mut HeadingFn<'h>>,
195    /// ANSI palette when color output is enabled.
196    pub colors: Option<RenderColors<'a>>,
197    /// Word-diff body hook (replaces the `+`/`-` line bodies of each hunk).
198    pub word_diff: Option<&'a mut dyn HunkWordDiff>,
199    /// `--ws-error-highlight` configuration: when set and colors are on, the
200    /// renderer paints whitespace errors on the selected line kinds with
201    /// `colors.whitespace` (git's `emit_line_ws_markup`). `None` disables it.
202    pub ws_error: Option<WsErrorHighlight>,
203    /// Whitespace-ignore flags (`-w`, `-b`, `--ignore-space-at-eol`,
204    /// `--ignore-cr-at-eol`): applied to the line-level comparison so
205    /// whitespace-only changes do not appear as diffs (git's
206    /// `XDF_WHITESPACE_FLAGS`).
207    pub ws_ignore: WsIgnore,
208    /// The line-diff algorithm to use (Myers / patience / histogram).
209    pub algorithm: DiffAlgorithm,
210    /// Indent heuristic (`--indent-heuristic` / `diff.indentHeuristic`): when
211    /// set, change groups that can slide within surrounding identical lines are
212    /// shifted to the most readable boundary (git's `XDF_INDENT_HEURISTIC`
213    /// scoring in `xdl_change_compact`). The base change compaction — sliding
214    /// groups as far down as possible and aligning add/delete pairs — always
215    /// runs; this flag only enables the indent-based slider scoring. Defaults to
216    /// `true` to match git's `diff.indentHeuristic` default.
217    pub indent_heuristic: bool,
218    /// Change-group suppression (`--ignore-blank-lines`, `-I<regex>`): when
219    /// set, change groups all of whose old and new lines are blank (and/or
220    /// match a `-I` regex) are dropped from hunk emission, mirroring git's
221    /// `xdl_mark_ignorable_lines` / `xdl_mark_ignorable_regex` + `xdl_get_hunk`.
222    pub change_ignore: Option<&'a ChangeIgnore<'a>>,
223    /// `log -L`: restrict the emitted hunks to the new-side (post-image) line
224    /// ranges. Each range is 0-based, `[start, end)`. When set, the renderer
225    /// inflates context to the widest range span (so every change inside a
226    /// range merges into one xdiff hunk), then clips the emitted lines back to
227    /// the range boundaries — a port of diff.c's `line_range_*` callbacks.
228    /// Ranges must be sorted and disjoint. `None` disables the filter (every
229    /// non-line-log caller).
230    pub line_ranges: Option<&'a [LineRange]>,
231    /// `--color-moved`: classify moved `+`/`-` lines and paint them with the
232    /// moved-color slots. `None` disables moved-code coloring.
233    pub color_moved: Option<ColorMoved>,
234    /// `--anchored=<text>` prefixes (git's patience anchors). Only consulted when
235    /// `algorithm` is [`DiffAlgorithm::Patience`]; empty (the default) is plain
236    /// patience. Forces the matching unique lines to stay aligned.
237    pub anchors: &'a [Vec<u8>],
238}
239
240/// A half-open `[start, end)` line range (0-based) for `log -L` hunk
241/// restriction. Mirrors diff.c's `struct range`.
242#[derive(Clone, Copy, Debug, PartialEq, Eq)]
243pub struct LineRange {
244    /// 0-based inclusive start line (post-image).
245    pub start: i64,
246    /// 0-based exclusive end line (post-image).
247    pub end: i64,
248}
249
250/// Configuration for change-group suppression (`--ignore-blank-lines` and
251/// `-I<regex>`). A change group is *ignorable* iff every old line and every new
252/// line it touches is blank (when `ignore_blank_lines`) or matches one of the
253/// `-I` regexes (`regex_match`). Ignorable groups are kept out of hunk emission
254/// per `xdl_get_hunk`'s leading/isolated-ignorable removal.
255pub type ChangeIgnoreRegex<'a> = &'a dyn Fn(&[u8]) -> bool;
256
257pub struct ChangeIgnore<'a> {
258    /// `--ignore-blank-lines`: blank change groups are ignorable.
259    pub ignore_blank_lines: bool,
260    /// `-I<regex>`: a line is regex-ignorable when this returns `true`. The
261    /// closure receives the raw line bytes (including the trailing `\n`). When
262    /// `None`, no regex suppression applies.
263    pub regex_match: Option<ChangeIgnoreRegex<'a>>,
264}
265
266/// Which line kinds get whitespace-error highlighting, plus the rule to check
267/// against. git's `--ws-error-highlight` defaults to highlighting only new
268/// (`+`) lines.
269#[derive(Clone, Copy)]
270pub struct WsErrorHighlight {
271    /// The resolved whitespace rule to check each line against.
272    pub rule: crate::ws::WsRule,
273    /// Highlight errors on removed (`-`) lines.
274    pub old: bool,
275    /// Highlight errors on added (`+`) lines.
276    pub new: bool,
277    /// Highlight errors on context (` `) lines.
278    pub context: bool,
279}
280
281impl Default for HunkRenderOptions<'_, '_> {
282    fn default() -> Self {
283        Self {
284            context: DEFAULT_CONTEXT,
285            interhunk: 0,
286            heading: None,
287            colors: None,
288            word_diff: None,
289            ws_error: None,
290            ws_ignore: WsIgnore::default(),
291            algorithm: DiffAlgorithm::Myers,
292            indent_heuristic: true,
293            change_ignore: None,
294            line_ranges: None,
295            color_moved: None,
296            anchors: &[],
297        }
298    }
299}
300
301/// Render the unified-diff hunk body for a single file change into `out`.
302///
303/// `old_content` / `new_content` are the full blob contents (`None` for an
304/// absent side — a created or deleted file). The function computes the
305/// line-level Myers diff, groups changes into hunks with `options.context`
306/// lines of surrounding context (merging nearby groups per
307/// `options.interhunk`), and emits each hunk: the `@@` header (with git's
308/// section heading), then the context / `-` / `+` lines including
309/// `\ No newline at end of file` markers.
310///
311/// Nothing is written when the contents are identical (no changed lines).
312/// This is the body *after* the per-file metainfo header the caller emits.
313pub fn render_hunks(
314    out: &mut Vec<u8>,
315    old_content: Option<&[u8]>,
316    new_content: Option<&[u8]>,
317    options: &mut HunkRenderOptions<'_, '_>,
318) {
319    let (context, function_context) = decode_context(options.context);
320    // `log -L` hunk restriction: render with inflated context into a scratch
321    // buffer, then clip the emitted lines to the tracked ranges (diff.c's
322    // `line_range_*` callbacks). The widest range span is the upper bound on
323    // the context needed for every change in a range to land in one hunk.
324    if let Some(ranges) = options.line_ranges {
325        let max_span = ranges
326            .iter()
327            .map(|r| r.end - r.start)
328            .max()
329            .unwrap_or(0)
330            .max(0) as usize;
331        let saved_context = options.context;
332        options.context = replace_context_value(saved_context, context.max(max_span));
333        options.line_ranges = None;
334        let mut full = Vec::new();
335        render_hunks(&mut full, old_content, new_content, options);
336        options.context = saved_context;
337        options.line_ranges = Some(ranges);
338        filter_hunks_to_ranges(out, &full, ranges);
339        return;
340    }
341    let old = split_lines(old_content.unwrap_or_default());
342    let new = split_lines(new_content.unwrap_or_default());
343    // `--anchored` only applies to the patience algorithm and on the raw
344    // (no whitespace-ignore) comparison — git forces patience and matches anchor
345    // text against the line bytes. Everything else takes the normal path.
346    let mut ops = if options.algorithm == DiffAlgorithm::Patience
347        && !options.anchors.is_empty()
348        && options.ws_ignore.is_empty()
349    {
350        patience_diff_lines_anchored(&old, &new, options.anchors)
351    } else {
352        myers_diff_lines_ws(&old, &new, options.ws_ignore, options.algorithm)
353    };
354
355    // git's `xdl_change_compact`: slide each change group as far down as
356    // possible, snap add/delete pairs back into alignment, and (under the
357    // indent heuristic) shift to the most readable split. Runs on the raw edit
358    // script before it is flattened into tagged lines.
359    change_compact(
360        &mut ops,
361        &old,
362        &new,
363        options.ws_ignore,
364        options.indent_heuristic,
365    );
366
367    // Flatten the edit script into a tagged line stream carrying old/new
368    // positions.
369    let mut tagged: Vec<TaggedLine<'_>> = Vec::new();
370    let mut old_idx = 0usize;
371    let mut new_idx = 0usize;
372    for op in ops {
373        match op {
374            DiffOp::Equal(n) => {
375                for _ in 0..n {
376                    // When records match only under whitespace-ignore flags,
377                    // git emits the postimage bytes as context.
378                    tagged.push(TaggedLine {
379                        kind: LineKind::Context,
380                        content: new[new_idx].content,
381                        old_index: old_idx,
382                        new_index: new_idx,
383                    });
384                    old_idx += 1;
385                    new_idx += 1;
386                }
387            }
388            DiffOp::Delete(n) => {
389                for _ in 0..n {
390                    tagged.push(TaggedLine {
391                        kind: LineKind::Delete,
392                        content: old[old_idx].content,
393                        old_index: old_idx,
394                        new_index: new_idx,
395                    });
396                    old_idx += 1;
397                }
398            }
399            DiffOp::Insert(n) => {
400                for _ in 0..n {
401                    tagged.push(TaggedLine {
402                        kind: LineKind::Insert,
403                        content: new[new_idx].content,
404                        old_index: old_idx,
405                        new_index: new_idx,
406                    });
407                    new_idx += 1;
408                }
409            }
410        }
411    }
412
413    // Build the change list (git's xdchange script): each maximal run of
414    // consecutive `-`/`+` tagged lines is one change, carrying its old/new line
415    // ranges and the tagged-stream span it occupies.
416    let changes = build_changes(&tagged);
417    if changes.is_empty() {
418        return;
419    }
420
421    // Mark each change ignorable when `--ignore-blank-lines` / `-I<regex>`
422    // applies and every old and new line it touches is blank / regex-matched
423    // (git's xdl_mark_ignorable_lines + xdl_mark_ignorable_regex).
424    let mut changes = changes;
425    if let Some(ci) = options.change_ignore {
426        mark_ignorable_changes(&mut changes, &old, &new, options.ws_ignore, ci);
427    }
428
429    // Group changes into hunks (xdl_get_hunk): the `distance > max_common`
430    // break plus leading/isolated-ignorable-change removal. Each hunk is a
431    // tagged-stream `(first_change_pos, last_change_pos)` span of *real*
432    // (emitted) changes.
433    let mut groups = group_changes_into_hunks(&changes, context, options.interhunk);
434    if function_context {
435        groups = expand_hunks_to_function_context(
436            &groups,
437            &tagged,
438            &old,
439            &new,
440            options.heading.as_deref_mut(),
441        );
442    }
443
444    let moved_styles = options
445        .color_moved
446        .filter(|_| options.colors.is_some() && options.word_diff.is_none())
447        .map(|color_moved| mark_color_as_moved(&tagged, color_moved));
448
449    for (first_change, last_change) in groups {
450        let (hunk_start, hunk_end) = if function_context {
451            (first_change, (last_change + 1).min(tagged.len()))
452        } else {
453            (
454                first_change.saturating_sub(context),
455                (last_change + context + 1).min(tagged.len()),
456            )
457        };
458        render_one_hunk(
459            out,
460            &tagged,
461            moved_styles.as_deref(),
462            &old,
463            hunk_start,
464            hunk_end,
465            options,
466        );
467    }
468}
469
470// ===========================================================================
471// Change compaction: a faithful port of git's `xdl_change_compact`
472// (xdiff/xdiffi.c), including the `XDF_INDENT_HEURISTIC` slider scoring.
473//
474// git represents a diff as two per-file boolean "changed" arrays (`xdf1.rchg`
475// for the old file, `xdf2.rchg` for the new). A *group* is a maximal run of
476// changed lines (a deletion run in the old file, an insertion run in the new
477// file), separated by runs of unchanged lines. `xdl_change_compact` walks the
478// groups of one file while keeping a synchronized cursor over the groups of the
479// other, sliding each group up/down within identical surrounding lines to a
480// canonical (and, under the indent heuristic, more readable) position.
481//
482// We reconstruct the two `changed[]` arrays from the [`DiffOp`] script, run the
483// algorithm on each file (old then new, exactly as git does), and rebuild the
484// script. Line equality for sliding (`recs_match`) uses the same
485// whitespace-canonicalized bytes the line-level diff used, so a group only
486// slides across lines the diff itself considered identical.
487// ===========================================================================
488
489/// If a line is indented more than this, [`get_indent`] returns this value
490/// (git's `MAX_INDENT`).
491const MAX_INDENT: i32 = 200;
492/// Cap on consecutive blank lines counted around a split (git's `MAX_BLANKS`).
493const MAX_BLANKS: i32 = 20;
494
495// Empirically-determined weight factors from git's xdiffi.c.
496const START_OF_FILE_PENALTY: i32 = 1;
497const END_OF_FILE_PENALTY: i32 = 21;
498const TOTAL_BLANK_WEIGHT: i32 = -30;
499const POST_BLANK_WEIGHT: i32 = 6;
500const RELATIVE_INDENT_PENALTY: i32 = -4;
501const RELATIVE_INDENT_WITH_BLANK_PENALTY: i32 = 10;
502const RELATIVE_OUTDENT_PENALTY: i32 = 24;
503const RELATIVE_OUTDENT_WITH_BLANK_PENALTY: i32 = 17;
504const RELATIVE_DEDENT_PENALTY: i32 = 23;
505const RELATIVE_DEDENT_WITH_BLANK_PENALTY: i32 = 17;
506const INDENT_WEIGHT: i32 = 60;
507const INDENT_HEURISTIC_MAX_SLIDING: i64 = 100;
508
509/// One file's record set for compaction: the whitespace-canonicalized line
510/// bytes (for `recs_match` and `get_indent`) and the per-line `changed` flags.
511/// `nrec` is the number of records; index `-1` and `nrec` are treated as
512/// "unchanged" sentinels, matching git's zero-padded `rchg`.
513struct CompactFile {
514    recs: Vec<Vec<u8>>,
515    changed: Vec<bool>,
516}
517
518impl CompactFile {
519    fn nrec(&self) -> i64 {
520        self.recs.len() as i64
521    }
522
523    /// `xdf->changed[i]` with git's out-of-range sentinels: positions `-1` and
524    /// `nrec` are unchanged (`false`).
525    fn changed(&self, i: i64) -> bool {
526        if i < 0 || i >= self.nrec() {
527            false
528        } else {
529            self.changed[i as usize]
530        }
531    }
532
533    fn set_changed(&mut self, i: i64, v: bool) {
534        self.changed[i as usize] = v;
535    }
536}
537
538/// git's `get_indent`: indentation columns of `rec` treating TAB as advancing
539/// to the next multiple of 8; `-1` for a blank (whitespace-only / empty) line;
540/// clamped at [`MAX_INDENT`].
541fn get_indent(rec: &[u8]) -> i32 {
542    let mut ret: i32 = 0;
543    for &c in rec {
544        if !xdl_isspace(c) {
545            return ret;
546        } else if c == b' ' {
547            ret += 1;
548        } else if c == b'\t' {
549            ret += 8 - ret % 8;
550        }
551        // other whitespace (e.g. CR) is ignored, matching git.
552        if ret >= MAX_INDENT {
553            return MAX_INDENT;
554        }
555    }
556    // The line contains only whitespace.
557    -1
558}
559
560/// git's `XDL_ISSPACE`: space, tab, newline, vertical tab, form feed, carriage
561/// return.
562fn xdl_isspace(c: u8) -> bool {
563    matches!(c, b' ' | b'\t' | b'\n' | 0x0b | 0x0c | b'\r')
564}
565
566/// git's `struct split_measurement`.
567#[derive(Default)]
568struct SplitMeasurement {
569    end_of_file: bool,
570    indent: i32,
571    pre_blank: i32,
572    pre_indent: i32,
573    post_blank: i32,
574    post_indent: i32,
575}
576
577/// git's `struct split_score`.
578#[derive(Default, Clone, Copy)]
579struct SplitScore {
580    effective_indent: i32,
581    penalty: i32,
582}
583
584/// git's `measure_split`: characteristics of a hypothetical split above line
585/// `split` in `xdf`.
586fn measure_split(xdf: &CompactFile, split: i64) -> SplitMeasurement {
587    let mut m = SplitMeasurement::default();
588    if split >= xdf.nrec() {
589        m.end_of_file = true;
590        m.indent = -1;
591    } else {
592        m.end_of_file = false;
593        m.indent = get_indent(&xdf.recs[split as usize]);
594    }
595
596    m.pre_blank = 0;
597    m.pre_indent = -1;
598    let mut i = split - 1;
599    while i >= 0 {
600        m.pre_indent = get_indent(&xdf.recs[i as usize]);
601        if m.pre_indent != -1 {
602            break;
603        }
604        m.pre_blank += 1;
605        if m.pre_blank == MAX_BLANKS {
606            m.pre_indent = 0;
607            break;
608        }
609        i -= 1;
610    }
611
612    m.post_blank = 0;
613    m.post_indent = -1;
614    let mut i = split + 1;
615    while i < xdf.nrec() {
616        m.post_indent = get_indent(&xdf.recs[i as usize]);
617        if m.post_indent != -1 {
618            break;
619        }
620        m.post_blank += 1;
621        if m.post_blank == MAX_BLANKS {
622            m.post_indent = 0;
623            break;
624        }
625        i += 1;
626    }
627
628    m
629}
630
631/// git's `score_add_split`: accumulate the badness of split `m` into `s`.
632fn score_add_split(m: &SplitMeasurement, s: &mut SplitScore) {
633    if m.pre_indent == -1 && m.pre_blank == 0 {
634        s.penalty += START_OF_FILE_PENALTY;
635    }
636    if m.end_of_file {
637        s.penalty += END_OF_FILE_PENALTY;
638    }
639
640    let post_blank = if m.indent == -1 { 1 + m.post_blank } else { 0 };
641    let total_blank = m.pre_blank + post_blank;
642
643    s.penalty += TOTAL_BLANK_WEIGHT * total_blank;
644    s.penalty += POST_BLANK_WEIGHT * post_blank;
645
646    let indent = if m.indent != -1 {
647        m.indent
648    } else {
649        m.post_indent
650    };
651    let any_blanks = total_blank != 0;
652
653    s.effective_indent += indent;
654
655    if indent == -1 || m.pre_indent == -1 {
656        // End of file, or no non-blank predecessor: no adjustment needed
657        // (git's two separate `indent == -1` / `pre_indent == -1` no-op arms).
658    } else if indent > m.pre_indent {
659        s.penalty += if any_blanks {
660            RELATIVE_INDENT_WITH_BLANK_PENALTY
661        } else {
662            RELATIVE_INDENT_PENALTY
663        };
664    } else if indent == m.pre_indent {
665        // Same indentation as predecessor; no adjustment.
666    } else if m.post_indent != -1 && m.post_indent > indent {
667        s.penalty += if any_blanks {
668            RELATIVE_OUTDENT_WITH_BLANK_PENALTY
669        } else {
670            RELATIVE_OUTDENT_PENALTY
671        };
672    } else {
673        s.penalty += if any_blanks {
674            RELATIVE_DEDENT_WITH_BLANK_PENALTY
675        } else {
676            RELATIVE_DEDENT_PENALTY
677        };
678    }
679}
680
681/// git's `score_cmp`: `<0` when `s1` is the better (lower-badness) split.
682fn score_cmp(s1: &SplitScore, s2: &SplitScore) -> i32 {
683    let cmp_indents = (s1.effective_indent > s2.effective_indent) as i32
684        - (s1.effective_indent < s2.effective_indent) as i32;
685    INDENT_WEIGHT * cmp_indents + (s1.penalty - s2.penalty)
686}
687
688/// git's `struct xdlgroup`: a (possibly empty) group spanning `[start, end)` of
689/// changed lines.
690struct XdlGroup {
691    start: i64,
692    end: i64,
693}
694
695/// git's `recs_match`: the two records hash-equal (here: canonicalized bytes
696/// equal).
697fn recs_match(xdf: &CompactFile, a: i64, b: i64) -> bool {
698    xdf.recs[a as usize] == xdf.recs[b as usize]
699}
700
701/// git's `group_init`: point `g` at the first group in `xdf`.
702fn group_init(xdf: &CompactFile) -> XdlGroup {
703    let mut end = 0i64;
704    while xdf.changed(end) {
705        end += 1;
706    }
707    XdlGroup { start: 0, end }
708}
709
710/// git's `group_next`: advance to the next group; `false` if already at EOF.
711fn group_next(xdf: &CompactFile, g: &mut XdlGroup) -> bool {
712    if g.end == xdf.nrec() {
713        return false;
714    }
715    g.start = g.end + 1;
716    g.end = g.start;
717    while xdf.changed(g.end) {
718        g.end += 1;
719    }
720    true
721}
722
723/// git's `group_previous`: step back to the previous group; `false` if at BOF.
724fn group_previous(xdf: &CompactFile, g: &mut XdlGroup) -> bool {
725    if g.start == 0 {
726        return false;
727    }
728    g.end = g.start - 1;
729    g.start = g.end;
730    while xdf.changed(g.start - 1) {
731        g.start -= 1;
732    }
733    true
734}
735
736/// git's `group_slide_down`: slide `g` toward EOF if the line below equals the
737/// group's first line, absorbing any group it bumps into. `false` if it cannot
738/// slide.
739fn group_slide_down(xdf: &mut CompactFile, g: &mut XdlGroup) -> bool {
740    if g.end < xdf.nrec() && recs_match(xdf, g.start, g.end) {
741        xdf.set_changed(g.start, false);
742        xdf.set_changed(g.end, true);
743        g.start += 1;
744        g.end += 1;
745        while xdf.changed(g.end) {
746            g.end += 1;
747        }
748        true
749    } else {
750        false
751    }
752}
753
754/// git's `group_slide_up`: slide `g` toward BOF if the line above equals the
755/// group's last line, absorbing any group it bumps into. `false` if it cannot
756/// slide.
757fn group_slide_up(xdf: &mut CompactFile, g: &mut XdlGroup) -> bool {
758    if g.start > 0 && recs_match(xdf, g.start - 1, g.end - 1) {
759        g.start -= 1;
760        g.end -= 1;
761        xdf.set_changed(g.start, true);
762        xdf.set_changed(g.end, false);
763        while xdf.changed(g.start - 1) {
764            g.start -= 1;
765        }
766        true
767    } else {
768        false
769    }
770}
771
772/// Compact the change groups of `xdf`, keeping `xdfo` (the other file) in sync.
773/// A faithful port of the per-file body of git's `xdl_change_compact`. The
774/// `xdfo` re-diff tail (only reachable for histogram diff) is omitted: this
775/// compaction never merges groups in a way that creates new matching lines for
776/// the Myers/patience/histogram scripts produced here, so the re-diff is a
777/// no-op for our outputs.
778fn compact_one(xdf: &mut CompactFile, xdfo: &mut CompactFile, indent_heuristic: bool) {
779    let mut g = group_init(xdf);
780    let mut go = group_init(xdfo);
781
782    loop {
783        // Skip empty groups in the to-be-compacted file.
784        if g.end == g.start {
785            if !group_next(xdf, &mut g) {
786                break;
787            }
788            if !group_next(xdfo, &mut go) {
789                break;
790            }
791            continue;
792        }
793
794        let mut groupsize;
795        let mut earliest_end;
796        let mut end_matching_other;
797
798        loop {
799            groupsize = g.end - g.start;
800            end_matching_other = -1i64;
801
802            // Shift the group backward as far as possible.
803            while group_slide_up(xdf, &mut g) {
804                let ok = group_previous(xdfo, &mut go);
805                debug_assert!(ok, "group sync broken sliding up");
806            }
807            // Highest this group can be shifted; record its end.
808            earliest_end = g.end;
809            if go.end > go.start {
810                end_matching_other = g.end;
811            }
812            // Now shift the group forward as far as possible.
813            loop {
814                if !group_slide_down(xdf, &mut g) {
815                    break;
816                }
817                let ok = group_next(xdfo, &mut go);
818                debug_assert!(ok, "group sync broken sliding down");
819                if go.end > go.start {
820                    end_matching_other = g.end;
821                }
822            }
823            if groupsize == g.end - g.start {
824                break;
825            }
826        }
827
828        // The group is now shifted as far down as possible; only upward shifts
829        // remain to consider.
830        if g.end == earliest_end {
831            // No shifting was possible.
832        } else if end_matching_other != -1 {
833            // Move the (possibly merged) group back to line up with the last
834            // group of changes from the other file it can align with. Avoids
835            // splitting one change into a separate add/delete.
836            while go.end == go.start {
837                let ok = group_slide_up(xdf, &mut g);
838                debug_assert!(ok, "match disappeared");
839                let ok = group_previous(xdfo, &mut go);
840                debug_assert!(ok, "group sync broken sliding to match");
841            }
842        } else if indent_heuristic {
843            // Pick the shift with the lowest indent-heuristic score.
844            let mut best_shift = -1i64;
845            let mut best_score = SplitScore::default();
846
847            let mut shift = earliest_end;
848            if g.end - groupsize - 1 > shift {
849                shift = g.end - groupsize - 1;
850            }
851            if g.end - INDENT_HEURISTIC_MAX_SLIDING > shift {
852                shift = g.end - INDENT_HEURISTIC_MAX_SLIDING;
853            }
854            while shift <= g.end {
855                let mut score = SplitScore::default();
856                let m = measure_split(xdf, shift);
857                score_add_split(&m, &mut score);
858                let m = measure_split(xdf, shift - groupsize);
859                score_add_split(&m, &mut score);
860                if best_shift == -1 || score_cmp(&score, &best_score) <= 0 {
861                    best_score = score;
862                    best_shift = shift;
863                }
864                shift += 1;
865            }
866
867            while g.end > best_shift {
868                let ok = group_slide_up(xdf, &mut g);
869                debug_assert!(ok, "best shift unreached");
870                let ok = group_previous(xdfo, &mut go);
871                debug_assert!(ok, "group sync broken sliding to blank line");
872            }
873        }
874
875        // Advance to the next group pair.
876        if !group_next(xdf, &mut g) {
877            break;
878        }
879        if !group_next(xdfo, &mut go) {
880            break;
881        }
882    }
883}
884
885/// Run git's `xdl_change_compact` over the [`DiffOp`] script in place.
886///
887/// Reconstructs the per-file `changed[]` flags from `ops`, compacts the old
888/// file then the new file (each synchronized against the other, as git does),
889/// and rebuilds `ops` from the recompacted flags.
890fn change_compact(
891    ops: &mut Vec<DiffOp>,
892    old: &[DiffLine<'_>],
893    new: &[DiffLine<'_>],
894    ws_ignore: WsIgnore,
895    indent_heuristic: bool,
896) {
897    // Fast path: no changes (or a single trivial run) cannot be slid.
898    if ops.iter().all(|op| matches!(op, DiffOp::Equal(_))) {
899        return;
900    }
901
902    // Canonicalized record bytes — the same equality the line-level diff used.
903    let canon = |lines: &[DiffLine<'_>]| -> Vec<Vec<u8>> {
904        if ws_ignore.is_empty() {
905            lines.iter().map(|l| l.content.to_vec()).collect()
906        } else {
907            lines
908                .iter()
909                .map(|l| crate::canonicalize_line_for_match(l.content, ws_ignore))
910                .collect()
911        }
912    };
913
914    let mut xdf1 = CompactFile {
915        recs: canon(old),
916        changed: vec![false; old.len()],
917    };
918    let mut xdf2 = CompactFile {
919        recs: canon(new),
920        changed: vec![false; new.len()],
921    };
922
923    // Reconstruct git's two `changed[]` arrays from the run-length script.
924    let mut oi = 0usize;
925    let mut ni = 0usize;
926    for op in ops.iter() {
927        match *op {
928            DiffOp::Equal(n) => {
929                oi += n;
930                ni += n;
931            }
932            DiffOp::Delete(n) => {
933                for _ in 0..n {
934                    xdf1.changed[oi] = true;
935                    oi += 1;
936                }
937            }
938            DiffOp::Insert(n) => {
939                for _ in 0..n {
940                    xdf2.changed[ni] = true;
941                    ni += 1;
942                }
943            }
944        }
945    }
946
947    // git compacts xdf1 (synced against xdf2) then xdf2 (synced against xdf1).
948    compact_one(&mut xdf1, &mut xdf2, indent_heuristic);
949    compact_one(&mut xdf2, &mut xdf1, indent_heuristic);
950
951    // Rebuild the coalesced op script by walking both files' changed flags in
952    // lockstep, emitting deletes for the old side and inserts for the new side.
953    let n_old = xdf1.changed.len();
954    let n_new = xdf2.changed.len();
955    let mut rebuilt: Vec<DiffOp> = Vec::with_capacity(ops.len());
956    let mut i = 0usize; // old index
957    let mut j = 0usize; // new index
958    while i < n_old || j < n_new {
959        let del = i < n_old && xdf1.changed[i];
960        let ins = j < n_new && xdf2.changed[j];
961        if del {
962            let mut run = 0usize;
963            while i < n_old && xdf1.changed[i] {
964                run += 1;
965                i += 1;
966            }
967            push_op(&mut rebuilt, DiffOp::Delete(run));
968        } else if ins {
969            let mut run = 0usize;
970            while j < n_new && xdf2.changed[j] {
971                run += 1;
972                j += 1;
973            }
974            push_op(&mut rebuilt, DiffOp::Insert(run));
975        } else {
976            // Both sides are unchanged here: an equal run.
977            let mut run = 0usize;
978            while i < n_old && j < n_new && !xdf1.changed[i] && !xdf2.changed[j] {
979                run += 1;
980                i += 1;
981                j += 1;
982            }
983            debug_assert!(run > 0, "change_compact stalled rebuilding script");
984            push_op(&mut rebuilt, DiffOp::Equal(run));
985        }
986    }
987
988    *ops = rebuilt;
989}
990
991/// Append `op` to `out`, coalescing with a same-kind run at the tail.
992fn push_op(out: &mut Vec<DiffOp>, op: DiffOp) {
993    match (out.last_mut(), op) {
994        (Some(DiffOp::Equal(prev)), DiffOp::Equal(n)) => *prev += n,
995        (Some(DiffOp::Delete(prev)), DiffOp::Delete(n)) => *prev += n,
996        (Some(DiffOp::Insert(prev)), DiffOp::Insert(n)) => *prev += n,
997        _ => out.push(op),
998    }
999}
1000
1001/// State for [`filter_hunks_to_ranges`], a port of diff.c's
1002/// `struct line_range_callback`. We drive it over the already-rendered
1003/// unified-diff lines (with inflated context) rather than xdiff's raw line
1004/// callback, but the algorithm is the same: buffer pending removals, open a
1005/// range hunk when an in-range post-image line is seen, and emit the clipped
1006/// `@@` header + body for each range.
1007struct RangeFilter<'r> {
1008    ranges: &'r [LineRange],
1009    cur_range: usize,
1010    /// Post/pre-image 1-based line counters seeded from each `@@` header.
1011    lno_post: i64,
1012    lno_pre: i64,
1013    /// Function-name heading carried from the current `@@` header (the suffix
1014    /// after `@@ ... @@ `), reused verbatim on every emitted range hunk.
1015    func: Vec<u8>,
1016    /// Range hunk being accumulated.
1017    rhunk: Vec<u8>,
1018    rhunk_old_begin: i64,
1019    rhunk_old_count: i64,
1020    rhunk_new_begin: i64,
1021    rhunk_new_count: i64,
1022    rhunk_active: bool,
1023    rhunk_has_changes: bool,
1024    /// Removal lines not yet known to be in-range.
1025    pending_rm: Vec<u8>,
1026    pending_rm_count: i64,
1027    pending_rm_pre_begin: i64,
1028}
1029
1030impl RangeFilter<'_> {
1031    fn discard_pending_rm(&mut self) {
1032        self.pending_rm.clear();
1033        self.pending_rm_count = 0;
1034    }
1035
1036    /// Port of diff.c:flush_rhunk — emit the accumulated range hunk (header +
1037    /// body) into `out`, dropping context-only hunks.
1038    fn flush_rhunk(&mut self, out: &mut Vec<u8>) {
1039        if !self.rhunk_active {
1040            return;
1041        }
1042        if self.pending_rm_count != 0 {
1043            self.rhunk.extend_from_slice(&self.pending_rm);
1044            self.rhunk_old_count += self.pending_rm_count;
1045            self.rhunk_has_changes = true;
1046            self.discard_pending_rm();
1047        }
1048        if !self.rhunk_has_changes {
1049            self.rhunk_active = false;
1050            self.rhunk.clear();
1051            return;
1052        }
1053        // git's flush_rhunk uses `@@ -%ld,%ld +%ld,%ld @@` unconditionally —
1054        // the count is ALWAYS shown (unlike the normal emitter, which omits a
1055        // count of 1).
1056        out.extend_from_slice(
1057            format!(
1058                "@@ -{},{} +{},{} @@",
1059                self.rhunk_old_begin,
1060                self.rhunk_old_count,
1061                self.rhunk_new_begin,
1062                self.rhunk_new_count
1063            )
1064            .as_bytes(),
1065        );
1066        if !self.func.is_empty() {
1067            out.push(b' ');
1068            out.extend_from_slice(&self.func);
1069        }
1070        out.push(b'\n');
1071        out.extend_from_slice(&self.rhunk);
1072        self.rhunk_active = false;
1073        self.rhunk.clear();
1074    }
1075
1076    /// Port of diff.c:line_range_line_fn for one rendered body line. `marker`
1077    /// is the first byte (`' '`/`'+'`/`'-'`/`'\\'`), `line` the full bytes.
1078    fn body_line(&mut self, out: &mut Vec<u8>, marker: u8, line: &[u8]) {
1079        if marker == b'-' {
1080            if self.pending_rm_count == 0 {
1081                self.pending_rm_pre_begin = self.lno_pre;
1082            }
1083            self.lno_pre += 1;
1084            self.pending_rm.extend_from_slice(line);
1085            self.pending_rm_count += 1;
1086            return;
1087        }
1088        if marker == b'\\' {
1089            if self.pending_rm_count != 0 {
1090                self.pending_rm.extend_from_slice(line);
1091            } else if self.rhunk_active {
1092                self.rhunk.extend_from_slice(line);
1093            }
1094            return;
1095        }
1096        // marker is '+' or ' '
1097        let lno_0 = self.lno_post - 1;
1098        let cur_pre = self.lno_pre;
1099        self.lno_post += 1;
1100        if marker == b' ' {
1101            self.lno_pre += 1;
1102        }
1103
1104        while self.cur_range < self.ranges.len() && lno_0 >= self.ranges[self.cur_range].end {
1105            if self.rhunk_active {
1106                self.flush_rhunk(out);
1107            }
1108            self.discard_pending_rm();
1109            self.cur_range += 1;
1110        }
1111        if self.cur_range >= self.ranges.len() {
1112            self.discard_pending_rm();
1113            return;
1114        }
1115        let cur = self.ranges[self.cur_range];
1116        if lno_0 < cur.start {
1117            self.discard_pending_rm();
1118            return;
1119        }
1120        if !self.rhunk_active {
1121            self.rhunk_active = true;
1122            self.rhunk_has_changes = false;
1123            self.rhunk_new_begin = lno_0 + 1;
1124            self.rhunk_old_begin = if self.pending_rm_count != 0 {
1125                self.pending_rm_pre_begin
1126            } else {
1127                cur_pre
1128            };
1129            self.rhunk_old_count = 0;
1130            self.rhunk_new_count = 0;
1131            self.rhunk.clear();
1132        }
1133        if self.pending_rm_count != 0 {
1134            self.rhunk.extend_from_slice(&self.pending_rm);
1135            self.rhunk_old_count += self.pending_rm_count;
1136            self.rhunk_has_changes = true;
1137            self.discard_pending_rm();
1138        }
1139        self.rhunk.extend_from_slice(line);
1140        self.rhunk_new_count += 1;
1141        if marker == b'+' {
1142            self.rhunk_has_changes = true;
1143        } else {
1144            self.rhunk_old_count += 1;
1145        }
1146    }
1147}
1148
1149/// Clip a fully-rendered unified-diff hunk body (`full`, produced with
1150/// inflated context) down to the tracked `ranges`, mirroring diff.c's
1151/// `line_range_hunk_fn` / `line_range_line_fn` / `flush_rhunk`. The renderer's
1152/// `@@` header already carries the funcname suffix; we parse it back out and
1153/// reuse it on every emitted range hunk. No-color path only (`log -L` test
1154/// output is uncolored).
1155fn filter_hunks_to_ranges(out: &mut Vec<u8>, full: &[u8], ranges: &[LineRange]) {
1156    if ranges.is_empty() {
1157        return;
1158    }
1159    let mut filter = RangeFilter {
1160        ranges,
1161        cur_range: 0,
1162        lno_post: 0,
1163        lno_pre: 0,
1164        func: Vec::new(),
1165        rhunk: Vec::new(),
1166        rhunk_old_begin: 0,
1167        rhunk_old_count: 0,
1168        rhunk_new_begin: 0,
1169        rhunk_new_count: 0,
1170        rhunk_active: false,
1171        rhunk_has_changes: false,
1172        pending_rm: Vec::new(),
1173        pending_rm_count: 0,
1174        pending_rm_pre_begin: 0,
1175    };
1176    for line in split_keep_newline(full) {
1177        if line.starts_with(b"@@ ") {
1178            // New xdiff hunk: any pending removals from the previous hunk are
1179            // left in place (diff.c does the same — the next body line decides
1180            // their fate), and the range hunk cursor is NOT reset across xdiff
1181            // hunks. Parse the begin line numbers + funcname suffix.
1182            if let Some((old_begin, new_begin, func)) = parse_hunk_header(line) {
1183                filter.lno_post = new_begin;
1184                filter.lno_pre = old_begin;
1185                filter.func = func;
1186            }
1187            continue;
1188        }
1189        let marker = line.first().copied().unwrap_or(b' ');
1190        filter.body_line(out, marker, line);
1191    }
1192    filter.flush_rhunk(out);
1193}
1194
1195/// Split `buf` into lines, each INCLUDING its trailing `\n` (the final line may
1196/// lack one). Empty input yields no lines.
1197fn split_keep_newline(buf: &[u8]) -> impl Iterator<Item = &[u8]> {
1198    let mut start = 0usize;
1199    std::iter::from_fn(move || {
1200        if start >= buf.len() {
1201            return None;
1202        }
1203        let rel = buf[start..].iter().position(|&b| b == b'\n');
1204        let end = match rel {
1205            Some(pos) => start + pos + 1,
1206            None => buf.len(),
1207        };
1208        let line = &buf[start..end];
1209        start = end;
1210        Some(line)
1211    })
1212}
1213
1214/// Parse a rendered `@@ -o[,c] +n[,c] @@[ func]` header, returning the 1-based
1215/// old/new begin line numbers and the trailing funcname bytes (without the
1216/// leading space, without a trailing newline). Begin is the value xdiff emits:
1217/// 1-based when the count is non-zero, one-less when zero (unused in that case).
1218fn parse_hunk_header(line: &[u8]) -> Option<(i64, i64, Vec<u8>)> {
1219    // line = "@@ -A,B +C,D @@ func\n" (or "@@ -A +C @@\n", etc.)
1220    let rest = line.strip_prefix(b"@@ -")?;
1221    let plus = rest.iter().position(|&b| b == b'+')?;
1222    let old_part = &rest[..plus];
1223    // skip "+", parse new part up to " @@"
1224    let after_plus = &rest[plus + 1..];
1225    let close = find_subslice(after_plus, b" @@")?;
1226    let new_part = &after_plus[..close];
1227    let old_begin = parse_range_begin(old_part.split(|&b| b == b' ').next().unwrap_or(old_part))?;
1228    let new_begin = parse_range_begin(new_part)?;
1229    // Funcname suffix: everything after " @@ " (a single space separates it).
1230    let tail = &after_plus[close + 3..];
1231    let func = if let Some(f) = tail.strip_prefix(b" ") {
1232        let mut f = f.to_vec();
1233        if f.last() == Some(&b'\n') {
1234            f.pop();
1235        }
1236        f
1237    } else {
1238        Vec::new()
1239    };
1240    Some((old_begin, new_begin, func))
1241}
1242
1243/// Parse the "A" or "A,B" begin field of an `@@` range side into A.
1244fn parse_range_begin(field: &[u8]) -> Option<i64> {
1245    let begin = field.split(|&b| b == b',').next().unwrap_or(field);
1246    std::str::from_utf8(begin).ok()?.trim().parse::<i64>().ok()
1247}
1248
1249fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
1250    if needle.is_empty() || haystack.len() < needle.len() {
1251        return None;
1252    }
1253    (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
1254}
1255
1256/// One contiguous change in the edit script (git's `xdchange`): the old/new
1257/// line ranges it covers, the tagged-stream positions of its first/last
1258/// non-context line, and whether it is ignorable (`--ignore-blank-lines` /
1259/// `-I<regex>`).
1260#[derive(Clone, Copy)]
1261struct Change {
1262    /// 0-based first old line in this change (`i1`).
1263    i1: usize,
1264    /// Number of old lines deleted (`chg1`).
1265    chg1: usize,
1266    /// 0-based first new line in this change (`i2`).
1267    i2: usize,
1268    /// Number of new lines inserted (`chg2`).
1269    chg2: usize,
1270    /// Tagged-stream index of this change's first non-context line.
1271    tag_first: usize,
1272    /// Tagged-stream index of this change's last non-context line.
1273    tag_last: usize,
1274    /// Whether this change is ignorable (blank-only / regex-only).
1275    ignore: bool,
1276}
1277
1278/// Build the change list (xdchange script) from the flattened tagged stream.
1279/// Each maximal run of consecutive `-`/`+` lines becomes one [`Change`].
1280fn build_changes(tagged: &[TaggedLine<'_>]) -> Vec<Change> {
1281    let mut changes: Vec<Change> = Vec::new();
1282    let mut idx = 0usize;
1283    while idx < tagged.len() {
1284        if tagged[idx].kind == LineKind::Context {
1285            idx += 1;
1286            continue;
1287        }
1288        let tag_first = idx;
1289        let i1 = tagged[idx].old_index;
1290        let i2 = tagged[idx].new_index;
1291        let mut chg1 = 0usize;
1292        let mut chg2 = 0usize;
1293        while idx < tagged.len() && tagged[idx].kind != LineKind::Context {
1294            match tagged[idx].kind {
1295                LineKind::Delete => chg1 += 1,
1296                LineKind::Insert => chg2 += 1,
1297                LineKind::Context => unreachable!(),
1298            }
1299            idx += 1;
1300        }
1301        changes.push(Change {
1302            i1,
1303            chg1,
1304            i2,
1305            chg2,
1306            tag_first,
1307            tag_last: idx - 1,
1308            ignore: false,
1309        });
1310    }
1311    changes
1312}
1313
1314/// Mark each change ignorable when its old and new lines are all blank
1315/// (`--ignore-blank-lines`) or all regex-matched (`-I<regex>`), mirroring
1316/// git's `xdl_mark_ignorable_lines` then `xdl_mark_ignorable_regex` (the regex
1317/// pass never overrides a blank-marked change).
1318fn mark_ignorable_changes(
1319    changes: &mut [Change],
1320    old: &[DiffLine<'_>],
1321    new: &[DiffLine<'_>],
1322    ws_ignore: WsIgnore,
1323    ci: &ChangeIgnore<'_>,
1324) {
1325    for change in changes.iter_mut() {
1326        if ci.ignore_blank_lines {
1327            let blank = (change.i1..change.i1 + change.chg1)
1328                .all(|i| line_is_blank(old[i].content, ws_ignore))
1329                && (change.i2..change.i2 + change.chg2)
1330                    .all(|i| line_is_blank(new[i].content, ws_ignore));
1331            change.ignore = blank;
1332        }
1333        if !change.ignore
1334            && let Some(regex_match) = ci.regex_match
1335        {
1336            let matched = (change.i1..change.i1 + change.chg1).all(|i| regex_match(old[i].content))
1337                && (change.i2..change.i2 + change.chg2).all(|i| regex_match(new[i].content));
1338            change.ignore = matched;
1339        }
1340    }
1341}
1342
1343/// Group the change list into hunks, returning each hunk's
1344/// `(first_real_change_tag, last_real_change_tag)` tagged-stream span. This is
1345/// a behavioural port of git's `xemit.c:xdl_get_hunk` driving
1346/// `xdl_call_hunk_func`'s loop: it applies the `distance > max_common` hunk
1347/// break, drops leading ignorable changes, and excludes trailing/isolated
1348/// ignorable changes from the emitted span.
1349fn group_changes_into_hunks(
1350    changes: &[Change],
1351    context: usize,
1352    interhunk: usize,
1353) -> Vec<(usize, usize)> {
1354    let max_common = context.saturating_add(context).saturating_add(interhunk);
1355    let max_ignorable = context;
1356
1357    let mut hunks: Vec<(usize, usize)> = Vec::new();
1358    // `start` is the index into `changes` of the first change still to emit
1359    // (xdl_call_hunk_func's `xch` cursor).
1360    let mut start = 0usize;
1361    while start < changes.len() {
1362        // Remove ignorable changes that are too far before other changes
1363        // (xdl_get_hunk's leading-ignorable loop). Faithful port: `xchp`
1364        // iterates over EVERY leading ignorable change; whenever the gap to the
1365        // next change is ≥ max_ignorable (or there is no next change), the hunk
1366        // start advances to that next change (`None` ⇒ the whole tail is
1367        // ignorable ⇒ no hunk). Unlike a break-on-first-keep loop, this still
1368        // advances past a run of ignorables when the FINAL one has no successor.
1369        {
1370            let mut xchp = start;
1371            while xchp < changes.len() && changes[xchp].ignore {
1372                let cur = &changes[xchp];
1373                match changes.get(xchp + 1) {
1374                    None => {
1375                        start = changes.len();
1376                    }
1377                    Some(next) => {
1378                        if next.i1 - (cur.i1 + cur.chg1) >= max_ignorable {
1379                            start = xchp + 1;
1380                        }
1381                    }
1382                }
1383                xchp += 1;
1384            }
1385        }
1386        if start >= changes.len() {
1387            break;
1388        }
1389
1390        // Walk forward extending the hunk; `last` tracks the last *real*
1391        // (non-ignorable, or the very first) change that defines the hunk end.
1392        let mut last = start;
1393        let mut ignored = 0usize; // ignored new-line count accumulated
1394        let mut prev = start;
1395        let mut idx = start + 1;
1396        while idx < changes.len() {
1397            let xch = &changes[idx];
1398            let xchp = &changes[prev];
1399            let distance = xch.i1 - (xchp.i1 + xchp.chg1);
1400            if distance > max_common {
1401                break;
1402            }
1403            if distance < max_ignorable && (!xch.ignore || last == prev) {
1404                last = idx;
1405                ignored = 0;
1406            } else if distance < max_ignorable && xch.ignore {
1407                ignored += xch.chg2;
1408            } else if last != prev
1409                && xch.i1 + ignored - (changes[last].i1 + changes[last].chg1) > max_common
1410            {
1411                break;
1412            } else if !xch.ignore {
1413                last = idx;
1414                ignored = 0;
1415            } else {
1416                ignored += xch.chg2;
1417            }
1418            prev = idx;
1419            idx += 1;
1420        }
1421
1422        let first_change = &changes[start];
1423        let last_change = &changes[last];
1424        hunks.push((first_change.tag_first, last_change.tag_last));
1425        start = last + 1;
1426    }
1427
1428    hunks
1429}
1430
1431fn expand_hunks_to_function_context(
1432    groups: &[(usize, usize)],
1433    tagged: &[TaggedLine<'_>],
1434    old: &[DiffLine<'_>],
1435    new: &[DiffLine<'_>],
1436    mut heading: Option<&mut HeadingFn<'_>>,
1437) -> Vec<(usize, usize)> {
1438    let Some(classifier) = heading.as_mut() else {
1439        return groups.to_vec();
1440    };
1441    let mut expanded = Vec::with_capacity(groups.len());
1442    for &(start, end) in groups {
1443        let first = tagged[start];
1444        let last = tagged[end];
1445        let old_changed = tagged[start..=end]
1446            .iter()
1447            .any(|line| line.kind == LineKind::Delete);
1448        let (side, range) = if old_changed {
1449            (
1450                FunctionSide::Old,
1451                function_context_range(old, first.old_index, false, classifier),
1452            )
1453        } else {
1454            (
1455                FunctionSide::New,
1456                function_context_range(new, first.new_index, true, classifier),
1457            )
1458        };
1459        let Some((range_start, range_end)) = range else {
1460            expanded.push((start, end));
1461            continue;
1462        };
1463        let mut hunk_start = expand_tag_start(tagged, start, side, range_start);
1464        let mut hunk_end = expand_tag_end(tagged, end, side, range_end);
1465        if old_changed {
1466            if last.old_index >= range_end {
1467                hunk_end = end;
1468            }
1469        } else if last.new_index >= range_end {
1470            hunk_end = end;
1471        }
1472        if hunk_start > start {
1473            hunk_start = start;
1474        }
1475        if hunk_end < end {
1476            hunk_end = end;
1477        }
1478        if let Some(prev) = expanded.last_mut()
1479            && hunk_start <= prev.1 + 1
1480        {
1481            prev.1 = prev.1.max(hunk_end);
1482            continue;
1483        }
1484        expanded.push((hunk_start, hunk_end));
1485    }
1486    expanded
1487}
1488
1489#[derive(Clone, Copy)]
1490enum FunctionSide {
1491    Old,
1492    New,
1493}
1494
1495fn function_context_range(
1496    lines: &[DiffLine<'_>],
1497    anchor: usize,
1498    prefer_forward: bool,
1499    heading: &mut HeadingFn<'_>,
1500) -> Option<(usize, usize)> {
1501    if lines.is_empty() {
1502        return None;
1503    }
1504    let anchor = anchor.min(lines.len() - 1);
1505    let mut heading_idx = None;
1506    for idx in (0..=anchor).rev() {
1507        if heading(lines[idx].content).is_some() {
1508            heading_idx = Some(idx);
1509            break;
1510        }
1511    }
1512    if heading_idx.is_none() && prefer_forward {
1513        for (idx, line) in lines.iter().enumerate().skip(anchor) {
1514            if heading(line.content).is_some() {
1515                heading_idx = Some(idx);
1516                break;
1517            }
1518        }
1519    }
1520
1521    let (mut start, mut end) = if let Some(idx) = heading_idx {
1522        let mut start = idx;
1523        while start > 0 && !line_is_blank(lines[start - 1].content, WsIgnore::default()) {
1524            start -= 1;
1525        }
1526        let mut end = lines.len();
1527        for (next, line) in lines.iter().enumerate().skip(idx + 1) {
1528            if heading(line.content).is_some() {
1529                end = next;
1530                break;
1531            }
1532        }
1533        (start, end)
1534    } else {
1535        (0, lines.len())
1536    };
1537
1538    while start < end && line_is_blank(lines[start].content, WsIgnore::default()) {
1539        start += 1;
1540    }
1541    while end > start && line_is_blank(lines[end - 1].content, WsIgnore::default()) {
1542        end -= 1;
1543    }
1544    (start < end).then_some((start, end))
1545}
1546
1547fn expand_tag_start(
1548    tagged: &[TaggedLine<'_>],
1549    current: usize,
1550    side: FunctionSide,
1551    range_start: usize,
1552) -> usize {
1553    let mut start = current;
1554    while start > 0 {
1555        let prev = tagged[start - 1];
1556        let line_index = match side {
1557            FunctionSide::Old => prev.old_index,
1558            FunctionSide::New => prev.new_index,
1559        };
1560        if line_index < range_start {
1561            break;
1562        }
1563        start -= 1;
1564    }
1565    start
1566}
1567
1568fn expand_tag_end(
1569    tagged: &[TaggedLine<'_>],
1570    current: usize,
1571    side: FunctionSide,
1572    range_end: usize,
1573) -> usize {
1574    let mut end = current;
1575    while end + 1 < tagged.len() {
1576        let next = tagged[end + 1];
1577        let line_index = match side {
1578            FunctionSide::Old => next.old_index,
1579            FunctionSide::New => next.new_index,
1580        };
1581        if line_index >= range_end {
1582            break;
1583        }
1584        end += 1;
1585    }
1586    end
1587}
1588
1589const COLOR_MOVED_MIN_ALNUM_COUNT: usize = 20;
1590const INDENT_BLANKLINE: i32 = i32::MIN;
1591
1592#[derive(Clone, Copy, Default)]
1593struct MovedStyle {
1594    moved: bool,
1595    alt: bool,
1596    uninteresting: bool,
1597}
1598
1599#[derive(Clone)]
1600struct MovedEntry {
1601    tag_idx: usize,
1602    next_line: Option<usize>,
1603    next_match: Option<usize>,
1604    id: usize,
1605    indent_width: i32,
1606}
1607
1608#[derive(Clone, Copy, Default)]
1609struct MovedEntryList {
1610    add: Option<usize>,
1611    del: Option<usize>,
1612}
1613
1614#[derive(Clone, Copy)]
1615struct MovedBlock {
1616    match_entry: usize,
1617    wsd: i32,
1618}
1619
1620fn mark_color_as_moved(tagged: &[TaggedLine<'_>], color_moved: ColorMoved) -> Vec<MovedStyle> {
1621    let (entries, entry_for_tag, entry_list) = add_lines_to_move_detection(tagged, color_moved.ws);
1622    let mut styles = vec![MovedStyle::default(); tagged.len()];
1623    let mut pmb: Vec<MovedBlock> = Vec::new();
1624    let mut n = 0usize;
1625    let mut flipped_block = false;
1626    let mut block_length = 0usize;
1627    let mut moved_symbol: Option<LineKind> = None;
1628
1629    while n < tagged.len() {
1630        let line = tagged[n];
1631        let line_entry = entry_for_tag[n];
1632        let mut line_match = line_entry.and_then(|entry_idx| {
1633            let id = entries[entry_idx].id;
1634            match line.kind {
1635                LineKind::Insert => entry_list.get(id).and_then(|list| list.del),
1636                LineKind::Delete => entry_list.get(id).and_then(|list| list.add),
1637                LineKind::Context => None,
1638            }
1639        });
1640
1641        if line.kind == LineKind::Context {
1642            flipped_block = false;
1643        }
1644
1645        if !pmb.is_empty() && (line_match.is_none() || Some(line.kind) != moved_symbol) {
1646            if !adjust_last_block(&mut styles, tagged, color_moved.mode, n, block_length)
1647                && block_length > 1
1648            {
1649                line_match = None;
1650                n -= block_length;
1651            }
1652            pmb.clear();
1653            block_length = 0;
1654            flipped_block = false;
1655        }
1656
1657        let Some(line_match) = line_match else {
1658            moved_symbol = None;
1659            n += 1;
1660            continue;
1661        };
1662
1663        if color_moved.mode == ColorMovedMode::Plain {
1664            styles[n].moved = true;
1665            n += 1;
1666            continue;
1667        }
1668
1669        pmb_advance_or_null(
1670            &mut pmb,
1671            &entries,
1672            tagged,
1673            line_entry.expect("plus/minus line has move-detection entry"),
1674            color_moved.ws,
1675        );
1676
1677        if pmb.is_empty() {
1678            let contiguous =
1679                adjust_last_block(&mut styles, tagged, color_moved.mode, n, block_length);
1680            if !contiguous && block_length > 1 {
1681                n -= block_length;
1682            } else {
1683                fill_potential_moved_blocks(
1684                    line_match,
1685                    &entries,
1686                    tagged,
1687                    n,
1688                    color_moved.ws,
1689                    &mut pmb,
1690                );
1691            }
1692
1693            if contiguous && !pmb.is_empty() && moved_symbol == Some(line.kind) {
1694                flipped_block = !flipped_block;
1695            } else {
1696                flipped_block = false;
1697            }
1698
1699            moved_symbol = (!pmb.is_empty()).then_some(line.kind);
1700            block_length = 0;
1701        }
1702
1703        if !pmb.is_empty() {
1704            block_length += 1;
1705            styles[n].moved = true;
1706            if flipped_block && color_moved.mode != ColorMovedMode::Blocks {
1707                styles[n].alt = true;
1708            }
1709        }
1710        n += 1;
1711    }
1712
1713    adjust_last_block(&mut styles, tagged, color_moved.mode, n, block_length);
1714    if color_moved.mode == ColorMovedMode::DimmedZebra {
1715        dim_moved_lines(&mut styles, tagged);
1716    }
1717    styles
1718}
1719
1720fn add_lines_to_move_detection(
1721    tagged: &[TaggedLine<'_>],
1722    ws: ColorMovedWs,
1723) -> (Vec<MovedEntry>, Vec<Option<usize>>, Vec<MovedEntryList>) {
1724    let mut entries: Vec<MovedEntry> = Vec::new();
1725    let mut entry_for_tag = vec![None; tagged.len()];
1726    let mut entry_list = Vec::<MovedEntryList>::new();
1727    let mut interned = HashMap::<Vec<u8>, usize>::new();
1728    let mut prev_line: Option<usize> = None;
1729
1730    for (tag_idx, line) in tagged.iter().enumerate() {
1731        if line.kind != LineKind::Insert && line.kind != LineKind::Delete {
1732            prev_line = None;
1733            continue;
1734        }
1735        let indent = if ws.allow_indentation_change {
1736            moved_indent_data(line.content)
1737        } else {
1738            (0, 0)
1739        };
1740        let key = moved_line_key(line.content, indent.0, ws.ignore);
1741        let id = match interned.get(&key) {
1742            Some(id) => *id,
1743            None => {
1744                let id = interned.len();
1745                interned.insert(key, id);
1746                entry_list.push(MovedEntryList::default());
1747                id
1748            }
1749        };
1750
1751        let entry_idx = entries.len();
1752        if let Some(prev) = prev_line
1753            && tagged[entries[prev].tag_idx].kind == line.kind
1754        {
1755            entries[prev].next_line = Some(entry_idx);
1756        }
1757        let next_match = match line.kind {
1758            LineKind::Insert => entry_list[id].add,
1759            LineKind::Delete => entry_list[id].del,
1760            LineKind::Context => None,
1761        };
1762        entries.push(MovedEntry {
1763            tag_idx,
1764            next_line: None,
1765            next_match,
1766            id,
1767            indent_width: indent.1,
1768        });
1769        match line.kind {
1770            LineKind::Insert => entry_list[id].add = Some(entry_idx),
1771            LineKind::Delete => entry_list[id].del = Some(entry_idx),
1772            LineKind::Context => {}
1773        }
1774        entry_for_tag[tag_idx] = Some(entry_idx);
1775        prev_line = Some(entry_idx);
1776    }
1777
1778    (entries, entry_for_tag, entry_list)
1779}
1780
1781fn moved_line_key(line: &[u8], indent_off: usize, ignore: WsIgnore) -> Vec<u8> {
1782    let bytes = line.get(indent_off..).unwrap_or_default();
1783    if ignore.is_empty() {
1784        bytes.to_vec()
1785    } else {
1786        canonicalize_moved_line(bytes, ignore)
1787    }
1788}
1789
1790fn canonicalize_moved_line(line: &[u8], ignore: WsIgnore) -> Vec<u8> {
1791    let is_ws = |b: u8| matches!(b, b' ' | b'\t' | b'\n' | b'\r' | 0x0b | 0x0c);
1792    if ignore.all_space {
1793        return line.iter().copied().filter(|b| !is_ws(*b)).collect();
1794    }
1795    if ignore.space_change {
1796        let mut out = Vec::with_capacity(line.len());
1797        let mut i = 0usize;
1798        while i < line.len() {
1799            if is_ws(line[i]) {
1800                out.push(b' ');
1801                while i < line.len() && is_ws(line[i]) {
1802                    i += 1;
1803                }
1804            } else {
1805                out.push(line[i]);
1806                i += 1;
1807            }
1808        }
1809        while out.last() == Some(&b' ') {
1810            out.pop();
1811        }
1812        return out;
1813    }
1814    if ignore.space_at_eol {
1815        let mut end = line.len();
1816        if end > 0 && line[end - 1] == b'\n' {
1817            end -= 1;
1818        }
1819        while end > 0 && matches!(line[end - 1], b' ' | b'\t') {
1820            end -= 1;
1821        }
1822        let mut out = line[..end].to_vec();
1823        if line.ends_with(b"\n") {
1824            out.push(b'\n');
1825        }
1826        return out;
1827    }
1828    if ignore.cr_at_eol {
1829        let mut out = line.to_vec();
1830        if out.ends_with(b"\r\n") {
1831            let len = out.len();
1832            out.remove(len - 2);
1833        }
1834        return out;
1835    }
1836    line.to_vec()
1837}
1838
1839fn moved_indent_data(line: &[u8]) -> (usize, i32) {
1840    let mut off = 0usize;
1841    let mut width = 0i32;
1842    while off < line.len() && matches!(line[off], b'\x0c' | b'\x0b' | b'\r') {
1843        off += 1;
1844    }
1845    while off < line.len() {
1846        match line[off] {
1847            b' ' => {
1848                width += 1;
1849                off += 1;
1850            }
1851            b'\t' => {
1852                width += 8 - (width % 8);
1853                off += 1;
1854                while off < line.len() && line[off] == b'\t' {
1855                    width += 8;
1856                    off += 1;
1857                }
1858            }
1859            _ => break,
1860        }
1861    }
1862    if line[off..].iter().all(|b| b.is_ascii_whitespace()) {
1863        (line.len(), INDENT_BLANKLINE)
1864    } else {
1865        (off, width)
1866    }
1867}
1868
1869fn pmb_advance_or_null(
1870    pmb: &mut Vec<MovedBlock>,
1871    entries: &[MovedEntry],
1872    tagged: &[TaggedLine<'_>],
1873    line_entry: usize,
1874    ws: ColorMovedWs,
1875) {
1876    let mut kept = Vec::with_capacity(pmb.len());
1877    for mut block in pmb.iter().copied() {
1878        let Some(cur) = entries[block.match_entry].next_line else {
1879            continue;
1880        };
1881        let matched = if ws.allow_indentation_change {
1882            !cmp_in_block_with_wsd(entries, cur, tagged, line_entry, &mut block)
1883        } else {
1884            entries[cur].id == entries[line_entry].id
1885        };
1886        if matched {
1887            block.match_entry = cur;
1888            kept.push(block);
1889        }
1890    }
1891    *pmb = kept;
1892}
1893
1894fn fill_potential_moved_blocks(
1895    mut line_match: usize,
1896    entries: &[MovedEntry],
1897    tagged: &[TaggedLine<'_>],
1898    tag_idx: usize,
1899    ws: ColorMovedWs,
1900    pmb: &mut Vec<MovedBlock>,
1901) {
1902    loop {
1903        let entry = &entries[line_match];
1904        let wsd = if ws.allow_indentation_change {
1905            compute_ws_delta(tagged[tag_idx].content, entry.indent_width)
1906        } else {
1907            0
1908        };
1909        pmb.push(MovedBlock {
1910            match_entry: line_match,
1911            wsd,
1912        });
1913        match entry.next_match {
1914            Some(next) => line_match = next,
1915            None => break,
1916        }
1917    }
1918}
1919
1920fn compute_ws_delta(line: &[u8], match_indent_width: i32) -> i32 {
1921    let (_, width) = moved_indent_data(line);
1922    if width == INDENT_BLANKLINE && match_indent_width == INDENT_BLANKLINE {
1923        INDENT_BLANKLINE
1924    } else {
1925        width - match_indent_width
1926    }
1927}
1928
1929fn cmp_in_block_with_wsd(
1930    entries: &[MovedEntry],
1931    cur: usize,
1932    tagged: &[TaggedLine<'_>],
1933    line_entry: usize,
1934    block: &mut MovedBlock,
1935) -> bool {
1936    let cur_entry = &entries[cur];
1937    if cur_entry.id != entries[line_entry].id {
1938        return true;
1939    }
1940    if cur_entry.indent_width == INDENT_BLANKLINE {
1941        return false;
1942    }
1943    let (_, line_width) = moved_indent_data(tagged[entries[line_entry].tag_idx].content);
1944    let delta = line_width - cur_entry.indent_width;
1945    if block.wsd == INDENT_BLANKLINE {
1946        block.wsd = delta;
1947    }
1948    delta != block.wsd
1949}
1950
1951fn adjust_last_block(
1952    styles: &mut [MovedStyle],
1953    tagged: &[TaggedLine<'_>],
1954    mode: ColorMovedMode,
1955    n: usize,
1956    block_length: usize,
1957) -> bool {
1958    if mode == ColorMovedMode::Plain {
1959        return block_length != 0;
1960    }
1961    let mut alnum_count = 0usize;
1962    for i in 1..=block_length {
1963        for byte in tagged[n - i].content {
1964            if byte.is_ascii_alphanumeric() {
1965                alnum_count += 1;
1966                if alnum_count >= COLOR_MOVED_MIN_ALNUM_COUNT {
1967                    return true;
1968                }
1969            }
1970        }
1971    }
1972    for i in 1..=block_length {
1973        styles[n - i] = MovedStyle::default();
1974    }
1975    false
1976}
1977
1978fn dim_moved_lines(styles: &mut [MovedStyle], tagged: &[TaggedLine<'_>]) {
1979    for n in 0..tagged.len() {
1980        if tagged[n].kind != LineKind::Insert && tagged[n].kind != LineKind::Delete {
1981            continue;
1982        }
1983        if !styles[n].moved {
1984            continue;
1985        }
1986        let prev = (n > 0
1987            && (tagged[n - 1].kind == LineKind::Insert || tagged[n - 1].kind == LineKind::Delete))
1988            .then_some(n - 1);
1989        let next = (n + 1 < tagged.len()
1990            && (tagged[n + 1].kind == LineKind::Insert || tagged[n + 1].kind == LineKind::Delete))
1991            .then_some(n + 1);
1992
1993        if prev.is_some_and(|i| moved_zebra_mask(styles[i]) == moved_zebra_mask(styles[n]))
1994            && next.is_some_and(|i| moved_zebra_mask(styles[i]) == moved_zebra_mask(styles[n]))
1995        {
1996            styles[n].uninteresting = true;
1997            continue;
1998        }
1999        if prev.is_some_and(|i| styles[i].moved && styles[i].alt != styles[n].alt) {
2000            continue;
2001        }
2002        if next.is_some_and(|i| styles[i].moved && styles[i].alt != styles[n].alt) {
2003            continue;
2004        }
2005        styles[n].uninteresting = true;
2006    }
2007}
2008
2009fn moved_zebra_mask(style: MovedStyle) -> (bool, bool) {
2010    (style.moved, style.alt)
2011}
2012
2013/// Emit a single hunk covering `tagged[start..end]`: the `@@ -os,oc +ns,nc @@
2014/// <heading>` header followed by the context/`-`/`+` lines, including the
2015/// `\ No newline at end of file` markers.
2016fn render_one_hunk(
2017    out: &mut Vec<u8>,
2018    tagged: &[TaggedLine<'_>],
2019    moved_styles: Option<&[MovedStyle]>,
2020    old_lines: &[DiffLine<'_>],
2021    start: usize,
2022    end: usize,
2023    options: &mut HunkRenderOptions<'_, '_>,
2024) {
2025    let slice = &tagged[start..end];
2026    let mut old_count = 0usize;
2027    let mut new_count = 0usize;
2028    for line in slice {
2029        match line.kind {
2030            LineKind::Context => {
2031                old_count += 1;
2032                new_count += 1;
2033            }
2034            LineKind::Delete => old_count += 1,
2035            LineKind::Insert => new_count += 1,
2036        }
2037    }
2038    // 1-based starting line numbers; an empty side starts at 0.
2039    let old_start = if old_count == 0 {
2040        slice.first().map(|line| line.old_index).unwrap_or(0)
2041    } else {
2042        slice
2043            .iter()
2044            .find(|line| line.kind != LineKind::Insert)
2045            .map(|line| line.old_index + 1)
2046            .unwrap_or(1)
2047    };
2048    let new_start = if new_count == 0 {
2049        slice.first().map(|line| line.new_index).unwrap_or(0)
2050    } else {
2051        slice
2052            .iter()
2053            .find(|line| line.kind != LineKind::Delete)
2054            .map(|line| line.new_index + 1)
2055            .unwrap_or(1)
2056    };
2057
2058    let heading = hunk_section_heading(
2059        old_lines,
2060        slice.first().map(|line| line.old_index),
2061        options.heading.as_deref_mut(),
2062    );
2063    let frag = format!(
2064        "@@ -{} +{} @@",
2065        format_hunk_range(old_start, old_count),
2066        format_hunk_range(new_start, new_count)
2067    );
2068    match options.colors {
2069        // Port of emit_hunk_header: the "@@ .. @@" span in the frag color,
2070        // the separating blank in the context color, the heading in the func
2071        // color (each reset-terminated).
2072        Some(colors) => {
2073            out.extend_from_slice(colors.frag.as_bytes());
2074            out.extend_from_slice(frag.as_bytes());
2075            out.extend_from_slice(colors.reset.as_bytes());
2076            if let Some(heading) = &heading {
2077                out.extend_from_slice(colors.context.as_bytes());
2078                out.push(b' ');
2079                out.extend_from_slice(colors.reset.as_bytes());
2080                out.extend_from_slice(colors.func.as_bytes());
2081                out.extend_from_slice(heading);
2082                out.extend_from_slice(colors.reset.as_bytes());
2083            }
2084            out.push(b'\n');
2085        }
2086        None => {
2087            out.extend_from_slice(frag.as_bytes());
2088            if let Some(heading) = &heading {
2089                out.push(b' ');
2090                out.extend_from_slice(heading);
2091            }
2092            out.push(b'\n');
2093        }
2094    }
2095
2096    if let Some(word_diff) = options.word_diff.as_deref_mut() {
2097        // Word-diff rendering: minus/plus runs accumulate and flush at
2098        // context lines (fn_out_consume's diff_words branch); the
2099        // "\ No newline" markers are eaten.
2100        for line in slice {
2101            match line.kind {
2102                LineKind::Delete => word_diff.push_minus(line.content),
2103                LineKind::Insert => word_diff.push_plus(line.content),
2104                LineKind::Context => {
2105                    word_diff.flush(out);
2106                    word_diff.emit_context_line(out, line.content);
2107                }
2108            }
2109        }
2110        word_diff.flush(out);
2111        return;
2112    }
2113
2114    for (offset, line) in slice.iter().enumerate() {
2115        let prefix = match line.kind {
2116            LineKind::Context => b' ',
2117            LineKind::Delete => b'-',
2118            LineKind::Insert => b'+',
2119        };
2120        match options.colors {
2121            Some(colors) => {
2122                // Whitespace-error highlighting applies to the selected line
2123                // kinds (default: new lines only).
2124                let ws_rule = options.ws_error.and_then(|ws| {
2125                    let enabled = match line.kind {
2126                        LineKind::Context => ws.context,
2127                        LineKind::Delete => ws.old,
2128                        LineKind::Insert => ws.new,
2129                    };
2130                    enabled.then_some(ws.rule)
2131                });
2132                let moved = moved_styles
2133                    .and_then(|styles| styles.get(start + offset))
2134                    .copied()
2135                    .filter(|style| style.moved);
2136                write_patch_line_colored(out, prefix, line.content, colors, ws_rule, moved);
2137            }
2138            None => write_patch_line(out, prefix, line.content),
2139        }
2140    }
2141}
2142
2143/// Format one `start,count` side of an `@@` header. git omits the count when
2144/// it is exactly 1 (e.g. `+5` rather than `+5,1`).
2145fn format_hunk_range(start: usize, count: usize) -> String {
2146    if count == 1 {
2147        start.to_string()
2148    } else {
2149        format!("{start},{count}")
2150    }
2151}
2152
2153/// git's section heading for a hunk: the nearest line *before* the hunk's
2154/// first line accepted by the caller's `heading` classifier. Headings are
2155/// produced by the classifier (already capped/trimmed by the caller's
2156/// userdiff machinery). Returns `None` when no such line precedes the hunk or
2157/// no classifier was supplied.
2158fn hunk_section_heading(
2159    old_lines: &[DiffLine<'_>],
2160    first_old_index: Option<usize>,
2161    mut heading: Option<&mut HeadingFn<'_>>,
2162) -> Option<Vec<u8>> {
2163    let first = first_old_index?;
2164    let classifier = heading.as_mut()?;
2165    // Scan upward from the line just above the hunk.
2166    for idx in (0..first).rev() {
2167        if let Some(found) = classifier(old_lines[idx].content) {
2168            return Some(found);
2169        }
2170    }
2171    None
2172}
2173
2174/// Write a single diff line with its `prefix` marker, appending the
2175/// `\ No newline at end of file` note when the source line lacks a trailing
2176/// LF.
2177fn write_patch_line(out: &mut Vec<u8>, prefix: u8, line: &[u8]) {
2178    out.push(prefix);
2179    out.extend_from_slice(line);
2180    if !line.ends_with(b"\n") {
2181        out.extend_from_slice(b"\n\\ No newline at end of file\n");
2182    }
2183}
2184
2185/// [`write_patch_line`] in color, optionally painting whitespace errors.
2186///
2187/// When `ws_rule` is `Some`, the line body is emitted through
2188/// [`crate::ws::ws_check_emit`] (git's `emit_line_ws_markup` highlighted
2189/// branch): the sign is painted in the line color, then the body's non-error
2190/// segments in the line color and its whitespace-error segments in
2191/// `colors.whitespace`. A clean line produces no whitespace spans, so it stays
2192/// visually plain.
2193///
2194/// When `ws_rule` is `None`, context/old lines paint the sign and body in one
2195/// span; new lines paint the sign and body as separate spans (the default
2196/// `ws-error-highlight` path with no rule).
2197fn write_patch_line_colored(
2198    out: &mut Vec<u8>,
2199    prefix: u8,
2200    line: &[u8],
2201    colors: RenderColors<'_>,
2202    ws_rule: Option<crate::ws::WsRule>,
2203    moved: Option<MovedStyle>,
2204) {
2205    let (body, terminated) = match line.split_last() {
2206        Some((b'\n', body)) => (body, true),
2207        _ => (line, false),
2208    };
2209    let color = match (prefix, moved) {
2210        (b'-', Some(style)) if style.uninteresting && style.alt => colors.old_moved_alt_dim,
2211        (b'-', Some(style)) if style.uninteresting => colors.old_moved_dim,
2212        (b'-', Some(style)) if style.alt => colors.old_moved_alt,
2213        (b'-', Some(_)) => colors.old_moved,
2214        (b'+', Some(style)) if style.uninteresting && style.alt => colors.new_moved_alt_dim,
2215        (b'+', Some(style)) if style.uninteresting => colors.new_moved_dim,
2216        (b'+', Some(style)) if style.alt => colors.new_moved_alt,
2217        (b'+', Some(_)) => colors.new_moved,
2218        (b'-', _) => colors.old,
2219        (b'+', _) => colors.new,
2220        _ => colors.context,
2221    };
2222
2223    if let Some(rule) = ws_rule {
2224        if rule == 0 {
2225            out.extend_from_slice(color.as_bytes());
2226            out.push(prefix);
2227            out.extend_from_slice(body);
2228            out.extend_from_slice(colors.reset.as_bytes());
2229            out.push(b'\n');
2230            if !terminated {
2231                out.extend_from_slice(colors.context.as_bytes());
2232                out.extend_from_slice(b"\\ No newline at end of file");
2233                out.extend_from_slice(colors.reset.as_bytes());
2234                out.push(b'\n');
2235            }
2236            return;
2237        }
2238        // Sign in the line color, then the body through ws_check_emit (no
2239        // trailing newline in `body`, so the emit's own LF handling is inert).
2240        out.extend_from_slice(color.as_bytes());
2241        out.push(prefix);
2242        out.extend_from_slice(colors.reset.as_bytes());
2243        let emit_colors = crate::ws::WsEmitColors {
2244            set: color,
2245            reset: colors.reset,
2246            ws: colors.whitespace,
2247        };
2248        crate::ws::ws_check_emit(body, rule, out, &emit_colors);
2249        out.push(b'\n');
2250        if !terminated {
2251            let marker_color = if rule & crate::ws::WS_INCOMPLETE_LINE != 0 {
2252                colors.whitespace
2253            } else {
2254                colors.context
2255            };
2256            out.extend_from_slice(marker_color.as_bytes());
2257            out.extend_from_slice(b"\\ No newline at end of file");
2258            out.extend_from_slice(colors.reset.as_bytes());
2259            out.push(b'\n');
2260        }
2261        return;
2262    }
2263
2264    if prefix == b'+' {
2265        out.extend_from_slice(color.as_bytes());
2266        out.push(prefix);
2267        out.extend_from_slice(colors.reset.as_bytes());
2268        if !body.is_empty() {
2269            out.extend_from_slice(color.as_bytes());
2270            out.extend_from_slice(body);
2271            out.extend_from_slice(colors.reset.as_bytes());
2272        }
2273    } else {
2274        out.extend_from_slice(color.as_bytes());
2275        out.push(prefix);
2276        out.extend_from_slice(body);
2277        out.extend_from_slice(colors.reset.as_bytes());
2278    }
2279    out.push(b'\n');
2280    if !terminated {
2281        out.extend_from_slice(colors.context.as_bytes());
2282        out.extend_from_slice(b"\\ No newline at end of file");
2283        out.extend_from_slice(colors.reset.as_bytes());
2284        out.push(b'\n');
2285    }
2286}
2287
2288// ===========================================================================
2289// Combined / merge-commit diff renderer (`-c` / `--cc`).
2290//
2291// This is the byte-for-byte port of git's `combine-diff.c`
2292// (`combine_diff` / `make_hunks` / `dump_sline`): the multi-parent
2293// `@@@ -p1 -p2 +out @@@` hunk header (with one extra `@` per parent and one
2294// `-pN,cN` column per parent), the per-parent prefix columns on each body
2295// line, and the `--cc` "dense" simplification that drops hunks whose result
2296// matches at least one parent ("uninteresting" hunks).
2297//
2298// The renderer is repository-agnostic, exactly like the unified-diff renderer
2299// above: the caller supplies the *result* blob and one blob per parent (plus
2300// the line-diff algorithm / whitespace flags), and we emit only the hunk body
2301// — the per-file `diff --cc`/`index`/`---`/`+++` metainfo header stays with the
2302// command, which owns the repository / oid / mode context.
2303//
2304// Data-structure correspondence with combine-diff.c:
2305//   * `Sline`  <-> `struct sline` (one per result line, plus a trailing
2306//     sentinel `sline[cnt]` whose `bol` is empty),
2307//   * `Sline.lost` <-> `struct lline` list (lines deleted relative to some
2308//     parent, hung before the surviving result line),
2309//   * the `flag` bitset: bit `n` set => parent `n` did NOT have this result
2310//     line (i.e. it was added relative to parent `n`); bit `num_parent`
2311//     ("mark") => the line is part of a shown hunk; bit `num_parent+1`
2312//     ("no_pre_delete") => suppress the leading deletions before this line.
2313// ===========================================================================
2314
2315/// One result line in the combined-diff `sline` array, plus the deletions
2316/// ("lost" lines) that hang in front of it.
2317struct CdLine {
2318    /// The surviving result line bytes, WITHOUT the trailing newline.
2319    bol: Vec<u8>,
2320    /// Deletions hung before this line, in display order. `parent_map` is the
2321    /// bitset of parents that had the deleted line (git coalesces these across
2322    /// parents via an LCS; we replicate that with [`coalesce_lost`]).
2323    lost: Vec<CdLost>,
2324    /// Pre-coalesce deletions accumulated for the parent currently being
2325    /// folded (drained into `lost` after each parent, like git's `plost`).
2326    plost: Vec<Vec<u8>>,
2327    /// `flag` bitset (see module comment).
2328    flag: u64,
2329    /// `p_lno[n]` = 1-based line number in parent `n` at which a hunk starting
2330    /// at this result line begins. Sized `num_parent`.
2331    p_lno: Vec<u64>,
2332}
2333
2334/// A single deleted ("lost") line and the set of parents it was removed from.
2335struct CdLost {
2336    line: Vec<u8>,
2337    parent_map: u64,
2338}
2339
2340/// Options controlling the combined-diff body emission.
2341pub struct CombinedRenderOptions {
2342    /// `--cc` dense simplification (drop hunks the result shares with a parent);
2343    /// `-c` (plain combined) leaves it `false`.
2344    pub dense: bool,
2345    /// Unified-context line count (`-U`, default 3).
2346    pub context: usize,
2347    /// Line-diff algorithm used for each parent-vs-result 2-way diff.
2348    pub algorithm: DiffAlgorithm,
2349    /// Whitespace-ignore flags applied to each parent-vs-result 2-way diff and
2350    /// to the lost-line coalescing match.
2351    pub ws_ignore: WsIgnore,
2352}
2353
2354impl Default for CombinedRenderOptions {
2355    fn default() -> Self {
2356        Self {
2357            dense: true,
2358            context: DEFAULT_CONTEXT,
2359            algorithm: DiffAlgorithm::Myers,
2360            ws_ignore: WsIgnore::default(),
2361        }
2362    }
2363}
2364
2365/// Render a combined / merge diff body into `out`.
2366///
2367/// `result` is the merge-result blob; `parents` holds one blob per parent (in
2368/// parent order). Returns `true` when at least one hunk survives the
2369/// "interesting" filter — the caller uses this to decide whether to print the
2370/// metainfo header at all (git only prints `diff --cc <path>` + body when
2371/// `show_hunks || mode_differs`).
2372///
2373/// Mirrors `show_patch_diff`'s body half: build the `sline` array, fold each
2374/// parent into it via [`combine_one_parent`], run [`make_hunks`], then
2375/// [`dump_sline`].
2376pub fn render_combined(out: &mut Vec<u8>, result: &[u8], parents: &[&[u8]]) -> bool {
2377    render_combined_with(out, result, parents, &CombinedRenderOptions::default())
2378}
2379
2380/// [`render_combined`] with explicit options.
2381pub fn render_combined_with(
2382    out: &mut Vec<u8>,
2383    result: &[u8],
2384    parents: &[&[u8]],
2385    options: &CombinedRenderOptions,
2386) -> bool {
2387    let num_parent = parents.len();
2388    debug_assert!(num_parent >= 1);
2389
2390    // Split the result into lines (without trailing newline), counting an
2391    // unterminated final line as its own line — git's `cnt` counts '\n' plus an
2392    // incomplete trailing line.
2393    let result_lines = split_lines(result);
2394    let cnt = result_lines.len();
2395
2396    // git allocates `cnt + 2` slines: indices `0..cnt-1` are the result lines,
2397    // `sline[cnt]` is the trailing sentinel (where end-of-file deletions hang),
2398    // and `sline[cnt+1]` carries the per-parent trailer p_lno that
2399    // `show_parent_lno` reads for a hunk whose end touches the last line.
2400    let mut sline: Vec<CdLine> = Vec::with_capacity(cnt + 2);
2401    for line in &result_lines {
2402        sline.push(CdLine {
2403            bol: line.bytes_without_newline().to_vec(),
2404            lost: Vec::new(),
2405            plost: Vec::new(),
2406            flag: 0,
2407            p_lno: vec![0; num_parent],
2408        });
2409    }
2410    for _ in 0..2 {
2411        sline.push(CdLine {
2412            bol: Vec::new(),
2413            lost: Vec::new(),
2414            plost: Vec::new(),
2415            flag: 0,
2416            p_lno: vec![0; num_parent],
2417        });
2418    }
2419
2420    // Fold each parent into the sline array. git reuses an earlier parent's
2421    // result when two parents have the identical blob (`reuse_combine_diff`);
2422    // we replicate that to keep p_lno / flags identical.
2423    for n in 0..num_parent {
2424        let mut reused = None;
2425        for j in 0..n {
2426            if parents[j] == parents[n] {
2427                reused = Some(j);
2428                break;
2429            }
2430        }
2431        match reused {
2432            Some(j) => reuse_combine_diff(&mut sline, cnt, n, j),
2433            None => combine_one_parent(&mut sline, &result_lines, parents[n], n, options),
2434        }
2435    }
2436
2437    let show_hunks = make_hunks(&mut sline, cnt, num_parent, options.dense, options.context);
2438    if show_hunks {
2439        dump_sline(out, &sline, cnt, num_parent, options.context);
2440    }
2441    show_hunks
2442}
2443
2444/// Fold one parent's 2-way diff against the result into the `sline` array
2445/// (git's `combine_diff` + the consume_hunk/consume_line callbacks).
2446fn combine_one_parent(
2447    sline: &mut [CdLine],
2448    result_lines: &[DiffLine<'_>],
2449    parent: &[u8],
2450    n: usize,
2451    options: &CombinedRenderOptions,
2452) {
2453    let cnt = result_lines.len();
2454    let nmask = 1u64 << n;
2455    let parent_lines = split_lines(parent);
2456    let ops = myers_diff_lines_ws(
2457        &parent_lines,
2458        result_lines,
2459        options.ws_ignore,
2460        options.algorithm,
2461    );
2462
2463    // Walk the edit script, tracking the 1-based result line number (`lno`,
2464    // git's `state->lno`) and the parent line number (`p_lno`/`ob`). For each
2465    // hunk: deletions hang on `lost_bucket`; insertions set the nmask flag on
2466    // the result line; the hunk start records the parent line number into
2467    // `p_lno[n]` of the result line preceding the hunk.
2468    //
2469    // git groups the script into hunks (runs separated by Equal context); we
2470    // mirror consume_hunk by detecting the boundary at each non-Equal run.
2471    let mut old_idx: usize = 0; // 0-based parent line consumed
2472    let mut new_idx: usize = 0; // 0-based result line consumed
2473    let mut i = 0;
2474    while i < ops.len() {
2475        match ops[i] {
2476            DiffOp::Equal(k) => {
2477                old_idx += k;
2478                new_idx += k;
2479                i += 1;
2480            }
2481            _ => {
2482                // Collect a maximal run of consecutive Delete/Insert ops as one
2483                // hunk (git's xdiff emits one @@ hunk per such run).
2484                let hunk_old_start = old_idx; // 0-based
2485                let hunk_new_start = new_idx; // 0-based
2486                let mut dels: Vec<&[u8]> = Vec::new();
2487                while i < ops.len() {
2488                    match ops[i] {
2489                        DiffOp::Delete(k) => {
2490                            for _ in 0..k {
2491                                dels.push(parent_lines[old_idx].bytes_without_newline());
2492                                old_idx += 1;
2493                            }
2494                            i += 1;
2495                        }
2496                        DiffOp::Insert(k) => {
2497                            new_idx += k;
2498                            i += 1;
2499                        }
2500                        DiffOp::Equal(_) => break,
2501                    }
2502                }
2503                let _ = hunk_old_start;
2504
2505                // Lost bucket: deletions hang on the result line at the hunk
2506                // start (sline[hunk_new_start]). git's distinction between the
2507                // additions-present (`nb-1`) and pure-deletion (`nb`) cases
2508                // collapses to the same index here because our `hunk_new_start`
2509                // is the 0-based result line immediately *after* the preceding
2510                // context, matching git's `nb-1` for the additions case and the
2511                // bucket-after for the pure-deletion case once the result line
2512                // numbering (1-based `nb`) is accounted for. The authoritative
2513                // p_lno values are recomputed in the loop below, so we do not
2514                // record them here.
2515                for d in &dels {
2516                    sline[hunk_new_start].plost.push(d.to_vec());
2517                }
2518                // Mark inserted result lines: flag bit n set => parent n lacked
2519                // this line.
2520                for line in sline.iter_mut().take(new_idx.min(cnt)).skip(hunk_new_start) {
2521                    line.flag |= nmask;
2522                }
2523            }
2524        }
2525    }
2526
2527    // Coalesce the plost lines into lost (git's coalesce_lines), then assign
2528    // p_lno numbers per parent — git's second loop in combine_diff.
2529    let mut p_lno: u64 = 1;
2530    for (lno, line) in sline.iter_mut().enumerate().take(cnt + 1) {
2531        line.p_lno[n] = p_lno;
2532        if !line.plost.is_empty() {
2533            let plost = std::mem::take(&mut line.plost);
2534            coalesce_lost(&mut line.lost, plost, n, options);
2535        }
2536        // How many parent lines does this sline advance?
2537        for ll in &line.lost {
2538            if ll.parent_map & nmask != 0 {
2539                p_lno += 1; // '-' means parent had it
2540            }
2541        }
2542        if lno < cnt && (line.flag & nmask) == 0 {
2543            p_lno += 1; // no '+' means parent had it
2544        }
2545    }
2546    sline[cnt + 1].p_lno[n] = p_lno; // trailer (git's sline[cnt+1])
2547}
2548
2549/// Coalesce a parent's freshly-collected deletions into the line's existing
2550/// lost list (git's `coalesce_lines` LCS merge). A deletion that matches an
2551/// already-present lost line (under the active whitespace flags) gets its
2552/// parent bit OR'd into that line's `parent_map` instead of being added again.
2553fn coalesce_lost(
2554    base: &mut Vec<CdLost>,
2555    newlines: Vec<Vec<u8>>,
2556    n: usize,
2557    options: &CombinedRenderOptions,
2558) {
2559    let pmask = 1u64 << n;
2560    if newlines.is_empty() {
2561        return;
2562    }
2563    if base.is_empty() {
2564        for line in newlines {
2565            base.push(CdLost {
2566                line,
2567                parent_map: pmask,
2568            });
2569        }
2570        return;
2571    }
2572
2573    // LCS over (base lines, new lines) by whitespace-aware equality, exactly
2574    // like git: MATCH => OR the parent bit into the base line; NEW => insert the
2575    // new line at that position; BASE => keep base line as-is.
2576    let m = base.len();
2577    let k = newlines.len();
2578    let mut lcs = vec![vec![0i32; k + 1]; m + 1];
2579    for i in 1..=m {
2580        for j in 1..=k {
2581            if combined_lines_match(&base[i - 1].line, &newlines[j - 1], options.ws_ignore) {
2582                lcs[i][j] = lcs[i - 1][j - 1] + 1;
2583            } else if lcs[i][j - 1] >= lcs[i - 1][j] {
2584                lcs[i][j] = lcs[i][j - 1];
2585            } else {
2586                lcs[i][j] = lcs[i - 1][j];
2587            }
2588        }
2589    }
2590
2591    // Backtrack, building the merged list in reverse.
2592    let mut merged: Vec<CdLost> = Vec::with_capacity(m + k);
2593    let mut i = m;
2594    let mut j = k;
2595    while i > 0 || j > 0 {
2596        if i > 0
2597            && j > 0
2598            && combined_lines_match(&base[i - 1].line, &newlines[j - 1], options.ws_ignore)
2599        {
2600            let mut entry = std::mem::replace(
2601                &mut base[i - 1],
2602                CdLost {
2603                    line: Vec::new(),
2604                    parent_map: 0,
2605                },
2606            );
2607            entry.parent_map |= pmask;
2608            merged.push(entry);
2609            i -= 1;
2610            j -= 1;
2611        } else if j > 0 && (i == 0 || lcs[i][j - 1] >= lcs[i - 1][j]) {
2612            merged.push(CdLost {
2613                line: newlines[j - 1].clone(),
2614                parent_map: pmask,
2615            });
2616            j -= 1;
2617        } else {
2618            let entry = std::mem::replace(
2619                &mut base[i - 1],
2620                CdLost {
2621                    line: Vec::new(),
2622                    parent_map: 0,
2623                },
2624            );
2625            merged.push(entry);
2626            i -= 1;
2627        }
2628    }
2629    merged.reverse();
2630    *base = merged;
2631}
2632
2633/// Whitespace-aware line equality used by the lost-line coalescer
2634/// (git's `match_string_spaces`). Only the all-space / space-change flavours
2635/// affect the comparison; otherwise it is a byte compare.
2636fn combined_lines_match(a: &[u8], b: &[u8], ws: WsIgnore) -> bool {
2637    if ws.all_space || ws.space_change || ws.space_at_eol {
2638        let at = strip_trailing_ws(a);
2639        let bt = strip_trailing_ws(b);
2640        if !ws.all_space && !ws.space_change {
2641            return at == bt;
2642        }
2643        return ws_squash_eq(at, bt, ws.space_change);
2644    }
2645    a == b
2646}
2647
2648fn strip_trailing_ws(s: &[u8]) -> &[u8] {
2649    let mut end = s.len();
2650    while end > 0 && (s[end - 1] == b' ' || s[end - 1] == b'\t') {
2651        end -= 1;
2652    }
2653    &s[..end]
2654}
2655
2656/// Compare two lines ignoring whitespace runs (`-w`) or treating runs as a
2657/// single space (`-b`).
2658fn ws_squash_eq(a: &[u8], b: &[u8], change_only: bool) -> bool {
2659    let is_ws = |c: u8| c == b' ' || c == b'\t';
2660    let (mut ia, mut ib) = (0usize, 0usize);
2661    while ia < a.len() && ib < b.len() {
2662        let (ca, cb) = (a[ia], b[ib]);
2663        if is_ws(ca) || is_ws(cb) {
2664            if change_only && (!is_ws(ca) || !is_ws(cb)) {
2665                return false;
2666            }
2667            // For -b, a whitespace run on both sides counts as equal; for -w,
2668            // whitespace is skipped entirely. Skip the runs on both sides.
2669            if change_only {
2670                while ia < a.len() && is_ws(a[ia]) {
2671                    ia += 1;
2672                }
2673                while ib < b.len() && is_ws(b[ib]) {
2674                    ib += 1;
2675                }
2676                continue;
2677            } else {
2678                if is_ws(ca) {
2679                    ia += 1;
2680                    continue;
2681                }
2682                if is_ws(cb) {
2683                    ib += 1;
2684                    continue;
2685                }
2686            }
2687        }
2688        if ca != cb {
2689            return false;
2690        }
2691        ia += 1;
2692        ib += 1;
2693    }
2694    // Consume trailing whitespace.
2695    while ia < a.len() && is_ws(a[ia]) {
2696        ia += 1;
2697    }
2698    while ib < b.len() && is_ws(b[ib]) {
2699        ib += 1;
2700    }
2701    ia == a.len() && ib == b.len()
2702}
2703
2704/// git's `reuse_combine_diff`: when parent `i` has the same blob as a
2705/// previously-folded parent `j`, copy `j`'s flags / lost parent-bits / p_lno
2706/// across instead of re-diffing.
2707fn reuse_combine_diff(sline: &mut [CdLine], cnt: usize, i: usize, j: usize) {
2708    let imask = 1u64 << i;
2709    let jmask = 1u64 << j;
2710    for line in sline.iter_mut().take(cnt + 1) {
2711        line.p_lno[i] = line.p_lno[j];
2712        for ll in &mut line.lost {
2713            if ll.parent_map & jmask != 0 {
2714                ll.parent_map |= imask;
2715            }
2716        }
2717        if line.flag & jmask != 0 {
2718            line.flag |= imask;
2719        }
2720    }
2721    // The overall trailer (sline[cnt+1]).
2722    sline[cnt + 1].p_lno[i] = sline[cnt + 1].p_lno[j];
2723}
2724
2725/// Is this result line "interesting" — does any parent lack it, or does it have
2726/// deletions hung in front (git's `interesting`).
2727fn cd_interesting(sline: &CdLine, all_mask: u64) -> bool {
2728    (sline.flag & all_mask) != 0 || !sline.lost.is_empty()
2729}
2730
2731/// git's `adjust_hunk_tail`.
2732fn adjust_hunk_tail(sline: &[CdLine], all_mask: u64, hunk_begin: usize, mut i: usize) -> usize {
2733    if hunk_begin < i && (sline[i - 1].flag & all_mask) == 0 {
2734        i -= 1;
2735    }
2736    i
2737}
2738
2739/// git's `find_next`.
2740fn find_next(
2741    sline: &[CdLine],
2742    mark: u64,
2743    mut i: usize,
2744    cnt: usize,
2745    look_for_uninteresting: bool,
2746) -> usize {
2747    while i <= cnt {
2748        let marked = (sline[i].flag & mark) != 0;
2749        if look_for_uninteresting {
2750            if !marked {
2751                return i;
2752            }
2753        } else if marked {
2754            return i;
2755        }
2756        i += 1;
2757    }
2758    i
2759}
2760
2761/// git's `give_context`: paint context lines (and bridge small gaps) around the
2762/// interesting lines, using the `mark` bit. Returns whether any hunk shows.
2763fn give_context(sline: &mut [CdLine], cnt: usize, num_parent: usize, context: usize) -> bool {
2764    let all_mask = (1u64 << num_parent) - 1;
2765    let mark = 1u64 << num_parent;
2766    let no_pre_delete = 2u64 << num_parent;
2767
2768    let mut i = find_next(sline, mark, 0, cnt, false);
2769    if cnt < i {
2770        return false;
2771    }
2772
2773    while i <= cnt {
2774        let mut j = i.saturating_sub(context);
2775        // Paint a few lines before the first interesting line.
2776        while j < i {
2777            if (sline[j].flag & mark) == 0 {
2778                sline[j].flag |= no_pre_delete;
2779            }
2780            sline[j].flag |= mark;
2781            j += 1;
2782        }
2783
2784        loop {
2785            // Where does the next uninteresting line start?
2786            j = find_next(sline, mark, i, cnt, true);
2787            if cnt < j {
2788                // The rest are all interesting.
2789                return true;
2790            }
2791            // Lookahead context lines.
2792            let k = find_next(sline, mark, j, cnt, false);
2793            let j2 = adjust_hunk_tail(sline, all_mask, i, j);
2794
2795            if k < j2 + context {
2796                // Small gap: paint it interesting and continue.
2797                let mut jj = j2;
2798                while jj < k {
2799                    sline[jj].flag |= mark;
2800                    jj += 1;
2801                }
2802                i = k;
2803                continue;
2804            }
2805
2806            // No overlap within context: paint the trailing edge a bit.
2807            i = k;
2808            let kk = if j2 + context < cnt + 1 {
2809                j2 + context
2810            } else {
2811                cnt + 1
2812            };
2813            let mut jj = j2;
2814            while jj < kk {
2815                sline[jj].flag |= mark;
2816                jj += 1;
2817            }
2818            break;
2819        }
2820    }
2821    true
2822}
2823
2824/// git's `make_hunks`: mark interesting lines, run the `--cc` dense
2825/// simplification when requested, then `give_context`.
2826fn make_hunks(
2827    sline: &mut [CdLine],
2828    cnt: usize,
2829    num_parent: usize,
2830    dense: bool,
2831    context: usize,
2832) -> bool {
2833    let all_mask = (1u64 << num_parent) - 1;
2834    let mark = 1u64 << num_parent;
2835
2836    for line in sline.iter_mut().take(cnt + 1) {
2837        if cd_interesting(line, all_mask) {
2838            line.flag |= mark;
2839        } else {
2840            line.flag &= !mark;
2841        }
2842    }
2843    if !dense {
2844        return give_context(sline, cnt, num_parent, context);
2845    }
2846
2847    // Dense simplification: for each marked hunk, drop it when the result
2848    // differs from a single parent only (or matches all but one parent the
2849    // same way) — git's "interesting" recomputation.
2850    let mut i = 0;
2851    while i <= cnt {
2852        while i <= cnt && (sline[i].flag & mark) == 0 {
2853            i += 1;
2854        }
2855        if cnt < i {
2856            break;
2857        }
2858        let hunk_begin = i;
2859        let mut j = i + 1;
2860        while j <= cnt {
2861            if (sline[j].flag & mark) == 0 {
2862                // Look beyond the end for an interesting line within context.
2863                let mut la = adjust_hunk_tail(sline, all_mask, hunk_begin, j);
2864                la = if la + context < cnt + 1 {
2865                    la + context
2866                } else {
2867                    cnt + 1
2868                };
2869                let mut contin = false;
2870                while la > 0 && j < la {
2871                    la -= 1;
2872                    if (sline[la].flag & mark) != 0 {
2873                        contin = true;
2874                        break;
2875                    }
2876                }
2877                if !contin {
2878                    break;
2879                }
2880                j = la;
2881            }
2882            j += 1;
2883        }
2884        let hunk_end = j;
2885
2886        // Is the hunk "really" interesting? Check whether all changed lines
2887        // record the same set of parents.
2888        let mut same_diff: u64 = 0;
2889        let mut has_interesting = false;
2890        let mut jj = i;
2891        while jj < hunk_end && !has_interesting {
2892            let this_diff = sline[jj].flag & all_mask;
2893            if this_diff != 0 {
2894                if same_diff == 0 {
2895                    same_diff = this_diff;
2896                } else if same_diff != this_diff {
2897                    has_interesting = true;
2898                    break;
2899                }
2900            }
2901            for ll in &sline[jj].lost {
2902                if has_interesting {
2903                    break;
2904                }
2905                let td = ll.parent_map;
2906                if same_diff == 0 {
2907                    same_diff = td;
2908                } else if same_diff != td {
2909                    has_interesting = true;
2910                }
2911            }
2912            jj += 1;
2913        }
2914
2915        if !has_interesting && same_diff != all_mask {
2916            // Not interesting after all: unmark the whole hunk.
2917            for line in sline.iter_mut().take(hunk_end).skip(hunk_begin) {
2918                line.flag &= !mark;
2919            }
2920        }
2921        i = hunk_end;
2922    }
2923
2924    give_context(sline, cnt, num_parent, context)
2925}
2926
2927/// git's `show_parent_lno`: emit one `-l0,len` column for parent `n`.
2928fn show_parent_lno(
2929    out: &mut Vec<u8>,
2930    sline: &[CdLine],
2931    l0: usize,
2932    l1: usize,
2933    n: usize,
2934    null_context: u64,
2935) {
2936    let a = sline[l0].p_lno[n];
2937    let b = sline[l1].p_lno[n];
2938    out.extend_from_slice(format!(" -{},{}", a, b - a - null_context).as_bytes());
2939}
2940
2941/// git's `hunk_comment_line` test (used to append a function-context comment
2942/// to the `@@@ ... @@@` header).
2943fn hunk_comment_line(bol: &[u8]) -> bool {
2944    if bol.is_empty() {
2945        return false;
2946    }
2947    let ch = bol[0];
2948    ch.is_ascii_alphabetic() || ch == b'_' || ch == b'$'
2949}
2950
2951/// git's `show_line_to_eol`: emit a line, preserving a trailing CR. The bytes
2952/// here never include the newline; we add it.
2953fn show_line_to_eol(out: &mut Vec<u8>, line: &[u8]) {
2954    let saw_cr = line.last() == Some(&b'\r');
2955    if saw_cr {
2956        out.extend_from_slice(&line[..line.len() - 1]);
2957        out.push(b'\r');
2958    } else {
2959        out.extend_from_slice(line);
2960    }
2961    out.push(b'\n');
2962}
2963
2964/// git's `dump_sline`: emit the combined-diff hunk bodies for all marked hunks.
2965fn dump_sline(out: &mut Vec<u8>, sline: &[CdLine], cnt: usize, num_parent: usize, context: usize) {
2966    let mark = 1u64 << num_parent;
2967    let no_pre_delete = 2u64 << num_parent;
2968    let mut lno: usize = 0;
2969
2970    loop {
2971        let mut hunk_comment: Option<&[u8]> = None;
2972        while lno <= cnt && (sline[lno].flag & mark) == 0 {
2973            if hunk_comment_line(&sline[lno].bol) {
2974                hunk_comment = Some(&sline[lno].bol);
2975            }
2976            lno += 1;
2977        }
2978        if cnt < lno {
2979            break;
2980        }
2981        let mut hunk_end = lno + 1;
2982        while hunk_end <= cnt {
2983            if (sline[hunk_end].flag & mark) == 0 {
2984                break;
2985            }
2986            hunk_end += 1;
2987        }
2988
2989        let mut rlines = (hunk_end - lno) as u64;
2990        if cnt < hunk_end {
2991            rlines -= 1; // pointing at the last delete hunk
2992        }
2993
2994        let mut null_context: u64 = 0;
2995        if context == 0 {
2996            // --unified=0: count the all-blank-context result lines so the
2997            // header line counts exclude them.
2998            for sl in sline.iter().take(hunk_end).skip(lno) {
2999                if (sl.flag & (mark - 1)) == 0 {
3000                    null_context += 1;
3001                }
3002            }
3003            rlines -= null_context;
3004        }
3005
3006        // Header: `@@@`... (num_parent+1 markers), one -l,c column per parent,
3007        // ` +out_start,out_len `, num_parent+1 markers again.
3008        for _ in 0..=num_parent {
3009            out.push(b'@');
3010        }
3011        for i in 0..num_parent {
3012            show_parent_lno(out, sline, lno, hunk_end, i, null_context);
3013        }
3014        out.extend_from_slice(format!(" +{},{} ", lno + 1, rlines).as_bytes());
3015        for _ in 0..=num_parent {
3016            out.push(b'@');
3017        }
3018
3019        if let Some(comment) = hunk_comment {
3020            let mut comment_end = 0;
3021            for (idx, &ch) in comment.iter().take(40).enumerate() {
3022                if ch == b'\n' {
3023                    break;
3024                }
3025                if !ch.is_ascii_whitespace() {
3026                    comment_end = idx + 1;
3027                }
3028            }
3029            if comment_end != 0 {
3030                out.push(b' ');
3031                out.extend_from_slice(&comment[..comment_end]);
3032            }
3033        }
3034        out.push(b'\n');
3035
3036        // Body.
3037        while lno < hunk_end {
3038            let sl = &sline[lno];
3039            lno += 1;
3040            // Lost (deleted) lines hung before this result line.
3041            if (sl.flag & no_pre_delete) == 0 {
3042                for ll in &sl.lost {
3043                    for j in 0..num_parent {
3044                        if ll.parent_map & (1u64 << j) != 0 {
3045                            out.push(b'-');
3046                        } else {
3047                            out.push(b' ');
3048                        }
3049                    }
3050                    show_line_to_eol(out, &ll.line);
3051                }
3052            }
3053            if cnt < lno {
3054                break;
3055            }
3056            if (sl.flag & (mark - 1)) == 0 {
3057                // This sline only existed to hang the lost lines in front.
3058                if context == 0 {
3059                    continue;
3060                }
3061            }
3062            let mut p_mask = 1u64;
3063            for _ in 0..num_parent {
3064                if p_mask & sl.flag != 0 {
3065                    out.push(b'+');
3066                } else {
3067                    out.push(b' ');
3068                }
3069                p_mask <<= 1;
3070            }
3071            show_line_to_eol(out, &sl.bol);
3072        }
3073    }
3074}
3075
3076#[cfg(test)]
3077mod tests {
3078    use super::*;
3079
3080    fn render_plain(old: Option<&[u8]>, new: Option<&[u8]>) -> Vec<u8> {
3081        let mut out = Vec::new();
3082        let mut options = HunkRenderOptions::default();
3083        render_hunks(&mut out, old, new, &mut options);
3084        out
3085    }
3086
3087    #[test]
3088    fn identical_content_renders_nothing() {
3089        assert!(render_plain(Some(b"a\nb\n"), Some(b"a\nb\n")).is_empty());
3090    }
3091
3092    #[test]
3093    fn single_line_change_basic_hunk() {
3094        let out = render_plain(Some(b"alpha\nbeta\ngamma\n"), Some(b"alpha\nBETA\ngamma\n"));
3095        assert_eq!(
3096            out,
3097            b"@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n".to_vec(),
3098        );
3099    }
3100
3101    #[test]
3102    fn count_omitted_when_one() {
3103        // A single-line file changed in place yields `-1 +1` (no `,1`).
3104        let out = render_plain(Some(b"old\n"), Some(b"new\n"));
3105        assert_eq!(out, b"@@ -1 +1 @@\n-old\n+new\n".to_vec());
3106    }
3107
3108    #[test]
3109    fn no_newline_marker_on_old_side() {
3110        let out = render_plain(Some(b"only line no newline"), None);
3111        assert_eq!(
3112            out,
3113            b"@@ -1 +0,0 @@\n-only line no newline\n\\ No newline at end of file\n".to_vec(),
3114        );
3115    }
3116
3117    #[test]
3118    fn no_newline_marker_on_new_side() {
3119        let out = render_plain(Some(b"beta\n"), Some(b"beta-notail"));
3120        assert_eq!(
3121            out,
3122            b"@@ -1 +1 @@\n-beta\n+beta-notail\n\\ No newline at end of file\n".to_vec(),
3123        );
3124    }
3125
3126    #[test]
3127    fn pure_insertion_into_empty() {
3128        let out = render_plain(None, Some(b"x\ny\n"));
3129        assert_eq!(out, b"@@ -0,0 +1,2 @@\n+x\n+y\n".to_vec());
3130    }
3131
3132    #[test]
3133    fn distant_changes_split_into_two_hunks() {
3134        let old: &[u8] = b"a\nb\nc\nd\ne\nf\ng\nh\ni\nj\n";
3135        let new: &[u8] = b"A\nb\nc\nd\ne\nf\ng\nh\ni\nJ\n";
3136        let out = render_plain(Some(old), Some(new));
3137        // Two changes 9 lines apart (> 2*3+1) produce two separate hunks.
3138        let text = String::from_utf8(out).expect("rendered output is valid UTF-8");
3139        assert_eq!(text.matches("@@ ").count(), 2, "expected two hunks: {text}");
3140    }
3141
3142    #[test]
3143    fn heading_callback_supplies_section() {
3144        // The change is far enough below `fn foo()` that the funcname line
3145        // precedes the hunk (the heading scan looks *above* the hunk's first
3146        // line, so a change touching line 1 would correctly find no heading).
3147        let old: &[u8] = b"fn foo() {\n    a\n    b\n    c\n    d\n    e\n    f\n    g\n}\n";
3148        let new: &[u8] = b"fn foo() {\n    a\n    b\n    c\n    d\n    CHANGED\n    f\n    g\n}\n";
3149        let mut out = Vec::new();
3150        // Classifier accepts any line whose first byte is an ASCII letter
3151        // (a crude def_ff stand-in for the test).
3152        let mut heading_fn = |line: &[u8]| -> Option<Vec<u8>> {
3153            if line.first().is_some_and(u8::is_ascii_alphabetic) {
3154                Some(line.strip_suffix(b"\n").unwrap_or(line).to_vec())
3155            } else {
3156                None
3157            }
3158        };
3159        let mut options = HunkRenderOptions {
3160            heading: Some(&mut heading_fn),
3161            ..Default::default()
3162        };
3163        render_hunks(&mut out, Some(old), Some(new), &mut options);
3164        let text = String::from_utf8(out).expect("rendered output is valid UTF-8");
3165        assert!(
3166            text.starts_with("@@ -3,7 +3,7 @@ fn foo() {\n"),
3167            "expected funcname heading: {text}",
3168        );
3169    }
3170
3171    fn render_cc(result: &[u8], parents: &[&[u8]], dense: bool) -> String {
3172        let mut out = Vec::new();
3173        let opts = CombinedRenderOptions {
3174            dense,
3175            ..Default::default()
3176        };
3177        render_combined_with(&mut out, result, parents, &opts);
3178        String::from_utf8(out).expect("combined output is valid UTF-8")
3179    }
3180
3181    #[test]
3182    fn combined_two_parent_dense_header_and_columns() {
3183        // A merge result that adds lines on top of two parents, the t4013
3184        // dir/sub shape: parent0 = "A\nB\nC\nD\nE\nF\n", parent1 = "A\nB\n1\n2\n",
3185        // result = "A\nB\nC\nD\nE\nF\n1\n2\n". git emits one combined hunk with
3186        // the `@@@ -1,6 -1,4 +1,8 @@@` header and two prefix columns.
3187        let p0 = b"A\nB\nC\nD\nE\nF\n";
3188        let p1 = b"A\nB\n1\n2\n";
3189        let result = b"A\nB\nC\nD\nE\nF\n1\n2\n";
3190        let text = render_cc(result, &[p0, p1], true);
3191        assert_eq!(
3192            text, "@@@ -1,6 -1,4 +1,8 @@@\n  A\n  B\n +C\n +D\n +E\n +F\n+ 1\n+ 2\n",
3193            "combined dense output:\n{text}",
3194        );
3195    }
3196
3197    #[test]
3198    fn combined_identical_to_one_parent_dense_drops_hunk() {
3199        // When the result is identical to one parent, the dense (`--cc`) filter
3200        // drops the hunk (the change is "interesting" only against the other
3201        // parent), so nothing is emitted; the non-dense (`-c`) form still shows
3202        // every parent.
3203        let p0 = b"x\ny\n";
3204        let p1 = b"x\nCHANGED\n";
3205        let result = b"x\ny\n"; // identical to p0
3206        assert_eq!(render_cc(result, &[p0, p1], true), "");
3207        // Non-dense still shows the hunk (differs from p1).
3208        assert!(render_cc(result, &[p0, p1], false).starts_with("@@@"));
3209    }
3210
3211    #[test]
3212    fn combined_reuse_identical_parents() {
3213        // Two parents with the identical blob must produce identical columns
3214        // (git's reuse_combine_diff path); the result adds a line relative to
3215        // both, so both columns carry `+`.
3216        let parent = b"a\nb\n";
3217        let result = b"a\nb\nc\n";
3218        let text = render_cc(result, &[parent, parent], true);
3219        assert_eq!(
3220            text, "@@@ -1,2 -1,2 +1,3 @@@\n  a\n  b\n++c\n",
3221            "reuse output:\n{text}",
3222        );
3223    }
3224}
sley_diff_merge/render.rs

sley_diff_merge/
render.rs