Skip to main content

sley_diff_merge/
render.rs

1//! Unified-diff / patch RENDERER: turn a computed file diff (the old/new
2//! blob contents) into the textual unified-diff hunk body git's `diff.c`
3//! emit path produces (`emit_diff_symbol` / `fn_out_consume`).
4//!
5//! This is the byte-for-byte port of git's hunk emitter: `@@ -os,oc +ns,nc @@
6//! <heading>` hunk headers, the `+`/`-`/context lines, and the
7//! `\ No newline at end of file` marker. It owns hunk *grouping* (combining
8//! changes whose context windows overlap, `xdl_get_hunk`'s `distance >
9//! max_common` break) and hunk *range* computation, then emits each hunk.
10//!
11//! What this module deliberately does NOT own (those stay with the caller,
12//! which has the repository/userdiff/config context):
13//!
14//! * **The per-file metainfo header** (`diff --git`, `index`, `---`/`+++`,
15//!   mode/similarity lines). That is repository- and option-shaped; the
16//!   renderer only produces the hunk body that follows it.
17//! * **Funcname section-heading resolution.** The caller supplies a
18//!   [`HeadingFn`] closure that, given a candidate line, returns its section
19//!   heading (git's `def_ff` default heuristic or a userdiff `xfuncname`
20//!   pattern). The renderer does the *scan upward* for the nearest heading
21//!   line; the caller only classifies a single line.
22//! * **Word-diff body rendering.** When [`HunkRenderOptions::word_diff`] is
23//!   set, the renderer delegates each hunk's body to a [`HunkWordDiff`] hook,
24//!   which the caller implements over its own word-diff machinery.
25//!
26//! The seams keep the byte-shaping (ranges, headers, prefixes, no-newline
27//! markers, color spans) here — the part every diff-emitting command used to
28//! re-derive — while leaving the repository-coupled concerns in the consumer.
29
30use crate::{
31    DiffAlgorithm, DiffLine, DiffOp, WsIgnore, line_is_blank, myers_diff_lines_ws, split_lines,
32};
33use std::collections::HashMap;
34
35/// git's default hunk context (`-U3`).
36pub const DEFAULT_CONTEXT: usize = 3;
37const FUNCTION_CONTEXT_FLAG: usize = 1usize << (usize::BITS - 1);
38const CONTEXT_VALUE_MASK: usize = !FUNCTION_CONTEXT_FLAG;
39
40/// Encode `-W` / `--function-context` into the context field without changing
41/// the option shape used by the existing renderer call sites.
42pub fn enable_function_context(context: usize) -> usize {
43    (context & CONTEXT_VALUE_MASK) | FUNCTION_CONTEXT_FLAG
44}
45
46fn decode_context(context: usize) -> (usize, bool) {
47    (
48        context & CONTEXT_VALUE_MASK,
49        context & FUNCTION_CONTEXT_FLAG != 0,
50    )
51}
52
53fn replace_context_value(encoded: usize, context: usize) -> usize {
54    (encoded & !CONTEXT_VALUE_MASK) | (context & CONTEXT_VALUE_MASK)
55}
56
57/// The per-line origin marker for an emitted diff line.
58#[derive(Clone, Copy, PartialEq, Eq, Debug)]
59pub enum LineKind {
60    /// An unchanged (` `) line, present on both sides.
61    Context,
62    /// A removed (`-`) line, present only on the old side.
63    Delete,
64    /// An added (`+`) line, present only on the new side.
65    Insert,
66}
67
68/// One line of the unified diff, with its origin and 0-based positions in the
69/// old/new files (used to compute hunk ranges and feed the word-diff hook).
70#[derive(Clone, Copy)]
71pub struct TaggedLine<'a> {
72    /// Whether the line is context / a deletion / an insertion.
73    pub kind: LineKind,
74    /// The raw line bytes, including the trailing `\n` when present.
75    pub content: &'a [u8],
76    /// 0-based index of this line on the old side.
77    pub old_index: usize,
78    /// 0-based index of this line on the new side.
79    pub new_index: usize,
80}
81
82/// ANSI color palette for a unified diff, mirroring git's `diff_get_color`
83/// slots. Each field is the raw escape sequence (empty string = no color).
84///
85/// The renderer only consults the slots it paints in the hunk body; the
86/// per-file metainfo slot (`meta`) lives with the caller's header emitter and
87/// is intentionally absent here.
88#[derive(Clone, Copy)]
89pub struct RenderColors<'a> {
90    /// `color.diff.frag` — the `@@ .. @@` span.
91    pub frag: &'a str,
92    /// `color.diff.func` — the section heading after the frag.
93    pub func: &'a str,
94    /// `color.diff.old` — removed (`-`) lines.
95    pub old: &'a str,
96    /// `color.diff.new` — added (`+`) lines.
97    pub new: &'a str,
98    /// `color.diff.context` — context (` `) lines and the no-newline marker.
99    pub context: &'a str,
100    /// The reset sequence terminating each colored span.
101    pub reset: &'a str,
102    /// `color.diff.whitespace` — the highlight for whitespace errors
103    /// (`--ws-error-highlight`).
104    pub whitespace: &'a str,
105    /// `color.diff.oldMoved` — removed lines detected as moved.
106    pub old_moved: &'a str,
107    /// `color.diff.oldMovedAlternative` — alternate removed moved block.
108    pub old_moved_alt: &'a str,
109    /// `color.diff.oldMovedDimmed` — uninteresting removed moved line.
110    pub old_moved_dim: &'a str,
111    /// `color.diff.oldMovedAlternativeDimmed` — alternate dimmed removed line.
112    pub old_moved_alt_dim: &'a str,
113    /// `color.diff.newMoved` — added lines detected as moved.
114    pub new_moved: &'a str,
115    /// `color.diff.newMovedAlternative` — alternate added moved block.
116    pub new_moved_alt: &'a str,
117    /// `color.diff.newMovedDimmed` — uninteresting added moved line.
118    pub new_moved_dim: &'a str,
119    /// `color.diff.newMovedAlternativeDimmed` — alternate dimmed added line.
120    pub new_moved_alt_dim: &'a str,
121}
122
123/// `--color-moved=<mode>` hunk-body mode.
124#[derive(Clone, Copy, Debug, PartialEq, Eq)]
125pub enum ColorMovedMode {
126    /// Mark every matching `+`/`-` line.
127    Plain,
128    /// Mark moved blocks, without alternating colors.
129    Blocks,
130    /// Mark moved blocks, alternating adjacent blocks.
131    Zebra,
132    /// Like zebra, but dim interior lines.
133    DimmedZebra,
134}
135
136/// `--color-moved-ws=<mode>` comparison mode.
137#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
138pub struct ColorMovedWs {
139    /// Whitespace-ignore flags used when interning moved lines.
140    pub ignore: WsIgnore,
141    /// Allow a constant indentation delta across a moved block.
142    pub allow_indentation_change: bool,
143}
144
145/// Moved-code coloring configuration.
146#[derive(Clone, Copy, Debug, PartialEq, Eq)]
147pub struct ColorMoved {
148    /// Which moved-code coloring style to use.
149    pub mode: ColorMovedMode,
150    /// Whitespace handling for moved-code matching.
151    pub ws: ColorMovedWs,
152}
153
154/// Resolve the section heading for one candidate line.
155///
156/// Returns `Some(heading)` when `line` is a heading line (git's `def_ff`
157/// default heuristic or a userdiff `xfuncname` match) and `None` otherwise.
158/// The renderer scans upward from each hunk's first line and uses the first
159/// `Some` it finds — the caller only has to classify a single line, so it can
160/// keep its userdiff-driver / config resolution out of this crate.
161pub type HeadingFn<'a> = dyn FnMut(&[u8]) -> Option<Vec<u8>> + 'a;
162
163/// A hook that renders a single hunk's body when `--word-diff` is active.
164///
165/// The renderer feeds the hunk's tagged lines through this in order
166/// (`fn_out_consume`'s `diff_words` branch): each removed line is pushed to
167/// the minus buffer, each added line to the plus buffer, and a context line
168/// flushes the accumulated word diff before emitting the context line itself.
169/// The implementor owns the actual word-level rendering and color spans; this
170/// keeps the word-diff machinery in the consumer.
171pub trait HunkWordDiff {
172    /// Buffer one removed line's content for the next word-diff flush.
173    fn push_minus(&mut self, content: &[u8]);
174    /// Buffer one added line's content for the next word-diff flush.
175    fn push_plus(&mut self, content: &[u8]);
176    /// Word-diff the accumulated minus/plus buffers into `out` and reset them.
177    fn flush(&mut self, out: &mut Vec<u8>);
178    /// Emit one context line (the `--word-diff` context style).
179    fn emit_context_line(&mut self, out: &mut Vec<u8>, content: &[u8]);
180}
181
182/// Hunk-shaping and styling options for [`render_hunks`].
183///
184/// Lifetimes are split so the funcname / word-diff hooks can be borrowed
185/// mutably while `colors` is borrowed shared.
186pub struct HunkRenderOptions<'a, 'h> {
187    /// Lines of context around each change (`-U<n>`, default
188    /// [`DEFAULT_CONTEXT`]).
189    pub context: usize,
190    /// Extra inter-hunk merging distance (`--inter-hunk-context`).
191    pub interhunk: usize,
192    /// Per-line section-heading classifier; `None` emits headerless hunks.
193    pub heading: Option<&'a mut HeadingFn<'h>>,
194    /// ANSI palette when color output is enabled.
195    pub colors: Option<RenderColors<'a>>,
196    /// Word-diff body hook (replaces the `+`/`-` line bodies of each hunk).
197    pub word_diff: Option<&'a mut dyn HunkWordDiff>,
198    /// `--ws-error-highlight` configuration: when set and colors are on, the
199    /// renderer paints whitespace errors on the selected line kinds with
200    /// `colors.whitespace` (git's `emit_line_ws_markup`). `None` disables it.
201    pub ws_error: Option<WsErrorHighlight>,
202    /// Whitespace-ignore flags (`-w`, `-b`, `--ignore-space-at-eol`,
203    /// `--ignore-cr-at-eol`): applied to the line-level comparison so
204    /// whitespace-only changes do not appear as diffs (git's
205    /// `XDF_WHITESPACE_FLAGS`).
206    pub ws_ignore: WsIgnore,
207    /// The line-diff algorithm to use (Myers / patience / histogram).
208    pub algorithm: DiffAlgorithm,
209    /// Indent heuristic (`--indent-heuristic` / `diff.indentHeuristic`): when
210    /// set, change groups that can slide within surrounding identical lines are
211    /// shifted to the most readable boundary (git's `XDF_INDENT_HEURISTIC`
212    /// scoring in `xdl_change_compact`). The base change compaction — sliding
213    /// groups as far down as possible and aligning add/delete pairs — always
214    /// runs; this flag only enables the indent-based slider scoring. Defaults to
215    /// `true` to match git's `diff.indentHeuristic` default.
216    pub indent_heuristic: bool,
217    /// Change-group suppression (`--ignore-blank-lines`, `-I<regex>`): when
218    /// set, change groups all of whose old and new lines are blank (and/or
219    /// match a `-I` regex) are dropped from hunk emission, mirroring git's
220    /// `xdl_mark_ignorable_lines` / `xdl_mark_ignorable_regex` + `xdl_get_hunk`.
221    pub change_ignore: Option<&'a ChangeIgnore<'a>>,
222    /// `log -L`: restrict the emitted hunks to the new-side (post-image) line
223    /// ranges. Each range is 0-based, `[start, end)`. When set, the renderer
224    /// inflates context to the widest range span (so every change inside a
225    /// range merges into one xdiff hunk), then clips the emitted lines back to
226    /// the range boundaries — a port of diff.c's `line_range_*` callbacks.
227    /// Ranges must be sorted and disjoint. `None` disables the filter (every
228    /// non-line-log caller).
229    pub line_ranges: Option<&'a [LineRange]>,
230    /// `--color-moved`: classify moved `+`/`-` lines and paint them with the
231    /// moved-color slots. `None` disables moved-code coloring.
232    pub color_moved: Option<ColorMoved>,
233}
234
235/// A half-open `[start, end)` line range (0-based) for `log -L` hunk
236/// restriction. Mirrors diff.c's `struct range`.
237#[derive(Clone, Copy, Debug, PartialEq, Eq)]
238pub struct LineRange {
239    /// 0-based inclusive start line (post-image).
240    pub start: i64,
241    /// 0-based exclusive end line (post-image).
242    pub end: i64,
243}
244
245/// Configuration for change-group suppression (`--ignore-blank-lines` and
246/// `-I<regex>`). A change group is *ignorable* iff every old line and every new
247/// line it touches is blank (when `ignore_blank_lines`) or matches one of the
248/// `-I` regexes (`regex_match`). Ignorable groups are kept out of hunk emission
249/// per `xdl_get_hunk`'s leading/isolated-ignorable removal.
250pub type ChangeIgnoreRegex<'a> = &'a dyn Fn(&[u8]) -> bool;
251
252pub struct ChangeIgnore<'a> {
253    /// `--ignore-blank-lines`: blank change groups are ignorable.
254    pub ignore_blank_lines: bool,
255    /// `-I<regex>`: a line is regex-ignorable when this returns `true`. The
256    /// closure receives the raw line bytes (including the trailing `\n`). When
257    /// `None`, no regex suppression applies.
258    pub regex_match: Option<ChangeIgnoreRegex<'a>>,
259}
260
261/// Which line kinds get whitespace-error highlighting, plus the rule to check
262/// against. git's `--ws-error-highlight` defaults to highlighting only new
263/// (`+`) lines.
264#[derive(Clone, Copy)]
265pub struct WsErrorHighlight {
266    /// The resolved whitespace rule to check each line against.
267    pub rule: crate::ws::WsRule,
268    /// Highlight errors on removed (`-`) lines.
269    pub old: bool,
270    /// Highlight errors on added (`+`) lines.
271    pub new: bool,
272    /// Highlight errors on context (` `) lines.
273    pub context: bool,
274}
275
276impl Default for HunkRenderOptions<'_, '_> {
277    fn default() -> Self {
278        Self {
279            context: DEFAULT_CONTEXT,
280            interhunk: 0,
281            heading: None,
282            colors: None,
283            word_diff: None,
284            ws_error: None,
285            ws_ignore: WsIgnore::default(),
286            algorithm: DiffAlgorithm::Myers,
287            indent_heuristic: true,
288            change_ignore: None,
289            line_ranges: None,
290            color_moved: None,
291        }
292    }
293}
294
295/// Render the unified-diff hunk body for a single file change into `out`.
296///
297/// `old_content` / `new_content` are the full blob contents (`None` for an
298/// absent side — a created or deleted file). The function computes the
299/// line-level Myers diff, groups changes into hunks with `options.context`
300/// lines of surrounding context (merging nearby groups per
301/// `options.interhunk`), and emits each hunk: the `@@` header (with git's
302/// section heading), then the context / `-` / `+` lines including
303/// `\ No newline at end of file` markers.
304///
305/// Nothing is written when the contents are identical (no changed lines).
306/// This is the body *after* the per-file metainfo header the caller emits.
307pub fn render_hunks(
308    out: &mut Vec<u8>,
309    old_content: Option<&[u8]>,
310    new_content: Option<&[u8]>,
311    options: &mut HunkRenderOptions<'_, '_>,
312) {
313    let (context, function_context) = decode_context(options.context);
314    // `log -L` hunk restriction: render with inflated context into a scratch
315    // buffer, then clip the emitted lines to the tracked ranges (diff.c's
316    // `line_range_*` callbacks). The widest range span is the upper bound on
317    // the context needed for every change in a range to land in one hunk.
318    if let Some(ranges) = options.line_ranges {
319        let max_span = ranges
320            .iter()
321            .map(|r| r.end - r.start)
322            .max()
323            .unwrap_or(0)
324            .max(0) as usize;
325        let saved_context = options.context;
326        options.context = replace_context_value(saved_context, context.max(max_span));
327        options.line_ranges = None;
328        let mut full = Vec::new();
329        render_hunks(&mut full, old_content, new_content, options);
330        options.context = saved_context;
331        options.line_ranges = Some(ranges);
332        filter_hunks_to_ranges(out, &full, ranges);
333        return;
334    }
335    let old = split_lines(old_content.unwrap_or_default());
336    let new = split_lines(new_content.unwrap_or_default());
337    let mut ops = myers_diff_lines_ws(&old, &new, options.ws_ignore, options.algorithm);
338
339    // git's `xdl_change_compact`: slide each change group as far down as
340    // possible, snap add/delete pairs back into alignment, and (under the
341    // indent heuristic) shift to the most readable split. Runs on the raw edit
342    // script before it is flattened into tagged lines.
343    change_compact(
344        &mut ops,
345        &old,
346        &new,
347        options.ws_ignore,
348        options.indent_heuristic,
349    );
350
351    // Flatten the edit script into a tagged line stream carrying old/new
352    // positions.
353    let mut tagged: Vec<TaggedLine<'_>> = Vec::new();
354    let mut old_idx = 0usize;
355    let mut new_idx = 0usize;
356    for op in ops {
357        match op {
358            DiffOp::Equal(n) => {
359                for _ in 0..n {
360                    // When records match only under whitespace-ignore flags,
361                    // git emits the postimage bytes as context.
362                    tagged.push(TaggedLine {
363                        kind: LineKind::Context,
364                        content: new[new_idx].content,
365                        old_index: old_idx,
366                        new_index: new_idx,
367                    });
368                    old_idx += 1;
369                    new_idx += 1;
370                }
371            }
372            DiffOp::Delete(n) => {
373                for _ in 0..n {
374                    tagged.push(TaggedLine {
375                        kind: LineKind::Delete,
376                        content: old[old_idx].content,
377                        old_index: old_idx,
378                        new_index: new_idx,
379                    });
380                    old_idx += 1;
381                }
382            }
383            DiffOp::Insert(n) => {
384                for _ in 0..n {
385                    tagged.push(TaggedLine {
386                        kind: LineKind::Insert,
387                        content: new[new_idx].content,
388                        old_index: old_idx,
389                        new_index: new_idx,
390                    });
391                    new_idx += 1;
392                }
393            }
394        }
395    }
396
397    // Build the change list (git's xdchange script): each maximal run of
398    // consecutive `-`/`+` tagged lines is one change, carrying its old/new line
399    // ranges and the tagged-stream span it occupies.
400    let changes = build_changes(&tagged);
401    if changes.is_empty() {
402        return;
403    }
404
405    // Mark each change ignorable when `--ignore-blank-lines` / `-I<regex>`
406    // applies and every old and new line it touches is blank / regex-matched
407    // (git's xdl_mark_ignorable_lines + xdl_mark_ignorable_regex).
408    let mut changes = changes;
409    if let Some(ci) = options.change_ignore {
410        mark_ignorable_changes(&mut changes, &old, &new, options.ws_ignore, ci);
411    }
412
413    // Group changes into hunks (xdl_get_hunk): the `distance > max_common`
414    // break plus leading/isolated-ignorable-change removal. Each hunk is a
415    // tagged-stream `(first_change_pos, last_change_pos)` span of *real*
416    // (emitted) changes.
417    let mut groups = group_changes_into_hunks(&changes, context, options.interhunk);
418    if function_context {
419        groups = expand_hunks_to_function_context(
420            &groups,
421            &tagged,
422            &old,
423            &new,
424            options.heading.as_deref_mut(),
425        );
426    }
427
428    let moved_styles = options
429        .color_moved
430        .filter(|_| options.colors.is_some() && options.word_diff.is_none())
431        .map(|color_moved| mark_color_as_moved(&tagged, color_moved));
432
433    for (first_change, last_change) in groups {
434        let (hunk_start, hunk_end) = if function_context {
435            (first_change, (last_change + 1).min(tagged.len()))
436        } else {
437            (
438                first_change.saturating_sub(context),
439                (last_change + context + 1).min(tagged.len()),
440            )
441        };
442        render_one_hunk(
443            out,
444            &tagged,
445            moved_styles.as_deref(),
446            &old,
447            hunk_start,
448            hunk_end,
449            options,
450        );
451    }
452}
453
454// ===========================================================================
455// Change compaction: a faithful port of git's `xdl_change_compact`
456// (xdiff/xdiffi.c), including the `XDF_INDENT_HEURISTIC` slider scoring.
457//
458// git represents a diff as two per-file boolean "changed" arrays (`xdf1.rchg`
459// for the old file, `xdf2.rchg` for the new). A *group* is a maximal run of
460// changed lines (a deletion run in the old file, an insertion run in the new
461// file), separated by runs of unchanged lines. `xdl_change_compact` walks the
462// groups of one file while keeping a synchronized cursor over the groups of the
463// other, sliding each group up/down within identical surrounding lines to a
464// canonical (and, under the indent heuristic, more readable) position.
465//
466// We reconstruct the two `changed[]` arrays from the [`DiffOp`] script, run the
467// algorithm on each file (old then new, exactly as git does), and rebuild the
468// script. Line equality for sliding (`recs_match`) uses the same
469// whitespace-canonicalized bytes the line-level diff used, so a group only
470// slides across lines the diff itself considered identical.
471// ===========================================================================
472
473/// If a line is indented more than this, [`get_indent`] returns this value
474/// (git's `MAX_INDENT`).
475const MAX_INDENT: i32 = 200;
476/// Cap on consecutive blank lines counted around a split (git's `MAX_BLANKS`).
477const MAX_BLANKS: i32 = 20;
478
479// Empirically-determined weight factors from git's xdiffi.c.
480const START_OF_FILE_PENALTY: i32 = 1;
481const END_OF_FILE_PENALTY: i32 = 21;
482const TOTAL_BLANK_WEIGHT: i32 = -30;
483const POST_BLANK_WEIGHT: i32 = 6;
484const RELATIVE_INDENT_PENALTY: i32 = -4;
485const RELATIVE_INDENT_WITH_BLANK_PENALTY: i32 = 10;
486const RELATIVE_OUTDENT_PENALTY: i32 = 24;
487const RELATIVE_OUTDENT_WITH_BLANK_PENALTY: i32 = 17;
488const RELATIVE_DEDENT_PENALTY: i32 = 23;
489const RELATIVE_DEDENT_WITH_BLANK_PENALTY: i32 = 17;
490const INDENT_WEIGHT: i32 = 60;
491const INDENT_HEURISTIC_MAX_SLIDING: i64 = 100;
492
493/// One file's record set for compaction: the whitespace-canonicalized line
494/// bytes (for `recs_match` and `get_indent`) and the per-line `changed` flags.
495/// `nrec` is the number of records; index `-1` and `nrec` are treated as
496/// "unchanged" sentinels, matching git's zero-padded `rchg`.
497struct CompactFile {
498    recs: Vec<Vec<u8>>,
499    changed: Vec<bool>,
500}
501
502impl CompactFile {
503    fn nrec(&self) -> i64 {
504        self.recs.len() as i64
505    }
506
507    /// `xdf->changed[i]` with git's out-of-range sentinels: positions `-1` and
508    /// `nrec` are unchanged (`false`).
509    fn changed(&self, i: i64) -> bool {
510        if i < 0 || i >= self.nrec() {
511            false
512        } else {
513            self.changed[i as usize]
514        }
515    }
516
517    fn set_changed(&mut self, i: i64, v: bool) {
518        self.changed[i as usize] = v;
519    }
520}
521
522/// git's `get_indent`: indentation columns of `rec` treating TAB as advancing
523/// to the next multiple of 8; `-1` for a blank (whitespace-only / empty) line;
524/// clamped at [`MAX_INDENT`].
525fn get_indent(rec: &[u8]) -> i32 {
526    let mut ret: i32 = 0;
527    for &c in rec {
528        if !xdl_isspace(c) {
529            return ret;
530        } else if c == b' ' {
531            ret += 1;
532        } else if c == b'\t' {
533            ret += 8 - ret % 8;
534        }
535        // other whitespace (e.g. CR) is ignored, matching git.
536        if ret >= MAX_INDENT {
537            return MAX_INDENT;
538        }
539    }
540    // The line contains only whitespace.
541    -1
542}
543
544/// git's `XDL_ISSPACE`: space, tab, newline, vertical tab, form feed, carriage
545/// return.
546fn xdl_isspace(c: u8) -> bool {
547    matches!(c, b' ' | b'\t' | b'\n' | 0x0b | 0x0c | b'\r')
548}
549
550/// git's `struct split_measurement`.
551#[derive(Default)]
552struct SplitMeasurement {
553    end_of_file: bool,
554    indent: i32,
555    pre_blank: i32,
556    pre_indent: i32,
557    post_blank: i32,
558    post_indent: i32,
559}
560
561/// git's `struct split_score`.
562#[derive(Default, Clone, Copy)]
563struct SplitScore {
564    effective_indent: i32,
565    penalty: i32,
566}
567
568/// git's `measure_split`: characteristics of a hypothetical split above line
569/// `split` in `xdf`.
570fn measure_split(xdf: &CompactFile, split: i64) -> SplitMeasurement {
571    let mut m = SplitMeasurement::default();
572    if split >= xdf.nrec() {
573        m.end_of_file = true;
574        m.indent = -1;
575    } else {
576        m.end_of_file = false;
577        m.indent = get_indent(&xdf.recs[split as usize]);
578    }
579
580    m.pre_blank = 0;
581    m.pre_indent = -1;
582    let mut i = split - 1;
583    while i >= 0 {
584        m.pre_indent = get_indent(&xdf.recs[i as usize]);
585        if m.pre_indent != -1 {
586            break;
587        }
588        m.pre_blank += 1;
589        if m.pre_blank == MAX_BLANKS {
590            m.pre_indent = 0;
591            break;
592        }
593        i -= 1;
594    }
595
596    m.post_blank = 0;
597    m.post_indent = -1;
598    let mut i = split + 1;
599    while i < xdf.nrec() {
600        m.post_indent = get_indent(&xdf.recs[i as usize]);
601        if m.post_indent != -1 {
602            break;
603        }
604        m.post_blank += 1;
605        if m.post_blank == MAX_BLANKS {
606            m.post_indent = 0;
607            break;
608        }
609        i += 1;
610    }
611
612    m
613}
614
615/// git's `score_add_split`: accumulate the badness of split `m` into `s`.
616fn score_add_split(m: &SplitMeasurement, s: &mut SplitScore) {
617    if m.pre_indent == -1 && m.pre_blank == 0 {
618        s.penalty += START_OF_FILE_PENALTY;
619    }
620    if m.end_of_file {
621        s.penalty += END_OF_FILE_PENALTY;
622    }
623
624    let post_blank = if m.indent == -1 { 1 + m.post_blank } else { 0 };
625    let total_blank = m.pre_blank + post_blank;
626
627    s.penalty += TOTAL_BLANK_WEIGHT * total_blank;
628    s.penalty += POST_BLANK_WEIGHT * post_blank;
629
630    let indent = if m.indent != -1 {
631        m.indent
632    } else {
633        m.post_indent
634    };
635    let any_blanks = total_blank != 0;
636
637    s.effective_indent += indent;
638
639    if indent == -1 || m.pre_indent == -1 {
640        // End of file, or no non-blank predecessor: no adjustment needed
641        // (git's two separate `indent == -1` / `pre_indent == -1` no-op arms).
642    } else if indent > m.pre_indent {
643        s.penalty += if any_blanks {
644            RELATIVE_INDENT_WITH_BLANK_PENALTY
645        } else {
646            RELATIVE_INDENT_PENALTY
647        };
648    } else if indent == m.pre_indent {
649        // Same indentation as predecessor; no adjustment.
650    } else if m.post_indent != -1 && m.post_indent > indent {
651        s.penalty += if any_blanks {
652            RELATIVE_OUTDENT_WITH_BLANK_PENALTY
653        } else {
654            RELATIVE_OUTDENT_PENALTY
655        };
656    } else {
657        s.penalty += if any_blanks {
658            RELATIVE_DEDENT_WITH_BLANK_PENALTY
659        } else {
660            RELATIVE_DEDENT_PENALTY
661        };
662    }
663}
664
665/// git's `score_cmp`: `<0` when `s1` is the better (lower-badness) split.
666fn score_cmp(s1: &SplitScore, s2: &SplitScore) -> i32 {
667    let cmp_indents = (s1.effective_indent > s2.effective_indent) as i32
668        - (s1.effective_indent < s2.effective_indent) as i32;
669    INDENT_WEIGHT * cmp_indents + (s1.penalty - s2.penalty)
670}
671
672/// git's `struct xdlgroup`: a (possibly empty) group spanning `[start, end)` of
673/// changed lines.
674struct XdlGroup {
675    start: i64,
676    end: i64,
677}
678
679/// git's `recs_match`: the two records hash-equal (here: canonicalized bytes
680/// equal).
681fn recs_match(xdf: &CompactFile, a: i64, b: i64) -> bool {
682    xdf.recs[a as usize] == xdf.recs[b as usize]
683}
684
685/// git's `group_init`: point `g` at the first group in `xdf`.
686fn group_init(xdf: &CompactFile) -> XdlGroup {
687    let mut end = 0i64;
688    while xdf.changed(end) {
689        end += 1;
690    }
691    XdlGroup { start: 0, end }
692}
693
694/// git's `group_next`: advance to the next group; `false` if already at EOF.
695fn group_next(xdf: &CompactFile, g: &mut XdlGroup) -> bool {
696    if g.end == xdf.nrec() {
697        return false;
698    }
699    g.start = g.end + 1;
700    g.end = g.start;
701    while xdf.changed(g.end) {
702        g.end += 1;
703    }
704    true
705}
706
707/// git's `group_previous`: step back to the previous group; `false` if at BOF.
708fn group_previous(xdf: &CompactFile, g: &mut XdlGroup) -> bool {
709    if g.start == 0 {
710        return false;
711    }
712    g.end = g.start - 1;
713    g.start = g.end;
714    while xdf.changed(g.start - 1) {
715        g.start -= 1;
716    }
717    true
718}
719
720/// git's `group_slide_down`: slide `g` toward EOF if the line below equals the
721/// group's first line, absorbing any group it bumps into. `false` if it cannot
722/// slide.
723fn group_slide_down(xdf: &mut CompactFile, g: &mut XdlGroup) -> bool {
724    if g.end < xdf.nrec() && recs_match(xdf, g.start, g.end) {
725        xdf.set_changed(g.start, false);
726        xdf.set_changed(g.end, true);
727        g.start += 1;
728        g.end += 1;
729        while xdf.changed(g.end) {
730            g.end += 1;
731        }
732        true
733    } else {
734        false
735    }
736}
737
738/// git's `group_slide_up`: slide `g` toward BOF if the line above equals the
739/// group's last line, absorbing any group it bumps into. `false` if it cannot
740/// slide.
741fn group_slide_up(xdf: &mut CompactFile, g: &mut XdlGroup) -> bool {
742    if g.start > 0 && recs_match(xdf, g.start - 1, g.end - 1) {
743        g.start -= 1;
744        g.end -= 1;
745        xdf.set_changed(g.start, true);
746        xdf.set_changed(g.end, false);
747        while xdf.changed(g.start - 1) {
748            g.start -= 1;
749        }
750        true
751    } else {
752        false
753    }
754}
755
756/// Compact the change groups of `xdf`, keeping `xdfo` (the other file) in sync.
757/// A faithful port of the per-file body of git's `xdl_change_compact`. The
758/// `xdfo` re-diff tail (only reachable for histogram diff) is omitted: this
759/// compaction never merges groups in a way that creates new matching lines for
760/// the Myers/patience/histogram scripts produced here, so the re-diff is a
761/// no-op for our outputs.
762fn compact_one(xdf: &mut CompactFile, xdfo: &mut CompactFile, indent_heuristic: bool) {
763    let mut g = group_init(xdf);
764    let mut go = group_init(xdfo);
765
766    loop {
767        // Skip empty groups in the to-be-compacted file.
768        if g.end == g.start {
769            if !group_next(xdf, &mut g) {
770                break;
771            }
772            if !group_next(xdfo, &mut go) {
773                break;
774            }
775            continue;
776        }
777
778        let mut groupsize;
779        let mut earliest_end;
780        let mut end_matching_other;
781
782        loop {
783            groupsize = g.end - g.start;
784            end_matching_other = -1i64;
785
786            // Shift the group backward as far as possible.
787            while group_slide_up(xdf, &mut g) {
788                let ok = group_previous(xdfo, &mut go);
789                debug_assert!(ok, "group sync broken sliding up");
790            }
791            // Highest this group can be shifted; record its end.
792            earliest_end = g.end;
793            if go.end > go.start {
794                end_matching_other = g.end;
795            }
796            // Now shift the group forward as far as possible.
797            loop {
798                if !group_slide_down(xdf, &mut g) {
799                    break;
800                }
801                let ok = group_next(xdfo, &mut go);
802                debug_assert!(ok, "group sync broken sliding down");
803                if go.end > go.start {
804                    end_matching_other = g.end;
805                }
806            }
807            if groupsize == g.end - g.start {
808                break;
809            }
810        }
811
812        // The group is now shifted as far down as possible; only upward shifts
813        // remain to consider.
814        if g.end == earliest_end {
815            // No shifting was possible.
816        } else if end_matching_other != -1 {
817            // Move the (possibly merged) group back to line up with the last
818            // group of changes from the other file it can align with. Avoids
819            // splitting one change into a separate add/delete.
820            while go.end == go.start {
821                let ok = group_slide_up(xdf, &mut g);
822                debug_assert!(ok, "match disappeared");
823                let ok = group_previous(xdfo, &mut go);
824                debug_assert!(ok, "group sync broken sliding to match");
825            }
826        } else if indent_heuristic {
827            // Pick the shift with the lowest indent-heuristic score.
828            let mut best_shift = -1i64;
829            let mut best_score = SplitScore::default();
830
831            let mut shift = earliest_end;
832            if g.end - groupsize - 1 > shift {
833                shift = g.end - groupsize - 1;
834            }
835            if g.end - INDENT_HEURISTIC_MAX_SLIDING > shift {
836                shift = g.end - INDENT_HEURISTIC_MAX_SLIDING;
837            }
838            while shift <= g.end {
839                let mut score = SplitScore::default();
840                let m = measure_split(xdf, shift);
841                score_add_split(&m, &mut score);
842                let m = measure_split(xdf, shift - groupsize);
843                score_add_split(&m, &mut score);
844                if best_shift == -1 || score_cmp(&score, &best_score) <= 0 {
845                    best_score = score;
846                    best_shift = shift;
847                }
848                shift += 1;
849            }
850
851            while g.end > best_shift {
852                let ok = group_slide_up(xdf, &mut g);
853                debug_assert!(ok, "best shift unreached");
854                let ok = group_previous(xdfo, &mut go);
855                debug_assert!(ok, "group sync broken sliding to blank line");
856            }
857        }
858
859        // Advance to the next group pair.
860        if !group_next(xdf, &mut g) {
861            break;
862        }
863        if !group_next(xdfo, &mut go) {
864            break;
865        }
866    }
867}
868
869/// Run git's `xdl_change_compact` over the [`DiffOp`] script in place.
870///
871/// Reconstructs the per-file `changed[]` flags from `ops`, compacts the old
872/// file then the new file (each synchronized against the other, as git does),
873/// and rebuilds `ops` from the recompacted flags.
874fn change_compact(
875    ops: &mut Vec<DiffOp>,
876    old: &[DiffLine<'_>],
877    new: &[DiffLine<'_>],
878    ws_ignore: WsIgnore,
879    indent_heuristic: bool,
880) {
881    // Fast path: no changes (or a single trivial run) cannot be slid.
882    if ops.iter().all(|op| matches!(op, DiffOp::Equal(_))) {
883        return;
884    }
885
886    // Canonicalized record bytes — the same equality the line-level diff used.
887    let canon = |lines: &[DiffLine<'_>]| -> Vec<Vec<u8>> {
888        if ws_ignore.is_empty() {
889            lines.iter().map(|l| l.content.to_vec()).collect()
890        } else {
891            lines
892                .iter()
893                .map(|l| crate::canonicalize_line_for_match(l.content, ws_ignore))
894                .collect()
895        }
896    };
897
898    let mut xdf1 = CompactFile {
899        recs: canon(old),
900        changed: vec![false; old.len()],
901    };
902    let mut xdf2 = CompactFile {
903        recs: canon(new),
904        changed: vec![false; new.len()],
905    };
906
907    // Reconstruct git's two `changed[]` arrays from the run-length script.
908    let mut oi = 0usize;
909    let mut ni = 0usize;
910    for op in ops.iter() {
911        match *op {
912            DiffOp::Equal(n) => {
913                oi += n;
914                ni += n;
915            }
916            DiffOp::Delete(n) => {
917                for _ in 0..n {
918                    xdf1.changed[oi] = true;
919                    oi += 1;
920                }
921            }
922            DiffOp::Insert(n) => {
923                for _ in 0..n {
924                    xdf2.changed[ni] = true;
925                    ni += 1;
926                }
927            }
928        }
929    }
930
931    // git compacts xdf1 (synced against xdf2) then xdf2 (synced against xdf1).
932    compact_one(&mut xdf1, &mut xdf2, indent_heuristic);
933    compact_one(&mut xdf2, &mut xdf1, indent_heuristic);
934
935    // Rebuild the coalesced op script by walking both files' changed flags in
936    // lockstep, emitting deletes for the old side and inserts for the new side.
937    let n_old = xdf1.changed.len();
938    let n_new = xdf2.changed.len();
939    let mut rebuilt: Vec<DiffOp> = Vec::with_capacity(ops.len());
940    let mut i = 0usize; // old index
941    let mut j = 0usize; // new index
942    while i < n_old || j < n_new {
943        let del = i < n_old && xdf1.changed[i];
944        let ins = j < n_new && xdf2.changed[j];
945        if del {
946            let mut run = 0usize;
947            while i < n_old && xdf1.changed[i] {
948                run += 1;
949                i += 1;
950            }
951            push_op(&mut rebuilt, DiffOp::Delete(run));
952        } else if ins {
953            let mut run = 0usize;
954            while j < n_new && xdf2.changed[j] {
955                run += 1;
956                j += 1;
957            }
958            push_op(&mut rebuilt, DiffOp::Insert(run));
959        } else {
960            // Both sides are unchanged here: an equal run.
961            let mut run = 0usize;
962            while i < n_old && j < n_new && !xdf1.changed[i] && !xdf2.changed[j] {
963                run += 1;
964                i += 1;
965                j += 1;
966            }
967            debug_assert!(run > 0, "change_compact stalled rebuilding script");
968            push_op(&mut rebuilt, DiffOp::Equal(run));
969        }
970    }
971
972    *ops = rebuilt;
973}
974
975/// Append `op` to `out`, coalescing with a same-kind run at the tail.
976fn push_op(out: &mut Vec<DiffOp>, op: DiffOp) {
977    match (out.last_mut(), op) {
978        (Some(DiffOp::Equal(prev)), DiffOp::Equal(n)) => *prev += n,
979        (Some(DiffOp::Delete(prev)), DiffOp::Delete(n)) => *prev += n,
980        (Some(DiffOp::Insert(prev)), DiffOp::Insert(n)) => *prev += n,
981        _ => out.push(op),
982    }
983}
984
985/// State for [`filter_hunks_to_ranges`], a port of diff.c's
986/// `struct line_range_callback`. We drive it over the already-rendered
987/// unified-diff lines (with inflated context) rather than xdiff's raw line
988/// callback, but the algorithm is the same: buffer pending removals, open a
989/// range hunk when an in-range post-image line is seen, and emit the clipped
990/// `@@` header + body for each range.
991struct RangeFilter<'r> {
992    ranges: &'r [LineRange],
993    cur_range: usize,
994    /// Post/pre-image 1-based line counters seeded from each `@@` header.
995    lno_post: i64,
996    lno_pre: i64,
997    /// Function-name heading carried from the current `@@` header (the suffix
998    /// after `@@ ... @@ `), reused verbatim on every emitted range hunk.
999    func: Vec<u8>,
1000    /// Range hunk being accumulated.
1001    rhunk: Vec<u8>,
1002    rhunk_old_begin: i64,
1003    rhunk_old_count: i64,
1004    rhunk_new_begin: i64,
1005    rhunk_new_count: i64,
1006    rhunk_active: bool,
1007    rhunk_has_changes: bool,
1008    /// Removal lines not yet known to be in-range.
1009    pending_rm: Vec<u8>,
1010    pending_rm_count: i64,
1011    pending_rm_pre_begin: i64,
1012}
1013
1014impl RangeFilter<'_> {
1015    fn discard_pending_rm(&mut self) {
1016        self.pending_rm.clear();
1017        self.pending_rm_count = 0;
1018    }
1019
1020    /// Port of diff.c:flush_rhunk — emit the accumulated range hunk (header +
1021    /// body) into `out`, dropping context-only hunks.
1022    fn flush_rhunk(&mut self, out: &mut Vec<u8>) {
1023        if !self.rhunk_active {
1024            return;
1025        }
1026        if self.pending_rm_count != 0 {
1027            self.rhunk.extend_from_slice(&self.pending_rm);
1028            self.rhunk_old_count += self.pending_rm_count;
1029            self.rhunk_has_changes = true;
1030            self.discard_pending_rm();
1031        }
1032        if !self.rhunk_has_changes {
1033            self.rhunk_active = false;
1034            self.rhunk.clear();
1035            return;
1036        }
1037        // git's flush_rhunk uses `@@ -%ld,%ld +%ld,%ld @@` unconditionally —
1038        // the count is ALWAYS shown (unlike the normal emitter, which omits a
1039        // count of 1).
1040        out.extend_from_slice(
1041            format!(
1042                "@@ -{},{} +{},{} @@",
1043                self.rhunk_old_begin,
1044                self.rhunk_old_count,
1045                self.rhunk_new_begin,
1046                self.rhunk_new_count
1047            )
1048            .as_bytes(),
1049        );
1050        if !self.func.is_empty() {
1051            out.push(b' ');
1052            out.extend_from_slice(&self.func);
1053        }
1054        out.push(b'\n');
1055        out.extend_from_slice(&self.rhunk);
1056        self.rhunk_active = false;
1057        self.rhunk.clear();
1058    }
1059
1060    /// Port of diff.c:line_range_line_fn for one rendered body line. `marker`
1061    /// is the first byte (`' '`/`'+'`/`'-'`/`'\\'`), `line` the full bytes.
1062    fn body_line(&mut self, out: &mut Vec<u8>, marker: u8, line: &[u8]) {
1063        if marker == b'-' {
1064            if self.pending_rm_count == 0 {
1065                self.pending_rm_pre_begin = self.lno_pre;
1066            }
1067            self.lno_pre += 1;
1068            self.pending_rm.extend_from_slice(line);
1069            self.pending_rm_count += 1;
1070            return;
1071        }
1072        if marker == b'\\' {
1073            if self.pending_rm_count != 0 {
1074                self.pending_rm.extend_from_slice(line);
1075            } else if self.rhunk_active {
1076                self.rhunk.extend_from_slice(line);
1077            }
1078            return;
1079        }
1080        // marker is '+' or ' '
1081        let lno_0 = self.lno_post - 1;
1082        let cur_pre = self.lno_pre;
1083        self.lno_post += 1;
1084        if marker == b' ' {
1085            self.lno_pre += 1;
1086        }
1087
1088        while self.cur_range < self.ranges.len() && lno_0 >= self.ranges[self.cur_range].end {
1089            if self.rhunk_active {
1090                self.flush_rhunk(out);
1091            }
1092            self.discard_pending_rm();
1093            self.cur_range += 1;
1094        }
1095        if self.cur_range >= self.ranges.len() {
1096            self.discard_pending_rm();
1097            return;
1098        }
1099        let cur = self.ranges[self.cur_range];
1100        if lno_0 < cur.start {
1101            self.discard_pending_rm();
1102            return;
1103        }
1104        if !self.rhunk_active {
1105            self.rhunk_active = true;
1106            self.rhunk_has_changes = false;
1107            self.rhunk_new_begin = lno_0 + 1;
1108            self.rhunk_old_begin = if self.pending_rm_count != 0 {
1109                self.pending_rm_pre_begin
1110            } else {
1111                cur_pre
1112            };
1113            self.rhunk_old_count = 0;
1114            self.rhunk_new_count = 0;
1115            self.rhunk.clear();
1116        }
1117        if self.pending_rm_count != 0 {
1118            self.rhunk.extend_from_slice(&self.pending_rm);
1119            self.rhunk_old_count += self.pending_rm_count;
1120            self.rhunk_has_changes = true;
1121            self.discard_pending_rm();
1122        }
1123        self.rhunk.extend_from_slice(line);
1124        self.rhunk_new_count += 1;
1125        if marker == b'+' {
1126            self.rhunk_has_changes = true;
1127        } else {
1128            self.rhunk_old_count += 1;
1129        }
1130    }
1131}
1132
1133/// Clip a fully-rendered unified-diff hunk body (`full`, produced with
1134/// inflated context) down to the tracked `ranges`, mirroring diff.c's
1135/// `line_range_hunk_fn` / `line_range_line_fn` / `flush_rhunk`. The renderer's
1136/// `@@` header already carries the funcname suffix; we parse it back out and
1137/// reuse it on every emitted range hunk. No-color path only (`log -L` test
1138/// output is uncolored).
1139fn filter_hunks_to_ranges(out: &mut Vec<u8>, full: &[u8], ranges: &[LineRange]) {
1140    if ranges.is_empty() {
1141        return;
1142    }
1143    let mut filter = RangeFilter {
1144        ranges,
1145        cur_range: 0,
1146        lno_post: 0,
1147        lno_pre: 0,
1148        func: Vec::new(),
1149        rhunk: Vec::new(),
1150        rhunk_old_begin: 0,
1151        rhunk_old_count: 0,
1152        rhunk_new_begin: 0,
1153        rhunk_new_count: 0,
1154        rhunk_active: false,
1155        rhunk_has_changes: false,
1156        pending_rm: Vec::new(),
1157        pending_rm_count: 0,
1158        pending_rm_pre_begin: 0,
1159    };
1160    for line in split_keep_newline(full) {
1161        if line.starts_with(b"@@ ") {
1162            // New xdiff hunk: any pending removals from the previous hunk are
1163            // left in place (diff.c does the same — the next body line decides
1164            // their fate), and the range hunk cursor is NOT reset across xdiff
1165            // hunks. Parse the begin line numbers + funcname suffix.
1166            if let Some((old_begin, new_begin, func)) = parse_hunk_header(line) {
1167                filter.lno_post = new_begin;
1168                filter.lno_pre = old_begin;
1169                filter.func = func;
1170            }
1171            continue;
1172        }
1173        let marker = line.first().copied().unwrap_or(b' ');
1174        filter.body_line(out, marker, line);
1175    }
1176    filter.flush_rhunk(out);
1177}
1178
1179/// Split `buf` into lines, each INCLUDING its trailing `\n` (the final line may
1180/// lack one). Empty input yields no lines.
1181fn split_keep_newline(buf: &[u8]) -> impl Iterator<Item = &[u8]> {
1182    let mut start = 0usize;
1183    std::iter::from_fn(move || {
1184        if start >= buf.len() {
1185            return None;
1186        }
1187        let rel = buf[start..].iter().position(|&b| b == b'\n');
1188        let end = match rel {
1189            Some(pos) => start + pos + 1,
1190            None => buf.len(),
1191        };
1192        let line = &buf[start..end];
1193        start = end;
1194        Some(line)
1195    })
1196}
1197
1198/// Parse a rendered `@@ -o[,c] +n[,c] @@[ func]` header, returning the 1-based
1199/// old/new begin line numbers and the trailing funcname bytes (without the
1200/// leading space, without a trailing newline). Begin is the value xdiff emits:
1201/// 1-based when the count is non-zero, one-less when zero (unused in that case).
1202fn parse_hunk_header(line: &[u8]) -> Option<(i64, i64, Vec<u8>)> {
1203    // line = "@@ -A,B +C,D @@ func\n" (or "@@ -A +C @@\n", etc.)
1204    let rest = line.strip_prefix(b"@@ -")?;
1205    let plus = rest.iter().position(|&b| b == b'+')?;
1206    let old_part = &rest[..plus];
1207    // skip "+", parse new part up to " @@"
1208    let after_plus = &rest[plus + 1..];
1209    let close = find_subslice(after_plus, b" @@")?;
1210    let new_part = &after_plus[..close];
1211    let old_begin = parse_range_begin(old_part.split(|&b| b == b' ').next().unwrap_or(old_part))?;
1212    let new_begin = parse_range_begin(new_part)?;
1213    // Funcname suffix: everything after " @@ " (a single space separates it).
1214    let tail = &after_plus[close + 3..];
1215    let func = if let Some(f) = tail.strip_prefix(b" ") {
1216        let mut f = f.to_vec();
1217        if f.last() == Some(&b'\n') {
1218            f.pop();
1219        }
1220        f
1221    } else {
1222        Vec::new()
1223    };
1224    Some((old_begin, new_begin, func))
1225}
1226
1227/// Parse the "A" or "A,B" begin field of an `@@` range side into A.
1228fn parse_range_begin(field: &[u8]) -> Option<i64> {
1229    let begin = field.split(|&b| b == b',').next().unwrap_or(field);
1230    std::str::from_utf8(begin).ok()?.trim().parse::<i64>().ok()
1231}
1232
1233fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
1234    if needle.is_empty() || haystack.len() < needle.len() {
1235        return None;
1236    }
1237    (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
1238}
1239
1240/// One contiguous change in the edit script (git's `xdchange`): the old/new
1241/// line ranges it covers, the tagged-stream positions of its first/last
1242/// non-context line, and whether it is ignorable (`--ignore-blank-lines` /
1243/// `-I<regex>`).
1244#[derive(Clone, Copy)]
1245struct Change {
1246    /// 0-based first old line in this change (`i1`).
1247    i1: usize,
1248    /// Number of old lines deleted (`chg1`).
1249    chg1: usize,
1250    /// 0-based first new line in this change (`i2`).
1251    i2: usize,
1252    /// Number of new lines inserted (`chg2`).
1253    chg2: usize,
1254    /// Tagged-stream index of this change's first non-context line.
1255    tag_first: usize,
1256    /// Tagged-stream index of this change's last non-context line.
1257    tag_last: usize,
1258    /// Whether this change is ignorable (blank-only / regex-only).
1259    ignore: bool,
1260}
1261
1262/// Build the change list (xdchange script) from the flattened tagged stream.
1263/// Each maximal run of consecutive `-`/`+` lines becomes one [`Change`].
1264fn build_changes(tagged: &[TaggedLine<'_>]) -> Vec<Change> {
1265    let mut changes: Vec<Change> = Vec::new();
1266    let mut idx = 0usize;
1267    while idx < tagged.len() {
1268        if tagged[idx].kind == LineKind::Context {
1269            idx += 1;
1270            continue;
1271        }
1272        let tag_first = idx;
1273        let i1 = tagged[idx].old_index;
1274        let i2 = tagged[idx].new_index;
1275        let mut chg1 = 0usize;
1276        let mut chg2 = 0usize;
1277        while idx < tagged.len() && tagged[idx].kind != LineKind::Context {
1278            match tagged[idx].kind {
1279                LineKind::Delete => chg1 += 1,
1280                LineKind::Insert => chg2 += 1,
1281                LineKind::Context => unreachable!(),
1282            }
1283            idx += 1;
1284        }
1285        changes.push(Change {
1286            i1,
1287            chg1,
1288            i2,
1289            chg2,
1290            tag_first,
1291            tag_last: idx - 1,
1292            ignore: false,
1293        });
1294    }
1295    changes
1296}
1297
1298/// Mark each change ignorable when its old and new lines are all blank
1299/// (`--ignore-blank-lines`) or all regex-matched (`-I<regex>`), mirroring
1300/// git's `xdl_mark_ignorable_lines` then `xdl_mark_ignorable_regex` (the regex
1301/// pass never overrides a blank-marked change).
1302fn mark_ignorable_changes(
1303    changes: &mut [Change],
1304    old: &[DiffLine<'_>],
1305    new: &[DiffLine<'_>],
1306    ws_ignore: WsIgnore,
1307    ci: &ChangeIgnore<'_>,
1308) {
1309    for change in changes.iter_mut() {
1310        if ci.ignore_blank_lines {
1311            let blank = (change.i1..change.i1 + change.chg1)
1312                .all(|i| line_is_blank(old[i].content, ws_ignore))
1313                && (change.i2..change.i2 + change.chg2)
1314                    .all(|i| line_is_blank(new[i].content, ws_ignore));
1315            change.ignore = blank;
1316        }
1317        if !change.ignore
1318            && let Some(regex_match) = ci.regex_match
1319        {
1320            let matched = (change.i1..change.i1 + change.chg1).all(|i| regex_match(old[i].content))
1321                && (change.i2..change.i2 + change.chg2).all(|i| regex_match(new[i].content));
1322            change.ignore = matched;
1323        }
1324    }
1325}
1326
1327/// Group the change list into hunks, returning each hunk's
1328/// `(first_real_change_tag, last_real_change_tag)` tagged-stream span. This is
1329/// a behavioural port of git's `xemit.c:xdl_get_hunk` driving
1330/// `xdl_call_hunk_func`'s loop: it applies the `distance > max_common` hunk
1331/// break, drops leading ignorable changes, and excludes trailing/isolated
1332/// ignorable changes from the emitted span.
1333fn group_changes_into_hunks(
1334    changes: &[Change],
1335    context: usize,
1336    interhunk: usize,
1337) -> Vec<(usize, usize)> {
1338    let max_common = context.saturating_add(context).saturating_add(interhunk);
1339    let max_ignorable = context;
1340
1341    let mut hunks: Vec<(usize, usize)> = Vec::new();
1342    // `start` is the index into `changes` of the first change still to emit
1343    // (xdl_call_hunk_func's `xch` cursor).
1344    let mut start = 0usize;
1345    while start < changes.len() {
1346        // Remove ignorable changes that are too far before other changes
1347        // (xdl_get_hunk's leading-ignorable loop). Faithful port: `xchp`
1348        // iterates over EVERY leading ignorable change; whenever the gap to the
1349        // next change is ≥ max_ignorable (or there is no next change), the hunk
1350        // start advances to that next change (`None` ⇒ the whole tail is
1351        // ignorable ⇒ no hunk). Unlike a break-on-first-keep loop, this still
1352        // advances past a run of ignorables when the FINAL one has no successor.
1353        {
1354            let mut xchp = start;
1355            while xchp < changes.len() && changes[xchp].ignore {
1356                let cur = &changes[xchp];
1357                match changes.get(xchp + 1) {
1358                    None => {
1359                        start = changes.len();
1360                    }
1361                    Some(next) => {
1362                        if next.i1 - (cur.i1 + cur.chg1) >= max_ignorable {
1363                            start = xchp + 1;
1364                        }
1365                    }
1366                }
1367                xchp += 1;
1368            }
1369        }
1370        if start >= changes.len() {
1371            break;
1372        }
1373
1374        // Walk forward extending the hunk; `last` tracks the last *real*
1375        // (non-ignorable, or the very first) change that defines the hunk end.
1376        let mut last = start;
1377        let mut ignored = 0usize; // ignored new-line count accumulated
1378        let mut prev = start;
1379        let mut idx = start + 1;
1380        while idx < changes.len() {
1381            let xch = &changes[idx];
1382            let xchp = &changes[prev];
1383            let distance = xch.i1 - (xchp.i1 + xchp.chg1);
1384            if distance > max_common {
1385                break;
1386            }
1387            if distance < max_ignorable && (!xch.ignore || last == prev) {
1388                last = idx;
1389                ignored = 0;
1390            } else if distance < max_ignorable && xch.ignore {
1391                ignored += xch.chg2;
1392            } else if last != prev
1393                && xch.i1 + ignored - (changes[last].i1 + changes[last].chg1) > max_common
1394            {
1395                break;
1396            } else if !xch.ignore {
1397                last = idx;
1398                ignored = 0;
1399            } else {
1400                ignored += xch.chg2;
1401            }
1402            prev = idx;
1403            idx += 1;
1404        }
1405
1406        let first_change = &changes[start];
1407        let last_change = &changes[last];
1408        hunks.push((first_change.tag_first, last_change.tag_last));
1409        start = last + 1;
1410    }
1411
1412    hunks
1413}
1414
1415fn expand_hunks_to_function_context(
1416    groups: &[(usize, usize)],
1417    tagged: &[TaggedLine<'_>],
1418    old: &[DiffLine<'_>],
1419    new: &[DiffLine<'_>],
1420    mut heading: Option<&mut HeadingFn<'_>>,
1421) -> Vec<(usize, usize)> {
1422    let Some(classifier) = heading.as_mut() else {
1423        return groups.to_vec();
1424    };
1425    let mut expanded = Vec::with_capacity(groups.len());
1426    for &(start, end) in groups {
1427        let first = tagged[start];
1428        let last = tagged[end];
1429        let old_changed = tagged[start..=end]
1430            .iter()
1431            .any(|line| line.kind == LineKind::Delete);
1432        let (side, range) = if old_changed {
1433            (
1434                FunctionSide::Old,
1435                function_context_range(old, first.old_index, false, classifier),
1436            )
1437        } else {
1438            (
1439                FunctionSide::New,
1440                function_context_range(new, first.new_index, true, classifier),
1441            )
1442        };
1443        let Some((range_start, range_end)) = range else {
1444            expanded.push((start, end));
1445            continue;
1446        };
1447        let mut hunk_start = expand_tag_start(tagged, start, side, range_start);
1448        let mut hunk_end = expand_tag_end(tagged, end, side, range_end);
1449        if old_changed {
1450            if last.old_index >= range_end {
1451                hunk_end = end;
1452            }
1453        } else if last.new_index >= range_end {
1454            hunk_end = end;
1455        }
1456        if hunk_start > start {
1457            hunk_start = start;
1458        }
1459        if hunk_end < end {
1460            hunk_end = end;
1461        }
1462        if let Some(prev) = expanded.last_mut()
1463            && hunk_start <= prev.1 + 1
1464        {
1465            prev.1 = prev.1.max(hunk_end);
1466            continue;
1467        }
1468        expanded.push((hunk_start, hunk_end));
1469    }
1470    expanded
1471}
1472
1473#[derive(Clone, Copy)]
1474enum FunctionSide {
1475    Old,
1476    New,
1477}
1478
1479fn function_context_range(
1480    lines: &[DiffLine<'_>],
1481    anchor: usize,
1482    prefer_forward: bool,
1483    heading: &mut HeadingFn<'_>,
1484) -> Option<(usize, usize)> {
1485    if lines.is_empty() {
1486        return None;
1487    }
1488    let anchor = anchor.min(lines.len() - 1);
1489    let mut heading_idx = None;
1490    for idx in (0..=anchor).rev() {
1491        if heading(lines[idx].content).is_some() {
1492            heading_idx = Some(idx);
1493            break;
1494        }
1495    }
1496    if heading_idx.is_none() && prefer_forward {
1497        for (idx, line) in lines.iter().enumerate().skip(anchor) {
1498            if heading(line.content).is_some() {
1499                heading_idx = Some(idx);
1500                break;
1501            }
1502        }
1503    }
1504
1505    let (mut start, mut end) = if let Some(idx) = heading_idx {
1506        let mut start = idx;
1507        while start > 0 && !line_is_blank(lines[start - 1].content, WsIgnore::default()) {
1508            start -= 1;
1509        }
1510        let mut end = lines.len();
1511        for (next, line) in lines.iter().enumerate().skip(idx + 1) {
1512            if heading(line.content).is_some() {
1513                end = next;
1514                break;
1515            }
1516        }
1517        (start, end)
1518    } else {
1519        (0, lines.len())
1520    };
1521
1522    while start < end && line_is_blank(lines[start].content, WsIgnore::default()) {
1523        start += 1;
1524    }
1525    while end > start && line_is_blank(lines[end - 1].content, WsIgnore::default()) {
1526        end -= 1;
1527    }
1528    (start < end).then_some((start, end))
1529}
1530
1531fn expand_tag_start(
1532    tagged: &[TaggedLine<'_>],
1533    current: usize,
1534    side: FunctionSide,
1535    range_start: usize,
1536) -> usize {
1537    let mut start = current;
1538    while start > 0 {
1539        let prev = tagged[start - 1];
1540        let line_index = match side {
1541            FunctionSide::Old => prev.old_index,
1542            FunctionSide::New => prev.new_index,
1543        };
1544        if line_index < range_start {
1545            break;
1546        }
1547        start -= 1;
1548    }
1549    start
1550}
1551
1552fn expand_tag_end(
1553    tagged: &[TaggedLine<'_>],
1554    current: usize,
1555    side: FunctionSide,
1556    range_end: usize,
1557) -> usize {
1558    let mut end = current;
1559    while end + 1 < tagged.len() {
1560        let next = tagged[end + 1];
1561        let line_index = match side {
1562            FunctionSide::Old => next.old_index,
1563            FunctionSide::New => next.new_index,
1564        };
1565        if line_index >= range_end {
1566            break;
1567        }
1568        end += 1;
1569    }
1570    end
1571}
1572
1573const COLOR_MOVED_MIN_ALNUM_COUNT: usize = 20;
1574const INDENT_BLANKLINE: i32 = i32::MIN;
1575
1576#[derive(Clone, Copy, Default)]
1577struct MovedStyle {
1578    moved: bool,
1579    alt: bool,
1580    uninteresting: bool,
1581}
1582
1583#[derive(Clone)]
1584struct MovedEntry {
1585    tag_idx: usize,
1586    next_line: Option<usize>,
1587    next_match: Option<usize>,
1588    id: usize,
1589    indent_width: i32,
1590}
1591
1592#[derive(Clone, Copy, Default)]
1593struct MovedEntryList {
1594    add: Option<usize>,
1595    del: Option<usize>,
1596}
1597
1598#[derive(Clone, Copy)]
1599struct MovedBlock {
1600    match_entry: usize,
1601    wsd: i32,
1602}
1603
1604fn mark_color_as_moved(tagged: &[TaggedLine<'_>], color_moved: ColorMoved) -> Vec<MovedStyle> {
1605    let (entries, entry_for_tag, entry_list) = add_lines_to_move_detection(tagged, color_moved.ws);
1606    let mut styles = vec![MovedStyle::default(); tagged.len()];
1607    let mut pmb: Vec<MovedBlock> = Vec::new();
1608    let mut n = 0usize;
1609    let mut flipped_block = false;
1610    let mut block_length = 0usize;
1611    let mut moved_symbol: Option<LineKind> = None;
1612
1613    while n < tagged.len() {
1614        let line = tagged[n];
1615        let line_entry = entry_for_tag[n];
1616        let mut line_match = line_entry.and_then(|entry_idx| {
1617            let id = entries[entry_idx].id;
1618            match line.kind {
1619                LineKind::Insert => entry_list.get(id).and_then(|list| list.del),
1620                LineKind::Delete => entry_list.get(id).and_then(|list| list.add),
1621                LineKind::Context => None,
1622            }
1623        });
1624
1625        if line.kind == LineKind::Context {
1626            flipped_block = false;
1627        }
1628
1629        if !pmb.is_empty() && (line_match.is_none() || Some(line.kind) != moved_symbol) {
1630            if !adjust_last_block(&mut styles, tagged, color_moved.mode, n, block_length)
1631                && block_length > 1
1632            {
1633                line_match = None;
1634                n -= block_length;
1635            }
1636            pmb.clear();
1637            block_length = 0;
1638            flipped_block = false;
1639        }
1640
1641        let Some(line_match) = line_match else {
1642            moved_symbol = None;
1643            n += 1;
1644            continue;
1645        };
1646
1647        if color_moved.mode == ColorMovedMode::Plain {
1648            styles[n].moved = true;
1649            n += 1;
1650            continue;
1651        }
1652
1653        pmb_advance_or_null(
1654            &mut pmb,
1655            &entries,
1656            tagged,
1657            line_entry.expect("plus/minus line has move-detection entry"),
1658            color_moved.ws,
1659        );
1660
1661        if pmb.is_empty() {
1662            let contiguous =
1663                adjust_last_block(&mut styles, tagged, color_moved.mode, n, block_length);
1664            if !contiguous && block_length > 1 {
1665                n -= block_length;
1666            } else {
1667                fill_potential_moved_blocks(
1668                    line_match,
1669                    &entries,
1670                    tagged,
1671                    n,
1672                    color_moved.ws,
1673                    &mut pmb,
1674                );
1675            }
1676
1677            if contiguous && !pmb.is_empty() && moved_symbol == Some(line.kind) {
1678                flipped_block = !flipped_block;
1679            } else {
1680                flipped_block = false;
1681            }
1682
1683            moved_symbol = (!pmb.is_empty()).then_some(line.kind);
1684            block_length = 0;
1685        }
1686
1687        if !pmb.is_empty() {
1688            block_length += 1;
1689            styles[n].moved = true;
1690            if flipped_block && color_moved.mode != ColorMovedMode::Blocks {
1691                styles[n].alt = true;
1692            }
1693        }
1694        n += 1;
1695    }
1696
1697    adjust_last_block(&mut styles, tagged, color_moved.mode, n, block_length);
1698    if color_moved.mode == ColorMovedMode::DimmedZebra {
1699        dim_moved_lines(&mut styles, tagged);
1700    }
1701    styles
1702}
1703
1704fn add_lines_to_move_detection(
1705    tagged: &[TaggedLine<'_>],
1706    ws: ColorMovedWs,
1707) -> (Vec<MovedEntry>, Vec<Option<usize>>, Vec<MovedEntryList>) {
1708    let mut entries: Vec<MovedEntry> = Vec::new();
1709    let mut entry_for_tag = vec![None; tagged.len()];
1710    let mut entry_list = Vec::<MovedEntryList>::new();
1711    let mut interned = HashMap::<Vec<u8>, usize>::new();
1712    let mut prev_line: Option<usize> = None;
1713
1714    for (tag_idx, line) in tagged.iter().enumerate() {
1715        if line.kind != LineKind::Insert && line.kind != LineKind::Delete {
1716            prev_line = None;
1717            continue;
1718        }
1719        let indent = if ws.allow_indentation_change {
1720            moved_indent_data(line.content)
1721        } else {
1722            (0, 0)
1723        };
1724        let key = moved_line_key(line.content, indent.0, ws.ignore);
1725        let id = match interned.get(&key) {
1726            Some(id) => *id,
1727            None => {
1728                let id = interned.len();
1729                interned.insert(key, id);
1730                entry_list.push(MovedEntryList::default());
1731                id
1732            }
1733        };
1734
1735        let entry_idx = entries.len();
1736        if let Some(prev) = prev_line
1737            && tagged[entries[prev].tag_idx].kind == line.kind
1738        {
1739            entries[prev].next_line = Some(entry_idx);
1740        }
1741        let next_match = match line.kind {
1742            LineKind::Insert => entry_list[id].add,
1743            LineKind::Delete => entry_list[id].del,
1744            LineKind::Context => None,
1745        };
1746        entries.push(MovedEntry {
1747            tag_idx,
1748            next_line: None,
1749            next_match,
1750            id,
1751            indent_width: indent.1,
1752        });
1753        match line.kind {
1754            LineKind::Insert => entry_list[id].add = Some(entry_idx),
1755            LineKind::Delete => entry_list[id].del = Some(entry_idx),
1756            LineKind::Context => {}
1757        }
1758        entry_for_tag[tag_idx] = Some(entry_idx);
1759        prev_line = Some(entry_idx);
1760    }
1761
1762    (entries, entry_for_tag, entry_list)
1763}
1764
1765fn moved_line_key(line: &[u8], indent_off: usize, ignore: WsIgnore) -> Vec<u8> {
1766    let bytes = line.get(indent_off..).unwrap_or_default();
1767    if ignore.is_empty() {
1768        bytes.to_vec()
1769    } else {
1770        canonicalize_moved_line(bytes, ignore)
1771    }
1772}
1773
1774fn canonicalize_moved_line(line: &[u8], ignore: WsIgnore) -> Vec<u8> {
1775    let is_ws = |b: u8| matches!(b, b' ' | b'\t' | b'\n' | b'\r' | 0x0b | 0x0c);
1776    if ignore.all_space {
1777        return line.iter().copied().filter(|b| !is_ws(*b)).collect();
1778    }
1779    if ignore.space_change {
1780        let mut out = Vec::with_capacity(line.len());
1781        let mut i = 0usize;
1782        while i < line.len() {
1783            if is_ws(line[i]) {
1784                out.push(b' ');
1785                while i < line.len() && is_ws(line[i]) {
1786                    i += 1;
1787                }
1788            } else {
1789                out.push(line[i]);
1790                i += 1;
1791            }
1792        }
1793        while out.last() == Some(&b' ') {
1794            out.pop();
1795        }
1796        return out;
1797    }
1798    if ignore.space_at_eol {
1799        let mut end = line.len();
1800        if end > 0 && line[end - 1] == b'\n' {
1801            end -= 1;
1802        }
1803        while end > 0 && matches!(line[end - 1], b' ' | b'\t') {
1804            end -= 1;
1805        }
1806        let mut out = line[..end].to_vec();
1807        if line.ends_with(b"\n") {
1808            out.push(b'\n');
1809        }
1810        return out;
1811    }
1812    if ignore.cr_at_eol {
1813        let mut out = line.to_vec();
1814        if out.ends_with(b"\r\n") {
1815            let len = out.len();
1816            out.remove(len - 2);
1817        }
1818        return out;
1819    }
1820    line.to_vec()
1821}
1822
1823fn moved_indent_data(line: &[u8]) -> (usize, i32) {
1824    let mut off = 0usize;
1825    let mut width = 0i32;
1826    while off < line.len() && matches!(line[off], b'\x0c' | b'\x0b' | b'\r') {
1827        off += 1;
1828    }
1829    while off < line.len() {
1830        match line[off] {
1831            b' ' => {
1832                width += 1;
1833                off += 1;
1834            }
1835            b'\t' => {
1836                width += 8 - (width % 8);
1837                off += 1;
1838                while off < line.len() && line[off] == b'\t' {
1839                    width += 8;
1840                    off += 1;
1841                }
1842            }
1843            _ => break,
1844        }
1845    }
1846    if line[off..].iter().all(|b| b.is_ascii_whitespace()) {
1847        (line.len(), INDENT_BLANKLINE)
1848    } else {
1849        (off, width)
1850    }
1851}
1852
1853fn pmb_advance_or_null(
1854    pmb: &mut Vec<MovedBlock>,
1855    entries: &[MovedEntry],
1856    tagged: &[TaggedLine<'_>],
1857    line_entry: usize,
1858    ws: ColorMovedWs,
1859) {
1860    let mut kept = Vec::with_capacity(pmb.len());
1861    for mut block in pmb.iter().copied() {
1862        let Some(cur) = entries[block.match_entry].next_line else {
1863            continue;
1864        };
1865        let matched = if ws.allow_indentation_change {
1866            !cmp_in_block_with_wsd(entries, cur, tagged, line_entry, &mut block)
1867        } else {
1868            entries[cur].id == entries[line_entry].id
1869        };
1870        if matched {
1871            block.match_entry = cur;
1872            kept.push(block);
1873        }
1874    }
1875    *pmb = kept;
1876}
1877
1878fn fill_potential_moved_blocks(
1879    mut line_match: usize,
1880    entries: &[MovedEntry],
1881    tagged: &[TaggedLine<'_>],
1882    tag_idx: usize,
1883    ws: ColorMovedWs,
1884    pmb: &mut Vec<MovedBlock>,
1885) {
1886    loop {
1887        let entry = &entries[line_match];
1888        let wsd = if ws.allow_indentation_change {
1889            compute_ws_delta(tagged[tag_idx].content, entry.indent_width)
1890        } else {
1891            0
1892        };
1893        pmb.push(MovedBlock {
1894            match_entry: line_match,
1895            wsd,
1896        });
1897        match entry.next_match {
1898            Some(next) => line_match = next,
1899            None => break,
1900        }
1901    }
1902}
1903
1904fn compute_ws_delta(line: &[u8], match_indent_width: i32) -> i32 {
1905    let (_, width) = moved_indent_data(line);
1906    if width == INDENT_BLANKLINE && match_indent_width == INDENT_BLANKLINE {
1907        INDENT_BLANKLINE
1908    } else {
1909        width - match_indent_width
1910    }
1911}
1912
1913fn cmp_in_block_with_wsd(
1914    entries: &[MovedEntry],
1915    cur: usize,
1916    tagged: &[TaggedLine<'_>],
1917    line_entry: usize,
1918    block: &mut MovedBlock,
1919) -> bool {
1920    let cur_entry = &entries[cur];
1921    if cur_entry.id != entries[line_entry].id {
1922        return true;
1923    }
1924    if cur_entry.indent_width == INDENT_BLANKLINE {
1925        return false;
1926    }
1927    let (_, line_width) = moved_indent_data(tagged[entries[line_entry].tag_idx].content);
1928    let delta = line_width - cur_entry.indent_width;
1929    if block.wsd == INDENT_BLANKLINE {
1930        block.wsd = delta;
1931    }
1932    delta != block.wsd
1933}
1934
1935fn adjust_last_block(
1936    styles: &mut [MovedStyle],
1937    tagged: &[TaggedLine<'_>],
1938    mode: ColorMovedMode,
1939    n: usize,
1940    block_length: usize,
1941) -> bool {
1942    if mode == ColorMovedMode::Plain {
1943        return block_length != 0;
1944    }
1945    let mut alnum_count = 0usize;
1946    for i in 1..=block_length {
1947        for byte in tagged[n - i].content {
1948            if byte.is_ascii_alphanumeric() {
1949                alnum_count += 1;
1950                if alnum_count >= COLOR_MOVED_MIN_ALNUM_COUNT {
1951                    return true;
1952                }
1953            }
1954        }
1955    }
1956    for i in 1..=block_length {
1957        styles[n - i] = MovedStyle::default();
1958    }
1959    false
1960}
1961
1962fn dim_moved_lines(styles: &mut [MovedStyle], tagged: &[TaggedLine<'_>]) {
1963    for n in 0..tagged.len() {
1964        if tagged[n].kind != LineKind::Insert && tagged[n].kind != LineKind::Delete {
1965            continue;
1966        }
1967        if !styles[n].moved {
1968            continue;
1969        }
1970        let prev = (n > 0
1971            && (tagged[n - 1].kind == LineKind::Insert || tagged[n - 1].kind == LineKind::Delete))
1972            .then_some(n - 1);
1973        let next = (n + 1 < tagged.len()
1974            && (tagged[n + 1].kind == LineKind::Insert || tagged[n + 1].kind == LineKind::Delete))
1975            .then_some(n + 1);
1976
1977        if prev.is_some_and(|i| moved_zebra_mask(styles[i]) == moved_zebra_mask(styles[n]))
1978            && next.is_some_and(|i| moved_zebra_mask(styles[i]) == moved_zebra_mask(styles[n]))
1979        {
1980            styles[n].uninteresting = true;
1981            continue;
1982        }
1983        if prev.is_some_and(|i| styles[i].moved && styles[i].alt != styles[n].alt) {
1984            continue;
1985        }
1986        if next.is_some_and(|i| styles[i].moved && styles[i].alt != styles[n].alt) {
1987            continue;
1988        }
1989        styles[n].uninteresting = true;
1990    }
1991}
1992
1993fn moved_zebra_mask(style: MovedStyle) -> (bool, bool) {
1994    (style.moved, style.alt)
1995}
1996
1997/// Emit a single hunk covering `tagged[start..end]`: the `@@ -os,oc +ns,nc @@
1998/// <heading>` header followed by the context/`-`/`+` lines, including the
1999/// `\ No newline at end of file` markers.
2000fn render_one_hunk(
2001    out: &mut Vec<u8>,
2002    tagged: &[TaggedLine<'_>],
2003    moved_styles: Option<&[MovedStyle]>,
2004    old_lines: &[DiffLine<'_>],
2005    start: usize,
2006    end: usize,
2007    options: &mut HunkRenderOptions<'_, '_>,
2008) {
2009    let slice = &tagged[start..end];
2010    let mut old_count = 0usize;
2011    let mut new_count = 0usize;
2012    for line in slice {
2013        match line.kind {
2014            LineKind::Context => {
2015                old_count += 1;
2016                new_count += 1;
2017            }
2018            LineKind::Delete => old_count += 1,
2019            LineKind::Insert => new_count += 1,
2020        }
2021    }
2022    // 1-based starting line numbers; an empty side starts at 0.
2023    let old_start = if old_count == 0 {
2024        slice.first().map(|line| line.old_index).unwrap_or(0)
2025    } else {
2026        slice
2027            .iter()
2028            .find(|line| line.kind != LineKind::Insert)
2029            .map(|line| line.old_index + 1)
2030            .unwrap_or(1)
2031    };
2032    let new_start = if new_count == 0 {
2033        slice.first().map(|line| line.new_index).unwrap_or(0)
2034    } else {
2035        slice
2036            .iter()
2037            .find(|line| line.kind != LineKind::Delete)
2038            .map(|line| line.new_index + 1)
2039            .unwrap_or(1)
2040    };
2041
2042    let heading = hunk_section_heading(
2043        old_lines,
2044        slice.first().map(|line| line.old_index),
2045        options.heading.as_deref_mut(),
2046    );
2047    let frag = format!(
2048        "@@ -{} +{} @@",
2049        format_hunk_range(old_start, old_count),
2050        format_hunk_range(new_start, new_count)
2051    );
2052    match options.colors {
2053        // Port of emit_hunk_header: the "@@ .. @@" span in the frag color,
2054        // the separating blank in the context color, the heading in the func
2055        // color (each reset-terminated).
2056        Some(colors) => {
2057            out.extend_from_slice(colors.frag.as_bytes());
2058            out.extend_from_slice(frag.as_bytes());
2059            out.extend_from_slice(colors.reset.as_bytes());
2060            if let Some(heading) = &heading {
2061                out.extend_from_slice(colors.context.as_bytes());
2062                out.push(b' ');
2063                out.extend_from_slice(colors.reset.as_bytes());
2064                out.extend_from_slice(colors.func.as_bytes());
2065                out.extend_from_slice(heading);
2066                out.extend_from_slice(colors.reset.as_bytes());
2067            }
2068            out.push(b'\n');
2069        }
2070        None => {
2071            out.extend_from_slice(frag.as_bytes());
2072            if let Some(heading) = &heading {
2073                out.push(b' ');
2074                out.extend_from_slice(heading);
2075            }
2076            out.push(b'\n');
2077        }
2078    }
2079
2080    if let Some(word_diff) = options.word_diff.as_deref_mut() {
2081        // Word-diff rendering: minus/plus runs accumulate and flush at
2082        // context lines (fn_out_consume's diff_words branch); the
2083        // "\ No newline" markers are eaten.
2084        for line in slice {
2085            match line.kind {
2086                LineKind::Delete => word_diff.push_minus(line.content),
2087                LineKind::Insert => word_diff.push_plus(line.content),
2088                LineKind::Context => {
2089                    word_diff.flush(out);
2090                    word_diff.emit_context_line(out, line.content);
2091                }
2092            }
2093        }
2094        word_diff.flush(out);
2095        return;
2096    }
2097
2098    for (offset, line) in slice.iter().enumerate() {
2099        let prefix = match line.kind {
2100            LineKind::Context => b' ',
2101            LineKind::Delete => b'-',
2102            LineKind::Insert => b'+',
2103        };
2104        match options.colors {
2105            Some(colors) => {
2106                // Whitespace-error highlighting applies to the selected line
2107                // kinds (default: new lines only).
2108                let ws_rule = options.ws_error.and_then(|ws| {
2109                    let enabled = match line.kind {
2110                        LineKind::Context => ws.context,
2111                        LineKind::Delete => ws.old,
2112                        LineKind::Insert => ws.new,
2113                    };
2114                    enabled.then_some(ws.rule)
2115                });
2116                let moved = moved_styles
2117                    .and_then(|styles| styles.get(start + offset))
2118                    .copied()
2119                    .filter(|style| style.moved);
2120                write_patch_line_colored(out, prefix, line.content, colors, ws_rule, moved);
2121            }
2122            None => write_patch_line(out, prefix, line.content),
2123        }
2124    }
2125}
2126
2127/// Format one `start,count` side of an `@@` header. git omits the count when
2128/// it is exactly 1 (e.g. `+5` rather than `+5,1`).
2129fn format_hunk_range(start: usize, count: usize) -> String {
2130    if count == 1 {
2131        start.to_string()
2132    } else {
2133        format!("{start},{count}")
2134    }
2135}
2136
2137/// git's section heading for a hunk: the nearest line *before* the hunk's
2138/// first line accepted by the caller's `heading` classifier. Headings are
2139/// produced by the classifier (already capped/trimmed by the caller's
2140/// userdiff machinery). Returns `None` when no such line precedes the hunk or
2141/// no classifier was supplied.
2142fn hunk_section_heading(
2143    old_lines: &[DiffLine<'_>],
2144    first_old_index: Option<usize>,
2145    mut heading: Option<&mut HeadingFn<'_>>,
2146) -> Option<Vec<u8>> {
2147    let first = first_old_index?;
2148    let classifier = heading.as_mut()?;
2149    // Scan upward from the line just above the hunk.
2150    for idx in (0..first).rev() {
2151        if let Some(found) = classifier(old_lines[idx].content) {
2152            return Some(found);
2153        }
2154    }
2155    None
2156}
2157
2158/// Write a single diff line with its `prefix` marker, appending the
2159/// `\ No newline at end of file` note when the source line lacks a trailing
2160/// LF.
2161fn write_patch_line(out: &mut Vec<u8>, prefix: u8, line: &[u8]) {
2162    out.push(prefix);
2163    out.extend_from_slice(line);
2164    if !line.ends_with(b"\n") {
2165        out.extend_from_slice(b"\n\\ No newline at end of file\n");
2166    }
2167}
2168
2169/// [`write_patch_line`] in color, optionally painting whitespace errors.
2170///
2171/// When `ws_rule` is `Some`, the line body is emitted through
2172/// [`crate::ws::ws_check_emit`] (git's `emit_line_ws_markup` highlighted
2173/// branch): the sign is painted in the line color, then the body's non-error
2174/// segments in the line color and its whitespace-error segments in
2175/// `colors.whitespace`. A clean line produces no whitespace spans, so it stays
2176/// visually plain.
2177///
2178/// When `ws_rule` is `None`, context/old lines paint the sign and body in one
2179/// span; new lines paint the sign and body as separate spans (the default
2180/// `ws-error-highlight` path with no rule).
2181fn write_patch_line_colored(
2182    out: &mut Vec<u8>,
2183    prefix: u8,
2184    line: &[u8],
2185    colors: RenderColors<'_>,
2186    ws_rule: Option<crate::ws::WsRule>,
2187    moved: Option<MovedStyle>,
2188) {
2189    let (body, terminated) = match line.split_last() {
2190        Some((b'\n', body)) => (body, true),
2191        _ => (line, false),
2192    };
2193    let color = match (prefix, moved) {
2194        (b'-', Some(style)) if style.uninteresting && style.alt => colors.old_moved_alt_dim,
2195        (b'-', Some(style)) if style.uninteresting => colors.old_moved_dim,
2196        (b'-', Some(style)) if style.alt => colors.old_moved_alt,
2197        (b'-', Some(_)) => colors.old_moved,
2198        (b'+', Some(style)) if style.uninteresting && style.alt => colors.new_moved_alt_dim,
2199        (b'+', Some(style)) if style.uninteresting => colors.new_moved_dim,
2200        (b'+', Some(style)) if style.alt => colors.new_moved_alt,
2201        (b'+', Some(_)) => colors.new_moved,
2202        (b'-', _) => colors.old,
2203        (b'+', _) => colors.new,
2204        _ => colors.context,
2205    };
2206
2207    if let Some(rule) = ws_rule {
2208        if rule == 0 {
2209            out.extend_from_slice(color.as_bytes());
2210            out.push(prefix);
2211            out.extend_from_slice(body);
2212            out.extend_from_slice(colors.reset.as_bytes());
2213            out.push(b'\n');
2214            if !terminated {
2215                out.extend_from_slice(colors.context.as_bytes());
2216                out.extend_from_slice(b"\\ No newline at end of file");
2217                out.extend_from_slice(colors.reset.as_bytes());
2218                out.push(b'\n');
2219            }
2220            return;
2221        }
2222        // Sign in the line color, then the body through ws_check_emit (no
2223        // trailing newline in `body`, so the emit's own LF handling is inert).
2224        out.extend_from_slice(color.as_bytes());
2225        out.push(prefix);
2226        out.extend_from_slice(colors.reset.as_bytes());
2227        let emit_colors = crate::ws::WsEmitColors {
2228            set: color,
2229            reset: colors.reset,
2230            ws: colors.whitespace,
2231        };
2232        crate::ws::ws_check_emit(body, rule, out, &emit_colors);
2233        out.push(b'\n');
2234        if !terminated {
2235            let marker_color = if rule & crate::ws::WS_INCOMPLETE_LINE != 0 {
2236                colors.whitespace
2237            } else {
2238                colors.context
2239            };
2240            out.extend_from_slice(marker_color.as_bytes());
2241            out.extend_from_slice(b"\\ No newline at end of file");
2242            out.extend_from_slice(colors.reset.as_bytes());
2243            out.push(b'\n');
2244        }
2245        return;
2246    }
2247
2248    if prefix == b'+' {
2249        out.extend_from_slice(color.as_bytes());
2250        out.push(prefix);
2251        out.extend_from_slice(colors.reset.as_bytes());
2252        if !body.is_empty() {
2253            out.extend_from_slice(color.as_bytes());
2254            out.extend_from_slice(body);
2255            out.extend_from_slice(colors.reset.as_bytes());
2256        }
2257    } else {
2258        out.extend_from_slice(color.as_bytes());
2259        out.push(prefix);
2260        out.extend_from_slice(body);
2261        out.extend_from_slice(colors.reset.as_bytes());
2262    }
2263    out.push(b'\n');
2264    if !terminated {
2265        out.extend_from_slice(colors.context.as_bytes());
2266        out.extend_from_slice(b"\\ No newline at end of file");
2267        out.extend_from_slice(colors.reset.as_bytes());
2268        out.push(b'\n');
2269    }
2270}
2271
2272// ===========================================================================
2273// Combined / merge-commit diff renderer (`-c` / `--cc`).
2274//
2275// This is the byte-for-byte port of git's `combine-diff.c`
2276// (`combine_diff` / `make_hunks` / `dump_sline`): the multi-parent
2277// `@@@ -p1 -p2 +out @@@` hunk header (with one extra `@` per parent and one
2278// `-pN,cN` column per parent), the per-parent prefix columns on each body
2279// line, and the `--cc` "dense" simplification that drops hunks whose result
2280// matches at least one parent ("uninteresting" hunks).
2281//
2282// The renderer is repository-agnostic, exactly like the unified-diff renderer
2283// above: the caller supplies the *result* blob and one blob per parent (plus
2284// the line-diff algorithm / whitespace flags), and we emit only the hunk body
2285// — the per-file `diff --cc`/`index`/`---`/`+++` metainfo header stays with the
2286// command, which owns the repository / oid / mode context.
2287//
2288// Data-structure correspondence with combine-diff.c:
2289//   * `Sline`  <-> `struct sline` (one per result line, plus a trailing
2290//     sentinel `sline[cnt]` whose `bol` is empty),
2291//   * `Sline.lost` <-> `struct lline` list (lines deleted relative to some
2292//     parent, hung before the surviving result line),
2293//   * the `flag` bitset: bit `n` set => parent `n` did NOT have this result
2294//     line (i.e. it was added relative to parent `n`); bit `num_parent`
2295//     ("mark") => the line is part of a shown hunk; bit `num_parent+1`
2296//     ("no_pre_delete") => suppress the leading deletions before this line.
2297// ===========================================================================
2298
2299/// One result line in the combined-diff `sline` array, plus the deletions
2300/// ("lost" lines) that hang in front of it.
2301struct CdLine {
2302    /// The surviving result line bytes, WITHOUT the trailing newline.
2303    bol: Vec<u8>,
2304    /// Deletions hung before this line, in display order. `parent_map` is the
2305    /// bitset of parents that had the deleted line (git coalesces these across
2306    /// parents via an LCS; we replicate that with [`coalesce_lost`]).
2307    lost: Vec<CdLost>,
2308    /// Pre-coalesce deletions accumulated for the parent currently being
2309    /// folded (drained into `lost` after each parent, like git's `plost`).
2310    plost: Vec<Vec<u8>>,
2311    /// `flag` bitset (see module comment).
2312    flag: u64,
2313    /// `p_lno[n]` = 1-based line number in parent `n` at which a hunk starting
2314    /// at this result line begins. Sized `num_parent`.
2315    p_lno: Vec<u64>,
2316}
2317
2318/// A single deleted ("lost") line and the set of parents it was removed from.
2319struct CdLost {
2320    line: Vec<u8>,
2321    parent_map: u64,
2322}
2323
2324/// Options controlling the combined-diff body emission.
2325pub struct CombinedRenderOptions {
2326    /// `--cc` dense simplification (drop hunks the result shares with a parent);
2327    /// `-c` (plain combined) leaves it `false`.
2328    pub dense: bool,
2329    /// Unified-context line count (`-U`, default 3).
2330    pub context: usize,
2331    /// Line-diff algorithm used for each parent-vs-result 2-way diff.
2332    pub algorithm: DiffAlgorithm,
2333    /// Whitespace-ignore flags applied to each parent-vs-result 2-way diff and
2334    /// to the lost-line coalescing match.
2335    pub ws_ignore: WsIgnore,
2336}
2337
2338impl Default for CombinedRenderOptions {
2339    fn default() -> Self {
2340        Self {
2341            dense: true,
2342            context: DEFAULT_CONTEXT,
2343            algorithm: DiffAlgorithm::Myers,
2344            ws_ignore: WsIgnore::default(),
2345        }
2346    }
2347}
2348
2349/// Render a combined / merge diff body into `out`.
2350///
2351/// `result` is the merge-result blob; `parents` holds one blob per parent (in
2352/// parent order). Returns `true` when at least one hunk survives the
2353/// "interesting" filter — the caller uses this to decide whether to print the
2354/// metainfo header at all (git only prints `diff --cc <path>` + body when
2355/// `show_hunks || mode_differs`).
2356///
2357/// Mirrors `show_patch_diff`'s body half: build the `sline` array, fold each
2358/// parent into it via [`combine_one_parent`], run [`make_hunks`], then
2359/// [`dump_sline`].
2360pub fn render_combined(out: &mut Vec<u8>, result: &[u8], parents: &[&[u8]]) -> bool {
2361    render_combined_with(out, result, parents, &CombinedRenderOptions::default())
2362}
2363
2364/// [`render_combined`] with explicit options.
2365pub fn render_combined_with(
2366    out: &mut Vec<u8>,
2367    result: &[u8],
2368    parents: &[&[u8]],
2369    options: &CombinedRenderOptions,
2370) -> bool {
2371    let num_parent = parents.len();
2372    debug_assert!(num_parent >= 1);
2373
2374    // Split the result into lines (without trailing newline), counting an
2375    // unterminated final line as its own line — git's `cnt` counts '\n' plus an
2376    // incomplete trailing line.
2377    let result_lines = split_lines(result);
2378    let cnt = result_lines.len();
2379
2380    // git allocates `cnt + 2` slines: indices `0..cnt-1` are the result lines,
2381    // `sline[cnt]` is the trailing sentinel (where end-of-file deletions hang),
2382    // and `sline[cnt+1]` carries the per-parent trailer p_lno that
2383    // `show_parent_lno` reads for a hunk whose end touches the last line.
2384    let mut sline: Vec<CdLine> = Vec::with_capacity(cnt + 2);
2385    for line in &result_lines {
2386        sline.push(CdLine {
2387            bol: line.bytes_without_newline().to_vec(),
2388            lost: Vec::new(),
2389            plost: Vec::new(),
2390            flag: 0,
2391            p_lno: vec![0; num_parent],
2392        });
2393    }
2394    for _ in 0..2 {
2395        sline.push(CdLine {
2396            bol: Vec::new(),
2397            lost: Vec::new(),
2398            plost: Vec::new(),
2399            flag: 0,
2400            p_lno: vec![0; num_parent],
2401        });
2402    }
2403
2404    // Fold each parent into the sline array. git reuses an earlier parent's
2405    // result when two parents have the identical blob (`reuse_combine_diff`);
2406    // we replicate that to keep p_lno / flags identical.
2407    for n in 0..num_parent {
2408        let mut reused = None;
2409        for j in 0..n {
2410            if parents[j] == parents[n] {
2411                reused = Some(j);
2412                break;
2413            }
2414        }
2415        match reused {
2416            Some(j) => reuse_combine_diff(&mut sline, cnt, n, j),
2417            None => combine_one_parent(&mut sline, &result_lines, parents[n], n, options),
2418        }
2419    }
2420
2421    let show_hunks = make_hunks(&mut sline, cnt, num_parent, options.dense, options.context);
2422    if show_hunks {
2423        dump_sline(out, &sline, cnt, num_parent, options.context);
2424    }
2425    show_hunks
2426}
2427
2428/// Fold one parent's 2-way diff against the result into the `sline` array
2429/// (git's `combine_diff` + the consume_hunk/consume_line callbacks).
2430fn combine_one_parent(
2431    sline: &mut [CdLine],
2432    result_lines: &[DiffLine<'_>],
2433    parent: &[u8],
2434    n: usize,
2435    options: &CombinedRenderOptions,
2436) {
2437    let cnt = result_lines.len();
2438    let nmask = 1u64 << n;
2439    let parent_lines = split_lines(parent);
2440    let ops = myers_diff_lines_ws(
2441        &parent_lines,
2442        result_lines,
2443        options.ws_ignore,
2444        options.algorithm,
2445    );
2446
2447    // Walk the edit script, tracking the 1-based result line number (`lno`,
2448    // git's `state->lno`) and the parent line number (`p_lno`/`ob`). For each
2449    // hunk: deletions hang on `lost_bucket`; insertions set the nmask flag on
2450    // the result line; the hunk start records the parent line number into
2451    // `p_lno[n]` of the result line preceding the hunk.
2452    //
2453    // git groups the script into hunks (runs separated by Equal context); we
2454    // mirror consume_hunk by detecting the boundary at each non-Equal run.
2455    let mut old_idx: usize = 0; // 0-based parent line consumed
2456    let mut new_idx: usize = 0; // 0-based result line consumed
2457    let mut i = 0;
2458    while i < ops.len() {
2459        match ops[i] {
2460            DiffOp::Equal(k) => {
2461                old_idx += k;
2462                new_idx += k;
2463                i += 1;
2464            }
2465            _ => {
2466                // Collect a maximal run of consecutive Delete/Insert ops as one
2467                // hunk (git's xdiff emits one @@ hunk per such run).
2468                let hunk_old_start = old_idx; // 0-based
2469                let hunk_new_start = new_idx; // 0-based
2470                let mut dels: Vec<&[u8]> = Vec::new();
2471                while i < ops.len() {
2472                    match ops[i] {
2473                        DiffOp::Delete(k) => {
2474                            for _ in 0..k {
2475                                dels.push(parent_lines[old_idx].bytes_without_newline());
2476                                old_idx += 1;
2477                            }
2478                            i += 1;
2479                        }
2480                        DiffOp::Insert(k) => {
2481                            new_idx += k;
2482                            i += 1;
2483                        }
2484                        DiffOp::Equal(_) => break,
2485                    }
2486                }
2487                let _ = hunk_old_start;
2488
2489                // Lost bucket: deletions hang on the result line at the hunk
2490                // start (sline[hunk_new_start]). git's distinction between the
2491                // additions-present (`nb-1`) and pure-deletion (`nb`) cases
2492                // collapses to the same index here because our `hunk_new_start`
2493                // is the 0-based result line immediately *after* the preceding
2494                // context, matching git's `nb-1` for the additions case and the
2495                // bucket-after for the pure-deletion case once the result line
2496                // numbering (1-based `nb`) is accounted for. The authoritative
2497                // p_lno values are recomputed in the loop below, so we do not
2498                // record them here.
2499                for d in &dels {
2500                    sline[hunk_new_start].plost.push(d.to_vec());
2501                }
2502                // Mark inserted result lines: flag bit n set => parent n lacked
2503                // this line.
2504                for line in sline.iter_mut().take(new_idx.min(cnt)).skip(hunk_new_start) {
2505                    line.flag |= nmask;
2506                }
2507            }
2508        }
2509    }
2510
2511    // Coalesce the plost lines into lost (git's coalesce_lines), then assign
2512    // p_lno numbers per parent — git's second loop in combine_diff.
2513    let mut p_lno: u64 = 1;
2514    for (lno, line) in sline.iter_mut().enumerate().take(cnt + 1) {
2515        line.p_lno[n] = p_lno;
2516        if !line.plost.is_empty() {
2517            let plost = std::mem::take(&mut line.plost);
2518            coalesce_lost(&mut line.lost, plost, n, options);
2519        }
2520        // How many parent lines does this sline advance?
2521        for ll in &line.lost {
2522            if ll.parent_map & nmask != 0 {
2523                p_lno += 1; // '-' means parent had it
2524            }
2525        }
2526        if lno < cnt && (line.flag & nmask) == 0 {
2527            p_lno += 1; // no '+' means parent had it
2528        }
2529    }
2530    sline[cnt + 1].p_lno[n] = p_lno; // trailer (git's sline[cnt+1])
2531}
2532
2533/// Coalesce a parent's freshly-collected deletions into the line's existing
2534/// lost list (git's `coalesce_lines` LCS merge). A deletion that matches an
2535/// already-present lost line (under the active whitespace flags) gets its
2536/// parent bit OR'd into that line's `parent_map` instead of being added again.
2537fn coalesce_lost(
2538    base: &mut Vec<CdLost>,
2539    newlines: Vec<Vec<u8>>,
2540    n: usize,
2541    options: &CombinedRenderOptions,
2542) {
2543    let pmask = 1u64 << n;
2544    if newlines.is_empty() {
2545        return;
2546    }
2547    if base.is_empty() {
2548        for line in newlines {
2549            base.push(CdLost {
2550                line,
2551                parent_map: pmask,
2552            });
2553        }
2554        return;
2555    }
2556
2557    // LCS over (base lines, new lines) by whitespace-aware equality, exactly
2558    // like git: MATCH => OR the parent bit into the base line; NEW => insert the
2559    // new line at that position; BASE => keep base line as-is.
2560    let m = base.len();
2561    let k = newlines.len();
2562    let mut lcs = vec![vec![0i32; k + 1]; m + 1];
2563    for i in 1..=m {
2564        for j in 1..=k {
2565            if combined_lines_match(&base[i - 1].line, &newlines[j - 1], options.ws_ignore) {
2566                lcs[i][j] = lcs[i - 1][j - 1] + 1;
2567            } else if lcs[i][j - 1] >= lcs[i - 1][j] {
2568                lcs[i][j] = lcs[i][j - 1];
2569            } else {
2570                lcs[i][j] = lcs[i - 1][j];
2571            }
2572        }
2573    }
2574
2575    // Backtrack, building the merged list in reverse.
2576    let mut merged: Vec<CdLost> = Vec::with_capacity(m + k);
2577    let mut i = m;
2578    let mut j = k;
2579    while i > 0 || j > 0 {
2580        if i > 0
2581            && j > 0
2582            && combined_lines_match(&base[i - 1].line, &newlines[j - 1], options.ws_ignore)
2583        {
2584            let mut entry = std::mem::replace(
2585                &mut base[i - 1],
2586                CdLost {
2587                    line: Vec::new(),
2588                    parent_map: 0,
2589                },
2590            );
2591            entry.parent_map |= pmask;
2592            merged.push(entry);
2593            i -= 1;
2594            j -= 1;
2595        } else if j > 0 && (i == 0 || lcs[i][j - 1] >= lcs[i - 1][j]) {
2596            merged.push(CdLost {
2597                line: newlines[j - 1].clone(),
2598                parent_map: pmask,
2599            });
2600            j -= 1;
2601        } else {
2602            let entry = std::mem::replace(
2603                &mut base[i - 1],
2604                CdLost {
2605                    line: Vec::new(),
2606                    parent_map: 0,
2607                },
2608            );
2609            merged.push(entry);
2610            i -= 1;
2611        }
2612    }
2613    merged.reverse();
2614    *base = merged;
2615}
2616
2617/// Whitespace-aware line equality used by the lost-line coalescer
2618/// (git's `match_string_spaces`). Only the all-space / space-change flavours
2619/// affect the comparison; otherwise it is a byte compare.
2620fn combined_lines_match(a: &[u8], b: &[u8], ws: WsIgnore) -> bool {
2621    if ws.all_space || ws.space_change || ws.space_at_eol {
2622        let at = strip_trailing_ws(a);
2623        let bt = strip_trailing_ws(b);
2624        if !ws.all_space && !ws.space_change {
2625            return at == bt;
2626        }
2627        return ws_squash_eq(at, bt, ws.space_change);
2628    }
2629    a == b
2630}
2631
2632fn strip_trailing_ws(s: &[u8]) -> &[u8] {
2633    let mut end = s.len();
2634    while end > 0 && (s[end - 1] == b' ' || s[end - 1] == b'\t') {
2635        end -= 1;
2636    }
2637    &s[..end]
2638}
2639
2640/// Compare two lines ignoring whitespace runs (`-w`) or treating runs as a
2641/// single space (`-b`).
2642fn ws_squash_eq(a: &[u8], b: &[u8], change_only: bool) -> bool {
2643    let is_ws = |c: u8| c == b' ' || c == b'\t';
2644    let (mut ia, mut ib) = (0usize, 0usize);
2645    while ia < a.len() && ib < b.len() {
2646        let (ca, cb) = (a[ia], b[ib]);
2647        if is_ws(ca) || is_ws(cb) {
2648            if change_only && (!is_ws(ca) || !is_ws(cb)) {
2649                return false;
2650            }
2651            // For -b, a whitespace run on both sides counts as equal; for -w,
2652            // whitespace is skipped entirely. Skip the runs on both sides.
2653            if change_only {
2654                while ia < a.len() && is_ws(a[ia]) {
2655                    ia += 1;
2656                }
2657                while ib < b.len() && is_ws(b[ib]) {
2658                    ib += 1;
2659                }
2660                continue;
2661            } else {
2662                if is_ws(ca) {
2663                    ia += 1;
2664                    continue;
2665                }
2666                if is_ws(cb) {
2667                    ib += 1;
2668                    continue;
2669                }
2670            }
2671        }
2672        if ca != cb {
2673            return false;
2674        }
2675        ia += 1;
2676        ib += 1;
2677    }
2678    // Consume trailing whitespace.
2679    while ia < a.len() && is_ws(a[ia]) {
2680        ia += 1;
2681    }
2682    while ib < b.len() && is_ws(b[ib]) {
2683        ib += 1;
2684    }
2685    ia == a.len() && ib == b.len()
2686}
2687
2688/// git's `reuse_combine_diff`: when parent `i` has the same blob as a
2689/// previously-folded parent `j`, copy `j`'s flags / lost parent-bits / p_lno
2690/// across instead of re-diffing.
2691fn reuse_combine_diff(sline: &mut [CdLine], cnt: usize, i: usize, j: usize) {
2692    let imask = 1u64 << i;
2693    let jmask = 1u64 << j;
2694    for line in sline.iter_mut().take(cnt + 1) {
2695        line.p_lno[i] = line.p_lno[j];
2696        for ll in &mut line.lost {
2697            if ll.parent_map & jmask != 0 {
2698                ll.parent_map |= imask;
2699            }
2700        }
2701        if line.flag & jmask != 0 {
2702            line.flag |= imask;
2703        }
2704    }
2705    // The overall trailer (sline[cnt+1]).
2706    sline[cnt + 1].p_lno[i] = sline[cnt + 1].p_lno[j];
2707}
2708
2709/// Is this result line "interesting" — does any parent lack it, or does it have
2710/// deletions hung in front (git's `interesting`).
2711fn cd_interesting(sline: &CdLine, all_mask: u64) -> bool {
2712    (sline.flag & all_mask) != 0 || !sline.lost.is_empty()
2713}
2714
2715/// git's `adjust_hunk_tail`.
2716fn adjust_hunk_tail(sline: &[CdLine], all_mask: u64, hunk_begin: usize, mut i: usize) -> usize {
2717    if hunk_begin < i && (sline[i - 1].flag & all_mask) == 0 {
2718        i -= 1;
2719    }
2720    i
2721}
2722
2723/// git's `find_next`.
2724fn find_next(
2725    sline: &[CdLine],
2726    mark: u64,
2727    mut i: usize,
2728    cnt: usize,
2729    look_for_uninteresting: bool,
2730) -> usize {
2731    while i <= cnt {
2732        let marked = (sline[i].flag & mark) != 0;
2733        if look_for_uninteresting {
2734            if !marked {
2735                return i;
2736            }
2737        } else if marked {
2738            return i;
2739        }
2740        i += 1;
2741    }
2742    i
2743}
2744
2745/// git's `give_context`: paint context lines (and bridge small gaps) around the
2746/// interesting lines, using the `mark` bit. Returns whether any hunk shows.
2747fn give_context(sline: &mut [CdLine], cnt: usize, num_parent: usize, context: usize) -> bool {
2748    let all_mask = (1u64 << num_parent) - 1;
2749    let mark = 1u64 << num_parent;
2750    let no_pre_delete = 2u64 << num_parent;
2751
2752    let mut i = find_next(sline, mark, 0, cnt, false);
2753    if cnt < i {
2754        return false;
2755    }
2756
2757    while i <= cnt {
2758        let mut j = i.saturating_sub(context);
2759        // Paint a few lines before the first interesting line.
2760        while j < i {
2761            if (sline[j].flag & mark) == 0 {
2762                sline[j].flag |= no_pre_delete;
2763            }
2764            sline[j].flag |= mark;
2765            j += 1;
2766        }
2767
2768        loop {
2769            // Where does the next uninteresting line start?
2770            j = find_next(sline, mark, i, cnt, true);
2771            if cnt < j {
2772                // The rest are all interesting.
2773                return true;
2774            }
2775            // Lookahead context lines.
2776            let k = find_next(sline, mark, j, cnt, false);
2777            let j2 = adjust_hunk_tail(sline, all_mask, i, j);
2778
2779            if k < j2 + context {
2780                // Small gap: paint it interesting and continue.
2781                let mut jj = j2;
2782                while jj < k {
2783                    sline[jj].flag |= mark;
2784                    jj += 1;
2785                }
2786                i = k;
2787                continue;
2788            }
2789
2790            // No overlap within context: paint the trailing edge a bit.
2791            i = k;
2792            let kk = if j2 + context < cnt + 1 {
2793                j2 + context
2794            } else {
2795                cnt + 1
2796            };
2797            let mut jj = j2;
2798            while jj < kk {
2799                sline[jj].flag |= mark;
2800                jj += 1;
2801            }
2802            break;
2803        }
2804    }
2805    true
2806}
2807
2808/// git's `make_hunks`: mark interesting lines, run the `--cc` dense
2809/// simplification when requested, then `give_context`.
2810fn make_hunks(
2811    sline: &mut [CdLine],
2812    cnt: usize,
2813    num_parent: usize,
2814    dense: bool,
2815    context: usize,
2816) -> bool {
2817    let all_mask = (1u64 << num_parent) - 1;
2818    let mark = 1u64 << num_parent;
2819
2820    for line in sline.iter_mut().take(cnt + 1) {
2821        if cd_interesting(line, all_mask) {
2822            line.flag |= mark;
2823        } else {
2824            line.flag &= !mark;
2825        }
2826    }
2827    if !dense {
2828        return give_context(sline, cnt, num_parent, context);
2829    }
2830
2831    // Dense simplification: for each marked hunk, drop it when the result
2832    // differs from a single parent only (or matches all but one parent the
2833    // same way) — git's "interesting" recomputation.
2834    let mut i = 0;
2835    while i <= cnt {
2836        while i <= cnt && (sline[i].flag & mark) == 0 {
2837            i += 1;
2838        }
2839        if cnt < i {
2840            break;
2841        }
2842        let hunk_begin = i;
2843        let mut j = i + 1;
2844        while j <= cnt {
2845            if (sline[j].flag & mark) == 0 {
2846                // Look beyond the end for an interesting line within context.
2847                let mut la = adjust_hunk_tail(sline, all_mask, hunk_begin, j);
2848                la = if la + context < cnt + 1 {
2849                    la + context
2850                } else {
2851                    cnt + 1
2852                };
2853                let mut contin = false;
2854                while la > 0 && j < la {
2855                    la -= 1;
2856                    if (sline[la].flag & mark) != 0 {
2857                        contin = true;
2858                        break;
2859                    }
2860                }
2861                if !contin {
2862                    break;
2863                }
2864                j = la;
2865            }
2866            j += 1;
2867        }
2868        let hunk_end = j;
2869
2870        // Is the hunk "really" interesting? Check whether all changed lines
2871        // record the same set of parents.
2872        let mut same_diff: u64 = 0;
2873        let mut has_interesting = false;
2874        let mut jj = i;
2875        while jj < hunk_end && !has_interesting {
2876            let this_diff = sline[jj].flag & all_mask;
2877            if this_diff != 0 {
2878                if same_diff == 0 {
2879                    same_diff = this_diff;
2880                } else if same_diff != this_diff {
2881                    has_interesting = true;
2882                    break;
2883                }
2884            }
2885            for ll in &sline[jj].lost {
2886                if has_interesting {
2887                    break;
2888                }
2889                let td = ll.parent_map;
2890                if same_diff == 0 {
2891                    same_diff = td;
2892                } else if same_diff != td {
2893                    has_interesting = true;
2894                }
2895            }
2896            jj += 1;
2897        }
2898
2899        if !has_interesting && same_diff != all_mask {
2900            // Not interesting after all: unmark the whole hunk.
2901            for line in sline.iter_mut().take(hunk_end).skip(hunk_begin) {
2902                line.flag &= !mark;
2903            }
2904        }
2905        i = hunk_end;
2906    }
2907
2908    give_context(sline, cnt, num_parent, context)
2909}
2910
2911/// git's `show_parent_lno`: emit one `-l0,len` column for parent `n`.
2912fn show_parent_lno(
2913    out: &mut Vec<u8>,
2914    sline: &[CdLine],
2915    l0: usize,
2916    l1: usize,
2917    n: usize,
2918    null_context: u64,
2919) {
2920    let a = sline[l0].p_lno[n];
2921    let b = sline[l1].p_lno[n];
2922    out.extend_from_slice(format!(" -{},{}", a, b - a - null_context).as_bytes());
2923}
2924
2925/// git's `hunk_comment_line` test (used to append a function-context comment
2926/// to the `@@@ ... @@@` header).
2927fn hunk_comment_line(bol: &[u8]) -> bool {
2928    if bol.is_empty() {
2929        return false;
2930    }
2931    let ch = bol[0];
2932    ch.is_ascii_alphabetic() || ch == b'_' || ch == b'$'
2933}
2934
2935/// git's `show_line_to_eol`: emit a line, preserving a trailing CR. The bytes
2936/// here never include the newline; we add it.
2937fn show_line_to_eol(out: &mut Vec<u8>, line: &[u8]) {
2938    let saw_cr = line.last() == Some(&b'\r');
2939    if saw_cr {
2940        out.extend_from_slice(&line[..line.len() - 1]);
2941        out.push(b'\r');
2942    } else {
2943        out.extend_from_slice(line);
2944    }
2945    out.push(b'\n');
2946}
2947
2948/// git's `dump_sline`: emit the combined-diff hunk bodies for all marked hunks.
2949fn dump_sline(out: &mut Vec<u8>, sline: &[CdLine], cnt: usize, num_parent: usize, context: usize) {
2950    let mark = 1u64 << num_parent;
2951    let no_pre_delete = 2u64 << num_parent;
2952    let mut lno: usize = 0;
2953
2954    loop {
2955        let mut hunk_comment: Option<&[u8]> = None;
2956        while lno <= cnt && (sline[lno].flag & mark) == 0 {
2957            if hunk_comment_line(&sline[lno].bol) {
2958                hunk_comment = Some(&sline[lno].bol);
2959            }
2960            lno += 1;
2961        }
2962        if cnt < lno {
2963            break;
2964        }
2965        let mut hunk_end = lno + 1;
2966        while hunk_end <= cnt {
2967            if (sline[hunk_end].flag & mark) == 0 {
2968                break;
2969            }
2970            hunk_end += 1;
2971        }
2972
2973        let mut rlines = (hunk_end - lno) as u64;
2974        if cnt < hunk_end {
2975            rlines -= 1; // pointing at the last delete hunk
2976        }
2977
2978        let mut null_context: u64 = 0;
2979        if context == 0 {
2980            // --unified=0: count the all-blank-context result lines so the
2981            // header line counts exclude them.
2982            for sl in sline.iter().take(hunk_end).skip(lno) {
2983                if (sl.flag & (mark - 1)) == 0 {
2984                    null_context += 1;
2985                }
2986            }
2987            rlines -= null_context;
2988        }
2989
2990        // Header: `@@@`... (num_parent+1 markers), one -l,c column per parent,
2991        // ` +out_start,out_len `, num_parent+1 markers again.
2992        for _ in 0..=num_parent {
2993            out.push(b'@');
2994        }
2995        for i in 0..num_parent {
2996            show_parent_lno(out, sline, lno, hunk_end, i, null_context);
2997        }
2998        out.extend_from_slice(format!(" +{},{} ", lno + 1, rlines).as_bytes());
2999        for _ in 0..=num_parent {
3000            out.push(b'@');
3001        }
3002
3003        if let Some(comment) = hunk_comment {
3004            let mut comment_end = 0;
3005            for (idx, &ch) in comment.iter().take(40).enumerate() {
3006                if ch == b'\n' {
3007                    break;
3008                }
3009                if !ch.is_ascii_whitespace() {
3010                    comment_end = idx + 1;
3011                }
3012            }
3013            if comment_end != 0 {
3014                out.push(b' ');
3015                out.extend_from_slice(&comment[..comment_end]);
3016            }
3017        }
3018        out.push(b'\n');
3019
3020        // Body.
3021        while lno < hunk_end {
3022            let sl = &sline[lno];
3023            lno += 1;
3024            // Lost (deleted) lines hung before this result line.
3025            if (sl.flag & no_pre_delete) == 0 {
3026                for ll in &sl.lost {
3027                    for j in 0..num_parent {
3028                        if ll.parent_map & (1u64 << j) != 0 {
3029                            out.push(b'-');
3030                        } else {
3031                            out.push(b' ');
3032                        }
3033                    }
3034                    show_line_to_eol(out, &ll.line);
3035                }
3036            }
3037            if cnt < lno {
3038                break;
3039            }
3040            if (sl.flag & (mark - 1)) == 0 {
3041                // This sline only existed to hang the lost lines in front.
3042                if context == 0 {
3043                    continue;
3044                }
3045            }
3046            let mut p_mask = 1u64;
3047            for _ in 0..num_parent {
3048                if p_mask & sl.flag != 0 {
3049                    out.push(b'+');
3050                } else {
3051                    out.push(b' ');
3052                }
3053                p_mask <<= 1;
3054            }
3055            show_line_to_eol(out, &sl.bol);
3056        }
3057    }
3058}
3059
3060#[cfg(test)]
3061mod tests {
3062    use super::*;
3063
3064    fn render_plain(old: Option<&[u8]>, new: Option<&[u8]>) -> Vec<u8> {
3065        let mut out = Vec::new();
3066        let mut options = HunkRenderOptions::default();
3067        render_hunks(&mut out, old, new, &mut options);
3068        out
3069    }
3070
3071    #[test]
3072    fn identical_content_renders_nothing() {
3073        assert!(render_plain(Some(b"a\nb\n"), Some(b"a\nb\n")).is_empty());
3074    }
3075
3076    #[test]
3077    fn single_line_change_basic_hunk() {
3078        let out = render_plain(Some(b"alpha\nbeta\ngamma\n"), Some(b"alpha\nBETA\ngamma\n"));
3079        assert_eq!(
3080            out,
3081            b"@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n".to_vec(),
3082        );
3083    }
3084
3085    #[test]
3086    fn count_omitted_when_one() {
3087        // A single-line file changed in place yields `-1 +1` (no `,1`).
3088        let out = render_plain(Some(b"old\n"), Some(b"new\n"));
3089        assert_eq!(out, b"@@ -1 +1 @@\n-old\n+new\n".to_vec());
3090    }
3091
3092    #[test]
3093    fn no_newline_marker_on_old_side() {
3094        let out = render_plain(Some(b"only line no newline"), None);
3095        assert_eq!(
3096            out,
3097            b"@@ -1 +0,0 @@\n-only line no newline\n\\ No newline at end of file\n".to_vec(),
3098        );
3099    }
3100
3101    #[test]
3102    fn no_newline_marker_on_new_side() {
3103        let out = render_plain(Some(b"beta\n"), Some(b"beta-notail"));
3104        assert_eq!(
3105            out,
3106            b"@@ -1 +1 @@\n-beta\n+beta-notail\n\\ No newline at end of file\n".to_vec(),
3107        );
3108    }
3109
3110    #[test]
3111    fn pure_insertion_into_empty() {
3112        let out = render_plain(None, Some(b"x\ny\n"));
3113        assert_eq!(out, b"@@ -0,0 +1,2 @@\n+x\n+y\n".to_vec());
3114    }
3115
3116    #[test]
3117    fn distant_changes_split_into_two_hunks() {
3118        let old: &[u8] = b"a\nb\nc\nd\ne\nf\ng\nh\ni\nj\n";
3119        let new: &[u8] = b"A\nb\nc\nd\ne\nf\ng\nh\ni\nJ\n";
3120        let out = render_plain(Some(old), Some(new));
3121        // Two changes 9 lines apart (> 2*3+1) produce two separate hunks.
3122        let text = String::from_utf8(out).expect("rendered output is valid UTF-8");
3123        assert_eq!(text.matches("@@ ").count(), 2, "expected two hunks: {text}");
3124    }
3125
3126    #[test]
3127    fn heading_callback_supplies_section() {
3128        // The change is far enough below `fn foo()` that the funcname line
3129        // precedes the hunk (the heading scan looks *above* the hunk's first
3130        // line, so a change touching line 1 would correctly find no heading).
3131        let old: &[u8] = b"fn foo() {\n    a\n    b\n    c\n    d\n    e\n    f\n    g\n}\n";
3132        let new: &[u8] = b"fn foo() {\n    a\n    b\n    c\n    d\n    CHANGED\n    f\n    g\n}\n";
3133        let mut out = Vec::new();
3134        // Classifier accepts any line whose first byte is an ASCII letter
3135        // (a crude def_ff stand-in for the test).
3136        let mut heading_fn = |line: &[u8]| -> Option<Vec<u8>> {
3137            if line.first().is_some_and(u8::is_ascii_alphabetic) {
3138                Some(line.strip_suffix(b"\n").unwrap_or(line).to_vec())
3139            } else {
3140                None
3141            }
3142        };
3143        let mut options = HunkRenderOptions {
3144            heading: Some(&mut heading_fn),
3145            ..Default::default()
3146        };
3147        render_hunks(&mut out, Some(old), Some(new), &mut options);
3148        let text = String::from_utf8(out).expect("rendered output is valid UTF-8");
3149        assert!(
3150            text.starts_with("@@ -3,7 +3,7 @@ fn foo() {\n"),
3151            "expected funcname heading: {text}",
3152        );
3153    }
3154
3155    fn render_cc(result: &[u8], parents: &[&[u8]], dense: bool) -> String {
3156        let mut out = Vec::new();
3157        let opts = CombinedRenderOptions {
3158            dense,
3159            ..Default::default()
3160        };
3161        render_combined_with(&mut out, result, parents, &opts);
3162        String::from_utf8(out).expect("combined output is valid UTF-8")
3163    }
3164
3165    #[test]
3166    fn combined_two_parent_dense_header_and_columns() {
3167        // A merge result that adds lines on top of two parents, the t4013
3168        // dir/sub shape: parent0 = "A\nB\nC\nD\nE\nF\n", parent1 = "A\nB\n1\n2\n",
3169        // result = "A\nB\nC\nD\nE\nF\n1\n2\n". git emits one combined hunk with
3170        // the `@@@ -1,6 -1,4 +1,8 @@@` header and two prefix columns.
3171        let p0 = b"A\nB\nC\nD\nE\nF\n";
3172        let p1 = b"A\nB\n1\n2\n";
3173        let result = b"A\nB\nC\nD\nE\nF\n1\n2\n";
3174        let text = render_cc(result, &[p0, p1], true);
3175        assert_eq!(
3176            text, "@@@ -1,6 -1,4 +1,8 @@@\n  A\n  B\n +C\n +D\n +E\n +F\n+ 1\n+ 2\n",
3177            "combined dense output:\n{text}",
3178        );
3179    }
3180
3181    #[test]
3182    fn combined_identical_to_one_parent_dense_drops_hunk() {
3183        // When the result is identical to one parent, the dense (`--cc`) filter
3184        // drops the hunk (the change is "interesting" only against the other
3185        // parent), so nothing is emitted; the non-dense (`-c`) form still shows
3186        // every parent.
3187        let p0 = b"x\ny\n";
3188        let p1 = b"x\nCHANGED\n";
3189        let result = b"x\ny\n"; // identical to p0
3190        assert_eq!(render_cc(result, &[p0, p1], true), "");
3191        // Non-dense still shows the hunk (differs from p1).
3192        assert!(render_cc(result, &[p0, p1], false).starts_with("@@@"));
3193    }
3194
3195    #[test]
3196    fn combined_reuse_identical_parents() {
3197        // Two parents with the identical blob must produce identical columns
3198        // (git's reuse_combine_diff path); the result adds a line relative to
3199        // both, so both columns carry `+`.
3200        let parent = b"a\nb\n";
3201        let result = b"a\nb\nc\n";
3202        let text = render_cc(result, &[parent, parent], true);
3203        assert_eq!(
3204            text, "@@@ -1,2 -1,2 +1,3 @@@\n  a\n  b\n++c\n",
3205            "reuse output:\n{text}",
3206        );
3207    }
3208}