// travelagent 1.11.1

//! Overlay word-level diff emphasis on top of syntax-highlighted spans.
//!
//! GitHub-style rendering: when a diff line is both syntax-highlighted AND has
//! a word-diff partner, we want to KEEP the syntax colors (foreground) while
//! layering a darker tinted background + BOLD on the specific byte ranges that
//! the LCS algorithm flagged as changed.
//!
//! The inputs:
//! - `syntax_spans: &[(StyleHint, String)]` from `DiffLine::highlighted_spans`.
//!   Concatenating the span strings reproduces the line content exactly.
//! - `tokens: &[Token]` from `highlight_line_pair`. Concatenating
//!   `token.text` also reproduces the line content exactly. `token.highlight`
//!   flags the words/runs that the LCS deemed unique to this side.
//!
//! The output is a flat sequence of `(text, style_hint, is_changed)` chunks
//! such that concatenating `text` reproduces the line. Each original syntax
//! span is split at word-diff boundaries; its StyleHint (and thus foreground
//! color) is preserved. Callers decide how to translate each chunk into a
//! `Span` — typically they layer `emphasis_bg` + `Modifier::BOLD` onto the
//! `is_changed = true` chunks.
//!
//! # UTF-8 safety
//!
//! Word-diff tokens are produced by iterating `char`s, so every token boundary
//! is a char boundary. The byte ranges this module derives from cumulative
//! token lengths are therefore all char-aligned. Splits of syntax spans land
//! exactly on those same boundaries, so every slice we take is valid UTF-8.
//! We `debug_assert!` `is_char_boundary` before slicing the span text so that,
//! in debug builds, any future regression fails with a descriptive message
//! instead of an opaque slice-index panic.

use ratatui::style::{Color, Modifier, Style};
use ratatui::text::Span;
use travelagent_core::diff::Token;
use travelagent_core::style::StyleHint;
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};

use crate::ui::styles;

/// One contiguous piece of a rendered diff line, carrying everything the
/// renderer needs to build a `Span` for it.
///
/// Chunks are produced by `overlay_word_diff_on_syntax`; concatenating the
/// `text` of the chunks in order reproduces the line content.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct OverlayChunk {
    /// Text of this chunk: an owned copy of a contiguous piece of the
    /// original line (never spans more than one syntax span).
    pub text: String,
    /// Syntax style hint inherited unchanged from the syntax span this chunk
    /// was cut from.
    pub hint: StyleHint,
    /// `true` when this chunk falls inside a word-diff "changed" run and
    /// should be rendered with the word-diff emphasis background + BOLD;
    /// `false` for text shared by both sides of the diff.
    pub is_changed: bool,
}

/// Build the sorted list of byte ranges that are flagged `highlight = true`
/// in the word-diff token stream. Adjacent highlighted tokens are merged so
/// callers get a minimal range set.
pub(crate) fn build_changed_ranges(tokens: &[Token]) -> Vec<std::ops::Range<usize>> {
    let mut out: Vec<std::ops::Range<usize>> = Vec::new();
    let mut offset: usize = 0;
    for tok in tokens {
        let len = tok.text.len();
        if tok.highlight && len > 0 {
            let start = offset;
            let end = offset + len;
            // Merge adjacent ranges so the renderer emits as few spans as possible.
            if let Some(last) = out.last_mut()
                && last.end == start
            {
                last.end = end;
            } else {
                out.push(start..end);
            }
        }
        offset += len;
    }
    out
}

/// Cut one syntax span into alternating unchanged/changed pieces.
///
/// * `span_text` — the span's text; its byte length defines the span's extent.
/// * `span_byte_start` — byte offset of the span inside the full line.
/// * `changed` — changed byte ranges in line coordinates, expected in
///   ascending order (as produced by `build_changed_ranges`).
///
/// Returns `(piece_text, is_changed)` pairs in line order; concatenating the
/// piece texts reproduces `span_text`. Zero-length pieces are never emitted,
/// with one exception: an empty `span_text` yields a single empty, unchanged
/// piece.
pub(crate) fn split_span_by_ranges(
    span_text: &str,
    span_byte_start: usize,
    changed: &[std::ops::Range<usize>],
) -> Vec<(String, bool)> {
    if span_text.is_empty() {
        return vec![(String::new(), false)];
    }
    let span_byte_end = span_byte_start + span_text.len();

    let mut pieces: Vec<(String, bool)> = Vec::new();
    // Append the line-coordinate slice `from..to` as one piece, silently
    // dropping empty (or inverted) extents.
    let mut emit = |from: usize, to: usize, is_changed: bool| {
        if to <= from {
            return;
        }
        // Translate to offsets relative to the span before slicing.
        let s = from - span_byte_start;
        let e = to - span_byte_start;
        debug_assert!(
            span_text.is_char_boundary(s) && span_text.is_char_boundary(e),
            "word-diff overlay split {s}..{e} would slice a UTF-8 codepoint in {span_text:?}"
        );
        pieces.push((span_text[s..e].to_string(), is_changed));
    };

    // `cursor` is the line offset up to which the span has been emitted.
    let mut cursor = span_byte_start;
    let overlapping = changed
        .iter()
        // Ignore ranges that finish at or before the span's first byte ...
        .filter(|range| range.end > span_byte_start)
        // ... and stop at the first one that begins at or past its end.
        .take_while(|range| range.start < span_byte_end);

    for range in overlapping {
        // Clamp the changed range to the span's own extent.
        let hit_start = range.start.max(span_byte_start);
        let hit_end = range.end.min(span_byte_end);
        // Unchanged gap in front of the hit (dropped when empty), then the hit.
        emit(cursor, hit_start, false);
        emit(hit_start, hit_end, true);
        cursor = hit_end;
    }
    // Whatever follows the last hit is unchanged.
    emit(cursor, span_byte_end, false);

    pieces
}

/// Combine syntax spans and word-diff tokens into a flat list of chunks.
///
/// The changed byte ranges are derived once from `tokens`; each syntax span
/// is then split against them at its running byte offset in the line. Every
/// resulting piece becomes an [`OverlayChunk`] that keeps the `hint` of the
/// span it came from and records whether the word-diff flagged it as
/// changed. Styling is left to the caller (typically: translate `hint` to a
/// base `Style`, then layer `emphasis_bg` + `Modifier::BOLD` when
/// `is_changed`).
pub(crate) fn overlay_word_diff_on_syntax(
    syntax_spans: &[(StyleHint, String)],
    tokens: &[Token],
) -> Vec<OverlayChunk> {
    let changed_ranges = build_changed_ranges(tokens);
    let mut chunks: Vec<OverlayChunk> = Vec::with_capacity(syntax_spans.len());
    // Byte offset of the current syntax span inside the full line.
    let mut line_offset: usize = 0;

    for (hint, span_text) in syntax_spans {
        chunks.extend(
            split_span_by_ranges(span_text, line_offset, &changed_ranges)
                .into_iter()
                .map(|(text, is_changed)| OverlayChunk {
                    text,
                    hint: *hint,
                    is_changed,
                }),
        );
        line_offset += span_text.len();
    }

    chunks
}

/// Render a syntax-highlighted diff line with word-diff emphasis overlaid,
/// truncating or padding the result to exactly `width` display columns.
///
/// Follows the conventions of `app_layout::truncate_or_pad_spans` so the
/// side-by-side renderer can drop this in without changing column alignment.
/// NOTE(review): that helper is not visible from this module — confirm it
/// treats the two overflow edge cases below the same way.
///
/// # Parameters
///
/// * `syntax_spans` / `tokens` — the line's syntax spans and word-diff
///   tokens, combined via `overlay_word_diff_on_syntax`.
/// * `width` — target width in display columns (as measured by
///   `unicode-width`).
/// * `pad_style` — style for the trailing ellipsis and for padding spaces.
/// * `emphasis_bg` — background applied (together with `Modifier::BOLD`) to
///   chunks flagged as changed.
/// * `is_in_visual_selection` / `visual_patch` — when the flag is set, every
///   emitted span (content, ellipsis and padding) is patched with
///   `visual_patch`, so the caller does not need to loop over the result.
///
/// # Layout
///
/// * Content that fits is emitted as-is and right-padded with spaces.
/// * Content that overflows is cut to leave room for a trailing `"..."`.
///   Two edge cases keep the result exactly `width` columns wide:
///   * if a double-width glyph straddles the cut, it is dropped whole and the
///     column it leaves empty is filled with a padding space after the
///     ellipsis;
///   * if `width < 3`, the ellipsis itself is clipped to `width` dots (and
///     omitted entirely when `width == 0`).
///
/// Each content span's foreground comes from its syntax hint.
#[allow(clippy::too_many_arguments)]
pub(crate) fn truncate_or_pad_overlay_spans(
    syntax_spans: &[(StyleHint, String)],
    tokens: &[Token],
    width: usize,
    pad_style: Style,
    emphasis_bg: Color,
    is_in_visual_selection: bool,
    visual_patch: Style,
) -> Vec<Span<'static>> {
    let chunks = overlay_word_diff_on_syntax(syntax_spans, tokens);
    let total_width: usize = chunks.iter().map(|c| c.text.width()).sum();

    // Base style from the syntax hint, then emphasis, then selection patch —
    // in that order so the selection wins over the emphasis background.
    let style_for = |chunk: &OverlayChunk| -> Style {
        let mut s = styles::style_hint_to_ratatui(chunk.hint);
        if chunk.is_changed {
            s = s.bg(emphasis_bg).add_modifier(Modifier::BOLD);
        }
        if is_in_visual_selection {
            s = s.patch(visual_patch);
        }
        s
    };

    let final_pad_style = if is_in_visual_selection {
        pad_style.patch(visual_patch)
    } else {
        pad_style
    };

    if total_width <= width {
        // Everything fits: emit all chunks, then pad up to `width`.
        let mut result: Vec<Span<'static>> = chunks
            .iter()
            .map(|c| Span::styled(c.text.clone(), style_for(c)))
            .collect();
        if total_width < width {
            result.push(Span::styled(
                " ".repeat(width - total_width),
                final_pad_style,
            ));
        }
        return result;
    }

    // Overflow: reserve room for the ellipsis. On columns narrower than the
    // ellipsis, the ellipsis is clipped so the output never exceeds `width`.
    let ellipsis_width = width.min(3);
    let budget = width - ellipsis_width;

    let mut result: Vec<Span<'static>> = Vec::new();
    // Columns of content actually emitted so far (always <= `budget`).
    let mut used: usize = 0;
    for chunk in &chunks {
        if used >= budget {
            break;
        }
        let room = budget - used;
        let text_width = chunk.text.width();
        let style = style_for(chunk);
        if text_width <= room {
            result.push(Span::styled(chunk.text.clone(), style));
            used += text_width;
        } else {
            // Take as many whole characters as fit; a wide glyph that would
            // cross the budget is dropped rather than split.
            let mut truncated = String::new();
            let mut taken = 0;
            for ch in chunk.text.chars() {
                let cw = UnicodeWidthChar::width(ch).unwrap_or(0);
                if taken + cw > room {
                    break;
                }
                truncated.push(ch);
                taken += cw;
            }
            if !truncated.is_empty() {
                result.push(Span::styled(truncated, style));
            }
            used += taken;
            break;
        }
    }

    if ellipsis_width > 0 {
        result.push(Span::styled(".".repeat(ellipsis_width), final_pad_style));
    }
    // A dropped wide glyph can leave the content short of the budget; fill
    // the gap so the column stays exactly `width` wide.
    let shortfall = budget - used;
    if shortfall > 0 {
        result.push(Span::styled(" ".repeat(shortfall), final_pad_style));
    }
    result
}

// Unit tests for the overlay helpers. Several tests build one-element range
// lists such as `vec![3..6]` on purpose, hence the clippy allowance below.
#[cfg(test)]
#[allow(clippy::single_range_in_vec_init)]
mod tests {
    use super::*;
    use travelagent_core::diff::highlight_line_pair;
    use travelagent_core::style::{ColorHint, StyleHint};

    /// Helper: simulate a syntax-highlighted line that is one flat span
    /// carrying the default style hint.
    fn flat_spans(s: &str) -> Vec<(StyleHint, String)> {
        vec![(StyleHint::default(), s.to_string())]
    }

    /// Distinct hints so tests can assert that inheritance is preserved across
    /// splits without caring about exact colors.
    fn hint_a() -> StyleHint {
        StyleHint::with_fg(ColorHint::rgb(10, 20, 30))
    }

    /// Second hint, distinguishable from `hint_a`.
    fn hint_b() -> StyleHint {
        StyleHint::with_fg(ColorHint::rgb(40, 50, 60))
    }

    #[test]
    fn build_changed_ranges_merges_adjacent_highlighted_tokens() {
        // "foo bar baz" vs "foo XYZ baz": the middle word changes. Because
        // tokenize emits one token per word and per non-word char, "bar"
        // becomes a single highlighted token; the spaces on either side stay
        // shared. The old side must yield exactly one range, and that range
        // must cover exactly the "bar" bytes.
        let (old, _new) = highlight_line_pair("foo bar baz", "foo XYZ baz");
        let ranges = build_changed_ranges(&old);
        assert_eq!(ranges.len(), 1);
        assert_eq!(&"foo bar baz"[ranges[0].clone()], "bar");
    }

    #[test]
    fn span_wholly_inside_changed_region_is_marked_changed() {
        // The changed range covers the entire span, so the span comes back
        // as a single changed piece.
        let changed = vec![0..10];
        let pieces = split_span_by_ranges("hello_word", 0, &changed);
        assert_eq!(pieces.len(), 1);
        assert_eq!(pieces[0].0, "hello_word");
        assert!(pieces[0].1);
    }

    #[test]
    fn span_wholly_outside_changed_region_is_unchanged() {
        // The changed range starts past the end of the span, so the span
        // comes back as a single unchanged piece.
        let changed = vec![20..30];
        let pieces = split_span_by_ranges("let x = 1;", 0, &changed);
        assert_eq!(pieces.len(), 1);
        assert_eq!(pieces[0].0, "let x = 1;");
        assert!(!pieces[0].1);
    }

    #[test]
    fn span_straddling_boundary_splits_into_two_subspans() {
        // The span "abcXYZ" starts at offset 0; the changed region is
        // [3..6] which covers exactly "XYZ". Expect two sub-pieces:
        // ("abc", false), ("XYZ", true).
        let changed = vec![3..6];
        let pieces = split_span_by_ranges("abcXYZ", 0, &changed);
        assert_eq!(pieces.len(), 2);
        assert_eq!(pieces[0], ("abc".to_string(), false));
        assert_eq!(pieces[1], ("XYZ".to_string(), true));
    }

    #[test]
    fn span_with_changed_in_middle_splits_into_three_subspans() {
        // Changed bytes 2..4 ("XY") sit strictly inside the span, so it
        // splits into unchanged / changed / unchanged.
        let changed = vec![2..4];
        let pieces = split_span_by_ranges("abXYef", 0, &changed);
        assert_eq!(pieces.len(), 3);
        assert_eq!(pieces[0], ("ab".to_string(), false));
        assert_eq!(pieces[1], ("XY".to_string(), true));
        assert_eq!(pieces[2], ("ef".to_string(), false));
    }

    #[test]
    fn overlay_handles_multiple_syntax_spans_with_split_on_one() {
        // Two syntax spans, with the changed range at the very start of the
        // second span. The first span stays a single unchanged chunk; the
        // second splits into changed/unchanged. This test drives
        // `split_span_by_ranges` directly with a hand-written range list,
        // reproducing the per-span offset bookkeeping by hand.
        let a = hint_a();
        let b = hint_b();
        let spans = vec![
            (a, "let ".to_string()),     // 0..4
            (b, "foo = 1;".to_string()), // 4..12
        ];
        // "foo" is bytes 4..7
        let changed = vec![4..7];
        let mut out: Vec<(String, bool, StyleHint)> = Vec::new();
        let mut cursor = 0;
        for (hint, text) in &spans {
            for (piece, ch) in split_span_by_ranges(text, cursor, &changed) {
                out.push((piece, ch, *hint));
            }
            cursor += text.len();
        }
        // Reassembly reproduces the input.
        let joined: String = out.iter().map(|(t, _, _)| t.as_str()).collect();
        assert_eq!(joined, "let foo = 1;");
        // Expected shape: "let " (hint_a, unchanged), "foo" (hint_b, changed),
        // " = 1;" (hint_b, unchanged).
        assert_eq!(out[0], ("let ".to_string(), false, a));
        assert_eq!(out[1], ("foo".to_string(), true, b));
        assert_eq!(out[2], (" = 1;".to_string(), false, b));
    }

    #[test]
    fn overlay_word_diff_on_syntax_preserves_line_content() {
        // End-to-end: a single flat syntax span with a middle-of-the-line
        // word-diff change. After overlay, concatenating chunks must yield
        // the original line byte-for-byte.
        let spans = flat_spans("let foo = 1;");
        let (_, new_tokens) = highlight_line_pair("let bar = 1;", "let foo = 1;");
        let chunks = overlay_word_diff_on_syntax(&spans, &new_tokens);
        let joined: String = chunks.iter().map(|c| c.text.as_str()).collect();
        assert_eq!(joined, "let foo = 1;");
        // At least one chunk must be marked changed (covering "foo").
        assert!(chunks.iter().any(|c| c.is_changed));
        let changed: String = chunks
            .iter()
            .filter(|c| c.is_changed)
            .map(|c| c.text.as_str())
            .collect();
        assert!(
            changed.contains("foo"),
            "expected changed chunks to contain 'foo', got {changed:?}"
        );
    }

    #[test]
    fn overlay_word_diff_on_syntax_is_utf8_safe_for_multibyte() {
        // Multibyte (Hangul) prefix shared across both sides; the trailing
        // word differs. Splitting must never slice a codepoint — splits are
        // at char boundaries by construction (tokenize walks chars).
        let old = "\u{c548}\u{b155} world";
        let new = "\u{c548}\u{b155} rust";
        let (_, new_tokens) = highlight_line_pair(old, new);
        let spans = flat_spans(new);
        let chunks = overlay_word_diff_on_syntax(&spans, &new_tokens);
        let joined: String = chunks.iter().map(|c| c.text.as_str()).collect();
        assert_eq!(joined, new);
        // "rust" must end up in a changed chunk.
        let changed: String = chunks
            .iter()
            .filter(|c| c.is_changed)
            .map(|c| c.text.as_str())
            .collect();
        assert!(
            changed.contains("rust"),
            "expected changed chunks to contain 'rust', got {changed:?}"
        );
    }

    #[test]
    fn overlay_empty_changed_ranges_emits_one_chunk_per_syntax_span() {
        // No word-diff tokens at all, hence no changed ranges: output is just
        // the syntax spans, each as an unchanged chunk.
        let spans = vec![
            (hint_a(), "let ".to_string()),
            (hint_b(), "x".to_string()),
            (StyleHint::default(), " = 1;".to_string()),
        ];
        let tokens: Vec<Token> = Vec::new();
        let chunks = overlay_word_diff_on_syntax(&spans, &tokens);
        assert_eq!(chunks.len(), 3);
        assert!(chunks.iter().all(|c| !c.is_changed));
        let joined: String = chunks.iter().map(|c| c.text.as_str()).collect();
        assert_eq!(joined, "let x = 1;");
    }
}