Skip to main content

fresh/view/ui/
view_pipeline.rs

1//! Token-based view rendering pipeline
2//!
3//! This module provides a clean pipeline for rendering view tokens:
4//!
5//! ```text
6//! source buffer
7//!     ↓ build_base_tokens()
8//! Vec<ViewTokenWire>  (base tokens with source mappings)
9//!     ↓ plugin transform (optional)
10//! Vec<ViewTokenWire>  (transformed tokens, may have injected content)
11//!     ↓ apply_wrapping() (optional)
12//! Vec<ViewTokenWire>  (with Break tokens for wrapped lines)
13//!     ↓ ViewLineIterator
14//! Iterator<ViewLine>  (one per display line, preserves token info)
15//!     ↓ render
16//! Display output
17//! ```
18//!
19//! The key design principle: preserve token-level information through the pipeline
20//! so rendering decisions (like line numbers) can be made based on token types,
21//! not reconstructed from flattened text.
22
23use crate::primitives::ansi::AnsiParser;
24use crate::primitives::display_width::str_width;
25use fresh_core::api::{ViewTokenStyle, ViewTokenWire, ViewTokenWireKind};
26use std::collections::HashSet;
27use std::ops::Range;
28use unicode_segmentation::UnicodeSegmentation;
29
/// A display line built from tokens, preserving token-level information.
///
/// `char_source_bytes`, `char_styles` and `char_visual_cols` are parallel
/// arrays with one entry per `char` of `text`; `visual_to_char` is indexed
/// by visual column instead.
#[derive(Debug, Clone)]
pub struct ViewLine {
    /// The display text for this line (tabs expanded to spaces, etc.)
    pub text: String,

    /// Absolute source byte offset of the start of this line (if it has one).
    /// For lines built from tokens this is the first mapped entry of
    /// `char_source_bytes`; `None` when no character maps to the source.
    pub source_start_byte: Option<usize>,

    // === Per-CHARACTER mappings (indexed by char position in text) ===
    /// Source byte offset for each character (`None` for characters that
    /// carry no source mapping).
    /// Length == text.chars().count()
    pub char_source_bytes: Vec<Option<usize>>,
    /// Style for each character (from token styles)
    pub char_styles: Vec<Option<ViewTokenStyle>>,
    /// Visual column where each character starts
    pub char_visual_cols: Vec<usize>,

    // === Per-VISUAL-COLUMN mapping (indexed by visual column) ===
    /// Character index at each visual column (for O(1) mouse clicks)
    /// For double-width chars, consecutive visual columns map to the same char index
    /// Length == total visual width of line
    pub visual_to_char: Vec<usize>,

    /// Character indices (into the per-character arrays) at which a tab
    /// expansion starts.
    pub tab_starts: HashSet<usize>,
    /// How this line started (what kind of token/boundary preceded it)
    pub line_start: LineStart,
    /// Whether this line ends with a newline character
    pub ends_with_newline: bool,
    /// Gutter glyph to render in the line-number column. Only set on
    /// the first visual row of a virtual line (`AfterInjectedNewline`)
    /// whose source `VirtualText` carried a `gutter_glyph`. None on
    /// source lines and on continuation rows of wrapped virtual
    /// lines, so a multi-row deletion places a single "-" next to its
    /// first row, not on every wrapped sub-row.
    pub virtual_gutter_glyph: Option<(String, ratatui::style::Color)>,
    /// Line-level style for plugin-injected virtual lines
    /// (`AfterInjectedNewline`). Carries the `bg` the plugin asked for
    /// even when `text` is empty, so the renderer's row-fill path can
    /// stripe an empty deletion virtual line with the diff-remove bg
    /// (it can't recover the bg from `char_styles.first()` when there
    /// are no chars). `None` for source lines.
    pub virtual_line_style: Option<ViewTokenStyle>,
}
75
76impl ViewLine {
77    /// Get source byte at a given character index (O(1))
78    #[inline]
79    pub fn source_byte_at_char(&self, char_idx: usize) -> Option<usize> {
80        self.char_source_bytes.get(char_idx).copied().flatten()
81    }
82
83    /// Get character index at a given visual column (O(1))
84    #[inline]
85    pub fn char_at_visual_col(&self, visual_col: usize) -> usize {
86        self.visual_to_char
87            .get(visual_col)
88            .copied()
89            .unwrap_or_else(|| self.char_source_bytes.len().saturating_sub(1))
90    }
91
92    /// Get source byte at a given visual column (O(1) for mouse clicks)
93    #[inline]
94    pub fn source_byte_at_visual_col(&self, visual_col: usize) -> Option<usize> {
95        let char_idx = self.char_at_visual_col(visual_col);
96        self.source_byte_at_char(char_idx)
97    }
98
99    /// Get the visual column for a character at the given index
100    #[inline]
101    pub fn visual_col_at_char(&self, char_idx: usize) -> usize {
102        self.char_visual_cols.get(char_idx).copied().unwrap_or(0)
103    }
104
105    /// Total visual width of this line
106    #[inline]
107    pub fn visual_width(&self) -> usize {
108        self.visual_to_char.len()
109    }
110}
111
/// What preceded the start of a display line
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineStart {
    /// First line of the view (no preceding token)
    Beginning,
    /// Line after a source Newline token (source_offset: Some)
    AfterSourceNewline,
    /// Line after an injected Newline token (source_offset: None)
    AfterInjectedNewline,
    /// Line after a Break token (wrapped continuation)
    AfterBreak,
}

impl LineStart {
    /// Is this display line a wrapped continuation of the previous one?
    ///
    /// Only `AfterBreak` qualifies; `Beginning`, `AfterSourceNewline` and
    /// `AfterInjectedNewline` all return `false`. Callers use this to
    /// suppress the gutter line number on wrapped rows.
    pub fn is_continuation(&self) -> bool {
        *self == LineStart::AfterBreak
    }
}
136
/// Iterator that converts a token stream into display lines.
///
/// Each call to `next` consumes tokens up to and including the next
/// `Newline` or `Break` token (or to the end of the stream) and yields the
/// accumulated [`ViewLine`].
pub struct ViewLineIterator<'a> {
    /// The token stream being converted.
    tokens: &'a [ViewTokenWire],
    /// Index into `tokens` of the next token to consume.
    token_idx: usize,
    /// How the next line should start (based on what ended the previous line)
    next_line_start: LineStart,
    /// Whether to render in binary mode (unprintable chars shown as code points)
    binary_mode: bool,
    /// Whether to parse ANSI escape sequences (giving them zero visual width)
    ansi_aware: bool,
    /// Tab width for rendering (number of spaces per tab).
    /// `new` replaces a configured 0 with 4, so this is never 0 for
    /// iterators built through it.
    tab_size: usize,
    /// Whether the token stream covers the end of the buffer.
    /// When true, a trailing empty line is emitted after a final source newline
    /// (representing the empty line after a file's trailing '\n').
    at_buffer_end: bool,
    /// Sorted, non-overlapping source-byte ranges whose tokens should be
    /// skipped at the source level (collapsed folds). Empty slice disables
    /// skipping. Set via [`ViewLineIterator::with_fold_skip`].
    fold_skip: &'a [Range<usize>],
    /// Advances monotonically through `fold_skip` as token source offsets
    /// advance. Lets the per-token skip check run in O(1) amortised.
    fold_cursor: usize,
}
161
162impl<'a> ViewLineIterator<'a> {
163    /// Create a new ViewLineIterator with all options
164    ///
165    /// - `tokens`: The token stream to convert to display lines
166    /// - `binary_mode`: Whether to render unprintable chars as code points
167    /// - `ansi_aware`: Whether to parse ANSI escape sequences (giving them zero visual width)
168    /// - `tab_size`: Tab width for rendering (number of spaces per tab, should be > 0)
169    /// - `at_buffer_end`: Whether the token stream covers the end of the buffer.
170    ///   When true, a trailing empty line is emitted after a final source newline.
171    ///
172    /// Note: If tab_size is 0, it will be treated as 4 (the default) to prevent division by zero.
173    /// This is a defensive measure to handle invalid configuration gracefully.
174    pub fn new(
175        tokens: &'a [ViewTokenWire],
176        binary_mode: bool,
177        ansi_aware: bool,
178        tab_size: usize,
179        at_buffer_end: bool,
180    ) -> Self {
181        // Defensive: treat 0 as 4 (default) to prevent division by zero in tab_expansion_width
182        // This can happen if invalid config (tab_size: 0) is loaded
183        let tab_size = if tab_size == 0 { 4 } else { tab_size };
184        Self {
185            tokens,
186            token_idx: 0,
187            next_line_start: LineStart::Beginning,
188            binary_mode,
189            ansi_aware,
190            tab_size,
191            at_buffer_end,
192            fold_skip: &[],
193            fold_cursor: 0,
194        }
195    }
196
197    /// Configure source-byte ranges to skip during iteration. `skip` must be
198    /// sorted by `start` ascending and non-overlapping; caller is responsible
199    /// (derived once per render from `FoldManager::resolved_ranges`). Tokens
200    /// whose `source_offset` lies inside a skip range are consumed without
201    /// contributing to a ViewLine, so folded content is never materialised.
202    pub fn with_fold_skip(mut self, skip: &'a [Range<usize>]) -> Self {
203        self.fold_skip = skip;
204        self.fold_cursor = 0;
205        self
206    }
207
208    /// Expand a tab to spaces based on current column and configured tab_size
209    #[inline]
210    fn tab_expansion_width(&self, col: usize) -> usize {
211        self.tab_size - (col % self.tab_size)
212    }
213
214    /// Advance past tokens whose `source_offset` is inside a fold skip range.
215    /// Monotonic in source offsets, so `fold_cursor` only moves forward.
216    /// Tokens with `source_offset == None` (injected / virtual) are never
217    /// skipped. Line-start transitions are NOT updated: the next emitted
218    /// ViewLine's `line_start` continues to reflect the *last emitted*
219    /// line's terminator (typically the fold header's source newline).
220    #[inline]
221    fn skip_folded_tokens(&mut self) {
222        while self.token_idx < self.tokens.len() {
223            let token = &self.tokens[self.token_idx];
224            let Some(offset) = token.source_offset else {
225                return;
226            };
227            while self.fold_cursor < self.fold_skip.len()
228                && self.fold_skip[self.fold_cursor].end <= offset
229            {
230                self.fold_cursor += 1;
231            }
232            let in_skip = self
233                .fold_skip
234                .get(self.fold_cursor)
235                .is_some_and(|r| r.start <= offset && offset < r.end);
236            if !in_skip {
237                return;
238            }
239            self.token_idx += 1;
240        }
241    }
242
243    /// Expand a single `Text` token into `acc`, handling UTF-8 decoding,
244    /// grapheme segmentation, tab expansion, ANSI escapes, and binary-mode
245    /// `<XX>` rendering. Works one display character at a time so the
246    /// byte↔column mappings stay exact.
247    fn push_text_token(
248        &self,
249        t: &str,
250        base: Option<usize>,
251        token_style: Option<ViewTokenStyle>,
252        acc: &mut LineAccumulator,
253        ansi_parser: &mut Option<AnsiParser>,
254    ) {
255        let t_bytes = t.as_bytes();
256        let mut byte_idx = 0;
257
258        while byte_idx < t_bytes.len() {
259            let b = t_bytes[byte_idx];
260
261            // In binary mode, render unprintable bytes as <XX> code points.
262            // These are never part of a grapheme cluster.
263            if self.binary_mode && is_unprintable_byte(b) {
264                acc.push_escape(
265                    &format_unprintable_byte(b),
266                    base.map(|s| s + byte_idx),
267                    token_style.clone(),
268                );
269                byte_idx += 1;
270                continue;
271            }
272
273            // Decode the largest valid UTF-8 slice starting here so we can
274            // segment it into grapheme clusters. Any invalid byte is
275            // handled as a single-byte replacement char and we resume
276            // decoding afterwards.
277            let remaining = &t_bytes[byte_idx..];
278            let valid = match std::str::from_utf8(remaining) {
279                Ok(s) => s,
280                Err(e) => {
281                    let valid_up_to = e.valid_up_to();
282                    if valid_up_to == 0 {
283                        let source = base.map(|s| s + byte_idx);
284                        if self.binary_mode {
285                            acc.push_escape(
286                                &format_unprintable_byte(b),
287                                source,
288                                token_style.clone(),
289                            );
290                        } else {
291                            acc.push_char('\u{FFFD}', source, token_style.clone(), 1);
292                        }
293                        byte_idx += 1;
294                        continue;
295                    } else {
296                        // SAFETY: `valid_up_to` is a char boundary.
297                        unsafe { std::str::from_utf8_unchecked(&remaining[..valid_up_to]) }
298                    }
299                }
300            };
301
302            // Canonical Unicode handling: iterate grapheme clusters, not
303            // codepoints. The width of a cluster is `str_width(cluster)` —
304            // `unicode-width` 0.2 correctly returns 2 for ZWJ family emoji,
305            // 1 for a base+combining sequence like "é", 2 for fullwidth
306            // letters, and so on. This is the same width ratatui computes
307            // when it re-segments the span, so every stage of the pipeline
308            // (wrap, column tracking, span placement) agrees on how many
309            // cells each cluster occupies.
310            //
311            // We still record per-codepoint entries in the char-indexed
312            // arrays (char_source_bytes / char_styles / char_visual_cols)
313            // so byte↔column mapping stays exact for LSP positions, mouse
314            // clicks, and cursor arithmetic. But `col` advances exactly
315            // once per grapheme: the first codepoint of a cluster carries
316            // the full width, the rest carry 0.
317            let mut segmented_bytes = 0usize;
318            for (g_byte_offset, grapheme) in valid.grapheme_indices(true) {
319                segmented_bytes = g_byte_offset + grapheme.len();
320
321                // In binary mode, any ASCII unprintable byte inside the
322                // decoded slice must still be rendered as `<XX>`. This
323                // covers graphemes consisting entirely of one unprintable
324                // byte (e.g. `\x1A`) and CRLF (`\r\n`) where only the
325                // `\r` half is unprintable — we split those out.
326                if self.binary_mode {
327                    let bytes = grapheme.as_bytes();
328                    let has_unprintable = bytes.iter().any(|&b| b < 0x80 && is_unprintable_byte(b));
329                    if has_unprintable {
330                        let mut inner = 0usize;
331                        for ch in grapheme.chars() {
332                            let ch_len = ch.len_utf8();
333                            let src = base.map(|s| s + byte_idx + g_byte_offset + inner);
334                            let ch_byte = ch as u32;
335                            if ch_byte < 0x80 && is_unprintable_byte(ch_byte as u8) {
336                                acc.push_escape(
337                                    &format_unprintable_byte(ch_byte as u8),
338                                    src,
339                                    token_style.clone(),
340                                );
341                            } else {
342                                acc.push_char(ch, src, token_style.clone(), 1);
343                            }
344                            inner += ch_len;
345                        }
346                        continue;
347                    }
348                }
349
350                // Tab: a single codepoint forming its own grapheme, expanded to spaces.
351                if grapheme == "\t" {
352                    let source = base.map(|s| s + byte_idx + g_byte_offset);
353                    acc.push_tab(
354                        source,
355                        token_style.clone(),
356                        self.tab_expansion_width(acc.col),
357                    );
358                    continue;
359                }
360
361                // ANSI escape sequences. Process char-by-char so the
362                // AnsiParser state machine keeps track of the escape,
363                // and keep them as width 0. In practice ESC never sits
364                // inside a grapheme with visible content, so treating
365                // a grapheme that starts with ESC as width-0 here is
366                // correct.
367                if let Some(parser) = ansi_parser.as_mut() {
368                    let first_ch = grapheme.chars().next().unwrap_or('\0');
369                    if parser.parse_char(first_ch).is_none() {
370                        for ch in grapheme.chars() {
371                            // All codepoints of an escape grapheme are width 0.
372                            let src = base.map(|s| s + byte_idx + g_byte_offset);
373                            // Keep the parser fed so state transitions work
374                            // even across a multi-codepoint escape (rare).
375                            if ch != first_ch {
376                                let _ = parser.parse_char(ch);
377                            }
378                            acc.push_char(ch, src, token_style.clone(), 0);
379                        }
380                        continue;
381                    }
382                }
383
384                // Normal case: emit one display unit per grapheme.
385                // Width goes on the FIRST codepoint, the rest are 0.
386                let cluster_width = str_width(grapheme);
387                let mut first = true;
388                let mut inner_byte_offset = 0usize;
389                for ch in grapheme.chars() {
390                    let source = base.map(|s| s + byte_idx + g_byte_offset + inner_byte_offset);
391                    let w = if first {
392                        first = false;
393                        cluster_width
394                    } else {
395                        0
396                    };
397                    acc.push_char(ch, source, token_style.clone(), w);
398                    inner_byte_offset += ch.len_utf8();
399                }
400            }
401
402            byte_idx += segmented_bytes.max(1);
403        }
404    }
405}
406
/// Decide whether `b` is a control byte that must be rendered as `<XX>`.
///
/// True for the C0 control range (0x00-0x1F) and DEL (0x7F), with two
/// exceptions that render normally: tab (0x09) and newline (0x0A). CR, VT,
/// FF and ESC are therefore all considered unprintable.
fn is_unprintable_byte(b: u8) -> bool {
    matches!(b, 0x00..=0x08 | 0x0B..=0x1F | 0x7F)
}
425
/// Render a byte as its bracketed two-digit uppercase hex code, e.g. `0x00`
/// becomes `"<00>"` and `0x1B` becomes `"<1B>"`.
fn format_unprintable_byte(b: u8) -> String {
    use std::fmt::Write as _;

    // The result is always exactly four characters: '<', two hex digits, '>'.
    let mut rendered = String::with_capacity(4);
    write!(rendered, "<{:02X}>", b).expect("formatting into a String cannot fail");
    rendered
}
430
/// Mutable per-line accumulator shared by all token-kind handlers. Owns the
/// parallel character / visual-column mappings that a [`ViewLine`] exposes and
/// grows them one display character at a time, keeping the byte↔column
/// mappings exact for cursors, mouse clicks, and LSP positions.
struct LineAccumulator {
    /// Display text accumulated so far.
    text: String,
    /// Source byte for each character pushed (one entry per `char` of `text`).
    char_source_bytes: Vec<Option<usize>>,
    /// Style for each character pushed (parallel to `char_source_bytes`).
    char_styles: Vec<Option<ViewTokenStyle>>,
    /// Visual column at which each character starts (parallel to
    /// `char_source_bytes`).
    char_visual_cols: Vec<usize>,
    /// Character index for each visual column filled so far.
    visual_to_char: Vec<usize>,
    /// Character indices at which a tab expansion begins.
    tab_starts: HashSet<usize>,
    /// Current visual column (advances by each character's display width).
    col: usize,
}
445
446impl LineAccumulator {
447    fn new() -> Self {
448        Self {
449            text: String::new(),
450            char_source_bytes: Vec::new(),
451            char_styles: Vec::new(),
452            char_visual_cols: Vec::new(),
453            visual_to_char: Vec::new(),
454            tab_starts: HashSet::new(),
455            col: 0,
456        }
457    }
458
459    /// Append one display character occupying `width` visual columns.
460    ///
461    /// `width` is 0 for zero-width codepoints (combining marks, ZWJ, the
462    /// continuation codepoints within a grapheme cluster, ANSI escapes) — we
463    /// deliberately emit no `visual_to_char` entries for them.
464    fn push_char(
465        &mut self,
466        ch: char,
467        source: Option<usize>,
468        style: Option<ViewTokenStyle>,
469        width: usize,
470    ) {
471        let char_idx = self.char_source_bytes.len();
472        self.text.push(ch);
473        self.char_source_bytes.push(source);
474        self.char_styles.push(style);
475        self.char_visual_cols.push(self.col);
476        for _ in 0..width {
477            self.visual_to_char.push(char_idx);
478        }
479        self.col += width;
480    }
481
482    /// Append each character of `s` as its own width-1 cell, all mapped to the
483    /// same `source` byte. Used to render `<XX>` escapes for unprintable bytes.
484    fn push_escape(&mut self, s: &str, source: Option<usize>, style: Option<ViewTokenStyle>) {
485        for ch in s.chars() {
486            self.push_char(ch, source, style.clone(), 1);
487        }
488    }
489
490    /// Expand a tab at the current column into `spaces` spaces. Every expanded
491    /// column maps back to the single source byte of the tab, and the first
492    /// space records the tab-start marker.
493    fn push_tab(&mut self, source: Option<usize>, style: Option<ViewTokenStyle>, spaces: usize) {
494        let char_idx = self.char_source_bytes.len();
495        self.tab_starts.insert(char_idx);
496
497        self.text.push(' ');
498        self.char_source_bytes.push(source);
499        self.char_styles.push(style.clone());
500        self.char_visual_cols.push(self.col);
501        for _ in 0..spaces {
502            self.visual_to_char.push(char_idx);
503        }
504        self.col += spaces;
505
506        // Spaces 1..N of the tab expansion. The i-th space sits at
507        // `col_before_tab + i`, where `col_before_tab = self.col - spaces`
508        // (`self.col` was already advanced above).
509        for i in 1..spaces {
510            self.text.push(' ');
511            self.char_source_bytes.push(source);
512            self.char_styles.push(style.clone());
513            self.char_visual_cols.push(self.col - spaces + i);
514        }
515    }
516
517    /// Finalize into a [`ViewLine`] with the given line metadata.
518    fn into_view_line(self, line_start: LineStart, ends_with_newline: bool) -> ViewLine {
519        let source_start_byte = self.char_source_bytes.iter().find_map(|s| *s);
520        ViewLine {
521            text: self.text,
522            source_start_byte,
523            char_source_bytes: self.char_source_bytes,
524            char_styles: self.char_styles,
525            char_visual_cols: self.char_visual_cols,
526            visual_to_char: self.visual_to_char,
527            tab_starts: self.tab_starts,
528            line_start,
529            ends_with_newline,
530            virtual_gutter_glyph: None,
531            virtual_line_style: None,
532        }
533    }
534}
535
536impl<'a> Iterator for ViewLineIterator<'a> {
537    type Item = ViewLine;
538
539    fn next(&mut self) -> Option<Self::Item> {
540        // Fold skip: advance past any tokens whose source bytes live inside
541        // a collapsed fold range before inspecting the next visible token.
542        self.skip_folded_tokens();
543
544        if self.token_idx >= self.tokens.len() {
545            // All tokens consumed.  If the previous line ended with a source
546            // newline there is one more real (empty) document line to emit —
547            // e.g. the empty line after a file's trailing '\n'.  Produce it
548            // exactly once, then stop.  Only do this when the tokens cover
549            // the actual end of the buffer (not a viewport slice).
550            if self.at_buffer_end && matches!(self.next_line_start, LineStart::AfterSourceNewline) {
551                // Flip to Beginning so the *next* call returns None.
552                self.next_line_start = LineStart::Beginning;
553                let last_source_byte = self.tokens.last().and_then(|t| t.source_offset);
554                return Some(ViewLine {
555                    text: String::new(),
556                    source_start_byte: last_source_byte.map(|s| s + 1),
557                    char_source_bytes: vec![],
558                    char_styles: vec![],
559                    char_visual_cols: vec![],
560                    visual_to_char: vec![],
561                    tab_starts: HashSet::new(),
562                    line_start: LineStart::AfterSourceNewline,
563                    ends_with_newline: false,
564                    virtual_gutter_glyph: None,
565                    virtual_line_style: None,
566                });
567            }
568            return None;
569        }
570
571        let line_start = self.next_line_start;
572        let mut acc = LineAccumulator::new();
573        let mut ends_with_newline = false;
574
575        // ANSI parser for tracking escape sequences (reuse existing implementation)
576        let mut ansi_parser = if self.ansi_aware {
577            Some(AnsiParser::new())
578        } else {
579            None
580        };
581
582        // Process tokens until we hit a line break
583        while self.token_idx < self.tokens.len() {
584            // Skip tokens that fall inside a collapsed fold before
585            // touching the current line's accumulators.
586            self.skip_folded_tokens();
587            if self.token_idx >= self.tokens.len() {
588                break;
589            }
590            let token = &self.tokens[self.token_idx];
591            let token_style = token.style.clone();
592
593            match &token.kind {
594                ViewTokenWireKind::Text(t) => {
595                    self.push_text_token(
596                        t,
597                        token.source_offset,
598                        token_style,
599                        &mut acc,
600                        &mut ansi_parser,
601                    );
602                    self.token_idx += 1;
603                }
604                ViewTokenWireKind::Space => {
605                    acc.push_char(' ', token.source_offset, token_style, 1);
606                    self.token_idx += 1;
607                }
608                ViewTokenWireKind::Newline => {
609                    // Newline ends this line - width 1 for the newline char
610                    acc.push_char('\n', token.source_offset, token_style, 1);
611                    ends_with_newline = true;
612
613                    // Determine how the next line starts
614                    self.next_line_start = if token.source_offset.is_some() {
615                        LineStart::AfterSourceNewline
616                    } else {
617                        LineStart::AfterInjectedNewline
618                    };
619                    self.token_idx += 1;
620                    break;
621                }
622                ViewTokenWireKind::Break => {
623                    // Break is a synthetic line break from wrapping
624                    acc.push_char('\n', None, None, 1);
625                    ends_with_newline = true;
626
627                    self.next_line_start = LineStart::AfterBreak;
628                    self.token_idx += 1;
629                    break;
630                }
631                ViewTokenWireKind::BinaryByte(b) => {
632                    // Binary byte rendered as <XX> - all chars map to same source byte
633                    acc.push_escape(
634                        &format_unprintable_byte(*b),
635                        token.source_offset,
636                        token_style,
637                    );
638                    self.token_idx += 1;
639                }
640            }
641        }
642
643        // If we consumed all remaining tokens without hitting a Newline or Break,
644        // the content didn't end with a line terminator.  Reset next_line_start
645        // so the trailing-empty-line logic (at the top of next()) doesn't
646        // incorrectly fire on the subsequent call.  The `ends_with_newline` flag
647        // tells us whether the loop exited via a Newline/Break (true) or by
648        // exhausting all tokens (false).
649        if !ends_with_newline && self.token_idx >= self.tokens.len() {
650            self.next_line_start = LineStart::Beginning;
651        }
652
653        // Don't return empty injected/virtual lines at the end of the token
654        // stream.  However, DO return a trailing empty line that follows a source
655        // newline — it represents a real document line (e.g. after a file's
656        // trailing '\n') and the cursor may sit on it — but only when
657        // at_buffer_end is set (otherwise this is just a viewport slice).
658        if acc.text.is_empty()
659            && self.token_idx >= self.tokens.len()
660            && !(self.at_buffer_end && matches!(line_start, LineStart::AfterSourceNewline))
661        {
662            return None;
663        }
664
665        Some(acc.into_view_line(line_start, ends_with_newline))
666    }
667}
668
669/// Determine if a display line should show a line number
670///
671/// Rules:
672/// - Wrapped continuation (line_start == AfterBreak): no line number
673/// - Injected content (first char has source_offset: None): no line number
674/// - Empty line at beginning or after source newline: yes line number
675/// - Otherwise: show line number
676pub fn should_show_line_number(line: &ViewLine) -> bool {
677    // Wrapped continuations never show line numbers
678    if line.line_start.is_continuation() {
679        return false;
680    }
681
682    // Check if this line contains injected (non-source) content
683    // An empty line is NOT injected if it's at the beginning or after a source newline
684    if line.char_source_bytes.is_empty() {
685        // Empty line - show line number if it's at beginning or after source newline
686        // (not after injected newline or break)
687        return matches!(
688            line.line_start,
689            LineStart::Beginning | LineStart::AfterSourceNewline
690        );
691    }
692
693    let first_char_is_source = line
694        .char_source_bytes
695        .first()
696        .map(|m| m.is_some())
697        .unwrap_or(false);
698
699    if !first_char_is_source {
700        // Injected line (header, etc.) - no line number
701        return false;
702    }
703
704    // Source content after a real line break - show line number
705    true
706}
707
708// ============================================================================
709// Layout: The computed display state for a view
710// ============================================================================
711
712use std::collections::BTreeMap;
713
/// The Layout represents the computed display state for a view.
///
/// This is **View state**, not Buffer state. Each split has its own Layout
/// computed from its view_transform (or base tokens if no transform).
///
/// The Layout provides:
/// - ViewLines for the current viewport region
/// - Bidirectional mapping between source bytes and view positions
/// - Scroll limit information
#[derive(Debug, Clone)]
pub struct Layout {
    /// Display lines for the current viewport region
    pub lines: Vec<ViewLine>,

    /// Source byte range this layout covers
    pub source_range: Range<usize>,

    /// Total view lines in entire document (estimated or exact).
    /// `Layout::new` currently sets this to `lines.len()`.
    pub total_view_lines: usize,

    /// Total injected lines in entire document (from view transform).
    /// `Layout::new` computes this as the number of `lines` for which
    /// `should_show_line_number` is false.
    /// NOTE(review): that count also includes wrapped continuation rows —
    /// confirm whether they are meant to be counted as injected.
    pub total_injected_lines: usize,

    /// Fast lookup: source byte → view line index.
    /// Keyed by the first mapped source byte of each line; lines without any
    /// mapped character have no entry.
    byte_to_line: BTreeMap<usize, usize>,
}
740
741impl Layout {
742    /// Create a new Layout from ViewLines
743    pub fn new(lines: Vec<ViewLine>, source_range: Range<usize>) -> Self {
744        let mut byte_to_line = BTreeMap::new();
745
746        // Build the byte→line index from char_source_bytes
747        for (line_idx, line) in lines.iter().enumerate() {
748            // Find the first source byte in this line
749            if let Some(first_byte) = line.char_source_bytes.iter().find_map(|m| *m) {
750                byte_to_line.insert(first_byte, line_idx);
751            }
752        }
753
754        // Estimate total view lines (for now, just use what we have)
755        let total_view_lines = lines.len();
756        let total_injected_lines = lines.iter().filter(|l| !should_show_line_number(l)).count();
757
758        Self {
759            lines,
760            source_range,
761            total_view_lines,
762            total_injected_lines,
763            byte_to_line,
764        }
765    }
766
767    /// Build a Layout from a token stream
768    pub fn from_tokens(
769        tokens: &[ViewTokenWire],
770        source_range: Range<usize>,
771        tab_size: usize,
772    ) -> Self {
773        let lines: Vec<ViewLine> =
774            ViewLineIterator::new(tokens, false, false, tab_size, false).collect();
775        Self::new(lines, source_range)
776    }
777
778    /// Find the view position (line, visual column) for a source byte
779    pub fn source_byte_to_view_position(&self, byte: usize) -> Option<(usize, usize)> {
780        // Find the view line containing this byte
781        if let Some((&_line_start_byte, &line_idx)) = self.byte_to_line.range(..=byte).last() {
782            if line_idx < self.lines.len() {
783                let line = &self.lines[line_idx];
784                // Find the character with this source byte, then get its visual column
785                for (char_idx, mapping) in line.char_source_bytes.iter().enumerate() {
786                    if *mapping == Some(byte) {
787                        return Some((line_idx, line.visual_col_at_char(char_idx)));
788                    }
789                }
790                // Byte is in this line's range but not at a character boundary
791                // Return end of line (visual width)
792                return Some((line_idx, line.visual_width()));
793            }
794        }
795        None
796    }
797
798    /// Find the source byte for a view position (line, visual column)
799    pub fn view_position_to_source_byte(&self, line_idx: usize, col: usize) -> Option<usize> {
800        if line_idx >= self.lines.len() {
801            return None;
802        }
803        let line = &self.lines[line_idx];
804        if col < line.visual_width() {
805            // Use O(1) lookup via visual_to_char -> char_source_bytes
806            line.source_byte_at_visual_col(col)
807        } else if !line.char_source_bytes.is_empty() {
808            // Past end of line, return last valid byte
809            line.char_source_bytes.iter().rev().find_map(|m| *m)
810        } else {
811            None
812        }
813    }
814
815    /// Get the source byte for the start of a view line
816    pub fn get_source_byte_for_line(&self, line_idx: usize) -> Option<usize> {
817        if line_idx >= self.lines.len() {
818            return None;
819        }
820        self.lines[line_idx]
821            .char_source_bytes
822            .iter()
823            .find_map(|m| *m)
824    }
825
826    /// Find the nearest view line for a source byte (for stabilization)
827    pub fn find_nearest_view_line(&self, byte: usize) -> usize {
828        if let Some((&_line_start_byte, &line_idx)) = self.byte_to_line.range(..=byte).last() {
829            line_idx.min(self.lines.len().saturating_sub(1))
830        } else {
831            0
832        }
833    }
834
835    /// Calculate the maximum top line for scrolling
836    pub fn max_top_line(&self, viewport_height: usize) -> usize {
837        self.lines.len().saturating_sub(viewport_height)
838    }
839
840    /// Check if there's content below the current layout
841    pub fn has_content_below(&self, buffer_len: usize) -> bool {
842        self.source_range.end < buffer_len
843    }
844}
845
#[cfg(test)]
mod tests {
    use super::*;

    // Conventions used throughout these tests:
    // - Tokens are built by hand with explicit `source_offset`s so the
    //   expected source mappings can be read off directly.
    // - In `ViewLineIterator::new(&tokens, <binary>, _, 4, _)` the second
    //   argument is `true` only in the binary-mode tests and the fourth is
    //   the tab size.
    //   NOTE(review): the third and fifth flags are always `false` here;
    //   their meaning is defined outside this module — confirm before
    //   changing them.

    /// Build an unstyled `Text` token. A `source_offset` of `None` is used by
    /// these tests to represent injected (non-source) content.
    fn make_text_token(text: &str, source_offset: Option<usize>) -> ViewTokenWire {
        ViewTokenWire {
            kind: ViewTokenWireKind::Text(text.to_string()),
            source_offset,
            style: None,
        }
    }

    /// Build an unstyled `Newline` token at the given source offset
    /// (`None` for an injected newline).
    fn make_newline_token(source_offset: Option<usize>) -> ViewTokenWire {
        ViewTokenWire {
            kind: ViewTokenWireKind::Newline,
            source_offset,
            style: None,
        }
    }

    /// Build a `Break` token (a wrap point); it never carries a source offset.
    fn make_break_token() -> ViewTokenWire {
        ViewTokenWire {
            kind: ViewTokenWireKind::Break,
            source_offset: None,
            style: None,
        }
    }

    /// Two plain source lines: both get a line number, and the second one is
    /// recorded as starting after a source newline.
    #[test]
    fn test_simple_source_lines() {
        let tokens = vec![
            make_text_token("Line 1", Some(0)),
            make_newline_token(Some(6)),
            make_text_token("Line 2", Some(7)),
            make_newline_token(Some(13)),
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].text, "Line 1\n");
        assert_eq!(lines[0].line_start, LineStart::Beginning);
        assert!(should_show_line_number(&lines[0]));

        assert_eq!(lines[1].text, "Line 2\n");
        assert_eq!(lines[1].line_start, LineStart::AfterSourceNewline);
        assert!(should_show_line_number(&lines[1]));
    }

    /// A `Break` token splits one source line into two display lines; only
    /// the first of them gets a line number.
    #[test]
    fn test_wrapped_continuation() {
        let tokens = vec![
            make_text_token("Line 1 start", Some(0)),
            make_break_token(), // Wrapped
            make_text_token("continued", Some(12)),
            make_newline_token(Some(21)),
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].line_start, LineStart::Beginning);
        assert!(should_show_line_number(&lines[0]));

        assert_eq!(lines[1].line_start, LineStart::AfterBreak);
        assert!(
            !should_show_line_number(&lines[1]),
            "Wrapped continuation should NOT show line number"
        );
    }

    /// Regression test: a source line that follows an injected header must
    /// still get a line number even though it starts after an injected
    /// newline.
    #[test]
    fn test_injected_header_then_source() {
        // This is the bug scenario: header (injected) followed by source content
        let tokens = vec![
            // Injected header
            make_text_token("== HEADER ==", None),
            make_newline_token(None),
            // Source content
            make_text_token("Line 1", Some(0)),
            make_newline_token(Some(6)),
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();

        assert_eq!(lines.len(), 2);

        // Header line - no line number (injected content)
        assert_eq!(lines[0].text, "== HEADER ==\n");
        assert_eq!(lines[0].line_start, LineStart::Beginning);
        assert!(
            !should_show_line_number(&lines[0]),
            "Injected header should NOT show line number"
        );

        // Source line after header - SHOULD show line number
        assert_eq!(lines[1].text, "Line 1\n");
        assert_eq!(lines[1].line_start, LineStart::AfterInjectedNewline);
        assert!(
            should_show_line_number(&lines[1]),
            "BUG: Source line after injected header SHOULD show line number!\n\
             line_start={:?}, first_char_is_source={}",
            lines[1].line_start,
            lines[1]
                .char_source_bytes
                .first()
                .map(|m| m.is_some())
                .unwrap_or(false)
        );
    }

    /// Combines an injected header, plain source lines and a wrapped source
    /// line, and checks the line-number decision for each display line.
    #[test]
    fn test_mixed_scenario() {
        // Header -> Source Line 1 -> Source Line 2 (wrapped) -> Source Line 3
        let tokens = vec![
            // Injected header
            make_text_token("== Block 1 ==", None),
            make_newline_token(None),
            // Source line 1
            make_text_token("Line 1", Some(0)),
            make_newline_token(Some(6)),
            // Source line 2 (gets wrapped)
            make_text_token("Line 2 start", Some(7)),
            make_break_token(),
            make_text_token("wrapped", Some(19)),
            make_newline_token(Some(26)),
            // Source line 3
            make_text_token("Line 3", Some(27)),
            make_newline_token(Some(33)),
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();

        // header + line 1 + two display lines for line 2 + line 3
        assert_eq!(lines.len(), 5);

        // Header - no line number
        assert!(!should_show_line_number(&lines[0]));

        // Line 1 - yes line number (source after header)
        assert!(should_show_line_number(&lines[1]));

        // Line 2 start - yes line number
        assert!(should_show_line_number(&lines[2]));

        // Line 2 wrapped - no line number (continuation)
        assert!(!should_show_line_number(&lines[3]));

        // Line 3 - yes line number
        assert!(should_show_line_number(&lines[4]));
    }

    /// Pins which byte values `is_unprintable_byte` reports as unprintable.
    #[test]
    fn test_is_unprintable_byte() {
        // Null byte is unprintable
        assert!(is_unprintable_byte(0x00));

        // Control characters 0x01-0x08 are unprintable
        assert!(is_unprintable_byte(0x01));
        assert!(is_unprintable_byte(0x02));
        assert!(is_unprintable_byte(0x08));

        // Tab (0x09) and LF (0x0A) are allowed
        assert!(!is_unprintable_byte(0x09)); // tab
        assert!(!is_unprintable_byte(0x0A)); // newline

        // VT (0x0B), FF (0x0C), CR (0x0D) are unprintable in binary mode
        assert!(is_unprintable_byte(0x0B)); // vertical tab
        assert!(is_unprintable_byte(0x0C)); // form feed
        assert!(is_unprintable_byte(0x0D)); // carriage return

        // 0x0E-0x1F are all unprintable (including ESC)
        assert!(is_unprintable_byte(0x0E));
        assert!(is_unprintable_byte(0x1A)); // SUB - this is in PNG headers
        assert!(is_unprintable_byte(0x1B)); // ESC
        assert!(is_unprintable_byte(0x1C));
        assert!(is_unprintable_byte(0x1F));

        // Printable ASCII (0x20-0x7E) is allowed
        assert!(!is_unprintable_byte(0x20)); // space
        assert!(!is_unprintable_byte(0x41)); // 'A'
        assert!(!is_unprintable_byte(0x7E)); // '~'

        // DEL (0x7F) is unprintable
        assert!(is_unprintable_byte(0x7F));

        // High bytes (0x80+) are allowed (could be UTF-8)
        assert!(!is_unprintable_byte(0x80));
        assert!(!is_unprintable_byte(0xFF));
    }

    /// `format_unprintable_byte` renders a byte as `<XX>` with two uppercase
    /// hex digits.
    #[test]
    fn test_format_unprintable_byte() {
        assert_eq!(format_unprintable_byte(0x00), "<00>");
        assert_eq!(format_unprintable_byte(0x01), "<01>");
        assert_eq!(format_unprintable_byte(0x1A), "<1A>");
        assert_eq!(format_unprintable_byte(0x7F), "<7F>");
        assert_eq!(format_unprintable_byte(0xFF), "<FF>");
    }

    /// In binary mode, control characters embedded in a text token are
    /// rendered in `<XX>` form.
    #[test]
    fn test_binary_mode_renders_control_chars() {
        // Text with null byte and control character
        let tokens = vec![
            ViewTokenWire {
                kind: ViewTokenWireKind::Text("Hello\x00World\x01End".to_string()),
                source_offset: Some(0),
                style: None,
            },
            make_newline_token(Some(15)),
        ];

        // Without binary mode - control chars would be rendered raw or as replacement
        let lines_normal: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
        assert_eq!(lines_normal.len(), 1);
        // In normal mode, we don't format control chars specially
        // (only the line count is asserted for normal mode).

        // With binary mode - control chars should be formatted as <XX>
        let lines_binary: Vec<_> = ViewLineIterator::new(&tokens, true, false, 4, false).collect();
        assert_eq!(lines_binary.len(), 1);
        assert!(
            lines_binary[0].text.contains("<00>"),
            "Binary mode should format null byte as <00>, got: {}",
            lines_binary[0].text
        );
        assert!(
            lines_binary[0].text.contains("<01>"),
            "Binary mode should format 0x01 as <01>, got: {}",
            lines_binary[0].text
        );
    }

    /// In binary mode, the SUB byte (0x1A) found in PNG-style headers is
    /// rendered as `<1A>`.
    #[test]
    fn test_binary_mode_png_header() {
        // PNG-like content with SUB control char (0x1A)
        // Using valid UTF-8 string with embedded control character
        let png_like = "PNG\r\n\x1A\n";
        let tokens = vec![ViewTokenWire {
            kind: ViewTokenWireKind::Text(png_like.to_string()),
            source_offset: Some(0),
            style: None,
        }];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, true, false, 4, false).collect();

        // Should have rendered the 0x1A as <1A>
        // (the text of all produced lines is concatenated before checking)
        let combined: String = lines.iter().map(|l| l.text.as_str()).collect();
        assert!(
            combined.contains("<1A>"),
            "PNG SUB byte (0x1A) should be rendered as <1A>, got: {:?}",
            combined
        );
    }

    /// Binary mode must leave ordinary printable text untouched.
    #[test]
    fn test_binary_mode_preserves_printable_chars() {
        let tokens = vec![
            ViewTokenWire {
                kind: ViewTokenWireKind::Text("Normal text 123".to_string()),
                source_offset: Some(0),
                style: None,
            },
            make_newline_token(Some(15)),
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, true, false, 4, false).collect();
        assert_eq!(lines.len(), 1);
        assert!(
            lines[0].text.contains("Normal text 123"),
            "Printable chars should be preserved in binary mode"
        );
    }

    /// Double-width characters occupy two visual columns each, and both
    /// columns map back to the character's first source byte.
    #[test]
    fn test_double_width_visual_mappings() {
        // "你好" - two Chinese characters, each 3 bytes and 2 columns wide
        // Byte layout: 你=bytes 0-2, 好=bytes 3-5
        // Visual layout: 你 takes columns 0-1, 好 takes columns 2-3
        let tokens = vec![
            make_text_token("你好", Some(0)),
            make_newline_token(Some(6)),
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
        assert_eq!(lines.len(), 1);

        // visual_to_char should have one entry per visual column
        // 你 = 2 columns, 好 = 2 columns, \n = 1 column = 5 total
        assert_eq!(
            lines[0].visual_width(),
            5,
            "Expected 5 visual columns (2 for 你 + 2 for 好 + 1 for newline), got {}",
            lines[0].visual_width()
        );

        // char_source_bytes should have one entry per character
        // 3 characters: 你, 好, \n
        assert_eq!(
            lines[0].char_source_bytes.len(),
            3,
            "Expected 3 char entries (你, 好, newline), got {}",
            lines[0].char_source_bytes.len()
        );

        // Both columns of 你 should map to byte 0 via O(1) lookup
        assert_eq!(
            lines[0].source_byte_at_visual_col(0),
            Some(0),
            "Column 0 should map to byte 0"
        );
        assert_eq!(
            lines[0].source_byte_at_visual_col(1),
            Some(0),
            "Column 1 should map to byte 0"
        );

        // Both columns of 好 should map to byte 3
        assert_eq!(
            lines[0].source_byte_at_visual_col(2),
            Some(3),
            "Column 2 should map to byte 3"
        );
        assert_eq!(
            lines[0].source_byte_at_visual_col(3),
            Some(3),
            "Column 3 should map to byte 3"
        );

        // Newline maps to byte 6
        assert_eq!(
            lines[0].source_byte_at_visual_col(4),
            Some(6),
            "Column 4 (newline) should map to byte 6"
        );
    }

    /// Mixed single- and double-width characters: visual columns and source
    /// bytes stay aligned across the width change.
    #[test]
    fn test_mixed_width_visual_mappings() {
        // "a你b" - ASCII, Chinese (2 cols), ASCII
        // Byte layout: a=0, 你=1-3, b=4
        // Visual columns: a=0, 你=1-2, b=3
        let tokens = vec![
            make_text_token("a你b", Some(0)),
            make_newline_token(Some(5)),
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
        assert_eq!(lines.len(), 1);

        // a=1 col, 你=2 cols, b=1 col, \n=1 col = 5 total visual width
        assert_eq!(
            lines[0].visual_width(),
            5,
            "Expected 5 visual columns, got {}",
            lines[0].visual_width()
        );

        // 4 characters: a, 你, b, \n
        assert_eq!(
            lines[0].char_source_bytes.len(),
            4,
            "Expected 4 char entries, got {}",
            lines[0].char_source_bytes.len()
        );

        // Test O(1) visual column to byte lookup
        assert_eq!(
            lines[0].source_byte_at_visual_col(0),
            Some(0),
            "Column 0 (a) should map to byte 0"
        );
        assert_eq!(
            lines[0].source_byte_at_visual_col(1),
            Some(1),
            "Column 1 (你 col 1) should map to byte 1"
        );
        assert_eq!(
            lines[0].source_byte_at_visual_col(2),
            Some(1),
            "Column 2 (你 col 2) should map to byte 1"
        );
        assert_eq!(
            lines[0].source_byte_at_visual_col(3),
            Some(4),
            "Column 3 (b) should map to byte 4"
        );
        assert_eq!(
            lines[0].source_byte_at_visual_col(4),
            Some(5),
            "Column 4 (newline) should map to byte 5"
        );
    }

    // ==================== CRLF Mode Tests ====================

    /// Test that ViewLineIterator correctly maps char_source_bytes for CRLF content.
    /// In CRLF mode, the Newline token is emitted at the \r position, and \n is skipped.
    /// This test verifies that char_source_bytes correctly tracks source byte positions.
    #[test]
    fn test_crlf_char_source_bytes_single_line() {
        // Simulate CRLF content "abc\r\n" where:
        // - bytes: a=0, b=1, c=2, \r=3, \n=4
        // - Newline token at source_offset=3 (position of \r)
        let tokens = vec![
            make_text_token("abc", Some(0)),
            make_newline_token(Some(3)), // \r position in CRLF
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
        assert_eq!(lines.len(), 1);

        // The ViewLine should have: 'a', 'b', 'c', '\n'
        assert_eq!(lines[0].text, "abc\n");

        // char_source_bytes should correctly map each display char to source bytes
        assert_eq!(
            lines[0].char_source_bytes.len(),
            4,
            "Expected 4 chars: a, b, c, newline"
        );
        assert_eq!(
            lines[0].char_source_bytes[0],
            Some(0),
            "char 'a' should map to byte 0"
        );
        assert_eq!(
            lines[0].char_source_bytes[1],
            Some(1),
            "char 'b' should map to byte 1"
        );
        assert_eq!(
            lines[0].char_source_bytes[2],
            Some(2),
            "char 'c' should map to byte 2"
        );
        assert_eq!(
            lines[0].char_source_bytes[3],
            Some(3),
            "newline should map to byte 3 (\\r position)"
        );
    }

    /// Test CRLF char_source_bytes across multiple lines.
    /// This is the critical test for the accumulating offset bug.
    #[test]
    fn test_crlf_char_source_bytes_multiple_lines() {
        // Simulate CRLF content "abc\r\ndef\r\nghi\r\n" where:
        // Line 1: a=0, b=1, c=2, \r=3, \n=4 (5 bytes)
        // Line 2: d=5, e=6, f=7, \r=8, \n=9 (5 bytes)
        // Line 3: g=10, h=11, i=12, \r=13, \n=14 (5 bytes)
        let tokens = vec![
            // Line 1
            make_text_token("abc", Some(0)),
            make_newline_token(Some(3)), // \r at byte 3
            // Line 2
            make_text_token("def", Some(5)),
            make_newline_token(Some(8)), // \r at byte 8
            // Line 3
            make_text_token("ghi", Some(10)),
            make_newline_token(Some(13)), // \r at byte 13
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
        assert_eq!(lines.len(), 3);

        // Line 1 verification
        assert_eq!(lines[0].text, "abc\n");
        assert_eq!(
            lines[0].char_source_bytes,
            vec![Some(0), Some(1), Some(2), Some(3)],
            "Line 1 char_source_bytes mismatch"
        );

        // Line 2 verification - THIS IS WHERE THE BUG WOULD MANIFEST
        // If there's an off-by-one per line, line 2 might have wrong offsets
        assert_eq!(lines[1].text, "def\n");
        assert_eq!(
            lines[1].char_source_bytes,
            vec![Some(5), Some(6), Some(7), Some(8)],
            "Line 2 char_source_bytes mismatch - possible CRLF offset drift"
        );

        // Line 3 verification - error accumulates
        assert_eq!(lines[2].text, "ghi\n");
        assert_eq!(
            lines[2].char_source_bytes,
            vec![Some(10), Some(11), Some(12), Some(13)],
            "Line 3 char_source_bytes mismatch - CRLF offset drift accumulated"
        );
    }

    /// Issue #1997: adjacent tab characters caused the indicator arrow to be
    /// rendered twice. Root cause: `char_visual_cols` entries for the 2nd..Nth
    /// expansion-space of every tab were one column too high, so the
    /// renderer's `col_offset` skipped column 1, hit `tab_starts` for the next
    /// tab one iteration early, and emitted "→" both for the trailing space
    /// of the previous tab and the leading space of the next.
    #[test]
    fn test_adjacent_tabs_visual_cols_monotonic() {
        // Two adjacent tabs at the start of a line with tab_size = 4.
        // Source bytes: \t=0, \t=1
        // Each tab expands to 4 spaces, so we expect 8 expansion chars at
        // visual columns 0,1,2,3,4,5,6,7 — exactly one column per char.
        let tokens = vec![
            make_text_token("\t\t", Some(0)),
            make_newline_token(Some(2)),
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
        assert_eq!(lines.len(), 1);

        // 8 spaces + 1 newline char
        assert_eq!(lines[0].char_visual_cols.len(), 9);
        assert_eq!(
            &lines[0].char_visual_cols[..8],
            &[0, 1, 2, 3, 4, 5, 6, 7],
            "Each expansion space must sit at its own visual column"
        );

        // tab_starts records the char indices where each tab begins.
        // With both tabs at col 0 and col 4, the only valid tab-start char
        // indices are 0 and 4.
        // The indices are sorted before comparing so the assertion does not
        // depend on the iteration order of `tab_starts`.
        let mut starts: Vec<usize> = lines[0].tab_starts.iter().copied().collect();
        starts.sort();
        assert_eq!(starts, vec![0, 4]);
    }

    /// Test CRLF visual column to source byte mapping.
    /// Verifies source_byte_at_visual_col works correctly for CRLF content.
    #[test]
    fn test_crlf_visual_to_source_mapping() {
        // CRLF content "ab\r\ncd\r\n"
        // Line 1: a=0, b=1, \r=2, \n=3
        // Line 2: c=4, d=5, \r=6, \n=7
        let tokens = vec![
            make_text_token("ab", Some(0)),
            make_newline_token(Some(2)),
            make_text_token("cd", Some(4)),
            make_newline_token(Some(6)),
        ];

        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();

        // Line 1: visual columns 0,1 should map to bytes 0,1
        assert_eq!(
            lines[0].source_byte_at_visual_col(0),
            Some(0),
            "Line 1 col 0"
        );
        assert_eq!(
            lines[0].source_byte_at_visual_col(1),
            Some(1),
            "Line 1 col 1"
        );
        assert_eq!(
            lines[0].source_byte_at_visual_col(2),
            Some(2),
            "Line 1 col 2 (newline)"
        );

        // Line 2: visual columns 0,1 should map to bytes 4,5
        assert_eq!(
            lines[1].source_byte_at_visual_col(0),
            Some(4),
            "Line 2 col 0"
        );
        assert_eq!(
            lines[1].source_byte_at_visual_col(1),
            Some(5),
            "Line 2 col 1"
        );
        assert_eq!(
            lines[1].source_byte_at_visual_col(2),
            Some(6),
            "Line 2 col 2 (newline)"
        );
    }
}