Skip to main content

fresh/view/ui/
view_pipeline.rs

1//! Token-based view rendering pipeline
2//!
3//! This module provides a clean pipeline for rendering view tokens:
4//!
5//! ```text
6//! source buffer
7//!     ↓ build_base_tokens()
8//! Vec<ViewTokenWire>  (base tokens with source mappings)
9//!     ↓ plugin transform (optional)
10//! Vec<ViewTokenWire>  (transformed tokens, may have injected content)
11//!     ↓ apply_wrapping() (optional)
12//! Vec<ViewTokenWire>  (with Break tokens for wrapped lines)
13//!     ↓ ViewLineIterator
14//! Iterator<ViewLine>  (one per display line, preserves token info)
15//!     ↓ render
16//! Display output
17//! ```
18//!
19//! The key design principle: preserve token-level information through the pipeline
20//! so rendering decisions (like line numbers) can be made based on token types,
21//! not reconstructed from flattened text.
22
23use crate::primitives::ansi::AnsiParser;
24use crate::primitives::display_width::str_width;
25use fresh_core::api::{ViewTokenStyle, ViewTokenWire, ViewTokenWireKind};
26use std::collections::HashSet;
27use std::ops::Range;
28use unicode_segmentation::UnicodeSegmentation;
29
30/// A display line built from tokens, preserving token-level information
31#[derive(Debug, Clone)]
32pub struct ViewLine {
33    /// The display text for this line (tabs expanded to spaces, etc.)
34    pub text: String,
35
36    /// Absolute source byte offset of the start of this line (if it has one)
37    pub source_start_byte: Option<usize>,
38
39    // === Per-CHARACTER mappings (indexed by char position in text) ===
40    /// Source byte offset for each character
41    /// Length == text.chars().count()
42    pub char_source_bytes: Vec<Option<usize>>,
43    /// Style for each character (from token styles)
44    pub char_styles: Vec<Option<ViewTokenStyle>>,
45    /// Visual column where each character starts
46    pub char_visual_cols: Vec<usize>,
47
48    // === Per-VISUAL-COLUMN mapping (indexed by visual column) ===
49    /// Character index at each visual column (for O(1) mouse clicks)
50    /// For double-width chars, consecutive visual columns map to the same char index
51    /// Length == total visual width of line
52    pub visual_to_char: Vec<usize>,
53
54    /// Positions that are the start of a tab expansion
55    pub tab_starts: HashSet<usize>,
56    /// How this line started (what kind of token/boundary preceded it)
57    pub line_start: LineStart,
58    /// Whether this line ends with a newline character
59    pub ends_with_newline: bool,
60    /// Gutter glyph to render in the line-number column. Only set on
61    /// the first visual row of a virtual line (`AfterInjectedNewline`)
62    /// whose source `VirtualText` carried a `gutter_glyph`. None on
63    /// source lines and on continuation rows of wrapped virtual
64    /// lines, so a multi-row deletion places a single "-" next to its
65    /// first row, not on every wrapped sub-row.
66    pub virtual_gutter_glyph: Option<(String, ratatui::style::Color)>,
67    /// Line-level style for plugin-injected virtual lines
68    /// (`AfterInjectedNewline`). Carries the `bg` the plugin asked for
69    /// even when `text` is empty, so the renderer's row-fill path can
70    /// stripe an empty deletion virtual line with the diff-remove bg
71    /// (it can't recover the bg from `char_styles.first()` when there
72    /// are no chars). `None` for source lines.
73    pub virtual_line_style: Option<ViewTokenStyle>,
74}
75
76impl ViewLine {
77    /// Get source byte at a given character index (O(1))
78    #[inline]
79    pub fn source_byte_at_char(&self, char_idx: usize) -> Option<usize> {
80        self.char_source_bytes.get(char_idx).copied().flatten()
81    }
82
83    /// Get character index at a given visual column (O(1))
84    #[inline]
85    pub fn char_at_visual_col(&self, visual_col: usize) -> usize {
86        self.visual_to_char
87            .get(visual_col)
88            .copied()
89            .unwrap_or_else(|| self.char_source_bytes.len().saturating_sub(1))
90    }
91
92    /// Get source byte at a given visual column (O(1) for mouse clicks)
93    #[inline]
94    pub fn source_byte_at_visual_col(&self, visual_col: usize) -> Option<usize> {
95        let char_idx = self.char_at_visual_col(visual_col);
96        self.source_byte_at_char(char_idx)
97    }
98
99    /// Get the visual column for a character at the given index
100    #[inline]
101    pub fn visual_col_at_char(&self, char_idx: usize) -> usize {
102        self.char_visual_cols.get(char_idx).copied().unwrap_or(0)
103    }
104
105    /// Total visual width of this line
106    #[inline]
107    pub fn visual_width(&self) -> usize {
108        self.visual_to_char.len()
109    }
110}
111
/// The boundary that precedes a display line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineStart {
    /// Very first line of the view; nothing came before it.
    Beginning,
    /// Follows a Newline token that carries a source offset.
    AfterSourceNewline,
    /// Follows a Newline token without a source offset (injected).
    AfterInjectedNewline,
    /// Follows a Break token, i.e. a wrapped continuation row.
    AfterBreak,
}

impl LineStart {
    /// Is this row a wrapped continuation of the previous one?
    ///
    /// Only `AfterBreak` counts as a continuation; such rows never carry a
    /// line number in the gutter. For every other variant this returns
    /// `false`, and whether a number is shown is decided elsewhere from the
    /// row's content.
    pub fn is_continuation(&self) -> bool {
        match self {
            LineStart::AfterBreak => true,
            LineStart::Beginning
            | LineStart::AfterSourceNewline
            | LineStart::AfterInjectedNewline => false,
        }
    }
}
136
137/// Iterator that converts a token stream into display lines
138pub struct ViewLineIterator<'a> {
139    tokens: &'a [ViewTokenWire],
140    token_idx: usize,
141    /// How the next line should start (based on what ended the previous line)
142    next_line_start: LineStart,
143    /// Whether to render in binary mode (unprintable chars shown as code points)
144    binary_mode: bool,
145    /// Whether to parse ANSI escape sequences (giving them zero visual width)
146    ansi_aware: bool,
147    /// Tab width for rendering (number of spaces per tab)
148    tab_size: usize,
149    /// Whether the token stream covers the end of the buffer.
150    /// When true, a trailing empty line is emitted after a final source newline
151    /// (representing the empty line after a file's trailing '\n').
152    at_buffer_end: bool,
153    /// Sorted, non-overlapping source-byte ranges whose tokens should be
154    /// skipped at the source level (collapsed folds). Empty slice disables
155    /// skipping. Set via [`ViewLineIterator::with_fold_skip`].
156    fold_skip: &'a [Range<usize>],
157    /// Advances monotonically through `fold_skip` as token source offsets
158    /// advance. Lets the per-token skip check run in O(1) amortised.
159    fold_cursor: usize,
160}
161
162impl<'a> ViewLineIterator<'a> {
163    /// Create a new ViewLineIterator with all options
164    ///
165    /// - `tokens`: The token stream to convert to display lines
166    /// - `binary_mode`: Whether to render unprintable chars as code points
167    /// - `ansi_aware`: Whether to parse ANSI escape sequences (giving them zero visual width)
168    /// - `tab_size`: Tab width for rendering (number of spaces per tab, should be > 0)
169    /// - `at_buffer_end`: Whether the token stream covers the end of the buffer.
170    ///   When true, a trailing empty line is emitted after a final source newline.
171    ///
172    /// Note: If tab_size is 0, it will be treated as 4 (the default) to prevent division by zero.
173    /// This is a defensive measure to handle invalid configuration gracefully.
174    pub fn new(
175        tokens: &'a [ViewTokenWire],
176        binary_mode: bool,
177        ansi_aware: bool,
178        tab_size: usize,
179        at_buffer_end: bool,
180    ) -> Self {
181        // Defensive: treat 0 as 4 (default) to prevent division by zero in tab_expansion_width
182        // This can happen if invalid config (tab_size: 0) is loaded
183        let tab_size = if tab_size == 0 { 4 } else { tab_size };
184        Self {
185            tokens,
186            token_idx: 0,
187            next_line_start: LineStart::Beginning,
188            binary_mode,
189            ansi_aware,
190            tab_size,
191            at_buffer_end,
192            fold_skip: &[],
193            fold_cursor: 0,
194        }
195    }
196
197    /// Configure source-byte ranges to skip during iteration. `skip` must be
198    /// sorted by `start` ascending and non-overlapping; caller is responsible
199    /// (derived once per render from `FoldManager::resolved_ranges`). Tokens
200    /// whose `source_offset` lies inside a skip range are consumed without
201    /// contributing to a ViewLine, so folded content is never materialised.
202    pub fn with_fold_skip(mut self, skip: &'a [Range<usize>]) -> Self {
203        self.fold_skip = skip;
204        self.fold_cursor = 0;
205        self
206    }
207
208    /// Expand a tab to spaces based on current column and configured tab_size
209    #[inline]
210    fn tab_expansion_width(&self, col: usize) -> usize {
211        self.tab_size - (col % self.tab_size)
212    }
213
214    /// Advance past tokens whose `source_offset` is inside a fold skip range.
215    /// Monotonic in source offsets, so `fold_cursor` only moves forward.
216    /// Tokens with `source_offset == None` (injected / virtual) are never
217    /// skipped. Line-start transitions are NOT updated: the next emitted
218    /// ViewLine's `line_start` continues to reflect the *last emitted*
219    /// line's terminator (typically the fold header's source newline).
220    #[inline]
221    fn skip_folded_tokens(&mut self) {
222        while self.token_idx < self.tokens.len() {
223            let token = &self.tokens[self.token_idx];
224            let Some(offset) = token.source_offset else {
225                return;
226            };
227            while self.fold_cursor < self.fold_skip.len()
228                && self.fold_skip[self.fold_cursor].end <= offset
229            {
230                self.fold_cursor += 1;
231            }
232            let in_skip = self
233                .fold_skip
234                .get(self.fold_cursor)
235                .is_some_and(|r| r.start <= offset && offset < r.end);
236            if !in_skip {
237                return;
238            }
239            self.token_idx += 1;
240        }
241    }
242}
243
/// Report whether `b` is a control byte that must be displayed as `<XX>`.
///
/// The unprintable set is the C0 control range (0x00-0x1F, which includes
/// CR, VT, FF and ESC) plus DEL (0x7F). Tab (0x09) and newline (0x0A) are
/// excluded because they render normally.
fn is_unprintable_byte(b: u8) -> bool {
    match b {
        // Tab and newline keep their normal rendering.
        b'\t' | b'\n' => false,
        // Remaining C0 controls and DEL.
        0x00..=0x1F | 0x7F => true,
        _ => false,
    }
}
262
/// Render a byte as a four-character code point string: `<`, two uppercase
/// hexadecimal digits, `>` (for example `"<00>"`).
fn format_unprintable_byte(b: u8) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    let mut rendered = String::with_capacity(4);
    rendered.push('<');
    rendered.push(char::from(HEX_DIGITS[usize::from(b >> 4)]));
    rendered.push(char::from(HEX_DIGITS[usize::from(b & 0x0F)]));
    rendered.push('>');
    rendered
}
267
268impl<'a> Iterator for ViewLineIterator<'a> {
269    type Item = ViewLine;
270
271    fn next(&mut self) -> Option<Self::Item> {
272        // Fold skip: advance past any tokens whose source bytes live inside
273        // a collapsed fold range before inspecting the next visible token.
274        self.skip_folded_tokens();
275
276        if self.token_idx >= self.tokens.len() {
277            // All tokens consumed.  If the previous line ended with a source
278            // newline there is one more real (empty) document line to emit —
279            // e.g. the empty line after a file's trailing '\n'.  Produce it
280            // exactly once, then stop.  Only do this when the tokens cover
281            // the actual end of the buffer (not a viewport slice).
282            if self.at_buffer_end && matches!(self.next_line_start, LineStart::AfterSourceNewline) {
283                // Flip to Beginning so the *next* call returns None.
284                self.next_line_start = LineStart::Beginning;
285                let last_source_byte = self.tokens.last().and_then(|t| t.source_offset);
286                return Some(ViewLine {
287                    text: String::new(),
288                    source_start_byte: last_source_byte.map(|s| s + 1),
289                    char_source_bytes: vec![],
290                    char_styles: vec![],
291                    char_visual_cols: vec![],
292                    visual_to_char: vec![],
293                    tab_starts: HashSet::new(),
294                    line_start: LineStart::AfterSourceNewline,
295                    ends_with_newline: false,
296                    virtual_gutter_glyph: None,
297                    virtual_line_style: None,
298                });
299            }
300            return None;
301        }
302
303        let line_start = self.next_line_start;
304        let mut text = String::new();
305
306        // Per-character tracking (indexed by character position)
307        let mut char_source_bytes: Vec<Option<usize>> = Vec::new();
308        let mut char_styles: Vec<Option<ViewTokenStyle>> = Vec::new();
309        let mut char_visual_cols: Vec<usize> = Vec::new();
310
311        // Per-visual-column tracking (indexed by visual column)
312        let mut visual_to_char: Vec<usize> = Vec::new();
313
314        let mut tab_starts = HashSet::new();
315        let mut col = 0usize; // Current visual column
316        let mut ends_with_newline = false;
317
318        // ANSI parser for tracking escape sequences (reuse existing implementation)
319        let mut ansi_parser = if self.ansi_aware {
320            Some(AnsiParser::new())
321        } else {
322            None
323        };
324
325        /// Helper to add a character with all its mappings
326        macro_rules! add_char {
327            ($ch:expr, $source:expr, $style:expr, $width:expr) => {{
328                let char_idx = char_source_bytes.len();
329
330                // Per-character data
331                text.push($ch);
332                char_source_bytes.push($source);
333                char_styles.push($style);
334                char_visual_cols.push(col);
335
336                // Per-visual-column data (for O(1) mouse clicks).
337                // Note: $width is 0 for zero-width codepoints (combining
338                // marks, ZWJ, continuation codepoints within a grapheme
339                // cluster) — we deliberately emit no visual_to_char
340                // entries for them.
341                #[allow(clippy::reversed_empty_ranges)]
342                for _ in 0..$width {
343                    visual_to_char.push(char_idx);
344                }
345
346                col += $width;
347            }};
348        }
349
350        // Process tokens until we hit a line break
351        while self.token_idx < self.tokens.len() {
352            // Skip tokens that fall inside a collapsed fold before
353            // touching the current line's accumulators.
354            self.skip_folded_tokens();
355            if self.token_idx >= self.tokens.len() {
356                break;
357            }
358            let token = &self.tokens[self.token_idx];
359            let token_style = token.style.clone();
360
361            match &token.kind {
362                ViewTokenWireKind::Text(t) => {
363                    let base = token.source_offset;
364                    let t_bytes = t.as_bytes();
365                    let mut byte_idx = 0;
366
367                    while byte_idx < t_bytes.len() {
368                        let b = t_bytes[byte_idx];
369
370                        // In binary mode, render unprintable bytes as <XX> code points.
371                        // These are never part of a grapheme cluster.
372                        if self.binary_mode && is_unprintable_byte(b) {
373                            let source = base.map(|s| s + byte_idx);
374                            let formatted = format_unprintable_byte(b);
375                            for display_ch in formatted.chars() {
376                                add_char!(display_ch, source, token_style.clone(), 1);
377                            }
378                            byte_idx += 1;
379                            continue;
380                        }
381
382                        // Decode the largest valid UTF-8 slice starting here so we can
383                        // segment it into grapheme clusters. Any invalid byte is
384                        // handled as a single-byte replacement char and we resume
385                        // decoding afterwards.
386                        let remaining = &t_bytes[byte_idx..];
387                        let valid = match std::str::from_utf8(remaining) {
388                            Ok(s) => s,
389                            Err(e) => {
390                                let valid_up_to = e.valid_up_to();
391                                if valid_up_to == 0 {
392                                    let source = base.map(|s| s + byte_idx);
393                                    if self.binary_mode {
394                                        let formatted = format_unprintable_byte(b);
395                                        for display_ch in formatted.chars() {
396                                            add_char!(display_ch, source, token_style.clone(), 1);
397                                        }
398                                    } else {
399                                        add_char!('\u{FFFD}', source, token_style.clone(), 1);
400                                    }
401                                    byte_idx += 1;
402                                    continue;
403                                } else {
404                                    // SAFETY: `valid_up_to` is a char boundary.
405                                    unsafe {
406                                        std::str::from_utf8_unchecked(&remaining[..valid_up_to])
407                                    }
408                                }
409                            }
410                        };
411
412                        // Canonical Unicode handling: iterate grapheme clusters, not
413                        // codepoints. The width of a cluster is `str_width(cluster)` —
414                        // `unicode-width` 0.2 correctly returns 2 for ZWJ family emoji,
415                        // 1 for a base+combining sequence like "é", 2 for fullwidth
416                        // letters, and so on. This is the same width ratatui computes
417                        // when it re-segments the span, so every stage of the pipeline
418                        // (wrap, column tracking, span placement) agrees on how many
419                        // cells each cluster occupies.
420                        //
421                        // We still record per-codepoint entries in the char-indexed
422                        // arrays (char_source_bytes / char_styles / char_visual_cols)
423                        // so byte↔column mapping stays exact for LSP positions, mouse
424                        // clicks, and cursor arithmetic. But `col` advances exactly
425                        // once per grapheme: the first codepoint of a cluster carries
426                        // the full width, the rest carry 0.
427                        let mut segmented_bytes = 0usize;
428                        for (g_byte_offset, grapheme) in valid.grapheme_indices(true) {
429                            segmented_bytes = g_byte_offset + grapheme.len();
430
431                            // In binary mode, any ASCII unprintable byte inside the
432                            // decoded slice must still be rendered as `<XX>`. This
433                            // covers graphemes consisting entirely of one unprintable
434                            // byte (e.g. `\x1A`) and CRLF (`\r\n`) where only the
435                            // `\r` half is unprintable — we split those out.
436                            if self.binary_mode {
437                                let bytes = grapheme.as_bytes();
438                                let has_unprintable =
439                                    bytes.iter().any(|&b| b < 0x80 && is_unprintable_byte(b));
440                                if has_unprintable {
441                                    let mut inner = 0usize;
442                                    for ch in grapheme.chars() {
443                                        let ch_len = ch.len_utf8();
444                                        let src =
445                                            base.map(|s| s + byte_idx + g_byte_offset + inner);
446                                        let ch_byte = ch as u32;
447                                        if ch_byte < 0x80 && is_unprintable_byte(ch_byte as u8) {
448                                            let formatted = format_unprintable_byte(ch_byte as u8);
449                                            for display_ch in formatted.chars() {
450                                                add_char!(display_ch, src, token_style.clone(), 1);
451                                            }
452                                        } else {
453                                            add_char!(ch, src, token_style.clone(), 1);
454                                        }
455                                        inner += ch_len;
456                                    }
457                                    continue;
458                                }
459                            }
460
461                            // Tab: a single codepoint forming its own grapheme, expanded to spaces.
462                            if grapheme == "\t" {
463                                let source = base.map(|s| s + byte_idx + g_byte_offset);
464                                let tab_start_pos = char_source_bytes.len();
465                                tab_starts.insert(tab_start_pos);
466                                let spaces = self.tab_expansion_width(col);
467
468                                let char_idx = char_source_bytes.len();
469                                text.push(' ');
470                                char_source_bytes.push(source);
471                                char_styles.push(token_style.clone());
472                                char_visual_cols.push(col);
473
474                                for _ in 0..spaces {
475                                    visual_to_char.push(char_idx);
476                                }
477                                col += spaces;
478
479                                // Spaces 1..N of the tab expansion. The i-th
480                                // space sits at `col_before_tab + i`, where
481                                // `col_before_tab = col - spaces` (col was
482                                // already incremented above).
483                                for i in 1..spaces {
484                                    text.push(' ');
485                                    char_source_bytes.push(source);
486                                    char_styles.push(token_style.clone());
487                                    char_visual_cols.push(col - spaces + i);
488                                }
489                                continue;
490                            }
491
492                            // ANSI escape sequences. Process char-by-char so the
493                            // AnsiParser state machine keeps track of the escape,
494                            // and keep them as width 0. In practice ESC never sits
495                            // inside a grapheme with visible content, so treating
496                            // a grapheme that starts with ESC as width-0 here is
497                            // correct.
498                            if let Some(ref mut parser) = ansi_parser {
499                                let first_ch = grapheme.chars().next().unwrap_or('\0');
500                                if parser.parse_char(first_ch).is_none() {
501                                    for ch in grapheme.chars() {
502                                        // All codepoints of an escape grapheme are width 0.
503                                        let src = base.map(|s| s + byte_idx + g_byte_offset);
504                                        // Keep the parser fed so state transitions work
505                                        // even across a multi-codepoint escape (rare).
506                                        if ch != first_ch {
507                                            let _ = parser.parse_char(ch);
508                                        }
509                                        add_char!(ch, src, token_style.clone(), 0);
510                                    }
511                                    continue;
512                                }
513                            }
514
515                            // Normal case: emit one display unit per grapheme.
516                            // Width goes on the FIRST codepoint, the rest are 0.
517                            let cluster_width = str_width(grapheme);
518                            let mut first = true;
519                            let mut inner_byte_offset = 0usize;
520                            for ch in grapheme.chars() {
521                                let source =
522                                    base.map(|s| s + byte_idx + g_byte_offset + inner_byte_offset);
523                                let w = if first {
524                                    first = false;
525                                    cluster_width
526                                } else {
527                                    0
528                                };
529                                add_char!(ch, source, token_style.clone(), w);
530                                inner_byte_offset += ch.len_utf8();
531                            }
532                        }
533
534                        byte_idx += segmented_bytes.max(1);
535                    }
536                    self.token_idx += 1;
537                }
538                ViewTokenWireKind::Space => {
539                    add_char!(' ', token.source_offset, token_style, 1);
540                    self.token_idx += 1;
541                }
542                ViewTokenWireKind::Newline => {
543                    // Newline ends this line - width 1 for the newline char
544                    add_char!('\n', token.source_offset, token_style, 1);
545                    ends_with_newline = true;
546
547                    // Determine how the next line starts
548                    self.next_line_start = if token.source_offset.is_some() {
549                        LineStart::AfterSourceNewline
550                    } else {
551                        LineStart::AfterInjectedNewline
552                    };
553                    self.token_idx += 1;
554                    break;
555                }
556                ViewTokenWireKind::Break => {
557                    // Break is a synthetic line break from wrapping
558                    add_char!('\n', None, None, 1);
559                    ends_with_newline = true;
560
561                    self.next_line_start = LineStart::AfterBreak;
562                    self.token_idx += 1;
563                    break;
564                }
565                ViewTokenWireKind::BinaryByte(b) => {
566                    // Binary byte rendered as <XX> - all 4 chars map to same source byte
567                    let formatted = format_unprintable_byte(*b);
568                    for display_ch in formatted.chars() {
569                        add_char!(display_ch, token.source_offset, token_style.clone(), 1);
570                    }
571                    self.token_idx += 1;
572                }
573            }
574        }
575
576        // col's final value is intentionally unused (only needed during iteration)
577        let _ = col;
578
579        // If we consumed all remaining tokens without hitting a Newline or Break,
580        // the content didn't end with a line terminator.  Reset next_line_start
581        // so the trailing-empty-line logic (at the top of next()) doesn't
582        // incorrectly fire on the subsequent call.  The `ends_with_newline` flag
583        // tells us whether the loop exited via a Newline/Break (true) or by
584        // exhausting all tokens (false).
585        if !ends_with_newline && self.token_idx >= self.tokens.len() {
586            self.next_line_start = LineStart::Beginning;
587        }
588
589        // Don't return empty injected/virtual lines at the end of the token
590        // stream.  However, DO return a trailing empty line that follows a source
591        // newline — it represents a real document line (e.g. after a file's
592        // trailing '\n') and the cursor may sit on it — but only when
593        // at_buffer_end is set (otherwise this is just a viewport slice).
594        if text.is_empty()
595            && self.token_idx >= self.tokens.len()
596            && !(self.at_buffer_end && matches!(line_start, LineStart::AfterSourceNewline))
597        {
598            return None;
599        }
600
601        Some(ViewLine {
602            text,
603            source_start_byte: char_source_bytes.iter().find_map(|s| *s),
604            char_source_bytes,
605            char_styles,
606            char_visual_cols,
607            visual_to_char,
608            tab_starts,
609            line_start,
610            ends_with_newline,
611            virtual_gutter_glyph: None,
612            virtual_line_style: None,
613        })
614    }
615}
616
617/// Determine if a display line should show a line number
618///
619/// Rules:
620/// - Wrapped continuation (line_start == AfterBreak): no line number
621/// - Injected content (first char has source_offset: None): no line number
622/// - Empty line at beginning or after source newline: yes line number
623/// - Otherwise: show line number
624pub fn should_show_line_number(line: &ViewLine) -> bool {
625    // Wrapped continuations never show line numbers
626    if line.line_start.is_continuation() {
627        return false;
628    }
629
630    // Check if this line contains injected (non-source) content
631    // An empty line is NOT injected if it's at the beginning or after a source newline
632    if line.char_source_bytes.is_empty() {
633        // Empty line - show line number if it's at beginning or after source newline
634        // (not after injected newline or break)
635        return matches!(
636            line.line_start,
637            LineStart::Beginning | LineStart::AfterSourceNewline
638        );
639    }
640
641    let first_char_is_source = line
642        .char_source_bytes
643        .first()
644        .map(|m| m.is_some())
645        .unwrap_or(false);
646
647    if !first_char_is_source {
648        // Injected line (header, etc.) - no line number
649        return false;
650    }
651
652    // Source content after a real line break - show line number
653    true
654}
655
656// ============================================================================
657// Layout: The computed display state for a view
658// ============================================================================
659
660use std::collections::BTreeMap;
661
662/// The Layout represents the computed display state for a view.
663///
664/// This is **View state**, not Buffer state. Each split has its own Layout
665/// computed from its view_transform (or base tokens if no transform).
666///
667/// The Layout provides:
668/// - ViewLines for the current viewport region
669/// - Bidirectional mapping between source bytes and view positions
670/// - Scroll limit information
671#[derive(Debug, Clone)]
672pub struct Layout {
673    /// Display lines for the current viewport region
674    pub lines: Vec<ViewLine>,
675
676    /// Source byte range this layout covers
677    pub source_range: Range<usize>,
678
679    /// Total view lines in entire document (estimated or exact)
680    pub total_view_lines: usize,
681
682    /// Total injected lines in entire document (from view transform)
683    pub total_injected_lines: usize,
684
685    /// Fast lookup: source byte → view line index
686    byte_to_line: BTreeMap<usize, usize>,
687}
688
689impl Layout {
690    /// Create a new Layout from ViewLines
691    pub fn new(lines: Vec<ViewLine>, source_range: Range<usize>) -> Self {
692        let mut byte_to_line = BTreeMap::new();
693
694        // Build the byte→line index from char_source_bytes
695        for (line_idx, line) in lines.iter().enumerate() {
696            // Find the first source byte in this line
697            if let Some(first_byte) = line.char_source_bytes.iter().find_map(|m| *m) {
698                byte_to_line.insert(first_byte, line_idx);
699            }
700        }
701
702        // Estimate total view lines (for now, just use what we have)
703        let total_view_lines = lines.len();
704        let total_injected_lines = lines.iter().filter(|l| !should_show_line_number(l)).count();
705
706        Self {
707            lines,
708            source_range,
709            total_view_lines,
710            total_injected_lines,
711            byte_to_line,
712        }
713    }
714
715    /// Build a Layout from a token stream
716    pub fn from_tokens(
717        tokens: &[ViewTokenWire],
718        source_range: Range<usize>,
719        tab_size: usize,
720    ) -> Self {
721        let lines: Vec<ViewLine> =
722            ViewLineIterator::new(tokens, false, false, tab_size, false).collect();
723        Self::new(lines, source_range)
724    }
725
726    /// Find the view position (line, visual column) for a source byte
727    pub fn source_byte_to_view_position(&self, byte: usize) -> Option<(usize, usize)> {
728        // Find the view line containing this byte
729        if let Some((&_line_start_byte, &line_idx)) = self.byte_to_line.range(..=byte).last() {
730            if line_idx < self.lines.len() {
731                let line = &self.lines[line_idx];
732                // Find the character with this source byte, then get its visual column
733                for (char_idx, mapping) in line.char_source_bytes.iter().enumerate() {
734                    if *mapping == Some(byte) {
735                        return Some((line_idx, line.visual_col_at_char(char_idx)));
736                    }
737                }
738                // Byte is in this line's range but not at a character boundary
739                // Return end of line (visual width)
740                return Some((line_idx, line.visual_width()));
741            }
742        }
743        None
744    }
745
746    /// Find the source byte for a view position (line, visual column)
747    pub fn view_position_to_source_byte(&self, line_idx: usize, col: usize) -> Option<usize> {
748        if line_idx >= self.lines.len() {
749            return None;
750        }
751        let line = &self.lines[line_idx];
752        if col < line.visual_width() {
753            // Use O(1) lookup via visual_to_char -> char_source_bytes
754            line.source_byte_at_visual_col(col)
755        } else if !line.char_source_bytes.is_empty() {
756            // Past end of line, return last valid byte
757            line.char_source_bytes.iter().rev().find_map(|m| *m)
758        } else {
759            None
760        }
761    }
762
763    /// Get the source byte for the start of a view line
764    pub fn get_source_byte_for_line(&self, line_idx: usize) -> Option<usize> {
765        if line_idx >= self.lines.len() {
766            return None;
767        }
768        self.lines[line_idx]
769            .char_source_bytes
770            .iter()
771            .find_map(|m| *m)
772    }
773
774    /// Find the nearest view line for a source byte (for stabilization)
775    pub fn find_nearest_view_line(&self, byte: usize) -> usize {
776        if let Some((&_line_start_byte, &line_idx)) = self.byte_to_line.range(..=byte).last() {
777            line_idx.min(self.lines.len().saturating_sub(1))
778        } else {
779            0
780        }
781    }
782
783    /// Calculate the maximum top line for scrolling
784    pub fn max_top_line(&self, viewport_height: usize) -> usize {
785        self.lines.len().saturating_sub(viewport_height)
786    }
787
788    /// Check if there's content below the current layout
789    pub fn has_content_below(&self, buffer_len: usize) -> bool {
790        self.source_range.end < buffer_len
791    }
792}
793
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `Text` token carrying `text`, with the given source offset and no style.
    fn make_text_token(text: &str, source_offset: Option<usize>) -> ViewTokenWire {
        let kind = ViewTokenWireKind::Text(text.to_owned());
        ViewTokenWire {
            kind,
            source_offset,
            style: None,
        }
    }

    /// Build a `Newline` token with the given source offset and no style.
    fn make_newline_token(source_offset: Option<usize>) -> ViewTokenWire {
        let kind = ViewTokenWireKind::Newline;
        ViewTokenWire {
            kind,
            source_offset,
            style: None,
        }
    }

    /// Build a `Break` token (no source offset, no style).
    fn make_break_token() -> ViewTokenWire {
        let kind = ViewTokenWireKind::Break;
        ViewTokenWire {
            kind,
            source_offset: None,
            style: None,
        }
    }

    /// Run `tokens` through `ViewLineIterator` with a tab size of 4 and collect
    /// the resulting lines. `binary_mode` is forwarded as the second argument
    /// (the one the binary-mode tests switch on); the remaining flags are `false`.
    fn collect_lines(tokens: &[ViewTokenWire], binary_mode: bool) -> Vec<ViewLine> {
        ViewLineIterator::new(tokens, binary_mode, false, 4, false).collect()
    }

    #[test]
    fn test_simple_source_lines() {
        let tokens = vec![
            make_text_token("Line 1", Some(0)),
            make_newline_token(Some(6)),
            make_text_token("Line 2", Some(7)),
            make_newline_token(Some(13)),
        ];

        let lines = collect_lines(&tokens, false);
        assert_eq!(lines.len(), 2);

        let first = &lines[0];
        assert_eq!(first.text, "Line 1\n");
        assert_eq!(first.line_start, LineStart::Beginning);
        assert!(should_show_line_number(first));

        let second = &lines[1];
        assert_eq!(second.text, "Line 2\n");
        assert_eq!(second.line_start, LineStart::AfterSourceNewline);
        assert!(should_show_line_number(second));
    }

    #[test]
    fn test_wrapped_continuation() {
        let tokens = vec![
            make_text_token("Line 1 start", Some(0)),
            make_break_token(), // soft-wrap point
            make_text_token("continued", Some(12)),
            make_newline_token(Some(21)),
        ];

        let lines = collect_lines(&tokens, false);
        assert_eq!(lines.len(), 2);

        let head = &lines[0];
        assert_eq!(head.line_start, LineStart::Beginning);
        assert!(should_show_line_number(head));

        let continuation = &lines[1];
        assert_eq!(continuation.line_start, LineStart::AfterBreak);
        assert!(
            !should_show_line_number(continuation),
            "Wrapped continuation should NOT show line number"
        );
    }

    #[test]
    fn test_injected_header_then_source() {
        // Regression scenario: an injected header line directly followed by
        // real source content.
        let tokens = vec![
            // Injected header (no source offsets)
            make_text_token("== HEADER ==", None),
            make_newline_token(None),
            // Source content
            make_text_token("Line 1", Some(0)),
            make_newline_token(Some(6)),
        ];

        let lines = collect_lines(&tokens, false);
        assert_eq!(lines.len(), 2);

        // The header is injected content, so it must not be numbered.
        let header = &lines[0];
        assert_eq!(header.text, "== HEADER ==\n");
        assert_eq!(header.line_start, LineStart::Beginning);
        assert!(
            !should_show_line_number(header),
            "Injected header should NOT show line number"
        );

        // The source line that follows the header must be numbered.
        let source_line = &lines[1];
        assert_eq!(source_line.text, "Line 1\n");
        assert_eq!(source_line.line_start, LineStart::AfterInjectedNewline);
        assert!(
            should_show_line_number(source_line),
            "BUG: Source line after injected header SHOULD show line number!\n\
             line_start={:?}, first_char_is_source={}",
            source_line.line_start,
            matches!(source_line.char_source_bytes.first(), Some(Some(_)))
        );
    }

    #[test]
    fn test_mixed_scenario() {
        // Header -> source line 1 -> source line 2 (wrapped) -> source line 3
        let tokens = vec![
            // Injected header
            make_text_token("== Block 1 ==", None),
            make_newline_token(None),
            // Source line 1
            make_text_token("Line 1", Some(0)),
            make_newline_token(Some(6)),
            // Source line 2, split by a wrap
            make_text_token("Line 2 start", Some(7)),
            make_break_token(),
            make_text_token("wrapped", Some(19)),
            make_newline_token(Some(26)),
            // Source line 3
            make_text_token("Line 3", Some(27)),
            make_newline_token(Some(33)),
        ];

        let lines = collect_lines(&tokens, false);
        assert_eq!(lines.len(), 5);

        let numbered: Vec<bool> = lines.iter().map(should_show_line_number).collect();
        assert_eq!(
            numbered,
            vec![
                false, // header: injected
                true,  // line 1: source after header
                true,  // line 2 start
                false, // line 2 wrapped: continuation
                true,  // line 3
            ]
        );
    }

    #[test]
    fn test_is_unprintable_byte() {
        // NUL
        assert!(is_unprintable_byte(0x00));

        // Low control characters 0x01-0x08
        assert!(is_unprintable_byte(0x01));
        assert!(is_unprintable_byte(0x02));
        assert!(is_unprintable_byte(0x08));

        // Tab and line feed are let through
        assert!(!is_unprintable_byte(0x09)); // HT
        assert!(!is_unprintable_byte(0x0A)); // LF

        // VT, FF and CR count as unprintable in binary mode
        assert!(is_unprintable_byte(0x0B)); // VT
        assert!(is_unprintable_byte(0x0C)); // FF
        assert!(is_unprintable_byte(0x0D)); // CR

        // Everything in 0x0E-0x1F, ESC included
        assert!(is_unprintable_byte(0x0E));
        assert!(is_unprintable_byte(0x1A)); // SUB, appears in PNG headers
        assert!(is_unprintable_byte(0x1B)); // ESC
        assert!(is_unprintable_byte(0x1C));
        assert!(is_unprintable_byte(0x1F));

        // Printable ASCII range 0x20-0x7E
        assert!(!is_unprintable_byte(0x20)); // ' '
        assert!(!is_unprintable_byte(0x41)); // 'A'
        assert!(!is_unprintable_byte(0x7E)); // '~'

        // DEL
        assert!(is_unprintable_byte(0x7F));

        // Bytes >= 0x80 are let through (they may belong to UTF-8 sequences)
        assert!(!is_unprintable_byte(0x80));
        assert!(!is_unprintable_byte(0xFF));
    }

    #[test]
    fn test_format_unprintable_byte() {
        let cases = [
            (0x00, "<00>"),
            (0x01, "<01>"),
            (0x1A, "<1A>"),
            (0x7F, "<7F>"),
            (0xFF, "<FF>"),
        ];
        for (byte, rendered) in cases {
            assert_eq!(format_unprintable_byte(byte), rendered);
        }
    }

    #[test]
    fn test_binary_mode_renders_control_chars() {
        // Text containing a NUL byte and a 0x01 control character.
        let tokens = vec![
            make_text_token("Hello\x00World\x01End", Some(0)),
            make_newline_token(Some(15)),
        ];

        // Normal mode: control characters get no special formatting; only the
        // line count is checked here.
        let lines_normal = collect_lines(&tokens, false);
        assert_eq!(lines_normal.len(), 1);

        // Binary mode: control characters must appear as <XX>.
        let lines_binary = collect_lines(&tokens, true);
        assert_eq!(lines_binary.len(), 1);

        let rendered = &lines_binary[0].text;
        assert!(
            rendered.contains("<00>"),
            "Binary mode should format null byte as <00>, got: {}",
            rendered
        );
        assert!(
            rendered.contains("<01>"),
            "Binary mode should format 0x01 as <01>, got: {}",
            rendered
        );
    }

    #[test]
    fn test_binary_mode_png_header() {
        // PNG-like prefix containing the SUB control character (0x1A),
        // expressed as a valid UTF-8 string.
        let tokens = vec![make_text_token("PNG\r\n\x1A\n", Some(0))];

        let lines = collect_lines(&tokens, true);

        // The 0x1A byte must show up as <1A> somewhere in the output.
        let combined: String = lines.iter().map(|line| line.text.as_str()).collect();
        assert!(
            combined.contains("<1A>"),
            "PNG SUB byte (0x1A) should be rendered as <1A>, got: {:?}",
            combined
        );
    }

    #[test]
    fn test_binary_mode_preserves_printable_chars() {
        let tokens = vec![
            make_text_token("Normal text 123", Some(0)),
            make_newline_token(Some(15)),
        ];

        let lines = collect_lines(&tokens, true);
        assert_eq!(lines.len(), 1);
        assert!(
            lines[0].text.contains("Normal text 123"),
            "Printable chars should be preserved in binary mode"
        );
    }

    #[test]
    fn test_double_width_visual_mappings() {
        // "你好": two Chinese characters, each 3 bytes long and 2 columns wide.
        // Bytes:   你 = 0..=2, 好 = 3..=5
        // Columns: 你 = 0..=1, 好 = 2..=3
        let tokens = vec![
            make_text_token("你好", Some(0)),
            make_newline_token(Some(6)),
        ];

        let lines = collect_lines(&tokens, false);
        assert_eq!(lines.len(), 1);
        let line = &lines[0];

        // One visual_to_char entry per visual column: 2 + 2 + 1 (newline) = 5.
        assert_eq!(
            line.visual_width(),
            5,
            "Expected 5 visual columns (2 for 你 + 2 for 好 + 1 for newline), got {}",
            line.visual_width()
        );

        // One char_source_bytes entry per character: 你, 好, \n.
        assert_eq!(
            line.char_source_bytes.len(),
            3,
            "Expected 3 char entries (你, 好, newline), got {}",
            line.char_source_bytes.len()
        );

        // Both columns of a wide character resolve to that character's first
        // byte; the newline resolves to byte 6.
        let column_expectations = [
            (0, Some(0), "Column 0 should map to byte 0"),
            (1, Some(0), "Column 1 should map to byte 0"),
            (2, Some(3), "Column 2 should map to byte 3"),
            (3, Some(3), "Column 3 should map to byte 3"),
            (4, Some(6), "Column 4 (newline) should map to byte 6"),
        ];
        for (col, expected_byte, why) in column_expectations {
            assert_eq!(line.source_byte_at_visual_col(col), expected_byte, "{}", why);
        }
    }

    #[test]
    fn test_mixed_width_visual_mappings() {
        // "a你b": ASCII, Chinese (2 columns), ASCII.
        // Bytes:   a = 0, 你 = 1..=3, b = 4
        // Columns: a = 0, 你 = 1..=2, b = 3
        let tokens = vec![
            make_text_token("a你b", Some(0)),
            make_newline_token(Some(5)),
        ];

        let lines = collect_lines(&tokens, false);
        assert_eq!(lines.len(), 1);
        let line = &lines[0];

        // 1 (a) + 2 (你) + 1 (b) + 1 (newline) = 5 visual columns.
        assert_eq!(
            line.visual_width(),
            5,
            "Expected 5 visual columns, got {}",
            line.visual_width()
        );

        // Four characters: a, 你, b, \n.
        assert_eq!(
            line.char_source_bytes.len(),
            4,
            "Expected 4 char entries, got {}",
            line.char_source_bytes.len()
        );

        // Visual column -> source byte lookups.
        let column_expectations = [
            (0, Some(0), "Column 0 (a) should map to byte 0"),
            (1, Some(1), "Column 1 (你 col 1) should map to byte 1"),
            (2, Some(1), "Column 2 (你 col 2) should map to byte 1"),
            (3, Some(4), "Column 3 (b) should map to byte 4"),
            (4, Some(5), "Column 4 (newline) should map to byte 5"),
        ];
        for (col, expected_byte, why) in column_expectations {
            assert_eq!(line.source_byte_at_visual_col(col), expected_byte, "{}", why);
        }
    }

    // ==================== CRLF Mode Tests ====================

    /// `ViewLineIterator` must map `char_source_bytes` correctly for CRLF content.
    /// In CRLF mode the Newline token sits at the `\r` position and the `\n`
    /// is skipped; this checks that source byte positions are tracked accordingly.
    #[test]
    fn test_crlf_char_source_bytes_single_line() {
        // Models CRLF content "abc\r\n":
        // - bytes: a=0, b=1, c=2, \r=3, \n=4
        // - the Newline token has source_offset=3 (the \r)
        let tokens = vec![
            make_text_token("abc", Some(0)),
            make_newline_token(Some(3)), // position of \r
        ];

        let lines = collect_lines(&tokens, false);
        assert_eq!(lines.len(), 1);
        let line = &lines[0];

        // Display characters: 'a', 'b', 'c', '\n'
        assert_eq!(line.text, "abc\n");

        assert_eq!(
            line.char_source_bytes.len(),
            4,
            "Expected 4 chars: a, b, c, newline"
        );

        // Each display character must point at its own source byte.
        let char_expectations = [
            (0, Some(0), "char 'a' should map to byte 0"),
            (1, Some(1), "char 'b' should map to byte 1"),
            (2, Some(2), "char 'c' should map to byte 2"),
            (3, Some(3), "newline should map to byte 3 (\\r position)"),
        ];
        for (char_idx, expected_byte, why) in char_expectations {
            assert_eq!(line.char_source_bytes[char_idx], expected_byte, "{}", why);
        }
    }

    /// CRLF `char_source_bytes` across several lines.
    /// This is the key check against an offset error that accumulates per line.
    #[test]
    fn test_crlf_char_source_bytes_multiple_lines() {
        // Models CRLF content "abc\r\ndef\r\nghi\r\n":
        // Line 1: a=0,  b=1,  c=2,  \r=3,  \n=4  (5 bytes)
        // Line 2: d=5,  e=6,  f=7,  \r=8,  \n=9  (5 bytes)
        // Line 3: g=10, h=11, i=12, \r=13, \n=14 (5 bytes)
        let tokens = vec![
            // Line 1
            make_text_token("abc", Some(0)),
            make_newline_token(Some(3)), // \r at byte 3
            // Line 2
            make_text_token("def", Some(5)),
            make_newline_token(Some(8)), // \r at byte 8
            // Line 3
            make_text_token("ghi", Some(10)),
            make_newline_token(Some(13)), // \r at byte 13
        ];

        let lines = collect_lines(&tokens, false);
        assert_eq!(lines.len(), 3);

        // A per-line off-by-one would first show on line 2 and grow on line 3.
        let line_expectations = [
            (
                "abc\n",
                vec![Some(0), Some(1), Some(2), Some(3)],
                "Line 1 char_source_bytes mismatch",
            ),
            (
                "def\n",
                vec![Some(5), Some(6), Some(7), Some(8)],
                "Line 2 char_source_bytes mismatch - possible CRLF offset drift",
            ),
            (
                "ghi\n",
                vec![Some(10), Some(11), Some(12), Some(13)],
                "Line 3 char_source_bytes mismatch - CRLF offset drift accumulated",
            ),
        ];
        for (line, (expected_text, expected_bytes, why)) in lines.iter().zip(line_expectations) {
            assert_eq!(line.text, expected_text);
            assert_eq!(line.char_source_bytes, expected_bytes, "{}", why);
        }
    }

    /// Issue #1997: with adjacent tab characters the indicator arrow was drawn
    /// twice. Root cause: the `char_visual_cols` entries for the 2nd..Nth
    /// expansion space of every tab were one column too high, so the
    /// renderer's `col_offset` skipped column 1, reached `tab_starts` for the
    /// next tab one iteration early, and emitted "→" both for the trailing
    /// space of the previous tab and the leading space of the next one.
    #[test]
    fn test_adjacent_tabs_visual_cols_monotonic() {
        // Two adjacent tabs at the start of a line, tab_size = 4.
        // Source bytes: \t=0, \t=1
        // Each tab expands to 4 spaces, giving 8 expansion characters at
        // visual columns 0..=7, exactly one column per character.
        let tokens = vec![
            make_text_token("\t\t", Some(0)),
            make_newline_token(Some(2)),
        ];

        let lines = collect_lines(&tokens, false);
        assert_eq!(lines.len(), 1);
        let line = &lines[0];

        // 8 spaces plus the newline character
        assert_eq!(line.char_visual_cols.len(), 9);
        assert_eq!(
            &line.char_visual_cols[..8],
            &[0, 1, 2, 3, 4, 5, 6, 7],
            "Each expansion space must sit at its own visual column"
        );

        // tab_starts holds the char indices where each tab begins. With the
        // tabs at columns 0 and 4, the only valid indices are 0 and 4.
        let mut starts: Vec<usize> = line.tab_starts.iter().copied().collect();
        starts.sort_unstable();
        assert_eq!(starts, vec![0, 4]);
    }

    /// CRLF visual column to source byte mapping.
    /// Checks that `source_byte_at_visual_col` gives correct results for CRLF content.
    #[test]
    fn test_crlf_visual_to_source_mapping() {
        // CRLF content "ab\r\ncd\r\n"
        // Line 1: a=0, b=1, \r=2, \n=3
        // Line 2: c=4, d=5, \r=6, \n=7
        let tokens = vec![
            make_text_token("ab", Some(0)),
            make_newline_token(Some(2)),
            make_text_token("cd", Some(4)),
            make_newline_token(Some(6)),
        ];

        let lines = collect_lines(&tokens, false);

        // (line index, visual column, expected source byte, label)
        let expectations = [
            // Line 1: columns 0,1 -> bytes 0,1; newline column -> byte 2
            (0, 0, Some(0), "Line 1 col 0"),
            (0, 1, Some(1), "Line 1 col 1"),
            (0, 2, Some(2), "Line 1 col 2 (newline)"),
            // Line 2: columns 0,1 -> bytes 4,5; newline column -> byte 6
            (1, 0, Some(4), "Line 2 col 0"),
            (1, 1, Some(5), "Line 2 col 1"),
            (1, 2, Some(6), "Line 2 col 2 (newline)"),
        ];
        for (line_idx, col, expected_byte, label) in expectations {
            assert_eq!(
                lines[line_idx].source_byte_at_visual_col(col),
                expected_byte,
                "{}",
                label
            );
        }
    }
}