fresh/view/ui/
view_pipeline.rs

1//! Token-based view rendering pipeline
2//!
3//! This module provides a clean pipeline for rendering view tokens:
4//!
5//! ```text
6//! source buffer
7//!     ↓ build_base_tokens()
8//! Vec<ViewTokenWire>  (base tokens with source mappings)
9//!     ↓ plugin transform (optional)
10//! Vec<ViewTokenWire>  (transformed tokens, may have injected content)
11//!     ↓ apply_wrapping() (optional)
12//! Vec<ViewTokenWire>  (with Break tokens for wrapped lines)
13//!     ↓ ViewLineIterator
14//! Iterator<ViewLine>  (one per display line, preserves token info)
15//!     ↓ render
16//! Display output
17//! ```
18//!
19//! The key design principle: preserve token-level information through the pipeline
20//! so rendering decisions (like line numbers) can be made based on token types,
21//! not reconstructed from flattened text.
22
23use crate::primitives::ansi::AnsiParser;
24use crate::primitives::display_width::str_width;
25use fresh_core::api::{ViewTokenStyle, ViewTokenWire, ViewTokenWireKind};
26use std::collections::HashSet;
27use std::ops::Range;
28use unicode_segmentation::UnicodeSegmentation;
29
/// A display line built from tokens, preserving token-level information.
///
/// One `ViewLine` is produced per display row by [`ViewLineIterator`].
/// The per-character vectors (`char_source_bytes`, `char_styles`,
/// `char_visual_cols`) are pushed in lockstep with `text`, one entry per
/// `char`; `visual_to_char` is indexed by visual column instead.
#[derive(Debug, Clone)]
pub struct ViewLine {
    /// The display text for this line (tabs expanded to spaces, etc.).
    /// Includes the terminating `'\n'` when `ends_with_newline` is true.
    pub text: String,

    /// Absolute source byte offset of the start of this line (if it has one).
    /// For lines built from tokens this is the first `Some` entry of
    /// `char_source_bytes`; a line made only of injected content has `None`.
    pub source_start_byte: Option<usize>,

    // === Per-CHARACTER mappings (indexed by char position in text) ===
    /// Source byte offset for each character.
    /// `None` for characters with no source position (e.g. the `'\n'`
    /// emitted for a wrap `Break`, or injected tokens).
    /// Length == text.chars().count()
    pub char_source_bytes: Vec<Option<usize>>,
    /// Style for each character (cloned from the style of the token that
    /// produced it; `None` when the token carried no style).
    pub char_styles: Vec<Option<ViewTokenStyle>>,
    /// Visual column where each character starts.
    /// Zero-width codepoints (e.g. the trailing codepoints of a grapheme
    /// cluster) record the current column without advancing it.
    pub char_visual_cols: Vec<usize>,

    // === Per-VISUAL-COLUMN mapping (indexed by visual column) ===
    /// Character index at each visual column (for O(1) mouse clicks)
    /// For double-width chars, consecutive visual columns map to the same char index
    /// Length == total visual width of line
    pub visual_to_char: Vec<usize>,

    /// Character indices (positions in the per-character vectors) at which
    /// a tab expansion starts.
    pub tab_starts: HashSet<usize>,
    /// How this line started (what kind of token/boundary preceded it)
    pub line_start: LineStart,
    /// Whether this line ends with a newline character. Set both for
    /// `Newline` tokens and for synthetic `Break` (wrap) tokens.
    pub ends_with_newline: bool,
}
61
62impl ViewLine {
63    /// Get source byte at a given character index (O(1))
64    #[inline]
65    pub fn source_byte_at_char(&self, char_idx: usize) -> Option<usize> {
66        self.char_source_bytes.get(char_idx).copied().flatten()
67    }
68
69    /// Get character index at a given visual column (O(1))
70    #[inline]
71    pub fn char_at_visual_col(&self, visual_col: usize) -> usize {
72        self.visual_to_char
73            .get(visual_col)
74            .copied()
75            .unwrap_or_else(|| self.char_source_bytes.len().saturating_sub(1))
76    }
77
78    /// Get source byte at a given visual column (O(1) for mouse clicks)
79    #[inline]
80    pub fn source_byte_at_visual_col(&self, visual_col: usize) -> Option<usize> {
81        let char_idx = self.char_at_visual_col(visual_col);
82        self.source_byte_at_char(char_idx)
83    }
84
85    /// Get the visual column for a character at the given index
86    #[inline]
87    pub fn visual_col_at_char(&self, char_idx: usize) -> usize {
88        self.char_visual_cols.get(char_idx).copied().unwrap_or(0)
89    }
90
91    /// Total visual width of this line
92    #[inline]
93    pub fn visual_width(&self) -> usize {
94        self.visual_to_char.len()
95    }
96}
97
/// What preceded the start of a display line
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineStart {
    /// First line of the view (no preceding token)
    Beginning,
    /// Line after a source Newline token (source_offset: Some)
    AfterSourceNewline,
    /// Line after an injected Newline token (source_offset: None)
    AfterInjectedNewline,
    /// Line after a Break token (wrapped continuation)
    AfterBreak,
}

impl LineStart {
    /// Is this line a wrapped continuation of the previous display line?
    ///
    /// Returns `true` only for [`LineStart::AfterBreak`]. Callers use this to
    /// decide gutter rendering: continuations never get a line number, while
    /// the other variants may (for `AfterInjectedNewline` that further
    /// depends on whether the line's content is source or injected).
    pub fn is_continuation(&self) -> bool {
        *self == LineStart::AfterBreak
    }
}
122
/// Iterator that converts a token stream into display lines.
///
/// Each call to `next()` consumes tokens up to and including the next
/// `Newline` or `Break` token (or the end of the slice) and yields one
/// [`ViewLine`].
pub struct ViewLineIterator<'a> {
    /// The full token stream being iterated (borrowed, never modified).
    tokens: &'a [ViewTokenWire],
    /// Index into `tokens` of the next token to consume.
    token_idx: usize,
    /// How the next line should start (based on what ended the previous line)
    next_line_start: LineStart,
    /// Whether to render in binary mode (unprintable chars shown as code points)
    binary_mode: bool,
    /// Whether to parse ANSI escape sequences (giving them zero visual width)
    ansi_aware: bool,
    /// Tab width for rendering (number of spaces per tab).
    /// Never 0: the constructor substitutes 4 for a zero value.
    tab_size: usize,
    /// Whether the token stream covers the end of the buffer.
    /// When true, a trailing empty line is emitted after a final source newline
    /// (representing the empty line after a file's trailing '\n').
    at_buffer_end: bool,
    /// Sorted, non-overlapping source-byte ranges whose tokens should be
    /// skipped at the source level (collapsed folds). Empty slice disables
    /// skipping. Set via [`ViewLineIterator::with_fold_skip`].
    fold_skip: &'a [Range<usize>],
    /// Advances monotonically through `fold_skip` as token source offsets
    /// advance. Lets the per-token skip check run in O(1) amortised.
    fold_cursor: usize,
}
147
148impl<'a> ViewLineIterator<'a> {
149    /// Create a new ViewLineIterator with all options
150    ///
151    /// - `tokens`: The token stream to convert to display lines
152    /// - `binary_mode`: Whether to render unprintable chars as code points
153    /// - `ansi_aware`: Whether to parse ANSI escape sequences (giving them zero visual width)
154    /// - `tab_size`: Tab width for rendering (number of spaces per tab, should be > 0)
155    /// - `at_buffer_end`: Whether the token stream covers the end of the buffer.
156    ///   When true, a trailing empty line is emitted after a final source newline.
157    ///
158    /// Note: If tab_size is 0, it will be treated as 4 (the default) to prevent division by zero.
159    /// This is a defensive measure to handle invalid configuration gracefully.
160    pub fn new(
161        tokens: &'a [ViewTokenWire],
162        binary_mode: bool,
163        ansi_aware: bool,
164        tab_size: usize,
165        at_buffer_end: bool,
166    ) -> Self {
167        // Defensive: treat 0 as 4 (default) to prevent division by zero in tab_expansion_width
168        // This can happen if invalid config (tab_size: 0) is loaded
169        let tab_size = if tab_size == 0 { 4 } else { tab_size };
170        Self {
171            tokens,
172            token_idx: 0,
173            next_line_start: LineStart::Beginning,
174            binary_mode,
175            ansi_aware,
176            tab_size,
177            at_buffer_end,
178            fold_skip: &[],
179            fold_cursor: 0,
180        }
181    }
182
183    /// Configure source-byte ranges to skip during iteration. `skip` must be
184    /// sorted by `start` ascending and non-overlapping; caller is responsible
185    /// (derived once per render from `FoldManager::resolved_ranges`). Tokens
186    /// whose `source_offset` lies inside a skip range are consumed without
187    /// contributing to a ViewLine, so folded content is never materialised.
188    pub fn with_fold_skip(mut self, skip: &'a [Range<usize>]) -> Self {
189        self.fold_skip = skip;
190        self.fold_cursor = 0;
191        self
192    }
193
194    /// Expand a tab to spaces based on current column and configured tab_size
195    #[inline]
196    fn tab_expansion_width(&self, col: usize) -> usize {
197        self.tab_size - (col % self.tab_size)
198    }
199
200    /// Advance past tokens whose `source_offset` is inside a fold skip range.
201    /// Monotonic in source offsets, so `fold_cursor` only moves forward.
202    /// Tokens with `source_offset == None` (injected / virtual) are never
203    /// skipped. Line-start transitions are NOT updated: the next emitted
204    /// ViewLine's `line_start` continues to reflect the *last emitted*
205    /// line's terminator (typically the fold header's source newline).
206    #[inline]
207    fn skip_folded_tokens(&mut self) {
208        while self.token_idx < self.tokens.len() {
209            let token = &self.tokens[self.token_idx];
210            let Some(offset) = token.source_offset else {
211                return;
212            };
213            while self.fold_cursor < self.fold_skip.len()
214                && self.fold_skip[self.fold_cursor].end <= offset
215            {
216                self.fold_cursor += 1;
217            }
218            let in_skip = self
219                .fold_skip
220                .get(self.fold_cursor)
221                .is_some_and(|r| r.start <= offset && offset < r.end);
222            if !in_skip {
223                return;
224            }
225            self.token_idx += 1;
226        }
227    }
228}
229
/// Decide whether a byte is a control character that must be shown as `<XX>`.
///
/// True for the C0 control range (0x00-0x1F) and DEL (0x7F), with two
/// exceptions that render normally: tab (0x09) and newline (0x0A).
/// CR, VT, FF and ESC are therefore all reported as unprintable.
fn is_unprintable_byte(b: u8) -> bool {
    matches!(b, 0x00..=0x08 | 0x0B..=0x1F | 0x7F)
}
248
/// Render a byte as its bracketed two-digit uppercase hex code, e.g. `<00>` or `<1B>`.
fn format_unprintable_byte(b: u8) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let high = HEX_DIGITS[usize::from(b >> 4)];
    let low = HEX_DIGITS[usize::from(b & 0x0F)];

    let mut rendered = String::with_capacity(4);
    rendered.push('<');
    rendered.push(char::from(high));
    rendered.push(char::from(low));
    rendered.push('>');
    rendered
}
253
254impl<'a> Iterator for ViewLineIterator<'a> {
255    type Item = ViewLine;
256
257    fn next(&mut self) -> Option<Self::Item> {
258        // Fold skip: advance past any tokens whose source bytes live inside
259        // a collapsed fold range before inspecting the next visible token.
260        self.skip_folded_tokens();
261
262        if self.token_idx >= self.tokens.len() {
263            // All tokens consumed.  If the previous line ended with a source
264            // newline there is one more real (empty) document line to emit —
265            // e.g. the empty line after a file's trailing '\n'.  Produce it
266            // exactly once, then stop.  Only do this when the tokens cover
267            // the actual end of the buffer (not a viewport slice).
268            if self.at_buffer_end && matches!(self.next_line_start, LineStart::AfterSourceNewline) {
269                // Flip to Beginning so the *next* call returns None.
270                self.next_line_start = LineStart::Beginning;
271                let last_source_byte = self.tokens.last().and_then(|t| t.source_offset);
272                return Some(ViewLine {
273                    text: String::new(),
274                    source_start_byte: last_source_byte.map(|s| s + 1),
275                    char_source_bytes: vec![],
276                    char_styles: vec![],
277                    char_visual_cols: vec![],
278                    visual_to_char: vec![],
279                    tab_starts: HashSet::new(),
280                    line_start: LineStart::AfterSourceNewline,
281                    ends_with_newline: false,
282                });
283            }
284            return None;
285        }
286
287        let line_start = self.next_line_start;
288        let mut text = String::new();
289
290        // Per-character tracking (indexed by character position)
291        let mut char_source_bytes: Vec<Option<usize>> = Vec::new();
292        let mut char_styles: Vec<Option<ViewTokenStyle>> = Vec::new();
293        let mut char_visual_cols: Vec<usize> = Vec::new();
294
295        // Per-visual-column tracking (indexed by visual column)
296        let mut visual_to_char: Vec<usize> = Vec::new();
297
298        let mut tab_starts = HashSet::new();
299        let mut col = 0usize; // Current visual column
300        let mut ends_with_newline = false;
301
302        // ANSI parser for tracking escape sequences (reuse existing implementation)
303        let mut ansi_parser = if self.ansi_aware {
304            Some(AnsiParser::new())
305        } else {
306            None
307        };
308
309        /// Helper to add a character with all its mappings
310        macro_rules! add_char {
311            ($ch:expr, $source:expr, $style:expr, $width:expr) => {{
312                let char_idx = char_source_bytes.len();
313
314                // Per-character data
315                text.push($ch);
316                char_source_bytes.push($source);
317                char_styles.push($style);
318                char_visual_cols.push(col);
319
320                // Per-visual-column data (for O(1) mouse clicks).
321                // Note: $width is 0 for zero-width codepoints (combining
322                // marks, ZWJ, continuation codepoints within a grapheme
323                // cluster) — we deliberately emit no visual_to_char
324                // entries for them.
325                #[allow(clippy::reversed_empty_ranges)]
326                for _ in 0..$width {
327                    visual_to_char.push(char_idx);
328                }
329
330                col += $width;
331            }};
332        }
333
334        // Process tokens until we hit a line break
335        while self.token_idx < self.tokens.len() {
336            // Skip tokens that fall inside a collapsed fold before
337            // touching the current line's accumulators.
338            self.skip_folded_tokens();
339            if self.token_idx >= self.tokens.len() {
340                break;
341            }
342            let token = &self.tokens[self.token_idx];
343            let token_style = token.style.clone();
344
345            match &token.kind {
346                ViewTokenWireKind::Text(t) => {
347                    let base = token.source_offset;
348                    let t_bytes = t.as_bytes();
349                    let mut byte_idx = 0;
350
351                    while byte_idx < t_bytes.len() {
352                        let b = t_bytes[byte_idx];
353
354                        // In binary mode, render unprintable bytes as <XX> code points.
355                        // These are never part of a grapheme cluster.
356                        if self.binary_mode && is_unprintable_byte(b) {
357                            let source = base.map(|s| s + byte_idx);
358                            let formatted = format_unprintable_byte(b);
359                            for display_ch in formatted.chars() {
360                                add_char!(display_ch, source, token_style.clone(), 1);
361                            }
362                            byte_idx += 1;
363                            continue;
364                        }
365
366                        // Decode the largest valid UTF-8 slice starting here so we can
367                        // segment it into grapheme clusters. Any invalid byte is
368                        // handled as a single-byte replacement char and we resume
369                        // decoding afterwards.
370                        let remaining = &t_bytes[byte_idx..];
371                        let valid = match std::str::from_utf8(remaining) {
372                            Ok(s) => s,
373                            Err(e) => {
374                                let valid_up_to = e.valid_up_to();
375                                if valid_up_to == 0 {
376                                    let source = base.map(|s| s + byte_idx);
377                                    if self.binary_mode {
378                                        let formatted = format_unprintable_byte(b);
379                                        for display_ch in formatted.chars() {
380                                            add_char!(display_ch, source, token_style.clone(), 1);
381                                        }
382                                    } else {
383                                        add_char!('\u{FFFD}', source, token_style.clone(), 1);
384                                    }
385                                    byte_idx += 1;
386                                    continue;
387                                } else {
388                                    // SAFETY: `valid_up_to` is a char boundary.
389                                    unsafe {
390                                        std::str::from_utf8_unchecked(&remaining[..valid_up_to])
391                                    }
392                                }
393                            }
394                        };
395
396                        // Canonical Unicode handling: iterate grapheme clusters, not
397                        // codepoints. The width of a cluster is `str_width(cluster)` —
398                        // `unicode-width` 0.2 correctly returns 2 for ZWJ family emoji,
399                        // 1 for a base+combining sequence like "é", 2 for fullwidth
400                        // letters, and so on. This is the same width ratatui computes
401                        // when it re-segments the span, so every stage of the pipeline
402                        // (wrap, column tracking, span placement) agrees on how many
403                        // cells each cluster occupies.
404                        //
405                        // We still record per-codepoint entries in the char-indexed
406                        // arrays (char_source_bytes / char_styles / char_visual_cols)
407                        // so byte↔column mapping stays exact for LSP positions, mouse
408                        // clicks, and cursor arithmetic. But `col` advances exactly
409                        // once per grapheme: the first codepoint of a cluster carries
410                        // the full width, the rest carry 0.
411                        let mut segmented_bytes = 0usize;
412                        for (g_byte_offset, grapheme) in valid.grapheme_indices(true) {
413                            segmented_bytes = g_byte_offset + grapheme.len();
414
415                            // In binary mode, any ASCII unprintable byte inside the
416                            // decoded slice must still be rendered as `<XX>`. This
417                            // covers graphemes consisting entirely of one unprintable
418                            // byte (e.g. `\x1A`) and CRLF (`\r\n`) where only the
419                            // `\r` half is unprintable — we split those out.
420                            if self.binary_mode {
421                                let bytes = grapheme.as_bytes();
422                                let has_unprintable =
423                                    bytes.iter().any(|&b| b < 0x80 && is_unprintable_byte(b));
424                                if has_unprintable {
425                                    let mut inner = 0usize;
426                                    for ch in grapheme.chars() {
427                                        let ch_len = ch.len_utf8();
428                                        let src =
429                                            base.map(|s| s + byte_idx + g_byte_offset + inner);
430                                        let ch_byte = ch as u32;
431                                        if ch_byte < 0x80 && is_unprintable_byte(ch_byte as u8) {
432                                            let formatted = format_unprintable_byte(ch_byte as u8);
433                                            for display_ch in formatted.chars() {
434                                                add_char!(display_ch, src, token_style.clone(), 1);
435                                            }
436                                        } else {
437                                            add_char!(ch, src, token_style.clone(), 1);
438                                        }
439                                        inner += ch_len;
440                                    }
441                                    continue;
442                                }
443                            }
444
445                            // Tab: a single codepoint forming its own grapheme, expanded to spaces.
446                            if grapheme == "\t" {
447                                let source = base.map(|s| s + byte_idx + g_byte_offset);
448                                let tab_start_pos = char_source_bytes.len();
449                                tab_starts.insert(tab_start_pos);
450                                let spaces = self.tab_expansion_width(col);
451
452                                let char_idx = char_source_bytes.len();
453                                text.push(' ');
454                                char_source_bytes.push(source);
455                                char_styles.push(token_style.clone());
456                                char_visual_cols.push(col);
457
458                                for _ in 0..spaces {
459                                    visual_to_char.push(char_idx);
460                                }
461                                col += spaces;
462
463                                for _ in 1..spaces {
464                                    text.push(' ');
465                                    char_source_bytes.push(source);
466                                    char_styles.push(token_style.clone());
467                                    char_visual_cols
468                                        .push(col - spaces + char_source_bytes.len() - char_idx);
469                                }
470                                continue;
471                            }
472
473                            // ANSI escape sequences. Process char-by-char so the
474                            // AnsiParser state machine keeps track of the escape,
475                            // and keep them as width 0. In practice ESC never sits
476                            // inside a grapheme with visible content, so treating
477                            // a grapheme that starts with ESC as width-0 here is
478                            // correct.
479                            if let Some(ref mut parser) = ansi_parser {
480                                let first_ch = grapheme.chars().next().unwrap_or('\0');
481                                if parser.parse_char(first_ch).is_none() {
482                                    for ch in grapheme.chars() {
483                                        // All codepoints of an escape grapheme are width 0.
484                                        let src = base.map(|s| s + byte_idx + g_byte_offset);
485                                        // Keep the parser fed so state transitions work
486                                        // even across a multi-codepoint escape (rare).
487                                        if ch != first_ch {
488                                            let _ = parser.parse_char(ch);
489                                        }
490                                        add_char!(ch, src, token_style.clone(), 0);
491                                    }
492                                    continue;
493                                }
494                            }
495
496                            // Normal case: emit one display unit per grapheme.
497                            // Width goes on the FIRST codepoint, the rest are 0.
498                            let cluster_width = str_width(grapheme);
499                            let mut first = true;
500                            let mut inner_byte_offset = 0usize;
501                            for ch in grapheme.chars() {
502                                let source =
503                                    base.map(|s| s + byte_idx + g_byte_offset + inner_byte_offset);
504                                let w = if first {
505                                    first = false;
506                                    cluster_width
507                                } else {
508                                    0
509                                };
510                                add_char!(ch, source, token_style.clone(), w);
511                                inner_byte_offset += ch.len_utf8();
512                            }
513                        }
514
515                        byte_idx += segmented_bytes.max(1);
516                    }
517                    self.token_idx += 1;
518                }
519                ViewTokenWireKind::Space => {
520                    add_char!(' ', token.source_offset, token_style, 1);
521                    self.token_idx += 1;
522                }
523                ViewTokenWireKind::Newline => {
524                    // Newline ends this line - width 1 for the newline char
525                    add_char!('\n', token.source_offset, token_style, 1);
526                    ends_with_newline = true;
527
528                    // Determine how the next line starts
529                    self.next_line_start = if token.source_offset.is_some() {
530                        LineStart::AfterSourceNewline
531                    } else {
532                        LineStart::AfterInjectedNewline
533                    };
534                    self.token_idx += 1;
535                    break;
536                }
537                ViewTokenWireKind::Break => {
538                    // Break is a synthetic line break from wrapping
539                    add_char!('\n', None, None, 1);
540                    ends_with_newline = true;
541
542                    self.next_line_start = LineStart::AfterBreak;
543                    self.token_idx += 1;
544                    break;
545                }
546                ViewTokenWireKind::BinaryByte(b) => {
547                    // Binary byte rendered as <XX> - all 4 chars map to same source byte
548                    let formatted = format_unprintable_byte(*b);
549                    for display_ch in formatted.chars() {
550                        add_char!(display_ch, token.source_offset, token_style.clone(), 1);
551                    }
552                    self.token_idx += 1;
553                }
554            }
555        }
556
557        // col's final value is intentionally unused (only needed during iteration)
558        let _ = col;
559
560        // If we consumed all remaining tokens without hitting a Newline or Break,
561        // the content didn't end with a line terminator.  Reset next_line_start
562        // so the trailing-empty-line logic (at the top of next()) doesn't
563        // incorrectly fire on the subsequent call.  The `ends_with_newline` flag
564        // tells us whether the loop exited via a Newline/Break (true) or by
565        // exhausting all tokens (false).
566        if !ends_with_newline && self.token_idx >= self.tokens.len() {
567            self.next_line_start = LineStart::Beginning;
568        }
569
570        // Don't return empty injected/virtual lines at the end of the token
571        // stream.  However, DO return a trailing empty line that follows a source
572        // newline — it represents a real document line (e.g. after a file's
573        // trailing '\n') and the cursor may sit on it — but only when
574        // at_buffer_end is set (otherwise this is just a viewport slice).
575        if text.is_empty()
576            && self.token_idx >= self.tokens.len()
577            && !(self.at_buffer_end && matches!(line_start, LineStart::AfterSourceNewline))
578        {
579            return None;
580        }
581
582        Some(ViewLine {
583            text,
584            source_start_byte: char_source_bytes.iter().find_map(|s| *s),
585            char_source_bytes,
586            char_styles,
587            char_visual_cols,
588            visual_to_char,
589            tab_starts,
590            line_start,
591            ends_with_newline,
592        })
593    }
594}
595
596/// Determine if a display line should show a line number
597///
598/// Rules:
599/// - Wrapped continuation (line_start == AfterBreak): no line number
600/// - Injected content (first char has source_offset: None): no line number
601/// - Empty line at beginning or after source newline: yes line number
602/// - Otherwise: show line number
603pub fn should_show_line_number(line: &ViewLine) -> bool {
604    // Wrapped continuations never show line numbers
605    if line.line_start.is_continuation() {
606        return false;
607    }
608
609    // Check if this line contains injected (non-source) content
610    // An empty line is NOT injected if it's at the beginning or after a source newline
611    if line.char_source_bytes.is_empty() {
612        // Empty line - show line number if it's at beginning or after source newline
613        // (not after injected newline or break)
614        return matches!(
615            line.line_start,
616            LineStart::Beginning | LineStart::AfterSourceNewline
617        );
618    }
619
620    let first_char_is_source = line
621        .char_source_bytes
622        .first()
623        .map(|m| m.is_some())
624        .unwrap_or(false);
625
626    if !first_char_is_source {
627        // Injected line (header, etc.) - no line number
628        return false;
629    }
630
631    // Source content after a real line break - show line number
632    true
633}
634
635// ============================================================================
636// Layout: The computed display state for a view
637// ============================================================================
638
639use std::collections::BTreeMap;
640
/// The Layout represents the computed display state for a view.
///
/// This is **View state**, not Buffer state. Each split has its own Layout
/// computed from its view_transform (or base tokens if no transform).
///
/// The Layout provides:
/// - ViewLines for the current viewport region
/// - Bidirectional mapping between source bytes and view positions
/// - Scroll limit information
#[derive(Debug, Clone)]
pub struct Layout {
    /// Display lines for the current viewport region
    pub lines: Vec<ViewLine>,

    /// Source byte range this layout covers (stored as supplied by the caller)
    pub source_range: Range<usize>,

    /// Total view lines in entire document (estimated or exact).
    /// NOTE(review): `Layout::new` currently sets this to `lines.len()`,
    /// i.e. only the lines held by this layout.
    pub total_view_lines: usize,

    /// Total injected lines in entire document (from view transform).
    /// NOTE(review): `Layout::new` computes this as the number of held lines
    /// for which `should_show_line_number` is false, which also counts
    /// wrapped continuation lines — confirm that is intended.
    pub total_injected_lines: usize,

    /// Fast lookup: source byte → view line index.
    /// Keyed by the first mapped source byte of each line; lines without any
    /// source-mapped character have no entry.
    byte_to_line: BTreeMap<usize, usize>,
}
667
668impl Layout {
669    /// Create a new Layout from ViewLines
670    pub fn new(lines: Vec<ViewLine>, source_range: Range<usize>) -> Self {
671        let mut byte_to_line = BTreeMap::new();
672
673        // Build the byte→line index from char_source_bytes
674        for (line_idx, line) in lines.iter().enumerate() {
675            // Find the first source byte in this line
676            if let Some(first_byte) = line.char_source_bytes.iter().find_map(|m| *m) {
677                byte_to_line.insert(first_byte, line_idx);
678            }
679        }
680
681        // Estimate total view lines (for now, just use what we have)
682        let total_view_lines = lines.len();
683        let total_injected_lines = lines.iter().filter(|l| !should_show_line_number(l)).count();
684
685        Self {
686            lines,
687            source_range,
688            total_view_lines,
689            total_injected_lines,
690            byte_to_line,
691        }
692    }
693
694    /// Build a Layout from a token stream
695    pub fn from_tokens(
696        tokens: &[ViewTokenWire],
697        source_range: Range<usize>,
698        tab_size: usize,
699    ) -> Self {
700        let lines: Vec<ViewLine> =
701            ViewLineIterator::new(tokens, false, false, tab_size, false).collect();
702        Self::new(lines, source_range)
703    }
704
705    /// Find the view position (line, visual column) for a source byte
706    pub fn source_byte_to_view_position(&self, byte: usize) -> Option<(usize, usize)> {
707        // Find the view line containing this byte
708        if let Some((&_line_start_byte, &line_idx)) = self.byte_to_line.range(..=byte).last() {
709            if line_idx < self.lines.len() {
710                let line = &self.lines[line_idx];
711                // Find the character with this source byte, then get its visual column
712                for (char_idx, mapping) in line.char_source_bytes.iter().enumerate() {
713                    if *mapping == Some(byte) {
714                        return Some((line_idx, line.visual_col_at_char(char_idx)));
715                    }
716                }
717                // Byte is in this line's range but not at a character boundary
718                // Return end of line (visual width)
719                return Some((line_idx, line.visual_width()));
720            }
721        }
722        None
723    }
724
725    /// Find the source byte for a view position (line, visual column)
726    pub fn view_position_to_source_byte(&self, line_idx: usize, col: usize) -> Option<usize> {
727        if line_idx >= self.lines.len() {
728            return None;
729        }
730        let line = &self.lines[line_idx];
731        if col < line.visual_width() {
732            // Use O(1) lookup via visual_to_char -> char_source_bytes
733            line.source_byte_at_visual_col(col)
734        } else if !line.char_source_bytes.is_empty() {
735            // Past end of line, return last valid byte
736            line.char_source_bytes.iter().rev().find_map(|m| *m)
737        } else {
738            None
739        }
740    }
741
742    /// Get the source byte for the start of a view line
743    pub fn get_source_byte_for_line(&self, line_idx: usize) -> Option<usize> {
744        if line_idx >= self.lines.len() {
745            return None;
746        }
747        self.lines[line_idx]
748            .char_source_bytes
749            .iter()
750            .find_map(|m| *m)
751    }
752
753    /// Find the nearest view line for a source byte (for stabilization)
754    pub fn find_nearest_view_line(&self, byte: usize) -> usize {
755        if let Some((&_line_start_byte, &line_idx)) = self.byte_to_line.range(..=byte).last() {
756            line_idx.min(self.lines.len().saturating_sub(1))
757        } else {
758            0
759        }
760    }
761
762    /// Calculate the maximum top line for scrolling
763    pub fn max_top_line(&self, viewport_height: usize) -> usize {
764        self.lines.len().saturating_sub(viewport_height)
765    }
766
767    /// Check if there's content below the current layout
768    pub fn has_content_below(&self, buffer_len: usize) -> bool {
769        self.source_range.end < buffer_len
770    }
771}
772
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble an unstyled token of the given kind.
    fn wire_token(kind: ViewTokenWireKind, source_offset: Option<usize>) -> ViewTokenWire {
        ViewTokenWire {
            kind,
            source_offset,
            style: None,
        }
    }

    /// Unstyled text token; `source_offset` is `None` for injected content.
    fn make_text_token(text: &str, source_offset: Option<usize>) -> ViewTokenWire {
        wire_token(ViewTokenWireKind::Text(String::from(text)), source_offset)
    }

    /// Unstyled newline token; `source_offset` is `None` for injected newlines.
    fn make_newline_token(source_offset: Option<usize>) -> ViewTokenWire {
        wire_token(ViewTokenWireKind::Newline, source_offset)
    }

    /// Break token (wrap point) with no source mapping.
    fn make_break_token() -> ViewTokenWire {
        wire_token(ViewTokenWireKind::Break, None)
    }

    /// Run `tokens` through `ViewLineIterator` with tab size 4 and gather every line.
    ///
    /// `binary_mode` is forwarded as the iterator's second argument; its two
    /// other boolean arguments are always `false`.
    fn collect_view_lines(tokens: &[ViewTokenWire], binary_mode: bool) -> Vec<ViewLine> {
        ViewLineIterator::new(tokens, binary_mode, false, 4, false).collect()
    }

    /// Check `source_byte_at_visual_col` for each
    /// `(visual column, expected source byte, failure message)` row, in order.
    fn assert_visual_cols(line: &ViewLine, expected: &[(usize, usize, &str)]) {
        for &(col, byte, msg) in expected {
            assert_eq!(line.source_byte_at_visual_col(col), Some(byte), "{}", msg);
        }
    }

    #[test]
    fn test_simple_source_lines() {
        let tokens = vec![
            make_text_token("Line 1", Some(0)),
            make_newline_token(Some(6)),
            make_text_token("Line 2", Some(7)),
            make_newline_token(Some(13)),
        ];
        let lines = collect_view_lines(&tokens, false);
        assert_eq!(lines.len(), 2);

        let (first, second) = (&lines[0], &lines[1]);

        assert_eq!(first.text, "Line 1\n");
        assert_eq!(first.line_start, LineStart::Beginning);
        assert!(should_show_line_number(first));

        assert_eq!(second.text, "Line 2\n");
        assert_eq!(second.line_start, LineStart::AfterSourceNewline);
        assert!(should_show_line_number(second));
    }

    #[test]
    fn test_wrapped_continuation() {
        let tokens = vec![
            make_text_token("Line 1 start", Some(0)),
            make_break_token(), // wrap point inside the source line
            make_text_token("continued", Some(12)),
            make_newline_token(Some(21)),
        ];
        let lines = collect_view_lines(&tokens, false);
        assert_eq!(lines.len(), 2);

        let (head, continuation) = (&lines[0], &lines[1]);

        assert_eq!(head.line_start, LineStart::Beginning);
        assert!(should_show_line_number(head));

        assert_eq!(continuation.line_start, LineStart::AfterBreak);
        assert!(
            !should_show_line_number(continuation),
            "Wrapped continuation should NOT show line number"
        );
    }

    #[test]
    fn test_injected_header_then_source() {
        // Regression scenario: an injected header line directly followed by source content.
        let tokens = vec![
            // Injected header (no source offsets)
            make_text_token("== HEADER ==", None),
            make_newline_token(None),
            // Real source content
            make_text_token("Line 1", Some(0)),
            make_newline_token(Some(6)),
        ];
        let lines = collect_view_lines(&tokens, false);
        assert_eq!(lines.len(), 2);

        // The header is injected content, so it gets no line number.
        let header = &lines[0];
        assert_eq!(header.text, "== HEADER ==\n");
        assert_eq!(header.line_start, LineStart::Beginning);
        assert!(
            !should_show_line_number(header),
            "Injected header should NOT show line number"
        );

        // The source line following the header must get a line number.
        let source = &lines[1];
        let first_char_is_source = matches!(source.char_source_bytes.first(), Some(Some(_)));
        assert_eq!(source.text, "Line 1\n");
        assert_eq!(source.line_start, LineStart::AfterInjectedNewline);
        assert!(
            should_show_line_number(source),
            "BUG: Source line after injected header SHOULD show line number!\n\
             line_start={:?}, first_char_is_source={}",
            source.line_start,
            first_char_is_source
        );
    }

    #[test]
    fn test_mixed_scenario() {
        // Header -> Source Line 1 -> Source Line 2 (wrapped) -> Source Line 3
        let tokens = vec![
            // Injected header
            make_text_token("== Block 1 ==", None),
            make_newline_token(None),
            // Source line 1
            make_text_token("Line 1", Some(0)),
            make_newline_token(Some(6)),
            // Source line 2, split by a wrap break
            make_text_token("Line 2 start", Some(7)),
            make_break_token(),
            make_text_token("wrapped", Some(19)),
            make_newline_token(Some(26)),
            // Source line 3
            make_text_token("Line 3", Some(27)),
            make_newline_token(Some(33)),
        ];
        let lines = collect_view_lines(&tokens, false);
        assert_eq!(lines.len(), 5);

        let shows_number: Vec<bool> = lines
            .iter()
            .map(|line| should_show_line_number(line))
            .collect();
        assert_eq!(
            shows_number,
            vec![
                false, // header: injected
                true,  // line 1: source after header
                true,  // line 2 start
                false, // line 2 wrapped continuation
                true,  // line 3
            ]
        );
    }

    #[test]
    fn test_is_unprintable_byte() {
        // (byte, expected result of `is_unprintable_byte`)
        let cases = [
            // Null byte
            (0x00, true),
            // Control characters 0x01-0x08
            (0x01, true),
            (0x02, true),
            (0x08, true),
            // Tab and LF are allowed
            (0x09, false),
            (0x0A, false),
            // VT, FF and CR are unprintable in binary mode
            (0x0B, true),
            (0x0C, true),
            (0x0D, true),
            // 0x0E-0x1F, including SUB (found in PNG headers) and ESC
            (0x0E, true),
            (0x1A, true),
            (0x1B, true),
            (0x1C, true),
            (0x1F, true),
            // Printable ASCII: space, 'A', '~'
            (0x20, false),
            (0x41, false),
            (0x7E, false),
            // DEL
            (0x7F, true),
            // High bytes are allowed (could be UTF-8)
            (0x80, false),
            (0xFF, false),
        ];

        for &(byte, unprintable) in &cases {
            assert_eq!(is_unprintable_byte(byte), unprintable, "byte {:#04X}", byte);
        }
    }

    #[test]
    fn test_format_unprintable_byte() {
        let cases = [
            (0x00, "<00>"),
            (0x01, "<01>"),
            (0x1A, "<1A>"),
            (0x7F, "<7F>"),
            (0xFF, "<FF>"),
        ];

        for &(byte, rendered) in &cases {
            assert_eq!(format_unprintable_byte(byte), rendered);
        }
    }

    #[test]
    fn test_binary_mode_renders_control_chars() {
        // Text containing a null byte and a 0x01 control character.
        let tokens = vec![
            make_text_token("Hello\x00World\x01End", Some(0)),
            make_newline_token(Some(15)),
        ];

        // Normal mode applies no special formatting; only the line count is checked.
        let normal_lines = collect_view_lines(&tokens, false);
        assert_eq!(normal_lines.len(), 1);

        // Binary mode must render the control characters as <XX>.
        let binary_lines = collect_view_lines(&tokens, true);
        assert_eq!(binary_lines.len(), 1);

        let rendered = &binary_lines[0].text;
        assert!(
            rendered.contains("<00>"),
            "Binary mode should format null byte as <00>, got: {}",
            rendered
        );
        assert!(
            rendered.contains("<01>"),
            "Binary mode should format 0x01 as <01>, got: {}",
            rendered
        );
    }

    #[test]
    fn test_binary_mode_png_header() {
        // PNG-like content: a valid UTF-8 string with an embedded SUB (0x1A).
        let tokens = vec![make_text_token("PNG\r\n\x1A\n", Some(0))];
        let lines = collect_view_lines(&tokens, true);

        // Stitch all produced lines together; the SUB byte must appear as <1A>.
        let mut combined = String::new();
        for line in &lines {
            combined.push_str(&line.text);
        }
        assert!(
            combined.contains("<1A>"),
            "PNG SUB byte (0x1A) should be rendered as <1A>, got: {:?}",
            combined
        );
    }

    #[test]
    fn test_binary_mode_preserves_printable_chars() {
        let tokens = vec![
            make_text_token("Normal text 123", Some(0)),
            make_newline_token(Some(15)),
        ];
        let lines = collect_view_lines(&tokens, true);

        assert_eq!(lines.len(), 1);
        assert!(
            lines[0].text.contains("Normal text 123"),
            "Printable chars should be preserved in binary mode"
        );
    }

    #[test]
    fn test_double_width_visual_mappings() {
        // "你好": two Chinese characters, each 3 bytes long and 2 columns wide.
        // Bytes:   你 = 0-2, 好 = 3-5
        // Columns: 你 = 0-1, 好 = 2-3
        let tokens = vec![
            make_text_token("你好", Some(0)),
            make_newline_token(Some(6)),
        ];
        let lines = collect_view_lines(&tokens, false);
        assert_eq!(lines.len(), 1);
        let line = &lines[0];

        // One visual column entry per column: 2 + 2 + 1 (newline) = 5.
        let width = line.visual_width();
        assert_eq!(
            width, 5,
            "Expected 5 visual columns (2 for 你 + 2 for 好 + 1 for newline), got {}",
            width
        );

        // One source-byte entry per character: 你, 好, \n.
        let char_count = line.char_source_bytes.len();
        assert_eq!(
            char_count, 3,
            "Expected 3 char entries (你, 好, newline), got {}",
            char_count
        );

        // Both columns of a wide character resolve to that character's first byte.
        assert_visual_cols(
            line,
            &[
                (0, 0, "Column 0 should map to byte 0"),
                (1, 0, "Column 1 should map to byte 0"),
                (2, 3, "Column 2 should map to byte 3"),
                (3, 3, "Column 3 should map to byte 3"),
                (4, 6, "Column 4 (newline) should map to byte 6"),
            ],
        );
    }

    #[test]
    fn test_mixed_width_visual_mappings() {
        // "a你b": ASCII, Chinese (2 columns), ASCII.
        // Bytes:   a = 0, 你 = 1-3, b = 4
        // Columns: a = 0, 你 = 1-2, b = 3
        let tokens = vec![
            make_text_token("a你b", Some(0)),
            make_newline_token(Some(5)),
        ];
        let lines = collect_view_lines(&tokens, false);
        assert_eq!(lines.len(), 1);
        let line = &lines[0];

        // 1 (a) + 2 (你) + 1 (b) + 1 (newline) = 5 columns.
        let width = line.visual_width();
        assert_eq!(width, 5, "Expected 5 visual columns, got {}", width);

        // Four characters: a, 你, b, \n.
        let char_count = line.char_source_bytes.len();
        assert_eq!(char_count, 4, "Expected 4 char entries, got {}", char_count);

        assert_visual_cols(
            line,
            &[
                (0, 0, "Column 0 (a) should map to byte 0"),
                (1, 1, "Column 1 (你 col 1) should map to byte 1"),
                (2, 1, "Column 2 (你 col 2) should map to byte 1"),
                (3, 4, "Column 3 (b) should map to byte 4"),
                (4, 5, "Column 4 (newline) should map to byte 5"),
            ],
        );
    }

    // ==================== CRLF Mode Tests ====================

    /// `char_source_bytes` for a single CRLF-terminated line.
    ///
    /// In CRLF mode the Newline token carries the offset of the `\r` and the
    /// `\n` byte gets no token of its own, so the displayed newline must map to
    /// the `\r` position.
    #[test]
    fn test_crlf_char_source_bytes_single_line() {
        // Models "abc\r\n": a=0, b=1, c=2, \r=3, \n=4.
        let tokens = vec![
            make_text_token("abc", Some(0)),
            make_newline_token(Some(3)), // offset of the \r
        ];
        let lines = collect_view_lines(&tokens, false);
        assert_eq!(lines.len(), 1);
        let line = &lines[0];

        // Displayed characters: 'a', 'b', 'c', '\n'.
        assert_eq!(line.text, "abc\n");
        assert_eq!(
            line.char_source_bytes.len(),
            4,
            "Expected 4 chars: a, b, c, newline"
        );

        // (char index, expected source byte, failure message)
        let expected: [(usize, usize, &str); 4] = [
            (0, 0, "char 'a' should map to byte 0"),
            (1, 1, "char 'b' should map to byte 1"),
            (2, 2, "char 'c' should map to byte 2"),
            (3, 3, "newline should map to byte 3 (\\r position)"),
        ];
        for &(char_idx, byte, msg) in &expected {
            assert_eq!(line.char_source_bytes[char_idx], Some(byte), "{}", msg);
        }
    }

    /// `char_source_bytes` across several CRLF-terminated lines.
    ///
    /// Guards against an offset error that accumulates from line to line.
    #[test]
    fn test_crlf_char_source_bytes_multiple_lines() {
        // Models "abc\r\ndef\r\nghi\r\n", 5 bytes per line:
        //   line 1: a=0,  b=1,  c=2,  \r=3,  \n=4
        //   line 2: d=5,  e=6,  f=7,  \r=8,  \n=9
        //   line 3: g=10, h=11, i=12, \r=13, \n=14
        let tokens = vec![
            make_text_token("abc", Some(0)),
            make_newline_token(Some(3)), // \r at byte 3
            make_text_token("def", Some(5)),
            make_newline_token(Some(8)), // \r at byte 8
            make_text_token("ghi", Some(10)),
            make_newline_token(Some(13)), // \r at byte 13
        ];
        let lines = collect_view_lines(&tokens, false);
        assert_eq!(lines.len(), 3);

        // (expected text, expected source byte per character, failure message)
        // A per-line off-by-one would first show up on line 2 and grow on line 3.
        let expected: [(&str, [usize; 4], &str); 3] = [
            ("abc\n", [0, 1, 2, 3], "Line 1 char_source_bytes mismatch"),
            (
                "def\n",
                [5, 6, 7, 8],
                "Line 2 char_source_bytes mismatch - possible CRLF offset drift",
            ),
            (
                "ghi\n",
                [10, 11, 12, 13],
                "Line 3 char_source_bytes mismatch - CRLF offset drift accumulated",
            ),
        ];

        for (line, &(text, bytes, msg)) in lines.iter().zip(expected.iter()) {
            let expected_bytes: Vec<Option<usize>> = bytes.iter().copied().map(Some).collect();
            assert_eq!(line.text, text);
            assert_eq!(line.char_source_bytes, expected_bytes, "{}", msg);
        }
    }

    /// Visual column → source byte lookups on CRLF content.
    #[test]
    fn test_crlf_visual_to_source_mapping() {
        // Models "ab\r\ncd\r\n":
        //   line 1: a=0, b=1, \r=2, \n=3
        //   line 2: c=4, d=5, \r=6, \n=7
        let tokens = vec![
            make_text_token("ab", Some(0)),
            make_newline_token(Some(2)),
            make_text_token("cd", Some(4)),
            make_newline_token(Some(6)),
        ];
        let lines = collect_view_lines(&tokens, false);

        // Line 1: columns 0 and 1 are bytes 0 and 1, the newline column is the \r.
        assert_visual_cols(
            &lines[0],
            &[
                (0, 0, "Line 1 col 0"),
                (1, 1, "Line 1 col 1"),
                (2, 2, "Line 1 col 2 (newline)"),
            ],
        );

        // Line 2: columns 0 and 1 are bytes 4 and 5, the newline column is the \r.
        assert_visual_cols(
            &lines[1],
            &[
                (0, 4, "Line 2 col 0"),
                (1, 5, "Line 2 col 1"),
                (2, 6, "Line 2 col 2 (newline)"),
            ],
        );
    }
}
fresh/view/ui/view_pipeline.rs

fresh/view/ui/
view_pipeline.rs