Skip to main content

fresh/view/ui/
view_pipeline.rs

1//! Token-based view rendering pipeline
2//!
3//! This module provides a clean pipeline for rendering view tokens:
4//!
5//! ```text
6//! source buffer
7//!     ↓ build_base_tokens()
8//! Vec<ViewTokenWire>  (base tokens with source mappings)
9//!     ↓ plugin transform (optional)
10//! Vec<ViewTokenWire>  (transformed tokens, may have injected content)
11//!     ↓ apply_wrapping() (optional)
12//! Vec<ViewTokenWire>  (with Break tokens for wrapped lines)
13//!     ↓ ViewLineIterator
14//! Iterator<ViewLine>  (one per display line, preserves token info)
15//!     ↓ render
16//! Display output
17//! ```
18//!
19//! The key design principle: preserve token-level information through the pipeline
20//! so rendering decisions (like line numbers) can be made based on token types,
21//! not reconstructed from flattened text.
22
23use crate::primitives::ansi::AnsiParser;
24use crate::primitives::display_width::char_width;
25use fresh_core::api::{ViewTokenStyle, ViewTokenWire, ViewTokenWireKind};
26use std::collections::HashSet;
27
28/// A display line built from tokens, preserving token-level information
29#[derive(Debug, Clone)]
30pub struct ViewLine {
31    /// The display text for this line (tabs expanded to spaces, etc.)
32    pub text: String,
33
34    // === Per-CHARACTER mappings (indexed by char position in text) ===
35    /// Source byte offset for each character
36    /// Length == text.chars().count()
37    pub char_source_bytes: Vec<Option<usize>>,
38    /// Style for each character (from token styles)
39    pub char_styles: Vec<Option<ViewTokenStyle>>,
40    /// Visual column where each character starts
41    pub char_visual_cols: Vec<usize>,
42
43    // === Per-VISUAL-COLUMN mapping (indexed by visual column) ===
44    /// Character index at each visual column (for O(1) mouse clicks)
45    /// For double-width chars, consecutive visual columns map to the same char index
46    /// Length == total visual width of line
47    pub visual_to_char: Vec<usize>,
48
49    /// Positions that are the start of a tab expansion
50    pub tab_starts: HashSet<usize>,
51    /// How this line started (what kind of token/boundary preceded it)
52    pub line_start: LineStart,
53    /// Whether this line ends with a newline character
54    pub ends_with_newline: bool,
55}
56
57impl ViewLine {
58    /// Get source byte at a given character index (O(1))
59    #[inline]
60    pub fn source_byte_at_char(&self, char_idx: usize) -> Option<usize> {
61        self.char_source_bytes.get(char_idx).copied().flatten()
62    }
63
64    /// Get character index at a given visual column (O(1))
65    #[inline]
66    pub fn char_at_visual_col(&self, visual_col: usize) -> usize {
67        self.visual_to_char
68            .get(visual_col)
69            .copied()
70            .unwrap_or_else(|| self.char_source_bytes.len().saturating_sub(1))
71    }
72
73    /// Get source byte at a given visual column (O(1) for mouse clicks)
74    #[inline]
75    pub fn source_byte_at_visual_col(&self, visual_col: usize) -> Option<usize> {
76        let char_idx = self.char_at_visual_col(visual_col);
77        self.source_byte_at_char(char_idx)
78    }
79
80    /// Get the visual column for a character at the given index
81    #[inline]
82    pub fn visual_col_at_char(&self, char_idx: usize) -> usize {
83        self.char_visual_cols.get(char_idx).copied().unwrap_or(0)
84    }
85
86    /// Total visual width of this line
87    #[inline]
88    pub fn visual_width(&self) -> usize {
89        self.visual_to_char.len()
90    }
91}
92
/// The kind of boundary a display line begins after.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineStart {
    /// Very first line of the view; nothing precedes it
    Beginning,
    /// Follows a Newline token that came from the source (source_offset: Some)
    AfterSourceNewline,
    /// Follows a Newline token that was injected (source_offset: None)
    AfterInjectedNewline,
    /// Follows a Break token, i.e. this is a wrapped continuation
    AfterBreak,
}

impl LineStart {
    /// Returns `true` when the line is a wrapped continuation of the
    /// previous display line (it started after a Break token).
    ///
    /// Callers use this to decide gutter behaviour: a continuation never
    /// gets its own line number, whereas every other variant may, depending
    /// on the line's content.
    pub fn is_continuation(&self) -> bool {
        *self == Self::AfterBreak
    }
}
117
118/// Iterator that converts a token stream into display lines
119pub struct ViewLineIterator<'a> {
120    tokens: &'a [ViewTokenWire],
121    token_idx: usize,
122    /// How the next line should start (based on what ended the previous line)
123    next_line_start: LineStart,
124    /// Whether to render in binary mode (unprintable chars shown as code points)
125    binary_mode: bool,
126    /// Whether to parse ANSI escape sequences (giving them zero visual width)
127    ansi_aware: bool,
128    /// Tab width for rendering (number of spaces per tab)
129    tab_size: usize,
130    /// Whether the token stream covers the end of the buffer.
131    /// When true, a trailing empty line is emitted after a final source newline
132    /// (representing the empty line after a file's trailing '\n').
133    at_buffer_end: bool,
134}
135
136impl<'a> ViewLineIterator<'a> {
137    /// Create a new ViewLineIterator with all options
138    ///
139    /// - `tokens`: The token stream to convert to display lines
140    /// - `binary_mode`: Whether to render unprintable chars as code points
141    /// - `ansi_aware`: Whether to parse ANSI escape sequences (giving them zero visual width)
142    /// - `tab_size`: Tab width for rendering (number of spaces per tab, should be > 0)
143    /// - `at_buffer_end`: Whether the token stream covers the end of the buffer.
144    ///   When true, a trailing empty line is emitted after a final source newline.
145    ///
146    /// Note: If tab_size is 0, it will be treated as 4 (the default) to prevent division by zero.
147    /// This is a defensive measure to handle invalid configuration gracefully.
148    pub fn new(
149        tokens: &'a [ViewTokenWire],
150        binary_mode: bool,
151        ansi_aware: bool,
152        tab_size: usize,
153        at_buffer_end: bool,
154    ) -> Self {
155        // Defensive: treat 0 as 4 (default) to prevent division by zero in tab_expansion_width
156        // This can happen if invalid config (tab_size: 0) is loaded
157        let tab_size = if tab_size == 0 { 4 } else { tab_size };
158        Self {
159            tokens,
160            token_idx: 0,
161            next_line_start: LineStart::Beginning,
162            binary_mode,
163            ansi_aware,
164            tab_size,
165            at_buffer_end,
166        }
167    }
168
169    /// Expand a tab to spaces based on current column and configured tab_size
170    #[inline]
171    fn tab_expansion_width(&self, col: usize) -> usize {
172        self.tab_size - (col % self.tab_size)
173    }
174}
175
/// Tell whether a byte is a control character that must be displayed as <XX>.
///
/// True for the control range 0x00-0x1F and for DEL (0x7F), with two
/// exceptions that render normally: tab (0x09) and newline (0x0A).
fn is_unprintable_byte(b: u8) -> bool {
    match b {
        // Tab and LF keep their normal rendering
        0x09 | 0x0A => false,
        // Remaining C0 controls (CR, VT, FF, ESC, ...) plus DEL
        0x00..=0x1F | 0x7F => true,
        _ => false,
    }
}
194
/// Render a byte as a four-character code point tag such as "<00>":
/// two uppercase hex digits wrapped in angle brackets.
fn format_unprintable_byte(b: u8) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let high = HEX_DIGITS[usize::from(b >> 4)] as char;
    let low = HEX_DIGITS[usize::from(b & 0x0F)] as char;

    let mut tag = String::with_capacity(4);
    tag.push('<');
    tag.push(high);
    tag.push(low);
    tag.push('>');
    tag
}
199
200impl<'a> Iterator for ViewLineIterator<'a> {
201    type Item = ViewLine;
202
203    fn next(&mut self) -> Option<Self::Item> {
204        if self.token_idx >= self.tokens.len() {
205            // All tokens consumed.  If the previous line ended with a source
206            // newline there is one more real (empty) document line to emit —
207            // e.g. the empty line after a file's trailing '\n'.  Produce it
208            // exactly once, then stop.  Only do this when the tokens cover
209            // the actual end of the buffer (not a viewport slice).
210            if self.at_buffer_end && matches!(self.next_line_start, LineStart::AfterSourceNewline) {
211                // Flip to Beginning so the *next* call returns None.
212                self.next_line_start = LineStart::Beginning;
213                return Some(ViewLine {
214                    text: String::new(),
215                    char_source_bytes: vec![],
216                    char_styles: vec![],
217                    char_visual_cols: vec![],
218                    visual_to_char: vec![],
219                    tab_starts: HashSet::new(),
220                    line_start: LineStart::AfterSourceNewline,
221                    ends_with_newline: false,
222                });
223            }
224            return None;
225        }
226
227        let line_start = self.next_line_start;
228        let mut text = String::new();
229
230        // Per-character tracking (indexed by character position)
231        let mut char_source_bytes: Vec<Option<usize>> = Vec::new();
232        let mut char_styles: Vec<Option<ViewTokenStyle>> = Vec::new();
233        let mut char_visual_cols: Vec<usize> = Vec::new();
234
235        // Per-visual-column tracking (indexed by visual column)
236        let mut visual_to_char: Vec<usize> = Vec::new();
237
238        let mut tab_starts = HashSet::new();
239        let mut col = 0usize; // Current visual column
240        let mut ends_with_newline = false;
241
242        // ANSI parser for tracking escape sequences (reuse existing implementation)
243        let mut ansi_parser = if self.ansi_aware {
244            Some(AnsiParser::new())
245        } else {
246            None
247        };
248
249        /// Helper to add a character with all its mappings
250        macro_rules! add_char {
251            ($ch:expr, $source:expr, $style:expr, $width:expr) => {{
252                let char_idx = char_source_bytes.len();
253
254                // Per-character data
255                text.push($ch);
256                char_source_bytes.push($source);
257                char_styles.push($style);
258                char_visual_cols.push(col);
259
260                // Per-visual-column data (for O(1) mouse clicks)
261                for _ in 0..$width {
262                    visual_to_char.push(char_idx);
263                }
264
265                col += $width;
266            }};
267        }
268
269        // Process tokens until we hit a line break
270        while self.token_idx < self.tokens.len() {
271            let token = &self.tokens[self.token_idx];
272            let token_style = token.style.clone();
273
274            match &token.kind {
275                ViewTokenWireKind::Text(t) => {
276                    let base = token.source_offset;
277                    let t_bytes = t.as_bytes();
278                    let mut byte_idx = 0;
279
280                    while byte_idx < t_bytes.len() {
281                        let b = t_bytes[byte_idx];
282                        let source = base.map(|s| s + byte_idx);
283
284                        // In binary mode, render unprintable bytes as code points
285                        if self.binary_mode && is_unprintable_byte(b) {
286                            let formatted = format_unprintable_byte(b);
287                            for display_ch in formatted.chars() {
288                                add_char!(display_ch, source, token_style.clone(), 1);
289                            }
290                            byte_idx += 1;
291                            continue;
292                        }
293
294                        // Decode the character at this position
295                        let ch = if b < 0x80 {
296                            // ASCII character
297                            byte_idx += 1;
298                            b as char
299                        } else {
300                            // Multi-byte UTF-8 - decode carefully
301                            let remaining = &t_bytes[byte_idx..];
302                            match std::str::from_utf8(remaining) {
303                                Ok(s) => {
304                                    if let Some(ch) = s.chars().next() {
305                                        byte_idx += ch.len_utf8();
306                                        ch
307                                    } else {
308                                        byte_idx += 1;
309                                        '\u{FFFD}'
310                                    }
311                                }
312                                Err(e) => {
313                                    // Invalid UTF-8 - in binary mode show as hex, otherwise replacement char
314                                    if self.binary_mode {
315                                        let formatted = format_unprintable_byte(b);
316                                        for display_ch in formatted.chars() {
317                                            add_char!(display_ch, source, token_style.clone(), 1);
318                                        }
319                                        byte_idx += 1;
320                                        continue;
321                                    } else {
322                                        // Try to get valid portion, then skip the bad byte
323                                        let valid_up_to = e.valid_up_to();
324                                        if valid_up_to > 0 {
325                                            if let Some(ch) =
326                                                std::str::from_utf8(&remaining[..valid_up_to])
327                                                    .ok()
328                                                    .and_then(|s| s.chars().next())
329                                            {
330                                                byte_idx += ch.len_utf8();
331                                                ch
332                                            } else {
333                                                byte_idx += 1;
334                                                '\u{FFFD}'
335                                            }
336                                        } else {
337                                            byte_idx += 1;
338                                            '\u{FFFD}'
339                                        }
340                                    }
341                                }
342                            }
343                        };
344
345                        if ch == '\t' {
346                            // Tab expands to spaces - record start position
347                            let tab_start_pos = char_source_bytes.len();
348                            tab_starts.insert(tab_start_pos);
349                            let spaces = self.tab_expansion_width(col);
350
351                            // Tab is ONE character that expands to multiple visual columns
352                            let char_idx = char_source_bytes.len();
353                            text.push(' '); // First space char
354                            char_source_bytes.push(source);
355                            char_styles.push(token_style.clone());
356                            char_visual_cols.push(col);
357
358                            // All visual columns of the tab map to the same char
359                            for _ in 0..spaces {
360                                visual_to_char.push(char_idx);
361                            }
362                            col += spaces;
363
364                            // Push remaining spaces as separate display chars
365                            // (text contains expanded spaces for rendering)
366                            for _ in 1..spaces {
367                                text.push(' ');
368                                char_source_bytes.push(source);
369                                char_styles.push(token_style.clone());
370                                char_visual_cols
371                                    .push(col - spaces + char_source_bytes.len() - char_idx);
372                            }
373                        } else {
374                            // Handle ANSI escape sequences - give them width 0
375                            let width = if let Some(ref mut parser) = ansi_parser {
376                                // Use AnsiParser: parse_char returns None for escape chars
377                                if parser.parse_char(ch).is_none() {
378                                    0 // Part of escape sequence, zero width
379                                } else {
380                                    char_width(ch)
381                                }
382                            } else {
383                                char_width(ch)
384                            };
385                            add_char!(ch, source, token_style.clone(), width);
386                        }
387                    }
388                    self.token_idx += 1;
389                }
390                ViewTokenWireKind::Space => {
391                    add_char!(' ', token.source_offset, token_style, 1);
392                    self.token_idx += 1;
393                }
394                ViewTokenWireKind::Newline => {
395                    // Newline ends this line - width 1 for the newline char
396                    add_char!('\n', token.source_offset, token_style, 1);
397                    ends_with_newline = true;
398
399                    // Determine how the next line starts
400                    self.next_line_start = if token.source_offset.is_some() {
401                        LineStart::AfterSourceNewline
402                    } else {
403                        LineStart::AfterInjectedNewline
404                    };
405                    self.token_idx += 1;
406                    break;
407                }
408                ViewTokenWireKind::Break => {
409                    // Break is a synthetic line break from wrapping
410                    add_char!('\n', None, None, 1);
411                    ends_with_newline = true;
412
413                    self.next_line_start = LineStart::AfterBreak;
414                    self.token_idx += 1;
415                    break;
416                }
417                ViewTokenWireKind::BinaryByte(b) => {
418                    // Binary byte rendered as <XX> - all 4 chars map to same source byte
419                    let formatted = format_unprintable_byte(*b);
420                    for display_ch in formatted.chars() {
421                        add_char!(display_ch, token.source_offset, token_style.clone(), 1);
422                    }
423                    self.token_idx += 1;
424                }
425            }
426        }
427
428        // col's final value is intentionally unused (only needed during iteration)
429        let _ = col;
430
431        // If we consumed all remaining tokens without hitting a Newline or Break,
432        // the content didn't end with a line terminator.  Reset next_line_start
433        // so the trailing-empty-line logic (at the top of next()) doesn't
434        // incorrectly fire on the subsequent call.  The `ends_with_newline` flag
435        // tells us whether the loop exited via a Newline/Break (true) or by
436        // exhausting all tokens (false).
437        if !ends_with_newline && self.token_idx >= self.tokens.len() {
438            self.next_line_start = LineStart::Beginning;
439        }
440
441        // Don't return empty injected/virtual lines at the end of the token
442        // stream.  However, DO return a trailing empty line that follows a source
443        // newline — it represents a real document line (e.g. after a file's
444        // trailing '\n') and the cursor may sit on it — but only when
445        // at_buffer_end is set (otherwise this is just a viewport slice).
446        if text.is_empty()
447            && self.token_idx >= self.tokens.len()
448            && !(self.at_buffer_end && matches!(line_start, LineStart::AfterSourceNewline))
449        {
450            return None;
451        }
452
453        Some(ViewLine {
454            text,
455            char_source_bytes,
456            char_styles,
457            char_visual_cols,
458            visual_to_char,
459            tab_starts,
460            line_start,
461            ends_with_newline,
462        })
463    }
464}
465
466/// Determine if a display line should show a line number
467///
468/// Rules:
469/// - Wrapped continuation (line_start == AfterBreak): no line number
470/// - Injected content (first char has source_offset: None): no line number
471/// - Empty line at beginning or after source newline: yes line number
472/// - Otherwise: show line number
473pub fn should_show_line_number(line: &ViewLine) -> bool {
474    // Wrapped continuations never show line numbers
475    if line.line_start.is_continuation() {
476        return false;
477    }
478
479    // Check if this line contains injected (non-source) content
480    // An empty line is NOT injected if it's at the beginning or after a source newline
481    if line.char_source_bytes.is_empty() {
482        // Empty line - show line number if it's at beginning or after source newline
483        // (not after injected newline or break)
484        return matches!(
485            line.line_start,
486            LineStart::Beginning | LineStart::AfterSourceNewline
487        );
488    }
489
490    let first_char_is_source = line
491        .char_source_bytes
492        .first()
493        .map(|m| m.is_some())
494        .unwrap_or(false);
495
496    if !first_char_is_source {
497        // Injected line (header, etc.) - no line number
498        return false;
499    }
500
501    // Source content after a real line break - show line number
502    true
503}
504
505// ============================================================================
506// Layout: The computed display state for a view
507// ============================================================================
508
509use std::collections::BTreeMap;
510use std::ops::Range;
511
512/// The Layout represents the computed display state for a view.
513///
514/// This is **View state**, not Buffer state. Each split has its own Layout
515/// computed from its view_transform (or base tokens if no transform).
516///
517/// The Layout provides:
518/// - ViewLines for the current viewport region
519/// - Bidirectional mapping between source bytes and view positions
520/// - Scroll limit information
521#[derive(Debug, Clone)]
522pub struct Layout {
523    /// Display lines for the current viewport region
524    pub lines: Vec<ViewLine>,
525
526    /// Source byte range this layout covers
527    pub source_range: Range<usize>,
528
529    /// Total view lines in entire document (estimated or exact)
530    pub total_view_lines: usize,
531
532    /// Total injected lines in entire document (from view transform)
533    pub total_injected_lines: usize,
534
535    /// Fast lookup: source byte → view line index
536    byte_to_line: BTreeMap<usize, usize>,
537}
538
539impl Layout {
540    /// Create a new Layout from ViewLines
541    pub fn new(lines: Vec<ViewLine>, source_range: Range<usize>) -> Self {
542        let mut byte_to_line = BTreeMap::new();
543
544        // Build the byte→line index from char_source_bytes
545        for (line_idx, line) in lines.iter().enumerate() {
546            // Find the first source byte in this line
547            if let Some(first_byte) = line.char_source_bytes.iter().find_map(|m| *m) {
548                byte_to_line.insert(first_byte, line_idx);
549            }
550        }
551
552        // Estimate total view lines (for now, just use what we have)
553        let total_view_lines = lines.len();
554        let total_injected_lines = lines.iter().filter(|l| !should_show_line_number(l)).count();
555
556        Self {
557            lines,
558            source_range,
559            total_view_lines,
560            total_injected_lines,
561            byte_to_line,
562        }
563    }
564
565    /// Build a Layout from a token stream
566    pub fn from_tokens(
567        tokens: &[ViewTokenWire],
568        source_range: Range<usize>,
569        tab_size: usize,
570    ) -> Self {
571        let lines: Vec<ViewLine> =
572            ViewLineIterator::new(tokens, false, false, tab_size, false).collect();
573        Self::new(lines, source_range)
574    }
575
576    /// Find the view position (line, visual column) for a source byte
577    pub fn source_byte_to_view_position(&self, byte: usize) -> Option<(usize, usize)> {
578        // Find the view line containing this byte
579        if let Some((&_line_start_byte, &line_idx)) = self.byte_to_line.range(..=byte).last() {
580            if line_idx < self.lines.len() {
581                let line = &self.lines[line_idx];
582                // Find the character with this source byte, then get its visual column
583                for (char_idx, mapping) in line.char_source_bytes.iter().enumerate() {
584                    if *mapping == Some(byte) {
585                        return Some((line_idx, line.visual_col_at_char(char_idx)));
586                    }
587                }
588                // Byte is in this line's range but not at a character boundary
589                // Return end of line (visual width)
590                return Some((line_idx, line.visual_width()));
591            }
592        }
593        None
594    }
595
596    /// Find the source byte for a view position (line, visual column)
597    pub fn view_position_to_source_byte(&self, line_idx: usize, col: usize) -> Option<usize> {
598        if line_idx >= self.lines.len() {
599            return None;
600        }
601        let line = &self.lines[line_idx];
602        if col < line.visual_width() {
603            // Use O(1) lookup via visual_to_char -> char_source_bytes
604            line.source_byte_at_visual_col(col)
605        } else if !line.char_source_bytes.is_empty() {
606            // Past end of line, return last valid byte
607            line.char_source_bytes.iter().rev().find_map(|m| *m)
608        } else {
609            None
610        }
611    }
612
613    /// Get the source byte for the start of a view line
614    pub fn get_source_byte_for_line(&self, line_idx: usize) -> Option<usize> {
615        if line_idx >= self.lines.len() {
616            return None;
617        }
618        self.lines[line_idx]
619            .char_source_bytes
620            .iter()
621            .find_map(|m| *m)
622    }
623
624    /// Find the nearest view line for a source byte (for stabilization)
625    pub fn find_nearest_view_line(&self, byte: usize) -> usize {
626        if let Some((&_line_start_byte, &line_idx)) = self.byte_to_line.range(..=byte).last() {
627            line_idx.min(self.lines.len().saturating_sub(1))
628        } else {
629            0
630        }
631    }
632
633    /// Calculate the maximum top line for scrolling
634    pub fn max_top_line(&self, viewport_height: usize) -> usize {
635        self.lines.len().saturating_sub(viewport_height)
636    }
637
638    /// Check if there's content below the current layout
639    pub fn has_content_below(&self, buffer_len: usize) -> bool {
640        self.source_range.end < buffer_len
641    }
642}
643
644#[cfg(test)]
645mod tests {
646    use super::*;
647
648    fn make_text_token(text: &str, source_offset: Option<usize>) -> ViewTokenWire {
649        ViewTokenWire {
650            kind: ViewTokenWireKind::Text(text.to_string()),
651            source_offset,
652            style: None,
653        }
654    }
655
656    fn make_newline_token(source_offset: Option<usize>) -> ViewTokenWire {
657        ViewTokenWire {
658            kind: ViewTokenWireKind::Newline,
659            source_offset,
660            style: None,
661        }
662    }
663
664    fn make_break_token() -> ViewTokenWire {
665        ViewTokenWire {
666            kind: ViewTokenWireKind::Break,
667            source_offset: None,
668            style: None,
669        }
670    }
671
672    #[test]
673    fn test_simple_source_lines() {
674        let tokens = vec![
675            make_text_token("Line 1", Some(0)),
676            make_newline_token(Some(6)),
677            make_text_token("Line 2", Some(7)),
678            make_newline_token(Some(13)),
679        ];
680
681        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
682
683        assert_eq!(lines.len(), 2);
684        assert_eq!(lines[0].text, "Line 1\n");
685        assert_eq!(lines[0].line_start, LineStart::Beginning);
686        assert!(should_show_line_number(&lines[0]));
687
688        assert_eq!(lines[1].text, "Line 2\n");
689        assert_eq!(lines[1].line_start, LineStart::AfterSourceNewline);
690        assert!(should_show_line_number(&lines[1]));
691    }
692
693    #[test]
694    fn test_wrapped_continuation() {
695        let tokens = vec![
696            make_text_token("Line 1 start", Some(0)),
697            make_break_token(), // Wrapped
698            make_text_token("continued", Some(12)),
699            make_newline_token(Some(21)),
700        ];
701
702        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
703
704        assert_eq!(lines.len(), 2);
705        assert_eq!(lines[0].line_start, LineStart::Beginning);
706        assert!(should_show_line_number(&lines[0]));
707
708        assert_eq!(lines[1].line_start, LineStart::AfterBreak);
709        assert!(
710            !should_show_line_number(&lines[1]),
711            "Wrapped continuation should NOT show line number"
712        );
713    }
714
715    #[test]
716    fn test_injected_header_then_source() {
717        // This is the bug scenario: header (injected) followed by source content
718        let tokens = vec![
719            // Injected header
720            make_text_token("== HEADER ==", None),
721            make_newline_token(None),
722            // Source content
723            make_text_token("Line 1", Some(0)),
724            make_newline_token(Some(6)),
725        ];
726
727        let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
728
729        assert_eq!(lines.len(), 2);
730
731        // Header line - no line number (injected content)
732        assert_eq!(lines[0].text, "== HEADER ==\n");
733        assert_eq!(lines[0].line_start, LineStart::Beginning);
734        assert!(
735            !should_show_line_number(&lines[0]),
736            "Injected header should NOT show line number"
737        );
738
739        // Source line after header - SHOULD show line number
740        assert_eq!(lines[1].text, "Line 1\n");
741        assert_eq!(lines[1].line_start, LineStart::AfterInjectedNewline);
742        assert!(
743            should_show_line_number(&lines[1]),
744            "BUG: Source line after injected header SHOULD show line number!\n\
745             line_start={:?}, first_char_is_source={}",
746            lines[1].line_start,
747            lines[1]
748                .char_source_bytes
749                .first()
750                .map(|m| m.is_some())
751                .unwrap_or(false)
752        );
753    }
754
#[test]
fn test_mixed_scenario() {
    // Token stream layout:
    //   injected header
    //   source line 1
    //   source line 2, split in two display lines by a Break token
    //   source line 3
    let tokens = vec![
        make_text_token("== Block 1 ==", None),
        make_newline_token(None),
        make_text_token("Line 1", Some(0)),
        make_newline_token(Some(6)),
        make_text_token("Line 2 start", Some(7)),
        make_break_token(),
        make_text_token("wrapped", Some(19)),
        make_newline_token(Some(26)),
        make_text_token("Line 3", Some(27)),
        make_newline_token(Some(33)),
    ];

    let lines = ViewLineIterator::new(&tokens, false, false, 4, false).collect::<Vec<_>>();
    assert_eq!(lines.len(), 5);

    // Whether each display line, in order, should carry a line number:
    // header (no), line 1 (yes), line 2 start (yes),
    // line 2 wrapped continuation (no), line 3 (yes).
    let expect_number = [false, true, true, false, true];
    for (line, &expected) in lines.iter().zip(expect_number.iter()) {
        assert_eq!(should_show_line_number(line), expected);
    }
}
794
#[test]
fn test_is_unprintable_byte() {
    // (byte, expected classification), checked in ascending byte order.
    let cases = [
        // Null byte
        (0x00, true),
        // Control characters 0x01-0x08
        (0x01, true),
        (0x02, true),
        (0x08, true),
        // Tab and LF are allowed
        (0x09, false),
        (0x0A, false),
        // VT, FF, CR
        (0x0B, true),
        (0x0C, true),
        (0x0D, true),
        // 0x0E-0x1F, including SUB (0x1A, appears in PNG headers) and ESC (0x1B)
        (0x0E, true),
        (0x1A, true),
        (0x1B, true),
        (0x1C, true),
        (0x1F, true),
        // Printable ASCII: space, 'A', '~'
        (0x20, false),
        (0x41, false),
        (0x7E, false),
        // DEL
        (0x7F, true),
        // High bytes are allowed (could be UTF-8)
        (0x80, false),
        (0xFF, false),
    ];

    for &(byte, expected) in cases.iter() {
        assert_eq!(is_unprintable_byte(byte), expected);
    }
}
833
#[test]
fn test_format_unprintable_byte() {
    // Each byte is expected to render as two uppercase hex digits in angle brackets.
    let cases = [
        (0x00, "<00>"),
        (0x01, "<01>"),
        (0x1A, "<1A>"),
        (0x7F, "<7F>"),
        (0xFF, "<FF>"),
    ];

    for &(byte, rendered) in cases.iter() {
        assert_eq!(format_unprintable_byte(byte), rendered);
    }
}
842
#[test]
fn test_binary_mode_renders_control_chars() {
    // A single text token with an embedded null byte and 0x01, then a newline.
    let text_token = ViewTokenWire {
        kind: ViewTokenWireKind::Text("Hello\x00World\x01End".to_string()),
        source_offset: Some(0),
        style: None,
    };
    let tokens = vec![text_token, make_newline_token(Some(15))];

    // Build the display lines with the binary-mode flag switched on or off.
    let render = |binary_mode: bool| {
        ViewLineIterator::new(&tokens, binary_mode, false, 4, false).collect::<Vec<_>>()
    };

    // Normal mode: only the line count is checked; control chars get no
    // special formatting here.
    let lines_normal = render(false);
    assert_eq!(lines_normal.len(), 1);

    // Binary mode: control chars must appear as <XX>.
    let lines_binary = render(true);
    assert_eq!(lines_binary.len(), 1);
    let rendered = &lines_binary[0].text;
    assert!(
        rendered.contains("<00>"),
        "Binary mode should format null byte as <00>, got: {}",
        rendered
    );
    assert!(
        rendered.contains("<01>"),
        "Binary mode should format 0x01 as <01>, got: {}",
        rendered
    );
}
874
#[test]
fn test_binary_mode_png_header() {
    // PNG-like header as a valid UTF-8 string containing the SUB control
    // character (0x1A).
    let header_token = ViewTokenWire {
        kind: ViewTokenWireKind::Text(String::from("PNG\r\n\x1A\n")),
        source_offset: Some(0),
        style: None,
    };
    let tokens = vec![header_token];

    // Concatenate the text of every display line produced in binary mode.
    let mut combined = String::new();
    for line in ViewLineIterator::new(&tokens, true, false, 4, false) {
        combined.push_str(&line.text);
    }

    // The 0x1A byte must show up in its <1A> form somewhere in the output.
    assert!(
        combined.contains("<1A>"),
        "PNG SUB byte (0x1A) should be rendered as <1A>, got: {:?}",
        combined
    );
}
896
#[test]
fn test_binary_mode_preserves_printable_chars() {
    // Plain printable text followed by a newline, rendered in binary mode.
    let text_token = ViewTokenWire {
        kind: ViewTokenWireKind::Text(String::from("Normal text 123")),
        source_offset: Some(0),
        style: None,
    };
    let tokens = vec![text_token, make_newline_token(Some(15))];

    let lines = ViewLineIterator::new(&tokens, true, false, 4, false).collect::<Vec<_>>();
    assert_eq!(lines.len(), 1);

    // The printable text must come through unchanged.
    let rendered = lines[0].text.as_str();
    assert!(
        rendered.contains("Normal text 123"),
        "Printable chars should be preserved in binary mode"
    );
}
915
#[test]
fn test_double_width_visual_mappings() {
    // "你好": two Chinese characters, each 3 bytes long and 2 columns wide.
    //   bytes:   你 = 0-2, 好 = 3-5, newline = 6
    //   columns: 你 = 0-1, 好 = 2-3, newline = 4
    let tokens = vec![
        make_text_token("你好", Some(0)),
        make_newline_token(Some(6)),
    ];

    let lines = ViewLineIterator::new(&tokens, false, false, 4, false).collect::<Vec<_>>();
    assert_eq!(lines.len(), 1);
    let line = &lines[0];

    // One visual column entry per column: 2 + 2 + 1.
    let width = line.visual_width();
    assert_eq!(
        width, 5,
        "Expected 5 visual columns (2 for 你 + 2 for 好 + 1 for newline), got {}",
        width
    );

    // One source-byte entry per character: 你, 好, newline.
    let char_count = line.char_source_bytes.len();
    assert_eq!(
        char_count, 3,
        "Expected 3 char entries (你, 好, newline), got {}",
        char_count
    );

    // (visual column, expected source byte, failure message).
    // Both columns of a double-width character resolve to that character's
    // first byte.
    let column_to_byte = [
        (0, 0, "Column 0 should map to byte 0"),
        (1, 0, "Column 1 should map to byte 0"),
        (2, 3, "Column 2 should map to byte 3"),
        (3, 3, "Column 3 should map to byte 3"),
        (4, 6, "Column 4 (newline) should map to byte 6"),
    ];
    for &(col, byte, msg) in column_to_byte.iter() {
        assert_eq!(line.source_byte_at_visual_col(col), Some(byte), "{}", msg);
    }
}
978
#[test]
fn test_mixed_width_visual_mappings() {
    // "a你b": ASCII, a 2-column Chinese character, ASCII.
    //   bytes:   a = 0, 你 = 1-3, b = 4, newline = 5
    //   columns: a = 0, 你 = 1-2, b = 3, newline = 4
    let tokens = vec![
        make_text_token("a你b", Some(0)),
        make_newline_token(Some(5)),
    ];

    let lines = ViewLineIterator::new(&tokens, false, false, 4, false).collect::<Vec<_>>();
    assert_eq!(lines.len(), 1);
    let line = &lines[0];

    // 1 (a) + 2 (你) + 1 (b) + 1 (newline) visual columns.
    let width = line.visual_width();
    assert_eq!(width, 5, "Expected 5 visual columns, got {}", width);

    // Four characters: a, 你, b, newline.
    let char_count = line.char_source_bytes.len();
    assert_eq!(char_count, 4, "Expected 4 char entries, got {}", char_count);

    // (visual column, expected source byte, failure message).
    let column_to_byte = [
        (0, 0, "Column 0 (a) should map to byte 0"),
        (1, 1, "Column 1 (你 col 1) should map to byte 1"),
        (2, 1, "Column 2 (你 col 2) should map to byte 1"),
        (3, 4, "Column 3 (b) should map to byte 4"),
        (4, 5, "Column 4 (newline) should map to byte 5"),
    ];
    for &(col, byte, msg) in column_to_byte.iter() {
        assert_eq!(line.source_byte_at_visual_col(col), Some(byte), "{}", msg);
    }
}
1035
1036    // ==================== CRLF Mode Tests ====================
1037
/// Checks the per-character source byte mapping of a single CRLF-terminated line.
/// In CRLF mode the Newline token is emitted at the \r position and the \n is
/// skipped, so the displayed newline must map to the \r byte.
#[test]
fn test_crlf_char_source_bytes_single_line() {
    // Models the CRLF content "abc\r\n":
    //   a = 0, b = 1, c = 2, \r = 3, \n = 4
    // The Newline token carries source_offset 3 (the \r).
    let tokens = vec![
        make_text_token("abc", Some(0)),
        make_newline_token(Some(3)),
    ];

    let lines = ViewLineIterator::new(&tokens, false, false, 4, false).collect::<Vec<_>>();
    assert_eq!(lines.len(), 1);
    let line = &lines[0];

    // Display text is 'a', 'b', 'c', '\n'.
    assert_eq!(line.text, "abc\n");

    let offsets = &line.char_source_bytes;
    assert_eq!(offsets.len(), 4, "Expected 4 chars: a, b, c, newline");

    // Expected source byte for each display character, with its failure message.
    let expected = [
        (0, "char 'a' should map to byte 0"),
        (1, "char 'b' should map to byte 1"),
        (2, "char 'c' should map to byte 2"),
        (3, "newline should map to byte 3 (\\r position)"),
    ];
    for (idx, &(byte, msg)) in expected.iter().enumerate() {
        assert_eq!(offsets[idx], Some(byte), "{}", msg);
    }
}
1084
/// Checks the per-character source byte mapping across several CRLF lines.
/// This is the critical guard against an offset error that accumulates
/// from one line to the next.
#[test]
fn test_crlf_char_source_bytes_multiple_lines() {
    // Models the CRLF content "abc\r\ndef\r\nghi\r\n" (5 bytes per line):
    //   line 1: a = 0,  b = 1,  c = 2,  \r = 3,  \n = 4
    //   line 2: d = 5,  e = 6,  f = 7,  \r = 8,  \n = 9
    //   line 3: g = 10, h = 11, i = 12, \r = 13, \n = 14
    // Each Newline token sits at its line's \r byte.
    let tokens = vec![
        make_text_token("abc", Some(0)),
        make_newline_token(Some(3)),
        make_text_token("def", Some(5)),
        make_newline_token(Some(8)),
        make_text_token("ghi", Some(10)),
        make_newline_token(Some(13)),
    ];

    let lines = ViewLineIterator::new(&tokens, false, false, 4, false).collect::<Vec<_>>();
    assert_eq!(lines.len(), 3);

    // Per display line: expected text, expected source offsets, failure message.
    // A per-line off-by-one would first show up on line 2 and grow on line 3.
    let expected: [(&str, [Option<usize>; 4], &str); 3] = [
        (
            "abc\n",
            [Some(0), Some(1), Some(2), Some(3)],
            "Line 1 char_source_bytes mismatch",
        ),
        (
            "def\n",
            [Some(5), Some(6), Some(7), Some(8)],
            "Line 2 char_source_bytes mismatch - possible CRLF offset drift",
        ),
        (
            "ghi\n",
            [Some(10), Some(11), Some(12), Some(13)],
            "Line 3 char_source_bytes mismatch - CRLF offset drift accumulated",
        ),
    ];
    for (line, (text, offsets, msg)) in lines.iter().zip(expected.iter()) {
        assert_eq!(line.text, *text);
        assert_eq!(line.char_source_bytes, offsets.to_vec(), "{}", msg);
    }
}
1133
/// Checks visual column to source byte lookup for CRLF content, i.e. that
/// source_byte_at_visual_col returns the right byte on every line.
#[test]
fn test_crlf_visual_to_source_mapping() {
    // Models the CRLF content "ab\r\ncd\r\n":
    //   line 1: a = 0, b = 1, \r = 2, \n = 3
    //   line 2: c = 4, d = 5, \r = 6, \n = 7
    let tokens = vec![
        make_text_token("ab", Some(0)),
        make_newline_token(Some(2)),
        make_text_token("cd", Some(4)),
        make_newline_token(Some(6)),
    ];

    let lines = ViewLineIterator::new(&tokens, false, false, 4, false).collect::<Vec<_>>();

    // (display line index, visual column, expected source byte, failure message).
    let expected = [
        (0, 0, 0, "Line 1 col 0"),
        (0, 1, 1, "Line 1 col 1"),
        (0, 2, 2, "Line 1 col 2 (newline)"),
        (1, 0, 4, "Line 2 col 0"),
        (1, 1, 5, "Line 2 col 1"),
        (1, 2, 6, "Line 2 col 2 (newline)"),
    ];
    for &(line_idx, col, byte, msg) in expected.iter() {
        assert_eq!(
            lines[line_idx].source_byte_at_visual_col(col),
            Some(byte),
            "{}",
            msg
        );
    }
}
1184}