fresh/view/ui/view_pipeline.rs
1//! Token-based view rendering pipeline
2//!
3//! This module provides a clean pipeline for rendering view tokens:
4//!
5//! ```text
6//! source buffer
7//! ↓ build_base_tokens()
8//! Vec<ViewTokenWire> (base tokens with source mappings)
9//! ↓ plugin transform (optional)
10//! Vec<ViewTokenWire> (transformed tokens, may have injected content)
11//! ↓ apply_wrapping() (optional)
12//! Vec<ViewTokenWire> (with Break tokens for wrapped lines)
13//! ↓ ViewLineIterator
14//! Iterator<ViewLine> (one per display line, preserves token info)
15//! ↓ render
16//! Display output
17//! ```
18//!
19//! The key design principle: preserve token-level information through the pipeline
20//! so rendering decisions (like line numbers) can be made based on token types,
21//! not reconstructed from flattened text.
22
23use crate::primitives::ansi::AnsiParser;
24use crate::primitives::display_width::char_width;
25use fresh_core::api::{ViewTokenStyle, ViewTokenWire, ViewTokenWireKind};
26use std::collections::HashSet;
27
/// A display line built from tokens, preserving token-level information
///
/// All per-character vectors are indexed by character position in `text`,
/// while `visual_to_char` is indexed by visual (terminal) column.
#[derive(Debug, Clone)]
pub struct ViewLine {
    /// The display text for this line (tabs expanded to spaces, etc.)
    /// A terminating Newline or Break token is included as a trailing '\n'.
    pub text: String,

    // === Per-CHARACTER mappings (indexed by char position in text) ===
    /// Source byte offset for each character
    /// `None` for characters that have no source position (injected content,
    /// and the '\n' produced by a Break token).
    /// Length == text.chars().count()
    pub char_source_bytes: Vec<Option<usize>>,
    /// Style for each character (from token styles)
    pub char_styles: Vec<Option<ViewTokenStyle>>,
    /// Visual column where each character starts
    pub char_visual_cols: Vec<usize>,

    // === Per-VISUAL-COLUMN mapping (indexed by visual column) ===
    /// Character index at each visual column (for O(1) mouse clicks)
    /// For double-width chars, consecutive visual columns map to the same char index
    /// For an expanded tab, every column of the tab maps to the tab's first space
    /// Zero-width characters contribute no entries
    /// Length == total visual width of line
    pub visual_to_char: Vec<usize>,

    /// Character indices (into `text`) at which a tab expansion starts
    pub tab_starts: HashSet<usize>,
    /// How this line started (what kind of token/boundary preceded it)
    pub line_start: LineStart,
    /// Whether this line ends with a newline character
    /// (set for both Newline tokens and synthetic Break tokens)
    pub ends_with_newline: bool,
}
56
57impl ViewLine {
58 /// Get source byte at a given character index (O(1))
59 #[inline]
60 pub fn source_byte_at_char(&self, char_idx: usize) -> Option<usize> {
61 self.char_source_bytes.get(char_idx).copied().flatten()
62 }
63
64 /// Get character index at a given visual column (O(1))
65 #[inline]
66 pub fn char_at_visual_col(&self, visual_col: usize) -> usize {
67 self.visual_to_char
68 .get(visual_col)
69 .copied()
70 .unwrap_or_else(|| self.char_source_bytes.len().saturating_sub(1))
71 }
72
73 /// Get source byte at a given visual column (O(1) for mouse clicks)
74 #[inline]
75 pub fn source_byte_at_visual_col(&self, visual_col: usize) -> Option<usize> {
76 let char_idx = self.char_at_visual_col(visual_col);
77 self.source_byte_at_char(char_idx)
78 }
79
80 /// Get the visual column for a character at the given index
81 #[inline]
82 pub fn visual_col_at_char(&self, char_idx: usize) -> usize {
83 self.char_visual_cols.get(char_idx).copied().unwrap_or(0)
84 }
85
86 /// Total visual width of this line
87 #[inline]
88 pub fn visual_width(&self) -> usize {
89 self.visual_to_char.len()
90 }
91}
92
/// The kind of boundary that precedes a display line
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineStart {
    /// First line of the view (no preceding token)
    Beginning,
    /// Line after a source Newline token (source_offset: Some)
    AfterSourceNewline,
    /// Line after an injected Newline token (source_offset: None)
    AfterInjectedNewline,
    /// Line after a Break token (wrapped continuation)
    AfterBreak,
}

impl LineStart {
    /// Is this line a wrapped continuation of the previous display line?
    ///
    /// Only `AfterBreak` counts as a continuation; such lines never get a
    /// line number in the gutter. For every other variant this returns
    /// `false`, and whether a line number is shown is decided separately
    /// from the line's content.
    pub fn is_continuation(&self) -> bool {
        match self {
            LineStart::AfterBreak => true,
            LineStart::Beginning
            | LineStart::AfterSourceNewline
            | LineStart::AfterInjectedNewline => false,
        }
    }
}
117
/// Iterator that converts a token stream into display lines
///
/// Each call to `next()` consumes tokens up to and including the next
/// Newline or Break token and yields one `ViewLine`.
pub struct ViewLineIterator<'a> {
    /// The token stream being converted (borrowed, never modified)
    tokens: &'a [ViewTokenWire],
    /// Index of the next token in `tokens` that has not been consumed yet
    token_idx: usize,
    /// How the next line should start (based on what ended the previous line)
    next_line_start: LineStart,
    /// Whether to render in binary mode (unprintable chars shown as code points)
    binary_mode: bool,
    /// Whether to parse ANSI escape sequences (giving them zero visual width)
    ansi_aware: bool,
    /// Tab width for rendering (number of spaces per tab)
    tab_size: usize,
    /// Whether the token stream covers the end of the buffer.
    /// When true, a trailing empty line is emitted after a final source newline
    /// (representing the empty line after a file's trailing '\n').
    at_buffer_end: bool,
}
135
136impl<'a> ViewLineIterator<'a> {
137 /// Create a new ViewLineIterator with all options
138 ///
139 /// - `tokens`: The token stream to convert to display lines
140 /// - `binary_mode`: Whether to render unprintable chars as code points
141 /// - `ansi_aware`: Whether to parse ANSI escape sequences (giving them zero visual width)
142 /// - `tab_size`: Tab width for rendering (number of spaces per tab, should be > 0)
143 /// - `at_buffer_end`: Whether the token stream covers the end of the buffer.
144 /// When true, a trailing empty line is emitted after a final source newline.
145 ///
146 /// Note: If tab_size is 0, it will be treated as 4 (the default) to prevent division by zero.
147 /// This is a defensive measure to handle invalid configuration gracefully.
148 pub fn new(
149 tokens: &'a [ViewTokenWire],
150 binary_mode: bool,
151 ansi_aware: bool,
152 tab_size: usize,
153 at_buffer_end: bool,
154 ) -> Self {
155 // Defensive: treat 0 as 4 (default) to prevent division by zero in tab_expansion_width
156 // This can happen if invalid config (tab_size: 0) is loaded
157 let tab_size = if tab_size == 0 { 4 } else { tab_size };
158 Self {
159 tokens,
160 token_idx: 0,
161 next_line_start: LineStart::Beginning,
162 binary_mode,
163 ansi_aware,
164 tab_size,
165 at_buffer_end,
166 }
167 }
168
169 /// Expand a tab to spaces based on current column and configured tab_size
170 #[inline]
171 fn tab_expansion_width(&self, col: usize) -> usize {
172 self.tab_size - (col % self.tab_size)
173 }
174}
175
/// Tell whether `b` is a control byte that must be displayed as `<XX>`.
///
/// True for the C0 control range (0x00-0x1F) and DEL (0x7F), with two
/// exceptions that render normally: tab (0x09) and newline (0x0A).
/// Printable ASCII and all bytes >= 0x80 yield false.
fn is_unprintable_byte(b: u8) -> bool {
    // The two gaps in the range are exactly tab (0x09) and LF (0x0A).
    matches!(b, 0x00..=0x08 | 0x0B..=0x1F | 0x7F)
}
194
/// Render a byte as a four-character code point marker such as "<00>" or
/// "<1A>": two upper-case hex digits wrapped in angle brackets.
fn format_unprintable_byte(b: u8) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let high = HEX_DIGITS[usize::from(b >> 4)] as char;
    let low = HEX_DIGITS[usize::from(b & 0x0F)] as char;

    let mut marker = String::with_capacity(4);
    marker.push('<');
    marker.push(high);
    marker.push(low);
    marker.push('>');
    marker
}
199
200impl<'a> Iterator for ViewLineIterator<'a> {
201 type Item = ViewLine;
202
203 fn next(&mut self) -> Option<Self::Item> {
204 if self.token_idx >= self.tokens.len() {
205 // All tokens consumed. If the previous line ended with a source
206 // newline there is one more real (empty) document line to emit —
207 // e.g. the empty line after a file's trailing '\n'. Produce it
208 // exactly once, then stop. Only do this when the tokens cover
209 // the actual end of the buffer (not a viewport slice).
210 if self.at_buffer_end && matches!(self.next_line_start, LineStart::AfterSourceNewline) {
211 // Flip to Beginning so the *next* call returns None.
212 self.next_line_start = LineStart::Beginning;
213 return Some(ViewLine {
214 text: String::new(),
215 char_source_bytes: vec![],
216 char_styles: vec![],
217 char_visual_cols: vec![],
218 visual_to_char: vec![],
219 tab_starts: HashSet::new(),
220 line_start: LineStart::AfterSourceNewline,
221 ends_with_newline: false,
222 });
223 }
224 return None;
225 }
226
227 let line_start = self.next_line_start;
228 let mut text = String::new();
229
230 // Per-character tracking (indexed by character position)
231 let mut char_source_bytes: Vec<Option<usize>> = Vec::new();
232 let mut char_styles: Vec<Option<ViewTokenStyle>> = Vec::new();
233 let mut char_visual_cols: Vec<usize> = Vec::new();
234
235 // Per-visual-column tracking (indexed by visual column)
236 let mut visual_to_char: Vec<usize> = Vec::new();
237
238 let mut tab_starts = HashSet::new();
239 let mut col = 0usize; // Current visual column
240 let mut ends_with_newline = false;
241
242 // ANSI parser for tracking escape sequences (reuse existing implementation)
243 let mut ansi_parser = if self.ansi_aware {
244 Some(AnsiParser::new())
245 } else {
246 None
247 };
248
249 /// Helper to add a character with all its mappings
250 macro_rules! add_char {
251 ($ch:expr, $source:expr, $style:expr, $width:expr) => {{
252 let char_idx = char_source_bytes.len();
253
254 // Per-character data
255 text.push($ch);
256 char_source_bytes.push($source);
257 char_styles.push($style);
258 char_visual_cols.push(col);
259
260 // Per-visual-column data (for O(1) mouse clicks)
261 for _ in 0..$width {
262 visual_to_char.push(char_idx);
263 }
264
265 col += $width;
266 }};
267 }
268
269 // Process tokens until we hit a line break
270 while self.token_idx < self.tokens.len() {
271 let token = &self.tokens[self.token_idx];
272 let token_style = token.style.clone();
273
274 match &token.kind {
275 ViewTokenWireKind::Text(t) => {
276 let base = token.source_offset;
277 let t_bytes = t.as_bytes();
278 let mut byte_idx = 0;
279
280 while byte_idx < t_bytes.len() {
281 let b = t_bytes[byte_idx];
282 let source = base.map(|s| s + byte_idx);
283
284 // In binary mode, render unprintable bytes as code points
285 if self.binary_mode && is_unprintable_byte(b) {
286 let formatted = format_unprintable_byte(b);
287 for display_ch in formatted.chars() {
288 add_char!(display_ch, source, token_style.clone(), 1);
289 }
290 byte_idx += 1;
291 continue;
292 }
293
294 // Decode the character at this position
295 let ch = if b < 0x80 {
296 // ASCII character
297 byte_idx += 1;
298 b as char
299 } else {
300 // Multi-byte UTF-8 - decode carefully
301 let remaining = &t_bytes[byte_idx..];
302 match std::str::from_utf8(remaining) {
303 Ok(s) => {
304 if let Some(ch) = s.chars().next() {
305 byte_idx += ch.len_utf8();
306 ch
307 } else {
308 byte_idx += 1;
309 '\u{FFFD}'
310 }
311 }
312 Err(e) => {
313 // Invalid UTF-8 - in binary mode show as hex, otherwise replacement char
314 if self.binary_mode {
315 let formatted = format_unprintable_byte(b);
316 for display_ch in formatted.chars() {
317 add_char!(display_ch, source, token_style.clone(), 1);
318 }
319 byte_idx += 1;
320 continue;
321 } else {
322 // Try to get valid portion, then skip the bad byte
323 let valid_up_to = e.valid_up_to();
324 if valid_up_to > 0 {
325 if let Some(ch) =
326 std::str::from_utf8(&remaining[..valid_up_to])
327 .ok()
328 .and_then(|s| s.chars().next())
329 {
330 byte_idx += ch.len_utf8();
331 ch
332 } else {
333 byte_idx += 1;
334 '\u{FFFD}'
335 }
336 } else {
337 byte_idx += 1;
338 '\u{FFFD}'
339 }
340 }
341 }
342 }
343 };
344
345 if ch == '\t' {
346 // Tab expands to spaces - record start position
347 let tab_start_pos = char_source_bytes.len();
348 tab_starts.insert(tab_start_pos);
349 let spaces = self.tab_expansion_width(col);
350
351 // Tab is ONE character that expands to multiple visual columns
352 let char_idx = char_source_bytes.len();
353 text.push(' '); // First space char
354 char_source_bytes.push(source);
355 char_styles.push(token_style.clone());
356 char_visual_cols.push(col);
357
358 // All visual columns of the tab map to the same char
359 for _ in 0..spaces {
360 visual_to_char.push(char_idx);
361 }
362 col += spaces;
363
364 // Push remaining spaces as separate display chars
365 // (text contains expanded spaces for rendering)
366 for _ in 1..spaces {
367 text.push(' ');
368 char_source_bytes.push(source);
369 char_styles.push(token_style.clone());
370 char_visual_cols
371 .push(col - spaces + char_source_bytes.len() - char_idx);
372 }
373 } else {
374 // Handle ANSI escape sequences - give them width 0
375 let width = if let Some(ref mut parser) = ansi_parser {
376 // Use AnsiParser: parse_char returns None for escape chars
377 if parser.parse_char(ch).is_none() {
378 0 // Part of escape sequence, zero width
379 } else {
380 char_width(ch)
381 }
382 } else {
383 char_width(ch)
384 };
385 add_char!(ch, source, token_style.clone(), width);
386 }
387 }
388 self.token_idx += 1;
389 }
390 ViewTokenWireKind::Space => {
391 add_char!(' ', token.source_offset, token_style, 1);
392 self.token_idx += 1;
393 }
394 ViewTokenWireKind::Newline => {
395 // Newline ends this line - width 1 for the newline char
396 add_char!('\n', token.source_offset, token_style, 1);
397 ends_with_newline = true;
398
399 // Determine how the next line starts
400 self.next_line_start = if token.source_offset.is_some() {
401 LineStart::AfterSourceNewline
402 } else {
403 LineStart::AfterInjectedNewline
404 };
405 self.token_idx += 1;
406 break;
407 }
408 ViewTokenWireKind::Break => {
409 // Break is a synthetic line break from wrapping
410 add_char!('\n', None, None, 1);
411 ends_with_newline = true;
412
413 self.next_line_start = LineStart::AfterBreak;
414 self.token_idx += 1;
415 break;
416 }
417 ViewTokenWireKind::BinaryByte(b) => {
418 // Binary byte rendered as <XX> - all 4 chars map to same source byte
419 let formatted = format_unprintable_byte(*b);
420 for display_ch in formatted.chars() {
421 add_char!(display_ch, token.source_offset, token_style.clone(), 1);
422 }
423 self.token_idx += 1;
424 }
425 }
426 }
427
428 // col's final value is intentionally unused (only needed during iteration)
429 let _ = col;
430
431 // If we consumed all remaining tokens without hitting a Newline or Break,
432 // the content didn't end with a line terminator. Reset next_line_start
433 // so the trailing-empty-line logic (at the top of next()) doesn't
434 // incorrectly fire on the subsequent call. The `ends_with_newline` flag
435 // tells us whether the loop exited via a Newline/Break (true) or by
436 // exhausting all tokens (false).
437 if !ends_with_newline && self.token_idx >= self.tokens.len() {
438 self.next_line_start = LineStart::Beginning;
439 }
440
441 // Don't return empty injected/virtual lines at the end of the token
442 // stream. However, DO return a trailing empty line that follows a source
443 // newline — it represents a real document line (e.g. after a file's
444 // trailing '\n') and the cursor may sit on it — but only when
445 // at_buffer_end is set (otherwise this is just a viewport slice).
446 if text.is_empty()
447 && self.token_idx >= self.tokens.len()
448 && !(self.at_buffer_end && matches!(line_start, LineStart::AfterSourceNewline))
449 {
450 return None;
451 }
452
453 Some(ViewLine {
454 text,
455 char_source_bytes,
456 char_styles,
457 char_visual_cols,
458 visual_to_char,
459 tab_starts,
460 line_start,
461 ends_with_newline,
462 })
463 }
464}
465
466/// Determine if a display line should show a line number
467///
468/// Rules:
469/// - Wrapped continuation (line_start == AfterBreak): no line number
470/// - Injected content (first char has source_offset: None): no line number
471/// - Empty line at beginning or after source newline: yes line number
472/// - Otherwise: show line number
473pub fn should_show_line_number(line: &ViewLine) -> bool {
474 // Wrapped continuations never show line numbers
475 if line.line_start.is_continuation() {
476 return false;
477 }
478
479 // Check if this line contains injected (non-source) content
480 // An empty line is NOT injected if it's at the beginning or after a source newline
481 if line.char_source_bytes.is_empty() {
482 // Empty line - show line number if it's at beginning or after source newline
483 // (not after injected newline or break)
484 return matches!(
485 line.line_start,
486 LineStart::Beginning | LineStart::AfterSourceNewline
487 );
488 }
489
490 let first_char_is_source = line
491 .char_source_bytes
492 .first()
493 .map(|m| m.is_some())
494 .unwrap_or(false);
495
496 if !first_char_is_source {
497 // Injected line (header, etc.) - no line number
498 return false;
499 }
500
501 // Source content after a real line break - show line number
502 true
503}
504
505// ============================================================================
506// Layout: The computed display state for a view
507// ============================================================================
508
509use std::collections::BTreeMap;
510use std::ops::Range;
511
/// The Layout represents the computed display state for a view.
///
/// This is **View state**, not Buffer state. Each split has its own Layout
/// computed from its view_transform (or base tokens if no transform).
///
/// The Layout provides:
/// - ViewLines for the current viewport region
/// - Bidirectional mapping between source bytes and view positions
/// - Scroll limit information
#[derive(Debug, Clone)]
pub struct Layout {
    /// Display lines for the current viewport region
    pub lines: Vec<ViewLine>,

    /// Source byte range this layout covers
    pub source_range: Range<usize>,

    /// Total view lines in entire document (estimated or exact)
    /// NOTE: `Layout::new` currently sets this to `lines.len()`.
    pub total_view_lines: usize,

    /// Total injected lines in entire document (from view transform)
    /// NOTE: `Layout::new` computes this as the number of lines for which
    /// `should_show_line_number` is false, so wrapped continuation lines are
    /// counted here as well.
    pub total_injected_lines: usize,

    /// Fast lookup: source byte → view line index
    /// Keyed by the first source byte found in each line; lines without any
    /// source-mapped character have no entry.
    byte_to_line: BTreeMap<usize, usize>,
}
538
539impl Layout {
540 /// Create a new Layout from ViewLines
541 pub fn new(lines: Vec<ViewLine>, source_range: Range<usize>) -> Self {
542 let mut byte_to_line = BTreeMap::new();
543
544 // Build the byte→line index from char_source_bytes
545 for (line_idx, line) in lines.iter().enumerate() {
546 // Find the first source byte in this line
547 if let Some(first_byte) = line.char_source_bytes.iter().find_map(|m| *m) {
548 byte_to_line.insert(first_byte, line_idx);
549 }
550 }
551
552 // Estimate total view lines (for now, just use what we have)
553 let total_view_lines = lines.len();
554 let total_injected_lines = lines.iter().filter(|l| !should_show_line_number(l)).count();
555
556 Self {
557 lines,
558 source_range,
559 total_view_lines,
560 total_injected_lines,
561 byte_to_line,
562 }
563 }
564
565 /// Build a Layout from a token stream
566 pub fn from_tokens(
567 tokens: &[ViewTokenWire],
568 source_range: Range<usize>,
569 tab_size: usize,
570 ) -> Self {
571 let lines: Vec<ViewLine> =
572 ViewLineIterator::new(tokens, false, false, tab_size, false).collect();
573 Self::new(lines, source_range)
574 }
575
576 /// Find the view position (line, visual column) for a source byte
577 pub fn source_byte_to_view_position(&self, byte: usize) -> Option<(usize, usize)> {
578 // Find the view line containing this byte
579 if let Some((&_line_start_byte, &line_idx)) = self.byte_to_line.range(..=byte).last() {
580 if line_idx < self.lines.len() {
581 let line = &self.lines[line_idx];
582 // Find the character with this source byte, then get its visual column
583 for (char_idx, mapping) in line.char_source_bytes.iter().enumerate() {
584 if *mapping == Some(byte) {
585 return Some((line_idx, line.visual_col_at_char(char_idx)));
586 }
587 }
588 // Byte is in this line's range but not at a character boundary
589 // Return end of line (visual width)
590 return Some((line_idx, line.visual_width()));
591 }
592 }
593 None
594 }
595
596 /// Find the source byte for a view position (line, visual column)
597 pub fn view_position_to_source_byte(&self, line_idx: usize, col: usize) -> Option<usize> {
598 if line_idx >= self.lines.len() {
599 return None;
600 }
601 let line = &self.lines[line_idx];
602 if col < line.visual_width() {
603 // Use O(1) lookup via visual_to_char -> char_source_bytes
604 line.source_byte_at_visual_col(col)
605 } else if !line.char_source_bytes.is_empty() {
606 // Past end of line, return last valid byte
607 line.char_source_bytes.iter().rev().find_map(|m| *m)
608 } else {
609 None
610 }
611 }
612
613 /// Get the source byte for the start of a view line
614 pub fn get_source_byte_for_line(&self, line_idx: usize) -> Option<usize> {
615 if line_idx >= self.lines.len() {
616 return None;
617 }
618 self.lines[line_idx]
619 .char_source_bytes
620 .iter()
621 .find_map(|m| *m)
622 }
623
624 /// Find the nearest view line for a source byte (for stabilization)
625 pub fn find_nearest_view_line(&self, byte: usize) -> usize {
626 if let Some((&_line_start_byte, &line_idx)) = self.byte_to_line.range(..=byte).last() {
627 line_idx.min(self.lines.len().saturating_sub(1))
628 } else {
629 0
630 }
631 }
632
633 /// Calculate the maximum top line for scrolling
634 pub fn max_top_line(&self, viewport_height: usize) -> usize {
635 self.lines.len().saturating_sub(viewport_height)
636 }
637
638 /// Check if there's content below the current layout
639 pub fn has_content_below(&self, buffer_len: usize) -> bool {
640 self.source_range.end < buffer_len
641 }
642}
643
644#[cfg(test)]
645mod tests {
646 use super::*;
647
648 fn make_text_token(text: &str, source_offset: Option<usize>) -> ViewTokenWire {
649 ViewTokenWire {
650 kind: ViewTokenWireKind::Text(text.to_string()),
651 source_offset,
652 style: None,
653 }
654 }
655
656 fn make_newline_token(source_offset: Option<usize>) -> ViewTokenWire {
657 ViewTokenWire {
658 kind: ViewTokenWireKind::Newline,
659 source_offset,
660 style: None,
661 }
662 }
663
664 fn make_break_token() -> ViewTokenWire {
665 ViewTokenWire {
666 kind: ViewTokenWireKind::Break,
667 source_offset: None,
668 style: None,
669 }
670 }
671
672 #[test]
673 fn test_simple_source_lines() {
674 let tokens = vec![
675 make_text_token("Line 1", Some(0)),
676 make_newline_token(Some(6)),
677 make_text_token("Line 2", Some(7)),
678 make_newline_token(Some(13)),
679 ];
680
681 let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
682
683 assert_eq!(lines.len(), 2);
684 assert_eq!(lines[0].text, "Line 1\n");
685 assert_eq!(lines[0].line_start, LineStart::Beginning);
686 assert!(should_show_line_number(&lines[0]));
687
688 assert_eq!(lines[1].text, "Line 2\n");
689 assert_eq!(lines[1].line_start, LineStart::AfterSourceNewline);
690 assert!(should_show_line_number(&lines[1]));
691 }
692
693 #[test]
694 fn test_wrapped_continuation() {
695 let tokens = vec![
696 make_text_token("Line 1 start", Some(0)),
697 make_break_token(), // Wrapped
698 make_text_token("continued", Some(12)),
699 make_newline_token(Some(21)),
700 ];
701
702 let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
703
704 assert_eq!(lines.len(), 2);
705 assert_eq!(lines[0].line_start, LineStart::Beginning);
706 assert!(should_show_line_number(&lines[0]));
707
708 assert_eq!(lines[1].line_start, LineStart::AfterBreak);
709 assert!(
710 !should_show_line_number(&lines[1]),
711 "Wrapped continuation should NOT show line number"
712 );
713 }
714
715 #[test]
716 fn test_injected_header_then_source() {
717 // This is the bug scenario: header (injected) followed by source content
718 let tokens = vec![
719 // Injected header
720 make_text_token("== HEADER ==", None),
721 make_newline_token(None),
722 // Source content
723 make_text_token("Line 1", Some(0)),
724 make_newline_token(Some(6)),
725 ];
726
727 let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
728
729 assert_eq!(lines.len(), 2);
730
731 // Header line - no line number (injected content)
732 assert_eq!(lines[0].text, "== HEADER ==\n");
733 assert_eq!(lines[0].line_start, LineStart::Beginning);
734 assert!(
735 !should_show_line_number(&lines[0]),
736 "Injected header should NOT show line number"
737 );
738
739 // Source line after header - SHOULD show line number
740 assert_eq!(lines[1].text, "Line 1\n");
741 assert_eq!(lines[1].line_start, LineStart::AfterInjectedNewline);
742 assert!(
743 should_show_line_number(&lines[1]),
744 "BUG: Source line after injected header SHOULD show line number!\n\
745 line_start={:?}, first_char_is_source={}",
746 lines[1].line_start,
747 lines[1]
748 .char_source_bytes
749 .first()
750 .map(|m| m.is_some())
751 .unwrap_or(false)
752 );
753 }
754
755 #[test]
756 fn test_mixed_scenario() {
757 // Header -> Source Line 1 -> Source Line 2 (wrapped) -> Source Line 3
758 let tokens = vec![
759 // Injected header
760 make_text_token("== Block 1 ==", None),
761 make_newline_token(None),
762 // Source line 1
763 make_text_token("Line 1", Some(0)),
764 make_newline_token(Some(6)),
765 // Source line 2 (gets wrapped)
766 make_text_token("Line 2 start", Some(7)),
767 make_break_token(),
768 make_text_token("wrapped", Some(19)),
769 make_newline_token(Some(26)),
770 // Source line 3
771 make_text_token("Line 3", Some(27)),
772 make_newline_token(Some(33)),
773 ];
774
775 let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
776
777 assert_eq!(lines.len(), 5);
778
779 // Header - no line number
780 assert!(!should_show_line_number(&lines[0]));
781
782 // Line 1 - yes line number (source after header)
783 assert!(should_show_line_number(&lines[1]));
784
785 // Line 2 start - yes line number
786 assert!(should_show_line_number(&lines[2]));
787
788 // Line 2 wrapped - no line number (continuation)
789 assert!(!should_show_line_number(&lines[3]));
790
791 // Line 3 - yes line number
792 assert!(should_show_line_number(&lines[4]));
793 }
794
795 #[test]
796 fn test_is_unprintable_byte() {
797 // Null byte is unprintable
798 assert!(is_unprintable_byte(0x00));
799
800 // Control characters 0x01-0x08 are unprintable
801 assert!(is_unprintable_byte(0x01));
802 assert!(is_unprintable_byte(0x02));
803 assert!(is_unprintable_byte(0x08));
804
805 // Tab (0x09) and LF (0x0A) are allowed
806 assert!(!is_unprintable_byte(0x09)); // tab
807 assert!(!is_unprintable_byte(0x0A)); // newline
808
809 // VT (0x0B), FF (0x0C), CR (0x0D) are unprintable in binary mode
810 assert!(is_unprintable_byte(0x0B)); // vertical tab
811 assert!(is_unprintable_byte(0x0C)); // form feed
812 assert!(is_unprintable_byte(0x0D)); // carriage return
813
814 // 0x0E-0x1F are all unprintable (including ESC)
815 assert!(is_unprintable_byte(0x0E));
816 assert!(is_unprintable_byte(0x1A)); // SUB - this is in PNG headers
817 assert!(is_unprintable_byte(0x1B)); // ESC
818 assert!(is_unprintable_byte(0x1C));
819 assert!(is_unprintable_byte(0x1F));
820
821 // Printable ASCII (0x20-0x7E) is allowed
822 assert!(!is_unprintable_byte(0x20)); // space
823 assert!(!is_unprintable_byte(0x41)); // 'A'
824 assert!(!is_unprintable_byte(0x7E)); // '~'
825
826 // DEL (0x7F) is unprintable
827 assert!(is_unprintable_byte(0x7F));
828
829 // High bytes (0x80+) are allowed (could be UTF-8)
830 assert!(!is_unprintable_byte(0x80));
831 assert!(!is_unprintable_byte(0xFF));
832 }
833
834 #[test]
835 fn test_format_unprintable_byte() {
836 assert_eq!(format_unprintable_byte(0x00), "<00>");
837 assert_eq!(format_unprintable_byte(0x01), "<01>");
838 assert_eq!(format_unprintable_byte(0x1A), "<1A>");
839 assert_eq!(format_unprintable_byte(0x7F), "<7F>");
840 assert_eq!(format_unprintable_byte(0xFF), "<FF>");
841 }
842
843 #[test]
844 fn test_binary_mode_renders_control_chars() {
845 // Text with null byte and control character
846 let tokens = vec![
847 ViewTokenWire {
848 kind: ViewTokenWireKind::Text("Hello\x00World\x01End".to_string()),
849 source_offset: Some(0),
850 style: None,
851 },
852 make_newline_token(Some(15)),
853 ];
854
855 // Without binary mode - control chars would be rendered raw or as replacement
856 let lines_normal: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
857 assert_eq!(lines_normal.len(), 1);
858 // In normal mode, we don't format control chars specially
859
860 // With binary mode - control chars should be formatted as <XX>
861 let lines_binary: Vec<_> = ViewLineIterator::new(&tokens, true, false, 4, false).collect();
862 assert_eq!(lines_binary.len(), 1);
863 assert!(
864 lines_binary[0].text.contains("<00>"),
865 "Binary mode should format null byte as <00>, got: {}",
866 lines_binary[0].text
867 );
868 assert!(
869 lines_binary[0].text.contains("<01>"),
870 "Binary mode should format 0x01 as <01>, got: {}",
871 lines_binary[0].text
872 );
873 }
874
875 #[test]
876 fn test_binary_mode_png_header() {
877 // PNG-like content with SUB control char (0x1A)
878 // Using valid UTF-8 string with embedded control character
879 let png_like = "PNG\r\n\x1A\n";
880 let tokens = vec![ViewTokenWire {
881 kind: ViewTokenWireKind::Text(png_like.to_string()),
882 source_offset: Some(0),
883 style: None,
884 }];
885
886 let lines: Vec<_> = ViewLineIterator::new(&tokens, true, false, 4, false).collect();
887
888 // Should have rendered the 0x1A as <1A>
889 let combined: String = lines.iter().map(|l| l.text.as_str()).collect();
890 assert!(
891 combined.contains("<1A>"),
892 "PNG SUB byte (0x1A) should be rendered as <1A>, got: {:?}",
893 combined
894 );
895 }
896
897 #[test]
898 fn test_binary_mode_preserves_printable_chars() {
899 let tokens = vec![
900 ViewTokenWire {
901 kind: ViewTokenWireKind::Text("Normal text 123".to_string()),
902 source_offset: Some(0),
903 style: None,
904 },
905 make_newline_token(Some(15)),
906 ];
907
908 let lines: Vec<_> = ViewLineIterator::new(&tokens, true, false, 4, false).collect();
909 assert_eq!(lines.len(), 1);
910 assert!(
911 lines[0].text.contains("Normal text 123"),
912 "Printable chars should be preserved in binary mode"
913 );
914 }
915
916 #[test]
917 fn test_double_width_visual_mappings() {
918 // "你好" - two Chinese characters, each 3 bytes and 2 columns wide
919 // Byte layout: 你=bytes 0-2, 好=bytes 3-5
920 // Visual layout: 你 takes columns 0-1, 好 takes columns 2-3
921 let tokens = vec![
922 make_text_token("你好", Some(0)),
923 make_newline_token(Some(6)),
924 ];
925
926 let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
927 assert_eq!(lines.len(), 1);
928
929 // visual_to_char should have one entry per visual column
930 // 你 = 2 columns, 好 = 2 columns, \n = 1 column = 5 total
931 assert_eq!(
932 lines[0].visual_width(),
933 5,
934 "Expected 5 visual columns (2 for 你 + 2 for 好 + 1 for newline), got {}",
935 lines[0].visual_width()
936 );
937
938 // char_source_bytes should have one entry per character
939 // 3 characters: 你, 好, \n
940 assert_eq!(
941 lines[0].char_source_bytes.len(),
942 3,
943 "Expected 3 char entries (你, 好, newline), got {}",
944 lines[0].char_source_bytes.len()
945 );
946
947 // Both columns of 你 should map to byte 0 via O(1) lookup
948 assert_eq!(
949 lines[0].source_byte_at_visual_col(0),
950 Some(0),
951 "Column 0 should map to byte 0"
952 );
953 assert_eq!(
954 lines[0].source_byte_at_visual_col(1),
955 Some(0),
956 "Column 1 should map to byte 0"
957 );
958
959 // Both columns of 好 should map to byte 3
960 assert_eq!(
961 lines[0].source_byte_at_visual_col(2),
962 Some(3),
963 "Column 2 should map to byte 3"
964 );
965 assert_eq!(
966 lines[0].source_byte_at_visual_col(3),
967 Some(3),
968 "Column 3 should map to byte 3"
969 );
970
971 // Newline maps to byte 6
972 assert_eq!(
973 lines[0].source_byte_at_visual_col(4),
974 Some(6),
975 "Column 4 (newline) should map to byte 6"
976 );
977 }
978
979 #[test]
980 fn test_mixed_width_visual_mappings() {
981 // "a你b" - ASCII, Chinese (2 cols), ASCII
982 // Byte layout: a=0, 你=1-3, b=4
983 // Visual columns: a=0, 你=1-2, b=3
984 let tokens = vec![
985 make_text_token("a你b", Some(0)),
986 make_newline_token(Some(5)),
987 ];
988
989 let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
990 assert_eq!(lines.len(), 1);
991
992 // a=1 col, 你=2 cols, b=1 col, \n=1 col = 5 total visual width
993 assert_eq!(
994 lines[0].visual_width(),
995 5,
996 "Expected 5 visual columns, got {}",
997 lines[0].visual_width()
998 );
999
1000 // 4 characters: a, 你, b, \n
1001 assert_eq!(
1002 lines[0].char_source_bytes.len(),
1003 4,
1004 "Expected 4 char entries, got {}",
1005 lines[0].char_source_bytes.len()
1006 );
1007
1008 // Test O(1) visual column to byte lookup
1009 assert_eq!(
1010 lines[0].source_byte_at_visual_col(0),
1011 Some(0),
1012 "Column 0 (a) should map to byte 0"
1013 );
1014 assert_eq!(
1015 lines[0].source_byte_at_visual_col(1),
1016 Some(1),
1017 "Column 1 (你 col 1) should map to byte 1"
1018 );
1019 assert_eq!(
1020 lines[0].source_byte_at_visual_col(2),
1021 Some(1),
1022 "Column 2 (你 col 2) should map to byte 1"
1023 );
1024 assert_eq!(
1025 lines[0].source_byte_at_visual_col(3),
1026 Some(4),
1027 "Column 3 (b) should map to byte 4"
1028 );
1029 assert_eq!(
1030 lines[0].source_byte_at_visual_col(4),
1031 Some(5),
1032 "Column 4 (newline) should map to byte 5"
1033 );
1034 }
1035
1036 // ==================== CRLF Mode Tests ====================
1037
1038 /// Test that ViewLineIterator correctly maps char_source_bytes for CRLF content.
1039 /// In CRLF mode, the Newline token is emitted at the \r position, and \n is skipped.
1040 /// This test verifies that char_source_bytes correctly tracks source byte positions.
1041 #[test]
1042 fn test_crlf_char_source_bytes_single_line() {
1043 // Simulate CRLF content "abc\r\n" where:
1044 // - bytes: a=0, b=1, c=2, \r=3, \n=4
1045 // - Newline token at source_offset=3 (position of \r)
1046 let tokens = vec![
1047 make_text_token("abc", Some(0)),
1048 make_newline_token(Some(3)), // \r position in CRLF
1049 ];
1050
1051 let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
1052 assert_eq!(lines.len(), 1);
1053
1054 // The ViewLine should have: 'a', 'b', 'c', '\n'
1055 assert_eq!(lines[0].text, "abc\n");
1056
1057 // char_source_bytes should correctly map each display char to source bytes
1058 assert_eq!(
1059 lines[0].char_source_bytes.len(),
1060 4,
1061 "Expected 4 chars: a, b, c, newline"
1062 );
1063 assert_eq!(
1064 lines[0].char_source_bytes[0],
1065 Some(0),
1066 "char 'a' should map to byte 0"
1067 );
1068 assert_eq!(
1069 lines[0].char_source_bytes[1],
1070 Some(1),
1071 "char 'b' should map to byte 1"
1072 );
1073 assert_eq!(
1074 lines[0].char_source_bytes[2],
1075 Some(2),
1076 "char 'c' should map to byte 2"
1077 );
1078 assert_eq!(
1079 lines[0].char_source_bytes[3],
1080 Some(3),
1081 "newline should map to byte 3 (\\r position)"
1082 );
1083 }
1084
1085 /// Test CRLF char_source_bytes across multiple lines.
1086 /// This is the critical test for the accumulating offset bug.
1087 #[test]
1088 fn test_crlf_char_source_bytes_multiple_lines() {
1089 // Simulate CRLF content "abc\r\ndef\r\nghi\r\n" where:
1090 // Line 1: a=0, b=1, c=2, \r=3, \n=4 (5 bytes)
1091 // Line 2: d=5, e=6, f=7, \r=8, \n=9 (5 bytes)
1092 // Line 3: g=10, h=11, i=12, \r=13, \n=14 (5 bytes)
1093 let tokens = vec![
1094 // Line 1
1095 make_text_token("abc", Some(0)),
1096 make_newline_token(Some(3)), // \r at byte 3
1097 // Line 2
1098 make_text_token("def", Some(5)),
1099 make_newline_token(Some(8)), // \r at byte 8
1100 // Line 3
1101 make_text_token("ghi", Some(10)),
1102 make_newline_token(Some(13)), // \r at byte 13
1103 ];
1104
1105 let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
1106 assert_eq!(lines.len(), 3);
1107
1108 // Line 1 verification
1109 assert_eq!(lines[0].text, "abc\n");
1110 assert_eq!(
1111 lines[0].char_source_bytes,
1112 vec![Some(0), Some(1), Some(2), Some(3)],
1113 "Line 1 char_source_bytes mismatch"
1114 );
1115
1116 // Line 2 verification - THIS IS WHERE THE BUG WOULD MANIFEST
1117 // If there's an off-by-one per line, line 2 might have wrong offsets
1118 assert_eq!(lines[1].text, "def\n");
1119 assert_eq!(
1120 lines[1].char_source_bytes,
1121 vec![Some(5), Some(6), Some(7), Some(8)],
1122 "Line 2 char_source_bytes mismatch - possible CRLF offset drift"
1123 );
1124
1125 // Line 3 verification - error accumulates
1126 assert_eq!(lines[2].text, "ghi\n");
1127 assert_eq!(
1128 lines[2].char_source_bytes,
1129 vec![Some(10), Some(11), Some(12), Some(13)],
1130 "Line 3 char_source_bytes mismatch - CRLF offset drift accumulated"
1131 );
1132 }
1133
1134 /// Test CRLF visual column to source byte mapping.
1135 /// Verifies source_byte_at_visual_col works correctly for CRLF content.
1136 #[test]
1137 fn test_crlf_visual_to_source_mapping() {
1138 // CRLF content "ab\r\ncd\r\n"
1139 // Line 1: a=0, b=1, \r=2, \n=3
1140 // Line 2: c=4, d=5, \r=6, \n=7
1141 let tokens = vec![
1142 make_text_token("ab", Some(0)),
1143 make_newline_token(Some(2)),
1144 make_text_token("cd", Some(4)),
1145 make_newline_token(Some(6)),
1146 ];
1147
1148 let lines: Vec<_> = ViewLineIterator::new(&tokens, false, false, 4, false).collect();
1149
1150 // Line 1: visual columns 0,1 should map to bytes 0,1
1151 assert_eq!(
1152 lines[0].source_byte_at_visual_col(0),
1153 Some(0),
1154 "Line 1 col 0"
1155 );
1156 assert_eq!(
1157 lines[0].source_byte_at_visual_col(1),
1158 Some(1),
1159 "Line 1 col 1"
1160 );
1161 assert_eq!(
1162 lines[0].source_byte_at_visual_col(2),
1163 Some(2),
1164 "Line 1 col 2 (newline)"
1165 );
1166
1167 // Line 2: visual columns 0,1 should map to bytes 4,5
1168 assert_eq!(
1169 lines[1].source_byte_at_visual_col(0),
1170 Some(4),
1171 "Line 2 col 0"
1172 );
1173 assert_eq!(
1174 lines[1].source_byte_at_visual_col(1),
1175 Some(5),
1176 "Line 2 col 1"
1177 );
1178 assert_eq!(
1179 lines[1].source_byte_at_visual_col(2),
1180 Some(6),
1181 "Line 2 col 2 (newline)"
1182 );
1183 }
1184}