Skip to main content

fresh/primitives/
visual_layout.rs

//! Unified visual layout calculations for text display
//!
//! This module provides consistent handling of visual column calculations
//! across all editor operations: rendering, mouse clicks, and cursor navigation.
//!
//! Key concepts:
//! - **Character index**: Position in the character sequence (0, 1, 2, ...)
//! - **Visual column**: Screen column position accounting for char widths
//! - **Source byte**: Byte offset in the source buffer
//!
//! Handles:
//! - ANSI escape sequences (zero visual width)
//! - Double-width characters (CJK, emoji)
//! - Tab expansion
//! - Zero-width Unicode characters
16
17use crate::primitives::ansi::AnsiParser;
18use crate::primitives::display_width::char_width;
19
/// Distance between tab stops, in terminal columns.
pub const TAB_WIDTH: usize = 8;

/// Number of columns a tab occupies when it starts at column `col`.
///
/// The result is the distance from `col` to the next multiple of
/// [`TAB_WIDTH`], so it is always in `1..=TAB_WIDTH`; a tab that starts
/// exactly on a tab stop takes the full width.
#[inline]
pub fn tab_expansion_width(col: usize) -> usize {
    let offset_past_stop = col % TAB_WIDTH;
    TAB_WIDTH - offset_past_stop
}
28
/// Lookup tables for a single display line.
///
/// Two of the vectors are indexed by character position and one by visual
/// column, so every query below is a plain slice lookup.
#[derive(Clone, Debug, Default)]
pub struct LineMappings {
    /// Source byte of each character, indexed by character position.
    /// `None` marks a character with no source byte.
    pub char_source_bytes: Vec<Option<usize>>,

    /// Visual column of each character, indexed by character position.
    /// `LineMappingsBuilder` records the running column here, so a
    /// zero-width character gets the column at which the next visible
    /// character starts.
    pub char_visual_cols: Vec<usize>,

    /// Character index occupying each visual column, indexed by visual column.
    /// A character wider than one column appears in consecutive entries.
    pub visual_to_char: Vec<usize>,

    /// Total visual width of the line.
    pub total_visual_width: usize,
}

impl LineMappings {
    /// Source byte of the character at `char_idx`.
    ///
    /// Returns `None` both when the index is out of range and when the
    /// character has no source byte.
    #[inline]
    pub fn source_byte_at_char(&self, char_idx: usize) -> Option<usize> {
        match self.char_source_bytes.get(char_idx) {
            Some(&slot) => slot,
            None => None,
        }
    }

    /// Visual column of the character at `char_idx`, or `0` when the index
    /// is out of range.
    #[inline]
    pub fn visual_col_at_char(&self, char_idx: usize) -> usize {
        match self.char_visual_cols.get(char_idx) {
            Some(&col) => col,
            None => 0,
        }
    }

    /// Character index displayed at `visual_col` (used for mouse clicks).
    ///
    /// Columns past the end of the line resolve to the last character index
    /// (`0` for an empty line).
    #[inline]
    pub fn char_at_visual_col(&self, visual_col: usize) -> usize {
        if let Some(&char_idx) = self.visual_to_char.get(visual_col) {
            return char_idx;
        }
        // Past end of line: clamp to the final character.
        self.char_source_bytes.len().saturating_sub(1)
    }

    /// Source byte of the character displayed at `visual_col`.
    #[inline]
    pub fn source_byte_at_visual_col(&self, visual_col: usize) -> Option<usize> {
        self.source_byte_at_char(self.char_at_visual_col(visual_col))
    }

    /// Byte position just after the last character that has a source byte,
    /// or `0` when no character has one.
    ///
    /// NOTE(review): this adds 1 to the last known source byte regardless of
    /// that character's encoded length — confirm callers do not rely on it
    /// for lines ending in a multi-byte character.
    #[inline]
    pub fn line_end_byte(&self) -> usize {
        for slot in self.char_source_bytes.iter().rev() {
            if let Some(last_byte) = *slot {
                return last_byte + 1;
            }
        }
        0
    }
}
92
93/// Builder for constructing LineMappings incrementally
94#[derive(Debug)]
95pub struct LineMappingsBuilder {
96    mappings: LineMappings,
97    current_visual_col: usize,
98    ansi_parser: Option<AnsiParser>,
99}
100
101impl LineMappingsBuilder {
102    /// Create a new builder, optionally with ANSI parsing enabled
103    pub fn new(has_ansi: bool) -> Self {
104        Self {
105            mappings: LineMappings::default(),
106            current_visual_col: 0,
107            ansi_parser: if has_ansi {
108                Some(AnsiParser::new())
109            } else {
110                None
111            },
112        }
113    }
114
115    /// Add a character to the mappings
116    ///
117    /// Returns the visual width of the character (0 for ANSI/zero-width, 1-2 for visible chars)
118    pub fn add_char(&mut self, ch: char, source_byte: Option<usize>) -> usize {
119        // Check if this is part of an ANSI escape sequence
120        if let Some(ref mut parser) = self.ansi_parser {
121            if parser.parse_char(ch).is_none() {
122                // ANSI escape character - zero visual width
123                let _char_idx = self.mappings.char_source_bytes.len();
124                self.mappings.char_source_bytes.push(source_byte);
125                self.mappings.char_visual_cols.push(self.current_visual_col);
126                // No entry in visual_to_char for zero-width chars
127                return 0;
128            }
129        }
130
131        // Regular character (possibly zero-width Unicode)
132        let width = if ch == '\t' {
133            tab_expansion_width(self.current_visual_col)
134        } else {
135            char_width(ch)
136        };
137
138        let char_idx = self.mappings.char_source_bytes.len();
139        self.mappings.char_source_bytes.push(source_byte);
140        self.mappings.char_visual_cols.push(self.current_visual_col);
141
142        // Add visual column entries for this character
143        for _ in 0..width {
144            self.mappings.visual_to_char.push(char_idx);
145        }
146
147        self.current_visual_col += width;
148        width
149    }
150
151    /// Add a tab character with custom expansion
152    pub fn add_tab(&mut self, source_byte: Option<usize>) -> usize {
153        let width = tab_expansion_width(self.current_visual_col);
154        let char_idx = self.mappings.char_source_bytes.len();
155
156        self.mappings.char_source_bytes.push(source_byte);
157        self.mappings.char_visual_cols.push(self.current_visual_col);
158
159        for _ in 0..width {
160            self.mappings.visual_to_char.push(char_idx);
161        }
162
163        self.current_visual_col += width;
164        width
165    }
166
167    /// Get the current visual column
168    pub fn current_visual_col(&self) -> usize {
169        self.current_visual_col
170    }
171
172    /// Finish building and return the mappings
173    pub fn finish(mut self) -> LineMappings {
174        self.mappings.total_visual_width = self.current_visual_col;
175        self.mappings
176    }
177}
178
179/// Calculate visual width of a string, handling ANSI escapes and tabs
180///
181/// This is the canonical function for visual width calculation.
182/// Use this instead of `str_width()` when the text may contain ANSI codes or tabs.
183pub fn visual_width(s: &str, start_col: usize) -> usize {
184    if !s.contains('\x1b') && !s.contains('\t') {
185        // Fast path: no special handling needed
186        return crate::primitives::display_width::str_width(s);
187    }
188
189    let mut col = start_col;
190    let mut parser = AnsiParser::new();
191
192    for ch in s.chars() {
193        if parser.parse_char(ch).is_none() {
194            continue; // ANSI escape char, skip
195        }
196        if ch == '\t' {
197            col += tab_expansion_width(col);
198        } else {
199            col += char_width(ch);
200        }
201    }
202
203    col - start_col
204}
205
206/// Convert byte offset to visual column (ANSI-aware, tab-aware)
207///
208/// Given a byte offset within the string, returns the visual column at that position.
209pub fn byte_to_visual_col(s: &str, byte_offset: usize) -> usize {
210    let clamped_offset = byte_offset.min(s.len());
211
212    if !s.contains('\x1b') && !s.contains('\t') {
213        // Fast path: just calculate width of the prefix
214        return crate::primitives::display_width::str_width(&s[..clamped_offset]);
215    }
216
217    let mut col = 0;
218    let mut current_byte = 0;
219    let mut parser = AnsiParser::new();
220
221    for ch in s.chars() {
222        if current_byte >= clamped_offset {
223            break;
224        }
225
226        if parser.parse_char(ch).is_some() {
227            // Visible character
228            if ch == '\t' {
229                col += tab_expansion_width(col);
230            } else {
231                col += char_width(ch);
232            }
233        }
234        // ANSI chars don't add to visual column
235
236        current_byte += ch.len_utf8();
237    }
238
239    col
240}
241
242/// Convert visual column to byte offset (ANSI-aware, tab-aware)
243///
244/// Given a visual column, returns the byte offset of the character at or after that column.
245/// If the visual column is beyond the string's width, returns the string's length.
246pub fn visual_col_to_byte(s: &str, target_visual_col: usize) -> usize {
247    if !s.contains('\x1b') && !s.contains('\t') {
248        // Fast path: use simple character iteration (no ANSI, no tabs)
249        let mut col = 0;
250        for (byte_idx, ch) in s.char_indices() {
251            let width = char_width(ch);
252            // Check if target falls within this character's visual range [col, col+width)
253            if target_visual_col < col + width {
254                return byte_idx;
255            }
256            col += width;
257        }
258        return s.len();
259    }
260
261    let mut col = 0;
262    let mut parser = AnsiParser::new();
263
264    for (byte_idx, ch) in s.char_indices() {
265        if parser.parse_char(ch).is_some() {
266            // Visible character - check if target falls within this char's range
267            let width = if ch == '\t' {
268                tab_expansion_width(col)
269            } else {
270                char_width(ch)
271            };
272
273            // Target is within [col, col+width) range of this character
274            if target_visual_col < col + width {
275                return byte_idx;
276            }
277
278            col += width;
279        }
280        // ANSI chars: don't add to visual column, don't match target
281    }
282
283    s.len()
284}
285
286/// Build complete line mappings from text and source byte information
287///
288/// This is used when constructing ViewLine during token processing.
289pub fn build_line_mappings(
290    text: &str,
291    source_bytes: impl Iterator<Item = Option<usize>>,
292    has_ansi: bool,
293) -> LineMappings {
294    let mut builder = LineMappingsBuilder::new(has_ansi);
295    let mut source_iter = source_bytes;
296
297    for ch in text.chars() {
298        let source_byte = source_iter.next().flatten();
299        builder.add_char(ch, source_byte);
300    }
301
302    builder.finish()
303}
304
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds `(char, source byte)` pairs through a fresh builder.
    fn mappings_for(has_ansi: bool, chars: &[(char, usize)]) -> LineMappings {
        let mut builder = LineMappingsBuilder::new(has_ansi);
        for &(ch, byte) in chars {
            builder.add_char(ch, Some(byte));
        }
        builder.finish()
    }

    /// Checks `visual_width(text, 0)` for every `(text, expected)` pair.
    fn check_widths(cases: &[(&str, usize)]) {
        for &(text, expected) in cases {
            assert_eq!(visual_width(text, 0), expected, "text: {:?}", text);
        }
    }

    /// Checks `byte_to_visual_col(s, byte)` for every `(byte, expected)` pair.
    fn check_byte_to_col(s: &str, cases: &[(usize, usize)]) {
        for &(byte, expected) in cases {
            assert_eq!(byte_to_visual_col(s, byte), expected, "byte: {}", byte);
        }
    }

    /// Checks `visual_col_to_byte(s, col)` for every `(col, expected)` pair.
    fn check_col_to_byte(s: &str, cases: &[(usize, usize)]) {
        for &(col, expected) in cases {
            assert_eq!(visual_col_to_byte(s, col), expected, "col: {}", col);
        }
    }

    #[test]
    fn test_visual_width_ascii() {
        check_widths(&[("Hello", 5), ("", 0)]);
    }

    #[test]
    fn test_visual_width_with_tabs() {
        check_widths(&[
            ("\t", 8),     // tab at column 0 fills a whole tab stop
            ("1234\t", 8), // tab at column 4 adds 4
            ("12\t", 8),   // "12" (2) + tab (6 to reach 8)
        ]);
    }

    #[test]
    fn test_visual_width_with_ansi() {
        // Escape sequences contribute no width.
        check_widths(&[
            ("\x1b[31mRed\x1b[0m", 3),
            ("\x1b[1;31;4mBold\x1b[0m", 4),
        ]);
    }

    #[test]
    fn test_visual_width_cjk() {
        // Each CJK character is two columns wide.
        check_widths(&[("你好", 4), ("Hello你好", 9)]);
    }

    #[test]
    fn test_byte_to_visual_col_simple() {
        check_byte_to_col("Hello", &[(0, 0), (1, 1), (5, 5)]);
    }

    #[test]
    fn test_byte_to_visual_col_with_ansi() {
        // "\x1b[31m" is 5 bytes, "Red" is 3 bytes.
        check_byte_to_col(
            "\x1b[31mRed",
            &[
                (0, 0), // at ESC
                (5, 0), // at 'R' (ANSI prefix has 0 width)
                (6, 1), // at 'e'
                (8, 3), // past end
            ],
        );
    }

    #[test]
    fn test_byte_to_visual_col_with_cjk() {
        // "你" is 3 bytes and 2 columns.
        check_byte_to_col(
            "a你b",
            &[
                (0, 0), // 'a'
                (1, 1), // '你' start
                (4, 3), // 'b'
            ],
        );
    }

    #[test]
    fn test_visual_col_to_byte_simple() {
        check_col_to_byte(
            "Hello",
            &[
                (0, 0),
                (3, 3),
                (5, 5),
                (10, 5), // past end
            ],
        );
    }

    #[test]
    fn test_visual_col_to_byte_with_ansi() {
        // "\x1b[31m" is 5 bytes, "Red" is 3 bytes.
        check_col_to_byte(
            "\x1b[31mRed",
            &[
                (0, 5), // visual col 0 = 'R' at byte 5
                (1, 6), // visual col 1 = 'e' at byte 6
                (3, 8), // past end
            ],
        );
    }

    #[test]
    fn test_visual_col_to_byte_with_cjk() {
        // "a你b": 'a' at byte 0, '你' at bytes 1-3, 'b' at byte 4.
        check_col_to_byte(
            "a你b",
            &[
                (0, 0), // 'a'
                (1, 1), // '你' (cols 1 and 2 both map to byte 1)
                (2, 1), // still '你'
                (3, 4), // 'b'
            ],
        );
    }

    #[test]
    fn test_line_mappings_builder_simple() {
        let mappings = mappings_for(false, &[('H', 0), ('i', 1)]);

        assert_eq!(mappings.char_source_bytes.len(), 2);
        assert_eq!(mappings.visual_to_char.len(), 2);
        for idx in 0..2 {
            assert_eq!(mappings.source_byte_at_char(idx), Some(idx));
            assert_eq!(mappings.char_at_visual_col(idx), idx);
        }
    }

    #[test]
    fn test_line_mappings_builder_with_cjk() {
        // 'a' is 1 column, '你' is 2 columns, 'b' is 1 column.
        let mappings = mappings_for(false, &[('a', 0), ('你', 1), ('b', 4)]);

        assert_eq!(mappings.char_source_bytes.len(), 3);
        assert_eq!(mappings.visual_to_char.len(), 4); // 1 + 2 + 1

        // Clicks: col 0 -> 'a', cols 1 and 2 -> '你', col 3 -> 'b'.
        let clicks = [(0, Some(0)), (1, Some(1)), (2, Some(1)), (3, Some(4))];
        for &(col, expected) in &clicks {
            assert_eq!(mappings.source_byte_at_visual_col(col), expected);
        }
    }

    #[test]
    fn test_line_mappings_builder_with_ansi() {
        // "\x1b[31mA": ANSI prefix (5 chars) followed by 'A'.
        let mappings = mappings_for(
            true,
            &[('\x1b', 0), ('[', 1), ('3', 2), ('1', 3), ('m', 4), ('A', 5)],
        );

        // 6 characters total, but only 'A' is visible.
        assert_eq!(mappings.char_source_bytes.len(), 6);
        assert_eq!(mappings.visual_to_char.len(), 1);
        assert_eq!(mappings.total_visual_width, 1);

        // Source bytes are kept for escape characters too.
        assert_eq!(mappings.source_byte_at_char(0), Some(0)); // ESC
        assert_eq!(mappings.source_byte_at_char(5), Some(5)); // 'A'

        // Visual col 0 maps to char 5 ('A').
        assert_eq!(mappings.char_at_visual_col(0), 5);
        assert_eq!(mappings.source_byte_at_visual_col(0), Some(5));
    }

    #[test]
    fn test_line_mappings_cursor_on_ansi() {
        // "\x1b[31mHi": a cursor at byte 0 (ESC) should still resolve.
        let mappings = mappings_for(
            true,
            &[
                ('\x1b', 0),
                ('[', 1),
                ('3', 2),
                ('1', 3),
                ('m', 4),
                ('H', 5),
                ('i', 6),
            ],
        );

        // Source bytes can be looked up for any char, including ANSI ones.
        assert_eq!(mappings.source_byte_at_char(0), Some(0)); // ESC at byte 0
        assert_eq!(mappings.source_byte_at_char(1), Some(1)); // '[' at byte 1

        // ANSI chars sit at column 0, the same column where 'H' is displayed.
        let columns = [(0, 0), (4, 0), (5, 0), (6, 1)];
        for &(char_idx, expected_col) in &columns {
            assert_eq!(mappings.visual_col_at_char(char_idx), expected_col);
        }
    }
}