Skip to main content

fresh/primitives/
visual_layout.rs

//! Unified visual layout calculations for text display
//!
//! This module provides consistent handling of visual column calculations
//! across all editor operations: rendering, mouse clicks, and cursor navigation.
//!
//! Key concepts:
//! - **Character index**: Position in the character sequence (0, 1, 2, ...)
//! - **Visual column**: Screen column position accounting for char widths
//! - **Source byte**: Byte offset in the source buffer
//!
//! Handles:
//! - ANSI escape sequences (zero visual width)
//! - Double-width characters (CJK, emoji)
//! - Tab expansion
//! - Zero-width Unicode characters
16
17use crate::primitives::ansi::AnsiParser;
18use crate::primitives::display_width::char_width;
19use crate::primitives::display_width::str_width;
20use std::ops::Range;
21
/// Distance, in columns, between consecutive terminal tab stops.
pub const TAB_WIDTH: usize = 8;

/// Number of columns a tab occupies when it starts at visual column `col`.
///
/// A tab advances to the next multiple of [`TAB_WIDTH`], so the result is
/// always in `1..=TAB_WIDTH`; a tab that starts exactly on a tab stop spans a
/// full `TAB_WIDTH` columns.
#[inline]
pub fn tab_expansion_width(col: usize) -> usize {
    let past_last_stop = col % TAB_WIDTH;
    TAB_WIDTH - past_last_stop
}
30
/// Lookup tables for one display line, relating character indices, visual
/// columns and source bytes. Every query is a plain `Vec` index.
#[derive(Debug, Clone, Default)]
pub struct LineMappings {
    /// Source byte of each character, indexed by character position.
    /// Has one entry per character of the processed text; `None` marks a
    /// character with no source byte.
    pub char_source_bytes: Vec<Option<usize>>,

    /// Visual column of each character, indexed by character position.
    /// A zero-width character records the column the next visible character
    /// will start at.
    pub char_visual_cols: Vec<usize>,

    /// Character index shown at each visual column, indexed by visual column.
    /// A multi-column character repeats its index once per column it covers,
    /// so the length equals the line's total visual width.
    pub visual_to_char: Vec<usize>,

    /// Total visual width of the line, in columns.
    pub total_visual_width: usize,
}

impl LineMappings {
    /// Source byte of the character at `char_idx`.
    ///
    /// Returns `None` both when `char_idx` is out of range and when the
    /// character has no source byte.
    #[inline]
    pub fn source_byte_at_char(&self, char_idx: usize) -> Option<usize> {
        self.char_source_bytes
            .get(char_idx)
            .and_then(|source_byte| *source_byte)
    }

    /// Visual column of the character at `char_idx`, or `0` when the index is
    /// out of range.
    #[inline]
    pub fn visual_col_at_char(&self, char_idx: usize) -> usize {
        match self.char_visual_cols.get(char_idx) {
            Some(&col) => col,
            None => 0,
        }
    }

    /// Character index displayed at `visual_col` (used for mouse clicks).
    ///
    /// A column past the end of the line resolves to the last character
    /// index, which is `0` for an empty line.
    #[inline]
    pub fn char_at_visual_col(&self, visual_col: usize) -> usize {
        match self.visual_to_char.get(visual_col) {
            Some(&char_idx) => char_idx,
            // Past end of line: clamp to the last character.
            None => self.char_source_bytes.len().saturating_sub(1),
        }
    }

    /// Source byte of the character displayed at `visual_col` (used for mouse
    /// clicks). Combines [`Self::char_at_visual_col`] and
    /// [`Self::source_byte_at_char`].
    #[inline]
    pub fn source_byte_at_visual_col(&self, visual_col: usize) -> Option<usize> {
        self.source_byte_at_char(self.char_at_visual_col(visual_col))
    }

    /// Source byte just past the end of the line.
    ///
    /// Takes the last character that has a source byte and adds one; returns
    /// `0` when no character has a source byte.
    ///
    /// NOTE(review): the `+ 1` treats that last character as one byte wide.
    /// For a multi-byte final character the result points inside it — confirm
    /// callers only need an "after the last start byte" sentinel.
    #[inline]
    pub fn line_end_byte(&self) -> usize {
        self.char_source_bytes
            .iter()
            .rev()
            .copied()
            .flatten()
            .next()
            .map_or(0, |last_byte| last_byte + 1)
    }
}
94
95/// Builder for constructing LineMappings incrementally
96#[derive(Debug)]
97pub struct LineMappingsBuilder {
98    mappings: LineMappings,
99    current_visual_col: usize,
100    ansi_parser: Option<AnsiParser>,
101}
102
103impl LineMappingsBuilder {
104    /// Create a new builder, optionally with ANSI parsing enabled
105    pub fn new(has_ansi: bool) -> Self {
106        Self {
107            mappings: LineMappings::default(),
108            current_visual_col: 0,
109            ansi_parser: if has_ansi {
110                Some(AnsiParser::new())
111            } else {
112                None
113            },
114        }
115    }
116
117    /// Add a character to the mappings
118    ///
119    /// Returns the visual width of the character (0 for ANSI/zero-width, 1-2 for visible chars)
120    pub fn add_char(&mut self, ch: char, source_byte: Option<usize>) -> usize {
121        // Check if this is part of an ANSI escape sequence
122        if let Some(ref mut parser) = self.ansi_parser {
123            if parser.parse_char(ch).is_none() {
124                // ANSI escape character - zero visual width
125                let _char_idx = self.mappings.char_source_bytes.len();
126                self.mappings.char_source_bytes.push(source_byte);
127                self.mappings.char_visual_cols.push(self.current_visual_col);
128                // No entry in visual_to_char for zero-width chars
129                return 0;
130            }
131        }
132
133        // Regular character (possibly zero-width Unicode)
134        let width = if ch == '\t' {
135            tab_expansion_width(self.current_visual_col)
136        } else {
137            char_width(ch)
138        };
139
140        let char_idx = self.mappings.char_source_bytes.len();
141        self.mappings.char_source_bytes.push(source_byte);
142        self.mappings.char_visual_cols.push(self.current_visual_col);
143
144        // Add visual column entries for this character
145        for _ in 0..width {
146            self.mappings.visual_to_char.push(char_idx);
147        }
148
149        self.current_visual_col += width;
150        width
151    }
152
153    /// Add a tab character with custom expansion
154    pub fn add_tab(&mut self, source_byte: Option<usize>) -> usize {
155        let width = tab_expansion_width(self.current_visual_col);
156        let char_idx = self.mappings.char_source_bytes.len();
157
158        self.mappings.char_source_bytes.push(source_byte);
159        self.mappings.char_visual_cols.push(self.current_visual_col);
160
161        for _ in 0..width {
162            self.mappings.visual_to_char.push(char_idx);
163        }
164
165        self.current_visual_col += width;
166        width
167    }
168
169    /// Get the current visual column
170    pub fn current_visual_col(&self) -> usize {
171        self.current_visual_col
172    }
173
174    /// Finish building and return the mappings
175    pub fn finish(mut self) -> LineMappings {
176        self.mappings.total_visual_width = self.current_visual_col;
177        self.mappings
178    }
179}
180
181/// Calculate visual width of a string, handling ANSI escapes and tabs
182///
183/// This is the canonical function for visual width calculation.
184/// Use this instead of `str_width()` when the text may contain ANSI codes or tabs.
185pub fn visual_width(s: &str, start_col: usize) -> usize {
186    if !s.contains('\x1b') && !s.contains('\t') {
187        // Fast path: no special handling needed
188        return crate::primitives::display_width::str_width(s);
189    }
190
191    let mut col = start_col;
192    let mut parser = AnsiParser::new();
193
194    for ch in s.chars() {
195        if parser.parse_char(ch).is_none() {
196            continue; // ANSI escape char, skip
197        }
198        if ch == '\t' {
199            col += tab_expansion_width(col);
200        } else {
201            col += char_width(ch);
202        }
203    }
204
205    col - start_col
206}
207
208/// Convert byte offset to visual column (ANSI-aware, tab-aware)
209///
210/// Given a byte offset within the string, returns the visual column at that position.
211pub fn byte_to_visual_col(s: &str, byte_offset: usize) -> usize {
212    let clamped_offset = byte_offset.min(s.len());
213
214    if !s.contains('\x1b') && !s.contains('\t') {
215        // Fast path: just calculate width of the prefix
216        return crate::primitives::display_width::str_width(&s[..clamped_offset]);
217    }
218
219    let mut col = 0;
220    let mut current_byte = 0;
221    let mut parser = AnsiParser::new();
222
223    for ch in s.chars() {
224        if current_byte >= clamped_offset {
225            break;
226        }
227
228        if parser.parse_char(ch).is_some() {
229            // Visible character
230            if ch == '\t' {
231                col += tab_expansion_width(col);
232            } else {
233                col += char_width(ch);
234            }
235        }
236        // ANSI chars don't add to visual column
237
238        current_byte += ch.len_utf8();
239    }
240
241    col
242}
243
244/// Convert visual column to byte offset (ANSI-aware, tab-aware)
245///
246/// Given a visual column, returns the byte offset of the character at or after that column.
247/// If the visual column is beyond the string's width, returns the string's length.
248pub fn visual_col_to_byte(s: &str, target_visual_col: usize) -> usize {
249    if !s.contains('\x1b') && !s.contains('\t') {
250        // Fast path: use simple character iteration (no ANSI, no tabs)
251        let mut col = 0;
252        for (byte_idx, ch) in s.char_indices() {
253            let width = char_width(ch);
254            // Check if target falls within this character's visual range [col, col+width)
255            if target_visual_col < col + width {
256                return byte_idx;
257            }
258            col += width;
259        }
260        return s.len();
261    }
262
263    let mut col = 0;
264    let mut parser = AnsiParser::new();
265
266    for (byte_idx, ch) in s.char_indices() {
267        if parser.parse_char(ch).is_some() {
268            // Visible character - check if target falls within this char's range
269            let width = if ch == '\t' {
270                tab_expansion_width(col)
271            } else {
272                char_width(ch)
273            };
274
275            // Target is within [col, col+width) range of this character
276            if target_visual_col < col + width {
277                return byte_idx;
278            }
279
280            col += width;
281        }
282        // ANSI chars: don't add to visual column, don't match target
283    }
284
285    s.len()
286}
287
288/// Build complete line mappings from text and source byte information
289///
290/// This is used when constructing ViewLine during token processing.
291pub fn build_line_mappings(
292    text: &str,
293    source_bytes: impl Iterator<Item = Option<usize>>,
294    has_ansi: bool,
295) -> LineMappings {
296    let mut builder = LineMappingsBuilder::new(has_ansi);
297    let mut source_iter = source_bytes;
298
299    for ch in text.chars() {
300        let source_byte = source_iter.next().flatten();
301        builder.add_char(ch, source_byte);
302    }
303
304    builder.finish()
305}
306
307/// How many columns of look-back from a hard cap a word-boundary split is
308/// still considered acceptable. Rows shorter than `wrap_width / 2` fall
309/// back to char-wrap so a boundary near the start doesn't strand most of
310/// the row empty.  Matches the constant used by the renderer's
311/// `apply_wrapping_transform` so virtual-line wrap and source-line wrap
312/// stay aligned.
313pub const WRAP_MAX_LOOKBACK: usize = 16;
314
315/// Greedy soft-wrap of `text` into chunks whose visual width does not
316/// exceed `wrap_width`.  Within each chunk, prefer to end at a UAX #29
317/// word boundary that lies within `WRAP_MAX_LOOKBACK` columns of the
318/// hard cap (or past `wrap_width / 2` — whichever is larger).  Falls
319/// back to the hard cap when no boundary qualifies.  Always makes
320/// forward progress: a single grapheme wider than `wrap_width` (e.g. a
321/// double-width CJK glyph in a 1-col viewport) is emitted on its own
322/// row.
323///
324/// Returns the byte ranges of the chunks; concatenating them recovers
325/// the original input.  An empty input yields no chunks; `wrap_width`
326/// of `0` degenerates to one chunk covering the whole input (the
327/// caller decides how to render a zero-width row).
328///
329/// The algorithm mirrors the inner Text-token char-split path of
330/// `view::ui::split_rendering::transforms::apply_wrapping_transform` —
331/// keep the two in sync if either changes.  Tabs and ANSI escapes are
332/// out of scope for this helper; callers needing tab-aware wrapping
333/// (the source-line path) handle them in their own pre/post passes.
334pub fn wrap_str_to_width(text: &str, wrap_width: usize) -> Vec<Range<usize>> {
335    if text.is_empty() {
336        return Vec::new();
337    }
338    if wrap_width == 0 {
339        return vec![0..text.len()];
340    }
341
342    use unicode_segmentation::UnicodeSegmentation;
343
344    let graphemes: Vec<(usize, &str)> = text.grapheme_indices(true).collect();
345    let word_bounds: Vec<usize> = text.split_word_bound_indices().map(|(b, _)| b).collect();
346    let text_len = text.len();
347
348    let mut chunks: Vec<Range<usize>> = Vec::new();
349    let mut grapheme_idx = 0;
350    // Monotonic cursor into `word_bounds` so the per-chunk boundary search
351    // is amortised O(1) rather than rescanning from byte 0.
352    let mut wb_lo: usize = 0;
353
354    while grapheme_idx < graphemes.len() {
355        let chunk_start_byte = graphemes[grapheme_idx].0;
356
357        // Greedy fill: how many graphemes fit in `wrap_width`?
358        let mut chunk_visual_width = 0usize;
359        let mut chunk_grapheme_count = 0usize;
360        for &(_b, g) in &graphemes[grapheme_idx..] {
361            let g_width = str_width(g);
362            if chunk_visual_width + g_width > wrap_width && chunk_grapheme_count > 0 {
363                break;
364            }
365            chunk_visual_width += g_width;
366            chunk_grapheme_count += 1;
367        }
368        // Forward-progress guarantee for an oversized lone grapheme.
369        if chunk_grapheme_count == 0 {
370            chunk_grapheme_count = 1;
371        }
372
373        let slice_end_hard = if grapheme_idx + chunk_grapheme_count < graphemes.len() {
374            graphemes[grapheme_idx + chunk_grapheme_count].0
375        } else {
376            text_len
377        };
378
379        // Boundary preference within `[floor_byte, slice_end_hard]`.  Floor
380        // is row-relative — we only enter this loop on a fresh row, so
381        // `current_line_width` would be 0 and `chunk_floor_from_cursor`
382        // collapses to `row_floor`.
383        let row_floor = wrap_width
384            .saturating_sub(WRAP_MAX_LOOKBACK)
385            .max(wrap_width / 2);
386        let floor_byte = if row_floor < chunk_grapheme_count {
387            graphemes[grapheme_idx + row_floor].0
388        } else {
389            slice_end_hard
390        };
391
392        // Advance `wb_lo` past entries already at or before chunk start.
393        while wb_lo < word_bounds.len() && word_bounds[wb_lo] <= chunk_start_byte {
394            wb_lo += 1;
395        }
396        let mut wb_hi = wb_lo;
397        while wb_hi < word_bounds.len() && word_bounds[wb_hi] <= slice_end_hard {
398            wb_hi += 1;
399        }
400
401        // Largest boundary in `[floor_byte, slice_end_hard]`.
402        let mut best_target_byte = word_bounds[wb_lo..wb_hi]
403            .iter()
404            .rev()
405            .copied()
406            .find(|&b| b >= floor_byte);
407        // `text.len()` is a virtual boundary if it falls inside the window —
408        // this stops a chunk that happens to end exactly at the text end
409        // from being shrunk to an earlier boundary (which would leak chars
410        // onto the next row).
411        if text_len > chunk_start_byte
412            && text_len >= floor_byte
413            && text_len <= slice_end_hard
414            && best_target_byte.map_or(true, |b| text_len > b)
415        {
416            best_target_byte = Some(text_len);
417        }
418
419        let chunk_end_byte = if let Some(target_byte) = best_target_byte {
420            let new_count = graphemes[grapheme_idx..]
421                .iter()
422                .position(|(b, _)| *b == target_byte)
423                .unwrap_or(chunk_grapheme_count);
424            if new_count > 0 && new_count < chunk_grapheme_count {
425                chunk_grapheme_count = new_count;
426                if grapheme_idx + new_count < graphemes.len() {
427                    graphemes[grapheme_idx + new_count].0
428                } else {
429                    text_len
430                }
431            } else {
432                slice_end_hard
433            }
434        } else {
435            slice_end_hard
436        };
437
438        chunks.push(chunk_start_byte..chunk_end_byte);
439        grapheme_idx += chunk_grapheme_count;
440    }
441
442    chunks
443}
444
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_visual_width_ascii() {
        for (input, expected) in [("Hello", 5), ("", 0)] {
            assert_eq!(visual_width(input, 0), expected);
        }
    }

    #[test]
    fn test_visual_width_with_tabs() {
        // Every case ends on the first tab stop (column 8):
        //   "\t"      tab from col 0 spans 8
        //   "1234\t"  4 chars + tab spanning 4
        //   "12\t"    2 chars + tab spanning 6
        for input in ["\t", "1234\t", "12\t"] {
            assert_eq!(visual_width(input, 0), 8);
        }
    }

    #[test]
    fn test_visual_width_with_ansi() {
        // Escape sequences contribute no width.
        let cases = [("\x1b[31mRed\x1b[0m", 3), ("\x1b[1;31;4mBold\x1b[0m", 4)];
        for (input, expected) in cases {
            assert_eq!(visual_width(input, 0), expected);
        }
    }

    #[test]
    fn test_visual_width_cjk() {
        // Each CJK glyph is two columns wide.
        for (input, expected) in [("你好", 4), ("Hello你好", 9)] {
            assert_eq!(visual_width(input, 0), expected);
        }
    }

    #[test]
    fn test_byte_to_visual_col_simple() {
        let s = "Hello";
        for (byte, col) in [(0, 0), (1, 1), (5, 5)] {
            assert_eq!(byte_to_visual_col(s, byte), col);
        }
    }

    #[test]
    fn test_byte_to_visual_col_with_ansi() {
        // "\x1b[31m" occupies bytes 0..5, "Red" occupies bytes 5..8.
        let s = "\x1b[31mRed";
        let cases = [
            (0, 0), // at ESC
            (5, 0), // at 'R': the ANSI prefix has zero width
            (6, 1), // at 'e'
            (8, 3), // end of string
        ];
        for (byte, col) in cases {
            assert_eq!(byte_to_visual_col(s, byte), col);
        }
    }

    #[test]
    fn test_byte_to_visual_col_with_cjk() {
        // '你' is 3 bytes and 2 columns.
        let s = "a你b";
        let cases = [
            (0, 0), // 'a'
            (1, 1), // start of '你'
            (4, 3), // 'b'
        ];
        for (byte, col) in cases {
            assert_eq!(byte_to_visual_col(s, byte), col);
        }
    }

    #[test]
    fn test_visual_col_to_byte_simple() {
        let s = "Hello";
        let cases = [
            (0, 0),
            (3, 3),
            (5, 5),
            (10, 5), // past end
        ];
        for (col, byte) in cases {
            assert_eq!(visual_col_to_byte(s, col), byte);
        }
    }

    #[test]
    fn test_visual_col_to_byte_with_ansi() {
        // "\x1b[31m" occupies bytes 0..5, "Red" occupies bytes 5..8.
        let s = "\x1b[31mRed";
        let cases = [
            (0, 5), // col 0 is 'R' at byte 5
            (1, 6), // col 1 is 'e' at byte 6
            (3, 8), // past end
        ];
        for (col, byte) in cases {
            assert_eq!(visual_col_to_byte(s, col), byte);
        }
    }

    #[test]
    fn test_visual_col_to_byte_with_cjk() {
        // "a你b": 'a' at byte 0, '你' at bytes 1..4, 'b' at byte 4.
        let s = "a你b";
        let cases = [
            (0, 0), // 'a'
            (1, 1), // left half of '你'
            (2, 1), // right half of '你' maps to the same byte
            (3, 4), // 'b'
        ];
        for (col, byte) in cases {
            assert_eq!(visual_col_to_byte(s, col), byte);
        }
    }

    #[test]
    fn test_line_mappings_builder_simple() {
        let mut builder = LineMappingsBuilder::new(false);
        builder.add_char('H', Some(0));
        builder.add_char('i', Some(1));
        let mappings = builder.finish();

        assert_eq!(mappings.char_source_bytes.len(), 2);
        assert_eq!(mappings.visual_to_char.len(), 2);
        for idx in 0..2 {
            assert_eq!(mappings.source_byte_at_char(idx), Some(idx));
            assert_eq!(mappings.char_at_visual_col(idx), idx);
        }
    }

    #[test]
    fn test_line_mappings_builder_with_cjk() {
        let mut builder = LineMappingsBuilder::new(false);
        builder.add_char('a', Some(0)); // 1 column
        builder.add_char('你', Some(1)); // 2 columns
        builder.add_char('b', Some(4)); // 1 column
        let mappings = builder.finish();

        assert_eq!(mappings.char_source_bytes.len(), 3);
        assert_eq!(mappings.visual_to_char.len(), 4); // 1 + 2 + 1

        // Clicked visual column -> source byte of the char under it.
        let clicks = [
            (0, Some(0)), // 'a'
            (1, Some(1)), // left half of '你'
            (2, Some(1)), // right half of '你'
            (3, Some(4)), // 'b'
        ];
        for (col, byte) in clicks {
            assert_eq!(mappings.source_byte_at_visual_col(col), byte);
        }
    }

    #[test]
    fn test_line_mappings_builder_with_ansi() {
        let mut builder = LineMappingsBuilder::new(true);

        // "\x1b[31mA": a 5-char ANSI prefix followed by 'A'; all ASCII, so
        // each char's byte offset equals its char index.
        for (byte, ch) in "\x1b[31mA".char_indices() {
            builder.add_char(ch, Some(byte));
        }
        let mappings = builder.finish();

        // Six characters, but only 'A' is visible.
        assert_eq!(mappings.char_source_bytes.len(), 6);
        assert_eq!(mappings.visual_to_char.len(), 1);
        assert_eq!(mappings.total_visual_width, 1);

        // Source bytes are kept for escape chars and visible chars alike.
        assert_eq!(mappings.source_byte_at_char(0), Some(0)); // ESC
        assert_eq!(mappings.source_byte_at_char(5), Some(5)); // 'A'

        // Visual column 0 resolves to char 5 ('A').
        assert_eq!(mappings.char_at_visual_col(0), 5);
        assert_eq!(mappings.source_byte_at_visual_col(0), Some(5));
    }

    #[test]
    fn test_line_mappings_cursor_on_ansi() {
        let mut builder = LineMappingsBuilder::new(true);

        // "\x1b[31mHi": a cursor sitting on the ESC at byte 0 must still work.
        for (byte, ch) in "\x1b[31mHi".char_indices() {
            builder.add_char(ch, Some(byte));
        }
        let mappings = builder.finish();

        // Source bytes can be looked up for any char, ANSI included.
        assert_eq!(mappings.source_byte_at_char(0), Some(0)); // ESC
        assert_eq!(mappings.source_byte_at_char(1), Some(1)); // '['

        // ANSI chars share column 0 with the 'H' that follows them.
        let cols = [
            (0, 0), // ESC
            (4, 0), // 'm'
            (5, 0), // 'H'
            (6, 1), // 'i'
        ];
        for (char_idx, col) in cols {
            assert_eq!(mappings.visual_col_at_char(char_idx), col);
        }
    }

    /// Slice `text` by each range so chunk contents can be compared as strings.
    fn collect_chunks<'a>(text: &'a str, chunks: &[Range<usize>]) -> Vec<&'a str> {
        let mut pieces = Vec::with_capacity(chunks.len());
        for range in chunks {
            pieces.push(&text[range.clone()]);
        }
        pieces
    }

    #[test]
    fn wrap_str_to_width_empty_input_yields_no_chunks() {
        let chunks = wrap_str_to_width("", 10);
        assert!(chunks.is_empty());
    }

    #[test]
    fn wrap_str_to_width_short_text_fits_in_one_chunk() {
        let text = "hello";
        let chunks = wrap_str_to_width(text, 80);
        assert_eq!(chunks.len(), 1);
        assert_eq!(&text[chunks[0].clone()], "hello");
    }

    #[test]
    fn wrap_str_to_width_no_word_boundaries_falls_back_to_hard_cap() {
        // 64 identical letters contain no word boundary, so each row must
        // break at the hard cap of 32.
        let text = "A".repeat(64);
        let chunks = wrap_str_to_width(&text, 32);
        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].len(), 32);
        assert_eq!(chunks[1].len(), 32);
    }

    #[test]
    fn wrap_str_to_width_prefers_word_boundary_over_mid_word_break() {
        // At width 8 the hard cap would cut "hello wo|rld" mid-word; the
        // boundary after the space gives "hello |world" instead.
        let text = "hello world";
        let chunks = wrap_str_to_width(text, 8);
        assert_eq!(collect_chunks(text, &chunks), vec!["hello ", "world"]);
    }

    #[test]
    fn wrap_str_to_width_handles_double_width_chars() {
        // Each glyph is two columns wide, so two fit per row at width 4.
        let text = "世界你好";
        let chunks = wrap_str_to_width(text, 4);
        assert_eq!(collect_chunks(text, &chunks), vec!["世界", "你好"]);
    }

    #[test]
    fn wrap_str_to_width_progress_for_oversized_grapheme() {
        // A double-width glyph in a 1-column viewport gets a row of its own
        // rather than stalling the loop.
        let text = "世";
        let chunks = wrap_str_to_width(text, 1);
        assert_eq!(chunks.len(), 1);
        assert_eq!(&text[chunks[0].clone()], "世");
    }

    #[test]
    fn wrap_str_to_width_breaks_at_word_boundary_inside_url() {
        // UAX #29 puts word boundaries around '/', '.', and '-' inside a URL.
        // At width 24 there is a boundary at byte 24 (right after "very",
        // before "-long-path/file"), so "very" must not be split.
        let text = "https://example.com/very-long-path/file";
        let chunks = wrap_str_to_width(text, 24);

        // Round-trip and width invariants.
        let mut acc = String::new();
        for r in &chunks {
            let piece = &text[r.clone()];
            assert!(str_width(piece) <= 24, "chunk over width: {piece:?}");
            acc.push_str(piece);
        }
        assert_eq!(acc, text);

        // The first row must end exactly at the boundary after "very".
        let first_row = &text[chunks[0].clone()];
        assert!(
            !first_row.ends_with("ver"),
            "first chunk truncated 'very' mid-word: {:?}",
            first_row,
        );
        assert!(
            first_row.ends_with("very"),
            "first chunk should end at the word boundary right after \
             'very': {:?}",
            first_row,
        );
    }

    #[test]
    fn wrap_str_to_width_round_trips_input() {
        // Spot check across several widths: chunks must tile the input and
        // each must fit within the width.
        let text = "the quick brown fox jumps over the lazy dog. \
                    the quick brown fox jumps over the lazy dog.";
        for w in [8usize, 10, 16, 25, 40] {
            let chunks = wrap_str_to_width(text, w);
            let mut acc = String::new();
            for r in &chunks {
                let piece = &text[r.clone()];
                assert!(
                    str_width(piece) <= w,
                    "chunk over width at w={w}: {piece:?}"
                );
                acc.push_str(piece);
            }
            assert_eq!(acc, text, "round-trip mismatch at w={w}");
        }
    }
}
735}