Skip to main content

kimun_notes/components/text_editor/
word_wrap.rs

1#[derive(Debug, Clone, PartialEq)]
2pub struct VisualLine {
3    pub logical_row: usize,
4    /// Character offset (Unicode scalar) where this visual line begins in the original line.
5    pub start_col: usize,
6    /// Character offset (exclusive) where this visual line ends.
7    pub end_col: usize,
8    /// Byte offset in the original logical line where this visual line begins.
9    pub start_byte: usize,
10    /// Byte offset (exclusive) in the original logical line where this visual line ends.
11    pub end_byte: usize,
12    pub is_first_visual_line: bool,
13}
14
15impl VisualLine {
16    /// Borrow the content slice from the original logical line string.
17    /// This avoids storing a redundant `String` copy on each `VisualLine`.
18    pub fn content<'a>(&self, source: &'a str) -> &'a str {
19        &source[self.start_byte..self.end_byte]
20    }
21}
22
23/// One grapheme cluster's position and metrics within a logical line,
24/// cached in the reuse buffer so `wrap_one_row` breaks only on cluster
25/// boundaries (never mid-cluster) and measures fit by display columns.
26struct Cluster {
27    /// Starting char (Unicode scalar) offset in the logical line.
28    char_pos: usize,
29    /// Starting byte offset in the logical line.
30    byte_pos: usize,
31    /// Display-column width of the cluster, before visibility is applied.
32    width: usize,
33    /// True when the cluster is a single whitespace scalar (a wrap
34    /// opportunity). Multi-scalar clusters are never whitespace.
35    is_ws: bool,
36}
37
38/// Wrap a single logical row at the given width, appending the
39/// produced `VisualLine`s to `out` (always at least one entry).
40///
41/// Breaks land only on grapheme-cluster boundaries: a multi-codepoint
42/// cluster (ZWJ emoji, combining-mark sequence) is never split across
43/// two visual lines, so the byte slice each `VisualLine` borrows always
44/// reclusters identically to the full line (the renderer in
45/// `spanner.rs` walks `content.graphemes(true)`). Fit is measured in
46/// display columns via [`cluster_display_width`], so wide CJK clusters
47/// count as 2 and zero-width combining marks as 0 — matching the
48/// renderer instead of the old one-column-per-scalar count.
49///
50/// `rendered_row` is the per-char rendered mask for this row (empty
51/// slice if absent — every char treated as visible). A hidden cluster
52/// (markdown sigil) contributes 0 columns.
53///
54/// `scratch` is reused for the row's per-cluster buffer. Caller owns
55/// it; the function clears+refills on entry. Threading this buffer
56/// through `compute` / `splice_range` lets a 5000-row recompute reuse a
57/// single allocation instead of N transient `Vec`s — perf #11 in the
58/// holistic review.
59fn wrap_one_row(
60    logical_row: usize,
61    line: &str,
62    width: usize,
63    rendered_row: &[bool],
64    scratch: &mut Vec<Cluster>,
65    out: &mut Vec<VisualLine>,
66) {
67    use unicode_segmentation::UnicodeSegmentation;
68
69    scratch.clear();
70    let mut char_pos = 0usize;
71    for (byte_pos, g) in line.grapheme_indices(true) {
72        let char_len = g.chars().count();
73        let is_ws = char_len == 1 && g.chars().next().is_some_and(char::is_whitespace);
74        scratch.push(Cluster {
75            char_pos,
76            byte_pos,
77            width: super::markdown::cluster_display_width(g),
78            is_ws,
79        });
80        char_pos += char_len;
81    }
82    let total_chars = char_pos;
83    let cl: &[Cluster] = scratch.as_slice();
84    if cl.is_empty() || width == 0 {
85        out.push(VisualLine {
86            logical_row,
87            start_col: 0,
88            end_col: 0,
89            start_byte: 0,
90            end_byte: 0,
91            is_first_visual_line: true,
92        });
93        return;
94    }
95
96    let is_rendered = |char_pos: usize| -> bool {
97        if char_pos < rendered_row.len() {
98            rendered_row[char_pos]
99        } else {
100            true
101        }
102    };
103    // Cluster's display width with visibility applied (hidden → 0).
104    let vis_width = |idx: usize| -> usize {
105        if is_rendered(cl[idx].char_pos) {
106            cl[idx].width
107        } else {
108            0
109        }
110    };
111    // Char / byte offset at a cluster index (or the line end past it).
112    let char_at = |idx: usize| -> usize {
113        if idx < cl.len() {
114            cl[idx].char_pos
115        } else {
116            total_chars
117        }
118    };
119    let byte_at = |idx: usize| -> usize {
120        if idx < cl.len() {
121            cl[idx].byte_pos
122        } else {
123            line.len()
124        }
125    };
126
127    let total = cl.len(); // number of clusters
128    let mut start = 0; // cluster index
129    let mut is_first = true;
130
131    while start < total {
132        // Find the first cluster where the column count from `start`
133        // exceeds `width`.
134        let fit_end = {
135            let mut rcount = 0usize;
136            let mut pos = start;
137            while pos < total {
138                let r = vis_width(pos);
139                if rcount + r > width {
140                    break;
141                }
142                rcount += r;
143                pos += 1;
144            }
145            // Guarantee forward progress: a single cluster wider than
146            // `width` (e.g. a width-2 glyph in a width-1 column) must
147            // still advance by one cluster, else the loop never ends.
148            if pos == start { start + 1 } else { pos }
149        };
150
151        if fit_end >= total {
152            out.push(VisualLine {
153                logical_row,
154                start_col: char_at(start),
155                end_col: total_chars,
156                start_byte: byte_at(start),
157                end_byte: line.len(),
158                is_first_visual_line: is_first,
159            });
160            break;
161        }
162
163        // Find break point: prefer last whitespace cluster in [start..fit_end].
164        let (content_end, next_start) = if cl[fit_end].is_ws {
165            (fit_end, fit_end + 1)
166        } else {
167            match cl[start..fit_end]
168                .iter()
169                .enumerate()
170                .rev()
171                .find(|(_, c)| c.is_ws)
172            {
173                Some((i, _)) => (start + i, start + i + 1),
174                None => (fit_end, fit_end), // hard break (mid-word, on a cluster boundary)
175            }
176        };
177
178        out.push(VisualLine {
179            logical_row,
180            start_col: char_at(start),
181            end_col: char_at(content_end),
182            start_byte: byte_at(start),
183            end_byte: byte_at(content_end),
184            is_first_visual_line: is_first,
185        });
186        start = next_start;
187        is_first = false;
188    }
189}
190
191#[derive(Clone)]
192pub struct WordWrapLayout {
193    visual_lines: Vec<VisualLine>,
194    /// Maps logical row index → index of its first `VisualLine` in `visual_lines`.
195    /// Enables O(wrap-count) lookup in `logical_to_visual` instead of O(total visual lines).
196    row_starts: Vec<usize>,
197}
198
199impl WordWrapLayout {
200    /// Compute word-wrap layout.
201    /// `rendered`: per-line bitmask of which char positions are actually rendered (visible).
202    /// Pass `&[]` to use raw char widths (e.g. in tests that don't involve markdown).
203    pub fn compute(lines: &[String], width: u16, rendered: &[Vec<bool>]) -> Self {
204        let width = width as usize;
205        let mut visual_lines = Vec::new();
206        let mut row_starts = Vec::with_capacity(lines.len());
207
208        if lines.is_empty() {
209            return Self::default();
210        }
211
212        // One scratch buffer reused across every `wrap_one_row` call —
213        // a 5000-row recompute pays a single allocation instead of N.
214        let mut scratch: Vec<Cluster> = Vec::new();
215        for (row, line) in lines.iter().enumerate() {
216            row_starts.push(visual_lines.len());
217            let rendered_row = rendered.get(row).map(|v| v.as_slice()).unwrap_or(&[]);
218            wrap_one_row(
219                row,
220                line,
221                width,
222                rendered_row,
223                &mut scratch,
224                &mut visual_lines,
225            );
226        }
227
228        Self {
229            visual_lines,
230            row_starts,
231        }
232    }
233
234    /// Re-wrap only the rows in `row_range`, splicing the result into
235    /// `visual_lines` and updating `row_starts` accordingly.
236    ///
237    /// **Contract:** caller must pass the SAME `lines` and `width` as the
238    /// most recent `compute` call (or previous `splice_range`); only rows
239    /// in `row_range` are assumed to have changed. Other rows' content,
240    /// width, and rendered masks must be byte-identical.
241    ///
242    /// `row_range` is half-open in logical-row space. Empty ranges are
243    /// a no-op.
244    pub fn splice_range(
245        &mut self,
246        lines: &[String],
247        width: u16,
248        rendered: &[Vec<bool>],
249        row_range: std::ops::Range<usize>,
250    ) {
251        if row_range.is_empty() {
252            return;
253        }
254        let width = width as usize;
255        debug_assert!(
256            row_range.end <= lines.len(),
257            "splice_range: row_range.end {} > lines.len() {}",
258            row_range.end,
259            lines.len(),
260        );
261        debug_assert!(
262            row_range.start <= self.row_starts.len(),
263            "splice_range: row_range.start {} > row_starts.len() {}",
264            row_range.start,
265            self.row_starts.len(),
266        );
267
268        // Compute the old visual-line index span for this row range.
269        let old_vstart = self.row_starts[row_range.start];
270        let old_vend = if row_range.end < self.row_starts.len() {
271            self.row_starts[row_range.end]
272        } else {
273            self.visual_lines.len()
274        };
275
276        // Wrap the new contents of the affected rows. Also record per-row
277        // starting indices inside the new slice so we can rebuild
278        // row_starts[row_range] without searching. One scratch buffer
279        // shared across every row in the range (perf #11).
280        let mut new_slice: Vec<VisualLine> = Vec::new();
281        let mut new_row_starts_for_range: Vec<usize> = Vec::with_capacity(row_range.len());
282        let mut scratch: Vec<Cluster> = Vec::new();
283        for row in row_range.clone() {
284            new_row_starts_for_range.push(new_slice.len());
285            let rendered_row = rendered.get(row).map(|v| v.as_slice()).unwrap_or(&[]);
286            wrap_one_row(
287                row,
288                &lines[row],
289                width,
290                rendered_row,
291                &mut scratch,
292                &mut new_slice,
293            );
294        }
295
296        // Splice visual_lines.
297        let new_vcount = new_slice.len();
298        self.visual_lines.splice(old_vstart..old_vend, new_slice);
299
300        // Shift row_starts for the range and for rows after the range.
301        let old_vcount = old_vend - old_vstart;
302        let delta_i = new_vcount as isize - old_vcount as isize;
303
304        // Update row_starts within the spliced range (absolute indices).
305        for (i, local_start) in new_row_starts_for_range.into_iter().enumerate() {
306            self.row_starts[row_range.start + i] = old_vstart + local_start;
307        }
308
309        // Shift row_starts for rows AFTER the spliced range.
310        if delta_i != 0 {
311            for rs in &mut self.row_starts[row_range.end..] {
312                *rs = ((*rs as isize) + delta_i) as usize;
313            }
314        }
315    }
316
317    pub fn total_visual_lines(&self) -> usize {
318        self.visual_lines.len()
319    }
320
321    /// Returns the number of logical rows tracked by this layout.
322    /// Used by `view.update` to detect line-count changes without exposing
323    /// `row_starts` directly.
324    pub fn row_starts_len(&self) -> usize {
325        self.row_starts.len()
326    }
327
328    pub fn visual_lines(&self) -> &[VisualLine] {
329        &self.visual_lines
330    }
331
332    /// Convert logical (row, col) to (visual_row, visual_col).
333    pub fn logical_to_visual(&self, row: usize, col: usize) -> (usize, usize) {
334        let row = row.min(self.row_starts.len().saturating_sub(1));
335        let first = self.row_starts.get(row).copied().unwrap_or(0);
336        let vrow = self.visual_lines[first..]
337            .iter()
338            .enumerate()
339            .take_while(|(_, vl)| vl.logical_row == row)
340            .filter(|(_, vl)| vl.start_col <= col)
341            .last()
342            .map(|(i, _)| first + i)
343            .unwrap_or(first);
344        let vl = &self.visual_lines[vrow];
345        (vrow, col.saturating_sub(vl.start_col))
346    }
347
348    /// Convert visual (vrow, vcol) to logical (row, col).
349    pub fn visual_to_logical(&self, vrow: usize, vcol: usize) -> (usize, usize) {
350        let vrow = vrow.min(self.visual_lines.len().saturating_sub(1));
351        let vl = &self.visual_lines[vrow];
352        let col = (vl.start_col + vcol).min(vl.end_col);
353        (vl.logical_row, col)
354    }
355}
356
357impl Default for WordWrapLayout {
358    fn default() -> Self {
359        Self {
360            visual_lines: vec![VisualLine {
361                logical_row: 0,
362                start_col: 0,
363                end_col: 0,
364                start_byte: 0,
365                end_byte: 0,
366                is_first_visual_line: true,
367            }],
368            row_starts: vec![0],
369        }
370    }
371}
372
373#[cfg(test)]
374mod tests {
375    use super::*;
376
377    fn ls(s: &str) -> Vec<String> {
378        s.lines().map(str::to_owned).collect()
379    }
380
381    // Helper: get content string for a visual line from its source line.
382    fn content_of<'a>(vl: &VisualLine, source: &'a str) -> &'a str {
383        vl.content(source)
384    }
385
386    #[test]
387    fn empty_input_produces_one_visual_line() {
388        let layout = WordWrapLayout::compute(&[], 40, &[]);
389        assert_eq!(layout.total_visual_lines(), 1);
390        assert_eq!(layout.visual_lines()[0].logical_row, 0);
391        assert!(layout.visual_lines()[0].is_first_visual_line);
392    }
393
394    #[test]
395    fn empty_string_produces_one_visual_line() {
396        let src = String::new();
397        let layout = WordWrapLayout::compute(std::slice::from_ref(&src), 40, &[]);
398        assert_eq!(layout.total_visual_lines(), 1);
399        assert_eq!(content_of(&layout.visual_lines()[0], &src), "");
400        assert!(layout.visual_lines()[0].is_first_visual_line);
401    }
402
403    #[test]
404    fn short_line_fits_on_one_visual_line() {
405        let lines = ls("hello world");
406        let layout = WordWrapLayout::compute(&lines, 40, &[]);
407        assert_eq!(layout.total_visual_lines(), 1);
408        assert_eq!(
409            content_of(&layout.visual_lines()[0], &lines[0]),
410            "hello world"
411        );
412        assert!(layout.visual_lines()[0].is_first_visual_line);
413    }
414
415    #[test]
416    fn long_line_wraps_at_whitespace() {
417        // "hello world foo" width=11 → "hello world" (11) fits; " foo" wraps
418        let lines = ls("hello world foo");
419        let layout = WordWrapLayout::compute(&lines, 11, &[]);
420        assert_eq!(layout.total_visual_lines(), 2);
421        assert_eq!(
422            content_of(&layout.visual_lines()[0], &lines[0]),
423            "hello world"
424        );
425        assert_eq!(content_of(&layout.visual_lines()[1], &lines[0]), "foo");
426        assert!(layout.visual_lines()[0].is_first_visual_line);
427        assert!(!layout.visual_lines()[1].is_first_visual_line);
428    }
429
430    #[test]
431    fn long_word_hard_breaks_at_width() {
432        let lines = vec!["abcdefgh".to_string()];
433        let layout = WordWrapLayout::compute(&lines, 4, &[]);
434        assert_eq!(layout.total_visual_lines(), 2);
435        assert_eq!(content_of(&layout.visual_lines()[0], &lines[0]), "abcd");
436        assert_eq!(content_of(&layout.visual_lines()[1], &lines[0]), "efgh");
437    }
438
439    #[test]
440    fn two_logical_lines_have_correct_logical_rows() {
441        let layout = WordWrapLayout::compute(&ls("abc\nxyz"), 10, &[]);
442        assert_eq!(layout.total_visual_lines(), 2);
443        assert_eq!(layout.visual_lines()[0].logical_row, 0);
444        assert_eq!(layout.visual_lines()[1].logical_row, 1);
445    }
446
447    #[test]
448    fn unicode_chars_counted_not_bytes() {
449        // "あいう" is 3 chars, 9 bytes. Each is a full-width CJK glyph
450        // (2 display columns), so at width=4 two fit per visual line —
451        // the break is by display width, never mid-byte.
452        let lines = vec!["あいう".to_string()];
453        let layout = WordWrapLayout::compute(&lines, 4, &[]);
454        assert_eq!(layout.total_visual_lines(), 2);
455        assert_eq!(content_of(&layout.visual_lines()[0], &lines[0]), "あい");
456        assert_eq!(content_of(&layout.visual_lines()[1], &lines[0]), "う");
457    }
458
459    #[test]
460    fn full_width_glyph_counts_as_two_columns() {
461        // At width=2, a single full-width glyph fills the line on its own.
462        let lines = vec!["あい".to_string()];
463        let layout = WordWrapLayout::compute(&lines, 2, &[]);
464        assert_eq!(layout.total_visual_lines(), 2);
465        assert_eq!(content_of(&layout.visual_lines()[0], &lines[0]), "あ");
466        assert_eq!(content_of(&layout.visual_lines()[1], &lines[0]), "い");
467    }
468
469    #[test]
470    fn multi_codepoint_cluster_never_split() {
471        // "e" + U+0301 (combining acute) = one grapheme cluster, two
472        // scalars, one display column. A narrow width must keep the
473        // cluster intact on one visual line — a mid-cluster break would
474        // leave the renderer reclustering a partial slice (review #3).
475        let combined = "e\u{0301}fg"; // é f g
476        let lines = vec![combined.to_string()];
477        let layout = WordWrapLayout::compute(&lines, 1, &[]);
478        // Width 1: "é" (1 col, 2 scalars) | "f" | "g" → 3 visual lines,
479        // and the first never splits the cluster.
480        assert_eq!(layout.total_visual_lines(), 3);
481        assert_eq!(content_of(&layout.visual_lines()[0], combined), "e\u{0301}");
482        assert_eq!(content_of(&layout.visual_lines()[1], combined), "f");
483        assert_eq!(content_of(&layout.visual_lines()[2], combined), "g");
484    }
485
486    #[test]
487    fn logical_to_visual_start_of_line() {
488        let layout = WordWrapLayout::compute(&ls("hello world"), 40, &[]);
489        assert_eq!(layout.logical_to_visual(0, 0), (0, 0));
490    }
491
492    #[test]
493    fn logical_to_visual_wrapped_cursor() {
494        let layout = WordWrapLayout::compute(&ls("hello world foo"), 11, &[]);
495        let (vrow, vcol) = layout.logical_to_visual(0, 12);
496        assert_eq!(vrow, 1);
497        assert_eq!(vcol, 0);
498    }
499
500    #[test]
501    fn visual_to_logical_first_line() {
502        let layout = WordWrapLayout::compute(&ls("hello"), 40, &[]);
503        assert_eq!(layout.visual_to_logical(0, 3), (0, 3));
504    }
505
506    #[test]
507    fn visual_to_logical_accounts_for_start_col() {
508        let layout = WordWrapLayout::compute(&ls("hello world foo"), 11, &[]);
509        let (row, col) = layout.visual_to_logical(1, 0);
510        assert_eq!(row, 0);
511        assert_eq!(col, 12);
512    }
513
514    #[test]
515    fn row_starts_index_multi_line_multi_wrap() {
516        let lines = vec![
517            "abc".to_string(),
518            "hello world foo".to_string(),
519            "xyz".to_string(),
520        ];
521        let layout = WordWrapLayout::compute(&lines, 11, &[]);
522        assert_eq!(layout.row_starts, vec![0, 1, 3]);
523        assert_eq!(layout.logical_to_visual(2, 0), (3, 0));
524    }
525
526    #[test]
527    fn coordinate_roundtrip_vrow_zero() {
528        let layout = WordWrapLayout::compute(&ls("hello world foo"), 11, &[]);
529        let (row, col) = layout.visual_to_logical(0, 3);
530        let (vrow2, vcol2) = layout.logical_to_visual(row, col);
531        assert_eq!((vrow2, vcol2), (0, 3));
532    }
533
534    #[test]
535    fn byte_offsets_correct_for_unicode() {
536        // "あいう": あ=3 bytes, い=3 bytes, う=3 bytes; each 2 columns.
537        // At width=4 the first visual line holds "あい" (bytes 0..6).
538        let lines = vec!["あいう".to_string()];
539        let layout = WordWrapLayout::compute(&lines, 4, &[]);
540        let vl0 = &layout.visual_lines()[0];
541        let vl1 = &layout.visual_lines()[1];
542        assert_eq!((vl0.start_byte, vl0.end_byte), (0, 6)); // "あい"
543        assert_eq!((vl1.start_byte, vl1.end_byte), (6, 9)); // "う"
544    }
545
546    #[test]
547    fn splice_range_full_buffer_equals_compute() {
548        let lines = ls("hello world\nfoo bar baz\nlast line");
549        let mut layout = WordWrapLayout::compute(&lines, 40, &[]);
550        layout.splice_range(&lines, 40, &[], 0..lines.len());
551        let fresh = WordWrapLayout::compute(&lines, 40, &[]);
552        assert_eq!(layout.visual_lines(), fresh.visual_lines());
553        assert_eq!(layout.row_starts, fresh.row_starts);
554    }
555
556    #[test]
557    fn splice_range_middle_row_only() {
558        // Edit row 1 — splice should only re-wrap row 1.
559        let lines_before = ls("alpha beta\nfoo bar\ngamma delta");
560        let layout_before = WordWrapLayout::compute(&lines_before, 40, &[]);
561
562        let lines_after = ls("alpha beta\nFOO BAR\ngamma delta");
563        let mut layout = layout_before.clone();
564        layout.splice_range(&lines_after, 40, &[], 1..2);
565
566        let fresh = WordWrapLayout::compute(&lines_after, 40, &[]);
567        assert_eq!(layout.visual_lines(), fresh.visual_lines());
568        assert_eq!(layout.row_starts, fresh.row_starts);
569    }
570
571    #[test]
572    fn splice_range_handles_wrap_count_change() {
573        // Row 0: "short" (1 visual line) → "a very long line that will wrap" (2 visual lines at width 10).
574        let lines_before = ls("short\ntail");
575        let mut layout = WordWrapLayout::compute(&lines_before, 10, &[]);
576        let lines_after = ls("a very long line that will wrap\ntail");
577        layout.splice_range(&lines_after, 10, &[], 0..1);
578
579        let fresh = WordWrapLayout::compute(&lines_after, 10, &[]);
580        assert_eq!(layout.visual_lines(), fresh.visual_lines());
581        assert_eq!(layout.row_starts, fresh.row_starts);
582    }
583
584    #[test]
585    fn splice_range_at_buffer_start() {
586        let lines = ls("first line\nsecond line\nthird line");
587        let mut layout = WordWrapLayout::compute(&lines, 40, &[]);
588        let edited = ls("first EDITED line\nsecond line\nthird line");
589        layout.splice_range(&edited, 40, &[], 0..1);
590
591        let fresh = WordWrapLayout::compute(&edited, 40, &[]);
592        assert_eq!(layout.visual_lines(), fresh.visual_lines());
593        assert_eq!(layout.row_starts, fresh.row_starts);
594    }
595
596    #[test]
597    fn splice_range_at_buffer_end() {
598        let lines = ls("first\nsecond\nthird");
599        let mut layout = WordWrapLayout::compute(&lines, 40, &[]);
600        let edited = ls("first\nsecond\nthird EDITED");
601        layout.splice_range(&edited, 40, &[], 2..3);
602
603        let fresh = WordWrapLayout::compute(&edited, 40, &[]);
604        assert_eq!(layout.visual_lines(), fresh.visual_lines());
605        assert_eq!(layout.row_starts, fresh.row_starts);
606    }
607}