edit/unicode/
measurement.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4use std::hint::cold_path;
5
6use super::Utf8Chars;
7use super::tables::*;
8use crate::document::ReadableDocument;
9use crate::helpers::{CoordType, Point};
10
11// On one hand it's disgusting that I wrote this as a global variable, but on the
12// other hand, this isn't a public library API, and it makes the code a lot cleaner,
13// because we don't need to inject this once-per-process value everywhere.
14static mut AMBIGUOUS_WIDTH: usize = 1;
15
16/// Sets the width of "ambiguous" width characters as per "UAX #11: East Asian Width".
17///
18/// Defaults to 1.
19pub fn setup_ambiguous_width(ambiguous_width: CoordType) {
20    unsafe { AMBIGUOUS_WIDTH = ambiguous_width as usize };
21}
22
23#[inline]
24fn ambiguous_width() -> usize {
25    // SAFETY: This is a global variable that is set once per process.
26    // It is never changed after that, so this is safe to call.
27    unsafe { AMBIGUOUS_WIDTH }
28}
29
/// Stores a position inside a [`ReadableDocument`].
///
/// The cursor tracks both the absolute byte-offset,
/// as well as the position in terminal-related coordinates.
///
/// A `Cursor` is a plain `Copy` value. [`MeasurementConfig`] can be seeded with one
/// (see [`MeasurementConfig::with_cursor`]) and hands out an updated copy from each
/// of its `goto_*` methods.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct Cursor {
    /// Offset in bytes within the buffer.
    pub offset: usize,
    /// Position in the buffer in lines (.y) and grapheme clusters (.x).
    ///
    /// Line wrapping has NO influence on this.
    pub logical_pos: Point,
    /// Position in the buffer in laid out rows (.y) and columns (.x).
    ///
    /// Line wrapping has an influence on this.
    pub visual_pos: Point,
    /// Horizontal position in visual columns.
    ///
    /// Line wrapping has NO influence on this and if word wrap is disabled,
    /// it's identical to `visual_pos.x`. This is useful for calculating tab widths.
    /// It only restarts at 0 after a line feed.
    pub column: CoordType,
    /// When `measure_forward` hits the `word_wrap_column`, the question is:
    /// Was there a wrap opportunity on this line? Because if there wasn't,
    /// a hard-wrap is required; otherwise, the word that is being laid-out is
    /// moved to the next line. This boolean carries this state between calls.
    pub wrap_opp: bool,
}
57
/// Your entrypoint to navigating inside a [`ReadableDocument`].
///
/// Configure it with the `with_*` builder methods and then move
/// the internal [`Cursor`] forward with the `goto_*` methods.
#[derive(Clone)]
pub struct MeasurementConfig<'doc> {
    /// The current position. Every `goto_*` call starts here and stores its result here.
    cursor: Cursor,
    /// Distance between tab stops in columns. `with_tab_size` keeps this at >= 1.
    tab_size: CoordType,
    /// The column at which lines get wrapped. 0 means that word wrap is disabled.
    word_wrap_column: CoordType,
    /// The document being measured. Its text is fetched in chunks via `read_forward`.
    buffer: &'doc dyn ReadableDocument,
}
66
67impl<'doc> MeasurementConfig<'doc> {
68    /// Creates a new [`MeasurementConfig`] for the given document.
69    pub fn new(buffer: &'doc dyn ReadableDocument) -> Self {
70        Self { cursor: Default::default(), tab_size: 8, word_wrap_column: 0, buffer }
71    }
72
73    /// Sets the initial cursor to the given position.
74    ///
75    /// WARNING: While the code doesn't panic if the cursor is invalid,
76    /// the results will obviously be complete garbage.
77    pub fn with_cursor(mut self, cursor: Cursor) -> Self {
78        self.cursor = cursor;
79        self
80    }
81
82    /// Sets the tab size.
83    ///
84    /// Defaults to 8, because that's what a tab in terminals evaluates to.
85    pub fn with_tab_size(mut self, tab_size: CoordType) -> Self {
86        self.tab_size = tab_size.max(1);
87        self
88    }
89
90    /// You want word wrap? Set it here!
91    ///
92    /// Defaults to 0, which means no word wrap.
93    pub fn with_word_wrap_column(mut self, word_wrap_column: CoordType) -> Self {
94        self.word_wrap_column = word_wrap_column;
95        self
96    }
97
98    /// Navigates **forward** to the given absolute offset.
99    ///
100    /// # Returns
101    ///
102    /// The cursor position after the navigation.
103    pub fn goto_offset(&mut self, offset: usize) -> Cursor {
104        self.measure_forward(offset, Point::MAX, Point::MAX)
105    }
106
107    /// Navigates **forward** to the given logical position.
108    ///
109    /// Logical positions are in lines and grapheme clusters.
110    ///
111    /// # Returns
112    ///
113    /// The cursor position after the navigation.
114    pub fn goto_logical(&mut self, logical_target: Point) -> Cursor {
115        self.measure_forward(usize::MAX, logical_target, Point::MAX)
116    }
117
118    /// Navigates **forward** to the given visual position.
119    ///
120    /// Visual positions are in laid out rows and columns.
121    ///
122    /// # Returns
123    ///
124    /// The cursor position after the navigation.
125    pub fn goto_visual(&mut self, visual_target: Point) -> Cursor {
126        self.measure_forward(usize::MAX, Point::MAX, visual_target)
127    }
128
129    /// Returns the current cursor position.
130    pub fn cursor(&self) -> Cursor {
131        self.cursor
132    }
133
    // NOTE that going to a visual target can result in ambiguous results,
    // where going to an identical logical target will yield a different result.
    //
    // Imagine if you have a `word_wrap_column` of 6 and there's "Hello World" on the line:
    // `goto_logical` will return a `visual_pos` of {0,1}, while `goto_visual` returns {6,0}.
    // This is because from a logical POV, if the wrap location equals the wrap column,
    // the wrap exists on both lines and it'll default to wrapping. `goto_visual` however will always
    // try to return a Y position that matches the requested position, so that Home/End works properly.
    //
    /// Walks forward from the current cursor, one grapheme cluster at a time, until
    /// the first of the three targets or the end of the text is reached.
    ///
    /// The public `goto_*` methods call this with the targets they don't care
    /// about set to `usize::MAX` / `Point::MAX`.
    ///
    /// # Parameters
    ///
    /// * `offset_target`: Stop once the byte offset is >= this value.
    /// * `logical_target`: Stop once the logical position (line, grapheme cluster) is reached.
    /// * `visual_target`: Stop once the visual position (row, column) is reached.
    ///
    /// # Returns
    ///
    /// The new cursor, which is also stored in `self.cursor`. If any of the targets
    /// is already at or behind the current cursor, the cursor is returned unchanged.
    /// This function never moves backward.
    fn measure_forward(
        &mut self,
        offset_target: usize,
        logical_target: Point,
        visual_target: Point,
    ) -> Cursor {
        if self.cursor.offset >= offset_target
            || self.cursor.logical_pos >= logical_target
            || self.cursor.visual_pos >= visual_target
        {
            return self.cursor;
        }

        // Work on local copies. They're written back to `self.cursor` at the very end.
        let mut offset = self.cursor.offset;
        let mut logical_pos_x = self.cursor.logical_pos.x;
        let mut logical_pos_y = self.cursor.logical_pos.y;
        let mut visual_pos_x = self.cursor.visual_pos.x;
        let mut visual_pos_y = self.cursor.visual_pos.y;
        let mut column = self.cursor.column;

        // The X limits that apply to the current line / row.
        // They're recomputed whenever the respective Y position changes.
        let mut logical_target_x = Self::calc_target_x(logical_target, logical_pos_y);
        let mut visual_target_x = Self::calc_target_x(visual_target, visual_pos_y);

        // wrap_opp = Wrap Opportunity
        // These store the position and column of the last wrap opportunity. If `word_wrap_column` is
        // zero (word wrap disabled), all grapheme clusters are a wrap opportunity, because none are.
        let mut wrap_opp = self.cursor.wrap_opp;
        let mut wrap_opp_offset = offset;
        let mut wrap_opp_logical_pos_x = logical_pos_x;
        let mut wrap_opp_visual_pos_x = visual_pos_x;
        let mut wrap_opp_column = column;

        // Start out with an empty chunk. The first iteration of
        // the inner loop below will then fetch the actual text.
        let mut chunk_iter = Utf8Chars::new(b"", 0);
        let mut chunk_range = offset..offset;
        let mut props_next_cluster = ucd_start_of_text_properties();

        loop {
            // Have we reached the target already? Stop.
            if offset >= offset_target
                || logical_pos_x >= logical_target_x
                || visual_pos_x >= visual_target_x
            {
                break;
            }

            let props_current_cluster = props_next_cluster;
            let mut props_last_char;
            let mut offset_next_cluster;
            let mut state = 0;
            let mut width = 0;

            // Since we want to measure the width of the current cluster,
            // by necessity we need to seek to the next cluster.
            // We'll then reuse the offset and properties of the next cluster in
            // the next iteration of this (outer) loop (`props_next_cluster`).
            loop {
                if !chunk_iter.has_next() {
                    cold_path();
                    chunk_iter = Utf8Chars::new(self.buffer.read_forward(chunk_range.end), 0);
                    chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len();
                }

                // Since this loop seeks ahead to the next cluster, and since `chunk_iter`
                // records the offset of the next character after the returned one, we need
                // to save the offset of the previous `chunk_iter` before calling `next()`.
                // Similar applies to the width.
                props_last_char = props_next_cluster;
                offset_next_cluster = chunk_range.start + chunk_iter.offset();
                width += ucd_grapheme_cluster_character_width(props_next_cluster, ambiguous_width())
                    as CoordType;

                // The `Document::read_forward` interface promises us that it will not split
                // grapheme clusters across chunks. Therefore, we can safely break here.
                let ch = match chunk_iter.next() {
                    Some(ch) => ch,
                    None => break,
                };

                // Get the properties of the next cluster.
                props_next_cluster = ucd_grapheme_cluster_lookup(ch);
                state = ucd_grapheme_cluster_joins(state, props_last_char, props_next_cluster);

                // Stop if the next character does not join.
                if ucd_grapheme_cluster_joins_done(state) {
                    break;
                }
            }

            if offset_next_cluster == offset {
                // No advance and the iterator is empty? End of text reached.
                if chunk_iter.is_empty() {
                    break;
                }
                // Ignore the first iteration when processing the start-of-text.
                continue;
            }

            // The max. width of a terminal cell is 2.
            width = width.min(2);

            // Tabs require special handling because they can have a variable width.
            if props_last_char == ucd_tab_properties() {
                // SAFETY: `self.tab_size` is clamped to >= 1 in `with_tab_size`.
                // This assert ensures that Rust doesn't insert a panicking
                // division-by-zero check for the `%` below.
                unsafe { std::hint::assert_unchecked(self.tab_size >= 1) };
                // Advance to the next tab stop.
                width = self.tab_size - (column % self.tab_size);
            }

            // Hard wrap: Both the logical and visual position advance by one line.
            if props_last_char == ucd_linefeed_properties() {
                cold_path();

                wrap_opp = false;

                // Don't cross the newline if the target is on this line but we haven't reached it.
                // E.g. if the caller asks for column 100 on a 10 column line,
                // we'll return with the cursor set to column 10.
                if logical_pos_y >= logical_target.y || visual_pos_y >= visual_target.y {
                    break;
                }

                offset = offset_next_cluster;
                logical_pos_x = 0;
                logical_pos_y += 1;
                visual_pos_x = 0;
                visual_pos_y += 1;
                column = 0;

                logical_target_x = Self::calc_target_x(logical_target, logical_pos_y);
                visual_target_x = Self::calc_target_x(visual_target, visual_pos_y);
                continue;
            }

            // Avoid advancing past the visual target, because `width` can be greater than 1.
            if visual_pos_x + width > visual_target_x {
                break;
            }

            // Since the word wrap handling may need to revert to a previous `wrap_opp_*`,
            // it must be done before advancing / checking for `ucd_line_break_joins`.
            if self.word_wrap_column > 0 && visual_pos_x + width > self.word_wrap_column {
                if !wrap_opp {
                    // The lack of a wrap opportunity means that a single word
                    // is wider than the word wrap column. We need to force-break the word.
                    // This is similar to the case below, but "bar" gets written at column 0.
                    wrap_opp_offset = offset;
                    wrap_opp_logical_pos_x = logical_pos_x;
                    wrap_opp_visual_pos_x = visual_pos_x;
                    wrap_opp_column = column;
                    visual_pos_x = 0;
                } else {
                    // If we had a wrap opportunity on this line, we can move all
                    // characters since then to the next line without stopping this loop:
                    //   +---------+      +---------+      +---------+
                    //   |      foo|  ->  |         |  ->  |         |
                    //   |         |      |foo      |      |foobar   |
                    //   +---------+      +---------+      +---------+
                    // We don't actually move "foo", but rather just change where "bar" goes.
                    // Since this function doesn't copy text, the end result is the same.
                    visual_pos_x -= wrap_opp_visual_pos_x;
                }

                wrap_opp = false;
                visual_pos_y += 1;
                visual_target_x = Self::calc_target_x(visual_target, visual_pos_y);

                if visual_pos_x == visual_target_x {
                    break;
                }

                // Imagine the word is "hello" and on the "o" we notice it wraps.
                // If the target however was the "e", then we must revert back to "h" and search for it.
                if visual_pos_x > visual_target_x {
                    cold_path();

                    offset = wrap_opp_offset;
                    logical_pos_x = wrap_opp_logical_pos_x;
                    visual_pos_x = 0;
                    column = wrap_opp_column;

                    // Skip to the end of the current chunk and reset the range, so that the
                    // next iteration re-reads the document starting at the restored `offset`.
                    chunk_iter.seek(chunk_iter.len());
                    chunk_range = offset..offset;
                    props_next_cluster = ucd_start_of_text_properties();
                    continue;
                }
            }

            // Commit the cluster we just measured.
            offset = offset_next_cluster;
            logical_pos_x += 1;
            visual_pos_x += width;
            column += width;

            // If a line break is allowed between the cluster we just passed
            // and the next one, remember this position as the latest wrap opportunity.
            if self.word_wrap_column > 0
                && !ucd_line_break_joins(props_current_cluster, props_next_cluster)
            {
                wrap_opp = true;
                wrap_opp_offset = offset;
                wrap_opp_logical_pos_x = logical_pos_x;
                wrap_opp_visual_pos_x = visual_pos_x;
                wrap_opp_column = column;
            }
        }

        // If we're here, we hit our target. Now the only question is:
        // Is the word we're currently on so wide that it will be wrapped further down the document?
        if self.word_wrap_column > 0 {
            if !wrap_opp {
                // If the current laid-out line had no wrap opportunities, it means we had an input
                // such as "fooooooooooooooooooooo" at a `word_wrap_column` of e.g. 10. The word
                // didn't fit and the lack of a `wrap_opp` indicates we must force a hard wrap.
                // Thankfully, if we reach this point, that was already done by the code above.
            } else if wrap_opp_logical_pos_x != logical_pos_x && visual_pos_y <= visual_target.y {
                // Imagine the string "foo bar" with a word wrap column of 6. If I ask for the cursor at
                // `logical_pos={5,0}`, then the code above exited while reaching the target.
                // At this point, this function doesn't know yet that after the "b" there's "ar"
                // which causes a word wrap, and causes the final visual position to be {1,1}.
                // This code thus seeks ahead and checks if the current word will wrap or not.
                // Of course we only need to do this if the cursor isn't on a wrap opportunity already.

                // The loop below should not modify the target we already found.
                let mut visual_pos_x_lookahead = visual_pos_x;

                loop {
                    let props_current_cluster = props_next_cluster;
                    let mut props_last_char;
                    let mut offset_next_cluster;
                    let mut state = 0;
                    let mut width = 0;

                    // Since we want to measure the width of the current cluster,
                    // by necessity we need to seek to the next cluster.
                    // We'll then reuse the offset and properties of the next cluster in
                    // the next iteration of this (outer) loop (`props_next_cluster`).
                    loop {
                        if !chunk_iter.has_next() {
                            cold_path();
                            chunk_iter =
                                Utf8Chars::new(self.buffer.read_forward(chunk_range.end), 0);
                            chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len();
                        }

                        // Since this loop seeks ahead to the next cluster, and since `chunk_iter`
                        // records the offset of the next character after the returned one, we need
                        // to save the offset of the previous `chunk_iter` before calling `next()`.
                        // Similar applies to the width.
                        props_last_char = props_next_cluster;
                        offset_next_cluster = chunk_range.start + chunk_iter.offset();
                        width += ucd_grapheme_cluster_character_width(
                            props_next_cluster,
                            ambiguous_width(),
                        ) as CoordType;

                        // The `Document::read_forward` interface promises us that it will not split
                        // grapheme clusters across chunks. Therefore, we can safely break here.
                        let ch = match chunk_iter.next() {
                            Some(ch) => ch,
                            None => break,
                        };

                        // Get the properties of the next cluster.
                        props_next_cluster = ucd_grapheme_cluster_lookup(ch);
                        state =
                            ucd_grapheme_cluster_joins(state, props_last_char, props_next_cluster);

                        // Stop if the next character does not join.
                        if ucd_grapheme_cluster_joins_done(state) {
                            break;
                        }
                    }

                    if offset_next_cluster == offset {
                        // No advance and the iterator is empty? End of text reached.
                        if chunk_iter.is_empty() {
                            break;
                        }
                        // Ignore the first iteration when processing the start-of-text.
                        continue;
                    }

                    // The max. width of a terminal cell is 2.
                    width = width.min(2);

                    // Tabs require special handling because they can have a variable width.
                    // NOTE(review): `column` is not advanced inside this lookahead loop, so for
                    // any cluster but the first one this is the column of the found target,
                    // not the column of the tab itself -- confirm that this is intended.
                    if props_last_char == ucd_tab_properties() {
                        // SAFETY: `self.tab_size` is clamped to >= 1 in `with_tab_size`.
                        // This assert ensures that Rust doesn't insert a panicking
                        // division-by-zero check for the `%` below.
                        unsafe { std::hint::assert_unchecked(self.tab_size >= 1) };
                        width = self.tab_size - (column % self.tab_size);
                    }

                    // Hard wrap: The line ends before the word could overflow. No wrap needed.
                    if props_last_char == ucd_linefeed_properties() {
                        break;
                    }

                    visual_pos_x_lookahead += width;

                    if visual_pos_x_lookahead > self.word_wrap_column {
                        // The word doesn't fit: It moves to the next row, and so does our target.
                        visual_pos_x -= wrap_opp_visual_pos_x;
                        visual_pos_y += 1;
                        break;
                    } else if !ucd_line_break_joins(props_current_cluster, props_next_cluster) {
                        // The word ended before reaching the wrap column: It stays where it is.
                        break;
                    }
                }
            }

            if visual_pos_y > visual_target.y {
                // Imagine the string "foo bar" with a word wrap column of 6. If I ask for the cursor at
                // `visual_pos={100,0}`, the code above exited early after wrapping without reaching the target.
                // Since I asked for the last character on the first line, we must wrap back up
                // to the last wrap opportunity.
                offset = wrap_opp_offset;
                logical_pos_x = wrap_opp_logical_pos_x;
                visual_pos_x = wrap_opp_visual_pos_x;
                visual_pos_y = visual_target.y;
                column = wrap_opp_column;
                wrap_opp = true;
            }
        }

        self.cursor.offset = offset;
        self.cursor.logical_pos = Point { x: logical_pos_x, y: logical_pos_y };
        self.cursor.visual_pos = Point { x: visual_pos_x, y: visual_pos_y };
        self.cursor.column = column;
        self.cursor.wrap_opp = wrap_opp;
        self.cursor
    }
469
470    #[inline]
471    fn calc_target_x(target: Point, pos_y: CoordType) -> CoordType {
472        match pos_y.cmp(&target.y) {
473            std::cmp::Ordering::Less => CoordType::MAX,
474            std::cmp::Ordering::Equal => target.x,
475            std::cmp::Ordering::Greater => 0,
476        }
477    }
478}
479
/// Returns an offset past a newline.
///
/// If `offset` is right in front of a newline (`\n`, `\r\n`, or a lone `\r`),
/// this will return the offset past said newline. Otherwise, and also if
/// `offset` is out of bounds, it's returned unchanged.
pub fn skip_newline(text: &[u8], mut offset: usize) -> usize {
    // `get` yields `None` past the end of `text`, which takes care of all bounds checks.
    if text.get(offset) == Some(&b'\r') {
        offset += 1;
    }
    if text.get(offset) == Some(&b'\n') {
        offset += 1;
    }
    offset
}
499
/// Strips a trailing newline from the given text.
///
/// At most one trailing `\n` is removed, followed by at most one trailing `\r`.
/// This covers `\n`, `\r\n` and a lone `\r`.
pub fn strip_newline(text: &[u8]) -> &[u8] {
    let text = match text {
        [head @ .., b'\n'] => head,
        _ => text,
    };
    match text {
        [head @ .., b'\r'] => head,
        _ => text,
    }
}
511
512#[cfg(test)]
513mod test {
514    use super::*;
515
516    struct ChunkedDoc<'a>(&'a [&'a [u8]]);
517
518    impl ReadableDocument for ChunkedDoc<'_> {
519        fn read_forward(&self, mut off: usize) -> &[u8] {
520            for chunk in self.0 {
521                if off < chunk.len() {
522                    return &chunk[off..];
523                }
524                off -= chunk.len();
525            }
526            &[]
527        }
528
529        fn read_backward(&self, mut off: usize) -> &[u8] {
530            for chunk in self.0.iter().rev() {
531                if off < chunk.len() {
532                    return &chunk[..chunk.len() - off];
533                }
534                off -= chunk.len();
535            }
536            &[]
537        }
538    }
539
540    #[test]
541    fn test_measure_forward_newline_start() {
542        let cursor =
543            MeasurementConfig::new(&"foo\nbar".as_bytes()).goto_visual(Point { x: 0, y: 1 });
544        assert_eq!(
545            cursor,
546            Cursor {
547                offset: 4,
548                logical_pos: Point { x: 0, y: 1 },
549                visual_pos: Point { x: 0, y: 1 },
550                column: 0,
551                wrap_opp: false,
552            }
553        );
554    }
555
556    #[test]
557    fn test_measure_forward_clipped_wide_char() {
558        let cursor = MeasurementConfig::new(&"a😶‍🌫️b".as_bytes()).goto_visual(Point { x: 2, y: 0 });
559        assert_eq!(
560            cursor,
561            Cursor {
562                offset: 1,
563                logical_pos: Point { x: 1, y: 0 },
564                visual_pos: Point { x: 1, y: 0 },
565                column: 1,
566                wrap_opp: false,
567            }
568        );
569    }
570
571    #[test]
572    fn test_measure_forward_word_wrap() {
573        //   |foo␣  |
574        //   |bar␣  |
575        //   |baz   |
576        let text = "foo bar \nbaz".as_bytes();
577
578        // Does hitting a logical target wrap the visual position along with the word?
579        let mut cfg = MeasurementConfig::new(&text).with_word_wrap_column(6);
580        let cursor = cfg.goto_logical(Point { x: 5, y: 0 });
581        assert_eq!(
582            cursor,
583            Cursor {
584                offset: 5,
585                logical_pos: Point { x: 5, y: 0 },
586                visual_pos: Point { x: 1, y: 1 },
587                column: 5,
588                wrap_opp: true,
589            }
590        );
591
592        // Does hitting the visual target within a word reset the hit back to the end of the visual line?
593        let mut cfg = MeasurementConfig::new(&text).with_word_wrap_column(6);
594        let cursor = cfg.goto_visual(Point { x: CoordType::MAX, y: 0 });
595        assert_eq!(
596            cursor,
597            Cursor {
598                offset: 4,
599                logical_pos: Point { x: 4, y: 0 },
600                visual_pos: Point { x: 4, y: 0 },
601                column: 4,
602                wrap_opp: true,
603            }
604        );
605
606        // Does hitting the same target but with a non-zero starting position result in the same outcome?
607        let mut cfg = MeasurementConfig::new(&text).with_word_wrap_column(6).with_cursor(Cursor {
608            offset: 1,
609            logical_pos: Point { x: 1, y: 0 },
610            visual_pos: Point { x: 1, y: 0 },
611            column: 1,
612            wrap_opp: false,
613        });
614        let cursor = cfg.goto_visual(Point { x: 5, y: 0 });
615        assert_eq!(
616            cursor,
617            Cursor {
618                offset: 4,
619                logical_pos: Point { x: 4, y: 0 },
620                visual_pos: Point { x: 4, y: 0 },
621                column: 4,
622                wrap_opp: true,
623            }
624        );
625
626        let cursor = cfg.goto_visual(Point { x: 0, y: 1 });
627        assert_eq!(
628            cursor,
629            Cursor {
630                offset: 4,
631                logical_pos: Point { x: 4, y: 0 },
632                visual_pos: Point { x: 0, y: 1 },
633                column: 4,
634                wrap_opp: false,
635            }
636        );
637
638        let cursor = cfg.goto_visual(Point { x: 5, y: 1 });
639        assert_eq!(
640            cursor,
641            Cursor {
642                offset: 8,
643                logical_pos: Point { x: 8, y: 0 },
644                visual_pos: Point { x: 4, y: 1 },
645                column: 8,
646                wrap_opp: false,
647            }
648        );
649
650        let cursor = cfg.goto_visual(Point { x: 0, y: 2 });
651        assert_eq!(
652            cursor,
653            Cursor {
654                offset: 9,
655                logical_pos: Point { x: 0, y: 1 },
656                visual_pos: Point { x: 0, y: 2 },
657                column: 0,
658                wrap_opp: false,
659            }
660        );
661
662        let cursor = cfg.goto_visual(Point { x: 5, y: 2 });
663        assert_eq!(
664            cursor,
665            Cursor {
666                offset: 12,
667                logical_pos: Point { x: 3, y: 1 },
668                visual_pos: Point { x: 3, y: 2 },
669                column: 3,
670                wrap_opp: false,
671            }
672        );
673    }
674
675    #[test]
676    fn test_measure_forward_tabs() {
677        let text = "a\tb\tc".as_bytes();
678        let cursor =
679            MeasurementConfig::new(&text).with_tab_size(4).goto_visual(Point { x: 4, y: 0 });
680        assert_eq!(
681            cursor,
682            Cursor {
683                offset: 2,
684                logical_pos: Point { x: 2, y: 0 },
685                visual_pos: Point { x: 4, y: 0 },
686                column: 4,
687                wrap_opp: false,
688            }
689        );
690    }
691
692    #[test]
693    fn test_measure_forward_chunk_boundaries() {
694        let chunks = [
695            "Hello".as_bytes(),
696            "\u{1F469}\u{1F3FB}".as_bytes(), // 8 bytes, 2 columns
697            "World".as_bytes(),
698        ];
699        let doc = ChunkedDoc(&chunks);
700        let cursor = MeasurementConfig::new(&doc).goto_visual(Point { x: 5 + 2 + 3, y: 0 });
701        assert_eq!(cursor.offset, 5 + 8 + 3);
702        assert_eq!(cursor.logical_pos, Point { x: 5 + 1 + 3, y: 0 });
703    }
704
705    #[test]
706    fn test_exact_wrap() {
707        //   |foo_   |
708        //   |bar.   |
709        //   |abc    |
710        let chunks = ["foo ".as_bytes(), "bar".as_bytes(), ".\n".as_bytes(), "abc".as_bytes()];
711        let doc = ChunkedDoc(&chunks);
712        let mut cfg = MeasurementConfig::new(&doc).with_word_wrap_column(7);
713        let max = CoordType::MAX;
714
715        let end0 = cfg.goto_visual(Point { x: 7, y: 0 });
716        assert_eq!(
717            end0,
718            Cursor {
719                offset: 4,
720                logical_pos: Point { x: 4, y: 0 },
721                visual_pos: Point { x: 4, y: 0 },
722                column: 4,
723                wrap_opp: true,
724            }
725        );
726
727        let beg1 = cfg.goto_visual(Point { x: 0, y: 1 });
728        assert_eq!(
729            beg1,
730            Cursor {
731                offset: 4,
732                logical_pos: Point { x: 4, y: 0 },
733                visual_pos: Point { x: 0, y: 1 },
734                column: 4,
735                wrap_opp: false,
736            }
737        );
738
739        let end1 = cfg.goto_visual(Point { x: max, y: 1 });
740        assert_eq!(
741            end1,
742            Cursor {
743                offset: 8,
744                logical_pos: Point { x: 8, y: 0 },
745                visual_pos: Point { x: 4, y: 1 },
746                column: 8,
747                wrap_opp: false,
748            }
749        );
750
751        let beg2 = cfg.goto_visual(Point { x: 0, y: 2 });
752        assert_eq!(
753            beg2,
754            Cursor {
755                offset: 9,
756                logical_pos: Point { x: 0, y: 1 },
757                visual_pos: Point { x: 0, y: 2 },
758                column: 0,
759                wrap_opp: false,
760            }
761        );
762
763        let end2 = cfg.goto_visual(Point { x: max, y: 2 });
764        assert_eq!(
765            end2,
766            Cursor {
767                offset: 12,
768                logical_pos: Point { x: 3, y: 1 },
769                visual_pos: Point { x: 3, y: 2 },
770                column: 3,
771                wrap_opp: false,
772            }
773        );
774    }
775
776    #[test]
777    fn test_force_wrap() {
778        // |//_     |
779        // |aaaaaaaa|
780        // |aaaa    |
781        let bytes = "// aaaaaaaaaaaa".as_bytes();
782        let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(8);
783        let max = CoordType::MAX;
784
785        // At the end of "// " there should be a wrap.
786        let end0 = cfg.goto_visual(Point { x: max, y: 0 });
787        assert_eq!(
788            end0,
789            Cursor {
790                offset: 3,
791                logical_pos: Point { x: 3, y: 0 },
792                visual_pos: Point { x: 3, y: 0 },
793                column: 3,
794                wrap_opp: true,
795            }
796        );
797
798        // Test if the ambiguous visual position at the wrap location doesn't change the offset.
799        let beg0 = cfg.goto_visual(Point { x: 0, y: 1 });
800        assert_eq!(
801            beg0,
802            Cursor {
803                offset: 3,
804                logical_pos: Point { x: 3, y: 0 },
805                visual_pos: Point { x: 0, y: 1 },
806                column: 3,
807                wrap_opp: false,
808            }
809        );
810
811        // Test if navigating inside the wrapped line doesn't cause further wrapping.
812        //
813        // This step of the test is important, as it ensures that the following force-wrap works,
814        // even if 1 of the 8 "a"s was already processed.
815        let beg0_off1 = cfg.goto_logical(Point { x: 4, y: 0 });
816        assert_eq!(
817            beg0_off1,
818            Cursor {
819                offset: 4,
820                logical_pos: Point { x: 4, y: 0 },
821                visual_pos: Point { x: 1, y: 1 },
822                column: 4,
823                wrap_opp: false,
824            }
825        );
826
827        // Test if the force-wrap applies at the end of the first 8 "a"s.
828        let end1 = cfg.goto_visual(Point { x: max, y: 1 });
829        assert_eq!(
830            end1,
831            Cursor {
832                offset: 11,
833                logical_pos: Point { x: 11, y: 0 },
834                visual_pos: Point { x: 8, y: 1 },
835                column: 11,
836                wrap_opp: true,
837            }
838        );
839
840        // Test if the remaining 4 "a"s are properly laid-out.
841        let end2 = cfg.goto_visual(Point { x: max, y: 2 });
842        assert_eq!(
843            end2,
844            Cursor {
845                offset: 15,
846                logical_pos: Point { x: 15, y: 0 },
847                visual_pos: Point { x: 4, y: 2 },
848                column: 15,
849                wrap_opp: false,
850            }
851        );
852    }
853
854    #[test]
855    fn test_force_wrap_wide() {
856        // These Yijing Hexagram Symbols form no word wrap opportunities.
857        let text = "䷀䷁䷂䷃䷄䷅䷆䷇䷈䷉";
858        let expected = ["䷀䷁", "䷂䷃", "䷄䷅", "䷆䷇", "䷈䷉"];
859        let bytes = text.as_bytes();
860        let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(5);
861
862        for (y, &expected) in expected.iter().enumerate() {
863            let y = y as CoordType;
864            // In order for `goto_visual()` to hit column 0 after a word wrap,
865            // it MUST be able to go back by 1 grapheme, which is what this tests.
866            let beg = cfg.goto_visual(Point { x: 0, y });
867            let end = cfg.goto_visual(Point { x: 5, y });
868            let actual = &text[beg.offset..end.offset];
869            assert_eq!(actual, expected);
870        }
871    }
872
873    // Similar to the `test_force_wrap` test, but here we vertically descend
874    // down the document without ever touching the first or last column.
875    // I found that this finds curious bugs at times.
876    #[test]
877    fn test_force_wrap_column() {
878        // |//_     |
879        // |aaaaaaaa|
880        // |aaaa    |
881        let bytes = "// aaaaaaaaaaaa".as_bytes();
882        let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(8);
883
884        // At the end of "// " there should be a wrap.
885        let end0 = cfg.goto_visual(Point { x: CoordType::MAX, y: 0 });
886        assert_eq!(
887            end0,
888            Cursor {
889                offset: 3,
890                logical_pos: Point { x: 3, y: 0 },
891                visual_pos: Point { x: 3, y: 0 },
892                column: 3,
893                wrap_opp: true,
894            }
895        );
896
897        let mid1 = cfg.goto_visual(Point { x: end0.visual_pos.x, y: 1 });
898        assert_eq!(
899            mid1,
900            Cursor {
901                offset: 6,
902                logical_pos: Point { x: 6, y: 0 },
903                visual_pos: Point { x: 3, y: 1 },
904                column: 6,
905                wrap_opp: false,
906            }
907        );
908
909        let mid2 = cfg.goto_visual(Point { x: end0.visual_pos.x, y: 2 });
910        assert_eq!(
911            mid2,
912            Cursor {
913                offset: 14,
914                logical_pos: Point { x: 14, y: 0 },
915                visual_pos: Point { x: 3, y: 2 },
916                column: 14,
917                wrap_opp: false,
918            }
919        );
920    }
921
922    #[test]
923    fn test_any_wrap() {
924        // |//_-----|
925        // |------- |
926        let bytes = "// ------------".as_bytes();
927        let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(8);
928        let max = CoordType::MAX;
929
930        let end0 = cfg.goto_visual(Point { x: max, y: 0 });
931        assert_eq!(
932            end0,
933            Cursor {
934                offset: 8,
935                logical_pos: Point { x: 8, y: 0 },
936                visual_pos: Point { x: 8, y: 0 },
937                column: 8,
938                wrap_opp: true,
939            }
940        );
941
942        let end1 = cfg.goto_visual(Point { x: max, y: 1 });
943        assert_eq!(
944            end1,
945            Cursor {
946                offset: 15,
947                logical_pos: Point { x: 15, y: 0 },
948                visual_pos: Point { x: 7, y: 1 },
949                column: 15,
950                wrap_opp: true,
951            }
952        );
953    }
954
955    #[test]
956    fn test_any_wrap_wide() {
957        // These Japanese characters form word wrap opportunity between each character.
958        let text = "零一二三四五六七八九";
959        let expected = ["零一", "二三", "四五", "六七", "八九"];
960        let bytes = text.as_bytes();
961        let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(5);
962
963        for (y, &expected) in expected.iter().enumerate() {
964            let y = y as CoordType;
965            // In order for `goto_visual()` to hit column 0 after a word wrap,
966            // it MUST be able to go back by 1 grapheme, which is what this tests.
967            let beg = cfg.goto_visual(Point { x: 0, y });
968            let end = cfg.goto_visual(Point { x: 5, y });
969            let actual = &text[beg.offset..end.offset];
970            assert_eq!(actual, expected);
971        }
972    }
973
974    #[test]
975    fn test_wrap_tab() {
976        // |foo_    | <- 1 space
977        // |____b   | <- 1 tab, 1 space
978        let text = "foo \t b";
979        let bytes = text.as_bytes();
980        let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(8).with_tab_size(4);
981        let max = CoordType::MAX;
982
983        let end0 = cfg.goto_visual(Point { x: max, y: 0 });
984        assert_eq!(
985            end0,
986            Cursor {
987                offset: 4,
988                logical_pos: Point { x: 4, y: 0 },
989                visual_pos: Point { x: 4, y: 0 },
990                column: 4,
991                wrap_opp: true,
992            },
993        );
994
995        let beg1 = cfg.goto_visual(Point { x: 0, y: 1 });
996        assert_eq!(
997            beg1,
998            Cursor {
999                offset: 4,
1000                logical_pos: Point { x: 4, y: 0 },
1001                visual_pos: Point { x: 0, y: 1 },
1002                column: 4,
1003                wrap_opp: false,
1004            },
1005        );
1006
1007        let end1 = cfg.goto_visual(Point { x: max, y: 1 });
1008        assert_eq!(
1009            end1,
1010            Cursor {
1011                offset: 7,
1012                logical_pos: Point { x: 7, y: 0 },
1013                visual_pos: Point { x: 6, y: 1 },
1014                column: 10,
1015                wrap_opp: true,
1016            },
1017        );
1018    }
1019
1020    #[test]
1021    fn test_crlf() {
1022        let text = "a\r\nbcd\r\ne".as_bytes();
1023        let cursor = MeasurementConfig::new(&text).goto_visual(Point { x: CoordType::MAX, y: 1 });
1024        assert_eq!(
1025            cursor,
1026            Cursor {
1027                offset: 6,
1028                logical_pos: Point { x: 3, y: 1 },
1029                visual_pos: Point { x: 3, y: 1 },
1030                column: 3,
1031                wrap_opp: false,
1032            }
1033        );
1034    }
1035
1036    #[test]
1037    fn test_wrapped_cursor_can_seek_backward() {
1038        let bytes = "hello world".as_bytes();
1039        let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(10);
1040
1041        // When the word wrap at column 10 hits, the cursor will be at the end of the word "world" (between l and d).
1042        // This tests if the algorithm is capable of going back to the start of the word and find the actual target.
1043        let cursor = cfg.goto_visual(Point { x: 2, y: 1 });
1044        assert_eq!(
1045            cursor,
1046            Cursor {
1047                offset: 8,
1048                logical_pos: Point { x: 8, y: 0 },
1049                visual_pos: Point { x: 2, y: 1 },
1050                column: 8,
1051                wrap_opp: false,
1052            }
1053        );
1054    }
1055
1056    #[test]
1057    fn test_strip_newline() {
1058        assert_eq!(strip_newline(b"hello\n"), b"hello");
1059        assert_eq!(strip_newline(b"hello\r\n"), b"hello");
1060        assert_eq!(strip_newline(b"hello"), b"hello");
1061    }
1062}