edit/unicode/measurement.rs
1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4use std::hint::cold_path;
5
6use super::Utf8Chars;
7use super::tables::*;
8use crate::document::ReadableDocument;
9use crate::helpers::{CoordType, Point};
10
11// On one hand it's disgusting that I wrote this as a global variable, but on the
12// other hand, this isn't a public library API, and it makes the code a lot cleaner,
13// because we don't need to inject this once-per-process value everywhere.
14static mut AMBIGUOUS_WIDTH: usize = 1;
15
16/// Sets the width of "ambiguous" width characters as per "UAX #11: East Asian Width".
17///
18/// Defaults to 1.
19pub fn setup_ambiguous_width(ambiguous_width: CoordType) {
20 unsafe { AMBIGUOUS_WIDTH = ambiguous_width as usize };
21}
22
23#[inline]
24fn ambiguous_width() -> usize {
25 // SAFETY: This is a global variable that is set once per process.
26 // It is never changed after that, so this is safe to call.
27 unsafe { AMBIGUOUS_WIDTH }
28}
29
/// Stores a position inside a [`ReadableDocument`].
///
/// The cursor tracks both the absolute byte-offset,
/// as well as the position in terminal-related coordinates.
/// It is the value returned by the `goto_*` methods of [`MeasurementConfig`]
/// and can be fed back in via [`MeasurementConfig::with_cursor`] to resume measuring.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct Cursor {
    /// Offset in bytes within the buffer.
    pub offset: usize,
    /// Position in the buffer in lines (.y) and grapheme clusters (.x).
    ///
    /// Line wrapping has NO influence on this.
    pub logical_pos: Point,
    /// Position in the buffer in laid out rows (.y) and columns (.x).
    ///
    /// Line wrapping has an influence on this.
    pub visual_pos: Point,
    /// Horizontal position in visual columns.
    ///
    /// Line wrapping has NO influence on this and if word wrap is disabled,
    /// it's identical to `visual_pos.x`. This is useful for calculating tab widths.
    pub column: CoordType,
    /// When `measure_forward` hits the `word_wrap_column`, the question is:
    /// Was there a wrap opportunity on this line? Because if there wasn't,
    /// a hard-wrap is required; otherwise, the word that is being laid-out is
    /// moved to the next line. This boolean carries this state between calls.
    pub wrap_opp: bool,
}
57
/// Your entrypoint to navigating inside a [`ReadableDocument`].
///
/// Build one with [`MeasurementConfig::new`], adjust it with the `with_*` methods
/// and then move **forward** through the document with the `goto_*` methods.
#[derive(Clone)]
pub struct MeasurementConfig<'doc> {
    /// The position from which the next `goto_*` call starts measuring.
    /// It is updated by every `goto_*` call.
    cursor: Cursor,
    /// Distance between tab stops in columns. Always >= 1 (see `with_tab_size`).
    tab_size: CoordType,
    /// Column at which lines are wrapped. Values <= 0 disable word wrap.
    word_wrap_column: CoordType,
    /// The document that is being measured.
    buffer: &'doc dyn ReadableDocument,
}
66
67impl<'doc> MeasurementConfig<'doc> {
68 /// Creates a new [`MeasurementConfig`] for the given document.
69 pub fn new(buffer: &'doc dyn ReadableDocument) -> Self {
70 Self { cursor: Default::default(), tab_size: 8, word_wrap_column: 0, buffer }
71 }
72
73 /// Sets the initial cursor to the given position.
74 ///
75 /// WARNING: While the code doesn't panic if the cursor is invalid,
76 /// the results will obviously be complete garbage.
77 pub fn with_cursor(mut self, cursor: Cursor) -> Self {
78 self.cursor = cursor;
79 self
80 }
81
82 /// Sets the tab size.
83 ///
84 /// Defaults to 8, because that's what a tab in terminals evaluates to.
85 pub fn with_tab_size(mut self, tab_size: CoordType) -> Self {
86 self.tab_size = tab_size.max(1);
87 self
88 }
89
90 /// You want word wrap? Set it here!
91 ///
92 /// Defaults to 0, which means no word wrap.
93 pub fn with_word_wrap_column(mut self, word_wrap_column: CoordType) -> Self {
94 self.word_wrap_column = word_wrap_column;
95 self
96 }
97
98 /// Navigates **forward** to the given absolute offset.
99 ///
100 /// # Returns
101 ///
102 /// The cursor position after the navigation.
103 pub fn goto_offset(&mut self, offset: usize) -> Cursor {
104 self.measure_forward(offset, Point::MAX, Point::MAX)
105 }
106
107 /// Navigates **forward** to the given logical position.
108 ///
109 /// Logical positions are in lines and grapheme clusters.
110 ///
111 /// # Returns
112 ///
113 /// The cursor position after the navigation.
114 pub fn goto_logical(&mut self, logical_target: Point) -> Cursor {
115 self.measure_forward(usize::MAX, logical_target, Point::MAX)
116 }
117
118 /// Navigates **forward** to the given visual position.
119 ///
120 /// Visual positions are in laid out rows and columns.
121 ///
122 /// # Returns
123 ///
124 /// The cursor position after the navigation.
125 pub fn goto_visual(&mut self, visual_target: Point) -> Cursor {
126 self.measure_forward(usize::MAX, Point::MAX, visual_target)
127 }
128
    /// Returns the current cursor position.
    ///
    /// This is the cursor that was passed to `with_cursor`,
    /// or the result of the most recent `goto_*` call.
    pub fn cursor(&self) -> Cursor {
        self.cursor
    }
133
134 // NOTE that going to a visual target can result in ambiguous results,
135 // where going to an identical logical target will yield a different result.
136 //
137 // Imagine if you have a `word_wrap_column` of 6 and there's "Hello World" on the line:
138 // `goto_logical` will return a `visual_pos` of {0,1}, while `goto_visual` returns {6,0}.
139 // This is because from a logical POV, if the wrap location equals the wrap column,
140 // the wrap exists on both lines and it'll default to wrapping. `goto_visual` however will always
141 // try to return a Y position that matches the requested position, so that Home/End works properly.
142 fn measure_forward(
143 &mut self,
144 offset_target: usize,
145 logical_target: Point,
146 visual_target: Point,
147 ) -> Cursor {
148 if self.cursor.offset >= offset_target
149 || self.cursor.logical_pos >= logical_target
150 || self.cursor.visual_pos >= visual_target
151 {
152 return self.cursor;
153 }
154
155 let mut offset = self.cursor.offset;
156 let mut logical_pos_x = self.cursor.logical_pos.x;
157 let mut logical_pos_y = self.cursor.logical_pos.y;
158 let mut visual_pos_x = self.cursor.visual_pos.x;
159 let mut visual_pos_y = self.cursor.visual_pos.y;
160 let mut column = self.cursor.column;
161
162 let mut logical_target_x = Self::calc_target_x(logical_target, logical_pos_y);
163 let mut visual_target_x = Self::calc_target_x(visual_target, visual_pos_y);
164
165 // wrap_opp = Wrap Opportunity
166 // These store the position and column of the last wrap opportunity. If `word_wrap_column` is
167 // zero (word wrap disabled), all grapheme clusters are a wrap opportunity, because none are.
168 let mut wrap_opp = self.cursor.wrap_opp;
169 let mut wrap_opp_offset = offset;
170 let mut wrap_opp_logical_pos_x = logical_pos_x;
171 let mut wrap_opp_visual_pos_x = visual_pos_x;
172 let mut wrap_opp_column = column;
173
174 let mut chunk_iter = Utf8Chars::new(b"", 0);
175 let mut chunk_range = offset..offset;
176 let mut props_next_cluster = ucd_start_of_text_properties();
177
178 loop {
179 // Have we reached the target already? Stop.
180 if offset >= offset_target
181 || logical_pos_x >= logical_target_x
182 || visual_pos_x >= visual_target_x
183 {
184 break;
185 }
186
187 let props_current_cluster = props_next_cluster;
188 let mut props_last_char;
189 let mut offset_next_cluster;
190 let mut state = 0;
191 let mut width = 0;
192
193 // Since we want to measure the width of the current cluster,
194 // by necessity we need to seek to the next cluster.
195 // We'll then reuse the offset and properties of the next cluster in
196 // the next iteration of the this (outer) loop (`props_next_cluster`).
197 loop {
198 if !chunk_iter.has_next() {
199 cold_path();
200 chunk_iter = Utf8Chars::new(self.buffer.read_forward(chunk_range.end), 0);
201 chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len();
202 }
203
204 // Since this loop seeks ahead to the next cluster, and since `chunk_iter`
205 // records the offset of the next character after the returned one, we need
206 // to save the offset of the previous `chunk_iter` before calling `next()`.
207 // Similar applies to the width.
208 props_last_char = props_next_cluster;
209 offset_next_cluster = chunk_range.start + chunk_iter.offset();
210 width += ucd_grapheme_cluster_character_width(props_next_cluster, ambiguous_width())
211 as CoordType;
212
213 // The `Document::read_forward` interface promises us that it will not split
214 // grapheme clusters across chunks. Therefore, we can safely break here.
215 let ch = match chunk_iter.next() {
216 Some(ch) => ch,
217 None => break,
218 };
219
220 // Get the properties of the next cluster.
221 props_next_cluster = ucd_grapheme_cluster_lookup(ch);
222 state = ucd_grapheme_cluster_joins(state, props_last_char, props_next_cluster);
223
224 // Stop if the next character does not join.
225 if ucd_grapheme_cluster_joins_done(state) {
226 break;
227 }
228 }
229
230 if offset_next_cluster == offset {
231 // No advance and the iterator is empty? End of text reached.
232 if chunk_iter.is_empty() {
233 break;
234 }
235 // Ignore the first iteration when processing the start-of-text.
236 continue;
237 }
238
239 // The max. width of a terminal cell is 2.
240 width = width.min(2);
241
242 // Tabs require special handling because they can have a variable width.
243 if props_last_char == ucd_tab_properties() {
244 // SAFETY: `self.tab_size` is clamped to >= 1 in `with_tab_size`.
245 // This assert ensures that Rust doesn't insert panicking null checks.
246 unsafe { std::hint::assert_unchecked(self.tab_size >= 1) };
247 width = self.tab_size - (column % self.tab_size);
248 }
249
250 // Hard wrap: Both the logical and visual position advance by one line.
251 if props_last_char == ucd_linefeed_properties() {
252 cold_path();
253
254 wrap_opp = false;
255
256 // Don't cross the newline if the target is on this line but we haven't reached it.
257 // E.g. if the callers asks for column 100 on a 10 column line,
258 // we'll return with the cursor set to column 10.
259 if logical_pos_y >= logical_target.y || visual_pos_y >= visual_target.y {
260 break;
261 }
262
263 offset = offset_next_cluster;
264 logical_pos_x = 0;
265 logical_pos_y += 1;
266 visual_pos_x = 0;
267 visual_pos_y += 1;
268 column = 0;
269
270 logical_target_x = Self::calc_target_x(logical_target, logical_pos_y);
271 visual_target_x = Self::calc_target_x(visual_target, visual_pos_y);
272 continue;
273 }
274
275 // Avoid advancing past the visual target, because `width` can be greater than 1.
276 if visual_pos_x + width > visual_target_x {
277 break;
278 }
279
280 // Since this code above may need to revert to a previous `wrap_opp_*`,
281 // it must be done before advancing / checking for `ucd_line_break_joins`.
282 if self.word_wrap_column > 0 && visual_pos_x + width > self.word_wrap_column {
283 if !wrap_opp {
284 // Otherwise, the lack of a wrap opportunity means that a single word
285 // is wider than the word wrap column. We need to force-break the word.
286 // This is similar to the above, but "bar" gets written at column 0.
287 wrap_opp_offset = offset;
288 wrap_opp_logical_pos_x = logical_pos_x;
289 wrap_opp_visual_pos_x = visual_pos_x;
290 wrap_opp_column = column;
291 visual_pos_x = 0;
292 } else {
293 // If we had a wrap opportunity on this line, we can move all
294 // characters since then to the next line without stopping this loop:
295 // +---------+ +---------+ +---------+
296 // | foo| -> | | -> | |
297 // | | |foo | |foobar |
298 // +---------+ +---------+ +---------+
299 // We don't actually move "foo", but rather just change where "bar" goes.
300 // Since this function doesn't copy text, the end result is the same.
301 visual_pos_x -= wrap_opp_visual_pos_x;
302 }
303
304 wrap_opp = false;
305 visual_pos_y += 1;
306 visual_target_x = Self::calc_target_x(visual_target, visual_pos_y);
307
308 if visual_pos_x == visual_target_x {
309 break;
310 }
311
312 // Imagine the word is "hello" and on the "o" we notice it wraps.
313 // If the target however was the "e", then we must revert back to "h" and search for it.
314 if visual_pos_x > visual_target_x {
315 cold_path();
316
317 offset = wrap_opp_offset;
318 logical_pos_x = wrap_opp_logical_pos_x;
319 visual_pos_x = 0;
320 column = wrap_opp_column;
321
322 chunk_iter.seek(chunk_iter.len());
323 chunk_range = offset..offset;
324 props_next_cluster = ucd_start_of_text_properties();
325 continue;
326 }
327 }
328
329 offset = offset_next_cluster;
330 logical_pos_x += 1;
331 visual_pos_x += width;
332 column += width;
333
334 if self.word_wrap_column > 0
335 && !ucd_line_break_joins(props_current_cluster, props_next_cluster)
336 {
337 wrap_opp = true;
338 wrap_opp_offset = offset;
339 wrap_opp_logical_pos_x = logical_pos_x;
340 wrap_opp_visual_pos_x = visual_pos_x;
341 wrap_opp_column = column;
342 }
343 }
344
345 // If we're here, we hit our target. Now the only question is:
346 // Is the word we're currently on so wide that it will be wrapped further down the document?
347 if self.word_wrap_column > 0 {
348 if !wrap_opp {
349 // If the current laid-out line had no wrap opportunities, it means we had an input
350 // such as "fooooooooooooooooooooo" at a `word_wrap_column` of e.g. 10. The word
351 // didn't fit and the lack of a `wrap_opp` indicates we must force a hard wrap.
352 // Thankfully, if we reach this point, that was already done by the code above.
353 } else if wrap_opp_logical_pos_x != logical_pos_x && visual_pos_y <= visual_target.y {
354 // Imagine the string "foo bar" with a word wrap column of 6. If I ask for the cursor at
355 // `logical_pos={5,0}`, then the code above exited while reaching the target.
356 // At this point, this function doesn't know yet that after the "b" there's "ar"
357 // which causes a word wrap, and causes the final visual position to be {1,1}.
358 // This code thus seeks ahead and checks if the current word will wrap or not.
359 // Of course we only need to do this if the cursor isn't on a wrap opportunity already.
360
361 // The loop below should not modify the target we already found.
362 let mut visual_pos_x_lookahead = visual_pos_x;
363
364 loop {
365 let props_current_cluster = props_next_cluster;
366 let mut props_last_char;
367 let mut offset_next_cluster;
368 let mut state = 0;
369 let mut width = 0;
370
371 // Since we want to measure the width of the current cluster,
372 // by necessity we need to seek to the next cluster.
373 // We'll then reuse the offset and properties of the next cluster in
374 // the next iteration of the this (outer) loop (`props_next_cluster`).
375 loop {
376 if !chunk_iter.has_next() {
377 cold_path();
378 chunk_iter =
379 Utf8Chars::new(self.buffer.read_forward(chunk_range.end), 0);
380 chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len();
381 }
382
383 // Since this loop seeks ahead to the next cluster, and since `chunk_iter`
384 // records the offset of the next character after the returned one, we need
385 // to save the offset of the previous `chunk_iter` before calling `next()`.
386 // Similar applies to the width.
387 props_last_char = props_next_cluster;
388 offset_next_cluster = chunk_range.start + chunk_iter.offset();
389 width += ucd_grapheme_cluster_character_width(
390 props_next_cluster,
391 ambiguous_width(),
392 ) as CoordType;
393
394 // The `Document::read_forward` interface promises us that it will not split
395 // grapheme clusters across chunks. Therefore, we can safely break here.
396 let ch = match chunk_iter.next() {
397 Some(ch) => ch,
398 None => break,
399 };
400
401 // Get the properties of the next cluster.
402 props_next_cluster = ucd_grapheme_cluster_lookup(ch);
403 state =
404 ucd_grapheme_cluster_joins(state, props_last_char, props_next_cluster);
405
406 // Stop if the next character does not join.
407 if ucd_grapheme_cluster_joins_done(state) {
408 break;
409 }
410 }
411
412 if offset_next_cluster == offset {
413 // No advance and the iterator is empty? End of text reached.
414 if chunk_iter.is_empty() {
415 break;
416 }
417 // Ignore the first iteration when processing the start-of-text.
418 continue;
419 }
420
421 // The max. width of a terminal cell is 2.
422 width = width.min(2);
423
424 // Tabs require special handling because they can have a variable width.
425 if props_last_char == ucd_tab_properties() {
426 // SAFETY: `self.tab_size` is clamped to >= 1 in `with_tab_size`.
427 // This assert ensures that Rust doesn't insert panicking null checks.
428 unsafe { std::hint::assert_unchecked(self.tab_size >= 1) };
429 width = self.tab_size - (column % self.tab_size);
430 }
431
432 // Hard wrap: Both the logical and visual position advance by one line.
433 if props_last_char == ucd_linefeed_properties() {
434 break;
435 }
436
437 visual_pos_x_lookahead += width;
438
439 if visual_pos_x_lookahead > self.word_wrap_column {
440 visual_pos_x -= wrap_opp_visual_pos_x;
441 visual_pos_y += 1;
442 break;
443 } else if !ucd_line_break_joins(props_current_cluster, props_next_cluster) {
444 break;
445 }
446 }
447 }
448
449 if visual_pos_y > visual_target.y {
450 // Imagine the string "foo bar" with a word wrap column of 6. If I ask for the cursor at
451 // `visual_pos={100,0}`, the code above exited early after wrapping without reaching the target.
452 // Since I asked for the last character on the first line, we must wrap back up the last wrap
453 offset = wrap_opp_offset;
454 logical_pos_x = wrap_opp_logical_pos_x;
455 visual_pos_x = wrap_opp_visual_pos_x;
456 visual_pos_y = visual_target.y;
457 column = wrap_opp_column;
458 wrap_opp = true;
459 }
460 }
461
462 self.cursor.offset = offset;
463 self.cursor.logical_pos = Point { x: logical_pos_x, y: logical_pos_y };
464 self.cursor.visual_pos = Point { x: visual_pos_x, y: visual_pos_y };
465 self.cursor.column = column;
466 self.cursor.wrap_opp = wrap_opp;
467 self.cursor
468 }
469
470 #[inline]
471 fn calc_target_x(target: Point, pos_y: CoordType) -> CoordType {
472 match pos_y.cmp(&target.y) {
473 std::cmp::Ordering::Less => CoordType::MAX,
474 std::cmp::Ordering::Equal => target.x,
475 std::cmp::Ordering::Greater => 0,
476 }
477 }
478}
479
/// Returns an offset past a newline.
///
/// If `offset` is right in front of a newline (`\r`, `\n` or `\r\n`),
/// this will return the offset past said newline.
/// Otherwise, including when `offset` is out of bounds, it's returned as is.
pub fn skip_newline(text: &[u8], mut offset: usize) -> usize {
    // `get` yields `None` past the end, which covers the bounds checks.
    if text.get(offset) == Some(&b'\r') {
        offset += 1;
    }
    if text.get(offset) == Some(&b'\n') {
        offset += 1;
    }
    offset
}
499
/// Strips a trailing newline from the given text.
///
/// A single trailing `\n` is removed first and then a single trailing `\r`.
pub fn strip_newline(text: &[u8]) -> &[u8] {
    let text = text.strip_suffix(b"\n").unwrap_or(text);
    text.strip_suffix(b"\r").unwrap_or(text)
}
511
512#[cfg(test)]
513mod test {
514 use super::*;
515
516 struct ChunkedDoc<'a>(&'a [&'a [u8]]);
517
518 impl ReadableDocument for ChunkedDoc<'_> {
519 fn read_forward(&self, mut off: usize) -> &[u8] {
520 for chunk in self.0 {
521 if off < chunk.len() {
522 return &chunk[off..];
523 }
524 off -= chunk.len();
525 }
526 &[]
527 }
528
529 fn read_backward(&self, mut off: usize) -> &[u8] {
530 for chunk in self.0.iter().rev() {
531 if off < chunk.len() {
532 return &chunk[..chunk.len() - off];
533 }
534 off -= chunk.len();
535 }
536 &[]
537 }
538 }
539
540 #[test]
541 fn test_measure_forward_newline_start() {
542 let cursor =
543 MeasurementConfig::new(&"foo\nbar".as_bytes()).goto_visual(Point { x: 0, y: 1 });
544 assert_eq!(
545 cursor,
546 Cursor {
547 offset: 4,
548 logical_pos: Point { x: 0, y: 1 },
549 visual_pos: Point { x: 0, y: 1 },
550 column: 0,
551 wrap_opp: false,
552 }
553 );
554 }
555
    // A cluster that is wider than the remaining distance to the visual target
    // must not be entered: the cursor is expected to stay in front of the emoji.
    #[test]
    fn test_measure_forward_clipped_wide_char() {
        let cursor = MeasurementConfig::new(&"a😶🌫️b".as_bytes()).goto_visual(Point { x: 2, y: 0 });
        assert_eq!(
            cursor,
            Cursor {
                offset: 1,
                logical_pos: Point { x: 1, y: 0 },
                visual_pos: Point { x: 1, y: 0 },
                column: 1,
                wrap_opp: false,
            }
        );
    }
570
571 #[test]
572 fn test_measure_forward_word_wrap() {
573 // |foo␣ |
574 // |bar␣ |
575 // |baz |
576 let text = "foo bar \nbaz".as_bytes();
577
578 // Does hitting a logical target wrap the visual position along with the word?
579 let mut cfg = MeasurementConfig::new(&text).with_word_wrap_column(6);
580 let cursor = cfg.goto_logical(Point { x: 5, y: 0 });
581 assert_eq!(
582 cursor,
583 Cursor {
584 offset: 5,
585 logical_pos: Point { x: 5, y: 0 },
586 visual_pos: Point { x: 1, y: 1 },
587 column: 5,
588 wrap_opp: true,
589 }
590 );
591
592 // Does hitting the visual target within a word reset the hit back to the end of the visual line?
593 let mut cfg = MeasurementConfig::new(&text).with_word_wrap_column(6);
594 let cursor = cfg.goto_visual(Point { x: CoordType::MAX, y: 0 });
595 assert_eq!(
596 cursor,
597 Cursor {
598 offset: 4,
599 logical_pos: Point { x: 4, y: 0 },
600 visual_pos: Point { x: 4, y: 0 },
601 column: 4,
602 wrap_opp: true,
603 }
604 );
605
606 // Does hitting the same target but with a non-zero starting position result in the same outcome?
607 let mut cfg = MeasurementConfig::new(&text).with_word_wrap_column(6).with_cursor(Cursor {
608 offset: 1,
609 logical_pos: Point { x: 1, y: 0 },
610 visual_pos: Point { x: 1, y: 0 },
611 column: 1,
612 wrap_opp: false,
613 });
614 let cursor = cfg.goto_visual(Point { x: 5, y: 0 });
615 assert_eq!(
616 cursor,
617 Cursor {
618 offset: 4,
619 logical_pos: Point { x: 4, y: 0 },
620 visual_pos: Point { x: 4, y: 0 },
621 column: 4,
622 wrap_opp: true,
623 }
624 );
625
626 let cursor = cfg.goto_visual(Point { x: 0, y: 1 });
627 assert_eq!(
628 cursor,
629 Cursor {
630 offset: 4,
631 logical_pos: Point { x: 4, y: 0 },
632 visual_pos: Point { x: 0, y: 1 },
633 column: 4,
634 wrap_opp: false,
635 }
636 );
637
638 let cursor = cfg.goto_visual(Point { x: 5, y: 1 });
639 assert_eq!(
640 cursor,
641 Cursor {
642 offset: 8,
643 logical_pos: Point { x: 8, y: 0 },
644 visual_pos: Point { x: 4, y: 1 },
645 column: 8,
646 wrap_opp: false,
647 }
648 );
649
650 let cursor = cfg.goto_visual(Point { x: 0, y: 2 });
651 assert_eq!(
652 cursor,
653 Cursor {
654 offset: 9,
655 logical_pos: Point { x: 0, y: 1 },
656 visual_pos: Point { x: 0, y: 2 },
657 column: 0,
658 wrap_opp: false,
659 }
660 );
661
662 let cursor = cfg.goto_visual(Point { x: 5, y: 2 });
663 assert_eq!(
664 cursor,
665 Cursor {
666 offset: 12,
667 logical_pos: Point { x: 3, y: 1 },
668 visual_pos: Point { x: 3, y: 2 },
669 column: 3,
670 wrap_opp: false,
671 }
672 );
673 }
674
675 #[test]
676 fn test_measure_forward_tabs() {
677 let text = "a\tb\tc".as_bytes();
678 let cursor =
679 MeasurementConfig::new(&text).with_tab_size(4).goto_visual(Point { x: 4, y: 0 });
680 assert_eq!(
681 cursor,
682 Cursor {
683 offset: 2,
684 logical_pos: Point { x: 2, y: 0 },
685 visual_pos: Point { x: 4, y: 0 },
686 column: 4,
687 wrap_opp: false,
688 }
689 );
690 }
691
692 #[test]
693 fn test_measure_forward_chunk_boundaries() {
694 let chunks = [
695 "Hello".as_bytes(),
696 "\u{1F469}\u{1F3FB}".as_bytes(), // 8 bytes, 2 columns
697 "World".as_bytes(),
698 ];
699 let doc = ChunkedDoc(&chunks);
700 let cursor = MeasurementConfig::new(&doc).goto_visual(Point { x: 5 + 2 + 3, y: 0 });
701 assert_eq!(cursor.offset, 5 + 8 + 3);
702 assert_eq!(cursor.logical_pos, Point { x: 5 + 1 + 3, y: 0 });
703 }
704
705 #[test]
706 fn test_exact_wrap() {
707 // |foo_ |
708 // |bar. |
709 // |abc |
710 let chunks = ["foo ".as_bytes(), "bar".as_bytes(), ".\n".as_bytes(), "abc".as_bytes()];
711 let doc = ChunkedDoc(&chunks);
712 let mut cfg = MeasurementConfig::new(&doc).with_word_wrap_column(7);
713 let max = CoordType::MAX;
714
715 let end0 = cfg.goto_visual(Point { x: 7, y: 0 });
716 assert_eq!(
717 end0,
718 Cursor {
719 offset: 4,
720 logical_pos: Point { x: 4, y: 0 },
721 visual_pos: Point { x: 4, y: 0 },
722 column: 4,
723 wrap_opp: true,
724 }
725 );
726
727 let beg1 = cfg.goto_visual(Point { x: 0, y: 1 });
728 assert_eq!(
729 beg1,
730 Cursor {
731 offset: 4,
732 logical_pos: Point { x: 4, y: 0 },
733 visual_pos: Point { x: 0, y: 1 },
734 column: 4,
735 wrap_opp: false,
736 }
737 );
738
739 let end1 = cfg.goto_visual(Point { x: max, y: 1 });
740 assert_eq!(
741 end1,
742 Cursor {
743 offset: 8,
744 logical_pos: Point { x: 8, y: 0 },
745 visual_pos: Point { x: 4, y: 1 },
746 column: 8,
747 wrap_opp: false,
748 }
749 );
750
751 let beg2 = cfg.goto_visual(Point { x: 0, y: 2 });
752 assert_eq!(
753 beg2,
754 Cursor {
755 offset: 9,
756 logical_pos: Point { x: 0, y: 1 },
757 visual_pos: Point { x: 0, y: 2 },
758 column: 0,
759 wrap_opp: false,
760 }
761 );
762
763 let end2 = cfg.goto_visual(Point { x: max, y: 2 });
764 assert_eq!(
765 end2,
766 Cursor {
767 offset: 12,
768 logical_pos: Point { x: 3, y: 1 },
769 visual_pos: Point { x: 3, y: 2 },
770 column: 3,
771 wrap_opp: false,
772 }
773 );
774 }
775
776 #[test]
777 fn test_force_wrap() {
778 // |//_ |
779 // |aaaaaaaa|
780 // |aaaa |
781 let bytes = "// aaaaaaaaaaaa".as_bytes();
782 let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(8);
783 let max = CoordType::MAX;
784
785 // At the end of "// " there should be a wrap.
786 let end0 = cfg.goto_visual(Point { x: max, y: 0 });
787 assert_eq!(
788 end0,
789 Cursor {
790 offset: 3,
791 logical_pos: Point { x: 3, y: 0 },
792 visual_pos: Point { x: 3, y: 0 },
793 column: 3,
794 wrap_opp: true,
795 }
796 );
797
798 // Test if the ambiguous visual position at the wrap location doesn't change the offset.
799 let beg0 = cfg.goto_visual(Point { x: 0, y: 1 });
800 assert_eq!(
801 beg0,
802 Cursor {
803 offset: 3,
804 logical_pos: Point { x: 3, y: 0 },
805 visual_pos: Point { x: 0, y: 1 },
806 column: 3,
807 wrap_opp: false,
808 }
809 );
810
811 // Test if navigating inside the wrapped line doesn't cause further wrapping.
812 //
813 // This step of the test is important, as it ensures that the following force-wrap works,
814 // even if 1 of the 8 "a"s was already processed.
815 let beg0_off1 = cfg.goto_logical(Point { x: 4, y: 0 });
816 assert_eq!(
817 beg0_off1,
818 Cursor {
819 offset: 4,
820 logical_pos: Point { x: 4, y: 0 },
821 visual_pos: Point { x: 1, y: 1 },
822 column: 4,
823 wrap_opp: false,
824 }
825 );
826
827 // Test if the force-wrap applies at the end of the first 8 "a"s.
828 let end1 = cfg.goto_visual(Point { x: max, y: 1 });
829 assert_eq!(
830 end1,
831 Cursor {
832 offset: 11,
833 logical_pos: Point { x: 11, y: 0 },
834 visual_pos: Point { x: 8, y: 1 },
835 column: 11,
836 wrap_opp: true,
837 }
838 );
839
840 // Test if the remaining 4 "a"s are properly laid-out.
841 let end2 = cfg.goto_visual(Point { x: max, y: 2 });
842 assert_eq!(
843 end2,
844 Cursor {
845 offset: 15,
846 logical_pos: Point { x: 15, y: 0 },
847 visual_pos: Point { x: 4, y: 2 },
848 column: 15,
849 wrap_opp: false,
850 }
851 );
852 }
853
854 #[test]
855 fn test_force_wrap_wide() {
856 // These Yijing Hexagram Symbols form no word wrap opportunities.
857 let text = "䷀䷁䷂䷃䷄䷅䷆䷇䷈䷉";
858 let expected = ["䷀䷁", "䷂䷃", "䷄䷅", "䷆䷇", "䷈䷉"];
859 let bytes = text.as_bytes();
860 let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(5);
861
862 for (y, &expected) in expected.iter().enumerate() {
863 let y = y as CoordType;
864 // In order for `goto_visual()` to hit column 0 after a word wrap,
865 // it MUST be able to go back by 1 grapheme, which is what this tests.
866 let beg = cfg.goto_visual(Point { x: 0, y });
867 let end = cfg.goto_visual(Point { x: 5, y });
868 let actual = &text[beg.offset..end.offset];
869 assert_eq!(actual, expected);
870 }
871 }
872
873 // Similar to the `test_force_wrap` test, but here we vertically descend
874 // down the document without ever touching the first or last column.
875 // I found that this finds curious bugs at times.
876 #[test]
877 fn test_force_wrap_column() {
878 // |//_ |
879 // |aaaaaaaa|
880 // |aaaa |
881 let bytes = "// aaaaaaaaaaaa".as_bytes();
882 let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(8);
883
884 // At the end of "// " there should be a wrap.
885 let end0 = cfg.goto_visual(Point { x: CoordType::MAX, y: 0 });
886 assert_eq!(
887 end0,
888 Cursor {
889 offset: 3,
890 logical_pos: Point { x: 3, y: 0 },
891 visual_pos: Point { x: 3, y: 0 },
892 column: 3,
893 wrap_opp: true,
894 }
895 );
896
897 let mid1 = cfg.goto_visual(Point { x: end0.visual_pos.x, y: 1 });
898 assert_eq!(
899 mid1,
900 Cursor {
901 offset: 6,
902 logical_pos: Point { x: 6, y: 0 },
903 visual_pos: Point { x: 3, y: 1 },
904 column: 6,
905 wrap_opp: false,
906 }
907 );
908
909 let mid2 = cfg.goto_visual(Point { x: end0.visual_pos.x, y: 2 });
910 assert_eq!(
911 mid2,
912 Cursor {
913 offset: 14,
914 logical_pos: Point { x: 14, y: 0 },
915 visual_pos: Point { x: 3, y: 2 },
916 column: 14,
917 wrap_opp: false,
918 }
919 );
920 }
921
922 #[test]
923 fn test_any_wrap() {
924 // |//_-----|
925 // |------- |
926 let bytes = "// ------------".as_bytes();
927 let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(8);
928 let max = CoordType::MAX;
929
930 let end0 = cfg.goto_visual(Point { x: max, y: 0 });
931 assert_eq!(
932 end0,
933 Cursor {
934 offset: 8,
935 logical_pos: Point { x: 8, y: 0 },
936 visual_pos: Point { x: 8, y: 0 },
937 column: 8,
938 wrap_opp: true,
939 }
940 );
941
942 let end1 = cfg.goto_visual(Point { x: max, y: 1 });
943 assert_eq!(
944 end1,
945 Cursor {
946 offset: 15,
947 logical_pos: Point { x: 15, y: 0 },
948 visual_pos: Point { x: 7, y: 1 },
949 column: 15,
950 wrap_opp: true,
951 }
952 );
953 }
954
955 #[test]
956 fn test_any_wrap_wide() {
957 // These Japanese characters form word wrap opportunity between each character.
958 let text = "零一二三四五六七八九";
959 let expected = ["零一", "二三", "四五", "六七", "八九"];
960 let bytes = text.as_bytes();
961 let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(5);
962
963 for (y, &expected) in expected.iter().enumerate() {
964 let y = y as CoordType;
965 // In order for `goto_visual()` to hit column 0 after a word wrap,
966 // it MUST be able to go back by 1 grapheme, which is what this tests.
967 let beg = cfg.goto_visual(Point { x: 0, y });
968 let end = cfg.goto_visual(Point { x: 5, y });
969 let actual = &text[beg.offset..end.offset];
970 assert_eq!(actual, expected);
971 }
972 }
973
974 #[test]
975 fn test_wrap_tab() {
976 // |foo_ | <- 1 space
977 // |____b | <- 1 tab, 1 space
978 let text = "foo \t b";
979 let bytes = text.as_bytes();
980 let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(8).with_tab_size(4);
981 let max = CoordType::MAX;
982
983 let end0 = cfg.goto_visual(Point { x: max, y: 0 });
984 assert_eq!(
985 end0,
986 Cursor {
987 offset: 4,
988 logical_pos: Point { x: 4, y: 0 },
989 visual_pos: Point { x: 4, y: 0 },
990 column: 4,
991 wrap_opp: true,
992 },
993 );
994
995 let beg1 = cfg.goto_visual(Point { x: 0, y: 1 });
996 assert_eq!(
997 beg1,
998 Cursor {
999 offset: 4,
1000 logical_pos: Point { x: 4, y: 0 },
1001 visual_pos: Point { x: 0, y: 1 },
1002 column: 4,
1003 wrap_opp: false,
1004 },
1005 );
1006
1007 let end1 = cfg.goto_visual(Point { x: max, y: 1 });
1008 assert_eq!(
1009 end1,
1010 Cursor {
1011 offset: 7,
1012 logical_pos: Point { x: 7, y: 0 },
1013 visual_pos: Point { x: 6, y: 1 },
1014 column: 10,
1015 wrap_opp: true,
1016 },
1017 );
1018 }
1019
1020 #[test]
1021 fn test_crlf() {
1022 let text = "a\r\nbcd\r\ne".as_bytes();
1023 let cursor = MeasurementConfig::new(&text).goto_visual(Point { x: CoordType::MAX, y: 1 });
1024 assert_eq!(
1025 cursor,
1026 Cursor {
1027 offset: 6,
1028 logical_pos: Point { x: 3, y: 1 },
1029 visual_pos: Point { x: 3, y: 1 },
1030 column: 3,
1031 wrap_opp: false,
1032 }
1033 );
1034 }
1035
1036 #[test]
1037 fn test_wrapped_cursor_can_seek_backward() {
1038 let bytes = "hello world".as_bytes();
1039 let mut cfg = MeasurementConfig::new(&bytes).with_word_wrap_column(10);
1040
1041 // When the word wrap at column 10 hits, the cursor will be at the end of the word "world" (between l and d).
1042 // This tests if the algorithm is capable of going back to the start of the word and find the actual target.
1043 let cursor = cfg.goto_visual(Point { x: 2, y: 1 });
1044 assert_eq!(
1045 cursor,
1046 Cursor {
1047 offset: 8,
1048 logical_pos: Point { x: 8, y: 0 },
1049 visual_pos: Point { x: 2, y: 1 },
1050 column: 8,
1051 wrap_opp: false,
1052 }
1053 );
1054 }
1055
1056 #[test]
1057 fn test_strip_newline() {
1058 assert_eq!(strip_newline(b"hello\n"), b"hello");
1059 assert_eq!(strip_newline(b"hello\r\n"), b"hello");
1060 assert_eq!(strip_newline(b"hello"), b"hello");
1061 }
1062}