yumy/
text.rs

1use unicode_segmentation::UnicodeSegmentation;
2use unicode_width::UnicodeWidthStr;
3
/// The horizontal tab character; `grapheme_width` and `dedent` count it as 4 columns wide.
const TAB: &str = "\t";
/// U+200D ZERO WIDTH JOINER. On its own `grapheme_width` reports it as 0 wide; a grapheme
/// that merely contains it is reported as 2 wide.
const ZERO_WIDTH_JOINER: &str = "\u{200d}";
/// U+FE0F VARIATION SELECTOR-16. On its own `grapheme_width` reports it as 0 wide.
const VARIATION_SELECTOR_16: &str = "\u{fe0f}";
/// The five skin tone modifier code points (U+1F3FB..=U+1F3FF). `grapheme_width` reports any
/// grapheme containing one of them as 2 wide.
const SKIN_TONES: [&str; 5] = [
    "\u{1f3fb}", // Light Skin Tone
    "\u{1f3fc}", // Medium-Light Skin Tone
    "\u{1f3fd}", // Medium Skin Tone
    "\u{1f3fe}", // Medium-Dark Skin Tone
    "\u{1f3ff}", // Dark Skin Tone
];
14
15/// Returns the display width of a grapheme. This function _does not_ assert that
16/// the argument is indeed a single grapheme and therefore isn't reliable if it isn't.
17pub fn grapheme_width(grapheme: &str) -> usize {
18    if grapheme == TAB {
19        return 4;
20    }
21
22    if grapheme == ZERO_WIDTH_JOINER || grapheme == VARIATION_SELECTOR_16 {
23        return 0;
24    }
25
26    if grapheme.contains(ZERO_WIDTH_JOINER) {
27        return 2;
28    }
29
30    for skin_tone in SKIN_TONES {
31        if grapheme.contains(skin_tone) {
32            return 2;
33        }
34    }
35
36    grapheme.width()
37}
38
39/// Returns the display width of a string.
40#[inline]
41pub fn dislay_width(s: &str) -> usize {
42    s.graphemes(true).map(grapheme_width).sum()
43}
44
45/// Dedents a string by removing whitespace at the start and returns the byte index of the start
46/// of the dedented section, the display width of the removed segment and the dedented slice,
47/// respectively.
48#[inline]
49pub fn dedent(s: &str) -> (usize, usize, &str) {
50    let mut width = 0;
51    for (index, grapheme) in s.grapheme_indices(true) {
52        match grapheme {
53            " " => width += 1,
54            TAB => width += 4,
55            _ => return (index, width, &s[index..]),
56        }
57    }
58
59    (s.len(), dislay_width(s), &s[s.len()..])
60}
61
#[cfg(test)]
mod test {
    use super::*;

    /// Checks `dedent` against a table of `(input, (start index, removed width, remainder))`.
    #[test]
    pub fn test_dedent() {
        let cases: [(&str, (usize, usize, &str)); 7] = [
            // Spaces only, tabs only, and a mix in front of text.
            ("  dedent this", (2, 2, "dedent this")),
            ("\tdedent this", (1, 4, "dedent this")),
            ("\t dedent this", (2, 5, "dedent this")),
            (
                " \t   \t \t dedent this",
                (9, 1 + 4 + 3 + 4 + 1 + 4 + 1, "dedent this"),
            ),
            // Inputs with nothing left after dedenting.
            ("", (0, 0, "")),
            (" ", (1, 1, "")),
            (" \t", (2, 5, "")),
        ];

        for (input, expected) in cases {
            assert_eq!(dedent(input), expected, "input: {:?}", input);
        }
    }
}
79}