unicode_display_width/
lib.rs1#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/README.md"))]
2
3mod code_point_ranges;
4
5use code_point_ranges::{CodePointRange, ASCII_TABLE, DOUBLEWIDE_TABLE};
6use unicode_segmentation::UnicodeSegmentation;
7
8fn in_table(arr: &[CodePointRange], c: char) -> bool {
12 let c = c as u32;
13 arr.binary_search_by(|range| {
14 if range.contains(&c) {
15 std::cmp::Ordering::Equal
16 } else {
17 range.start().cmp(&c)
18 }
19 })
20 .is_ok()
21}
22
23pub fn is_double_width(c: char) -> bool {
34 if in_table(ASCII_TABLE, c) {
36 return false;
37 }
38
39 if in_table(DOUBLEWIDE_TABLE, c) {
40 return true;
41 }
42
43 false
44}
45
46fn get_grapheme_width(grapheme_cluster: &str) -> u64 {
50 for scalar_value in grapheme_cluster.chars() {
51 if scalar_value == '\u{FE0F}' {
53 return 2;
54 }
55
56 if is_double_width(scalar_value) {
57 return 2;
58 }
59 }
60
61 1
62}
63
64pub fn width(text: &str) -> u64 {
78 text.graphemes(true).fold(0, |acc, grapheme_cluster| {
79 acc + (get_grapheme_width(grapheme_cluster))
80 })
81}
82
#[cfg(test)]
mod test {
    use super::*;
    use test_case::test_case;

    // Per-code-point classification: checks `is_double_width` directly.
    #[test_case('🛡', false)]
    #[test_case('✅', true)]
    fn test_width(text: char, wide: bool) {
        assert_eq!(is_double_width(text), wide);
    }

    // Whole-string widths.
    //
    // Invisible code points that the expected value depends on (U+FE0F
    // variation selector, U+200D zero width joiner) are written as escapes so
    // that they cannot be lost silently when the file is edited or copied.
    #[test_case("🛡", 1; "length 1 grapheme")]
    #[test_case("\u{2764}", 1; "Heavy Black Heart emoji")]
    #[test_case("\u{2764}\u{FE0F}", 2; "Heavy Black Heart emoji with emoji style variation selector in Hex representation")]
    #[test_case("❤\u{FE0F}", 2; "Heavy Black Heart emoji with emoji style variation selector")]
    #[test_case("✅", 2; "length 2 grapheme")]
    // Man, woman, girl, girl joined by ZWJ: a single grapheme cluster.
    #[test_case("👨\u{200D}👩\u{200D}👧\u{200D}👧", 2; "grapheme composed of multiple emojis, at least one of which is length 2")]
    #[test_case("test test", 9; "ASCII text")]
    #[test_case("🗡", 1; "single width because it may be paired with the shield which is also a length 1 code point")]
    // Eight clusters: the dagger counts 1, the other seven count 2. The woman
    // with skin tone and the rocket are joined by ZWJ into one cluster.
    #[test_case("🔥🗡🍩👩🏻\u{200D}🚀⏰💃🏼🔦👍🏻", 15; "U+1F608")]
    #[test_case("слава україні", 13; "Glory to Ukraine in Ukrainian")]
    #[test_case("슬라바 우크라이나", 17; "Glory to Ukraine in Korean")]
    // Nine base letters, each carrying a stack of combining marks.
    #[test_case("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ", 9; "corrupted text")]
    fn test_string_width(text: &str, length: u64) {
        assert_eq!(width(text), length);
    }

    // Indic scripts: consonant + virama followed by a zero width (non-)joiner.
    #[test_case("ണ്\u{200D}", 1; "Indic text with zero width joiner")]
    #[test_case("ന്\u{200D}", 1; "Indic text with zero width joiner 2")]
    #[test_case("ര്\u{200D}", 1; "Indic text with zero width joiner 3")]
    #[test_case(
        "\u{0924}\u{094D}\u{200D}\u{0928}",
        2;
        "Half letter form"
    )]
    #[test_case(
        "\u{0924}\u{094D}\u{200C}\u{0928}",
        2;
        "Single glyph form"
    )]
    fn indic_script(text: &str, length: u64) {
        assert_eq!(width(text), length);
    }
}