reifydb_value/util/
/// Measures how wide a piece of text is, expressed as a column count.
///
/// This file implements the trait for `str` and `&str`; both implementations
/// add up a per-character width for every `char` in the string.
pub trait UnicodeWidthStr {
    /// Returns the total width of `self` as a number of columns.
    fn width(&self) -> usize;
}
7
8impl UnicodeWidthStr for str {
9 fn width(&self) -> usize {
10 self.chars().map(char_width).sum()
11 }
12}
13
14impl UnicodeWidthStr for &str {
15 fn width(&self) -> usize {
16 self.chars().map(char_width).sum()
17 }
18}
19
/// Returns the number of columns a single character is counted as: 0, 1 or 2.
///
/// This is a compact, hand-maintained range table, not a complete
/// implementation of the Unicode East Asian Width data:
///
/// * `0` for control characters, zero-width/format characters, combining
///   marks and variation selectors;
/// * `2` for East Asian wide and fullwidth characters and for the symbol,
///   dingbat and emoji blocks listed below;
/// * `1` for everything else.
///
/// Widths are assigned per code point; multi-code-point sequences (for
/// example emoji joined with U+200D) are simply summed by the callers.
fn char_width(ch: char) -> usize {
    match ch {
        // C0 controls, DEL and C1 controls.
        '\x00'..='\x1F' | '\x7F'..='\u{9F}' => 0,

        // Zero-width spaces/joiners, directional marks, line/paragraph
        // separators, invisible format characters, and the BOM / zero-width
        // no-break space (U+FEFF).
        '\u{200B}'..='\u{200F}'
        | '\u{2028}'..='\u{202E}'
        | '\u{2060}'..='\u{206F}'
        | '\u{FEFF}' => 0,

        // Combining Diacritical Marks: base block, Extended, Supplement,
        // for Symbols (U+20D0..U+20FF), and Half Marks.
        '\u{0300}'..='\u{036F}'
        | '\u{1AB0}'..='\u{1AFF}'
        | '\u{1DC0}'..='\u{1DFF}'
        | '\u{20D0}'..='\u{20FF}'
        | '\u{FE20}'..='\u{FE2F}' => 0,

        // Variation selectors and the variation selectors supplement.
        '\u{FE00}'..='\u{FE0F}' | '\u{E0100}'..='\u{E01EF}' => 0,

        // East Asian wide / fullwidth characters.
        '\u{1100}'..='\u{115F}' // Hangul Jamo leading consonants
        | '\u{2E80}'..='\u{2FDF}' // CJK and Kangxi radicals
        // CJK symbols, kana, Bopomofo, compatibility Jamo, Kanbun, CJK
        // strokes, enclosed letters, CJK compatibility, Extension A.
        | '\u{3000}'..='\u{4DBF}'
        | '\u{4E00}'..='\u{9FFF}' // CJK Unified Ideographs
        | '\u{A000}'..='\u{A4CF}' // Yi syllables and radicals
        | '\u{A960}'..='\u{A97F}' // Hangul Jamo Extended-A
        | '\u{AC00}'..='\u{D7AF}' // Hangul syllables
        | '\u{F900}'..='\u{FAFF}' // CJK Compatibility Ideographs
        | '\u{FE10}'..='\u{FE19}' // Vertical Forms
        | '\u{FE30}'..='\u{FE4F}' // CJK Compatibility Forms
        | '\u{FF00}'..='\u{FF60}' // Fullwidth ASCII variants
        | '\u{FFE0}'..='\u{FFE6}' // Fullwidth signs
        | '\u{20000}'..='\u{2FFFD}' // Supplementary Ideographic Plane
        | '\u{30000}'..='\u{3FFFD}' => 2, // Tertiary Ideographic Plane

        // Miscellaneous Symbols and Dingbats, plus the pictograph/emoji
        // blocks from U+1F300 through U+1FAFF, are all counted as wide.
        '\u{2600}'..='\u{27BF}' | '\u{1F300}'..='\u{1FAFF}' => 2,

        _ => 1,
    }
}
70
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Checks every `(text, expected_width)` pair against `width()`.
    fn assert_widths(cases: &[(&str, usize)]) {
        for &(text, expected) in cases {
            assert_eq!(text.width(), expected);
        }
    }

    /// Plain ASCII counts one column per character; the empty string is 0.
    #[test]
    fn test_ascii() {
        assert_widths(&[("Hello", 5), ("Hello, World!", 13), ("", 0)]);
    }

    /// Chinese, Japanese and Korean characters count two columns each.
    #[test]
    fn test_cjk() {
        assert_widths(&[("你好", 4), ("こんにちは", 10), ("안녕하세요", 10)]);
    }

    /// ASCII and CJK in one string: 6 narrow columns plus 2 wide characters.
    #[test]
    fn test_mixed() {
        assert_widths(&[("Hello 世界", 10)]);
    }

    /// Control characters, including newline and tab, contribute nothing.
    #[test]
    fn test_control_chars() {
        assert_widths(&[
            ("\x00\x01\x02", 0),
            ("Hello\nWorld", 10),
            ("Hello\tWorld", 10),
        ]);
    }

    /// A base letter followed by a combining mark occupies a single column.
    #[test]
    fn test_combining_marks() {
        assert_widths(&[("e\u{0301}", 1), ("a\u{0303}", 1)]);
    }

    /// Emoji count two columns, alone or mixed with narrow text.
    #[test]
    fn test_emoji() {
        assert_widths(&[
            ("🚀", 2),
            ("😀", 2),
            ("🎉", 2),
            ("Unicode: 🚀 ñ é ü", 17),
        ]);
    }
}