reifydb_type/util/
/// Display-width measurement for string-like values.
///
/// The implementations in this file (for `str` and `&str`) add up a
/// per-character column estimate, so the result is a column count rather
/// than a byte or `char` count.
pub trait UnicodeWidthStr {
    /// Returns the display width of `self`, measured in columns.
    fn width(&self) -> usize;
}
9
10impl UnicodeWidthStr for str {
11 fn width(&self) -> usize {
12 self.chars().map(char_width).sum()
13 }
14}
15
16impl UnicodeWidthStr for &str {
17 fn width(&self) -> usize {
18 self.chars().map(char_width).sum()
19 }
20}
21
/// Estimates how many columns a single character occupies.
///
/// This is a hand-maintained range table, not a full Unicode width
/// implementation. It returns:
///
/// * `0` for control characters, invisible formatting characters,
///   combining marks and variation selectors;
/// * `2` for the CJK / Hangul / Yi / fullwidth ranges and for the emoji and
///   symbol ranges listed below;
/// * `1` for every other character.
///
/// Each character is judged on its own; multi-character sequences are not
/// recognised as a unit.
fn char_width(ch: char) -> usize {
    match ch {
        // C0 controls, DEL and C1 controls.
        '\x00'..='\x1F' | '\x7F'..='\u{9F}' => 0,

        // Hangul Jamo, CJK radicals/symbols/kana/ideographs, Yi, Hangul
        // syllables, compatibility ideographs and fullwidth forms.
        '\u{1100}'..='\u{115F}'
        | '\u{2E80}'..='\u{2EFF}'
        | '\u{2F00}'..='\u{2FDF}'
        | '\u{3000}'..='\u{303F}'
        | '\u{3040}'..='\u{309F}'
        | '\u{30A0}'..='\u{30FF}'
        | '\u{3100}'..='\u{312F}'
        | '\u{3130}'..='\u{318F}'
        | '\u{31A0}'..='\u{31BF}'
        | '\u{31F0}'..='\u{31FF}'
        | '\u{3200}'..='\u{32FF}'
        | '\u{3300}'..='\u{33FF}'
        | '\u{3400}'..='\u{4DBF}'
        | '\u{4E00}'..='\u{9FFF}'
        | '\u{A000}'..='\u{A48F}'
        | '\u{A490}'..='\u{A4CF}'
        | '\u{AC00}'..='\u{D7AF}'
        | '\u{F900}'..='\u{FAFF}'
        | '\u{FE30}'..='\u{FE4F}'
        | '\u{FF00}'..='\u{FF60}'
        | '\u{FFE0}'..='\u{FFE6}'
        | '\u{20000}'..='\u{2FFFD}'
        // Plane 3 (e.g. CJK Extension G) is wide by default, same as plane 2.
        | '\u{30000}'..='\u{3FFFD}' => 2,

        // Pictographs, emoticons, transport symbols and the later
        // supplemental symbol/emoji ranges.
        '\u{1F300}'..='\u{1F6FF}'
        | '\u{1F700}'..='\u{1F77F}'
        | '\u{1F780}'..='\u{1F7FF}'
        | '\u{1F800}'..='\u{1F8FF}'
        | '\u{1F900}'..='\u{1F9FF}'
        | '\u{1FA00}'..='\u{1FA6F}'
        | '\u{1FA70}'..='\u{1FAFF}' => 2,

        // Invisible formatting characters: zero-width spaces/joiners,
        // directional marks, line/paragraph separators, and U+FEFF
        // (zero width no-break space, also used as a byte order mark).
        '\u{200B}'..='\u{200F}'
        | '\u{2028}'..='\u{202E}'
        | '\u{2060}'..='\u{206F}'
        | '\u{FEFF}' => 0,

        // Combining marks attach to the preceding character.
        '\u{0300}'..='\u{036F}'
        | '\u{1AB0}'..='\u{1AFF}'
        | '\u{1DC0}'..='\u{1DFF}'
        | '\u{FE20}'..='\u{FE2F}' => 0,

        // Variation selectors (BMP and supplement).
        '\u{FE00}'..='\u{FE0F}' => 0,
        '\u{E0100}'..='\u{E01EF}' => 0,

        // Miscellaneous symbols and dingbats are counted as two columns.
        '\u{2600}'..='\u{27BF}' => 2,

        _ => 1,
    }
}
81
#[cfg(test)]
pub mod tests {
    use super::*;

    /// ASCII text takes one column per character; the empty string takes none.
    #[test]
    fn test_ascii() {
        assert_eq!("".width(), 0);
        assert_eq!("Hello".width(), 5);
        assert_eq!("Hello, World!".width(), 13);
    }

    /// Chinese, Japanese and Korean characters take two columns each.
    #[test]
    fn test_cjk() {
        let samples = [("你好", 4), ("こんにちは", 10), ("안녕하세요", 10)];
        for &(text, columns) in samples.iter() {
            assert_eq!(text.width(), columns);
        }
    }

    /// Narrow and wide characters can be mixed: 5 + 1 + 2 * 2 columns.
    #[test]
    fn test_mixed() {
        let text = "Hello 世界";
        assert_eq!(text.width(), 10);
    }

    /// Control characters, including newline and tab, add no columns.
    #[test]
    fn test_control_chars() {
        assert_eq!("\x00\x01\x02".width(), 0);

        let with_newline = "Hello\nWorld";
        let with_tab = "Hello\tWorld";
        assert_eq!(with_newline.width(), 10);
        assert_eq!(with_tab.width(), 10);
    }

    /// A base letter followed by a combining mark still takes one column.
    #[test]
    fn test_combining_marks() {
        let e_acute = "e\u{0301}";
        let a_tilde = "a\u{0303}";
        assert_eq!(e_acute.width(), 1);
        assert_eq!(a_tilde.width(), 1);
    }

    /// Emoji take two columns, alone or embedded in other text.
    #[test]
    fn test_emoji() {
        for emoji in ["🚀", "😀", "🎉"].iter() {
            assert_eq!(emoji.width(), 2);
        }

        assert_eq!("Unicode: 🚀 ñ é ü".width(), 17);
    }
}