Skip to main content

reifydb_type/util/
unicode.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
/// Display-width measurement for string-like values.
///
/// Implementors report how wide their text is when displayed, which can
/// differ from both the byte length and the number of `char`s.
pub trait UnicodeWidthStr {
	/// Returns the display width of `self`.
	fn width(&self) -> usize;
}
8
9impl UnicodeWidthStr for str {
10	fn width(&self) -> usize {
11		self.chars().map(char_width).sum()
12	}
13}
14
15impl UnicodeWidthStr for &str {
16	fn width(&self) -> usize {
17		self.chars().map(char_width).sum()
18	}
19}
20
/// Returns the display width of a single character: `0`, `1` or `2`.
///
/// The result comes from a hand-maintained table of Unicode ranges:
///
/// * `0` - control characters, zero-width/format characters, combining
///   marks, conjoining Hangul vowels/finals and variation selectors;
/// * `2` - East Asian wide/fullwidth blocks and the emoji/symbol blocks
///   listed below;
/// * `1` - everything else.
///
/// Widths are assigned per `char`; sequences such as emoji joined with
/// U+200D are not treated as a single unit.
fn char_width(ch: char) -> usize {
	match ch {
		// C0 controls (including tab and newline), DEL and C1 controls.
		'\x00'..='\x1F' | '\x7F'..='\u{9F}' => 0,

		// Zero-width and directional/format characters.
		'\u{200B}'..='\u{200F}'
		| '\u{2028}'..='\u{202E}'
		| '\u{2060}'..='\u{206F}'
		| '\u{FEFF}' => 0, // zero width no-break space / byte order mark

		// Combining diacritical marks.
		'\u{0300}'..='\u{036F}'
		| '\u{1AB0}'..='\u{1AFF}'
		| '\u{1DC0}'..='\u{1DFF}'
		| '\u{20D0}'..='\u{20FF}' // Combining Diacritical Marks for Symbols
		| '\u{FE20}'..='\u{FE2F}' => 0,

		// Conjoining Hangul vowels and final consonants attach to the wide
		// leading consonant, so decomposed (NFD) syllables measure the same
		// as their precomposed form.
		'\u{1160}'..='\u{11FF}' | '\u{D7B0}'..='\u{D7FF}' => 0,

		// Variation selectors.
		'\u{FE00}'..='\u{FE0F}' | '\u{E0100}'..='\u{E01EF}' => 0,

		// East Asian wide and fullwidth blocks.
		'\u{1100}'..='\u{115F}' // Hangul Jamo leading consonants
		| '\u{2E80}'..='\u{2EFF}' // CJK Radicals Supplement
		| '\u{2F00}'..='\u{2FDF}' // Kangxi Radicals
		| '\u{3000}'..='\u{303F}' // CJK Symbols and Punctuation
		| '\u{3040}'..='\u{309F}' // Hiragana
		| '\u{30A0}'..='\u{30FF}' // Katakana
		| '\u{3100}'..='\u{312F}' // Bopomofo
		| '\u{3130}'..='\u{318F}' // Hangul Compatibility Jamo
		| '\u{3190}'..='\u{319F}' // Kanbun
		| '\u{31A0}'..='\u{31BF}' // Bopomofo Extended
		| '\u{31C0}'..='\u{31EF}' // CJK Strokes
		| '\u{31F0}'..='\u{31FF}' // Katakana Phonetic Extensions
		| '\u{3200}'..='\u{32FF}' // Enclosed CJK Letters and Months
		| '\u{3300}'..='\u{33FF}' // CJK Compatibility
		| '\u{3400}'..='\u{4DBF}' // CJK Unified Ideographs Extension A
		| '\u{4E00}'..='\u{9FFF}' // CJK Unified Ideographs
		| '\u{A000}'..='\u{A48F}' // Yi Syllables
		| '\u{A490}'..='\u{A4CF}' // Yi Radicals
		| '\u{A960}'..='\u{A97F}' // Hangul Jamo Extended-A
		| '\u{AC00}'..='\u{D7AF}' // Hangul Syllables
		| '\u{F900}'..='\u{FAFF}' // CJK Compatibility Ideographs
		| '\u{FE10}'..='\u{FE1F}' // Vertical Forms
		| '\u{FE30}'..='\u{FE4F}' // CJK Compatibility Forms
		| '\u{FF00}'..='\u{FF60}' // Fullwidth ASCII variants
		| '\u{FFE0}'..='\u{FFE6}' // Fullwidth signs
		| '\u{20000}'..='\u{2FFFD}' // Supplementary Ideographic Plane
		| '\u{30000}'..='\u{3FFFD}' => 2, // Tertiary Ideographic Plane

		// Emoji and pictographic symbol blocks.
		'\u{1F300}'..='\u{1F6FF}'
		| '\u{1F700}'..='\u{1F77F}'
		| '\u{1F780}'..='\u{1F7FF}'
		| '\u{1F800}'..='\u{1F8FF}'
		| '\u{1F900}'..='\u{1F9FF}'
		| '\u{1FA00}'..='\u{1FA6F}'
		| '\u{1FA70}'..='\u{1FAFF}' => 2,

		// Miscellaneous Symbols and Dingbats.
		// NOTE(review): the whole range is counted as wide, including
		// symbols without emoji presentation; kept unchanged so existing
		// callers see the same widths - confirm this matches the renderer.
		'\u{2600}'..='\u{27BF}' => 2,

		_ => 1,
	}
}
71
#[cfg(test)]
pub mod tests {
	use super::*;

	/// Asserts that `text` has exactly the `expected` display width.
	///
	/// Marked `#[track_caller]` so a failing assertion is reported at the
	/// line of the test that called it rather than inside this helper.
	#[track_caller]
	fn expect_width(text: &str, expected: usize) {
		assert_eq!(text.width(), expected);
	}

	/// Plain ASCII counts one per character; the empty string is zero.
	#[test]
	fn test_ascii() {
		expect_width("Hello", 5);
		expect_width("Hello, World!", 13);
		expect_width("", 0);
	}

	/// Every CJK character counts as two.
	#[test]
	fn test_cjk() {
		expect_width("你好", 4); // two Chinese characters
		expect_width("こんにちは", 10); // five Japanese characters
		expect_width("안녕하세요", 10); // five Korean characters
	}

	/// ASCII and CJK widths add up within one string.
	#[test]
	fn test_mixed() {
		// "Hello " contributes 6, the two CJK characters contribute 4.
		expect_width("Hello 世界", 10);
	}

	/// Control characters contribute nothing to the width.
	#[test]
	fn test_control_chars() {
		expect_width("\x00\x01\x02", 0);
		expect_width("Hello\nWorld", 10); // the newline is zero-width
		expect_width("Hello\tWorld", 10); // the tab is zero-width
	}

	/// A combining mark adds nothing to the character it follows.
	#[test]
	fn test_combining_marks() {
		// 'e' followed by a combining acute accent
		expect_width("e\u{0301}", 1);
		// 'a' followed by a combining tilde
		expect_width("a\u{0303}", 1);
	}

	/// Emoji count as two, alone or embedded in other text.
	#[test]
	fn test_emoji() {
		expect_width("🚀", 2); // rocket
		expect_width("😀", 2); // smiley face
		expect_width("🎉", 2); // party popper
		// "Unicode: " (9) + 🚀 (2) + " ñ é ü" (6) = 17
		expect_width("Unicode: 🚀 ñ é ü", 17);
	}
}