/// Returns the smallest byte index `>= i` in `s` that does not point at a
/// UTF-8 continuation byte, clamped to `s.len()`.
///
/// * `s` - the string whose byte layout is inspected.
/// * `i` - the candidate byte index; values at or past `s.len()` yield
///   `s.len()`.
///
/// A continuation byte has the bit pattern `10xxxxxx`, i.e.
/// `(byte & 0xC0) == 0x80`. Starting at `i`, the run of such bytes is skipped
/// so the returned index never lands inside a multi-byte sequence.
#[inline(always)]
fn ceil_char_boundary(s: &str, i: usize) -> usize {
    let bytes = s.as_bytes();
    match bytes.get(i..) {
        // `i` is past the end of the string: clamp to the length.
        None => bytes.len(),
        // Count the continuation bytes starting at `i` and step over them.
        // When `i == len` the tail is empty and the result is `len`.
        Some(tail) => i + tail.iter().take_while(|&&b| (b & 0xC0) == 0x80).count(),
    }
}
26
// Backend selection: the `cfg` predicates below are mutually exclusive, so
// exactly one of these modules is compiled for any given target.

// Compiled only when targeting x86_64.
#[cfg(target_arch = "x86_64")]
mod x86_64;

// Compiled only when targeting aarch64.
#[cfg(target_arch = "aarch64")]
mod aarch64;

// Compiled only for wasm32 builds that have the `simd128` target feature
// enabled; wasm32 without `simd128` falls through to `scalar` below.
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
mod wasm32;

// Fallback: compiled for every target not matched by one of the predicates
// above.
#[cfg(not(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    all(target_arch = "wasm32", target_feature = "simd128"),
)))]
mod scalar;
42
// Public entry point: re-export `utf16_len` from whichever backend module the
// current target compiles. The `cfg` predicates here mirror the ones on the
// `mod` declarations, so exactly one re-export is active per build.

#[cfg(target_arch = "x86_64")]
pub use x86_64::utf16_len;

#[cfg(target_arch = "aarch64")]
pub use aarch64::utf16_len;

#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
pub use wasm32::utf16_len;

// Fallback re-export for targets not covered by any predicate above.
#[cfg(not(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    all(target_arch = "wasm32", target_feature = "simd128"),
)))]
pub use scalar::utf16_len;
58
#[cfg(test)]
mod tests {
    use super::utf16_len;

    /// Ground truth: the number of UTF-16 code units produced by the standard
    /// library's encoder for `s`.
    fn reference(s: &str) -> usize {
        s.encode_utf16().count()
    }

    /// Asserts that `utf16_len` agrees with [`reference`] for `input`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test line
    /// rather than at this helper.
    #[track_caller]
    fn check(input: &str) {
        assert_eq!(utf16_len(input), reference(input));
    }

    #[test]
    fn empty() {
        check("");
    }

    #[test]
    fn ascii_only() {
        check("hello");
    }

    #[test]
    fn two_byte_chars() {
        check("café résumé");
    }

    #[test]
    fn three_byte_chars() {
        check("你好世界");
    }

    #[test]
    fn four_byte_chars() {
        check("😀🎉🚀💯");
    }

    #[test]
    fn mixed() {
        check("Hello, 世界! 🌍🌎🌏 café");
    }

    #[test]
    fn single_char_boundaries() {
        // One representative character for each UTF-8 encoded width (1-4 bytes).
        for c in ['a', 'é', '中', '🦀'] {
            let single = c.to_string();
            let expected = reference(&single);
            assert_eq!(utf16_len(&single), expected, "char: {c}");
        }
    }

    #[test]
    fn longer_than_simd_width() {
        // 26 one-byte characters.
        check("abcdefghijklmnopqrstuvwxyz");
        // 24 two-byte characters.
        check("αβγδεζηθικλμνξοπρστυφχψω");
        // 16 three-byte characters.
        check("你好世界你好世界你好世界你好世界");
        // 16 four-byte characters.
        check("🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀");
    }

    #[test]
    fn repeated_pattern_large() {
        let ascii = "a".repeat(5000);
        check(&ascii);

        let crabs = "🦀".repeat(1500);
        check(&crabs);
    }

    #[test]
    fn all_byte_widths_interleaved() {
        // Each repetition contains one character of every UTF-8 width.
        let repeated = "aé中🦀".repeat(100);
        check(&repeated);
    }
}