utf_64/
lib.rs

1//! # UTF64
2//!
3//! A revolutionary text encoding standard that uses a fixed 64 bits per character.
4//!
//! UTF64 provides consistent O(1) character indexing and a simplified implementation
//! by encoding each Unicode character in exactly 8 bytes. The upper 32 bits hold the
//! character's UTF-8 bytes, most significant byte first and zero-padded on the right
//! (for example, `A` is stored as `0x41000000`), while the lower 32 bits are reserved
//! for future enhancements and are currently zero.
9//!
10//! ## Example
11//!
12//! ```
13//! use utf64::String64;
14//!
15//! let text = String64::from("Hello, 世界!");
16//! assert_eq!(text.len(), 10);
17//!
18//! let decoded: String = text.to_string().unwrap();
19//! assert_eq!(decoded, "Hello, 世界!");
20//! ```
21
22pub mod error;
23pub mod string64;
24
25pub use error::{Result, Utf64Error};
26pub use string64::String64;
27
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `text` into a `String64`, decodes it back, and asserts that the
    /// decoded `String` equals the input.
    fn assert_roundtrip(text: &str) {
        let decoded = String64::from(text).to_string().unwrap();
        assert_eq!(text, decoded);
    }

    /// Plain ASCII survives an encode/decode round trip.
    #[test]
    fn test_ascii_roundtrip() {
        assert_roundtrip("Hello, World!");
    }

    /// Text mixing ASCII, CJK and an emoji survives a round trip.
    #[test]
    fn test_unicode_roundtrip() {
        assert_roundtrip("Hello, 世界! 🌍");
    }

    /// The empty string encodes to an empty `String64` and decodes back to "".
    #[test]
    fn test_empty_string() {
        let empty = String64::from("");
        assert!(empty.is_empty());
        assert_eq!(empty.len(), 0);
        assert_eq!(empty.to_string().unwrap(), "");
    }

    /// `len()` counts characters, independent of their UTF-8 byte width.
    #[test]
    fn test_length() {
        let cases = [("Hi", 2), ("世界", 2), ("🌍🌎🌏", 3)];
        for (text, expected_len) in cases {
            assert_eq!(String64::from(text).len(), expected_len);
        }
    }

    /// A run of four-byte emoji survives a round trip.
    #[test]
    fn test_emoji() {
        assert_roundtrip("😀😃😄😁");
    }

    /// One- to four-byte characters mixed in a single string survive a round trip.
    #[test]
    fn test_mixed_content() {
        assert_roundtrip("ASCII, 日本語, émojis: 🎉, symbols: ∑∫∂");
    }

    /// `Display` renders the decoded text.
    #[test]
    fn test_display_trait() {
        let rendered = format!("{}", String64::from("test"));
        assert_eq!(rendered, "test");
    }

    /// `Debug` wraps the quoted text in `String64(...)`.
    #[test]
    fn test_debug_trait() {
        let rendered = format!("{:?}", String64::from("test"));
        assert_eq!(rendered, "String64(\"test\")");
    }

    /// A clone compares equal to the value it was cloned from.
    #[test]
    fn test_clone_and_equality() {
        let source = String64::from("test");
        let copy = source.clone();
        assert_eq!(source, copy);
    }

    /// The reserved lower 32 bits of an encoded unit are zero.
    #[test]
    fn test_reserved_bits_are_zero() {
        let encoded = String64::from("A");
        let units = encoded.as_slice();
        assert_eq!(units.len(), 1);

        // Mask off everything except the reserved lower half.
        let reserved = units[0] & 0xFFFFFFFF;
        assert_eq!(reserved, 0);
    }

    /// A one-byte character occupies the most significant byte of the upper half.
    #[test]
    fn test_utf8_encoding_in_upper_bits() {
        // 'A' = U+0041, UTF-8 = 0x41
        let encoded = String64::from("A");
        let unit = encoded.as_slice()[0];

        assert_eq!((unit >> 32) as u32, 0x41000000);
    }

    /// A three-byte character fills the top three bytes of the upper half.
    #[test]
    fn test_multibyte_utf8() {
        // Euro sign: U+20AC, UTF-8 = E2 82 AC
        let encoded = String64::from("€");
        let unit = encoded.as_slice()[0];

        assert_eq!((unit >> 32) as u32, 0xE282AC00);
    }

    /// A four-byte character fills the entire upper half.
    #[test]
    fn test_four_byte_utf8() {
        // U+1F600, UTF-8 = F0 9F 98 80
        let encoded = String64::from("😀");
        let unit = encoded.as_slice()[0];

        assert_eq!((unit >> 32) as u32, 0xF09F9880);
    }

    /// A `String64` can be used as a `HashMap` key and looked up again.
    #[test]
    fn test_hash() {
        use std::collections::HashMap;

        let key = String64::from("hello");
        let mut table = HashMap::new();
        table.insert(key.clone(), 42);
        assert_eq!(table.get(&key), Some(&42));
    }

    /// Ordering places "apple" before "banana" and treats equal text as equal.
    #[test]
    fn test_ord() {
        use std::cmp::Ordering;

        let apple = String64::from("apple");
        let banana = String64::from("banana");
        let other_apple = String64::from("apple");

        assert!(apple < banana);
        assert!(banana > apple);
        assert_eq!(apple.cmp(&other_apple), Ordering::Equal);
    }

    /// Indexing by position yields the encoded unit for that character.
    #[test]
    fn test_indexing() {
        let text = String64::from("Hi");
        let unit = text[0];
        // 'H' is 0x48 in UTF-8.
        assert_eq!((unit >> 32) as u32, 0x48000000);
    }

    /// Indexing by range yields a slice with one unit per character in the range.
    #[test]
    fn test_range_indexing() {
        let text = String64::from("Hello");
        assert_eq!(text[1..3].len(), 2);
    }

    /// Consuming iteration yields the decoded characters in order.
    #[test]
    fn test_into_iterator() {
        let text = String64::from("Hi🌍");
        let decoded = text.into_iter().collect::<Vec<char>>();
        assert_eq!(decoded, vec!['H', 'i', '🌍']);
    }

    /// Iterating by reference yields characters and leaves the value usable.
    #[test]
    fn test_ref_iterator() {
        let text = String64::from("Hi");
        let decoded: Vec<char> = (&text).into_iter().collect();
        assert_eq!(decoded, vec!['H', 'i']);
        // `text` was only borrowed above, so it can still be queried.
        assert_eq!(text.len(), 2);
    }

    /// A `String64` can be collected from an iterator of `char`.
    #[test]
    fn test_from_iterator() {
        let collected: String64 = "Hello".chars().collect();
        assert_eq!(collected.to_string().unwrap(), "Hello");
    }

    /// `extend` appends characters to an existing `String64`.
    #[test]
    fn test_extend() {
        let mut text = String64::from("Hello");
        text.extend(" World".chars());
        assert_eq!(text.to_string().unwrap(), "Hello World");
    }

    /// `+` with a `&str` right-hand side produces the concatenation.
    #[test]
    fn test_add() {
        let joined = String64::from("Hello") + " World";
        assert_eq!(joined.to_string().unwrap(), "Hello World");
    }

    /// `+=` with a `&str` right-hand side appends in place.
    #[test]
    fn test_add_assign() {
        let mut text = String64::from("Hello");
        text += " World";
        assert_eq!(text.to_string().unwrap(), "Hello World");
    }

    /// A `String64` compares directly against `&str`.
    #[test]
    fn test_partial_eq_str() {
        let text = String64::from("test");
        assert_eq!(text, "test");
        assert_ne!(text, "other");
    }

    /// A `String64` compares directly against `String`.
    #[test]
    fn test_partial_eq_string() {
        let encoded = String64::from("test");
        let plain = String::from("test");
        assert_eq!(encoded, plain);
    }

    /// `AsRef<[u64]>` exposes one unit per character.
    #[test]
    fn test_as_ref() {
        let text = String64::from("Hi");
        let units: &[u64] = text.as_ref();
        assert_eq!(units.len(), 2);
    }

    /// `String::try_from` decodes a `String64` back into a `String`.
    #[test]
    fn test_try_from() {
        let decoded = String::try_from(String64::from("test"));
        assert_eq!(decoded.unwrap(), "test");
    }

    /// Dereferencing yields a `[u64]` view with one unit per character.
    #[test]
    fn test_deref() {
        let text = String64::from("Hi");
        // Explicit dereference followed by a reborrow as a slice.
        let units: &[u64] = &*text;
        assert_eq!(units.len(), 2);
    }

    /// Mutable dereferencing yields a `&mut [u64]` view of the same length.
    #[test]
    fn test_deref_mut() {
        let mut text = String64::from("Hi");
        let units: &mut [u64] = &mut *text;
        assert_eq!(units.len(), 2);
    }

    /// `Borrow<[u64]>` exposes one unit per character.
    #[test]
    fn test_borrow() {
        use std::borrow::Borrow;

        let text = String64::from("test");
        let units: &[u64] = text.borrow();
        assert_eq!(units.len(), 4);
    }
}