Skip to main content

realhydroper_utf16/
lib.rs

1pub mod slice;
2pub mod utils;
3mod iterators;
4pub use iterators::*;
5
6mod utf16str;
7mod utf16string;
8
/// Borrowed, unsized view over a run of UTF-16 (UCS-2) code units.
///
/// Positions and ranges used to index this type count `u16` code units;
/// they are not byte offsets.
///
/// The type is `#[repr(transparent)]`, so it has the same layout as the
/// `[u16]` slice it wraps.
#[repr(transparent)]
#[derive(PartialEq, Eq, Hash, Debug)]
pub struct Utf16Str {
    /// The wrapped code units.
    pub(crate) raw: [u16],
}
19
/// Heap-allocated, owned string stored as UTF-16 (UCS-2) code units.
///
/// Positions and ranges used to index this type count `u16` code units;
/// they are not byte offsets.
#[derive(PartialEq, Eq, Hash, Debug)]
pub struct Utf16String {
    /// Backing storage holding the code units.
    pub(crate) buf: Vec<u16>,
}
29
#[cfg(test)]
mod tests {
    use super::Utf16String;

    /// `chars()` is expected to yield whole characters, including ones that
    /// need a surrogate pair in UTF-16.
    #[test]
    fn test_iter() {
        let mixed = Utf16String::from("a\u{10000}");
        let mut it = mixed.chars();
        let first = it.next().unwrap();
        assert_eq!(first, 'a');
        let second = it.next().unwrap();
        assert_eq!(second, '\u{10000}');

        let astral = Utf16String::from("\u{10000}\u{10FFFF}");
        let mut it = astral.chars();
        let first = it.next().unwrap();
        assert_eq!(first, '\u{10000}');
        let second = it.next().unwrap();
        assert_eq!(second, '\u{10FFFF}');
    }

    /// `len()` is expected to count code units, and `pop()` to remove the
    /// whole trailing character (two code units here).
    #[test]
    fn test_length() {
        let mut text = Utf16String::from("a\u{10000}");
        let before = text.len();
        assert_eq!(before, 3);

        text.pop();

        let after = text.len();
        assert_eq!(after, 1);
    }

    /// Range indexing is expected to operate on code-unit positions.
    #[test]
    fn test_slicing() {
        let text = Utf16String::from("a\u{10000}");

        let bounded_tail = &text[1..3];
        assert_eq!(bounded_tail.len(), 2);

        let bounded_head = &text[0..1];
        assert_eq!(bounded_head.len(), 1);

        let everything = &text[0..];
        assert_eq!(everything.len(), 3);

        let open_tail = &text[1..];
        assert_eq!(open_tail.len(), 2);

        let open_head = &text[..1];
        assert_eq!(open_head.len(), 1);
    }

    /// Converting offset pairs between the UTF-8 and UTF-16 forms of the
    /// same text must round-trip.
    #[test]
    fn test_offset_conversion() {
        use crate::utils::*;

        // Case 1: '\u{10000}' is the first offset and 'b' the last one.
        {
            // UTF-8: '\u{10000}' takes 4 bytes, so 'b' sits at 1 + 4 = 5.
            let utf8string = "a\u{10000}b";
            // UTF-16: '\u{10000}' takes 2 code units, so 'b' sits at 1 + 2 = 3.
            let utf16string = Utf16String::from(utf8string);

            let as_utf8 = two_utf16_offsets_as_utf8_offsets(utf8string, &utf16string, 1, 3);
            assert_eq!(as_utf8, (1, 5));

            let as_utf16 = two_utf8_offsets_as_utf16_offsets(&utf16string, utf8string, 1, 5);
            assert_eq!(as_utf16, (1, 3));
        }

        // Case 2: 'b' is the first offset and '\u{10000}' the last one.
        {
            // UTF-8: 'b' sits at 1 + 4 = 5 and '\u{10000}' at 1 + 4 + 1 = 6.
            let utf8string = "a\u{10FFFF}b\u{10000}";
            // UTF-16: 'b' sits at 1 + 2 = 3 and '\u{10000}' at 1 + 2 + 1 = 4.
            let utf16string = Utf16String::from(utf8string);

            let as_utf8 = two_utf16_offsets_as_utf8_offsets(utf8string, &utf16string, 3, 4);
            assert_eq!(as_utf8, (5, 6));

            let as_utf16 = two_utf8_offsets_as_utf16_offsets(&utf16string, utf8string, 5, 6);
            assert_eq!(as_utf16, (3, 4));
        }
    }
}