unicode_converter/
cesu_8.rs1use crate::unicode_encoding::UnicodeEncodingError::*;
5use crate::unicode_encoding::UnicodeEncodingError;
6use crate::unicode_encoding::UnicodeEncoding;
7use crate::utf_32::Utf32;
8use crate::utf_16::*;
9use crate::utf_8::Utf8;
10
11pub struct Cesu8 {
13 pub data: Utf8,
16}
17
18impl UnicodeEncoding for Cesu8 {
19 fn from_utf_32(data_utf32: &Utf32)-> Self {
21 let mut data: Vec<u8> = Vec::new();
22 for glyph in &data_utf32.data {
23 let new_bytes = utf_32_glyph_to_cesu_8(*glyph);
24 for byte in new_bytes {
25 data.push(byte);
26 }
27 }
28 return Cesu8{data: Utf8::from_bytes_no_check(&data, false).unwrap()};
29 }
30
31 fn to_utf_32(&self) -> Utf32 {
33 let tmp_utf32 = self.data.to_utf_32();
34 let mut ret: Vec<u32> = Vec::new();
35 let mut loop_pointer = 0;
36 while loop_pointer < tmp_utf32.data.len() {
37 if loop_pointer < tmp_utf32.data.len() - 1 {
38 match compatible_codepoints(tmp_utf32.data[loop_pointer], tmp_utf32.data[loop_pointer+1]) {
39 NoError => {
40 ret.push(tmp_utf32.data[loop_pointer]);
41 loop_pointer = loop_pointer + 1;
42 },
43 AmbiguousUnpairedSurrogates => {
44 let surrogate_pair = vec![tmp_utf32.data[loop_pointer] as u16, tmp_utf32.data[loop_pointer+1] as u16];
45 let utf16_bit = Utf16{data: surrogate_pair};
46 ret.push(utf16_bit.to_utf_32().data[0]);
47 loop_pointer = loop_pointer + 2;
48 },
49 x => {
50 eprintln!("[UNICODE ENCODING ERROR] {:?}.", x);
51 panic!("This should not have happen if the source was safely generated with from_string or from_bytes. This could happen if from_string_no_check was used. This need to be corrected from the library's user side.");
52 }
53 }
54 } else {
55 ret.push(tmp_utf32.data[loop_pointer]);
56 loop_pointer = loop_pointer + 1;
57 }
58 }
59 return Utf32{data: ret};
60 }
61
62 fn to_bytes(&self, big_endian: bool) -> Vec<u8> {
65 return self.data.to_bytes(big_endian);
66 }
67
68 fn from_bytes_no_check(bytes: &[u8], big_endian: bool) -> Result<Self, UnicodeEncodingError> {
71 match Utf8::from_bytes_no_check(bytes, big_endian) {
72 Ok(x) => Ok(Cesu8{data: x}),
73 Err(y) => Err(y),
74 }
75 }
76
77
78
79}
80
81const SMALL_DATA_LIMIT: u32 = 0xFFFF;
84
85fn utf_32_glyph_to_cesu_8(glyph: u32) -> Vec<u8> {
86 let glyph_in_vec = Utf32{data: vec![glyph]};
87 if glyph <= SMALL_DATA_LIMIT {
88 return glyph_in_vec.convert_to::<Utf8>().to_bytes(false);
89 } else {
90 let utf16 = glyph_in_vec.convert_to::<Utf16>();
91 let mut ret: Vec<u8> = Vec::new();
92 for surrogate in utf16.data {
93 let surrogate_in_vec = Utf32{data: vec![surrogate as u32]};
94 for byte in surrogate_in_vec.convert_to::<Utf8>().to_bytes(false) {
95 ret.push(byte);
96 }
97 }
98 return ret;
99 }
100}
101
#[test]
/// Checks the byte output for a one-byte, a two-byte and a supplementary
/// (surrogate-pair) code point.
fn test_utf_32_glyph_to_cesu_8() {
    let cases: [(u32, Vec<u8>); 3] = [
        (0x0045, vec![0x45]),
        (0x0205, vec![0xC8, 0x85]),
        (0x10400, vec![0xED, 0xA0, 0x81, 0xED, 0xB0, 0x80]),
    ];

    for (glyph, expected) in cases.iter() {
        assert_eq!(*expected, utf_32_glyph_to_cesu_8(*glyph));
    }
}
118
#[test]
/// Encodes code points to CESU-8 bytes, decodes them again through
/// `Cesu8`, and checks that the original value comes back.
fn test_utf32_to_cesu_8_and_back() {
    /// Encodes `glyph`, wraps the bytes in a `Cesu8`, and returns the first
    /// decoded code point.
    fn round_trip(glyph: u32) -> u32 {
        let bytes = utf_32_glyph_to_cesu_8(glyph);
        let wrapped = Cesu8::from_bytes_no_check(&bytes, false).unwrap();
        wrapped.to_utf_32().data[0]
    }

    for &glyph in [0x0045u32, 0x0205, 0x10400].iter() {
        assert_eq!(round_trip(glyph), glyph);
    }
}
133