1#![no_std]
8#![allow(clippy::uninlined_format_args)]
9#![forbid(unsafe_code, future_incompatible)]
10
11extern crate alloc;
12
13#[cfg(feature = "std")]
14extern crate std;
15
16use alloc::string::String;
17use alloc::vec::Vec;
18use core::fmt;
19
20#[cfg(feature = "std")]
21use std::io::{self, Write};
22
/// Escape sequence `ESC % G`, emitted immediately before a UTF-8 payload.
const UTF8_START: &[u8] = b"\x1b%G";
/// Escape sequence `ESC % @`, emitted immediately after a UTF-8 payload.
const UTF8_END: &[u8] = b"\x1b%@";
25
26#[derive(Clone, Copy)]
28#[repr(transparent)]
29pub struct CText<'s> {
30 utf8: &'s str,
31}
32
33impl<'s> fmt::Debug for CText<'s> {
34 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
35 f.write_str(self.utf8)
36 }
37}
38
39impl<'s> fmt::Display for CText<'s> {
40 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41 f.write_str(self.utf8)
42 }
43}
44
45impl<'s> CText<'s> {
46 pub const fn new(utf8: &'s str) -> Self {
47 Self { utf8 }
48 }
49
50 pub const fn len(self) -> usize {
51 self.utf8.len() + UTF8_START.len() + UTF8_END.len()
52 }
53
54 pub const fn is_empty(self) -> bool {
55 self.utf8.is_empty()
56 }
57
58 #[cfg(feature = "std")]
59 pub fn write(self, mut out: impl Write) -> io::Result<usize> {
60 let mut writed = 0;
61 writed += out.write(UTF8_START)?;
62 writed += out.write(self.utf8.as_bytes())?;
63 writed += out.write(UTF8_END)?;
64 Ok(writed)
65 }
66}
67
68pub fn utf8_to_compound_text(text: &str) -> Vec<u8> {
70 let mut ret = Vec::with_capacity(text.len() + 6);
71 ret.extend_from_slice(UTF8_START);
72 ret.extend_from_slice(text.as_bytes());
73 ret.extend_from_slice(UTF8_END);
74 ret
75}
76
/// Errors reported while decoding Compound Text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// The input contains an escape sequence that could not be interpreted.
    InvalidEncoding,
    /// The input uses an encoding this crate does not handle.
    ///
    /// NOTE(review): not constructed anywhere in this file at present.
    UnsupportedEncoding,
    /// A UTF-8 segment contained bytes that are not valid UTF-8.
    Utf8Error(alloc::string::FromUtf8Error),
}

impl From<alloc::string::FromUtf8Error> for DecodeError {
    /// Wraps the UTF-8 error so it can be propagated with `?`.
    fn from(err: alloc::string::FromUtf8Error) -> Self {
        Self::Utf8Error(err)
    }
}

impl fmt::Display for DecodeError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidEncoding => f.write_str("Invalid compound text"),
            Self::UnsupportedEncoding => f.write_str("This encoding is not supported yet"),
            Self::Utf8Error(e) => write!(f, "Not a valid utf8 {}", e),
        }
    }
}

/// Lets `DecodeError` participate in `Box<dyn Error>` / `?` based error
/// handling when the standard library is available.
#[cfg(feature = "std")]
impl std::error::Error for DecodeError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::Utf8Error(err) => Some(err),
            Self::InvalidEncoding | Self::UnsupportedEncoding => None,
        }
    }
}
99
// Runs `$decoder.decode_to_string` over all of `$bytes`, appending to the
// `&mut String` given as `$out` and growing it whenever the decoder reports
// that it ran out of output space.
//
// * `$decoder` - a mutable place holding the decoder (used for both decoding
//   and buffer-size estimation).
// * `$out`     - an expression of type `&mut String`.
// * `$bytes`   - an expression coercible to `&[u8]`.
// * `$last`    - `true` when `$bytes` is the final piece of the stream.
//
// `$out`, `$bytes` and `$last` are each evaluated exactly once, and the
// expansion is a single block so its locals never leak into the caller.
macro_rules! decode {
    ($decoder:expr, $out:expr, $bytes:expr, $last:expr) => {{
        let out = $out;
        let last: bool = $last;
        let mut remaining: &[u8] = $bytes;
        loop {
            let (status, consumed, _) = $decoder.decode_to_string(remaining, &mut *out, last);

            match status {
                encoding_rs::CoderResult::InputEmpty => break,
                encoding_rs::CoderResult::OutputFull => {
                    // Skip what was already decoded, then make room for the
                    // worst-case expansion of what is still pending.
                    remaining = &remaining[consumed..];
                    out.reserve(
                        $decoder
                            .max_utf8_buffer_length(remaining.len())
                            .unwrap_or_default(),
                    );
                }
            }
        }
    }};
}
120
121pub fn compound_text_to_utf8(bytes: &[u8]) -> Result<String, DecodeError> {
122 let split = bytes.split(|&b| b == 0x1b);
123
124 let mut result = String::new();
125
126 for chunk in split {
127 let mut iter = chunk.iter();
128 match (iter.next(), iter.next()) {
129 (Some(0x25), Some(0x47)) => {
131 let left = iter.as_slice().to_vec();
132 match String::from_utf8(left) {
133 Ok(out) => result.push_str(&out),
134 Err(e) => return Err(DecodeError::from(e)),
135 };
136 }
137 (Some(0x25), Some(0x40)) => {}
139 (Some(0x24), Some(0x28)) => match iter.next() {
141 Some(0x42) => {
143 let left = iter.as_slice();
144 let mut decoder = encoding_rs::ISO_2022_JP.new_decoder_without_bom_handling();
145 let mut out = String::new();
146 decode!(decoder, &mut out, &[0x1B, 0x24, 0x42], false);
147 decode!(decoder, &mut out, &left, true);
148
149 result.push_str(&out);
150 }
151
152 Some(0x41) => {
154 let left: Vec<u8> = iter.map(|&b| b + 0x80).collect();
155 let (out, _) = encoding_rs::GBK.decode_without_bom_handling(&left);
156 result.push_str(&out);
157 }
158
159 Some(0x43) => {
161 let left: Vec<u8> = iter.map(|&b| b + 0x80).collect();
162 let (out, _) = encoding_rs::EUC_KR.decode_with_bom_removal(&left);
163 result.push_str(&out);
164 }
165 _ => return Err(DecodeError::InvalidEncoding),
167 },
168 (Some(0x2d), Some(0x41)) => {
170 let left = iter.as_slice();
171 let out = encoding_rs::mem::decode_latin1(left);
172 result.push_str(&out);
173 }
174 (Some(0x2d), Some(0x42)) => {
176 let left = iter.as_slice();
177 let (out, _) = encoding_rs::ISO_8859_2.decode_without_bom_handling(left);
178 result.push_str(&out);
179 }
180 (Some(0x2d), Some(0x43)) => {
182 let left = iter.as_slice();
183 let (out, _) = encoding_rs::ISO_8859_3.decode_without_bom_handling(left);
184 result.push_str(&out);
185 }
186 (Some(0x2d), Some(0x44)) => {
188 let left = iter.as_slice();
189 let (out, _) = encoding_rs::ISO_8859_4.decode_without_bom_handling(left);
190 result.push_str(&out);
191 }
192 (Some(0x2d), Some(0x46)) => {
194 let left = iter.as_slice();
195 let (out, _) = encoding_rs::ISO_8859_7.decode_without_bom_handling(left);
196 result.push_str(&out);
197 }
198 (Some(0x2d), Some(0x47)) => {
200 let left = iter.as_slice();
201 let (out, _) = encoding_rs::ISO_8859_6.decode_without_bom_handling(left);
202 result.push_str(&out);
203 }
204 (Some(0x2d), Some(0x48)) => {
206 let left = iter.as_slice();
207 let (out, _) = encoding_rs::ISO_8859_8.decode_without_bom_handling(left);
208 result.push_str(&out);
209 }
210 (Some(0x2d), Some(0x4c)) => {
212 let left = iter.as_slice();
213 let (out, _) = encoding_rs::ISO_8859_5.decode_without_bom_handling(left);
214 result.push_str(&out);
215 }
216 (Some(0x2d), Some(0x4d)) => {
218 let left = iter.as_slice();
219 let (out, _) = encoding_rs::WINDOWS_1254.decode_without_bom_handling(left);
220 result.push_str(&out);
221 }
222 (Some(0x2d), Some(0x56)) => {
224 let left = iter.as_slice();
225 let (out, _) = encoding_rs::ISO_8859_10.decode_without_bom_handling(left);
226 result.push_str(&out);
227 }
228 (Some(0x2d), Some(0x59)) => {
230 let left = iter.as_slice();
231 let (out, _) = encoding_rs::ISO_8859_13.decode_without_bom_handling(left);
232 result.push_str(&out);
233 }
234 (Some(0x2d), Some(0x5f)) => {
236 let left = iter.as_slice();
237 let (out, _) = encoding_rs::ISO_8859_14.decode_without_bom_handling(left);
238 result.push_str(&out);
239 }
240 (Some(0x2d), Some(0x62)) => {
242 let left = iter.as_slice();
243 let (out, _) = encoding_rs::ISO_8859_15.decode_without_bom_handling(left);
244 result.push_str(&out);
245 }
246 (Some(0x2d), Some(0x66)) => {
248 let left = iter.as_slice();
249 let (out, _) = encoding_rs::ISO_8859_16.decode_without_bom_handling(left);
250 result.push_str(&out);
251 }
252 _ => {
254 let out = encoding_rs::mem::decode_latin1(chunk);
255 result.push_str(&out);
256 }
257 };
258 }
259 Ok(result)
260}
261
#[cfg(test)]
mod tests {
    use crate::{compound_text_to_utf8, utf8_to_compound_text};

    /// Decodes `encoded` and checks that the result is exactly `expected`.
    fn assert_decodes_to(encoded: &[u8], expected: &str) {
        assert_eq!(compound_text_to_utf8(encoded).unwrap(), expected);
    }

    /// UTF-8 segment: encoding and decoding must round-trip.
    #[test]
    fn korean() {
        let text = "가나다";
        let encoded: &[u8] = &[
            27, 37, 71, 234, 176, 128, 235, 130, 152, 235, 139, 164, 27, 37, 64,
        ];
        assert_eq!(utf8_to_compound_text(text), encoded);
        assert_decodes_to(encoded, text);
    }

    #[test]
    fn iso_2022_jp() {
        assert_decodes_to(&[27, 36, 40, 66, 69, 108, 53, 126], "東京");
    }

    #[test]
    fn iso_2022_jp_long() {
        assert_decodes_to(
            &[
                27, 36, 40, 66, 67, 78, 36, 67, 36, 70, 36, 107, 36, 68, 36, 98, 36, 106, 27, 40,
                66,
            ],
            "知ってるつもり(B",
        );
    }

    #[test]
    fn gb2312_cn() {
        assert_decodes_to(
            &[
                0x1b, 0x24, 0x28, 0x41, 0x3a, 0x5c, 0x38, 0x5f, 0x50, 0x4b, 0x48, 0x4f, 0x4a, 0x36,
                0x44, 0x63,
            ],
            "很高兴认识你",
        );
    }

    #[test]
    fn gb2312_cn_mixed() {
        assert_decodes_to(
            &[
                0x1b, 0x24, 0x28, 0x42, 0x5f, 0x5a, 0x53, 0x28, 0x1b, 0x24, 0x28, 0x41, 0x44, 0x63,
            ],
            "炸哦你",
        );
    }

    #[test]
    fn ks_c_5601() {
        assert_decodes_to(
            &[
                0x1b, 0x24, 0x28, 0x43, 0x33, 0x4d, 0x43, 0x56, 0x30, 0x6d, 0x3e, 0x5f,
            ],
            "넌최고야",
        );
    }

    #[test]
    fn iso_8859_1() {
        assert_decodes_to(
            &[0x1b, 0x2d, 0x41, 0xa1, 0xb8, 0xc0, 0xd1, 0xe2, 0xf3],
            "¡¸ÀÑâó",
        );
    }

    #[test]
    fn iso_8859_2() {
        assert_decodes_to(
            &[0x1b, 0x2d, 0x42, 0xa1, 0xa3, 0xa5, 0xa6, 0xa9, 0xab],
            "ĄŁĽŚŠŤ",
        );
    }
}
331}