1#![allow(clippy::unusual_byte_groupings)]
12#![allow(clippy::indexing_slicing)]
13
14use crate::{Bits, BitsErrorKind, Error, MutBits};
15
/// Exclusive upper bound for values that are encoded as a single byte.
const MAX_ONE_BYTE: u8 = 0x80;

/// Tag bits (`110xxxxx`) carried by the leading byte of a two-byte sequence.
const TAG_TWO_BYTE: u8 = 0b110_00000;
/// Mask selecting the tag bits of a two-byte leading byte.
const TAG_TWO_BYTE_MASK: u8 = 0b111_00000;
/// Exclusive upper bound for code points that are encoded as two bytes.
const MAX_TWO_BYTE: u32 = 0x800;

/// Tag bits (`1110xxxx`) carried by the leading byte of a three-byte sequence.
const TAG_THREE_BYTE: u8 = 0b1110_0000;
/// Mask selecting the tag bits of a three-byte leading byte.
const TAG_THREE_BYTE_MASK: u8 = 0b1111_0000;
/// Exclusive upper bound for code points that are encoded as three bytes.
const MAX_THREE_BYTE: u32 = 0x10000;

/// Tag bits (`11110xxx`) carried by the leading byte of a four-byte sequence.
const TAG_FOUR_BYTE: u8 = 0b11110_000;
/// Mask selecting the tag bits of a four-byte leading byte.
const TAG_FOUR_BYTE_MASK: u8 = 0b11111_000;

/// Tag bits (`10xxxxxx`) OR-ed into every continuation byte by the encoder.
const TAG_CONTINUE: u8 = 0b1000_0000;
30
31pub fn required_utf8_bytes(val: char) -> usize {
33 let val = val as u32;
34 if val < MAX_ONE_BYTE as u32 {
35 1
36 } else if val < MAX_TWO_BYTE {
37 2
38 } else if val < MAX_THREE_BYTE {
39 3
40 } else {
41 4
42 }
43}
44pub fn encode_be_utf8_char(val: char, buf: &mut [u8; 4]) -> Result<&[u8], Error> {
48 let len = required_utf8_bytes(val);
49 let val = val as u32;
50 match (len, &mut buf[..]) {
51 (1, [a, ..]) => {
52 *a = val as u8;
53 }
54 (2, [a, b, ..]) => {
55 *a = ((val >> 6) & 0x1F) as u8 | TAG_TWO_BYTE;
56 *b = (val & 0x3F) as u8 | TAG_CONTINUE;
57 }
58 (3, [a, b, c, ..]) => {
59 *a = ((val >> 12) & 0x0F) as u8 | TAG_THREE_BYTE;
60 *b = ((val >> 6) & 0x3F) as u8 | TAG_CONTINUE;
61 *c = (val & 0x3F) as u8 | TAG_CONTINUE;
62 }
63 (4, [a, b, c, d]) => {
64 *a = ((val >> 18) & 0x07) as u8 | TAG_FOUR_BYTE;
65 *b = ((val >> 12) & 0x3F) as u8 | TAG_CONTINUE;
66 *c = ((val >> 6) & 0x3F) as u8 | TAG_CONTINUE;
67 *d = (val & 0x3F) as u8 | TAG_CONTINUE;
68 }
69 _ => return Err(BitsErrorKind::FormatError.into()),
70 }
71 Ok(&buf[..len])
72}
73
74pub fn write_be_utf8_char<T: MutBits + ?Sized>(val: char, out: &mut T) -> Result<usize, Error> {
77 let mut buf = [0u8; 4];
78 let val = encode_be_utf8_char(val, &mut buf)?;
79 out.write_all_bytes(val)?;
80 Ok(val.len())
81}
82
83pub fn read_be_utf8_char<T: Bits + ?Sized>(src: &mut T) -> Result<(char, usize), Error> {
86 let a = src.read_u8()?;
87 if a < MAX_ONE_BYTE {
88 return Ok((a as char, 1));
89 }
90 let (val, len) = if (a & TAG_TWO_BYTE_MASK) == TAG_TWO_BYTE {
91 let b = (src.read_u8()? & 0x3F) as u32;
92 let a = ((a & 0x1F) as u32) << 6;
93 (a | b, 2)
94 } else if (a & TAG_THREE_BYTE_MASK) == TAG_THREE_BYTE {
95 let b = ((src.read_u8()? & 0x3F) as u32) << 6;
96 let c = (src.read_u8()? & 0x3F) as u32;
97 let a = ((a & 0xF) as u32) << 12;
98 (a | b | c, 3)
99 } else if (a & TAG_FOUR_BYTE_MASK) == TAG_FOUR_BYTE {
100 let b = ((src.read_u8()? & 0x3F) as u32) << 12;
101 let c = ((src.read_u8()? & 0x3F) as u32) << 6;
102 let d = (src.read_u8()? & 0x3F) as u32;
103 let a = ((a & 0x7) as u32) << 24;
104 (a | b | c | d, 4)
105 } else {
106 return Err(BitsErrorKind::FormatError.into());
107 };
108 let Some(val) = char::from_u32(val) else {
109 return Err(BitsErrorKind::InvalidInput.into());
110 };
111 Ok((val, len))
112}