#[cfg(feature="std")]
use {
alloc::vec::Vec,
crate::{Error, Result},
};
#[cfg(feature="std")]
#[derive(Debug)]
/// Accumulator for four hexadecimal digits that together name one 16-bit code point.
///
/// Digits are fed in one at a time with `add_hex()`; once four have been stored,
/// `encode_as_utf8_bytes()` turns them into UTF-8 bytes.
pub struct UnicodeChar {
/// One slot per hex digit, in the order the digits were added. Slots at even
/// indexes hold the digit value multiplied by 16 (high nibble), slots at odd
/// indexes hold the digit value as-is (low nibble).
bytes: [u8; 4],
/// Index of the next slot in `bytes` to be written; equals the number of digits
/// stored so far.
idx: usize,
}
#[cfg(feature="std")]
impl UnicodeChar {
    /// Makes a new, empty instance: no hexadecimal digits stored yet.
    pub fn new() -> Self {
        Self {
            bytes: [0; 4],
            idx: 0,
        }
    }

    /// Stores one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
    ///
    /// Digits are expected most-significant first: the 1st and 2nd digits form the
    /// high byte of the code point, the 3rd and 4th form the low byte.
    ///
    /// # Errors
    ///
    /// - Four digits have already been stored.
    /// - `hex` is not an ASCII hexadecimal digit.
    ///
    /// On error, nothing is stored.
    pub fn add_hex(&mut self, hex: &u8) -> Result<()> {
        // Capacity is checked before the digit itself, so a full instance always
        // reports "full" regardless of the input.
        let slot = match self.bytes.get_mut(self.idx) {
            Some(slot) => slot,
            None => return Err(Error::from(__!("Bytes are full"))),
        };
        let nibble = match *hex {
            digit @ b'0'..=b'9' => digit - b'0',
            digit @ b'a'..=b'f' => digit - b'a' + 10,
            digit @ b'A'..=b'F' => digit - b'A' + 10,
            other => return Err(Error::from(__!("Not a hexadecimal: {:?}", char::from(other)))),
        };
        // Even positions are high nibbles, odd positions are low nibbles; storing
        // them pre-shifted lets the encoder simply OR each pair together.
        *slot = if self.idx % 2 == 0 { nibble << 4 } else { nibble };
        self.idx += 1;
        Ok(())
    }

    /// Checks whether all four hexadecimal digits have been stored.
    pub fn is_full(&self) -> bool {
        self.idx >= self.bytes.len()
    }

    /// Appends the UTF-8 encoding of the stored code point to `out`.
    ///
    /// # Errors
    ///
    /// - Fewer than four digits have been stored.
    /// - The code point is a surrogate (`U+D800..=U+DFFF`). Surrogates are not
    ///   Unicode scalar values, and encoding one would put invalid UTF-8 into `out`.
    ///
    /// On error, `out` is left untouched.
    pub fn encode_as_utf8_bytes(self, out: &mut Vec<u8>) -> Result<()> {
        const MARKER: u8 = 0b_1000_0000;
        const HEADER_OF_TWO: u8 = 0b_1100_0000;
        const HEADER_OF_THREE: u8 = 0b_1110_0000;
        const LOW_SIX_BITS: u8 = 0b_0011_1111;
        const LOW_FOUR_BITS: u8 = 0b_0000_1111;

        if !self.is_full() {
            return Err(Error::from(__!("Bytes are not full")));
        }

        // High and low byte of the 16-bit code point. The nibbles in each pair
        // occupy disjoint bits, so OR is the same as addition here.
        let first = self.bytes[0] | self.bytes[1];
        let last = self.bytes[2] | self.bytes[3];

        let byte_count = match (first, last) {
            // U+0000..=U+007F
            (0x00, 0x00..=0x7f) => ByteCount::One,
            // U+0080..=U+07FF
            (0x00..=0x07, _) => ByteCount::Two,
            // U+D800..=U+DFFF
            (0xd8..=0xdf, _) => return Err(Error::from(__!(
                "Surrogate code point cannot be encoded as UTF-8: U+{:02X}{:02X}", first, last,
            ))),
            // Remaining U+0800..=U+FFFF
            _ => ByteCount::Three,
        };

        match byte_count {
            ByteCount::One => out.push(last),
            // 110xxxxx 10xxxxxx: `first` is at most 0x07 here, so it contributes 3 bits.
            ByteCount::Two => out.extend_from_slice(&[
                HEADER_OF_TWO | (first << 2) | (last >> 6),
                MARKER | (last & LOW_SIX_BITS),
            ]),
            // 1110xxxx 10xxxxxx 10xxxxxx
            ByteCount::Three => out.extend_from_slice(&[
                HEADER_OF_THREE | (first >> 4),
                MARKER | ((first & LOW_FOUR_BITS) << 2) | (last >> 6),
                MARKER | (last & LOW_SIX_BITS),
            ]),
        };
        Ok(())
    }
}
#[cfg(feature="std")]
#[derive(Debug)]
/// Number of UTF-8 bytes that `UnicodeChar::encode_as_utf8_bytes()` emits for a code point.
enum ByteCount {
/// High byte is `0x00` and low byte is at most `0x7f`: the low byte is emitted as-is.
One,
/// High byte is at most `0x07` (and the code point did not qualify for `One`).
Two,
/// Every other 16-bit value.
Three,
}
#[cfg(feature="std")]
#[test]
fn tests() -> Result<()> {
    // Each case pairs four hex digits (mixed case on purpose) with the character they name.
    const CASES: &[(&str, char)] = &[
        // One byte
        ("0024", '\u{0024}'),
        // Two bytes
        ("00A2", '\u{00a2}'),
        ("07fF", '\u{07ff}'),
        // Three bytes
        ("0939", '\u{0939}'),
        ("20AC", '\u{20AC}'),
        ("d55c", '\u{d55c}'),
        ("1d2D", '\u{1d2d}'),
        ("0800", '\u{0800}'),
        ("fFFf", '\u{FFff}'),
    ];

    for &(hex, chr) in CASES {
        // Reference encoding produced by the standard library.
        let mut scratch = [0; 4];
        let encoded: &str = chr.encode_utf8(&mut scratch);
        let expected = encoded.as_bytes();

        let mut uc = UnicodeChar::new();
        for digit in hex.bytes() {
            uc.add_hex(&digit)?;
        }

        let mut out = Vec::with_capacity(4);
        uc.encode_as_utf8_bytes(&mut out)?;
        assert_eq!(out, expected);
    }

    Ok(())
}