// sj 0.17.0
//
// Some JSON implementation
// Documentation
// License: see LICENSE file at root directory of `master` branch

//! # Unicode character
//!
//! ## References
//!
//! - <https://en.wikipedia.org/wiki/UTF-8>

#[cfg(feature="std")]
use {
    alloc::vec::Vec,
    crate::{Error, Result},
};

/// # Unicode character
///
/// Collects up to four hexadecimal digits (see `add_hex()`) and, once full, writes the 16-bit value
/// they spell out as UTF-8 bytes (see `encode_as_utf8_bytes()`).
#[cfg(feature="std")]
#[derive(Debug)]
pub struct UnicodeChar {
    /// One slot per hexadecimal digit, in the order the digits were added. `add_hex()` stores a digit
    /// at an even index multiplied by 16 and a digit at an odd index as-is.
    bytes: [u8; 4],
    /// Index of the next free slot in `bytes`; reaches `bytes.len()` when all digits have been added.
    idx: usize,
}

#[cfg(feature="std")]
impl UnicodeChar {

    /// # Makes new instance
    ///
    /// The new instance holds no digits; `add_hex()` must be called four times before
    /// `encode_as_utf8_bytes()` can succeed.
    pub fn new() -> Self {
        Self {
            bytes: [0; 4],
            idx: 0,
        }
    }

    /// # Adds new byte written as a hexadecimal
    ///
    /// `hex` is one ASCII hexadecimal digit (`0-9`, `a-f` or `A-F`). Digits are expected most
    /// significant first.
    ///
    /// ## Errors
    ///
    /// - Four digits have already been added.
    /// - `hex` is not an ASCII hexadecimal digit. In this case the instance is left unchanged.
    pub fn add_hex(&mut self, hex: &u8) -> Result<()> {
        let idx = self.idx;

        // Capacity is checked before the digit itself, so a full instance always reports "full".
        let slot = match self.bytes.get_mut(idx) {
            Some(slot) => slot,
            None => return Err(Error::from(__!("Bytes are full"))),
        };

        let digit = match *hex {
            b'0'..=b'9' => *hex - b'0',
            b'a'..=b'f' => *hex - b'a' + 10,
            b'A'..=b'F' => *hex - b'A' + 10,
            _ => return Err(Error::from(__!("Not a hexadecimal: {:?}", char::from(*hex)))),
        };

        // Even slots hold a high nibble (stored pre-shifted), odd slots hold a low nibble, so that
        // two neighbouring slots can simply be combined into one byte later.
        *slot = if idx % 2 == 0 { digit << 4 } else { digit };
        self.idx += 1;
        Ok(())
    }

    /// # Checks if data is full
    ///
    /// Returns `true` once four digits have been added.
    pub fn is_full(&self) -> bool {
        self.idx >= self.bytes.len()
    }

    /// # Encodes as UTF-8 bytes
    ///
    /// Appends the UTF-8 encoding (1 to 3 bytes) of the collected 16-bit code point to `out`.
    ///
    /// ## Errors
    ///
    /// - Fewer than four digits have been added.
    /// - The code point is a surrogate (`U+D800..=U+DFFF`). Surrogates are not Unicode scalar values
    ///   and have no valid UTF-8 encoding, so they are rejected instead of being written as an
    ///   ill-formed 3-byte sequence.
    ///
    /// On error, `out` is left untouched.
    pub fn encode_as_utf8_bytes(self, out: &mut Vec<u8>) -> Result<()> {
        const MARKER: u8 = 0b_1000_0000;
        const HEADER_OF_TWO: u8 = 0b_1100_0000;
        const HEADER_OF_THREE: u8 = 0b_1110_0000;
        const LOW_SIX_BITS: u8 = 0b_0011_1111;
        const LOW_NIBBLE: u8 = 0b_0000_1111;

        if !self.is_full() {
            return Err(Error::from(__!("Bytes are not full")));
        }

        // Slots are stored as pre-shifted high nibble + low nibble, so OR-ing a pair yields the byte.
        let first = self.bytes[0] | self.bytes[1];
        let last = self.bytes[2] | self.bytes[3];

        let byte_count = match (first, last) {
            (0xd8..=0xdf, _) => {
                let code_point = (u16::from(first) << 8) | u16::from(last);
                return Err(Error::from(__!("Surrogate code point cannot be encoded as UTF-8: U+{:04X}", code_point)));
            },
            (0x00, 0x00..=0x7f) => ByteCount::One,
            (0x00..=0x07, _) => ByteCount::Two,
            _ => ByteCount::Three,
        };

        // Every continuation byte carries 6 payload bits behind the `10` marker.
        let tail = MARKER | (last & LOW_SIX_BITS);
        match byte_count {
            ByteCount::One => out.push(last),
            // 110x_xxyy 10yy_yyyy: `first` is at most 0x07 here, so it contributes 3 bits.
            ByteCount::Two => out.extend_from_slice(&[
                HEADER_OF_TWO | (first << 2) | (last >> 6),
                tail,
            ]),
            // 1110_xxxx 10xx_xxyy 10yy_yyyy
            ByteCount::Three => out.extend_from_slice(&[
                HEADER_OF_THREE | (first >> 4),
                MARKER | ((first & LOW_NIBBLE) << 2) | (last >> 6),
                tail,
            ]),
        };
        Ok(())
    }

}

/// # Byte count
///
/// Length of the UTF-8 sequence that `UnicodeChar::encode_as_utf8_bytes()` writes. It is chosen from
/// the high and low bytes of the 16-bit code point.
#[cfg(feature="std")]
#[derive(Debug)]
enum ByteCount {
    /// Single byte: high byte is `0x00` and low byte is within `0x00..=0x7f`
    One,
    /// Two bytes: high byte is within `0x00..=0x07` and the code point does not fit in one byte
    Two,
    /// Three bytes: used for high bytes above `0x07`
    Three,
}


/// Feeds sample 4-digit escapes through `UnicodeChar` and compares the produced bytes with what
/// `char::encode_utf8()` yields for the same code point.
#[cfg(feature="std")]
#[test]
fn tests() -> Result<()> {
    // Samples are grouped by the length of their UTF-8 encoding. Hex digits mix upper and lower case
    // on purpose.
    const ONE_BYTE: &[(&str, char)] = &[("0024", '\u{0024}')];
    const TWO_BYTES: &[(&str, char)] = &[("00A2", '\u{00a2}'), ("07fF", '\u{07ff}')];
    const THREE_BYTES: &[(&str, char)] = &[
        ("0939", '\u{0939}'),
        ("20AC", '\u{20AC}'),
        ("d55c", '\u{d55c}'),
        ("1d2D", '\u{1d2d}'),
        ("0800", '\u{0800}'),
        ("fFFf", '\u{FFff}'),
    ];

    for &(digits, sample) in ONE_BYTE.iter().chain(TWO_BYTES).chain(THREE_BYTES) {
        // Reference encoding from the standard library
        let mut scratch = [0; 4];
        let expected_len = sample.encode_utf8(&mut scratch).len();
        let expected = &scratch[..expected_len];

        let mut unicode_char = UnicodeChar::new();
        digits.as_bytes().iter().try_for_each(|digit| unicode_char.add_hex(digit))?;

        let mut actual = Vec::with_capacity(4);
        unicode_char.encode_as_utf8_bytes(&mut actual)?;
        assert_eq!(actual, expected);
    }

    Ok(())
}