// Documentation
/*
==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--

SJ

Copyright (C) 2019-2025  Anonymous

There are several releases over multiple years,
they are listed as ranges, such as: "2019-2025".

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.

::--::--::--::--::--::--::--::--::--::--::--::--::--::--::--::--
*/

//! # Unicode character
//!
//! ## References
//!
//! - <https://en.wikipedia.org/wiki/UTF-8>

use {
    alloc::vec::Vec,
    crate::Result,
};

/// # Unicode character
///
/// Accumulates up to four hexadecimal digits (one code point written as `XXXX`) so that they can
/// later be encoded as UTF-8 bytes.
#[derive(Debug)]
pub (super) struct UnicodeChar {
    /// One slot per hexadecimal digit, in input order.
    ///
    /// Slots at even indexes hold the digit value multiplied by 16 (the high nibble already in
    /// place), slots at odd indexes hold the plain digit value (the low nibble).
    bytes: [u8; 4],
    /// Index of the next free slot in `bytes`; the data is full once this reaches `bytes.len()`.
    idx: usize,
}

impl UnicodeChar {

    /// # Makes new instance
    ///
    /// The new instance holds no hexadecimal digits yet.
    #[inline(always)]
    pub fn new() -> Self {
        Self {
            bytes: [0; 4],
            idx: 0,
        }
    }

    /// # Adds new byte written as a hexadecimal
    ///
    /// `hex` is one ASCII hexadecimal digit (`0-9`, `a-f` or `A-F`). Digits must be added from the
    /// most significant to the least significant one.
    ///
    /// ## Errors
    ///
    /// - All four digits have already been added.
    /// - `hex` is not an ASCII hexadecimal digit. In this case the instance is left untouched.
    #[inline(always)]
    pub fn add_hex(&mut self, hex: &u8) -> Result<()> {
        match self.bytes.get_mut(self.idx) {
            Some(item) => {
                let nibble = match hex {
                    b'0'..=b'9' => hex - b'0',
                    b'a'..=b'f' => hex - b'a' + 10,
                    b'A'..=b'F' => hex - b'A' + 10,
                    _ => return Err(err!("Not a hexadecimal: {:?}", char::from(*hex))),
                };
                // Even slots receive the high nibble (already shifted into place), odd slots the
                // low nibble, so that two neighbouring slots can simply be joined later.
                *item = if self.idx % 2 == 0 { nibble * 16 } else { nibble };
                self.idx += 1;
                Ok(())
            },
            None => Err(e!("Bytes are full")),
        }
    }

    /// # Checks if data is full
    ///
    /// Returns `true` once four hexadecimal digits have been added.
    #[inline(always)]
    pub fn is_full(&self) -> bool {
        self.idx >= self.bytes.len()
    }

    /// # Encodes as UTF-8 bytes
    ///
    /// Appends the UTF-8 encoding (one to three bytes) of the stored code point to `out`.
    ///
    /// ## Errors
    ///
    /// - Fewer than four hexadecimal digits have been added.
    /// - The code point is a surrogate (`U+D800..=U+DFFF`). Surrogates have no valid UTF-8
    ///   encoding, so nothing is written to `out`. This type only sees a single 16-bit value, it
    ///   cannot combine a surrogate pair into one code point.
    #[inline(always)]
    pub fn encode_as_utf8_bytes(self, out: &mut Vec<u8>) -> Result<()> {
        // Header of every continuation byte: `10xx_xxxx`
        const MARKER: u8 = 0b_1000_0000;

        if !self.is_full() {
            return Err(e!("Bytes are not full"));
        }

        // Join the nibbles: `first` is the high byte of the code point, `last` is the low byte.
        // The two operands never share bits (high nibble vs. low nibble).
        let first = self.bytes[0] | self.bytes[1];
        let last = self.bytes[2] | self.bytes[3];

        let byte_count = match first {
            0x00 => match last {
                0x00..=0x7f => ByteCount::One,
                _ => ByteCount::Two,
            },
            0x01..=0x07 => ByteCount::Two,
            // Encoding a surrogate would produce an ill-formed UTF-8 sequence
            0xd8..=0xdf => return Err(err!(
                "Surrogate code point cannot be encoded as UTF-8: U+{:02X}{:02X}", first, last
            )),
            _ => ByteCount::Three,
        };

        match byte_count {
            ByteCount::One => out.push(last),
            ByteCount::Two => {
                // 110f_ffll 10ll_llll (f: low 3 bits of `first`, l: bits of `last`)
                const HEADER_OF_TWO: u8 = 0b_1100_0000;
                out.push(HEADER_OF_TWO | (first << 5 >> 3) | (last >> 6));
                out.push(MARKER | (last << 2 >> 2));
            },
            ByteCount::Three => {
                // 1110_ffff 10ff_ffll 10ll_llll (f: bits of `first`, l: bits of `last`)
                const HEADER_OF_THREE: u8 = 0b_1110_0000;
                out.push(HEADER_OF_THREE | (first >> 4));
                out.push(MARKER | (first << 4 >> 2) | (last >> 6));
                out.push(MARKER | (last << 2 >> 2));
            },
        };

        Ok(())
    }

}

/// # Byte count
///
/// Length of the UTF-8 sequence selected by `UnicodeChar::encode_as_utf8_bytes()` for a code
/// point made of four hexadecimal digits.
#[derive(Debug)]
enum ByteCount {
    /// Single byte: code points `U+0000..=U+007F`
    One,
    /// Two bytes: code points `U+0080..=U+07FF`
    Two,
    /// Three bytes: code points `U+0800..=U+FFFF`
    Three,
}

// Feeds four hexadecimal digits (in mixed letter case) into `UnicodeChar` and compares the
// produced bytes with what `char::encode_utf8()` yields for the same code point.
#[test]
fn tests() -> Result<()> {
    const CASES: &[(&str, char)] = &[
        // One byte
        ("0024", '\u{0024}'),
        // Two bytes, including the upper boundary
        ("00A2", '\u{00a2}'),
        ("07fF", '\u{07ff}'),
        // Three bytes, including both boundaries
        ("0939", '\u{0939}'),
        ("20AC", '\u{20AC}'),
        ("d55c", '\u{d55c}'),
        ("1d2D", '\u{1d2d}'),
        ("0800", '\u{0800}'),
        ("fFFf", '\u{FFff}'),
    ];

    for &(hex, chr) in CASES {
        let mut scratch = [0_u8; 4];
        let expected: &str = chr.encode_utf8(&mut scratch);

        let mut unicode_char = UnicodeChar::new();
        hex.bytes().try_for_each(|digit| unicode_char.add_hex(&digit))?;

        let mut actual = Vec::with_capacity(4);
        unicode_char.encode_as_utf8_bytes(&mut actual)?;
        assert_eq!(actual, expected.as_bytes());
    }

    Ok(())
}