/// Decodes the UTF-8 sequence whose first byte is at `data[position]`.
///
/// Returns `(code_point, byte_length)`, where `byte_length` (1 to 4) is the
/// number of bytes the sequence occupies, so `position + byte_length` is the
/// offset of the following sequence.
///
/// Only the lead byte decides the sequence length. Continuation bytes are
/// masked to their low six bits without checking their `10` prefix, and the
/// resulting value is not checked for overlong forms or surrogates, which is
/// why the code point is returned as a raw `u32` rather than a `char`.
///
/// # Panics
///
/// * `"Invalid UTF-8 position."` if `data[position]` is a continuation byte
///   (exactly one leading one bit), i.e. `position` is inside a sequence.
/// * `"Invalid UTF-8 data."` if the lead byte has five or more leading one bits.
/// * With an index-out-of-bounds panic if `position`, or any continuation byte
///   the lead byte calls for, lies past the end of `data`.
#[inline]
pub fn read_char(data: &[u8], position: usize) -> (u32, usize) {
    // Six payload bits of the continuation byte `offset` bytes after the lead.
    let continuation = |offset: usize| u32::from(data[position + offset] & 0b11_1111);

    let lead = data[position];
    // The count of leading one bits in the lead byte encodes the sequence length.
    match lead.leading_ones() {
        // 0xxx_xxxx: plain ASCII, the byte is the code point.
        0 => (u32::from(lead), 1),
        // 110x_xxxx 10xx_xxxx
        2 => {
            let high = u32::from(lead & 0b1_1111) << 6;
            (high | continuation(1), 2)
        }
        // 1110_xxxx 10xx_xxxx 10xx_xxxx
        3 => {
            let high = u32::from(lead & 0b1111) << 12;
            let middle = continuation(1) << 6;
            (high | middle | continuation(2), 3)
        }
        // 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
        4 => {
            let high = u32::from(lead & 0b111) << 18;
            let upper_middle = continuation(1) << 12;
            let lower_middle = continuation(2) << 6;
            (high | upper_middle | lower_middle | continuation(3), 4)
        }
        // 10xx_xxxx: a continuation byte cannot start a sequence.
        1 => panic!("Invalid UTF-8 position."),
        // Five or more leading ones is not a lead byte this decoder handles.
        _ => panic!("Invalid UTF-8 data."),
    }
}
/// Decodes the UTF-8 sequence that precedes `position` in `data`.
///
/// Starting one byte before `position`, walks backwards over continuation
/// bytes (bytes with exactly one leading one bit) until it reaches a byte that
/// is not one, then decodes from there with `read_char`.
///
/// Returns `(code_point, byte_length)` exactly as `read_char` does for the
/// byte the walk stopped on. For well-formed input where `position` is a
/// sequence boundary, `position - byte_length` is where the decoded sequence
/// starts.
///
/// # Panics
///
/// * If `position` is `0` or greater than `data.len()`.
/// * If every byte before `position` is a continuation byte, so the backwards
///   walk runs off the start of `data`.
/// * Whenever `read_char` panics for the byte the walk stopped on.
// NOTE(review): the lint allowance is presumably here because only the tests
// call this helper at the moment; confirm before removing it.
#[allow(dead_code)]
pub fn read_previous_char(data: &[u8], mut position: usize) -> (u32, usize) {
    loop {
        // Step back one byte; the first step moves from the boundary onto the
        // last byte of the preceding sequence.
        position -= 1;
        if data[position].leading_ones() != 1 {
            // Not a continuation byte, so this is where decoding must begin.
            return read_char(data, position);
        }
    }
}
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use crate::utf8reader::{read_char, read_previous_char};

    /// Fixture mixing 1-, 3- and 4-byte sequences, written with escapes so the
    /// encoded lengths cannot be altered by editors or re-encoding.
    ///
    /// Byte layout: `a` at 0, U+20AC at 1..4, `b` at 4, U+10348 at 5..9, `c` at 9.
    const SAMPLE: &str = "a\u{20AC}b\u{10348}c";

    /// Fixture for the 2-byte branch: `x` at 0, U+0107 at 1..3, `y` at 3.
    const TWO_BYTE_SAMPLE: &str = "x\u{107}y";

    /// Forward decoding returns each code point and its encoded length.
    #[test]
    fn test_next_char() {
        let data = SAMPLE.as_bytes();
        assert_eq!(data.len(), 10);
        assert_eq!(read_char(data, 0), ('a' as u32, 1));
        assert_eq!(read_char(data, 1), (0x20AC, 3));
        assert_eq!(read_char(data, 4), ('b' as u32, 1));
        assert_eq!(read_char(data, 5), (0x10348, 4));
        assert_eq!(read_char(data, 9), ('c' as u32, 1));
    }

    /// Backward decoding from a boundary returns the sequence ending there.
    #[test]
    fn test_previous_char() {
        let data = SAMPLE.as_bytes();
        assert_eq!(read_previous_char(data, 1), ('a' as u32, 1));
        assert_eq!(read_previous_char(data, 4), (0x20AC, 3));
        assert_eq!(read_previous_char(data, 5), ('b' as u32, 1));
        assert_eq!(read_previous_char(data, 9), (0x10348, 4));
        assert_eq!(read_previous_char(data, 10), ('c' as u32, 1));
    }

    /// Covers the 2-byte branch, which `SAMPLE` does not exercise.
    #[test]
    fn test_two_byte_char() {
        let data = TWO_BYTE_SAMPLE.as_bytes();
        assert_eq!(data.len(), 4);
        assert_eq!(read_char(data, 1), (0x107, 2));
        assert_eq!(read_previous_char(data, 3), (0x107, 2));
        assert_eq!(read_previous_char(data, 4), ('y' as u32, 1));
    }
}