use std::error::Error;
use std::fmt::{Debug, Display, Formatter};
use crate::range::*;
/// Position of a single character inside a string's UTF-8 byte buffer.
///
/// Besides the location, the index caches the character's own bytes and, when
/// present, the variation selector that directly follows it, so the text it
/// designates can be rebuilt without access to the original string.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct StringIndex {
    /// Ordinal of the character; [`StringIndex::zero`] carries `0`.
    pub(crate) value: usize,
    /// Byte offset at which the character starts.
    pub(crate) raw_start_index: usize,
    /// Number of bytes covered by this index, variation selector included.
    pub(crate) raw_len: u8,
    /// UTF-8 bytes of the character, zero padded up to four bytes.
    pub(crate) bytes: [u8; 4],
    /// The three UTF-8 bytes of a trailing variation selector, if one was detected.
    pub(crate) extra_variation_selector: Option<[u8; 3]>,
}

impl StringIndex {
    /// Returns the all-zero index: ordinal `0`, offset `0`, length `0`, no selector.
    pub fn zero() -> Self {
        Self {
            value: 0,
            raw_start_index: 0,
            raw_len: 0,
            bytes: [0, 0, 0, 0],
            extra_variation_selector: None,
        }
    }

    /// Debug rendering of the four cached character bytes, e.g. `[97, 0, 0, 0]`.
    pub fn raw_bytes_description(&self) -> String {
        format!("{:?}", self.bytes)
    }

    /// Debug rendering of the variation selector bytes, or `"Empty"` when there is none.
    pub fn variation_selector_description(&self) -> String {
        match self.extra_variation_selector {
            Some(bytes) => format!("{:?}", bytes),
            None => "Empty".to_string(),
        }
    }

    /// Rebuilds the text designated by this index from the cached bytes.
    ///
    /// Zero bytes in `bytes` are treated as padding and skipped; an all-zero
    /// buffer stands for the NUL character. A stored variation selector is
    /// always appended, including after NUL.
    ///
    /// # Panics
    ///
    /// Panics if the cached bytes do not form valid UTF-8.
    pub fn utf8_char(&self) -> String {
        // At most four character bytes plus three selector bytes.
        let mut bytes: Vec<u8> = Vec::with_capacity(7);
        if self.bytes == [0, 0, 0, 0] {
            // All padding means NUL itself; keep exactly one zero byte.
            bytes.push(0);
        } else {
            bytes.extend(self.bytes.iter().copied().filter(|&byte| byte != 0));
        }
        if let Some(variation_selector) = self.extra_variation_selector {
            bytes.extend_from_slice(&variation_selector);
        }
        String::from_utf8(bytes).expect("StringIndex must cache valid UTF-8 bytes")
    }
}

impl Display for StringIndex {
    /// Writes every field on one line, separated by `", "`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // A single literal is used on purpose: backslash line continuations
        // swallow the newline and indentation, which previously glued
        // neighbouring fields together.
        write!(
            f,
            "index: {}, bytes: {}, location in bytes: {}, bytes length: {}, variation selector: {}",
            self.value,
            self.raw_bytes_description(),
            self.raw_start_index,
            self.raw_len,
            self.variation_selector_description()
        )
    }
}
/// Failures reported while decoding the next character of a byte buffer.
#[derive(Debug)]
pub enum IndexingError {
    /// The buffer ended before a multi-byte character was complete.
    UnexpectedTerminate {
        /// Position at which the incomplete character was encountered.
        location: usize,
    },
    /// A byte that cannot start a UTF-8 sequence was encountered.
    InvalidUnicodePoint {
        /// Position at which the offending value was found.
        location: usize,
        /// The offending value.
        code_point: usize,
    },
}

impl IndexingError {
    /// Builds the human-readable text that `Display` prints for this error.
    fn error_message(&self) -> String {
        match *self {
            Self::UnexpectedTerminate { location } => {
                format!("unexpected terminated at location {}", location)
            }
            Self::InvalidUnicodePoint { location, code_point } => {
                format!("Invalid Unicode Scalar {} found at {}", code_point, location)
            }
        }
    }
}

impl Display for IndexingError {
    /// Writes the error message verbatim.
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        let message = self.error_message();
        f.write_str(&message)
    }
}

impl Error for IndexingError {}
/// Character-by-character navigation over a text container using [`StringIndex`].
pub trait Indexing {
    /// Returns the index that follows `index`, `Ok(None)` when there is no
    /// further character, or an [`IndexingError`] when decoding fails.
    fn try_index_after(&self, index: &StringIndex) -> Result<Option<StringIndex>, IndexingError>;

    /// Returns the text designated by `index`.
    fn utf8_char_at(&self, index: &StringIndex) -> &str;

    /// Panicking counterpart of [`Indexing::try_index_after`].
    ///
    /// # Panics
    ///
    /// Panics with a descriptive message whenever `try_index_after` reports
    /// an error.
    fn index_after(&self, index: &StringIndex) -> Option<StringIndex> {
        self.try_index_after(index).unwrap_or_else(|error| match error {
            IndexingError::UnexpectedTerminate { location } => {
                panic!("unexpected terminate at {}", location)
            }
            IndexingError::InvalidUnicodePoint { location, code_point } => {
                panic!("invalid utf-8 codepoint {} found at {}", code_point, location)
            }
        })
    }

    /// Returns the index of the first character by stepping once from
    /// [`StringIndex::zero`]; `None` when there is no character at all.
    fn first_index(&self) -> Option<StringIndex> {
        let origin = StringIndex::zero();
        self.index_after(&origin)
    }
}
impl Indexing for String {
    /// Decodes the character that begins right after the span covered by `index`.
    ///
    /// The sequence length is derived from the lead byte. When the character
    /// is directly followed by a variation selector (U+FE00–U+FE0F, encoded as
    /// `EF B8 80` through `EF B8 8F`), the selector is folded into the
    /// returned index so that both are treated as one unit.
    ///
    /// Returns `Ok(None)` when the span of `index` ends at or beyond the end
    /// of the string.
    ///
    /// # Errors
    ///
    /// * [`IndexingError::UnexpectedTerminate`] (carrying the byte offset of
    ///   the lead byte) when the continuation bytes cannot be fetched.
    /// * [`IndexingError::InvalidUnicodePoint`] (carrying the ordinal of the
    ///   character being decoded and the lead byte) when the byte at the
    ///   start offset is not a valid UTF-8 lead byte.
    fn try_index_after(&self, index: &StringIndex) -> Result<Option<StringIndex>, IndexingError> {
        let raw_start_index = index.raw_start_index + index.raw_len as usize;
        let value = index.value + 1;
        let raw_bytes = self.as_bytes();

        let first_byte = match raw_bytes.get(raw_start_index) {
            Some(&byte) => byte,
            None => return Ok(None),
        };

        // Classify the lead byte: 0xxxxxxx, 11110xxx, 1110xxxx or 110xxxxx.
        let character_len: u8 = if first_byte >> 7 == 0 {
            1
        } else if first_byte >> 3 == 0b0001_1110 {
            4
        } else if first_byte >> 4 == 0b0000_1110 {
            3
        } else if first_byte >> 5 == 0b0000_0110 {
            2
        } else {
            return Err(IndexingError::InvalidUnicodePoint {
                location: value,
                code_point: first_byte as usize,
            });
        };

        let mut bytes: [u8; 4] = [first_byte, 0, 0, 0];
        if character_len > 1 {
            let continuation_len = (character_len - 1) as usize;
            match raw_bytes.items(raw_start_index + 1, continuation_len) {
                Some(src_bytes) => {
                    // Continuation bytes go right behind the lead byte.
                    let dst: &mut [u8] = &mut bytes;
                    dst.replace_items(1, src_bytes);
                }
                None => {
                    return Err(IndexingError::UnexpectedTerminate { location: raw_start_index })
                }
            }
        }

        // Only U+FE00–U+FE0F are variation selectors. The rest of the
        // `EF B8 xx` range (U+FE10–U+FE3F) holds independent characters and
        // must not be merged into the preceding one.
        let selector_start = raw_start_index + character_len as usize;
        let extra_variation_selector = match raw_bytes.get(selector_start..selector_start + 3) {
            Some(&[0xef, 0xb8, third @ 0x80..=0x8f]) => Some([0xef, 0xb8, third]),
            _ => None,
        };

        let raw_len = if extra_variation_selector.is_some() {
            character_len + 3
        } else {
            character_len
        };

        Ok(Some(StringIndex {
            value,
            raw_start_index,
            raw_len,
            bytes,
            extra_variation_selector,
        }))
    }

    /// Borrows the slice of this string covered by `index`.
    ///
    /// # Panics
    ///
    /// Panics if the byte range described by `index` is out of bounds or does
    /// not fall on character boundaries of this string.
    fn utf8_char_at(&self, index: &StringIndex) -> &str {
        let raw_end_index = index.raw_start_index + index.raw_len as usize;
        &self[index.raw_start_index..raw_end_index]
    }
}