use std::collections::{BTreeMap, HashMap};
use std::collections::btree_map::{IntoIter, Iter};
use std::fmt::{Display, Formatter};
use std::ops::Bound::Included;
use crate::char_reader::{UTF8_HIGH_MIN, UTF8_LOW_MAX, UTF8_MAX, UTF8_MIN};
use crate::char_reader::escape_char;
/// A segment described by two `u32` bounds: `Seg(start, end)`.
///
/// `SegMap::get` treats the second field as an inclusive upper bound, and the
/// `Display` impl interprets both fields as Unicode code points.
///
/// The derived ordering compares the first field, then the second.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Seg(pub u32, pub u32);

impl Seg {
    /// Segment spanning `UTF8_MIN` to `UTF8_LOW_MAX`.
    ///
    /// NOTE(review): presumably the lower half of the range matched by a regex
    /// "dot" — confirm against the constants in `char_reader`.
    pub const DOT_LOW: Self = Self(UTF8_MIN, UTF8_LOW_MAX);

    /// Segment spanning `UTF8_HIGH_MIN` to `UTF8_MAX`.
    ///
    /// NOTE(review): presumably the upper half of the "dot" range — confirm
    /// against the constants in `char_reader`.
    pub const DOT_HIGH: Self = Self(UTF8_HIGH_MIN, UTF8_MAX);
}
impl Display for Seg {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
if self.0 == self.1 {
write!(f, "'{}'", escape_char(char::from_u32(self.0).unwrap()))
} else {
write!(f, "'{}'-'{}'", escape_char(char::from_u32(self.0).unwrap()), escape_char(char::from_u32(self.1).unwrap()))
}
}
}
/// Map from [`Seg`] keys to values of type `T`.
///
/// The entries live in a `BTreeMap`, so they are kept in `Seg` order. `get`
/// looks up a single `u32` by finding the segment that contains it.
///
/// NOTE(review): nothing in this type prevents inserting overlapping segments,
/// and `get` only inspects the last segment starting at or before the value —
/// callers are presumably expected to keep segments disjoint; confirm.
#[derive(Debug, Clone, PartialEq)]
pub struct SegMap<T>(BTreeMap<Seg, T>);
/// Operations that never copy a stored value, available for any `T`.
impl<T> SegMap<T> {
    /// Creates an empty map.
    pub fn new() -> Self {
        SegMap(BTreeMap::new())
    }

    /// Iterates over the stored segments in ascending `Seg` order.
    pub fn keys(&self) -> impl Iterator<Item = &Seg> {
        self.0.keys()
    }

    /// Stores `value` under `key`.
    ///
    /// Returns the value previously stored under exactly the same `key`, if
    /// any. No check is made for overlap with other segments.
    pub fn insert(&mut self, key: Seg, value: T) -> Option<T> {
        self.0.insert(key, value)
    }

    /// Removes every entry.
    pub fn clear(&mut self) {
        self.0.clear();
    }

    /// Iterates over `(&Seg, &T)` pairs in ascending `Seg` order.
    pub fn iter(&self) -> Iter<'_, Seg, T> {
        self.0.iter()
    }

    /// Number of stored segments.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` when no segment is stored.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
}

/// Lookup by value; this is the only operation that clones, hence the bound.
impl<T: Clone> SegMap<T> {
    /// Returns a clone of the data attached to the segment containing `value`.
    ///
    /// The candidate is the greatest key not above `Seg(value, u32::MAX)`,
    /// i.e. the last segment whose start is `<= value`. It matches when its
    /// inclusive end is `>= value`; otherwise `None` is returned. Because only
    /// that one candidate is inspected, the result is only meaningful when the
    /// stored segments do not overlap.
    pub fn get(&self, value: u32) -> Option<T> {
        self.0
            .range((Included(&Seg(0, 0)), Included(&Seg(value, u32::MAX))))
            .next_back()
            .filter(|(seg, _)| seg.1 >= value)
            .map(|(_, data)| data.clone())
    }
}

/// The default map is empty.
impl<T> Default for SegMap<T> {
    fn default() -> Self {
        Self::new()
    }
}
/// Collects `(segment, value)` pairs into a map, so that
/// `iter.collect::<SegMap<_>>()` works.
impl<T> FromIterator<(Seg, T)> for SegMap<T> {
    fn from_iter<I: IntoIterator<Item = (Seg, T)>>(iter: I) -> Self {
        let entries: BTreeMap<Seg, T> = iter.into_iter().collect();
        SegMap(entries)
    }
}
/// Builds a map from a fixed-size array of `(segment, value)` pairs.
///
/// The pairs are moved into the underlying `BTreeMap`, so `T` needs no
/// `Clone` bound — the same as the `FromIterator` impl.
impl<T, const N: usize> From<[(Seg, T); N]> for SegMap<T> {
    fn from(value: [(Seg, T); N]) -> Self {
        SegMap(BTreeMap::from(value))
    }
}
impl<T> IntoIterator for SegMap<T> {
type Item = (Seg, T);
type IntoIter = IntoIter<Seg, T>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<'a, T> IntoIterator for &'a SegMap<T> {
type Item = (&'a Seg, &'a T);
type IntoIter = Iter<'a, Seg, T>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
#[inline]
pub fn char_to_group(ascii_to_group: &[GroupId], utf8_to_group: &HashMap<char, GroupId>, seg_to_group: &SegMap<GroupId>, symbol: char) -> Option<GroupId> {
if symbol.len_utf8() == 1 {
Some(ascii_to_group[u8::try_from(symbol).unwrap() as usize])
} else {
utf8_to_group.get(&symbol).cloned().or_else(|| seg_to_group.get(symbol as u32))
}
}
/// Holds the `seg!` macro, which `#[macro_export]` makes available at the crate root.
pub mod macros {
/// Builds a `$crate::segmap::Seg` from one or two endpoints.
///
/// * `seg!(a - b)` expands to `Seg(utf8!(a), utf8!(b))`.
/// * `seg!(a)` expands to `Seg(utf8!(a), utf8!(a))`: both bounds come from the same endpoint.
///
/// Each endpoint is matched as an optional literal followed by an optional
/// identifier, so it can be written either way. It is forwarded unchanged to
/// `$crate::utf8!`, which is defined elsewhere in the crate.
///
/// NOTE(review): `utf8!` presumably yields the `u32` code point of its
/// argument — confirm against its definition.
#[macro_export]
macro_rules! seg {
// Two endpoints separated by `-`: each becomes one bound of the segment.
($($a1:literal)?$($a2:ident)? - $($b1:literal)?$($b2:ident)?) => { $crate::segmap::Seg($crate::utf8!($($a1)?$($a2)?), $crate::utf8!($($b1)?$($b2)?)) };
// Single endpoint: the same tokens are used for both bounds.
($($a1:literal)?$($a2:ident)?) => { $crate::segmap::Seg($crate::utf8!($($a1)?$($a2)?), $crate::utf8!($($a1)?$($a2)?)) };
}
}
/// Identifier of a group, as stored in the lookup tables passed to
/// `char_to_group` and returned by it. Alias for `u32`.
pub type GroupId = u32;