1use std::collections::{BTreeMap, HashMap};
7use std::collections::btree_map::{IntoIter, Iter};
8use std::fmt::{Display, Formatter};
9use std::ops::Bound::Included;
10use crate::char_reader::{UTF8_HIGH_MIN, UTF8_LOW_MAX, UTF8_MAX, UTF8_MIN};
11use crate::char_reader::escape_char;
12
/// An inclusive range of `u32` code points, written `Seg(first, last)`.
///
/// Both bounds are public and unchecked. The derived ordering compares the
/// first bound, then the second, which is the order [`SegMap`] relies on
/// when it stores segments as keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Seg(pub u32, pub u32);
15
16impl Seg {
17 pub const DOT_LOW: Seg = Seg(UTF8_MIN, UTF8_LOW_MAX);
19 pub const DOT_HIGH: Seg = Seg(UTF8_HIGH_MIN, UTF8_MAX);
21}
22
23impl Display for Seg {
24 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
25 if self.0 == self.1 {
26 write!(f, "'{}'", escape_char(char::from_u32(self.0).unwrap()))
27 } else {
28 write!(f, "'{}'-'{}'", escape_char(char::from_u32(self.0).unwrap()), escape_char(char::from_u32(self.1).unwrap()))
29 }
30 }
31}
32
33#[derive(Debug, Clone, PartialEq)]
34pub struct SegMap<T>(BTreeMap<Seg, T>);
35
36impl<T: Clone> SegMap<T> {
37 pub fn new() -> Self {
38 SegMap(BTreeMap::new())
39 }
40
41 pub fn keys(&self) -> impl Iterator<Item = &Seg> {
42 self.0.keys()
43 }
44
45 pub fn from_iter<I: IntoIterator<Item = (Seg, T)>>(iter: I) -> Self {
46 SegMap(BTreeMap::from_iter(iter))
47 }
48
49 pub fn get(&self, value: u32) -> Option<T> {
50 let (Seg(_a, b), data) = self.0.range((Included(&Seg(0, 0)), Included(&Seg(value, u32::MAX)))).next_back()?;
51 if *b >= value {
52 Some(data.clone())
53 } else {
54 None
55 }
56 }
57
58 pub fn insert(&mut self, key: Seg, value: T) -> Option<T> {
59 self.0.insert(key, value)
60 }
61
62 pub fn clear(&mut self) {
63 self.0.clear();
64 }
65
66 pub fn iter(&self) -> Iter<'_, Seg, T> {
67 self.into_iter()
68 }
69
70 pub fn len(&self) -> usize {
71 self.0.len()
72 }
73}
74
75impl<T: Clone, const N: usize> From<[(Seg, T); N]> for SegMap<T> {
76 fn from(value: [(Seg, T); N]) -> Self {
77 SegMap(BTreeMap::from(value))
78 }
79}
80
81impl<T> IntoIterator for SegMap<T> {
82 type Item = (Seg, T);
83 type IntoIter = IntoIter<Seg, T>;
84
85 fn into_iter(self) -> Self::IntoIter {
86 self.0.into_iter()
87 }
88}
89
90impl<'a, T> IntoIterator for &'a SegMap<T> {
91 type Item = (&'a Seg, &'a T);
92 type IntoIter = Iter<'a, Seg, T>;
93
94 fn into_iter(self) -> Self::IntoIter {
95 self.0.iter()
96 }
97}
98
99#[inline]
100pub fn char_to_group(ascii_to_group: &[GroupId], utf8_to_group: &HashMap<char, GroupId>, seg_to_group: &SegMap<GroupId>, symbol: char) -> Option<GroupId> {
101 if symbol.len_utf8() == 1 {
102 Some(ascii_to_group[u8::try_from(symbol).unwrap() as usize])
103 } else {
104 utf8_to_group.get(&symbol).cloned().or_else(|| seg_to_group.get(symbol as u32))
105 }
106}
107
pub mod macros {
    /// Builds a `Seg` from one bound or from two bounds separated by `-`.
    ///
    /// Each bound is either a literal or an identifier and is passed through
    /// `utf8!` unchanged. With a single bound, both ends of the segment are
    /// that same value.
    ///
    /// NOTE(review): the accepted literals and identifiers are whatever
    /// `utf8!` accepts; that macro is defined elsewhere, so confirm there.
    #[macro_export]
    macro_rules! seg {
        // Two bounds: `seg!(first - last)`.
        ($($lo_lit:literal)?$($lo_id:ident)? - $($hi_lit:literal)?$($hi_id:ident)?) => {
            $crate::segmap::Seg($crate::utf8!($($lo_lit)?$($lo_id)?), $crate::utf8!($($hi_lit)?$($hi_id)?))
        };
        // One bound: `seg!(value)`, used for both ends.
        ($($lit:literal)?$($id:ident)?) => {
            $crate::segmap::Seg($crate::utf8!($($lit)?$($id)?), $crate::utf8!($($lit)?$($id)?))
        };
    }
}
123
/// Numeric identifier of a character group, as returned by `char_to_group`.
pub type GroupId = u32;