Skip to main content

lexigram_core/
segmap.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3// ---------------------------------------------------------------------------------------------
4// Seg
5
6use std::collections::{BTreeMap, HashMap};
7use std::collections::btree_map::{IntoIter, Iter};
8use std::fmt::{Display, Formatter};
9use std::ops::Bound::Included;
10use crate::char_reader::{UTF8_HIGH_MIN, UTF8_LOW_MAX, UTF8_MAX, UTF8_MIN};
11use crate::char_reader::escape_char;
12
/// Inclusive segment of `u32` values (Unicode code points in this module): `Seg(first, last)`.
///
/// A single value `v` is represented as `Seg(v, v)`. The derived ordering compares the first
/// bound, then the last bound, which is the order used when segments are keys of a sorted map.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Seg(pub u32, pub u32);
15
16impl Seg {
17    /// low segment of Unicode codepoint values:
18    pub const DOT_LOW: Seg = Seg(UTF8_MIN, UTF8_LOW_MAX);
19    /// high segment of Unicode codepoint values:
20    pub const DOT_HIGH: Seg = Seg(UTF8_HIGH_MIN, UTF8_MAX);
21}
22
23impl Display for Seg {
24    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
25        if self.0 == self.1 {
26            write!(f, "'{}'", escape_char(char::from_u32(self.0).unwrap()))
27        } else {
28            write!(f, "'{}'-'{}'", escape_char(char::from_u32(self.0).unwrap()), escape_char(char::from_u32(self.1).unwrap()))
29        }
30    }
31}
32
33#[derive(Debug, Clone, PartialEq)]
34pub struct SegMap<T>(BTreeMap<Seg, T>);
35
36impl<T: Clone> SegMap<T> {
37    pub fn new() -> Self {
38        SegMap(BTreeMap::new())
39    }
40
41    pub fn keys(&self) -> impl Iterator<Item = &Seg> {
42        self.0.keys()
43    }
44
45    pub fn get(&self, value: u32) -> Option<T> {
46        let (Seg(_a, b), data) = self.0.range((Included(&Seg(0, 0)), Included(&Seg(value, u32::MAX)))).next_back()?;
47        if *b >= value {
48            Some(data.clone())
49        } else {
50            None
51        }
52    }
53
54    pub fn insert(&mut self, key: Seg, value: T) -> Option<T> {
55        self.0.insert(key, value)
56    }
57
58    pub fn clear(&mut self) {
59        self.0.clear();
60    }
61
62    pub fn iter(&self) -> Iter<'_, Seg, T> {
63        self.into_iter()
64    }
65
66    pub fn len(&self) -> usize {
67        self.0.len()
68    }
69
70    pub fn is_empty(&self) -> bool {
71        self.0.is_empty()
72    }
73}
74
75impl<T: Clone> Default for SegMap<T> {
76    fn default() -> Self {
77        Self::new()
78    }
79}
80
81impl<T> FromIterator<(Seg, T)> for SegMap<T> {
82    fn from_iter<I: IntoIterator<Item = (Seg, T)>>(iter: I) -> Self {
83        SegMap(BTreeMap::from_iter(iter))
84    }
85}
86
87impl<T: Clone, const N: usize> From<[(Seg, T); N]> for SegMap<T> {
88    fn from(value: [(Seg, T); N]) -> Self {
89        SegMap(BTreeMap::from(value))
90    }
91}
92
93impl<T> IntoIterator for SegMap<T> {
94    type Item = (Seg, T);
95    type IntoIter = IntoIter<Seg, T>;
96
97    fn into_iter(self) -> Self::IntoIter {
98        self.0.into_iter()
99    }
100}
101
102impl<'a, T> IntoIterator for &'a SegMap<T> {
103    type Item = (&'a Seg, &'a T);
104    type IntoIter = Iter<'a, Seg, T>;
105
106    fn into_iter(self) -> Self::IntoIter {
107        self.0.iter()
108    }
109}
110
111#[inline]
112pub fn char_to_group(ascii_to_group: &[GroupId], utf8_to_group: &HashMap<char, GroupId>, seg_to_group: &SegMap<GroupId>, symbol: char) -> Option<GroupId> {
113    if symbol.len_utf8() == 1 {
114        Some(ascii_to_group[u8::try_from(symbol).unwrap() as usize])
115    } else {
116        utf8_to_group.get(&symbol).cloned().or_else(|| seg_to_group.get(symbol as u32))
117    }
118}
119
pub mod macros {
    /// Generates a Seg (tuple of u32 values) from one or two values (characters or integers).
    ///
    /// * `seg!(a)` expands to `Seg(utf8!(a), utf8!(a))`: both bounds are the same value.
    /// * `seg!(a - b)` expands to `Seg(utf8!(a), utf8!(b))`.
    ///
    /// Each value can be a literal or an identifier. It is forwarded unchanged to the crate's
    /// `utf8!` macro, which is defined elsewhere and is responsible for the conversion to the
    /// `u32` bound.
    ///
    /// # Example
    /// ```
    /// # use lexigram_core::{seg, segmap::Seg};
    /// assert_eq!(seg!('a'), Seg('a' as u32, 'a' as u32));
    /// assert_eq!(seg!('0'-'9'), Seg('0' as u32, '9' as u32));
    /// ```
    #[macro_export]
    macro_rules! seg {
        // Two bounds separated by `-`; this rule is tried first.
        ($($a1:literal)?$($a2:ident)? - $($b1:literal)?$($b2:ident)?) => { $crate::segmap::Seg($crate::utf8!($($a1)?$($a2)?), $crate::utf8!($($b1)?$($b2)?)) };
        // Single value, used for both bounds.
        ($($a1:literal)?$($a2:ident)?) => { $crate::segmap::Seg($crate::utf8!($($a1)?$($a2)?), $crate::utf8!($($a1)?$($a2)?)) };
    }
}
135
/// Identifier of a group of characters, as returned by [`char_to_group`].
pub type GroupId = u32;