lexigram_core/
segmap.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3// ---------------------------------------------------------------------------------------------
4// Seg
5
6use std::collections::{BTreeMap, HashMap};
7use std::collections::btree_map::{IntoIter, Iter};
8use std::fmt::{Display, Formatter};
9use std::ops::Bound::Included;
10use crate::char_reader::{UTF8_HIGH_MIN, UTF8_LOW_MAX, UTF8_MAX, UTF8_MIN};
11use crate::char_reader::escape_char;
12
13#[derive(Clone, Copy, PartialOrd, PartialEq, Eq, Ord, Debug)]
14pub struct Seg(pub u32, pub u32);
15
16impl Seg {
17    /// low segment of Unicode codepoint values:
18    pub const DOT_LOW: Seg = Seg(UTF8_MIN, UTF8_LOW_MAX);
19    /// high segment of Unicode codepoint values:
20    pub const DOT_HIGH: Seg = Seg(UTF8_HIGH_MIN, UTF8_MAX);
21}
22
23impl Display for Seg {
24    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
25        if self.0 == self.1 {
26            write!(f, "'{}'", escape_char(char::from_u32(self.0).unwrap()))
27        } else {
28            write!(f, "'{}'-'{}'", escape_char(char::from_u32(self.0).unwrap()), escape_char(char::from_u32(self.1).unwrap()))
29        }
30    }
31}
32
33#[derive(Debug, Clone, PartialEq)]
34pub struct SegMap<T>(BTreeMap<Seg, T>);
35
36impl<T: Clone> SegMap<T> {
37    pub fn new() -> Self {
38        SegMap(BTreeMap::new())
39    }
40
41    pub fn keys(&self) -> impl Iterator<Item = &Seg> {
42        self.0.keys()
43    }
44
45    pub fn from_iter<I: IntoIterator<Item = (Seg, T)>>(iter: I) -> Self {
46        SegMap(BTreeMap::from_iter(iter))
47    }
48
49    pub fn get(&self, value: u32) -> Option<T> {
50        let (Seg(_a, b), data) = self.0.range((Included(&Seg(0, 0)), Included(&Seg(value, u32::MAX)))).next_back()?;
51        if *b >= value {
52            Some(data.clone())
53        } else {
54            None
55        }
56    }
57
58    pub fn insert(&mut self, key: Seg, value: T) -> Option<T> {
59        self.0.insert(key, value)
60    }
61
62    pub fn clear(&mut self) {
63        self.0.clear();
64    }
65
66    pub fn iter(&self) -> Iter<'_, Seg, T> {
67        self.into_iter()
68    }
69
70    pub fn len(&self) -> usize {
71        self.0.len()
72    }
73}
74
75impl<T: Clone, const N: usize> From<[(Seg, T); N]> for SegMap<T> {
76    fn from(value: [(Seg, T); N]) -> Self {
77        SegMap(BTreeMap::from(value))
78    }
79}
80
81impl<T> IntoIterator for SegMap<T> {
82    type Item = (Seg, T);
83    type IntoIter = IntoIter<Seg, T>;
84
85    fn into_iter(self) -> Self::IntoIter {
86        self.0.into_iter()
87    }
88}
89
90impl<'a, T> IntoIterator for &'a SegMap<T> {
91    type Item = (&'a Seg, &'a T);
92    type IntoIter = Iter<'a, Seg, T>;
93
94    fn into_iter(self) -> Self::IntoIter {
95        self.0.iter()
96    }
97}
98
99#[inline]
100pub fn char_to_group(ascii_to_group: &[GroupId], utf8_to_group: &HashMap<char, GroupId>, seg_to_group: &SegMap<GroupId>, symbol: char) -> Option<GroupId> {
101    if symbol.len_utf8() == 1 {
102        Some(ascii_to_group[u8::try_from(symbol).unwrap() as usize])
103    } else {
104        utf8_to_group.get(&symbol).cloned().or_else(|| seg_to_group.get(symbol as u32))
105    }
106}
107
pub mod macros {
    /// Generates a Seg (tuple struct of two u32 values) from one or two values (characters or
    /// integers).
    ///
    /// Two forms are accepted, each operand being matched as a literal or as an identifier:
    /// - `seg!(a - b)` expands to `Seg(utf8!(a), utf8!(b))`;
    /// - `seg!(a)` expands to `Seg(utf8!(a), utf8!(a))`, the operand being used for both bounds.
    ///
    /// The operands are converted by `$crate::utf8!`, which is not defined in this module.
    ///
    /// # Example
    /// ```
    /// # use lexigram_core::{seg, segmap::Seg};
    /// assert_eq!(seg!('a'), Seg('a' as u32, 'a' as u32));
    /// assert_eq!(seg!('0'-'9'), Seg('0' as u32, '9' as u32));
    /// ```
    #[macro_export]
    macro_rules! seg {
        // Range form: `first - last`.
        ($($a1:literal)?$($a2:ident)? - $($b1:literal)?$($b2:ident)?) => { $crate::segmap::Seg($crate::utf8!($($a1)?$($a2)?), $crate::utf8!($($b1)?$($b2)?)) };
        // Single-value form: the same operand gives both bounds.
        ($($a1:literal)?$($a2:ident)?) => { $crate::segmap::Seg($crate::utf8!($($a1)?$($a2)?), $crate::utf8!($($a1)?$($a2)?)) };
    }
}
123
/// Identifier of a group of characters, as returned by [`char_to_group`].
pub type GroupId = u32;