character_set/utils/
mod.rs

1use std::{
2    fmt::{Debug, Display, Formatter},
3    ops::{Range, RangeInclusive},
4};
5
6use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer};
7use ucd_trie::TrieSetOwned;
8
9use crate::CharacterSet;
10
11mod arithmetic;
12mod save;
13
14impl Default for CharacterSet {
15    fn default() -> Self {
16        Self::nil()
17    }
18}
19
20impl Debug for CharacterSet {
21    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
22        write!(f, "CharacterSet({}) ", self.count())?;
23        let mut w = &mut f.debug_set();
24        for range in self.to_ranges() {
25            if range.start() == range.end() {
26                w = w.entry(&(*range.start() as u32))
27            }
28            else {
29                w = w.entry(&RangeInclusive::new(*range.start() as u32, *range.end() as u32))
30            }
31        }
32        w.finish()
33    }
34}
35
36impl Display for CharacterSet {
37    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
38        write!(f, "CharacterSet({}) ", self.count())?;
39        let mut w = &mut f.debug_set();
40        for range in self.to_ranges() {
41            if range.start() == range.end() { w = w.entry(range.start()) } else { w = w.entry(&range) }
42        }
43        w.finish()
44    }
45}
46
47impl CharacterSet {
48    /// Count how many characters are in this set
49    pub fn count(&self) -> usize {
50        self.all.iter().filter(|f| **f == true).count()
51    }
52    /// Determines whether the set contains the given character
53    pub fn compress(&self) -> TrieSetOwned {
54        let set = TrieSetOwned::from_codepoints(self.codepoints());
55        #[cfg(debug_assertions)]
56        {
57            set.unwrap()
58        }
59        #[cfg(not(debug_assertions))]
60        {
61            unsafe { set.unwrap_unchecked() }
62        }
63    }
64    pub fn contains(&self, c: char) -> bool {
65        self.compress().contains_char(c)
66    }
67    fn codepoints(&self) -> Vec<u32> {
68        let mut codepoints = vec![];
69        let mut this_cp: u32 = 0;
70        for contains in self.all.iter() {
71            if *contains {
72                codepoints.push(this_cp)
73            }
74            this_cp += 1;
75        }
76        return codepoints;
77    }
78
79    pub fn from_ranges(ranges: &[Range<char>]) -> Self {
80        let mut out = Self::nil();
81        for range in ranges {
82            out.include(range.start..range.end).unwrap_or_default()
83        }
84        return out;
85    }
86
87    pub fn to_ranges(&self) -> Vec<RangeInclusive<char>> {
88        let mut ranges = vec![];
89        for cp in self.codepoints() {
90            range_add(&mut ranges, cp);
91        }
92        ranges.into_iter().map(|(min, max)| range_u2c(min, max)).collect()
93    }
94}
95
/// Converts an inclusive pair of code points into an inclusive range of `char`s.
///
/// The conversion is checked in every build profile: this is a safe function,
/// so it must not reach undefined behaviour when handed a value that is not a
/// Unicode scalar value.
///
/// # Panics
///
/// Panics if `start` or `end` is not a valid Unicode scalar value, i.e. it is a
/// surrogate or greater than `0x10FFFF`.
#[track_caller]
pub(crate) fn range_u2c(start: u32, end: u32) -> RangeInclusive<char> {
    let start = char::from_u32(start).expect("range start must be a valid Unicode scalar value");
    let end = char::from_u32(end).expect("range end must be a valid Unicode scalar value");
    start..=end
}
113
/// Appends `codepoint` to a list of inclusive `(start, end)` ranges.
///
/// If `codepoint` directly follows the end of the last range, that range is
/// extended; otherwise a new single-element range is pushed.
///
/// Reference:
/// https://github.com/BurntSushi/ucd-generate/blob/07c11775dbc8e659e5e9485284f74fe7429ead6c/src/util.rs#L206
///
/// # Panics
///
/// Panics if `codepoint` is not strictly greater than the end of the last
/// range, i.e. code points must be added in strictly ascending order.
fn range_add(ranges: &mut Vec<(u32, u32)>, codepoint: u32) {
    // Decide first, with a shared borrow, whether the last range can grow.
    let extends_last = match ranges.last() {
        Some((_, end)) => {
            assert!(*end < codepoint);
            *end + 1 == codepoint
        }
        None => false,
    };
    if extends_last {
        if let Some(last) = ranges.last_mut() {
            last.1 = codepoint;
        }
    }
    else {
        ranges.push((codepoint, codepoint));
    }
}