mkwebfont_common/
character_set.rs

1use crate::hashing::WyHashSet;
2use bincode::{Decode, Encode};
3use std::{
4    collections::hash_set::{IntoIter, Iter},
5    fmt::{Debug, Formatter},
6    ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign},
7};
8
9#[derive(Clone, Eq, PartialEq, Default)]
10pub struct CharacterSet(WyHashSet<u32>);
11impl CharacterSet {
12    pub fn new() -> Self {
13        Self::default()
14    }
15
16    pub fn insert(&mut self, character: u32) -> bool {
17        self.0.insert(character)
18    }
19
20    pub fn intersects(&self, other: &CharacterSet) -> bool {
21        for char in &self.0 {
22            if other.contains(*char) {
23                return true;
24            }
25        }
26        false
27    }
28
29    pub fn remove(&mut self, character: u32) -> bool {
30        self.0.remove(&character)
31    }
32
33    pub fn contains(&self, character: u32) -> bool {
34        self.0.contains(&character)
35    }
36
37    pub fn len(&self) -> usize {
38        self.0.len()
39    }
40
41    pub fn is_empty(&self) -> bool {
42        self.0.is_empty()
43    }
44
45    pub fn min(&self) -> Option<u32> {
46        self.0.iter().min().cloned()
47    }
48
49    pub fn debug_str(&self) -> String {
50        self.iter_sorted()
51            .map(|x| char::from_u32(x).unwrap())
52            .collect()
53    }
54
55    /// Returns an iterator.
56    pub fn iter(&self) -> CharacterSetIter {
57        CharacterSetIter(self.0.iter())
58    }
59
60    /// Returns an iterator.
61    pub fn iter_sorted(&self) -> std::vec::IntoIter<u32> {
62        let mut vec: Vec<_> = self.iter().collect();
63        vec.sort();
64        vec.into_iter()
65    }
66
67    /// Converts this into an iterator.
68    pub fn into_iter(self) -> CharacterSetIntoIter {
69        CharacterSetIntoIter(self.0.into_iter())
70    }
71
72    /// Decompresses the character set.
73    pub fn decompress(data: &CompressedCharacterSet) -> Self {
74        let mut data = data.0.clone();
75        if data.len() >= 2 {
76            for i in 1..data.len() {
77                data[i] += data[i - 1];
78            }
79        }
80        CharacterSet(data.into_iter().collect())
81    }
82
83    /// Compresses a character set.
84    pub fn compressed(&self) -> CompressedCharacterSet {
85        let mut data: Vec<_> = self.iter_sorted().collect();
86        if data.len() >= 2 {
87            for i in (1..data.len()).rev() {
88                data[i] -= data[i - 1];
89            }
90        }
91
92        CompressedCharacterSet(data)
93    }
94}
95impl Debug for CharacterSet {
96    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
97        write!(f, "[set of {} characters]", self.0.len())
98    }
99}
100impl IntoIterator for CharacterSet {
101    type Item = u32;
102    type IntoIter = CharacterSetIntoIter;
103    fn into_iter(self) -> Self::IntoIter {
104        self.into_iter()
105    }
106}
107impl<'a> IntoIterator for &'a CharacterSet {
108    type Item = u32;
109    type IntoIter = CharacterSetIter<'a>;
110    fn into_iter(self) -> Self::IntoIter {
111        self.iter()
112    }
113}
114impl Extend<u32> for CharacterSet {
115    fn extend<T: IntoIterator<Item = u32>>(&mut self, iter: T) {
116        self.0.extend(iter)
117    }
118}
119
120impl<'a> BitAndAssign<&'a CharacterSet> for CharacterSet {
121    fn bitand_assign(&mut self, rhs: &'a CharacterSet) {
122        self.0.retain(|x| rhs.contains(*x));
123    }
124}
125impl<'a> BitOrAssign<&'a CharacterSet> for CharacterSet {
126    fn bitor_assign(&mut self, rhs: &'a CharacterSet) {
127        for char in rhs {
128            self.insert(char);
129        }
130    }
131}
132impl<'a> BitXorAssign<&'a CharacterSet> for CharacterSet {
133    fn bitxor_assign(&mut self, rhs: &'a CharacterSet) {
134        for char in rhs {
135            if !self.contains(char) {
136                self.insert(char);
137            } else {
138                self.remove(char);
139            }
140        }
141    }
142}
143impl<'a> SubAssign<&'a CharacterSet> for CharacterSet {
144    fn sub_assign(&mut self, rhs: &'a CharacterSet) {
145        for char in rhs {
146            self.remove(char);
147        }
148    }
149}
150
// Generates the remaining operator impls for one operator family, on top of the hand-written
// `impl $assign_trait<&CharacterSet> for CharacterSet`.
//
// Arguments:
// * `$trait_name` / `$trait_func`: the value-returning operator trait and its method
//   (for example `BitAnd` / `bitand`).
// * `$assign_trait` / `$assign_func`: the matching compound-assignment trait and its method
//   (for example `BitAndAssign` / `bitand_assign`).
//
// Every generated impl is intended to end up in the hand-written assign impl that takes a
// borrowed right-hand side; the calls below rely on argument types to select that impl.
macro_rules! bitops {
    ($trait_name:ident, $trait_func:ident, $assign_trait:ident, $assign_func:ident) => {
        // `set op= owned_set`: borrow the right-hand side and forward.
        impl $assign_trait for CharacterSet {
            fn $assign_func(&mut self, rhs: CharacterSet) {
                $assign_trait::$assign_func(self, &rhs);
            }
        }
        // `(&mut set) op= owned_set`: here `self` is `&mut &mut CharacterSet`, so `*self` is
        // the `&mut CharacterSet` to operate on.
        impl<'a> $assign_trait<CharacterSet> for &'a mut CharacterSet {
            fn $assign_func(&mut self, rhs: CharacterSet) {
                $assign_trait::$assign_func(*self, &rhs);
            }
        }
        // `(&mut set) op= &other_set`.
        impl<'a, 'b> $assign_trait<&'a CharacterSet> for &'b mut CharacterSet {
            fn $assign_func(&mut self, rhs: &'a CharacterSet) {
                $assign_trait::$assign_func(*self, rhs);
            }
        }
        // `owned_set op owned_set`: forwards to the borrowed right-hand side impl below.
        impl $trait_name for CharacterSet {
            type Output = CharacterSet;
            fn $trait_func(self, rhs: CharacterSet) -> Self::Output {
                $trait_name::$trait_func(self, &rhs)
            }
        }
        // `owned_set op &other_set`: mutates the owned left-hand side in place and returns it.
        impl<'a> $trait_name<&'a CharacterSet> for CharacterSet {
            type Output = CharacterSet;
            fn $trait_func(mut self, rhs: &'a CharacterSet) -> Self::Output {
                $assign_trait::$assign_func(&mut self, rhs);
                self
            }
        }
        // `&set op owned_set`: the left-hand side is only borrowed, so it is cloned first.
        impl<'a> $trait_name<CharacterSet> for &'a CharacterSet {
            type Output = CharacterSet;
            fn $trait_func(self, rhs: CharacterSet) -> Self::Output {
                $trait_name::$trait_func(self.clone(), &rhs)
            }
        }
        // `&set op &other_set`: clones the left-hand side, leaving both operands untouched.
        impl<'a, 'b> $trait_name<&'a CharacterSet> for &'b CharacterSet {
            type Output = CharacterSet;
            fn $trait_func(self, rhs: &'a CharacterSet) -> Self::Output {
                $trait_name::$trait_func(self.clone(), rhs)
            }
        }
    };
}
// `&` keeps the values present in both sets.
bitops!(BitAnd, bitand, BitAndAssign, bitand_assign);
// `|` adds the values of the right-hand set.
bitops!(BitOr, bitor, BitOrAssign, bitor_assign);
// `^` toggles the values of the right-hand set.
bitops!(BitXor, bitxor, BitXorAssign, bitxor_assign);
// `-` removes the values of the right-hand set.
bitops!(Sub, sub, SubAssign, sub_assign);
199
/// The iterator for [`CharacterSet`]
///
/// Yields copies of the stored values, in the arbitrary order of the underlying hash set.
pub struct CharacterSetIter<'a>(Iter<'a, u32>);
impl<'a> Iterator for CharacterSetIter<'a> {
    type Item = u32;

    /// Returns the next value, or `None` once every value has been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next().copied()
    }

    /// Reports the bounds of the wrapped iterator, so that consumers such as `collect` can
    /// reserve the right amount of space up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
208
/// The owned iterator for [`CharacterSet`]
///
/// Yields the values of a consumed set, in the arbitrary order of the underlying hash set.
pub struct CharacterSetIntoIter(IntoIter<u32>);
impl Iterator for CharacterSetIntoIter {
    type Item = u32;

    /// Returns the next value, or `None` once every value has been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }

    /// Reports the bounds of the wrapped iterator, so that consumers such as `collect` can
    /// reserve the right amount of space up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
217
/// A compressed character set.
///
/// This does not take less memory, but compresses better, as it is delta encoded and not in a
/// random order as a [`HashSet`](std::collections::HashSet) would be.
///
/// As produced by [`CharacterSet::compressed`], the vector holds the values in ascending
/// order: the first entry is the smallest value itself and every later entry is the difference
/// to the value before it. [`CharacterSet::decompress`] reverses the encoding.
#[derive(Clone, Encode, Decode)]
pub struct CompressedCharacterSet(Vec<u32>);
224impl Debug for CompressedCharacterSet {
225    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
226        write!(f, "[compressed set of {} characters]", self.0.len())
227    }
228}