character_set/utils/
mod.rs1use std::{
2 fmt::{Debug, Display, Formatter},
3 ops::{Range, RangeInclusive},
4};
5
6use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer};
7use ucd_trie::TrieSetOwned;
8
9use crate::CharacterSet;
10
11mod arithmetic;
12mod save;
13
14impl Default for CharacterSet {
15 fn default() -> Self {
16 Self::nil()
17 }
18}
19
20impl Debug for CharacterSet {
21 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
22 write!(f, "CharacterSet({}) ", self.count())?;
23 let mut w = &mut f.debug_set();
24 for range in self.to_ranges() {
25 if range.start() == range.end() {
26 w = w.entry(&(*range.start() as u32))
27 }
28 else {
29 w = w.entry(&RangeInclusive::new(*range.start() as u32, *range.end() as u32))
30 }
31 }
32 w.finish()
33 }
34}
35
36impl Display for CharacterSet {
37 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
38 write!(f, "CharacterSet({}) ", self.count())?;
39 let mut w = &mut f.debug_set();
40 for range in self.to_ranges() {
41 if range.start() == range.end() { w = w.entry(range.start()) } else { w = w.entry(&range) }
42 }
43 w.finish()
44 }
45}
46
47impl CharacterSet {
48 pub fn count(&self) -> usize {
50 self.all.iter().filter(|f| **f == true).count()
51 }
52 pub fn compress(&self) -> TrieSetOwned {
54 let set = TrieSetOwned::from_codepoints(self.codepoints());
55 #[cfg(debug_assertions)]
56 {
57 set.unwrap()
58 }
59 #[cfg(not(debug_assertions))]
60 {
61 unsafe { set.unwrap_unchecked() }
62 }
63 }
64 pub fn contains(&self, c: char) -> bool {
65 self.compress().contains_char(c)
66 }
67 fn codepoints(&self) -> Vec<u32> {
68 let mut codepoints = vec![];
69 let mut this_cp: u32 = 0;
70 for contains in self.all.iter() {
71 if *contains {
72 codepoints.push(this_cp)
73 }
74 this_cp += 1;
75 }
76 return codepoints;
77 }
78
79 pub fn from_ranges(ranges: &[Range<char>]) -> Self {
80 let mut out = Self::nil();
81 for range in ranges {
82 out.include(range.start..range.end).unwrap_or_default()
83 }
84 return out;
85 }
86
87 pub fn to_ranges(&self) -> Vec<RangeInclusive<char>> {
88 let mut ranges = vec![];
89 for cp in self.codepoints() {
90 range_add(&mut ranges, cp);
91 }
92 ranges.into_iter().map(|(min, max)| range_u2c(min, max)).collect()
93 }
94}
95
/// Converts a pair of `u32` code points into an inclusive `char` range.
///
/// # Panics
///
/// Panics if `start` or `end` is not a Unicode scalar value (a surrogate or a
/// value above `0x10FFFF`). The check runs in every build profile; thanks to
/// `#[track_caller]` the panic is reported at the call site.
#[track_caller]
pub(crate) fn range_u2c(start: u32, end: u32) -> RangeInclusive<char> {
    // Checked conversion on purpose: an unchecked cast of an invalid value
    // would be undefined behavior, while the check is two comparisons.
    let start = char::from_u32(start).expect("range start must be a Unicode scalar value");
    let end = char::from_u32(end).expect("range end must be a Unicode scalar value");
    start..=end
}
113
/// Appends `codepoint` to a list of inclusive `(start, end)` runs.
///
/// When `codepoint` directly follows the end of the last run, that run is
/// extended; otherwise a new single-element run is pushed.
///
/// # Panics
///
/// Panics if `codepoint` is not strictly greater than the end of the last
/// run, i.e. callers must feed code points in strictly ascending order.
fn range_add(ranges: &mut Vec<(u32, u32)>, codepoint: u32) {
    let extended = match ranges.last_mut() {
        Some((_, end)) => {
            assert!(*end < codepoint);
            // `*end + 1` cannot overflow: the assert guarantees `*end < u32::MAX`.
            let adjacent = *end + 1 == codepoint;
            if adjacent {
                *end = codepoint;
            }
            adjacent
        }
        None => false,
    };
    if !extended {
        ranges.push((codepoint, codepoint));
    }
}