1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
use std::{
fmt::{Debug, Display, Formatter},
ops::{Range, RangeInclusive},
};
use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer};
use ucd_trie::TrieSetOwned;
use crate::CharacterSet;
mod arithmetic;
mod save;
impl Default for CharacterSet {
fn default() -> Self {
Self::nil()
}
}
impl Debug for CharacterSet {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "CharacterSet({}) ", self.count())?;
let mut w = &mut f.debug_set();
for range in self.to_ranges() {
if range.start() == range.end() {
w = w.entry(&(*range.start() as u32))
}
else {
w = w.entry(&RangeInclusive::new(*range.start() as u32, *range.end() as u32))
}
}
w.finish()
}
}
impl Display for CharacterSet {
    /// Writes `CharacterSet(<count>) ` followed by a set of characters.
    ///
    /// A run of length one is printed as a single `char`; a longer run is
    /// printed as an inclusive `char` range. Entries use the `Debug`
    /// rendering of `char`, since they are emitted through `debug_set`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "CharacterSet({}) ", self.count())?;
        let mut set = f.debug_set();
        for span in self.to_ranges() {
            let is_single = span.start() == span.end();
            if is_single {
                set.entry(span.start());
            } else {
                set.entry(&span);
            }
        }
        set.finish()
    }
}
impl CharacterSet {
    /// Returns how many code points are members of this set.
    pub fn count(&self) -> usize {
        self.all.iter().filter(|present| **present).count()
    }

    /// Builds a compressed [`TrieSetOwned`] containing the same code points as this set.
    ///
    /// # Panics
    ///
    /// Panics if `TrieSetOwned::from_codepoints` returns an error. The result
    /// is checked in every build profile: trie construction is fallible, so
    /// skipping the check would be undefined behaviour on failure.
    pub fn compress(&self) -> TrieSetOwned {
        TrieSetOwned::from_codepoints(self.codepoints())
            .expect("CharacterSet could not be compressed into a TrieSetOwned")
    }

    /// Returns `true` if `c` is a member of this set.
    pub fn contains(&self, c: char) -> bool {
        // Look the flag up directly by code point instead of building a
        // complete trie for a single membership query.
        self.all.iter().nth(c as usize).map_or(false, |present| *present)
    }

    /// Collects the code points whose membership flag is set, in ascending order.
    fn codepoints(&self) -> Vec<u32> {
        // The position of each flag in `all` is the code point it describes.
        (0u32..)
            .zip(self.all.iter())
            .filter(|(_, present)| **present)
            .map(|(cp, _)| cp)
            .collect()
    }

    /// Builds a set by starting from [`CharacterSet::nil`] and calling
    /// `include` once for each of the given half-open ranges.
    pub fn from_ranges(ranges: &[Range<char>]) -> Self {
        let mut out = Self::nil();
        for range in ranges {
            // Best effort: a failure reported by `include` is deliberately
            // discarded and the remaining ranges are still processed.
            out.include(range.start..range.end).unwrap_or_default();
        }
        out
    }

    /// Returns the members of this set as inclusive ranges, in ascending order.
    ///
    /// Consecutive code points are merged into a single range.
    pub fn to_ranges(&self) -> Vec<RangeInclusive<char>> {
        let mut ranges: Vec<(u32, u32)> = Vec::new();
        for cp in self.codepoints() {
            range_add(&mut ranges, cp);
        }
        ranges.into_iter().map(|(min, max)| range_u2c(min, max)).collect()
    }
}
/// Converts a pair of inclusive `u32` bounds into a `RangeInclusive<char>`.
///
/// The conversion is checked in every build profile. This is a safe function
/// that accepts arbitrary `u32` values, so an unchecked conversion would be
/// undefined behaviour for surrogates or values above `0x10FFFF`.
///
/// # Panics
///
/// Panics if `start` or `end` is not a valid Unicode scalar value. Because of
/// `#[track_caller]`, the panic location reported is the caller's.
#[track_caller]
pub(crate) fn range_u2c(start: u32, end: u32) -> RangeInclusive<char> {
    let start = char::from_u32(start).expect("range start is not a valid Unicode scalar value");
    let end = char::from_u32(end).expect("range end is not a valid Unicode scalar value");
    start..=end
}
/// Appends `codepoint` to a list of inclusive `(start, end)` runs.
///
/// If `codepoint` directly follows the end of the last run, that run is
/// extended; otherwise a new single-element run is pushed.
///
/// # Panics
///
/// Panics if `codepoint` is not strictly greater than the end of the last
/// run, i.e. code points must be supplied in strictly increasing order.
fn range_add(ranges: &mut Vec<(u32, u32)>, codepoint: u32) {
    match ranges.last_mut() {
        Some((_, end)) => {
            assert!(*end < codepoint);
            // The assert guarantees `*end < u32::MAX`, so `*end + 1` cannot overflow.
            if *end + 1 == codepoint {
                *end = codepoint;
                return;
            }
        }
        None => {}
    }
    ranges.push((codepoint, codepoint));
}