1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
use std::{
    fmt::{Debug, Display, Formatter},
    ops::{Range, RangeInclusive},
};

use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer};
use ucd_trie::TrieSetOwned;

use crate::CharacterSet;

mod arithmetic;
mod save;

impl Default for CharacterSet {
    fn default() -> Self {
        Self::nil()
    }
}

impl Debug for CharacterSet {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "CharacterSet({}) ", self.count())?;
        let mut w = &mut f.debug_set();
        for range in self.to_ranges() {
            if range.start() == range.end() {
                w = w.entry(&(*range.start() as u32))
            }
            else {
                w = w.entry(&RangeInclusive::new(*range.start() as u32, *range.end() as u32))
            }
        }
        w.finish()
    }
}

impl Display for CharacterSet {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "CharacterSet({}) ", self.count())?;
        let mut w = &mut f.debug_set();
        for range in self.to_ranges() {
            if range.start() == range.end() { w = w.entry(range.start()) } else { w = w.entry(&range) }
        }
        w.finish()
    }
}

impl CharacterSet {
    /// Count how many characters are in this set.
    pub fn count(&self) -> usize {
        self.all.iter().filter(|&&present| present).count()
    }

    /// Builds a [`TrieSetOwned`] holding the same code points as this set.
    ///
    /// # Panics
    ///
    /// Panics if `ucd_trie` fails to construct the trie from the code points.
    pub fn compress(&self) -> TrieSetOwned {
        // Trie construction is fallible, so the result is checked in every
        // build profile; unwrapping an `Err` without a check would be
        // undefined behaviour.
        TrieSetOwned::from_codepoints(self.codepoints())
            .expect("failed to build a trie from the character set")
    }

    /// Determines whether the set contains the given character.
    pub fn contains(&self, c: char) -> bool {
        // `all` holds one flag per code point, in code point order (see
        // `codepoints`), so membership is a direct lookup and does not need
        // a freshly built trie.
        self.all.iter().nth(c as usize).copied().unwrap_or(false)
    }

    /// Lists, in ascending order, every code point whose flag is set.
    fn codepoints(&self) -> Vec<u32> {
        (0u32..)
            .zip(self.all.iter())
            .filter(|&(_, &present)| present)
            .map(|(cp, _)| cp)
            .collect()
    }

    /// Builds a set by starting from [`CharacterSet::nil`] and passing each
    /// of the given ranges to `include`.
    ///
    /// A failure reported by `include` for any range is silently ignored.
    pub fn from_ranges(ranges: &[Range<char>]) -> Self {
        let mut out = Self::nil();
        for range in ranges {
            // NOTE(review): errors are discarded here, presumably as a
            // deliberate best-effort; confirm against `include`.
            out.include(range.start..range.end).unwrap_or_default();
        }
        out
    }

    /// Returns the members of this set as inclusive ranges of consecutive
    /// characters, in ascending order.
    pub fn to_ranges(&self) -> Vec<RangeInclusive<char>> {
        let mut ranges = vec![];
        for cp in self.codepoints() {
            range_add(&mut ranges, cp);
        }
        ranges.into_iter().map(|(min, max)| range_u2c(min, max)).collect()
    }
}

/// Converts a pair of `u32` code points into an inclusive range of `char`s.
///
/// The conversion is checked in every build profile: this is a safe function,
/// so it must not rely on callers to pass only valid scalar values.
///
/// # Panics
///
/// Panics if `start` or `end` is not a Unicode scalar value, i.e. it is a
/// surrogate or lies above `0x10FFFF`.
#[track_caller]
pub(crate) fn range_u2c(start: u32, end: u32) -> RangeInclusive<char> {
    let start = char::from_u32(start).expect("range start is not a Unicode scalar value");
    let end = char::from_u32(end).expect("range end is not a Unicode scalar value");
    start..=end
}

/// Appends `codepoint` to a sorted list of inclusive `(start, end)` ranges.
///
/// When `codepoint` directly follows the end of the last range, that range is
/// extended; otherwise a new single-element range is pushed.
///
/// Adapted from
/// <https://github.com/BurntSushi/ucd-generate/blob/07c11775dbc8e659e5e9485284f74fe7429ead6c/src/util.rs#L206>
///
/// # Panics
///
/// Panics if `codepoint` is not strictly greater than the end of the last
/// range, i.e. code points must be added in strictly increasing order.
fn range_add(ranges: &mut Vec<(u32, u32)>, codepoint: u32) {
    let extended_last = ranges.last_mut().map_or(false, |(_, end)| {
        assert!(*end < codepoint);
        // The assertion above guarantees this subtraction cannot underflow.
        let adjacent = codepoint - *end == 1;
        if adjacent {
            *end = codepoint;
        }
        adjacent
    });
    if !extended_last {
        ranges.push((codepoint, codepoint));
    }
}