// pex_trie/unicode_set/mod.rs

1use crate::writer::EasyWrite;
2use std::{
3    collections::BTreeSet,
4    fmt::{Debug, Display, Formatter},
5};
6use ucd_trie::{Error, TrieSetOwned};
7
/// A named collection of Unicode scalar values that can be exported as Rust
/// source code defining a `TrieSetSlice` constant.
pub struct UnicodeSet {
    /// Identifier interpolated into the generated `const` declaration.
    name: String,
    /// Width limit handed to the number-list writer when the tables are emitted.
    max_width: usize,
    /// Members of the set; the `BTreeSet` keeps them ordered and unique.
    set: BTreeSet<char>,
}
13
14impl Debug for UnicodeSet {
15    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
16        f.debug_struct("UnicodeSet").field("name", &self.name).field("count", &self.set.len()).finish()
17    }
18}
19
20impl UnicodeSet {
21    pub fn new(set: &str) -> Self {
22        let mut name = String::with_capacity(set.len());
23        for c in set.chars() {
24            if c.is_ascii_alphanumeric() {
25                // upper
26                name.push(c.to_ascii_uppercase());
27            }
28            else {
29                name.push('_');
30            }
31        }
32
33        Self { name, max_width: 144, set: BTreeSet::new() }
34    }
35    pub fn with_ranges(mut self, ranges: &[(char, char)]) -> Self {
36        for (start, end) in ranges {
37            for i in *start..=*end {
38                self.set.insert(i);
39            }
40        }
41        self
42    }
43    pub fn with_chars<I>(mut self, chars: I) -> Self
44    where
45        I: IntoIterator<Item = char>,
46    {
47        for c in chars {
48            self.set.insert(c);
49        }
50        self
51    }
52    pub fn with_max_width(mut self, max_width: usize) -> Self {
53        assert!(max_width >= 42, "max_width must be at least 42");
54        self.max_width = max_width;
55        self
56    }
57    pub fn export_rust_code(&self) -> Result<String, Error> {
58        let name = self.name.as_str();
59        let mut code = format!("#[rustfmt::skip]\nconst {name}: TrieSetSlice<'static> = TrieSetSlice");
60        code.push_str(" {\n");
61        let trie = TrieSetOwned::from_scalars(self.set.iter())?;
62        let trie = trie.as_slice();
63        self.write_slice_numbers(&mut code, trie.tree1_level1, "tree1_level1");
64        self.write_slice_numbers(&mut code, trie.tree2_level1, "tree2_level1");
65        self.write_slice_numbers(&mut code, trie.tree2_level2, "tree2_level2");
66        self.write_slice_numbers(&mut code, trie.tree3_level1, "tree3_level1");
67        self.write_slice_numbers(&mut code, trie.tree3_level2, "tree3_level2");
68        self.write_slice_numbers(&mut code, trie.tree3_level3, "tree3_level3");
69        code.push_str("};\n");
70        Ok(code)
71    }
72    fn write_slice_numbers<T: Display>(&self, buffer: &mut String, slice: &[T], field: &str) {
73        buffer.push_str(field);
74        buffer.write_slice_numbers(slice, self.max_width, 8).ok();
75    }
76}