Skip to main content

resharp_algebra/unicode_classes/
mod.rs

1mod classes;
2
3use crate::{NodeId, RegexBuilder};
4
5pub use classes::*;
6
7fn utf8_char(b: &mut RegexBuilder) -> NodeId {
8    let ascii = b.mk_range_u8(0, 127);
9    let cont = b.mk_range_u8(0x80, 0xBF);
10    let c2 = b.mk_range_u8(0xC0, 0xDF);
11    let c2s = b.mk_concat(c2, cont);
12    let e0 = b.mk_range_u8(0xE0, 0xEF);
13    let e0s = b.mk_concats([e0, cont, cont].into_iter());
14    let f0 = b.mk_range_u8(0xF0, 0xF7);
15    let f0s = b.mk_concats([f0, cont, cont, cont].into_iter());
16    b.mk_unions([ascii, c2s, e0s, f0s].into_iter())
17}
18
19fn neg_class(b: &mut RegexBuilder, positive: NodeId) -> NodeId {
20    let neg = b.mk_compl(positive);
21    let uc = utf8_char(b);
22    b.mk_inters([neg, uc].into_iter())
23}
24
/// Lazily populated cache of nodes for the word, digit and space classes and
/// their negations.
///
/// Every field starts out as `NodeId::MISSING` (see the `Default` impl) and is
/// filled in, one positive/negative pair at a time, by the matching
/// `ensure_*` method.
#[derive(Clone, Debug)]
pub struct UnicodeClassCache {
    /// Node produced by `build_word_class`; set by `ensure_word`.
    pub word: NodeId,
    /// Negation of `word` as built by `neg_class`; set by `ensure_word`.
    pub non_word: NodeId,
    /// Node produced by `build_digit_class`; set by `ensure_digit`.
    pub digit: NodeId,
    /// Negation of `digit` as built by `neg_class`; set by `ensure_digit`.
    pub non_digit: NodeId,
    /// Node produced by `build_space_class`; set by `ensure_space`.
    pub space: NodeId,
    /// Negation of `space` as built by `neg_class`; set by `ensure_space`.
    pub non_space: NodeId,
}
34
35impl Default for UnicodeClassCache {
36    fn default() -> Self {
37        UnicodeClassCache {
38            word: NodeId::MISSING,
39            non_word: NodeId::MISSING,
40            digit: NodeId::MISSING,
41            non_digit: NodeId::MISSING,
42            space: NodeId::MISSING,
43            non_space: NodeId::MISSING,
44        }
45    }
46}
47
48impl UnicodeClassCache {
49    pub fn ensure_word(&mut self, b: &mut RegexBuilder) {
50        if self.word == NodeId::MISSING {
51            self.word = build_word_class(b);
52            self.non_word = neg_class(b, self.word);
53        }
54    }
55
56    pub fn ensure_digit(&mut self, b: &mut RegexBuilder) {
57        if self.digit == NodeId::MISSING {
58            self.digit = build_digit_class(b);
59            self.non_digit = neg_class(b, self.digit);
60        }
61    }
62
63    pub fn ensure_space(&mut self, b: &mut RegexBuilder) {
64        if self.space == NodeId::MISSING {
65            self.space = build_space_class(b);
66            self.non_space = neg_class(b, self.space);
67        }
68    }
69}