Skip to main content

resharp_algebra/unicode_classes/
mod.rs

1mod classes;
2
3use crate::{NodeId, RegexBuilder};
4
5pub use classes::{
6    build_digit_class, build_digit_class_full, build_space_class, build_word_class,
7    build_word_class_full,
8};
9
10pub(crate) fn utf8_char(b: &mut RegexBuilder) -> NodeId {
11    let ascii = b.mk_range_u8(0, 127);
12    let cont = b.mk_range_u8(0x80, 0xBF);
13    let c2 = b.mk_range_u8(0xC0, 0xDF);
14    let c2s = b.mk_concat(c2, cont);
15    let e0 = b.mk_range_u8(0xE0, 0xEF);
16    let e0s = b.mk_concats([e0, cont, cont].into_iter());
17    let f0 = b.mk_range_u8(0xF0, 0xF7);
18    let f0s = b.mk_concats([f0, cont, cont, cont].into_iter());
19    b.mk_unions([ascii, c2s, e0s, f0s].into_iter())
20}
21
22fn neg_class(b: &mut RegexBuilder, positive: NodeId) -> NodeId {
23    let neg = b.mk_compl(positive);
24    let uc = utf8_char(b);
25    b.mk_inters([neg, uc].into_iter())
26}
27
28#[derive(Clone, Debug)]
29pub struct UnicodeClassCache {
30    pub word: NodeId,
31    pub non_word: NodeId,
32    pub digit: NodeId,
33    pub non_digit: NodeId,
34    pub space: NodeId,
35    pub non_space: NodeId,
36}
37
38impl Default for UnicodeClassCache {
39    fn default() -> Self {
40        UnicodeClassCache {
41            word: NodeId::MISSING,
42            non_word: NodeId::MISSING,
43            digit: NodeId::MISSING,
44            non_digit: NodeId::MISSING,
45            space: NodeId::MISSING,
46            non_space: NodeId::MISSING,
47        }
48    }
49}
50
51impl UnicodeClassCache {
52    pub fn ensure_word(&mut self, b: &mut RegexBuilder) {
53        if self.word == NodeId::MISSING {
54            self.word = build_word_class(b);
55            self.non_word = neg_class(b, self.word);
56        }
57    }
58
59    pub fn ensure_word_full(&mut self, b: &mut RegexBuilder) {
60        if self.word == NodeId::MISSING {
61            self.word = build_word_class_full(b);
62            self.non_word = neg_class(b, self.word);
63        }
64    }
65
66    pub fn ensure_digit(&mut self, b: &mut RegexBuilder) {
67        if self.digit == NodeId::MISSING {
68            self.digit = build_digit_class(b);
69            self.non_digit = neg_class(b, self.digit);
70        }
71    }
72
73    pub fn ensure_digit_full(&mut self, b: &mut RegexBuilder) {
74        if self.digit == NodeId::MISSING {
75            self.digit = build_digit_class_full(b);
76            self.non_digit = neg_class(b, self.digit);
77        }
78    }
79
80    pub fn ensure_space(&mut self, b: &mut RegexBuilder) {
81        if self.space == NodeId::MISSING {
82            self.space = build_space_class(b);
83            self.non_space = neg_class(b, self.space);
84        }
85    }
86}