Skip to main content

oak_regex/builder/
mod.rs

1use crate::{
2    RegexLanguage, RegexParser, RegexSyntaxKind,
3    ast::{Alternative, Assertion, AssertionKind, CharacterClass, CharacterRange, Group, GroupKind, Literal, Pattern, PatternElement, RegexRoot, Special, SpecialKind},
4    lexer::RegexLexer,
5};
6use core::range::Range;
7use oak_core::{
8    Builder, GreenNode, Lexer, OakDiagnostics, OakError, Parser, RedNode, RedTree, SourceText,
9    builder::{BuildOutput, BuilderCache},
10    parser::ParseSession,
11    source::{Source, TextEdit},
12};
13
14impl<'config> Builder<RegexLanguage> for RegexParser<'config> {
15    fn build<'a, S: Source + ?Sized>(&self, source: &S, edits: &[TextEdit], _cache: &'a mut impl BuilderCache<RegexLanguage>) -> BuildOutput<RegexLanguage> {
16        let parser = RegexParser::new(self.config);
17        let lexer = RegexLexer::new(&self.config);
18
19        let mut session = ParseSession::<RegexLanguage>::default();
20        lexer.lex(source, edits, &mut session);
21        let parse_result = parser.parse(source, edits, &mut session);
22
23        match parse_result.result {
24            Ok(green_tree) => {
25                let source_text = SourceText::new(source.get_text_in((0..source.length()).into()).into_owned());
26                match self.build_root(&green_tree, &source_text) {
27                    Ok(ast_root) => OakDiagnostics { result: Ok(ast_root), diagnostics: parse_result.diagnostics },
28                    Err(build_error) => {
29                        let mut diagnostics = parse_result.diagnostics;
30                        diagnostics.push(build_error.clone());
31                        OakDiagnostics { result: Err(build_error), diagnostics }
32                    }
33                }
34            }
35            Err(parse_error) => OakDiagnostics { result: Err(parse_error), diagnostics: parse_result.diagnostics },
36        }
37    }
38}
39
40impl<'config> RegexParser<'config> {
41    pub(crate) fn build_root<'a>(&self, green_tree: &'a GreenNode<'a, RegexLanguage>, source: &SourceText) -> Result<RegexRoot, OakError> {
42        let red_root = RedNode::new(green_tree, 0);
43        let mut patterns = Vec::new();
44
45        for child in red_root.children() {
46            match child {
47                RedTree::Node(n) => match n.green.kind {
48                    RegexSyntaxKind::RegexPattern => {
49                        let pattern = self.build_pattern(n, source)?;
50                        patterns.push(pattern);
51                    }
52                    _ => {
53                        return Err(OakError::syntax_error("Unexpected item in root".to_string(), n.span().start, None));
54                    }
55                },
56                RedTree::Leaf(t) => {
57                    if t.kind == RegexSyntaxKind::Whitespace || t.kind == RegexSyntaxKind::Comment {
58                        continue;
59                    }
60                    return Err(OakError::syntax_error("Unexpected token in root".to_string(), t.span.start, None));
61                }
62            }
63        }
64
65        Ok(RegexRoot { alternatives: patterns })
66    }
67
68    /// Build a pattern from a node
69    pub(crate) fn build_pattern(&self, node: RedNode<RegexLanguage>, source: &SourceText) -> Result<Pattern, OakError> {
70        let span = node.span();
71        let mut elements = Vec::new();
72
73        for child in node.children() {
74            match child {
75                RedTree::Node(n) => {
76                    let element = self.build_pattern_element(n, source)?;
77                    elements.push(element);
78                }
79                RedTree::Leaf(t) => {
80                    if t.kind == RegexSyntaxKind::Pipe {
81                        continue;
82                    }
83                    if t.kind == RegexSyntaxKind::Whitespace || t.kind == RegexSyntaxKind::Comment {
84                        continue;
85                    }
86
87                    if t.kind == RegexSyntaxKind::Character {
88                        let value = text(source, t.span.clone());
89                        let lit = Literal { value, span: t.span.clone() };
90                        elements.push(PatternElement::Literal(lit));
91                    }
92                    else {
93                        return Err(OakError::syntax_error("Unexpected token in pattern".to_string(), t.span.start, None));
94                    }
95                }
96            }
97        }
98
99        Ok(Pattern { alternatives: vec![Alternative { elements, span: span.clone() }], span })
100    }
101
102    /// Build a pattern element from a node
103    pub(crate) fn build_pattern_element(&self, node: RedNode<RegexLanguage>, source: &SourceText) -> Result<PatternElement, OakError> {
104        let span = node.span();
105
106        match node.green.kind {
107            RegexSyntaxKind::Character => {
108                let value = text(source, node.span());
109                Ok(PatternElement::Literal(Literal { value, span }))
110            }
111            RegexSyntaxKind::Dot => Ok(PatternElement::Special(Special { kind: SpecialKind::Any, span })),
112            RegexSyntaxKind::LBrack => self.build_character_class(node, source),
113            RegexSyntaxKind::LParen => self.build_group(node, source),
114            RegexSyntaxKind::Question | RegexSyntaxKind::Star | RegexSyntaxKind::Plus | RegexSyntaxKind::LBrace => Err(OakError::syntax_error("Quantifier without preceding element".to_string(), span.start, None)),
115            RegexSyntaxKind::Backslash => self.build_escape_sequence(node, source),
116            RegexSyntaxKind::Hat | RegexSyntaxKind::Dollar => self.build_assertion(node, source),
117            _ => Err(OakError::syntax_error(format!("Unexpected pattern element: {:?}", node.green.kind), span.start, None)),
118        }
119    }
120
121    /// Build a character class from a node
122    fn build_character_class(&self, node: RedNode<RegexLanguage>, source: &SourceText) -> Result<PatternElement, OakError> {
123        let span = node.span();
124        let mut ranges: Vec<CharacterRange> = Vec::new();
125        let mut negated = false;
126
127        for child in node.children() {
128            match child {
129                RedTree::Leaf(t) => {
130                    match t.kind {
131                        RegexSyntaxKind::Hat => {
132                            negated = true;
133                        }
134                        RegexSyntaxKind::Character => {
135                            let value = text(source, t.span.clone());
136                            let ch = value.chars().next().unwrap();
137                            ranges.push(CharacterRange { start: ch, end: None, span: t.span.clone() });
138                        }
139                        RegexSyntaxKind::Dash => {
140                            // Range separator, ignored in this minimal implementation
141                        }
142                        _ => {
143                            // Skip other tokens
144                        }
145                    }
146                }
147                RedTree::Node(n) => {
148                    return Err(OakError::syntax_error("Unexpected node in character class".to_string(), n.span().start, None));
149                }
150            }
151        }
152
153        Ok(PatternElement::CharacterClass(CharacterClass { negated, ranges, span }))
154    }
155
156    /// Build a group from a node
157    fn build_group(&self, node: RedNode<RegexLanguage>, _source: &SourceText) -> Result<PatternElement, OakError> {
158        let span = node.span();
159        Ok(PatternElement::Group(Group { kind: GroupKind::NonCapturing, element: Box::new(PatternElement::Literal(Literal { value: String::new(), span: span.clone() })), span }))
160    }
161
162    /// Build an escape sequence from a node
163    fn build_escape_sequence(&self, node: RedNode<RegexLanguage>, source: &SourceText) -> Result<PatternElement, OakError> {
164        let span = node.span();
165        let mut escaped_char = None;
166
167        for child in node.children() {
168            match child {
169                RedTree::Leaf(t) => {
170                    if t.kind == RegexSyntaxKind::Character {
171                        let value = text(source, t.span.clone());
172                        escaped_char = value.chars().next();
173                    }
174                }
175                _ => {
176                    return Err(OakError::syntax_error("Unexpected node in escape sequence".to_string(), child.span().start, None));
177                }
178            }
179        }
180
181        if let Some(c) = escaped_char { Ok(PatternElement::Special(Special { kind: SpecialKind::Control(c), span })) } else { Err(OakError::syntax_error("Invalid escape sequence".to_string(), span.start, None)) }
182    }
183
184    /// Build an assertion from a node
185    fn build_assertion(&self, node: RedNode<RegexLanguage>, _source: &SourceText) -> Result<PatternElement, OakError> {
186        let span = node.span();
187
188        match node.green.kind {
189            RegexSyntaxKind::Hat => Ok(PatternElement::Assertion(Assertion { kind: AssertionKind::Start, span })),
190            RegexSyntaxKind::Dollar => Ok(PatternElement::Assertion(Assertion { kind: AssertionKind::End, span })),
191            _ => Err(OakError::syntax_error(format!("Unexpected assertion: {:?}", node.green.kind), span.start, None)),
192        }
193    }
194}
195
196/// Helper function to extract text from source
197fn text(source: &SourceText, span: Range<usize>) -> String {
198    source.get_text_in(span.into()).to_string()
199}