1use crate::{
2 RegexLanguage, RegexParser, RegexSyntaxKind,
3 ast::{Alternative, Assertion, AssertionKind, CharacterClass, CharacterRange, Group, GroupKind, Literal, Pattern, PatternElement, RegexRoot, Special, SpecialKind},
4 lexer::RegexLexer,
5};
6use core::range::Range;
7use oak_core::{
8 Builder, GreenNode, Lexer, OakDiagnostics, OakError, Parser, RedNode, RedTree, SourceText,
9 builder::{BuildOutput, BuilderCache},
10 parser::ParseSession,
11 source::{Source, TextEdit},
12};
13
14impl<'config> Builder<RegexLanguage> for RegexParser<'config> {
15 fn build<'a, S: Source + ?Sized>(&self, source: &S, edits: &[TextEdit], _cache: &'a mut impl BuilderCache<RegexLanguage>) -> BuildOutput<RegexLanguage> {
16 let parser = RegexParser::new(self.config);
17 let lexer = RegexLexer::new(&self.config);
18
19 let mut session = ParseSession::<RegexLanguage>::default();
20 lexer.lex(source, edits, &mut session);
21 let parse_result = parser.parse(source, edits, &mut session);
22
23 match parse_result.result {
24 Ok(green_tree) => {
25 let source_text = SourceText::new(source.get_text_in((0..source.length()).into()).into_owned());
26 match self.build_root(&green_tree, &source_text) {
27 Ok(ast_root) => OakDiagnostics { result: Ok(ast_root), diagnostics: parse_result.diagnostics },
28 Err(build_error) => {
29 let mut diagnostics = parse_result.diagnostics;
30 diagnostics.push(build_error.clone());
31 OakDiagnostics { result: Err(build_error), diagnostics }
32 }
33 }
34 }
35 Err(parse_error) => OakDiagnostics { result: Err(parse_error), diagnostics: parse_result.diagnostics },
36 }
37 }
38}
39
40impl<'config> RegexParser<'config> {
41 pub(crate) fn build_root<'a>(&self, green_tree: &'a GreenNode<'a, RegexLanguage>, source: &SourceText) -> Result<RegexRoot, OakError> {
42 let red_root = RedNode::new(green_tree, 0);
43 let mut patterns = Vec::new();
44
45 for child in red_root.children() {
46 match child {
47 RedTree::Node(n) => match n.green.kind {
48 RegexSyntaxKind::RegexPattern => {
49 let pattern = self.build_pattern(n, source)?;
50 patterns.push(pattern);
51 }
52 _ => {
53 return Err(OakError::syntax_error("Unexpected item in root".to_string(), n.span().start, None));
54 }
55 },
56 RedTree::Leaf(t) => {
57 if t.kind == RegexSyntaxKind::Whitespace || t.kind == RegexSyntaxKind::Comment {
58 continue;
59 }
60 return Err(OakError::syntax_error("Unexpected token in root".to_string(), t.span.start, None));
61 }
62 }
63 }
64
65 Ok(RegexRoot { alternatives: patterns })
66 }
67
68 pub(crate) fn build_pattern(&self, node: RedNode<RegexLanguage>, source: &SourceText) -> Result<Pattern, OakError> {
70 let span = node.span();
71 let mut elements = Vec::new();
72
73 for child in node.children() {
74 match child {
75 RedTree::Node(n) => {
76 let element = self.build_pattern_element(n, source)?;
77 elements.push(element);
78 }
79 RedTree::Leaf(t) => {
80 if t.kind == RegexSyntaxKind::Pipe {
81 continue;
82 }
83 if t.kind == RegexSyntaxKind::Whitespace || t.kind == RegexSyntaxKind::Comment {
84 continue;
85 }
86
87 if t.kind == RegexSyntaxKind::Character {
88 let value = text(source, t.span.clone());
89 let lit = Literal { value, span: t.span.clone() };
90 elements.push(PatternElement::Literal(lit));
91 }
92 else {
93 return Err(OakError::syntax_error("Unexpected token in pattern".to_string(), t.span.start, None));
94 }
95 }
96 }
97 }
98
99 Ok(Pattern { alternatives: vec![Alternative { elements, span: span.clone() }], span })
100 }
101
102 pub(crate) fn build_pattern_element(&self, node: RedNode<RegexLanguage>, source: &SourceText) -> Result<PatternElement, OakError> {
104 let span = node.span();
105
106 match node.green.kind {
107 RegexSyntaxKind::Character => {
108 let value = text(source, node.span());
109 Ok(PatternElement::Literal(Literal { value, span }))
110 }
111 RegexSyntaxKind::Dot => Ok(PatternElement::Special(Special { kind: SpecialKind::Any, span })),
112 RegexSyntaxKind::LBrack => self.build_character_class(node, source),
113 RegexSyntaxKind::LParen => self.build_group(node, source),
114 RegexSyntaxKind::Question | RegexSyntaxKind::Star | RegexSyntaxKind::Plus | RegexSyntaxKind::LBrace => Err(OakError::syntax_error("Quantifier without preceding element".to_string(), span.start, None)),
115 RegexSyntaxKind::Backslash => self.build_escape_sequence(node, source),
116 RegexSyntaxKind::Hat | RegexSyntaxKind::Dollar => self.build_assertion(node, source),
117 _ => Err(OakError::syntax_error(format!("Unexpected pattern element: {:?}", node.green.kind), span.start, None)),
118 }
119 }
120
121 fn build_character_class(&self, node: RedNode<RegexLanguage>, source: &SourceText) -> Result<PatternElement, OakError> {
123 let span = node.span();
124 let mut ranges: Vec<CharacterRange> = Vec::new();
125 let mut negated = false;
126
127 for child in node.children() {
128 match child {
129 RedTree::Leaf(t) => {
130 match t.kind {
131 RegexSyntaxKind::Hat => {
132 negated = true;
133 }
134 RegexSyntaxKind::Character => {
135 let value = text(source, t.span.clone());
136 let ch = value.chars().next().unwrap();
137 ranges.push(CharacterRange { start: ch, end: None, span: t.span.clone() });
138 }
139 RegexSyntaxKind::Dash => {
140 }
142 _ => {
143 }
145 }
146 }
147 RedTree::Node(n) => {
148 return Err(OakError::syntax_error("Unexpected node in character class".to_string(), n.span().start, None));
149 }
150 }
151 }
152
153 Ok(PatternElement::CharacterClass(CharacterClass { negated, ranges, span }))
154 }
155
156 fn build_group(&self, node: RedNode<RegexLanguage>, _source: &SourceText) -> Result<PatternElement, OakError> {
158 let span = node.span();
159 Ok(PatternElement::Group(Group { kind: GroupKind::NonCapturing, element: Box::new(PatternElement::Literal(Literal { value: String::new(), span: span.clone() })), span }))
160 }
161
162 fn build_escape_sequence(&self, node: RedNode<RegexLanguage>, source: &SourceText) -> Result<PatternElement, OakError> {
164 let span = node.span();
165 let mut escaped_char = None;
166
167 for child in node.children() {
168 match child {
169 RedTree::Leaf(t) => {
170 if t.kind == RegexSyntaxKind::Character {
171 let value = text(source, t.span.clone());
172 escaped_char = value.chars().next();
173 }
174 }
175 _ => {
176 return Err(OakError::syntax_error("Unexpected node in escape sequence".to_string(), child.span().start, None));
177 }
178 }
179 }
180
181 if let Some(c) = escaped_char { Ok(PatternElement::Special(Special { kind: SpecialKind::Control(c), span })) } else { Err(OakError::syntax_error("Invalid escape sequence".to_string(), span.start, None)) }
182 }
183
184 fn build_assertion(&self, node: RedNode<RegexLanguage>, _source: &SourceText) -> Result<PatternElement, OakError> {
186 let span = node.span();
187
188 match node.green.kind {
189 RegexSyntaxKind::Hat => Ok(PatternElement::Assertion(Assertion { kind: AssertionKind::Start, span })),
190 RegexSyntaxKind::Dollar => Ok(PatternElement::Assertion(Assertion { kind: AssertionKind::End, span })),
191 _ => Err(OakError::syntax_error(format!("Unexpected assertion: {:?}", node.green.kind), span.start, None)),
192 }
193 }
194}
195
196fn text(source: &SourceText, span: Range<usize>) -> String {
198 source.get_text_in(span.into()).to_string()
199}