1use std::{collections::HashMap, rc::Rc};
10
11use pag_lexer::{normalization::normalize, regex_tree::RegexTree};
12
13use crate::frontend::unicode;
14use crate::span_errors;
15use crate::{utilities::unreachable_branch, utilities::Symbol};
16
17use super::{SurfaceSyntaxTree, WithSpan};
18
19use super::Error;
20
/// A reference-counted [`RegexTree`] tagged with the source span it was
/// lowered from (used for error reporting against the original input).
type SpanRegexTree<'a> = WithSpan<'a, Rc<RegexTree>>;
22
/// A single translated lexer rule.
pub struct LexerRule<'a> {
    // `false` marks the skipping rule (whitespace/comments); see how
    // `TranslationContext::create_database` routes inactive tokens to `skip`.
    pub active: bool,
    // The normalized regex for this token, with its defining span.
    pub rule: SpanRegexTree<'a>,
}
27
/// The fully translated lexer: every token rule keyed by its symbol.
pub struct LexerDatabase<'a> {
    // Maps a token's name to its interned symbol.
    pub symbol_table: HashMap<&'a str, Symbol<'a>>,
    // Maps each symbol to its translated rule.
    pub entries: HashMap<Symbol<'a>, LexerRule<'a>>,
    // The single allowed skipping (inactive) rule, if one was declared.
    pub skip: Option<Symbol<'a>>,
}
33
34impl<'a> LexerDatabase<'a> {
35 pub fn new(
36 sst: &WithSpan<'a, SurfaceSyntaxTree<'a>>,
37 ) -> Result<Self, Vec<WithSpan<'a, Error<'a>>>> {
38 TranslationContext::create_database(sst)
39 }
40 pub fn nullability_check(&self) -> Vec<WithSpan<'a, Error<'a>>> {
41 let mut errors = Vec::new();
42 for (sym, rule) in self.entries.iter() {
43 if rule.rule.node.is_nullable() {
44 errors.push(WithSpan {
45 span: rule.rule.span,
46 node: Error::NullableToken(sym.name()),
47 });
48 }
49 }
50 errors
51 }
52}
53
/// Working state for translating a lexer block: named definitions collected
/// first, then the database being built from token rules.
struct TranslationContext<'a> {
    // Named lexical definitions, resolvable from token rules by name.
    definitions: HashMap<&'a str, SpanRegexTree<'a>>,
    // The database under construction.
    database: LexerDatabase<'a>,
}
58
59fn construct_regex_tree<'a, F>(
60 sst: &WithSpan<'a, SurfaceSyntaxTree<'a>>,
61 reference_handler: &F,
62) -> Result<Rc<RegexTree>, Vec<WithSpan<'a, Error<'a>>>>
63where
64 F: Fn(WithSpan<'a, ()>) -> Result<Rc<RegexTree>, Vec<WithSpan<'a, Error<'a>>>>,
65{
66 match &sst.node {
67 SurfaceSyntaxTree::LexicalAlternative { lhs, rhs } => {
68 let lhs = construct_regex_tree(lhs, reference_handler);
69 let rhs = construct_regex_tree(rhs, reference_handler);
70 match (lhs, rhs) {
71 (Err(mut lhs), Err(rhs)) => {
72 lhs.extend(rhs.into_iter());
73 Err(lhs)
74 }
75 (Err(lhs), _) => Err(lhs),
76 (_, Err(rhs)) => Err(rhs),
77 (Ok(lhs), Ok(rhs)) => Ok(Rc::new(RegexTree::Union(lhs, rhs))),
78 }
79 }
80 SurfaceSyntaxTree::LexicalSequence { lhs, rhs } => {
81 let lhs = construct_regex_tree(lhs, reference_handler);
82 let rhs = construct_regex_tree(rhs, reference_handler);
83 match (lhs, rhs) {
84 (Err(mut lhs), Err(rhs)) => {
85 lhs.extend(rhs.into_iter());
86 Err(lhs)
87 }
88 (Err(lhs), _) => Err(lhs),
89 (_, Err(rhs)) => Err(rhs),
90 (Ok(lhs), Ok(rhs)) => Ok(Rc::new(RegexTree::Concat(lhs, rhs))),
91 }
92 }
93 SurfaceSyntaxTree::LexicalStar { inner } => {
94 let inner = construct_regex_tree(inner, reference_handler)?;
95 Ok(Rc::new(RegexTree::KleeneClosure(inner)))
96 }
97 SurfaceSyntaxTree::LexicalPlus { inner } => {
98 let inner = construct_regex_tree(inner, reference_handler)?;
99 Ok(Rc::new(RegexTree::Concat(
100 inner.clone(),
101 Rc::new(RegexTree::KleeneClosure(inner)),
102 )))
103 }
104 SurfaceSyntaxTree::LexicalOptional { inner } => {
105 let inner = construct_regex_tree(inner, reference_handler)?;
106 Ok(Rc::new(RegexTree::Union(
107 inner,
108 Rc::new(RegexTree::Epsilon),
109 )))
110 }
111 SurfaceSyntaxTree::LexicalNot { inner } => {
112 let inner = construct_regex_tree(inner, reference_handler)?;
113 Ok(Rc::new(RegexTree::Complement(inner)))
114 }
115 SurfaceSyntaxTree::Range { start, end } => Ok(unicode::encode_range(*start, *end)),
116 SurfaceSyntaxTree::String(x) => {
117 if x.is_empty() {
118 Ok(Rc::new(RegexTree::Epsilon))
119 } else {
120 let mut iter = x.bytes();
121 let fst = Rc::new(RegexTree::single(unsafe { iter.next().unwrap_unchecked() }));
122 Ok(iter.fold(fst, |acc, c| {
123 Rc::new(RegexTree::Concat(acc, Rc::new(RegexTree::single(c))))
124 }))
125 }
126 }
127 SurfaceSyntaxTree::Bottom => Ok(Rc::new(RegexTree::Bottom)),
128 SurfaceSyntaxTree::Empty => Ok(Rc::new(RegexTree::Epsilon)),
129 SurfaceSyntaxTree::Char { value } => Ok(unicode::encode_char(value.node)),
130 SurfaceSyntaxTree::LexicalRuleRef { name } => reference_handler(name.clone()),
131 _ => unreachable_branch!(
132 "lexer translation is called with unsupported code: {}",
133 sst.span.as_str()
134 ),
135 }
136}
137
138fn construct_definition<'a>(
139 sst: &WithSpan<'a, SurfaceSyntaxTree<'a>>,
140) -> Result<(&'a str, SpanRegexTree<'a>), Vec<WithSpan<'a, Error<'a>>>> {
141 fn reference_handler(
142 name: WithSpan<'_, ()>,
143 ) -> Result<Rc<RegexTree>, Vec<WithSpan<'_, Error<'_>>>> {
144 Err(span_errors!(
145 InvalidLexicalReference,
146 name.span,
147 name.span.as_str(),
148 ))
149 }
150 match &sst.node {
151 SurfaceSyntaxTree::LexicalDefinition { name, expr } => {
152 let name = name.span.as_str();
153 let tree = construct_regex_tree(expr, &reference_handler)?;
154 Ok((
155 name,
156 WithSpan {
157 span: expr.span,
158 node: normalize(tree),
159 },
160 ))
161 }
162 _ => unreachable_branch!(
163 "lexer translation can only be called with lexical definition: {}",
164 sst.span.as_str()
165 ),
166 }
167}
168
169fn construct_lexical_rule<'a>(
170 definitions: &HashMap<&'a str, SpanRegexTree<'a>>,
171 sst: &WithSpan<'a, SurfaceSyntaxTree<'a>>,
172) -> Result<(Symbol<'a>, SpanRegexTree<'a>), Vec<WithSpan<'a, Error<'a>>>> {
173 let reference_handler = |name: WithSpan<'a, ()>| match definitions.get(name.span.as_str()) {
174 Some(x) => Ok(x.node.clone()),
175 _ => Err(span_errors!(
176 UndefinedLexicalReference,
177 name.span,
178 name.span.as_str(),
179 )),
180 };
181 match &sst.node {
182 SurfaceSyntaxTree::LexicalToken { name, expr, .. } => {
183 let name = name.span.as_str();
184 let tree = construct_regex_tree(expr, &reference_handler)?;
185 Ok((
186 Symbol::new(name),
187 WithSpan {
188 span: expr.span,
189 node: normalize(tree),
190 },
191 ))
192 }
193 _ => unreachable_branch!(
194 "lexer rule translation can only be called with lexical definition: {}",
195 sst.span.as_str()
196 ),
197 }
198}
199
200impl<'a> TranslationContext<'a> {
201 fn new() -> Self {
202 TranslationContext {
203 definitions: HashMap::new(),
204 database: LexerDatabase {
205 entries: HashMap::new(),
206 symbol_table: HashMap::new(),
207 skip: None,
208 },
209 }
210 }
211 fn populate_definitions(
212 &mut self,
213 sst: &WithSpan<'a, SurfaceSyntaxTree<'a>>,
214 ) -> Result<(), Vec<WithSpan<'a, Error<'a>>>> {
215 match &sst.node {
216 SurfaceSyntaxTree::Lexer { rules } => {
217 let mut error = Vec::new();
218 for i in rules
219 .iter()
220 .filter(|x| matches!(x.node, SurfaceSyntaxTree::LexicalDefinition { .. }))
221 .map(construct_definition)
222 {
223 match i {
224 Err(e) => {
225 error.extend(e.into_iter());
226 }
227 Ok((k, v)) => {
228 let current_span = v.span;
229 match self.definitions.insert(k, v) {
230 None => (),
231 Some(x) => {
232 error.push(WithSpan {
233 span: current_span,
234 node: Error::MultipleDefinition(k, x.span),
235 });
236 }
237 }
238 }
239 }
240 }
241
242 if error.is_empty() {
243 Ok(())
244 } else {
245 Err(error)
246 }
247 }
248 _ => Err(span_errors!(
249 InternalLogicalError,
250 sst.span,
251 "populating definitions from a non-lexer node".into(),
252 )),
253 }
254 }
255 fn create_database(
256 sst: &WithSpan<'a, SurfaceSyntaxTree<'a>>,
257 ) -> Result<LexerDatabase<'a>, Vec<WithSpan<'a, Error<'a>>>> {
258 let mut ctx = Self::new();
259 let mut errs = match ctx.populate_definitions(sst) {
260 Ok(_) => Vec::new(),
261 Err(errs) => errs,
262 };
263 match &sst.node {
264 SurfaceSyntaxTree::Lexer { rules } => {
265 for i in rules
266 .iter()
267 .filter_map(|x| match &x.node {
268 SurfaceSyntaxTree::LexicalToken { active, .. } => Some((*active, x)),
269 _ => None,
270 })
271 .map(|(active, sst)| (active, construct_lexical_rule(&ctx.definitions, sst)))
272 {
273 match i.1 {
274 Err(e) => {
275 errs.extend(e.into_iter());
276 }
277 Ok((k, rule)) => {
278 if !i.0 {
279 if let Some(x) = ctx.database.skip.replace(k) {
280 errs.push(WithSpan {
281 span: rule.span,
282 node: Error::MultipleSkippingRule(x.name()),
283 });
284 }
285 }
286 let current_span = rule.span;
287 match ctx.database.symbol_table.insert(k.name(), k) {
288 None => {
289 ctx.database
290 .entries
291 .insert(k, LexerRule { active: i.0, rule });
292 }
293 Some(x) => {
294 errs.push(WithSpan {
295 span: current_span,
296 node: Error::MultipleDefinition(k.name(), unsafe {
297 ctx.database
298 .entries
299 .get(&x)
300 .unwrap_unchecked()
301 .rule
302 .span
303 }),
304 });
305 }
306 }
307 }
308 }
309 }
310
311 if errs.is_empty() {
312 Ok(ctx.database)
313 } else {
314 Err(errs)
315 }
316 }
317 _ => Err(span_errors!(
318 InternalLogicalError,
319 sst.span,
320 "creating lexer rule database from a non-lexer node".into(),
321 )),
322 }
323 }
324}