1use std::collections::{HashMap, HashSet};
7
8use crate::grammar::{GrammarRule, GrammarRuleParser, Precedence};
9use crate::state::ParserState;
10use builder_pattern::Builder;
11use sipha_core::traits::{GrammarContext, NodeId, RuleId, TokenKind};
12use sipha_error::ParseError;
13use sipha_tree::{CstKind, NodeChildren};
14
15#[derive(Builder)]
50pub struct Parser<K: TokenKind, R: RuleId> {
51 #[default(HashMap::new())]
53 grammar_rules: HashMap<R, GrammarRule<K, R>>,
54 #[default(HashSet::new())]
56 sync_tokens: HashSet<K>,
57 #[default(10)]
59 max_recovery_depth: usize,
60}
61
62impl<K: TokenKind, R: RuleId> Parser<K, R> {
63 pub fn create() -> Self {
67 Self::default()
68 }
69
70 pub fn set_max_recovery_depth(&mut self, depth: usize) {
72 self.max_recovery_depth = depth;
73 }
74
75 pub fn register_rule(&mut self, rule_id: R, rule: GrammarRule<K, R>) {
80 if let Err(err) = crate::grammar::validate_rule(&rule) {
81 panic!("Invalid grammar rule registered: {:?}", err);
82 }
83 self.grammar_rules.insert(rule_id, rule);
84 }
85
86 pub fn set_sync_tokens(&mut self, tokens: Vec<K>) {
88 self.sync_tokens = tokens.into_iter().collect();
89 }
90
91 #[must_use = "parse results should be checked for errors"]
93 pub fn parse_rule<'tokens, 'arena, N, Ctx>(
94 &mut self,
95 rule_id: R,
96 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
97 ) -> Result<N, ParseError<K, R>>
98 where
99 N: NodeId,
100 Ctx: GrammarContext,
101 {
102 let start_pos = state.position();
103 if let Some(memo_entry) = state.check_memo(rule_id) {
104 match memo_entry {
105 sipha_memo::MemoEntry::Success { node, consumed } => {
106 let node = *node;
107 let consumed = *consumed;
108 state.reset(start_pos + consumed);
109 return Ok(node);
110 }
111 sipha_memo::MemoEntry::Failure { error, consumed } => {
112 let error = error.clone();
113 let consumed = *consumed;
114 state.reset(start_pos + consumed);
115 return Err(error);
116 }
117 }
118 }
119
120 state.push_rule(rule_id);
121 let result = {
122 let rule = self.grammar_rules.get(&rule_id).cloned().ok_or_else(|| {
123 ParseError::InvalidContext {
124 rule: "unknown rule",
125 span: state.current_span(),
126 rule_context: state.current_rule(),
127 context_stack: state.build_context_stack(),
128 }
129 })?;
130 self.parse_grammar_rule(rule_id, &rule, state)
131 };
132
133 let consumed = state.position() - start_pos;
134 match &result {
135 Ok(node) => state.store_memo_success(rule_id, start_pos, *node, consumed),
136 Err(error) => state.store_memo_failure(rule_id, start_pos, error.clone(), consumed),
137 }
138
139 state.pop_rule();
140 result
141 }
142
143 fn try_recover<'tokens, 'arena, N, Ctx>(
145 &self,
146 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
147 recovery_depth: usize,
148 ) -> bool
149 where
150 N: NodeId,
151 Ctx: GrammarContext,
152 {
153 if recovery_depth >= self.max_recovery_depth {
154 return false;
155 }
156
157 let start_pos = state.position();
158
159 if !self.sync_tokens.is_empty() {
160 let mut offset = 0;
161 let max_scan = 100;
162
163 while offset < max_scan {
164 if let Some(token) = state.peek(offset) {
165 if self.sync_tokens.contains(&token.kind) {
166 state.reset(start_pos + offset);
167 return true;
168 }
169 offset += 1;
170 } else {
171 break;
172 }
173 }
174 }
175
176 let mut offset = 1;
177 let max_skip = 10;
178
179 while offset < max_skip {
180 if let Some(token) = state.peek(offset) {
181 if !token.kind.is_trivia() {
182 state.reset(start_pos + offset);
183 return true;
184 }
185 offset += 1;
186 } else {
187 break;
188 }
189 }
190
191 false
192 }
193
194 fn parse_sequence<'tokens, 'arena, N, Ctx>(
195 &mut self,
196 rule_id: R,
197 rules: &[GrammarRule<K, R>],
198 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
199 ) -> Result<N, ParseError<K, R>>
200 where
201 N: NodeId,
202 Ctx: GrammarContext,
203 {
204 self.parse_sequence_with_recovery(rule_id, rules, state, 0)
205 }
206
207 fn parse_sequence_with_recovery<'tokens, 'arena, N, Ctx>(
208 &mut self,
209 rule_id: R,
210 rules: &[GrammarRule<K, R>],
211 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
212 recovery_depth: usize,
213 ) -> Result<N, ParseError<K, R>>
214 where
215 N: NodeId,
216 Ctx: GrammarContext,
217 {
218 let start_pos = state.position();
219 let mut builder = state.start_node(rule_id);
220 for rule in rules {
221 match self.parse_grammar_rule(rule_id, rule, state) {
222 Ok(child) => {
223 builder.node(child);
224 }
225 Err(err) => {
226 let error_span = state.current_span();
227 if self.try_recover(state, recovery_depth) {
228 let error_node = state.alloc_node(
229 CstKind::Error("parse error"),
230 error_span,
231 NodeChildren::new(),
232 );
233 builder.node(error_node);
234 } else {
235 state.reset(start_pos);
236 return Err(err);
237 }
238 }
239 }
240 }
241 Ok(builder.finish(state))
242 }
243
244 fn parse_choice<'tokens, 'arena, N, Ctx>(
245 &mut self,
246 rule_id: R,
247 rules: &[GrammarRule<K, R>],
248 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
249 ) -> Result<N, ParseError<K, R>>
250 where
251 N: NodeId,
252 Ctx: GrammarContext,
253 {
254 if rules.is_empty() {
255 return Err(ParseError::InvalidContext {
256 rule: "empty choice",
257 span: state.current_span(),
258 rule_context: state.current_rule(),
259 context_stack: state.build_context_stack(),
260 });
261 }
262
263 let start = state.position();
264 let mut errors: Vec<ParseError<K, R>> = Vec::new();
265
266 for rule in rules {
267 if let GrammarRule::Token(expected_kind) = rule {
268 if state.at(*expected_kind) {
269 match self.parse_grammar_rule(rule_id, rule, state) {
270 Ok(node) => return Ok(node),
271 Err(err) => {
272 errors.push(err);
273 state.reset(start);
274 }
275 }
276 continue;
277 } else {
278 errors.push(ParseError::Expected {
279 expected: sipha_error::Expected::Single(*expected_kind),
280 found: state.peek(0).map(|t| t.kind),
281 span: state.current_span(),
282 rule_context: state.current_rule(),
283 context_stack: state.build_context_stack(),
284 });
285 continue;
286 }
287 }
288
289 match self.parse_grammar_rule(rule_id, rule, state) {
290 Ok(node) => return Ok(node),
291 Err(err) => {
292 errors.push(err);
293 state.reset(start);
294 }
295 }
296 }
297
298 if errors.is_empty() {
299 Err(ParseError::UnexpectedEof {
300 span: state.current_span(),
301 rule_context: state.current_rule(),
302 context_stack: state.build_context_stack(),
303 })
304 } else if errors.len() == 1 {
305 Err(errors.into_iter().next().unwrap())
306 } else {
307 Err(ParseError::MultipleAlternatives {
308 alternatives: errors,
309 span: state.current_span(),
310 rule_context: state.current_rule(),
311 context_stack: state.build_context_stack(),
312 })
313 }
314 }
315
316 fn parse_optional<'tokens, 'arena, N, Ctx>(
317 &mut self,
318 rule_id: R,
319 rule: &GrammarRule<K, R>,
320 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
321 ) -> Result<N, ParseError<K, R>>
322 where
323 N: NodeId,
324 Ctx: GrammarContext,
325 {
326 let start = state.position();
327 match self.parse_grammar_rule(rule_id, rule, state) {
328 Ok(node) => Ok(node),
329 Err(_) => {
330 state.reset(start);
331 Ok(state.alloc_node(
332 CstKind::Rule(rule_id),
333 state.current_span(),
334 NodeChildren::new(),
335 ))
336 }
337 }
338 }
339
340 fn parse_zero_or_more<'tokens, 'arena, N, Ctx>(
341 &mut self,
342 rule_id: R,
343 rule: &GrammarRule<K, R>,
344 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
345 ) -> Result<N, ParseError<K, R>>
346 where
347 N: NodeId,
348 Ctx: GrammarContext,
349 {
350 let mut builder = state.start_node(rule_id);
351 while let Ok(child) = self.parse_grammar_rule(rule_id, rule, state) {
352 builder.node(child);
353 }
354 Ok(builder.finish(state))
355 }
356
357 fn parse_one_or_more<'tokens, 'arena, N, Ctx>(
358 &mut self,
359 rule_id: R,
360 rule: &GrammarRule<K, R>,
361 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
362 ) -> Result<N, ParseError<K, R>>
363 where
364 N: NodeId,
365 Ctx: GrammarContext,
366 {
367 let mut builder = state.start_node(rule_id);
368 let first = self.parse_grammar_rule(rule_id, rule, state)?;
369 builder.node(first);
370 while let Ok(child) = self.parse_grammar_rule(rule_id, rule, state) {
371 builder.node(child);
372 }
373 Ok(builder.finish(state))
374 }
375
376 fn parse_range<'tokens, 'arena, N, Ctx>(
377 &mut self,
378 rule_id: R,
379 rule: &GrammarRule<K, R>,
380 min: usize,
381 max: usize,
382 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
383 ) -> Result<N, ParseError<K, R>>
384 where
385 N: NodeId,
386 Ctx: GrammarContext,
387 {
388 if min > max {
389 return Err(ParseError::InvalidContext {
390 rule: "range min > max",
391 span: state.current_span(),
392 rule_context: state.current_rule(),
393 context_stack: state.build_context_stack(),
394 });
395 }
396 let mut builder = state.start_node(rule_id);
397 let mut count = 0;
398
399 while count < max {
400 match self.parse_grammar_rule(rule_id, rule, state) {
401 Ok(node) => {
402 builder.node(node);
403 count += 1;
404 }
405 Err(err) => {
406 if count < min {
407 return Err(err);
408 }
409 break;
410 }
411 }
412 }
413
414 if count < min {
415 return Err(ParseError::RangeError {
416 min,
417 max,
418 found: count,
419 span: state.current_span(),
420 rule_context: state.current_rule(),
421 context_stack: state.build_context_stack(),
422 });
423 }
424
425 Ok(builder.finish(state))
426 }
427
428 fn parse_pratt_expr<'tokens, 'arena, N, Ctx>(
429 &mut self,
430 _rule_id: R,
431 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
432 _min_prec: Precedence,
433 ) -> Result<N, ParseError<K, R>>
434 where
435 N: NodeId,
436 Ctx: GrammarContext,
437 {
438 Err(ParseError::InvalidContext {
440 rule: "Pratt parsing not yet integrated",
441 span: state.current_span(),
442 rule_context: state.current_rule(),
443 context_stack: state.build_context_stack(),
444 })
445 }
446
447 fn parse_negative_lookahead<'tokens, 'arena, N, Ctx>(
448 &mut self,
449 rule_id: R,
450 rule: &GrammarRule<K, R>,
451 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
452 ) -> Result<N, ParseError<K, R>>
453 where
454 N: NodeId,
455 Ctx: GrammarContext,
456 {
457 let start = state.position();
458 match self.parse_grammar_rule(rule_id, rule, state) {
459 Ok(_) => {
460 state.reset(start);
461 let token_kind =
462 state
463 .peek(0)
464 .map(|t| t.kind)
465 .ok_or_else(|| ParseError::UnexpectedEof {
466 span: state.current_span(),
467 rule_context: state.current_rule(),
468 context_stack: state.build_context_stack(),
469 })?;
470 Err(ParseError::UnexpectedToken {
471 kind: token_kind,
472 span: state.current_span(),
473 rule_context: state.current_rule(),
474 context_stack: state.build_context_stack(),
475 })
476 }
477 Err(_) => {
478 state.reset(start);
479 let builder = state.start_node(rule_id);
480 Ok(builder.finish(state))
481 }
482 }
483 }
484
485 fn parse_positive_lookahead<'tokens, 'arena, N, Ctx>(
486 &mut self,
487 rule_id: R,
488 rule: &GrammarRule<K, R>,
489 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
490 ) -> Result<N, ParseError<K, R>>
491 where
492 N: NodeId,
493 Ctx: GrammarContext,
494 {
495 let start = state.position();
496 match self.parse_grammar_rule(rule_id, rule, state) {
497 Ok(_) => {
498 state.reset(start);
499 let builder = state.start_node(rule_id);
500 Ok(builder.finish(state))
501 }
502 Err(err) => {
503 state.reset(start);
504 Err(err)
505 }
506 }
507 }
508}
509
510impl<K: TokenKind, R: RuleId> GrammarRuleParser<K, R> for Parser<K, R> {
511 fn parse_grammar_rule<'tokens, 'arena, N, Ctx>(
512 &mut self,
513 rule_id: R,
514 rule: &GrammarRule<K, R>,
515 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
516 ) -> Result<N, ParseError<K, R>>
517 where
518 N: NodeId,
519 Ctx: GrammarContext,
520 {
521 match rule {
522 GrammarRule::Sequence(rules) => self.parse_sequence(rule_id, rules, state),
523 GrammarRule::Choice(rules) => self.parse_choice(rule_id, rules, state),
524 GrammarRule::Optional(inner) => self.parse_optional(rule_id, inner, state),
525 GrammarRule::ZeroOrMore(inner) => self.parse_zero_or_more(rule_id, inner, state),
526 GrammarRule::OneOrMore(inner) => self.parse_one_or_more(rule_id, inner, state),
527 GrammarRule::Range {
528 rule: inner,
529 min,
530 max,
531 } => self.parse_range(rule_id, inner, *min, *max, state),
532 GrammarRule::PrattExpr(min_prec) => self.parse_pratt_expr(rule_id, state, *min_prec),
533 GrammarRule::Token(kind) => state.expect_node(*kind),
534 GrammarRule::Rule(id) => self.parse_rule(*id, state),
535 GrammarRule::Conditional {
536 rule: inner,
537 condition,
538 } => {
539 if condition(state.position()) {
540 self.parse_grammar_rule(rule_id, inner, state)
541 } else {
542 Err(ParseError::InvalidContext {
543 rule: "conditional",
544 span: state.current_span(),
545 rule_context: state.current_rule(),
546 context_stack: state.build_context_stack(),
547 })
548 }
549 }
550 GrammarRule::NegativeLookahead(rule) => {
551 self.parse_negative_lookahead(rule_id, rule, state)
552 }
553 GrammarRule::PositiveLookahead(rule) => {
554 self.parse_positive_lookahead(rule_id, rule, state)
555 }
556 }
557 }
558}
559
560impl<K: TokenKind, R: RuleId> Default for Parser<K, R> {
561 fn default() -> Self {
562 Parser::new().build()
563 }
564}