1use crate::{
2 Language,
3 errors::OakError,
4 language::TokenType,
5 lexer::{LexOutput, Token, Tokens},
6 memory::arena::SyntaxArena,
7 source::Source,
8 tree::{Cursor, GreenLeaf, GreenNode, GreenTree, TokenProvenance},
9};
10use core::range::Range;
11use triomphe::Arc;
12
13pub fn deep_clone_node<'a, L: Language>(node: &GreenNode<'_, L>, arena: &'a SyntaxArena) -> &'a GreenNode<'a, L> {
18 let children_iter = node.children.iter().map(|child| match child {
19 GreenTree::Node(n) => GreenTree::Node(deep_clone_node(n, arena)),
20 GreenTree::Leaf(l) => GreenTree::Leaf(*l),
21 });
22
23 let slice = arena.alloc_slice_fill_iter(node.children.len(), children_iter);
24 let new_node = GreenNode::new(node.kind, slice);
25 arena.alloc(new_node)
26}
27
/// A rewindable cursor over the immutable token stream produced by the lexer.
pub struct TokenSource<L: Language> {
    // Shared token buffer; never mutated, only indexed.
    tokens: Tokens<L>,
    // Index of the next token to consume.
    index: usize,
}
33
34impl<L: Language> Clone for TokenSource<L> {
35 fn clone(&self) -> Self {
36 Self { tokens: self.tokens.clone(), index: self.index }
37 }
38}
39
40impl<L: Language> TokenSource<L> {
41 pub fn new(tokens: Tokens<L>) -> Self {
43 Self { tokens, index: 0 }
44 }
45
46 #[inline]
48 pub fn current(&self) -> Option<&Token<L::TokenType>> {
49 self.tokens.get(self.index)
50 }
51
52 #[inline]
54 pub fn peek_at(&self, offset: usize) -> Option<&Token<L::TokenType>> {
55 self.tokens.get(self.index + offset)
56 }
57
58 #[inline]
60 pub fn advance(&mut self) {
61 self.index += 1;
62 }
63
64 #[inline]
66 pub fn is_end(&self) -> bool {
67 self.index >= self.tokens.len()
68 }
69
70 #[inline]
72 pub fn index(&self) -> usize {
73 self.index
74 }
75
76 #[inline]
78 pub fn set_index(&mut self, index: usize) {
79 self.index = index;
80 }
81}
82
/// Accumulates the children of the green tree under construction, allocating
/// finished nodes into a syntax arena.
pub struct TreeSink<'a, L: Language> {
    // Arena that owns every node and child slice this sink produces.
    arena: &'a SyntaxArena,
    // Stack of pending children; `finish_node` wraps a suffix into one node.
    children: Vec<GreenTree<'a, L>>,
}
88
89impl<'a, L: Language> TreeSink<'a, L> {
90 pub fn new(arena: &'a SyntaxArena, capacity_hint: usize) -> Self {
92 Self { arena, children: Vec::with_capacity(capacity_hint) }
93 }
94
95 pub fn push_leaf(&mut self, kind: L::TokenType, len: usize) {
97 self.children.push(GreenTree::Leaf(GreenLeaf::new(kind, len as u32)));
98 }
99
100 pub fn push_leaf_with_metadata(&mut self, kind: L::TokenType, len: usize, provenance: TokenProvenance) {
102 let index = self.arena.add_metadata(provenance);
103 self.children.push(GreenTree::Leaf(GreenLeaf::with_metadata(kind, len as u32, Some(index))));
104 }
105
106 pub fn push_node(&mut self, node: &'a GreenNode<'a, L>) {
108 self.children.push(GreenTree::Node(node));
109 }
110
111 pub fn checkpoint(&self) -> usize {
113 self.children.len()
114 }
115
116 pub fn arena(&self) -> &'a SyntaxArena {
118 self.arena
119 }
120
121 pub fn restore(&mut self, checkpoint: usize) {
123 self.children.truncate(checkpoint)
124 }
125
126 pub fn finish_node(&mut self, checkpoint: usize, kind: L::ElementType) -> &'a GreenNode<'a, L> {
128 let children_slice = self.arena.alloc_slice_copy(&self.children[checkpoint..]);
129 self.children.truncate(checkpoint);
130 let node = GreenNode::new(kind, children_slice);
131 let node_ref = self.arena.alloc(node);
132 self.children.push(GreenTree::Node(node_ref));
133 node_ref
134 }
135}
136
/// State for reusing unchanged subtrees of a previous parse after text edits.
pub struct IncrementalContext<'a, L: Language> {
    // Walks the old tree looking for candidate nodes to reuse.
    cursor: Cursor<'a, L>,
    // Edits sorted by start offset, each paired with the cumulative byte delta
    // (new length minus old length) of all edits up to and including it.
    edits: Vec<(Range<usize>, isize)>,
}
144
impl<'a, L: Language> IncrementalContext<'a, L> {
    /// Builds a reuse context from the previous tree root and the edits
    /// applied since it was produced.
    ///
    /// Edits are sorted by start offset and annotated with the running byte
    /// delta, so later position mapping is a single forward scan.
    pub fn new(old_root: &'a GreenNode<'a, L>, edits: &[crate::source::TextEdit]) -> Self {
        let mut sorted_edits: Vec<_> = edits.iter().collect();
        sorted_edits.sort_by_key(|e| e.span.start);

        let mut cumulative_delta = 0isize;
        let mut processed_edits = Vec::with_capacity(edits.len());

        for edit in sorted_edits {
            // Delta contributed by this edit: replacement length minus the
            // length of the replaced old-text span.
            let old_len = edit.span.end - edit.span.start;
            let new_len = edit.text.len();
            cumulative_delta += new_len as isize - old_len as isize;
            processed_edits.push((edit.span, cumulative_delta));
        }

        Self { cursor: Cursor::new(old_root), edits: processed_edits }
    }

    /// Maps an offset in the post-edit text back to the pre-edit text.
    ///
    /// Returns `None` when the position falls inside replacement text — no
    /// old offset corresponds to it, so nothing there can be reused.
    fn map_new_to_old(&self, new_pos: usize) -> Option<usize> {
        // Delta of all edits strictly before the one being examined.
        let mut current_delta = 0isize;

        for (old_range, cumulative_delta) in &self.edits {
            // This edit's replacement occupies [new_start, new_end) in the
            // NEW text: the start shifts by the preceding edits' delta, and
            // the end additionally absorbs this edit's own delta
            // (new_end == old_range.end + *cumulative_delta).
            let new_start = (old_range.start as isize + current_delta) as usize;
            let edit_delta = *cumulative_delta - current_delta;
            let new_end = (old_range.end as isize + current_delta + edit_delta) as usize;

            if new_pos < new_start {
                // Before this edit: only earlier edits shift the position.
                return Some((new_pos as isize - current_delta) as usize);
            }
            else if new_pos < new_end {
                // Inside replacement text: unmappable.
                return None;
            }

            current_delta = *cumulative_delta;
        }

        // Past every edit: shift back by the total delta.
        Some((new_pos as isize - current_delta) as usize)
    }

    /// Whether the old-text range `[old_start, old_end)` overlaps any edit
    /// (half-open interval overlap test per edit).
    fn is_dirty(&self, old_start: usize, old_end: usize) -> bool {
        for (old_range, _) in &self.edits {
            if old_start < old_range.end && old_end > old_range.start {
                return true;
            }
        }
        false
    }
}
196
/// All mutable state threaded through a parse: token cursor, output sink,
/// optional incremental-reuse context, collected diagnostics, and the source.
pub struct ParserState<'a, L: Language, S: Source + ?Sized = crate::source::SourceText> {
    /// Cursor over the lexed token stream.
    pub tokens: TokenSource<L>,

    /// Sink accumulating the green tree under construction.
    pub sink: TreeSink<'a, L>,

    /// Present when re-parsing after edits; enables subtree reuse.
    pub incremental: Option<IncrementalContext<'a, L>>,

    /// Diagnostics gathered during lexing and parsing.
    pub errors: Vec<OakError>,
    /// The text being parsed.
    pub source: &'a S,
}
213
214impl<'a, L: Language, S: Source + ?Sized> ParserState<'a, L, S> {
215 pub fn source_id(&self) -> Option<crate::source::SourceId> {
217 self.source.source_id()
218 }
219
220 pub fn arena(&self) -> &'a SyntaxArena {
222 self.sink.arena()
223 }
224
225 pub fn new(arena: &'a SyntaxArena, lex_output: LexOutput<L>, source: &'a S, capacity_hint: usize) -> Self {
233 let (tokens, mut errors) = match lex_output.result {
234 Ok(tokens) => (tokens, Vec::new()),
235 Err(e) => (Arc::from(Vec::new()), vec![e]),
236 };
237 errors.extend(lex_output.diagnostics);
238
239 let mut st = Self { tokens: TokenSource::new(tokens), sink: TreeSink::new(arena, capacity_hint), incremental: None, errors, source };
240 st.skip_trivia();
241 st
242 }
243
244 pub fn nested(&self) -> ParserState<'a, L, S> {
247 ParserState { tokens: self.tokens.clone(), sink: TreeSink::new(self.sink.arena, 1024), incremental: None, errors: Vec::new(), source: self.source }
248 }
249
250 pub fn peek_text(&self) -> Option<std::borrow::Cow<'_, str>> {
252 self.current().map(|t| self.source.get_text_in(t.span))
253 }
254
255 pub fn promote(&mut self, node: &'a GreenNode<'a, L>) -> &'a GreenNode<'a, L> {
258 deep_clone_node(node, self.sink.arena)
259 }
260
261 pub fn set_incremental(&mut self, old: &'a GreenNode<'a, L>, edits: &[crate::source::TextEdit]) {
267 self.incremental = Some(IncrementalContext::new(old, edits));
268 }
269
270 pub fn current_offset(&self) -> usize {
274 self.tokens.current().map(|t| t.span.start).unwrap_or_else(|| self.source.length())
275 }
276
277 pub fn syntax_error(&mut self, message: impl Into<String>) -> OakError {
279 let err = OakError::syntax_error(message, self.current_offset(), self.source.source_id());
280 self.errors.push(err.clone());
281 err
282 }
283
284 pub fn record_unexpected_token(&mut self, token: impl Into<String>) {
286 let err = OakError::unexpected_token(token, self.current_offset(), self.source.source_id());
287 self.errors.push(err)
288 }
289
290 pub fn record_expected(&mut self, expected: impl Into<String>) {
292 let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
293 let err = OakError::expected_token(expected, offset, self.source.source_id());
294 self.errors.push(err)
295 }
296
297 pub fn record_expected_name(&mut self, name_kind: impl Into<String>) {
299 let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
300 let err = OakError::expected_name(name_kind, offset, self.source.source_id());
301 self.errors.push(err)
302 }
303
304 pub fn record_trailing_comma_not_allowed(&mut self) {
306 let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
307 let err = OakError::trailing_comma_not_allowed(offset, self.source.source_id());
308 self.errors.push(err)
309 }
310
311 pub fn record_unexpected_eof(&mut self) {
313 let offset = self.source.length();
314 let err = OakError::unexpected_eof(offset, self.source.source_id());
315 self.errors.push(err)
316 }
317
318 pub fn unexpected_eof(&mut self) -> OakError {
320 let offset = self.source.length();
321 let err = OakError::unexpected_eof(offset, self.source.source_id());
322 self.errors.push(err.clone());
323 err
324 }
325
326 #[inline]
330 pub fn current(&self) -> Option<&Token<L::TokenType>> {
331 self.tokens.current()
332 }
333
334 #[inline]
336 pub fn peek_kind(&self) -> Option<L::TokenType> {
337 self.current().map(|t| t.kind)
338 }
339
340 #[inline]
342 pub fn peek_at(&self, offset: usize) -> Option<&Token<L::TokenType>> {
343 self.tokens.peek_at(offset)
344 }
345
346 pub fn peek_non_trivia_at(&self, mut n: usize) -> Option<&Token<L::TokenType>> {
348 let mut offset = 0;
349 while let Some(token) = self.tokens.peek_at(offset) {
350 if !L::TokenType::is_ignored(&token.kind) {
351 if n == 0 {
352 return Some(token);
353 }
354 n -= 1
355 }
356 offset += 1
357 }
358 None
359 }
360
361 #[inline]
363 pub fn peek_kind_at(&self, offset: usize) -> Option<L::TokenType> {
364 self.tokens.peek_at(offset).map(|t| t.kind)
365 }
366
367 pub fn peek_non_trivia_kind_at(&self, n: usize) -> Option<L::TokenType> {
369 self.peek_non_trivia_at(n).map(|t| t.kind)
370 }
371
372 #[inline]
374 pub fn not_at_end(&self) -> bool {
375 !self.tokens.is_end()
376 }
377
378 #[inline]
380 pub fn at(&self, kind: L::TokenType) -> bool {
381 self.peek_kind() == Some(kind)
382 }
383
384 #[inline]
386 pub fn not_at(&self, kind: L::TokenType) -> bool {
387 !self.at(kind)
388 }
389
390 pub fn advance(&mut self) {
392 self.tokens.advance();
393 self.skip_trivia()
394 }
395
396 pub fn skip_trivia(&mut self) {
398 while let Some(token) = self.tokens.current() {
399 if L::TokenType::is_ignored(&token.kind) {
400 self.sink.push_leaf(token.kind, token.length());
401 self.tokens.advance()
402 }
403 else {
404 break;
405 }
406 }
407 }
408
409 pub fn advance_until(&mut self, kind: L::TokenType) {
411 while self.not_at_end() && !self.at(kind) {
412 self.advance()
413 }
414 }
415
416 pub fn advance_until_any(&mut self, kinds: &[L::TokenType]) {
418 while self.not_at_end() && !kinds.iter().any(|&k| self.at(k)) {
419 self.advance()
420 }
421 }
422
423 pub fn bump(&mut self) {
425 if let Some(token) = self.current() {
426 self.sink.push_leaf(token.kind, token.length());
427 self.advance()
428 }
429 }
430
431 pub fn eat(&mut self, kind: L::TokenType) -> bool {
434 if self.at(kind) {
435 self.bump();
436 true
437 }
438 else {
439 false
440 }
441 }
442
443 pub fn expect(&mut self, kind: L::TokenType) -> Result<(), OakError> {
446 if self.eat(kind) {
447 Ok(())
448 }
449 else {
450 let err = OakError::expected_token(format!("{:?}", kind), self.current_offset(), self.source.source_id());
451 self.errors.push(err.clone());
452 Err(err)
453 }
454 }
455
456 pub fn try_parse<T, F>(&mut self, parser: F) -> Result<T, OakError>
458 where
459 F: FnOnce(&mut Self) -> Result<T, OakError>,
460 {
461 let checkpoint = self.checkpoint();
462 match parser(self) {
463 Ok(value) => Ok(value),
464 Err(err) => {
465 self.restore(checkpoint);
466 Err(err)
467 }
468 }
469 }
470
471 pub fn try_parse_with_error<T, F>(&mut self, parser: F) -> Result<T, OakError>
476 where
477 F: FnOnce(&mut Self) -> Result<T, OakError>,
478 {
479 let checkpoint = self.checkpoint();
480 match parser(self) {
481 Ok(value) => Ok(value),
482 Err(err) => {
483 self.restore(checkpoint);
484 Err(err)
485 }
486 }
487 }
488
489 pub fn checkpoint(&self) -> (usize, usize) {
494 (self.tokens.index(), self.sink.checkpoint())
495 }
496
497 pub fn restore(&mut self, (token_index, tree_checkpoint): (usize, usize)) {
499 self.tokens.set_index(token_index);
500 self.sink.restore(tree_checkpoint)
501 }
502
503 pub fn finish_at(&mut self, checkpoint: (usize, usize), kind: L::ElementType) -> &'a GreenNode<'a, L> {
505 self.sink.finish_node(checkpoint.1, kind)
506 }
507
508 pub fn checkpoint_before(&mut self, node: &'a GreenNode<'a, L>) -> (usize, usize) {
510 let sink_checkpoint = self.sink.checkpoint();
512 for i in (0..sink_checkpoint).rev() {
513 if let Some(child) = self.sink.children.get(i) {
514 if let GreenTree::Node(n) = child {
515 if std::ptr::eq(*n, node) {
516 return (0, i); }
518 }
519 }
520 }
521 (0, sink_checkpoint)
522 }
523
524 pub fn push_child(&mut self, node: &'a GreenNode<'a, L>) {
526 self.sink.push_node(node)
527 }
528
529 pub fn try_reuse(&mut self, kind: L::ElementType) -> bool {
532 let Some(inc) = &mut self.incremental
533 else {
534 return false;
535 };
536 let current_index = self.tokens.index();
537 let new_pos = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
538
539 #[cfg(test)]
540 println!("try_reuse: kind={:?}, new_pos={}", kind, new_pos);
541
542 let Some(target_old_pos) = inc.map_new_to_old(new_pos)
543 else {
544 return false;
545 };
546
547 #[cfg(test)]
548 println!("Trying to reuse node at pos {};: {:?}", new_pos, kind);
549
550 let mut steps = 0;
551 const MAX_STEPS: usize = 100;
552
553 loop {
554 let start = inc.cursor.offset();
555 let end = inc.cursor.end_offset();
556
557 if start == target_old_pos {
558 if let Some(node) = inc.cursor.as_node() {
559 if node.kind == kind {
560 if !inc.is_dirty(start, end) {
561 let node_len = node.text_len() as usize;
563
564 let mut lookahead = 0;
566 let mut current_pos = new_pos;
567 let target_new_end = new_pos + node_len;
568
569 while let Some(t) = self.tokens.peek_at(lookahead) {
570 if t.span.start != current_pos {
571 break;
573 }
574 current_pos = t.span.end;
575
576 if t.span.end == target_new_end {
577 let tokens_consumed = lookahead + 1;
579 let new_node = deep_clone_node(node, self.sink.arena);
580 self.sink.push_node(new_node);
581 self.tokens.set_index(current_index + tokens_consumed);
582 inc.cursor.step_over();
583 return true;
584 }
585 else if t.span.end > target_new_end {
586 break;
587 }
588 lookahead += 1;
589 }
590 }
591 }
592 }
593 if !inc.cursor.step_into() && !inc.cursor.step_next() {
594 return false;
595 }
596 }
597 else if start < target_old_pos && end > target_old_pos {
598 if !inc.cursor.step_into() && !inc.cursor.step_next() {
599 return false;
600 }
601 }
602 else if end <= target_old_pos {
603 if !inc.cursor.step_next() {
604 return false;
605 }
606 }
607 else {
608 return false;
609 }
610
611 steps += 1;
612 if steps > MAX_STEPS {
613 return false;
614 }
615 }
616 }
617
618 pub fn finish(self, result: Result<&'a GreenNode<'a, L>, OakError>) -> crate::parser::ParseOutput<'a, L> {
620 crate::errors::OakDiagnostics { result, diagnostics: self.errors }
621 }
622
623 pub fn incremental_node<F>(&mut self, kind: L::ElementType, f: F) -> Result<(), OakError>
630 where
631 F: FnOnce(&mut Self) -> Result<(), OakError>,
632 {
633 if self.try_reuse(kind) {
634 return Ok(());
635 };
636
637 let checkpoint = self.checkpoint();
638 let res = f(self);
639 self.finish_at(checkpoint, kind);
640 res
641 }
642
643 pub fn incremental_opt<F>(&mut self, kind: L::ElementType, f: F) -> Result<bool, OakError>
648 where
649 F: FnOnce(&mut Self) -> Result<bool, OakError>,
650 {
651 if self.try_reuse(kind) {
652 return Ok(true);
653 };
654
655 let checkpoint = self.checkpoint();
656 match f(self) {
657 Ok(true) => {
658 self.finish_at(checkpoint, kind);
659 Ok(true)
660 }
661 Ok(false) => Ok(false),
662 Err(e) => {
663 self.finish_at(checkpoint, kind);
664 Err(e)
665 }
666 }
667 }
668}