1use crate::{
2 Language,
3 errors::OakError,
4 language::TokenType,
5 lexer::{LexOutput, Token, Tokens},
6 memory::arena::SyntaxArena,
7 source::Source,
8 tree::{Cursor, GreenLeaf, GreenNode, GreenTree, TokenProvenance},
9};
10use core::range::Range;
11
12pub fn deep_clone_node<'a, L: Language>(node: &GreenNode<'_, L>, arena: &'a SyntaxArena) -> &'a GreenNode<'a, L> {
17 let children_iter = node.children.iter().map(|child| match child {
18 GreenTree::Node(n) => GreenTree::Node(deep_clone_node(n, arena)),
19 GreenTree::Leaf(l) => GreenTree::Leaf(*l),
20 });
21
22 let slice = arena.alloc_slice_fill_iter(node.children.len(), children_iter);
23 let new_node = GreenNode::new(node.kind, slice);
24 arena.alloc(new_node)
25}
26
/// A cursor over the lexed token stream with O(1) save/restore of the
/// position, used by the parser for lookahead and backtracking.
pub struct TokenSource<L: Language> {
    /// The full token stream produced by the lexer.
    tokens: Tokens<L>,
    /// Index of the "current" token; may equal or exceed `tokens.len()` at EOF.
    index: usize,
}
34
35impl<L: Language> Clone for TokenSource<L> {
36 fn clone(&self) -> Self {
37 Self { tokens: self.tokens.clone(), index: self.index }
38 }
39}
40
41impl<L: Language> TokenSource<L> {
42 pub fn new(tokens: Tokens<L>) -> Self {
44 Self { tokens, index: 0 }
45 }
46
47 #[inline]
49 pub fn current(&self) -> Option<&Token<L::TokenType>> {
50 self.tokens.get(self.index)
51 }
52
53 #[inline]
55 pub fn peek_at(&self, offset: usize) -> Option<&Token<L::TokenType>> {
56 self.tokens.get(self.index + offset)
57 }
58
59 #[inline]
61 pub fn advance(&mut self) {
62 self.index += 1;
63 }
64
65 #[inline]
67 pub fn is_end(&self) -> bool {
68 self.index >= self.tokens.len()
69 }
70
71 #[inline]
73 pub fn index(&self) -> usize {
74 self.index
75 }
76
77 #[inline]
79 pub fn set_index(&mut self, index: usize) {
80 self.index = index;
81 }
82}
83
/// Accumulates green-tree children and packages suffixes of them into
/// nodes allocated in a borrowed [`SyntaxArena`].
pub struct TreeSink<'a, L: Language> {
    /// Arena receiving every node, slice, and metadata allocation.
    arena: &'a SyntaxArena,
    /// Flat stack of pending children; `finish_node` collapses a suffix
    /// of this vector into a single node.
    children: Vec<GreenTree<'a, L>>,
}
91
92impl<'a, L: Language> TreeSink<'a, L> {
93 pub fn new(arena: &'a SyntaxArena, capacity_hint: usize) -> Self {
95 let capacity = usize::max(capacity_hint, 1024);
97 Self { arena, children: Vec::with_capacity(capacity) }
98 }
99
100 #[inline(always)]
102 pub fn push_leaf(&mut self, kind: L::TokenType, len: usize) {
103 self.children.push(GreenTree::Leaf(GreenLeaf::new(kind, len as u32)));
104 }
105
106 #[inline(always)]
108 pub fn push_leaf_with_metadata(&mut self, kind: L::TokenType, len: usize, provenance: TokenProvenance) {
109 let index = self.arena.add_metadata(provenance);
110 self.children.push(GreenTree::Leaf(GreenLeaf::with_metadata(kind, len as u32, Some(index))));
111 }
112
113 #[inline(always)]
115 pub fn push_node(&mut self, node: &'a GreenNode<'a, L>) {
116 self.children.push(GreenTree::Node(node));
117 }
118
119 #[inline(always)]
121 pub fn checkpoint(&self) -> usize {
122 self.children.len()
123 }
124
125 #[inline(always)]
127 pub fn arena(&self) -> &'a SyntaxArena {
128 self.arena
129 }
130
131 #[inline(always)]
133 pub fn restore(&mut self, checkpoint: usize) {
134 self.children.truncate(checkpoint)
135 }
136
137 pub fn finish_node(&mut self, checkpoint: usize, kind: L::ElementType) -> &'a GreenNode<'a, L> {
139 let children_count = self.children.len() - checkpoint;
140 if children_count == 0 {
141 let children_slice = &[];
143 let node = GreenNode::new(kind, children_slice);
144 let node_ref = self.arena.alloc(node);
145 self.children.truncate(checkpoint);
146 self.children.push(GreenTree::Node(node_ref));
147 return node_ref;
148 }
149
150 let children_iter = self.children[checkpoint..].iter().cloned();
152 let children_slice = self.arena.alloc_slice_fill_iter(children_count, children_iter);
153 self.children.truncate(checkpoint);
154 let node = GreenNode::new(kind, children_slice);
155 let node_ref = self.arena.alloc(node);
156 self.children.push(GreenTree::Node(node_ref));
157 node_ref
158 }
159}
160
/// State for incremental reparsing: a cursor over the previous tree plus
/// the edits needed to map new source offsets back to old ones.
pub struct IncrementalContext<'a, L: Language> {
    /// Walks the previous parse tree looking for reusable subtrees.
    cursor: Cursor<'a, L>,
    /// Edits sorted by start offset (in old-text coordinates), each paired
    /// with the *cumulative* length delta (new - old) of all edits up to
    /// and including itself.
    edits: Vec<(Range<usize>, isize)>,
}
169
impl<'a, L: Language> IncrementalContext<'a, L> {
    /// Builds a context from the previous tree's root and a set of text edits.
    ///
    /// Edits are sorted by start offset and each is annotated with the
    /// cumulative byte-length delta of all edits up to and including it,
    /// which lets [`Self::map_new_to_old`] run in a single forward pass.
    pub fn new(old_root: &'a GreenNode<'a, L>, edits: &[crate::source::TextEdit]) -> Self {
        let mut sorted_edits: Vec<_> = edits.iter().collect();
        sorted_edits.sort_by_key(|e| e.span.start);

        let mut cumulative_delta = 0isize;
        let mut processed_edits = Vec::with_capacity(edits.len());

        for edit in sorted_edits {
            // Delta contributed by this edit: replacement length minus
            // replaced length, folded into the running total.
            let old_len = edit.span.end - edit.span.start;
            let new_len = edit.text.len();
            cumulative_delta += new_len as isize - old_len as isize;
            processed_edits.push((edit.span, cumulative_delta));
        }

        Self { cursor: Cursor::new(old_root), edits: processed_edits }
    }

    /// Maps an offset in the *new* text back to the *old* text.
    ///
    /// Returns `None` when `new_pos` falls inside an edit's replacement
    /// text — such positions have no old counterpart to reuse from.
    fn map_new_to_old(&self, new_pos: usize) -> Option<usize> {
        // Delta accumulated from all edits strictly before the one being
        // examined; edits are sorted, so a single pass suffices.
        let mut current_delta = 0isize;

        for (old_range, cumulative_delta) in &self.edits {
            // Start/end of this edit's replacement, in NEW coordinates:
            // start shifts by the deltas of earlier edits only; the end
            // additionally absorbs this edit's own delta.
            let new_start = (old_range.start as isize + current_delta) as usize;
            let edit_delta = *cumulative_delta - current_delta;
            let new_end = (old_range.end as isize + current_delta + edit_delta) as usize;

            if new_pos < new_start {
                // Before this edit: only earlier edits shift the offset.
                return Some((new_pos as isize - current_delta) as usize);
            }
            else if new_pos < new_end {
                // Inside freshly inserted/replaced text: unmappable.
                return None;
            }

            current_delta = *cumulative_delta;
        }

        // Past every edit: undo the total delta.
        Some((new_pos as isize - current_delta) as usize)
    }

    /// True when the old-text range `[old_start, old_end)` overlaps any
    /// edit, meaning a subtree covering it cannot be reused verbatim.
    fn is_dirty(&self, old_start: usize, old_end: usize) -> bool {
        for (old_range, _) in &self.edits {
            // Standard half-open interval overlap test.
            if old_start < old_range.end && old_end > old_range.start {
                return true;
            }
        }
        false
    }
}
221
/// Mutable state threaded through a parse: token cursor, tree sink,
/// optional incremental-reuse context, collected diagnostics, and the
/// source text being parsed.
pub struct ParserState<'a, L: Language, S: Source + ?Sized = crate::source::SourceText> {
    /// Cursor over the lexed token stream.
    pub tokens: TokenSource<L>,

    /// Sink that accumulates children and builds green nodes in the arena.
    pub sink: TreeSink<'a, L>,

    /// Present only when reparsing incrementally against a previous tree.
    pub incremental: Option<IncrementalContext<'a, L>>,

    /// Diagnostics accumulated during lexing and parsing.
    pub errors: Vec<OakError>,
    /// The source the tokens were lexed from.
    pub source: &'a S,
}
238
impl<'a, L: Language, S: Source + ?Sized> ParserState<'a, L, S> {
    /// Identifier of the source being parsed, when it has one.
    pub fn source_id(&self) -> Option<crate::source::SourceId> {
        self.source.source_id()
    }

    /// The arena all green nodes for this parse are allocated in.
    pub fn arena(&self) -> &'a SyntaxArena {
        self.sink.arena()
    }

    /// Builds a parser state from lexer output.
    ///
    /// A hard lexer failure yields an empty token stream plus that error;
    /// lexer diagnostics are carried over into `errors` in either case.
    pub fn new(arena: &'a SyntaxArena, lex_output: LexOutput<L>, source: &'a S, capacity_hint: usize) -> Self {
        let (tokens, mut errors) = match lex_output.result {
            Ok(tokens) => (tokens, Vec::new()),
            Err(e) => (Tokens::default(), vec![e]),
        };
        errors.extend(lex_output.diagnostics);

        Self { tokens: TokenSource::new(tokens), sink: TreeSink::new(arena, capacity_hint), incremental: None, errors, source }
    }

    /// A speculative sub-parser sharing the same arena and token position
    /// but with its own sink and error list; incremental reuse is disabled.
    pub fn nested(&self) -> ParserState<'a, L, S> {
        ParserState { tokens: self.tokens.clone(), sink: TreeSink::new(self.sink.arena, 1024), incremental: None, errors: Vec::new(), source: self.source }
    }

    /// Text of the current token, if any.
    pub fn peek_text(&self) -> Option<std::borrow::Cow<'_, str>> {
        self.current().map(|t| self.source.get_text_in(t.span))
    }

    /// Deep-copies a node (e.g. from an old tree) into this parse's arena.
    pub fn promote(&mut self, node: &'a GreenNode<'a, L>) -> &'a GreenNode<'a, L> {
        deep_clone_node(node, self.sink.arena)
    }

    /// Enables incremental reuse against `old` with the given edits.
    pub fn set_incremental(&mut self, old: &'a GreenNode<'a, L>, edits: &[crate::source::TextEdit]) {
        self.incremental = Some(IncrementalContext::new(old, edits));
    }

    /// Byte offset of the current token, or the source length at EOF.
    pub fn current_offset(&self) -> usize {
        self.tokens.current().map(|t| t.span.start).unwrap_or_else(|| self.source.length())
    }

    /// Records a syntax error at the current offset and returns a copy of it.
    pub fn syntax_error(&mut self, message: impl Into<String>) -> OakError {
        let err = OakError::syntax_error(message, self.current_offset(), self.source.source_id());
        self.errors.push(err.clone());
        err
    }

    /// Records an unexpected-token error at the current offset.
    pub fn record_unexpected_token(&mut self, token: impl Into<String>) {
        let err = OakError::unexpected_token(token, self.current_offset(), self.source.source_id());
        self.errors.push(err)
    }

    /// Records an expected-token error at the current offset.
    pub fn record_expected(&mut self, expected: impl Into<String>) {
        let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
        let err = OakError::expected_token(expected, offset, self.source.source_id());
        self.errors.push(err)
    }

    /// Records an expected-name error at the current offset.
    pub fn record_expected_name(&mut self, name_kind: impl Into<String>) {
        let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
        let err = OakError::expected_name(name_kind, offset, self.source.source_id());
        self.errors.push(err)
    }

    /// Records a trailing-comma-not-allowed error at the current offset.
    pub fn record_trailing_comma_not_allowed(&mut self) {
        let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
        let err = OakError::trailing_comma_not_allowed(offset, self.source.source_id());
        self.errors.push(err)
    }

    /// Records an unexpected-EOF error positioned at the end of the source.
    pub fn record_unexpected_eof(&mut self) {
        let offset = self.source.length();
        let err = OakError::unexpected_eof(offset, self.source.source_id());
        self.errors.push(err)
    }

    /// Like [`Self::record_unexpected_eof`] but also returns the error,
    /// for use as the `Err` value of a parse function.
    pub fn unexpected_eof(&mut self) -> OakError {
        let offset = self.source.length();
        let err = OakError::unexpected_eof(offset, self.source.source_id());
        self.errors.push(err.clone());
        err
    }

    /// Current token, or `None` at EOF.
    #[inline]
    pub fn current(&self) -> Option<&Token<L::TokenType>> {
        self.tokens.current()
    }

    /// Kind of the current token, or `None` at EOF.
    #[inline]
    pub fn peek_kind(&self) -> Option<L::TokenType> {
        self.current().map(|t| t.kind)
    }

    /// Token `offset` positions ahead (trivia included).
    #[inline]
    pub fn peek_at(&self, offset: usize) -> Option<&Token<L::TokenType>> {
        self.tokens.peek_at(offset)
    }

    /// The `n`-th non-trivia token ahead (n == 0 is the first non-trivia
    /// token at or after the cursor). Linear scan over the remaining stream.
    pub fn peek_non_trivia_at(&self, mut n: usize) -> Option<&Token<L::TokenType>> {
        let mut offset = 0;
        while let Some(token) = self.tokens.peek_at(offset) {
            if !L::TokenType::is_ignored(&token.kind) {
                if n == 0 {
                    return Some(token);
                }
                n -= 1
            }
            offset += 1
        }
        None
    }

    /// Kind of the token `offset` positions ahead (trivia included).
    #[inline]
    pub fn peek_kind_at(&self, offset: usize) -> Option<L::TokenType> {
        self.tokens.peek_at(offset).map(|t| t.kind)
    }

    /// Kind of the `n`-th non-trivia token ahead.
    pub fn peek_non_trivia_kind_at(&self, n: usize) -> Option<L::TokenType> {
        self.peek_non_trivia_at(n).map(|t| t.kind)
    }

    /// True while tokens remain.
    #[inline]
    pub fn not_at_end(&self) -> bool {
        !self.tokens.is_end()
    }

    /// True when the current token has kind `kind`.
    #[inline]
    pub fn at(&self, kind: L::TokenType) -> bool {
        self.peek_kind() == Some(kind)
    }

    /// Negation of [`Self::at`]; also true at EOF.
    #[inline]
    pub fn not_at(&self, kind: L::TokenType) -> bool {
        !self.at(kind)
    }

    /// Advances past the current token, then past any trivia.
    /// NOTE: the skipped current token is NOT pushed into the sink;
    /// use [`Self::bump`] to consume it into the tree.
    pub fn advance(&mut self) {
        self.tokens.advance();
        self.skip_trivia()
    }

    /// Consumes consecutive trivia tokens into the sink as leaves.
    pub fn skip_trivia(&mut self) {
        while let Some(token) = self.tokens.current() {
            // `likely`: trivia runs are the common case right after a token.
            if std::intrinsics::likely(L::TokenType::is_ignored(&token.kind)) {
                self.sink.push_leaf(token.kind, token.length());
                self.tokens.advance()
            }
            else {
                break;
            }
        }
    }

    /// Error recovery: skips tokens until `kind` or EOF is reached.
    pub fn advance_until(&mut self, kind: L::TokenType) {
        while std::intrinsics::likely(self.not_at_end() && !self.at(kind)) {
            self.advance()
        }
    }

    /// Error recovery: skips tokens until any of `kinds` or EOF is reached.
    pub fn advance_until_any(&mut self, kinds: &[L::TokenType]) {
        while std::intrinsics::likely(self.not_at_end() && !kinds.iter().any(|&k| self.at(k))) {
            self.advance()
        }
    }

    /// Consumes the current token into the sink and skips trailing trivia.
    /// No-op at EOF.
    pub fn bump(&mut self) {
        if let Some(token) = self.current() {
            self.sink.push_leaf(token.kind, token.length());
            self.advance()
        }
    }

    /// Consumes the current token iff it has kind `kind`; returns whether
    /// it did.
    pub fn eat(&mut self, kind: L::TokenType) -> bool {
        // NOTE(review): this `unlikely` hint says the match usually fails,
        // while `expect` below marks the same `eat` as `likely` to succeed —
        // confirm which branch weighting is intended.
        if std::intrinsics::unlikely(self.at(kind)) {
            self.bump();
            true
        }
        else {
            false
        }
    }

    /// Consumes a token of kind `kind` or records and returns an
    /// expected-token error (formatted via `Debug` of the kind).
    pub fn expect(&mut self, kind: L::TokenType) -> Result<(), OakError> {
        if std::intrinsics::likely(self.eat(kind)) {
            Ok(())
        }
        else {
            let err = OakError::expected_token(format!("{:?}", kind), self.current_offset(), self.source.source_id());
            self.errors.push(err.clone());
            Err(err)
        }
    }

    /// Runs `parser`; on failure, rewinds tokens and sink to the state
    /// before the attempt and propagates the error.
    pub fn try_parse<T, F>(&mut self, parser: F) -> Result<T, OakError>
    where
        F: FnOnce(&mut Self) -> Result<T, OakError>,
    {
        let checkpoint = self.checkpoint();
        match parser(self) {
            Ok(value) => Ok(value),
            Err(err) => {
                self.restore(checkpoint);
                Err(err)
            }
        }
    }

    /// NOTE(review): currently byte-identical to [`Self::try_parse`] —
    /// neither variant pushes the error into `self.errors`. Presumably this
    /// one was meant to record the error as well; confirm intended contract.
    pub fn try_parse_with_error<T, F>(&mut self, parser: F) -> Result<T, OakError>
    where
        F: FnOnce(&mut Self) -> Result<T, OakError>,
    {
        let checkpoint = self.checkpoint();
        match parser(self) {
            Ok(value) => Ok(value),
            Err(err) => {
                self.restore(checkpoint);
                Err(err)
            }
        }
    }

    /// Snapshot of (token index, sink length) for later [`Self::restore`]
    /// or [`Self::finish_at`].
    pub fn checkpoint(&self) -> (usize, usize) {
        (self.tokens.index(), self.sink.checkpoint())
    }

    /// Rewinds both the token cursor and the sink to a prior checkpoint.
    pub fn restore(&mut self, (token_index, tree_checkpoint): (usize, usize)) {
        self.tokens.set_index(token_index);
        self.sink.restore(tree_checkpoint)
    }

    /// Wraps everything pushed since `checkpoint` into a node of `kind`.
    /// Only the sink component of the checkpoint is used.
    pub fn finish_at(&mut self, checkpoint: (usize, usize), kind: L::ElementType) -> &'a GreenNode<'a, L> {
        self.sink.finish_node(checkpoint.1, kind)
    }

    /// Finds the sink position of `node` (by pointer identity, scanning
    /// backwards) so a wrapper node can be retroactively created around it.
    ///
    /// NOTE(review): the token component of the returned checkpoint is
    /// always 0 — calling [`Self::restore`] with it would rewind the token
    /// cursor to the start. Presumably only [`Self::finish_at`] (which
    /// ignores the token index) is intended to consume this; confirm.
    pub fn checkpoint_before(&mut self, node: &'a GreenNode<'a, L>) -> (usize, usize) {
        let sink_checkpoint = self.sink.checkpoint();
        for i in (0..sink_checkpoint).rev() {
            if let Some(child) = self.sink.children.get(i) {
                if let GreenTree::Node(n) = child {
                    if std::ptr::eq(*n, node) {
                        return (0, i);
                    }
                }
            }
        }
        (0, sink_checkpoint)
    }

    /// Pushes an already-built node as a child of the node in progress.
    pub fn push_child(&mut self, node: &'a GreenNode<'a, L>) {
        self.sink.push_node(node)
    }

    /// Attempts to reuse a subtree of kind `kind` from the previous parse
    /// at the current position. On success the old node is deep-cloned into
    /// this arena, pushed into the sink, and the token cursor is advanced
    /// past the covered tokens; returns whether reuse happened.
    pub fn try_reuse(&mut self, kind: L::ElementType) -> bool {
        let Some(inc) = &mut self.incremental
        else {
            return false;
        };
        let current_index = self.tokens.index();
        // Position in NEW-text coordinates where the candidate must start.
        let new_pos = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());

        #[cfg(test)]
        println!("try_reuse: kind={:?}, new_pos={}", kind, new_pos);

        // Bail out if the current position lies inside edited text.
        let Some(target_old_pos) = inc.map_new_to_old(new_pos)
        else {
            return false;
        };

        #[cfg(test)]
        println!("Trying to reuse node at pos {};: {:?}", new_pos, kind);

        // Bound the cursor walk so a pathological tree cannot stall the parse.
        let mut steps = 0;
        const MAX_STEPS: usize = 100;

        loop {
            let start = inc.cursor.offset();
            let end = inc.cursor.end_offset();

            if std::intrinsics::likely(start == target_old_pos) {
                // Cursor is positioned exactly at the target: check that it
                // is a clean node of the right kind before reusing it.
                if let Some(node) = inc.cursor.as_node() {
                    if std::intrinsics::likely(node.kind == kind) {
                        if std::intrinsics::likely(!inc.is_dirty(start, end)) {
                            let node_len = node.text_len() as usize;
                            let target_new_end = new_pos + node_len;

                            // Verify the new token stream covers exactly the
                            // same byte span contiguously before committing.
                            let mut lookahead = 0;
                            let mut current_pos = new_pos;
                            let mut total_length = 0;

                            while let Some(t) = self.tokens.peek_at(lookahead) {
                                if t.span.start != current_pos {
                                    // Gap in coverage: tokens don't line up.
                                    break;
                                }
                                total_length += t.length();
                                current_pos = t.span.end;

                                if total_length == node_len && current_pos == target_new_end {
                                    // Exact match: clone into our arena and
                                    // jump the cursor past the reused tokens.
                                    let tokens_consumed = lookahead + 1;
                                    let new_node = deep_clone_node(node, self.sink.arena);
                                    self.sink.push_node(new_node);
                                    self.tokens.set_index(current_index + tokens_consumed);
                                    inc.cursor.step_over();
                                    return true;
                                }
                                else if total_length > node_len {
                                    // Overshot the old node's span: no match.
                                    break;
                                }
                                lookahead += 1;
                            }
                        }
                    }
                }
                // Right offset but wrong node: try a smaller child, then a sibling.
                if !inc.cursor.step_into() && !inc.cursor.step_next() {
                    return false;
                }
            }
            else if std::intrinsics::likely(start < target_old_pos && end > target_old_pos) {
                // Target lies inside the current subtree: descend, else move on.
                if !inc.cursor.step_into() {
                    if !inc.cursor.step_next() {
                        return false;
                    }
                }
            }
            else if std::intrinsics::likely(end <= target_old_pos) {
                // Current subtree ends before the target: skip to next sibling.
                if !inc.cursor.step_next() {
                    return false;
                }
            }
            else {
                // Cursor has moved past the target — cannot happen to match.
                return false;
            }

            steps += 1;
            if steps > MAX_STEPS {
                return false;
            }
        }
    }

    /// Consumes the state, pairing the parse result with collected diagnostics.
    pub fn finish(self, result: Result<&'a GreenNode<'a, L>, OakError>) -> crate::parser::ParseOutput<'a, L> {
        crate::errors::OakDiagnostics { result, diagnostics: self.errors }
    }

    /// Parses a node of `kind` via `f`, first attempting incremental reuse.
    /// The node is finished even when `f` errors, so partial trees stay intact.
    pub fn incremental_node<F>(&mut self, kind: L::ElementType, f: F) -> Result<(), OakError>
    where
        F: FnOnce(&mut Self) -> Result<(), OakError>,
    {
        if self.try_reuse(kind) {
            return Ok(());
        };

        let checkpoint = self.checkpoint();
        let res = f(self);
        self.finish_at(checkpoint, kind);
        res
    }

    /// Like [`Self::incremental_node`] but for optional constructs: `f`
    /// returns whether it produced anything, and no node is created when it
    /// returns `Ok(false)`.
    pub fn incremental_opt<F>(&mut self, kind: L::ElementType, f: F) -> Result<bool, OakError>
    where
        F: FnOnce(&mut Self) -> Result<bool, OakError>,
    {
        if self.try_reuse(kind) {
            return Ok(true);
        };

        let checkpoint = self.checkpoint();
        match f(self) {
            Ok(true) => {
                self.finish_at(checkpoint, kind);
                Ok(true)
            }
            Ok(false) => Ok(false),
            Err(e) => {
                self.finish_at(checkpoint, kind);
                Err(e)
            }
        }
    }
}