1use crate::{
2 Language,
3 errors::OakError,
4 language::TokenType,
5 lexer::{LexOutput, Token, Tokens},
6 memory::arena::SyntaxArena,
7 source::Source,
8 tree::{Cursor, GreenLeaf, GreenNode, GreenTree, TokenProvenance},
9};
10use core::range::Range;
11
12pub fn deep_clone_node<'a, L: Language>(node: &GreenNode<'_, L>, arena: &'a SyntaxArena) -> &'a GreenNode<'a, L> {
17 let children_iter = node.children.iter().map(|child| match child {
18 GreenTree::Node(n) => GreenTree::Node(deep_clone_node(n, arena)),
19 GreenTree::Leaf(l) => GreenTree::Leaf(*l),
20 });
21
22 let slice = arena.alloc_slice_fill_iter(node.children.len(), children_iter);
23 let new_node = GreenNode::new(node.kind, slice);
24 arena.alloc(new_node)
25}
26
/// A read-only stream over lexed tokens with a movable read position.
pub struct TokenSource<L: Language> {
    /// Full token buffer produced by the lexer.
    tokens: Tokens<L>,
    /// Index of the token returned by `current` (may equal `tokens.len()` at end).
    index: usize,
}
34
// Manual impl rather than `#[derive(Clone)]`: the derive would add an
// `L: Clone` bound on the impl, which appears unnecessary since only the
// token buffer and index are cloned — NOTE(review): confirm `Language`
// implementors are not expected to be `Clone`.
impl<L: Language> Clone for TokenSource<L> {
    fn clone(&self) -> Self {
        Self { tokens: self.tokens.clone(), index: self.index }
    }
}
40
41impl<L: Language> TokenSource<L> {
42 pub fn new(tokens: Tokens<L>) -> Self {
44 Self { tokens, index: 0 }
45 }
46
47 #[inline]
49 pub fn current(&self) -> Option<&Token<L::TokenType>> {
50 self.tokens.get(self.index)
51 }
52
53 #[inline]
55 pub fn peek_at(&self, offset: usize) -> Option<&Token<L::TokenType>> {
56 self.tokens.get(self.index + offset)
57 }
58
59 #[inline]
61 pub fn advance(&mut self) {
62 self.index += 1;
63 }
64
65 #[inline]
67 pub fn is_end(&self) -> bool {
68 self.index >= self.tokens.len()
69 }
70
71 #[inline]
73 pub fn index(&self) -> usize {
74 self.index
75 }
76
77 #[inline]
79 pub fn set_index(&mut self, index: usize) {
80 self.index = index;
81 }
82}
83
/// Accumulates green leaves and nodes as the parser emits them, allocating
/// finished nodes into a borrowed [`SyntaxArena`].
pub struct TreeSink<'a, L: Language> {
    /// Arena that owns every finished node and child slice.
    arena: &'a SyntaxArena,
    /// Children pushed since the innermost unfinished node was opened.
    children: Vec<GreenTree<'a, L>>,
}
91
92impl<'a, L: Language> TreeSink<'a, L> {
93 pub fn new(arena: &'a SyntaxArena, capacity_hint: usize) -> Self {
95 Self { arena, children: Vec::with_capacity(capacity_hint) }
96 }
97
98 pub fn push_leaf(&mut self, kind: L::TokenType, len: usize) {
100 self.children.push(GreenTree::Leaf(GreenLeaf::new(kind, len as u32)));
101 }
102
103 pub fn push_leaf_with_metadata(&mut self, kind: L::TokenType, len: usize, provenance: TokenProvenance) {
105 let index = self.arena.add_metadata(provenance);
106 self.children.push(GreenTree::Leaf(GreenLeaf::with_metadata(kind, len as u32, Some(index))));
107 }
108
109 pub fn push_node(&mut self, node: &'a GreenNode<'a, L>) {
111 self.children.push(GreenTree::Node(node));
112 }
113
114 pub fn checkpoint(&self) -> usize {
116 self.children.len()
117 }
118
119 pub fn arena(&self) -> &'a SyntaxArena {
121 self.arena
122 }
123
124 pub fn restore(&mut self, checkpoint: usize) {
126 self.children.truncate(checkpoint)
127 }
128
129 pub fn finish_node(&mut self, checkpoint: usize, kind: L::ElementType) -> &'a GreenNode<'a, L> {
131 let children_slice = self.arena.alloc_slice_copy(&self.children[checkpoint..]);
132 self.children.truncate(checkpoint);
133 let node = GreenNode::new(kind, children_slice);
134 let node_ref = self.arena.alloc(node);
135 self.children.push(GreenTree::Node(node_ref));
136 node_ref
137 }
138}
139
/// State required to reuse subtrees of a previous parse after text edits.
pub struct IncrementalContext<'a, L: Language> {
    /// Walks the old tree while the new parse searches for reusable nodes.
    cursor: Cursor<'a, L>,
    /// Edits sorted by start offset (old coordinates), each paired with the
    /// *cumulative* byte-length delta of all edits up to and including it.
    edits: Vec<(Range<usize>, isize)>,
}
148
impl<'a, L: Language> IncrementalContext<'a, L> {
    /// Builds a reuse context from the previous tree and the text edits.
    ///
    /// Edits are sorted by start offset, and each is stored with the running
    /// (cumulative) delta so later offset mapping is a single forward scan.
    pub fn new(old_root: &'a GreenNode<'a, L>, edits: &[crate::source::TextEdit]) -> Self {
        let mut sorted_edits: Vec<_> = edits.iter().collect();
        sorted_edits.sort_by_key(|e| e.span.start);

        let mut cumulative_delta = 0isize;
        let mut processed_edits = Vec::with_capacity(edits.len());

        for edit in sorted_edits {
            // Delta contributed by this edit: inserted length minus removed length.
            let old_len = edit.span.end - edit.span.start;
            let new_len = edit.text.len();
            cumulative_delta += new_len as isize - old_len as isize;
            // Span stays in *old* coordinates; delta is cumulative over all
            // edits seen so far.
            processed_edits.push((edit.span, cumulative_delta));
        }

        Self { cursor: Cursor::new(old_root), edits: processed_edits }
    }

    /// Maps an offset in the edited (new) text back to the old text.
    ///
    /// Returns `None` when `new_pos` falls inside text produced by an edit,
    /// i.e. there is no corresponding old position to reuse from.
    fn map_new_to_old(&self, new_pos: usize) -> Option<usize> {
        // Delta accumulated from all edits strictly before the scan position.
        let mut current_delta = 0isize;

        for (old_range, cumulative_delta) in &self.edits {
            // Project this edit's old span into new coordinates using the
            // delta accumulated so far plus this edit's own delta.
            let new_start = (old_range.start as isize + current_delta) as usize;
            let edit_delta = *cumulative_delta - current_delta;
            let new_end = (old_range.end as isize + current_delta + edit_delta) as usize;

            if new_pos < new_start {
                // Before this edit: only earlier edits shift the position.
                return Some((new_pos as isize - current_delta) as usize);
            }
            else if new_pos < new_end {
                // Inside the replacement text: no old equivalent exists.
                return None;
            }

            current_delta = *cumulative_delta;
        }

        // Past every edit: undo the total accumulated shift.
        Some((new_pos as isize - current_delta) as usize)
    }

    /// `true` when the old-text range [`old_start`, `old_end`) overlaps any
    /// edit and therefore cannot be reused verbatim.
    fn is_dirty(&self, old_start: usize, old_end: usize) -> bool {
        for (old_range, _) in &self.edits {
            // Half-open interval overlap test in old coordinates.
            if old_start < old_range.end && old_end > old_range.start {
                return true;
            }
        }
        false
    }
}
200
/// Everything a hand-written parser needs while building a green tree:
/// the token stream, the output sink, optional incremental-reuse state,
/// accumulated diagnostics, and the source text.
pub struct ParserState<'a, L: Language, S: Source + ?Sized = crate::source::SourceText> {
    /// Token stream with the current read position.
    pub tokens: TokenSource<L>,

    /// Sink accumulating the green tree under construction.
    pub sink: TreeSink<'a, L>,

    /// Present only when re-parsing after edits; enables subtree reuse.
    pub incremental: Option<IncrementalContext<'a, L>>,

    /// Diagnostics collected so far (lexer errors are seeded in `new`).
    pub errors: Vec<OakError>,
    /// The source text being parsed.
    pub source: &'a S,
}
217
218impl<'a, L: Language, S: Source + ?Sized> ParserState<'a, L, S> {
219 pub fn source_id(&self) -> Option<crate::source::SourceId> {
221 self.source.source_id()
222 }
223
224 pub fn arena(&self) -> &'a SyntaxArena {
226 self.sink.arena()
227 }
228
229 pub fn new(arena: &'a SyntaxArena, lex_output: LexOutput<L>, source: &'a S, capacity_hint: usize) -> Self {
237 let (tokens, mut errors) = match lex_output.result {
238 Ok(tokens) => (tokens, Vec::new()),
239 Err(e) => (Tokens::default(), vec![e]),
240 };
241 errors.extend(lex_output.diagnostics);
242
243 let mut st = Self { tokens: TokenSource::new(tokens), sink: TreeSink::new(arena, capacity_hint), incremental: None, errors, source };
244 st.skip_trivia();
245 st
246 }
247
248 pub fn nested(&self) -> ParserState<'a, L, S> {
251 ParserState { tokens: self.tokens.clone(), sink: TreeSink::new(self.sink.arena, 1024), incremental: None, errors: Vec::new(), source: self.source }
252 }
253
254 pub fn peek_text(&self) -> Option<std::borrow::Cow<'_, str>> {
256 self.current().map(|t| self.source.get_text_in(t.span))
257 }
258
259 pub fn promote(&mut self, node: &'a GreenNode<'a, L>) -> &'a GreenNode<'a, L> {
262 deep_clone_node(node, self.sink.arena)
263 }
264
265 pub fn set_incremental(&mut self, old: &'a GreenNode<'a, L>, edits: &[crate::source::TextEdit]) {
271 self.incremental = Some(IncrementalContext::new(old, edits));
272 }
273
274 pub fn current_offset(&self) -> usize {
278 self.tokens.current().map(|t| t.span.start).unwrap_or_else(|| self.source.length())
279 }
280
281 pub fn syntax_error(&mut self, message: impl Into<String>) -> OakError {
283 let err = OakError::syntax_error(message, self.current_offset(), self.source.source_id());
284 self.errors.push(err.clone());
285 err
286 }
287
288 pub fn record_unexpected_token(&mut self, token: impl Into<String>) {
290 let err = OakError::unexpected_token(token, self.current_offset(), self.source.source_id());
291 self.errors.push(err)
292 }
293
294 pub fn record_expected(&mut self, expected: impl Into<String>) {
296 let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
297 let err = OakError::expected_token(expected, offset, self.source.source_id());
298 self.errors.push(err)
299 }
300
301 pub fn record_expected_name(&mut self, name_kind: impl Into<String>) {
303 let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
304 let err = OakError::expected_name(name_kind, offset, self.source.source_id());
305 self.errors.push(err)
306 }
307
308 pub fn record_trailing_comma_not_allowed(&mut self) {
310 let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
311 let err = OakError::trailing_comma_not_allowed(offset, self.source.source_id());
312 self.errors.push(err)
313 }
314
315 pub fn record_unexpected_eof(&mut self) {
317 let offset = self.source.length();
318 let err = OakError::unexpected_eof(offset, self.source.source_id());
319 self.errors.push(err)
320 }
321
322 pub fn unexpected_eof(&mut self) -> OakError {
324 let offset = self.source.length();
325 let err = OakError::unexpected_eof(offset, self.source.source_id());
326 self.errors.push(err.clone());
327 err
328 }
329
330 #[inline]
334 pub fn current(&self) -> Option<&Token<L::TokenType>> {
335 self.tokens.current()
336 }
337
338 #[inline]
340 pub fn peek_kind(&self) -> Option<L::TokenType> {
341 self.current().map(|t| t.kind)
342 }
343
344 #[inline]
346 pub fn peek_at(&self, offset: usize) -> Option<&Token<L::TokenType>> {
347 self.tokens.peek_at(offset)
348 }
349
350 pub fn peek_non_trivia_at(&self, mut n: usize) -> Option<&Token<L::TokenType>> {
352 let mut offset = 0;
353 while let Some(token) = self.tokens.peek_at(offset) {
354 if !L::TokenType::is_ignored(&token.kind) {
355 if n == 0 {
356 return Some(token);
357 }
358 n -= 1
359 }
360 offset += 1
361 }
362 None
363 }
364
365 #[inline]
367 pub fn peek_kind_at(&self, offset: usize) -> Option<L::TokenType> {
368 self.tokens.peek_at(offset).map(|t| t.kind)
369 }
370
371 pub fn peek_non_trivia_kind_at(&self, n: usize) -> Option<L::TokenType> {
373 self.peek_non_trivia_at(n).map(|t| t.kind)
374 }
375
376 #[inline]
378 pub fn not_at_end(&self) -> bool {
379 !self.tokens.is_end()
380 }
381
382 #[inline]
384 pub fn at(&self, kind: L::TokenType) -> bool {
385 self.peek_kind() == Some(kind)
386 }
387
388 #[inline]
390 pub fn not_at(&self, kind: L::TokenType) -> bool {
391 !self.at(kind)
392 }
393
394 pub fn advance(&mut self) {
396 self.tokens.advance();
397 self.skip_trivia()
398 }
399
400 pub fn skip_trivia(&mut self) {
402 while let Some(token) = self.tokens.current() {
403 if L::TokenType::is_ignored(&token.kind) {
404 self.sink.push_leaf(token.kind, token.length());
405 self.tokens.advance()
406 }
407 else {
408 break;
409 }
410 }
411 }
412
413 pub fn advance_until(&mut self, kind: L::TokenType) {
415 while self.not_at_end() && !self.at(kind) {
416 self.advance()
417 }
418 }
419
420 pub fn advance_until_any(&mut self, kinds: &[L::TokenType]) {
422 while self.not_at_end() && !kinds.iter().any(|&k| self.at(k)) {
423 self.advance()
424 }
425 }
426
427 pub fn bump(&mut self) {
429 if let Some(token) = self.current() {
430 self.sink.push_leaf(token.kind, token.length());
431 self.advance()
432 }
433 }
434
435 pub fn eat(&mut self, kind: L::TokenType) -> bool {
438 if self.at(kind) {
439 self.bump();
440 true
441 }
442 else {
443 false
444 }
445 }
446
447 pub fn expect(&mut self, kind: L::TokenType) -> Result<(), OakError> {
450 if self.eat(kind) {
451 Ok(())
452 }
453 else {
454 let err = OakError::expected_token(format!("{:?}", kind), self.current_offset(), self.source.source_id());
455 self.errors.push(err.clone());
456 Err(err)
457 }
458 }
459
460 pub fn try_parse<T, F>(&mut self, parser: F) -> Result<T, OakError>
462 where
463 F: FnOnce(&mut Self) -> Result<T, OakError>,
464 {
465 let checkpoint = self.checkpoint();
466 match parser(self) {
467 Ok(value) => Ok(value),
468 Err(err) => {
469 self.restore(checkpoint);
470 Err(err)
471 }
472 }
473 }
474
475 pub fn try_parse_with_error<T, F>(&mut self, parser: F) -> Result<T, OakError>
480 where
481 F: FnOnce(&mut Self) -> Result<T, OakError>,
482 {
483 let checkpoint = self.checkpoint();
484 match parser(self) {
485 Ok(value) => Ok(value),
486 Err(err) => {
487 self.restore(checkpoint);
488 Err(err)
489 }
490 }
491 }
492
493 pub fn checkpoint(&self) -> (usize, usize) {
498 (self.tokens.index(), self.sink.checkpoint())
499 }
500
501 pub fn restore(&mut self, (token_index, tree_checkpoint): (usize, usize)) {
503 self.tokens.set_index(token_index);
504 self.sink.restore(tree_checkpoint)
505 }
506
507 pub fn finish_at(&mut self, checkpoint: (usize, usize), kind: L::ElementType) -> &'a GreenNode<'a, L> {
509 self.sink.finish_node(checkpoint.1, kind)
510 }
511
512 pub fn checkpoint_before(&mut self, node: &'a GreenNode<'a, L>) -> (usize, usize) {
514 let sink_checkpoint = self.sink.checkpoint();
516 for i in (0..sink_checkpoint).rev() {
517 if let Some(child) = self.sink.children.get(i) {
518 if let GreenTree::Node(n) = child {
519 if std::ptr::eq(*n, node) {
520 return (0, i); }
522 }
523 }
524 }
525 (0, sink_checkpoint)
526 }
527
528 pub fn push_child(&mut self, node: &'a GreenNode<'a, L>) {
530 self.sink.push_node(node)
531 }
532
533 pub fn try_reuse(&mut self, kind: L::ElementType) -> bool {
536 let Some(inc) = &mut self.incremental
537 else {
538 return false;
539 };
540 let current_index = self.tokens.index();
541 let new_pos = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
542
543 #[cfg(test)]
544 println!("try_reuse: kind={:?}, new_pos={}", kind, new_pos);
545
546 let Some(target_old_pos) = inc.map_new_to_old(new_pos)
547 else {
548 return false;
549 };
550
551 #[cfg(test)]
552 println!("Trying to reuse node at pos {};: {:?}", new_pos, kind);
553
554 let mut steps = 0;
555 const MAX_STEPS: usize = 100;
556
557 loop {
558 let start = inc.cursor.offset();
559 let end = inc.cursor.end_offset();
560
561 if start == target_old_pos {
562 if let Some(node) = inc.cursor.as_node() {
563 if node.kind == kind {
564 if !inc.is_dirty(start, end) {
565 let node_len = node.text_len() as usize;
567
568 let mut lookahead = 0;
570 let mut current_pos = new_pos;
571 let target_new_end = new_pos + node_len;
572
573 while let Some(t) = self.tokens.peek_at(lookahead) {
574 if t.span.start != current_pos {
575 break;
577 }
578 current_pos = t.span.end;
579
580 if t.span.end == target_new_end {
581 let tokens_consumed = lookahead + 1;
583 let new_node = deep_clone_node(node, self.sink.arena);
584 self.sink.push_node(new_node);
585 self.tokens.set_index(current_index + tokens_consumed);
586 inc.cursor.step_over();
587 return true;
588 }
589 else if t.span.end > target_new_end {
590 break;
591 }
592 lookahead += 1;
593 }
594 }
595 }
596 }
597 if !inc.cursor.step_into() && !inc.cursor.step_next() {
598 return false;
599 }
600 }
601 else if start < target_old_pos && end > target_old_pos {
602 if !inc.cursor.step_into() && !inc.cursor.step_next() {
603 return false;
604 }
605 }
606 else if end <= target_old_pos {
607 if !inc.cursor.step_next() {
608 return false;
609 }
610 }
611 else {
612 return false;
613 }
614
615 steps += 1;
616 if steps > MAX_STEPS {
617 return false;
618 }
619 }
620 }
621
622 pub fn finish(self, result: Result<&'a GreenNode<'a, L>, OakError>) -> crate::parser::ParseOutput<'a, L> {
624 crate::errors::OakDiagnostics { result, diagnostics: self.errors }
625 }
626
627 pub fn incremental_node<F>(&mut self, kind: L::ElementType, f: F) -> Result<(), OakError>
634 where
635 F: FnOnce(&mut Self) -> Result<(), OakError>,
636 {
637 if self.try_reuse(kind) {
638 return Ok(());
639 };
640
641 let checkpoint = self.checkpoint();
642 let res = f(self);
643 self.finish_at(checkpoint, kind);
644 res
645 }
646
647 pub fn incremental_opt<F>(&mut self, kind: L::ElementType, f: F) -> Result<bool, OakError>
652 where
653 F: FnOnce(&mut Self) -> Result<bool, OakError>,
654 {
655 if self.try_reuse(kind) {
656 return Ok(true);
657 };
658
659 let checkpoint = self.checkpoint();
660 match f(self) {
661 Ok(true) => {
662 self.finish_at(checkpoint, kind);
663 Ok(true)
664 }
665 Ok(false) => Ok(false),
666 Err(e) => {
667 self.finish_at(checkpoint, kind);
668 Err(e)
669 }
670 }
671 }
672}