1use crate::{
2 Language,
3 errors::OakError,
4 lexer::{LexOutput, Token, Tokens},
5 memory::arena::SyntaxArena,
6 source::Source,
7 tree::{Cursor, GreenLeaf, GreenNode, GreenTree},
8};
9use core::range::Range;
10use triomphe::Arc;
11
12pub fn deep_clone_node<'a, L: Language>(node: &GreenNode<'_, L>, arena: &'a SyntaxArena) -> &'a GreenNode<'a, L> {
17 let children_iter = node.children.iter().map(|child| match child {
18 GreenTree::Node(n) => GreenTree::Node(deep_clone_node(n, arena)),
19 GreenTree::Leaf(l) => GreenTree::Leaf(*l),
20 });
21
22 let slice = arena.alloc_slice_fill_iter(node.children.len(), children_iter);
23 let new_node = GreenNode::new(node.kind, slice);
24 arena.alloc(new_node)
25}
26
/// A cursor over the lexed token stream, supporting arbitrary
/// save/restore of the position for backtracking parsers.
pub struct TokenSource<L: Language> {
    // The shared token buffer produced by the lexer.
    tokens: Tokens<L>,
    // Index of the token currently under the cursor.
    index: usize,
}
32
// Manual impl: `#[derive(Clone)]` would add an `L: Clone` bound that the
// fields do not actually require.
impl<L: Language> Clone for TokenSource<L> {
    fn clone(&self) -> Self {
        Self { tokens: self.tokens.clone(), index: self.index }
    }
}
38
impl<L: Language> TokenSource<L> {
    /// Creates a cursor positioned at the first token.
    pub fn new(tokens: Tokens<L>) -> Self {
        Self { tokens, index: 0 }
    }

    /// The token under the cursor, or `None` once past the end.
    #[inline]
    pub fn current(&self) -> Option<&Token<L::TokenType>> {
        self.tokens.get(self.index)
    }

    /// The token `offset` positions ahead of the cursor (0 = current).
    #[inline]
    pub fn peek_at(&self, offset: usize) -> Option<&Token<L::TokenType>> {
        self.tokens.get(self.index + offset)
    }

    /// Moves the cursor forward by one token (may move past the end).
    #[inline]
    pub fn advance(&mut self) {
        self.index += 1;
    }

    /// True once the cursor has consumed every token.
    #[inline]
    pub fn is_end(&self) -> bool {
        self.index >= self.tokens.len()
    }

    /// Current cursor position; pair with `set_index` for backtracking.
    #[inline]
    pub fn index(&self) -> usize {
        self.index
    }

    /// Rewinds or fast-forwards the cursor to `index`.
    #[inline]
    pub fn set_index(&mut self, index: usize) {
        self.index = index;
    }
}
81
/// Accumulates children and assembles them into arena-allocated green nodes.
pub struct TreeSink<'a, L: Language> {
    // Arena that owns every node and child slice this sink produces.
    arena: &'a SyntaxArena,
    // Flat stack of children not yet wrapped into a finished node.
    children: Vec<GreenTree<'a, L>>,
}
87
88impl<'a, L: Language> TreeSink<'a, L> {
89 pub fn new(arena: &'a SyntaxArena, capacity_hint: usize) -> Self {
91 Self { arena, children: Vec::with_capacity(capacity_hint) }
92 }
93
94 pub fn push_leaf(&mut self, kind: L::TokenType, len: usize) {
96 self.children.push(GreenTree::Leaf(GreenLeaf::new(kind, len as u32)));
97 }
98
99 pub fn push_node(&mut self, node: &'a GreenNode<'a, L>) {
101 self.children.push(GreenTree::Node(node));
102 }
103
104 pub fn checkpoint(&self) -> usize {
106 self.children.len()
107 }
108
109 pub fn arena(&self) -> &'a SyntaxArena {
111 self.arena
112 }
113
114 pub fn restore(&mut self, checkpoint: usize) {
116 self.children.truncate(checkpoint);
117 }
118
119 pub fn finish_node(&mut self, checkpoint: usize, kind: L::ElementType) -> &'a GreenNode<'a, L> {
121 let children_slice = self.arena.alloc_slice_copy(&self.children[checkpoint..]);
122 self.children.truncate(checkpoint);
123 let node = GreenNode::new(kind, children_slice);
124 let node_ref = self.arena.alloc(node);
125 self.children.push(GreenTree::Node(node_ref));
126 node_ref
127 }
128}
129
/// State for reusing unchanged subtrees from a previous parse.
pub struct IncrementalContext<'a, L: Language> {
    // Cursor walking the OLD tree in search of reusable nodes.
    cursor: Cursor<'a, L>,
    // Edits sorted by start offset, each paired with the cumulative byte
    // delta (new length minus old length) up to and including that edit.
    edits: Vec<(Range<usize>, isize)>,
}
137
impl<'a, L: Language> IncrementalContext<'a, L> {
    /// Builds a reuse context from the previous tree root and the text edits
    /// applied since it was produced. Edits are sorted by start offset and
    /// annotated with the running byte-length delta.
    pub fn new(old_root: &'a GreenNode<'a, L>, edits: &[crate::source::TextEdit]) -> Self {
        let mut sorted_edits: Vec<_> = edits.iter().collect();
        sorted_edits.sort_by_key(|e| e.span.start);

        let mut cumulative_delta = 0isize;
        let mut processed_edits = Vec::with_capacity(edits.len());

        for edit in sorted_edits {
            // This edit's contribution = replacement length - replaced length.
            let old_len = edit.span.end - edit.span.start;
            let new_len = edit.text.len();
            cumulative_delta += new_len as isize - old_len as isize;
            processed_edits.push((edit.span, cumulative_delta));
        }

        Self { cursor: Cursor::new(old_root), edits: processed_edits }
    }

    /// Maps an offset in the NEW text back to the corresponding offset in
    /// the OLD text. Returns `None` when the position falls inside an edited
    /// region (it has no old-text counterpart).
    fn map_new_to_old(&self, new_pos: usize) -> Option<usize> {
        // Delta accumulated from all edits strictly before the current one.
        let mut current_delta = 0isize;

        for (old_range, cumulative_delta) in &self.edits {
            // This edit's region expressed in NEW-text coordinates.
            let new_start = (old_range.start as isize + current_delta) as usize;
            let edit_delta = *cumulative_delta - current_delta;
            let new_end = (old_range.end as isize + current_delta + edit_delta) as usize;

            if new_pos < new_start {
                // Before this edit: only the earlier edits shift the position.
                return Some((new_pos as isize - current_delta) as usize);
            }
            else if new_pos < new_end {
                // Inside replaced text: unmappable.
                return None;
            }

            current_delta = *cumulative_delta;
        }

        // Past every edit: shift by the total accumulated delta.
        Some((new_pos as isize - current_delta) as usize)
    }

    /// True when the OLD-text range [old_start, old_end) overlaps any edit,
    /// i.e. the corresponding old subtree cannot be reused as-is.
    fn is_dirty(&self, old_start: usize, old_end: usize) -> bool {
        for (old_range, _) in &self.edits {
            // Standard half-open interval overlap test.
            if old_start < old_range.end && old_end > old_range.start {
                return true;
            }
        }
        false
    }
}
189
/// Mutable state threaded through a parse: token cursor, output sink,
/// optional incremental-reuse context, and collected diagnostics.
pub struct ParserState<'a, L: Language, S: Source + ?Sized = crate::source::SourceText> {
    /// Cursor over the lexed token stream.
    pub tokens: TokenSource<L>,

    /// Builder for the green tree being produced.
    pub sink: TreeSink<'a, L>,

    /// Present when reparsing incrementally against a previous tree.
    pub incremental: Option<IncrementalContext<'a, L>>,

    /// Errors accumulated so far (lexer diagnostics plus parse errors).
    pub errors: Vec<OakError>,
    /// The source text being parsed.
    pub source: &'a S,
}
206
207impl<'a, L: Language, S: Source + ?Sized> ParserState<'a, L, S> {
208 pub fn source_id(&self) -> Option<crate::source::SourceId> {
210 self.source.source_id()
211 }
212
213 pub fn arena(&self) -> &'a SyntaxArena {
215 self.sink.arena()
216 }
217
218 pub fn new(arena: &'a SyntaxArena, lex_output: LexOutput<L>, source: &'a S, capacity_hint: usize) -> Self {
226 let (tokens, mut errors) = match lex_output.result {
227 Ok(tokens) => (tokens, Vec::new()),
228 Err(e) => (Arc::from(Vec::new()), vec![e]),
229 };
230 errors.extend(lex_output.diagnostics);
231
232 Self { tokens: TokenSource::new(tokens), sink: TreeSink::new(arena, capacity_hint), incremental: None, errors, source }
233 }
234
235 pub fn nested(&self) -> ParserState<'a, L, S> {
238 ParserState { tokens: self.tokens.clone(), sink: TreeSink::new(self.sink.arena, 1024), incremental: None, errors: Vec::new(), source: self.source }
239 }
240
241 pub fn peek_text(&self) -> Option<std::borrow::Cow<'_, str>> {
243 self.current().map(|t| self.source.get_text_in(t.span))
244 }
245
246 pub fn promote(&mut self, node: &'a GreenNode<'a, L>) -> &'a GreenNode<'a, L> {
249 deep_clone_node(node, self.sink.arena)
250 }
251
252 pub fn set_incremental(&mut self, old: &'a GreenNode<'a, L>, edits: &[crate::source::TextEdit]) {
258 self.incremental = Some(IncrementalContext::new(old, edits));
259 }
260
261 pub fn current_offset(&self) -> usize {
265 self.tokens.current().map(|t| t.span.start).unwrap_or_else(|| self.source.length())
266 }
267
268 pub fn syntax_error(&mut self, message: impl Into<String>) -> OakError {
270 let err = OakError::syntax_error(message, self.current_offset(), self.source.source_id());
271 self.errors.push(err.clone());
272 err
273 }
274
275 pub fn record_unexpected_token(&mut self, token: impl Into<String>) {
277 let err = OakError::unexpected_token(token, self.current_offset(), self.source.source_id());
278 self.errors.push(err);
279 }
280
281 pub fn record_expected(&mut self, expected: impl Into<String>) {
283 let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
284 let err = OakError::expected_token(expected, offset, self.source.source_id());
285 self.errors.push(err);
286 }
287
288 pub fn record_expected_name(&mut self, name_kind: impl Into<String>) {
290 let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
291 let err = OakError::expected_name(name_kind, offset, self.source.source_id());
292 self.errors.push(err);
293 }
294
295 pub fn record_trailing_comma_not_allowed(&mut self) {
297 let offset = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
298 let err = OakError::trailing_comma_not_allowed(offset, self.source.source_id());
299 self.errors.push(err);
300 }
301
302 pub fn record_unexpected_eof(&mut self) {
304 let offset = self.source.length();
305 let err = OakError::unexpected_eof(offset, self.source.source_id());
306 self.errors.push(err);
307 }
308
309 pub fn unexpected_eof(&mut self) -> OakError {
311 let offset = self.source.length();
312 let err = OakError::unexpected_eof(offset, self.source.source_id());
313 self.errors.push(err.clone());
314 err
315 }
316
317 #[inline]
321 pub fn current(&self) -> Option<&Token<L::TokenType>> {
322 self.tokens.current()
323 }
324
325 #[inline]
327 pub fn peek_kind(&self) -> Option<L::TokenType> {
328 self.current().map(|t| t.kind)
329 }
330
331 #[inline]
333 pub fn peek_at(&self, offset: usize) -> Option<&Token<L::TokenType>> {
334 self.tokens.peek_at(offset)
335 }
336
337 #[inline]
339 pub fn peek_kind_at(&self, offset: usize) -> Option<L::TokenType> {
340 self.tokens.peek_at(offset).map(|t| t.kind)
341 }
342
343 #[inline]
345 pub fn not_at_end(&self) -> bool {
346 !self.tokens.is_end()
347 }
348
349 #[inline]
351 pub fn at(&self, kind: L::TokenType) -> bool {
352 self.peek_kind() == Some(kind)
353 }
354
355 #[inline]
357 pub fn not_at(&self, kind: L::TokenType) -> bool {
358 !self.at(kind)
359 }
360
361 pub fn advance(&mut self) {
363 self.tokens.advance();
364 }
365
366 pub fn advance_until(&mut self, kind: L::TokenType) {
368 while self.not_at_end() && !self.at(kind) {
369 self.advance();
370 }
371 }
372
373 pub fn advance_until_any(&mut self, kinds: &[L::TokenType]) {
375 while self.not_at_end() && !kinds.iter().any(|&k| self.at(k)) {
376 self.advance();
377 }
378 }
379
380 pub fn bump(&mut self)
382 where
383 L::ElementType: From<L::TokenType>,
384 {
385 if let Some(token) = self.current() {
386 self.sink.push_leaf(token.kind, token.length());
387 self.tokens.advance();
388 }
389 }
390
391 pub fn eat(&mut self, kind: L::TokenType) -> bool
394 where
395 L::ElementType: From<L::TokenType>,
396 {
397 if self.at(kind) {
398 self.bump();
399 true
400 }
401 else {
402 false
403 }
404 }
405
406 pub fn expect(&mut self, kind: L::TokenType) -> Result<(), OakError>
409 where
410 L::ElementType: From<L::TokenType>,
411 {
412 if self.eat(kind) {
413 Ok(())
414 }
415 else {
416 let err = OakError::expected_token(format!("{:?}", kind), self.current_offset(), self.source.source_id());
417 self.errors.push(err.clone());
418 Err(err)
419 }
420 }
421
422 pub fn try_parse<T, F>(&mut self, parser: F) -> Result<T, OakError>
424 where
425 F: FnOnce(&mut Self) -> Result<T, OakError>,
426 {
427 let checkpoint = self.checkpoint();
428 match parser(self) {
429 Ok(value) => Ok(value),
430 Err(err) => {
431 self.restore(checkpoint);
432 Err(err)
433 }
434 }
435 }
436
437 pub fn try_parse_with_error<T, F>(&mut self, parser: F) -> Result<T, OakError>
442 where
443 F: FnOnce(&mut Self) -> Result<T, OakError>,
444 {
445 let checkpoint = self.checkpoint();
446 match parser(self) {
447 Ok(value) => Ok(value),
448 Err(err) => {
449 self.restore(checkpoint);
450 Err(err)
451 }
452 }
453 }
454
455 pub fn checkpoint(&self) -> (usize, usize) {
460 (self.tokens.index(), self.sink.checkpoint())
461 }
462
463 pub fn restore(&mut self, (token_index, tree_checkpoint): (usize, usize)) {
465 self.tokens.set_index(token_index);
466 self.sink.restore(tree_checkpoint);
467 }
468
469 pub fn finish_at(&mut self, checkpoint: (usize, usize), kind: L::ElementType) -> &'a GreenNode<'a, L> {
471 self.sink.finish_node(checkpoint.1, kind)
472 }
473
474 pub fn push_child(&mut self, node: &'a GreenNode<'a, L>) {
476 self.sink.push_node(node);
477 }
478
479 pub fn try_reuse(&mut self, kind: L::ElementType) -> bool {
482 let Some(inc) = &mut self.incremental
483 else {
484 return false;
485 };
486 let current_index = self.tokens.index();
487 let new_pos = self.tokens.current().map(|t| t.span.start).unwrap_or(self.source.length());
488
489 #[cfg(test)]
490 println!("try_reuse: kind={:?}, new_pos={}", kind, new_pos);
491
492 let Some(target_old_pos) = inc.map_new_to_old(new_pos)
493 else {
494 return false;
495 };
496
497 #[cfg(test)]
498 println!("Trying to reuse node at pos {}: {:?}", new_pos, kind);
499
500 let mut steps = 0;
501 const MAX_STEPS: usize = 100;
502
503 loop {
504 let start = inc.cursor.offset();
505 let end = inc.cursor.end_offset();
506
507 if start == target_old_pos {
508 if let Some(node) = inc.cursor.as_node() {
509 if node.kind == kind {
510 if !inc.is_dirty(start, end) {
511 let node_len = node.text_len() as usize;
513
514 let mut lookahead = 0;
516 let mut current_pos = new_pos;
517 let target_new_end = new_pos + node_len;
518
519 while let Some(t) = self.tokens.peek_at(lookahead) {
520 if t.span.start != current_pos {
521 break;
523 }
524 current_pos = t.span.end;
525
526 if t.span.end == target_new_end {
527 let tokens_consumed = lookahead + 1;
529 let new_node = deep_clone_node(node, self.sink.arena);
530 self.sink.push_node(new_node);
531 self.tokens.set_index(current_index + tokens_consumed);
532 inc.cursor.step_over();
533 return true;
534 }
535 else if t.span.end > target_new_end {
536 break;
537 }
538 lookahead += 1;
539 }
540 }
541 }
542 }
543 if !inc.cursor.step_into() && !inc.cursor.step_next() {
544 return false;
545 }
546 }
547 else if start < target_old_pos && end > target_old_pos {
548 if !inc.cursor.step_into() && !inc.cursor.step_next() {
549 return false;
550 }
551 }
552 else if end <= target_old_pos {
553 if !inc.cursor.step_next() {
554 return false;
555 }
556 }
557 else {
558 return false;
559 }
560
561 steps += 1;
562 if steps > MAX_STEPS {
563 return false;
564 }
565 }
566 }
567
568 pub fn finish(self, result: Result<&'a GreenNode<'a, L>, OakError>) -> crate::parser::ParseOutput<'a, L> {
570 crate::errors::OakDiagnostics { result, diagnostics: self.errors }
571 }
572
573 pub fn incremental_node<F>(&mut self, kind: L::ElementType, f: F) -> Result<(), OakError>
580 where
581 F: FnOnce(&mut Self) -> Result<(), OakError>,
582 {
583 if self.try_reuse(kind) {
584 return Ok(());
585 }
586
587 let checkpoint = self.checkpoint();
588 let res = f(self);
589 self.finish_at(checkpoint, kind);
590 res
591 }
592
593 pub fn incremental_opt<F>(&mut self, kind: L::ElementType, f: F) -> Result<bool, OakError>
598 where
599 F: FnOnce(&mut Self) -> Result<bool, OakError>,
600 {
601 if self.try_reuse(kind) {
602 return Ok(true);
603 }
604
605 let checkpoint = self.checkpoint();
606 match f(self) {
607 Ok(true) => {
608 self.finish_at(checkpoint, kind);
609 Ok(true)
610 }
611 Ok(false) => Ok(false),
612 Err(e) => {
613 self.finish_at(checkpoint, kind);
614 Err(e)
615 }
616 }
617 }
618}