ra_ap_syntax_bridge/
lib.rs

//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].

use std::{collections::VecDeque, fmt, hash::Hash};

use intern::Symbol;
use rustc_hash::{FxHashMap, FxHashSet};
use span::{Edition, SpanAnchor, SpanData, SpanMap};
use stdx::{format_to, never};
use syntax::{
    AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement,
    SyntaxKind::{self, *},
    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, T, TextRange, TextSize, WalkEvent,
    ast::{self, make::tokens::doc_comment},
    format_smolstr,
};
use tt::{Punct, buffer::Cursor, token_to_literal};

pub mod prettify_macro_expansion;
mod to_parser_input;
pub use to_parser_input::to_parser_input;
// FIXME: we probably should re-think `token_tree_to_syntax_node` interfaces
pub use ::parser::TopEntryPoint;

#[cfg(test)]
mod tests;

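/// Maps a text range in the converted source to the span to attach to the produced tokens.
///
/// A minimal sketch of an implementation that hands out a single fixed span (hypothetical
/// type; `DummyTestSpanMap` below plays a similar role for tests):
/// ```ignore
/// struct ConstSpanMap<S>(S);
/// impl<S: Copy> SpanMapper<S> for ConstSpanMap<S> {
///     fn span_for(&self, _range: TextRange) -> S {
///         self.0
///     }
/// }
/// ```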
pub trait SpanMapper<S> {
    fn span_for(&self, range: TextRange) -> S;
}

impl<S> SpanMapper<SpanData<S>> for SpanMap<S>
where
    SpanData<S>: Copy,
{
    fn span_for(&self, range: TextRange) -> SpanData<S> {
        self.span_at(range.start())
    }
}

impl<S: Copy, SM: SpanMapper<S>> SpanMapper<S> for &SM {
    fn span_for(&self, range: TextRange) -> S {
        SM::span_for(self, range)
    }
}

/// Dummy things for testing where spans don't matter.
pub mod dummy_test_span_utils {

    use span::{Span, SyntaxContext};

    use super::*;

    pub const DUMMY: Span = Span {
        range: TextRange::empty(TextSize::new(0)),
        anchor: span::SpanAnchor {
            file_id: span::EditionedFileId::new(
                span::FileId::from_raw(0xe4e4e),
                span::Edition::CURRENT,
            ),
            ast_id: span::ROOT_ERASED_FILE_AST_ID,
        },
        ctx: SyntaxContext::root(Edition::CURRENT),
    };

    pub struct DummyTestSpanMap;

    impl SpanMapper<Span> for DummyTestSpanMap {
        fn span_for(&self, range: syntax::TextRange) -> Span {
            Span {
                range,
                anchor: span::SpanAnchor {
                    file_id: span::EditionedFileId::new(
                        span::FileId::from_raw(0xe4e4e),
                        span::Edition::CURRENT,
                    ),
                    ast_id: span::ROOT_ERASED_FILE_AST_ID,
                },
                ctx: SyntaxContext::root(Edition::CURRENT),
            }
        }
    }
}

/// Doc comment desugaring differs between mbe and proc-macros.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum DocCommentDesugarMode {
    /// Desugars doc comments as quoted raw strings
    Mbe,
    /// Desugars doc comments as quoted strings
    ProcMacro,
}

/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
/// subtree's spans.
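///
/// A minimal sketch using the dummy span helpers from [`dummy_test_span_utils`] (real
/// callers supply a meaningful span map and call-site span):
/// ```ignore
/// use syntax::AstNode;
///
/// let parse = syntax::SourceFile::parse("struct S;", span::Edition::CURRENT);
/// let source_file = parse.tree();
/// let tt = syntax_node_to_token_tree(
///     source_file.syntax(),
///     dummy_test_span_utils::DummyTestSpanMap,
///     dummy_test_span_utils::DUMMY,
///     DocCommentDesugarMode::Mbe,
/// );
/// ```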
pub fn syntax_node_to_token_tree<Ctx, SpanMap>(
    node: &SyntaxNode,
    map: SpanMap,
    span: SpanData<Ctx>,
    mode: DocCommentDesugarMode,
) -> tt::TopSubtree<SpanData<Ctx>>
where
    SpanData<Ctx>: Copy + fmt::Debug,
    SpanMap: SpanMapper<SpanData<Ctx>>,
{
    let mut c =
        Converter::new(node, map, Default::default(), Default::default(), span, mode, |_, _| {
            (true, Vec::new())
        });
    convert_tokens(&mut c)
}

/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
/// subtree's spans. Additionally, the `append` and `remove` parameters allow extra tokens to
/// be injected into, or existing elements hidden from, the output.
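///
/// A sketch of the extra parameters; `node`, `map`, `call_site` and the element keys are
/// hypothetical stand-ins for values the caller already has:
/// ```ignore
/// // Hide `elem_to_drop` from the output and splice a `;` in after `elem_to_extend`.
/// let mut append = FxHashMap::default();
/// append.insert(
///     elem_to_extend,
///     vec![tt::Leaf::Punct(tt::Punct { char: ';', spacing: tt::Spacing::Alone, span: call_site })],
/// );
/// let mut remove = FxHashSet::default();
/// remove.insert(elem_to_drop);
/// let tt = syntax_node_to_token_tree_modified(
///     node,
///     map,
///     append,
///     remove,
///     call_site,
///     DocCommentDesugarMode::ProcMacro,
///     |_preorder, _event| (true, Vec::new()),
/// );
/// ```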
pub fn syntax_node_to_token_tree_modified<Ctx, SpanMap, OnEvent>(
    node: &SyntaxNode,
    map: SpanMap,
    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<SpanData<Ctx>>>>,
    remove: FxHashSet<SyntaxElement>,
    call_site: SpanData<Ctx>,
    mode: DocCommentDesugarMode,
    on_enter: OnEvent,
) -> tt::TopSubtree<SpanData<Ctx>>
where
    SpanMap: SpanMapper<SpanData<Ctx>>,
    SpanData<Ctx>: Copy + fmt::Debug,
    OnEvent: FnMut(
        &mut PreorderWithTokens,
        &WalkEvent<SyntaxElement>,
    ) -> (bool, Vec<tt::Leaf<SpanData<Ctx>>>),
{
    let mut c = Converter::new(node, map, append, remove, call_site, mode, on_enter);
    convert_tokens(&mut c)
}

// The following items are what a `rustc` macro can be parsed into:
// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
// * Expr(Box<ast::Expr>)                     -> token_tree_to_expr
// * Pat(Box<ast::Pat>)                       -> token_tree_to_pat
// * Ty(Box<ast::Ty>)                         -> token_tree_to_ty
// * Stmts(SmallVec<[ast::Stmt; 1]>)          -> token_tree_to_stmts
// * Items(SmallVec<[Box<ast::Item>; 1]>)     -> token_tree_to_items
//
// * TraitItems(SmallVec<[ast::TraitItem; 1]>)
// * AssocItems(SmallVec<[ast::AssocItem; 1]>)
// * ForeignItems(SmallVec<[ast::ForeignItem; 1]>)

/// Converts a [`tt::Subtree`] back to a [`SyntaxNode`].
/// The produced `SpanMap` contains a mapping from the syntax node offsets to the subtree's spans.
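///
/// A round-trip sketch, assuming `tt` was produced by one of the conversions above:
/// ```ignore
/// let (parse, span_map) = token_tree_to_syntax_node(
///     &tt,
///     parser::TopEntryPoint::MacroItems,
///     &mut |_ctx| span::Edition::CURRENT,
/// );
/// let node = parse.syntax_node();
/// ```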
pub fn token_tree_to_syntax_node<Ctx>(
    tt: &tt::TopSubtree<SpanData<Ctx>>,
    entry_point: parser::TopEntryPoint,
    span_to_edition: &mut dyn FnMut(Ctx) -> Edition,
) -> (Parse<SyntaxNode>, SpanMap<Ctx>)
where
    Ctx: Copy + fmt::Debug + PartialEq + Eq + Hash,
{
    let buffer = tt.view().strip_invisible();
    let parser_input = to_parser_input(buffer, span_to_edition);
    // It matters what edition we parse with even when we escape all identifiers correctly.
    let parser_output = entry_point.parse(&parser_input);
    let mut tree_sink = TtTreeSink::new(buffer.cursor());
    for event in parser_output.iter() {
        match event {
            parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => {
                tree_sink.token(kind, n_raw_tokens)
            }
            parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
                tree_sink.float_split(has_pseudo_dot)
            }
            parser::Step::Enter { kind } => tree_sink.start_node(kind),
            parser::Step::Exit => tree_sink.finish_node(),
            parser::Step::Error { msg } => tree_sink.error(msg.to_owned()),
        }
    }
    tree_sink.finish()
}

/// Convert a string to a `TokenTree`. The spans of the subtree will be anchored to the provided
/// anchor with the given context.
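///
/// A minimal sketch; the file id and AST id below are placeholders in the style of the
/// dummy test span above:
/// ```ignore
/// let anchor = span::SpanAnchor {
///     file_id: span::EditionedFileId::new(span::FileId::from_raw(0), span::Edition::CURRENT),
///     ast_id: span::ROOT_ERASED_FILE_AST_ID,
/// };
/// let ctx = span::SyntaxContext::root(span::Edition::CURRENT);
/// let tt = parse_to_token_tree(span::Edition::CURRENT, anchor, ctx, "fn f() {}")?;
/// ```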
pub fn parse_to_token_tree<Ctx>(
    edition: Edition,
    anchor: SpanAnchor,
    ctx: Ctx,
    text: &str,
) -> Option<tt::TopSubtree<SpanData<Ctx>>>
where
    SpanData<Ctx>: Copy + fmt::Debug,
    Ctx: Copy,
{
    let lexed = parser::LexedStr::new(edition, text);
    if lexed.errors().next().is_some() {
        return None;
    }
    let mut conv =
        RawConverter { lexed, anchor, pos: 0, ctx, mode: DocCommentDesugarMode::ProcMacro };
    Some(convert_tokens(&mut conv))
}

/// Convert a string to a `TokenTree`. The passed span will be used for all spans of the produced subtree.
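///
/// Since every token receives the same span, any `Copy + Debug` value satisfies the bound; a
/// minimal sketch using a unit span:
/// ```ignore
/// let tt = parse_to_token_tree_static_span(span::Edition::CURRENT, (), "1 + 1")?;
/// ```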
pub fn parse_to_token_tree_static_span<S>(
    edition: Edition,
    span: S,
    text: &str,
) -> Option<tt::TopSubtree<S>>
where
    S: Copy + fmt::Debug,
{
    let lexed = parser::LexedStr::new(edition, text);
    if lexed.errors().next().is_some() {
        return None;
    }
    let mut conv =
        StaticRawConverter { lexed, pos: 0, span, mode: DocCommentDesugarMode::ProcMacro };
    Some(convert_tokens(&mut conv))
}

fn convert_tokens<S, C>(conv: &mut C) -> tt::TopSubtree<S>
where
    C: TokenConverter<S>,
    S: Copy + fmt::Debug,
    C::Token: fmt::Debug,
{
    let mut builder =
        tt::TopSubtreeBuilder::new(tt::Delimiter::invisible_spanned(conv.call_site()));

    while let Some((token, abs_range)) = conv.bump() {
        let tt = match token.as_leaf() {
            // These delimiters are not actually valid punctuation, but we produce them in syntax fixup.
            // So we need to handle them specially here.
            Some(&tt::Leaf::Punct(Punct {
                char: char @ ('(' | ')' | '{' | '}' | '[' | ']'),
                span,
                spacing: _,
            })) => {
                let found_expected_delimiter =
                    builder.expected_delimiters().enumerate().find(|(_, delim)| match delim.kind {
                        tt::DelimiterKind::Parenthesis => char == ')',
                        tt::DelimiterKind::Brace => char == '}',
                        tt::DelimiterKind::Bracket => char == ']',
                        tt::DelimiterKind::Invisible => false,
                    });
                if let Some((idx, _)) = found_expected_delimiter {
                    for _ in 0..=idx {
                        builder.close(span);
                    }
                    continue;
                }

                let delim = match char {
                    '(' => tt::DelimiterKind::Parenthesis,
                    '{' => tt::DelimiterKind::Brace,
                    '[' => tt::DelimiterKind::Bracket,
                    _ => panic!("unmatched closing delimiter from syntax fixup"),
                };

                // Start a new subtree
                builder.open(delim, span);
                continue;
            }
            Some(leaf) => leaf.clone(),
            None => match token.kind(conv) {
                // Desugar doc comments into doc attributes
                COMMENT => {
                    let span = conv.span_for(abs_range);
                    conv.convert_doc_comment(&token, span, &mut builder);
                    continue;
                }
                kind if kind.is_punct() && kind != UNDERSCORE => {
                    let found_expected_delimiter =
                        builder.expected_delimiters().enumerate().find(|(_, delim)| {
                            match delim.kind {
                                tt::DelimiterKind::Parenthesis => kind == T![')'],
                                tt::DelimiterKind::Brace => kind == T!['}'],
                                tt::DelimiterKind::Bracket => kind == T![']'],
                                tt::DelimiterKind::Invisible => false,
                            }
                        });

                    // The current token is a closing delimiter that we expect; fix up the
                    // closing span and end the subtree here.
                    // We also close any open inner subtrees that might be missing their delimiter.
                    if let Some((idx, _)) = found_expected_delimiter {
                        for _ in 0..=idx {
                            // FIXME: record an error somewhere if we're closing more than one tree here?
                            builder.close(conv.span_for(abs_range));
                        }
                        continue;
                    }

                    let delim = match kind {
                        T!['('] => Some(tt::DelimiterKind::Parenthesis),
                        T!['{'] => Some(tt::DelimiterKind::Brace),
                        T!['['] => Some(tt::DelimiterKind::Bracket),
                        _ => None,
                    };

                    // Start a new subtree
                    if let Some(kind) = delim {
                        builder.open(kind, conv.span_for(abs_range));
                        continue;
                    }

                    let spacing = match conv.peek().map(|next| next.kind(conv)) {
                        Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
                        _ => tt::Spacing::Alone,
                    };
                    let Some(char) = token.to_char(conv) else {
                        panic!("Token from lexer must be single char: token = {token:#?}")
                    };
                    // FIXME: this might still be an unmatched closing delimiter? Maybe we should assert here
                    tt::Leaf::from(tt::Punct { char, spacing, span: conv.span_for(abs_range) })
                }
                kind => {
                    macro_rules! make_ident {
                        () => {
                            tt::Ident {
                                span: conv.span_for(abs_range),
                                sym: Symbol::intern(&token.to_text(conv)),
                                is_raw: tt::IdentIsRaw::No,
                            }
                            .into()
                        };
                    }
                    let leaf: tt::Leaf<_> = match kind {
                        k if k.is_any_identifier() => {
                            let text = token.to_text(conv);
                            tt::Ident::new(&text, conv.span_for(abs_range)).into()
                        }
                        UNDERSCORE => make_ident!(),
                        k if k.is_literal() => {
                            let text = token.to_text(conv);
                            let span = conv.span_for(abs_range);
                            token_to_literal(&text, span).into()
                        }
                        LIFETIME_IDENT => {
                            let apostrophe = tt::Leaf::from(tt::Punct {
                                char: '\'',
                                spacing: tt::Spacing::Joint,
                                span: conv
                                    .span_for(TextRange::at(abs_range.start(), TextSize::of('\''))),
                            });
                            builder.push(apostrophe);

                            let ident = tt::Leaf::from(tt::Ident {
                                sym: Symbol::intern(&token.to_text(conv)[1..]),
                                span: conv.span_for(TextRange::new(
                                    abs_range.start() + TextSize::of('\''),
                                    abs_range.end(),
                                )),
                                is_raw: tt::IdentIsRaw::No,
                            });
                            builder.push(ident);
                            continue;
                        }
                        _ => continue,
                    };

                    leaf
                }
            },
        };

        builder.push(tt);
    }

    while builder.expected_delimiters().next().is_some() {
        // FIXME: record an error somewhere?
        builder.close(conv.call_site());
    }
    builder.build_skip_top_subtree()
}

fn is_single_token_op(kind: SyntaxKind) -> bool {
    matches!(
        kind,
        EQ | L_ANGLE
            | R_ANGLE
            | BANG
            | AMP
            | PIPE
            | TILDE
            | AT
            | DOT
            | COMMA
            | SEMICOLON
            | COLON
            | POUND
            | DOLLAR
            | QUESTION
            | PLUS
            | MINUS
            | STAR
            | SLASH
            | PERCENT
            | CARET
            // LIFETIME_IDENT will be split into a sequence of `'` (a single quote) and an
            // identifier.
            | LIFETIME_IDENT
    )
}

/// Returns the textual content of a doc comment block as a quoted string.
/// That is, strips the leading `///` (or `/**`, etc.), strips the trailing `*/` of block
/// comments, and then quotes the result so that it can be turned into a `tt::Literal`.
///
/// Note that proc-macros desugar with string literals whereas macro_rules macros desugar
/// with raw string literals.
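///
/// A sketch of the two modes; the `LitKind` values come from the match below:
/// ```ignore
/// // Mbe: the text is kept as-is and wrapped in a raw string with enough `#`s.
/// let (_sym, kind) = desugar_doc_comment_text(" docs", DocCommentDesugarMode::Mbe);
/// assert_eq!(kind, tt::LitKind::StrRaw(0));
/// // ProcMacro: the text is debug-escaped and wrapped in a plain string.
/// let (_sym, kind) = desugar_doc_comment_text(" docs", DocCommentDesugarMode::ProcMacro);
/// assert_eq!(kind, tt::LitKind::Str);
/// ```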
pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (Symbol, tt::LitKind) {
    match mode {
        DocCommentDesugarMode::Mbe => {
            let mut num_of_hashes = 0;
            let mut count = 0;
            for ch in text.chars() {
                count = match ch {
                    '"' => 1,
                    '#' if count > 0 => count + 1,
                    _ => 0,
                };
                num_of_hashes = num_of_hashes.max(count);
            }

            // Quote raw string with delimiters
            (Symbol::intern(text), tt::LitKind::StrRaw(num_of_hashes))
        }
        // Quote string with delimiters
        DocCommentDesugarMode::ProcMacro => {
            (Symbol::intern(&format_smolstr!("{}", text.escape_debug())), tt::LitKind::Str)
        }
    }
}

fn convert_doc_comment<S: Copy>(
    token: &syntax::SyntaxToken,
    span: S,
    mode: DocCommentDesugarMode,
    builder: &mut tt::TopSubtreeBuilder<S>,
) {
    let Some(comment) = ast::Comment::cast(token.clone()) else { return };
    let Some(doc) = comment.kind().doc else { return };

    let mk_ident = |s: &str| {
        tt::Leaf::from(tt::Ident { sym: Symbol::intern(s), span, is_raw: tt::IdentIsRaw::No })
    };

    let mk_punct =
        |c: char| tt::Leaf::from(tt::Punct { char: c, spacing: tt::Spacing::Alone, span });

    let mk_doc_literal = |comment: &ast::Comment| {
        let prefix_len = comment.prefix().len();
        let mut text = &comment.text()[prefix_len..];

        // Remove ending "*/"
        if comment.kind().shape == ast::CommentShape::Block {
            text = &text[0..text.len() - 2];
        }
        let (text, kind) = desugar_doc_comment_text(text, mode);
        let lit = tt::Literal { symbol: text, span, kind, suffix: None };

        tt::Leaf::from(lit)
    };

    // Make `doc = "<comment content>"`
    let meta_tkns = [mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)];

    // Make `#[]` (or `#![]` for inner doc comments)
    builder.push(mk_punct('#'));
    if let ast::CommentPlacement::Inner = doc {
        builder.push(mk_punct('!'));
    }
    builder.open(tt::DelimiterKind::Bracket, span);
    builder.extend(meta_tkns);
    builder.close(span);
}

/// A raw token (straight from lexer) converter
struct RawConverter<'a, Ctx> {
    lexed: parser::LexedStr<'a>,
    pos: usize,
    anchor: SpanAnchor,
    ctx: Ctx,
    mode: DocCommentDesugarMode,
}
/// A raw token (straight from lexer) converter that gives every token the same span.
struct StaticRawConverter<'a, S> {
    lexed: parser::LexedStr<'a>,
    pos: usize,
    span: S,
    mode: DocCommentDesugarMode,
}

trait SrcToken<Ctx, S> {
    fn kind(&self, ctx: &Ctx) -> SyntaxKind;

    fn to_char(&self, ctx: &Ctx) -> Option<char>;

    fn to_text(&self, ctx: &Ctx) -> SmolStr;

    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
        None
    }
}

trait TokenConverter<S>: Sized {
    type Token: SrcToken<Self, S>;

    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: S,
        builder: &mut tt::TopSubtreeBuilder<S>,
    );

    fn bump(&mut self) -> Option<(Self::Token, TextRange)>;

    fn peek(&self) -> Option<Self::Token>;

    fn span_for(&self, range: TextRange) -> S;

    fn call_site(&self) -> S;
}

impl<S, Ctx> SrcToken<RawConverter<'_, Ctx>, S> for usize {
    fn kind(&self, ctx: &RawConverter<'_, Ctx>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

    fn to_char(&self, ctx: &RawConverter<'_, Ctx>) -> Option<char> {
        ctx.lexed.text(*self).chars().next()
    }

    fn to_text(&self, ctx: &RawConverter<'_, Ctx>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }
}

impl<S: Copy> SrcToken<StaticRawConverter<'_, S>, S> for usize {
    fn kind(&self, ctx: &StaticRawConverter<'_, S>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

    fn to_char(&self, ctx: &StaticRawConverter<'_, S>) -> Option<char> {
        ctx.lexed.text(*self).chars().next()
    }

    fn to_text(&self, ctx: &StaticRawConverter<'_, S>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }
}

impl<Ctx: Copy> TokenConverter<SpanData<Ctx>> for RawConverter<'_, Ctx>
where
    SpanData<Ctx>: Copy,
{
    type Token = usize;

    fn convert_doc_comment(
        &self,
        &token: &usize,
        span: SpanData<Ctx>,
        builder: &mut tt::TopSubtreeBuilder<SpanData<Ctx>>,
    ) {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span, self.mode, builder);
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);

        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    fn span_for(&self, range: TextRange) -> SpanData<Ctx> {
        SpanData { range, anchor: self.anchor, ctx: self.ctx }
    }

    fn call_site(&self) -> SpanData<Ctx> {
        SpanData { range: TextRange::empty(0.into()), anchor: self.anchor, ctx: self.ctx }
    }
}

impl<S> TokenConverter<S> for StaticRawConverter<'_, S>
where
    S: Copy,
{
    type Token = usize;

    fn convert_doc_comment(&self, &token: &usize, span: S, builder: &mut tt::TopSubtreeBuilder<S>) {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span, self.mode, builder);
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);

        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    fn span_for(&self, _: TextRange) -> S {
        self.span
    }

    fn call_site(&self) -> S {
        self.span
    }
}

struct Converter<SpanMap, S, OnEvent> {
    current: Option<SyntaxToken>,
    current_leaves: VecDeque<tt::Leaf<S>>,
    preorder: PreorderWithTokens,
    range: TextRange,
    punct_offset: Option<(SyntaxToken, TextSize)>,
    /// Used to make the emitted text ranges in the spans relative to the span anchor.
    map: SpanMap,
    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
    remove: FxHashSet<SyntaxElement>,
    call_site: S,
    mode: DocCommentDesugarMode,
    on_event: OnEvent,
}

impl<SpanMap, S, OnEvent> Converter<SpanMap, S, OnEvent>
where
    OnEvent: FnMut(&mut PreorderWithTokens, &WalkEvent<SyntaxElement>) -> (bool, Vec<tt::Leaf<S>>),
{
    fn new(
        node: &SyntaxNode,
        map: SpanMap,
        append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
        remove: FxHashSet<SyntaxElement>,
        call_site: S,
        mode: DocCommentDesugarMode,
        on_enter: OnEvent,
    ) -> Self {
        let mut converter = Converter {
            current: None,
            preorder: node.preorder_with_tokens(),
            range: node.text_range(),
            punct_offset: None,
            map,
            append,
            remove,
            call_site,
            current_leaves: VecDeque::new(),
            mode,
            on_event: on_enter,
        };
        converter.current = converter.next_token();
        converter
    }

    fn next_token(&mut self) -> Option<SyntaxToken> {
        while let Some(ev) = self.preorder.next() {
            let (keep_event, insert_leaves) = (self.on_event)(&mut self.preorder, &ev);
            self.current_leaves.extend(insert_leaves);
            if !keep_event {
                continue;
            }
            match ev {
                WalkEvent::Enter(token) => {
                    if self.remove.contains(&token) {
                        match token {
                            syntax::NodeOrToken::Token(_) => {
                                continue;
                            }
                            node => {
                                self.preorder.skip_subtree();
                                if let Some(v) = self.append.remove(&node) {
                                    self.current_leaves.extend(v);
                                    continue;
                                }
                            }
                        }
                    } else if let syntax::NodeOrToken::Token(token) = token {
                        return Some(token);
                    }
                }
                WalkEvent::Leave(ele) => {
                    if let Some(v) = self.append.remove(&ele) {
                        self.current_leaves.extend(v);
                        continue;
                    }
                }
            }
        }
        None
    }
}

#[derive(Debug)]
enum SynToken<S> {
    Ordinary(SyntaxToken),
    Punct { token: SyntaxToken, offset: usize },
    Leaf(tt::Leaf<S>),
}

impl<S> SynToken<S> {
    fn token(&self) -> &SyntaxToken {
        match self {
            SynToken::Ordinary(it) | SynToken::Punct { token: it, offset: _ } => it,
            SynToken::Leaf(_) => unreachable!(),
        }
    }
}

impl<SpanMap, S, OnEvent> SrcToken<Converter<SpanMap, S, OnEvent>, S> for SynToken<S> {
    fn kind(&self, _ctx: &Converter<SpanMap, S, OnEvent>) -> SyntaxKind {
        match self {
            SynToken::Ordinary(token) => token.kind(),
            SynToken::Punct { token, offset: i } => {
                SyntaxKind::from_char(token.text().chars().nth(*i).unwrap()).unwrap()
            }
            SynToken::Leaf(_) => {
                never!();
                SyntaxKind::ERROR
            }
        }
    }
    fn to_char(&self, _ctx: &Converter<SpanMap, S, OnEvent>) -> Option<char> {
        match self {
            SynToken::Ordinary(_) => None,
            SynToken::Punct { token: it, offset: i } => it.text().chars().nth(*i),
            SynToken::Leaf(_) => None,
        }
    }
    fn to_text(&self, _ctx: &Converter<SpanMap, S, OnEvent>) -> SmolStr {
        match self {
            SynToken::Ordinary(token) | SynToken::Punct { token, offset: _ } => token.text().into(),
            SynToken::Leaf(_) => {
                never!();
                "".into()
            }
        }
    }
    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
        match self {
            SynToken::Ordinary(_) | SynToken::Punct { .. } => None,
            SynToken::Leaf(it) => Some(it),
        }
    }
}

impl<S, SpanMap, OnEvent> TokenConverter<S> for Converter<SpanMap, S, OnEvent>
where
    S: Copy,
    SpanMap: SpanMapper<S>,
    OnEvent: FnMut(&mut PreorderWithTokens, &WalkEvent<SyntaxElement>) -> (bool, Vec<tt::Leaf<S>>),
{
    type Token = SynToken<S>;
    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: S,
        builder: &mut tt::TopSubtreeBuilder<S>,
    ) {
        convert_doc_comment(token.token(), span, self.mode, builder);
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
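        // Multi-character punctuation (e.g. `->`) is emitted one character at a time, so
        // `punct_offset` remembers the token and how far into its text we already are.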
        if let Some((punct, offset)) = self.punct_offset.clone()
            && usize::from(offset) + 1 < punct.text().len()
        {
            let offset = offset + TextSize::of('.');
            let range = punct.text_range();
            self.punct_offset = Some((punct.clone(), offset));
            let range = TextRange::at(range.start() + offset, TextSize::of('.'));
            return Some((
                SynToken::Punct { token: punct, offset: u32::from(offset) as usize },
                range,
            ));
        }

        if let Some(leaf) = self.current_leaves.pop_front() {
            return Some((SynToken::Leaf(leaf), TextRange::empty(TextSize::new(0))));
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        self.current = self.next_token();
        let token = if curr.kind().is_punct() {
            self.punct_offset = Some((curr.clone(), 0.into()));
            let range = curr.text_range();
            let range = TextRange::at(range.start(), TextSize::of('.'));
            (SynToken::Punct { token: curr, offset: 0_usize }, range)
        } else {
            self.punct_offset = None;
            let range = curr.text_range();
            (SynToken::Ordinary(curr), range)
        };

        Some(token)
    }

    fn peek(&self) -> Option<Self::Token> {
        if let Some((punct, mut offset)) = self.punct_offset.clone() {
            offset += TextSize::of('.');
            if usize::from(offset) < punct.text().len() {
                return Some(SynToken::Punct { token: punct, offset: usize::from(offset) });
            }
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        let token = if curr.kind().is_punct() {
            SynToken::Punct { token: curr, offset: 0_usize }
        } else {
            SynToken::Ordinary(curr)
        };
        Some(token)
    }

    fn span_for(&self, range: TextRange) -> S {
        self.map.span_for(range)
    }
    fn call_site(&self) -> S {
        self.call_site
    }
}

struct TtTreeSink<'a, Ctx>
where
    SpanData<Ctx>: Copy,
{
    buf: String,
    cursor: Cursor<'a, SpanData<Ctx>>,
    text_pos: TextSize,
    inner: SyntaxTreeBuilder,
    token_map: SpanMap<Ctx>,
}

impl<'a, Ctx> TtTreeSink<'a, Ctx>
where
    SpanData<Ctx>: Copy,
{
    fn new(cursor: Cursor<'a, SpanData<Ctx>>) -> Self {
        TtTreeSink {
            buf: String::new(),
            cursor,
            text_pos: 0.into(),
            inner: SyntaxTreeBuilder::default(),
            token_map: SpanMap::empty(),
        }
    }

    fn finish(mut self) -> (Parse<SyntaxNode>, SpanMap<Ctx>) {
        self.token_map.finish();
        (self.inner.finish(), self.token_map)
    }
}

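/// Renders one side of a delimiter, e.g. `Brace` with `closing == false` is `"{"` and with
/// `closing == true` is `"}"`; invisible delimiters have no textual form:
/// ```ignore
/// assert_eq!(delim_to_str(tt::DelimiterKind::Bracket, true), Some("]"));
/// assert_eq!(delim_to_str(tt::DelimiterKind::Invisible, true), None);
/// ```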
fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> {
    let texts = match d {
        tt::DelimiterKind::Parenthesis => "()",
        tt::DelimiterKind::Brace => "{}",
        tt::DelimiterKind::Bracket => "[]",
        tt::DelimiterKind::Invisible => return None,
    };

    let idx = closing as usize;
    Some(&texts[idx..texts.len() - (1 - idx)])
}

impl<Ctx> TtTreeSink<'_, Ctx>
where
    SpanData<Ctx>: Copy + fmt::Debug,
    Ctx: PartialEq,
{
    /// Parses a float literal as if it were one or two name-ref nodes with a dot in between.
    /// This occurs when a float literal is used as a field access, e.g. the `0.1` in `x.0.1`.
    fn float_split(&mut self, has_pseudo_dot: bool) {
        let (text, span) = match self.cursor.token_tree() {
            Some(tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal {
                symbol: text,
                span,
                kind: tt::LitKind::Float,
                suffix: _,
            }))) => (text.as_str(), *span),
            tt => unreachable!("{tt:?}"),
        };
        // FIXME: Span splitting
        match text.split_once('.') {
            Some((left, right)) => {
                assert!(!left.is_empty());

                self.inner.start_node(SyntaxKind::NAME_REF);
                self.inner.token(SyntaxKind::INT_NUMBER, left);
                self.inner.finish_node();
                self.token_map.push(self.text_pos + TextSize::of(left), span);

                // Here we move the exit up; the original exit has been deleted in the process.
                self.inner.finish_node();

                self.inner.token(SyntaxKind::DOT, ".");
                self.token_map.push(self.text_pos + TextSize::of(left) + TextSize::of("."), span);

                if has_pseudo_dot {
                    assert!(right.is_empty(), "{left}.{right}");
                } else {
                    assert!(!right.is_empty(), "{left}.{right}");
                    self.inner.start_node(SyntaxKind::NAME_REF);
                    self.inner.token(SyntaxKind::INT_NUMBER, right);
                    self.token_map.push(self.text_pos + TextSize::of(text), span);
                    self.inner.finish_node();

                    // The parser creates an unbalanced start node; we are required to close it here.
                    self.inner.finish_node();
                }
                self.text_pos += TextSize::of(text);
            }
            None => unreachable!(),
        }
        self.cursor.bump();
    }

    fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
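        // `convert_tokens` splits a lifetime into two leaves (a `'` punct followed by an
        // identifier), so the parser's single LIFETIME_IDENT consumes two input tokens here.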
        if kind == LIFETIME_IDENT {
            n_tokens = 2;
        }

        let mut last_two = self.cursor.peek_two_leaves();
        let mut combined_span = None;
        'tokens: for _ in 0..n_tokens {
            let tmp: u8;
            if self.cursor.eof() {
                break;
            }
            last_two = self.cursor.peek_two_leaves();
            let (text, span) = loop {
                break match self.cursor.token_tree() {
                    Some(tt::TokenTree::Leaf(leaf)) => match leaf {
                        tt::Leaf::Ident(ident) => {
                            if ident.is_raw.yes() {
                                self.buf.push_str("r#");
                                self.text_pos += TextSize::of("r#");
                            }
                            let r = (ident.sym.as_str(), ident.span);
                            self.cursor.bump();
                            r
                        }
                        tt::Leaf::Punct(punct) => {
                            assert!(punct.char.is_ascii());
                            tmp = punct.char as u8;
                            let r = (
                                std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(),
                                punct.span,
                            );
                            self.cursor.bump();
                            r
                        }
                        tt::Leaf::Literal(lit) => {
                            let buf_l = self.buf.len();
                            format_to!(self.buf, "{lit}");
                            debug_assert_ne!(self.buf.len() - buf_l, 0);
                            self.text_pos += TextSize::new((self.buf.len() - buf_l) as u32);
                            combined_span = match combined_span {
                                None => Some(lit.span),
                                Some(prev_span) => Some(Self::merge_spans(prev_span, lit.span)),
                            };
                            self.cursor.bump();
                            continue 'tokens;
                        }
                    },
                    Some(tt::TokenTree::Subtree(subtree)) => {
                        self.cursor.bump();
                        match delim_to_str(subtree.delimiter.kind, false) {
                            Some(it) => (it, subtree.delimiter.open),
                            None => continue,
                        }
                    }
                    None => {
                        let parent = self.cursor.end();
                        match delim_to_str(parent.delimiter.kind, true) {
                            Some(it) => (it, parent.delimiter.close),
                            None => continue,
                        }
                    }
                };
            };
            self.buf += text;
            self.text_pos += TextSize::of(text);
            combined_span = match combined_span {
                None => Some(span),
                Some(prev_span) => Some(Self::merge_spans(prev_span, span)),
            }
        }

        self.token_map.push(self.text_pos, combined_span.expect("expected at least one token"));
        self.inner.token(kind, self.buf.as_str());
        self.buf.clear();
        // FIXME: Emitting whitespace for this is really just a hack, we should get rid of it.
        // Add whitespace between adjacent puncts
        if let Some([tt::Leaf::Punct(curr), tt::Leaf::Punct(next)]) = last_two {
            // Note: Other parts of rust-analyzer assume that a semicolon is always the last
            // token of its expression, so we don't add whitespace after a `;`.
            //
            // When `next` is a `Punct` of `'`, that's a part of a lifetime identifier so we don't
            // need to add whitespace either.
            if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' {
                self.inner.token(WHITESPACE, " ");
                self.text_pos += TextSize::of(' ');
                self.token_map.push(self.text_pos, curr.span);
            }
        }
    }

    fn start_node(&mut self, kind: SyntaxKind) {
        self.inner.start_node(kind);
    }

    fn finish_node(&mut self) {
        self.inner.finish_node();
    }

    fn error(&mut self, error: String) {
        self.inner.error(error, self.text_pos)
    }

    fn merge_spans(a: SpanData<Ctx>, b: SpanData<Ctx>) -> SpanData<Ctx> {
        // We don't do exactly what rustc does: rustc does something clever when the spans have
        // different syntax contexts, but that runs afoul of our separation between `span` and
        // `hir-expand`.
        SpanData {
            range: if a.ctx == b.ctx && a.anchor == b.anchor {
                TextRange::new(
                    std::cmp::min(a.range.start(), b.range.start()),
                    std::cmp::max(a.range.end(), b.range.end()),
                )
            } else {
                // Combining ranges makes no sense when they come from different syntax contexts.
                a.range
            },
            anchor: a.anchor,
            ctx: a.ctx,
        }
    }
}