// ra_ap_syntax_bridge/lib.rs

1//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].
2
3use std::{fmt, hash::Hash};
4
5use intern::Symbol;
6use rustc_hash::{FxHashMap, FxHashSet};
7use span::{Edition, SpanAnchor, SpanData, SpanMap};
8use stdx::{format_to, never};
9use syntax::{
10    AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement,
11    SyntaxKind::{self, *},
12    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, T, TextRange, TextSize, WalkEvent,
13    ast::{self, make::tokens::doc_comment},
14    format_smolstr,
15};
16use tt::{Punct, buffer::Cursor, token_to_literal};
17
18pub mod prettify_macro_expansion;
19mod to_parser_input;
20pub use to_parser_input::to_parser_input;
// FIXME: we probably should rethink the `token_tree_to_syntax_node` interfaces
22pub use ::parser::TopEntryPoint;
23
24#[cfg(test)]
25mod tests;
26
/// Maps a syntax [`TextRange`] to the span that should be attached to tokens
/// produced from it.
pub trait SpanMapper<S> {
    /// Returns the span to use for `range`.
    fn span_for(&self, range: TextRange) -> S;
}
30
impl<S> SpanMapper<SpanData<S>> for SpanMap<S>
where
    SpanData<S>: Copy,
{
    fn span_for(&self, range: TextRange) -> SpanData<S> {
        // A `SpanMap` is keyed by position, so the span at the range's start
        // stands in for the whole range.
        self.span_at(range.start())
    }
}
39
/// Forwarding impl so a span mapper can also be passed by reference.
impl<S: Copy, SM: SpanMapper<S>> SpanMapper<S> for &SM {
    fn span_for(&self, range: TextRange) -> S {
        SM::span_for(self, range)
    }
}
45
46/// Dummy things for testing where spans don't matter.
47pub mod dummy_test_span_utils {
48
49    use span::{Span, SyntaxContext};
50
51    use super::*;
52
53    pub const DUMMY: Span = Span {
54        range: TextRange::empty(TextSize::new(0)),
55        anchor: span::SpanAnchor {
56            file_id: span::EditionedFileId::new(
57                span::FileId::from_raw(0xe4e4e),
58                span::Edition::CURRENT,
59            ),
60            ast_id: span::ROOT_ERASED_FILE_AST_ID,
61        },
62        ctx: SyntaxContext::root(Edition::CURRENT),
63    };
64
65    pub struct DummyTestSpanMap;
66
67    impl SpanMapper<Span> for DummyTestSpanMap {
68        fn span_for(&self, range: syntax::TextRange) -> Span {
69            Span {
70                range,
71                anchor: span::SpanAnchor {
72                    file_id: span::EditionedFileId::new(
73                        span::FileId::from_raw(0xe4e4e),
74                        span::Edition::CURRENT,
75                    ),
76                    ast_id: span::ROOT_ERASED_FILE_AST_ID,
77                },
78                ctx: SyntaxContext::root(Edition::CURRENT),
79            }
80        }
81    }
82}
83
/// Doc comment desugaring differs between mbe and proc-macros.
// `Debug` is derived as well: this is a small public fieldless enum, and a
// `Debug` impl costs nothing while making diagnostics/logging possible.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocCommentDesugarMode {
    /// Desugars doc comments as quoted raw strings
    Mbe,
    /// Desugars doc comments as quoted strings
    ProcMacro,
}
92
93/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
94/// subtree's spans.
95pub fn syntax_node_to_token_tree<Ctx, SpanMap>(
96    node: &SyntaxNode,
97    map: SpanMap,
98    span: SpanData<Ctx>,
99    mode: DocCommentDesugarMode,
100) -> tt::TopSubtree<SpanData<Ctx>>
101where
102    SpanData<Ctx>: Copy + fmt::Debug,
103    SpanMap: SpanMapper<SpanData<Ctx>>,
104{
105    let mut c = Converter::new(node, map, Default::default(), Default::default(), span, mode);
106    convert_tokens(&mut c)
107}
108
109/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
110/// subtree's spans. Additionally using the append and remove parameters, the additional tokens can
111/// be injected or hidden from the output.
112pub fn syntax_node_to_token_tree_modified<Ctx, SpanMap>(
113    node: &SyntaxNode,
114    map: SpanMap,
115    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<SpanData<Ctx>>>>,
116    remove: FxHashSet<SyntaxElement>,
117    call_site: SpanData<Ctx>,
118    mode: DocCommentDesugarMode,
119) -> tt::TopSubtree<SpanData<Ctx>>
120where
121    SpanMap: SpanMapper<SpanData<Ctx>>,
122    SpanData<Ctx>: Copy + fmt::Debug,
123{
124    let mut c = Converter::new(node, map, append, remove, call_site, mode);
125    convert_tokens(&mut c)
126}
127
// The following items are what `rustc` macro can be parsed into:
// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
// * Expr(P<ast::Expr>)                     -> token_tree_to_expr
// * Pat(P<ast::Pat>)                       -> token_tree_to_pat
// * Ty(P<ast::Ty>)                         -> token_tree_to_ty
// * Stmts(SmallVec<[ast::Stmt; 1]>)        -> token_tree_to_stmts
// * Items(SmallVec<[P<ast::Item>; 1]>)     -> token_tree_to_items
//
// * TraitItems(SmallVec<[ast::TraitItem; 1]>)
// * AssocItems(SmallVec<[ast::AssocItem; 1]>)
// * ForeignItems(SmallVec<[ast::ForeignItem; 1]>)
139
140/// Converts a [`tt::Subtree`] back to a [`SyntaxNode`].
141/// The produced `SpanMap` contains a mapping from the syntax nodes offsets to the subtree's spans.
142pub fn token_tree_to_syntax_node<Ctx>(
143    tt: &tt::TopSubtree<SpanData<Ctx>>,
144    entry_point: parser::TopEntryPoint,
145    span_to_edition: &mut dyn FnMut(Ctx) -> Edition,
146    top_edition: Edition,
147) -> (Parse<SyntaxNode>, SpanMap<Ctx>)
148where
149    Ctx: Copy + fmt::Debug + PartialEq + PartialEq + Eq + Hash,
150{
151    let buffer = tt.view().strip_invisible();
152    let parser_input = to_parser_input(buffer, span_to_edition);
153    // It matters what edition we parse with even when we escape all identifiers correctly.
154    let parser_output = entry_point.parse(&parser_input, top_edition);
155    let mut tree_sink = TtTreeSink::new(buffer.cursor());
156    for event in parser_output.iter() {
157        match event {
158            parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => {
159                tree_sink.token(kind, n_raw_tokens)
160            }
161            parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
162                tree_sink.float_split(has_pseudo_dot)
163            }
164            parser::Step::Enter { kind } => tree_sink.start_node(kind),
165            parser::Step::Exit => tree_sink.finish_node(),
166            parser::Step::Error { msg } => tree_sink.error(msg.to_owned()),
167        }
168    }
169    tree_sink.finish()
170}
171
172/// Convert a string to a `TokenTree`. The spans of the subtree will be anchored to the provided
173/// anchor with the given context.
174pub fn parse_to_token_tree<Ctx>(
175    edition: Edition,
176    anchor: SpanAnchor,
177    ctx: Ctx,
178    text: &str,
179) -> Option<tt::TopSubtree<SpanData<Ctx>>>
180where
181    SpanData<Ctx>: Copy + fmt::Debug,
182    Ctx: Copy,
183{
184    let lexed = parser::LexedStr::new(edition, text);
185    if lexed.errors().next().is_some() {
186        return None;
187    }
188    let mut conv =
189        RawConverter { lexed, anchor, pos: 0, ctx, mode: DocCommentDesugarMode::ProcMacro };
190    Some(convert_tokens(&mut conv))
191}
192
193/// Convert a string to a `TokenTree`. The passed span will be used for all spans of the produced subtree.
194pub fn parse_to_token_tree_static_span<S>(
195    edition: Edition,
196    span: S,
197    text: &str,
198) -> Option<tt::TopSubtree<S>>
199where
200    S: Copy + fmt::Debug,
201{
202    let lexed = parser::LexedStr::new(edition, text);
203    if lexed.errors().next().is_some() {
204        return None;
205    }
206    let mut conv =
207        StaticRawConverter { lexed, pos: 0, span, mode: DocCommentDesugarMode::ProcMacro };
208    Some(convert_tokens(&mut conv))
209}
210
/// Core conversion loop: drains tokens from `conv` and assembles them into a
/// token tree, tracking delimiter nesting through the builder's open/close
/// stack. Unmatched delimiters are recovered from by closing at the call-site
/// span.
fn convert_tokens<S, C>(conv: &mut C) -> tt::TopSubtree<S>
where
    C: TokenConverter<S>,
    S: Copy + fmt::Debug,
    C::Token: fmt::Debug,
{
    let mut builder =
        tt::TopSubtreeBuilder::new(tt::Delimiter::invisible_spanned(conv.call_site()));

    while let Some((token, abs_range)) = conv.bump() {
        let tt = match token.as_leaf() {
            // These delimiters are not actually valid punctuation, but we produce them in syntax fixup.
            // So we need to handle them specially here.
            Some(&tt::Leaf::Punct(Punct {
                char: char @ ('(' | ')' | '{' | '}' | '[' | ']'),
                span,
                spacing: _,
            })) => {
                // Does this close one of the currently open delimiters?
                let found_expected_delimiter =
                    builder.expected_delimiters().enumerate().find(|(_, delim)| match delim.kind {
                        tt::DelimiterKind::Parenthesis => char == ')',
                        tt::DelimiterKind::Brace => char == '}',
                        tt::DelimiterKind::Bracket => char == ']',
                        tt::DelimiterKind::Invisible => false,
                    });
                if let Some((idx, _)) = found_expected_delimiter {
                    // Close the matching subtree, plus any open inner subtrees
                    // that are missing their own closing delimiter.
                    for _ in 0..=idx {
                        builder.close(span);
                    }
                    continue;
                }

                let delim = match char {
                    '(' => tt::DelimiterKind::Parenthesis,
                    '{' => tt::DelimiterKind::Brace,
                    '[' => tt::DelimiterKind::Bracket,
                    _ => panic!("unmatched closing delimiter from syntax fixup"),
                };

                // Start a new subtree
                builder.open(delim, span);
                continue;
            }
            // Pre-made leaves (injected via `append`) pass through unchanged.
            Some(leaf) => leaf.clone(),
            None => match token.kind(conv) {
                // Desugar doc comments into doc attributes
                COMMENT => {
                    let span = conv.span_for(abs_range);
                    conv.convert_doc_comment(&token, span, &mut builder);
                    continue;
                }
                // NOTE: `_` lexes as punct-like but is treated as an identifier below.
                kind if kind.is_punct() && kind != UNDERSCORE => {
                    let found_expected_delimiter =
                        builder.expected_delimiters().enumerate().find(|(_, delim)| {
                            match delim.kind {
                                tt::DelimiterKind::Parenthesis => kind == T![')'],
                                tt::DelimiterKind::Brace => kind == T!['}'],
                                tt::DelimiterKind::Bracket => kind == T![']'],
                                tt::DelimiterKind::Invisible => false,
                            }
                        });

                    // Current token is a closing delimiter that we expect, fix up the closing span
                    // and end the subtree here.
                    // We also close any open inner subtrees that might be missing their delimiter.
                    if let Some((idx, _)) = found_expected_delimiter {
                        for _ in 0..=idx {
                            // FIXME: record an error somewhere if we're closing more than one tree here?
                            builder.close(conv.span_for(abs_range));
                        }
                        continue;
                    }

                    let delim = match kind {
                        T!['('] => Some(tt::DelimiterKind::Parenthesis),
                        T!['{'] => Some(tt::DelimiterKind::Brace),
                        T!['['] => Some(tt::DelimiterKind::Bracket),
                        _ => None,
                    };

                    // Start a new subtree
                    if let Some(kind) = delim {
                        builder.open(kind, conv.span_for(abs_range));
                        continue;
                    }

                    // Joint spacing iff the next token continues a multi-char operator.
                    let spacing = match conv.peek().map(|next| next.kind(conv)) {
                        Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
                        _ => tt::Spacing::Alone,
                    };
                    let Some(char) = token.to_char(conv) else {
                        panic!("Token from lexer must be single char: token = {token:#?}")
                    };
                    // FIXME: this might still be an unmatched closing delimiter? Maybe we should assert here
                    tt::Leaf::from(tt::Punct { char, spacing, span: conv.span_for(abs_range) })
                }
                kind => {
                    macro_rules! make_ident {
                        () => {
                            tt::Ident {
                                span: conv.span_for(abs_range),
                                sym: Symbol::intern(&token.to_text(conv)),
                                is_raw: tt::IdentIsRaw::No,
                            }
                            .into()
                        };
                    }
                    let leaf: tt::Leaf<_> = match kind {
                        k if k.is_any_identifier() => {
                            let text = token.to_text(conv);
                            tt::Ident::new(&text, conv.span_for(abs_range)).into()
                        }
                        UNDERSCORE => make_ident!(),
                        k if k.is_literal() => {
                            let text = token.to_text(conv);
                            let span = conv.span_for(abs_range);
                            token_to_literal(&text, span).into()
                        }
                        // A lifetime is emitted as two joint leaves: `'` + ident.
                        LIFETIME_IDENT => {
                            let apostrophe = tt::Leaf::from(tt::Punct {
                                char: '\'',
                                spacing: tt::Spacing::Joint,
                                span: conv
                                    .span_for(TextRange::at(abs_range.start(), TextSize::of('\''))),
                            });
                            builder.push(apostrophe);

                            let ident = tt::Leaf::from(tt::Ident {
                                sym: Symbol::intern(&token.to_text(conv)[1..]),
                                span: conv.span_for(TextRange::new(
                                    abs_range.start() + TextSize::of('\''),
                                    abs_range.end(),
                                )),
                                is_raw: tt::IdentIsRaw::No,
                            });
                            builder.push(ident);
                            continue;
                        }
                        // Whitespace and other trivia are dropped.
                        _ => continue,
                    };

                    leaf
                }
            },
        };

        builder.push(tt);
    }

    // Close any delimiters still left open at end of input.
    while builder.expected_delimiters().next().is_some() {
        // FIXME: record an error somewhere?
        builder.close(conv.call_site());
    }
    builder.build_skip_top_subtree()
}
366
367fn is_single_token_op(kind: SyntaxKind) -> bool {
368    matches!(
369        kind,
370        EQ | L_ANGLE
371            | R_ANGLE
372            | BANG
373            | AMP
374            | PIPE
375            | TILDE
376            | AT
377            | DOT
378            | COMMA
379            | SEMICOLON
380            | COLON
381            | POUND
382            | DOLLAR
383            | QUESTION
384            | PLUS
385            | MINUS
386            | STAR
387            | SLASH
388            | PERCENT
389            | CARET
390            // LIFETIME_IDENT will be split into a sequence of `'` (a single quote) and an
391            // identifier.
392            | LIFETIME_IDENT
393    )
394}
395
396/// Returns the textual content of a doc comment block as a quoted string
397/// That is, strips leading `///` (or `/**`, etc)
398/// and strips the ending `*/`
399/// And then quote the string, which is needed to convert to `tt::Literal`
400///
401/// Note that proc-macros desugar with string literals where as macro_rules macros desugar with raw string literals.
402pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (Symbol, tt::LitKind) {
403    match mode {
404        DocCommentDesugarMode::Mbe => {
405            let mut num_of_hashes = 0;
406            let mut count = 0;
407            for ch in text.chars() {
408                count = match ch {
409                    '"' => 1,
410                    '#' if count > 0 => count + 1,
411                    _ => 0,
412                };
413                num_of_hashes = num_of_hashes.max(count);
414            }
415
416            // Quote raw string with delimiters
417            (Symbol::intern(text), tt::LitKind::StrRaw(num_of_hashes))
418        }
419        // Quote string with delimiters
420        DocCommentDesugarMode::ProcMacro => {
421            (Symbol::intern(&format_smolstr!("{}", text.escape_debug())), tt::LitKind::Str)
422        }
423    }
424}
425
/// Desugars the doc comment `token` into a `#[doc = "…"]` (outer) or
/// `#![doc = "…"]` (inner) attribute, pushing its tokens into `builder`.
/// Non-doc comments are ignored.
fn convert_doc_comment<S: Copy>(
    token: &syntax::SyntaxToken,
    span: S,
    mode: DocCommentDesugarMode,
    builder: &mut tt::TopSubtreeBuilder<S>,
) {
    let Some(comment) = ast::Comment::cast(token.clone()) else { return };
    let Some(doc) = comment.kind().doc else { return };

    let mk_ident = |s: &str| {
        tt::Leaf::from(tt::Ident { sym: Symbol::intern(s), span, is_raw: tt::IdentIsRaw::No })
    };

    let mk_punct =
        |c: char| tt::Leaf::from(tt::Punct { char: c, spacing: tt::Spacing::Alone, span });

    let mk_doc_literal = |comment: &ast::Comment| {
        // Strip the comment prefix (`///`, `/**`, `//!`, ...).
        let prefix_len = comment.prefix().len();
        let mut text = &comment.text()[prefix_len..];

        // Remove ending "*/"
        if comment.kind().shape == ast::CommentShape::Block {
            text = &text[0..text.len() - 2];
        }
        let (text, kind) = desugar_doc_comment_text(text, mode);
        let lit = tt::Literal { symbol: text, span, kind, suffix: None };

        tt::Leaf::from(lit)
    };

    // Make the `doc = "…"` meta tokens
    let meta_tkns = [mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)];

    // Make `#[…]` — with an extra `!` for inner (`//!`/`/*!`) doc comments
    builder.push(mk_punct('#'));
    if let ast::CommentPlacement::Inner = doc {
        builder.push(mk_punct('!'));
    }
    builder.open(tt::DelimiterKind::Bracket, span);
    builder.extend(meta_tkns);
    builder.close(span);
}
468
/// A raw token (straight from lexer) converter
struct RawConverter<'a, Ctx> {
    /// The lexed input; tokens are addressed by index into it.
    lexed: parser::LexedStr<'a>,
    /// Index of the next token to hand out.
    pos: usize,
    /// Anchor stored into every produced span.
    anchor: SpanAnchor,
    /// Syntax context stored into every produced span.
    ctx: Ctx,
    /// How doc comments are desugared into `doc` attributes.
    mode: DocCommentDesugarMode,
}
/// A raw token (straight from lexer) converter that gives every token the same span.
struct StaticRawConverter<'a, S> {
    /// The lexed input; tokens are addressed by index into it.
    lexed: parser::LexedStr<'a>,
    /// Index of the next token to hand out.
    pos: usize,
    /// The single span used for all produced tokens.
    span: S,
    /// How doc comments are desugared into `doc` attributes.
    mode: DocCommentDesugarMode,
}
484
/// Abstraction over a single source token as seen by a token converter `Ctx`.
trait SrcToken<Ctx, S> {
    /// The token's syntax kind.
    fn kind(&self, ctx: &Ctx) -> SyntaxKind;

    /// The character this token represents, if it can be viewed as a single char.
    fn to_char(&self, ctx: &Ctx) -> Option<char>;

    /// The token's text.
    fn to_text(&self, ctx: &Ctx) -> SmolStr;

    /// If this "token" is actually a pre-made leaf (e.g. injected via `append`),
    /// returns it; such leaves are passed through conversion unchanged.
    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
        None
    }
}
496
/// Source of tokens for [`convert_tokens`].
trait TokenConverter<S>: Sized {
    type Token: SrcToken<Self, S>;

    /// Desugars the doc comment `token` into `#[doc = …]` tokens on `builder`.
    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: S,
        builder: &mut tt::TopSubtreeBuilder<S>,
    );

    /// Consumes and returns the next token together with its absolute text range.
    fn bump(&mut self) -> Option<(Self::Token, TextRange)>;

    /// Returns the next token without consuming it.
    fn peek(&self) -> Option<Self::Token>;

    /// Returns the span to attach to tokens covering `range`.
    fn span_for(&self, range: TextRange) -> S;

    /// Returns the span used for synthesized tokens with no source location
    /// (e.g. the top-level invisible delimiter and recovery closings).
    fn call_site(&self) -> S;
}
515
516impl<S, Ctx> SrcToken<RawConverter<'_, Ctx>, S> for usize {
517    fn kind(&self, ctx: &RawConverter<'_, Ctx>) -> SyntaxKind {
518        ctx.lexed.kind(*self)
519    }
520
521    fn to_char(&self, ctx: &RawConverter<'_, Ctx>) -> Option<char> {
522        ctx.lexed.text(*self).chars().next()
523    }
524
525    fn to_text(&self, ctx: &RawConverter<'_, Ctx>) -> SmolStr {
526        ctx.lexed.text(*self).into()
527    }
528}
529
530impl<S: Copy> SrcToken<StaticRawConverter<'_, S>, S> for usize {
531    fn kind(&self, ctx: &StaticRawConverter<'_, S>) -> SyntaxKind {
532        ctx.lexed.kind(*self)
533    }
534
535    fn to_char(&self, ctx: &StaticRawConverter<'_, S>) -> Option<char> {
536        ctx.lexed.text(*self).chars().next()
537    }
538
539    fn to_text(&self, ctx: &StaticRawConverter<'_, S>) -> SmolStr {
540        ctx.lexed.text(*self).into()
541    }
542}
543
544impl<Ctx: Copy> TokenConverter<SpanData<Ctx>> for RawConverter<'_, Ctx>
545where
546    SpanData<Ctx>: Copy,
547{
548    type Token = usize;
549
550    fn convert_doc_comment(
551        &self,
552        &token: &usize,
553        span: SpanData<Ctx>,
554        builder: &mut tt::TopSubtreeBuilder<SpanData<Ctx>>,
555    ) {
556        let text = self.lexed.text(token);
557        convert_doc_comment(&doc_comment(text), span, self.mode, builder);
558    }
559
560    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
561        if self.pos == self.lexed.len() {
562            return None;
563        }
564        let token = self.pos;
565        self.pos += 1;
566        let range = self.lexed.text_range(token);
567        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);
568
569        Some((token, range))
570    }
571
572    fn peek(&self) -> Option<Self::Token> {
573        if self.pos == self.lexed.len() {
574            return None;
575        }
576        Some(self.pos)
577    }
578
579    fn span_for(&self, range: TextRange) -> SpanData<Ctx> {
580        SpanData { range, anchor: self.anchor, ctx: self.ctx }
581    }
582
583    fn call_site(&self) -> SpanData<Ctx> {
584        SpanData { range: TextRange::empty(0.into()), anchor: self.anchor, ctx: self.ctx }
585    }
586}
587
588impl<S> TokenConverter<S> for StaticRawConverter<'_, S>
589where
590    S: Copy,
591{
592    type Token = usize;
593
594    fn convert_doc_comment(&self, &token: &usize, span: S, builder: &mut tt::TopSubtreeBuilder<S>) {
595        let text = self.lexed.text(token);
596        convert_doc_comment(&doc_comment(text), span, self.mode, builder);
597    }
598
599    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
600        if self.pos == self.lexed.len() {
601            return None;
602        }
603        let token = self.pos;
604        self.pos += 1;
605        let range = self.lexed.text_range(token);
606        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);
607
608        Some((token, range))
609    }
610
611    fn peek(&self) -> Option<Self::Token> {
612        if self.pos == self.lexed.len() {
613            return None;
614        }
615        Some(self.pos)
616    }
617
618    fn span_for(&self, _: TextRange) -> S {
619        self.span
620    }
621
622    fn call_site(&self) -> S {
623        self.span
624    }
625}
626
/// Token converter that walks a syntax tree, optionally injecting (`append`)
/// or hiding (`remove`) tokens along the way.
struct Converter<SpanMap, S> {
    /// The next syntax token to hand out.
    current: Option<SyntaxToken>,
    /// Pending pre-made leaves (queued from `append`); drained before `current`.
    current_leaves: Vec<tt::Leaf<S>>,
    /// Preorder walk over the node being converted.
    preorder: PreorderWithTokens,
    /// Range of the converted node; tokens outside it are not emitted.
    range: TextRange,
    /// Set while a multi-char punct token is being emitted char by char.
    punct_offset: Option<(SyntaxToken, TextSize)>,
    /// Used to make the emitted text ranges in the spans relative to the span anchor.
    map: SpanMap,
    /// Extra leaves to emit for the given elements.
    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
    /// Elements whose tokens are dropped from the output.
    remove: FxHashSet<SyntaxElement>,
    /// Span for synthesized tokens with no source location.
    call_site: S,
    /// How doc comments are desugared into `doc` attributes.
    mode: DocCommentDesugarMode,
}
640
impl<SpanMap, S> Converter<SpanMap, S> {
    /// Creates a converter positioned on the first token of `node`.
    fn new(
        node: &SyntaxNode,
        map: SpanMap,
        append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
        remove: FxHashSet<SyntaxElement>,
        call_site: S,
        mode: DocCommentDesugarMode,
    ) -> Self {
        let mut this = Converter {
            current: None,
            preorder: node.preorder_with_tokens(),
            range: node.text_range(),
            punct_offset: None,
            map,
            append,
            remove,
            call_site,
            current_leaves: vec![],
            mode,
        };
        // Prime `current` so the first `bump` has a token to hand out.
        let first = this.next_token();
        this.current = first;
        this
    }

    /// Advances the preorder walk to the next syntax token, honoring the
    /// `remove` and `append` sets.
    ///
    /// Returns `None` either when the walk is exhausted or when appended leaves
    /// were queued into `current_leaves` (which `bump` drains first).
    fn next_token(&mut self) -> Option<SyntaxToken> {
        while let Some(ev) = self.preorder.next() {
            match ev {
                WalkEvent::Enter(token) => {
                    if self.remove.contains(&token) {
                        match token {
                            syntax::NodeOrToken::Token(_) => {
                                // A removed token is simply skipped.
                                continue;
                            }
                            node => {
                                // A removed node drops its entire subtree, but
                                // leaves appended to it are still emitted.
                                self.preorder.skip_subtree();
                                if let Some(mut v) = self.append.remove(&node) {
                                    // Reversed because `bump` pops from the back.
                                    v.reverse();
                                    self.current_leaves.extend(v);
                                    return None;
                                }
                            }
                        }
                    } else if let syntax::NodeOrToken::Token(token) = token {
                        return Some(token);
                    }
                }
                WalkEvent::Leave(ele) => {
                    // Appended leaves attach after the element they belong to.
                    if let Some(mut v) = self.append.remove(&ele) {
                        v.reverse();
                        self.current_leaves.extend(v);
                        return None;
                    }
                }
            }
        }
        None
    }
}
701
/// A token handed out by [`Converter`].
#[derive(Debug)]
enum SynToken<S> {
    /// A plain syntax token.
    Ordinary(SyntaxToken),
    /// A single character at `offset` within a (possibly multi-char) punct token.
    Punct { token: SyntaxToken, offset: usize },
    /// A pre-made leaf injected via the `append` mechanism.
    Leaf(tt::Leaf<S>),
}
708
709impl<S> SynToken<S> {
710    fn token(&self) -> &SyntaxToken {
711        match self {
712            SynToken::Ordinary(it) | SynToken::Punct { token: it, offset: _ } => it,
713            SynToken::Leaf(_) => unreachable!(),
714        }
715    }
716}
717
718impl<SpanMap, S> SrcToken<Converter<SpanMap, S>, S> for SynToken<S> {
719    fn kind(&self, _ctx: &Converter<SpanMap, S>) -> SyntaxKind {
720        match self {
721            SynToken::Ordinary(token) => token.kind(),
722            SynToken::Punct { token, offset: i } => {
723                SyntaxKind::from_char(token.text().chars().nth(*i).unwrap()).unwrap()
724            }
725            SynToken::Leaf(_) => {
726                never!();
727                SyntaxKind::ERROR
728            }
729        }
730    }
731    fn to_char(&self, _ctx: &Converter<SpanMap, S>) -> Option<char> {
732        match self {
733            SynToken::Ordinary(_) => None,
734            SynToken::Punct { token: it, offset: i } => it.text().chars().nth(*i),
735            SynToken::Leaf(_) => None,
736        }
737    }
738    fn to_text(&self, _ctx: &Converter<SpanMap, S>) -> SmolStr {
739        match self {
740            SynToken::Ordinary(token) | SynToken::Punct { token, offset: _ } => token.text().into(),
741            SynToken::Leaf(_) => {
742                never!();
743                "".into()
744            }
745        }
746    }
747    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
748        match self {
749            SynToken::Ordinary(_) | SynToken::Punct { .. } => None,
750            SynToken::Leaf(it) => Some(it),
751        }
752    }
753}
754
impl<S, SpanMap> TokenConverter<S> for Converter<SpanMap, S>
where
    S: Copy,
    SpanMap: SpanMapper<S>,
{
    type Token = SynToken<S>;
    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: S,
        builder: &mut tt::TopSubtreeBuilder<S>,
    ) {
        convert_doc_comment(token.token(), span, self.mode, builder);
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        // 1) In the middle of a multi-char punct token: emit its next char.
        if let Some((punct, offset)) = self.punct_offset.clone() {
            if usize::from(offset) + 1 < punct.text().len() {
                // `TextSize::of('.')` is just a one-char (1 byte) step.
                let offset = offset + TextSize::of('.');
                let range = punct.text_range();
                self.punct_offset = Some((punct.clone(), offset));
                let range = TextRange::at(range.start() + offset, TextSize::of('.'));
                return Some((
                    SynToken::Punct { token: punct, offset: u32::from(offset) as usize },
                    range,
                ));
            }
        }

        // 2) Drain leaves queued by the `append` mechanism; they carry an empty
        //    range since they have no corresponding source text.
        if let Some(leaf) = self.current_leaves.pop() {
            if self.current_leaves.is_empty() {
                self.current = self.next_token();
            }
            return Some((SynToken::Leaf(leaf), TextRange::empty(TextSize::new(0))));
        }

        // 3) Otherwise hand out the current syntax token, provided it still
        //    lies inside the converted node's range.
        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        self.current = self.next_token();
        let token = if curr.kind().is_punct() {
            // Punct tokens are emitted one char at a time; remember where we are.
            self.punct_offset = Some((curr.clone(), 0.into()));
            let range = curr.text_range();
            let range = TextRange::at(range.start(), TextSize::of('.'));
            (SynToken::Punct { token: curr, offset: 0_usize }, range)
        } else {
            self.punct_offset = None;
            let range = curr.text_range();
            (SynToken::Ordinary(curr), range)
        };

        Some(token)
    }

    // Mirrors the punct/ordinary branches of `bump` without mutating state.
    fn peek(&self) -> Option<Self::Token> {
        if let Some((punct, mut offset)) = self.punct_offset.clone() {
            offset += TextSize::of('.');
            if usize::from(offset) < punct.text().len() {
                return Some(SynToken::Punct { token: punct, offset: usize::from(offset) });
            }
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        let token = if curr.kind().is_punct() {
            SynToken::Punct { token: curr, offset: 0_usize }
        } else {
            SynToken::Ordinary(curr)
        };
        Some(token)
    }

    fn span_for(&self, range: TextRange) -> S {
        self.map.span_for(range)
    }
    fn call_site(&self) -> S {
        self.call_site
    }
}
839
/// Sink that rebuilds a syntax tree (and a span map) from parser events while
/// consuming tokens off a token-tree cursor.
struct TtTreeSink<'a, Ctx>
where
    SpanData<Ctx>: Copy,
{
    /// Scratch buffer for token text being emitted.
    buf: String,
    /// Position in the token tree currently being consumed.
    cursor: Cursor<'a, SpanData<Ctx>>,
    /// Offset in the reconstructed source text emitted so far.
    text_pos: TextSize,
    /// The syntax tree under construction.
    inner: SyntaxTreeBuilder,
    /// Maps offsets in the reconstructed text back to token-tree spans.
    token_map: SpanMap<Ctx>,
}
850
851impl<'a, Ctx> TtTreeSink<'a, Ctx>
852where
853    SpanData<Ctx>: Copy,
854{
855    fn new(cursor: Cursor<'a, SpanData<Ctx>>) -> Self {
856        TtTreeSink {
857            buf: String::new(),
858            cursor,
859            text_pos: 0.into(),
860            inner: SyntaxTreeBuilder::default(),
861            token_map: SpanMap::empty(),
862        }
863    }
864
865    fn finish(mut self) -> (Parse<SyntaxNode>, SpanMap<Ctx>) {
866        self.token_map.finish();
867        (self.inner.finish(), self.token_map)
868    }
869}
870
871fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> {
872    let texts = match d {
873        tt::DelimiterKind::Parenthesis => "()",
874        tt::DelimiterKind::Brace => "{}",
875        tt::DelimiterKind::Bracket => "[]",
876        tt::DelimiterKind::Invisible => return None,
877    };
878
879    let idx = closing as usize;
880    Some(&texts[idx..texts.len() - (1 - idx)])
881}
882
883impl<Ctx> TtTreeSink<'_, Ctx>
884where
885    SpanData<Ctx>: Copy + fmt::Debug,
886    Ctx: PartialEq,
887{
    /// Parses a float literal as if it was a one to two name ref nodes with a dot inbetween.
    /// This occurs when a float literal is used as a field access.
    fn float_split(&mut self, has_pseudo_dot: bool) {
        // The cursor must be on a float literal whenever the parser requests a split.
        let (text, span) = match self.cursor.token_tree() {
            Some(tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal {
                symbol: text,
                span,
                kind: tt::LitKind::Float,
                suffix: _,
            }))) => (text.as_str(), *span),
            tt => unreachable!("{tt:?}"),
        };
        // FIXME: Span splitting
        match text.split_once('.') {
            Some((left, right)) => {
                assert!(!left.is_empty());

                // Integer part, e.g. the `1` of `1.2`, as its own name ref.
                self.inner.start_node(SyntaxKind::NAME_REF);
                self.inner.token(SyntaxKind::INT_NUMBER, left);
                self.inner.finish_node();
                self.token_map.push(self.text_pos + TextSize::of(left), span);

                // here we move the exit up, the original exit has been deleted in process
                self.inner.finish_node();

                self.inner.token(SyntaxKind::DOT, ".");
                self.token_map.push(self.text_pos + TextSize::of(left) + TextSize::of("."), span);

                if has_pseudo_dot {
                    // `1.` — nothing may follow the dot.
                    assert!(right.is_empty(), "{left}.{right}");
                } else {
                    // `1.2` — emit the fractional part as a second name ref.
                    assert!(!right.is_empty(), "{left}.{right}");
                    self.inner.start_node(SyntaxKind::NAME_REF);
                    self.inner.token(SyntaxKind::INT_NUMBER, right);
                    self.token_map.push(self.text_pos + TextSize::of(text), span);
                    self.inner.finish_node();

                    // the parser creates an unbalanced start node, we are required to close it here
                    self.inner.finish_node();
                }
                self.text_pos += TextSize::of(text);
            }
            None => unreachable!(),
        }
        self.cursor.bump();
    }
934
    /// Consumes `n_tokens` leaves/delimiters from the cursor and emits them as
    /// a single token of `kind` into the tree builder, recording one merged
    /// span for the produced text.
    fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
        // A lifetime identifier is represented by two token trees (the `'`
        // punct and the following ident), so always consume two.
        if kind == LIFETIME_IDENT {
            n_tokens = 2;
        }

        let mut last_two = self.cursor.peek_two_leaves();
        // Union of the spans of all leaves consumed so far; `None` until the
        // first leaf contributes.
        let mut combined_span = None;
        'tokens: for _ in 0..n_tokens {
            let tmp: u8;
            if self.cursor.eof() {
                break;
            }
            last_two = self.cursor.peek_two_leaves();
            let (text, span) = loop {
                break match self.cursor.token_tree() {
                    Some(tt::TokenTree::Leaf(leaf)) => match leaf {
                        tt::Leaf::Ident(ident) => {
                            if ident.is_raw.yes() {
                                // Restore the `r#` prefix, which is not part of
                                // the ident's symbol text.
                                self.buf.push_str("r#");
                                self.text_pos += TextSize::of("r#");
                            }
                            let r = (ident.sym.as_str(), ident.span);
                            self.cursor.bump();
                            r
                        }
                        tt::Leaf::Punct(punct) => {
                            assert!(punct.char.is_ascii());
                            // Borrow the single ASCII byte as a `&str` without
                            // allocating.
                            tmp = punct.char as u8;
                            let r = (
                                std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(),
                                punct.span,
                            );
                            self.cursor.bump();
                            r
                        }
                        tt::Leaf::Literal(lit) => {
                            // Literals render via `Display` straight into the
                            // buffer, so do the text/span bookkeeping here and
                            // skip the shared path at the bottom of the loop.
                            let buf_l = self.buf.len();
                            format_to!(self.buf, "{lit}");
                            debug_assert_ne!(self.buf.len() - buf_l, 0);
                            self.text_pos += TextSize::new((self.buf.len() - buf_l) as u32);
                            combined_span = match combined_span {
                                None => Some(lit.span),
                                Some(prev_span) => Some(Self::merge_spans(prev_span, lit.span)),
                            };
                            self.cursor.bump();
                            continue 'tokens;
                        }
                    },
                    Some(tt::TokenTree::Subtree(subtree)) => {
                        // Entering a subtree: emit its opening delimiter, if it
                        // has a visible one.
                        self.cursor.bump();
                        match delim_to_str(subtree.delimiter.kind, false) {
                            Some(it) => (it, subtree.delimiter.open),
                            None => continue,
                        }
                    }
                    None => {
                        // Current subtree exhausted: emit the enclosing
                        // delimiter's closing text, if visible.
                        let parent = self.cursor.end();
                        match delim_to_str(parent.delimiter.kind, true) {
                            Some(it) => (it, parent.delimiter.close),
                            None => continue,
                        }
                    }
                };
            };
            self.buf += text;
            self.text_pos += TextSize::of(text);
            combined_span = match combined_span {
                None => Some(span),
                Some(prev_span) => Some(Self::merge_spans(prev_span, span)),
            }
        }

        // Map the end offset of the assembled token to its merged span, then
        // flush the buffered text as one syntax token.
        self.token_map.push(self.text_pos, combined_span.expect("expected at least one token"));
        self.inner.token(kind, self.buf.as_str());
        self.buf.clear();
        // FIXME: Emitting whitespace for this is really just a hack, we should get rid of it.
        // Add whitespace between adjoint puncts
        if let Some([tt::Leaf::Punct(curr), tt::Leaf::Punct(next)]) = last_two {
            // Note: We always assume the semi-colon would be the last token in
            // other parts of RA such that we don't add whitespace here.
            //
            // When `next` is a `Punct` of `'`, that's a part of a lifetime identifier so we don't
            // need to add whitespace either.
            if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' {
                self.inner.token(WHITESPACE, " ");
                self.text_pos += TextSize::of(' ');
                self.token_map.push(self.text_pos, curr.span);
            }
        }
    }
1025
    /// Opens a new node of `kind` in the output tree.
    fn start_node(&mut self, kind: SyntaxKind) {
        self.inner.start_node(kind);
    }
1029
    /// Closes the most recently opened node in the output tree.
    fn finish_node(&mut self) {
        self.inner.finish_node();
    }
1033
    /// Records a parse error at the current text position.
    fn error(&mut self, error: String) {
        self.inner.error(error, self.text_pos)
    }
1037
1038    fn merge_spans(a: SpanData<Ctx>, b: SpanData<Ctx>) -> SpanData<Ctx> {
1039        // We don't do what rustc does exactly, rustc does something clever when the spans have different syntax contexts
1040        // but this runs afoul of our separation between `span` and `hir-expand`.
1041        SpanData {
1042            range: if a.ctx == b.ctx && a.anchor == b.anchor {
1043                TextRange::new(
1044                    std::cmp::min(a.range.start(), b.range.start()),
1045                    std::cmp::max(a.range.end(), b.range.end()),
1046                )
1047            } else {
1048                // Combining ranges make no sense when they come from different syntax contexts.
1049                a.range
1050            },
1051            anchor: a.anchor,
1052            ctx: a.ctx,
1053        }
1054    }
1055}