ra_ap_syntax_bridge/
lib.rs

1//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].
2
3use std::{fmt, hash::Hash};
4
5use intern::Symbol;
6use rustc_hash::{FxHashMap, FxHashSet};
7use span::{Edition, SpanAnchor, SpanData, SpanMap};
8use stdx::{format_to, never};
9use syntax::{
10    ast::{self, make::tokens::doc_comment},
11    format_smolstr, AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement,
12    SyntaxKind::{self, *},
13    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T,
14};
15use tt::{buffer::Cursor, token_to_literal};
16
17pub mod prettify_macro_expansion;
18mod to_parser_input;
19pub use to_parser_input::to_parser_input;
// FIXME: we probably should re-think `token_tree_to_syntax_node` interfaces
21pub use ::parser::TopEntryPoint;
22
23#[cfg(test)]
24mod tests;
25
/// Maps absolute text ranges to spans of type `S`.
pub trait SpanMapper<S> {
    /// Returns the span covering the given text range.
    fn span_for(&self, range: TextRange) -> S;
}
29
impl<S> SpanMapper<SpanData<S>> for SpanMap<S>
where
    SpanData<S>: Copy,
{
    fn span_for(&self, range: TextRange) -> SpanData<S> {
        // Only the range's start position participates in the lookup.
        self.span_at(range.start())
    }
}
38
// Blanket impl so a span mapper can also be passed by reference.
impl<S: Copy, SM: SpanMapper<S>> SpanMapper<S> for &SM {
    fn span_for(&self, range: TextRange) -> S {
        SM::span_for(self, range)
    }
}
44
45/// Dummy things for testing where spans don't matter.
46pub mod dummy_test_span_utils {
47
48    use span::{Span, SyntaxContextId};
49
50    use super::*;
51
52    pub const DUMMY: Span = Span {
53        range: TextRange::empty(TextSize::new(0)),
54        anchor: span::SpanAnchor {
55            file_id: span::EditionedFileId::new(
56                span::FileId::from_raw(0xe4e4e),
57                span::Edition::CURRENT,
58            ),
59            ast_id: span::ROOT_ERASED_FILE_AST_ID,
60        },
61        ctx: SyntaxContextId::root(Edition::CURRENT),
62    };
63
64    pub struct DummyTestSpanMap;
65
66    impl SpanMapper<Span> for DummyTestSpanMap {
67        fn span_for(&self, range: syntax::TextRange) -> Span {
68            Span {
69                range,
70                anchor: span::SpanAnchor {
71                    file_id: span::EditionedFileId::new(
72                        span::FileId::from_raw(0xe4e4e),
73                        span::Edition::CURRENT,
74                    ),
75                    ast_id: span::ROOT_ERASED_FILE_AST_ID,
76                },
77                ctx: SyntaxContextId::root(Edition::CURRENT),
78            }
79        }
80    }
81}
82
/// Doc comment desugaring differs between mbe and proc-macros.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum DocCommentDesugarMode {
    /// Desugars doc comments as quoted raw strings (`r#"..."#`)
    Mbe,
    /// Desugars doc comments as quoted strings (`"..."`, with escaping)
    ProcMacro,
}
91
92/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
93/// subtree's spans.
94pub fn syntax_node_to_token_tree<Ctx, SpanMap>(
95    node: &SyntaxNode,
96    map: SpanMap,
97    span: SpanData<Ctx>,
98    mode: DocCommentDesugarMode,
99) -> tt::TopSubtree<SpanData<Ctx>>
100where
101    SpanData<Ctx>: Copy + fmt::Debug,
102    SpanMap: SpanMapper<SpanData<Ctx>>,
103{
104    let mut c = Converter::new(node, map, Default::default(), Default::default(), span, mode);
105    convert_tokens(&mut c)
106}
107
/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
/// subtree's spans. Additionally using the append and remove parameters, the additional tokens can
/// be injected or hidden from the output.
///
/// `append` maps a syntax element to extra leaves emitted after it; `remove`
/// lists syntax elements to omit from the output entirely.
pub fn syntax_node_to_token_tree_modified<Ctx, SpanMap>(
    node: &SyntaxNode,
    map: SpanMap,
    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<SpanData<Ctx>>>>,
    remove: FxHashSet<SyntaxElement>,
    call_site: SpanData<Ctx>,
    mode: DocCommentDesugarMode,
) -> tt::TopSubtree<SpanData<Ctx>>
where
    SpanMap: SpanMapper<SpanData<Ctx>>,
    SpanData<Ctx>: Copy + fmt::Debug,
{
    let mut c = Converter::new(node, map, append, remove, call_site, mode);
    convert_tokens(&mut c)
}
126
127// The following items are what `rustc` macro can be parsed into :
128// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
129// * Expr(P<ast::Expr>)                     -> token_tree_to_expr
130// * Pat(P<ast::Pat>)                       -> token_tree_to_pat
131// * Ty(P<ast::Ty>)                         -> token_tree_to_ty
132// * Stmts(SmallVec<[ast::Stmt; 1]>)        -> token_tree_to_stmts
133// * Items(SmallVec<[P<ast::Item>; 1]>)     -> token_tree_to_items
134//
135// * TraitItems(SmallVec<[ast::TraitItem; 1]>)
136// * AssocItems(SmallVec<[ast::AssocItem; 1]>)
// * ForeignItems(SmallVec<[ast::ForeignItem; 1]>)
138
139/// Converts a [`tt::Subtree`] back to a [`SyntaxNode`].
140/// The produced `SpanMap` contains a mapping from the syntax nodes offsets to the subtree's spans.
141pub fn token_tree_to_syntax_node<Ctx>(
142    tt: &tt::TopSubtree<SpanData<Ctx>>,
143    entry_point: parser::TopEntryPoint,
144    span_to_edition: &mut dyn FnMut(Ctx) -> Edition,
145    top_edition: Edition,
146) -> (Parse<SyntaxNode>, SpanMap<Ctx>)
147where
148    Ctx: Copy + fmt::Debug + PartialEq + PartialEq + Eq + Hash,
149{
150    let buffer = tt.view().strip_invisible();
151    let parser_input = to_parser_input(buffer, span_to_edition);
152    // It matters what edition we parse with even when we escape all identifiers correctly.
153    let parser_output = entry_point.parse(&parser_input, top_edition);
154    let mut tree_sink = TtTreeSink::new(buffer.cursor());
155    for event in parser_output.iter() {
156        match event {
157            parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => {
158                tree_sink.token(kind, n_raw_tokens)
159            }
160            parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
161                tree_sink.float_split(has_pseudo_dot)
162            }
163            parser::Step::Enter { kind } => tree_sink.start_node(kind),
164            parser::Step::Exit => tree_sink.finish_node(),
165            parser::Step::Error { msg } => tree_sink.error(msg.to_owned()),
166        }
167    }
168    tree_sink.finish()
169}
170
171/// Convert a string to a `TokenTree`. The spans of the subtree will be anchored to the provided
172/// anchor with the given context.
173pub fn parse_to_token_tree<Ctx>(
174    edition: Edition,
175    anchor: SpanAnchor,
176    ctx: Ctx,
177    text: &str,
178) -> Option<tt::TopSubtree<SpanData<Ctx>>>
179where
180    SpanData<Ctx>: Copy + fmt::Debug,
181    Ctx: Copy,
182{
183    let lexed = parser::LexedStr::new(edition, text);
184    if lexed.errors().next().is_some() {
185        return None;
186    }
187    let mut conv =
188        RawConverter { lexed, anchor, pos: 0, ctx, mode: DocCommentDesugarMode::ProcMacro };
189    Some(convert_tokens(&mut conv))
190}
191
192/// Convert a string to a `TokenTree`. The passed span will be used for all spans of the produced subtree.
193pub fn parse_to_token_tree_static_span<S>(
194    edition: Edition,
195    span: S,
196    text: &str,
197) -> Option<tt::TopSubtree<S>>
198where
199    S: Copy + fmt::Debug,
200{
201    let lexed = parser::LexedStr::new(edition, text);
202    if lexed.errors().next().is_some() {
203        return None;
204    }
205    let mut conv =
206        StaticRawConverter { lexed, pos: 0, span, mode: DocCommentDesugarMode::ProcMacro };
207    Some(convert_tokens(&mut conv))
208}
209
/// Core conversion loop: drains `conv` and assembles a token tree, opening and
/// closing subtrees as delimiter tokens are encountered.
fn convert_tokens<S, C>(conv: &mut C) -> tt::TopSubtree<S>
where
    C: TokenConverter<S>,
    S: Copy + fmt::Debug,
    C::Token: fmt::Debug,
{
    let mut builder =
        tt::TopSubtreeBuilder::new(tt::Delimiter::invisible_spanned(conv.call_site()));

    while let Some((token, abs_range)) = conv.bump() {
        let delimiter = builder.expected_delimiter().map(|it| it.kind);
        let tt = match token.as_leaf() {
            // Pre-built leaves (injected via `Converter::append`) pass through untouched.
            Some(leaf) => leaf.clone(),
            None => match token.kind(conv) {
                // Desugar doc comments into doc attributes
                COMMENT => {
                    let span = conv.span_for(abs_range);
                    conv.convert_doc_comment(&token, span, &mut builder);
                    continue;
                }
                kind if kind.is_punct() && kind != UNDERSCORE => {
                    // The closing token that would end the innermost open subtree, if any.
                    let expected = match delimiter {
                        Some(tt::DelimiterKind::Parenthesis) => Some(T![')']),
                        Some(tt::DelimiterKind::Brace) => Some(T!['}']),
                        Some(tt::DelimiterKind::Bracket) => Some(T![']']),
                        Some(tt::DelimiterKind::Invisible) | None => None,
                    };

                    // Current token is a closing delimiter that we expect, fix up the closing span
                    // and end the subtree here
                    if matches!(expected, Some(expected) if expected == kind) {
                        builder.close(conv.span_for(abs_range));
                        continue;
                    }

                    let delim = match kind {
                        T!['('] => Some(tt::DelimiterKind::Parenthesis),
                        T!['{'] => Some(tt::DelimiterKind::Brace),
                        T!['['] => Some(tt::DelimiterKind::Bracket),
                        _ => None,
                    };

                    // Start a new subtree
                    if let Some(kind) = delim {
                        builder.open(kind, conv.span_for(abs_range));
                        continue;
                    }

                    // Joint spacing when the next token is a punct-like token that
                    // could fuse with this one (see `is_single_token_op`).
                    let spacing = match conv.peek().map(|next| next.kind(conv)) {
                        Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
                        _ => tt::Spacing::Alone,
                    };
                    let Some(char) = token.to_char(conv) else {
                        panic!("Token from lexer must be single char: token = {token:#?}")
                    };
                    tt::Leaf::from(tt::Punct { char, spacing, span: conv.span_for(abs_range) })
                }
                kind => {
                    // Builds a plain (non-raw) identifier leaf from the token text.
                    macro_rules! make_ident {
                        () => {
                            tt::Ident {
                                span: conv.span_for(abs_range),
                                sym: Symbol::intern(&token.to_text(conv)),
                                is_raw: tt::IdentIsRaw::No,
                            }
                            .into()
                        };
                    }
                    let leaf: tt::Leaf<_> = match kind {
                        k if k.is_any_identifier() => {
                            let text = token.to_text(conv);
                            tt::Ident::new(&text, conv.span_for(abs_range)).into()
                        }
                        UNDERSCORE => make_ident!(),
                        k if k.is_literal() => {
                            let text = token.to_text(conv);
                            let span = conv.span_for(abs_range);
                            token_to_literal(&text, span).into()
                        }
                        LIFETIME_IDENT => {
                            // Split the lifetime into a joint `'` punct followed by
                            // an identifier, each with its own sub-span.
                            let apostrophe = tt::Leaf::from(tt::Punct {
                                char: '\'',
                                spacing: tt::Spacing::Joint,
                                span: conv
                                    .span_for(TextRange::at(abs_range.start(), TextSize::of('\''))),
                            });
                            builder.push(apostrophe);

                            let ident = tt::Leaf::from(tt::Ident {
                                sym: Symbol::intern(&token.to_text(conv)[1..]),
                                span: conv.span_for(TextRange::new(
                                    abs_range.start() + TextSize::of('\''),
                                    abs_range.end(),
                                )),
                                is_raw: tt::IdentIsRaw::No,
                            });
                            builder.push(ident);
                            continue;
                        }
                        // Whitespace and other trivia produce no leaves.
                        _ => continue,
                    };

                    leaf
                }
            },
        };

        builder.push(tt);
    }

    // If we get here, we've consumed all input tokens.
    // We might have more than one subtree in the stack, if the delimiters are improperly balanced.
    // Merge them so we're left with one.
    builder.flatten_unclosed_subtrees();

    builder.build_skip_top_subtree()
}
327
328fn is_single_token_op(kind: SyntaxKind) -> bool {
329    matches!(
330        kind,
331        EQ | L_ANGLE
332            | R_ANGLE
333            | BANG
334            | AMP
335            | PIPE
336            | TILDE
337            | AT
338            | DOT
339            | COMMA
340            | SEMICOLON
341            | COLON
342            | POUND
343            | DOLLAR
344            | QUESTION
345            | PLUS
346            | MINUS
347            | STAR
348            | SLASH
349            | PERCENT
350            | CARET
351            // LIFETIME_IDENT will be split into a sequence of `'` (a single quote) and an
352            // identifier.
353            | LIFETIME_IDENT
354    )
355}
356
357/// Returns the textual content of a doc comment block as a quoted string
358/// That is, strips leading `///` (or `/**`, etc)
359/// and strips the ending `*/`
360/// And then quote the string, which is needed to convert to `tt::Literal`
361///
362/// Note that proc-macros desugar with string literals where as macro_rules macros desugar with raw string literals.
363pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (Symbol, tt::LitKind) {
364    match mode {
365        DocCommentDesugarMode::Mbe => {
366            let mut num_of_hashes = 0;
367            let mut count = 0;
368            for ch in text.chars() {
369                count = match ch {
370                    '"' => 1,
371                    '#' if count > 0 => count + 1,
372                    _ => 0,
373                };
374                num_of_hashes = num_of_hashes.max(count);
375            }
376
377            // Quote raw string with delimiters
378            (Symbol::intern(text), tt::LitKind::StrRaw(num_of_hashes))
379        }
380        // Quote string with delimiters
381        DocCommentDesugarMode::ProcMacro => {
382            (Symbol::intern(&format_smolstr!("{}", text.escape_debug())), tt::LitKind::Str)
383        }
384    }
385}
386
387fn convert_doc_comment<S: Copy>(
388    token: &syntax::SyntaxToken,
389    span: S,
390    mode: DocCommentDesugarMode,
391    builder: &mut tt::TopSubtreeBuilder<S>,
392) {
393    let Some(comment) = ast::Comment::cast(token.clone()) else { return };
394    let Some(doc) = comment.kind().doc else { return };
395
396    let mk_ident = |s: &str| {
397        tt::Leaf::from(tt::Ident { sym: Symbol::intern(s), span, is_raw: tt::IdentIsRaw::No })
398    };
399
400    let mk_punct =
401        |c: char| tt::Leaf::from(tt::Punct { char: c, spacing: tt::Spacing::Alone, span });
402
403    let mk_doc_literal = |comment: &ast::Comment| {
404        let prefix_len = comment.prefix().len();
405        let mut text = &comment.text()[prefix_len..];
406
407        // Remove ending "*/"
408        if comment.kind().shape == ast::CommentShape::Block {
409            text = &text[0..text.len() - 2];
410        }
411        let (text, kind) = desugar_doc_comment_text(text, mode);
412        let lit = tt::Literal { symbol: text, span, kind, suffix: None };
413
414        tt::Leaf::from(lit)
415    };
416
417    // Make `doc="\" Comments\""
418    let meta_tkns = [mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)];
419
420    // Make `#![]`
421    builder.push(mk_punct('#'));
422    if let ast::CommentPlacement::Inner = doc {
423        builder.push(mk_punct('!'));
424    }
425    builder.open(tt::DelimiterKind::Bracket, span);
426    builder.extend(meta_tkns);
427    builder.close(span);
428}
429
/// A raw token (straight from lexer) converter
struct RawConverter<'a, Ctx> {
    lexed: parser::LexedStr<'a>,
    /// Index of the next lexed token to hand out.
    pos: usize,
    /// Anchor placed into every produced span.
    anchor: SpanAnchor,
    /// Syntax context placed into every produced span.
    ctx: Ctx,
    mode: DocCommentDesugarMode,
}
/// A raw token (straight from lexer) converter that gives every token the same span.
struct StaticRawConverter<'a, S> {
    lexed: parser::LexedStr<'a>,
    /// Index of the next lexed token to hand out.
    pos: usize,
    /// The single span shared by all produced tokens.
    span: S,
    mode: DocCommentDesugarMode,
}
445
/// A single source token, resolvable to kind/text through its converter `Ctx`.
trait SrcToken<Ctx, S> {
    fn kind(&self, ctx: &Ctx) -> SyntaxKind;

    /// The token's first character, used for punctuation tokens.
    fn to_char(&self, ctx: &Ctx) -> Option<char>;

    fn to_text(&self, ctx: &Ctx) -> SmolStr;

    /// A pre-built leaf, if this token is an injected `tt::Leaf` rather than
    /// real source text (only `Converter` produces these).
    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
        None
    }
}
457
/// A source of tokens (lexer output or syntax tree) that `convert_tokens`
/// drains to build a token tree.
trait TokenConverter<S>: Sized {
    type Token: SrcToken<Self, S>;

    /// Desugars the doc comment `token` into doc-attribute tokens on `builder`.
    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: S,
        builder: &mut tt::TopSubtreeBuilder<S>,
    );

    /// Consumes and returns the next token with its absolute text range.
    fn bump(&mut self) -> Option<(Self::Token, TextRange)>;

    /// Returns the next token without consuming it.
    fn peek(&self) -> Option<Self::Token>;

    /// Resolves a text range to a span.
    fn span_for(&self, range: TextRange) -> S;

    /// Span used for the top-level invisible delimiter.
    fn call_site(&self) -> S;
}
476
// For the raw converter, a token is just an index into the lexed stream.
impl<S, Ctx> SrcToken<RawConverter<'_, Ctx>, S> for usize {
    fn kind(&self, ctx: &RawConverter<'_, Ctx>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

    fn to_char(&self, ctx: &RawConverter<'_, Ctx>) -> Option<char> {
        ctx.lexed.text(*self).chars().next()
    }

    fn to_text(&self, ctx: &RawConverter<'_, Ctx>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }
}
490
// Same index-based token representation for the static-span converter.
impl<S: Copy> SrcToken<StaticRawConverter<'_, S>, S> for usize {
    fn kind(&self, ctx: &StaticRawConverter<'_, S>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

    fn to_char(&self, ctx: &StaticRawConverter<'_, S>) -> Option<char> {
        ctx.lexed.text(*self).chars().next()
    }

    fn to_text(&self, ctx: &StaticRawConverter<'_, S>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }
}
504
impl<Ctx: Copy> TokenConverter<SpanData<Ctx>> for RawConverter<'_, Ctx>
where
    SpanData<Ctx>: Copy,
{
    // Tokens are indices into the lexed stream.
    type Token = usize;

    fn convert_doc_comment(
        &self,
        &token: &usize,
        span: SpanData<Ctx>,
        builder: &mut tt::TopSubtreeBuilder<SpanData<Ctx>>,
    ) {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span, self.mode, builder);
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        // Lexer offsets are converted to `TextSize`; bail if they don't fit.
        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);

        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    fn span_for(&self, range: TextRange) -> SpanData<Ctx> {
        SpanData { range, anchor: self.anchor, ctx: self.ctx }
    }

    fn call_site(&self) -> SpanData<Ctx> {
        // An empty range at offset 0 of the anchor stands in for the call site.
        SpanData { range: TextRange::empty(0.into()), anchor: self.anchor, ctx: self.ctx }
    }
}
548
impl<S> TokenConverter<S> for StaticRawConverter<'_, S>
where
    S: Copy,
{
    // Tokens are indices into the lexed stream.
    type Token = usize;

    fn convert_doc_comment(&self, &token: &usize, span: S, builder: &mut tt::TopSubtreeBuilder<S>) {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span, self.mode, builder);
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        // Lexer offsets are converted to `TextSize`; bail if they don't fit.
        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);

        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    // Every token gets the one static span, regardless of range.
    fn span_for(&self, _: TextRange) -> S {
        self.span
    }

    fn call_site(&self) -> S {
        self.span
    }
}
587
/// Token source that walks a syntax tree, optionally removing elements and
/// injecting extra leaves along the way.
struct Converter<SpanMap, S> {
    /// The next syntax token to be converted, if any.
    current: Option<SyntaxToken>,
    /// Injected leaves (from `append`) still waiting to be emitted; drained via `pop`.
    current_leaves: Vec<tt::Leaf<S>>,
    preorder: PreorderWithTokens,
    /// Range of the converted node; tokens outside it are not emitted.
    range: TextRange,
    /// Set while a multi-char punct token is emitted one character at a time.
    punct_offset: Option<(SyntaxToken, TextSize)>,
    /// Used to make the emitted text ranges in the spans relative to the span anchor.
    map: SpanMap,
    /// Extra leaves to emit after the keyed syntax element.
    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
    /// Syntax elements to omit from the output.
    remove: FxHashSet<SyntaxElement>,
    call_site: S,
    mode: DocCommentDesugarMode,
}
601
impl<SpanMap, S> Converter<SpanMap, S> {
    fn new(
        node: &SyntaxNode,
        map: SpanMap,
        append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
        remove: FxHashSet<SyntaxElement>,
        call_site: S,
        mode: DocCommentDesugarMode,
    ) -> Self {
        let mut this = Converter {
            current: None,
            preorder: node.preorder_with_tokens(),
            range: node.text_range(),
            punct_offset: None,
            map,
            append,
            remove,
            call_site,
            current_leaves: vec![],
            mode,
        };
        // Prime the converter with the first non-removed token.
        let first = this.next_token();
        this.current = first;
        this
    }

    /// Advances the preorder walk to the next syntax token, honoring `remove`
    /// (skipped elements) and `append` (leaves injected after an element).
    /// Returns `None` both at end of input and when injected leaves were queued
    /// into `current_leaves` — callers must drain those before advancing again.
    fn next_token(&mut self) -> Option<SyntaxToken> {
        while let Some(ev) = self.preorder.next() {
            match ev {
                WalkEvent::Enter(token) => {
                    if self.remove.contains(&token) {
                        match token {
                            syntax::NodeOrToken::Token(_) => {
                                // Removed tokens are simply dropped.
                                continue;
                            }
                            node => {
                                // Removed nodes are skipped wholesale, but any
                                // `append` entry for them is still honored.
                                self.preorder.skip_subtree();
                                if let Some(mut v) = self.append.remove(&node) {
                                    // Reversed so that `pop` yields leaves in order.
                                    v.reverse();
                                    self.current_leaves.extend(v);
                                    return None;
                                }
                            }
                        }
                    } else if let syntax::NodeOrToken::Token(token) = token {
                        return Some(token);
                    }
                }
                WalkEvent::Leave(ele) => {
                    if let Some(mut v) = self.append.remove(&ele) {
                        // Reversed so that `pop` yields leaves in order.
                        v.reverse();
                        self.current_leaves.extend(v);
                        return None;
                    }
                }
            }
        }
        None
    }
}
662
/// A token produced by [`Converter`].
#[derive(Debug)]
enum SynToken<S> {
    /// A regular (non-punct) syntax token.
    Ordinary(SyntaxToken),
    /// A single character at `offset` within a (possibly multi-char) punct token.
    Punct { token: SyntaxToken, offset: usize },
    /// A pre-built leaf injected via the `append` map.
    Leaf(tt::Leaf<S>),
}
669
impl<S> SynToken<S> {
    /// Returns the backing syntax token.
    ///
    /// Panics for `Leaf`, which has no backing syntax token.
    fn token(&self) -> &SyntaxToken {
        match self {
            SynToken::Ordinary(it) | SynToken::Punct { token: it, offset: _ } => it,
            SynToken::Leaf(_) => unreachable!(),
        }
    }
}
678
impl<SpanMap, S> SrcToken<Converter<SpanMap, S>, S> for SynToken<S> {
    fn kind(&self, _ctx: &Converter<SpanMap, S>) -> SyntaxKind {
        match self {
            SynToken::Ordinary(token) => token.kind(),
            // For a split punct, the kind is that of the single char at `offset`.
            SynToken::Punct { token, offset: i } => {
                SyntaxKind::from_char(token.text().chars().nth(*i).unwrap()).unwrap()
            }
            SynToken::Leaf(_) => {
                // Leaves carry their own data and should never be queried here.
                never!();
                SyntaxKind::ERROR
            }
        }
    }
    fn to_char(&self, _ctx: &Converter<SpanMap, S>) -> Option<char> {
        match self {
            SynToken::Ordinary(_) => None,
            SynToken::Punct { token: it, offset: i } => it.text().chars().nth(*i),
            SynToken::Leaf(_) => None,
        }
    }
    fn to_text(&self, _ctx: &Converter<SpanMap, S>) -> SmolStr {
        match self {
            SynToken::Ordinary(token) | SynToken::Punct { token, offset: _ } => token.text().into(),
            SynToken::Leaf(_) => {
                never!();
                "".into()
            }
        }
    }
    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
        match self {
            SynToken::Ordinary(_) | SynToken::Punct { .. } => None,
            SynToken::Leaf(it) => Some(it),
        }
    }
}
715
impl<S, SpanMap> TokenConverter<S> for Converter<SpanMap, S>
where
    S: Copy,
    SpanMap: SpanMapper<S>,
{
    type Token = SynToken<S>;
    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: S,
        builder: &mut tt::TopSubtreeBuilder<S>,
    ) {
        convert_doc_comment(token.token(), span, self.mode, builder);
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        // If we are mid-way through a multi-char punct token, emit its next char.
        if let Some((punct, offset)) = self.punct_offset.clone() {
            if usize::from(offset) + 1 < punct.text().len() {
                // `TextSize::of('.')` is just a one-byte increment.
                let offset = offset + TextSize::of('.');
                let range = punct.text_range();
                self.punct_offset = Some((punct.clone(), offset));
                let range = TextRange::at(range.start() + offset, TextSize::of('.'));
                return Some((
                    SynToken::Punct { token: punct, offset: u32::from(offset) as usize },
                    range,
                ));
            }
        }

        // Injected leaves (queued by `next_token`) are drained before real tokens.
        if let Some(leaf) = self.current_leaves.pop() {
            if self.current_leaves.is_empty() {
                self.current = self.next_token();
            }
            // Injected leaves carry their own spans, so an empty range suffices.
            return Some((SynToken::Leaf(leaf), TextRange::empty(TextSize::new(0))));
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        self.current = self.next_token();
        let token = if curr.kind().is_punct() {
            // Punct tokens are emitted one character at a time, starting here.
            self.punct_offset = Some((curr.clone(), 0.into()));
            let range = curr.text_range();
            let range = TextRange::at(range.start(), TextSize::of('.'));
            (SynToken::Punct { token: curr, offset: 0_usize }, range)
        } else {
            self.punct_offset = None;
            let range = curr.text_range();
            (SynToken::Ordinary(curr), range)
        };

        Some(token)
    }

    fn peek(&self) -> Option<Self::Token> {
        // Mirror `bump`'s punct-splitting logic without mutating any state.
        if let Some((punct, mut offset)) = self.punct_offset.clone() {
            offset += TextSize::of('.');
            if usize::from(offset) < punct.text().len() {
                return Some(SynToken::Punct { token: punct, offset: usize::from(offset) });
            }
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        let token = if curr.kind().is_punct() {
            SynToken::Punct { token: curr, offset: 0_usize }
        } else {
            SynToken::Ordinary(curr)
        };
        Some(token)
    }

    fn span_for(&self, range: TextRange) -> S {
        self.map.span_for(range)
    }
    fn call_site(&self) -> S {
        self.call_site
    }
}
800
/// Sink fed by parser step events: walks the token-tree cursor in parallel,
/// rebuilding source text, a syntax tree, and the offset-to-span map.
struct TtTreeSink<'a, Ctx>
where
    SpanData<Ctx>: Copy,
{
    /// Source text reconstructed so far.
    buf: String,
    cursor: Cursor<'a, SpanData<Ctx>>,
    /// Current offset into the reconstructed text.
    text_pos: TextSize,
    inner: SyntaxTreeBuilder,
    token_map: SpanMap<Ctx>,
}
811
impl<'a, Ctx> TtTreeSink<'a, Ctx>
where
    SpanData<Ctx>: Copy,
{
    /// Creates an empty sink positioned at the start of `cursor`.
    fn new(cursor: Cursor<'a, SpanData<Ctx>>) -> Self {
        TtTreeSink {
            buf: String::new(),
            cursor,
            text_pos: 0.into(),
            inner: SyntaxTreeBuilder::default(),
            token_map: SpanMap::empty(),
        }
    }

    /// Finalizes the span map and returns it with the built syntax tree.
    fn finish(mut self) -> (Parse<SyntaxNode>, SpanMap<Ctx>) {
        self.token_map.finish();
        (self.inner.finish(), self.token_map)
    }
}
831
832fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> {
833    let texts = match d {
834        tt::DelimiterKind::Parenthesis => "()",
835        tt::DelimiterKind::Brace => "{}",
836        tt::DelimiterKind::Bracket => "[]",
837        tt::DelimiterKind::Invisible => return None,
838    };
839
840    let idx = closing as usize;
841    Some(&texts[idx..texts.len() - (1 - idx)])
842}
843
844impl<Ctx> TtTreeSink<'_, Ctx>
845where
846    SpanData<Ctx>: Copy + fmt::Debug,
847    Ctx: PartialEq,
848{
    /// Parses a float literal as if it was one to two name ref nodes with a dot in between.
    /// This occurs when a float literal is used as a field access (e.g. the `0.1` in
    /// `tuple.0.1`, which lexes as a single float token but is syntactically two
    /// tuple-field accesses).
    ///
    /// `has_pseudo_dot` is `true` when the literal's fractional part is empty (the
    /// literal ends in the dot, like `tuple.0.`); then only the left `NAME_REF` and
    /// the `DOT` are emitted.
    fn float_split(&mut self, has_pseudo_dot: bool) {
        // The cursor must currently be on a float literal leaf; anything else is a
        // caller bug.
        let (text, span) = match self.cursor.token_tree() {
            Some(tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal {
                symbol: text,
                span,
                kind: tt::LitKind::Float,
                suffix: _,
            }))) => (text.as_str(), *span),
            tt => unreachable!("{tt:?}"),
        };
        // FIXME: Span splitting
        // Until we can split spans, every sub-token below is mapped to the span of
        // the whole float literal.
        match text.split_once('.') {
            Some((left, right)) => {
                assert!(!left.is_empty());

                // Integer part: `NAME_REF(INT_NUMBER)`.
                self.inner.start_node(SyntaxKind::NAME_REF);
                self.inner.token(SyntaxKind::INT_NUMBER, left);
                self.inner.finish_node();
                self.token_map.push(self.text_pos + TextSize::of(left), span);

                // here we move the exit up, the original exit has been deleted in process
                self.inner.finish_node();

                self.inner.token(SyntaxKind::DOT, ".");
                self.token_map.push(self.text_pos + TextSize::of(left) + TextSize::of("."), span);

                if has_pseudo_dot {
                    // `foo.0.` — by contract there is nothing after the dot.
                    assert!(right.is_empty(), "{left}.{right}");
                } else {
                    // Fractional part: `NAME_REF(INT_NUMBER)`.
                    assert!(!right.is_empty(), "{left}.{right}");
                    self.inner.start_node(SyntaxKind::NAME_REF);
                    self.inner.token(SyntaxKind::INT_NUMBER, right);
                    self.token_map.push(self.text_pos + TextSize::of(text), span);
                    self.inner.finish_node();

                    // the parser creates an unbalanced start node, we are required to close it here
                    self.inner.finish_node();
                }
                // Advance past the full literal text (dot included).
                self.text_pos += TextSize::of(text);
            }
            // A float literal reaching this path always contains a dot.
            None => unreachable!(),
        }
        self.cursor.bump();
    }
895
    /// Emits the next `n_tokens` token-tree leaves as one syntax token of `kind`,
    /// accumulating their text into `self.buf`, merging their spans, and advancing
    /// the cursor past them.
    fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
        // A lifetime identifier consists of two token-tree leaves (the `'` punct
        // and the ident) but is a single syntax token, so always consume both.
        if kind == LIFETIME_IDENT {
            n_tokens = 2;
        }

        // Snapshot of the two leaves ahead of the cursor as of the *last* loop
        // iteration; consulted below to decide whether to insert whitespace
        // between adjoining puncts.
        let mut last_two = self.cursor.peek_two_leaves();
        // Span covering all leaves consumed for this token.
        let mut combined_span = None;
        'tokens: for _ in 0..n_tokens {
            // Out-of-loop-body storage for a single punct byte, so it can be
            // borrowed as a `&str` that outlives the `match`.
            let tmp: u8;
            if self.cursor.eof() {
                break;
            }
            last_two = self.cursor.peek_two_leaves();
            let (text, span) = loop {
                break match self.cursor.token_tree() {
                    Some(tt::TokenTree::Leaf(leaf)) => match leaf {
                        tt::Leaf::Ident(ident) => {
                            // Raw identifiers carry their rawness out of band;
                            // re-emit the `r#` prefix into the buffer here.
                            if ident.is_raw.yes() {
                                self.buf.push_str("r#");
                                self.text_pos += TextSize::of("r#");
                            }
                            let r = (ident.sym.as_str(), ident.span);
                            self.cursor.bump();
                            r
                        }
                        tt::Leaf::Punct(punct) => {
                            assert!(punct.char.is_ascii());
                            tmp = punct.char as u8;
                            let r = (
                                std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(),
                                punct.span,
                            );
                            self.cursor.bump();
                            r
                        }
                        tt::Leaf::Literal(lit) => {
                            // Literals are rendered via `Display` straight into the
                            // buffer (text plus any suffix), so they update
                            // `text_pos`/`combined_span` themselves and skip the
                            // shared handling after this `match`.
                            let buf_l = self.buf.len();
                            format_to!(self.buf, "{lit}");
                            debug_assert_ne!(self.buf.len() - buf_l, 0);
                            self.text_pos += TextSize::new((self.buf.len() - buf_l) as u32);
                            combined_span = match combined_span {
                                None => Some(lit.span),
                                Some(prev_span) => Some(Self::merge_spans(prev_span, lit.span)),
                            };
                            self.cursor.bump();
                            continue 'tokens;
                        }
                    },
                    Some(tt::TokenTree::Subtree(subtree)) => {
                        // Entering a subtree: emit its opening delimiter.
                        self.cursor.bump();
                        match delim_to_str(subtree.delimiter.kind, false) {
                            Some(it) => (it, subtree.delimiter.open),
                            // Delimiter has no textual form — emit nothing, retry.
                            None => continue,
                        }
                    }
                    None => {
                        // Leaving a subtree: emit the parent's closing delimiter.
                        let parent = self.cursor.end();
                        match delim_to_str(parent.delimiter.kind, true) {
                            Some(it) => (it, parent.delimiter.close),
                            // Delimiter has no textual form — emit nothing, retry.
                            None => continue,
                        }
                    }
                };
            };
            self.buf += text;
            self.text_pos += TextSize::of(text);
            combined_span = match combined_span {
                None => Some(span),
                Some(prev_span) => Some(Self::merge_spans(prev_span, span)),
            }
        }

        // Map the end position of the emitted text to the merged span, then flush
        // the buffered text as a single token.
        self.token_map.push(self.text_pos, combined_span.expect("expected at least one token"));
        self.inner.token(kind, self.buf.as_str());
        self.buf.clear();
        // FIXME: Emitting whitespace for this is really just a hack, we should get rid of it.
        // Add whitespace between adjoint puncts
        if let Some([tt::Leaf::Punct(curr), tt::Leaf::Punct(next)]) = last_two {
            // Note: We always assume the semi-colon would be the last token in
            // other parts of RA such that we don't add whitespace here.
            //
            // When `next` is a `Punct` of `'`, that's a part of a lifetime identifier so we don't
            // need to add whitespace either.
            if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' {
                self.inner.token(WHITESPACE, " ");
                self.text_pos += TextSize::of(' ');
                self.token_map.push(self.text_pos, curr.span);
            }
        }
    }
986
    /// Opens a new syntax node of `kind` in the underlying tree builder.
    fn start_node(&mut self, kind: SyntaxKind) {
        self.inner.start_node(kind);
    }
990
    /// Closes the currently open syntax node in the underlying tree builder.
    fn finish_node(&mut self) {
        self.inner.finish_node();
    }
994
    /// Records a parse error in the tree builder at the current text position.
    fn error(&mut self, error: String) {
        self.inner.error(error, self.text_pos)
    }
998
999    fn merge_spans(a: SpanData<Ctx>, b: SpanData<Ctx>) -> SpanData<Ctx> {
1000        // We don't do what rustc does exactly, rustc does something clever when the spans have different syntax contexts
1001        // but this runs afoul of our separation between `span` and `hir-expand`.
1002        SpanData {
1003            range: if a.ctx == b.ctx && a.anchor == b.anchor {
1004                TextRange::new(
1005                    std::cmp::min(a.range.start(), b.range.start()),
1006                    std::cmp::max(a.range.end(), b.range.end()),
1007                )
1008            } else {
1009                // Combining ranges make no sense when they come from different syntax contexts.
1010                a.range
1011            },
1012            anchor: a.anchor,
1013            ctx: a.ctx,
1014        }
1015    }
1016}