standalone_syn/
buffer.rs

1// Copyright 2018 Syn Developers
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! A stably addressed token buffer supporting efficient traversal based on a
10//! cheaply copyable cursor.
11//!
12//! The [`Synom`] trait is implemented for syntax tree types that can be parsed
13//! from one of these token cursors.
14//!
15//! [`Synom`]: ../synom/trait.Synom.html
16//!
17//! *This module is available if Syn is built with the `"parsing"` feature.*
18//!
19//! # Example
20//!
21//! This example shows a basic token parser for parsing a token stream without
22//! using Syn's parser combinator macros.
23//!
24//! ```
25//! #![feature(proc_macro)]
26//!
27//! extern crate syn;
28//! extern crate proc_macro;
29//!
30//! #[macro_use]
31//! extern crate quote;
32//!
33//! use syn::{token, ExprTuple};
34//! use syn::buffer::{Cursor, TokenBuffer};
35//! use syn::spanned::Spanned;
36//! use syn::synom::Synom;
37//! use proc_macro::{Diagnostic, Span, TokenStream};
38//!
39//! /// A basic token parser for parsing a token stream without using Syn's
40//! /// parser combinator macros.
41//! pub struct Parser<'a> {
42//!     cursor: Cursor<'a>,
43//! }
44//!
45//! impl<'a> Parser<'a> {
46//!     pub fn new(cursor: Cursor<'a>) -> Self {
47//!         Parser { cursor }
48//!     }
49//!
50//!     pub fn current_span(&self) -> Span {
51//!         self.cursor.span().unstable()
52//!     }
53//!
54//!     pub fn parse<T: Synom>(&mut self) -> Result<T, Diagnostic> {
55//!         let (val, rest) = T::parse(self.cursor)
56//!             .map_err(|e| match T::description() {
57//!                 Some(desc) => {
58//!                     self.current_span().error(format!("{}: expected {}", e, desc))
59//!                 }
60//!                 None => {
61//!                     self.current_span().error(e.to_string())
62//!                 }
63//!             })?;
64//!
65//!         self.cursor = rest;
66//!         Ok(val)
67//!     }
68//!
69//!     pub fn expect_eof(&mut self) -> Result<(), Diagnostic> {
70//!         if !self.cursor.eof() {
71//!             return Err(self.current_span().error("trailing characters; expected eof"));
72//!         }
73//!
74//!         Ok(())
75//!     }
76//! }
77//!
78//! fn eval(input: TokenStream) -> Result<TokenStream, Diagnostic> {
79//!     let buffer = TokenBuffer::new(input);
80//!     let mut parser = Parser::new(buffer.begin());
81//!
82//!     // Parse some syntax tree types out of the input tokens. In this case we
83//!     // expect something like:
84//!     //
85//!     //     (a, b, c) = (1, 2, 3)
86//!     let a = parser.parse::<ExprTuple>()?;
87//!     parser.parse::<token::Eq>()?;
88//!     let b = parser.parse::<ExprTuple>()?;
89//!     parser.expect_eof()?;
90//!
91//!     // Perform some validation and report errors.
92//!     let (a_len, b_len) = (a.elems.len(), b.elems.len());
93//!     if a_len != b_len {
94//!         let diag = b.span().unstable()
95//!             .error(format!("expected {} element(s), got {}", a_len, b_len))
96//!             .span_note(a.span().unstable(), "because of this");
97//!
98//!         return Err(diag);
99//!     }
100//!
101//!     // Build the output tokens.
102//!     let out = quote! {
103//!         println!("All good! Received two tuples of size {}", #a_len);
104//!     };
105//!
106//!     Ok(out.into())
107//! }
108//! #
109//! # extern crate proc_macro2;
110//! #
111//! # // This method exists on proc_macro2::Span but is behind the "nightly"
112//! # // feature.
113//! # trait ToUnstableSpan {
114//! #     fn unstable(&self) -> Span;
115//! # }
116//! #
117//! # impl ToUnstableSpan for proc_macro2::Span {
118//! #     fn unstable(&self) -> Span {
119//! #         unimplemented!()
120//! #     }
121//! # }
122//! #
123//! # fn main() {}
124//! ```
125
126// This module is heavily commented as it contains the only unsafe code in Syn,
127// and caution should be used when editing it. The public-facing interface is
128// 100% safe but the implementation is fragile internally.
129
130#[cfg(feature = "proc-macro")]
131use proc_macro as pm;
132use proc_macro2::{Delimiter, Literal, Spacing, Span, Term, TokenNode, TokenStream, TokenTree};
133
134use std::ptr;
135use std::marker::PhantomData;
136
137#[cfg(synom_verbose_trace)]
138use std::fmt::{self, Debug};
139
140/// Internal type which is used instead of `TokenTree` to represent a token tree
141/// within a `TokenBuffer`.
142enum Entry {
143    // Mimicking types from proc-macro.
144    Group(Span, Delimiter, TokenBuffer),
145    Term(Span, Term),
146    Op(Span, char, Spacing),
147    Literal(Span, Literal),
148    // End entries contain a raw pointer to the entry from the containing
149    // token tree, or null if this is the outermost level.
150    End(*const Entry),
151}
152
153/// A buffer that can be efficiently traversed multiple times, unlike
154/// `TokenStream` which requires a deep copy in order to traverse more than
155/// once.
156///
157/// See the [module documentation] for an example of `TokenBuffer` in action.
158///
159/// [module documentation]: index.html
160///
161/// *This type is available if Syn is built with the `"parsing"` feature.*
162pub struct TokenBuffer {
163    // NOTE: Do not derive clone on this - there are raw pointers inside which
164    // will be messed up. Moving the `TokenBuffer` itself is safe as the actual
165    // backing slices won't be moved.
166    data: Box<[Entry]>,
167}
168
169impl TokenBuffer {
170    // NOTE: DO NOT MUTATE THE `Vec` RETURNED FROM THIS FUNCTION ONCE IT
171    // RETURNS, THE ADDRESS OF ITS BACKING MEMORY MUST REMAIN STABLE.
172    fn inner_new(stream: TokenStream, up: *const Entry) -> TokenBuffer {
173        // Build up the entries list, recording the locations of any Groups
174        // in the list to be processed later.
175        let mut entries = Vec::new();
176        let mut seqs = Vec::new();
177        for tt in stream {
178            match tt.kind {
179                TokenNode::Term(sym) => {
180                    entries.push(Entry::Term(tt.span, sym));
181                }
182                TokenNode::Op(chr, ok) => {
183                    entries.push(Entry::Op(tt.span, chr, ok));
184                }
185                TokenNode::Literal(lit) => {
186                    entries.push(Entry::Literal(tt.span, lit));
187                }
188                TokenNode::Group(delim, seq_stream) => {
189                    // Record the index of the interesting entry, and store an
190                    // `End(null)` there temporarially.
191                    seqs.push((entries.len(), tt.span, delim, seq_stream));
192                    entries.push(Entry::End(ptr::null()));
193                }
194            }
195        }
196        // Add an `End` entry to the end with a reference to the enclosing token
197        // stream which was passed in.
198        entries.push(Entry::End(up));
199
200        // NOTE: This is done to ensure that we don't accidentally modify the
201        // length of the backing buffer. The backing buffer must remain at a
202        // constant address after this point, as we are going to store a raw
203        // pointer into it.
204        let mut entries = entries.into_boxed_slice();
205        for (idx, span, delim, seq_stream) in seqs {
206            // We know that this index refers to one of the temporary
207            // `End(null)` entries, and we know that the last entry is
208            // `End(up)`, so the next index is also valid.
209            let seq_up = &entries[idx + 1] as *const Entry;
210
211            // The end entry stored at the end of this Entry::Group should
212            // point to the Entry which follows the Group in the list.
213            let inner = Self::inner_new(seq_stream, seq_up);
214            entries[idx] = Entry::Group(span, delim, inner);
215        }
216
217        TokenBuffer { data: entries }
218    }
219
220    /// Creates a `TokenBuffer` containing all the tokens from the input
221    /// `TokenStream`.
222    #[cfg(feature = "proc-macro")]
223    pub fn new(stream: pm::TokenStream) -> TokenBuffer {
224        Self::new2(stream.into())
225    }
226
227    /// Creates a `TokenBuffer` containing all the tokens from the input
228    /// `TokenStream`.
229    pub fn new2(stream: TokenStream) -> TokenBuffer {
230        Self::inner_new(stream, ptr::null())
231    }
232
233    /// Creates a cursor referencing the first token in the buffer and able to
234    /// traverse until the end of the buffer.
235    pub fn begin(&self) -> Cursor {
236        unsafe { Cursor::create(&self.data[0], &self.data[self.data.len() - 1]) }
237    }
238}
239
240/// A cheaply copyable cursor into a `TokenBuffer`.
241///
242/// This cursor holds a shared reference into the immutable data which is used
243/// internally to represent a `TokenStream`, and can be efficiently manipulated
244/// and copied around.
245///
246/// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
247/// object and get a cursor to its first token with `begin()`.
248///
249/// Two cursors are equal if they have the same location in the same input
250/// stream, and have the same scope.
251///
252/// See the [module documentation] for an example of a `Cursor` in action.
253///
254/// [module documentation]: index.html
255///
256/// *This type is available if Syn is built with the `"parsing"` feature.*
257#[derive(Copy, Clone, Eq, PartialEq)]
258pub struct Cursor<'a> {
259    /// The current entry which the `Cursor` is pointing at.
260    ptr: *const Entry,
261    /// This is the only `Entry::End(..)` object which this cursor is allowed to
262    /// point at. All other `End` objects are skipped over in `Cursor::create`.
263    scope: *const Entry,
264    /// This uses the &'a reference which guarantees that these pointers are
265    /// still valid.
266    marker: PhantomData<&'a Entry>,
267}
268
269impl<'a> Cursor<'a> {
270    /// Creates a cursor referencing a static empty TokenStream.
271    pub fn empty() -> Self {
272        // It's safe in this situation for us to put an `Entry` object in global
273        // storage, despite it not actually being safe to send across threads
274        // (`Term` is a reference into a thread-local table). This is because
275        // this entry never includes a `Term` object.
276        //
277        // This wrapper struct allows us to break the rules and put a `Sync`
278        // object in global storage.
279        struct UnsafeSyncEntry(Entry);
280        unsafe impl Sync for UnsafeSyncEntry {}
281        static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0 as *const Entry));
282
283        Cursor {
284            ptr: &EMPTY_ENTRY.0,
285            scope: &EMPTY_ENTRY.0,
286            marker: PhantomData,
287        }
288    }
289
290    /// This create method intelligently exits non-explicitly-entered
291    /// `None`-delimited scopes when the cursor reaches the end of them,
292    /// allowing for them to be treated transparently.
293    unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
294        // NOTE: If we're looking at a `End(..)`, we want to advance the cursor
295        // past it, unless `ptr == scope`, which means that we're at the edge of
296        // our cursor's scope. We should only have `ptr != scope` at the exit
297        // from None-delimited groups entered with `ignore_none`.
298        while let Entry::End(exit) = *ptr {
299            if ptr == scope {
300                break;
301            }
302            ptr = exit;
303        }
304
305        Cursor {
306            ptr: ptr,
307            scope: scope,
308            marker: PhantomData,
309        }
310    }
311
312    /// Get the current entry.
313    fn entry(self) -> &'a Entry {
314        unsafe { &*self.ptr }
315    }
316
317    /// Bump the cursor to point at the next token after the current one. This
318    /// is undefined behavior if the cursor is currently looking at an
319    /// `Entry::End`.
320    unsafe fn bump(self) -> Cursor<'a> {
321        Cursor::create(self.ptr.offset(1), self.scope)
322    }
323
324    /// If the cursor is looking at a `None`-delimited group, move it to look at
325    /// the first token inside instead. If the group is empty, this will move
326    /// the cursor past the `None`-delimited group.
327    ///
328    /// WARNING: This mutates its argument.
329    fn ignore_none(&mut self) {
330        if let Entry::Group(_, Delimiter::None, ref buf) = *self.entry() {
331            // NOTE: We call `Cursor::create` here to make sure that situations
332            // where we should immediately exit the span after entering it are
333            // handled correctly.
334            unsafe {
335                *self = Cursor::create(&buf.data[0], self.scope);
336            }
337        }
338    }
339
340    /// Checks whether the cursor is currently pointing at the end of its valid
341    /// scope.
342    #[inline]
343    pub fn eof(self) -> bool {
344        // We're at eof if we're at the end of our scope.
345        self.ptr == self.scope
346    }
347
348    /// If the cursor is pointing at a `Group` with the given delimiter, returns
349    /// a cursor into that group and one pointing to the next `TokenTree`.
350    pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, Span, Cursor<'a>)> {
351        // If we're not trying to enter a none-delimited group, we want to
352        // ignore them. We have to make sure to _not_ ignore them when we want
353        // to enter them, of course. For obvious reasons.
354        if delim != Delimiter::None {
355            self.ignore_none();
356        }
357
358        if let Entry::Group(span, group_delim, ref buf) = *self.entry() {
359            if group_delim == delim {
360                return Some((buf.begin(), span, unsafe { self.bump() }));
361            }
362        }
363
364        None
365    }
366
367    /// If the cursor is pointing at a `Term`, returns it along with a cursor
368    /// pointing at the next `TokenTree`.
369    pub fn term(mut self) -> Option<(Span, Term, Cursor<'a>)> {
370        self.ignore_none();
371        match *self.entry() {
372            Entry::Term(span, term) => Some((span, term, unsafe { self.bump() })),
373            _ => None,
374        }
375    }
376
377    /// If the cursor is pointing at an `Op`, returns it along with a cursor
378    /// pointing at the next `TokenTree`.
379    pub fn op(mut self) -> Option<(Span, char, Spacing, Cursor<'a>)> {
380        self.ignore_none();
381        match *self.entry() {
382            Entry::Op(span, op, spacing) => Some((span, op, spacing, unsafe { self.bump() })),
383            _ => None,
384        }
385    }
386
387    /// If the cursor is pointing at a `Literal`, return it along with a cursor
388    /// pointing at the next `TokenTree`.
389    pub fn literal(mut self) -> Option<(Span, Literal, Cursor<'a>)> {
390        self.ignore_none();
391        match *self.entry() {
392            Entry::Literal(span, ref lit) => Some((span, lit.clone(), unsafe { self.bump() })),
393            _ => None,
394        }
395    }
396
397    /// Copies all remaining tokens visible from this cursor into a
398    /// `TokenStream`.
399    pub fn token_stream(self) -> TokenStream {
400        let mut tts = Vec::new();
401        let mut cursor = self;
402        while let Some((tt, rest)) = cursor.token_tree() {
403            tts.push(tt);
404            cursor = rest;
405        }
406        tts.into_iter().collect()
407    }
408
409    /// If the cursor is pointing at a `TokenTree`, returns it along with a
410    /// cursor pointing at the next `TokenTree`.
411    ///
412    /// Returns `None` if the cursor has reached the end of its stream.
413    ///
414    /// This method does not treat `None`-delimited groups as transparent, and
415    /// will return a `Group(None, ..)` if the cursor is looking at one.
416    pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
417        let tree = match *self.entry() {
418            Entry::Group(span, delim, ref buf) => {
419                let stream = buf.begin().token_stream();
420                TokenTree {
421                    span: span,
422                    kind: TokenNode::Group(delim, stream),
423                }
424            }
425            Entry::Literal(span, ref lit) => TokenTree {
426                span: span,
427                kind: TokenNode::Literal(lit.clone()),
428            },
429            Entry::Term(span, sym) => TokenTree {
430                span: span,
431                kind: TokenNode::Term(sym),
432            },
433            Entry::Op(span, chr, spacing) => TokenTree {
434                span: span,
435                kind: TokenNode::Op(chr, spacing),
436            },
437            Entry::End(..) => {
438                return None;
439            }
440        };
441
442        Some((tree, unsafe { self.bump() }))
443    }
444
445    /// Returns the `Span` of the current token, or `Span::call_site()` if this
446    /// cursor points to eof.
447    pub fn span(self) -> Span {
448        match *self.entry() {
449            Entry::Group(span, ..)
450            | Entry::Literal(span, ..)
451            | Entry::Term(span, ..)
452            | Entry::Op(span, ..) => span,
453            Entry::End(..) => Span::call_site(),
454        }
455    }
456}
457
// `Debug` is implemented by hand: a derived implementation would only print
// the raw pointers, which is pretty useless.
#[cfg(synom_verbose_trace)]
impl<'a> Debug for Cursor<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Render the tokens that are still ahead of the cursor, so the output
        // looks like Cursor("some remaining tokens here").
        let remaining = self.token_stream().to_string();
        let mut tuple = f.debug_tuple("Cursor");
        tuple.field(&remaining);
        tuple.finish()
    }
}
469}