standalone_syn/buffer.rs
1// Copyright 2018 Syn Developers
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! A stably addressed token buffer supporting efficient traversal based on a
10//! cheaply copyable cursor.
11//!
12//! The [`Synom`] trait is implemented for syntax tree types that can be parsed
13//! from one of these token cursors.
14//!
15//! [`Synom`]: ../synom/trait.Synom.html
16//!
17//! *This module is available if Syn is built with the `"parsing"` feature.*
18//!
19//! # Example
20//!
21//! This example shows a basic token parser for parsing a token stream without
22//! using Syn's parser combinator macros.
23//!
24//! ```
25//! #![feature(proc_macro)]
26//!
27//! extern crate syn;
28//! extern crate proc_macro;
29//!
30//! #[macro_use]
31//! extern crate quote;
32//!
33//! use syn::{token, ExprTuple};
34//! use syn::buffer::{Cursor, TokenBuffer};
35//! use syn::spanned::Spanned;
36//! use syn::synom::Synom;
37//! use proc_macro::{Diagnostic, Span, TokenStream};
38//!
39//! /// A basic token parser for parsing a token stream without using Syn's
40//! /// parser combinator macros.
41//! pub struct Parser<'a> {
42//! cursor: Cursor<'a>,
43//! }
44//!
45//! impl<'a> Parser<'a> {
46//! pub fn new(cursor: Cursor<'a>) -> Self {
47//! Parser { cursor }
48//! }
49//!
50//! pub fn current_span(&self) -> Span {
51//! self.cursor.span().unstable()
52//! }
53//!
54//! pub fn parse<T: Synom>(&mut self) -> Result<T, Diagnostic> {
55//! let (val, rest) = T::parse(self.cursor)
56//! .map_err(|e| match T::description() {
57//! Some(desc) => {
58//! self.current_span().error(format!("{}: expected {}", e, desc))
59//! }
60//! None => {
61//! self.current_span().error(e.to_string())
62//! }
63//! })?;
64//!
65//! self.cursor = rest;
66//! Ok(val)
67//! }
68//!
69//! pub fn expect_eof(&mut self) -> Result<(), Diagnostic> {
70//! if !self.cursor.eof() {
71//! return Err(self.current_span().error("trailing characters; expected eof"));
72//! }
73//!
74//! Ok(())
75//! }
76//! }
77//!
78//! fn eval(input: TokenStream) -> Result<TokenStream, Diagnostic> {
79//! let buffer = TokenBuffer::new(input);
80//! let mut parser = Parser::new(buffer.begin());
81//!
82//! // Parse some syntax tree types out of the input tokens. In this case we
83//! // expect something like:
84//! //
85//! // (a, b, c) = (1, 2, 3)
86//! let a = parser.parse::<ExprTuple>()?;
87//! parser.parse::<token::Eq>()?;
88//! let b = parser.parse::<ExprTuple>()?;
89//! parser.expect_eof()?;
90//!
91//! // Perform some validation and report errors.
92//! let (a_len, b_len) = (a.elems.len(), b.elems.len());
93//! if a_len != b_len {
94//! let diag = b.span().unstable()
95//! .error(format!("expected {} element(s), got {}", a_len, b_len))
96//! .span_note(a.span().unstable(), "because of this");
97//!
98//! return Err(diag);
99//! }
100//!
101//! // Build the output tokens.
102//! let out = quote! {
103//! println!("All good! Received two tuples of size {}", #a_len);
104//! };
105//!
106//! Ok(out.into())
107//! }
108//! #
109//! # extern crate proc_macro2;
110//! #
111//! # // This method exists on proc_macro2::Span but is behind the "nightly"
112//! # // feature.
113//! # trait ToUnstableSpan {
114//! # fn unstable(&self) -> Span;
115//! # }
116//! #
117//! # impl ToUnstableSpan for proc_macro2::Span {
118//! # fn unstable(&self) -> Span {
119//! # unimplemented!()
120//! # }
121//! # }
122//! #
123//! # fn main() {}
124//! ```
125
126// This module is heavily commented as it contains the only unsafe code in Syn,
127// and caution should be used when editing it. The public-facing interface is
128// 100% safe but the implementation is fragile internally.
129
130#[cfg(feature = "proc-macro")]
131use proc_macro as pm;
132use proc_macro2::{Delimiter, Literal, Spacing, Span, Term, TokenNode, TokenStream, TokenTree};
133
134use std::ptr;
135use std::marker::PhantomData;
136
137#[cfg(synom_verbose_trace)]
138use std::fmt::{self, Debug};
139
140/// Internal type which is used instead of `TokenTree` to represent a token tree
141/// within a `TokenBuffer`.
142enum Entry {
143 // Mimicking types from proc-macro.
144 Group(Span, Delimiter, TokenBuffer),
145 Term(Span, Term),
146 Op(Span, char, Spacing),
147 Literal(Span, Literal),
148 // End entries contain a raw pointer to the entry from the containing
149 // token tree, or null if this is the outermost level.
150 End(*const Entry),
151}
152
153/// A buffer that can be efficiently traversed multiple times, unlike
154/// `TokenStream` which requires a deep copy in order to traverse more than
155/// once.
156///
157/// See the [module documentation] for an example of `TokenBuffer` in action.
158///
159/// [module documentation]: index.html
160///
161/// *This type is available if Syn is built with the `"parsing"` feature.*
162pub struct TokenBuffer {
163 // NOTE: Do not derive clone on this - there are raw pointers inside which
164 // will be messed up. Moving the `TokenBuffer` itself is safe as the actual
165 // backing slices won't be moved.
166 data: Box<[Entry]>,
167}
168
169impl TokenBuffer {
170 // NOTE: DO NOT MUTATE THE `Vec` RETURNED FROM THIS FUNCTION ONCE IT
171 // RETURNS, THE ADDRESS OF ITS BACKING MEMORY MUST REMAIN STABLE.
172 fn inner_new(stream: TokenStream, up: *const Entry) -> TokenBuffer {
173 // Build up the entries list, recording the locations of any Groups
174 // in the list to be processed later.
175 let mut entries = Vec::new();
176 let mut seqs = Vec::new();
177 for tt in stream {
178 match tt.kind {
179 TokenNode::Term(sym) => {
180 entries.push(Entry::Term(tt.span, sym));
181 }
182 TokenNode::Op(chr, ok) => {
183 entries.push(Entry::Op(tt.span, chr, ok));
184 }
185 TokenNode::Literal(lit) => {
186 entries.push(Entry::Literal(tt.span, lit));
187 }
188 TokenNode::Group(delim, seq_stream) => {
189 // Record the index of the interesting entry, and store an
190 // `End(null)` there temporarially.
191 seqs.push((entries.len(), tt.span, delim, seq_stream));
192 entries.push(Entry::End(ptr::null()));
193 }
194 }
195 }
196 // Add an `End` entry to the end with a reference to the enclosing token
197 // stream which was passed in.
198 entries.push(Entry::End(up));
199
200 // NOTE: This is done to ensure that we don't accidentally modify the
201 // length of the backing buffer. The backing buffer must remain at a
202 // constant address after this point, as we are going to store a raw
203 // pointer into it.
204 let mut entries = entries.into_boxed_slice();
205 for (idx, span, delim, seq_stream) in seqs {
206 // We know that this index refers to one of the temporary
207 // `End(null)` entries, and we know that the last entry is
208 // `End(up)`, so the next index is also valid.
209 let seq_up = &entries[idx + 1] as *const Entry;
210
211 // The end entry stored at the end of this Entry::Group should
212 // point to the Entry which follows the Group in the list.
213 let inner = Self::inner_new(seq_stream, seq_up);
214 entries[idx] = Entry::Group(span, delim, inner);
215 }
216
217 TokenBuffer { data: entries }
218 }
219
220 /// Creates a `TokenBuffer` containing all the tokens from the input
221 /// `TokenStream`.
222 #[cfg(feature = "proc-macro")]
223 pub fn new(stream: pm::TokenStream) -> TokenBuffer {
224 Self::new2(stream.into())
225 }
226
227 /// Creates a `TokenBuffer` containing all the tokens from the input
228 /// `TokenStream`.
229 pub fn new2(stream: TokenStream) -> TokenBuffer {
230 Self::inner_new(stream, ptr::null())
231 }
232
233 /// Creates a cursor referencing the first token in the buffer and able to
234 /// traverse until the end of the buffer.
235 pub fn begin(&self) -> Cursor {
236 unsafe { Cursor::create(&self.data[0], &self.data[self.data.len() - 1]) }
237 }
238}
239
240/// A cheaply copyable cursor into a `TokenBuffer`.
241///
242/// This cursor holds a shared reference into the immutable data which is used
243/// internally to represent a `TokenStream`, and can be efficiently manipulated
244/// and copied around.
245///
246/// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
247/// object and get a cursor to its first token with `begin()`.
248///
249/// Two cursors are equal if they have the same location in the same input
250/// stream, and have the same scope.
251///
252/// See the [module documentation] for an example of a `Cursor` in action.
253///
254/// [module documentation]: index.html
255///
256/// *This type is available if Syn is built with the `"parsing"` feature.*
257#[derive(Copy, Clone, Eq, PartialEq)]
258pub struct Cursor<'a> {
259 /// The current entry which the `Cursor` is pointing at.
260 ptr: *const Entry,
261 /// This is the only `Entry::End(..)` object which this cursor is allowed to
262 /// point at. All other `End` objects are skipped over in `Cursor::create`.
263 scope: *const Entry,
264 /// This uses the &'a reference which guarantees that these pointers are
265 /// still valid.
266 marker: PhantomData<&'a Entry>,
267}
268
269impl<'a> Cursor<'a> {
270 /// Creates a cursor referencing a static empty TokenStream.
271 pub fn empty() -> Self {
272 // It's safe in this situation for us to put an `Entry` object in global
273 // storage, despite it not actually being safe to send across threads
274 // (`Term` is a reference into a thread-local table). This is because
275 // this entry never includes a `Term` object.
276 //
277 // This wrapper struct allows us to break the rules and put a `Sync`
278 // object in global storage.
279 struct UnsafeSyncEntry(Entry);
280 unsafe impl Sync for UnsafeSyncEntry {}
281 static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0 as *const Entry));
282
283 Cursor {
284 ptr: &EMPTY_ENTRY.0,
285 scope: &EMPTY_ENTRY.0,
286 marker: PhantomData,
287 }
288 }
289
290 /// This create method intelligently exits non-explicitly-entered
291 /// `None`-delimited scopes when the cursor reaches the end of them,
292 /// allowing for them to be treated transparently.
293 unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
294 // NOTE: If we're looking at a `End(..)`, we want to advance the cursor
295 // past it, unless `ptr == scope`, which means that we're at the edge of
296 // our cursor's scope. We should only have `ptr != scope` at the exit
297 // from None-delimited groups entered with `ignore_none`.
298 while let Entry::End(exit) = *ptr {
299 if ptr == scope {
300 break;
301 }
302 ptr = exit;
303 }
304
305 Cursor {
306 ptr: ptr,
307 scope: scope,
308 marker: PhantomData,
309 }
310 }
311
312 /// Get the current entry.
313 fn entry(self) -> &'a Entry {
314 unsafe { &*self.ptr }
315 }
316
317 /// Bump the cursor to point at the next token after the current one. This
318 /// is undefined behavior if the cursor is currently looking at an
319 /// `Entry::End`.
320 unsafe fn bump(self) -> Cursor<'a> {
321 Cursor::create(self.ptr.offset(1), self.scope)
322 }
323
324 /// If the cursor is looking at a `None`-delimited group, move it to look at
325 /// the first token inside instead. If the group is empty, this will move
326 /// the cursor past the `None`-delimited group.
327 ///
328 /// WARNING: This mutates its argument.
329 fn ignore_none(&mut self) {
330 if let Entry::Group(_, Delimiter::None, ref buf) = *self.entry() {
331 // NOTE: We call `Cursor::create` here to make sure that situations
332 // where we should immediately exit the span after entering it are
333 // handled correctly.
334 unsafe {
335 *self = Cursor::create(&buf.data[0], self.scope);
336 }
337 }
338 }
339
340 /// Checks whether the cursor is currently pointing at the end of its valid
341 /// scope.
342 #[inline]
343 pub fn eof(self) -> bool {
344 // We're at eof if we're at the end of our scope.
345 self.ptr == self.scope
346 }
347
348 /// If the cursor is pointing at a `Group` with the given delimiter, returns
349 /// a cursor into that group and one pointing to the next `TokenTree`.
350 pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, Span, Cursor<'a>)> {
351 // If we're not trying to enter a none-delimited group, we want to
352 // ignore them. We have to make sure to _not_ ignore them when we want
353 // to enter them, of course. For obvious reasons.
354 if delim != Delimiter::None {
355 self.ignore_none();
356 }
357
358 if let Entry::Group(span, group_delim, ref buf) = *self.entry() {
359 if group_delim == delim {
360 return Some((buf.begin(), span, unsafe { self.bump() }));
361 }
362 }
363
364 None
365 }
366
367 /// If the cursor is pointing at a `Term`, returns it along with a cursor
368 /// pointing at the next `TokenTree`.
369 pub fn term(mut self) -> Option<(Span, Term, Cursor<'a>)> {
370 self.ignore_none();
371 match *self.entry() {
372 Entry::Term(span, term) => Some((span, term, unsafe { self.bump() })),
373 _ => None,
374 }
375 }
376
377 /// If the cursor is pointing at an `Op`, returns it along with a cursor
378 /// pointing at the next `TokenTree`.
379 pub fn op(mut self) -> Option<(Span, char, Spacing, Cursor<'a>)> {
380 self.ignore_none();
381 match *self.entry() {
382 Entry::Op(span, op, spacing) => Some((span, op, spacing, unsafe { self.bump() })),
383 _ => None,
384 }
385 }
386
387 /// If the cursor is pointing at a `Literal`, return it along with a cursor
388 /// pointing at the next `TokenTree`.
389 pub fn literal(mut self) -> Option<(Span, Literal, Cursor<'a>)> {
390 self.ignore_none();
391 match *self.entry() {
392 Entry::Literal(span, ref lit) => Some((span, lit.clone(), unsafe { self.bump() })),
393 _ => None,
394 }
395 }
396
397 /// Copies all remaining tokens visible from this cursor into a
398 /// `TokenStream`.
399 pub fn token_stream(self) -> TokenStream {
400 let mut tts = Vec::new();
401 let mut cursor = self;
402 while let Some((tt, rest)) = cursor.token_tree() {
403 tts.push(tt);
404 cursor = rest;
405 }
406 tts.into_iter().collect()
407 }
408
409 /// If the cursor is pointing at a `TokenTree`, returns it along with a
410 /// cursor pointing at the next `TokenTree`.
411 ///
412 /// Returns `None` if the cursor has reached the end of its stream.
413 ///
414 /// This method does not treat `None`-delimited groups as transparent, and
415 /// will return a `Group(None, ..)` if the cursor is looking at one.
416 pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
417 let tree = match *self.entry() {
418 Entry::Group(span, delim, ref buf) => {
419 let stream = buf.begin().token_stream();
420 TokenTree {
421 span: span,
422 kind: TokenNode::Group(delim, stream),
423 }
424 }
425 Entry::Literal(span, ref lit) => TokenTree {
426 span: span,
427 kind: TokenNode::Literal(lit.clone()),
428 },
429 Entry::Term(span, sym) => TokenTree {
430 span: span,
431 kind: TokenNode::Term(sym),
432 },
433 Entry::Op(span, chr, spacing) => TokenTree {
434 span: span,
435 kind: TokenNode::Op(chr, spacing),
436 },
437 Entry::End(..) => {
438 return None;
439 }
440 };
441
442 Some((tree, unsafe { self.bump() }))
443 }
444
445 /// Returns the `Span` of the current token, or `Span::call_site()` if this
446 /// cursor points to eof.
447 pub fn span(self) -> Span {
448 match *self.entry() {
449 Entry::Group(span, ..)
450 | Entry::Literal(span, ..)
451 | Entry::Term(span, ..)
452 | Entry::Op(span, ..) => span,
453 Entry::End(..) => Span::call_site(),
454 }
455 }
456}
457
// `Debug` is written by hand because a derived impl would only print raw
// pointers, which is pretty useless.
#[cfg(synom_verbose_trace)]
impl<'a> Debug for Cursor<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Show the tokens the cursor can still see, rendered as text.
        // The output looks like Cursor("some remaining tokens here")
        let remaining = self.token_stream().to_string();
        let mut tuple = f.debug_tuple("Cursor");
        tuple.field(&remaining);
        tuple.finish()
    }
}
469}