cairo_lang_macro/types/
token.rs

1use crate::{CALL_SITE, CONTEXT};
2use bumpalo::Bump;
3use cairo_lang_primitive_token::{PrimitiveSpan, PrimitiveToken, ToPrimitiveTokenStream};
4use std::fmt::{Debug, Display, Write};
5use std::hash::{Hash, Hasher};
6use std::iter::{Map, Once, once};
7use std::ops::Deref;
8use std::rc::Rc;
9use std::vec::IntoIter;
10
11/// An abstract stream of Cairo tokens.
12///
13/// This is both input and part of an output of a procedural macro.
/// An abstract stream of Cairo tokens.
///
/// This is both input and part of an output of a procedural macro.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(try_from = "deserializer::TokenStream"))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TokenStream {
    /// The token trees that make up the stream, in order.
    pub tokens: Vec<TokenTree>,
    /// Information describing the origin of this stream (file path, file id, edition).
    pub metadata: TokenStreamMetadata,
}
21
/// This module implements deserialization of the token stream, for the serde feature.
/// This intermediate representation is needed, as the real [`Token`] only contains a reference
/// to the represented string, which needs to be allocated outside the [`Token`] struct.
/// Here we allocate each token to an owned String with SerDe and then copy its content into context.
#[cfg(feature = "serde")]
#[doc(hidden)]
mod deserializer {
    use crate::{AllocationContext, TextSpan, TokenStreamMetadata};
    use std::fmt::{Display, Formatter};

    /// Owned mirror of [`crate::TokenStream`], deserializable without an allocation context.
    #[derive(serde::Serialize, serde::Deserialize)]
    pub struct TokenStream {
        pub tokens: Vec<TokenTree>,
        pub metadata: TokenStreamMetadata,
    }

    /// Owned mirror of [`crate::TokenTree`].
    #[derive(serde::Serialize, serde::Deserialize)]
    pub enum TokenTree {
        Ident(Token),
    }

    /// Owned mirror of [`crate::Token`]; `content` is an owned `String` rather than
    /// an interned pointer.
    #[derive(serde::Serialize, serde::Deserialize)]
    pub struct Token {
        pub content: String,
        pub span: TextSpan,
    }

    /// Opaque conversion error reported to serde via the `try_from` attribute.
    pub struct Error {}

    impl Display for Error {
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
            f.write_str("TokenStream deserialization error")
        }
    }

    impl TryFrom<TokenStream> for crate::TokenStream {
        type Error = Error;

        /// Interns every deserialized token's content into a fresh allocation context.
        fn try_from(value: TokenStream) -> Result<Self, Self::Error> {
            // Pre-size the bump buffer with the total content length, so interning
            // below does not need to grow the allocation.
            let size_hint: usize = value
                .tokens
                .iter()
                .map(|token| match token {
                    TokenTree::Ident(token) => token.content.len(),
                })
                .sum();
            let ctx = AllocationContext::with_capacity(size_hint);
            let tokens = value
                .tokens
                .into_iter()
                .map(|token| match token {
                    TokenTree::Ident(token) => {
                        let content = ctx.intern(token.content.as_str());
                        let token = crate::Token {
                            content,
                            span: token.span,
                        };
                        crate::TokenTree::Ident(token)
                    }
                })
                .collect::<Vec<_>>();
            Ok(Self {
                tokens,
                metadata: value.metadata,
            })
        }
    }
}
83
/// A single token or a delimited sequence of token trees.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenTree {
    /// An identifier-like token (currently the only variant).
    Ident(Token),
}
90
91impl TokenTree {
92    /// Get the size hint for the [`TokenTree`].
93    /// This can be used to estimate size of a buffer needed for allocating this [`TokenTree`].
94    pub(crate) fn size_hint(&self) -> usize {
95        match self {
96            Self::Ident(token) => token.size_hint(),
97        }
98    }
99}
100
/// A single offset into source text, used as a [`TextSpan`] boundary.
pub type TextOffset = u32;
102
/// A range of text offsets that form a span (like text selection).
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TextSpan {
    /// Offset at which the span begins.
    pub start: TextOffset,
    /// Offset at which the span ends.
    pub end: TextOffset,
}
110
/// A single Cairo token.
///
/// The most atomic item of Cairo code representation.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Token {
    /// The token text, interned in an [`AllocationContext`].
    pub content: InternedStr,
    /// Location of the token in the source text.
    pub span: TextSpan,
}
120
121impl Token {
122    /// Get the size hint for the [`Token`].
123    /// This can be used to estimate size of a buffer needed for allocating this [`Token`].
124    pub(crate) fn size_hint(&self) -> usize {
125        self.content.deref().len()
126    }
127}
128
/// A wrapper over a string pointer.
/// This contains a pointer to a string allocated in a bump allocator
/// and a guard which keeps the buffer alive.
/// This way we do not need to allocate a new string,
/// but also do not need to worry about the lifetime of the string.
#[derive(Clone)]
pub struct InternedStr {
    // Points into the buffer owned by `_bump`; valid for as long as `_bump` is alive.
    ptr: *const str,
    // Holding a rc to the underlying buffer, so that ptr will always point to valid memory.
    _bump: Rc<BumpWrap>,
}
140
141impl Debug for InternedStr {
142    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
143        f.write_char('"')?;
144        f.write_str(self.as_ref())?;
145        f.write_char('"')
146    }
147}
148
149impl InternedStr {
150    #[allow(unknown_lints)]
151    #[allow(private_interfaces)]
152    #[doc(hidden)]
153    pub(crate) fn new_in(s: &str, bump: Rc<BumpWrap>) -> Self {
154        let allocated = bump.0.alloc_str(s);
155        let ptr = allocated as *const str;
156        Self { ptr, _bump: bump }
157    }
158}
159
160impl AsRef<str> for InternedStr {
161    fn as_ref(&self) -> &str {
162        self.deref()
163    }
164}
165
impl Deref for InternedStr {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        // SAFETY: `ptr` was created in `new_in` from a string allocated in the bump
        // buffer held by `_bump`, which lives at least as long as `self`, so the
        // pointee is valid for the returned borrow.
        unsafe { &*self.ptr }
    }
}
173
#[cfg(feature = "serde")]
impl serde::Serialize for InternedStr {
    /// Serializes the interned string as a plain string value.
    fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
        s.serialize_str(self.as_ref())
    }
}
180
181impl PartialEq for InternedStr {
182    fn eq(&self, other: &Self) -> bool {
183        self.as_ref().eq(other.as_ref())
184    }
185}
186
// Content-based equality (see `PartialEq` above) is reflexive, so `Eq` is sound.
impl Eq for InternedStr {}
188
189impl Hash for InternedStr {
190    fn hash<H: Hasher>(&self, state: &mut H) {
191        self.as_ref().hash(state);
192    }
193}
194
/// This wrapper de-allocates the underlying buffer on drop.
/// Shared (via `Rc`) between an [`AllocationContext`] and every [`InternedStr`] it produced.
#[derive(Debug)]
pub(crate) struct BumpWrap(pub Bump);
198
impl Drop for BumpWrap {
    fn drop(&mut self) {
        // Reset the bump allocator when the last reference goes away.
        // NOTE(review): `Bump`'s own drop should also release its memory —
        // confirm this explicit reset is intentional.
        self.0.reset();
    }
}
204
/// A context for allocating Cairo tokens.
/// This wrapper contains a bump allocator, which is used to allocate strings for tokens.
#[derive(Clone)]
pub struct AllocationContext {
    // Shared bump buffer; each interned string holds its own `Rc` to it.
    bump: Rc<BumpWrap>,
}
211
212impl AllocationContext {
213    /// Allocate a new context with pre-determined buffer size.
214    pub fn with_capacity(size_hint: usize) -> Self {
215        Self {
216            bump: Rc::new(BumpWrap(Bump::with_capacity(size_hint))),
217        }
218    }
219
220    /// Allocate a string in the context.
221    /// This returned a string pointer, guarded by reference counter to the buffer.
222    /// The buffer will be deallocated when the last reference to the buffer is dropped.
223    /// No special handling or lifetimes are needed for the string.
224    pub(crate) fn intern(&self, value: &str) -> InternedStr {
225        InternedStr::new_in(value, self.bump.clone())
226    }
227}
228
229impl Default for AllocationContext {
230    fn default() -> Self {
231        Self {
232            bump: Rc::new(BumpWrap(Bump::new())),
233        }
234    }
235}
236
/// Metadata of [`TokenStream`].
///
/// This struct describes the origin of the [`TokenStream`].
/// All fields are optional; [`TokenStreamMetadata::default`] produces all-`None` metadata.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
pub struct TokenStreamMetadata {
    /// The path to the file from which the [`TokenStream`] has been created.
    pub original_file_path: Option<String>,
    /// ID of the file from which the [`TokenStream`] has been created.
    ///
    /// It is guaranteed, that the `file_id` will be unique for each file.
    pub file_id: Option<String>,
    /// Cairo edition defined for the token stream.
    pub edition: Option<String>,
}
252
253impl TokenStream {
254    /// Create a new [`TokenStream`] from a vector of [`TokenTree`]s.
255    pub fn new(tokens: Vec<TokenTree>) -> Self {
256        Self {
257            tokens,
258            metadata: TokenStreamMetadata::default(),
259        }
260    }
261
262    /// Create a new empty [`TokenStream`].
263    pub fn empty() -> Self {
264        Self::new(Vec::default())
265    }
266
267    #[doc(hidden)]
268    pub fn with_metadata(mut self, metadata: TokenStreamMetadata) -> Self {
269        self.metadata = metadata;
270        self
271    }
272
273    /// Get `[TokenStreamMetadata`] associated with this [`TokenStream`].
274    ///
275    /// The metadata struct can be used to describe the [`TokenStream`] origin.
276    pub fn metadata(&self) -> &TokenStreamMetadata {
277        &self.metadata
278    }
279
280    /// Check if the [`TokenStream`] is empty.
281    pub fn is_empty(&self) -> bool {
282        self.tokens.is_empty()
283    }
284
285    pub fn from_primitive_token_stream(
286        stable_token_stream: impl Iterator<Item = PrimitiveToken>,
287    ) -> Self {
288        Self::new(
289            stable_token_stream
290                .map(|stable_token| {
291                    TokenTree::Ident(Token::new(
292                        stable_token.content,
293                        stable_token
294                            .span
295                            .map(|stable_span| TextSpan {
296                                start: stable_span.start as u32,
297                                end: stable_span.end as u32,
298                            })
299                            .unwrap_or(TextSpan::call_site()),
300                    ))
301                })
302                .collect(),
303        )
304    }
305
306    pub fn push_token(&mut self, token_tree: TokenTree) {
307        self.tokens.push(token_tree);
308    }
309}
310
311impl IntoIterator for TokenStream {
312    type Item = TokenTree;
313    type IntoIter = IntoIter<TokenTree>;
314
315    fn into_iter(self) -> Self::IntoIter {
316        self.tokens.into_iter()
317    }
318}
319
320impl Extend<TokenTree> for TokenStream {
321    fn extend<T: IntoIterator<Item = TokenTree>>(&mut self, iter: T) {
322        self.tokens.extend(iter);
323    }
324}
325
326impl Extend<TokenStream> for TokenStream {
327    fn extend<T: IntoIterator<Item = TokenStream>>(&mut self, iter: T) {
328        iter.into_iter()
329            .for_each(|token_stream| self.extend(token_stream));
330    }
331}
332
333impl Display for TokenStream {
334    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
335        for token in &self.tokens {
336            match token {
337                TokenTree::Ident(token) => {
338                    write!(f, "{}", token.content.as_ref())?;
339                }
340            }
341        }
342        Ok(())
343    }
344}
345
346impl TokenStreamMetadata {
347    #[doc(hidden)]
348    pub fn new(file_path: impl ToString, file_id: impl ToString, edition: impl ToString) -> Self {
349        Self {
350            original_file_path: Some(file_path.to_string()),
351            file_id: Some(file_id.to_string()),
352            edition: Some(edition.to_string()),
353        }
354    }
355}
356
357impl TokenTree {
358    /// Create a new [`TokenTree`] from an identifier [`Token`].
359    pub fn from_ident(token: Token) -> Self {
360        Self::Ident(token)
361    }
362}
363
364impl TextSpan {
365    /// Create a new [`TextSpan`].
366    pub fn new(start: TextOffset, end: TextOffset) -> TextSpan {
367        TextSpan { start, end }
368    }
369
370    /// Create a new [`TextSpan`], located at the invocation of the current procedural macro.
371    /// Identifiers created with this span will be resolved as if they were written directly at
372    /// the macro call location (call-site hygiene).
373    pub fn call_site() -> Self {
374        CALL_SITE.with(|call_site| {
375            let call_site = call_site.borrow();
376            Self::new(call_site.0, call_site.1)
377        })
378    }
379
380    /// Create a new [`TextSpan`], with width `0`, located right before this span.
381    pub fn start(self) -> Self {
382        Self::new(self.start, self.start)
383    }
384
385    /// Create a new [`TextSpan`], with width `0`, located right after this span.
386    pub fn end(self) -> Self {
387        Self::new(self.end, self.end)
388    }
389}
390
391impl Token {
392    /// Create [`Token`] in thread-local context.
393    pub fn new(content: impl AsRef<str>, span: TextSpan) -> Self {
394        CONTEXT.with(|ctx| {
395            let ctx_borrow = ctx.borrow();
396            let ctx: &AllocationContext = ctx_borrow.deref();
397            Self::new_in(content, span, ctx)
398        })
399    }
400
401    /// Create [`Token`] in specified context.
402    pub fn new_in(content: impl AsRef<str>, span: TextSpan, ctx: &AllocationContext) -> Self {
403        let content = ctx.intern(content.as_ref());
404        Self { content, span }
405    }
406}
407
408impl ToPrimitiveTokenStream for TokenStream {
409    type Iter = Map<IntoIter<TokenTree>, fn(TokenTree) -> PrimitiveToken>;
410    fn to_primitive_token_stream(&self) -> Self::Iter {
411        self.tokens
412            .clone()
413            .into_iter()
414            .map(|token_tree| match token_tree {
415                TokenTree::Ident(token) => PrimitiveToken::new(
416                    token.content.to_string(),
417                    Some(PrimitiveSpan {
418                        start: token.span.start as usize,
419                        end: token.span.end as usize,
420                    }),
421                ),
422            })
423    }
424}
425
426impl ToPrimitiveTokenStream for TokenTree {
427    type Iter = Once<PrimitiveToken>;
428    fn to_primitive_token_stream(&self) -> Self::Iter {
429        once(match self {
430            TokenTree::Ident(token) => PrimitiveToken::new(
431                token.content.to_string(),
432                Some(PrimitiveSpan {
433                    start: token.span.start as usize,
434                    end: token.span.end as usize,
435                }),
436            ),
437        })
438    }
439}
440
#[cfg(test)]
mod test {
    use crate::{AllocationContext, TextSpan, Token, TokenStream, TokenTree};

    /// An empty stream must round-trip through serde and serialize to empty tokens
    /// with all-null metadata.
    #[test]
    pub fn can_serde_empty_token_stream() {
        let original = TokenStream::empty();
        let serialized = serde_json::to_string(&original).unwrap();
        let derived: TokenStream = serde_json::from_str(serialized.as_str()).unwrap();
        assert_eq!(original, derived);
        let val: serde_json::Value = serde_json::from_str(serialized.as_str()).unwrap();
        assert_eq!(
            val,
            serde_json::json!({
                "tokens": [],
                "metadata": {
                    "original_file_path": null,
                    "file_id": null,
                    "edition": null
                }
            })
        );
    }

    /// A non-empty stream must round-trip through serde, preserving token order,
    /// contents, and spans.
    #[test]
    pub fn can_serde_token_stream() {
        let ctx = AllocationContext::default();
        let original = TokenStream::new(vec![
            TokenTree::Ident(Token::new_in("first", TextSpan::new(0, 1), &ctx)),
            TokenTree::Ident(Token::new_in("second", TextSpan::new(2, 3), &ctx)),
            TokenTree::Ident(Token::new_in("third", TextSpan::new(4, 5), &ctx)),
            TokenTree::Ident(Token::new_in("fourth", TextSpan::new(6, 7), &ctx)),
        ]);
        let serialized = serde_json::to_string(&original).unwrap();
        let derived: TokenStream = serde_json::from_str(serialized.as_str()).unwrap();
        assert_eq!(original, derived);
        let val: serde_json::Value = serde_json::from_str(serialized.as_str()).unwrap();
        assert_eq!(
            val,
            serde_json::json!({
                "tokens": [
                    {"Ident": {"content": "first", "span": {"start": 0, "end": 1}}},
                    {"Ident": {"content": "second", "span": {"start": 2, "end": 3}}},
                    {"Ident": {"content": "third", "span": {"start": 4, "end": 5}}},
                    {"Ident": {"content": "fourth", "span": {"start": 6, "end": 7}}},
                ],
                "metadata": {
                    "original_file_path": null,
                    "file_id": null,
                    "edition": null
                }
            })
        );
    }

    /// `Extend<TokenStream>` appends the other stream's tokens after the existing ones.
    #[test]
    pub fn token_stream_can_be_extended_with_token_stream() {
        let mut first = TokenStream::new(vec![TokenTree::Ident(Token::new(
            "first",
            TextSpan::new(0, 1),
        ))]);
        let second = TokenStream::new(vec![TokenTree::Ident(Token::new(
            "second",
            TextSpan::new(2, 3),
        ))]);
        first.extend(second);
        assert_eq!(
            first.tokens,
            vec![
                TokenTree::Ident(Token::new("first", TextSpan::new(0, 1))),
                TokenTree::Ident(Token::new("second", TextSpan::new(2, 3))),
            ]
        );
    }

    /// `Extend<TokenStream>` over a collection concatenates every stream in order.
    #[test]
    pub fn token_stream_can_be_extended_with_vec_of_token_streams() {
        let mut first = TokenStream::new(vec![TokenTree::Ident(Token::new(
            "first",
            TextSpan::new(0, 1),
        ))]);
        let second = TokenStream::new(vec![TokenTree::Ident(Token::new(
            "second",
            TextSpan::new(2, 3),
        ))]);
        let third = TokenStream::new(vec![TokenTree::Ident(Token::new(
            "third",
            TextSpan::new(4, 5),
        ))]);
        first.extend(vec![second, third]);
        assert_eq!(
            first.tokens,
            vec![
                TokenTree::Ident(Token::new("first", TextSpan::new(0, 1))),
                TokenTree::Ident(Token::new("second", TextSpan::new(2, 3))),
                TokenTree::Ident(Token::new("third", TextSpan::new(4, 5))),
            ]
        );
    }
}