use crate::{CALL_SITE, CONTEXT};
use bumpalo::Bump;
use cairo_lang_primitive_token::{PrimitiveSpan, PrimitiveToken, ToPrimitiveTokenStream};
use std::fmt::{Debug, Display, Write};
use std::hash::{Hash, Hasher};
use std::iter::{Map, Once, once};
use std::ops::Deref;
use std::rc::Rc;
use std::vec::IntoIter;

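/// An abstract stream of Cairo tokens, with attached [`TokenStreamMetadata`].
///
/// # Examples
///
/// A minimal sketch of building and printing a stream by hand (the crate name
/// `cairo_lang_macro` is an assumption here):
///
/// ```ignore
/// use cairo_lang_macro::{TextSpan, Token, TokenStream, TokenTree};
///
/// let stream = TokenStream::new(vec![TokenTree::Ident(Token::new(
///     "ident",
///     TextSpan::new(0, 5),
/// ))]);
/// assert_eq!(stream.to_string(), "ident");
/// assert!(!stream.is_empty());
/// ```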
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(try_from = "deserializer::TokenStream"))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TokenStream {
    pub tokens: Vec<TokenTree>,
    pub metadata: TokenStreamMetadata,
}

/// Mirror types used by serde to deserialize a [`TokenStream`](crate::TokenStream).
///
/// [`InternedStr`](crate::InternedStr) cannot be deserialized directly, so token
/// content is first read into owned strings and then re-interned into a fresh
/// [`AllocationContext`].
#[cfg(feature = "serde")]
#[doc(hidden)]
mod deserializer {
    use crate::{AllocationContext, TextSpan, TokenStreamMetadata};
    use std::fmt::{Display, Formatter};

    #[derive(serde::Serialize, serde::Deserialize)]
    pub struct TokenStream {
        pub tokens: Vec<TokenTree>,
        pub metadata: TokenStreamMetadata,
    }

    #[derive(serde::Serialize, serde::Deserialize)]
    pub enum TokenTree {
        Ident(Token),
    }

    #[derive(serde::Serialize, serde::Deserialize)]
    pub struct Token {
        pub content: String,
        pub span: TextSpan,
    }

    pub struct Error {}

    impl Display for Error {
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
            f.write_str("TokenStream deserialization error")
        }
    }

    impl TryFrom<TokenStream> for crate::TokenStream {
        type Error = Error;

        fn try_from(value: TokenStream) -> Result<Self, Self::Error> {
            let ctx = AllocationContext::default();
            let tokens = value
                .tokens
                .into_iter()
                .map(|token| match token {
                    TokenTree::Ident(token) => {
                        // Re-intern the owned `String` content in the new context.
                        let content = ctx.intern(token.content.as_str());
                        let token = crate::Token {
                            content,
                            span: token.span,
                        };
                        crate::TokenTree::Ident(token)
                    }
                })
                .collect::<Vec<_>>();
            Ok(Self {
                tokens,
                metadata: value.metadata,
            })
        }
    }
}

/// A single token tree.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenTree {
    Ident(Token),
}

impl TokenTree {
    /// Estimates the number of bytes needed to intern this token tree's content.
    pub(crate) fn size_hint(&self) -> usize {
        match self {
            Self::Ident(token) => token.size_hint(),
        }
    }
}

/// An offset within the source text.
pub type TextOffset = u32;

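/// A range within the source text, delimited by start and end [`TextOffset`]s.
///
/// # Examples
///
/// A minimal sketch of the zero-width helpers defined further below (the crate
/// name `cairo_lang_macro` is an assumption here):
///
/// ```ignore
/// use cairo_lang_macro::TextSpan;
///
/// let span = TextSpan::new(0, 5);
/// assert_eq!(span.clone().start(), TextSpan::new(0, 0));
/// assert_eq!(span.end(), TextSpan::new(5, 5));
/// ```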
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TextSpan {
    pub start: TextOffset,
    pub end: TextOffset,
}

/// A single Cairo token, carrying its content and source span.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Token {
    pub content: InternedStr,
    pub span: TextSpan,
}

impl Token {
    /// Estimates the number of bytes needed to intern this token's content.
    pub(crate) fn size_hint(&self) -> usize {
        self.content.deref().len()
    }
}

/// A string interned in a bump allocator.
///
/// Equality and hashing are based on the string contents, not the pointer.
/// Cloning is cheap: it copies the pointer and bumps the reference count of
/// the backing allocator.
#[derive(Clone)]
pub struct InternedStr {
    ptr: *const str,
    // Keeps a reference count on the bump allocator, so that the string
    // pointed to by `ptr` is never deallocated while this struct lives.
    _bump: Rc<BumpWrap>,
}

impl Debug for InternedStr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_char('"')?;
        f.write_str(self.as_ref())?;
        f.write_char('"')
    }
}

impl InternedStr {
    #[allow(unknown_lints)]
    #[allow(private_interfaces)]
    #[doc(hidden)]
    pub(crate) fn new_in(s: &str, bump: Rc<BumpWrap>) -> Self {
        // Allocate the string in the bump and keep the bump alive alongside
        // the raw pointer, so the pointer stays valid for `self`'s lifetime.
        let allocated = bump.0.alloc_str(s);
        let ptr = allocated as *const str;
        Self { ptr, _bump: bump }
    }
}

impl AsRef<str> for InternedStr {
    fn as_ref(&self) -> &str {
        self.deref()
    }
}

impl Deref for InternedStr {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        // SAFETY: `ptr` points into the bump held by `_bump`, which lives at
        // least as long as `self`.
        unsafe { &*self.ptr }
    }
}

#[cfg(feature = "serde")]
impl serde::Serialize for InternedStr {
    fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
        s.serialize_str(self.as_ref())
    }
}

impl PartialEq for InternedStr {
    fn eq(&self, other: &Self) -> bool {
        self.as_ref().eq(other.as_ref())
    }
}

impl Eq for InternedStr {}

impl Hash for InternedStr {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.as_ref().hash(state);
    }
}

/// Wrapper over the bump allocator that resets it when dropped.
#[derive(Debug)]
pub(crate) struct BumpWrap(pub Bump);

impl Drop for BumpWrap {
    fn drop(&mut self) {
        // Deallocate all interned strings at once.
        self.0.reset();
    }
}

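/// Allocation context for token streams.
///
/// The context wraps a bump allocator in which token content is interned;
/// every [`InternedStr`] created through it keeps the allocator alive via a
/// reference count.
///
/// # Examples
///
/// A minimal sketch of interning through an explicit context (the crate name
/// `cairo_lang_macro` is an assumption here):
///
/// ```ignore
/// use cairo_lang_macro::{AllocationContext, TextSpan, Token};
///
/// let ctx = AllocationContext::with_capacity(64);
/// let token = Token::new_in("ident", TextSpan::new(0, 5), &ctx);
/// assert_eq!(token.content.as_ref(), "ident");
/// ```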
#[derive(Clone)]
pub struct AllocationContext {
    bump: Rc<BumpWrap>,
}

impl AllocationContext {
    /// Creates a context whose bump allocator pre-allocates `size_hint` bytes.
    pub fn with_capacity(size_hint: usize) -> Self {
        Self {
            bump: Rc::new(BumpWrap(Bump::with_capacity(size_hint))),
        }
    }

    /// Allocates a string in the bump allocator owned by this context and
    /// returns a handle that keeps the allocator alive.
    pub(crate) fn intern(&self, value: &str) -> InternedStr {
        InternedStr::new_in(value, self.bump.clone())
    }
}

impl Default for AllocationContext {
    fn default() -> Self {
        Self {
            bump: Rc::new(BumpWrap(Bump::new())),
        }
    }
}

/// Metadata of the [`TokenStream`].
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
pub struct TokenStreamMetadata {
    /// The path to the file from which this token stream was created.
    pub original_file_path: Option<String>,
    /// ID of the file from which this token stream was created.
    pub file_id: Option<String>,
    /// Edition of the source from which this token stream was created.
    pub edition: Option<String>,
}

impl TokenStream {
    /// Creates a new token stream with default (empty) metadata.
    pub fn new(tokens: Vec<TokenTree>) -> Self {
        Self {
            tokens,
            metadata: TokenStreamMetadata::default(),
        }
    }

    /// Creates a token stream containing no tokens.
    pub fn empty() -> Self {
        Self::new(Vec::default())
    }

    #[doc(hidden)]
    pub fn with_metadata(mut self, metadata: TokenStreamMetadata) -> Self {
        self.metadata = metadata;
        self
    }

    /// Returns the metadata associated with this token stream.
    pub fn metadata(&self) -> &TokenStreamMetadata {
        &self.metadata
    }

    /// Checks whether the token stream contains no tokens.
    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    /// Builds a token stream from an iterator of [`PrimitiveToken`]s, falling
    /// back to the call-site span for tokens that carry no span.
    pub fn from_primitive_token_stream(
        stable_token_stream: impl Iterator<Item = PrimitiveToken>,
    ) -> Self {
        Self::new(
            stable_token_stream
                .map(|stable_token| {
                    TokenTree::Ident(Token::new(
                        stable_token.content,
                        stable_token
                            .span
                            .map(|stable_span| TextSpan {
                                start: stable_span.start as u32,
                                end: stable_span.end as u32,
                            })
                            .unwrap_or(TextSpan::call_site()),
                    ))
                })
                .collect(),
        )
    }

    /// Appends a single token tree to the end of the stream.
    pub fn push_token(&mut self, token_tree: TokenTree) {
        self.tokens.push(token_tree);
    }
}

impl IntoIterator for TokenStream {
    type Item = TokenTree;
    type IntoIter = IntoIter<TokenTree>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

impl Extend<TokenTree> for TokenStream {
    fn extend<T: IntoIterator<Item = TokenTree>>(&mut self, iter: T) {
        self.tokens.extend(iter);
    }
}

impl Extend<TokenStream> for TokenStream {
    fn extend<T: IntoIterator<Item = TokenStream>>(&mut self, iter: T) {
        iter.into_iter()
            .for_each(|token_stream| self.extend(token_stream));
    }
}

/// Prints the stream by concatenating the contents of its tokens, without
/// separators; metadata is not included.
impl Display for TokenStream {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for token in &self.tokens {
            match token {
                TokenTree::Ident(token) => {
                    write!(f, "{}", token.content.as_ref())?;
                }
            }
        }
        Ok(())
    }
}

impl TokenStreamMetadata {
    #[doc(hidden)]
    pub fn new(file_path: impl ToString, file_id: impl ToString, edition: impl ToString) -> Self {
        Self {
            original_file_path: Some(file_path.to_string()),
            file_id: Some(file_id.to_string()),
            edition: Some(edition.to_string()),
        }
    }
}

impl TokenTree {
    /// Wraps a [`Token`] as an identifier token tree.
    pub fn from_ident(token: Token) -> Self {
        Self::Ident(token)
    }
}

impl TextSpan {
    /// Creates a new span from the given start and end offsets.
    pub fn new(start: TextOffset, end: TextOffset) -> TextSpan {
        TextSpan { start, end }
    }

    /// Creates a span pointing at the macro call site, as recorded in the
    /// `CALL_SITE` thread-local.
    pub fn call_site() -> Self {
        CALL_SITE.with(|call_site| {
            let call_site = call_site.borrow();
            Self::new(call_site.0, call_site.1)
        })
    }

    /// Returns a zero-width span located at the start of this span.
    pub fn start(self) -> Self {
        Self::new(self.start, self.start)
    }

    /// Returns a zero-width span located at the end of this span.
    pub fn end(self) -> Self {
        Self::new(self.end, self.end)
    }
}

impl Token {
    /// Creates a token, interning its content in the thread-local allocation
    /// context (`CONTEXT`).
    pub fn new(content: impl AsRef<str>, span: TextSpan) -> Self {
        CONTEXT.with(|ctx| {
            let ctx_borrow = ctx.borrow();
            let ctx: &AllocationContext = ctx_borrow.deref();
            Self::new_in(content, span, ctx)
        })
    }

    /// Creates a token, interning its content in the given allocation context.
    pub fn new_in(content: impl AsRef<str>, span: TextSpan, ctx: &AllocationContext) -> Self {
        let content = ctx.intern(content.as_ref());
        Self { content, span }
    }
}

impl ToPrimitiveTokenStream for TokenStream {
    type Iter = Map<IntoIter<TokenTree>, fn(TokenTree) -> PrimitiveToken>;
    fn to_primitive_token_stream(&self) -> Self::Iter {
        self.tokens
            .clone()
            .into_iter()
            .map(|token_tree| match token_tree {
                TokenTree::Ident(token) => PrimitiveToken::new(
                    token.content.to_string(),
                    Some(PrimitiveSpan {
                        start: token.span.start as usize,
                        end: token.span.end as usize,
                    }),
                ),
            })
    }
}

impl ToPrimitiveTokenStream for TokenTree {
    type Iter = Once<PrimitiveToken>;
    fn to_primitive_token_stream(&self) -> Self::Iter {
        once(match self {
            TokenTree::Ident(token) => PrimitiveToken::new(
                token.content.to_string(),
                Some(PrimitiveSpan {
                    start: token.span.start as usize,
                    end: token.span.end as usize,
                }),
            ),
        })
    }
}

#[cfg(test)]
mod test {
    use crate::{AllocationContext, TextSpan, Token, TokenStream, TokenTree};

    #[test]
    pub fn can_serde_empty_token_stream() {
        let original = TokenStream::empty();
        let serialized = serde_json::to_string(&original).unwrap();
        let derived: TokenStream = serde_json::from_str(serialized.as_str()).unwrap();
        assert_eq!(original, derived);
        let val: serde_json::Value = serde_json::from_str(serialized.as_str()).unwrap();
        assert_eq!(
            val,
            serde_json::json!({
                "tokens": [],
                "metadata": {
                    "original_file_path": null,
                    "file_id": null,
                    "edition": null
                }
            })
        );
    }

    #[test]
    pub fn can_serde_token_stream() {
        let ctx = AllocationContext::default();
        let original = TokenStream::new(vec![
            TokenTree::Ident(Token::new_in("first", TextSpan::new(0, 1), &ctx)),
            TokenTree::Ident(Token::new_in("second", TextSpan::new(2, 3), &ctx)),
            TokenTree::Ident(Token::new_in("third", TextSpan::new(4, 5), &ctx)),
            TokenTree::Ident(Token::new_in("fourth", TextSpan::new(6, 7), &ctx)),
        ]);
        let serialized = serde_json::to_string(&original).unwrap();
        let derived: TokenStream = serde_json::from_str(serialized.as_str()).unwrap();
        assert_eq!(original, derived);
        let val: serde_json::Value = serde_json::from_str(serialized.as_str()).unwrap();
        assert_eq!(
            val,
            serde_json::json!({
                "tokens": [
                    {"Ident": {"content": "first", "span": {"start": 0, "end": 1}}},
                    {"Ident": {"content": "second", "span": {"start": 2, "end": 3}}},
                    {"Ident": {"content": "third", "span": {"start": 4, "end": 5}}},
                    {"Ident": {"content": "fourth", "span": {"start": 6, "end": 7}}},
                ],
                "metadata": {
                    "original_file_path": null,
                    "file_id": null,
                    "edition": null
                }
            })
        );
    }

    #[test]
    pub fn token_stream_can_be_extended_with_token_stream() {
        let mut first = TokenStream::new(vec![TokenTree::Ident(Token::new(
            "first",
            TextSpan::new(0, 1),
        ))]);
        let second = TokenStream::new(vec![TokenTree::Ident(Token::new(
            "second",
            TextSpan::new(2, 3),
        ))]);
        first.extend(second);
        assert_eq!(
            first.tokens,
            vec![
                TokenTree::Ident(Token::new("first", TextSpan::new(0, 1))),
                TokenTree::Ident(Token::new("second", TextSpan::new(2, 3))),
            ]
        );
    }

    #[test]
    pub fn token_stream_can_be_extended_with_vec_of_token_streams() {
        let mut first = TokenStream::new(vec![TokenTree::Ident(Token::new(
            "first",
            TextSpan::new(0, 1),
        ))]);
        let second = TokenStream::new(vec![TokenTree::Ident(Token::new(
            "second",
            TextSpan::new(2, 3),
        ))]);
        let third = TokenStream::new(vec![TokenTree::Ident(Token::new(
            "third",
            TextSpan::new(4, 5),
        ))]);
        first.extend(vec![second, third]);
        assert_eq!(
            first.tokens,
            vec![
                TokenTree::Ident(Token::new("first", TextSpan::new(0, 1))),
                TokenTree::Ident(Token::new("second", TextSpan::new(2, 3))),
                TokenTree::Ident(Token::new("third", TextSpan::new(4, 5))),
            ]
        );
    }
}