//! veryl_parser/veryl_token.rs
1use crate::doc_comment_table;
2use crate::resource_table::{self, PathId, StrId, TokenId};
3use crate::text_table::{self, TextId};
4use crate::veryl_grammar_trait::*;
5use once_cell::sync::Lazy;
6use paste::paste;
7use regex::Regex;
8use std::fmt;
9
/// Origin of a token's text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TokenSource {
    /// Token lexed from a source file: `path` identifies the file,
    /// `text` the interned file contents it was lexed from.
    File { path: PathId, text: TextId },
    /// Token supplied by the compiler itself (no backing file).
    Builtin,
    /// Token from an external source — presumably outside the current
    /// project; confirm against callers.
    External,
    /// Token synthesized programmatically, attributed to the given path.
    Generated(PathId),
}
17
18impl fmt::Display for TokenSource {
19    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
20        let text = match self {
21            TokenSource::File { path, .. } => path.to_string(),
22            TokenSource::Builtin => "builtin".to_string(),
23            TokenSource::External => "external".to_string(),
24            TokenSource::Generated(_) => "generated".to_string(),
25        };
26        text.fmt(f)
27    }
28}
29
30impl PartialEq<PathId> for TokenSource {
31    fn eq(&self, other: &PathId) -> bool {
32        match self {
33            TokenSource::File { path, .. } => path == other,
34            TokenSource::Generated(x) => x == other,
35            _ => false,
36        }
37    }
38}
39
40impl PartialEq<Option<PathId>> for TokenSource {
41    fn eq(&self, other: &Option<PathId>) -> bool {
42        match self {
43            TokenSource::File { path, .. } => Some(path) == other.as_ref(),
44            TokenSource::Generated(x) => Some(x) == other.as_ref(),
45            _ => false,
46        }
47    }
48}
49
50impl PartialOrd<PathId> for TokenSource {
51    fn partial_cmp(&self, other: &PathId) -> Option<std::cmp::Ordering> {
52        match self {
53            TokenSource::File { path, .. } => Some(path.cmp(other)),
54            TokenSource::Generated(x) => Some(x.cmp(other)),
55            _ => None,
56        }
57    }
58}
59
60impl TokenSource {
61    pub fn get_text(&self) -> String {
62        if let TokenSource::File { text, .. } = self {
63            if let Some(x) = text_table::get(*text) {
64                x.text
65            } else {
66                String::new()
67            }
68        } else {
69            String::new()
70        }
71    }
72
73    pub fn get_path(&self) -> Option<PathId> {
74        match self {
75            TokenSource::File { path, .. } => Some(*path),
76            TokenSource::Generated(x) => Some(*x),
77            _ => None,
78        }
79    }
80}
81
/// A lexed token: interned text plus its location and origin.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token {
    /// Unique id allocated from the resource table.
    pub id: TokenId,
    /// Interned token text.
    pub text: StrId,
    /// Start line — 1-based for file tokens (the start token uses line 1);
    /// 0 for `generate`d tokens.
    pub line: u32,
    /// Start column — 1-based for file tokens; 0 for `generate`d tokens.
    pub column: u32,
    /// Token length — assumed byte length of the text (set from
    /// `location.len()` / `str::len`); TODO confirm char vs byte semantics.
    pub pos: u32,
    pub length: u32,
    /// Offset of the token start (from `location.start` when lexed) —
    /// presumably a byte offset into the file; verify against callers.
    pub source: TokenSource,
}
92
93impl Token {
94    pub fn new(
95        text: &str,
96        line: u32,
97        column: u32,
98        length: u32,
99        pos: u32,
100        source: TokenSource,
101    ) -> Self {
102        let id = resource_table::new_token_id();
103        let text = resource_table::insert_str(text);
104        Token {
105            id,
106            text,
107            line,
108            column,
109            length,
110            pos,
111            source,
112        }
113    }
114
115    pub fn generate(text: StrId, path: PathId) -> Self {
116        let id = resource_table::new_token_id();
117        Token {
118            id,
119            text,
120            line: 0,
121            column: 0,
122            length: 0,
123            pos: 0,
124            source: TokenSource::Generated(path),
125        }
126    }
127
128    pub fn end_line(&self) -> u32 {
129        let text = self.to_string();
130        self.line + text.matches('\n').count() as u32
131    }
132
133    pub fn end_column(&self) -> u32 {
134        let text = self.to_string();
135        if text.matches('\n').count() > 0 {
136            text.split('\n')
137                .next_back()
138                .map(|x| x.len() as u32)
139                .unwrap()
140        } else {
141            self.column + self.length - 1
142        }
143    }
144}
145
146pub fn is_anonymous_text(text: StrId) -> bool {
147    let anonymous_id = resource_table::insert_str("_");
148    text == anonymous_id
149}
150
151pub fn is_anonymous_token(token: &Token) -> bool {
152    is_anonymous_text(token.text)
153}
154
155impl Default for Token {
156    fn default() -> Self {
157        Self::generate(StrId::default(), PathId::default())
158    }
159}
160
161impl fmt::Display for Token {
162    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
163        let text = format!("{}", self.text);
164        text.fmt(f)
165    }
166}
167
168impl<'t> TryFrom<&parol_runtime::lexer::Token<'t>> for Token {
169    type Error = anyhow::Error;
170    fn try_from(x: &parol_runtime::lexer::Token<'t>) -> Result<Self, anyhow::Error> {
171        let id = resource_table::new_token_id();
172        let text = resource_table::insert_str(x.text());
173        let pos = x.location.start;
174        let source = TokenSource::File {
175            path: resource_table::insert_path(&x.location.file_name),
176            text: text_table::get_current_text(),
177        };
178        Ok(Token {
179            id,
180            text,
181            line: x.location.start_line,
182            column: x.location.start_column,
183            length: x.location.len() as u32,
184            pos,
185            source,
186        })
187    }
188}
189
190impl From<&Token> for miette::SourceSpan {
191    fn from(x: &Token) -> Self {
192        (x.pos as usize, x.length as usize).into()
193    }
194}
195
196impl From<Token> for miette::SourceSpan {
197    fn from(x: Token) -> Self {
198        (x.pos as usize, x.length as usize).into()
199    }
200}
201
/// A token together with the comment tokens that preceded it in the input.
#[derive(Debug, Clone)]
pub struct VerylToken {
    /// The significant (non-comment) token.
    pub token: Token,
    /// Leading comments, split into one token per comment.
    pub comments: Vec<Token>,
}
207
208impl VerylToken {
209    pub fn new(token: Token) -> Self {
210        Self {
211            token,
212            comments: vec![],
213        }
214    }
215
216    pub fn replace(&self, text: &str) -> Self {
217        let length = text.len();
218        let text = resource_table::insert_str(text);
219        let mut ret = self.clone();
220        ret.token.text = text;
221        ret.token.length = length as u32;
222        ret
223    }
224
225    pub fn append(&self, prefix: &Option<String>, suffix: &Option<String>) -> Self {
226        let prefix_str = if let Some(x) = prefix { x.as_str() } else { "" };
227        let suffix_str = if let Some(x) = suffix { x.as_str() } else { "" };
228        let text = format!("{}{}{}", prefix_str, self.token.text, suffix_str);
229        let length = text.len();
230        let text = resource_table::insert_str(&text);
231        let mut ret = self.clone();
232        ret.token.text = text;
233        ret.token.length = length as u32;
234        ret
235    }
236
237    pub fn strip_prefix(&self, prefix: &str) -> Self {
238        let text = self.token.text.to_string();
239        if let Some(text) = text.strip_prefix(prefix) {
240            let length = text.len();
241            let text = resource_table::insert_str(text);
242            let mut ret = self.clone();
243            ret.token.text = text;
244            ret.token.length = length as u32;
245            ret
246        } else {
247            self.clone()
248        }
249    }
250}
251
252impl fmt::Display for VerylToken {
253    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
254        let text = format!("{}", self.token);
255        text.fmt(f)
256    }
257}
258
impl ScopedIdentifier {
    /// Returns the leading identifier token of the scoped path: either a
    /// plain identifier or a `$`-prefixed identifier.
    pub fn identifier(&self) -> &VerylToken {
        match &*self.scoped_identifier_group {
            ScopedIdentifierGroup::IdentifierScopedIdentifierOpt(x) => {
                &x.identifier.identifier_token
            }
            ScopedIdentifierGroup::DollarIdentifier(x) => {
                &x.dollar_identifier.dollar_identifier_token
            }
        }
    }
}
271
impl ExpressionIdentifier {
    /// Returns the leading identifier token, delegating to the inner
    /// `ScopedIdentifier`.
    pub fn identifier(&self) -> &VerylToken {
        self.scoped_identifier.identifier()
    }
}
277
// Matches one comment at a time: a line comment (`//` through the line
// terminator — or end of input) or a non-greedy block comment spanning
// lines via the `(?ms)` flags. `\u{2a}` is the `*` character, written as an
// escape — presumably to keep a literal `*/` sequence out of the pattern.
static COMMENT_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"((?://.*(?:\r\n|\r|\n|$))|(?:(?ms)/\u{2a}.*?\u{2a}/))").unwrap());
280
/// Splits a raw comment-run token (one token covering consecutive comments
/// and the whitespace between them) into one `Token` per comment, with
/// line/column advanced from the run's starting position.
///
/// Doc comments (`///…`) are additionally registered in the doc-comment
/// table, keyed by the source file's path and the comment's line.
fn split_comment_token(token: Token) -> Vec<Token> {
    // Running position of the current comment, seeded from the run's start.
    let mut line = token.line;
    let mut column = token.column;
    let text = resource_table::get_str_value(token.text).unwrap();

    let mut prev_pos = 0;
    let mut ret = Vec::new();
    for cap in COMMENT_REGEX.captures_iter(&text) {
        let cap = cap.get(0).unwrap();
        let pos = cap.start();
        let length = (cap.end() - pos) as u32;

        // Text from the previous match START to this one — this includes the
        // previous comment itself, so newlines inside it advance `line` here.
        let prev_text = &text[prev_pos..(pos)];
        let n_lines = prev_text.matches('\n').count() as u32;
        line += n_lines;

        column = if n_lines == 0 {
            // Same line: advance past the intervening text.
            column + prev_text.len() as u32
        } else {
            // Chars after the last newline; not adding 1 past the newline
            // index yields what appears to be a 1-based column — TODO confirm.
            (prev_text.len() - prev_text.rfind('\n').unwrap_or(0)) as u32
        };

        prev_pos = pos;

        let id = resource_table::new_token_id();
        let text = &text[pos..pos + length as usize];
        let is_doc_comment = text.starts_with("///");
        let text = resource_table::insert_str(text);

        // Only file-backed tokens get doc comments indexed.
        if is_doc_comment && let TokenSource::File { path, .. } = token.source {
            doc_comment_table::insert(path, line, text);
        }

        let token = Token {
            id,
            text,
            line,
            column,
            length,
            // NOTE(review): this is the comment's END offset within the run's
            // own text, not a file-absolute start offset like lexed tokens'
            // `pos` — confirm downstream consumers expect this.
            pos: pos as u32 + length,
            source: token.source,
        };
        ret.push(token);
    }
    ret
}
327
328impl TryFrom<&StartToken> for VerylToken {
329    type Error = anyhow::Error;
330
331    fn try_from(x: &StartToken) -> Result<Self, anyhow::Error> {
332        let mut comments = Vec::new();
333        if let Some(ref x) = x.comments.comments_opt {
334            let mut tokens = split_comment_token(x.comments_term.comments_term);
335            comments.append(&mut tokens)
336        }
337        let id = resource_table::new_token_id();
338        let text = resource_table::insert_str("");
339        let source = TokenSource::Builtin;
340        let token = Token {
341            id,
342            text,
343            line: 1,
344            column: 1,
345            length: 0,
346            pos: 0,
347            source,
348        };
349        Ok(VerylToken { token, comments })
350    }
351}
352
// For a terminal `$x`, generates two conversions:
// * `TryFrom<&{$x}Token> for VerylToken` — splits and carries over any
//   comment run preceding the token, and
// * `TryFrom<&{$x}Term> for Token` — a field-by-field copy of the term.
// `paste!` derives the snake_case field name `{$x:snake}_term`.
macro_rules! token_with_comments {
    ($x:ident) => {
        paste! {
            impl TryFrom<&[<$x Token>]> for VerylToken {
                type Error = anyhow::Error;

                fn try_from(x: &[<$x Token>]) -> Result<Self, anyhow::Error> {
                    let mut comments = Vec::new();
                    if let Some(ref x) = x.comments.comments_opt {
                        let mut tokens = split_comment_token(x.comments_term.comments_term);
                        comments.append(&mut tokens)
                    }
                    Ok(VerylToken {
                        token: x.[<$x:snake _term>].clone(),
                        comments,
                    })
                }
            }
            impl TryFrom<&[<$x Term>]> for Token {
                type Error = anyhow::Error;

                fn try_from(x: &[<$x Term>]) -> Result<Self, anyhow::Error> {
                    Ok(Token {
                        id: x.[<$x:snake _term>].id,
                        text: x.[<$x:snake _term>].text,
                        line: x.[<$x:snake _term>].line,
                        column: x.[<$x:snake _term>].column,
                        length: x.[<$x:snake _term>].length,
                        pos: x.[<$x:snake _term>].pos,
                        source: x.[<$x:snake _term>].source,
                    })
                }
            }
        }
    };
}
389
// Generates `TryFrom` impls for terminals whose conversions never attach
// comments. Two arms:
// * `($x, $y)` — `{$x}Token` stores its term under the field name derived
//   from `$y` (used when several tokens share a term type); only the
//   `VerylToken` conversion is generated.
// * `($x)` — same shape as `token_with_comments!` but with an always-empty
//   comment list.
macro_rules! token_without_comments {
    ($x:ident, $y:ident) => {
        paste! {
            impl TryFrom<&[<$x Token>]> for VerylToken {
                type Error = anyhow::Error;

                fn try_from(x: &[<$x Token>]) -> Result<Self, anyhow::Error> {
                    Ok(VerylToken {
                        token: x.[<$y:snake _term>].clone(),
                        comments: Vec::new(),
                    })
                }
            }
        }
    };
    ($x:ident) => {
        paste! {
            impl TryFrom<&[<$x Token>]> for VerylToken {
                type Error = anyhow::Error;

                fn try_from(x: &[<$x Token>]) -> Result<Self, anyhow::Error> {
                    Ok(VerylToken {
                        token: x.[<$x:snake _term>].clone(),
                        comments: Vec::new(),
                    })
                }
            }
            impl TryFrom<&[<$x Term>]> for Token {
                type Error = anyhow::Error;

                fn try_from(x: &[<$x Term>]) -> Result<Self, anyhow::Error> {
                    Ok(Token {
                        id: x.[<$x:snake _term>].id,
                        text: x.[<$x:snake _term>].text,
                        line: x.[<$x:snake _term>].line,
                        column: x.[<$x:snake _term>].column,
                        length: x.[<$x:snake _term>].length,
                        pos: x.[<$x:snake _term>].pos,
                        source: x.[<$x:snake _term>].source,
                    })
                }
            }
        }
    };
}
435
// --- String literals ---
token_with_comments!(StringLiteral);

// --- Numeric literal components ---
token_with_comments!(FixedPoint);
token_with_comments!(Exponent);
token_with_comments!(Based);
token_with_comments!(BaseLess);
token_with_comments!(AllBit);

// --- Punctuation ---
// NOTE(review): the embed/escaped brace tokens are comment-free because they
// appear inside embed blocks where comment splitting must not apply —
// presumably; confirm. Also note the asymmetry: TripleLBrace is
// `token_without_comments!` while TripleRBrace is `token_with_comments!`.
token_with_comments!(Colon);
token_with_comments!(ColonColon);
token_with_comments!(ColonColonLAngle);
token_with_comments!(Comma);
token_with_comments!(DotDot);
token_with_comments!(DotDotEqu);
token_with_comments!(Dot);
token_with_comments!(Equ);
token_with_comments!(HashLBracket);
token_with_comments!(Hash);
token_with_comments!(Question);
token_with_comments!(Quote);
token_with_comments!(QuoteLBrace);
token_with_comments!(LAngle);
token_without_comments!(EmbedLBrace, LBrace);
token_without_comments!(EscapedLBrace);
token_without_comments!(TripleLBrace);
token_with_comments!(LBrace);
token_with_comments!(LBracket);
token_with_comments!(LParen);
token_with_comments!(LTMinus);
token_with_comments!(MinusColon);
token_with_comments!(MinusGT);
token_with_comments!(PlusColon);
token_with_comments!(RAngle);
token_without_comments!(EmbedRBrace, RBrace);
token_without_comments!(EscapedRBrace);
token_with_comments!(TripleRBrace);
token_with_comments!(RBrace);
token_with_comments!(RBracket);
token_with_comments!(RParen);
token_with_comments!(Semicolon);
token_with_comments!(Star);

// --- Operators ---
token_with_comments!(AssignmentOperator);
token_with_comments!(DiamondOperator);
token_with_comments!(Operator01);
token_with_comments!(Operator02);
token_with_comments!(Operator03);
token_with_comments!(Operator04);
token_with_comments!(Operator05);
token_with_comments!(Operator06);
token_with_comments!(Operator07);
token_with_comments!(Operator08);
token_with_comments!(UnaryOperator);

// --- Keywords ---
token_with_comments!(Alias);
token_with_comments!(AlwaysComb);
token_with_comments!(AlwaysFf);
token_with_comments!(As);
token_with_comments!(Assign);
token_with_comments!(Bind);
token_with_comments!(Bit);
token_with_comments!(Block);
token_with_comments!(BBool);
token_with_comments!(LBool);
token_with_comments!(Break);
token_with_comments!(Case);
token_with_comments!(Clock);
token_with_comments!(ClockPosedge);
token_with_comments!(ClockNegedge);
token_with_comments!(Connect);
token_with_comments!(Const);
token_with_comments!(Converse);
token_with_comments!(Default);
token_with_comments!(Else);
token_with_comments!(Embed);
token_with_comments!(Enum);
token_with_comments!(F32);
token_with_comments!(F64);
token_with_comments!(False);
token_with_comments!(Final);
token_with_comments!(For);
token_with_comments!(Function);
token_with_comments!(I8);
token_with_comments!(I16);
token_with_comments!(I32);
token_with_comments!(I64);
token_with_comments!(If);
token_with_comments!(IfReset);
token_with_comments!(Import);
token_with_comments!(Include);
token_with_comments!(Initial);
token_with_comments!(Inout);
token_with_comments!(Input);
token_with_comments!(Inside);
token_with_comments!(Inst);
token_with_comments!(Interface);
token_with_comments!(In);
token_with_comments!(Let);
token_with_comments!(Logic);
token_with_comments!(Lsb);
token_with_comments!(Modport);
token_with_comments!(Module);
token_with_comments!(Msb);
token_with_comments!(Output);
token_with_comments!(Outside);
token_with_comments!(Package);
token_with_comments!(Param);
token_with_comments!(Proto);
token_with_comments!(Pub);
token_with_comments!(Repeat);
token_with_comments!(Reset);
token_with_comments!(ResetAsyncHigh);
token_with_comments!(ResetAsyncLow);
token_with_comments!(ResetSyncHigh);
token_with_comments!(ResetSyncLow);
token_with_comments!(Return);
token_with_comments!(Rev);
token_with_comments!(Same);
token_with_comments!(Signed);
token_with_comments!(Step);
token_with_comments!(String);
token_with_comments!(Struct);
token_with_comments!(Switch);
token_with_comments!(Tri);
token_with_comments!(True);
token_with_comments!(Type);
token_with_comments!(P8);
token_with_comments!(P16);
token_with_comments!(P32);
token_with_comments!(P64);
token_with_comments!(U8);
token_with_comments!(U16);
token_with_comments!(U32);
token_with_comments!(U64);
token_with_comments!(Union);
token_with_comments!(Unsafe);
token_with_comments!(Var);

// --- Identifiers ---
token_with_comments!(DollarIdentifier);
token_with_comments!(Identifier);

// --- Catch-all ---
token_without_comments!(Any);