vbscript/lexer/
token.rs

1use std::fmt;
2use std::ops::{Index, Range};
3
4#[derive(Eq, PartialEq, Copy, Clone, Hash)]
5pub struct Token {
6    pub kind: TokenKind,
7    pub span: Span,
8    /// 1-indexed, 0 means unknown
9    pub line: usize,
10    /// 1-indexed, 0 means unknown
11    pub column: usize,
12}
13
14impl Token {
15    pub fn error(p0: Range<i32>, line: usize, column: usize) -> Token {
16        Token {
17            kind: TokenKind::ParseError,
18            span: Span {
19                start: p0.start as u32,
20                end: p0.end as u32,
21            },
22            line,
23            column,
24        }
25    }
26    pub fn eof(p0: Range<i32>, line: usize, column: usize) -> Token {
27        Token {
28            kind: TokenKind::Eof,
29            span: Span {
30                start: p0.start as u32,
31                end: p0.end as u32,
32            },
33            line,
34            column,
35        }
36    }
37
38    pub fn ident(p1: Range<i32>, line: usize, column: usize) -> Token {
39        Token {
40            kind: TokenKind::Identifier,
41            span: Span {
42                start: p1.start as u32,
43                end: p1.end as u32,
44            },
45            line,
46            column,
47        }
48    }
49}
50
51impl Token {
52    pub fn is_empty(&self) -> bool {
53        self.kind == TokenKind::Eof
54    }
55
56    pub fn len(&self) -> usize {
57        (self.span.end - self.span.start) as usize
58    }
59
60    pub fn text<'input>(&self, input: &'input str) -> &'input str {
61        &input[self.span]
62    }
63}
64
65impl fmt::Debug for Token {
66    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
67        write!(
68            f,
69            "{:?} - line {}, col {} <{} - {}>",
70            self.kind, self.line, self.column, self.span.start, self.span.end
71        )
72    }
73}
74
75impl fmt::Display for Token {
76    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
77        write!(f, "{}", self.kind)
78    }
79}
80
/// A half-open range of `u32` offsets, `start..end`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct Span {
    /// First offset covered by the span (inclusive).
    pub start: u32,
    /// Offset just past the covered region (exclusive).
    pub end: u32,
}
88
89impl Span {
90    pub(crate) fn len(&self) -> u32 {
91        self.end - self.start
92    }
93}
94
95impl From<Span> for Range<usize> {
96    fn from(span: Span) -> Self {
97        span.start as usize..span.end as usize
98    }
99}
100
101impl From<Range<usize>> for Span {
102    fn from(range: Range<usize>) -> Self {
103        Self {
104            start: range.start as u32,
105            end: range.end as u32,
106        }
107    }
108}
109
110impl Index<Span> for str {
111    type Output = str;
112
113    fn index(&self, index: Span) -> &Self::Output {
114        &self[Range::<usize>::from(index)]
115    }
116}
117
/// Every category of token the lexer can produce.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum TokenKind {
    // ---- Single characters ----
    Plus,
    Minus,
    Times,
    /// Regular division.
    Slash,
    /// Integer division.
    Backslash,
    Pow,
    Eq,
    Dot,
    DotSuffix,
    Comma,
    Ampersand,
    Colon,
    SemiColon,

    // ---- Brackets ----
    LAngle,
    RAngle,
    LParen,
    RParen,

    // ---- Multiple characters ----
    Option,
    Comment,
    Identifier,

    // ---- Literals ----
    Integer,
    HexInteger,
    OctalInteger,
    Real,
    String,
    DateTime,

    // ---- Keywords ----
    KeywordMod,
    KeywordConst,
    KeywordDim,
    KeywordRedim,
    KeywordSub,
    KeywordFunction,
    KeywordIf,
    KeywordThen,
    KeywordElse,
    KeywordElseIf,
    KeywordSet,
    KeywordEnd,
    KeywordClass,
    KeywordProperty,
    KeywordPublic,
    KeywordPrivate,
    KeywordPreserve,
    KeywordDefault,
    KeywordGet,
    KeywordLet,
    KeywordAs,
    KeywordByVal,
    KeywordByRef,
    KeywordNew,
    KeywordReturn,
    KeywordFor,
    KeywordEach,
    KeywordIn,
    KeywordTo,
    KeywordStep,
    KeywordNext,
    KeywordWhile,
    KeywordWend,
    KeywordDo,
    KeywordLoop,
    KeywordUntil,
    KeywordWith,
    KeywordSelect,
    KeywordCase,
    KeywordCall,
    KeywordExit,
    KeywordMe,
    /// The `Stop` statement, which suspends execution much like a breakpoint
    /// set in the code.
    ///
    /// See <https://www.vbsedit.com/html/3ff21ea0-54e5-4f95-9c77-7f2d02977463.asp>.
    KeywordStop,
    /// Any keyword that is reserved but not used.
    ///
    /// See <https://docs.microsoft.com/en-us/dotnet/visual-basic/language-reference/keywords/reserved-keywords>.
    ///
    /// NOTE(review): the earlier comment gave "As" as an example, yet
    /// `KeywordAs` is a separate variant; confirm which one the lexer emits.
    KeywordUnused,

    // ---- Special values ----
    Empty,
    Null,
    Nothing,
    True,
    False,

    // ---- Logical operators ----
    Not,
    And,
    Or,
    Xor,
    Eqv,
    Imp,

    // ---- Comparison operators (`=` is shared with assignment, see `Eq`) ----
    Neq,
    Geq,
    Leq,
    Is,

    // ---- Error handling (`Next` is already covered by `KeywordNext`) ----
    On,
    Error,
    Resume,
    Goto,

    // ---- Misc ----
    Whitespace,
    /// CRLF, LF (or CR).
    Newline,
    LineContinuation,
    Eof,
    /// Input that could not be tokenized.
    ParseError,
}
237
238impl TokenKind {
239    pub(crate) fn is_ending_expression(&self) -> bool {
240        matches!(
241            self,
242            Self::Eof
243                | Self::KeywordEnd
244                | Self::KeywordWend
245                | Self::KeywordLoop
246                | Self::KeywordNext
247                | Self::KeywordUntil
248                | Self::KeywordThen
249                | Self::KeywordTo
250                | Self::KeywordStep
251                | Self::KeywordElse
252                | Self::Newline
253                | Self::Colon
254                | Self::Comment
255        )
256    }
257}
258
/// Shorthand for naming a [`TokenKind`](crate::lexer::TokenKind) variant by
/// the token's source spelling (or by a short tag for kinds without one).
///
/// The expansion is a plain path, so it can be used both as an expression and
/// as a pattern:
///
/// ```ignore
/// let plus = T![+];                  // TokenKind::Plus
/// if let T![dim] = kind { /* ... */ } // TokenKind::KeywordDim
/// ```
///
/// Characters that cannot appear bare inside a macro invocation (backslash
/// and parentheses) are written as character literals, e.g. `T!['(']`.
///
/// There are deliberately no arms for `!`, `[` and `]`: the former arms
/// expanded to `Bang`, `LSquare` and `RSquare`, which `TokenKind` does not
/// define, so any use of them failed to compile.
#[macro_export]
macro_rules! T {
    // Single characters
    [.] => { $crate::lexer::TokenKind::Dot };
    [_.] => { $crate::lexer::TokenKind::DotSuffix };
    [+] => { $crate::lexer::TokenKind::Plus };
    [-] => { $crate::lexer::TokenKind::Minus };
    [*] => { $crate::lexer::TokenKind::Times };
    [/] => { $crate::lexer::TokenKind::Slash };
    ['\\'] => { $crate::lexer::TokenKind::Backslash };
    [mod] => { $crate::lexer::TokenKind::KeywordMod };
    [^] => { $crate::lexer::TokenKind::Pow };
    [=] => { $crate::lexer::TokenKind::Eq };
    [,] => { $crate::lexer::TokenKind::Comma };
    [&] => { $crate::lexer::TokenKind::Ampersand };
    [:] => { $crate::lexer::TokenKind::Colon };
    [;] => { $crate::lexer::TokenKind::SemiColon };
    // Brackets
    [<] => { $crate::lexer::TokenKind::LAngle };
    [>] => { $crate::lexer::TokenKind::RAngle };
    ['('] => { $crate::lexer::TokenKind::LParen };
    [')'] => { $crate::lexer::TokenKind::RParen };
    // Multiple characters
    [option] => { $crate::lexer::TokenKind::Option };
    [comment] => { $crate::lexer::TokenKind::Comment };
    // Literals
    [integer_literal] => { $crate::lexer::TokenKind::Integer };
    [hex_integer_literal] => { $crate::lexer::TokenKind::HexInteger };
    [octal_integer_literal] => { $crate::lexer::TokenKind::OctalInteger };
    [real_literal] => { $crate::lexer::TokenKind::Real };
    [string_literal] => { $crate::lexer::TokenKind::String };
    [date_time_literal] => { $crate::lexer::TokenKind::DateTime };
    // Identifier and keywords
    [ident] => { $crate::lexer::TokenKind::Identifier };
    [const] => { $crate::lexer::TokenKind::KeywordConst };
    [dim] => { $crate::lexer::TokenKind::KeywordDim };
    [redim] => { $crate::lexer::TokenKind::KeywordRedim };
    [set] => { $crate::lexer::TokenKind::KeywordSet };
    [let] => { $crate::lexer::TokenKind::KeywordLet };
    [get] => { $crate::lexer::TokenKind::KeywordGet };
    [sub] => { $crate::lexer::TokenKind::KeywordSub };
    [function] => { $crate::lexer::TokenKind::KeywordFunction };
    [byval] => { $crate::lexer::TokenKind::KeywordByVal };
    [byref] => { $crate::lexer::TokenKind::KeywordByRef };
    [call] => { $crate::lexer::TokenKind::KeywordCall };
    [class] => { $crate::lexer::TokenKind::KeywordClass };
    [property] => { $crate::lexer::TokenKind::KeywordProperty };
    [public] => { $crate::lexer::TokenKind::KeywordPublic };
    [private] => { $crate::lexer::TokenKind::KeywordPrivate };
    [preserve] => { $crate::lexer::TokenKind::KeywordPreserve };
    [default] => { $crate::lexer::TokenKind::KeywordDefault };
    [as] => { $crate::lexer::TokenKind::KeywordAs };
    [new] => { $crate::lexer::TokenKind::KeywordNew };
    [return] => { $crate::lexer::TokenKind::KeywordReturn };
    [for] => { $crate::lexer::TokenKind::KeywordFor };
    [each] => { $crate::lexer::TokenKind::KeywordEach };
    [in] => { $crate::lexer::TokenKind::KeywordIn };
    [to] => { $crate::lexer::TokenKind::KeywordTo };
    [step] => { $crate::lexer::TokenKind::KeywordStep };
    [next] => { $crate::lexer::TokenKind::KeywordNext };
    [while] => { $crate::lexer::TokenKind::KeywordWhile };
    [wend] => { $crate::lexer::TokenKind::KeywordWend };
    [do] => { $crate::lexer::TokenKind::KeywordDo };
    [loop] => { $crate::lexer::TokenKind::KeywordLoop };
    [until] => { $crate::lexer::TokenKind::KeywordUntil };
    [with] => { $crate::lexer::TokenKind::KeywordWith };
    [select] => { $crate::lexer::TokenKind::KeywordSelect };
    [case] => { $crate::lexer::TokenKind::KeywordCase };
    [if] => { $crate::lexer::TokenKind::KeywordIf };
    [then] => { $crate::lexer::TokenKind::KeywordThen };
    [else] => { $crate::lexer::TokenKind::KeywordElse };
    [elseif] => { $crate::lexer::TokenKind::KeywordElseIf };
    [end] => { $crate::lexer::TokenKind::KeywordEnd };
    [exit] => { $crate::lexer::TokenKind::KeywordExit };
    [me] => { $crate::lexer::TokenKind::KeywordMe };
    [unused] => { $crate::lexer::TokenKind::KeywordUnused };
    // Special values
    [empty] => { $crate::lexer::TokenKind::Empty };
    [null] => { $crate::lexer::TokenKind::Null };
    [nothing] => { $crate::lexer::TokenKind::Nothing };
    [true] => { $crate::lexer::TokenKind::True };
    [false] => { $crate::lexer::TokenKind::False };
    // Logical operators
    [not] => { $crate::lexer::TokenKind::Not };
    [and] => { $crate::lexer::TokenKind::And };
    [or] => { $crate::lexer::TokenKind::Or };
    [xor] => { $crate::lexer::TokenKind::Xor };
    [eqv] => { $crate::lexer::TokenKind::Eqv };
    [imp] => { $crate::lexer::TokenKind::Imp };
    // Comparison operators
    [<>] => { $crate::lexer::TokenKind::Neq };
    [>=] => { $crate::lexer::TokenKind::Geq };
    [<=] => { $crate::lexer::TokenKind::Leq };
    [is] => { $crate::lexer::TokenKind::Is };
    // Error handling
    [error] => { $crate::lexer::TokenKind::Error };
    [resume] => { $crate::lexer::TokenKind::Resume };
    [goto] => { $crate::lexer::TokenKind::Goto };
    [on] => { $crate::lexer::TokenKind::On };
    // Misc
    [stop] => { $crate::lexer::TokenKind::KeywordStop };
    [ws] => { $crate::lexer::TokenKind::Whitespace };
    [nl] => { $crate::lexer::TokenKind::Newline };
    [line_continuation] => { $crate::lexer::TokenKind::LineContinuation };
    [EOF] => { $crate::lexer::TokenKind::Eof };
    [parse_error] => { $crate::lexer::TokenKind::ParseError };
}
561
562impl fmt::Display for TokenKind {
563    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
564        write!(
565            f,
566            "{}",
567            match self {
568                // Single characters
569                T![.] => ".",
570                T![_.] => "_.",
571                T![+] => "+",
572                T![-] => "-",
573                T![*] => "*",
574                T![/] => "/",
575                T!['\\'] => "\\",
576                T![^] => "^",
577                T![=] => "=",
578                T![,] => ",",
579                T![&] => "&",
580                T![:] => ":",
581                T![;] => ";",
582                // Brackets
583                T![<] => "<",
584                T![>] => ">",
585                T!['('] => "(",
586                T![')'] => ")",
587                // Multiple characters
588                T![mod] => "mod",
589                T![me] => "me",
590                T![option] => "option",
591                T![comment] => "' comment",
592                // literals
593                T![integer_literal] => "integer_literal",
594                T![hex_integer_literal] => "hex_integer_literal",
595                T![octal_integer_literal] => "octal_integer_literal",
596                T![real_literal] => "real_literal",
597                T![string_literal] => "string_literal",
598                T![date_time_literal] => "date_time_literal",
599                // Keywords
600                T![ident] => "identifier",
601                T![const] => "const",
602                T![dim] => "dim",
603                T![redim] => "redim",
604                T![set] => "set",
605                T![let] => "let",
606                T![get] => "get",
607                T![sub] => "sub",
608                T![function] => "function",
609                T![byval] => "byval",
610                T![byref] => "byref",
611                T![class] => "class",
612                T![property] => "property",
613                T![public] => "public",
614                T![private] => "private",
615                T![preserve] => "preserve",
616                T![default] => "default",
617                T![call] => "call",
618                T![as] => "as",
619                T![new] => "new",
620                T![return] => "return",
621                T![for] => "for",
622                T![each] => "each",
623                T![in] => "in",
624                T![to] => "to",
625                T![step] => "step",
626                T![next] => "next",
627                T![while] => "while",
628                T![wend] => "wend",
629                T![do] => "do",
630                T![loop] => "loop",
631                T![until] => "until",
632                T![with] => "with",
633                T![select] => "select",
634                T![case] => "case",
635                T![if] => "if",
636                T![then] => "then",
637                T![elseif] => "elseif",
638                T![else] => "else",
639                T![end] => "end",
640                T![exit] => "exit",
641                // Special values
642                T![empty] => "empty",
643                T![null] => "null",
644                T![nothing] => "nothing",
645                T![true] => "true",
646                T![false] => "false",
647                // Logical operators
648                T![not] => "not",
649                T![and] => "and",
650                T![or] => "or",
651                T![xor] => "xor",
652                T![eqv] => "eqv",
653                T![imp] => "imp",
654                // Comparison operators
655                T![<>] => "<>",
656                T![>=] => ">=",
657                T![<=] => "<=",
658                T![is] => "is",
659                // Error handling
660                T![on] => "on",
661                T![error] => "error",
662                T![resume] => "resume",
663                T![goto] => "goto",
664                // Misc
665                T![stop] => "stop",
666                T![ws] => "<WS>",
667                T![nl] => "<NL>",
668                T![line_continuation] => "<_>",
669                T![EOF] => "<EOF>",
670                T![parse_error] => "<?>",
671                T![unused] => "<unused_keyword>",
672            }
673        )
674    }
675}
676
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    /// Spot-checks the `Display` text of a handful of token kinds.
    #[test]
    fn token_kind_display() {
        let expectations = [
            (T![+], "+"),
            (T![<=], "<="),
            (T![dim], "dim"),
            (T![parse_error], "<?>"),
            (T![comment], "' comment"),
        ];
        for (kind, rendered) in expectations.iter() {
            assert_eq!(kind.to_string(), *rendered);
        }
    }
}