bulloak_syntax/tokenizer.rs

//! Defines a scanner for bulloak trees that produces a token stream.

use std::{borrow::Borrow, cell::Cell, fmt, result};

use thiserror::Error;

use crate::{
    char::CharExt,
    error::FrontendError,
    span::{Position, Span},
};

type Result<T> = result::Result<T, Error>;

/// An error that occurred while tokenizing a .tree string into a sequence of
/// tokens.
#[derive(Error, Clone, Debug, Eq, PartialEq)]
pub struct Error {
    /// The kind of error.
    #[source]
    kind: ErrorKind,
    /// The original text that the tokenizer generated the error from. Every
    /// span in an error is a valid range into this string.
    text: String,
    /// The span of this error.
    span: Span,
}

impl FrontendError<ErrorKind> for Error {
    /// Return the type of this error.
    fn kind(&self) -> &ErrorKind {
        &self.kind
    }

    /// The original text string in which this error occurred.
    fn text(&self) -> &str {
        &self.text
    }

    /// Return the span at which this error occurred.
    fn span(&self) -> &Span {
        &self.span
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.format_error(f)
    }
}

/// The type of an error that occurred while tokenizing a tree.
#[derive(Error, Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ErrorKind {
    /// Found an invalid identifier character.
    #[error("invalid identifier: {0}")]
    IdentifierCharInvalid(char),
}

/// `Token` represents a single unit of meaning in a .tree.
///
/// A token has a kind, a span, and a lexeme. The kind is
/// the type of the token, the span is the range in which a
/// token appears in the original text, and the lexeme is the
/// text that the token represents.
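///
/// # Examples
///
/// A minimal sketch; the `bulloak_syntax::tokenizer` import path is an
/// assumption and may differ from the crate's actual re-exports:
///
/// ```
/// use bulloak_syntax::tokenizer::{TokenKind, Tokenizer};
///
/// // `when` on its own is scanned as the `When` keyword token.
/// let tokens = Tokenizer::new().tokenize("when").unwrap();
/// assert_eq!(tokens[0].kind, TokenKind::When);
/// assert_eq!(tokens[0].lexeme, "when");
/// ```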
#[derive(PartialEq, Eq)]
pub struct Token {
    /// The type of the token.
    pub kind: TokenKind,
    /// The range in which a token appears in the original
    /// text.
    pub span: Span,
    /// The literal characters that make up the token.
    pub lexeme: String,
}

impl Token {
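    /// Whether this token marks a branch, i.e. a `├` or `└` character.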
    fn is_branch(&self) -> bool {
        match self.kind {
            TokenKind::Tee | TokenKind::Corner => true,
            TokenKind::Word
            | TokenKind::When
            | TokenKind::Given
            | TokenKind::It => false,
        }
    }
}

impl fmt::Debug for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Token({:?}, {:?}, {:?})", self.kind, self.lexeme, self.span)
    }
}

/// The type of a token.
#[derive(Debug, PartialEq, Eq)]
pub enum TokenKind {
    /// A token representing the `├` character.
    Tee,
    /// A token representing the `└` character.
    Corner,
    /// A token representing a string.
    ///
    /// For example, in the text `foo bar`, both `foo` and `bar` are
    /// `Word` tokens.
    Word,
    /// A token representing a `when` keyword.
    When,
    /// A token representing a `given` keyword.
    Given,
    /// A token representing an `it` keyword.
    It,
}

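// Keyword detection is case-insensitive (see `case_insensitive_keywords`
// in the tests below); a quick sketch of the mapping:
//
//     assert_eq!(TokenKind::from("WHEN"), TokenKind::When);
//     assert_eq!(TokenKind::from("Given"), TokenKind::Given);
//     assert_eq!(TokenKind::from("foo"), TokenKind::Word);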
impl From<&str> for TokenKind {
    fn from(value: &str) -> Self {
        match value.to_lowercase().as_str() {
            "when" => TokenKind::When,
            "it" => TokenKind::It,
            "given" => TokenKind::Given,
            _ => TokenKind::Word,
        }
    }
}

/// A tokenizer for .tree files.
///
/// This struct represents the state of the tokenizer. It is not
/// tied to any particular input, while `TokenizerI` is.
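///
/// State lives in `Cell`s so that `TokenizerI`, which only holds a shared
/// borrow of this struct, can still advance the position and toggle modes.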
pub struct Tokenizer {
    /// The current position of the tokenizer in the input.
    ///
    /// By default this is set to the start of the input.
    pos: Cell<Position>,
    /// When true, the tokenizer is in `identifier` mode.
    ///
    /// In `identifier` mode, the tokenizer will error if it encounters a
    /// character that is not a valid identifier character.
    /// This is to prevent malformed names when emitting identifiers.
    identifier_mode: Cell<bool>,
}

impl Default for Tokenizer {
    fn default() -> Self {
        Self::new()
    }
}

impl Tokenizer {
    /// Create a new tokenizer.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            pos: Cell::new(Position::new(0, 1, 1)),
            identifier_mode: Cell::new(false),
        }
    }

    /// Tokenize the input .tree text.
    ///
    /// `tokenize` is the entry point of the `Tokenizer`.
    /// It takes a string of .tree text and returns a vector of tokens.
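    ///
    /// # Examples
    ///
    /// A sketch of the happy and error paths; the `bulloak_syntax` import
    /// path is an assumption:
    ///
    /// ```
    /// use bulloak_syntax::tokenizer::{TokenKind, Tokenizer};
    ///
    /// let mut tokenizer = Tokenizer::new();
    ///
    /// // A bare word is scanned as a `Word` token.
    /// let tokens = tokenizer.tokenize("Foo_Test").unwrap();
    /// assert_eq!(tokens[0].kind, TokenKind::Word);
    ///
    /// // Inside a condition title, characters that cannot appear in an
    /// // identifier are rejected.
    /// assert!(tokenizer.tokenize("Foo\n└── when |weird").is_err());
    /// ```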
    pub fn tokenize(&mut self, text: &str) -> Result<Vec<Token>> {
        TokenizerI::new(self, text).tokenize()
    }

    /// Reset the tokenizer's state.
    fn reset(&self) {
        self.pos.set(Position::new(0, 1, 1));
        self.identifier_mode.set(false);
    }
}

/// `TokenizerI` is the internal tokenizer implementation.
struct TokenizerI<'s, T> {
    /// The text being tokenized.
    text: &'s str,
    /// The tokenizer state.
    tokenizer: T,
}

impl<'s, T: Borrow<Tokenizer>> TokenizerI<'s, T> {
    /// Create an internal tokenizer from a tokenizer state holder
    /// and the input text.
    fn new(tokenizer: T, text: &'s str) -> Self {
        Self { text, tokenizer }
    }

    /// Return a reference to the tokenizer state.
    fn tokenizer(&self) -> &Tokenizer {
        self.tokenizer.borrow()
    }

    /// Create a new error with the given span and error kind.
    fn error(&self, span: Span, kind: ErrorKind) -> Error {
        Error { kind, text: self.text.to_owned(), span }
    }

    /// Return a reference to the text being tokenized.
    fn text(&self) -> &str {
        self.text
    }

    /// Return the character at the current position of the tokenizer.
    ///
    /// This panics if the current position does not point to a valid char.
    fn char(&self) -> char {
        self.char_at(self.offset())
    }

    /// Return the character at the given position.
    ///
    /// This panics if the given position does not point to a valid char.
    fn char_at(&self, i: usize) -> char {
        self.text()[i..]
            .chars()
            .next()
            .unwrap_or_else(|| panic!("expected char at offset {i}"))
    }

    /// Return the current offset of the tokenizer.
    ///
    /// The offset starts at `0` from the beginning of the tree.
    fn offset(&self) -> usize {
        self.tokenizer().pos.get().offset
    }

    /// Returns true if the input has been exhausted, i.e. if the next call
    /// to `bump` would return `None`.
    fn is_eof(&self) -> bool {
        self.offset() == self.text().len()
    }

    /// Return the current position of the tokenizer, which includes the
    /// offset, line, and column.
    fn pos(&self) -> Position {
        self.tokenizer().pos.get()
    }

    /// Create a span at the current position of the tokenizer. Both the start
    /// and end of the span are set to the current position.
    fn span(&self) -> Span {
        Span::splat(self.pos())
    }

    /// Peek at the next character in the input without advancing the
    /// tokenizer.
    ///
    /// If the input has been exhausted, then this returns `None`.
    fn peek(&self) -> Option<char> {
        if self.is_eof() {
            return None;
        }
        self.text()[self.offset() + self.char().len_utf8()..].chars().next()
    }

    /// Enters identifier mode.
    fn enter_identifier_mode(&self) {
        self.tokenizer().identifier_mode.set(true);
    }

    /// Exits identifier mode.
    fn exit_identifier_mode(&self) {
        self.tokenizer().identifier_mode.set(false);
    }

    /// Returns true if the tokenizer is in identifier mode.
    fn is_identifier_mode(&self) -> bool {
        self.tokenizer().identifier_mode.get()
    }

    /// Returns the tokenizer to its default mode.
    fn exit_mode(&self) {
        if self.is_identifier_mode() {
            self.exit_identifier_mode();
        }
    }

    /// Advance the tokenizer by one character.
    ///
    /// If the input has been exhausted, then this returns `None`.
    ///
    /// This panics when the number of lines or columns does not fit `usize`.
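    ///
    /// For example, bumping past a `'\n'` at `Position { offset: 4, line: 1,
    /// column: 5 }` moves the tokenizer to `Position { offset: 5, line: 2,
    /// column: 1 }`: the line advances and the column resets.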
    fn bump(&self) -> Option<char> {
        if self.is_eof() {
            return None;
        }
        let Position { mut offset, mut line, mut column } = self.pos();

        if self.char() == '\n' {
            line = line.checked_add(1).expect("line overflowed usize");
            column = 1;
        } else {
            column = column.checked_add(1).expect("column overflowed usize");
        }

        offset += self.char().len_utf8();
        self.tokenizer().pos.set(Position { offset, line, column });
        self.text()[self.offset()..].chars().next()
    }

    /// Tokenize the text.
    pub(crate) fn tokenize(&self) -> Result<Vec<Token>> {
        let mut tokens = Vec::new();
        self.tokenizer().reset();

        loop {
            if self.is_eof() {
                break;
            }

            match self.char() {
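                // Spacing and the `─`/`│` rule characters carry no meaning
                // of their own; skip them.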
                ' ' | '─' | '│' => {}
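                // A line break (or tab/carriage return) ends the current
                // title, so leave identifier mode if it is active.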
                '\n' | '\t' | '\r' => {
                    self.exit_mode();
                }
                '├' => tokens.push(Token {
                    kind: TokenKind::Tee,
                    span: self.span(),
                    lexeme: "├".to_owned(),
                }),
                '└' => tokens.push(Token {
                    kind: TokenKind::Corner,
                    span: self.span(),
                    lexeme: "└".to_owned(),
                }),
                // Comments start with `//`.
                '/' if self.peek().is_some_and(|c| c == '/') => {
                    self.exit_mode();
                    self.scan_comments();
                }
                _ => {
                    let token = self.scan_word()?;

                    let last_is_branch =
                        tokens.last().is_some_and(Token::is_branch);
                    let is_condition = token.kind == TokenKind::When
                        || token.kind == TokenKind::Given;
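                    // A `when`/`given` keyword right after a branch starts
                    // a condition title, whose words must form a valid
                    // identifier when emitted.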
                    if last_is_branch && is_condition {
                        self.enter_identifier_mode();
                    }

                    tokens.push(token);
                }
            }

            if self.bump().is_none() {
                break;
            }
        }

        Ok(tokens)
    }

    /// Discards all characters until the end of the line.
    fn scan_comments(&self) {
        loop {
            match self.peek() {
                Some('\n') | None => break,
                Some(_) => self.bump(),
            };
        }
    }

    /// Consumes a word from the input.
    ///
    /// A word is defined as a sequence of characters that are not whitespace.
    /// If the word is a keyword, then the appropriate token is returned.
    /// Otherwise, a `Word` token is returned.
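    ///
    /// For instance, scanning `when` yields a `TokenKind::When` token,
    /// while `revert.` yields a `TokenKind::Word` token with the lexeme
    /// `"revert."`.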
    fn scan_word(&self) -> Result<Token> {
        let mut lexeme = String::new();
        let span_start = self.pos();

        loop {
            if self.is_identifier_mode() && !self.char().is_valid_identifier() {
                let kind = ErrorKind::IdentifierCharInvalid(self.char());
                let error = self.error(self.span(), kind);
                return Err(error);
            }

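            // The word ends at EOF or right before the next whitespace
            // character; the current character is the word's last.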
            if self.peek().is_none()
                || self.peek().is_some_and(char::is_whitespace)
            {
                lexeme.push(self.char());
                let kind = TokenKind::from(lexeme.as_str());
                let span = self.span().with_start(span_start);
                return Ok(Token { kind, span, lexeme });
            }

            lexeme.push(self.char());
            self.bump();
        }
    }
}

#[cfg(test)]
mod tests {
    use indoc::indoc;
    use pretty_assertions::assert_eq;

    use crate::{
        span::Span,
        test_utils::{p, s, TestError},
        tokenizer::{
            self, ErrorKind::IdentifierCharInvalid, Token, TokenKind, Tokenizer,
        },
    };

    impl PartialEq<tokenizer::Error> for TestError<tokenizer::ErrorKind> {
        fn eq(&self, other: &tokenizer::Error) -> bool {
            self.span == other.span && self.kind == other.kind
        }
    }

    impl PartialEq<TestError<tokenizer::ErrorKind>> for tokenizer::Error {
        fn eq(&self, other: &TestError<tokenizer::ErrorKind>) -> bool {
            self.span == other.span && self.kind == other.kind
        }
    }

    fn e<K>(kind: K, span: Span) -> TestError<K> {
        TestError { kind, span }
    }

    fn t(kind: TokenKind, lexeme: &str, span: Span) -> Token {
        Token { kind, lexeme: lexeme.to_owned(), span }
    }

    fn tokenize(text: &str) -> tokenizer::Result<Vec<Token>> {
        Tokenizer::new().tokenize(text)
    }

    #[test]
    fn only_contract_name() -> anyhow::Result<()> {
        let simple_name = String::from("Foo");
        let starts_whitespace = String::from(" Foo");
        let ends_whitespace = String::from("Foo ");

        let mut tokenizer = Tokenizer::new();

        assert_eq!(
            tokenizer.tokenize(&simple_name)?,
            vec![t(TokenKind::Word, "Foo", s(p(0, 1, 1), p(2, 1, 3)))]
        );
        assert_eq!(
            tokenizer.tokenize(&starts_whitespace)?,
            vec![t(TokenKind::Word, "Foo", s(p(1, 1, 2), p(3, 1, 4)))]
        );
        assert_eq!(
            tokenizer.tokenize(&ends_whitespace)?,
            vec![t(TokenKind::Word, "Foo", s(p(0, 1, 1), p(2, 1, 3)))]
        );

        Ok(())
    }

    #[test]
    fn comments() {
        let file_contents = String::from(indoc! {"
            Foo_Test
            └── when something bad happens // some comments
               └── it should revert
        "});

        #[rustfmt::skip]
        assert_eq!(
            tokenize(&file_contents).unwrap(),
            vec![
                t(TokenKind::Word,   "Foo_Test",  s(p(0, 1, 1),   p(7, 1, 8))),
                t(TokenKind::Corner, "└",         s(p(9, 2, 1),   p(9, 2, 1))),
                t(TokenKind::When,   "when",      s(p(19, 2, 5),  p(22, 2, 8))),
                t(TokenKind::Word,   "something", s(p(24, 2, 10), p(32, 2, 18))),
                t(TokenKind::Word,   "bad",       s(p(34, 2, 20), p(36, 2, 22))),
                t(TokenKind::Word,   "happens",   s(p(38, 2, 24), p(44, 2, 30))),
                t(TokenKind::Corner, "└",         s(p(66, 3, 4),  p(66, 3, 4))),
                t(TokenKind::It,     "it",        s(p(76, 3, 8),  p(77, 3, 9))),
                t(TokenKind::Word,   "should",    s(p(79, 3, 11), p(84, 3, 16))),
                t(TokenKind::Word,   "revert",    s(p(86, 3, 18), p(91, 3, 23))),
            ]
        );

        let file_contents = String::from(indoc! {"
            Foo_Test
            └── when something bad happens
               // some comments
               └── it should revert
        "});

        #[rustfmt::skip]
        assert_eq!(
            tokenize(&file_contents).unwrap(),
            vec![
                t(TokenKind::Word,   "Foo_Test",  s(p(0, 1, 1),   p(7, 1, 8))),
                t(TokenKind::Corner, "└",         s(p(9, 2, 1),   p(9, 2, 1))),
                t(TokenKind::When,   "when",      s(p(19, 2, 5),  p(22, 2, 8))),
                t(TokenKind::Word,   "something", s(p(24, 2, 10), p(32, 2, 18))),
                t(TokenKind::Word,   "bad",       s(p(34, 2, 20), p(36, 2, 22))),
                t(TokenKind::Word,   "happens",   s(p(38, 2, 24), p(44, 2, 30))),
                t(TokenKind::Corner, "└",         s(p(69, 4, 4),  p(69, 4, 4))),
                t(TokenKind::It,     "it",        s(p(79, 4, 8),  p(80, 4, 9))),
                t(TokenKind::Word,   "should",    s(p(82, 4, 11), p(87, 4, 16))),
                t(TokenKind::Word,   "revert",    s(p(89, 4, 18), p(94, 4, 23))),
            ]
        );
    }

    #[test]
    fn invalid_characters() {
        macro_rules! invalid_chars {
            ($($char:literal => $input:expr => $pos:expr),* $(,)?) => {
                $(
                    let input = format!("foo\n└── {} identifier", $input);
                    assert_eq!(
                        tokenize(&input).unwrap_err(),
                        e(IdentifierCharInvalid($char), s($pos, $pos))
                    );
                )*
            };
        }

        invalid_chars! {
            '|' => "when |weird"  => p(19, 2, 10),
            '|' => "when w|eird"  => p(20, 2, 11),
            '|' => "when weird|"  => p(24, 2, 15),
            '.' => "when .weird"  => p(19, 2, 10),
            ',' => "when w,eird"  => p(20, 2, 11),
            '|' => "given |weird" => p(20, 2, 11),
            '|' => "given w|eird" => p(21, 2, 12),
            '|' => "given weird|" => p(25, 2, 16),
            '.' => "given .weird" => p(20, 2, 11),
            ',' => "given w,eird" => p(21, 2, 12),
        };
    }

    #[test]
    fn only_filename_and_newline() {
        let simple_name = String::from("foo\n");
        let starts_whitespace = String::from(" foo\n");
        let ends_whitespace = String::from("foo \n");

        let expected =
            vec![t(TokenKind::Word, "foo", s(p(0, 1, 1), p(2, 1, 3)))];
        let mut tokenizer = Tokenizer::new();

        assert_eq!(tokenizer.tokenize(&simple_name).unwrap(), expected);
        assert_eq!(
            tokenizer.tokenize(&starts_whitespace).unwrap(),
            vec![t(TokenKind::Word, "foo", s(p(1, 1, 2), p(3, 1, 4)))]
        );
        assert_eq!(tokenizer.tokenize(&ends_whitespace).unwrap(), expected);
    }

    #[test]
    fn one_child() {
        // Test parsing a when.
        let file_contents = String::from(indoc! {"
            Foo_Test
            └── when something bad happens
               └── it should revert
        "});

        assert_eq!(
            tokenize(&file_contents).unwrap(),
            vec![
                t(TokenKind::Word, "Foo_Test", s(p(0, 1, 1), p(7, 1, 8))),
                t(TokenKind::Corner, "└", s(p(9, 2, 1), p(9, 2, 1))),
                t(TokenKind::When, "when", s(p(19, 2, 5), p(22, 2, 8))),
                t(TokenKind::Word, "something", s(p(24, 2, 10), p(32, 2, 18))),
                t(TokenKind::Word, "bad", s(p(34, 2, 20), p(36, 2, 22))),
                t(TokenKind::Word, "happens", s(p(38, 2, 24), p(44, 2, 30))),
                t(TokenKind::Corner, "└", s(p(49, 3, 4), p(49, 3, 4))),
                t(TokenKind::It, "it", s(p(59, 3, 8), p(60, 3, 9))),
                t(TokenKind::Word, "should", s(p(62, 3, 11), p(67, 3, 16))),
                t(TokenKind::Word, "revert", s(p(69, 3, 18), p(74, 3, 23))),
            ]
        );

        // Test parsing a given.
        let file_contents = String::from(indoc! {"
            Foo_Test
            └── given something bad happens
               └── it should revert
        "});

        assert_eq!(
            tokenize(&file_contents).unwrap(),
            vec![
                t(TokenKind::Word, "Foo_Test", s(p(0, 1, 1), p(7, 1, 8))),
                t(TokenKind::Corner, "└", s(p(9, 2, 1), p(9, 2, 1))),
                t(TokenKind::Given, "given", s(p(19, 2, 5), p(23, 2, 9))),
                t(TokenKind::Word, "something", s(p(25, 2, 11), p(33, 2, 19))),
                t(TokenKind::Word, "bad", s(p(35, 2, 21), p(37, 2, 23))),
                t(TokenKind::Word, "happens", s(p(39, 2, 25), p(45, 2, 31))),
                t(TokenKind::Corner, "└", s(p(50, 3, 4), p(50, 3, 4))),
                t(TokenKind::It, "it", s(p(60, 3, 8), p(61, 3, 9))),
                t(TokenKind::Word, "should", s(p(63, 3, 11), p(68, 3, 16))),
                t(TokenKind::Word, "revert", s(p(70, 3, 18), p(75, 3, 23))),
            ]
        );
    }

    #[test]
    fn multiple_children() {
        let file_contents = String::from(indoc! {"
            multiple_children.t.sol
            ├── when stuff called
            │  └── it should revert
            └── when not stuff called
               ├── when the deposit amount is zero
               │  └── it should revert
               └── when the deposit amount is not zero
                  ├── when the number count is zero
                  │  └── it should revert
                  ├── when the asset is not a contract
                  │  └── it should revert
                  └── when the asset is a contract
                      ├── when the asset misses the ERC-20 return value
                      │  ├── it should create the child
                      │  ├── it should perform the ERC-20 transfers
                      │  └── it should emit a {MultipleChildren} event
                      └── when the asset does not miss the ERC-20 return value
                          ├── it should create the child
                          └── it should emit a {MultipleChildren} event
        "});

        let tokens = tokenize(&file_contents).unwrap();
        #[rustfmt::skip]
        let expected = vec![
            t(TokenKind::Word, "multiple_children.t.sol", s(p(0, 1, 1), p(22, 1, 23))),
            t(TokenKind::Tee, "├", s(p(24, 2, 1), p(24, 2, 1))),
            t(TokenKind::When, "when", s(p(34, 2, 5), p(37, 2, 8))),
            t(TokenKind::Word, "stuff", s(p(39, 2, 10), p(43, 2, 14))),
            t(TokenKind::Word, "called", s(p(45, 2, 16), p(50, 2, 21))),
            t(TokenKind::Corner, "└", s(p(57, 3, 4), p(57, 3, 4))),
            t(TokenKind::It, "it", s(p(67, 3, 8), p(68, 3, 9))),
            t(TokenKind::Word, "should", s(p(70, 3, 11), p(75, 3, 16))),
            t(TokenKind::Word, "revert", s(p(77, 3, 18), p(82, 3, 23))),
            t(TokenKind::Corner, "└", s(p(84, 4, 1), p(84, 4, 1))),
            t(TokenKind::When, "when", s(p(94, 4, 5), p(97, 4, 8))),
            t(TokenKind::Word, "not", s(p(99, 4, 10), p(101, 4, 12))),
            t(TokenKind::Word, "stuff", s(p(103, 4, 14), p(107, 4, 18))),
            t(TokenKind::Word, "called", s(p(109, 4, 20), p(114, 4, 25))),
            t(TokenKind::Tee, "├", s(p(119, 5, 4), p(119, 5, 4))),
            t(TokenKind::When, "when", s(p(129, 5, 8), p(132, 5, 11))),
            t(TokenKind::Word, "the", s(p(134, 5, 13), p(136, 5, 15))),
            t(TokenKind::Word, "deposit", s(p(138, 5, 17), p(144, 5, 23))),
            t(TokenKind::Word, "amount", s(p(146, 5, 25), p(151, 5, 30))),
            t(TokenKind::Word, "is", s(p(153, 5, 32), p(154, 5, 33))),
            t(TokenKind::Word, "zero", s(p(156, 5, 35), p(159, 5, 38))),
            t(TokenKind::Corner, "└", s(p(169, 6, 7), p(169, 6, 7))),
            t(TokenKind::It, "it", s(p(179, 6, 11), p(180, 6, 12))),
            t(TokenKind::Word, "should", s(p(182, 6, 14), p(187, 6, 19))),
            t(TokenKind::Word, "revert", s(p(189, 6, 21), p(194, 6, 26))),
            t(TokenKind::Corner, "└", s(p(199, 7, 4), p(199, 7, 4))),
            t(TokenKind::When, "when", s(p(209, 7, 8), p(212, 7, 11))),
            t(TokenKind::Word, "the", s(p(214, 7, 13), p(216, 7, 15))),
            t(TokenKind::Word, "deposit", s(p(218, 7, 17), p(224, 7, 23))),
            t(TokenKind::Word, "amount", s(p(226, 7, 25), p(231, 7, 30))),
            t(TokenKind::Word, "is", s(p(233, 7, 32), p(234, 7, 33))),
            t(TokenKind::Word, "not", s(p(236, 7, 35), p(238, 7, 37))),
            t(TokenKind::Word, "zero", s(p(240, 7, 39), p(243, 7, 42))),
            t(TokenKind::Tee, "├", s(p(251, 8, 7), p(251, 8, 7))),
            t(TokenKind::When, "when", s(p(261, 8, 11), p(264, 8, 14))),
            t(TokenKind::Word, "the", s(p(266, 8, 16), p(268, 8, 18))),
            t(TokenKind::Word, "number", s(p(270, 8, 20), p(275, 8, 25))),
            t(TokenKind::Word, "count", s(p(277, 8, 27), p(281, 8, 31))),
            t(TokenKind::Word, "is", s(p(283, 8, 33), p(284, 8, 34))),
            t(TokenKind::Word, "zero", s(p(286, 8, 36), p(289, 8, 39))),
            t(TokenKind::Corner, "└", s(p(302, 9, 10), p(302, 9, 10))),
            t(TokenKind::It, "it", s(p(312, 9, 14), p(313, 9, 15))),
            t(TokenKind::Word, "should", s(p(315, 9, 17), p(320, 9, 22))),
            t(TokenKind::Word, "revert", s(p(322, 9, 24), p(327, 9, 29))),
            t(TokenKind::Tee, "├", s(p(335, 10, 7), p(335, 10, 7))),
            t(TokenKind::When, "when", s(p(345, 10, 11), p(348, 10, 14))),
            t(TokenKind::Word, "the", s(p(350, 10, 16), p(352, 10, 18))),
            t(TokenKind::Word, "asset", s(p(354, 10, 20), p(358, 10, 24))),
            t(TokenKind::Word, "is", s(p(360, 10, 26), p(361, 10, 27))),
            t(TokenKind::Word, "not", s(p(363, 10, 29), p(365, 10, 31))),
            t(TokenKind::Word, "a", s(p(367, 10, 33), p(367, 10, 33))),
            t(TokenKind::Word, "contract", s(p(369, 10, 35), p(376, 10, 42))),
            t(TokenKind::Corner, "└", s(p(389, 11, 10), p(389, 11, 10))),
            t(TokenKind::It, "it", s(p(399, 11, 14), p(400, 11, 15))),
            t(TokenKind::Word, "should", s(p(402, 11, 17), p(407, 11, 22))),
            t(TokenKind::Word, "revert", s(p(409, 11, 24), p(414, 11, 29))),
            t(TokenKind::Corner, "└", s(p(422, 12, 7), p(422, 12, 7))),
            t(TokenKind::When, "when", s(p(432, 12, 11), p(435, 12, 14))),
            t(TokenKind::Word, "the", s(p(437, 12, 16), p(439, 12, 18))),
            t(TokenKind::Word, "asset", s(p(441, 12, 20), p(445, 12, 24))),
            t(TokenKind::Word, "is", s(p(447, 12, 26), p(448, 12, 27))),
            t(TokenKind::Word, "a", s(p(450, 12, 29), p(450, 12, 29))),
            t(TokenKind::Word, "contract", s(p(452, 12, 31), p(459, 12, 38))),
            t(TokenKind::Tee, "├", s(p(471, 13, 11), p(471, 13, 11))),
            t(TokenKind::When, "when", s(p(481, 13, 15), p(484, 13, 18))),
            t(TokenKind::Word, "the", s(p(486, 13, 20), p(488, 13, 22))),
            t(TokenKind::Word, "asset", s(p(490, 13, 24), p(494, 13, 28))),
            t(TokenKind::Word, "misses", s(p(496, 13, 30), p(501, 13, 35))),
            t(TokenKind::Word, "the", s(p(503, 13, 37), p(505, 13, 39))),
            t(TokenKind::Word, "ERC-20", s(p(507, 13, 41), p(512, 13, 46))),
            t(TokenKind::Word, "return", s(p(514, 13, 48), p(519, 13, 53))),
            t(TokenKind::Word, "value", s(p(521, 13, 55), p(525, 13, 59))),
            t(TokenKind::Tee, "├", s(p(542, 14, 14), p(542, 14, 14))),
            t(TokenKind::It, "it", s(p(552, 14, 18), p(553, 14, 19))),
            t(TokenKind::Word, "should", s(p(555, 14, 21), p(560, 14, 26))),
            t(TokenKind::Word, "create", s(p(562, 14, 28), p(567, 14, 33))),
            t(TokenKind::Word, "the", s(p(569, 14, 35), p(571, 14, 37))),
            t(TokenKind::Word, "child", s(p(573, 14, 39), p(577, 14, 43))),
            t(TokenKind::Tee, "├", s(p(594, 15, 14), p(594, 15, 14))),
            t(TokenKind::It, "it", s(p(604, 15, 18), p(605, 15, 19))),
            t(TokenKind::Word, "should", s(p(607, 15, 21), p(612, 15, 26))),
            t(TokenKind::Word, "perform", s(p(614, 15, 28), p(620, 15, 34))),
            t(TokenKind::Word, "the", s(p(622, 15, 36), p(624, 15, 38))),
            t(TokenKind::Word, "ERC-20", s(p(626, 15, 40), p(631, 15, 45))),
            t(TokenKind::Word, "transfers", s(p(633, 15, 47), p(641, 15, 55))),
            t(TokenKind::Corner, "└", s(p(658, 16, 14), p(658, 16, 14))),
            t(TokenKind::It, "it", s(p(668, 16, 18), p(669, 16, 19))),
            t(TokenKind::Word, "should", s(p(671, 16, 21), p(676, 16, 26))),
            t(TokenKind::Word, "emit", s(p(678, 16, 28), p(681, 16, 31))),
            t(TokenKind::Word, "a", s(p(683, 16, 33), p(683, 16, 33))),
            t(TokenKind::Word, "{MultipleChildren}", s(p(685, 16, 35), p(702, 16, 52))),
            t(TokenKind::Word, "event", s(p(704, 16, 54), p(708, 16, 58))),
            t(TokenKind::Corner, "└", s(p(720, 17, 11), p(720, 17, 11))),
            t(TokenKind::When, "when", s(p(730, 17, 15), p(733, 17, 18))),
            t(TokenKind::Word, "the", s(p(735, 17, 20), p(737, 17, 22))),
            t(TokenKind::Word, "asset", s(p(739, 17, 24), p(743, 17, 28))),
            t(TokenKind::Word, "does", s(p(745, 17, 30), p(748, 17, 33))),
            t(TokenKind::Word, "not", s(p(750, 17, 35), p(752, 17, 37))),
            t(TokenKind::Word, "miss", s(p(754, 17, 39), p(757, 17, 42))),
            t(TokenKind::Word, "the", s(p(759, 17, 44), p(761, 17, 46))),
            t(TokenKind::Word, "ERC-20", s(p(763, 17, 48), p(768, 17, 53))),
            t(TokenKind::Word, "return", s(p(770, 17, 55), p(775, 17, 60))),
            t(TokenKind::Word, "value", s(p(777, 17, 62), p(781, 17, 66))),
            t(TokenKind::Tee, "├", s(p(797, 18, 15), p(797, 18, 15))),
            t(TokenKind::It, "it", s(p(807, 18, 19), p(808, 18, 20))),
            t(TokenKind::Word, "should", s(p(810, 18, 22), p(815, 18, 27))),
            t(TokenKind::Word, "create", s(p(817, 18, 29), p(822, 18, 34))),
            t(TokenKind::Word, "the", s(p(824, 18, 36), p(826, 18, 38))),
            t(TokenKind::Word, "child", s(p(828, 18, 40), p(832, 18, 44))),
            t(TokenKind::Corner, "└", s(p(848, 19, 15), p(848, 19, 15))),
            t(TokenKind::It, "it", s(p(858, 19, 19), p(859, 19, 20))),
            t(TokenKind::Word, "should", s(p(861, 19, 22), p(866, 19, 27))),
            t(TokenKind::Word, "emit", s(p(868, 19, 29), p(871, 19, 32))),
            t(TokenKind::Word, "a", s(p(873, 19, 34), p(873, 19, 34))),
            t(TokenKind::Word, "{MultipleChildren}", s(p(875, 19, 36), p(892, 19, 53))),
            t(TokenKind::Word, "event", s(p(894, 19, 55), p(898, 19, 59))),
        ];

        assert_eq!(tokens.len(), expected.len());
        assert_eq!(tokens, expected);
    }

    #[test]
    fn case_insensitive_keywords() {
        let file_contents =
            String::from("Foo_Test\n└── GIVEN something bad happens\n   └── whEN stuff is true\n   └── It should revert.");

        assert_eq!(
            tokenize(&file_contents).unwrap(),
            vec![
                t(TokenKind::Word, "Foo_Test", s(p(0, 1, 1), p(7, 1, 8))),
                t(TokenKind::Corner, "└", s(p(9, 2, 1), p(9, 2, 1))),
                t(TokenKind::Given, "GIVEN", s(p(19, 2, 5), p(23, 2, 9))),
                t(TokenKind::Word, "something", s(p(25, 2, 11), p(33, 2, 19))),
                t(TokenKind::Word, "bad", s(p(35, 2, 21), p(37, 2, 23))),
                t(TokenKind::Word, "happens", s(p(39, 2, 25), p(45, 2, 31))),
                t(TokenKind::Corner, "└", s(p(50, 3, 4), p(50, 3, 4))),
                t(TokenKind::When, "whEN", s(p(60, 3, 8), p(63, 3, 11))),
                t(TokenKind::Word, "stuff", s(p(65, 3, 13), p(69, 3, 17))),
                t(TokenKind::Word, "is", s(p(71, 3, 19), p(72, 3, 20))),
                t(TokenKind::Word, "true", s(p(74, 3, 22), p(77, 3, 25))),
                t(TokenKind::Corner, "└", s(p(82, 4, 4), p(82, 4, 4))),
                t(TokenKind::It, "It", s(p(92, 4, 8), p(93, 4, 9))),
                t(TokenKind::Word, "should", s(p(95, 4, 11), p(100, 4, 16))),
                t(TokenKind::Word, "revert.", s(p(102, 4, 18), p(108, 4, 24))),
            ]
        );
    }
}