// scilla_parser/parser/lexer.rs

1use std::{convert::From, iter::Peekable, str::CharIndices, string::String};
2
3use regex::Regex;
4
/// A successfully lexed token flanked by its start and end locations, or a
/// lexing error.
///
/// Note the tuple order is `(start, token, end)` — the location type appears
/// on both sides — even though the type parameters are declared in the order
/// `<Tok, Loc, Error>`.
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
6
// Canonical spellings of the Scilla keywords. The lexer matches these
// verbatim against the input, and `From<Token<S>> for String` uses the same
// constants to render keyword tokens back to text, so the two directions
// cannot drift apart.
const KEYWORD_FORALL: &str = "forall";
const KEYWORD_BUILTIN: &str = "builtin";
const KEYWORD_LIBRARY: &str = "library";
const KEYWORD_IMPORT: &str = "import";
const KEYWORD_LET: &str = "let";
const KEYWORD_IN: &str = "in";
const KEYWORD_MATCH: &str = "match";
const KEYWORD_WITH: &str = "with";
const KEYWORD_END: &str = "end";
const KEYWORD_FUN: &str = "fun";
const KEYWORD_TFUN: &str = "tfun";
const KEYWORD_CONTRACT: &str = "contract";
const KEYWORD_TRANSITION: &str = "transition";
const KEYWORD_SEND: &str = "send";
const KEYWORD_FIELD: &str = "field";
const KEYWORD_ACCEPT: &str = "accept";
const KEYWORD_EXISTS: &str = "exists";
const KEYWORD_DELETE: &str = "delete";
const KEYWORD_THROW: &str = "throw";
// Note: some keywords are capitalized in Scilla (type-level names).
const KEYWORD_MAP: &str = "Map";
const KEYWORD_SCILLA_VERSION: &str = "scilla_version";
const KEYWORD_TYPE: &str = "type";
const KEYWORD_OF: &str = "of";
const KEYWORD_AS: &str = "as";
const KEYWORD_PROCEDURE: &str = "procedure";
const KEYWORD_EMP: &str = "Emp";
const KEYWORD_EVENT: &str = "event";
const KEYWORD_EVENT_TYPE: &str = "Event";
// `ByStr` is also matched as a prefix of sized variants such as `ByStr20`.
const KEYWORD_BYSTR: &str = "ByStr";
36
/// A lexical token produced by the [`Lexer`].
///
/// The type parameter `S` is the payload carried by text-bearing variants
/// (numbers, identifiers, literals); the lexer instantiates it with `&str`
/// slices borrowed from the source document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token<S> {
    // Punctuation and operators.
    Plus,
    Asterisk,
    Semicolon,
    Colon,
    Dot,
    Pipe,
    OpenBracket,
    CloseBracket,
    OpenParen,
    CloseParen,
    OpenBrace,
    CloseBrace,
    Comma,
    DoubleArrow,
    Arrow,
    Equals,
    Ampersand,
    LeftArrow,
    ColonEquals,

    At,
    Minus,
    Underscore,
    // Keywords; their spellings are the `KEYWORD_*` constants above.
    Forall,
    Builtin,
    Library,
    Import,
    Let,
    In,
    Match,
    With,
    End,
    Fun,
    Tfun,
    Contract,
    Transition,
    Send,
    Field,
    Accept,
    Exists,
    Delete,
    Throw,
    Map,
    ScillaVersion,
    Type,
    Of,
    As,
    Procedure,
    Emp,
    Event,
    EventType,
    ByStr,
    // Text-bearing tokens: the payload is the matched source text.
    ByStrWithSize(S), // e.g. "ByStr20" — `ByStr` followed by digits
    Comment(S), // NOTE(review): the lexer currently skips comments, so this is not emitted
    Number(S),
    HexNumber(S),
    Identifier(S),
    TemplateIdentifier(S),
    CustomIdentifier(S),
    SpecialIdentifier(S),
    TypeName(S),
    // String literal including its surrounding quotes.
    StringLiteral(S),
    // Whitespace is skipped during lexing; kept for completeness of rendering.
    Whitespace,

    // Fallback for input that matched no rule.
    Unknown,
}
105
106impl<S: ToString> From<Token<S>> for String {
107    fn from(token: Token<S>) -> Self {
108        match token {
109            Token::ByStrWithSize(value) => value.to_string(),
110            Token::Comment(value) => value.to_string(),
111            Token::Number(value) => value.to_string(),
112            Token::HexNumber(value) => value.to_string(),
113            Token::Identifier(value) => value.to_string(),
114            Token::TemplateIdentifier(value) => value.to_string(),
115            Token::CustomIdentifier(value) => value.to_string(),
116            Token::SpecialIdentifier(value) => value.to_string(),
117            Token::TypeName(value) => value.to_string(),
118            Token::StringLiteral(value) => value.to_string(),
119            _ => match token {
120                Token::Plus => "+",
121                Token::Asterisk => "*",
122                Token::Semicolon => ";",
123                Token::Colon => ":",
124                Token::Dot => ".",
125                Token::Pipe => "|",
126                Token::OpenBracket => "[",
127                Token::CloseBracket => "]",
128                Token::OpenParen => "(",
129                Token::CloseParen => ")",
130                Token::OpenBrace => "{",
131                Token::CloseBrace => "}",
132                Token::Comma => ",",
133                Token::DoubleArrow => "=>",
134                Token::Arrow => "->",
135                Token::Equals => "=",
136                Token::Ampersand => "&",
137                Token::LeftArrow => "<-",
138                Token::ColonEquals => ":=",
139                Token::At => "@",
140                Token::Minus => "-",
141                Token::Underscore => "_",
142                Token::Forall => KEYWORD_FORALL,
143                Token::Builtin => KEYWORD_BUILTIN,
144                Token::Library => KEYWORD_LIBRARY,
145                Token::Import => KEYWORD_IMPORT,
146                Token::Let => KEYWORD_LET,
147                Token::In => KEYWORD_IN,
148                Token::Match => KEYWORD_MATCH,
149                Token::With => KEYWORD_WITH,
150                Token::End => KEYWORD_END,
151                Token::Fun => KEYWORD_FUN,
152                Token::Tfun => KEYWORD_TFUN,
153                Token::Contract => KEYWORD_CONTRACT,
154                Token::Transition => KEYWORD_TRANSITION,
155                Token::Send => KEYWORD_SEND,
156                Token::Field => KEYWORD_FIELD,
157                Token::Accept => KEYWORD_ACCEPT,
158                Token::Exists => KEYWORD_EXISTS,
159                Token::Delete => KEYWORD_DELETE,
160                Token::Throw => KEYWORD_THROW,
161                Token::Map => KEYWORD_MAP,
162                Token::ScillaVersion => KEYWORD_SCILLA_VERSION,
163                Token::Type => KEYWORD_TYPE,
164                Token::Of => KEYWORD_OF,
165                Token::As => KEYWORD_AS,
166                Token::Procedure => KEYWORD_PROCEDURE,
167                Token::Emp => KEYWORD_EMP,
168                Token::Event => KEYWORD_EVENT,
169                Token::EventType => KEYWORD_EVENT_TYPE,
170                Token::ByStr => KEYWORD_BYSTR,
171
172                Token::Whitespace => " ",
173                _ => "?", // Token::Unknown made as a wild card to avoid compiler complaining.
174            }
175            .to_string(),
176        }
177    }
178}
179
/// Error type for the lexer's [`Spanned`] results.
///
/// This enum has no variants, so no value of it can ever be constructed:
/// the lexer is infallible and every item it yields is `Ok`. It exists only
/// to satisfy the `Result`-shaped iterator item type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    // Intentionally uninhabited — lexing cannot fail.
}
184
/// Tokenizer over a Scilla source string.
///
/// Walks the input one character at a time while tracking the current
/// line/column so every emitted token can carry a [`SourcePosition`] span.
pub struct Lexer<'input> {
    /// Peekable `(byte_offset, char)` iterator over the input.
    chars: Peekable<CharIndices<'input>>,
    /// The full source text; used to slice out multi-character token text.
    document: &'input str,
    /// Zero-based line of the character currently being examined.
    line: usize,
    /// Zero-based column within the current line.
    character: usize,
    /// Byte offset of the most recently visited character; used to advance
    /// the column counter by the distance travelled since the last token.
    last_position: usize,
}

impl<'input> Lexer<'input> {
    /// Creates a lexer positioned at the very start of `input`.
    /// All counters are zero-based (machine indices, not human ones).
    pub fn new(input: &'input str) -> Self {
        let chars = input.char_indices().peekable();
        Self {
            chars,
            document: input,
            line: 0,
            character: 0,
            last_position: 0,
        }
    }
}
211
/// A location in the source document: absolute byte offset plus the
/// zero-based line/column it corresponds to.
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Default)]
pub struct SourcePosition {
    pub position: usize,
    pub line: usize,
    pub column: usize,
}

impl SourcePosition {
    /// True unless this is (at or beyond) the sentinel produced by
    /// [`SourcePosition::invalid_position`].
    pub fn is_valid(&self) -> bool {
        self.position < (usize::MAX >> 1)
    }

    /// The origin of the document: offset 0, line 0, column 0.
    pub fn start_position() -> Self {
        Self::default()
    }

    /// A sentinel marking "no position"; never compares valid.
    pub fn invalid_position() -> Self {
        Self {
            position: usize::MAX,
            line: usize::MAX,
            column: usize::MAX,
        }
    }

    /// A copy of `self` moved forward on the same line to `new_position`,
    /// with the column advanced by the distance covered.
    pub fn with_end(&self, new_position: usize) -> Self {
        Self {
            position: new_position,
            line: self.line,
            column: self.column + (new_position - self.position),
        }
    }
}
244
245impl<'input> Iterator for Lexer<'input> {
246    type Item = Spanned<Token<&'input str>, SourcePosition, ParseError>;
247
248    // <(usize, Token, usize, usize, usize);
249
250    fn next(&mut self) -> Option<Self::Item> {
251        while let Some((start, ch)) = self.chars.next() {
252            let source_position = SourcePosition {
253                position: start,
254                line: self.line,
255                column: self.character,
256            };
257
258            let (token, end): (Token<&'input str>, SourcePosition) = {
259                let look_ahead = self.chars.peek().map(|(_, next_ch)| *next_ch);
260                self.character += start - self.last_position;
261                self.last_position = start;
262
263                let next_is_alpha_num_under = look_ahead
264                    .map(|c| c.is_alphanumeric() || c == '_')
265                    .unwrap_or(false);
266                let next_is_numeric = look_ahead.map(|c| c.is_numeric()).unwrap_or(false);
267
268                // Handle more complex tokens, whitespace, and comments
269                if ch.is_whitespace() {
270                    if ch == '\n' {
271                        self.character = 0;
272                        self.line += 1;
273                    }
274                    continue;
275                } else if ch == '=' && look_ahead == Some('>') {
276                    self.chars.next();
277                    (
278                        Token::DoubleArrow,
279                        source_position.with_end(start + 2 * ch.len_utf8()),
280                    )
281                } else if ch == '-' && look_ahead == Some('>') {
282                    self.chars.next();
283                    (
284                        Token::Arrow,
285                        source_position.with_end(start + 2 * ch.len_utf8()),
286                    )
287                } else if ch == '-' && !next_is_numeric {
288                    (
289                        Token::Minus,
290                        source_position.with_end(start + ch.len_utf8()),
291                    )
292                } else if ch == '<' && look_ahead == Some('-') {
293                    self.chars.next();
294                    (
295                        Token::LeftArrow,
296                        source_position.with_end(start + 2 * ch.len_utf8()),
297                    )
298                } else if ch == ':' && look_ahead == Some('=') {
299                    self.chars.next();
300                    (
301                        Token::ColonEquals,
302                        source_position.with_end(start + 2 * ch.len_utf8()),
303                    )
304                } else if ch == '_' && !next_is_alpha_num_under {
305                    (
306                        Token::Underscore,
307                        source_position.with_end(start + ch.len_utf8()),
308                    )
309                } else if ch == '(' && look_ahead == Some('*') {
310                    // Consume comment
311
312                    self.chars.next(); // Consume '*'
313                    let mut comment = String::new();
314
315                    while let Some((_, ch)) = self.chars.next() {
316                        if ch == '*' && self.chars.peek().map(|(_, next_ch)| *next_ch) == Some(')')
317                        {
318                            self.chars.next();
319                            break;
320                        } else {
321                            comment.push(ch);
322                        }
323                    }
324
325                    continue;
326                    // TODO: Hack to avoid emitting comment. However, ideally these should be part of the AST or at least the token stream
327                    // let len = comment.len();
328                    // let end = start + len + 2 + 1; // +2: `*)`, +1: move to char beyond last
329                    // let s = &self.document[start + 2..end - 1]; // +2: skip `(*`
330                    // (Token::Comment(s), end)
331                } else {
332                    let (token, end): (Token<&'input str>, SourcePosition) = match ch {
333                        '+' => (Token::Plus, source_position.with_end(start + ch.len_utf8())),
334                        '*' => (
335                            Token::Asterisk,
336                            source_position.with_end(start + ch.len_utf8()),
337                        ),
338                        ';' => (
339                            Token::Semicolon,
340                            source_position.with_end(start + ch.len_utf8()),
341                        ),
342                        ':' => (
343                            Token::Colon,
344                            source_position.with_end(start + ch.len_utf8()),
345                        ),
346                        '.' => (Token::Dot, source_position.with_end(start + ch.len_utf8())),
347                        '|' => (Token::Pipe, source_position.with_end(start + ch.len_utf8())),
348                        '[' => (
349                            Token::OpenBracket,
350                            source_position.with_end(start + ch.len_utf8()),
351                        ),
352                        ']' => (
353                            Token::CloseBracket,
354                            source_position.with_end(start + ch.len_utf8()),
355                        ),
356                        '(' => (
357                            Token::OpenParen,
358                            source_position.with_end(start + ch.len_utf8()),
359                        ),
360                        ')' => (
361                            Token::CloseParen,
362                            source_position.with_end(start + ch.len_utf8()),
363                        ),
364                        '{' => (
365                            Token::OpenBrace,
366                            source_position.with_end(start + ch.len_utf8()),
367                        ),
368                        '}' => (
369                            Token::CloseBrace,
370                            source_position.with_end(start + ch.len_utf8()),
371                        ),
372                        ',' => (
373                            Token::Comma,
374                            source_position.with_end(start + ch.len_utf8()),
375                        ),
376                        '&' => (
377                            Token::Ampersand,
378                            source_position.with_end(start + ch.len_utf8()),
379                        ),
380                        '@' => (Token::At, source_position.with_end(start + ch.len_utf8())),
381                        '=' => (
382                            Token::Equals,
383                            source_position.with_end(start + ch.len_utf8()),
384                        ),
385                        _ => {
386                            let token_str: &str = &self.document[start..];
387                            let mut index = 0;
388                            let token_str_chars = token_str.chars();
389                            for (i, c) in token_str_chars.enumerate() {
390                                if !c.is_alphanumeric() && c != '_' {
391                                    index = i;
392                                    break;
393                                }
394                            }
395                            let keyword_token: &str = if index > 0 {
396                                &token_str[..index]
397                            } else {
398                                token_str
399                            };
400
401                            let (token, end): (Token<&'input str>, SourcePosition) =
402                                match keyword_token {
403                                    KEYWORD_FORALL => {
404                                        self.chars.nth(KEYWORD_FORALL.len() - 2);
405                                        (
406                                            Token::Forall,
407                                            source_position.with_end(start + KEYWORD_FORALL.len()),
408                                        )
409                                    }
410                                    KEYWORD_BUILTIN => {
411                                        self.chars.nth(KEYWORD_BUILTIN.len() - 2);
412                                        (
413                                            Token::Builtin,
414                                            source_position.with_end(start + KEYWORD_BUILTIN.len()),
415                                        )
416                                    }
417                                    KEYWORD_LIBRARY => {
418                                        self.chars.nth(KEYWORD_LIBRARY.len() - 2);
419                                        (
420                                            Token::Library,
421                                            source_position.with_end(start + KEYWORD_LIBRARY.len()),
422                                        )
423                                    }
424                                    KEYWORD_IMPORT => {
425                                        self.chars.nth(KEYWORD_IMPORT.len() - 2);
426                                        (
427                                            Token::Import,
428                                            source_position.with_end(start + KEYWORD_IMPORT.len()),
429                                        )
430                                    }
431                                    KEYWORD_LET => {
432                                        self.chars.nth(KEYWORD_LET.len() - 2);
433                                        (
434                                            Token::Let,
435                                            source_position.with_end(start + KEYWORD_LET.len()),
436                                        )
437                                    }
438                                    KEYWORD_IN => {
439                                        self.chars.nth(KEYWORD_IN.len() - 2);
440                                        (
441                                            Token::In,
442                                            source_position.with_end(start + KEYWORD_IN.len()),
443                                        )
444                                    }
445                                    KEYWORD_MATCH => {
446                                        self.chars.nth(KEYWORD_MATCH.len() - 2);
447                                        (
448                                            Token::Match,
449                                            source_position.with_end(start + KEYWORD_MATCH.len()),
450                                        )
451                                    }
452                                    KEYWORD_WITH => {
453                                        self.chars.nth(KEYWORD_WITH.len() - 2);
454                                        (
455                                            Token::With,
456                                            source_position.with_end(start + KEYWORD_WITH.len()),
457                                        )
458                                    }
459                                    KEYWORD_END => {
460                                        self.chars.nth(KEYWORD_END.len() - 2);
461                                        (
462                                            Token::End,
463                                            source_position.with_end(start + KEYWORD_END.len()),
464                                        )
465                                    }
466                                    KEYWORD_FUN => {
467                                        self.chars.nth(KEYWORD_FUN.len() - 2);
468                                        (
469                                            Token::Fun,
470                                            source_position.with_end(start + KEYWORD_FUN.len()),
471                                        )
472                                    }
473                                    KEYWORD_TFUN => {
474                                        self.chars.nth(KEYWORD_TFUN.len() - 2);
475                                        (
476                                            Token::Tfun,
477                                            source_position.with_end(start + KEYWORD_TFUN.len()),
478                                        )
479                                    }
480                                    KEYWORD_CONTRACT => {
481                                        self.chars.nth(KEYWORD_CONTRACT.len() - 2);
482                                        (
483                                            Token::Contract,
484                                            source_position
485                                                .with_end(start + KEYWORD_CONTRACT.len()),
486                                        )
487                                    }
488                                    KEYWORD_TRANSITION => {
489                                        self.chars.nth(KEYWORD_TRANSITION.len() - 2);
490                                        (
491                                            Token::Transition,
492                                            source_position
493                                                .with_end(start + KEYWORD_TRANSITION.len()),
494                                        )
495                                    }
496                                    KEYWORD_SEND => {
497                                        self.chars.nth(KEYWORD_SEND.len() - 2);
498                                        (
499                                            Token::Send,
500                                            source_position.with_end(start + KEYWORD_SEND.len()),
501                                        )
502                                    }
503                                    KEYWORD_FIELD => {
504                                        self.chars.nth(KEYWORD_FIELD.len() - 2);
505                                        (
506                                            Token::Field,
507                                            source_position.with_end(start + KEYWORD_FIELD.len()),
508                                        )
509                                    }
510                                    KEYWORD_ACCEPT => {
511                                        self.chars.nth(KEYWORD_ACCEPT.len() - 2);
512                                        (
513                                            Token::Accept,
514                                            source_position.with_end(start + KEYWORD_ACCEPT.len()),
515                                        )
516                                    }
517                                    KEYWORD_EXISTS => {
518                                        self.chars.nth(KEYWORD_EXISTS.len() - 2);
519                                        (
520                                            Token::Exists,
521                                            source_position.with_end(start + KEYWORD_EXISTS.len()),
522                                        )
523                                    }
524                                    KEYWORD_DELETE => {
525                                        self.chars.nth(KEYWORD_DELETE.len() - 2);
526                                        (
527                                            Token::Delete,
528                                            source_position.with_end(start + KEYWORD_DELETE.len()),
529                                        )
530                                    }
531                                    KEYWORD_THROW => {
532                                        self.chars.nth(KEYWORD_THROW.len() - 2);
533                                        (
534                                            Token::Throw,
535                                            source_position.with_end(start + KEYWORD_THROW.len()),
536                                        )
537                                    }
538                                    KEYWORD_MAP => {
539                                        self.chars.nth(KEYWORD_MAP.len() - 2);
540                                        (
541                                            Token::Map,
542                                            source_position.with_end(start + KEYWORD_MAP.len()),
543                                        )
544                                    }
545                                    KEYWORD_SCILLA_VERSION => {
546                                        self.chars.nth(KEYWORD_SCILLA_VERSION.len() - 2);
547                                        (
548                                            Token::ScillaVersion,
549                                            source_position
550                                                .with_end(start + KEYWORD_SCILLA_VERSION.len()),
551                                        )
552                                    }
553                                    KEYWORD_TYPE => {
554                                        self.chars.nth(KEYWORD_TYPE.len() - 2);
555                                        (
556                                            Token::Type,
557                                            source_position.with_end(start + KEYWORD_TYPE.len()),
558                                        )
559                                    }
560                                    KEYWORD_OF => {
561                                        self.chars.nth(KEYWORD_OF.len() - 2);
562                                        (
563                                            Token::Of,
564                                            source_position.with_end(start + KEYWORD_OF.len()),
565                                        )
566                                    }
567                                    KEYWORD_AS => {
568                                        self.chars.nth(KEYWORD_AS.len() - 2);
569                                        (
570                                            Token::As,
571                                            source_position.with_end(start + KEYWORD_AS.len()),
572                                        )
573                                    }
574                                    KEYWORD_PROCEDURE => {
575                                        self.chars.nth(KEYWORD_PROCEDURE.len() - 2);
576                                        (
577                                            Token::Procedure,
578                                            source_position
579                                                .with_end(start + KEYWORD_PROCEDURE.len()),
580                                        )
581                                    }
582                                    KEYWORD_EMP => {
583                                        self.chars.nth(KEYWORD_EMP.len() - 2);
584                                        (
585                                            Token::Emp,
586                                            source_position.with_end(start + KEYWORD_EMP.len()),
587                                        )
588                                    }
589                                    KEYWORD_EVENT => {
590                                        self.chars.nth(KEYWORD_EVENT.len() - 2);
591                                        (
592                                            Token::Event,
593                                            source_position.with_end(start + KEYWORD_EVENT.len()),
594                                        )
595                                    }
596                                    KEYWORD_EVENT_TYPE => {
597                                        self.chars.nth(KEYWORD_EVENT_TYPE.len() - 2);
598                                        (
599                                            Token::EventType,
600                                            source_position
601                                                .with_end(start + KEYWORD_EVENT_TYPE.len()),
602                                        )
603                                    }
604                                    _ => {
605                                        // Handle other cases here
606                                        let bystr_with_size = Regex::new(r"^ByStr[0-9]+").unwrap();
607
608                                        let signed_integer = Regex::new(r"^[+-]?[0-9]+").unwrap();
609                                        let hex_number =
610                                            Regex::new(r"^0(x|X)([a-fA-F0-9][a-fA-F0-9])*")
611                                                .unwrap();
612                                        let string_literal =
613                                            Regex::new(r#"^"(?:\\.|[^"])*""#).unwrap();
614                                        let regular_id =
615                                            Regex::new(r"^[a-z][a-zA-Z0-9_]*").unwrap();
616                                        let template_type_id =
617                                            Regex::new(r"^['][A-Z][a-zA-Z0-9_]*").unwrap();
618                                        let custom_type_id =
619                                            Regex::new(r"^[A-Z][a-zA-Z0-9_]*").unwrap();
620                                        let special_id = Regex::new(r"^[_][a-zA-Z0-9_]*").unwrap();
621
622                                        if let Some(mat) = bystr_with_size.find(token_str) {
623                                            let end = start + mat.end();
624                                            let s = &self.document[start..end];
625                                            if mat.end() > 1 {
626                                                self.chars.nth(end - start - 2);
627                                                // -2, because we already consumed the first char
628                                            }
629
630                                            (Token::ByStrWithSize(s), source_position.with_end(end))
631                                        } else if token_str.starts_with(KEYWORD_BYSTR) {
632                                            self.chars.nth(KEYWORD_BYSTR.len() - 2);
633                                            (
634                                                Token::ByStr,
635                                                source_position
636                                                    .with_end(start + KEYWORD_BYSTR.len()),
637                                            )
638                                        } else if let Some(mat) = hex_number.find(token_str) {
639                                            let end = start + mat.end();
640                                            let s = &self.document[start..end];
641                                            if mat.end() > 1 {
642                                                self.chars.nth(end - start - 2);
643                                                // -2, because we already consumed the first char
644                                            }
645
646                                            (Token::HexNumber(s), source_position.with_end(end))
647                                        } else if let Some(mat) = signed_integer.find(token_str) {
648                                            let end = start + mat.end();
649                                            let s = &self.document[start..end];
650                                            if mat.end() > 1 {
651                                                self.chars.nth(end - start - 2);
652                                                // -2, because we already consumed the first char
653                                            }
654                                            (Token::Number(s), source_position.with_end(end))
655                                        } else if let Some(mat) = string_literal.find(token_str) {
656                                            let end = start + mat.end();
657                                            let s = &self.document[start..end];
658                                            if mat.end() > 1 {
659                                                self.chars.nth(end - start - 2);
660                                                // -2, because we already consumed the first char
661                                            }
662
663                                            (Token::StringLiteral(s), source_position.with_end(end))
664                                        } else if let Some(mat) = regular_id.find(token_str) {
665                                            let end = start + mat.end();
666                                            let s = &self.document[start..end];
667                                            if mat.end() > 1 {
668                                                self.chars.nth(end - start - 2);
669                                                // -2, because we already consumed the first char
670                                            }
671
672                                            (Token::Identifier(s), source_position.with_end(end))
673                                        } else if let Some(mat) = template_type_id.find(token_str) {
674                                            let end = start + mat.end();
675                                            let s = &self.document[start..end];
676                                            if mat.end() > 1 {
677                                                self.chars.nth(end - start - 2);
678                                                // -2, because we already consumed the first char
679                                            }
680
681                                            (
682                                                Token::TemplateIdentifier(s),
683                                                source_position.with_end(end),
684                                            )
685                                        } else if let Some(mat) = custom_type_id.find(token_str) {
686                                            let end = start + mat.end();
687                                            let s = &self.document[start..end];
688                                            if mat.end() > 1 {
689                                                self.chars.nth(end - start - 2);
690                                            }
691                                            (
692                                                Token::CustomIdentifier(s),
693                                                source_position.with_end(end),
694                                            )
695                                        } else if let Some(mat) = special_id.find(token_str) {
696                                            let end = start + mat.end();
697                                            let s = &self.document[start..end];
698                                            if mat.end() > 1 {
699                                                self.chars.nth(end - start - 2);
700                                                // -2, because we already consumed the first char
701                                            }
702
703                                            (
704                                                Token::SpecialIdentifier(s),
705                                                source_position.with_end(end),
706                                            )
707                                        } else {
708                                            (Token::Unknown, source_position.with_end(start))
709                                        }
710                                    }
711                                };
712
713                            (token, end)
714                        }
715                    };
716                    (token, end)
717                }
718            };
719
720            return Some(Ok((source_position, token, end)));
721        }
722
723        None
724    }
725}
726
727/*
728#[cfg(test)]
729mod tests {
730    use super::*;
731    macro_rules! test {
732        ($src:expr, $($span:expr => $token:expr,)*) => {{
733            let lexed_tokens: Vec<_> = Lexer::new($src.into()).collect();
734            let expected_tokens : Vec<Result<(usize, Token<&str>, usize), ParseError>>= vec![$({
735                let start : usize = $span.find("~").unwrap() as usize;
736                let end : usize = $span.rfind("~").unwrap() as usize;
737                Ok((start, $token, end))
738            }),*];
739
740            assert_eq!(lexed_tokens, expected_tokens);
741        }};
742    }
743
744    // TODO: Integrate comments into the AST
745    #[test]
746    fn doc_comment() {
747        test! {
748            "       (* hello Scilla *)",
749            "       ~~~~~~~~~~~~~~~~~~" => Token::Comment(" hello Scilla "),
750        };
751        test! {
752            "       (***** hello *****)",
753            "       ~~~~~~~~~~~~~~~~~~~" => Token::Comment("**** hello ****"),
754        };
755        test! {
756            "       (* *** hello ** **)",
757            "       ~~~~~~~~~~~~~~~~~~~" => Token::Comment(" *** hello ** *"),
758        };
759        test! {
760            "       (*(*(* hello *(*(*)",
761            "       ~~~~~~~~~~~~~~~~~~~" => Token::Comment("(*(* hello *(*("),
762        };
763    }
764
765    // TODO: Add support for
766    // (* Fish (* Soup *) *)
767    // (* Fish (* Soup  *)
768}
769*/