1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
mod classes;
mod comments;
mod cursor;
mod numbers;
mod strings;

use self::{
    classes::*, comments::scan_comment, cursor::Cursor, numbers::scan_number, strings::scan_string,
};
use crate::{
    SyntaxKind::{self, *},
    TextSize,
};

/// A single lexical token of Mun source text.
///
/// A token records only what kind it is and how long it is; it does not store its text or its
/// position. [`tokenize`] recovers positions by advancing through the input by each token's
/// `len` in turn.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token {
    /// The syntactic category of the token (whitespace, identifier, keyword, ...).
    pub kind: SyntaxKind,

    /// The length of the token. [`tokenize`] uses it as a byte offset when slicing the remaining
    /// input.
    pub len: TextSize,
}

/// Splits `text` into its complete sequence of tokens.
///
/// Tokens are produced front to back by calling [`next_token`] on whatever input is still
/// unconsumed, so the token lengths add up to the length of `text`. An empty input yields an
/// empty vector.
pub fn tokenize(text: &str) -> Vec<Token> {
    let mut remaining = text;
    std::iter::from_fn(|| {
        if remaining.is_empty() {
            return None;
        }
        let token = next_token(remaining);
        // Skip past the token that was just produced before handing it out.
        let consumed: u32 = token.len.into();
        remaining = &remaining[consumed as usize..];
        Some(token)
    })
    .collect()
}

/// Lexes the single token found at the start of `text`.
///
/// The returned token's `len` is taken from the cursor after the token has been scanned.
///
/// # Panics
///
/// Panics if `text` is empty.
pub fn next_token(text: &str) -> Token {
    assert!(!text.is_empty());
    let mut cursor = Cursor::new(text);
    // The first character decides which scanner takes over.
    let first = cursor.bump().unwrap();
    let kind = next_token_inner(first, &mut cursor);
    Token {
        kind,
        len: cursor.into_len(),
    }
}

fn next_token_inner(c: char, cursor: &mut Cursor) -> SyntaxKind {
    if is_whitespace(c) {
        cursor.bump_while(is_whitespace);
        return WHITESPACE;
    }

    if c == '/' {
        if let Some(kind) = scan_comment(cursor) {
            return kind;
        }
    }

    let ident_start = is_ident_start(c);
    if ident_start {
        return scan_identifier_or_keyword(c, cursor);
    }

    if is_dec_digit(c) {
        return scan_number(c, cursor);
    }

    if let Some(kind) = scan_index(c, cursor) {
        return kind;
    }

    if let Some(kind) = SyntaxKind::from_char(c) {
        return kind;
    }

    match c {
        '!' if cursor.matches('=') => {
            cursor.bump();
            return NEQ;
        }
        '"' | '\'' => {
            scan_string(c, cursor);
            return STRING;
        }
        _ => (),
    }
    ERROR
}

/// Scans an identifier, keyword, or lone underscore whose first character is `c`.
///
/// Returns `UNDERSCORE` when `c` is `_` and the character after it cannot continue an
/// identifier (or there is no further input). Otherwise every identifier-continue character is
/// consumed and the resulting text is looked up as a keyword, falling back to `IDENT`.
fn scan_identifier_or_keyword(c: char, cursor: &mut Cursor) -> SyntaxKind {
    let upcoming = cursor.current();
    let stands_alone = upcoming.map_or(true, |next| !is_ident_continue(next));
    if c == '_' && stands_alone {
        return UNDERSCORE;
    }

    cursor.bump_while(is_ident_continue);
    SyntaxKind::from_keyword(cursor.current_token_text()).unwrap_or(IDENT)
}

/// Scans a field index such as `.0` or `.12`.
///
/// `c` is the character the caller has already taken from `cursor`; the digits are read from
/// `cursor` itself.
///
/// Returns `Some(SyntaxKind::INDEX)` when `c` is `.` and a decimal digit follows it directly.
/// Returns `None` without consuming anything when `c` is not `.`, when the next character is not
/// a digit, or when `.` is the last character of the input. The last case matters: a trailing
/// `.` with nothing after it must not be reported as an index.
///
/// An index never has a leading zero: for `.01` only `.0` is consumed and the `1` is left for
/// the next token.
fn scan_index(c: char, cursor: &mut Cursor) -> Option<SyntaxKind> {
    if c != '.' {
        return None;
    }

    match cursor.current() {
        // A leading `0` is a complete index on its own.
        Some('0') => {
            cursor.bump();
        }
        // Any other leading digit may be followed by further digits, zeros included.
        Some('1'..='9') => {
            cursor.bump();
            while let Some('0'..='9') = cursor.current() {
                cursor.bump();
            }
        }
        // Not a digit, or end of input: this `.` does not start an index.
        _ => return None,
    }

    Some(SyntaxKind::INDEX)
}