onescript_preprocessor/
lexer.rs

1use std::collections::HashMap;
2use std::iter::Peekable;
3use std::str::Chars;
4use crate::token::{KeywordTable, Token, TokenKind};
5
/// Lexer that splits preprocessor source text into tokens: runs of plain
/// text plus preprocessor-instruction tokens (`#`, `#!`, keywords, paths).
pub struct Lexer {
    // 1-based number of the line currently being lexed; advanced on '\n'.
    // NOTE(review): u16 wraps past 65_535 lines — confirm that limit is acceptable.
    current_line: u16,
}
10
11impl Lexer {
12    /// Создает новый экземпляр лексера. Отсчет номеров строк токенов начинается с 1.
13    pub fn new() -> Self { Lexer { current_line: 1 } }
14
15    /// Осуществляет лексинг переданного исходного текста. Возвращает вектор токенов, включающих в себя
16    /// как простой текст, так и токены инструкций препроцессора
17    pub fn lex(&mut self, source: &str) -> Vec<Token> {
18        let mut chars = source.chars().peekable();
19        let mut tokens: Vec<Token> = Vec::new();
20        let keywords_table = KeywordTable::new();
21
22        while let Some(char) = chars.peek() {
23            match char {
24                '#' => {
25                    chars.next();
26
27                    if match_char('!', &mut chars) {
28                        let token = Token::new(TokenKind::Shebang, "#!".to_string(), self
29                            .current_line, self.current_line);
30                        tokens.push(token);
31                        Lexer::shebang_text(self, &mut tokens, &mut chars);
32                    } else {
33                        let token = Token::new(TokenKind::Hash, "#".to_string(), self
34                            .current_line, self.current_line);
35                        tokens.push(token);
36                        Lexer::preprocessor_line(self, &mut tokens, &mut chars, &keywords_table);
37                    }
38                }
39                _ => {
40                    Lexer::text(self, &mut tokens, &mut chars);
41                }
42            }
43        }
44
45        tokens
46    }
47
48    fn shebang_text(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>) {
49        let mut text_chars: Vec<char> = Vec::new();
50        let start_line = self.current_line;
51        let end_line = self.current_line;
52
53        while let Some(char) = chars.peek() {
54            match char {
55                '\n' => {
56                    break;
57                }
58                _ => {
59                    text_chars.push(*char);
60                    chars.next();
61                }
62            }
63        }
64
65        if !text_chars.is_empty() {
66            let token = Token::new(TokenKind::Text, text_chars.iter().collect(), start_line, end_line);
67            tokens.push(token);
68        }
69    }
70
71    fn preprocessor_line(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>, keywords: &KeywordTable) {
72        while let Some(char) = chars.peek() {
73            match char {
74                '\n' => {
75                    break;
76                }
77                char if char.is_alphabetic() || *char == '_' => {
78                    let token = Lexer::identifier(self, chars, &keywords.table);
79                    tokens.push(token);
80                }
81                '"' => {
82                    let mut token = Lexer::string(self, chars);
83                    token.token_kind = TokenKind::Path;
84                    tokens.push(token);
85                }
86                _ => {
87                    chars.next();
88                }
89            }
90        }
91    }
92
93    fn identifier(&mut self, chars: &mut Peekable<Chars>, keywords: &HashMap<String, TokenKind>) -> Token {
94        let mut text_chars: Vec<char> = Vec::new();
95
96        while let Some(char) = chars.peek() {
97            if char.is_alphabetic() || *char == '_' {
98                text_chars.push(*char);
99                chars.next();
100            } else {
101                break;
102            }
103        }
104
105        let identifier: String = String::from_iter(text_chars);
106        if let Some(token_kind) = keywords.get(identifier.to_uppercase().as_str()) {
107            return Token::new(*token_kind, identifier, self.current_line, self.current_line);
108        }
109
110        return Token::new(TokenKind::Identifier, identifier, self.current_line, self.current_line);
111    }
112
113    fn text(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>) {
114        let mut text_chars: Vec<char> = Vec::new();
115        let mut string_or_date = false;
116        let start_line = self.current_line;
117        let mut end_line = self.current_line;
118
119        while let Some(char) = chars.peek() {
120            match char {
121                '#' => {
122                    if !string_or_date {
123                        break;
124                    } else {
125                        text_chars.push(*char);
126                        chars.next();
127                    }
128                }
129                '"' | '\'' => {
130                    if string_or_date == false {
131                        string_or_date = true
132                    } else {
133                        string_or_date = false
134                    }
135                    text_chars.push(*char);
136                    chars.next();
137                }
138                '\n' => {
139                    self.current_line = self.current_line + 1;
140                    end_line = end_line + 1;
141                    text_chars.push(*char);
142                    chars.next();
143                }
144                _ => {
145                    text_chars.push(*char);
146                    chars.next();
147                }
148            }
149        }
150
151        let token = Token::new(TokenKind::Text, text_chars.into_iter().collect(),
152                               start_line, end_line);
153        tokens.push(token);
154    }
155
156    fn string(&mut self, chars: &mut Peekable<Chars>) -> Token {
157        let mut text_chars: Vec<char> = Vec::new();
158        // add first quote symbol
159        text_chars.push(chars.next().unwrap());
160        let start_line = self.current_line;
161        let mut end_line = self.current_line;
162
163        while let Some(char) = chars.next() {
164            match char {
165                char if char == '"' => {
166                    text_chars.push(char);
167                    break;
168                }
169                '\n' => {
170                    self.current_line = self.current_line + 1;
171                    end_line = end_line + 1;
172                    text_chars.push(char);
173                }
174                _ => { text_chars.push(char) }
175            }
176        }
177
178        Token::new(TokenKind::Text, text_chars.into_iter().collect(), start_line, end_line)
179    }
180}
181
/// Consumes the next character if and only if it equals `expected`.
/// Returns true when the character matched (and was consumed), false when
/// the iterator is exhausted or the next character differs (left unconsumed).
fn match_char(expected: char, chars: &mut Peekable<Chars>) -> bool {
    // Single comparison covers both the "empty" and "different char" cases,
    // replacing the redundant is_none()/is_some() double check.
    if chars.peek() == Some(&expected) {
        chars.next();
        true
    } else {
        false
    }
}