onescript_preprocessor/lexer.rs

use std::collections::HashMap;
use std::iter::Peekable;
use std::str::Chars;
use crate::token::{KeywordTable, Token, TokenKind};
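
/// Lexer for OneScript preprocessor directives; `current_line` tracks the
/// current 1-based source line while scanning.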
pub struct Lexer {
    current_line: u16,
}

impl Lexer {
    pub fn new() -> Self {
        Lexer { current_line: 1 }
    }
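
    /// Tokenizes `source`: emits `Shebang`/`Hash` markers, directive keywords,
    /// identifiers and quoted paths on `#` lines, and `Text` runs for everything else.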
    pub fn lex(&mut self, source: &str) -> Vec<Token> {
        let mut chars = source.chars().peekable();
        let mut tokens: Vec<Token> = Vec::new();
        let keywords_table = KeywordTable::new();

        while let Some(char) = chars.peek() {
            match char {
                '#' => {
                    chars.next();

                    if match_char('!', &mut chars) {
                        let token = Token::new(TokenKind::Shebang, "#!".to_string(),
                            self.current_line, self.current_line);
                        tokens.push(token);
                        self.shebang_text(&mut tokens, &mut chars);
                    } else {
                        let token = Token::new(TokenKind::Hash, "#".to_string(),
                            self.current_line, self.current_line);
                        tokens.push(token);
                        self.preprocessor_line(&mut tokens, &mut chars, &keywords_table);
                    }
                }
                _ => {
                    self.text(&mut tokens, &mut chars);
                }
            }
        }

        tokens
    }
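
    /// Collects everything after `#!` up to the end of the line and, if non-empty,
    /// emits it as a single `Text` token. The newline itself is left in the stream.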
    fn shebang_text(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>) {
        let mut text_chars: Vec<char> = Vec::new();
        let start_line = self.current_line;
        let end_line = self.current_line;

        while let Some(char) = chars.peek() {
            match char {
                '\n' => {
                    break;
                }
                _ => {
                    text_chars.push(*char);
                    chars.next();
                }
            }
        }

        if !text_chars.is_empty() {
            let token = Token::new(TokenKind::Text, text_chars.iter().collect(), start_line, end_line);
            tokens.push(token);
        }
    }
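
    /// Lexes the remainder of a `#` directive line: words become keyword or
    /// identifier tokens, quoted strings become `Path` tokens, and other
    /// characters are skipped. Stops at the newline without consuming it.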
    fn preprocessor_line(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>, keywords: &KeywordTable) {
        while let Some(char) = chars.peek() {
            match char {
                '\n' => {
                    break;
                }
                char if char.is_alphabetic() || *char == '_' => {
                    let token = self.identifier(chars, &keywords.table);
                    tokens.push(token);
                }
                '"' => {
                    // A quoted string inside a directive is re-tagged as a path.
                    let mut token = self.string(chars);
                    token.token_kind = TokenKind::Path;
                    tokens.push(token);
                }
                _ => {
                    chars.next();
                }
            }
        }
    }
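
    /// Reads a run of alphabetic/underscore characters and looks it up in the
    /// keyword table by its uppercased form; unmatched words become `Identifier` tokens.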
    fn identifier(&mut self, chars: &mut Peekable<Chars>, keywords: &HashMap<String, TokenKind>) -> Token {
        let mut text_chars: Vec<char> = Vec::new();

        while let Some(char) = chars.peek() {
            if char.is_alphabetic() || *char == '_' {
                text_chars.push(*char);
                chars.next();
            } else {
                break;
            }
        }

        let identifier: String = String::from_iter(text_chars);
        if let Some(token_kind) = keywords.get(identifier.to_uppercase().as_str()) {
            return Token::new(*token_kind, identifier, self.current_line, self.current_line);
        }

        Token::new(TokenKind::Identifier, identifier, self.current_line, self.current_line)
    }
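
    /// Consumes ordinary source text up to the next `#` that starts a directive,
    /// treating `#` inside string ("...") or date ('...') literals as plain text
    /// and counting newlines to keep the token's line span accurate.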
    fn text(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>) {
        let mut text_chars: Vec<char> = Vec::new();
        let mut string_or_date = false;
        let start_line = self.current_line;
        let mut end_line = self.current_line;

        while let Some(char) = chars.peek() {
            match char {
                '#' => {
                    // A `#` outside a literal starts a directive; stop and let `lex` handle it.
                    if !string_or_date {
                        break;
                    } else {
                        text_chars.push(*char);
                        chars.next();
                    }
                }
                '"' | '\'' => {
                    // Toggle on string ("...") and date ('...') literal delimiters.
                    string_or_date = !string_or_date;
                    text_chars.push(*char);
                    chars.next();
                }
                '\n' => {
                    self.current_line += 1;
                    end_line += 1;
                    text_chars.push(*char);
                    chars.next();
                }
                _ => {
                    text_chars.push(*char);
                    chars.next();
                }
            }
        }

        let token = Token::new(TokenKind::Text, text_chars.into_iter().collect(),
            start_line, end_line);
        tokens.push(token);
    }
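
    /// Reads a double-quoted literal (quotes included), tracking any newlines it
    /// spans. Returned as a `Text` token; `preprocessor_line` re-tags it as `Path`.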
    fn string(&mut self, chars: &mut Peekable<Chars>) -> Token {
        let mut text_chars: Vec<char> = Vec::new();
        // The caller has already peeked the opening `"`, so this `unwrap` cannot fail.
        text_chars.push(chars.next().unwrap());
        let start_line = self.current_line;
        let mut end_line = self.current_line;

        while let Some(char) = chars.next() {
            match char {
                '"' => {
                    text_chars.push(char);
                    break;
                }
                '\n' => {
                    self.current_line += 1;
                    end_line += 1;
                    text_chars.push(char);
                }
                _ => text_chars.push(char),
            }
        }

        Token::new(TokenKind::Text, text_chars.into_iter().collect(), start_line, end_line)
    }
}
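
/// Consumes the next character only if it equals `expected`.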
fn match_char(expected: char, chars: &mut Peekable<Chars>) -> bool {
    if chars.peek() == Some(&expected) {
        chars.next();
        true
    } else {
        false
    }
}
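
// A minimal usage sketch, not from the original file: a unit test exercising
// `lex` on a shebang line followed by ordinary text. It assumes only what the
// code above already relies on: the `TokenKind` variants used here and a
// crate-visible `token_kind` field on `Token`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lexes_shebang_and_text() {
        let mut lexer = Lexer::new();
        let tokens = lexer.lex("#!oscript\nA = 1;");

        // Expected: the `#!` marker, the shebang text, then one plain `Text` run.
        assert_eq!(tokens.len(), 3);
        assert!(matches!(tokens[0].token_kind, TokenKind::Shebang));
        assert!(matches!(tokens[1].token_kind, TokenKind::Text));
        assert!(matches!(tokens[2].token_kind, TokenKind::Text));
    }
}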