1pub(crate) use crate::lexer_utils::*;
27use crate::parser_utils::XkbFileParseError;
28use logos::Logos;
29use std::iter::Peekable;
30
/// Streaming lexer that yields cooked [`Token`]s for one top-level
/// `{ ... };` section of the input at a time (see the `Iterator` impl;
/// `reset` re-arms it for the next section).
pub(crate) struct Lexer<'input> {
    // Current `{`/`}` nesting depth.
    bracket_depth: usize,
    // Set once the brace closing the outermost `{` has been consumed.
    closed_last_bracket: bool,
    // Set once the `;` after that closing brace was seen; `next()` then
    // returns `None` until `reset` is called.
    finished_block: bool,
    // Underlying logos token stream; peekable so `is_empty` can look ahead
    // without consuming a token.
    token_stream: Peekable<logos::SpannedIter<'input, RawToken<'input>>>,
}
38
39impl<'input> Lexer<'input> {
40 pub(crate) fn new(input: &'input str) -> Result<Self, XkbFileParseError> {
41 let input = check_supported_char_encoding(input)
42 .map_err(|_| XkbFileParseError::WrongInputFormat)?;
43 Ok(Self {
44 bracket_depth: 0,
45 closed_last_bracket: false,
46 finished_block: false,
47 token_stream: RawToken::lexer(input).spanned().peekable(),
48 })
49 }
50 pub(crate) fn is_empty(&mut self) -> bool {
51 self.token_stream.peek().is_none()
52 }
53 pub(crate) fn reset(&mut self) {
54 self.bracket_depth = 0;
55 self.closed_last_bracket = false;
56 self.finished_block = false;
57 }
58}
59
60impl<'input> Iterator for Lexer<'input> {
61 type Item = Token;
62
63 fn next(&mut self) -> Option<Self::Item> {
64 if self.finished_block {
70 return None;
71 }
72 self.token_stream
73 .next()
74 .map(|(raw_token, _span)| match raw_token {
75 Ok(raw_token) => {
76 let token = Token::from(raw_token);
77
78 if token == Token::Obrace {
84 self.bracket_depth += 1;
85 } else if token == Token::Cbrace {
86 if self.bracket_depth > 0 {
87 self.bracket_depth -= 1;
88 }
89 if self.bracket_depth == 0 {
90 self.closed_last_bracket = true;
91 }
92 } else if self.closed_last_bracket && token == Token::Semi {
93 self.finished_block = true;
94 }
95
96 Some(token)
97 }
98 Err(_) => None,
99 })?
100 }
101}
102
103#[allow(dead_code)]
104#[derive(Logos, Debug, PartialEq)]
105enum RawToken<'input> {
106 #[regex("\"[^\"]*\"", priority = 5)]
107 String(&'input str),
108
109 #[regex(r"[[//]#][^\n]*[\n\r]?", |_| logos::Skip, priority=5)]
110 Comment,
111
112 #[regex(r"<[\x21-\x3B\x3D\x3F-\x7E]*>", priority = 4)]
114 Keyname(&'input str),
115
116 #[regex("[ \x00\t\n]+", |_| logos::Skip, priority=3)]
117 Whitespace,
118
119 #[token(";", priority = 3)]
120 Semi,
121
122 #[token(r"{", priority = 3)]
123 Obrace,
124
125 #[token(r"}", priority = 3)]
126 Cbrace,
127
128 #[token("=", priority = 3)]
129 Equals,
130
131 #[token(r"[", priority = 3)]
132 Obracket,
133
134 #[token(r"]", priority = 3)]
135 Cbracket,
136
137 #[token(r"(", priority = 3)]
138 Oparen,
139
140 #[token(r")", priority = 3)]
141 Cparen,
142
143 #[token(r".", priority = 3)]
144 Dot,
145
146 #[token(",", priority = 3)]
147 Comma,
148
149 #[token("+", priority = 3)]
150 Plus,
151
152 #[token(r"-", priority = 3)]
153 Minus,
154
155 #[token(r"*", priority = 3)]
156 Times,
157
158 #[token(r"/", priority = 3)]
159 Divide,
160
161 #[token(r"!", priority = 3)]
162 Exclam,
163
164 #[token(r"~", priority = 3)]
165 Invert,
166
167 #[regex("[A-Za-z_][A-Za-z0-9_]*", priority = 2)]
168 Ident(&'input str),
169 #[regex("0[xX][0-9a-fA-F]+", |lex| hex_convert(lex.slice()), priority=1)]
170 HexNumber(u32),
171
172 #[regex("[0-9]+", |lex| lex.slice().parse().ok(), priority=1)]
173 UInt(u32),
174
175 #[regex(r"[0-9]*\.[0-9]+", |lex| lex.slice().parse().ok(), priority=1)]
176 Float(f64),
177}
/// Cooked tokens handed to the parser.
///
/// The first group carries payloads or punctuation; everything from
/// `ActionTok` onward is an xkb keyword resolved by `Token::keyword_match`
/// via the `crate::keywords::KEYWORDS` table.
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum Token {
    // Trivia (comments/whitespace); never meaningful to the parser.
    Skip,
    // `<name>` with the angle brackets stripped.
    Keyname(String),
    // String literal with quotes stripped and escapes resolved.
    String(String),
    // Identifier that is not a recognized keyword.
    Ident(String),
    // Integer literal; `RawToken::HexNumber` is folded into this variant too.
    UInt(u32),
    Float(f64),
    // Punctuation / operators.
    Semi,
    Obrace,
    Cbrace,
    Equals,
    Obracket,
    Cbracket,
    Oparen,
    Cparen,
    Dot,
    Comma,
    Plus,
    Minus,
    Times,
    Divide,
    Exclam,
    Invert,
    // Keywords (see `crate::keywords::KEYWORDS`).
    ActionTok,
    Alias,
    AlphanumericKeys,
    AlternateGroup,
    Alternate,
    Augment,
    Default,
    FunctionKeys,
    Group,
    Hidden,
    Include,
    Indicator,
    Interpret,
    KeypadKeys,
    Key,
    Keys,
    Logo,
    ModifierKeys,
    ModifierMap,
    Outline,
    Overlay,
    Override,
    Partial,
    Replace,
    Row,
    Section,
    Shape,
    Solid,
    Text,
    Type,
    VirtualMods,
    Virtual,
    XkbCompatmap,
    XkbGeometry,
    XkbKeycodes,
    XkbKeymap,
    XkbLayout,
    XkbSemantics,
    XkbSymbols,
    XkbTypes,
}
243
244impl std::fmt::Display for Token {
245 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
246 write!(f, "{:?}", self)
247 }
248}
249
250impl<'token> From<RawToken<'token>> for Token {
251 fn from(raw_token: RawToken) -> Self {
252 match raw_token {
253 RawToken::Comment => Token::Skip,
254 RawToken::Whitespace => Token::Skip,
255 RawToken::String(s) => Token::String(process_string(s[1..s.len() - 1].as_bytes())),
257 RawToken::Ident(s) => Token::keyword_match(s),
258 RawToken::UInt(s) => Token::UInt(s),
259 RawToken::Float(f) => Token::Float(f),
260 RawToken::Semi => Token::Semi,
261 RawToken::Obrace => Token::Obrace,
262 RawToken::Cbrace => Token::Cbrace,
263 RawToken::Equals => Token::Equals,
264 RawToken::Obracket => Token::Obracket,
265 RawToken::Cbracket => Token::Cbracket,
266 RawToken::Oparen => Token::Oparen,
267 RawToken::Cparen => Token::Cparen,
268 RawToken::Dot => Token::Dot,
269 RawToken::Comma => Token::Comma,
270 RawToken::Plus => Token::Plus,
271 RawToken::Minus => Token::Minus,
272 RawToken::Times => Token::Times,
273 RawToken::Divide => Token::Divide,
274 RawToken::Exclam => Token::Exclam,
275 RawToken::Invert => Token::Invert,
276 RawToken::Keyname(s) => Token::Keyname(s[1..s.len() - 1].into()),
278 RawToken::HexNumber(u) => Token::UInt(u),
279 }
280 }
281}
282
/// Parses a hexadecimal literal of the form `0x…`/`0X…` into a `u32`.
///
/// The two-character prefix is skipped unconditionally; the caller (the
/// `RawToken::HexNumber` pattern) guarantees it is present. Returns `None`
/// when the digits overflow `u32`.
fn hex_convert(token: &str) -> Option<u32> {
    let hex_digits = &token[2..]; // skip "0x"/"0X"
    u32::from_str_radix(hex_digits, 16).ok()
}
286
/// Resolves backslash escape sequences in a string literal body.
///
/// `bytes` is the literal's content *without* the surrounding quotes.
/// Supported escapes: `\n`, `\t`, `\r`, `\b`, `\f`, `\v`, `\e`, `\\`, and
/// one to three octal digits. Octal escapes that evaluate to NUL or do not
/// fit in a byte are dropped, as are unknown escapes such as `\9`. A lone
/// trailing backslash is kept literally.
///
/// # Panics
/// Panics if the resolved bytes are not valid UTF-8.
fn process_string(bytes: &[u8]) -> String {
    let len = bytes.len();
    let mut out: Vec<u8> = Vec::with_capacity(len);
    let mut i = 0;

    while i < len {
        let byte = bytes[i];

        // Ordinary byte, or a lone `\` as the very last byte: copy as-is.
        if byte != b'\\' || i + 1 == len {
            out.push(byte);
            i += 1;
            continue;
        }

        match bytes[i + 1] {
            b'n' => {
                out.push(b'\n');
                i += 2;
            }
            b't' => {
                out.push(b'\t');
                i += 2;
            }
            b'r' => {
                out.push(b'\r');
                i += 2;
            }
            // BUGFIX: `\b` previously produced a backslash; it is the C
            // backspace escape (0x08), as in libxkbcommon's scanner.
            b'b' => {
                out.push(0x08);
                i += 2;
            }
            b'f' => {
                out.push(0x0c);
                i += 2;
            }
            b'v' => {
                out.push(0x0b);
                i += 2;
            }
            b'e' => {
                out.push(0x1b);
                i += 2;
            }
            // BUGFIX: `\\` previously fell through to the octal branch,
            // found no digits, and emitted *nothing*; it must yield `\`.
            b'\\' => {
                out.push(b'\\');
                i += 2;
            }
            _ => {
                // Count up to three octal digits following the backslash.
                let digits = bytes[i + 1..]
                    .iter()
                    .copied()
                    .take(3)
                    .take_while(|b| (b'0'..=b'7').contains(b))
                    .count();

                if digits == 0 {
                    // Unknown escape such as `\9`: drop both characters.
                    i += 2;
                } else {
                    // The counted bytes are ASCII octal digits, so this
                    // slice is valid UTF-8.
                    let octal = std::str::from_utf8(&bytes[i + 1..i + 1 + digits])
                        .expect("octal digits are ASCII");
                    if let Ok(value) = u8::from_str_radix(octal, 8) {
                        // NUL is dropped; values over 0o377 fail the parse
                        // above and are dropped too.
                        if value != 0 {
                            out.push(value);
                        }
                    }
                    i += 1 + digits;
                }
            }
        }
    }

    String::from_utf8(out).expect("escaped string is not valid utf8")
}
350
351impl Token {
352 fn keyword_match(token: &str) -> Self {
353 crate::text::lookup_key(&crate::keywords::KEYWORDS, token)
354 .cloned()
355 .unwrap_or_else(|| Token::Ident(token.into()))
356 }
357}
358
#[cfg(test)]
mod test {

    // Strips the surrounding quotes before delegating, mirroring how
    // `Token::from` prepares a `RawToken::String` slice for `process_string`.
    fn test_process_string(s: &str) -> String {
        process_string(&s[1..s.len() - 1].as_bytes())
    }
    use super::*;
    #[test]
    fn test_string_process() {
        // Named escapes.
        assert_eq!(test_process_string(r#""""#), "");
        assert_eq!(test_process_string(r#""Test\e""#), "Test\x1b");
        assert_eq!(test_process_string(r#""Test\e1""#), "Test\x1b1");
        // Octal escapes: NUL values are dropped from the output.
        assert_eq!(test_process_string(r#""Test\00f""#), "Testf");
        assert_eq!(test_process_string(r#""Test\00\00\0f""#), "Testf");
        // Octal values that overflow a byte (e.g. \456) are dropped.
        assert_eq!(test_process_string(r#""\456Test\00\00\082""#), "Test82");
        assert_eq!(test_process_string(r#""\456\00\00\081""#), "81");
        // At most three octal digits are consumed per escape; a trailing
        // lone backslash is kept literally.
        assert_eq!(test_process_string(r#""\000\00\0000\00\""#), r"0\");
        assert_eq!(test_process_string(r#""\000\00\000\00\""#), r"\");
        assert_eq!(test_process_string(r#""\000\00\0\00""#), r"");
        assert_eq!(test_process_string(r#""\456Test\0000""#), "Test0");
        // Unknown escapes such as \9 are dropped entirely.
        assert_eq!(test_process_string(r#""Test\9f""#), "Testf");
        assert_eq!(test_process_string(r#""Test\1f""#), "Test\u{1}f");
        assert_eq!(test_process_string(r#""Test\1\2""#), "Test\u{1}\u{2}");
        assert_eq!(test_process_string(r#""Test\401\2""#), "Test\u{2}");
    }
}
384}