/// Kinds of tokens and syntax-tree nodes, stored as a `u16` discriminant.
///
/// The first group of variants (up to `ERROR`) are token kinds; most of them
/// are emitted by the lexer in this file. The second group is not produced by
/// the lexer visible here and presumably names composite tree nodes built by
/// a parser elsewhere — TODO confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
// Variant names are SCREAMING_SNAKE_CASE rather than CamelCase, hence the allow.
#[allow(non_camel_case_types)]
#[repr(u16)]
pub enum SyntaxKind {
    /// A `[` character, whether it opens a header at line start or appears later in a line.
    LEFT_BRACKET = 0,
    /// A `]` character.
    RIGHT_BRACKET,
    /// An `=` character.
    EQUALS,
    /// A run of ASCII alphanumerics and `-` that starts a line.
    KEY,
    /// Not emitted by the lexer in this file (bracketed text is lexed as `VALUE`);
    /// presumably the name inside a group header — TODO confirm.
    SECTION_NAME,
    /// Not emitted by the lexer in this file (bracketed text is lexed as `VALUE`);
    /// presumably a locale suffix such as the text in `Name[fr]` — TODO confirm.
    LOCALE,
    /// Free text: the remainder of a line after its first token, or the text
    /// found after a `[`.
    VALUE,
    /// A `#` at line start together with everything up to, but excluding, the line break.
    COMMENT,
    /// A line break: `\n`, `\r`, or the pair `\r\n`.
    NEWLINE,
    /// A run of spaces and/or tabs.
    WHITESPACE,
    /// A single character at line start that matches no other token rule.
    ERROR,

    /// Presumably the root node of the syntax tree — not produced by the lexer here.
    ROOT,
    /// Presumably a whole group (header plus entries) — not produced by the lexer here.
    GROUP,
    /// Presumably a `[...]` header line — not produced by the lexer here.
    GROUP_HEADER,
    /// Presumably one `key=value` line — not produced by the lexer here.
    ENTRY,
    /// Presumably an empty line node — not produced by the lexer here.
    BLANK_LINE,
}
42
43impl From<SyntaxKind> for rowan::SyntaxKind {
45 fn from(kind: SyntaxKind) -> Self {
46 Self(kind as u16)
47 }
48}
49
/// Returns `true` when `c` may begin a key: an ASCII letter or an ASCII digit.
#[inline]
fn is_valid_initial_key_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9')
}
56
/// Returns `true` when `c` may appear inside a key: an ASCII letter, an ASCII
/// digit, or a hyphen.
#[inline]
fn is_valid_key_char(c: char) -> bool {
    matches!(c, '-' | 'a'..='z' | 'A'..='Z' | '0'..='9')
}
63
/// Returns `true` when `c` is a line-break character: line feed or carriage return.
#[inline]
fn is_newline(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
69
/// Returns `true` when `c` is horizontal whitespace: a space or a tab.
#[inline]
fn is_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
75
76fn lex_impl(input: &str) -> impl Iterator<Item = (SyntaxKind, &str)> + '_ {
78 let mut remaining = input;
79 let mut at_line_start = true;
80 let mut in_section_header = false;
81 let mut in_locale = false;
82
83 std::iter::from_fn(move || {
84 if remaining.is_empty() {
85 return None;
86 }
87
88 let c = remaining.chars().next()?;
89
90 match c {
91 _ if is_newline(c) => {
93 let char_len = c.len_utf8();
94 if c == '\r' && remaining.get(1..2) == Some("\n") {
96 let (token, rest) = remaining.split_at(2);
97 remaining = rest;
98 at_line_start = true;
99 in_section_header = false;
100 in_locale = false;
101 Some((SyntaxKind::NEWLINE, token))
102 } else {
103 let (token, rest) = remaining.split_at(char_len);
104 remaining = rest;
105 at_line_start = true;
106 in_section_header = false;
107 in_locale = false;
108 Some((SyntaxKind::NEWLINE, token))
109 }
110 }
111
112 '#' if at_line_start => {
114 let end = remaining.find(is_newline).unwrap_or(remaining.len());
115 let (token, rest) = remaining.split_at(end);
116 remaining = rest;
117 Some((SyntaxKind::COMMENT, token))
118 }
119
120 '[' if at_line_start => {
122 remaining = &remaining[1..]; at_line_start = false;
124 in_section_header = true;
125 Some((SyntaxKind::LEFT_BRACKET, "["))
126 }
127
128 '[' => {
130 remaining = &remaining[1..]; in_locale = true;
132 Some((SyntaxKind::LEFT_BRACKET, "["))
133 }
134
135 ']' => {
136 remaining = &remaining[1..]; in_section_header = false;
138 in_locale = false;
139 Some((SyntaxKind::RIGHT_BRACKET, "]"))
140 }
141
142 _ if is_whitespace(c) && at_line_start => {
144 let end = remaining
145 .find(|c| !is_whitespace(c))
146 .unwrap_or(remaining.len());
147 let (token, rest) = remaining.split_at(end);
148 remaining = rest;
149 Some((SyntaxKind::WHITESPACE, token))
152 }
153
154 _ if is_whitespace(c) => {
156 let end = remaining
157 .find(|c| !is_whitespace(c))
158 .unwrap_or(remaining.len());
159 let (token, rest) = remaining.split_at(end);
160 remaining = rest;
161 Some((SyntaxKind::WHITESPACE, token))
162 }
163
164 '=' => {
166 remaining = &remaining[1..];
167 Some((SyntaxKind::EQUALS, "="))
168 }
169
170 _ if is_valid_initial_key_char(c) && at_line_start => {
172 let end = remaining
173 .find(|c: char| !is_valid_key_char(c))
174 .unwrap_or(remaining.len());
175 let (token, rest) = remaining.split_at(end);
176 remaining = rest;
177 at_line_start = false;
178 Some((SyntaxKind::KEY, token))
179 }
180
181 _ if in_section_header || in_locale => {
183 let end = remaining.find(']').unwrap_or(remaining.len());
185 let (token, rest) = remaining.split_at(end);
186 remaining = rest;
187 Some((SyntaxKind::VALUE, token))
188 }
189
190 _ if !at_line_start => {
192 let end = remaining.find(is_newline).unwrap_or(remaining.len());
194 let (token, rest) = remaining.split_at(end);
195 remaining = rest;
196 Some((SyntaxKind::VALUE, token))
197 }
198
199 _ => {
201 let char_len = c.len_utf8();
202 let (token, rest) = remaining.split_at(char_len);
203 remaining = rest;
204 at_line_start = false;
205 Some((SyntaxKind::ERROR, token))
206 }
207 }
208 })
209}
210
/// Tokenizes `input` into `(SyntaxKind, &str)` pairs.
///
/// Crate-visible entry point; it simply forwards to `lex_impl` and returns
/// that iterator unchanged.
pub(crate) fn lex(input: &str) -> impl Iterator<Item = (SyntaxKind, &str)> {
    lex_impl(input)
}
215
#[cfg(test)]
mod tests {
    use super::SyntaxKind::*;
    use super::*;

    /// Runs the lexer over `input` and gathers every token it yields.
    fn tokenize(input: &str) -> Vec<(SyntaxKind, &str)> {
        lex(input).collect()
    }

    /// Empty input yields no tokens at all.
    #[test]
    fn test_empty() {
        assert!(tokenize("").is_empty());
    }

    /// A group header lexes as bracket, name text, bracket, line break.
    #[test]
    fn test_simple_section() {
        let expected = [
            (LEFT_BRACKET, "["),
            (VALUE, "Desktop Entry"),
            (RIGHT_BRACKET, "]"),
            (NEWLINE, "\n"),
        ];
        assert_eq!(tokenize("[Desktop Entry]\n"), expected);
    }

    /// A plain `key=value` line.
    #[test]
    fn test_key_value() {
        let expected = [
            (KEY, "Name"),
            (EQUALS, "="),
            (VALUE, "Example"),
            (NEWLINE, "\n"),
        ];
        assert_eq!(tokenize("Name=Example\n"), expected);
    }

    /// Spaces around `=` become their own tokens; spaces inside the value stay in it.
    #[test]
    fn test_key_value_with_spaces() {
        let expected = [
            (KEY, "Name"),
            (WHITESPACE, " "),
            (EQUALS, "="),
            (WHITESPACE, " "),
            (VALUE, "Example Application"),
            (NEWLINE, "\n"),
        ];
        assert_eq!(tokenize("Name = Example Application\n"), expected);
    }

    /// A comment token excludes its terminating line break.
    #[test]
    fn test_comment() {
        let expected = [(COMMENT, "# This is a comment"), (NEWLINE, "\n")];
        assert_eq!(tokenize("# This is a comment\n"), expected);
    }

    /// Spot-checks a multi-group file: the header tokens and the first `Name` entry.
    #[test]
    fn test_full_desktop_file() {
        let input = r#"[Desktop Entry]
Name=Example
Type=Application
Exec=example
# Comment
Icon=example.png

[Desktop Action Play]
Name=Play
Exec=example --play
"#;
        let tokens = tokenize(input);

        // The file opens with a complete header line.
        let leading: Vec<SyntaxKind> = tokens.iter().take(4).map(|(kind, _)| *kind).collect();
        assert_eq!(leading, [LEFT_BRACKET, VALUE, RIGHT_BRACKET, NEWLINE]);

        // The first `Name` key is followed by `=` and its value.
        let name_at = tokens
            .iter()
            .position(|&(kind, text)| kind == KEY && text == "Name")
            .unwrap();
        assert_eq!(tokens[name_at + 1].0, EQUALS);
        assert_eq!(tokens[name_at + 2], (VALUE, "Example"));
    }

    /// An empty line shows up as two consecutive line-break tokens.
    #[test]
    fn test_blank_lines() {
        let tokens = tokenize("Key=Value\n\nKey2=Value2\n");

        let first_break = tokens
            .iter()
            .position(|&(kind, _)| kind == NEWLINE)
            .unwrap();
        assert_eq!(tokens[first_break + 1].0, NEWLINE);
    }
}