/// Token and node kinds for the systemd unit-file syntax tree.
///
/// Discriminant values are significant: `From<SyntaxKind> for
/// rowan::SyntaxKind` converts by numeric value (`as u16`), so variants
/// must not be reordered or renumbered without updating consumers of the
/// rowan tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(non_camel_case_types)]
#[repr(u16)]
pub enum SyntaxKind {
    // --- Leaf tokens, produced by the lexer ---
    LEFT_BRACKET = 0,   // '[' opening a section header
    RIGHT_BRACKET,      // ']' closing a section header
    EQUALS,             // '=' separating key and value
    KEY,                // option name on the left of '='
    SECTION_NAME,       // text between '[' and ']'
    VALUE,              // text to the right of '='
    COMMENT,            // '#' or ';' line comment (newline not included)
    NEWLINE,            // '\n', '\r', or the pair "\r\n"
    WHITESPACE,         // run of spaces and/or tabs
    LINE_CONTINUATION,  // backslash immediately followed by a newline
    ERROR,              // any character the lexer cannot classify

    // --- Composite nodes, built by the parser (never emitted by the lexer) ---
    ROOT,
    SECTION,
    SECTION_HEADER,
    ENTRY,
    BLANK_LINE,
}
42
43impl From<SyntaxKind> for rowan::SyntaxKind {
45 fn from(kind: SyntaxKind) -> Self {
46 Self(kind as u16)
47 }
48}
49
/// Returns `true` if `c` may begin a key name: ASCII letters only.
#[inline]
fn is_valid_initial_key_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z')
}
56
/// Returns `true` if `c` may appear after the first character of a key:
/// ASCII letters, ASCII digits, `-`, or `_`.
#[inline]
fn is_valid_key_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_')
}
63
/// Returns `true` for the two line-ending characters (`\n`, `\r`).
#[inline]
fn is_newline(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
69
/// Returns `true` for intra-line whitespace (space and tab); newlines are
/// handled separately by [`is_newline`].
#[inline]
fn is_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
75
76fn lex_impl(input: &str) -> impl Iterator<Item = (SyntaxKind, &str)> + '_ {
78 let mut remaining = input;
79 let mut at_line_start = true;
80 let mut in_section_header = false;
81
82 std::iter::from_fn(move || {
83 if remaining.is_empty() {
84 return None;
85 }
86
87 let c = remaining.chars().next()?;
88
89 match c {
90 _ if is_newline(c) => {
92 let char_len = c.len_utf8();
93 if c == '\r' && remaining.get(1..2) == Some("\n") {
95 let (token, rest) = remaining.split_at(2);
96 remaining = rest;
97 at_line_start = true;
98 in_section_header = false;
99 Some((SyntaxKind::NEWLINE, token))
100 } else {
101 let (token, rest) = remaining.split_at(char_len);
102 remaining = rest;
103 at_line_start = true;
104 in_section_header = false;
105 Some((SyntaxKind::NEWLINE, token))
106 }
107 }
108
109 '#' | ';' if at_line_start => {
111 let end = remaining.find(is_newline).unwrap_or(remaining.len());
112 let (token, rest) = remaining.split_at(end);
113 remaining = rest;
114 Some((SyntaxKind::COMMENT, token))
115 }
116
117 '\\' if remaining.get(1..2) == Some("\n") || remaining.get(1..3) == Some("\r\n") => {
119 let len = if remaining.get(1..3) == Some("\r\n") {
120 3
121 } else {
122 2
123 };
124 let (token, rest) = remaining.split_at(len);
125 remaining = rest;
126 at_line_start = false; Some((SyntaxKind::LINE_CONTINUATION, token))
128 }
129
130 '[' if at_line_start => {
132 remaining = &remaining[1..]; at_line_start = false;
134 in_section_header = true;
135 Some((SyntaxKind::LEFT_BRACKET, "["))
136 }
137
138 ']' if in_section_header => {
139 remaining = &remaining[1..]; in_section_header = false;
141 Some((SyntaxKind::RIGHT_BRACKET, "]"))
142 }
143
144 _ if is_whitespace(c) && at_line_start => {
146 let end = remaining
147 .find(|c| !is_whitespace(c))
148 .unwrap_or(remaining.len());
149 let (token, rest) = remaining.split_at(end);
150 remaining = rest;
151 Some((SyntaxKind::WHITESPACE, token))
152 }
153
154 _ if is_whitespace(c) => {
156 let end = remaining
157 .find(|c| !is_whitespace(c))
158 .unwrap_or(remaining.len());
159 let (token, rest) = remaining.split_at(end);
160 remaining = rest;
161 Some((SyntaxKind::WHITESPACE, token))
162 }
163
164 '=' => {
166 remaining = &remaining[1..];
167 Some((SyntaxKind::EQUALS, "="))
168 }
169
170 _ if is_valid_initial_key_char(c) && at_line_start => {
172 let end = remaining
173 .find(|c: char| !is_valid_key_char(c))
174 .unwrap_or(remaining.len());
175 let (token, rest) = remaining.split_at(end);
176 remaining = rest;
177 at_line_start = false;
178 Some((SyntaxKind::KEY, token))
179 }
180
181 _ if in_section_header => {
183 let end = remaining.find(']').unwrap_or(remaining.len());
185 let (token, rest) = remaining.split_at(end);
186 remaining = rest;
187 Some((SyntaxKind::SECTION_NAME, token))
188 }
189
190 _ if !at_line_start => {
192 let mut end = 0;
194 for ch in remaining.chars() {
195 if ch == '\\' {
196 let remaining_from_here = &remaining[end..];
198 if remaining_from_here.get(1..2) == Some("\n")
199 || remaining_from_here.get(1..3) == Some("\r\n")
200 {
201 break;
203 }
204 end += ch.len_utf8();
205 } else if is_newline(ch) {
206 break;
208 } else {
209 end += ch.len_utf8();
210 }
211 }
212
213 if end == 0 {
214 None
216 } else {
217 let (token, rest) = remaining.split_at(end);
218 remaining = rest;
219 Some((SyntaxKind::VALUE, token))
220 }
221 }
222
223 _ => {
225 let char_len = c.len_utf8();
226 let (token, rest) = remaining.split_at(char_len);
227 remaining = rest;
228 at_line_start = false;
229 Some((SyntaxKind::ERROR, token))
230 }
231 }
232 })
233}
234
/// Tokenizes a systemd unit file into `(SyntaxKind, text)` pairs.
///
/// Crate-public wrapper around [`lex_impl`]. The lexer is lossless:
/// concatenating every returned `text` slice reproduces `input` exactly.
pub(crate) fn lex(input: &str) -> impl Iterator<Item = (SyntaxKind, &str)> {
    lex_impl(input)
}
239
#[cfg(test)]
mod tests {
    use super::SyntaxKind::*;
    use super::*;

    // Empty input produces no tokens at all.
    #[test]
    fn test_empty() {
        assert_eq!(lex("").collect::<Vec<_>>(), vec![]);
    }

    // A section header splits into bracket / name / bracket / newline.
    #[test]
    fn test_simple_section() {
        let input = "[Unit]\n";
        assert_eq!(
            lex(input).collect::<Vec<_>>(),
            vec![
                (LEFT_BRACKET, "["),
                (SECTION_NAME, "Unit"),
                (RIGHT_BRACKET, "]"),
                (NEWLINE, "\n"),
            ]
        );
    }

    // The value keeps internal spaces as a single VALUE token.
    #[test]
    fn test_key_value() {
        let input = "Description=Test Service\n";
        assert_eq!(
            lex(input).collect::<Vec<_>>(),
            vec![
                (KEY, "Description"),
                (EQUALS, "="),
                (VALUE, "Test Service"),
                (NEWLINE, "\n"),
            ]
        );
    }

    // Whitespace around '=' is emitted as separate WHITESPACE tokens,
    // not folded into the key or value.
    #[test]
    fn test_key_value_with_spaces() {
        let input = "Description = Test Service\n";
        assert_eq!(
            lex(input).collect::<Vec<_>>(),
            vec![
                (KEY, "Description"),
                (WHITESPACE, " "),
                (EQUALS, "="),
                (WHITESPACE, " "),
                (VALUE, "Test Service"),
                (NEWLINE, "\n"),
            ]
        );
    }

    // '#' at line start begins a comment; the newline is a separate token.
    #[test]
    fn test_comment_hash() {
        let input = "# This is a comment\n";
        assert_eq!(
            lex(input).collect::<Vec<_>>(),
            vec![(COMMENT, "# This is a comment"), (NEWLINE, "\n"),]
        );
    }

    // ';' is the alternate comment marker.
    #[test]
    fn test_comment_semicolon() {
        let input = "; This is a comment\n";
        assert_eq!(
            lex(input).collect::<Vec<_>>(),
            vec![(COMMENT, "; This is a comment"), (NEWLINE, "\n"),]
        );
    }

    // "\\\n" inside a value ends the VALUE token and becomes a
    // LINE_CONTINUATION; the continued text lexes as WHITESPACE + VALUE.
    #[test]
    fn test_line_continuation() {
        let input = "ExecStart=/bin/echo \\\n hello\n";
        let tokens: Vec<_> = lex(input).collect();
        assert_eq!(tokens[0], (KEY, "ExecStart"));
        assert_eq!(tokens[1], (EQUALS, "="));
        assert_eq!(tokens[2], (VALUE, "/bin/echo "));
        assert_eq!(tokens[3], (LINE_CONTINUATION, "\\\n"));
        assert_eq!(tokens[4], (WHITESPACE, " "));
        assert_eq!(tokens[5], (VALUE, "hello"));
        assert_eq!(tokens[6], (NEWLINE, "\n"));
    }

    // Smoke test over a realistic multi-section unit file; spot-checks the
    // header tokens and one key/value pair rather than the full stream.
    #[test]
    fn test_full_unit_file() {
        let input = r#"[Unit]
Description=Test Service
After=network.target

[Service]
Type=simple
ExecStart=/usr/bin/test
"#;
        let tokens: Vec<_> = lex(input).collect();

        assert_eq!(tokens[0].0, LEFT_BRACKET);
        assert_eq!(tokens[1].0, SECTION_NAME);
        assert_eq!(tokens[1].1, "Unit");
        assert_eq!(tokens[2].0, RIGHT_BRACKET);
        assert_eq!(tokens[3].0, NEWLINE);

        let desc_idx = tokens
            .iter()
            .position(|(k, t)| *k == KEY && *t == "Description")
            .unwrap();
        assert_eq!(tokens[desc_idx + 1].0, EQUALS);
        assert_eq!(tokens[desc_idx + 2].0, VALUE);
        assert_eq!(tokens[desc_idx + 2].1, "Test Service");
    }

    // A blank line shows up as two consecutive NEWLINE tokens.
    #[test]
    fn test_blank_lines() {
        let input = "Key=Value\n\nKey2=Value2\n";
        let tokens: Vec<_> = lex(input).collect();

        let first_newline = tokens.iter().position(|(k, _)| *k == NEWLINE).unwrap();
        assert_eq!(tokens[first_newline + 1].0, NEWLINE);
    }
}