/// All token and node kinds for systemd-style unit files (INI-like syntax
/// with `[Section]` headers, `Key=Value` entries, `#`/`;` comments, and
/// backslash line continuations).
///
/// `#[repr(u16)]` plus the `From` impl below give a lossless conversion
/// into `rowan::SyntaxKind` for use in a rowan green tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(non_camel_case_types)] // SCREAMING_CASE kinds follow rowan-ecosystem convention
#[repr(u16)]
pub enum SyntaxKind {
    // --- Token (leaf) kinds, produced by the lexer ---
    LEFT_BRACKET = 0,   // `[` opening a section header
    RIGHT_BRACKET,      // `]` closing a section header
    EQUALS,             // `=` separating key from value
    KEY,                // entry key (ASCII letter start, then [A-Za-z0-9_-])
    SECTION_NAME,       // text between `[` and `]`
    VALUE,              // entry value text (right of `=`)
    COMMENT,            // `#`- or `;`-prefixed comment (newline excluded)
    NEWLINE,            // `\n`, `\r`, or `\r\n` as a single token
    WHITESPACE,         // run of spaces and/or tabs
    LINE_CONTINUATION,  // `\` immediately followed by a newline
    ERROR,              // single character the lexer could not classify

    // --- Composite node kinds (not emitted by the lexer in this file;
    // presumably built by a parser elsewhere — confirm) ---
    ROOT,
    SECTION,
    SECTION_HEADER,
    ENTRY,
    BLANK_LINE,
}
42
/// Converts a `SyntaxKind` into rowan's raw kind; the `#[repr(u16)]`
/// discriminant maps one-to-one onto rowan's `u16` payload.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        Self(kind as u16)
    }
}
49
50#[inline]
/// Returns `true` for characters allowed to begin a key: ASCII letters only.
#[inline]
fn is_valid_initial_key_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z')
}
56
57#[inline]
/// Returns `true` for characters allowed anywhere in a key:
/// ASCII letters, ASCII digits, `-`, and `_`.
#[inline]
fn is_valid_key_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_')
}
63
64#[inline]
/// Returns `true` for line-terminator characters (LF or CR).
#[inline]
fn is_newline(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
69
70#[inline]
/// Returns `true` for intra-line whitespace (space or tab); newlines are
/// classified separately by `is_newline`.
#[inline]
fn is_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
75
76fn lex_impl(input: &str) -> impl Iterator<Item = (SyntaxKind, &str)> + '_ {
78 let mut remaining = input;
79 let mut at_line_start = true;
80 let mut in_section_header = false;
81
82 std::iter::from_fn(move || {
83 if remaining.is_empty() {
84 return None;
85 }
86
87 let c = remaining.chars().next()?;
88
89 match c {
90 _ if is_newline(c) => {
92 let char_len = c.len_utf8();
93 if c == '\r' && remaining.get(1..2) == Some("\n") {
95 let (token, rest) = remaining.split_at(2);
96 remaining = rest;
97 at_line_start = true;
98 in_section_header = false;
99 Some((SyntaxKind::NEWLINE, token))
100 } else {
101 let (token, rest) = remaining.split_at(char_len);
102 remaining = rest;
103 at_line_start = true;
104 in_section_header = false;
105 Some((SyntaxKind::NEWLINE, token))
106 }
107 }
108
109 '#' | ';' if at_line_start => {
111 let end = remaining.find(is_newline).unwrap_or(remaining.len());
112 let (token, rest) = remaining.split_at(end);
113 remaining = rest;
114 Some((SyntaxKind::COMMENT, token))
115 }
116
117 '\\' if remaining.get(1..2) == Some("\n") || remaining.get(1..3) == Some("\r\n") => {
119 let len = if remaining.get(1..3) == Some("\r\n") {
120 3
121 } else {
122 2
123 };
124 let (token, rest) = remaining.split_at(len);
125 remaining = rest;
126 at_line_start = false; Some((SyntaxKind::LINE_CONTINUATION, token))
128 }
129
130 '[' if at_line_start => {
132 remaining = &remaining[1..]; at_line_start = false;
134 in_section_header = true;
135 Some((SyntaxKind::LEFT_BRACKET, "["))
136 }
137
138 ']' if in_section_header => {
139 remaining = &remaining[1..]; in_section_header = false;
141 Some((SyntaxKind::RIGHT_BRACKET, "]"))
142 }
143
144 _ if is_whitespace(c) && at_line_start => {
146 let end = remaining
147 .find(|c| !is_whitespace(c))
148 .unwrap_or(remaining.len());
149 let (token, rest) = remaining.split_at(end);
150 remaining = rest;
151 at_line_start = false; Some((SyntaxKind::WHITESPACE, token))
153 }
154
155 _ if is_whitespace(c) => {
157 let end = remaining
158 .find(|c| !is_whitespace(c))
159 .unwrap_or(remaining.len());
160 let (token, rest) = remaining.split_at(end);
161 remaining = rest;
162 Some((SyntaxKind::WHITESPACE, token))
163 }
164
165 '=' => {
167 remaining = &remaining[1..];
168 Some((SyntaxKind::EQUALS, "="))
169 }
170
171 _ if is_valid_initial_key_char(c) && at_line_start => {
173 let end = remaining
174 .find(|c: char| !is_valid_key_char(c))
175 .unwrap_or(remaining.len());
176 let (token, rest) = remaining.split_at(end);
177 remaining = rest;
178 at_line_start = false;
179 Some((SyntaxKind::KEY, token))
180 }
181
182 _ if in_section_header => {
184 let end = remaining.find(']').unwrap_or(remaining.len());
186 let (token, rest) = remaining.split_at(end);
187 remaining = rest;
188 Some((SyntaxKind::SECTION_NAME, token))
189 }
190
191 _ if !at_line_start => {
193 let mut end = 0;
195 for ch in remaining.chars() {
196 if ch == '\\' {
197 let remaining_from_here = &remaining[end..];
199 if remaining_from_here.get(1..2) == Some("\n")
200 || remaining_from_here.get(1..3) == Some("\r\n")
201 {
202 break;
204 }
205 end += ch.len_utf8();
206 } else if is_newline(ch) {
207 break;
209 } else {
210 end += ch.len_utf8();
211 }
212 }
213
214 if end == 0 {
215 None
217 } else {
218 let (token, rest) = remaining.split_at(end);
219 remaining = rest;
220 Some((SyntaxKind::VALUE, token))
221 }
222 }
223
224 _ => {
226 let char_len = c.len_utf8();
227 let (token, rest) = remaining.split_at(char_len);
228 remaining = rest;
229 at_line_start = false;
230 Some((SyntaxKind::ERROR, token))
231 }
232 }
233 })
234}
235
/// Crate-level entry point: tokenizes `input` into `(SyntaxKind, &str)`
/// pairs. The lexer is lossless — concatenating the returned `&str` slices
/// reproduces `input` exactly. Delegates to `lex_impl`.
pub(crate) fn lex(input: &str) -> impl Iterator<Item = (SyntaxKind, &str)> {
    lex_impl(input)
}
240
#[cfg(test)]
mod tests {
    //! Lexer tests: each case pins the exact token stream (kind + text)
    //! produced for a representative unit-file fragment.
    use super::SyntaxKind::*;
    use super::*;

    // Empty input yields no tokens at all.
    #[test]
    fn test_empty() {
        assert_eq!(lex("").collect::<Vec<_>>(), vec![]);
    }

    // A terminated section header splits into bracket/name/bracket tokens.
    #[test]
    fn test_simple_section() {
        let input = "[Unit]\n";
        assert_eq!(
            lex(input).collect::<Vec<_>>(),
            vec![
                (LEFT_BRACKET, "["),
                (SECTION_NAME, "Unit"),
                (RIGHT_BRACKET, "]"),
                (NEWLINE, "\n"),
            ]
        );
    }

    // Basic entry: KEY, EQUALS, then the rest of the line as one VALUE.
    #[test]
    fn test_key_value() {
        let input = "Description=Test Service\n";
        assert_eq!(
            lex(input).collect::<Vec<_>>(),
            vec![
                (KEY, "Description"),
                (EQUALS, "="),
                (VALUE, "Test Service"),
                (NEWLINE, "\n"),
            ]
        );
    }

    // Whitespace around `=` becomes separate WHITESPACE tokens (lossless).
    #[test]
    fn test_key_value_with_spaces() {
        let input = "Description = Test Service\n";
        assert_eq!(
            lex(input).collect::<Vec<_>>(),
            vec![
                (KEY, "Description"),
                (WHITESPACE, " "),
                (EQUALS, "="),
                (WHITESPACE, " "),
                (VALUE, "Test Service"),
                (NEWLINE, "\n"),
            ]
        );
    }

    // `#` comment: token excludes the trailing newline.
    #[test]
    fn test_comment_hash() {
        let input = "# This is a comment\n";
        assert_eq!(
            lex(input).collect::<Vec<_>>(),
            vec![(COMMENT, "# This is a comment"), (NEWLINE, "\n"),]
        );
    }

    // `;` comments are equivalent to `#` comments.
    #[test]
    fn test_comment_semicolon() {
        let input = "; This is a comment\n";
        assert_eq!(
            lex(input).collect::<Vec<_>>(),
            vec![(COMMENT, "; This is a comment"), (NEWLINE, "\n"),]
        );
    }

    // `\` before the newline splits the value; the continuation token owns
    // the backslash and the newline, and the next line's indent is WHITESPACE.
    #[test]
    fn test_line_continuation() {
        let input = "ExecStart=/bin/echo \\\n hello\n";
        let tokens: Vec<_> = lex(input).collect();
        assert_eq!(tokens[0], (KEY, "ExecStart"));
        assert_eq!(tokens[1], (EQUALS, "="));
        assert_eq!(tokens[2], (VALUE, "/bin/echo "));
        assert_eq!(tokens[3], (LINE_CONTINUATION, "\\\n"));
        assert_eq!(tokens[4], (WHITESPACE, " "));
        assert_eq!(tokens[5], (VALUE, "hello"));
        assert_eq!(tokens[6], (NEWLINE, "\n"));
    }

    // Multi-section document: spot-check the header tokens and one entry.
    #[test]
    fn test_full_unit_file() {
        let input = r#"[Unit]
Description=Test Service
After=network.target

[Service]
Type=simple
ExecStart=/usr/bin/test
"#;
        let tokens: Vec<_> = lex(input).collect();

        assert_eq!(tokens[0].0, LEFT_BRACKET);
        assert_eq!(tokens[1].0, SECTION_NAME);
        assert_eq!(tokens[1].1, "Unit");
        assert_eq!(tokens[2].0, RIGHT_BRACKET);
        assert_eq!(tokens[3].0, NEWLINE);

        let desc_idx = tokens
            .iter()
            .position(|(k, t)| *k == KEY && *t == "Description")
            .unwrap();
        assert_eq!(tokens[desc_idx + 1].0, EQUALS);
        assert_eq!(tokens[desc_idx + 2].0, VALUE);
        assert_eq!(tokens[desc_idx + 2].1, "Test Service");
    }

    // A blank line is lexed as two consecutive NEWLINE tokens.
    #[test]
    fn test_blank_lines() {
        let input = "Key=Value\n\nKey2=Value2\n";
        let tokens: Vec<_> = lex(input).collect();

        let first_newline = tokens.iter().position(|(k, _)| *k == NEWLINE).unwrap();
        assert_eq!(tokens[first_newline + 1].0, NEWLINE);
    }
}