/// A lexical token produced by [`tokenize`], paired by the caller with the
/// 1-based line number it started on.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A bare word, or the contents of a `"..."` quoted string (quotes stripped).
    Term(String),

    /// The `=` separator between a key and its value.
    Eq,

    /// The `{` opening a block.
    Open,

    /// The `}` closing a block.
    Close,

    /// End of input; always the final token in the stream.
    Eof,
}
42
/// Current mode of the tokenizer's character-at-a-time state machine.
///
/// Derives beyond `PartialEq` are free for a fieldless enum: `Debug` makes the
/// state printable in error/debug output, `Eq` completes the equivalence
/// relation, and `Clone`/`Copy` let the state be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Between tokens; skipping spaces, tabs, CRs, and newlines.
    Whitespace,
    /// Inside a `#` comment; consuming until end of line.
    Comment,
    /// Accumulating a bare (unquoted) term into the buffer.
    Term,
    /// Inside a `"..."` string; may span multiple lines.
    Quoted,
}
55
56pub fn tokenize(input: &str) -> crate::error::Result<Vec<(Token, u32)>> {
82 let mut tokens = Vec::new();
83 let mut state = State::Whitespace;
84 let mut buf = String::new();
85 let mut line = 1u32;
86 let mut token_line = 1u32;
87
88 for ch in input.chars() {
89 match state {
90 State::Whitespace => match ch {
91 '\n' => line = inc_line(line)?,
92 ' ' | '\t' | '\r' => {}
93 '#' => state = State::Comment,
94 '"' => {
95 token_line = line;
96 state = State::Quoted;
97 }
98 '=' => tokens.push((Token::Eq, line)),
99 '{' => tokens.push((Token::Open, line)),
100 '}' => tokens.push((Token::Close, line)),
101 _ => {
102 token_line = line;
103 buf.push(ch);
104 state = State::Term;
105 }
106 },
107 State::Quoted => {
108 if ch == '"' {
109 tokens.push((Token::Term(buf.clone()), token_line));
110 buf.clear();
111 state = State::Whitespace;
112 } else {
113 if ch == '\n' {
114 line = inc_line(line)?;
115 }
116 buf.push(ch);
117 }
118 }
119 State::Comment => {
120 if ch == '\n' {
121 line = inc_line(line)?;
122 state = State::Whitespace;
123 }
124 }
125 State::Term => {
126 flush_term_char(
127 ch,
128 &mut buf,
129 &mut tokens,
130 &mut line,
131 &mut token_line,
132 &mut state,
133 )?;
134 }
135 }
136 }
137
138 if state == State::Term {
140 let term = buf.trim().to_owned();
141 if !term.is_empty() {
142 tokens.push((Token::Term(term), token_line));
143 }
144 }
145
146 if state == State::Quoted {
148 return Err(crate::error::Error::Parse(format!(
149 "line {token_line}: unterminated quoted string"
150 )));
151 }
152
153 tokens.push((Token::Eof, line));
154 Ok(tokens)
155}
156
157fn inc_line(line: u32) -> crate::error::Result<u32> {
159 line.checked_add(1).ok_or_else(|| {
160 crate::error::Error::Parse("file exceeds maximum line count (u32::MAX)".to_owned())
161 })
162}
163
164fn flush_term_char(
167 ch: char,
168 buf: &mut String,
169 tokens: &mut Vec<(Token, u32)>,
170 line: &mut u32,
171 token_line: &mut u32,
172 state: &mut State,
173) -> crate::error::Result<()> {
174 match ch {
175 '\n' => {
176 flush_buf(buf, tokens, *token_line);
177 *line = inc_line(*line)?;
178 *state = State::Whitespace;
179 }
180 '#' | ' ' | '\t' | '\r' => {
181 flush_buf(buf, tokens, *token_line);
182 *state = if ch == '#' {
183 State::Comment
184 } else {
185 State::Whitespace
186 };
187 }
188 '=' => {
189 flush_buf(buf, tokens, *token_line);
190 tokens.push((Token::Eq, *line));
191 *state = State::Whitespace;
192 }
193 '{' => {
194 flush_buf(buf, tokens, *token_line);
195 tokens.push((Token::Open, *line));
196 *state = State::Whitespace;
197 }
198 '}' => {
199 flush_buf(buf, tokens, *token_line);
200 tokens.push((Token::Close, *line));
201 *state = State::Whitespace;
202 }
203 _ => buf.push(ch),
204 }
205 Ok(())
206}
207
208fn flush_buf(buf: &mut String, tokens: &mut Vec<(Token, u32)>, token_line: u32) {
210 let term = buf.trim().to_owned();
211 if !term.is_empty() {
212 tokens.push((Token::Term(term), token_line));
213 }
214 buf.clear();
215}
216
#[cfg(test)]
mod tests {
    use super::*;

    // A single `key = value` line yields Term/Eq/Term plus the trailing Eof,
    // all tagged line 1.
    #[test]
    fn basic_kv() {
        let toks = tokenize("key = value").unwrap();
        assert_eq!(
            toks,
            vec![
                (Token::Term("key".into()), 1),
                (Token::Eq, 1),
                (Token::Term("value".into()), 1),
                (Token::Eof, 1),
            ]
        );
    }

    // Quotes are stripped and the inner whitespace is preserved verbatim.
    #[test]
    fn quoted_value() {
        let toks = tokenize(r#"key = "hello world""#).unwrap();
        assert_eq!(
            toks,
            vec![
                (Token::Term("key".into()), 1),
                (Token::Eq, 1),
                (Token::Term("hello world".into()), 1),
                (Token::Eof, 1),
            ]
        );
    }

    // A full-line comment produces no tokens; following tokens carry line 2.
    #[test]
    fn comment_stripped() {
        let toks = tokenize("# comment\nkey = val").unwrap();
        assert_eq!(
            toks,
            vec![
                (Token::Term("key".into()), 2),
                (Token::Eq, 2),
                (Token::Term("val".into()), 2),
                (Token::Eof, 2),
            ]
        );
    }

    // Brace-delimited lists tokenize as Open, the elements, then Close.
    #[test]
    fn array() {
        let toks = tokenize("k = { 1 2 3 }").unwrap();
        assert_eq!(
            toks,
            vec![
                (Token::Term("k".into()), 1),
                (Token::Eq, 1),
                (Token::Open, 1),
                (Token::Term("1".into()), 1),
                (Token::Term("2".into()), 1),
                (Token::Term("3".into()), 1),
                (Token::Close, 1),
                (Token::Eof, 1),
            ]
        );
    }

    // Each token carries the line it appears on; the trailing newline means
    // Eof is reported on line 4.
    #[test]
    fn multiline_line_numbers() {
        let toks = tokenize("a = 1\nb = 2\nc = 3\n").unwrap();
        assert_eq!(
            toks,
            vec![
                (Token::Term("a".into()), 1),
                (Token::Eq, 1),
                (Token::Term("1".into()), 1),
                (Token::Term("b".into()), 2),
                (Token::Eq, 2),
                (Token::Term("2".into()), 2),
                (Token::Term("c".into()), 3),
                (Token::Eq, 3),
                (Token::Term("3".into()), 3),
                (Token::Eof, 4),
            ]
        );
    }

    // The line counter refuses to wrap past u32::MAX.
    #[test]
    fn line_overflow_returns_error() {
        assert!(inc_line(u32::MAX).is_err());
    }

    // An unclosed quote is an error that names the line the quote opened on.
    #[test]
    fn unterminated_quoted_string_reports_line_number() {
        let err = tokenize("key = \"unterminated").unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("unterminated quoted string"),
            "expected 'unterminated quoted string' in error: {msg}"
        );
        assert!(
            msg.contains("line 1"),
            "expected line number in error: {msg}"
        );
    }

    // The reported line is where the quote *opened*, even later in the file.
    #[test]
    fn unterminated_quoted_string_multiline_reports_correct_line() {
        let err = tokenize("a = 1\nb = \"unterminated").unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("line 2"), "expected 'line 2' in error: {msg}");
    }
}