keytree/parser.rs
1//! Parses string into `KeyTreeCore` type.
2
3use crate::path::UniquePath;
4use crate::error::KeyTreeErr;
5
6use crate::{
7 EachIndent,
8 Key,
9 KeyLen,
10 Value,
11 Token,
12 Tokens,
13 KeyMap,
14 KeyTreeCore,
15};
16
17const INDENT_STEP: usize = 4;
18
19// The parser has a set of states that change as it reads through the characters. The states are:
20//
21// ```text
22// this_is_a_key: "v\"alue"
23// ^ ^ ^ ^ ^ ^
24// | | | | | |
25// FC BK IK RAK IV AV
26// ```
27//
28// or
29//
// ```text
31// CM
32// |
33// v
34// // this is a comment
35// ^ ^ ^
36// | | |
37// FC BK COK
38// ```
39//
40// or
41//
// ```text
43// IK
44// |
45// v
46// /this_is_a_key:
47// ^ ^ ^
48// | | |
// FC BK COK
50// ```
51
/// Parser state: where in the current line the character being read sits.
#[derive(Debug, Clone, PartialEq)]
enum PS {
    /// First character of a line.
    FC,
    /// Before the key (leading whitespace).
    BK,
    /// A single '/' has been read: start of a comment or of a key.
    COK,

    /// Inside a key.
    IK,
    /// The character right after the key.
    RAK,
    /// After the key.
    AK,

    /// Inside a value.
    IV,

    /// Inside a comment.
    CM,
}
66
/// Namespace type for the parser: `KeyTreeBuilder::parse()` turns a source
/// string into a `KeyTreeCore`. It carries no data of its own.
pub struct KeyTreeBuilder;
68
69// Because there are many variables that need to be passed from parse() to new_token(), the
70// BuildVars struct is used to collect them together.
71
72#[derive(Debug)]
73pub struct BuildVars<'a> {
74
75 // Grow while looping
76
77 keymap: KeyMap,
78
79 keylen: KeyLen,
80
81 tokens: Tokens,
82
83 each_indent: EachIndent,
84 // While parsing, this Vec keeps track of the set of all paths which do not
85 // have 'end' set. The first element in the Vec has indent equal to its
86 // index. It also keeps track of indent numbers.
87
88 path: UniquePath,
89 // The last path inserted into KeyMap.
90
91 first_key: bool,
92 // Starts off as true and flips to false after the
93 // first key is read. This is used to check that the first
94 // non-blank, non-comment token is a key only.
95
96 ch_root_indent: Option<usize>,
97 // This is the indentation of the top key. Indentation of
98 // other keys in the data string should be aligned to this
99 // value.
100
101 root_path: Option<UniquePath>,
102 // The root path.
103
104 pos: usize,
105 // Char position. This is required after while loop.
106
107 // Reset with each new line
108
109 ch_indent: Option<usize>,
110
111 start_line: Option<usize>,
112 // The index of the start of a new line.
113
114 start_key: Option<usize>,
115
116 end_key: Option<usize>,
117 // The index of the end of a key.
118
119 start_val: Option<usize>,
120 // The index of the start of a key. It is also set at the
121 // start of a comment.
122
123 end_val: Option<usize>,
124 // The index of the end of a value.
125
126 pub s: &'a str,
127}
128
129impl<'a> BuildVars<'a> {
130
131 fn new(s: &'a str) -> Self {
132 Self {
133 keymap: KeyMap::new(),
134 keylen: KeyLen::new(),
135 tokens: Tokens::new(),
136 each_indent: EachIndent::new(),
137 path: UniquePath::new(),
138 first_key: true,
139 ch_root_indent: None,
140 root_path: None,
141 pos: 0,
142 ch_indent: None,
143 start_line: None,
144 start_key: None,
145 end_key: None,
146 start_val: None,
147 end_val: None,
148 s: s,
149 }
150 }
151
152 // Resets values that are not valid for the next loop.
153 fn new_line(&mut self, pos: usize) {
154 self.start_line = Some(pos);
155 self.start_key = None;
156 self.end_key = None;
157 self.start_val = None;
158 self.end_val = None;
159 }
160
161 // pub fn err_output(&self, pos: usize) {
162 // let line: &str;
163 // let num: usize;
164 // let mut spos: usize = 0;
165 // let mut iter = self.s.lines().enumerate();
166 // while spos < pos {
167 // if let Some((num, line)) = iter.next() {
168 // spos += line.chars().count();
169 // }
170 // };
171 // println!("{:3} {}", num + 1, line)
172 // }
173
174 // pub fn line_of_pos(&self, pos: usize) {
175
176 // println!("{}", pos)
177 // }
178}
179
180impl<'a> KeyTreeBuilder {
181
182 /// Parse a `KeyTree` string into an immutable `KeyTreeCore`. For context, see main example at
183 /// the start of the documentation or in README.md
184 ///
185 pub fn parse(s: &'a str) -> KeyTreeCore<'a> {
186
187 if s == "" { KeyTreeErr::empty_string(); unreachable!() };
188
189 let mut vars = BuildVars::new(s);
190
191 let mut parse_state: PS = PS::FC;
192
193 // Declared here so that it can be used after iterating over chars.
194
195 let mut iter = s.char_indices();
196
197 while let Some((pos, ch)) = iter.next() {
198
199 vars.pos = pos;
200
201 // 'continue's are required at the end of each section because parse_state may have
202 // changed and so the parser may enter into a new section without iterating to the next
203 // character.
204 //
205 // `fn ParseErr::name()` functions are errors that exit and so never return.
206
207 match (&parse_state, ch, ch.is_whitespace()) {
208
209 // If the first char is '\n' then must be blank line.
210 (PS::FC, '\n', true) => {
211 parse_state = PS::FC;
212 },
213
214 // First character in line. Whitespace.
215 (PS::FC, _, true) => {
216 Self::set_start_line(&mut vars, pos);
217 parse_state = PS::BK;
218 },
219
220 // First character in line. Could be either first '/' of comment or first char of
221 // key.
222 (PS::FC, '/', false) => {
223 Self::set_start_line(&mut vars, pos);
224 Self::set_start_key(&mut vars, pos);
225 parse_state = PS::COK;
226 },
227
228 // First character in line. Key cannot start with colon.
229 (PS::FC, ':', false) => {
230 KeyTreeErr::colon_before_key(pos);
231 unreachable!();
232 },
233
234 // At first character and receive a non-whitespace other than '/'. This must be a
235 // key or key_value.
236 (PS::FC, _, false) => {
237 Self::set_start_line(&mut vars, pos);
238 Self::set_start_key(&mut vars, pos);
239 vars.start_key = Some(pos);
240 parse_state = PS::IK;
241 },
242
243 // If we are given a '\n' before a key it must be a blank line.
244 (PS::BK, '\n', true) => {
245 parse_state = PS::FC;
246 },
247
248 // Before key and receive a whitespace. Continue.
249 (PS::BK, _, true) => { },
250
251 // Before key and receive a `/`. This Could be either first '/' of comment or first
252 // char of key.
253 (PS::BK, '/', false) => {
254 Self::set_start_key(&mut vars, pos);
255 parse_state = PS::COK;
256 },
257
258 // Before key and recieve ':'. Key cannot start with colon.
259 (PS::BK, ':', false) => {
260 KeyTreeErr::colon_before_key(pos);
261 unreachable!();
262 },
263
264 // Before key are receive non-whitespace other than ':'. Must be first token in a
265 // key.
266 (PS::BK, _, false) => {
267 Self::set_start_key(&mut vars, pos);
268 parse_state = PS::IK;
269 },
270
271 // Have received one '/' and receive a newline. Line is incomplete.
272 (PS::COK, '\n', true) => {
273 KeyTreeErr::line_incomplete(pos);
274 unreachable!();
275 },
276
277 // Have received one '/' and receive a whitespace. This is an error.
278 (PS::COK, _, true) => {
279 KeyTreeErr::no_colon(pos);
280 unreachable!();
281 },
282
283 // Have received one '/' and receive another '/'. This must be a comment.
284 (PS::COK, '/', false) => {
285 parse_state = PS::CM;
286 },
287
288 // Have received one '/' and get a non-whitespace. This must be a key.
289 (PS::COK, _, false) => {
290 parse_state = PS::IK;
291 },
292
293 // In comment and recieve '\n'. End of line.
294 (PS::CM, '\n', true) => {
295 parse_state = PS::FC;
296 },
297
298 // In comment and receive something other than '\n'. Continue.
299 (PS::CM, _, _) => { },
300
301 // In key and receive a '\n'. The line is incomplete.
302 (PS::IK, '\n', true) => {
303 KeyTreeErr::line_incomplete(pos);
304 unreachable!();
305 },
306
307 // In key and receive a whitespace. The key in incomplete.
308 (PS::IK, _, true) => {
309 KeyTreeErr::no_colon(pos);
310 unreachable!();
311 },
312
313 // In key and receive a ':'. This must be end of key.
314 (PS::IK, ':', false) => {
315 Self::set_end_key(&mut vars, pos - 1);
316 parse_state = PS::RAK;
317 },
318
319 // In key and receive a non-whitespace. Continue.
320 (PS::IK, _, false) => { }
321
322 // Right after key and receive a non-whitespace. This is an error.
323 (PS::RAK, _, false) => {
324 KeyTreeErr::no_space_after_key(pos);
325 unreachable!();
326 },
327
328 // Right after key and receive a '\n\'. This must be a key token.
329 (PS::RAK, '\n', true) => {
330 Self::new_token(Self::key_token(&vars), &mut vars);
331 parse_state = PS::FC;
332 },
333
334 // Right after key and receive a whitespace other than '\n'. Continue.
335 (PS::RAK, _, true) => {
336 parse_state = PS::AK;
337 },
338
339 // After key and receive a non-whitespace which must be the start of value.
340 (PS::AK, _, false) => {
341 // First key must be key only.
342 if vars.first_key {
343 KeyTreeErr::first_token_is_val(vars.start_key.unwrap(), &vars);
344 unreachable!();
345 };
346 Self::set_start_val(&mut vars, pos);
347 parse_state = PS::IV;
348 },
349
350 // After key. No value.
351 (PS::AK, '\n', true) => {
352 Self::new_token(Self::key_token(&vars), &mut vars);
353 parse_state = PS::FC;
354 },
355
356 // After key. Whitespace is a no-op.
357 (PS::AK, _, true) => { },
358
359 // In value and receive a '\n'. This must be a key_value.
360 (PS::IV, '\n', true) => {
361 Self::set_end_val(&mut vars, pos - 1);
362 Self::new_token(Self::value_token(&vars), &mut vars);
363 parse_state = PS::FC;
364 },
365
366 // In value. Whitespace is a no-op.
367 (PS::IV, _, true) => { },
368
369 // In value. Non-whitespace, update end_val.
370 (PS::IV, _, false) => {
371 vars.end_val = Some(pos);
372 },
373 }; // end match
374 };
375
376 // Need to handle end of text with no newline. Expect parse start to be
377 //
378 // FC (first char) do nothing
379 // RAK (right after key) insert new key
380 // AK (after key) insert new key
381 // AV insert new key_value
382 // CM do nothing
383 // _ error: incomplete_parse()
384
385 match parse_state {
386
387 // In comment. Non-whitespace.
388 PS::CM => { },
389
390 // After key. No value.
391 PS::RAK | PS::AK => {
392 vars.end_key = Some(vars.pos);
393 Self::new_token(Self::key_token(&vars), &mut vars);
394 },
395
396 // After value.
397 PS::IV => {
398 // First key must be key only.
399 if vars.first_key {
400 KeyTreeErr::first_token_is_val(vars.start_key.unwrap(), &vars);
401 unreachable!();
402 };
403 vars.end_val = Some(vars.pos);
404 Self::new_token(Self::value_token(&vars), &mut vars);
405 },
406
407 _ => {
408 KeyTreeErr::line_incomplete(s.len() - 1);
409 },
410 };
411
412 // This sets ends in vars.keymap
413 Self::insert_end_indices(&mut vars, 0);
414
415 KeyTreeCore {
416 s: s,
417 keymap: vars.keymap,
418 keylen: vars.keylen,
419 tokens: vars.tokens,
420 root: vars.root_path.unwrap(),
421 }
422 }
423
424 // New token takes a new Token and inserts it into a KeyMap and Tokens list. We are passing a
425 // whole lot of variables that we need to change in the calling function `parse()`.
426 //
427 fn new_token(token: Token, mut vars: &mut BuildVars) {
428
429 // Check that we can use the token to create a path segment.
430
431 let key = &vars.s[vars.start_key.unwrap()..=vars.end_key.unwrap()];
432
433 // let indent = Self::indent(vars);
434
435 if vars.first_key { // Root token
436
437 vars.ch_root_indent = Some(vars.start_key.unwrap());
438 vars.first_key = false;
439
440 vars.path = UniquePath::from(key).unwrap();
441 vars.root_path = Some(vars.path.clone());
442
443 vars.tokens.push(token);
444 vars.keymap.insert(&vars.path, vars.tokens.len() - 1);
445 vars.keylen.insert(&vars.path);
446
447 // Update vars.each_indent
448 vars.each_indent.push(&vars.path);
449
450 } else { // All other tokens
451
452 // Order is important in this section because the dependencies are intricate. First we
453 // set independent variables
454 //
455 // old_indent
456 // new_indent
457 //
458 // To set vars.path, we need to create it from `key` and then set its index by looking
459 // up each_indent. Each_indent is determined by the previous loop, and therefore
460 // should be set at the end of this function.
461 //
462 // vars.path
463 //
464 // Inserting end indices should be done before
465
466 // Set independent variables
467
468 let old_indent = vars.path.len() - 1;
469 let new_indent = Self::indent(&vars); // Indent from new token.
470
471
472 // Set vars.path
473
474 vars.path = vars.path
475 .clone()
476 .truncate(new_indent)
477 .append_unique(
478 &mut UniquePath::from(key).unwrap()
479 ); // Parsing should eliminate
480 // badly formed strings.
481
482 let index = vars.each_indent.new_index(
483 &vars.path,
484 new_indent
485 );
486
487 vars.path.set_last_index(index);
488
489 // Update end indices
490
491 if new_indent <= old_indent {
492 Self::insert_end_indices(&mut vars, new_indent);
493 };
494
495 // Insert the data
496
497 vars.keylen.insert(&vars.path);
498
499 vars.tokens.push(token);
500
501 vars.keymap.insert(&vars.path, vars.tokens.len() - 1);
502
503 // Insert var.each_indent should be at the end of this function as its state should be
504 // set by the previous parser loop.
505
506 vars.each_indent.insert(&vars.path, new_indent);
507
508 };
509 }
510
511 // Return indentation given position from start of line and root_indent (as an integer 0, 1, 2,
512 // ...).
513 //
514 fn indent(vars: &BuildVars) -> usize {
515
516 let ch_indent = (vars.start_key.unwrap() - vars.start_line.unwrap()) - vars.ch_root_indent.unwrap() + 1;
517
518 if ch_indent % INDENT_STEP != 0 {
519 KeyTreeErr::indent(ch_indent, vars);
520 unreachable!();
521 } else {
522 ch_indent / INDENT_STEP
523 }
524 }
525
526 // Construct a Value Token
527
528 fn value_token(vars: &BuildVars) -> Token {
529 Token::Value(
530 Value::new(
531 vars.start_key.unwrap(),
532 vars.end_key.unwrap(),
533 vars.start_val.unwrap(),
534 vars.end_val.unwrap(),
535 )
536 )
537 }
538
539 fn key_token(vars: &BuildVars) -> Token {
540 Token::Key(
541 Key::new(
542 vars.start_key.unwrap(),
543 vars.end_key.unwrap(),
544 )
545 )
546 }
547
548 fn set_start_line(vars: &mut BuildVars, pos: usize) {
549 vars.start_line = Some(pos);
550 }
551
552 fn set_start_key(vars: &mut BuildVars, pos: usize) {
553 vars.start_key = Some(pos);
554 }
555
556 fn set_end_key(vars: &mut BuildVars, pos: usize) {
557 vars.end_key = Some(pos);
558 }
559
560 fn set_start_val(vars: &mut BuildVars, pos: usize) {
561 vars.start_val = Some(pos);
562 }
563
564 fn set_end_val(vars: &mut BuildVars, pos: usize) {
565 vars.end_val = Some(pos);
566 }
567
568 // When new tokens are inserted into KeyMap, the end index is not known. This function inserts
569 // the end index when it is known.
570 //
571 fn insert_end_indices(vars: &mut BuildVars, indent: usize) {
572 for i in indent..vars.each_indent.len() {
573 vars.keymap
574 .set_end(&vars.each_indent[i], vars.tokens.len() - 1);
575 };
576 vars.each_indent.0.truncate(indent);
577 }
578}