keytree/
parser.rs

1//! Parses string into `KeyTreeCore` type.
2
3use crate::path::UniquePath;
4use crate::error::KeyTreeErr;
5
6use crate::{
7    EachIndent,
8    Key,
9    KeyLen,
10    Value,
11    Token,
12    Tokens,
13    KeyMap,
14    KeyTreeCore,
15};
16
17const INDENT_STEP: usize = 4;
18
19// The parser has a set of states that change as it reads through the characters. The states are:
20//
21// ```text
22//         this_is_a_key:      "v\"alue"
23// ^  ^    ^             ^     ^         ^
24// |  |    |             |     |         |
25// FC BK   IK            RAK   IV        AV
26// ```
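//
// Note: `AV` (after value) only marks the end of the value in the diagram; it is not a `PS`
// variant, because the parser returns to `FC` at the newline. `AK` (after key), which is not shown,
// covers the whitespace between `RAK` and `IV`.
//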
28// or 
29//
// ```text
31//           CM
32//           |
33//           v
34//          // this is a comment
35// ^   ^    ^
36// |   |    |
37// FC  BK   COK
38// ```
39//
40// or
41//
// ```text
43//           IK
44//           |
45//           v
46//          /this_is_a_key:
47// ^   ^    ^
48// |   |    |
// FC  BK   COK
50// ```
51
// Parser state: where the scanner currently is within the line being read. `parse` starts in `FC`
// and goes back to `FC` after every newline that ends a blank line, a comment, a key or a
// key-value pair.
#[derive(Clone, Debug, PartialEq)]
enum PS {
    FC,       // First char: no character of the current line has been consumed yet.
    BK,       // Before key: only leading whitespace has been seen on this line.
    COK,      // Comment or key: a single '/' was read; a second '/' makes it a comment,
              // any other non-whitespace character makes it a key.

    IK,       // In key: inside the key, the terminating ':' has not been seen yet.
    RAK,      // The character right after the key, i.e. the one following ':'. It must be
              // whitespace.
    AK,       // After key: whitespace following the key; a value may still start.

    IV,       // In value: from the first non-whitespace character after the key to end of line.

    CM,       // In comment: everything up to the next newline is ignored.
}
66 
/// Namespace type for the parser. It carries no data; `parse` and its helpers are associated
/// functions that keep their working state in a `BuildVars` value.
pub struct KeyTreeBuilder;
68
69// Because there are many variables that need to be passed from parse() to new_token(), the
70// BuildVars struct is used to collect them together.
71
72#[derive(Debug)]
73pub struct BuildVars<'a> {
74
75    // Grow while looping
76
77    keymap:         KeyMap,
78
79    keylen:         KeyLen,
80
81    tokens:         Tokens,
82
83    each_indent:    EachIndent,
84                        // While parsing, this Vec keeps track of the set of all paths which do not
85                        // have 'end' set. The first element in the Vec has indent equal to its
86                        // index. It also keeps track of indent numbers.
87
88    path:           UniquePath,
89                        // The last path inserted into KeyMap.
90
91    first_key:      bool,
92                        // Starts off as true and flips to false after the         
93                        // first key is read. This is used to check that the first
94                        // non-blank, non-comment token is a key only.
95
96    ch_root_indent: Option<usize>,
97                    // This is the indentation of the top key. Indentation of
98                    // other keys in the data string should be aligned to this
99                    // value.
100    
101    root_path:      Option<UniquePath>,
102                        // The root path.
103
104    pos:            usize,
105                        // Char position. This is required after while loop.
106
107    // Reset with each new line 
108
109    ch_indent:      Option<usize>,
110    
111    start_line:     Option<usize>,
112                        // The index of the start of a new line.
113    
114    start_key:      Option<usize>,
115
116    end_key:        Option<usize>,
117                        // The index of the end of a key.
118
119    start_val:      Option<usize>,
120                        // The index of the start of a key. It is also set at the
121                        // start of a comment.
122    
123    end_val:        Option<usize>,
124                        // The index of the end of a value.
125
126    pub s:              &'a str,
127}
128
129impl<'a> BuildVars<'a> {
130
131    fn new(s: &'a str) -> Self {
132        Self {
133            keymap:             KeyMap::new(),
134            keylen:             KeyLen::new(),
135            tokens:             Tokens::new(),
136            each_indent:        EachIndent::new(),
137            path:               UniquePath::new(),
138            first_key:          true,
139            ch_root_indent:     None,
140            root_path:          None,
141            pos:                0,
142            ch_indent:          None,
143            start_line:         None,
144            start_key:          None,
145            end_key:            None,
146            start_val:          None,
147            end_val:            None,
148            s:                  s,
149        }
150    }
151
152    // Resets values that are not valid for the next loop. 
153    fn new_line(&mut self, pos: usize) {
154        self.start_line = Some(pos);
155        self.start_key  = None;
156        self.end_key    = None;
157        self.start_val  = None;
158        self.end_val    = None;
159    }
160
161    // pub fn err_output(&self, pos: usize) {
162    //     let line: &str;
163    //     let num: usize;
164    //     let mut spos: usize = 0;
165    //     let mut iter = self.s.lines().enumerate();
166    //     while spos < pos {
167    //         if let Some((num, line)) = iter.next() {
168    //             spos += line.chars().count();
169    //         }
170    //     };
171    //     println!("{:3} {}", num + 1, line)
172    // }
173
174    // pub fn line_of_pos(&self, pos: usize) {
175
176    //     println!("{}", pos)
177    // }
178}
179
180impl<'a> KeyTreeBuilder {
181
182    /// Parse a `KeyTree` string into an immutable `KeyTreeCore`. For context, see main example at
183    /// the start of the documentation or in README.md
184    ///
185    pub fn parse(s: &'a str) -> KeyTreeCore<'a> {
186
187        if s == "" { KeyTreeErr::empty_string(); unreachable!() };
188
189        let mut vars = BuildVars::new(s);
190
191        let mut parse_state: PS = PS::FC;
192
193                // Declared here so that it can be used after iterating over chars.
194
195        let mut iter = s.char_indices();
196
197        while let Some((pos, ch)) = iter.next() {
198
199            vars.pos = pos;
200
201            // 'continue's are required at the end of each section because parse_state may have
202            // changed and so the parser may enter into a new section without iterating to the next
203            // character.
204            //
205            // `fn ParseErr::name()` functions are errors that exit and so never return.
206
207            match (&parse_state, ch, ch.is_whitespace()) {
208
209                // If the first char is '\n' then must be blank line.
210                (PS::FC, '\n', true) => {
211                    parse_state = PS::FC;
212                },
213
214                // First character in line. Whitespace.
215                (PS::FC, _, true) => {
216                    Self::set_start_line(&mut vars, pos);
217                    parse_state = PS::BK;
218                },
219
220                // First character in line. Could be either first '/' of comment or first char of
221                // key.
222                (PS::FC, '/', false) => {
223                    Self::set_start_line(&mut vars, pos);
224                    Self::set_start_key(&mut vars, pos);
225                    parse_state = PS::COK;
226                },
227
228                // First character in line. Key cannot start with colon.
229                (PS::FC, ':', false) => {
230                    KeyTreeErr::colon_before_key(pos);
231                    unreachable!();
232                },
233
234                // At first character and receive a non-whitespace other than '/'. This must be a
235                // key or key_value.
236                (PS::FC, _, false) => {
237                    Self::set_start_line(&mut vars, pos);
238                    Self::set_start_key(&mut vars, pos);
239                    vars.start_key = Some(pos);
240                    parse_state = PS::IK;
241                },
242
243                // If we are given a '\n' before a key it must be a blank line.
244                (PS::BK, '\n', true) => {
245                    parse_state = PS::FC;
246                },
247                
248                // Before key and receive a whitespace. Continue.
249                (PS::BK, _, true) => { },
250
251                // Before key and receive a `/`. This Could be either first '/' of comment or first
252                // char of key.
253                (PS::BK, '/', false) => {
254                    Self::set_start_key(&mut vars, pos);
255                    parse_state = PS::COK;
256                },
257
258                // Before key and recieve ':'. Key cannot start with colon.
259                (PS::BK, ':', false) => {
260                    KeyTreeErr::colon_before_key(pos);
261                    unreachable!();
262                },
263
264                // Before key are receive non-whitespace other than ':'. Must be first token in a
265                // key.
266                (PS::BK, _, false) => {
267                    Self::set_start_key(&mut vars, pos);
268                    parse_state = PS::IK;
269                },
270
271                // Have received one '/' and receive a newline. Line is incomplete.
272                (PS::COK, '\n', true) => {
273                    KeyTreeErr::line_incomplete(pos);
274                    unreachable!();
275                },
276                
277                // Have received one '/' and receive a whitespace. This is an error.
278                (PS::COK, _, true) => {
279                    KeyTreeErr::no_colon(pos);
280                    unreachable!();
281                },
282
283                // Have received one '/' and receive another '/'. This must be a comment.
284                (PS::COK, '/', false) => {
285                    parse_state = PS::CM;
286                },
287
288                // Have received one '/' and get a non-whitespace. This must be a key.
289                (PS::COK, _, false) => {
290                    parse_state = PS::IK;
291                },
292
293                // In comment and recieve '\n'. End of line.
294                (PS::CM, '\n', true) => {
295                    parse_state = PS::FC;
296                },
297
298                // In comment and receive something other than '\n'. Continue.
299                (PS::CM, _, _) => { },
300
301                // In key and receive a '\n'. The line is incomplete.
302                (PS::IK, '\n', true) => {
303                    KeyTreeErr::line_incomplete(pos);
304                    unreachable!();
305                },
306
307                // In key and receive a whitespace. The key in incomplete.
308                (PS::IK, _, true) => {
309                    KeyTreeErr::no_colon(pos);
310                    unreachable!();
311                },
312
313                // In key and receive a ':'. This must be end of key.
314                (PS::IK, ':', false) => {
315                    Self::set_end_key(&mut vars, pos - 1);
316                    parse_state = PS::RAK;
317                },
318                
319                // In key and receive a non-whitespace. Continue.
320                (PS::IK, _, false) => { }
321
322                // Right after key and receive a non-whitespace. This is an error.
323                (PS::RAK, _, false) => {
324                    KeyTreeErr::no_space_after_key(pos);
325                    unreachable!();
326                },
327
328                // Right after key and receive a '\n\'. This must be a key token.
329                (PS::RAK, '\n', true) => {
330                    Self::new_token(Self::key_token(&vars), &mut vars);
331                    parse_state = PS::FC;
332                },
333
334                // Right after key and receive a whitespace other than '\n'. Continue.
335                (PS::RAK, _, true) => {
336                    parse_state = PS::AK;
337                },
338
339                // After key and receive a non-whitespace which must be the start of value.
340                (PS::AK, _, false) => {
341                    // First key must be key only.
342                    if vars.first_key {
343                        KeyTreeErr::first_token_is_val(vars.start_key.unwrap(), &vars);
344                        unreachable!();
345                    };
346                    Self::set_start_val(&mut vars, pos);
347                    parse_state = PS::IV;
348                },
349
350                // After key. No value.
351                (PS::AK, '\n', true) => {
352                    Self::new_token(Self::key_token(&vars), &mut vars);
353                    parse_state = PS::FC;
354                },
355
356                // After key. Whitespace is a no-op.
357                (PS::AK, _, true) => { },
358
359                // In value and receive a '\n'. This must be a key_value.
360                (PS::IV, '\n', true) => {
361                    Self::set_end_val(&mut vars, pos - 1);
362                    Self::new_token(Self::value_token(&vars), &mut vars);
363                    parse_state = PS::FC;
364                },
365
366                // In value. Whitespace is a no-op.
367                (PS::IV, _, true) => { },
368
369                // In value. Non-whitespace, update end_val.
370                (PS::IV, _, false) => {
371                    vars.end_val = Some(pos);
372                },
373            };  // end match
374        };
375 
376        // Need to handle end of text with no newline. Expect parse start to be
377        //
378        //  FC (first char)          do nothing  
379        //  RAK (right after key)    insert new key
380        //  AK (after key)           insert new key
381        //  AV                       insert new key_value
382        //  CM                       do nothing
383        //  _                        error: incomplete_parse()
384        
385        match parse_state {
386
387            // In comment. Non-whitespace.
388            PS::CM => { }, 
389
390            // After key. No value.
391            PS::RAK | PS::AK => {
392                vars.end_key = Some(vars.pos);
393                Self::new_token(Self::key_token(&vars), &mut vars);
394            },
395
396            // After value.
397            PS::IV => {
398                // First key must be key only.
399                if vars.first_key {
400                    KeyTreeErr::first_token_is_val(vars.start_key.unwrap(), &vars);
401                    unreachable!();
402                };
403                vars.end_val = Some(vars.pos);
404                Self::new_token(Self::value_token(&vars), &mut vars);
405            },
406
407            _ => {
408                KeyTreeErr::line_incomplete(s.len() - 1);
409            },
410        };
411
412        // This sets ends in vars.keymap
413        Self::insert_end_indices(&mut vars, 0);
414
415        KeyTreeCore {
416            s:      s,
417            keymap: vars.keymap,
418            keylen: vars.keylen,
419            tokens: vars.tokens,
420            root:   vars.root_path.unwrap(),
421        }
422    }
423
424    // New token takes a new Token and inserts it into a KeyMap and Tokens list. We are passing a
425    // whole lot of variables that we need to change in the calling function `parse()`.
426    //
427    fn new_token(token: Token, mut vars: &mut BuildVars) {
428
429        // Check that we can use the token to create a path segment.
430        
431        let key = &vars.s[vars.start_key.unwrap()..=vars.end_key.unwrap()];
432
433        // let indent = Self::indent(vars);
434
435        if vars.first_key {     // Root token
436
437            vars.ch_root_indent = Some(vars.start_key.unwrap());
438            vars.first_key      = false;
439
440            vars.path           = UniquePath::from(key).unwrap();
441            vars.root_path      = Some(vars.path.clone());
442
443            vars.tokens.push(token);
444            vars.keymap.insert(&vars.path, vars.tokens.len() - 1);
445            vars.keylen.insert(&vars.path);
446
447            // Update vars.each_indent
448            vars.each_indent.push(&vars.path);
449
450        } else {                // All other tokens
451
452            // Order is important in this section because the dependencies are intricate. First we
453            // set independent variables
454            //
455            //  old_indent
456            //  new_indent
457            //
458            //  To set vars.path, we need to create it from `key` and then set its index by looking
459            //  up each_indent. Each_indent is determined by the previous loop, and therefore
460            //  should be set at the end of this function.
461            //
462            //  vars.path
463            //
464            //  Inserting end indices should be done before
465
466            // Set independent variables
467            
468            let old_indent = vars.path.len() - 1;
469            let new_indent = Self::indent(&vars); // Indent from new token.
470
471
472            // Set vars.path
473
474            vars.path = vars.path
475                .clone()
476                .truncate(new_indent)
477                .append_unique(
478                    &mut UniquePath::from(key).unwrap()
479                );                            // Parsing should eliminate
480                                              // badly formed strings.
481                                              
482            let index = vars.each_indent.new_index(
483                    &vars.path,
484                    new_indent
485                );
486
487            vars.path.set_last_index(index);
488
489            // Update end indices
490
491            if new_indent <= old_indent {
492                Self::insert_end_indices(&mut vars, new_indent);
493            };
494
495            // Insert the data
496
497            vars.keylen.insert(&vars.path);
498
499            vars.tokens.push(token);
500
501            vars.keymap.insert(&vars.path, vars.tokens.len() - 1);
502
503            // Insert var.each_indent should be at the end of this function as its state should be
504            // set by the previous parser loop.
505            
506            vars.each_indent.insert(&vars.path, new_indent);
507
508        };
509    }
510 
511    // Return indentation given position from start of line and root_indent (as an integer 0, 1, 2,
512    // ...).
513    //
514    fn indent(vars: &BuildVars) -> usize {
515
516        let ch_indent = (vars.start_key.unwrap() - vars.start_line.unwrap()) - vars.ch_root_indent.unwrap() + 1;
517
518        if ch_indent % INDENT_STEP != 0 {
519            KeyTreeErr::indent(ch_indent, vars);
520            unreachable!();
521        } else {
522            ch_indent / INDENT_STEP
523        }
524    }
525
526    // Construct a Value Token
527
528    fn value_token(vars: &BuildVars) -> Token {
529        Token::Value(
530            Value::new(
531                vars.start_key.unwrap(),
532                vars.end_key.unwrap(),
533                vars.start_val.unwrap(),
534                vars.end_val.unwrap(),
535            )
536        )
537    }
538
539    fn key_token(vars: &BuildVars) -> Token {
540        Token::Key(
541            Key::new(
542                vars.start_key.unwrap(),
543                vars.end_key.unwrap(),
544            )
545        )
546    }
547
548    fn set_start_line(vars: &mut BuildVars, pos: usize) {
549        vars.start_line = Some(pos);
550    }
551
552    fn set_start_key(vars: &mut BuildVars, pos: usize) {
553        vars.start_key = Some(pos);
554    }
555
556    fn set_end_key(vars: &mut BuildVars, pos: usize) {
557        vars.end_key = Some(pos);
558    }
559
560    fn set_start_val(vars: &mut BuildVars, pos: usize) {
561        vars.start_val = Some(pos);
562    }
563
564    fn set_end_val(vars: &mut BuildVars, pos: usize) {
565        vars.end_val = Some(pos);
566    }
567
568    // When new tokens are inserted into KeyMap, the end index is not known. This function inserts
569    // the end index when it is known.
570    //
571    fn insert_end_indices(vars: &mut BuildVars, indent: usize) {
572        for i in indent..vars.each_indent.len() {
573            vars.keymap
574                .set_end(&vars.each_indent[i], vars.tokens.len() - 1);
575        };
576        vars.each_indent.0.truncate(indent);
577    }
578}