keytree/lib.rs
1//! # KeyTree
2//!
3//! `KeyTree` is an elegant markup language designed to convert human readable information into Rust
4//! data-structures. It is designed to be fast, to reduce cognitive load and to be easy to
5//! implement for one's own types. It has no dependencies on other crates and so is fast to
6//! compile. The format looks like
7//!
8//! ```text
9//! hobbit:
10//! name: Frodo Baggins
11//! age: 60
12//! friends:
13//! hobbit:
14//! name: Bilbo Baggins
15//! age: 111
16//! hobbit:
17//! name: Samwise Gamgee
18//! age: 38
19//! ```
20//!
21//! so data can be recursive. Also, it is easy to refer to a set of data using a path such as
22//! `hobbit::friends::hobbit` refers to a collection of two hobbits.
23//!
24//! This library does not follow the standard Rust error handling pattern. If there is a parsing
25//! error it will crash or if there is an error in converting a value into a Rust type it will
26//! crash (with a nice error message). If you don't want this to happen, you will need to run this
27//! in its own thread/process.
28//!
29//! ## Data Format Rules
30//!
31//! - Indentation has meaning and is 4 spaces, relative to the top key. Since indenting is
32//! relative to the top key, then you can neatly align strings embedded in code.
33//!
34//! - Each line can be empty, have whitespace only, be a comment, be a key, or be a key/value
35//! pair.
36//!
37//! - There are keys and values. Key/Value pairs look like
38//!
39//! ```text
40//! name: Frodo
41//! ```
42//! are used for `struct` fields and `enum` variants.
43//!
44//! Keys refer to child keys or child key/value pairs indented on lines under it, for example
45//!
46//! ```text
47//! hobbit:
48//! name: Frodo
49//! ```
50//! hobbit refers to the name of the struct or enum. In this way, the data maps simply to Rust
51//! data-structures.
52//!
53//! - If a key has many children with the same key, it forms a collection, for example
54//!
55//! ```test
56//! hobbit:
57//! name: Frodo
58//! name: Bilbo
59//! ```
60//! is a collection of hobbits.
61//!
62//! - Keys must not include but must be followed by a colon `:`.
63//!
64//! - Values are all characters between the combination of ':' and whitespace and the end of the
65//! line. The value is trimmed of whitespace at both ends.
66//!
67//! - Comments require `//` at the start of the line. For example
68//!
69//! ```text
70//! // comment
71//! hobbit:
72//! name: Frodo
73//! ```
74//!
75//! ## Example
76//!
77//! `Into` from `KeyTree` into Rust types is automatically implemented for `Vec<T>`, `Option<T>`
78//! and basic Rust types. `KeyTree` text can be automatically converted to these data types, making
79//! use of type inference. The `at()` function returns an iterator over `KeyTree` types that can be
80//! used to implement `Into` for your own types. The following example should cover 90 percent of
81//! use cases,
82//!
83//! ```rust
84//! use keytree::KeyTree;
85//! use keytree::parser::KeyTreeBuilder;
86//!
87//! #[derive(Debug)]
88//! struct Hobbit {
89//! name: String,
90//! age: u32,
91//! friends: Vec<Hobbit>,
92//! nick: Option<String>,
93//! }
94//!
95//! impl<'a> Into<Hobbit> for KeyTree<'a> {
96//! fn into(self) -> Hobbit {
97//! Hobbit {
98//! name: self.at("hobbit::name"),
99//! age: self.at("hobbit::age"),
100//! friends: self.at("hobbit::friends::hobbit"),
101//! nick: self.op("hobbit::nick"),
102//! }
103//! }
104//! }
105//!
106//! fn main() {
107//! let s = r#"
108//! hobbit:
109//! name: Frodo Baggins
110//! age: 98
111//! friends:
112//! hobbit:
113//! name: Bilbo Baggins
114//! age: 176
115//! hobbit:
116//! name: Samwise Gamgee
117//! age: 66
118//! nick: Sam"#;
119//!
120//!
121//! let core = KeyTreeBuilder::parse(s);
122//! let hobbit: Hobbit = KeyTree::from_core(&core).into();
123//! dbg!(&hobbit);
124//! }
125//! ```
126//!
127//! ## Details
128//!
129//! We'll have a look in detail at what is happening in the example on the line
130//!
131//! ```
132//! friends: self.at("hobbit::friends::hobbit"),
133//! ```
134//!
135//! In the `Into` trait impl, we want Bilbo Baggins and Samwise Gamgee to be Frodo's
136//! friends. We specify their location in the `KeyTree` string using a path
137//! `"hobbit::friends::hobbit"` which refers to two branches in the tree (two
138//! hobbits). The `at()` function, unlike the the `op()` function, requires that the
139//! branches exist. Rust infers that they need to be converted into the type `Vec<Hobbit>` as
140//! specified in the `Hobbit` struct. The `Vec<T>` impl of `Into` is supplied by `KeyTree`. In fact
141//! the `at()` function supplies an iterator over the Hobbits, which the `Vec<T>` impl uses.
142
143pub mod error;
144pub mod into;
145pub mod parser;
146pub mod path;
147
148use error::KeyTreeErr;
149use path::{UniquePath, NonUniquePath};
150use std::collections::{BTreeMap, HashMap};
151use std::collections::btree_map::Range;
152use core::fmt;
153use core::fmt::{Debug, Display};
154use core::iter::Peekable;
155use core::ops::Index;
156use core::ops::Bound::Included;
157
158#[derive(Clone, Copy, Debug)]
159enum Token {
160 Key(Key),
161 Value(Value),
162}
163
164impl Token {
165 // Returns the index of the start character of the key in the data string.
166 fn start_key(&self) -> usize {
167 match self {
168 Token::Key(k) => k.start_key,
169 Token::Value(kv) => kv.start_key,
170 }
171 }
172}
173
174impl Display for Token {
175 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
176 match self {
177 Self::Key(k) => {
178 write!(f, "({}, {})", k.start_key, k.end_key)
179 },
180 Self::Value(v) => {
181 write!(f, "({}, {}):({}, {})", v.start_key, v.end_key, v.start_val, v.end_val)
182 },
183 }
184 }
185}
186
187/// Line 1 is a Key Token. It refers to indented lines under it. Contains pointers into the data
188/// string.
189///
190/// 1. hobbit:
191/// 2. name: Frodo Baggins
192/// 3. age: 98
193///
194#[derive(Clone, Copy, Debug)]
195struct Key {
196 start_key: usize,
197 end_key: usize,
198}
199
200impl Key {
201 fn new(sk: usize, ek: usize) -> Self {
202 Key {
203 start_key: sk,
204 end_key: ek,
205 }
206 }
207}
208
209
210/// Line 2 and 3 are Value tokens. They each have a value. Contains pointers into the data string.
211///
212/// 1. hobbit:
213/// 2. name: Frodo Baggins
214/// 3. age: 98
215///
216#[derive(Clone, Copy, Debug)]
217struct Value {
218 start_key: usize,
219 end_key: usize,
220 start_val: usize,
221 end_val: usize,
222}
223
224impl Value {
225 fn new(sk: usize, ek: usize, sv: usize, ev: usize) -> Self {
226 Value {
227 start_key: sk,
228 end_key: ek,
229 start_val: sv,
230 end_val: ev,
231 }
232 }
233}
234
235struct Tokens(Vec<Token>);
236
237impl Tokens {
238 fn new() -> Self {
239 Tokens(Vec::new())
240 }
241
242 fn push(&mut self, token: Token) {
243 self.0.push(token)
244 }
245
246 fn len(&self) -> usize {
247 self.0.len()
248 }
249}
250
251impl Index<usize> for Tokens {
252 type Output = Token;
253
254 fn index(&self, i: usize) -> &Self::Output {
255 &(self.0)[i]
256 }
257}
258
259impl fmt::Debug for Tokens {
260 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
261 let mut s = String::from("Tokens: ");
262 for tok in &self.0 {
263 s.push_str(&tok.to_string());
264 s.push_str(", ");
265 };
266 s.pop();
267 s.pop();
268 write!(f, "{:?}", s)
269 }
270}
271
272// TokenIndex holds indexes into a start and end position in a list of tokens.
273//
274#[derive(Clone, Copy)]
275struct TokenIndex((usize, Option<usize>));
276
277impl TokenIndex {
278
279 // Create a token builder. The end index will be set later.
280 fn new(start: usize) -> Self {
281 TokenIndex((start, None))
282 }
283
284 fn start(&self) -> usize {
285 (self.0).0
286 }
287
288 // Will fail if Option is None.
289 fn end(&self) -> usize {
290 (self.0).1.unwrap()
291 }
292
293 fn set_end(&mut self, i: usize) {
294 (self.0).1 = Some(i);
295 }
296}
297
298impl fmt::Display for TokenIndex {
299 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300 match (self.0).1 {
301 Some(_) => write!(f, "({}, {})", self.start(), self.end()),
302 None => write!(f, "({}, _)", self.start()),
303 }
304 }
305}
306
307impl fmt::Debug for TokenIndex{
308 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
309 write!(f, "{}", self.to_string())
310 }
311}
312
313// While parsing, tracks the last ocurring path for each indent level. This is used for getting the
314// index of the latest path, and for inserting end indices in TokenIndex when the contents of a
315// key have been fully read.
316//
317#[derive(Debug)]
318struct EachIndent(Vec<UniquePath>);
319
320impl EachIndent {
321
322 fn push(&mut self, path: &UniquePath) {
323 self.0.push(path.clone())
324 }
325
326 fn new() -> Self {
327 EachIndent(Vec::new())
328 }
329
330 // Used by parser to calculate the index of a newly parsed NonUniquePath.
331 //
332 fn new_index(&self, path: &UniquePath, indent: usize) -> usize {
333 let max_indent = self.0.len() - 1;
334 if indent <= max_indent && path.eq_base(&self.0[indent]) {
335 self.0[indent].last_index() + 1
336 } else {
337 0
338 }
339 }
340
341 fn insert(&mut self, path: &UniquePath, indent: usize) {
342 if self.0.is_empty() {
343 self.0.push(path.clone())
344 } else if indent <= self.0.len() - 1 {
345 self.0[indent] = path.clone();
346 } else {
347 self.0.push(path.clone())
348 }
349 }
350
351 fn len(&self) -> usize {
352 self.0.len()
353 }
354}
355
356impl Index<usize> for EachIndent {
357 type Output = UniquePath;
358
359 fn index(&self, index: usize) -> &Self::Output {
360 &self.0[index]
361 }
362}
363
364struct KeyMap(BTreeMap<UniquePath, TokenIndex>);
365
366impl KeyMap {
367 fn new() -> Self {
368 let bt: BTreeMap<UniquePath, TokenIndex> = BTreeMap::new();
369 KeyMap(bt)
370 }
371
372 // That path is a borrow simply reflects stdlib <HashMap>.get().
373 //
374 fn get(&self, path: &UniquePath) -> Option<TokenIndex> {
375 self.0.get(path).map(|tok_ix| *tok_ix)
376 }
377
378 fn set_end(&mut self, path: &UniquePath, end: usize) {
379 let token_index = self.0.get_mut(path).unwrap();
380 token_index.set_end(end);
381 }
382
383 // Check if the path already exists and return an error if it does, otherwise insert the path.
384 //
385 fn insert(&mut self, path: &UniquePath, start: usize) {
386 self.0.insert(path.clone(), TokenIndex::new(start));
387 }
388
389 pub fn len(&self) -> usize {
390 self.0.len()
391 }
392}
393
394impl Debug for KeyMap {
395 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
396 let mut s = String::with_capacity(self.len() * 40);
397 s.push_str("KeyMap:\n");
398 for (key, value) in self.0.iter() {
399 s.push_str(&format!("{:?}:{}\n", key, value));
400 };
401 s.pop();
402 write!(f, "{}", s)
403 }
404}
405
406// This is an iterator over something that can be used to construct KeyTrees.
407//
408#[derive(Clone, Debug)]
409enum Context<'a> {
410 // Unique and Iter are the same internally. Unique marks an iterator with one element. Iter
411 // marks an iterator with zero of more than one elements.
412 Unique(Peekable<Range<'a, UniquePath, TokenIndex>>),
413 Iter(Peekable<Range<'a, UniquePath, TokenIndex>>),
414
415 // We need to be able to handle paths that point to nothing. These will fail if we apply into()
416 // on these unless the target type is Option<T> or if the target type is Vec<T> which creates
417 // an empty Vec. It is not possible to have an empty BTreeMap Range, so we use EmptyIter
418 // instead.
419 EmptyIter(NonUniquePath),
420}
421
422// You can think of converting a KeyTree into a data-type as an algorithm that creates a whole set of
423// KeyTrees (which are small efficient structures all pointing into an immutable KeyTreeCore) at each
424// node in the tree, and then consumes each of these KeyTrees into the target data-type. The at()
425// function creates a new KeyTree which applies a local_path to Self's context to construct a new
426// KeyTree and then convert it using into(). next() also creates a new KeyTree by iterating over Self's
427// context. The iter() function creates a new Context as a component of constructing a KeyTree.
428//
429/// `KeyTree` is a mutable reference into the immutable `KeyTreeCore`. You can move the `KeyTree`
430/// reference around by applying the `at()` function to it.
431///
432#[derive(Clone, Debug)]
433pub struct KeyTree<'a> {
434 keytree_core: &'a KeyTreeCore<'a>,
435 context: Context<'a>,
436}
437
438impl<'a> KeyTree<'a> {
439
440 /// Construct a mutable KeyTree from an immutable KeyTreeCore. See main example at the start of
441 /// the documentation or in README.md
442 ///
443 pub fn from_core(keytree_core: &'a KeyTreeCore) -> Self {
444 KeyTree {
445 keytree_core: keytree_core,
446 context: keytree_core.iter(keytree_core.root.clone().non_unique()),
447 }
448 }
449
450 /// Finds the value in `path` and converts into `T`. Will crash if it cannot be found. For
451 /// context, see main example at the start of the documentation or in README.md
452 ///
453 // Creates a new KeyTree at a new position in the tree. Then calls into() to convert itself into
454 // a type T.
455 //
456 pub fn at<T>(&self, path: &str) -> T
457 where
458 KeyTree<'a>: Into<T>
459 // T has to be something such that <KeyTree>.into() returns a T.
460 {
461 let local_path = match NonUniquePath::from(path) {
462 Ok(path) => path,
463 Err(_) => { KeyTreeErr::parse_keytree(); unreachable!() },
464 };
465
466 // Clone this value and append
467 // local_path to create a new Context. Call iter() to create a context and then use this to
468 // construct KeyTree.
469 //
470 let global_path = match &self.context {
471 Context::Unique(iter) | Context::Iter(iter) => {
472 iter.clone()
473 .peek()
474 .unwrap()
475 .0
476 .clone()
477 .append_non_unique(&local_path.tail())
478 },
479 Context::EmptyIter(_) => {
480 KeyTreeErr::not_unique_but_empty();
481 unreachable!();
482 },
483 };
484 KeyTree {
485 keytree_core: &self.keytree_core,
486 context: self.keytree_core.iter(global_path),
487 }.into()
488 }
489
490 /// Finds the value in `path` and converts into `Some<T>` if it can be found otherwise returns
491 /// `None`. For context, see main example at the start of the documentation or in README.md
492 ///
493 pub fn op<T>(&self, path: &str) -> Option<T>
494 where
495 KeyTree<'a>: Into<T>
496 {
497 let local_path = match NonUniquePath::from(path) {
498 Ok(path) => path,
499 Err(_) => { KeyTreeErr::parse_keytree(); unreachable!() },
500 };
501
502 // Clone this value and append
503 // local_path to create a new Context. Call iter() to create a context and then use this to
504 // construct KeyTree.
505 //
506 let global_path = match &self.context {
507 Context::Unique(iter) | Context::Iter(iter) => {
508 iter.clone()
509 .peek()
510 .unwrap()
511 .0
512 .clone()
513 .append_non_unique(&local_path.tail())
514 },
515 Context::EmptyIter(_) => {
516 KeyTreeErr::not_unique_but_empty();
517 unreachable!();
518 },
519 };
520
521 let context = self.keytree_core.iter(global_path);
522
523 match context {
524 Context::EmptyIter(_) => None,
525 _ => {
526 Some(
527 KeyTree {
528 keytree_core: &self.keytree_core,
529 context: context,
530 }.into()
531 )
532 }
533 }
534 }
535
536 /// Returns the value of at the root of the `KeyTree`. Requires that it is unique, otherwise
537 /// crashes with an error.
538 ///
539 pub fn value(&mut self) -> Option<&str> {
540 match &self.context {
541 Context::Unique(iter) => {
542 let tok_ix = iter.clone()
543 .peek()
544 .unwrap()
545 .1
546 .end();
547
548 let token = self
549 .keytree_core
550 .tokens[tok_ix];
551
552 match token {
553 Token::Value(tok) => Some(&self.keytree_core.s[tok.start_val..=tok.end_val]),
554 Token::Key(_) => { panic!("Should not be reachable.") },
555 }
556 },
557 Context::Iter(_) => { KeyTreeErr::not_unique(); unreachable!() },
558 Context::EmptyIter(_) => None,
559 }
560 }
561}
562
563impl<'a> Iterator for KeyTree<'a> {
564 type Item = KeyTree<'a>;
565
566 // Creates a new KeyTree, that has as its context its present Iterator state.
567 //
568 fn next(&mut self) -> Option<Self::Item> {
569 match self.context {
570 Context::Iter(ref mut context) | Context::Unique(ref mut context) => {
571 match context.next() {
572 Some((path, _)) => {
573 let iter = self
574 .keytree_core.keymap.0
575 .range((Included(path), Included(path)))
576 .peekable();
577 Some(
578 KeyTree {
579 keytree_core: &self.keytree_core,
580 context: Context::Unique(iter),
581 }
582 )
583 },
584 None => None,
585 }
586 },
587 Context::EmptyIter(_) => {
588 // Should not call next() on an KeyTree with an EmptyIter, it should be handled
589 // separately in Into implementation.
590 panic!("Should not call next() on KeyTree with Context::EmptyIter(_)");
591 },
592 }
593 }
594}
595
596// KeyTreeCore is the data-structure generated from parsing an KeyTree string. It can be thought of as
597// a tree, with Paths refering to locations in the tree. It is represented as a HashMap (keylen) of
598// paths[..] => max. Keymap is a BTree which maps path[0]...path[max] => TokenIndex, which has
599// pointers to the start and end of a list of Tokens (tokens). Each token then points into the
600// original &str s.
601//
602// All possible searches are recorded. It is possible to dig into this data structure using
603// path[index], but it is simpler for the user to dig using path[..] which returns an iterator over
604// the multiple paths. This iterator is then converted to a Rust data structure by taking the
605// iterator and apply into() from the Into Trait, which in turn can dig further into its
606// (sub)-KeyTree, recording its position in the tree, and then doing another lookup in the global
607// KeyTreeCore.
608//
609/// `KeyTreeCore` is the immutable result of parsing a string.
610///
611pub struct KeyTreeCore<'a> {
612 s: &'a str,
613 // `s` is a reference to the original str passed into new(),
614
615 keymap: KeyMap,
616 // Maps unique Path to a TokenIndex
617
618 keylen: KeyLen,
619 // Maps non-unique Path to a usize. The usize is the max index of this Path in KeyMap.
620
621 tokens: Tokens,
622 // Lists all the tokens. Tokens point into `s`.
623
624 root: UniquePath,
625 // Sets the immutable root of the parsed string.
626}
627
628impl<'a> KeyTreeCore<'a> {
629 // Constructs a Context given a global_path. This is used in at().
630 fn iter(&self, global_path: NonUniquePath) -> Context {
631 match self.keylen.0.get(&global_path) {
632 Some(max) => {
633 // Set the bounds on the BTree
634 let start_path = global_path.clone().unique(0);
635 let end_path = global_path.clone().unique(*max);
636
637 let iter = self
638 .keymap.0
639 .range((Included(start_path), Included(end_path)))
640 .peekable();
641
642 if *max == 0 {
643 Context::Unique(iter)
644 } else {
645 Context::Iter(iter)
646 }
647 },
648 None => {
649 Context::EmptyIter(global_path)
650 },
651 }
652 }
653}
654
655impl<'a> fmt::Debug for KeyTreeCore<'a> {
656 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
657 write!(f, "{}\n{:?}\n{:?}\n{:?}", self.s, self.tokens, self.keymap, self.keylen)
658 }
659}
660
661// KeyLen is a part of KeyTreeCore (the immutable representation of a KeyTree string). It records the
662// number of elements in a path.
663//
664struct KeyLen(HashMap<NonUniquePath, usize>);
665
666impl KeyLen {
667
668 fn new() -> Self {
669 KeyLen(HashMap::new())
670 }
671
672 fn get(&self, path: &NonUniquePath) -> Option<usize> {
673 match self.0.get(path) {
674 Some(path) => Some(*path),
675 None => None,
676 }
677 }
678
679 // This is used only while parsing.
680 //
681 fn insert(&mut self, path: &UniquePath) {
682 self.0.insert(path.clone().non_unique(), path.last_index());
683 }
684}
685
686impl<'a> fmt::Debug for KeyLen {
687 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
688 let mut s = String::from("Keylen: \n");
689 for (key, value) in self.0.iter() {
690 s.push_str(&format!("{:?}: {:?}\n", key, value));
691 };
692 write!(f, "{}", s)
693 }
694}