nbf 1.1.0

Draft implementation of the Nested Blocks Format, which is a human-friendly text format for expressing nested or hierarchical data.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
/*
Copyright (c) 2023 Christoph Freitag

Licensed under the Mozilla Public License, version 2.0.
*/

//! Parsing module which contains the API for mapping an NBF string to structs
//! in Rust.
//!
//! Parsing an NBF string or text consists of the following steps:
//!   1. mapping the NBF blocks into nested instances of the [DataTreeNode]
//! struct; and
//!   2. parsing the contents of the resulting tree of [DataTreeNode]s by
//! implementing the trait [FromTreeNode] for the user-defined types to which
//! the NBF text is supposed to be mapped.
//!
//! # Examples
//!
//! Parsing the simple structure from above can be achieved in the following
//! way.
//! (At the moment the API is too complicated to use.
//! It will be simplified and support for
//! [serde](https://crates.io/crates/serde) is planned.)
//! ```
//! use nbf::parser::{
//!     FromTreeNode,
//!     DataTreeNode,
//!     EmptyExternalErrorType,
//!     parse_uniform,
//!     parse_value,
//!     ParsingError,
//!     ParsingErrorType,
//! };
//!
//! struct Person {
//!     name: String,
//!     year_of_birth: u16,
//! }
//!     
//! impl FromTreeNode for Person {
//! 
//!     type ExternalErrorType = EmptyExternalErrorType;
//!         
//!     fn get_keyword() -> &'static str {
//!         "person"
//!     }
//! 
//!     fn from_tree_node_internal(
//!         node: DataTreeNode<'_>
//!     ) -> Result<Self, ParsingError<Self::ExternalErrorType>> {
//! 
//!         let name = node.get_header_value().to_owned();
//!         let mut year_of_birth_opt: Option<u16> = None;
//!           
//!         let node_source_line = node.get_source_line();
//! 
//!         for child_node in node.get_children() {
//!             match child_node.get_keyword() {
//!                 "year of birth" => year_of_birth_opt = Some(
//!                     parse_value(
//!                         child_node.get_header_value(),
//!                         child_node.get_source_line(),
//!                     )?
//!                 ),
//!                 _ => return Err(
//!                     ParsingError::new(
//!                         child_node.get_source_line(),
//!                         ParsingErrorType::UnknownKeyword(
//!                             child_node.get_keyword().to_owned()
//!                         ),
//!                     )
//!                 )
//!             }
//!         }
//! 
//!         let Some( year_of_birth ) = year_of_birth_opt else {
//!             return Err(
//!                 ParsingError::new(
//!                     node_source_line,
//!                     ParsingErrorType::MissingChild( "year of birth".to_owned() )
//!                 )
//!             );
//!         };
//! 
//!         Ok( Person {
//!             name,
//!             year_of_birth,
//!         } )
//!     }
//! }
//! 
//! let source =
//! r#"person: Albert Einstein {
//!     year of birth: 1879
//! }
//! person: Max Planck {
//!     year of birth: 1858
//! }"#;
//! 
//! let persons: Vec<Person> = match parse_uniform( source ) {
//!     Ok( vec ) => vec,
//!     Err( error ) => panic!( "{}", error ),
//! };
//! 
//! assert_eq!( persons.len(), 2 );
//!
//! assert_eq!( persons[0].name, "Albert Einstein" );
//! assert_eq!( persons[0].year_of_birth, 1879 );
//!
//! assert_eq!( persons[1].name, "Max Planck" );
//! assert_eq!( persons[1].year_of_birth, 1858 );
//! ```

pub mod error;

pub use error::{
    ParsingError,
    ParsingErrorType,
};

use std::error::Error;
use std::fmt::Display;
use std::fmt::Formatter;
use std::str::FromStr;

/// Data-less error type.
///
/// It can be used as the `ExternalErrorType` of a [`FromTreeNode`]
/// implementation, as done in the module-level example.
/// It always displays as `empty error` and never reports a source error.
#[derive(Debug)]
pub struct EmptyExternalErrorType;

impl Display for EmptyExternalErrorType {
    /// Writes the fixed message `empty error`.
    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("empty error")
    }
}

// The provided `Error::source` implementation already returns `None`, so no
// method has to be overridden here.
impl Error for EmptyExternalErrorType {}

/// Parses `content` into a vector of `T`, assuming that every first-level
/// block of the text is of the same type `T`.
///
/// The text is first mapped into [`DataTreeNode`]s; afterwards every root node
/// is converted with [`FromTreeNode::from_tree_node`], in order of appearance.
///
/// # Errors
///
/// Returns the first [`ParsingError`] which occurs, either while mapping the
/// text into tree nodes or while converting one of the root nodes.
pub fn parse_uniform<T, ExternalErrorType>(
    content: &str,
) -> Result<Vec<T>, ParsingError<ExternalErrorType>>
where
    T: FromTreeNode<ExternalErrorType = ExternalErrorType>,
    ExternalErrorType: Display + Error,
{
    LineGroup::new_root(content)
        .into_data_tree_nodes::<ExternalErrorType>()?
        .into_iter()
        // Collecting into a `Result` stops at the first failing root node.
        .map(|root_node| T::from_tree_node(root_node))
        .collect()
}

/// Parses `source` into any type implementing [`FromStr`].
///
/// `source` is handed to [`str::parse`] as it is; `line` is only used to tell
/// the resulting error where the value came from.
///
/// # Errors
///
/// If parsing fails, the error of the [`FromStr`] implementation is discarded
/// and a [`ParsingError`] of type [`ParsingErrorType::FailedParsingValue`] for
/// the given `line` is returned instead.
pub fn parse_value<T, ExternalErrorType>(
    source: &str,
    line: usize,
) -> Result<T, ParsingError<ExternalErrorType>>
where
    T: FromStr,
    ExternalErrorType: Display + Error,
{
    source
        .parse::<T>()
        .map_err(|_| ParsingError::new(line, ParsingErrorType::FailedParsingValue))
}

/// One block of an NBF text: its keyword, header value, line in the source
/// text and its nested child blocks.
///
/// A node contains no parsed values.
/// `keyword` and `header_value` are string slices borrowed from the original
/// text (lifetime `'a`); turning them into typed values is left to
/// implementations of [`FromTreeNode`].
#[derive(Debug,PartialEq)]
pub struct DataTreeNode<'a> {
    /// Trimmed text in front of the colon of the block's header.
    keyword: &'a str,
    /// Trimmed text between the colon and the end of the block's header.
    header_value: &'a str,
    /// Blocks nested inside this block, in order of appearance.
    children: Vec<DataTreeNode<'a>>,
    /// Line offset recorded for this block when it was created from a
    /// [`LineGroup`]; counted from zero, as a root [`LineGroup`] starts at
    /// line offset 0.
    source_line: usize,
}

impl<'a> DataTreeNode<'a> {

    /// Return a block's keyword(s), that is the word(s) before the colon as in
    /// `person` from the block `person: Albert Einstein`.
    pub fn get_keyword( &self ) -> &'a str {
        self.keyword
    }

    /// Returns the header value of a block as in `Albert Einstein` from the
    /// block `person: Albert Einstein`.
    pub fn get_header_value( &self ) -> &'a str {
        self.header_value
    }

    /// Returns the child or nested blocks which belong to `self`.
    pub fn get_children( &'a self ) -> &'a Vec<DataTreeNode<'a>> {
        &self.children
    }

    /// Consumes `self` and hands ownership of its children to the caller.
    pub fn into_children( self ) -> Vec<DataTreeNode<'a>> {
        self.children
    }

    /// Returns the first line of the original text where this block's content
    /// started.
    /// This is needed for useful error messages when converting the strings
    /// given in `self` into values of a struct which implements
    /// [`FromTreeNode`].
    pub fn get_source_line( &self ) -> usize {
        self.source_line
    }
    
    /// Change the type of this instance to match the type corresponding to the
    /// given implementation of trait [`FromTreeNode`] trait.
    /// (Internally this changes the tree node's keyword before the colon to the
    /// implementation's keyword.)
    pub fn cast_to_type<T>( mut self ) -> DataTreeNode<'a>
    where
        T: FromTreeNode {

        self.keyword = T::get_keyword();

        self
    }
}

/// Group of lines which knows the offset of its first line in the original
/// text.
///
/// A line group only borrows from the original text (lifetime `'a`).
/// This allows [LineGroup]s to be created as slices of other [LineGroup]s
/// without ever having to copy any [String]s.
#[derive(PartialEq,Debug)]
pub struct LineGroup<'a> {
    /// a slice of text comprising the lines of this group, perhaps being a
    /// slice of a parent group's text
    text: &'a str,
    /// the line offset of the first line of [text](LineGroup::text) in the
    /// original encompassing text; a root group has line offset 0
    line_offset: usize,
}

impl<'a> LineGroup<'a> {

    /// Create a [LineGroup] from an original text.
    /// The resulting group spans the whole text and has line offset 0.
    pub fn new_root( text: &'a str ) -> LineGroup<'a> {
        LineGroup{
            text,
            line_offset: 0,
        }
    }

    /// Create a child of this line group which points to the given slice of
    /// text and tracks its offset line number.
    /// The given indices are byte offsets into
    /// [`self.get_text()`](LineGroup::get_text), not line numbers.
    ///
    /// The child's line offset is this group's line offset plus the number of
    /// newline characters in front of `start_index`.
    ///
    /// # Panics
    ///
    /// Panics if the indices do not describe a valid range of the text, i.e.
    /// if they are out of bounds, in the wrong order or not on a character
    /// boundary.
    pub fn new_child(
        &self,
        start_index: usize,
        end_index: usize,
    ) -> LineGroup<'a> {

        let nested_text = &self.text[ start_index .. end_index ];
        let newlines_count_before =
            count_newline_characters( &self.text[ 0 .. start_index ] );

        LineGroup{
            text: nested_text,
            line_offset: self.line_offset + newlines_count_before,
        }
    }

    /// Maps this [LineGroup] into corresponding [DataTreeNode]s.
    /// A vector is returned because multiple NBF blocks can exist on the same
    /// level of nesting, e.g.
    /// ```txt
    /// person: Albert Einstein {
    /// }
    /// person: Max Planck {
    /// }
    /// ```
    /// Every node records the line on which its own block starts.
    ///
    /// # Errors
    ///
    /// Returns a [`ParsingError`] of type
    /// * [`ParsingErrorType::MissingColon`] if the header of a block (its
    ///   first line up to an opening curly bracket) contains no colon,
    /// * [`ParsingErrorType::BothBraketsInLine`] or
    ///   [`ParsingErrorType::IsolatedClosingBraket`] if the text cannot be
    ///   split into blocks.
    pub fn into_data_tree_nodes<ExternalErrorType>( self ) ->
        Result<Vec<DataTreeNode<'a>>,ParsingError<ExternalErrorType>>
    where
        ExternalErrorType: Display + Error {

        let outer_line_groups =
            split_into_outer_blocks::<ExternalErrorType>( self )?;

        let mut result_nodes: Vec<DataTreeNode<'a>> =
            Vec::with_capacity( outer_line_groups.len() );

        for outer_line_group in outer_line_groups {

            let text = outer_line_group.text;
            let first_newline_position = text.find( '\n' );

            // The header of a block is its first line.
            let header_line = match first_newline_position {
                Some( newline_position ) => &text[ .. newline_position ],
                None => text,
            };

            // The header value ends at the opening curly bracket or, if there
            // is none, at the end of the header line.
            let header_value_end_position = header_line
                .find( '{' )
                .unwrap_or( header_line.len() );

            // Only look for the colon inside the header.
            // A colon which belongs to a nested block or which follows the
            // opening bracket must not be mistaken for the header's colon,
            // otherwise the slices below would be invalid.
            let Some( colon_position ) =
                header_line[ .. header_value_end_position ].find( ':' )
            else {
                return Err( ParsingError::new(
                    outer_line_group.line_offset,
                    ParsingErrorType::MissingColon
                ) );
            };

            let mut node = DataTreeNode {
                keyword: header_line[ .. colon_position ].trim(),
                header_value: header_line[
                    colon_position + 1 ..
                    header_value_end_position
                ].trim(),
                children: Vec::new(),
                // Each block starts at the line of its own line group, which
                // differs from `self`'s first line for all but the first
                // block.
                source_line: outer_line_group.line_offset,
            };

            // Do not add any children if there aren't multiple lines.
            if let ( Some( first_newline_position ), Some( last_newline_position ) ) =
                ( first_newline_position, text.rfind( '\n' ) ) {

                // Only consider nested blocks if there is space for any
                // between the header line and the closing line.
                if first_newline_position < last_newline_position {
                    node.children = outer_line_group
                        .new_child(
                            first_newline_position + 1,
                            last_newline_position,
                        )
                        .into_data_tree_nodes::<ExternalErrorType>()?;
                }
            }

            result_nodes.push( node );
        }

        Ok( result_nodes )
    }

    /// Returns the text (slice) of this line group.
    pub fn get_text( &self ) -> &'a str {
        self.text
    }

    /// Returns the offset of the first line of this group of lines.
    /// This offset is always absolute to the line numbering in the original
    /// text.
    pub fn get_line_offset( &self ) -> usize {
        self.line_offset
    }
}

/// Counts how often the newline character `'\n'` occurs in `source`.
fn count_newline_characters(source: &str) -> usize {
    source.matches('\n').count()
}

/// Returns the byte offsets, relative to the start of the text of the given
/// [LineGroup], of all lines which begin while no curly bracket is open.
///
/// These are the lines at which the outermost blocks of the group start; note
/// that empty lines outside of any block are reported as well.
///
/// # Errors
///
/// Returns a [`ParsingError`] carrying the absolute line number and the type
/// * [`ParsingErrorType::BothBraketsInLine`] if a line contains both `{` and
///   `}`, or
/// * [`ParsingErrorType::IsolatedClosingBraket`] if a line contains `}` while
///   no bracket is open.
fn find_character_offsets_of_outer_blocks<ExternalErrorType>(
    source: &LineGroup,
) -> Result<Vec<usize>, ParsingError<ExternalErrorType>>
where
    ExternalErrorType: Display + Error,
{
    let mut block_starts: Vec<usize> = Vec::new();

    // Number of brackets which are currently open.
    let mut depth: usize = 0;

    // Byte offset at which the current line starts.
    let mut line_start: usize = 0;

    // `split` is used rather than `lines`, because it drops exactly one
    // character per line, which keeps the offset arithmetic below exact.
    for (relative_line, line) in source.text.split('\n').enumerate() {
        // Line number with respect to the original text.
        let global_line = source.line_offset + relative_line;

        if depth == 0 {
            block_starts.push(line_start);
        }

        match (line.contains('{'), line.contains('}')) {
            (true, true) => {
                return Err(ParsingError::new(
                    global_line,
                    ParsingErrorType::BothBraketsInLine,
                ));
            }
            (true, false) => depth += 1,
            (false, true) => {
                // Closing a bracket which was never opened is an error.
                depth = match depth.checked_sub(1) {
                    Some(remaining) => remaining,
                    None => {
                        return Err(ParsingError::new(
                            global_line,
                            ParsingErrorType::IsolatedClosingBraket,
                        ));
                    }
                };
            }
            (false, false) => {}
        }

        // Skip the line itself and the newline character behind it.
        line_start += line.len() + 1;
    }

    Ok(block_starts)
}

/// Splits the given line group into one line group per outermost block.
///
/// The text of every resulting group is trimmed, and groups which consist of
/// whitespace only are left out.
/// The line offset of a group refers to the line on which its untrimmed text
/// starts.
///
/// # Errors
///
/// Passes on the [`ParsingError`] of
/// [`find_character_offsets_of_outer_blocks`].
fn split_into_outer_blocks<'a, ExternalErrorType>(
    source: LineGroup<'a>,
) -> Result<Vec<LineGroup<'a>>, ParsingError<ExternalErrorType>>
where
    ExternalErrorType: Display + Error,
{
    let block_starts =
        find_character_offsets_of_outer_blocks::<ExternalErrorType>(&source)?;

    // Every block ends where the next one starts; the last block ends at the
    // end of the text.
    let block_ends = block_starts
        .iter()
        .copied()
        .skip(1)
        .chain(std::iter::once(source.text.len()));

    let blocks = block_starts
        .iter()
        .copied()
        .zip(block_ends)
        .filter_map(|(start, end)| {
            let block = source.new_child(start, end);
            let text = block.text.trim();

            (!text.is_empty()).then_some(LineGroup {
                text,
                line_offset: block.line_offset,
            })
        })
        .collect();

    Ok(blocks)
}

/// Trait which guides the mapping from a [`DataTreeNode`] into the
/// implementing type `Self`.
pub trait FromTreeNode: Sized {
    type ExternalErrorType: Display + Error;

    /// Return the keyword by which this block's type is to be identified (the
    /// word or group of words before the colon, as in `person: Albert
    /// Einstein`).
    fn get_keyword() -> &'static str;
    
    /// Implement the mapping from an instance of [`DataTreeNode`] to an instance
    /// of the `Self` type.
    /// This function should also perform the creation of the children as
    /// provided by [`DataTreeNode::get_children`] or
    /// [`DataTreeNode::into_children`].
    fn from_tree_node_internal( node: DataTreeNode ) -> Result<Self,ParsingError<Self::ExternalErrorType>>;

    /// This function should be called to map into the `Self` type instead of
    /// [`FromTreeNode::from_tree_node_internal`].
    /// It tests whether the given text block is consistent with the `Self`
    /// type by checking its keyword and then calls
    /// [`FromTreeNode::from_tree_node_internal`].
    fn from_tree_node( node: DataTreeNode ) -> Result<Self,ParsingError<Self::ExternalErrorType>> {

        assert_eq!( node.keyword, Self::get_keyword() );

        Self::from_tree_node_internal( node )
    }
}

// Unit tests for the splitting of texts into line groups and for the mapping
// of line groups into data tree nodes.
#[cfg(test)]
mod test {

    use crate::parser::*;

    // Sample of nine lines with one nested block and without any colons.
    // It is only suitable for tests of the splitting functions.
    // The offsets asserted in the tests below depend on the exact bytes of
    // this text, including its indentation.
    fn get_source_sample() -> &'static str {
r#"abc
def {
    ghi {
    }
    jkl
}
mno
pqr
stu"#
    }

    // A child created from byte 14 to byte 17 of the sample covers the text
    // `ghi` and is preceded by two newline characters.
    #[test]
    fn get_child_of_line_group() {

        let root = LineGroup::new_root( get_source_sample() );
        assert_eq!( root.line_offset, 0 );

        let child = root.new_child( 14, 17 );
        assert_eq!( child.text, "ghi" );
        assert_eq!( child.line_offset, 2 );
    }

    // An opening and a closing bracket in the same line yield an error, which
    // this test turns into the expected panic.
    #[test]
    #[should_panic]
    fn find_line_offsets_of_outer_blocks_errs_on_two_brakets() {

        let source = "abc{}def";

        let _groups = match find_character_offsets_of_outer_blocks::<EmptyExternalErrorType>(
            &LineGroup::new_root( source )
        ) {
            Ok( val ) => val,
            Err( error ) => panic!( "{}", error ),
        };
    }

    // The closing bracket in the last line has no opening counterpart, which
    // yields an error; this test turns it into the expected panic.
    #[test]
    #[should_panic]
    fn find_line_offsets_of_outer_blocks_errs_on_isolated_closing_braket() {

        let source = "abc\ndef{\nghi\njkl\n}\nmno}";

        let _groups = match find_character_offsets_of_outer_blocks::<EmptyExternalErrorType>(
            &LineGroup::new_root( source )
        ) {
            Ok( val ) => val,
            Err( error ) => panic!( "{}", error ),
        };
    }

    // The expected values are the byte offsets of the lines `abc`, `def {`,
    // `mno`, `pqr` and `stu` of the sample.
    #[test]
    fn find_line_offsets_of_outer_blocks_works() {

        let source = get_source_sample();

        let groups = match find_character_offsets_of_outer_blocks::<EmptyExternalErrorType>(
            &LineGroup::new_root( source )
        ) {
            Ok( val ) => val,
            Err( error ) => panic!( "{}", error ),
        };

        assert_eq!( groups, vec![ 0, 4, 36, 40, 44 ] );
    }

    // The sample is split into five trimmed outer blocks, each of which knows
    // the line on which it starts.
    #[test]
    fn split_into_outer_blocks_works() {

        let source = LineGroup::new_root(
            get_source_sample()
        );

        let line_groups = match split_into_outer_blocks::<EmptyExternalErrorType>( source ) {
            Ok( val ) => val,
            Err( error ) => panic!( "{}", error ),
        };

        assert_eq!(
            line_groups,
            vec![
                LineGroup {
                    text: "abc",
                    line_offset: 0
                },
                LineGroup {
                    text: "def {\n    ghi {\n    }\n    jkl\n}",
                    line_offset: 1
                },
                LineGroup {
                    text: "mno",
                    line_offset: 6
                },
                LineGroup {
                    text: "pqr",
                    line_offset: 7
                },
                LineGroup {
                    text: "stu",
                    line_offset: 8
                }
            ]
        );
    }

    // Regression test: a block whose closing bracket directly follows the
    // header line must not cause a panic. The result itself is not checked.
    #[test]
    fn into_data_tree_nodes_when_end_on_next_line() {
        
        let source = "abc:def{\n}";

        // Used to panic when the bug of this test was still present.
        let _ = LineGroup::new_root( source ).into_data_tree_nodes::<EmptyExternalErrorType>();
    }

    // Example of a complete NBF text with two levels of nesting and a trailing
    // newline.
    fn get_data_file_example() -> &'static str {
r#"abc: def {
    ghi: jkl
    mno: pqr {
        stu: vwx
        yza: bcd
    }
    efg: hij
}
"#
    }

    // The example is mapped into a single root node whose descendants carry
    // the keywords, header values and source lines of their blocks.
    #[test]
    fn create_data_tree_node<'a>() {

        let source = LineGroup::new_root(
            get_data_file_example()
        );

        let data_tree_nodes = match source.into_data_tree_nodes::<EmptyExternalErrorType>() {
            Ok( val ) => val,
            Err( error ) => panic!( "{}", error ),
        };

        assert_eq!( data_tree_nodes.len(), 1 );

        let root_node = &data_tree_nodes[ 0 ];

        assert_eq!(
            root_node,
            &DataTreeNode{
                keyword: "abc",
                header_value: "def",
                children: vec![
                    DataTreeNode{
                        keyword: "ghi",
                        header_value: "jkl",
                        children: vec![],
                        source_line: 1,
                    },
                    DataTreeNode{
                        keyword: "mno",
                        header_value: "pqr",
                        children: vec![
                            DataTreeNode{
                                keyword: "stu",
                                header_value: "vwx",
                                children: vec![],
                                source_line: 3,
                            },
                            DataTreeNode{
                                keyword: "yza",
                                header_value: "bcd",
                                children: vec![],
                                source_line: 4,
                            }
                        ],
                        source_line: 2,
                    },
                    DataTreeNode{
                        keyword: "efg",
                        header_value: "hij",
                        children: vec![],
                        source_line: 6,
                    }
                ],
                source_line: 0,
            }
        );
    }
}