marked_yaml/
loader.rs

1//! Loading YAML
2//!
3
4use crate::types::*;
5
6use hashlink::linked_hash_map::Entry;
7use yaml_rust::parser::{Event, MarkedEventReceiver, Parser};
8use yaml_rust::scanner::ScanError;
9use yaml_rust::scanner::{Marker as YamlMarker, TScalarStyle};
10
11use std::error::Error;
12use std::fmt::{self, Display};
13
/// An error indicating that a duplicate key was detected in a mapping
///
/// This is the payload of [`LoadError::DuplicateKey`], where it is boxed.
/// Both fields are the key nodes themselves, so their spans can be used to
/// locate each occurrence in the source.
#[derive(Debug, PartialEq, Eq)]
pub struct DuplicateKeyInner {
    /// The first key (the one already present in the mapping)
    pub prev_key: MarkedScalarNode,
    /// The second key (the later one which collided with `prev_key`)
    pub key: MarkedScalarNode,
}
23
/// Errors which can occur during loading of YAML
///
/// Apart from [`LoadError::DuplicateKey`], every variant carries the
/// [`Marker`] at which the problem was detected.
#[derive(Debug, PartialEq, Eq)]
pub enum LoadError {
    /// Something other than a mapping detected at the top level
    TopLevelMustBeMapping(Marker),
    /// Something other than a sequence detected at the top level
    TopLevelMustBeSequence(Marker),
    /// Unexpected definition of anchor
    UnexpectedAnchor(Marker),
    /// Mapping keys must be scalars
    MappingKeyMustBeScalar(Marker),
    /// An explicit tag was detected
    UnexpectedTag(Marker),
    /// A YAML scanner error occurred
    ScanError(Marker, ScanError),
    /// A duplicate key was detected in a mapping
    ///
    /// Only produced when [`LoaderOptions::error_on_duplicate_keys`] has
    /// been enabled.
    DuplicateKey(Box<DuplicateKeyInner>),
}
42
/// Options for loading YAML
///
/// Default options ([`LoaderOptions::default()`]) are:
///
/// - Permit duplicate keys
/// - Allow coercion of scalar nodes
/// - Require the top level to be a mapping
/// - Leave the case of mapping keys untouched
///
/// The loading functions consume the options by value, so the type is
/// `Clone` to allow one configuration to be reused for several loads.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LoaderOptions {
    // Report an error on a repeated mapping key instead of overwriting.
    error_on_duplicate_keys: bool,
    // Restrict scalar coercion to plain-style scalars.
    prevent_coercion: bool,
    // `true`: top level must be a mapping; `false`: it must be a sequence.
    toplevel_is_mapping: bool,
    // Lowercase mapping keys as they are loaded.
    lowercase_keys: bool,
}
56
57impl Default for LoaderOptions {
58    fn default() -> Self {
59        Self {
60            error_on_duplicate_keys: false,
61            prevent_coercion: false,
62            toplevel_is_mapping: true,
63            lowercase_keys: false,
64        }
65    }
66}
67
68impl LoaderOptions {
69    /// Enable errors on duplicate keys
70    ///
71    /// If enabled, duplicate keys in mappings will cause an error.
72    /// If disabled, the last key/value pair will be used.
73    pub fn error_on_duplicate_keys(self, enable: bool) -> Self {
74        Self {
75            error_on_duplicate_keys: enable,
76            ..self
77        }
78    }
79
80    /// Prevent coercion of scalar nodes
81    ///
82    /// If you want to disable things like [`.as_bool()`](crate::types::MarkedScalarNode::as_bool())
83    /// then you can call this and set coercion to be prevented.
84    pub fn prevent_coercion(self, prevent: bool) -> Self {
85        Self {
86            prevent_coercion: prevent,
87            ..self
88        }
89    }
90
91    /// Require that the top level is a mapping node
92    ///
93    /// This is the default, but you can call this to be explicit.
94    pub fn toplevel_mapping(self) -> Self {
95        Self {
96            toplevel_is_mapping: true,
97            ..self
98        }
99    }
100
101    /// Require that the top level is a sequence node
102    ///
103    /// Without calling this, the top level of the YAML is must be a mapping node
104    pub fn toplevel_sequence(self) -> Self {
105        Self {
106            toplevel_is_mapping: false,
107            ..self
108        }
109    }
110
111    /// Whether or not to force-lowercase mapping keys when loading
112    ///
113    /// By default, the loader will leave key names alone, but in some
114    /// cases it can be preferable to normalise them to lowercase
115    pub fn lowercase_keys(self, force_lowercase: bool) -> Self {
116        Self {
117            lowercase_keys: force_lowercase,
118            ..self
119        }
120    }
121}
122
123impl Display for LoadError {
124    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
125        use LoadError::*;
126        #[allow(deprecated)]
127        match self {
128            TopLevelMustBeMapping(m) => write!(f, "{}: Top level must be a mapping", m),
129            TopLevelMustBeSequence(m) => write!(f, "{}: Top level must be a sequence", m),
130            UnexpectedAnchor(m) => write!(f, "{}: Unexpected definition of anchor", m),
131            MappingKeyMustBeScalar(m) => write!(f, "{}: Keys in mappings must be scalar", m),
132            UnexpectedTag(m) => write!(f, "{}: Unexpected use of YAML tag", m),
133            DuplicateKey(inner) => {
134                let DuplicateKeyInner { prev_key, key } = inner.as_ref();
135                write!(
136                    f,
137                    "Duplicate key \"{}\" in mapping at {} and {}",
138                    prev_key.as_str(),
139                    prev_key
140                        .span()
141                        .start()
142                        .map(ToString::to_string)
143                        .unwrap_or_else(|| "?".to_string()),
144                    key.span()
145                        .start()
146                        .map(ToString::to_string)
147                        .unwrap_or_else(|| "?".to_string()),
148                )
149            }
150            ScanError(m, e) => {
151                // e.description() is deprecated but it's the only way to get
152                // the exact info we want out of yaml-rust
153                write!(f, "{}: {}", m, e.description())
154            }
155        }
156    }
157}
158
// Every `Error` method keeps its default implementation; the message text
// comes from the `Display` impl for `LoadError`.
impl Error for LoadError {}
160
/// One entry of the loader's state stack.
///
/// The top of the stack describes what the loader is waiting for next.
/// Entries underneath it are containers which are suspended while one of
/// their nested values is being read.
#[derive(Debug, PartialEq, Eq)]
enum LoaderState {
    /// Nothing has been received yet; a stream start is expected.
    Initial,
    /// The stream has started; a document start (or stream end) is expected.
    StartStream,
    /// A document has started; the top-level node is expected.
    StartDocument,
    /// Inside a mapping, expecting a key or the end of the mapping.
    /// Holds the marker of the mapping's start and the entries so far.
    MappingWaitingOnKey(Marker, MappingHash),
    /// Inside a mapping with a key already read, expecting its value.
    /// Holds the mapping's start marker, the entries so far, and that key.
    MappingWaitingOnValue(Marker, MappingHash, MarkedScalarNode),
    /// Inside a sequence, expecting an item or the end of the sequence.
    /// Holds the marker of the sequence's start and the items so far.
    SequenceWaitingOnValue(Marker, Vec<Node>),
    /// The top-level node has been completely loaded.
    Finished(Node),
    /// Loading failed; all further parser events are ignored.
    Error(LoadError),
}
172use LoaderState::*;
173
174impl LoaderState {
175    fn is_error(&self) -> bool {
176        matches!(self, Error(_))
177    }
178}
179
/// Event receiver which assembles a marked node tree from parser events.
struct MarkedLoader {
    /// Caller-supplied source identifier, stored into every marker created.
    source: usize,
    /// Stack of in-progress states; the last entry is the current one.
    state_stack: Vec<LoaderState>,
    /// Options controlling how the YAML is loaded.
    options: LoaderOptions,
}
185
186impl MarkedEventReceiver for MarkedLoader {
187    fn on_event(&mut self, ev: Event, mark: YamlMarker) {
188        // Short-circuit if the state stack is in error
189        if self.state_stack[self.state_stack.len() - 1].is_error() {
190            return;
191        }
192        let mark = self.marker(mark);
193        let curstate = self
194            .state_stack
195            .pop()
196            .expect("State stack became unbalanced");
197        let newstate = match ev {
198            Event::Alias(_) => unreachable!(),
199            Event::StreamStart => {
200                assert_eq!(curstate, Initial);
201                StartStream
202            }
203            Event::DocumentStart => {
204                assert_eq!(curstate, StartStream);
205                StartDocument
206            }
207            Event::MappingStart(aid, tag) => {
208                if tag.is_some() {
209                    Error(LoadError::UnexpectedTag(mark))
210                } else if aid == 0 {
211                    match curstate {
212                        StartDocument => {
213                            if self.options.toplevel_is_mapping {
214                                MappingWaitingOnKey(mark, MappingHash::new())
215                            } else {
216                                Error(LoadError::TopLevelMustBeSequence(mark))
217                            }
218                        }
219                        MappingWaitingOnKey(_, _) => Error(LoadError::MappingKeyMustBeScalar(mark)),
220                        MappingWaitingOnValue(_, _, _) => {
221                            self.state_stack.push(curstate);
222                            MappingWaitingOnKey(mark, MappingHash::new())
223                        }
224                        SequenceWaitingOnValue(_, _) => {
225                            self.state_stack.push(curstate);
226                            MappingWaitingOnKey(mark, MappingHash::new())
227                        }
228                        _ => unreachable!(),
229                    }
230                } else {
231                    Error(LoadError::UnexpectedAnchor(mark))
232                }
233            }
234            Event::MappingEnd => match curstate {
235                MappingWaitingOnKey(startmark, map) => {
236                    let span = Span::new_with_marks(startmark, mark);
237                    let node = Node::from(MarkedMappingNode::new(span, map));
238                    if let Some(topstate) = self.state_stack.pop() {
239                        match topstate {
240                            MappingWaitingOnValue(mark, mut map, key) => {
241                                match map.entry(key.clone()) {
242                                    Entry::Occupied(entry)
243                                        if self.options.error_on_duplicate_keys =>
244                                    {
245                                        Error(LoadError::DuplicateKey(Box::new(
246                                            DuplicateKeyInner {
247                                                prev_key: entry.key().clone(),
248                                                key,
249                                            },
250                                        )))
251                                    }
252                                    _ => {
253                                        map.insert(key, node);
254                                        MappingWaitingOnKey(mark, map)
255                                    }
256                                }
257                            }
258                            SequenceWaitingOnValue(mark, mut list) => {
259                                list.push(node);
260                                SequenceWaitingOnValue(mark, list)
261                            }
262                            _ => unreachable!(),
263                        }
264                    } else {
265                        Finished(node)
266                    }
267                }
268                _ => unreachable!(),
269            },
270            Event::SequenceStart(aid, tag) => {
271                if tag.is_some() {
272                    Error(LoadError::UnexpectedTag(mark))
273                } else if aid == 0 {
274                    match curstate {
275                        StartDocument => {
276                            if self.options.toplevel_is_mapping {
277                                Error(LoadError::TopLevelMustBeMapping(mark))
278                            } else {
279                                SequenceWaitingOnValue(mark, Vec::new())
280                            }
281                        }
282                        MappingWaitingOnKey(_, _) => Error(LoadError::MappingKeyMustBeScalar(mark)),
283                        mv @ MappingWaitingOnValue(_, _, _) => {
284                            self.state_stack.push(mv);
285                            SequenceWaitingOnValue(mark, Vec::new())
286                        }
287                        sv @ SequenceWaitingOnValue(_, _) => {
288                            self.state_stack.push(sv);
289                            SequenceWaitingOnValue(mark, Vec::new())
290                        }
291                        _ => unreachable!(),
292                    }
293                } else {
294                    Error(LoadError::UnexpectedAnchor(mark))
295                }
296            }
297            Event::SequenceEnd => match curstate {
298                SequenceWaitingOnValue(startmark, list) => {
299                    let span = Span::new_with_marks(startmark, mark);
300                    let node = Node::from(MarkedSequenceNode::new(span, list));
301                    if let Some(topstate) = self.state_stack.pop() {
302                        match topstate {
303                            MappingWaitingOnValue(mark, mut map, key) => {
304                                match map.entry(key.clone()) {
305                                    Entry::Occupied(entry)
306                                        if self.options.error_on_duplicate_keys =>
307                                    {
308                                        Error(LoadError::DuplicateKey(Box::new(
309                                            DuplicateKeyInner {
310                                                prev_key: entry.key().clone(),
311                                                key,
312                                            },
313                                        )))
314                                    }
315                                    _ => {
316                                        map.insert(key, node);
317                                        MappingWaitingOnKey(mark, map)
318                                    }
319                                }
320                            }
321                            SequenceWaitingOnValue(mark, mut list) => {
322                                list.push(node);
323                                SequenceWaitingOnValue(mark, list)
324                            }
325                            _ => unreachable!(),
326                        }
327                    } else {
328                        Finished(node)
329                    }
330                }
331                _ => unreachable!(),
332            },
333            Event::DocumentEnd => match curstate {
334                Finished(_) => curstate,
335                _ => unreachable!(),
336            },
337            Event::StreamEnd => match curstate {
338                StartStream => Finished(Node::from(MarkedMappingNode::new_empty(
339                    Span::new_with_marks(mark, mark),
340                ))),
341                Finished(_) => curstate,
342                _ => unreachable!(),
343            },
344            Event::Scalar(val, kind, aid, tag) => {
345                if aid == 0 {
346                    if tag.is_some() {
347                        Error(LoadError::UnexpectedTag(mark))
348                    } else {
349                        let span = Span::new_start(mark);
350                        let val = if matches!(curstate, MappingWaitingOnKey(_, _))
351                            && self.options.lowercase_keys
352                        {
353                            val.to_lowercase()
354                        } else {
355                            val
356                        };
357                        let mut node = MarkedScalarNode::new(span, val);
358                        if self.options.prevent_coercion {
359                            node.set_coerce(matches!(kind, TScalarStyle::Plain));
360                        }
361                        match curstate {
362                            MappingWaitingOnKey(mark, map) => {
363                                MappingWaitingOnValue(mark, map, node)
364                            }
365                            MappingWaitingOnValue(mark, mut map, key) => {
366                                match map.entry(key.clone()) {
367                                    Entry::Occupied(entry)
368                                        if self.options.error_on_duplicate_keys =>
369                                    {
370                                        Error(LoadError::DuplicateKey(Box::new(
371                                            DuplicateKeyInner {
372                                                prev_key: entry.key().clone(),
373                                                key,
374                                            },
375                                        )))
376                                    }
377                                    _ => {
378                                        map.insert(key, Node::from(node));
379                                        MappingWaitingOnKey(mark, map)
380                                    }
381                                }
382                            }
383                            SequenceWaitingOnValue(mark, mut list) => {
384                                list.push(Node::from(node));
385                                SequenceWaitingOnValue(mark, list)
386                            }
387                            StartDocument => Error(LoadError::TopLevelMustBeMapping(mark)),
388                            _ => unreachable!(),
389                        }
390                    }
391                } else {
392                    Error(LoadError::UnexpectedAnchor(mark))
393                }
394            }
395            Event::Nothing => unreachable!(),
396        };
397        self.state_stack.push(newstate);
398    }
399}
400
401impl MarkedLoader {
402    fn new(source: usize, options: LoaderOptions) -> Self {
403        Self {
404            source,
405            state_stack: vec![Initial],
406            options,
407        }
408    }
409
410    fn marker(&self, mark: YamlMarker) -> Marker {
411        Marker::new(self.source, mark.index(), mark.line(), mark.col() + 1)
412    }
413
414    fn finish(mut self) -> Result<Node, LoadError> {
415        let top = self.state_stack.pop();
416        match top.expect("YAML parser state stack unexpectedly empty") {
417            Finished(n) => Ok(n),
418            Error(e) => Err(e),
419            _ => unreachable!(),
420        }
421    }
422}
423
424/// Parse YAML from a string and return a Node representing
425/// the content.
426///
427/// When parsing YAML, the source is stored into all markers which are
428/// in the node spans.  This means that later if you only have a node,
429/// you can determine which source it came from without needing complex
430/// lifetimes to bind strings or other non-copy data to nodes.
431///
432/// This function requires that the top level be a mapping, but the returned
433/// type here is the generic Node enumeration to make it potentially easier
434/// for callers to use.  Regardless, it's always possible to treat the
435/// returned node as a mapping node without risk of panic.
436///
437/// If you wish to load a sequence instead of a mapping, then you will
438/// need to use [`parse_yaml_with_options`] to request that.
439///
440/// ```
441/// # use marked_yaml::*;
442/// let node = parse_yaml(0, include_str!("../examples/everything.yaml"))
443///     .unwrap()
444///     .as_mapping()
445///     .unwrap();
446/// ```
447pub fn parse_yaml<S>(source: usize, yaml: S) -> Result<Node, LoadError>
448where
449    S: AsRef<str>,
450{
451    let options = LoaderOptions::default();
452
453    parse_yaml_with_options(source, yaml, options)
454}
455
456/// Parse YAML from a string and return a Node representing
457/// the content.
458///
459/// Takes an additional LoaderOptions struct to control the behavior of the loader.
460///
461/// This is the way to parse a file with a top-level sequence instead of a mapping
462/// node.
463///
464/// See `parse_yaml` for more information.
465pub fn parse_yaml_with_options<S>(
466    source: usize,
467    yaml: S,
468    options: LoaderOptions,
469) -> Result<Node, LoadError>
470where
471    S: AsRef<str>,
472{
473    let mut loader = MarkedLoader::new(source, options);
474    let mut parser = Parser::new(yaml.as_ref().chars());
475    parser.load(&mut loader, false).map_err(|se| {
476        let mark = loader.marker(*se.marker());
477        LoadError::ScanError(mark, se)
478    })?;
479    loader.finish()
480}
481
// Unit tests for the loader. Markers are written as
// `Marker::new(source, index, line, column)`.
#[cfg(test)]
mod test {
    use super::*;

    // An empty inline mapping loads as a mapping node.
    #[test]
    fn smoke_basics() {
        let node = parse_yaml(0, "{}").unwrap();
        assert!(node.as_mapping().is_some());
    }

    // The bundled example file loads and its scalars coerce by default.
    #[test]
    fn load_everything() {
        let node = parse_yaml(0, include_str!("../examples/everything.yaml")).unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("simple").unwrap().as_str(), "scalar");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_bool(), Some(true));
        assert_eq!(map.get_scalar("boolean2").unwrap().as_bool(), Some(false));
    }

    // With coercion prevention on, some scalars stop coercing while others
    // still do.
    // NOTE(review): presumably `boolean1` and `integer` are quoted in the
    // example file while `boolean2` and `float` are plain -- confirm
    // against the fixture.
    #[test]
    fn prevent_coercion() {
        let node = parse_yaml_with_options(
            0,
            include_str!("../examples/everything.yaml"),
            LoaderOptions::default().prevent_coercion(true),
        )
        .unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("simple").unwrap().as_str(), "scalar");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_str(), "true");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_bool(), None);
        assert_eq!(map.get_scalar("boolean2").unwrap().as_str(), "false");
        assert_eq!(map.get_scalar("boolean2").unwrap().as_bool(), Some(false));
        assert_eq!(map.get_scalar("integer").unwrap().as_str(), "1234");
        assert_eq!(map.get_scalar("integer").unwrap().as_i32(), None);
        assert_eq!(map.get_scalar("float").unwrap().as_str(), "12.34");
        assert_eq!(map.get_scalar("float").unwrap().as_f32(), Some(12.34));
    }

    // Empty input loads as an empty mapping.
    #[test]
    fn toplevel_is_empty() {
        let node = parse_yaml(0, "").unwrap();
        let map = node.as_mapping().unwrap();
        assert!(map.is_empty());
    }

    // An explicit empty inline mapping is empty too.
    #[test]
    fn toplevel_is_empty_inline() {
        let node = parse_yaml(0, "{}").unwrap();
        let map = node.as_mapping().unwrap();
        assert!(map.is_empty());
    }

    // A bare scalar at the top level is rejected, pointing at line 1,
    // column 1.
    #[test]
    fn toplevel_is_scalar() {
        let err = parse_yaml(0, "foo");
        assert_eq!(
            err,
            Err(LoadError::TopLevelMustBeMapping(Marker::new(0, 0, 1, 1)))
        );
        assert!(format!("{}", err.err().unwrap()).contains("1:1: "));
    }

    // A sequence at the top level is rejected under the default options.
    #[test]
    fn toplevel_is_sequence() {
        assert_eq!(
            parse_yaml(0, "[]"),
            Err(LoadError::TopLevelMustBeMapping(Marker::new(0, 0, 1, 1)))
        );
    }

    // Duplicate keys are an error when requested; otherwise the last wins.
    #[test]
    fn duplicate_key() {
        let err = parse_yaml_with_options(
            0,
            "{foo: bar, foo: baz}",
            LoaderOptions::default().error_on_duplicate_keys(true),
        );

        // NOTE(review): the markers given for the expected keys here differ
        // from the positions in the rendered message below (1:2 and 1:12),
        // so scalar node equality presumably ignores spans -- confirm in
        // `types`.
        assert_eq!(
            err,
            Err(LoadError::DuplicateKey(Box::new(DuplicateKeyInner {
                prev_key: MarkedScalarNode::new(Span::new_start(Marker::new(0, 0, 1, 1)), "foo"),
                key: MarkedScalarNode::new(Span::new_start(Marker::new(0, 10, 1, 11)), "foo")
            })))
        );

        assert_eq!(
            format!("{}", err.err().unwrap()),
            "Duplicate key \"foo\" in mapping at 1:2 and 1:12"
        );

        // Without error_on_duplicate_keys, the last key wins
        let node = parse_yaml(0, "{foo: bar, foo: baz}").unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("foo").unwrap().as_str(), "baz");
    }

    // An anchor on the top-level mapping is rejected.
    #[test]
    fn unexpected_anchor() {
        let err = parse_yaml(0, "&foo {}");
        assert_eq!(
            err,
            Err(LoadError::UnexpectedAnchor(Marker::new(0, 5, 1, 6)))
        );
        assert!(format!("{}", err.err().unwrap()).starts_with("1:6: "));
    }

    // An anchor on a nested sequence is rejected.
    #[test]
    fn unexpected_anchor2() {
        assert_eq!(
            parse_yaml(0, "{bar: &foo []}"),
            Err(LoadError::UnexpectedAnchor(Marker::new(0, 11, 1, 12)))
        );
    }

    // An anchor on a scalar value is rejected.
    #[test]
    fn unexpected_anchor3() {
        assert_eq!(
            parse_yaml(0, "{bar: &foo susan}"),
            Err(LoadError::UnexpectedAnchor(Marker::new(0, 11, 1, 12)))
        );
    }

    // A mapping used as a mapping key is rejected.
    #[test]
    fn mapping_key_mapping() {
        let err = parse_yaml(0, "{? {} : {}}");
        assert_eq!(
            err,
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 3, 1, 4)))
        );
        assert!(format!("{}", err.err().unwrap()).starts_with("1:4: "));
    }

    // A sequence used as a mapping key is rejected.
    #[test]
    fn mapping_key_sequence() {
        assert_eq!(
            parse_yaml(0, "{? [] : {}}"),
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 3, 1, 4)))
        );
    }

    // An explicit tag on a scalar is rejected.
    #[test]
    fn unexpected_tag() {
        let err = parse_yaml(0, "{foo: !!str bar}");
        assert_eq!(
            err,
            Err(LoadError::UnexpectedTag(Marker::new(0, 12, 1, 13)))
        );
        assert!(format!("{}", err.err().unwrap()).starts_with("1:13: "));
    }

    // Non-scalar keys are rejected inside nested mappings as well.
    #[test]
    fn nested_mapping_key_mapping() {
        assert_eq!(
            parse_yaml(0, "{foo: {? [] : {}}}"),
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 9, 1, 10)))
        );
    }

    // Malformed input is reported as an error whose message starts with
    // the position 2:1.
    #[test]
    fn malformed_yaml_for_scanerror() {
        let err = parse_yaml(0, "{");
        assert!(err.is_err());
        assert!(format!("{}", err.err().unwrap()).starts_with("2:1: "));
    }

    // A top-level sequence loads when the options ask for one.
    #[test]
    fn toplevel_sequence_wanted() {
        let node =
            parse_yaml_with_options(0, "[yaml]", LoaderOptions::default().toplevel_sequence())
                .unwrap();
        assert!(node.as_sequence().is_some());
    }

    // A top-level mapping is rejected when a sequence was requested.
    #[test]
    fn toplevel_sequence_wanted_got_mapping() {
        assert_eq!(
            parse_yaml_with_options(0, "{}", LoaderOptions::default().toplevel_sequence()),
            Err(LoadError::TopLevelMustBeSequence(Marker::new(0, 0, 1, 1)))
        );
    }

    // Keys keep their case unless lowercasing is requested.
    #[test]
    fn lowercase_keys() {
        let node = parse_yaml_with_options(
            0,
            "KEY: VALUE",
            LoaderOptions::default().lowercase_keys(false),
        )
        .unwrap();
        assert!(node.as_mapping().unwrap().contains_key("KEY"));
        assert!(!node.as_mapping().unwrap().contains_key("key"));

        let node = parse_yaml_with_options(
            0,
            "KEY: VALUE",
            LoaderOptions::default().lowercase_keys(true),
        )
        .unwrap();
        assert!(!node.as_mapping().unwrap().contains_key("KEY"));
        assert!(node.as_mapping().unwrap().contains_key("key"));
    }
}