marked_yaml/
loader.rs

1//! Loading YAML
2//!
3
4use crate::types::*;
5
6use hashlink::linked_hash_map::Entry;
7use yaml_rust::parser::{Event, MarkedEventReceiver, Parser};
8use yaml_rust::scanner::ScanError;
9use yaml_rust::scanner::{Marker as YamlMarker, TScalarStyle};
10
11use std::error::Error;
12use std::fmt::{self, Display};
13
/// An error indicating that a duplicate key was detected in a mapping
///
/// This is the payload of [`LoadError::DuplicateKey`]; it carries both
/// occurrences of the key so that callers can report where each one is.
#[derive(Debug, PartialEq, Eq)]

pub struct DuplicateKeyInner {
    /// The first key (the occurrence which was already present in the mapping)
    pub prev_key: MarkedScalarNode,
    /// The second key (the occurrence which collided with `prev_key`)
    pub key: MarkedScalarNode,
}
23
/// Errors which can occur during loading of YAML
///
/// Apart from [`LoadError::DuplicateKey`], every variant carries the
/// [`Marker`] at which the loader detected the problem.
#[derive(Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum LoadError {
    /// Something other than a mapping detected at the top level
    TopLevelMustBeMapping(Marker),
    /// Something other than a sequence detected at the top level
    TopLevelMustBeSequence(Marker),
    /// Unexpected definition of anchor
    UnexpectedAnchor(Marker),
    /// Mapping keys must be scalars
    MappingKeyMustBeScalar(Marker),
    /// An explicit tag was detected
    UnexpectedTag(Marker),
    /// A YAML scanner error occurred
    ///
    /// Carries the position of the failure and the underlying scanner error.
    ScanError(Marker, ScanError),
    /// A duplicate key was detected in a mapping
    ///
    /// Boxed so that this less common case does not enlarge every `LoadError`.
    DuplicateKey(Box<DuplicateKeyInner>),
}
43
/// Options for loading YAML
///
/// Default options ([`LoaderOptions::default()`]) are:
///
/// - Permit duplicate keys
/// - Do not prevent coercion of scalar nodes
/// - Require the top level to be a mapping
/// - Leave the case of mapping keys alone
///
#[derive(Debug)]
pub struct LoaderOptions {
    // When true, a repeated key in a mapping is reported as an error
    error_on_duplicate_keys: bool,
    // When true, the loader adjusts each scalar's coercion flag based on its style
    prevent_coercion: bool,
    // When true the top level must be a mapping, otherwise it must be a sequence
    toplevel_is_mapping: bool,
    // When true, mapping keys are converted to lowercase while loading
    lowercase_keys: bool,
}
57
58impl Default for LoaderOptions {
59    fn default() -> Self {
60        Self {
61            error_on_duplicate_keys: false,
62            prevent_coercion: false,
63            toplevel_is_mapping: true,
64            lowercase_keys: false,
65        }
66    }
67}
68
69impl LoaderOptions {
70    /// Enable errors on duplicate keys
71    ///
72    /// If enabled, duplicate keys in mappings will cause an error.
73    /// If disabled, the last key/value pair will be used.
74    pub fn error_on_duplicate_keys(self, enable: bool) -> Self {
75        Self {
76            error_on_duplicate_keys: enable,
77            ..self
78        }
79    }
80
81    /// Prevent coercion of scalar nodes
82    ///
83    /// If you want to disable things like [`.as_bool()`](crate::types::MarkedScalarNode::as_bool())
84    /// then you can call this and set coercion to be prevented.
85    pub fn prevent_coercion(self, prevent: bool) -> Self {
86        Self {
87            prevent_coercion: prevent,
88            ..self
89        }
90    }
91
92    /// Require that the top level is a mapping node
93    ///
94    /// This is the default, but you can call this to be explicit.
95    pub fn toplevel_mapping(self) -> Self {
96        Self {
97            toplevel_is_mapping: true,
98            ..self
99        }
100    }
101
102    /// Require that the top level is a sequence node
103    ///
104    /// Without calling this, the top level of the YAML is must be a mapping node
105    pub fn toplevel_sequence(self) -> Self {
106        Self {
107            toplevel_is_mapping: false,
108            ..self
109        }
110    }
111
112    /// Whether or not to force-lowercase mapping keys when loading
113    ///
114    /// By default, the loader will leave key names alone, but in some
115    /// cases it can be preferable to normalise them to lowercase
116    pub fn lowercase_keys(self, force_lowercase: bool) -> Self {
117        Self {
118            lowercase_keys: force_lowercase,
119            ..self
120        }
121    }
122}
123
124impl Display for LoadError {
125    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126        use LoadError::*;
127        #[allow(deprecated)]
128        match self {
129            TopLevelMustBeMapping(m) => write!(f, "{}: Top level must be a mapping", m),
130            TopLevelMustBeSequence(m) => write!(f, "{}: Top level must be a sequence", m),
131            UnexpectedAnchor(m) => write!(f, "{}: Unexpected definition of anchor", m),
132            MappingKeyMustBeScalar(m) => write!(f, "{}: Keys in mappings must be scalar", m),
133            UnexpectedTag(m) => write!(f, "{}: Unexpected use of YAML tag", m),
134            DuplicateKey(inner) => {
135                let DuplicateKeyInner { prev_key, key } = inner.as_ref();
136                write!(
137                    f,
138                    "Duplicate key \"{}\" in mapping at {} and {}",
139                    prev_key.as_str(),
140                    prev_key
141                        .span()
142                        .start()
143                        .map(ToString::to_string)
144                        .unwrap_or_else(|| "?".to_string()),
145                    key.span()
146                        .start()
147                        .map(ToString::to_string)
148                        .unwrap_or_else(|| "?".to_string()),
149                )
150            }
151            ScanError(m, e) => {
152                // e.description() is deprecated but it's the only way to get
153                // the exact info we want out of yaml-rust
154                write!(f, "{}: {}", m, e.description())
155            }
156        }
157    }
158}
159
160impl Error for LoadError {}
161
/// One frame of the loader's state stack.
///
/// The loader keeps a stack of these; the top entry is popped for every
/// incoming parser event and a replacement is pushed once the event has
/// been handled.
#[derive(Debug, PartialEq, Eq)]
enum LoaderState {
    /// Nothing has been received yet; the only state the stack starts with
    Initial,
    /// The stream start event has been seen
    StartStream,
    /// The document start event has been seen; the top-level node comes next
    StartDocument,
    /// Inside a mapping, expecting a key (or the end of the mapping).
    /// Holds the marker where the mapping started and the entries so far.
    MappingWaitingOnKey(Marker, MappingHash),
    /// Inside a mapping, holding a key and expecting its value.
    /// Holds the mapping's start marker, the entries so far and the pending key.
    MappingWaitingOnValue(Marker, MappingHash, MarkedScalarNode),
    /// Inside a sequence, expecting an item (or the end of the sequence).
    /// Holds the marker where the sequence started and the items so far.
    SequenceWaitingOnValue(Marker, Vec<Node>),
    /// The top-level node has been completed
    Finished(Node),
    /// Loading failed; further events are ignored once this is on top
    Error(LoadError),
}
173use LoaderState::*;
174
175impl LoaderState {
176    fn is_error(&self) -> bool {
177        matches!(self, Error(_))
178    }
179}
180
/// Event receiver which builds a marked node tree from parser events.
struct MarkedLoader {
    // Source identifier which is stored into every marker this loader creates
    source: usize,
    // Stack of in-progress states; the top entry is the one currently active
    state_stack: Vec<LoaderState>,
    // Options controlling how the YAML is loaded
    options: LoaderOptions,
}
186
187impl MarkedEventReceiver for MarkedLoader {
188    fn on_event(&mut self, ev: Event, mark: YamlMarker) {
189        // Short-circuit if the state stack is in error
190        if self.state_stack[self.state_stack.len() - 1].is_error() {
191            return;
192        }
193        let mark = self.marker(mark);
194        let curstate = self
195            .state_stack
196            .pop()
197            .expect("State stack became unbalanced");
198        let newstate = match ev {
199            Event::Alias(_) => unreachable!(),
200            Event::StreamStart => {
201                assert_eq!(curstate, Initial);
202                StartStream
203            }
204            Event::DocumentStart => {
205                assert_eq!(curstate, StartStream);
206                StartDocument
207            }
208            Event::MappingStart(aid, tag) => {
209                if tag.is_some() {
210                    Error(LoadError::UnexpectedTag(mark))
211                } else if aid == 0 {
212                    match curstate {
213                        StartDocument => {
214                            if self.options.toplevel_is_mapping {
215                                MappingWaitingOnKey(mark, MappingHash::new())
216                            } else {
217                                Error(LoadError::TopLevelMustBeSequence(mark))
218                            }
219                        }
220                        MappingWaitingOnKey(_, _) => Error(LoadError::MappingKeyMustBeScalar(mark)),
221                        MappingWaitingOnValue(_, _, _) => {
222                            self.state_stack.push(curstate);
223                            MappingWaitingOnKey(mark, MappingHash::new())
224                        }
225                        SequenceWaitingOnValue(_, _) => {
226                            self.state_stack.push(curstate);
227                            MappingWaitingOnKey(mark, MappingHash::new())
228                        }
229                        _ => unreachable!(),
230                    }
231                } else {
232                    Error(LoadError::UnexpectedAnchor(mark))
233                }
234            }
235            Event::MappingEnd => match curstate {
236                MappingWaitingOnKey(startmark, map) => {
237                    let span = Span::new_with_marks(startmark, mark);
238                    let node = Node::from(MarkedMappingNode::new(span, map));
239                    if let Some(topstate) = self.state_stack.pop() {
240                        match topstate {
241                            MappingWaitingOnValue(mark, mut map, key) => {
242                                match map.entry(key.clone()) {
243                                    Entry::Occupied(entry)
244                                        if self.options.error_on_duplicate_keys =>
245                                    {
246                                        Error(LoadError::DuplicateKey(Box::new(
247                                            DuplicateKeyInner {
248                                                prev_key: entry.key().clone(),
249                                                key,
250                                            },
251                                        )))
252                                    }
253                                    _ => {
254                                        map.insert(key, node);
255                                        MappingWaitingOnKey(mark, map)
256                                    }
257                                }
258                            }
259                            SequenceWaitingOnValue(mark, mut list) => {
260                                list.push(node);
261                                SequenceWaitingOnValue(mark, list)
262                            }
263                            _ => unreachable!(),
264                        }
265                    } else {
266                        Finished(node)
267                    }
268                }
269                _ => unreachable!(),
270            },
271            Event::SequenceStart(aid, tag) => {
272                if tag.is_some() {
273                    Error(LoadError::UnexpectedTag(mark))
274                } else if aid == 0 {
275                    match curstate {
276                        StartDocument => {
277                            if self.options.toplevel_is_mapping {
278                                Error(LoadError::TopLevelMustBeMapping(mark))
279                            } else {
280                                SequenceWaitingOnValue(mark, Vec::new())
281                            }
282                        }
283                        MappingWaitingOnKey(_, _) => Error(LoadError::MappingKeyMustBeScalar(mark)),
284                        mv @ MappingWaitingOnValue(_, _, _) => {
285                            self.state_stack.push(mv);
286                            SequenceWaitingOnValue(mark, Vec::new())
287                        }
288                        sv @ SequenceWaitingOnValue(_, _) => {
289                            self.state_stack.push(sv);
290                            SequenceWaitingOnValue(mark, Vec::new())
291                        }
292                        _ => unreachable!(),
293                    }
294                } else {
295                    Error(LoadError::UnexpectedAnchor(mark))
296                }
297            }
298            Event::SequenceEnd => match curstate {
299                SequenceWaitingOnValue(startmark, list) => {
300                    let span = Span::new_with_marks(startmark, mark);
301                    let node = Node::from(MarkedSequenceNode::new(span, list));
302                    if let Some(topstate) = self.state_stack.pop() {
303                        match topstate {
304                            MappingWaitingOnValue(mark, mut map, key) => {
305                                match map.entry(key.clone()) {
306                                    Entry::Occupied(entry)
307                                        if self.options.error_on_duplicate_keys =>
308                                    {
309                                        Error(LoadError::DuplicateKey(Box::new(
310                                            DuplicateKeyInner {
311                                                prev_key: entry.key().clone(),
312                                                key,
313                                            },
314                                        )))
315                                    }
316                                    _ => {
317                                        map.insert(key, node);
318                                        MappingWaitingOnKey(mark, map)
319                                    }
320                                }
321                            }
322                            SequenceWaitingOnValue(mark, mut list) => {
323                                list.push(node);
324                                SequenceWaitingOnValue(mark, list)
325                            }
326                            _ => unreachable!(),
327                        }
328                    } else {
329                        Finished(node)
330                    }
331                }
332                _ => unreachable!(),
333            },
334            Event::DocumentEnd => match curstate {
335                Finished(_) => curstate,
336                _ => unreachable!(),
337            },
338            Event::StreamEnd => match curstate {
339                StartStream => Finished(Node::from(MarkedMappingNode::new_empty(
340                    Span::new_with_marks(mark, mark),
341                ))),
342                Finished(_) => curstate,
343                _ => unreachable!(),
344            },
345            Event::Scalar(val, kind, aid, tag) => {
346                if aid == 0 {
347                    if tag.is_some() {
348                        Error(LoadError::UnexpectedTag(mark))
349                    } else {
350                        let span = Span::new_start(mark);
351                        let val = if matches!(curstate, MappingWaitingOnKey(_, _))
352                            && self.options.lowercase_keys
353                        {
354                            val.to_lowercase()
355                        } else {
356                            val
357                        };
358                        let mut node = MarkedScalarNode::new(span, val);
359                        if self.options.prevent_coercion {
360                            node.set_coerce(matches!(kind, TScalarStyle::Plain));
361                        }
362                        match curstate {
363                            MappingWaitingOnKey(mark, map) => {
364                                MappingWaitingOnValue(mark, map, node)
365                            }
366                            MappingWaitingOnValue(mark, mut map, key) => {
367                                match map.entry(key.clone()) {
368                                    Entry::Occupied(entry)
369                                        if self.options.error_on_duplicate_keys =>
370                                    {
371                                        Error(LoadError::DuplicateKey(Box::new(
372                                            DuplicateKeyInner {
373                                                prev_key: entry.key().clone(),
374                                                key,
375                                            },
376                                        )))
377                                    }
378                                    _ => {
379                                        map.insert(key, Node::from(node));
380                                        MappingWaitingOnKey(mark, map)
381                                    }
382                                }
383                            }
384                            SequenceWaitingOnValue(mark, mut list) => {
385                                list.push(Node::from(node));
386                                SequenceWaitingOnValue(mark, list)
387                            }
388                            StartDocument => Error(LoadError::TopLevelMustBeMapping(mark)),
389                            _ => unreachable!(),
390                        }
391                    }
392                } else {
393                    Error(LoadError::UnexpectedAnchor(mark))
394                }
395            }
396            Event::Nothing => unreachable!(),
397        };
398        self.state_stack.push(newstate);
399    }
400}
401
402impl MarkedLoader {
403    fn new(source: usize, options: LoaderOptions) -> Self {
404        Self {
405            source,
406            state_stack: vec![Initial],
407            options,
408        }
409    }
410
411    fn marker(&self, mark: YamlMarker) -> Marker {
412        Marker::new(self.source, mark.line(), mark.col() + 1)
413    }
414
415    fn finish(mut self) -> Result<Node, LoadError> {
416        let top = self.state_stack.pop();
417        match top.expect("YAML parser state stack unexpectedly empty") {
418            Finished(n) => Ok(n),
419            Error(e) => Err(e),
420            _ => unreachable!(),
421        }
422    }
423}
424
425/// Parse YAML from a string and return a Node representing
426/// the content.
427///
428/// When parsing YAML, the source is stored into all markers which are
429/// in the node spans.  This means that later if you only have a node,
430/// you can determine which source it came from without needing complex
431/// lifetimes to bind strings or other non-copy data to nodes.
432///
433/// This function requires that the top level be a mapping, but the returned
434/// type here is the generic Node enumeration to make it potentially easier
435/// for callers to use.  Regardless, it's always possible to treat the
436/// returned node as a mapping node without risk of panic.
437///
438/// If you wish to load a sequence instead of a mapping, then you will
439/// need to use [`parse_yaml_with_options`] to request that.
440///
441/// ```
442/// # use marked_yaml::*;
443/// let node = parse_yaml(0, include_str!("../examples/everything.yaml"))
444///     .unwrap()
445///     .as_mapping()
446///     .unwrap();
447/// ```
448pub fn parse_yaml<S>(source: usize, yaml: S) -> Result<Node, LoadError>
449where
450    S: AsRef<str>,
451{
452    let options = LoaderOptions::default();
453
454    parse_yaml_with_options(source, yaml, options)
455}
456
457/// Parse YAML from a string and return a Node representing
458/// the content.
459///
460/// Takes an additional LoaderOptions struct to control the behavior of the loader.
461///
462/// This is the way to parse a file with a top-level sequence instead of a mapping
463/// node.
464///
465/// See `parse_yaml` for more information.
466pub fn parse_yaml_with_options<S>(
467    source: usize,
468    yaml: S,
469    options: LoaderOptions,
470) -> Result<Node, LoadError>
471where
472    S: AsRef<str>,
473{
474    let mut loader = MarkedLoader::new(source, options);
475    let mut parser = Parser::new(yaml.as_ref().chars());
476    parser.load(&mut loader, false).map_err(|se| {
477        let mark = loader.marker(*se.marker());
478        LoadError::ScanError(mark, se)
479    })?;
480    loader.finish()
481}
482
// Unit tests for the loader: successful loads, each kind of load error,
// and the effect of the individual loader options.
#[cfg(test)]
mod test {
    use super::*;

    // An empty flow mapping loads as a mapping node
    #[test]
    fn smoke_basics() {
        let node = parse_yaml(0, "{}").unwrap();
        assert!(node.as_mapping().is_some());
    }

    // The example document loads and its scalars coerce as expected
    #[test]
    fn load_everything() {
        let node = parse_yaml(0, include_str!("../examples/everything.yaml")).unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("simple").unwrap().as_str(), "scalar");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_bool(), Some(true));
        assert_eq!(map.get_scalar("boolean2").unwrap().as_bool(), Some(false));
    }

    // With prevent_coercion set, some scalars in the example document stop
    // coercing while others still do.
    // NOTE(review): presumably the difference is whether the scalar is quoted
    // in everything.yaml - confirm against the example file.
    #[test]
    fn prevent_coercion() {
        let node = parse_yaml_with_options(
            0,
            include_str!("../examples/everything.yaml"),
            LoaderOptions::default().prevent_coercion(true),
        )
        .unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("simple").unwrap().as_str(), "scalar");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_str(), "true");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_bool(), None);
        assert_eq!(map.get_scalar("boolean2").unwrap().as_str(), "false");
        assert_eq!(map.get_scalar("boolean2").unwrap().as_bool(), Some(false));
        assert_eq!(map.get_scalar("integer").unwrap().as_str(), "1234");
        assert_eq!(map.get_scalar("integer").unwrap().as_i32(), None);
        assert_eq!(map.get_scalar("float").unwrap().as_str(), "12.34");
        assert_eq!(map.get_scalar("float").unwrap().as_f32(), Some(12.34));
    }

    // Empty input loads as an empty mapping
    #[test]
    fn toplevel_is_empty() {
        let node = parse_yaml(0, "").unwrap();
        let map = node.as_mapping().unwrap();
        assert!(map.is_empty());
    }

    // An explicit empty flow mapping also loads as an empty mapping
    #[test]
    fn toplevel_is_empty_inline() {
        let node = parse_yaml(0, "{}").unwrap();
        let map = node.as_mapping().unwrap();
        assert!(map.is_empty());
    }

    // A bare scalar at the top level is rejected, and the message
    // includes the position
    #[test]
    fn toplevel_is_scalar() {
        let err = parse_yaml(0, "foo");
        assert_eq!(
            err,
            Err(LoadError::TopLevelMustBeMapping(Marker::new(0, 1, 1)))
        );
        assert!(format!("{}", err.err().unwrap()).contains("1:1: "));
    }

    // A sequence at the top level is rejected by default
    #[test]
    fn toplevel_is_sequence() {
        assert_eq!(
            parse_yaml(0, "[]"),
            Err(LoadError::TopLevelMustBeMapping(Marker::new(0, 1, 1)))
        );
    }

    // Duplicate keys are an error only when the option asks for it
    #[test]
    fn duplicate_key() {
        let err = parse_yaml_with_options(
            0,
            "{foo: bar, foo: baz}",
            LoaderOptions::default().error_on_duplicate_keys(true),
        );

        // NOTE(review): the markers used here (1:1 and 1:11) differ from the
        // positions in the formatted message below (1:2 and 1:12); presumably
        // scalar node equality ignores spans - confirm against the types module.
        assert_eq!(
            err,
            Err(LoadError::DuplicateKey(Box::new(DuplicateKeyInner {
                prev_key: MarkedScalarNode::new(Span::new_start(Marker::new(0, 1, 1)), "foo"),
                key: MarkedScalarNode::new(Span::new_start(Marker::new(0, 1, 11)), "foo")
            })))
        );

        assert_eq!(
            format!("{}", err.err().unwrap()),
            "Duplicate key \"foo\" in mapping at 1:2 and 1:12"
        );

        // Without error_on_duplicate_keys, the last key wins
        let node = parse_yaml(0, "{foo: bar, foo: baz}").unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("foo").unwrap().as_str(), "baz");
    }

    // An anchor on the top-level mapping is rejected
    #[test]
    fn unexpected_anchor() {
        let err = parse_yaml(0, "&foo {}");
        assert_eq!(err, Err(LoadError::UnexpectedAnchor(Marker::new(0, 1, 6))));
        assert!(format!("{}", err.err().unwrap()).starts_with("1:6: "));
    }

    // An anchor on a nested sequence is rejected
    #[test]
    fn unexpected_anchor2() {
        assert_eq!(
            parse_yaml(0, "{bar: &foo []}"),
            Err(LoadError::UnexpectedAnchor(Marker::new(0, 1, 12)))
        );
    }

    // An anchor on a scalar value is rejected
    #[test]
    fn unexpected_anchor3() {
        assert_eq!(
            parse_yaml(0, "{bar: &foo susan}"),
            Err(LoadError::UnexpectedAnchor(Marker::new(0, 1, 12)))
        );
    }

    // A mapping used as a mapping key is rejected
    #[test]
    fn mapping_key_mapping() {
        let err = parse_yaml(0, "{? {} : {}}");
        assert_eq!(
            err,
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 1, 4)))
        );
        assert!(format!("{}", err.err().unwrap()).starts_with("1:4: "));
    }

    // A sequence used as a mapping key is rejected
    #[test]
    fn mapping_key_sequence() {
        assert_eq!(
            parse_yaml(0, "{? [] : {}}"),
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 1, 4)))
        );
    }

    // An explicit tag is rejected
    #[test]
    fn unexpected_tag() {
        let err = parse_yaml(0, "{foo: !!str bar}");
        assert_eq!(err, Err(LoadError::UnexpectedTag(Marker::new(0, 1, 13))));
        assert!(format!("{}", err.err().unwrap()).starts_with("1:13: "));
    }

    // Non-scalar keys are also rejected inside nested mappings
    #[test]
    fn nested_mapping_key_mapping() {
        assert_eq!(
            parse_yaml(0, "{foo: {? [] : {}}}"),
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 1, 10)))
        );
    }

    // Malformed input surfaces as an error whose message carries a position
    #[test]
    fn malformed_yaml_for_scanerror() {
        let err = parse_yaml(0, "{");
        assert!(err.is_err());
        assert!(format!("{}", err.err().unwrap()).starts_with("2:1: "));
    }

    // Requesting a top-level sequence allows a sequence to load
    #[test]
    fn toplevel_sequence_wanted() {
        let node =
            parse_yaml_with_options(0, "[yaml]", LoaderOptions::default().toplevel_sequence())
                .unwrap();
        assert!(node.as_sequence().is_some());
    }

    // Requesting a top-level sequence makes a top-level mapping an error
    #[test]
    fn toplevel_sequence_wanted_got_mapping() {
        assert_eq!(
            parse_yaml_with_options(0, "{}", LoaderOptions::default().toplevel_sequence()),
            Err(LoadError::TopLevelMustBeSequence(Marker::new(0, 1, 1)))
        );
    }

    // Keys keep their case unless lowercasing is requested
    #[test]
    fn lowercase_keys() {
        let node = parse_yaml_with_options(
            0,
            "KEY: VALUE",
            LoaderOptions::default().lowercase_keys(false),
        )
        .unwrap();
        assert!(node.as_mapping().unwrap().contains_key("KEY"));
        assert!(!node.as_mapping().unwrap().contains_key("key"));

        let node = parse_yaml_with_options(
            0,
            "KEY: VALUE",
            LoaderOptions::default().lowercase_keys(true),
        )
        .unwrap();
        assert!(!node.as_mapping().unwrap().contains_key("KEY"));
        assert!(node.as_mapping().unwrap().contains_key("key"));
    }
}