1use crate::types::*;
5
6use hashlink::linked_hash_map::Entry;
7use yaml_rust::parser::{Event, MarkedEventReceiver, Parser};
8use yaml_rust::scanner::ScanError;
9use yaml_rust::scanner::{Marker as YamlMarker, TScalarStyle};
10
11use std::error::Error;
12use std::fmt::{self, Display};
13
/// Payload of [`LoadError::DuplicateKey`]: the two mapping keys which collided.
#[derive(Debug, PartialEq, Eq)]

pub struct DuplicateKeyInner {
    /// The key which was already present in the mapping.
    pub prev_key: MarkedScalarNode,
    /// The later key whose insertion triggered the error.
    pub key: MarkedScalarNode,
}
23
/// Errors which can occur while loading a YAML document.
///
/// Variants carrying a [`Marker`] report the position of the parser event (or
/// scan error) which caused the failure.
#[derive(Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum LoadError {
    /// A mapping was required at the top level of the document, but something
    /// else was found there.
    TopLevelMustBeMapping(Marker),
    /// A sequence was required at the top level of the document, but something
    /// else was found there.
    TopLevelMustBeSequence(Marker),
    /// A node defined an anchor; the loader rejects anchors.
    UnexpectedAnchor(Marker),
    /// A mapping or a sequence was used as a mapping key; keys must be scalars.
    MappingKeyMustBeScalar(Marker),
    /// A node carried an explicit YAML tag; the loader rejects tags.
    UnexpectedTag(Marker),
    /// The underlying YAML parser failed; the marker is the position it reported.
    ScanError(Marker, ScanError),
    /// The same key appeared twice in one mapping. Only produced when
    /// `LoaderOptions::error_on_duplicate_keys` is enabled.
    // NOTE(review): presumably boxed to keep `LoadError` small -- confirm.
    DuplicateKey(Box<DuplicateKeyInner>),
}
43
/// Settings which control how YAML is loaded.
///
/// Start from [`LoaderOptions::default()`] and chain the builder methods; each
/// one consumes the options and returns the adjusted value.
#[derive(Debug)]
pub struct LoaderOptions {
    /// Report a `LoadError::DuplicateKey` instead of replacing the earlier value.
    error_on_duplicate_keys: bool,
    /// When set, each scalar's coercion flag is enabled only for plain-style
    /// scalars (see the `set_coerce` call in the loader).
    prevent_coercion: bool,
    /// `true`: the top level must be a mapping; `false`: it must be a sequence.
    toplevel_is_mapping: bool,
    /// Lowercase every mapping key as it is loaded.
    lowercase_keys: bool,
}

impl Default for LoaderOptions {
    /// Defaults: duplicate keys tolerated, coercion not prevented, top level
    /// is a mapping, keys left in their original case.
    fn default() -> Self {
        LoaderOptions {
            toplevel_is_mapping: true,
            error_on_duplicate_keys: false,
            prevent_coercion: false,
            lowercase_keys: false,
        }
    }
}

impl LoaderOptions {
    /// Choose whether a repeated mapping key is an error (`true`) or silently
    /// replaces the earlier value (`false`).
    pub fn error_on_duplicate_keys(mut self, enable: bool) -> Self {
        self.error_on_duplicate_keys = enable;
        self
    }

    /// Choose whether coercion is restricted to plain (unquoted) scalars.
    pub fn prevent_coercion(mut self, prevent: bool) -> Self {
        self.prevent_coercion = prevent;
        self
    }

    /// Require the top level of the document to be a mapping.
    pub fn toplevel_mapping(mut self) -> Self {
        self.toplevel_is_mapping = true;
        self
    }

    /// Require the top level of the document to be a sequence.
    pub fn toplevel_sequence(mut self) -> Self {
        self.toplevel_is_mapping = false;
        self
    }

    /// Choose whether mapping keys are converted to lowercase while loading.
    pub fn lowercase_keys(mut self, force_lowercase: bool) -> Self {
        self.lowercase_keys = force_lowercase;
        self
    }
}
123
124impl Display for LoadError {
125 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126 use LoadError::*;
127 #[allow(deprecated)]
128 match self {
129 TopLevelMustBeMapping(m) => write!(f, "{}: Top level must be a mapping", m),
130 TopLevelMustBeSequence(m) => write!(f, "{}: Top level must be a sequence", m),
131 UnexpectedAnchor(m) => write!(f, "{}: Unexpected definition of anchor", m),
132 MappingKeyMustBeScalar(m) => write!(f, "{}: Keys in mappings must be scalar", m),
133 UnexpectedTag(m) => write!(f, "{}: Unexpected use of YAML tag", m),
134 DuplicateKey(inner) => {
135 let DuplicateKeyInner { prev_key, key } = inner.as_ref();
136 write!(
137 f,
138 "Duplicate key \"{}\" in mapping at {} and {}",
139 prev_key.as_str(),
140 prev_key
141 .span()
142 .start()
143 .map(ToString::to_string)
144 .unwrap_or_else(|| "?".to_string()),
145 key.span()
146 .start()
147 .map(ToString::to_string)
148 .unwrap_or_else(|| "?".to_string()),
149 )
150 }
151 ScanError(m, e) => {
152 write!(f, "{}: {}", m, e.description())
155 }
156 }
157 }
158}
159
// Marker impl only: the trait's default methods are used, and the `Display`
// output above already includes the scan error's description.
impl Error for LoadError {}
161
/// One entry of the loader's state stack.
///
/// The top of the stack is the state currently being advanced by parser
/// events; entries below it are containers waiting for a nested container to
/// be completed.
#[derive(Debug, PartialEq, Eq)]
enum LoaderState {
    /// Nothing has been received yet.
    Initial,
    /// The stream has started; no document has begun.
    StartStream,
    /// A document has started; its top-level node has not been seen.
    StartDocument,
    /// Inside a mapping which started at the marker, expecting a key (or the
    /// end of the mapping). Holds the entries collected so far.
    MappingWaitingOnKey(Marker, MappingHash),
    /// Inside a mapping which started at the marker; the given key has been
    /// read and its value is awaited.
    MappingWaitingOnValue(Marker, MappingHash, MarkedScalarNode),
    /// Inside a sequence which started at the marker, expecting the next item
    /// (or the end of the sequence). Holds the items collected so far.
    SequenceWaitingOnValue(Marker, Vec<Node>),
    /// The top-level node is complete.
    Finished(Node),
    /// Loading failed; all further events are ignored.
    Error(LoadError),
}
173use LoaderState::*;
174
175impl LoaderState {
176 fn is_error(&self) -> bool {
177 matches!(self, Error(_))
178 }
179}
180
/// Event receiver which builds a marked node tree from YAML parser events.
struct MarkedLoader {
    /// Source identifier stamped into every `Marker` this loader creates.
    source: usize,
    /// Stack of in-progress states; the last entry is the active one.
    state_stack: Vec<LoaderState>,
    /// Options controlling how events are interpreted.
    options: LoaderOptions,
}
186
187impl MarkedEventReceiver for MarkedLoader {
188 fn on_event(&mut self, ev: Event, mark: YamlMarker) {
189 if self.state_stack[self.state_stack.len() - 1].is_error() {
191 return;
192 }
193 let mark = self.marker(mark);
194 let curstate = self
195 .state_stack
196 .pop()
197 .expect("State stack became unbalanced");
198 let newstate = match ev {
199 Event::Alias(_) => unreachable!(),
200 Event::StreamStart => {
201 assert_eq!(curstate, Initial);
202 StartStream
203 }
204 Event::DocumentStart => {
205 assert_eq!(curstate, StartStream);
206 StartDocument
207 }
208 Event::MappingStart(aid, tag) => {
209 if tag.is_some() {
210 Error(LoadError::UnexpectedTag(mark))
211 } else if aid == 0 {
212 match curstate {
213 StartDocument => {
214 if self.options.toplevel_is_mapping {
215 MappingWaitingOnKey(mark, MappingHash::new())
216 } else {
217 Error(LoadError::TopLevelMustBeSequence(mark))
218 }
219 }
220 MappingWaitingOnKey(_, _) => Error(LoadError::MappingKeyMustBeScalar(mark)),
221 MappingWaitingOnValue(_, _, _) => {
222 self.state_stack.push(curstate);
223 MappingWaitingOnKey(mark, MappingHash::new())
224 }
225 SequenceWaitingOnValue(_, _) => {
226 self.state_stack.push(curstate);
227 MappingWaitingOnKey(mark, MappingHash::new())
228 }
229 _ => unreachable!(),
230 }
231 } else {
232 Error(LoadError::UnexpectedAnchor(mark))
233 }
234 }
235 Event::MappingEnd => match curstate {
236 MappingWaitingOnKey(startmark, map) => {
237 let span = Span::new_with_marks(startmark, mark);
238 let node = Node::from(MarkedMappingNode::new(span, map));
239 if let Some(topstate) = self.state_stack.pop() {
240 match topstate {
241 MappingWaitingOnValue(mark, mut map, key) => {
242 match map.entry(key.clone()) {
243 Entry::Occupied(entry)
244 if self.options.error_on_duplicate_keys =>
245 {
246 Error(LoadError::DuplicateKey(Box::new(
247 DuplicateKeyInner {
248 prev_key: entry.key().clone(),
249 key,
250 },
251 )))
252 }
253 _ => {
254 map.insert(key, node);
255 MappingWaitingOnKey(mark, map)
256 }
257 }
258 }
259 SequenceWaitingOnValue(mark, mut list) => {
260 list.push(node);
261 SequenceWaitingOnValue(mark, list)
262 }
263 _ => unreachable!(),
264 }
265 } else {
266 Finished(node)
267 }
268 }
269 _ => unreachable!(),
270 },
271 Event::SequenceStart(aid, tag) => {
272 if tag.is_some() {
273 Error(LoadError::UnexpectedTag(mark))
274 } else if aid == 0 {
275 match curstate {
276 StartDocument => {
277 if self.options.toplevel_is_mapping {
278 Error(LoadError::TopLevelMustBeMapping(mark))
279 } else {
280 SequenceWaitingOnValue(mark, Vec::new())
281 }
282 }
283 MappingWaitingOnKey(_, _) => Error(LoadError::MappingKeyMustBeScalar(mark)),
284 mv @ MappingWaitingOnValue(_, _, _) => {
285 self.state_stack.push(mv);
286 SequenceWaitingOnValue(mark, Vec::new())
287 }
288 sv @ SequenceWaitingOnValue(_, _) => {
289 self.state_stack.push(sv);
290 SequenceWaitingOnValue(mark, Vec::new())
291 }
292 _ => unreachable!(),
293 }
294 } else {
295 Error(LoadError::UnexpectedAnchor(mark))
296 }
297 }
298 Event::SequenceEnd => match curstate {
299 SequenceWaitingOnValue(startmark, list) => {
300 let span = Span::new_with_marks(startmark, mark);
301 let node = Node::from(MarkedSequenceNode::new(span, list));
302 if let Some(topstate) = self.state_stack.pop() {
303 match topstate {
304 MappingWaitingOnValue(mark, mut map, key) => {
305 match map.entry(key.clone()) {
306 Entry::Occupied(entry)
307 if self.options.error_on_duplicate_keys =>
308 {
309 Error(LoadError::DuplicateKey(Box::new(
310 DuplicateKeyInner {
311 prev_key: entry.key().clone(),
312 key,
313 },
314 )))
315 }
316 _ => {
317 map.insert(key, node);
318 MappingWaitingOnKey(mark, map)
319 }
320 }
321 }
322 SequenceWaitingOnValue(mark, mut list) => {
323 list.push(node);
324 SequenceWaitingOnValue(mark, list)
325 }
326 _ => unreachable!(),
327 }
328 } else {
329 Finished(node)
330 }
331 }
332 _ => unreachable!(),
333 },
334 Event::DocumentEnd => match curstate {
335 Finished(_) => curstate,
336 _ => unreachable!(),
337 },
338 Event::StreamEnd => match curstate {
339 StartStream => Finished(Node::from(MarkedMappingNode::new_empty(
340 Span::new_with_marks(mark, mark),
341 ))),
342 Finished(_) => curstate,
343 _ => unreachable!(),
344 },
345 Event::Scalar(val, kind, aid, tag) => {
346 if aid == 0 {
347 if tag.is_some() {
348 Error(LoadError::UnexpectedTag(mark))
349 } else {
350 let span = Span::new_start(mark);
351 let val = if matches!(curstate, MappingWaitingOnKey(_, _))
352 && self.options.lowercase_keys
353 {
354 val.to_lowercase()
355 } else {
356 val
357 };
358 let mut node = MarkedScalarNode::new(span, val);
359 if self.options.prevent_coercion {
360 node.set_coerce(matches!(kind, TScalarStyle::Plain));
361 }
362 match curstate {
363 MappingWaitingOnKey(mark, map) => {
364 MappingWaitingOnValue(mark, map, node)
365 }
366 MappingWaitingOnValue(mark, mut map, key) => {
367 match map.entry(key.clone()) {
368 Entry::Occupied(entry)
369 if self.options.error_on_duplicate_keys =>
370 {
371 Error(LoadError::DuplicateKey(Box::new(
372 DuplicateKeyInner {
373 prev_key: entry.key().clone(),
374 key,
375 },
376 )))
377 }
378 _ => {
379 map.insert(key, Node::from(node));
380 MappingWaitingOnKey(mark, map)
381 }
382 }
383 }
384 SequenceWaitingOnValue(mark, mut list) => {
385 list.push(Node::from(node));
386 SequenceWaitingOnValue(mark, list)
387 }
388 StartDocument => Error(LoadError::TopLevelMustBeMapping(mark)),
389 _ => unreachable!(),
390 }
391 }
392 } else {
393 Error(LoadError::UnexpectedAnchor(mark))
394 }
395 }
396 Event::Nothing => unreachable!(),
397 };
398 self.state_stack.push(newstate);
399 }
400}
401
402impl MarkedLoader {
403 fn new(source: usize, options: LoaderOptions) -> Self {
404 Self {
405 source,
406 state_stack: vec![Initial],
407 options,
408 }
409 }
410
411 fn marker(&self, mark: YamlMarker) -> Marker {
412 Marker::new(self.source, mark.line(), mark.col() + 1)
413 }
414
415 fn finish(mut self) -> Result<Node, LoadError> {
416 let top = self.state_stack.pop();
417 match top.expect("YAML parser state stack unexpectedly empty") {
418 Finished(n) => Ok(n),
419 Error(e) => Err(e),
420 _ => unreachable!(),
421 }
422 }
423}
424
425pub fn parse_yaml<S>(source: usize, yaml: S) -> Result<Node, LoadError>
449where
450 S: AsRef<str>,
451{
452 let options = LoaderOptions::default();
453
454 parse_yaml_with_options(source, yaml, options)
455}
456
457pub fn parse_yaml_with_options<S>(
467 source: usize,
468 yaml: S,
469 options: LoaderOptions,
470) -> Result<Node, LoadError>
471where
472 S: AsRef<str>,
473{
474 let mut loader = MarkedLoader::new(source, options);
475 let mut parser = Parser::new(yaml.as_ref().chars());
476 parser.load(&mut loader, false).map_err(|se| {
477 let mark = loader.marker(*se.marker());
478 LoadError::ScanError(mark, se)
479 })?;
480 loader.finish()
481}
482
#[cfg(test)]
mod test {
    //! Tests for the loader: successful loads, each `LoadError` variant, and
    //! the effect of every `LoaderOptions` switch.
    use super::*;

    #[test]
    fn smoke_basics() {
        let node = parse_yaml(0, "{}").unwrap();
        assert!(node.as_mapping().is_some());
    }

    #[test]
    fn load_everything() {
        let node = parse_yaml(0, include_str!("../examples/everything.yaml")).unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("simple").unwrap().as_str(), "scalar");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_bool(), Some(true));
        assert_eq!(map.get_scalar("boolean2").unwrap().as_bool(), Some(false));
    }

    #[test]
    fn prevent_coercion() {
        let node = parse_yaml_with_options(
            0,
            include_str!("../examples/everything.yaml"),
            LoaderOptions::default().prevent_coercion(true),
        )
        .unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("simple").unwrap().as_str(), "scalar");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_str(), "true");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_bool(), None);
        assert_eq!(map.get_scalar("boolean2").unwrap().as_str(), "false");
        assert_eq!(map.get_scalar("boolean2").unwrap().as_bool(), Some(false));
        assert_eq!(map.get_scalar("integer").unwrap().as_str(), "1234");
        assert_eq!(map.get_scalar("integer").unwrap().as_i32(), None);
        assert_eq!(map.get_scalar("float").unwrap().as_str(), "12.34");
        assert_eq!(map.get_scalar("float").unwrap().as_f32(), Some(12.34));
    }

    #[test]
    fn toplevel_is_empty() {
        let node = parse_yaml(0, "").unwrap();
        let map = node.as_mapping().unwrap();
        assert!(map.is_empty());
    }

    #[test]
    fn toplevel_is_empty_inline() {
        let node = parse_yaml(0, "{}").unwrap();
        let map = node.as_mapping().unwrap();
        assert!(map.is_empty());
    }

    #[test]
    fn toplevel_is_scalar() {
        let err = parse_yaml(0, "foo");
        assert_eq!(
            err,
            Err(LoadError::TopLevelMustBeMapping(Marker::new(0, 1, 1)))
        );
        // The marker must lead the message, as in the other error tests.
        assert!(format!("{}", err.err().unwrap()).starts_with("1:1: "));
    }

    #[test]
    fn toplevel_is_sequence() {
        assert_eq!(
            parse_yaml(0, "[]"),
            Err(LoadError::TopLevelMustBeMapping(Marker::new(0, 1, 1)))
        );
    }

    #[test]
    fn duplicate_key() {
        let err = parse_yaml_with_options(
            0,
            "{foo: bar, foo: baz}",
            LoaderOptions::default().error_on_duplicate_keys(true),
        );

        // The two `foo` keys start at columns 2 and 12, matching the rendered
        // message checked below.
        assert_eq!(
            err,
            Err(LoadError::DuplicateKey(Box::new(DuplicateKeyInner {
                prev_key: MarkedScalarNode::new(Span::new_start(Marker::new(0, 1, 2)), "foo"),
                key: MarkedScalarNode::new(Span::new_start(Marker::new(0, 1, 12)), "foo")
            })))
        );

        assert_eq!(
            format!("{}", err.err().unwrap()),
            "Duplicate key \"foo\" in mapping at 1:2 and 1:12"
        );

        // Without the option the later value silently wins.
        let node = parse_yaml(0, "{foo: bar, foo: baz}").unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("foo").unwrap().as_str(), "baz");
    }

    #[test]
    fn unexpected_anchor() {
        let err = parse_yaml(0, "&foo {}");
        assert_eq!(err, Err(LoadError::UnexpectedAnchor(Marker::new(0, 1, 6))));
        assert!(format!("{}", err.err().unwrap()).starts_with("1:6: "));
    }

    #[test]
    fn unexpected_anchor2() {
        assert_eq!(
            parse_yaml(0, "{bar: &foo []}"),
            Err(LoadError::UnexpectedAnchor(Marker::new(0, 1, 12)))
        );
    }

    #[test]
    fn unexpected_anchor3() {
        assert_eq!(
            parse_yaml(0, "{bar: &foo susan}"),
            Err(LoadError::UnexpectedAnchor(Marker::new(0, 1, 12)))
        );
    }

    #[test]
    fn mapping_key_mapping() {
        let err = parse_yaml(0, "{? {} : {}}");
        assert_eq!(
            err,
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 1, 4)))
        );
        assert!(format!("{}", err.err().unwrap()).starts_with("1:4: "));
    }

    #[test]
    fn mapping_key_sequence() {
        assert_eq!(
            parse_yaml(0, "{? [] : {}}"),
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 1, 4)))
        );
    }

    #[test]
    fn unexpected_tag() {
        let err = parse_yaml(0, "{foo: !!str bar}");
        assert_eq!(err, Err(LoadError::UnexpectedTag(Marker::new(0, 1, 13))));
        assert!(format!("{}", err.err().unwrap()).starts_with("1:13: "));
    }

    #[test]
    fn nested_mapping_key_mapping() {
        assert_eq!(
            parse_yaml(0, "{foo: {? [] : {}}}"),
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 1, 10)))
        );
    }

    #[test]
    fn malformed_yaml_for_scanerror() {
        let err = parse_yaml(0, "{");
        assert!(err.is_err());
        assert!(format!("{}", err.err().unwrap()).starts_with("2:1: "));
    }

    #[test]
    fn toplevel_sequence_wanted() {
        let node =
            parse_yaml_with_options(0, "[yaml]", LoaderOptions::default().toplevel_sequence())
                .unwrap();
        assert!(node.as_sequence().is_some());
    }

    #[test]
    fn toplevel_sequence_wanted_got_mapping() {
        assert_eq!(
            parse_yaml_with_options(0, "{}", LoaderOptions::default().toplevel_sequence()),
            Err(LoadError::TopLevelMustBeSequence(Marker::new(0, 1, 1)))
        );
    }

    #[test]
    fn lowercase_keys() {
        let node = parse_yaml_with_options(
            0,
            "KEY: VALUE",
            LoaderOptions::default().lowercase_keys(false),
        )
        .unwrap();
        assert!(node.as_mapping().unwrap().contains_key("KEY"));
        assert!(!node.as_mapping().unwrap().contains_key("key"));

        let node = parse_yaml_with_options(
            0,
            "KEY: VALUE",
            LoaderOptions::default().lowercase_keys(true),
        )
        .unwrap();
        assert!(!node.as_mapping().unwrap().contains_key("KEY"));
        assert!(node.as_mapping().unwrap().contains_key("key"));
    }
}