1use crate::types::*;
5
6use hashlink::linked_hash_map::Entry;
7use yaml_rust::parser::{Event, MarkedEventReceiver, Parser};
8use yaml_rust::scanner::ScanError;
9use yaml_rust::scanner::{Marker as YamlMarker, TScalarStyle};
10
11use std::error::Error;
12use std::fmt::{self, Display};
13
14#[derive(Debug, PartialEq, Eq)]
16
17pub struct DuplicateKeyInner {
18 pub prev_key: MarkedScalarNode,
20 pub key: MarkedScalarNode,
22}
23
24#[derive(Debug, PartialEq, Eq)]
26pub enum LoadError {
27 TopLevelMustBeMapping(Marker),
29 TopLevelMustBeSequence(Marker),
31 UnexpectedAnchor(Marker),
33 MappingKeyMustBeScalar(Marker),
35 UnexpectedTag(Marker),
37 ScanError(Marker, ScanError),
39 DuplicateKey(Box<DuplicateKeyInner>),
41}
42
/// Options controlling how the loader turns YAML events into nodes.
///
/// Obtain the defaults via [`Default`] and adjust them with the builder
/// style methods on this type. The type is `Copy`, so a single configured
/// value can be reused for any number of parse calls even though the
/// builders and the parse functions take it by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LoaderOptions {
    /// Report `LoadError::DuplicateKey` when a mapping repeats a key.
    error_on_duplicate_keys: bool,
    /// Restrict the "may coerce" flag on scalars to plain-style scalars.
    prevent_coercion: bool,
    /// Require the top-level node to be a mapping (`true`) or a sequence
    /// (`false`).
    toplevel_is_mapping: bool,
    /// Lowercase every scalar which is read as a mapping key.
    lowercase_keys: bool,
}
56
57impl Default for LoaderOptions {
58 fn default() -> Self {
59 Self {
60 error_on_duplicate_keys: false,
61 prevent_coercion: false,
62 toplevel_is_mapping: true,
63 lowercase_keys: false,
64 }
65 }
66}
67
68impl LoaderOptions {
69 pub fn error_on_duplicate_keys(self, enable: bool) -> Self {
74 Self {
75 error_on_duplicate_keys: enable,
76 ..self
77 }
78 }
79
80 pub fn prevent_coercion(self, prevent: bool) -> Self {
85 Self {
86 prevent_coercion: prevent,
87 ..self
88 }
89 }
90
91 pub fn toplevel_mapping(self) -> Self {
95 Self {
96 toplevel_is_mapping: true,
97 ..self
98 }
99 }
100
101 pub fn toplevel_sequence(self) -> Self {
105 Self {
106 toplevel_is_mapping: false,
107 ..self
108 }
109 }
110
111 pub fn lowercase_keys(self, force_lowercase: bool) -> Self {
116 Self {
117 lowercase_keys: force_lowercase,
118 ..self
119 }
120 }
121}
122
123impl Display for LoadError {
124 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
125 use LoadError::*;
126 #[allow(deprecated)]
127 match self {
128 TopLevelMustBeMapping(m) => write!(f, "{}: Top level must be a mapping", m),
129 TopLevelMustBeSequence(m) => write!(f, "{}: Top level must be a sequence", m),
130 UnexpectedAnchor(m) => write!(f, "{}: Unexpected definition of anchor", m),
131 MappingKeyMustBeScalar(m) => write!(f, "{}: Keys in mappings must be scalar", m),
132 UnexpectedTag(m) => write!(f, "{}: Unexpected use of YAML tag", m),
133 DuplicateKey(inner) => {
134 let DuplicateKeyInner { prev_key, key } = inner.as_ref();
135 write!(
136 f,
137 "Duplicate key \"{}\" in mapping at {} and {}",
138 prev_key.as_str(),
139 prev_key
140 .span()
141 .start()
142 .map(ToString::to_string)
143 .unwrap_or_else(|| "?".to_string()),
144 key.span()
145 .start()
146 .map(ToString::to_string)
147 .unwrap_or_else(|| "?".to_string()),
148 )
149 }
150 ScanError(m, e) => {
151 write!(f, "{}: {}", m, e.description())
154 }
155 }
156 }
157}
158
159impl Error for LoadError {}
160
161#[derive(Debug, PartialEq, Eq)]
162enum LoaderState {
163 Initial,
164 StartStream,
165 StartDocument,
166 MappingWaitingOnKey(Marker, MappingHash),
167 MappingWaitingOnValue(Marker, MappingHash, MarkedScalarNode),
168 SequenceWaitingOnValue(Marker, Vec<Node>),
169 Finished(Node),
170 Error(LoadError),
171}
172use LoaderState::*;
173
174impl LoaderState {
175 fn is_error(&self) -> bool {
176 matches!(self, Error(_))
177 }
178}
179
180struct MarkedLoader {
181 source: usize,
182 state_stack: Vec<LoaderState>,
183 options: LoaderOptions,
184}
185
186impl MarkedEventReceiver for MarkedLoader {
187 fn on_event(&mut self, ev: Event, mark: YamlMarker) {
188 if self.state_stack[self.state_stack.len() - 1].is_error() {
190 return;
191 }
192 let mark = self.marker(mark);
193 let curstate = self
194 .state_stack
195 .pop()
196 .expect("State stack became unbalanced");
197 let newstate = match ev {
198 Event::Alias(_) => unreachable!(),
199 Event::StreamStart => {
200 assert_eq!(curstate, Initial);
201 StartStream
202 }
203 Event::DocumentStart => {
204 assert_eq!(curstate, StartStream);
205 StartDocument
206 }
207 Event::MappingStart(aid, tag) => {
208 if tag.is_some() {
209 Error(LoadError::UnexpectedTag(mark))
210 } else if aid == 0 {
211 match curstate {
212 StartDocument => {
213 if self.options.toplevel_is_mapping {
214 MappingWaitingOnKey(mark, MappingHash::new())
215 } else {
216 Error(LoadError::TopLevelMustBeSequence(mark))
217 }
218 }
219 MappingWaitingOnKey(_, _) => Error(LoadError::MappingKeyMustBeScalar(mark)),
220 MappingWaitingOnValue(_, _, _) => {
221 self.state_stack.push(curstate);
222 MappingWaitingOnKey(mark, MappingHash::new())
223 }
224 SequenceWaitingOnValue(_, _) => {
225 self.state_stack.push(curstate);
226 MappingWaitingOnKey(mark, MappingHash::new())
227 }
228 _ => unreachable!(),
229 }
230 } else {
231 Error(LoadError::UnexpectedAnchor(mark))
232 }
233 }
234 Event::MappingEnd => match curstate {
235 MappingWaitingOnKey(startmark, map) => {
236 let span = Span::new_with_marks(startmark, mark);
237 let node = Node::from(MarkedMappingNode::new(span, map));
238 if let Some(topstate) = self.state_stack.pop() {
239 match topstate {
240 MappingWaitingOnValue(mark, mut map, key) => {
241 match map.entry(key.clone()) {
242 Entry::Occupied(entry)
243 if self.options.error_on_duplicate_keys =>
244 {
245 Error(LoadError::DuplicateKey(Box::new(
246 DuplicateKeyInner {
247 prev_key: entry.key().clone(),
248 key,
249 },
250 )))
251 }
252 _ => {
253 map.insert(key, node);
254 MappingWaitingOnKey(mark, map)
255 }
256 }
257 }
258 SequenceWaitingOnValue(mark, mut list) => {
259 list.push(node);
260 SequenceWaitingOnValue(mark, list)
261 }
262 _ => unreachable!(),
263 }
264 } else {
265 Finished(node)
266 }
267 }
268 _ => unreachable!(),
269 },
270 Event::SequenceStart(aid, tag) => {
271 if tag.is_some() {
272 Error(LoadError::UnexpectedTag(mark))
273 } else if aid == 0 {
274 match curstate {
275 StartDocument => {
276 if self.options.toplevel_is_mapping {
277 Error(LoadError::TopLevelMustBeMapping(mark))
278 } else {
279 SequenceWaitingOnValue(mark, Vec::new())
280 }
281 }
282 MappingWaitingOnKey(_, _) => Error(LoadError::MappingKeyMustBeScalar(mark)),
283 mv @ MappingWaitingOnValue(_, _, _) => {
284 self.state_stack.push(mv);
285 SequenceWaitingOnValue(mark, Vec::new())
286 }
287 sv @ SequenceWaitingOnValue(_, _) => {
288 self.state_stack.push(sv);
289 SequenceWaitingOnValue(mark, Vec::new())
290 }
291 _ => unreachable!(),
292 }
293 } else {
294 Error(LoadError::UnexpectedAnchor(mark))
295 }
296 }
297 Event::SequenceEnd => match curstate {
298 SequenceWaitingOnValue(startmark, list) => {
299 let span = Span::new_with_marks(startmark, mark);
300 let node = Node::from(MarkedSequenceNode::new(span, list));
301 if let Some(topstate) = self.state_stack.pop() {
302 match topstate {
303 MappingWaitingOnValue(mark, mut map, key) => {
304 match map.entry(key.clone()) {
305 Entry::Occupied(entry)
306 if self.options.error_on_duplicate_keys =>
307 {
308 Error(LoadError::DuplicateKey(Box::new(
309 DuplicateKeyInner {
310 prev_key: entry.key().clone(),
311 key,
312 },
313 )))
314 }
315 _ => {
316 map.insert(key, node);
317 MappingWaitingOnKey(mark, map)
318 }
319 }
320 }
321 SequenceWaitingOnValue(mark, mut list) => {
322 list.push(node);
323 SequenceWaitingOnValue(mark, list)
324 }
325 _ => unreachable!(),
326 }
327 } else {
328 Finished(node)
329 }
330 }
331 _ => unreachable!(),
332 },
333 Event::DocumentEnd => match curstate {
334 Finished(_) => curstate,
335 _ => unreachable!(),
336 },
337 Event::StreamEnd => match curstate {
338 StartStream => Finished(Node::from(MarkedMappingNode::new_empty(
339 Span::new_with_marks(mark, mark),
340 ))),
341 Finished(_) => curstate,
342 _ => unreachable!(),
343 },
344 Event::Scalar(val, kind, aid, tag) => {
345 if aid == 0 {
346 if tag.is_some() {
347 Error(LoadError::UnexpectedTag(mark))
348 } else {
349 let span = Span::new_start(mark);
350 let val = if matches!(curstate, MappingWaitingOnKey(_, _))
351 && self.options.lowercase_keys
352 {
353 val.to_lowercase()
354 } else {
355 val
356 };
357 let mut node = MarkedScalarNode::new(span, val);
358 if self.options.prevent_coercion {
359 node.set_coerce(matches!(kind, TScalarStyle::Plain));
360 }
361 match curstate {
362 MappingWaitingOnKey(mark, map) => {
363 MappingWaitingOnValue(mark, map, node)
364 }
365 MappingWaitingOnValue(mark, mut map, key) => {
366 match map.entry(key.clone()) {
367 Entry::Occupied(entry)
368 if self.options.error_on_duplicate_keys =>
369 {
370 Error(LoadError::DuplicateKey(Box::new(
371 DuplicateKeyInner {
372 prev_key: entry.key().clone(),
373 key,
374 },
375 )))
376 }
377 _ => {
378 map.insert(key, Node::from(node));
379 MappingWaitingOnKey(mark, map)
380 }
381 }
382 }
383 SequenceWaitingOnValue(mark, mut list) => {
384 list.push(Node::from(node));
385 SequenceWaitingOnValue(mark, list)
386 }
387 StartDocument => Error(LoadError::TopLevelMustBeMapping(mark)),
388 _ => unreachable!(),
389 }
390 }
391 } else {
392 Error(LoadError::UnexpectedAnchor(mark))
393 }
394 }
395 Event::Nothing => unreachable!(),
396 };
397 self.state_stack.push(newstate);
398 }
399}
400
401impl MarkedLoader {
402 fn new(source: usize, options: LoaderOptions) -> Self {
403 Self {
404 source,
405 state_stack: vec![Initial],
406 options,
407 }
408 }
409
410 fn marker(&self, mark: YamlMarker) -> Marker {
411 Marker::new(self.source, mark.index(), mark.line(), mark.col() + 1)
412 }
413
414 fn finish(mut self) -> Result<Node, LoadError> {
415 let top = self.state_stack.pop();
416 match top.expect("YAML parser state stack unexpectedly empty") {
417 Finished(n) => Ok(n),
418 Error(e) => Err(e),
419 _ => unreachable!(),
420 }
421 }
422}
423
424pub fn parse_yaml<S>(source: usize, yaml: S) -> Result<Node, LoadError>
448where
449 S: AsRef<str>,
450{
451 let options = LoaderOptions::default();
452
453 parse_yaml_with_options(source, yaml, options)
454}
455
456pub fn parse_yaml_with_options<S>(
466 source: usize,
467 yaml: S,
468 options: LoaderOptions,
469) -> Result<Node, LoadError>
470where
471 S: AsRef<str>,
472{
473 let mut loader = MarkedLoader::new(source, options);
474 let mut parser = Parser::new(yaml.as_ref().chars());
475 parser.load(&mut loader, false).map_err(|se| {
476 let mark = loader.marker(*se.marker());
477 LoadError::ScanError(mark, se)
478 })?;
479 loader.finish()
480}
481
#[cfg(test)]
mod test {
    //! Tests for the loader: successful loads, every `LoadError` variant,
    //! and each `LoaderOptions` switch.

    use super::*;

    #[test]
    fn smoke_basics() {
        let node = parse_yaml(0, "{}").unwrap();
        assert!(node.as_mapping().is_some());
    }

    #[test]
    fn load_everything() {
        let node = parse_yaml(0, include_str!("../examples/everything.yaml")).unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("simple").unwrap().as_str(), "scalar");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_bool(), Some(true));
        assert_eq!(map.get_scalar("boolean2").unwrap().as_bool(), Some(false));
    }

    #[test]
    fn prevent_coercion() {
        let node = parse_yaml_with_options(
            0,
            include_str!("../examples/everything.yaml"),
            LoaderOptions::default().prevent_coercion(true),
        )
        .unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("simple").unwrap().as_str(), "scalar");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_str(), "true");
        assert_eq!(map.get_scalar("boolean1").unwrap().as_bool(), None);
        assert_eq!(map.get_scalar("boolean2").unwrap().as_str(), "false");
        assert_eq!(map.get_scalar("boolean2").unwrap().as_bool(), Some(false));
        assert_eq!(map.get_scalar("integer").unwrap().as_str(), "1234");
        assert_eq!(map.get_scalar("integer").unwrap().as_i32(), None);
        assert_eq!(map.get_scalar("float").unwrap().as_str(), "12.34");
        assert_eq!(map.get_scalar("float").unwrap().as_f32(), Some(12.34));
    }

    #[test]
    fn toplevel_is_empty() {
        let node = parse_yaml(0, "").unwrap();
        let map = node.as_mapping().unwrap();
        assert!(map.is_empty());
    }

    #[test]
    fn toplevel_is_empty_inline() {
        let node = parse_yaml(0, "{}").unwrap();
        let map = node.as_mapping().unwrap();
        assert!(map.is_empty());
    }

    #[test]
    fn toplevel_is_scalar() {
        let err = parse_yaml(0, "foo");
        assert_eq!(
            err,
            Err(LoadError::TopLevelMustBeMapping(Marker::new(0, 0, 1, 1)))
        );
        assert!(format!("{}", err.err().unwrap()).contains("1:1: "));
    }

    #[test]
    fn toplevel_is_sequence() {
        assert_eq!(
            parse_yaml(0, "[]"),
            Err(LoadError::TopLevelMustBeMapping(Marker::new(0, 0, 1, 1)))
        );
    }

    #[test]
    fn duplicate_key() {
        let err = parse_yaml_with_options(
            0,
            "{foo: bar, foo: baz}",
            LoaderOptions::default().error_on_duplicate_keys(true),
        );

        // The expected markers match the positions asserted on the rendered
        // message below: the first `foo` is at index 1 / column 2 and the
        // second at index 11 / column 12.
        assert_eq!(
            err,
            Err(LoadError::DuplicateKey(Box::new(DuplicateKeyInner {
                prev_key: MarkedScalarNode::new(Span::new_start(Marker::new(0, 1, 1, 2)), "foo"),
                key: MarkedScalarNode::new(Span::new_start(Marker::new(0, 11, 1, 12)), "foo")
            })))
        );

        assert_eq!(
            format!("{}", err.err().unwrap()),
            "Duplicate key \"foo\" in mapping at 1:2 and 1:12"
        );

        // Without duplicate detection the later value is the one retrieved.
        let node = parse_yaml(0, "{foo: bar, foo: baz}").unwrap();
        let map = node.as_mapping().unwrap();
        assert_eq!(map.get_scalar("foo").unwrap().as_str(), "baz");
    }

    #[test]
    fn unexpected_anchor() {
        let err = parse_yaml(0, "&foo {}");
        assert_eq!(
            err,
            Err(LoadError::UnexpectedAnchor(Marker::new(0, 5, 1, 6)))
        );
        assert!(format!("{}", err.err().unwrap()).starts_with("1:6: "));
    }

    #[test]
    fn unexpected_anchor2() {
        assert_eq!(
            parse_yaml(0, "{bar: &foo []}"),
            Err(LoadError::UnexpectedAnchor(Marker::new(0, 11, 1, 12)))
        );
    }

    #[test]
    fn unexpected_anchor3() {
        assert_eq!(
            parse_yaml(0, "{bar: &foo susan}"),
            Err(LoadError::UnexpectedAnchor(Marker::new(0, 11, 1, 12)))
        );
    }

    #[test]
    fn mapping_key_mapping() {
        let err = parse_yaml(0, "{? {} : {}}");
        assert_eq!(
            err,
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 3, 1, 4)))
        );
        assert!(format!("{}", err.err().unwrap()).starts_with("1:4: "));
    }

    #[test]
    fn mapping_key_sequence() {
        assert_eq!(
            parse_yaml(0, "{? [] : {}}"),
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 3, 1, 4)))
        );
    }

    #[test]
    fn unexpected_tag() {
        let err = parse_yaml(0, "{foo: !!str bar}");
        assert_eq!(
            err,
            Err(LoadError::UnexpectedTag(Marker::new(0, 12, 1, 13)))
        );
        assert!(format!("{}", err.err().unwrap()).starts_with("1:13: "));
    }

    #[test]
    fn nested_mapping_key_mapping() {
        assert_eq!(
            parse_yaml(0, "{foo: {? [] : {}}}"),
            Err(LoadError::MappingKeyMustBeScalar(Marker::new(0, 9, 1, 10)))
        );
    }

    #[test]
    fn malformed_yaml_for_scanerror() {
        let err = parse_yaml(0, "{");
        assert!(err.is_err());
        assert!(format!("{}", err.err().unwrap()).starts_with("2:1: "));
    }

    #[test]
    fn toplevel_sequence_wanted() {
        let node =
            parse_yaml_with_options(0, "[yaml]", LoaderOptions::default().toplevel_sequence())
                .unwrap();
        assert!(node.as_sequence().is_some());
    }

    #[test]
    fn toplevel_sequence_wanted_got_mapping() {
        assert_eq!(
            parse_yaml_with_options(0, "{}", LoaderOptions::default().toplevel_sequence()),
            Err(LoadError::TopLevelMustBeSequence(Marker::new(0, 0, 1, 1)))
        );
    }

    #[test]
    fn lowercase_keys() {
        // With the option off, keys keep their original case.
        let node = parse_yaml_with_options(
            0,
            "KEY: VALUE",
            LoaderOptions::default().lowercase_keys(false),
        )
        .unwrap();
        assert!(node.as_mapping().unwrap().contains_key("KEY"));
        assert!(!node.as_mapping().unwrap().contains_key("key"));

        // With the option on, keys are stored lowercased.
        let node = parse_yaml_with_options(
            0,
            "KEY: VALUE",
            LoaderOptions::default().lowercase_keys(true),
        )
        .unwrap();
        assert!(!node.as_mapping().unwrap().contains_key("KEY"));
        assert!(node.as_mapping().unwrap().contains_key("key"));
    }
}