apollo_router/configuration/
yaml.rs

1use std::collections::HashMap;
2use std::collections::HashSet;
3
4use derivative::Derivative;
5use indexmap::IndexMap;
6use jsonschema::paths::Location;
7use jsonschema::paths::LocationSegment;
8use yaml_rust::Event;
9use yaml_rust::parser::MarkedEventReceiver;
10use yaml_rust::parser::Parser;
11use yaml_rust::scanner::Marker;
12
13use crate::configuration::ConfigurationError;
14
15#[derive(Derivative, Clone, Debug, Eq)]
16#[derivative(Hash, PartialEq)]
17pub(crate) struct Label {
18    pub(crate) name: String,
19    #[derivative(Hash = "ignore", PartialEq = "ignore")]
20    pub(crate) marker: Option<Marker>,
21}
22
23impl From<String> for Label {
24    fn from(name: String) -> Self {
25        Label { name, marker: None }
26    }
27}
28
29#[derive(Clone, Debug)]
30pub(crate) enum Value {
31    // These types are not currently used.
32    // In theory if we want to parse the YAML properly then we need them, but we're only interested
33    // in the markers, so maybe we don't need them?
34    // Null(Marker),
35    // Bool(bool, Marker),
36    // Number(Number, Marker),
37    String(String, Marker),
38    Sequence(Vec<Value>, Marker),
39    Mapping(Option<Label>, IndexMap<Label, Value>, Marker),
40}
41
42impl Value {
43    pub(crate) fn end_marker(&self) -> &Marker {
44        match self {
45            Value::String(_, m) => m,
46            Value::Sequence(v, m) => v.last().map(|l| l.end_marker()).unwrap_or_else(|| m),
47            Value::Mapping(_, v, m) => v
48                .last()
49                .map(|(_, val)| val.end_marker())
50                .unwrap_or_else(|| m),
51        }
52    }
53}
54
55/// A basic yaml parser that retains marker information.
56/// This is an incomplete parser that is useful for config validation.
57/// First the yaml is loaded via serde_yaml. This ensures valid yaml.
58/// Then it is validated against a json schema.
59/// The output from json schema validation is a set of errors with json paths.
60/// The json path doesn't contain line number info, so we reparse so that we can convert the
61/// paths into nice error messages.
62#[derive(Default, Debug)]
63pub(crate) struct MarkedYaml {
64    anchors: HashMap<usize, Value>,
65    current_label: Option<Label>,
66    object_stack: Vec<(Option<Label>, Value, usize)>,
67    root: Option<Value>,
68    duplicated_fields: HashSet<(Option<Label>, Label)>,
69}
70
71impl MarkedYaml {
72    pub(crate) fn get_element(&self, pointer: &Location) -> Option<&Value> {
73        let mut current = self.root();
74        for item in pointer {
75            current = match (current, item) {
76                (
77                    Some(Value::Mapping(_current_label, mapping, _)),
78                    LocationSegment::Property(value),
79                ) => mapping.get(&Label::from(value.to_string())),
80                (Some(Value::Sequence(sequence, _)), LocationSegment::Index(idx)) => {
81                    sequence.get(idx)
82                }
83                _ => None,
84            }
85        }
86        current
87    }
88
89    fn root(&self) -> Option<&Value> {
90        self.root.as_ref()
91    }
92
93    fn end_container(&mut self) -> Option<Value> {
94        let (label, v, id) = self.object_stack.pop().expect("imbalanced parse events");
95        self.anchors.insert(id, v.clone());
96        match (label, self.object_stack.last_mut()) {
97            (Some(label), Some((_, Value::Mapping(current_label, mapping, _), _))) => {
98                if let Some(_previous) = mapping.insert(label.clone(), v) {
99                    self.duplicated_fields
100                        .insert((current_label.clone(), label));
101                }
102                None
103            }
104            (None, Some((_, Value::Sequence(sequence, _), _))) => {
105                sequence.push(v);
106                None
107            }
108            _ => Some(v),
109        }
110    }
111
112    fn add_value(&mut self, marker: Marker, v: String, id: usize) {
113        match (self.current_label.take(), self.object_stack.last_mut()) {
114            (Some(label), Some((_, Value::Mapping(current_label, mapping, _), _))) => {
115                let v = Value::String(v, marker);
116                self.anchors.insert(id, v.clone());
117                if let Some(_previous) = mapping.insert(label.clone(), v) {
118                    self.duplicated_fields
119                        .insert((current_label.clone(), label));
120                }
121            }
122            (None, Some((_, Value::Sequence(sequence, _), _))) => {
123                let v = Value::String(v, marker);
124                self.anchors.insert(id, v.clone());
125                sequence.push(v);
126            }
127            (None, _) => {
128                self.current_label = Some(Label {
129                    name: v,
130                    marker: Some(marker),
131                })
132            }
133            _ => tracing::warn!("labeled scalar without container in yaml"),
134        }
135    }
136
137    fn add_alias_value(&mut self, v: Value) {
138        match (self.current_label.take(), self.object_stack.last_mut()) {
139            (Some(label), Some((_, Value::Mapping(_current_label, mapping, _), _))) => {
140                mapping.insert(label, v);
141            }
142            (None, Some((_, Value::Sequence(sequence, _), _))) => {
143                sequence.push(v);
144            }
145            _ => tracing::warn!("scalar without container in yaml"),
146        }
147    }
148}
149
150pub(crate) fn parse(source: &str) -> Result<MarkedYaml, ConfigurationError> {
151    // Yaml parser doesn't support CRLF. Remove CRs.
152    // https://github.com/chyh1990/yaml-rust/issues/165
153    let source = source.replace('\r', "");
154    let mut parser = Parser::new(source.chars());
155    let mut loader = MarkedYaml::default();
156    parser
157        .load(&mut loader, true)
158        .map_err(|e| ConfigurationError::InvalidConfiguration {
159            message: "could not parse yaml",
160            error: e.to_string(),
161        })?;
162
163    // Detect duplicated keys in configuration file
164    if !loader.duplicated_fields.is_empty() {
165        let error = loader
166            .duplicated_fields
167            .iter()
168            .map(|(parent_label, dup_label)| {
169                let prefix = parent_label
170                    .as_ref()
171                    .map(|l| format!("{}.", l.name))
172                    .unwrap_or_default();
173                format!("'{prefix}{}'", dup_label.name)
174            })
175            .collect::<Vec<String>>()
176            .join(", ");
177        return Err(ConfigurationError::InvalidConfiguration {
178            message: "duplicated keys detected in your yaml configuration",
179            error,
180        });
181    }
182
183    Ok(loader)
184}
185
186impl MarkedEventReceiver for MarkedYaml {
187    fn on_event(&mut self, ev: Event, marker: Marker) {
188        match ev {
189            Event::Scalar(v, _style, id, _tag) => self.add_value(marker, v, id),
190            Event::SequenceStart(id) => {
191                self.object_stack.push((
192                    self.current_label.take(),
193                    Value::Sequence(Vec::new(), marker),
194                    id,
195                ));
196            }
197            Event::SequenceEnd => {
198                self.root = self.end_container();
199            }
200            Event::MappingStart(id) => {
201                let current_label = self.current_label.take();
202                self.object_stack.push((
203                    current_label.clone(),
204                    Value::Mapping(current_label, IndexMap::default(), marker),
205                    id,
206                ));
207            }
208            Event::MappingEnd => {
209                self.root = self.end_container();
210            }
211            Event::Alias(id) => {
212                if let Some(v) = self.anchors.get(&id) {
213                    let cloned = v.clone();
214                    self.add_alias_value(cloned);
215                } else {
216                    tracing::warn!("unresolved anchor in yaml");
217                }
218            }
219            Event::DocumentStart => {}
220            Event::DocumentEnd => {}
221            _ => {}
222        }
223    }
224}
225
#[cfg(test)]
mod test {
    use insta::assert_snapshot;

    use crate::configuration::yaml::parse;

    /// Snapshots the parsed tree, markers included, for a document using an anchor and alias.
    #[test]
    fn test() {
        // DON'T reformat this. It'll change the test results
        let yaml = r#"test:
  a: 4
  b: 3       
  c: &id001
  - d
  - e
  - f:
     - g
     - h:
         i: k 
  l: *id001
      
"#;
        let document = parse(yaml).unwrap();
        let root = document.root().unwrap();
        assert_snapshot!(format!("{root:#?}"));
    }

    /// Duplicated keys at the top level and in nested mappings must all be reported.
    #[test]
    fn test_duplicate_keys() {
        // DON'T reformat this. It'll change the test results
        let yaml = r#"test:
  a: 4
  b: 3
  a: 5
  c:
    dup: 5
    other: 3
    dup: 8
test:
  foo: bar
"#;
        let (message, error) = match parse(yaml).unwrap_err() {
            crate::configuration::ConfigurationError::InvalidConfiguration { message, error } => {
                (message, error)
            }
            _ => panic!("this error must be InvalidConfiguration variant"),
        };
        assert_eq!(
            message,
            "duplicated keys detected in your yaml configuration"
        );
        // The entries are checked one by one rather than as a whole string, so the test does
        // not depend on the order in which they are listed.
        let reported: Vec<&str> = error.split(", ").collect();
        assert_eq!(reported.len(), 3);
        for expected in ["'test.a'", "'test'", "'c.dup'"] {
            assert!(reported.contains(&expected));
        }
    }
}
284}