Skip to main content

tanzim_parse/
yaml.rs

1//! YAML parser (`yaml` feature).
2//!
3//! **Formats:** `yml`, `yaml`
4//!
5//! # Behaviour
6//!
7//! - Parses YAML with source markers. Mappings become maps, sequences become lists, and
8//!   scalars become strings/integers/floats/booleans. An empty document yields an empty map.
9//! - Every node carries its marker as a [`Location`] (line/column); for single-line input the
10//!   line/column are omitted.
11//! - YAML `null` is rejected with [`Error::UnsupportedNull`]. Non-string mapping keys, aliases,
12//!   and malformed nodes become [`Error::Parse`]; non-UTF-8 input fails with
13//!   [`Error::InvalidUtf8`].
14//! - [`is_format_supported`](crate::Parse::is_format_supported) returns `Some(true)` when
15//!   the bytes parse as YAML, else `Some(false)`.
16//!
17//! # Example
18//!
19//! ```
20//! use tanzim_parse::{Parse, yaml::Yaml};
21//!
22//! let value = Yaml::new().parse("file", "config.yaml", b"host: 127.0.0.1\n").unwrap();
23//! assert_eq!(
24//!     value.value.as_map().unwrap().get("host").unwrap().value.as_string().unwrap(),
25//!     "127.0.0.1"
26//! );
27//! ```
28
29use crate::Parse;
30use crate::span::is_single_line;
31use cfg_if::cfg_if;
32use saphyr::{LoadableYamlNode, MarkedYaml, Scalar, YamlData};
33use tanzim_value::{Error, LocatedValue, Location, Map, Value};
34
/// Parser for the `yml`/`yaml` formats: YAML into a source-located value tree.
///
/// Mappings, sequences, and scalars map to the value tree with a per-node marker [`Location`];
/// YAML `null` is rejected with [`Error::UnsupportedNull`]. Stateless — construct with
/// [`Yaml::new`].
///
/// ```
/// use tanzim_parse::{Parse, yaml::Yaml};
///
/// let value = Yaml::new().parse("file", "config.yaml", b"port: 8080\n").unwrap();
/// let port = value.value.as_map().unwrap().get("port").unwrap();
/// assert_eq!(port.value.as_int().unwrap(), 8080);
/// ```
// `Debug` is derived so the parser can be embedded in `#[derive(Debug)]` types and logged.
#[derive(Debug, Default, Copy, Clone)]
pub struct Yaml;

impl Yaml {
    /// Create a YAML parser.
    ///
    /// The parser is a stateless unit value, so this is a `const fn` and can be used in
    /// `const`/`static` initialisers as well as at runtime.
    pub const fn new() -> Self {
        Self
    }
}
57
58impl Parse for Yaml {
59    fn name(&self) -> &str {
60        "YAML"
61    }
62
63    fn supported_format_list(&self) -> Vec<String> {
64        vec!["yml".into(), "yaml".into()]
65    }
66
67    fn parse(&self, source: &str, resource: &str, bytes: &[u8]) -> Result<LocatedValue, Error> {
68        cfg_if! {
69            if #[cfg(feature = "tracing")] {
70                tracing::debug!(msg = "Parsing YAML configuration", source = source, resource = resource, bytes = bytes.len());
71            } else if #[cfg(feature = "logging")] {
72                log::debug!("msg=\"Parsing YAML configuration\" source={source} resource={resource} bytes={}", bytes.len());
73            }
74        }
75        let text = match std::str::from_utf8(bytes) {
76            Ok(value) => value,
77            Err(_) => {
78                return Err(Error::InvalidUtf8 {
79                    location: Location::at(source, resource, None, None, None),
80                });
81            }
82        };
83        let single_line = is_single_line(bytes);
84        let docs = match MarkedYaml::load_from_str(text) {
85            Ok(value) => value,
86            Err(error) => {
87                let marker = error.marker();
88                return Err(Error::Parse {
89                    text: text.to_string(),
90                    location: Some(Location::at(
91                        source,
92                        resource,
93                        Some(marker.line()),
94                        Some(marker.col() + 1),
95                        None,
96                    )),
97                    message: error.info().to_string(),
98                });
99            }
100        };
101        if docs.is_empty() {
102            cfg_if! {
103                if #[cfg(feature = "tracing")] {
104                    tracing::trace!(msg = "Parsed YAML configuration (empty document)", source = source, resource = resource);
105                } else if #[cfg(feature = "logging")] {
106                    log::trace!("msg=\"Parsed YAML configuration (empty document)\" source={source} resource={resource}");
107                }
108            }
109            return Ok(LocatedValue {
110                value: Value::Map(Map::new()),
111                location: Location::at(source, resource, None, None, None),
112            });
113        }
114        let result = convert_node(source, resource, text, single_line, &docs[0]);
115        if result.is_ok() {
116            cfg_if! {
117                if #[cfg(feature = "tracing")] {
118                    tracing::trace!(msg = "Parsed YAML configuration", source = source, resource = resource);
119                } else if #[cfg(feature = "logging")] {
120                    log::trace!("msg=\"Parsed YAML configuration\" source={source} resource={resource}");
121                }
122            }
123        }
124        result
125    }
126
127    fn is_format_supported(&self, bytes: &[u8]) -> Option<bool> {
128        match std::str::from_utf8(bytes) {
129            Ok(text) => Some(MarkedYaml::load_from_str(text).is_ok()),
130            Err(_) => Some(false),
131        }
132    }
133}
134
135fn convert_node(
136    source: &str,
137    resource: &str,
138    text: &str,
139    single_line: bool,
140    node: &MarkedYaml<'_>,
141) -> Result<LocatedValue, Error> {
142    let location = if single_line {
143        Location::at(source, resource, None, None, None)
144    } else {
145        let marker = node.span.start;
146        let length = if !node.span.is_empty() {
147            Some(node.span.len())
148        } else {
149            None
150        };
151        Location::at(
152            source,
153            resource,
154            Some(marker.line()),
155            Some(marker.col() + 1),
156            length,
157        )
158    };
159    match &node.data {
160        YamlData::Value(scalar) => match scalar {
161            Scalar::Null => Err(Error::UnsupportedNull {
162                text: text.to_string(),
163                location,
164            }),
165            Scalar::Boolean(value) => Ok(LocatedValue {
166                value: Value::Bool(*value),
167                location,
168            }),
169            Scalar::Integer(value) => Ok(LocatedValue {
170                value: Value::Int(*value as isize),
171                location,
172            }),
173            Scalar::FloatingPoint(value) => Ok(LocatedValue {
174                value: Value::Float(value.into_inner()),
175                location,
176            }),
177            Scalar::String(value) => Ok(LocatedValue {
178                value: Value::String(value.to_string()),
179                location,
180            }),
181        },
182        YamlData::Sequence(sequence) => {
183            let mut list = Vec::new();
184            for node in sequence {
185                list.push(convert_node(source, resource, text, single_line, node)?);
186            }
187            Ok(LocatedValue {
188                value: Value::List(list),
189                location,
190            })
191        }
192        YamlData::Mapping(mapping) => {
193            let mut map = Map::new();
194            for (key_node, value_node) in mapping {
195                let key = match &key_node.data {
196                    YamlData::Value(Scalar::String(value)) => value.to_string(),
197                    YamlData::Representation(value, _, _) => value.to_string(),
198                    _ => {
199                        return Err(Error::Parse {
200                            text: String::new(),
201                            location: None,
202                            message: "yaml map key must be a string".to_string(),
203                        });
204                    }
205                };
206                let value = convert_node(source, resource, text, single_line, value_node)?;
207                map.insert(key, value);
208            }
209            Ok(LocatedValue {
210                value: Value::Map(map),
211                location,
212            })
213        }
214        YamlData::Tagged(_, inner) => convert_node(source, resource, text, single_line, inner),
215        YamlData::Representation(representation, _, _) => {
216            if representation == "~" || representation == "null" || representation == "Null" {
217                return Err(Error::UnsupportedNull {
218                    text: text.to_string(),
219                    location,
220                });
221            }
222            Ok(LocatedValue {
223                value: Value::String(representation.to_string()),
224                location,
225            })
226        }
227        YamlData::Alias(_) | YamlData::BadValue => Err(Error::Parse {
228            text: text.to_string(),
229            location: Some(location),
230            message: "unsupported yaml node".to_string(),
231        }),
232    }
233}
234
#[cfg(all(test, feature = "yaml"))]
mod tests {
    use super::*;
    use std::num::NonZeroU32;

    /// Parse `input` as resource `config.yaml` from source `file`.
    fn parse_yaml(input: &[u8]) -> Result<LocatedValue, Error> {
        Yaml::new().parse("file", "config.yaml", input)
    }

    /// First line of `message` accepted by `wanted`, or `""` when no line matches.
    fn first_line_where<'a>(message: &'a str, wanted: impl Fn(&str) -> bool) -> &'a str {
        message.split('\n').find(|line| wanted(line)).unwrap_or("")
    }

    /// Zero-based character offset of `target` in the text between the first and second `|`
    /// of `line`; `0` when `line` has no `|` or that text does not contain `target`.
    fn column_after_pipe(line: &str, target: char) -> usize {
        line.split('|')
            .nth(1)
            .and_then(|after_pipe| after_pipe.chars().position(|ch| ch == target))
            .unwrap_or(0)
    }

    #[test]
    fn parses_yaml_map() {
        let parsed = parse_yaml(b"hello: world\n").unwrap();
        let map = parsed.value.as_map().unwrap();
        let hello = map.get("hello").unwrap();
        assert_eq!(hello.value.as_string().unwrap(), "world");
    }

    #[test]
    fn parses_yaml_map_with_lines() {
        let root = parse_yaml(b"foo: bar\nbaz: qux\n").unwrap();
        let map = root.value.as_map().unwrap();

        let foo = map.get("foo").unwrap();
        assert_eq!(foo.value.as_string().unwrap(), "bar");
        assert_eq!(foo.location.line, NonZeroU32::new(1));

        let baz = map.get("baz").unwrap();
        assert_eq!(baz.location.line, NonZeroU32::new(2));
    }

    #[test]
    fn rejects_yaml_null_at_correct_column() {
        let text = "foo: bar\n\nbaz:\n\n  qux: ~\n";
        let error = parse_yaml(text.as_bytes()).unwrap_err();
        match &error {
            Error::UnsupportedNull { location, .. } => {
                assert_eq!(location.line, NonZeroU32::new(5));
                assert_eq!(location.column, NonZeroU32::new(8));
                assert_eq!(location.length, NonZeroU32::new(1));
            }
            _ => panic!("expected unsupported null"),
        }

        // The caret in the rendered error must sit directly under the `~` of the source line.
        let message = format!("{error:#}");
        let source_line = first_line_where(&message, |line| line.contains("qux: ~"));
        let caret_line = first_line_where(&message, |line| line.contains('^'));
        let tilde_column = column_after_pipe(source_line, '~');
        let caret_column = column_after_pipe(caret_line, '^');
        assert_eq!(caret_column, tilde_column);
    }

    #[test]
    fn syntax_error_has_location() {
        let error = parse_yaml(b"foo: [\n").unwrap_err();
        match &error {
            Error::Parse { location, .. } => {
                let location = location.as_ref().expect("syntax error location");
                assert!(location.line.is_some());
                assert!(location.column.is_some());
            }
            _ => panic!("expected parse error"),
        }
        let message = format!("{error:#}");
        assert!(message.contains('^'));
    }
}
351}