Skip to main content

tanzim_parse/
yaml.rs

1//! YAML parser (`yaml` feature).
2//!
3//! **Formats:** `yml`, `yaml`
4//!
5//! # Behaviour
6//!
7//! - Parses YAML with source markers. Mappings become maps, sequences become lists, and
8//!   scalars become strings/integers/floats/booleans. An empty document yields an empty map.
9//! - Every node carries its marker as a [`Location`] (line/column); for single-line input the
10//!   line/column are omitted.
//! - YAML `null` becomes [`Value::Null`]. Non-string mapping keys (including integer,
//!   boolean, and collection keys), aliases, and malformed nodes become [`Error::Parse`];
//!   non-UTF-8 input fails with [`Error::InvalidUtf8`].
14//! - [`is_format_supported`](crate::Parse::is_format_supported) returns `Some(true)` when
15//!   the bytes parse as YAML, else `Some(false)`.
16//!
17//! # Example
18//!
19//! ```
20//! use tanzim_parse::{Parse, yaml::Yaml};
21//! use tanzim_source::SourceBuilder;
22//!
23//! let source = SourceBuilder::new()
24//!     .with_source("file")
25//!     .with_resource("config.yaml")
26//!     .build()
27//!     .unwrap();
28//! let value = Yaml::new().parse(&source, b"host: 127.0.0.1\n").unwrap();
29//! assert_eq!(
30//!     value.value().as_map().unwrap().get("host").unwrap().value().as_string().unwrap(),
31//!     "127.0.0.1"
32//! );
33//! ```
34
35use crate::span::is_single_line;
36use crate::{Parse, Source};
37use cfg_if::cfg_if;
38use saphyr::{LoadableYamlNode, MarkedYaml, Scalar, YamlData};
39use tanzim_value::{Error, LocatedValue, Location, Map, Value};
40
/// Parser for the `yml`/`yaml` formats: turns YAML text into a source-located value tree.
///
/// Mappings, sequences, and scalars are converted into the value tree, each node carrying
/// a marker-derived [`Location`]; YAML `null` becomes [`Value::Null`]. The type holds no
/// state, so it is a zero-sized, freely copyable unit struct — construct it with
/// [`Yaml::new`] or [`Default::default`].
///
/// ```
/// use tanzim_parse::{Parse, yaml::Yaml};
/// use tanzim_source::SourceBuilder;
///
/// let source = SourceBuilder::new()
///     .with_source("file")
///     .with_resource("config.yaml")
///     .build()
///     .unwrap();
/// let value = Yaml::new().parse(&source, b"port: 8080\n").unwrap();
/// let port = value.value().as_map().unwrap().get("port").unwrap();
/// assert_eq!(port.value().as_int().unwrap(), 8080);
/// ```
// `Debug` is derived so the parser can appear in diagnostics and in `{:?}` output of
// types that embed it.
#[derive(Debug, Default, Copy, Clone)]
pub struct Yaml;
62
63impl Yaml {
64    /// Create a YAML parser.
65    pub fn new() -> Self {
66        Self
67    }
68}
69
70impl Parse for Yaml {
71    fn name(&self) -> &str {
72        "YAML"
73    }
74
75    fn supported_format_list(&self) -> Vec<String> {
76        vec!["yml".into(), "yaml".into()]
77    }
78
79    fn parse(&self, src: &Source, bytes: &[u8]) -> Result<LocatedValue, Error> {
80        #[cfg(any(feature = "tracing", feature = "logging"))]
81        let source = src.source();
82        #[cfg(any(feature = "tracing", feature = "logging"))]
83        let resource = src.resource();
84        cfg_if! {
85            if #[cfg(feature = "tracing")] {
86                tracing::debug!(msg = "Parsing YAML configuration", source = source, resource = resource, bytes = bytes.len());
87            } else if #[cfg(feature = "logging")] {
88                log::debug!("msg=\"Parsing YAML configuration\" source={source} resource={resource} bytes={}", bytes.len());
89            }
90        }
91        let text = match std::str::from_utf8(bytes) {
92            Ok(value) => value,
93            Err(_) => {
94                return Err(Error::InvalidUtf8 {
95                    location: Box::new(Location::in_source(src.clone(), None, None, None)),
96                });
97            }
98        };
99        let single_line = is_single_line(bytes);
100        let docs = match MarkedYaml::load_from_str(text) {
101            Ok(value) => value,
102            Err(error) => {
103                let marker = error.marker();
104                return Err(Error::Parse {
105                    text: text.to_string(),
106                    location: Some(Box::new(Location::in_source(
107                        src.clone(),
108                        Some(marker.line()),
109                        Some(marker.col() + 1),
110                        None,
111                    ))),
112                    message: error.info().to_string(),
113                });
114            }
115        };
116        if docs.is_empty() {
117            cfg_if! {
118                if #[cfg(feature = "tracing")] {
119                    tracing::trace!(msg = "Parsed YAML configuration (empty document)", source = source, resource = resource);
120                } else if #[cfg(feature = "logging")] {
121                    log::trace!("msg=\"Parsed YAML configuration (empty document)\" source={source} resource={resource}");
122                }
123            }
124            return Ok(LocatedValue::new(
125                Value::Map(Map::new()),
126                Location::in_source(src.clone(), None, None, None),
127            ));
128        }
129        let result = convert_node(src, text, single_line, &docs[0]);
130        if result.is_ok() {
131            cfg_if! {
132                if #[cfg(feature = "tracing")] {
133                    tracing::trace!(msg = "Parsed YAML configuration", source = source, resource = resource);
134                } else if #[cfg(feature = "logging")] {
135                    log::trace!("msg=\"Parsed YAML configuration\" source={source} resource={resource}");
136                }
137            }
138        }
139        result
140    }
141
142    fn is_format_supported(&self, bytes: &[u8]) -> Option<bool> {
143        match std::str::from_utf8(bytes) {
144            Ok(text) => Some(MarkedYaml::load_from_str(text).is_ok()),
145            Err(_) => Some(false),
146        }
147    }
148}
149
150/// Serialize a [`Value`] tree into block-style YAML.
151///
152/// Accepts a [`Value`], `&Value`, [`LocatedValue`], or `&LocatedValue`. `source` is
153/// accepted for signature symmetry with [`Parse::parse`] but is unused here.
154///
155/// ```
156/// use tanzim_parse::yaml::unparse;
157/// use tanzim_source::SourceBuilder;
158/// use tanzim_value::{Map, LocatedValue, Location, Value};
159///
160/// let source = SourceBuilder::new().with_source("file").build().unwrap();
161/// let mut map = Map::new();
162/// map.insert("port".into(), LocatedValue::new(
163///     Value::Int(8080),
164///     Location::at("file", "", None, None, None),
165/// ));
166/// assert_eq!(unparse(&source, Value::Map(map)).unwrap(), "port: 8080\n");
167/// ```
168pub fn unparse<V: AsRef<Value>>(
169    _source: &Source,
170    value: V,
171) -> Result<String, Box<dyn std::error::Error + Send + Sync + 'static>> {
172    let value = value.as_ref();
173    let mut out = String::new();
174    match value {
175        Value::Map(map) if map.entries().is_empty() => out.push_str("{}\n"),
176        Value::List(items) if items.is_empty() => out.push_str("[]\n"),
177        Value::Map(map) => write_yaml_map(&mut out, map, 0)?,
178        Value::List(items) => write_yaml_list(&mut out, items, 0)?,
179        scalar => {
180            write_yaml_scalar(&mut out, scalar)?;
181            out.push('\n');
182        }
183    }
184    Ok(out)
185}
186
187fn write_yaml_map(
188    out: &mut String,
189    map: &Map,
190    indent: usize,
191) -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
192    for (key, item) in map.entries() {
193        push_yaml_indent(out, indent);
194        write_yaml_string(out, key);
195        out.push(':');
196        match item.value() {
197            Value::Map(inner) if inner.entries().is_empty() => out.push_str(" {}\n"),
198            Value::List(items) if items.is_empty() => out.push_str(" []\n"),
199            Value::Map(inner) => {
200                out.push('\n');
201                write_yaml_map(out, inner, indent + 1)?;
202            }
203            Value::List(items) => {
204                out.push('\n');
205                write_yaml_list(out, items, indent + 1)?;
206            }
207            scalar => {
208                out.push(' ');
209                write_yaml_scalar(out, scalar)?;
210                out.push('\n');
211            }
212        }
213    }
214    Ok(())
215}
216
217fn write_yaml_list(
218    out: &mut String,
219    items: &[LocatedValue],
220    indent: usize,
221) -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
222    for item in items {
223        push_yaml_indent(out, indent);
224        match item.value() {
225            Value::Map(inner) if inner.entries().is_empty() => out.push_str("- {}\n"),
226            Value::List(inner) if inner.is_empty() => out.push_str("- []\n"),
227            Value::Map(inner) => {
228                out.push_str("-\n");
229                write_yaml_map(out, inner, indent + 1)?;
230            }
231            Value::List(inner) => {
232                out.push_str("-\n");
233                write_yaml_list(out, inner, indent + 1)?;
234            }
235            scalar => {
236                out.push_str("- ");
237                write_yaml_scalar(out, scalar)?;
238                out.push('\n');
239            }
240        }
241    }
242    Ok(())
243}
244
245fn write_yaml_scalar(
246    out: &mut String,
247    value: &Value,
248) -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
249    match value {
250        Value::Bool(value) => out.push_str(if *value { "true" } else { "false" }),
251        Value::Int(value) => out.push_str(&value.to_string()),
252        Value::Float(value) => {
253            if !value.is_finite() {
254                return Err(format!("cannot serialize non-finite float {value} as YAML").into());
255            }
256            out.push_str(&format!("{value:?}"));
257        }
258        Value::String(value) => write_yaml_string(out, value),
259        Value::Null => out.push('~'),
260        Value::List(_) | Value::Map(_) => {
261            return Err("internal error: write_yaml_scalar called on a collection".into());
262        }
263    }
264    Ok(())
265}
266
/// Append `indent` levels of indentation (two spaces per level) to `out`.
fn push_yaml_indent(out: &mut String, indent: usize) {
    out.extend(std::iter::repeat("  ").take(indent));
}
272
/// Decide whether `value` has to be double-quoted to survive a YAML round trip as a string.
///
/// A string is quoted when it is empty, when a YAML reader could resolve it to a
/// non-string scalar (booleans, null, decimal/hex/octal integers, floats including the
/// `.inf`/`.nan` spellings), when it has leading or trailing whitespace, when it starts
/// with a YAML indicator character, or when it contains `:`, `#`, or any control
/// character. The checks are deliberately conservative: quoting too often is harmless,
/// quoting too rarely changes the value.
fn yaml_string_needs_quote(value: &str) -> bool {
    if value.is_empty() {
        return true;
    }
    let lowered = value.to_ascii_lowercase();
    if matches!(
        lowered.as_str(),
        "true" | "false" | "null" | "yes" | "no" | "on" | "off" | "~"
    ) {
        return true;
    }
    if value.parse::<i64>().is_ok() || value.parse::<f64>().is_ok() {
        return true;
    }
    // Rust's number parsers do not recognise the YAML-specific spellings below, so they
    // are checked separately.
    let radix_integer = lowered
        .strip_prefix("0x")
        .map_or(false, |digits| i64::from_str_radix(digits, 16).is_ok())
        || lowered
            .strip_prefix("0o")
            .map_or(false, |digits| i64::from_str_radix(digits, 8).is_ok());
    // `-.inf` needs no entry here: a leading `-` already forces quoting below.
    if radix_integer || matches!(lowered.as_str(), ".inf" | "+.inf" | ".nan") {
        return true;
    }
    if value.starts_with(char::is_whitespace) || value.ends_with(char::is_whitespace) {
        return true;
    }
    let starts_with_indicator = value.starts_with(|ch: char| {
        matches!(
            ch,
            '-' | '?'
                | ':'
                | ','
                | '['
                | ']'
                | '{'
                | '}'
                | '&'
                | '*'
                | '!'
                | '|'
                | '>'
                | '\''
                | '"'
                | '%'
                | '@'
                | '`'
                | '#'
        )
    });
    if starts_with_indicator {
        return true;
    }
    // Control characters (newline, tab, carriage return, NUL, ...) cannot be carried
    // verbatim in a plain scalar.
    value.contains(':') || value.contains('#') || value.chars().any(char::is_control)
}

/// Append `value` to `out` as a YAML string scalar.
///
/// The text is written verbatim when that is unambiguous; otherwise it is wrapped in
/// double quotes with `"`, `\`, newline, carriage return, and tab escaped by name and
/// every other control character escaped as `\xNN`.
fn write_yaml_string(out: &mut String, value: &str) {
    if !yaml_string_needs_quote(value) {
        out.push_str(value);
        return;
    }
    out.push('"');
    for ch in value.chars() {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            // `char::is_control` only covers code points up to U+009F, so two hex
            // digits are always enough.
            other if other.is_control() => out.push_str(&format!("\\x{:02X}", other as u32)),
            other => out.push(other),
        }
    }
    out.push('"');
}
327
328fn convert_node(
329    source: &Source,
330    text: &str,
331    single_line: bool,
332    node: &MarkedYaml<'_>,
333) -> Result<LocatedValue, Error> {
334    let location = if single_line {
335        Location::in_source(source.clone(), None, None, None)
336    } else {
337        let marker = node.span.start;
338        let length = if !node.span.is_empty() {
339            Some(node.span.len())
340        } else {
341            None
342        };
343        Location::in_source(
344            source.clone(),
345            Some(marker.line()),
346            Some(marker.col() + 1),
347            length,
348        )
349    };
350    match &node.data {
351        YamlData::Value(scalar) => match scalar {
352            Scalar::Null => Ok(LocatedValue::new(Value::Null, location)),
353            Scalar::Boolean(value) => Ok(LocatedValue::new(Value::Bool(*value), location)),
354            Scalar::Integer(value) => Ok(LocatedValue::new(Value::Int(*value as isize), location)),
355            Scalar::FloatingPoint(value) => Ok(LocatedValue::new(
356                Value::Float(value.into_inner()),
357                location,
358            )),
359            Scalar::String(value) => Ok(LocatedValue::new(
360                Value::String(value.to_string()),
361                location,
362            )),
363        },
364        YamlData::Sequence(sequence) => {
365            let mut list = Vec::new();
366            for node in sequence {
367                list.push(convert_node(source, text, single_line, node)?);
368            }
369            Ok(LocatedValue::new(Value::List(list), location))
370        }
371        YamlData::Mapping(mapping) => {
372            let mut map = Map::new();
373            for (key_node, value_node) in mapping {
374                let key = match &key_node.data {
375                    YamlData::Value(Scalar::String(value)) => value.to_string(),
376                    YamlData::Representation(value, _, _) => value.to_string(),
377                    _ => {
378                        return Err(Error::Parse {
379                            text: String::new(),
380                            location: None,
381                            message: "yaml map key must be a string".to_string(),
382                        });
383                    }
384                };
385                let value = convert_node(source, text, single_line, value_node)?;
386                map.insert(key, value);
387            }
388            Ok(LocatedValue::new(Value::Map(map), location))
389        }
390        YamlData::Tagged(_, inner) => convert_node(source, text, single_line, inner),
391        YamlData::Representation(representation, _, _) => {
392            if representation == "~" || representation == "null" || representation == "Null" {
393                return Ok(LocatedValue::new(Value::Null, location));
394            }
395            Ok(LocatedValue::new(
396                Value::String(representation.to_string()),
397                location,
398            ))
399        }
400        YamlData::Alias(_) | YamlData::BadValue => Err(Error::Parse {
401            text: text.to_string(),
402            location: Some(Box::new(location)),
403            message: "unsupported yaml node".to_string(),
404        }),
405    }
406}
407
#[cfg(all(test, feature = "yaml"))]
mod tests {
    use super::*;
    use std::num::NonZeroU32;
    use tanzim_source::SourceBuilder;

    /// Build a `file` source pointing at `resource`.
    fn file_source(resource: &str) -> Source {
        let builder = SourceBuilder::new()
            .with_source("file")
            .with_resource(resource);
        builder.build().unwrap()
    }

    /// Wrap `value` in a dummy location so it can be stored in maps and lists.
    fn loc(value: Value) -> LocatedValue {
        let location = Location::at("file", "test", None, None, None);
        LocatedValue::new(value, location)
    }

    /// Parse `text` as `config.yaml`, panicking on failure.
    fn parse_config(text: &str) -> LocatedValue {
        Yaml::new()
            .parse(&file_source("config.yaml"), text.as_bytes())
            .unwrap()
    }

    #[test]
    fn unparses_complex_yaml() {
        let mut nested = Map::new();
        nested.insert("key".into(), loc(Value::String("value".into())));

        let tags = vec![
            loc(Value::String("a".into())),
            loc(Value::String("b".into())),
        ];

        // Insertion order is what the expected output below relies on.
        let entries = [
            ("name", Value::String("tanzim".into())),
            ("port", Value::Int(8080)),
            ("ratio", Value::Float(0.5)),
            ("debug", Value::Bool(true)),
            ("tags", Value::List(tags)),
            ("nested", Value::Map(nested)),
        ];
        let mut map = Map::new();
        for (key, value) in entries {
            map.insert(key.into(), loc(value));
        }

        let expected = "name: tanzim\nport: 8080\nratio: 0.5\ndebug: true\ntags:\n  - a\n  - b\nnested:\n  key: value\n";
        let text = unparse(&file_source("out.yaml"), Value::Map(map)).unwrap();
        assert_eq!(text, expected);
    }

    #[test]
    fn parses_yaml_map() {
        let parsed = parse_config("hello: world\n");
        let map = parsed.value().as_map().unwrap();
        let hello = map.get("hello").unwrap();
        assert_eq!(hello.value().as_string().unwrap(), "world");
    }

    #[test]
    fn parses_yaml_map_with_lines() {
        let root = parse_config("foo: bar\nbaz: qux\n");
        let map = root.value().as_map().unwrap();

        let foo = map.get("foo").unwrap();
        assert_eq!(foo.value().as_string().unwrap(), "bar");
        assert_eq!(foo.location().line, NonZeroU32::new(1));

        let baz = map.get("baz").unwrap();
        assert_eq!(baz.location().line, NonZeroU32::new(2));
    }

    #[test]
    fn parses_yaml_null_at_correct_column() {
        let root = parse_config("foo: bar\n\nbaz:\n\n  qux: ~\n");
        let top = root.value().as_map().unwrap();
        let nested = top.get("baz").unwrap().value().as_map().unwrap();
        let qux = nested.get("qux").unwrap();

        assert!(qux.value().is_null());
        let location = qux.location();
        assert_eq!(location.line, NonZeroU32::new(5));
        assert_eq!(location.column, NonZeroU32::new(8));
        assert_eq!(location.length, NonZeroU32::new(1));
    }

    #[test]
    fn syntax_error_has_location() {
        let error = Yaml::new()
            .parse(&file_source("config.yaml"), b"foo: [\n")
            .unwrap_err();
        match &error {
            Error::Parse { location, .. } => {
                let location = location.as_ref().expect("syntax error location");
                assert!(location.line.is_some());
                assert!(location.column.is_some());
            }
            _ => panic!("expected parse error"),
        }
        // The alternate rendering is expected to include a caret marker.
        let message = format!("{error:#}");
        assert!(message.contains('^'));
    }
}