Skip to main content

tanzim_parse/
toml.rs

1//! TOML parser (`toml` feature).
2//!
3//! **Format:** `toml`
4//!
5//! # Behaviour
6//!
7//! - Parses TOML with source spans. Tables and inline tables become maps, arrays become lists, and
8//!   strings/integers/floats/booleans become the matching scalar values. Prefix and inline comments
9//!   are preserved on each [`LocatedValue`] via [`tanzim_value::Comment`].
10//! - Every node carries its span as a [`Location`] (line/column); for single-line input the
11//!   line/column are omitted.
12//! - TOML date-times have no configuration representation and are rejected with
13//!   [`Error::UnsupportedType`]. Non-UTF-8 input fails with [`Error::InvalidUtf8`], and any syntax
14//!   error becomes [`Error::Parse`].
15//! - [`is_format_supported`](crate::Parse::is_format_supported) returns `Some(true)` when
16//!   the bytes parse as TOML, else `Some(false)`.
17//!
18//! # Example
19//!
20//! ```
21//! use tanzim_parse::{Parse, toml::Toml};
22//! use tanzim_source::SourceBuilder;
23//!
24//! let source = SourceBuilder::new()
25//!     .with_source("file")
26//!     .with_resource("config.toml")
27//!     .build()
28//!     .unwrap();
29//! let value = Toml::new().parse(&source, b"host = \"127.0.0.1\"\n").unwrap();
30//! assert_eq!(
31//!     value.value().as_map().unwrap().get("host").unwrap().value().as_string().unwrap(),
32//!     "127.0.0.1"
33//! );
34//! ```
35
36use crate::span::{char_count, is_single_line, line_column};
37use crate::{Parse, Source};
38use cfg_if::cfg_if;
39use tanzim_value::{Comment, Error, LocatedValue, Location, Map, Value};
40use toml_edit::{
41    Array, DocumentMut, ImDocument, InlineTable, Item, RawString, Table, Value as TomlValue,
42};
43
/// Stateless parser for the `toml` format.
///
/// Converts a TOML document into a source-located value tree: tables, arrays, and scalars each
/// become a node carrying a span [`Location`], while date-times are refused with
/// [`Error::UnsupportedType`]. The type holds no state, so [`Toml::new`] is all that is needed.
///
/// ```
/// use tanzim_parse::{Parse, toml::Toml};
/// use tanzim_source::SourceBuilder;
///
/// let source = SourceBuilder::new()
///     .with_source("file")
///     .with_resource("config.toml")
///     .build()
///     .unwrap();
/// let value = Toml::new().parse(&source, b"port = 8080\n").unwrap();
/// let port = value.value().as_map().unwrap().get("port").unwrap();
/// assert_eq!(port.value().as_int().unwrap(), 8080);
/// ```
#[derive(Clone, Copy, Debug, Default)]
pub struct Toml;

impl Toml {
    /// Build a TOML parser. The parser is a unit value, so this is the same as `Toml`.
    pub fn new() -> Self {
        Toml
    }
}
71
72impl Parse for Toml {
73    fn name(&self) -> &str {
74        "TOML"
75    }
76
77    fn supported_format_list(&self) -> Vec<String> {
78        vec!["toml".into()]
79    }
80
81    fn parse(&self, src: &Source, bytes: &[u8]) -> Result<LocatedValue, Error> {
82        #[cfg(any(feature = "tracing", feature = "logging"))]
83        let source = src.source();
84        #[cfg(any(feature = "tracing", feature = "logging"))]
85        let resource = src.resource();
86        cfg_if! {
87            if #[cfg(feature = "tracing")] {
88                tracing::debug!(msg = "Parsing TOML configuration", source = source, resource = resource, bytes = bytes.len());
89            } else if #[cfg(feature = "logging")] {
90                log::debug!("msg=\"Parsing TOML configuration\" source={source} resource={resource} bytes={}", bytes.len());
91            }
92        }
93        let text = match std::str::from_utf8(bytes) {
94            Ok(value) => value,
95            Err(_) => {
96                return Err(Error::InvalidUtf8 {
97                    location: Box::new(Location::in_source(src.clone(), None, None, None)),
98                });
99            }
100        };
101        let single_line = is_single_line(bytes);
102        let document = match ImDocument::parse(text.to_string()) {
103            Ok(value) => value,
104            Err(error) => {
105                let location = match error.span() {
106                    Some(span) => {
107                        let (line, column) = line_column(text, span.start);
108                        let length = char_count(text, span.start, span.end).max(1);
109                        Some(Box::new(Location::in_source(
110                            src.clone(),
111                            Some(line),
112                            Some(column),
113                            Some(length),
114                        )))
115                    }
116                    None => None,
117                };
118                return Err(Error::Parse {
119                    text: text.to_string(),
120                    location,
121                    message: error.message().to_string(),
122                });
123            }
124        };
125        let result = convert_table(src, text, single_line, document.as_table(), 0);
126        if result.is_ok() {
127            cfg_if! {
128                if #[cfg(feature = "tracing")] {
129                    tracing::trace!(msg = "Parsed TOML configuration", source = source, resource = resource);
130                } else if #[cfg(feature = "logging")] {
131                    log::trace!("msg=\"Parsed TOML configuration\" source={source} resource={resource}");
132                }
133            }
134        }
135        result
136    }
137
138    fn is_format_supported(&self, bytes: &[u8]) -> Option<bool> {
139        match std::str::from_utf8(bytes) {
140            Ok(text) => Some(ImDocument::parse(text.to_string()).is_ok()),
141            Err(_) => Some(false),
142        }
143    }
144}
145
146/// Serialize a [`Value`] map into TOML.
147///
148/// Accepts a [`Value`], `&Value`, [`LocatedValue`], or `&LocatedValue`; the root must be a
149/// [`Value::Map`], since a TOML document is a table. Nested maps under a key become
150/// `[table]` sections; maps inside a list become inline tables. `source` is accepted for
151/// signature symmetry with [`Parse::parse`] but is unused here.
152///
153/// ```
154/// use tanzim_parse::toml::unparse;
155/// use tanzim_source::SourceBuilder;
156/// use tanzim_value::{Map, LocatedValue, Location, Value};
157///
158/// let source = SourceBuilder::new().with_source("file").build().unwrap();
159/// let mut map = Map::new();
160/// map.insert("port".into(), LocatedValue::new(
161///     Value::Int(8080),
162///     Location::at("file", "", None, None, None),
163/// ));
164/// assert_eq!(unparse(&source, Value::Map(map)).unwrap(), "port = 8080\n");
165/// ```
166pub fn unparse<V: AsRef<Value>>(
167    _source: &Source,
168    value: V,
169) -> Result<String, Box<dyn std::error::Error + Send + Sync + 'static>> {
170    let value = value.as_ref();
171    let map = match value.as_map() {
172        Some(map) => map,
173        None => {
174            return Err(format!("toml root must be a map, found {}", value.type_name()).into());
175        }
176    };
177    let mut document = DocumentMut::new();
178    build_table(document.as_table_mut(), map)?;
179    Ok(document.to_string())
180}
181
182fn build_table(
183    table: &mut Table,
184    map: &Map,
185) -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
186    for (key, lv) in map.entries() {
187        let item = to_toml_item(lv.value())?;
188        table.insert(key, item);
189
190        let before = lv.comment().before();
191        if !before.is_empty() {
192            let mut prefix = String::new();
193            for line in before {
194                if !prefix.is_empty() && !prefix.ends_with('\n') {
195                    prefix.push('\n');
196                }
197                prefix.push_str(line);
198                if !line.ends_with('\n') {
199                    prefix.push('\n');
200                }
201            }
202            if let Some(mut key_mut) = table.key_mut(key) {
203                key_mut.leaf_decor_mut().set_prefix(prefix.as_str());
204            }
205        }
206
207        if let Some(after) = lv.comment().after()
208            && let Some(item) = table.get_mut(key)
209        {
210            match item {
211                Item::Value(v) => v.decor_mut().set_suffix(format!(" {after}")),
212                Item::Table(t) => t.decor_mut().set_suffix(format!(" {after}")),
213                Item::ArrayOfTables(_) | Item::None => {}
214            }
215        }
216    }
217    Ok(())
218}
219
220fn to_toml_item(value: &Value) -> Result<Item, Box<dyn std::error::Error + Send + Sync + 'static>> {
221    match value {
222        Value::Map(map) => {
223            let mut table = Table::new();
224            build_table(&mut table, map)?;
225            Ok(Item::Table(table))
226        }
227        Value::Null => Err("cannot serialize null as TOML".into()),
228        other => Ok(Item::Value(to_toml_value(other)?)),
229    }
230}
231
232fn to_toml_value(
233    value: &Value,
234) -> Result<TomlValue, Box<dyn std::error::Error + Send + Sync + 'static>> {
235    match value {
236        Value::Bool(value) => Ok((*value).into()),
237        Value::Int(value) => Ok((*value as i64).into()),
238        Value::Float(value) => {
239            if !value.is_finite() {
240                return Err(format!("cannot serialize non-finite float {value} as TOML").into());
241            }
242            Ok((*value).into())
243        }
244        Value::String(value) => Ok(value.clone().into()),
245        Value::List(items) => {
246            let mut array = Array::new();
247            for item in items {
248                array.push(to_toml_value(item.value())?);
249            }
250            Ok(TomlValue::Array(array))
251        }
252        Value::Map(map) => {
253            let mut table = InlineTable::new();
254            for (key, item) in map.entries() {
255                if matches!(item.value(), Value::Null) {
256                    continue;
257                }
258                table.insert(key, to_toml_value(item.value())?);
259            }
260            Ok(TomlValue::InlineTable(table))
261        }
262        Value::Null => Err("cannot serialize null as TOML".into()),
263    }
264}
265
266/// Extract comment lines (lines starting with `#`) from raw TOML decor text.
267fn raw_comments_before(raw: &RawString, text: &str) -> Vec<String> {
268    let prefix_str = match raw.as_str() {
269        Some(s) if !s.is_empty() => s.to_string(),
270        Some(_) => return Vec::new(),
271        None => match raw.span() {
272            Some(span) => match text.get(span) {
273                Some(s) if !s.is_empty() => s.to_string(),
274                _ => return Vec::new(),
275            },
276            None => return Vec::new(),
277        },
278    };
279    prefix_str
280        .lines()
281        .filter_map(|l| {
282            let t = l.trim();
283            t.starts_with('#').then(|| t.to_string())
284        })
285        .collect()
286}
287
288/// Extract the first comment line (starting with `#`) from raw TOML inline suffix text.
289fn raw_comment_after(raw: &RawString, text: &str) -> Option<String> {
290    let suffix_str = match raw.as_str() {
291        Some(s) if !s.is_empty() => s.to_string(),
292        Some(_) => return None,
293        None => match raw.span() {
294            Some(span) => match text.get(span) {
295                Some(s) if !s.is_empty() => s.to_string(),
296                _ => return None,
297            },
298            None => return None,
299        },
300    };
301    suffix_str.lines().next().and_then(|l| {
302        let t = l.trim();
303        t.starts_with('#').then(|| t.to_string())
304    })
305}
306
307fn convert_table(
308    source: &Source,
309    text: &str,
310    single_line: bool,
311    table: &Table,
312    fallback_offset: usize,
313) -> Result<LocatedValue, Error> {
314    let location = location_from_span(source, text, single_line, table.span(), fallback_offset);
315    let mut map = Map::new();
316    for (key, item) in table {
317        let item_fallback = span_start(item.span(), fallback_offset);
318
319        let mut before: Vec<String> = Vec::new();
320        if let Some(key_obj) = table.key(key)
321            && let Some(raw_prefix) = key_obj.leaf_decor().prefix()
322        {
323            for line in raw_comments_before(raw_prefix, text) {
324                before.push(line);
325            }
326        }
327
328        let (mut located, suffix_raw) = match item {
329            Item::Value(value) => {
330                let suffix = value.decor().suffix().cloned();
331                let lv = convert_toml_value(
332                    source,
333                    text,
334                    single_line,
335                    value,
336                    location_from_span(source, text, single_line, value.span(), item_fallback),
337                )?;
338                (lv, suffix)
339            }
340            Item::Table(table) => {
341                if let Some(raw_prefix) = table.decor().prefix() {
342                    for line in raw_comments_before(raw_prefix, text) {
343                        before.push(line);
344                    }
345                }
346                let suffix = table.decor().suffix().cloned();
347                let lv = convert_table(source, text, single_line, table, item_fallback)?;
348                (lv, suffix)
349            }
350            Item::ArrayOfTables(array) => {
351                let loc = location_from_span(source, text, single_line, item.span(), item_fallback);
352                let mut list = Vec::new();
353                for index in 0..array.len() {
354                    if let Some(table) = array.get(index) {
355                        list.push(convert_table(
356                            source,
357                            text,
358                            single_line,
359                            table,
360                            span_start(table.span(), item_fallback),
361                        )?);
362                    }
363                }
364                (LocatedValue::new(Value::List(list), loc), None)
365            }
366            Item::None => {
367                return Err(Error::Parse {
368                    text: text.to_string(),
369                    location: Some(Box::new(location_from_span(
370                        source,
371                        text,
372                        single_line,
373                        item.span(),
374                        item_fallback,
375                    ))),
376                    message: "unexpected empty toml item".to_string(),
377                });
378            }
379        };
380
381        let after: Option<String> = if let Some(raw_suffix) = suffix_raw {
382            raw_comment_after(&raw_suffix, text)
383        } else {
384            None
385        };
386
387        if !before.is_empty() || after.is_some() {
388            located = located.with_comment(Comment::new().with_before(before).with_after(after));
389        }
390
391        map.insert(key.to_string(), located);
392    }
393    Ok(LocatedValue::new(Value::Map(map), location))
394}
395
396fn convert_toml_value(
397    source: &Source,
398    text: &str,
399    single_line: bool,
400    value: &TomlValue,
401    location: Location,
402) -> Result<LocatedValue, Error> {
403    match value {
404        TomlValue::String(value) => Ok(LocatedValue::new(
405            Value::String(value.value().to_string()),
406            location,
407        )),
408        TomlValue::Integer(value) => Ok(LocatedValue::new(
409            Value::Int(*value.value() as isize),
410            location,
411        )),
412        TomlValue::Float(value) => Ok(LocatedValue::new(Value::Float(*value.value()), location)),
413        TomlValue::Boolean(value) => Ok(LocatedValue::new(Value::Bool(*value.value()), location)),
414        TomlValue::Array(array) => {
415            let mut list = Vec::new();
416            let fallback_offset = span_start(array.span(), 0);
417            for index in 0..array.len() {
418                if let Some(value) = array.get(index) {
419                    let item_location = location_from_span(
420                        source,
421                        text,
422                        single_line,
423                        value.span(),
424                        fallback_offset,
425                    );
426                    list.push(convert_toml_value(
427                        source,
428                        text,
429                        single_line,
430                        value,
431                        item_location,
432                    )?);
433                }
434            }
435            Ok(LocatedValue::new(Value::List(list), location))
436        }
437        TomlValue::InlineTable(table) => {
438            let mut map = Map::new();
439            let fallback_offset = span_start(table.span(), 0);
440            for (key, value) in table {
441                let item_location =
442                    location_from_span(source, text, single_line, value.span(), fallback_offset);
443                let converted =
444                    convert_toml_value(source, text, single_line, value, item_location)?;
445                map.insert(key.to_string(), converted);
446            }
447            Ok(LocatedValue::new(Value::Map(map), location))
448        }
449        TomlValue::Datetime(_) => Err(Error::UnsupportedType {
450            text: text.to_string(),
451            location: Box::new(location),
452            found: "datetime",
453        }),
454    }
455}
456
/// Return the start offset of `span`, or `fallback_offset` when there is no span.
fn span_start(span: Option<std::ops::Range<usize>>, fallback_offset: usize) -> usize {
    span.map_or(fallback_offset, |range| range.start)
}
463
464fn location_from_span(
465    source: &Source,
466    text: &str,
467    single_line: bool,
468    span: Option<std::ops::Range<usize>>,
469    fallback_offset: usize,
470) -> Location {
471    if single_line {
472        return Location::in_source(source.clone(), None, None, None);
473    }
474    let mut length = 0usize;
475    if let Some(range) = &span {
476        length = char_count(text, range.start, range.end);
477    }
478    let offset = span_start(span, fallback_offset);
479    let (line, column) = line_column(text, offset);
480    Location::in_source(
481        source.clone(),
482        Some(line),
483        Some(column),
484        if length > 0 { Some(length) } else { None },
485    )
486}
487
#[cfg(all(test, feature = "toml"))]
mod tests {
    use super::*;
    use std::num::NonZeroU32;
    use tanzim_source::SourceBuilder;

    /// Build a `file` source pointing at `resource`.
    fn file_source(resource: &str) -> Source {
        let builder = SourceBuilder::new()
            .with_source("file")
            .with_resource(resource);
        builder.build().unwrap()
    }

    /// Wrap `value` in a location-less `LocatedValue` for building fixtures.
    fn loc(value: Value) -> LocatedValue {
        LocatedValue::new(value, Location::at("file", "test", None, None, None))
    }

    /// Parse `bytes` as TOML from `resource`, panicking on failure.
    fn parse_ok(resource: &str, bytes: &[u8]) -> LocatedValue {
        Toml::new().parse(&file_source(resource), bytes).unwrap()
    }

    /// Serializing a mixed map and parsing it back yields the same scalars, list, and table.
    #[test]
    fn unparses_complex_toml_round_trip() {
        let mut inner = Map::new();
        inner.insert("key".into(), loc(Value::String("value".into())));

        let tags = vec![
            loc(Value::String("a".into())),
            loc(Value::String("b".into())),
        ];

        let mut root = Map::new();
        root.insert("name".into(), loc(Value::String("tanzim".into())));
        root.insert("port".into(), loc(Value::Int(8080)));
        root.insert("ratio".into(), loc(Value::Float(0.5)));
        root.insert("debug".into(), loc(Value::Bool(true)));
        root.insert("tags".into(), loc(Value::List(tags)));
        root.insert("nested".into(), loc(Value::Map(inner)));

        let text = unparse(&file_source("out.toml"), Value::Map(root)).unwrap();
        let reparsed = parse_ok("out.toml", text.as_bytes());
        let map = reparsed.value().as_map().unwrap();

        let name = map.get("name").unwrap();
        assert_eq!(name.value().as_string().unwrap(), "tanzim");
        let port = map.get("port").unwrap();
        assert_eq!(port.value().as_int().unwrap(), 8080);
        let ratio = map.get("ratio").unwrap();
        assert_eq!(ratio.value().as_float().unwrap(), 0.5);
        let debug = map.get("debug").unwrap();
        assert!(debug.value().as_bool().unwrap());

        let tags = map.get("tags").unwrap().value().as_list().unwrap();
        assert_eq!(tags[0].value().as_string().unwrap(), "a");
        assert_eq!(tags[1].value().as_string().unwrap(), "b");

        let nested = map.get("nested").unwrap().value().as_map().unwrap();
        let key = nested.get("key").unwrap();
        assert_eq!(key.value().as_string().unwrap(), "value");
    }

    /// A non-map root cannot be written as a TOML document.
    #[test]
    fn unparse_non_map_root_is_error() {
        let outcome = unparse(&file_source("out.toml"), Value::Int(1));
        assert!(outcome.is_err());
    }

    /// A single key/value pair parses into a map with a string entry.
    #[test]
    fn parses_toml_table() {
        let parsed = parse_ok("config.toml", b"hello = \"world\"\n");
        let root = parsed.value().as_map().unwrap();
        let hello = root.get("hello").unwrap();
        assert_eq!(hello.value().as_string().unwrap(), "world");
    }

    /// Values inside a `[table]` report the line and column of the value itself.
    #[test]
    fn nested_table_key_has_line_number() {
        let parsed = parse_ok(
            "config.toml",
            b"[https]\nfollow_redirects = false\ninsecure = true\nretries = 3\n",
        );
        let https = parsed.value().as_map().unwrap().get("https").unwrap();
        let retries = https.value().as_map().unwrap().get("retries").unwrap();
        assert_eq!(retries.location().line, NonZeroU32::new(4));
        assert_eq!(retries.location().column, NonZeroU32::new(11));
    }

    /// A comment above a table header is attached to the table, not treated as a key.
    #[test]
    fn parses_table_header_prefix_comment() {
        let parsed = parse_ok(
            "baz.toml",
            b"# This is a comment\n[logging]\nlevel = \"debug\"\n",
        );
        let root = parsed.value().as_map().unwrap();
        let logging = root.get("logging").unwrap();
        assert_eq!(logging.comment().before(), &["# This is a comment"]);
        assert!(!root.contains_key("# This is a comment"));

        let level = logging.value().as_map().unwrap().get("level").unwrap();
        assert_eq!(level.value().as_string().unwrap(), "debug");
    }

    /// Prefix and trailing comments are both captured, and trailing ones survive `unparse`.
    #[test]
    fn parses_inline_suffix_comments() {
        let text = b"# This is a comment\n[logging]\n# log level\nlevel = \"debug\" # debug, info, warn, error\n# output serialize format\noutput_serialize_format = \"json\" # json, yaml\n";
        let parsed = parse_ok("baz.toml", text);
        let root = parsed.value().as_map().unwrap();

        let logging_lv = root.get("logging").unwrap();
        assert_eq!(logging_lv.comment().before(), &["# This is a comment"]);

        let logging = logging_lv.value().as_map().unwrap();
        let level = logging.get("level").unwrap();
        assert_eq!(level.comment().before(), &["# log level"]);
        assert_eq!(level.comment().after(), Some("# debug, info, warn, error"));

        let osf = logging.get("output_serialize_format").unwrap();
        assert_eq!(osf.comment().before(), &["# output serialize format"]);
        assert_eq!(osf.comment().after(), Some("# json, yaml"));

        let reparsed = unparse(&file_source("out.toml"), parsed.into_value()).unwrap();
        assert!(reparsed.contains("# debug, info, warn, error"));
        assert!(reparsed.contains("# json, yaml"));
    }

    /// A comment above a key is attached to that key and written back by `unparse`.
    #[test]
    fn parses_and_unparses_prefix_comments() {
        let parsed = parse_ok("config.toml", b"# top comment\nhello = \"world\"\n");
        let map = parsed.value().as_map().unwrap();
        let hello = map.get("hello").unwrap();
        assert_eq!(hello.comment().before(), &["# top comment"]);
        assert!(!map.contains_key("# top comment"));
        assert_eq!(hello.value().as_string().unwrap(), "world");

        let text = unparse(&file_source("out.toml"), parsed.into_value()).unwrap();
        assert!(text.contains("# top comment"));
        assert!(text.contains("hello = \"world\""));
    }

    /// Syntax errors carry a location on line 1 and render with a caret marker.
    #[test]
    fn syntax_error_has_location() {
        let error = Toml::new()
            .parse(&file_source("config.toml"), b"hello = \n")
            .unwrap_err();
        match &error {
            Error::Parse { location, .. } => {
                assert!(location.is_some());
                assert_eq!(location.as_ref().unwrap().line, NonZeroU32::new(1));
            }
            _ => panic!("expected parse error"),
        }
        let message = format!("{error:#}");
        assert!(message.contains('^'));
    }
}