// tanzim_parse/lib.rs

1#![doc = include_str!("../README.md")]
2
3pub use tanzim_value::{Error, LocatedValue, Value};
4
5pub mod closure;
6pub mod span;
7
8#[cfg(feature = "env")]
9pub mod env;
10#[cfg(feature = "json")]
11pub mod json;
12#[cfg(feature = "toml")]
13pub mod toml;
14#[cfg(feature = "yaml")]
15pub mod yaml;
16
17/// Parses raw bytes into a [`LocatedValue`] tree for one format.
18///
19/// Implement this to add a new configuration format. This is the second pipeline stage: it turns
20/// the raw bytes a loader produced into a typed, source-located value tree for merging.
21///
22/// # Contract
23///
24/// - [`parse`](Parse::parse) returns one [`LocatedValue`] tree per payload. `source` is the
25///   source kind (e.g. `"file"`) and `resource` the path/identifier the bytes came from.
26/// - Every node in the tree — including the root — should carry a [`tanzim_value::Location`] that
27///   points back to the source, resource, and line/column, so downstream error messages can show
28///   users exactly where a bad value came from. Use [`tanzim_value::Location::at`] to build them.
29/// - [`supported_format_list`](Parse::supported_format_list) may return several extensions
30///   for one parser (e.g. `["yml", "yaml"]`). When a payload carries no format hint, selection
31///   instead falls back to probing — see Auto-detection below.
32///
33/// # Auto-detection
34///
35/// When a payload's `format` hint is `None`, the parse stage calls
36/// [`is_format_supported`][Parse::is_format_supported] on each registered
37/// parser in order. Return `Some(true)` if confident, `Some(false)` to skip, or `None`
38/// if unsure (another parser may then claim the bytes).
39///
40/// # Choosing an error
41///
42/// Failures are reported with [`tanzim_value::Error`]; every variant except `Parse` carries a
43/// [`Location`](tanzim_value::Location):
44///
45/// - [`Error::InvalidUtf8`] — the bytes aren't valid UTF-8.
46/// - [`Error::Parse`] — a syntax or structural error; set `location` when you can pinpoint it,
47///   otherwise `None`.
48/// - [`Error::UnsupportedNull`] — the input contained a null the config model doesn't represent.
49/// - [`Error::UnsupportedType`] — a value of a type that has no configuration representation
50///   (e.g. a date-time).
51///
52/// # Registering
53///
54/// Pass an instance to `tanzim::Config::with_parser`. The pipeline picks a parser by the payload's
55/// format hint when present, otherwise it probes each parser with
56/// [`is_format_supported`](Parse::is_format_supported). For a one-off parser you don't want
57/// to define a type for, use [`closure::Closure`] instead of implementing this trait.
58///
59/// # Example — custom CSV parser
60///
61/// ```rust
62/// use tanzim_parse::{Parse, Error, LocatedValue, Value};
63/// use tanzim_value::{Location, Map};
64///
65/// struct CsvParser;
66///
67/// impl Parse for CsvParser {
68///     fn name(&self) -> &str { "csv" }
69///     fn supported_format_list(&self) -> Vec<String> { vec!["csv".into()] }
70///     fn is_format_supported(&self, bytes: &[u8]) -> Option<bool> {
71///         Some(bytes.contains(&b','))
72///     }
73///     fn parse(&self, source: &str, resource: &str, bytes: &[u8])
74///         -> Result<LocatedValue, Error>
75///     {
76///         let text = std::str::from_utf8(bytes).map_err(|_| Error::InvalidUtf8 {
77///             location: Location::at(source, resource, None, None, None),
78///         })?;
79///         let mut map = Map::new();
80///         for (line_idx, line) in text.lines().enumerate() {
81///             if let Some((key, val)) = line.split_once(',') {
82///                 let loc = Location::at(source, resource, Some(line_idx + 1), None, None);
83///                 map.insert(key.trim().to_string(), LocatedValue {
84///                     value: Value::String(val.trim().to_string()),
85///                     location: loc,
86///                 });
87///             }
88///         }
89///         let root_loc = Location::at(source, resource, None, None, None);
90///         Ok(LocatedValue { value: Value::Map(map), location: root_loc })
91///     }
92/// }
93///
94/// let value = CsvParser
95///     .parse("file", "config.csv", b"host,127.0.0.1\nport,8080\n")
96///     .unwrap();
97///
98/// let map = value.value.as_map().unwrap();
99/// assert_eq!(map.get("host").unwrap().value.as_string().unwrap(), "127.0.0.1");
100/// assert_eq!(map.get("port").unwrap().value.as_string().unwrap(), "8080");
101/// // `port` is a string — this parser stores every field verbatim.
102/// ```
103pub trait Parse {
104    /// Human-readable name used in error messages.
105    fn name(&self) -> &str;
106    /// Format extensions this parser handles (e.g. `["json"]`, `["yml", "yaml"]`).
107    fn supported_format_list(&self) -> Vec<String>;
108    /// Probe `bytes` for auto-detection when `Payload::maybe_format` is `None`.
109    ///
110    /// Return `Some(true)` if confident, `Some(false)` if definitely not this format,
111    /// or `None` to abstain (another parser will be tried next).
112    fn is_format_supported(&self, bytes: &[u8]) -> Option<bool>;
113    /// Parse `bytes` into a [`LocatedValue`] tree.
114    ///
115    /// `source` is the source kind (e.g. `"file"`) and `resource` is the path or
116    /// identifier; both are used to populate [`tanzim_value::Location`] on every
117    /// node in the returned tree.
118    fn parse(&self, source: &str, resource: &str, bytes: &[u8]) -> Result<LocatedValue, Error>;
119}