tanzim_parse/lib.rs
1#![doc = include_str!("../README.md")]
2
3pub use tanzim_source::Source;
4pub use tanzim_value::{Error, LocatedValue, Value};
5
6pub mod closure;
7pub mod span;
8
9#[cfg(feature = "env")]
10pub mod env;
11#[cfg(feature = "json")]
12pub mod json;
13#[cfg(feature = "toml")]
14pub mod toml;
15#[cfg(feature = "yaml")]
16pub mod yaml;
17
18/// Parses raw bytes into a [`LocatedValue`] tree for one format.
19///
20/// Implement this to add a new configuration format. This is the second pipeline stage: it turns
21/// the raw bytes a loader produced into a typed, source-located value tree for merging.
22///
23/// # Contract
24///
25/// - [`parse`](Parse::parse) returns one [`LocatedValue`] tree per payload. `source` carries the
26/// source kind (e.g. `"file"`), the resource path/identifier, and any loader options.
27/// - Every node in the tree — including the root — should carry a [`tanzim_value::Location`] that
28/// points back to the source, resource, and line/column, so downstream error messages can show
29/// users exactly where a bad value came from. Use [`tanzim_value::Location::at`] to build them.
30/// - [`supported_format_list`](Parse::supported_format_list) may return several extensions
31/// for one parser (e.g. `["yml", "yaml"]`). When a payload carries no format hint, selection
32/// instead falls back to probing — see Auto-detection below.
33///
34/// # Auto-detection
35///
36/// When a payload's `format` hint is `None`, the parse stage calls
37/// [`is_format_supported`][Parse::is_format_supported] on each registered
38/// parser in order. Return `Some(true)` if confident, `Some(false)` to skip, or `None`
39/// if unsure (another parser may then claim the bytes).
40///
41/// # Choosing an error
42///
43/// Failures are reported with [`tanzim_value::Error`]; every variant except `Parse` carries a
44/// [`Location`](tanzim_value::Location):
45///
46/// - [`Error::InvalidUtf8`] — the bytes aren't valid UTF-8.
47/// - [`Error::Parse`] — a syntax or structural error; set `location` when you can pinpoint it,
48/// otherwise `None`.
49/// - [`Error::UnsupportedType`] — a value of a type that has no configuration representation
50/// (e.g. a date-time).
51///
52/// # Registering
53///
54/// Pass an instance to `tanzim::Config::with_parser`. The pipeline picks a parser by the payload's
55/// format hint when present, otherwise it probes each parser with
56/// [`is_format_supported`](Parse::is_format_supported). For a one-off parser you don't want
57/// to define a type for, use [`closure::Closure`] instead of implementing this trait.
58///
59/// # Example — custom CSV parser
60///
61/// ```rust
62/// use tanzim_parse::{Parse, Source};
63/// use tanzim_source::SourceBuilder;
64/// use tanzim_value::{Error, LocatedValue, Location, Map, Value};
65///
66/// struct CsvParser;
67///
68/// impl Parse for CsvParser {
69/// fn name(&self) -> &str { "csv" }
70/// fn supported_format_list(&self) -> Vec<String> { vec!["csv".into()] }
71/// fn is_format_supported(&self, bytes: &[u8]) -> Option<bool> {
72/// Some(bytes.contains(&b','))
73/// }
74/// fn parse(&self, source: &Source, bytes: &[u8]) -> Result<LocatedValue, Error> {
75/// let source_name = source.source();
76/// let resource = source.resource();
77/// let text = match std::str::from_utf8(bytes) {
78/// Ok(value) => value,
79/// Err(_) => {
80/// return Err(Error::InvalidUtf8 {
81/// location: Box::new(Location::at(source_name, resource, None, None, None)),
82/// });
83/// }
84/// };
85/// let mut map = Map::new();
86/// for (line_idx, line) in text.lines().enumerate() {
87/// if let Some((key, val)) = line.split_once(',') {
88/// let loc = Location::at(source_name, resource, Some(line_idx + 1), None, None);
89/// map.insert(key.trim().to_string(), LocatedValue::new(
90/// Value::String(val.trim().to_string()),
91/// loc,
92/// ));
93/// }
94/// }
95/// let root_loc = Location::at(source_name, resource, None, None, None);
96/// Ok(LocatedValue::new(Value::Map(map), root_loc))
97/// }
98/// }
99///
100/// let source = SourceBuilder::new()
101/// .with_source("file")
102/// .with_resource("config.csv")
103/// .build()
104/// .unwrap();
105/// let value = CsvParser
106/// .parse(&source, b"host,127.0.0.1\nport,8080\n")
107/// .unwrap();
108///
109/// let map = value.value().as_map().unwrap();
110/// assert_eq!(map.get("host").unwrap().value().as_string().unwrap(), "127.0.0.1");
111/// assert_eq!(map.get("port").unwrap().value().as_string().unwrap(), "8080");
112/// // `port` is a string — this parser stores every field verbatim.
113/// ```
114pub trait Parse {
115 /// Human-readable name used in error messages.
116 fn name(&self) -> &str;
117 /// Format extensions this parser handles (e.g. `["json"]`, `["yml", "yaml"]`).
118 fn supported_format_list(&self) -> Vec<String>;
119 /// Probe `bytes` for auto-detection when `Payload::maybe_format` is `None`.
120 ///
121 /// Return `Some(true)` if confident, `Some(false)` if definitely not this format,
122 /// or `None` to abstain (another parser will be tried next).
123 fn is_format_supported(&self, bytes: &[u8]) -> Option<bool>;
124 /// Parse `bytes` into a [`LocatedValue`] tree.
125 ///
126 /// `source` carries the source kind (e.g. `"file"`), the resource path or
127 /// identifier, and any loader options. Use [`Source::source`], [`Source::resource`],
128 /// and [`Source::options`] to access them. Every node in the returned tree should
129 /// carry a [`tanzim_value::Location`] built from those values.
130 fn parse(&self, source: &Source, bytes: &[u8]) -> Result<LocatedValue, Error>;
131}