cni_format/
lib.rs

1#![forbid(unsafe_code)]
2#![warn(
3    invalid_html_tags,
4    keyword_idents,
5    missing_docs,
6    non_ascii_idents,
7    trivial_casts,
8    trivial_numeric_casts,
9    unused_crate_dependencies,
10    unused_extern_crates,
11    unused_import_braces,
12    clippy::cargo,
13    clippy::pedantic
14)]
15//! This is a parser library for the
16//! [CNI configuration format (**C**o**N**figuration **I**nitialization format)][CNI]
17//! by libuconf.
18//! # CNI standard compliance
19//! The implementation is fully compliant with the `core` and
20//! `ini` part of the specification and with the extension `more-keys`.
21//!
22//! [CNI]: https://github.com/libuconf/cni/
23//!
24//! # Examples
25//! ```
26//! use std::collections::HashMap;
27//!
28//! let cni = r"
29//! [section]
30//! key = value
31//! rkey = `raw value with `` escaped`
32//! subsection.key = look, whitespace!
33//! ";
34//!
35//! let parsed = cni_format::from_str(&cni).expect("could not parse CNI");
36//!
37//! // You can get everything, section names will be prepended to key names.
38//! {
39//!     let mut result: HashMap<String, String> = HashMap::new();
40//!     result.insert("section.key".to_string(), "value".to_string());
41//!     result.insert("section.rkey".to_string(), "raw value with ` escaped".to_string());
42//!     result.insert("section.subsection.key".to_string(), "look, whitespace!".to_string());
43//!
44//!     assert_eq!(parsed, result);
45//! }
46//!
47//! // You can get values from one section only.
48//! # #[cfg(feature = "api")]
49//! {
50//!     let mut section: HashMap<String, String> = HashMap::new();
51//!     section.insert("key".to_string(), "value".to_string());
52//!     section.insert("rkey".to_string(), "raw value with ` escaped".to_string());
53//!     section.insert("subsection.key".to_string(), "look, whitespace!".to_string());
54//!
55//!     // use trait that adds CNI related functionality
56//!     use cni_format::CniExt;
57//!
58//!     // filter out values in section "section"
59//!     assert_eq!(parsed.sub_tree("section"), section);
60//! }
61//!
62//! // You can get child nodes from one section only, excluding subsections.
63//! # #[cfg(feature = "api")]
64//! {
65//!     let mut section: HashMap<String, String> = HashMap::new();
66//!     section.insert("key".to_string(), "value".to_string());
67//!     section.insert("rkey".to_string(), "raw value with ` escaped".to_string());
68//!
69//!     // use trait that adds CNI related functionality
70//!     use cni_format::CniExt;
71//!
72//!     // filter out values in section "section", but not in subsections
73//!     assert_eq!(parsed.sub_leaves("section"), section);
74//! }
75//! ```
76
77use std::collections::HashMap;
78use std::str::Chars;
79
80#[cfg(test)]
81mod tests;
82
83#[cfg(any(feature = "api", test, doctest, doc))]
84mod api;
85#[cfg(any(feature = "api", test, doctest, doc))]
86pub use api::{CniExt, SectionFilter};
87
88#[cfg(any(feature = "serializer", test, doctest, doc))]
89mod serializer;
90#[cfg(any(feature = "serializer", test, doctest, doc))]
91pub use serializer::to_str;
92
/// Options that control how CNI text is parsed.
///
/// Every field switches one optional part of the format on or off. The
/// [`Default`] value has all switches turned off.
///
/// The type is a small plain-data struct, so it implements the common
/// comparison and formatting traits and can be copied freely.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Opts {
    /// Whether the ini compatibility is used. Default: false
    ///
    /// This allows semicolons to be used to start comments.
    pub ini: bool,
    /// Whether the `more-keys` extension is used. Default: false
    ///
    /// This allows a wider range of characters in keys and section headings.
    pub more_keys: bool,
}
106
107mod iter;
108
/// Tells whether `c` is vertical white space, i.e. one of the characters
/// that `\v` stands for in Perl and Raku.
fn is_vertical_ws(c: char) -> bool {
    // LF, VT, FF, CR, NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR
    const VERTICAL: [char; 7] = [
        '\u{A}', '\u{B}', '\u{C}', '\u{D}', '\u{85}', '\u{2028}', '\u{2029}',
    ];
    VERTICAL.contains(&c)
}
116
117fn is_comment(c: char, opts: Opts) -> bool {
118    c == '#' || (opts.ini && c == ';')
119}
120
121fn is_key(c: char, opts: Opts) -> bool {
122    if opts.more_keys {
123        !matches!(c, '[' | ']' | '=' | '`') && !is_comment(c, opts) && !c.is_whitespace()
124    } else {
125        matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z' | '-' | '_' | '.')
126    }
127}
128
129/// An iterator that visits all key/value pairs in declaration order, even
130/// key/value pairs that will be overwritten by later statements.
131///
132/// Calling `next` on this iterator after receiving a `Some(Err(_))` causes
133/// undefined behaviour.
134///
135/// If you just want to access the resulting key/value store, take a look at
136/// [`from_str`].
137pub struct CniParser<I: Iterator<Item = char>> {
138    /// The iterator stores the current position.
139    iter: iter::Iter<I>,
140    /// The current section name.
141    section: String,
142    /// The selected parsing options.
143    opts: Opts,
144}
145
146impl<I: Iterator<Item = char>> CniParser<I> {
147    /// Creates a new `CniParser` that will parse the given CNI format text.
148    /// The parsing options are set to the defaults.
149    #[must_use = "iterators are lazy and do nothing unless consumed"]
150    pub fn new(iter: I) -> Self {
151        Self {
152            iter: iter::Iter::new(iter),
153            section: String::new(),
154            opts: Opts::default(),
155        }
156    }
157
158    /// Creates a new `CniParser` that will parse the given CNI format text
159    /// with the given parsing options.
160    #[must_use = "iterators are lazy and do nothing unless consumed"]
161    pub fn new_opts(iter: I, opts: Opts) -> Self {
162        Self {
163            iter: iter::Iter::new(iter),
164            section: String::new(),
165            opts,
166        }
167    }
168
169    /// Skips whitespace.
170    fn skip_ws(&mut self) {
171        while matches!(
172            self.iter.peek(),
173            Some(c) if c.is_whitespace()
174        ) {
175            self.iter.next();
176        }
177    }
178
179    fn skip_comment(&mut self) {
180        // skip any whitespace
181        self.skip_ws();
182        // if we arrive at a comment symbol now, skip the comment after it
183        // otherwise do not because we might have also skipped over line ends
184        if matches!(
185            self.iter.peek(),
186            Some(&c) if is_comment(c, self.opts)
187        ) {
188            // continue until next vertical whitespace or EOF
189            while matches!(self.iter.next(), Some(c) if !is_vertical_ws(c)) {}
190        }
191    }
192
193    fn parse_key(&mut self) -> Result<String, &'static str> {
194        let mut key = String::new();
195
196        while matches!(self.iter.peek(), Some(&c) if is_key(c, self.opts)) {
197            key.push(self.iter.next().unwrap());
198        }
199
200        if key.starts_with('.') || key.ends_with('.') {
201            // key cannot start or end with a dot
202            Err("invalid key, can not start or end with a dot")
203        } else {
204            Ok(key)
205        }
206    }
207
208    fn parse_value(&mut self) -> Result<String, String> {
209        // since raw values might have escaped backtics, they have to
210        // be constructed as Strings and cannot be a reference.
211        let mut value = String::new();
212
213        if let Some('`') = self.iter.peek() {
214            // raw value, save starting line and column for potential diagnostics
215            let (line, col) = (self.iter.line, self.iter.col);
216
217            self.iter.next(); // consume backtick
218            loop {
219                if let Some('`') = self.iter.peek() {
220                    // check if this is an escaped backtick
221                    self.iter.next();
222                    if let Some('`') = self.iter.peek() {
223                        // escaped backtick
224                        self.iter.next();
225                        value.push('`');
226                    } else {
227                        // end of the value
228                        break;
229                    }
230                } else if let Some(c) = self.iter.next() {
231                    value.push(c);
232                } else {
233                    // current value must have been a None
234                    return Err(format!("line {}:{}: unterminated raw value", line, col));
235                }
236            }
237        } else {
238            // normal value: no comment starting character but white space, but not vertical space
239            while matches!(self.iter.peek(), Some(&c) if !is_comment(c, self.opts) && !( c.is_whitespace() && is_vertical_ws(c) ))
240            {
241                value.push(self.iter.next().unwrap());
242            }
243            // leading or trailing whitespace cannot be part of the value
244            value = value.trim().to_string();
245        }
246
247        Ok(value)
248    }
249}
250
251impl<'a> From<&'a str> for CniParser<Chars<'a>> {
252    /// Create a `CniParser` from a string slice.
253    #[must_use = "iterators are lazy and do nothing unless consumed"]
254    fn from(text: &'a str) -> Self {
255        Self::new(text.chars())
256    }
257}
258
259impl<I: Iterator<Item = char>> Iterator for CniParser<I> {
260    type Item = Result<(String, String), String>;
261
262    /// Try to parse until the next key/value pair.
263    fn next(&mut self) -> Option<Self::Item> {
264        loop {
265            self.skip_ws();
266            // we should be at start of a line now
267            let c = *self.iter.peek()?;
268            if is_vertical_ws(c) {
269                // empty line
270                self.iter.next();
271                continue;
272            } else if is_comment(c, self.opts) {
273                self.skip_comment();
274            } else if c == '[' {
275                // section heading
276                self.iter.next(); // consume [
277
278                let (line, col) = (self.iter.line, self.iter.col);
279                self.skip_ws();
280
281                // better error message before we store the new line and column.
282                if self.iter.peek().is_none() {
283                    return Some(Err(format!("line {}:{}: expected \"]\"", line, col)));
284                }
285
286                // this key can be empty
287                match self.parse_key() {
288                    Ok(key) => self.section = key.to_string(),
289                    Err(e) => return Some(Err(format!("line {}:{}: {}", line, col, e))),
290                };
291
292                let (line, col) = (self.iter.line, self.iter.col);
293                self.skip_ws();
294
295                if self.iter.next().map_or(true, |c| c != ']') {
296                    return Some(Err(format!("line {}:{}: expected \"]\"", line, col)));
297                }
298                self.skip_comment();
299            } else {
300                // this should be a key/value pair
301
302                // parse key, prepend it with section name if present
303                let key = match self.parse_key() {
304                    // this key cannot be empty
305                    Ok(key) if key.is_empty() => {
306                        return Some(Err(format!(
307                            "line {}:{}: expected key",
308                            self.iter.line, self.iter.col
309                        )))
310                    }
311                    // do not prepend an empty section
312                    Ok(key) if self.section.is_empty() => key,
313                    Ok(key) => format!("{}.{}", self.section, key),
314                    Err(e) => {
315                        return Some(Err(format!(
316                            "line {}:{}: {}",
317                            self.iter.line, self.iter.col, e
318                        )))
319                    }
320                };
321
322                let (line, col) = (self.iter.line, self.iter.col);
323                self.skip_ws();
324
325                if self.iter.next().map_or(true, |c| c != '=') {
326                    return Some(Err(format!("line {}:{}: expected \"=\"", line, col)));
327                }
328
329                self.skip_ws();
330
331                let value = match self.parse_value() {
332                    Ok(key) => key,
333                    Err(e) => return Some(Err(e)),
334                };
335
336                self.skip_comment();
337
338                break Some(Ok((key, value)));
339            }
340        }
341    }
342}
343
344/// Parses CNI format text and returns the resulting key/value store.
345/// The [parsing options][Opts] are set to the default values.
346///
347/// This just constructs a [`CniParser`] and collects it.
348///
349/// For more information see the [crate level documentation](index.html).
350///
351/// # Errors
352/// Returns an `Err` if the given text is not in a valid CNI format. The `Err`
353/// will contain a message explaining the error.
354pub fn from_str(text: &str) -> Result<HashMap<String, String>, String> {
355    CniParser::from(text).collect()
356}
357
358/// Parses CNI format text and returns the resulting key/value store,
359/// using the specified options.
360///
361/// This just constructs a [`CniParser`] and collects it.
362///
363/// For more information see the [crate level documentation](index.html).
364///
365/// # Errors
366/// Returns an `Err` if the given text is not in a valid CNI format. The `Err`
367/// will contain a message explaining the error.
368pub fn from_str_opts(text: &str, opts: Opts) -> Result<HashMap<String, String>, String> {
369    CniParser::new_opts(text.chars(), opts).collect()
370}