ini_roundtrip/
lib.rs

1/*!
2# Format preserving Ini streaming parser
3
A simple, line-based INI parser.
5
6Features:
7* Format-preserving (you can write out again and get identical result)
8* Fast!
9* Streaming
10* `no_std` support
11
12Caveats:
13* The Display trait on [Item] does *not* preserve formatting, if this is
14  something you want, make sure to use the `raw` attributes to extract
15  the raw line instead.
16* Newlines are not saved. It is up to the caller to keep track of the
17  type of newline in use. Mixed newline (e.g. a mix of CR, CRLF and LF) is
18  supported on loading, but not on saving.
19
20## Examples
21
22```
23use ini_roundtrip as ini;
24
25let document = "\
26[SECTION]
27;this is a comment
28Key = Value  ";
29
30let elements = [
31    ini::Item::SectionEnd,
32    ini::Item::Section{name: "SECTION", raw: "[SECTION]"},
33    ini::Item::Comment{raw: ";this is a comment"},
34    ini::Item::Property{key: "Key", val: Some("Value"), raw: "Key = Value  "},
35    ini::Item::SectionEnd,
36];
37
38for (index, item) in ini::Parser::new(document).enumerate() {
39    assert_eq!(item, elements[index]);
40}
41```
42
43The `SectionEnd` pseudo-element is returned before a new section and at the end of the document.
44This helps processing sections after their properties finished parsing.
45
46The parser is very much line-based, it will continue no matter what and return nonsense as an item:
47
48```
49use ini_roundtrip as ini;
50
51let document = "\
52[SECTION
53nonsense";
54
55let elements = [
56    ini::Item::SectionEnd,
57    ini::Item::Error("[SECTION"),
58    ini::Item::Property{key: "nonsense", val: None, raw: "nonsense"},
59    ini::Item::SectionEnd,
60];
61
62for (index, item) in ini::Parser::new(document).enumerate() {
63    assert_eq!(item, elements[index]);
64}
65```
66
Lines that start with `[` but either contain no closing `]`, or have a closing `]` that is not immediately followed by a newline (or the end of the document), are returned as [`Item::Error`].
68Lines missing a `=` are returned as [`Item::Property`] with `None` value. See below for more details.
69
70Format
71------
72
INI is not a well-specified format. This parser tries to make as few assumptions as possible, but it does make some decisions.
74
75* Newline is either `"\r\n"`, `"\n"` or `"\r"`. It can be mixed in a single document but this is not recommended.
* Section header is `"[" section "]" newline`. `section` can be anything that does not contain a newline.
* Property is `key "=" value newline`. `key` and `value` can be anything that does not contain a newline.
78* Comment is the raw line for lines starting with `;` or `#`
79* Blank is just `newline`.
80
81Padding whitespace is always trimmed, but the raw line is always stored as well.
82
83No further processing of the input is done, e.g. if escape sequences are necessary they must be processed by the caller.
84*/
85
86#![no_std]
87
88use core::fmt;
89use core::str;
90
/// Reinterprets a byte slice of the input document as a string slice.
///
/// Debug builds validate the bytes and panic on invalid UTF-8; release builds
/// skip the validation entirely.
///
/// Callers must only pass slices that were cut out of the original `&str`
/// input at ASCII bytes, so that no multi-byte character is ever split.
#[inline]
fn from_utf8(v: &[u8]) -> &str {
    if cfg!(debug_assertions) {
        // Debug builds double check the invariant described above.
        str::from_utf8(v).expect("Impossible: Non-UTF8")
    } else {
        // SAFETY: the bytes originate from a `&str` supplied by the user and
        // this crate only slices at ASCII characters, so the slice is still
        // valid UTF-8.
        unsafe { str::from_utf8_unchecked(v) }
    }
}
102
/// Strips ASCII whitespace (as defined by [`char::is_ascii_whitespace`]) from
/// both ends of `s`. Non-ASCII whitespace is left untouched.
fn trim(s: &str) -> &str {
    /// Padding predicate shared by both trimming passes.
    fn is_padding(c: char) -> bool {
        c.is_ascii_whitespace()
    }

    // Trimming the end first keeps the result anchored at the start of `s`
    // when the whole string is padding.
    s.trim_end_matches(is_padding).trim_start_matches(is_padding)
}
107
/// A parsed element of syntactic meaning.
///
/// Every variant that corresponds to a physical line carries the raw,
/// untrimmed line (without its line terminator) so that the document can be
/// written back unchanged.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum Item<'a> {
    /// Syntax error.
    ///
    /// Section header element was malformed.
    /// Malformed section headers are defined by a line starting with `[` but
    /// not ending with `]`. The payload is the raw line.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("[Error").nth(1),
    ///     Some(ini_roundtrip::Item::Error("[Error")));
    /// ```
    Error(&'a str),

    /// Section header element.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("[Section]").nth(1),
    ///     Some(ini_roundtrip::Item::Section{name: "Section", raw: "[Section]"}));
    /// ```
    Section {
        /// Trimmed name of the section
        name: &'a str,
        /// Raw line, including the surrounding `[` and `]`
        raw: &'a str,
    },

    /// End of section.
    ///
    /// Pseudo-element emitted before a [`Section`](Item::Section) and at the
    /// end of the document. This helps processing sections after their
    /// properties finished parsing.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("").next(),
    ///     Some(ini_roundtrip::Item::SectionEnd));
    /// ```
    SectionEnd,

    /// Property element.
    ///
    /// The key must not contain `=`.
    ///
    /// The value is `None` if there is no `=`.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("Key=Value").next(),
    ///     Some(ini_roundtrip::Item::Property{key: "Key", val: Some("Value"), raw: "Key=Value"}));
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("Key").next(),
    ///     Some(ini_roundtrip::Item::Property{key: "Key", val: None, raw: "Key"}));
    /// ```
    Property {
        /// Trimmed key
        key: &'a str,
        /// Trimmed value (if any)
        val: Option<&'a str>,
        /// Raw line
        raw: &'a str,
    },

    /// Comment.
    ///
    /// A line whose first character is `;` or `#`.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new(";comment").next(),
    ///     Some(ini_roundtrip::Item::Comment{raw: ";comment"}));
    /// ```
    Comment {
        /// Raw line, including the leading `;` or `#` marker
        raw: &'a str,
    },

    /// Blank line.
    ///
    /// Allows faithful reproduction of the whole ini document including blank
    /// lines. Lines consisting only of ASCII whitespace are reported as blank
    /// too, with the whitespace kept in `raw`.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("\n").next(),
    ///     Some(ini_roundtrip::Item::Blank{raw: ""}));
    /// ```
    Blank {
        /// Raw line
        raw: &'a str,
    },
}
201
202impl fmt::Display for Item<'_> {
203    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204        match *self {
205            Item::Error(error) => writeln!(f, "{error}"),
206            Item::Section { name, raw: _ } => writeln!(f, "[{name}]"),
207            Item::SectionEnd => Ok(()),
208            Item::Property {
209                key,
210                val: Some(value),
211                raw: _,
212            } => writeln!(f, "{key}={value}"),
213            Item::Property {
214                key,
215                val: None,
216                raw: _,
217            } => writeln!(f, "{key}"),
218            Item::Comment { raw: comment } => writeln!(f, ";{comment}"),
219            Item::Blank { raw: _ } => f.write_str("\n"),
220        }
221    }
222}
223
/// Ini streaming parser.
///
/// The whole document must be available before parsing starts.
/// The parser then returns each element as it is being parsed.
///
/// See [`crate`] documentation for more information.
#[derive(Clone, Debug)]
pub struct Parser<'a> {
    /// Number of line terminators consumed so far; starts at 0.
    line: u32,
    /// Whether a `SectionEnd` has already been emitted for the current
    /// position: set when `SectionEnd` is returned, cleared again when the
    /// following section header (or malformed header) is consumed.
    section_ended: bool,
    /// The not yet parsed remainder of the input document.
    state: &'a [u8],
}
236
237impl<'a> Parser<'a> {
238    /// Constructs a new `Parser` instance.
239    #[inline]
240    #[must_use]
241    pub const fn new(s: &'a str) -> Self {
242        let state = s.as_bytes();
243        Parser {
244            line: 0,
245            section_ended: false,
246            state,
247        }
248    }
249
250    /// Returns the line number the parser is currently at.
251    #[inline]
252    #[must_use]
253    pub const fn line(&self) -> u32 {
254        self.line
255    }
256
257    /// Returns the remainder of the input string.
258    #[inline]
259    #[must_use]
260    pub fn remainder(&self) -> &'a str {
261        from_utf8(self.state)
262    }
263
264    #[inline]
265    fn skip_ln(&mut self, mut s: &'a [u8]) {
266        if !s.is_empty() {
267            if s[0] == b'\r' {
268                s = &s[1..];
269            }
270            if !s.is_empty() && s[0] == b'\n' {
271                s = &s[1..];
272            }
273            self.line += 1;
274        }
275        self.state = s;
276    }
277
278    fn get_line_and_advance(&mut self, s: &'a [u8]) -> &'a str {
279        let i = parse::find_nl(s);
280        let line = from_utf8(&s[..i]);
281        self.skip_ln(&s[i..]);
282        line
283    }
284}
285
286impl<'a> Iterator for Parser<'a> {
287    type Item = Item<'a>;
288
289    fn next(&mut self) -> Option<Item<'a>> {
290        let s = self.state;
291
292        match s.first().copied() {
293            // Terminal case
294            None => {
295                if self.section_ended {
296                    None
297                } else {
298                    self.section_ended = true;
299                    Some(Item::SectionEnd)
300                }
301            }
302            // Blank
303            Some(b'\r' | b'\n') => {
304                let line = self.get_line_and_advance(s);
305                Some(Item::Blank { raw: line })
306            }
307            // Comment
308            Some(b';' | b'#') => {
309                let line = self.get_line_and_advance(s);
310                Some(Item::Comment { raw: line })
311            }
312            // Section
313            Some(b'[') => {
314                if self.section_ended {
315                    self.section_ended = false;
316                    let i = parse::find_nl(s);
317                    if s[i - 1] != b']' {
318                        let error = from_utf8(&s[..i]);
319                        self.skip_ln(&s[i..]);
320                        return Some(Item::Error(error));
321                    }
322                    let section = from_utf8(&s[1..i - 1]);
323                    let section = trim(section);
324                    self.skip_ln(&s[i..]);
325                    Some(Item::Section {
326                        name: section,
327                        raw: from_utf8(&s[..i]),
328                    })
329                } else {
330                    self.section_ended = true;
331                    Some(Item::SectionEnd)
332                }
333            }
334            // Property
335            _ => {
336                let eol_or_eq = parse::find_nl_chr(s, b'=');
337                let key = from_utf8(&s[..eol_or_eq]);
338                let key = trim(key);
339                if s.get(eol_or_eq) != Some(&b'=') {
340                    // Key only case
341                    self.skip_ln(&s[eol_or_eq..]);
342                    if key.is_empty() {
343                        return Some(Item::Blank {
344                            raw: from_utf8(&s[..eol_or_eq]),
345                        });
346                    }
347                    Some(Item::Property {
348                        key,
349                        val: None,
350                        raw: from_utf8(&s[..eol_or_eq]),
351                    })
352                } else {
353                    // Key + value case
354                    let val_start = &s[eol_or_eq + 1..];
355
356                    let i = parse::find_nl(val_start);
357                    let value = from_utf8(&val_start[..i]);
358                    let value = trim(value);
359
360                    self.skip_ln(&val_start[i..]);
361
362                    Some(Item::Property {
363                        key,
364                        val: Some(value),
365                        raw: from_utf8(&s[..eol_or_eq + i + 1]),
366                    })
367                }
368            }
369        }
370    }
371}
372
// The parser is fused: `next` only returns `None` once the input is empty and
// the final `SectionEnd` has been emitted, and that branch changes no state,
// so every later call returns `None` as well.
impl core::iter::FusedIterator for Parser<'_> {}
374
375mod parse;
376#[cfg(test)]
377mod tests;