ini_roundtrip/lib.rs
1/*!
2# Format preserving Ini streaming parser
3
Simple INI parser.

Features:
7* Format-preserving (you can write out again and get identical result)
8* Fast!
9* Streaming
10* `no_std` support
11
12Caveats:
* The `Display` trait on [Item] does *not* preserve formatting. If you need
  the original formatting, use the `raw` field of each item to get the raw
  line instead.
16* Newlines are not saved. It is up to the caller to keep track of the
17 type of newline in use. Mixed newline (e.g. a mix of CR, CRLF and LF) is
18 supported on loading, but not on saving.
19
20## Examples
21
22```
23use ini_roundtrip as ini;
24
25let document = "\
26[SECTION]
27;this is a comment
28Key = Value ";
29
30let elements = [
31 ini::Item::SectionEnd,
32 ini::Item::Section{name: "SECTION", raw: "[SECTION]"},
33 ini::Item::Comment{raw: ";this is a comment"},
34 ini::Item::Property{key: "Key", val: Some("Value"), raw: "Key = Value "},
35 ini::Item::SectionEnd,
36];
37
38for (index, item) in ini::Parser::new(document).enumerate() {
39 assert_eq!(item, elements[index]);
40}
41```
42
43The `SectionEnd` pseudo-element is returned before a new section and at the end of the document.
44This helps processing sections after their properties finished parsing.
45
The parser is strictly line-based. It never stops at malformed input; it keeps going and returns every line as some item, even when the line is nonsense:
47
48```
49use ini_roundtrip as ini;
50
51let document = "\
52[SECTION
53nonsense";
54
55let elements = [
56 ini::Item::SectionEnd,
57 ini::Item::Error("[SECTION"),
58 ini::Item::Property{key: "nonsense", val: None, raw: "nonsense"},
59 ini::Item::SectionEnd,
60];
61
62for (index, item) in ini::Parser::new(document).enumerate() {
63 assert_eq!(item, elements[index]);
64}
65```
66
Lines that start with `[` but contain either no closing `]` or a closing `]` that is not immediately followed by a newline are returned as [`Item::Error`].
68Lines missing a `=` are returned as [`Item::Property`] with `None` value. See below for more details.
69
70Format
71------
72
INI is not a well-specified format. This parser tries to make as few assumptions as possible, but it does make some decisions:
74
75* Newline is either `"\r\n"`, `"\n"` or `"\r"`. It can be mixed in a single document but this is not recommended.
* Section header is `"[" section "]" newline`. `section` can be anything that does not contain a newline.
* Property is `key "=" value newline`. `key` and `value` can be anything that does not contain a newline; `key` additionally cannot contain `=`.
78* Comment is the raw line for lines starting with `;` or `#`
79* Blank is just `newline`.
80
81Padding whitespace is always trimmed, but the raw line is always stored as well.
82
83No further processing of the input is done, e.g. if escape sequences are necessary they must be processed by the caller.
84*/
85
86#![no_std]
87
88use core::fmt;
89use core::str;
90
/// Reinterprets bytes taken from the parser input as a string slice.
///
/// Every caller in this crate passes a sub-slice of the `&str` the user
/// handed in, cut only at ASCII bytes, so the bytes are expected to be valid
/// UTF-8 already. Builds with `debug_assertions` still validate this and
/// panic if the invariant is broken; other builds skip the validation.
#[inline]
fn from_utf8(v: &[u8]) -> &str {
    if cfg!(debug_assertions) {
        // Checked conversion: turns a broken invariant into a loud panic.
        str::from_utf8(v).expect("Impossible: Non-UTF8")
    } else {
        // SAFETY: `v` is a sub-slice of a valid `&str` that was split only at
        // ASCII bytes, which can never fall inside a multi-byte UTF-8
        // sequence, so `v` is valid UTF-8.
        unsafe { str::from_utf8_unchecked(v) }
    }
}
102
/// Strips leading and trailing ASCII whitespace from `s`.
///
/// Only characters accepted by [`char::is_ascii_whitespace`] are removed;
/// any other (e.g. non-ASCII) whitespace is left in place.
fn trim(s: &str) -> &str {
    /// Predicate handed to `trim_matches`.
    fn is_ascii_ws(c: char) -> bool {
        c.is_ascii_whitespace()
    }

    s.trim_matches(is_ascii_ws)
}
107
/// One parsed element of an INI document.
///
/// Every variant that corresponds to a line of input borrows its text from
/// the document handed to the parser, so items are cheap to copy.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Item<'a> {
    /// A malformed section header.
    ///
    /// Produced for a line that starts with `[` but does not end with `]`.
    /// The payload is the raw line.
    ///
    /// The example uses `nth(1)` because the parser emits
    /// [`SectionEnd`](Item::SectionEnd) before handling a line that starts
    /// with `[`.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("[Error").nth(1),
    ///     Some(ini_roundtrip::Item::Error("[Error")));
    /// ```
    Error(&'a str),

    /// A section header line.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("[Section]").nth(1),
    ///     Some(ini_roundtrip::Item::Section{name: "Section", raw: "[Section]"}));
    /// ```
    Section {
        /// Text between `[` and `]` with surrounding ASCII whitespace removed.
        name: &'a str,
        /// The line exactly as it appeared in the input, without the newline.
        raw: &'a str,
    },

    /// Marks the end of a section.
    ///
    /// This pseudo-element does not correspond to any input text. It is
    /// emitted before each [`Section`](Item::Section) and once more at the
    /// end of the document, so callers can finish processing a section after
    /// all of its properties have been seen.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("").next(),
    ///     Some(ini_roundtrip::Item::SectionEnd));
    /// ```
    SectionEnd,

    /// A property line.
    ///
    /// The key cannot contain `=`, because the line is split at its first
    /// `=`. When the line has no `=` at all, `val` is `None`.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("Key=Value").next(),
    ///     Some(ini_roundtrip::Item::Property{key: "Key", val: Some("Value"), raw: "Key=Value"}));
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("Key").next(),
    ///     Some(ini_roundtrip::Item::Property{key: "Key", val: None, raw: "Key"}));
    /// ```
    Property {
        /// Key with surrounding ASCII whitespace removed.
        key: &'a str,
        /// Value with surrounding ASCII whitespace removed, if the line has a `=`.
        val: Option<&'a str>,
        /// The line exactly as it appeared in the input, without the newline.
        raw: &'a str,
    },

    /// A comment line, i.e. a line whose first character is `;` or `#`.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new(";comment").next(),
    ///     Some(ini_roundtrip::Item::Comment{raw: ";comment"}));
    /// ```
    Comment {
        /// The whole line, including the leading `;` or `#`, without the newline.
        raw: &'a str,
    },

    /// A blank line.
    ///
    /// Reported so that the document can be reproduced faithfully, blank
    /// lines included. A line made up only of ASCII whitespace is reported
    /// as blank as well, with that whitespace kept in `raw`.
    ///
    /// ```
    /// assert_eq!(
    ///     ini_roundtrip::Parser::new("\n").next(),
    ///     Some(ini_roundtrip::Item::Blank{raw: ""}));
    /// ```
    Blank {
        /// The line exactly as it appeared in the input, without the newline.
        raw: &'a str,
    },
}
201
202impl fmt::Display for Item<'_> {
203 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204 match *self {
205 Item::Error(error) => writeln!(f, "{error}"),
206 Item::Section { name, raw: _ } => writeln!(f, "[{name}]"),
207 Item::SectionEnd => Ok(()),
208 Item::Property {
209 key,
210 val: Some(value),
211 raw: _,
212 } => writeln!(f, "{key}={value}"),
213 Item::Property {
214 key,
215 val: None,
216 raw: _,
217 } => writeln!(f, "{key}"),
218 Item::Comment { raw: comment } => writeln!(f, ";{comment}"),
219 Item::Blank { raw: _ } => f.write_str("\n"),
220 }
221 }
222}
223
/// Streaming INI parser.
///
/// The parser borrows the complete document up front and then hands out one
/// element at a time as it walks through the text.
///
/// See [`crate`] documentation for more information.
#[derive(Debug, Clone)]
pub struct Parser<'a> {
    /// Line counter; starts at zero and is bumped as line breaks are skipped.
    line: u32,
    /// `true` once a `SectionEnd` has been emitted and no section header line
    /// has been processed since.
    section_ended: bool,
    /// The part of the input that has not been consumed yet.
    state: &'a [u8],
}
236
237impl<'a> Parser<'a> {
238 /// Constructs a new `Parser` instance.
239 #[inline]
240 #[must_use]
241 pub const fn new(s: &'a str) -> Self {
242 let state = s.as_bytes();
243 Parser {
244 line: 0,
245 section_ended: false,
246 state,
247 }
248 }
249
250 /// Returns the line number the parser is currently at.
251 #[inline]
252 #[must_use]
253 pub const fn line(&self) -> u32 {
254 self.line
255 }
256
257 /// Returns the remainder of the input string.
258 #[inline]
259 #[must_use]
260 pub fn remainder(&self) -> &'a str {
261 from_utf8(self.state)
262 }
263
264 #[inline]
265 fn skip_ln(&mut self, mut s: &'a [u8]) {
266 if !s.is_empty() {
267 if s[0] == b'\r' {
268 s = &s[1..];
269 }
270 if !s.is_empty() && s[0] == b'\n' {
271 s = &s[1..];
272 }
273 self.line += 1;
274 }
275 self.state = s;
276 }
277
278 fn get_line_and_advance(&mut self, s: &'a [u8]) -> &'a str {
279 let i = parse::find_nl(s);
280 let line = from_utf8(&s[..i]);
281 self.skip_ln(&s[i..]);
282 line
283 }
284}
285
286impl<'a> Iterator for Parser<'a> {
287 type Item = Item<'a>;
288
289 fn next(&mut self) -> Option<Item<'a>> {
290 let s = self.state;
291
292 match s.first().copied() {
293 // Terminal case
294 None => {
295 if self.section_ended {
296 None
297 } else {
298 self.section_ended = true;
299 Some(Item::SectionEnd)
300 }
301 }
302 // Blank
303 Some(b'\r' | b'\n') => {
304 let line = self.get_line_and_advance(s);
305 Some(Item::Blank { raw: line })
306 }
307 // Comment
308 Some(b';' | b'#') => {
309 let line = self.get_line_and_advance(s);
310 Some(Item::Comment { raw: line })
311 }
312 // Section
313 Some(b'[') => {
314 if self.section_ended {
315 self.section_ended = false;
316 let i = parse::find_nl(s);
317 if s[i - 1] != b']' {
318 let error = from_utf8(&s[..i]);
319 self.skip_ln(&s[i..]);
320 return Some(Item::Error(error));
321 }
322 let section = from_utf8(&s[1..i - 1]);
323 let section = trim(section);
324 self.skip_ln(&s[i..]);
325 Some(Item::Section {
326 name: section,
327 raw: from_utf8(&s[..i]),
328 })
329 } else {
330 self.section_ended = true;
331 Some(Item::SectionEnd)
332 }
333 }
334 // Property
335 _ => {
336 let eol_or_eq = parse::find_nl_chr(s, b'=');
337 let key = from_utf8(&s[..eol_or_eq]);
338 let key = trim(key);
339 if s.get(eol_or_eq) != Some(&b'=') {
340 // Key only case
341 self.skip_ln(&s[eol_or_eq..]);
342 if key.is_empty() {
343 return Some(Item::Blank {
344 raw: from_utf8(&s[..eol_or_eq]),
345 });
346 }
347 Some(Item::Property {
348 key,
349 val: None,
350 raw: from_utf8(&s[..eol_or_eq]),
351 })
352 } else {
353 // Key + value case
354 let val_start = &s[eol_or_eq + 1..];
355
356 let i = parse::find_nl(val_start);
357 let value = from_utf8(&val_start[..i]);
358 let value = trim(value);
359
360 self.skip_ln(&val_start[i..]);
361
362 Some(Item::Property {
363 key,
364 val: Some(value),
365 raw: from_utf8(&s[..eol_or_eq + i + 1]),
366 })
367 }
368 }
369 }
370 }
371}
372
373impl core::iter::FusedIterator for Parser<'_> {}
374
375mod parse;
376#[cfg(test)]
377mod tests;