// str_reader/lib.rs

//! # String reader
//!
//! Zero-allocation string reader. The string reader can be used to parse
//! all kinds of values from strings. It can be used for construction of
//! traditional lexical analyzers, for example. It is useful in situations
//! where you need to parse simply formatted strings but regular expressions
//! are too heavyweight.
//!
//! # Example
//!
//! Parsing an HTTP response header:
//!
//! ```
//! use std::num::ParseIntError;
//!
//! use str_reader::{ParseError, StringReader};
//!
//! /// Parse the first line of an HTTP response header.
//! fn parse_http_response_line(line: &str) -> Result<(u16, &str), HttpParseError> {
//!     let mut reader = StringReader::new(line);
//!
//!     reader.match_str("HTTP/")?;
//!
//!     match reader.read_word() {
//!         "1.0" => (),
//!         "1.1" => (),
//!         _ => return Err(HttpParseError),
//!     }
//!
//!     let status_code = reader.read_u16()?;
//!
//!     Ok((status_code, reader.as_str().trim()))
//! }
//!
//! #[derive(Debug)]
//! struct HttpParseError;
//!
//! impl From<ParseError> for HttpParseError {
//!     fn from(_: ParseError) -> Self {
//!         Self
//!     }
//! }
//!
//! impl From<ParseIntError> for HttpParseError {
//!     fn from(_: ParseIntError) -> Self {
//!         Self
//!     }
//! }
//!
//! let (status_code, status_msg) = parse_http_response_line("HTTP/1.1 404 Not Found").unwrap();
//!
//! assert_eq!(status_code, 404);
//! assert_eq!(status_msg, "Not Found");
//! ```

use std::{
    error::Error,
    fmt::{self, Display, Formatter},
    num::{ParseFloatError, ParseIntError},
    result,
    str::{Chars, FromStr},
};

/// String reader error.
///
/// Returned by the reading and matching methods of the string reader when
/// the input cannot satisfy the request.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum ParseError {
    /// The input was exhausted before a character could be read or matched.
    EmptyInput,
    /// The input does not start with the expected character or string.
    NoMatch,
}

71impl Display for ParseError {
72    fn fmt(&self, f: &mut Formatter) -> result::Result<(), fmt::Error> {
73        let msg = match *self {
74            Self::EmptyInput => "input is empty",
75            Self::NoMatch => "the input does not match",
76        };
77
78        f.write_str(msg)
79    }
80}
81
82impl Error for ParseError {}
83
/// String reader.
///
/// Borrows the input string and keeps a cursor into it. The character at the
/// cursor is cached so that it can be inspected without advancing the input.
#[derive(Debug, Clone)]
pub struct StringReader<'a> {
    /// Remaining (not yet consumed) part of the input.
    input: Chars<'a>,
    /// First character of the remaining input, or `None` if it is exhausted.
    current: Option<char>,
}

90impl<'a> StringReader<'a> {
91    /// Create a new reader for a given input.
92    ///
93    /// # Arguments
94    ///
95    /// * `input` - input string or an object that can be referenced as a
96    ///   string
97    pub fn new<T>(input: &'a T) -> Self
98    where
99        T: AsRef<str> + ?Sized,
100    {
101        let input = input.as_ref().chars();
102
103        // We do not want to advance the input just yet. If we did that the
104        // string matching methods would not work.
105        let current = input.clone().next();
106
107        Self { input, current }
108    }
109
110    /// Get the current character (if any) without advancing the input.
111    #[inline]
112    pub fn current_char(&self) -> Option<char> {
113        self.current
114    }
115
116    /// Get the next character or return an error if the input is empty.
117    pub fn read_char(&mut self) -> Result<char, ParseError> {
118        let res = self.input.next().ok_or(ParseError::EmptyInput)?;
119
120        // Peek for the next character without advancing the input.
121        self.current = self.input.clone().next();
122
123        Ok(res)
124    }
125
126    /// Match a given character to the input and, if successful, advance the
127    /// input by exactly one character. An error is returned if the input
128    /// character does not match with the given one or if the input is empty.
129    ///
130    /// # Arguments
131    ///
132    /// * `expected` - expected character
133    pub fn match_char(&mut self, expected: char) -> Result<(), ParseError> {
134        let c = self.current_char().ok_or(ParseError::EmptyInput)?;
135
136        if c != expected {
137            return Err(ParseError::NoMatch);
138        }
139
140        self.skip_char();
141
142        Ok(())
143    }
144
145    /// Skip one character.
146    pub fn skip_char(&mut self) {
147        // Remove the current character.
148        self.input.next();
149
150        // Peek for the next character without advancing the input.
151        self.current = self.input.clone().next();
152    }
153
154    /// Skip all whitespace characters.
155    pub fn skip_whitespace(&mut self) {
156        let rest = self.input.as_str().trim_start();
157
158        self.input = rest.chars();
159
160        // Peek for the next character without advancing the input.
161        self.current = self.input.clone().next();
162    }
163
164    /// Match a given string to the input and, if successful, advance the input
165    /// by the length of the given string. An error is returned if the input
166    /// does not start with the given string.
167    ///
168    /// # Arguments
169    ///
170    /// * `val` - expected string
171    pub fn match_str(&mut self, val: &str) -> Result<(), ParseError> {
172        let input = self.input.as_str();
173
174        if input.starts_with(val) {
175            let (_, rest) = input.split_at(val.len());
176
177            self.input = rest.chars();
178
179            // Peek for the next character without advancing the input.
180            self.current = self.input.clone().next();
181
182            Ok(())
183        } else {
184            Err(ParseError::NoMatch)
185        }
186    }
187
188    /// Read until a given condition is true or until the end of the input and
189    /// return the string.
190    ///
191    /// # Arguments
192    ///
193    /// * `cnd` - a closure that takes a single character and returns
194    /// true/false
195    pub fn read_until<F>(&mut self, cnd: F) -> &'a str
196    where
197        F: FnMut(char) -> bool,
198    {
199        let rest = self.input.as_str();
200
201        let index = rest.find(cnd).unwrap_or_else(|| rest.len());
202
203        self.split_to(index)
204    }
205
206    /// Read one word from the input and return it. A word ends with the first
207    /// whitespace character or with the end of the input. The method skips all
208    /// initial whitespace characters (if any).
209    #[inline]
210    pub fn read_word(&mut self) -> &'a str {
211        self.skip_whitespace();
212        self.read_until(char::is_whitespace)
213    }
214
215    /// Read the next word and parse it. The input won't be advanced if the
216    /// word cannot be parsed.
217    pub fn parse_word<T>(&mut self) -> Result<T, T::Err>
218    where
219        T: FromStr,
220    {
221        let (word, rest) = self.first_word();
222
223        let parsed = word.parse()?;
224
225        self.input = rest.chars();
226
227        // Peek for the next character without advancing the input.
228        self.current = self.input.clone().next();
229
230        Ok(parsed)
231    }
232
233    /// Read a decimal integer as i8.
234    #[inline]
235    pub fn read_i8(&mut self) -> Result<i8, ParseIntError> {
236        self.parse_word()
237    }
238
239    /// Read a decimal integer as u8.
240    #[inline]
241    pub fn read_u8(&mut self) -> Result<u8, ParseIntError> {
242        self.parse_word()
243    }
244
245    /// Read a decimal integer as i16.
246    #[inline]
247    pub fn read_i16(&mut self) -> Result<i16, ParseIntError> {
248        self.parse_word()
249    }
250
251    /// Read a decimal integer as u16.
252    #[inline]
253    pub fn read_u16(&mut self) -> Result<u16, ParseIntError> {
254        self.parse_word()
255    }
256
257    /// Read a decimal integer as i32.
258    #[inline]
259    pub fn read_i32(&mut self) -> Result<i32, ParseIntError> {
260        self.parse_word()
261    }
262
263    /// Read a decimal integer as u32.
264    #[inline]
265    pub fn read_u32(&mut self) -> Result<u32, ParseIntError> {
266        self.parse_word()
267    }
268
269    /// Read a decimal integer as i64.
270    #[inline]
271    pub fn read_i64(&mut self) -> Result<i64, ParseIntError> {
272        self.parse_word()
273    }
274
275    /// Read a decimal integer as u64.
276    #[inline]
277    pub fn read_u64(&mut self) -> Result<u64, ParseIntError> {
278        self.parse_word()
279    }
280
281    /// Read a decimal integer as i128.
282    #[inline]
283    pub fn read_i128(&mut self) -> Result<i128, ParseIntError> {
284        self.parse_word()
285    }
286
287    /// Read a decimal integer as u128.
288    #[inline]
289    pub fn read_u128(&mut self) -> Result<u128, ParseIntError> {
290        self.parse_word()
291    }
292
293    /// Read a decimal integer as isize.
294    #[inline]
295    pub fn read_isize(&mut self) -> Result<isize, ParseIntError> {
296        self.parse_word()
297    }
298
299    /// Read a decimal integer as usize.
300    #[inline]
301    pub fn read_usize(&mut self) -> Result<usize, ParseIntError> {
302        self.parse_word()
303    }
304
305    /// Read a floating point number as f32.
306    #[inline]
307    pub fn read_f32(&mut self) -> Result<f32, ParseFloatError> {
308        self.parse_word()
309    }
310
311    /// Read a floating point number as f64.
312    #[inline]
313    pub fn read_f64(&mut self) -> Result<f64, ParseFloatError> {
314        self.parse_word()
315    }
316
317    /// Check if the reader is empty.
318    #[inline]
319    pub fn is_empty(&self) -> bool {
320        self.current_char().is_none()
321    }
322
323    /// Get the rest of the input.
324    #[inline]
325    pub fn as_str(&self) -> &'a str {
326        self.input.as_str()
327    }
328
329    /// Get the first word and the remainder of the input without advancing the
330    /// input.
331    fn first_word(&self) -> (&'a str, &'a str) {
332        let input = self.input.as_str().trim_start();
333
334        let index = input
335            .find(char::is_whitespace)
336            .unwrap_or_else(|| input.len());
337
338        input.split_at(index)
339    }
340
341    /// Split the input at a given index and return the first part.
342    ///
343    /// The input will contain the remaining part after this operation.
344    fn split_to(&mut self, index: usize) -> &'a str {
345        let rest = self.input.as_str();
346
347        let (word, rest) = rest.split_at(index);
348
349        self.input = rest.chars();
350
351        // Peek for the next character without advancing the input.
352        self.current = self.input.clone().next();
353
354        word
355    }
356}
357
#[cfg(test)]
mod tests {
    use super::StringReader;

    /// Walk through a mixed input exercising every public reader operation
    /// and checking the remaining input after each step.
    #[test]
    fn test_reader() {
        let input = "Hello, World!!!   1234\n\tfoo-bar";

        let mut reader = StringReader::new(input);

        assert!(!reader.is_empty());
        assert_eq!(reader.current_char(), Some('H'));
        assert_eq!(reader.as_str(), input);

        let word = reader.read_word();

        assert_eq!(word, "Hello,");
        assert_eq!(reader.as_str(), " World!!!   1234\n\tfoo-bar");

        reader.skip_whitespace();

        assert_eq!(reader.as_str(), "World!!!   1234\n\tfoo-bar");

        let c = reader.read_char();

        assert_eq!(c.ok(), Some('W'));

        let res = reader.match_char('o');

        assert!(res.is_ok());

        let res = reader.match_char('R');

        assert!(res.is_err());

        let res = reader.match_str("RLD!!!");

        assert!(res.is_err());

        let res = reader.match_str("rld!!!");

        assert!(res.is_ok());
        assert_eq!(reader.as_str(), "   1234\n\tfoo-bar");

        let n = reader.read_u32();

        assert_eq!(n.ok(), Some(1234));
        assert_eq!(reader.as_str(), "\n\tfoo-bar");

        // A failed parse must leave the input untouched.
        let n = reader.read_u32();

        assert!(n.is_err());
        assert_eq!(reader.as_str(), "\n\tfoo-bar");

        let word = reader.read_word();

        assert_eq!(word, "foo-bar");
        assert_eq!(reader.as_str(), "");
        assert!(reader.is_empty());

        // Reading from an exhausted reader yields empty results or errors.
        let word = reader.read_word();

        assert_eq!(word, "");

        let c = reader.read_char();

        assert!(c.is_err());
        assert!(reader.is_empty());
        assert_eq!(reader.as_str(), "");
    }

    /// Multi-byte characters must be consumed as whole characters.
    #[test]
    fn test_multibyte_input() {
        let mut reader = StringReader::new("žluťoučký kůň");

        assert_eq!(reader.read_char().ok(), Some('ž'));
        assert!(reader.match_str("luťoučký").is_ok());
        assert_eq!(reader.as_str(), " kůň");
        assert_eq!(reader.read_word(), "kůň");
        assert!(reader.is_empty());
    }

    /// A word that cannot be parsed stays in the input, including the
    /// whitespace in front of it.
    #[test]
    fn test_failed_parse_keeps_input() {
        let mut reader = StringReader::new("  abc 12");

        assert!(reader.read_i32().is_err());
        assert_eq!(reader.as_str(), "  abc 12");
        assert_eq!(reader.read_word(), "abc");
        assert_eq!(reader.read_i32().ok(), Some(12));
        assert!(reader.is_empty());
    }
}