byte_parser/lib.rs
1//! # Byte Parser
//! A library that provides a functional way to easily parse a string or a slice.
3//!
4//! ## Basic Example
5//! ```
6//! use byte_parser::{StrParser, ParseIterator};
7//!
8//! let mut parser = StrParser::new("\
9//! key: value\n\
10//! other key: more : value\n\
11//! also valid\
12//! ");
13//!
14//! let lines: Vec<(&str, &str)> = parser
15//! .split_on_byte(b'\n')
16//! .map_and_collect(|line| {
17//!
18//! let key = line
19//! .record()
20//! .consume_while_byte_fn(|&b| b != b':')
21//! .to_str();
22//!
23//! let has_colon = line.advance().is_some();
24//! if !has_colon {
25//! return ("", key.trim_start());
26//! }
27//!
28//! let value = line
29//! .record()
30//! .consume_to_str();
31//!
32//! (key, value.trim_start())
33//! });
34//!
35//! assert_eq!(lines[0], ("key", "value"));
36//! assert_eq!(lines[1], ("other key", "more : value"));
37//! assert_eq!(lines[2], ("", "also valid"));
38//! ```
39//!
40//! ## Example parsing a number
41//! ```
42//! # use std::str::FromStr;
43//! use byte_parser::{StrParser, ParseIterator};
44//!
45//! #[derive(Debug, PartialEq)]
46//! pub enum Number {
47//! Uint(usize),
48//! Integer(isize),
49//! Float(f32)
50//! }
51//!
52//! impl Number {
53//! /// # Panics
54//! /// Panics if invalid utf8 is found.
//! /// Or if the number is too large.
56//! pub fn from_parser<'s, I>(iter: &mut I) -> Option<Self>
57//! where I: ParseIterator<'s> {
58//! let mut iter = iter.record();
59//!
60//! // there could be a leading minus -
61//! let is_negative = iter
62//! .next_if(|&b| b == b'-')
63//! .is_some();
64//!
65//! // consume first digits
66//! iter
67//! .while_byte_fn(u8::is_ascii_digit)
68//! .consume_at_least(1)
69//! .ok()?;
70//!
71//! // there could be a dot
72//! let has_dot = iter
73//! .next_if(|&b| b == b'.')
74//! .is_some();
75//!
76//! if !has_dot {
77//! let s = iter.to_str();
78//! let num = match is_negative {
79//! true => Self::Integer(
80//! s.parse().expect("digit to large")
81//! ),
82//! false => Self::Uint(
83//! s.parse().expect("digit to large")
84//! )
85//! };
86//!
87//! return Some(num)
88//! }
89//!
90//! // consume next digits
91//! iter.consume_while_byte_fn(u8::is_ascii_digit);
92//!
93//! Some(Self::Float(
94//! iter.to_str().parse().expect("digit to large")
95//! ))
96//! }
97//! }
98//!
99//! impl FromStr for Number {
100//! type Err = ();
101//! fn from_str(s: &str) -> Result<Self, ()> {
102//! let mut parser = StrParser::new(s);
103//! let num = Self::from_parser(&mut parser)
104//! .ok_or(())?;
105//!
106//! // parser not exhausted
107//! if parser.advance().is_some() {
108//! return Err(())
109//! }
110//!
111//! Ok(num)
112//! }
113//! }
114//!
115//! assert_eq!(Number::Float(1.23), "1.23".parse().unwrap());
116//! assert_eq!(Number::Float(-32.1), "-32.1".parse().unwrap());
117//! assert_eq!(Number::Uint(420), "420".parse().unwrap());
118//! assert_eq!(Number::Integer(-42), "-42".parse().unwrap());
119//! assert!(".42".parse::<Number>().is_err());
120//! assert!("5.42 ".parse::<Number>().is_err());
121//! ```
122
123#![cfg_attr(docsrs, feature(doc_cfg))]
124
125pub mod position;
126mod parse_iterator;
127mod expect_byte;
128pub mod ignore_byte;
129pub mod while_byte_fn;
130pub mod split_on_byte;
131pub mod recorder;
132pub mod stop;
133pub mod pit;
134#[cfg(feature = "unstable-parse-iter")]
135#[cfg_attr(docsrs, doc(cfg(feature = "unstable-parse-iter")))]
136pub mod parse_iter;
137
138pub use parse_iterator::ParseIterator;
139pub use expect_byte::ExpectByte;
140use recorder::Recorder;
141use position::Position;
142use pit::ParserPointInTime;
143
144/// `ParseIterator` implementation for a slice.
145#[derive(Debug)]
146pub struct Parser<'s> {
147 slice: &'s [u8],
148 pit: ParserPointInTime
149}
150
151impl<'s> Parser<'s> {
152
153 /// Creates a new `Parser` from a slice.
154 pub fn new(slice: &'s [u8]) -> Self {
155 Self {
156 slice,
157 pit: ParserPointInTime {
158 pos: Position::null()
159 }
160 }
161 }
162
163}
164
165impl<'s> ParseIterator<'s> for Parser<'s> {
166
167 type PointInTime = ParserPointInTime;
168
169 fn slice(&self) -> &'s [u8] {
170 self.slice
171 }
172
173 fn pit(&self) -> Self::PointInTime {
174 self.pit
175 }
176
177 fn restore_pit(&mut self, pit: Self::PointInTime) {
178 self.pit = pit;
179 }
180
181 fn advance(&mut self) -> Option<()> {
182 let n = self.pit.pos + 1;
183
184 if n < self.slice.len() {
185 self.pit.pos = n.into();
186 Some(())
187 } else {
188 None
189 }
190 }
191
192 fn recorder(&self) -> Option<&Recorder> {
193 None
194 }
195
196 #[inline]
197 unsafe fn is_valid_utf8() -> bool {
198 false
199 }
200
201}
202
203
204/// `ParseIterator` implementation for a str.
205#[derive(Debug)]
206pub struct StrParser<'s> {
207 inner: &'s str,
208 pit: ParserPointInTime
209}
210
211impl<'s> StrParser<'s> {
212
213 /// Creates a new `StrParser` from a str.
214 pub fn new(inner: &'s str) -> Self {
215 Self {
216 inner,
217 pit: ParserPointInTime::new()
218 }
219 }
220
221}
222
223impl<'s> ParseIterator<'s> for StrParser<'s> {
224
225 type PointInTime = ParserPointInTime;
226
227 fn slice(&self) -> &'s [u8] {
228 self.inner.as_bytes()
229 }
230
231 fn pit(&self) -> Self::PointInTime {
232 self.pit
233 }
234
235 fn restore_pit(&mut self, pit: Self::PointInTime) {
236 self.pit = pit;
237 }
238
239 fn advance(&mut self) -> Option<()> {
240 let n = self.pit.pos + 1;
241
242 if n < self.inner.len() {
243 self.pit.pos = n.into();
244 Some(())
245 } else {
246 None
247 }
248 }
249
250 fn recorder(&self) -> Option<&Recorder> {
251 None
252 }
253
254 #[inline]
255 unsafe fn is_valid_utf8() -> bool {
256 true
257 }
258
259}
260
#[cfg(feature = "unstable-parse-iter")]
#[cfg_attr(docsrs, doc(cfg(feature = "unstable-parse-iter")))]
/// Wraps a `ParseIterator` and a closure into a `parse_iter::ParseIter`.
///
/// The parser `i` and the closure `f` are handed to `ParseIter::new`.
/// `f` receives the parser as `&mut I` and returns an `Option<O>`; the
/// example below collects those values through the `Iterator` interface.
///
/// ## Example parsing arguments
/// ```
/// # use byte_parser::{StrParser, ParseIterator, parse_iter};
/// fn args(arg_str: &str) -> impl Iterator<Item=&str> {
///     parse_iter(
///         StrParser::new(arg_str),
///         |parser| {
///
///             // if we are in a string
///             if parser.advance_if(|&b| b == b'\'')? {
///                 let s = parser.record()
///                     .while_byte_fn(|&b| b != b'\'')
///                     .consume_to_str();
///
///                 // consume the ending ' and
///                 // if a space follows consume it too
///                 parser.advance();
///                 parser.advance_if(|&b| b == b' ');
///                 return Some(s)
///             }
///
///             // consume until a whitespace or an '
///             let s = parser.record()
///                 .while_byte_fn(|&b| !matches!(b, b' ' | b'\''))
///                 .consume_to_str();
///
///             // skip an empty space
///             parser.advance_if(|&b| b == b' ');
///
///             Some(s)
///         }
///     )
/// }
///
/// let args: Vec<_> = args("arg1 'arg 2' arg3'arg 4'arg5").collect();
/// assert_eq!(args, ["arg1", "arg 2", "arg3", "arg 4", "arg5"]);
/// ```
pub fn parse_iter<'s, I, F, O>(i: I, f: F) -> parse_iter::ParseIter<I, F>
where
    I: ParseIterator<'s>,
    F: FnMut(&mut I) -> Option<O>,
{
    // The function and the module share a name; inside a `use` path the
    // first segment is a module lookup, so this resolves to the module.
    use parse_iter::ParseIter;

    ParseIter::new(i, f)
}
308
309// TESTS
#[cfg(test)]
mod tests {
    use super::*;

    /// `Parser` hands out every byte of the slice in order and then ends.
    #[test]
    fn parser_advance() {
        let bytes = b"my byte str";
        let mut parser = Parser::new(bytes);

        for &expected in bytes.iter() {
            assert_eq!(expected, parser.next().unwrap());
        }

        assert!(parser.next().is_none());
    }

    /// `StrParser` hands out every byte of the string in order and then ends.
    #[test]
    fn str_parser_advance() {
        let text = "my byte str";
        let mut parser = StrParser::new(text);

        for &expected in text.as_bytes() {
            assert_eq!(expected, parser.next().unwrap());
        }

        assert!(parser.next().is_none());
    }

    /// After skipping three bytes, recording to the end yields the rest.
    #[test]
    fn str_parser_to_str() {
        let mut parser = StrParser::new("my byte str");
        parser.consume_len(3).unwrap();

        let rest = parser.record().consume_to_str();
        assert_eq!("byte str", rest);
    }

    /// A `ParseIterator` can drive a std `Iterator` through `iter::from_fn`.
    #[test]
    fn create_an_iterator() {
        fn make_iter(s: &str) -> impl Iterator<Item = &str> {
            let mut parser = StrParser::new(s);
            std::iter::from_fn(move || {
                // A fresh `split_on_byte` is created on every call; it
                // continues from wherever the parser currently stands.
                let mut split = parser.split_on_byte(b' ');
                split.next().map(|part| part.record().consume_to_str())
            })
        }

        let words: Vec<&str> = make_iter("my byte str").collect();
        assert_eq!(words, ["my", "byte", "str"]);
    }

    /// `parse_iter` yields one item per `Some` returned by the closure.
    #[cfg(feature = "unstable-parse-iter")]
    #[test]
    fn test_parse_iter() {
        let mut iter = parse_iter(StrParser::new("my str"), |parser| {
            let mut split = parser.split_on_byte(b' ');
            let part = split.next()?;
            Some(part.record().consume_to_str())
        });

        assert_eq!(iter.next(), Some("my"));
        assert_eq!(iter.next(), Some("str"));
        assert_eq!(iter.next(), None);
    }
}
390}