byte_parser/
lib.rs

1//! # Byte Parser
2//! A library that provides a functional way to easily parse a string or a slice.
3//!
4//! ## Basic Example
5//! ```
6//! use byte_parser::{StrParser, ParseIterator};
7//!
8//! let mut parser = StrParser::new("\
9//! 	key: value\n\
10//! 	other key: more : value\n\
11//! 	also valid\
12//! ");
13//!
14//! let lines: Vec<(&str, &str)> = parser
15//! 	.split_on_byte(b'\n')
16//! 	.map_and_collect(|line| {
17//!
18//! 		let key = line
19//! 			.record()
20//! 			.consume_while_byte_fn(|&b| b != b':')
21//! 			.to_str();
22//!
23//! 		let has_colon = line.advance().is_some();
24//! 		if !has_colon {
25//! 			return ("", key.trim_start());
26//! 		}
27//!
28//! 		let value = line
29//! 			.record()
30//! 			.consume_to_str();
31//!
32//! 		(key, value.trim_start())
33//! 	});
34//!
35//! assert_eq!(lines[0], ("key", "value"));
36//! assert_eq!(lines[1], ("other key", "more : value"));
37//! assert_eq!(lines[2], ("", "also valid"));
38//! ```
39//! 
40//! ## Example parsing a number
41//! ```
42//! # use std::str::FromStr;
43//! use byte_parser::{StrParser, ParseIterator};
44//!
45//! #[derive(Debug, PartialEq)]
46//! pub enum Number {
47//! 	Uint(usize),
48//! 	Integer(isize),
49//! 	Float(f32)
50//! }
51//!
52//! impl Number {
53//!		/// # Panics
54//!		/// Panics if invalid utf8 is found.
55//! 	/// Or if the digit is too large.
56//! 	pub fn from_parser<'s, I>(iter: &mut I) -> Option<Self>
57//! 	where I: ParseIterator<'s> {
58//! 		let mut iter = iter.record();
59//!
60//! 		// there could be a leading minus -
61//! 		let is_negative = iter
62//! 			.next_if(|&b| b == b'-')
63//! 			.is_some();
64//!
65//!			// consume first digits
66//! 		iter
67//! 			.while_byte_fn(u8::is_ascii_digit)
68//! 			.consume_at_least(1)
69//!				.ok()?;
70//!			
71//! 		// there could be a dot
72//! 		let has_dot = iter
73//! 			.next_if(|&b| b == b'.')
74//! 			.is_some();
75//!
76//! 		if !has_dot {
77//! 			let s = iter.to_str();
78//! 			let num = match is_negative {
79//! 				true => Self::Integer(
80//! 					s.parse().expect("digit to large")
81//! 				),
82//! 				false => Self::Uint(
83//! 					s.parse().expect("digit to large")
84//! 				)
85//! 			};
86//!
87//! 			return Some(num)
88//!			}
89//!
90//!			// consume next digits
91//! 		iter.consume_while_byte_fn(u8::is_ascii_digit);
92//!
93//! 		Some(Self::Float(
94//! 			iter.to_str().parse().expect("digit to large")
95//! 		))
96//! 	}
97//! }
98//!
99//! impl FromStr for Number {
100//! 	type Err = ();
101//! 	fn from_str(s: &str) -> Result<Self, ()> {
102//! 		let mut parser = StrParser::new(s);
103//!			let num = Self::from_parser(&mut parser)
104//! 			.ok_or(())?;
105//!
106//! 		// parser not exhausted
107//! 		if parser.advance().is_some() {
108//! 			return Err(())
109//! 		}
110//!
111//! 		Ok(num)
112//! 	}
113//! }
114//!
115//! assert_eq!(Number::Float(1.23), "1.23".parse().unwrap());
116//! assert_eq!(Number::Float(-32.1), "-32.1".parse().unwrap());
117//! assert_eq!(Number::Uint(420), "420".parse().unwrap());
118//! assert_eq!(Number::Integer(-42), "-42".parse().unwrap());
119//! assert!(".42".parse::<Number>().is_err());
120//! assert!("5.42 ".parse::<Number>().is_err());
121//! ```
122
123#![cfg_attr(docsrs, feature(doc_cfg))]
124
125pub mod position;
126mod parse_iterator;
127mod expect_byte;
128pub mod ignore_byte;
129pub mod while_byte_fn;
130pub mod split_on_byte;
131pub mod recorder;
132pub mod stop;
133pub mod pit;
134#[cfg(feature = "unstable-parse-iter")]
135#[cfg_attr(docsrs, doc(cfg(feature = "unstable-parse-iter")))]
136pub mod parse_iter;
137
138pub use parse_iterator::ParseIterator;
139pub use expect_byte::ExpectByte;
140use recorder::Recorder;
141use position::Position;
142use pit::ParserPointInTime;
143
144/// `ParseIterator` implementation for a slice.
145#[derive(Debug)]
146pub struct Parser<'s> {
147	slice: &'s [u8],
148	pit: ParserPointInTime
149}
150
151impl<'s> Parser<'s> {
152
153	/// Creates a new `Parser` from a slice.
154	pub fn new(slice: &'s [u8]) -> Self {
155		Self {
156			slice,
157			pit: ParserPointInTime {
158				pos: Position::null()
159			}
160		}
161	}
162
163}
164
165impl<'s> ParseIterator<'s> for Parser<'s> {
166
167	type PointInTime = ParserPointInTime;
168
169	fn slice(&self) -> &'s [u8] {
170		self.slice
171	}
172
173	fn pit(&self) -> Self::PointInTime {
174		self.pit
175	}
176
177	fn restore_pit(&mut self, pit: Self::PointInTime) {
178		self.pit = pit;
179	}
180
181	fn advance(&mut self) -> Option<()> {
182		let n = self.pit.pos + 1;
183
184		if n < self.slice.len() {
185			self.pit.pos = n.into();
186			Some(())
187		} else {
188			None
189		}
190	}
191
192	fn recorder(&self) -> Option<&Recorder> {
193		None
194	}
195
196	#[inline]
197	unsafe fn is_valid_utf8() -> bool {
198		false
199	}
200
201}
202
203
204/// `ParseIterator` implementation for a str.
205#[derive(Debug)]
206pub struct StrParser<'s> {
207	inner: &'s str,
208	pit: ParserPointInTime
209}
210
211impl<'s> StrParser<'s> {
212
213	/// Creates a new `StrParser` from a str.
214	pub fn new(inner: &'s str) -> Self {
215		Self {
216			inner,
217			pit: ParserPointInTime::new()
218		}
219	}
220
221}
222
223impl<'s> ParseIterator<'s> for StrParser<'s> {
224
225	type PointInTime = ParserPointInTime;
226
227	fn slice(&self) -> &'s [u8] {
228		self.inner.as_bytes()
229	}
230
231	fn pit(&self) -> Self::PointInTime {
232		self.pit
233	}
234
235	fn restore_pit(&mut self, pit: Self::PointInTime) {
236		self.pit = pit;
237	}
238
239	fn advance(&mut self) -> Option<()> {
240		let n = self.pit.pos + 1;
241
242		if n < self.inner.len() {
243			self.pit.pos = n.into();
244			Some(())
245		} else {
246			None
247		}
248	}
249
250	fn recorder(&self) -> Option<&Recorder> {
251		None
252	}
253
254	#[inline]
255	unsafe fn is_valid_utf8() -> bool {
256		true
257	}
258
259}
260
#[cfg(feature = "unstable-parse-iter")]
#[cfg_attr(docsrs, doc(cfg(feature = "unstable-parse-iter")))]
/// Builds an `Iterator` out of a `ParseIterator` and a closure.
///
/// Both arguments are passed on unchanged to `parse_iter::ParseIter::new`.
/// Only available when the `unstable-parse-iter` feature is enabled.
///
/// ## Example parsing arguments
/// ```
/// # use byte_parser::{StrParser, ParseIterator, parse_iter};
/// fn args(arg_str: &str) -> impl Iterator<Item=&str> {
/// 	parse_iter(
/// 		StrParser::new(arg_str),
/// 		|parser| {
///
/// 			// if we are in a string
/// 			if parser.advance_if(|&b| b == b'\'')? {
/// 				let s = parser.record()
/// 					.while_byte_fn(|&b| b != b'\'')
/// 					.consume_to_str();
///
/// 				// consume the closing ' and,
/// 				// if a space follows, consume it too
/// 				parser.advance();
/// 				parser.advance_if(|&b| b == b' ');
/// 				return Some(s)
/// 			}
///
/// 			// consume until a whitespace or an '
/// 			let s = parser.record()
/// 				.while_byte_fn(|&b| !matches!(b, b' ' | b'\''))
/// 				.consume_to_str();
///
/// 			// skip a following space
/// 			parser.advance_if(|&b| b == b' ');
///
/// 			Some(s)
/// 		}
/// 	)
/// }
///
/// let args: Vec<_> = args("arg1 'arg 2' arg3'arg 4'arg5").collect();
/// assert_eq!(args, ["arg1", "arg 2", "arg3", "arg 4", "arg5"]);
/// ```
pub fn parse_iter<'s, I, F, O>(i: I, f: F) -> parse_iter::ParseIter<I, F>
where
	I: ParseIterator<'s>,
	F: FnMut(&mut I) -> Option<O>
{
	parse_iter::ParseIter::new(i, f)
}
308
309// TESTS
#[cfg(test)]
mod tests {

	use super::*;

	/// `Parser` yields every byte of the slice in order, then `None`.
	#[test]
	fn parser_advance() {
		let bytes = b"my byte str";
		let mut parser = Parser::new(bytes);

		for &expected in bytes.iter() {
			assert_eq!(Some(expected), parser.next());
		}

		assert!(parser.next().is_none());
	}

	/// `StrParser` yields every byte of the string in order, then `None`.
	#[test]
	fn str_parser_advance() {
		let text = "my byte str";
		let mut parser = StrParser::new(text);

		for expected in text.bytes() {
			assert_eq!(Some(expected), parser.next());
		}

		assert!(parser.next().is_none());
	}

	/// After consuming three bytes, recording the rest returns the remaining str.
	#[test]
	fn str_parser_to_str() {
		let mut parser = StrParser::new("my byte str");
		parser.consume_len(3).unwrap();

		let mut recorder = parser.record();
		assert_eq!(recorder.consume_to_str(), "byte str");
	}

	/// A `StrParser` can drive a std `Iterator` through `std::iter::from_fn`.
	#[test]
	fn create_an_iterator() {
		fn make_iter(s: &str) -> impl Iterator<Item=&str> {
			let mut parser = StrParser::new(s);
			std::iter::from_fn(move || {
				// A fresh split is created on every call; the assertion below
				// relies on it continuing after the segment returned by the
				// previous call.
				let mut split = parser.split_on_byte(b' ');
				split.next()
					.map(|part| part.record().consume_to_str())
			})
		}

		let words: Vec<&str> = make_iter("my byte str").collect();
		assert_eq!(words, ["my", "byte", "str"]);
	}

	/// `parse_iter` turns a `StrParser` and a closure into an iterator of words.
	#[cfg(feature = "unstable-parse-iter")]
	#[test]
	fn test_parse_iter() {
		let mut iter = parse_iter(
			StrParser::new("my str"),
			|parser| {
				let mut split = parser.split_on_byte(b' ');
				let part = split.next()?;
				Some(part.record().consume_to_str())
			}
		);

		assert_eq!(iter.next(), Some("my"));
		assert_eq!(iter.next(), Some("str"));
		assert_eq!(iter.next(), None);
	}

}