byte_parser/
parse_iterator.rs

1
2use crate::{
3	pit::PointInTime,
4	ignore_byte::IgnoreByte,
5	while_byte_fn::WhileByteFn,
6	split_on_byte::SplitOnByte,
7	recorder::{Recorder, RecordIter},
8	stop::Stop,
9	expect_byte::ExpectByte
10};
11
12/// The main trait of this crate.
13///
/// This trait makes it easier to parse a slice or a str.
15///
16/// This trait is lazy, if something should happen you need to `consume` it.  
17/// For example, `to_str` only works if you call `record` and `consume` first.
18pub trait ParseIterator<'s> {// s for slice
19
	/// The type that is used to store information about the current position.
	///
	/// Values of this type are returned by `pit` and handed back to
	/// `restore_pit`.
	type PointInTime: PointInTime;

	/// Returns the full underlying slice.
	///
	/// The positions stored in a `PointInTime` are used as indexes into
	/// this slice (see `byte` and `to_slice`).
	fn slice(&self) -> &'s [u8];

	/// Returns the current position. Should be used in combination with
	/// `restore_pit`, for example to look ahead and rewind afterwards
	/// like `peek` does.
	fn pit(&self) -> Self::PointInTime;

	/// Restore to a given position.
	///
	/// ## Warning
	/// Only call this method with a pit that was received from this instance.
	fn restore_pit(&mut self, pit: Self::PointInTime);

	/// Advances the internal position.
	///
	/// Returns `None` if the iterator is empty, meaning that the position
	/// could not be advanced. A single call is allowed to increase the
	/// position by more than one.
	fn advance(&mut self) -> Option<()>;

	/// Returns a `Recorder` if recording was started.
	///
	/// `to_slice` uses the position of this recorder as the start of the
	/// recorded range.
	fn recorder(&self) -> Option<&Recorder>;
41
42	/// Advances if `advance_if` returns `true`. 
43	/// Returns `None` if the iterator is empty.
44	fn advance_if<F>(&mut self, advance_if: F) -> Option<bool>
45	where F: FnOnce(&u8) -> bool {
46		match advance_if(&self.peek()?) {
47			true => self.advance().map(|_| true),
48			false => Some(false)
49		}
50	}
51
52	/// Returns the current byte if it exists.
53	#[inline]
54	fn byte(&self) -> Option<u8> {
55		let pos = self.pit().pos().opt()?;
56		self.slice().get(pos).map(|&a| a)
57	}
58
59	/// Returns the next byte if it exists and advances the internal position.
60	#[inline]
61	fn next(&mut self) -> Option<u8> {
62		self.advance()?;
63		// just check that everything works as expected
64		debug_assert!(self.byte().is_some());
65		self.byte()
66	}
67
68	/// Advances one if `next_if` returns `true`. 
69	/// Returns `None` if did not advance.
70	#[inline]
71	fn next_if<F>(&mut self, next_if: F) -> Option<u8>
72	where F: FnOnce(&u8) -> bool {
73		match next_if(&self.peek()?) {
74			true => self.next(),
75			false => None
76		}
77	}
78
79	/// Returns the next byte without advancing the internal position.
80	#[inline]
81	fn peek(&mut self) -> Option<u8> {
82		let pit = self.pit();
83		let n = self.next();
84		self.restore_pit(pit);
85		n
86	}
87
88	/// Returns the next x bytes without advancing the internal position.
89	#[inline]
90	fn peek_len(&mut self, len: usize) -> Option<&'s [u8]>
91	where Self: Sized {
92		let pit = self.pit();
93		let s = self.record()
94			.consume_len(len)
95			.map(|iter| iter.to_slice())
96			.ok();
97		self.restore_pit(pit);
98		s
99	}
100
101	/// Tries to get the byte at the given position, without advancing.
102	#[inline]
103	fn peek_at(&mut self, pos: usize) -> Option<u8> {
104		assert!(pos > 0, "peek_at pos must be bigger than 0");
105
106		let pit = self.pit();
107		let n = self.consume_len(pos - 1).ok()
108			.map(|p| p.next())
109			.flatten();
110		self.restore_pit(pit);
111		n
112	}
113
	/// Wraps this iterator in an `IgnoreByte`, which skips a given byte
	/// when calling next.
	///
	/// ## Warning
	/// If you later call `to_slice` or a similar method
	/// the skipped byte will still be returned.
	///
	/// ## Example
	/// ```
	/// # use byte_parser::{Parser, ParseIterator};
	/// let mut parser = Parser::new(b"abc");
	/// let mut parser = parser.ignore_byte(b'b');
	/// assert_eq!(b'a', parser.next().unwrap());
	/// assert_eq!(b'c', parser.next().unwrap());
	/// ```
	#[inline]
	fn ignore_byte(&mut self, byte: u8) -> IgnoreByte<'_, Self>
	where Self: Sized {
		IgnoreByte::new(self, byte)
	}
133
	/// Wraps this iterator and the function `f` in a `WhileByteFn`, which
	/// advances while the function returns `true`.
	///
	/// See `consume_while_byte_fn` for a variant that consumes the
	/// returned iterator right away.
	#[inline]
	fn while_byte_fn<F>(&mut self, f: F) -> WhileByteFn<'_, Self, F>
	where 
		Self: Sized,
		F: Fn(&u8) -> bool {
		WhileByteFn::new(self, f)
	}
142
143	/// Consumes until the iterator is empty. 
144	/// Meaning that `advance` returns None.
145	#[inline]
146	fn consume(&mut self) -> &mut Self {
147		while let Some(_) = self.advance() {}
148		self
149	}
150
151	/// Consumes until the iterator is empty,
152	/// and returns how many times advance got called.
153	#[inline]
154	fn consume_and_count(&mut self) -> usize {
155		let mut c = 0;
156		while let Some(_) = self.advance() {
157			c += 1
158		}
159		c
160	}
161
162	/// Consumes a given length. Returns how much was consumed if could
163	/// not consume all.
164	#[inline]
165	fn consume_len(&mut self, len: usize) -> Result<&mut Self, usize> {
166		// we can not just increase the internal position
167		// because calling advance could increase the position more than once
168		for i in 0..len {
169			self.advance().ok_or(i)?;
170		}
171
172		Ok(self)
173	}
174
175	/// Consumes until the iterator is empty. 
176	/// Returns `Err(len)` if could not consume `len`.
177	#[inline]
178	fn consume_at_least(&mut self, len: usize) -> Result<&mut Self, usize> {
179		self.consume_len(len)?;
180		Ok(self.consume())
181	}
182
183	/// Consumes until the iterator is empty, returning how much was consumed. 
184	/// Returns `Err(len)` if could not consume `len`.
185	#[inline]
186	fn consume_at_least_and_count(&mut self, len: usize) -> Result<usize, usize> {
187		self.consume_len(len)?;
188		Ok(self.consume_and_count() + len)
189	}
190
191	/// Consumes while the function returns `true`.
192	#[inline]
193	fn consume_while_byte_fn<F>(&mut self, f: F) -> &mut Self
194	where 
195		Self: Sized,
196		F: Fn(&u8) -> bool {
197		self.while_byte_fn(f).consume();
198		self
199	}
200
201	/// Consumes while a give `byte` is returned.
202	#[inline]
203	fn consume_while_byte(&mut self, byte: u8) -> &mut Self
204	where Self: Sized {
205		self.consume_while_byte_fn(|&b| b == byte)
206	}
207
208	// Consumes while an ascii whitespace is returned.
209	// #[inline]
210	// fn consume_while_ascii_whitespace(&mut self) -> &mut Self
211	// where Self: Sized {
212	// 	self.consume_while_byte_fn(u8::is_ascii_whitespace)
213	// }
214
	/// Wraps this iterator in a `SplitOnByte`, which splits the iterator
	/// at a given byte.
	///
	/// As the example shows, every call to `next` on the returned splitter
	/// returns the next part, and the byte that was split on is not
	/// contained in any part.
	///
	/// ## Example
	/// ```
	/// # use byte_parser::{StrParser, ParseIterator};
	/// let mut parser = StrParser::new("Hello World!");
	/// let mut splitter = parser.split_on_byte(b' ');
	///
	/// let hello = splitter.next().unwrap()
	/// 	.record().consume_to_str();
	/// let world = splitter.next().unwrap()
	/// 	.record().consume_to_str();
	///
	/// assert_eq!(hello, "Hello");
	/// assert_eq!(world, "World!");
	/// assert!(splitter.next().is_none());
	/// ```
	#[inline]
	fn split_on_byte(&mut self, byte: u8) -> SplitOnByte<'_, Self>
	where Self: Sized {
		SplitOnByte::new(self, byte)
	}
237
238	#[inline]
239	fn count_byte(&mut self, byte: u8) -> usize
240	where Self: Sized {
241		self.while_byte_fn(|&b| b == byte)
242			.consume_and_count()
243	}
244
	/// Starts a new `Recorder` which starts recording at this position.
	///
	/// The returned `RecordIter` wraps this iterator. Methods like
	/// `to_slice` and `to_str` need to be called on it, since they panic
	/// if no recorder is present.
	#[inline]
	fn record(&mut self) -> RecordIter<'_, Self>
	where Self: Sized {
		RecordIter::new(self)
	}
251
252	/// Returns a slice from the start of recording until now.
253	///
254	/// ## Panics
255	/// If not called in context of a recorder. Meaning before
256	/// calling `record`.
257	#[inline]
258	fn to_slice(&self) -> &'s [u8] {
259		let start = self.recorder().expect("no recorder found").pos() + 1;
260		let end = self.pit().record_pos() + 1;
261
262		&self.slice()[start..end]
263	}
264
265	/// Returns a `str` from the start of recording until the current position
266	/// without checking if the data is valid utf8.
267	/// ## Panics
268	/// Panics if not called after `record` gets called.
269	/// ## Safety
270	/// This function is safe if `Self::is_valid_utf8` returns `true`.
271	#[inline]
272	unsafe fn to_str_unchecked(&self) -> &'s str {
273		std::str::from_utf8_unchecked(self.to_slice())
274	}
275
	/// Returns `true` if the entire underlying slice is guaranteed to be
	/// valid utf8.
	///
	/// `to_str` and `try_to_str` call this to decide if the recorded bytes
	/// need to be validated.
	///
	/// ## Safety
	/// Returning `false` is always safe. 
	/// If you return `true` the entire underlying slice must be valid utf8.
	unsafe fn is_valid_utf8() -> bool;
280
281	/// Returns a `str` from the start of recording until the current position.
282	///
283	/// ## Example
284	/// ```
285	/// # use byte_parser::{Parser, StrParser, ParseIterator};
286	/// let str_from_slice = Parser::new(b"abc")
287	///		.record()
288	/// 	.consume()
289	/// 	.to_str();
290	/// assert_eq!(str_from_slice, "abc");
291	///
292	/// let str_from_str = StrParser::new("abc")
293	/// 	.record()
294	/// 	.consume()
295	/// 	.to_str();
296	/// assert_eq!(str_from_str, "abc");
297	/// ```
298	///
299	/// ## Panics
300	/// Panics if not called after `record` was called. 
301	/// Or if invalid utf8 is present.
302	#[inline]
303	fn to_str(&self) -> &'s str {
304		if unsafe { Self::is_valid_utf8() } {
305			// Safe because is_valid_utf8 guaranties everything is valid utf8
306			unsafe { self.to_str_unchecked() }
307		} else {
308			std::str::from_utf8(self.to_slice()).expect("invalid utf8")
309		}
310	}
311
312	/// Returns a `str` from the start of recording until the current position.
313	///
314	/// ## Example
315	/// ```
316	/// # use byte_parser::{Parser, StrParser, ParseIterator};
317	/// let str_from_slice = Parser::new(b"abc")
318	///		.record()
319	/// 	.consume()
320	/// 	.try_to_str().expect("slice is not valid utf8");
321	/// assert_eq!(str_from_slice, "abc");
322	///
323	/// let str_from_str = StrParser::new("abc")
324	/// 	.record()
325	/// 	.consume()
326	/// 		// can never return Err
327	/// 	.try_to_str().unwrap();
328	/// assert_eq!(str_from_str, "abc");
329	/// ```
330	///
331	/// ## Panics
332	/// Panics if not called after `record` was called.
333	#[inline]
334	fn try_to_str(&self) -> Result<&'s str, std::str::Utf8Error> {
335		if unsafe { Self::is_valid_utf8() } {
336			// Safe because is_valid_utf8 guaranties everything is valid utf8
337			Ok(unsafe { self.to_str_unchecked() })
338		} else {
339			std::str::from_utf8(self.to_slice())
340		}
341	}
342
343	/// Consumes the iterator and then returns a slice from the start of recording
344	/// until the current position.
345	///
346	/// ## Panics
347	/// Panics if not called after `record` was called.
348	#[inline]
349	fn consume_to_slice(&mut self) -> &'s [u8] {
350		self.consume().to_slice()
351	}
352
353	/// Consumes the iterator and then returns a str from the start of recording
354	/// until the current position. Without checking if the underlying data
355	/// is valid utf8.
356	///
357	/// ## Panics
358	/// Panics if not called after `record` was called.
359	#[inline]
360	unsafe fn consume_to_str_unchecked(&mut self) -> &'s str {
361		self.consume().to_str_unchecked()
362	}
363
364	/// Consumes the iterator and then returns a str from the start of recording
365	/// until the current position.
366	///
367	/// ## Panics
368	/// Panics if not called after `record` was called or if the data contains invalid
369	/// utf8.
370	#[inline]
371	fn consume_to_str(&mut self) -> &'s str {
372		self.consume().to_str()
373	}
374
375	/// Consumes the iterator and then returns a str from the start of recording
376	/// until the current position if the data is valid utf8.
377	///
378	/// ## Panics
379	/// Panics if not called after `record` was called.
380	#[inline]
381	fn consume_try_to_str(&mut self) -> Result<&'s str, std::str::Utf8Error> {
382		self.consume().try_to_str()
383	}
384
385	/// Returns ```&mut Self``` if the function returns `true` on the next byte.
386	/// Else returns the byte that was received.
387	#[inline]
388	fn expect_byte_fn<F>(&mut self, f: F) -> Result<&mut Self, Option<u8>>
389	where F: Fn(u8) -> bool {
390		self.next()
391			.expect_byte_fn(f)
392			.map(|_| self)
393	}
394
395	/// Returns ```&mut Self``` if the function byte is equal to the next byte.
396	/// Else returns the actual byte that was received.
397	#[inline]
398	fn expect_byte(&mut self, byte: u8) -> Result<&mut Self, Option<u8>> {
399		self.expect_byte_fn(|b| b == byte)
400	}
401
402	/// Returns ```&mut Self``` if the end was reached (next returns None).
403	#[inline]
404	fn expect_none(&mut self) -> Result<&mut Self, u8> {
405		match self.next() {
406			Some(b) => Err(b),
407			None => Ok(self)
408		}
409	}
410
	/// Wraps this iterator in a `Stop`, a `ParseIterator` that always
	/// returns None.
	///
	/// ## Example
	/// ```
	/// # use byte_parser::{StrParser, ParseIterator};
	/// let mut s = StrParser::new("abc");
	/// assert_eq!(b'a', s.next().unwrap());
	/// let mut s = s.stop();
	/// assert!(s.next().is_none());
	/// ```
	#[inline]
	fn stop(&mut self) -> Stop<'_, Self>
	where Self: Sized {
		Stop::new(self)
	}
426
427}
428
429#[cfg(test)]
430mod tests {
431
432	use crate::*;
433
	// count_byte counts an uninterrupted run of a byte
	// and does not consume the first byte that differs
	#[test]
	fn test_count_byte() {

		let s = b"baaaab";

		let mut parser = Parser::new( s );
		// the first byte is a `b`, nothing gets counted
		assert_eq!( 0, parser.count_byte(b'a') );
		assert_eq!( b'b', parser.next().unwrap() );
		// all four `a` get counted, the last `b` is still there
		assert_eq!( 4, parser.count_byte(b'a') );
		assert_eq!( b'b', parser.next().unwrap() );
		assert!( parser.next().is_none() );
		// nothing left to count at the end
		assert_eq!( 0, parser.count_byte(b'a') );

	}
448
	// stacks ignore_byte, split_on_byte and count_byte on top of each other
	#[test]
	fn combining_multiple_iters() {

		// without `\r` the two parts are `abaaa` and `aabaa`
		let s = b"ab\raaa\r aab\raa";

		Parser::new(s)
			.ignore_byte(b'\r')
			.split_on_byte(b' ')
			.for_each( |parser| {

				// lets ignore b
				// and count a
				let count_a = parser
					.ignore_byte(b'b')
					.count_byte(b'a');

				// both parts contain four `a`
				assert_eq!( count_a, 4 );

			} );

	}
470
	// expect_byte returns Ok on a match and Err on a mismatch
	#[test]
	fn expect_byte() {

		let s = b"abaa";

		// the first byte is an `a`, the second one is a `b`
		assert!( Parser::new(s)
			.expect_byte(b'a').unwrap()
			.expect_byte(b'a').is_err() );

	}
481
	// advance_if reports if it advanced and returns None at the end
	#[test]
	fn advance_if() {

		let mut parser = Parser::new(b"ab");

		// `a` matches, so the parser advances
		assert!(parser.advance_if(|&b| b == b'a').unwrap());
		// the next byte is `b`, the parser does not advance
		assert!(!parser.advance_if(|&b| b == b'a').unwrap());
		assert!(parser.advance_if(|&b| b == b'b').unwrap());
		// nothing left
		assert!(parser.advance_if(|&b| b == b'b').is_none());

	}
493
	// next_if returns the byte if it advanced, else None
	#[test]
	fn next_if() {

		let mut parser = Parser::new(b"ab");

		assert_eq!(parser.next_if(|&b| b == b'a').unwrap(), b'a');
		// no match, the `b` is not consumed
		assert!(parser.next_if(|&b| b == b'x').is_none());
		assert_eq!(parser.next_if(|&b| b == b'b').unwrap(), b'b');
		// nothing left
		assert!(parser.next_if(|&b| b == b'x').is_none());

	}
505
	// peek and peek_at, every assert uses a fresh parser
	#[test]
	fn peek() {

		let s = b"abaa";

		assert_eq!( b'a', Parser::new(s).peek().unwrap() );
		// peek_at starts counting at 1, so this is the same byte as peek
		assert_eq!( b'a', Parser::new(s).peek_at(1).unwrap() );
		assert_eq!( b'b', Parser::new(s).peek_at(2).unwrap() );
		assert_eq!( b'a', Parser::new(s).peek_at(3).unwrap() );
		// the slice only contains four bytes
		assert!( Parser::new(s).peek_at(5).is_none() );

	}
518
	// consume, consume_len and consume_at_least
	#[test]
	fn consume() {

		// normal
		// nothing is left after consume
		let mut parser = Parser::new( b"aaa" );
		assert!( parser.consume().next().is_none() );

		// len
		// only one byte gets consumed, so next still returns a byte
		let mut parser = Parser::new( b"aaa" );
		assert!( parser.consume_len( 1 ).unwrap().next().is_some() );

		// on an empty parser the error reports that nothing was consumed
		let mut parser = Parser::new( b"aaa" );
		parser.consume();
		assert!(matches!( parser.consume_len(1), Err(0) ));

		// at least
		// the minimum is reached and the rest gets consumed as well
		let mut parser = Parser::new( b"aaa" );
		assert!( parser.consume_at_least( 1 ).is_ok() );
		assert!( parser.next().is_none() );

	}
540
541}