byte_parser/
split_on_byte.rs

1//! 
2//! Splits the iterator at a given byte.
3//!
4//! ## Example
5//! ```
6//! # use byte_parser::{StrParser, ParseIterator};
7//! let mut parser = StrParser::new("Hello World!");
8//! let mut splitter = parser.split_on_byte(b' ');
9//!
10//! let hello = splitter.next().unwrap()
11//! 	.record().consume_to_str();
12//! let world = splitter.next().unwrap()
13//! 	.record().consume_to_str();
14//!
15//! assert_eq!(hello, "Hello");
16//! assert_eq!(world, "World!");
17//! assert!(splitter.next().is_none());
18//! ```
19
20
21use crate::{
22	ParseIterator,
23	recorder::Recorder,
24	position::Position,
25	pit::PointInTime
26};
27
28use std::iter;
29
30
31#[derive(Debug)]
32pub struct SplitOnByte<'a, T> {
33	inner: SplitOnByteIter<'a, T>
34}
35
36impl<'s, 'a, T> SplitOnByte<'a, T>
37where T: ParseIterator<'s> {
38	pub(super) fn new(inner: &'a mut T, byte: u8) -> Self {
39		Self {
40			inner: SplitOnByteIter::new(inner, byte)
41		}
42	}
43}
44
45impl<'s, 'a, T> SplitOnByte<'a, T>
46where T: ParseIterator<'s> {
47
48	// next
49	pub fn next(&mut self) -> Option<&mut SplitOnByteIter<'a, T>> {
50		self.inner.reach_split_byte()?;
51		self.inner.pit.record_pos = None;// can this break when we use revert?
52
53		Some(&mut self.inner)
54	}
55
56	// for_each
57	pub fn for_each<F>(&mut self, mut f: F) -> &mut Self
58	where F: FnMut(&mut SplitOnByteIter<'a, T>) {
59
60		let mut call_next = || {
61			f(self.next()?);
62			Some(())
63		};
64
65		// do while
66		while let Some(_) = call_next() {}
67
68		self
69	}
70
71	// map
72	pub fn map_and_collect<F, A, B>(&mut self, mut f: F) -> B
73	where
74		F: FnMut(&mut SplitOnByteIter<'a, T>) -> A,
75		B: iter::FromIterator<A> {
76		iter::from_fn(|| {
77			Some(f(self.next()?))
78		})
79		.collect()
80	}
81
82}
83
84#[derive(Debug, Clone, Copy, PartialEq, Eq)]
85pub struct SplitOnBytePointInTime {
86	pos: Position,// this value should never be read unless it is returned from fn pit()
87	byte_reached: bool,
88	record_pos: Option<Position>// used so that we not return the split byte
89}
90
91impl PointInTime for SplitOnBytePointInTime {
92
93	fn pos(&self) -> Position {
94		self.pos
95	}
96
97	unsafe fn set_pos(&mut self, pos: Position) {
98		self.pos = pos;
99	}
100
101	fn record_pos(&self) -> Position {
102		match self.record_pos {
103			Some(o) => o,
104			None => self.pos
105		}
106	}
107
108}
109
110
111#[derive(Debug)]
112pub struct SplitOnByteIter<'a, T> {
113	inner: &'a mut T,
114	byte: u8,
115	pit: SplitOnBytePointInTime
116}
117
118impl<'s, 'a, T> SplitOnByteIter<'a, T>
119where T: ParseIterator<'s> {
120	pub(super) fn new(inner: &'a mut T, byte: u8) -> Self {
121
122		let pit = SplitOnBytePointInTime {
123			pos: inner.pit().pos(),
124			// if the inner iterator has not already reached the end
125			// we want to set byte_reached to true so that we don't skip
126			// the first segment (part)
127			byte_reached: inner.peek().is_some(),
128			record_pos: None
129		};
130
131		Self {inner, byte, pit}
132	}
133
134	pub(super) fn reach_split_byte(&mut self) -> Option<()> {
135
136		// reach the byte if not already reached
137		while let Some(_) = self.advance() {}
138
139		if self.pit.byte_reached {// reset byte_reached
140			self.pit.byte_reached = false;
141			Some(())
142		} else { // we reached the end
143			None
144		}
145	}
146}
147
148impl<'s, 'a, T> ParseIterator<'s> for SplitOnByteIter<'a, T>
149where T: ParseIterator<'s> {
150
151	type PointInTime = SplitOnBytePointInTime;
152
153	// returns the full slice not only the split slice
154	fn slice(&self) -> &'s [u8] {
155		self.inner.slice()
156	}
157
158	fn pit(&self) -> Self::PointInTime {
159		self.pit
160	}
161
162	fn restore_pit(&mut self, pit: Self::PointInTime) {
163		// the inner pit doesnt know that the position changed
164		// safe because we just propagate our own position
165		unsafe {
166			let mut inner_pit = self.inner.pit();
167			inner_pit.set_pos(pit.pos());
168			self.inner.restore_pit(inner_pit);
169		}
170		self.pit = pit;
171	}
172
173	fn advance(&mut self) -> Option<()> {
174
175		if self.pit.byte_reached {
176			return None
177		}
178
179		let start = self.inner.pit().pos();
180		self.inner.advance()?;
181
182		self.pit.pos = self.inner.pit().pos();
183
184		if self.byte().unwrap() == self.byte {
185			self.pit.byte_reached = true;
186			self.pit.record_pos = Some(start);
187			None
188		} else {
189			self.pit.record_pos = None;
190			Some(())
191		}
192	}
193
194	fn recorder(&self) -> Option<&Recorder> {
195		self.inner.recorder()
196	}
197
198	#[inline]
199	unsafe fn is_valid_utf8() -> bool {
200		T::is_valid_utf8()
201	}
202
203}
204
205
206
207
#[cfg(test)]
mod tests {

	use crate::*;

	/// Walks through the segments one by one using `next`.
	#[test]
	fn test_split_on_byte_next() {
		let input = b"my byte str";

		let mut parser = Parser::new(input);
		let mut segments = parser.split_on_byte(b' ');

		let first = segments.next().unwrap();
		assert_eq!(b'm', first.next().unwrap());
		assert_eq!(b'y', first.next().unwrap());
		assert!(first.next().is_none());

		let second = segments.next().unwrap();
		assert_eq!(b'b', second.next().unwrap());
		assert_eq!(b'y', second.next().unwrap());
		// the rest of this segment ("te") is left unread on purpose, the
		// splitter has to skip it

		let third = segments.next().unwrap();
		assert_eq!(b's', third.next().unwrap());

		assert!(segments.next().is_none());
	}

	/// `for_each` has to call the closure once per segment.
	#[test]
	fn test_split_on_byte_for_each() {
		let input = b"my byte str";

		let mut parser = Parser::new(input);
		let mut segments = parser.split_on_byte(b' ');

		let mut count = 0;
		segments.for_each(|_| {
			count += 1;
		});

		assert_eq!(3, count);
	}

	/// Calling peek could mess up `byte_reached`; this test makes sure
	/// that this doesn't happen.
	#[test]
	fn if_peek_called_could_mess_up_byte_reached() {
		let input = b"ab\raaa\r aab\raa";

		Parser::new(input)
			.ignore_byte(b'\r')
			.split_on_byte(b' ')
			.for_each(|segment| {
				let count = segment
					.ignore_byte(b'b')
					.count_byte(b'a');
				assert_eq!(4, count);
			});
	}

	/// A trailing split byte has to produce a final, empty segment.
	#[test]
	fn return_empty_str() {
		let mut parser = StrParser::new("a ");
		let mut segments = parser.split_on_byte(b' ');

		let first = segments.next().unwrap().record().consume_to_str();
		assert_eq!(first, "a");

		let empty = segments.next().unwrap().record().consume_to_str();
		assert_eq!(empty, "");

		assert!(segments.next().is_none());
	}

	/// Splitting a parser that was restored to the end must not yield any
	/// segment.
	#[test]
	fn restoring_at_the_end_could_return_infinitely() {
		let mut parser = StrParser::new("a b");
		let mut segments = parser.split_on_byte(b' ');
		let _ = segments.next().unwrap();
		let _ = segments.next().unwrap();
		assert!(segments.next().is_none());
		let end = parser.pit();

		let mut restored = StrParser::new("a b");
		restored.restore_pit(end);
		assert!(restored.next().is_none());

		let mut segments = restored.split_on_byte(b' ');
		assert!(segments.next().is_none());
	}

}