unobtanium_segmenter/
sentence_grouped_iterator.rs

1
2use crate::SegmentedToken;
3
4/// Iterator that wraps another Iterator of [`SegmentedToken`s][SegmentedToken] and inserts a break after each end of sentence token, which interrupts a `for`-loop but can be used.
5///
6/// End of sentence markers that don't carry any text are automatically discarded.
7///
8/// It splits right after tokens tht have the at the [`is_end_of_sentence`][SegmentedToken::is_end_of_sentence] flag set.
9pub struct SentenceGroupedIterator<'a, I: Iterator<Item=SegmentedToken<'a>>> {
10	inner: I,
11	next_is_break: bool,
12	is_at_end: bool,
13}
14
15impl <'a, I: Iterator<Item=SegmentedToken<'a>>> SentenceGroupedIterator<'a, I> {
16
17	/// Create a new sentence collector that wraps the given iterator.
18	pub fn new(inner: I) -> Self {
19		Self {
20			inner,
21			next_is_break: false,
22			is_at_end: false
23		}
24	}
25
26	/// Returns wheter the last iteration has stopped because the inner iterator
27	/// has hit its end.
28	pub fn is_at_end(&self) -> bool {
29		self.is_at_end
30	}
31}
32
33impl <'a, I: Iterator<Item=SegmentedToken<'a>>> Iterator for SentenceGroupedIterator<'a, I> {
34	type Item = SegmentedToken<'a>;
35
36	fn next(&mut self) -> Option<Self::Item> {
37		if self.next_is_break {
38			self.next_is_break = false;
39			None
40		} else if let Some(token) = self.inner.next() {
41			self.is_at_end = false;
42			if token.is_end_of_sentence {
43				if token.text.is_empty() {
44					// Empty marker token, return None immedeately
45					None
46				} else {
47					// Non-empty, but marked token return it and schedule a break
48					self.next_is_break = true;
49					Some(token)
50				}
51			} else {
52				// Not end of sentence, just return the token
53				Some(token)
54			}
55		} else {
56			self.is_at_end = true;
57			None
58		}
59	}
60}