use crate::types::Match;
use super::{Trie, DEAD_STATE};
pub struct TrieFindIter<'a> {
trie: &'a Trie,
text: &'a [u8],
pos: usize,
state: usize,
outpos: u32,
}
impl<'a> TrieFindIter<'a> {
pub(super) fn new(trie: &'a Trie, text: &'a [u8]) -> Self {
Self {
trie,
text,
pos: 0,
state: 0,
outpos: u32::MAX,
}
}
#[inline]
fn next_overlapping(&mut self) -> Option<Match> {
if self.outpos != u32::MAX {
let output = &self.trie.outputs[self.outpos as usize];
self.outpos = output.parent;
return Some(Match {
pattern_id: output.pattern_id,
start: self.pos - output.length as usize,
end: self.pos,
});
}
while self.pos < self.text.len() {
let byte = self.text[self.pos];
self.pos += 1;
while self.state != 0 && !self.trie.states[self.state].edges.contains_key(&byte) {
self.state = self.trie.states[self.state].fail as usize;
}
self.state = self.trie.states[self.state]
.edges
.get(&byte)
.copied()
.unwrap_or(0) as usize;
self.outpos = self.trie.states[self.state].outpos.unwrap_or(u32::MAX);
if self.outpos != u32::MAX {
let output = &self.trie.outputs[self.outpos as usize];
self.outpos = output.parent;
return Some(Match {
pattern_id: output.pattern_id,
start: self.pos - output.length as usize,
end: self.pos,
});
}
}
None
}
#[inline]
fn next_leftmost(&mut self) -> Option<Match> {
let mut state: usize = 0;
let mut last_outpos: Option<u32> = None;
for read_pos in self.pos..self.text.len() {
let byte = self.text[read_pos];
loop {
if let Some(&next) = self.trie.states[state].edges.get(&byte) {
state = next as usize;
break;
} else if state == 0 {
break;
} else {
let fail = self.trie.states[state].fail;
if fail == DEAD_STATE {
if let Some(outpos) = last_outpos {
let output = &self.trie.outputs[outpos as usize];
return Some(Match {
pattern_id: output.pattern_id,
start: self.pos - output.length as usize,
end: self.pos,
});
}
state = 0;
break;
}
state = fail as usize;
}
}
if let Some(outpos) = self.trie.states[state].outpos {
last_outpos = Some(outpos);
self.pos = read_pos + 1;
}
}
if let Some(outpos) = last_outpos {
let output = &self.trie.outputs[outpos as usize];
return Some(Match {
pattern_id: output.pattern_id,
start: self.pos - output.length as usize,
end: self.pos,
});
}
None
}
}
impl Iterator for TrieFindIter<'_> {
type Item = Match;
#[inline]
fn next(&mut self) -> Option<Match> {
match self.trie.match_kind {
crate::types::MatchKind::Overlapping => self.next_overlapping(),
crate::types::MatchKind::LeftmostFirst
| crate::types::MatchKind::LeftmostLongest
| crate::types::MatchKind::WordPiece => self.next_leftmost(),
}
}
}