#![deny(missing_docs)]
#![deny(unsafe_code)]
/// Outcome of feeding one chunk of text to [`StopDetector::push`].
///
/// The `Default` value (empty `safe_text`, `stopped == None`) is what `push` returns for
/// every call made after the detector has already stopped.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct StopResult {
    /// Text that can be forwarded downstream.
    ///
    /// It never contains a matched stop sequence, nor a trailing fragment that is still a
    /// prefix of one of the stop sequences (such a fragment stays buffered in the detector).
    pub safe_text: String,
    /// The stop sequence matched by this call, or `None` when no match was found.
    pub stopped: Option<String>,
}
/// Incremental detector for stop sequences in text that arrives chunk by chunk.
///
/// Text is fed in with `push`; each call returns the part that is safe to emit and keeps
/// back only the trailing bytes that could still turn into a stop sequence once more text
/// arrives. After a stop sequence has been matched the detector stays stopped and yields
/// no further text.
#[derive(Debug, Clone)]
pub struct StopDetector {
    /// Stop sequences to look for, in the order supplied by the caller (empty ones removed).
    sequences: Vec<String>,
    /// Received text that has not been emitted yet because it may be the start of a
    /// stop sequence.
    buffer: String,
    /// Set once a stop sequence has been matched.
    stopped: bool,
}
impl StopDetector {
    /// Creates a detector for the given stop sequences.
    ///
    /// Empty sequences are dropped. If no sequence remains, the detector passes every chunk
    /// through unchanged and never stops.
    pub fn new<I, S>(sequences: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        let sequences: Vec<String> = sequences
            .into_iter()
            .map(Into::into)
            .filter(|s| !s.is_empty())
            .collect();
        Self {
            sequences,
            buffer: String::new(),
            stopped: false,
        }
    }

    /// Feeds the next chunk of streamed text and returns what is safe to emit.
    ///
    /// * If the detector has already stopped, the chunk is ignored and an empty
    ///   [`StopResult`] is returned.
    /// * If the buffered text plus `chunk` contains a stop sequence, the text in front of
    ///   the match is returned together with the matched sequence; the match itself and
    ///   everything after it are discarded and the detector becomes stopped.
    /// * Otherwise everything is returned except the longest trailing fragment that is a
    ///   prefix of some stop sequence; that fragment is kept for the next call.
    pub fn push(&mut self, chunk: &str) -> StopResult {
        if self.stopped {
            return StopResult::default();
        }
        if self.sequences.is_empty() {
            // Nothing can ever match, so there is no reason to buffer.
            return StopResult {
                safe_text: chunk.to_string(),
                stopped: None,
            };
        }

        self.buffer.push_str(chunk);

        if let Some((pos, seq)) = first_match(&self.buffer, &self.sequences) {
            self.stopped = true;
            // `pos` comes from `str::find`, so it is a char boundary. Cutting the buffer
            // there and moving it out hands over the prefix without copying it and leaves
            // the buffer empty.
            self.buffer.truncate(pos);
            return StopResult {
                safe_text: std::mem::take(&mut self.buffer),
                stopped: Some(seq),
            };
        }

        let hold = longest_suffix_prefix_overlap(&self.buffer, &self.sequences);
        let mut emit_end = self.buffer.len().saturating_sub(hold);
        // The overlap is measured in bytes; step back defensively so the split can never
        // land inside a multi-byte character.
        while emit_end > 0 && !self.buffer.is_char_boundary(emit_end) {
            emit_end -= 1;
        }

        // Only the (short) held tail is copied; the emitted prefix keeps the old allocation.
        let held = self.buffer.split_off(emit_end);
        let safe_text = std::mem::replace(&mut self.buffer, held);
        StopResult {
            safe_text,
            stopped: None,
        }
    }

    /// Returns the text that is still being held back and empties the buffer.
    ///
    /// Intended for the end of the stream, when a pending partial match can no longer be
    /// completed. Returns an empty string if the detector has already stopped. Flushing
    /// does not mark the detector as stopped.
    pub fn flush(&mut self) -> String {
        if self.stopped {
            return String::new();
        }
        std::mem::take(&mut self.buffer)
    }

    /// Returns `true` once a stop sequence has been matched.
    pub fn is_stopped(&self) -> bool {
        self.stopped
    }
}
/// Returns the length in bytes of the longest suffix of `buffer` that is also a prefix of
/// at least one of `needles`, or 0 when no suffix qualifies.
///
/// The comparison is done on raw bytes, so the returned length is a byte count.
fn longest_suffix_prefix_overlap(buffer: &str, needles: &[String]) -> usize {
    let tail = buffer.as_bytes();
    needles
        .iter()
        .filter_map(|needle| {
            let head = needle.as_bytes();
            let limit = head.len().min(tail.len());
            // Try the longest candidate first; the first hit is this needle's best overlap.
            (1..=limit).rev().find(|&k| tail.ends_with(&head[..k]))
        })
        .max()
        .unwrap_or(0)
}
/// Finds the stop sequence whose occurrence in `haystack` is completed first.
///
/// For every needle the leftmost occurrence is located; among those, the one that *ends*
/// earliest wins, and if several end at the same byte the one that starts earliest wins.
/// Ranking by end position gives the same answer no matter how the text was split into
/// chunks: a detector fed incrementally fires as soon as any sequence completes, so a
/// single large chunk must pick that same sequence.
///
/// Returns the byte offset where the winning match starts together with a copy of the
/// matched sequence, or `None` if no needle occurs in `haystack`.
fn first_match(haystack: &str, needles: &[String]) -> Option<(usize, String)> {
    needles
        .iter()
        .filter_map(|needle| haystack.find(needle.as_str()).map(|start| (start, needle)))
        // `min_by_key` keeps the first of equally ranked candidates, so list order still
        // breaks any remaining tie.
        .min_by_key(|&(start, needle)| (start + needle.len(), start))
        .map(|(start, needle)| (start, needle.clone()))
}
// Unit tests for `StopDetector`: single-chunk matches, matches split across chunks,
// hold-back of partial matches, flushing, and multi-byte text.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn full_match_in_one_chunk() {
        let mut d = StopDetector::new(["STOP"]);
        let r = d.push("hello STOP trailing");
        assert_eq!(r.safe_text, "hello ");
        assert_eq!(r.stopped.as_deref(), Some("STOP"));
        assert!(d.is_stopped());
    }

    #[test]
    fn match_split_across_chunks() {
        let mut d = StopDetector::new(["</done>"]);
        let r1 = d.push("alpha </do");
        assert!(r1.stopped.is_none());
        assert_eq!(r1.safe_text, "alpha ");
        let r2 = d.push("ne> tail");
        assert_eq!(r2.stopped.as_deref(), Some("</done>"));
        assert_eq!(r2.safe_text, "");
    }

    #[test]
    fn no_partial_overlap_emits_everything() {
        let mut d = StopDetector::new(["STOP"]);
        let r1 = d.push("hello world");
        assert!(r1.stopped.is_none());
        assert_eq!(r1.safe_text, "hello world");
        let tail = d.flush();
        assert_eq!(tail, "");
    }

    #[test]
    fn partial_overlap_holds_only_what_could_complete() {
        let mut d = StopDetector::new(["STOP"]);
        let r1 = d.push("abcSTO");
        assert!(r1.stopped.is_none());
        assert_eq!(r1.safe_text, "abc");
        let r2 = d.push("P more");
        assert_eq!(r2.stopped.as_deref(), Some("STOP"));
        assert_eq!(r2.safe_text, "");
    }

    #[test]
    fn multiple_sequences_first_one_wins() {
        let mut d = StopDetector::new(["</a>", "</b>"]);
        let r = d.push("hi </b> mid </a> end");
        assert_eq!(r.stopped.as_deref(), Some("</b>"));
        assert_eq!(r.safe_text, "hi ");
    }

    #[test]
    fn no_stops_passes_through() {
        let mut d = StopDetector::new::<_, &str>([]);
        let r = d.push("anything goes here");
        assert_eq!(r.safe_text, "anything goes here");
        assert!(r.stopped.is_none());
        assert!(d.flush().is_empty());
    }

    #[test]
    fn empty_sequences_filtered_out() {
        let mut d = StopDetector::new(["", "STOP", ""]);
        let r = d.push("text STOP after");
        assert_eq!(r.stopped.as_deref(), Some("STOP"));
    }

    #[test]
    fn after_stop_pushes_return_empty() {
        let mut d = StopDetector::new(["X"]);
        let _ = d.push("aaXbb");
        let r = d.push("more text");
        assert_eq!(r.safe_text, "");
        assert!(r.stopped.is_none());
    }

    #[test]
    fn multibyte_chars_not_split() {
        let mut d = StopDetector::new(["</done>"]);
        let r = d.push("héllo</do");
        assert!(r.stopped.is_none());
        // Exact comparison: the held "</do" must not leak into the emitted text.
        assert_eq!(r.safe_text, "héllo");
        assert_eq!(d.flush(), "</do");
    }

    #[test]
    fn flush_returns_held_partial_match() {
        let mut d = StopDetector::new(["STOP"]);
        assert_eq!(d.push("abcSTO").safe_text, "abc");
        assert_eq!(d.flush(), "STO");
        assert_eq!(d.flush(), "");
        assert!(!d.is_stopped());
    }

    #[test]
    fn flush_after_stop_is_empty() {
        let mut d = StopDetector::new(["STOP"]);
        let _ = d.push("abc STOP def");
        assert!(d.is_stopped());
        assert_eq!(d.flush(), "");
    }

    #[test]
    fn held_text_released_when_match_fails() {
        let mut d = StopDetector::new(["STOP"]);
        assert_eq!(d.push("abST").safe_text, "ab");
        let r = d.push("ill going");
        assert!(r.stopped.is_none());
        assert_eq!(r.safe_text, "STill going");
        assert_eq!(d.flush(), "");
    }

    #[test]
    fn multibyte_stop_sequence_split_across_chunks() {
        let mut d = StopDetector::new(["→end"]);
        let r1 = d.push("a→e");
        assert!(r1.stopped.is_none());
        assert_eq!(r1.safe_text, "a");
        let r2 = d.push("nd!");
        assert_eq!(r2.stopped.as_deref(), Some("→end"));
        assert_eq!(r2.safe_text, "");
    }
}