use lingua::DetectionResult;
use lingua::Language;
use lingua::LanguageDetector;
use std::vec::IntoIter;
use crate::language_detection::detect_chinese_or_japanese;
/// Iterator over a text that yields consecutive `(start, end, language)` spans.
///
/// Spans backed by a detection carry `Some(language)`; stretches of the text
/// that lie between detections (or after the last one) are yielded with `None`.
pub struct DetectionIterator<'a> {
    /// The text the detections refer to; sliced when a span is re-examined.
    text: &'a str,
    /// Detections that have not been consumed yet.
    inner: IntoIter<DetectionResult>,
    /// A detection already pulled from `inner` but held back because a gap
    /// span had to be yielded before it.
    peek_buffer: Option<DetectionResult>,
    /// Byte offset in `text` where the next yielded span starts.
    last_offset: usize,
    /// Byte length of `text`.
    total_length: usize,
}
impl<'a> DetectionIterator<'a> {
    /// Builds an iterator from an already computed list of detections.
    ///
    /// `detections` is consumed in the order given, and iteration starts at
    /// byte offset 0 of `text`.
    pub fn new(detections: Vec<DetectionResult>, text: &'a str) -> Self {
        let total_length = text.len();
        let inner = detections.into_iter();
        Self {
            last_offset: 0,
            total_length,
            inner,
            peek_buffer: None,
            text,
        }
    }

    /// Runs `detector.detect_multiple_languages_of(text)` and wraps the
    /// resulting detections in a new iterator over `text`.
    pub fn detect(detector: &LanguageDetector, text: &'a str) -> Self {
        Self::new(detector.detect_multiple_languages_of(text), text)
    }
}
impl<'a> Iterator for DetectionIterator<'a> {
    /// `(start, end, language)`: byte offsets into the text, plus the language
    /// of the span or `None` when no detection covers it.
    type Item = (usize, usize, Option<Language>);

    /// Yields the next span, starting where the previous one ended.
    ///
    /// When the next detection does not begin at the current offset, a
    /// `None`-language gap span is yielded first and the detection is kept for
    /// the following call. After the detections run out, any remaining text is
    /// yielded as one final `None`-language span.
    fn next(&mut self) -> Option<Self::Item> {
        let span_start = self.last_offset;
        // Prefer the detection held back by a previous gap, if there is one.
        let pending = self.peek_buffer.take().or_else(|| self.inner.next());

        match pending {
            // Detection starts later than the current offset: emit the gap and
            // keep the detection for the next call.
            Some(detection) if detection.start_index() != span_start => {
                let gap_end = detection.start_index();
                self.last_offset = gap_end;
                self.peek_buffer = Some(detection);
                Some((span_start, gap_end, None))
            }
            // Detection starts exactly at the current offset: emit it.
            Some(detection) => {
                let span_end = detection.end_index();
                self.last_offset = span_end;

                let detected = detection.language();
                // Chinese/Japanese results get a second opinion from the
                // crate's helper; its answer wins when it returns one.
                let language = if matches!(detected, Language::Chinese | Language::Japanese) {
                    detect_chinese_or_japanese(&self.text[span_start..span_end])
                        .map_or(detected, |(refined, _)| refined)
                } else {
                    detected
                };

                Some((span_start, span_end, Some(language)))
            }
            // No detections left: emit the trailing remainder once, then stop.
            None => {
                if span_start < self.total_length {
                    self.last_offset = self.total_length;
                    Some((span_start, self.total_length, None))
                } else {
                    None
                }
            }
        }
    }
}