use whatlang::Detector;
use crate::SegmentedToken;
use crate::augmentation::Augmenter;
/// Augmenter that fills in a token's detected script and language by running
/// a `whatlang` [`Detector`] over the token's text.
#[derive(Clone, Debug, Default)]
pub struct AugmentationDetectLanguage {
    /// Detector queried for every token this augmenter decides to inspect.
    detector: Detector,
}
impl AugmentationDetectLanguage {
    /// Builds an augmenter whose detector is the `Default` value of
    /// [`Detector`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Builds an augmenter that uses the caller-supplied `detector` instead of
    /// the default one.
    pub fn new_with_detector(detector: Detector) -> Self {
        Self { detector }
    }
}
impl Augmenter for AugmentationDetectLanguage {
    /// Annotates `token` with language-detection results when they improve on
    /// what the token already carries.
    ///
    /// Tokens flagged `is_known_word` are returned untouched. Otherwise the
    /// detector is run on `token.text`; if it yields a result whose confidence
    /// is strictly greater than `token.detected_language_confidence`, the
    /// token's detected script, language, confidence and reliability fields
    /// are overwritten with that result. In every other case the token is
    /// returned unchanged.
    fn augment<'a>(&self, mut token: SegmentedToken<'a>) -> SegmentedToken<'a> {
        if token.is_known_word {
            return token;
        }

        // Strict `<` means an existing detection wins ties against a new one.
        let previous_confidence = token.detected_language_confidence;
        let improved = self
            .detector
            .detect(token.text)
            .filter(|info| previous_confidence < info.confidence());

        if let Some(info) = improved {
            token.detected_script = Some(info.script());
            token.detected_language = Some(info.lang());
            token.detected_language_confidence = info.confidence();
            // Field name spelling ("relible") is fixed by `SegmentedToken`.
            token.is_detected_language_relible = info.is_reliable();
        }

        token
    }
}