unobtanium_segmenter/augmentation/
detect_language.rs1use whatlang::Detector;
2
3use crate::augmentation::Augmenter;
4use crate::SegmentedToken;
5
6#[derive(Debug, Clone, Default)]
12pub struct AugmentationDetectLanguage {
13 detector: Detector,
14}
15
16impl AugmentationDetectLanguage {
17 pub fn new() -> Self {
19 Default::default()
20 }
21
22 pub fn new_with_detector(detector: Detector) -> Self {
24 Self {
25 detector
26 }
27 }
28}
29
30impl Augmenter for AugmentationDetectLanguage {
31 fn augment<'a>(&self, mut token: SegmentedToken<'a>) -> SegmentedToken<'a> {
32 if !token.is_known_word {
33 if let Some(info) = self.detector.detect(token.text) {
34 if token.detected_language_confidence < info.confidence() {
36 token.detected_script = Some(info.script());
37 token.detected_language = Some(info.lang());
38 token.detected_language_confidence = info.confidence();
39 token.is_detected_language_relible = info.is_reliable();
40 }
41 }
42 }
43 return token;
44 }
45}