unobtanium_segmenter/augmentation/
detect_language.rs1use whatlang::Detector;
6
7use crate::SegmentedToken;
8use crate::augmentation::Augmenter;
9
10#[derive(Debug, Clone, Default)]
16pub struct AugmentationDetectLanguage {
17 detector: Detector,
18}
19
20impl AugmentationDetectLanguage {
21 pub fn new() -> Self {
23 Default::default()
24 }
25
26 pub fn new_with_detector(detector: Detector) -> Self {
28 Self { detector }
29 }
30}
31
32impl Augmenter for AugmentationDetectLanguage {
33 #[allow(clippy::collapsible_if)]
34 fn augment<'a>(&self, mut token: SegmentedToken<'a>) -> SegmentedToken<'a> {
35 if !token.is_known_word {
36 if let Some(info) = self.detector.detect(token.text) {
37 if token.detected_language_confidence < info.confidence() {
39 token.detected_script = Some(info.script());
40 token.detected_language = Some(info.lang());
41 token.detected_language_confidence = info.confidence();
42 token.is_detected_language_relible = info.is_reliable();
43 }
44 }
45 }
46 return token;
47 }
48}