unobtanium_segmenter/augmentation/
detect_language.rs1use whatlang::detect;
2
3use crate::augmentation::Augmenter;
4use crate::SegmentedToken;
5
/// Augmenter that annotates tokens with the script and language that
/// `whatlang` detects in their text.
///
/// The struct carries no state or configuration; construct it with
/// `AugmentationDetectLanguage::new()` or `Default::default()`.
#[derive(Debug, Clone, Default)]
pub struct AugmentationDetectLanguage {}
13
14impl AugmentationDetectLanguage {
15 pub fn new() -> Self {
17 Default::default()
18 }
19}
20
21impl Augmenter for AugmentationDetectLanguage {
22 fn augment<'a>(&self, mut token: SegmentedToken<'a>) -> SegmentedToken<'a> {
23 if !token.is_known_word {
24 if let Some(info) = detect(token.text) {
25 if token.detected_language_confidence < info.confidence() {
27 token.detected_script = Some(info.script());
28 token.detected_language = Some(info.lang());
29 token.detected_language_confidence = info.confidence();
30 token.is_detected_language_relible = info.is_reliable();
31 }
32 }
33 }
34 return token;
35 }
36}