1use whatlang::{Detector, Info, Lang};
2
/// Outcome of a language-detection call.
///
/// The values are copied from the `whatlang` detection result, with the
/// language converted to a short string tag.
#[derive(Clone, Debug, PartialEq)]
pub struct DetectedLanguage {
    /// Short language tag such as `"en"`; `"und"` when the detected language
    /// has no tag mapped in this module.
    pub tag: String,

    /// Confidence value reported by the detector for this result.
    pub confidence: f64,

    /// Whether the detector flagged the result as reliable.
    pub reliable: bool,
}
13
14#[must_use]
18pub fn detect(text: &str) -> Option<DetectedLanguage> {
19 let info = whatlang::detect(text)?;
20 Some(DetectedLanguage {
21 tag: lang_to_tag(info.lang()),
22 confidence: info.confidence(),
23 reliable: info.is_reliable(),
24 })
25}
26
27#[must_use]
29pub fn detect_with_allowlist(text: &str, allowed: &[&str]) -> Option<DetectedLanguage> {
30 let langs: Vec<Lang> = allowed.iter().filter_map(|t| tag_to_lang(t)).collect();
31 if langs.is_empty() {
32 return detect(text);
33 }
34 let detector = Detector::with_allowlist(langs);
35 let info: Info = detector.detect(text)?;
36 Some(DetectedLanguage {
37 tag: lang_to_tag(info.lang()),
38 confidence: info.confidence(),
39 reliable: info.is_reliable(),
40 })
41}
42
43fn lang_to_tag(lang: Lang) -> String {
45 match lang {
46 Lang::Eng => "en",
47 Lang::Fra => "fr",
48 Lang::Deu => "de",
49 Lang::Spa => "es",
50 Lang::Por => "pt",
51 Lang::Ita => "it",
52 Lang::Nld => "nl",
53 Lang::Rus => "ru",
54 Lang::Pol => "pl",
55 Lang::Swe => "sv",
56 Lang::Dan => "da",
57 Lang::Nob => "no",
58 Lang::Fin => "fi",
59 Lang::Ukr => "uk",
60 Lang::Ces => "cs",
61 Lang::Ron => "ro",
62 Lang::Hun => "hu",
63 Lang::Tur => "tr",
64 Lang::Jpn => "ja",
65 Lang::Cmn => "zh",
66 Lang::Kor => "ko",
67 Lang::Ara => "ar",
68 Lang::Hin => "hi",
69 _ => "und", }
71 .to_string()
72}
73
74fn tag_to_lang(tag: &str) -> Option<Lang> {
76 let primary = tag.split('-').next().unwrap_or(tag);
78 match primary {
79 "en" => Some(Lang::Eng),
80 "fr" => Some(Lang::Fra),
81 "de" => Some(Lang::Deu),
82 "es" => Some(Lang::Spa),
83 "pt" => Some(Lang::Por),
84 "it" => Some(Lang::Ita),
85 "nl" => Some(Lang::Nld),
86 "ru" => Some(Lang::Rus),
87 "pl" => Some(Lang::Pol),
88 "sv" => Some(Lang::Swe),
89 "da" => Some(Lang::Dan),
90 "no" => Some(Lang::Nob),
91 "fi" => Some(Lang::Fin),
92 "uk" => Some(Lang::Ukr),
93 "cs" => Some(Lang::Ces),
94 "ro" => Some(Lang::Ron),
95 "hu" => Some(Lang::Hun),
96 "tr" => Some(Lang::Tur),
97 "ja" => Some(Lang::Jpn),
98 "zh" => Some(Lang::Cmn),
99 "ko" => Some(Lang::Kor),
100 "ar" => Some(Lang::Ara),
101 "hi" => Some(Lang::Hin),
102 _ => None,
103 }
104}
105
#[cfg(test)]
mod tests {
    use super::*;

    /// Every tag that has an arm in both `lang_to_tag` and `tag_to_lang`.
    const MAPPED_TAGS: [&str; 23] = [
        "en", "fr", "de", "es", "pt", "it", "nl", "ru", "pl", "sv", "da", "no", "fi", "uk", "cs",
        "ro", "hu", "tr", "ja", "zh", "ko", "ar", "hi",
    ];

    #[test]
    fn detect_english() {
        let d = detect("The quick brown fox jumped over the lazy dog. It was a beautiful day.")
            .expect("English sample should yield a detection");
        assert_eq!(d.tag, "en");
        assert!(d.confidence > 0.5);
    }

    #[test]
    fn detect_french() {
        let d = detect("Bonjour le monde. Comment allez-vous aujourd'hui? C'est une belle journée.")
            .expect("French sample should yield a detection");
        assert_eq!(d.tag, "fr");
    }

    #[test]
    fn detect_german() {
        let d = detect(
            "Die schnelle braune Fuchs springt über den faulen Hund. Es war ein schöner Tag.",
        )
        .expect("German sample should yield a detection");
        assert_eq!(d.tag, "de");
    }

    #[test]
    fn detect_spanish() {
        let d = detect("El rápido zorro marrón salta sobre el perro perezoso. Fue un día hermoso.")
            .expect("Spanish sample should yield a detection");
        assert_eq!(d.tag, "es");
    }

    #[test]
    fn detect_too_short_is_none_or_low_confidence() {
        // A two-letter input may or may not produce a detection; if it does,
        // the result must not be presented as a confident one.
        if let Some(d) = detect("Hi") {
            assert!(!d.reliable || d.confidence < 0.9);
        }
    }

    #[test]
    fn detect_with_allowlist_restricts() {
        let text = "The quick brown fox jumped over the lazy dog.";
        let d = detect_with_allowlist(text, &["en", "fr"])
            .expect("allowlisted detection should yield a result");
        assert!(d.tag == "en" || d.tag == "fr");
    }

    #[test]
    fn tag_roundtrip() {
        // Cover every mapped tag so a typo in either mapping table is caught.
        for tag in MAPPED_TAGS {
            let lang =
                tag_to_lang(tag).unwrap_or_else(|| panic!("tag_to_lang failed for {tag}"));
            assert_eq!(lang_to_tag(lang), tag, "roundtrip failed for {tag}");
        }
    }

    #[test]
    fn tag_from_bcp47_with_region() {
        let lang = tag_to_lang("en-US");
        assert_eq!(lang, Some(Lang::Eng));
    }

    #[test]
    fn unknown_tag_is_rejected() {
        assert_eq!(tag_to_lang("xx"), None);
        assert_eq!(tag_to_lang(""), None);
    }

    #[test]
    fn unmapped_language_falls_back_to_und() {
        // Esperanto has no arm in `lang_to_tag`, so it takes the catch-all.
        assert_eq!(lang_to_tag(Lang::Epo), "und");
    }
}