jpx_core/extensions/
language.rs1use std::collections::HashSet;
4
5use serde_json::{Number, Value};
6
7use crate::functions::{Function, number_value};
8use crate::interpreter::SearchResult;
9use crate::registry::register_if_enabled;
10use crate::{Context, Runtime, arg, defn};
11
12defn!(DetectLanguageFn, vec![arg!(string)], None);
17
18impl Function for DetectLanguageFn {
19 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
20 self.signature.validate(args, ctx)?;
21 let text = args[0].as_str().unwrap();
22
23 match whatlang::detect(text) {
24 Some(info) => {
25 let name = info.lang().to_string();
26 Ok(Value::String(name))
27 }
28 None => Ok(Value::Null),
29 }
30 }
31}
32
33defn!(DetectLanguageIsoFn, vec![arg!(string)], None);
38
39impl Function for DetectLanguageIsoFn {
40 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
41 self.signature.validate(args, ctx)?;
42 let text = args[0].as_str().unwrap();
43
44 match whatlang::detect(text) {
45 Some(info) => {
46 let code = info.lang().code();
47 Ok(Value::String(code.to_string()))
48 }
49 None => Ok(Value::Null),
50 }
51 }
52}
53
54defn!(DetectScriptFn, vec![arg!(string)], None);
59
60impl Function for DetectScriptFn {
61 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
62 self.signature.validate(args, ctx)?;
63 let text = args[0].as_str().unwrap();
64
65 match whatlang::detect(text) {
66 Some(info) => {
67 let script = format!("{:?}", info.script());
68 Ok(Value::String(script))
69 }
70 None => Ok(Value::Null),
71 }
72 }
73}
74
75defn!(DetectLanguageConfidenceFn, vec![arg!(string)], None);
80
81impl Function for DetectLanguageConfidenceFn {
82 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
83 self.signature.validate(args, ctx)?;
84 let text = args[0].as_str().unwrap();
85
86 match whatlang::detect(text) {
87 Some(info) => Ok(number_value(info.confidence())),
88 None => Ok(Value::Null),
89 }
90 }
91}
92
93defn!(DetectLanguageInfoFn, vec![arg!(string)], None);
98
99impl Function for DetectLanguageInfoFn {
100 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
101 self.signature.validate(args, ctx)?;
102 let text = args[0].as_str().unwrap();
103
104 match whatlang::detect(text) {
105 Some(info) => {
106 let mut result = serde_json::Map::new();
107
108 result.insert(
109 "language".to_string(),
110 Value::String(info.lang().to_string()),
111 );
112 result.insert(
113 "code".to_string(),
114 Value::String(info.lang().code().to_string()),
115 );
116 result.insert(
117 "script".to_string(),
118 Value::String(format!("{:?}", info.script())),
119 );
120 result.insert(
121 "confidence".to_string(),
122 Number::from_f64(info.confidence()).map_or(Value::Null, Value::Number),
123 );
124 result.insert("reliable".to_string(), Value::Bool(info.is_reliable()));
125
126 Ok(Value::Object(result))
127 }
128 None => Ok(Value::Null),
129 }
130 }
131}
132
133pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
135 register_if_enabled(
136 runtime,
137 "detect_language",
138 enabled,
139 Box::new(DetectLanguageFn::new()),
140 );
141 register_if_enabled(
142 runtime,
143 "detect_language_iso",
144 enabled,
145 Box::new(DetectLanguageIsoFn::new()),
146 );
147 register_if_enabled(
148 runtime,
149 "detect_script",
150 enabled,
151 Box::new(DetectScriptFn::new()),
152 );
153 register_if_enabled(
154 runtime,
155 "detect_language_confidence",
156 enabled,
157 Box::new(DetectLanguageConfidenceFn::new()),
158 );
159 register_if_enabled(
160 runtime,
161 "detect_language_info",
162 enabled,
163 Box::new(DetectLanguageInfoFn::new()),
164 );
165}
166
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::{Value, json};

    /// Builds a runtime with the standard functions and all extensions enabled.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    /// Compiles `expression` on a fresh runtime and evaluates it against JSON
    /// `null`, panicking if either step fails.
    fn eval(expression: &str) -> Value {
        let runtime = setup_runtime();
        let compiled = runtime.compile(expression).unwrap();
        compiled.search(&json!(null)).unwrap()
    }

    #[test]
    fn test_detect_language_english() {
        let detected = eval("detect_language('This is a test of the language detection system.')");
        assert_eq!(detected.as_str(), Some("English"));
    }

    #[test]
    fn test_detect_language_spanish() {
        let detected =
            eval("detect_language('Esto es una prueba del sistema de deteccion de idiomas.')");
        // The expected name is spelled in the language itself.
        assert_eq!(detected.as_str(), Some("Espa\u{f1}ol"));
    }

    #[test]
    fn test_detect_language_french() {
        let detected =
            eval("detect_language('Ceci est un test du systeme de detection de langue.')");
        // The expected name is spelled in the language itself.
        assert_eq!(detected.as_str(), Some("Fran\u{e7}ais"));
    }

    #[test]
    fn test_detect_language_german() {
        let detected = eval("detect_language('Dies ist ein Test des Spracherkennungssystems.')");
        // The expected name is spelled in the language itself.
        assert_eq!(detected.as_str(), Some("Deutsch"));
    }

    #[test]
    fn test_detect_language_iso_english() {
        let code = eval("detect_language_iso('This is English text.')");
        assert_eq!(code.as_str(), Some("eng"));
    }

    #[test]
    fn test_detect_language_iso_spanish() {
        let code = eval("detect_language_iso('Este es un texto en espanol.')");
        assert_eq!(code.as_str(), Some("spa"));
    }

    #[test]
    fn test_detect_script_latin() {
        let script = eval("detect_script('Hello world')");
        assert_eq!(script.as_str(), Some("Latin"));
    }

    #[test]
    fn test_detect_script_cyrillic() {
        let script = eval(
            "detect_script('\u{41f}\u{440}\u{438}\u{432}\u{435}\u{442} \u{43c}\u{438}\u{440}')",
        );
        assert_eq!(script.as_str(), Some("Cyrillic"));
    }

    #[test]
    fn test_detect_script_arabic() {
        let script = eval(
            "detect_script('\u{645}\u{631}\u{62d}\u{628}\u{627} \u{628}\u{627}\u{644}\u{639}\u{627}\u{644}\u{645}')",
        );
        assert_eq!(script.as_str(), Some("Arabic"));
    }

    #[test]
    fn test_detect_language_confidence() {
        let outcome =
            eval("detect_language_confidence('This is definitely English text for testing.')");
        assert!(outcome.as_f64().unwrap() > 0.5);
    }

    #[test]
    fn test_detect_language_info() {
        let outcome = eval("detect_language_info('This is a test.')");
        let fields = outcome.as_object().unwrap();

        // Every documented key must be present.
        for key in ["language", "code", "script", "confidence", "reliable"] {
            assert!(fields.contains_key(key));
        }

        assert_eq!(
            fields.get("language").and_then(Value::as_str),
            Some("English")
        );
        assert_eq!(fields.get("code").and_then(Value::as_str), Some("eng"));
        assert_eq!(fields.get("script").and_then(Value::as_str), Some("Latin"));
    }

    #[test]
    fn test_detect_language_empty_string() {
        let outcome = eval("detect_language('')");
        // Either outcome is accepted: no detection (null) or some language name.
        assert!(matches!(outcome, Value::Null | Value::String(_)));
    }

    #[test]
    fn test_detect_language_short_text() {
        let outcome = eval("detect_language('Hi')");
        // Either outcome is accepted: no detection (null) or some language name.
        assert!(matches!(outcome, Value::Null | Value::String(_)));
    }
}