Skip to main content

jpx_core/extensions/
language.rs

1//! Language detection functions.
2
3use std::collections::HashSet;
4
5use serde_json::{Number, Value};
6
7use crate::functions::{Function, number_value};
8use crate::interpreter::SearchResult;
9use crate::registry::register_if_enabled;
10use crate::{Context, Runtime, arg, defn};
11
12// =============================================================================
13// detect_language(text) -> string (full name like "English")
14// =============================================================================
15
16defn!(DetectLanguageFn, vec![arg!(string)], None);
17
18impl Function for DetectLanguageFn {
19    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
20        self.signature.validate(args, ctx)?;
21        let text = args[0].as_str().unwrap();
22
23        match whatlang::detect(text) {
24            Some(info) => {
25                let name = info.lang().to_string();
26                Ok(Value::String(name))
27            }
28            None => Ok(Value::Null),
29        }
30    }
31}
32
33// =============================================================================
34// detect_language_iso(text) -> string (ISO 639-3 code like "eng")
35// =============================================================================
36
37defn!(DetectLanguageIsoFn, vec![arg!(string)], None);
38
39impl Function for DetectLanguageIsoFn {
40    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
41        self.signature.validate(args, ctx)?;
42        let text = args[0].as_str().unwrap();
43
44        match whatlang::detect(text) {
45            Some(info) => {
46                let code = info.lang().code();
47                Ok(Value::String(code.to_string()))
48            }
49            None => Ok(Value::Null),
50        }
51    }
52}
53
54// =============================================================================
55// detect_script(text) -> string (script name like "Latin")
56// =============================================================================
57
58defn!(DetectScriptFn, vec![arg!(string)], None);
59
60impl Function for DetectScriptFn {
61    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
62        self.signature.validate(args, ctx)?;
63        let text = args[0].as_str().unwrap();
64
65        match whatlang::detect(text) {
66            Some(info) => {
67                let script = format!("{:?}", info.script());
68                Ok(Value::String(script))
69            }
70            None => Ok(Value::Null),
71        }
72    }
73}
74
75// =============================================================================
76// detect_language_confidence(text) -> number (0.0-1.0)
77// =============================================================================
78
79defn!(DetectLanguageConfidenceFn, vec![arg!(string)], None);
80
81impl Function for DetectLanguageConfidenceFn {
82    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
83        self.signature.validate(args, ctx)?;
84        let text = args[0].as_str().unwrap();
85
86        match whatlang::detect(text) {
87            Some(info) => Ok(number_value(info.confidence())),
88            None => Ok(Value::Null),
89        }
90    }
91}
92
93// =============================================================================
94// detect_language_info(text) -> object with full detection info
95// =============================================================================
96
97defn!(DetectLanguageInfoFn, vec![arg!(string)], None);
98
99impl Function for DetectLanguageInfoFn {
100    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
101        self.signature.validate(args, ctx)?;
102        let text = args[0].as_str().unwrap();
103
104        match whatlang::detect(text) {
105            Some(info) => {
106                let mut result = serde_json::Map::new();
107
108                result.insert(
109                    "language".to_string(),
110                    Value::String(info.lang().to_string()),
111                );
112                result.insert(
113                    "code".to_string(),
114                    Value::String(info.lang().code().to_string()),
115                );
116                result.insert(
117                    "script".to_string(),
118                    Value::String(format!("{:?}", info.script())),
119                );
120                result.insert(
121                    "confidence".to_string(),
122                    Number::from_f64(info.confidence()).map_or(Value::Null, Value::Number),
123                );
124                result.insert("reliable".to_string(), Value::Bool(info.is_reliable()));
125
126                Ok(Value::Object(result))
127            }
128            None => Ok(Value::Null),
129        }
130    }
131}
132
133/// Register language detection functions filtered by the enabled set.
134pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
135    register_if_enabled(
136        runtime,
137        "detect_language",
138        enabled,
139        Box::new(DetectLanguageFn::new()),
140    );
141    register_if_enabled(
142        runtime,
143        "detect_language_iso",
144        enabled,
145        Box::new(DetectLanguageIsoFn::new()),
146    );
147    register_if_enabled(
148        runtime,
149        "detect_script",
150        enabled,
151        Box::new(DetectScriptFn::new()),
152    );
153    register_if_enabled(
154        runtime,
155        "detect_language_confidence",
156        enabled,
157        Box::new(DetectLanguageConfidenceFn::new()),
158    );
159    register_if_enabled(
160        runtime,
161        "detect_language_info",
162        enabled,
163        Box::new(DetectLanguageInfoFn::new()),
164    );
165}