afrim_translator/lib.rs
1#![deny(missing_docs)]
2//! This crate provides a range of language-related functionalities, including translation,
3//! auto-suggestions, auto-correction and more.
4//! It's designed to enhance the language processing tasks within an input method engine.
5//!
6//! **Note**: We use [`IndexMap`] instead of [`HashMap`](std::collections::HashMap) for better performance
7//! when dealing with big datasets.
8//!
9//! ### Feature flags
10//!
11//! To reduce the amount of compiled code in the crate, you can enable features manually. This is
12//! done by adding `default-features = false` to your dependency specification. Below is a list of
13//! the features available in this crate.
14//!
15//! * `rhai`: Enables the usage of rhai script files.
16//! * `rhai-wasm`: Like rhai, but wasm compatible.
17//! * `strsim`: Enables the text similarity algorithm for better predictions.
18//! * `serde`: Enables serde feature.
19//!
20//! # Example
21//!
22//! ```
23//! use afrim_translator::{Predicate, Translator};
24//! use indexmap::IndexMap;
25//!
26//! // Prepares the dictionary.
27//! let mut dictionary = IndexMap::new();
28//! dictionary.insert("jump".to_string(), vec!["sauter".to_string()]);
29//! dictionary.insert("jumper".to_string(), vec!["sauteur".to_string()]);
30//! dictionary.insert("nihao".to_string(), vec!["hello".to_string()]);
31//!
32//! // Builds the translator.
33//! let mut translator = Translator::new(dictionary, true);
34//!
35//! assert_eq!(
36//! translator.translate("jump"),
37//! vec![
38//! Predicate {
39//! code: "jump".to_owned(),
40//! remaining_code: "".to_owned(),
41//! texts: vec!["sauter".to_owned()],
42//! can_commit: true
43//! },
44//! // Auto-completion.
45//! Predicate {
46//! code: "jumper".to_owned(),
47//! remaining_code: "er".to_owned(),
48//! texts: vec!["sauteur".to_owned()],
49//! can_commit: false
50//! }
51//! ]
52//! );
53//! ```
54//!
55//! # Example with the strsim feature
56//!
57//! ```
58//! use afrim_translator::{Predicate, Translator};
59//! use indexmap::IndexMap;
60//!
61//! // Prepares the dictionary.
62//! let mut dictionary = IndexMap::new();
63//! dictionary.insert("jump".to_string(), vec!["sauter".to_string()]);
64//! dictionary.insert("jumper".to_string(), vec!["sauteur".to_string()]);
65//!
66//! // Builds the translator.
67//! let mut translator = Translator::new(dictionary, true);
68//!
69//! // Auto-suggestion / Auto-correction.
70//! #[cfg(feature = "strsim")]
71//! assert_eq!(
72//! translator.translate("junp"),
73//! vec![Predicate {
74//! code: "jump".to_owned(),
75//! remaining_code: "".to_owned(),
76//! texts: vec!["sauter".to_owned()],
77//! can_commit: false
78//! }]
79//! );
80//! ```
81//!
82//! # Example with the rhai feature
83//!
84//! ```
85//! #[cfg(feature = "rhai")]
86//! use afrim_translator::Engine;
87//! use afrim_translator::{Translator, Predicate};
88//! use indexmap::IndexMap;
89//!
90//! // Prepares the dictionary.
91//! let mut dictionary = IndexMap::new();
92//! dictionary.insert("jump".to_string(), vec!["sauter".to_string()]);
93//! dictionary.insert("jumper".to_string(), vec!["sauteur".to_string()]);
94//!
95//! // Prepares the script.
96//! #[cfg(feature = "rhai")]
97//! let engine = Engine::new();
98//! #[cfg(feature = "rhai")]
99//! let jump_translator = engine.compile(r#"
100//! // The main script function.
101//! fn translate(input) {
102//! if input == "jump" {
103//! [input, "", "\n", false]
104//! }
105//! }
106//! "#).unwrap();
107//!
108//! // Builds the translator.
109//! let mut translator = Translator::new(dictionary, true);
110//!
111//! // Registers the jump translator.
112//! #[cfg(feature = "rhai")]
113//! translator.register("jump".to_string(), jump_translator);
114//!
115//! assert_eq!(
116//! translator.translate("jump"),
117//! vec![
118//! Predicate {
119//! code: "jump".to_owned(),
120//! remaining_code: "".to_owned(),
121//! texts: vec!["sauter".to_owned()],
122//! can_commit: true
123//! },
124//! #[cfg(feature = "rhai")]
125//! // Programmable translation.
126//! Predicate {
127//! code: "jump".to_owned(),
128//! remaining_code: "".to_owned(),
129//! texts: vec!["\n".to_owned()],
130//! can_commit: false
131//! },
132//! // Auto-completion.
133//! Predicate {
134//! code: "jumper".to_owned(),
135//! remaining_code: "er".to_owned(),
136//! texts: vec!["sauteur".to_owned()],
137//! can_commit: false
138//! }
139//! ]
140//! );
141//! ```
142
143use indexmap::IndexMap;
144#[cfg(feature = "rhai")]
145pub use rhai::Engine;
146#[cfg(feature = "rhai")]
147use rhai::{Array, Scope, AST};
148use std::cmp::Ordering;
149#[cfg(feature = "strsim")]
150use strsim::{self};
151
/// A single translation candidate, as returned by the translator.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Predicate {
    /// The code this candidate stands for.
    pub code: String,
    /// What is still left of the code to fully match this candidate.
    pub remaining_code: String,
    /// The possible outputs of this candidate.
    pub texts: Vec<String>,
    /// Whether this candidate can be committed.
    pub can_commit: bool,
}
165
/// Core structure of the translator.
///
/// It owns the dictionary used for lookups and, when the `rhai` feature is enabled,
/// the compiled scripts registered as additional translators.
pub struct Translator {
    /// Maps a code to the list of its possible outputs.
    dictionary: IndexMap<String, Vec<String>>,
    /// Compiled scripts, keyed by the name they were registered under.
    #[cfg(feature = "rhai")]
    translators: IndexMap<String, AST>,
    /// Value given to `can_commit` for predicates whose code exactly matches the input.
    auto_commit: bool,
}
173
174impl Translator {
175 /// Initiatializes a new translator.
176 ///
177 /// # Example
178 ///
179 /// ```
180 /// use afrim_translator::Translator;
181 /// use indexmap::IndexMap;
182 ///
183 /// let dictionary = IndexMap::new();
184 /// let translator = Translator::new(dictionary, false);
185 /// ```
186 pub fn new(dictionary: IndexMap<String, Vec<String>>, auto_commit: bool) -> Self {
187 Self {
188 dictionary,
189 auto_commit,
190 #[cfg(feature = "rhai")]
191 translators: IndexMap::default(),
192 }
193 }
194
195 #[cfg(feature = "rhai")]
196 /// Registers a translator.
197 ///
198 /// The provided name will be used for debugging in case of script error.
199 /// Note that the scripts are compiled using [`Engine`](crate::Engine::compile).
200 ///
201 /// # Example
202 ///
203 /// ```
204 /// use afrim_translator::{Engine, Predicate, Translator};
205 /// use indexmap::IndexMap;
206 ///
207 /// // We prepare the script.
208 /// let date_translator = r#"
209 /// // Date converter.
210 ///
211 /// const MONTHS = [
212 /// "Jan", "Feb", "Mar", "Apr", "May", "Jun",
213 /// "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
214 /// ];
215 ///
216 /// fn parse_date(input) {
217 /// let data = input.split('/');
218 ///
219 /// if data.len() != 3 {
220 /// return [];
221 /// }
222 ///
223 /// let day = parse_int(data[0]);
224 /// let month = parse_int(data[1]);
225 /// let year = parse_int(data[2]);
226 ///
227 /// if day in 1..31 && month in 1..13 && year in 1..2100 {
228 /// return [day, month, year];
229 /// }
230 /// }
231 ///
232 /// // Script main function.
233 /// fn translate(input) {
234 /// let date = parse_date(input);
235 ///
236 /// if date.is_empty() { return }
237 ///
238 /// let month = global::MONTHS[date[1]-1];
239 ///
240 /// [input, "", [`${date[0]}, ${month} ${date[2]}`], true]
241 /// }
242 /// "#;
243 /// let mut engine = Engine::new();
244 /// let date_translator = engine.compile(date_translator).unwrap();
245 ///
246 /// // We build the translator.
247 /// let mut translator = Translator::new(IndexMap::new(), true);
248 ///
249 /// // We register our date translator.
250 /// translator.register("date_translator".to_owned(), date_translator);
251 ///
252 /// assert_eq!(
253 /// translator.translate("09/02/2024"),
254 /// vec![
255 /// Predicate {
256 /// code: "09/02/2024".to_owned(),
257 /// remaining_code: "".to_owned(),
258 /// texts: vec!["9, Feb 2024".to_owned()],
259 /// can_commit: true
260 /// }
261 /// ]
262 /// );
263 /// ```
264 pub fn register(&mut self, name: String, ast: AST) {
265 self.translators.insert(name, ast);
266 }
267
268 #[cfg(feature = "rhai")]
269 /// Unregisters a translator.
270 ///
271 /// # Example
272 /// ```
273 /// use afrim_translator::{Engine, Predicate, Translator};
274 /// use indexmap::IndexMap;
275 ///
276 /// // We prepare the script.
277 /// let engine = Engine::new();
278 /// let erase_translator = engine.compile("fn translate(input) { [input, \"\", [], true] }").unwrap();
279 ///
280 /// // We build the translator.
281 /// let mut translator = Translator::new(IndexMap::new(), false);
282 ///
283 /// // We register the erase translator.
284 /// translator.register("erase".to_owned(), erase_translator);
285 /// assert_eq!(
286 /// translator.translate("hello"),
287 /// vec![
288 /// Predicate {
289 /// code: "hello".to_owned(),
290 /// remaining_code: "".to_owned(),
291 /// texts: vec![],
292 /// can_commit: true
293 /// }
294 /// ]
295 /// );
296 ///
297 /// // We unregister the erase translator.
298 /// translator.unregister("erase");
299 /// assert_eq!(translator.translate("hello"), vec![]);
300 /// ```
301 pub fn unregister(&mut self, name: &str) {
302 self.translators.shift_remove(name);
303 }
304
305 /// Generates a list of predicates based on the input.
306 ///
307 /// # Example
308 ///
309 /// ```
310 /// use indexmap::IndexMap;
311 /// use afrim_translator::{Predicate, Translator};
312 ///
313 /// // We prepares the dictionary.
314 /// let mut dictionary = IndexMap::new();
315 /// dictionary.insert("salut!".to_owned(), vec!["hello!".to_owned(), "hi!".to_owned()]);
316 /// dictionary.insert("salade".to_owned(), vec!["vegetable".to_owned()]);
317 ///
318 /// // We build the translator.
319 /// let translator = Translator::new(dictionary, false);
320 /// assert_eq!(
321 /// translator.translate("sal"),
322 /// vec![
323 /// Predicate {
324 /// code: "salut!".to_owned(),
325 /// remaining_code: "ut!".to_owned(),
326 /// texts: vec!["hello!".to_owned(), "hi!".to_owned()],
327 /// can_commit: false
328 /// },
329 /// Predicate {
330 /// code: "salade".to_owned(),
331 /// remaining_code: "ade".to_owned(),
332 /// texts: vec!["vegetable".to_owned()],
333 /// can_commit: false
334 /// }
335 /// ]
336 /// )
337 /// ```
338 pub fn translate(&self, input: &str) -> Vec<Predicate> {
339 #[cfg(feature = "rhai")]
340 let mut scope = Scope::new();
341 #[cfg(feature = "rhai")]
342 let engine = Engine::new();
343 let predicates = self.dictionary.iter().filter_map(|(key, values)| {
344 if input.chars().count() < 2 || input.len() > key.len() || key[0..1] != input[0..1] {
345 return None;
346 };
347
348 let predicate = (key == input).then_some((
349 1.0,
350 Predicate {
351 code: key.to_owned(),
352 remaining_code: "".to_owned(),
353 texts: values.to_owned(),
354 can_commit: self.auto_commit,
355 },
356 ));
357 #[cfg(feature = "strsim")]
358 let predicate = predicate.or_else(|| {
359 if key.len() == input.len() {
360 let confidence = strsim::hamming(key.as_ref(), input)
361 .map(|n| 1.0 - (n as f64 / key.len() as f64))
362 .unwrap_or(0.0);
363
364 (confidence > 0.7).then(|| {
365 (
366 confidence,
367 Predicate {
368 code: key.to_owned(),
369 remaining_code: "".to_owned(),
370 texts: values.to_owned(),
371 can_commit: false,
372 },
373 )
374 })
375 } else {
376 None
377 }
378 });
379 predicate.or_else(|| {
380 key.starts_with(input).then_some((
381 0.5,
382 Predicate {
383 code: key.to_owned(),
384 remaining_code: key.chars().skip(input.len()).collect(),
385 texts: values.to_owned(),
386 can_commit: false,
387 },
388 ))
389 })
390 });
391 #[cfg(feature = "rhai")]
392 let predicates =
393 predicates.chain(self.translators.iter().filter_map(|(_name, translator)| {
394 let mut data = engine
395 .call_fn::<Array>(&mut scope, translator, "translate", (input.to_owned(),))
396 .unwrap_or_default();
397
398 (data.len() == 4).then(|| {
399 let code = data.remove(0).into_string().unwrap();
400 let remaining_code = data.remove(0).into_string().unwrap();
401 let value = data.remove(0);
402 let values = if value.is_array() {
403 value.into_array().unwrap()
404 } else {
405 vec![value]
406 };
407 let values = values
408 .into_iter()
409 .map(|e| e.into_string().unwrap())
410 .collect();
411 let translated = data.remove(0).as_bool().unwrap();
412
413 (
414 1.0,
415 Predicate {
416 code,
417 remaining_code,
418 texts: values,
419 can_commit: translated,
420 },
421 )
422 })
423 }));
424 let mut predicates = predicates.collect::<Vec<(f64, Predicate)>>();
425
426 // from the best to the worst
427 predicates.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal));
428
429 predicates
430 .into_iter()
431 .map(|(_, predicate)| predicate)
432 .collect()
433 }
434}
435
#[cfg(test)]
mod tests {
    #[cfg(feature = "rhai")]
    use crate::Engine;
    use crate::{Predicate, Translator};
    use indexmap::IndexMap;

    /// Builds the predicate, holding a single text, that an assertion expects.
    fn expected(code: &str, remaining_code: &str, text: &str, can_commit: bool) -> Predicate {
        Predicate {
            code: code.to_owned(),
            remaining_code: remaining_code.to_owned(),
            texts: vec![text.to_owned()],
            can_commit,
        }
    }

    #[test]
    fn test_translate() {
        // The dictionary holds a single entry.
        let mut dictionary = IndexMap::new();
        dictionary.insert("halo".to_string(), vec!["hello".to_string()]);

        // The translator commits the exact matches.
        let translator = Translator::new(dictionary, true);

        // The filtering should cope with a non-ASCII input; only the absence of panic
        // is checked, the result is dropped.
        translator.translate("รน");

        // With the scripting enabled, one script is registered then dropped, and a second
        // one stays registered for the assertions below.
        #[cfg(feature = "rhai")]
        let translator = {
            let mut translator = translator;
            let engine = Engine::new();
            let empty_script = engine.compile("fn translate(input) {}").unwrap();
            let greeting_script = engine
                .compile(
                    r#"
                fn translate(input) {
                    if input == "hi" {
                        ["hi", "", "hello", true]
                    }
                }
            "#,
                )
                .unwrap();

            translator.register("none".to_string(), empty_script);
            translator.unregister("none");
            translator.register("some".to_string(), greeting_script);

            translator
        };

        // A single character is not enough to query the dictionary.
        assert!(translator.translate("h").is_empty());

        // Programmable translation.
        #[cfg(feature = "rhai")]
        assert_eq!(
            translator.translate("hi"),
            vec![expected("hi", "", "hello", true)]
        );

        // Auto-completion.
        assert_eq!(
            translator.translate("ha"),
            vec![expected("halo", "lo", "hello", false)]
        );

        // Auto-correction.
        #[cfg(feature = "strsim")]
        assert_eq!(
            translator.translate("helo"),
            vec![expected("halo", "", "hello", false)]
        );
    }
}