afrim_translator/
lib.rs

1#![deny(missing_docs)]
2//! This crate provides a range of language-related functionalities, including translation,
3//! auto-suggestions, auto-correction and more.
//! It's designed to enhance the language processing tasks within an input method engine.
5//!
6//! **Note**: We use [`IndexMap`] instead of [`HashMap`](std::collections::HashMap) for better performance
7//! when dealing with big datasets.
8//!
9//! ### Feature flags
10//!
//! To reduce the amount of compiled code in the crate, you can enable features manually. This is
12//! done by adding `default-features = false` to your dependency specification. Below is a list of
13//! the features available in this crate.
14//!
15//! * `rhai`: Enables the usage of rhai script files.
16//! * `rhai-wasm`: Like rhai, but wasm compatible.
17//! * `strsim`: Enables the text similarity algorithm for better predictions.
18//! * `serde`: Enables serde feature.
19//!
20//! # Example
21//!
22//! ```
23//! use afrim_translator::{Predicate, Translator};
24//! use indexmap::IndexMap;
25//!
26//! // Prepares the dictionary.
27//! let mut dictionary = IndexMap::new();
28//! dictionary.insert("jump".to_string(), vec!["sauter".to_string()]);
29//! dictionary.insert("jumper".to_string(), vec!["sauteur".to_string()]);
30//! dictionary.insert("nihao".to_string(), vec!["hello".to_string()]);
31//!
32//! // Builds the translator.
33//! let mut translator = Translator::new(dictionary, true);
34//!
35//! assert_eq!(
36//!     translator.translate("jump"),
37//!     vec![
38//!         Predicate {
39//!             code: "jump".to_owned(),
40//!             remaining_code: "".to_owned(),
41//!             texts: vec!["sauter".to_owned()],
42//!             can_commit: true
43//!         },
44//!         // Auto-completion.
45//!         Predicate {
46//!             code: "jumper".to_owned(),
47//!             remaining_code: "er".to_owned(),
48//!             texts: vec!["sauteur".to_owned()],
49//!             can_commit: false
50//!         }
51//!     ]
52//! );
53//! ```
54//!
55//! # Example with the strsim feature
56//!
57//! ```
58//! use afrim_translator::{Predicate, Translator};
59//! use indexmap::IndexMap;
60//!
61//! // Prepares the dictionary.
62//! let mut dictionary = IndexMap::new();
63//! dictionary.insert("jump".to_string(), vec!["sauter".to_string()]);
64//! dictionary.insert("jumper".to_string(), vec!["sauteur".to_string()]);
65//!
66//! // Builds the translator.
67//! let mut translator = Translator::new(dictionary, true);
68//!
69//! // Auto-suggestion / Auto-correction.
70//! #[cfg(feature = "strsim")]
71//! assert_eq!(
72//!     translator.translate("junp"),
73//!     vec![Predicate {
74//!         code: "jump".to_owned(),
75//!         remaining_code: "".to_owned(),
76//!         texts: vec!["sauter".to_owned()],
77//!         can_commit: false
78//!     }]
79//! );
80//! ```
81//!
82//! # Example with the rhai feature
83//!
84//! ```
85//! #[cfg(feature = "rhai")]
86//! use afrim_translator::Engine;
87//! use afrim_translator::{Translator, Predicate};
88//! use indexmap::IndexMap;
89//!
90//! // Prepares the dictionary.
91//! let mut dictionary = IndexMap::new();
92//! dictionary.insert("jump".to_string(), vec!["sauter".to_string()]);
93//! dictionary.insert("jumper".to_string(), vec!["sauteur".to_string()]);
94//!
95//! // Prepares the script.
96//! #[cfg(feature = "rhai")]
97//! let engine = Engine::new();
98//! #[cfg(feature = "rhai")]
99//! let jump_translator = engine.compile(r#"
100//!     // The main script function.
101//!     fn translate(input) {
102//!         if input == "jump" {
103//!             [input, "", "\n", false]
104//!         }
105//!     }
106//! "#).unwrap();
107//!
108//! // Builds the translator.
109//! let mut translator = Translator::new(dictionary, true);
110//!
111//! // Registers the jump translator.
112//! #[cfg(feature = "rhai")]
113//! translator.register("jump".to_string(), jump_translator);
114//!
115//! assert_eq!(
116//!     translator.translate("jump"),
117//!     vec![
118//!         Predicate {
119//!             code: "jump".to_owned(),
120//!             remaining_code: "".to_owned(),
121//!             texts: vec!["sauter".to_owned()],
122//!             can_commit: true
123//!         },
124//!         #[cfg(feature = "rhai")]
125//!         // Programmable translation.
126//!         Predicate {
127//!             code: "jump".to_owned(),
128//!             remaining_code: "".to_owned(),
129//!             texts: vec!["\n".to_owned()],
130//!             can_commit: false
131//!         },
132//!         // Auto-completion.
133//!         Predicate {
134//!             code: "jumper".to_owned(),
135//!             remaining_code: "er".to_owned(),
136//!             texts: vec!["sauteur".to_owned()],
137//!             can_commit: false
138//!         }
139//!     ]
140//! );
141//! ```
142
143use indexmap::IndexMap;
144#[cfg(feature = "rhai")]
145pub use rhai::Engine;
146#[cfg(feature = "rhai")]
147use rhai::{Array, Scope, AST};
148use std::cmp::Ordering;
149#[cfg(feature = "strsim")]
150use strsim::{self};
151
/// A candidate translation produced by [`Translator::translate`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Predicate {
    /// The code this predicate stands for (a dictionary key, or the code
    /// returned by a script).
    pub code: String,
    /// The part of the code that remains to be typed to match the predicate.
    /// Empty when the input already covers the whole code.
    pub remaining_code: String,
    /// The possible outputs of the predicate.
    pub texts: Vec<String>,
    /// Whether the predicate can be committed.
    pub can_commit: bool,
}
165
/// Core structure of the translator.
///
/// It holds the dictionary used for the lookups and, when the `rhai` feature
/// is enabled, the compiled scripts registered as programmable translators.
pub struct Translator {
    // Maps each code to its possible outputs.
    dictionary: IndexMap<String, Vec<String>>,
    // Compiled scripts, keyed by the name given at registration.
    #[cfg(feature = "rhai")]
    translators: IndexMap<String, AST>,
    // Value given to `Predicate::can_commit` for exact dictionary matches.
    auto_commit: bool,
}
173
174impl Translator {
175    /// Initiatializes a new translator.
176    ///
177    /// # Example
178    ///
179    /// ```
180    /// use afrim_translator::Translator;
181    /// use indexmap::IndexMap;
182    ///
183    /// let dictionary = IndexMap::new();
184    /// let translator = Translator::new(dictionary, false);
185    /// ```
186    pub fn new(dictionary: IndexMap<String, Vec<String>>, auto_commit: bool) -> Self {
187        Self {
188            dictionary,
189            auto_commit,
190            #[cfg(feature = "rhai")]
191            translators: IndexMap::default(),
192        }
193    }
194
195    #[cfg(feature = "rhai")]
196    /// Registers a translator.
197    ///
198    /// The provided name will be used for debugging in case of script error.
199    /// Note that the scripts are compiled using [`Engine`](crate::Engine::compile).
200    ///
201    /// # Example
202    ///
203    /// ```
204    /// use afrim_translator::{Engine, Predicate, Translator};
205    /// use indexmap::IndexMap;
206    ///
207    /// // We prepare the script.
208    /// let date_translator = r#"
209    ///    // Date converter.
210    ///    
211    ///    const MONTHS = [
212    ///        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
213    ///        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
214    ///    ];
215    ///    
216    ///    fn parse_date(input) {
217    ///        let data = input.split('/');
218    ///    
219    ///        if data.len() != 3 {
220    ///            return [];
221    ///        }
222    ///    
223    ///        let day = parse_int(data[0]);
224    ///        let month = parse_int(data[1]);
225    ///        let year = parse_int(data[2]);
226    ///    
227    ///        if day in 1..31 && month in 1..13 && year in 1..2100 {
228    ///            return [day, month, year];
229    ///        }
230    ///    }
231    ///    
232    ///    // Script main function.
233    ///    fn translate(input) {
234    ///        let date = parse_date(input);
235    ///    
236    ///        if date.is_empty() { return }
237    ///    
238    ///        let month = global::MONTHS[date[1]-1];
239    ///    
240    ///        [input, "", [`${date[0]}, ${month} ${date[2]}`], true]
241    ///    }
242    /// "#;
243    /// let mut engine = Engine::new();
244    /// let date_translator = engine.compile(date_translator).unwrap();
245    ///
246    /// // We build the translator.
247    /// let mut translator = Translator::new(IndexMap::new(), true);
248    ///
249    /// // We register our date translator.
250    /// translator.register("date_translator".to_owned(), date_translator);
251    ///
252    /// assert_eq!(
253    ///     translator.translate("09/02/2024"),
254    ///     vec![
255    ///         Predicate {
256    ///             code: "09/02/2024".to_owned(),
257    ///             remaining_code: "".to_owned(),
258    ///             texts: vec!["9, Feb 2024".to_owned()],
259    ///             can_commit: true
260    ///         }
261    ///     ]
262    /// );
263    /// ```
264    pub fn register(&mut self, name: String, ast: AST) {
265        self.translators.insert(name, ast);
266    }
267
268    #[cfg(feature = "rhai")]
269    /// Unregisters a translator.
270    ///
271    /// # Example
272    /// ```
273    /// use afrim_translator::{Engine, Predicate, Translator};
274    /// use indexmap::IndexMap;
275    ///
276    /// // We prepare the script.
277    /// let engine = Engine::new();
278    /// let erase_translator = engine.compile("fn translate(input) { [input, \"\", [], true] }").unwrap();
279    ///
280    /// // We build the translator.
281    /// let mut translator = Translator::new(IndexMap::new(), false);
282    ///
283    /// // We register the erase translator.
284    /// translator.register("erase".to_owned(), erase_translator);
285    /// assert_eq!(
286    ///     translator.translate("hello"),
287    ///     vec![
288    ///         Predicate {
289    ///             code: "hello".to_owned(),
290    ///             remaining_code: "".to_owned(),
291    ///             texts: vec![],
292    ///             can_commit: true
293    ///         }
294    ///     ]
295    /// );
296    ///
297    /// // We unregister the erase translator.
298    /// translator.unregister("erase");
299    /// assert_eq!(translator.translate("hello"), vec![]);
300    /// ```
301    pub fn unregister(&mut self, name: &str) {
302        self.translators.shift_remove(name);
303    }
304
305    /// Generates a list of predicates based on the input.
306    ///
307    /// # Example
308    ///
309    /// ```
310    /// use indexmap::IndexMap;
311    /// use afrim_translator::{Predicate, Translator};
312    ///
313    /// // We prepares the dictionary.
314    /// let mut dictionary = IndexMap::new();
315    /// dictionary.insert("salut!".to_owned(), vec!["hello!".to_owned(), "hi!".to_owned()]);
316    /// dictionary.insert("salade".to_owned(), vec!["vegetable".to_owned()]);
317    ///
318    /// // We build the translator.
319    /// let translator = Translator::new(dictionary, false);
320    /// assert_eq!(
321    ///     translator.translate("sal"),
322    ///     vec![
323    ///         Predicate {
324    ///             code: "salut!".to_owned(),
325    ///             remaining_code: "ut!".to_owned(),
326    ///             texts: vec!["hello!".to_owned(), "hi!".to_owned()],
327    ///             can_commit: false
328    ///         },
329    ///         Predicate {
330    ///             code: "salade".to_owned(),
331    ///             remaining_code: "ade".to_owned(),
332    ///             texts: vec!["vegetable".to_owned()],
333    ///             can_commit: false
334    ///         }
335    ///     ]
336    /// )
337    /// ```
338    pub fn translate(&self, input: &str) -> Vec<Predicate> {
339        #[cfg(feature = "rhai")]
340        let mut scope = Scope::new();
341        #[cfg(feature = "rhai")]
342        let engine = Engine::new();
343        let predicates = self.dictionary.iter().filter_map(|(key, values)| {
344            if input.chars().count() < 2 || input.len() > key.len() || key[0..1] != input[0..1] {
345                return None;
346            };
347
348            let predicate = (key == input).then_some((
349                1.0,
350                Predicate {
351                    code: key.to_owned(),
352                    remaining_code: "".to_owned(),
353                    texts: values.to_owned(),
354                    can_commit: self.auto_commit,
355                },
356            ));
357            #[cfg(feature = "strsim")]
358            let predicate = predicate.or_else(|| {
359                if key.len() == input.len() {
360                    let confidence = strsim::hamming(key.as_ref(), input)
361                        .map(|n| 1.0 - (n as f64 / key.len() as f64))
362                        .unwrap_or(0.0);
363
364                    (confidence > 0.7).then(|| {
365                        (
366                            confidence,
367                            Predicate {
368                                code: key.to_owned(),
369                                remaining_code: "".to_owned(),
370                                texts: values.to_owned(),
371                                can_commit: false,
372                            },
373                        )
374                    })
375                } else {
376                    None
377                }
378            });
379            predicate.or_else(|| {
380                key.starts_with(input).then_some((
381                    0.5,
382                    Predicate {
383                        code: key.to_owned(),
384                        remaining_code: key.chars().skip(input.len()).collect(),
385                        texts: values.to_owned(),
386                        can_commit: false,
387                    },
388                ))
389            })
390        });
391        #[cfg(feature = "rhai")]
392        let predicates =
393            predicates.chain(self.translators.iter().filter_map(|(_name, translator)| {
394                let mut data = engine
395                    .call_fn::<Array>(&mut scope, translator, "translate", (input.to_owned(),))
396                    .unwrap_or_default();
397
398                (data.len() == 4).then(|| {
399                    let code = data.remove(0).into_string().unwrap();
400                    let remaining_code = data.remove(0).into_string().unwrap();
401                    let value = data.remove(0);
402                    let values = if value.is_array() {
403                        value.into_array().unwrap()
404                    } else {
405                        vec![value]
406                    };
407                    let values = values
408                        .into_iter()
409                        .map(|e| e.into_string().unwrap())
410                        .collect();
411                    let translated = data.remove(0).as_bool().unwrap();
412
413                    (
414                        1.0,
415                        Predicate {
416                            code,
417                            remaining_code,
418                            texts: values,
419                            can_commit: translated,
420                        },
421                    )
422                })
423            }));
424        let mut predicates = predicates.collect::<Vec<(f64, Predicate)>>();
425
426        // from the best to the worst
427        predicates.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal));
428
429        predicates
430            .into_iter()
431            .map(|(_, predicate)| predicate)
432            .collect()
433    }
434}
435
#[cfg(test)]
mod tests {
    #[cfg(feature = "rhai")]
    use crate::Engine;
    use crate::{Predicate, Translator};
    use indexmap::IndexMap;

    /// Builds the expected predicate for a code having a single output text.
    fn expected(code: &str, remaining_code: &str, text: &str, can_commit: bool) -> Predicate {
        Predicate {
            code: code.to_owned(),
            remaining_code: remaining_code.to_owned(),
            texts: vec![text.to_owned()],
            can_commit,
        }
    }

    #[test]
    fn test_translate() {
        // A single entry is enough to exercise the matching strategies.
        let mut dictionary = IndexMap::new();
        dictionary.insert(String::from("halo"), vec![String::from("hello")]);

        // The binding only needs to be mutable when scripts get registered.
        #[cfg(not(feature = "rhai"))]
        let translator = Translator::new(dictionary, true);
        #[cfg(feature = "rhai")]
        let mut translator = Translator::new(dictionary, true);

        // Non-ASCII input must go through the filtering without panicking.
        // NOTE(review): this literal looks like a mis-encoded `ù` - confirm.
        translator.translate("รน");

        #[cfg(feature = "rhai")]
        {
            let engine = Engine::new();
            // A script without result, registered then removed right away.
            let noop_script = engine.compile("fn translate(input) {}").unwrap();
            // A script that only answers to "hi".
            let greeting_script = engine
                .compile(
                    r#"
                fn translate(input) {
                    if input == "hi" {
                        ["hi", "", "hello", true]
                    }
                }
            "#,
                )
                .unwrap();
            translator.register("none".to_string(), noop_script);
            translator.unregister("none");
            translator.register("some".to_string(), greeting_script);
        }

        // Too short to match anything.
        assert_eq!(translator.translate("h"), vec![]);
        // Programmable translation.
        #[cfg(feature = "rhai")]
        assert_eq!(
            translator.translate("hi"),
            vec![expected("hi", "", "hello", true)]
        );
        // Auto-completion.
        assert_eq!(
            translator.translate("ha"),
            vec![expected("halo", "lo", "hello", false)]
        );
        // Auto-correction.
        #[cfg(feature = "strsim")]
        assert_eq!(
            translator.translate("helo"),
            vec![expected("halo", "", "hello", false)]
        );
    }
}