wordcut_engine/
replacer.rs

1use std::{fs::File, io::BufReader, path::Path};
2
3use regex::Regex;
4use thiserror::Error;
5
/// Errors produced while loading replacement rules or compiling them into
/// [`ImmRule`]s.
///
/// Every variant carries the underlying failure rendered as a `String`.
#[allow(dead_code)]
#[derive(Error, Debug)]
pub enum ReplacerError {
    /// A rule's pattern could not be compiled into a regular expression.
    ///
    /// Fields: the offending pattern text, then the compiler's error message.
    #[error("Cannot create immediate rule `{0}` because `{1}`")]
    CannotCreateImmRule(String, String),
    /// The rules file could not be opened; holds the I/O error message.
    #[error("Cannot load rules `{0}`")]
    CannotLoadRules(String),
    /// The rules file was opened but its JSON could not be deserialized;
    /// holds the deserializer's error message.
    #[error("Cannot deserialize rules `{0}`")]
    CannotDeserializeRules(String),
}
16
/// A replacement rule in its serialized (not yet compiled) form.
///
/// This is the shape deserialized from JSON: an object with a `pattern` and
/// a `replacement` string field.
#[derive(Deserialize, Debug, Clone)]
pub struct Rule {
    /// Regular-expression source text; compiled by [`ImmRule::from_rule`].
    pub pattern: String,
    /// Text substituted for each match of `pattern`.
    pub replacement: String,
}
22
/// A replacement rule whose pattern has already been compiled, ready to be
/// applied by [`replace`].
#[derive(Debug, Clone)]
pub struct ImmRule {
    /// Compiled regular expression matched against the input text.
    pub pattern: Regex,
    /// Text substituted for each match of `pattern`.
    pub replacement: String,
}
28
29impl ImmRule {
30    #[allow(dead_code)]
31    pub fn from_rule(rule: &Rule) -> Result<ImmRule, ReplacerError> {
32        let pattern = Regex::new(&rule.pattern).map_err(|e| {
33            ReplacerError::CannotCreateImmRule(rule.pattern.clone(), format!("{}", e))
34        })?;
35        Ok(ImmRule {
36            pattern,
37            replacement: rule.replacement.clone(),
38        })
39    }
40
41    #[allow(dead_code)]
42    pub fn from_rules(rules: &[Rule]) -> Result<Vec<ImmRule>, ReplacerError> {
43        let mut imm_rules = Vec::new();
44        for rule in rules {
45            let imm_rule = Self::from_rule(rule)?;
46            imm_rules.push(imm_rule)
47        }
48        Ok(imm_rules)
49    }
50}
51
52#[allow(dead_code)]
53pub fn replace(rules: &[ImmRule], text: &str) -> String {
54    if rules.len() == 0 {
55        return text.to_string();
56    }
57    let mut mod_text = text.to_string();
58    for rule in rules {
59        mod_text = rule
60            .pattern
61            .replace_all(&text, &rule.replacement)
62            .to_string();
63    }
64    return mod_text;
65}
66
67#[allow(dead_code)]
68pub fn load_imm_rules<P: AsRef<Path>>(pathname: P) -> Result<Vec<ImmRule>, ReplacerError> {
69    let f = File::open(pathname).map_err(|e| ReplacerError::CannotLoadRules(format!("{}", e)))?;
70    let br = BufReader::new(f);
71    let rules: Vec<Rule> = serde_json::from_reader(br)
72        .map_err(|e| ReplacerError::CannotDeserializeRules(format!("{}", e)))?;
73    ImmRule::from_rules(&rules)
74}
75
#[cfg(test)]
mod tests {
    extern crate serde_json;
    use super::*;
    use std::path::Path;

    /// A single rule rewrites the two-character nikhahit + sara aa sequence
    /// into the single sara am character.
    #[test]
    fn sara_am() {
        let json = r###"{"pattern": "ํา", "replacement": "ำ"}"###;
        let parsed: Rule = serde_json::from_str(json).unwrap();
        let compiled = ImmRule::from_rules(&[parsed]).unwrap();
        let actual = replace(&compiled, "สําหรับข้อเสนอ");
        assert_eq!(actual, "สำหรับข้อเสนอ");
    }

    /// The rules file under the crate's `data` directory loads and yields
    /// exactly one rule.
    #[test]
    fn load_imm_rules_test() {
        let rules_file = Path::new(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/data/thai-replace-rules.json"
        ));
        let loaded = load_imm_rules(rules_file).unwrap();
        assert_eq!(loaded.len(), 1);
    }
}