wordcut_engine/
replacer.rs1use std::{fs::File, io::BufReader, path::Path};
2
3use regex::Regex;
4use thiserror::Error;
5
6#[allow(dead_code)]
7#[derive(Error, Debug)]
8pub enum ReplacerError {
9 #[error("Cannot create immediate rule `{0}` because `{1}`")]
10 CannotCreateImmRule(String, String),
11 #[error("Cannot load rules `{0}`")]
12 CannotLoadRules(String),
13 #[error("Cannot deserialize rules `{0}`")]
14 CannotDeserializeRules(String),
15}
16
17#[derive(Deserialize, Debug, Clone)]
18pub struct Rule {
19 pub pattern: String,
20 pub replacement: String,
21}
22
23#[derive(Debug, Clone)]
24pub struct ImmRule {
25 pub pattern: Regex,
26 pub replacement: String,
27}
28
29impl ImmRule {
30 #[allow(dead_code)]
31 pub fn from_rule(rule: &Rule) -> Result<ImmRule, ReplacerError> {
32 let pattern = Regex::new(&rule.pattern).map_err(|e| {
33 ReplacerError::CannotCreateImmRule(rule.pattern.clone(), format!("{}", e))
34 })?;
35 Ok(ImmRule {
36 pattern,
37 replacement: rule.replacement.clone(),
38 })
39 }
40
41 #[allow(dead_code)]
42 pub fn from_rules(rules: &[Rule]) -> Result<Vec<ImmRule>, ReplacerError> {
43 let mut imm_rules = Vec::new();
44 for rule in rules {
45 let imm_rule = Self::from_rule(rule)?;
46 imm_rules.push(imm_rule)
47 }
48 Ok(imm_rules)
49 }
50}
51
52#[allow(dead_code)]
53pub fn replace(rules: &[ImmRule], text: &str) -> String {
54 if rules.len() == 0 {
55 return text.to_string();
56 }
57 let mut mod_text = text.to_string();
58 for rule in rules {
59 mod_text = rule
60 .pattern
61 .replace_all(&text, &rule.replacement)
62 .to_string();
63 }
64 return mod_text;
65}
66
67#[allow(dead_code)]
68pub fn load_imm_rules<P: AsRef<Path>>(pathname: P) -> Result<Vec<ImmRule>, ReplacerError> {
69 let f = File::open(pathname).map_err(|e| ReplacerError::CannotLoadRules(format!("{}", e)))?;
70 let br = BufReader::new(f);
71 let rules: Vec<Rule> = serde_json::from_reader(br)
72 .map_err(|e| ReplacerError::CannotDeserializeRules(format!("{}", e)))?;
73 ImmRule::from_rules(&rules)
74}
75
#[cfg(test)]
mod tests {
    extern crate serde_json;
    use super::*;
    use std::path::Path;

    /// A single rule deserialized from JSON rewrites the two-character
    /// sequence in its pattern to the single `ำ` character.
    #[test]
    fn sara_am() {
        let json = r###"{"pattern": "ํา", "replacement": "ำ"}"###;
        let parsed: Rule = serde_json::from_str(json).unwrap();
        let compiled = ImmRule::from_rules(&[parsed]).unwrap();
        assert_eq!(replace(&compiled, "สําหรับข้อเสนอ"), "สำหรับข้อเสนอ");
    }

    /// The rule file shipped under `data/` loads and contains exactly one rule.
    #[test]
    fn load_imm_rules_test() {
        let rules_file = concat!(env!("CARGO_MANIFEST_DIR"), "/data/thai-replace-rules.json");
        let loaded = load_imm_rules(Path::new(rules_file)).unwrap();
        assert_eq!(loaded.len(), 1);
    }
}