ferrous_opencc/conversion/
mod.rs1use crate::config::ConversionNodeConfig;
4use crate::dictionary::{DictType, Dictionary};
5use crate::error::Result;
6use std::borrow::Cow;
7use std::path::Path;
8use std::sync::Arc;
9
/// An ordered list of dictionaries that are applied one after another to a text.
///
/// [`ConversionChain::convert`] feeds the output of each dictionary into the next one,
/// so the order of `dictionaries` determines the conversion result.
pub struct ConversionChain {
    /// Dictionaries in application order; shared via `Arc` as trait objects.
    dictionaries: Vec<Arc<dyn Dictionary>>,
}
15
16impl ConversionChain {
17 pub fn from_config(config: &[ConversionNodeConfig], config_dir: &Path) -> Result<Self> {
19 let dictionaries = config
20 .iter()
21 .map(|node| DictType::from_config(&node.dict, config_dir))
22 .collect::<Result<Vec<_>>>()?;
23 Ok(Self { dictionaries })
24 }
25
26 pub fn from_config_embedded(config: &[ConversionNodeConfig]) -> Result<Self> {
28 let dictionaries = config
29 .iter()
30 .map(|node| DictType::from_config_embedded(&node.dict))
32 .collect::<Result<Vec<_>>>()?;
33 Ok(Self { dictionaries })
34 }
35
36 pub fn convert(&self, text: &str) -> String {
39 let mut current_cow = Cow::Borrowed(text);
40
41 for dict in &self.dictionaries {
43 current_cow = self.apply_dict(current_cow, dict.as_ref());
44 }
45
46 current_cow.into_owned()
47 }
48
49 fn apply_dict<'a>(&self, text: Cow<'a, str>, dict: &dyn Dictionary) -> Cow<'a, str> {
51 let mut result: Option<String> = None;
52 let mut i = 0;
53
54 while i < text.len() {
55 let remaining_text = &text[i..];
56 if let Some((key, values)) = dict.match_prefix(remaining_text) {
57 if result.is_none() {
59 let mut new_string = String::with_capacity(text.len());
61 new_string.push_str(&text[..i]);
62 result = Some(new_string);
63 }
64
65 result.as_mut().unwrap().push_str(&values[0]);
67 i += key.len();
68 } else {
69 let ch = remaining_text.chars().next().unwrap();
71 if let Some(res_str) = result.as_mut() {
72 res_str.push(ch);
74 }
75 i += ch.len_utf8();
78 }
79 }
80
81 result.map(Cow::Owned).unwrap_or(text)
84 }
85}
86
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dictionary::Dictionary;
    use std::collections::HashMap;
    use std::fmt::Debug;

    /// Minimal in-memory dictionary used to drive the conversion logic in tests.
    #[derive(Debug, Default)]
    struct MockDict {
        /// Key -> candidate values; `add_entry` always stores exactly one candidate.
        entries: HashMap<String, Vec<Arc<str>>>,
        /// Byte length of the longest key registered so far.
        max_key_length: usize,
    }

    impl MockDict {
        /// Registers `key -> value`, overwriting any previous entry for `key`.
        fn add_entry(&mut self, key: &str, value: &str) {
            let candidates: Vec<Arc<str>> = vec![Arc::from(value)];
            self.entries.insert(key.to_string(), candidates);
            if key.len() > self.max_key_length {
                self.max_key_length = key.len();
            }
        }
    }

    impl Dictionary for MockDict {
        /// Returns the longest non-empty registered key that `word` starts with,
        /// as a slice of `word`, together with that key's candidate values.
        fn match_prefix<'a, 'b>(&'a self, word: &'b str) -> Option<(&'b str, &'a [Arc<str>])> {
            self.entries
                .iter()
                .filter(|(key, _)| !key.is_empty() && word.starts_with(key.as_str()))
                .max_by_key(|(key, _)| key.len())
                .map(|(key, values)| (&word[..key.len()], values.as_slice()))
        }

        fn max_key_length(&self) -> usize {
            self.max_key_length
        }
    }

    /// The longest matching key wins at each position; unmatched text passes through.
    #[test]
    fn test_apply_dict_greedy_replacement() {
        let mut mock = MockDict::default();
        for &(key, value) in &[("a", "A"), ("ab", "AB"), ("abc", "ABC")] {
            mock.add_entry(key, value);
        }
        let dict: Arc<dyn Dictionary> = Arc::new(mock);

        // `apply_dict` does not read the chain's own dictionaries, so an empty chain suffices.
        let chain = ConversionChain {
            dictionaries: Vec::new(),
        };

        let cases = [("abcdef", "ABCdef"), ("abac", "ABAc"), ("zyxw", "zyxw")];
        for &(input, expected) in &cases {
            let converted = chain.apply_dict(Cow::Borrowed(input), dict.as_ref());
            assert_eq!(converted, expected);
        }
    }

    /// The second dictionary operates on the output produced by the first one.
    #[test]
    fn test_conversion_chain_with_multiple_dicts() {
        let mut first = MockDict::default();
        first.add_entry("一个", "一個");
        first.add_entry("项目", "項目");

        let mut second = MockDict::default();
        second.add_entry("一個", "一個");
        second.add_entry("項目", "專案");

        let chain = ConversionChain {
            dictionaries: vec![
                Arc::new(first) as Arc<dyn Dictionary>,
                Arc::new(second) as Arc<dyn Dictionary>,
            ],
        };

        assert_eq!(chain.convert("一个项目"), "一個專案");
    }
}