ferrous_opencc/conversion/
mod.rs1use crate::config::ConversionNodeConfig;
14use crate::dictionary::{DictType, Dictionary};
15use crate::error::Result;
16use std::borrow::Cow;
17use std::path::Path;
18use std::sync::Arc;
19
20pub struct ConversionChain {
22 dictionaries: Vec<Arc<dyn Dictionary>>,
24}
25
26impl ConversionChain {
27 pub(super) fn from_config(config: &[ConversionNodeConfig], config_dir: &Path) -> Result<Self> {
29 let dictionaries = config
30 .iter()
31 .map(|node| DictType::from_config(&node.dict, config_dir))
32 .collect::<Result<Vec<_>>>()?;
33 Ok(Self { dictionaries })
34 }
35
36 pub(super) fn from_config_embedded(config: &[ConversionNodeConfig]) -> Result<Self> {
38 let dictionaries = config
39 .iter()
40 .map(|node| DictType::from_config_embedded(&node.dict))
42 .collect::<Result<Vec<_>>>()?;
43 Ok(Self { dictionaries })
44 }
45
46 pub(super) fn convert(&self, text: &str) -> String {
49 let mut current_cow = Cow::Borrowed(text);
50
51 for dict in &self.dictionaries {
53 current_cow = self.apply_dict(current_cow, dict.as_ref());
54 }
55
56 current_cow.into_owned()
57 }
58
59 fn apply_dict<'a>(&self, text: Cow<'a, str>, dict: &dyn Dictionary) -> Cow<'a, str> {
61 let mut result: Option<String> = None;
62 let mut i = 0;
63
64 while i < text.len() {
65 let remaining_text = &text[i..];
66 if let Some((key, values)) = dict.match_prefix(remaining_text) {
67 let res_str = result.get_or_insert_with(|| {
69 let mut new_string = String::with_capacity(text.len());
71 new_string.push_str(&text[..i]);
72 new_string
73 });
74
75 res_str.push_str(&values[0]);
77 i += key.len();
78 } else {
79 if let Some(ch) = remaining_text.chars().next() {
81 if let Some(res_str) = result.as_mut() {
82 res_str.push(ch);
84 }
85 i += ch.len_utf8();
88 } else {
89 break;
91 }
92 }
93 }
94
95 result.map(Cow::Owned).unwrap_or(text)
98 }
99}
100
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dictionary::Dictionary;
    use std::collections::HashMap;
    use std::fmt::Debug;

    /// Minimal in-memory dictionary used to drive the chain in tests.
    #[derive(Debug, Default)]
    struct MockDict {
        /// Key -> candidate replacements (always exactly one candidate here).
        entries: HashMap<String, Vec<Arc<str>>>,
        /// Byte length of the longest key inserted so far.
        max_key_length: usize,
    }

    impl MockDict {
        /// Registers `key -> value` and keeps `max_key_length` up to date.
        fn add_entry(&mut self, key: &str, value: &str) {
            self.max_key_length = self.max_key_length.max(key.len());
            self.entries.insert(key.to_string(), vec![Arc::from(value)]);
        }
    }

    impl Dictionary for MockDict {
        /// Returns the longest non-empty key that is a prefix of `word`,
        /// together with its candidate values.
        fn match_prefix<'a, 'b>(&'a self, word: &'b str) -> Option<(&'b str, &'a [Arc<str>])> {
            self.entries
                .iter()
                .filter(|(key, _)| !key.is_empty() && word.starts_with(key.as_str()))
                .max_by_key(|(key, _)| key.len())
                .map(|(key, values)| (&word[..key.len()], values.as_slice()))
        }

        fn max_key_length(&self) -> usize {
            self.max_key_length
        }
    }

    /// Builds a shared mock dictionary from `(key, value)` pairs.
    fn mock_dict(pairs: &[(&str, &str)]) -> Arc<dyn Dictionary> {
        let mut dict = MockDict::default();
        for (key, value) in pairs {
            dict.add_entry(key, value);
        }
        Arc::new(dict)
    }

    #[test]
    fn test_apply_dict_greedy_replacement() {
        let dict = mock_dict(&[("a", "A"), ("ab", "AB"), ("abc", "ABC")]);

        // `apply_dict` takes the dictionary explicitly, so the chain itself
        // can stay empty.
        let chain = ConversionChain {
            dictionaries: Vec::new(),
        };

        // The longest key wins at the start; the tail has no match.
        let longest = chain.apply_dict(Cow::Borrowed("abcdef"), dict.as_ref());
        assert_eq!(longest, "ABCdef");

        // Matching restarts right after each replaced key.
        let restarted = chain.apply_dict(Cow::Borrowed("abac"), dict.as_ref());
        assert_eq!(restarted, "ABAc");

        // Text without any match comes back unchanged.
        let untouched = chain.apply_dict(Cow::Borrowed("zyxw"), dict.as_ref());
        assert_eq!(untouched, "zyxw");
    }

    #[test]
    fn test_conversion_chain_with_multiple_dicts() {
        let first_pass = mock_dict(&[("一个", "一個"), ("项目", "項目")]);
        let second_pass = mock_dict(&[("一個", "一個"), ("項目", "專案")]);

        let chain = ConversionChain {
            dictionaries: vec![first_pass, second_pass],
        };

        // The second dictionary operates on the output of the first one.
        assert_eq!(chain.convert("一个项目"), "一個專案");
    }
}