ferrous_opencc/conversion/
mod.rs1use crate::config::ConversionNodeConfig;
4use crate::dictionary::{DictType, Dictionary};
5use crate::error::Result;
6use std::borrow::Cow;
7use std::path::Path;
8use std::sync::Arc;
9
10pub struct ConversionChain {
12 dictionaries: Vec<Arc<dyn Dictionary>>,
14}
15
16impl ConversionChain {
27 pub(super) fn from_config(config: &[ConversionNodeConfig], config_dir: &Path) -> Result<Self> {
29 let dictionaries = config
30 .iter()
31 .map(|node| DictType::from_config(&node.dict, config_dir))
32 .collect::<Result<Vec<_>>>()?;
33 Ok(Self { dictionaries })
34 }
35
36 pub(super) fn from_config_embedded(config: &[ConversionNodeConfig]) -> Result<Self> {
38 let dictionaries = config
39 .iter()
40 .map(|node| DictType::from_config_embedded(&node.dict))
42 .collect::<Result<Vec<_>>>()?;
43 Ok(Self { dictionaries })
44 }
45
46 pub(super) fn convert(&self, text: &str) -> String {
49 let mut current_cow = Cow::Borrowed(text);
50
51 for dict in &self.dictionaries {
53 current_cow = Self::apply_dict(current_cow, dict.as_ref());
54 }
55
56 current_cow.into_owned()
57 }
58
59 fn apply_dict<'a>(text: Cow<'a, str>, dict: &dyn Dictionary) -> Cow<'a, str> {
61 let mut result: Option<String> = None;
62 let mut i = 0;
63
64 while i < text.len() {
65 let remaining_text = &text[i..];
66 if let Some((key, values)) = dict.match_prefix(remaining_text) {
67 if let Some(values_0) = values.first() {
68 let res_str = result.get_or_insert_with(|| {
70 let mut new_string = String::with_capacity(text.len());
72 new_string.push_str(&text[..i]);
73 new_string
74 });
75
76 res_str.push_str(values_0);
78 i += key.len();
79 } else {
80 i = advance_char(i, remaining_text, result.as_mut());
82 }
83 } else {
84 i = advance_char(i, remaining_text, result.as_mut());
86 }
87 }
88
89 result.map(Cow::Owned).unwrap_or(text)
92 }
93}
94
/// Moves the scan position past the first character of `remaining_text`.
///
/// * `i` - current byte offset into the full text.
/// * `remaining_text` - the part of the text that starts at `i`.
/// * `result` - output buffer, if one has been started; the skipped character
///   is appended to it so that unconverted text is carried over.
///
/// Returns the new byte offset: `i` plus the UTF-8 width of the skipped
/// character. If `remaining_text` is empty there is nothing to skip and
/// `i + 1` is returned, so the offset always increases and a caller looping
/// on `i < text.len()` is guaranteed to stop.
fn advance_char(i: usize, remaining_text: &str, result: Option<&mut String>) -> usize {
    match remaining_text.chars().next() {
        Some(ch) => {
            if let Some(res_str) = result {
                res_str.push(ch);
            }
            i + ch.len_utf8()
        }
        // The end sentinel must be relative to the whole text, not to the
        // (empty) remainder; otherwise the offset could jump backwards.
        None => i + 1,
    }
}
106
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dictionary::Dictionary;
    use std::collections::HashMap;
    use std::fmt::Debug;

    /// Minimal in-memory dictionary used to drive the chain in tests.
    #[derive(Debug, Default)]
    struct MockDict {
        /// Key -> candidate replacement values.
        entries: HashMap<String, Vec<Arc<str>>>,
        /// Byte length of the longest key added so far.
        max_key_length: usize,
    }

    impl MockDict {
        /// Registers `key` with a single replacement `value`, replacing any
        /// earlier entry for the same key.
        fn add_entry(&mut self, key: &str, value: &str) {
            self.max_key_length = self.max_key_length.max(key.len());
            self.entries.insert(key.to_string(), vec![Arc::from(value)]);
        }
    }

    impl Dictionary for MockDict {
        /// Returns the longest non-empty key that `word` starts with, as a
        /// slice of `word`, together with copies of its values.
        fn match_prefix<'a, 'b>(&'a self, word: &'b str) -> Option<(&'b str, Vec<String>)> {
            self.entries
                .iter()
                // Empty keys never count as a match.
                .filter(|(key, _)| !key.is_empty() && word.starts_with(key.as_str()))
                .max_by_key(|(key, _)| key.len())
                .map(|(key, values)| {
                    let replacements: Vec<String> =
                        values.iter().map(|value| value.to_string()).collect();
                    (&word[..key.len()], replacements)
                })
        }

        fn max_key_length(&self) -> usize {
            self.max_key_length
        }
    }

    /// The longest matching key wins at every position, and text without any
    /// match is passed through unchanged.
    #[test]
    fn test_apply_dict_greedy_replacement() {
        let mut mock = MockDict::default();
        for (key, value) in [("a", "A"), ("ab", "AB"), ("abc", "ABC")] {
            mock.add_entry(key, value);
        }
        let shared: Arc<dyn Dictionary> = Arc::new(mock);

        let cases = [("abcdef", "ABCdef"), ("abac", "ABAc"), ("zyxw", "zyxw")];
        for (input, expected) in cases {
            let converted = ConversionChain::apply_dict(Cow::Borrowed(input), shared.as_ref());
            assert_eq!(converted, expected);
        }
    }

    /// The output of the first dictionary is the input of the second one.
    #[test]
    fn test_conversion_chain_with_multiple_dicts() {
        let mut first = MockDict::default();
        first.add_entry("一个", "一個");
        first.add_entry("项目", "項目");

        let mut second = MockDict::default();
        second.add_entry("一個", "一個");
        second.add_entry("項目", "專案");

        let stages: Vec<Arc<dyn Dictionary>> = vec![Arc::new(first), Arc::new(second)];
        let chain = ConversionChain {
            dictionaries: stages,
        };

        let converted = chain.convert("一个项目");

        assert_eq!(converted, "一個專案");
    }
}