markdown_translator/
collector.rs1use std::collections::HashSet;
14
15use crate::functional::{TextItem, TextType, TextPriority, TextFilter, create_text_item};
16
/// Read-only view of a single node in a DOM-like tree.
///
/// The collector in this module reaches documents only through this trait, so any
/// tree representation can be used by implementing it.
pub trait DomNode {
    /// Reports whether this node is a text node.
    fn is_text_node(&self) -> bool;

    /// Returns the node's tag name, if it has one.
    fn tag_name(&self) -> Option<String>;

    /// Returns the node's text content, if it has any.
    fn text_content(&self) -> Option<String>;

    /// Looks up the attribute called `name`, yielding `None` when it is not set.
    fn get_attribute(&self, name: &str) -> Option<String>;

    /// Returns the node's children as owned trait objects.
    fn children(&self) -> Vec<Box<dyn DomNode>>;
}
35
36pub struct TextCollector {
43 filter: TextFilter,
44 translatable_attributes: HashSet<&'static str>,
45}
46
47impl TextCollector {
48 pub fn new() -> Self {
49 Self {
50 filter: TextFilter::new(),
51 translatable_attributes: Self::default_translatable_attributes(),
52 }
53 }
54
55 fn default_translatable_attributes() -> HashSet<&'static str> {
57 [
58 "title", "alt", "placeholder", "aria-label", "aria-describedby",
59 "data-tooltip", "data-title", "value"
60 ].into_iter().collect()
61 }
62
63 pub fn collect_texts(&self, root: &dyn DomNode) -> Vec<TextItem> {
65 self.collect_from_node(root, "root".to_string(), 0)
66 .into_iter()
67 .filter(|item| self.filter.should_translate(&item.text))
68 .collect::<Vec<_>>()
69 .pipe(|items| self.deduplicate_texts(items))
70 .pipe(|items| self.sort_by_priority(items))
71 }
72
73 fn collect_from_node(&self, node: &dyn DomNode, location: String, depth: usize) -> Vec<TextItem> {
75 let mut items = Vec::new();
76
77 items.extend(self.collect_attribute_texts(node, &location));
79
80 if node.is_text_node() {
82 if let Some(text) = node.text_content() {
83 let trimmed = text.trim();
84 if !trimmed.is_empty() {
85 items.push(create_text_item(trimmed.to_string(), location.clone()));
86 }
87 }
88 }
89
90 for (i, child) in node.children().iter().enumerate() {
92 let child_location = format!("{}/child[{}]", location, i);
93 items.extend(self.collect_from_node(child.as_ref(), child_location, depth + 1));
94 }
95
96 items
97 }
98
99 fn collect_attribute_texts(&self, node: &dyn DomNode, location: &str) -> Vec<TextItem> {
101 self.translatable_attributes
102 .iter()
103 .filter_map(|&attr_name| {
104 node.get_attribute(attr_name).map(|value| {
105 let location = format!("{}@{}", location, attr_name);
106 let mut item = create_text_item(value, location);
107
108 item.text_type = match attr_name {
110 "title" | "data-title" => TextType::Title,
111 "alt" => TextType::Alt,
112 "placeholder" => TextType::Placeholder,
113 _ => TextType::Other,
114 };
115
116 item
117 })
118 })
119 .collect()
120 }
121
122 fn deduplicate_texts(&self, items: Vec<TextItem>) -> Vec<TextItem> {
124 let mut seen = HashSet::new();
125 items
126 .into_iter()
127 .filter(|item| {
128 let key = item.text.trim().to_lowercase();
129 seen.insert(key)
130 })
131 .collect()
132 }
133
134 fn sort_by_priority(&self, mut items: Vec<TextItem>) -> Vec<TextItem> {
136 items.sort_by(|a, b| {
137 b.priority.cmp(&a.priority)
138 .then_with(|| a.text.len().cmp(&b.text.len()))
139 });
140 items
141 }
142}
143
144impl Default for TextCollector {
145 fn default() -> Self {
146 Self::new()
147 }
148}
149
/// Postfix function application, so a free-standing transformation can sit in the middle
/// of a method chain: `value.pipe(f)` is `f(value)`.
trait Pipe: Sized {
    /// Consumes `self`, passes it to `f`, and returns whatever `f` returns.
    fn pipe<F: FnOnce(Self) -> R, R>(self, f: F) -> R;
}

/// Blanket implementation: every sized type gets `pipe`.
impl<T> Pipe for T {
    fn pipe<F: FnOnce(Self) -> R, R>(self, f: F) -> R {
        f(self)
    }
}
168
169pub fn collect_translatable_texts(root: &dyn DomNode) -> Vec<TextItem> {
175 let collector = TextCollector::new();
176 collector.collect_texts(root)
177}
178
179pub fn group_texts_by_type(items: Vec<TextItem>) -> std::collections::HashMap<TextType, Vec<TextItem>> {
181 let mut groups = std::collections::HashMap::new();
182
183 for item in items {
184 groups.entry(item.text_type.clone()).or_insert_with(Vec::new).push(item);
185 }
186
187 groups
188}
189
190pub fn group_texts_by_priority(items: Vec<TextItem>) -> std::collections::HashMap<TextPriority, Vec<TextItem>> {
192 let mut groups = std::collections::HashMap::new();
193
194 for item in items {
195 let priority = item.priority.clone();
196 groups.entry(priority).or_insert_with(Vec::new).push(item);
197 }
198
199 groups
200}
201
/// Minimal in-memory [`DomNode`] for building fixture trees in tests.
#[cfg(test)]
pub struct TestDomNode {
    /// Tag name; `None` for nodes created with [`TestDomNode::new_text`].
    pub tag_name: Option<String>,
    /// Text content; `None` for nodes created with [`TestDomNode::new_element`].
    pub text_content: Option<String>,
    /// Attributes set on this node, by name.
    pub attributes: std::collections::HashMap<String, String>,
    /// Child nodes, in insertion order.
    pub children: Vec<Box<dyn DomNode>>,
    /// Whether this node reports itself as a text node.
    pub is_text: bool,
}

#[cfg(test)]
impl TestDomNode {
    /// Creates an element node named `tag` with no text, attributes or children.
    pub fn new_element(tag: &str) -> Self {
        Self {
            tag_name: Some(tag.to_string()),
            text_content: None,
            attributes: std::collections::HashMap::new(),
            children: Vec::new(),
            is_text: false,
        }
    }

    /// Creates a text node holding `content`.
    pub fn new_text(content: &str) -> Self {
        Self {
            tag_name: None,
            text_content: Some(content.to_string()),
            attributes: std::collections::HashMap::new(),
            children: Vec::new(),
            is_text: true,
        }
    }

    /// Builder: sets attribute `name` to `value` (replacing any previous value).
    pub fn with_attribute(mut self, name: &str, value: &str) -> Self {
        self.attributes.insert(name.to_string(), value.to_string());
        self
    }

    /// Builder: appends `child` as the last child of this node.
    pub fn with_child(mut self, child: TestDomNode) -> Self {
        self.children.push(Box::new(child));
        self
    }
}

#[cfg(test)]
impl DomNode for TestDomNode {
    fn text_content(&self) -> Option<String> {
        self.text_content.clone()
    }

    fn tag_name(&self) -> Option<String> {
        self.tag_name.clone()
    }

    fn get_attribute(&self, name: &str) -> Option<String> {
        self.attributes.get(name).cloned()
    }

    /// Returns owned snapshots of the children.
    ///
    /// The stored children are `Box<dyn DomNode>` and cannot be cloned, so each one is
    /// rebuilt from what the trait exposes: tag name, text content, text-node flag and,
    /// recursively, its own children. Previously the snapshot was created with an empty
    /// child list, which silently dropped every grandchild.
    ///
    /// NOTE(review): attributes of child nodes are still not carried over, because
    /// `DomNode` offers no way to enumerate them. Fixtures that need attributes below the
    /// root require a storage change (e.g. keeping children as `TestDomNode`).
    fn children(&self) -> Vec<Box<dyn DomNode>> {
        self.children
            .iter()
            .map(|child| {
                Box::new(TestDomNode {
                    tag_name: child.tag_name(),
                    text_content: child.text_content(),
                    attributes: std::collections::HashMap::new(),
                    children: child.children(),
                    is_text: child.is_text_node(),
                }) as Box<dyn DomNode>
            })
            .collect()
    }

    fn is_text_node(&self) -> bool {
        self.is_text
    }
}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// A single text child is collected verbatim.
    #[test]
    fn test_text_collector_basic() {
        let tree = TestDomNode::new_element("div").with_child(TestDomNode::new_text("Hello World"));

        let collected = TextCollector::new().collect_texts(&tree);

        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0].text, "Hello World");
    }

    /// Translatable attributes are collected and typed after the attribute they came from.
    #[test]
    fn test_attribute_collection() {
        let image = TestDomNode::new_element("img")
            .with_attribute("alt", "Beautiful sunset")
            .with_attribute("title", "Sunset Photo");

        let collected = TextCollector::new().collect_texts(&image);
        assert_eq!(collected.len(), 2);

        let alt_item = collected
            .iter()
            .find(|item| item.text == "Beautiful sunset")
            .unwrap();
        assert_eq!(alt_item.text_type, TextType::Alt);
    }

    /// Identical texts are reported only once.
    #[test]
    fn test_deduplication() {
        let mut tree = TestDomNode::new_element("div");
        for content in ["Same text", "Same text", "Different text"].iter() {
            tree = tree.with_child(TestDomNode::new_text(content));
        }

        let collected = TextCollector::new().collect_texts(&tree);

        assert_eq!(collected.len(), 2);
    }

    /// The free-function entry point finds text in a simple paragraph.
    #[test]
    fn test_collect_translatable_texts_function() {
        let paragraph =
            TestDomNode::new_element("p").with_child(TestDomNode::new_text("This is a paragraph"));

        assert!(!collect_translatable_texts(&paragraph).is_empty());
    }

    /// Both grouping helpers produce at least one bucket for non-empty input.
    #[test]
    fn test_grouping_functions() {
        let samples = vec![
            create_text_item("Title text".to_string(), "h1".to_string()),
            create_text_item("Button text".to_string(), "button".to_string()),
        ];

        assert!(!group_texts_by_type(samples.clone()).is_empty());
        assert!(!group_texts_by_priority(samples).is_empty());
    }
}