use std::collections::HashSet;
use crate::functional::{TextItem, TextType, TextPriority, TextFilter, create_text_item};
/// Read-only view of a node in a DOM-like tree, as consumed by `TextCollector`.
///
/// Every accessor returns owned data (`String`, boxed children), so
/// implementations are free to build the values on demand.
pub trait DomNode {
/// Returns the node's text, if it has any.
///
/// `TextCollector` only reads this for nodes whose `is_text_node` returns
/// `true`, and trims the result before using it.
fn text_content(&self) -> Option<String>;
/// Returns the node's tag name, if it has one.
///
/// Not read by `TextCollector`; presumably `None` for text nodes — confirm
/// against the concrete implementations.
fn tag_name(&self) -> Option<String>;
/// Looks up the attribute called `name` and returns its value.
///
/// Presumably `None` means the attribute is absent — confirm against the
/// concrete implementations.
fn get_attribute(&self, name: &str) -> Option<String>;
/// Returns the node's children as owned, boxed nodes.
///
/// `TextCollector` uses each child's index in this vector to build the
/// child's location path (`.../child[i]`).
fn children(&self) -> Vec<Box<dyn DomNode>>;
/// Reports whether this node is a text node, i.e. whether `TextCollector`
/// should read `text_content` from it.
fn is_text_node(&self) -> bool;
}
/// Walks a [`DomNode`] tree and gathers the strings that are candidates for
/// translation: trimmed text-node content plus the values of a fixed set of
/// attributes.
pub struct TextCollector {
    /// Predicate applied to every collected string; rejected items are dropped.
    filter: TextFilter,
    /// Names of the attributes whose values are collected.
    ///
    /// Stored in an ordered set so attributes are always visited in the same
    /// (lexicographic) order. A `HashSet` iterates in a per-process random
    /// order, which would make both the surviving duplicate and the order of
    /// equally ranked items vary from run to run.
    translatable_attributes: std::collections::BTreeSet<&'static str>,
}

impl TextCollector {
    /// Creates a collector with a fresh [`TextFilter`] and the default set of
    /// translatable attributes.
    pub fn new() -> Self {
        Self {
            filter: TextFilter::new(),
            translatable_attributes: Self::default_translatable_attributes(),
        }
    }

    /// Returns the attribute names that are collected by default.
    fn default_translatable_attributes() -> std::collections::BTreeSet<&'static str> {
        // NOTE(review): per WAI-ARIA, `aria-describedby` holds a list of element
        // IDs rather than prose; confirm it really belongs in this set.
        [
            "title",
            "alt",
            "placeholder",
            "aria-label",
            "aria-describedby",
            "data-tooltip",
            "data-title",
            "value",
        ]
        .into_iter()
        .collect()
    }

    /// Collects every translatable string reachable from `root`.
    ///
    /// The pipeline is: walk the tree, drop items rejected by the filter,
    /// remove case-insensitive duplicates (first occurrence wins), then sort
    /// by priority.
    pub fn collect_texts(&self, root: &dyn DomNode) -> Vec<TextItem> {
        let mut collected = Vec::new();
        self.collect_from_node(root, "root", &mut collected);
        collected
            .into_iter()
            .filter(|item| self.filter.should_translate(&item.text))
            .collect::<Vec<_>>()
            .pipe(|items| self.deduplicate_texts(items))
            .pipe(|items| self.sort_by_priority(items))
    }

    /// Appends the texts found in `node` and all of its descendants to `items`.
    ///
    /// For each node the order is: attribute texts, then the node's own text
    /// (text nodes only), then the children in document order. `location` is
    /// the path of `node`; children get `"{location}/child[{index}]"`.
    fn collect_from_node(&self, node: &dyn DomNode, location: &str, items: &mut Vec<TextItem>) {
        self.collect_attribute_texts(node, location, items);

        if node.is_text_node() {
            if let Some(text) = node.text_content() {
                let trimmed = text.trim();
                if !trimmed.is_empty() {
                    items.push(create_text_item(trimmed.to_string(), location.to_string()));
                }
            }
        }

        for (index, child) in node.children().iter().enumerate() {
            let child_location = format!("{}/child[{}]", location, index);
            self.collect_from_node(child.as_ref(), &child_location, items);
        }
    }

    /// Appends one item per translatable attribute that `node` carries with a
    /// non-blank value.
    ///
    /// Values are trimmed and blank ones skipped, mirroring how text nodes are
    /// handled, so that e.g. `alt=""` never produces an empty item. The item's
    /// location is `"{location}@{attribute}"`.
    fn collect_attribute_texts(&self, node: &dyn DomNode, location: &str, items: &mut Vec<TextItem>) {
        for &attr_name in &self.translatable_attributes {
            let value = match node.get_attribute(attr_name) {
                Some(value) => value,
                None => continue,
            };
            let trimmed = value.trim();
            if trimmed.is_empty() {
                continue;
            }

            let attr_location = format!("{}@{}", location, attr_name);
            let mut item = create_text_item(trimmed.to_string(), attr_location);
            // The attribute name is a stronger signal than whatever type
            // `create_text_item` assigned, so it overrides the type here.
            item.text_type = match attr_name {
                "title" | "data-title" => TextType::Title,
                "alt" => TextType::Alt,
                "placeholder" => TextType::Placeholder,
                _ => TextType::Other,
            };
            items.push(item);
        }
    }

    /// Removes items whose text equals an earlier item's text, ignoring case
    /// and surrounding whitespace. The first occurrence is kept.
    fn deduplicate_texts(&self, items: Vec<TextItem>) -> Vec<TextItem> {
        let mut seen = HashSet::new();
        items
            .into_iter()
            .filter(|item| seen.insert(item.text.trim().to_lowercase()))
            .collect()
    }

    /// Sorts items by descending `priority` (per its `Ord` implementation);
    /// ties are broken by ascending text length in bytes. The sort is stable,
    /// so fully tied items keep their collection order.
    fn sort_by_priority(&self, mut items: Vec<TextItem>) -> Vec<TextItem> {
        items.sort_by(|a, b| {
            b.priority
                .cmp(&a.priority)
                .then_with(|| a.text.len().cmp(&b.text.len()))
        });
        items
    }
}
impl Default for TextCollector {
fn default() -> Self {
Self::new()
}
}
/// Extension trait that lets any value be threaded through a function in
/// method-call position: `value.pipe(f)` is the same as `f(value)`.
trait Pipe: Sized {
    /// Hands `self` to `f` by value and returns whatever `f` returns.
    fn pipe<F, R>(self, f: F) -> R
    where
        F: FnOnce(Self) -> R,
    {
        f(self)
    }
}

/// Every sized type gets `pipe` through the trait's provided method.
impl<T> Pipe for T {}
/// Convenience wrapper: builds a collector with [`TextCollector::new`] and
/// returns the result of running [`TextCollector::collect_texts`] on `root`.
pub fn collect_translatable_texts(root: &dyn DomNode) -> Vec<TextItem> {
    TextCollector::new().collect_texts(root)
}
/// Buckets `items` by their `text_type`.
///
/// Within each bucket the items keep the order they had in `items`.
pub fn group_texts_by_type(items: Vec<TextItem>) -> std::collections::HashMap<TextType, Vec<TextItem>> {
    let mut by_type = std::collections::HashMap::new();
    items.into_iter().for_each(|item| {
        // Clone the key first: `item` itself is moved into the bucket.
        let key = item.text_type.clone();
        by_type.entry(key).or_insert_with(Vec::new).push(item);
    });
    by_type
}
/// Buckets `items` by their `priority`.
///
/// Within each bucket the items keep the order they had in `items`.
pub fn group_texts_by_priority(items: Vec<TextItem>) -> std::collections::HashMap<TextPriority, Vec<TextItem>> {
    items.into_iter().fold(
        std::collections::HashMap::<TextPriority, Vec<TextItem>>::new(),
        |mut by_priority, item| {
            // Clone the key first: `item` itself is moved into the bucket.
            by_priority
                .entry(item.priority.clone())
                .or_insert_with(Vec::new)
                .push(item);
            by_priority
        },
    )
}
#[cfg(test)]
/// Test-only, in-memory `DomNode` built from plain owned fields.
///
/// Construct one with `new_element` or `new_text` and extend it with the
/// `with_attribute` / `with_child` builder methods.
pub struct TestDomNode {
/// Value returned by `DomNode::tag_name`; `None` for nodes made by `new_text`.
pub tag_name: Option<String>,
/// Value returned by `DomNode::text_content`; `None` for nodes made by `new_element`.
pub text_content: Option<String>,
/// Attribute name/value pairs consulted by `DomNode::get_attribute`.
pub attributes: std::collections::HashMap<String, String>,
/// Child nodes, in the order they were appended with `with_child`.
pub children: Vec<Box<dyn DomNode>>,
/// Value returned by `DomNode::is_text_node`.
pub is_text: bool,
}
#[cfg(test)]
impl TestDomNode {
    /// Creates an element node named `tag` with no text, attributes or children.
    pub fn new_element(tag: &str) -> Self {
        TestDomNode {
            is_text: false,
            tag_name: Some(String::from(tag)),
            text_content: None,
            attributes: std::collections::HashMap::new(),
            children: vec![],
        }
    }

    /// Creates a text node holding `content`, with no tag name, attributes or
    /// children.
    pub fn new_text(content: &str) -> Self {
        TestDomNode {
            is_text: true,
            tag_name: None,
            text_content: Some(String::from(content)),
            attributes: std::collections::HashMap::new(),
            children: vec![],
        }
    }

    /// Builder step: sets attribute `name` to `value` (replacing any previous
    /// value) and returns the node.
    pub fn with_attribute(mut self, name: &str, value: &str) -> Self {
        let (key, val) = (name.to_owned(), value.to_owned());
        self.attributes.insert(key, val);
        self
    }

    /// Builder step: appends `child` as the last child and returns the node.
    pub fn with_child(mut self, child: TestDomNode) -> Self {
        let boxed: Box<dyn DomNode> = Box::new(child);
        self.children.push(boxed);
        self
    }
}
#[cfg(test)]
impl DomNode for TestDomNode {
    /// Returns a copy of the stored text, if any.
    fn text_content(&self) -> Option<String> {
        self.text_content.clone()
    }

    /// Returns a copy of the stored tag name, if any.
    fn tag_name(&self) -> Option<String> {
        self.tag_name.clone()
    }

    /// Returns a copy of the value stored for attribute `name`, if present.
    fn get_attribute(&self, name: &str) -> Option<String> {
        self.attributes.get(name).cloned()
    }

    /// Returns a snapshot of each stored child as a fresh `TestDomNode`.
    ///
    /// `Box<dyn DomNode>` cannot be cloned, so every child is rebuilt from what
    /// the trait exposes. Descendants are rebuilt recursively through
    /// `child.children()`, so trees deeper than one level keep their shape.
    ///
    /// NOTE(review): `DomNode` offers no way to enumerate attributes, so the
    /// snapshots carry none; attributes set on non-root test nodes are
    /// therefore invisible to the collector. Storing children as
    /// `Vec<TestDomNode>` (with `Clone`) would remove this limitation.
    fn children(&self) -> Vec<Box<dyn DomNode>> {
        self.children
            .iter()
            .map(|child| {
                Box::new(TestDomNode {
                    tag_name: child.tag_name(),
                    text_content: child.text_content(),
                    attributes: std::collections::HashMap::new(),
                    // Recurse instead of starting from an empty list, which
                    // used to drop every grandchild and deeper descendant.
                    children: child.children(),
                    is_text: child.is_text_node(),
                }) as Box<dyn DomNode>
            })
            .collect()
    }

    /// Returns the stored text-node flag.
    fn is_text_node(&self) -> bool {
        self.is_text
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single text child under an element is collected verbatim.
    #[test]
    fn test_text_collector_basic() {
        let tree = TestDomNode::new_element("div")
            .with_child(TestDomNode::new_text("Hello World"));

        let collected = TextCollector::new().collect_texts(&tree);

        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0].text, "Hello World");
    }

    /// Attribute values on the root are collected and typed by attribute name.
    #[test]
    fn test_attribute_collection() {
        let image = TestDomNode::new_element("img")
            .with_attribute("alt", "Beautiful sunset")
            .with_attribute("title", "Sunset Photo");

        let collected = TextCollector::new().collect_texts(&image);
        assert_eq!(collected.len(), 2);

        let alt_item = collected
            .iter()
            .find(|item| item.text == "Beautiful sunset")
            .unwrap();
        assert_eq!(alt_item.text_type, TextType::Alt);
    }

    /// Repeated text is reported only once.
    #[test]
    fn test_deduplication() {
        let mut tree = TestDomNode::new_element("div");
        for content in ["Same text", "Same text", "Different text"] {
            tree = tree.with_child(TestDomNode::new_text(content));
        }

        let collected = TextCollector::new().collect_texts(&tree);

        assert_eq!(collected.len(), 2);
    }

    /// The free-function wrapper yields results for a simple paragraph.
    #[test]
    fn test_collect_translatable_texts_function() {
        let paragraph = TestDomNode::new_element("p")
            .with_child(TestDomNode::new_text("This is a paragraph"));

        let collected = collect_translatable_texts(&paragraph);

        assert!(!collected.is_empty());
    }

    /// Both grouping helpers produce at least one bucket for non-empty input.
    #[test]
    fn test_grouping_functions() {
        let items = vec![
            create_text_item("Title text".to_string(), "h1".to_string()),
            create_text_item("Button text".to_string(), "button".to_string()),
        ];

        let by_type = group_texts_by_type(items.clone());
        assert!(!by_type.is_empty());

        let by_priority = group_texts_by_priority(items);
        assert!(!by_priority.is_empty());
    }
}