ferrum_email_render/
text_extractor.rs1use ferrum_email_core::{Node, Tag};
8
9pub fn extract_text(node: &Node) -> String {
19 let mut output = String::new();
20 extract_node(node, &mut output);
21 clean_text(&output)
23}
24
25fn extract_node(node: &Node, output: &mut String) {
26 match node {
27 Node::Text(text) => {
28 output.push_str(text);
29 }
30 Node::Element(element) => {
31 let tag = &element.tag;
32
33 match tag {
35 Tag::A => {
36 let link_text = extract_children_text(&element.children);
38 let href = element
39 .attrs
40 .iter()
41 .find(|a| a.name == "href")
42 .map(|a| a.value.as_str())
43 .unwrap_or("");
44
45 if !link_text.is_empty() && !href.is_empty() && link_text != href {
46 output.push_str(&link_text);
47 output.push_str(" (");
48 output.push_str(href);
49 output.push(')');
50 } else if !link_text.is_empty() {
51 output.push_str(&link_text);
52 } else if !href.is_empty() {
53 output.push_str(href);
54 }
55 return;
56 }
57 Tag::Img => {
58 let alt = element
59 .attrs
60 .iter()
61 .find(|a| a.name == "alt")
62 .map(|a| a.value.as_str())
63 .unwrap_or("");
64 if !alt.is_empty() {
65 output.push('[');
66 output.push_str(alt);
67 output.push(']');
68 }
69 return;
70 }
71 Tag::Hr => {
72 output.push_str("\n---\n");
73 return;
74 }
75 Tag::Br => {
76 output.push('\n');
77 return;
78 }
79 Tag::Head | Tag::Meta | Tag::Title => {
80 return;
82 }
83 _ => {}
84 }
85
86 if is_hidden_element(element) {
88 return;
89 }
90
91 let is_block = is_block_element(tag);
92
93 if is_block {
94 output.push('\n');
95 }
96
97 for child in &element.children {
98 extract_node(child, output);
99 }
100
101 if is_block {
102 output.push('\n');
103 }
104 }
105 Node::Fragment(nodes) => {
106 for node in nodes {
107 extract_node(node, output);
108 }
109 }
110 Node::None => {}
111 }
112}
113
114fn extract_children_text(children: &[Node]) -> String {
115 let mut output = String::new();
116 for child in children {
117 extract_node(child, &mut output);
118 }
119 output.trim().to_string()
120}
121
122fn is_block_element(tag: &Tag) -> bool {
123 matches!(
124 tag,
125 Tag::P
126 | Tag::Div
127 | Tag::H1
128 | Tag::H2
129 | Tag::H3
130 | Tag::H4
131 | Tag::H5
132 | Tag::H6
133 | Tag::Tr
134 | Tag::Table
135 | Tag::Pre
136 )
137}
138
139fn is_hidden_element(element: &ferrum_email_core::Element) -> bool {
140 if let Some(ref display) = element.style.display
142 && *display == ferrum_email_core::Display::None
143 {
144 return true;
145 }
146 element
148 .attrs
149 .iter()
150 .any(|a| a.name == "style" && a.value.contains("display:none"))
151}
152
/// Normalizes raw extracted text.
///
/// - Every line is trimmed of leading and trailing whitespace.
/// - Within a run of consecutive blank lines, only the first two are kept;
///   the rest are dropped.
/// - Leading and trailing whitespace (including blank lines) is removed from
///   the final result.
fn clean_text(input: &str) -> String {
    /// Maximum number of consecutive blank lines preserved in the output.
    const MAX_CONSECUTIVE_BLANK_LINES: usize = 2;

    let mut result = String::with_capacity(input.len());
    let mut blank_run: usize = 0;

    // Single pass over the trimmed lines; no intermediate vectors are needed.
    for line in input.lines().map(str::trim) {
        if line.is_empty() {
            blank_run += 1;
            if blank_run <= MAX_CONSECUTIVE_BLANK_LINES {
                result.push('\n');
            }
        } else {
            blank_run = 0;
            // Everything pushed so far ends with '\n', so the line can be
            // appended directly without a separator check.
            result.push_str(line);
            result.push('\n');
        }
    }

    result.trim().to_string()
}
182
#[cfg(test)]
mod tests {
    use super::*;
    use ferrum_email_core::{Element, Node, Tag};

    /// A bare text node is returned unchanged.
    #[test]
    fn test_extract_text_from_text_node() {
        let rendered = extract_text(&Node::text("Hello, World!"));
        assert_eq!(rendered, "Hello, World!");
    }

    /// A link whose text differs from its target renders as `text (href)`.
    #[test]
    fn test_extract_text_from_link() {
        let link = Element::new(Tag::A)
            .attr("href", "https://example.com")
            .child(Node::text("Click here"));
        let rendered = extract_text(&Node::Element(link));
        assert_eq!(rendered, "Click here (https://example.com)");
    }

    /// A horizontal rule renders as `---` once surrounding newlines are trimmed.
    #[test]
    fn test_extract_text_from_hr() {
        let rule = Element::new(Tag::Hr);
        let rendered = extract_text(&Node::Element(rule));
        assert_eq!(rendered, "---");
    }

    /// An image renders as its alt text in square brackets.
    #[test]
    fn test_extract_text_from_image() {
        let image = Element::new(Tag::Img)
            .attr("alt", "Logo")
            .attr("src", "logo.png");
        let rendered = extract_text(&Node::Element(image));
        assert_eq!(rendered, "[Logo]");
    }
}