1use crate::{
9 Document, NodeId, NodeKind, Tag,
10 utils::{escape_attr, escape_text, is_void_element},
11};
12
13pub fn serialize_node(doc: &Document, id: NodeId, buf: &mut String) {
41 let Some(node) = doc.get(id) else { return };
42
43 match &node.kind {
44 NodeKind::Element { name, attributes, .. } => {
45 buf.push('<');
46 buf.push_str(name);
47
48 for (attr_name, attr_value) in attributes {
49 buf.push(' ');
50 buf.push_str(attr_name);
51 buf.push_str("=\"");
52 buf.push_str(&escape_attr(attr_value));
53 buf.push('"');
54 }
55
56 buf.push('>');
57
58 if !is_void_element(name) {
59 for child_id in doc.children(id) {
60 serialize_node(doc, child_id, buf);
61 }
62 buf.push_str("</");
63 buf.push_str(name);
64 buf.push('>');
65 }
66 }
67 NodeKind::Text { content } => {
68 buf.push_str(&escape_text(content));
69 }
70 NodeKind::Comment { content } => {
71 buf.push_str("<!--");
72 buf.push_str(content);
73 buf.push_str("-->");
74 }
75 }
76}
77
78pub fn serialize_inner_html(doc: &Document, id: NodeId, buf: &mut String) {
97 for child_id in doc.children(id) {
98 serialize_node(doc, child_id, buf);
99 }
100}
101
102pub fn collect_text(doc: &Document, id: NodeId, buf: &mut String) {
122 let Some(node) = doc.get(id) else { return };
123
124 match &node.kind {
125 NodeKind::Text { content } => buf.push_str(content),
126 NodeKind::Element { .. } => {
127 for child_id in doc.children(id) {
128 collect_text(doc, child_id, buf);
129 }
130 }
131 NodeKind::Comment { .. } => {}
132 }
133}
134
/// Rendering operations for values that can be written out as HTML or plain text.
///
/// Implementors provide the three `*_into` methods, which write into a caller-supplied
/// buffer. The `String`-returning methods are provided on top of them and start from
/// an empty buffer.
pub trait HtmlSerializer {
    /// Returns whatever [`serialize_html_into`](Self::serialize_html_into) writes into
    /// a fresh, empty `String`.
    #[must_use]
    fn serialize_html(&self) -> String {
        let mut html = String::new();
        self.serialize_html_into(&mut html);
        html
    }

    /// Writes the HTML serialization of this value into `buf`.
    fn serialize_html_into(&self, buf: &mut String);

    /// Returns whatever [`serialize_inner_into`](Self::serialize_inner_into) writes into
    /// a fresh, empty `String`.
    #[must_use]
    fn serialize_inner(&self) -> String {
        let mut inner = String::new();
        self.serialize_inner_into(&mut inner);
        inner
    }

    /// Writes the inner HTML of this value into `buf`.
    fn serialize_inner_into(&self, buf: &mut String);

    /// Returns whatever [`extract_text_into`](Self::extract_text_into) writes into a
    /// fresh, empty `String`.
    #[must_use]
    fn extract_text(&self) -> String {
        let mut text = String::new();
        self.extract_text_into(&mut text);
        text
    }

    /// Writes the text content of this value into `buf`.
    fn extract_text_into(&self, buf: &mut String);
}
205
206impl HtmlSerializer for Tag<'_> {
207 #[inline]
208 fn serialize_html_into(&self, buf: &mut String) {
209 serialize_node(self.document(), self.node_id(), buf);
210 }
211
212 #[inline]
213 fn serialize_inner_into(&self, buf: &mut String) {
214 serialize_inner_html(self.document(), self.node_id(), buf);
215 }
216
217 #[inline]
218 fn extract_text_into(&self, buf: &mut String) {
219 collect_text(self.document(), self.node_id(), buf);
220 }
221}
222
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Soup, SoupConfig};

    /// Signature shared by the free functions under test.
    type Emit = fn(&Document, NodeId, &mut String);

    /// Parses `html`, finds the first match for `selector`, and returns what `emit`
    /// writes for that node into an empty buffer.
    fn emit_for(html: &str, selector: &str, emit: Emit) -> String {
        let soup = Soup::parse(html);
        let doc = soup.document();
        let tag = soup.find(selector).unwrap().unwrap();

        let mut out = String::new();
        emit(doc, tag.node_id(), &mut out);
        out
    }

    #[test]
    fn test_serialize_node_element() {
        let html = emit_for("<div>text</div>", "div", serialize_node);
        assert_eq!(html, "<div>text</div>");
    }

    #[test]
    fn test_serialize_node_with_attributes() {
        let html = emit_for("<a href=\"/page\" class=\"link\">click</a>", "a", serialize_node);
        assert!(html.contains("href=\"/page\""));
        assert!(html.contains("class=\"link\""));
        assert!(html.contains(">click</a>"));
    }

    #[test]
    fn test_serialize_node_escapes_attr() {
        let html = emit_for("<div data-value=\"a & b\">text</div>", "div", serialize_node);
        assert!(html.contains("data-value="));
    }

    #[test]
    fn test_serialize_node_void_element() {
        let html = emit_for("<div><br><hr></div>", "div", serialize_node);
        assert!(html.contains("<br>"));
        assert!(html.contains("<hr>"));
        assert!(!html.contains("</br>"));
        assert!(!html.contains("</hr>"));
    }

    #[test]
    fn test_serialize_node_nested() {
        let html = emit_for("<div><span><b>deep</b></span></div>", "div", serialize_node);
        assert_eq!(html, "<div><span><b>deep</b></span></div>");
    }

    #[test]
    fn test_serialize_node_comment() {
        // Comments are only exercised here with `include_comments` switched on.
        let config = SoupConfig { include_comments: true, ..Default::default() };
        let soup = Soup::parse_with_config("<div>text<!-- comment -->more</div>", config);
        let doc = soup.document();
        let div = soup.find("div").unwrap().unwrap();

        let mut html = String::new();
        serialize_node(doc, div.node_id(), &mut html);
        assert!(html.contains("<!-- comment -->"));
        assert!(html.contains("text"));
        assert!(html.contains("more"));
    }

    #[test]
    fn test_serialize_inner_html() {
        let inner =
            emit_for("<div><span>A</span><span>B</span></div>", "div", serialize_inner_html);
        assert_eq!(inner, "<span>A</span><span>B</span>");
    }

    #[test]
    fn test_collect_text_simple() {
        let text = emit_for("<div>Hello World</div>", "div", collect_text);
        assert_eq!(text, "Hello World");
    }

    #[test]
    fn test_collect_text_nested() {
        let text = emit_for("<div>Hello <b>Bold</b> Text</div>", "div", collect_text);
        assert_eq!(text, "Hello Bold Text");
    }

    #[test]
    fn test_collect_text_skips_comments() {
        let text = emit_for("<div>text<!-- comment -->more</div>", "div", collect_text);
        assert_eq!(text, "textmore");
    }

    #[test]
    fn test_collect_text_empty() {
        let text = emit_for("<div></div>", "div", collect_text);
        assert_eq!(text, "");
    }

    #[test]
    fn test_html_serializer_serialize_html() {
        let soup = Soup::parse("<div class=\"test\"><span>Hi</span></div>");
        let tag = soup.find("div").unwrap().unwrap();

        let rendered = tag.serialize_html();
        assert!(rendered.starts_with("<div"));
        assert!(rendered.ends_with("</div>"));
        assert!(rendered.contains("<span>Hi</span>"));
    }

    #[test]
    fn test_html_serializer_serialize_inner() {
        let soup = Soup::parse("<div><span>A</span><span>B</span></div>");
        let tag = soup.find("div").unwrap().unwrap();

        assert_eq!(tag.serialize_inner(), "<span>A</span><span>B</span>");
    }

    #[test]
    fn test_html_serializer_extract_text() {
        let soup = Soup::parse("<div>Hello <b>World</b>!</div>");
        let tag = soup.find("div").unwrap().unwrap();

        assert_eq!(tag.extract_text(), "Hello World!");
    }

    #[test]
    fn test_html_serializer_buffer_reuse() {
        let soup = Soup::parse("<div>Test</div>");
        let tag = soup.find("div").unwrap().unwrap();

        // Serialize twice into the same pre-sized buffer; capacity must not change.
        let mut scratch = String::with_capacity(100);
        tag.serialize_html_into(&mut scratch);
        let first_capacity = scratch.capacity();

        scratch.clear();
        tag.serialize_html_into(&mut scratch);
        let second_capacity = scratch.capacity();

        assert_eq!(first_capacity, second_capacity);
    }
}