1use dom_query::Selection;
21pub use dom_query::Document;
22
23#[must_use]
27pub fn text(sel: &Selection) -> String {
28 let mut result = String::new();
29
30 if let Some(node) = sel.nodes().first() {
31 for child in node.children() {
32 if child.is_element() {
33 break; }
35 if child.is_text() {
36 let text_content = child.text();
37 result.push_str(&text_content);
38 }
39 }
40 }
41
42 result
43}
44
45#[must_use]
49pub fn tail(sel: &Selection) -> String {
50 let mut result = String::new();
51
52 if let Some(node) = sel.nodes().first() {
53 let mut next = node.next_sibling();
54
55 while let Some(sibling) = next {
56 if sibling.is_element() {
57 break; }
59 if sibling.is_text() {
60 let text_content = sibling.text();
61 result.push_str(&text_content);
62 }
63 next = sibling.next_sibling();
64 }
65 }
66
67 result
68}
69
70pub fn set_text(sel: &Selection, new_text: &str) {
74 if let Some(node) = sel.nodes().first() {
76 let mut to_remove = Vec::new();
77
78 for child in node.children() {
79 if child.is_element() {
80 break;
81 }
82 if child.is_text() {
83 to_remove.push(child);
84 }
85 }
86
87 for text_node in to_remove {
88 Selection::from(text_node).remove();
89 }
90 }
91
92 if !new_text.is_empty() {
94 let escaped = escape_html(new_text);
95 sel.prepend_html(escaped.as_str());
96 }
97}
98
99pub fn set_tail(sel: &Selection, new_tail: &str) {
103 for tail_node in tail_nodes(sel) {
105 Selection::from(tail_node).remove();
106 }
107
108 if !new_tail.is_empty() {
110 let escaped = escape_html(new_tail);
111 sel.after_html(escaped.as_str());
112 }
113}
114
115#[must_use]
119pub fn tail_nodes<'a>(sel: &Selection<'a>) -> Vec<dom_query::NodeRef<'a>> {
120 let mut nodes = Vec::new();
121
122 if let Some(node) = sel.nodes().first() {
123 let mut next = node.next_sibling();
124
125 while let Some(sibling) = next {
126 if sibling.is_element() {
127 break;
128 }
129 if sibling.is_text() {
130 nodes.push(sibling);
131 }
132 next = sibling.next_sibling();
133 }
134 }
135
136 nodes
137}
138
/// Reports whether `tag` names an HTML void element (one that never has
/// content or an end tag, such as `br` or `img`).
///
/// The comparison is ASCII-case-insensitive, which is how HTML matches tag
/// names. A Unicode-aware lowercase conversion is deliberately avoided: it
/// would fold characters such as U+212A KELVIN SIGN to `k` and report
/// `"trac\u{212A}"` as `track`. Comparing in place also avoids allocating a
/// lowercased copy on every call.
#[must_use]
pub fn is_void_element(tag: &str) -> bool {
    const VOID_ELEMENTS: [&str; 14] = [
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param",
        "source", "track", "wbr",
    ];

    VOID_ELEMENTS
        .iter()
        .any(|name| tag.eq_ignore_ascii_case(name))
}
150
/// Escapes `text` so it can be embedded in HTML markup as literal text.
///
/// The five characters with special meaning in HTML text and attribute
/// values are replaced by character references:
/// `&` -> `&amp;`, `<` -> `&lt;`, `>` -> `&gt;`, `"` -> `&quot;`,
/// `'` -> `&#39;`. All other characters are copied unchanged.
///
/// The input is processed in a single pass, so an ampersand that is already
/// part of an entity in the input is escaped again (`&amp;` becomes
/// `&amp;amp;`); the input is always treated as plain text.
fn escape_html(text: &str) -> String {
    // Escaping only ever grows the string, so the input length is a lower bound.
    let mut escaped = String::with_capacity(text.len());

    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }

    escaped
}
159
160#[must_use]
162pub fn iter_text(sel: &Selection, separator: &str) -> String {
163 let mut result = String::new();
164 let mut last_level = 0;
165
166 if let Some(node) = sel.nodes().first() {
167 traverse_for_text(node, 0, &mut last_level, separator, &mut result);
168 }
169
170 result.trim().to_string()
171}
172
173fn traverse_for_text(
174 node: &dom_query::NodeRef,
175 level: usize,
176 last_level: &mut usize,
177 sep: &str,
178 result: &mut String,
179) {
180 if node.is_text() {
181 if level != *last_level && !result.is_empty() {
182 result.push_str(sep);
183 }
184 let text_content = node.text();
185 result.push_str(&text_content);
186 } else if node.is_element() {
187 if let Some(tag) = node.node_name() {
189 if is_void_element(&tag) && !result.is_empty() {
190 result.push_str(sep);
191 }
192 }
193 }
194 *last_level = level;
195
196 for child in node.children() {
197 traverse_for_text(&child, level + 1, last_level, sep, result);
198 }
199}
200
201#[must_use]
206pub fn element(tag: &str) -> Document {
207 match tag.to_lowercase().as_str() {
209 "tr" | "th" | "td" | "tbody" | "thead" | "tfoot" => {
210 Document::from(format!("<table><{tag}></{tag}></table>"))
211 }
212 _ => Document::from(format!("<{tag}></{tag}>")),
213 }
214}
215
216#[must_use]
218pub fn sub_element<'a>(parent: &Selection<'a>, tag: &str) -> Selection<'a> {
219 let html = format!("<{tag}></{tag}>");
220 parent.append_html(html.as_str());
221 parent.children().last()
222}
223
224pub fn remove(sel: &Selection, keep_tail: bool) {
230 if !keep_tail {
231 if let Some(node) = sel.nodes().first() {
233 let mut next = node.next_sibling();
234 let mut to_remove = Vec::new();
235
236 while let Some(sibling) = next {
237 if sibling.is_element() {
238 break;
239 }
240 if sibling.is_text() {
241 to_remove.push(sibling);
242 }
243 next = sibling.next_sibling();
244 }
245
246 for text_node in to_remove {
247 Selection::from(text_node).remove();
248 }
249 }
250 }
251 sel.remove();
252}
253
254pub fn strip(sel: &Selection) {
258 if let Some(node) = sel.nodes().first() {
259 if let Some(first_child) = node.first_child() {
261 node.insert_siblings_before(&first_child);
262 }
263 node.remove_from_parent();
265 }
266}
267
/// Reports whether `tag` can be handed to `Selection::select` verbatim as a
/// CSS type selector.
///
/// A tag is accepted when it starts with an ASCII letter and otherwise
/// consists only of ASCII letters, digits, `-` and `_`. The leading-letter
/// requirement matters: a name such as `"1abc"` or a lone `"-"` passes a
/// pure character-class check but is not a valid CSS identifier, so using it
/// as a selector would fail to parse.
///
/// Returning `false` is always safe for callers in this file: they fall back
/// to comparing node names directly instead of building a selector.
fn is_safe_tag_selector(tag: &str) -> bool {
    let mut chars = tag.chars();

    // An empty tag has no first character and is rejected here.
    chars
        .next()
        .is_some_and(|first| first.is_ascii_alphabetic())
        && chars.all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
}
277
278pub fn strip_elements(tree: &Selection, keep_tail: bool, tags: &[&str]) {
280 for tag in tags {
281 let nodes: Vec<_> = if is_safe_tag_selector(tag) {
282 tree.select(tag).nodes().to_vec()
283 } else {
284 let target = tag.to_ascii_lowercase();
286 tree.select("*")
287 .nodes()
288 .iter()
289 .copied()
290 .filter(|n| {
291 n.node_name()
292 .is_some_and(|name| name.to_ascii_lowercase() == target)
293 })
294 .collect()
295 };
296 for node in nodes.into_iter().rev() {
297 let sel = Selection::from(node);
298 remove(&sel, keep_tail);
299 }
300 }
301}
302
303#[must_use]
305pub fn iter<'a>(sel: &Selection<'a>, tags: &[&str]) -> Selection<'a> {
306 if tags.is_empty() {
307 sel.select("*")
308 } else {
309 sel.select(&tags.join(","))
310 }
311}
312
313#[must_use]
315pub fn iter_descendants<'a>(sel: &Selection<'a>, tags: &[&str]) -> Selection<'a> {
316 iter(sel, tags)
318}
319
320pub fn strip_tags(tree: &Selection, tags: &[&str]) {
324 for tag in tags {
325 let nodes: Vec<_> = if is_safe_tag_selector(tag) {
326 tree.select(tag).nodes().to_vec()
327 } else {
328 let target = tag.to_ascii_lowercase();
330 tree.select("*")
331 .nodes()
332 .iter()
333 .copied()
334 .filter(|n| {
335 n.node_name()
336 .is_some_and(|name| name.to_ascii_lowercase() == target)
337 })
338 .collect()
339 };
340 for node in nodes.into_iter().rev() {
341 let sel = Selection::from(node);
342 strip(&sel);
343 }
344 }
345}
346
347pub fn append(parent: &Selection, child: &Selection) {
349 parent.append_selection(child);
350}
351
352pub fn extend(parent: &Selection, children: &[&Selection]) {
354 for child in children {
355 append(parent, child);
356 }
357}
358
#[cfg(test)]
mod tests {
    use super::*;

    // ---- text ----

    /// Only the text ahead of the first child element counts as `text`.
    #[test]
    fn test_text_before_children() {
        let document = Document::from("<div>Hello <span>World</span></div>");
        let container = document.select("div");
        assert_eq!(text(&container), "Hello ");
    }

    /// An element without child elements yields all of its text.
    #[test]
    fn test_text_no_children() {
        let document = Document::from("<p>Just text</p>");
        let paragraph = document.select("p");
        assert_eq!(text(&paragraph), "Just text");
    }

    /// No text ahead of the first child element gives an empty string.
    #[test]
    fn test_text_empty() {
        let document = Document::from("<div><span>only child</span></div>");
        let container = document.select("div");
        assert_eq!(text(&container), "");
    }

    // ---- tail ----

    /// Text following an element is that element's tail.
    #[test]
    fn test_tail_after_element() {
        let document = Document::from("<div><span>inner</span> tail text</div>");
        let inner = document.select("span");
        assert_eq!(tail(&inner), " tail text");
    }

    /// An element with nothing after it has an empty tail.
    #[test]
    fn test_tail_no_tail() {
        let document = Document::from("<div><span>inner</span></div>");
        let inner = document.select("span");
        assert_eq!(tail(&inner), "");
    }

    /// The tail ends at the next element sibling.
    #[test]
    fn test_tail_stops_at_next_element() {
        let document = Document::from("<div><span>1</span> tail <span>2</span></div>");
        let leading_span = document.select("span").first();
        assert_eq!(tail(&leading_span), " tail ");
    }

    /// `tail_nodes` returns at least one node when tail text exists.
    #[test]
    fn test_tail_nodes() {
        let document = Document::from("<div><span>1</span> text1 text2 <span>2</span></div>");
        let leading_span = document.select("span").first();
        let collected = tail_nodes(&leading_span);
        assert!(!collected.is_empty());
    }

    // ---- setters ----

    /// Replacing the text keeps child elements in place.
    #[test]
    fn test_set_text() {
        let document = Document::from("<div>Old text<span>child</span></div>");
        let container = document.select("div");
        set_text(&container, "New text");
        assert_eq!(text(&container), "New text");
        assert!(document.select("span").exists());
    }

    /// Replacing the tail swaps the text after the element.
    #[test]
    fn test_set_tail() {
        let document = Document::from("<div><span>inner</span>Old tail</div>");
        let inner = document.select("span");
        set_tail(&inner, "New tail");
        assert_eq!(tail(&inner), "New tail");
    }

    // ---- construction ----

    /// `element` produces a document containing the requested tag.
    #[test]
    fn test_element_creation() {
        let document = element("p");
        assert!(document.select("p").exists());
    }

    /// `sub_element` appends the new element as a child of the parent.
    #[test]
    fn test_sub_element() {
        let document = Document::from("<div></div>");
        let container = document.select("div");
        let _created = sub_element(&container, "span");
        assert!(document.select("div > span").exists());
    }

    // ---- remove ----

    /// With `keep_tail = true` the trailing text survives the removal.
    #[test]
    fn test_remove_with_tail() {
        let document = Document::from("<div>text <span>remove</span> keep this</div>");
        let victim = document.select("span");
        remove(&victim, true);
        assert!(document.select("span").is_empty());
        assert!(document.select("div").text().contains("keep this"));
    }

    /// With `keep_tail = false` the trailing text is removed as well.
    #[test]
    fn test_remove_without_tail() {
        let document = Document::from("<div>text <span>remove</span> remove this too</div>");
        let victim = document.select("span");
        remove(&victim, false);
        let remaining = document.select("div").text().to_string();
        assert!(!remaining.contains("remove this"));
    }

    // ---- strip ----

    /// Stripping removes the wrapper but leaves its child element.
    #[test]
    fn test_strip() {
        let document = Document::from("<div><p><span>inner</span> text</p></div>");
        let wrapper = document.select("p");
        strip(&wrapper);
        assert!(document.select("p").is_empty());
        assert!(document.select("span").exists());
    }

    /// The child element and its text are still reachable after stripping.
    #[test]
    fn test_strip_preserves_children() {
        let document = Document::from("<div><p><span>inner</span> text</p></div>");
        let wrapper = document.select("p");
        strip(&wrapper);
        assert!(document.select("p").is_empty());
        assert_eq!(document.select("span").length(), 1);
        assert!(document.select("div").text().contains("inner"));
    }

    /// Stripping an element without children simply removes it.
    #[test]
    fn test_strip_empty_element() {
        let document = Document::from("<div><p></p><span>kept</span></div>");
        let wrapper = document.select("p");
        strip(&wrapper);
        assert!(document.select("p").is_empty());
        assert_eq!(document.select("span").length(), 1);
    }

    // ---- strip_elements / strip_tags ----

    /// Elements are removed but their tails stay when `keep_tail` is set.
    #[test]
    fn test_strip_elements_keep_tail() {
        let document = Document::from("<div><b>bold</b> tail<i>italic</i> more</div>");
        let container = document.select("div");
        strip_elements(&container, true, &["b", "i"]);
        assert!(document.select("b").is_empty());
        assert!(document.select("i").is_empty());
        let remaining = container.text().to_string();
        assert!(remaining.contains("tail"));
        assert!(remaining.contains("more"));
    }

    /// Elements and their tails are removed when `keep_tail` is unset.
    #[test]
    fn test_strip_elements_remove_tail() {
        let document = Document::from("<div><b>bold</b> tail<i>italic</i> more</div>");
        let container = document.select("div");
        strip_elements(&container, false, &["b", "i"]);
        assert!(document.select("b").is_empty());
        assert!(document.select("i").is_empty());
        let remaining = container.text().to_string();
        assert!(!remaining.contains("tail"));
        assert!(!remaining.contains("more"));
    }

    /// `strip_tags` drops the tags but keeps the text they wrapped.
    #[test]
    fn test_strip_tags() {
        let document = Document::from("<div><b>bold</b> text <i>italic</i></div>");
        let container = document.select("div");
        strip_tags(&container, &["b", "i"]);
        assert!(document.select("b").is_empty());
        assert!(document.select("i").is_empty());
        let remaining = container.text().to_string();
        assert!(remaining.contains("bold"));
        assert!(remaining.contains("italic"));
    }

    // ---- iteration ----

    /// An empty tag list selects every descendant element.
    #[test]
    fn test_iter_all_elements() {
        let document = Document::from("<div><p>1</p><span>2</span><p>3</p></div>");
        let container = document.select("div");
        let descendants = iter(&container, &[]);
        assert_eq!(descendants.length(), 3);
    }

    /// A tag list restricts the selection to matching elements.
    #[test]
    fn test_iter_filtered() {
        let document = Document::from("<div><p>1</p><span>2</span><p>3</p></div>");
        let container = document.select("div");
        let paragraphs = iter(&container, &["p"]);
        assert_eq!(paragraphs.length(), 2);
    }

    /// Text from different depths is joined with the separator.
    #[test]
    fn test_iter_text_with_separator() {
        let document = Document::from("<p>Hello<span>World</span>!</p>");
        let paragraph = document.select("p");
        let joined = iter_text(&paragraph, " ");
        assert_eq!(joined, "Hello World !");
    }

    // ---- misc ----

    /// Void-element detection ignores case and rejects ordinary tags.
    #[test]
    fn test_is_void_element() {
        assert!(is_void_element("br"));
        assert!(is_void_element("BR"));
        assert!(is_void_element("img"));
        assert!(is_void_element("hr"));
        assert!(!is_void_element("div"));
        assert!(!is_void_element("span"));
    }

    /// `extend` appends every given selection to the parent.
    #[test]
    fn test_extend() {
        let document = Document::from("<div></div>");
        let container = document.select("div");

        let first_source = Document::from("<span>1</span>");
        let first_child = first_source.select("span");
        let second_source = Document::from("<span>2</span>");
        let second_child = second_source.select("span");

        extend(&container, &[&first_child, &second_child]);

        assert_eq!(document.select("div > span").length(), 2);
    }
}