node_html_parser/dom/
text.rs1use std::sync::OnceLock;
2
/// Lazily initialized regex used by `TextNode::is_whitespace`.
///
/// The pattern is compiled on the first call that reaches `get_or_init` and the
/// same compiled instance is reused afterwards.
static WHITESPACE_REGEX: OnceLock<regex::Regex> = OnceLock::new();
5
/// A text node: the raw source text plus lazily computed, cached trimmed
/// variants of it.
#[derive(Debug, Clone)]
pub struct TextNode {
    /// Text exactly as stored, before HTML entity decoding (`text` decodes it).
    ///
    /// NOTE(review): this field is public, so assigning to it directly skips
    /// the cache reset that `set_raw` performs and can leave the cached
    /// trimmed strings stale.
    pub raw: String,
    /// The `(start, end)` pair supplied to `with_range`; `None` when the node
    /// was built with `new`. Presumably offsets into the source document —
    /// confirm against the parser.
    pub range: Option<(usize, usize)>,
    /// Memoized result of `trimmed_raw_text`; `None` until first computed.
    trimmed_raw_cache: Option<String>,
    /// Memoized result of `trimmed_text`; `None` until first computed.
    trimmed_txt_cache: Option<String>,
}
13
14impl TextNode {
15 pub fn new(raw: String) -> Self {
16 Self {
17 raw,
18 range: None,
19 trimmed_raw_cache: None,
20 trimmed_txt_cache: None,
21 }
22 }
23 pub fn with_range(raw: String, start: usize, end: usize) -> Self {
24 Self {
25 raw,
26 range: Some((start, end)),
27 trimmed_raw_cache: None,
28 trimmed_txt_cache: None,
29 }
30 }
31 pub fn range(&self) -> Option<(usize, usize)> {
32 self.range
33 }
34 fn invalidate(&mut self) {
35 self.trimmed_raw_cache = None;
36 self.trimmed_txt_cache = None;
37 }
38 pub fn set_raw(&mut self, v: String) {
39 self.raw = v;
40 self.invalidate();
41 }
42 fn trim_alg(text: &str) -> String {
43 if text.is_empty() {
44 return String::new();
45 }
46 let bytes = text.as_bytes();
47 let mut start = 0usize;
48 let mut end = bytes.len() - 1;
49 while start < bytes.len() {
50 if !bytes[start].is_ascii_whitespace() {
51 break;
52 }
53 start += 1;
54 }
55 while end > start {
56 if !bytes[end].is_ascii_whitespace() {
57 break;
58 }
59 end -= 1;
60 }
61 let has_leading = start > 0;
62 let has_trailing = end < bytes.len() - 1;
63 format!(
64 "{}{}{}",
65 if has_leading { " " } else { "" },
66 &text[start..=end],
67 if has_trailing { " " } else { "" }
68 )
69 }
70 pub fn trimmed_raw_text(&mut self) -> &str {
71 if self.trimmed_raw_cache.is_none() {
72 self.trimmed_raw_cache = Some(Self::trim_alg(&self.raw));
73 }
74 self.trimmed_raw_cache.as_ref().unwrap()
75 }
76 pub fn trimmed_text(&mut self) -> &str {
77 if self.trimmed_txt_cache.is_none() {
78 let dec = html_escape::decode_html_entities(&self.raw).to_string();
79 self.trimmed_txt_cache = Some(Self::trim_alg(&dec));
80 }
81 self.trimmed_txt_cache.as_ref().unwrap()
82 }
83 pub fn is_whitespace(&self) -> bool {
84 WHITESPACE_REGEX
85 .get_or_init(|| regex::Regex::new(r"^(?:\s| )*$").unwrap())
86 .is_match(&self.raw)
87 }
88 pub fn text(&self) -> String {
89 html_escape::decode_html_entities(&self.raw).to_string()
90 }
91 pub fn raw_text(&self) -> &str {
92 &self.raw
93 }
94 pub fn decoded_text(&self) -> String {
95 self.text()
96 }
97}