// node_html_parser/dom/text.rs

/// A text node that stores its text together with lazily computed trimmed forms.
///
/// `raw` is a public field, but the two caches are only cleared by `set_raw`;
/// assigning to `raw` directly leaves any previously computed trimmed value in
/// place, so prefer `set_raw` when changing the text.
#[derive(Debug, Clone)]
pub struct TextNode {
    /// Text content exactly as stored; `text()` decodes HTML entities from it.
    pub raw: String,
    /// Optional `(start, end)` pair supplied through `with_range`.
    /// NOTE(review): presumably the offsets of this node in the parsed source — confirm.
    pub range: Option<(usize, usize)>,
    /// Cached result of `trimmed_raw_text`; `None` until first computed.
    trimmed_raw_cache: Option<String>,
    /// Cached result of `trimmed_text`; `None` until first computed.
    trimmed_txt_cache: Option<String>,
}
8
9impl TextNode {
10 pub fn new(raw: String) -> Self {
11 Self {
12 raw,
13 range: None,
14 trimmed_raw_cache: None,
15 trimmed_txt_cache: None,
16 }
17 }
18 pub fn with_range(raw: String, start: usize, end: usize) -> Self {
19 Self {
20 raw,
21 range: Some((start, end)),
22 trimmed_raw_cache: None,
23 trimmed_txt_cache: None,
24 }
25 }
26 pub fn range(&self) -> Option<(usize, usize)> {
27 self.range
28 }
29 fn invalidate(&mut self) {
30 self.trimmed_raw_cache = None;
31 self.trimmed_txt_cache = None;
32 }
33 pub fn set_raw(&mut self, v: String) {
34 self.raw = v;
35 self.invalidate();
36 }
37 fn trim_alg(text: &str) -> String {
38 if text.is_empty() {
39 return String::new();
40 }
41 let bytes = text.as_bytes();
42 let mut start = 0usize;
43 let mut end = bytes.len() - 1;
44 while start < bytes.len() {
45 if !bytes[start].is_ascii_whitespace() {
46 break;
47 }
48 start += 1;
49 }
50 while end > start {
51 if !bytes[end].is_ascii_whitespace() {
52 break;
53 }
54 end -= 1;
55 }
56 let has_leading = start > 0;
57 let has_trailing = end < bytes.len() - 1;
58 format!(
59 "{}{}{}",
60 if has_leading { " " } else { "" },
61 &text[start..=end],
62 if has_trailing { " " } else { "" }
63 )
64 }
65 pub fn trimmed_raw_text(&mut self) -> &str {
66 if self.trimmed_raw_cache.is_none() {
67 self.trimmed_raw_cache = Some(Self::trim_alg(&self.raw));
68 }
69 self.trimmed_raw_cache.as_ref().unwrap()
70 }
71 pub fn trimmed_text(&mut self) -> &str {
72 if self.trimmed_txt_cache.is_none() {
73 let dec = html_escape::decode_html_entities(&self.raw).to_string();
74 self.trimmed_txt_cache = Some(Self::trim_alg(&dec));
75 }
76 self.trimmed_txt_cache.as_ref().unwrap()
77 }
78 pub fn is_whitespace(&self) -> bool {
79 regex::Regex::new(r"^(?:\s| )*$")
80 .unwrap()
81 .is_match(&self.raw)
82 }
83 pub fn text(&self) -> String {
84 html_escape::decode_html_entities(&self.raw).to_string()
85 }
86 pub fn raw_text(&self) -> &str {
87 &self.raw
88 }
89 pub fn decoded_text(&self) -> String {
90 self.text()
91 }
92}