node_html_parser/dom/
text.rs

/// A text node of the DOM: a run of character data plus lazily computed,
/// whitespace-trimmed views of it.
#[derive(Clone, Debug)]
pub struct TextNode {
	/// The node's text as stored. Entity references (e.g. `&amp;`) are kept
	/// as written here; `text()` decodes them on read.
	///
	/// The field is public, so it can be assigned directly, but doing so does
	/// not reset the trimmed caches below. Use `set_raw` to replace the text
	/// and drop the caches in one step.
	pub raw: String,
	/// Optional `(start, end)` pair recorded by `with_range`.
	///
	/// NOTE(review): presumably offsets of this node within the parsed
	/// input; the unit and inclusivity are not visible here, confirm
	/// against the parser.
	pub range: Option<(usize, usize)>,
	/// Memoized result of `trimmed_raw_text()`; `None` until first requested
	/// and again after `set_raw`.
	trimmed_raw_cache: Option<String>,
	/// Memoized result of `trimmed_text()`; `None` until first requested
	/// and again after `set_raw`.
	trimmed_txt_cache: Option<String>,
}
8
9impl TextNode {
10	pub fn new(raw: String) -> Self {
11		Self {
12			raw,
13			range: None,
14			trimmed_raw_cache: None,
15			trimmed_txt_cache: None,
16		}
17	}
18	pub fn with_range(raw: String, start: usize, end: usize) -> Self {
19		Self {
20			raw,
21			range: Some((start, end)),
22			trimmed_raw_cache: None,
23			trimmed_txt_cache: None,
24		}
25	}
26	pub fn range(&self) -> Option<(usize, usize)> {
27		self.range
28	}
29	fn invalidate(&mut self) {
30		self.trimmed_raw_cache = None;
31		self.trimmed_txt_cache = None;
32	}
33	pub fn set_raw(&mut self, v: String) {
34		self.raw = v;
35		self.invalidate();
36	}
37	fn trim_alg(text: &str) -> String {
38		if text.is_empty() {
39			return String::new();
40		}
41		let bytes = text.as_bytes();
42		let mut start = 0usize;
43		let mut end = bytes.len() - 1;
44		while start < bytes.len() {
45			if !bytes[start].is_ascii_whitespace() {
46				break;
47			}
48			start += 1;
49		}
50		while end > start {
51			if !bytes[end].is_ascii_whitespace() {
52				break;
53			}
54			end -= 1;
55		}
56		let has_leading = start > 0;
57		let has_trailing = end < bytes.len() - 1;
58		format!(
59			"{}{}{}",
60			if has_leading { " " } else { "" },
61			&text[start..=end],
62			if has_trailing { " " } else { "" }
63		)
64	}
65	pub fn trimmed_raw_text(&mut self) -> &str {
66		if self.trimmed_raw_cache.is_none() {
67			self.trimmed_raw_cache = Some(Self::trim_alg(&self.raw));
68		}
69		self.trimmed_raw_cache.as_ref().unwrap()
70	}
71	pub fn trimmed_text(&mut self) -> &str {
72		if self.trimmed_txt_cache.is_none() {
73			let dec = html_escape::decode_html_entities(&self.raw).to_string();
74			self.trimmed_txt_cache = Some(Self::trim_alg(&dec));
75		}
76		self.trimmed_txt_cache.as_ref().unwrap()
77	}
78	pub fn is_whitespace(&self) -> bool {
79		regex::Regex::new(r"^(?:\s|&nbsp;)*$")
80			.unwrap()
81			.is_match(&self.raw)
82	}
83	pub fn text(&self) -> String {
84		html_escape::decode_html_entities(&self.raw).to_string()
85	}
86	pub fn raw_text(&self) -> &str {
87		&self.raw
88	}
89	pub fn decoded_text(&self) -> String {
90		self.text()
91	}
92}