node_html_parser/dom/element/
attributes.rs1use super::main::HTMLElement;
2use regex::Regex;
3use std::collections::HashMap;
4use std::sync::OnceLock;
5
6static ATTR_PARSE_REGEX: OnceLock<Regex> = OnceLock::new();
8
9impl HTMLElement {
10 pub fn attrs_lower_decoded(&mut self) -> HashMap<String, String> {
11 self.ensure_lower_decoded();
12 self.cache_lower_decoded.clone().unwrap_or_default()
13 }
14
15 pub fn set_attributes(&mut self, attributes: &[(String, String)]) {
16 self.attrs = attributes
18 .iter()
19 .map(|(k, v)| (k.to_lowercase(), v.clone()))
20 .collect();
21 self.raw_attrs = attributes
22 .iter()
23 .map(|(k, v)| {
24 if v.is_empty() || v == "null" || v == "\"\"" {
26 k.clone()
27 } else {
28 format!("{}={}", k, quote_attribute(v))
29 }
30 })
31 .collect::<Vec<_>>()
32 .join(" ");
33 self.cache_raw_map = None;
34 self.cache_lower_decoded = None;
35 if let Some((_, idv)) = self.attrs.iter().find(|(kk, _)| kk == "id") {
37 self.id = idv.clone();
38 }
39 if self.attrs.iter().any(|(kk, _)| kk == "class") {
40 self.class_cache = None;
41 }
42 }
43 pub fn remove_attribute(&mut self, key: &str) {
44 self.build_raw_cache();
45 let mut raw_map = self.cache_raw_map.take().unwrap_or_default();
46 let target = key.to_lowercase();
47 raw_map.retain(|k, _| k.to_lowercase() != target);
48 self.attrs.retain(|(kk, _)| kk != &target);
50 self.raw_attrs = raw_map
51 .iter()
52 .map(|(k, v)| {
53 if v.is_empty() {
54 k.clone()
55 } else {
56 format!("{}={}", k, quote_attribute(v))
57 }
58 })
59 .collect::<Vec<_>>()
60 .join(" ");
61 self.cache_raw_map = None;
62 self.cache_lower_decoded = None;
63 if target == "id" {
64 self.id.clear();
65 }
66 if target == "class" {
67 self.class_cache = None;
68 }
69 self.attrs_complete = true; self.attrs_modified = true; }
72
73 pub fn get_attr(&self, key: &str) -> Option<&str> {
74 let k = key.to_lowercase();
76 if let Some(found) = self.attrs.iter().find(|(kk, _)| *kk == k) {
77 return Some(found.1.as_str());
78 }
79
80 if !self.attrs_complete && !self.raw_attrs.is_empty() {
82 let mut_ptr = self as *const HTMLElement as *mut HTMLElement;
84 unsafe {
85 (*mut_ptr).ensure_all_attrs();
86 return (*mut_ptr)
88 .attrs
89 .iter()
90 .find(|(kk, _)| *kk == k)
91 .map(|(_, v)| v.as_str());
92 }
93 }
94
95 None
96 }
97 pub fn has_attr(&self, key: &str) -> bool {
98 self.get_attr(key).is_some()
99 }
100
101 pub fn set_attr(&mut self, key: &str, val: &str) {
102 let k = key.to_lowercase();
103 if let Some(kv) = self.attrs.iter_mut().find(|(kk, _)| *kk == k) {
104 kv.1 = val.to_string();
105 } else {
106 self.attrs.push((k, val.to_string()));
107 }
108 self.rebuild_raw_attrs();
109 self.cache_raw_map = None;
110 self.cache_lower_decoded = None;
111 if key.eq_ignore_ascii_case("id") {
112 self.id = val.to_string();
113 }
114 }
115 pub fn remove_attr(&mut self, key: &str) {
116 let k = key.to_lowercase();
117 self.attrs.retain(|(kk, _)| *kk != k);
118 self.rebuild_raw_attrs();
119 self.cache_raw_map = None;
120 self.cache_lower_decoded = None;
121 if k == "id" {
122 self.id.clear();
123 }
124 }
125 pub fn remove_id(&mut self) {
127 self.remove_attribute("id");
128 }
129 pub fn set_id(&mut self, id: &str) {
131 self.set_attribute("id", id);
132 }
133 pub(super) fn rebuild_raw_attrs(&mut self) {
134 fn quote_attr(src: &str) -> String {
136 if src.is_empty() || src == "null" {
137 return src.to_string();
138 }
139 let replaced = src.replace('"', """);
141 let jsoned =
143 serde_json::to_string(&replaced).unwrap_or_else(|_| format!("\"{}\"", replaced));
144 let inner = jsoned.trim_matches('"');
146 let inner = inner
147 .replace("\\t", "\t")
148 .replace("\\n", "\n")
149 .replace("\\r", "\r")
150 .replace('\\', "");
151 format!("\"{}\"", inner)
152 }
153 self.raw_attrs = self
154 .attrs
155 .iter()
156 .map(|(k, v)| {
157 if v.is_empty() {
158 k.clone()
159 } else {
160 format!("{}={}", k, quote_attr(v))
161 }
162 })
163 .collect::<Vec<_>>()
164 .join(" ");
165 }
166
167 pub fn attributes(&mut self) -> std::collections::HashMap<String, String> {
168 self.build_raw_cache();
171 let mut out = std::collections::HashMap::new();
172 if let Some(raw) = &self.cache_raw_map {
173 for (orig_k, raw_v) in raw.iter() {
174 let decoded = html_escape::decode_html_entities(raw_v).to_string();
175 out.insert(orig_k.clone(), decoded);
177 }
178 }
179 out
180 }
181 pub fn raw_attributes(&mut self) -> HashMap<String, String> {
182 self.build_raw_cache();
183 self.cache_raw_map.clone().unwrap_or_default()
184 }
185 pub fn raw_attrs_str(&self) -> &str {
187 &self.raw_attrs
188 }
189
190 pub fn get_attribute(&mut self, key: &str) -> Option<String> {
191 self.ensure_lower_decoded();
192 self.cache_lower_decoded
193 .as_ref()
194 .unwrap()
195 .get(&key.to_lowercase())
196 .cloned()
197 }
198
199 pub fn set_attribute(&mut self, key: &str, value: &str) {
200 let quoted_value = if value.is_empty() {
202 None
203 } else {
204 Some(quote_attribute(value))
205 };
206
207 if self.raw_attrs.is_empty() {
208 if let Some(qv) = quoted_value {
209 self.raw_attrs = format!("{}={}", key, qv);
210 } else {
211 self.raw_attrs = key.to_string();
212 }
213 } else {
214 let re = ATTR_PARSE_REGEX.get_or_init(|| {
216 regex::Regex::new(
217 r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?"#,
218 )
219 .unwrap()
220 });
221
222 let mut result_attrs = Vec::new();
223 let mut found = false;
224
225 for cap in re.captures_iter(&self.raw_attrs) {
226 let existing_key = cap.get(1).unwrap().as_str();
227 if existing_key.eq_ignore_ascii_case(key) {
228 if let Some(qv) = "ed_value {
230 result_attrs.push(format!("{}={}", existing_key, qv));
231 } else {
232 result_attrs.push(existing_key.to_string());
233 }
234 found = true;
235 } else {
236 let existing_val = cap.get(2).map(|m| m.as_str()).unwrap_or("");
238 if existing_val.is_empty() {
239 result_attrs.push(existing_key.to_string());
240 } else {
241 result_attrs.push(format!("{}={}", existing_key, existing_val));
242 }
243 }
244 }
245
246 if !found {
248 if let Some(qv) = quoted_value {
249 result_attrs.push(format!("{}={}", key, qv));
250 } else {
251 result_attrs.push(key.to_string());
252 }
253 }
254
255 self.raw_attrs = result_attrs.join(" ");
256 }
257
258 self.ensure_all_attrs();
260 let lk = key.to_lowercase();
261 let decoded_val = html_escape::decode_html_entities(value).to_string();
262 if let Some(kv) = self.attrs.iter_mut().find(|(k, _)| *k == lk) {
263 kv.1 = decoded_val;
264 } else {
265 self.attrs.push((lk, decoded_val));
266 }
267
268 self.cache_raw_map = None;
270 self.cache_lower_decoded = None;
271 self.attrs_complete = true;
272 self.attrs_modified = true; if key.eq_ignore_ascii_case("id") {
276 self.id = value.to_string();
277 }
278 if key.eq_ignore_ascii_case("class") {
279 self.class_cache = None;
280 }
281 }
282
283 pub fn has_attribute(&mut self, key: &str) -> bool {
284 self.ensure_lower_decoded();
285 self.cache_lower_decoded
286 .as_ref()
287 .unwrap()
288 .contains_key(&key.to_lowercase())
289 }
290
291 pub(crate) fn ensure_all_attrs(&mut self) {
292 if self.attrs_complete {
293 return;
294 }
295
296 self.attrs.clear();
298 self.build_raw_cache();
299 if let Some(ref raw_map) = self.cache_raw_map {
300 for (key, value) in raw_map.iter() {
301 let decoded_val = html_escape::decode_html_entities(value).to_string();
302 self.attrs.push((key.to_lowercase(), decoded_val));
303 }
304 }
305
306 self.attrs_complete = true;
307 }
308 fn build_raw_cache(&mut self) {
309 if self.cache_raw_map.is_some() {
310 return;
311 }
312
313 let mut map = HashMap::new();
314 if !self.raw_attrs.is_empty() {
315 let re = ATTR_PARSE_REGEX.get_or_init(|| {
316 regex::Regex::new(
317 r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?"#,
318 )
319 .unwrap()
320 });
321 for cap in re.captures_iter(&self.raw_attrs) {
322 let key = cap.get(1).unwrap().as_str();
323 let mut val = cap.get(2).map(|m| m.as_str()).unwrap_or("").to_string();
324 if !val.is_empty() {
325 if (val.starts_with('\"') && val.ends_with('\"'))
326 || (val.starts_with('\'') && val.ends_with('\''))
327 {
328 val = val[1..val.len() - 1].to_string();
329 }
330 }
331 map.entry(key.to_string()).or_insert(val);
333 }
334 }
335 self.cache_raw_map = Some(map);
336 }
337
338 fn ensure_lower_decoded(&mut self) {
339 if self.cache_lower_decoded.is_some() {
340 return;
341 }
342
343 self.build_raw_cache();
344 let mut lower_decoded = HashMap::new();
345
346 if let Some(ref raw_map) = self.cache_raw_map {
347 for (key, value) in raw_map.iter() {
348 let decoded_val = html_escape::decode_html_entities(value).to_string();
349 let lower_key = key.to_lowercase();
350 lower_decoded.insert(lower_key, decoded_val);
351 }
352 }
353
354 self.cache_lower_decoded = Some(lower_decoded);
355 }
356}
357
358fn quote_attribute(val: &str) -> String {
359 if val.is_empty() {
360 return val.to_string();
361 }
362 let replaced = val.replace('"', """);
363 let jsoned = serde_json::to_string(&replaced).unwrap_or_else(|_| format!("\"{}\"", replaced));
364 let inner = jsoned.trim_matches('"');
365 let inner = inner
366 .replace("\\t", "\t")
367 .replace("\\n", "\n")
368 .replace("\\r", "\r")
369 .replace('\\', "");
370 format!("\"{}\"", inner)
371}