node_html_parser/dom/element/
attributes.rs1use super::main::HTMLElement;
2use regex::Regex;
3use std::collections::HashMap;
4
5impl HTMLElement {
6 pub fn attrs_lower_decoded(&mut self) -> HashMap<String, String> {
7 self.ensure_lower_decoded();
8 self.cache_lower_decoded.clone().unwrap_or_default()
9 }
10
11 pub fn set_attributes(&mut self, attributes: &[(String, String)]) {
12 self.attrs = attributes
14 .iter()
15 .map(|(k, v)| (k.to_lowercase(), v.clone()))
16 .collect();
17 self.raw_attrs = attributes
18 .iter()
19 .map(|(k, v)| {
20 if v.is_empty() || v == "null" || v == "\"\"" {
22 k.clone()
23 } else {
24 format!("{}={}", k, quote_attribute(v))
25 }
26 })
27 .collect::<Vec<_>>()
28 .join(" ");
29 self.cache_raw_map = None;
30 self.cache_lower_decoded = None;
31 if let Some((_, idv)) = self.attrs.iter().find(|(kk, _)| kk == "id") {
33 self.id = idv.clone();
34 }
35 if self.attrs.iter().any(|(kk, _)| kk == "class") {
36 self.class_cache = None;
37 }
38 }
39 pub fn remove_attribute(&mut self, key: &str) {
40 self.ensure_raw_attributes();
41 let mut raw_map = self.cache_raw_map.take().unwrap_or_default();
42 let target = key.to_lowercase();
43 raw_map.retain(|k, _| k.to_lowercase() != target);
44 self.attrs.retain(|(kk, _)| kk != &target);
46 self.raw_attrs = raw_map
47 .iter()
48 .map(|(k, v)| {
49 if v.is_empty() {
50 k.clone()
51 } else {
52 format!("{}={}", k, quote_attribute(v))
53 }
54 })
55 .collect::<Vec<_>>()
56 .join(" ");
57 self.cache_raw_map = None;
58 self.cache_lower_decoded = None;
59 if target == "id" {
60 self.id.clear();
61 }
62 if target == "class" {
63 self.class_cache = None;
64 }
65 self.attrs_complete = true; }
67
68 pub fn get_attr(&self, key: &str) -> Option<&str> {
69 let k = key.to_lowercase();
71 let mut_ptr = self as *const HTMLElement as *mut HTMLElement; unsafe {
73 (*mut_ptr).ensure_all_attrs();
74 }
75 self.attrs
76 .iter()
77 .find(|(kk, _)| *kk == k)
78 .map(|(_, v)| v.as_str())
79 }
80 pub fn has_attr(&self, key: &str) -> bool {
81 self.get_attr(key).is_some()
82 }
83
84 pub fn set_attr(&mut self, key: &str, val: &str) {
85 let k = key.to_lowercase();
86 if let Some(kv) = self.attrs.iter_mut().find(|(kk, _)| *kk == k) {
87 kv.1 = val.to_string();
88 } else {
89 self.attrs.push((k, val.to_string()));
90 }
91 self.rebuild_raw_attrs();
92 self.cache_raw_map = None;
93 self.cache_lower_decoded = None;
94 if key.eq_ignore_ascii_case("id") {
95 self.id = val.to_string();
96 }
97 }
98 pub fn remove_attr(&mut self, key: &str) {
99 let k = key.to_lowercase();
100 self.attrs.retain(|(kk, _)| *kk != k);
101 self.rebuild_raw_attrs();
102 self.cache_raw_map = None;
103 self.cache_lower_decoded = None;
104 if k == "id" {
105 self.id.clear();
106 }
107 }
108 pub fn remove_id(&mut self) { self.remove_attribute("id"); }
110 pub fn set_id(&mut self, id: &str) { self.set_attribute("id", id); }
112 pub(super) fn rebuild_raw_attrs(&mut self) {
113 fn quote_attr(src: &str) -> String {
115 if src.is_empty() || src == "null" {
116 return src.to_string();
117 }
118 let replaced = src.replace('"', """);
120 let jsoned =
122 serde_json::to_string(&replaced).unwrap_or_else(|_| format!("\"{}\"", replaced));
123 let inner = jsoned.trim_matches('"');
125 let inner = inner
126 .replace("\\t", "\t")
127 .replace("\\n", "\n")
128 .replace("\\r", "\r")
129 .replace('\\', "");
130 format!("\"{}\"", inner)
131 }
132 self.raw_attrs = self
133 .attrs
134 .iter()
135 .map(|(k, v)| {
136 if v.is_empty() {
137 k.clone()
138 } else {
139 format!("{}={}", k, quote_attr(v))
140 }
141 })
142 .collect::<Vec<_>>()
143 .join(" ");
144 }
145
146 fn ensure_raw_attributes(&mut self) {
148 if self.cache_raw_map.is_some() {
149 return;
150 }
151 let mut map = HashMap::new();
152 if !self.raw_attrs.is_empty() {
153 let re = regex::Regex::new(
154 r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?"#,
155 )
156 .unwrap();
157 for cap in re.captures_iter(&self.raw_attrs) {
158 let key = cap.get(1).unwrap().as_str();
159 let mut val = cap.get(2).map(|m| m.as_str()).unwrap_or("").to_string();
160 if !val.is_empty() {
161 if (val.starts_with('\"') && val.ends_with('\"'))
162 || (val.starts_with('\'') && val.ends_with('\''))
163 {
164 val = val[1..val.len() - 1].to_string();
165 }
166 }
167 map.entry(key.to_string()).or_insert(val);
169 }
170 }
171 self.cache_raw_map = Some(map);
172 }
173
174 pub fn attributes(&mut self) -> std::collections::HashMap<String, String> {
175 self.ensure_raw_attributes();
178 let mut out = std::collections::HashMap::new();
179 if let Some(raw) = &self.cache_raw_map {
180 for (orig_k, raw_v) in raw.iter() {
181 let decoded = html_escape::decode_html_entities(raw_v).to_string();
182 out.insert(orig_k.clone(), decoded);
184 }
185 }
186 out
187 }
188 pub fn raw_attributes(&mut self) -> HashMap<String, String> {
189 self.ensure_raw_attributes();
190 self.cache_raw_map.clone().unwrap_or_default()
191 }
192 pub fn raw_attrs_str(&self) -> &str { &self.raw_attrs }
194
195 pub fn get_attribute(&mut self, key: &str) -> Option<String> {
196 self.ensure_lower_decoded();
197 self.cache_lower_decoded
198 .as_ref()
199 .unwrap()
200 .get(&key.to_lowercase())
201 .cloned()
202 }
203
204 pub fn set_attribute(&mut self, key: &str, value: &str) {
205 self.ensure_raw_attributes();
208 let raw_snapshot = self.raw_attrs.clone();
209 let re = regex::Regex::new(
210 r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:\"[^\"]*\")|[^\s>]+))?"#,
211 ).unwrap();
212 let mut order: Vec<String> = Vec::new();
213 let mut seen_ci: Vec<String> = Vec::new();
214 for cap in re.captures_iter(&raw_snapshot) {
215 let k = cap.get(1).unwrap().as_str().to_string();
216 let k_ci = k.to_lowercase();
217 if !seen_ci.iter().any(|x| x == &k_ci) {
218 order.push(k.clone());
219 seen_ci.push(k_ci);
220 }
221 }
222 let mut new_map: std::collections::HashMap<String, String> =
224 std::collections::HashMap::new();
225 if let Some(raw) = &self.cache_raw_map {
226 for (k, v) in raw.iter() {
227 new_map.insert(k.clone(), v.clone());
228 }
229 }
230 let mut target_original: Option<String> = None;
232 for k in order.iter() {
233 if k.eq_ignore_ascii_case(key) {
234 target_original = Some(k.clone());
235 break;
236 }
237 }
238 if let Some(orig) = target_original {
239 new_map.insert(orig, value.to_string());
240 } else {
241 order.push(key.to_string());
242 new_map.insert(key.to_string(), value.to_string());
243 }
244 let mut parts = Vec::with_capacity(order.len());
246 for k in &order {
247 if let Some(v) = new_map.get(k) {
248 if v.is_empty() {
249 parts.push(k.clone());
250 } else {
251 parts.push(format!("{}={}", k, quote_attribute(v)));
252 }
253 }
254 }
255 self.raw_attrs = parts.join(" ");
256 self.cache_raw_map = None; self.cache_lower_decoded = None;
258 let lk = key.to_lowercase();
260 let decoded_val = html_escape::decode_html_entities(value).to_string();
261 if let Some(kv) = self.attrs.iter_mut().find(|(k, _)| *k == lk) {
262 kv.1 = decoded_val.clone();
263 } else {
264 self.attrs.push((lk.clone(), decoded_val.clone()));
265 }
266 self.attrs_complete = true;
267 if key.eq_ignore_ascii_case("id") {
268 self.id = value.to_string();
269 }
270 if key.eq_ignore_ascii_case("class") {
271 self.class_cache = None;
272 }
273 }
274
275 pub fn has_attribute(&mut self, key: &str) -> bool {
276 self.ensure_lower_decoded();
277 self.cache_lower_decoded
278 .as_ref()
279 .unwrap()
280 .contains_key(&key.to_lowercase())
281 }
282
283 pub(crate) fn ensure_all_attrs(&mut self) {
284 if self.attrs_complete {
285 return;
286 }
287 if self.raw_attrs.is_empty() {
288 self.attrs_complete = true;
289 return;
290 }
291 static ATTR_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
292 let re = ATTR_RE.get_or_init(|| {
293 regex::Regex::new(
294 r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?"#,
295 )
296 .unwrap()
297 });
298 for cap in re.captures_iter(&self.raw_attrs) {
299 let key = cap.get(1).unwrap().as_str();
300 let val = cap.get(2).map(|m| m.as_str()).unwrap_or("");
301 let unquoted = if val.starts_with('"') || val.starts_with('\'') {
302 &val[1..val.len() - 1]
303 } else {
304 val
305 };
306 let lk = key.to_lowercase();
307 if !self.attrs.iter().any(|(k, _)| k == &lk) {
308 self.attrs
309 .push((lk, html_escape::decode_html_entities(unquoted).to_string()));
310 }
311 }
312 self.attrs_complete = true;
313 }
314 fn build_raw_cache(&mut self) {
315 let attr_re = Regex::new(
316 r#"([a-zA-Z()[\]#@$.?:][a-zA-Z0-9-._:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:\"[^\"]*\")|[^\s>]+))?"#,
317 )
318 .unwrap();
319 let mut raw_map = HashMap::new();
320 for cap in attr_re.captures_iter(&self.raw_attrs) {
321 let key = cap.get(1).unwrap().as_str();
322 let value = cap.get(2).map(|m| m.as_str()).unwrap_or("");
323 let mut chosen = key.to_string();
324 if raw_map.contains_key(&chosen) {
325 let mut suffix = 1;
326 while raw_map.contains_key(&format!("{}#dup{}", chosen, suffix)) {
327 suffix += 1;
328 }
329 chosen = format!("{}#dup{}", chosen, suffix);
330 }
331 let mut value = value.trim();
332 if (value.starts_with('"') && value.ends_with('"'))
333 || (value.starts_with('\'') && value.ends_with('\''))
334 {
335 value = &value[1..value.len() - 1];
336 }
337 raw_map.insert(chosen.clone(), value.to_string());
338 }
339 self.cache_raw_map = Some(raw_map);
340 }
341
342 fn ensure_lower_decoded(&mut self) {
343 if self.cache_lower_decoded.is_some() {
344 return;
345 }
346 self.ensure_raw_attributes();
347 let mut lower = HashMap::new();
348 if let Some(raw) = &self.cache_raw_map {
349 for (k, v) in raw {
350 lower.insert(
351 k.to_lowercase(),
352 html_escape::decode_html_entities(v).to_string(),
353 );
354 }
355 }
356 self.cache_lower_decoded = Some(lower);
357 }
358}
359
360fn quote_attribute(val: &str) -> String {
361 if val.is_empty() {
362 return val.to_string();
363 }
364 let replaced = val.replace('"', """);
365 let jsoned = serde_json::to_string(&replaced).unwrap_or_else(|_| format!("\"{}\"", replaced));
366 let inner = jsoned.trim_matches('"');
367 let inner = inner
368 .replace("\\t", "\t")
369 .replace("\\n", "\n")
370 .replace("\\r", "\r")
371 .replace('\\', "");
372 format!("\"{}\"", inner)
373}