Skip to main content

node_html_parser/dom/element/
attributes.rs

1use super::main::HTMLElement;
2use regex::Regex;
3use std::collections::HashMap;
4use std::sync::OnceLock;
5
// Cached, lazily compiled regular expression used to parse attribute strings.
7static ATTR_PARSE_REGEX: OnceLock<Regex> = OnceLock::new();
8
9impl HTMLElement {
10	pub fn attrs_lower_decoded(&mut self) -> HashMap<String, String> {
11		self.ensure_lower_decoded();
12		self.cache_lower_decoded.clone().unwrap_or_default()
13	}
14
15	pub fn set_attributes(&mut self, attributes: &[(String, String)]) {
16		// 重建 raw_attrs 与 attrs(attrs 的 key 需小写且解码,这里假设传入 value 已为未转义文本,与 JS 行为接近)
17		self.attrs = attributes
18			.iter()
19			.map(|(k, v)| (k.to_lowercase(), v.clone()))
20			.collect();
21		self.raw_attrs = attributes
22			.iter()
23			.map(|(k, v)| {
24				// JS setAttributes: treats raw value 'null' OR '""' OR empty as boolean attribute (only name)
25				if v.is_empty() || v == "null" || v == "\"\"" {
26					k.clone()
27				} else {
28					format!("{}={}", k, quote_attribute(v))
29				}
30			})
31			.collect::<Vec<_>>()
32			.join(" ");
33		self.cache_raw_map = None;
34		self.cache_lower_decoded = None;
35		// 更新 id / class cache
36		if let Some((_, idv)) = self.attrs.iter().find(|(kk, _)| kk == "id") {
37			self.id = idv.clone();
38		}
39		if self.attrs.iter().any(|(kk, _)| kk == "class") {
40			self.class_cache = None;
41		}
42	}
43	pub fn remove_attribute(&mut self, key: &str) {
44		self.build_raw_cache();
45		let mut raw_map = self.cache_raw_map.take().unwrap_or_default();
46		let target = key.to_lowercase();
47		raw_map.retain(|k, _| k.to_lowercase() != target);
48		// sync structured attrs vector
49		self.attrs.retain(|(kk, _)| kk != &target);
50		self.raw_attrs = raw_map
51			.iter()
52			.map(|(k, v)| {
53				if v.is_empty() {
54					k.clone()
55				} else {
56					format!("{}={}", k, quote_attribute(v))
57				}
58			})
59			.collect::<Vec<_>>()
60			.join(" ");
61		self.cache_raw_map = None;
62		self.cache_lower_decoded = None;
63		if target == "id" {
64			self.id.clear();
65		}
66		if target == "class" {
67			self.class_cache = None;
68		}
69		self.attrs_complete = true; // attrs now reflect full set
70		self.attrs_modified = true; // Mark attributes as modified
71	}
72
73	pub fn get_attr(&self, key: &str) -> Option<&str> {
74		// First try already parsed attributes
75		let k = key.to_lowercase();
76		if let Some(found) = self.attrs.iter().find(|(kk, _)| *kk == k) {
77			return Some(found.1.as_str());
78		}
79
80		// If not found and attrs not complete, we need to ensure parsing
81		if !self.attrs_complete && !self.raw_attrs.is_empty() {
82			// Use unsafe to trigger ensure_all_attrs on self
83			let mut_ptr = self as *const HTMLElement as *mut HTMLElement;
84			unsafe {
85				(*mut_ptr).ensure_all_attrs();
86				// Now search again in the updated attrs
87				return (*mut_ptr)
88					.attrs
89					.iter()
90					.find(|(kk, _)| *kk == k)
91					.map(|(_, v)| v.as_str());
92			}
93		}
94
95		None
96	}
97	pub fn has_attr(&self, key: &str) -> bool {
98		self.get_attr(key).is_some()
99	}
100
101	pub fn set_attr(&mut self, key: &str, val: &str) {
102		let k = key.to_lowercase();
103		if let Some(kv) = self.attrs.iter_mut().find(|(kk, _)| *kk == k) {
104			kv.1 = val.to_string();
105		} else {
106			self.attrs.push((k, val.to_string()));
107		}
108		self.rebuild_raw_attrs();
109		self.cache_raw_map = None;
110		self.cache_lower_decoded = None;
111		if key.eq_ignore_ascii_case("id") {
112			self.id = val.to_string();
113		}
114	}
115	pub fn remove_attr(&mut self, key: &str) {
116		let k = key.to_lowercase();
117		self.attrs.retain(|(kk, _)| *kk != k);
118		self.rebuild_raw_attrs();
119		self.cache_raw_map = None;
120		self.cache_lower_decoded = None;
121		if k == "id" {
122			self.id.clear();
123		}
124	}
125	/// Convenience: remove the id attribute (safe wrapper for tests parity with JS removeAttribute('id'))
126	pub fn remove_id(&mut self) {
127		self.remove_attribute("id");
128	}
129	/// Convenience: set id attribute (safe wrapper to avoid direct raw mutation in tests)
130	pub fn set_id(&mut self, id: &str) {
131		self.set_attribute("id", id);
132	}
133	pub(super) fn rebuild_raw_attrs(&mut self) {
134		// 保持原有顺序,使用与 JS Quote 逻辑更接近的方式(参见 nodes/html.ts quoteAttribute)
135		fn quote_attr(src: &str) -> String {
136			if src.is_empty() || src == "null" {
137				return src.to_string();
138			}
139			// 先替换双引号
140			let replaced = src.replace('"', "&quot;");
141			// 模拟 JS: JSON.stringify 然后还原制表/换行/回车并移除反斜杠
142			let jsoned =
143				serde_json::to_string(&replaced).unwrap_or_else(|_| format!("\"{}\"", replaced));
144			// jsoned 形如 "...",去掉外层引号后处理内部转义
145			let inner = jsoned.trim_matches('"');
146			let inner = inner
147				.replace("\\t", "\t")
148				.replace("\\n", "\n")
149				.replace("\\r", "\r")
150				.replace('\\', "");
151			format!("\"{}\"", inner)
152		}
153		self.raw_attrs = self
154			.attrs
155			.iter()
156			.map(|(k, v)| {
157				if v.is_empty() {
158					k.clone()
159				} else {
160					format!("{}={}", k, quote_attr(v))
161				}
162			})
163			.collect::<Vec<_>>()
164			.join(" ");
165	}
166
167	pub fn attributes(&mut self) -> std::collections::HashMap<String, String> {
168		// JS: Element.attributes preserves original attribute name casing/order (first occurrence) while returning decoded values.
169		// We approximate with a HashMap (order not guaranteed) but keep original key casing from raw parsing.
170		self.build_raw_cache();
171		let mut out = std::collections::HashMap::new();
172		if let Some(raw) = &self.cache_raw_map {
173			for (orig_k, raw_v) in raw.iter() {
174				let decoded = html_escape::decode_html_entities(raw_v).to_string();
175				// Insert only if absent (first occurrence wins) – raw_map already keeps first, so direct insert.
176				out.insert(orig_k.clone(), decoded);
177			}
178		}
179		out
180	}
181	pub fn raw_attributes(&mut self) -> HashMap<String, String> {
182		self.build_raw_cache();
183		self.cache_raw_map.clone().unwrap_or_default()
184	}
185	/// Read-only snapshot of the original raw attribute string (public accessor for tests like issue 136)
186	pub fn raw_attrs_str(&self) -> &str {
187		&self.raw_attrs
188	}
189
190	pub fn get_attribute(&mut self, key: &str) -> Option<String> {
191		self.ensure_lower_decoded();
192		self.cache_lower_decoded
193			.as_ref()
194			.unwrap()
195			.get(&key.to_lowercase())
196			.cloned()
197	}
198
199	pub fn set_attribute(&mut self, key: &str, value: &str) {
200		// Update raw_attrs string representation, preserving original attribute order
201		let quoted_value = if value.is_empty() {
202			None
203		} else {
204			Some(quote_attribute(value))
205		};
206
207		if self.raw_attrs.is_empty() {
208			if let Some(qv) = quoted_value {
209				self.raw_attrs = format!("{}={}", key, qv);
210			} else {
211				self.raw_attrs = key.to_string();
212			}
213		} else {
214			// Parse existing attributes to preserve order
215			let re = ATTR_PARSE_REGEX.get_or_init(|| {
216				regex::Regex::new(
217					r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?"#,
218				)
219				.unwrap()
220			});
221
222			let mut result_attrs = Vec::new();
223			let mut found = false;
224
225			for cap in re.captures_iter(&self.raw_attrs) {
226				let existing_key = cap.get(1).unwrap().as_str();
227				if existing_key.eq_ignore_ascii_case(key) {
228					// Replace this attribute, preserve original case
229					if let Some(qv) = &quoted_value {
230						result_attrs.push(format!("{}={}", existing_key, qv));
231					} else {
232						result_attrs.push(existing_key.to_string());
233					}
234					found = true;
235				} else {
236					// Keep existing attribute as-is
237					let existing_val = cap.get(2).map(|m| m.as_str()).unwrap_or("");
238					if existing_val.is_empty() {
239						result_attrs.push(existing_key.to_string());
240					} else {
241						result_attrs.push(format!("{}={}", existing_key, existing_val));
242					}
243				}
244			}
245
246			// If not found, add at the end
247			if !found {
248				if let Some(qv) = quoted_value {
249					result_attrs.push(format!("{}={}", key, qv));
250				} else {
251					result_attrs.push(key.to_string());
252				}
253			}
254
255			self.raw_attrs = result_attrs.join(" ");
256		}
257
258		// Update structured attrs with decoded value
259		self.ensure_all_attrs();
260		let lk = key.to_lowercase();
261		let decoded_val = html_escape::decode_html_entities(value).to_string();
262		if let Some(kv) = self.attrs.iter_mut().find(|(k, _)| *k == lk) {
263			kv.1 = decoded_val;
264		} else {
265			self.attrs.push((lk, decoded_val));
266		}
267
268		// Clear caches to force rebuild
269		self.cache_raw_map = None;
270		self.cache_lower_decoded = None;
271		self.attrs_complete = true;
272		self.attrs_modified = true; // Mark attributes as modified
273
274		// Update element-specific caches
275		if key.eq_ignore_ascii_case("id") {
276			self.id = value.to_string();
277		}
278		if key.eq_ignore_ascii_case("class") {
279			self.class_cache = None;
280		}
281	}
282
283	pub fn has_attribute(&mut self, key: &str) -> bool {
284		self.ensure_lower_decoded();
285		self.cache_lower_decoded
286			.as_ref()
287			.unwrap()
288			.contains_key(&key.to_lowercase())
289	}
290
291	pub(crate) fn ensure_all_attrs(&mut self) {
292		if self.attrs_complete {
293			return;
294		}
295
296		// Clear existing attrs and rebuild from raw_attrs string
297		self.attrs.clear();
298		self.build_raw_cache();
299		if let Some(ref raw_map) = self.cache_raw_map {
300			for (key, value) in raw_map.iter() {
301				let decoded_val = html_escape::decode_html_entities(value).to_string();
302				self.attrs.push((key.to_lowercase(), decoded_val));
303			}
304		}
305
306		self.attrs_complete = true;
307	}
308	fn build_raw_cache(&mut self) {
309		if self.cache_raw_map.is_some() {
310			return;
311		}
312
313		let mut map = HashMap::new();
314		if !self.raw_attrs.is_empty() {
315			let re = ATTR_PARSE_REGEX.get_or_init(|| {
316				regex::Regex::new(
317					r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?"#,
318				)
319				.unwrap()
320			});
321			for cap in re.captures_iter(&self.raw_attrs) {
322				let key = cap.get(1).unwrap().as_str();
323				let mut val = cap.get(2).map(|m| m.as_str()).unwrap_or("").to_string();
324				if !val.is_empty() {
325					if (val.starts_with('\"') && val.ends_with('\"'))
326						|| (val.starts_with('\'') && val.ends_with('\''))
327					{
328						val = val[1..val.len() - 1].to_string();
329					}
330				}
331				// only first occurrence kept (JS behavior)
332				map.entry(key.to_string()).or_insert(val);
333			}
334		}
335		self.cache_raw_map = Some(map);
336	}
337
338	fn ensure_lower_decoded(&mut self) {
339		if self.cache_lower_decoded.is_some() {
340			return;
341		}
342
343		self.build_raw_cache();
344		let mut lower_decoded = HashMap::new();
345
346		if let Some(ref raw_map) = self.cache_raw_map {
347			for (key, value) in raw_map.iter() {
348				let decoded_val = html_escape::decode_html_entities(value).to_string();
349				let lower_key = key.to_lowercase();
350				lower_decoded.insert(lower_key, decoded_val);
351			}
352		}
353
354		self.cache_lower_decoded = Some(lower_decoded);
355	}
356}
357
358fn quote_attribute(val: &str) -> String {
359	if val.is_empty() {
360		return val.to_string();
361	}
362	let replaced = val.replace('"', "&quot;");
363	let jsoned = serde_json::to_string(&replaced).unwrap_or_else(|_| format!("\"{}\"", replaced));
364	let inner = jsoned.trim_matches('"');
365	let inner = inner
366		.replace("\\t", "\t")
367		.replace("\\n", "\n")
368		.replace("\\r", "\r")
369		.replace('\\', "");
370	format!("\"{}\"", inner)
371}