node_html_parser/dom/element/
attributes.rs

1use super::main::HTMLElement;
2use regex::Regex;
3use std::collections::HashMap;
4
5impl HTMLElement {
6	pub fn attrs_lower_decoded(&mut self) -> HashMap<String, String> {
7		self.ensure_lower_decoded();
8		self.cache_lower_decoded.clone().unwrap_or_default()
9	}
10
11	pub fn set_attributes(&mut self, attributes: &[(String, String)]) {
12		// 重建 raw_attrs 与 attrs(attrs 的 key 需小写且解码,这里假设传入 value 已为未转义文本,与 JS 行为接近)
13		self.attrs = attributes
14			.iter()
15			.map(|(k, v)| (k.to_lowercase(), v.clone()))
16			.collect();
17		self.raw_attrs = attributes
18			.iter()
19			.map(|(k, v)| {
20				// JS setAttributes: treats raw value 'null' OR '""' OR empty as boolean attribute (only name)
21				if v.is_empty() || v == "null" || v == "\"\"" {
22					k.clone()
23				} else {
24					format!("{}={}", k, quote_attribute(v))
25				}
26			})
27			.collect::<Vec<_>>()
28			.join(" ");
29		self.cache_raw_map = None;
30		self.cache_lower_decoded = None;
31		// 更新 id / class cache
32		if let Some((_, idv)) = self.attrs.iter().find(|(kk, _)| kk == "id") {
33			self.id = idv.clone();
34		}
35		if self.attrs.iter().any(|(kk, _)| kk == "class") {
36			self.class_cache = None;
37		}
38	}
39	pub fn remove_attribute(&mut self, key: &str) {
40		self.ensure_raw_attributes();
41		let mut raw_map = self.cache_raw_map.take().unwrap_or_default();
42		let target = key.to_lowercase();
43		raw_map.retain(|k, _| k.to_lowercase() != target);
44		// sync structured attrs vector
45		self.attrs.retain(|(kk, _)| kk != &target);
46		self.raw_attrs = raw_map
47			.iter()
48			.map(|(k, v)| {
49				if v.is_empty() {
50					k.clone()
51				} else {
52					format!("{}={}", k, quote_attribute(v))
53				}
54			})
55			.collect::<Vec<_>>()
56			.join(" ");
57		self.cache_raw_map = None;
58		self.cache_lower_decoded = None;
59		if target == "id" {
60			self.id.clear();
61		}
62		if target == "class" {
63			self.class_cache = None;
64		}
65		self.attrs_complete = true; // attrs now reflect full set
66	}
67
	/// Looks up an attribute by name (the key is lower-cased before comparison) in the
	/// structured `attrs` vector and returns a borrow of its stored value.
	///
	/// Calls `ensure_all_attrs` first so that lazily parsed attributes are available.
	///
	/// NOTE(review): the method takes `&self` but mutates the element through a raw
	/// pointer cast from that shared reference. Writing through a pointer derived from
	/// `&T` is undefined behaviour in Rust unless the data sits behind `UnsafeCell`.
	/// A sound fix needs `&mut self` or interior mutability on the lazily filled fields,
	/// both of which change declarations outside this method.
	pub fn get_attr(&self, key: &str) -> Option<&str> {
		// Mutability is needed so that the deferred attribute parsing can be completed.
		let k = key.to_lowercase();
		let mut_ptr = self as *const HTMLElement as *mut HTMLElement; // unsafe: lets the parse finish internally
		unsafe {
			(*mut_ptr).ensure_all_attrs();
		}
		self.attrs
			.iter()
			.find(|(kk, _)| *kk == k)
			.map(|(_, v)| v.as_str())
	}
80	pub fn has_attr(&self, key: &str) -> bool {
81		self.get_attr(key).is_some()
82	}
83
84	pub fn set_attr(&mut self, key: &str, val: &str) {
85		let k = key.to_lowercase();
86		if let Some(kv) = self.attrs.iter_mut().find(|(kk, _)| *kk == k) {
87			kv.1 = val.to_string();
88		} else {
89			self.attrs.push((k, val.to_string()));
90		}
91		self.rebuild_raw_attrs();
92		self.cache_raw_map = None;
93		self.cache_lower_decoded = None;
94		if key.eq_ignore_ascii_case("id") {
95			self.id = val.to_string();
96		}
97	}
98	pub fn remove_attr(&mut self, key: &str) {
99		let k = key.to_lowercase();
100		self.attrs.retain(|(kk, _)| *kk != k);
101		self.rebuild_raw_attrs();
102		self.cache_raw_map = None;
103		self.cache_lower_decoded = None;
104		if k == "id" {
105			self.id.clear();
106		}
107	}
108	/// Convenience: remove the id attribute (safe wrapper for tests parity with JS removeAttribute('id'))
109	pub fn remove_id(&mut self) { self.remove_attribute("id"); }
110	/// Convenience: set id attribute (safe wrapper to avoid direct raw mutation in tests)
111	pub fn set_id(&mut self, id: &str) { self.set_attribute("id", id); }
112	pub(super) fn rebuild_raw_attrs(&mut self) {
113		// 保持原有顺序,使用与 JS Quote 逻辑更接近的方式(参见 nodes/html.ts quoteAttribute)
114		fn quote_attr(src: &str) -> String {
115			if src.is_empty() || src == "null" {
116				return src.to_string();
117			}
118			// 先替换双引号
119			let replaced = src.replace('"', "&quot;");
120			// 模拟 JS: JSON.stringify 然后还原制表/换行/回车并移除反斜杠
121			let jsoned =
122				serde_json::to_string(&replaced).unwrap_or_else(|_| format!("\"{}\"", replaced));
123			// jsoned 形如 "...",去掉外层引号后处理内部转义
124			let inner = jsoned.trim_matches('"');
125			let inner = inner
126				.replace("\\t", "\t")
127				.replace("\\n", "\n")
128				.replace("\\r", "\r")
129				.replace('\\', "");
130			format!("\"{}\"", inner)
131		}
132		self.raw_attrs = self
133			.attrs
134			.iter()
135			.map(|(k, v)| {
136				if v.is_empty() {
137					k.clone()
138				} else {
139					format!("{}={}", k, quote_attr(v))
140				}
141			})
142			.collect::<Vec<_>>()
143			.join(" ");
144	}
145
146	// --- JS style attribute parsing (rawAttributes) ---
147	fn ensure_raw_attributes(&mut self) {
148		if self.cache_raw_map.is_some() {
149			return;
150		}
151		let mut map = HashMap::new();
152		if !self.raw_attrs.is_empty() {
153			let re = regex::Regex::new(
154				r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?"#,
155			)
156			.unwrap();
157			for cap in re.captures_iter(&self.raw_attrs) {
158				let key = cap.get(1).unwrap().as_str();
159				let mut val = cap.get(2).map(|m| m.as_str()).unwrap_or("").to_string();
160				if !val.is_empty() {
161					if (val.starts_with('\"') && val.ends_with('\"'))
162						|| (val.starts_with('\'') && val.ends_with('\''))
163					{
164						val = val[1..val.len() - 1].to_string();
165					}
166				}
167				// only first occurrence kept (JS behavior)
168				map.entry(key.to_string()).or_insert(val);
169			}
170		}
171		self.cache_raw_map = Some(map);
172	}
173
174	pub fn attributes(&mut self) -> std::collections::HashMap<String, String> {
175		// JS: Element.attributes preserves original attribute name casing/order (first occurrence) while returning decoded values.
176		// We approximate with a HashMap (order not guaranteed) but keep original key casing from raw parsing.
177		self.ensure_raw_attributes();
178		let mut out = std::collections::HashMap::new();
179		if let Some(raw) = &self.cache_raw_map {
180			for (orig_k, raw_v) in raw.iter() {
181				let decoded = html_escape::decode_html_entities(raw_v).to_string();
182				// Insert only if absent (first occurrence wins) – raw_map already keeps first, so direct insert.
183				out.insert(orig_k.clone(), decoded);
184			}
185		}
186		out
187	}
188	pub fn raw_attributes(&mut self) -> HashMap<String, String> {
189		self.ensure_raw_attributes();
190		self.cache_raw_map.clone().unwrap_or_default()
191	}
192	/// Read-only snapshot of the original raw attribute string (public accessor for tests like issue 136)
193	pub fn raw_attrs_str(&self) -> &str { &self.raw_attrs }
194
195	pub fn get_attribute(&mut self, key: &str) -> Option<String> {
196		self.ensure_lower_decoded();
197		self.cache_lower_decoded
198			.as_ref()
199			.unwrap()
200			.get(&key.to_lowercase())
201			.cloned()
202	}
203
204	pub fn set_attribute(&mut self, key: &str, value: &str) {
205		// JS preserves original attribute order; new attributes appended.
206		// Strategy: re-parse current raw_attrs to ordered vector of keys, update/append target, rebuild string.
207		self.ensure_raw_attributes();
208		let raw_snapshot = self.raw_attrs.clone();
209		let re = regex::Regex::new(
210			r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:\"[^\"]*\")|[^\s>]+))?"#,
211		).unwrap();
212		let mut order: Vec<String> = Vec::new();
213		let mut seen_ci: Vec<String> = Vec::new();
214		for cap in re.captures_iter(&raw_snapshot) {
215			let k = cap.get(1).unwrap().as_str().to_string();
216			let k_ci = k.to_lowercase();
217			if !seen_ci.iter().any(|x| x == &k_ci) {
218				order.push(k.clone());
219				seen_ci.push(k_ci);
220			}
221		}
222		// Build map (original casing -> raw value) from existing cache_raw_map (original keys) to preserve first casing.
223		let mut new_map: std::collections::HashMap<String, String> =
224			std::collections::HashMap::new();
225		if let Some(raw) = &self.cache_raw_map {
226			for (k, v) in raw.iter() {
227				new_map.insert(k.clone(), v.clone());
228			}
229		}
230		// Determine if key exists case-insensitively; if so update that original key.
231		let mut target_original: Option<String> = None;
232		for k in order.iter() {
233			if k.eq_ignore_ascii_case(key) {
234				target_original = Some(k.clone());
235				break;
236			}
237		}
238		if let Some(orig) = target_original {
239			new_map.insert(orig, value.to_string());
240		} else {
241			order.push(key.to_string());
242			new_map.insert(key.to_string(), value.to_string());
243		}
244		// Reconstruct raw_attrs following order vector.
245		let mut parts = Vec::with_capacity(order.len());
246		for k in &order {
247			if let Some(v) = new_map.get(k) {
248				if v.is_empty() {
249					parts.push(k.clone());
250				} else {
251					parts.push(format!("{}={}", k, quote_attribute(v)));
252				}
253			}
254		}
255		self.raw_attrs = parts.join(" ");
256		self.cache_raw_map = None; // force rebuild
257		self.cache_lower_decoded = None;
258		// sync structured attrs (store lowercase key, decoded value as get_attr expects)
259		let lk = key.to_lowercase();
260		let decoded_val = html_escape::decode_html_entities(value).to_string();
261		if let Some(kv) = self.attrs.iter_mut().find(|(k, _)| *k == lk) {
262			kv.1 = decoded_val.clone();
263		} else {
264			self.attrs.push((lk.clone(), decoded_val.clone()));
265		}
266		self.attrs_complete = true;
267		if key.eq_ignore_ascii_case("id") {
268			self.id = value.to_string();
269		}
270		if key.eq_ignore_ascii_case("class") {
271			self.class_cache = None;
272		}
273	}
274
275	pub fn has_attribute(&mut self, key: &str) -> bool {
276		self.ensure_lower_decoded();
277		self.cache_lower_decoded
278			.as_ref()
279			.unwrap()
280			.contains_key(&key.to_lowercase())
281	}
282
283	pub(crate) fn ensure_all_attrs(&mut self) {
284		if self.attrs_complete {
285			return;
286		}
287		if self.raw_attrs.is_empty() {
288			self.attrs_complete = true;
289			return;
290		}
291		static ATTR_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
292		let re = ATTR_RE.get_or_init(|| {
293			regex::Regex::new(
294				r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?"#,
295			)
296			.unwrap()
297		});
298		for cap in re.captures_iter(&self.raw_attrs) {
299			let key = cap.get(1).unwrap().as_str();
300			let val = cap.get(2).map(|m| m.as_str()).unwrap_or("");
301			let unquoted = if val.starts_with('"') || val.starts_with('\'') {
302				&val[1..val.len() - 1]
303			} else {
304				val
305			};
306			let lk = key.to_lowercase();
307			if !self.attrs.iter().any(|(k, _)| k == &lk) {
308				self.attrs
309					.push((lk, html_escape::decode_html_entities(unquoted).to_string()));
310			}
311		}
312		self.attrs_complete = true;
313	}
314	fn build_raw_cache(&mut self) {
315		let attr_re = Regex::new(
316			r#"([a-zA-Z()[\]#@$.?:][a-zA-Z0-9-._:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:\"[^\"]*\")|[^\s>]+))?"#,
317		)
318		.unwrap();
319		let mut raw_map = HashMap::new();
320		for cap in attr_re.captures_iter(&self.raw_attrs) {
321			let key = cap.get(1).unwrap().as_str();
322			let value = cap.get(2).map(|m| m.as_str()).unwrap_or("");
323			let mut chosen = key.to_string();
324			if raw_map.contains_key(&chosen) {
325				let mut suffix = 1;
326				while raw_map.contains_key(&format!("{}#dup{}", chosen, suffix)) {
327					suffix += 1;
328				}
329				chosen = format!("{}#dup{}", chosen, suffix);
330			}
331			let mut value = value.trim();
332			if (value.starts_with('"') && value.ends_with('"'))
333				|| (value.starts_with('\'') && value.ends_with('\''))
334			{
335				value = &value[1..value.len() - 1];
336			}
337			raw_map.insert(chosen.clone(), value.to_string());
338		}
339		self.cache_raw_map = Some(raw_map);
340	}
341
342	fn ensure_lower_decoded(&mut self) {
343		if self.cache_lower_decoded.is_some() {
344			return;
345		}
346		self.ensure_raw_attributes();
347		let mut lower = HashMap::new();
348		if let Some(raw) = &self.cache_raw_map {
349			for (k, v) in raw {
350				lower.insert(
351					k.to_lowercase(),
352					html_escape::decode_html_entities(v).to_string(),
353				);
354			}
355		}
356		self.cache_lower_decoded = Some(lower);
357	}
358}
359
360fn quote_attribute(val: &str) -> String {
361	if val.is_empty() {
362		return val.to_string();
363	}
364	let replaced = val.replace('"', "&quot;");
365	let jsoned = serde_json::to_string(&replaced).unwrap_or_else(|_| format!("\"{}\"", replaced));
366	let inner = jsoned.trim_matches('"');
367	let inner = inner
368		.replace("\\t", "\t")
369		.replace("\\n", "\n")
370		.replace("\\r", "\r")
371		.replace('\\', "");
372	format!("\"{}\"", inner)
373}