use super::main::HTMLElement;
use regex::Regex;
use std::collections::HashMap;
use std::sync::OnceLock;
/// Lazily compiled regex used to tokenise a raw attribute string into
/// `name` / optional `value` captures. It is initialised on first use via
/// `get_or_init` by whichever method needs it first (for example
/// `set_attribute` or `build_raw_cache`), all of which pass the same pattern.
static ATTR_PARSE_REGEX: OnceLock<Regex> = OnceLock::new();
impl HTMLElement {
/// Returns an owned copy of the attribute map whose names are lowercased and
/// whose values are HTML-entity decoded.
///
/// The backing cache is (re)built on demand, which is why `&mut self` is
/// required.
pub fn attrs_lower_decoded(&mut self) -> HashMap<String, String> {
    self.ensure_lower_decoded();
    match self.cache_lower_decoded.as_ref() {
        Some(decoded) => decoded.clone(),
        None => HashMap::new(),
    }
}
/// Replaces the element's whole attribute set with `attributes`.
///
/// * `attrs` receives every pair with its name lowercased and its value
///   copied verbatim.
/// * `raw_attrs` is re-serialised in the given order. A value that is empty,
///   the literal `null`, or the literal `""` is written as a bare name;
///   anything else is written as `name=<quoted value>`.
/// * Both derived caches are dropped so they are rebuilt from the new string.
/// * `id` mirrors the new `id` attribute and is cleared when there is none,
///   so a previous id cannot survive the replacement.
/// * `class_cache` is always dropped, because the old class list is stale
///   whether or not the new set contains a `class` attribute.
///
/// NOTE(review): unlike `set_attribute`, this method does not touch
/// `attrs_complete` / `attrs_modified` — confirm that is intended.
pub fn set_attributes(&mut self, attributes: &[(String, String)]) {
    self.attrs = attributes
        .iter()
        .map(|(k, v)| (k.to_lowercase(), v.clone()))
        .collect();

    self.raw_attrs = attributes
        .iter()
        .map(|(k, v)| {
            if v.is_empty() || v == "null" || v == "\"\"" {
                k.clone()
            } else {
                format!("{}={}", k, quote_attribute(v))
            }
        })
        .collect::<Vec<_>>()
        .join(" ");

    self.cache_raw_map = None;
    self.cache_lower_decoded = None;

    // Keep the cached id in lock-step with the new attribute set.
    match self.attrs.iter().find(|(kk, _)| kk == "id") {
        Some((_, idv)) => self.id = idv.clone(),
        None => self.id.clear(),
    }

    // The previous class list is invalid regardless of the new contents.
    self.class_cache = None;
}
pub fn remove_attribute(&mut self, key: &str) {
self.build_raw_cache();
let mut raw_map = self.cache_raw_map.take().unwrap_or_default();
let target = key.to_lowercase();
raw_map.retain(|k, _| k.to_lowercase() != target);
self.attrs.retain(|(kk, _)| kk != &target);
self.raw_attrs = raw_map
.iter()
.map(|(k, v)| {
if v.is_empty() {
k.clone()
} else {
format!("{}={}", k, quote_attribute(v))
}
})
.collect::<Vec<_>>()
.join(" ");
self.cache_raw_map = None;
self.cache_lower_decoded = None;
if target == "id" {
self.id.clear();
}
if target == "class" {
self.class_cache = None;
}
self.attrs_complete = true; self.attrs_modified = true; }
/// Looks up an attribute by name, case-insensitively, and returns a borrowed
/// view of its value from `attrs`.
///
/// The already-parsed `attrs` list is searched first. When nothing is found
/// and `attrs_complete` is false while `raw_attrs` is non-empty, the full
/// attribute list is parsed via `ensure_all_attrs` and searched again.
/// Returns `None` when the attribute is absent.
///
/// NOTE(review): the lazy path mutates the element through a `*mut` pointer
/// obtained from `&self`. Unless the mutated fields sit behind interior
/// mutability (not visible here) this is undefined behaviour; a sound fix
/// needs `&mut self` or a `RefCell`/`OnceCell`-style cache on the struct.
pub fn get_attr(&self, key: &str) -> Option<&str> {
let k = key.to_lowercase();
// Fast path: the attribute has already been parsed into `attrs`.
if let Some(found) = self.attrs.iter().find(|(kk, _)| *kk == k) {
return Some(found.1.as_str());
}
// Slow path: parse the remaining raw attributes once, then retry.
if !self.attrs_complete && !self.raw_attrs.is_empty() {
let mut_ptr = self as *const HTMLElement as *mut HTMLElement;
// SAFETY: NOTE(review) — no invariant visible in this file justifies
// writing through a pointer derived from a shared reference; see the
// doc comment above.
unsafe {
(*mut_ptr).ensure_all_attrs();
return (*mut_ptr)
.attrs
.iter()
.find(|(kk, _)| *kk == k)
.map(|(_, v)| v.as_str());
}
}
None
}
/// Returns `true` when `get_attr(key)` finds a value, i.e. when an attribute
/// with that name (compared case-insensitively) is present.
pub fn has_attr(&self, key: &str) -> bool {
self.get_attr(key).is_some()
}
/// Sets attribute `key` (stored lowercased) to `val`, replacing an existing
/// entry or appending a new one, then re-serialises `raw_attrs` from `attrs`.
///
/// Because the raw string is rebuilt from `attrs`, any attributes that are
/// still only present in `raw_attrs` are parsed first; otherwise they would
/// be silently dropped by the rebuild.
///
/// Side effects: both derived caches are dropped, `id` is updated when the
/// key is `id`, and `class_cache` is dropped when the key is `class`.
///
/// NOTE(review): unlike `set_attribute`, this method does not set
/// `attrs_modified` — confirm whether callers rely on that.
pub fn set_attr(&mut self, key: &str, val: &str) {
    // Same guard as `get_attr`: only parse when there is something to parse,
    // so an element whose `attrs` were filled without raw text is not wiped.
    if !self.attrs_complete && !self.raw_attrs.is_empty() {
        self.ensure_all_attrs();
    }

    let k = key.to_lowercase();
    if let Some(kv) = self.attrs.iter_mut().find(|(kk, _)| *kk == k) {
        kv.1 = val.to_string();
    } else {
        self.attrs.push((k, val.to_string()));
    }

    self.rebuild_raw_attrs();
    self.cache_raw_map = None;
    self.cache_lower_decoded = None;

    if key.eq_ignore_ascii_case("id") {
        self.id = val.to_string();
    }
    // A changed `class` value makes the cached class list stale.
    if key.eq_ignore_ascii_case("class") {
        self.class_cache = None;
    }
}
/// Removes attribute `key` (compared lowercased) from `attrs` and
/// re-serialises `raw_attrs` from what is left.
///
/// Because the raw string is rebuilt from `attrs`, any attributes that are
/// still only present in `raw_attrs` are parsed first; otherwise they would
/// be silently dropped by the rebuild.
///
/// Side effects: both derived caches are dropped, `id` is cleared when the
/// key is `id`, and `class_cache` is dropped when the key is `class`.
///
/// NOTE(review): unlike `remove_attribute`, this method does not set
/// `attrs_modified` — confirm whether callers rely on that.
pub fn remove_attr(&mut self, key: &str) {
    // Same guard as `get_attr`: only parse when there is something to parse.
    if !self.attrs_complete && !self.raw_attrs.is_empty() {
        self.ensure_all_attrs();
    }

    let k = key.to_lowercase();
    self.attrs.retain(|(kk, _)| *kk != k);

    self.rebuild_raw_attrs();
    self.cache_raw_map = None;
    self.cache_lower_decoded = None;

    if k == "id" {
        self.id.clear();
    }
    // Removing `class` makes the cached class list stale.
    if k == "class" {
        self.class_cache = None;
    }
}
/// Removes the `id` attribute by delegating to `remove_attribute("id")`,
/// which also clears the cached `id` field.
pub fn remove_id(&mut self) {
self.remove_attribute("id");
}
/// Sets the `id` attribute by delegating to `set_attribute("id", id)`,
/// which also updates the cached `id` field.
pub fn set_id(&mut self, id: &str) {
self.set_attribute("id", id);
}
pub(super) fn rebuild_raw_attrs(&mut self) {
fn quote_attr(src: &str) -> String {
if src.is_empty() || src == "null" {
return src.to_string();
}
let replaced = src.replace('"', """);
let jsoned =
serde_json::to_string(&replaced).unwrap_or_else(|_| format!("\"{}\"", replaced));
let inner = jsoned.trim_matches('"');
let inner = inner
.replace("\\t", "\t")
.replace("\\n", "\n")
.replace("\\r", "\r")
.replace('\\', "");
format!("\"{}\"", inner)
}
self.raw_attrs = self
.attrs
.iter()
.map(|(k, v)| {
if v.is_empty() {
k.clone()
} else {
format!("{}={}", k, quote_attr(v))
}
})
.collect::<Vec<_>>()
.join(" ");
}
/// Returns a fresh map of attribute name → HTML-entity-decoded value.
///
/// Names are copied exactly as they are stored in the raw cache (no case
/// folding); the raw cache is built first if it does not exist yet.
pub fn attributes(&mut self) -> std::collections::HashMap<String, String> {
    self.build_raw_cache();
    self.cache_raw_map
        .as_ref()
        .map(|raw| {
            raw.iter()
                .map(|(name, raw_value)| {
                    (
                        name.clone(),
                        html_escape::decode_html_entities(raw_value).to_string(),
                    )
                })
                .collect()
        })
        .unwrap_or_default()
}
/// Returns an owned copy of the raw attribute map (name → value with the
/// surrounding quotes stripped but entities left undecoded), building the
/// cache first when necessary.
pub fn raw_attributes(&mut self) -> HashMap<String, String> {
    self.build_raw_cache();
    match &self.cache_raw_map {
        Some(raw) => raw.clone(),
        None => HashMap::new(),
    }
}
/// Borrows the raw, serialised attribute string exactly as currently stored.
pub fn raw_attrs_str(&self) -> &str {
    self.raw_attrs.as_str()
}
/// Returns the entity-decoded value of attribute `key`, matched
/// case-insensitively, or `None` when the attribute is absent.
///
/// Takes `&mut self` because the lowercase/decoded cache is built on demand.
pub fn get_attribute(&mut self, key: &str) -> Option<String> {
    self.ensure_lower_decoded();
    let wanted = key.to_lowercase();
    let decoded = self
        .cache_lower_decoded
        .as_ref()
        .expect("ensure_lower_decoded always populates the cache");
    decoded.get(&wanted).cloned()
}
pub fn set_attribute(&mut self, key: &str, value: &str) {
let quoted_value = if value.is_empty() {
None
} else {
Some(quote_attribute(value))
};
if self.raw_attrs.is_empty() {
if let Some(qv) = quoted_value {
self.raw_attrs = format!("{}={}", key, qv);
} else {
self.raw_attrs = key.to_string();
}
} else {
let re = ATTR_PARSE_REGEX.get_or_init(|| {
regex::Regex::new(
r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?"#,
)
.unwrap()
});
let mut result_attrs = Vec::new();
let mut found = false;
for cap in re.captures_iter(&self.raw_attrs) {
let existing_key = cap.get(1).unwrap().as_str();
if existing_key.eq_ignore_ascii_case(key) {
if let Some(qv) = "ed_value {
result_attrs.push(format!("{}={}", existing_key, qv));
} else {
result_attrs.push(existing_key.to_string());
}
found = true;
} else {
let existing_val = cap.get(2).map(|m| m.as_str()).unwrap_or("");
if existing_val.is_empty() {
result_attrs.push(existing_key.to_string());
} else {
result_attrs.push(format!("{}={}", existing_key, existing_val));
}
}
}
if !found {
if let Some(qv) = quoted_value {
result_attrs.push(format!("{}={}", key, qv));
} else {
result_attrs.push(key.to_string());
}
}
self.raw_attrs = result_attrs.join(" ");
}
self.ensure_all_attrs();
let lk = key.to_lowercase();
let decoded_val = html_escape::decode_html_entities(value).to_string();
if let Some(kv) = self.attrs.iter_mut().find(|(k, _)| *k == lk) {
kv.1 = decoded_val;
} else {
self.attrs.push((lk, decoded_val));
}
self.cache_raw_map = None;
self.cache_lower_decoded = None;
self.attrs_complete = true;
self.attrs_modified = true;
if key.eq_ignore_ascii_case("id") {
self.id = value.to_string();
}
if key.eq_ignore_ascii_case("class") {
self.class_cache = None;
}
}
/// Reports whether an attribute named `key` exists, comparing names
/// case-insensitively against the lowercase/decoded cache.
///
/// Takes `&mut self` because that cache is built on demand.
pub fn has_attribute(&mut self, key: &str) -> bool {
    self.ensure_lower_decoded();
    let wanted = key.to_lowercase();
    let decoded = self
        .cache_lower_decoded
        .as_ref()
        .expect("ensure_lower_decoded always populates the cache");
    decoded.contains_key(&wanted)
}
pub(crate) fn ensure_all_attrs(&mut self) {
if self.attrs_complete {
return;
}
self.attrs.clear();
self.build_raw_cache();
if let Some(ref raw_map) = self.cache_raw_map {
for (key, value) in raw_map.iter() {
let decoded_val = html_escape::decode_html_entities(value).to_string();
self.attrs.push((key.to_lowercase(), decoded_val));
}
}
self.attrs_complete = true;
}
/// Populates `cache_raw_map` from `raw_attrs` if it is not already present.
///
/// Each regex match contributes `name → value`, where a value wrapped in a
/// matching pair of double or single quotes has that pair removed and a
/// missing value becomes the empty string. When a name occurs more than
/// once, the first occurrence wins. An empty `raw_attrs` yields an empty map.
fn build_raw_cache(&mut self) {
    if self.cache_raw_map.is_some() {
        return;
    }
    let mut map = HashMap::new();
    if !self.raw_attrs.is_empty() {
        let re = ATTR_PARSE_REGEX.get_or_init(|| {
            regex::Regex::new(
                r#"([a-zA-Z()\[\]#@$.?:][a-zA-Z0-9-._:()\[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?"#,
            )
            .unwrap()
        });
        for cap in re.captures_iter(&self.raw_attrs) {
            let key = cap
                .get(1)
                .expect("capture group 1 is mandatory in the attribute pattern")
                .as_str();
            let raw = cap.get(2).map_or("", |m| m.as_str());
            // Strip one *pair* of matching quotes. Using strip_prefix +
            // strip_suffix (rather than slicing `[1..len - 1]`) means a lone
            // quote character such as the value in `href="` is left as-is
            // instead of triggering an out-of-range slice panic.
            let val = raw
                .strip_prefix('"')
                .and_then(|rest| rest.strip_suffix('"'))
                .or_else(|| {
                    raw.strip_prefix('\'')
                        .and_then(|rest| rest.strip_suffix('\''))
                })
                .unwrap_or(raw);
            map.entry(key.to_string())
                .or_insert_with(|| val.to_string());
        }
    }
    self.cache_raw_map = Some(map);
}
fn ensure_lower_decoded(&mut self) {
if self.cache_lower_decoded.is_some() {
return;
}
self.build_raw_cache();
let mut lower_decoded = HashMap::new();
if let Some(ref raw_map) = self.cache_raw_map {
for (key, value) in raw_map.iter() {
let decoded_val = html_escape::decode_html_entities(value).to_string();
let lower_key = key.to_lowercase();
lower_decoded.insert(lower_key, decoded_val);
}
}
self.cache_lower_decoded = Some(lower_decoded);
}
}
/// Serialises an attribute value as a double-quoted string.
///
/// An empty input is returned unchanged (empty, without quotes). Otherwise:
///
/// 1. every `"` becomes `&quot;`;
/// 2. the text is escaped the way a JSON string serialiser would escape it
///    (backslash → `\\`, control characters → `\b`, `\t`, `\n`, `\f`, `\r`
///    or `\u00xx`);
/// 3. the two-character sequences `\t`, `\n`, `\r` are turned back into the
///    real tab / line-feed / carriage-return characters;
/// 4. every remaining backslash is removed;
/// 5. the result is wrapped in double quotes.
///
/// Net effect for ordinary text: quotes are entity-escaped, tabs and line
/// breaks are kept, and backslashes are dropped. Steps 2–4 are written out
/// explicitly here rather than round-tripping through a JSON serialiser.
fn quote_attribute(val: &str) -> String {
    if val.is_empty() {
        return String::new();
    }

    // Steps 1 and 2 in a single pass (`&quot;` contains nothing that the
    // JSON-style escaping would touch, so the order is irrelevant).
    let mut escaped = String::with_capacity(val.len() + 2);
    for ch in val.chars() {
        match ch {
            '"' => escaped.push_str("&quot;"),
            '\\' => escaped.push_str("\\\\"),
            '\u{08}' => escaped.push_str("\\b"),
            '\t' => escaped.push_str("\\t"),
            '\n' => escaped.push_str("\\n"),
            '\u{0C}' => escaped.push_str("\\f"),
            '\r' => escaped.push_str("\\r"),
            c if c < ' ' => escaped.push_str(&format!("\\u{:04x}", c as u32)),
            c => escaped.push(c),
        }
    }

    // Steps 3 and 4: restore the whitespace escapes, drop other backslashes.
    let inner = escaped
        .replace("\\t", "\t")
        .replace("\\n", "\n")
        .replace("\\r", "\r")
        .replace('\\', "");

    format!("\"{}\"", inner)
}