// lol_html/selectors_vm/attribute_matcher.rs
use super::compiler::AttrExprOperands;
2use crate::base::Bytes;
3use crate::html::Namespace;
4use crate::parser::{AttributeBuffer, AttributeOutline};
5use memchr::{memchr, memchr2};
6use selectors::attr::{CaseSensitivity, ParsedCaseSensitivity};
7use std::cell::OnceCell;
8
/// Lowercased name under which the `id` attribute is looked up.
const ID_ATTR: &[u8] = b"id";

/// Lowercased name under which the `class` attribute is looked up.
const CLASS_ATTR: &[u8] = b"class";
11
/// Returns `true` when `b` is one of the bytes that separate tokens inside an
/// attribute value: space, line feed, carriage return, tab or form feed.
#[inline]
const fn is_attr_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\n' | b'\r' | b'\t' | b'\x0c')
}
16
17#[inline]
18fn to_unconditional(
19 parsed: ParsedCaseSensitivity,
20 is_html_element_in_html_document: bool,
21) -> CaseSensitivity {
22 match parsed {
23 ParsedCaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {
24 if is_html_element_in_html_document {
25 CaseSensitivity::AsciiCaseInsensitive
26 } else {
27 CaseSensitivity::CaseSensitive
28 }
29 }
30 ParsedCaseSensitivity::CaseSensitive | ParsedCaseSensitivity::ExplicitCaseSensitive => {
31 CaseSensitivity::CaseSensitive
32 }
33 ParsedCaseSensitivity::AsciiCaseInsensitive => CaseSensitivity::AsciiCaseInsensitive,
34 }
35}
36
/// Write-once cache for an attribute lookup.
///
/// The cell is unset until the first lookup; afterwards it holds `Some(value)`
/// when the attribute was found and `None` when it was not.
type MemoizedAttrValue<'i> = OnceCell<Option<&'i [u8]>>;
38
39pub(crate) struct AttributeMatcher<'i> {
40 input: Bytes<'i>,
41 attributes: &'i AttributeBuffer,
42 id: MemoizedAttrValue<'i>,
43 class: MemoizedAttrValue<'i>,
44 is_html_element: bool,
45}
46
47impl<'i> AttributeMatcher<'i> {
48 #[inline]
49 #[must_use]
50 pub fn new(input: Bytes<'i>, attributes: &'i AttributeBuffer, ns: Namespace) -> Self {
51 AttributeMatcher {
52 input,
53 attributes,
54 id: OnceCell::new(),
55 class: OnceCell::new(),
56 is_html_element: ns == Namespace::Html,
57 }
58 }
59
60 #[inline]
61 fn find(&self, lowercased_name: &[u8]) -> Option<AttributeOutline> {
62 self.attributes
63 .iter()
64 .find(|&a| {
65 let Some(attr_name) = self.input.opt_slice(Some(a.name)) else {
66 return false;
67 };
68 if attr_name.len() != lowercased_name.len() {
69 return false;
70 }
71 attr_name
72 .iter()
73 .map(|c| c.to_ascii_lowercase())
74 .eq(lowercased_name.iter().copied())
75 })
76 .copied()
77 }
78
79 #[inline]
80 fn get_value(&self, lowercased_name: &[u8]) -> Option<&'i [u8]> {
81 self.find(lowercased_name)
82 .map(|a| self.input.slice(a.value).as_slice())
83 }
84
85 #[inline]
86 #[must_use]
87 pub fn has_attribute(&self, lowercased_name: &[u8]) -> bool {
88 self.find(lowercased_name).is_some()
89 }
90
91 #[inline]
92 #[must_use]
93 pub fn has_id(&self, id: &[u8]) -> bool {
94 match self.id.get_or_init(|| self.get_value(ID_ATTR)) {
95 Some(actual_id) => *actual_id == id,
96 None => false,
97 }
98 }
99
100 #[inline]
101 #[must_use]
102 pub fn has_class(&self, class_name: &[u8]) -> bool {
103 match self.class.get_or_init(|| self.get_value(CLASS_ATTR)) {
104 Some(class) => class
105 .split(|&b| is_attr_whitespace(b))
106 .any(|actual_class_name| actual_class_name == class_name),
107 None => false,
108 }
109 }
110
111 #[inline]
112 fn value_matches(&self, name: &[u8], matcher: impl Fn(&[u8]) -> bool) -> bool {
113 self.get_value(name).is_some_and(matcher)
114 }
115
116 #[inline]
117 pub fn attr_eq(&self, operand: &AttrExprOperands) -> bool {
118 self.value_matches(&operand.name, |actual_value| {
119 to_unconditional(operand.case_sensitivity, self.is_html_element)
120 .eq(actual_value, &operand.value)
121 })
122 }
123
124 #[inline]
125 pub fn matches_splitted_by_whitespace(&self, operand: &AttrExprOperands) -> bool {
126 self.value_matches(&operand.name, |actual_value| {
127 let case_sensitivity = to_unconditional(operand.case_sensitivity, self.is_html_element);
128
129 actual_value
130 .split(|&b| is_attr_whitespace(b))
131 .any(|part| case_sensitivity.eq(part, &operand.value))
132 })
133 }
134
135 #[inline]
136 pub fn has_attr_with_prefix(&self, operand: &AttrExprOperands) -> bool {
137 self.value_matches(&operand.name, |actual_value| {
138 let case_sensitivity = to_unconditional(operand.case_sensitivity, self.is_html_element);
139
140 let prefix_len = operand.value.len();
141
142 !actual_value.is_empty()
143 && actual_value.len() >= prefix_len
144 && actual_value
145 .get(..prefix_len)
146 .is_some_and(|prefix| case_sensitivity.eq(prefix, &operand.value))
147 })
148 }
149
150 #[inline]
151 pub fn has_dash_matching_attr(&self, operand: &AttrExprOperands) -> bool {
152 self.value_matches(&operand.name, |actual_value| {
153 let case_sensitivity = to_unconditional(operand.case_sensitivity, self.is_html_element);
154
155 if case_sensitivity.eq(actual_value, &operand.value) {
156 return true;
157 }
158
159 let prefix_len = operand.value.len();
160
161 actual_value.get(prefix_len) == Some(&b'-')
162 && actual_value
163 .get(..prefix_len)
164 .is_some_and(|prefix| case_sensitivity.eq(prefix, &operand.value))
165 })
166 }
167
168 #[inline]
169 pub fn has_attr_with_suffix(&self, operand: &AttrExprOperands) -> bool {
170 self.value_matches(&operand.name, |actual_value| {
171 let case_sensitivity = to_unconditional(operand.case_sensitivity, self.is_html_element);
172
173 let suffix_len = operand.value.len();
174 let value_len = actual_value.len();
175
176 !actual_value.is_empty()
177 && value_len >= suffix_len
178 && actual_value
179 .get(value_len - suffix_len..)
180 .is_some_and(|prefix| case_sensitivity.eq(prefix, &operand.value))
181 })
182 }
183
184 #[inline]
185 pub fn has_attr_with_substring(&self, operand: &AttrExprOperands) -> bool {
186 self.value_matches(&operand.name, |actual_value| {
187 let Some((&first_byte, rest)) = operand.value.split_first() else {
188 return false;
189 };
190
191 fn search(
192 mut haystack: &[u8],
193 rest: &[u8],
194 case_sensitivity: CaseSensitivity,
195 first_byte_searcher: impl Fn(&[u8]) -> Option<usize>,
196 ) -> Option<()> {
197 loop {
198 haystack = haystack.get(first_byte_searcher(haystack)? + 1..)?;
199 if case_sensitivity.eq(haystack.get(..rest.len())?, rest) {
200 return Some(());
201 }
202 }
203 }
204
205 match to_unconditional(operand.case_sensitivity, self.is_html_element) {
206 case @ CaseSensitivity::CaseSensitive => {
207 search(actual_value, rest, case, move |h| memchr(first_byte, h)).is_some()
208 }
209 case @ CaseSensitivity::AsciiCaseInsensitive => {
210 let lo = first_byte.to_ascii_lowercase();
211 let up = first_byte.to_ascii_uppercase();
212
213 search(actual_value, rest, case, move |h| memchr2(lo, up, h)).is_some()
214 }
215 }
216 })
217 }
218}