// lol_html/selectors_vm/attribute_matcher.rs

1use super::compiler::AttrExprOperands;
2use crate::base::Bytes;
3use crate::html::Namespace;
4use crate::parser::{AttributeBuffer, AttributeOutline};
5use memchr::{memchr, memchr2};
6use selectors::attr::{CaseSensitivity, ParsedCaseSensitivity};
7use std::cell::OnceCell;
8
/// Lowercased name of the attribute whose value is memoized for [`AttributeMatcher::has_id`].
const ID_ATTR: &[u8] = b"id";
/// Lowercased name of the attribute whose value is memoized for [`AttributeMatcher::has_class`].
const CLASS_ATTR: &[u8] = b"class";
11
/// Returns `true` if `b` is one of the five bytes treated as a separator inside
/// whitespace-separated attribute values: space, line feed, carriage return,
/// horizontal tab or form feed.
///
/// Every other byte, including vertical tab (`0x0b`), is not a separator.
#[inline]
const fn is_attr_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\n' | b'\r' | b'\t' | b'\x0c')
}
16
17#[inline]
18fn to_unconditional(
19    parsed: ParsedCaseSensitivity,
20    is_html_element_in_html_document: bool,
21) -> CaseSensitivity {
22    match parsed {
23        ParsedCaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {
24            if is_html_element_in_html_document {
25                CaseSensitivity::AsciiCaseInsensitive
26            } else {
27                CaseSensitivity::CaseSensitive
28            }
29        }
30        ParsedCaseSensitivity::CaseSensitive | ParsedCaseSensitivity::ExplicitCaseSensitive => {
31            CaseSensitivity::CaseSensitive
32        }
33        ParsedCaseSensitivity::AsciiCaseInsensitive => CaseSensitivity::AsciiCaseInsensitive,
34    }
35}
36
/// Lazily computed attribute value.
///
/// The cell is empty until the first lookup; afterwards it holds `Some(value)` if the
/// attribute was found, or `None` if it was not, so the lookup runs at most once.
type MemoizedAttrValue<'i> = OnceCell<Option<&'i [u8]>>;
38
39pub(crate) struct AttributeMatcher<'i> {
40    input: Bytes<'i>,
41    attributes: &'i AttributeBuffer,
42    id: MemoizedAttrValue<'i>,
43    class: MemoizedAttrValue<'i>,
44    is_html_element: bool,
45}
46
47impl<'i> AttributeMatcher<'i> {
48    #[inline]
49    #[must_use]
50    pub fn new(input: Bytes<'i>, attributes: &'i AttributeBuffer, ns: Namespace) -> Self {
51        AttributeMatcher {
52            input,
53            attributes,
54            id: OnceCell::new(),
55            class: OnceCell::new(),
56            is_html_element: ns == Namespace::Html,
57        }
58    }
59
60    #[inline]
61    fn find(&self, lowercased_name: &[u8]) -> Option<AttributeOutline> {
62        self.attributes
63            .iter()
64            .find(|&a| {
65                let Some(attr_name) = self.input.opt_slice(Some(a.name)) else {
66                    return false;
67                };
68                if attr_name.len() != lowercased_name.len() {
69                    return false;
70                }
71                attr_name
72                    .iter()
73                    .map(|c| c.to_ascii_lowercase())
74                    .eq(lowercased_name.iter().copied())
75            })
76            .copied()
77    }
78
79    #[inline]
80    fn get_value(&self, lowercased_name: &[u8]) -> Option<&'i [u8]> {
81        self.find(lowercased_name)
82            .map(|a| self.input.slice(a.value).as_slice())
83    }
84
85    #[inline]
86    #[must_use]
87    pub fn has_attribute(&self, lowercased_name: &[u8]) -> bool {
88        self.find(lowercased_name).is_some()
89    }
90
91    #[inline]
92    #[must_use]
93    pub fn has_id(&self, id: &[u8]) -> bool {
94        match self.id.get_or_init(|| self.get_value(ID_ATTR)) {
95            Some(actual_id) => *actual_id == id,
96            None => false,
97        }
98    }
99
100    #[inline]
101    #[must_use]
102    pub fn has_class(&self, class_name: &[u8]) -> bool {
103        match self.class.get_or_init(|| self.get_value(CLASS_ATTR)) {
104            Some(class) => class
105                .split(|&b| is_attr_whitespace(b))
106                .any(|actual_class_name| actual_class_name == class_name),
107            None => false,
108        }
109    }
110
111    #[inline]
112    fn value_matches(&self, name: &[u8], matcher: impl Fn(&[u8]) -> bool) -> bool {
113        self.get_value(name).is_some_and(matcher)
114    }
115
116    #[inline]
117    pub fn attr_eq(&self, operand: &AttrExprOperands) -> bool {
118        self.value_matches(&operand.name, |actual_value| {
119            to_unconditional(operand.case_sensitivity, self.is_html_element)
120                .eq(actual_value, &operand.value)
121        })
122    }
123
124    #[inline]
125    pub fn matches_splitted_by_whitespace(&self, operand: &AttrExprOperands) -> bool {
126        self.value_matches(&operand.name, |actual_value| {
127            let case_sensitivity = to_unconditional(operand.case_sensitivity, self.is_html_element);
128
129            actual_value
130                .split(|&b| is_attr_whitespace(b))
131                .any(|part| case_sensitivity.eq(part, &operand.value))
132        })
133    }
134
135    #[inline]
136    pub fn has_attr_with_prefix(&self, operand: &AttrExprOperands) -> bool {
137        self.value_matches(&operand.name, |actual_value| {
138            let case_sensitivity = to_unconditional(operand.case_sensitivity, self.is_html_element);
139
140            let prefix_len = operand.value.len();
141
142            !actual_value.is_empty()
143                && actual_value.len() >= prefix_len
144                && actual_value
145                    .get(..prefix_len)
146                    .is_some_and(|prefix| case_sensitivity.eq(prefix, &operand.value))
147        })
148    }
149
150    #[inline]
151    pub fn has_dash_matching_attr(&self, operand: &AttrExprOperands) -> bool {
152        self.value_matches(&operand.name, |actual_value| {
153            let case_sensitivity = to_unconditional(operand.case_sensitivity, self.is_html_element);
154
155            if case_sensitivity.eq(actual_value, &operand.value) {
156                return true;
157            }
158
159            let prefix_len = operand.value.len();
160
161            actual_value.get(prefix_len) == Some(&b'-')
162                && actual_value
163                    .get(..prefix_len)
164                    .is_some_and(|prefix| case_sensitivity.eq(prefix, &operand.value))
165        })
166    }
167
168    #[inline]
169    pub fn has_attr_with_suffix(&self, operand: &AttrExprOperands) -> bool {
170        self.value_matches(&operand.name, |actual_value| {
171            let case_sensitivity = to_unconditional(operand.case_sensitivity, self.is_html_element);
172
173            let suffix_len = operand.value.len();
174            let value_len = actual_value.len();
175
176            !actual_value.is_empty()
177                && value_len >= suffix_len
178                && actual_value
179                    .get(value_len - suffix_len..)
180                    .is_some_and(|prefix| case_sensitivity.eq(prefix, &operand.value))
181        })
182    }
183
184    #[inline]
185    pub fn has_attr_with_substring(&self, operand: &AttrExprOperands) -> bool {
186        self.value_matches(&operand.name, |actual_value| {
187            let Some((&first_byte, rest)) = operand.value.split_first() else {
188                return false;
189            };
190
191            fn search(
192                mut haystack: &[u8],
193                rest: &[u8],
194                case_sensitivity: CaseSensitivity,
195                first_byte_searcher: impl Fn(&[u8]) -> Option<usize>,
196            ) -> Option<()> {
197                loop {
198                    haystack = haystack.get(first_byte_searcher(haystack)? + 1..)?;
199                    if case_sensitivity.eq(haystack.get(..rest.len())?, rest) {
200                        return Some(());
201                    }
202                }
203            }
204
205            match to_unconditional(operand.case_sensitivity, self.is_html_element) {
206                case @ CaseSensitivity::CaseSensitive => {
207                    search(actual_value, rest, case, move |h| memchr(first_byte, h)).is_some()
208                }
209                case @ CaseSensitivity::AsciiCaseInsensitive => {
210                    let lo = first_byte.to_ascii_lowercase();
211                    let up = first_byte.to_ascii_uppercase();
212
213                    search(actual_value, rest, case, move |h| memchr2(lo, up, h)).is_some()
214                }
215            }
216        })
217    }
218}