use html5ever::{local_name, ns, tendril::StrTendril, QualName};
use rustc_hash::FxHasher;
use selectors::attr::CaseSensitivity;
use std::hash::{Hash, Hasher};
/// The value of a `class` attribute together with a lookup cache derived from it.
#[derive(Debug)]
pub(crate) struct Class {
/// Raw attribute value; `Class::new` treats the non-empty runs between selector
/// whitespace bytes as the individual class names.
pub(crate) value: StrTendril,
/// Lookup shortcut that `Class::new` computes from `value`.
pub(crate) cache: Cache,
}
/// Bit set over byte values `0..=32`: bit `n` is set when byte `n` is one of
/// form feed, carriage return, line feed, tab or space.
const WHITESPACE_MASK: u64 =
    (1 << 0x0Cu8) | (1 << b'\r') | (1 << b'\n') | (1 << b'\t') | (1 << b' ');

/// Returns `true` when `b` is one of the five bytes recorded in `WHITESPACE_MASK`.
#[inline]
fn is_selector_whitespace(b: u8) -> bool {
    // Every whitespace byte is at most 0x20; rejecting larger bytes first also keeps
    // the shift amount below the width of the mask.
    if b > b' ' {
        return false;
    }
    (WHITESPACE_MASK & (1u64 << b)) != 0
}
/// A 64-bit set of hashes in which every inserted hash sets two bits.
///
/// The methods defined on this type only ever set bits, so a membership test may
/// report a hash that was never inserted but does not miss one that was.
#[derive(Debug, Copy, Clone)]
pub(crate) struct BloomFilter(u64);
impl BloomFilter {
    /// Creates a filter with no bits set.
    #[inline]
    fn new() -> BloomFilter {
        BloomFilter(0)
    }

    /// Returns the two-bit mask for `hash`: one bit chosen by its low six bits and
    /// one chosen by the six bits starting at bit `KEY_SIZE`.
    #[inline]
    fn mask_for(hash: u64) -> u64 {
        let first = 1u64 << (hash & 63);
        let second = 1u64 << ((hash >> KEY_SIZE) & 63);
        first | second
    }

    /// Records `hash` by setting both of its bits.
    #[inline]
    fn insert_hash(&mut self, hash: u64) {
        self.0 |= Self::mask_for(hash);
    }

    /// Returns `true` when both bits for `hash` are set.
    ///
    /// `false` means the hash was never inserted; `true` may be a false positive.
    #[inline]
    fn might_contain_hash(self, hash: u64) -> bool {
        let wanted = Self::mask_for(hash);
        self.0 & wanted == wanted
    }

    /// Hashes `name` with `hash_class_name` and tests the result against the filter.
    #[inline]
    fn might_have_class(self, name: &[u8]) -> bool {
        self.might_contain_hash(hash_class_name(name))
    }
}
/// Number of bits a hash is shifted right before its low six bits select the second
/// bit that `BloomFilter` sets or tests; the first bit comes from the unshifted low
/// six bits.
const KEY_SIZE: usize = 32;
/// Lookup strategy that `Class::new` selects for a `class` attribute value.
#[derive(Debug)]
pub(crate) enum Cache {
/// Filter over the hashes of every class name in the value; used whenever the value
/// is not exactly one class name.
Bloom(BloomFilter),
/// The whole value is a single class name with no whitespace around it, so lookups
/// compare against the value directly.
Single,
}
impl Class {
    /// Builds a `Class` from the raw `class` attribute value.
    ///
    /// The value is tokenised on selector whitespace and empty tokens are skipped.
    /// When the first token spans the entire value (one class name, no whitespace
    /// anywhere) the cache is `Cache::Single`; otherwise the hash of every token is
    /// inserted into a `BloomFilter` and the cache is `Cache::Bloom`.
    pub(crate) fn new(value: StrTendril) -> Class {
        let mut bloom = BloomFilter::new();
        let mut classes = value
            .as_bytes()
            .split(|&b| is_selector_whitespace(b))
            .filter(|token| !token.is_empty());
        if let Some(first) = classes.next() {
            // A first token as long as the whole value means there is nothing else in it.
            if first.len() == value.len() {
                return Class {
                    value,
                    cache: Cache::Single,
                };
            }
            bloom.insert_hash(hash_class_name(first));
        }
        for class in classes {
            bloom.insert_hash(hash_class_name(class));
        }
        Class {
            value,
            cache: Cache::Bloom(bloom),
        }
    }

    /// Scans the class names in the value and reports whether any equals `name`
    /// under `case_sensitivity`.
    ///
    /// Empty tokens (from leading, trailing or repeated whitespace, or an empty
    /// value) are skipped, mirroring the tokenisation in `Class::new`, so an empty
    /// `name` never matches.
    #[inline]
    fn has_class_impl(&self, name: &[u8], case_sensitivity: CaseSensitivity) -> bool {
        self.value
            .as_bytes()
            .split(|&b| is_selector_whitespace(b))
            .filter(|token| !token.is_empty())
            .any(|token| case_sensitivity.eq(token, name))
    }

    /// Reports whether the attribute value contains the class `name`.
    ///
    /// * `Cache::Single`: compares `name` with the whole value.
    /// * `Cache::Bloom`, case-sensitive: consults the filter first and scans the
    ///   value only when the filter says the name may be present.
    /// * `Cache::Bloom`, ASCII case-insensitive: always scans, because the filter
    ///   stores hashes of the exact bytes.
    #[inline]
    pub(crate) fn has_class(&self, name: &[u8], case_sensitivity: CaseSensitivity) -> bool {
        match &self.cache {
            Cache::Single => case_sensitivity.eq(self.value.as_bytes(), name),
            Cache::Bloom(filter) => match case_sensitivity {
                CaseSensitivity::CaseSensitive => {
                    filter.might_have_class(name) && self.has_class_impl(name, case_sensitivity)
                }
                CaseSensitivity::AsciiCaseInsensitive => {
                    self.has_class_impl(name, case_sensitivity)
                }
            },
        }
    }
}
/// Hashes a class name with a default-constructed `FxHasher` and returns the digest.
#[inline]
pub(crate) fn hash_class_name(name: &[u8]) -> u64 {
    let mut state = FxHasher::default();
    // Feeds the byte slice through its `Hash` impl, exactly as `name.hash(..)` does.
    Hash::hash(name, &mut state);
    Hasher::finish(&state)
}
/// The attributes of an element, with the `class` attribute held separately.
#[derive(Debug)]
pub(crate) struct Attributes {
/// Element attributes; when built through `Attributes::new`, the first `class`
/// attribute has been removed from this list.
pub(crate) attributes: Vec<html5ever::Attribute>,
/// The `class` attribute wrapped in `Class`, or `None` when `Attributes::new` found
/// no such attribute.
pub(crate) class: Option<Class>,
}
/// Local name of the attribute that `should_ignore` and `Attributes::get_css_inline`
/// look for.
pub(crate) const CSS_INLINE_ATTRIBUTE: &str = "data-css-inline";
/// Returns `true` when any attribute has the local name `data-css-inline` and the
/// exact value `ignore`.
pub(super) fn should_ignore(attributes: &[html5ever::Attribute]) -> bool {
    for attribute in attributes {
        if attribute.name.local == *CSS_INLINE_ATTRIBUTE && &*attribute.value == "ignore" {
            return true;
        }
    }
    false
}
impl Attributes {
    /// Splits the first `class` attribute out of `attributes` and wraps it in `Class`.
    ///
    /// The attribute is taken out with `swap_remove`, which moves the last attribute
    /// into the vacated slot, so the relative order of the remaining attributes can
    /// change. Lookups through `find`, `get` and `contains` no longer see the removed
    /// `class` attribute.
    pub(crate) fn new(mut attributes: Vec<html5ever::Attribute>) -> Attributes {
        let class_index = attributes
            .iter()
            .position(|attr| attr.name.local == local_name!("class"));
        let class = class_index.map(|idx| Class::new(attributes.swap_remove(idx).value));
        Attributes { attributes, class }
    }

    /// Returns the value of the first attribute whose qualified name equals `needle`.
    pub(crate) fn find(&self, needle: &QualName) -> Option<&str> {
        self.attributes
            .iter()
            .find(|probe| probe.name == *needle)
            .map(|probe| &*probe.value)
    }

    /// Reports whether `get` finds an attribute for `local`.
    pub(crate) fn contains(&self, local: html5ever::LocalName) -> bool {
        self.get(local).is_some()
    }

    /// Looks up an attribute by local name, using a qualified name with no prefix and
    /// the namespace produced by `ns!()`.
    pub(crate) fn get(&self, local: html5ever::LocalName) -> Option<&str> {
        self.find(&QualName::new(None, ns!(), local))
    }

    /// Returns the value of the first attribute whose local name is `data-css-inline`;
    /// prefix and namespace are not compared.
    pub(crate) fn get_css_inline(&self) -> Option<&str> {
        self.attributes
            .iter()
            .find(|probe| probe.name.local == *CSS_INLINE_ATTRIBUTE)
            .map(|probe| &*probe.value)
    }
}
#[cfg(test)]
mod tests {
    use super::Class;
    use selectors::attr::CaseSensitivity;
    use test_case::test_case;

    // "a" exercises the single-class cache; every other value goes through the bloom
    // filter, including values whose tokenisation yields empty tokens.
    #[test_case("a b")]
    #[test_case("a")]
    #[test_case(" a "; "surrounding whitespace")]
    #[test_case("b\ta"; "tab separated")]
    #[test_case("b  a"; "repeated whitespace")]
    #[test_case("b\na\r"; "newline and carriage return")]
    fn test_has_class(value: &str) {
        let class = Class::new(value.into());
        assert!(class.has_class(b"a", CaseSensitivity::CaseSensitive));
        assert!(class.has_class(b"A", CaseSensitivity::AsciiCaseInsensitive));
        assert!(!class.has_class(b"c", CaseSensitivity::CaseSensitive));
        assert!(!class.has_class(b"C", CaseSensitivity::AsciiCaseInsensitive));
    }
}