#![deny(missing_docs)]
/// Byte patterns whose presence in a lowercased user agent marks it as a crawler.
///
/// Each pattern is stored as `(first byte, remaining bytes)`, which is the shape
/// `contains_any` and `first_byte_table` operate on. All entries are lowercase
/// because `is_crawler` lowercases the input before searching.
const CRAWLER_KEYWORDS: &[(u8, &[u8])] = &[
    // URL fragments embedded in the user agent.
    (b'h', b"ttp://"),
    (b'h', b"ttps://"),
    (b'+', b"http"),
    // Empty remainder: a lone `@` anywhere in the value is enough to match
    // (presumably a contact e-mail address -- confirm with the table's author).
    (b'@', b""),
    // Plain substrings; they match inside longer words as well.
    (b'b', b"ot"),
    (b'c', b"rawl"),
    (b'c', b"hecker"),
    (b's', b"pider"),
    (b's', b"canner"),
    (b's', b"crape"),
    (b'f', b"eed"),
    (b'f', b"etch"),
    (b'm', b"onitor"),
    (b'p', b"tst"),
    (b'p', b"review"),
    (b'a', b"rchive"),
];
/// Byte patterns naming a browser or rendering engine.
///
/// `is_crawler` consults this table only after no crawler keyword matched: a
/// user agent containing any of these patterns is then not reported as a crawler.
/// Entries use the same `(first byte, remaining bytes)` layout as
/// `CRAWLER_KEYWORDS` and are lowercase for the same reason.
const BROWSER_ENGINES: &[(u8, &[u8])] = &[
    (b'g', b"ecko"),
    (b'k', b"html"),
    (b'k', b"onqueror"),
    (b'w', b"ebkit"),
    (b'c', b"hrome"),
    (b'f', b"irefox"),
    (b'm', b"sie"),
    (b'e', b"dge"),
    (b'o', b"pera"),
    (b't', b"rident"),
    (b'p', b"resto"),
    (b'l', b"inks"),
    (b'i', b"cab"),
];
/// Compile-time table, indexed by byte value, that is `true` for every byte
/// that starts an entry of `CRAWLER_KEYWORDS`. Passed to `contains_any` so
/// that positions which cannot begin a keyword are skipped cheaply.
const KEYWORD_FIRST_BYTES: [bool; 256] = first_byte_table(CRAWLER_KEYWORDS);
/// Compile-time table, indexed by byte value, that is `true` for every byte
/// that starts an entry of `BROWSER_ENGINES`. Used the same way as
/// `KEYWORD_FIRST_BYTES`, but for the engine search.
const ENGINE_FIRST_BYTES: [bool; 256] = first_byte_table(BROWSER_ENGINES);
/// Builds a 256-entry membership table of the leading bytes of `needles`.
///
/// Entry `b` of the result is `true` exactly when some needle's first component
/// equals `b`; the remaining bytes of each needle are ignored. An empty needle
/// list yields an all-`false` table.
///
/// This is a `const fn`, so it is limited to constructs allowed in constant
/// evaluation; hence the manual `while` loop instead of an iterator.
const fn first_byte_table(needles: &[(u8, &[u8])]) -> [bool; 256] {
    let mut seen = [false; 256];
    // Walk the needles back to front; setting flags is order-independent.
    let mut remaining = needles.len();
    while remaining > 0 {
        remaining -= 1;
        let lead = needles[remaining].0;
        seen[lead as usize] = true;
    }
    seen
}
/// Heuristically decides whether a `User-Agent` header value belongs to a crawler.
///
/// Matching is ASCII case-insensitive: the value is lowercased before it is
/// searched. The rules are applied in this order:
///
/// 1. An empty value is treated as a crawler.
/// 2. A value containing any entry of `CRAWLER_KEYWORDS` is a crawler.
/// 3. A value containing any entry of `BROWSER_ENGINES` is not a crawler.
/// 4. Otherwise, a value that does not start with `mozilla/` or `opera/` is a
///    crawler, and one that does is a crawler only if it contains
///    `(compatible;`.
///
/// Values of any length are classified. Values of up to 512 bytes are lowercased
/// in a stack buffer without allocating; longer ones are lowercased into a
/// temporary heap buffer, so padding a user agent cannot bypass the checks.
pub fn is_crawler(user_agent: &str) -> bool {
    /// Largest user agent, in bytes, that is lowercased without allocating.
    const STACK_CAPACITY: usize = 512;
    /// Marker that, in a browser-prefixed agent without an engine token, signals a crawler.
    const COMPATIBLE_MARKER: &[u8] = b"(compatible;";

    if user_agent.is_empty() {
        return true;
    }

    let source = user_agent.as_bytes();
    let mut stack = [0u8; STACK_CAPACITY];
    // Only initialised on the oversized path; declared here so it outlives `lowered`.
    let heap: Vec<u8>;
    let lowered: &[u8] = if let Some(slot) = stack.get_mut(..source.len()) {
        slot.copy_from_slice(source);
        slot.make_ascii_lowercase();
        &*slot
    } else {
        // Oversized input must still be inspected: returning early here would let
        // any client escape detection simply by padding its user agent.
        heap = source.to_ascii_lowercase();
        heap.as_slice()
    };

    if contains_any(lowered, CRAWLER_KEYWORDS, &KEYWORD_FIRST_BYTES) {
        return true;
    }
    // A recognised browser/engine token overrides every remaining rule.
    if contains_any(lowered, BROWSER_ENGINES, &ENGINE_FIRST_BYTES) {
        return false;
    }

    let browser_prefix = lowered.starts_with(b"mozilla/") || lowered.starts_with(b"opera/");
    !browser_prefix
        || lowered
            .windows(COMPATIBLE_MARKER.len())
            .any(|window| window == COMPATIBLE_MARKER)
}
/// Reports whether any needle occurs anywhere in `haystack`.
///
/// Each needle is given as `(first byte, remaining bytes)`; it matches at a
/// position when the byte there equals the first component and the bytes that
/// follow begin with the second. A needle with an empty remainder therefore
/// matches on its first byte alone.
///
/// `first_bytes` is a pre-filter indexed by byte value: positions whose byte is
/// not flagged are skipped without looking at `needles`. Needles whose first
/// byte is not flagged in the table can never match.
fn contains_any(haystack: &[u8], needles: &[(u8, &[u8])], first_bytes: &[bool; 256]) -> bool {
    haystack.iter().enumerate().any(|(index, &lead)| {
        // `index < haystack.len()`, so `index + 1..` is always a valid (possibly empty) range.
        first_bytes[usize::from(lead)]
            && needles
                .iter()
                .any(|&(first, rest)| first == lead && haystack[index + 1..].starts_with(rest))
    })
}