/// A search needle stored in lowercase form so it can be matched
/// case-insensitively against haystacks.
///
/// `QueryNeedle::new` picks the variant: pure-ASCII input becomes
/// `AsciiLower`, everything else becomes `UnicodeLower`.
#[derive(Debug, Clone, PartialEq)]
pub enum QueryNeedle {
/// Lowercased bytes of a needle; `QueryNeedle::new` only produces this
/// variant for ASCII-only input. NOTE(review): the payload is public, so the
/// type itself does not enforce that the bytes are ASCII or lowercase.
AsciiLower(Vec<u8>),
/// A needle lowercased with `str::to_lowercase`; produced by
/// `QueryNeedle::new` for input containing non-ASCII characters.
UnicodeLower(String),
}
/// Number of bytes in each window returned by
/// `QueryNeedle::normalized_trigrams`.
pub const QUERY_TRIGRAM_LEN: usize = 3;
impl QueryNeedle {
    /// Builds a needle from `value`, lowercasing it once up front.
    ///
    /// ASCII-only input is stored as lowercase bytes in `AsciiLower`; any
    /// other input is lowercased with `str::to_lowercase` and stored in
    /// `UnicodeLower`.
    #[must_use]
    pub fn new(value: &str) -> Self {
        if value.is_ascii() {
            Self::AsciiLower(value.to_ascii_lowercase().into_bytes())
        } else {
            Self::UnicodeLower(value.to_lowercase())
        }
    }

    /// Returns `true` when the normalized needle has no bytes.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.normalized_bytes().is_empty()
    }

    /// Returns the normalized (lowercased) needle as a string slice.
    ///
    /// # Panics
    ///
    /// Panics if an `AsciiLower` value holds bytes that are not valid UTF-8.
    /// `QueryNeedle::new` never produces such a value, but the variant is
    /// publicly constructible, so the bytes are validated instead of being
    /// trusted through `from_utf8_unchecked` (which would be undefined
    /// behavior reachable from safe code).
    #[must_use]
    pub fn normalized_str(&self) -> &str {
        match self {
            Self::AsciiLower(needle) => std::str::from_utf8(needle)
                .expect("QueryNeedle::AsciiLower must hold valid UTF-8 (ASCII) bytes"),
            Self::UnicodeLower(needle) => needle.as_str(),
        }
    }

    /// Returns the normalized (lowercased) needle as raw bytes.
    #[must_use]
    pub fn normalized_bytes(&self) -> &[u8] {
        match self {
            Self::AsciiLower(needle) => needle,
            Self::UnicodeLower(needle) => needle.as_bytes(),
        }
    }

    /// Returns the sorted, de-duplicated set of `QUERY_TRIGRAM_LEN`-byte
    /// windows over the normalized needle bytes.
    ///
    /// The result is empty when the needle is shorter than one window.
    /// Windows are taken over bytes, so for a `UnicodeLower` needle a window
    /// may start or end inside a multi-byte character.
    #[must_use]
    pub fn normalized_trigrams(&self) -> Vec<[u8; QUERY_TRIGRAM_LEN]> {
        let mut trigrams = self
            .normalized_bytes()
            .windows(QUERY_TRIGRAM_LEN)
            .map(|window| [window[0], window[1], window[2]])
            .collect::<Vec<_>>();
        // Sorting first makes equal windows adjacent so `dedup` removes them all.
        trigrams.sort_unstable();
        trigrams.dedup();
        trigrams
    }

    /// Case-insensitively tests whether `haystack` contains the needle.
    ///
    /// Equivalent to `matches_contains_preprocessed(haystack, None)`.
    #[must_use]
    pub fn matches_contains(&self, haystack: &str) -> bool {
        self.matches_contains_preprocessed(haystack, None)
    }

    /// Tests whether the haystack contains the needle, optionally reusing an
    /// already-normalized copy of the haystack.
    ///
    /// When `normalized_haystack` is `Some`, it is searched as-is (no further
    /// case folding) and `haystack` is ignored; the caller is responsible for
    /// having lowercased it. When it is `None`, `haystack` is case-folded on
    /// the fly: per byte for an ASCII needle, via `str::to_lowercase` (which
    /// allocates) for a Unicode needle.
    #[must_use]
    pub fn matches_contains_preprocessed(
        &self,
        haystack: &str,
        normalized_haystack: Option<&str>,
    ) -> bool {
        match (self, normalized_haystack) {
            (Self::AsciiLower(needle), Some(normalized)) => {
                contains_ascii_in_normalized_haystack(normalized, needle)
            }
            (Self::AsciiLower(needle), None) => contains_case_insensitive_ascii(haystack, needle),
            (Self::UnicodeLower(needle), Some(normalized)) => normalized.contains(needle.as_str()),
            (Self::UnicodeLower(needle), None) => {
                haystack.to_lowercase().contains(needle.as_str())
            }
        }
    }

    /// Case-insensitively tests whether `haystack` starts with the needle.
    ///
    /// Equivalent to `matches_prefix_preprocessed(haystack, None)`.
    #[must_use]
    pub fn matches_prefix(&self, haystack: &str) -> bool {
        self.matches_prefix_preprocessed(haystack, None)
    }

    /// Tests whether the haystack starts with the needle, optionally reusing
    /// an already-normalized copy of the haystack.
    ///
    /// `normalized_haystack` follows the same contract as in
    /// `matches_contains_preprocessed`: when `Some`, it is compared as-is and
    /// `haystack` is ignored.
    #[must_use]
    pub fn matches_prefix_preprocessed(
        &self,
        haystack: &str,
        normalized_haystack: Option<&str>,
    ) -> bool {
        match (self, normalized_haystack) {
            (Self::AsciiLower(needle), Some(normalized)) => {
                starts_with_ascii_in_normalized_haystack(normalized, needle)
            }
            (Self::AsciiLower(needle), None) => {
                starts_with_case_insensitive_ascii(haystack, needle)
            }
            (Self::UnicodeLower(needle), Some(normalized)) => {
                normalized.starts_with(needle.as_str())
            }
            (Self::UnicodeLower(needle), None) => {
                haystack.to_lowercase().starts_with(needle.as_str())
            }
        }
    }

    /// Case-insensitively tests whether `haystack` ends with the needle.
    ///
    /// Equivalent to `matches_suffix_preprocessed(haystack, None)`.
    #[must_use]
    pub fn matches_suffix(&self, haystack: &str) -> bool {
        self.matches_suffix_preprocessed(haystack, None)
    }

    /// Tests whether the haystack ends with the needle, optionally reusing an
    /// already-normalized copy of the haystack.
    ///
    /// `normalized_haystack` follows the same contract as in
    /// `matches_contains_preprocessed`: when `Some`, it is compared as-is and
    /// `haystack` is ignored.
    #[must_use]
    pub fn matches_suffix_preprocessed(
        &self,
        haystack: &str,
        normalized_haystack: Option<&str>,
    ) -> bool {
        match (self, normalized_haystack) {
            (Self::AsciiLower(needle), Some(normalized)) => {
                ends_with_ascii_in_normalized_haystack(normalized, needle)
            }
            (Self::AsciiLower(needle), None) => ends_with_case_insensitive_ascii(haystack, needle),
            (Self::UnicodeLower(needle), Some(normalized)) => {
                normalized.ends_with(needle.as_str())
            }
            (Self::UnicodeLower(needle), None) => {
                haystack.to_lowercase().ends_with(needle.as_str())
            }
        }
    }

    /// Case-insensitively tests whether `haystack` equals the needle.
    ///
    /// Equivalent to `matches_exact_preprocessed(haystack, None)`.
    #[must_use]
    pub fn matches_exact(&self, haystack: &str) -> bool {
        self.matches_exact_preprocessed(haystack, None)
    }

    /// Tests whether the haystack equals the needle, optionally reusing an
    /// already-normalized copy of the haystack.
    ///
    /// `normalized_haystack` follows the same contract as in
    /// `matches_contains_preprocessed`: when `Some`, it is compared as-is and
    /// `haystack` is ignored.
    #[must_use]
    pub fn matches_exact_preprocessed(
        &self,
        haystack: &str,
        normalized_haystack: Option<&str>,
    ) -> bool {
        match (self, normalized_haystack) {
            (Self::AsciiLower(needle), Some(normalized)) => {
                equals_ascii_in_normalized_haystack(normalized, needle)
            }
            (Self::AsciiLower(needle), None) => equals_case_insensitive_ascii(haystack, needle),
            (Self::UnicodeLower(needle), Some(normalized)) => normalized == needle.as_str(),
            (Self::UnicodeLower(needle), None) => haystack.to_lowercase() == *needle,
        }
    }
}
/// Reports whether `needle` occurs as a contiguous byte run inside `haystack`.
///
/// Bytes are compared exactly (no case folding). An empty needle is found in
/// every haystack, including an empty one.
fn contains_ascii_in_normalized_haystack(haystack: &str, needle: &[u8]) -> bool {
    let bytes = haystack.as_bytes();
    // `checked_sub` is `None` exactly when the needle is longer than the
    // haystack; otherwise it yields the last offset a match could start at.
    match bytes.len().checked_sub(needle.len()) {
        None => false,
        Some(last_start) => (0..=last_start).any(|start| bytes[start..].starts_with(needle)),
    }
}
/// Reports whether the bytes of `haystack` begin with `needle`.
///
/// Bytes are compared exactly (no case folding). An empty needle is a prefix
/// of every haystack; a needle longer than the haystack never matches.
fn starts_with_ascii_in_normalized_haystack(haystack: &str, needle: &[u8]) -> bool {
    haystack.as_bytes().starts_with(needle)
}
/// Reports whether the bytes of `haystack` end with `needle`.
///
/// Bytes are compared exactly (no case folding). An empty needle is a suffix
/// of every haystack; a needle longer than the haystack never matches.
fn ends_with_ascii_in_normalized_haystack(haystack: &str, needle: &[u8]) -> bool {
    haystack.as_bytes().ends_with(needle)
}
/// Reports whether `haystack` consists of exactly the bytes in `needle`.
///
/// Bytes are compared exactly (no case folding).
fn equals_ascii_in_normalized_haystack(haystack: &str, needle: &[u8]) -> bool {
    // Cheap length test first, then an element-wise comparison.
    haystack.len() == needle.len() && haystack.bytes().eq(needle.iter().copied())
}
/// Reports whether `needle` occurs inside `haystack`, ignoring ASCII case in
/// the haystack.
///
/// Only the haystack bytes are lowercased before comparison, so `needle` is
/// expected to already be lowercase; an uppercase needle byte never matches.
/// An empty needle is found in every haystack.
fn contains_case_insensitive_ascii(haystack: &str, needle: &[u8]) -> bool {
    let bytes = haystack.as_bytes();
    let width = needle.len();
    // `checked_sub` is `None` exactly when the needle is longer than the
    // haystack; otherwise it yields the last offset a match could start at.
    match bytes.len().checked_sub(width) {
        None => false,
        Some(last_start) => (0..=last_start).any(|start| {
            bytes[start..start + width]
                .iter()
                .map(u8::to_ascii_lowercase)
                .eq(needle.iter().copied())
        }),
    }
}
/// Reports whether `haystack` begins with `needle`, ignoring ASCII case in
/// the haystack.
///
/// Only the haystack bytes are lowercased before comparison, so `needle` is
/// expected to already be lowercase. An empty needle always matches.
fn starts_with_case_insensitive_ascii(haystack: &str, needle: &[u8]) -> bool {
    haystack
        .as_bytes()
        // `get` is `None` when the needle is longer than the haystack.
        .get(..needle.len())
        .map_or(false, |prefix| {
            prefix
                .iter()
                .map(u8::to_ascii_lowercase)
                .eq(needle.iter().copied())
        })
}
/// Reports whether `haystack` ends with `needle`, ignoring ASCII case in the
/// haystack.
///
/// Only the haystack bytes are lowercased before comparison, so `needle` is
/// expected to already be lowercase. An empty needle always matches.
fn ends_with_case_insensitive_ascii(haystack: &str, needle: &[u8]) -> bool {
    let bytes = haystack.as_bytes();
    bytes
        .len()
        // `None` when the needle is longer than the haystack.
        .checked_sub(needle.len())
        .map_or(false, |tail_start| {
            bytes[tail_start..]
                .iter()
                .map(u8::to_ascii_lowercase)
                .eq(needle.iter().copied())
        })
}
/// Reports whether `haystack` equals `needle`, ignoring ASCII case in the
/// haystack.
///
/// Only the haystack bytes are lowercased before comparison, so `needle` is
/// expected to already be lowercase; an uppercase needle byte never matches.
fn equals_case_insensitive_ascii(haystack: &str, needle: &[u8]) -> bool {
    // Differing lengths can be rejected without looking at any byte.
    haystack.len() == needle.len()
        && haystack
            .bytes()
            .map(|byte| byte.to_ascii_lowercase())
            .eq(needle.iter().copied())
}