use std::cmp::Ordering;
use strsim::damerau_levenshtein;
use crate::phonemes::{CanPhonehash, Phonehash, PhonehashRepr};
pub trait SearchableItem: Clone {
type Repr: PhonehashRepr;
fn as_phoneme(&self) -> Phonehash<Self::Repr>;
fn as_str(&self) -> &str;
}
pub trait SearchableList {
type ListItem: SearchableItem;
fn len(&self) -> usize;
unsafe fn item_at_unchecked(&self, index: usize) -> &Self::ListItem;
fn phonehash_search(&self, query: &str, max_items: usize) -> Vec<Self::ListItem> {
let query_phonemes = query.phonehash::<<Self::ListItem as SearchableItem>::Repr>();
let mut size = self.len();
if size == 0 {
return Vec::new();
}
let mut base = 0usize;
while size > 0 {
let half = size / 2;
let mid = base + half;
let cmp = unsafe { self.item_at_unchecked(mid).as_phoneme() }.cmp(&query_phonemes);
if cmp == Ordering::Less {
base = mid + 1;
size -= half + 1;
} else {
size = half;
}
}
let mut result_with_dist = Vec::new();
size = self.len();
let max_item_index = (max_items + base).min(size);
while base < size {
let base_value = unsafe { self.item_at_unchecked(base).clone() };
if base_value.as_phoneme() != query_phonemes {
break;
}
result_with_dist.push((damerau_levenshtein(base_value.as_str(), query), base_value));
base += 1;
}
while base < max_item_index {
let base_value = unsafe { self.item_at_unchecked(base).clone() };
if !base_value.as_phoneme().starts_with(query_phonemes) {
break;
}
result_with_dist.push((damerau_levenshtein(base_value.as_str(), query), base_value));
base += 1;
}
result_with_dist.sort_by(|a, b| a.0.cmp(&b.0));
result_with_dist.into_iter().take(max_items).map(|v| v.1).collect()
}
}
impl<T: SearchableItem> SearchableList for [T] {
type ListItem = T;
fn len(&self) -> usize {
<[T]>::len(self)
}
unsafe fn item_at_unchecked(&self, index: usize) -> &Self::ListItem {
unsafe { self.get_unchecked(index) }
}
}
impl<T: SearchableItem> SearchableList for &[T] {
type ListItem = T;
fn len(&self) -> usize {
<[T]>::len(self)
}
unsafe fn item_at_unchecked(&self, index: usize) -> &Self::ListItem {
unsafe { self.get_unchecked(index) }
}
}
impl<T: SearchableItem> SearchableList for Vec<T> {
type ListItem = T;
fn len(&self) -> usize {
<[T]>::len(self)
}
unsafe fn item_at_unchecked(&self, index: usize) -> &Self::ListItem {
unsafe { self.get_unchecked(index) }
}
}
#[cfg(test)]
mod test {
use crate::phonemes::CanPhonehash;
use super::*;
#[test]
fn it_werks() {
#[derive(Debug, Clone, PartialEq, Eq)]
struct TestObject {
str: &'static str,
phoneme: Phonehash<u64>,
}
impl TestObject {
pub fn new(str: &'static str) -> Self {
Self {
str,
phoneme: str.phonehash(),
}
}
}
impl SearchableItem for TestObject {
type Repr = u64;
fn as_phoneme(&self) -> Phonehash<Self::Repr> {
self.phoneme
}
fn as_str(&self) -> &str {
self.str
}
}
let mut stuff = vec![
TestObject::new("aaaa"),
TestObject::new("knight rider"),
TestObject::new("nite writer"),
TestObject::new("neight rheyeder"),
TestObject::new("the amazing digital circus"),
];
stuff.sort_by_key(|v| v.phoneme);
assert_eq!(
stuff.phonehash_search("knight", 5),
vec![
TestObject::new("knight rider"),
TestObject::new("nite writer"),
TestObject::new("neight rheyeder"),
]
);
assert_eq!(
stuff.phonehash_search("knight writer", 5),
vec![
TestObject::new("knight rider"),
TestObject::new("nite writer"),
TestObject::new("neight rheyeder"),
]
);
assert_eq!(
stuff.phonehash_search("knight rider", 2),
vec![TestObject::new("knight rider"), TestObject::new("nite writer"),]
);
assert_eq!(
stuff.phonehash_search("nite writer", 2),
vec![TestObject::new("nite writer"), TestObject::new("knight rider"),]
);
assert_eq!(
stuff.phonehash_search("neight rheyeder", 2),
vec![TestObject::new("neight rheyeder"), TestObject::new("knight rider"),]
);
assert_eq!(
stuff.phonehash_search("knight", 2),
vec![TestObject::new("knight rider"), TestObject::new("nite writer")]
);
assert_eq!(
stuff.phonehash_search("nite", 2),
vec![TestObject::new("nite writer"), TestObject::new("knight rider")]
);
assert_eq!(
stuff.phonehash_search("neight", 2),
vec![TestObject::new("knight rider"), TestObject::new("nite writer")]
);
}
}