use core::fmt::{self, Write as _};
use std::{convert::Infallible, str::FromStr};
use deunicode::AsciiChars;
use itertools::peek_nth;
pub trait PhonehashRepr: Default + Copy + Clone + Eq + Ord {
fn stray_bits() -> u32;
fn max_phonemes() -> u32;
fn is_finalized(&self) -> bool;
fn finalize(&mut self, remaining: u32);
fn append(&mut self, elem: PhonehashElem) -> bool;
fn phoneme_at(&self, index: u32) -> Option<PhonehashElem>;
fn starts_with(&self, other: Self) -> bool;
}
macro_rules! impl_phonehash_repr_uint {
($ty:ty) => {
impl PhonehashRepr for $ty {
#[inline]
fn stray_bits() -> u32 {
Self::BITS % 3
}
#[inline]
fn max_phonemes() -> u32 {
Self::BITS / 3
}
#[inline]
fn is_finalized(&self) -> bool {
const FINAL_MASK: $ty = !0 << (<$ty>::BITS - 3);
(*self & FINAL_MASK) != 0
}
fn finalize(&mut self, remaining: u32) {
*self <<= 3 * remaining;
*self <<= Self::stray_bits();
}
fn append(&mut self, elem: PhonehashElem) -> bool {
let elem = (elem as u8) as Self;
if elem <= 1 || *self & 7 == elem || self.is_finalized() {
return false;
}
*self <<= 3;
*self |= elem as Self;
return true;
}
fn phoneme_at(&self, index: u32) -> Option<PhonehashElem> {
if index >= Self::max_phonemes() {
return None;
}
let result = self
.overflowing_shr(Self::stray_bits() + 3 * (Self::max_phonemes() - index - 1))
.0;
match result & 7 {
0 => Some(PhonehashElem::Space),
1 => Some(PhonehashElem::A),
2 => Some(PhonehashElem::B),
3 => Some(PhonehashElem::F),
4 => Some(PhonehashElem::S),
5 => Some(PhonehashElem::G),
6 => Some(PhonehashElem::M),
7 => Some(PhonehashElem::W),
_ => None,
}
}
fn starts_with(&self, other: Self) -> bool {
let mut max_phoneme_bits: Self = !Self::default();
while (other & max_phoneme_bits) != 0 {
max_phoneme_bits = max_phoneme_bits.overflowing_shr(3).0;
}
*self >= other && *self <= (other | max_phoneme_bits)
}
}
};
}
impl_phonehash_repr_uint!(u8);
impl_phonehash_repr_uint!(u16);
impl_phonehash_repr_uint!(u32);
impl_phonehash_repr_uint!(u64);
impl_phonehash_repr_uint!(u128);
impl_phonehash_repr_uint!(usize);
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct Phonehash<T: PhonehashRepr>(pub(crate) T);
impl<T: PhonehashRepr> Phonehash<T> {
pub fn new(s: &str) -> Self {
phonehash_elements(s).collect()
}
pub fn starts_with(&self, other: Self) -> bool {
self.0.starts_with(other.0)
}
}
impl<T: PhonehashRepr> FromStr for Phonehash<T> {
type Err = Infallible;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self::new(s))
}
}
impl<T: PhonehashRepr> FromIterator<PhonehashElem> for Phonehash<T> {
fn from_iter<I: IntoIterator<Item = PhonehashElem>>(iter: I) -> Self {
let mut repr: T = T::default();
let mut remaining_max = T::max_phonemes();
for item in iter {
if repr.append(item) {
remaining_max -= 1;
if remaining_max == 0 {
break;
}
}
}
repr.finalize(remaining_max);
Self(repr)
}
}
impl<T: PhonehashRepr> fmt::Display for Phonehash<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for i in 0..T::max_phonemes() {
self.0
.phoneme_at(i)
.map(|pelem| pelem.fmt(f))
.unwrap_or_else(|| f.write_char('?'))?;
}
Ok(())
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum PhonehashElem {
Space = 0,
A = 1, B = 2, F = 3, S = 4, G = 5, M = 6, W = 7, }
impl PhonehashElem {
pub fn is_space(&self) -> bool {
*self == Self::Space
}
}
impl fmt::Display for PhonehashElem {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
PhonehashElem::Space => f.write_char('_'),
PhonehashElem::A => f.write_char('A'),
PhonehashElem::B => f.write_char('B'),
PhonehashElem::F => f.write_char('F'),
PhonehashElem::S => f.write_char('S'),
PhonehashElem::G => f.write_char('G'),
PhonehashElem::M => f.write_char('M'),
PhonehashElem::W => f.write_char('W'),
}
}
}
pub fn phonehash_elements(s: &str) -> impl Iterator<Item = PhonehashElem> {
let mut ascii_alphas = peek_nth(
s.ascii_chars()
.flat_map(|s| s.unwrap_or_default().chars())
.map(|c| (c as u8).to_ascii_lowercase())
.flat_map(|c| -> Box<dyn Iterator<Item = u8>> {
match c {
b'0' => Box::new(b" zero ".iter().copied()),
b'1' => Box::new(b" one ".iter().copied()),
b'2' => Box::new(b" two ".iter().copied()),
b'3' => Box::new(b" three ".iter().copied()),
b'4' => Box::new(b" four ".iter().copied()),
b'5' => Box::new(b" five ".iter().copied()),
b'6' => Box::new(b" six ".iter().copied()),
b'7' => Box::new(b" seven ".iter().copied()),
b'8' => Box::new(b" eight ".iter().copied()),
b'9' => Box::new(b" nine ".iter().copied()),
b'$' => Box::new(b" dollar ".iter().copied()),
b'%' => Box::new(b" percent ".iter().copied()),
b'&' => Box::new(b" and ".iter().copied()),
b'+' => Box::new(b" plus ".iter().copied()),
b'a'..=b'z' => Box::new(std::iter::once(c)),
_ => Box::new(std::iter::once(b' ')),
}
}),
);
let mut check_silent_first_letter = true;
let ascii_alphas = std::iter::from_fn(move || -> Option<u8> {
loop {
match (
std::mem::take(&mut check_silent_first_letter),
ascii_alphas.next(),
ascii_alphas.peek_nth(0).copied(),
ascii_alphas.peek_nth(1).copied(),
) {
(_, None, _, _) => break None,
(_, Some(b'p'), Some(b'h'), _) => {
ascii_alphas.next(); break Some(b'f');
},
(_, Some(b' '), Some(b' '), _) => {
continue;
},
(_, Some(b'g'), Some(b'h'), _) => {
ascii_alphas.next(); continue;
},
(true, Some(b'k'), Some(b'n'), _) => {
break ascii_alphas.next(); },
(_, Some(b' '), Some(b'k'), Some(b'n')) => {
check_silent_first_letter = true;
break Some(b' ');
},
(_, Some(c), _, _) => break Some(c),
}
}
});
ascii_alphas.filter_map(|c| match c {
b' ' => Some(PhonehashElem::Space),
b'a' | b'e' | b'i' | b'o' | b'u' | b'y' => Some(PhonehashElem::A),
b'b' | b'd' | b't' | b'p' => Some(PhonehashElem::B),
b'f' | b'v' => Some(PhonehashElem::F),
b'c' | b's' | b'x' | b'k' | b'q' | b'z' => Some(PhonehashElem::S),
b'g' | b'j' => Some(PhonehashElem::G),
b'm' | b'n' => Some(PhonehashElem::M),
b'l' | b'r' | b'w' => Some(PhonehashElem::W),
_ => None, })
}
pub trait CanPhonehash {
fn phonehash_elements(&self) -> impl Iterator<Item = PhonehashElem>;
fn phonehash<T: PhonehashRepr>(&self) -> Phonehash<T> {
self.phonehash_elements().collect()
}
}
impl<T> CanPhonehash for T
where
T: AsRef<str>,
{
fn phonehash_elements(&self) -> impl Iterator<Item = PhonehashElem> {
phonehash_elements(self.as_ref())
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn it_werks() {
let s = "Phởenix Knight DAO++";
let h: Phonehash<u64> = s.phonehash();
assert_eq!(h.to_string(), "FMSMBWSBWS___________".to_string());
assert_eq!("knight".phonehash::<u64>().to_string(), "MB___________________");
assert_eq!("nite".phonehash::<u64>().to_string(), "MB___________________");
assert_eq!("knight".phonehash::<u8>().to_string(), "MB");
assert_eq!("nite".phonehash::<u8>().to_string(), "MB");
assert_eq!("phoenix".phonehash::<u64>().to_string(), "FMS__________________");
assert_eq!("foneks".phonehash::<u64>().to_string(), "FMS__________________");
assert_eq!("fone6".phonehash::<u64>().to_string(), "FMS__________________");
assert_eq!("knight rider".phonehash::<u64>().to_string(), "MBWBW________________");
assert_eq!("knightrider".phonehash::<u64>().to_string(), "MBWBW________________");
assert_eq!(
"knight rheyedhurr".phonehash::<u64>().to_string(),
"MBWBW________________"
);
assert!("knight rider".phonehash::<u64>().starts_with("nite".phonehash::<u64>()));
assert!(
!"knight"
.phonehash::<u64>()
.starts_with("nite rheyedhurr".phonehash::<u64>())
);
assert_eq!("Shiba".phonehash::<u64>().to_string(), "SB___________________");
assert_eq!("Sheba".phonehash::<u64>().to_string(), "SB___________________");
assert_eq!(
"aaaeeeeyyyyyeee lllaaaaammmaaaaaooo".phonehash::<u64>().to_string(),
"WM___________________"
);
assert_eq!("co-op".phonehash::<u64>().to_string(), "SB___________________");
assert_eq!("co op".phonehash::<u64>().to_string(), "SB___________________");
}
}