amt-phonetic 1.0.0

Articulatory Moment Transform — language-agnostic phonetic name matching
Documentation
//! Indexed retrieval.

use std::collections::hash_map::Entry;
use std::collections::HashMap;

/// Burkhard-Keller metric tree over 32-bit keys with Hamming distance.
///
/// Supports radius-bounded nearest-neighbor queries via triangle-inequality
/// pruning. For phonetic fingerprints, typical query time at radius ≤ 4
/// over 10⁴-10⁶ entries is in the low milliseconds.
///
/// # Example
///
/// ```
/// use amt::{encode_token, BKTree};
///
/// let mut tree: BKTree<String> = BKTree::new();
/// for name in ["Khaled", "Khalid", "Ahmed", "Robert"] {
///     let code = encode_token(name);
///     for &sp in &code.spectrals {
///         tree.add(sp, name.to_string());
///     }
/// }
///
/// let query = encode_token("Khaleed");
/// let hits = tree.query(query.spectrals[0], 4);
/// assert!(hits.iter().any(|(_, name)| name.as_str() == "Khaled"));
/// ```
#[derive(Debug)]
pub struct BKTree<T> {
    root: Option<Box<BKNode<T>>>,
    len: usize,
}

#[derive(Debug)]
struct BKNode<T> {
    key: u32,
    payload: T,
    children: HashMap<u32, Box<BKNode<T>>>,
}

impl<T> Default for BKTree<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T> BKTree<T> {
    /// Create an empty tree.
    #[must_use]
    pub fn new() -> Self {
        Self { root: None, len: 0 }
    }

    /// Number of entries currently in the tree.
    #[must_use]
    pub fn len(&self) -> usize {
        self.len
    }

    /// True if the tree has no entries.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Insert `(key, payload)`.
    pub fn add(&mut self, key: u32, payload: T) {
        self.len += 1;
        match &mut self.root {
            None => {
                self.root = Some(Box::new(BKNode {
                    key,
                    payload,
                    children: HashMap::new(),
                }));
            }
            Some(root) => {
                let mut node: &mut BKNode<T> = root;
                loop {
                    let d = (node.key ^ key).count_ones();
                    match node.children.entry(d) {
                        Entry::Occupied(slot) => {
                            node = slot.into_mut();
                        }
                        Entry::Vacant(slot) => {
                            slot.insert(Box::new(BKNode {
                                key,
                                payload,
                                children: HashMap::new(),
                            }));
                            return;
                        }
                    }
                }
            }
        }
    }

    /// Return all entries within `radius` Hamming distance of `key`,
    /// sorted by distance (closest first).
    #[must_use]
    pub fn query(&self, key: u32, radius: u32) -> Vec<(u32, &T)> {
        let mut results: Vec<(u32, &T)> = Vec::new();
        let mut stack: Vec<&BKNode<T>> = Vec::new();

        if let Some(root) = &self.root {
            stack.push(root);
        }

        while let Some(node) = stack.pop() {
            let d = (node.key ^ key).count_ones();
            if d <= radius {
                results.push((d, &node.payload));
            }
            let lo = d.saturating_sub(radius);
            let hi = d.saturating_add(radius);
            for (&edge, child) in &node.children {
                if edge >= lo && edge <= hi {
                    stack.push(child);
                }
            }
        }

        results.sort_by_key(|&(d, _)| d);
        results
    }
}