Skip to main content

amt/
lib.rs

1//! # AMT — Articulatory Moment Transform
2//!
3//! Language-agnostic phonetic name matching via spectral fingerprinting of
4//! universal sonority class sequences.
5//!
6//! ## Quick start
7//!
8//! ```
9//! use amt::{encode_token, matches, similarity};
10//!
11//! // Encode a single name
12//! let code = encode_token("Khaled");
13//!
14//! // Test match across transliterations and scripts
15//! assert!(matches("Khaled", "Khalid"));
16//! assert!(matches("Khaled", "خالد"));
17//! assert!(matches("Gamal", "Jamal"));
18//! assert!(!matches("Khaled", "Robert"));
19//!
20//! // Graded similarity in [0, 1]
21//! let s = similarity("Khaled Sameer", "khaled samir");
22//! assert!(s > 0.9);
23//! ```
24//!
25//! ## Indexed fuzzy search
26//!
27//! ```
28//! use amt::{encode_token, BKTree};
29//!
30//! let mut tree: BKTree<String> = BKTree::new();
31//! for name in ["Khaled", "Khalid", "Ahmed", "Robert"] {
32//!     let code = encode_token(name);
33//!     for &sp in &code.spectrals {
34//!         tree.add(sp, name.to_string());
35//!     }
36//! }
37//!
38//! let query = encode_token("Khaleed");
39//! let hits = tree.query(query.spectrals[0], 4);
40//! ```
41//!
42//! ## Algorithm
43//!
44//! Each name is mapped to a sequence of 8 sonority classes, projected onto
45//! the first 4 Chebyshev polynomials, Gray-quantized, and packed into a
46//! 32-bit spectral key. A parallel 64-bit Bloom signature over skip-bigrams
47//! of the same sequence captures edit-tolerant co-occurrence patterns.
48//! Two names match if they share any spectral key.
49//!
50//! See the whitepaper in the repository for full details, benchmarks against
51//! Soundex / Metaphone / Double Metaphone / NYSIIS / Beider-Morse, and
52//! theoretical justifications.
53
54#![warn(missing_docs)]
55#![warn(rust_2018_idioms)]
56#![warn(missing_debug_implementations)]
57#![warn(unreachable_pub)]
58#![forbid(unsafe_code)]
59// Gate the unstable `doc_cfg` feature behind `--cfg docsrs` so `#[doc(cfg(...))]` works on docs.rs (nightly) without breaking stable builds.
60#![cfg_attr(docsrs, feature(doc_cfg))]
61
62mod chebyshev;
63pub mod core;
64pub mod indexing;
65pub mod similarity;
66pub mod sonority;
67
68// Flattened re-exports — the common path. `self::` disambiguates this crate's own `core` module from the built-in `::core` crate.
69pub use self::core::{encode, encode_batch, encode_token, preprocess, Code};
70pub use self::indexing::BKTree;
71pub use self::similarity::{matches, similarity, token_distance};
72pub use self::sonority::{class_of, Class};