clark_hash/lib.rs
1#![forbid(unsafe_code)]
2#![deny(missing_docs)]
3
4//! Clark Hash: stateless sparse-JL quantization for neural embeddings.
5//!
6//! The library centers around [`ClarkHash`], a deterministic codec that projects an
7//! input vector into a low-dimensional sparse signed sketch and then applies a fixed
8//! scalar quantizer. The resulting code can be scored asymmetrically against
9//! floating-point queries while staying fully online and fully stateless.
10//!
11//! The original codec and configuration names, [`SQuaJL`] and [`SQuaJLConfig`], remain
12//! public for compatibility with earlier experiments and papers.
13//!
14//! See the crate-level `README.md` for motivation, design notes, and usage examples.
15
// Crate-private implementation modules (declared without `pub`).
16mod bitpack;
17mod hash;
18
19/// Configuration types and similarity-mode selection.
20pub mod config;
21/// Error types returned by the crate.
22pub mod error;
23/// A simple exact-scan index over quantized vectors.
24pub mod index;
25/// Encoded database vectors and prepared query sketches.
26pub mod quantized;
27/// The core stateless codec implementation.
28pub mod squajl;
29
30#[cfg(feature = "fastembed")]
31/// Optional local text-embedding integration powered by `fastembed`.
///
/// This module is only compiled when the `fastembed` feature is enabled.
32pub mod fastembed_integration;
33
34pub use config::{SQuaJLConfig, SimilarityMetric};
35pub use error::{Result, SQuaJLError};
36#[cfg(feature = "fastembed")]
37pub use fastembed_integration::FastEmbedQuantizer;
38pub use index::{FlatIndex, ScoredIndex};
39pub use quantized::{QuantizedVector, QuerySketch};
40pub use squajl::SQuaJL;
41
42/// Package-level name for the stateless sparse-JL codec.
///
/// This is a plain type alias for [`SQuaJL`], so the two names refer to the
/// same type and can be used interchangeably.
43pub type ClarkHash = SQuaJL;
44
45/// Package-level name for the codec configuration.
///
/// This is a plain type alias for [`SQuaJLConfig`]; both names denote the same type.
46pub type ClarkHashConfig = SQuaJLConfig;
47
48/// Package-level name for errors returned by this crate.
///
/// This is a plain type alias for [`SQuaJLError`]; both names denote the same type.
49pub type ClarkHashError = SQuaJLError;