bloom_lib/lib.rs
1//! # bloom-lib
2//!
3//! Probabilistic data structures for Rust.
4//!
5//! This crate provides space-efficient structures that answer set-membership,
6//! cardinality, frequency, and similarity questions with bounded, tunable
//! error in a fraction of the memory an exact structure would require. They are
//! built for streaming workloads: insertions are allocation-free, state is
//! serializable (with the `serde` feature), and compatible structures can be
//! merged.
10//!
11//! ## Available structures
12//!
13//! - [`BloomFilter`] — probabilistic set membership with a tunable
14//! false-positive rate.
15//! - [`CuckooFilter`] — approximate membership that also supports deletion.
16//! - [`CountMinSketch`] — approximate frequency estimation for a stream.
17//! - [`HyperLogLog`] — distinct-count (cardinality) estimation in tiny memory.
18//! - [`MinHash`] — Jaccard similarity estimation between sets.
19//! - [`TopK`] — the most frequent items (heavy hitters) in a stream.
20//!
21//! ## Example
22//!
23//! ```
24//! # #[cfg(feature = "alloc")] {
25//! use bloom_lib::BloomFilter;
26//!
27//! // A filter sized for 100,000 items at a 0.1% false-positive rate.
28//! let mut filter = BloomFilter::new(100_000, 0.001).unwrap();
29//!
30//! filter.insert("session-token");
31//! assert!(filter.contains("session-token"));
32//! assert!(!filter.contains("never-seen"));
33//! # }
34//! ```
35//!
36//! ## Hashing
37//!
38//! Every structure is generic over [`core::hash::BuildHasher`] and defaults to
39//! the deterministic [`hash::DefaultHashBuilder`]. Determinism makes filters
40//! reproducible, mergeable, and stable across serialization. Supply a
41//! randomly-seeded hasher when the inputs are adversarial. See the [`hash`]
42//! module for details.
43//!
44//! ## Feature flags
45//!
46//! - `std` *(default)* — enables every structure and the
47//! [`std::error::Error`] implementation for [`Error`].
48//! - `alloc` — enables every structure without requiring `std`, for
49//! heap-capable `no_std` targets. Implied by `std`.
50//! - `serde` — derives `Serialize`/`Deserialize` for every structure. Implies
51//! `alloc`.
52//!
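//! With none of these features the crate exposes only [`VERSION`], [`Error`],
//! the [`hash`] module, and a reduced [`prelude`] that re-exports the error and
//! hashing types but none of the structures.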
54//!
55//! ## License
56//!
57//! Dual-licensed under Apache-2.0 OR MIT.
58
59#![doc(html_root_url = "https://docs.rs/bloom-lib")]
60#![cfg_attr(docsrs, feature(doc_cfg))]
61#![cfg_attr(not(feature = "std"), no_std)]
62#![deny(missing_docs)]
63#![deny(unsafe_op_in_unsafe_fn)]
64#![deny(unused_must_use)]
65#![deny(unused_results)]
66#![deny(clippy::unwrap_used)]
67#![deny(clippy::expect_used)]
68#![deny(clippy::todo)]
69#![deny(clippy::unimplemented)]
70#![deny(clippy::print_stdout)]
71#![deny(clippy::print_stderr)]
72#![deny(clippy::dbg_macro)]
73#![deny(clippy::unreachable)]
74#![deny(clippy::undocumented_unsafe_blocks)]
75#![deny(clippy::missing_safety_doc)]
76
// Link the `alloc` crate whenever the `alloc` feature is enabled.
#[cfg(feature = "alloc")]
extern crate alloc;

// Compiled unconditionally: the private `error` module and the public `hash`
// module.
mod error;
pub mod hash;

// Surface the error type at the crate root; the `error` module itself stays
// private.
pub use crate::error::Error;

// Private implementation modules, compiled only with the `alloc` feature.
#[cfg(feature = "alloc")]
mod bit_set;
#[cfg(feature = "alloc")]
mod bloom;
#[cfg(feature = "alloc")]
mod count_min;
#[cfg(feature = "alloc")]
mod cuckoo;
#[cfg(feature = "alloc")]
mod hyperloglog;
#[cfg(feature = "alloc")]
mod minhash;
#[cfg(feature = "alloc")]
mod topk;
99
// The public structures, re-exported at the crate root from their private
// modules. All of them are available only with the `alloc` feature.
#[cfg(feature = "alloc")]
pub use crate::{
    bloom::BloomFilter, count_min::CountMinSketch, cuckoo::CuckooFilter,
    hyperloglog::HyperLogLog, minhash::MinHash, topk::TopK,
};
112
113/// Convenient re-exports for typical usage.
114///
115/// Glob-importing the prelude brings the structures, the hashing types, and the
116/// error type into scope:
117///
118/// ```
119/// # #[cfg(feature = "alloc")] {
120/// use bloom_lib::prelude::*;
121///
122/// let mut filter = BloomFilter::new(1_000, 0.01).unwrap();
123/// filter.insert("hello");
124/// assert!(filter.contains("hello"));
125/// # }
126/// ```
127pub mod prelude {
128 pub use crate::hash::{DefaultHashBuilder, DefaultHasher};
129 pub use crate::Error;
130
131 #[cfg(feature = "alloc")]
132 pub use crate::{BloomFilter, CountMinSketch, CuckooFilter, HyperLogLog, MinHash, TopK};
133}
134
/// Crate version string, populated by Cargo at build time.
///
/// The value is read from the `CARGO_PKG_VERSION` environment variable by
/// [`env!`], so it is fixed at compile time and compilation fails if the
/// variable is not set. The constant is not gated behind any feature.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");