hyperloglog/
lib.rs

1//! # nano-hyperloglog
2//!
3//! A high-performance HyperLogLog implementation for cardinality estimation with pluggable storage backends.
4//!
5//! ## What is HyperLogLog?
6//!
7//! HyperLogLog is a probabilistic data structure for estimating the cardinality (number of unique elements)
8//! of a dataset. It offers remarkable space efficiency: you can count billions of unique items using just
9//! a few kilobytes of memory, with typical accuracy within 2% of the true count.
10//!
11//! ## Features
12//!
13//! - **Fixed memory usage**: Count billions of items with ~16KB (configurable via precision)
14//! - **High accuracy**: Typically within 0.8-2% of true count (depending on precision)
15//! - **Mergeable**: Combine HyperLogLogs from multiple sources with simple union operations
16//! - **Pluggable storage**: File-based or Elasticsearch backends for persistence
17//! - **Redis-compatible API**: Optional HTTP server with PFADD/PFCOUNT/PFMERGE endpoints
18//! - **Type-safe**: Leverage Rust's type system for compile-time guarantees
19//!
20//! ## Quick Start
21//!
22//! Add to your `Cargo.toml`:
23//!
24//! ```toml
25//! [dependencies]
26//! nano-hyperloglog = "0.1"
27//! ```
28//!
29//! Basic usage:
30//!
31//! ```rust
32//! use hyperloglog::HyperLogLog;
33//!
34//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
35//! // Create a HyperLogLog with precision 14 (16KB memory, ~0.8% error)
36//! let mut hll = HyperLogLog::new(14)?;
37//!
38//! // Add elements
39//! for i in 0..10000 {
40//!     hll.add(&i);
41//! }
42//!
43//! // Get estimated count
44//! let count = hll.count();
45//! println!("Estimated unique count: {}", count); // ~10000
46//! # Ok(())
47//! # }
48//! ```
49//!
50//! ## Merging HyperLogLogs
51//!
52//! HyperLogLogs can be merged to combine counts from multiple sources:
53//!
54//! ```rust
55//! use hyperloglog::HyperLogLog;
56//!
57//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
58//! let mut server1 = HyperLogLog::new(14)?;
59//! let mut server2 = HyperLogLog::new(14)?;
60//!
61//! // Each server tracks different users
62//! for i in 0..5000 {
63//!     server1.add(&i);
64//! }
65//! for i in 5000..10000 {
66//!     server2.add(&i);
67//! }
68//!
69//! // Merge to get total unique count
70//! server1.merge(&server2)?;
71//! let total = server1.count(); // ~10000
72//! # Ok(())
73//! # }
74//! ```
75//!
76//! ## Persistent Storage
77//!
78//! Use storage backends to persist HyperLogLogs:
79//!
80//! ```rust,no_run
81//! use hyperloglog::{HyperLogLog, Storage, storage::FileStorage};
82//!
83//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
84//! let storage = FileStorage::new("./data").await?;
85//!
86//! let mut hll = HyperLogLog::new(14)?;
87//! hll.add_str("user123");
88//! hll.add_str("user456");
89//!
90//! // Store
91//! storage.store("daily_visitors", &hll).await?;
92//!
93//! // Load later
94//! let loaded = storage.load("daily_visitors").await?;
95//! println!("Count: {}", loaded.count());
96//! # Ok(())
97//! # }
98//! ```
99//!
100//! ## Precision and Memory Tradeoffs
101//!
102//! | Precision | Memory  | Standard Error |
103//! |-----------|---------|----------------|
//! | 10        | 1 KB    | ±3.25%         |
//! | 12        | 4 KB    | ±1.625%        |
//! | 14        | 16 KB   | ±0.813%        |
//! | 16        | 64 KB   | ±0.406%        |
108//!
109//! ## Feature Flags
110//!
111//! - `file-storage` (default): Enable file-based storage backend
112//! - `elasticsearch-storage`: Enable Elasticsearch storage backend
113//! - `server`: Enable HTTP server with Redis-compatible API
114//! - `full`: Enable all features
115//!
116//! ## Examples
117//!
118//! See the `examples/` directory for more usage patterns:
119//! - `basic_usage.rs` - Simple cardinality estimation
120//! - `merging.rs` - Combining HyperLogLogs from multiple sources
121//! - `file_storage.rs` - Using persistent file storage
122//! - `precision_comparison.rs` - Comparing different precision values
123//! - `server.rs` - Running the HTTP server
124
125pub mod hll;
126pub mod error;
127
128#[cfg(feature = "file-storage")]
129pub mod storage;
130
131#[cfg(feature = "server")]
132pub mod api;
133
134pub use hll::HyperLogLog;
135pub use error::{HllError, Result};
136
137#[cfg(feature = "file-storage")]
138pub use storage::Storage;
139
140#[cfg(feature = "file-storage")]
141pub use storage::FileStorage;
142
143#[cfg(feature = "elasticsearch-storage")]
144pub use storage::ElasticsearchStorage;