fastset/lib.rs
1//! # fastset
2//!
3//! 
4//! 
5//! 
6//! 
7//!
8//! Fast set implementation for dense, bounded integer collections, offering quick updates and random access.
9//!
10//! ## Features
11//!
12//! - Tailored for unsigned integer (`usize`) elements, ideal for index-based applications
13//! - Fast insertion, removal, and membership check
14//! - `random` method for uniform random sampling
15//! - Paging mechanism to somewhat mitigate the large memory footprint[^1]
16//!
17//! Note that while paging improves the existing memory footprint,
18//! `fastset::Set` **is still not** a good solution for memory constrained applications
19//! or for applications with storage need for sparse elements spread over an extended range.
20//! For integers twice as sparse as the page size, the `fastset::Set` with paging
21//! has peak heap allocation ~ 8x that of `std::collections::HashSet`.
22//!
23//! [^1]: A paging mechanism is introduced in `0.4.0` that reduces the memory-footprint of `fastset::Set`.
24//! With the paging feature, `fastset::Set` achieves ~ 50% reduction in peak heap memory allocations
25//! with no additional performance overhead.
26//!
27//! ## Benchmarks
28//!
29//! | Operation | `fastset::Set` | `hashbrown::HashSet` | `std::collections::HashSet` |
30//! |-----------|----------------|----------------------|-----------------------------|
31//! | insert | 1.1632 ns | 4.7105 ns | 14.136 ns |
32//! | remove | 1.1647 ns | 3.0459 ns | 10.625 ns |
33//! | contains | 932.81 ps | 985.38 ps | 13.827 ns |
34//! | random | 651.26 ps | N/A | N/A |
35//!
36//! - CPU: AMD Ryzen™ 5 5600G with Radeon™ Graphics x 12
37//! - RAM: 58.8 GiB
38//! - OS: Guix System, 64-bit
39//!
40//! ## Usage
41//!
42//! ```rust
43//! use fastset::{set, Set};
44//! use nanorand::WyRand;
45//!
46//! let mut set = set![5, 10, 15, 20, 25, 30]; // Initialize set with elements
47//! assert!(set.contains(&5)); // Check for element presence
48//!
49//! set.insert(35); // Insert a new element
50//! assert!(set.contains(&35));
51//!
52//! set.remove(&5); // Remove an element
53//! assert!(!set.contains(&5));
54//!
55//! if let Some(taken) = set.take(&10) { // Remove and return an element
56//! assert_eq!(taken, 10);
57//! }
58//!
59//! let mut rng = WyRand::new();
60//! if let Some(element) = set.random(&mut rng) { // Get a random element
61//! set.remove(&element); // Remove the randomly selected element
62//! assert!(!set.contains(&element));
63//! }
64//!
65//! println!("Set: {:?}, Length: {}", set, set.len()); // Display the set and its length
66//! ```
67//!
68//! ## Delphic Sets
69//!
70//! `fastset::Set`, as implemented here, meets the conditions for being a Delphic set \[1, 2\]:
71//!
72//! Let Ω be a discrete universe. A set (S ⊆ Ω) is considered a member of a Delphic family if it supports the
73//! following operations within O(log |Ω|) time:
74//!
75//! - **Membership**: Verify if any element (x ∈ Ω) exists within (S).
76//! - **Cardinality**: Determine the size of (S), i.e., (|S|).
77//! - **Sampling**: Draw a uniform random sample from (S).
78//!
79//! A unit test in `src/set.rs` verifies the uniform sampling property with a basic [Chi-squared test](https://en.wikipedia.org/wiki/Chi-squared_test).
80//!
81//! ```rust
82//! use fastset::Set;
83//! use nanorand::WyRand;
84//! use statrs::distribution::{ChiSquared, ContinuousCDF};
85//!
86//! fn sampling_is_uniformly_at_random() {
87//! const SAMPLES: usize = 1_000_000;
88//! const EDGE_OF_THE_UNIVERSE: usize = 10000;
89//!
90//! let elements = (1..=EDGE_OF_THE_UNIVERSE).collect::<Vec<_>>();
91//! let set = Set::from(elements.clone());
92//! let mut rng = WyRand::new_seed(42u64);
93//! let mut counts = vec![0f64; elements.len()];
94//!
95//! (0..SAMPLES).for_each(|_| {
96//! if let Some(value) = set.random(&mut rng) {
97//! counts[value - 1] += 1.0;
98//! }
99//! });
100//!
101//! let e = SAMPLES as f64 / elements.len() as f64;
102//! let statistic: f64 = counts.iter().map(|&o| { (o - e) * (o - e) / e }).sum();
103//!
104//! let dof = elements.len() - 1;
105//! let chi = ChiSquared::new(dof as f64).unwrap();
106//! let acceptable = chi.inverse_cdf(0.99);
107//!
108//! // Null hypothesis: Elements are sampled uniformly at random
109//! assert!(
110//! statistic < acceptable,
111//! "Chi-square statistic {} is greater than what's acceptable ({})",
112//! statistic,
113//! acceptable,
114//! );
115//! }
116//! ```
117//!
118//! ## References
119//!
120//! \[1\]: **Chakraborty, Sourav, N. V. Vinodchandran, and Kuldeep S. Meel.** *"Distinct Elements in Streams: An Algorithm for the (Text) Book."* arXiv preprint arXiv:2301.10191 (2023).
121//!
122//! \[2\]: **Meel, Kuldeep S., Sourav Chakraborty, and N. V. Vinodchandran.** *"Estimation of the Size of Union of Delphic Sets: Achieving Independence from Stream Size."* Proceedings of the 41st ACM SIGMOD-SIGACT-SIGAI Symposium on Principles of Database Systems. 2022.
123//!
124mod set;
125pub use set::{Set, SetOps};
126/// The maximum capacity for the Set.
127///
128/// CAUTION: Setting the set's largest element or capacity near MAX_CAPACITY
129/// can significantly impact memory usage. For instance, with MAX_CAPACITY
130/// set to 1 billion, the `Set` could require approximately 16 GB of memory
131/// under certain conditions due to storage of elements, their indicators,
132/// and indexing structures. Ensure adequate memory is available when
133/// approaching this limit.
134const MAX_CAPACITY: usize = 1_000_000_000;
135
136/// Macro for creating a `Set` with the given elements.
137///
138/// # Example
139///
140/// ```
141/// use fastset::{Set, set};
142/// let set = set![1, 2, 3];
143/// ```
144#[macro_export]
145macro_rules! set {
146 ($($element:expr),*) => {{
147 let mut new_set = Set::with_max(30000); // Adjusted for crate-level visibility
148 $(new_set.insert($element);)*
149 new_set
150 }};
151}
152
153/// Macro for removing elements from a `Set`.
154///
155/// # Example
156///
157/// ```
158/// # use fastset::{Set, remove};
159/// let mut my_set = Set::new(10);
160/// my_set.insert(1);
161/// my_set.insert(2);
162///
163/// remove!(my_set, 1, 2);
164/// ```
165#[macro_export]
166macro_rules! remove {
167 ($set:expr, $($element:expr),*) => {
168 $( $set.remove(&$element); )*
169 };
170}
171
172/// Macro for inserting elements into a `Set`.
173///
174/// # Example
175///
176/// ```
177/// # use fastset::{Set, insert};
178/// let mut my_set = Set::new(10);
179///
180/// insert!(my_set, 1, 2, 3);
181/// ```
182#[macro_export]
183macro_rules! insert {
184 ($set:expr, $($element:expr),*) => {
185 $( $set.insert($element); )*
186 };
187}
188
189/// Macro for selecting a random element from a `Set`.
190///
191/// If no random number generator is provided, it will use `WyRand` as the default.
192///
193/// # Example
194///
195/// ```
196/// # use fastset::{Set, random};
197/// # use nanorand::{WyRand, Rng};
198/// let mut my_set = Set::new(10);
199/// my_set.insert(1);
200/// my_set.insert(2);
201///
202/// let random_elem = random!(my_set); // Use default RNG
203///
204/// let mut rng = WyRand::new();
205/// let random_elem_custom_rng = random!(my_set, &mut rng); // Use custom RNG
206/// ```
207#[macro_export]
208macro_rules! random {
209 ($set:expr, $rng:expr) => {{
210 $set.random($rng)
211 }};
212 ($set:expr) => {{
213 let mut rng = WyRand::new();
214 $set.random(&mut rng)
215 }};
216}