streaming_algorithms/lib.rs
1//! SIMD-accelerated implementations of various [streaming algorithms](https://en.wikipedia.org/wiki/Streaming_algorithm).
2//!
3//! <p style="font-family: 'Fira Sans',sans-serif;padding:0.3em 0"><strong>
4//! <a href="https://crates.io/crates/streaming_algorithms">đŸ“¦ Crates.io</a> │ <a href="https://github.com/alecmocatta/streaming_algorithms">đŸ“‘ GitHub</a> │ <a href="https://constellation.zulipchat.com/#narrow/stream/213236-subprojects">đŸ’¬ Chat</a>
5//! </strong></p>
6//!
7//! This library is a work in progress. PRs are very welcome! Currently implemented algorithms include:
8//!
9//! * Count–min sketch
10//! * Top k (Count–min sketch plus a doubly linked hashmap to track heavy hitters / top k keys when ordered by aggregated value)
11//! * HyperLogLog
12//! * Reservoir sampling
13//!
14//! A goal of this library is to enable composition of these algorithms; for example Top k + HyperLogLog to enable an approximate version of something akin to `SELECT key FROM table GROUP BY key ORDER BY COUNT(DISTINCT value) DESC LIMIT k`.
15//!
16//! Run your application with `RUSTFLAGS="-C target-cpu=native"` and the `nightly` feature to benefit from the SIMD-acceleration like so:
17//!
18//! ```bash
19//! RUSTFLAGS="-C target-cpu=native" cargo run --features "streaming_algorithms/nightly" --release
20//! ```
21//!
22//! See [this gist](https://gist.github.com/debasishg/8172796) for a good list of further algorithms to be implemented. Other resources are [Probabilistic data structures – Wikipedia](https://en.wikipedia.org/wiki/Category:Probabilistic_data_structures), [DataSketches – A similar Java library originating at Yahoo](https://datasketches.github.io/), and [Algebird – A similar Java library originating at Twitter](https://github.com/twitter/algebird).
23//!
24//! As these implementations are often in hot code paths, unsafe is used, albeit only when necessary to a) achieve the asymptotically optimal algorithm or b) mitigate an observed bottleneck.
25
26#![doc(html_root_url = "https://docs.rs/streaming_algorithms/0.3.0")]
27#![warn(
28 missing_copy_implementations,
29 missing_debug_implementations,
30 missing_docs,
31 trivial_casts,
32 trivial_numeric_casts,
33 unused_import_braces,
34 unused_qualifications,
35 unused_results,
36 clippy::pedantic
37)]
38// from https://github.com/rust-unofficial/patterns/blob/master/anti_patterns/deny-warnings.md
39#![allow(
40 dead_code,
41 clippy::doc_markdown,
42 clippy::inline_always,
43 clippy::module_name_repetitions,
44 clippy::if_not_else,
45 clippy::needless_pass_by_value,
46 clippy::suspicious_op_assign_impl,
47 clippy::float_cmp,
48 clippy::unsafe_derive_deserialize,
49 clippy::must_use_candidate,
50 clippy::unused_self
51)]
52
53mod count_min;
54mod distinct;
55mod linked_list;
56mod ordered_linked_list;
57mod sample;
58mod top;
59mod traits;
60
61pub use count_min::*;
62pub use distinct::*;
63pub use sample::*;
64pub use top::*;
65pub use traits::*;
66
67// TODO: replace all instances of the following with a.try_into().unwrap() if/when that exists https://github.com/rust-lang/rust/pull/47857
68#[allow(
69 clippy::cast_possible_truncation,
70 clippy::cast_sign_loss,
71 clippy::cast_precision_loss,
72 clippy::cast_lossless
73)]
74fn f64_to_usize(a: f64) -> usize {
75 assert!(a.is_sign_positive() && a <= usize::max_value() as f64 && a.fract() == 0.0);
76 a as usize
77}
78
79#[allow(
80 clippy::cast_possible_truncation,
81 clippy::cast_sign_loss,
82 clippy::cast_precision_loss
83)]
84fn f64_to_u8(a: f64) -> u8 {
85 assert!(a.is_sign_positive() && a <= f64::from(u8::max_value()) && a.fract() == 0.0);
86 a as u8
87}
88
89#[allow(clippy::cast_precision_loss, clippy::cast_lossless)]
90fn usize_to_f64(a: usize) -> f64 {
91 assert!(a as u64 <= 1_u64 << 53);
92 a as f64
93}
94#[allow(clippy::cast_precision_loss)]
95fn u64_to_f64(a: u64) -> f64 {
96 assert!(a <= 1_u64 << 53);
97 a as f64
98}