1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
//! SIMD-accelerated implementations of various [streaming algorithms](https://en.wikipedia.org/wiki/Streaming_algorithm).
//!
//! <p style="font-family: 'Fira Sans',sans-serif;padding:0.3em 0"><strong>
//! <a href="https://crates.io/crates/streaming_algorithms">📦&nbsp;&nbsp;Crates.io</a>&nbsp;&nbsp;│&nbsp;&nbsp;<a href="https://github.com/alecmocatta/streaming_algorithms">📑&nbsp;&nbsp;GitHub</a>&nbsp;&nbsp;│&nbsp;&nbsp;<a href="https://constellation.zulipchat.com/#narrow/stream/213236-subprojects">💬&nbsp;&nbsp;Chat</a>
//! </strong></p>
//!
//! This library is a work in progress. PRs are very welcome! Currently implemented algorithms include:
//!
//!  * Count–min sketch
//!  * Top k (Count–min sketch plus a doubly linked hashmap to track heavy hitters / top k keys when ordered by aggregated value)
//!  * HyperLogLog
//!  * Reservoir sampling
//!
//! A goal of this library is to enable composition of these algorithms; for example Top k + HyperLogLog to enable an approximate version of something akin to `SELECT key FROM table GROUP BY key ORDER BY COUNT(DISTINCT value) DESC LIMIT k`.
//!
//! Run your application with `RUSTFLAGS="-C target-cpu=native"` and the `nightly` feature to benefit from the SIMD-acceleration like so:
//!
//! ```bash
//! RUSTFLAGS="-C target-cpu=native" cargo run --features "streaming_algorithms/nightly" --release
//! ```
//!
//! See [this gist](https://gist.github.com/debasishg/8172796) for a good list of further algorithms to be implemented. Other resources are [Probabilistic data structures – Wikipedia](https://en.wikipedia.org/wiki/Category:Probabilistic_data_structures), [DataSketches – A similar Java library originating at Yahoo](https://datasketches.github.io/), and [Algebird  – A similar Java library originating at Twitter](https://github.com/twitter/algebird).
//!
//! As these implementations are often in hot code paths, unsafe is used, albeit only when necessary to a) achieve the asymptotically optimal algorithm or b) mitigate an observed bottleneck.

#![doc(html_root_url = "https://docs.rs/streaming_algorithms/0.3.0")]
#![warn(
	missing_copy_implementations,
	missing_debug_implementations,
	missing_docs,
	trivial_casts,
	trivial_numeric_casts,
	unused_import_braces,
	unused_qualifications,
	unused_results,
	clippy::pedantic
)]
// from https://github.com/rust-unofficial/patterns/blob/master/anti_patterns/deny-warnings.md
#![allow(
	dead_code,
	clippy::doc_markdown,
	clippy::inline_always,
	clippy::module_name_repetitions,
	clippy::if_not_else,
	clippy::needless_pass_by_value,
	clippy::suspicious_op_assign_impl,
	clippy::float_cmp,
	clippy::unsafe_derive_deserialize,
	clippy::must_use_candidate,
	clippy::unused_self
)]

mod count_min;
mod distinct;
mod linked_list;
mod ordered_linked_list;
mod sample;
mod top;
mod traits;

pub use count_min::*;
pub use distinct::*;
pub use sample::*;
pub use top::*;
pub use traits::*;

// TODO: replace all instances of the following with a.try_into().unwrap() if/when that exists https://github.com/rust-lang/rust/pull/47857
#[allow(
	clippy::cast_possible_truncation,
	clippy::cast_sign_loss,
	clippy::cast_precision_loss,
	clippy::cast_lossless
)]
fn f64_to_usize(a: f64) -> usize {
	assert!(a.is_sign_positive() && a <= usize::max_value() as f64 && a.fract() == 0.0);
	a as usize
}

#[allow(
	clippy::cast_possible_truncation,
	clippy::cast_sign_loss,
	clippy::cast_precision_loss
)]
fn f64_to_u8(a: f64) -> u8 {
	assert!(a.is_sign_positive() && a <= f64::from(u8::max_value()) && a.fract() == 0.0);
	a as u8
}

#[allow(clippy::cast_precision_loss, clippy::cast_lossless)]
fn usize_to_f64(a: usize) -> f64 {
	assert!(a as u64 <= 1_u64 << 53);
	a as f64
}
#[allow(clippy::cast_precision_loss)]
fn u64_to_f64(a: u64) -> f64 {
	assert!(a <= 1_u64 << 53);
	a as f64
}