amadeus_streaming/
lib.rs

1//! Harmonious distributed data processing & analysis in Rust.
2//!
3//! <p style="font-family: 'Fira Sans',sans-serif;padding:0.3em 0"><strong>
4//! <a href="https://crates.io/crates/amadeus">📦&nbsp;&nbsp;Crates.io</a>&nbsp;&nbsp;│&nbsp;&nbsp;<a href="https://github.com/constellation-rs/amadeus">📑&nbsp;&nbsp;GitHub</a>&nbsp;&nbsp;│&nbsp;&nbsp;<a href="https://constellation.zulipchat.com/#narrow/stream/213231-amadeus">💬&nbsp;&nbsp;Chat</a>
5//! </strong></p>
6//!
7//! This is a support crate of [Amadeus](https://github.com/constellation-rs/amadeus) and is not intended to be used directly. These types are re-exposed in [`amadeus::source`](https://docs.rs/amadeus/0.3/amadeus/source/index.html).
8
9// SIMD-accelerated implementations of various [streaming algorithms](https://en.wikipedia.org/wiki/Streaming_algorithm).
10//
11// This library is a work in progress. PRs are very welcome! Currently implemented algorithms include:
12//
13//  * Count–min sketch
14//  * Top k (Count–min sketch plus a doubly linked hashmap to track heavy hitters / top k keys when ordered by aggregated value)
15//  * HyperLogLog
16//  * Reservoir sampling
17//
18// A goal of this library is to enable composition of these algorithms; for example Top k + HyperLogLog to enable an approximate version of something akin to `SELECT key FROM table GROUP BY key ORDER BY COUNT(DISTINCT value) DESC LIMIT k`.
19//
20// Run your application with `RUSTFLAGS="-C target-cpu=native"` and the `nightly` feature to benefit from the SIMD-acceleration like so:
21//
22// ```bash
23// RUSTFLAGS="-C target-cpu=native" cargo run --features "amadeus_streaming/nightly" --release
24// ```
25//
26// See [this gist](https://gist.github.com/debasishg/8172796) for a good list of further algorithms to be implemented. Other resources are [Probabilistic data structures – Wikipedia](https://en.wikipedia.org/wiki/Category:Probabilistic_data_structures), [DataSketches – A similar Java library originating at Yahoo](https://datasketches.github.io/), and [Algebird – A similar Scala library originating at Twitter](https://github.com/twitter/algebird).
27//
28// As these implementations are often in hot code paths, unsafe is used, albeit only when necessary to a) achieve the asymptotically optimal algorithm or b) mitigate an observed bottleneck.
29
30#![doc(html_root_url = "https://docs.rs/amadeus-streaming/0.4.3")]
31#![cfg_attr(nightly, feature(map_first_last))]
32#![cfg_attr(nightly, feature(unboxed_closures))]
33#![warn(
34	missing_copy_implementations,
35	missing_debug_implementations,
36	missing_docs,
37	trivial_casts,
38	trivial_numeric_casts,
39	unused_import_braces,
40	unused_qualifications,
41	unused_results,
42	clippy::pedantic
43)]
44// from https://github.com/rust-unofficial/patterns/blob/master/anti_patterns/deny-warnings.md
45#![allow(
46	dead_code,
47	clippy::doc_markdown,
48	clippy::inline_always,
49	clippy::module_name_repetitions,
50	clippy::if_not_else,
51	clippy::needless_pass_by_value,
52	clippy::suspicious_op_assign_impl,
53	clippy::float_cmp,
54	clippy::unsafe_derive_deserialize,
55	clippy::must_use_candidate,
56	clippy::unused_self,
57	clippy::missing_panics_doc,
58	clippy::let_underscore_drop
59)]
60
61mod count_min;
62mod distinct;
63mod linked_list;
64mod ordered_linked_list;
65mod sample;
66mod sort;
67mod top;
68mod traits;
69
70#[cfg(feature = "protobuf")]
71mod proto_util;
72
73pub use count_min::*;
74pub use distinct::*;
75pub use sample::*;
76pub use sort::*;
77pub use top::*;
78pub use traits::*;
79
80// TODO: replace all instances of the following with a.try_into().unwrap() if/when that exists https://github.com/rust-lang/rust/pull/47857
/// Converts an `f64` that holds a non-negative whole number into the equal `usize`.
///
/// # Panics
///
/// Panics if `a` has its sign bit set (this includes `-0.0`), has a fractional part, is NaN or
/// infinite, or is too large to be represented by a `usize`.
#[allow(
	clippy::cast_possible_truncation,
	clippy::cast_sign_loss,
	clippy::cast_precision_loss,
	clippy::cast_lossless
)]
fn f64_to_usize(a: f64) -> usize {
	// `usize::max_value() as f64` is not exact on 64-bit targets: it rounds *up* to 2^64. A `<=`
	// comparison against it would therefore admit 2^64 itself, which `as usize` then silently
	// saturates to `usize::max_value()`. Adding 1.0 yields exactly 2^BITS for every pointer width
	// (on 64-bit the addition is absorbed by rounding, on narrower targets it is exact), so a
	// strict `<` against it is the correct range check.
	let exclusive_upper = usize::max_value() as f64 + 1.0;
	assert!(a.is_sign_positive() && a < exclusive_upper && a.fract() == 0.0);
	a as usize
}
91
/// Converts an `f64` that holds a whole number in `0..=255` into the equal `u8`.
///
/// # Panics
///
/// Panics if `a` has its sign bit set (this includes `-0.0`), exceeds `u8::max_value()`, has a
/// fractional part, or is NaN.
#[allow(
	clippy::cast_possible_truncation,
	clippy::cast_sign_loss,
	clippy::cast_precision_loss
)]
fn f64_to_u8(a: f64) -> u8 {
	// 255 is exactly representable as an f64, so an inclusive comparison is precise here.
	let upper = f64::from(u8::max_value());
	let non_negative = a.is_sign_positive();
	let within_range = a <= upper;
	let integral = a.fract() == 0.0;
	assert!(non_negative && within_range && integral);
	a as u8
}
101
/// Converts a `usize` into the equal `f64`.
///
/// # Panics
///
/// Panics if `a` is greater than 2^53; every integer up to that bound is exactly representable
/// as an `f64`, so the conversion never loses precision.
#[allow(clippy::cast_precision_loss, clippy::cast_lossless)]
fn usize_to_f64(a: usize) -> f64 {
	// Largest magnitude up to which consecutive integers all have an exact f64 representation.
	const MAX_EXACT: u64 = 1_u64 << 53;
	let widened = a as u64;
	assert!(widened <= MAX_EXACT);
	a as f64
}
/// Converts a `u64` into the equal `f64`.
///
/// # Panics
///
/// Panics if `a` is greater than 2^53; every integer up to that bound is exactly representable
/// as an `f64`, so the conversion never loses precision.
#[allow(clippy::cast_precision_loss)]
fn u64_to_f64(a: u64) -> f64 {
	// Largest magnitude up to which consecutive integers all have an exact f64 representation.
	const MAX_EXACT: u64 = 1_u64 << 53;
	assert!(a <= MAX_EXACT);
	let converted = a as f64;
	converted
}