amadeus_streaming/lib.rs
1//! Harmonious distributed data processing & analysis in Rust.
2//!
3//! <p style="font-family: 'Fira Sans',sans-serif;padding:0.3em 0"><strong>
4//! <a href="https://crates.io/crates/amadeus">📦 Crates.io</a> │ <a href="https://github.com/constellation-rs/amadeus">📑 GitHub</a> │ <a href="https://constellation.zulipchat.com/#narrow/stream/213231-amadeus">💬 Chat</a>
5//! </strong></p>
6//!
7//! This is a support crate of [Amadeus](https://github.com/constellation-rs/amadeus) and is not intended to be used directly. These types are re-exposed in [`amadeus::source`](https://docs.rs/amadeus/0.3/amadeus/source/index.html).
8
9// SIMD-accelerated implementations of various [streaming algorithms](https://en.wikipedia.org/wiki/Streaming_algorithm).
10//
11// This library is a work in progress. PRs are very welcome! Currently implemented algorithms include:
12//
13// * Count–min sketch
14// * Top k (Count–min sketch plus a doubly linked hashmap to track heavy hitters / top k keys when ordered by aggregated value)
15// * HyperLogLog
16// * Reservoir sampling
17//
18// A goal of this library is to enable composition of these algorithms; for example Top k + HyperLogLog to enable an approximate version of something akin to `SELECT key FROM table GROUP BY key ORDER BY COUNT(DISTINCT value) DESC LIMIT k`.
19//
20// Run your application with `RUSTFLAGS="-C target-cpu=native"` and the `nightly` feature to benefit from the SIMD-acceleration like so:
21//
22// ```bash
23// RUSTFLAGS="-C target-cpu=native" cargo run --features "amadeus_streaming/nightly" --release
24// ```
25//
26// See [this gist](https://gist.github.com/debasishg/8172796) for a good list of further algorithms to be implemented. Other resources are [Probabilistic data structures – Wikipedia](https://en.wikipedia.org/wiki/Category:Probabilistic_data_structures), [DataSketches – A similar Java library originating at Yahoo](https://datasketches.github.io/), and [Algebird – A similar Scala library originating at Twitter](https://github.com/twitter/algebird).
27//
28// As these implementations are often in hot code paths, unsafe is used, albeit only when necessary to a) achieve the asymptotically optimal algorithm or b) mitigate an observed bottleneck.
29
30#![doc(html_root_url = "https://docs.rs/amadeus-streaming/0.4.3")]
31#![cfg_attr(nightly, feature(map_first_last))]
32#![cfg_attr(nightly, feature(unboxed_closures))]
33#![warn(
34 missing_copy_implementations,
35 missing_debug_implementations,
36 missing_docs,
37 trivial_casts,
38 trivial_numeric_casts,
39 unused_import_braces,
40 unused_qualifications,
41 unused_results,
42 clippy::pedantic
43)]
44// from https://github.com/rust-unofficial/patterns/blob/master/anti_patterns/deny-warnings.md
45#![allow(
46 dead_code,
47 clippy::doc_markdown,
48 clippy::inline_always,
49 clippy::module_name_repetitions,
50 clippy::if_not_else,
51 clippy::needless_pass_by_value,
52 clippy::suspicious_op_assign_impl,
53 clippy::float_cmp,
54 clippy::unsafe_derive_deserialize,
55 clippy::must_use_candidate,
56 clippy::unused_self,
57 clippy::missing_panics_doc,
58 clippy::let_underscore_drop
59)]
60
61mod count_min;
62mod distinct;
63mod linked_list;
64mod ordered_linked_list;
65mod sample;
66mod sort;
67mod top;
68mod traits;
69
70#[cfg(feature = "protobuf")]
71mod proto_util;
72
73pub use count_min::*;
74pub use distinct::*;
75pub use sample::*;
76pub use sort::*;
77pub use top::*;
78pub use traits::*;
79
80// TODO: replace all instances of the following with a.try_into().unwrap() if/when that exists https://github.com/rust-lang/rust/pull/47857
/// Converts an `f64` holding a non-negative whole number into a `usize`.
///
/// # Panics
///
/// Panics if `a` has its sign bit set (this includes `-0.0`), is NaN or infinite, has a
/// fractional part, or is too large to be represented as a `usize`.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss,
    clippy::cast_lossless
)]
fn f64_to_usize(a: f64) -> usize {
    // `usize::max_value() as f64` rounds up to 2^64 on 64-bit targets, so a `<=` comparison
    // against it would accept 2^64 and the cast below would silently clamp to `usize::MAX`.
    // Build the exclusive bound 2^BITS exactly instead: 2^(BITS-1) is a power of two, hence
    // exactly representable as an `f64`, and doubling it is exact as well.
    let exclusive_upper = (usize::max_value() / 2 + 1) as f64 * 2.0;
    // NaN fails the `<` comparison and infinity is not below the bound, so both are rejected.
    assert!(a.is_sign_positive() && a < exclusive_upper && a.fract() == 0.0);
    a as usize
}
91
/// Narrows an `f64` that holds a whole number in `0..=255` to a `u8`.
///
/// # Panics
///
/// Panics when `a` has its sign bit set, exceeds `u8::max_value()`, or has a fractional part.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss
)]
fn f64_to_u8(a: f64) -> u8 {
    let ceiling = f64::from(u8::max_value());
    let is_whole = a.fract() == 0.0;
    let fits = a.is_sign_positive() && a <= ceiling;
    assert!(fits && is_whole);
    a as u8
}
101
/// Converts a `usize` to an `f64`, refusing values above 2^53.
///
/// # Panics
///
/// Panics if `a` is greater than `1 << 53`.
#[allow(clippy::cast_precision_loss, clippy::cast_lossless)]
fn usize_to_f64(a: usize) -> f64 {
    // Integers up to 2^53 all have an exact `f64` representation.
    const LARGEST_EXACT: u64 = 1_u64 << 53;
    let widened = a as u64;
    assert!(widened <= LARGEST_EXACT);
    a as f64
}
/// Converts a `u64` to an `f64`, refusing values above 2^53.
///
/// # Panics
///
/// Panics if `a` is greater than `1 << 53`.
#[allow(clippy::cast_precision_loss)]
fn u64_to_f64(a: u64) -> f64 {
    // Integers up to 2^53 all have an exact `f64` representation.
    const LARGEST_EXACT: u64 = 1_u64 << 53;
    assert!(a <= LARGEST_EXACT);
    a as f64
}