tagged_rendezvous/lib.rs
1//! This crate is a proof of concept for node selection based on a logarithmic
2//! rendezvous hashing implementation that allows nodes to self-exclude based
3//! on a generic type. While this is still a proof of concept, performance
4//! should be good enough for small to medium-sized implementations.
5//!
6//! Note that this implementation does not include any caching mechanism. This
7//! is intentional, and is done to keep the implementation as simple as
8//! possible. Users of this crate _should_ implement their own caching
9//! mechanism for best performance and evict on mutation, but this is not
10//! necessary, especially in small use cases.
11//!
//! This implementation assumes that the typical use case is on some
//! load-balancing node or equivalent, and that it is aware of which nodes
//! are available or unavailable. Additionally, it should be able to
//! determine the weight of each node.
16//!
17//! The entry point of this crate is [`NodeSelection`] or [`BitNodeSelection`].
18//! Examples of construction and usage are included in their respective
19//! documentation.
20
21// Deny unsafe code, but allow unsafe code in tests.
22#![cfg_attr(not(test), forbid(unsafe_code))]
23#![warn(
24 clippy::cargo,
25 clippy::pedantic,
26 clippy::nursery,
27 clippy::missing_docs_in_private_items
28)]
29#![deny(missing_docs)]
30
31// Re-alias the rayon crate, to allow features to be the name of the crate
32#[cfg(feature = "rayon")]
33use rayon_crate as rayon;
34
35use std::cmp::Ordering;
36use std::fmt::Display;
37use std::hash::{Hash, Hasher};
38use std::io::Cursor;
39use std::iter::FromIterator;
40use std::num::NonZeroUsize;
41use std::ops::BitAnd;
42use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering};
43
44#[cfg(feature = "rayon")]
45use rayon::iter::{
46 FromParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelExtend,
47 ParallelIterator,
48};
49#[cfg(feature = "rayon")]
50use rayon::slice::ParallelSliceMut;
51
52use dashmap::mapref::multiple::RefMulti;
53use dashmap::mapref::one::RefMut;
54use dashmap::{DashMap, DashSet};
55use murmur3::murmur3_x64_128;
56
57/// Contains the error type used by this crate.
58mod error;
59
60// Re-export the dashmap crate so users don't need to manually import it
61pub use dashmap;
62pub use error::*;
63
64/// Provides a way to consistently select some weighted bucket (or node) given
65/// some value.
66///
67/// The exact implementation is based on the algorithm described in
68/// [Schindelhauer and Schomaker "Weighted Distributed Hash Tables"][paper].
69/// As a result, modifications of weights (or additions and removals of nodes)
70/// can be performed with perfect stability and perfect precision of node
71/// weighing; subsequently, only the minimum number of keys are remapped to new
72/// nodes.
73///
74/// This node does not perform any sort of caching. As a result, it's best to
75/// use this with some form of cache, whose entries are invalidated when a node
76/// is added, removed, or has its weight modified. This is because lookups for
77/// a specific node are at best, linear, and at worst, quadratic. To be
78/// specific, there is always a linear lookup cost for a given key, determined
79/// by the number of nodes present. If nodes utilize tag exclusion, then the
80/// cost becomes quadratic to determine if a node should be excluded given a set
81/// of tags.
82///
83/// This implementation attempts to be performant and concurrent as possible,
84/// using a [`DashMap`] and [`DashSet`]s to provide concurrent access and
85/// `rayon` to provide parallel lookup, if the feature is enabled. In single
86/// threaded contexts, care should be used to not deadlock with multiple
87/// references.
88///
89/// # Excluding tags
90///
91/// This is slightly different from a standard weighted hash table as it also
92/// implements the ability to allow nodes to exclude themselves from being
93/// selected. This is provided via the `ExclusionTag` type, and should be an
94/// `enum`.
95///
96/// Generally speaking, `ExclusionTag`s must at least [`Hash`] and [`Eq`]. This
97/// is a requirement of storing the `ExclusionTag` in a Set implementation.
98/// However, if using `rayon` to allow for parallel lookup, the trait bounds on
99/// `ExclusionTag`s is restricted to types that are also [`Send`] and [`Sync`].
100///
101/// This feature is asymptotically expensive, as this requires checking all
102/// nodes to see if they exclude the given tags. As a result, this should be
103/// used with care, and at best the number of exclusions should be relatively
104/// small per node.
105///
106/// Of course, this feature can be disabled if `ExclusionTag` is the unit type,
107/// which reduces lookup costs to be linear.
108///
109/// Excluded nodes do not contribute to the total sum of weights. For example,
110/// if three nodes exist with equal weights, and one node is excluded, then the
111/// probability of selecting a node is the same as if only two nodes were
112/// present.
113///
114/// In a more complex case, if nodes with weights 1, 2, and 3 are present, and
115/// the node with weight 2 is excluded, then the probability of selecting a node
116/// becomes 25% and 75% respectively.
117///
118/// Note that if [`BitAnd`] can be performed on `ExclusionTags` in an efficient
119/// way (such as for bitflags), it is highly recommended to use
120/// [`BitNodeSelection`] instead.
121///
122/// # Examples
123///
/// Nodes can be added and removed at any time and subsequent lookups will
/// immediately reflect the new state.
126///
127/// ```
128/// # use tagged_rendezvous::*;
129/// # fn main() -> () {
130/// # example();
131/// # }
132/// #
133/// # fn example() -> Option<()> {
134/// use std::num::NonZeroUsize;
135///
136/// let selector = NodeSelection::new();
137///
138/// // Add a node with a weight of 1
139/// let node_1 = Node::<(), _>::new(NonZeroUsize::new(1)?, ());
140/// let node_1_id = selector.add(node_1.clone());
141///
142/// // Lookups will always return the first node, since it's the only one.
143/// let looked_up_node = selector.get(b"hello world")?;
144/// assert_eq!(looked_up_node.value(), &node_1);
145/// // Drop the reference ASAP, see `get` docs for more info.
146/// std::mem::drop(looked_up_node);
147///
148/// // Add a node with a weight of 2
149/// let node_2 = Node::<(), _>::new(NonZeroUsize::new(2)?, ());
150/// selector.add(node_2.clone());
151///
152/// // Any caches should be invalidated now, since we changed the state.
153/// // Now there's a 33% chance of selecting the first node, and a 67% chance of
154/// // selecting the second node.
155///
156/// // Do something with the node...
157///
158/// // Now remove the first node
159/// selector.remove(node_1_id);
160///
161/// // Any caches should be invalidated now, since we changed the state.
162/// // Lookups will now always return the second node, since it's the only one.
163///
164/// let looked_up_node = selector.get(b"hello world")?;
165/// assert_eq!(looked_up_node.value(), &node_2);
166/// # None
167/// # }
168/// ```
169///
170/// Modifying a weight is pretty ergonomic, but remember that caches should be
171/// invalidated after modifying the weights.
172///
173/// ```
174/// # use tagged_rendezvous::*;
175/// # fn main() -> () {
176/// # example();
177/// # }
178/// #
179/// # fn example() -> Option<()> {
180/// use std::num::NonZeroUsize;
181///
182/// let selector = NodeSelection::new();
183///
184/// // Add a node with a weight of 1
185/// let node_1 = Node::<(), _>::new(NonZeroUsize::new(1)?, ());
186/// let node_1_id = selector.add(node_1.clone());
187///
188/// // Add a node with a weight of 2
189/// let node_2 = Node::<(), _>::new(NonZeroUsize::new(2)?, ());
190/// selector.add(node_2.clone());
191///
192/// // Now there's a 33% chance of selecting the first node, and a 67% chance of
193/// // selecting the second node. Lets modify the weight so there's an equal
194/// // chance of selecting either node.
195///
196/// selector.get_mut(node_1_id)?.set_weight(NonZeroUsize::new(1)?);
197///
198/// // Any caches should be invalidated now.
199/// # None
200/// # }
201/// ```
202///
203/// If exclusion tags are used, then excluded nodes do not contribute to the
204/// sum weights of nodes.
205///
206/// ```
207/// # use tagged_rendezvous::*;
208/// # fn main() -> () {
209/// # example();
210/// # }
211/// #
212/// # fn example() -> Option<()> {
213/// use std::num::NonZeroUsize;
214/// use std::iter::FromIterator;
215///
216/// use dashmap::DashSet;
217///
218/// let selector = NodeSelection::new();
219///
220/// #[derive(Clone, PartialEq, Eq, Hash)]
221/// enum ExclusionTag {
222/// Foo,
223/// }
224///
225/// // Add a 3 nodes, one with an exclusion tag.
226/// let exclusions = DashSet::from_iter([ExclusionTag::Foo]);
227/// let node_1 = Node::new(NonZeroUsize::new(1)?, ());
228/// selector.add(node_1.clone());
229/// let node_2 = Node::with_exclusions(NonZeroUsize::new(1)?, (), exclusions.clone());
230/// selector.add(node_2.clone());
231/// let node_3 = Node::new(NonZeroUsize::new(1)?, ());
232/// selector.add(node_3.clone());
233///
234/// // There's a 33% chance of selecting any node, if no tags are provided.
235/// selector.get(b"hello world")?;
236///
/// // There's an equal chance of getting node 1 or node 3 since node 2 is
238/// // excluded.
239/// selector.get_with_exclusions(b"hello world", &exclusions)?;
240/// # None
241/// # }
242/// ```
243///
244/// [paper]: https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.414.9353
#[derive(Clone, Debug)]
pub struct NodeSelection<ExclusionTags: Hash + Eq, Metadata> {
    /// The nodes available for selection, keyed by their opaque [`NodeId`].
    /// `Hash + Eq` on `ExclusionTags` is required by the per-node exclusion
    /// sets stored inside each [`Node`].
    nodes: DashMap<NodeId, Node<ExclusionTags, Metadata>>,
}
250
251impl<ExclusionTags, Metadata> NodeSelection<ExclusionTags, Metadata>
252where
253 ExclusionTags: Hash + Eq,
254{
255 /// Finds a node that is responsible for the provided item with no
256 /// exclusions. This lookup is performed in serial. Consider
257 /// [`par_get`](#par_get) that uses rayon to parallelize the
258 /// lookup.
259 ///
260 /// # Safety
261 ///
262 /// This method may deadlock if called when holding a mutable reference into
263 /// the map. Callers must ensure that references are dropped as soon as
264 /// possible, especially in single threaded contexts.
265 #[inline]
266 #[must_use]
267 pub fn get(&self, item: &[u8]) -> Option<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
268 self.get_internal(item, None)
269 }
270
271 /// Like [`get`](#get), but allows the caller to provide a list of
272 /// tags, which nodes that exclude those tags will be skipped.
273 ///
274 /// # Safety
275 ///
276 /// This method may deadlock if called when holding a mutable reference into
277 /// the map. Callers must ensure that references are dropped as soon as
278 /// possible, especially in single threaded contexts.
279 #[inline]
280 #[must_use]
281 pub fn get_with_exclusions(
282 &self,
283 item: &[u8],
284 tags: &DashSet<ExclusionTags>,
285 ) -> Option<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
286 self.get_internal(item, Some(tags))
287 }
288
289 /// Returns a node that is responsible for the provided item. This lookup
290 /// has asymptotic complexity of `O(m * n^2)`, where `m` is the number of
291 /// nodes, and `n` is the number of tags given. As a result, callers should
292 /// generally use a small number of tags, either on node restrictions or on
293 /// the provided tags for best performance.
294 ///
295 /// # Safety
296 ///
297 /// This method may deadlock if called when holding a mutable reference into
298 /// the map. Callers must ensure that references are dropped as soon as
299 /// possible, especially in single threaded contexts.
300 fn get_internal(
301 &self,
302 item: &[u8],
303 tags: Option<&DashSet<ExclusionTags>>,
304 ) -> Option<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
305 self.nodes
306 .iter()
307 .filter(|entry| {
308 !entry.value().exclusions.iter().any(|exclusions| {
309 tags.as_ref()
310 .map(|set| set.contains(&exclusions))
311 .unwrap_or_default()
312 })
313 })
314 .max_by(|left, right| f64_total_ordering(left.score(item), right.score(item)))
315 }
316
317 /// Returns the top `n` nodes that are responsible for the provided item
318 /// with no exclusions. This lookup is performed in serial. Consider
319 /// [`par_get_n`](#par_get_n) that uses rayon to parallelize the lookup. If
320 /// `n` is greater than the number of nodes, then all nodes are returned, in
321 /// order of score.
322 ///
323 /// # Safety
324 ///
325 /// This method may deadlock if called when holding a mutable reference into
326 /// the map. Callers must ensure that references are dropped as soon as
327 /// possible, especially in single threaded contexts.
328 #[inline]
329 #[must_use]
330 pub fn get_n(
331 &self,
332 item: &[u8],
333 n: usize,
334 ) -> Vec<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
335 self.get_n_internal(item, n, None)
336 }
337
338 /// Like [`get_n`](#get_n), but allows the caller to provide a list of
339 /// tags, which nodes that exclude those tags will be skipped.
340 ///
341 /// # Safety
342 ///
343 /// This method may deadlock if called when holding a mutable reference into
344 /// the map. Callers must ensure that references are dropped as soon as
345 /// possible, especially in single threaded contexts.
346 #[inline]
347 #[must_use]
348 pub fn get_n_with_exclusions(
349 &self,
350 item: &[u8],
351 n: usize,
352 tags: &DashSet<ExclusionTags>,
353 ) -> Vec<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
354 self.get_n_internal(item, n, Some(tags))
355 }
356
357 /// Returns a node that is responsible for the provided item. This lookup
358 /// has asymptotic complexity of `O(log(m) * n^2)`, where `m` is the number
359 /// of nodes, and `n` is the number of tags given. As a result, callers
360 /// should generally use a small number of tags, either on node restrictions
361 /// or on the provided tags for best performance.
362 ///
363 /// # Safety
364 ///
365 /// This method may deadlock if called when holding a mutable reference into
366 /// the map. Callers must ensure that references are dropped as soon as
367 /// possible, especially in single threaded contexts.
368 fn get_n_internal(
369 &self,
370 item: &[u8],
371 n: usize,
372 tags: Option<&DashSet<ExclusionTags>>,
373 ) -> Vec<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
374 let mut nodes = self
375 .nodes
376 .iter()
377 .filter(|entry| {
378 !entry.value().exclusions.iter().any(|exclusions| {
379 tags.as_ref()
380 .map(|set| set.contains(&exclusions))
381 .unwrap_or_default()
382 })
383 })
384 .collect::<Vec<_>>();
385
386 nodes.sort_unstable_by(|left, right| {
387 f64_total_ordering(left.score(item), right.score(item))
388 });
389
390 nodes.truncate(n);
391 nodes
392 }
393}
394
395impl<ExclusionTags, Metadata> NodeSelection<ExclusionTags, Metadata>
396where
397 ExclusionTags: Send + Sync + Hash + Eq,
398 Metadata: Send + Sync,
399{
400 /// Finds a node that is responsible for the provided item, filtering out
401 /// nodes that refuse to accept the provided tags. This lookup is performed
402 /// in parallel. Requires the `rayon` feature.
403 #[must_use]
404 #[cfg(feature = "rayon")]
405 pub fn par_get(
406 &self,
407 item: &[u8],
408 tags: Option<&DashSet<ExclusionTags>>,
409 ) -> Option<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
410 self.nodes
411 .par_iter()
412 .filter(|node| {
413 !node.exclusions.par_iter().any(|exclusions| {
414 tags.as_ref()
415 .map(|set| set.contains(&exclusions))
416 .unwrap_or_default()
417 })
418 })
419 .max_by(|left, right| f64_total_ordering(left.score(item), right.score(item)))
420 }
421
422 /// Finds `n` nodes that is responsible for the provided item, filtering out
423 /// nodes that refuse to accept the provided tags. This lookup is performed
424 /// in parallel. Requires the `rayon` feature.
425 #[must_use]
426 #[cfg(feature = "rayon")]
427 pub fn par_get_n(
428 &self,
429 item: &[u8],
430 n: usize,
431 tags: Option<&DashSet<ExclusionTags>>,
432 ) -> Vec<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
433 let mut nodes = self
434 .nodes
435 .par_iter()
436 .filter(|entry| {
437 !entry.value().exclusions.iter().any(|exclusions| {
438 tags.as_ref()
439 .map(|set| set.contains(&exclusions))
440 .unwrap_or_default()
441 })
442 })
443 .collect::<Vec<_>>();
444
445 nodes.par_sort_unstable_by(|left, right| {
446 f64_total_ordering(left.score(item), right.score(item))
447 });
448
449 nodes.truncate(n);
450 nodes
451 }
452}
453
454/// Like [`NodeSelection`], but specialized for bitflags.
455///
456/// Usage of [`BitNodeSelection`] is recommended over [`NodeSelection`] for
457/// performance reasons, but is sometimes not possible if `ExclusionTags` does
458/// not implement [`BitAnd`].
459///
460/// In most cases, the API is identical to [`NodeSelection`] and is intuitive
/// for the bitflag use case. The only noteworthy difference is that you must
462/// use [`BitNode`] instead of [`Node`].
463///
464/// Note that `ExclusionTags` must also implement [`Default`]. This is not a
465/// problem for raw bitflags, but for types derived from the [`bitflags` crate],
466/// then you will need to implement [`Default`] on the type. This default value
467/// should represent when all flags are disabled.
468///
469/// Performance on lookups are now `O(nm)`, where `n` is the number of nodes
470/// and `m` is the cost to perform `BitAnd` on `ExclusionTags`. For bitflags,
471/// this is `O(n)`.
472///
473/// # Examples
474///
475/// ```
476/// # use tagged_rendezvous::*;
477/// # fn main() -> () {
478/// # example();
479/// # }
480/// #
481/// # fn example() -> Option<()> {
482/// use std::num::NonZeroUsize;
483/// use std::iter::FromIterator;
484///
485/// use dashmap::DashSet;
486///
487/// const FLAG_A: u8 = 0b01;
488/// const FLAG_B: u8 = 0b10;
489///
490/// let selector = BitNodeSelection::new();
491///
492/// // Add a 3 nodes, one with an exclusion tag.
493/// let exclusions = FLAG_A | FLAG_B;
494/// let node_1 = BitNode::new(NonZeroUsize::new(1)?, ());
495/// selector.add(node_1.clone());
496/// let node_2 = BitNode::with_exclusions(NonZeroUsize::new(1)?, (), exclusions);
497/// selector.add(node_2.clone());
498/// let node_3 = BitNode::new(NonZeroUsize::new(1)?, ());
499/// selector.add(node_3.clone());
500///
501/// // There's a 33% chance of selecting any node, if no tags are provided.
502/// selector.get(b"hello world")?;
503///
/// // There's an equal chance of getting node 1 or node 3 since node 2 is
505/// // excluded.
506/// selector.get_with_exclusions(b"hello world", exclusions)?;
507/// # None
508/// # }
509/// ```
510///
511/// More examples (albeit for a more general use case) can be found in
512/// [`NodeSelection`].
513///
514/// [`bitflags` crate]: https://docs.rs/bitflags/
#[derive(Clone, Debug)]
pub struct BitNodeSelection<ExclusionTags: Hash + Eq + BitAnd, Metadata> {
    /// The nodes available for selection, keyed by their opaque [`NodeId`].
    /// `BitAnd` on `ExclusionTags` is what lets exclusion checks be a single
    /// bitwise intersection instead of a set scan.
    nodes: DashMap<NodeId, BitNode<ExclusionTags, Metadata>>,
}
520
521impl<ExclusionTags, Metadata> BitNodeSelection<ExclusionTags, Metadata>
522where
523 ExclusionTags: Hash + Eq + BitAnd<Output = ExclusionTags> + Copy + Default,
524{
525 /// Finds a node that is responsible for the provided item with no
526 /// exclusions. This lookup is performed in serial. Consider
527 /// [`par_get`](#par_get) that uses rayon to parallelize the
528 /// lookup.
529 ///
530 /// # Safety
531 ///
532 /// This method may deadlock if called when holding a mutable reference into
533 /// the map. Callers must ensure that references are dropped as soon as
534 /// possible, especially in single threaded contexts.
535 #[inline]
536 #[must_use]
537 pub fn get(&self, item: &[u8]) -> Option<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
538 self.get_with_exclusions(item, ExclusionTags::default())
539 }
540
541 /// Like [`get`](#get), but allows the caller to provide a list of
542 /// tags, which nodes that exclude those tags will be skipped.
543 ///
544 /// # Safety
545 ///
546 /// This method may deadlock if called when holding a mutable reference into
547 /// the map. Callers must ensure that references are dropped as soon as
548 /// possible, especially in single threaded contexts.
549 #[inline]
550 #[must_use]
551 pub fn get_with_exclusions(
552 &self,
553 item: &[u8],
554 tags: ExclusionTags,
555 ) -> Option<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
556 self.nodes
557 .iter()
558 .filter(|entry| entry.value().exclusions & tags == ExclusionTags::default())
559 .max_by(|left, right| f64_total_ordering(left.score(item), right.score(item)))
560 }
561
562 /// Returns the top `n` nodes that are responsible for the provided item
563 /// with no exclusions. This lookup is performed in serial. Consider
564 /// [`par_get_n`](#par_get_n) that uses rayon to parallelize the lookup. If
565 /// `n` is greater than the number of nodes, then all nodes are returned, in
566 /// order of score.
567 ///
568 /// # Safety
569 ///
570 /// This method may deadlock if called when holding a mutable reference into
571 /// the map. Callers must ensure that references are dropped as soon as
572 /// possible, especially in single threaded contexts.
573 #[inline]
574 #[must_use]
575 pub fn get_n(
576 &self,
577 item: &[u8],
578 n: usize,
579 ) -> Vec<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
580 self.get_n_with_exclusions(item, n, ExclusionTags::default())
581 }
582
583 /// Like [`get_n`](#get_n), but allows the caller to provide a list of
584 /// tags, which nodes that exclude those tags will be skipped.
585 ///
586 /// # Safety
587 ///
588 /// This method may deadlock if called when holding a mutable reference into
589 /// the map. Callers must ensure that references are dropped as soon as
590 /// possible, especially in single threaded contexts.
591 #[inline]
592 #[must_use]
593 pub fn get_n_with_exclusions(
594 &self,
595 item: &[u8],
596 n: usize,
597 tags: ExclusionTags,
598 ) -> Vec<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
599 let mut nodes = self
600 .nodes
601 .iter()
602 .filter(|entry| entry.value().exclusions & tags == ExclusionTags::default())
603 .collect::<Vec<_>>();
604
605 nodes.sort_unstable_by(|left, right| {
606 f64_total_ordering(left.score(item), right.score(item))
607 });
608
609 nodes.truncate(n);
610 nodes
611 }
612}
613
614impl<ExclusionTags, Metadata> BitNodeSelection<ExclusionTags, Metadata>
615where
616 ExclusionTags: Send + Sync + Hash + Eq + BitAnd<Output = ExclusionTags> + Default + Copy,
617 Metadata: Send + Sync,
618{
619 /// Finds a node that is responsible for the provided item, filtering out
620 /// nodes that refuse to accept the provided tags. This lookup is performed
621 /// in parallel. Requires the `rayon` feature.
622 #[cfg(feature = "rayon")]
623 pub fn par_get(
624 &self,
625 item: &[u8],
626 tags: ExclusionTags,
627 ) -> Option<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
628 self.nodes
629 .par_iter()
630 .filter(|node| node.exclusions & tags == ExclusionTags::default())
631 .max_by(|left, right| f64_total_ordering(left.score(item), right.score(item)))
632 }
633
634 /// Finds `n` nodes that is responsible for the provided item, filtering out
635 /// nodes that refuse to accept the provided tags. This lookup is performed
636 /// in parallel. Requires the `rayon` feature.
637 #[must_use]
638 #[cfg(feature = "rayon")]
639 pub fn par_get_n(
640 &self,
641 item: &[u8],
642 n: usize,
643 tags: ExclusionTags,
644 ) -> Vec<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
645 let mut nodes = self
646 .nodes
647 .par_iter()
648 .filter(|node| node.exclusions & tags == ExclusionTags::default())
649 .collect::<Vec<_>>();
650
651 nodes.par_sort_unstable_by(|left, right| {
652 f64_total_ordering(left.score(item), right.score(item))
653 });
654
655 nodes.truncate(n);
656 nodes
657 }
658}
659
/// Generate a node and node selection implementation based on the provided
/// bounds.
///
/// Expanded once per selector type (see the invocations below); the optional
/// trailing `: $bounds` adds an extra trait bound on `ExclusionTags` (e.g.
/// `BitAnd` for the bitflag selector).
macro_rules! impl_node_selection {
    // this $bounds meme is a hack to get around the fact that we can't
    // represent trait bounds in macros.
    ($struct_name:ident, $node:ident $(: $bounds:path )?) => {
        impl<ExclusionTags, Metadata> $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
        {
            /// Constructs a new empty instance.
            #[inline]
            #[must_use]
            pub fn new() -> Self {
                Self::default()
            }

            /// Constructs a new `$struct_name`, ensuring that at least
            /// `capacity` nodes can be added without re-allocating.
            #[inline]
            #[must_use]
            pub fn with_capacity(capacity: usize) -> Self {
                Self {
                    nodes: DashMap::with_capacity(capacity),
                }
            }

            /// Adds a new node to the selection with an opaque ID. If lookups
            /// are cached, then callers must invalidate the cache after adding
            /// a node. Otherwise, it is possible to have incorrectly
            /// distributed selections.
            ///
            /// # Safety
            ///
            /// This method may deadlock if called when holding any sort of
            /// reference into the map. Callers must ensure that references are
            /// dropped as soon as possible, especially in single threaded
            /// contexts.
            ///
            /// # Panics
            ///
            /// This method will panic if the provided node has an id that is
            /// already present in the selection. For the non-panicking version
            /// of this method, see [`try_add`](#try_add).
            #[inline]
            #[must_use]
            pub fn add(&self, node: $node<ExclusionTags, Metadata>) -> NodeId {
                let id = NodeId::new_opaque();
                self.add_with_id(id, node);
                id
            }

            /// Adds a new node to the selection with the provided ID. If
            /// lookups are cached, then callers must invalidate the cache after
            /// adding a node. Otherwise, it is possible to have incorrectly
            /// distributed selections.
            ///
            /// # Safety
            ///
            /// This method may deadlock if called when holding any sort of
            /// reference into the map. Callers must ensure that references are
            /// dropped as soon as possible, especially in single threaded
            /// contexts.
            ///
            /// # Panics
            ///
            /// This method will panic if the provided node has an id that is
            /// already present in the selection. For the non-panicking version
            /// of this method, see [`try_add_with_id`](#try_add_with_id).
            #[inline]
            pub fn add_with_id(&self, id: NodeId, node: $node<ExclusionTags, Metadata>) {
                // `insert` returning `Some` means an entry was displaced; the
                // duplicate is unrecoverable at this point, hence the panic.
                if self.nodes.insert(id, node).is_some() {
                    panic!("Node with duplicate id added: {}", id);
                }
            }

            /// Tries to add the node to the selection with an opaque ID. If
            /// lookups are cached, then callers must invalidate the cache after
            /// adding a node. Otherwise, it is possible to have incorrectly
            /// distributed selections.
            ///
            /// # Safety
            ///
            /// This method may deadlock if called when holding any sort of
            /// reference into the map. Callers must ensure that references are
            /// dropped as soon as possible, especially in single threaded
            /// contexts.
            ///
            /// # Errors
            ///
            /// This method will fail if the node has an id that is already
            /// present in the selection.
            #[inline]
            pub fn try_add(
                &self,
                node: $node<ExclusionTags, Metadata>,
            ) -> Result<NodeId, DuplicateIdError> {
                let id = NodeId::new_opaque();
                self.try_add_with_id(id, node)?;
                Ok(id)
            }

            /// Tries to add the node to the selection with the provided ID. If
            /// lookups are cached, then callers must invalidate the cache after
            /// adding a node. Otherwise, it is possible to have incorrectly
            /// distributed selections.
            ///
            /// # Safety
            ///
            /// This method may deadlock if called when holding any sort of
            /// reference into the map. Callers must ensure that references are
            /// dropped as soon as possible, especially in single threaded
            /// contexts.
            ///
            /// # Errors
            ///
            /// This method will fail if the node has an id that is already
            /// present in the selection.
            #[inline]
            pub fn try_add_with_id(
                &self,
                id: NodeId,
                node: $node<ExclusionTags, Metadata>,
            ) -> Result<(), DuplicateIdError> {
                // NOTE(review): `contains_key` followed by `insert` is not one
                // atomic operation; a concurrent insert between the two calls
                // could still displace an entry. Benign for single-writer use.
                if self.nodes.contains_key(&id) {
                    Err(DuplicateIdError(id))
                } else {
                    self.nodes.insert(id, node);
                    Ok(())
                }
            }

            /// Returns a mutable reference to the node given some ID. This is
            /// useful for modifying the weight of a node. If lookups are
            /// cached, then callers must invalidate the cache after modifying
            /// a node. Otherwise, it is possible to have incorrectly
            /// distributed selections.
            ///
            /// # Safety
            ///
            /// This method may deadlock if called when holding any sort of
            /// reference into the map. Callers must ensure that references are
            /// dropped as soon as possible, especially in single threaded
            /// contexts.
            #[inline]
            #[must_use]
            pub fn get_mut(
                &self,
                id: NodeId,
            ) -> Option<RefMut<NodeId, $node<ExclusionTags, Metadata>>> {
                self.nodes.get_mut(&id)
            }

            /// Removes a node, returning the node if it existed. If lookups are
            /// cached, then callers must invalidate the cache after
            /// successfully removing a node. Otherwise, it is possible to have
            /// incorrectly distributed selections.
            ///
            /// # Safety
            ///
            /// This method may deadlock if called when holding any sort of
            /// reference into the map. Callers must ensure that references are
            /// dropped as soon as possible, especially in single threaded
            /// contexts.
            #[inline]
            #[must_use]
            pub fn remove(&self, id: NodeId) -> Option<$node<ExclusionTags, Metadata>> {
                self.nodes.remove(&id).map(|(_id, node)| node)
            }

            /// Returns if the node is selectable from this selector.
            ///
            /// # Safety
            ///
            /// This method may deadlock if called when holding a mutable
            /// reference into the map. Callers must ensure that references are
            /// dropped as soon as possible, especially in single threaded
            /// contexts.
            #[inline]
            #[must_use]
            pub fn contains(&self, id: NodeId) -> bool {
                self.nodes.contains_key(&id)
            }
        }

        impl<ExclusionTags, Metadata> Default for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
        {
            #[inline]
            fn default() -> Self {
                Self {
                    nodes: DashMap::new(),
                }
            }
        }

        impl<ExclusionTags, Metadata> Extend<(NodeId, $node<ExclusionTags, Metadata>)>
            for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
        {
            #[inline]
            fn extend<T: IntoIterator<Item = (NodeId, $node<ExclusionTags, Metadata>)>>(
                &mut self,
                iter: T,
            ) {
                self.nodes.extend(iter);
            }
        }

        impl<ExclusionTags, Metadata> FromIterator<(NodeId, $node<ExclusionTags, Metadata>)>
            for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
        {
            #[inline]
            fn from_iter<T: IntoIterator<Item = (NodeId, $node<ExclusionTags, Metadata>)>>(
                iter: T,
            ) -> Self {
                Self {
                    nodes: DashMap::from_iter(iter),
                }
            }
        }

        #[cfg(feature = "rayon")]
        impl<ExclusionTags, Metadata> FromParallelIterator<(NodeId, $node<ExclusionTags, Metadata>)>
            for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Send + Sync + Hash + Eq + $($bounds)*,
            Metadata: Send + Sync,
        {
            #[inline]
            fn from_par_iter<I>(into_iter: I) -> Self
            where
                I: IntoParallelIterator<Item = (NodeId, $node<ExclusionTags, Metadata>)>,
            {
                Self {
                    nodes: DashMap::from_par_iter(into_iter),
                }
            }
        }

        impl<ExclusionTags, Metadata> IntoIterator for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
        {
            type Item = (NodeId, $node<ExclusionTags, Metadata>);
            type IntoIter =
                <DashMap<NodeId, $node<ExclusionTags, Metadata>> as IntoIterator>::IntoIter;

            #[inline]
            fn into_iter(self) -> Self::IntoIter {
                self.nodes.into_iter()
            }
        }

        #[cfg(feature = "rayon")]
        impl<ExclusionTags, Metadata> IntoParallelIterator for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Send + Sync + Hash + Eq + $($bounds)*,
            Metadata: Send + Sync,
        {
            type Item = <Self as IntoIterator>::Item;
            type Iter =
                <DashMap<NodeId, $node<ExclusionTags, Metadata>> as IntoParallelIterator>::Iter;

            #[inline]
            fn into_par_iter(self) -> <Self as IntoParallelIterator>::Iter {
                self.nodes.into_par_iter()
            }
        }

        #[cfg(feature = "rayon")]
        impl<ExclusionTags, Metadata> ParallelExtend<(NodeId, $node<ExclusionTags, Metadata>)>
            for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Send + Sync + Hash + Eq + $($bounds)*,
            Metadata: Send + Sync,
        {
            #[inline]
            fn par_extend<I>(&mut self, extendable: I)
            where
                I: IntoParallelIterator<Item = (NodeId, $node<ExclusionTags, Metadata>)>,
            {
                self.nodes.par_extend(extendable);
            }
        }
    };
}
951
// Generate the map-backed selection API for `Node`, whose exclusions are a
// `DashSet` of tags.
impl_node_selection!(NodeSelection, Node);

// Generate the same API for `BitNode`, whose exclusions are bitflags and so
// require the additional `BitAnd` bound on the tag type.
impl_node_selection!(BitNodeSelection, BitNode: BitAnd);
955
/// Generates a node implementation based on the provided node and its bounds.
///
/// The generated items are the constructors shared by every node type
/// (`with_exclusions`, `from_parts`), the rendezvous `score` function, the
/// `set_weight`/`data` accessors, and the `Hash`/`PartialEq`/`Eq`/
/// `PartialOrd`/`Ord` implementations.
macro_rules! impl_node {
    // this $bounds meme is a hack to get around the fact that we can't
    // represent trait bounds in macros.
    ($struct_name:ident, $excludes_type:ty $(: $bounds:path )?) => {
        impl<ExclusionTags, Metadata> $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
        {
            /// Constructs a new node with an opaque ID, using the provided
            /// metadata and exclusions.
            ///
            /// `weight` should not exceed 2^52. `weight` is internally casted
            /// as a [`f64`], and loss of precision may occur if `weight`
            /// exceeds [`f64`]'s mantissa of 52 bits. This should not be a
            /// problem for most use cases.
            #[inline]
            #[must_use]
            pub fn with_exclusions(
                weight: NonZeroUsize,
                metadata: Metadata,
                exclusions: $excludes_type,
            ) -> Self {
                Self {
                    // The random seed differentiates nodes that otherwise
                    // carry identical weight and metadata.
                    seed: rand::random(),
                    weight,
                    exclusions,
                    metadata,
                }
            }

            /// Constructs a new node from its components.
            ///
            /// `weight` should not exceed 2^52. `weight` is internally casted
            /// as a [`f64`], and loss of precision may occur if `weight`
            /// exceeds [`f64`]'s mantissa of 52 bits. This should not be a
            /// problem for most use cases.
            #[inline]
            #[must_use]
            pub fn from_parts<Rng: rand::Rng>(
                rng: &mut Rng,
                weight: NonZeroUsize,
                exclusions: $excludes_type,
                metadata: Metadata,
            ) -> Self {
                Self {
                    weight,
                    // A caller-provided RNG allows deterministic seeding.
                    seed: rng.gen(),
                    exclusions,
                    metadata,
                }
            }

            /// Calculates the score of a node for a given input. This
            /// implements Murmur3 hash alongside highest random weight hashing,
            /// also known as Rendezvous hashing.
            ///
            /// Every node hashes the item with its own `seed`; the caller
            /// selects the node with the highest weighted score.
            fn score(&self, item: &[u8]) -> f64 {
                // truncation is intended
                #[allow(clippy::cast_possible_truncation)]
                // NOTE: the `unwrap` should be infallible in practice, since
                // reading from an in-memory `Cursor` over a byte slice does
                // not produce I/O errors.
                let hash =
                    Hash64(murmur3_x64_128(&mut Cursor::new(item), self.seed).unwrap() as u64)
                        .as_normalized_float();
                // `hash` lies in [0, 1), so `-hash.ln()` is positive and the
                // mapping is monotonic: larger hashes yield larger scores.
                let score = 1.0 / -hash.ln();
                // This is documented in construction of a node
                #[allow(clippy::cast_precision_loss)]
                {
                    self.weight.get() as f64 * score
                }
            }

            /// Sets the weight of the node.
            #[inline]
            pub fn set_weight(&mut self, weight: NonZeroUsize) {
                self.weight = weight;
            }

            /// Fetches the associated data with the node.
            #[inline]
            pub const fn data(&self) -> &Metadata {
                &self.metadata
            }
        }

        impl<ExclusionTags, Metadata> Hash for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
            Metadata: Hash,
        {
            #[inline]
            fn hash<H: Hasher>(&self, state: &mut H) {
                // `exclusions` is not hashed, mirroring the `PartialEq`
                // implementation below which also ignores it.
                self.weight.hash(state);
                self.seed.hash(state);
                self.metadata.hash(state);
            }
        }

        impl<ExclusionTags, Metadata> PartialEq for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
            Metadata: PartialEq,
        {
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                // `exclusions` does not participate in equality.
                self.weight == other.weight
                    && self.seed == other.seed
                    && self.metadata == other.metadata
            }
        }

        impl<ExclusionTags, Metadata> Eq for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
            Metadata: Eq,
        {
        }

        impl<ExclusionTags, Metadata> PartialOrd for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
            Metadata: PartialOrd,
        {
            #[inline]
            fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
                // Incomparable metadata (`None`) is treated like `Equal`:
                // ordering then falls back to weight, then seed, so some
                // ordering is always produced.
                match self.metadata.partial_cmp(&other.metadata) {
                    None | Some(Ordering::Equal) => match self.weight.cmp(&other.weight) {
                        Ordering::Equal => Some(self.seed.cmp(&other.seed)),
                        cmp => Some(cmp),
                    },
                    cmp => cmp,
                }
            }
        }

        impl<ExclusionTags, Metadata> Ord for $struct_name<ExclusionTags, Metadata>
        where
            ExclusionTags: Hash + Eq + $($bounds)*,
            Metadata: Ord,
        {
            #[inline]
            fn cmp(&self, other: &Self) -> Ordering {
                // Same ordering as `partial_cmp`: metadata, then weight,
                // then seed as the final tie-breaker.
                match self.metadata.cmp(&other.metadata) {
                    Ordering::Equal => match self.weight.cmp(&other.weight) {
                        Ordering::Equal => self.seed.cmp(&other.seed),
                        cmp => cmp,
                    },
                    cmp => cmp,
                }
            }
        }
    };
}
1107
/// A representation of a logical node in some system.
///
/// Each node must contain a weight score, which is used to determine how much
/// traffic a node receives. This score is relative to all other weights in
/// the selection. For example, if you only had nodes with weights of 2 and 4,
/// then the nodes have a 33% and 66% chance of being selected. The important
/// part is the relative ratio of weights, not the absolute values. If it's
/// preferred to have an equal distribution of traffic, then the weights should
/// be equal.
///
/// A node is generic over two types: `ExclusionTags` and `Metadata`.
/// `ExclusionTags` is a type that represents a set of tags that a node declares
/// to not accept. `Metadata` is a type that represents any additional data
/// that a server may need to store about a node, such as labels or a list of
/// IP addresses. The most basic node is where both types are the [`unit`] type.
///
/// Additionally, all nodes must have some unique identifier associated with
/// them. [`NodeId`] is a type that represents this unique identifier, and is
/// only used for [`NodeSelection`] to identify the node.
///
/// # Examples
///
/// The simplest example of a node is a node that accepts all traffic and
/// carries no additional metadata.
///
/// ```
/// # use tagged_rendezvous::*;
/// # fn main() -> () {
/// # example();
/// # }
/// #
/// # fn example() -> Option<()> {
/// use std::num::NonZeroUsize;
///
/// // Generates a node with an opaque id and a weight of 4.
/// let node_small = Node::<(), _>::new(NonZeroUsize::new(4)?, ());
/// assert_eq!(*node_small.data(), ());
/// # None
/// # }
/// ```
///
/// Using some metadata, we can get some information that we can use to
/// communicate with the node.
///
/// ```
/// # use tagged_rendezvous::*;
/// # fn main() -> () {
/// # example();
/// # }
/// #
/// # fn example() -> Option<()> {
/// use std::net::Ipv4Addr;
/// use std::num::NonZeroUsize;
///
/// // Note that no bounds are on the metadata, but we derive some to make it
/// // easier to test.
/// #[derive(Debug, PartialEq)]
/// struct Metadata {
///     ip_address: Ipv4Addr,
/// }
///
/// // Generates a node with an opaque id, a weight of 4, and with a metadata
/// // object with an IP address.
/// let metadata = Metadata { ip_address: Ipv4Addr::new(10, 0, 0, 1) };
/// let node_small = Node::<(), _>::new(NonZeroUsize::new(4)?, metadata);
/// assert_eq!(node_small.data().ip_address, Ipv4Addr::new(10, 0, 0, 1));
/// # None
/// # }
/// ```
///
/// Finally, you can define a type to use a set of exclusions for a node.
///
/// ```
/// # use tagged_rendezvous::*;
/// # fn main() -> () {
/// # example();
/// # }
/// #
/// # fn example() -> Option<()> {
/// use std::iter::FromIterator;
/// use std::net::Ipv4Addr;
/// use std::num::NonZeroUsize;
///
/// use dashmap::DashSet;
///
/// #[derive(PartialEq, Eq, Hash)]
/// enum Exclusions {
///     Foo,
///     Bar,
/// }
///
/// // Generates a node with an opaque id, a weight of 4, and refusing to
/// // accept traffic with the `Foo` tag.
/// let exclusions = DashSet::from_iter([Exclusions::Foo]);
/// let node_small = Node::with_exclusions(NonZeroUsize::new(4)?, (), exclusions);
/// # None
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct Node<ExclusionTags: Hash + Eq, Metadata> {
    /// The weight of a node. This is a relative value, and is used to determine
    /// how much traffic a node receives.
    weight: NonZeroUsize,
    /// This random seed value is used as a way to differentiate nodes. This is
    /// used to act as a seed value for the hash function, so that two nodes
    /// don't have the same score before weights.
    seed: u32,
    /// A list of items that a node declares to not accept.
    exclusions: DashSet<ExclusionTags>,
    /// Associated data. This is generic, so it can be used for any type,
    /// including `Option<T>`.
    metadata: Metadata,
}
1220
1221impl<ExclusionTags, Metadata> Node<ExclusionTags, Metadata>
1222where
1223 ExclusionTags: Hash + Eq,
1224{
1225 /// Constructs a new node with an opaque ID and no exclusions.
1226 ///
1227 /// `weight` should not exceed 2^52. `weight` is internally casted as a
1228 /// [`f64`], and loss of precision may occur if `weight` exceeds [`f64`]'s
1229 /// mantissa of 52 bits.This should not be a problem for most use cases.
1230 #[inline]
1231 #[must_use]
1232 pub fn new(weight: NonZeroUsize, metadata: Metadata) -> Self {
1233 Self {
1234 seed: rand::random(),
1235 weight,
1236 exclusions: DashSet::new(),
1237 metadata,
1238 }
1239 }
1240}
1241
1242impl<ExclusionTags, Metadata> Node<ExclusionTags, Metadata>
1243where
1244 ExclusionTags: Hash + Eq,
1245 Metadata: Default,
1246{
1247 /// Constructs a new node with an opaque ID and no exclusions, using the
1248 /// default implementation of `Metadata`.
1249 ///
1250 /// `weight` should not exceed 2^52. `weight` is internally casted as a
1251 /// [`f64`], and loss of precision may occur if `weight` exceeds [`f64`]'s
1252 /// mantissa of 52 bits.This should not be a problem for most use cases.
1253 #[inline]
1254 #[must_use]
1255 pub fn with_default(weight: NonZeroUsize) -> Self {
1256 Self::new(weight, Metadata::default())
1257 }
1258}
1259
// Generate the shared constructor/scoring/ordering impls for `Node`.
impl_node!(Node, DashSet<ExclusionTags>);
1261
/// Like [`Node`], but optimized for bitflags.
///
/// Note that `ExclusionTags` must also implement [`Default`]. This is not a
/// problem for raw bitflags, but for types derived from the [`bitflags` crate]
/// you will need to implement [`Default`] on the type. This default value
/// should represent when all flags are disabled.
///
/// [`bitflags` crate]: https://docs.rs/bitflags/
#[derive(Clone, Debug)]
pub struct BitNode<ExclusionTags: Hash + Eq + BitAnd, Metadata> {
    /// The weight of a node. This is a relative value, and is used to determine
    /// how much traffic a node receives.
    weight: NonZeroUsize,
    /// This random seed value is used as a way to differentiate nodes. This is
    /// used to act as a seed value for the hash function, so that two nodes
    /// don't have the same score before weights.
    seed: u32,
    /// Bitflags describing traffic that this node declares to not accept.
    exclusions: ExclusionTags,
    /// Associated data. This is generic, so it can be used for any type,
    /// including `Option<T>`.
    metadata: Metadata,
}
1285
1286impl<ExclusionTags, Metadata> BitNode<ExclusionTags, Metadata>
1287where
1288 ExclusionTags: Hash + Eq + BitAnd + Default,
1289{
1290 /// Constructs a new node with an opaque ID and no exclusions.
1291 ///
1292 /// `weight` should not exceed 2^52. `weight` is internally casted as a
1293 /// [`f64`], and loss of precision may occur if `weight` exceeds [`f64`]'s
1294 /// mantissa of 52 bits. This should not be a problem for most use cases.
1295 #[inline]
1296 #[must_use]
1297 pub fn new(weight: NonZeroUsize, metadata: Metadata) -> Self {
1298 Self {
1299 seed: rand::random(),
1300 weight,
1301 exclusions: Default::default(),
1302 metadata,
1303 }
1304 }
1305}
1306
1307impl<ExclusionTags, Metadata> BitNode<ExclusionTags, Metadata>
1308where
1309 ExclusionTags: Hash + Eq + BitAnd + Default,
1310 Metadata: Default,
1311{
1312 /// Constructs a new node with an opaque ID and no exclusions, using the
1313 /// default implementation of `Metadata`.
1314 ///
1315 /// `weight` should not exceed 2^52. `weight` is internally casted as a
1316 /// [`f64`], and loss of precision may occur if `weight` exceeds [`f64`]'s
1317 /// mantissa of 52 bits.This should not be a problem for most use cases.
1318 #[inline]
1319 #[must_use]
1320 pub fn with_default(weight: NonZeroUsize) -> Self {
1321 Self::new(weight, Metadata::default())
1322 }
1323}
1324
// Generate the shared constructor/scoring/ordering impls for `BitNode`.
impl_node!(BitNode, ExclusionTags: BitAnd);
1326
/// A representation of a 64 bit hash value.
struct Hash64(u64);

impl Hash64 {
    /// Returns a value from [0, 1).
    ///
    /// Only the low 53 bits of the hash are used, since that is the largest
    /// integer range an [`f64`] represents exactly.
    fn as_normalized_float(&self) -> f64 {
        /// Mask keeping the lowest 53 bits (discards the highest 11).
        const MANTISSA_MASK: u64 = (1 << 53) - 1;
        // Both operands are exact: any integer below 2^53 converts to f64
        // losslessly, and the divisor is a power of two, so the quotient is
        // exactly (self.0 mod 2^53) / 2^53.
        #[allow(clippy::cast_precision_loss)]
        {
            (self.0 & MANTISSA_MASK) as f64 / (1u64 << 53) as f64
        }
    }
}
1339
/// The counter used for opaque node ids. This is just incrementally updated,
/// which should solve uniqueness issues. If there is a use case to generate
/// more than [`usize`] IDs, then please file an issue.
static NODE_ID_COUNTER: AtomicUsize = AtomicUsize::new(0);

/// An opaque representation of a node ID.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct NodeId(usize);

impl NodeId {
    /// Wraps an existing number as a node ID. The value must be unique, as it
    /// is the sole determining factor for equality and ordering of nodes.
    /// Reach for this only when a unique identifier already exists or when
    /// fine control over the internal id is required; otherwise prefer
    /// [`Self::new_opaque`].
    #[inline]
    #[must_use]
    pub const fn new(id: usize) -> Self {
        Self(id)
    }

    /// Produces a node with a fresh, process-unique opaque ID drawn from a
    /// global counter. Prefer this over [`Self::new`] whenever the concrete
    /// value of the ID is unimportant.
    #[inline]
    #[must_use]
    pub fn new_opaque() -> Self {
        // Relaxed is sufficient: only uniqueness matters, not ordering with
        // respect to other memory operations.
        let id = NODE_ID_COUNTER.fetch_add(1, AtomicOrdering::Relaxed);
        Self::new(id)
    }
}

impl Display for NodeId {
    #[inline]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the inner integer's formatting.
        Display::fmt(&self.0, f)
    }
}
1377
#[cfg(test)]
mod hash64 {
    use crate::Hash64;

    /// Golden-value check of the hash-to-[0, 1) normalization.
    #[test]
    fn normalized_float() {
        // These numbers are tested against the python implementation
        assert_eq!(
            Hash64(0x1234567890abcdef).as_normalized_float(),
            0.6355555368049276
        );

        // This input has 56 set bits; only the low 53 are used, so the
        // result is (2^53 - 1) / 2^53, the largest value below 1.0.
        assert_eq!(
            Hash64(0xffffffffffffff).as_normalized_float(),
            0.9999999999999999
        );
    }
}
1396
/// Stand-in until [`f64::total_cmp`] is stabilized; mirrors the standard
/// library's bit-twiddling implementation. See the [GitHub issue] for more
/// information.
///
/// [GitHub issue]: https://github.com/rust-lang/rust/issues/72599
#[allow(clippy::cast_sign_loss, clippy::cast_possible_wrap)]
fn f64_total_ordering(left: f64, right: f64) -> Ordering {
    /// Maps a float to an integer whose natural ordering matches IEEE 754's
    /// `totalOrder`: for negative values (sign bit set) the magnitude bits
    /// are flipped so they sort below all positives.
    #[allow(clippy::cast_sign_loss, clippy::cast_possible_wrap)]
    fn total_order_key(value: f64) -> i64 {
        let bits = value.to_bits() as i64;
        bits ^ (((bits >> 63) as u64) >> 1) as i64
    }

    total_order_key(left).cmp(&total_order_key(right))
}
1409
#[cfg(test)]
mod node_selection {
    use super::*;

    /// Registering two nodes under the same explicit ID must panic rather
    /// than silently overwrite the first registration.
    #[should_panic]
    #[test]
    fn duplicate_id_will_panic() {
        let selector = NodeSelection::<(), ()>::new();
        // `NonZeroUsize::new(1)` is trivially `Some`, so there is no need for
        // an `unsafe { new_unchecked }` here.
        let id = NonZeroUsize::new(1).unwrap();
        selector.add_with_id(NodeId::new(0), Node::with_default(id));
        selector.add_with_id(NodeId::new(0), Node::with_default(id));
    }
}
1423
#[cfg(test)]
mod node_selection_no_exclusions {
    use std::collections::BTreeMap;

    use super::*;

    /// Constructs a weight from a known-nonzero literal without `unsafe`.
    fn weight(n: usize) -> NonZeroUsize {
        NonZeroUsize::new(n).unwrap()
    }

    /// Nodes should be selected roughly proportionally to their weights.
    #[test]
    fn sanity_check_weighted() {
        let node_selection = NodeSelection::<(), ()>::new();

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            map.insert(Node::with_default(weight(100)), 0);
            map.insert(Node::with_default(weight(200)), 0);
            map.insert(Node::with_default(weight(300)), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        // Select 600 times with random keys and tally the winner of each draw.
        for _ in 0..600 {
            let node_selected = node_selection.get(&(rand::random::<f64>()).to_le_bytes());
            if let Some(node) = node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Weights sum to 600 over 600 draws, so each node's expected hit
        // count equals its weight; allow +/-50 of statistical slack.
        for (node, counts) in nodes {
            let anchor = node.weight.get() as usize;
            let range = (anchor - 50)..(anchor + 50);
            assert!(range.contains(&counts));
        }
    }

    /// Equal weights should yield a roughly uniform distribution.
    #[test]
    fn sanity_check_unweighted() {
        let node_selection = NodeSelection::<(), ()>::new();

        let mut nodes: BTreeMap<_, usize> = {
            let w = weight(1);
            let mut map = BTreeMap::new();
            map.insert(Node::with_default(w), 0);
            map.insert(Node::with_default(w), 0);
            map.insert(Node::with_default(w), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected = node_selection.get(&(rand::random::<f64>()).to_le_bytes());
            if let Some(node) = node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Three equal nodes over 600 draws: ~200 hits apiece.
        let range = (200 - 50)..(200 + 50);
        for counts in nodes.values() {
            assert!(range.contains(counts));
        }
    }

    /// `get_n(…, 2)` should distribute picks evenly: each of three equal
    /// nodes is expected in two-thirds of the draws.
    #[test]
    fn sanity_check_unweighted_n() {
        let node_selection = NodeSelection::<(), ()>::new();

        let mut nodes: BTreeMap<_, usize> = {
            let w = weight(1);
            let mut map = BTreeMap::new();
            map.insert(Node::with_default(w), 0);
            map.insert(Node::with_default(w), 0);
            map.insert(Node::with_default(w), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let nodes_selected = node_selection.get_n(&(rand::random::<f64>()).to_le_bytes(), 2);
            for node in nodes_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // 1200 total picks over three equal nodes: ~400 each.
        let range = (400 - 30)..(400 + 30);
        for counts in nodes.values() {
            assert!(range.contains(counts));
        }
    }
}
1523
#[cfg(test)]
mod node_selection_exclusions {
    use std::collections::BTreeMap;
    use std::iter::FromIterator;

    use super::*;

    /// Tags that a node can refuse to serve in these tests.
    #[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug)]
    enum Exclusions {
        A,
    }

    /// Constructs a weight from a known-nonzero literal without `unsafe`.
    fn weight(n: usize) -> NonZeroUsize {
        NonZeroUsize::new(n).unwrap()
    }

    /// A node excluding the requested tag receives no traffic; the rest is
    /// split proportionally to the remaining weights.
    #[test]
    fn sanity_check_weighted() {
        let node_selection = NodeSelection::<Exclusions, ()>::new();

        let exclusions = DashSet::from_iter([Exclusions::A]);

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            map.insert(Node::with_default(weight(100)), 0);
            map.insert(
                Node::with_exclusions(weight(200), (), exclusions.clone()),
                0,
            );
            map.insert(Node::with_default(weight(300)), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected = node_selection
                .get_with_exclusions(&(rand::random::<f64>()).to_le_bytes(), &exclusions);
            if let Some(node) = node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Eligible weights sum to 400 over 600 draws, so each eligible node
        // expects weight * 600 / 400 = weight * 3 / 2 hits.
        for (node, counts) in nodes {
            let anchor = node.weight.get() as usize * 3 / 2;
            if node.exclusions.is_empty() {
                let range = (anchor - 50)..(anchor + 50);
                assert!(range.contains(&counts));
            } else {
                assert_eq!(counts, 0);
            }
        }
    }

    /// With equal weights, traffic splits evenly over non-excluded nodes.
    #[test]
    fn sanity_check_unweighted() {
        let node_selection = NodeSelection::<Exclusions, ()>::new();

        let exclusions = DashSet::from_iter([Exclusions::A]);

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            let w = weight(1);
            map.insert(Node::with_default(w), 0);
            map.insert(Node::with_exclusions(w, (), exclusions.clone()), 0);
            map.insert(Node::with_default(w), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected = node_selection
                .get_with_exclusions(&(rand::random::<f64>()).to_le_bytes(), &exclusions);
            if let Some(node) = node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Two eligible nodes share 600 draws, roughly 300 each.
        let range = (300 - 50)..(300 + 50);
        for (node, counts) in nodes {
            if node.exclusions.is_empty() {
                assert!(range.contains(&counts));
            } else {
                assert_eq!(counts, 0);
            }
        }
    }

    /// `get_n` must skip excluded nodes while returning the requested count.
    #[test]
    fn sanity_check_unweighted_n() {
        let node_selection = NodeSelection::<Exclusions, ()>::new();

        let exclusions = DashSet::from_iter([Exclusions::A]);

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            let w = weight(1);
            map.insert(Node::with_default(w), 0);
            map.insert(Node::with_exclusions(w, (), exclusions.clone()), 0);
            map.insert(Node::with_default(w), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected = node_selection.get_n_with_exclusions(
                &(rand::random::<f64>()).to_le_bytes(),
                2,
                &exclusions,
            );
            for node in node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Both eligible nodes are chosen on every draw (2 requested of 2
        // eligible), giving each close to 600 hits.
        let range = (600 - 50)..(600 + 50);
        for (node, counts) in nodes {
            if node.exclusions.is_empty() {
                assert!(range.contains(&counts));
            } else {
                assert_eq!(counts, 0);
            }
        }
    }
}
1656
#[cfg(test)]
mod bit_node_selection {

    use std::collections::BTreeMap;

    use super::*;

    /// Constructs a weight from a known-nonzero literal without `unsafe`.
    fn weight(n: usize) -> NonZeroUsize {
        NonZeroUsize::new(n).unwrap()
    }

    /// Nodes whose exclusion bits overlap the request must never be chosen;
    /// the remaining traffic splits proportionally between the other nodes.
    #[test]
    fn exclusions() {
        let node_selection = BitNodeSelection::<u8, ()>::new();
        let exclusions = 0b01;

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            map.insert(BitNode::with_default(weight(100)), 0);
            map.insert(BitNode::with_exclusions(weight(200), (), exclusions), 0);
            map.insert(BitNode::with_default(weight(300)), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected = node_selection
                .get_with_exclusions(&(rand::random::<f64>()).to_le_bytes(), exclusions);
            if let Some(node) = node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Eligible weights sum to 400 over 600 draws: weight * 3 / 2 each,
        // while the excluded node must receive nothing.
        for (node, counts) in nodes {
            let anchor = node.weight.get() as usize * 3 / 2;
            if node.exclusions == 0 {
                let range = (anchor - 50)..(anchor + 50);
                assert!(range.contains(&counts));
            } else {
                assert_eq!(counts, 0);
            }
        }
    }

    /// Without exclusions, bit nodes follow their weight proportions.
    #[test]
    fn no_exclusions() {
        let node_selection = BitNodeSelection::<u8, ()>::new();

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            map.insert(BitNode::with_default(weight(100)), 0);
            map.insert(BitNode::with_default(weight(200)), 0);
            map.insert(BitNode::with_default(weight(300)), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected = node_selection.get(&(rand::random::<f64>()).to_le_bytes());
            if let Some(node) = node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Weights sum to 600 over 600 draws: each node expects `weight` hits.
        for (node, counts) in nodes {
            let anchor = node.weight.get() as usize;
            let range = (anchor - 50)..(anchor + 50);
            assert!(range.contains(&counts));
        }
    }

    /// `get_n(…, 2)` spreads 1200 picks evenly over three equal nodes.
    #[test]
    fn get_n() {
        let node_selection = BitNodeSelection::<u8, ()>::new();

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            map.insert(BitNode::with_default(weight(200)), 0);
            map.insert(BitNode::with_default(weight(200)), 0);
            map.insert(BitNode::with_default(weight(200)), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected = node_selection.get_n(&(rand::random::<f64>()).to_le_bytes(), 2);
            for node in node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Each node should appear in ~2/3 of the 600 draws: ~400 hits.
        for (node, counts) in nodes {
            let anchor = node.weight.get() as usize * 2;
            let range = (anchor - 50)..(anchor + 50);
            assert!(range.contains(&counts));
        }
    }
}
1766
#[cfg(all(test, feature = "rayon"))]
mod par_tests {
    use std::collections::BTreeMap;

    use super::*;

    /// Tags that a node can refuse to serve in these tests.
    #[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug)]
    enum Exclusions {
        A,
    }

    /// Constructs a weight from a known-nonzero literal without `unsafe`.
    fn weight(n: usize) -> NonZeroUsize {
        NonZeroUsize::new(n).unwrap()
    }

    /// Parallel lookup honors bitflag exclusions and weight proportions.
    #[test]
    fn bit_node_selection_get() {
        let node_selection = BitNodeSelection::<u8, ()>::new();
        let exclusions = 0b01;

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            map.insert(BitNode::with_default(weight(100)), 0);
            map.insert(BitNode::with_exclusions(weight(200), (), exclusions), 0);
            map.insert(BitNode::with_default(weight(300)), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected =
                node_selection.par_get(&(rand::random::<f64>()).to_le_bytes(), exclusions);
            if let Some(node) = node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Eligible weights sum to 400 over 600 draws: weight * 3 / 2 each.
        for (node, counts) in nodes {
            let anchor = node.weight.get() as usize * 3 / 2;
            if node.exclusions == 0 {
                let range = (anchor - 50)..(anchor + 50);
                assert!(range.contains(&counts));
            } else {
                assert_eq!(counts, 0);
            }
        }
    }

    /// Parallel `get_n` returns both eligible nodes on every draw.
    #[test]
    fn bit_node_selection_get_n() {
        let node_selection = BitNodeSelection::<u8, ()>::new();
        let exclusions = 0b01;

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            map.insert(BitNode::with_default(weight(100)), 0);
            map.insert(BitNode::with_exclusions(weight(200), (), exclusions), 0);
            map.insert(BitNode::with_default(weight(300)), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected =
                node_selection.par_get_n(&(rand::random::<f64>()).to_le_bytes(), 2, exclusions);
            for node in node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        for (node, counts) in nodes {
            // Two eligible nodes, two requested per draw: ~600 hits apiece.
            let anchor = 600_usize;
            if node.exclusions == 0 {
                let range = (anchor - 50)..(anchor + 50);
                assert!(range.contains(&counts));
            } else {
                assert_eq!(counts, 0);
            }
        }
    }

    /// Parallel lookup honors set-based exclusions.
    #[test]
    fn node_selection_get() {
        let node_selection = NodeSelection::<Exclusions, ()>::new();

        let exclusions = DashSet::from_iter([Exclusions::A]);

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            let w = weight(1);
            map.insert(Node::with_default(w), 0);
            map.insert(Node::with_exclusions(w, (), exclusions.clone()), 0);
            map.insert(Node::with_default(w), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected =
                node_selection.par_get(&(rand::random::<f64>()).to_le_bytes(), Some(&exclusions));
            if let Some(node) = node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Two eligible nodes share 600 draws, roughly 300 each.
        let range = (300 - 50)..(300 + 50);
        for (node, counts) in nodes {
            if node.exclusions.is_empty() {
                assert!(range.contains(&counts));
            } else {
                assert_eq!(counts, 0);
            }
        }
    }

    /// Parallel `get_n` skips excluded nodes while returning two winners.
    #[test]
    fn node_selection_get_n() {
        let node_selection = NodeSelection::<Exclusions, ()>::new();

        let exclusions = DashSet::from_iter([Exclusions::A]);

        let mut nodes: BTreeMap<_, usize> = {
            let mut map = BTreeMap::new();
            let w = weight(1);
            map.insert(Node::with_default(w), 0);
            map.insert(Node::with_exclusions(w, (), exclusions.clone()), 0);
            map.insert(Node::with_default(w), 0);
            map
        };

        for node in nodes.keys() {
            let _ = node_selection.add(node.clone());
        }

        for _ in 0..600 {
            let node_selected = node_selection.par_get_n(
                &(rand::random::<f64>()).to_le_bytes(),
                2,
                Some(&exclusions),
            );
            for node in node_selected {
                let node = nodes.get_mut(node.value()).unwrap();
                *node += 1;
            }
        }

        // Both eligible nodes are chosen on every draw: ~600 hits apiece.
        let range = (600 - 50)..(600 + 50);
        for (node, counts) in nodes {
            if node.exclusions.is_empty() {
                assert!(range.contains(&counts));
            } else {
                assert_eq!(counts, 0);
            }
        }
    }
}
1936}