tagged_rendezvous/
lib.rs

//! This crate is a proof of concept for node selection based on a logarithmic
//! rendezvous hashing implementation that allows nodes to self-exclude based
//! on a generic type. While this is still a proof of concept, performance
//! should be good enough for small to medium-sized deployments.
//!
//! Note that this implementation does not include any caching mechanism. This
//! is intentional, to keep the implementation as simple as possible. Users of
//! this crate _should_ implement their own caching mechanism for best
//! performance, evicting on mutation, but this is not necessary, especially in
//! small use cases.
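//!
//! As a rough illustration, here is a minimal sketch of such a cache. The
//! `CachedSelection` wrapper and its methods are hypothetical (they are not
//! part of this crate); it memoizes the selected [`NodeId`] per key and clears
//! the cache whenever the node set is mutated.
//!
//! ```
//! use std::collections::HashMap;
//! use std::num::NonZeroUsize;
//!
//! use tagged_rendezvous::{Node, NodeId, NodeSelection};
//!
//! /// Hypothetical wrapper: caches lookups, evicts everything on mutation.
//! struct CachedSelection {
//!     inner: NodeSelection<(), ()>,
//!     cache: HashMap<Vec<u8>, NodeId>,
//! }
//!
//! impl CachedSelection {
//!     fn select(&mut self, item: &[u8]) -> Option<NodeId> {
//!         if let Some(id) = self.cache.get(item) {
//!             return Some(*id);
//!         }
//!         // The `RefMulti` returned by `get` is dropped at the end of this
//!         // statement, so no lock is held while the cache is updated.
//!         let id = *self.inner.get(item)?.key();
//!         self.cache.insert(item.to_vec(), id);
//!         Some(id)
//!     }
//!
//!     fn add(&mut self, node: Node<(), ()>) -> NodeId {
//!         // Any mutation of the node set invalidates previous selections.
//!         self.cache.clear();
//!         self.inner.add(node)
//!     }
//! }
//!
//! let mut cached = CachedSelection {
//!     inner: NodeSelection::new(),
//!     cache: HashMap::new(),
//! };
//! let id = cached.add(Node::new(NonZeroUsize::new(1).unwrap(), ()));
//! assert_eq!(cached.select(b"hello world"), Some(id));
//! ```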
//!
//! This implementation assumes that the typical use case is some
//! load-balancing node or equivalent, and that it is aware of which nodes are
//! available or unavailable. Additionally, it should be able to determine the
//! weight of each node.
//!
//! The entry points of this crate are [`NodeSelection`] and
//! [`BitNodeSelection`]. Examples of construction and usage are included in
//! their respective documentation.

// Forbid unsafe code outside of tests.
#![cfg_attr(not(test), forbid(unsafe_code))]
23#![warn(
24    clippy::cargo,
25    clippy::pedantic,
26    clippy::nursery,
27    clippy::missing_docs_in_private_items
28)]
29#![deny(missing_docs)]
30
// Re-alias the rayon crate so the optional `rayon` feature can share the crate's name.
32#[cfg(feature = "rayon")]
33use rayon_crate as rayon;
34
35use std::cmp::Ordering;
36use std::fmt::Display;
37use std::hash::{Hash, Hasher};
38use std::io::Cursor;
39use std::iter::FromIterator;
40use std::num::NonZeroUsize;
41use std::ops::BitAnd;
42use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering};
43
44#[cfg(feature = "rayon")]
45use rayon::iter::{
46    FromParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelExtend,
47    ParallelIterator,
48};
49#[cfg(feature = "rayon")]
50use rayon::slice::ParallelSliceMut;
51
52use dashmap::mapref::multiple::RefMulti;
53use dashmap::mapref::one::RefMut;
54use dashmap::{DashMap, DashSet};
55use murmur3::murmur3_x64_128;
56
57/// Contains the error type used by this crate.
58mod error;
59
60// Re-export the dashmap crate so users don't need to manually import it
61pub use dashmap;
62pub use error::*;
63
64/// Provides a way to consistently select some weighted bucket (or node) given
65/// some value.
66///
67/// The exact implementation is based on the algorithm described in
68/// [Schindelhauer and Schomaker "Weighted Distributed Hash Tables"][paper].
/// As a result, modifications of weights (or additions and removals of nodes)
/// can be performed with perfect stability and perfect precision of node
/// weighting; consequently, only the minimum number of keys is remapped to new
/// nodes.
///
/// This type does not perform any caching. As a result, it is best used with
/// some form of cache whose entries are invalidated when a node is added,
/// removed, or has its weight modified. This is because lookups for a given
/// key are linear at best and quadratic at worst: there is always a linear
/// lookup cost determined by the number of nodes present, and if nodes utilize
/// tag exclusion, the cost of determining whether a node should be excluded
/// for a given set of tags becomes quadratic.
///
/// This implementation attempts to be as performant and concurrent as
/// possible, using a [`DashMap`] and [`DashSet`]s to provide concurrent access
/// and `rayon` to provide parallel lookup, if the feature is enabled. In
/// single-threaded contexts, take care not to deadlock by holding multiple
/// references into the map.
88///
89/// # Excluding tags
90///
/// This is slightly different from a standard weighted hash table in that it
/// also allows nodes to exclude themselves from being selected. This is
/// provided via the `ExclusionTags` type parameter, which is typically an
/// `enum`.
///
/// Generally speaking, `ExclusionTags` must at least implement [`Hash`] and
/// [`Eq`]. This is a requirement of storing the tags in a set implementation.
/// However, if using `rayon` to allow for parallel lookup, the trait bounds on
/// `ExclusionTags` are restricted to types that are also [`Send`] and
/// [`Sync`].
///
/// This feature is asymptotically expensive, as it requires checking all nodes
/// to see if they exclude the given tags. As a result, it should be used with
/// care, and ideally the number of exclusions per node should be kept
/// relatively small.
///
/// This feature can effectively be disabled by making `ExclusionTags` the unit
/// type, which reduces the lookup cost to linear.
108///
109/// Excluded nodes do not contribute to the total sum of weights. For example,
110/// if three nodes exist with equal weights, and one node is excluded, then the
111/// probability of selecting a node is the same as if only two nodes were
112/// present.
113///
/// In a more complex case, if nodes with weights 1, 2, and 3 are present, and
/// the node with weight 2 is excluded, then the probabilities of selecting the
/// remaining nodes become 25% and 75%, respectively.
117///
118/// Note that if [`BitAnd`] can be performed on `ExclusionTags` in an efficient
119/// way (such as for bitflags), it is highly recommended to use
120/// [`BitNodeSelection`] instead.
121///
122/// # Examples
123///
/// Nodes can be added and removed at any time, and subsequent lookups will
/// immediately reflect the new state.
126///
127/// ```
128/// # use tagged_rendezvous::*;
129/// # fn main() -> () {
130/// # example();
131/// # }
132/// #
133/// # fn example() -> Option<()> {
134/// use std::num::NonZeroUsize;
135///
136/// let selector = NodeSelection::new();
137///
138/// // Add a node with a weight of 1
139/// let node_1 = Node::<(), _>::new(NonZeroUsize::new(1)?, ());
140/// let node_1_id = selector.add(node_1.clone());
141///
142/// // Lookups will always return the first node, since it's the only one.
143/// let looked_up_node = selector.get(b"hello world")?;
144/// assert_eq!(looked_up_node.value(), &node_1);
145/// // Drop the reference ASAP, see `get` docs for more info.
146/// std::mem::drop(looked_up_node);
147///
148/// // Add a node with a weight of 2
149/// let node_2 = Node::<(), _>::new(NonZeroUsize::new(2)?, ());
150/// selector.add(node_2.clone());
151///
152/// // Any caches should be invalidated now, since we changed the state.
153/// // Now there's a 33% chance of selecting the first node, and a 67% chance of
154/// // selecting the second node.
155///
156/// // Do something with the node...
157///
158/// // Now remove the first node
159/// selector.remove(node_1_id);
160///
161/// // Any caches should be invalidated now, since we changed the state.
162/// // Lookups will now always return the second node, since it's the only one.
163///
164/// let looked_up_node = selector.get(b"hello world")?;
165/// assert_eq!(looked_up_node.value(), &node_2);
166/// # None
167/// # }
168/// ```
169///
170/// Modifying a weight is pretty ergonomic, but remember that caches should be
171/// invalidated after modifying the weights.
172///
173/// ```
174/// # use tagged_rendezvous::*;
175/// # fn main() -> () {
176/// # example();
177/// # }
178/// #
179/// # fn example() -> Option<()> {
180/// use std::num::NonZeroUsize;
181///
182/// let selector = NodeSelection::new();
183///
184/// // Add a node with a weight of 1
185/// let node_1 = Node::<(), _>::new(NonZeroUsize::new(1)?, ());
186/// let node_1_id = selector.add(node_1.clone());
187///
188/// // Add a node with a weight of 2
189/// let node_2 = Node::<(), _>::new(NonZeroUsize::new(2)?, ());
190/// selector.add(node_2.clone());
191///
/// // Now there's a 33% chance of selecting the first node, and a 67% chance of
/// // selecting the second node. Let's modify the first node's weight so
/// // there's an equal chance of selecting either node.
///
/// selector.get_mut(node_1_id)?.set_weight(NonZeroUsize::new(2)?);
197///
198/// // Any caches should be invalidated now.
199/// # None
200/// # }
201/// ```
202///
/// If exclusion tags are used, then excluded nodes do not contribute to the
/// total sum of node weights.
205///
206/// ```
207/// # use tagged_rendezvous::*;
208/// # fn main() -> () {
209/// # example();
210/// # }
211/// #
212/// # fn example() -> Option<()> {
213/// use std::num::NonZeroUsize;
214/// use std::iter::FromIterator;
215///
216/// use dashmap::DashSet;
217///
218/// let selector = NodeSelection::new();
219///
220/// #[derive(Clone, PartialEq, Eq, Hash)]
221/// enum ExclusionTag {
222///    Foo,
223/// }
224///
/// // Add 3 nodes, one with an exclusion tag.
226/// let exclusions = DashSet::from_iter([ExclusionTag::Foo]);
227/// let node_1 = Node::new(NonZeroUsize::new(1)?, ());
228/// selector.add(node_1.clone());
229/// let node_2 = Node::with_exclusions(NonZeroUsize::new(1)?, (), exclusions.clone());
230/// selector.add(node_2.clone());
231/// let node_3 = Node::new(NonZeroUsize::new(1)?, ());
232/// selector.add(node_3.clone());
233///
234/// // There's a 33% chance of selecting any node, if no tags are provided.
235/// selector.get(b"hello world")?;
236///
/// // There's an equal chance of getting node 1 or node 3 since node 2 is
/// // excluded.
239/// selector.get_with_exclusions(b"hello world", &exclusions)?;
240/// # None
241/// # }
242/// ```
243///
244/// [paper]: https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.414.9353
245#[derive(Clone, Debug)]
246pub struct NodeSelection<ExclusionTags: Hash + Eq, Metadata> {
247    /// The list of nodes to select from.
248    nodes: DashMap<NodeId, Node<ExclusionTags, Metadata>>,
249}
250
251impl<ExclusionTags, Metadata> NodeSelection<ExclusionTags, Metadata>
252where
253    ExclusionTags: Hash + Eq,
254{
255    /// Finds a node that is responsible for the provided item with no
256    /// exclusions. This lookup is performed in serial. Consider
257    /// [`par_get`](#par_get) that uses rayon to parallelize the
258    /// lookup.
259    ///
260    /// # Safety
261    ///
262    /// This method may deadlock if called when holding a mutable reference into
263    /// the map. Callers must ensure that references are dropped as soon as
264    /// possible, especially in single threaded contexts.
265    #[inline]
266    #[must_use]
267    pub fn get(&self, item: &[u8]) -> Option<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
268        self.get_internal(item, None)
269    }
270
    /// Like [`get`](#get), but allows the caller to provide a set of tags;
    /// nodes that exclude any of those tags will be skipped.
273    ///
274    /// # Safety
275    ///
276    /// This method may deadlock if called when holding a mutable reference into
277    /// the map. Callers must ensure that references are dropped as soon as
278    /// possible, especially in single threaded contexts.
279    #[inline]
280    #[must_use]
281    pub fn get_with_exclusions(
282        &self,
283        item: &[u8],
284        tags: &DashSet<ExclusionTags>,
285    ) -> Option<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
286        self.get_internal(item, Some(tags))
287    }
288
289    /// Returns a node that is responsible for the provided item. This lookup
290    /// has asymptotic complexity of `O(m * n^2)`, where `m` is the number of
291    /// nodes, and `n` is the number of tags given. As a result, callers should
292    /// generally use a small number of tags, either on node restrictions or on
293    /// the provided tags for best performance.
294    ///
295    /// # Safety
296    ///
297    /// This method may deadlock if called when holding a mutable reference into
298    /// the map. Callers must ensure that references are dropped as soon as
299    /// possible, especially in single threaded contexts.
300    fn get_internal(
301        &self,
302        item: &[u8],
303        tags: Option<&DashSet<ExclusionTags>>,
304    ) -> Option<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
305        self.nodes
306            .iter()
307            .filter(|entry| {
308                !entry.value().exclusions.iter().any(|exclusions| {
309                    tags.as_ref()
310                        .map(|set| set.contains(&exclusions))
311                        .unwrap_or_default()
312                })
313            })
314            .max_by(|left, right| f64_total_ordering(left.score(item), right.score(item)))
315    }
316
317    /// Returns the top `n` nodes that are responsible for the provided item
318    /// with no exclusions. This lookup is performed in serial. Consider
319    /// [`par_get_n`](#par_get_n) that uses rayon to parallelize the lookup. If
320    /// `n` is greater than the number of nodes, then all nodes are returned, in
321    /// order of score.
322    ///
323    /// # Safety
324    ///
325    /// This method may deadlock if called when holding a mutable reference into
326    /// the map. Callers must ensure that references are dropped as soon as
327    /// possible, especially in single threaded contexts.
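    ///
    /// # Examples
    ///
    /// A small illustration (the keys and weights here are arbitrary):
    ///
    /// ```
    /// # use tagged_rendezvous::*;
    /// # fn main() -> () {
    /// # example();
    /// # }
    /// #
    /// # fn example() -> Option<()> {
    /// use std::num::NonZeroUsize;
    ///
    /// let selector = NodeSelection::new();
    /// selector.add(Node::<(), _>::new(NonZeroUsize::new(1)?, ()));
    /// selector.add(Node::<(), _>::new(NonZeroUsize::new(2)?, ()));
    ///
    /// // Requesting more nodes than exist simply returns every node,
    /// // highest score first.
    /// let top = selector.get_n(b"hello world", 5);
    /// assert_eq!(top.len(), 2);
    /// # None
    /// # }
    /// ```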
328    #[inline]
329    #[must_use]
330    pub fn get_n(
331        &self,
332        item: &[u8],
333        n: usize,
334    ) -> Vec<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
335        self.get_n_internal(item, n, None)
336    }
337
    /// Like [`get_n`](#get_n), but allows the caller to provide a set of tags;
    /// nodes that exclude any of those tags will be skipped.
340    ///
341    /// # Safety
342    ///
343    /// This method may deadlock if called when holding a mutable reference into
344    /// the map. Callers must ensure that references are dropped as soon as
345    /// possible, especially in single threaded contexts.
346    #[inline]
347    #[must_use]
348    pub fn get_n_with_exclusions(
349        &self,
350        item: &[u8],
351        n: usize,
352        tags: &DashSet<ExclusionTags>,
353    ) -> Vec<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
354        self.get_n_internal(item, n, Some(tags))
355    }
356
    /// Returns the top `n` nodes that are responsible for the provided item.
    /// This lookup filters every node and then sorts, giving an asymptotic
    /// complexity of roughly `O(m * n^2 + m * log(m))`, where `m` is the number
    /// of nodes and `n` is the number of tags given. As a result, callers
    /// should generally use a small number of tags, either on node restrictions
    /// or on the provided tags, for best performance.
362    ///
363    /// # Safety
364    ///
365    /// This method may deadlock if called when holding a mutable reference into
366    /// the map. Callers must ensure that references are dropped as soon as
367    /// possible, especially in single threaded contexts.
368    fn get_n_internal(
369        &self,
370        item: &[u8],
371        n: usize,
372        tags: Option<&DashSet<ExclusionTags>>,
373    ) -> Vec<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
374        let mut nodes = self
375            .nodes
376            .iter()
377            .filter(|entry| {
378                !entry.value().exclusions.iter().any(|exclusions| {
379                    tags.as_ref()
380                        .map(|set| set.contains(&exclusions))
381                        .unwrap_or_default()
382                })
383            })
384            .collect::<Vec<_>>();
385
        // Sort by descending score so that truncation keeps the highest-scoring
        // nodes, matching `get`, which picks the maximum score.
        nodes.sort_unstable_by(|left, right| {
            f64_total_ordering(right.score(item), left.score(item))
        });
389
390        nodes.truncate(n);
391        nodes
392    }
393}
394
395impl<ExclusionTags, Metadata> NodeSelection<ExclusionTags, Metadata>
396where
397    ExclusionTags: Send + Sync + Hash + Eq,
398    Metadata: Send + Sync,
399{
400    /// Finds a node that is responsible for the provided item, filtering out
401    /// nodes that refuse to accept the provided tags. This lookup is performed
402    /// in parallel. Requires the `rayon` feature.
403    #[must_use]
404    #[cfg(feature = "rayon")]
405    pub fn par_get(
406        &self,
407        item: &[u8],
408        tags: Option<&DashSet<ExclusionTags>>,
409    ) -> Option<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
410        self.nodes
411            .par_iter()
412            .filter(|node| {
413                !node.exclusions.par_iter().any(|exclusions| {
414                    tags.as_ref()
415                        .map(|set| set.contains(&exclusions))
416                        .unwrap_or_default()
417                })
418            })
419            .max_by(|left, right| f64_total_ordering(left.score(item), right.score(item)))
420    }
421
    /// Finds the top `n` nodes that are responsible for the provided item,
    /// filtering out nodes that refuse to accept the provided tags. This lookup
    /// is performed in parallel. Requires the `rayon` feature.
425    #[must_use]
426    #[cfg(feature = "rayon")]
427    pub fn par_get_n(
428        &self,
429        item: &[u8],
430        n: usize,
431        tags: Option<&DashSet<ExclusionTags>>,
432    ) -> Vec<RefMulti<NodeId, Node<ExclusionTags, Metadata>>> {
433        let mut nodes = self
434            .nodes
435            .par_iter()
436            .filter(|entry| {
437                !entry.value().exclusions.iter().any(|exclusions| {
438                    tags.as_ref()
439                        .map(|set| set.contains(&exclusions))
440                        .unwrap_or_default()
441                })
442            })
443            .collect::<Vec<_>>();
444
        // Sort by descending score so that truncation keeps the highest-scoring
        // nodes, matching `par_get`, which picks the maximum score.
        nodes.par_sort_unstable_by(|left, right| {
            f64_total_ordering(right.score(item), left.score(item))
        });
448
449        nodes.truncate(n);
450        nodes
451    }
452}
453
454/// Like [`NodeSelection`], but specialized for bitflags.
455///
456/// Usage of [`BitNodeSelection`] is recommended over [`NodeSelection`] for
457/// performance reasons, but is sometimes not possible if `ExclusionTags` does
458/// not implement [`BitAnd`].
459///
/// In most cases, the API is identical to [`NodeSelection`] and is intuitive
/// for the bitflag use case. The only noteworthy difference is that you must
/// use [`BitNode`] instead of [`Node`].
///
/// Note that `ExclusionTags` must also implement [`Default`]. This is not a
/// problem for raw bitflags, but for types generated by the [`bitflags` crate]
/// you will need to implement [`Default`] yourself. The default value should
/// represent the state where all flags are disabled.
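///
/// As a rough sketch (not compiled here, since `bitflags` is not a dependency
/// of this crate and the `Tags` type below is purely illustrative), such an
/// implementation could look like this:
///
/// ```ignore
/// bitflags::bitflags! {
///     struct Tags: u8 {
///         const FOO = 0b01;
///         const BAR = 0b10;
///     }
/// }
///
/// impl Default for Tags {
///     fn default() -> Self {
///         // "No flags set", so `node_exclusions & tags == Tags::default()`
///         // means the node does not exclude any of the requested tags.
///         Self::empty()
///     }
/// }
/// ```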
///
/// Lookup performance is now `O(nm)`, where `n` is the number of nodes and `m`
/// is the cost of performing `BitAnd` on `ExclusionTags`. For bitflags, this
/// is `O(n)`.
472///
473/// # Examples
474///
475/// ```
476/// # use tagged_rendezvous::*;
477/// # fn main() -> () {
478/// # example();
479/// # }
480/// #
481/// # fn example() -> Option<()> {
482/// use std::num::NonZeroUsize;
483/// use std::iter::FromIterator;
484///
485/// use dashmap::DashSet;
486///
487/// const FLAG_A: u8 = 0b01;
488/// const FLAG_B: u8 = 0b10;
489///
490/// let selector = BitNodeSelection::new();
491///
/// // Add 3 nodes, one with an exclusion tag.
493/// let exclusions = FLAG_A | FLAG_B;
494/// let node_1 = BitNode::new(NonZeroUsize::new(1)?, ());
495/// selector.add(node_1.clone());
496/// let node_2 = BitNode::with_exclusions(NonZeroUsize::new(1)?, (), exclusions);
497/// selector.add(node_2.clone());
498/// let node_3 = BitNode::new(NonZeroUsize::new(1)?, ());
499/// selector.add(node_3.clone());
500///
501/// // There's a 33% chance of selecting any node, if no tags are provided.
502/// selector.get(b"hello world")?;
503///
/// // There's an equal chance of getting node 1 or node 3 since node 2 is
/// // excluded.
506/// selector.get_with_exclusions(b"hello world", exclusions)?;
507/// # None
508/// # }
509/// ```
510///
511/// More examples (albeit for a more general use case) can be found in
512/// [`NodeSelection`].
513///
514/// [`bitflags` crate]: https://docs.rs/bitflags/
515#[derive(Clone, Debug)]
516pub struct BitNodeSelection<ExclusionTags: Hash + Eq + BitAnd, Metadata> {
517    /// The list of nodes to select from.
518    nodes: DashMap<NodeId, BitNode<ExclusionTags, Metadata>>,
519}
520
521impl<ExclusionTags, Metadata> BitNodeSelection<ExclusionTags, Metadata>
522where
523    ExclusionTags: Hash + Eq + BitAnd<Output = ExclusionTags> + Copy + Default,
524{
525    /// Finds a node that is responsible for the provided item with no
526    /// exclusions. This lookup is performed in serial. Consider
527    /// [`par_get`](#par_get) that uses rayon to parallelize the
528    /// lookup.
529    ///
530    /// # Safety
531    ///
532    /// This method may deadlock if called when holding a mutable reference into
533    /// the map. Callers must ensure that references are dropped as soon as
534    /// possible, especially in single threaded contexts.
535    #[inline]
536    #[must_use]
537    pub fn get(&self, item: &[u8]) -> Option<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
538        self.get_with_exclusions(item, ExclusionTags::default())
539    }
540
    /// Like [`get`](#get), but allows the caller to provide a set of tags;
    /// nodes that exclude any of those tags will be skipped.
543    ///
544    /// # Safety
545    ///
546    /// This method may deadlock if called when holding a mutable reference into
547    /// the map. Callers must ensure that references are dropped as soon as
548    /// possible, especially in single threaded contexts.
549    #[inline]
550    #[must_use]
551    pub fn get_with_exclusions(
552        &self,
553        item: &[u8],
554        tags: ExclusionTags,
555    ) -> Option<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
556        self.nodes
557            .iter()
558            .filter(|entry| entry.value().exclusions & tags == ExclusionTags::default())
559            .max_by(|left, right| f64_total_ordering(left.score(item), right.score(item)))
560    }
561
562    /// Returns the top `n` nodes that are responsible for the provided item
563    /// with no exclusions. This lookup is performed in serial. Consider
564    /// [`par_get_n`](#par_get_n) that uses rayon to parallelize the lookup. If
565    /// `n` is greater than the number of nodes, then all nodes are returned, in
566    /// order of score.
567    ///
568    /// # Safety
569    ///
570    /// This method may deadlock if called when holding a mutable reference into
571    /// the map. Callers must ensure that references are dropped as soon as
572    /// possible, especially in single threaded contexts.
573    #[inline]
574    #[must_use]
575    pub fn get_n(
576        &self,
577        item: &[u8],
578        n: usize,
579    ) -> Vec<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
580        self.get_n_with_exclusions(item, n, ExclusionTags::default())
581    }
582
    /// Like [`get_n`](#get_n), but allows the caller to provide a set of tags;
    /// nodes that exclude any of those tags will be skipped.
585    ///
586    /// # Safety
587    ///
588    /// This method may deadlock if called when holding a mutable reference into
589    /// the map. Callers must ensure that references are dropped as soon as
590    /// possible, especially in single threaded contexts.
591    #[inline]
592    #[must_use]
593    pub fn get_n_with_exclusions(
594        &self,
595        item: &[u8],
596        n: usize,
597        tags: ExclusionTags,
598    ) -> Vec<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
599        let mut nodes = self
600            .nodes
601            .iter()
602            .filter(|entry| entry.value().exclusions & tags == ExclusionTags::default())
603            .collect::<Vec<_>>();
604
        // Sort by descending score so that truncation keeps the highest-scoring
        // nodes, matching `get`, which picks the maximum score.
        nodes.sort_unstable_by(|left, right| {
            f64_total_ordering(right.score(item), left.score(item))
        });
608
609        nodes.truncate(n);
610        nodes
611    }
612}
613
614impl<ExclusionTags, Metadata> BitNodeSelection<ExclusionTags, Metadata>
615where
616    ExclusionTags: Send + Sync + Hash + Eq + BitAnd<Output = ExclusionTags> + Default + Copy,
617    Metadata: Send + Sync,
618{
619    /// Finds a node that is responsible for the provided item, filtering out
620    /// nodes that refuse to accept the provided tags. This lookup is performed
621    /// in parallel. Requires the `rayon` feature.
    #[must_use]
    #[cfg(feature = "rayon")]
    pub fn par_get(
624        &self,
625        item: &[u8],
626        tags: ExclusionTags,
627    ) -> Option<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
628        self.nodes
629            .par_iter()
630            .filter(|node| node.exclusions & tags == ExclusionTags::default())
631            .max_by(|left, right| f64_total_ordering(left.score(item), right.score(item)))
632    }
633
    /// Finds the top `n` nodes that are responsible for the provided item,
    /// filtering out nodes that refuse to accept the provided tags. This lookup
    /// is performed in parallel. Requires the `rayon` feature.
637    #[must_use]
638    #[cfg(feature = "rayon")]
639    pub fn par_get_n(
640        &self,
641        item: &[u8],
642        n: usize,
643        tags: ExclusionTags,
644    ) -> Vec<RefMulti<NodeId, BitNode<ExclusionTags, Metadata>>> {
645        let mut nodes = self
646            .nodes
647            .par_iter()
648            .filter(|node| node.exclusions & tags == ExclusionTags::default())
649            .collect::<Vec<_>>();
650
        // Sort by descending score so that truncation keeps the highest-scoring
        // nodes, matching `par_get`, which picks the maximum score.
        nodes.par_sort_unstable_by(|left, right| {
            f64_total_ordering(right.score(item), left.score(item))
        });
654
655        nodes.truncate(n);
656        nodes
657    }
658}
659
660/// Generate a node and node selection implementation based on the provided
661/// bounds.
662macro_rules! impl_node_selection {
663    // this $bounds meme is a hack to get around the fact that we can't
664    // represent trait bounds in macros.
665    ($struct_name:ident, $node:ident $(: $bounds:path )?) => {
666        impl<ExclusionTags, Metadata> $struct_name<ExclusionTags, Metadata>
667        where
668            ExclusionTags: Hash + Eq + $($bounds)*,
669        {
670            /// Constructs a new empty instance.
671            #[inline]
672            #[must_use]
673            pub fn new() -> Self {
674                Self::default()
675            }
676
            /// Constructs a new selection, ensuring that at least `capacity`
            /// nodes can be added without re-allocating.
679            #[inline]
680            #[must_use]
681            pub fn with_capacity(capacity: usize) -> Self {
682                Self {
683                    nodes: DashMap::with_capacity(capacity),
684                }
685            }
686
687            /// Adds a new node to the selection with an opaque ID. If lookups
688            /// are cached, then callers must invalidate the cache after adding
689            /// a node. Otherwise, it is possible to have incorrectly
690            /// distributed selections.
691            ///
692            /// # Safety
693            ///
694            /// This method may deadlock if called when holding any sort of
695            /// reference into the map. Callers must ensure that references are
696            /// dropped as soon as possible, especially in single threaded
697            /// contexts.
698            ///
699            /// # Panics
700            ///
            /// This method will panic if the provided node has an id that is
            /// already present in the selection. For the non-panicking version
            /// of this method, see [`try_add`](#try_add).
704            #[inline]
705            #[must_use]
706            pub fn add(&self, node: $node<ExclusionTags, Metadata>) -> NodeId {
707                let id = NodeId::new_opaque();
708                self.add_with_id(id, node);
709                id
710            }
711
712            /// Adds a new node to the selection with the provided ID. If
713            /// lookups are cached, then callers must invalidate the cache after
714            /// adding a node. Otherwise, it is possible to have incorrectly
715            /// distributed selections.
716            ///
717            /// # Safety
718            ///
719            /// This method may deadlock if called when holding any sort of
720            /// reference into the map. Callers must ensure that references are
721            /// dropped as soon as possible, especially in single threaded
722            /// contexts.
723            ///
724            /// # Panics
725            ///
            /// This method will panic if the provided node has an id that is
            /// already present in the selection. For the non-panicking version
            /// of this method, see [`try_add_with_id`](#try_add_with_id).
729            #[inline]
730            pub fn add_with_id(&self, id: NodeId, node: $node<ExclusionTags, Metadata>) {
731                if self.nodes.insert(id, node).is_some() {
732                    panic!("Node with duplicate id added: {}", id);
733                }
734            }
735
736            /// Tries to add the node to the selection with an opaque ID. If
737            /// lookups are cached, then callers must invalidate the cache after
738            /// adding a node. Otherwise, it is possible to have incorrectly
739            /// distributed selections.
740            ///
741            /// # Safety
742            ///
743            /// This method may deadlock if called when holding any sort of
744            /// reference into the map. Callers must ensure that references are
745            /// dropped as soon as possible, especially in single threaded
746            /// contexts.
747            ///
748            /// # Errors
749            ///
750            /// This method will fail if the node has an id that is already
751            /// present in the selection.
752            #[inline]
753            pub fn try_add(
754                &self,
755                node: $node<ExclusionTags, Metadata>,
756            ) -> Result<NodeId, DuplicateIdError> {
757                let id = NodeId::new_opaque();
758                self.try_add_with_id(id, node)?;
759                Ok(id)
760            }
761
762            /// Tries to add the node to the selection with the provided ID. If
763            /// lookups are cached, then callers must invalidate the cache after
764            /// adding a node. Otherwise, it is possible to have incorrectly
765            /// distributed selections.
766            ///
767            /// # Safety
768            ///
769            /// This method may deadlock if called when holding any sort of
770            /// reference into the map. Callers must ensure that references are
771            /// dropped as soon as possible, especially in single threaded
772            /// contexts.
773            ///
774            /// # Errors
775            ///
776            /// This method will fail if the node has an id that is already
777            /// present in the selection.
778            #[inline]
779            pub fn try_add_with_id(
780                &self,
781                id: NodeId,
782                node: $node<ExclusionTags, Metadata>,
783            ) -> Result<(), DuplicateIdError> {
784                if self.nodes.contains_key(&id) {
785                    Err(DuplicateIdError(id))
786                } else {
787                    self.nodes.insert(id, node);
788                    Ok(())
789                }
790            }
791
            /// Returns a mutable reference to the node given some ID. This is
            /// useful for modifying the weight of a node. If lookups are
            /// cached, then callers must invalidate the cache after modifying a
            /// node. Otherwise, it is possible to have incorrectly distributed
            /// selections.
797            ///
798            /// # Safety
799            ///
            /// This method may deadlock if called when holding any sort of
            /// reference into the map. Callers must ensure that references are
            /// dropped as soon as possible, especially in single threaded
            /// contexts.
804            #[inline]
805            #[must_use]
806            pub fn get_mut(
807                &self,
808                id: NodeId,
809            ) -> Option<RefMut<NodeId, $node<ExclusionTags, Metadata>>> {
810                self.nodes.get_mut(&id)
811            }
812
813            /// Removes a node, returning the node if it existed. If lookups are
814            /// cached, then callers must invalidate the cache after
815            /// successfully removing a node. Otherwise, it is possible to have
816            /// incorrectly distributed selections.
817            ///
818            /// # Safety
819            ///
820            /// This method may deadlock if called when holding any sort of
821            /// reference into the map. Callers must ensure that references are
822            /// dropped as soon as possible, especially in single threaded
823            /// contexts.
824            #[inline]
825            #[must_use]
826            pub fn remove(&self, id: NodeId) -> Option<$node<ExclusionTags, Metadata>> {
827                self.nodes.remove(&id).map(|(_id, node)| node)
828            }
829
            /// Returns whether the node with the given ID is present in this selector.
831            ///
832            /// # Safety
833            ///
834            /// This method may deadlock if called when holding a mutable
835            /// reference into the map. Callers must ensure that references are
836            /// dropped as soon as possible, especially in single threaded
837            /// contexts.
838            #[inline]
839            #[must_use]
840            pub fn contains(&self, id: NodeId) -> bool {
841                self.nodes.contains_key(&id)
842            }
843        }
844
845        impl<ExclusionTags, Metadata> Default for $struct_name<ExclusionTags, Metadata>
846        where
847            ExclusionTags: Hash + Eq + $($bounds)*,
848        {
849            #[inline]
850            fn default() -> Self {
851                Self {
852                    nodes: DashMap::new(),
853                }
854            }
855        }
856
857        impl<ExclusionTags, Metadata> Extend<(NodeId, $node<ExclusionTags, Metadata>)>
858            for $struct_name<ExclusionTags, Metadata>
859        where
860            ExclusionTags: Hash + Eq + $($bounds)*,
861        {
862            #[inline]
863            fn extend<T: IntoIterator<Item = (NodeId, $node<ExclusionTags, Metadata>)>>(
864                &mut self,
865                iter: T,
866            ) {
867                self.nodes.extend(iter);
868            }
869        }
870
871        impl<ExclusionTags, Metadata> FromIterator<(NodeId, $node<ExclusionTags, Metadata>)>
872            for $struct_name<ExclusionTags, Metadata>
873        where
874            ExclusionTags: Hash + Eq + $($bounds)*,
875        {
876            #[inline]
877            fn from_iter<T: IntoIterator<Item = (NodeId, $node<ExclusionTags, Metadata>)>>(
878                iter: T,
879            ) -> Self {
880                Self {
881                    nodes: DashMap::from_iter(iter),
882                }
883            }
884        }
885
886        #[cfg(feature = "rayon")]
887        impl<ExclusionTags, Metadata> FromParallelIterator<(NodeId, $node<ExclusionTags, Metadata>)>
888            for $struct_name<ExclusionTags, Metadata>
889        where
890            ExclusionTags: Send + Sync + Hash + Eq + $($bounds)*,
891            Metadata: Send + Sync,
892        {
893            #[inline]
894            fn from_par_iter<I>(into_iter: I) -> Self
895            where
896                I: IntoParallelIterator<Item = (NodeId, $node<ExclusionTags, Metadata>)>,
897            {
898                Self {
899                    nodes: DashMap::from_par_iter(into_iter),
900                }
901            }
902        }
903
904        impl<ExclusionTags, Metadata> IntoIterator for $struct_name<ExclusionTags, Metadata>
905        where
906            ExclusionTags: Hash + Eq + $($bounds)*,
907        {
908            type Item = (NodeId, $node<ExclusionTags, Metadata>);
909            type IntoIter =
910                <DashMap<NodeId, $node<ExclusionTags, Metadata>> as IntoIterator>::IntoIter;
911
912            #[inline]
913            fn into_iter(self) -> Self::IntoIter {
914                self.nodes.into_iter()
915            }
916        }
917
918        #[cfg(feature = "rayon")]
919        impl<ExclusionTags, Metadata> IntoParallelIterator for $struct_name<ExclusionTags, Metadata>
920        where
921            ExclusionTags: Send + Sync + Hash + Eq + $($bounds)*,
922            Metadata: Send + Sync,
923        {
924            type Item = <Self as IntoIterator>::Item;
925            type Iter =
926                <DashMap<NodeId, $node<ExclusionTags, Metadata>> as IntoParallelIterator>::Iter;
927
928            #[inline]
929            fn into_par_iter(self) -> <Self as IntoParallelIterator>::Iter {
930                self.nodes.into_par_iter()
931            }
932        }
933
934        #[cfg(feature = "rayon")]
935        impl<ExclusionTags, Metadata> ParallelExtend<(NodeId, $node<ExclusionTags, Metadata>)>
936            for $struct_name<ExclusionTags, Metadata>
937        where
938            ExclusionTags: Send + Sync + Hash + Eq + $($bounds)*,
939            Metadata: Send + Sync,
940        {
941            #[inline]
942            fn par_extend<I>(&mut self, extendable: I)
943            where
944                I: IntoParallelIterator<Item = (NodeId, $node<ExclusionTags, Metadata>)>,
945            {
946                self.nodes.par_extend(extendable);
947            }
948        }
949    };
950}
951
952impl_node_selection!(NodeSelection, Node);
953
954impl_node_selection!(BitNodeSelection, BitNode: BitAnd);
955
956/// Generates a node implementation based on the provided node and its bounds.
957macro_rules! impl_node {
958    // this $bounds meme is a hack to get around the fact that we can't
959    // represent trait bounds in macros.
960    ($struct_name:ident, $excludes_type:ty  $(: $bounds:path )?) => {
961        impl<ExclusionTags, Metadata> $struct_name<ExclusionTags, Metadata>
962        where
963            ExclusionTags: Hash + Eq + $($bounds)*,
964        {
965            /// Constructs a new node with an opaque ID, using the provided
966            /// metadata and exclusions.
967            ///
            /// `weight` should not exceed 2^52. `weight` is internally cast to
            /// an [`f64`], and loss of precision may occur if `weight` exceeds
            /// [`f64`]'s 52-bit mantissa. This should not be a problem for most
            /// use cases.
972            #[inline]
973            #[must_use]
974            pub fn with_exclusions(
975                weight: NonZeroUsize,
976                metadata: Metadata,
977                exclusions: $excludes_type,
978            ) -> Self {
979                Self {
980                    seed: rand::random(),
981                    weight,
982                    exclusions,
983                    metadata,
984                }
985            }
986
987            /// Constructs a new node from its components.
988            ///
            /// `weight` should not exceed 2^52. `weight` is internally cast to
            /// an [`f64`], and loss of precision may occur if `weight` exceeds
            /// [`f64`]'s 52-bit mantissa. This should not be a problem for most
            /// use cases.
993            #[inline]
994            #[must_use]
995            pub fn from_parts<Rng: rand::Rng>(
996                rng: &mut Rng,
997                weight: NonZeroUsize,
998                exclusions: $excludes_type,
999                metadata: Metadata,
1000            ) -> Self {
1001                Self {
1002                    weight,
1003                    seed: rng.gen(),
1004                    exclusions,
1005                    metadata,
1006                }
1007            }
1008
            /// Calculates the score of a node for a given input. This combines
            /// a Murmur3 hash with highest random weight hashing, also known as
            /// rendezvous hashing.
            fn score(&self, item: &[u8]) -> f64 {
                // Truncation to the low 64 bits of the 128-bit hash is intended.
                #[allow(clippy::cast_possible_truncation)]
                let hash =
                    Hash64(murmur3_x64_128(&mut Cursor::new(item), self.seed).unwrap() as u64)
                        .as_normalized_float();
                // Logarithmic method: map the hash to a uniform value in [0, 1)
                // and score the node as `weight / -ln(hash)`.
                let score = 1.0 / -hash.ln();
1019                // This is documented in construction of a node
1020                #[allow(clippy::cast_precision_loss)]
1021                {
1022                    self.weight.get() as f64 * score
1023                }
1024            }
1025
1026            /// Sets the weight of the node.
1027            #[inline]
1028            pub fn set_weight(&mut self, weight: NonZeroUsize) {
1029                self.weight = weight;
1030            }
1031
1032            /// Fetches the associated data with the node.
1033            #[inline]
1034            pub const fn data(&self) -> &Metadata {
1035                &self.metadata
1036            }
1037        }
1038
1039        impl<ExclusionTags, Metadata> Hash for $struct_name<ExclusionTags, Metadata>
1040        where
1041            ExclusionTags: Hash + Eq + $($bounds)*,
1042            Metadata: Hash,
1043        {
1044            #[inline]
1045            fn hash<H: Hasher>(&self, state: &mut H) {
1046                self.weight.hash(state);
1047                self.seed.hash(state);
1048                self.metadata.hash(state);
1049            }
1050        }
1051
1052        impl<ExclusionTags, Metadata> PartialEq for $struct_name<ExclusionTags, Metadata>
1053        where
1054            ExclusionTags: Hash + Eq + $($bounds)*,
1055            Metadata: PartialEq,
1056        {
1057            #[inline]
1058            fn eq(&self, other: &Self) -> bool {
1059                self.weight == other.weight
1060                    && self.seed == other.seed
1061                    && self.metadata == other.metadata
1062            }
1063        }
1064
1065        impl<ExclusionTags, Metadata> Eq for $struct_name<ExclusionTags, Metadata>
1066        where
1067            ExclusionTags: Hash + Eq + $($bounds)*,
1068            Metadata: Eq,
1069        {
1070        }
1071
1072        impl<ExclusionTags, Metadata> PartialOrd for $struct_name<ExclusionTags, Metadata>
1073        where
1074            ExclusionTags: Hash + Eq + $($bounds)*,
1075            Metadata: PartialOrd,
1076        {
1077            #[inline]
1078            fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1079                match self.metadata.partial_cmp(&other.metadata) {
1080                    None | Some(Ordering::Equal) => match self.weight.cmp(&other.weight) {
1081                        Ordering::Equal => Some(self.seed.cmp(&other.seed)),
1082                        cmp => Some(cmp),
1083                    },
1084                    cmp => cmp,
1085                }
1086            }
1087        }
1088
1089        impl<ExclusionTags, Metadata> Ord for $struct_name<ExclusionTags, Metadata>
1090        where
1091            ExclusionTags: Hash + Eq + $($bounds)*,
1092            Metadata: Ord,
1093        {
1094            #[inline]
1095            fn cmp(&self, other: &Self) -> Ordering {
1096                match self.metadata.cmp(&other.metadata) {
1097                    Ordering::Equal => match self.weight.cmp(&other.weight) {
1098                        Ordering::Equal => self.seed.cmp(&other.seed),
1099                        cmp => cmp,
1100                    },
1101                    cmp => cmp,
1102                }
1103            }
1104        }
1105    };
1106}
1107
1108/// A representation of logical node in some system.
1109///
/// Each node must contain a weight, which is used to determine how much
/// traffic the node receives. This weight is relative to all other weights in
/// the selection. For example, if you only had nodes with weights of 2 and 4,
/// then the nodes have a 33% and 67% chance of being selected, respectively.
/// The important part is the relative ratio of weights, not the absolute
/// values. If an equal distribution of traffic is preferred, then the weights
/// should be equal.
1116///
1117/// A node is generic over two types: `ExclusionTags` and `Metadata`.
1118/// `ExclusionTags` is a type that represents a set of tags that a node declares
1119/// to not accept. `Metadata` is a type that represents any additional data
1120/// that a server may need to store about a node, such as labels or a list of
1121/// IP addresses. The most basic node is where both types are the [`unit`] type.
1122///
/// Additionally, every node must have a unique identifier associated with it.
/// [`NodeId`] is the type that represents this unique identifier, and is used
/// only by [`NodeSelection`] to identify the node.
1126///
1127/// # Examples
1128///
1129/// The simplest example of a node is a node that accepts all traffic and
1130/// carries no additional metadata.
1131///
1132/// ```
1133/// # use tagged_rendezvous::*;
1134/// # fn main() -> () {
1135/// # example();
1136/// # }
1137/// #
1138/// # fn example() -> Option<()> {
1139/// use std::num::NonZeroUsize;
1140///
1141/// // Generates a node with an opaque id and a weight of 4.
1142/// let node_small = Node::<(), _>::new(NonZeroUsize::new(4)?, ());
1143/// assert_eq!(*node_small.data(), ());
1144/// # None
1145/// # }
1146/// ```
1147///
1148/// Using some metadata, we can get some information that we can use to
1149/// communicate with the node.
1150///
1151/// ```
1152/// # use tagged_rendezvous::*;
1153/// # fn main() -> () {
1154/// # example();
1155/// # }
1156/// #
1157/// # fn example() -> Option<()> {
1158/// use std::net::Ipv4Addr;
1159/// use std::num::NonZeroUsize;
1160///
1161/// // Note that no bounds are on the metadata, but we derive some to make it
1162/// // easier to test.
1163/// #[derive(Debug, PartialEq)]
1164/// struct Metadata {
1165///    ip_address: Ipv4Addr,
1166/// }
1167///
1168/// // Generates a node with an opaque id, a weight of 4, and with a metadata
1169/// // object with an IP address.
1170/// let metadata = Metadata { ip_address: Ipv4Addr::new(10, 0, 0, 1) };
1171/// let node_small = Node::<(), _>::new(NonZeroUsize::new(4)?, metadata);
1172/// assert_eq!(node_small.data().ip_address, Ipv4Addr::new(10, 0, 0, 1));
1173/// # None
1174/// # }
1175/// ```
1176///
1177/// Finally, you can define a type to use a set of exclusions for a node.
1178///
1179/// ```
1180/// # use tagged_rendezvous::*;
1181/// # fn main() -> () {
1182/// # example();
1183/// # }
1184/// #
1185/// # fn example() -> Option<()> {
1186/// use std::iter::FromIterator;
1187/// use std::net::Ipv4Addr;
1188/// use std::num::NonZeroUsize;
1189///
1190/// use dashmap::DashSet;
1191///
1192/// #[derive(PartialEq, Eq, Hash)]
1193/// enum Exclusions {
1194///     Foo,
1195///     Bar,
1196/// }
1197///
1198/// // Generates a node with an opaque id, a weight of 4, and refusing to
1199/// // accept traffic with the `Foo` tag.
1200/// let exclusions = DashSet::from_iter([Exclusions::Foo]);
1201/// let node_small = Node::with_exclusions(NonZeroUsize::new(4)?, (), exclusions);
1202/// # None
1203/// # }
1204/// ```
1205#[derive(Clone, Debug)]
1206pub struct Node<ExclusionTags: Hash + Eq, Metadata> {
1207    /// The weight of a node. This is a relative value, and is used to determine
1208    /// how much traffic a node receives.
1209    weight: NonZeroUsize,
1210    /// This random seed value is used as a way to differentiate nodes. This is
1211    /// used to act as a seed value for the hash function, so that two nodes
1212    /// don't have the same score before weights.
1213    seed: u32,
1214    /// A list of items that a node declares to not accept.
1215    exclusions: DashSet<ExclusionTags>,
1216    /// Associated data. This is generic, so it can be used for any type,
1217    /// including `Option<T>`.
1218    metadata: Metadata,
1219}
1220
1221impl<ExclusionTags, Metadata> Node<ExclusionTags, Metadata>
1222where
1223    ExclusionTags: Hash + Eq,
1224{
1225    /// Constructs a new node with an opaque ID and no exclusions.
1226    ///
    /// `weight` should not exceed 2^52. `weight` is internally cast to an
    /// [`f64`], and loss of precision may occur if `weight` exceeds [`f64`]'s
    /// 52-bit mantissa. This should not be a problem for most use cases.
1230    #[inline]
1231    #[must_use]
1232    pub fn new(weight: NonZeroUsize, metadata: Metadata) -> Self {
1233        Self {
1234            seed: rand::random(),
1235            weight,
1236            exclusions: DashSet::new(),
1237            metadata,
1238        }
1239    }
1240}
1241
1242impl<ExclusionTags, Metadata> Node<ExclusionTags, Metadata>
1243where
1244    ExclusionTags: Hash + Eq,
1245    Metadata: Default,
1246{
1247    /// Constructs a new node with an opaque ID and no exclusions, using the
1248    /// default implementation of `Metadata`.
1249    ///
    /// `weight` should not exceed 2^52. `weight` is internally cast to an
    /// [`f64`], and loss of precision may occur if `weight` exceeds [`f64`]'s
    /// 52-bit mantissa. This should not be a problem for most use cases.
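    ///
    /// # Examples
    ///
    /// A brief illustration using the unit type for both type parameters:
    ///
    /// ```
    /// # use tagged_rendezvous::*;
    /// # fn main() -> () {
    /// # example();
    /// # }
    /// #
    /// # fn example() -> Option<()> {
    /// use std::num::NonZeroUsize;
    ///
    /// let node = Node::<(), ()>::with_default(NonZeroUsize::new(2)?);
    /// assert_eq!(*node.data(), ());
    /// # None
    /// # }
    /// ```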
1253    #[inline]
1254    #[must_use]
1255    pub fn with_default(weight: NonZeroUsize) -> Self {
1256        Self::new(weight, Metadata::default())
1257    }
1258}
1259
1260impl_node!(Node, DashSet<ExclusionTags>);
1261
1262/// Like [`Node`], but optimized for bitflags.
1263///
1264/// Note that `ExclusionTags` must also implement [`Default`]. This is not a
1265/// problem for raw bitflags, but for types derived from the [`bitflags` crate],
1266/// then you will need to implement [`Default`] on the type. This default value
1267/// should represent when all flags are disabled.
1268///
1269/// [`bitflags` crate]: https://docs.rs/bitflags/
1270#[derive(Clone, Debug)]
1271pub struct BitNode<ExclusionTags: Hash + Eq + BitAnd, Metadata> {
1272    /// The weight of a node. This is a relative value, and is used to determine
1273    /// how much traffic a node receives.
1274    weight: NonZeroUsize,
1275    /// This random seed value is used as a way to differentiate nodes. This is
1276    /// used to act as a seed value for the hash function, so that two nodes
1277    /// don't have the same score before weights.
1278    seed: u32,
1279    /// A list of items that a node declares to not accept.
1280    exclusions: ExclusionTags,
1281    /// Associated data. This is generic, so it can be used for any type,
1282    /// including `Option<T>`.
1283    metadata: Metadata,
1284}
1285
1286impl<ExclusionTags, Metadata> BitNode<ExclusionTags, Metadata>
1287where
1288    ExclusionTags: Hash + Eq + BitAnd + Default,
1289{
1290    /// Constructs a new node with an opaque ID and no exclusions.
1291    ///
    /// `weight` should not exceed 2^52. `weight` is internally cast to an
    /// [`f64`], and loss of precision may occur if `weight` exceeds [`f64`]'s
    /// 52-bit mantissa. This should not be a problem for most use cases.
1295    #[inline]
1296    #[must_use]
1297    pub fn new(weight: NonZeroUsize, metadata: Metadata) -> Self {
1298        Self {
1299            seed: rand::random(),
1300            weight,
1301            exclusions: Default::default(),
1302            metadata,
1303        }
1304    }
1305}
1306
1307impl<ExclusionTags, Metadata> BitNode<ExclusionTags, Metadata>
1308where
1309    ExclusionTags: Hash + Eq + BitAnd + Default,
1310    Metadata: Default,
1311{
1312    /// Constructs a new node with an opaque ID and no exclusions, using the
1313    /// default implementation of `Metadata`.
1314    ///
    /// `weight` should not exceed 2^52. `weight` is internally cast to an
    /// [`f64`], and loss of precision may occur if `weight` exceeds [`f64`]'s
    /// 52-bit mantissa. This should not be a problem for most use cases.
1318    #[inline]
1319    #[must_use]
1320    pub fn with_default(weight: NonZeroUsize) -> Self {
1321        Self::new(weight, Metadata::default())
1322    }
1323}
1324
1325impl_node!(BitNode, ExclusionTags: BitAnd);
1326
1327/// A representation of a 64 bit hash value.
1328struct Hash64(u64);
1329
impl Hash64 {
    /// Returns a value in the range `[0, 1)`.
    fn as_normalized_float(&self) -> f64 {
        /// Masks the hash down to its lowest 53 bits (masking out the highest
        /// 11 bits of the [`u64`]), matching the precision of an [`f64`].
        const FIFTY_THREE_ONES: u64 = u64::MAX >> (u64::BITS - 53);
        // For bit patterns this small (at most `1 << 53`), the `f64` produced
        // by `from_bits` encodes exactly `bits * 2^-1074`, so the division
        // below evaluates to `(hash mod 2^53) / 2^53`, a value in `[0, 1)`.
        let fifty_three_zeros: f64 = f64::from_bits(1_u64 << 53);
        f64::from_bits(self.0 & FIFTY_THREE_ONES) / fifty_three_zeros
    }
}
1339
/// The counter used for opaque node ids. This is simply incremented on each
/// use, which should solve uniqueness issues. If there is a use case for
/// generating more than [`usize::MAX`] IDs, then please file an issue.
static NODE_ID_COUNTER: AtomicUsize = AtomicUsize::new(0);
1344
1345/// An opaque representation of a node ID.
1346#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
1347pub struct NodeId(usize);
1348
1349impl NodeId {
1350    /// Generates a new ID from the given number. This id must be unique, and is
1351    /// used as the sole determining factor for equality and ordering of nodes.
1352    /// This should only be used when some sort of unique identifier is already
1353    /// present, or when there needs to be finer control over the internal id
1354    /// used. Generally, this is not the case, and one should prefer to use
1355    /// [`new_opaque`](#new_opaque) instead.
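    ///
    /// # Examples
    ///
    /// A brief illustration of both constructors (the values are arbitrary):
    ///
    /// ```
    /// # use tagged_rendezvous::NodeId;
    /// // An explicit id, e.g. reusing an identifier from an existing system.
    /// let explicit = NodeId::new(42);
    /// // An opaque id, allocated from an internal counter.
    /// let _opaque = NodeId::new_opaque();
    /// assert_eq!(explicit, NodeId::new(42));
    /// ```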
1356    #[inline]
1357    #[must_use]
1358    pub const fn new(id: usize) -> Self {
1359        Self(id)
1360    }
1361
1362    /// Generates a new node with an opaque ID. This should be used and
1363    /// preferred over [`new`](#new) when the internal node ID is not important.
1364    #[inline]
1365    #[must_use]
1366    pub fn new_opaque() -> Self {
1367        Self::new(NODE_ID_COUNTER.fetch_add(1, AtomicOrdering::Relaxed))
1368    }
1369}
1370
1371impl Display for NodeId {
1372    #[inline]
1373    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1374        write!(f, "{}", self.0)
1375    }
1376}
1377
1378#[cfg(test)]
1379mod hash64 {
1380    use crate::Hash64;
1381
1382    #[test]
1383    fn normalized_float() {
1384        // These numbers are tested against the python implementation
1385        assert_eq!(
1386            Hash64(0x1234567890abcdef).as_normalized_float(),
1387            0.6355555368049276
1388        );
1389
1390        assert_eq!(
1391            Hash64(0xffffffffffffff).as_normalized_float(),
1392            0.9999999999999999
1393        );
1394    }
1395}
1396
/// A stand-in for [`f64::total_cmp`] until it is stabilized. This mirrors the
/// standard library's implementation. See the [GitHub issue] for more
/// information.
///
/// [GitHub issue]: https://github.com/rust-lang/rust/issues/72599
1401#[allow(clippy::cast_sign_loss, clippy::cast_possible_wrap)]
1402fn f64_total_ordering(left: f64, right: f64) -> Ordering {
1403    let mut left = left.to_bits() as i64;
1404    let mut right = right.to_bits() as i64;
1405    left ^= (((left >> 63) as u64) >> 1) as i64;
1406    right ^= (((right >> 63) as u64) >> 1) as i64;
1407    left.cmp(&right)
1408}
1409
1410#[cfg(test)]
1411mod node_selection {
1412    use super::*;
1413
1414    #[should_panic]
1415    #[test]
1416    fn duplicate_id_will_panic() {
1417        let selector = NodeSelection::<(), ()>::new();
        let weight = unsafe { NonZeroUsize::new_unchecked(1) };
        selector.add_with_id(NodeId::new(0), Node::with_default(weight));
        selector.add_with_id(NodeId::new(0), Node::with_default(weight));
1421    }
1422}
1423
1424#[cfg(test)]
1425mod node_selection_no_exclusions {
1426    use std::collections::BTreeMap;
1427
1428    use super::*;
1429
1430    #[test]
1431    fn sanity_check_weighted() {
1432        let node_selection = NodeSelection::<(), ()>::new();
1433
1434        let mut nodes: BTreeMap<_, usize> = unsafe {
1435            let mut map = BTreeMap::new();
1436            map.insert(Node::with_default(NonZeroUsize::new_unchecked(100)), 0);
1437            map.insert(Node::with_default(NonZeroUsize::new_unchecked(200)), 0);
1438            map.insert(Node::with_default(NonZeroUsize::new_unchecked(300)), 0);
1439            map
1440        };
1441
1442        for node in nodes.keys() {
1443            let _ = node_selection.add(node.clone());
1444        }
1445
1446        for _ in 0..600 {
1447            let node_selected = node_selection.get(&(rand::random::<f64>()).to_le_bytes());
1448            if let Some(node) = node_selected {
1449                let node = nodes.get_mut(node.value()).unwrap();
1450                *node += 1;
1451            }
1452        }
1453
1454        for (node, counts) in nodes {
1455            let anchor = node.weight.get() as usize;
1456            let range = (anchor - 50)..(anchor + 50);
1457            assert!(range.contains(&counts));
1458        }
1459    }
1460
1461    #[test]
1462    fn sanity_check_unweighted() {
1463        let node_selection = NodeSelection::<(), ()>::new();
1464
1465        let mut nodes: BTreeMap<_, usize> = {
1466            let weight = unsafe { NonZeroUsize::new_unchecked(1) };
1467            let mut map = BTreeMap::new();
1468            map.insert(Node::with_default(weight), 0);
1469            map.insert(Node::with_default(weight), 0);
1470            map.insert(Node::with_default(weight), 0);
1471            map
1472        };
1473
1474        for node in nodes.keys() {
1475            let _ = node_selection.add(node.clone());
1476        }
1477
1478        for _ in 0..600 {
1479            let node_selected = node_selection.get(&(rand::random::<f64>()).to_le_bytes());
1480            if let Some(node) = node_selected {
1481                let node = nodes.get_mut(node.value()).unwrap();
1482                *node += 1;
1483            }
1484        }
1485
1486        let range = (200 - 50)..(200 + 50);
1487        for counts in nodes.values() {
1488            assert!(range.contains(counts));
1489        }
1490    }
1491
1492    #[test]
1493    fn sanity_check_unweighted_n() {
1494        let node_selection = NodeSelection::<(), ()>::new();
1495
1496        let mut nodes: BTreeMap<_, usize> = {
1497            let weight = unsafe { NonZeroUsize::new_unchecked(1) };
1498            let mut map = BTreeMap::new();
1499            map.insert(Node::with_default(weight), 0);
1500            map.insert(Node::with_default(weight), 0);
1501            map.insert(Node::with_default(weight), 0);
1502            map
1503        };
1504
1505        for node in nodes.keys() {
1506            let _ = node_selection.add(node.clone());
1507        }
1508
1509        for _ in 0..600 {
1510            let nodes_selected = node_selection.get_n(&(rand::random::<f64>()).to_le_bytes(), 2);
1511            for node in nodes_selected {
1512                let node = nodes.get_mut(node.value()).unwrap();
1513                *node += 1;
1514            }
1515        }
1516
1517        let range = (400 - 30)..(400 + 30);
1518        for counts in nodes.values() {
1519            assert!(range.contains(counts));
1520        }
1521    }
1522}
1523
1524#[cfg(test)]
1525mod node_selection_exclusions {
1526    use std::collections::BTreeMap;
1527    use std::iter::FromIterator;
1528
1529    use super::*;
1530
1531    #[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug)]
1532    enum Exclusions {
1533        A,
1534    }
1535
1536    #[test]
1537    fn sanity_check_weighted() {
1538        let node_selection = NodeSelection::<Exclusions, ()>::new();
1539
1540        let exclusions = DashSet::from_iter([Exclusions::A]);
1541
1542        let mut nodes: BTreeMap<_, usize> = unsafe {
1543            let mut map = BTreeMap::new();
1544            map.insert(Node::with_default(NonZeroUsize::new_unchecked(100)), 0);
1545            map.insert(
1546                Node::with_exclusions(NonZeroUsize::new_unchecked(200), (), exclusions.clone()),
1547                0,
1548            );
1549            map.insert(Node::with_default(NonZeroUsize::new_unchecked(300)), 0);
1550            map
1551        };
1552
1553        for node in nodes.keys() {
1554            let _ = node_selection.add(node.clone());
1555        }
1556
1557        for _ in 0..600 {
1558            let node_selected = node_selection
1559                .get_with_exclusions(&(rand::random::<f64>()).to_le_bytes(), &exclusions);
1560            if let Some(node) = node_selected {
1561                let node = nodes.get_mut(node.value()).unwrap();
1562                *node += 1;
1563            }
1564        }
1565
1566        for (node, counts) in nodes {
1567            let anchor = node.weight.get() as usize * 3 / 2; // 600 draws over an eligible weight of 400
1568            if node.exclusions.is_empty() {
1569                let range = (anchor - 50)..(anchor + 50);
1570                assert!(range.contains(&counts));
1571            } else {
1572                assert_eq!(counts, 0);
1573            }
1574        }
1575    }
1576
1577    #[test]
1578    fn sanity_check_unweighted() {
1579        let node_selection = NodeSelection::<Exclusions, ()>::new();
1580
1581        let exclusions = DashSet::from_iter([Exclusions::A]);
1582
1583        let mut nodes: BTreeMap<_, usize> = {
1584            let mut map = BTreeMap::new();
1585            let weight = unsafe { NonZeroUsize::new_unchecked(1) };
1586            map.insert(Node::with_default(weight), 0);
1587            map.insert(Node::with_exclusions(weight, (), exclusions.clone()), 0);
1588            map.insert(Node::with_default(weight), 0);
1589            map
1590        };
1591
1592        for node in nodes.keys() {
1593            let _ = node_selection.add(node.clone());
1594        }
1595
1596        for _ in 0..600 {
1597            let node_selected = node_selection
1598                .get_with_exclusions(&(rand::random::<f64>()).to_le_bytes(), &exclusions);
1599            if let Some(node) = node_selected {
1600                let node = nodes.get_mut(node.value()).unwrap();
1601                *node += 1;
1602            }
1603        }
1604
1605        let range = (300 - 50)..(300 + 50);
1606        for (node, counts) in nodes {
1607            if node.exclusions.is_empty() {
1608                assert!(range.contains(&counts));
1609            } else {
1610                assert_eq!(counts, 0);
1611            }
1612        }
1613    }
1614
1615    #[test]
1616    fn sanity_check_unweighted_n() {
1617        let node_selection = NodeSelection::<Exclusions, ()>::new();
1618
1619        let exclusions = DashSet::from_iter([Exclusions::A]);
1620
1621        let mut nodes: BTreeMap<_, usize> = {
1622            let mut map = BTreeMap::new();
1623            let weight = unsafe { NonZeroUsize::new_unchecked(1) };
1624            map.insert(Node::with_default(weight), 0);
1625            map.insert(Node::with_exclusions(weight, (), exclusions.clone()), 0);
1626            map.insert(Node::with_default(weight), 0);
1627            map
1628        };
1629
1630        for node in nodes.keys() {
1631            let _ = node_selection.add(node.clone());
1632        }
1633
1634        for _ in 0..600 {
1635            let node_selected = node_selection.get_n_with_exclusions(
1636                &(rand::random::<f64>()).to_le_bytes(),
1637                2,
1638                &exclusions,
1639            );
1640            for node in node_selected {
1641                let node = nodes.get_mut(node.value()).unwrap();
1642                *node += 1;
1643            }
1644        }
1645
1646        let range = (600 - 50)..(600 + 50);
1647        for (node, counts) in nodes {
1648            if node.exclusions.is_empty() {
1649                assert!(range.contains(&counts));
1650            } else {
1651                assert_eq!(counts, 0);
1652            }
1653        }
1654    }
1655}
1656
1657#[cfg(test)]
1658mod bit_node_selection {
1659
1660    use std::collections::BTreeMap;
1661
1662    use super::*;
1663
1664    #[test]
1665    fn exclusions() {
1666        let node_selection = BitNodeSelection::<u8, ()>::new();
1667        let exclusions = 0b01;
1668
1669        let mut nodes: BTreeMap<_, usize> = unsafe {
1670            let mut map = BTreeMap::new();
1671            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(100)), 0);
1672            map.insert(
1673                BitNode::with_exclusions(NonZeroUsize::new_unchecked(200), (), exclusions),
1674                0,
1675            );
1676            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(300)), 0);
1677            map
1678        };
1679
1680        for node in nodes.keys() {
1681            let _ = node_selection.add(node.clone());
1682        }
1683
1684        for _ in 0..600 {
1685            let node_selected = node_selection
1686                .get_with_exclusions(&(rand::random::<f64>()).to_le_bytes(), exclusions);
1687            if let Some(node) = node_selected {
1688                let node = nodes.get_mut(node.value()).unwrap();
1689                *node += 1;
1690            }
1691        }
1692
1693        for (node, counts) in nodes {
1694            let anchor = node.weight.get() as usize * 3 / 2; // 600 draws over an eligible weight of 400
1695            if node.exclusions == 0 {
1696                let range = (anchor - 50)..(anchor + 50);
1697                assert!(range.contains(&counts));
1698            } else {
1699                assert_eq!(counts, 0);
1700            }
1701        }
1702    }
1703
1704    #[test]
1705    fn no_exclusions() {
1706        let node_selection = BitNodeSelection::<u8, ()>::new();
1707
1708        let mut nodes: BTreeMap<_, usize> = unsafe {
1709            let mut map = BTreeMap::new();
1710            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(100)), 0);
1711            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(200)), 0);
1712            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(300)), 0);
1713            map
1714        };
1715
1716        for node in nodes.keys() {
1717            let _ = node_selection.add(node.clone());
1718        }
1719
1720        for _ in 0..600 {
1721            let node_selected = node_selection.get(&(rand::random::<f64>()).to_le_bytes());
1722            if let Some(node) = node_selected {
1723                let node = nodes.get_mut(node.value()).unwrap();
1724                *node += 1;
1725            }
1726        }
1727
1728        for (node, counts) in nodes {
1729            let anchor = node.weight.get() as usize;
1730            let range = (anchor - 50)..(anchor + 50);
1731            assert!(range.contains(&counts));
1732        }
1733    }
1734
1735    #[test]
1736    fn get_n() {
1737        let node_selection = BitNodeSelection::<u8, ()>::new();
1738
1739        let mut nodes: BTreeMap<_, usize> = unsafe {
1740            let mut map = BTreeMap::new();
1741            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(200)), 0);
1742            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(200)), 0);
1743            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(200)), 0);
1744            map
1745        };
1746
1747        for node in nodes.keys() {
1748            let _ = node_selection.add(node.clone());
1749        }
1750
1751        for _ in 0..600 {
1752            let node_selected = node_selection.get_n(&(rand::random::<f64>()).to_le_bytes(), 2);
1753            for node in node_selected {
1754                let node = nodes.get_mut(node.value()).unwrap();
1755                *node += 1;
1756            }
1757        }
1758
1759        for (node, counts) in nodes {
1760            let anchor = node.weight.get() as usize * 2; // two of three equal nodes per draw: 600 * 2 / 3 = 400
1761            let range = (anchor - 50)..(anchor + 50);
1762            assert!(range.contains(&counts));
1763        }
1764    }
1765}
1766
1767#[cfg(all(test, feature = "rayon"))]
1768mod par_tests {
1769    use std::collections::BTreeMap;
1770
1771    use super::*;
1772
1773    #[test]
1774    fn bit_node_selection_get() {
1775        let node_selection = BitNodeSelection::<u8, ()>::new();
1776        let exclusions = 0b01;
1777
1778        let mut nodes: BTreeMap<_, usize> = unsafe {
1779            let mut map = BTreeMap::new();
1780            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(100)), 0);
1781            map.insert(
1782                BitNode::with_exclusions(NonZeroUsize::new_unchecked(200), (), exclusions),
1783                0,
1784            );
1785            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(300)), 0);
1786            map
1787        };
1788
1789        for node in nodes.keys() {
1790            let _ = node_selection.add(node.clone());
1791        }
1792
1793        for _ in 0..600 {
1794            let node_selected =
1795                node_selection.par_get(&(rand::random::<f64>()).to_le_bytes(), exclusions);
1796            if let Some(node) = node_selected {
1797                let node = nodes.get_mut(node.value()).unwrap();
1798                *node += 1;
1799            }
1800        }
1801
1802        for (node, counts) in nodes {
1803            let anchor = node.weight.get() as usize * 3 / 2; // 600 draws over an eligible weight of 400
1804            if node.exclusions == 0 {
1805                let range = (anchor - 50)..(anchor + 50);
1806                assert!(range.contains(&counts));
1807            } else {
1808                assert_eq!(counts, 0);
1809            }
1810        }
1811    }
1812
1813    #[test]
1814    fn bit_node_selection_get_n() {
1815        let node_selection = BitNodeSelection::<u8, ()>::new();
1816        let exclusions = 0b01;
1817
1818        let mut nodes: BTreeMap<_, usize> = unsafe {
1819            let mut map = BTreeMap::new();
1820            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(100)), 0);
1821            map.insert(
1822                BitNode::with_exclusions(NonZeroUsize::new_unchecked(200), (), exclusions),
1823                0,
1824            );
1825            map.insert(BitNode::with_default(NonZeroUsize::new_unchecked(300)), 0);
1826            map
1827        };
1828
1829        for node in nodes.keys() {
1830            let _ = node_selection.add(node.clone());
1831        }
1832
1833        for _ in 0..600 {
1834            let node_selected =
1835                node_selection.par_get_n(&(rand::random::<f64>()).to_le_bytes(), 2, exclusions);
1836            for node in node_selected {
1837                let node = nodes.get_mut(node.value()).unwrap();
1838                *node += 1;
1839            }
1840        }
1841
1842        for (node, counts) in nodes {
1843            let anchor = 600_usize; // both non-excluded nodes are selected on every draw
1844            if node.exclusions == 0 {
1845                let range = (anchor - 50)..(anchor + 50);
1846                assert!(range.contains(&counts));
1847            } else {
1848                assert_eq!(counts, 0);
1849            }
1850        }
1851    }
1852
1853    #[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug)]
1854    enum Exclusions {
1855        A,
1856    }
1857
1858    #[test]
1859    fn node_selection_get() {
1860        let node_selection = NodeSelection::<Exclusions, ()>::new();
1861
1862        let exclusions = DashSet::from_iter([Exclusions::A]);
1863
1864        let mut nodes: BTreeMap<_, usize> = {
1865            let mut map = BTreeMap::new();
1866            let weight = unsafe { NonZeroUsize::new_unchecked(1) };
1867            map.insert(Node::with_default(weight), 0);
1868            map.insert(Node::with_exclusions(weight, (), exclusions.clone()), 0);
1869            map.insert(Node::with_default(weight), 0);
1870            map
1871        };
1872
1873        for node in nodes.keys() {
1874            let _ = node_selection.add(node.clone());
1875        }
1876
1877        for _ in 0..600 {
1878            let node_selected =
1879                node_selection.par_get(&(rand::random::<f64>()).to_le_bytes(), Some(&exclusions));
1880            if let Some(node) = node_selected {
1881                let node = nodes.get_mut(node.value()).unwrap();
1882                *node += 1;
1883            }
1884        }
1885
1886        let range = (300 - 50)..(300 + 50);
1887        for (node, counts) in nodes {
1888            if node.exclusions.is_empty() {
1889                assert!(range.contains(&counts));
1890            } else {
1891                assert_eq!(counts, 0);
1892            }
1893        }
1894    }
1895
1896    #[test]
1897    fn node_selection_get_n() {
1898        let node_selection = NodeSelection::<Exclusions, ()>::new();
1899
1900        let exclusions = DashSet::from_iter([Exclusions::A]);
1901
1902        let mut nodes: BTreeMap<_, usize> = {
1903            let mut map = BTreeMap::new();
1904            let weight = unsafe { NonZeroUsize::new_unchecked(1) };
1905            map.insert(Node::with_default(weight), 0);
1906            map.insert(Node::with_exclusions(weight, (), exclusions.clone()), 0);
1907            map.insert(Node::with_default(weight), 0);
1908            map
1909        };
1910
1911        for node in nodes.keys() {
1912            let _ = node_selection.add(node.clone());
1913        }
1914
1915        for _ in 0..600 {
1916            let node_selected = node_selection.par_get_n(
1917                &(rand::random::<f64>()).to_le_bytes(),
1918                2,
1919                Some(&exclusions),
1920            );
1921            for node in node_selected {
1922                let node = nodes.get_mut(node.value()).unwrap();
1923                *node += 1;
1924            }
1925        }
1926
1927        let range = (600 - 50)..(600 + 50);
1928        for (node, counts) in nodes {
1929            if node.exclusions.is_empty() {
1930                assert!(range.contains(&counts));
1931            } else {
1932                assert_eq!(counts, 0);
1933            }
1934        }
1935    }
1936}