swh_graph_stdlib/collections/
node_set.rs1use std::collections::HashSet;
13use std::hash::BuildHasher;
14use std::marker::PhantomData;
15
16use rapidhash::RapidBuildHasher;
17use sux::bits::bit_vec::BitVec;
18use sux::traits::{BitVecOps, BitVecOpsMut};
19
20const PROMOTION_THRESHOLD: usize = 64;
32
33pub type NodeId = usize;
34
35pub trait ReadNodeSet {
37 fn contains(&self, node: NodeId) -> bool;
38}
39
40pub trait NodeSet: ReadNodeSet {
42 fn insert(&mut self, node: NodeId);
43}
44
45pub struct EmptyNodeSet;
46
47impl ReadNodeSet for EmptyNodeSet {
48 #[inline(always)]
49 fn contains(&self, _node: usize) -> bool {
50 false
51 }
52}
53impl IntoIterator for EmptyNodeSet {
54 type Item = NodeId;
55 type IntoIter = std::iter::Empty<NodeId>;
56
57 #[inline(always)]
58 fn into_iter(self) -> Self::IntoIter {
59 std::iter::empty()
60 }
61}
62
63impl<S: BuildHasher> NodeSet for HashSet<usize, S> {
64 #[inline(always)]
65 fn insert(&mut self, node: usize) {
66 HashSet::insert(self, node);
67 }
68}
69impl<S: BuildHasher> ReadNodeSet for HashSet<usize, S> {
70 #[inline(always)]
71 fn contains(&self, node: usize) -> bool {
72 HashSet::contains(self, &node)
73 }
74}
75
76impl NodeSet for BitVec {
77 #[inline(always)]
78 fn insert(&mut self, node: usize) {
79 self.set(node, true);
80 }
81}
82impl ReadNodeSet for BitVec {
83 #[inline(always)]
84 fn contains(&self, node: usize) -> bool {
85 self.get(node)
86 }
87}
88
89#[derive(Debug, PartialEq, Eq)]
90pub struct SortedSlice<Item: Ord, S: AsRef<[Item]> + ?Sized> {
96 pub marker: PhantomData<Item>,
97 pub slice: S,
98}
99
100impl<Item: Ord, S: AsRef<[Item]>> SortedSlice<Item, S> {
101 pub fn new(slice: S) -> Self {
102 SortedSlice {
103 marker: PhantomData,
104 slice,
105 }
106 }
107}
108
109impl<S: AsRef<[NodeId]> + ?Sized> ReadNodeSet for SortedSlice<NodeId, S> {
110 #[inline(always)]
112 fn contains(&self, node: NodeId) -> bool {
113 self.slice.as_ref().binary_search(&node).is_ok()
114 }
115}
116
117impl<Item: Ord, S: AsRef<[Item]>> std::ops::Deref for SortedSlice<Item, S> {
118 type Target = S;
119
120 fn deref(&self) -> &Self::Target {
121 &self.slice
122 }
123}
124
125impl<'a, Item: Ord + Copy> IntoIterator for SortedSlice<Item, &'a [Item]> {
126 type Item = Item;
127 type IntoIter = std::iter::Copied<std::slice::Iter<'a, Item>>;
128
129 fn into_iter(self) -> Self::IntoIter {
130 self.slice.iter().copied()
131 }
132}
133
134impl<'a, Item: Ord, S: AsRef<[Item]>> From<&'a SortedSlice<Item, S>>
135 for SortedSlice<Item, &'a [Item]>
136{
137 fn from(v: &'a SortedSlice<Item, S>) -> Self {
138 SortedSlice {
139 marker: PhantomData,
140 slice: v.slice.as_ref(),
141 }
142 }
143}
144
145#[derive(Debug)]
166pub enum AdaptiveNodeSet {
167 Sparse {
168 max_items: usize,
169 data: HashSet<NodeId, RapidBuildHasher>,
170 },
171 Dense {
172 data: BitVec,
173 },
174}
175
176impl AdaptiveNodeSet {
177 #[inline(always)]
179 pub fn new(max_items: usize) -> Self {
180 AdaptiveNodeSet::Sparse {
181 max_items,
182 data: HashSet::with_hasher(RapidBuildHasher::default()),
183 }
184 }
185
186 #[inline(always)]
188 pub fn with_capacity(max_items: usize, capacity: usize) -> Self {
189 if capacity > max_items / PROMOTION_THRESHOLD {
190 AdaptiveNodeSet::Dense {
191 data: BitVec::new(max_items),
192 }
193 } else {
194 AdaptiveNodeSet::Sparse {
195 max_items,
196 data: HashSet::with_capacity_and_hasher(capacity, RapidBuildHasher::default()),
197 }
198 }
199 }
200}
201
202impl NodeSet for AdaptiveNodeSet {
203 #[inline(always)]
209 fn insert(&mut self, node: usize) {
210 match self {
211 AdaptiveNodeSet::Sparse { max_items, data } => {
212 data.insert(node);
213 if data.len() > *max_items / PROMOTION_THRESHOLD {
214 let mut new_data = BitVec::new(*max_items);
216 for node in data.iter() {
217 new_data.insert(*node);
218 }
219 *self = AdaptiveNodeSet::Dense { data: new_data };
220 }
221 }
222 AdaptiveNodeSet::Dense { data } => data.insert(node),
223 }
224 }
225}
226
227impl ReadNodeSet for AdaptiveNodeSet {
228 #[inline(always)]
234 fn contains(&self, node: usize) -> bool {
235 match self {
236 AdaptiveNodeSet::Sparse { max_items: _, data } => data.contains(&node),
237 AdaptiveNodeSet::Dense { data } => data.contains(node),
238 }
239 }
240}