Skip to main content

swh_graph_stdlib/collections/
small_node_set.rs

1// Copyright (C) 2025-2026  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::collections::HashSet;
7
8use crate::collections::{NodeSet, ReadNodeSet};
9use rapidhash::RapidBuildHasher;
10
11type NodeId = usize;
12
13const EMPTY: usize = usize::MAX;
14
15#[repr(C)]
16union _SmallNodeSet {
17    /// Always has its least significant bit set to 1.
18    ///
19    /// If equal to [`EMPTY`], then there is no node in the set.
20    /// Otherwise, there is a single node, obtained by shifting one bit right
21    node: usize,
22    /// Always has its least significant bit set to 0.
23    nodes: *mut HashSet<usize, RapidBuildHasher>,
24}
25
26// SAFETY: HashSet is send, so sending a pointer to it is safe
27unsafe impl Send for _SmallNodeSet {}
28
29impl Drop for _SmallNodeSet {
30    fn drop(&mut self) {
31        match unsafe { self.node } {
32            EMPTY => {
33                // the set is empty, nothing to do
34            }
35            value if value & 0b1 == 1 => {
36                // the set has one item, nothing to do
37            }
38            _ => {
39                // the set is a hashset
40                drop(unsafe { Box::from_raw(self.nodes) })
41            }
42        }
43    }
44}
45
46impl Clone for _SmallNodeSet {
47    fn clone(&self) -> Self {
48        match unsafe { self.node } {
49            EMPTY => _SmallNodeSet { node: EMPTY },
50            value if value & 0b1 == 1 => _SmallNodeSet { node: value },
51            _ => {
52                let hashset = unsafe { (*self.nodes).clone() };
53                let hashset_ptr = Box::into_raw(Box::new(hashset));
54                assert!(
55                    (hashset_ptr as usize) & 0b1 == 0,
56                    "hashset_ptr is not aligned: {hashset_ptr:?}"
57                );
58                _SmallNodeSet { nodes: hashset_ptr }
59            }
60        }
61    }
62}
63
64/// A [`NodeSet`] implementation optimized to store zero or one node
65///
66/// When storing zero or a single node, this structure takes only the size of one node.
67/// When storing two nodes or more, it is equivalent to `Box<HashSet<NodeId, RapidBuildHasher>>`.
68#[repr(transparent)]
69#[derive(Clone)]
70pub struct SmallNodeSet(_SmallNodeSet);
71
72// SAFETY: we don't have interior mutability
73unsafe impl Sync for SmallNodeSet {}
74
75impl Default for SmallNodeSet {
76    fn default() -> Self {
77        SmallNodeSet(_SmallNodeSet { node: EMPTY })
78    }
79}
80
81impl SmallNodeSet {
82    pub fn iter(&self) -> Iter<'_> {
83        match unsafe { self.0.node } {
84            EMPTY => Iter {
85                next: None,
86                iter: None,
87            },
88            value if value & 0b1 == 1 => Iter {
89                // single value
90                next: Some(value >> 1),
91                iter: None,
92            },
93            _ => {
94                let mut iter = unsafe { self.get_hashset() }.iter();
95                Iter {
96                    next: iter.next().copied(),
97                    iter: Some(iter),
98                }
99            }
100        }
101    }
102
103    unsafe fn get_hashset(&self) -> &HashSet<NodeId, RapidBuildHasher> {
104        unsafe { &*self.0.nodes }
105    }
106
107    unsafe fn get_hashset_mut(&mut self) -> &mut HashSet<NodeId, RapidBuildHasher> {
108        unsafe { &mut *self.0.nodes }
109    }
110
111    pub fn len(&self) -> usize {
112        match unsafe { self.0.node } {
113            EMPTY => 0,
114            value if value & 0b1 == 1 => 1,
115            _ => unsafe { self.get_hashset() }.len(),
116        }
117    }
118
119    pub fn is_empty(&self) -> bool {
120        match unsafe { self.0.node } {
121            EMPTY => true,
122            _ => false, // we don't support deletion or with_capacity() yet
123        }
124    }
125}
126
127impl std::fmt::Debug for SmallNodeSet {
128    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
129        f.debug_tuple("SmallNodeSet")
130            .field(&self.iter().collect::<HashSet<_>>())
131            .finish()
132    }
133}
134
135impl PartialEq for SmallNodeSet {
136    fn eq(&self, other: &Self) -> bool {
137        match unsafe { (self.0.node, other.0.node) } {
138            (EMPTY, EMPTY) => true,
139            (EMPTY, _) | (_, EMPTY) => false, // we don't support deletion or with_capacity() yet
140
141            (value1, value2) if value1 & 0b1 == 1 && value2 & 0b1 == 1 => value1 == value2,
142            (value1, _) if value1 & 0b1 == 1 => false, // ditto
143            (_, value2) if value2 & 0b1 == 1 => false, // ditto
144            (_, _) => unsafe { self.get_hashset() == other.get_hashset() },
145        }
146    }
147}
148
149impl Eq for SmallNodeSet {}
150
151impl<'a> IntoIterator for &'a SmallNodeSet {
152    type IntoIter = Iter<'a>;
153    type Item = NodeId;
154
155    fn into_iter(self) -> Self::IntoIter {
156        self.iter()
157    }
158}
159
160pub struct Iter<'a> {
161    next: Option<NodeId>,
162    iter: Option<std::collections::hash_set::Iter<'a, NodeId>>,
163}
164
165impl Iterator for Iter<'_> {
166    type Item = NodeId;
167
168    fn next(&mut self) -> Option<Self::Item> {
169        let next = self.next?;
170        self.next = self.iter.as_mut().and_then(|iter| iter.next()).copied();
171        Some(next)
172    }
173}
174
175impl NodeSet for SmallNodeSet {
176    fn insert(&mut self, node: NodeId) {
177        match unsafe { self.0.node } {
178            EMPTY => {
179                if node & (0b1 << (usize::BITS - 1)) == 0 {
180                    // the set is empty AND the node does fits in 63 bits: set the node in the
181                    // 63 most significant bits
182                    self.0.node = (node << 1) | 0b1;
183                } else {
184                    // promote directly to hashset because the node does not fit in 63 bits
185                    // (and we need the least-significant bit to indicate that this is a single
186                    // node)
187                    let hashset: HashSet<_, _> = [node].into_iter().collect();
188                    let hashset_ptr = Box::into_raw(Box::new(hashset));
189                    assert!(
190                        (hashset_ptr as usize) & 0b1 == 0,
191                        "hashset_ptr is not aligned: {hashset_ptr:?}"
192                    );
193                    self.0.nodes = hashset_ptr
194                }
195            }
196            value if value & 0b1 == 1 => {
197                // the set has one item: we need to promote it to a hashset
198                let old_node = value >> 1;
199                let hashset: HashSet<_, _> = [old_node, node].into_iter().collect();
200                let hashset_ptr = Box::into_raw(Box::new(hashset));
201                assert!(
202                    (hashset_ptr as usize) & 0b1 == 0,
203                    "hashset_ptr is not aligned: {hashset_ptr:?}"
204                );
205                self.0.nodes = hashset_ptr
206            }
207            _ => {
208                // the set is already a hashset
209                unsafe { self.get_hashset_mut() }.insert(node);
210            }
211        }
212    }
213}
214impl ReadNodeSet for SmallNodeSet {
215    fn contains(&self, node: NodeId) -> bool {
216        match unsafe { self.0.node } {
217            EMPTY => {
218                // the set is empty
219                false
220            }
221            value if value & 0b1 == 1 => {
222                // the set has one item
223                let existing_node = value >> 1;
224                node == existing_node
225            }
226            _ => {
227                // the set is a hashset
228                unsafe { self.get_hashset() }.contains(&node)
229            }
230        }
231    }
232}
233
234#[macro_export]
235macro_rules! smallnodeset {
236    [$($node:expr),* $(,)?] => {{
237        let mut nodes = SmallNodeSet::default();
238        $(
239            nodes.insert($node);
240        )*
241        nodes
242    }};
243
244}
245
246pub use smallnodeset;