swh_graph_stdlib/collections/
small_node_set.rs

1// Copyright (C) 2025  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::collections::HashSet;
7
8use crate::collections::{NodeSet, ReadNodeSet};
9use rapidhash::RapidBuildHasher;
10
11type NodeId = usize;
12
13const EMPTY: usize = usize::MAX;
14
15#[repr(C)]
16union _SmallNodeSet {
17    /// Always has its least significant bit set to 1.
18    ///
19    /// If equal to [`EMPTY`], then there is no node in the set.
20    /// Otherwise, there is a single node, obtained by shifting one bit right
21    node: usize,
22    /// Always has its least significant bit set to 0.
23    nodes: *mut HashSet<usize, RapidBuildHasher>,
24}
25
26// SAFETY: HashSet is send, so sending a pointer to it is safe
27unsafe impl Send for _SmallNodeSet {}
28
29impl Drop for _SmallNodeSet {
30    fn drop(&mut self) {
31        match unsafe { self.node } {
32            EMPTY => {
33                // the set is empty, nothing to do
34            }
35            value if value & 0b1 == 1 => {
36                // the set has one item, nothing to do
37            }
38            _ => {
39                // the set is a hashset
40                drop(unsafe { Box::from_raw(self.nodes) })
41            }
42        }
43    }
44}
45
46impl Clone for _SmallNodeSet {
47    fn clone(&self) -> Self {
48        match unsafe { self.node } {
49            EMPTY => _SmallNodeSet { node: EMPTY },
50            value if value & 0b1 == 1 => _SmallNodeSet { node: value },
51            _ => {
52                let hashset = unsafe { (*self.nodes).clone() };
53                let hashset_ptr = Box::into_raw(Box::new(hashset));
54                assert!(
55                    (hashset_ptr as usize) & 0b1 == 0,
56                    "hashset_ptr is not aligned: {hashset_ptr:?}"
57                );
58                _SmallNodeSet { nodes: hashset_ptr }
59            }
60        }
61    }
62}
63
64/// A [`NodeSet`] implementation optimized to store zero or one node
65///
66/// When storing zero or a single node, this structure takes only the size of one node.
67/// When storing two nodes or more, it is equivalent to `Box<HashSet<NodeId, RapidBuildHasher>>`.
68#[repr(transparent)]
69#[derive(Clone)]
70pub struct SmallNodeSet(_SmallNodeSet);
71
72impl Default for SmallNodeSet {
73    fn default() -> Self {
74        SmallNodeSet(_SmallNodeSet { node: EMPTY })
75    }
76}
77
78impl SmallNodeSet {
79    pub fn iter(&self) -> Iter<'_> {
80        match unsafe { self.0.node } {
81            EMPTY => Iter {
82                next: None,
83                iter: None,
84            },
85            value if value & 0b1 == 1 => Iter {
86                // single value
87                next: Some(value >> 1),
88                iter: None,
89            },
90            _ => {
91                let mut iter = unsafe { self.get_hashset() }.iter();
92                Iter {
93                    next: iter.next().copied(),
94                    iter: Some(iter),
95                }
96            }
97        }
98    }
99
100    unsafe fn get_hashset(&self) -> &HashSet<NodeId, RapidBuildHasher> {
101        unsafe { &*self.0.nodes }
102    }
103
104    unsafe fn get_hashset_mut(&mut self) -> &mut HashSet<NodeId, RapidBuildHasher> {
105        unsafe { &mut *self.0.nodes }
106    }
107
108    pub fn len(&self) -> usize {
109        match unsafe { self.0.node } {
110            EMPTY => 0,
111            value if value & 0b1 == 1 => 1,
112            _ => unsafe { self.get_hashset() }.len(),
113        }
114    }
115
116    pub fn is_empty(&self) -> bool {
117        match unsafe { self.0.node } {
118            EMPTY => true,
119            _ => false, // we don't support deletion or with_capacity() yet
120        }
121    }
122}
123
124impl std::fmt::Debug for SmallNodeSet {
125    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
126        f.debug_tuple("SmallNodeSet")
127            .field(&self.iter().collect::<HashSet<_>>())
128            .finish()
129    }
130}
131
132impl PartialEq for SmallNodeSet {
133    fn eq(&self, other: &Self) -> bool {
134        match unsafe { (self.0.node, other.0.node) } {
135            (EMPTY, EMPTY) => true,
136            (EMPTY, _) | (_, EMPTY) => false, // we don't support deletion or with_capacity() yet
137
138            (value1, value2) if value1 & 0b1 == 1 && value2 & 0b1 == 1 => value1 == value2,
139            (value1, _) if value1 & 0b1 == 1 => false, // ditto
140            (_, value2) if value2 & 0b1 == 1 => false, // ditto
141            (_, _) => unsafe { self.get_hashset() == other.get_hashset() },
142        }
143    }
144}
145
146impl Eq for SmallNodeSet {}
147
148impl<'a> IntoIterator for &'a SmallNodeSet {
149    type IntoIter = Iter<'a>;
150    type Item = NodeId;
151
152    fn into_iter(self) -> Self::IntoIter {
153        self.iter()
154    }
155}
156
157pub struct Iter<'a> {
158    next: Option<NodeId>,
159    iter: Option<std::collections::hash_set::Iter<'a, NodeId>>,
160}
161
162impl Iterator for Iter<'_> {
163    type Item = NodeId;
164
165    fn next(&mut self) -> Option<Self::Item> {
166        let next = self.next?;
167        self.next = self.iter.as_mut().and_then(|iter| iter.next()).copied();
168        Some(next)
169    }
170}
171
172impl NodeSet for SmallNodeSet {
173    fn insert(&mut self, node: NodeId) {
174        match unsafe { self.0.node } {
175            EMPTY => {
176                if node & (0b1 << (usize::BITS - 1)) == 0 {
177                    // the set is empty AND the node does fits in 63 bits: set the node in the
178                    // 63 most significant bits
179                    self.0.node = (node << 1) | 0b1;
180                } else {
181                    // promote directly to hashset because the node does not fit in 63 bits
182                    // (and we need the least-significant bit to indicate that this is a single
183                    // node)
184                    let hashset: HashSet<_, _> = [node].into_iter().collect();
185                    let hashset_ptr = Box::into_raw(Box::new(hashset));
186                    assert!(
187                        (hashset_ptr as usize) & 0b1 == 0,
188                        "hashset_ptr is not aligned: {hashset_ptr:?}"
189                    );
190                    self.0.nodes = hashset_ptr
191                }
192            }
193            value if value & 0b1 == 1 => {
194                // the set has one item: we need to promote it to a hashset
195                let old_node = value >> 1;
196                let hashset: HashSet<_, _> = [old_node, node].into_iter().collect();
197                let hashset_ptr = Box::into_raw(Box::new(hashset));
198                assert!(
199                    (hashset_ptr as usize) & 0b1 == 0,
200                    "hashset_ptr is not aligned: {hashset_ptr:?}"
201                );
202                self.0.nodes = hashset_ptr
203            }
204            _ => {
205                // the set is already a hashset
206                unsafe { self.get_hashset_mut() }.insert(node);
207            }
208        }
209    }
210}
211impl ReadNodeSet for SmallNodeSet {
212    fn contains(&self, node: NodeId) -> bool {
213        match unsafe { self.0.node } {
214            EMPTY => {
215                // the set is empty
216                false
217            }
218            value if value & 0b1 == 1 => {
219                // the set has one item
220                let existing_node = value >> 1;
221                node == existing_node
222            }
223            _ => {
224                // the set is a hashset
225                unsafe { self.get_hashset() }.contains(&node)
226            }
227        }
228    }
229}
230
231#[macro_export]
232macro_rules! smallnodeset {
233    [$($node:expr),* $(,)?] => {{
234        let mut nodes = SmallNodeSet::default();
235        $(
236            nodes.insert($node);
237        )*
238        nodes
239    }};
240
241}
242
243pub use smallnodeset;