Skip to main content

swh_graph_stdlib/labeling/
labels.rs

1// Copyright (C) 2026  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Associate information to a subset of nodes
7
8use std::borrow::{Borrow, BorrowMut};
9use std::ops::Range;
10
11use bytemuck::{AnyBitPattern, TransparentWrapper};
12use rapidhash::RapidHashMap;
13use rayon::prelude::*;
14use sux::bits::BitVec;
15use sux::traits::{BitVecOps, BitVecOpsMut};
16
17use crate::NodeId;
18
19pub trait Labels {
20    type Label: ToOwned + ?Sized;
21    type Config;
22
23    fn new(num_nodes: usize, config: Self::Config) -> Self;
24    fn insert(
25        &mut self,
26        node: NodeId,
27        label: <Self::Label as ToOwned>::Owned,
28    ) -> Option<<Self::Label as ToOwned>::Owned>;
29    fn get(&self, node: NodeId) -> Option<&Self::Label>;
30    fn contains_key(&self, node: NodeId) -> bool;
31    fn remove(&mut self, node: NodeId) -> Option<<Self::Label as ToOwned>::Owned>;
32    fn is_empty(&self) -> bool;
33}
34
35pub struct SparseLabels<Label: ToOwned + Sized> {
36    labels: RapidHashMap<NodeId, <Label as ToOwned>::Owned>,
37}
38
39impl<Label: ToOwned> Labels for SparseLabels<Label> {
40    type Label = Label;
41    type Config = ();
42
43    fn new(_num_nodes: usize, _config: Self::Config) -> Self {
44        Self {
45            labels: RapidHashMap::default(),
46        }
47    }
48    fn insert(
49        &mut self,
50        node: NodeId,
51        label: <Self::Label as ToOwned>::Owned,
52    ) -> Option<<Self::Label as ToOwned>::Owned> {
53        self.labels.insert(node, label)
54    }
55    fn get(&self, node: NodeId) -> Option<&Self::Label> {
56        self.labels.get(&node).map(Borrow::borrow)
57    }
58    fn contains_key(&self, node: NodeId) -> bool {
59        self.labels.contains_key(&node)
60    }
61    fn remove(&mut self, node: NodeId) -> Option<<Self::Label as ToOwned>::Owned> {
62        self.labels.remove(&node)
63    }
64    fn is_empty(&self) -> bool {
65        self.labels.is_empty()
66    }
67}
68
69impl<Label: ToOwned> SparseLabels<Label> {
70    /// Returns an iterator with each labeled node
71    pub fn iter_labeled(&self) -> impl Iterator<Item = (NodeId, &Label)> {
72        self.labels
73            .iter()
74            .map(|(&node, label)| (node, label.borrow()))
75    }
76
77    /// Returns a parallel iterator with each labeled node
78    pub fn into_iter_labeled(self) -> impl Iterator<Item = (NodeId, <Label as ToOwned>::Owned)> {
79        self.labels.into_iter()
80    }
81}
82
83pub struct DenseLabels<Label: Sized> {
84    labels: Box<[Label]>,
85    /// which labels are actually set, and which are the default value
86    is_set: BitVec,
87    /// number of labels actually set
88    len: usize,
89}
90
91impl<Label: Default + Clone> Labels for DenseLabels<Label> {
92    type Label = Label;
93    type Config = ();
94
95    fn new(num_nodes: usize, _config: Self::Config) -> Self {
96        Self {
97            labels: vec![Label::default(); num_nodes].into(),
98            is_set: BitVec::new(num_nodes),
99            len: 0,
100        }
101    }
102    fn insert(
103        &mut self,
104        node: NodeId,
105        mut label: <Self::Label as ToOwned>::Owned,
106    ) -> Option<Self::Label> {
107        let was_set = self.contains_key(node);
108        std::mem::swap(&mut self.labels[node], &mut label);
109        self.is_set.set(node, true);
110        if was_set {
111            Some(label)
112        } else {
113            self.len += 1;
114            None
115        }
116    }
117    fn get(&self, node: NodeId) -> Option<&Self::Label> {
118        if self.contains_key(node) {
119            Some(&self.labels[node])
120        } else {
121            None
122        }
123    }
124    fn contains_key(&self, node: NodeId) -> bool {
125        self.is_set.get(node)
126    }
127    fn remove(&mut self, node: NodeId) -> Option<Self::Label> {
128        if self.contains_key(node) {
129            self.len -= 1;
130            self.is_set.set(node, false);
131            let mut label = Label::default();
132            std::mem::swap(self.labels.get_mut(node)?, &mut label);
133            Some(label)
134        } else {
135            None
136        }
137    }
138    fn is_empty(&self) -> bool {
139        self.len == 0
140    }
141}
142
143impl<Label: Default + Clone> DenseLabels<Label> {
144    /// Returns an iterator with the label of each node, if any
145    pub fn iter(&self) -> impl Iterator<Item = Option<&Label>> {
146        self.is_set
147            .iter()
148            .zip(self.labels.iter())
149            .map(|(is_set, value)| if is_set { Some(value) } else { None })
150    }
151
152    /// Returns an iterator with each labeled node
153    pub fn iter_labeled(&self) -> impl Iterator<Item = (NodeId, &Label)> {
154        self.is_set
155            .iter()
156            .zip(self.labels.iter())
157            .enumerate()
158            .filter_map(|(node, (is_set, value))| if is_set { Some((node, value)) } else { None })
159    }
160
161    /// Returns a parallel iterator with the label of each node, if any
162    pub fn par_iter(&self) -> impl ParallelIterator<Item = Option<&Label>>
163    where
164        Label: Sync,
165    {
166        self.labels
167            .par_iter()
168            .enumerate()
169            .map(move |(node, value)| {
170                if self.is_set.get(node) {
171                    Some(value)
172                } else {
173                    None
174                }
175            })
176    }
177
178    /// Returns a parallel iterator with each labeled node
179    pub fn par_iter_labeled(&self) -> impl ParallelIterator<Item = (NodeId, &Label)>
180    where
181        Label: Sync,
182    {
183        self.labels
184            .par_iter()
185            .enumerate()
186            .filter_map(move |(node, value)| {
187                if self.is_set.get(node) {
188                    Some((node, value))
189                } else {
190                    None
191                }
192            })
193    }
194
195    /// Returns a parallel iterator with the label of each node, if any
196    pub fn into_par_iter(self) -> impl ParallelIterator<Item = Option<Label>>
197    where
198        Label: Send,
199    {
200        let Self { labels, is_set, .. } = self;
201        Vec::from(labels)
202            .into_par_iter()
203            .enumerate()
204            .map(
205                move |(node, value)| {
206                    if is_set.get(node) { Some(value) } else { None }
207                },
208            )
209    }
210
211    /// Returns a parallel iterator with each labeled node
212    pub fn into_par_iter_labeled(&self) -> impl ParallelIterator<Item = (NodeId, &Label)>
213    where
214        Label: Sync,
215    {
216        let Self { labels, is_set, .. } = self;
217        labels
218            .into_par_iter()
219            .enumerate()
220            .filter_map(move |(node, value)| {
221                if is_set.get(node) {
222                    Some((node, value))
223                } else {
224                    None
225                }
226            })
227    }
228}
229
230/// A [`Label`](super::MapReducer::Label) that is a view over a slice of an array
231pub trait StridableLabel: ToOwned<Owned: BorrowMut<Self>> {
232    type Word: Default + Copy;
233
234    fn from_stride(stride: &[Self::Word]) -> &Self;
235    fn swap_with_stride(&mut self, stride: &mut [Self::Word]);
236}
237
238/// Implementation of [`StridableLabel`] that simply wraps a slice.
239#[derive(Debug, PartialEq, Eq, TransparentWrapper, derive_more::AsRef, derive_more::AsMut)]
240#[repr(transparent)]
241pub struct SliceLabel<Word: AnyBitPattern>(pub [Word]);
242
243impl<Word: AnyBitPattern> ToOwned for SliceLabel<Word> {
244    type Owned = BoxLabel<Word>;
245
246    fn to_owned(&self) -> Self::Owned {
247        BoxLabel(self.0.to_owned().into())
248    }
249}
250
251/// Helper for [`SliceLabel`]
252#[derive(Debug, PartialEq, Eq, Default, TransparentWrapper)]
253#[repr(transparent)]
254pub struct BoxLabel<Word: AnyBitPattern>(pub Box<[Word]>);
255
256impl<Word: AnyBitPattern> Borrow<SliceLabel<Word>> for BoxLabel<Word> {
257    fn borrow(&self) -> &SliceLabel<Word> {
258        SliceLabel::wrap_ref(Self::peel_ref(self))
259    }
260}
261
262impl<Word: AnyBitPattern> BorrowMut<SliceLabel<Word>> for BoxLabel<Word> {
263    fn borrow_mut(&mut self) -> &mut SliceLabel<Word> {
264        SliceLabel::wrap_mut(Self::peel_mut(self))
265    }
266}
267
268impl<Word: AnyBitPattern + Default> StridableLabel for SliceLabel<Word> {
269    type Word = Word;
270
271    fn from_stride(stride: &[Self::Word]) -> &Self {
272        Self::wrap_ref(stride)
273    }
274    fn swap_with_stride(&mut self, stride: &mut [Self::Word]) {
275        stride.swap_with_slice(TransparentWrapper::peel_mut(self))
276    }
277}
278
279pub struct StriddenLabelsConfig {
280    pub num_words: usize,
281}
282
283pub struct StriddenLabels<Label: StridableLabel + ?Sized> {
284    labels: Box<[Label::Word]>,
285    /// How many `Label::Word`s are in a label
286    num_words: usize,
287    /// which labels are actually set, and which are the default value
288    is_set: BitVec,
289    /// number of labels actually set
290    len: usize,
291}
292
293impl<Label: StridableLabel + ?Sized> Labels for StriddenLabels<Label> {
294    type Label = Label;
295    type Config = StriddenLabelsConfig;
296
297    fn new(num_nodes: usize, StriddenLabelsConfig { num_words }: Self::Config) -> Self {
298        Self {
299            labels: vec![Label::Word::default(); num_nodes * num_words].into(),
300            num_words,
301            is_set: BitVec::new(num_nodes),
302            len: 0,
303        }
304    }
305    fn insert(
306        &mut self,
307        node: NodeId,
308        mut label: <Self::Label as ToOwned>::Owned,
309    ) -> Option<<Self::Label as ToOwned>::Owned> {
310        let was_set = self.contains_key(node);
311        let range = self.stride_range(node);
312        label.borrow_mut().swap_with_stride(&mut self.labels[range]);
313        self.is_set.set(node, true);
314        if was_set {
315            Some(label)
316        } else {
317            self.len += 1;
318            None
319        }
320    }
321    fn get(&self, node: NodeId) -> Option<&Self::Label> {
322        if self.contains_key(node) {
323            Some(Self::Label::from_stride(
324                &self.labels[self.stride_range(node)],
325            ))
326        } else {
327            None
328        }
329    }
330    fn contains_key(&self, node: NodeId) -> bool {
331        self.is_set.get(node)
332    }
333    fn remove(&mut self, node: NodeId) -> Option<<Self::Label as ToOwned>::Owned> {
334        if self.contains_key(node) {
335            self.len -= 1;
336            self.is_set.set(node, false);
337            let range = self.stride_range(node);
338            let label = Label::from_stride(&self.labels[range.clone()]).to_owned();
339            self.labels[range].fill(Label::Word::default());
340            Some(label)
341        } else {
342            None
343        }
344    }
345    fn is_empty(&self) -> bool {
346        self.len == 0
347    }
348}
349
350impl<Label: StridableLabel + ?Sized> StriddenLabels<Label> {
351    fn stride_range(&self, node: NodeId) -> Range<usize> {
352        (node * self.num_words)..(node + 1) * self.num_words
353    }
354}
355
356impl<Label: StridableLabel + ?Sized> StriddenLabels<Label> {
357    /// Returns an iterator with the label of each node, if any
358    pub fn iter(&self) -> impl Iterator<Item = Option<&Label>> {
359        self.is_set
360            .iter()
361            .zip(self.labels.chunks(self.num_words))
362            .map(|(is_set, value)| {
363                if is_set {
364                    Some(Label::from_stride(value))
365                } else {
366                    None
367                }
368            })
369    }
370
371    /// Returns an iterator with each labeled node
372    pub fn iter_labeled(&self) -> impl Iterator<Item = (NodeId, &Label)> {
373        self.is_set
374            .iter()
375            .zip(self.labels.chunks(self.num_words))
376            .enumerate()
377            .filter_map(|(node, (is_set, value))| {
378                if is_set {
379                    Some((node, Label::from_stride(value)))
380                } else {
381                    None
382                }
383            })
384    }
385
386    /// Returns a parallel iterator with the label of each node, if any
387    pub fn par_iter(&self) -> impl ParallelIterator<Item = Option<&Label>>
388    where
389        Label: StridableLabel<Word: Sync> + Sync,
390    {
391        self.labels
392            .par_chunks(self.num_words)
393            .enumerate()
394            .map(move |(node, value)| {
395                if self.is_set.get(node) {
396                    Some(Label::from_stride(value))
397                } else {
398                    None
399                }
400            })
401    }
402
403    /// Returns a parallel iterator with each labeled node
404    pub fn par_iter_labeled(&self) -> impl ParallelIterator<Item = (NodeId, &Label)>
405    where
406        Label: StridableLabel<Word: Sync> + Sync,
407    {
408        self.labels
409            .par_chunks(self.num_words)
410            .enumerate()
411            .filter_map(move |(node, value)| {
412                if self.is_set.get(node) {
413                    Some((node, Label::from_stride(value)))
414                } else {
415                    None
416                }
417            })
418    }
419}