swh_graph/map/
node2type.rs

1// Copyright (C) 2023-2024  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use crate::{NodeType, OutOfBoundError};
7use anyhow::{Context, Result};
8use log::info;
9use mmap_rs::{Mmap, MmapFlags, MmapMut};
10use std::path::Path;
11use sux::bits::BitFieldVec;
12use value_traits::slices::{SliceByValue, SliceByValueMut};
13
/// Struct to create and load a `.node2type.bin` file and convert node ids to types.
///
/// Node types are stored packed in a [`BitFieldVec`], using
/// `NodeType::BITWIDTH` bits per node and indexed by node id. The backend `B`
/// is the word storage underneath it: a memory-mapped file (read-only or
/// mutable) or a plain `Vec<usize>`, depending on which constructor is used.
pub struct Node2Type<B> {
    /// Packed node types; entry `i` is the type of the node with id `i`
    data: BitFieldVec<usize, B>,
}
18
19impl<B: AsRef<[usize]>> Node2Type<B> {
20    #[inline]
21    /// Get the type of a node with id `node_id` without bounds checking
22    ///
23    /// # Safety
24    /// This function is unsafe because it does not check that `node_id` is
25    /// within bounds of the array if debug asserts are disabled
26    pub unsafe fn get_unchecked(&self, node_id: usize) -> NodeType {
27        NodeType::try_from(self.data.get_value_unchecked(node_id) as u8).unwrap()
28    }
29
30    #[inline]
31    /// Get the type of a node with id `node_id`
32    pub fn get(&self, node_id: usize) -> Result<NodeType, OutOfBoundError> {
33        NodeType::try_from(self.data.index_value(node_id) as u8).map_err(|_| OutOfBoundError {
34            index: node_id,
35            len: self.data.len(),
36        })
37    }
38}
39
40impl<B: AsRef<[usize]> + AsMut<[usize]>> Node2Type<B> {
41    #[inline]
42    /// Get the type of a node with id `node_id` without bounds checking
43    ///
44    /// # Safety
45    /// This function is unsafe because it does not check that `node_id` is
46    /// within bounds of the array if debug asserts are disabled
47    pub unsafe fn set_unchecked(&mut self, node_id: usize, node_type: NodeType) {
48        self.data.set_value_unchecked(node_id, node_type as usize);
49    }
50
51    #[inline]
52    /// Set the type of a node with id `node_id`
53    pub fn set(&mut self, node_id: usize, node_type: NodeType) {
54        self.data.set_value(node_id, node_type as usize);
55    }
56}
57
/// Newtype for [`Mmap`]/[`MmapMut`] which can be dereferenced as slices of usize
/// instead of slices of u8, so it can be used as backend for [`BitFieldVec`].
///
/// The conversion is done with `bytemuck` in the [`AsRef`] and [`AsMut`]
/// implementations of this type.
pub struct UsizeMmap<B>(B);
62
63impl<B: AsRef<[u8]>> AsRef<[usize]> for UsizeMmap<B> {
64    fn as_ref(&self) -> &[usize] {
65        bytemuck::cast_slice(self.0.as_ref())
66    }
67}
68
69impl<B: AsRef<[u8]> + AsMut<[u8]>> AsMut<[usize]> for UsizeMmap<B> {
70    fn as_mut(&mut self) -> &mut [usize] {
71        bytemuck::cast_slice_mut(self.0.as_mut())
72    }
73}
74
75impl Node2Type<UsizeMmap<MmapMut>> {
76    /// Create a new `.node2type.bin` file
77    pub fn new<P: AsRef<Path>>(path: P, num_nodes: usize) -> Result<Self> {
78        let path = path.as_ref();
79        // compute the size of the file we are creating in bytes;
80        // and make it a multiple of 8 bytes so BitFieldVec can
81        // read u64 words from it
82        let file_len = ((num_nodes * NodeType::BITWIDTH) as u64).div_ceil(64) * 8;
83        info!("The resulting file will be {} bytes long.", file_len);
84
85        // create the file
86        let node2type_file = std::fs::File::options()
87            .read(true)
88            .write(true)
89            .create_new(true)
90            .open(path)
91            .with_context(|| {
92                format!(
93                    "While creating the .node2type.bin file: {}",
94                    path.to_string_lossy()
95                )
96            })?;
97
98        // fallocate the file with zeros so we can fill it without ever resizing it
99        node2type_file
100            .set_len(file_len)
101            .with_context(|| "While fallocating the file with zeros")?;
102
103        // create a mutable mmap to the file so we can directly write it in place
104        let mmap = unsafe {
105            mmap_rs::MmapOptions::new(file_len as _)
106                .context("Could not initialize mmap")?
107                .with_flags(MmapFlags::SHARED)
108                .with_file(&node2type_file, 0)
109                .map_mut()
110                .with_context(|| "While mmapping the file")?
111        };
112        // use the BitFieldVec over the mmap
113        let mmap = UsizeMmap(mmap);
114        let node2type = unsafe { BitFieldVec::from_raw_parts(mmap, NodeType::BITWIDTH, num_nodes) };
115
116        Ok(Self { data: node2type })
117    }
118
119    /// Load a mutable `.node2type.bin` file
120    pub fn load_mut<P: AsRef<Path>>(path: P, num_nodes: usize) -> Result<Self> {
121        let path = path.as_ref();
122        let file_len = path
123            .metadata()
124            .with_context(|| format!("Could not stat {}", path.display()))?
125            .len();
126        let file = std::fs::File::open(path)
127            .with_context(|| format!("Could not open {}", path.display()))?;
128        let data = unsafe {
129            mmap_rs::MmapOptions::new(file_len as _)
130                .context("Could not initialize mmap")?
131                .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
132                .with_file(&file, 0)
133                .map_mut()?
134        };
135
136        // use the BitFieldVec over the mmap
137        let data = UsizeMmap(data);
138        let node2type = unsafe { BitFieldVec::from_raw_parts(data, NodeType::BITWIDTH, num_nodes) };
139        Ok(Self { data: node2type })
140    }
141}
142
143impl Node2Type<UsizeMmap<Mmap>> {
144    /// Load a read-only `.node2type.bin` file
145    pub fn load<P: AsRef<Path>>(path: P, num_nodes: usize) -> Result<Self> {
146        let path = path.as_ref();
147        let file_len = path
148            .metadata()
149            .with_context(|| format!("Could not stat {}", path.display()))?
150            .len();
151        let expected_file_len = ((num_nodes * NodeType::BITWIDTH).div_ceil(64) * 8) as u64;
152        assert_eq!(
153            file_len,
154            expected_file_len,
155            "Expected {} to have size {} (because graph has {} nodes), but it has size {}",
156            path.display(),
157            expected_file_len,
158            num_nodes,
159            file_len,
160        );
161
162        let file = std::fs::File::open(path)
163            .with_context(|| format!("Could not open {}", path.display()))?;
164        let data = unsafe {
165            mmap_rs::MmapOptions::new(file_len as _)?
166                .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
167                .with_file(&file, 0)
168                .map()?
169        };
170
171        // use the BitFieldVec over the mmap
172        let data = UsizeMmap(data);
173        let node2type = unsafe { BitFieldVec::from_raw_parts(data, NodeType::BITWIDTH, num_nodes) };
174        Ok(Self { data: node2type })
175    }
176}
177
178impl Node2Type<Vec<usize>> {
179    pub fn new_from_iter(types: impl ExactSizeIterator<Item = NodeType>) -> Self {
180        let num_nodes = types.len();
181        let file_len = ((num_nodes * NodeType::BITWIDTH) as u64).div_ceil(64) * 8;
182        let file_len = usize::try_from(file_len).expect("num_nodes overflowed usize");
183        let data = vec![0usize; file_len.div_ceil((usize::BITS / 8).try_into().unwrap())];
184        let data = unsafe { BitFieldVec::from_raw_parts(data, NodeType::BITWIDTH, num_nodes) };
185        let mut node2type = Node2Type { data };
186        for (i, type_) in types.enumerate() {
187            node2type.set(i, type_);
188        }
189        node2type
190    }
191}