swh_graph/map/
node2type.rs

1// Copyright (C) 2023-2024  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use crate::{NodeType, OutOfBoundError};
7use anyhow::{Context, Result};
8use log::info;
9use mmap_rs::{Mmap, MmapFlags, MmapMut};
10use std::path::Path;
11use sux::prelude::{BitFieldSlice, BitFieldSliceCore, BitFieldSliceMut, BitFieldVec};
12
13/// Struct to create and load a `.node2type.bin` file and convert node ids to types.
14pub struct Node2Type<B> {
15    data: BitFieldVec<usize, B>,
16}
17
18impl<B: AsRef<[usize]>> Node2Type<B> {
19    #[inline]
20    /// Get the type of a node with id `node_id` without bounds checking
21    ///
22    /// # Safety
23    /// This function is unsafe because it does not check that `node_id` is
24    /// within bounds of the array if debug asserts are disabled
25    pub unsafe fn get_unchecked(&self, node_id: usize) -> NodeType {
26        NodeType::try_from(self.data.get_unchecked(node_id) as u8).unwrap()
27    }
28
29    #[inline]
30    /// Get the type of a node with id `node_id`
31    pub fn get(&self, node_id: usize) -> Result<NodeType, OutOfBoundError> {
32        NodeType::try_from(self.data.get(node_id) as u8).map_err(|_| OutOfBoundError {
33            index: node_id,
34            len: self.data.len(),
35        })
36    }
37}
38
39impl<B: AsRef<[usize]> + AsMut<[usize]>> Node2Type<B> {
40    #[inline]
41    /// Get the type of a node with id `node_id` without bounds checking
42    ///
43    /// # Safety
44    /// This function is unsafe because it does not check that `node_id` is
45    /// within bounds of the array if debug asserts are disabled
46    pub unsafe fn set_unchecked(&mut self, node_id: usize, node_type: NodeType) {
47        self.data.set_unchecked(node_id, node_type as usize);
48    }
49
50    #[inline]
51    /// Set the type of a node with id `node_id`
52    pub fn set(&mut self, node_id: usize, node_type: NodeType) {
53        self.data.set(node_id, node_type as usize);
54    }
55}
56
/// Newtype for [`Mmap`]/[`MmapMut`] which can be dereferenced as slices of
/// `usize` instead of slices of `u8`, so it can be used as backend for
/// [`BitFieldVec`].
pub struct UsizeMmap<B>(B);
61
62impl<B: AsRef<[u8]>> AsRef<[usize]> for UsizeMmap<B> {
63    fn as_ref(&self) -> &[usize] {
64        bytemuck::cast_slice(self.0.as_ref())
65    }
66}
67
68impl<B: AsRef<[u8]> + AsMut<[u8]>> AsMut<[usize]> for UsizeMmap<B> {
69    fn as_mut(&mut self) -> &mut [usize] {
70        bytemuck::cast_slice_mut(self.0.as_mut())
71    }
72}
73
74impl Node2Type<UsizeMmap<MmapMut>> {
75    /// Create a new `.node2type.bin` file
76    pub fn new<P: AsRef<Path>>(path: P, num_nodes: usize) -> Result<Self> {
77        let path = path.as_ref();
78        // compute the size of the file we are creating in bytes;
79        // and make it a multiple of 8 bytes so BitFieldVec can
80        // read u64 words from it
81        let file_len = ((num_nodes * NodeType::BITWIDTH) as u64).div_ceil(64) * 8;
82        info!("The resulting file will be {} bytes long.", file_len);
83
84        // create the file
85        let node2type_file = std::fs::File::options()
86            .read(true)
87            .write(true)
88            .create_new(true)
89            .open(path)
90            .with_context(|| {
91                format!(
92                    "While creating the .node2type.bin file: {}",
93                    path.to_string_lossy()
94                )
95            })?;
96
97        // fallocate the file with zeros so we can fill it without ever resizing it
98        node2type_file
99            .set_len(file_len)
100            .with_context(|| "While fallocating the file with zeros")?;
101
102        // create a mutable mmap to the file so we can directly write it in place
103        let mmap = unsafe {
104            mmap_rs::MmapOptions::new(file_len as _)
105                .context("Could not initialize mmap")?
106                .with_flags(MmapFlags::SHARED)
107                .with_file(&node2type_file, 0)
108                .map_mut()
109                .with_context(|| "While mmapping the file")?
110        };
111        // use the BitFieldVec over the mmap
112        let mmap = UsizeMmap(mmap);
113        let node2type = unsafe { BitFieldVec::from_raw_parts(mmap, NodeType::BITWIDTH, num_nodes) };
114
115        Ok(Self { data: node2type })
116    }
117
118    /// Load a mutable `.node2type.bin` file
119    pub fn load_mut<P: AsRef<Path>>(path: P, num_nodes: usize) -> Result<Self> {
120        let path = path.as_ref();
121        let file_len = path
122            .metadata()
123            .with_context(|| format!("Could not stat {}", path.display()))?
124            .len();
125        let file = std::fs::File::open(path)
126            .with_context(|| format!("Could not open {}", path.display()))?;
127        let data = unsafe {
128            mmap_rs::MmapOptions::new(file_len as _)
129                .context("Could not initialize mmap")?
130                .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
131                .with_file(&file, 0)
132                .map_mut()?
133        };
134
135        // use the BitFieldVec over the mmap
136        let data = UsizeMmap(data);
137        let node2type = unsafe { BitFieldVec::from_raw_parts(data, NodeType::BITWIDTH, num_nodes) };
138        Ok(Self { data: node2type })
139    }
140}
141
142impl Node2Type<UsizeMmap<Mmap>> {
143    /// Load a read-only `.node2type.bin` file
144    pub fn load<P: AsRef<Path>>(path: P, num_nodes: usize) -> Result<Self> {
145        let path = path.as_ref();
146        let file_len = path
147            .metadata()
148            .with_context(|| format!("Could not stat {}", path.display()))?
149            .len();
150        let expected_file_len = ((num_nodes * NodeType::BITWIDTH).div_ceil(64) * 8) as u64;
151        assert_eq!(
152            file_len,
153            expected_file_len,
154            "Expected {} to have size {} (because graph has {} nodes), but it has size {}",
155            path.display(),
156            expected_file_len,
157            num_nodes,
158            file_len,
159        );
160
161        let file = std::fs::File::open(path)
162            .with_context(|| format!("Could not open {}", path.display()))?;
163        let data = unsafe {
164            mmap_rs::MmapOptions::new(file_len as _)?
165                .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
166                .with_file(&file, 0)
167                .map()?
168        };
169
170        // use the BitFieldVec over the mmap
171        let data = UsizeMmap(data);
172        let node2type = unsafe { BitFieldVec::from_raw_parts(data, NodeType::BITWIDTH, num_nodes) };
173        Ok(Self { data: node2type })
174    }
175}
176
177impl Node2Type<UsizeMmap<Vec<u8>>> {
178    pub fn new_from_iter(types: impl ExactSizeIterator<Item = NodeType>) -> Self {
179        let num_nodes = types.len();
180        let file_len = ((num_nodes * NodeType::BITWIDTH) as u64).div_ceil(64) * 8;
181        let file_len = file_len.try_into().expect("num_nodes overflowed usize");
182        let data = UsizeMmap(vec![0; file_len]);
183        let data = unsafe { BitFieldVec::from_raw_parts(data, NodeType::BITWIDTH, num_nodes) };
184        let mut node2type = Node2Type { data };
185        for (i, type_) in types.enumerate() {
186            node2type.set(i, type_);
187        }
188        node2type
189    }
190}