swh_graph/map/
permutation.rs1use std::io::{Read, Seek, Write};
7use std::mem::MaybeUninit;
8use std::path::Path;
9use std::sync::atomic::{AtomicBool, Ordering};
10
11use anyhow::{bail, Context, Result};
12use byteorder::{BigEndian, ByteOrder, ReadBytesExt};
13use dsi_progress_logger::{progress_logger, ProgressLog};
14use mmap_rs::{Mmap, MmapFlags};
15use rayon::prelude::*;
16
17#[allow(clippy::len_without_is_empty)]
19pub trait Permutation {
20 fn len(&self) -> usize;
22 fn get(&self, old_node: usize) -> Option<usize>;
24 unsafe fn get_unchecked(&self, old_node: usize) -> usize;
30}
31
32pub struct OwnedPermutation<T: Sync + AsRef<[usize]>>(T);
34
35impl<T: Sync + AsRef<[usize]>> OwnedPermutation<T> {
36 #[inline(always)]
38 pub fn new_unchecked(perm: T) -> Self {
39 OwnedPermutation(perm)
40 }
41
42 #[inline]
45 pub fn new(perm: T) -> Result<Self> {
46 if let Some((old, new)) = perm
48 .as_ref()
49 .par_iter()
50 .enumerate()
51 .find_any(|&(_old, &new)| new >= perm.as_ref().len())
52 {
53 bail!(
54 "Found node {} has id {} in permutation, graph size is {}",
55 old,
56 new,
57 perm.as_ref().len()
58 );
59 }
60
61 let mut seen = Vec::with_capacity(perm.as_ref().len() as _);
63 seen.extend((0..perm.as_ref().len()).map(|_| AtomicBool::new(false)));
64 if let Some((old, _)) = perm
65 .as_ref()
66 .par_iter()
67 .map(|&node| seen[node].fetch_or(true, Ordering::SeqCst))
68 .enumerate()
69 .find_any(|&(_node, is_duplicated)| is_duplicated)
70 {
71 let new = perm.as_ref()[old];
72 bail!(
73 "At least two nodes are mapped to {}; one of them is {}",
74 new,
75 old,
76 );
77 }
78
79 Ok(OwnedPermutation(perm))
82 }
83
84 pub fn dump<W: Write>(&self, file: &mut W) -> std::io::Result<()> {
85 let mut pl = progress_logger!(
86 display_memory = true,
87 item_name = "byte",
88 local_speed = true,
89 expected_updates = Some(self.len() * 8),
90 );
91 pl.start("Writing permutation");
92
93 let chunk_size = 1_000_000; let mut buf = vec![0u8; chunk_size * 8];
95 for chunk in self.0.as_ref().chunks(chunk_size) {
96 let buf_slice = &mut buf[..chunk.len() * 8]; assert_eq!(
98 usize::BITS,
99 u64::BITS,
100 "Only 64-bits architectures are supported"
101 );
102 let chunk: &[u64] =
103 bytemuck::try_cast_slice(chunk).expect("Could not cast &[usize] to &[u64]");
104 BigEndian::write_u64_into(chunk, buf_slice);
105 file.write_all(buf_slice)?;
106 pl.update_with_count(chunk.len() * 8);
107 }
108 pl.done();
109 Ok(())
110 }
111}
112
113impl<T: Sync + AsRef<[usize]> + AsMut<[usize]>> OwnedPermutation<T> {
114 pub fn compose_in_place<T2: Permutation + Sync>(&mut self, other: T2) -> Result<()> {
116 if self.as_mut().len() != other.len() {
117 bail!(
118 "Cannot compose permutation of size {} with permutation of size {}",
119 self.as_ref().len(),
120 other.len()
121 );
122 }
123 self.as_mut()
124 .par_iter_mut()
125 .for_each(|x| *x = unsafe { other.get_unchecked(*x) });
126
127 Ok(())
128 }
129}
130
131impl OwnedPermutation<Vec<usize>> {
132 #[inline]
134 pub fn load_unchecked(num_nodes: usize, path: &Path) -> Result<Self> {
135 assert_eq!(
136 usize::BITS,
137 u64::BITS,
138 "Only 64-bits architectures are supported"
139 );
140
141 let mut file = std::fs::File::open(path).context("Could not open permutation")?;
142
143 let mut buf = [0u8; 8];
144 file.read_exact(&mut buf)?;
145 let epserde = &buf == b"epserde ";
146 if epserde {
147 use epserde::prelude::*;
148
149 let perm = <Vec<usize>>::load_full(path)?;
150 Ok(OwnedPermutation(perm))
151 } else {
152 let mut perm = Vec::with_capacity(num_nodes);
153 file.rewind()?;
154 file.read_u64_into::<BigEndian>(unsafe {
155 std::mem::transmute::<&mut [MaybeUninit<usize>], &mut [u64]>(
156 perm.spare_capacity_mut(),
157 )
158 })
159 .context("Could not read permutation")?;
160
161 unsafe { perm.set_len(num_nodes) };
163
164 Ok(OwnedPermutation(perm))
165 }
166 }
167
168 #[inline]
171 pub fn load(num_nodes: usize, path: &Path) -> Result<Self> {
172 let perm = Self::load_unchecked(num_nodes, path)?;
173 Self::new(perm.0)
174 }
175}
176
177impl<T: Sync + AsRef<[usize]>> AsRef<[usize]> for OwnedPermutation<T> {
178 #[inline(always)]
179 fn as_ref(&self) -> &[usize] {
180 self.0.as_ref()
181 }
182}
183
184impl<T: Sync + AsRef<[usize]> + AsMut<[usize]>> AsMut<[usize]> for OwnedPermutation<T> {
185 #[inline(always)]
186 fn as_mut(&mut self) -> &mut [usize] {
187 self.0.as_mut()
188 }
189}
190
191impl<T: Sync + AsRef<[usize]>> std::ops::Index<usize> for OwnedPermutation<T> {
192 type Output = usize;
193
194 #[inline(always)]
195 fn index(&self, old_node: usize) -> &Self::Output {
196 &self.0.as_ref()[old_node]
197 }
198}
199
200impl<T: Sync + AsRef<[usize]>> Permutation for OwnedPermutation<T> {
201 fn len(&self) -> usize {
202 self.as_ref().len()
203 }
204
205 fn get(&self, old_node: usize) -> Option<usize> {
206 self.0.as_ref().get(old_node).copied()
207 }
208
209 unsafe fn get_unchecked(&self, old_node: usize) -> usize {
210 *self.0.as_ref().get_unchecked(old_node)
211 }
212}
213
214pub struct MappedPermutation(Mmap);
216
217impl MappedPermutation {
218 #[inline(always)]
220 pub fn new_unchecked(perm: Mmap) -> Self {
221 MappedPermutation(perm)
222 }
223
224 #[inline]
227 pub fn new(perm: Mmap) -> Result<Self> {
228 assert_eq!(
229 perm.size() % 8,
230 0,
231 "mmap has size {}, which is not a multiple of 8",
232 perm.size()
233 );
234
235 if let Some((old, new)) = perm
237 .par_iter()
238 .chunks(8)
239 .map(|bytes| {
240 BigEndian::read_u64(bytes.into_iter().cloned().collect::<Vec<_>>().as_slice())
241 as usize
242 })
243 .enumerate()
244 .find_any(|&(_old, new)| new >= perm.len())
245 {
246 bail!(
247 "Found node {} has id {} in permutation, graph size is {}",
248 old,
249 new,
250 perm.len()
251 );
252 }
253
254 let mut seen = Vec::with_capacity(perm.len() as _);
256 seen.extend((0..perm.len()).map(|_| AtomicBool::new(false)));
257 if let Some((old, _)) = perm
258 .par_iter()
259 .chunks(8)
260 .map(|bytes| {
261 BigEndian::read_u64(bytes.into_iter().cloned().collect::<Vec<_>>().as_slice())
262 as usize
263 })
264 .map(|node| seen[node].fetch_or(true, Ordering::SeqCst))
265 .enumerate()
266 .find_any(|&(_node, is_duplicated)| is_duplicated)
267 {
268 let new = perm[old];
269 bail!(
270 "At least two nodes are mapped to {}; one of them is {}",
271 new,
272 old,
273 );
274 }
275
276 Ok(MappedPermutation(perm))
279 }
280
281 #[inline]
283 pub fn load_unchecked(path: &Path) -> Result<Self> {
284 assert_eq!(
285 usize::BITS,
286 u64::BITS,
287 "Only 64-bits architectures are supported"
288 );
289
290 let file_len = path.metadata()?.len();
291 assert_eq!(
292 file_len % 8,
293 0,
294 "{} has size {}, which is not a multiple of 8",
295 path.display(),
296 file_len
297 );
298
299 let file = std::fs::File::open(path).context("Could not open permutation")?;
300 let perm = unsafe {
301 mmap_rs::MmapOptions::new(file_len as _)
302 .context("Could not initialize permutation mmap")?
303 .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
304 .with_file(&file, 0)
305 }
306 .map()
307 .context("Could not mmap permutation")?;
308
309 Ok(MappedPermutation(perm))
310 }
311
312 #[inline]
315 pub fn load(num_nodes: usize, path: &Path) -> Result<Self> {
316 let perm = Self::load_unchecked(path)?;
317 assert_eq!(
318 perm.len(),
319 num_nodes,
320 "Expected permutation to have length {}, got {}",
321 num_nodes,
322 perm.len()
323 );
324 Self::new(perm.0)
325 }
326}
327
328impl Permutation for MappedPermutation {
329 fn len(&self) -> usize {
330 self.0.size() / 8
331 }
332
333 fn get(&self, old_node: usize) -> Option<usize> {
334 let range = (old_node * 8)..((old_node + 1) * 8);
335 Some(BigEndian::read_u64(self.0.get(range)?) as usize)
336 }
337
338 unsafe fn get_unchecked(&self, old_node: usize) -> usize {
339 let range = (old_node * 8)..((old_node + 1) * 8);
340 BigEndian::read_u64(self.0.get_unchecked(range)) as usize
341 }
342}