swh_graph/map/
permutation.rs1use std::io::{Read, Seek, Write};
7use std::mem::MaybeUninit;
8use std::path::Path;
9use std::sync::atomic::{AtomicBool, Ordering};
10
11use anyhow::{bail, Context, Result};
12use byteorder::{BigEndian, ByteOrder, ReadBytesExt};
13use dsi_progress_logger::{progress_logger, ProgressLog};
14use mmap_rs::{Mmap, MmapFlags};
15use rayon::prelude::*;
16
17#[allow(clippy::len_without_is_empty)]
19pub trait Permutation {
20 fn len(&self) -> usize;
22 fn get(&self, old_node: usize) -> Option<usize>;
24 unsafe fn get_unchecked(&self, old_node: usize) -> usize;
30}
31
32pub struct OwnedPermutation<T: Sync + AsRef<[usize]>>(T);
34
35impl<T: Sync + AsRef<[usize]>> OwnedPermutation<T> {
36 #[inline(always)]
38 pub fn new_unchecked(perm: T) -> Self {
39 OwnedPermutation(perm)
40 }
41
42 #[inline]
45 pub fn new(perm: T) -> Result<Self> {
46 if let Some((old, new)) = perm
48 .as_ref()
49 .par_iter()
50 .enumerate()
51 .find_any(|&(_old, &new)| new >= perm.as_ref().len())
52 {
53 bail!(
54 "Found node {} with id {} in permutation, graph size is {}",
55 old,
56 new,
57 perm.as_ref().len()
58 );
59 }
60
61 let mut seen = Vec::with_capacity(perm.as_ref().len() as _);
63 seen.extend((0..perm.as_ref().len()).map(|_| AtomicBool::new(false)));
64 if let Some((old, _)) = perm
65 .as_ref()
66 .par_iter()
67 .map(|&node| seen[node].fetch_or(true, Ordering::SeqCst))
68 .enumerate()
69 .find_any(|&(_node, is_duplicated)| is_duplicated)
70 {
71 let new = perm.as_ref()[old];
72 bail!(
73 "At least two nodes are mapped to {}; one of them is {}",
74 new,
75 old,
76 );
77 }
78
79 Ok(OwnedPermutation(perm))
82 }
83
84 pub fn dump<W: Write>(&self, file: &mut W) -> std::io::Result<()> {
85 let mut pl = progress_logger!(
86 display_memory = true,
87 item_name = "byte",
88 local_speed = true,
89 expected_updates = Some(self.len() * 8),
90 );
91 pl.start("Writing permutation");
92
93 let chunk_size = 1_000_000; let mut buf = vec![0u8; chunk_size * 8];
95 for chunk in self.0.as_ref().chunks(chunk_size) {
96 let buf_slice = &mut buf[..chunk.len() * 8]; assert_eq!(
98 usize::BITS,
99 u64::BITS,
100 "Only 64-bits architectures are supported"
101 );
102 let chunk: &[u64] =
103 bytemuck::try_cast_slice(chunk).expect("Could not cast &[usize] to &[u64]");
104 BigEndian::write_u64_into(chunk, buf_slice);
105 file.write_all(buf_slice)?;
106 pl.update_with_count(chunk.len() * 8);
107 }
108 pl.done();
109 Ok(())
110 }
111}
112
113impl<T: Sync + AsRef<[usize]> + AsMut<[usize]>> OwnedPermutation<T> {
114 pub fn compose_in_place<T2: Permutation + Sync>(&mut self, other: T2) -> Result<()> {
116 if self.as_mut().len() != other.len() {
117 bail!(
118 "Cannot compose permutation of size {} with permutation of size {}",
119 self.as_ref().len(),
120 other.len()
121 );
122 }
123 self.as_mut()
124 .par_iter_mut()
125 .for_each(|x| *x = unsafe { other.get_unchecked(*x) });
126
127 Ok(())
128 }
129}
130
131impl OwnedPermutation<Vec<usize>> {
132 #[inline]
134 pub fn load_unchecked(num_nodes: usize, path: &Path) -> Result<Self> {
135 assert_eq!(
136 usize::BITS,
137 u64::BITS,
138 "Only 64-bits architectures are supported"
139 );
140
141 let mut file = std::fs::File::open(path).context("Could not open permutation")?;
142
143 let mut buf = [0u8; 8];
144 file.read_exact(&mut buf)?;
145 let epserde = &buf == b"epserde ";
146 if epserde {
147 use epserde::prelude::*;
148
149 let perm = unsafe { <Vec<usize>>::load_full(path) }?;
152 Ok(OwnedPermutation(perm))
153 } else {
154 let mut perm = Vec::with_capacity(num_nodes);
155 file.rewind()?;
156 file.read_u64_into::<BigEndian>(unsafe {
157 std::mem::transmute::<&mut [MaybeUninit<usize>], &mut [u64]>(
158 perm.spare_capacity_mut(),
159 )
160 })
161 .context("Could not read permutation")?;
162
163 unsafe { perm.set_len(num_nodes) };
165
166 Ok(OwnedPermutation(perm))
167 }
168 }
169
170 #[inline]
173 pub fn load(num_nodes: usize, path: &Path) -> Result<Self> {
174 let perm = Self::load_unchecked(num_nodes, path)?;
175 Self::new(perm.0)
176 }
177}
178
179impl<T: Sync + AsRef<[usize]>> AsRef<[usize]> for OwnedPermutation<T> {
180 #[inline(always)]
181 fn as_ref(&self) -> &[usize] {
182 self.0.as_ref()
183 }
184}
185
186impl<T: Sync + AsRef<[usize]> + AsMut<[usize]>> AsMut<[usize]> for OwnedPermutation<T> {
187 #[inline(always)]
188 fn as_mut(&mut self) -> &mut [usize] {
189 self.0.as_mut()
190 }
191}
192
193impl<T: Sync + AsRef<[usize]>> std::ops::Index<usize> for OwnedPermutation<T> {
194 type Output = usize;
195
196 #[inline(always)]
197 fn index(&self, old_node: usize) -> &Self::Output {
198 &self.0.as_ref()[old_node]
199 }
200}
201
202impl<T: Sync + AsRef<[usize]>> Permutation for OwnedPermutation<T> {
203 #[inline(always)]
204 fn len(&self) -> usize {
205 self.as_ref().len()
206 }
207
208 #[inline(always)]
209 fn get(&self, old_node: usize) -> Option<usize> {
210 self.0.as_ref().get(old_node).copied()
211 }
212
213 #[inline(always)]
214 unsafe fn get_unchecked(&self, old_node: usize) -> usize {
215 *self.0.as_ref().get_unchecked(old_node)
216 }
217}
218
219pub struct MappedPermutation(Mmap);
221
222impl MappedPermutation {
223 #[inline(always)]
225 pub fn new_unchecked(perm: Mmap) -> Self {
226 MappedPermutation(perm)
227 }
228
229 #[inline]
232 pub fn new(perm: Mmap) -> Result<Self> {
233 assert_eq!(
234 perm.size() % 8,
235 0,
236 "mmap has size {}, which is not a multiple of 8",
237 perm.size()
238 );
239 let num_nodes = perm.len() / 8;
240
241 if let Some((old, new)) = perm
243 .par_iter()
244 .chunks(8)
245 .map(|bytes| {
246 BigEndian::read_u64(bytes.into_iter().cloned().collect::<Vec<_>>().as_slice())
247 as usize
248 })
249 .enumerate()
250 .find_any(|&(_old, new)| new >= num_nodes)
251 {
252 bail!(
253 "Found node {} has id {} in permutation, graph size is {}",
254 old,
255 new,
256 perm.len()
257 );
258 }
259
260 let mut seen = Vec::with_capacity(perm.len() as _);
262 seen.extend((0..num_nodes).map(|_| AtomicBool::new(false)));
263 if let Some((old, _)) = perm
264 .par_iter()
265 .chunks(8)
266 .map(|bytes| {
267 BigEndian::read_u64(bytes.into_iter().cloned().collect::<Vec<_>>().as_slice())
268 as usize
269 })
270 .map(|node| seen[node].fetch_or(true, Ordering::SeqCst))
271 .enumerate()
272 .find_any(|&(_node, is_duplicated)| is_duplicated)
273 {
274 let new = perm[old];
275 bail!(
276 "At least two nodes are mapped to {}; one of them is {}",
277 new,
278 old,
279 );
280 }
281
282 Ok(MappedPermutation(perm))
285 }
286
287 #[inline]
289 pub fn load_unchecked(path: &Path) -> Result<Self> {
290 assert_eq!(
291 usize::BITS,
292 u64::BITS,
293 "Only 64-bits architectures are supported"
294 );
295
296 let file_len = path.metadata()?.len();
297 assert_eq!(
298 file_len % 8,
299 0,
300 "{} has size {}, which is not a multiple of 8",
301 path.display(),
302 file_len
303 );
304
305 let file = std::fs::File::open(path).context("Could not open permutation")?;
306 let perm = unsafe {
307 mmap_rs::MmapOptions::new(file_len as _)
308 .context("Could not initialize permutation mmap")?
309 .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
310 .with_file(&file, 0)
311 }
312 .map()
313 .context("Could not mmap permutation")?;
314
315 Ok(MappedPermutation(perm))
316 }
317
318 #[inline]
321 pub fn load(num_nodes: usize, path: &Path) -> Result<Self> {
322 let perm = Self::load_unchecked(path)?;
323 assert_eq!(
324 perm.len(),
325 num_nodes,
326 "Expected permutation to have length {}, got {}",
327 num_nodes,
328 perm.len()
329 );
330 Self::new(perm.0)
331 }
332}
333
334impl Permutation for MappedPermutation {
335 #[inline(always)]
336 fn len(&self) -> usize {
337 self.0.size() / 8
338 }
339
340 #[inline(always)]
341 fn get(&self, old_node: usize) -> Option<usize> {
342 let range = (old_node * 8)..((old_node + 1) * 8);
343 Some(BigEndian::read_u64(self.0.get(range)?) as usize)
344 }
345
346 #[inline(always)]
347 unsafe fn get_unchecked(&self, old_node: usize) -> usize {
348 let range = (old_node * 8)..((old_node + 1) * 8);
349 BigEndian::read_u64(self.0.get_unchecked(range)) as usize
350 }
351}