swh_graph/map/
permutation.rs1use std::io::{Read, Seek, Write};
7use std::mem::MaybeUninit;
8use std::path::Path;
9use std::sync::atomic::{AtomicBool, Ordering};
10
11use anyhow::{bail, Context, Result};
12use byteorder::{BigEndian, ByteOrder, ReadBytesExt};
13use dsi_progress_logger::{progress_logger, ProgressLog};
14use mmap_rs::{Mmap, MmapFlags};
15use rayon::prelude::*;
16
17#[allow(clippy::len_without_is_empty)]
19pub trait Permutation {
20 fn len(&self) -> usize;
22 fn get(&self, old_node: usize) -> Option<usize>;
24 unsafe fn get_unchecked(&self, old_node: usize) -> usize;
30}
31
32pub struct OwnedPermutation<T: Sync + AsRef<[usize]>>(T);
34
35impl<T: Sync + AsRef<[usize]>> OwnedPermutation<T> {
36 #[inline(always)]
38 pub fn new_unchecked(perm: T) -> Self {
39 OwnedPermutation(perm)
40 }
41
42 #[inline]
45 pub fn new(perm: T) -> Result<Self> {
46 if let Some((old, new)) = perm
48 .as_ref()
49 .par_iter()
50 .enumerate()
51 .find_any(|&(_old, &new)| new >= perm.as_ref().len())
52 {
53 bail!(
54 "Found node {} has id {} in permutation, graph size is {}",
55 old,
56 new,
57 perm.as_ref().len()
58 );
59 }
60
61 let mut seen = Vec::with_capacity(perm.as_ref().len() as _);
63 seen.extend((0..perm.as_ref().len()).map(|_| AtomicBool::new(false)));
64 if let Some((old, _)) = perm
65 .as_ref()
66 .par_iter()
67 .map(|&node| seen[node].fetch_or(true, Ordering::SeqCst))
68 .enumerate()
69 .find_any(|&(_node, is_duplicated)| is_duplicated)
70 {
71 let new = perm.as_ref()[old];
72 bail!(
73 "At least two nodes are mapped to {}; one of them is {}",
74 new,
75 old,
76 );
77 }
78
79 Ok(OwnedPermutation(perm))
82 }
83
84 pub fn dump<W: Write>(&self, file: &mut W) -> std::io::Result<()> {
85 let mut pl = progress_logger!(
86 display_memory = true,
87 item_name = "byte",
88 local_speed = true,
89 expected_updates = Some(self.len() * 8),
90 );
91 pl.start("Writing permutation");
92
93 let chunk_size = 1_000_000; let mut buf = vec![0u8; chunk_size * 8];
95 for chunk in self.0.as_ref().chunks(chunk_size) {
96 let buf_slice = &mut buf[..chunk.len() * 8]; assert_eq!(
98 usize::BITS,
99 u64::BITS,
100 "Only 64-bits architectures are supported"
101 );
102 let chunk: &[u64] =
103 bytemuck::try_cast_slice(chunk).expect("Could not cast &[usize] to &[u64]");
104 BigEndian::write_u64_into(chunk, buf_slice);
105 file.write_all(buf_slice)?;
106 pl.update_with_count(chunk.len() * 8);
107 }
108 pl.done();
109 Ok(())
110 }
111}
112
113impl<T: Sync + AsRef<[usize]> + AsMut<[usize]>> OwnedPermutation<T> {
114 pub fn compose_in_place<T2: Permutation + Sync>(&mut self, other: T2) -> Result<()> {
116 if self.as_mut().len() != other.len() {
117 bail!(
118 "Cannot compose permutation of size {} with permutation of size {}",
119 self.as_ref().len(),
120 other.len()
121 );
122 }
123 self.as_mut()
124 .par_iter_mut()
125 .for_each(|x| *x = unsafe { other.get_unchecked(*x) });
126
127 Ok(())
128 }
129}
130
131impl OwnedPermutation<Vec<usize>> {
132 #[inline]
134 pub fn load_unchecked(num_nodes: usize, path: &Path) -> Result<Self> {
135 assert_eq!(
136 usize::BITS,
137 u64::BITS,
138 "Only 64-bits architectures are supported"
139 );
140
141 let mut file = std::fs::File::open(path).context("Could not open permutation")?;
142
143 let mut buf = [0u8; 8];
144 file.read_exact(&mut buf)?;
145 let epserde = &buf == b"epserde ";
146 if epserde {
147 use epserde::prelude::*;
148
149 let perm = unsafe { <Vec<usize>>::load_full(path) }?;
152 Ok(OwnedPermutation(perm))
153 } else {
154 let mut perm = Vec::with_capacity(num_nodes);
155 file.rewind()?;
156 file.read_u64_into::<BigEndian>(unsafe {
157 std::mem::transmute::<&mut [MaybeUninit<usize>], &mut [u64]>(
158 perm.spare_capacity_mut(),
159 )
160 })
161 .context("Could not read permutation")?;
162
163 unsafe { perm.set_len(num_nodes) };
165
166 Ok(OwnedPermutation(perm))
167 }
168 }
169
170 #[inline]
173 pub fn load(num_nodes: usize, path: &Path) -> Result<Self> {
174 let perm = Self::load_unchecked(num_nodes, path)?;
175 Self::new(perm.0)
176 }
177}
178
179impl<T: Sync + AsRef<[usize]>> AsRef<[usize]> for OwnedPermutation<T> {
180 #[inline(always)]
181 fn as_ref(&self) -> &[usize] {
182 self.0.as_ref()
183 }
184}
185
186impl<T: Sync + AsRef<[usize]> + AsMut<[usize]>> AsMut<[usize]> for OwnedPermutation<T> {
187 #[inline(always)]
188 fn as_mut(&mut self) -> &mut [usize] {
189 self.0.as_mut()
190 }
191}
192
193impl<T: Sync + AsRef<[usize]>> std::ops::Index<usize> for OwnedPermutation<T> {
194 type Output = usize;
195
196 #[inline(always)]
197 fn index(&self, old_node: usize) -> &Self::Output {
198 &self.0.as_ref()[old_node]
199 }
200}
201
202impl<T: Sync + AsRef<[usize]>> Permutation for OwnedPermutation<T> {
203 fn len(&self) -> usize {
204 self.as_ref().len()
205 }
206
207 fn get(&self, old_node: usize) -> Option<usize> {
208 self.0.as_ref().get(old_node).copied()
209 }
210
211 unsafe fn get_unchecked(&self, old_node: usize) -> usize {
212 *self.0.as_ref().get_unchecked(old_node)
213 }
214}
215
216pub struct MappedPermutation(Mmap);
218
219impl MappedPermutation {
220 #[inline(always)]
222 pub fn new_unchecked(perm: Mmap) -> Self {
223 MappedPermutation(perm)
224 }
225
226 #[inline]
229 pub fn new(perm: Mmap) -> Result<Self> {
230 assert_eq!(
231 perm.size() % 8,
232 0,
233 "mmap has size {}, which is not a multiple of 8",
234 perm.size()
235 );
236
237 if let Some((old, new)) = perm
239 .par_iter()
240 .chunks(8)
241 .map(|bytes| {
242 BigEndian::read_u64(bytes.into_iter().cloned().collect::<Vec<_>>().as_slice())
243 as usize
244 })
245 .enumerate()
246 .find_any(|&(_old, new)| new >= perm.len())
247 {
248 bail!(
249 "Found node {} has id {} in permutation, graph size is {}",
250 old,
251 new,
252 perm.len()
253 );
254 }
255
256 let mut seen = Vec::with_capacity(perm.len() as _);
258 seen.extend((0..perm.len()).map(|_| AtomicBool::new(false)));
259 if let Some((old, _)) = perm
260 .par_iter()
261 .chunks(8)
262 .map(|bytes| {
263 BigEndian::read_u64(bytes.into_iter().cloned().collect::<Vec<_>>().as_slice())
264 as usize
265 })
266 .map(|node| seen[node].fetch_or(true, Ordering::SeqCst))
267 .enumerate()
268 .find_any(|&(_node, is_duplicated)| is_duplicated)
269 {
270 let new = perm[old];
271 bail!(
272 "At least two nodes are mapped to {}; one of them is {}",
273 new,
274 old,
275 );
276 }
277
278 Ok(MappedPermutation(perm))
281 }
282
283 #[inline]
285 pub fn load_unchecked(path: &Path) -> Result<Self> {
286 assert_eq!(
287 usize::BITS,
288 u64::BITS,
289 "Only 64-bits architectures are supported"
290 );
291
292 let file_len = path.metadata()?.len();
293 assert_eq!(
294 file_len % 8,
295 0,
296 "{} has size {}, which is not a multiple of 8",
297 path.display(),
298 file_len
299 );
300
301 let file = std::fs::File::open(path).context("Could not open permutation")?;
302 let perm = unsafe {
303 mmap_rs::MmapOptions::new(file_len as _)
304 .context("Could not initialize permutation mmap")?
305 .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
306 .with_file(&file, 0)
307 }
308 .map()
309 .context("Could not mmap permutation")?;
310
311 Ok(MappedPermutation(perm))
312 }
313
314 #[inline]
317 pub fn load(num_nodes: usize, path: &Path) -> Result<Self> {
318 let perm = Self::load_unchecked(path)?;
319 assert_eq!(
320 perm.len(),
321 num_nodes,
322 "Expected permutation to have length {}, got {}",
323 num_nodes,
324 perm.len()
325 );
326 Self::new(perm.0)
327 }
328}
329
330impl Permutation for MappedPermutation {
331 fn len(&self) -> usize {
332 self.0.size() / 8
333 }
334
335 fn get(&self, old_node: usize) -> Option<usize> {
336 let range = (old_node * 8)..((old_node + 1) * 8);
337 Some(BigEndian::read_u64(self.0.get(range)?) as usize)
338 }
339
340 unsafe fn get_unchecked(&self, old_node: usize) -> usize {
341 let range = (old_node * 8)..((old_node + 1) * 8);
342 BigEndian::read_u64(self.0.get_unchecked(range)) as usize
343 }
344}