1use rand::RngExt;
11use std::path::PathBuf;
12
13pub fn temp_dir<P: AsRef<std::path::Path>>(base: P) -> anyhow::Result<PathBuf> {
15 let mut base = base.as_ref().to_owned();
16 const ALPHABET: &[u8] = b"0123456789abcdef";
17 let mut rnd = rand::rng();
18 let mut random_str = String::new();
19 loop {
20 random_str.clear();
21 for _ in 0..16 {
22 let idx = rnd.random_range(0..ALPHABET.len());
23 random_str.push(ALPHABET[idx] as char);
24 }
25 base.push(&random_str);
26
27 if !base.exists() {
28 std::fs::create_dir(&base)?;
29 return Ok(base);
30 }
31 base.pop();
32 }
33}
34
35mod batch_codec;
36pub use batch_codec::*;
37
38mod circular_buffer;
39pub(crate) use circular_buffer::*;
40
41mod ragged_array;
42pub use ragged_array::RaggedArray;
43
44mod mmap_helper;
45pub use mmap_helper::*;
46
47mod java_perm;
48pub use java_perm::*;
49
50mod granularity;
51pub use granularity::*;
52
53pub mod matrix;
54pub use matrix::Matrix;
55
56pub mod sort_pairs;
57pub use sort_pairs::SortPairs;
58
59pub mod par_sort_pairs;
60pub use par_sort_pairs::ParSortPairs;
61
62pub mod par_sort_iters;
63pub use par_sort_iters::ParSortIters;
64
65use crate::graphs::{
66 arc_list_graph::NodeLabels,
67 bvgraph::{Decode, Encode},
68};
69
70pub struct Converter<D: Decode, E: Encode> {
74 pub decoder: D,
75 pub encoder: E,
76 pub offset: usize,
77}
78
79impl<D: Decode, E: Encode> Decode for Converter<D, E> {
80 #[inline(always)]
82 fn read_outdegree(&mut self) -> u64 {
83 let res = self.decoder.read_outdegree();
84 self.offset += self.encoder.write_outdegree(res).unwrap();
85 res
86 }
87 #[inline(always)]
88 fn read_reference_offset(&mut self) -> u64 {
89 let res = self.decoder.read_reference_offset();
90 self.offset += self.encoder.write_reference_offset(res).unwrap();
91 res
92 }
93 #[inline(always)]
94 fn read_block_count(&mut self) -> u64 {
95 let res = self.decoder.read_block_count();
96 self.offset += self.encoder.write_block_count(res).unwrap();
97 res
98 }
99 #[inline(always)]
100 fn read_block(&mut self) -> u64 {
101 let res = self.decoder.read_block();
102 self.offset += self.encoder.write_block(res).unwrap();
103 res
104 }
105 #[inline(always)]
106 fn read_interval_count(&mut self) -> u64 {
107 let res = self.decoder.read_interval_count();
108 self.offset += self.encoder.write_interval_count(res).unwrap();
109 res
110 }
111 #[inline(always)]
112 fn read_interval_start(&mut self) -> u64 {
113 let res = self.decoder.read_interval_start();
114 self.offset += self.encoder.write_interval_start(res).unwrap();
115 res
116 }
117 #[inline(always)]
118 fn read_interval_len(&mut self) -> u64 {
119 let res = self.decoder.read_interval_len();
120 self.offset += self.encoder.write_interval_len(res).unwrap();
121 res
122 }
123 #[inline(always)]
124 fn read_first_residual(&mut self) -> u64 {
125 let res = self.decoder.read_first_residual();
126 self.offset += self.encoder.write_first_residual(res).unwrap();
127 res
128 }
129 #[inline(always)]
130 fn read_residual(&mut self) -> u64 {
131 let res = self.decoder.read_residual();
132 self.offset += self.encoder.write_residual(res).unwrap();
133 res
134 }
135 #[inline(always)]
136 fn num_of_residuals(&mut self, num_of_residuals: usize) {
137 self.encoder.num_of_residuals(num_of_residuals);
138 }
139}
140
141#[derive(Clone, Copy, Debug)]
148pub enum MemoryUsage {
149 MemorySize(usize),
154 BatchSize(usize),
159}
160
161impl Default for MemoryUsage {
163 fn default() -> Self {
164 Self::from_perc(50.0)
165 }
166}
167
168impl core::fmt::Display for MemoryUsage {
169 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170 match self {
171 MemoryUsage::MemorySize(size) => write!(f, "{} bytes", size),
172 MemoryUsage::BatchSize(size) => write!(f, "{} elements", size),
173 }
174 }
175}
176
177impl core::ops::Mul<usize> for MemoryUsage {
178 type Output = MemoryUsage;
179
180 fn mul(self, rhs: usize) -> Self::Output {
181 match self {
182 MemoryUsage::MemorySize(size) => MemoryUsage::MemorySize(size * rhs),
183 MemoryUsage::BatchSize(size) => MemoryUsage::BatchSize(size * rhs),
184 }
185 }
186}
187
188impl core::ops::Div<usize> for MemoryUsage {
189 type Output = MemoryUsage;
190
191 fn div(self, rhs: usize) -> Self::Output {
192 match self {
193 MemoryUsage::MemorySize(size) => MemoryUsage::MemorySize(size / rhs),
194 MemoryUsage::BatchSize(size) => MemoryUsage::BatchSize(size / rhs),
195 }
196 }
197}
198
199impl MemoryUsage {
200 pub fn from_perc(perc: f64) -> Self {
203 let system = sysinfo::System::new_with_specifics(
204 sysinfo::RefreshKind::nothing()
205 .with_memory(sysinfo::MemoryRefreshKind::nothing().with_ram()),
206 );
207 MemoryUsage::MemorySize(
208 usize::try_from((system.total_memory() as f64 * perc / 100.0) as u64)
209 .expect("System memory overflows usize"),
210 )
211 }
212
213 pub fn batch_size<T>(&self) -> usize {
220 match &self {
221 MemoryUsage::MemorySize(memory_size) => {
222 let elem_size = std::mem::size_of::<T>();
223 assert!(elem_size > 0, "Element size cannot be zero");
224 memory_size / elem_size
225 }
226 MemoryUsage::BatchSize(batch_size) => *batch_size,
227 }
228 }
229}
230
231pub fn humanize(value: f64) -> String {
233 const UNITS: &[&str] = &["", "K", "M", "G", "T", "P", "E"];
234 let mut v = value;
235 let mut unit: usize = 0;
236 while v >= 1000.0 && unit + 1 < UNITS.len() {
237 v /= 1000.0;
238 unit += 1;
239 }
240 if unit == 0 {
241 format!("{:.0}{}", v, UNITS[unit])
242 } else {
243 format!("{:.3}{}", v, UNITS[unit])
244 }
245}
246
247pub struct SplitIters<I> {
264 pub boundaries: Box<[usize]>,
265 pub iters: Box<[I]>,
266}
267
268impl<I> SplitIters<I> {
269 pub fn new(boundaries: Box<[usize]>, iters: Box<[I]>) -> Self {
270 Self { boundaries, iters }
271 }
272}
273
274impl<I> From<(Box<[usize]>, Box<[I]>)> for SplitIters<I> {
275 fn from((boundaries, iters): (Box<[usize]>, Box<[I]>)) -> Self {
276 Self::new(boundaries, iters)
277 }
278}
279
280impl<
298 I: Iterator<Item = (usize, usize)> + Send + Sync,
299 IT: IntoIterator<Item = (usize, usize), IntoIter = I>,
300> From<SplitIters<IT>>
301 for Vec<
302 crate::labels::proj::LeftIterator<
303 NodeLabels<(), std::iter::Map<I, fn((usize, usize)) -> ((usize, usize), ())>>,
304 >,
305 >
306{
307 fn from(split: SplitIters<IT>) -> Self {
308 Box::into_iter(split.iters)
309 .enumerate()
310 .map(|(i, iter)| {
311 let start_node = split.boundaries[i];
312 let end_node = split.boundaries[i + 1];
313 let num_partition_nodes = end_node - start_node;
314 #[allow(clippy::type_complexity)]
316 let map_fn: fn((usize, usize)) -> ((usize, usize), ()) = |pair| (pair, ());
317 let labeled_iter = iter.into_iter().map(map_fn);
318 let lender =
319 NodeLabels::try_new_from(num_partition_nodes, labeled_iter, start_node)
320 .expect("Iterator should start from the expected first node");
321 crate::labels::proj::LeftIterator(lender)
323 })
324 .collect()
325 }
326}
327
328impl<
340 L: Clone + Copy + 'static,
341 I: Iterator<Item = ((usize, usize), L)> + Send + Sync,
342 IT: IntoIterator<Item = ((usize, usize), L), IntoIter = I>,
343> From<SplitIters<IT>> for Vec<NodeLabels<L, I>>
344{
345 fn from(split: SplitIters<IT>) -> Self {
346 Box::into_iter(split.iters)
347 .enumerate()
348 .map(|(i, iter)| {
349 let start_node = split.boundaries[i];
350 let end_node = split.boundaries[i + 1];
351 let num_partition_nodes = end_node - start_node;
352
353 NodeLabels::try_new_from(num_partition_nodes, iter.into_iter(), start_node)
354 .expect("Iterator should start from the expected first node")
355 })
356 .collect()
357 }
358}