webgraph_cli/
lib.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9#![doc = include_str!("../README.md")]
10#![deny(unstable_features)]
11#![deny(trivial_casts)]
12#![deny(unconditional_recursion)]
13#![deny(clippy::empty_loop)]
14#![deny(unreachable_code)]
15#![deny(unreachable_pub)]
16#![deny(unreachable_patterns)]
17#![deny(unused_macro_rules)]
18#![deny(unused_doc_comments)]
19#![allow(clippy::type_complexity)]
20
21use anyhow::{Context, Result, anyhow, bail, ensure};
22use clap::{Args, CommandFactory, Parser, Subcommand, ValueEnum};
23use common_traits::{AsBytes, FromBytes, ToBytes, UnsignedInt};
24use dsi_bitstream::dispatch::Codes;
25use epserde::deser::Deserialize;
26use epserde::ser::Serialize;
27use std::io::{BufRead, BufReader, BufWriter, Read, Write};
28use std::path::{Path, PathBuf};
29use std::time::Duration;
30use std::time::SystemTime;
31use sux::bits::BitFieldVec;
32use webgraph::prelude::CompFlags;
33use webgraph::utils::{Granularity, MemoryUsage};
34
// Expands to a string literal (not a `String`), so it can be passed where a
// formatting or logging macro expects a literal format string. The literal
// contains exactly one `{}` placeholder.
// NOTE(review): the placeholder is presumably filled with the graph
// basename; confirm at the call sites.
macro_rules! SEQ_PROC_WARN {
    () => {"Processing the graph sequentially: for parallel processing please build the Elias-Fano offsets list using 'webgraph build ef {}'"}
}
38
// Abort compilation with an explicit message when neither the `le_bins` nor
// the `be_bins` Cargo feature is enabled.
#[cfg(not(any(feature = "le_bins", feature = "be_bins")))]
compile_error!("At least one of the features `le_bins` or `be_bins` must be enabled.");
41
42pub mod build_info {
43    include!(concat!(env!("OUT_DIR"), "/built.rs"));
44
45    pub fn version_string() -> String {
46        format!(
47            "{}
48git info: {} {} {}
49build info: built on {} for {} with {}",
50            PKG_VERSION,
51            GIT_VERSION.unwrap_or(""),
52            GIT_COMMIT_HASH.unwrap_or(""),
53            match GIT_DIRTY {
54                None => "",
55                Some(true) => "(dirty)",
56                Some(false) => "(clean)",
57            },
58            BUILD_DATE,
59            TARGET,
60            RUSTC_VERSION
61        )
62    }
63}
64
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
/// Command-line counterpart of the instantaneous codes in [`Codes`].
///
/// This enum exists so that [`ValueEnum`] can be implemented here instead of
/// in [`dsi_bitstream`]; each variant is converted to the corresponding
/// [`Codes`] value by the `From<PrivCode>` implementation for [`Codes`].
///
/// For CLI ergonomics and compatibility, these codes must be the same as those
/// appearing in [`CompFlags::code_from_str`].
pub enum PrivCode {
    Unary,
    Gamma,
    Delta,
    Zeta1,
    Zeta2,
    Zeta3,
    Zeta4,
    Zeta5,
    Zeta6,
    Zeta7,
    Pi1,
    Pi2,
    Pi3,
    Pi4,
}
88
89impl From<PrivCode> for Codes {
90    fn from(value: PrivCode) -> Self {
91        match value {
92            PrivCode::Unary => Codes::Unary,
93            PrivCode::Gamma => Codes::Gamma,
94            PrivCode::Delta => Codes::Delta,
95            PrivCode::Zeta1 => Codes::Zeta(1),
96            PrivCode::Zeta2 => Codes::Zeta(2),
97            PrivCode::Zeta3 => Codes::Zeta(3),
98            PrivCode::Zeta4 => Codes::Zeta(4),
99            PrivCode::Zeta5 => Codes::Zeta(5),
100            PrivCode::Zeta6 => Codes::Zeta(6),
101            PrivCode::Zeta7 => Codes::Zeta(7),
102            PrivCode::Pi1 => Codes::Pi(1),
103            PrivCode::Pi2 => Codes::Pi(2),
104            PrivCode::Pi3 => Codes::Pi(3),
105            PrivCode::Pi4 => Codes::Pi(4),
106        }
107    }
108}
109
110#[derive(Args, Debug)]
111/// Shared CLI arguments for reading files containing arcs.
112pub struct ArcsArgs {
113    #[arg(long, default_value_t = '#')]
114    /// Ignore lines that start with this symbol.
115    pub line_comment_symbol: char,
116
117    #[arg(long, default_value_t = 0)]
118    /// How many lines to skip, ignoring comment lines.
119    pub lines_to_skip: usize,
120
121    #[arg(long)]
122    /// How many lines to parse, after skipping the first lines_to_skip and
123    /// ignoring comment lines.
124    pub max_arcs: Option<usize>,
125
126    #[arg(long, default_value_t = '\t')]
127    /// The column separator.
128    pub separator: char,
129
130    #[arg(long, default_value_t = 0)]
131    /// The index of the column containing the source node of an arc.
132    pub source_column: usize,
133
134    #[arg(long, default_value_t = 1)]
135    /// The index of the column containing the target node of an arc.
136    pub target_column: usize,
137
138    #[arg(long, default_value_t = false)]
139    /// Source and destinations are not node identifiers starting from 0, but labels.
140    pub labels: bool,
141}
142
143/// Parses the number of threads from a string.
144///
145/// This function is meant to be used with `#[arg(...,  value_parser =
146/// num_threads_parser)]`.
147pub fn num_threads_parser(arg: &str) -> Result<usize> {
148    let num_threads = arg.parse::<usize>()?;
149    ensure!(num_threads > 0, "Number of threads must be greater than 0");
150    Ok(num_threads)
151}
152
153/// Shared CLI arguments for commands that specify a number of threads.
154#[derive(Args, Debug)]
155pub struct NumThreadsArg {
156    #[arg(short = 'j', long, default_value_t = rayon::current_num_threads().max(1), value_parser = num_threads_parser)]
157    /// The number of threads to use.
158    pub num_threads: usize,
159}
160
161/// Shared CLI arguments for commands that specify a granularity.
162#[derive(Args, Debug)]
163pub struct GranularityArgs {
164    #[arg(long, conflicts_with("node_granularity"))]
165    /// The tentative number of arcs used to define the size of a parallel job
166    /// (advanced option).
167    pub arc_granularity: Option<u64>,
168
169    #[arg(long, conflicts_with("arc_granularity"))]
170    /// The tentative number of nodes used to define the size of a parallel job
171    /// (advanced option).
172    pub node_granularity: Option<usize>,
173}
174
175impl GranularityArgs {
176    pub fn into_granularity(&self) -> Granularity {
177        match (self.arc_granularity, self.node_granularity) {
178            (Some(_), Some(_)) => unreachable!(),
179            (Some(arc_granularity), None) => Granularity::Arcs(arc_granularity),
180            (None, Some(node_granularity)) => Granularity::Nodes(node_granularity),
181            (None, None) => Granularity::default(),
182        }
183    }
184}
185
186/// Shared CLI arguments for commands that specify a memory usage.
187#[derive(Args, Debug)]
188pub struct MemoryUsageArg {
189    #[clap(short = 'm', long = "memory-usage", value_parser = memory_usage_parser, default_value = "50%")]
190    /// The number of pairs to be used in batches.
191    /// If the number ends with a "b" or "B" it is interpreted as a number of bytes, otherwise as a number of elements.
192    /// You can use the SI and NIST multipliers k, M, G, T, P, ki, Mi, Gi, Ti, and Pi.
193    /// You can also use a percentage of the available memory by appending a "%" to the number.
194    pub memory_usage: MemoryUsage,
195}
196
#[derive(Debug, Clone, Copy, ValueEnum)]
/// Formats for storing and loading vectors of floats.
///
/// The format is selected on the command line (the enum derives
/// [`ValueEnum`]); the actual reading and writing is implemented by the
/// `store` and `load` methods of this type.
pub enum FloatVectorFormat {
    /// Java-compatible format: a sequence of big-endian floats (32 or 64 bits).
    Java,
    /// A slice of floats (32 or 64 bits) serialized using ε-serde.
    Epserde,
    /// ASCII format, one float per line.
    Ascii,
    /// A JSON Array.
    Json,
}
209
210impl FloatVectorFormat {
211    /// Stores float values in the specified `path` using the format defined by
212    /// `self`.
213    ///
214    /// If the result is a textual format, that is, ASCII or JSON, `precision`
215    /// will be used to truncate the float values to the specified number of
216    /// decimal digits. If `None`, [zmij](https://crates.io/crates/zmij)
217    /// formatting will be used.
218    pub fn store<F>(
219        &self,
220        path: impl AsRef<Path>,
221        values: &[F],
222        precision: Option<usize>,
223    ) -> Result<()>
224    where
225        F: ToBytes + core::fmt::Display + epserde::ser::Serialize + Copy + zmij::Float,
226        for<'a> &'a [F]: epserde::ser::Serialize,
227    {
228        create_parent_dir(&path)?;
229        let path_display = path.as_ref().display();
230        let file = std::fs::File::create(&path)
231            .with_context(|| format!("Could not create vector at {}", path_display))?;
232        let mut file = BufWriter::new(file);
233
234        match self {
235            FloatVectorFormat::Epserde => {
236                log::info!("Storing in ε-serde format at {}", path_display);
237                unsafe {
238                    values
239                        .serialize(&mut file)
240                        .with_context(|| format!("Could not write vector to {}", path_display))
241                }?;
242            }
243            FloatVectorFormat::Java => {
244                log::info!("Storing in Java format at {}", path_display);
245                for word in values.iter() {
246                    file.write_all(word.to_be_bytes().as_ref())
247                        .with_context(|| format!("Could not write vector to {}", path_display))?;
248                }
249            }
250            FloatVectorFormat::Ascii => {
251                log::info!("Storing in ASCII format at {}", path_display);
252                let mut buf = zmij::Buffer::new();
253                for word in values.iter() {
254                    match precision {
255                        None => writeln!(file, "{}", buf.format(*word)),
256                        Some(precision) => writeln!(file, "{word:.precision$}"),
257                    }
258                    .with_context(|| format!("Could not write vector to {}", path_display))?;
259                }
260            }
261            FloatVectorFormat::Json => {
262                log::info!("Storing in JSON format at {}", path_display);
263                let mut buf = zmij::Buffer::new();
264                write!(file, "[")?;
265                for word in values.iter().take(values.len().saturating_sub(1)) {
266                    match precision {
267                        None => write!(file, "{}, ", buf.format(*word)),
268                        Some(precision) => write!(file, "{word:.precision$}, "),
269                    }
270                    .with_context(|| format!("Could not write vector to {}", path_display))?;
271                }
272                if let Some(last) = values.last() {
273                    match precision {
274                        None => write!(file, "{}", buf.format(*last)),
275                        Some(precision) => write!(file, "{last:.precision$}"),
276                    }
277                    .with_context(|| format!("Could not write vector to {}", path_display))?;
278                }
279                write!(file, "]")?;
280            }
281        }
282
283        Ok(())
284    }
285
286    /// Loads float values from the specified `path` using the format defined
287    /// by `self`.
288    pub fn load<F>(&self, path: impl AsRef<Path>) -> Result<Vec<F>>
289    where
290        F: FromBytes + std::str::FromStr + Copy,
291        <F as AsBytes>::Bytes: for<'a> TryFrom<&'a [u8]>,
292        <F as std::str::FromStr>::Err: std::error::Error + Send + Sync + 'static,
293        Vec<F>: epserde::deser::Deserialize,
294    {
295        let path = path.as_ref();
296        let path_display = path.display();
297
298        match self {
299            FloatVectorFormat::Epserde => {
300                log::info!("Loading ε-serde format from {}", path_display);
301                Ok(unsafe {
302                    <Vec<F>>::load_full(path)
303                        .with_context(|| format!("Could not load vector from {}", path_display))?
304                })
305            }
306            FloatVectorFormat::Java => {
307                log::info!("Loading Java format from {}", path_display);
308                let file = std::fs::File::open(path)
309                    .with_context(|| format!("Could not open {}", path_display))?;
310                let file_len = file.metadata()?.len() as usize;
311                let byte_size = size_of::<F>();
312                ensure!(
313                    file_len % byte_size == 0,
314                    "File size ({}) is not a multiple of {} bytes",
315                    file_len,
316                    byte_size
317                );
318                let n = file_len / byte_size;
319                let mut reader = BufReader::new(file);
320                let mut result = Vec::with_capacity(n);
321                let mut buf = vec![0u8; byte_size];
322                for i in 0..n {
323                    reader.read_exact(&mut buf).with_context(|| {
324                        format!("Could not read value at index {i} from {}", path_display)
325                    })?;
326                    let bytes = buf.as_slice().try_into().map_err(|_| {
327                        anyhow!("Could not convert bytes at index {i} in {}", path_display)
328                    })?;
329                    result.push(F::from_be_bytes(bytes));
330                }
331                Ok(result)
332            }
333            FloatVectorFormat::Ascii => {
334                log::info!("Loading ASCII format from {}", path_display);
335                let file = std::fs::File::open(path)
336                    .with_context(|| format!("Could not open {}", path_display))?;
337                let reader = BufReader::new(file);
338                reader
339                    .lines()
340                    .enumerate()
341                    .filter(|(_, line)| line.as_ref().map_or(true, |l| !l.trim().is_empty()))
342                    .map(|(i, line)| {
343                        let line = line.with_context(|| {
344                            format!("Error reading line {} of {}", i + 1, path_display)
345                        })?;
346                        line.trim().parse::<F>().map_err(|e| {
347                            anyhow!("Error parsing line {} of {}: {}", i + 1, path_display, e)
348                        })
349                    })
350                    .collect()
351            }
352            FloatVectorFormat::Json => {
353                log::info!("Loading JSON format from {}", path_display);
354                let file = std::fs::File::open(path)
355                    .with_context(|| format!("Could not open {}", path_display))?;
356                let mut reader = BufReader::new(file);
357                let mut result = Vec::new();
358                let mut byte = [0u8; 1];
359
360                // Skip whitespace and opening bracket
361                loop {
362                    reader
363                        .read_exact(&mut byte)
364                        .with_context(|| format!("Unexpected end of file in {}", path_display))?;
365                    match byte[0] {
366                        b'[' => break,
367                        b if b.is_ascii_whitespace() => continue,
368                        _ => bail!("Expected '[' at start of JSON array in {}", path_display),
369                    }
370                }
371
372                // Parse comma-separated values until ']'
373                let mut token = String::new();
374                let mut index = 0usize;
375                loop {
376                    reader
377                        .read_exact(&mut byte)
378                        .with_context(|| format!("Unexpected end of file in {}", path_display))?;
379                    match byte[0] {
380                        b']' => {
381                            let trimmed = token.trim();
382                            if !trimmed.is_empty() {
383                                result.push(trimmed.parse::<F>().map_err(|e| {
384                                    anyhow!(
385                                        "Error parsing element {} of {}: {}",
386                                        index + 1,
387                                        path_display,
388                                        e
389                                    )
390                                })?);
391                            }
392                            break;
393                        }
394                        b',' => {
395                            let trimmed = token.trim();
396                            result.push(trimmed.parse::<F>().map_err(|e| {
397                                anyhow!(
398                                    "Error parsing element {} of {}: {}",
399                                    index + 1,
400                                    path_display,
401                                    e
402                                )
403                            })?);
404                            token.clear();
405                            index += 1;
406                        }
407                        c => {
408                            token.push(c as char);
409                        }
410                    }
411                }
412                Ok(result)
413            }
414        }
415    }
416}
417
#[derive(Debug, Clone, Copy, ValueEnum)]
/// How to store vectors of integers.
///
/// The format is selected on the command line (the enum derives
/// [`ValueEnum`]); writing is implemented by the `store` and `store_usizes`
/// methods of this type.
pub enum IntVectorFormat {
    /// Java-compatible format: a sequence of big-endian longs (64 bits).
    Java,
    /// A slice of usize serialized using ε-serde.
    Epserde,
    /// A BitFieldVec stored using ε-serde. It stores each element using
    /// ⌊log₂(max)⌋ + 1 bits. It requires to allocate the `BitFieldVec` in RAM
    /// before serializing it.
    BitFieldVec,
    /// ASCII format, one integer per line.
    Ascii,
    /// A JSON Array.
    Json,
}
434
435impl IntVectorFormat {
436    /// Stores a vector of `u64` in the specified `path` using the format defined by `self`.
437    ///
438    /// `max` is the maximum value of the vector. If it is not provided, it will
439    /// be computed from the data.
440    pub fn store(&self, path: impl AsRef<Path>, data: &[u64], max: Option<u64>) -> Result<()> {
441        // Ensure the parent directory exists
442        create_parent_dir(&path)?;
443
444        let mut file = std::fs::File::create(&path)
445            .with_context(|| format!("Could not create vector at {}", path.as_ref().display()))?;
446        let mut buf = BufWriter::new(&mut file);
447
448        debug_assert_eq!(
449            max,
450            max.map(|_| { data.iter().copied().max().unwrap_or(0) }),
451            "The wrong maximum value was provided for the vector"
452        );
453
454        match self {
455            IntVectorFormat::Epserde => {
456                log::info!("Storing in epserde format at {}", path.as_ref().display());
457                unsafe {
458                    data.serialize(&mut buf).with_context(|| {
459                        format!("Could not write vector to {}", path.as_ref().display())
460                    })
461                }?;
462            }
463            IntVectorFormat::BitFieldVec => {
464                log::info!(
465                    "Storing in BitFieldVec format at {}",
466                    path.as_ref().display()
467                );
468                let max = max.unwrap_or_else(|| {
469                    data.iter()
470                        .copied()
471                        .max()
472                        .unwrap_or_else(|| panic!("Empty vector"))
473                });
474                let bit_width = max.len() as usize;
475                log::info!("Using {} bits per element", bit_width);
476                let mut bit_field_vec = <BitFieldVec<u64, _>>::with_capacity(bit_width, data.len());
477                bit_field_vec.extend(data.iter().copied());
478                unsafe {
479                    bit_field_vec.store(&path).with_context(|| {
480                        format!("Could not write vector to {}", path.as_ref().display())
481                    })
482                }?;
483            }
484            IntVectorFormat::Java => {
485                log::info!("Storing in Java format at {}", path.as_ref().display());
486                for word in data.iter() {
487                    buf.write_all(&word.to_be_bytes()).with_context(|| {
488                        format!("Could not write vector to {}", path.as_ref().display())
489                    })?;
490                }
491            }
492            IntVectorFormat::Ascii => {
493                log::info!("Storing in ASCII format at {}", path.as_ref().display());
494                for word in data.iter() {
495                    writeln!(buf, "{}", word).with_context(|| {
496                        format!("Could not write vector to {}", path.as_ref().display())
497                    })?;
498                }
499            }
500            IntVectorFormat::Json => {
501                log::info!("Storing in JSON format at {}", path.as_ref().display());
502                write!(buf, "[")?;
503                for word in data.iter().take(data.len().saturating_sub(1)) {
504                    write!(buf, "{}, ", word).with_context(|| {
505                        format!("Could not write vector to {}", path.as_ref().display())
506                    })?;
507                }
508                if let Some(last) = data.last() {
509                    write!(buf, "{}", last).with_context(|| {
510                        format!("Could not write vector to {}", path.as_ref().display())
511                    })?;
512                }
513                write!(buf, "]")?;
514            }
515        };
516
517        Ok(())
518    }
519
520    #[cfg(target_pointer_width = "64")]
521    /// Stores a vector of `usize` in the specified `path` using the format defined by `self`.
522    /// `max` is the maximum value of the vector, if it is not provided, it will
523    /// be computed from the data.
524    ///
525    /// This helper method is available only on 64-bit architectures as Java's format
526    /// uses 64-bit integers.
527    pub fn store_usizes(
528        &self,
529        path: impl AsRef<Path>,
530        data: &[usize],
531        max: Option<usize>,
532    ) -> Result<()> {
533        self.store(
534            path,
535            unsafe { core::mem::transmute::<&[usize], &[u64]>(data) },
536            max.map(|x| x as u64),
537        )
538    }
539}
540
541/// Parses a batch size.
542///
543/// This function accepts either a number (possibly followed by a
544/// SI or NIST multiplier k, M, G, T, P, ki, Mi, Gi, Ti, or Pi), or a percentage
545/// (followed by a `%`) that is interpreted as a percentage of the core
546/// memory. If the value ends with a `b` or `B` it is interpreted as a number of
547/// bytes, otherwise as a number of elements.
548pub fn memory_usage_parser(arg: &str) -> anyhow::Result<MemoryUsage> {
549    const PREF_SYMS: [(&str, u64); 10] = [
550        ("ki", 1 << 10),
551        ("mi", 1 << 20),
552        ("gi", 1 << 30),
553        ("ti", 1 << 40),
554        ("pi", 1 << 50),
555        ("k", 1E3 as u64),
556        ("m", 1E6 as u64),
557        ("g", 1E9 as u64),
558        ("t", 1E12 as u64),
559        ("p", 1E15 as u64),
560    ];
561    let arg = arg.trim().to_ascii_lowercase();
562    ensure!(!arg.is_empty(), "empty string");
563
564    if arg.ends_with('%') {
565        let perc = arg[..arg.len() - 1].parse::<f64>()?;
566        ensure!((0.0..=100.0).contains(&perc), "percentage out of range");
567        return Ok(MemoryUsage::from_perc(perc));
568    }
569
570    let num_digits = arg
571        .chars()
572        .take_while(|c| c.is_ascii_digit() || *c == '.')
573        .count();
574
575    let number = arg[..num_digits].parse::<f64>()?;
576    let suffix = &arg[num_digits..].trim();
577
578    let prefix = suffix.strip_suffix('b').unwrap_or(suffix);
579    let multiplier = PREF_SYMS
580        .iter()
581        .find(|(x, _)| *x == prefix)
582        .map(|(_, m)| m)
583        .ok_or(anyhow!("invalid prefix symbol {}", suffix))?;
584
585    let value = (number * (*multiplier as f64)) as usize;
586    ensure!(value > 0, "batch size must be greater than zero");
587
588    if suffix.ends_with('b') {
589        Ok(MemoryUsage::MemorySize(value))
590    } else {
591        Ok(MemoryUsage::BatchSize(value))
592    }
593}
594
595#[derive(Args, Debug, Clone)]
596/// Shared CLI arguments for compression.
597pub struct CompressArgs {
598    /// The endianness of the graph to write
599    #[clap(short = 'E', long)]
600    pub endianness: Option<String>,
601
602    /// The compression windows
603    #[clap(short = 'w', long, default_value_t = 7)]
604    pub compression_window: usize,
605    /// The minimum interval length
606    #[clap(short = 'i', long, default_value_t = 4)]
607    pub min_interval_length: usize,
608    /// The maximum recursion depth for references (-1 for infinite recursion depth)
609    #[clap(short = 'r', long, default_value_t = 3)]
610    pub max_ref_count: isize,
611
612    #[arg(value_enum)]
613    #[clap(long, default_value = "gamma")]
614    /// The code to use for the outdegree
615    pub outdegrees: PrivCode,
616
617    #[arg(value_enum)]
618    #[clap(long, default_value = "unary")]
619    /// The code to use for the reference offsets
620    pub references: PrivCode,
621
622    #[arg(value_enum)]
623    #[clap(long, default_value = "gamma")]
624    /// The code to use for the blocks
625    pub blocks: PrivCode,
626
627    #[arg(value_enum)]
628    #[clap(long, default_value = "zeta3")]
629    /// The code to use for the residuals
630    pub residuals: PrivCode,
631
632    /// Whether to use Zuckerli's reference selection algorithm. This slows down the compression
633    /// process and requires more memory, but improves compression ratio and decoding speed.
634    #[clap(long)]
635    pub bvgraphz: bool,
636
637    /// How many nodes to process in a chunk; the default (10000) is usually a good
638    /// value.
639    #[clap(long, default_value = "10000")]
640    pub chunk_size: usize,
641}
642
643impl From<CompressArgs> for CompFlags {
644    fn from(value: CompressArgs) -> Self {
645        CompFlags {
646            outdegrees: value.outdegrees.into(),
647            references: value.references.into(),
648            blocks: value.blocks.into(),
649            intervals: PrivCode::Gamma.into(),
650            residuals: value.residuals.into(),
651            min_interval_length: value.min_interval_length,
652            compression_window: value.compression_window,
653            max_ref_count: match value.max_ref_count {
654                -1 => usize::MAX,
655                max_ref_count => {
656                    assert!(
657                        max_ref_count >= 0,
658                        "max_ref_count cannot be negative, except for -1, which means infinite recursion depth, but got {}",
659                        max_ref_count
660                    );
661                    value.max_ref_count as usize
662                }
663            },
664        }
665    }
666}
667
668/// Creates a [`ThreadPool`](rayon::ThreadPool) with the given number of threads.
669pub fn get_thread_pool(num_threads: usize) -> rayon::ThreadPool {
670    rayon::ThreadPoolBuilder::new()
671        .num_threads(num_threads)
672        .build()
673        .expect("Failed to create thread pool")
674}
675
/// Returns a copy of `path` in which `s` has been appended to the last
/// component (the filename).
///
/// # Panics
/// * Will panic if there is no filename.
/// * Will panic, when debug assertions are enabled, if the path has an
///   extension.
pub fn append(path: impl AsRef<Path>, s: impl AsRef<str>) -> PathBuf {
    let path = path.as_ref();
    debug_assert!(path.extension().is_none());

    let mut new_name = path.file_name().unwrap().to_os_string();
    new_name.push(s.as_ref());
    path.with_file_name(new_name)
}
689
690/// Creates all parent directories of the given file path.
691pub fn create_parent_dir(file_path: impl AsRef<Path>) -> Result<()> {
692    // ensure that the dst directory exists
693    if let Some(parent_dir) = file_path.as_ref().parent() {
694        std::fs::create_dir_all(parent_dir).with_context(|| {
695            format!(
696                "Failed to create the directory {:?}",
697                parent_dir.to_string_lossy()
698            )
699        })?;
700    }
701    Ok(())
702}
703
704/// Parses a duration from a string.
705/// For compatibility with Java, if no suffix is given, it is assumed to be in milliseconds.
706/// You can use suffixes, the available ones are:
707/// - `s` for seconds
708/// - `m` for minutes
709/// - `h` for hours
710/// - `d` for days
711///
712/// Example: `1d2h3m4s567` this is parsed as: 1 day, 2 hours, 3 minutes, 4 seconds, and 567 milliseconds.
713fn parse_duration(value: &str) -> Result<Duration> {
714    if value.is_empty() {
715        bail!("Empty duration string, if you want every 0 milliseconds use `0`.");
716    }
717    let mut duration = Duration::from_secs(0);
718    let mut acc = String::new();
719    for c in value.chars() {
720        if c.is_ascii_digit() {
721            acc.push(c);
722        } else if c.is_whitespace() {
723            continue;
724        } else {
725            let dur = acc.parse::<u64>()?;
726            match c {
727                's' => duration += Duration::from_secs(dur),
728                'm' => duration += Duration::from_secs(dur * 60),
729                'h' => duration += Duration::from_secs(dur * 60 * 60),
730                'd' => duration += Duration::from_secs(dur * 60 * 60 * 24),
731                _ => return Err(anyhow!("Invalid duration suffix: {}", c)),
732            }
733            acc.clear();
734        }
735    }
736    if !acc.is_empty() {
737        let dur = acc.parse::<u64>()?;
738        duration += Duration::from_millis(dur);
739    }
740    Ok(duration)
741}
742
743/// Initializes the `env_logger` logger with a custom format including
744/// timestamps with elapsed time since initialization.
745pub fn init_env_logger() -> Result<()> {
746    use jiff::SpanRound;
747    use jiff::fmt::friendly::{Designator, Spacing, SpanPrinter};
748
749    let mut builder =
750        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"));
751
752    let start = std::time::Instant::now();
753    let printer = SpanPrinter::new()
754        .spacing(Spacing::None)
755        .designator(Designator::Compact);
756    let span_round = SpanRound::new()
757        .largest(jiff::Unit::Day)
758        .smallest(jiff::Unit::Millisecond)
759        .days_are_24_hours();
760
761    builder.format(move |buf, record| {
762        let Ok(ts) = jiff::Timestamp::try_from(SystemTime::now()) else {
763            return Err(std::io::Error::other("Failed to get timestamp"));
764        };
765        let style = buf.default_level_style(record.level());
766        let elapsed = start.elapsed();
767        let span = jiff::Span::new()
768            .seconds(elapsed.as_secs() as i64)
769            .milliseconds(elapsed.subsec_millis() as i64);
770        let span = span.round(span_round).expect("Failed to round span");
771        writeln!(
772            buf,
773            "{} {} {style}{}{style:#} [{:?}] {} - {}",
774            ts.strftime("%F %T%.3f"),
775            printer.span_to_string(&span),
776            record.level(),
777            std::thread::current().id(),
778            record.target(),
779            record.args()
780        )
781    });
782    builder.init();
783    Ok(())
784}
785
// Options shared by all subcommands; this struct is flattened into `Cli`.
// Plain `//` comments are used here on purpose: `///` comments on clap
// items become part of the generated help output.
#[derive(Args, Debug)]
pub struct GlobalArgs {
    // The value is parsed by `parse_duration`; the `///` text below is the
    // user-visible help for `--log-interval`. The field is `None` when the
    // option is not given.
    // NOTE(review): the documented 10s default is not applied here;
    // presumably the consumers of this field supply it — confirm.
    #[arg(long, value_parser = parse_duration, global=true, display_order = 1000)]
    /// How often to log progress. Default is 10s. You can use the suffixes "s"
    /// for seconds, "m" for minutes, "h" for hours, and "d" for days. If no
    /// suffix is provided it is assumed to be in milliseconds.
    /// Example: "1d2h3m4s567" is parsed as 1 day + 2 hours + 3 minutes + 4
    /// seconds + 567 milliseconds = 93784567 milliseconds.
    pub log_interval: Option<Duration>,
}
796
// Top-level command groups of the CLI. Every variant wraps the
// `SubCommands` enum of the module with the same (lowercase) name, and
// `cli_main` dispatches each variant to that module's `main` function.
// Plain `//` comments are used on purpose: `///` comments on clap items
// become part of the generated help output.
#[derive(Subcommand, Debug)]
pub enum SubCommands {
    // Nested subcommands defined in the `analyze` module.
    #[command(subcommand)]
    Analyze(analyze::SubCommands),
    // Nested subcommands defined in the `bench` module.
    #[command(subcommand)]
    Bench(bench::SubCommands),
    // Nested subcommands defined in the `build` module.
    #[command(subcommand)]
    Build(build::SubCommands),
    // Nested subcommands defined in the `check` module.
    #[command(subcommand)]
    Check(check::SubCommands),
    // Nested subcommands defined in the `from` module.
    #[command(subcommand)]
    From(from::SubCommands),
    // Nested subcommands defined in the `perm` module.
    #[command(subcommand)]
    Perm(perm::SubCommands),
    // Nested subcommands defined in the `run` module.
    #[command(subcommand)]
    Run(run::SubCommands),
    // Nested subcommands defined in the `to` module.
    #[command(subcommand)]
    To(to::SubCommands),
    // Nested subcommands defined in the `transform` module.
    #[command(subcommand)]
    Transform(transform::SubCommands),
}
818
// Root of the command-line parser: the binary is named "webgraph" and its
// version string comes from `build_info::version_string()`.
// The `///` line and the `#[doc = include_str!(...)]` attribute below are
// doc attributes of the struct; with clap's derive they also feed the
// generated help text, so they must not be edited casually.
#[derive(Parser, Debug)]
#[command(name = "webgraph", version=build_info::version_string())]
/// Webgraph tools to build, convert, modify, and analyze graphs.
#[doc = include_str!("common_env.txt")]
pub struct Cli {
    // The selected top-level command group.
    #[command(subcommand)]
    pub command: SubCommands,
    // Options shared by all subcommands, flattened into this parser.
    #[clap(flatten)]
    pub args: GlobalArgs,
}
829
// Public modules that do not correspond to a `SubCommands` variant.
// NOTE(review): presumably these hold the implementations of individual
// tools (distances, ranking, strongly connected components) — confirm.
pub mod dist;
pub mod rank;
pub mod sccs;

// One module per `SubCommands` variant; each exposes its own `SubCommands`
// enum and the `main` function invoked by `cli_main`.
pub mod analyze;
pub mod bench;
pub mod build;
pub mod check;
pub mod from;
pub mod perm;
pub mod run;
pub mod to;
pub mod transform;
843
844/// The entry point of the command-line interface.
845pub fn cli_main<I, T>(args: I) -> Result<()>
846where
847    I: IntoIterator<Item = T>,
848    T: Into<std::ffi::OsString> + Clone,
849{
850    let start = std::time::Instant::now();
851    let cli = Cli::parse_from(args);
852    match cli.command {
853        SubCommands::Analyze(args) => {
854            analyze::main(cli.args, args)?;
855        }
856        SubCommands::Bench(args) => {
857            bench::main(cli.args, args)?;
858        }
859        SubCommands::Build(args) => {
860            build::main(cli.args, args, Cli::command())?;
861        }
862        SubCommands::Check(args) => {
863            check::main(cli.args, args)?;
864        }
865        SubCommands::From(args) => {
866            from::main(cli.args, args)?;
867        }
868        SubCommands::Perm(args) => {
869            perm::main(cli.args, args)?;
870        }
871        SubCommands::Run(args) => {
872            run::main(cli.args, args)?;
873        }
874        SubCommands::To(args) => {
875            to::main(cli.args, args)?;
876        }
877        SubCommands::Transform(args) => {
878            transform::main(cli.args, args)?;
879        }
880    }
881
882    log::info!(
883        "The command took {}",
884        pretty_print_elapsed(start.elapsed().as_secs_f64())
885    );
886
887    Ok(())
888}
889
/// Pretty-prints a number of seconds in a human-readable format.
///
/// The result lists the nonzero weeks, days, hours and minutes (with the
/// proper singular or plural form), followed by the remaining seconds with
/// millisecond precision and, in parentheses, the raw value of `elapsed`;
/// for example, `3725.5` becomes `1 hour 2 minutes 5.500 seconds (3725.5s)`.
///
/// The value is rounded to the nearest millisecond *before* being split
/// into units, so a fractional part that rounds up carries into the larger
/// units: `59.9996` is rendered as `1 minute 0.000 seconds`, never as
/// `60.000 seconds`.
///
/// `elapsed` is expected to be finite and nonnegative; negative or NaN
/// values are treated as zero in the decomposed part.
fn pretty_print_elapsed(elapsed: f64) -> String {
    /// Units larger than a second, in decreasing order, with their length
    /// in milliseconds.
    const UNITS: [(u64, &str); 4] = [
        (7 * 24 * 60 * 60 * 1_000, "week"),
        (24 * 60 * 60 * 1_000, "day"),
        (60 * 60 * 1_000, "hour"),
        (60 * 1_000, "minute"),
    ];

    // Float-to-integer `as` casts saturate, so out-of-range inputs cannot
    // cause undefined behavior or a panic.
    let mut remaining_ms = (elapsed * 1000.0).round() as u64;
    let mut result = String::new();

    for &(unit_ms, name) in UNITS.iter() {
        let count = remaining_ms / unit_ms;
        remaining_ms %= unit_ms;
        match count {
            0 => {}
            1 => result.push_str(&format!("1 {} ", name)),
            _ => result.push_str(&format!("{} {}s ", count, name)),
        }
    }

    // What is left is strictly less than one minute.
    result.push_str(&format!(
        "{}.{:03} seconds ({}s)",
        remaining_ms / 1_000,
        remaining_ms % 1_000,
        elapsed
    ));
    result
}
927
928#[cfg(test)]
929mod tests {
930    use super::*;
931
    /// Tests for `FloatVectorFormat::store` and `FloatVectorFormat::load`;
    /// every test writes into a fresh temporary directory.
    mod float_vector_format {
        use super::*;

        /// ASCII output with no explicit precision: three lines, each
        /// parsing back to the stored `f64` within 1e-10.
        #[test]
        fn test_ascii_f64() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.txt");
            let values: Vec<f64> = vec![1.5, 2.75, 3.0];
            FloatVectorFormat::Ascii
                .store(&path, &values, None)
                .unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            // Default precision is f64::DIGITS (15)
            for (line, expected) in content.lines().zip(&values) {
                let parsed: f64 = line.trim().parse().unwrap();
                assert!((parsed - expected).abs() < 1e-10);
            }
            assert_eq!(content.lines().count(), 3);
        }

        /// ASCII output of `f32` values: each line parses back to the
        /// stored value within 1e-6.
        #[test]
        fn test_ascii_f32() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.txt");
            let values: Vec<f32> = vec![1.5, 2.75, 3.0];
            FloatVectorFormat::Ascii
                .store(&path, &values, None)
                .unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            for (line, expected) in content.lines().zip(&values) {
                let parsed: f32 = line.trim().parse().unwrap();
                assert!((parsed - expected).abs() < 1e-6);
            }
        }

        /// With `Some(3)` the ASCII lines carry exactly three decimal
        /// digits, rounded.
        #[test]
        fn test_ascii_with_precision() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.txt");
            let values: Vec<f64> = vec![1.123456789, 2.987654321];
            FloatVectorFormat::Ascii
                .store(&path, &values, Some(3))
                .unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            let lines: Vec<&str> = content.lines().collect();
            assert_eq!(lines[0], "1.123");
            assert_eq!(lines[1], "2.988");
        }

        /// JSON output is parsed by `serde_json` back into the stored
        /// values.
        #[test]
        fn test_json_f64() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.json");
            let values: Vec<f64> = vec![1.5, 2.75, 3.0];
            FloatVectorFormat::Json.store(&path, &values, None).unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            let parsed: Vec<f64> = serde_json::from_str(&content).unwrap();
            assert_eq!(parsed, values);
        }

        /// With `Some(2)` the JSON file content is exactly `[1.12, 2.99]`.
        #[test]
        fn test_json_with_precision() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.json");
            let values: Vec<f64> = vec![1.123456789, 2.987654321];
            FloatVectorFormat::Json
                .store(&path, &values, Some(2))
                .unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            assert_eq!(content, "[1.12, 2.99]");
        }

        /// An empty slice is stored in JSON as exactly `[]`.
        #[test]
        fn test_json_empty() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.json");
            let values: Vec<f64> = vec![];
            FloatVectorFormat::Json.store(&path, &values, None).unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            assert_eq!(content, "[]");
        }

        /// A single-element slice produces JSON that parses back to the
        /// same one-element vector.
        #[test]
        fn test_json_single_element() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.json");
            let values: Vec<f64> = vec![42.0];
            FloatVectorFormat::Json.store(&path, &values, None).unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            let parsed: Vec<f64> = serde_json::from_str(&content).unwrap();
            assert_eq!(parsed, values);
        }

        /// The Java format is a bare sequence of big-endian 8-byte `f64`
        /// values: the file is exactly `3 * 8` bytes long.
        #[test]
        fn test_java_f64() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let values: Vec<f64> = vec![1.5, 2.75, 3.0];
            FloatVectorFormat::Java.store(&path, &values, None).unwrap();
            let bytes = std::fs::read(&path).unwrap();
            assert_eq!(bytes.len(), 3 * 8);
            for (i, expected) in values.iter().enumerate() {
                let chunk: [u8; 8] = bytes[i * 8..(i + 1) * 8].try_into().unwrap();
                let val = f64::from_be_bytes(chunk);
                assert_eq!(val, *expected);
            }
        }

        /// The Java format for `f32` is a bare sequence of big-endian
        /// 4-byte values: the file is exactly `3 * 4` bytes long.
        #[test]
        fn test_java_f32() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let values: Vec<f32> = vec![1.5, 2.75, 3.0];
            FloatVectorFormat::Java.store(&path, &values, None).unwrap();
            let bytes = std::fs::read(&path).unwrap();
            assert_eq!(bytes.len(), 3 * 4);
            for (i, expected) in values.iter().enumerate() {
                let chunk: [u8; 4] = bytes[i * 4..(i + 1) * 4].try_into().unwrap();
                let val = f32::from_be_bytes(chunk);
                assert_eq!(val, *expected);
            }
        }

        /// Smoke test: storing in the Epserde format creates a non-empty
        /// file (its content is not inspected).
        #[test]
        fn test_epserde_f64() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let values: Vec<f64> = vec![1.5, 2.75, 3.0];
            FloatVectorFormat::Epserde
                .store(&path, &values, None)
                .unwrap();
            // Just verify the file was created and is non-empty
            let metadata = std::fs::metadata(&path).unwrap();
            assert!(metadata.len() > 0);
        }

        /// An empty slice is stored in ASCII as an empty file.
        #[test]
        fn test_ascii_empty() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.txt");
            let values: Vec<f64> = vec![];
            FloatVectorFormat::Ascii
                .store(&path, &values, None)
                .unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            assert!(content.is_empty());
        }

        /// `store` succeeds when the parent directories `a/b` of the target
        /// do not exist yet, and the file exists afterwards.
        #[test]
        fn test_creates_parent_dirs() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("a").join("b").join("test.txt");
            let values: Vec<f64> = vec![1.0];
            FloatVectorFormat::Ascii
                .store(&path, &values, None)
                .unwrap();
            assert!(path.exists());
        }

        /// `store` followed by `load` in ASCII returns the original `f64`
        /// values, including zero and a negative value.
        #[test]
        fn test_roundtrip_ascii_f64() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.txt");
            let values: Vec<f64> = vec![1.5, 2.75, 3.0, 0.0, -1.25];
            FloatVectorFormat::Ascii
                .store(&path, &values, None)
                .unwrap();
            let loaded: Vec<f64> = FloatVectorFormat::Ascii.load(&path).unwrap();
            assert_eq!(loaded, values);
        }

        /// `store` followed by `load` in JSON returns the original `f64`
        /// values.
        #[test]
        fn test_roundtrip_json_f64() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.json");
            let values: Vec<f64> = vec![1.5, 2.75, 3.0, 0.0, -1.25];
            FloatVectorFormat::Json.store(&path, &values, None).unwrap();
            let loaded: Vec<f64> = FloatVectorFormat::Json.load(&path).unwrap();
            assert_eq!(loaded, values);
        }

        /// `store` followed by `load` in the Java format returns the
        /// original `f64` values.
        #[test]
        fn test_roundtrip_java_f64() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let values: Vec<f64> = vec![1.5, 2.75, 3.0, 0.0, -1.25];
            FloatVectorFormat::Java.store(&path, &values, None).unwrap();
            let loaded: Vec<f64> = FloatVectorFormat::Java.load(&path).unwrap();
            assert_eq!(loaded, values);
        }

        /// `store` followed by `load` in the Epserde format returns the
        /// original `f64` values.
        #[test]
        fn test_roundtrip_epserde_f64() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let values: Vec<f64> = vec![1.5, 2.75, 3.0, 0.0, -1.25];
            FloatVectorFormat::Epserde
                .store(&path, &values, None)
                .unwrap();
            let loaded: Vec<f64> = FloatVectorFormat::Epserde.load(&path).unwrap();
            assert_eq!(loaded, values);
        }

        /// An empty vector survives a store/load roundtrip in each of the
        /// four formats.
        #[test]
        fn test_roundtrip_empty() {
            let dir = tempfile::tempdir().unwrap();
            for (fmt, ext) in [
                (FloatVectorFormat::Ascii, "txt"),
                (FloatVectorFormat::Json, "json"),
                (FloatVectorFormat::Java, "bin"),
                (FloatVectorFormat::Epserde, "eps"),
            ] {
                let path = dir.path().join(format!("empty.{ext}"));
                let values: Vec<f64> = vec![];
                fmt.store(&path, &values, None).unwrap();
                let loaded: Vec<f64> = fmt.load(&path).unwrap();
                assert_eq!(loaded, values, "roundtrip failed for {ext}");
            }
        }

        /// `f32` values survive a store/load roundtrip in each of the four
        /// formats.
        #[test]
        fn test_roundtrip_f32() {
            let dir = tempfile::tempdir().unwrap();
            let values: Vec<f32> = vec![1.5, 2.75, 3.0, 0.0, -1.25];
            for (fmt, ext) in [
                (FloatVectorFormat::Ascii, "txt"),
                (FloatVectorFormat::Json, "json"),
                (FloatVectorFormat::Java, "bin"),
                (FloatVectorFormat::Epserde, "eps"),
            ] {
                let path = dir.path().join(format!("f32.{ext}"));
                fmt.store(&path, &values, None).unwrap();
                let loaded: Vec<f32> = fmt.load(&path).unwrap();
                assert_eq!(loaded, values, "f32 roundtrip failed for {ext}");
            }
        }
    }
1169
    /// Tests for `IntVectorFormat::store` and `IntVectorFormat::store_usizes`;
    /// every test writes into a fresh temporary directory.
    mod int_vector_format {
        use super::*;

        /// ASCII output has one value per line, parsing back to the stored
        /// `u64` values.
        #[test]
        fn test_ascii() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.txt");
            let data: Vec<u64> = vec![10, 20, 30];
            IntVectorFormat::Ascii.store(&path, &data, None).unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            let lines: Vec<u64> = content.lines().map(|l| l.trim().parse().unwrap()).collect();
            assert_eq!(lines, data);
        }

        /// An empty slice is stored in ASCII as an empty file.
        #[test]
        fn test_ascii_empty() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.txt");
            let data: Vec<u64> = vec![];
            IntVectorFormat::Ascii.store(&path, &data, None).unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            assert!(content.is_empty());
        }

        /// JSON output is parsed by `serde_json` back into the stored
        /// values.
        #[test]
        fn test_json() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.json");
            let data: Vec<u64> = vec![10, 20, 30];
            IntVectorFormat::Json.store(&path, &data, None).unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            let parsed: Vec<u64> = serde_json::from_str(&content).unwrap();
            assert_eq!(parsed, data);
        }

        /// An empty slice is stored in JSON as exactly `[]`.
        #[test]
        fn test_json_empty() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.json");
            let data: Vec<u64> = vec![];
            IntVectorFormat::Json.store(&path, &data, None).unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            assert_eq!(content, "[]");
        }

        /// A single-element slice produces JSON that parses back to the
        /// same one-element vector.
        #[test]
        fn test_json_single_element() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.json");
            let data: Vec<u64> = vec![42];
            IntVectorFormat::Json.store(&path, &data, None).unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            let parsed: Vec<u64> = serde_json::from_str(&content).unwrap();
            assert_eq!(parsed, data);
        }

        /// The Java format is a bare sequence of big-endian 8-byte `u64`
        /// values: the file is exactly `3 * 8` bytes long.
        #[test]
        fn test_java() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let data: Vec<u64> = vec![1, 256, 65535];
            IntVectorFormat::Java.store(&path, &data, None).unwrap();
            let bytes = std::fs::read(&path).unwrap();
            assert_eq!(bytes.len(), 3 * 8);
            for (i, expected) in data.iter().enumerate() {
                let chunk: [u8; 8] = bytes[i * 8..(i + 1) * 8].try_into().unwrap();
                let val = u64::from_be_bytes(chunk);
                assert_eq!(val, *expected);
            }
        }

        /// An empty slice is stored in the Java format as an empty file.
        #[test]
        fn test_java_empty() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let data: Vec<u64> = vec![];
            IntVectorFormat::Java.store(&path, &data, None).unwrap();
            let bytes = std::fs::read(&path).unwrap();
            assert!(bytes.is_empty());
        }

        /// Smoke test: storing in the Epserde format creates a non-empty
        /// file (its content is not inspected).
        #[test]
        fn test_epserde() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let data: Vec<u64> = vec![10, 20, 30];
            IntVectorFormat::Epserde.store(&path, &data, None).unwrap();
            let metadata = std::fs::metadata(&path).unwrap();
            assert!(metadata.len() > 0);
        }

        /// Smoke test: storing as `BitFieldVec` with an explicit maximum of
        /// 15 creates a non-empty file.
        #[test]
        fn test_bitfieldvec() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let data: Vec<u64> = vec![1, 3, 7, 15];
            IntVectorFormat::BitFieldVec
                .store(&path, &data, Some(15))
                .unwrap();
            let metadata = std::fs::metadata(&path).unwrap();
            assert!(metadata.len() > 0);
        }

        /// Storing as `BitFieldVec` without an explicit maximum succeeds
        /// and creates the file.
        #[test]
        fn test_bitfieldvec_max_computed() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let data: Vec<u64> = vec![1, 3, 7, 15];
            // max is None, so it should be computed from data
            IntVectorFormat::BitFieldVec
                .store(&path, &data, None)
                .unwrap();
            assert!(path.exists());
        }

        /// `store` succeeds when the parent directories `a/b` of the target
        /// do not exist yet, and the file exists afterwards.
        #[test]
        fn test_creates_parent_dirs() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("a").join("b").join("test.txt");
            let data: Vec<u64> = vec![1];
            IntVectorFormat::Ascii.store(&path, &data, None).unwrap();
            assert!(path.exists());
        }

        /// `store_usizes` in ASCII writes one value per line, parsing back
        /// to the stored `usize` values (compiled on 64-bit targets only).
        #[cfg(target_pointer_width = "64")]
        #[test]
        fn test_store_usizes() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.txt");
            let data: Vec<usize> = vec![10, 20, 30];
            IntVectorFormat::Ascii
                .store_usizes(&path, &data, None)
                .unwrap();
            let content = std::fs::read_to_string(&path).unwrap();
            let lines: Vec<usize> = content.lines().map(|l| l.trim().parse().unwrap()).collect();
            assert_eq!(lines, data);
        }

        /// `store_usizes` in the Java format writes each value as a
        /// big-endian 8-byte integer (compiled on 64-bit targets only).
        #[cfg(target_pointer_width = "64")]
        #[test]
        fn test_store_usizes_java() {
            let dir = tempfile::tempdir().unwrap();
            let path = dir.path().join("test.bin");
            let data: Vec<usize> = vec![1, 256, 65535];
            IntVectorFormat::Java
                .store_usizes(&path, &data, None)
                .unwrap();
            let bytes = std::fs::read(&path).unwrap();
            assert_eq!(bytes.len(), 3 * 8);
            for (i, expected) in data.iter().enumerate() {
                let chunk: [u8; 8] = bytes[i * 8..(i + 1) * 8].try_into().unwrap();
                let val = u64::from_be_bytes(chunk) as usize;
                assert_eq!(val, *expected);
            }
        }
    }
1326}
webgraph_cli/lib.rs

webgraph_cli/
lib.rs