Skip to main content

webgraph_cli/to/
bvgraph.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use crate::create_parent_dir;
9use crate::*;
10use anyhow::Result;
11use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12use dsi_bitstream::prelude::*;
13
14use mmap_rs::MmapFlags;
15use std::path::PathBuf;
16use tempfile::Builder;
17use webgraph::prelude::*;
18
19#[derive(Parser, Debug)]
20#[command(name = "bvgraph", about = "Recompresses a BvGraph, possibly applying a permutation to its node identifiers.", long_about = None)]
21pub struct CliArgs {
22    /// The basename of the source graph.
23    pub src: PathBuf,
24    /// The basename of the destination graph.
25    pub dst: PathBuf,
26
27    #[clap(flatten)]
28    pub num_threads: NumThreadsArg,
29
30    #[arg(long)]
31    /// The path to an optional permutation in binary big-endian format to be applied to the graph.
32    pub permutation: Option<PathBuf>,
33
34    #[clap(flatten)]
35    pub memory_usage: MemoryUsageArg,
36
37    #[clap(flatten)]
38    pub ca: CompressArgs,
39}
40
41pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
42    create_parent_dir(&args.dst)?;
43
44    let permutation = if let Some(path) = args.permutation.as_ref() {
45        Some(JavaPermutation::mmap(path, MmapFlags::RANDOM_ACCESS)?)
46    } else {
47        None
48    };
49
50    let target_endianness = args.ca.endianness.clone();
51    match get_endianness(&args.src)?.as_str() {
52        #[cfg(feature = "be_bins")]
53        BE::NAME => compress::<BE>(global_args, args, target_endianness, permutation)?,
54        #[cfg(feature = "le_bins")]
55        LE::NAME => compress::<LE>(global_args, args, target_endianness, permutation)?,
56        e => panic!("Unknown endianness: {}", e),
57    };
58    Ok(())
59}
60
61pub fn compress<E: Endianness>(
62    _global_args: GlobalArgs,
63    args: CliArgs,
64    target_endianness: Option<String>,
65    permutation: Option<JavaPermutation>,
66) -> Result<()>
67where
68    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
69    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Send + Sync + Clone,
70{
71    let dir = Builder::new().prefix("to_bvgraph_").tempdir()?;
72
73    let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
74    let chunk_size = args.ca.chunk_size;
75    let bvgraphz = args.ca.bvgraphz;
76    let mut builder = BvCompConfig::new(&args.dst)
77        .with_comp_flags(args.ca.into())
78        .with_tmp_dir(&dir);
79
80    if bvgraphz {
81        builder = builder.with_chunk_size(chunk_size);
82    }
83
84    if args.src.with_extension(EF_EXTENSION).exists() {
85        let graph = BvGraph::with_basename(&args.src).endianness::<E>().load()?;
86
87        if let Some(permutation) = permutation {
88            let memory_usage = args.memory_usage.memory_usage;
89            thread_pool.install(|| {
90                log::info!("Permuting graph with memory usage {}", memory_usage);
91                let start = std::time::Instant::now();
92                let sorted =
93                    webgraph::transform::permute_split(&graph, &permutation, memory_usage)?;
94                log::info!(
95                    "Permuted the graph. It took {:.3} seconds",
96                    start.elapsed().as_secs_f64()
97                );
98                builder.par_comp_lenders_endianness(
99                    &sorted,
100                    sorted.num_nodes(),
101                    &target_endianness.unwrap_or_else(|| BE::NAME.into()),
102                )
103            })?;
104        } else {
105            thread_pool.install(|| {
106                builder.par_comp_lenders_endianness(
107                    &graph,
108                    graph.num_nodes(),
109                    &target_endianness.unwrap_or_else(|| BE::NAME.into()),
110                )
111            })?;
112        }
113    } else {
114        log::warn!(
115            "The .ef file does not exist. The graph will be read sequentially which will result in slower compression. If you can, run `webgraph build ef` before recompressing."
116        );
117        let seq_graph = BvGraphSeq::with_basename(&args.src)
118            .endianness::<E>()
119            .load()?;
120
121        if let Some(permutation) = permutation {
122            let memory_usage = args.memory_usage.memory_usage;
123
124            log::info!("Permuting graph with memory usage {}", memory_usage);
125            let start = std::time::Instant::now();
126            let permuted = webgraph::transform::permute(&seq_graph, &permutation, memory_usage)?;
127            log::info!(
128                "Permuted the graph. It took {:.3} seconds",
129                start.elapsed().as_secs_f64()
130            );
131
132            thread_pool.install(|| {
133                builder.par_comp_lenders_endianness(
134                    &permuted,
135                    permuted.num_nodes(),
136                    &target_endianness.unwrap_or_else(|| BE::NAME.into()),
137                )
138            })?;
139        } else {
140            thread_pool.install(|| {
141                builder.par_comp_lenders_endianness(
142                    &seq_graph,
143                    seq_graph.num_nodes(),
144                    &target_endianness.unwrap_or_else(|| BE::NAME.into()),
145                )
146            })?;
147        }
148    }
149    Ok(())
150}