webgraph_cli/transform/
transpose.rs

/*
 * SPDX-FileCopyrightText: 2023 Inria
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
 *
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
 */
7
8use crate::*;
9use anyhow::Result;
10use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
11use dsi_bitstream::prelude::*;
12use std::path::PathBuf;
13use tempfile::Builder;
14use webgraph::prelude::*;
15
16#[derive(Parser, Debug)]
17#[command(name = "transpose", about = "Transposes a BvGraph.", long_about = None)]
18pub struct CliArgs {
19    /// The basename of the graph.
20    pub src: PathBuf,
21    /// The basename of the transposed graph.
22    pub dst: PathBuf,
23
24    #[clap(flatten)]
25    pub num_threads: NumThreadsArg,
26
27    #[clap(flatten)]
28    pub batch_size: BatchSizeArg,
29
30    #[clap(flatten)]
31    pub ca: CompressArgs,
32}
33
34pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
35    create_parent_dir(&args.dst)?;
36
37    match get_endianness(&args.src)?.as_str() {
38        #[cfg(feature = "be_bins")]
39        BE::NAME => transpose::<BE>(global_args, args),
40        #[cfg(feature = "le_bins")]
41        LE::NAME => transpose::<LE>(global_args, args),
42        e => panic!("Unknown endianness: {}", e),
43    }
44}
45
46pub fn transpose<E: Endianness>(_global_args: GlobalArgs, args: CliArgs) -> Result<()>
47where
48    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
49{
50    let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
51
52    // TODO!: speed it up by using random access graph if possible
53    let seq_graph = webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
54        .endianness::<E>()
55        .load()?;
56
57    // transpose the graph
58    let sorted = webgraph::transform::transpose(&seq_graph, args.batch_size.batch_size).unwrap();
59
60    let target_endianness = args.ca.endianness.clone();
61    let dir = Builder::new().prefix("transform_transpose_").tempdir()?;
62    BvComp::parallel_endianness(
63        &args.dst,
64        &sorted,
65        sorted.num_nodes(),
66        args.ca.into(),
67        &thread_pool,
68        dir,
69        &target_endianness.unwrap_or_else(|| E::NAME.into()),
70    )?;
71
72    Ok(())
73}