webgraph_cli/to/
bvgraph.rs1use crate::create_parent_dir;
9use crate::*;
10use anyhow::Result;
11use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12use dsi_bitstream::prelude::*;
13
14use mmap_rs::MmapFlags;
15use std::path::PathBuf;
16use tempfile::Builder;
17use webgraph::prelude::*;
18
19#[derive(Parser, Debug)]
20#[command(name = "bvgraph", about = "Recompresses a BvGraph, possibly applying a permutation to its node identifiers.", long_about = None)]
21pub struct CliArgs {
22 pub src: PathBuf,
24 pub dst: PathBuf,
26
27 #[clap(flatten)]
28 pub num_threads: NumThreadsArg,
29
30 #[arg(long)]
31 pub permutation: Option<PathBuf>,
33
34 #[clap(flatten)]
35 pub memory_usage: MemoryUsageArg,
36
37 #[clap(flatten)]
38 pub ca: CompressArgs,
39}
40
41pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
42 create_parent_dir(&args.dst)?;
43
44 let permutation = if let Some(path) = args.permutation.as_ref() {
45 Some(JavaPermutation::mmap(path, MmapFlags::RANDOM_ACCESS)?)
46 } else {
47 None
48 };
49
50 let target_endianness = args.ca.endianness.clone();
51 match get_endianness(&args.src)?.as_str() {
52 #[cfg(feature = "be_bins")]
53 BE::NAME => compress::<BE>(global_args, args, target_endianness, permutation)?,
54 #[cfg(feature = "le_bins")]
55 LE::NAME => compress::<LE>(global_args, args, target_endianness, permutation)?,
56 e => panic!("Unknown endianness: {}", e),
57 };
58 Ok(())
59}
60
61pub fn compress<E: Endianness>(
62 _global_args: GlobalArgs,
63 args: CliArgs,
64 target_endianness: Option<String>,
65 permutation: Option<JavaPermutation>,
66) -> Result<()>
67where
68 MmapHelper<u32>: CodesReaderFactoryHelper<E>,
69 for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Send + Sync + Clone,
70{
71 let dir = Builder::new().prefix("to_bvgraph_").tempdir()?;
72
73 let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
74 let chunk_size = args.ca.chunk_size;
75 let bvgraphz = args.ca.bvgraphz;
76 let mut builder = BvCompConfig::new(&args.dst)
77 .with_comp_flags(args.ca.into())
78 .with_tmp_dir(&dir);
79
80 if bvgraphz {
81 builder = builder.with_chunk_size(chunk_size);
82 }
83
84 if args.src.with_extension(EF_EXTENSION).exists() {
85 let graph = BvGraph::with_basename(&args.src).endianness::<E>().load()?;
86
87 if let Some(permutation) = permutation {
88 let memory_usage = args.memory_usage.memory_usage;
89 thread_pool.install(|| {
90 log::info!("Permuting graph with memory usage {}", memory_usage);
91 let start = std::time::Instant::now();
92 let sorted =
93 webgraph::transform::permute_split(&graph, &permutation, memory_usage)?;
94 log::info!(
95 "Permuted the graph. It took {:.3} seconds",
96 start.elapsed().as_secs_f64()
97 );
98 builder.par_comp_lenders_endianness(
99 &sorted,
100 sorted.num_nodes(),
101 &target_endianness.unwrap_or_else(|| BE::NAME.into()),
102 )
103 })?;
104 } else {
105 thread_pool.install(|| {
106 builder.par_comp_lenders_endianness(
107 &graph,
108 graph.num_nodes(),
109 &target_endianness.unwrap_or_else(|| BE::NAME.into()),
110 )
111 })?;
112 }
113 } else {
114 log::warn!(
115 "The .ef file does not exist. The graph will be read sequentially which will result in slower compression. If you can, run `webgraph build ef` before recompressing."
116 );
117 let seq_graph = BvGraphSeq::with_basename(&args.src)
118 .endianness::<E>()
119 .load()?;
120
121 if let Some(permutation) = permutation {
122 let memory_usage = args.memory_usage.memory_usage;
123
124 log::info!("Permuting graph with memory usage {}", memory_usage);
125 let start = std::time::Instant::now();
126 let permuted = webgraph::transform::permute(&seq_graph, &permutation, memory_usage)?;
127 log::info!(
128 "Permuted the graph. It took {:.3} seconds",
129 start.elapsed().as_secs_f64()
130 );
131
132 thread_pool.install(|| {
133 builder.par_comp_lenders_endianness(
134 &permuted,
135 permuted.num_nodes(),
136 &target_endianness.unwrap_or_else(|| BE::NAME.into()),
137 )
138 })?;
139 } else {
140 thread_pool.install(|| {
141 builder.par_comp_lenders_endianness(
142 &seq_graph,
143 seq_graph.num_nodes(),
144 &target_endianness.unwrap_or_else(|| BE::NAME.into()),
145 )
146 })?;
147 }
148 }
149 Ok(())
150}