webgraph_cli/transform/
simplify.rs1use crate::*;
9use anyhow::Result;
10use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
11use mmap_rs::MmapFlags;
12use std::path::PathBuf;
13use tempfile::Builder;
14use webgraph::graphs::union_graph::UnionGraph;
15use webgraph::prelude::*;
16
17#[derive(Parser, Debug)]
18#[command(name = "simplify", about = "Makes a BvGraph simple (undirected and loopless) by adding missing arcs and removing loops, optionally applying a permutation.", long_about = None)]
19pub struct CliArgs {
20 pub src: PathBuf,
22 pub dst: PathBuf,
24
25 #[arg(long)]
26 pub transposed: Option<PathBuf>,
29
30 #[clap(flatten)]
31 pub num_threads: NumThreadsArg,
32
33 #[clap(flatten)]
34 pub memory_usage: MemoryUsageArg,
35
36 #[clap(flatten)]
37 pub ca: CompressArgs,
38
39 #[arg(long)]
40 pub permutation: Option<PathBuf>,
42}
43
44pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
45 create_parent_dir(&args.dst)?;
46
47 match get_endianness(&args.src)?.as_str() {
48 #[cfg(feature = "be_bins")]
49 BE::NAME => simplify::<BE>(global_args, args),
50 #[cfg(feature = "le_bins")]
51 LE::NAME => simplify::<LE>(global_args, args),
52 e => panic!("Unknown endianness: {}", e),
53 }
54}
55
56fn no_ef_warn(basepath: impl AsRef<std::path::Path>) {
57 log::warn!(SEQ_PROC_WARN![], basepath.as_ref().display());
58}
59
60pub fn simplify<E: Endianness>(_global_args: GlobalArgs, args: CliArgs) -> Result<()>
61where
62 MmapHelper<u32>: CodesReaderFactoryHelper<E>,
63 for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Clone + Send + Sync,
64{
65 let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
67
68 let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
69
70 let dir = Builder::new().prefix("transform_simplify_").tempdir()?;
71 let chunk_size = args.ca.chunk_size;
72 let bvgraphz = args.ca.bvgraphz;
73 let mut builder = BvCompConfig::new(&args.dst)
74 .with_comp_flags(args.ca.into())
75 .with_tmp_dir(&dir);
76
77 if bvgraphz {
78 builder = builder.with_chunk_size(chunk_size);
79 }
80
81 match (args.permutation, args.transposed) {
82 (None, Some(t_path)) => {
85 log::info!("Transposed graph provided, using it to simplify the graph");
86
87 let has_ef_graph =
88 std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file());
89 let has_ef_t_graph =
90 std::fs::metadata(t_path.with_extension("ef")).is_ok_and(|x| x.is_file());
91
92 match (has_ef_graph, has_ef_t_graph) {
93 (true, true) => {
94 log::info!("Both .ef files found, using simplify split");
95
96 let graph =
97 webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
98 .endianness::<E>()
99 .load()?;
100 let num_nodes = graph.num_nodes();
101 let graph_t =
102 webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&t_path)
103 .endianness::<E>()
104 .load()?;
105
106 if graph_t.num_nodes() != num_nodes {
107 anyhow::bail!(
108 "The number of nodes in the graph and its transpose do not match! {} != {}",
109 num_nodes,
110 graph_t.num_nodes()
111 );
112 }
113
114 let sorted = NoSelfLoopsGraph(UnionGraph(graph, graph_t));
115
116 thread_pool.install(|| {
117 builder.par_comp_lenders_endianness(
118 &sorted,
119 sorted.num_nodes(),
120 &target_endianness,
121 )
122 })?;
123
124 return Ok(());
125 }
126 (true, false) => {
127 no_ef_warn(&t_path);
128 }
129 (false, true) => {
130 no_ef_warn(&args.src);
131 }
132 (false, false) => {
133 no_ef_warn(&args.src);
134 no_ef_warn(&t_path);
135 }
136 }
137
138 let seq_graph =
139 webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
140 .endianness::<E>()
141 .load()?;
142 let num_nodes = seq_graph.num_nodes();
143 let seq_graph_t =
144 webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&t_path)
145 .endianness::<E>()
146 .load()?;
147
148 if seq_graph_t.num_nodes() != num_nodes {
149 anyhow::bail!(
150 "The number of nodes in the graph and its transpose do not match! {} != {}",
151 num_nodes,
152 seq_graph_t.num_nodes()
153 );
154 }
155
156 let sorted = NoSelfLoopsGraph(UnionGraph(seq_graph, seq_graph_t));
157
158 thread_pool.install(|| {
159 builder.par_comp_lenders_endianness(&sorted, sorted.num_nodes(), &target_endianness)
160 })?;
161 }
162 (Some(perm_path), None | Some(_)) => {
165 log::info!("Permutation provided, applying it to the graph");
166
167 let perm = JavaPermutation::mmap(perm_path, MmapFlags::RANDOM_ACCESS)?;
168
169 if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
171 log::info!(".ef file found, using simplify split");
172 let graph =
173 webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
174 .endianness::<E>()
175 .load()?;
176
177 let perm_graph = PermutedGraph {
178 graph: &graph,
179 perm: &perm,
180 };
181
182 thread_pool.install(|| {
183 let sorted = webgraph::transform::simplify_split(
184 &perm_graph,
185 args.memory_usage.memory_usage,
186 )?;
187
188 builder.par_comp_lenders_endianness(
189 &sorted,
190 sorted.num_nodes(),
191 &target_endianness,
192 )
193 })?;
194
195 return Ok(());
196 }
197
198 no_ef_warn(&args.src);
199
200 let seq_graph =
201 webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
202 .endianness::<E>()
203 .load()?;
204
205 let perm_graph = PermutedGraph {
206 graph: &seq_graph,
207 perm: &perm,
208 };
209
210 let sorted =
212 webgraph::transform::simplify(&perm_graph, args.memory_usage.memory_usage)?;
213
214 thread_pool.install(|| {
215 builder.par_comp_lenders_endianness(&sorted, sorted.num_nodes(), &target_endianness)
216 })?;
217 }
218 (None, None) => {
220 log::info!(
221 "No permutation or transposed graph provided, computing the transpose on the fly"
222 );
223 if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
225 log::info!(".ef file found, using simplify split");
226
227 let graph =
228 webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
229 .endianness::<E>()
230 .load()?;
231
232 thread_pool.install(|| {
233 let sorted = webgraph::transform::simplify_split(
234 &graph,
235 args.memory_usage.memory_usage,
236 )?;
237
238 builder.par_comp_lenders_endianness(
239 &sorted,
240 sorted.num_nodes(),
241 &target_endianness,
242 )
243 })?;
244
245 return Ok(());
246 }
247
248 no_ef_warn(&args.src);
249
250 let seq_graph =
251 webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
252 .endianness::<E>()
253 .load()?;
254
255 let sorted =
257 webgraph::transform::simplify_sorted(seq_graph, args.memory_usage.memory_usage)?;
258
259 thread_pool.install(|| {
260 builder.par_comp_lenders_endianness(&sorted, sorted.num_nodes(), &target_endianness)
261 })?;
262 }
263 }
264
265 Ok(())
266}