1use crate::*;
9use anyhow::Result;
10use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
11use mmap_rs::MmapFlags;
12use std::path::PathBuf;
13use tempfile::Builder;
14use webgraph::graphs::union_graph::UnionGraph;
15use webgraph::prelude::*;
16
17#[derive(Parser, Debug)]
18#[command(name = "simplify", about = "Makes a BvGraph simple (undirected and loopless) by adding missing arcs and removing loops, optionally applying a permutation.", long_about = None)]
19pub struct CliArgs {
20 pub src: PathBuf,
22 pub dst: PathBuf,
24
25 #[arg(long)]
26 pub transposed: Option<PathBuf>,
29
30 #[clap(flatten)]
31 pub num_threads: NumThreadsArg,
32
33 #[clap(flatten)]
34 pub batch_size: BatchSizeArg,
35
36 #[clap(flatten)]
37 pub ca: CompressArgs,
38
39 #[arg(long)]
40 pub permutation: Option<PathBuf>,
42}
43
44pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
45 create_parent_dir(&args.dst)?;
46
47 match get_endianness(&args.src)?.as_str() {
48 #[cfg(feature = "be_bins")]
49 BE::NAME => simplify::<BE>(global_args, args),
50 #[cfg(feature = "le_bins")]
51 LE::NAME => simplify::<LE>(global_args, args),
52 e => panic!("Unknown endianness: {}", e),
53 }
54}
55
56fn no_ef_warn(basepath: impl AsRef<std::path::Path>) {
57 log::warn!(
58 "The .ef file was not found so the simplification will proceed sequentially. This may be slow. To speed it up, you can use `webgraph build ef {}` which would allow us create batches in parallel",
59 basepath.as_ref().display()
60 );
61}
62
63pub fn simplify<E: Endianness>(_global_args: GlobalArgs, args: CliArgs) -> Result<()>
64where
65 MmapHelper<u32>: CodesReaderFactoryHelper<E>,
66 for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Clone + Send + Sync,
67{
68 let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
70
71 let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
72
73 let dir = Builder::new().prefix("transform_simplify_").tempdir()?;
74
75 match (args.permutation, args.transposed) {
76 (None, Some(t_path)) => {
79 log::info!("Transposed graph provided, using it to simplify the graph");
80
81 let has_ef_graph =
82 std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file());
83 let has_ef_t_graph =
84 std::fs::metadata(t_path.with_extension("ef")).is_ok_and(|x| x.is_file());
85
86 match (has_ef_graph, has_ef_t_graph) {
87 (true, true) => {
88 log::info!("Both .ef files found, using simplify split");
89
90 let graph =
91 webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
92 .endianness::<E>()
93 .load()?;
94 let num_nodes = graph.num_nodes();
95 let graph_t =
96 webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&t_path)
97 .endianness::<E>()
98 .load()?;
99
100 if graph_t.num_nodes() != num_nodes {
101 anyhow::bail!(
102 "The number of nodes in the graph and its transpose do not match! {} != {}",
103 num_nodes,
104 graph_t.num_nodes()
105 );
106 }
107
108 let sorted = NoSelfLoopsGraph(UnionGraph(graph, graph_t));
109
110 BvComp::parallel_endianness(
111 &args.dst,
112 &sorted,
113 num_nodes,
114 args.ca.into(),
115 &thread_pool,
116 dir,
117 &target_endianness,
118 )?;
119
120 return Ok(());
121 }
122 (true, false) => {
123 no_ef_warn(&args.src);
124 }
125 (false, true) => {
126 no_ef_warn(&t_path);
127 }
128 (false, false) => {
129 no_ef_warn(&args.src);
130 no_ef_warn(&t_path);
131 }
132 }
133
134 let seq_graph =
135 webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
136 .endianness::<E>()
137 .load()?;
138 let num_nodes = seq_graph.num_nodes();
139 let seq_graph_t =
140 webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&t_path)
141 .endianness::<E>()
142 .load()?;
143
144 if seq_graph_t.num_nodes() != num_nodes {
145 anyhow::bail!(
146 "The number of nodes in the graph and its transpose do not match! {} != {}",
147 num_nodes,
148 seq_graph_t.num_nodes()
149 );
150 }
151
152 let sorted = NoSelfLoopsGraph(UnionGraph(seq_graph, seq_graph_t));
153
154 BvComp::parallel_endianness(
155 &args.dst,
156 &sorted,
157 num_nodes,
158 args.ca.into(),
159 &thread_pool,
160 dir,
161 &target_endianness,
162 )?;
163 }
164 (Some(perm_path), None | Some(_)) => {
167 log::info!("Permutation provided, applying it to the graph");
168
169 let perm = JavaPermutation::mmap(perm_path, MmapFlags::RANDOM_ACCESS)?;
170
171 if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
173 log::info!(".ef file found, using simplify split");
174 let graph =
175 webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
176 .endianness::<E>()
177 .load()?;
178
179 let perm_graph = PermutedGraph {
180 graph: &graph,
181 perm: &perm,
182 };
183
184 let sorted = webgraph::transform::simplify_split(
185 &perm_graph,
186 args.batch_size.batch_size,
187 &thread_pool,
188 )?;
189
190 BvComp::parallel_endianness(
191 &args.dst,
192 &sorted,
193 graph.num_nodes(),
194 args.ca.into(),
195 &thread_pool,
196 dir,
197 &target_endianness,
198 )?;
199
200 return Ok(());
201 }
202
203 no_ef_warn(&args.src);
204
205 let seq_graph =
206 webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
207 .endianness::<E>()
208 .load()?;
209
210 let perm_graph = PermutedGraph {
211 graph: &seq_graph,
212 perm: &perm,
213 };
214
215 let sorted =
217 webgraph::transform::simplify(&perm_graph, args.batch_size.batch_size).unwrap();
218
219 BvComp::parallel_endianness(
220 &args.dst,
221 &sorted,
222 sorted.num_nodes(),
223 args.ca.into(),
224 &thread_pool,
225 dir,
226 &target_endianness,
227 )?;
228 }
229 (None, None) => {
231 log::info!(
232 "No permutation or transposed graph provided, computing the transpose on the fly"
233 );
234 if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
236 log::info!(".ef file found, using simplify split");
237
238 let graph =
239 webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
240 .endianness::<E>()
241 .load()?;
242
243 let sorted = webgraph::transform::simplify_split(
244 &graph,
245 args.batch_size.batch_size,
246 &thread_pool,
247 )?;
248
249 BvComp::parallel_endianness(
250 &args.dst,
251 &sorted,
252 graph.num_nodes(),
253 args.ca.into(),
254 &thread_pool,
255 dir,
256 &target_endianness,
257 )?;
258
259 return Ok(());
260 }
261
262 no_ef_warn(&args.src);
263
264 let seq_graph =
265 webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
266 .endianness::<E>()
267 .load()?;
268
269 let num_nodes = seq_graph.num_nodes();
270 let sorted =
272 webgraph::transform::simplify_sorted(seq_graph, args.batch_size.batch_size)
273 .unwrap();
274
275 BvComp::parallel_endianness(
276 &args.dst,
277 &sorted,
278 num_nodes,
279 args.ca.into(),
280 &thread_pool,
281 dir,
282 &target_endianness,
283 )?;
284 }
285 }
286
287 Ok(())
288}