Skip to main content

webgraph_cli/transform/
simplify.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use crate::*;
9use anyhow::Result;
10use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
11use mmap_rs::MmapFlags;
12use std::path::PathBuf;
13use tempfile::Builder;
14use webgraph::graphs::union_graph::UnionGraph;
15use webgraph::prelude::*;
16
17#[derive(Parser, Debug)]
18#[command(name = "simplify", about = "Makes a BvGraph simple (undirected and loopless) by adding missing arcs and removing loops, optionally applying a permutation.", long_about = None)]
19pub struct CliArgs {
20    /// The basename of the graph.
21    pub src: PathBuf,
22    /// The basename of the simplified graph.
23    pub dst: PathBuf,
24
25    #[arg(long)]
26    /// The basename of a pre-computed transposed version of the source graph, which
27    /// will be use to speed up the simplification.
28    pub transposed: Option<PathBuf>,
29
30    #[clap(flatten)]
31    pub num_threads: NumThreadsArg,
32
33    #[clap(flatten)]
34    pub memory_usage: MemoryUsageArg,
35
36    #[clap(flatten)]
37    pub ca: CompressArgs,
38
39    #[arg(long)]
40    /// The path to an optional permutation in binary big-endian format to apply to the graph.
41    pub permutation: Option<PathBuf>,
42}
43
44pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
45    create_parent_dir(&args.dst)?;
46
47    match get_endianness(&args.src)?.as_str() {
48        #[cfg(feature = "be_bins")]
49        BE::NAME => simplify::<BE>(global_args, args),
50        #[cfg(feature = "le_bins")]
51        LE::NAME => simplify::<LE>(global_args, args),
52        e => panic!("Unknown endianness: {}", e),
53    }
54}
55
56fn no_ef_warn(basepath: impl AsRef<std::path::Path>) {
57    log::warn!(SEQ_PROC_WARN![], basepath.as_ref().display());
58}
59
60pub fn simplify<E: Endianness>(_global_args: GlobalArgs, args: CliArgs) -> Result<()>
61where
62    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
63    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Clone + Send + Sync,
64{
65    // TODO!: speed it up by using random access graph if possible
66    let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
67
68    let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
69
70    let dir = Builder::new().prefix("transform_simplify_").tempdir()?;
71    let chunk_size = args.ca.chunk_size;
72    let bvgraphz = args.ca.bvgraphz;
73    let mut builder = BvCompConfig::new(&args.dst)
74        .with_comp_flags(args.ca.into())
75        .with_tmp_dir(&dir);
76
77    if bvgraphz {
78        builder = builder.with_chunk_size(chunk_size);
79    }
80
81    match (args.permutation, args.transposed) {
82        // load the transposed graph and use it to directly compress the graph
83        // without doing any sorting
84        (None, Some(t_path)) => {
85            log::info!("Transposed graph provided, using it to simplify the graph");
86
87            let has_ef_graph =
88                std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file());
89            let has_ef_t_graph =
90                std::fs::metadata(t_path.with_extension("ef")).is_ok_and(|x| x.is_file());
91
92            match (has_ef_graph, has_ef_t_graph) {
93                (true, true) => {
94                    log::info!("Both .ef files found, using simplify split");
95
96                    let graph =
97                        webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
98                            .endianness::<E>()
99                            .load()?;
100                    let num_nodes = graph.num_nodes();
101                    let graph_t =
102                        webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&t_path)
103                            .endianness::<E>()
104                            .load()?;
105
106                    if graph_t.num_nodes() != num_nodes {
107                        anyhow::bail!(
108                            "The number of nodes in the graph and its transpose do not match! {} != {}",
109                            num_nodes,
110                            graph_t.num_nodes()
111                        );
112                    }
113
114                    let sorted = NoSelfLoopsGraph(UnionGraph(graph, graph_t));
115
116                    thread_pool.install(|| {
117                        builder.par_comp_lenders_endianness(
118                            &sorted,
119                            sorted.num_nodes(),
120                            &target_endianness,
121                        )
122                    })?;
123
124                    return Ok(());
125                }
126                (true, false) => {
127                    no_ef_warn(&t_path);
128                }
129                (false, true) => {
130                    no_ef_warn(&args.src);
131                }
132                (false, false) => {
133                    no_ef_warn(&args.src);
134                    no_ef_warn(&t_path);
135                }
136            }
137
138            let seq_graph =
139                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
140                    .endianness::<E>()
141                    .load()?;
142            let num_nodes = seq_graph.num_nodes();
143            let seq_graph_t =
144                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&t_path)
145                    .endianness::<E>()
146                    .load()?;
147
148            if seq_graph_t.num_nodes() != num_nodes {
149                anyhow::bail!(
150                    "The number of nodes in the graph and its transpose do not match! {} != {}",
151                    num_nodes,
152                    seq_graph_t.num_nodes()
153                );
154            }
155
156            let sorted = NoSelfLoopsGraph(UnionGraph(seq_graph, seq_graph_t));
157
158            thread_pool.install(|| {
159                builder.par_comp_lenders_endianness(&sorted, sorted.num_nodes(), &target_endianness)
160            })?;
161        }
162        // apply the permutation, don't care if the transposed graph is already computed
163        // as we cannot really exploit it
164        (Some(perm_path), None | Some(_)) => {
165            log::info!("Permutation provided, applying it to the graph");
166
167            let perm = JavaPermutation::mmap(perm_path, MmapFlags::RANDOM_ACCESS)?;
168
169            // if the .ef file exists, we can use the simplify split
170            if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
171                log::info!(".ef file found, using simplify split");
172                let graph =
173                    webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
174                        .endianness::<E>()
175                        .load()?;
176
177                let perm_graph = PermutedGraph {
178                    graph: &graph,
179                    perm: &perm,
180                };
181
182                thread_pool.install(|| {
183                    let sorted = webgraph::transform::simplify_split(
184                        &perm_graph,
185                        args.memory_usage.memory_usage,
186                    )?;
187
188                    builder.par_comp_lenders_endianness(
189                        &sorted,
190                        sorted.num_nodes(),
191                        &target_endianness,
192                    )
193                })?;
194
195                return Ok(());
196            }
197
198            no_ef_warn(&args.src);
199
200            let seq_graph =
201                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
202                    .endianness::<E>()
203                    .load()?;
204
205            let perm_graph = PermutedGraph {
206                graph: &seq_graph,
207                perm: &perm,
208            };
209
210            // simplify the graph
211            let sorted =
212                webgraph::transform::simplify(&perm_graph, args.memory_usage.memory_usage)?;
213
214            thread_pool.install(|| {
215                builder.par_comp_lenders_endianness(&sorted, sorted.num_nodes(), &target_endianness)
216            })?;
217        }
218        // just compute the transpose on the fly
219        (None, None) => {
220            log::info!(
221                "No permutation or transposed graph provided, computing the transpose on the fly"
222            );
223            // if the .ef file exists, we can use the simplify split
224            if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
225                log::info!(".ef file found, using simplify split");
226
227                let graph =
228                    webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
229                        .endianness::<E>()
230                        .load()?;
231
232                thread_pool.install(|| {
233                    let sorted = webgraph::transform::simplify_split(
234                        &graph,
235                        args.memory_usage.memory_usage,
236                    )?;
237
238                    builder.par_comp_lenders_endianness(
239                        &sorted,
240                        sorted.num_nodes(),
241                        &target_endianness,
242                    )
243                })?;
244
245                return Ok(());
246            }
247
248            no_ef_warn(&args.src);
249
250            let seq_graph =
251                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
252                    .endianness::<E>()
253                    .load()?;
254
255            // transpose the graph
256            let sorted =
257                webgraph::transform::simplify_sorted(seq_graph, args.memory_usage.memory_usage)?;
258
259            thread_pool.install(|| {
260                builder.par_comp_lenders_endianness(&sorted, sorted.num_nodes(), &target_endianness)
261            })?;
262        }
263    }
264
265    Ok(())
266}