webgraph_cli/dist/hyperball/
mod.rs1use crate::{get_thread_pool, FloatVectorFormat, GlobalArgs, GranularityArgs, NumThreadsArg};
8use anyhow::{ensure, Result};
9use clap::{ArgGroup, Args, Parser};
10use dsi_bitstream::prelude::*;
11use dsi_progress_logger::{concurrent_progress_logger, ProgressLog};
12use epserde::deser::{Deserialize, Flags};
13use rand::SeedableRng;
14use std::path::PathBuf;
15use webgraph::{
16 graphs::bvgraph::get_endianness,
17 prelude::{BvGraph, DCF, DEG_CUMUL_EXTENSION},
18};
19use webgraph_algo::distances::hyperball::HyperBallBuilder;
20
21#[derive(Args, Debug, Clone)]
22#[clap(group = ArgGroup::new("centralities"))]
23pub struct Centralities {
28 #[clap(long, value_enum, default_value_t = FloatVectorFormat::Ascii)]
30 pub fmt: FloatVectorFormat,
31 #[clap(long)]
32 pub precision: Option<usize>,
34
35 #[clap(long)]
37 pub sum_of_distances: Option<PathBuf>,
38 #[clap(long)]
40 pub reachable_nodes: Option<PathBuf>,
41 #[clap(long)]
43 pub harmonic: Option<PathBuf>,
44 #[clap(long)]
46 pub closeness: Option<PathBuf>,
47 #[clap(long)]
48 pub neighborhood_function: Option<PathBuf>,
50}
51
52impl Centralities {
53 pub fn should_compute_sum_of_distances(&self) -> bool {
54 self.sum_of_distances.is_some() || self.closeness.is_some()
55 }
56 pub fn should_compute_sum_of_inverse_distances(&self) -> bool {
57 self.harmonic.is_some()
58 }
59}
60
61#[derive(Parser, Debug)]
62#[command(
63 name = "hyperball",
64 about = "Use hyperball to compute centralities.",
65 long_about = ""
66)]
67pub struct CliArgs {
68 pub basename: PathBuf,
70
71 #[clap(long, default_value_t = false)]
72 pub symm: bool,
75
76 #[clap(short, long)]
80 pub transposed: Option<PathBuf>,
81
82 #[clap(short, long)]
85 pub neighborhood_function: bool,
86
87 #[clap(flatten)]
88 pub centralities: Centralities,
89
90 #[clap(short = 'm', long, default_value_t = 14)]
91 pub log2m: usize,
94
95 #[clap(long, default_value_t = usize::MAX)]
96 pub upper_bound: usize,
98
99 #[clap(long)]
100 pub threshold: Option<f64>,
104
105 #[clap(flatten)]
106 pub num_threads: NumThreadsArg,
107
108 #[clap(flatten)]
109 pub granularity: GranularityArgs,
110
111 #[clap(long, default_value_t = 0)]
112 pub seed: u64,
114}
115
116pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
117 ensure!(
118 !args.symm || args.transposed.is_none(),
119 "If the graph is symmetric, you should not pass the transpose."
120 );
121
122 match get_endianness(&args.basename)?.as_str() {
123 #[cfg(feature = "be_bins")]
124 BE::NAME => hyperball::<BE>(global_args, args),
125 #[cfg(feature = "le_bins")]
126 LE::NAME => hyperball::<LE>(global_args, args),
127 e => panic!("Unknown endianness: {}", e),
128 }
129}
130
131pub fn hyperball<E: Endianness>(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
132 let mut pl = concurrent_progress_logger![];
133 if let Some(log_interval) = global_args.log_interval {
134 pl.log_interval(log_interval);
135 }
136 let thread_pool = get_thread_pool(args.num_threads.num_threads);
137
138 let graph = BvGraph::with_basename(&args.basename).load()?;
139
140 log::info!("Loading DCF...");
141 if !args.basename.with_extension(DEG_CUMUL_EXTENSION).exists() {
142 log::error!(
143 "Missing DCF file. Please run `webgraph build dcf {}`.",
144 args.basename.display()
145 );
146 }
147 let deg_cumul = DCF::mmap(
148 args.basename.with_extension(DEG_CUMUL_EXTENSION),
149 Flags::RANDOM_ACCESS,
150 )?;
151
152 log::info!("Loading Transposed graph...");
153 let mut transposed = None;
154 if let Some(transposed_path) = args.transposed.as_ref() {
155 transposed = Some(BvGraph::with_basename(transposed_path).load()?);
156 }
157 let mut transposed_ref = transposed.as_ref();
158 if args.symm {
159 transposed_ref = Some(&graph);
160 }
161
162 let mut hb = HyperBallBuilder::with_hyper_log_log(
163 &graph,
164 transposed_ref,
165 deg_cumul.as_ref(),
166 args.log2m,
167 None,
168 )?
169 .granularity(args.granularity.into_granularity())
170 .sum_of_distances(args.centralities.should_compute_sum_of_distances())
171 .sum_of_inverse_distances(args.centralities.should_compute_sum_of_inverse_distances())
172 .build(&mut pl);
173
174 log::info!("Starting Hyperball...");
175 let rng = rand::rngs::SmallRng::seed_from_u64(args.seed);
176 hb.run(args.upper_bound, args.threshold, &thread_pool, rng, &mut pl)?;
177
178 log::info!("Storing the results...");
179
180 macro_rules! store_centrality {
183 ($flag:ident, $method:ident, $description:expr) => {{
184 if let Some(path) = args.centralities.$flag {
185 log::info!("Saving {} to {}", $description, path.display());
186 let value = hb.$method()?;
187 args.centralities
188 .fmt
189 .store(path, &value, args.centralities.precision)?;
190 }
191 }};
192 }
193
194 store_centrality!(sum_of_distances, sum_of_distances, "sum of distances");
195 store_centrality!(harmonic, harmonic_centralities, "harmonic centralities");
196 store_centrality!(closeness, closeness_centrality, "closeness centralities");
197 store_centrality!(reachable_nodes, reachable_nodes, "reachable nodes");
198 store_centrality!(
199 neighborhood_function,
200 neighborhood_function,
201 "neighborhood function"
202 );
203
204 Ok(())
205}