webgraph_cli/dist/hyperball/
mod.rs1use crate::{get_thread_pool, FloatVectorFormat, GlobalArgs, GranularityArgs, NumThreadsArg};
8use anyhow::{ensure, Result};
9use clap::{ArgGroup, Args, Parser};
10use dsi_bitstream::prelude::*;
11use dsi_progress_logger::{concurrent_progress_logger, ProgressLog};
12use epserde::deser::{Deserialize, Flags};
13use rand::SeedableRng;
14use std::path::PathBuf;
15use webgraph::{
16 graphs::bvgraph::get_endianness,
17 prelude::{BvGraph, DCF, DEG_CUMUL_EXTENSION},
18};
19use webgraph_algo::distances::hyperball::HyperBallBuilder;
20
21#[derive(Args, Debug, Clone)]
22#[clap(group = ArgGroup::new("centralities"))]
23pub struct Centralities {
28 #[clap(long, value_enum, default_value_t = FloatVectorFormat::Ascii)]
30 pub fmt: FloatVectorFormat,
31 #[clap(long)]
32 pub precision: Option<usize>,
34
35 #[clap(long)]
37 pub sum_of_distances: Option<PathBuf>,
38 #[clap(long)]
40 pub reachable_nodes: Option<PathBuf>,
41 #[clap(long)]
43 pub harmonic: Option<PathBuf>,
44 #[clap(long)]
46 pub closeness: Option<PathBuf>,
47 #[clap(long)]
48 pub neighborhood_function: Option<PathBuf>,
50}
51
52impl Centralities {
53 pub fn should_compute_sum_of_distances(&self) -> bool {
54 self.sum_of_distances.is_some() || self.closeness.is_some()
55 }
56 pub fn should_compute_sum_of_inverse_distances(&self) -> bool {
57 self.harmonic.is_some()
58 }
59}
60
61#[derive(Parser, Debug)]
62#[command(
63 name = "hyperball",
64 about = "Use hyperball to compute centralities.",
65 long_about = ""
66)]
67pub struct CliArgs {
68 pub basename: PathBuf,
70
71 #[clap(long, default_value_t = false)]
72 pub symm: bool,
75
76 #[clap(short, long)]
80 pub transposed: Option<PathBuf>,
81
82 #[clap(short, long)]
85 pub neighborhood_function: bool,
86
87 #[clap(flatten)]
88 pub centralities: Centralities,
89
90 #[clap(short = 'm', long, default_value_t = 14)]
91 pub log2m: usize,
94
95 #[clap(long, default_value_t = usize::MAX)]
96 pub upper_bound: usize,
98
99 #[clap(long)]
100 pub threshold: Option<f64>,
104
105 #[clap(flatten)]
106 pub num_threads: NumThreadsArg,
107
108 #[clap(flatten)]
109 pub granularity: GranularityArgs,
110
111 #[clap(long, default_value_t = 0)]
112 pub seed: u64,
114}
115
116pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
117 ensure!(
118 !args.symm || args.transposed.is_none(),
119 "If the graph is symmetric, you should not pass the transpose."
120 );
121
122 match get_endianness(&args.basename)?.as_str() {
123 #[cfg(feature = "be_bins")]
124 BE::NAME => hyperball::<BE>(global_args, args),
125 #[cfg(feature = "le_bins")]
126 LE::NAME => hyperball::<LE>(global_args, args),
127 e => panic!("Unknown endianness: {}", e),
128 }
129}
130
131pub fn hyperball<E: Endianness>(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
132 let mut pl = concurrent_progress_logger![];
133 if let Some(log_interval) = global_args.log_interval {
134 pl.log_interval(log_interval);
135 }
136 let thread_pool = get_thread_pool(args.num_threads.num_threads);
137
138 let graph = BvGraph::with_basename(&args.basename).load()?;
139
140 log::info!("Loading DCF...");
141 if !args.basename.with_extension(DEG_CUMUL_EXTENSION).exists() {
142 log::error!(
143 "Missing DCF file. Please run `webgraph build dcf {}`.",
144 args.basename.display()
145 );
146 }
147 let deg_cumul = unsafe {
148 DCF::mmap(
149 args.basename.with_extension(DEG_CUMUL_EXTENSION),
150 Flags::RANDOM_ACCESS,
151 )
152 }?;
153
154 log::info!("Loading Transposed graph...");
155 let mut transposed = None;
156 if let Some(transposed_path) = args.transposed.as_ref() {
157 transposed = Some(BvGraph::with_basename(transposed_path).load()?);
158 }
159 let mut transposed_ref = transposed.as_ref();
160 if args.symm {
161 transposed_ref = Some(&graph);
162 }
163
164 let mut hb = HyperBallBuilder::with_hyper_log_log(
165 &graph,
166 transposed_ref,
167 deg_cumul.uncase(),
168 args.log2m,
169 None,
170 )?
171 .granularity(args.granularity.into_granularity())
172 .sum_of_distances(args.centralities.should_compute_sum_of_distances())
173 .sum_of_inverse_distances(args.centralities.should_compute_sum_of_inverse_distances())
174 .build(&mut pl);
175
176 log::info!("Starting Hyperball...");
177 let rng = rand::rngs::SmallRng::seed_from_u64(args.seed);
178 thread_pool.install(|| hb.run(args.upper_bound, args.threshold, rng, &mut pl))?;
179
180 log::info!("Storing the results...");
181
182 macro_rules! store_centrality {
185 ($flag:ident, $method:ident, $description:expr) => {{
186 if let Some(path) = args.centralities.$flag {
187 log::info!("Saving {} to {}", $description, path.display());
188 let value = hb.$method()?;
189 args.centralities
190 .fmt
191 .store(path, &value, args.centralities.precision)?;
192 }
193 }};
194 }
195
196 store_centrality!(sum_of_distances, sum_of_distances, "sum of distances");
197 store_centrality!(harmonic, harmonic_centralities, "harmonic centralities");
198 store_centrality!(closeness, closeness_centrality, "closeness centralities");
199 store_centrality!(reachable_nodes, reachable_nodes, "reachable nodes");
200 store_centrality!(
201 neighborhood_function,
202 neighborhood_function,
203 "neighborhood function"
204 );
205
206 Ok(())
207}