webgraph_cli/dist/hyperball/
mod.rs1use crate::{FloatVectorFormat, GlobalArgs, GranularityArgs, NumThreadsArg, get_thread_pool};
8use anyhow::{Result, bail, ensure};
9use clap::{ArgGroup, Args, Parser};
10use dsi_bitstream::prelude::*;
11use dsi_progress_logger::{ProgressLog, concurrent_progress_logger};
12use epserde::deser::{Deserialize, Flags};
13use rand::SeedableRng;
14use std::path::PathBuf;
15use webgraph::{
16 graphs::bvgraph::get_endianness,
17 prelude::{BvGraph, DCF, DEG_CUMUL_EXTENSION},
18};
19use webgraph_algo::distances::hyperball::HyperBallBuilder;
20
21#[derive(Args, Debug, Clone)]
22#[clap(group = ArgGroup::new("centralities"))]
23pub struct Centralities {
28 #[clap(long, value_enum, default_value_t = FloatVectorFormat::Ascii)]
30 pub fmt: FloatVectorFormat,
31 #[clap(long)]
32 pub precision: Option<usize>,
34
35 #[clap(long)]
37 pub sum_of_distances: Option<PathBuf>,
38 #[clap(long)]
40 pub reachable_nodes: Option<PathBuf>,
41 #[clap(long)]
43 pub harmonic: Option<PathBuf>,
44 #[clap(long)]
46 pub closeness: Option<PathBuf>,
47 #[clap(long)]
48 pub neighborhood_function: Option<PathBuf>,
50}
51
52impl Centralities {
53 pub fn should_compute_sum_of_distances(&self) -> bool {
54 self.sum_of_distances.is_some() || self.closeness.is_some()
55 }
56 pub fn should_compute_sum_of_inverse_distances(&self) -> bool {
57 self.harmonic.is_some()
58 }
59}
60
61#[derive(Parser, Debug)]
62#[command(
63 name = "hyperball",
64 about = "Use hyperball to compute centralities.",
65 long_about = None
66)]
67pub struct CliArgs {
68 pub basename: PathBuf,
70
71 #[clap(long, default_value_t = false)]
72 pub symm: bool,
75
76 #[clap(short, long)]
80 pub transposed: Option<PathBuf>,
81
82 #[clap(flatten)]
83 pub centralities: Centralities,
84
85 #[clap(short = 'm', long, default_value_t = 14)]
86 pub log2m: usize,
89
90 #[clap(long, default_value_t = usize::MAX)]
91 pub upper_bound: usize,
93
94 #[clap(long)]
95 pub threshold: Option<f64>,
99
100 #[clap(flatten)]
101 pub num_threads: NumThreadsArg,
102
103 #[clap(flatten)]
104 pub granularity: GranularityArgs,
105
106 #[clap(long, default_value_t = 0)]
107 pub seed: u64,
109}
110
111pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
112 ensure!(
113 !args.symm || args.transposed.is_none(),
114 "If the graph is symmetric, you should not pass the transpose."
115 );
116
117 match get_endianness(&args.basename)?.as_str() {
118 #[cfg(feature = "be_bins")]
119 BE::NAME => hyperball::<BE>(global_args, args),
120 #[cfg(feature = "le_bins")]
121 LE::NAME => hyperball::<LE>(global_args, args),
122 e => panic!("Unknown endianness: {}", e),
123 }
124}
125
126pub fn hyperball<E: Endianness>(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
127 let mut pl = concurrent_progress_logger![];
128 if let Some(log_interval) = global_args.log_interval {
129 pl.log_interval(log_interval);
130 }
131 let thread_pool = get_thread_pool(args.num_threads.num_threads);
132
133 let graph = BvGraph::with_basename(&args.basename).load()?;
134
135 log::info!("Loading DCF...");
136 if !args.basename.with_extension(DEG_CUMUL_EXTENSION).exists() {
137 bail!(
138 "Missing DCF file. Please run `webgraph build dcf {}`.",
139 args.basename.display()
140 );
141 }
142 let deg_cumul = unsafe {
143 DCF::mmap(
144 args.basename.with_extension(DEG_CUMUL_EXTENSION),
145 Flags::RANDOM_ACCESS,
146 )
147 }?;
148
149 log::info!("Loading Transposed graph...");
150 let mut transposed = None;
151 if let Some(transposed_path) = args.transposed.as_ref() {
152 transposed = Some(BvGraph::with_basename(transposed_path).load()?);
153 }
154 let mut transposed_ref = transposed.as_ref();
155 if args.symm {
156 transposed_ref = Some(&graph);
157 }
158
159 let mut hb = HyperBallBuilder::with_hyper_log_log(
160 &graph,
161 transposed_ref,
162 deg_cumul.uncase(),
163 args.log2m,
164 None,
165 )?
166 .granularity(args.granularity.into_granularity())
167 .sum_of_distances(args.centralities.should_compute_sum_of_distances())
168 .sum_of_inverse_distances(args.centralities.should_compute_sum_of_inverse_distances())
169 .build(&mut pl);
170
171 log::info!("Starting Hyperball...");
172 let rng = rand::rngs::SmallRng::seed_from_u64(args.seed);
173 thread_pool.install(|| hb.run(args.upper_bound, args.threshold, rng, &mut pl))?;
174
175 log::info!("Storing the results...");
176
177 macro_rules! store_centrality {
180 ($flag:ident, $method:ident, $description:expr) => {{
181 if let Some(path) = args.centralities.$flag {
182 log::info!("Saving {} to {}", $description, path.display());
183 let value = hb.$method()?;
184 args.centralities
185 .fmt
186 .store(path, &value, args.centralities.precision)?;
187 }
188 }};
189 }
190
191 store_centrality!(sum_of_distances, sum_of_distances, "sum of distances");
192 store_centrality!(harmonic, harmonic_centralities, "harmonic centralities");
193 store_centrality!(closeness, closeness_centrality, "closeness centralities");
194 store_centrality!(reachable_nodes, reachable_nodes, "reachable nodes");
195 store_centrality!(
196 neighborhood_function,
197 neighborhood_function,
198 "neighborhood function"
199 );
200
201 Ok(())
202}