webgraph_cli/rank/
pagerank.rs1use crate::{FloatVectorFormat, GlobalArgs, GranularityArgs, NumThreadsArg, get_thread_pool};
8use anyhow::{Result, ensure};
9use clap::Parser;
10use dsi_bitstream::prelude::*;
11use dsi_progress_logger::{ProgressLog, concurrent_progress_logger, progress_logger};
12use predicates::prelude::*;
13use std::path::PathBuf;
14use webgraph::graphs::bvgraph::get_endianness;
15use webgraph::prelude::BvGraph;
16use webgraph_algo::rank::pagerank::preds::{L1Norm, MaxIter};
17use webgraph_algo::rank::{Mode, PageRank};
18
19#[derive(clap::ValueEnum, Debug, Clone, Copy, Default)]
21pub enum CliMode {
22 #[default]
24 StronglyPreferential,
25 WeaklyPreferential,
28 PseudoRank,
30}
31
32impl From<CliMode> for Mode {
33 fn from(m: CliMode) -> Self {
34 match m {
35 CliMode::StronglyPreferential => Mode::StronglyPreferential,
36 CliMode::WeaklyPreferential => Mode::WeaklyPreferential,
37 CliMode::PseudoRank => Mode::PseudoRank,
38 }
39 }
40}
41
42#[derive(Parser, Debug)]
43#[command(
44 name = "pagerank",
45 about = "Compute PageRank using parallel Gauss–Seidel iteration.",
46 long_about = None
47)]
48pub struct CliArgs {
49 pub transpose: PathBuf,
51
52 #[arg(short, long)]
53 pub output: PathBuf,
55
56 #[arg(short, long, default_value_t = 0.85)]
57 pub alpha: f64,
59
60 #[arg(long)]
61 pub max_iter: Option<usize>,
63
64 #[arg(short, long, default_value_t = 1e-6)]
65 pub threshold: f64,
67
68 #[arg(short, long)]
69 pub preference: Option<PathBuf>,
71
72 #[arg(long, value_enum, default_value_t = FloatVectorFormat::Ascii)]
73 pub preference_fmt: FloatVectorFormat,
75
76 #[arg(short, long, value_enum, default_value_t = CliMode::StronglyPreferential)]
77 pub mode: CliMode,
79
80 #[arg(long, value_enum, default_value_t = FloatVectorFormat::Ascii)]
81 pub fmt: FloatVectorFormat,
83
84 #[arg(long)]
85 pub precision: Option<usize>,
87
88 #[clap(flatten)]
89 pub num_threads: NumThreadsArg,
90
91 #[clap(flatten)]
92 pub granularity: GranularityArgs,
93}
94
95pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
96 ensure!(
97 (0.0..1.0).contains(&args.alpha),
99 "The damping factor must be in [0 . . 1), got {}",
100 args.alpha
101 );
102
103 match get_endianness(&args.transpose)?.as_str() {
104 #[cfg(feature = "be_bins")]
105 BE::NAME => pagerank::<BE>(global_args, args),
106 #[cfg(feature = "le_bins")]
107 LE::NAME => pagerank::<LE>(global_args, args),
108 e => panic!("Unknown endianness: {}", e),
109 }
110}
111
112pub fn pagerank<E: Endianness>(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
113 let mut pl = progress_logger![];
114 pl.display_memory(true);
115 if let Some(log_interval) = global_args.log_interval {
116 pl.log_interval(log_interval);
117 }
118
119 let mut cpl = concurrent_progress_logger![];
120 cpl.display_memory(true);
121 if let Some(log_interval) = global_args.log_interval {
122 cpl.log_interval(log_interval);
123 }
124
125 let thread_pool = get_thread_pool(args.num_threads.num_threads);
126
127 log::info!(
128 "Loading the transpose graph from {}",
129 args.transpose.display()
130 );
131 let transpose = BvGraph::with_basename(&args.transpose).load()?;
132
133 let preference: Option<Vec<f64>> = args
134 .preference
135 .as_ref()
136 .map(|path| args.preference_fmt.load(path))
137 .transpose()?;
138
139 let mut predicate = L1Norm::try_from(args.threshold)?.boxed();
141 if let Some(max_iter) = args.max_iter {
142 predicate = predicate.or(MaxIter::from(max_iter)).boxed();
143 }
144
145 let mut pr = PageRank::new(&transpose);
147 pr.alpha(args.alpha)
148 .mode(args.mode.into())
149 .granularity(args.granularity.into_granularity())
150 .preference(preference.as_deref());
151
152 thread_pool.install(|| pr.run_with_logging(predicate, &mut pl, &mut cpl));
154
155 log::info!(
156 "Completed after {} iteration(s), norm delta = {}",
157 pr.iterations(),
158 pr.norm_delta()
159 );
160
161 args.fmt.store(&args.output, pr.rank(), args.precision)?;
163
164 Ok(())
165}