Skip to main content

webgraph_cli/rank/
pagerank.rs

1/*
2 * SPDX-FileCopyrightText: 2026 Sebastiano Vigna
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5 */
6
7use crate::{FloatVectorFormat, GlobalArgs, GranularityArgs, NumThreadsArg, get_thread_pool};
8use anyhow::{Result, ensure};
9use clap::Parser;
10use dsi_bitstream::prelude::*;
11use dsi_progress_logger::{ProgressLog, concurrent_progress_logger, progress_logger};
12use predicates::prelude::*;
13use std::path::PathBuf;
14use webgraph::graphs::bvgraph::get_endianness;
15use webgraph::prelude::BvGraph;
16use webgraph_algo::rank::pagerank::preds::{L1Norm, MaxIter};
17use webgraph_algo::rank::{Mode, PageRank};
18
19/// The PageRank mode.
20#[derive(clap::ValueEnum, Debug, Clone, Copy, Default)]
21pub enum CliMode {
22    /// Use the preference vector as dangling-node distribution.
23    #[default]
24    StronglyPreferential,
25    /// Use a uniform dangling-node distribution regardless of the preference
26    /// vector.
27    WeaklyPreferential,
28    /// Zero out the dangling-node contribution (pseudorank).
29    PseudoRank,
30}
31
32impl From<CliMode> for Mode {
33    fn from(m: CliMode) -> Self {
34        match m {
35            CliMode::StronglyPreferential => Mode::StronglyPreferential,
36            CliMode::WeaklyPreferential => Mode::WeaklyPreferential,
37            CliMode::PseudoRank => Mode::PseudoRank,
38        }
39    }
40}
41
42#[derive(Parser, Debug)]
43#[command(
44    name = "pagerank",
45    about = "Compute PageRank using parallel Gauss–Seidel iteration.",
46    long_about = None
47)]
48pub struct CliArgs {
49    /// The basename of the transpose of the graph.
50    pub transpose: PathBuf,
51
52    #[arg(short, long)]
53    /// Where to store the rank vector.
54    pub output: PathBuf,
55
56    #[arg(short, long, default_value_t = 0.85)]
57    /// The damping factor α (must be in the interval [0 . . 1).
58    pub alpha: f64,
59
60    #[arg(long)]
61    /// Maximum number of iterations.
62    pub max_iter: Option<usize>,
63
64    #[arg(short, long, default_value_t = 1e-6)]
65    /// The ℓ₁ error threshold to stop.
66    pub threshold: f64,
67
68    #[arg(short, long)]
69    /// Path to a preference (personalization) vector.
70    pub preference: Option<PathBuf>,
71
72    #[arg(long, value_enum, default_value_t = FloatVectorFormat::Ascii)]
73    /// The input format for the preference vector.
74    pub preference_fmt: FloatVectorFormat,
75
76    #[arg(short, long, value_enum, default_value_t = CliMode::StronglyPreferential)]
77    /// The PageRank mode.
78    pub mode: CliMode,
79
80    #[arg(long, value_enum, default_value_t = FloatVectorFormat::Ascii)]
81    /// The output format for the rank vector.
82    pub fmt: FloatVectorFormat,
83
84    #[arg(long)]
85    /// Decimal digits for text output formats.
86    pub precision: Option<usize>,
87
88    #[clap(flatten)]
89    pub num_threads: NumThreadsArg,
90
91    #[clap(flatten)]
92    pub granularity: GranularityArgs,
93}
94
95pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
96    ensure!(
97        // Note that 0.0..1.0 is [0.0..1.0) in mathematical notation
98        (0.0..1.0).contains(&args.alpha),
99        "The damping factor must be in [0 . . 1), got {}",
100        args.alpha
101    );
102
103    match get_endianness(&args.transpose)?.as_str() {
104        #[cfg(feature = "be_bins")]
105        BE::NAME => pagerank::<BE>(global_args, args),
106        #[cfg(feature = "le_bins")]
107        LE::NAME => pagerank::<LE>(global_args, args),
108        e => panic!("Unknown endianness: {}", e),
109    }
110}
111
112pub fn pagerank<E: Endianness>(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
113    let mut pl = progress_logger![];
114    pl.display_memory(true);
115    if let Some(log_interval) = global_args.log_interval {
116        pl.log_interval(log_interval);
117    }
118
119    let mut cpl = concurrent_progress_logger![];
120    cpl.display_memory(true);
121    if let Some(log_interval) = global_args.log_interval {
122        cpl.log_interval(log_interval);
123    }
124
125    let thread_pool = get_thread_pool(args.num_threads.num_threads);
126
127    log::info!(
128        "Loading the transpose graph from {}",
129        args.transpose.display()
130    );
131    let transpose = BvGraph::with_basename(&args.transpose).load()?;
132
133    let preference: Option<Vec<f64>> = args
134        .preference
135        .as_ref()
136        .map(|path| args.preference_fmt.load(path))
137        .transpose()?;
138
139    // Build stopping predicate
140    let mut predicate = L1Norm::try_from(args.threshold)?.boxed();
141    if let Some(max_iter) = args.max_iter {
142        predicate = predicate.or(MaxIter::from(max_iter)).boxed();
143    }
144
145    // Configure PageRank
146    let mut pr = PageRank::new(&transpose);
147    pr.alpha(args.alpha)
148        .mode(args.mode.into())
149        .granularity(args.granularity.into_granularity())
150        .preference(preference.as_deref());
151
152    // Run
153    thread_pool.install(|| pr.run_with_logging(predicate, &mut pl, &mut cpl));
154
155    log::info!(
156        "Completed after {} iteration(s), norm delta = {}",
157        pr.iterations(),
158        pr.norm_delta()
159    );
160
161    // Store results
162    args.fmt.store(&args.output, pr.rank(), args.precision)?;
163
164    Ok(())
165}