use std::collections::BTreeMap;
use crate::id::NodeId;
use crate::index::hybrid::AdjacencyIndex;
pub const DEFAULT_DAMPING: f32 = 0.85;
pub const DEFAULT_MAX_ITER: u32 = 15;
pub const DEFAULT_EPS: f32 = 1e-6;
pub const PPR_DEFAULT_MAX_NODES: usize = 250_000;
#[must_use]
pub fn exceeds_size_gate(adj: &(dyn AdjacencyIndex + Send + Sync), opt_in: bool) -> bool {
if opt_in {
return false;
}
let mut uniq: std::collections::BTreeSet<NodeId> = std::collections::BTreeSet::new();
for edge in adj.iter_edges() {
uniq.insert(edge.src);
uniq.insert(edge.dst);
if uniq.len() > PPR_DEFAULT_MAX_NODES {
return true;
}
}
false
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct PprConfig {
pub damping: f32,
pub max_iter: u32,
pub eps: f32,
}
impl Default for PprConfig {
fn default() -> Self {
Self {
damping: DEFAULT_DAMPING,
max_iter: DEFAULT_MAX_ITER,
eps: DEFAULT_EPS,
}
}
}
#[derive(Clone, Debug)]
pub struct SparseTransition {
pub nodes: Vec<NodeId>,
pub row_ptr: Vec<usize>,
pub col_idx: Vec<usize>,
pub values: Vec<f32>,
pub has_outgoing: Vec<bool>,
}
#[must_use]
pub fn sparse_transition_matrix(adj: &dyn AdjacencyIndex) -> SparseTransition {
let mut id_to_row: BTreeMap<NodeId, usize> = BTreeMap::new();
let mut triples: Vec<(NodeId, NodeId, f32)> = Vec::with_capacity(adj.edge_count());
for edge in adj.iter_edges() {
id_to_row.entry(edge.src).or_insert(0);
id_to_row.entry(edge.dst).or_insert(0);
triples.push((edge.src, edge.dst, edge.weight));
}
let nodes: Vec<NodeId> = id_to_row.keys().copied().collect();
for (i, id) in nodes.iter().enumerate() {
if let Some(slot) = id_to_row.get_mut(id) {
*slot = i;
}
}
let n = nodes.len();
let mut per_row: Vec<BTreeMap<usize, f32>> = (0..n).map(|_| BTreeMap::new()).collect();
for (s, d, w) in triples {
let si = id_to_row[&s];
let di = id_to_row[&d];
if w <= 0.0 || !w.is_finite() {
continue;
}
*per_row[si].entry(di).or_insert(0.0) += w;
}
let mut row_ptr: Vec<usize> = Vec::with_capacity(n + 1);
let mut col_idx: Vec<usize> = Vec::new();
let mut values: Vec<f32> = Vec::new();
let mut has_outgoing: Vec<bool> = vec![false; n];
row_ptr.push(0);
for (i, row) in per_row.iter().enumerate() {
let sum: f32 = row.values().sum();
if sum > 0.0 {
has_outgoing[i] = true;
for (&c, &w) in row {
col_idx.push(c);
values.push(w / sum);
}
}
row_ptr.push(col_idx.len());
}
SparseTransition {
nodes,
row_ptr,
col_idx,
values,
has_outgoing,
}
}
#[allow(clippy::many_single_char_names)]
#[must_use]
pub fn ppr(
adj: &dyn AdjacencyIndex,
personalization: &BTreeMap<NodeId, f32>,
cfg: PprConfig,
) -> BTreeMap<NodeId, f32> {
let m = sparse_transition_matrix(adj);
ppr_with_matrix(&m, personalization, cfg)
}
#[allow(clippy::many_single_char_names)]
#[must_use]
pub fn ppr_with_matrix(
m: &SparseTransition,
personalization: &BTreeMap<NodeId, f32>,
cfg: PprConfig,
) -> BTreeMap<NodeId, f32> {
let n = m.nodes.len();
if n == 0 {
return BTreeMap::new();
}
let damping = cfg.damping.clamp(0.0, 0.999);
let mut p = vec![0f32; n];
let mut psum = 0f32;
for (id, &w) in personalization {
if let Ok(idx) = m.nodes.binary_search(id)
&& w > 0.0
&& w.is_finite()
{
p[idx] += w;
psum += w;
}
}
if psum > 0.0 {
for v in &mut p {
*v /= psum;
}
} else {
let u = 1.0 / n as f32;
p.fill(u);
}
let mut r = p.clone();
let mut next = vec![0f32; n];
for _iter in 0..cfg.max_iter {
let mut dangling_mass = 0f32;
for i in 0..n {
if !m.has_outgoing[i] {
dangling_mass += r[i];
}
}
let teleport_scale = (1.0 - damping) + damping * dangling_mass;
for i in 0..n {
next[i] = teleport_scale * p[i];
}
for i in 0..n {
if !m.has_outgoing[i] {
continue;
}
let start = m.row_ptr[i];
let end = m.row_ptr[i + 1];
let ri = r[i];
if ri == 0.0 {
continue;
}
for k in start..end {
let j = m.col_idx[k];
next[j] += damping * ri * m.values[k];
}
}
let next_sum: f32 = next.iter().sum();
if next_sum > 0.0 {
for v in &mut next {
*v /= next_sum;
}
}
let mut delta = 0f32;
for i in 0..n {
delta += (next[i] - r[i]).abs();
}
std::mem::swap(&mut r, &mut next);
next.fill(0.0);
if delta < cfg.eps {
break;
}
}
let mut out: BTreeMap<NodeId, f32> = BTreeMap::new();
for (i, id) in m.nodes.iter().enumerate() {
out.insert(*id, r[i]);
}
out
}