use std::collections::VecDeque;
use ahash::AHashMap;
use crate::progress::ProgressCallback;
use crate::{errors::SqliteGraphError, graph::SqliteGraph};
pub fn pagerank(
graph: &SqliteGraph,
damping: f64,
iterations: usize,
) -> Result<Vec<(i64, f64)>, SqliteGraphError> {
let all_ids = graph.all_entity_ids()?;
let n = all_ids.len();
if n == 0 {
return Ok(Vec::new());
}
let mut scores: AHashMap<i64, f64> = all_ids.iter().map(|&id| (id, 1.0 / n as f64)).collect();
let mut outgoing_counts: AHashMap<i64, usize> = AHashMap::new();
for &id in &all_ids {
let count = graph.fetch_outgoing(id)?.len();
outgoing_counts.insert(id, count);
}
for _ in 0..iterations {
let mut new_scores: AHashMap<i64, f64> = AHashMap::new();
let base_score = (1.0 - damping) / n as f64;
for &id in &all_ids {
new_scores.insert(id, base_score);
}
let mut dangling_score = 0.0;
for &id in &all_ids {
let score = scores[&id];
let out_count = outgoing_counts[&id];
if out_count == 0 {
dangling_score += score;
} else {
let share = score / out_count as f64;
for &neighbor in &graph.fetch_outgoing(id)? {
*new_scores.get_mut(&neighbor).unwrap() += damping * share;
}
}
}
let dangling_share = damping * dangling_score / n as f64;
for (_, score) in new_scores.iter_mut() {
*score += dangling_share;
}
scores = new_scores;
}
let mut result: Vec<(i64, f64)> = scores.into_iter().collect();
result.sort_by(|a, b| {
b.1.partial_cmp(&a.1)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.0.cmp(&b.0))
});
Ok(result)
}
pub fn pagerank_with_progress<F>(
graph: &SqliteGraph,
damping: f64,
iterations: usize,
progress: &F,
) -> Result<Vec<(i64, f64)>, SqliteGraphError>
where
F: ProgressCallback,
{
let all_ids = graph.all_entity_ids()?;
let n = all_ids.len();
if n == 0 {
progress.on_complete();
return Ok(Vec::new());
}
let mut scores: AHashMap<i64, f64> = all_ids.iter().map(|&id| (id, 1.0 / n as f64)).collect();
let mut outgoing_counts: AHashMap<i64, usize> = AHashMap::new();
for &id in &all_ids {
let count = graph.fetch_outgoing(id)?.len();
outgoing_counts.insert(id, count);
}
for iteration in 0..iterations {
progress.on_progress(
iteration + 1,
Some(iterations),
&format!("PageRank iteration {}", iteration + 1),
);
let mut new_scores: AHashMap<i64, f64> = AHashMap::new();
let base_score = (1.0 - damping) / n as f64;
for &id in &all_ids {
new_scores.insert(id, base_score);
}
let mut dangling_score = 0.0;
for &id in &all_ids {
let score = scores[&id];
let out_count = outgoing_counts[&id];
if out_count == 0 {
dangling_score += score;
} else {
let share = score / out_count as f64;
for &neighbor in &graph.fetch_outgoing(id)? {
*new_scores.get_mut(&neighbor).unwrap() += damping * share;
}
}
}
let dangling_share = damping * dangling_score / n as f64;
for (_, score) in new_scores.iter_mut() {
*score += dangling_share;
}
scores = new_scores;
}
progress.on_complete();
let mut result: Vec<(i64, f64)> = scores.into_iter().collect();
result.sort_by(|a, b| {
b.1.partial_cmp(&a.1)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.0.cmp(&b.0))
});
Ok(result)
}
pub fn betweenness_centrality(graph: &SqliteGraph) -> Result<Vec<(i64, f64)>, SqliteGraphError> {
let all_ids = graph.all_entity_ids()?;
let n = all_ids.len();
if n == 0 {
return Ok(Vec::new());
}
let mut centrality: AHashMap<i64, f64> = all_ids.iter().map(|&id| (id, 0.0)).collect();
for &s in &all_ids {
let mut dist: AHashMap<i64, i64> = AHashMap::new();
let mut sigma: AHashMap<i64, f64> = AHashMap::new(); let mut predecessors: AHashMap<i64, Vec<i64>> = AHashMap::new();
dist.insert(s, 0);
sigma.insert(s, 1.0);
let mut queue = VecDeque::new();
queue.push_back(s);
while let Some(v) = queue.pop_front() {
for &w in &graph.fetch_outgoing(v)? {
if !dist.contains_key(&w) {
dist.insert(w, dist[&v] + 1);
queue.push_back(w);
}
if dist.get(&w) == Some(&(dist[&v] + 1)) {
*sigma.entry(w).or_insert(0.0) += sigma[&v];
predecessors.entry(w).or_default().push(v);
}
}
}
let mut delta: AHashMap<i64, f64> = all_ids.iter().map(|&id| (id, 0.0)).collect();
let mut nodes: Vec<i64> = dist.keys().copied().collect();
nodes.sort_by_key(|&id| std::cmp::Reverse(dist[&id]));
for w in nodes {
if w == s {
continue;
}
for &v in predecessors.get(&w).unwrap_or(&vec![]) {
let contribution = (sigma[&v] / sigma[&w]) * (1.0 + delta[&w]);
*delta.get_mut(&v).unwrap() += contribution;
}
if w != s {
*centrality.get_mut(&w).unwrap() += delta[&w];
}
}
}
let mut result: Vec<(i64, f64)> = centrality.into_iter().collect();
result.sort_by(|a, b| {
b.1.partial_cmp(&a.1)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.0.cmp(&b.0))
});
Ok(result)
}
pub fn betweenness_centrality_with_progress<F>(
graph: &SqliteGraph,
progress: &F,
) -> Result<Vec<(i64, f64)>, SqliteGraphError>
where
F: ProgressCallback,
{
let all_ids = graph.all_entity_ids()?;
let n = all_ids.len();
if n == 0 {
progress.on_complete();
return Ok(Vec::new());
}
let mut centrality: AHashMap<i64, f64> = all_ids.iter().map(|&id| (id, 0.0)).collect();
for (idx, &s) in all_ids.iter().enumerate() {
progress.on_progress(
idx + 1,
Some(n),
&format!("Betweenness: processing source {}/{}", idx + 1, n),
);
let mut dist: AHashMap<i64, i64> = AHashMap::new();
let mut sigma: AHashMap<i64, f64> = AHashMap::new(); let mut predecessors: AHashMap<i64, Vec<i64>> = AHashMap::new();
dist.insert(s, 0);
sigma.insert(s, 1.0);
let mut queue = VecDeque::new();
queue.push_back(s);
while let Some(v) = queue.pop_front() {
for &w in &graph.fetch_outgoing(v)? {
if !dist.contains_key(&w) {
dist.insert(w, dist[&v] + 1);
queue.push_back(w);
}
if dist.get(&w) == Some(&(dist[&v] + 1)) {
*sigma.entry(w).or_insert(0.0) += sigma[&v];
predecessors.entry(w).or_default().push(v);
}
}
}
let mut delta: AHashMap<i64, f64> = all_ids.iter().map(|&id| (id, 0.0)).collect();
let mut nodes: Vec<i64> = dist.keys().copied().collect();
nodes.sort_by_key(|&id| std::cmp::Reverse(dist[&id]));
for w in nodes {
if w == s {
continue;
}
for &v in predecessors.get(&w).unwrap_or(&vec![]) {
let contribution = (sigma[&v] / sigma[&w]) * (1.0 + delta[&w]);
*delta.get_mut(&v).unwrap() += contribution;
}
if w != s {
*centrality.get_mut(&w).unwrap() += delta[&w];
}
}
}
progress.on_complete();
let mut result: Vec<(i64, f64)> = centrality.into_iter().collect();
result.sort_by(|a, b| {
b.1.partial_cmp(&a.1)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.0.cmp(&b.0))
});
Ok(result)
}