1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
/*!
Information spread source location module.
This module implements function aiding in **information source location**, using the information
provided by [`si::Observers`]. The algorithms construct a [`Ranking`] of suspect nodes, based on
different criteria.
*/
use crate::{bfs, cn, si, Network, Weight};
use nalgebra::{DMatrix, DVector};
use rayon::prelude::*;
/// The result of running a source-location algorithm on an infected network.
///
/// It is a [`Vec`] of (suspect index, score) pairs, ordered by the score. The higher the node is
/// positioned, the higher the probability that it is the true information source.
pub type Ranking = Vec<(usize, f64)>;
/// Constructs a suspect ranking based on the Pearson correlation coefficient.
///
/// This algorithm calculates the [Pearson correlation
/// coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) between the vector
/// of **infection times** and the vector of **distances** from a given suspect. It is assumed that
/// the higher the correlation, the more probable the fact that the suspect is the true source.
///
/// It should be noted that this version uses **only** infection times, disregarding the originally
/// proposed additional suspect elimination based on their distance to the observer's **infectant**
/// (the node which infected the observer).
///
/// # References
/// > Xu, S., Teng, C., Zhou, Y., Peng, J., Zhang, Y., & Zhang, Z. K. (2019). [*Identifying the
/// > diffusion source in complex networks with limited
/// > observers.*](https://doi.org/10.1016/j.physa.2019.121267) Physica A: Statistical Mechanics
/// > and Its Applications, 527, 121267.
pub fn pearson(net: &Network, observers: &si::Observers) -> cn::Result<Ranking> {
// Average time of infection can be pre calculated
let avg_t =
observers.as_map().values().flatten().sum::<usize>() as f64 / observers.as_map().len() as f64;
let times_reduced: Vec<f64> = observers
.as_map()
.values()
.flatten()
.map(|t| (*t as f64 - avg_t))
.collect();
let t_sq_sum: f64 = times_reduced.iter().map(|t| t * t).sum();
// Need distance for these indexes - infectants and observers
let need_distance: Vec<usize> = observers.as_map().keys().copied().collect();
// The not-observer nodes
let unobservers: Vec<usize> = net
.nodes()
.filter(|node| !observers.as_map().contains_key(node))
.collect();
let mut scores: Vec<(usize, f64)> = unobservers
.par_iter()
.map(|unobserver| {
let distances = bfs::distance_many(net, *unobserver, &need_distance).unwrap();
let mut avg_d = 0.0;
let mut distances: Vec<f64> = observers
.as_map()
.keys()
.map(|o| {
let d = distances[o].unwrap() as f64;
avg_d += d;
d
})
.collect();
avg_d /= distances.len() as f64;
// Reduce the distances
for d in distances.iter_mut() {
let d_red = *d - avg_d;
*d = if d_red.abs() < f64::EPSILON {
// NaN guard
f64::EPSILON
} else {
d_red
};
}
let score: f64 = distances
.iter()
.zip(×_reduced)
.map(|(d, t)| d * t)
.sum::<f64>()
/ (distances.into_iter().map(|d| d * d).sum::<f64>() * t_sq_sum).sqrt();
(*unobserver, score)
})
.collect();
// Sort the scores
scores.sort_unstable_by(|(_, s1), (_, s2)| {
s2.partial_cmp(s1).unwrap_or(std::cmp::Ordering::Less)
});
Ok(scores)
}
/// Constructs the ranking using the LPTV (Limited Pinto-Thiran-Vetterli) algorithm.
///
/// This algorithm orders suspect nodes using the **maximum likelihood** estimator, as described in
/// the original paper. This "limited" variant does not utilize the information about observers'
/// **infectants** (nodes which infected the observers).
///
/// **Warning:** this method is currently **not implemented** for networks with [`Weight::Uniform`]
/// and will simply return an empty ranking.
///
/// # References
/// > Pinto, P. C., Thiran, P., & Vetterli, M. (2012). [*Locating the source of diffusion in
/// > large-scale networks.*](https://doi.org/10.1103/physrevlett.109.068702) Physical Review
/// > Letters, 109(6), 1–5.
pub fn lptv(net: &Network, observers: &si::Observers) -> cn::Result<Ranking> {
let need_distance: Vec<usize> = observers.as_map().keys().copied().collect();
let (ref_o, other_o) = need_distance.split_first().unwrap();
let (mean, mut variance) = match net.weight() {
Weight::Constant { c } => (1.0 / c, (1.0 - c) / (c * c)),
Weight::Uniform => return Ok(Vec::new()),
};
if variance.abs() < f64::EPSILON {
variance = f64::EPSILON;
}
let unobservers: Vec<usize> = net
.nodes()
.filter(|node| !observers.as_map().contains_key(node))
.collect();
// Time delays only need to be computed once
let delays: DVector<f64> = DVector::from_vec(
other_o
.par_iter()
.map(|o| {
observers.as_map().get(o).unwrap().unwrap() as f64
- observers.as_map().get(ref_o).unwrap().unwrap() as f64
})
.collect(),
);
let mut scores: Vec<(usize, f64)> = unobservers
.par_iter()
.filter_map(|&unobserver| {
// The algorithm operates on the bfs tree rooted at the suspect.
let tree = bfs::tree_active(net, unobserver, &need_distance).ok()?;
// Distances from the suspect to observers
let distances = bfs::distance_many(&tree, unobserver, &need_distance).ok()?;
// Paths from reference observer to others
let paths = bfs::path_many(&tree, *ref_o, &need_distance).ok()?;
let mut lambda_matrix: DMatrix<f64> =
DMatrix::from_fn(other_o.len(), other_o.len(), |i: usize, j: usize| {
let o_i = &other_o[i]; // Observator i index
let o_j = &other_o[j]; // Observator j index
if i == j {
variance * (paths[o_i].as_vec().as_ref().unwrap().len() + 1) as f64
} else {
// Path from i to reference
let path_i = &paths[o_i];
// Path from j to reference
let path_j = &paths[o_j];
// Path intersection (+1 because path length)
variance * (path_i.common_len(path_j).unwrap()) as f64
}
});
let _det: f64 = lambda_matrix.determinant().abs();
let _cpy = lambda_matrix.clone();
if !lambda_matrix.try_inverse_mut() {
panic!("Uh oh the covariance is not invertible!");
};
let mu: DVector<f64> = DVector::from_vec(
other_o
.iter()
.map(|o| {
mean * (distances[o].unwrap() as i64 - distances[ref_o].unwrap() as i64)
as f64
})
.collect(),
);
let d_red: DVector<f64> = &delays - μ
// The [0] is needed because score is really a 1x1 matrix.
let score = (d_red.transpose() * lambda_matrix * (d_red))[0];
Some((unobserver, score))
})
.collect();
scores.sort_unstable_by(|(_, s1), (_, s2)| s1.partial_cmp(s2).unwrap());
Ok(scores)
}