1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
/*!
Information spread source location module.

This module implements function aiding in **information source location**, using the information
provided by [`si::Observers`]. The algorithms construct a [`Ranking`] of suspect nodes, based on
different criteria.
*/

use crate::{bfs, cn, si, Network, Weight};
use nalgebra::{DMatrix, DVector};
use rayon::prelude::*;

/// The result of running a source-location algorithm on an infected network.
///
/// It is a [`Vec`] of (suspect index, score) pairs, ordered by the score. The higher the node is
/// positioned, the higher the probability that it is the true information source.
pub type Ranking = Vec<(usize, f64)>;

/// Constructs a suspect ranking based on the Pearson correlation coefficient.
///
/// This algorithm calculates the [Pearson correlation
/// coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) between the vector
/// of **infection times** and the vector of **distances** from a given suspect. It is assumed that
/// the higher the correlation, the more probable the fact that the suspect is the true source.
///
/// It should be noted that this version uses **only** infection times, disregarding the originally
/// proposed additional suspect elimination based on their distance to the observer's **infectant**
/// (the node which infected the observer).
///
/// # References
/// > Xu, S., Teng, C., Zhou, Y., Peng, J., Zhang, Y., & Zhang, Z. K. (2019). [*Identifying the
/// > diffusion source in complex networks with limited
/// > observers.*](https://doi.org/10.1016/j.physa.2019.121267) Physica A: Statistical Mechanics
/// > and Its Applications, 527, 121267.
pub fn pearson(net: &Network, observers: &si::Observers) -> cn::Result<Ranking> {
    // Average time of infection can be pre calculated
    let avg_t =
        observers.as_map().values().flatten().sum::<usize>() as f64 / observers.as_map().len() as f64;
    let times_reduced: Vec<f64> = observers
        .as_map()
        .values()
        .flatten()
        .map(|t| (*t as f64 - avg_t))
        .collect();
    let t_sq_sum: f64 = times_reduced.iter().map(|t| t * t).sum();

    // Need distance for these indexes - infectants and observers
    let need_distance: Vec<usize> = observers.as_map().keys().copied().collect();

    // The not-observer nodes
    let unobservers: Vec<usize> = net
        .nodes()
        .filter(|node| !observers.as_map().contains_key(node))
        .collect();
    let mut scores: Vec<(usize, f64)> = unobservers
        .par_iter()
        .map(|unobserver| {
            let distances = bfs::distance_many(net, *unobserver, &need_distance).unwrap();
            let mut avg_d = 0.0;
            let mut distances: Vec<f64> = observers
                .as_map()
                .keys()
                .map(|o| {
                    let d = distances[o].unwrap() as f64;
                    avg_d += d;
                    d
                })
                .collect();
            avg_d /= distances.len() as f64;
            // Reduce the distances
            for d in distances.iter_mut() {
                let d_red = *d - avg_d;
                *d = if d_red.abs() < f64::EPSILON {
                    // NaN guard
                    f64::EPSILON
                } else {
                    d_red
                };
            }

            let score: f64 = distances
                .iter()
                .zip(&times_reduced)
                .map(|(d, t)| d * t)
                .sum::<f64>()
                / (distances.into_iter().map(|d| d * d).sum::<f64>() * t_sq_sum).sqrt();
            (*unobserver, score)
        })
        .collect();

    // Sort the scores
    scores.sort_unstable_by(|(_, s1), (_, s2)| {
        s2.partial_cmp(s1).unwrap_or(std::cmp::Ordering::Less)
    });
    Ok(scores)
}
/// Constructs the ranking using the LPTV (Limited Pinto-Thiran-Vetterli) algorithm.
///
/// This algorithm orders suspect nodes using the **maximum likelihood** estimator, as described in
/// the original paper. This "limited" variant does not utilize the information about observers'
/// **infectants** (nodes which infected the observers).
///
/// **Warning:** this method is currently **not implemented** for networks with [`Weight::Uniform`]
/// and will simply return an empty ranking.
///
/// # References
/// > Pinto, P. C., Thiran, P., & Vetterli, M. (2012). [*Locating the source of diffusion in
/// > large-scale networks.*](https://doi.org/10.1103/physrevlett.109.068702) Physical Review
/// > Letters, 109(6), 1–5.
pub fn lptv(net: &Network, observers: &si::Observers) -> cn::Result<Ranking> {
    let need_distance: Vec<usize> = observers.as_map().keys().copied().collect();
    let (ref_o, other_o) = need_distance.split_first().unwrap();
    let (mean, mut variance) = match net.weight() {
        Weight::Constant { c } => (1.0 / c, (1.0 - c) / (c * c)),
        Weight::Uniform => return Ok(Vec::new()),
    };
    if variance.abs() < f64::EPSILON {
        variance = f64::EPSILON;
    }
    let unobservers: Vec<usize> = net
        .nodes()
        .filter(|node| !observers.as_map().contains_key(node))
        .collect();

    // Time delays only need to be computed once
    let delays: DVector<f64> = DVector::from_vec(
        other_o
            .par_iter()
            .map(|o| {
                observers.as_map().get(o).unwrap().unwrap() as f64
                    - observers.as_map().get(ref_o).unwrap().unwrap() as f64
            })
            .collect(),
    );

    let mut scores: Vec<(usize, f64)> = unobservers
        .par_iter()
        .filter_map(|&unobserver| {
            // The algorithm operates on the bfs tree rooted at the suspect.
            let tree = bfs::tree_active(net, unobserver, &need_distance).ok()?;
            // Distances from the suspect to observers
            let distances = bfs::distance_many(&tree, unobserver, &need_distance).ok()?;
            // Paths from reference observer to others
            let paths = bfs::path_many(&tree, *ref_o, &need_distance).ok()?;
            let mut lambda_matrix: DMatrix<f64> =
                DMatrix::from_fn(other_o.len(), other_o.len(), |i: usize, j: usize| {
                    let o_i = &other_o[i]; // Observator i index
                    let o_j = &other_o[j]; // Observator j index
                    if i == j {
                        variance * (paths[o_i].as_vec().as_ref().unwrap().len() + 1) as f64
                    } else {
                        // Path from i to reference
                        let path_i = &paths[o_i];
                        // Path from j to reference
                        let path_j = &paths[o_j];
                        // Path intersection (+1 because path length)
                        variance * (path_i.common_len(path_j).unwrap()) as f64
                    }
                });
            let _det: f64 = lambda_matrix.determinant().abs();
            let _cpy = lambda_matrix.clone();
            if !lambda_matrix.try_inverse_mut() {
                panic!("Uh oh the covariance is not invertible!");
            };
            let mu: DVector<f64> = DVector::from_vec(
                other_o
                    .iter()
                    .map(|o| {
                        mean * (distances[o].unwrap() as i64 - distances[ref_o].unwrap() as i64)
                            as f64
                    })
                    .collect(),
            );
            let d_red: DVector<f64> = &delays - &mu;
            // The [0] is needed because score is really a 1x1 matrix.
            let score = (d_red.transpose() * lambda_matrix * (d_red))[0];
            Some((unobserver, score))
        })
        .collect();

    scores.sort_unstable_by(|(_, s1), (_, s2)| s1.partial_cmp(s2).unwrap());
    Ok(scores)
}