/*!
Information spread source location module.

This module implements function aiding in **information source location**, using the information
provided by [`si::Observers`]. The algorithms construct a [`Ranking`] of suspect nodes, based on
different criteria.
*/

use crate::{bfs, cn, si, Network, Weight};
use nalgebra::{DMatrix, DVector};
use rayon::prelude::*;

/// The result of running a source-location algorithm on an infected network.
///
/// It is a [`Vec`] of (suspect index, score) pairs, ordered by the score. The higher the node is
/// positioned, the higher the probability that it is the true information source.
pub type Ranking = Vec<(usize, f64)>;

/// Constructs a suspect ranking based on the Pearson correlation coefficient.
///
/// This algorithm calculates the [Pearson correlation
/// coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) between the vector
/// of **infection times** and the vector of **distances** from a given suspect. It is assumed that
/// the higher the correlation, the more probable the fact that the suspect is the true source.
///
/// It should be noted that this version uses **only** infection times, disregarding the originally
/// proposed additional suspect elimination based on their distance to the observer's **infectant**
/// (the node which infected the observer).
///
/// # References
/// > Xu, S., Teng, C., Zhou, Y., Peng, J., Zhang, Y., & Zhang, Z. K. (2019). [*Identifying the
/// > diffusion source in complex networks with limited
/// > observers.*](https://doi.org/10.1016/j.physa.2019.121267) Physica A: Statistical Mechanics
/// > and Its Applications, 527, 121267.
pub fn pearson(net: &Network, observers: &si::Observers) -> cn::Result<Ranking> {
    // Average time of infection can be pre calculated
    let avg_t =
        observers.as_map().values().flatten().sum::<usize>() as f64 / observers.as_map().len() as f64;
    let times_reduced: Vec<f64> = observers
        .as_map()
        .values()
        .flatten()
        .map(|t| (*t as f64 - avg_t))
        .collect();
    let t_sq_sum: f64 = times_reduced.iter().map(|t| t * t).sum();

    // Need distance for these indexes - infectants and observers
    let need_distance: Vec<usize> = observers.as_map().keys().copied().collect();

    // The not-observer nodes
    let unobservers: Vec<usize> = net
        .nodes()
        .filter(|node| !observers.as_map().contains_key(node))
        .collect();
    let mut scores: Vec<(usize, f64)> = unobservers
        .par_iter()
        .map(|unobserver| {
            let distances = bfs::distance_many(net, *unobserver, &need_distance).unwrap();
            let mut avg_d = 0.0;
            let mut distances: Vec<f64> = observers
                .as_map()
                .keys()
                .map(|o| {
                    let d = distances[o].unwrap() as f64;
                    avg_d += d;
                    d
                })
                .collect();
            avg_d /= distances.len() as f64;
            // Reduce the distances
            for d in distances.iter_mut() {
                let d_red = *d - avg_d;
                *d = if d_red.abs() < f64::EPSILON {
                    // NaN guard
                    f64::EPSILON
                } else {
                    d_red
                };
            }

            let score: f64 = distances
                .iter()
                .zip(&times_reduced)
                .map(|(d, t)| d * t)
                .sum::<f64>()
                / (distances.into_iter().map(|d| d * d).sum::<f64>() * t_sq_sum).sqrt();
            (*unobserver, score)
        })
        .collect();

    // Sort the scores
    scores.sort_unstable_by(|(_, s1), (_, s2)| {
        s2.partial_cmp(s1).unwrap_or(std::cmp::Ordering::Less)
    });
    Ok(scores)
}
/// Constructs the ranking using the LPTV (Limited Pinto-Thiran-Vetterli) algorithm.
///
/// This algorithm orders suspect nodes using the **maximum likelihood** estimator, as described in
/// the original paper. This "limited" variant does not utilize the information about observers'
/// **infectants** (nodes which infected the observers).
///
/// **Warning:** this method is currently **not implemented** for networks with [`Weight::Uniform`]
/// and will simply return an empty ranking.
///
/// # References
/// > Pinto, P. C., Thiran, P., & Vetterli, M. (2012). [*Locating the source of diffusion in
/// > large-scale networks.*](https://doi.org/10.1103/physrevlett.109.068702) Physical Review
/// > Letters, 109(6), 1–5.
pub fn lptv(net: &Network, observers: &si::Observers) -> cn::Result<Ranking> {
    let need_distance: Vec<usize> = observers.as_map().keys().copied().collect();
    let (ref_o, other_o) = need_distance.split_first().unwrap();
    let (mean, mut variance) = match net.weight() {
        Weight::Constant { c } => (1.0 / c, (1.0 - c) / (c * c)),
        Weight::Uniform => return Ok(Vec::new()),
    };
    if variance.abs() < f64::EPSILON {
        variance = f64::EPSILON;
    }
    let unobservers: Vec<usize> = net
        .nodes()
        .filter(|node| !observers.as_map().contains_key(node))
        .collect();

    // Time delays only need to be computed once
    let delays: DVector<f64> = DVector::from_vec(
        other_o
            .par_iter()
            .map(|o| {
                observers.as_map().get(o).unwrap().unwrap() as f64
                    - observers.as_map().get(ref_o).unwrap().unwrap() as f64
            })
            .collect(),
    );

    let mut scores: Vec<(usize, f64)> = unobservers
        .par_iter()
        .filter_map(|&unobserver| {
            // The algorithm operates on the bfs tree rooted at the suspect.
            let tree = bfs::tree_active(net, unobserver, &need_distance).ok()?;
            // Distances from the suspect to observers
            let distances = bfs::distance_many(&tree, unobserver, &need_distance).ok()?;
            // Paths from reference observer to others
            let paths = bfs::path_many(&tree, *ref_o, &need_distance).ok()?;
            let mut lambda_matrix: DMatrix<f64> =
                DMatrix::from_fn(other_o.len(), other_o.len(), |i: usize, j: usize| {
                    let o_i = &other_o[i]; // Observator i index
                    let o_j = &other_o[j]; // Observator j index
                    if i == j {
                        variance * (paths[o_i].as_vec().as_ref().unwrap().len() + 1) as f64
                    } else {
                        // Path from i to reference
                        let path_i = &paths[o_i];
                        // Path from j to reference
                        let path_j = &paths[o_j];
                        // Path intersection (+1 because path length)
                        variance * (path_i.common_len(path_j).unwrap()) as f64
                    }
                });
            let _det: f64 = lambda_matrix.determinant().abs();
            let _cpy = lambda_matrix.clone();
            if !lambda_matrix.try_inverse_mut() {
                panic!("Uh oh the covariance is not invertible!");
            };
            let mu: DVector<f64> = DVector::from_vec(
                other_o
                    .iter()
                    .map(|o| {
                        mean * (distances[o].unwrap() as i64 - distances[ref_o].unwrap() as i64)
                            as f64
                    })
                    .collect(),
            );
            let d_red: DVector<f64> = &delays - &mu;
            // The [0] is needed because score is really a 1x1 matrix.
            let score = (d_red.transpose() * lambda_matrix * (d_red))[0];
            Some((unobserver, score))
        })
        .collect();

    scores.sort_unstable_by(|(_, s1), (_, s2)| s1.partial_cmp(s2).unwrap());
    Ok(scores)
}