rstats/
vecu8.rs

1use crate::{data_error, Vecu8, RE};
2
3impl Vecu8 for &[u8] {
4    /// Probability density function of bytes data
5    fn pdfu8(self) -> Vec<f64> {
6        let nf = self.len() as f64;
7        let mut pdfv = vec![0_f64; 256];
8        for &x in self {
9            pdfv[x as usize] += 1_f64
10        }
11        pdfv.iter_mut().for_each(|p| {
12            if *p > 0.0 {
13                *p /= nf
14            }
15        });
16        pdfv
17    }
18
19    /// Information (entropy) of &[u8] (in nats)
20    fn entropyu8(self) -> f64 {
21        let pdfv = self.pdfu8();
22        pdfv.iter()
23            .map(|&p| if p > 0.0 { -p * (p.ln()) } else { 0.0 })
24            .sum::<f64>()
25    }
26
27    /// Joint probability density function (here just co-occurence counts)
28    /// of successive pairs of values from two vectors of bytes
29    /// of the same lenghts n. Needs 4*256^2=262144 bytes of heap memory,
30    /// which will be sparse except for long input vectors.
31    fn jointpdfu8(self, v: &[u8]) -> Result<Vec<Vec<u32>>, RE> {
32        let n = self.len();
33        if v.len() != n {
34            return data_error("jointpdfu8: argument vectors must be of equal length!");
35        }
36        let mut res: Vec<Vec<u32>> = vec![vec![0_u32; 256]; 256];
37        self.iter()
38            .zip(v)
39            .for_each(|(&si, &vi)| res[si as usize][vi as usize] += 1);
40        Ok(res)
41    }
42
43    /// Joint entropy of &[u8],&[u8] (in nats)
44    fn jointentropyu8(self, v: &[u8]) -> Result<f64, RE> {
45        let n = self.len();
46        let nf = n as f64;
47        let mut entropy = 0_f64;
48        // for short vecs, it is quicker to iterate through args
49        if n < 65000 {
50            let mut jpdf = self.jointpdfu8(v)?;
51            for (&si, &vi) in self.iter().zip(v) {
52                let c = jpdf[si as usize][vi as usize];
53                if c > 0 {
54                    let p = (c as f64) / nf;
55                    entropy -= p * (p.ln());
56                    // prevent this pair's count being counted again
57                    jpdf[si as usize][vi as usize] = 0;
58                }
59            }
60            return Ok(entropy); // return value
61        }
62        // for long vecs, iterate through the counts array
63        let jpdf = self.jointpdfu8(v)?;
64        for v in jpdf {
65            for c in v {
66                if c > 0_u32 {
67                    let p = (c as f64) / nf;
68                    entropy -= p * (p.ln());
69                }
70            }
71        }
72        Ok(entropy)
73    }
74
75    /// Statistical pairwise dependence in range [0,1] of two &[u8] variables
76    /// returns 0 iff they are statistically pairwise independent
77    /// returns 1 if they are identical or all values are unique
78    fn dependenceu8(self, v: &[u8]) -> Result<f64, RE> {
79        Ok((self.entropyu8() + v.entropyu8()) / self.jointentropyu8(v)? - 1.0)
80    }
81
82    /// Independence in the range [1,2] of two &[u8] variables
83    /// e.g. 2 is returned iff they are statistically pairwise independent
84    /// returns 1 if they are identical or all values are unique
85    fn independenceu8(self, v: &[u8]) -> Result<f64, RE> {
86        Ok(2.0 * self.jointentropyu8(v)? / (self.entropyu8() + v.entropyu8()))
87    }
88}