1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
use crate::{Array1, Array2};
use anyhow::{anyhow, Error, Result};
use std::{
    fs::File,
    io::{BufRead, BufReader},
    path::PathBuf,
};

/// Compute the arithmetic mean of an array.
pub(in crate) fn mean(arr: &[f64]) -> Result<f64, Error> {
    if arr.is_empty() {
        return Err(anyhow!("Can't take mean of empty array"));
    }
    let sum = arr.iter().sum::<f64>();
    let count = arr.len() as f64;
    Ok(sum / count)
}

/// Compute the sample variance of an array using Bessel's correction.
pub(in crate) fn sample_variance(arr: &[f64]) -> Result<f64, Error> {
    let xbar = mean(arr)?;
    Ok(arr.iter().map(|x| (x - xbar).powi(2)).sum::<f64>() / (arr.len() as f64 - 1.0))
}

/// Clone a 2D array into one long 1D array.
pub(in crate) fn flatten(chains: &Array2) -> Array1 {
    let mut flattened = Vec::new();
    for chain in chains {
        flattened.extend(chain);
    }
    flattened
}

/// Splits each chain into two chains of equal length.  When the
/// number of total draws N is odd, the (N+1)/2th draw is ignored.
///
/// See more details in Stan reference manual section
/// ["Effective Sample Size"](http://mc-stan.org/users/documentation).
///
/// Current implementation assumes chains are all of equal size.
pub fn split_chains(chains: Array2) -> Result<Array2, Error> {
    if chains.is_empty() {
        return Err(anyhow!("Can't split empty array of chains"));
    }
    let num_draws = chains.iter().map(|c| c.len()).min().unwrap();
    if num_draws < 1 {
        return Err(anyhow!("No samples to split"));
    }
    let (half, offset) = if num_draws % 2 == 0 {
        (num_draws / 2, 0)
    } else {
        ((num_draws - 1) / 2, 1)
    };
    let mut split_draws = Vec::new();
    for chain in chains {
        split_draws.push(chain[..half].to_vec());
        split_draws.push(chain[(half + offset)..].to_vec());
    }
    Ok(split_draws)
}

/// Simplified CSV reader for tesing purposes only; does not actually implement
/// parsing for headers, quotation, or other more advanced features. Assumes
/// that all values aside from the commas will be numeric.
///
/// # Arguments
/// * `skip_rows` - Number of rows to skip before numeric values. For example,
///                 if there is a header row you can pass in the value `1`.
/// * `n_rows` - Number of rows to read in. Use if you only want a certain
///              subset of rows or if there are improper rows after the numeric
///              rows (e.g. in Stan sample files there are commented rows at the end).
pub fn read_csv(path: &PathBuf, skip_rows: usize, n_rows: usize) -> Array2 {
    let mut result: Array2 = Vec::new();
    let f = File::open(&path).unwrap();
    let f = BufReader::new(f);
    for line in f.lines().skip(skip_rows).take(n_rows) {
        if let Ok(line) = line {
            for (idx, value) in line.split(',').enumerate() {
                if idx >= result.len() {
                    result.push(Vec::new())
                }
                result[idx].push(value.parse::<f64>().unwrap());
            }
        }
    }
    result
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::Array1;

    #[test]
    fn test_stats() {
        // Test our basic stats functions using numbers computed with numpy.
        let arr = vec![
            2.13829088,
            -1.06214379,
            -0.79265699,
            -0.21300888,
            -1.07155142,
            -0.50425317,
            0.95708854,
            -1.23854172,
            1.37124938,
            1.17658286,
        ];
        let empty: Array1 = vec![];
        assert_abs_diff_eq!(
            sample_variance(&arr).unwrap(),
            1.492596054209826,
            epsilon = 1e-6
        );
        assert_abs_diff_eq!(mean(&arr).unwrap(), 0.07610557018217139, epsilon = 1e-6);

        assert!(sample_variance(&empty).is_err());
        assert!(mean(&empty).is_err());
    }

    #[test]
    fn test_split_empty_chains() {
        // Make sure the we Err on empty or minimum 0 length chains
        let a: Array1 = vec![1.0];
        let b: Array1 = vec![];
        let c: Array1 = vec![];
        let chains = vec![a, b, c];
        assert!(split_chains(chains).is_err());

        let a: Array1 = vec![];
        let b: Array1 = vec![];
        let chains = vec![a, b];
        assert!(split_chains(chains).is_err());
    }

    #[test]
    fn test_split_even_chains() {
        // Regular split with even numbers
        let a = vec![1.0, 2.0, 3.0, 4.0];
        let b = vec![5.0, 6.0, 7.0, 8.0];
        let chains = vec![a, b];
        let split = split_chains(chains).unwrap();
        assert_eq!(split[0], vec![1.0, 2.0]);
        assert_eq!(split[1], vec![3.0, 4.0]);
        assert_eq!(split[2], vec![5.0, 6.0]);
        assert_eq!(split[3], vec![7.0, 8.0]);
    }

    #[test]
    fn test_split_odd_chains() {
        // Make sure the middle value gets dropped per the Stan reference implementation
        let a = vec![1.0, 2.0, 3.0, 4.0, 4.5];
        let b = vec![5.0, 6.0, 7.0, 8.0, 8.5];
        let chains = vec![a, b];
        let split = split_chains(chains).unwrap();
        assert_eq!(split[0], vec![1.0, 2.0]);
        assert_eq!(split[1], vec![4.0, 4.5]);
        assert_eq!(split[2], vec![5.0, 6.0]);
        assert_eq!(split[3], vec![8.0, 8.5]);
    }

    #[test]
    fn test_flatten() {
        // Regular split with even numbers
        let a = vec![1.0, 2.0, 3.0, 4.0];
        let b = vec![5.0, 6.0, 7.0, 8.0];
        let chains = vec![a, b];
        let flattened = flatten(&chains);
        assert_eq!(flattened, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
    }
}