gpu-trace-perf 1.8.2

Plays a collection of GPU traces under different environments to evaluate driver changes on performance
Documentation
use crate::TraceResults;
use rand::Rng;
use statrs::statistics::Statistics;
use std::iter::zip;

/// One row of the Student's t critical-value table used by
/// [`students_t_inverse_cdf`].
struct StudentsTInverseCDFTable {
    /// Degrees of freedom the row was tabulated for. `u32::MAX` marks the
    /// final row, which is used as the "infinite degrees of freedom" limit.
    df: u32,
    /// Critical values for the cumulative probabilities 0.90, 0.95, 0.975,
    /// 0.99, 0.995, 0.999 and 0.9995, in that order.
    t: [f32; 7],
}

/// Returns the Student's t critical value for cumulative probability `cdf`
/// and `df` degrees of freedom, using a built-in table.
///
/// * `cdf` is rounded *up* to the next tabulated probability (0.90, 0.95,
///   0.975, 0.99, 0.995, 0.999); anything above 0.999 -- and NaN -- uses the
///   0.9995 column.
/// * `df` values of 1..=30, 60, 120 and `u32::MAX` hit a table row exactly.
///   `df == 0` uses the `df == 1` row.
/// * Any other `df` lies in one of the sparse gaps of the table (30..60,
///   60..120, 120..infinity) and is interpolated linearly in `1 / df` between
///   the two neighbouring rows. Simply taking the next larger row would
///   return a critical value that is too small and therefore understate the
///   resulting confidence interval.
pub fn students_t_inverse_cdf(cdf: f64, df: u32) -> f32 {
    // Builds one table row; keeps the table below compact.
    const fn row(df: u32, t: [f32; 7]) -> StudentsTInverseCDFTable {
        StudentsTInverseCDFTable { df, t }
    }

    // Some tabulated values (e.g. 2.718) merely look like truncated math
    // constants, which would otherwise trip this lint.
    #[allow(clippy::approx_constant)]
    const STUDENTS_T: &[StudentsTInverseCDFTable] = &[
        row(1, [3.078, 6.314, 12.706, 31.821, 63.656, 318.289, 636.578]),
        row(2, [1.886, 2.920, 4.303, 6.965, 9.925, 22.328, 31.600]),
        row(3, [1.638, 2.353, 3.182, 4.541, 5.841, 10.214, 12.924]),
        row(4, [1.533, 2.132, 2.776, 3.747, 4.604, 7.173, 8.610]),
        row(5, [1.476, 2.015, 2.571, 3.365, 4.032, 5.894, 6.869]),
        row(6, [1.440, 1.943, 2.447, 3.143, 3.707, 5.208, 5.959]),
        row(7, [1.415, 1.895, 2.365, 2.998, 3.499, 4.785, 5.408]),
        row(8, [1.397, 1.860, 2.306, 2.896, 3.355, 4.501, 5.041]),
        row(9, [1.383, 1.833, 2.262, 2.821, 3.250, 4.297, 4.781]),
        row(10, [1.372, 1.812, 2.228, 2.764, 3.169, 4.144, 4.587]),
        row(11, [1.363, 1.796, 2.201, 2.718, 3.106, 4.025, 4.437]),
        row(12, [1.356, 1.782, 2.179, 2.681, 3.055, 3.930, 4.318]),
        row(13, [1.350, 1.771, 2.160, 2.650, 3.012, 3.852, 4.221]),
        row(14, [1.345, 1.761, 2.145, 2.624, 2.977, 3.787, 4.140]),
        row(15, [1.341, 1.753, 2.131, 2.602, 2.947, 3.733, 4.073]),
        row(16, [1.337, 1.746, 2.120, 2.583, 2.921, 3.686, 4.015]),
        row(17, [1.333, 1.740, 2.110, 2.567, 2.898, 3.646, 3.965]),
        row(18, [1.330, 1.734, 2.101, 2.552, 2.878, 3.610, 3.922]),
        row(19, [1.328, 1.729, 2.093, 2.539, 2.861, 3.579, 3.883]),
        row(20, [1.325, 1.725, 2.086, 2.528, 2.845, 3.552, 3.850]),
        row(21, [1.323, 1.721, 2.080, 2.518, 2.831, 3.527, 3.819]),
        row(22, [1.321, 1.717, 2.074, 2.508, 2.819, 3.505, 3.792]),
        row(23, [1.319, 1.714, 2.069, 2.500, 2.807, 3.485, 3.768]),
        row(24, [1.318, 1.711, 2.064, 2.492, 2.797, 3.467, 3.745]),
        row(25, [1.316, 1.708, 2.060, 2.485, 2.787, 3.450, 3.725]),
        row(26, [1.315, 1.706, 2.056, 2.479, 2.779, 3.435, 3.707]),
        row(27, [1.314, 1.703, 2.052, 2.473, 2.771, 3.421, 3.689]),
        row(28, [1.313, 1.701, 2.048, 2.467, 2.763, 3.408, 3.674]),
        row(29, [1.311, 1.699, 2.045, 2.462, 2.756, 3.396, 3.660]),
        row(30, [1.310, 1.697, 2.042, 2.457, 2.750, 3.385, 3.646]),
        row(60, [1.296, 1.671, 2.000, 2.390, 2.660, 3.232, 3.460]),
        row(120, [1.289, 1.658, 1.980, 2.358, 2.617, 3.160, 3.373]),
        row(u32::MAX, [1.282, 1.645, 1.960, 2.326, 2.576, 3.091, 3.291]),
    ];

    // Upper bounds of the first six probability columns; the seventh column
    // (0.9995) catches everything else.
    const COLUMN_LIMITS: [f64; 6] = [0.90, 0.95, 0.975, 0.99, 0.995, 0.999];

    // Reciprocal degrees of freedom, with the `u32::MAX` row standing in for
    // infinity (reciprocal 0).
    fn inverse_df(df: u32) -> f64 {
        if df == u32::MAX {
            0.0
        } else {
            1.0 / f64::from(df)
        }
    }

    // NaN fails every comparison and therefore lands in the last column.
    let column = COLUMN_LIMITS
        .iter()
        .position(|&limit| cdf <= limit)
        .unwrap_or(COLUMN_LIMITS.len());

    // First row whose df is at least the requested one.
    let upper_index = STUDENTS_T
        .iter()
        .position(|entry| df <= entry.df)
        .expect("didn't find df: the table ends with a u32::MAX row");
    let upper = &STUDENTS_T[upper_index];

    // Exact hit, or df == 0 which has no smaller neighbour to interpolate with.
    if df == upper.df || upper_index == 0 {
        return upper.t[column];
    }

    // Only reachable in the gaps after df == 30, so `lower.df` is never 0.
    let lower = &STUDENTS_T[upper_index - 1];
    let weight = (inverse_df(df) - inverse_df(upper.df))
        / (inverse_df(lower.df) - inverse_df(upper.df));
    let upper_t = f64::from(upper.t[column]);
    let lower_t = f64::from(lower.t[column]);

    (upper_t + weight * (lower_t - upper_t)) as f32
}

/// Summary of a comparison between two sets of measurements.
pub struct ResultStats {
    /// Relative difference of the second mean with respect to the first:
    /// `(means[1] - means[0]) / means[0]`.
    pub change: f64,
    /// Means of the two sample sets, in argument order (`a`, then `b`).
    pub means: [f64; 2],
    /// Half-width of the confidence interval on the difference of the means,
    /// expressed relative to `means[0]`.
    pub error: f64,
    /// Number of samples in each set, in argument order (`a`, then `b`).
    pub n: [usize; 2],
}

impl ResultStats {
    /// Compares sample set `b` against sample set `a`.
    ///
    /// The error is derived from the pooled standard deviation of both sets
    /// and a two-tailed Student's t critical value at significance level
    /// `alpha` (e.g. `0.05` for a 95% confidence interval).
    pub fn new(a: &[f64], b: &[f64], alpha: f64) -> ResultStats {
        let n = [a.len(), b.len()];
        let [count_a, count_b] = n.map(|count| count as f64);

        let means = [a.mean(), b.mean()];
        let (sd_a, sd_b) = (a.std_dev(), b.std_dev());

        let degrees_of_freedom = count_a + count_b - 2.0;

        // Cohen's pooled variance: each set's variance weighted by its own
        // degrees of freedom.
        let weighted_var_a = (count_a - 1.0) * sd_a * sd_a;
        let weighted_var_b = (count_b - 1.0) * sd_b * sd_b;
        let pooled_sd = ((weighted_var_a + weighted_var_b) / degrees_of_freedom).sqrt();

        let standard_error = pooled_sd * (1.0 / count_a + 1.0 / count_b).sqrt();

        // Performance may go up or down, so the interval is two-tailed: a 95%
        // CI needs the 0.975 quantile.
        let upper_quantile = 1.0 - alpha / 2.0;
        let critical_t = f64::from(students_t_inverse_cdf(
            upper_quantile,
            degrees_of_freedom as u32,
        ));
        let margin = critical_t * standard_error;

        let [baseline_mean, comparison_mean] = means;
        ResultStats {
            change: (comparison_mean - baseline_mean) / baseline_mean,
            means,
            error: margin / baseline_mean,
            n,
        }
    }

    /// Returns `true` when neither of the two means is exactly zero.
    pub fn has_fps(&self) -> bool {
        self.means.iter().all(|&mean| mean != 0.0)
    }
}

/// Bootstrap estimate of how the comparison run relates to the baseline run
/// across a whole set of traces.
#[derive(Default, PartialEq, Debug)]
pub struct BootstrappedRelativeAndMaxChange {
    /// Average over all bootstrap iterations of the geometric mean of the
    /// per-trace ratio `mean(comparison) / mean(baseline)`.
    pub relative_mean_change: f64,
    /// Standard deviation of that per-iteration geometric mean.
    pub relative_mean_error: f64,
    /// Average over all bootstrap iterations of the geometric mean of the
    /// per-trace ratio `max(comparison) / max(baseline)`.
    pub relative_max_change: f64,
    /// Standard deviation of that per-iteration geometric mean.
    pub relative_max_error: f64,
}

impl BootstrappedRelativeAndMaxChange {
    /// Runs `iter` bootstrap iterations over `results`, drawing the resamples
    /// with `rng`.
    ///
    /// Every iteration resamples each trace, computes the per-trace
    /// comparison/baseline ratios of the mean and of the max, and records the
    /// geometric mean of those ratios across traces.
    pub fn new<R: Rng>(
        results: &[TraceResults],
        iter: usize,
        rng: &mut R,
    ) -> BootstrappedRelativeAndMaxChange {
        // Scratch copies that each iteration overwrites with a fresh resample.
        let mut scratch = results.to_vec();

        let mut mean_changes = Vec::new();
        let mut max_changes = Vec::new();

        for _ in 0..iter {
            let (mean_ratios, max_ratios): (Vec<f64>, Vec<f64>) =
                zip(scratch.iter_mut(), results.iter())
                    .map(|(sample, original)| {
                        sample.resample(original, &mut *rng);

                        // results[0] is taken to be the baseline (a) and
                        // results[1] the comparison (b).
                        let baseline = &sample.results[0];
                        let comparison = &sample.results[1];

                        let mean_ratio = comparison.iter().mean() / baseline.iter().mean();
                        let max_ratio = Statistics::max(comparison.iter())
                            / Statistics::max(baseline.iter());
                        (mean_ratio, max_ratio)
                    })
                    .unzip();

            mean_changes.push(mean_ratios.geometric_mean());
            max_changes.push(max_ratios.geometric_mean());
        }

        let relative_mean_change = mean_changes.iter().mean();
        let relative_mean_error = mean_changes.iter().std_dev();
        let relative_max_change = max_changes.iter().mean();
        let relative_max_error = max_changes.iter().std_dev();

        BootstrappedRelativeAndMaxChange {
            relative_mean_change,
            relative_mean_error,
            relative_max_change,
            relative_max_error,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use assert_approx_eq::assert_approx_eq;

    #[test]
    fn test_stats() {
        // Results from scipy
        assert_approx_eq!(students_t_inverse_cdf(0.95, 2), 2.919_985, 0.001);
        assert_approx_eq!(students_t_inverse_cdf(0.995, 5), 4.032_142, 0.001);

        // Result from ministat
        let a = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let b = vec![5.0, 6.0, 7.0, 8.0, 9.0];
        let stats = ResultStats::new(&a, &b, 0.05);
        assert_approx_eq!(stats.change, 1.333_333, 0.001);
        assert_approx_eq!(stats.error, 0.768_668, 0.001);
    }

    /// Sets the found flag for the elements of vals[] that appear more than once
    /// in the sampled[].
    fn mark_sampled_with_replacement(found: &mut [bool], sampled: &[f64], vals: &[f64]) {
        for (i, val) in vals.iter().enumerate() {
            if sampled.iter().filter(|x| *x == val).count() >= 2 {
                found[i] = true;
            }
        }
    }

    /// Builds the initial "seen drawn twice" flags for a result vector of
    /// `len` values: one flag per value, all unset.
    ///
    /// A vector with fewer than two slots can never contain a duplicate, so
    /// its flags start out set to keep the test loop from waiting forever.
    /// NOTE(review): this assumes `resample()` keeps every result vector at
    /// its original length -- confirm against its implementation.
    fn initial_found_flags(len: usize) -> Vec<bool> {
        vec![len < 2; len]
    }

    /// Loops resampling from a sample until we've seen every element of the
    /// sample be drawn with replacement.
    #[test]
    fn resample_replacement_test() {
        // Generous bound so that a resampler which never repeats an element
        // fails the test instead of hanging it.
        const MAX_ATTEMPTS: u32 = 100_000;

        for count in 1..4 {
            let a: Vec<f64> = (0..count).map(|i| i as f64).collect();
            // b gets one more element than a.
            let b: Vec<f64> = (0..=count).map(|i| (i * 2) as f64).collect();

            // One flag per original value. Starting from empty vectors would
            // make the loop condition below false immediately and turn this
            // test into a no-op.
            let mut found_replacements_a = initial_found_flags(a.len());
            let mut found_replacements_b = initial_found_flags(b.len());

            let trace = TraceResults {
                results: vec![a, b],
                logged: false,
            };

            println!("Checking that each element can get sampled with replacement");
            let mut attempts = 0;
            while found_replacements_a.iter().any(|x| !x) || found_replacements_b.iter().any(|x| !x)
            {
                attempts += 1;
                assert!(
                    attempts <= MAX_ATTEMPTS,
                    "not every element was drawn more than once within {MAX_ATTEMPTS} resamples"
                );

                let mut sample = trace.clone();
                sample.resample(&trace, &mut rand::rng());

                mark_sampled_with_replacement(
                    &mut found_replacements_a,
                    &sample.results[0],
                    &trace.results[0],
                );
                mark_sampled_with_replacement(
                    &mut found_replacements_b,
                    &sample.results[1],
                    &trace.results[1],
                );
            }
        }
    }
}