#[macro_use] extern crate lazy_static;
extern crate assert_approx_eq;
extern crate statrs;
extern crate stderrlog;
extern crate structopt;
extern crate walkdir;
use log::error;
use log_once::error_once;
use regex::Regex;
use statrs::statistics::Statistics;
use std::io::prelude::*;
use std::process::Command;
use structopt::StructOpt;
use walkdir::WalkDir;
#[derive(Debug, StructOpt)]
#[structopt(version = "1.3.1", author = "Eric Anholt <eric@anholt.net>", about = "Plays a collection of GPU traces under different environments to evaluate driver changes on performance")]
struct Opts {
#[structopt(short = "v", long = "verbose", parse(from_occurrences))]
verbose: i32,
#[structopt(subcommand)]
subcmd: SubCommand,
}
#[derive(Debug, StructOpt)]
enum SubCommand {
#[structopt(name = "run")]
Run(Run),
}
#[derive(Debug, StructOpt)]
struct Run {
#[structopt(short = "t", long = "traces", multiple = true, number_of_values = 1, takes_value = true, help = "path to directory containing traces")]
traces: Vec<String>,
#[structopt(index = 1, required = true, help = "command to set the runtime environment for the 'a' case")]
a: String,
#[structopt(index = 2, required = true, help = "command to set the runtime environment for the 'b' case")]
b: String,
}
fn collect_traces(directories: Vec<String>) -> Vec<String> {
let mut traces : Vec<String> = Vec::new();
for dir in directories {
let walk = WalkDir::new(dir)
.follow_links(true);
for entry in walk.into_iter().filter_map(|e| e.ok()) {
if let Some(path) = entry.path().to_str() {
if path.ends_with(".trace") || path.ends_with(".rdc") {
traces.push(path.to_string());
}
} else {
error!("Path {:?} not UTF-8, skipping", entry.path());
}
}
}
traces
}
struct TraceResults {
a: Vec<f64>,
b: Vec<f64>,
path: String,
}
fn parse_apitrace_pgpu_output(output: &str) -> Option<i64> {
lazy_static! {
static ref CALL_RE: Regex = Regex::new("^call [0-9]+ -?[0-9]+ ([0-9]+)").unwrap();
}
let mut total = 0;
let mut start_of_frame = true;
for line in output.lines() {
if line == "frame_end" {
start_of_frame = true;
} else {
let cap = CALL_RE.captures(line);
if let Some(cap) = cap {
if start_of_frame {
total = 0;
start_of_frame = false;
}
match cap[1].parse::<i64>() {
Ok(gpu) => {
if gpu >= 0 {
total += gpu;
} else {
error!("apitrace produced GL_TIME_ELAPSED < 0, skipping(gpu hang?)");
return None;
}
}
Err(_) => {
error!("failed to parse apitrace's GL_TIME_ELAPSED");
return None;
}
}
}
}
}
Some(total)
}
fn parse_renderdoc_wrapper_output(output: &str) -> Option<f32> {
lazy_static! {
static ref CALL_RE: Regex = Regex::new("EID [0-9]*: (.*)").unwrap();
}
let mut total = 0.0;
for line in output.lines() {
if let Some(cap) = CALL_RE.captures(line) {
match cap[1].parse::<f32>() {
Ok(time) => total += time,
_ => {
println!("Failed to parse renderdoc time event '{}'", line);
return None;
},
}
}
}
Some(total)
}
fn run_apitrace(trace: &str, environment: &str) -> Option<i64> {
let apitrace_command = [
"eglretrace", "--pgpu",
"--headless",
"--loop=1", trace,
];
let mut command = Command::new(environment);
command.args(&apitrace_command);
command.env("NIR_VALIDATE", "0");
let output = command.output()
.expect("failed to `trace");
if !output.status.success() {
let stderr = std::str::from_utf8(&output.stderr).unwrap();
if stderr.contains("waffle_context_create failed") {
error_once!("apitrace reported waffle_context_create() failed, likely due to trace requiring too new of a GL version");
return None;
}
println!("Failed to start apitrace:");
let stdout = std::str::from_utf8(&output.stdout).unwrap();
println!("{}", if stderr.len() > 2 { stderr } else { stdout });
println!("command: {:?}", command);
return None;
}
parse_apitrace_pgpu_output(&String::from_utf8_lossy(&output.stdout))
}
fn run_renderdoc(trace: &str, environment: &str) -> Option<f64> {
let renderdoc_command = [
"python3",
"-", trace,
];
let mut command = Command::new(environment);
command.args(&renderdoc_command);
command.env("NIR_VALIDATE", "0");
command.stdin(std::process::Stdio::piped());
command.stdout(std::process::Stdio::piped());
command.stderr(std::process::Stdio::piped());
let mut child = command.spawn().expect("failed to start python3");
let stdin = child.stdin.as_mut().expect("Failed to open stdin");
let renderdoc_wrapper_script = include_bytes!("gpu-trace-perf-renderdoc-wrapper.py");
stdin.write_all(renderdoc_wrapper_script).expect("failed to write to python's stdin");
let output = child.wait_with_output().expect("failed to read stdout");
if !output.status.success() {
let stderr = std::str::from_utf8(&output.stderr).unwrap();
if stderr.contains("API is unsupported") {
error_once!("renderdoc reported API (likely window system) unsupported, skipping trace");
return None;
}
println!("Failed to start renderdoc:");
println!("{}", stderr);
println!("command: {:?}", command);
if stderr.contains("FileNotFound") {
println!("TIP: Failure to find a file with a space in its name probably means your wrapper script didn't quote the arguments")
}
return None;
}
parse_renderdoc_wrapper_output(&String::from_utf8_lossy(&output.stdout)).map(|t| t as f64)
}
fn run_trace(trace: &str, environment: &str) -> Option<f64> {
if trace.ends_with(".trace") {
run_apitrace(trace, environment).map(|t| t as f64)
} else {
assert!(trace.ends_with(".rdc"));
run_renderdoc(trace, environment)
}
}
fn run_each_trace(run_opts: &Run, results: &mut Vec<TraceResults>, a_first : bool) {
for result in results.iter_mut() {
println!("Running {}", result.path);
let pair = if a_first {
(run_trace(&result.path, &run_opts.a), run_trace(&result.path, &run_opts.b))
} else {
(run_trace(&result.path, &run_opts.b), run_trace(&result.path, &run_opts.a))
};
if let (Some(a), Some(b)) = pair {
result.a.push(a);
result.b.push(b);
}
}
}
struct StudentsTInverseCDFTable {
df: u32,
t: [f32; 7],
}
fn students_t_inverse_cdf(cdf: f64, df: u32) -> f32 {
#[allow(clippy::approx_constant)]
const STUDENTS_T: &[StudentsTInverseCDFTable] = &[
StudentsTInverseCDFTable { df: 1, t: [3.078, 6.314, 12.706, 31.821, 63.656, 318.289, 636.578] },
StudentsTInverseCDFTable { df: 2, t: [1.886, 2.920, 4.303, 6.965, 9.925, 22.328, 31.600] },
StudentsTInverseCDFTable { df: 3, t: [1.638, 2.353, 3.182, 4.541, 5.841, 10.214, 12.924] },
StudentsTInverseCDFTable { df: 4, t: [1.533, 2.132, 2.776, 3.747, 4.604, 7.173, 8.610] },
StudentsTInverseCDFTable { df: 5, t: [1.476, 2.015, 2.571, 3.365, 4.032, 5.894, 6.869] },
StudentsTInverseCDFTable { df: 6, t: [1.440, 1.943, 2.447, 3.143, 3.707, 5.208, 5.959] },
StudentsTInverseCDFTable { df: 7, t: [1.415, 1.895, 2.365, 2.998, 3.499, 4.785, 5.408] },
StudentsTInverseCDFTable { df: 8, t: [1.397, 1.860, 2.306, 2.896, 3.355, 4.501, 5.041] },
StudentsTInverseCDFTable { df: 9, t: [1.383, 1.833, 2.262, 2.821, 3.250, 4.297, 4.781] },
StudentsTInverseCDFTable { df: 10, t: [1.372, 1.812, 2.228, 2.764, 3.169, 4.144, 4.587] },
StudentsTInverseCDFTable { df: 11, t: [1.363, 1.796, 2.201, 2.718, 3.106, 4.025, 4.437] },
StudentsTInverseCDFTable { df: 12, t: [1.356, 1.782, 2.179, 2.681, 3.055, 3.930, 4.318] },
StudentsTInverseCDFTable { df: 13, t: [1.350, 1.771, 2.160, 2.650, 3.012, 3.852, 4.221] },
StudentsTInverseCDFTable { df: 14, t: [1.345, 1.761, 2.145, 2.624, 2.977, 3.787, 4.140] },
StudentsTInverseCDFTable { df: 15, t: [1.341, 1.753, 2.131, 2.602, 2.947, 3.733, 4.073] },
StudentsTInverseCDFTable { df: 16, t: [1.337, 1.746, 2.120, 2.583, 2.921, 3.686, 4.015] },
StudentsTInverseCDFTable { df: 17, t: [1.333, 1.740, 2.110, 2.567, 2.898, 3.646, 3.965] },
StudentsTInverseCDFTable { df: 18, t: [1.330, 1.734, 2.101, 2.552, 2.878, 3.610, 3.922] },
StudentsTInverseCDFTable { df: 19, t: [1.328, 1.729, 2.093, 2.539, 2.861, 3.579, 3.883] },
StudentsTInverseCDFTable { df: 20, t: [1.325, 1.725, 2.086, 2.528, 2.845, 3.552, 3.850] },
StudentsTInverseCDFTable { df: 21, t: [1.323, 1.721, 2.080, 2.518, 2.831, 3.527, 3.819] },
StudentsTInverseCDFTable { df: 22, t: [1.321, 1.717, 2.074, 2.508, 2.819, 3.505, 3.792] },
StudentsTInverseCDFTable { df: 23, t: [1.319, 1.714, 2.069, 2.500, 2.807, 3.485, 3.768] },
StudentsTInverseCDFTable { df: 24, t: [1.318, 1.711, 2.064, 2.492, 2.797, 3.467, 3.745] },
StudentsTInverseCDFTable { df: 25, t: [1.316, 1.708, 2.060, 2.485, 2.787, 3.450, 3.725] },
StudentsTInverseCDFTable { df: 26, t: [1.315, 1.706, 2.056, 2.479, 2.779, 3.435, 3.707] },
StudentsTInverseCDFTable { df: 27, t: [1.314, 1.703, 2.052, 2.473, 2.771, 3.421, 3.689] },
StudentsTInverseCDFTable { df: 28, t: [1.313, 1.701, 2.048, 2.467, 2.763, 3.408, 3.674] },
StudentsTInverseCDFTable { df: 29, t: [1.311, 1.699, 2.045, 2.462, 2.756, 3.396, 3.660] },
StudentsTInverseCDFTable { df: 30, t: [1.310, 1.697, 2.042, 2.457, 2.750, 3.385, 3.646] },
StudentsTInverseCDFTable { df: 60, t: [1.296, 1.671, 2.000, 2.390, 2.660, 3.232, 3.460] },
StudentsTInverseCDFTable { df: 120, t: [1.289, 1.658, 1.980, 2.358, 2.617, 3.160, 3.373] },
StudentsTInverseCDFTable { df: !0u32, t: [1.282, 1.645, 1.960, 2.326, 2.576, 3.091, 3.291] },
];
for entry in STUDENTS_T {
if df <= entry.df {
if cdf <= 0.90 {
return entry.t[0];
} else if cdf <= 0.95 {
return entry.t[1];
} else if cdf <= 0.975 {
return entry.t[2];
} else if cdf <= 0.99 {
return entry.t[3];
} else if cdf <= 0.995 {
return entry.t[4];
} else if cdf <= 0.999 {
return entry.t[5];
} else {
return entry.t[6];
}
}
}
unreachable!("didn't find df")
}
struct ResultStats<'a> {
change: f64,
means: [f64; 2],
results: &'a TraceResults,
error: f64,
}
impl ResultStats<'_> {
fn new(results: &TraceResults, alpha: f64) -> ResultStats {
let means = [results.a.clone().mean(), results.b.clone().mean()];
let n1 = results.a.len() as f64;
let n2 = results.b.len() as f64;
let s1 = results.a.clone().std_dev();
let s2 = results.b.clone().std_dev();
let df = n1 + n2 - 2.0;
let pooled_sd = (((n1 - 1.0) * s1 * s1 + (n2 - 1.0) * s2 * s2) / df).sqrt();
let standard_error = pooled_sd * (1.0 / n1 + 1.0 / n2).sqrt();
let t = students_t_inverse_cdf(1.0 - alpha / 2.0, df as u32) as f64;
let e = t * standard_error;
ResultStats {
results,
means,
change: (means[1] - means[0]) / means[0],
error: e / means[0],
}
}
fn has_time(&self) -> bool {
self.means[0] != 0.0 && self.means[1] != 0.0
}
}
fn print_stats(results: &[TraceResults]) {
let mut stats = Vec::new();
let alpha = 0.05 / results.len() as f64;
let mut pathlen = 0;
for result in results {
if !result.a.is_empty() && !result.b.is_empty() {
stats.push(ResultStats::new(&result, alpha));
if result.path.len() > pathlen {
pathlen = result.path.len();
}
}
}
stats.sort_by(|a, b| {
if !a.has_time() {
if !b.has_time() {
std::cmp::Ordering::Equal
} else {
std::cmp::Ordering::Greater
}
} else if !b.has_time() {
std::cmp::Ordering::Less
} else {
a.change.partial_cmp(&b.change).unwrap()
}
});
for stat in stats {
let prefix = format!("{path:pathlen$}: ",
path = stat.results.path,
pathlen = pathlen);
let change = if stat.has_time() {
format!("{:>7.2}%", stat.change * 100.0)
} else {
"no time detected".to_string()
};
let error = if stat.results.a.len() > 1 && stat.has_time() {
format!(" (+/- {:5.1}%)", error = stat.error * 100.0)
} else {
"".to_string()
};
let count = if stat.results.a.len() == stat.results.b.len() {
format!("{}", stat.results.a.len())
} else {
format!("{}/{}", stat.results.a.len(), stat.results.b.len())
};
println!("{prefix}{change}{error} (n={count})",
prefix = prefix,
change = change,
error = error,
count = count);
}
}
fn run(run_opts: &Run, traces: Vec<String>) {
let mut results : Vec<TraceResults> =
traces
.into_iter()
.map(|path| TraceResults {
a: Vec::new(),
b: Vec::new(),
path
})
.collect();
let mut a_first = true;
loop {
run_each_trace(run_opts, &mut results, a_first);
print_stats(&results);
a_first = !a_first;
}
}
fn test_run_environment(environment: &str) -> bool {
let output = Command::new(environment)
.arg("true")
.output();
match output {
Ok(_) => true,
Err(err) => {
println!("Failed to spawn test invocation of '{} true': {:?}", environment, err);
println!("TIP: Exec format error here probably means you need #!/bin/sh in your shell script.");
false
}
}
}
fn main() {
let opts = Opts::from_args();
stderrlog::new()
.module(module_path!())
.verbosity(opts.verbose as usize)
.init()
.unwrap();
match &opts.subcmd {
SubCommand::Run(run_opts) => {
let traces = collect_traces(run_opts.traces.clone());
if traces.is_empty() {
println!("No traces found in the given directories:");
for t in &run_opts.traces {
println!(" {}", t);
}
std::process::exit(1);
}
if !test_run_environment(&run_opts.a) || !test_run_environment(&run_opts.b) {
std::process::exit(1);
}
run(&run_opts, traces);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use assert_approx_eq::assert_approx_eq;
#[test]
fn test_apitrace_parsing() {
let apitrace_input = "
# call no gpu_start gpu_dura cpu_start cpu_dura vsize_start vsize_dura rss_start rss_dura pixels program name
call 44 0 0 0 0 0 0 0 0 0 0 glViewport
call 56 25082334 50166 0 0 0 0 0 0 0 0 glClear
call 81 41719667 0 0 0 0 0 0 0 0 0 glClear
call 176 42206667 472583 0 0 0 0 0 0 0 7 glDrawArrays
frame_end
call 222 0 0 0 0 0 0 0 0 0 4 glClearColor
call 224 45001334 21666 0 0 0 0 0 0 0 4 glClear
call 231 45023750 38000 0 0 0 0 0 0 0 7 glClear
call 239 45062584 519333 0 0 0 0 0 0 0 7 glDrawArrays
frame_end
call 222 0 0 0 0 0 0 0 0 0 4 glClearColor
call 224 47438000 13666 0 0 0 0 0 0 0 4 glClear
call 231 47452417 59583 0 0 0 0 0 0 0 7 glClear
call 239 47512917 579083 0 0 0 0 0 0 0 7 glDrawArrays
frame_end
Rendered 3 frames in 0.0539452 secs, average of 55.612 fps
";
assert_eq!(parse_apitrace_pgpu_output(apitrace_input).unwrap(), 13_666 + 59_583 + 579_083)
}
#[test]
fn test_apitrace_parsing_negatve_start() {
let apitrace_input = "call 318 -8883437858 156 0 0 0 0 0 0 0 0 glBlitFramebuffer";
assert_eq!(parse_apitrace_pgpu_output(apitrace_input).unwrap(), 156)
}
#[test]
fn test_renderdoc_parsing() {
let renderdoc_input = "
Counter 1 (GPU Duration):
Time taken for this event on the GPU, as measured by delta between two GPU timestamps.
Returns 8 byte CompType.Double, representing CounterUnit.Seconds
Counter 2000000 (N vertices submitted):
N vertices submitted
Returns 8 byte CompType.UInt, representing CounterUnit.Absolute
EID 52: 0.000045
EID 370: 0.000004
EID 407: 0.000006
";
assert_approx_eq!(parse_renderdoc_wrapper_output(renderdoc_input).unwrap(), 0.000_045 + 0.000_004 + 0.000_006, 0.000_001);
}
#[test]
fn test_renderdoc_nan_parsing() {
let renderdoc_input = "
EID 52: 0.000045
EID 370: nan
EID 407: 0.000006
";
assert_eq!(parse_renderdoc_wrapper_output(renderdoc_input), None);
}
#[test]
fn test_stats() {
assert_approx_eq!(students_t_inverse_cdf(0.95, 2), 2.919_985, 0.001);
assert_approx_eq!(students_t_inverse_cdf(0.995, 5), 4.032_142, 0.001);
let results = TraceResults {
a: vec!(1.0, 2.0, 3.0, 4.0, 5.0),
b: vec!(5.0, 6.0, 7.0, 8.0, 9.0),
path: "".to_string(),
};
let stats = ResultStats::new(&results, 0.05);
assert_approx_eq!(stats.change, 1.333_333, 0.001);
assert_approx_eq!(stats.error, 0.768_668, 0.001);
}
}