gpu-trace-perf 1.4.0

Plays a collection of GPU traces under different environments to evaluate how driver changes affect performance
use std::process::Command;

use anyhow::{Context, Result, bail};
use regex::Regex;
use serde::Deserialize;

use crate::{Replay, ReplayOutput, replay_command};

/// A trace file that is replayed through apitrace.
pub struct ApitraceTrace {
    /// Trace file path, stored exactly as given to [`ApitraceTrace::new`].
    file: String,
}

impl ApitraceTrace {
    /// Builds a trace handle that owns a copy of `file`.
    pub fn new(file: &str) -> Self {
        let file = String::from(file);
        Self { file }
    }
}

/// Runs `command` to completion and converts its captured output into a
/// [`ReplayOutput`].
///
/// # Errors
///
/// Returns an error if the process cannot be run, or if it exits
/// unsuccessfully. In the latter case:
///
/// * a stderr containing `waffle_context_create failed` produces a dedicated
///   error message;
/// * otherwise the captured stderr (or stdout, when stderr holds at most two
///   bytes) and the command line are printed to stdout before the error is
///   returned.
pub fn call_apitrace(command: Command) -> Result<ReplayOutput> {
    let mut command = command;

    let output = command
        .output()
        .with_context(|| format!("failed to run apitrace with command: {:?}", &command))?;

    if !output.status.success() {
        // Decode lossily: a failing child may emit arbitrary bytes, and the
        // path that reports the failure must not panic on invalid UTF-8.
        let stderr = String::from_utf8_lossy(&output.stderr);
        if stderr.contains("waffle_context_create failed") {
            bail!(
                "apitrace reported waffle_context_create() failed, likely due to trace requiring too new of a GL version"
            );
        }

        let stdout = String::from_utf8_lossy(&output.stdout);
        println!("Failed to start apitrace:");
        // Fall back to stdout when stderr is essentially empty.
        println!("{}", if stderr.len() > 2 { &stderr } else { &stdout });
        println!("command: {:?}", command);
        bail!("Failed to start apitrace");
    }

    Ok(ReplayOutput::from(output))
}

impl Replay for ApitraceTrace {
    /// Replays the trace with `eglretrace --pgpu`, optionally under `wrapper`
    /// and with `envs` handed to `replay_command`.
    fn replay(&self, wrapper: Option<&str>, envs: &[(String, String)]) -> Result<ReplayOutput> {
        let trace_file = self.file.as_str();

        // eglretrace is named explicitly: "apitrace replay" would otherwise
        // pick the x11 player (glretrace) for glx traces rather than waffle.
        //
        // --loop=1 repeats the last frame once, so that by the time it is
        // measured caches are hot, shaders are compiled, etc.
        let argv = [
            "eglretrace",
            "--pgpu",
            "--headless",
            "--loop=1",
            trace_file,
        ];

        let command = replay_command(&argv, wrapper, envs);
        call_apitrace(command)
    }

    /// Computes the fps from the `--pgpu` text captured on stdout.
    fn fps(&self, output: &ReplayOutput) -> Result<f64> {
        let pgpu_text = &output.stdout;
        parse_apitrace_pgpu_output(pgpu_text)
    }

    /// The trace is identified by its file path.
    fn name(&self) -> &str {
        self.file.as_str()
    }
}

// Returns the fps from the last frame of an apitrace replay --pgpu output
fn parse_apitrace_pgpu_output(output: &str) -> Result<f64> {
    lazy_static! {
        static ref CALL_RE: Regex = Regex::new("^call [0-9]+ -?[0-9]+ ([0-9]+)").unwrap();
    }

    let mut total = 0;
    let mut start_of_frame = true;
    for line in output.lines() {
        if line == "frame_end" {
            start_of_frame = true;
        } else {
            let cap = CALL_RE.captures(line);
            if let Some(cap) = cap {
                if start_of_frame {
                    total = 0;
                    start_of_frame = false;
                }
                match cap[1].parse::<i64>() {
                    Ok(gpu) => {
                        if gpu >= 0 {
                            total += gpu;
                        } else {
                            anyhow::bail!(
                                "apitrace produced GL_TIME_ELAPSED < 0, skipping(gpu hang?)"
                            );
                        }
                    }
                    Err(_) => {
                        anyhow::bail!("failed to parse apitrace's GL_TIME_ELAPSED");
                    }
                }
            }
        }
    }

    if total == 0 {
        anyhow::bail!("No times parsed");
    }

    Ok(1_000_000_000.0 / (total as f64))
}

/// If we're measuring times with u_trace, we don't need timestamps, but we do
/// need to loop the frame because there's a bunch of setup at the start that
/// we're not trying to measure.
pub struct ApitraceUtraceTrace {
    /// Trace file path, stored exactly as given to [`ApitraceUtraceTrace::new`].
    file: String,
    /// Whether `apitrace info` reported the trace's API as DirectX.
    is_directx: bool,
}

impl ApitraceUtraceTrace {
    /// Creates a trace handle for `file`, querying `apitrace info` to find out
    /// whether the trace is a DirectX one.
    ///
    /// If that query fails, a message is printed to stderr and the trace is
    /// treated as GL (i.e. not DirectX).
    pub fn new(file: &str) -> ApitraceUtraceTrace {
        let is_directx = apitrace_file_is_directx(file).unwrap_or_else(|e| {
            eprintln!("Failure calling apitrace info, assuming file is GL: {}", e);
            // "Assuming GL" means "not DirectX".
            false
        });

        ApitraceUtraceTrace {
            file: file.to_string(),
            is_directx,
        }
    }
}

impl Replay for ApitraceUtraceTrace {
    /// Replays the trace headless with the last frame looped twice, using
    /// `d3dretrace.exe` under wine for DirectX traces and `eglretrace`
    /// otherwise.
    fn replay(&self, wrapper: Option<&str>, envs: &[(String, String)]) -> Result<ReplayOutput> {
        // The player is named explicitly because "apitrace replay" would
        // otherwise pick the x11 one (glretrace) for glx traces rather than
        // going through waffle.
        let mut apitrace_command: Vec<&str> = if self.is_directx {
            vec!["wine", "d3dretrace.exe"]
        } else {
            vec!["eglretrace"]
        };

        // Two loops of the last frame: caches are hot, shaders are compiled,
        // etc., and the middle frame's utrace results come from a hot frame.
        apitrace_command.extend(["--headless", "--loop=2", self.file.as_str()]);

        let command = replay_command(&apitrace_command, wrapper, envs);
        call_apitrace(command)
    }

    /// Not expected to be called for this trace type; panics if it is.
    fn fps(&self, _: &ReplayOutput) -> Result<f64> {
        unreachable!("shouldn't be called");
    }

    /// The trace is identified by its file path.
    fn name(&self) -> &str {
        self.file.as_str()
    }
}

/// The one field read from the JSON given to `apitrace_info_is_directx`.
#[derive(Deserialize)]
struct ApitraceInfo {
    /// Value of the JSON document's `"API"` key.
    #[serde(rename = "API")]
    api: String,
}

/// Parses `input` as JSON and reports whether its `"API"` value is exactly
/// `"DirectX"`.
///
/// # Errors
///
/// Fails, with the offending text attached as context, when `input` cannot be
/// deserialized.
pub fn apitrace_info_is_directx(input: &str) -> Result<bool> {
    serde_json::from_str::<ApitraceInfo>(input)
        .map(|parsed| parsed.api == "DirectX")
        .with_context(|| format!("Parsing apitrace info output:\n{}", input))
}

/// Runs `apitrace info` on `file` and reports whether its output describes a
/// DirectX trace.
///
/// # Errors
///
/// Fails if the command cannot be run successfully or if its stdout cannot be
/// parsed.
pub fn apitrace_file_is_directx(file: &str) -> Result<bool> {
    let info_command = replay_command(&["apitrace", "info", file], None, &[]);
    let info_output = call_apitrace(info_command).context("Calling apitrace info")?;
    apitrace_info_is_directx(&info_output.stdout)
}

#[cfg(test)]
mod tests {
    use super::*;
    use assert_approx_eq::assert_approx_eq;

    /// Only the GPU durations of the final frame must count towards the fps.
    #[test]
    fn test_apitrace_parsing() {
        // Actual apitrace output, trimmed down for the testcase.
        let pgpu_output = "
# call no gpu_start gpu_dura cpu_start cpu_dura vsize_start vsize_dura rss_start rss_dura pixels program name
call 44 0 0 0 0 0 0 0 0 0 0 glViewport
call 56 25082334 50166 0 0 0 0 0 0 0 0 glClear
call 81 41719667 0 0 0 0 0 0 0 0 0 glClear
call 176 42206667 472583 0 0 0 0 0 0 0 7 glDrawArrays
frame_end
call 222 0 0 0 0 0 0 0 0 0 4 glClearColor
call 224 45001334 21666 0 0 0 0 0 0 0 4 glClear
call 231 45023750 38000 0 0 0 0 0 0 0 7 glClear
call 239 45062584 519333 0 0 0 0 0 0 0 7 glDrawArrays
frame_end
call 222 0 0 0 0 0 0 0 0 0 4 glClearColor
call 224 47438000 13666 0 0 0 0 0 0 0 4 glClear
call 231 47452417 59583 0 0 0 0 0 0 0 7 glClear
call 239 47512917 579083 0 0 0 0 0 0 0 7 glDrawArrays
frame_end
Rendered 3 frames in 0.0539452 secs, average of 55.612 fps
";
        let last_frame_ns: i64 = 13_666 + 59_583 + 579_083;
        let expected_fps = 1.0 / (last_frame_ns as f64 / 1_000_000_000.0);

        let fps = parse_apitrace_pgpu_output(pgpu_output).unwrap();
        assert_approx_eq!(fps, expected_fps);
    }

    /// A negative gpu_start column must not prevent the line from being parsed.
    #[test]
    fn test_apitrace_parsing_negatve_start() {
        let pgpu_output = "call 318 -8883437858 156 0 0 0 0 0 0 0 0 glBlitFramebuffer";
        let expected_fps = 1.0 / (156.0 / 1_000_000_000.0);

        let fps = parse_apitrace_pgpu_output(pgpu_output).unwrap();
        assert_approx_eq!(fps, expected_fps);
    }

    /// A frame whose only call has a zero GPU duration yields an error.
    #[test]
    fn test_apitrace_parsing_empty() {
        let pgpu_output = "
# call no gpu_start gpu_dura cpu_start cpu_dura vsize_start vsize_dura rss_start rss_dura pixels program name
call 44 0 0 0 0 0 0 0 0 0 0 glViewport
frame_end
";
        let parsed = parse_apitrace_pgpu_output(pgpu_output);
        assert!(parsed.is_err());
    }

    /// The "API" field of the info JSON decides whether a trace is DirectX.
    #[test]
    fn test_apitrace_directx_info() -> Result<()> {
        let directx_info = r#"
{
  "FileName": "/home/anholt/src/traces-db/unigine/heaven-scene1-low-d3d11.trace-dxgi",
  "ContainerVersion": 6,
  "ContainerType": "Brotli",
  "API": "DirectX",
  "FramesCount": 104,
  "ActualDataSize": 130120914,
  "ContainerSize": 63387386
}
"#;
        let opengl_info = r#"
{
  "FileName": "/home/anholt/src/traces-db/neverball/neverball-v2.trace",
  "ContainerVersion": 6,
  "ContainerType": "Brotli",
  "API": "OpenGL + GLX/WGL/CGL",
  "FramesCount": 147,
  "ActualDataSize": 21503984,
  "ContainerSize": 1554696
}
"#;

        let directx_detected = apitrace_info_is_directx(directx_info)?;
        assert!(directx_detected);

        let opengl_detected = apitrace_info_is_directx(opengl_info)?;
        assert!(!opengl_detected);

        Ok(())
    }
}