// gpu-trace-perf 1.4.0
//
// Plays a collection of GPU traces under different environments to evaluate the effect of driver changes on performance.
use anyhow::{Result, bail};
use regex::Regex;
use std::io::prelude::*;

use crate::{Replay, ReplayOutput, replay_command};

/// A renderdoc capture that gets replayed through the embedded Python wrapper script.
pub struct RenderdocPyTrace {
    /// Path of the capture file; passed as the script's argument and reported as the trace name.
    file: String,
}

impl RenderdocPyTrace {
    /// Builds a trace for the capture at `file`, keeping an owned copy of the path.
    pub fn new(file: &str) -> Self {
        Self {
            file: String::from(file),
        }
    }
}

/// Contents of `gpu-trace-perf-renderdoc-wrapper.py`, embedded into the binary at compile time.
/// `RenderdocPyTrace::replay` pipes these bytes to `python3 -` over stdin.
static RENDERDOC_WRAPPER_SCRIPT: &[u8] = include_bytes!("gpu-trace-perf-renderdoc-wrapper.py");

/// If we're not using u_trace, then we use a python script that wraps the renderdoc events (draws,
/// blits, etc) in timestamps and takes the diff.  This won't work well for tilers.
impl Replay for RenderdocPyTrace {
    /// Runs the embedded wrapper script under `python3` (optionally through `wrapper`, with
    /// `envs` applied by `replay_command`) on this trace's capture file and collects its output.
    ///
    /// # Errors
    ///
    /// Returns an error if the process cannot be started, the script cannot be written to its
    /// stdin, its output cannot be collected, or it exits unsuccessfully.  On an unsuccessful
    /// exit the child's stderr and the command line are printed to stdout for diagnosis, except
    /// when renderdoc reports the API as unsupported.
    fn replay(&self, wrapper: Option<&str>, envs: &[(String, String)]) -> Result<ReplayOutput> {
        let renderdoc_command = [
            "python3", "-", // script from stdin
            &self.file,
        ];

        let mut command = replay_command(&renderdoc_command, wrapper, envs);
        command.stdin(std::process::Stdio::piped());
        command.stdout(std::process::Stdio::piped());
        command.stderr(std::process::Stdio::piped());

        // A missing interpreter or wrapper is an environment problem, not a bug: report it
        // through the Result instead of panicking.
        let mut child = match command.spawn() {
            Ok(child) => child,
            Err(e) => bail!("failed to start python3: {}", e),
        };

        // stdin was configured as piped above, so the handle is always present here.
        let stdin = child.stdin.as_mut().expect("Failed to open stdin");
        if let Err(e) = stdin.write_all(RENDERDOC_WRAPPER_SCRIPT) {
            // Best-effort cleanup so the child does not linger after we give up on it.
            let _ = child.kill();
            let _ = child.wait();
            bail!("failed to write to python's stdin: {}", e);
        }

        // wait_with_output() closes stdin first, which is what signals end-of-script to python.
        let output = match child.wait_with_output() {
            Ok(output) => output,
            Err(e) => bail!("failed to read stdout: {}", e),
        };

        if !output.status.success() {
            // The child's stderr is not guaranteed to be valid UTF-8; decode lossily so that
            // reporting a failure can never itself panic.
            let stderr = String::from_utf8_lossy(&output.stderr);
            if stderr.contains("API is unsupported") {
                bail!("renderdoc reported API (likely window system) unsupported, skipping trace");
            }

            println!("Failed to start renderdoc:");
            println!("{}", stderr);
            println!("command: {:?}", command);

            if stderr.contains("FileNotFound") {
                println!(
                    "TIP: Failure to find a file with a space in its name probably means your wrapper script didn't quote the arguments"
                )
            }
            bail!("Failed to start renderdoc");
        }
        Ok(ReplayOutput::from(output))
    }

    /// Derives the FPS from the wrapper script's captured stdout.
    fn fps(&self, output: &ReplayOutput) -> Result<f64> {
        parse_renderdoc_wrapper_output(&output.stdout).map(f64::from)
    }

    /// The trace is identified by its capture file path.
    fn name(&self) -> &str {
        &self.file
    }
}

/// If we're measuring times with u_trace, we don't need timestamps, but we do
/// need to loop the frame because there's a bunch of setup at the start that
/// we're not trying to measure.  renderdoccmd is just the tool for that.
pub struct RenderdocUtraceTrace {
    /// Path of the capture file; passed to `renderdoccmd replay` and reported as the trace name.
    file: String,
    /// Whether the capture was guessed (from its file name) to be a DirectX trace, in which case
    /// it is replayed with `wine renderdoccmd.exe` instead of the native `renderdoccmd`.
    is_directx: bool,
}

/// Heuristically decides whether a capture is a DirectX trace by looking for a DirectX API
/// marker (e.g. `d3d11`, `dx12`, `dxgi`) anywhere in its path.  The match is case-sensitive.
fn guess_filename_directx(file: &str) -> bool {
    const DIRECTX_MARKERS: [&str; 11] = [
        "dx8", "dx9", "dx10", "dx11", "dx12", "d3d8", "d3d9", "d3d10", "d3d11", "d3d12", "dxgi",
    ];

    for marker in DIRECTX_MARKERS.iter() {
        if file.contains(*marker) {
            return true;
        }
    }
    false
}

impl RenderdocUtraceTrace {
    /// Builds a trace for the capture at `file`, guessing from the file name whether it is a
    /// DirectX capture.
    pub fn new(file: &str) -> Self {
        let is_directx = guess_filename_directx(file);
        Self {
            file: String::from(file),
            is_directx,
        }
    }
}

/// Replays the capture by looping it through `renderdoccmd` (`wine renderdoccmd.exe` for
/// captures guessed to be DirectX).  No FPS is derived from the replay output here, so `fps()`
/// is not expected to be called on this type.
impl Replay for RenderdocUtraceTrace {
    /// Runs `renderdoccmd replay <file> -l 3` (optionally through `wrapper`, with `envs` applied
    /// by `replay_command`) and collects its output.
    ///
    /// # Errors
    ///
    /// Returns an error if the process cannot be started, its output cannot be collected, or it
    /// exits unsuccessfully.  On an unsuccessful exit the child's stderr and the command line
    /// are printed to stdout for diagnosis, except when renderdoc reports the API as
    /// unsupported.
    fn replay(&self, wrapper: Option<&str>, envs: &[(String, String)]) -> Result<ReplayOutput> {
        // Play 3 frames, so we can take one of the middle ones as the most
        // representative -- frame 0 has a bunch of setup, but the final frame may
        // not have any draws in it for some reason.
        let renderdoccmd_command = if self.is_directx {
            vec!["wine", "renderdoccmd.exe", "replay", &self.file, "-l", "3"]
        } else {
            vec!["renderdoccmd", "replay", &self.file, "-l", "3"]
        };

        let mut command = replay_command(&renderdoccmd_command, wrapper, envs);
        command.stdout(std::process::Stdio::piped());
        command.stderr(std::process::Stdio::piped());

        // A missing renderdoccmd/wine/wrapper is an environment problem, not a bug: report it
        // through the Result instead of panicking.
        let child = match command.spawn() {
            Ok(child) => child,
            Err(e) => bail!("failed to start renderdoccmd: {}", e),
        };
        let output = match child.wait_with_output() {
            Ok(output) => output,
            Err(e) => bail!("failed to read stdout: {}", e),
        };

        if !output.status.success() {
            // The child's stderr is not guaranteed to be valid UTF-8; decode lossily so that
            // reporting a failure can never itself panic.
            let stderr = String::from_utf8_lossy(&output.stderr);
            if stderr.contains("API is unsupported") {
                bail!("renderdoc reported API (likely window system) unsupported, skipping trace");
            }

            println!("Failed to start renderdoccmd ({:?}):", output.status.code());
            println!("{}", stderr);
            println!("command: {:?}", command);

            if stderr.contains("FileNotFound") {
                println!(
                    "TIP: Failure to find a file with a space in its name probably means your wrapper script didn't quote the arguments"
                )
            }
            bail!("Failed to start renderdoccmd: {:?}", output.status.code());
        }

        Ok(ReplayOutput::from(output))
    }

    /// Not supported for this trace type; calling it is treated as a programming error.
    fn fps(&self, _output: &ReplayOutput) -> Result<f64> {
        unreachable!("shouldn't be called");
    }

    /// The trace is identified by its capture file path.
    fn name(&self) -> &str {
        &self.file
    }
}

/// Returns the FPS for the frame from gpu-trace-perf-renderdoc-wrapper.py output.
///
/// Every line containing `EID <number>: <value>` contributes `<value>` (a per-event duration,
/// presumably in seconds -- confirm against the wrapper script) to the frame total; all other
/// lines are ignored.  The result is `1.0 / total`.
///
/// # Errors
///
/// Returns an error if an event's value does not parse as a float, or if the summed time is not
/// a finite, strictly positive number (no events, a zero total, a `nan`/`inf` event, or a
/// negative total), since no meaningful frame rate can be derived from it.
fn parse_renderdoc_wrapper_output(output: &str) -> Result<f32> {
    lazy_static! {
        static ref CALL_RE: Regex = Regex::new("EID [0-9]*: (.*)").unwrap();
    }

    let mut total = 0.0_f32;
    for line in output.lines() {
        if let Some(cap) = CALL_RE.captures(line) {
            match cap[1].parse::<f32>() {
                Ok(time) => total += time,
                Err(_) => {
                    bail!("Failed to parse renderdoc time event '{}'", line);
                }
            }
        }
    }

    // NaN and +/-inf fail `is_finite`; zero and negative totals fail the positivity check.
    // Without this an infinite total would silently report 0 FPS and a negative one a
    // negative FPS.
    if !total.is_finite() || total <= 0.0 {
        bail!("Bad total time {}", total);
    }

    Ok(1.0 / total)
}

/// Unit tests for the wrapper-output parser and the DirectX file name heuristic.
#[cfg(test)]
mod tests {
    use super::*;
    use assert_approx_eq::assert_approx_eq;

    /// The FPS is the reciprocal of the summed `EID` durations; the counter description lines
    /// preceding them must be ignored.
    #[test]
    fn test_renderdoc_parsing() {
        // Actual renderdoc output, trimmed down for the testcase.
        let renderdoc_input = "
Counter 1 (GPU Duration):
    Time taken for this event on the GPU, as measured by delta between two GPU timestamps.
    Returns 8 byte CompType.Double, representing CounterUnit.Seconds
Counter 2000000 (N vertices submitted):
    N vertices submitted
    Returns 8 byte CompType.UInt, representing CounterUnit.Absolute
EID 52: 0.000045
EID 370: 0.000004
EID 407: 0.000006
";

        assert_approx_eq!(
            parse_renderdoc_wrapper_output(renderdoc_input).unwrap(),
            1.0 / (0.000_045 + 0.000_004 + 0.000_006),
            0.000_001
        );
    }

    /// A `nan` duration for any event must make the whole parse fail instead of yielding an FPS.
    #[test]
    fn test_renderdoc_nan_parsing() {
        // Wrapper-style output in which one event reports `nan` as its duration.
        let renderdoc_input = "
EID 52: 0.000045
EID 370: nan
EID 407: 0.000006
";

        assert!(parse_renderdoc_wrapper_output(renderdoc_input).is_err());
    }

    /// Paths containing a DirectX marker are detected; other paths (including ones with spaces
    /// and dots) are not.
    #[test]
    fn test_guess_filename_directx() {
        assert!(guess_filename_directx(
            "d3d11-renderdoc/witcher3_medium_1.rdc"
        ));
        assert!(!guess_filename_directx(
            "/home/anholt/src/traces-db/supertuxkart/supertuxkart-menu.rdc"
        ));
        assert!(!guess_filename_directx(
            "/home/anholt/src/traces-db/godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc"
        ));
    }
}