// gpu-trace-perf 1.8.2

use anyhow::{Result, bail};
use log::{error, info};
use regex::Regex;
use std::{
    ffi::OsStr,
    io::prelude::*,
    path::{Path, PathBuf},
    time::Duration,
};

use crate::{ReplayOutput, TraceTool, replay_command, snapshot::SnapshotResult};

/// A renderdoc trace that is replayed by running the embedded python scripts
/// under `python3` (see the `TraceTool` impl for this type).
pub struct RenderdocPyTrace {
    /// Path of the trace file; passed to the python script as its first argument.
    file: PathBuf,
    /// Test name computed by `crate::relative_test_name(root, file)`; returned by `name()`.
    name: String,
}

impl RenderdocPyTrace {
    /// Creates a trace for `file`, naming it with `crate::relative_test_name(root, file)`.
    pub fn new(root: &Path, file: &Path) -> RenderdocPyTrace {
        let name = crate::relative_test_name(root, file);
        let file = file.to_path_buf();
        Self { file, name }
    }
}

/// Source of the timing wrapper script, embedded at compile time.  `RenderdocPyTrace::replay`
/// writes it to the stdin of `python3 -`.
static RENDERDOC_WRAPPER_SCRIPT: &[u8] = include_bytes!("gpu-trace-perf-renderdoc-wrapper.py");
/// Source of the snapshot script, embedded at compile time.  `RenderdocPyTrace::snapshot` hands
/// it to `run_replay_command_with_timeout` (presumably to be fed on stdin as well, since the
/// command is also `python3 -` -- confirm against that helper).
static RENDERDOC_SNAPSHOT_SCRIPT: &[u8] = include_bytes!("gpu-trace-perf-renderdoc-snapshot.py");

/// If we're not using u_trace, then we use a python script that wraps the renderdoc events (draws,
/// blits, etc) in timestamps and takes the diff.  This won't work well for tilers.
impl TraceTool for RenderdocPyTrace {
    fn replay(&self, wrapper: Option<&str>, envs: &[(String, String)]) -> Result<ReplayOutput> {
        let renderdoc_command: &[_] = &[
            OsStr::new("python3"),
            OsStr::new("-"), // script from stdin
            self.file.as_os_str(),
        ];

        let mut command = replay_command(renderdoc_command, wrapper, envs);
        command.stdin(std::process::Stdio::piped());
        command.stdout(std::process::Stdio::piped());
        command.stderr(std::process::Stdio::piped());

        let mut child = command.spawn().expect("failed to start python3");
        let stdin = child.stdin.as_mut().expect("Failed to open stdin");
        stdin
            .write_all(RENDERDOC_WRAPPER_SCRIPT)
            .expect("failed to write to python's stdin");
        let output = child.wait_with_output().expect("failed to read stdout");

        if !output.status.success() {
            let stderr = std::str::from_utf8(&output.stderr).unwrap();
            if stderr.contains("API is unsupported") {
                bail!("renderdoc reported API (likely window system) unsupported, skipping trace");
            }

            error!("Failed to start renderdoc:");
            error!("{}", stderr);
            error!("command: {:?}", command);

            if stderr.contains("FileNotFound") {
                info!(
                    "TIP: Failure to find a file with a space in its name probably means your wrapper script didn't quote the arguments"
                )
            }
            bail!("Failed to start renderdoc");
        }
        Ok(ReplayOutput::from(output))
    }

    fn fps(&self, output: &ReplayOutput) -> Result<f64> {
        parse_renderdoc_wrapper_output(&output.stdout).map(|x| x as f64)
    }

    fn name(&self) -> &str {
        &self.name
    }

    fn can_snapshot(&self) -> bool {
        true
    }

    fn snapshot(&self, output_dir: &str, loops: u32, timeout: Duration) -> Result<SnapshotResult> {
        let output_dir_path = self.output_dir(output_dir)?.unwrap();
        let loops_str = loops.to_string();

        let renderdoc_command: &[_] = &[
            OsStr::new("python3"),
            OsStr::new("-"), // script from stdin
            self.file.as_os_str(),
            output_dir_path.as_os_str(),
            OsStr::new("--loops"),
            OsStr::new(&loops_str),
        ];

        let command = replay_command(renderdoc_command, None, &[]);

        let start_time = std::time::Instant::now();

        let output =
            self.run_replay_command_with_timeout(command, Some(RENDERDOC_SNAPSHOT_SCRIPT), timeout);

        if output.exit_code != 0 {
            if output.stderr.contains("API is unsupported") {
                bail!("renderdoc reported API (likely window system) unsupported, skipping trace");
            }
            // On other failures (including timeout), fall through with empty
            // files so the caller records the failure with the log intact.
            error!("Failed to run renderdoc snapshot: {}", output.stderr);
        }

        // Look for the output snapshots where we expect them to be.  If we just
        // try to parse it out from gpu-trace-perf-renderdoc-snapshot.py output,
        // the stdout getting interleaved with renderdoc's ends up causing
        // frames to be lost.
        let mut files = Vec::new();
        for i in 1..=loops {
            let relative = PathBuf::from(format!("snapshot{i:04}.png"));
            if output_dir_path.join(&relative).exists() {
                files.push(relative);
            }
        }

        Ok(SnapshotResult {
            files,
            output,
            runtime: start_time.elapsed(),
        })
    }
}

/// If we're measuring times with u_trace, we don't need timestamps, but we do
/// need to loop the frame because there's a bunch of setup at the start that
/// we're not trying to measure.  renderdoccmd is just the tool for that.
pub struct RenderdocUtraceTrace {
    /// Path of the trace file handed to `renderdoccmd replay`.
    file: PathBuf,
    /// Test name computed by `crate::relative_test_name(root, file)`; returned by `name()`.
    name: String,
    /// Result of `guess_filename_directx(file)`; when true the replay runs
    /// `renderdoccmd.exe` under `wine` instead of the native `renderdoccmd`.
    is_directx: bool,
}

/// Guesses whether `file` is a DirectX trace by looking for a known DirectX marker
/// anywhere in its (lossily converted) path.  The match is a case-sensitive substring
/// search, so directory names count as well as the file name.
fn guess_filename_directx(file: &Path) -> bool {
    const DIRECTX_MARKERS: [&str; 11] = [
        "dx8", "dx9", "dx10", "dx11", "dx12", "d3d8", "d3d9", "d3d10", "d3d11", "d3d12", "dxgi",
    ];

    let path_text = file.to_string_lossy();
    for marker in DIRECTX_MARKERS {
        if path_text.contains(marker) {
            return true;
        }
    }
    false
}

impl RenderdocUtraceTrace {
    /// Creates a trace for `file`, naming it with `crate::relative_test_name(root, file)` and
    /// recording whether the path looks like a DirectX trace.
    pub fn new(root: &Path, file: &Path) -> RenderdocUtraceTrace {
        let is_directx = guess_filename_directx(file);
        let name = crate::relative_test_name(root, file);
        Self {
            file: file.to_path_buf(),
            name,
            is_directx,
        }
    }
}

/// When measuring with u_trace, the trace is simply replayed with renderdoccmd (under wine for
/// traces guessed to be DirectX), looping the frame a few times.  No FPS is parsed here.
impl TraceTool for RenderdocUtraceTrace {
    /// Replays the trace with `renderdoccmd replay <file> -l 3`, going through
    /// `wine renderdoccmd.exe` when the trace was guessed to be DirectX.
    ///
    /// A non-zero exit is only turned into an error for the "API is unsupported" case;
    /// otherwise the output is returned as-is for the caller to inspect.
    fn replay(&self, wrapper: Option<&str>, envs: &[(String, String)]) -> Result<ReplayOutput> {
        // Pick the launcher first; the replay arguments are shared by both variants.
        let mut args: Vec<&OsStr> = Vec::with_capacity(6);
        if self.is_directx {
            args.push(OsStr::new("wine"));
            args.push(OsStr::new("renderdoccmd.exe"));
        } else {
            args.push(OsStr::new("renderdoccmd"));
        }
        // Play 3 frames, so we can take one of the middle one as the most
        // representative -- frame 0 has a bunch of setup, but the final frame may
        // not have any draws in it for some reason.
        args.extend([
            OsStr::new("replay"),
            self.file.as_os_str(),
            OsStr::new("-l"),
            OsStr::new("3"),
        ]);

        let output: ReplayOutput =
            self.run_replay_command(replay_command(&args[..], wrapper, envs));

        let failed = output.exit_code != 0;
        if failed && output.stderr.contains("API is unsupported") {
            bail!("renderdoc reported API (likely window system) unsupported, skipping trace");
        }
        if failed && output.stderr.contains("FileNotFound") {
            println!(
                "TIP: Failure to find a file with a space in its name probably means your wrapper script didn't quote the arguments"
            );
        }

        Ok(output)
    }

    /// Never expected to be called for this tool; panics if it is.
    fn fps(&self, _output: &ReplayOutput) -> Result<f64> {
        unreachable!("shouldn't be called")
    }

    /// Returns the test name assigned at construction.
    fn name(&self) -> &str {
        &self.name
    }
}

// Returns the FPS for the frame from gpu-trace-perf-renderdoc-wrapper.py output
fn parse_renderdoc_wrapper_output(output: &str) -> Result<f32> {
    lazy_static! {
        static ref CALL_RE: Regex = Regex::new("EID [0-9]*: (.*)").unwrap();
    }

    let mut total = 0.0;
    for line in output.lines() {
        if let Some(cap) = CALL_RE.captures(line) {
            match cap[1].parse::<f32>() {
                Ok(time) => total += time,
                _ => {
                    bail!("Failed to parse renderdoc time event '{line}'");
                }
            }
        }
    }

    if total == 0.0 || total.is_nan() {
        bail!("Bad total time {total}");
    }

    Ok(1.0 / total)
}

// Unit tests for the wrapper-output parser and the DirectX filename heuristic.
#[cfg(test)]
mod tests {
    use super::*;
    use assert_approx_eq::assert_approx_eq;

    // Non-"EID" lines are ignored and the FPS is the reciprocal of the summed event times.
    #[test]
    fn test_renderdoc_parsing() {
        // Actual renderdoc output, trimmed down for the testcase.
        let renderdoc_input = "
Counter 1 (GPU Duration):
    Time taken for this event on the GPU, as measured by delta between two GPU timestamps.
    Returns 8 byte CompType.Double, representing CounterUnit.Seconds
Counter 2000000 (N vertices submitted):
    N vertices submitted
    Returns 8 byte CompType.UInt, representing CounterUnit.Absolute
EID 52: 0.000045
EID 370: 0.000004
EID 407: 0.000006
";

        assert_approx_eq!(
            parse_renderdoc_wrapper_output(renderdoc_input).unwrap(),
            1.0 / (0.000_045 + 0.000_004 + 0.000_006),
            0.000_001
        );
    }

    // A single `nan` event time must make the whole parse fail.
    #[test]
    fn test_renderdoc_nan_parsing() {
        // Actual renderdoc output, trimmed down for the testcase.
        let renderdoc_input = "
EID 52: 0.000045
EID 370: nan
EID 407: 0.000006
";

        assert!(parse_renderdoc_wrapper_output(renderdoc_input).is_err());
    }

    // A DirectX marker in a directory name is detected; paths without any marker are not.
    #[test]
    fn test_guess_filename_directx() {
        assert!(guess_filename_directx(Path::new(
            "d3d11-renderdoc/witcher3_medium_1.rdc"
        )));
        assert!(!guess_filename_directx(Path::new(
            "/home/anholt/src/traces-db/supertuxkart/supertuxkart-menu.rdc"
        )));
        assert!(!guess_filename_directx(Path::new(
            "/home/anholt/src/traces-db/godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc"
        )));
    }
}