cubecl_runtime::benchmark

Trait Benchmark

Source
pub trait Benchmark {
    type Args: Clone;

    // Required methods
    fn prepare(&self) -> Self::Args;
    fn execute(&self, args: Self::Args);
    fn name(&self) -> String;
    fn sync(&self);

    // Provided methods
    fn num_samples(&self) -> usize { ... }
    fn options(&self) -> Option<String> { ... }
    fn shapes(&self) -> Vec<Vec<usize>> { ... }
    fn sync_elapsed(&self) -> Result<Duration, TimestampsError> { ... }
    fn run(&self, timing_method: TimingMethod) -> BenchmarkDurations { ... }
    fn run_one_full(&self, args: Self::Args) -> Duration { ... }
    fn run_one_device_only(&self, args: Self::Args) -> Duration { ... }
}
Expand description

Benchmark trait.

Required Associated Types§

Source

type Args: Clone

Benchmark arguments.

Required Methods§

Source

fn prepare(&self) -> Self::Args

Prepare the benchmark: run anything that is essential for the benchmark but shouldn’t be counted in the measured duration.

§Notes

This should not include warmup; the benchmark will be run at least once without measuring the execution time.

Source

fn execute(&self, args: Self::Args)

Execute the benchmark. This method does not return a value; the time it took to complete is measured by the caller.

Source

fn name(&self) -> String

Name of the benchmark. It should be short and should match the name defined in the crate’s Cargo.toml.

Source

fn sync(&self)

Wait for computation to complete.

Provided Methods§

Source

fn num_samples(&self) -> usize

Number of samples per run required to achieve statistical significance.

Source

fn options(&self) -> Option<String>

The options passed to the benchmark.

Source

fn shapes(&self) -> Vec<Vec<usize>>

Shape dimensions.

Source

fn sync_elapsed(&self) -> Result<Duration, TimestampsError>

Wait for computation to complete and return the hardware-reported computation duration.

Source

fn run(&self, timing_method: TimingMethod) -> BenchmarkDurations

Run the benchmark a number of times.

Source

fn run_one_full(&self, args: Self::Args) -> Duration

Collect one sample by directly measuring the full execute + sync step.

Source

fn run_one_device_only(&self, args: Self::Args) -> Duration

Collect one sample using timing measurements reported by the device.

Implementors§