1use constensor_core::{BestDevice, CompiledGraph, DType, Graph, GraphTensor, R3};
2use std::time::Instant;
3
4fn bench<T: DType, const B: usize, const M: usize, const K: usize, const N: usize>(
5 type_name: &str,
6 alpha: T,
7 beta: T,
8) {
9 let iterations = 1;
11 let mut total = std::time::Duration::new(0, 0);
12
13 let mut graph = Graph::empty();
14 let a = GraphTensor::<R3<B, M, K>, T, BestDevice<0>>::fill(&mut graph, T::from_f64(1.));
15 let b = GraphTensor::<R3<B, N, K>, T, BestDevice<0>>::fill(&mut graph, T::from_f64(2.)).t();
17 let o = GraphTensor::<R3<B, M, N>, T, BestDevice<0>>::fill(&mut graph, T::from_f64(3.));
19 let _c = a.matmul_axpby(b, o, alpha, beta);
20
21 graph.optimize();
22 let compiled: CompiledGraph<R3<B, M, N>, T, BestDevice<0>> = graph.compile().unwrap();
23
24 for _ in 0..iterations {
25 let start = Instant::now();
26
27 let tensor = std::hint::black_box(compiled.run().unwrap());
28 dbg!(tensor.data().unwrap());
29
30 total += start.elapsed();
31 }
32
33 let avg = total / (iterations as u32);
34 println!("Average execution time for {type_name} over {iterations} iterations: {avg:?}");
35}
36
37fn main() {
38 const B: usize = 1;
39 const M: usize = 2;
40 const N: usize = 2;
41 const K: usize = 2;
42
43 bench::<f32, B, M, K, N>("f32", 1.0, 1.0);
44 }