Skip to main content

02_matrix_multiply/
02_matrix_multiply.rs

1use apple_metal::{resource_options, MetalBuffer, MetalDevice};
2use apple_mps::{data_type, Matrix, MatrixDescriptor, MatrixMultiplication};
3
/// Reinterprets a slice of `f32` values as the raw bytes that back it.
///
/// The returned slice borrows from `values` and is `values.len() * 4` bytes long; the bytes
/// appear in the host's native byte order because no conversion is performed.
fn as_bytes(values: &[f32]) -> &[u8] {
    let byte_len = values.len() * core::mem::size_of::<f32>();
    let first_byte = values.as_ptr().cast::<u8>();
    // SAFETY: `first_byte` points at the start of `values`, which spans exactly `byte_len`
    // initialized bytes, `u8` has no alignment requirement, and the returned borrow is tied to
    // the lifetime of `values`.
    unsafe { core::slice::from_raw_parts(first_byte, byte_len) }
}
10
11fn read_f32s(buffer: &MetalBuffer, len: usize) -> Vec<f32> {
12    let ptr = buffer
13        .contents()
14        .expect("buffer has CPU-visible storage")
15        .cast::<f32>();
16    // SAFETY: The buffer stays alive for the duration of the slice read and was allocated large enough.
17    unsafe { core::slice::from_raw_parts(ptr, len) }.to_vec()
18}
19
20fn main() {
21    let device = MetalDevice::system_default().expect("no Metal device available");
22    let queue = device
23        .new_command_queue()
24        .expect("failed to create command queue");
25
26    let left_desc = MatrixDescriptor::contiguous(2, 3, data_type::FLOAT32)
27        .expect("FLOAT32 matrix descriptor should be supported");
28    let right_desc = MatrixDescriptor::contiguous(3, 2, data_type::FLOAT32)
29        .expect("FLOAT32 matrix descriptor should be supported");
30    let result_desc = MatrixDescriptor::contiguous(2, 2, data_type::FLOAT32)
31        .expect("FLOAT32 matrix descriptor should be supported");
32
33    let left_buffer = device
34        .new_buffer(
35            left_desc.matrix_bytes,
36            resource_options::STORAGE_MODE_SHARED,
37        )
38        .expect("failed to allocate left buffer");
39    let right_buffer = device
40        .new_buffer(
41            right_desc.matrix_bytes,
42            resource_options::STORAGE_MODE_SHARED,
43        )
44        .expect("failed to allocate right buffer");
45    let result_buffer = device
46        .new_buffer(
47            result_desc.matrix_bytes,
48            resource_options::STORAGE_MODE_SHARED,
49        )
50        .expect("failed to allocate result buffer");
51
52    let left_values = [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0];
53    let right_values = [7.0_f32, 8.0, 9.0, 10.0, 11.0, 12.0];
54    let zeros = [0.0_f32; 4];
55
56    let _ = left_buffer.write_bytes(as_bytes(&left_values));
57    let _ = right_buffer.write_bytes(as_bytes(&right_values));
58    let _ = result_buffer.write_bytes(as_bytes(&zeros));
59
60    let left =
61        Matrix::new_with_buffer(&left_buffer, left_desc).expect("failed to wrap left matrix");
62    let right =
63        Matrix::new_with_buffer(&right_buffer, right_desc).expect("failed to wrap right matrix");
64    let result =
65        Matrix::new_with_buffer(&result_buffer, result_desc).expect("failed to wrap result matrix");
66
67    let gemm = MatrixMultiplication::new_simple(&device, 2, 2, 3)
68        .expect("failed to create matrix multiplication kernel");
69    let command_buffer = queue
70        .new_command_buffer()
71        .expect("failed to allocate command buffer");
72    gemm.encode(&command_buffer, &left, &right, &result);
73    command_buffer.commit();
74    command_buffer.wait_until_completed();
75
76    let output = read_f32s(&result_buffer, 4);
77    let expected = [58.0_f32, 64.0, 139.0, 154.0];
78    for (actual, expected_value) in output.iter().zip(expected) {
79        assert!(
80            (actual - expected_value).abs() < 1.0e-4,
81            "unexpected matrix multiply result: {output:?}"
82        );
83    }
84
85    println!("matmul smoke passed: {output:?}");
86}