06_ndarray_matrix_multiplication/
06_ndarray_matrix_multiplication.rs1use apple_metal::{resource_options, MetalBuffer, MetalDevice};
2use apple_mps::{data_type, NDArray, NDArrayDescriptor, NDArrayMatrixMultiplication};
3
/// Reinterprets a slice of `T` as the raw bytes that back it.
///
/// The returned slice starts at the first element of `values`, is
/// `size_of_val(values)` bytes long, and borrows from `values`.
fn as_bytes<T>(values: &[T]) -> &[u8] {
    let byte_count = core::mem::size_of_val(values);
    let first_byte = values.as_ptr().cast::<u8>();
    // SAFETY: `first_byte` is the start of `values` and `byte_count` is exactly
    // the size of that slice, so the byte range lies inside the borrowed slice
    // for the whole returned lifetime; `u8` has no alignment requirement.
    // NOTE(review): this relies on `T` having no padding bytes; the caller in
    // this file passes `f32` only.
    unsafe { core::slice::from_raw_parts(first_byte, byte_count) }
}
9
10fn buffer_with_f32_values_padded(
11 device: &MetalDevice,
12 values: &[f32],
13 byte_len: usize,
14) -> MetalBuffer {
15 let buffer = device
16 .new_buffer(
17 byte_len.max(core::mem::size_of_val(values)),
18 resource_options::STORAGE_MODE_SHARED,
19 )
20 .expect("buffer");
21 let _ = buffer.write_bytes(as_bytes(values));
22 buffer
23}
24
25fn read_f32_values(buffer: &MetalBuffer, len: usize) -> Vec<f32> {
26 let ptr = buffer.contents().expect("buffer contents").cast::<f32>();
27 unsafe { core::slice::from_raw_parts(ptr, len).to_vec() }
28}
29
30fn main() {
31 let device = MetalDevice::system_default().expect("no Metal device available");
32 let queue = device.new_command_queue().expect("command queue");
33
34 let descriptor = NDArrayDescriptor::with_dimension_sizes(data_type::FLOAT32, &[2, 2, 1, 1])
35 .expect("descriptor");
36 let template = NDArray::new(&device, &descriptor).expect("template ndarray");
37 let byte_len = template.resource_size();
38 let rows = descriptor.length_of_dimension(1);
39 let row_stride_floats = byte_len / core::mem::size_of::<f32>() / rows;
40 let left_buffer =
41 buffer_with_f32_values_padded(&device, &[1.0, 2.0, 0.0, 0.0, 3.0, 4.0, 0.0, 0.0], byte_len);
42 let right_buffer =
43 buffer_with_f32_values_padded(&device, &[5.0, 6.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0], byte_len);
44 let destination_buffer = buffer_with_f32_values_padded(&device, &[0.0; 8], byte_len);
45
46 let left = NDArray::new_with_buffer(&left_buffer, 0, &descriptor).expect("left ndarray");
47 let right = NDArray::new_with_buffer(&right_buffer, 0, &descriptor).expect("right ndarray");
48 let destination =
49 NDArray::new_with_buffer(&destination_buffer, 0, &descriptor).expect("destination ndarray");
50
51 let kernel = NDArrayMatrixMultiplication::new(&device, 2).expect("ndarray matmul");
52 kernel.set_alpha(1.0);
53 kernel.set_beta(0.0);
54
55 let command_buffer = queue.new_command_buffer().expect("command buffer");
56 kernel.encode_to_destination(&command_buffer, &[&left, &right], &destination);
57 command_buffer.commit();
58 command_buffer.wait_until_completed();
59
60 let padded_output = read_f32_values(&destination_buffer, row_stride_floats * rows);
61 let output = [
62 padded_output[0],
63 padded_output[1],
64 padded_output[row_stride_floats],
65 padded_output[row_stride_floats + 1],
66 ];
67 println!("{output:?}");
68}