// cubecl_std/tensor/contiguous/launch.rs
use crate::tensor::{TensorHandle, copy_gpu_ref, launch_copy_perpendicular_ref};
use cubecl_core::{Runtime, client::ComputeClient, ir::StorageType, prelude::TensorBinding};

4pub fn into_contiguous<R: Runtime>(
6 client: &ComputeClient<R>,
7 input: TensorBinding<R>,
8 dtype: StorageType,
9) -> TensorHandle<R> {
10 let num_elems: usize = input.shape.iter().product();
11
12 let handle = client.empty(num_elems * dtype.size());
13 let output = TensorHandle::new_contiguous(input.shape.to_vec(), handle, dtype);
14
15 copy_into(client, input, output.clone().binding(), dtype);
16
17 output
18}
19
20pub fn into_contiguous_pitched<R: Runtime>(
23 client: &ComputeClient<R>,
24 input: TensorBinding<R>,
25 dtype: StorageType,
26) -> TensorHandle<R> {
27 if input.shape.len() <= 1 {
28 return into_contiguous(client, input, dtype);
29 }
30
31 let output = TensorHandle::empty(client, input.shape.clone(), dtype);
32
33 copy_into(client, input, output.clone().binding(), dtype);
34
35 output
36}
37
38pub fn copy_into<R: Runtime>(
40 client: &ComputeClient<R>,
41 input: TensorBinding<R>,
42 output: TensorBinding<R>,
43 dtype: StorageType,
44) {
45 let rank = input.strides.len();
46
47 let is_cpu = client.properties().hardware.num_cpu_cores.is_some();
50 if input.strides[rank - 1] != 1 && is_cpu {
51 launch_copy_perpendicular_ref(client, input, output, dtype);
52 } else {
53 copy_gpu_ref(client, input, output, dtype);
54 };
55}