//! cubecl_core/frontend/synchronization.rs

1use crate::{
2    ir::{Scope, Synchronization},
3    unexpanded,
4};
5
// Among all backends, the memory ordering guarantee of WebGPU is the weakest,
// so CubeCL's memory ordering cannot be stronger than that of WebGPU.

/// # Coordinates the following among all invocations in the current cube:
///
/// * Memory writes to variables in the cube address space (shared memory) complete,
///   e.g. writes that were initiated actually land in the cube address space memory.
///
/// * Then all the invocations in the cube wait for each other to arrive at the barrier, i.e. this step.
///
/// * Then all the invocations in the cube begin executing after the barrier, and all writes to the
///   cube address space made before the barrier are now visible to any invocation in this cube.
pub fn sync_cube() {}
18
19pub mod sync_cube {
20    use super::*;
21
22    pub fn expand(scope: &mut Scope) {
23        scope.register(Synchronization::SyncCube)
24    }
25}
26
/// Synchronizes units within their plane (e.g., warp or SIMD group).
///
/// Warning: not all targets support plane-level synchronization.
///
/// This stub exists only for the kernel-expansion frontend: calling it
/// directly hits `unexpanded!()`; the real operation is registered by the
/// sibling `sync_plane::expand` function.
pub fn sync_plane() {
    unexpanded!()
}
33
34pub mod sync_plane {
35    use super::*;
36
37    pub fn expand(scope: &mut Scope) {
38        scope.register(Synchronization::SyncPlane);
39    }
40}
41
/// * `sync_storage` behaves like [`sync_cube()`], but applies to the storage
///   address space (input args) rather than the cube address space (shared
///   memory). The set of collaborating invocations is still only the
///   invocations in the same cube.
///
/// * Barriers alone give no guarantee that writes to a storage buffer made by
///   one cube become visible to invocations in a different cube.
pub fn sync_storage() {}
46
47pub mod sync_storage {
48    use super::*;
49
50    pub fn expand(scope: &mut Scope) {
51        scope.register(Synchronization::SyncStorage)
52    }
53}
54
/// `sync_async_proxy_shared` is a synchronization fence for the experimental SM 9.0+ copy
/// functions, applying bidirectionally between the async proxy (i.e. TMA) and shared memory.
/// Should be used after initializing the barriers, and before the copy operation.
///
/// PTX: `fence.proxy.async.shared::cta`
///
/// Experimental and subject to change. Calling it directly hits `unexpanded!()`;
/// the real fence is registered by the sibling `sync_async_proxy_shared::expand`.
pub fn sync_async_proxy_shared() {
    unexpanded!()
}
63
64pub mod sync_async_proxy_shared {
65    use super::*;
66
67    pub fn expand(scope: &mut Scope) {
68        scope.register(Synchronization::SyncAsyncProxyShared)
69    }
70}