ptx_parser/unparser/instruction/
cp_reduce_async_bulk.rs

1//! Original PTX specification:
2//!
3//! cp.reduce.async.bulk.dst.src.completion_mechanism.redOp.type [dstMem], [srcMem], size, [mbar];
4//! .dst =                  { .shared::cluster };
5//! .src =                  { .shared::cta };
6//! .completion_mechanism = { .mbarrier::complete_tx::bytes };
7//! .redOp=                 { .and, .or, .xor, .add, .inc, .dec, .min, .max };
8//! .type =                 { .b32, .u32, .s32, .b64, .u64 };
9//! ----------------------------------------------------------------
10//! cp.reduce.async.bulk.dst.src.completion_mechanism{.level::cache_hint}.redOp.type [dstMem], [srcMem], size{, cache-policy};
11//! .dst =                  { .global      };
12//! .src =                  { .shared::cta };
13//! ----------------------------------------------------------------
14//! .completion_mechanism = { .bulk_group };
15//! .level::cache_hint    = { .L2::cache_hint };
16//! .redOp=                 { .and, .or, .xor, .add, .inc, .dec, .min, .max };
17//! .type =                 { .f16, .bf16, .b32, .u32, .s32, .b64, .u64, .s64, .f32, .f64 };
18//! ----------------------------------------------------------------
19//! cp.reduce.async.bulk.dst.src.completion_mechanism{.level::cache_hint}.add.noftz.type [dstMem], [srcMem], size{, cache-policy};
20//! .dst  =                 { .global };
21//! .src  =                 { .shared::cta };
22//! .completion_mechanism = { .bulk_group };
23//! .type =                 { .f16, .bf16 };
24
25#![allow(unused)]
26
27use crate::lexer::PtxToken;
28use crate::unparser::{PtxUnparser, common::*};
29
30pub mod section_0 {
31    use super::*;
32    use crate::r#type::instruction::cp_reduce_async_bulk::section_0::*;
33
34    impl PtxUnparser for CpReduceAsyncBulkDstSrcCompletionMechanismRedopType {
35        fn unparse_tokens(&self, tokens: &mut ::std::vec::Vec<PtxToken>) {
36            push_opcode(tokens, "cp");
37            push_directive(tokens, "reduce");
38            push_directive(tokens, "async");
39            push_directive(tokens, "bulk");
40            match &self.dst {
41                Dst::SharedCluster => {
42                    push_directive(tokens, "shared::cluster");
43                }
44            }
45            match &self.src {
46                Src::SharedCta => {
47                    push_directive(tokens, "shared::cta");
48                }
49            }
50            match &self.completion_mechanism {
51                CompletionMechanism::MbarrierCompleteTxBytes => {
52                    push_directive(tokens, "mbarrier::complete_tx::bytes");
53                }
54            }
55            match &self.redop {
56                Redop::And => {
57                    push_directive(tokens, "and");
58                }
59                Redop::Xor => {
60                    push_directive(tokens, "xor");
61                }
62                Redop::Add => {
63                    push_directive(tokens, "add");
64                }
65                Redop::Inc => {
66                    push_directive(tokens, "inc");
67                }
68                Redop::Dec => {
69                    push_directive(tokens, "dec");
70                }
71                Redop::Min => {
72                    push_directive(tokens, "min");
73                }
74                Redop::Max => {
75                    push_directive(tokens, "max");
76                }
77                Redop::Or => {
78                    push_directive(tokens, "or");
79                }
80            }
81            match &self.type_ {
82                Type::B32 => {
83                    push_directive(tokens, "b32");
84                }
85                Type::U32 => {
86                    push_directive(tokens, "u32");
87                }
88                Type::S32 => {
89                    push_directive(tokens, "s32");
90                }
91                Type::B64 => {
92                    push_directive(tokens, "b64");
93                }
94                Type::U64 => {
95                    push_directive(tokens, "u64");
96                }
97            }
98            self.dstmem.unparse_tokens(tokens);
99            tokens.push(PtxToken::Comma);
100            self.srcmem.unparse_tokens(tokens);
101            tokens.push(PtxToken::Comma);
102            self.size.unparse_tokens(tokens);
103            tokens.push(PtxToken::Comma);
104            self.mbar.unparse_tokens(tokens);
105            tokens.push(PtxToken::Semicolon);
106        }
107    }
108}
109
110pub mod section_1 {
111    use super::*;
112    use crate::r#type::instruction::cp_reduce_async_bulk::section_1::*;
113
114    impl PtxUnparser for CpReduceAsyncBulkDstSrcCompletionMechanismLevelCacheHintRedopType {
115        fn unparse_tokens(&self, tokens: &mut ::std::vec::Vec<PtxToken>) {
116            push_opcode(tokens, "cp");
117            push_directive(tokens, "reduce");
118            push_directive(tokens, "async");
119            push_directive(tokens, "bulk");
120            match &self.dst {
121                Dst::Global => {
122                    push_directive(tokens, "global");
123                }
124            }
125            match &self.src {
126                Src::SharedCta => {
127                    push_directive(tokens, "shared::cta");
128                }
129            }
130            match &self.completion_mechanism {
131                CompletionMechanism::MbarrierCompleteTxBytes => {
132                    push_directive(tokens, "mbarrier::complete_tx::bytes");
133                }
134            }
135            if self.level_cache_hint {
136                push_directive(tokens, "level::cache_hint");
137            }
138            match &self.redop {
139                Redop::And => {
140                    push_directive(tokens, "and");
141                }
142                Redop::Xor => {
143                    push_directive(tokens, "xor");
144                }
145                Redop::Add => {
146                    push_directive(tokens, "add");
147                }
148                Redop::Inc => {
149                    push_directive(tokens, "inc");
150                }
151                Redop::Dec => {
152                    push_directive(tokens, "dec");
153                }
154                Redop::Min => {
155                    push_directive(tokens, "min");
156                }
157                Redop::Max => {
158                    push_directive(tokens, "max");
159                }
160                Redop::Or => {
161                    push_directive(tokens, "or");
162                }
163            }
164            match &self.type_ {
165                Type::B32 => {
166                    push_directive(tokens, "b32");
167                }
168                Type::U32 => {
169                    push_directive(tokens, "u32");
170                }
171                Type::S32 => {
172                    push_directive(tokens, "s32");
173                }
174                Type::B64 => {
175                    push_directive(tokens, "b64");
176                }
177                Type::U64 => {
178                    push_directive(tokens, "u64");
179                }
180            }
181            self.dstmem.unparse_tokens(tokens);
182            tokens.push(PtxToken::Comma);
183            self.srcmem.unparse_tokens(tokens);
184            tokens.push(PtxToken::Comma);
185            self.size.unparse_tokens(tokens);
186            if self.cache_policy.is_some() {
187                tokens.push(PtxToken::Comma);
188            }
189            if let Some(opt_0) = self.cache_policy.as_ref() {
190                opt_0.unparse_tokens(tokens);
191            }
192            tokens.push(PtxToken::Semicolon);
193        }
194    }
195}
196
197pub mod section_2 {
198    use super::*;
199    use crate::r#type::instruction::cp_reduce_async_bulk::section_2::*;
200
201    impl PtxUnparser for CpReduceAsyncBulkDstSrcCompletionMechanismLevelCacheHintAddNoftzType {
202        fn unparse_tokens(&self, tokens: &mut ::std::vec::Vec<PtxToken>) {
203            push_opcode(tokens, "cp");
204            push_directive(tokens, "reduce");
205            push_directive(tokens, "async");
206            push_directive(tokens, "bulk");
207            match &self.dst {
208                Dst::Global => {
209                    push_directive(tokens, "global");
210                }
211            }
212            match &self.src {
213                Src::SharedCta => {
214                    push_directive(tokens, "shared::cta");
215                }
216            }
217            match &self.completion_mechanism {
218                CompletionMechanism::BulkGroup => {
219                    push_directive(tokens, "bulk_group");
220                }
221            }
222            if let Some(level_cache_hint_1) = self.level_cache_hint.as_ref() {
223                match level_cache_hint_1 {
224                    LevelCacheHint::L2CacheHint => {
225                        push_directive(tokens, "L2::cache_hint");
226                    }
227                }
228            }
229            push_directive(tokens, "add");
230            push_directive(tokens, "noftz");
231            match &self.type_ {
232                Type::Bf16 => {
233                    push_directive(tokens, "bf16");
234                }
235                Type::F16 => {
236                    push_directive(tokens, "f16");
237                }
238            }
239            self.dstmem.unparse_tokens(tokens);
240            tokens.push(PtxToken::Comma);
241            self.srcmem.unparse_tokens(tokens);
242            tokens.push(PtxToken::Comma);
243            self.size.unparse_tokens(tokens);
244            if self.cache_policy.is_some() {
245                tokens.push(PtxToken::Comma);
246            }
247            if let Some(opt_2) = self.cache_policy.as_ref() {
248                opt_2.unparse_tokens(tokens);
249            }
250            tokens.push(PtxToken::Semicolon);
251        }
252    }
253}