ptx_parser/unparser/instruction/
cp_reduce_async_bulk.rs

//! Original PTX specification:
//!
//! cp.reduce.async.bulk.dst.src.completion_mechanism.redOp.type [dstMem], [srcMem], size, [mbar];
//! .dst =                  { .shared::cluster };
//! .src =                  { .shared::cta };
//! .completion_mechanism = { .mbarrier::complete_tx::bytes };
//! .redOp=                 { .and, .or, .xor, .add, .inc, .dec, .min, .max };
//! .type =                 { .b32, .u32, .s32, .b64, .u64 };
//! ----------------------------------------------------------------
//! cp.reduce.async.bulk.dst.src.completion_mechanism{.level::cache_hint}.redOp.type [dstMem], [srcMem], size{, cache-policy};
//! .dst =                  { .global      };
//! .src =                  { .shared::cta };
//! ----------------------------------------------------------------
//! .completion_mechanism = { .bulk_group };
//! .level::cache_hint    = { .L2::cache_hint };
//! .redOp=                 { .and, .or, .xor, .add, .inc, .dec, .min, .max };
//! .type =                 { .f16, .bf16, .b32, .u32, .s32, .b64, .u64, .s64, .f32, .f64 };
//! ----------------------------------------------------------------
//! cp.reduce.async.bulk.dst.src.completion_mechanism{.level::cache_hint}.add.noftz.type [dstMem], [srcMem], size{, cache-policy};
//! .dst  =                 { .global };
//! .src  =                 { .shared::cta };
//! .completion_mechanism = { .bulk_group };
//! .type =                 { .f16, .bf16 };
24
25#![allow(unused)]
26
27use crate::lexer::PtxToken;
28use crate::unparser::{PtxUnparser, common::*};
29
30pub mod section_0 {
31    use super::*;
32    use crate::r#type::instruction::cp_reduce_async_bulk::section_0::*;
33
34    impl PtxUnparser for CpReduceAsyncBulkDstSrcCompletionMechanismRedopType {
35        fn unparse_tokens(&self, tokens: &mut ::std::vec::Vec<PtxToken>) {
36            push_opcode(tokens, "cp");
37                    push_directive(tokens, "reduce");
38                    push_directive(tokens, "async");
39                    push_directive(tokens, "bulk");
40                    match &self.dst {
41                            Dst::SharedCluster => {
42                                    push_directive(tokens, "shared::cluster");
43                            }
44                    }
45                    match &self.src {
46                            Src::SharedCta => {
47                                    push_directive(tokens, "shared::cta");
48                            }
49                    }
50                    match &self.completion_mechanism {
51                            CompletionMechanism::MbarrierCompleteTxBytes => {
52                                    push_directive(tokens, "mbarrier::complete_tx::bytes");
53                            }
54                    }
55                    match &self.redop {
56                            Redop::And => {
57                                    push_directive(tokens, "and");
58                            }
59                            Redop::Xor => {
60                                    push_directive(tokens, "xor");
61                            }
62                            Redop::Add => {
63                                    push_directive(tokens, "add");
64                            }
65                            Redop::Inc => {
66                                    push_directive(tokens, "inc");
67                            }
68                            Redop::Dec => {
69                                    push_directive(tokens, "dec");
70                            }
71                            Redop::Min => {
72                                    push_directive(tokens, "min");
73                            }
74                            Redop::Max => {
75                                    push_directive(tokens, "max");
76                            }
77                            Redop::Or => {
78                                    push_directive(tokens, "or");
79                            }
80                    }
81                    match &self.type_ {
82                            Type::B32 => {
83                                    push_directive(tokens, "b32");
84                            }
85                            Type::U32 => {
86                                    push_directive(tokens, "u32");
87                            }
88                            Type::S32 => {
89                                    push_directive(tokens, "s32");
90                            }
91                            Type::B64 => {
92                                    push_directive(tokens, "b64");
93                            }
94                            Type::U64 => {
95                                    push_directive(tokens, "u64");
96                            }
97                    }
98                    self.dstmem.unparse_tokens(tokens);
99            tokens.push(PtxToken::Comma);
100                    self.srcmem.unparse_tokens(tokens);
101            tokens.push(PtxToken::Comma);
102                    self.size.unparse_tokens(tokens);
103            tokens.push(PtxToken::Comma);
104                    self.mbar.unparse_tokens(tokens);
105            tokens.push(PtxToken::Semicolon);
106        }
107    }
108
109}
110
111pub mod section_1 {
112    use super::*;
113    use crate::r#type::instruction::cp_reduce_async_bulk::section_1::*;
114
115    impl PtxUnparser for CpReduceAsyncBulkDstSrcCompletionMechanismLevelCacheHintRedopType {
116        fn unparse_tokens(&self, tokens: &mut ::std::vec::Vec<PtxToken>) {
117            push_opcode(tokens, "cp");
118                    push_directive(tokens, "reduce");
119                    push_directive(tokens, "async");
120                    push_directive(tokens, "bulk");
121                    match &self.dst {
122                            Dst::Global => {
123                                    push_directive(tokens, "global");
124                            }
125                    }
126                    match &self.src {
127                            Src::SharedCta => {
128                                    push_directive(tokens, "shared::cta");
129                            }
130                    }
131                    match &self.completion_mechanism {
132                            CompletionMechanism::MbarrierCompleteTxBytes => {
133                                    push_directive(tokens, "mbarrier::complete_tx::bytes");
134                            }
135                    }
136                    if self.level_cache_hint {
137                            push_directive(tokens, "level::cache_hint");
138                    }
139                    match &self.redop {
140                            Redop::And => {
141                                    push_directive(tokens, "and");
142                            }
143                            Redop::Xor => {
144                                    push_directive(tokens, "xor");
145                            }
146                            Redop::Add => {
147                                    push_directive(tokens, "add");
148                            }
149                            Redop::Inc => {
150                                    push_directive(tokens, "inc");
151                            }
152                            Redop::Dec => {
153                                    push_directive(tokens, "dec");
154                            }
155                            Redop::Min => {
156                                    push_directive(tokens, "min");
157                            }
158                            Redop::Max => {
159                                    push_directive(tokens, "max");
160                            }
161                            Redop::Or => {
162                                    push_directive(tokens, "or");
163                            }
164                    }
165                    match &self.type_ {
166                            Type::B32 => {
167                                    push_directive(tokens, "b32");
168                            }
169                            Type::U32 => {
170                                    push_directive(tokens, "u32");
171                            }
172                            Type::S32 => {
173                                    push_directive(tokens, "s32");
174                            }
175                            Type::B64 => {
176                                    push_directive(tokens, "b64");
177                            }
178                            Type::U64 => {
179                                    push_directive(tokens, "u64");
180                            }
181                    }
182                    self.dstmem.unparse_tokens(tokens);
183            tokens.push(PtxToken::Comma);
184                    self.srcmem.unparse_tokens(tokens);
185            tokens.push(PtxToken::Comma);
186                    self.size.unparse_tokens(tokens);
187            if self.cache_policy.is_some() { tokens.push(PtxToken::Comma); }
188                    if let Some(opt_0) = self.cache_policy.as_ref() {
189                        opt_0.unparse_tokens(tokens);
190                    }
191            tokens.push(PtxToken::Semicolon);
192        }
193    }
194
195}
196
197pub mod section_2 {
198    use super::*;
199    use crate::r#type::instruction::cp_reduce_async_bulk::section_2::*;
200
201    impl PtxUnparser for CpReduceAsyncBulkDstSrcCompletionMechanismLevelCacheHintAddNoftzType {
202        fn unparse_tokens(&self, tokens: &mut ::std::vec::Vec<PtxToken>) {
203            push_opcode(tokens, "cp");
204                    push_directive(tokens, "reduce");
205                    push_directive(tokens, "async");
206                    push_directive(tokens, "bulk");
207                    match &self.dst {
208                            Dst::Global => {
209                                    push_directive(tokens, "global");
210                            }
211                    }
212                    match &self.src {
213                            Src::SharedCta => {
214                                    push_directive(tokens, "shared::cta");
215                            }
216                    }
217                    match &self.completion_mechanism {
218                            CompletionMechanism::BulkGroup => {
219                                    push_directive(tokens, "bulk_group");
220                            }
221                    }
222                    if let Some(level_cache_hint_1) = self.level_cache_hint.as_ref() {
223                            match level_cache_hint_1 {
224                                    LevelCacheHint::L2CacheHint => {
225                                            push_directive(tokens, "L2::cache_hint");
226                                    }
227                            }
228                    }
229                    push_directive(tokens, "add");
230                    push_directive(tokens, "noftz");
231                    match &self.type_ {
232                            Type::Bf16 => {
233                                    push_directive(tokens, "bf16");
234                            }
235                            Type::F16 => {
236                                    push_directive(tokens, "f16");
237                            }
238                    }
239                    self.dstmem.unparse_tokens(tokens);
240            tokens.push(PtxToken::Comma);
241                    self.srcmem.unparse_tokens(tokens);
242            tokens.push(PtxToken::Comma);
243                    self.size.unparse_tokens(tokens);
244            if self.cache_policy.is_some() { tokens.push(PtxToken::Comma); }
245                    if let Some(opt_2) = self.cache_policy.as_ref() {
246                        opt_2.unparse_tokens(tokens);
247                    }
248            tokens.push(PtxToken::Semicolon);
249        }
250    }
251
252}
253