1use cubecl_core::ir::{self as gpu, BarrierLevel, ConstantScalarValue, Id};
2use std::fmt::{Display, Formatter};
3
4use super::{COUNTER_TMP_VAR, Dialect, Elem, Fragment, FragmentIdent, Item};
5
6pub trait Component<D: Dialect>: Display + FmtLeft {
7 fn item(&self) -> Item<D>;
8 fn is_const(&self) -> bool;
9 fn index(&self, index: usize) -> IndexedVariable<D>;
10 fn elem(&self) -> Elem<D> {
11 *self.item().elem()
12 }
13}
14
/// Alternate string rendering for a displayable value.
///
/// In this file the result is used as the left-hand side of an emitted
/// assignment, where some implementors prepend type and qualifier text to
/// their plain [`Display`] output.
pub trait FmtLeft: Display {
    /// Returns the left-hand-side rendering of `self` as an owned string.
    fn fmt_left(&self) -> String;
}
18
19#[derive(new)]
20pub struct OptimizedArgs<const N: usize, D: Dialect> {
21 pub args: [Variable<D>; N],
22 pub optimization_factor: Option<usize>,
23}
24
25#[derive(Debug, Clone, Copy, PartialEq)]
26pub enum Variable<D: Dialect> {
27 AbsolutePos,
28 AbsolutePosBaseName, AbsolutePosX,
30 AbsolutePosY,
31 AbsolutePosZ,
32 UnitPos,
33 UnitPosBaseName, UnitPosX,
35 UnitPosY,
36 UnitPosZ,
37 CubePos,
38 CubePosBaseName, CubePosX,
40 CubePosY,
41 CubePosZ,
42 CubeDim,
43 CubeDimBaseName, CubeDimX,
45 CubeDimY,
46 CubeDimZ,
47 CubeCount,
48 CubeCountBaseName, CubeCountX,
50 CubeCountY,
51 CubeCountZ,
52 PlaneDim,
53 PlaneDimChecked,
54 PlanePos,
55 UnitPosPlane,
56 ClusterRank,
57 ClusterIndexX,
58 ClusterIndexY,
59 ClusterIndexZ,
60 GlobalInputArray(Id, Item<D>),
61 GlobalOutputArray(Id, Item<D>),
62 GlobalScalar {
63 id: Id,
64 elem: Elem<D>,
65 in_struct: bool,
66 },
67 ConstantArray(Id, Item<D>, u32),
68 ConstantScalar(ConstantScalarValue, Elem<D>),
69 TensorMap(Id),
70 LocalMut {
71 id: Id,
72 item: Item<D>,
73 },
74 LocalConst {
75 id: Id,
76 item: Item<D>,
77 },
78 Named {
79 name: &'static str,
80 item: Item<D>,
81 },
82 Slice {
83 id: Id,
84 item: Item<D>,
85 },
86 SharedMemory(Id, Item<D>, u32),
87 LocalArray(Id, Item<D>, u32),
88 WmmaFragment {
89 id: Id,
90 frag: Fragment<D>,
91 },
92 Pipeline {
93 id: Id,
94 item: Item<D>,
95 },
96 Barrier {
97 id: Id,
98 item: Item<D>,
99 level: BarrierLevel,
100 },
101 Tmp {
102 id: Id,
103 item: Item<D>,
104 is_declared: bool,
105 is_ptr: bool,
106 is_const: bool,
107 },
108}
109
110impl<D: Dialect> Component<D> for Variable<D> {
111 fn index(&self, index: usize) -> IndexedVariable<D> {
112 self.index(index)
113 }
114
115 fn item(&self) -> Item<D> {
116 match self {
117 Variable::AbsolutePos => Item::scalar(Elem::U32, true),
118 Variable::AbsolutePosBaseName => Item {
119 elem: Elem::U32,
120 vectorization: 3,
121 native: true,
122 },
123 Variable::AbsolutePosX => Item::scalar(Elem::U32, true),
124 Variable::AbsolutePosY => Item::scalar(Elem::U32, true),
125 Variable::AbsolutePosZ => Item::scalar(Elem::U32, true),
126 Variable::CubeCount => Item::scalar(Elem::U32, true),
127 Variable::CubeCountBaseName => Item {
128 elem: Elem::U32,
129 vectorization: 3,
130 native: true,
131 },
132 Variable::CubeCountX => Item::scalar(Elem::U32, true),
133 Variable::CubeCountY => Item::scalar(Elem::U32, true),
134 Variable::CubeCountZ => Item::scalar(Elem::U32, true),
135 Variable::CubeDimBaseName => Item {
136 elem: Elem::U32,
137 vectorization: 3,
138 native: true,
139 },
140 Variable::CubeDim => Item::scalar(Elem::U32, true),
141 Variable::CubeDimX => Item::scalar(Elem::U32, true),
142 Variable::CubeDimY => Item::scalar(Elem::U32, true),
143 Variable::CubeDimZ => Item::scalar(Elem::U32, true),
144 Variable::CubePos => Item::scalar(Elem::U32, true),
145 Variable::CubePosBaseName => Item {
146 elem: Elem::U32,
147 vectorization: 3,
148 native: true,
149 },
150 Variable::CubePosX => Item::scalar(Elem::U32, true),
151 Variable::CubePosY => Item::scalar(Elem::U32, true),
152 Variable::CubePosZ => Item::scalar(Elem::U32, true),
153 Variable::UnitPos => Item::scalar(Elem::U32, true),
154 Variable::UnitPosBaseName => Item {
155 elem: Elem::U32,
156 vectorization: 3,
157 native: true,
158 },
159 Variable::UnitPosX => Item::scalar(Elem::U32, true),
160 Variable::UnitPosY => Item::scalar(Elem::U32, true),
161 Variable::UnitPosZ => Item::scalar(Elem::U32, true),
162 Variable::PlaneDim => Item::scalar(Elem::U32, true),
163 Variable::PlaneDimChecked => Item::scalar(Elem::U32, true),
164 Variable::PlanePos => Item::scalar(Elem::U32, true),
165 Variable::UnitPosPlane => Item::scalar(Elem::U32, true),
166 Variable::ClusterRank => Item::scalar(Elem::U32, true),
167 Variable::ClusterIndexX => Item::scalar(Elem::U32, true),
168 Variable::ClusterIndexY => Item::scalar(Elem::U32, true),
169 Variable::ClusterIndexZ => Item::scalar(Elem::U32, true),
170 Variable::GlobalInputArray(_, e) => *e,
171 Variable::GlobalOutputArray(_, e) => *e,
172 Variable::LocalArray(_, e, _) => *e,
173 Variable::SharedMemory(_, e, _) => *e,
174 Variable::ConstantArray(_, e, _) => *e,
175 Variable::LocalMut { item, .. } => *item,
176 Variable::LocalConst { item, .. } => *item,
177 Variable::Named { item, .. } => *item,
178 Variable::Slice { item, .. } => *item,
179 Variable::ConstantScalar(_, e) => Item::scalar(*e, false),
180 Variable::GlobalScalar { elem, .. } => Item::scalar(*elem, false),
181 Variable::WmmaFragment { frag, .. } => Item::scalar(frag.elem, false),
182 Variable::Tmp { item, .. } => *item,
183 Variable::Pipeline { id: _, item } => *item,
184 Variable::Barrier { id: _, item, .. } => *item,
185 Variable::TensorMap(_) => unreachable!(),
186 }
187 }
188
189 fn is_const(&self) -> bool {
190 if let Variable::Tmp { is_const, .. } = self {
191 return *is_const;
192 }
193
194 matches!(
195 self,
196 Variable::LocalConst { .. } | Variable::GlobalInputArray { .. }
197 )
198 }
199}
200
201impl<D: Dialect> Display for Variable<D> {
202 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
203 match self {
204 Variable::GlobalInputArray(id, _) => f.write_fmt(format_args!("buffer_{id}")),
205 Variable::GlobalOutputArray(id, _) => write!(f, "buffer_{id}"),
206 Variable::TensorMap(id) => write!(f, "tensor_map_{id}"),
207 Variable::LocalMut { id, .. } => f.write_fmt(format_args!("l_mut_{id}")),
208 Variable::LocalConst { id, .. } => f.write_fmt(format_args!("l_{id}")),
209 Variable::Named { name, .. } => f.write_fmt(format_args!("{name}")),
210 Variable::Slice { id, .. } => {
211 write!(f, "slice_{id}")
212 }
213 Variable::GlobalScalar {
214 id,
215 elem,
216 in_struct,
217 } => match *in_struct {
218 true => write!(f, "scalars_{elem}.x[{id}]"),
219 false => write!(f, "scalars_{elem}[{id}]"),
220 },
221 Variable::ConstantScalar(number, elem) => match number {
222 ConstantScalarValue::Int(val, kind) => match kind {
223 gpu::IntKind::I8 => write!(f, "{elem}({})", *val as i8),
224 gpu::IntKind::I16 => write!(f, "{elem}({})", *val as i16),
225 gpu::IntKind::I32 => write!(f, "{elem}({})", *val as i32),
226 gpu::IntKind::I64 => write!(f, "{elem}({})", *val),
227 },
228 ConstantScalarValue::Float(val, kind) => match kind {
229 gpu::FloatKind::E2M1
230 | gpu::FloatKind::E2M3
231 | gpu::FloatKind::E3M2
232 | gpu::FloatKind::E4M3
233 | gpu::FloatKind::E5M2
234 | gpu::FloatKind::UE8M0 => todo!("Minifloat constants not supported yet"),
235 gpu::FloatKind::F16 => {
236 write!(f, "{elem}({:?})", half::f16::from_f64(*val))
237 }
238 gpu::FloatKind::BF16 => {
239 write!(f, "{elem}({:?})", half::bf16::from_f64(*val))
240 }
241 gpu::FloatKind::Flex32 => write!(f, "{elem}({:?})", *val as f32),
242 gpu::FloatKind::TF32 => write!(f, "{elem}({:?})", *val as f32),
243 gpu::FloatKind::F32 => write!(f, "{elem}({:?})", *val as f32),
244 gpu::FloatKind::F64 => write!(f, "{elem}({:?})", *val),
245 },
246 ConstantScalarValue::UInt(val, kind) => match kind {
247 gpu::UIntKind::U8 => write!(f, "{elem}({})", *val as u8),
248 gpu::UIntKind::U16 => write!(f, "{elem}({})", *val as u16),
249 gpu::UIntKind::U32 => write!(f, "{elem}({})", *val as u32),
250 gpu::UIntKind::U64 => write!(f, "{elem}({})", *val),
251 },
252 ConstantScalarValue::Bool(val) => write!(f, "{val}"),
253 },
254 Variable::SharedMemory(number, _, _) => {
255 write!(f, "shared_memory_{number}")
256 }
257
258 Variable::AbsolutePos => D::compile_absolute_pos(f),
259 Variable::AbsolutePosBaseName => D::compile_absolute_pos_base_name(f),
260 Variable::AbsolutePosX => D::compile_absolute_pos_x(f),
261 Variable::AbsolutePosY => D::compile_absolute_pos_y(f),
262 Variable::AbsolutePosZ => D::compile_absolute_pos_z(f),
263 Variable::CubeCount => D::compile_cube_count(f),
264 Variable::CubeCountBaseName => D::compile_cube_count_base_name(f),
265 Variable::CubeCountX => D::compile_cube_count_x(f),
266 Variable::CubeCountY => D::compile_cube_count_y(f),
267 Variable::CubeCountZ => D::compile_cube_count_z(f),
268 Variable::CubeDim => D::compile_cube_dim(f),
269 Variable::CubeDimBaseName => D::compile_cube_dim_base_name(f),
270 Variable::CubeDimX => D::compile_cube_dim_x(f),
271 Variable::CubeDimY => D::compile_cube_dim_y(f),
272 Variable::CubeDimZ => D::compile_cube_dim_z(f),
273 Variable::CubePos => D::compile_cube_pos(f),
274 Variable::CubePosBaseName => D::compile_cube_pos_base_name(f),
275 Variable::CubePosX => D::compile_cube_pos_x(f),
276 Variable::CubePosY => D::compile_cube_pos_y(f),
277 Variable::CubePosZ => D::compile_cube_pos_z(f),
278 Variable::UnitPos => D::compile_unit_pos(f),
279 Variable::UnitPosBaseName => D::compile_unit_pos_base_name(f),
280 Variable::UnitPosX => D::compile_unit_pos_x(f),
281 Variable::UnitPosY => D::compile_unit_pos_y(f),
282 Variable::UnitPosZ => D::compile_unit_pos_z(f),
283 Variable::PlaneDim => D::compile_plane_dim(f),
284 Variable::PlaneDimChecked => D::compile_plane_dim_checked(f),
285 Variable::PlanePos => D::compile_plane_pos(f),
286 Variable::UnitPosPlane => D::compile_unit_pos_plane(f),
287 Variable::ClusterRank => D::compile_cluster_pos(f),
288 Variable::ClusterIndexX => D::compile_cluster_pos_x(f),
289 Variable::ClusterIndexY => D::compile_cluster_pos_y(f),
290 Variable::ClusterIndexZ => D::compile_cluster_pos_z(f),
291
292 Variable::ConstantArray(number, _, _) => f.write_fmt(format_args!("arrays_{number}")),
293 Variable::LocalArray(id, _, _) => {
294 write!(f, "l_arr_{id}")
295 }
296 Variable::WmmaFragment { id: index, frag } => {
297 let name = match frag.ident {
298 FragmentIdent::A => "a",
299 FragmentIdent::B => "b",
300 FragmentIdent::Accumulator => "acc",
301 FragmentIdent::_Dialect(_) => "",
302 };
303 write!(f, "frag_{name}_{index}")
304 }
305 Variable::Tmp { id, .. } => write!(f, "_tmp_{id}"),
306 Variable::Pipeline { id, .. } => write!(f, "pipeline_{id}"),
307 Variable::Barrier { id, .. } => write!(f, "barrier_{id}"),
308 }
309 }
310}
311
312impl<D: Dialect> Variable<D> {
313 pub fn is_optimized(&self) -> bool {
314 self.item().is_optimized()
315 }
316
317 pub fn tmp(item: Item<D>) -> Self {
321 let inc = COUNTER_TMP_VAR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
322
323 Variable::Tmp {
324 id: inc as Id,
325 item,
326 is_declared: false,
327 is_ptr: false,
328 is_const: false,
329 }
330 }
331
332 pub fn to_const(&mut self) {
333 if let Variable::Tmp { is_const, .. } = self {
334 *is_const = true;
335 }
336 }
337
338 pub fn reinterpret_ptr(&self, f: &mut Formatter<'_>, item: Item<D>) -> Self {
340 let mut out = Self::tmp_ptr(item);
341
342 if self.is_const() {
343 out.to_const();
344 }
345
346 let elem = out.elem();
347 let qualifier = out.const_qualifier();
348 let addr_space = D::address_space_for_variable(self);
349 let out_fmt = out.fmt_left();
350
351 writeln!(
352 f,
353 "{out_fmt} = reinterpret_cast<{addr_space}{elem}{qualifier}*>({self});"
354 )
355 .unwrap();
356
357 out
358 }
359
360 pub fn tmp_ptr(item: Item<D>) -> Self {
364 let inc = COUNTER_TMP_VAR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
365
366 Variable::Tmp {
367 id: inc as Id,
368 item,
369 is_declared: false,
370 is_ptr: true,
371 is_const: false,
372 }
373 }
374
375 pub fn tmp_declared(item: Item<D>) -> Self {
381 let inc = COUNTER_TMP_VAR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
382
383 Variable::Tmp {
384 id: inc as Id,
385 item,
386 is_declared: true,
387 is_ptr: false,
388 is_const: false,
389 }
390 }
391
392 pub fn optimized_args<const N: usize>(args: [Self; N]) -> OptimizedArgs<N, D> {
393 let args_after = args.map(|a| a.optimized());
394
395 let item_reference_after = args_after[0].item();
396
397 let is_optimized = args_after
398 .iter()
399 .all(|var| var.elem() == item_reference_after.elem && var.is_optimized());
400
401 if is_optimized {
402 let vectorization_before = args
403 .iter()
404 .map(|var| var.item().vectorization)
405 .max()
406 .unwrap();
407 let vectorization_after = args_after
408 .iter()
409 .map(|var| var.item().vectorization)
410 .max()
411 .unwrap();
412
413 OptimizedArgs::new(args_after, Some(vectorization_before / vectorization_after))
414 } else {
415 OptimizedArgs::new(args, None)
416 }
417 }
418
419 pub fn optimized(&self) -> Self {
420 match self {
421 Variable::GlobalInputArray(id, item) => {
422 Variable::GlobalInputArray(*id, item.optimized())
423 }
424 Variable::GlobalOutputArray(id, item) => {
425 Variable::GlobalOutputArray(*id, item.optimized())
426 }
427 Variable::LocalMut { id, item } => Variable::LocalMut {
428 id: *id,
429 item: item.optimized(),
430 },
431 Variable::LocalConst { id, item } => Variable::LocalConst {
432 id: *id,
433 item: item.optimized(),
434 },
435 Variable::Slice { id, item } => Variable::Slice {
436 id: *id,
437 item: item.optimized(),
438 },
439 Variable::Tmp {
440 id,
441 item,
442 is_declared,
443 is_ptr,
444 is_const,
445 } => Variable::Tmp {
446 id: *id,
447 item: item.optimized(),
448 is_declared: *is_declared,
449 is_ptr: *is_ptr,
450 is_const: *is_const,
451 },
452 Variable::SharedMemory(id, item, size) => {
453 let before = item.vectorization;
454 let item = item.optimized();
455 let after = item.vectorization;
456 let scaling = (before / after) as u32;
457
458 Variable::SharedMemory(*id, item, size / scaling)
459 }
460 Variable::LocalArray(id, item, size) => {
461 let before = item.vectorization;
462 let item = item.optimized();
463 let after = item.vectorization;
464 let scaling = (before / after) as u32;
465
466 Variable::LocalArray(*id, item.optimized(), size / scaling)
467 }
468 _ => *self,
469 }
470 }
471
472 pub fn is_always_scalar(&self) -> bool {
473 match self {
474 Variable::AbsolutePos => true,
475 Variable::AbsolutePosBaseName => false,
476 Variable::AbsolutePosX => true,
477 Variable::AbsolutePosY => true,
478 Variable::AbsolutePosZ => true,
479 Variable::CubeCount => true,
480 Variable::CubeCountBaseName => false,
481 Variable::CubeCountX => true,
482 Variable::CubeCountY => true,
483 Variable::CubeCountZ => true,
484 Variable::CubeDim => true,
485 Variable::CubeDimBaseName => false,
486 Variable::CubeDimX => true,
487 Variable::CubeDimY => true,
488 Variable::CubeDimZ => true,
489 Variable::CubePos => true,
490 Variable::CubePosBaseName => true,
491 Variable::CubePosX => true,
492 Variable::CubePosY => true,
493 Variable::CubePosZ => true,
494 Variable::UnitPos => true,
495 Variable::UnitPosBaseName => true,
496 Variable::UnitPosPlane => true,
497 Variable::UnitPosX => true,
498 Variable::UnitPosY => true,
499 Variable::UnitPosZ => true,
500 Variable::PlaneDim => true,
501 Variable::PlaneDimChecked => true,
502 Variable::PlanePos => true,
503 Variable::ClusterRank => true,
504 Variable::ClusterIndexX => true,
505 Variable::ClusterIndexY => true,
506 Variable::ClusterIndexZ => true,
507
508 Variable::Barrier { .. } => false,
509 Variable::ConstantArray(_, _, _) => false,
510 Variable::ConstantScalar(_, _) => true,
511 Variable::GlobalInputArray(_, _) => false,
512 Variable::GlobalOutputArray(_, _) => false,
513 Variable::GlobalScalar { .. } => true,
514 Variable::LocalArray(_, _, _) => false,
515 Variable::LocalConst { .. } => false,
516 Variable::LocalMut { .. } => false,
517 Variable::Named { .. } => false,
518 Variable::Pipeline { .. } => false,
519 Variable::SharedMemory(_, _, _) => false,
520 Variable::Slice { .. } => false,
521 Variable::Tmp { .. } => false,
522 Variable::WmmaFragment { .. } => false,
523 Variable::TensorMap { .. } => false,
524 }
525 }
526
527 pub fn index(&self, index: usize) -> IndexedVariable<D> {
528 IndexedVariable {
529 var: *self,
530 index,
531 optimized: self.is_optimized(),
532 }
533 }
534
535 pub fn const_qualifier(&self) -> &str {
536 if self.is_const() { " const" } else { "" }
537 }
538
539 pub fn id(&self) -> Option<Id> {
540 match self {
541 Variable::GlobalInputArray(id, ..) => Some(*id),
542 Variable::GlobalOutputArray(id, ..) => Some(*id),
543 Variable::GlobalScalar { id, .. } => Some(*id),
544 Variable::ConstantArray(id, ..) => Some(*id),
545 Variable::LocalMut { id, .. } => Some(*id),
546 Variable::LocalConst { id, .. } => Some(*id),
547 Variable::Slice { id, .. } => Some(*id),
548 Variable::SharedMemory(id, ..) => Some(*id),
549 Variable::LocalArray(id, ..) => Some(*id),
550 Variable::WmmaFragment { id, .. } => Some(*id),
551 Variable::Pipeline { id, .. } => Some(*id),
552 Variable::Barrier { id, .. } => Some(*id),
553 Variable::Tmp { id, .. } => Some(*id),
554 _ => None,
555 }
556 }
557
558 pub fn fmt_ptr(&self) -> String {
561 match self {
562 Variable::Slice { .. }
563 | Variable::SharedMemory(_, _, _)
564 | Variable::GlobalInputArray(_, _)
565 | Variable::GlobalOutputArray(_, _) => format!("{self}"),
566 _ => format!("&{self}"),
567 }
568 }
569}
570
571impl<D: Dialect> FmtLeft for Variable<D> {
572 fn fmt_left(&self) -> String {
573 match self {
574 Self::LocalConst { item, .. } => match item.elem {
575 Elem::Atomic(_) => {
576 format!("{item}* {self}")
577 }
578 _ => {
579 format!("const {item} {self}")
580 }
581 },
582 Variable::Tmp {
583 item,
584 is_declared,
585 is_ptr,
586 is_const,
587 ..
588 } => {
589 if *is_declared {
590 return format!("{self}");
591 }
592 if *is_ptr {
593 if *is_const {
594 return format!("const {item} *{self}");
595 }
596 return format!("{item} *{self}");
597 }
598
599 format!("{item} {self}")
600 }
601 var => format!("{var}"),
602 }
603 }
604}
605
606#[derive(Debug, Clone)]
607pub struct IndexedVariable<D: Dialect> {
608 var: Variable<D>,
609 optimized: bool,
610 index: usize,
611}
612
613impl<D: Dialect> Component<D> for IndexedVariable<D> {
614 fn item(&self) -> Item<D> {
615 self.var.item()
616 }
617
618 fn index(&self, index: usize) -> IndexedVariable<D> {
619 self.var.index(index)
620 }
621
622 fn is_const(&self) -> bool {
623 self.var.is_const()
624 }
625}
626
627impl<D: Dialect> Display for IndexedVariable<D> {
628 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
629 let var = &self.var;
630 let ref_ = matches!(var, Variable::LocalConst { .. })
631 .then_some("const&")
632 .unwrap_or("&");
633
634 if self.var.item().vectorization > 1 {
635 if self.optimized {
636 let item = self.var.item();
637 let addr_space = D::address_space_for_variable(&self.var);
638 write!(
639 f,
640 "(reinterpret_cast<{addr_space}{item} {ref_}>({var})).i_{}",
641 self.index
642 )
643 } else {
644 write!(f, "{var}.i_{}", self.index)
645 }
646 } else if self.optimized {
647 let item = self.var.item();
648 let addr_space = D::address_space_for_variable(&self.var);
649 write!(f, "reinterpret_cast<{addr_space}{item} {ref_}>({var})")
650 } else {
651 write!(f, "{var}")
652 }
653 }
654}
655
656impl<D: Dialect> FmtLeft for IndexedVariable<D> {
657 fn fmt_left(&self) -> String {
658 match self.var {
659 Variable::LocalConst { item, .. } => format!("const {item} {self}"),
660 Variable::Tmp { item, is_ptr, .. } => {
661 if is_ptr {
662 format!("{item} *{self}")
663 } else {
664 format!("{item} {self}")
665 }
666 }
667 _ => format!("{self}"),
668 }
669 }
670}
671
672impl FmtLeft for &String {
673 fn fmt_left(&self) -> String {
674 self.to_string()
675 }
676}