#![allow(nonstandard_style)]
#![allow(unused_variables)]
/// Flush-to-zero selector markers.
///
/// Floating-point ops in this file (for example `addf`, `maxf`) accept a value
/// whose type implements [`ftz::Mode`](Mode); their `static_params` tables map
/// `Enabled` to the `flush_to_zero` unit attribute.
pub mod ftz {
    /// Marker trait implemented by the flush-to-zero selectors below.
    pub trait Mode {}

    /// Selects the `flush_to_zero` attribute.
    pub struct Enabled;
    impl Mode for Enabled {}

    /// Has no entry in the `static_params` tables visible in this file, so
    /// presumably no attribute is emitted for it.
    pub struct Disabled;
    impl Mode for Disabled {}
}
/// Rounding-mode selector markers.
///
/// Ops such as `ftof`, `addf` and `divi` take a value whose type implements
/// [`rounding::Mode`](Mode) and translate the marker name into a
/// `#cuda_tile.rounding<...>` attribute through their `static_params` tables.
pub mod rounding {
    /// Marker trait implemented by the rounding selectors below.
    pub trait Mode {}

    /// Maps to `#cuda_tile.rounding<nearest_even>`.
    pub struct NearestEven;
    impl Mode for NearestEven {}

    /// Maps to `#cuda_tile.rounding<positive_inf>`.
    pub struct PositiveInf;
    impl Mode for PositiveInf {}

    /// Maps to `#cuda_tile.rounding<negative_inf>`.
    pub struct NegativeInf;
    impl Mode for NegativeInf {}

    /// Maps to `#cuda_tile.rounding<zero>`; the `divi` table lists it with an
    /// empty mapping instead.
    pub struct Zero;
    impl Mode for Zero {}

    /// Maps to `#cuda_tile.rounding<approx>`.
    pub struct Approx;
    impl Mode for Approx {}

    /// Maps to `#cuda_tile.rounding<full>` in the tables that list it (the
    /// `addf`-family tables in this file have no `Full` entry).
    pub struct Full;
    impl Mode for Full {}
}
/// NaN-handling selector markers.
///
/// `maxf` and `minf` take a value whose type implements [`nan::Mode`](Mode);
/// their `static_params` tables map `Enabled` to the `propagate_nan` unit
/// attribute.
pub mod nan {
    /// Marker trait implemented by the NaN selectors below.
    pub trait Mode {}

    /// Selects the `propagate_nan` attribute.
    pub struct Enabled;
    impl Mode for Enabled {}

    /// Has no entry in the `static_params` tables visible in this file, so
    /// presumably no attribute is emitted for it.
    pub struct Disabled;
    impl Mode for Disabled {}
}
/// Atomic-operation selector markers.
///
/// NOTE(review): no consumer of these markers is visible in this part of the
/// file; presumably each one names the kind of an atomic read-modify-write
/// op. Confirm against the ops that take an `atomic::Mode`.
pub mod atomic {
    /// Marker trait implemented by the atomic-operation selectors below.
    pub trait Mode {}

    /// `Add` selector.
    pub struct Add;
    impl Mode for Add {}

    /// `AddF` selector (presumably the floating-point add; confirm).
    pub struct AddF;
    impl Mode for AddF {}

    /// `And` selector.
    pub struct And;
    impl Mode for And {}

    /// `Or` selector.
    pub struct Or;
    impl Mode for Or {}

    /// `Xor` selector.
    pub struct Xor;
    impl Mode for Xor {}

    /// `Max` selector.
    pub struct Max;
    impl Mode for Max {}

    /// `Min` selector.
    pub struct Min;
    impl Mode for Min {}

    /// `Umax` selector (presumably unsigned max; confirm).
    pub struct Umax;
    impl Mode for Umax {}

    /// `Umin` selector (presumably unsigned min; confirm).
    pub struct Umin;
    impl Mode for Umin {}

    /// `Xchg` selector (presumably exchange; confirm).
    pub struct Xchg;
    impl Mode for Xchg {}
}
/// Memory-ordering selector markers.
///
/// Every marker implements [`Mode`]; the `LoadMode`, `StoreMode` and
/// `AtomicMode` sub-traits record which kinds of access each ordering may be
/// used with.
pub mod ordering {
    /// Base marker trait for all orderings.
    pub trait Mode {}
    /// Orderings accepted where a load ordering is required.
    pub trait LoadMode: Mode {}
    /// Orderings accepted where a store ordering is required.
    pub trait StoreMode: Mode {}
    /// Orderings accepted where an atomic ordering is required.
    pub trait AtomicMode: Mode {}

    /// Usable for loads and stores, not for atomics.
    pub struct Weak;
    impl Mode for Weak {}
    impl LoadMode for Weak {}
    impl StoreMode for Weak {}

    /// Usable for loads, stores and atomics.
    pub struct Relaxed;
    impl Mode for Relaxed {}
    impl LoadMode for Relaxed {}
    impl StoreMode for Relaxed {}
    impl AtomicMode for Relaxed {}

    /// Usable for loads and atomics.
    pub struct Acquire;
    impl Mode for Acquire {}
    impl LoadMode for Acquire {}
    impl AtomicMode for Acquire {}

    /// Usable for stores and atomics.
    pub struct Release;
    impl Mode for Release {}
    impl StoreMode for Release {}
    impl AtomicMode for Release {}

    /// Usable for atomics only.
    pub struct AcqRel;
    impl Mode for AcqRel {}
    impl AtomicMode for AcqRel {}
}
/// Scope selector markers.
///
/// The partition `load`/`store` helpers in this file pass `scope::TileBlock`
/// alongside a memory ordering; presumably the marker names the visibility
/// scope of the memory operation (confirm against the op definitions).
pub mod scope {
    /// Marker trait implemented by the scope selectors below.
    pub trait Mode {}

    /// Tile-block scope.
    pub struct TileBlock;
    impl Mode for TileBlock {}

    /// Device scope.
    pub struct Device;
    impl Mode for Device {}

    /// System scope.
    pub struct System;
    impl Mode for System {}
}
/// TMA selector markers.
///
/// The partition `load`/`store` helpers in this file pass `tma::Enabled` as
/// their last argument. NOTE(review): the meaning of "TMA" is not spelled out
/// in this file; confirm before documenting further.
pub mod tma {
    /// Marker trait implemented by the TMA selectors below.
    pub trait Mode {}

    /// TMA enabled.
    pub struct Enabled;
    impl Mode for Enabled {}

    /// TMA disabled.
    pub struct Disabled;
    impl Mode for Disabled {}
}
/// Zero-sized marker carrying a compile-time `CYCLES` count.
///
/// NOTE(review): no use of this type is visible in this part of the file;
/// presumably it is a latency hint for memory ops (confirm against callers).
pub struct Latency<const CYCLES: u32>;
/// Integer-overflow selector markers.
///
/// Integer ops such as `addi`, `muli` and `trunci` take a value whose type
/// implements [`overflow::Mode`](Mode) and translate the marker name into a
/// `#cuda_tile.overflow<...>` attribute through their `static_params` tables.
pub mod overflow {
    /// Marker trait implemented by the overflow selectors below.
    pub trait Mode {}

    /// Listed with an empty mapping in the tables (no attribute text).
    ///
    /// Inside this module the name shadows the prelude's `Option::None`.
    pub struct None;
    impl Mode for None {}

    /// Maps to `#cuda_tile.overflow<no_signed_wrap>`.
    pub struct NoSignedWrap;
    impl Mode for NoSignedWrap {}

    /// Maps to `#cuda_tile.overflow<no_unsigned_wrap>`.
    pub struct NoUnsignedWrap;
    impl Mode for NoUnsignedWrap {}

    /// Maps to `#cuda_tile.overflow<no_wrap>`.
    pub struct NoWrap;
    impl Mode for NoWrap {}
}
/// Comparison-predicate selector markers.
///
/// NOTE(review): no consumer of these markers is visible in this part of the
/// file; presumably they select the predicate of a comparison op (confirm).
pub mod predicate {
    /// Marker trait implemented by the predicate selectors below.
    pub trait Mode {}

    /// `Equal` predicate.
    pub struct Equal;
    impl Mode for Equal {}

    /// `NotEqual` predicate.
    pub struct NotEqual;
    impl Mode for NotEqual {}

    /// `LessThan` predicate.
    pub struct LessThan;
    impl Mode for LessThan {}

    /// `LessThanOrEqual` predicate.
    pub struct LessThanOrEqual;
    impl Mode for LessThanOrEqual {}

    /// `GreaterThan` predicate.
    pub struct GreaterThan;
    impl Mode for GreaterThan {}

    /// `GreaterThanOrEqual` predicate.
    pub struct GreaterThanOrEqual;
    impl Mode for GreaterThanOrEqual {}
}
/// Comparison-ordering selector markers.
///
/// NOTE(review): no consumer of these markers is visible in this part of the
/// file; presumably they choose between ordered and unordered floating-point
/// comparison (confirm).
pub mod cmp_ordering {
    /// Marker trait implemented by the selectors below.
    pub trait Mode {}

    /// `Unordered` selector.
    pub struct Unordered;
    impl Mode for Unordered {}

    /// `Ordered` selector.
    pub struct Ordered;
    impl Mode for Ordered {}
}
/// Padding-value selector markers.
///
/// `Tensor::partition*` in this file pass one of these to the partition-view
/// constructors. Presumably the marker names the value used for out-of-bounds
/// elements (confirm against the partition-view definitions).
pub mod padding {
    /// Marker trait implemented by the padding selectors below.
    pub trait Mode {}

    /// `None` selector. Inside this module the name shadows the prelude's
    /// `Option::None`.
    pub struct None;
    impl Mode for None {}

    /// `Zero` selector.
    pub struct Zero;
    impl Mode for Zero {}

    /// `NegZero` selector.
    pub struct NegZero;
    impl Mode for NegZero {}

    /// `Nan` selector.
    pub struct Nan;
    impl Mode for Nan {}

    /// `PosInf` selector.
    pub struct PosInf;
    impl Mode for PosInf {}

    /// `NegInf` selector.
    pub struct NegInf;
    impl Mode for NegInf {}
}
/// Dimension-map selector markers.
///
/// `Tensor::partition` passes `dim_map::Identity` to `make_partition_view`;
/// `Tensor::partition_permuted` passes a `core::Array`, which also implements
/// [`dim_map::Mode`](Mode).
pub mod dim_map {
    /// Marker trait for values usable as a dimension map.
    pub trait Mode {}

    /// The `Identity` dimension map.
    pub struct Identity;
    impl Mode for Identity {}
}
/// Direction selector markers for the `scan` / `scan_sum` ops, which take a
/// `reverse` argument whose type implements [`reverse::Mode`](Mode).
pub mod reverse {
    /// Marker trait implemented by the direction selectors below.
    pub trait Mode {}

    /// `Forward` selector.
    pub struct Forward;
    impl Mode for Forward {}

    /// `Reverse` selector.
    pub struct Reverse;
    impl Mode for Reverse {}
}
/// Signedness selector markers.
///
/// `mmai` takes one marker per operand and maps it to
/// `#cuda_tile.signedness<signed>` or `#cuda_tile.signedness<unsigned>`.
pub mod signedness {
    /// Marker trait implemented by the signedness selectors below.
    pub trait Mode {}

    /// Maps to `#cuda_tile.signedness<signed>`.
    pub struct Signed;
    impl Mode for Signed {}

    /// Maps to `#cuda_tile.signedness<unsigned>`.
    pub struct Unsigned;
    impl Mode for Unsigned {}
}
#[cutile_macro::module(tile_rust_crate = true)]
pub mod core {
pub use super::atomic;
pub use super::cmp_ordering;
pub use super::dim_map;
pub use super::ftz;
pub use super::nan;
pub use super::ordering;
pub use super::overflow;
pub use super::padding;
pub use super::predicate;
pub use super::reverse;
pub use super::rounding;
pub use super::scope;
pub use super::signedness;
pub use super::tma;
pub use super::Latency;
pub use half::{bf16, f16};
use std::marker::PhantomData;
use std::ops;
/// Trait bound used throughout this module for the element type of tiles,
/// tensors and partitions. Implementors must be `Copy + Clone`.
pub trait ElementType: Copy + Clone {
/// The zero value of the element type.
const ZERO: Self;
}
// `ElementType` implementations for the `half` floats and the primitive
// numeric/bool types. The `cuda_tile::ty` attribute carries the type name used
// on the cuda_tile side; note that each signed/unsigned integer pair shares one
// name (e.g. both `i8` and `u8` use "i8").

/// `bf16` element type; zero is `bf16::ZERO`.
#[cuda_tile::ty(name = "bf16")]
impl ElementType for bf16 {
const ZERO: Self = bf16::ZERO;
}
/// `f16` element type; zero is `f16::ZERO`.
#[cuda_tile::ty(name = "f16")]
impl ElementType for f16 {
const ZERO: Self = f16::ZERO;
}
/// `f32` element type.
#[cuda_tile::ty(name = "f32")]
impl ElementType for f32 {
const ZERO: Self = 0.0;
}
/// `i8` element type.
#[cuda_tile::ty(name = "i8")]
impl ElementType for i8 {
const ZERO: Self = 0;
}
/// `u8` element type; uses the same "i8" name as `i8`.
#[cuda_tile::ty(name = "i8")]
impl ElementType for u8 {
const ZERO: Self = 0;
}
/// `i32` element type.
#[cuda_tile::ty(name = "i32")]
impl ElementType for i32 {
const ZERO: Self = 0;
}
/// `u32` element type; uses the same "i32" name as `i32`.
#[cuda_tile::ty(name = "i32")]
impl ElementType for u32 {
const ZERO: Self = 0;
}
/// `i64` element type.
#[cuda_tile::ty(name = "i64")]
impl ElementType for i64 {
const ZERO: Self = 0;
}
/// `u64` element type; uses the same "i64" name as `i64`.
#[cuda_tile::ty(name = "i64")]
impl ElementType for u64 {
const ZERO: Self = 0;
}
/// `f64` element type.
#[cuda_tile::ty(name = "f64")]
impl ElementType for f64 {
const ZERO: Self = 0.0;
}
/// `i16` element type.
#[cuda_tile::ty(name = "i16")]
impl ElementType for i16 {
const ZERO: Self = 0;
}
/// `u16` element type; uses the same "i16" name as `i16`.
#[cuda_tile::ty(name = "i16")]
impl ElementType for u16 {
const ZERO: Self = 0;
}
/// `bool` element type, named "i1"; zero is `false`.
#[cuda_tile::ty(name = "i1")]
impl ElementType for bool {
const ZERO: Self = false;
}
pub use cuda_core::f8e4m3fn;
pub use cuda_core::f8e5m2;
pub use cuda_core::tf32;
// `ElementType` implementations for the `cuda_core` float wrappers. Each
// `ZERO` is built by wrapping the raw value 0 (presumably the all-zero bit
// pattern of the format; confirm against `cuda_core`).

/// `tf32` element type.
#[cuda_tile::ty(name = "tf32")]
impl ElementType for tf32 {
const ZERO: Self = tf32(0);
}
/// `f8e4m3fn` element type.
#[cuda_tile::ty(name = "f8e4m3fn")]
impl ElementType for f8e4m3fn {
const ZERO: Self = f8e4m3fn(0);
}
/// `f8e5m2` element type.
#[cuda_tile::ty(name = "f8e5m2")]
impl ElementType for f8e5m2 {
const ZERO: Self = f8e5m2(0);
}
/// Marker trait for scalar values accepted by the cast/convert helpers
/// (`scalar_to_tile`, `tile_to_scalar`, `convert_scalar`).
pub trait Scalar {}
/// Every `ElementType` is a `Scalar`. The attribute names it as a
/// `!cuda_tile.tile` with type parameter `E`.
#[cuda_tile::ty(name="!cuda_tile.tile", type_params=["E"])]
impl<E: ElementType> Scalar for E {}
/// Adds a `broadcast` method to scalar element values.
///
/// `D` is the target shape as a const array; the `[i32; N]` const parameter is
/// not plain Rust and is presumably expanded per rank by the
/// `variadic_trait(N = 6)` attribute.
#[cuda_tile::variadic_trait(N = 6)]
pub trait BroadcastScalar<E: ElementType, const D: [i32; N]>
where
Self: ElementType,
{
/// Produces a `Tile<E, D>` from `self` for the given `shape`.
fn broadcast(self, shape: Shape<D>) -> Tile<E, D>;
}
/// Blanket `BroadcastScalar` implementation for every element type.
#[cuda_tile::variadic_trait_impl()]
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const D: [i32; N]> BroadcastScalar<E, D> for E {
/// Forwards to `broadcast_scalar` (defined outside the visible part of this file).
fn broadcast(self, shape: Shape<D>) -> Tile<E, D> {
broadcast_scalar(self, shape)
}
}
/// Marker trait for pointer types usable in `PointerTile` and the pointer
/// cast helpers.
pub trait Pointer {}
/// `*mut E` is a `Pointer` for every element type `E`; the attribute names it
/// as a `!cuda_tile.tile` of `!cuda_tile.ptr<E>`.
#[cuda_tile::ty(name="!cuda_tile.tile", pointer_type="!cuda_tile.ptr", type_params=["!cuda_tile.ptr<E>"])]
impl<E: ElementType> Pointer for *mut E {}
/// A tile whose elements are pointers of type `P`, with shape `D`.
///
/// The struct stores no data on the Rust side; it only carries `P` in a
/// `PhantomData`.
#[cuda_tile::ty(name="!cuda_tile.tile", type_params=["{D}xP"])]
#[cuda_tile::variadic_struct(N = 6)]
#[derive(Copy, Clone)]
pub struct PointerTile<P: Pointer, const D: [i32; N]> {
// Type-level record of the pointer type only.
_type: PhantomData<P>,
}
/// Convenience methods on `PointerTile`. Each forwards to a free function
/// (`addptr_tile`, `addptr`, `broadcast_ptr`, `reshape_ptr`) defined outside
/// the visible part of this file.
#[cuda_tile::variadic_impl(N = 6)]
impl<P: Pointer, const D: [i32; N]> PointerTile<P, D> {
/// Offsets by a tile of offsets with the same shape `D`; forwards to `addptr_tile`.
pub fn offset_tile<I: ElementType>(self, offset: Tile<I, D>) -> PointerTile<P, D> {
addptr_tile(self, offset)
}
/// Offsets by a single `i32` offset; forwards to `addptr`.
pub fn offset(self, offset: i32) -> PointerTile<P, D> {
addptr(self, offset)
}
/// Broadcasts to shape `R` of the same rank; forwards to `broadcast_ptr`.
pub fn broadcast<const R: [i32; N]>(self, shape: Shape<R>) -> PointerTile<P, R> {
broadcast_ptr(self, shape)
}
/// Reshapes to shape `R`, whose rank `M` may differ from `N`; forwards to `reshape_ptr`.
#[cuda_tile::variadic_impl_fn(M = 6)]
pub fn reshape<const R: [i32; M]>(self, shape: Shape<R>) -> PointerTile<P, R> {
reshape_ptr(self, shape)
}
}
/// A tile of elements of type `E` with shape `D`.
///
/// The struct stores no data on the Rust side; it only carries `E` in a
/// `PhantomData`.
#[cuda_tile::ty(name="!cuda_tile.tile", type_params=["{D}xE"])]
#[cuda_tile::variadic_struct(N = 6)]
#[derive(Copy, Clone)]
pub struct Tile<E: ElementType, const D: [i32; N]> {
// Type-level record of the element type only.
_type: PhantomData<E>,
}
/// Convenience methods on `Tile`.
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const D: [i32; N]> Tile<E, D> {
/// Returns the tile's shape. The Rust body is an `unreachable!()`
/// placeholder, so this presumably is resolved by the cuda_tile compiler
/// rather than executed.
pub fn shape(&self) -> Shape<D> {
unreachable!()
}
/// Broadcasts to shape `R` of the same rank; forwards to the free function `broadcast`.
pub fn broadcast<const R: [i32; N]>(self, shape: Shape<R>) -> Tile<E, R> {
broadcast(self, shape)
}
/// Reshapes to shape `R`, whose rank `M` may differ from `N`; forwards to
/// the free function `reshape`.
#[cuda_tile::variadic_impl_fn(M = 6)]
pub fn reshape<const R: [i32; M]>(self, shape: Shape<R>) -> Tile<E, R> {
reshape(self, shape)
}
}
/// `+` between two tiles of the same element type and shape. The Rust body is
/// an `unreachable!()` placeholder.
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const D: [i32; N]> ops::Add<Tile<E, D>> for Tile<E, D> {
type Output = Tile<E, D>;
fn add(self, _rhs: Tile<E, D>) -> Tile<E, D> {
unreachable!()
}
}
/// `-` between two tiles of the same element type and shape. The Rust body is
/// an `unreachable!()` placeholder.
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const D: [i32; N]> ops::Sub<Tile<E, D>> for Tile<E, D> {
type Output = Tile<E, D>;
fn sub(self, _rhs: Tile<E, D>) -> Tile<E, D> {
unreachable!()
}
}
/// `*` between two tiles of the same element type and shape. The Rust body is
/// an `unreachable!()` placeholder.
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const D: [i32; N]> ops::Mul<Tile<E, D>> for Tile<E, D> {
type Output = Tile<E, D>;
fn mul(self, _rhs: Tile<E, D>) -> Tile<E, D> {
unreachable!()
}
}
/// `/` between two tiles of the same element type and shape. The Rust body is
/// an `unreachable!()` placeholder.
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const D: [i32; N]> ops::Div<Tile<E, D>> for Tile<E, D> {
type Output = Tile<E, D>;
fn div(self, _rhs: Tile<E, D>) -> Tile<E, D> {
unreachable!()
}
}
/// `%` between two tiles of the same element type and shape. The Rust body is
/// an `unreachable!()` placeholder.
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const D: [i32; N]> ops::Rem<Tile<E, D>> for Tile<E, D> {
type Output = Tile<E, D>;
fn rem(self, _rhs: Tile<E, D>) -> Tile<E, D> {
unreachable!()
}
}
/// `&` between two tiles of the same element type and shape. The Rust body is
/// an `unreachable!()` placeholder.
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const D: [i32; N]> ops::BitAnd<Tile<E, D>> for Tile<E, D> {
type Output = Tile<E, D>;
fn bitand(self, _rhs: Tile<E, D>) -> Tile<E, D> {
unreachable!()
}
}
/// `|` between two tiles of the same element type and shape. The Rust body is
/// an `unreachable!()` placeholder.
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const D: [i32; N]> ops::BitOr<Tile<E, D>> for Tile<E, D> {
type Output = Tile<E, D>;
fn bitor(self, _rhs: Tile<E, D>) -> Tile<E, D> {
unreachable!()
}
}
/// `^` between two tiles of the same element type and shape. The Rust body is
/// an `unreachable!()` placeholder.
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const D: [i32; N]> ops::BitXor<Tile<E, D>> for Tile<E, D> {
type Output = Tile<E, D>;
fn bitxor(self, _rhs: Tile<E, D>) -> Tile<E, D> {
unreachable!()
}
}
/// A tensor view over elements of type `E` with shape `D`.
///
/// The attribute names it `!cuda_tile.tensor_view` and lists the metadata
/// fields `base`, `shape`, `strides` and `token`. The Rust struct itself only
/// carries `E` in a `PhantomData`.
#[cuda_tile::ty(name="!cuda_tile.tensor_view",
type_params=["{D}xE", "strides"],
type_meta=["base", "shape", "strides", "token"])]
#[cuda_tile::variadic_struct(N = 6)]
pub struct Tensor<E: ElementType, const D: [i32; N]> {
// Type-level record of the element type only.
_type: PhantomData<E>,
}
/// Methods for partitioning, loading from and storing to a `Tensor`.
///
/// Helpers such as `make_partition_view`, `make_partition_view_mut`,
/// `load_tile_mut`, `load_tile` and `store_tile` are defined outside the
/// visible part of this file.
#[cuda_tile::variadic_impl(N = 6)]
impl<E: ElementType, const S: [i32; N]> Tensor<E, S> {
/// Creates a read-only `Partition` of this tensor with tile shape `tile`.
///
/// Calls `make_partition_view` with `padding::Zero`, `dim_map::Identity`
/// and the tensor's current token.
///
/// NOTE(review): this uses `padding::Zero`, while `partition_permuted` and
/// `partition_mut` use `padding::None`. Confirm the difference is intended.
pub fn partition<'a, const R: [i32; N]>(&'a self, tile: Shape<R>) -> Partition<'a, E, R> {
let tensor_token: Token = get_tensor_token(self);
let p: Partition<E, R> =
make_partition_view(self, tile, padding::Zero, dim_map::Identity, tensor_token);
p
}
/// Like `partition`, but passes the caller's `dim_map` array and
/// `padding::None` to `make_partition_view`.
pub fn partition_permuted<'a, const R: [i32; N], const I: [i32; N]>(
&'a self,
tile: Shape<R>,
dim_map: Array<I>,
) -> Partition<'a, E, R> {
let tensor_token: Token = get_tensor_token(self);
let p: Partition<E, R> =
make_partition_view(self, tile, padding::None, dim_map, tensor_token);
p
}
/// Creates a mutable `PartitionMut` of this tensor with tile shape `tile`,
/// via `make_partition_view_mut` with `padding::None` and the tensor's
/// current token.
///
/// # Safety
///
/// The contract the caller must uphold is not stated in this file.
/// TODO: document it.
pub unsafe fn partition_mut<'a, const R: [i32; N]>(
&'a mut self,
tile: Shape<R>,
) -> PartitionMut<'a, E, R> {
let tensor_token: Token = get_tensor_token(self);
unsafe { make_partition_view_mut(self, tile, padding::None, tensor_token) }
}
/// Returns the tensor's shape metadata via `get_tensor_shape_meta`. The
/// lifetime `'b` of the returned `Shape` is chosen by the caller and is
/// not tied to `self`.
pub fn shape<'b>(&self) -> Shape<'b, S> {
get_tensor_shape_meta(self)
}
/// Loads the whole tensor as one tile of shape `S` via `load_tile_mut`.
///
/// NOTE(review): takes `&mut self` although it is a load; presumably
/// required by `load_tile_mut`. Confirm.
pub fn load(&mut self) -> Tile<E, S> {
load_tile_mut(self)
}
/// Loads a tile of shape `shape` at index `idx`; forwards to `load_tile`.
pub fn load_tile<const R: [i32; N]>(&self, shape: Shape<R>, idx: [i32; N]) -> Tile<E, R> {
load_tile(self, shape, idx)
}
/// Stores `result`, a tile of the full tensor shape `S`, via `store_tile`.
pub fn store(&mut self, result: Tile<E, S>) {
store_tile(self, result);
}
}
/// A read-only partition view with element type `E` and tile shape `D`,
/// borrowed for lifetime `'a`.
///
/// The attribute names it `!cuda_tile.partition_view`, with optional
/// `padding_value`, `tensor_view` and `dim_map` parameters and the metadata
/// fields `token` and `tensor_view.shape()`.
#[cuda_tile::ty(name="!cuda_tile.partition_view",
type_params=["tile"],
type_params_optional=["padding_value", "tensor_view", "dim_map"],
type_meta=["token", "tensor_view.shape()"])]
#[cuda_tile::variadic_struct(N = 6)]
pub struct Partition<'a, E: ElementType, const D: [i32; N]> {
// Type-level record of the element type only.
_type: PhantomData<E>,
// Ties the view to a shared borrow of lifetime `'a`.
_tensor: PhantomData<&'a ()>,
}
/// Load access to a read-only partition.
#[cuda_tile::variadic_impl(N = 6)]
impl<'a, E: ElementType, const D: [i32; N]> Partition<'a, E, D> {
/// Loads the tile at partition index `index`.
///
/// Calls `check_partition_access` first, then `load_view_tko` (defined
/// outside the visible part of this file) with `ordering::Weak`,
/// `scope::TileBlock`, `None` and `tma::Enabled`.
pub fn load(&self, index: [i32; N]) -> Tile<E, D> {
check_partition_access(self, index);
let result: Tile<E, D> = load_view_tko(
self,
index,
ordering::Weak,
scope::TileBlock,
// Presumably an optional latency hint, left unset (TODO confirm).
None,
tma::Enabled,
);
result
}
}
/// A mutable partition view with element type `E` and tile shape `D`,
/// borrowed for lifetime `'a`.
///
/// The attribute names it `!cuda_tile.partition_view`, with optional
/// `padding_value` and `tensor_view` parameters and the metadata field
/// `token`.
#[cuda_tile::ty(name="!cuda_tile.partition_view",
type_params=["tile"],
type_params_optional=["padding_value", "tensor_view"],
type_meta=["token"])]
#[cuda_tile::variadic_struct(N = 6)]
pub struct PartitionMut<'a, E: ElementType, const D: [i32; N]> {
// Type-level record of the element type only.
_type: PhantomData<E>,
// Ties the view to an exclusive borrow of lifetime `'a`.
_tensor: PhantomData<&'a mut ()>,
}
/// Load and store access to a mutable partition. Both methods are `unsafe`
/// and forward to helpers defined outside the visible part of this file.
#[cuda_tile::variadic_impl(N = 6)]
impl<'a, E: ElementType, const D: [i32; N]> PartitionMut<'a, E, D> {
/// Loads the tile at partition index `index` via `load_view_tko_mut` with
/// `ordering::Weak`, `scope::TileBlock`, `None` and `tma::Enabled`.
///
/// Unlike `Partition::load`, this does not call `check_partition_access`.
///
/// # Safety
///
/// The contract the caller must uphold is not stated in this file.
/// TODO: document it.
pub unsafe fn load(&self, index: [i32; N]) -> Tile<E, D> {
let result: Tile<E, D> = unsafe {
load_view_tko_mut(
self,
index,
ordering::Weak,
scope::TileBlock,
None,
tma::Enabled,
)
};
result
}
/// Stores `tile` at partition index `index` via `store_view_tko_mut` with
/// `ordering::Weak`, `scope::TileBlock`, `None` and `tma::Enabled`, and
/// returns the `Token` that call produces.
///
/// # Safety
///
/// The contract the caller must uphold is not stated in this file.
/// TODO: document it.
pub unsafe fn store(&mut self, tile: Tile<E, D>, index: [i32; N]) -> Token {
let token: Token = unsafe {
store_view_tko_mut(
self,
tile,
index,
ordering::Weak,
scope::TileBlock,
None,
tma::Enabled,
)
};
token
}
}
/// Field-less value named `!cuda_tile.token` on the cuda_tile side.
///
/// Tokens are read from and written to tensor/partition metadata
/// (`get_tensor_token`, `set_tensor_token`) and returned by stores;
/// presumably they order memory operations (confirm).
#[cuda_tile::ty(name="!cuda_tile.token", params=[])]
#[derive(Copy, Clone)]
pub struct Token {}
/// A shape whose static dimensions are the const parameter `D`.
///
/// The `variadic_struct` attribute presumably generates the per-rank types
/// (`Shape_0`, `Shape_1`, ...) used by `const_shape!`.
#[cuda_tile::variadic_struct(N = 6, constructor = "new")]
#[derive(Copy, Clone)]
pub struct Shape<'a, const D: [i32; N]> {
// Borrowed slice of dimension values.
pub dims: &'a [i32],
}
/// Builds a compile-time shape from integer literals.
///
/// `const_shape!(a, b, ...)` expands to `Shape_K::<a, b, ...>::const_new()`,
/// where `K` is the number of literals. Ranks 0 through 6 are accepted, to
/// match the `N = 6` bound used by the variadic items in this module.
/// Previously only ranks 0 through 4 had an arm.
///
/// The `Shape_K` names are resolved at the call site, so they must be in
/// scope where the macro is invoked.
#[macro_export]
macro_rules! const_shape {
    () => {
        Shape_0::const_new()
    };
    ($x1:literal) => {
        Shape_1::<$x1>::const_new()
    };
    ($x1:literal, $x2:literal) => {
        Shape_2::<$x1, $x2>::const_new()
    };
    ($x1:literal, $x2:literal, $x3:literal) => {
        Shape_3::<$x1, $x2, $x3>::const_new()
    };
    ($x1:literal, $x2:literal, $x3:literal, $x4:literal) => {
        Shape_4::<$x1, $x2, $x3, $x4>::const_new()
    };
    ($x1:literal, $x2:literal, $x3:literal, $x4:literal, $x5:literal) => {
        Shape_5::<$x1, $x2, $x3, $x4, $x5>::const_new()
    };
    ($x1:literal, $x2:literal, $x3:literal, $x4:literal, $x5:literal, $x6:literal) => {
        Shape_6::<$x1, $x2, $x3, $x4, $x5, $x6>::const_new()
    };
}
pub use const_shape;
/// An integer array whose static values are the const parameter `D`.
///
/// It is passed as `permutation` to `permute` and as `dim_map` to
/// `Tensor::partition_permuted`. The `variadic_struct` attribute presumably
/// generates the per-rank types (`Array_0`, `Array_1`, ...) used by
/// `const_array!`.
#[cuda_tile::variadic_struct(N = 6, constructor = "new")]
#[derive(Copy, Clone)]
pub struct Array<'a, const D: [i32; N]> {
// Borrowed slice of values.
pub dims: &'a [i32],
}
/// Lets any `Array` be used wherever a `dim_map::Mode` is required.
#[cuda_tile::variadic_impl(N = 6)]
impl<'a, const D: [i32; N]> dim_map::Mode for Array<'a, D> {}
/// Builds a compile-time array from integer literals.
///
/// `const_array!(a, b, ...)` expands to `Array_K::<a, b, ...>::const_new()`,
/// where `K` is the number of literals. Ranks 0 through 6 are accepted, to
/// match the `N = 6` bound used by the variadic items in this module.
/// Previously only ranks 0 through 4 had an arm.
///
/// The `Array_K` names are resolved at the call site, so they must be in
/// scope where the macro is invoked.
#[macro_export]
macro_rules! const_array {
    () => {
        Array_0::const_new()
    };
    ($x1:literal) => {
        Array_1::<$x1>::const_new()
    };
    ($x1:literal, $x2:literal) => {
        Array_2::<$x1, $x2>::const_new()
    };
    ($x1:literal, $x2:literal, $x3:literal) => {
        Array_3::<$x1, $x2, $x3>::const_new()
    };
    ($x1:literal, $x2:literal, $x3:literal, $x4:literal) => {
        Array_4::<$x1, $x2, $x3, $x4>::const_new()
    };
    ($x1:literal, $x2:literal, $x3:literal, $x4:literal, $x5:literal) => {
        Array_5::<$x1, $x2, $x3, $x4, $x5>::const_new()
    };
    ($x1:literal, $x2:literal, $x3:literal, $x4:literal, $x5:literal, $x6:literal) => {
        Array_6::<$x1, $x2, $x3, $x4, $x5, $x6>::const_new()
    };
}
pub use const_array;
/// Print macro taking a format-string literal followed by zero or more
/// comma-separated expressions.
///
/// On the Rust side it expands to `unreachable!();` and discards its
/// arguments, so presumably the cuda_tile compiler handles the invocation
/// itself (confirm).
#[macro_export]
macro_rules! cuda_tile_print {
($s:literal $(,$args:expr)*) => {
unreachable!();
};
}
pub use cuda_tile_print;
/// Assert macro taking an expression followed by a message literal.
///
/// On the Rust side it expands to `unreachable!();` and discards its
/// arguments, so presumably the cuda_tile compiler handles the invocation
/// itself (confirm).
#[macro_export]
macro_rules! cuda_tile_assert {
($args:expr, $s:literal) => {
unreachable!();
};
}
pub use cuda_tile_assert;
/// Binding for the `cuda_tile.get_num_tile_blocks` op; returns three `i32`
/// values (presumably the tile-block count per grid axis; confirm).
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.get_num_tile_blocks", params=[])]
pub fn get_num_tile_blocks() -> (i32, i32, i32) {
unreachable!()
}
/// Binding for the `cuda_tile.get_tile_block_id` op; returns three `i32`
/// values (presumably the current tile block's index per grid axis; confirm).
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.get_tile_block_id", params=[])]
pub fn get_tile_block_id() -> (i32, i32, i32) {
unreachable!()
}
/// Compiler `cast` op: turns a scalar into a rank-0 tile of element type `E`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::compiler_op(name = "cast")]
pub fn scalar_to_tile<E: ElementType>(scalar: impl Scalar) -> Tile<E, { [] }> {
unreachable!()
}
/// Compiler `cast` op: turns a rank-0 tile into a scalar of type `S`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::compiler_op(name = "cast")]
pub fn tile_to_scalar<E: ElementType, S: Scalar>(tile: Tile<E, { [] }>) -> S {
unreachable!()
}
/// Compiler `convert` op: converts scalar `x` to scalar type `S`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::compiler_op(name = "convert")]
pub fn convert_scalar<S: Scalar>(x: impl Scalar) -> S {
unreachable!()
}
/// Compiler `convert` op: converts a tile with element type `FROM` to element
/// type `TO`, keeping shape `S`. The Rust body is an `unreachable!()`
/// placeholder.
#[cuda_tile::compiler_op(name = "convert")]
#[cuda_tile::variadic_op(N = 6)]
pub fn convert_tile<TO: ElementType, FROM: ElementType, const S: [i32; N]>(
x: Tile<FROM, S>,
) -> Tile<TO, S> {
unreachable!()
}
/// Compiler `cast` op: wraps pointer `ptr` as a rank-0 `PointerTile`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::compiler_op(name = "cast")]
pub fn pointer_to_tile<P: Pointer>(ptr: P) -> PointerTile<P, { [] }> {
unreachable!()
}
/// Compiler `cast` op: unwraps a rank-0 `PointerTile` back into a pointer.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::compiler_op(name = "cast")]
pub fn tile_to_pointer<P: Pointer>(tile: PointerTile<P, { [] }>) -> P {
unreachable!()
}
/// Compiler `check` op taking a partition and an index; it is called by
/// `Partition::load` before the load. Presumably it validates that `index` is
/// within the partition (confirm). The Rust body is an `unreachable!()`
/// placeholder.
#[cuda_tile::compiler_op(name = "check")]
#[cuda_tile::variadic_op(N = 6)]
pub fn check_partition_access<E: ElementType, const S: [i32; N]>(
part: &Partition<E, S>,
index: [i32; N],
) {
unreachable!()
}
/// Binding for the `cuda_tile.broadcast` op: takes a tile of shape `S` and
/// yields a tile of shape `R` with the same rank and element type.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.broadcast", params=["source"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn broadcast<E: ElementType, const S: [i32; N], const R: [i32; N]>(
source: Tile<E, S>,
shape: Shape<R>,
) -> Tile<E, R> {
unreachable!()
}
/// Binding for the `cuda_tile.cat` op (presumably concatenation of `lhs` and
/// `rhs` along `dim`; confirm). `dim` is passed as an integer attribute.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.cat", params=["lhs", "rhs"], attribute_params=["dim:integer"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn cat<E: ElementType, const SLhs: [i32; N], const SRhs: [i32; N], const SOut: [i32; N]>(
lhs: Tile<E, SLhs>,
rhs: Tile<E, SRhs>,
dim: i32,
) -> Tile<E, SOut> {
unreachable!()
}
/// Binding for the `cuda_tile.constant` op: yields a tile of shape `S`, with
/// `value` passed as a dense attribute. The Rust body is an `unreachable!()`
/// placeholder.
#[cuda_tile::op(name="cuda_tile.constant", params=[], attribute_params=["value:dense"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn constant<E: ElementType, const S: [i32; N]>(value: E, shape: Shape<S>) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.extract` op: takes a source tile and one rank-0
/// `i32` index tile per dimension, and yields a tile of shape `SOut`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.extract", params=["source", "...indices"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn extract<E: ElementType, const SIn: [i32; N], const SOut: [i32; N]>(
source: Tile<E, SIn>,
indices: [Tile<i32, { [] }>; N],
) -> Tile<E, SOut> {
unreachable!()
}
/// Binding for the `cuda_tile.iota` op: yields a rank-1 tile of shape `S`
/// (presumably filled with consecutive indices; confirm).
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.iota")]
pub fn iota<E: ElementType, const S: [i32; 1]>(shape: Shape<S>) -> Tile<E, S> {
unreachable!()
}
/// Compiler `mma` op on an `[M, K]` lhs, a `[K, N]` rhs and an `[M, N]`
/// accumulator; returns a tile of the accumulator's type and shape.
/// Both inputs share element type `E1`; the accumulator uses `E2`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::compiler_op(name = "mma")]
pub fn mma<E1: ElementType, E2: ElementType, const M: i32, const N: i32, const K: i32>(
lhs: Tile<E1, { [M, K] }>,
rhs: Tile<E1, { [K, N] }>,
acc: Tile<E2, { [M, N] }>,
) -> Tile<E2, { [M, N] }> {
unreachable!()
}
/// Binding for the `cuda_tile.permute` op: takes a tile of shape `A` and
/// yields a tile of shape `R`, with `permutation` passed as an array
/// attribute. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.permute", params=["source"], attribute_params=["permutation:array"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn permute<E: ElementType, const A: [i32; N], const I: [i32; N], const R: [i32; N]>(
source: Tile<E, A>,
permutation: Array<I>,
) -> Tile<E, R> {
unreachable!()
}
/// Binding for the `cuda_tile.reshape` op: takes a tile of shape `S` (rank
/// `N`) and yields a tile of shape `R` (rank `M`) with the same element type.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.reshape", params=["source"])]
#[cuda_tile::variadic_op(N = 6, M = 6)]
pub fn reshape<E: ElementType, const S: [i32; N], const R: [i32; M]>(
source: Tile<E, S>,
shape: Shape<R>,
) -> Tile<E, R> {
unreachable!()
}
/// Binding for the `cuda_tile.reduce` op with a user-supplied combiner.
///
/// Takes the operand tile, a dimension `dim`, an `identity` element and a
/// binary function `f`. The declared return type keeps the input shape `S`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.reduce", params=["operand"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn reduce<E: ElementType, const S: [i32; N], F>(
operand: Tile<E, S>,
dim: i32,
identity: E,
f: F,
) -> Tile<E, S>
where
F: Fn(E, E) -> E,
{
unreachable!()
}
/// Compiler `reduce` op (presumably a minimum reduction along `dim`;
/// confirm). Input shape `S` and output shape `R` may differ in rank.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::compiler_op(name = "reduce")]
#[cuda_tile::variadic_op(N = 6, M = 6)]
pub fn reduce_min<E: ElementType, const S: [i32; N], const R: [i32; M]>(
x: Tile<E, S>,
dim: i32,
) -> Tile<E, R> {
unreachable!()
}
/// Compiler `reduce` op (presumably a maximum reduction along `dim`;
/// confirm). Input shape `S` and output shape `R` may differ in rank.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::compiler_op(name = "reduce")]
#[cuda_tile::variadic_op(N = 6, M = 6)]
pub fn reduce_max<E: ElementType, const S: [i32; N], const R: [i32; M]>(
x: Tile<E, S>,
dim: i32,
) -> Tile<E, R> {
unreachable!()
}
/// Compiler `reduce` op (presumably a sum reduction along `dim`; confirm).
/// Input shape `S` and output shape `R` may differ in rank.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::compiler_op(name = "reduce")]
#[cuda_tile::variadic_op(N = 6, M = 6)]
pub fn reduce_sum<E: ElementType, const S: [i32; N], const R: [i32; M]>(
x: Tile<E, S>,
dim: i32,
) -> Tile<E, R> {
unreachable!()
}
/// Compiler `reduce` op (presumably a product reduction along `dim`;
/// confirm). Input shape `S` and output shape `R` may differ in rank.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::compiler_op(name = "reduce")]
#[cuda_tile::variadic_op(N = 6, M = 6)]
pub fn reduce_prod<E: ElementType, const S: [i32; N], const R: [i32; M]>(
x: Tile<E, S>,
dim: i32,
) -> Tile<E, R> {
unreachable!()
}
/// Binding for the `cuda_tile.scan` op without a combiner argument
/// (presumably a running sum along `dim`; confirm).
///
/// NOTE(review): this carries the same op attribute as `scan` but has no `f`
/// parameter. Confirm how the combiner is supplied.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.scan", params=["operand"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn scan_sum<E: ElementType, const S: [i32; N], R: reverse::Mode>(
operand: Tile<E, S>,
dim: i32,
reverse: R,
identity: E,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.scan` op with a user-supplied combiner.
///
/// Takes the operand tile, a dimension `dim`, a `reverse::Mode` marker, an
/// `identity` element and a binary function `f`; the result keeps shape `S`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.scan", params=["operand"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn scan<E: ElementType, const S: [i32; N], R: reverse::Mode, F>(
operand: Tile<E, S>,
dim: i32,
reverse: R,
identity: E,
f: F,
) -> Tile<E, S>
where
F: Fn(E, E) -> E,
{
unreachable!()
}
/// Binding for the `cuda_tile.select` op: takes a `bool` condition tile and
/// two value tiles, all of shape `S`, and yields a tile of shape `S`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.select", params=["cond", "val_if_true", "val_if_false"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn select<E: ElementType, const S: [i32; N]>(
cond: Tile<bool, S>,
val_if_true: Tile<E, S>,
val_if_false: Tile<E, S>,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.make_token` op; returns a new `Token`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.make_token", params=[])]
pub fn new_token_unordered() -> Token {
unreachable!()
}
/// Binding for the `cuda_tile.join_tokens` op: takes a slice of tokens and
/// returns one `Token`. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.join_tokens", params=["tokens"])]
pub fn join_tokens(tokens: &[Token]) -> Token {
unreachable!()
}
/// Compiler `shape` op: returns an `i32` for `shape` and `dim_idx`
/// (presumably the extent of that dimension; confirm).
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::variadic_op(N = 6)]
#[cuda_tile::compiler_op(name = "shape")]
pub fn get_shape_dim<const S: [i32; N]>(shape: Shape<S>, dim_idx: i32) -> i32 {
unreachable!()
}
/// Compiler op returning the `shape` metadata field of `tensor`.
/// The returned `Shape`'s lifetime `'s` is independent of the borrow of
/// `tensor`. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::variadic_op(N = 6)]
#[cuda_tile::compiler_op(name = "return_type_meta_field", type_meta_field = "shape")]
pub fn get_tensor_shape_meta<'s, E: ElementType, const S: [i32; N]>(
tensor: &Tensor<E, S>,
) -> Shape<'s, S> {
unreachable!()
}
/// Compiler op returning the `token` metadata field of `tensor`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::variadic_op(N = 6)]
#[cuda_tile::compiler_op(name = "return_type_meta_field", type_meta_field = "token")]
pub fn get_tensor_token<E: ElementType, const S: [i32; N]>(tensor: &Tensor<E, S>) -> Token {
unreachable!()
}
/// Compiler op setting the `token` metadata field of `tensor` to `token`.
///
/// NOTE(review): takes `&Tensor` (a shared reference) although it updates
/// metadata; presumably the metadata lives in the compiler rather than in
/// the Rust value. Confirm.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::variadic_op(N = 6)]
#[cuda_tile::compiler_op(name = "set_type_meta_field", type_meta_field = "token")]
pub fn set_tensor_token<E: ElementType, const S: [i32; N]>(
tensor: &Tensor<E, S>,
token: Token,
) {
unreachable!()
}
/// Compiler op returning the `token` metadata field of a read-only
/// partition. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::variadic_op(N = 6)]
#[cuda_tile::compiler_op(name = "return_type_meta_field", type_meta_field = "token")]
pub fn get_partition_token<E: ElementType, const D: [i32; N]>(view: &Partition<E, D>) -> Token {
unreachable!()
}
/// Compiler op returning the `token` metadata field of a mutable partition.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::variadic_op(N = 6)]
#[cuda_tile::compiler_op(name = "return_type_meta_field", type_meta_field = "token")]
pub fn get_partition_token_mut<E: ElementType, const D: [i32; N]>(
view: &PartitionMut<E, D>,
) -> Token {
unreachable!()
}
/// Compiler `num_tiles` op: returns an `i32` for a partition and an `axis`
/// (presumably the number of tiles along that axis; confirm).
/// The Rust body is an `unreachable!()` placeholder.
///
/// # Safety
///
/// NOTE(review): declared `unsafe`, but the contract the caller must uphold
/// is not stated in this file. TODO: document it or drop `unsafe`.
#[cuda_tile::compiler_op(name = "num_tiles")]
#[cuda_tile::variadic_op(N = 6)]
pub unsafe fn num_tiles<E: ElementType, const S: [i32; N]>(
view: &Partition<E, S>,
axis: i32,
) -> i32 {
unreachable!()
}
/// Binding for the `cuda_tile.ftof` op: maps a tile with element type `EIn`
/// to one with element type `EOut`, keeping shape `S` (presumably a
/// float-to-float conversion; confirm).
///
/// `rounding` selects the `rounding_mode` attribute; all six
/// `rounding::Mode` markers are listed. The Rust body is an `unreachable!()`
/// placeholder.
#[cuda_tile::op(name = "cuda_tile.ftof", params = ["x"], static_params = ["rounding={NearestEven: rounding_mode=#cuda_tile.rounding<nearest_even>, PositiveInf: rounding_mode=#cuda_tile.rounding<positive_inf>, NegativeInf: rounding_mode=#cuda_tile.rounding<negative_inf>, Zero: rounding_mode=#cuda_tile.rounding<zero>, Approx: rounding_mode=#cuda_tile.rounding<approx>, Full: rounding_mode=#cuda_tile.rounding<full>}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn ftof<EIn: ElementType, EOut: ElementType, const S: [i32; N], R: rounding::Mode>(
x: Tile<EIn, S>,
rounding: R,
) -> Tile<EOut, S> {
unreachable!()
}
/// Binding for the `cuda_tile.ftoi` op: maps a tile with element type `EIn`
/// to one with element type `EOut`, keeping shape `S` (presumably a
/// float-to-integer conversion; confirm).
///
/// `rounding` selects the `rounding_mode` attribute; all six
/// `rounding::Mode` markers are listed. The Rust body is an `unreachable!()`
/// placeholder.
#[cuda_tile::op(name = "cuda_tile.ftoi", params = ["x"], static_params = ["rounding={NearestEven: rounding_mode=#cuda_tile.rounding<nearest_even>, PositiveInf: rounding_mode=#cuda_tile.rounding<positive_inf>, NegativeInf: rounding_mode=#cuda_tile.rounding<negative_inf>, Zero: rounding_mode=#cuda_tile.rounding<zero>, Approx: rounding_mode=#cuda_tile.rounding<approx>, Full: rounding_mode=#cuda_tile.rounding<full>}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn ftoi<EIn: ElementType, EOut: ElementType, const S: [i32; N], R: rounding::Mode>(
x: Tile<EIn, S>,
rounding: R,
) -> Tile<EOut, S> {
unreachable!()
}
/// Binding for the `cuda_tile.itof` op: maps a tile with element type `EIn`
/// to one with element type `EOut`, keeping shape `S` (presumably an
/// integer-to-float conversion; confirm).
///
/// `rounding` selects the `rounding_mode` attribute; all six
/// `rounding::Mode` markers are listed. The Rust body is an `unreachable!()`
/// placeholder.
#[cuda_tile::op(name = "cuda_tile.itof", params = ["x"], static_params = ["rounding={NearestEven: rounding_mode=#cuda_tile.rounding<nearest_even>, PositiveInf: rounding_mode=#cuda_tile.rounding<positive_inf>, NegativeInf: rounding_mode=#cuda_tile.rounding<negative_inf>, Zero: rounding_mode=#cuda_tile.rounding<zero>, Approx: rounding_mode=#cuda_tile.rounding<approx>, Full: rounding_mode=#cuda_tile.rounding<full>}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn itof<EIn: ElementType, EOut: ElementType, const S: [i32; N], R: rounding::Mode>(
x: Tile<EIn, S>,
rounding: R,
) -> Tile<EOut, S> {
unreachable!()
}
/// Binding for the `cuda_tile.exti` op: maps a tile with element type `EIn`
/// to one with element type `EOut`, keeping shape `S` (presumably integer
/// extension; confirm). The `signedness` attribute is set to
/// `inferred_signedness`. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.exti", params=["from"], named_attributes=["signedness=inferred_signedness"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn exti<EIn: ElementType, EOut: ElementType, const S: [i32; N]>(
from: Tile<EIn, S>,
) -> Tile<EOut, S> {
unreachable!()
}
/// Binding for the `cuda_tile.trunci` op: maps a tile with element type `EIn`
/// to one with element type `EOut`, keeping shape `S` (presumably integer
/// truncation; confirm).
///
/// `overflow` selects the `overflow` attribute; `overflow::None` has an empty
/// mapping. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.trunci", params = ["from"], static_params = ["overflow={None: , NoSignedWrap: overflow=#cuda_tile.overflow<no_signed_wrap>, NoUnsignedWrap: overflow=#cuda_tile.overflow<no_unsigned_wrap>, NoWrap: overflow=#cuda_tile.overflow<no_wrap>}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn trunci<EIn: ElementType, EOut: ElementType, const S: [i32; N], O: overflow::Mode>(
from: Tile<EIn, S>,
overflow: O,
) -> Tile<EOut, S> {
unreachable!()
}
/// Binding for the `cuda_tile.bitcast` op: maps a tile with element type
/// `EIn` to one with element type `EOut`, keeping shape `S`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.bitcast", params=["source"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn bitcast<EIn: ElementType, EOut: ElementType, const S: [i32; N]>(
source: Tile<EIn, S>,
) -> Tile<EOut, S> {
unreachable!()
}
/// Binding for the `cuda_tile.int_to_ptr` op: maps a tile with element type
/// `SRC_T` to a `PointerTile` of `*mut PTR_T`, keeping shape `S`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.int_to_ptr", params=["source"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn int_to_ptr<SRC_T: ElementType, PTR_T: ElementType, const S: [i32; N]>(
source: Tile<SRC_T, S>,
) -> PointerTile<*mut PTR_T, S> {
unreachable!()
}
/// Binding for the `cuda_tile.ptr_to_int` op: maps a `PointerTile` of
/// `*mut E` to a `Tile`, keeping shape `S`.
///
/// NOTE(review): the result's element type is the pointee type `E`, not a
/// separate integer type parameter (compare `int_to_ptr`, which has
/// `SRC_T` and `PTR_T`). Confirm this is intended.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.ptr_to_int", params=["source"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn ptr_to_int<E: ElementType, const S: [i32; N]>(
source: PointerTile<*mut E, S>,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.ptr_to_ptr` op: maps a `PointerTile` of
/// `*mut EIn` to one of `*mut EOut`, keeping shape `S`.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.ptr_to_ptr", params=["source"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn ptr_to_ptr<EIn: ElementType, EOut: ElementType, const S: [i32; N]>(
source: PointerTile<*mut EIn, S>,
) -> PointerTile<*mut EOut, S> {
unreachable!()
}
/// Binding for the `cuda_tile.addi` op on two tiles of the same element type
/// and shape (presumably integer addition; confirm).
///
/// `overflow` selects the `overflow` attribute; `overflow::None` has an empty
/// mapping. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.addi", params = ["lhs", "rhs"], static_params = ["overflow={None: , NoSignedWrap: overflow=#cuda_tile.overflow<no_signed_wrap>, NoUnsignedWrap: overflow=#cuda_tile.overflow<no_unsigned_wrap>, NoWrap: overflow=#cuda_tile.overflow<no_wrap>}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn addi<E: ElementType, const S: [i32; N], O: overflow::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
overflow: O,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.subi` op on two tiles of the same element type
/// and shape (presumably integer subtraction; confirm).
///
/// `overflow` selects the `overflow` attribute; `overflow::None` has an empty
/// mapping. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.subi", params = ["lhs", "rhs"], static_params = ["overflow={None: , NoSignedWrap: overflow=#cuda_tile.overflow<no_signed_wrap>, NoUnsignedWrap: overflow=#cuda_tile.overflow<no_unsigned_wrap>, NoWrap: overflow=#cuda_tile.overflow<no_wrap>}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn subi<E: ElementType, const S: [i32; N], O: overflow::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
overflow: O,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.muli` op on two tiles of the same element type
/// and shape (presumably integer multiplication; confirm).
///
/// `overflow` selects the `overflow` attribute; `overflow::None` has an empty
/// mapping. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.muli", params = ["lhs", "rhs"], static_params = ["overflow={None: , NoSignedWrap: overflow=#cuda_tile.overflow<no_signed_wrap>, NoUnsignedWrap: overflow=#cuda_tile.overflow<no_unsigned_wrap>, NoWrap: overflow=#cuda_tile.overflow<no_wrap>}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn muli<E: ElementType, const S: [i32; N], O: overflow::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
overflow: O,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.divi` op on two tiles of the same element type
/// and shape (presumably integer division; confirm).
///
/// `rounding` selects the `rounding` attribute. Unlike the float ops, the
/// attribute key here is `rounding` rather than `rounding_mode`, and
/// `rounding::Zero` has an empty mapping. The Rust body is an
/// `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.divi", params = ["lhs", "rhs"], static_params = ["rounding={Zero: , NearestEven: rounding=#cuda_tile.rounding<nearest_even>, PositiveInf: rounding=#cuda_tile.rounding<positive_inf>, NegativeInf: rounding=#cuda_tile.rounding<negative_inf>, Approx: rounding=#cuda_tile.rounding<approx>, Full: rounding=#cuda_tile.rounding<full>}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn divi<E: ElementType, const S: [i32; N], R: rounding::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
rounding: R,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.remi` op on two tiles of the same element type
/// and shape (presumably integer remainder; confirm).
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.remi", params = ["lhs", "rhs"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn remi<E: ElementType, const S: [i32; N]>(lhs: Tile<E, S>, rhs: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.negi` op on one tile (presumably integer
/// negation; confirm).
///
/// `overflow` selects the `overflow` attribute; `overflow::None` has an empty
/// mapping. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.negi", params = ["x"], static_params = ["overflow={None: , NoSignedWrap: overflow=#cuda_tile.overflow<no_signed_wrap>, NoUnsignedWrap: overflow=#cuda_tile.overflow<no_unsigned_wrap>, NoWrap: overflow=#cuda_tile.overflow<no_wrap>}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn negi<E: ElementType, const S: [i32; N], O: overflow::Mode>(
x: Tile<E, S>,
overflow: O,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.absi` op on one tile (presumably integer
/// absolute value; confirm). The Rust body is an `unreachable!()`
/// placeholder.
#[cuda_tile::op(name="cuda_tile.absi", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn absi<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.mulhii` op on two tiles of the same element
/// type and shape (presumably the high half of an integer product; confirm).
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.mulhii", params=["x", "y"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn mulhii<E: ElementType, const S: [i32; N]>(x: Tile<E, S>, y: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.maxi` op on two tiles of the same element type
/// and shape (presumably integer maximum; confirm). The `signedness`
/// attribute is set to `inferred_signedness`. The Rust body is an
/// `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.maxi", params=["lhs", "rhs"], named_attributes=["signedness=inferred_signedness"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn maxi<E: ElementType, const S: [i32; N]>(lhs: Tile<E, S>, rhs: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.mini` op on two tiles of the same element type
/// and shape (presumably integer minimum; confirm). The `signedness`
/// attribute is set to `inferred_signedness`. The Rust body is an
/// `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.mini", params=["lhs", "rhs"], named_attributes=["signedness=inferred_signedness"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn mini<E: ElementType, const S: [i32; N]>(lhs: Tile<E, S>, rhs: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.mmai` op (presumably integer matrix
/// multiply-accumulate; confirm).
///
/// `lhs` and `rhs` share element type `EIn`; the accumulator and the result
/// are `i32` tiles of shape `ACC`. `signedness_lhs` and `signedness_rhs`
/// select the matching `#cuda_tile.signedness<...>` attributes. The variadic
/// bound here is `N = 3`, not 6. The Rust body is an `unreachable!()`
/// placeholder.
#[cuda_tile::op(
name = "cuda_tile.mmai",
params = ["lhs", "rhs", "acc"],
static_params = [
"signedness_lhs={Signed: signedness_lhs=#cuda_tile.signedness<signed>, Unsigned: signedness_lhs=#cuda_tile.signedness<unsigned>}",
"signedness_rhs={Signed: signedness_rhs=#cuda_tile.signedness<signed>, Unsigned: signedness_rhs=#cuda_tile.signedness<unsigned>}"
]
)]
#[cuda_tile::variadic_op(N = 3)]
pub fn mmai<
EIn: ElementType,
const LHS: [i32; N],
const RHS: [i32; N],
const ACC: [i32; N],
SL: signedness::Mode,
SR: signedness::Mode,
>(
lhs: Tile<EIn, LHS>,
rhs: Tile<EIn, RHS>,
acc: Tile<i32, ACC>,
signedness_lhs: SL,
signedness_rhs: SR,
) -> Tile<i32, ACC> {
unreachable!()
}
/// Binding for the `cuda_tile.addf` op on two tiles of the same element type
/// and shape (presumably floating-point addition; confirm).
///
/// `rounding` selects the `rounding_mode` attribute; the table has no entry
/// for `rounding::Full`. `ftz::Enabled` adds the `flush_to_zero` unit
/// attribute. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.addf", params=["lhs", "rhs"], static_params=["rounding={NearestEven: rounding_mode=#cuda_tile.rounding<nearest_even>, PositiveInf: rounding_mode=#cuda_tile.rounding<positive_inf>, NegativeInf: rounding_mode=#cuda_tile.rounding<negative_inf>, Zero: rounding_mode=#cuda_tile.rounding<zero>, Approx: rounding_mode=#cuda_tile.rounding<approx>}", "ftz={Enabled: flush_to_zero=unit}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn addf<E: ElementType, const S: [i32; N], R: rounding::Mode, F: ftz::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
rounding: R,
ftz: F,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.subf` op on two tiles of the same element type
/// and shape (presumably floating-point subtraction; confirm).
///
/// `rounding` selects the `rounding_mode` attribute; the table has no entry
/// for `rounding::Full`. `ftz::Enabled` adds the `flush_to_zero` unit
/// attribute. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.subf", params=["lhs", "rhs"], static_params=["rounding={NearestEven: rounding_mode=#cuda_tile.rounding<nearest_even>, PositiveInf: rounding_mode=#cuda_tile.rounding<positive_inf>, NegativeInf: rounding_mode=#cuda_tile.rounding<negative_inf>, Zero: rounding_mode=#cuda_tile.rounding<zero>, Approx: rounding_mode=#cuda_tile.rounding<approx>}", "ftz={Enabled: flush_to_zero=unit}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn subf<E: ElementType, const S: [i32; N], R: rounding::Mode, F: ftz::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
rounding: R,
ftz: F,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.mulf` op on two tiles of the same element type
/// and shape (presumably floating-point multiplication; confirm).
///
/// `rounding` selects the `rounding_mode` attribute; the table has no entry
/// for `rounding::Full`. `ftz::Enabled` adds the `flush_to_zero` unit
/// attribute. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.mulf", params=["lhs", "rhs"], static_params=["rounding={NearestEven: rounding_mode=#cuda_tile.rounding<nearest_even>, PositiveInf: rounding_mode=#cuda_tile.rounding<positive_inf>, NegativeInf: rounding_mode=#cuda_tile.rounding<negative_inf>, Zero: rounding_mode=#cuda_tile.rounding<zero>, Approx: rounding_mode=#cuda_tile.rounding<approx>}", "ftz={Enabled: flush_to_zero=unit}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn mulf<E: ElementType, const S: [i32; N], R: rounding::Mode, F: ftz::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
rounding: R,
ftz: F,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.divf` op on two tiles of the same element type
/// and shape (presumably floating-point division; confirm).
///
/// `rounding` selects the `rounding_mode` attribute; the table has no entry
/// for `rounding::Full`. `ftz::Enabled` adds the `flush_to_zero` unit
/// attribute. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.divf", params=["lhs", "rhs"], static_params=["rounding={NearestEven: rounding_mode=#cuda_tile.rounding<nearest_even>, PositiveInf: rounding_mode=#cuda_tile.rounding<positive_inf>, NegativeInf: rounding_mode=#cuda_tile.rounding<negative_inf>, Zero: rounding_mode=#cuda_tile.rounding<zero>, Approx: rounding_mode=#cuda_tile.rounding<approx>}", "ftz={Enabled: flush_to_zero=unit}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn divf<E: ElementType, const S: [i32; N], R: rounding::Mode, F: ftz::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
rounding: R,
ftz: F,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.remf` op on two tiles of the same element type
/// and shape (presumably floating-point remainder; confirm).
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.remf", params = ["lhs", "rhs"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn remf<E: ElementType, const S: [i32; N]>(lhs: Tile<E, S>, rhs: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.negf` op on one tile (presumably
/// floating-point negation; confirm). The Rust body is an `unreachable!()`
/// placeholder.
#[cuda_tile::op(name="cuda_tile.negf", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn negf<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.absf` op on one tile (presumably
/// floating-point absolute value; confirm). The Rust body is an
/// `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.absf", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn absf<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.atan2` op on two tiles of the same element type
/// and shape. Operands are passed in the order `x`, `y`.
/// NOTE(review): confirm which operand is the numerator.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.atan2", params = ["x", "y"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn atan2<E: ElementType, const S: [i32; N]>(x: Tile<E, S>, y: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.fma` op on three tiles (`lhs`, `rhs`, `acc`) of
/// the same element type and shape (presumably fused multiply-add; confirm).
///
/// `rounding` selects the `rounding_mode` attribute; the table has no entry
/// for `rounding::Full`. `ftz::Enabled` adds the `flush_to_zero` unit
/// attribute. The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name="cuda_tile.fma", params=["lhs", "rhs", "acc"], static_params=["rounding={NearestEven: rounding_mode=#cuda_tile.rounding<nearest_even>, PositiveInf: rounding_mode=#cuda_tile.rounding<positive_inf>, NegativeInf: rounding_mode=#cuda_tile.rounding<negative_inf>, Zero: rounding_mode=#cuda_tile.rounding<zero>, Approx: rounding_mode=#cuda_tile.rounding<approx>}", "ftz={Enabled: flush_to_zero=unit}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn fma<E: ElementType, const S: [i32; N], R: rounding::Mode, F: ftz::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
acc: Tile<E, S>,
rounding: R,
ftz: F,
) -> Tile<E, S> {
unreachable!()
}
/// Binding for the `cuda_tile.mmaf` op (presumably floating-point matrix
/// multiply-accumulate; confirm).
///
/// `lhs` and `rhs` share element type `EIn`; the accumulator and the result
/// use `EOut` with shape `ACC`. The variadic bound here is `N = 3`, not 6.
/// The Rust body is an `unreachable!()` placeholder.
#[cuda_tile::op(name = "cuda_tile.mmaf", params = ["lhs", "rhs", "acc"])]
#[cuda_tile::variadic_op(N = 3)]
pub fn mmaf<
EIn: ElementType,
EOut: ElementType,
const LHS: [i32; N],
const RHS: [i32; N],
const ACC: [i32; N],
>(
lhs: Tile<EIn, LHS>,
rhs: Tile<EIn, RHS>,
acc: Tile<EOut, ACC>,
) -> Tile<EOut, ACC> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.pow` op (presumably raises each
/// element of `source` to the matching element of `exponent` — confirm).
///
/// Operands and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.pow", params=["source", "exponent"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn pow<E: ElementType, const S: [i32; N]>(
source: Tile<E, S>,
exponent: Tile<E, S>,
) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.maxf` op (presumably the
/// element-wise floating-point maximum of `lhs` and `rhs` — confirm).
///
/// Operands and result share element type `E` and shape `S`. Per
/// `static_params`, `nan::Enabled` maps to `propagate_nan=unit` and
/// `ftz::Enabled` maps to `flush_to_zero=unit`; no mapping is listed for the
/// `Disabled` markers.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.maxf", params=["lhs", "rhs"], static_params=["nan={Enabled: propagate_nan=unit}", "ftz={Enabled: flush_to_zero=unit}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn maxf<E: ElementType, const S: [i32; N], P: nan::Mode, F: ftz::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
nan: P,
ftz: F,
) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.minf` op (presumably the
/// element-wise floating-point minimum of `lhs` and `rhs` — confirm).
///
/// Operands and result share element type `E` and shape `S`. Per
/// `static_params`, `nan::Enabled` maps to `propagate_nan=unit` and
/// `ftz::Enabled` maps to `flush_to_zero=unit`; no mapping is listed for the
/// `Disabled` markers.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.minf", params=["lhs", "rhs"], static_params=["nan={Enabled: propagate_nan=unit}", "ftz={Enabled: flush_to_zero=unit}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn minf<E: ElementType, const S: [i32; N], P: nan::Mode, F: ftz::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
nan: P,
ftz: F,
) -> Tile<E, S> {
unreachable!()
}
/// Scalar stub tagged as a compiler op in the `arithmetic` group (presumably
/// yields the smaller of `a` and `b` — confirm).
///
/// Both arguments and the result have the same element type `E`. The body is
/// `unreachable!()`; the call is presumably handled by the compiler rather
/// than executed.
#[cuda_tile::compiler_op(name = "arithmetic")]
pub fn min<E: ElementType>(a: E, b: E) -> E {
unreachable!()
}
/// Scalar stub tagged as a compiler op in the `arithmetic` group (presumably
/// yields the larger of `a` and `b` — confirm).
///
/// Both arguments and the result have the same element type `E`. The body is
/// `unreachable!()`; the call is presumably handled by the compiler rather
/// than executed.
#[cuda_tile::compiler_op(name = "arithmetic")]
pub fn max<E: ElementType>(a: E, b: E) -> E {
unreachable!()
}
/// Tile stub tagged as a compiler op in the `arithmetic` group (presumably
/// the element-wise minimum of `a` and `b` — confirm).
///
/// Operands and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; the call is presumably handled by the compiler rather
/// than executed.
#[cuda_tile::compiler_op(name = "arithmetic")]
#[cuda_tile::variadic_op(N = 6)]
pub fn min_tile<E: ElementType, const S: [i32; N]>(a: Tile<E, S>, b: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Tile stub tagged as a compiler op in the `arithmetic` group (presumably
/// the element-wise maximum of `a` and `b` — confirm).
///
/// Operands and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; the call is presumably handled by the compiler rather
/// than executed.
#[cuda_tile::compiler_op(name = "arithmetic")]
#[cuda_tile::variadic_op(N = 6)]
pub fn max_tile<E: ElementType, const S: [i32; N]>(a: Tile<E, S>, b: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Scalar stub tagged as a compiler op in the `arithmetic` group (presumably
/// divides `a` by `b` rounding the quotient up — confirm).
///
/// Both arguments and the result have the same element type `E`.
/// NOTE(review): behaviour for negative operands or `b == 0` is not visible
/// here. The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "arithmetic")]
pub fn ceil_div<E: ElementType>(a: E, b: E) -> E {
unreachable!()
}
/// Tile stub tagged as a compiler op in the `arithmetic` group (presumably an
/// element-wise "true" division of `a` by `b`; the exact rounding and type
/// handling are not visible here — confirm).
///
/// Operands and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; the call is presumably handled by the compiler rather
/// than executed.
#[cuda_tile::compiler_op(name = "arithmetic")]
#[cuda_tile::variadic_op(N = 6)]
pub fn true_div<E: ElementType, const S: [i32; N]>(a: Tile<E, S>, b: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.ceil` op (presumably rounds each
/// element of `x` up to an integral value — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.ceil", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn ceil<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.floor` op (presumably rounds each
/// element of `x` down to an integral value — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.floor", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn floor<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.sin` op (presumably the
/// element-wise sine of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.sin", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn sin<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.cos` op (presumably the
/// element-wise cosine of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.cos", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn cos<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.tan` op (presumably the
/// element-wise tangent of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.tan", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn tan<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.sinh` op (presumably the
/// element-wise hyperbolic sine of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.sinh", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn sinh<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.cosh` op (presumably the
/// element-wise hyperbolic cosine of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.cosh", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn cosh<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.tanh` op (presumably the
/// element-wise hyperbolic tangent of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.tanh", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn tanh<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.exp` op (presumably the
/// element-wise natural exponential of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.exp", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn exp<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.exp2` op (presumably the
/// element-wise base-2 exponential of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. Per
/// `static_params`, passing `ftz::Enabled` as `ftz` maps to
/// `flush_to_zero=unit`; no mapping is listed for `ftz::Disabled`.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.exp2", params=["x"], static_params=["ftz={Enabled: flush_to_zero=unit}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn exp2<E: ElementType, const S: [i32; N], F: ftz::Mode>(
x: Tile<E, S>,
ftz: F,
) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.log` op (presumably the
/// element-wise natural logarithm of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.log", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn log<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.log2` op (presumably the
/// element-wise base-2 logarithm of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.log2", params=["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn log2<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.sqrt` op (presumably the
/// element-wise square root of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The types of
/// `rounding` and `ftz` select static attributes through `static_params`:
/// five rounding markers map to `rounding_mode=...`, and `ftz::Enabled` maps
/// to `flush_to_zero=unit`. No mapping is listed for `rounding::Full` or
/// `ftz::Disabled`.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.sqrt", params=["x"], static_params=["rounding={NearestEven: rounding_mode=#cuda_tile.rounding<nearest_even>, PositiveInf: rounding_mode=#cuda_tile.rounding<positive_inf>, NegativeInf: rounding_mode=#cuda_tile.rounding<negative_inf>, Zero: rounding_mode=#cuda_tile.rounding<zero>, Approx: rounding_mode=#cuda_tile.rounding<approx>}", "ftz={Enabled: flush_to_zero=unit}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn sqrt<E: ElementType, const S: [i32; N], R: rounding::Mode, F: ftz::Mode>(
x: Tile<E, S>,
rounding: R,
ftz: F,
) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.rsqrt` op (presumably the
/// element-wise reciprocal square root of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. Per
/// `static_params`, passing `ftz::Enabled` as `ftz` maps to
/// `flush_to_zero=unit`; no mapping is listed for `ftz::Disabled`.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.rsqrt", params=["x"], static_params=["ftz={Enabled: flush_to_zero=unit}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn rsqrt<E: ElementType, const S: [i32; N], F: ftz::Mode>(
x: Tile<E, S>,
ftz: F,
) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.cmpi` op (presumably an
/// element-wise integer comparison of `lhs` with `rhs` — confirm).
///
/// The operands share element type `E` and shape `S`; the result is a
/// `bool` tile of the same shape. The type of `predicate` selects one of six
/// `comparison_predicate` attributes (`Equal`, `NotEqual`, `LessThan`,
/// `LessThanOrEqual`, `GreaterThan`, `GreaterThanOrEqual`). The op also
/// carries a `signedness` named attribute set to `inferred_signedness`
/// (presumably derived from `E` by the macro — confirm).
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(
name = "cuda_tile.cmpi",
params = ["lhs", "rhs"],
static_params = ["predicate={Equal: comparison_predicate=#cuda_tile.cmp_predicate<equal>, NotEqual: comparison_predicate=#cuda_tile.cmp_predicate<not_equal>, LessThan: comparison_predicate=#cuda_tile.cmp_predicate<less_than>, LessThanOrEqual: comparison_predicate=#cuda_tile.cmp_predicate<less_than_or_equal>, GreaterThan: comparison_predicate=#cuda_tile.cmp_predicate<greater_than>, GreaterThanOrEqual: comparison_predicate=#cuda_tile.cmp_predicate<greater_than_or_equal>}"],
named_attributes = ["signedness=inferred_signedness"]
)]
#[cuda_tile::variadic_op(N = 6)]
pub fn cmpi<E: ElementType, const S: [i32; N], P: predicate::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
predicate: P,
) -> Tile<bool, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.cmpf` op (presumably an
/// element-wise floating-point comparison of `lhs` with `rhs` — confirm).
///
/// The operands share element type `E` and shape `S`; the result is a
/// `bool` tile of the same shape. The type of `predicate` selects one of six
/// `comparison_predicate` attributes, and the type of `ordering` selects
/// `comparison_ordering` as either `unordered` or `ordered` (presumably the
/// NaN-handling mode — confirm).
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(
name = "cuda_tile.cmpf",
params = ["lhs", "rhs"],
static_params = [
"predicate={Equal: comparison_predicate=#cuda_tile.cmp_predicate<equal>, NotEqual: comparison_predicate=#cuda_tile.cmp_predicate<not_equal>, LessThan: comparison_predicate=#cuda_tile.cmp_predicate<less_than>, LessThanOrEqual: comparison_predicate=#cuda_tile.cmp_predicate<less_than_or_equal>, GreaterThan: comparison_predicate=#cuda_tile.cmp_predicate<greater_than>, GreaterThanOrEqual: comparison_predicate=#cuda_tile.cmp_predicate<greater_than_or_equal>}",
"ordering={Unordered: comparison_ordering=#cuda_tile.comparison_ordering<unordered>, Ordered: comparison_ordering=#cuda_tile.comparison_ordering<ordered>}"
]
)]
#[cuda_tile::variadic_op(N = 6)]
pub fn cmpf<E: ElementType, const S: [i32; N], P: predicate::Mode, Ord: cmp_ordering::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
predicate: P,
ordering: Ord,
) -> Tile<bool, S> {
unreachable!()
}
/// Tile stub tagged as a compiler op in the `tile` group (presumably an
/// element-wise `lhs == rhs` test — confirm).
///
/// The operands share element type `E` and shape `S`; the result is a `bool`
/// tile of the same shape. The body is `unreachable!()`; the call is
/// presumably handled by the compiler rather than executed.
#[cuda_tile::compiler_op(name = "tile")]
#[cuda_tile::variadic_op(N = 6)]
pub fn eq_tile<E: ElementType, const S: [i32; N]>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
) -> Tile<bool, S> {
unreachable!()
}
/// Tile stub tagged as a compiler op in the `tile` group (presumably an
/// element-wise `lhs != rhs` test — confirm).
///
/// The operands share element type `E` and shape `S`; the result is a `bool`
/// tile of the same shape. The body is `unreachable!()`; the call is
/// presumably handled by the compiler rather than executed.
#[cuda_tile::compiler_op(name = "tile")]
#[cuda_tile::variadic_op(N = 6)]
pub fn ne_tile<E: ElementType, const S: [i32; N]>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
) -> Tile<bool, S> {
unreachable!()
}
/// Tile stub tagged as a compiler op in the `tile` group (presumably an
/// element-wise `lhs > rhs` test — confirm).
///
/// The operands share element type `E` and shape `S`; the result is a `bool`
/// tile of the same shape. The body is `unreachable!()`; the call is
/// presumably handled by the compiler rather than executed.
#[cuda_tile::compiler_op(name = "tile")]
#[cuda_tile::variadic_op(N = 6)]
pub fn gt_tile<E: ElementType, const S: [i32; N]>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
) -> Tile<bool, S> {
unreachable!()
}
/// Tile stub tagged as a compiler op in the `tile` group (presumably an
/// element-wise `lhs >= rhs` test — confirm).
///
/// The operands share element type `E` and shape `S`; the result is a `bool`
/// tile of the same shape. The body is `unreachable!()`; the call is
/// presumably handled by the compiler rather than executed.
#[cuda_tile::compiler_op(name = "tile")]
#[cuda_tile::variadic_op(N = 6)]
pub fn ge_tile<E: ElementType, const S: [i32; N]>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
) -> Tile<bool, S> {
unreachable!()
}
/// Tile stub tagged as a compiler op in the `tile` group (presumably an
/// element-wise `lhs < rhs` test — confirm).
///
/// The operands share element type `E` and shape `S`; the result is a `bool`
/// tile of the same shape. The body is `unreachable!()`; the call is
/// presumably handled by the compiler rather than executed.
#[cuda_tile::compiler_op(name = "tile")]
#[cuda_tile::variadic_op(N = 6)]
pub fn lt_tile<E: ElementType, const S: [i32; N]>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
) -> Tile<bool, S> {
unreachable!()
}
/// Tile stub tagged as a compiler op in the `tile` group (presumably an
/// element-wise `lhs <= rhs` test — confirm).
///
/// The operands share element type `E` and shape `S`; the result is a `bool`
/// tile of the same shape. The body is `unreachable!()`; the call is
/// presumably handled by the compiler rather than executed.
#[cuda_tile::compiler_op(name = "tile")]
#[cuda_tile::variadic_op(N = 6)]
pub fn le_tile<E: ElementType, const S: [i32; N]>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
) -> Tile<bool, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.andi` op (presumably the
/// element-wise bitwise AND of `lhs` and `rhs` — confirm).
///
/// Operands and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.andi", params=["lhs", "rhs"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn andi<E: ElementType, const S: [i32; N]>(lhs: Tile<E, S>, rhs: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.ori` op (presumably the
/// element-wise bitwise OR of `lhs` and `rhs` — confirm).
///
/// Operands and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.ori", params=["lhs", "rhs"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn ori<E: ElementType, const S: [i32; N]>(lhs: Tile<E, S>, rhs: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.xori` op (presumably the
/// element-wise bitwise XOR of `lhs` and `rhs` — confirm).
///
/// Operands and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name="cuda_tile.xori", params=["lhs", "rhs"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn xori<E: ElementType, const S: [i32; N]>(lhs: Tile<E, S>, rhs: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.noti` op (presumably the
/// element-wise bitwise NOT of `x` — confirm).
///
/// Input and result share element type `E` and shape `S`. The body is
/// `unreachable!()`; calls are presumably lowered by the `cuda_tile` macros
/// rather than executed.
#[cuda_tile::op(name = "cuda_tile.noti", params = ["x"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn noti<E: ElementType, const S: [i32; N]>(x: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.shli` op (presumably an
/// element-wise left shift of `lhs` by `rhs` bits — confirm).
///
/// Operands and result share element type `E` and shape `S`. The type of
/// `overflow` selects the `overflow` attribute via `static_params`: `None`
/// maps to nothing, while `NoSignedWrap`, `NoUnsignedWrap` and `NoWrap` map
/// to the corresponding `#cuda_tile.overflow<...>` values.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name = "cuda_tile.shli", params = ["lhs", "rhs"], static_params = ["overflow={None: , NoSignedWrap: overflow=#cuda_tile.overflow<no_signed_wrap>, NoUnsignedWrap: overflow=#cuda_tile.overflow<no_unsigned_wrap>, NoWrap: overflow=#cuda_tile.overflow<no_wrap>}"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn shli<E: ElementType, const S: [i32; N], O: overflow::Mode>(
lhs: Tile<E, S>,
rhs: Tile<E, S>,
overflow: O,
) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.shri` op (presumably an
/// element-wise right shift of `lhs` by `rhs` bits — confirm).
///
/// Operands and result share element type `E` and shape `S`. The op carries
/// a `signedness` named attribute set to `inferred_signedness` (presumably
/// derived from `E`, which would decide arithmetic vs. logical shift —
/// confirm).
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.shri", params=["lhs", "rhs"], named_attributes=["signedness=inferred_signedness"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn shri<E: ElementType, const S: [i32; N]>(lhs: Tile<E, S>, rhs: Tile<E, S>) -> Tile<E, S> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.atomic_rmw_tko` op (presumably an
/// atomic read-modify-write applied per element through `pointers` —
/// confirm). Hidden from rustdoc via `#[doc(hidden)]`.
///
/// * `pointers` / `arg` — pointer tile and value tile of matching element
///   type `E` and shape `S`; these are the only entries in `params`.
/// * `mode` — marker implementing `atomic::Mode` (e.g. `Add`, `Xchg`).
/// * `memory_ordering` — marker implementing `ordering::AtomicMode`
///   (`Relaxed`, `Acquire`, `Release` or `AcqRel`).
/// * `memory_scope` — marker implementing `scope::Mode`.
/// * `mask` — optional `bool` tile of shape `S` (presumably selects which
///   elements participate — confirm).
/// * `token` — optional input `Token` (presumably an ordering dependency).
///
/// Returns a tile of type `E`/shape `S` together with a `Token` (presumably
/// the previous memory contents and the resulting ordering token — confirm).
/// How the non-`params` arguments reach the op is not visible in this block.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[doc(hidden)]
#[cuda_tile::op(name="cuda_tile.atomic_rmw_tko", params=["pointers", "arg"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn atomic_rmw_tko<
E: ElementType,
const S: [i32; N],
M: atomic::Mode,
O: ordering::AtomicMode,
Sc: scope::Mode,
>(
pointers: PointerTile<*mut E, S>,
arg: Tile<E, S>,
mode: M,
memory_ordering: O,
memory_scope: Sc,
mask: Option<Tile<bool, S>>,
token: Option<Token>,
) -> (Tile<E, S>, Token) {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.atomic_cas_tko` op (presumably a
/// per-element atomic compare-and-swap through `pointers` — confirm).
///
/// * `pointers`, `cmp`, `val` — pointer tile plus two value tiles, all with
///   element type `E` and shape `S`; these are the entries in `params`
///   (presumably the expected and replacement values respectively).
/// * `memory_ordering` — marker implementing `ordering::AtomicMode`
///   (`Relaxed`, `Acquire`, `Release` or `AcqRel`).
/// * `memory_scope` — marker implementing `scope::Mode`.
/// * `mask` — optional `bool` tile of shape `S` (presumably selects which
///   elements participate — confirm).
/// * `token` — optional input `Token` (presumably an ordering dependency).
///
/// Returns a tile of type `E`/shape `S` together with a `Token` (presumably
/// the previous memory contents and the resulting ordering token — confirm).
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.atomic_cas_tko", params=["pointers", "cmp", "val"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn atomic_cas_tko<
E: ElementType,
const S: [i32; N],
O: ordering::AtomicMode,
Sc: scope::Mode,
>(
pointers: PointerTile<*mut E, S>,
cmp: Tile<E, S>,
val: Tile<E, S>,
memory_ordering: O,
memory_scope: Sc,
mask: Option<Tile<bool, S>>,
token: Option<Token>,
) -> (Tile<E, S>, Token) {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.load_ptr_tko` op (presumably a
/// gather-style load of one element per pointer in `source` — confirm).
///
/// * `source` — pointer tile with element type `E` and shape `S`; the only
///   entry in `params`.
/// * `memory_ordering` — marker implementing `ordering::LoadMode` (`Weak`,
///   `Relaxed` or `Acquire`).
/// * `memory_scope` — optional marker implementing `scope::Mode`.
/// * `mask` — optional `bool` tile of shape `S`.
/// * `padding_value` — optional scalar of type `E` (presumably the value
///   used for masked-off elements — confirm).
/// * `token` — optional input `Token` (presumably an ordering dependency).
/// * `latency` — `Latency<CYCLES>` marker carrying a `u32` const (presumably
///   a latency hint in cycles — confirm).
///
/// Returns the loaded tile (type `E`, shape `S`) together with a `Token`.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.load_ptr_tko", params=["source"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn load_ptr_tko<
E: ElementType,
const S: [i32; N],
O: ordering::LoadMode,
Sc: scope::Mode,
const CYCLES: u32,
>(
source: PointerTile<*mut E, S>,
memory_ordering: O,
memory_scope: Option<Sc>,
mask: Option<Tile<bool, S>>,
padding_value: Option<E>,
token: Option<Token>,
latency: Latency<CYCLES>,
) -> (Tile<E, S>, Token) {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.store_ptr_tko` op (presumably a
/// scatter-style store of `value` through the pointers in `destination` —
/// confirm).
///
/// * `destination` / `value` — pointer tile and value tile with matching
///   element type `E` and shape `S`; the entries in `params`.
/// * `memory_ordering` — marker implementing `ordering::StoreMode` (`Weak`,
///   `Relaxed` or `Release`).
/// * `memory_scope` — optional marker implementing `scope::Mode`.
/// * `mask` — optional `bool` tile of shape `S` (presumably selects which
///   elements are written — confirm).
/// * `token` — optional input `Token` (presumably an ordering dependency).
/// * `latency` — `Latency<CYCLES>` marker carrying a `u32` const (presumably
///   a latency hint in cycles — confirm).
///
/// Returns a `Token`.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.store_ptr_tko", params=["destination", "value"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn store_ptr_tko<
E: ElementType,
const S: [i32; N],
O: ordering::StoreMode,
Sc: scope::Mode,
const CYCLES: u32,
>(
destination: PointerTile<*mut E, S>,
value: Tile<E, S>,
memory_ordering: O,
memory_scope: Option<Sc>,
mask: Option<Tile<bool, S>>,
token: Option<Token>,
latency: Latency<CYCLES>,
) -> Token {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.get_index_space_shape` op.
///
/// Takes a borrowed `Partition` with tile shape `S` and returns an
/// `[i32; N]` array with one entry per dimension (presumably the number of
/// tiles along each axis of the partition — confirm).
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name = "cuda_tile.get_index_space_shape", params = ["src"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn get_index_space_shape<E: ElementType, const S: [i32; N]>(
src: &Partition<E, S>,
) -> [i32; N] {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.get_tensor_shape` op.
///
/// Takes a borrowed `Tensor` with shape parameter `S` and returns an
/// `[i32; N]` array with one entry per dimension (presumably the runtime
/// extent of each axis — confirm).
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name = "cuda_tile.get_tensor_shape", params = ["src"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn get_tensor_shape<E: ElementType, const S: [i32; N]>(src: &Tensor<E, S>) -> [i32; N] {
unreachable!()
}
/// Stub declaration bound to the op named `load_view_tko` (presumably loads
/// the tile at `index` from a partition view — confirm).
///
/// NOTE(review): the op name here has no `cuda_tile.` prefix, unlike most
/// `op` attributes in this file — confirm this is intentional.
///
/// * `view` — shared borrow of a `Partition` whose tile shape is `D`.
/// * `index` — `[i32; N]` position (presumably in tile-index space).
/// * `memory_ordering` — marker implementing `ordering::LoadMode` (`Weak`,
///   `Relaxed` or `Acquire`).
/// * `memory_scope` — marker implementing `scope::Mode`.
/// * `latency` — optional `i32` (presumably a latency hint — confirm).
/// * `tma` — marker implementing `tma::Mode` (`Enabled` or `Disabled`).
///
/// Returns a tile with element type `E` and shape `D`. Only `view` and
/// `index` are listed in `params`.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name = "load_view_tko", params = ["view", "index"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn load_view_tko<
E: ElementType,
const D: [i32; N],
O: ordering::LoadMode,
Sc: scope::Mode,
T: tma::Mode,
>(
view: &Partition<E, D>,
index: [i32; N],
memory_ordering: O,
memory_scope: Sc,
latency: Option<i32>,
tma: T,
) -> Tile<E, D> {
unreachable!()
}
/// Stub declaration bound to the op named `load_view_tko`, taking a
/// `PartitionMut` view instead of a `Partition` (presumably loads the tile
/// at `index` from a mutable partition view — confirm).
///
/// NOTE(review): the op name here has no `cuda_tile.` prefix, unlike most
/// `op` attributes in this file — confirm this is intentional.
///
/// * `view` — shared borrow of a `PartitionMut` whose tile shape is `D`.
/// * `index` — `[i32; N]` position (presumably in tile-index space).
/// * `memory_ordering` — marker implementing `ordering::LoadMode`.
/// * `memory_scope` — marker implementing `scope::Mode`.
/// * `latency` — optional `i32` (presumably a latency hint — confirm).
/// * `tma` — marker implementing `tma::Mode`.
///
/// Returns a tile with element type `E` and shape `D`.
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must ensure the load does not race with writes through the same
/// mutable view — confirm and document.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name = "load_view_tko", params = ["view", "index"])]
#[cuda_tile::variadic_op(N = 6)]
pub unsafe fn load_view_tko_mut<
E: ElementType,
const D: [i32; N],
O: ordering::LoadMode,
Sc: scope::Mode,
T: tma::Mode,
>(
view: &PartitionMut<E, D>,
index: [i32; N],
memory_ordering: O,
memory_scope: Sc,
latency: Option<i32>,
tma: T,
) -> Tile<E, D> {
unreachable!()
}
/// Stub declaration bound to the op named `store_view_tko` (presumably
/// stores `tile` at `index` into a mutable partition view — confirm).
///
/// NOTE(review): the op name here has no `cuda_tile.` prefix, unlike most
/// `op` attributes in this file — confirm this is intentional.
///
/// * `view` — exclusive borrow of a `PartitionMut` whose tile shape is `D`.
/// * `tile` — tile to store, element type `E` and shape `D`.
/// * `index` — `[i32; N]` position (presumably in tile-index space).
/// * `memory_ordering` — marker implementing `ordering::StoreMode` (`Weak`,
///   `Relaxed` or `Release`).
/// * `memory_scope` — marker implementing `scope::Mode`.
/// * `latency` — optional `i32` (presumably a latency hint — confirm).
/// * `tma` — marker implementing `tma::Mode`.
///
/// Returns a `Token`.
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must ensure no other tile block concurrently accesses the written
/// region — confirm and document.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name = "store_view_tko", params = ["view", "tile", "index"])]
#[cuda_tile::variadic_op(N = 6)]
pub unsafe fn store_view_tko_mut<
E: ElementType,
const D: [i32; N],
O: ordering::StoreMode,
Sc: scope::Mode,
T: tma::Mode,
>(
view: &mut PartitionMut<E, D>,
tile: Tile<E, D>,
index: [i32; N],
memory_ordering: O,
memory_scope: Sc,
latency: Option<i32>,
tma: T,
) -> Token {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.make_tensor_view` op (presumably
/// builds a `Tensor` view over memory starting at `base` — confirm).
///
/// * `base` — pointer tile with the empty shape `[]`.
/// * `shape` — `Shape<D>`; its `dims` are passed as op operands
///   (`"shape.dims"` in `params`).
/// * `strides` — `Array<C>`; its `dims` are passed as op operands
///   (`"strides.dims"` in `params`) and it is also listed in
///   `output_type_params`.
/// * `token` — `Token` recorded in `output_type_meta` alongside `base`,
///   `shape` and `strides`.
///
/// Returns a `Tensor<E, D>`; the stride parameter `C` does not appear in the
/// return type.
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably `base`
/// must point to memory valid for the extent described by `shape` and
/// `strides` — confirm and document.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.make_tensor_view",
params=["base", "shape.dims", "strides.dims"],
has_variadic_params=true,
output_type_params=["strides"],
output_type_meta=["base", "shape", "strides", "token"]
)]
#[cuda_tile::variadic_op(N = 6)]
pub unsafe fn make_tensor_view<E: ElementType, const D: [i32; N], const C: [i32; N]>(
base: PointerTile<*mut E, { [] }>,
shape: Shape<D>,
strides: Array<C>,
token: Token,
) -> Tensor<E, D> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.make_partition_view` op
/// (presumably partitions `tensor_view` into tiles of shape `TILE_SHAPE` —
/// confirm).
///
/// * `tensor_view` — shared borrow of the tensor; the only entry in `params`.
/// * `tile` — `Shape<TILE_SHAPE>` giving the tile shape of the result.
/// * `padding_value` — marker implementing `padding::Mode`; listed in
///   `output_type_params`.
/// * `dim_map` — marker implementing `dim_map::Mode`; listed in
///   `output_type_params`.
/// * `token` — `Token` recorded in `output_type_meta` together with
///   `tensor_view.shape()`.
///
/// Returns a `Partition<'a, E, TILE_SHAPE>`. Note that `'a` is a free
/// lifetime parameter: the signature does not tie it to the `tensor_view`
/// borrow.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.make_partition_view",
params=["tensor_view"],
output_type_params=["tensor_view", "padding_value", "dim_map"],
output_type_meta=["token", "tensor_view.shape()"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn make_partition_view<
'a,
E: ElementType,
const TENSOR_SHAPE: [i32; N],
const TILE_SHAPE: [i32; N],
P: padding::Mode,
M: dim_map::Mode,
>(
tensor_view: &Tensor<E, TENSOR_SHAPE>,
tile: Shape<TILE_SHAPE>,
padding_value: P,
dim_map: M,
token: Token,
) -> Partition<'a, E, TILE_SHAPE> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.make_partition_view` op that
/// yields a `PartitionMut` (presumably a writable partition of `tensor_view`
/// into tiles of shape `TILE_SHAPE` — confirm).
///
/// * `tensor_view` — shared borrow of the tensor; the only entry in `params`.
/// * `shape` — `Shape<TILE_SHAPE>` giving the tile shape of the result.
/// * `padding_value` — marker implementing `padding::Mode`; listed in
///   `output_type_params`.
/// * `token` — `Token` recorded in `output_type_meta`.
///
/// Returns a `PartitionMut<'a, E, TILE_SHAPE>`. `'a` is a free lifetime
/// parameter that the signature does not tie to the `tensor_view` borrow, and
/// the mutable view is produced from a shared reference.
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must guarantee exclusive access to the underlying tensor for the
/// lifetime of the returned view — confirm and document.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.make_partition_view",
params=["tensor_view"],
output_type_params=["tensor_view", "padding_value"],
output_type_meta=["token"]
)]
#[cuda_tile::variadic_op(N = 6)]
pub unsafe fn make_partition_view_mut<
'a,
E: ElementType,
const TENSOR_SHAPE: [i32; N],
const TILE_SHAPE: [i32; N],
P: padding::Mode,
>(
tensor_view: &Tensor<E, TENSOR_SHAPE>,
shape: Shape<TILE_SHAPE>,
padding_value: P,
token: Token,
) -> PartitionMut<'a, E, TILE_SHAPE> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.get_global` op.
///
/// Takes no runtime arguments and returns a pointer tile with the empty shape
/// `[]` pointing at elements of type `E`. The op declares a `name:symbol_ref`
/// named attribute; how the symbol name is supplied is not visible in this
/// block (TODO confirm).
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.get_global", params=[], named_attributes=["name:symbol_ref"])]
pub fn get_global<E: ElementType>() -> PointerTile<*mut E, { [] }> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.offset` op with a scalar offset
/// (presumably advances every pointer in `ptr` by `offset` elements —
/// confirm units).
///
/// The result has the same pointer type `P` and shape `D` as `ptr`.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.offset", params=["ptr", "offset"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn addptr<P: Pointer, const D: [i32; N]>(
ptr: PointerTile<P, D>,
offset: i32,
) -> PointerTile<P, D> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.offset` op with a per-element
/// offset tile (presumably advances each pointer in `ptr` by the matching
/// element of `offset` — confirm units).
///
/// `offset` has element type `I` and the same shape `D` as `ptr`; the result
/// has the same pointer type `P` and shape `D` as `ptr`.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.offset", params=["ptr", "offset"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn addptr_tile<I: ElementType, P: Pointer, const D: [i32; N]>(
ptr: PointerTile<P, D>,
offset: Tile<I, D>,
) -> PointerTile<P, D> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.broadcast` op for pointer tiles
/// (presumably broadcasts `source` from shape `S` to shape `R` — confirm).
///
/// Source and result shapes have the same rank `N`. Only `source` is listed
/// in `params`; `shape` fixes the result shape `R` through its type. The
/// `variadic_op` attribute sets `method = "broadcast"` (presumably also
/// exposing this as a `.broadcast(..)` method — confirm).
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.broadcast", params=["source"])]
#[cuda_tile::variadic_op(N = 6, method = "broadcast")]
pub fn broadcast_ptr<P: Pointer, const S: [i32; N], const R: [i32; N]>(
source: PointerTile<P, S>,
shape: Shape<R>,
) -> PointerTile<P, R> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.reshape` op for pointer tiles
/// (presumably reinterprets `source` from shape `S` to shape `R` — confirm).
///
/// The source rank `N` and result rank `M` are independent. Only `source` is
/// listed in `params`; `shape` fixes the result shape `R` through its type.
/// The `variadic_op` attribute sets `method = "reshape"` (presumably also
/// exposing this as a `.reshape(..)` method — confirm).
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name="cuda_tile.reshape", params=["source"])]
#[cuda_tile::variadic_op(N = 6, M = 6, method = "reshape")]
pub fn reshape_ptr<P: Pointer, const S: [i32; N], const R: [i32; M]>(
source: PointerTile<P, S>,
shape: Shape<R>,
) -> PointerTile<P, R> {
unreachable!()
}
/// Stub declaration bound to the `cuda_tile.print_tko` op (presumably prints
/// the contents of `arg` from device code — confirm).
///
/// * `str` — string slice (presumably a format or label string; it is not
///   listed in `params`, so how it reaches the op is not visible here).
/// * `arg` — tile to print; the only entry in `params`.
/// * `token` — optional input `Token` (presumably an ordering dependency).
///
/// Returns a `Token`.
///
/// The body is `unreachable!()`; calls are presumably lowered by the
/// `cuda_tile` macros rather than executed.
#[cuda_tile::op(name = "cuda_tile.print_tko", params = ["arg"])]
#[cuda_tile::variadic_op(N = 6)]
pub fn print_tko<E: ElementType, const S: [i32; N]>(
str: &str,
arg: Tile<E, S>,
token: Option<Token>,
) -> Token {
unreachable!()
}
/// Stub tagged as a compiler op in the `assume` group; takes `x` and returns
/// a value of the same type `T` (presumably `x` annotated with the assumption
/// that it is divisible by `DIVISOR` — confirm).
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must guarantee the asserted property actually holds — confirm.
///
/// The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "assume")]
pub unsafe fn assume_div_by<T, const DIVISOR: i32>(x: T) -> T {
unreachable!()
}
/// Stub tagged as a compiler op in the `assume` group; takes `x` and returns
/// a value of the same type `T`, parameterised by the const generics
/// `divisor`, `every` and `along`.
///
/// NOTE(review): the precise meaning of `every` and `along` is not visible in
/// this block (presumably a divisibility assumption applied at a stride of
/// `every` along dimension `along`) — confirm against the compiler. The
/// const parameter names are lower-case, unlike the other `assume_*`
/// functions here; the compiler may depend on these names, so they are left
/// untouched.
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must guarantee the asserted property actually holds — confirm.
///
/// The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "assume")]
pub unsafe fn assume_div_by_every_along<
T,
const divisor: i32,
const every: i32,
const along: i32,
>(
x: T,
) -> T {
unreachable!()
}
/// Stub tagged as a compiler op in the `assume` group; takes `x` and returns
/// a value of the same type `T` (presumably `x` annotated with the assumption
/// that it is bounded below by `LOWER`; inclusivity not visible — confirm).
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must guarantee the asserted bound actually holds — confirm.
///
/// The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "assume")]
pub unsafe fn assume_bounds_lower<T, const LOWER: i32>(x: T) -> T {
unreachable!()
}
/// Stub tagged as a compiler op in the `assume` group; takes `x` and returns
/// a value of the same type `T` (presumably `x` annotated with the assumption
/// that it is bounded above by `UPPER`; inclusivity not visible — confirm).
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must guarantee the asserted bound actually holds — confirm.
///
/// The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "assume")]
pub unsafe fn assume_bounds_upper<T, const UPPER: i32>(x: T) -> T {
unreachable!()
}
/// Stub tagged as a compiler op in the `assume` group; takes `x` and returns
/// a value of the same type `T` (presumably `x` annotated with the assumption
/// that it lies between `LOWER` and `UPPER`; inclusivity not visible —
/// confirm).
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must guarantee the asserted bounds actually hold — confirm.
///
/// The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "assume")]
pub unsafe fn assume_bounds<T, const LOWER: i32, const UPPER: i32>(x: T) -> T {
unreachable!()
}
/// Stub tagged as a compiler op in the `assume` group; takes `x` and returns
/// a value of the same type `T`, parameterised by one group size `GROUP0`.
///
/// NOTE(review): presumably asserts that elements within each group of
/// `GROUP0` consecutive positions along the single dimension are equal —
/// the exact semantics are not visible here; confirm against the compiler.
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must guarantee the asserted property actually holds — confirm.
///
/// The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "assume")]
pub unsafe fn assume_same_elements_1d<T, const GROUP0: i32>(x: T) -> T {
unreachable!()
}
/// Stub tagged as a compiler op in the `assume` group; takes `x` and returns
/// a value of the same type `T`, parameterised by two group sizes `GROUP0`
/// and `GROUP1`.
///
/// NOTE(review): presumably asserts that elements within each
/// `GROUP0 x GROUP1` group are equal (one group size per dimension) — the
/// exact semantics are not visible here; confirm against the compiler.
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must guarantee the asserted property actually holds — confirm.
///
/// The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "assume")]
pub unsafe fn assume_same_elements_2d<T, const GROUP0: i32, const GROUP1: i32>(x: T) -> T {
unreachable!()
}
/// Stub tagged as a compiler op in the `assume` group; takes `x` and returns
/// a value of the same type `T`, parameterised by three group sizes
/// `GROUP0`, `GROUP1` and `GROUP2`.
///
/// NOTE(review): presumably asserts that elements within each group are
/// equal, with one group size per dimension — the exact semantics are not
/// visible here; confirm against the compiler.
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must guarantee the asserted property actually holds — confirm.
///
/// The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "assume")]
pub unsafe fn assume_same_elements_3d<
T,
const GROUP0: i32,
const GROUP1: i32,
const GROUP2: i32,
>(
x: T,
) -> T {
unreachable!()
}
/// Stub tagged as a compiler op in the `assume` group; takes `x` and returns
/// a value of the same type `T`, parameterised by four group sizes
/// `GROUP0` through `GROUP3`.
///
/// NOTE(review): presumably asserts that elements within each group are
/// equal, with one group size per dimension — the exact semantics are not
/// visible here; confirm against the compiler.
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// caller must guarantee the asserted property actually holds — confirm.
///
/// The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "assume")]
pub unsafe fn assume_same_elements_4d<
T,
const GROUP0: i32,
const GROUP1: i32,
const GROUP2: i32,
const GROUP3: i32,
>(
x: T,
) -> T {
unreachable!()
}
/// Expands the scalar `x` into a tile of shape `S`.
///
/// The scalar is first turned into a tile with the empty shape `[]`, then
/// reshaped to a rank-`N` shape whose dimensions are all `1`, and finally
/// broadcast to `shape`. The `variadic_op` attribute names the trait
/// `BroadcastScalarFn` (presumably the generated per-rank dispatch trait —
/// confirm).
#[cuda_tile::variadic_op(N = 6, trait_name = "BroadcastScalarFn")]
pub fn broadcast_scalar<E: ElementType, const S: [i32; N]>(
x: E,
shape: Shape<S>,
) -> Tile<E, S> {
// Rank-N shape of all ones: the intermediate shape between the scalar tile
// and the broadcast target.
let ones_shape: Shape<{ [1; N] }> = Shape::<{ [1; N] }> { dims: &[1i32; N] };
let tile_x: Tile<E, { [] }> = scalar_to_tile(x);
tile_x.reshape(ones_shape).broadcast(shape)
}
/// Loads the tile of shape `R` at tile index `idx` from tensor `x`.
///
/// Builds a partition view of `x` with tile shape `tile_shape`, using
/// `padding::Zero` and `dim_map::Identity` and the tensor's current token,
/// then loads from that view with `ordering::Weak`, `scope::TileBlock`, no
/// latency value (`None`) and `tma::Enabled`.
#[cuda_tile::variadic_op(N = 6)]
pub fn load_tile<E: ElementType, const S: [i32; N], const R: [i32; N]>(
x: &Tensor<E, S>,
tile_shape: Shape<R>,
idx: [i32; N],
) -> Tile<E, R> {
// The tensor's token is handed to the partition view it is derived from.
let tensor_token: Token = get_tensor_token(x);
let x_partition: Partition<E, R> = make_partition_view(
x,
tile_shape,
padding::Zero,
dim_map::Identity,
tensor_token,
);
let tile_x: Tile<E, R> = load_view_tko(
&x_partition,
idx,
ordering::Weak,
scope::TileBlock,
None,
tma::Enabled,
);
tile_x
}
/// Loads the tile at index `[0; N]` from the mutable tensor `y`, using the
/// tensor's own shape `S` as the tile shape.
///
/// A `PartitionMut` over `y` is created with `padding::Zero` and the tensor's
/// current token, the tile at the all-zero index is loaded with
/// `ordering::Weak`, `scope::TileBlock`, no latency value and `tma::Enabled`,
/// and the partition's token is then written back to `y`.
#[cuda_tile::variadic_op(N = 6)]
pub fn load_tile_mut<E: ElementType, const S: [i32; N]>(y: &mut Tensor<E, S>) -> Tile<E, S> {
let tile_shape: Shape<S> = y.shape();
let tensor_token: Token = get_tensor_token(y);
// SAFETY: NOTE(review) — presumably justified by holding `&mut` on `y`, so
// no other view of the tensor exists here; confirm against the contract of
// `make_partition_view_mut`.
let y_partition: PartitionMut<E, S> =
unsafe { make_partition_view_mut(y, tile_shape, padding::Zero, tensor_token) };
// SAFETY: NOTE(review) — the view was created just above and is not shared;
// confirm against the contract of `load_view_tko_mut`.
let tile_y: Tile<E, S> = unsafe {
load_view_tko_mut(
&y_partition,
[0i32; N],
ordering::Weak,
scope::TileBlock,
None,
tma::Enabled,
)
};
// Propagate the partition's token back to the tensor so later operations on
// `y` see it.
let new_token: Token = get_partition_token_mut(&y_partition);
set_tensor_token(y, new_token);
tile_y
}
/// Stores `result` into the mutable tensor `y` at tile index `[0; N]`, using
/// the tensor's own shape `S` as the tile shape.
///
/// A `PartitionMut` over `y` is created with `padding::Zero` and the tensor's
/// current token, `result` is stored at the all-zero index with
/// `ordering::Weak`, `scope::TileBlock`, no latency value and `tma::Enabled`,
/// and the partition's token is then written back to `y`.
#[cuda_tile::variadic_op(N = 6)]
pub fn store_tile<E: ElementType, const S: [i32; N]>(y: &mut Tensor<E, S>, result: Tile<E, S>) {
let tile_shape: Shape<S> = y.shape();
let tensor_token: Token = get_tensor_token(y);
// SAFETY: NOTE(review) — presumably justified by holding `&mut` on `y`, so
// no other view of the tensor exists here; confirm against the contract of
// `make_partition_view_mut`.
let mut y_partition: PartitionMut<E, S> =
unsafe { make_partition_view_mut(y, tile_shape, padding::Zero, tensor_token) };
// SAFETY: NOTE(review) — the view was created just above and is borrowed
// exclusively; confirm against the contract of `store_view_tko_mut`.
// The `Token` returned by the store is discarded here; the token written
// back to `y` below is read from the partition instead.
unsafe {
store_view_tko_mut(
&mut y_partition,
result,
[0i32; N],
ordering::Weak,
scope::TileBlock,
None,
tma::Enabled,
)
};
let new_token: Token = get_partition_token_mut(&y_partition);
set_tensor_token(y, new_token);
}
/// Loads a tile from `self` whose shape is dictated by another value `y`.
///
/// `Y` is the type of the value supplying the shape; `Out` is the resulting
/// tile type.
pub trait LoadTileLike<Y> {
/// Type of the tile produced by [`LoadTileLike::load_tile_like`].
type Out;
/// Loads a tile from `self`, shaped according to `y`.
fn load_tile_like(&self, y: &Y) -> Self::Out;
}
/// Rank-1 implementation: `self` is a tensor with shape parameter `[-1]`
/// (presumably a dynamically sized dimension — confirm) and `y` is a rank-1
/// tensor with shape `S`, possibly of a different element type.
impl<E1: ElementType, E2: ElementType, const S: [i32; 1]> LoadTileLike<Tensor<E2, S>>
for Tensor<E1, { [-1] }>
{
type Out = Tile<E1, S>;
/// Loads the tile of shape `S` (taken from `y.shape()`) located at tile
/// index `[pid.0]`, where `pid` is the current tile block id.
///
/// The partition view uses `padding::None` and `dim_map::Identity`; the load
/// uses `ordering::Weak`, `scope::TileBlock`, no latency value and
/// `tma::Enabled`.
fn load_tile_like(&self, y: &Tensor<E2, S>) -> Tile<E1, S> {
let x = self;
// Only the first component of the block id is used for a rank-1 tensor.
let pid: (i32, i32, i32) = get_tile_block_id();
let tile_shape: Shape<S> = y.shape();
let tensor_token: Token = get_tensor_token(x);
let x_partition: Partition<E1, S> = make_partition_view(
x,
tile_shape,
padding::None,
dim_map::Identity,
tensor_token,
);
let tile_x: Tile<E1, S> = load_view_tko(
&x_partition,
[pid.0],
ordering::Weak,
scope::TileBlock,
None,
tma::Enabled,
);
tile_x
}
}
/// Rank-2 implementation: `self` is a tensor with shape parameter `[-1, -1]`
/// (presumably dynamically sized dimensions — confirm) and `y` is a rank-2
/// tensor with shape `S`, possibly of a different element type.
impl<E1: ElementType, E2: ElementType, const S: [i32; 2]> LoadTileLike<Tensor<E2, S>>
for Tensor<E1, { [-1, -1] }>
{
type Out = Tile<E1, S>;
/// Loads the tile of shape `S` (taken from `y.shape()`) located at tile
/// index `[pid.0, pid.1]`, where `pid` is the current tile block id.
///
/// The partition view uses `padding::None` and `dim_map::Identity`; the load
/// uses `ordering::Weak`, `scope::TileBlock`, no latency value and
/// `tma::Enabled`.
fn load_tile_like(&self, y: &Tensor<E2, S>) -> Tile<E1, S> {
let x = self;
// The first two components of the block id index the two dimensions.
let pid: (i32, i32, i32) = get_tile_block_id();
let tile_shape: Shape<S> = y.shape();
let tensor_token: Token = get_tensor_token(x);
let x_partition: Partition<E1, S> = make_partition_view(
x,
tile_shape,
padding::None,
dim_map::Identity,
tensor_token,
);
let tile_x: Tile<E1, S> = load_view_tko(
&x_partition,
[pid.0, pid.1],
ordering::Weak,
scope::TileBlock,
None,
tma::Enabled,
);
tile_x
}
}
/// Rank-3 implementation: `self` is a tensor with shape parameter
/// `[-1, -1, -1]` (presumably dynamically sized dimensions — confirm) and `y`
/// is a rank-3 tensor with shape `S`, possibly of a different element type.
impl<E1: ElementType, E2: ElementType, const S: [i32; 3]> LoadTileLike<Tensor<E2, S>>
for Tensor<E1, { [-1, -1, -1] }>
{
type Out = Tile<E1, S>;
/// Loads the tile of shape `S` (taken from `y.shape()`) located at tile
/// index `[pid.0, pid.1, pid.2]`, where `pid` is the current tile block id.
///
/// The partition view uses `padding::None` and `dim_map::Identity`; the load
/// uses `ordering::Weak`, `scope::TileBlock`, no latency value and
/// `tma::Enabled`.
fn load_tile_like(&self, y: &Tensor<E2, S>) -> Tile<E1, S> {
let x = self;
// All three components of the block id index the three dimensions.
let pid: (i32, i32, i32) = get_tile_block_id();
let tile_shape: Shape<S> = y.shape();
let tensor_token: Token = get_tensor_token(x);
let x_partition: Partition<E1, S> = make_partition_view(
x,
tile_shape,
padding::None,
dim_map::Identity,
tensor_token,
);
let tile_x: Tile<E1, S> = load_view_tko(
&x_partition,
[pid.0, pid.1, pid.2],
ordering::Weak,
scope::TileBlock,
None,
tma::Enabled,
);
tile_x
}
}
/// Free-function form of [`LoadTileLike::load_tile_like`]: loads a tile from
/// `x` shaped according to `y` by delegating to the trait method.
///
/// The return type is the `Out` associated type of `X`'s `LoadTileLike<Y>`
/// implementation.
pub fn load_tile_like<X, Y>(x: &X, y: &Y) -> <X as LoadTileLike<Y>>::Out
where
X: LoadTileLike<Y>,
{
x.load_tile_like(y)
}
/// Reads one `i64` from `dst` at `idx`, converts it to a pointer, and builds a
/// tensor view of element type `T` over that pointer with the given `shape`
/// and `strides`.
///
/// Steps: partition `dst` with an all-ones tile shape, load the single-element
/// tile at `idx`, reshape it to the empty shape `[]`, convert it with
/// `int_to_ptr`, and call `make_tensor_view` with a token from
/// `new_token_unordered()`. `strides` has the shape parameter `[-1; M]`
/// (presumably runtime strides — confirm).
///
/// NOTE(review): despite its name, `dst` is the tensor the pointer value is
/// read from.
///
/// # Safety
///
/// The contract is not stated in this block. NOTE(review): presumably the
/// `i64` stored at `idx` must be a valid device address for a tensor of
/// element type `T` with the given `shape` and `strides` — confirm and
/// document.
#[cuda_tile::variadic_op(N = 6, M = 6)]
pub unsafe fn load_tensor<T: ElementType, const S: [i32; N], const R: [i32; M]>(
dst: &Tensor<i64, S>,
idx: [i32; N],
shape: Shape<R>,
strides: Array<{ [-1; M] }>,
) -> Tensor<T, R> {
// NOTE(review): `dims` is an empty slice although the shape type is
// `[1; N]`; `broadcast_scalar` in this file passes `&[1i32; N]` for the same
// shape type. Presumably only the type-level shape matters here — confirm.
let dims: &[i32] = &[];
let ones_shape: Shape<{ [1; N] }> = Shape::<{ [1; N] }> { dims: dims };
let dst_part: Partition<i64, { [1; N] }> = dst.partition(ones_shape);
let dst_ptr_int: Tile<i64, { [1; N] }> = dst_part.load(idx);
// Collapse the single-element tile to the empty shape before the pointer cast.
let dst_ptr_int: Tile<i64, { [] }> = dst_ptr_int.reshape(const_shape![]);
let dst_ptr: PointerTile<*mut T, { [] }> = int_to_ptr(dst_ptr_int);
// SAFETY: NOTE(review) — relies on this function's own (undocumented) caller
// contract that the loaded integer is a valid base pointer; confirm.
let dst_tensor: Tensor<T, R> =
unsafe { make_tensor_view(dst_ptr, shape, strides, new_token_unordered()) };
dst_tensor
}
/// Stub tagged as a compiler op in the `shape` group (presumably reorders the
/// entries of `source` according to `permutation` — confirm the direction of
/// the mapping against the compiler).
///
/// `source` and the result are `[i32; N]` arrays; `permutation` is an
/// `Array<I>` whose const parameter `I` has the same length `N`.
///
/// The body is `unreachable!()`; the call is presumably handled by the
/// compiler rather than executed.
#[cuda_tile::compiler_op(name = "shape")]
#[cuda_tile::variadic_op(N = 6)]
pub fn permute_array<const I: [i32; N]>(source: [i32; N], permutation: Array<I>) -> [i32; N] {
unreachable!()
}
}