Enum TensorMapFormat

Source

/// Format of [`TensorMap`].
pub enum TensorMapFormat {
    /// Simple tiling.
    Tiled {
        /// Tile size that's loaded from memory in each copy operation. Must
        /// have `rank` elements; a dimension that isn't present in the tile
        /// should be set to 1.
        ///
        /// For CUDA, each dimension must be a power of two and <= 256.
        tile_size: Vec<u32>,
    },
    /// Im2col indexing. Loads a "column" of pixels into shared memory, with a
    /// certain offset (kernel position). The corners are the bounds to load
    /// pixels from at offset 0; the offset is added to the corner offsets.
    Im2col {
        /// Pixel box lower corner: the logical upper left corner in the input
        /// tensor when offset is 0. Its length should equal the number of
        /// spatial dimensions of the input tensor (i.e. `h, w` for an NHWC
        /// tensor). Should normally be set to `-padding`.
        pixel_box_lower_corner: Vec<i32>,
        /// Pixel box upper corner: the logical lower right corner in the input
        /// tensor when offset is 0. Its length should equal the number of
        /// spatial dimensions of the input tensor (i.e. `h, w` for an NHWC
        /// tensor). This is not equal to the padding.
        pixel_box_upper_corner: Vec<i32>,
        /// Channels to load per pixel in each load (not the total number of
        /// channels in the tensor). Should be a multiple or divisor of the
        /// matmul tile size. Must be <= 256 and aligned to 16 bytes.
        channels_per_pixel: u32,
        /// Pixels per column, equivalent to the m/n dimension of each tile in
        /// the matrix multiplication. Must be <= 256 and aligned to 16 bytes.
        pixels_per_column: u32,
    },
    /// Wide im2col.
    Im2colWide {
        /// Pixel box lower corner width. TODO: How does this work?
        pixel_box_lower_corner_width: i32,
        /// Pixel box upper corner width. TODO: How does this work?
        pixel_box_upper_corner_width: i32,
        /// Channels per pixel.
        channels_per_pixel: u32,
        /// Pixels per column.
        pixels_per_column: u32,
    },
}

Expand description

Format of [TensorMap]

Variants§

§

Tiled

Simple tiling

Fields

§tile_size: Vec<u32>

Tile size that’s loaded from memory in each copy operation. Must have rank elements, one per tensor dimension. In matmul, for example, this might be batch x m x k, or whatever the stage size is. If a dimension isn’t present in the tile, its size should just be set to 1.

For CUDA, this must be a power of two and <= 256 on each dimension.

§

Im2col

Im2col indexing. Loads a “column” (not the same column as im2col) of pixels into shared memory, with a certain offset (kernel position). The corners are the bounds to load pixels from at offset 0, i.e. for the top left corner of the kernel. The offset is added to the corner offsets, so a (-1, -1) corner will stop the bounding box at (1, 1) for kernel offset (2, 2).

Fields

§pixel_box_lower_corner: Vec<i32>

Pixel box lower corner. This is the logical upper left corner in the input tensor, when offset is 0. The length of this value should equal the spatial dimensions of the input tensor (i.e. h, w for an NHWC tensor). Should normally be set to -padding.

§pixel_box_upper_corner: Vec<i32>

Pixel box upper corner. This is the logical lower right corner in the input tensor, when offset is 0. The length of this value should equal the spatial dimensions of the input tensor (i.e. h, w for an NHWC tensor). Should normally be set to padding - (kernel_size - 1) (where kernel_size accounts for dilation). This is not equal to padding; it’s equal to the bounding box for the top left corner of the kernel.

§channels_per_pixel: u32

Channels to load per pixel. Should be a multiple or divisor of the matmul tile size. This is not the total number of channels in the tensor, but only the number loaded in each load. Must be <= 256 and aligned to 16 bytes.

§pixels_per_column: u32

Pixels per column, equivalent to the m/n dimension of each tile in the matrix multiplication (i.e. NHW for a 4D tensor). Must be <= 256 and aligned to 16 bytes.

§

Im2colWide

Wide im2col

Fields

§pixel_box_lower_corner_width: i32

Pixel box lower corner width. TODO: How does this work?

§pixel_box_upper_corner_width: i32

Pixel box upper corner width. TODO: How does this work?

§channels_per_pixel: u32

Channels per pixel

§pixels_per_column: u32

Pixels per column

Enum TensorMapFormat

Variants§

Tiled

Fields

Im2col

Fields

Im2colWide

Fields

Trait Implementations§

impl Clone for TensorMapFormat

fn clone(&self) -> TensorMapFormat

fn clone_from(&mut self, source: &Self)

impl Debug for TensorMapFormat

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

impl<'de> Deserialize<'de> for TensorMapFormat

fn deserialize<__D>(__deserializer: __D) -> Result<TensorMapFormat, <__D as Deserializer<'de>>::Error> where __D: Deserializer<'de>,

impl Hash for TensorMapFormat

fn hash<__H>(&self, state: &mut __H) where __H: Hasher,

fn hash_slice<H>(data: &[Self], state: &mut H) where H: Hasher, Self: Sized,

impl PartialEq for TensorMapFormat

fn eq(&self, other: &TensorMapFormat) -> bool

fn ne(&self, other: &Rhs) -> bool

impl Serialize for TensorMapFormat

fn serialize<__S>(&self, __serializer: __S) -> Result<<__S as Serializer>::Ok, <__S as Serializer>::Error> where __S: Serializer,

impl Eq for TensorMapFormat

impl StructuralPartialEq for TensorMapFormat

Auto Trait Implementations§

impl Freeze for TensorMapFormat

impl RefUnwindSafe for TensorMapFormat

impl Send for TensorMapFormat

impl Sync for TensorMapFormat

impl Unpin for TensorMapFormat

impl UnwindSafe for TensorMapFormat

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<Q, K> Equivalent<K> for Q where Q: Eq + ?Sized, K: Borrow<Q> + ?Sized,

fn equivalent(&self, key: &K) -> bool

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> CacheKey for T where T: Serialize + DeserializeOwned + PartialEq + Eq + Clone + Hash,

impl<T> CacheValue for T where T: Serialize + DeserializeOwned + PartialEq + Eq + Clone,

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

Enum TensorMapFormat

fn deserialize<D>( deserializer: D, ) -> Result<TensorMapFormat, <D as Deserializer<'de>>::Error>
where D: Deserializer<'de>,

fn hash<H>(&self, state: &mut H)
where H: Hasher,

fn hash_slice<H>(data: &[Self], state: &mut H)
where H: Hasher, Self: Sized,

fn serialize<S>( &self, serializer: S, ) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<Q, K> Equivalent<K> for Q
where Q: Eq + ?Sized, K: Borrow<Q> + ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

impl<T> CacheKey for T
where T: Serialize + DeserializeOwned + PartialEq + Eq + Clone + Hash,

impl<T> CacheValue for T
where T: Serialize + DeserializeOwned + PartialEq + Eq + Clone,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,