Skip to main content

oxicuda_vision/patch_embed/
mod.rs

1//! Patch embedding for Vision Transformers.
2//!
3//! Converts a CHW image to a sequence of patch tokens by applying a
4//! strided Conv2D with `kernel_size == stride == patch_size`.
5//! Also provides 2-D sinusoidal and learnable positional encodings.
6
7pub mod conv2d_patch;
8pub mod pos_embed;
9
10pub use conv2d_patch::{PatchEmbed, PatchEmbedConfig, PatchEmbedWeights, prepend_cls};
11pub use pos_embed::{LearnablePosEmbed, add_pos_embed, pos_2d_sincos};