pub fn conv3d_patch_embed(
patch: &Vjepa2PatchEmbedWeights,
video_ncthw: &[f32],
frames: usize,
height: usize,
width: usize,
) -> Result<Vec<f32>, Error>
3-D convolutional patch embedding: maps an input of shape `[C, T, H, W]` to tokens of shape `[seq, embed_dim]`.