use serde::Deserialize;
use std::path::Path;
/// Three-element mean vector, named after ImageNet.
///
/// NOTE(review): presumably the per-channel (RGB-order) mean subtracted from
/// inputs scaled to `[0, 1]` during preprocessing — confirm at the call site.
pub const IMAGENET_MEAN: [f32; 3] = [0.485, 0.456, 0.406];
/// Three-element standard-deviation vector, named after ImageNet.
///
/// NOTE(review): presumably the per-channel divisor paired with
/// `IMAGENET_MEAN` during input normalization — confirm at the call site.
pub const IMAGENET_STD: [f32; 3] = [0.229, 0.224, 0.225];
/// Hyper-parameters describing a DINOv2 vision-transformer model.
///
/// The struct is deserializable with serde; fields annotated with a serde
/// default may be omitted from the serialized input, all others are required.
#[derive(Debug, Clone, Deserialize)]
pub struct DinoV2Config {
/// Embedding width (`embed_dim` in `DinoV2Config::new`).
pub hidden_size: usize,
/// Number of layers (`depth` in `DinoV2Config::new`).
pub num_hidden_layers: usize,
/// Number of attention heads; `head_dim` divides `hidden_size` by this value.
pub num_attention_heads: usize,
/// Input image side length; `num_patches` computes a square grid from it,
/// so inputs are treated as square.
pub img_size: usize,
/// Patch side length; `img_size / patch_size` patches fit along each side.
pub patch_size: usize,
/// Multiplier applied to `hidden_size` to obtain `intermediate_size`.
/// Falls back to `default_mlp_ratio()` when absent from the input.
#[serde(default = "default_mlp_ratio")]
pub mlp_ratio: f64,
/// Layer-normalization epsilon.
/// Falls back to `default_dinov2_ln_eps()` when absent from the input.
#[serde(default = "default_dinov2_ln_eps")]
pub layer_norm_eps: f64,
/// Number of register tokens counted in `seq_len`.
/// Falls back to `usize::default()` (zero) when absent from the input.
#[serde(default)]
pub num_register_tokens: usize,
/// Number of output classes.
/// Falls back to `default_num_classes()` when absent from the input.
#[serde(default = "default_num_classes")]
pub num_classes: usize,
}
/// Serde fallback for `DinoV2Config::mlp_ratio` when the field is missing.
fn default_mlp_ratio() -> f64 {
    const MLP_RATIO: f64 = 4.0;
    MLP_RATIO
}
/// Serde fallback for `DinoV2Config::layer_norm_eps` when the field is missing.
fn default_dinov2_ln_eps() -> f64 {
    const LAYER_NORM_EPS: f64 = 1e-5;
    LAYER_NORM_EPS
}
/// Serde fallback for `DinoV2Config::num_classes` when the field is missing.
fn default_num_classes() -> usize {
    const NUM_CLASSES: usize = 1000;
    NUM_CLASSES
}
impl DinoV2Config {
pub fn from_file(path: &Path) -> anyhow::Result<Self> {
let data = std::fs::read_to_string(path)?;
Ok(serde_json::from_str(&data)?)
}
pub fn new(
img_size: usize,
depth: usize,
embed_dim: usize,
num_heads: usize,
num_register_tokens: usize,
) -> Self {
Self {
hidden_size: embed_dim,
num_hidden_layers: depth,
num_attention_heads: num_heads,
img_size,
patch_size: 14,
mlp_ratio: 4.0,
layer_norm_eps: 1e-5,
num_register_tokens,
num_classes: 1000,
}
}
pub fn intermediate_size(&self) -> usize {
(self.hidden_size as f64 * self.mlp_ratio) as usize
}
pub fn head_dim(&self) -> usize {
self.hidden_size / self.num_attention_heads
}
pub fn num_patches(&self) -> usize {
let n = self.img_size / self.patch_size;
n * n
}
pub fn seq_len(&self) -> usize {
1 + self.num_register_tokens + self.num_patches()
}
pub fn patch_dim(&self) -> usize {
3 * self.patch_size * self.patch_size
}
pub fn vit_small(img_size: usize) -> Self {
Self::new(img_size, 12, 384, 6, 0)
}
pub fn vit_base(img_size: usize) -> Self {
Self::new(img_size, 12, 768, 12, 0)
}
pub fn vit_large(img_size: usize) -> Self {
Self::new(img_size, 24, 1024, 16, 0)
}
}