use crate::{
error::{Error, Result},
options::ImageBudget,
};
/// Patch edge length; squared in this file's pixel-budget formulas.
pub(crate) const PATCH_SIZE: u32 = 16;

/// Factor combined with [`PATCH_SIZE`] to form [`TILE_PIXEL_UNIT`]; also
/// squared in the pixel-budget formulas.
pub(crate) const DOWNSAMPLE_FACTOR: u32 = 2;

/// Granularity of resized dimensions: sizes are snapped to multiples of this
/// value, and token counts are computed as `dimension / TILE_PIXEL_UNIT` per axis.
pub(crate) const TILE_PIXEL_UNIT: u32 = PATCH_SIZE * DOWNSAMPLE_FACTOR;

/// Edge length used for both the height and the width of every tile when an
/// image is split into a multi-tile grid.
pub(crate) const FULL_TILE_SIZE: u32 = 512;
/// Tiling plan for a single image: a `rows` x `cols` grid of equally sized
/// tiles plus an optional thumbnail.
///
/// Values are stored verbatim; the constructor and setters perform no validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TileGrid {
/// Number of tile rows in the grid.
rows: u32,
/// Number of tile columns in the grid.
cols: u32,
/// Height of each main tile.
tile_h: u32,
/// Width of each main tile.
tile_w: u32,
/// Optional thumbnail size, stored as `(height, width)`.
thumbnail: Option<(u32, u32)>,
}
impl TileGrid {
    /// Creates a grid from its components. Nothing is validated; every value
    /// is stored exactly as given. `thumbnail` is `(height, width)`.
    pub const fn new(
        rows: u32,
        cols: u32,
        tile_h: u32,
        tile_w: u32,
        thumbnail: Option<(u32, u32)>,
    ) -> Self {
        Self {
            rows,
            cols,
            tile_h,
            tile_w,
            thumbnail,
        }
    }

    /// Number of tile rows.
    pub const fn rows(&self) -> u32 {
        self.rows
    }

    /// Number of tile columns.
    pub const fn cols(&self) -> u32 {
        self.cols
    }

    /// Height of each main tile.
    pub const fn tile_h(&self) -> u32 {
        self.tile_h
    }

    /// Width of each main tile.
    pub const fn tile_w(&self) -> u32 {
        self.tile_w
    }

    /// Thumbnail size as `(height, width)`, if the grid has one.
    pub const fn thumbnail(&self) -> Option<(u32, u32)> {
        self.thumbnail
    }

    /// Replaces the row count in place.
    pub fn set_rows(&mut self, rows: u32) {
        *self = self.with_rows(rows);
    }

    /// Replaces the column count in place.
    pub fn set_cols(&mut self, cols: u32) {
        *self = self.with_cols(cols);
    }

    /// Replaces the main-tile height in place.
    pub fn set_tile_h(&mut self, tile_h: u32) {
        *self = self.with_tile_h(tile_h);
    }

    /// Replaces the main-tile width in place.
    pub fn set_tile_w(&mut self, tile_w: u32) {
        *self = self.with_tile_w(tile_w);
    }

    /// Replaces the thumbnail (`(height, width)` or `None`) in place.
    pub fn set_thumbnail(&mut self, thumbnail: Option<(u32, u32)>) {
        *self = self.with_thumbnail(thumbnail);
    }

    /// Returns a copy of the grid with `rows` replaced.
    pub const fn with_rows(self, rows: u32) -> Self {
        Self::new(rows, self.cols, self.tile_h, self.tile_w, self.thumbnail)
    }

    /// Returns a copy of the grid with `cols` replaced.
    pub const fn with_cols(self, cols: u32) -> Self {
        Self::new(self.rows, cols, self.tile_h, self.tile_w, self.thumbnail)
    }

    /// Returns a copy of the grid with `tile_h` replaced.
    pub const fn with_tile_h(self, tile_h: u32) -> Self {
        Self::new(self.rows, self.cols, tile_h, self.tile_w, self.thumbnail)
    }

    /// Returns a copy of the grid with `tile_w` replaced.
    pub const fn with_tile_w(self, tile_w: u32) -> Self {
        Self::new(self.rows, self.cols, self.tile_h, tile_w, self.thumbnail)
    }

    /// Returns a copy of the grid with `thumbnail` replaced.
    pub const fn with_thumbnail(self, thumbnail: Option<(u32, u32)>) -> Self {
        Self::new(self.rows, self.cols, self.tile_h, self.tile_w, thumbnail)
    }

    /// Total tile count: `rows * cols`, plus one when a thumbnail is present.
    pub const fn num_tiles(&self) -> usize {
        let main_tiles = (self.rows as usize) * (self.cols as usize);
        if self.thumbnail.is_some() {
            main_tiles + 1
        } else {
            main_tiles
        }
    }

    /// Total token count of the main tiles plus the thumbnail (if any).
    ///
    /// Each axis contributes `dimension / TILE_PIXEL_UNIT` tokens; the integer
    /// division discards any partial unit.
    pub const fn num_image_tokens(&self) -> usize {
        let units_down = (self.tile_h / TILE_PIXEL_UNIT) as usize;
        let units_across = (self.tile_w / TILE_PIXEL_UNIT) as usize;
        let main = (self.rows as usize) * (self.cols as usize) * units_down * units_across;
        let thumb = if let Some((th, tw)) = self.thumbnail {
            Self::tokens_for(th, tw)
        } else {
            0
        };
        main + thumb
    }

    /// Builds the chat-template placeholder description of this grid from the
    /// row count, column count, tokens per main tile and, when a thumbnail is
    /// present, its token count.
    pub const fn to_placeholder_info(&self) -> crate::chat_template::ImagePlaceholderInfo {
        let thumbnail_tokens = if let Some((th, tw)) = self.thumbnail {
            Some(Self::tokens_for(th, tw))
        } else {
            None
        };
        crate::chat_template::ImagePlaceholderInfo::new(
            self.rows as usize,
            self.cols as usize,
            Self::tokens_for(self.tile_h, self.tile_w),
            thumbnail_tokens,
        )
    }

    /// Tokens covered by an `h` x `w` area: whole `TILE_PIXEL_UNIT`s per axis,
    /// multiplied together.
    const fn tokens_for(h: u32, w: u32) -> usize {
        (h / TILE_PIXEL_UNIT) as usize * (w / TILE_PIXEL_UNIT) as usize
    }
}
/// Chooses how a `src_w` x `src_h` image is laid out under `budget`.
///
/// Two outcomes are possible:
///
/// * **Single tile** — a `1 x 1` grid whose tile is the image resized by
///   `smart_resize`, with no thumbnail. Taken when the image is not "too
///   large" (see `is_image_too_large`) or when the budget fixes both
///   `min_tiles` and `max_tiles` to 1.
/// * **Multi tile** — a `rows x cols` grid of `FULL_TILE_SIZE` square tiles
///   chosen by `find_closest_aspect_ratio`. A thumbnail (the `smart_resize`
///   result) is attached only when the budget enables it and the chosen grid
///   is not `1 x 1`.
///
/// # Errors
///
/// * `Error::ImageTooSmall` when either source dimension is zero.
/// * Whatever `budget.validate()` returns for an invalid budget.
/// * `Error::TileGridImpossible` when the resized tile or thumbnail would
///   still need more tokens than `budget.max_image_tokens()`.
pub fn pick_tile_grid(src_w: u32, src_h: u32, budget: &ImageBudget) -> Result<TileGrid> {
    if src_w == 0 || src_h == 0 {
        return Err(Error::ImageTooSmall {
            w: src_w,
            h: src_h,
            min_w: TILE_PIXEL_UNIT,
            min_h: TILE_PIXEL_UNIT,
        });
    }
    budget.validate()?;

    // Shared by both outcomes: resize to the token budget and reject the
    // image if the result still exceeds it. Yields `(width, height)`.
    let resize_within_budget = || -> Result<(u32, u32)> {
        let (w, h) = smart_resize(
            src_w,
            src_h,
            budget.min_image_tokens(),
            budget.max_image_tokens(),
        );
        if smart_resize_tokens(w, h) as usize > budget.max_image_tokens() {
            return Err(Error::TileGridImpossible {
                w: src_w,
                h: src_h,
                budget: *budget,
            });
        }
        Ok((w, h))
    };

    // A budget pinned to exactly one tile disables splitting altogether.
    let splitting_allowed = budget.min_tiles() != 1 || budget.max_tiles() != 1;
    let too_large = is_image_too_large(
        src_w,
        src_h,
        budget.max_image_tokens(),
        budget.max_pixels_tolerance(),
    );

    if !(too_large && splitting_allowed) {
        let (tile_w, tile_h) = resize_within_budget()?;
        return Ok(TileGrid::new(1, 1, tile_h, tile_w, None));
    }

    let (rows, cols) = find_closest_aspect_ratio(
        src_w as f64 / src_h as f64,
        budget.min_tiles() as u32,
        budget.max_tiles() as u32,
        src_w,
        src_h,
    );

    let thumbnail = if budget.use_thumbnail() && rows * cols != 1 {
        let (thumb_w, thumb_h) = resize_within_budget()?;
        // TileGrid stores the thumbnail as (height, width).
        Some((thumb_h, thumb_w))
    } else {
        None
    };

    Ok(TileGrid::new(
        rows,
        cols,
        FULL_TILE_SIZE,
        FULL_TILE_SIZE,
        thumbnail,
    ))
}
/// Reports whether the image, after snapping, exceeds the tolerated pixel budget.
///
/// Each dimension is rounded to a multiple of `TILE_PIXEL_UNIT` with
/// `round_by_factor` and floored at `PATCH_SIZE` (note: not `TILE_PIXEL_UNIT`,
/// unlike `smart_resize`). The snapped area is then compared, strictly, against
/// `max_image_tokens * PATCH_SIZE^2 * DOWNSAMPLE_FACTOR^2 * tolerance`
/// truncated to an integer.
fn is_image_too_large(src_w: u32, src_h: u32, max_image_tokens: usize, tolerance: f32) -> bool {
    let unit = TILE_PIXEL_UNIT as f64;
    let snapped_h = round_by_factor(src_h as f64, unit).max(PATCH_SIZE);
    let snapped_w = round_by_factor(src_w as f64, unit).max(PATCH_SIZE);
    // Widen before multiplying so the area cannot overflow u32.
    let snapped_area = u64::from(snapped_h) * u64::from(snapped_w);

    let pixel_limit = (max_image_tokens as f64)
        * (PATCH_SIZE as f64).powi(2)
        * (DOWNSAMPLE_FACTOR as f64).powi(2)
        * (tolerance as f64);

    snapped_area > pixel_limit as u64
}
/// Rounds `number` to the nearest multiple of `factor`, resolving exact
/// halves toward the even quotient (e.g. 22.5 -> 22, 7.5 -> 8).
///
/// `factor` is truncated to an integer for the final multiplication. If the
/// rounded result would not fit in `u32`, the largest multiple of `factor`
/// that does fit is returned instead of overflowing (which would panic in
/// debug builds and silently wrap in release builds).
///
/// NOTE(review): the ties-to-even rule presumably mirrors Python's `round()`
/// in the upstream implementation — confirm.
fn round_by_factor(number: f64, factor: f64) -> u32 {
    let quotient = number / factor;
    let lower = quotient.floor();
    let frac = quotient - lower;

    let rounded = if (frac - 0.5).abs() < f64::EPSILON {
        // Exact half: pick whichever neighbour is even.
        if (lower as i64) % 2 == 0 {
            lower
        } else {
            lower + 1.0
        }
    } else {
        quotient.round()
    };

    let multiples = rounded as u32;
    let step = factor as u32;
    // `checked_mul` only fails when `step >= 2`, so the division in the
    // fallback is safe; the closure keeps it from running when `step == 0`.
    multiples
        .checked_mul(step)
        .unwrap_or_else(|| (u32::MAX / step) * step)
}
/// Picks the tile layout whose aspect ratio is closest to `src_aspect`.
///
/// Candidates are the `(cols, rows)` pairs produced by
/// `super::target_ratios::target_ratios(min_tiles, max_tiles)` and are scanned
/// in the order that function yields them. A strictly closer candidate always
/// wins. On an exact tie, the later candidate replaces the current best only
/// if twice the source area exceeds the candidate's total area at
/// `FULL_TILE_SIZE` per tile — so the candidate order influences tie-breaks.
///
/// Returns `(rows, cols)` (note the swap relative to the candidate tuples),
/// or `(1, 1)` if no candidate was selected.
fn find_closest_aspect_ratio(
    src_aspect: f64,
    min_tiles: u32,
    max_tiles: u32,
    src_w: u32,
    src_h: u32,
) -> (u32, u32) {
    let candidates = super::target_ratios::target_ratios(min_tiles, max_tiles);
    let src_area = (src_w as u64) * (src_h as u64);
    let mut best_ratio_diff = f64::INFINITY;
    let mut best: Option<(u32, u32)> = None;

    for &(cols, rows) in candidates {
        let target_aspect = (cols as f64) / (rows as f64);
        let ratio_diff = (src_aspect - target_aspect).abs();

        if ratio_diff < best_ratio_diff {
            best_ratio_diff = ratio_diff;
            best = Some((cols, rows));
        } else if ratio_diff == best_ratio_diff {
            let target_area =
                (FULL_TILE_SIZE as u64) * (FULL_TILE_SIZE as u64) * (cols as u64) * (rows as u64);
            // `src_area` can be close to 2^64 for very large sources, so the
            // doubling is done in u128 to avoid overflowing u64.
            if u128::from(src_area) * 2 > u128::from(target_area) {
                best = Some((cols, rows));
            }
        }
    }

    let (cols, rows) = best.unwrap_or((1, 1));
    (rows, cols)
}
/// Token count of a `w` x `h` area: whole `TILE_PIXEL_UNIT`s along each axis,
/// multiplied together. Partial units are discarded by the integer division.
fn smart_resize_tokens(w: u32, h: u32) -> u32 {
    let units_across = w / TILE_PIXEL_UNIT;
    let units_down = h / TILE_PIXEL_UNIT;
    units_across * units_down
}
/// Computes a target size, as `(width, height)`, whose sides are multiples of
/// `TILE_PIXEL_UNIT` and whose area aims to sit between the pixel budgets
/// implied by `min_tokens` and `max_tokens`.
///
/// A token budget converts to pixels as
/// `tokens * PATCH_SIZE^2 * DOWNSAMPLE_FACTOR^2`. Both sides are first snapped
/// with `round_by_factor` (never below one unit). Then:
///
/// * snapped area above the maximum: both sides are scaled by
///   `1 / sqrt(src_area / max_pixels)` and floored to a unit multiple, never
///   below one unit;
/// * snapped area below the minimum: both sides are scaled by
///   `sqrt(min_pixels / src_area)` and rounded up to a unit multiple;
/// * otherwise the snapped size is returned unchanged.
///
/// The scale factor is derived from the *raw* source area while the branch is
/// selected by the *snapped* area, so for very thin images the result can
/// still exceed `max_tokens`; callers check the token count afterwards.
fn smart_resize(src_w: u32, src_h: u32, min_tokens: usize, max_tokens: usize) -> (u32, u32) {
    let unit = TILE_PIXEL_UNIT as f64;
    let pixels_for = |tokens: usize| {
        (tokens as f64) * (PATCH_SIZE as f64).powi(2) * (DOWNSAMPLE_FACTOR as f64).powi(2)
    };
    let lower_px = pixels_for(min_tokens);
    let upper_px = pixels_for(max_tokens);

    let snapped_h = TILE_PIXEL_UNIT.max(round_by_factor(src_h as f64, unit));
    let snapped_w = TILE_PIXEL_UNIT.max(round_by_factor(src_w as f64, unit));
    let snapped_area = (snapped_h as f64) * (snapped_w as f64);
    let source_area = (src_w as f64) * (src_h as f64);

    if snapped_area > upper_px {
        let beta = (source_area / upper_px).sqrt();
        let shrink = |side: u32| {
            let units = (side as f64 / beta / unit).floor() as u32;
            TILE_PIXEL_UNIT.max(units * TILE_PIXEL_UNIT)
        };
        let h = shrink(src_h);
        let w = shrink(src_w);
        (w, h)
    } else if snapped_area < lower_px {
        let beta = (lower_px / source_area).sqrt();
        let grow = |side: u32| ((side as f64 * beta / unit).ceil() as u32) * TILE_PIXEL_UNIT;
        let h = grow(src_h);
        let w = grow(src_w);
        (w, h)
    } else {
        (snapped_w, snapped_h)
    }
}
#[cfg(test)]
// Unit tests for `pick_tile_grid` and its private helpers.
// `ImageBudget::new()` is used throughout as the baseline budget.
mod tests {
use super::*;
// A 256x256 image under the baseline budget stays a single tile without a thumbnail.
#[test]
fn small_square_routes_single_tile() {
let g = pick_tile_grid(256, 256, &ImageBudget::new()).unwrap();
assert_eq!((g.rows(), g.cols()), (1, 1));
assert!(g.thumbnail().is_none());
}
// A 1024x1024 image is split into at least two full-size tiles and gets a thumbnail.
#[test]
fn large_square_routes_multi_tile() {
let g = pick_tile_grid(1024, 1024, &ImageBudget::new()).unwrap();
assert!(g.rows() * g.cols() >= 2);
assert_eq!(g.tile_h(), FULL_TILE_SIZE);
assert_eq!(g.tile_w(), FULL_TILE_SIZE);
assert!(g.thumbnail().is_some());
}
// Both sides returned by `smart_resize` are multiples of `TILE_PIXEL_UNIT`.
#[test]
fn smart_resize_snaps_to_unit_multiples() {
let (w, h) = smart_resize(1920, 1080, 64, 256);
assert_eq!(w % TILE_PIXEL_UNIT, 0);
assert_eq!(h % TILE_PIXEL_UNIT, 0);
}
// With a requested aspect of 3.0, the chosen layout has cols / rows == 3.
#[test]
fn aspect_picker_prefers_correct_ratio() {
let (rows, cols) = find_closest_aspect_ratio(3.0, 2, 10, 1024 * 9, 1024);
assert_eq!(cols as f64 / rows as f64, 3.0);
}
// A zero width is rejected with `ImageTooSmall`.
#[test]
fn zero_dimension_image_rejects() {
let r = pick_tile_grid(0, 100, &ImageBudget::new());
assert!(matches!(r, Err(Error::ImageTooSmall { .. })));
}
// One-pixel-wide (or one-pixel-tall) images that are very long cannot be fit
// into the token budget and must fail with `TileGridImpossible`.
#[test]
fn extreme_aspect_ratios_reject_with_tile_grid_impossible() {
for (w, h) in [(1u32, 100_000u32), (100_000, 1), (1, 50_000), (50_000, 1)] {
let r = pick_tile_grid(w, h, &ImageBudget::new());
assert!(
matches!(r, Err(Error::TileGridImpossible { .. })),
"{w}x{h} must reject with TileGridImpossible, got {r:?}"
);
}
}
// The token count reported by the placeholder info must equal the grid's own
// count, for a multi-tile grid, a single-tile grid and a wide image.
#[test]
fn to_placeholder_info_matches_preprocessed_image() {
let grid = pick_tile_grid(1024, 1024, &ImageBudget::new()).unwrap();
let info = grid.to_placeholder_info();
assert_eq!(
info.num_image_tokens(),
grid.num_image_tokens(),
"ImagePlaceholderInfo + TileGrid token counts must match"
);
let grid_s = pick_tile_grid(256, 256, &ImageBudget::new()).unwrap();
let info_s = grid_s.to_placeholder_info();
assert_eq!(info_s.num_image_tokens(), grid_s.num_image_tokens());
assert_eq!(info_s.thumbnail_tokens(), None);
let grid_w = pick_tile_grid(1920, 1080, &ImageBudget::new()).unwrap();
let info_w = grid_w.to_placeholder_info();
assert_eq!(info_w.num_image_tokens(), grid_w.num_image_tokens());
}
// Pins tie-break results for exactly-four-tile layouts. The outcome depends on
// the order in which `target_ratios` yields candidates, which (per the test
// name and message) is meant to follow the upstream implementation.
#[test]
fn upstream_set_iteration_order_drives_tie_break() {
let (rows, cols) = find_closest_aspect_ratio(2.5, 4, 4, 1280, 512);
assert_eq!(
(rows, cols),
(2, 2),
"upstream picks (2,2) for 1280×512 / min=max=4; ours must too"
);
// Transposed input: no tie here, the 1-column x 4-row layout is strictly closest.
let (rows_t, cols_t) = find_closest_aspect_ratio(0.4, 4, 4, 512, 1280);
assert_eq!((rows_t, cols_t), (4, 1));
// Same aspect as the first case with a larger source area.
let (rows_l, cols_l) = find_closest_aspect_ratio(2.5, 4, 4, 3200, 1280);
assert_eq!((rows_l, cols_l), (2, 2));
}
// When the multi-tile branch ends up choosing a 1x1 layout, no thumbnail is attached.
#[test]
fn one_by_one_grid_in_multi_tile_branch_attaches_no_thumbnail() {
let mut budget = ImageBudget::new();
budget.set_min_tiles(1);
budget.set_max_tiles(2);
let g = pick_tile_grid(1024, 1024, &budget).expect("budget is valid");
assert_eq!(
(g.rows(), g.cols()),
(1, 1),
"1024x1024 with min=1,max=2 should pick (1,1)"
);
assert_eq!(
g.thumbnail(),
None,
"1×1 selection in multi-tile branch must NOT attach a thumbnail"
);
}
// A budget with min_tiles == max_tiles == 1 disables splitting even for a
// large image; the single tile still has unit-aligned sides.
#[test]
fn min_max_tiles_one_routes_single_tile_even_for_large_image() {
let mut budget = ImageBudget::new();
budget.set_min_tiles(1);
budget.set_max_tiles(1);
let g = pick_tile_grid(4096, 2160, &budget).expect("budget is valid");
assert_eq!(
(g.rows(), g.cols()),
(1, 1),
"min=max=1 must route to single-tile branch"
);
assert_eq!(
g.thumbnail(),
None,
"single-tile branch must never attach a thumbnail"
);
assert!(g.tile_h().is_multiple_of(TILE_PIXEL_UNIT));
assert!(g.tile_w().is_multiple_of(TILE_PIXEL_UNIT));
}
// Table-driven regression test. Each entry is
// (source width, source height, budget, expected grid), where the expected
// grid is `TileGrid::new(rows, cols, tile_h, tile_w, thumbnail (h, w))`.
// NOTE(review): the expected values presumably come from the upstream
// reference implementation ("parity") — confirm before editing any of them.
#[test]
fn pick_tile_grid_parity_cases() {
let cases: &[(u32, u32, ImageBudget, TileGrid)] = &[
// --- baseline budget, single-tile and boundary sizes ---
(
256,
256,
ImageBudget::new(),
TileGrid::new(1, 1, 256, 256, None),
),
(
512,
512,
ImageBudget::new(),
TileGrid::new(1, 1, 512, 512, None),
),
// Expected to be split; compare with 720x730 further down, which is
// expected to stay a single tile.
(
723,
724,
ImageBudget::new(),
TileGrid::new(2, 2, 512, 512, Some((512, 480))),
),
// Tiny image: expected to be enlarged to 256x256.
(
32,
32,
ImageBudget::new(),
TileGrid::new(1, 1, 256, 256, None),
),
(
320,
240,
ImageBudget::new(),
TileGrid::new(1, 1, 256, 320, None),
),
(
384,
216,
ImageBudget::new(),
TileGrid::new(1, 1, 224, 384, None),
),
(
640,
480,
ImageBudget::new(),
TileGrid::new(1, 1, 416, 576, None),
),
(
480,
270,
ImageBudget::new(),
TileGrid::new(1, 1, 256, 480, None),
),
// --- baseline budget, thin images that remain a single tile ---
(
32,
1024,
ImageBudget::new(),
TileGrid::new(1, 1, 1472, 64, None),
),
(
1024,
32,
ImageBudget::new(),
TileGrid::new(1, 1, 64, 1472, None),
),
(
1,
8000,
ImageBudget::new(),
TileGrid::new(1, 1, 8000, 32, None),
),
(
8000,
1,
ImageBudget::new(),
TileGrid::new(1, 1, 32, 8000, None),
),
// --- baseline budget, multi-tile layouts with thumbnails ---
(
1024,
1024,
ImageBudget::new(),
TileGrid::new(2, 2, 512, 512, Some((512, 512))),
),
(
768,
768,
ImageBudget::new(),
TileGrid::new(2, 2, 512, 512, Some((512, 512))),
),
(
1920,
1080,
ImageBudget::new(),
TileGrid::new(2, 4, 512, 512, Some((384, 672))),
),
(
1080,
1920,
ImageBudget::new(),
TileGrid::new(4, 2, 512, 512, Some((672, 384))),
),
(
1280,
720,
ImageBudget::new(),
TileGrid::new(1, 2, 512, 512, Some((384, 672))),
),
(
2560,
1440,
ImageBudget::new(),
TileGrid::new(2, 4, 512, 512, Some((384, 672))),
),
(
1440,
2560,
ImageBudget::new(),
TileGrid::new(4, 2, 512, 512, Some((672, 384))),
),
(
1024,
768,
ImageBudget::new(),
TileGrid::new(2, 3, 512, 512, Some((416, 576))),
),
(
768,
1024,
ImageBudget::new(),
TileGrid::new(3, 2, 512, 512, Some((576, 416))),
),
(
1600,
800,
ImageBudget::new(),
TileGrid::new(2, 4, 512, 512, Some((352, 704))),
),
(
800,
1600,
ImageBudget::new(),
TileGrid::new(4, 2, 512, 512, Some((704, 352))),
),
// Counterpart of 723x724 above: expected to stay a single tile.
(
720,
730,
ImageBudget::new(),
TileGrid::new(1, 1, 512, 480, None),
),
// --- custom budgets ---
// Smaller token range, 2..=4 tiles, thumbnails disabled.
(
256,
256,
ImageBudget::new()
.with_min_image_tokens(32)
.with_max_image_tokens(64)
.with_min_tiles(2)
.with_max_tiles(4)
.with_use_thumbnail(false),
TileGrid::new(1, 1, 256, 256, None),
),
(
1024,
1024,
ImageBudget::new()
.with_min_image_tokens(32)
.with_max_image_tokens(64)
.with_min_tiles(2)
.with_max_tiles(4)
.with_use_thumbnail(false),
TileGrid::new(2, 2, 512, 512, None),
),
// Tile range narrowed to 4..=10.
(
1920,
1080,
ImageBudget::new().with_min_tiles(4).with_max_tiles(10),
TileGrid::new(2, 4, 512, 512, Some((384, 672))),
),
(
1080,
1920,
ImageBudget::new().with_min_tiles(4).with_max_tiles(10),
TileGrid::new(4, 2, 512, 512, Some((672, 384))),
),
];
// Every case must succeed and match its expected grid exactly.
for (w, h, budget, expected) in cases {
let actual = pick_tile_grid(*w, *h, budget).unwrap();
assert_eq!(actual, *expected, "case w={w} h={h}");
}
}
}