jkl 0.2.1

Asset compression and packing tool
Documentation
use std::convert::Infallible;

use crate::{encode::FixedCode, image::Extent, math::Rect};

/// Size of the tile in number of blocks.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct TileSize {
    pub width: u16,
    pub height: u16,
}

impl FixedCode for TileSize {
    const SIZE: usize = 4;
    type Array = [u8; 4];
    type Error = Infallible;

    fn fix_encode(&self) -> [u8; 4] {
        let [w0, w1] = self.width.to_le_bytes();
        let [h0, h1] = self.height.to_le_bytes();

        [w0, w1, h0, h1]
    }

    fn fix_decode(input: &[u8; 4]) -> Result<Self, Infallible> {
        let [w0, w1, h0, h1] = *input;
        let width = u16::from_le_bytes([w0, w1]);
        let height = u16::from_le_bytes([h0, h1]);
        Ok(TileSize { width, height })
    }
}

impl TileSize {
    /// Finds the optimal tile size for the given extent and block size.
    ///
    /// The optimal tile size is the one that minimizes the cost function:
    /// cost = (flat_cost + size_cost * tile_size) * ceil(number_of_tiles / 64) * 64
    /// i.e. tile cost is linear to size plus a float amount and it is multiplied by the number of tiles rounded up to the nearest multiple of 64.
    /// This cost function models the GPU decompression cost, which runs thread per tile, each thread has a fixed overhead of dispatching and a cost proportional to the tile size,
    /// and the GPU executes threads in warps of 32 or 64 threads, so the number of tiles is rounded up to the nearest multiple of 64.
    pub fn find_optimal(
        extent: Extent,
        block_width: u16,
        block_height: u16,
        flat_cost: f32,
        size_cost: f32,
    ) -> Self {
        assert!(block_width > 0);
        assert!(block_height > 0);

        assert!(block_width <= 256);
        assert!(block_height <= 256);

        // The max tile size to consider is either enough to cover the entire extent,
        // or 16384 rounded to next multiple of block size, which is the maximum tile size.
        // Cap is 16384 + 255 = 16639
        let max_tile_width = extent
            .width()
            .min(16384)
            .next_multiple_of(usize::from(block_width));
        let max_tile_height = extent
            .height()
            .min(16384)
            .next_multiple_of(usize::from(block_height));

        let min_tile_width =
            (16usize.next_multiple_of(usize::from(block_width))).min(max_tile_width);
        let min_tile_height =
            (16usize.next_multiple_of(usize::from(block_height))).min(max_tile_height);

        let mut min_cost = f32::INFINITY;
        let mut best_width = min_tile_width;
        let mut best_height = min_tile_height;

        // All candidates for tile size are generated by multiplying block size by a factor from this list.
        // Those are power of two and power of two multiplied by 3.
        // Capped at 16384, because multiplying any block size by 16384 will reach or exceed the maximum tile size of `16384.next_multiple_of(block_size)`,
        // so there is no need to consider larger factors.
        const FACTORS: [usize; 28] = [
            1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
            2048, 3072, 4096, 6144, 8192, 12288, 16384,
        ];

        for y_factor in FACTORS {
            let tile_height = (usize::from(block_height) * y_factor).min(max_tile_height);

            // Skip tiny factors for tiny blocks.
            if tile_height < min_tile_height {
                continue;
            }

            for x_factor in FACTORS {
                let tile_width = (usize::from(block_width) * x_factor).min(max_tile_width);

                // Skip tiny factors for tiny blocks.
                if tile_width < min_tile_width {
                    continue;
                }

                let tiles_x = extent.width().div_ceil(tile_width);
                let tiles_y = extent.height().div_ceil(tile_height);
                let planes = extent.depth() * extent.layers();

                let tiles_count = tiles_x * tiles_y * planes;
                let warps_count = tiles_count.div_ceil(64);

                let cost = (flat_cost + size_cost * (tile_width as f32 * tile_height as f32))
                    * warps_count as f32;

                if cost < min_cost {
                    min_cost = cost;

                    best_width = tile_width;
                    best_height = tile_height;
                }

                // Break early if reached the max tile size, no need to consider larger factors.
                if tile_width == max_tile_width {
                    break;
                }
            }

            // Break early if reached the max tile size, no need to consider larger factors.
            if tile_height == max_tile_height {
                break;
            }
        }

        debug_assert!(best_width >= min_tile_width);
        debug_assert!(best_height >= min_tile_height);

        debug_assert!(best_width <= max_tile_width);
        debug_assert!(best_height <= max_tile_height);

        debug_assert!(best_width.is_multiple_of(usize::from(block_width)));
        debug_assert!(best_height.is_multiple_of(usize::from(block_height)));

        debug_assert!(best_width <= 65535);
        debug_assert!(best_height <= 65535);

        TileSize {
            width: best_width as u16,
            height: best_height as u16,
        }
    }

    pub fn iter_tiles(&self, extent: Extent) -> TilesIter {
        TilesIter {
            extent,
            tile_size: *self,
            tile_x: 0,
            tile_y: 0,
            tile_z: 0,
        }
    }

    #[inline]
    pub fn tiles_count(&self, extent: Extent) -> usize {
        let [width, height, depth] = self.tiles(extent);
        width * height * depth
    }

    #[inline]
    pub fn tiles(&self, extent: Extent) -> [usize; 3] {
        tiles_from_extent(*self, extent)
    }

    pub fn tile(&self, extent: Extent, index: usize) -> Tile {
        let [tiles_x, tiles_y, planes] = tiles_from_extent(*self, extent);

        let plane = index / (tiles_x * tiles_y);
        assert!(plane < planes);

        let tile_index = index % (tiles_x * tiles_y);

        let tile_y = tile_index / tiles_x;
        let tile_x = tile_index % tiles_x;

        let [x, y] = tile_pos_xy(*self, tile_x, tile_y);

        let [w, h] = tile_extent(extent.width(), extent.height(), *self, x, y);

        Tile {
            plane,
            rect: Rect { x, y, w, h },
        }
    }
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Tile {
    pub plane: usize,
    pub rect: Rect<usize>,
}

pub struct TilesIter {
    extent: Extent,
    tile_size: TileSize,
    tile_x: usize,
    tile_y: usize,
    tile_z: usize,
}

impl Clone for TilesIter {
    fn clone(&self) -> Self {
        TilesIter {
            extent: self.extent,
            tile_size: self.tile_size,
            tile_x: self.tile_x,
            tile_y: self.tile_y,
            tile_z: self.tile_z,
        }
    }
}

impl Iterator for TilesIter {
    type Item = Tile;

    fn size_hint(&self) -> (usize, Option<usize>) {
        let len = self.len();
        (len, Some(len))
    }

    fn next(&mut self) -> Option<Tile> {
        let [width, height, planes] = extent_to_raw(self.extent);
        let [tiles_x, tiles_y, _] = tiles_from_raw_extent(self.tile_size, [width, height, planes]);

        if self.tile_z >= planes || tiles_x == 0 || tiles_y == 0 {
            return None;
        }

        let [x, y] = tile_pos_xy(self.tile_size, self.tile_x, self.tile_y);
        let [w, h] = tile_extent(width, height, self.tile_size, x, y);
        let z = self.tile_z;

        advance_tile_cursor(
            &mut self.tile_x,
            &mut self.tile_y,
            &mut self.tile_z,
            tiles_x,
            tiles_y,
        );

        Some(Tile {
            plane: z,
            rect: Rect { x, y, w, h },
        })
    }
}

impl ExactSizeIterator for TilesIter {
    fn len(&self) -> usize {
        let [width, height, planes] = extent_to_raw(self.extent);
        let [tiles_x, tiles_y, _] = tiles_from_raw_extent(self.tile_size, [width, height, planes]);

        if tiles_x == 0 || tiles_y == 0 || planes == 0 {
            return 0;
        }

        remaining_tiles(
            self.tile_x,
            self.tile_y,
            self.tile_z,
            tiles_x,
            tiles_y,
            planes,
        )
    }
}

#[inline]
fn tiles_from_extent(tile_size: TileSize, extent: Extent) -> [usize; 3] {
    tiles_from_raw_extent(tile_size, extent_to_raw(extent))
}

#[inline]
fn extent_to_raw(extent: Extent) -> [usize; 3] {
    [
        extent.width(),
        extent.height(),
        extent.depth() * extent.layers(),
    ]
}

#[inline]
fn tiles_from_raw_extent(tile_size: TileSize, extent: [usize; 3]) -> [usize; 3] {
    let tiles_x = extent[0].div_ceil(usize::from(tile_size.width));
    let tiles_y = extent[1].div_ceil(usize::from(tile_size.height));
    [tiles_x, tiles_y, extent[2]]
}

#[inline]
fn tile_pos_xy(tile_size: TileSize, tile_x: usize, tile_y: usize) -> [usize; 2] {
    [
        tile_x * usize::from(tile_size.width),
        tile_y * usize::from(tile_size.height),
    ]
}

#[inline]
fn tile_extent(width: usize, height: usize, tile_size: TileSize, x: usize, y: usize) -> [usize; 2] {
    [
        usize::min(usize::from(tile_size.width), width - x),
        usize::min(usize::from(tile_size.height), height - y),
    ]
}

#[inline]
fn advance_tile_cursor(
    tile_x: &mut usize,
    tile_y: &mut usize,
    tile_z: &mut usize,
    tiles_x: usize,
    tiles_y: usize,
) {
    *tile_x += 1;
    if *tile_x >= tiles_x {
        *tile_x = 0;
        *tile_y += 1;

        if *tile_y >= tiles_y {
            *tile_y = 0;
            *tile_z += 1;
        }
    }
}

#[inline]
fn remaining_tiles(
    tile_x: usize,
    tile_y: usize,
    tile_z: usize,
    tiles_x: usize,
    tiles_y: usize,
    planes: usize,
) -> usize {
    let tiles_per_plane = tiles_x * tiles_y;
    let current_index = tile_z * tiles_per_plane + tile_y * tiles_x + tile_x;
    planes * tiles_per_plane - current_index
}