use std::io::Read;
use std::mem::size_of;
use crate::{cast, util::div_ceil, DecodingError, Offset, Size};
use crate::{convert_channels_for, util, Channels, ColorFormat, ImageViewMut};
use super::{DecodeContext, ReadSeek};
/// Byte sizes of a single pixel before and after decoding.
#[derive(Debug, Clone, Copy)]
pub(crate) struct PixelSize {
/// Number of bytes one pixel occupies in the encoded input.
pub encoded_size: u8,
/// Number of bytes one decoded pixel occupies. The pixel readers in this
/// file check it against `native_color.bytes_per_pixel()`.
pub decoded_size: u8,
}
/// Signature of a function that decodes a run of pixels: the first argument
/// is the encoded input, the second the byte buffer the decoded pixels are
/// written to.
pub(crate) type ProcessPixelsFn = fn(&[u8], &mut [u8]);
/// Decodes pixels one by one.
///
/// `encoded` is reinterpreted as a slice of little-endian `InPixel`s and
/// `decoded` as a slice of native-endian `OutPixel`s. Each input pixel is
/// passed through `f` and the result is stored at the matching output
/// position. Processing stops at the end of the shorter slice.
///
/// # Panics
///
/// Panics if either buffer cannot be reinterpreted as a slice of the
/// respective pixel byte arrays.
#[inline]
pub(crate) fn process_pixels_helper<InPixel: cast::FromLeBytes, OutPixel: cast::IntoNeBytes>(
    encoded: &[u8],
    decoded: &mut [u8],
    f: impl Fn(InPixel) -> OutPixel,
) {
    let src: &[InPixel::Bytes] = cast::from_bytes(encoded).expect("Invalid input buffer");
    let dst: &mut [OutPixel::Bytes] =
        cast::from_bytes_mut(decoded).expect("Invalid output buffer");

    dst.iter_mut().zip(src).for_each(|(out, raw)| {
        let pixel: InPixel = cast::FromLeBytes::from_le_bytes(*raw);
        *out = cast::IntoNeBytes::into_ne_bytes(f(pixel));
    });
}
/// Same as [`process_pixels_helper`], but processes pixels in groups of
/// `UNROLL` where possible.
///
/// All complete groups of `UNROLL` pixels are decoded through
/// `process_pixels_helper` using array-typed pixels; the remaining pixels
/// (fewer than `UNROLL`) are decoded individually afterwards.
///
/// # Panics
///
/// Panics if `decoded` is too short for the complete groups or if a buffer
/// cannot be reinterpreted as a slice of the respective pixel byte arrays.
#[inline]
pub(crate) fn process_pixels_helper_unroll<const UNROLL: usize, InPixel, OutPixel, F>(
    encoded: &[u8],
    decoded: &mut [u8],
    f: F,
) where
    InPixel: cast::FromLeBytes,
    OutPixel: cast::IntoNeBytes,
    [InPixel; UNROLL]: cast::FromLeBytes,
    [OutPixel; UNROLL]: cast::IntoNeBytes,
    F: Copy + Fn(InPixel) -> OutPixel,
{
    // Number of complete groups of `UNROLL` pixels and their byte lengths.
    let group_count = encoded.len() / size_of::<InPixel>() / UNROLL;
    let grouped_in_len = group_count * size_of::<[InPixel; UNROLL]>();
    let grouped_out_len = group_count * size_of::<[OutPixel; UNROLL]>();

    // Complete groups: decode `UNROLL` pixels per call.
    process_pixels_helper(
        &encoded[..grouped_in_len],
        &mut decoded[..grouped_out_len],
        move |group: [InPixel; UNROLL]| group.map(f),
    );

    // Leftover pixels: decode one at a time.
    let encoded: &[InPixel::Bytes] =
        cast::from_bytes(&encoded[grouped_in_len..]).expect("Invalid input buffer");
    let decoded: &mut [OutPixel::Bytes] =
        cast::from_bytes_mut(&mut decoded[grouped_out_len..]).expect("Invalid output buffer");
    debug_assert!(encoded.len() == decoded.len());

    decoded.iter_mut().zip(encoded).for_each(|(out, raw)| {
        let pixel: InPixel = cast::FromLeBytes::from_le_bytes(*raw);
        *out = cast::IntoNeBytes::into_ne_bytes(f(pixel));
    });
}
/// Reads a whole surface of uncompressed pixels from `r` and decodes it into
/// `image`, one row at a time.
///
/// Each encoded row is `image.width() * pixel_size.encoded_size` bytes long.
/// Rows are decoded with `process_pixels` into `native_color` and, if the
/// image uses different channels, converted on the way.
///
/// # Errors
///
/// Returns an error if the line buffer cannot be allocated or reading from
/// `r` fails.
///
/// # Panics
///
/// Panics if the line buffer runs out of lines before all image rows are
/// filled.
pub(crate) fn for_each_pixel_untyped(
    r: &mut dyn Read,
    image: &mut ImageViewMut,
    mut context: DecodeContext,
    native_color: ColorFormat,
    pixel_size: PixelSize,
    process_pixels: ProcessPixelsFn,
) -> Result<(), DecodingError> {
    debug_assert_eq!(image.color().precision, native_color.precision);
    debug_assert_eq!(native_color.bytes_per_pixel(), pixel_size.decoded_size);

    let size_of_in = usize::from(pixel_size.encoded_size);
    let encoded_row_len = image.width() as usize * size_of_in;

    let mut line_buffer = UntypedLineBuffer::new(encoded_row_len, image.height(), &mut context)?;
    let mut conversion_buffer = ChannelConversionBuffer::new(native_color, image.color().channels);

    image
        .rows_mut()
        .try_for_each(|out_row| -> Result<(), DecodingError> {
            let line = line_buffer
                .next_line(&mut *r)?
                .expect("height of image and line buffer must match");
            debug_assert!(line.len() % size_of_in == 0);
            conversion_buffer.process_pixels(line, out_row, process_pixels);
            Ok(())
        })
}
/// Reads a rectangle of an uncompressed surface from `r` and decodes it into
/// `image`.
///
/// The rectangle starts at `offset` within the surface described by
/// `context.surface_size` and has the size of `image`. Bytes outside the
/// rectangle are skipped, including those after it, so the reader is advanced
/// past the whole surface.
///
/// # Errors
///
/// Returns an error if skipping, reading or allocating the row buffer fails.
///
/// # Panics
///
/// Panics if the encoded size of the surface in bytes does not fit into an
/// `i64`.
pub(crate) fn for_each_pixel_rect_untyped(
    r: &mut dyn ReadSeek,
    image: &mut ImageViewMut,
    offset: Offset,
    mut context: DecodeContext,
    native_color: ColorFormat,
    pixel_size: PixelSize,
    process_pixels: ProcessPixelsFn,
) -> Result<(), DecodingError> {
    debug_assert_eq!(image.color.precision, native_color.precision);
    debug_assert_eq!(native_color.bytes_per_pixel(), pixel_size.decoded_size);

    let size_of_in = pixel_size.encoded_size as usize;
    let surface_size = context.surface_size;
    assert!(surface_size
        .pixels()
        .checked_mul(size_of_in as u64)
        .map(|bytes| bytes <= i64::MAX as u64)
        .unwrap_or(false));

    // Byte counts describing one encoded surface row relative to the rect.
    let encoded_pixel_bytes = size_of_in as u64;
    let surface_row_bytes = surface_size.width as u64 * encoded_pixel_bytes;
    let left_gap = offset.x as u64 * encoded_pixel_bytes;
    let right_gap = (surface_size.width - offset.x - image.width()) as u64 * encoded_pixel_bytes;
    let image_bytes_per_pixel = image.color().bytes_per_pixel() as usize;

    // Jump to the first pixel of the rect.
    util::io_skip_exact(r, surface_row_bytes * offset.y as u64 + left_gap)?;

    let mut encoded_row: Box<[u8]> = context.alloc(image.width() as usize * size_of_in)?;
    let mut conversion_buffer = ChannelConversionBuffer::new(native_color, image.color().channels);

    // Between two rect rows lie the rest of the previous surface row and the
    // start of the next one.
    let row_gap = left_gap + right_gap;
    for y in 0..image.height() {
        if y != 0 {
            util::io_skip_exact(r, row_gap)?;
        }
        r.read_exact(&mut encoded_row)?;

        let out_row = image.get_row(y as usize);
        debug_assert_eq!(
            encoded_row.len() / size_of_in,
            out_row.len() / image_bytes_per_pixel
        );
        conversion_buffer.process_pixels(&encoded_row, out_row, process_pixels);
    }

    // Skip the rest of the last rect row and all surface rows below the rect.
    let rows_below = (surface_size.height - offset.y - image.height()) as u64;
    util::io_skip_exact(r, right_gap + rows_below * surface_row_bytes)?;

    Ok(())
}
/// Signature of a function that decodes one line of blocks.
///
/// `encoded_blocks` holds the encoded blocks, `decoded` receives the decoded
/// pixels, `row_pitch` is the distance in bytes between the starts of two
/// consecutive output rows, and `range` selects which pixels and rows of the
/// block line are written.
pub(crate) type ProcessBlocksFn =
fn(encoded_blocks: &[u8], decoded: &mut [u8], row_pitch: usize, range: PixelRange);
/// Describes which pixels of a line of blocks are to be decoded.
#[derive(Debug, Clone)]
pub(crate) struct PixelRange {
/// Number of pixels to decode per row.
pub width: u32,
/// Number of leading pixels of the first block that are skipped. The block
/// helpers in this file assert that it is smaller than the block width.
pub width_offset: u8,
/// The rows of the block line to decode.
pub rows: RowRange,
}
/// A range of rows, from `start` (inclusive) to `end` (exclusive).
#[derive(Debug, Clone, Copy)]
pub(crate) struct RowRange {
    /// First row of the range.
    pub start: u8,
    /// One past the last row of the range.
    pub end: u8,
}

impl RowRange {
    /// Creates the range `start..end`.
    ///
    /// In debug builds, asserts that the range is not empty.
    pub fn new(start: u8, end: u8) -> Self {
        debug_assert!(start < end);
        RowRange { start, end }
    }

    /// Number of rows in the range.
    pub fn len(self) -> u8 {
        let RowRange { start, end } = self;
        end - start
    }

    /// Iterates over the row indices `start..end`.
    pub fn iter(self) -> core::ops::Range<u8> {
        core::ops::Range {
            start: self.start,
            end: self.end,
        }
    }
}
/// Decodes one row of 2x1 blocks.
///
/// Every block of `BYTES_PER_BLOCK` bytes is decoded by `process_block` into
/// two horizontally adjacent pixels. `range.width` pixels are written to the
/// start of `decoded`. If `range.width_offset` is 1, only the second pixel of
/// the first block is used; if the remaining width is odd, only the first
/// pixel of the last block is used.
///
/// # Panics
///
/// Panics if the buffers cannot be reinterpreted as blocks/pixels or are too
/// short for the requested width.
#[inline]
pub(crate) fn process_2x1_blocks_helper<
    const BYTES_PER_BLOCK: usize,
    OutPixel: cast::IntoNeBytes,
>(
    encoded_blocks: &[u8],
    decoded: &mut [u8],
    range: PixelRange,
    process_block: impl Fn([u8; BYTES_PER_BLOCK]) -> [OutPixel; 2],
) {
    let mut blocks: &[[u8; BYTES_PER_BLOCK]] =
        cast::from_bytes(encoded_blocks).expect("Invalid block buffer");
    let mut width = range.width as usize;
    let mut decoded: &mut [OutPixel::Bytes] =
        cast::from_bytes_mut(&mut decoded[..width * size_of::<OutPixel>()])
            .expect("Invalid output buffer");
    debug_assert!(decoded.len() == width);

    // Leading half block: only its second pixel is inside the range.
    if range.width_offset == 1 {
        debug_assert!(width > 0);
        let [_, second] = process_block(blocks[0]);
        decoded[0] = cast::IntoNeBytes::into_ne_bytes(second);
        width -= 1;
        blocks = &blocks[1..];
        decoded = &mut decoded[1..];
    }

    // All blocks that are fully inside the range.
    let pair_count = width / 2;
    let decoded_pairs: &mut [[OutPixel::Bytes; 2]] =
        cast::as_array_chunks_mut(&mut decoded[..(pair_count * 2)]).unwrap();
    decoded_pairs
        .iter_mut()
        .zip(blocks)
        .for_each(|(pair, block)| {
            let [p0, p1] = process_block(*block);
            *pair = [
                cast::IntoNeBytes::into_ne_bytes(p0),
                cast::IntoNeBytes::into_ne_bytes(p1),
            ];
        });

    // Trailing half block: only its first pixel is inside the range.
    if width % 2 == 1 {
        let [first, _] = process_block(*blocks.last().unwrap());
        decoded[width - 1] = cast::IntoNeBytes::into_ne_bytes(first);
    }
}
/// Decodes one row of 8x1 blocks where every block is a single byte.
///
/// This is a thin wrapper around [`general_process_blocks`] that unwraps the
/// one-byte block before handing it to `process_block`.
#[inline]
pub(crate) fn process_8x1_blocks_helper<OutPixel, F>(
    encoded_blocks: &[u8],
    decoded: &mut [u8],
    stride: usize,
    range: PixelRange,
    process_block: F,
) where
    OutPixel: cast::IntoNeBytes + Copy,
    F: Fn(u8) -> [OutPixel; 8],
{
    let decode_single_byte = |[byte]: [u8; 1]| process_block(byte);
    general_process_blocks::<8, 1, 8, 1, OutPixel>(
        encoded_blocks,
        decoded,
        stride,
        range,
        decode_single_byte,
    );
}
/// Decodes one line of 4x4 blocks into `decoded`.
///
/// `encoded_blocks` must consist of whole blocks of `BYTES_PER_BLOCK` bytes,
/// `stride` is the distance in bytes between consecutive output rows, and
/// `range` selects the pixels and rows to write. `process_block` decodes one
/// block into 16 pixels, which are read here as 4 rows of 4 pixels
/// (index `y * 4 + x`).
///
/// # Panics
///
/// Panics if the buffers are too short for the requested range or if the
/// block buffer cannot be reinterpreted as whole blocks.
#[inline]
pub(crate) fn process_4x4_blocks_helper<
const BYTES_PER_BLOCK: usize,
OutPixel: cast::IntoNeBytes + cast::Castable + Copy,
>(
mut encoded_blocks: &[u8],
mut decoded: &mut [u8],
stride: usize,
mut range: PixelRange,
process_block: impl Copy + Fn([u8; BYTES_PER_BLOCK]) -> [OutPixel; 16],
) {
// Sanity checks (debug builds only): at most 4 rows, whole blocks, exactly
// as many blocks as the range covers, and enough room in the output.
debug_assert!(range.rows.len() <= 4);
debug_assert!(encoded_blocks.len() % BYTES_PER_BLOCK == 0);
debug_assert_eq!(
encoded_blocks.len() / BYTES_PER_BLOCK,
div_ceil(range.width_offset as u32 + range.width, 4) as usize
);
debug_assert!(
decoded.len()
>= stride * (range.rows.len() as usize - 1)
+ range.width as usize * size_of::<OutPixel>(),
"decoded.len() = {}, stride = {}, range = {:?}",
decoded.len(),
stride,
range
);
// Decode a partially covered first block separately. Afterwards the
// remaining blocks start at a block boundary (`range.width_offset` is 0),
// and `skip` is the number of output bytes per row that were written.
if range.width_offset != 0 {
let skip = handle_width_offset::<4, 4, 16, BYTES_PER_BLOCK, OutPixel, _>(
&mut encoded_blocks,
decoded,
stride,
&mut range,
process_block,
);
decoded = &mut decoded[skip..];
}
// Fast path: all 4 rows are requested and the output can be viewed as a
// slice of `OutPixel`, so decoded pixels are copied directly.
// NOTE(review): the cast presumably returns `None` for misaligned or
// mis-sized buffers; confirm against `cast::from_bytes_mut`.
if range.rows.len() == 4 && stride % size_of::<OutPixel>() == 0 {
if let Some(decoded) = cast::from_bytes_mut::<OutPixel>(decoded) {
let encoded_blocks: &[[u8; BYTES_PER_BLOCK]] =
cast::from_bytes(encoded_blocks).expect("Invalid block buffer");
// From here on `stride` is measured in pixels instead of bytes.
let stride = stride / size_of::<OutPixel>();
let full_blocks = range.width as usize / 4;
// Blocks that lie completely inside the range.
for (block_index, block) in encoded_blocks[..full_blocks].iter().enumerate() {
let pixel_index = block_index * 4;
let block = process_block(*block);
for y in 0..4 {
let row_start = stride * y + pixel_index;
let row = &mut decoded[row_start..row_start + 4];
for x in 0..4 {
row[x] = block[y * 4 + x];
}
}
}
// The last block is only partially covered: copy just the columns that
// are inside the range.
if range.width % 4 != 0 {
let block = encoded_blocks[full_blocks];
let pixel_index = full_blocks * 4;
let block_w = range.width as usize - pixel_index;
let block = process_block(block);
for y in 0..4 {
let row_start = stride * y + pixel_index;
let row = &mut decoded[row_start..row_start + block_w];
for x in 0..block_w {
row[x] = block[y * 4 + x];
}
}
}
return;
}
}
// Fallback for partial row ranges or output that cannot be viewed as a
// slice of `OutPixel`.
general_process_blocks::<4, 4, 16, BYTES_PER_BLOCK, OutPixel>(
encoded_blocks,
decoded,
stride,
range,
process_block,
);
}
/// Decodes the first block of a block line when the range does not start at
/// a block boundary.
///
/// Only the pixels of the first block that lie inside `range` are written to
/// `decoded`. Afterwards `encoded_blocks` is advanced past that block and
/// `range` is updated to describe the remaining pixels (with a
/// `width_offset` of 0).
///
/// Returns the number of bytes per output row that were written, i.e. how
/// far the caller has to advance `decoded`. Returns 0 without touching
/// anything if `range.width` is 0.
fn handle_width_offset<
    const BLOCK_SIZE_X: u8,
    const BLOCK_SIZE_Y: u8,
    const BLOCK_PIXELS: usize,
    const BYTES_PER_BLOCK: usize,
    OutPixel: cast::IntoNeBytes + Copy,
    F: Fn([u8; BYTES_PER_BLOCK]) -> [OutPixel; BLOCK_PIXELS],
>(
    encoded_blocks: &mut &[u8],
    decoded: &mut [u8],
    stride: usize,
    range: &mut PixelRange,
    process_block: F,
) -> usize {
    let offset = range.width_offset;
    debug_assert!(offset < BLOCK_SIZE_X);

    // Pixels of the first block that are inside the range.
    let available = (BLOCK_SIZE_X - offset) as u32;
    let pixel_w = available.min(range.width);
    if pixel_w == 0 {
        return 0;
    }

    let first_block = &encoded_blocks[..BYTES_PER_BLOCK];
    let first_block_range = PixelRange {
        width: pixel_w,
        width_offset: offset,
        rows: range.rows,
    };
    general_process_blocks::<BLOCK_SIZE_X, BLOCK_SIZE_Y, BLOCK_PIXELS, BYTES_PER_BLOCK, OutPixel>(
        first_block,
        decoded,
        stride,
        first_block_range,
        process_block,
    );

    // The rest of the line now starts at a block boundary.
    range.width_offset = 0;
    range.width -= pixel_w;
    *encoded_blocks = &encoded_blocks[BYTES_PER_BLOCK..];

    pixel_w as usize * size_of::<OutPixel>()
}
/// Decodes one line of blocks of arbitrary size into `decoded`.
///
/// Each block of `BYTES_PER_BLOCK` bytes is decoded by `process_block` into
/// `BLOCK_PIXELS` pixels, which are read as rows of `BLOCK_SIZE_X` pixels.
/// Only the pixels selected by `range` are written; `stride` is the distance
/// in bytes between consecutive output rows, and the first output row
/// corresponds to `range.rows.start`.
///
/// # Panics
///
/// Panics if `encoded_blocks` cannot be reinterpreted as whole blocks or if
/// `decoded` is too short for the requested range.
pub(crate) fn general_process_blocks<
const BLOCK_SIZE_X: u8,
const BLOCK_SIZE_Y: u8,
const BLOCK_PIXELS: usize,
const BYTES_PER_BLOCK: usize,
OutPixel: cast::IntoNeBytes + Copy,
>(
encoded_blocks: &[u8],
decoded: &mut [u8],
stride: usize,
range: PixelRange,
process_block: impl Fn([u8; BYTES_PER_BLOCK]) -> [OutPixel; BLOCK_PIXELS],
) {
debug_assert_eq!(BLOCK_SIZE_X as usize * BLOCK_SIZE_Y as usize, BLOCK_PIXELS);
debug_assert!(range.width_offset < BLOCK_SIZE_X);
let encoded_blocks: &[[u8; BYTES_PER_BLOCK]] =
cast::from_bytes(encoded_blocks).expect("Invalid block buffer");
// Number of pixels already written to each output row.
let mut pixel_x = 0;
for (block_index, block) in encoded_blocks.iter().enumerate() {
// Only the first block skips leading pixels.
let pixel_offset_x = if block_index == 0 {
range.width_offset as usize
} else {
0
};
// Pixels to take from this block: limited by what is left of the block
// after the offset, by the total width, and by the pixels of the range
// that have not been covered by previous blocks.
let block_w = (BLOCK_SIZE_X as usize - pixel_offset_x)
.min(range.width as usize)
.min(
range.width as usize + range.width_offset as usize
- block_index * BLOCK_SIZE_X as usize,
);
let block = process_block(*block);
for y in range.rows.iter() {
// `y` is the row inside the block; the output row is relative to the
// first requested row.
let row_start =
(y - range.rows.start) as usize * stride + pixel_x * size_of::<OutPixel>();
let row = &mut decoded[row_start..(row_start + block_w * size_of::<OutPixel>())];
let row: &mut [OutPixel::Bytes] =
cast::from_bytes_mut(row).expect("Invalid output buffer");
debug_assert!(row.len() == block_w);
for x in 0..block_w {
row[x] = cast::IntoNeBytes::into_ne_bytes(
block[y as usize * BLOCK_SIZE_X as usize + x + pixel_offset_x],
);
}
}
pixel_x += block_w;
}
}
/// Reads a whole block-compressed surface from `r` and decodes it into
/// `image`, one line of blocks at a time.
///
/// Blocks are `BLOCK_SIZE_X` x `BLOCK_SIZE_Y` pixels and `BYTES_PER_BLOCK`
/// bytes each. `process_pixels` decodes one line of blocks into
/// `native_color`; if the image uses different channels, the pixels are
/// converted on the way.
///
/// # Errors
///
/// Returns an error if the line buffer cannot be allocated or reading from
/// `r` fails.
///
/// # Panics
///
/// Panics if the surface size is empty.
pub(crate) fn for_each_block_untyped<
    const BLOCK_SIZE_X: u8,
    const BLOCK_SIZE_Y: u8,
    const BYTES_PER_BLOCK: usize,
    OutPixel,
>(
    r: &mut dyn Read,
    image: &mut ImageViewMut,
    context: DecodeContext,
    native_color: ColorFormat,
    process_pixels: ProcessBlocksFn,
) -> Result<(), DecodingError> {
    // Non-generic worker that takes the block parameters as runtime values.
    fn inner(
        r: &mut dyn Read,
        image: &mut ImageViewMut,
        mut context: DecodeContext,
        block_size: (u8, u8),
        bytes_per_block: usize,
        native_color: ColorFormat,
        process_blocks: ProcessBlocksFn,
    ) -> Result<(), DecodingError> {
        let size = context.surface_size;
        assert!(!size.is_empty());

        let (block_size_x, block_size_y) = (block_size.0 as u32, block_size.1 as u32);
        let blocks_per_line = div_ceil(size.width, block_size_x);
        let block_lines = div_ceil(size.height, block_size_y);

        let mut line_buffer = UntypedLineBuffer::new(
            blocks_per_line as usize * bytes_per_block,
            block_lines,
            &mut context,
        )?;
        let mut conversion_buffer =
            ChannelConversionBuffer::new(native_color, image.color().channels);
        let row_pitch = image.row_pitch();

        let mut block_y = 0;
        while let Some(block_line) = line_buffer.next_line(r)? {
            // The last line of blocks may extend below the surface.
            let first_row = block_y * block_size_y;
            let pixel_rows = block_size_y.min(size.height - first_row);

            let buf = image.get_row_range(first_row as usize, pixel_rows as usize);
            let range = PixelRange {
                width: size.width,
                width_offset: 0,
                rows: RowRange::new(0, pixel_rows as u8),
            };
            conversion_buffer.process_blocks(
                bytes_per_block,
                block_size_x,
                block_line,
                buf,
                row_pitch,
                range,
                process_blocks,
            );

            block_y += 1;
        }

        Ok(())
    }

    debug_assert_eq!(image.color().precision, native_color.precision);
    debug_assert_eq!(
        native_color.bytes_per_pixel() as usize,
        size_of::<OutPixel>()
    );

    let block_size = (BLOCK_SIZE_X, BLOCK_SIZE_Y);
    inner(
        r,
        image,
        context,
        block_size,
        BYTES_PER_BLOCK,
        native_color,
        process_pixels,
    )
}
/// Reads a rectangle of a block-compressed surface from `r` and decodes it
/// into `image`.
///
/// The rectangle starts at `offset` within the surface described by
/// `context.surface_size` and has the size of `image`. Lines of blocks that
/// do not intersect the rectangle are skipped, including those after it, so
/// the reader is advanced past the whole surface.
///
/// # Errors
///
/// Returns an error if skipping, reading or allocating the line buffer fails.
#[allow(clippy::too_many_arguments)]
pub(crate) fn for_each_block_rect_untyped<
const BLOCK_SIZE_X: u8,
const BLOCK_SIZE_Y: u8,
const BYTES_PER_BLOCK: usize,
>(
r: &mut dyn ReadSeek,
image: &mut ImageViewMut,
offset: Offset,
context: DecodeContext,
native_color: ColorFormat,
process_pixels: ProcessBlocksFn,
) -> Result<(), DecodingError> {
// Non-generic worker that takes the block parameters as runtime values.
#[allow(clippy::too_many_arguments)]
fn inner(
r: &mut dyn ReadSeek,
image: &mut ImageViewMut,
offset: Offset,
mut context: DecodeContext,
block_size: (u8, u8),
bytes_per_block: usize,
native_color: ColorFormat,
process_blocks: ProcessBlocksFn,
) -> Result<(), DecodingError> {
let surface_size = context.surface_size;
let image_width = image.width();
let image_height = image.height();
let block_size_x = block_size.0 as u32;
let block_size_y = block_size.1 as u32;
// Number of blocks in one line of blocks of the whole surface.
let blocks_per_line = div_ceil(surface_size.width, block_size_x);
// Lines of blocks entirely above the rect, intersecting the rect, and
// entirely below the rect.
let skip_block_lines_before = offset.y / block_size_y;
let block_lines_to_read =
div_ceil(image_height + offset.y, block_size_y) - skip_block_lines_before;
let skip_block_lines_after = div_ceil(surface_size.height, block_size_y)
- skip_block_lines_before
- block_lines_to_read;
util::io_skip_exact(
r,
blocks_per_line as u64 * skip_block_lines_before as u64 * bytes_per_block as u64,
)?;
let mut line_buffer = UntypedLineBuffer::new(
blocks_per_line as usize * bytes_per_block,
block_lines_to_read,
&mut context,
)?;
let mut conversion_buffer =
ChannelConversionBuffer::new(native_color, image.color.channels);
// Byte range, within a line of blocks, of the blocks that intersect the
// rect horizontally.
let block_range_start = offset.x / block_size_x;
let block_range_end = div_ceil(offset.x + image_width, block_size_x);
let block_range = (block_range_start as usize * bytes_per_block)
..(block_range_end as usize * bytes_per_block);
// Pixels of the first block that lie to the left of the rect.
let width_offset = (offset.x % block_size_x) as u8;
let mut block_line_y = skip_block_lines_before;
// Next image row to write.
let mut pixel_row = 0;
while let Some(block_line) = line_buffer.next_line(r)? {
let block_line = &block_line[block_range.clone()];
// Rows of this line of blocks that are inside the rect, relative to the
// top of the line of blocks.
let rel_row_start = offset.y.saturating_sub(block_line_y * block_size_y);
let rel_row_end = offset.y + image_height - block_line_y * block_size_y;
debug_assert!(rel_row_start < block_size_y);
debug_assert!(rel_row_end > 0);
let row_start = rel_row_start as u8;
let row_end = rel_row_end.min(block_size_y) as u8;
let rows = RowRange::new(row_start, row_end);
let range = PixelRange {
width: image_width,
width_offset,
rows,
};
let row_pitch = image.row_pitch();
// Output starts at the first image row that has not been written yet.
let out = &mut image.data()[pixel_row * row_pitch..];
conversion_buffer.process_blocks(
bytes_per_block,
block_size_x,
block_line,
out,
row_pitch,
range,
process_blocks,
);
block_line_y += 1;
pixel_row += rows.len() as usize;
}
// Skip the lines of blocks below the rect.
util::io_skip_exact(
r,
blocks_per_line as u64 * skip_block_lines_after as u64 * bytes_per_block as u64,
)?;
Ok(())
}
debug_assert_eq!(image.color.precision, native_color.precision);
inner(
r,
image,
offset,
context,
(BLOCK_SIZE_X, BLOCK_SIZE_Y),
BYTES_PER_BLOCK,
native_color,
process_pixels,
)
}
/// Fixed-size scratch buffer used to decode pixels in a format's native
/// color before converting them to the channels of the output.
struct ChannelConversionBuffer {
/// Scratch storage of `BUFFER_BYTES` bytes. It is stored as `u32`s and
/// accessed as bytes through `cast::as_bytes_mut`.
buffer: [u32; Self::BUFFER_BYTES / 4],
/// Color format that the decode functions produce.
native_color: ColorFormat,
/// Channels of the output the decoded pixels are converted to.
target: Channels,
}
impl ChannelConversionBuffer {
    /// Size of the scratch buffer in bytes.
    const BUFFER_BYTES: usize = 3072;

    /// Creates a zeroed scratch buffer for decoding `native_color` pixels
    /// that are then converted to the `target` channels.
    fn new(native_color: ColorFormat, target: Channels) -> Self {
        Self {
            buffer: [0_u32; Self::BUFFER_BYTES / 4],
            native_color,
            target,
        }
    }

    /// Decodes the pixels in `encoded` with `f` and writes them to `out`
    /// using the target channels.
    ///
    /// If the native channels already match the target, `f` writes directly
    /// into `out`. Otherwise the pixels are decoded into the scratch buffer
    /// in chunks and each chunk is converted with `convert_channels_for`.
    ///
    /// If `out` holds no complete pixel, the conversion path does nothing.
    fn process_pixels(&mut self, encoded: &[u8], out: &mut [u8], f: ProcessPixelsFn) {
        // Fast path: no channel conversion necessary.
        if self.native_color.channels == self.target {
            f(encoded, out);
            return;
        }

        let out_bytes_per_pixel =
            ColorFormat::new(self.target, self.native_color.precision).bytes_per_pixel() as usize;
        let pixels = out.len() / out_bytes_per_pixel;
        if pixels == 0 {
            // Nothing to decode. Returning here also avoids dividing by zero
            // below, which would otherwise panic for zero-width rows.
            return;
        }
        let encoded_bytes_per_pixel = encoded.len() / pixels;
        debug_assert!(out_bytes_per_pixel % self.target.count() as usize == 0);

        let buffer_bytes_per_pixel = self.native_color.bytes_per_pixel() as usize;
        // Number of native pixels that fit into the scratch buffer.
        let buffer_pixels = Self::BUFFER_BYTES / buffer_bytes_per_pixel;
        let buffer = cast::as_bytes_mut(&mut self.buffer);

        for chunk_start in (0..pixels).step_by(buffer_pixels) {
            let chunk_end = (chunk_start + buffer_pixels).min(pixels);
            let chunk_size = chunk_end - chunk_start;

            let encoded_chunk = &encoded
                [chunk_start * encoded_bytes_per_pixel..chunk_end * encoded_bytes_per_pixel];
            let out_chunk =
                &mut out[chunk_start * out_bytes_per_pixel..chunk_end * out_bytes_per_pixel];
            let buffer_chunk = &mut buffer[..chunk_size * buffer_bytes_per_pixel];

            // Decode into the scratch buffer, then convert into the output.
            f(encoded_chunk, buffer_chunk);
            convert_channels_for(self.native_color, self.target, buffer_chunk, out_chunk);
        }
    }

    /// Decodes one line of blocks with `f` and writes the pixels selected by
    /// `range` to `out` using the target channels.
    ///
    /// `block_bytes` and `block_width` are the size of one encoded block in
    /// bytes and its width in pixels; `row_pitch` is the distance in bytes
    /// between consecutive rows of `out`.
    ///
    /// If the native channels already match the target, `f` writes directly
    /// into `out`. Otherwise the line is decoded into the scratch buffer in
    /// chunks of whole blocks, and every row of each chunk is converted with
    /// `convert_channels_for`.
    #[allow(clippy::too_many_arguments)]
    fn process_blocks(
        &mut self,
        block_bytes: usize,
        block_width: u32,
        mut encoded_blocks: &[u8],
        mut out: &mut [u8],
        row_pitch: usize,
        mut range: PixelRange,
        f: ProcessBlocksFn,
    ) {
        // Fast path: no channel conversion necessary.
        if self.native_color.channels == self.target {
            f(encoded_blocks, out, row_pitch, range);
            return;
        }

        let height = range.rows.len() as usize;
        debug_assert!(height > 0);

        let buffer_bytes_per_pixel = self.native_color.bytes_per_pixel() as usize;
        // Largest pixel rectangle of the requested height that fits into the
        // scratch buffer.
        let buffer_size = Size::new(
            (Self::BUFFER_BYTES / (buffer_bytes_per_pixel * height)) as u32,
            height as u32,
        );
        debug_assert!(buffer_size.width >= block_width);

        let out_bytes_per_pixel =
            ColorFormat::new(self.target, self.native_color.precision).bytes_per_pixel() as usize;
        let buffer = cast::as_bytes_mut(&mut self.buffer);

        // A partially covered first block is handled on its own so that the
        // chunks below always start at a block boundary.
        if range.width_offset != 0 {
            let offset_width = (block_width - range.width_offset as u32).min(range.width);
            let buffer_stride = offset_width as usize * buffer_bytes_per_pixel;
            let buffer = &mut buffer[..buffer_stride * height];

            f(
                &encoded_blocks[..block_bytes],
                buffer,
                buffer_stride,
                PixelRange {
                    width: offset_width,
                    width_offset: range.width_offset,
                    rows: range.rows,
                },
            );

            for y in 0..height {
                let buffer_row = &buffer[y * buffer_stride..(y + 1) * buffer_stride];
                let out_row = &mut out
                    [y * row_pitch..y * row_pitch + offset_width as usize * out_bytes_per_pixel];
                convert_channels_for(self.native_color, self.target, buffer_row, out_row);
            }

            // Continue after the first block.
            range.width_offset = 0;
            range.width -= offset_width;
            encoded_blocks = &encoded_blocks[block_bytes..];
            out = &mut out[offset_width as usize * out_bytes_per_pixel..];
        }
        debug_assert!(range.width_offset == 0);

        // Chunks are a whole number of blocks wide.
        let preferred_chunk_size = util::round_down_to_multiple(buffer_size.width, block_width);
        for chunk_start in (0..range.width).step_by(preferred_chunk_size as usize) {
            let chunk_end = (chunk_start + preferred_chunk_size).min(range.width);
            let chunk_size = chunk_end - chunk_start;

            let block_offset = (chunk_start / block_width) as usize;
            let block_count = div_ceil(chunk_size, block_width) as usize;
            let encoded_chunk = &encoded_blocks
                [block_offset * block_bytes..(block_offset + block_count) * block_bytes];
            let out_chunk = &mut out[chunk_start as usize * out_bytes_per_pixel..];

            // Rows are stored tightly packed in the scratch buffer.
            let buffer_stride = chunk_size as usize * buffer_bytes_per_pixel;
            let buffer_chunk = &mut buffer[..buffer_stride * height];

            f(
                encoded_chunk,
                buffer_chunk,
                buffer_stride,
                PixelRange {
                    width: chunk_size,
                    width_offset: 0,
                    rows: range.rows,
                },
            );

            for y in 0..height {
                let buffer_row = &buffer_chunk[y * buffer_stride..(y + 1) * buffer_stride];
                let out_row = &mut out_chunk
                    [y * row_pitch..y * row_pitch + chunk_size as usize * out_bytes_per_pixel];
                convert_channels_for(self.native_color, self.target, buffer_row, out_row);
            }
        }
    }

    /// Decodes one row of a bi-planar image with `f` and writes it to `out`
    /// using the target channels.
    ///
    /// `plane1` holds one element per pixel of the row and `plane2` one
    /// element per group of `info.sub_sampling.0` pixels. `range` describes
    /// the pixels of the row to decode.
    ///
    /// If the native channels already match the target, `f` writes directly
    /// into `out`. Otherwise the row is decoded into the scratch buffer in
    /// chunks of whole sub-sampling groups, and each chunk is converted with
    /// `convert_channels_for`.
    #[allow(clippy::too_many_arguments)]
    fn process_bi_planar(
        &mut self,
        info: BiPlaneInfo,
        mut plane1: &[u8],
        mut plane2: &[u8],
        mut out: &mut [u8],
        mut range: PlaneRange,
        f: ProcessBiPlanarFn,
    ) {
        // Fast path: no channel conversion necessary.
        if self.native_color.channels == self.target {
            f(plane1, plane2, out, range);
            return;
        }

        let out_bytes_per_pixel =
            ColorFormat::new(self.target, self.native_color.precision).bytes_per_pixel() as usize;
        let plane1_bytes_per_pixel = info.plane1_element_size as usize;

        debug_assert_eq!(range.width as usize * out_bytes_per_pixel, out.len());
        debug_assert_eq!(plane1.len(), range.width as usize * plane1_bytes_per_pixel);
        debug_assert_eq!(
            plane2.len(),
            div_ceil(range.offset + range.width, info.sub_sampling.0 as u32) as usize
                * info.plane2_element_size as usize
        );

        let buffer_bytes_per_pixel = self.native_color.bytes_per_pixel() as usize;
        // Number of native pixels that fit into the scratch buffer.
        let buffer_pixels = Self::BUFFER_BYTES / buffer_bytes_per_pixel;
        let buffer = cast::as_bytes_mut(&mut self.buffer);

        // A partially covered first sub-sampling group is handled on its own
        // so that the chunks below always start at a group boundary.
        if range.offset != 0 {
            let offset_width = (info.sub_sampling.0 as u32 - range.offset).min(range.width);
            let plane1_chunk = &plane1[..offset_width as usize * plane1_bytes_per_pixel];
            let plane2_chunk = &plane2[..info.plane2_element_size as usize];
            let buffer_chunk = &mut buffer[..offset_width as usize * buffer_bytes_per_pixel];
            let out_chunk = &mut out[..offset_width as usize * out_bytes_per_pixel];

            f(
                plane1_chunk,
                plane2_chunk,
                buffer_chunk,
                PlaneRange {
                    offset: range.offset,
                    width: offset_width,
                    y: range.y,
                },
            );
            convert_channels_for(self.native_color, self.target, buffer_chunk, out_chunk);

            // Continue after the first group.
            range.offset = 0;
            range.width -= offset_width;
            plane1 = &plane1[offset_width as usize * plane1_bytes_per_pixel..];
            plane2 = &plane2[info.plane2_element_size as usize..];
            out = &mut out[offset_width as usize * out_bytes_per_pixel..];
        }
        debug_assert!(range.offset == 0);

        // Chunks are a whole number of sub-sampling groups wide.
        let preferred_chunk_size =
            util::round_down_to_multiple(buffer_pixels, info.sub_sampling.0 as usize);
        for chunk_start in (0..range.width as usize).step_by(preferred_chunk_size) {
            let chunk_end = (chunk_start + preferred_chunk_size).min(range.width as usize);
            let chunk_size = chunk_end - chunk_start;

            let plane2_start = chunk_start / info.sub_sampling.0 as usize;
            let plane2_end = div_ceil(chunk_end, info.sub_sampling.0 as usize);

            let plane1_chunk =
                &plane1[chunk_start * plane1_bytes_per_pixel..chunk_end * plane1_bytes_per_pixel];
            let plane2_chunk = &plane2[plane2_start * info.plane2_element_size as usize
                ..plane2_end * info.plane2_element_size as usize];
            let buffer_chunk = &mut buffer[..chunk_size * buffer_bytes_per_pixel];
            let out_chunk =
                &mut out[chunk_start * out_bytes_per_pixel..chunk_end * out_bytes_per_pixel];

            f(
                plane1_chunk,
                plane2_chunk,
                buffer_chunk,
                PlaneRange {
                    offset: 0,
                    width: chunk_size as u32,
                    y: range.y,
                },
            );
            convert_channels_for(self.native_color, self.target, buffer_chunk, out_chunk);
        }
    }
}
/// Reads fixed-size lines from a reader, fetching several lines per read.
struct UntypedLineBuffer {
/// Storage for a whole number of lines.
buf: Box<[u8]>,
/// Number of bytes at the start of `buf` that hold data from the last read.
buf_filled: usize,
/// Length of one line in bytes.
bytes_per_line: usize,
/// Number of lines that have not been read from the reader yet.
lines_on_disk: usize,
/// Byte offset in `buf` of the next line to hand out.
current_line_start: usize,
}
impl UntypedLineBuffer {
    /// Creates a buffer for reading `height` lines of `bytes_per_line` bytes.
    ///
    /// The buffer holds as many lines as fit into roughly 64 KiB, but at
    /// least one line and at most `height` lines. Nothing is read until
    /// [`next_line`](Self::next_line) is called.
    ///
    /// Degenerate inputs are supported: with `height == 0` the buffer yields
    /// no lines, and with `bytes_per_line == 0` it yields `height` empty
    /// lines.
    ///
    /// # Errors
    ///
    /// Returns the error of `context.alloc` if the buffer cannot be
    /// allocated.
    fn new(
        bytes_per_line: usize,
        height: u32,
        context: &mut DecodeContext,
    ) -> Result<Self, DecodingError> {
        const TARGET_BUFFER_SIZE: usize = 64 * 1024;

        let height = height as usize;
        // `checked_div` avoids a division by zero for empty lines, and
        // `height.max(1)` keeps the upper clamp bound from dropping below the
        // lower one (`clamp` panics if min > max).
        let lines_in_buffer = TARGET_BUFFER_SIZE
            .checked_div(bytes_per_line)
            .unwrap_or(1)
            .clamp(1, height.max(1));
        let buf_len = lines_in_buffer * bytes_per_line;
        let buf = context.alloc(buf_len)?;

        Ok(Self {
            buf,
            buf_filled: 0,
            bytes_per_line,
            lines_on_disk: height,
            // Start "at the end" so that the first call to `next_line`
            // triggers a read.
            current_line_start: buf_len,
        })
    }

    /// Returns the next line, reading more lines from `r` when all buffered
    /// lines have been handed out.
    ///
    /// Returns `Ok(None)` once all lines have been returned.
    ///
    /// # Errors
    ///
    /// Returns an error if reading from `r` fails.
    fn next_line<R: Read + ?Sized>(&mut self, r: &mut R) -> Result<Option<&[u8]>, DecodingError> {
        if self.current_line_start >= self.buf_filled {
            if self.lines_on_disk == 0 {
                return Ok(None);
            }

            // Refill the buffer with as many lines as fit and are left. Empty
            // lines are handed out one per refill.
            let buffer_capacity_lines = self
                .buf
                .len()
                .checked_div(self.bytes_per_line)
                .unwrap_or(1);
            let lines_to_read = buffer_capacity_lines.min(self.lines_on_disk);
            self.lines_on_disk -= lines_to_read;
            self.buf_filled = lines_to_read * self.bytes_per_line;
            r.read_exact(&mut self.buf[..self.buf_filled])?;
            self.current_line_start = 0;
        }

        let line_end = self.current_line_start + self.bytes_per_line;
        let line = &self.buf[self.current_line_start..line_end];
        self.current_line_start = line_end;
        Ok(Some(line))
    }
}
/// Describes which pixels of a row of a bi-planar image are to be decoded.
pub(crate) struct PlaneRange {
/// Number of leading pixels of the first sub-sampling group that are
/// skipped. `process_bi_planar_helper` asserts that it is smaller than the
/// horizontal sub-sampling factor.
pub offset: u32,
/// Number of pixels to decode.
pub width: u32,
/// Row index within the current vertical sub-sampling group; it is passed
/// through to the per-group decode function.
pub y: u8,
}
/// Signature of a function that decodes one row of a bi-planar image.
///
/// `plane1` and `plane2` hold the encoded data of the two planes for the row,
/// `decoded` receives the decoded pixels, and `range` describes the pixels to
/// decode.
pub(crate) type ProcessBiPlanarFn =
fn(plane1: &[u8], plane2: &[u8], decoded: &mut [u8], range: PlaneRange);
/// Decodes one row of a bi-planar image.
///
/// `plane1` holds one little-endian `Plane1` element per pixel and `plane2`
/// one little-endian `Plane2` element per group of `SUB_SAMPLING_X` pixels.
/// For every group, `f` receives the group's `Plane1` elements, its `Plane2`
/// element and `range.y`, and returns the decoded pixels of the group.
/// Groups that are only partially inside the range are padded with
/// `Plane1::default()`, and only the first pixels of the result are used.
///
/// # Panics
///
/// Panics if a buffer cannot be reinterpreted as a slice of the respective
/// element byte arrays or is too short for the requested range.
#[inline]
pub(crate) fn process_bi_planar_helper<
const SUB_SAMPLING_X: usize,
Plane1: cast::FromLeBytes + Copy + Default,
Plane2: cast::FromLeBytes,
OutPixel: cast::IntoNeBytes + Copy,
>(
plane1: &[u8],
plane2: &[u8],
decoded: &mut [u8],
mut range: PlaneRange,
f: impl Fn([Plane1; SUB_SAMPLING_X], Plane2, u8) -> [OutPixel; SUB_SAMPLING_X],
) {
let mut plane1: &[Plane1::Bytes] = cast::from_bytes(plane1).expect("Invalid plane1 buffer");
let mut plane2: &[Plane2::Bytes] = cast::from_bytes(plane2).expect("Invalid plane2 buffer");
let mut decoded: &mut [OutPixel::Bytes] =
cast::from_bytes_mut(decoded).expect("Invalid output buffer");
// Partially covered first group: decode the `w` pixels that are inside the
// range, then continue with the following group.
if range.offset > 0 {
debug_assert!(range.offset < SUB_SAMPLING_X as u32);
let w = (SUB_SAMPLING_X - range.offset as usize).min(range.width as usize);
let mut plane1_items = [Plane1::default(); SUB_SAMPLING_X];
for x in 0..w {
plane1_items[x] = Plane1::from_le_bytes(plane1[x]);
}
let plane2_item = Plane2::from_le_bytes(plane2[0]);
let out = f(plane1_items, plane2_item, range.y);
for x in 0..w {
decoded[x] = cast::IntoNeBytes::into_ne_bytes(out[x]);
}
range.offset = 0;
range.width -= w as u32;
plane1 = &plane1[w..];
plane2 = &plane2[1..];
decoded = &mut decoded[w..];
}
// Groups that are completely inside the range.
let full = range.width as usize / SUB_SAMPLING_X;
let full_w = full * SUB_SAMPLING_X;
let plane1_full: &[[Plane1::Bytes; SUB_SAMPLING_X]] =
cast::as_array_chunks(&plane1[..full_w]).expect("Invalid plane1 buffer");
let plane2_full: &[Plane2::Bytes] = &plane2[..full];
let decoded_full: &mut [[OutPixel::Bytes; SUB_SAMPLING_X]] =
cast::as_array_chunks_mut(&mut decoded[..full * SUB_SAMPLING_X])
.expect("Invalid output buffer");
for x in 0..full {
let plane1_items = plane1_full[x].map(Plane1::from_le_bytes);
let plane2_item = Plane2::from_le_bytes(plane2_full[x]);
let out = f(plane1_items, plane2_item, range.y);
decoded_full[x] = out.map(cast::IntoNeBytes::into_ne_bytes);
}
// Partially covered last group: decode the remaining `rest_w` pixels.
let rest_w = range.width as usize - full * SUB_SAMPLING_X;
if rest_w > 0 {
let mut plane1_items = [Plane1::default(); SUB_SAMPLING_X];
for x in 0..rest_w {
plane1_items[x] = Plane1::from_le_bytes(plane1[full_w + x]);
}
let plane2_item = Plane2::from_le_bytes(plane2[full]);
let out = f(plane1_items, plane2_item, range.y);
for x in 0..rest_w {
decoded[full_w + x] = cast::IntoNeBytes::into_ne_bytes(out[x]);
}
}
}
/// Layout of the two planes of a bi-planar format.
#[derive(Debug, Clone, Copy)]
pub(crate) struct BiPlaneInfo {
/// Size in bytes of one element of the first plane; the first plane stores
/// one element per pixel.
pub plane1_element_size: u8,
/// Size in bytes of one element of the second plane; the second plane
/// stores one element per sub-sampling group.
pub plane2_element_size: u8,
/// Horizontal and vertical sub-sampling factors of the second plane.
pub sub_sampling: (u8, u8),
}
/// Reads a whole bi-planar surface from `r` and decodes it into `image`.
///
/// The first plane is read into memory completely. The second plane is then
/// read line by line, and every one of its lines is combined with up to
/// `info.sub_sampling.1` rows of the first plane to decode the image rows.
///
/// # Errors
///
/// Returns an error if allocating or reading fails.
pub(crate) fn for_each_bi_planar(
    r: &mut dyn Read,
    image: &mut ImageViewMut,
    mut context: DecodeContext,
    native_color: ColorFormat,
    info: BiPlaneInfo,
    process_bi_planar: ProcessBiPlanarFn,
) -> Result<(), DecodingError> {
    let size = context.surface_size;
    debug_assert_eq!(image.color().precision, native_color.precision);

    // Plane 1: one element per pixel, read in one go.
    let plane1_stride = size.width as usize * info.plane1_element_size as usize;
    let plane1 = context.alloc_read(plane1_stride as u64 * size.height as u64, r)?;

    // Plane 2: one element per sub-sampling group, read line by line.
    let (sub_sampling_x, sub_sampling_y) =
        (info.sub_sampling.0 as u32, info.sub_sampling.1 as u32);
    let plane2_stride =
        div_ceil(size.width, sub_sampling_x) as usize * info.plane2_element_size as usize;
    let plane2_lines = div_ceil(size.height, sub_sampling_y);
    let mut line_buffer = UntypedLineBuffer::new(plane2_stride, plane2_lines, &mut context)?;

    let mut conversion_buffer = ChannelConversionBuffer::new(native_color, image.color().channels);

    let mut y: usize = 0;
    while let Some(uv_line) = line_buffer.next_line(r)? {
        debug_assert!(y < size.height as usize);

        // Every line of plane 2 is shared by `sub_sampling_y` image rows; the
        // last group may be cut off by the bottom of the surface.
        for y_offset in 0..sub_sampling_y as u8 {
            if y >= size.height as usize {
                break;
            }

            let plane1_line = &plane1[y * plane1_stride..(y + 1) * plane1_stride];
            let out_line = image.get_row(y);
            let range = PlaneRange {
                offset: 0,
                width: size.width,
                y: y_offset,
            };
            conversion_buffer.process_bi_planar(
                info,
                plane1_line,
                uv_line,
                out_line,
                range,
                process_bi_planar,
            );

            y += 1;
        }
    }

    Ok(())
}
/// Reads a rectangle of a bi-planar surface from `r` and decodes it into
/// `image`.
///
/// The rectangle starts at `offset` within the surface described by
/// `context.surface_size` and has the size of `image`. Data outside the
/// rectangle's rows is skipped in both planes, including the data after it,
/// so the reader is advanced past the whole surface.
///
/// # Errors
///
/// Returns an error if skipping, allocating or reading fails.
pub(crate) fn for_each_bi_planar_rect(
r: &mut dyn ReadSeek,
image: &mut ImageViewMut,
offset: Offset,
mut context: DecodeContext,
native_color: ColorFormat,
info: BiPlaneInfo,
process_bi_planar: ProcessBiPlanarFn,
) -> Result<(), DecodingError> {
let surface_size = context.surface_size;
let image_width = image.width();
let image_height = image.height();
debug_assert_eq!(image.color().precision, native_color.precision);
// Plane 1: skip the rows above the rect, read the rect's rows (at full
// surface width) into memory, and skip the rows below the rect.
let plain1_bytes_per_line = surface_size.width as usize * info.plane1_element_size as usize;
util::io_skip_exact(r, plain1_bytes_per_line as u64 * offset.y as u64)?;
let plane1 = context.alloc_read(plain1_bytes_per_line as u64 * image_height as u64, r)?;
util::io_skip_exact(
r,
plain1_bytes_per_line as u64 * (surface_size.height - offset.y - image_height) as u64,
)?;
let sub_sampling_x = info.sub_sampling.0 as u32;
let sub_sampling_y = info.sub_sampling.1 as u32;
// Plane 2: number of lines entirely above and entirely below the rect.
let uv_before = offset.y / sub_sampling_y;
let uv_after = div_ceil(surface_size.height, sub_sampling_y)
- div_ceil(offset.y + image_height, sub_sampling_y);
let uv_width = div_ceil(surface_size.width, sub_sampling_x);
let uv_lines = div_ceil(surface_size.height, sub_sampling_y) - uv_before - uv_after;
let uv_bytes_per_line = uv_width as usize * info.plane2_element_size as usize;
util::io_skip_exact(r, uv_before as u64 * uv_bytes_per_line as u64)?;
let mut line_buffer = UntypedLineBuffer::new(uv_bytes_per_line, uv_lines, &mut context)?;
let mut conversion_buffer = ChannelConversionBuffer::new(native_color, image.color().channels);
// `y` is a row of the surface. It starts at the first row covered by the
// first plane 2 line that is read, which may lie above the rect.
let mut y: usize = uv_before as usize * sub_sampling_y as usize;
while let Some(uv_line) = line_buffer.next_line(r)? {
debug_assert!(y < (offset.y + image_height) as usize);
for y_offset in 0..sub_sampling_y as u8 {
// Rows above the rect are not decoded.
if y < offset.y as usize {
y += 1;
continue;
}
// Rows below the rect end the processing of this plane 2 line.
if y >= (offset.y + image_height) as usize {
break;
}
// Cut the rect's columns out of the plane 1 row. `plane1` only holds the
// rect's rows, so rows are indexed relative to `offset.y`.
let plane1_start = (y - offset.y as usize) * plain1_bytes_per_line
+ offset.x as usize * info.plane1_element_size as usize;
let plane1_line = &plane1[plane1_start
..plane1_start + image_width as usize * info.plane1_element_size as usize];
let out_line = image.get_row(y - offset.y as usize);
// Cut out the plane 2 elements whose groups intersect the rect.
let uv_start = (offset.x / sub_sampling_x) as usize * info.plane2_element_size as usize;
let uv_end = div_ceil(offset.x + image_width, sub_sampling_x) as usize
* info.plane2_element_size as usize;
let uv_line = &uv_line[uv_start..uv_end];
// Pixels of the first group that lie to the left of the rect. This
// shadows the `offset` parameter for the rest of this iteration only.
let offset = offset.x % sub_sampling_x;
conversion_buffer.process_bi_planar(
info,
plane1_line,
uv_line,
out_line,
PlaneRange {
offset,
width: image_width,
y: y_offset,
},
process_bi_planar,
);
y += 1;
}
}
// Skip the plane 2 lines below the rect.
util::io_skip_exact(r, uv_after as u64 * uv_bytes_per_line as u64)?;
Ok(())
}
/// Fills the pixel data of `image` with bytes read from `r`.
///
/// A contiguous image is filled with a single read; otherwise every row is
/// read separately.
///
/// # Errors
///
/// Returns the first error reported by `r`.
pub(crate) fn read_exact_image<R: Read + ?Sized>(
    r: &mut R,
    image: &mut ImageViewMut,
) -> Result<(), std::io::Error> {
    if !image.is_contiguous() {
        return image.rows_mut().try_for_each(|row| r.read_exact(row));
    }

    r.read_exact(image.data())
}
/// Calls `f` on the pixel data of `image`.
///
/// A contiguous image is passed to `f` as one slice; otherwise `f` is called
/// once per row.
pub(crate) fn for_each_slice(image: &mut ImageViewMut, mut f: impl FnMut(&mut [u8])) {
    match image.is_contiguous() {
        true => f(image.data()),
        false => image.rows_mut().for_each(f),
    }
}