use anyhow::{Context, Result};
use fast_image_resize::images::Image;
use fast_image_resize::{FilterType, PixelType, ResizeAlg, ResizeOptions, Resizer};
use mozjpeg::{ColorSpace, Compress, Decompress};
use std::sync::OnceLock;
/// Returns the lazily-built table mapping an 8-bit sRGB code value to a
/// 16-bit linear-light value.
///
/// Each entry applies the piecewise sRGB decoding curve (linear segment up to
/// 0.04045, 2.4 power law above it) and rounds to the nearest step of
/// `0..=65535`. The table is computed once and shared for the process lifetime.
fn fwd_lut() -> &'static [u16; 256] {
    static LUT: OnceLock<[u16; 256]> = OnceLock::new();
    LUT.get_or_init(|| {
        std::array::from_fn(|code| {
            let srgb = code as f64 / 255.0;
            let linear = if srgb > 0.04045 {
                ((srgb + 0.055) / 1.055).powf(2.4)
            } else {
                srgb / 12.92
            };
            // Adding 0.5 before the truncating cast rounds to nearest.
            (linear * 65535.0 + 0.5) as u16
        })
    })
}
/// Returns the lazily-built table mapping a 16-bit linear-light value back to
/// an 8-bit sRGB code value.
///
/// Each entry applies the piecewise sRGB encoding curve (linear segment up to
/// 0.0031308, 1/2.4 power law above it) and rounds to the nearest 8-bit code.
/// The 64 KiB table lives on the heap and is computed once per process.
fn back_lut() -> &'static [u8; 65536] {
    static LUT: OnceLock<Box<[u8; 65536]>> = OnceLock::new();
    LUT.get_or_init(|| {
        let table: Box<[u8]> = (0..65536u32)
            .map(|code| {
                let linear = f64::from(code) / 65535.0;
                let srgb = if linear > 0.003_130_8 {
                    1.055 * linear.powf(1.0 / 2.4) - 0.055
                } else {
                    12.92 * linear
                };
                // Adding 0.5 before the truncating cast rounds to nearest.
                (srgb * 255.0 + 0.5) as u8
            })
            .collect();
        // The iterator yields exactly 65536 items, so the conversion to a
        // fixed-size array cannot fail.
        table.try_into().unwrap()
    })
}
/// Settings for one decode → resize → encode run.
#[derive(Debug, Clone, PartialEq)]
pub struct Params {
    /// Upper bound on the output width in pixels; the image is scaled down
    /// (never up) to fit, preserving aspect ratio.
    pub max_width: u32,
    /// Upper bound on the output height in pixels; see `max_width`.
    pub max_height: u32,
    /// JPEG quality value handed to the encoder's `set_quality`.
    pub quality: f32,
    /// When `true`, the encoder is switched to its fastest defaults and
    /// Huffman-table optimisation is then re-enabled.
    pub fast_preset: bool,
    /// Number of worker threads the resize step may use; `0` or `1` keeps the
    /// resize on the calling thread.
    pub parallel: usize,
}
/// Computes the output size that fits `src_w`×`src_h` inside
/// `max_w`×`max_h` while keeping the aspect ratio.
///
/// The scale factor is capped at 1.0, so images already within bounds keep
/// their size. Each resulting dimension is rounded to nearest and is at
/// least 1.
fn fit_dims(src_w: usize, src_h: usize, max_w: u32, max_h: u32) -> (usize, usize) {
    let ratio_w = f64::from(max_w) / src_w as f64;
    let ratio_h = f64::from(max_h) / src_h as f64;
    let scale = ratio_w.min(ratio_h.min(1.0));
    let scaled = |len: usize| ((len as f64 * scale).round() as usize).max(1);
    (scaled(src_w), scaled(src_h))
}
/// Picks the smallest DCT scale numerator `n` (for a scale of `n/8`) whose
/// scaled source still covers the target size times `margin`.
///
/// Scaled dimensions are computed as `ceil(src * n / 8)`. If no reduced scale
/// is large enough, the first numerator that reproduces the full source size
/// is returned, which is 8 for anything but very small images.
fn dct_scale_num(src_w: usize, src_h: usize, dst_w: usize, dst_h: usize, margin: f64) -> u8 {
    let need_w = (dst_w as f64 * margin).ceil() as usize;
    let need_h = (dst_h as f64 * margin).ceil() as usize;
    let scaled = |len: usize, num: u8| (len * usize::from(num)).div_ceil(8);
    (1..=8u8)
        .find(|&num| {
            let (sw, sh) = (scaled(src_w, num), scaled(src_h, num));
            let covers_target = sw >= need_w && sh >= need_h;
            let is_full_size = sw >= src_w && sh >= src_h;
            covers_target || is_full_size
        })
        .unwrap_or(8)
}
/// Reads the oversampling margin used when choosing the DCT scale.
///
/// Taken from the `OXIMG_DCT_MARGIN` environment variable when it is set and
/// parses as a float; otherwise defaults to 1.7.
fn dct_margin() -> f64 {
    match std::env::var("OXIMG_DCT_MARGIN") {
        Ok(raw) => raw.parse().unwrap_or(1.7),
        Err(_) => 1.7,
    }
}
/// Decodes an in-memory JPEG, shrinks it to fit the bounds in `p`, and
/// re-encodes it as JPEG.
///
/// # Errors
/// Propagates any error from decoding/resizing or from encoding.
pub fn process(jpeg: &[u8], p: &Params) -> Result<Vec<u8>> {
    decode_and_resize(jpeg, p.max_width, p.max_height, p.parallel)
        .and_then(|(pixels, dst_w, dst_h)| encode(&pixels, dst_w, dst_h, p))
}
/// Same pipeline as [`process`], but reads the JPEG from a file path.
///
/// # Errors
/// Returns an error with context `"open/parse JPEG"` when the decoder cannot
/// be created for `path`, and propagates decode, resize and encode errors.
pub fn process_path(path: &std::path::Path, p: &Params) -> Result<Vec<u8>> {
    let dec = Decompress::new_path(path).context("open/parse JPEG")?;
    // Borrow this thread's scratch buffers only for the decode/resize step.
    let decoded = SCRATCH.with(|cell| {
        let mut scratch = cell.borrow_mut();
        decode_resize(&mut scratch, dec, p.max_width, p.max_height, p.parallel)
    });
    let (pixels, dst_w, dst_h) = decoded?;
    encode(&pixels, dst_w, dst_h, p)
}
/// Working buffers reused between calls so repeated conversions on one thread
/// do not reallocate them each time.
#[derive(Default)]
struct Scratch {
    /// 8-bit RGB scanlines straight from the decoder: the whole image on the
    /// sRGB path, one chunk of rows at a time on the linear-light path.
    chunk8: Vec<u8>,
    /// Decoded image converted to 16-bit linear light (resize input).
    src16: Vec<u16>,
    /// Resized image in 16-bit linear light (resize output).
    dst16: Vec<u16>,
    /// Resizer kept for the single-threaded resize path; created on first use.
    resizer: Option<Resizer>,
}

thread_local! {
    // One set of scratch buffers per thread, behind a `RefCell` for mutable access.
    static SCRATCH: std::cell::RefCell<Scratch> = std::cell::RefCell::default();
}
/// Views a `u16` slice as its raw bytes, in native byte order.
fn u16_as_bytes(buf: &[u16]) -> &[u8] {
    let byte_len = std::mem::size_of_val(buf);
    let start = buf.as_ptr() as *const u8;
    // SAFETY: `start` points to `byte_len` initialized bytes owned by `buf`,
    // `u8` has alignment 1 and every bit pattern is a valid `u8`, and the
    // returned borrow shares `buf`'s lifetime.
    unsafe { std::slice::from_raw_parts(start, byte_len) }
}
/// Views a mutable `u16` slice as its raw bytes, in native byte order.
fn u16_as_bytes_mut(buf: &mut [u16]) -> &mut [u8] {
    let byte_len = buf.len() * std::mem::size_of::<u16>();
    let start = buf.as_mut_ptr() as *mut u8;
    // SAFETY: `start` points to `byte_len` initialized bytes exclusively
    // borrowed through `buf`, `u8` has alignment 1, any byte written yields a
    // valid `u16`, and the returned borrow takes over `buf`'s lifetime.
    unsafe { std::slice::from_raw_parts_mut(start, byte_len) }
}
/// Decodes an in-memory JPEG and shrinks it to fit `max_w`×`max_h`.
///
/// Returns the packed 8-bit RGB pixels together with the output width and
/// height. `parallel` is the number of threads the resize step may use.
///
/// # Errors
/// Returns an error with context `"invalid JPEG"` when the decoder cannot be
/// created for `jpeg`, and propagates decode and resize errors.
pub fn decode_and_resize(
    jpeg: &[u8],
    max_w: u32,
    max_h: u32,
    parallel: usize,
) -> Result<(Vec<u8>, usize, usize)> {
    let dec = Decompress::new_mem(jpeg).context("invalid JPEG")?;
    SCRATCH.with(|cell| decode_resize(&mut cell.borrow_mut(), dec, max_w, max_h, parallel))
}
/// Resizes `src_bytes` (`dec_w`×`dec_h`) into `dst_bytes` (`dst_w`×`dst_h`)
/// with a Lanczos3 convolution, optionally splitting the work across threads.
///
/// With `threads <= 1`, or when the output has fewer than two rows per
/// thread, the whole image is resized on the calling thread using the cached
/// resizer in `fallback` (created on first use). Otherwise the destination is
/// cut into horizontal bands of `ceil(dst_h / threads)` rows; each band is
/// resized on its own scoped thread from the matching vertical crop of the
/// source.
///
/// # Errors
/// Returns any error from constructing the image views or from the resizer.
///
/// # Panics
/// Panics if a band thread panics.
// The argument list mirrors the resizer's source/destination description;
// bundling it into a struct would only add ceremony for a private helper.
#[allow(clippy::too_many_arguments)]
fn resize_bands(
    src_bytes: &[u8],
    dec_w: usize,
    dec_h: usize,
    dst_bytes: &mut [u8],
    dst_w: usize,
    dst_h: usize,
    px: PixelType,
    threads: usize,
    fallback: &mut Option<Resizer>,
) -> Result<()> {
    let opts = ResizeOptions::new().resize_alg(ResizeAlg::Convolution(FilterType::Lanczos3));
    let src_view =
        fast_image_resize::images::ImageRef::new(dec_w as u32, dec_h as u32, src_bytes, px)?;

    // `saturating_mul` keeps an absurdly large thread count from overflowing
    // the threshold; such a count simply selects the single-threaded path.
    if threads <= 1 || dst_h < threads.saturating_mul(2) {
        let mut dst_view = Image::from_slice_u8(dst_w as u32, dst_h as u32, dst_bytes, px)?;
        let resizer = fallback.get_or_insert_with(Resizer::new);
        resizer.resize(&src_view, &mut dst_view, &opts)?;
        return Ok(());
    }

    let row_bytes = dst_w * px.size();
    let rows_per = dst_h.div_ceil(threads);
    let src_h = dec_h as f64;
    // Source rows per destination row.
    let sy = src_h / dst_h as f64;
    std::thread::scope(|sc| -> Result<()> {
        let mut handles = Vec::new();
        for (i, band) in dst_bytes.chunks_mut(rows_per * row_bytes).enumerate() {
            let band_h = band.len() / row_bytes;
            let crop_top = (i * rows_per) as f64 * sy;
            let nominal_h = band_h as f64 * sy;
            // `crop_top + nominal_h` equals `dec_h` for the last band in exact
            // arithmetic, but the rounded products can overshoot by a few
            // ulps, which would make the crop box extend past the source.
            // Clamp only in that case so all other bands are unaffected.
            let crop_h = if crop_top + nominal_h > src_h {
                src_h - crop_top
            } else {
                nominal_h
            };
            let src_view = &src_view;
            handles.push(sc.spawn(move || -> Result<()> {
                let mut dst_view = Image::from_slice_u8(dst_w as u32, band_h as u32, band, px)?;
                Resizer::new().resize(
                    src_view,
                    &mut dst_view,
                    &opts.crop(0.0, crop_top, dec_w as f64, crop_h),
                )?;
                Ok(())
            }));
        }
        // Report the first band error; the scope joins any remaining threads.
        for h in handles {
            h.join().expect("resize band panicked")?;
        }
        Ok(())
    })
}
/// Decodes a JPEG from `dec` and shrinks it to fit `max_w`×`max_h`, returning
/// packed 8-bit RGB pixels plus the output width and height.
///
/// The decoder is first asked for the smallest DCT scale that still covers
/// the target (see `dct_scale_num`). Three paths follow:
///
/// * the decoded size already equals the target — pixels are returned as
///   decoded, with no resize;
/// * linear-light resize (the default) — scanlines are decoded in chunks,
///   converted to 16-bit linear values, resized, and converted back to sRGB;
/// * sRGB resize (`OXIMG_RESIZE=srgb`) — the 8-bit pixels are resized directly.
///
/// When `OXIMG_TIMING` is set, one timing line is printed to stderr.
/// Buffers in `s` are resized as needed and left populated for reuse.
///
/// # Errors
/// Propagates decoder errors (with context) and resize errors.
fn decode_resize<R: std::io::BufRead>(
    s: &mut Scratch,
    mut dec: Decompress<R>,
    max_w: u32,
    max_h: u32,
    parallel: usize,
) -> Result<(Vec<u8>, usize, usize)> {
    let timing = std::env::var("OXIMG_TIMING").is_ok();
    let decode_clock = std::time::Instant::now();
    let ms = |d: std::time::Duration| d.as_secs_f64() * 1e3;

    let (src_w, src_h) = dec.size();
    let (dst_w, dst_h) = fit_dims(src_w, src_h, max_w, max_h);
    let scale_num = dct_scale_num(src_w, src_h, dst_w, dst_h, dct_margin());
    dec.scale(scale_num);

    let mut started = dec.rgb().context("decode start failed")?;
    let dec_w = started.width();
    let dec_h = started.height();
    let row_bytes = dec_w * 3;
    let src_len = dec_w * dec_h * 3;
    let needs_resize = (dec_w, dec_h) != (dst_w, dst_h);
    let linear = linear_light() && needs_resize;

    // Path 1: the decoder already produced the target size.
    if !needs_resize {
        let mut out = vec![0u8; src_len];
        started
            .read_scanlines_into(&mut out)
            .context("decode failed")?;
        started.finish().context("decode finish failed")?;
        if timing {
            eprintln!(
                "timing decode({dec_w}x{dec_h})={:.1}ms resize=0 (exact)",
                ms(decode_clock.elapsed())
            );
        }
        return Ok((out, dst_w, dst_h));
    }

    // Path 2: resize the 8-bit sRGB samples directly.
    if !linear {
        s.chunk8.resize(src_len, 0);
        started
            .read_scanlines_into(&mut s.chunk8)
            .context("decode failed")?;
        started.finish().context("decode finish failed")?;
        let t_decode = decode_clock.elapsed();
        let resize_clock = std::time::Instant::now();

        let mut out = vec![0u8; dst_w * dst_h * 3];
        resize_bands(
            &s.chunk8,
            dec_w,
            dec_h,
            &mut out,
            dst_w,
            dst_h,
            PixelType::U8x3,
            parallel,
            &mut s.resizer,
        )?;
        if timing {
            eprintln!(
                "timing decode({dec_w}x{dec_h})={:.1}ms resize={:.1}ms",
                ms(t_decode),
                ms(resize_clock.elapsed())
            );
        }
        return Ok((out, dst_w, dst_h));
    }

    // Path 3: resize in linear light. Decode in chunks of about 256 KiB
    // (at least one row) and widen each chunk to 16-bit as it arrives.
    let fwd = fwd_lut();
    s.src16.resize(src_len, 0);
    let chunk_rows = (256 * 1024 / row_bytes).clamp(1, dec_h);
    let chunk_len = chunk_rows * row_bytes;
    s.chunk8.resize(chunk_len, 0);

    let mut filled = 0usize;
    while filled < src_len {
        let want = (dec_h * row_bytes - filled).min(chunk_len);
        let got = started
            .read_scanlines_into(&mut s.chunk8[..want])
            .context("decode failed")?
            .len();
        anyhow::ensure!(got > 0, "decoder returned no scanlines");
        let linear_rows = &mut s.src16[filled..filled + got];
        for (dst, &code) in linear_rows.iter_mut().zip(&s.chunk8[..got]) {
            *dst = fwd[usize::from(code)];
        }
        filled += got;
    }
    started.finish().context("decode finish failed")?;
    let t_decode = decode_clock.elapsed();
    let resize_clock = std::time::Instant::now();

    s.dst16.resize(dst_w * dst_h * 3, 0);
    resize_bands(
        u16_as_bytes(&s.src16),
        dec_w,
        dec_h,
        u16_as_bytes_mut(&mut s.dst16),
        dst_w,
        dst_h,
        PixelType::U16x3,
        parallel,
        &mut s.resizer,
    )?;

    // Narrow the resized linear samples back to 8-bit sRGB.
    let back = back_lut();
    let out: Vec<u8> = s
        .dst16
        .iter()
        .map(|&lin| back[usize::from(lin)])
        .collect();
    if timing {
        eprintln!(
            "timing decode+fwd({dec_w}x{dec_h})={:.1}ms resize+back={:.1}ms",
            ms(t_decode),
            ms(resize_clock.elapsed())
        );
    }
    Ok((out, dst_w, dst_h))
}
/// Reports whether resizing should happen in linear light.
///
/// This is the default; it is turned off only when the `OXIMG_RESIZE`
/// environment variable is set to exactly `srgb`.
fn linear_light() -> bool {
    match std::env::var("OXIMG_RESIZE") {
        Ok(mode) => mode != "srgb",
        Err(_) => true,
    }
}
pub fn encode(rgb: &[u8], w: usize, h: usize, p: &Params) -> Result<Vec<u8>> {
let mut comp = Compress::new(ColorSpace::JCS_RGB);
if p.fast_preset {
comp.set_fastest_defaults();
comp.set_optimize_coding(true);
}
comp.set_size(w, h);
comp.set_quality(p.quality);
let mut started = comp.start_compress(Vec::with_capacity(64 * 1024))?;
started.write_scanlines(rgb)?;
Ok(started.finish()?)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Resizes an 8-bit RGB buffer through `resize_bands` with no cached
    /// resizer and returns the output pixels.
    fn resize_rgb8(
        src: &[u8],
        (sw, sh): (usize, usize),
        (dw, dh): (usize, usize),
        threads: usize,
    ) -> Vec<u8> {
        let mut out = vec![0u8; dw * dh * 3];
        resize_bands(
            src,
            sw,
            sh,
            &mut out,
            dw,
            dh,
            PixelType::U8x3,
            threads,
            &mut None,
        )
        .unwrap();
        out
    }

    // Landscape and portrait sources both shrink to fit a square box.
    #[test]
    fn fit_dims_shrinks_proportionally() {
        let landscape = fit_dims(7360, 4912, 500, 500);
        let portrait = fit_dims(4912, 7360, 500, 500);
        assert_eq!(landscape, (500, 334));
        assert_eq!(portrait, (334, 500));
    }

    // A source already inside the box keeps its size.
    #[test]
    fn fit_dims_never_enlarges() {
        let unchanged = fit_dims(300, 200, 500, 500);
        assert_eq!(unchanged, (300, 200));
    }

    // Splitting the resize into bands must give byte-identical output.
    #[test]
    fn band_resize_matches_single_thread() {
        let src_dims = (317usize, 211usize);
        let dst_dims = (123usize, 81usize);
        let src: Vec<u8> = (0..src_dims.0 * src_dims.1 * 3)
            .map(|i| ((i * 7919) % 251) as u8)
            .collect();
        let single = resize_rgb8(&src, src_dims, dst_dims, 1);
        for threads in [2, 3] {
            let banded = resize_rgb8(&src, src_dims, dst_dims, threads);
            assert_eq!(single, banded, "threads={threads} output differs");
        }
    }

    // Each source size maps to the smallest numerator covering 500x334.
    #[test]
    fn dct_scale_picks_smallest_sufficient() {
        let cases = [
            ((7360usize, 4912usize), 1u8),
            ((1000, 667), 4),
            ((500, 334), 8),
        ];
        for ((src_w, src_h), expected) in cases {
            assert_eq!(dct_scale_num(src_w, src_h, 500, 334, 1.0), expected);
        }
    }
}