use aksr::Builder;
use anyhow::Result;
use ndarray::{s, Array};
use rayon::prelude::*;
#[cfg(feature = "slsl")]
use slsl::{Tensor, UninitVec};
use std::sync::Mutex;
use crate::{
Hub, Image, ImageTensorLayout, ImageTransformInfo, LogitsSampler, ProcessorConfig, ResizeMode,
X,
};
/// Image and text pre/post-processing settings shared by the methods of this type.
///
/// The image fields drive `process_images` / `process_images_f32`; the tokenizer
/// field (behind the `tokenizers` feature) drives the `encode_*` / `decode_*` methods.
#[derive(Builder, Debug, Clone)]
pub struct Processor {
/// Target width passed to `Image::resize_with_info`.
pub image_width: u32,
/// Target height passed to `Image::resize_with_info`.
pub image_height: u32,
/// Transform info recorded by the most recent image-processing call (one entry per image).
pub images_transform_info: Vec<ImageTransformInfo>,
/// Resize strategy forwarded to `Image::resize_with_info`.
pub resize_mode: ResizeMode,
/// Name of the resize filter forwarded to `Image::resize_with_info` (defaults to `"Bilinear"`).
pub resize_filter: &'static str,
/// Padding value forwarded to `Image::resize_with_info`.
pub padding_value: u8,
/// When `true`, pixel values are divided by 255.
pub do_normalize: bool,
/// Per-channel mean; standardization is applied only when both mean and std are non-empty.
pub image_mean: Vec<f32>,
/// Per-channel standard deviation; see `image_mean`.
pub image_std: Vec<f32>,
// Two fields are declared on the next line:
// - `nchw`: when `true`, `process_images` converts its NHWC batch to NCHW.
// - `image_tensor_layout`: output layout selected by `process_images_f32`.
pub nchw: bool, pub image_tensor_layout: ImageTensorLayout,
/// Optional tokenizer used by the `encode_*` / `decode_*` methods.
#[cfg(feature = "tokenizers")]
pub tokenizer: Option<tokenizers::Tokenizer>,
/// Vocabulary entries; `try_from_config` fills this with one entry per line of the vocab file.
pub vocab: Vec<String>,
/// When `true`, the image pipelines clamp values to be non-negative
/// (`max(0.0)` in the `f32` path, `X::unsigned` in `process_images`).
pub unsigned: bool,
/// Optional logits sampler; `try_from_config` builds it from the configured temperature and top-p.
pub logits_sampler: Option<LogitsSampler>,
/// When `true`, images are padded via `Image::pad` instead of being resized.
pub pad_image: bool,
/// Size argument forwarded to `Image::pad`.
pub pad_size: usize,
/// Up-scaling factor; not read by any method visible in this file.
pub up_scale: f32,
/// When `true` (and `pad_image` is `false`), images are resized to `image_width` x `image_height`.
pub do_resize: bool,
}
impl Default for Processor {
    /// Builds a processor with zero target dimensions, adaptive-fit bilinear resizing,
    /// a padding value of 114, normalization enabled, no standardization, NCHW output,
    /// no tokenizer, an empty vocabulary and no logits sampler.
    fn default() -> Self {
        Self {
            // Geometry and resizing.
            image_width: 0,
            image_height: 0,
            images_transform_info: Vec::new(),
            resize_mode: ResizeMode::FitAdaptive,
            resize_filter: "Bilinear",
            padding_value: 114,
            // Value transforms.
            do_normalize: true,
            image_mean: Vec::new(),
            image_std: Vec::new(),
            // Output layout.
            nchw: true,
            image_tensor_layout: ImageTensorLayout::NCHW,
            // Text handling.
            #[cfg(feature = "tokenizers")]
            tokenizer: None,
            vocab: Vec::new(),
            unsigned: false,
            logits_sampler: None,
            // Padding / scaling switches.
            pad_image: false,
            pad_size: 8,
            up_scale: 2.0,
            do_resize: true,
        }
    }
}
impl Processor {
pub fn try_from_config(config: &ProcessorConfig) -> Result<Self> {
let logits_sampler = LogitsSampler::new()
.with_temperature(config.temperature)
.with_topp(config.topp);
#[cfg(feature = "tokenizers")]
let tokenizer = config.try_build_tokenizer()?;
let vocab: Vec<String> = match &config.vocab_txt {
Some(x) => {
let file = if !std::path::PathBuf::from(&x).exists() {
Hub::default().try_fetch(x)?
} else {
x.to_string()
};
std::fs::read_to_string(file)?
.lines()
.map(|line| line.to_string())
.collect()
}
None => vec![],
};
Ok(Processor {
image_width: config.image_width.unwrap_or_default(),
image_height: config.image_height.unwrap_or_default(),
resize_mode: config.resize_mode.clone(),
resize_filter: config.resize_filter.unwrap_or("Bilinear"),
do_resize: config.do_resize,
padding_value: config.padding_value,
do_normalize: config.normalize,
image_mean: config.image_mean.clone(),
image_std: config.image_std.clone(),
nchw: config.nchw,
image_tensor_layout: config.image_tensor_layout,
unsigned: config.unsigned,
pad_image: config.pad_image,
pad_size: config.pad_size,
up_scale: config.up_scale,
#[cfg(feature = "tokenizers")]
tokenizer,
vocab,
logits_sampler: Some(logits_sampler),
..Default::default()
})
}
pub fn reset_image0_status(&mut self) {
self.images_transform_info.clear();
}
/// Rearranges an interleaved 3-channel buffer (H x W x C) into planar C x H x W order.
///
/// The output has the same length as `input` and is split into planes of `h * w`
/// elements; element `px` of plane `c` is read from `input[px * 3 + c]`.
///
/// # Panics
///
/// Panics if a computed index falls outside `input` (for example when `input.len()`
/// is larger than `3 * h * w`). `h * w` is used as the rayon chunk size and is
/// therefore expected to be non-zero.
#[cfg(feature = "slsl")]
pub fn hwc_to_chw(input: &[f32], h: usize, w: usize) -> Vec<f32> {
    let plane_len = h * w;
    UninitVec::new(input.len()).init_with(|dst| {
        dst.par_chunks_mut(plane_len)
            .enumerate()
            .for_each(|(c, plane)| {
                // `px` already is the flat row-major pixel index, so splitting it into
                // row/column and recombining is unnecessary.
                plane
                    .par_iter_mut()
                    .enumerate()
                    .for_each(|(px, out)| *out = input[px * 3 + c]);
            });
    })
}
/// Converts an interleaved 3-channel buffer (H x W x C) to planar C x H x W order while
/// clamping each value to be non-negative and dividing it by 255.
///
/// Element `px` of plane `c` is computed from `input[px * 3 + c]`.
///
/// # Panics
///
/// Panics if a computed index falls outside `input`. `h * w` is used as the rayon
/// chunk size and is therefore expected to be non-zero.
#[cfg(feature = "slsl")]
pub fn hwc_to_chw_with_normalize_and_unsigned(input: &[f32], h: usize, w: usize) -> Vec<f32> {
    let plane_len = h * w;
    UninitVec::new(input.len()).init_with(|dst| {
        dst.par_chunks_mut(plane_len)
            .enumerate()
            .for_each(|(c, plane)| {
                // `px` is the flat pixel index; no row/column decomposition is needed.
                plane.par_iter_mut().enumerate().for_each(|(px, out)| {
                    *out = input[px * 3 + c].max(0.0f32) / 255.0f32;
                });
            });
    })
}
/// Converts an interleaved 3-channel buffer (H x W x C) to planar C x H x W order while
/// dividing each value by 255.
///
/// Element `px` of plane `c` is computed from `input[px * 3 + c]`.
///
/// # Panics
///
/// Panics if a computed index falls outside `input`. `h * w` is used as the rayon
/// chunk size and is therefore expected to be non-zero.
#[cfg(feature = "slsl")]
pub fn hwc_to_chw_with_normalize(input: &[f32], h: usize, w: usize) -> Vec<f32> {
    let plane_len = h * w;
    UninitVec::new(input.len()).init_with(|dst| {
        dst.par_chunks_mut(plane_len)
            .enumerate()
            .for_each(|(c, plane)| {
                // `px` is the flat pixel index; no row/column decomposition is needed.
                plane.par_iter_mut().enumerate().for_each(|(px, out)| {
                    *out = input[px * 3 + c] / 255.0f32;
                });
            });
    })
}
/// Converts an interleaved 3-channel buffer (H x W x C) to planar C x H x W order while
/// clamping each value to be non-negative.
///
/// Element `px` of plane `c` is computed from `input[px * 3 + c]`.
///
/// # Panics
///
/// Panics if a computed index falls outside `input`. `h * w` is used as the rayon
/// chunk size and is therefore expected to be non-zero.
#[cfg(feature = "slsl")]
pub fn hwc_to_chw_with_unsigned(input: &[f32], h: usize, w: usize) -> Vec<f32> {
    let plane_len = h * w;
    UninitVec::new(input.len()).init_with(|dst| {
        dst.par_chunks_mut(plane_len)
            .enumerate()
            .for_each(|(c, plane)| {
                // `px` is the flat pixel index; no row/column decomposition is needed.
                plane.par_iter_mut().enumerate().for_each(|(px, out)| {
                    *out = input[px * 3 + c].max(0.0f32);
                });
            });
    })
}
/// Converts an interleaved 3-channel buffer (H x W x C) to planar C x H x W order and
/// applies, in this order: an optional clamp to non-negative values (`unsigned`),
/// an optional division by 255 (`do_normalize`), and a per-channel standardization
/// `(value - mean[c]) / std[c]`, which is always applied.
///
/// # Panics
///
/// Panics if `mean` or `std` has no entry for a plane index, or if a computed index
/// falls outside `input`. `h * w` is used as the rayon chunk size and is therefore
/// expected to be non-zero.
#[cfg(feature = "slsl")]
pub fn hwc_to_chw_with_all_transforms(
    input: &[f32],
    h: usize,
    w: usize,
    do_normalize: bool,
    unsigned: bool,
    mean: &[f32],
    std: &[f32],
) -> Vec<f32> {
    let plane_len = h * w;
    UninitVec::new(input.len()).init_with(|dst| {
        dst.par_chunks_mut(plane_len)
            .enumerate()
            .for_each(|(c, plane)| {
                // Channel statistics are constant for the whole plane.
                let (mean_c, std_c) = (mean[c], std[c]);
                // `px` is the flat pixel index; no row/column decomposition is needed.
                plane.par_iter_mut().enumerate().for_each(|(px, out)| {
                    let mut val = input[px * 3 + c];
                    if unsigned {
                        val = val.max(0.0f32);
                    }
                    if do_normalize {
                        val /= 255.0f32;
                    }
                    *out = (val - mean_c) / std_c;
                });
            });
    })
}
/// Converts an interleaved 3-channel buffer (H x W x C) to planar C x H x W order,
/// dividing each value by 255 and then standardizing it as
/// `(value / 255 - mean[c]) / std[c]`.
///
/// # Panics
///
/// Panics if `mean` or `std` has no entry for a plane index, or if a computed index
/// falls outside `input`. `h * w` is used as the rayon chunk size and is therefore
/// expected to be non-zero.
#[cfg(feature = "slsl")]
pub fn hwc_to_chw_with_normalize_and_standardize(
    input: &[f32],
    h: usize,
    w: usize,
    mean: &[f32],
    std: &[f32],
) -> Vec<f32> {
    let plane_len = h * w;
    UninitVec::new(input.len()).init_with(|dst| {
        dst.par_chunks_mut(plane_len)
            .enumerate()
            .for_each(|(c, plane)| {
                // Channel statistics are constant for the whole plane.
                let (mean_c, std_c) = (mean[c], std[c]);
                // `px` is the flat pixel index; no row/column decomposition is needed.
                plane.par_iter_mut().enumerate().for_each(|(px, out)| {
                    *out = (input[px * 3 + c] / 255.0f32 - mean_c) / std_c;
                });
            });
    })
}
/// Converts an interleaved 3-channel buffer (H x W x C) to planar C x H x W order,
/// clamping each value to be non-negative and then standardizing it as
/// `(max(value, 0) - mean[c]) / std[c]`.
///
/// # Panics
///
/// Panics if `mean` or `std` has no entry for a plane index, or if a computed index
/// falls outside `input`. `h * w` is used as the rayon chunk size and is therefore
/// expected to be non-zero.
#[cfg(feature = "slsl")]
pub fn hwc_to_chw_with_unsigned_and_standardize(
    input: &[f32],
    h: usize,
    w: usize,
    mean: &[f32],
    std: &[f32],
) -> Vec<f32> {
    let plane_len = h * w;
    UninitVec::new(input.len()).init_with(|dst| {
        dst.par_chunks_mut(plane_len)
            .enumerate()
            .for_each(|(c, plane)| {
                // Channel statistics are constant for the whole plane.
                let (mean_c, std_c) = (mean[c], std[c]);
                // `px` is the flat pixel index; no row/column decomposition is needed.
                plane.par_iter_mut().enumerate().for_each(|(px, out)| {
                    *out = (input[px * 3 + c].max(0.0f32) - mean_c) / std_c;
                });
            });
    })
}
/// Converts an interleaved 3-channel buffer (H x W x C) to planar C x H x W order and
/// standardizes each value as `(value - mean[c]) / std[c]`.
///
/// # Panics
///
/// Panics if `mean` or `std` has no entry for a plane index, or if a computed index
/// falls outside `input`. `h * w` is used as the rayon chunk size and is therefore
/// expected to be non-zero.
#[cfg(feature = "slsl")]
pub fn hwc_to_chw_with_standardize(
    input: &[f32],
    h: usize,
    w: usize,
    mean: &[f32],
    std: &[f32],
) -> Vec<f32> {
    let plane_len = h * w;
    UninitVec::new(input.len()).init_with(|dst| {
        dst.par_chunks_mut(plane_len)
            .enumerate()
            .for_each(|(c, plane)| {
                // Channel statistics are constant for the whole plane.
                let (mean_c, std_c) = (mean[c], std[c]);
                // `px` is the flat pixel index; no row/column decomposition is needed.
                plane.par_iter_mut().enumerate().for_each(|(px, out)| {
                    *out = (input[px * 3 + c] - mean_c) / std_c;
                });
            });
    })
}
/// Converts one or more images into a single `f32` [`Tensor`] laid out according to
/// `image_tensor_layout`, and records one [`ImageTransformInfo`] per image in
/// `images_transform_info`.
///
/// Each image is padded (`pad_image`), resized (`do_resize`) or used as-is, then
/// optionally clamped to non-negative values (`unsigned`), divided by 255
/// (`do_normalize`) and standardized with `image_mean` / `image_std` (only when both
/// are non-empty). Multiple images are processed in parallel and concatenated along
/// axis 0.
///
/// The tensor shape is derived from the dimensions of the image actually produced by
/// the pad/resize/as-is step rather than from `image_width` / `image_height`, so the
/// padded and non-resized paths yield correctly shaped tensors.
///
/// # Errors
///
/// Returns an error if `xs` is empty, if `pad_image` is set with more than one image,
/// if padding/resizing fails, if a processed image has zero width or height, if
/// standardization is requested with fewer than three mean/std entries, or if tensor
/// construction/concatenation fails.
#[cfg(feature = "slsl")]
pub fn process_images_f32(&mut self, xs: &[Image]) -> Result<Tensor> {
    match xs {
        [] => anyhow::bail!("Found no input images."),
        [image] => {
            let (tensor, trans_info) = self.process_one_f32(image)?;
            self.images_transform_info = vec![trans_info];
            Ok(tensor)
        }
        _ => {
            if self.pad_image {
                anyhow::bail!("When pad_image is true, only one image is allowed.");
            }
            // Shared borrow for the parallel section; it ends before the assignment below.
            let this = &*self;
            let results: Vec<(Tensor, ImageTransformInfo)> = xs
                .par_iter()
                .map(|image| this.process_one_f32(image))
                .collect::<Result<Vec<_>>>()?;
            let (tensors, trans_infos): (Vec<_>, Vec<_>) = results.into_iter().unzip();
            let combined_tensor = Tensor::cat(&tensors, 0)?;
            self.images_transform_info = trans_infos;
            Ok(combined_tensor)
        }
    }
}

/// Pads/resizes a single image and converts it into a tensor in the configured layout.
#[cfg(feature = "slsl")]
fn process_one_f32(&self, image: &Image) -> Result<(Tensor, ImageTransformInfo)> {
    let (image_processed, trans_info) = if self.pad_image {
        image.pad(self.pad_size)?
    } else if self.do_resize {
        image.resize_with_info(
            self.image_width,
            self.image_height,
            self.resize_filter,
            &self.resize_mode,
            self.padding_value,
        )?
    } else {
        let (w0, h0) = image.dimensions();
        (
            image.clone(),
            ImageTransformInfo::default()
                .with_width_src(w0)
                .with_height_src(h0),
        )
    };

    // Use the dimensions of the image we actually hold: after padding, or when
    // resizing is disabled, they generally differ from the configured target size.
    let (width, height) = image_processed.dimensions();
    let (h, w) = (height as usize, width as usize);
    if h == 0 || w == 0 {
        anyhow::bail!("Processed image has zero width or height ({w}x{h}).");
    }

    let do_standardize = !self.image_std.is_empty() && !self.image_mean.is_empty();
    if do_standardize && (self.image_mean.len() < 3 || self.image_std.len() < 3) {
        anyhow::bail!(
            "image_mean and image_std need at least 3 entries, got {} and {}.",
            self.image_mean.len(),
            self.image_std.len()
        );
    }

    let pixels = image_processed.to_f32s();
    let tensor = match self.image_tensor_layout {
        ImageTensorLayout::NCHW => {
            let planar = self.hwc_f32_to_planar(&pixels, h, w, do_standardize);
            Tensor::from_vec(planar, (1, 3, h, w))?
        }
        ImageTensorLayout::CHW => {
            let planar = self.hwc_f32_to_planar(&pixels, h, w, do_standardize);
            Tensor::from_vec(planar, (3, h, w))?
        }
        ImageTensorLayout::NHWC => {
            let mut pixels = pixels;
            self.hwc_f32_transform_in_place(&mut pixels, do_standardize);
            Tensor::from_vec(pixels, (1, h, w, 3))?
        }
        ImageTensorLayout::HWC => {
            let mut pixels = pixels;
            self.hwc_f32_transform_in_place(&mut pixels, do_standardize);
            Tensor::from_vec(pixels, (h, w, 3))?
        }
    };
    Ok((tensor, trans_info))
}

/// Picks the specialized HWC -> CHW kernel matching the enabled value transforms.
#[cfg(feature = "slsl")]
fn hwc_f32_to_planar(
    &self,
    pixels: &[f32],
    h: usize,
    w: usize,
    do_standardize: bool,
) -> Vec<f32> {
    let mean = self.image_mean.as_slice();
    let std = self.image_std.as_slice();
    match (self.do_normalize, self.unsigned, do_standardize) {
        (true, true, true) => {
            Self::hwc_to_chw_with_all_transforms(pixels, h, w, true, true, mean, std)
        }
        (true, false, true) => {
            Self::hwc_to_chw_with_normalize_and_standardize(pixels, h, w, mean, std)
        }
        (false, true, true) => {
            Self::hwc_to_chw_with_unsigned_and_standardize(pixels, h, w, mean, std)
        }
        (false, false, true) => Self::hwc_to_chw_with_standardize(pixels, h, w, mean, std),
        (true, true, false) => Self::hwc_to_chw_with_normalize_and_unsigned(pixels, h, w),
        (true, false, false) => Self::hwc_to_chw_with_normalize(pixels, h, w),
        (false, true, false) => Self::hwc_to_chw_with_unsigned(pixels, h, w),
        (false, false, false) => Self::hwc_to_chw(pixels, h, w),
    }
}

/// Applies the enabled value transforms to an interleaved 3-channel buffer in place.
#[cfg(feature = "slsl")]
fn hwc_f32_transform_in_place(&self, pixels: &mut [f32], do_standardize: bool) {
    // Parallel over pixels, sequential over the three channels of each pixel:
    // spawning parallel work for three elements costs more than it saves.
    fn apply<F>(pixels: &mut [f32], f: F)
    where
        F: Fn(usize, f32) -> f32 + Sync,
    {
        pixels.par_chunks_mut(3).for_each(|pixel| {
            for (c, val) in pixel.iter_mut().enumerate() {
                *val = f(c, *val);
            }
        });
    }

    let mean = self.image_mean.as_slice();
    let std = self.image_std.as_slice();
    match (self.do_normalize, self.unsigned, do_standardize) {
        (true, true, true) => apply(pixels, |c, v| {
            (v.max(0.0f32) / 255.0f32 - mean[c]) / std[c]
        }),
        (true, false, true) => apply(pixels, |c, v| (v / 255.0f32 - mean[c]) / std[c]),
        (false, true, true) => apply(pixels, |c, v| (v.max(0.0f32) - mean[c]) / std[c]),
        (false, false, true) => apply(pixels, |c, v| (v - mean[c]) / std[c]),
        (true, true, false) => apply(pixels, |_, v| v.max(0.0f32) / 255.0f32),
        (true, false, false) => apply(pixels, |_, v| v / 255.0f32),
        (false, true, false) => apply(pixels, |_, v| v.max(0.0f32)),
        // Nothing to do: the raw pixel values are used as-is.
        (false, false, false) => {}
    }
}
/// Converts images into a batched [`X`] array and records their transform info in
/// `images_transform_info`.
///
/// With `pad_image` exactly one image is padded and given a leading batch axis;
/// otherwise, with `do_resize`, all images are resized via [`Self::par_resize`].
/// Afterwards the batch is, in this order, optionally normalized from the 0..255
/// range, standardized (when both mean and std are non-empty), converted from NHWC
/// to NCHW (`nchw`) and passed through `X::unsigned` (`unsigned`).
///
/// # Errors
///
/// Returns an error if neither `pad_image` nor `do_resize` is enabled, if `pad_image`
/// is set and `xs` does not contain exactly one image, or if any processing step fails.
pub fn process_images(&mut self, xs: &[Image]) -> Result<X> {
    // NOTE(review): this configuration is rejected regardless of how many images are
    // passed; the message text below may be misleading — confirm the intended wording.
    if !self.pad_image && !self.do_resize {
        anyhow::bail!(
            "When pad_image and do_resize are both false, at least one image is required."
        );
    }

    let batch = if self.pad_image {
        if xs.len() != 1 {
            anyhow::bail!("When pad_image is true, only one image is allowed.");
        }
        let (padded, info) = xs[0].pad(self.pad_size)?;
        self.images_transform_info = vec![info];
        padded.to_ndarray()?.insert_axis(0)?
    } else {
        let (resized, infos) = self.par_resize(xs)?;
        self.images_transform_info = infos;
        resized
    };

    let batch = if self.do_normalize {
        batch.normalize(0., 255.)?
    } else {
        batch
    };
    let standardize = !(self.image_std.is_empty() || self.image_mean.is_empty());
    let batch = if standardize {
        // Axis 3 is the channel axis while the batch is still NHWC.
        batch.standardize(&self.image_mean, &self.image_std, 3)?
    } else {
        batch
    };
    let batch = if self.nchw {
        batch.nhwc2nchw()?
    } else {
        batch
    };
    let batch = if self.unsigned {
        batch.unsigned()
    } else {
        batch
    };
    Ok(batch)
}
pub fn par_resize(&self, xs: &[Image]) -> Result<(X, Vec<ImageTransformInfo>)> {
match xs.len() {
0 => anyhow::bail!("Found no input images."),
1 => {
let (image, trans_info) = xs[0].resize_with_info(
self.image_width,
self.image_height,
self.resize_filter,
&self.resize_mode,
self.padding_value,
)?;
let y = image.to_ndarray()?.insert_axis(0)?;
Ok((y, vec![trans_info]))
}
_ => {
let ys = Mutex::new(
Array::zeros((
xs.len(),
self.image_height as usize,
self.image_width as usize,
3,
))
.into_dyn(),
);
let results: Result<Vec<ImageTransformInfo>> = xs
.par_iter()
.enumerate()
.map(|(idx, x)| {
let (image, trans_info) = x.resize_with_info(
self.image_width,
self.image_height,
self.resize_filter,
&self.resize_mode,
self.padding_value,
)?;
let y = image.to_ndarray()?;
{
let mut ys_guard = ys
.lock()
.map_err(|e| anyhow::anyhow!("Mutex lock error: {e}"))?;
ys_guard.slice_mut(s![idx, .., .., ..]).assign(&y);
}
Ok(trans_info)
})
.collect();
let ys_inner = ys
.into_inner()
.map_err(|e| anyhow::anyhow!("Mutex into_inner error: {e}"))?;
Ok((ys_inner.into(), results?))
}
}
}
/// Encodes a single text with the configured tokenizer.
///
/// NOTE(review): `skip_special_tokens` is forwarded as the second argument of
/// `Tokenizer::encode`, which the `tokenizers` crate appears to name
/// `add_special_tokens` — confirm the intended meaning of this flag.
///
/// # Errors
///
/// Returns an error if no tokenizer is configured or if encoding fails; the error
/// message includes at most the first 50 characters of `x`.
#[cfg(feature = "tokenizers")]
pub fn encode_text(&self, x: &str, skip_special_tokens: bool) -> Result<tokenizers::Encoding> {
    let tokenizer = match self.tokenizer.as_ref() {
        Some(tokenizer) => tokenizer,
        None => {
            return Err(anyhow::anyhow!(
                "No tokenizer configured in Processor. Please initialize with a tokenizer."
            ))
        }
    };
    match tokenizer.encode(x, skip_special_tokens) {
        Ok(encoding) => Ok(encoding),
        Err(err) => {
            let preview: String = x.chars().take(50).collect();
            Err(anyhow::anyhow!(
                "Failed to encode text '{}': {}",
                preview,
                err
            ))
        }
    }
}
/// Encodes a batch of texts with the configured tokenizer.
///
/// NOTE(review): `skip_special_tokens` is forwarded as the second argument of
/// `Tokenizer::encode_batch`, which the `tokenizers` crate appears to name
/// `add_special_tokens` — confirm the intended meaning of this flag.
///
/// # Errors
///
/// Returns an error if no tokenizer is configured or if batch encoding fails.
#[cfg(feature = "tokenizers")]
pub fn encode_texts(
    &self,
    xs: &[&str],
    skip_special_tokens: bool,
) -> Result<Vec<tokenizers::Encoding>> {
    match self.tokenizer.as_ref() {
        None => Err(anyhow::anyhow!(
            "No tokenizer configured in Processor. Please initialize with a tokenizer."
        )),
        Some(tokenizer) => {
            let inputs = xs.to_vec();
            tokenizer
                .encode_batch(inputs, skip_special_tokens)
                .map_err(|err| {
                    anyhow::anyhow!("Failed to encode batch of {} texts: {}", xs.len(), err)
                })
        }
    }
}
/// Encodes a text and returns its token ids converted to `f32`.
///
/// An empty text short-circuits to a single `0.0` id without touching the tokenizer.
///
/// # Errors
///
/// Propagates any error from [`Self::encode_text`].
#[cfg(feature = "tokenizers")]
pub fn encode_text_ids(&self, x: &str, skip_special_tokens: bool) -> Result<Vec<f32>> {
    if x.is_empty() {
        return Ok(vec![0.0f32]);
    }
    let encoding = self.encode_text(x, skip_special_tokens)?;
    let ids: Vec<f32> = encoding.get_ids().iter().map(|&id| id as f32).collect();
    Ok(ids)
}
/// Encodes a batch of texts and returns, per text, its token ids converted to `f32`.
///
/// An empty batch short-circuits to one sequence holding a single `0.0` id.
///
/// # Errors
///
/// Propagates any error from [`Self::encode_texts`].
#[cfg(feature = "tokenizers")]
pub fn encode_texts_ids(
    &self,
    xs: &[&str],
    skip_special_tokens: bool,
) -> Result<Vec<Vec<f32>>> {
    if xs.is_empty() {
        return Ok(vec![vec![0.0f32]]);
    }
    let encodings = self.encode_texts(xs, skip_special_tokens)?;
    let ids: Vec<Vec<f32>> = encodings
        .iter()
        .map(|encoding| encoding.get_ids().iter().map(|&id| id as f32).collect())
        .collect();
    Ok(ids)
}
/// Encodes a text and returns its string tokens.
///
/// # Errors
///
/// Propagates any error from [`Self::encode_text`].
#[cfg(feature = "tokenizers")]
pub fn encode_text_tokens(&self, x: &str, skip_special_tokens: bool) -> Result<Vec<String>> {
    let encoding = self.encode_text(x, skip_special_tokens)?;
    let tokens = encoding.get_tokens().to_vec();
    Ok(tokens)
}
/// Encodes a batch of texts and returns the string tokens of each text.
///
/// # Errors
///
/// Propagates any error from [`Self::encode_texts`].
#[cfg(feature = "tokenizers")]
pub fn encode_texts_tokens(
    &self,
    xs: &[&str],
    skip_special_tokens: bool,
) -> Result<Vec<Vec<String>>> {
    let encodings = self.encode_texts(xs, skip_special_tokens)?;
    let mut tokens = Vec::with_capacity(encodings.len());
    for encoding in &encodings {
        tokens.push(encoding.get_tokens().to_vec());
    }
    Ok(tokens)
}
/// Decodes a sequence of token ids back into text with the configured tokenizer.
///
/// # Errors
///
/// Returns an error if no tokenizer is configured or if decoding fails.
#[cfg(feature = "tokenizers")]
pub fn decode_tokens(&self, ids: &[u32], skip_special_tokens: bool) -> Result<String> {
    match self.tokenizer.as_ref() {
        Some(tokenizer) => match tokenizer.decode(ids, skip_special_tokens) {
            Ok(text) => Ok(text),
            Err(err) => Err(anyhow::anyhow!(
                "Failed to decode {} token IDs: {}",
                ids.len(),
                err
            )),
        },
        None => Err(anyhow::anyhow!(
            "No tokenizer configured in Processor. Please initialize with a tokenizer."
        )),
    }
}
/// Decodes a batch of token-id slices into one string per sequence.
///
/// # Errors
///
/// Returns an error if no tokenizer is configured or if batch decoding fails.
#[cfg(feature = "tokenizers")]
pub fn decode_tokens_batch2(
    &self,
    ids: &[&[u32]],
    skip_special_tokens: bool,
) -> Result<Vec<String>> {
    self.tokenizer
        .as_ref()
        .ok_or_else(|| {
            anyhow::anyhow!(
                "No tokenizer configured in Processor. Please initialize with a tokenizer."
            )
        })
        .and_then(|tokenizer| {
            tokenizer
                .decode_batch(ids, skip_special_tokens)
                .map_err(|err| {
                    anyhow::anyhow!(
                        "Failed to decode batch of {} token sequences: {}",
                        ids.len(),
                        err
                    )
                })
        })
}
/// Decodes a batch of owned token-id vectors into one string per sequence.
///
/// # Errors
///
/// Returns an error if no tokenizer is configured or if batch decoding fails.
#[cfg(feature = "tokenizers")]
pub fn decode_tokens_batch(
    &self,
    ids: &[Vec<u32>],
    skip_special_tokens: bool,
) -> Result<Vec<String>> {
    let tokenizer = match self.tokenizer.as_ref() {
        Some(tokenizer) => tokenizer,
        None => {
            return Err(anyhow::anyhow!(
                "No tokenizer configured in Processor. Please initialize with a tokenizer."
            ))
        }
    };
    // The tokenizer expects borrowed slices, so view each vector as `&[u32]`.
    let sequences: Vec<&[u32]> = ids.iter().map(|seq| seq.as_slice()).collect();
    match tokenizer.decode_batch(&sequences, skip_special_tokens) {
        Ok(texts) => Ok(texts),
        Err(err) => Err(anyhow::anyhow!(
            "Failed to decode batch of {} token vectors: {}",
            ids.len(),
            err
        )),
    }
}
}