#![allow(dead_code)]
use archmage::prelude::*;
#[cfg(target_arch = "aarch64")]
use archmage::intrinsics::aarch64 as simd_mem;
#[cfg(target_arch = "x86_64")]
use archmage::intrinsics::x86_64 as simd_mem;
/// Coarse content category assigned to an image by the classifier in this file.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must keep a wildcard
/// arm when matching on it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum ImageContentType {
/// Photographic content. Also the fallback verdict of
/// `classify_image_type_diag` when the supplied histogram is empty.
Photo,
/// Non-photographic content; the alternative to `Photo` in the
/// uniformity-based decision of `classify_image_type_diag`.
Drawing,
/// Text content. NOTE(review): none of the classifier functions visible in
/// this file ever produce this variant — confirm whether callers set it.
Text,
/// Small image: reported when both width and height are at most 128.
Icon,
}
/// Classification verdict together with the metrics computed on the way to it.
///
/// When `classify_image_type_diag` takes one of its early exits (small image
/// or empty histogram) every numeric field is `0.0` and `is_bimodal` is `false`.
#[derive(Debug, Clone, Copy)]
pub struct ClassifierDiag {
/// The final verdict.
pub content_type: ImageContentType,
/// Share of the total histogram count that falls into bins `0..64`.
pub low_frac: f32,
/// Share of the total histogram count that falls into bins `192..256`.
pub high_frac: f32,
/// `true` when both `low_frac` and `high_frac` exceed `0.15`.
pub is_bimodal: bool,
/// Fraction of sampled horizontally adjacent pixel pairs whose absolute
/// difference exceeds 32 (see `compute_edge_density`).
pub edge_density: f32,
/// Fraction of sampled 16x16 blocks containing at most 32 distinct values
/// (see `compute_color_uniformity`).
pub uniformity: f32,
}
pub fn classify_image_type(
y_src: &[u8],
width: usize,
height: usize,
y_stride: usize,
alpha_histogram: &[u32; 256],
) -> ImageContentType {
classify_image_type_diag(y_src, width, height, y_stride, alpha_histogram).content_type
}
pub fn classify_image_type_diag(
y_src: &[u8],
width: usize,
height: usize,
y_stride: usize,
alpha_histogram: &[u32; 256],
) -> ClassifierDiag {
if width <= 128 && height <= 128 {
return ClassifierDiag {
content_type: ImageContentType::Icon,
low_frac: 0.0,
high_frac: 0.0,
is_bimodal: false,
edge_density: 0.0,
uniformity: 0.0,
};
}
let total: u32 = alpha_histogram.iter().sum();
if total == 0 {
return ClassifierDiag {
content_type: ImageContentType::Photo,
low_frac: 0.0,
high_frac: 0.0,
is_bimodal: false,
edge_density: 0.0,
uniformity: 0.0,
};
}
let low_quarter: u32 = alpha_histogram[..64].iter().sum();
let high_quarter: u32 = alpha_histogram[192..].iter().sum();
let low_frac = low_quarter as f32 / total as f32;
let high_frac = high_quarter as f32 / total as f32;
let is_bimodal = low_frac > 0.15 && high_frac > 0.15;
let edge_density = compute_edge_density(y_src, width, height, y_stride);
let uniformity = compute_color_uniformity(y_src, width, height, y_stride);
let content_type = if uniformity >= 0.45 {
ImageContentType::Photo
} else {
ImageContentType::Drawing };
ClassifierDiag {
content_type,
low_frac,
high_frac,
is_bimodal,
edge_density,
uniformity,
}
}
/// Measures how often horizontally adjacent pixels of `y_src` differ sharply.
///
/// Entry point for the edge-density metric. The work is handed to archmage's
/// `incant!` macro with the variant list `[v3, neon, scalar]`; the matching
/// functions in this file are `compute_edge_density_impl_v3`,
/// `compute_edge_density_impl_neon` and `compute_edge_density_impl_scalar`.
/// NOTE(review): presumably `incant!` picks the best variant supported by the
/// running CPU — confirm against the archmage documentation.
///
/// Returns a value in `0.0..=1.0`: edges found divided by pixel pairs sampled.
fn compute_edge_density(y_src: &[u8], width: usize, height: usize, y_stride: usize) -> f32 {
incant!(
compute_edge_density_impl(y_src, width, height, y_stride),
[v3, neon, scalar]
)
}
/// `v3` variant named in the `incant!` call of `compute_edge_density`.
///
/// Only compiled on x86-64. Forwards the token and all arguments unchanged to
/// `compute_edge_density_sse2`, which holds the actual SIMD loop.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn compute_edge_density_impl_v3(
    token: X64V3Token,
    y_src: &[u8],
    width: usize,
    height: usize,
    y_stride: usize,
) -> f32 {
    compute_edge_density_sse2(token, y_src, width, height, y_stride)
}
/// `neon` variant named in the `incant!` call of `compute_edge_density`.
///
/// Only compiled on aarch64. Forwards the token and all arguments unchanged to
/// `compute_edge_density_neon`, which holds the actual SIMD loop.
#[cfg(target_arch = "aarch64")]
#[inline(always)]
fn compute_edge_density_impl_neon(
token: NeonToken,
y_src: &[u8],
width: usize,
height: usize,
y_stride: usize,
) -> f32 {
compute_edge_density_neon(token, y_src, width, height, y_stride)
}
/// `scalar` variant named in the `incant!` call of `compute_edge_density`.
///
/// Compiled on every target. The token is accepted only to fit the variant
/// signature and is ignored; the work is done by `compute_edge_density_scalar`.
#[inline(always)]
fn compute_edge_density_impl_scalar(
_token: ScalarToken,
y_src: &[u8],
width: usize,
height: usize,
y_stride: usize,
) -> f32 {
compute_edge_density_scalar(y_src, width, height, y_stride)
}
/// Portable implementation of the edge-density metric.
///
/// Visits every 16th row of the plane (rows 0, 16, 32, ...) and, within each
/// visited row, compares every pixel in `1..width` with its left neighbour.
/// A pair counts as an edge when the absolute difference is strictly greater
/// than 32.
///
/// Returns edges divided by pairs examined, or `0.0` when `width < 2` or
/// `height < 16`.
///
/// # Panics
///
/// Panics if a visited row does not lie fully inside `y_src`.
fn compute_edge_density_scalar(y_src: &[u8], width: usize, height: usize, y_stride: usize) -> f32 {
    const MIN_WIDTH: usize = 2;
    const MIN_HEIGHT: usize = 16;
    const ROW_STEP: usize = 16;
    const EDGE_THRESHOLD: u8 = 32;

    if width < MIN_WIDTH || height < MIN_HEIGHT {
        return 0.0;
    }

    let mut edges = 0u32;
    let mut pairs = 0u32;
    for line in (0..height).step_by(ROW_STEP) {
        let pixels = &y_src[line * y_stride..][..width];
        // Each window is (left neighbour, current pixel).
        for pair in pixels.windows(2) {
            if pair[1].abs_diff(pair[0]) > EDGE_THRESHOLD {
                edges += 1;
            }
            pairs += 1;
        }
    }

    if pairs == 0 {
        return 0.0;
    }
    edges as f32 / pairs as f32
}
/// x86-64 SIMD implementation of the edge-density metric.
///
/// Same sampling and threshold as `compute_edge_density_scalar`: every 16th
/// row is visited and a horizontally adjacent pair is an edge when its
/// absolute difference is strictly greater than 32. Pairs are processed 16 at
/// a time with 128-bit integer intrinsics; the remainder of each row is
/// handled one pair at a time.
///
/// Returns edges divided by pairs examined, or `0.0` when `width < 2` or
/// `height < 16`.
#[cfg(target_arch = "x86_64")]
#[arcane]
fn compute_edge_density_sse2(
_token: X64V3Token,
y_src: &[u8],
width: usize,
height: usize,
y_stride: usize,
) -> f32 {
if width < 2 || height < 16 {
return 0.0;
}
let mut edge_count = 0u32;
let mut sample_count = 0u32;
// Threshold 32 broadcast to all 16 byte lanes.
let threshold_vec = _mm_set1_epi8(32i8);
let mut y = 0;
while y < height {
// The row slice is not truncated to `width`; the loop conditions below keep
// every access at an index below `width`.
let row = &y_src[y * y_stride..];
// Start at 1 because each pixel is compared with its left neighbour.
let mut x = 1usize;
// `x + 15 < width` guarantees that pixels x..x+16 all belong to the row.
while x + 15 < width {
// Both slices are exactly 16 bytes long, so the array conversions cannot fail.
let curr_arr = <&[u8; 16]>::try_from(&row[x..x + 16]).unwrap();
let prev_arr = <&[u8; 16]>::try_from(&row[x - 1..x + 15]).unwrap();
let curr = simd_mem::_mm_loadu_si128(curr_arr);
let prev = simd_mem::_mm_loadu_si128(prev_arr);
// Unsigned saturating subtraction in both directions: one result is zero,
// the other is |curr - prev|, so OR-ing them yields the absolute difference.
let diff1 = _mm_subs_epu8(curr, prev);
let diff2 = _mm_subs_epu8(prev, curr);
let abs_diff = _mm_or_si128(diff1, diff2);
// Saturating subtraction of the threshold leaves a non-zero lane only
// where the difference is strictly greater than 32.
let above_thresh = _mm_subs_epu8(abs_diff, threshold_vec);
let zero = _mm_setzero_si128();
// `mask` marks the lanes that are NOT edges; inverting it marks the edges.
let mask = _mm_cmpeq_epi8(above_thresh, zero);
let edges = _mm_andnot_si128(mask, _mm_set1_epi8(-1i8));
// One bit per lane; the number of set bits is the number of edges.
let mask_bits = _mm_movemask_epi8(edges) as u32;
edge_count += mask_bits.count_ones();
sample_count += 16;
x += 16;
}
// Scalar tail for the pairs that did not fill a whole 16-lane vector.
while x < width {
let diff = row[x].abs_diff(row[x - 1]);
if diff > 32 {
edge_count += 1;
}
sample_count += 1;
x += 1;
}
// Only every 16th row is sampled.
y += 16;
}
if sample_count == 0 {
return 0.0;
}
edge_count as f32 / sample_count as f32
}
/// aarch64 NEON implementation of the edge-density metric.
///
/// Same sampling and threshold as `compute_edge_density_scalar`: every 16th
/// row is visited and a horizontally adjacent pair is an edge when its
/// absolute difference is strictly greater than 32. Pairs are processed 16 at
/// a time; the remainder of each row is handled one pair at a time.
///
/// Returns edges divided by pairs examined, or `0.0` when `width < 2` or
/// `height < 16`.
#[cfg(target_arch = "aarch64")]
#[arcane]
fn compute_edge_density_neon(
_token: NeonToken,
y_src: &[u8],
width: usize,
height: usize,
y_stride: usize,
) -> f32 {
if width < 2 || height < 16 {
return 0.0;
}
let mut edge_count = 0u32;
let mut sample_count = 0u32;
// Threshold 32 broadcast to all 16 byte lanes.
let threshold_vec = vdupq_n_u8(32);
let mut y = 0;
while y < height {
// The row slice is not truncated to `width`; the loop conditions below keep
// every access at an index below `width`.
let row = &y_src[y * y_stride..];
// Start at 1 because each pixel is compared with its left neighbour.
let mut x = 1usize;
// `x + 15 < width` guarantees that pixels x..x+16 all belong to the row.
while x + 15 < width {
// Both slices are exactly 16 bytes long, so the array conversions cannot fail.
let curr = simd_mem::vld1q_u8(<&[u8; 16]>::try_from(&row[x..x + 16]).unwrap());
let prev = simd_mem::vld1q_u8(<&[u8; 16]>::try_from(&row[x - 1..x + 15]).unwrap());
// Per-lane unsigned absolute difference.
let abs_diff = vabdq_u8(curr, prev);
// Lanes strictly above the threshold become all-ones, the rest zero.
let above_thresh = vcgtq_u8(abs_diff, threshold_vec);
// Reduce each flagged lane to 1 so that a horizontal sum counts the edges.
let ones = vandq_u8(above_thresh, vdupq_n_u8(1));
edge_count += vaddlvq_u8(ones) as u32;
sample_count += 16;
x += 16;
}
// Scalar tail for the pairs that did not fill a whole 16-lane vector.
while x < width {
let diff = row[x].abs_diff(row[x - 1]);
if diff > 32 {
edge_count += 1;
}
sample_count += 1;
x += 1;
}
// Only every 16th row is sampled.
y += 16;
}
if sample_count == 0 {
return 0.0;
}
edge_count as f32 / sample_count as f32
}
/// Estimates how much of the plane consists of blocks with few distinct values.
///
/// The plane is divided into whole 16x16 blocks (partial blocks at the right
/// and bottom edges are ignored). Every 4th block in each direction is
/// inspected, and a block counts as uniform when it contains at most 32
/// distinct byte values.
///
/// Returns uniform blocks divided by inspected blocks, or `0.0` when the plane
/// is smaller than one block in either dimension.
///
/// # Panics
///
/// Panics if an inspected block does not lie fully inside `y_src`.
fn compute_color_uniformity(y_src: &[u8], width: usize, height: usize, y_stride: usize) -> f32 {
    const BLOCK: usize = 16;
    const BLOCK_STEP: usize = 4;
    const MAX_DISTINCT: usize = 32;

    let blocks_across = width / BLOCK;
    let blocks_down = height / BLOCK;
    if blocks_across == 0 || blocks_down == 0 {
        return 0.0;
    }

    let mut flat_blocks = 0u32;
    let mut inspected = 0u32;
    for block_row in (0..blocks_down).step_by(BLOCK_STEP) {
        let top = block_row * BLOCK;
        for block_col in (0..blocks_across).step_by(BLOCK_STEP) {
            let left = block_col * BLOCK;

            // Mark every byte value that occurs in the block, then count the marks.
            let mut present = [false; 256];
            for line in top..top + BLOCK {
                let pixels = &y_src[line * y_stride..][left..left + BLOCK];
                for &value in pixels {
                    present[usize::from(value)] = true;
                }
            }
            let distinct = present.iter().filter(|&&hit| hit).count();

            if distinct <= MAX_DISTINCT {
                flat_blocks += 1;
            }
            inspected += 1;
        }
    }

    if inspected == 0 {
        return 0.0;
    }
    flat_blocks as f32 / inspected as f32
}
/// Maps a content type to a 4-tuple of tuning values.
///
/// `Drawing` and `Text` share the same tuple.
///
/// NOTE(review): the meaning of the four slots is not visible in this file —
/// presumably (SNS strength, filter strength, filter sharpness, segment
/// count); confirm against the caller.
pub fn content_type_to_tuning(content_type: ImageContentType) -> (u8, u8, u8, u8) {
    const PHOTO_TUNING: (u8, u8, u8, u8) = (80, 30, 3, 4);
    const DRAWING_TEXT_TUNING: (u8, u8, u8, u8) = (50, 60, 0, 4);
    const ICON_TUNING: (u8, u8, u8, u8) = (0, 0, 0, 4);

    match content_type {
        ImageContentType::Photo => PHOTO_TUNING,
        ImageContentType::Drawing | ImageContentType::Text => DRAWING_TEXT_TUNING,
        ImageContentType::Icon => ICON_TUNING,
    }
}