use crate::ocr::error::OcrResult;
use crate::ocr::layout::{LayoutAnalyzer, TextRegion};
use image::{GrayImage, Luma};
/// Which text/background contrast the analyzer should search for.
#[derive(Debug, Clone, Copy)]
pub enum Polarity {
    /// Run a single pass on the image as given.
    DarkOnLight,
    /// Run a single pass on the intensity-inverted image.
    LightOnDark,
    /// Run both passes (as-given first, then inverted) and merge the results.
    Both,
}
/// Tunable parameters for the stroke-width-transform region detector.
#[derive(Debug, Clone)]
pub struct SwtConfig {
    /// Low threshold handed to the Canny edge detector.
    pub canny_low: f32,
    /// High threshold handed to the Canny edge detector.
    pub canny_high: f32,
    /// Longest ray (in unit steps) followed when searching for an opposing
    /// edge; also the largest stroke width kept in the component mask.
    pub max_stroke_width: u32,
    /// Maximum allowed coefficient of variation (std / mean) of the stroke
    /// widths inside a component.
    pub max_cv: f32,
    /// Minimum allowed bounding-box width / height ratio.
    pub aspect_min: f32,
    /// Maximum allowed bounding-box width / height ratio.
    pub aspect_max: f32,
    /// Minimum allowed bounding-box height, in pixels.
    pub height_min: u32,
    /// Maximum allowed bounding-box height, in pixels.
    pub height_max: u32,
    /// Minimum number of pixels a component must contain.
    pub min_area: u32,
    /// Angular tolerance, in radians, used when comparing the gradient at a
    /// candidate opposing edge against the ray direction.
    pub angle_tolerance: f32,
    /// Which contrast polarity (or both) to analyze.
    pub polarity: Polarity,
}
impl Default for SwtConfig {
fn default() -> Self {
Self {
canny_low: 50.0,
canny_high: 150.0,
max_stroke_width: 40,
max_cv: 0.5,
aspect_min: 0.1,
aspect_max: 10.0,
height_min: 8,
height_max: 400,
min_area: 20,
angle_tolerance: std::f32::consts::FRAC_PI_6, polarity: Polarity::Both,
}
}
}
/// Layout analyzer that locates text regions with a stroke width transform.
pub struct SwtLayoutAnalyzer {
    /// Parameters controlling edge detection, ray casting and filtering.
    pub cfg: SwtConfig,
}
impl SwtLayoutAnalyzer {
pub fn new() -> Self {
Self {
cfg: SwtConfig::default(),
}
}
pub fn with_config(cfg: SwtConfig) -> Self {
Self { cfg }
}
}
impl Default for SwtLayoutAnalyzer {
fn default() -> Self {
Self::new()
}
}
impl LayoutAnalyzer for SwtLayoutAnalyzer {
    /// Detects candidate text regions in `img`.
    ///
    /// Runs one pass per configured polarity (as-given, inverted, or both in
    /// that order), concatenates the results, then sorts them and drops exact
    /// duplicates. This implementation always returns `Ok`.
    fn detect_regions(&self, img: &GrayImage) -> OcrResult<Vec<TextRegion>> {
        // Each entry is the `invert` flag for one pass.
        let passes: &[bool] = match self.cfg.polarity {
            Polarity::DarkOnLight => &[false],
            Polarity::LightOnDark => &[true],
            Polarity::Both => &[false, true],
        };

        let mut found: Vec<TextRegion> = passes
            .iter()
            .flat_map(|&invert| run_pass(img, &self.cfg, invert))
            .collect();

        dedupe(&mut found);
        Ok(found)
    }
}
/// Runs a single stroke-width-transform pass and returns the bounding boxes
/// of the connected components that survive the configured filters.
///
/// Steps:
/// 1. Optionally invert the image, then compute Sobel gradients and Canny edges.
/// 2. From every edge pixel, cast a ray against the gradient direction; when
///    `cast_ray` reports an opposing edge, stamp the ray's pixels with the
///    smallest stroke length seen so far.
/// 3. Threshold the stroke-width map by `cfg.max_stroke_width`, label
///    8-connected components, and gather per-component statistics.
/// 4. Keep components that pass the area, height, aspect-ratio and
///    stroke-width-variation checks.
///
/// * `img` - grayscale input image.
/// * `cfg` - thresholds for edge detection, ray casting and filtering.
/// * `invert` - when `true`, the pass operates on the intensity-inverted image.
///
/// Regions are returned in ascending component-label order. An image with a
/// zero dimension yields an empty vector.
fn run_pass(img: &GrayImage, cfg: &SwtConfig, invert: bool) -> Vec<TextRegion> {
    let (w, h) = img.dimensions();
    if w == 0 || h == 0 {
        // Nothing to analyze; also avoids handing degenerate images to the
        // gradient and edge routines.
        return Vec::new();
    }

    // Borrow the input directly unless an inverted copy is actually needed.
    let inverted;
    let working: &GrayImage = if invert {
        inverted = invert_image(img);
        &inverted
    } else {
        img
    };

    let gx = imageproc::gradients::horizontal_sobel(working);
    let gy = imageproc::gradients::vertical_sobel(working);
    let edges = imageproc::edges::canny(working, cfg.canny_low, cfg.canny_high);

    // Index arithmetic is done in `usize` so `w * h` cannot wrap in `u32`.
    let row_len = w as usize;
    let index_of = move |x: u32, y: u32| y as usize * row_len + x as usize;

    // Per-pixel stroke width; INFINITY marks "no stroke found here".
    let mut swt = vec![f32::INFINITY; row_len * h as usize];
    for y in 0..h {
        for x in 0..w {
            if edges.get_pixel(x, y)[0] == 0 {
                continue;
            }
            let dx = gx.get_pixel(x, y)[0] as f32;
            let dy = gy.get_pixel(x, y)[0] as f32;
            let mag = (dx * dx + dy * dy).sqrt();
            if mag < 1.0 {
                // Gradient too weak to give a reliable direction.
                continue;
            }
            let ux = dx / mag;
            let uy = dy / mag;

            // The ray travels against the gradient direction.
            if let Some(stroke_len) =
                cast_ray(&edges, &gx, &gy, x as f32, y as f32, -ux, -uy, cfg)
            {
                let steps = stroke_len.ceil() as u32;
                for s in 0..=steps {
                    let t = s as f32;
                    // Round exactly as `cast_ray` does so the stamped pixels
                    // are the ones the ray actually visited (truncation would
                    // bias them toward the top-left).
                    let px = (x as f32 - ux * t).round() as i32;
                    let py = (y as f32 - uy * t).round() as i32;
                    if px < 0 || py < 0 || px >= w as i32 || py >= h as i32 {
                        break;
                    }
                    let cell = &mut swt[index_of(px as u32, py as u32)];
                    *cell = cell.min(stroke_len);
                }
            }
        }
    }

    // Foreground = pixels with a finite stroke width within the allowed range.
    let max_width = cfg.max_stroke_width as f32;
    let mask = GrayImage::from_fn(w, h, |x, y| {
        let sw = swt[index_of(x, y)];
        if sw.is_finite() && sw <= max_width {
            Luma([255u8])
        } else {
            Luma([0u8])
        }
    });

    let labelled = imageproc::region_labelling::connected_components(
        &mask,
        imageproc::region_labelling::Connectivity::Eight,
        Luma([0u8]),
    );

    /// Running bounding box and stroke-width moments for one component.
    struct Stats {
        min_x: u32,
        min_y: u32,
        max_x: u32,
        max_y: u32,
        count: u32,
        // f64 accumulators keep the variance stable for large components.
        sum: f64,
        sum_sq: f64,
    }

    // An ordered map makes the output order deterministic.
    let mut stats: std::collections::BTreeMap<u32, Stats> = std::collections::BTreeMap::new();
    for y in 0..h {
        for x in 0..w {
            let label = labelled.get_pixel(x, y)[0];
            if label == 0 {
                continue; // background
            }
            let sw = swt[index_of(x, y)];
            if !sw.is_finite() {
                continue;
            }
            // The first pixel of a component seeds its bounding box.
            let s = stats.entry(label).or_insert(Stats {
                min_x: x,
                min_y: y,
                max_x: x,
                max_y: y,
                count: 0,
                sum: 0.0,
                sum_sq: 0.0,
            });
            s.min_x = s.min_x.min(x);
            s.min_y = s.min_y.min(y);
            s.max_x = s.max_x.max(x);
            s.max_y = s.max_y.max(y);
            s.count += 1;
            let sw = f64::from(sw);
            s.sum += sw;
            s.sum_sq += sw * sw;
        }
    }

    let mut out = Vec::new();
    for s in stats.values() {
        if s.count < cfg.min_area {
            continue;
        }
        // Both extents are at least 1 because max >= min.
        let bw = s.max_x - s.min_x + 1;
        let bh = s.max_y - s.min_y + 1;
        if bh < cfg.height_min || bh > cfg.height_max {
            continue;
        }
        let aspect = bw as f32 / bh as f32;
        if aspect < cfg.aspect_min || aspect > cfg.aspect_max {
            continue;
        }
        // Coefficient of variation of the stroke widths in the component.
        let n = f64::from(s.count);
        let mean = s.sum / n;
        let variance = (s.sum_sq / n - mean * mean).max(0.0);
        let cv = if mean > 0.0 {
            variance.sqrt() / mean
        } else {
            f64::INFINITY
        };
        if cv > f64::from(cfg.max_cv) {
            continue;
        }
        out.push(TextRegion {
            x: s.min_x,
            y: s.min_y,
            width: bw,
            height: bh,
        });
    }
    out
}
/// Walks a ray from an edge pixel and reports the distance to the first
/// opposing edge.
///
/// The ray starts at (`start_x`, `start_y`) and advances in unit steps along
/// the direction (`ux`, `uy`), sampling the nearest pixel at each step, for at
/// most `cfg.max_stroke_width` steps.
///
/// An edge pixel ends the search successfully when its normalized gradient is
/// aligned with the ray direction to within `cfg.angle_tolerance` radians.
/// Because the caller passes the *negated* start gradient as the direction,
/// this means the two edge gradients are roughly opposite. Edge pixels that
/// fail the test, or whose gradient magnitude is below 1, are skipped and the
/// walk continues.
///
/// Returns `Some(distance)` in steps on success, or `None` if the ray leaves
/// the image or exhausts its step budget.
// The arguments mirror exactly what the walk needs; bundling them would only
// move the complexity to the single call site.
#[allow(clippy::too_many_arguments)]
fn cast_ray(
    edges: &GrayImage,
    gx: &image::ImageBuffer<Luma<i16>, Vec<i16>>,
    gy: &image::ImageBuffer<Luma<i16>, Vec<i16>>,
    start_x: f32,
    start_y: f32,
    ux: f32,
    uy: f32,
    cfg: &SwtConfig,
) -> Option<f32> {
    let (w, h) = edges.dimensions();
    // Loop-invariant: smallest cosine that still counts as "aligned".
    let min_alignment = cfg.angle_tolerance.cos();

    for step in 1..=cfg.max_stroke_width {
        let t = step as f32;
        let px = (start_x + ux * t).round() as i32;
        let py = (start_y + uy * t).round() as i32;
        if px < 0 || py < 0 || px >= w as i32 || py >= h as i32 {
            return None;
        }
        let (xp, yp) = (px as u32, py as u32);

        // Ignore samples that round back onto the starting pixel.
        if xp as f32 == start_x && yp as f32 == start_y {
            continue;
        }
        if edges.get_pixel(xp, yp)[0] == 0 {
            continue;
        }

        let dx = gx.get_pixel(xp, yp)[0] as f32;
        let dy = gy.get_pixel(xp, yp)[0] as f32;
        let mag = (dx * dx + dy * dy).sqrt();
        if mag < 1.0 {
            continue;
        }

        // Signed comparison on purpose: a gradient pointing *against* the ray
        // has the same orientation as the starting edge, so it is not the far
        // side of a stroke. Using the absolute value would accept it.
        let dot = (dx / mag) * ux + (dy / mag) * uy;
        if dot >= min_alignment {
            return Some(t);
        }
    }
    None
}
/// Returns a copy of `img` in which every intensity `v` is replaced by `255 - v`.
fn invert_image(img: &GrayImage) -> GrayImage {
    let (width, height) = img.dimensions();
    GrayImage::from_fn(width, height, |x, y| {
        let value = img.get_pixel(x, y)[0];
        Luma([u8::MAX - value])
    })
}
fn dedupe(regions: &mut Vec<TextRegion>) {
regions.sort_by_key(|r| (r.y, r.x, r.width, r.height));
regions.dedup();
}