use crate::ocr::error::{OcrError, OcrResult};
use crate::ocr::features::extract;
use crate::ocr::recognize::Prototype;
use ab_glyph::{Font, FontRef, PxScale, ScaleFont};
use image::{GrayImage, Luma};
/// Rasterizes `ch` from `font` at `px_size` into a binarized grayscale image.
///
/// The glyph outline is drawn as dark ink on a white (`255`) canvas whose
/// dimensions are the outline's pixel bounds. The canvas is then thresholded
/// at the level chosen by `imageproc::contrast::otsu_level`.
///
/// Returns `None` when:
/// - `px_size` is not a finite, strictly positive number,
/// - the font provides no outline for `ch`, or
/// - the outline's pixel bounds have zero width or height.
pub fn rasterize_glyph(font: &FontRef<'_>, ch: char, px_size: f32) -> Option<GrayImage> {
    // Float-to-integer `as` casts saturate, so a non-finite scale could turn
    // into a request for a `u32::MAX`-sized canvas below. Reject unusable
    // sizes before any outline work is done.
    if !px_size.is_finite() || px_size <= 0.0 {
        return None;
    }

    let scaled = font.as_scaled(PxScale::from(px_size));
    let glyph = scaled.scaled_glyph(ch);
    let outlined = scaled.outline_glyph(glyph)?;

    let bounds = outlined.px_bounds();
    let w = bounds.width().ceil() as u32;
    let h = bounds.height().ceil() as u32;
    if w == 0 || h == 0 {
        return None;
    }

    // Start from an all-white canvas and subtract ink according to coverage.
    let mut img = GrayImage::from_pixel(w, h, Luma([255]));
    outlined.draw(|x, y, coverage| {
        // Ignore any coordinate that falls outside the allocated canvas.
        if x < w && y < h {
            let ink = (coverage.clamp(0.0, 1.0) * 255.0) as u8;
            let current = img.get_pixel(x, y)[0];
            let new_val = current.saturating_sub(ink);
            img.put_pixel(x, y, Luma([new_val]));
        }
    });

    // Binarize the anti-aliased rendering using an Otsu-selected threshold.
    let t = imageproc::contrast::otsu_level(&img);
    let bin = imageproc::contrast::threshold(&img, t);
    Some(bin)
}
/// Builds one [`Prototype`] per character of `chars` from an in-memory font.
///
/// Each character is rasterized with [`rasterize_glyph`] at `px_size` and the
/// resulting image is passed to `extract` to obtain its features. Characters
/// for which rasterization yields nothing are skipped.
///
/// # Errors
///
/// Returns `OcrError::Config` when `font_bytes` cannot be loaded as a font, or
/// when no character at all produced a prototype.
pub fn train_from_ttf_bytes(
    font_bytes: &[u8],
    chars: &str,
    px_size: f32,
) -> OcrResult<Vec<Prototype>> {
    let font = FontRef::try_from_slice(font_bytes)
        .map_err(|e| OcrError::Config(format!("load font: {e}")))?;

    let prototypes: Vec<Prototype> = chars
        .chars()
        .filter_map(|label| {
            rasterize_glyph(&font, label, px_size).map(|glyph_img| Prototype {
                label,
                features: extract(&glyph_img),
            })
        })
        .collect();

    if prototypes.is_empty() {
        let requested = chars.chars().count();
        return Err(OcrError::Config(format!(
            "font produced no usable outlines for the supplied character set ({} chars)",
            requested
        )));
    }

    Ok(prototypes)
}
/// Reads the font file at `font_path` and trains prototypes from its bytes.
///
/// This is a file-based front end to [`train_from_ttf_bytes`]; `chars` and
/// `px_size` are forwarded unchanged.
///
/// # Errors
///
/// Returns `OcrError::Io` when the file cannot be read, otherwise whatever
/// [`train_from_ttf_bytes`] returns.
pub fn train_from_ttf_path(
    font_path: impl AsRef<std::path::Path>,
    chars: &str,
    px_size: f32,
) -> OcrResult<Vec<Prototype>> {
    std::fs::read(font_path.as_ref())
        .map_err(OcrError::Io)
        .and_then(|font_bytes| train_from_ttf_bytes(&font_bytes, chars, px_size))
}
/// Trains prototypes from one font at every size in `px_sizes`.
///
/// The prototype sets are concatenated in the order of `px_sizes`.
///
/// # Errors
///
/// Returns `OcrError::Config` when `px_sizes` is empty. Any error from
/// [`train_from_ttf_bytes`] for an individual size aborts the whole run and is
/// returned as-is.
pub fn train_multiscale(
    font_bytes: &[u8],
    chars: &str,
    px_sizes: &[f32],
) -> OcrResult<Vec<Prototype>> {
    if px_sizes.is_empty() {
        return Err(OcrError::Config("at least one px size required".into()));
    }

    px_sizes.iter().try_fold(
        Vec::new(),
        |mut collected: Vec<Prototype>, &px| -> OcrResult<Vec<Prototype>> {
            // The first failing size short-circuits the fold.
            collected.extend(train_from_ttf_bytes(font_bytes, chars, px)?);
            Ok(collected)
        },
    )
}
/// Reads the font file at `font_path` and trains prototypes at several sizes.
///
/// This is a file-based front end to [`train_multiscale`]; `chars` and
/// `px_sizes` are forwarded unchanged.
///
/// # Errors
///
/// Returns `OcrError::Io` when the file cannot be read, otherwise whatever
/// [`train_multiscale`] returns.
pub fn train_multiscale_from_path(
    font_path: impl AsRef<std::path::Path>,
    chars: &str,
    px_sizes: &[f32],
) -> OcrResult<Vec<Prototype>> {
    std::fs::read(font_path.as_ref())
        .map_err(OcrError::Io)
        .and_then(|font_bytes| train_multiscale(&font_bytes, chars, px_sizes))
}
/// Trains prototypes for every combination of font and pixel size.
///
/// `fonts` is a list of `(name, bytes)` pairs; the name is only used in the
/// diagnostic printed when a combination is skipped. Training is best-effort:
/// a font/size combination for which [`train_from_ttf_bytes`] fails is
/// reported on stderr and skipped rather than aborting the run.
///
/// # Errors
///
/// Returns `OcrError::Config` when `fonts` is empty, when `px_sizes` is empty,
/// or when no font/size combination produced any prototype.
pub fn train_multifont_multiscale(
    fonts: &[(&str, &[u8])],
    chars: &str,
    px_sizes: &[f32],
) -> OcrResult<Vec<Prototype>> {
    if fonts.is_empty() {
        return Err(OcrError::Config("at least one font required".into()));
    }
    // Mirror `train_multiscale`: an empty size list is a caller error and
    // should be reported as such, not as "no usable combinations".
    if px_sizes.is_empty() {
        return Err(OcrError::Config("at least one px size required".into()));
    }

    let mut out = Vec::new();
    for &(name, bytes) in fonts {
        for &px in px_sizes {
            match train_from_ttf_bytes(bytes, chars, px) {
                Ok(mut set) => out.append(&mut set),
                // Deliberately non-fatal: keep going with the other combinations.
                Err(e) => eprintln!(
                    "omniparse: skipping font '{}' @ {}px: {}",
                    name, px, e
                ),
            }
        }
    }

    if out.is_empty() {
        return Err(OcrError::Config(
            "no usable font/size combinations produced prototypes".into(),
        ));
    }
    Ok(out)
}
/// Loads every font file in `font_paths` and trains across all fonts and sizes.
///
/// All files are read into memory first, in the order given; each path string
/// doubles as the font's name when handed to [`train_multifont_multiscale`].
///
/// # Errors
///
/// Returns `OcrError::Io` as soon as any file fails to read (later paths are
/// not attempted), otherwise whatever [`train_multifont_multiscale`] returns.
pub fn train_multifont_multiscale_from_paths(
    font_paths: &[&str],
    chars: &str,
    px_sizes: &[f32],
) -> OcrResult<Vec<Prototype>> {
    // Own the file contents so the borrowed views below stay valid.
    let loaded = font_paths
        .iter()
        .map(|path| {
            std::fs::read(path)
                .map(|data| (path.to_string(), data))
                .map_err(OcrError::Io)
        })
        .collect::<Result<Vec<(String, Vec<u8>)>, _>>()?;

    let mut borrowed: Vec<(&str, &[u8])> = Vec::with_capacity(loaded.len());
    for (name, data) in &loaded {
        borrowed.push((name.as_str(), data.as_slice()));
    }

    train_multifont_multiscale(&borrowed, chars, px_sizes)
}
/// Character set offered as a default for training: ASCII upper- and
/// lower-case letters, decimal digits, and a selection of punctuation and
/// symbol characters.
pub const DEFAULT_CHAR_SET: &str = concat!(
    // Upper-case letters.
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    // Lower-case letters.
    "abcdefghijklmnopqrstuvwxyz",
    // Decimal digits.
    "0123456789",
    // Punctuation and symbols.
    ".,:;!?'\"()[]{}-_/\\@#&*+=<>%$",
);