#[derive(Clone, Debug)]
pub struct BitmapImage {
pub pixels: Vec<u8>,
pub width: usize,
pub height: usize,
}
impl BitmapImage {
#[must_use]
pub fn new(width: usize, height: usize) -> Self {
Self {
pixels: vec![0u8; width * height],
width,
height,
}
}
#[must_use]
pub fn pixel(&self, col: usize, row: usize) -> u8 {
if col < self.width && row < self.height {
self.pixels[row * self.width + col]
} else {
0
}
}
pub fn set_pixel(&mut self, col: usize, row: usize, value: u8) {
if col < self.width && row < self.height {
self.pixels[row * self.width + col] = value;
}
}
#[must_use]
pub fn binarise(&self, threshold: u8) -> Self {
let pixels = self
.pixels
.iter()
.map(|&p| if p >= threshold { 255 } else { 0 })
.collect();
Self {
pixels,
width: self.width,
height: self.height,
}
}
#[must_use]
pub fn resize(&self, new_width: usize, new_height: usize) -> Self {
if new_width == 0 || new_height == 0 {
return Self::new(new_width, new_height);
}
let mut out = Self::new(new_width, new_height);
for row in 0..new_height {
let src_row = (row * self.height) / new_height;
for col in 0..new_width {
let src_col = (col * self.width) / new_width;
out.set_pixel(col, row, self.pixel(src_col, src_row));
}
}
out
}
#[must_use]
pub fn crop(&self, x: usize, y: usize, w: usize, h: usize) -> Self {
let mut out = Self::new(w, h);
for row in 0..h {
for col in 0..w {
out.set_pixel(col, row, self.pixel(x + col, y + row));
}
}
out
}
}
#[derive(Clone, Debug)]
pub struct GlyphTemplate {
pub character: char,
pub pixels: Vec<f32>,
pub width: usize,
pub height: usize,
}
impl GlyphTemplate {
#[must_use]
pub fn new(character: char, binary: &[u8], width: usize, height: usize) -> Self {
let pixels = binary
.iter()
.map(|&b| if b != 0 { 1.0_f32 } else { 0.0_f32 })
.collect();
Self {
character,
pixels,
width,
height,
}
}
#[must_use]
pub fn ncc_score(&self, patch: &[f32]) -> f32 {
if patch.len() != self.pixels.len() || self.pixels.is_empty() {
return 0.0;
}
let n = self.pixels.len() as f32;
let mean_t = self.pixels.iter().sum::<f32>() / n;
let mean_p = patch.iter().sum::<f32>() / n;
let mut num = 0.0_f32;
let mut den_t = 0.0_f32;
let mut den_p = 0.0_f32;
for (&t, &p) in self.pixels.iter().zip(patch.iter()) {
let dt = t - mean_t;
let dp = p - mean_p;
num += dt * dp;
den_t += dt * dt;
den_p += dp * dp;
}
let denom = (den_t * den_p).sqrt();
if denom < 1e-6 {
0.0
} else {
num / denom
}
}
}
#[derive(Clone, Debug, Default)]
pub struct GlyphAtlas {
templates: Vec<GlyphTemplate>,
}
impl GlyphAtlas {
#[must_use]
pub fn new() -> Self {
Self::default()
}
pub fn add(&mut self, template: GlyphTemplate) {
self.templates.push(template);
}
pub fn add_ascii_template(
&mut self,
ch: char,
binary: &[u8],
width: usize,
height: usize,
) {
self.add(GlyphTemplate::new(ch, binary, width, height));
}
#[must_use]
pub fn len(&self) -> usize {
self.templates.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.templates.is_empty()
}
#[must_use]
pub fn best_match(&self, patch: &BitmapImage, min_score: f32) -> Option<char> {
if self.templates.is_empty() {
return None;
}
let mut best_char = None;
let mut best_score = min_score;
for tmpl in &self.templates {
let resized = patch.resize(tmpl.width, tmpl.height);
let patch_f32: Vec<f32> = resized.pixels.iter().map(|&p| p as f32 / 255.0).collect();
let score = tmpl.ncc_score(&patch_f32);
if score > best_score {
best_score = score;
best_char = Some(tmpl.character);
}
}
best_char
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Bbox {
pub x: usize,
pub y: usize,
pub w: usize,
pub h: usize,
}
impl Bbox {
#[must_use]
pub fn cx(&self) -> usize {
self.x + self.w / 2
}
}
#[must_use]
pub fn find_blobs(binary: &BitmapImage, min_area: usize, max_area: usize) -> Vec<Bbox> {
if binary.width == 0 || binary.height == 0 {
return Vec::new();
}
let w = binary.width;
let h = binary.height;
let n = w * h;
let mut labels = vec![0usize; n];
let mut parent = vec![0usize; n + 1];
for i in 0..=n {
parent[i] = i;
}
let mut next_label = 1usize;
for row in 0..h {
for col in 0..w {
if binary.pixel(col, row) == 0 {
continue;
}
let idx = row * w + col;
let mut neighbours = Vec::with_capacity(4);
if col > 0 && binary.pixel(col - 1, row) != 0 {
neighbours.push(labels[idx - 1]);
}
if row > 0 && binary.pixel(col, row - 1) != 0 {
neighbours.push(labels[idx - w]);
}
if neighbours.is_empty() {
labels[idx] = next_label;
next_label += 1;
} else {
let min_label = *neighbours.iter().min().unwrap_or(&next_label);
labels[idx] = min_label;
for &nb in &neighbours {
union(&mut parent, min_label, nb);
}
}
}
}
for i in 0..n {
if labels[i] != 0 {
labels[i] = find(&mut parent, labels[i]);
}
}
use std::collections::HashMap;
let mut boxes: HashMap<usize, (usize, usize, usize, usize)> = HashMap::new(); for row in 0..h {
for col in 0..w {
let lbl = labels[row * w + col];
if lbl == 0 {
continue;
}
let e = boxes.entry(lbl).or_insert((col, row, col, row));
if col < e.0 {
e.0 = col;
}
if row < e.1 {
e.1 = row;
}
if col > e.2 {
e.2 = col;
}
if row > e.3 {
e.3 = row;
}
}
}
let mut result: Vec<Bbox> = boxes
.values()
.filter_map(|&(x0, y0, x1, y1)| {
let bw = x1 - x0 + 1;
let bh = y1 - y0 + 1;
let area = bw * bh;
if area >= min_area && area <= max_area {
Some(Bbox {
x: x0,
y: y0,
w: bw,
h: bh,
})
} else {
None
}
})
.collect();
result.sort_by_key(|b| b.x);
result
}
fn find(parent: &mut [usize], x: usize) -> usize {
if parent[x] != x {
parent[x] = find(parent, parent[x]);
}
parent[x]
}
fn union(parent: &mut [usize], a: usize, b: usize) {
let ra = find(parent, a);
let rb = find(parent, b);
if ra != rb {
parent[rb] = ra;
}
}
fn detect_subtitle_row_band(binary: &BitmapImage) -> (usize, usize) {
let h = binary.height;
let w = binary.width;
let start_row = h * 3 / 4;
let mut best_row = start_row;
let mut best_count = 0usize;
for row in start_row..h {
let count = (0..w)
.filter(|&c| binary.pixel(c, row) != 0)
.count();
if count > best_count {
best_count = count;
best_row = row;
}
}
let threshold = best_count / 4;
let mut top = best_row;
let mut bottom = best_row;
while top > 0 {
let count = (0..w)
.filter(|&c| binary.pixel(c, top - 1) != 0)
.count();
if count >= threshold {
top -= 1;
} else {
break;
}
}
while bottom + 1 < h {
let count = (0..w)
.filter(|&c| binary.pixel(c, bottom + 1) != 0)
.count();
if count >= threshold {
bottom += 1;
} else {
break;
}
}
(top, bottom)
}
pub struct BitmapOcr {
atlas: GlyphAtlas,
pub binarise_threshold: u8,
pub min_glyph_area: usize,
pub max_glyph_area: usize,
pub word_gap_px: usize,
}
impl BitmapOcr {
#[must_use]
pub fn new(atlas: GlyphAtlas) -> Self {
Self {
atlas,
binarise_threshold: 128,
min_glyph_area: 4,
max_glyph_area: 10_000,
word_gap_px: 8,
}
}
#[must_use]
pub fn extract_text(&self, image: &BitmapImage, min_score: f32) -> String {
let binary = image.binarise(self.binarise_threshold);
let blobs = find_blobs(&binary, self.min_glyph_area, self.max_glyph_area);
if blobs.is_empty() {
return String::new();
}
let mut result = String::new();
let mut prev_right: Option<usize> = None;
for blob in &blobs {
if let Some(right) = prev_right {
let gap = blob.x.saturating_sub(right);
if gap >= self.word_gap_px {
result.push(' ');
}
}
let patch = binary.crop(blob.x, blob.y, blob.w, blob.h);
if let Some(ch) = self.atlas.best_match(&patch, min_score) {
result.push(ch);
} else {
result.push('?');
}
prev_right = Some(blob.x + blob.w);
}
result
}
#[must_use]
pub fn extract_subtitle_region(&self, image: &BitmapImage, min_score: f32) -> String {
let binary = image.binarise(self.binarise_threshold);
let (top, bottom) = detect_subtitle_row_band(&binary);
let h = bottom.saturating_sub(top) + 1;
if h == 0 {
return String::new();
}
let cropped = image.crop(0, top, image.width, h);
self.extract_text(&cropped, min_score)
}
}
#[cfg(test)]
mod tests {
use super::*;
const GLYPH_I: &[u8] = &[
1, 1, 1, 1, 1,
0, 0, 1, 0, 0,
0, 0, 1, 0, 0,
0, 0, 1, 0, 0,
0, 0, 1, 0, 0,
0, 0, 1, 0, 0,
1, 1, 1, 1, 1,
];
const GLYPH_H: &[u8] = &[
1, 0, 0, 0, 1,
1, 0, 0, 0, 1,
1, 0, 0, 0, 1,
1, 1, 1, 1, 1,
1, 0, 0, 0, 1,
1, 0, 0, 0, 1,
1, 0, 0, 0, 1,
];
fn make_atlas() -> GlyphAtlas {
let mut atlas = GlyphAtlas::new();
atlas.add_ascii_template('I', GLYPH_I, 5, 7);
atlas.add_ascii_template('H', GLYPH_H, 5, 7);
atlas
}
fn render_glyph_at(pixels: &mut [u8], img_w: usize, glyph: &[u8], gx: usize, gy: usize, gw: usize, gh: usize) {
for row in 0..gh {
for col in 0..gw {
pixels[(gy + row) * img_w + (gx + col)] = glyph[row * gw + col] * 255;
}
}
}
#[test]
fn test_bitmap_image_pixel() {
let mut img = BitmapImage::new(4, 4);
img.set_pixel(2, 1, 200);
assert_eq!(img.pixel(2, 1), 200);
assert_eq!(img.pixel(3, 3), 0);
assert_eq!(img.pixel(10, 10), 0); }
#[test]
fn test_binarise() {
let img = BitmapImage {
pixels: vec![50, 100, 150, 200],
width: 4,
height: 1,
};
let bin = img.binarise(128);
assert_eq!(bin.pixels, vec![0, 0, 255, 255]);
}
#[test]
fn test_crop() {
let mut img = BitmapImage::new(6, 6);
img.set_pixel(2, 2, 255);
let crop = img.crop(1, 1, 3, 3);
assert_eq!(crop.pixel(1, 1), 255);
assert_eq!(crop.pixel(0, 0), 0);
}
#[test]
fn test_resize_preserves_nonzero() {
let mut img = BitmapImage::new(10, 10);
for i in 0..10 {
img.set_pixel(i, i, 255);
}
let resized = img.resize(5, 5);
assert!(resized.pixels.iter().any(|&p| p > 0));
}
#[test]
fn test_glyph_template_ncc_self() {
let tmpl = GlyphTemplate::new('A', &[0, 1, 0, 1, 1, 1, 0, 1, 0], 3, 3);
let patch: Vec<f32> = vec![0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0];
let score = tmpl.ncc_score(&patch);
assert!(score > 0.99, "self-NCC should be ~1.0, got {score}");
}
#[test]
fn test_glyph_template_ncc_opposite() {
let tmpl = GlyphTemplate::new('A', &[1, 0, 1, 0, 0, 0, 1, 0, 1], 3, 3);
let patch: Vec<f32> = vec![0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0];
let score = tmpl.ncc_score(&patch);
assert!(score < 0.0, "NCC of opposite should be negative, got {score}");
}
#[test]
fn test_glyph_atlas_best_match_i() {
let atlas = make_atlas();
let mut img = BitmapImage::new(5, 7);
for row in 0..7 {
for col in 0..5 {
img.set_pixel(col, row, GLYPH_I[row * 5 + col] * 255);
}
}
let ch = atlas.best_match(&img, 0.5);
assert_eq!(ch, Some('I'), "should recognise 'I'");
}
#[test]
fn test_glyph_atlas_best_match_h() {
let atlas = make_atlas();
let mut img = BitmapImage::new(5, 7);
for row in 0..7 {
for col in 0..5 {
img.set_pixel(col, row, GLYPH_H[row * 5 + col] * 255);
}
}
let ch = atlas.best_match(&img, 0.5);
assert_eq!(ch, Some('H'), "should recognise 'H'");
}
#[test]
fn test_find_blobs_single() {
let mut img = BitmapImage::new(10, 10);
for row in 3..6 {
for col in 3..6 {
img.set_pixel(col, row, 255);
}
}
let blobs = find_blobs(&img, 1, 1000);
assert_eq!(blobs.len(), 1);
assert_eq!(blobs[0].x, 3);
assert_eq!(blobs[0].y, 3);
assert_eq!(blobs[0].w, 3);
assert_eq!(blobs[0].h, 3);
}
#[test]
fn test_find_blobs_two_separate() {
let mut img = BitmapImage::new(20, 10);
img.set_pixel(1, 1, 255);
img.set_pixel(2, 1, 255);
img.set_pixel(15, 1, 255);
img.set_pixel(16, 1, 255);
let blobs = find_blobs(&img, 1, 1000);
assert_eq!(blobs.len(), 2, "should find two blobs");
assert!(blobs[0].x < blobs[1].x);
}
#[test]
fn test_extract_text_single_char() {
let atlas = make_atlas();
let ocr = BitmapOcr::new(atlas);
let img_w = 5;
let img_h = 7;
let mut pixels = vec![0u8; img_w * img_h];
render_glyph_at(&mut pixels, img_w, GLYPH_I, 0, 0, 5, 7);
let img = BitmapImage { pixels, width: img_w, height: img_h };
let text = ocr.extract_text(&img, 0.5);
assert!(text.contains('I'), "expected 'I', got: {text:?}");
}
#[test]
fn test_extract_text_two_chars() {
let atlas = make_atlas();
let ocr = BitmapOcr::new(atlas);
let img_w = 12; let img_h = 7;
let mut pixels = vec![0u8; img_w * img_h];
render_glyph_at(&mut pixels, img_w, GLYPH_H, 0, 0, 5, 7);
render_glyph_at(&mut pixels, img_w, GLYPH_I, 7, 0, 5, 7);
let img = BitmapImage { pixels, width: img_w, height: img_h };
let text = ocr.extract_text(&img, 0.5);
assert!(text.contains('H'), "expected 'H', got: {text:?}");
assert!(text.contains('I'), "expected 'I', got: {text:?}");
}
#[test]
fn test_empty_image_returns_empty() {
let atlas = make_atlas();
let ocr = BitmapOcr::new(atlas);
let img = BitmapImage::new(100, 50);
let text = ocr.extract_text(&img, 0.5);
assert!(text.is_empty(), "empty image should give empty text");
}
#[test]
fn test_atlas_empty_returns_none() {
let atlas = GlyphAtlas::new();
let img = BitmapImage::new(5, 7);
assert_eq!(atlas.best_match(&img, 0.0), None);
}
#[test]
fn test_find_blobs_area_filter() {
let mut img = BitmapImage::new(20, 10);
img.set_pixel(5, 5, 255);
for col in 10..15 {
img.set_pixel(col, 5, 255);
}
let blobs = find_blobs(&img, 2, 1000);
assert_eq!(blobs.len(), 1, "tiny blob should be filtered");
assert_eq!(blobs[0].x, 10);
}
}