use std::ffi::{CStr, CString, c_char, c_int};
use std::sync::LazyLock;
use crate::ffi::{Handle, HandleStore};
use crate::fitz::geometry::Rect;
use crate::fitz::pixmap::Pixmap;
/// Global registry of [`OcrEngine`] instances addressed by `Handle`.
/// Constructed lazily on first access via `HandleStore::new`; the `fz_*ocr_engine*`
/// FFI functions in this file insert into, look up in, and remove from it.
static OCR_ENGINES: LazyLock<HandleStore<OcrEngine>> = LazyLock::new(HandleStore::new);
/// Global registry of [`OcrResult`] instances addressed by `Handle`.
/// Constructed lazily on first access via `HandleStore::new`; the `fz_*ocr_result*`
/// FFI functions in this file insert into, look up in, and remove from it.
static OCR_RESULTS: LazyLock<HandleStore<OcrResult>> = LazyLock::new(HandleStore::new);
/// Selects which OCR backend an `OcrEngine` should use.
///
/// `#[repr(C)]` with explicit discriminants; the FFI constructor in this file
/// receives the selection as a raw `c_int`.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OcrEngineType {
/// No backend. `OcrEngine::init` succeeds and `OcrEngine::recognize` returns an empty result.
None = 0,
/// Tesseract. `OcrEngine::init` currently fails, reporting that the `tesseract` feature is not enabled.
Tesseract = 1,
/// Rejected by `OcrEngine::init` as unsupported in the code visible here.
WindowsOcr = 2,
/// Rejected by `OcrEngine::init` as unsupported in the code visible here.
AppleVision = 3,
/// Rejected by `OcrEngine::init` as unsupported in the code visible here.
GoogleVision = 4,
/// Rejected by `OcrEngine::init` as unsupported in the code visible here.
AmazonTextract = 5,
/// Rejected by `OcrEngine::init` as unsupported in the code visible here.
AzureVision = 6,
}
/// Page segmentation mode stored in `OcrConfig::psm`.
///
/// `fz_ocr_engine_set_psm` translates the integers 0 through 13 into these variants.
/// NOTE(review): the names and values look like Tesseract's page segmentation
/// mode constants — confirm before relying on that correspondence.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OcrPageSegMode {
OsdOnly = 0,
AutoOsd = 1,
AutoOnly = 2,
/// Used by `OcrConfig::default` and as the fallback for unknown codes in `fz_ocr_engine_set_psm`.
Auto = 3,
SingleColumn = 4,
SingleBlockVertText = 5,
SingleBlock = 6,
SingleLine = 7,
SingleWord = 8,
CircleWord = 9,
SingleChar = 10,
SparseText = 11,
SparseTextOsd = 12,
RawLine = 13,
}
/// Recognition engine mode stored in `OcrConfig::oem`.
///
/// NOTE(review): the names and values look like Tesseract's OCR engine mode
/// constants — confirm before relying on that correspondence.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OcrEngineMode {
TesseractOnly = 0,
LstmOnly = 1,
TesseractLstmCombined = 2,
/// Used by `OcrConfig::default`.
Default = 3,
}
/// Settings an [`OcrEngine`] is constructed with.
///
/// See the `Default` impl for the baseline values.
#[derive(Debug, Clone)]
pub struct OcrConfig {
/// Backend to use; decides whether `OcrEngine::init` can succeed.
pub engine_type: OcrEngineType,
/// Recognition language identifier (the default is `"eng"`).
pub language: String,
/// Page segmentation mode.
pub psm: OcrPageSegMode,
/// Engine mode.
pub oem: OcrEngineMode,
/// Resolution hint (the default is 300). Not read by any code visible in this file.
pub dpi: u32,
/// Preprocessing toggle (the default is `true`). Not read by any code visible in this file.
pub preprocess: bool,
/// Confidence threshold (the default is 60). Not read by any code visible in this file.
/// NOTE(review): presumably on the same scale as `OcrWord::confidence` — confirm.
pub min_confidence: i32,
}
impl Default for OcrConfig {
fn default() -> Self {
Self {
engine_type: OcrEngineType::None,
language: "eng".to_string(),
psm: OcrPageSegMode::Auto,
oem: OcrEngineMode::Default,
dpi: 300,
preprocess: true,
min_confidence: 60,
}
}
}
/// A single recognized word.
#[derive(Debug, Clone)]
pub struct OcrWord {
/// Recognized text of the word.
pub text: String,
/// Bounding rectangle of the word.
pub bounds: Rect,
/// Confidence score; averaged by `OcrLine` and `OcrResult`.
/// NOTE(review): the tests use values up to 100, so this looks like a 0–100 scale — confirm.
pub confidence: i32,
/// Font name, if known (`OcrWord::new` leaves it `None`).
pub font: Option<String>,
/// Font size, if known (`OcrWord::new` leaves it `None`).
pub font_size: Option<f32>,
/// Bold flag (`OcrWord::new` sets it to `false`).
pub bold: bool,
/// Italic flag (`OcrWord::new` sets it to `false`).
pub italic: bool,
}
impl OcrWord {
pub fn new(text: impl Into<String>, bounds: Rect, confidence: i32) -> Self {
Self {
text: text.into(),
bounds,
confidence,
font: None,
font_size: None,
bold: false,
italic: false,
}
}
}
/// A line of recognized text made up of words.
#[derive(Debug, Clone)]
pub struct OcrLine {
/// Text of the line; `add_word` appends each word separated by a single space.
pub text: String,
/// Bounding rectangle supplied at construction (not recomputed when words are added).
pub bounds: Rect,
/// Words in the order they were added.
pub words: Vec<OcrWord>,
/// Integer mean of the word confidences, refreshed by `add_word`; 0 for an empty line.
pub confidence: i32,
}
impl OcrLine {
    /// Creates an empty line with the given bounds, no words, empty text and
    /// a confidence of 0.
    pub fn new(bounds: Rect) -> Self {
        Self {
            text: String::new(),
            bounds,
            words: Vec::new(),
            confidence: 0,
        }
    }

    /// Appends `word` to the line.
    ///
    /// The word's text is added to `self.text`, preceded by a single space when
    /// the line already has text, and the line confidence is recomputed.
    pub fn add_word(&mut self, word: OcrWord) {
        if !self.text.is_empty() {
            self.text.push(' ');
        }
        self.text.push_str(&word.text);
        self.words.push(word);
        self.update_confidence();
    }

    /// Recomputes `self.confidence` as the integer mean (truncated toward zero)
    /// of all word confidences, or 0 when the line has no words.
    fn update_confidence(&mut self) {
        if self.words.is_empty() {
            self.confidence = 0;
            return;
        }
        // Accumulate in i64, as `OcrResult::update_confidence` does, so that summing
        // many (or very large) i32 confidences cannot overflow.
        let total: i64 = self.words.iter().map(|w| i64::from(w.confidence)).sum();
        let count = self.words.len() as i64;
        // The mean of i32 values always lies within the i32 range, so this
        // narrowing cast cannot truncate.
        self.confidence = (total / count) as i32;
    }
}
/// A block of recognized content made up of lines.
#[derive(Debug, Clone)]
pub struct OcrBlock {
/// Bounding rectangle supplied at construction.
pub bounds: Rect,
/// Lines in the order they were added.
pub lines: Vec<OcrLine>,
/// Block kind code; `OcrBlock::new` sets it to 0.
/// NOTE(review): the meaning of individual codes is not defined in this file — confirm.
pub block_type: i32,
}
impl OcrBlock {
pub fn new(bounds: Rect) -> Self {
Self {
bounds,
lines: Vec::new(),
block_type: 0, }
}
pub fn add_line(&mut self, line: OcrLine) {
self.lines.push(line);
}
pub fn text(&self) -> String {
self.lines
.iter()
.map(|l| l.text.as_str())
.collect::<Vec<_>>()
.join("\n")
}
}
/// Outcome of an OCR pass: recognized blocks plus summary data.
#[derive(Debug, Clone)]
pub struct OcrResult {
/// Recognized blocks in the order they were added.
pub blocks: Vec<OcrBlock>,
/// Integer mean of all word confidences, refreshed by `add_block`; 0 when there are no words.
pub confidence: i32,
/// Processing time in milliseconds; `OcrResult::new` sets it to 0 and nothing in this file updates it.
pub processing_time_ms: u64,
/// Width passed to `OcrResult::new` (the source pixmap width in `OcrEngine::recognize`).
pub width: u32,
/// Height passed to `OcrResult::new` (the source pixmap height in `OcrEngine::recognize`).
pub height: u32,
/// Error message when recognition failed; `None` otherwise.
pub error: Option<String>,
}
impl OcrResult {
pub fn new(width: u32, height: u32) -> Self {
Self {
blocks: Vec::new(),
confidence: 0,
processing_time_ms: 0,
width,
height,
error: None,
}
}
pub fn error(msg: impl Into<String>) -> Self {
let mut result = Self::new(0, 0);
result.error = Some(msg.into());
result
}
pub fn text(&self) -> String {
self.blocks
.iter()
.map(|b| b.text())
.collect::<Vec<_>>()
.join("\n\n")
}
pub fn word_count(&self) -> usize {
self.blocks
.iter()
.flat_map(|b| &b.lines)
.flat_map(|l| &l.words)
.count()
}
pub fn line_count(&self) -> usize {
self.blocks.iter().map(|b| b.lines.len()).sum()
}
pub fn add_block(&mut self, block: OcrBlock) {
self.blocks.push(block);
self.update_confidence();
}
fn update_confidence(&mut self) {
let total_words: usize = self.word_count();
if total_words == 0 {
self.confidence = 0;
return;
}
let sum: i64 = self
.blocks
.iter()
.flat_map(|b| &b.lines)
.flat_map(|l| &l.words)
.map(|w| w.confidence as i64)
.sum();
self.confidence = (sum / total_words as i64) as i32;
}
}
/// An OCR engine: a configuration plus an "initialized" flag.
pub struct OcrEngine {
// Settings the engine was created with; mutated by the `set_*` methods.
config: OcrConfig,
// Set by a successful `init`; cleared again by `set_language` and `set_oem`.
initialized: bool,
}
impl OcrEngine {
pub fn new(config: OcrConfig) -> Self {
Self {
config,
initialized: false,
}
}
pub fn init(&mut self) -> Result<(), String> {
match self.config.engine_type {
OcrEngineType::None => {
self.initialized = true;
Ok(())
}
OcrEngineType::Tesseract => {
Err("Tesseract OCR not compiled in. Enable the 'tesseract' feature.".to_string())
}
_ => Err(format!(
"OCR engine {:?} not supported on this platform.",
self.config.engine_type
)),
}
}
pub fn is_initialized(&self) -> bool {
self.initialized
}
pub fn recognize(&self, _pixmap: &Pixmap) -> OcrResult {
if !self.initialized {
return OcrResult::error("OCR engine not initialized");
}
match self.config.engine_type {
OcrEngineType::None => {
OcrResult::new(_pixmap.width() as u32, _pixmap.height() as u32)
}
_ => OcrResult::error("OCR recognition not implemented for this engine"),
}
}
pub fn set_language(&mut self, lang: &str) {
self.config.language = lang.to_string();
self.initialized = false; }
pub fn language(&self) -> &str {
&self.config.language
}
pub fn set_psm(&mut self, psm: OcrPageSegMode) {
self.config.psm = psm;
}
pub fn set_oem(&mut self, oem: OcrEngineMode) {
self.config.oem = oem;
self.initialized = false;
}
pub fn available_languages(&self) -> Vec<String> {
vec!["eng".to_string()]
}
}
#[unsafe(no_mangle)]
pub extern "C" fn fz_new_ocr_engine(_ctx: Handle, engine_type: c_int) -> Handle {
let config = OcrConfig {
engine_type: match engine_type {
1 => OcrEngineType::Tesseract,
2 => OcrEngineType::WindowsOcr,
3 => OcrEngineType::AppleVision,
_ => OcrEngineType::None,
},
..Default::default()
};
let engine = OcrEngine::new(config);
OCR_ENGINES.insert(engine)
}
/// Releases the engine registered under `engine` by removing it from `OCR_ENGINES`.
/// `_ctx` is unused. Whatever `HandleStore::remove` returns is discarded.
#[unsafe(no_mangle)]
pub extern "C" fn fz_drop_ocr_engine(_ctx: Handle, engine: Handle) {
OCR_ENGINES.remove(engine);
}
/// Initializes the engine registered under `engine`.
///
/// Returns 1 when `OcrEngine::init` succeeds. Returns 0 when the handle is
/// unknown, the engine's lock cannot be taken, or initialization fails.
/// `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_ocr_engine_init(_ctx: Handle, engine: Handle) -> c_int {
    let Some(slot) = OCR_ENGINES.get(engine) else {
        return 0;
    };
    let Ok(mut guard) = slot.lock() else {
        return 0;
    };
    let succeeded = guard.init().is_ok();
    c_int::from(succeeded)
}
/// Reports whether the engine registered under `engine` is initialized.
///
/// Returns 1 when it is. Returns 0 when it is not, when the handle is unknown,
/// or when the engine's lock cannot be taken. `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_ocr_engine_is_initialized(_ctx: Handle, engine: Handle) -> c_int {
    let Some(slot) = OCR_ENGINES.get(engine) else {
        return 0;
    };
    let Ok(guard) = slot.lock() else {
        return 0;
    };
    let ready = guard.is_initialized();
    c_int::from(ready)
}
/// Sets the language of the engine registered under `engine`.
///
/// `lang` must be null or point to a NUL-terminated C string. Returns 1 on
/// success. Returns 0 when `lang` is null or not valid UTF-8, when the handle is
/// unknown, or when the engine's lock cannot be taken. A successful call clears
/// the engine's initialized flag (see `OcrEngine::set_language`). `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_ocr_engine_set_language(
    _ctx: Handle,
    engine: Handle,
    lang: *const c_char,
) -> c_int {
    if lang.is_null() {
        return 0;
    }
    // SAFETY: `lang` is non-null (checked above). The caller is responsible for
    // passing a pointer to a valid NUL-terminated string that stays alive for the
    // duration of this call.
    let raw = unsafe { CStr::from_ptr(lang) };
    let Ok(code) = raw.to_str() else {
        return 0;
    };
    let Some(slot) = OCR_ENGINES.get(engine) else {
        return 0;
    };
    let Ok(mut guard) = slot.lock() else {
        return 0;
    };
    guard.set_language(code);
    1
}
/// Returns the language of the engine registered under `engine` as a newly
/// allocated C string.
///
/// The string is produced with `CString::into_raw`; release it with
/// `fz_free_ocr_string`. Returns null when the handle is unknown, the engine's
/// lock cannot be taken, or the language contains an interior NUL byte.
/// `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_ocr_engine_get_language(_ctx: Handle, engine: Handle) -> *mut c_char {
    let Some(slot) = OCR_ENGINES.get(engine) else {
        return std::ptr::null_mut();
    };
    let Ok(guard) = slot.lock() else {
        return std::ptr::null_mut();
    };
    let encoded = CString::new(guard.language());
    encoded.map_or(std::ptr::null_mut(), CString::into_raw)
}
#[unsafe(no_mangle)]
pub extern "C" fn fz_ocr_engine_set_psm(_ctx: Handle, engine: Handle, psm: c_int) {
let mode = match psm {
0 => OcrPageSegMode::OsdOnly,
1 => OcrPageSegMode::AutoOsd,
2 => OcrPageSegMode::AutoOnly,
3 => OcrPageSegMode::Auto,
4 => OcrPageSegMode::SingleColumn,
5 => OcrPageSegMode::SingleBlockVertText,
6 => OcrPageSegMode::SingleBlock,
7 => OcrPageSegMode::SingleLine,
8 => OcrPageSegMode::SingleWord,
9 => OcrPageSegMode::CircleWord,
10 => OcrPageSegMode::SingleChar,
11 => OcrPageSegMode::SparseText,
12 => OcrPageSegMode::SparseTextOsd,
13 => OcrPageSegMode::RawLine,
_ => OcrPageSegMode::Auto,
};
if let Some(arc) = OCR_ENGINES.get(engine) {
if let Ok(mut e) = arc.lock() {
e.set_psm(mode);
}
}
}
/// Creates an empty OCR result of the given dimensions and returns its handle.
///
/// Release the handle with `fz_drop_ocr_result`. `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_new_ocr_result(_ctx: Handle, width: u32, height: u32) -> Handle {
    OCR_RESULTS.insert(OcrResult::new(width, height))
}
/// Releases the result registered under `result` by removing it from `OCR_RESULTS`.
/// `_ctx` is unused. Whatever `HandleStore::remove` returns is discarded.
#[unsafe(no_mangle)]
pub extern "C" fn fz_drop_ocr_result(_ctx: Handle, result: Handle) {
OCR_RESULTS.remove(result);
}
/// Returns the full text of the result registered under `result` as a newly
/// allocated C string.
///
/// The string is produced with `CString::into_raw`; release it with
/// `fz_free_ocr_string`. Returns null when the handle is unknown, the result's
/// lock cannot be taken, or the text contains an interior NUL byte.
/// `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_ocr_result_text(_ctx: Handle, result: Handle) -> *mut c_char {
    let Some(slot) = OCR_RESULTS.get(result) else {
        return std::ptr::null_mut();
    };
    let Ok(guard) = slot.lock() else {
        return std::ptr::null_mut();
    };
    let encoded = CString::new(guard.text());
    encoded.map_or(std::ptr::null_mut(), CString::into_raw)
}
/// Returns the overall confidence of the result registered under `result`.
///
/// Returns 0 when the handle is unknown or the result's lock cannot be taken.
/// `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_ocr_result_confidence(_ctx: Handle, result: Handle) -> c_int {
    let Some(slot) = OCR_RESULTS.get(result) else {
        return 0;
    };
    let Ok(guard) = slot.lock() else {
        return 0;
    };
    guard.confidence
}
/// Returns the number of words in the result registered under `result`.
///
/// Counts larger than `c_int::MAX` are reported as `c_int::MAX` rather than
/// wrapping to a negative number. Returns 0 when the handle is unknown or the
/// result's lock cannot be taken. `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_ocr_result_word_count(_ctx: Handle, result: Handle) -> c_int {
    let Some(slot) = OCR_RESULTS.get(result) else {
        return 0;
    };
    let Ok(guard) = slot.lock() else {
        return 0;
    };
    let words = guard.word_count();
    // Clamp before narrowing: a plain `usize as c_int` wraps for large counts.
    words.min(c_int::MAX as usize) as c_int
}
/// Returns the number of lines in the result registered under `result`.
///
/// Counts larger than `c_int::MAX` are reported as `c_int::MAX` rather than
/// wrapping to a negative number. Returns 0 when the handle is unknown or the
/// result's lock cannot be taken. `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_ocr_result_line_count(_ctx: Handle, result: Handle) -> c_int {
    let Some(slot) = OCR_RESULTS.get(result) else {
        return 0;
    };
    let Ok(guard) = slot.lock() else {
        return 0;
    };
    let lines = guard.line_count();
    // Clamp before narrowing: a plain `usize as c_int` wraps for large counts.
    lines.min(c_int::MAX as usize) as c_int
}
/// Frees a string previously returned by `fz_ocr_engine_get_language` or
/// `fz_ocr_result_text`.
///
/// A null pointer is ignored. `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_free_ocr_string(_ctx: Handle, s: *mut c_char) {
    if s.is_null() {
        return;
    }
    // SAFETY: `s` is non-null (checked above). The caller must pass a pointer that
    // came from `CString::into_raw` in this module and has not been freed already.
    let owned = unsafe { CString::from_raw(s) };
    drop(owned);
}
/// Reports whether the backend with raw code `engine_type` is available.
///
/// Only code 0 (the no-op backend) is reported as available (1); every other
/// code yields 0. `_ctx` is unused.
#[unsafe(no_mangle)]
pub extern "C" fn fz_ocr_is_available(_ctx: Handle, engine_type: c_int) -> c_int {
    match engine_type {
        0 => 1,
        _ => 0,
    }
}
// Unit tests for the OCR data types, the engine, and the C FFI wrappers.
#[cfg(test)]
mod tests {
use super::*;
// The default configuration selects no engine, English, and 300 DPI.
#[test]
fn test_ocr_config_default() {
let config = OcrConfig::default();
assert_eq!(config.engine_type, OcrEngineType::None);
assert_eq!(config.language, "eng");
assert_eq!(config.dpi, 300);
}
// `OcrWord::new` stores the text and confidence it is given.
#[test]
fn test_ocr_word() {
let word = OcrWord::new("hello", Rect::new(0.0, 0.0, 50.0, 20.0), 95);
assert_eq!(word.text, "hello");
assert_eq!(word.confidence, 95);
}
// Adding words joins their text with spaces and averages their confidence.
#[test]
fn test_ocr_line() {
let mut line = OcrLine::new(Rect::new(0.0, 0.0, 200.0, 20.0));
line.add_word(OcrWord::new("hello", Rect::new(0.0, 0.0, 50.0, 20.0), 90));
line.add_word(OcrWord::new("world", Rect::new(60.0, 0.0, 120.0, 20.0), 80));
assert_eq!(line.text, "hello world");
assert_eq!(line.words.len(), 2);
assert_eq!(line.confidence, 85); }
// A block with a single line reports that line's text.
#[test]
fn test_ocr_block() {
let mut block = OcrBlock::new(Rect::new(0.0, 0.0, 200.0, 100.0));
let mut line = OcrLine::new(Rect::new(0.0, 0.0, 200.0, 20.0));
line.add_word(OcrWord::new("test", Rect::new(0.0, 0.0, 40.0, 20.0), 100));
block.add_line(line);
assert_eq!(block.text(), "test");
}
// A result starts empty; adding a block updates word count, confidence and text.
#[test]
fn test_ocr_result() {
let mut result = OcrResult::new(100, 100);
assert_eq!(result.word_count(), 0);
assert_eq!(result.confidence, 0);
let mut block = OcrBlock::new(Rect::new(0.0, 0.0, 100.0, 50.0));
let mut line = OcrLine::new(Rect::new(0.0, 0.0, 100.0, 20.0));
line.add_word(OcrWord::new("OCR", Rect::new(0.0, 0.0, 30.0, 20.0), 100));
block.add_line(line);
result.add_block(block);
assert_eq!(result.word_count(), 1);
assert_eq!(result.confidence, 100);
assert_eq!(result.text(), "OCR");
}
// The no-op engine (`OcrEngineType::None`) initializes successfully.
#[test]
fn test_ocr_engine_noop() {
let config = OcrConfig::default();
let mut engine = OcrEngine::new(config);
assert!(!engine.is_initialized());
assert!(engine.init().is_ok());
assert!(engine.is_initialized());
}
// FFI round trip: create a no-op engine, initialize it, query it, drop it.
#[test]
fn test_ocr_engine_ffi() {
let handle = fz_new_ocr_engine(0, 0); assert!(handle != 0);
let init_result = fz_ocr_engine_init(0, handle);
assert_eq!(init_result, 1);
let is_init = fz_ocr_engine_is_initialized(0, handle);
assert_eq!(is_init, 1);
fz_drop_ocr_engine(0, handle);
}
// FFI round trip: a fresh result has no words and zero confidence.
#[test]
fn test_ocr_result_ffi() {
let handle = fz_new_ocr_result(0, 100, 100);
assert!(handle != 0);
let word_count = fz_ocr_result_word_count(0, handle);
assert_eq!(word_count, 0);
let confidence = fz_ocr_result_confidence(0, handle);
assert_eq!(confidence, 0);
fz_drop_ocr_result(0, handle);
}
// Only engine code 0 is reported as available.
#[test]
fn test_ocr_availability() {
assert_eq!(fz_ocr_is_available(0, 0), 1);
assert_eq!(fz_ocr_is_available(0, 1), 0);
}
// Initializing through an unknown handle reports failure.
#[test]
fn test_ocr_engine_init_invalid() {
assert_eq!(fz_ocr_engine_init(0, 0), 0);
assert_eq!(fz_ocr_engine_init(0, 99999), 0);
}
// Querying an unknown handle reports "not initialized".
#[test]
fn test_ocr_engine_is_initialized_invalid() {
assert_eq!(fz_ocr_engine_is_initialized(0, 0), 0);
}
// A null language pointer is rejected.
#[test]
fn test_ocr_engine_set_language_null() {
let h = fz_new_ocr_engine(0, 0);
assert_eq!(fz_ocr_engine_set_language(0, h, std::ptr::null()), 0);
fz_drop_ocr_engine(0, h);
}
// A valid language is accepted and can be read back as a non-null string.
#[test]
fn test_ocr_engine_set_language_valid() {
let h = fz_new_ocr_engine(0, 0);
let lang = std::ffi::CString::new("fra").unwrap();
assert_eq!(fz_ocr_engine_set_language(0, h, lang.as_ptr()), 1);
let out = fz_ocr_engine_get_language(0, h);
assert!(!out.is_null());
fz_free_ocr_string(0, out);
fz_drop_ocr_engine(0, h);
}
// Reading the language through an unknown handle yields null.
#[test]
fn test_ocr_engine_get_language_invalid() {
assert!(fz_ocr_engine_get_language(0, 0).is_null());
}
// Setting a valid and an out-of-range PSM code must not panic (no value is asserted).
#[test]
fn test_ocr_engine_set_psm() {
let h = fz_new_ocr_engine(0, 0);
fz_ocr_engine_set_psm(0, h, 7);
fz_ocr_engine_set_psm(0, h, 99);
fz_drop_ocr_engine(0, h);
}
// Every result accessor falls back to 0 / null for an unknown handle.
#[test]
fn test_ocr_result_invalid() {
assert_eq!(fz_ocr_result_confidence(0, 0), 0);
assert_eq!(fz_ocr_result_word_count(0, 0), 0);
assert_eq!(fz_ocr_result_line_count(0, 0), 0);
assert!(fz_ocr_result_text(0, 0).is_null());
}
// An empty result yields a non-null, empty C string.
#[test]
fn test_ocr_result_text_empty() {
let h = fz_new_ocr_result(0, 100, 100);
let text = fz_ocr_result_text(0, h);
assert!(!text.is_null());
let s = unsafe { std::ffi::CStr::from_ptr(text).to_str().unwrap() };
assert!(s.is_empty());
fz_free_ocr_string(0, text);
fz_drop_ocr_result(0, h);
}
// Freeing a null string is a no-op.
#[test]
fn test_fz_free_ocr_string_null() {
fz_free_ocr_string(0, std::ptr::null_mut());
}
// A Tesseract engine can be created, but initializing it fails.
#[test]
fn test_ocr_engine_tesseract_init_fails() {
let h = fz_new_ocr_engine(0, 1);
assert_eq!(fz_ocr_engine_init(0, h), 0);
fz_drop_ocr_engine(0, h);
}
}