use serde::{Deserialize, Serialize};
use super::formats::OutputFormat;
use crate::core::config_validation::validate_ocr_backend;
use crate::error::KreuzbergError;
use crate::types::OcrElementConfig;
/// Configuration for OCR processing.
///
/// Serializable and deserializable through serde. Every field carries a serde
/// default, so a field that is missing from the input falls back to its
/// default value instead of causing a deserialization error.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcrConfig {
/// Name of the OCR backend to use.
///
/// Falls back to `default_tesseract_backend()` (`"tesseract"`) when absent
/// from the input. This is the only field checked by `OcrConfig::validate`.
#[serde(default = "default_tesseract_backend")]
pub backend: String,
/// Language identifier for OCR.
///
/// Falls back to `default_eng()` (`"eng"`) when absent from the input.
/// NOTE(review): presumably a Tesseract-style language code; the accepted
/// format is not visible here and `validate` does not check it.
#[serde(default = "default_eng")]
pub language: String,
/// Optional Tesseract-specific settings; `None` when absent from the input.
/// No `skip_serializing_if` is set, so a `None` value is still serialized.
#[serde(default)]
pub tesseract_config: Option<crate::types::TesseractConfig>,
/// Optional output format selection; `None` when absent from the input.
/// No `skip_serializing_if` is set, so a `None` value is still serialized.
#[serde(default)]
pub output_format: Option<OutputFormat>,
/// Optional free-form JSON value; `None` when absent from the input and
/// left out of the serialized output when `None`.
/// NOTE(review): presumably consumed by the PaddleOCR backend — the
/// expected schema is not visible here; confirm against the consumer.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub paddle_ocr_config: Option<serde_json::Value>,
/// Optional `OcrElementConfig`; `None` when absent from the input and left
/// out of the serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub element_config: Option<OcrElementConfig>,
}
impl Default for OcrConfig {
fn default() -> Self {
Self {
backend: default_tesseract_backend(),
language: default_eng(),
tesseract_config: None,
output_format: None,
paddle_ocr_config: None,
element_config: None,
}
}
}
impl OcrConfig {
    /// Validates this configuration.
    ///
    /// Only the `backend` field is inspected; it is handed to
    /// `validate_ocr_backend` and that function's result is returned as-is.
    ///
    /// # Errors
    ///
    /// Returns the `KreuzbergError` produced by `validate_ocr_backend` when
    /// it rejects the backend name.
    pub fn validate(&self) -> Result<(), KreuzbergError> {
        // Borrow just the field under test; the other fields are not checked.
        let Self { backend, .. } = self;
        validate_ocr_backend(backend)
    }
}
/// Default value for the `backend` field: the owned string `"tesseract"`.
///
/// Referenced by name from the field's `#[serde(default = "...")]` attribute
/// and called by `OcrConfig::default`.
fn default_tesseract_backend() -> String {
    String::from("tesseract")
}
/// Default value for the `language` field: the owned string `"eng"`.
///
/// Referenced by name from the field's `#[serde(default = "...")]` attribute
/// and called by `OcrConfig::default`.
fn default_eng() -> String {
    String::from("eng")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a config that differs from the default only in its backend name.
    fn with_backend(name: &str) -> OcrConfig {
        OcrConfig {
            backend: name.to_string(),
            ..Default::default()
        }
    }

    /// Validates a config using `name` as its backend, asserts that
    /// validation fails, and returns the rendered error message.
    fn rejection_message(name: &str) -> String {
        let outcome = with_backend(name).validate();
        assert!(outcome.is_err());
        outcome.unwrap_err().to_string()
    }

    #[test]
    fn test_ocr_config_default() {
        let OcrConfig {
            backend,
            language,
            tesseract_config,
            output_format,
            ..
        } = OcrConfig::default();

        assert_eq!(backend, "tesseract");
        assert_eq!(language, "eng");
        assert!(tesseract_config.is_none());
        assert!(output_format.is_none());
    }

    #[test]
    fn test_ocr_config_with_tesseract() {
        let french = OcrConfig {
            language: "fra".to_string(),
            ..with_backend("tesseract")
        };

        assert_eq!(french.backend, "tesseract");
        assert_eq!(french.language, "fra");
    }

    #[test]
    fn test_validate_tesseract_backend() {
        assert!(with_backend("tesseract").validate().is_ok());
    }

    #[test]
    fn test_validate_easyocr_backend() {
        assert!(with_backend("easyocr").validate().is_ok());
    }

    #[test]
    fn test_validate_paddleocr_backend() {
        assert!(with_backend("paddleocr").validate().is_ok());
    }

    #[test]
    fn test_validate_invalid_backend_typo() {
        let message = rejection_message("tesseract_typo");
        assert!(message.contains("Invalid OCR backend"));
    }

    #[test]
    fn test_validate_invalid_backend_completely_wrong() {
        let message = rejection_message("ocr_lib");
        // Either phrase is accepted, exactly as before.
        assert!(message.contains("Invalid OCR backend") || message.contains("Valid options are"));
    }

    #[test]
    fn test_validate_default_backend() {
        assert!(OcrConfig::default().validate().is_ok());
    }
}