#![allow(clippy::manual_flatten)]
#[cfg(feature = "ocr")]
mod ocr_model_tests {
use pdf_oxide::ocr::{OcrConfig, TextDetector, TextRecognizer};
use std::path::Path;
const DET_MODEL_PATH: &str = ".models/ch_PP-OCRv3_det_infer/inference.pdmodel";
const REC_MODEL_PATH: &str = ".models/ch_PP-OCRv3_rec_infer/inference.pdmodel";
const DICT_PATH: &str = ".models/ppocr_keys_v1.txt";
/// Reports whether every OCR asset used by these tests is present on disk.
///
/// Checks the detection model, the recognition model and the dictionary, in
/// that order, and stops at the first path that does not exist.
fn models_exist() -> bool {
    [DET_MODEL_PATH, REC_MODEL_PATH, DICT_PATH]
        .iter()
        .all(|asset| Path::new(asset).exists())
}
/// Prints whether the OCR model files are available.
///
/// This test never fails: it only reports the outcome of `models_exist()`
/// and, when the files are missing, lists the expected paths together with
/// the setup script to run.
#[test]
fn test_ocr_model_files_present() {
    if !models_exist() {
        println!("⚠ OCR models not found. Skipping model loading tests.");
        println!(" Expected files:");
        for expected in [DET_MODEL_PATH, REC_MODEL_PATH, DICT_PATH] {
            println!(" - {}", expected);
        }
        println!(" Download models using: ./scripts/setup_ocr_models.sh");
        return;
    }

    println!("✓ All OCR model files are present");
    println!(" Detection model: {}", DET_MODEL_PATH);
    println!(" Recognition model: {}", REC_MODEL_PATH);
    println!(" Dictionary: {}", DICT_PATH);
}
/// Loads the detection model from `DET_MODEL_PATH` and fails if loading errors.
///
/// Returns early (passing) when `models_exist()` is false. Marked `#[ignore]`,
/// so it only runs when ignored tests are requested explicitly.
#[test]
#[ignore]
fn test_ocr_detector_model_loading() {
    if !models_exist() {
        println!("Skipping: Models not present");
        return;
    }

    let detector_config = OcrConfig::builder()
        .det_threshold(0.3)
        .num_threads(4)
        .build();

    // Log the underlying error on stderr before aborting the test.
    let _detector = TextDetector::new(DET_MODEL_PATH, detector_config).unwrap_or_else(|err| {
        eprintln!("Failed to load detection model: {:?}", err);
        panic!("Could not load detection model");
    });
    println!("✓ Detection model loaded successfully");
}
/// Loads the recognition model (with its dictionary) and fails if loading errors.
///
/// Returns early (passing) when `models_exist()` is false. Marked `#[ignore]`,
/// so it only runs when ignored tests are requested explicitly.
#[test]
#[ignore]
fn test_ocr_recognizer_model_loading() {
    if !models_exist() {
        println!("Skipping: Models not present");
        return;
    }

    let recognizer_config = OcrConfig::builder()
        .rec_threshold(0.5)
        .num_threads(4)
        .build();

    // Log the underlying error on stderr before aborting the test.
    let _recognizer = TextRecognizer::new(REC_MODEL_PATH, DICT_PATH, recognizer_config)
        .unwrap_or_else(|err| {
            eprintln!("Failed to load recognition model: {:?}", err);
            panic!("Could not load recognition model");
        });
    println!("✓ Recognition model loaded successfully");
}
/// Loads the detector and the recognizer with the default `OcrConfig`.
///
/// Both loaders are invoked before either result is inspected; a detection
/// failure is reported in preference to a recognition failure. Returns early
/// (passing) when `models_exist()` is false.
#[test]
#[ignore]
fn test_ocr_both_models_load() {
    if !models_exist() {
        println!("Skipping: Models not present");
        return;
    }

    let shared_config = OcrConfig::default();
    let detector = TextDetector::new(DET_MODEL_PATH, shared_config.clone());
    let recognizer = TextRecognizer::new(REC_MODEL_PATH, DICT_PATH, shared_config);

    if let Err(e) = detector {
        panic!("Detection model failed: {:?}", e);
    }
    if let Err(e) = recognizer {
        panic!("Recognition model failed: {:?}", e);
    }
    println!("✓ Both detection and recognition models loaded");
}
/// Checks that paths and the detection threshold given to the builder are
/// stored on the resulting `OcrConfig`.
///
/// Returns early (passing) when `models_exist()` is false.
#[test]
fn test_ocr_config_with_model_paths() {
    use std::path::PathBuf;

    if !models_exist() {
        println!("Skipping: Models not present");
        return;
    }

    let expected_det = PathBuf::from(DET_MODEL_PATH);
    let expected_rec = PathBuf::from(REC_MODEL_PATH);
    let expected_dict = PathBuf::from(DICT_PATH);

    // The builder receives clones so the originals stay available for the
    // comparisons below.
    let built = OcrConfig::builder()
        .det_model_path(expected_det.clone())
        .rec_model_path(expected_rec.clone())
        .dict_path(expected_dict.clone())
        .det_threshold(0.35)
        .num_threads(4)
        .build();

    assert_eq!(built.det_model_path, Some(expected_det));
    assert_eq!(built.rec_model_path, Some(expected_rec));
    assert_eq!(built.dict_path, Some(expected_dict));

    // Compare the float with a tolerance rather than exact equality.
    let threshold_delta = (built.det_threshold - 0.35).abs();
    assert!(threshold_delta < f32::EPSILON);
}
/// Builds an `OcrConfig` with explicit thresholds, thread count and size
/// limits, then checks that the integer settings were stored as given.
///
/// No model files are touched, so this test runs unconditionally.
#[test]
fn test_ocr_model_configuration_for_cpu_inference() {
    let cpu_config = OcrConfig::builder()
        .det_threshold(0.35)
        .box_threshold(0.55)
        .rec_threshold(0.50)
        .num_threads(4)
        .det_max_side(960)
        .rec_target_height(48)
        .max_candidates(1000)
        .build();

    assert_eq!(cpu_config.num_threads, 4);
    assert_eq!(cpu_config.det_max_side, 960);
    assert_eq!(cpu_config.rec_target_height, 48);

    println!("✓ CPU-optimized OCR configuration created");
}
/// Prints the configured model paths and, for each file whose metadata can be
/// read, its size on disk.
///
/// Purely informational: missing or unreadable files are skipped silently and
/// the test never fails.
#[test]
fn test_ocr_model_path_information() {
    // Converts a byte count to mebibytes for display.
    let to_mb = |bytes: u64| bytes as f64 / 1024.0 / 1024.0;

    println!("\n=== OCR Model Information ===");
    println!("Detection Model: {}", DET_MODEL_PATH);
    println!("Recognition Model: {}", REC_MODEL_PATH);
    println!("Dictionary: {}", DICT_PATH);

    if let Ok(det_meta) = std::fs::metadata(DET_MODEL_PATH) {
        let bytes = det_meta.len();
        println!("Detection size: {} bytes ({:.1} MB)", bytes, to_mb(bytes));
    }

    if let Ok(rec_meta) = std::fs::metadata(REC_MODEL_PATH) {
        let bytes = rec_meta.len();
        println!("Recognition size: {} bytes ({:.1} MB)", bytes, to_mb(bytes));
    }

    if let Ok(dict_meta) = std::fs::metadata(DICT_PATH) {
        println!("Dictionary size: {} bytes", dict_meta.len());
    }

    println!("========================\n");
}
/// Reads the dictionary at `DICT_PATH` as text and prints a short summary.
///
/// Returns early (passing) when the file does not exist. The printed count is
/// the number of lines in the file.
///
/// # Panics
///
/// Panics if the file exists but `std::fs::read_to_string` fails on it, so an
/// unreadable dictionary cannot make this test pass silently.
#[test]
fn test_ocr_dictionary_can_be_read() {
    if !Path::new(DICT_PATH).exists() {
        println!("Dictionary not found at {}", DICT_PATH);
        return;
    }

    // A present-but-unreadable dictionary is a real failure for a test named
    // "can be read", so surface the I/O error instead of only logging it.
    let content = std::fs::read_to_string(DICT_PATH)
        .unwrap_or_else(|e| panic!("Failed to read dictionary: {}", e));

    let line_count = content.lines().count();
    println!("✓ Dictionary loaded: {} characters", line_count);

    let first_chars: Vec<&str> = content.lines().take(5).collect();
    println!(" First characters: {:?}", first_chars);
}
/// Builds an `OcrConfig` with raised thresholds and style detection enabled,
/// then loads both models with it.
///
/// Returns early (passing) when `models_exist()` is false. Marked `#[ignore]`,
/// so it only runs when ignored tests are requested explicitly.
///
/// # Panics
///
/// Panics if either the detector or the recognizer fails to load.
#[test]
#[ignore]
fn test_ocr_workflow_legal_document_with_models() {
    if !models_exist() {
        println!("Skipping: Models not present");
        return;
    }

    let config = OcrConfig::builder()
        .det_threshold(0.45)
        .box_threshold(0.65)
        .rec_threshold(0.60)
        .num_threads(4)
        .detect_styles(true)
        .build();

    // Inspect the loader results rather than discarding them, so a load
    // failure fails the test instead of passing unnoticed.
    if let Err(e) = TextDetector::new(DET_MODEL_PATH, config.clone()) {
        panic!("Detection model failed: {:?}", e);
    }
    if let Err(e) = TextRecognizer::new(REC_MODEL_PATH, DICT_PATH, config) {
        panic!("Recognition model failed: {:?}", e);
    }

    println!("Legal document OCR workflow configured");
}
/// Builds an `OcrConfig` with lowered thresholds, eight threads and a smaller
/// detection side limit, then loads both models with it.
///
/// Returns early (passing) when `models_exist()` is false. Marked `#[ignore]`,
/// so it only runs when ignored tests are requested explicitly.
///
/// # Panics
///
/// Panics if either the detector or the recognizer fails to load.
#[test]
#[ignore]
fn test_ocr_workflow_batch_processing_with_models() {
    if !models_exist() {
        println!("Skipping: Models not present");
        return;
    }

    let config = OcrConfig::builder()
        .det_threshold(0.25)
        .box_threshold(0.40)
        .num_threads(8)
        .det_max_side(512)
        .build();

    // Inspect the loader results rather than discarding them, so a load
    // failure fails the test instead of passing unnoticed.
    if let Err(e) = TextDetector::new(DET_MODEL_PATH, config.clone()) {
        panic!("Detection model failed: {:?}", e);
    }
    if let Err(e) = TextRecognizer::new(REC_MODEL_PATH, DICT_PATH, config) {
        panic!("Recognition model failed: {:?}", e);
    }

    println!("Batch processing OCR workflow configured");
}
/// Prints whether the detection and recognition model directories exist under
/// `.models` and lists the entries of each directory that can be read.
///
/// Purely informational: nothing is asserted, and directories or entries that
/// cannot be read are skipped.
#[test]
fn test_ocr_models_directory_structure() {
    /// Prints the path of every successfully read entry in `dir`; does
    /// nothing when the directory itself cannot be opened.
    fn print_entries(dir: &Path) {
        if let Ok(listing) = std::fs::read_dir(dir) {
            for item in listing.flatten() {
                println!(" - {}", item.path().display());
            }
        }
    }

    let models_root = Path::new(".models");
    let detection_dir = models_root.join("ch_PP-OCRv3_det_infer");
    let recognition_dir = models_root.join("ch_PP-OCRv3_rec_infer");

    println!("\n=== Model Directory Structure ===");

    if !detection_dir.exists() {
        println!("Detection model directory: ✗");
    } else {
        println!("Detection model directory: ✓");
        print_entries(&detection_dir);
    }

    if !recognition_dir.exists() {
        println!("Recognition model directory: ✗");
    } else {
        println!("Recognition model directory: ✓");
        print_entries(&recognition_dir);
    }

    println!("================================\n");
}
}
/// Fallback test module compiled only when the `ocr` feature is disabled.
#[cfg(not(feature = "ocr"))]
mod ocr_models_not_enabled_tests {
    /// Prints a notice that the model tests were skipped; asserts nothing.
    #[test]
    fn test_ocr_models_feature_disabled() {
        let notice = "OCR feature is not enabled - model tests skipped";
        println!("{}", notice);
    }
}