#[cfg(test)]
mod tests {
use crate::operations::page_analysis::*;
use crate::{Document, Page};
use std::path::PathBuf;
use tempfile::TempDir;
fn create_text_pdf(title: &str, pages: usize) -> Document {
let mut doc = Document::new();
doc.set_title(title);
for i in 0..pages {
let mut page = Page::a4();
page.text()
.set_font(crate::text::Font::Helvetica, 12.0)
.at(50.0, 750.0)
.write(&format!("{} - Page {}", title, i + 1))
.unwrap();
page.text()
.set_font(crate::text::Font::Helvetica, 10.0)
.at(50.0, 700.0)
.write("This is a text-heavy page with lots of content.")
.unwrap();
page.text()
.set_font(crate::text::Font::Helvetica, 10.0)
.at(50.0, 680.0)
.write("Lorem ipsum dolor sit amet, consectetur adipiscing elit.")
.unwrap();
page.text()
.set_font(crate::text::Font::Helvetica, 10.0)
.at(50.0, 660.0)
.write("Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.")
.unwrap();
page.text()
.set_font(crate::text::Font::Helvetica, 10.0)
.at(50.0, 640.0)
.write("Ut enim ad minim veniam, quis nostrud exercitation ullamco.")
.unwrap();
doc.add_page(page);
}
doc
}
fn create_minimal_pdf(title: &str) -> Document {
let mut doc = Document::new();
doc.set_title(title);
let mut page = Page::a4();
page.text()
.set_font(crate::text::Font::Helvetica, 12.0)
.at(50.0, 750.0)
.write("Minimal")
.unwrap();
doc.add_page(page);
doc
}
fn save_test_pdf(doc: &mut Document, dir: &TempDir, name: &str) -> PathBuf {
let path = dir.path().join(name);
doc.save(&path).unwrap();
path
}
#[test]
fn test_analysis_options_default() {
let options = AnalysisOptions::default();
assert_eq!(options.min_text_fragment_size, 3);
assert_eq!(options.min_image_size, 50);
assert_eq!(options.scanned_threshold, 0.8);
assert_eq!(options.text_threshold, 0.7);
}
#[test]
fn test_analysis_options_custom() {
let options = AnalysisOptions {
min_text_fragment_size: 5,
min_image_size: 100,
scanned_threshold: 0.9,
text_threshold: 0.6,
ocr_options: None,
};
assert_eq!(options.min_text_fragment_size, 5);
assert_eq!(options.min_image_size, 100);
assert_eq!(options.scanned_threshold, 0.9);
assert_eq!(options.text_threshold, 0.6);
}
#[test]
fn test_page_type_methods() {
assert!(PageType::Scanned.is_scanned());
assert!(!PageType::Scanned.is_text());
assert!(!PageType::Scanned.is_mixed());
assert!(!PageType::Text.is_scanned());
assert!(PageType::Text.is_text());
assert!(!PageType::Text.is_mixed());
assert!(!PageType::Mixed.is_scanned());
assert!(!PageType::Mixed.is_text());
assert!(PageType::Mixed.is_mixed());
}
#[test]
fn test_content_analysis_structure() {
let analysis = ContentAnalysis {
page_number: 0,
page_type: PageType::Text,
text_ratio: 0.75,
image_ratio: 0.15,
blank_space_ratio: 0.10,
text_fragment_count: 25,
image_count: 1,
character_count: 1250,
};
assert_eq!(analysis.page_number, 0);
assert_eq!(analysis.page_type, PageType::Text);
assert_eq!(analysis.text_ratio, 0.75);
assert_eq!(analysis.image_ratio, 0.15);
assert_eq!(analysis.blank_space_ratio, 0.10);
assert_eq!(analysis.text_fragment_count, 25);
assert_eq!(analysis.image_count, 1);
assert_eq!(analysis.character_count, 1250);
}
#[test]
fn test_content_analysis_methods() {
let scanned_analysis = ContentAnalysis {
page_number: 0,
page_type: PageType::Scanned,
text_ratio: 0.05,
image_ratio: 0.90,
blank_space_ratio: 0.05,
text_fragment_count: 2,
image_count: 1,
character_count: 15,
};
assert!(scanned_analysis.is_scanned());
assert!(!scanned_analysis.is_text_heavy());
assert!(!scanned_analysis.is_mixed_content());
assert_eq!(scanned_analysis.dominant_content_ratio(), 0.90);
let text_analysis = ContentAnalysis {
page_number: 1,
page_type: PageType::Text,
text_ratio: 0.80,
image_ratio: 0.10,
blank_space_ratio: 0.10,
text_fragment_count: 50,
image_count: 0,
character_count: 2000,
};
assert!(!text_analysis.is_scanned());
assert!(text_analysis.is_text_heavy());
assert!(!text_analysis.is_mixed_content());
assert_eq!(text_analysis.dominant_content_ratio(), 0.80);
}
#[test]
fn test_page_type_classification_logic() {
let options = AnalysisOptions::default();
let is_scanned = 0.90 > options.scanned_threshold && 0.05 < 0.1;
assert!(is_scanned);
let is_text = 0.80 > options.text_threshold && 0.10 < 0.2;
assert!(is_text);
let is_mixed = !(0.40 > options.scanned_threshold && 0.50 < 0.1)
&& !(0.50 > options.text_threshold && 0.40 < 0.2);
assert!(is_mixed);
}
#[test]
fn test_analyzer_creation() {
let temp_dir = TempDir::new().unwrap();
let mut doc = create_text_pdf("Test Document", 1);
let input_path = save_test_pdf(&mut doc, &temp_dir, "test.pdf");
let analyzer = PageContentAnalyzer::from_file(&input_path);
assert!(analyzer.is_ok());
let custom_options = AnalysisOptions {
min_text_fragment_size: 5,
min_image_size: 100,
scanned_threshold: 0.85,
text_threshold: 0.75,
ocr_options: None,
};
let document = crate::parser::PdfReader::open_document(&input_path).unwrap();
let _analyzer = PageContentAnalyzer::with_options(document, custom_options);
}
#[test]
fn test_analyze_text_heavy_document() {
let temp_dir = TempDir::new().unwrap();
let mut doc = create_text_pdf("Text Document", 1);
let input_path = save_test_pdf(&mut doc, &temp_dir, "text.pdf");
let analyzer = PageContentAnalyzer::from_file(&input_path).unwrap();
let analyses = analyzer.analyze_document();
assert!(analyses.is_ok());
let analyses = analyses.unwrap();
assert_eq!(analyses.len(), 1);
let analysis = &analyses[0];
assert_eq!(analysis.page_number, 0);
assert!(analysis.character_count > 0);
}
#[test]
fn test_analyze_minimal_document() {
let temp_dir = TempDir::new().unwrap();
let mut doc = create_minimal_pdf("Minimal Document");
let input_path = save_test_pdf(&mut doc, &temp_dir, "minimal.pdf");
let analyzer = PageContentAnalyzer::from_file(&input_path).unwrap();
let analysis = analyzer.analyze_page(0);
assert!(analysis.is_ok());
let analysis = analysis.unwrap();
assert_eq!(analysis.page_number, 0);
assert!(analysis.character_count > 0);
assert_eq!(analysis.image_count, 0);
}
#[test]
fn test_analyze_specific_pages() {
let temp_dir = TempDir::new().unwrap();
let mut doc = create_text_pdf("Multi-Page Document", 3);
let input_path = save_test_pdf(&mut doc, &temp_dir, "multipage.pdf");
let analyzer = PageContentAnalyzer::from_file(&input_path).unwrap();
let page_numbers = vec![0, 2]; let analyses = analyzer.analyze_pages(&page_numbers);
assert!(analyses.is_ok());
let analyses = analyses.unwrap();
assert_eq!(analyses.len(), 2);
assert_eq!(analyses[0].page_number, 0);
assert_eq!(analyses[1].page_number, 2);
}
#[test]
fn test_is_scanned_page_convenience_method() {
let temp_dir = TempDir::new().unwrap();
let mut doc = create_text_pdf("Test Document", 1);
let input_path = save_test_pdf(&mut doc, &temp_dir, "test.pdf");
let analyzer = PageContentAnalyzer::from_file(&input_path).unwrap();
let is_scanned = analyzer.is_scanned_page(0);
assert!(is_scanned.is_ok());
let is_scanned = is_scanned.unwrap();
assert!(is_scanned || !is_scanned);
}
#[test]
fn test_find_scanned_pages() {
let temp_dir = TempDir::new().unwrap();
let mut doc = create_text_pdf("Test Document", 2);
let input_path = save_test_pdf(&mut doc, &temp_dir, "test.pdf");
let analyzer = PageContentAnalyzer::from_file(&input_path).unwrap();
let scanned_pages = analyzer.find_scanned_pages();
assert!(scanned_pages.is_ok());
let scanned_pages = scanned_pages.unwrap();
assert!(scanned_pages.len() <= 2);
}
#[test]
fn test_error_handling_invalid_file() {
let result = PageContentAnalyzer::from_file("nonexistent.pdf");
assert!(result.is_err());
}
#[test]
fn test_error_handling_invalid_page() {
let temp_dir = TempDir::new().unwrap();
let mut doc = create_text_pdf("Test Document", 1);
let input_path = save_test_pdf(&mut doc, &temp_dir, "test.pdf");
let analyzer = PageContentAnalyzer::from_file(&input_path).unwrap();
let result = analyzer.analyze_page(999);
if result.is_err() {
assert!(result.unwrap_err().to_string().contains("Page"));
} else {
let analysis = result.unwrap();
assert_eq!(analysis.page_number, 999);
}
}
#[test]
fn test_blank_space_ratio_calculation() {
let analysis = ContentAnalysis {
page_number: 0,
page_type: PageType::Mixed,
text_ratio: 0.30,
image_ratio: 0.40,
blank_space_ratio: 0.30,
text_fragment_count: 10,
image_count: 2,
character_count: 500,
};
let total_ratio = analysis.text_ratio + analysis.image_ratio + analysis.blank_space_ratio;
assert!((total_ratio - 1.0).abs() < 0.01);
}
#[test]
fn test_analysis_with_empty_document() {
let temp_dir = TempDir::new().unwrap();
let mut doc = Document::new();
let input_path = save_test_pdf(&mut doc, &temp_dir, "empty.pdf");
let analyzer = PageContentAnalyzer::from_file(&input_path).unwrap();
let analyses = analyzer.analyze_document();
assert!(analyses.is_ok());
let analyses = analyses.unwrap();
assert_eq!(analyses.len(), 0);
}
#[test]
fn test_threshold_edge_cases() {
let options = AnalysisOptions::default();
let exactly_scanned = 0.8 > options.scanned_threshold; assert!(!exactly_scanned);
let exactly_text = 0.7 > options.text_threshold; assert!(!exactly_text);
let just_above_scanned = 0.801 > options.scanned_threshold; assert!(just_above_scanned);
let just_above_text = 0.701 > options.text_threshold; assert!(just_above_text);
}
}