use anyhow::Result;
#[cfg(not(feature = "pdf"))]
use lc::readers::FileReader;
#[cfg(feature = "pdf")]
use lc::readers::{pdf::PdfReader, FileReader};
use std::fs;
use std::path::Path;
use tempfile::TempDir;
#[cfg(not(feature = "pdf"))]
struct PdfReader;
#[cfg(not(feature = "pdf"))]
impl PdfReader {
fn new() -> Self {
Self
}
}
#[cfg(not(feature = "pdf"))]
impl FileReader for PdfReader {
fn read_as_text(&self, _file_path: &str) -> Result<String> {
Err(anyhow::anyhow!("PDF support is not enabled. Please compile with the 'pdf' feature flag to enable PDF processing."))
}
fn read_as_text_from_bytes(&self, _bytes: &[u8]) -> Result<String> {
Err(anyhow::anyhow!("PDF support is not enabled. Please compile with the 'pdf' feature flag to enable PDF processing."))
}
fn read_as_text_from_reader(&self, _reader: Box<dyn std::io::Read>) -> Result<String> {
Err(anyhow::anyhow!("PDF support is not enabled. Please compile with the 'pdf' feature flag to enable PDF processing."))
}
fn can_handle(&self, extension: &str) -> bool {
extension.to_lowercase() == "pdf"
}
}
struct PdfTestSetup {
temp_dir: TempDir,
pdf_reader: PdfReader,
}
impl PdfTestSetup {
fn new() -> Result<Self> {
let temp_dir = TempDir::new()?;
let pdf_reader = PdfReader::new();
Ok(PdfTestSetup {
temp_dir,
pdf_reader,
})
}
fn create_simple_pdf(&self, filename: &str, content: &str) -> Result<String> {
let file_path = self.temp_dir.path().join(filename);
let pdf_content = format!(
r#"%PDF-1.4
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [3 0 R]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 0 R
/Resources <<
/Font <<
/F1 5 0 R
>>
>>
>>
endobj
4 0 obj
<<
/Length {}
>>
stream
BT
/F1 12 Tf
72 720 Td
({}) Tj
ET
endstream
endobj
5 0 obj
<<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
endobj
xref
0 6
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000274 00000 n
0000000526 00000 n
trailer
<<
/Size 6
/Root 1 0 R
>>
startxref
623
%%EOF"#,
content.len() + 50, content
);
fs::write(&file_path, pdf_content)?;
Ok(file_path.to_string_lossy().to_string())
}
fn create_corrupted_pdf(&self, filename: &str) -> Result<String> {
let file_path = self.temp_dir.path().join(filename);
fs::write(&file_path, b"This is not a valid PDF file")?;
Ok(file_path.to_string_lossy().to_string())
}
fn create_empty_pdf(&self, filename: &str) -> Result<String> {
let file_path = self.temp_dir.path().join(filename);
fs::write(&file_path, b"")?;
Ok(file_path.to_string_lossy().to_string())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_pdf_reader_creation() {
let reader = PdfReader::new();
assert!(reader.can_handle("pdf"));
assert!(reader.can_handle("PDF"));
assert!(!reader.can_handle("txt"));
assert!(!reader.can_handle("doc"));
}
#[test]
fn test_can_handle_extensions() {
let reader = PdfReader::new();
assert!(reader.can_handle("pdf"));
assert!(reader.can_handle("PDF"));
assert!(reader.can_handle("Pdf"));
assert!(!reader.can_handle("txt"));
assert!(!reader.can_handle("doc"));
assert!(!reader.can_handle("docx"));
assert!(!reader.can_handle(""));
assert!(!reader.can_handle("pdf.txt"));
}
#[cfg(feature = "pdf")]
#[test]
fn test_simple_text_extraction() -> Result<()> {
let setup = PdfTestSetup::new()?;
let test_text = "Simple test content for PDF extraction";
let pdf_path = setup.create_simple_pdf("simple.pdf", test_text)?;
let result = setup.pdf_reader.read_as_text(&pdf_path);
match result {
Ok(extracted_text) => {
assert!(!extracted_text.is_empty());
println!("Extracted text: {}", extracted_text);
}
Err(e) => {
println!("PDF extraction failed (expected with minimal PDF): {}", e);
}
}
Ok(())
}
#[cfg(feature = "pdf")]
#[test]
fn test_read_from_bytes() -> Result<()> {
let setup = PdfTestSetup::new()?;
let test_text = "Test content from bytes";
let pdf_path = setup.create_simple_pdf("bytes_test.pdf", test_text)?;
let pdf_bytes = fs::read(pdf_path)?;
let result = setup.pdf_reader.read_as_text_from_bytes(&pdf_bytes);
match result {
Ok(extracted_text) => {
assert!(!extracted_text.is_empty());
println!("Extracted from bytes: {}", extracted_text);
}
Err(e) => {
println!("PDF extraction from bytes failed: {}", e);
}
}
Ok(())
}
#[cfg(feature = "pdf")]
#[test]
fn test_read_from_reader() -> Result<()> {
let setup = PdfTestSetup::new()?;
let test_text = "Test content from reader";
let pdf_path = setup.create_simple_pdf("reader_test.pdf", test_text)?;
let file = fs::File::open(pdf_path)?;
let reader_box: Box<dyn std::io::Read> = Box::new(file);
let result = setup.pdf_reader.read_as_text_from_reader(reader_box);
match result {
Ok(extracted_text) => {
assert!(!extracted_text.is_empty());
println!("Extracted from reader: {}", extracted_text);
}
Err(e) => {
println!("PDF extraction from reader failed: {}", e);
}
}
Ok(())
}
#[test]
fn test_file_not_found() {
let reader = PdfReader::new();
let result = reader.read_as_text("/nonexistent/file.pdf");
assert!(result.is_err());
let error_msg = result.unwrap_err().to_string();
#[cfg(feature = "pdf")]
assert!(error_msg.contains("Failed to read PDF file"));
#[cfg(not(feature = "pdf"))]
assert!(error_msg.contains("PDF support is not enabled"));
}
#[test]
fn test_corrupted_pdf_handling() -> Result<()> {
let setup = PdfTestSetup::new()?;
let corrupted_path = setup.create_corrupted_pdf("corrupted.pdf")?;
let result = setup.pdf_reader.read_as_text(&corrupted_path);
assert!(result.is_err());
let error_msg = result.unwrap_err().to_string();
#[cfg(feature = "pdf")]
{
assert!(
error_msg.contains("Failed to extract text from PDF") || error_msg.contains("PDF")
);
}
#[cfg(not(feature = "pdf"))]
{
assert!(error_msg.contains("PDF support is not enabled"));
}
Ok(())
}
#[test]
fn test_empty_file_handling() -> Result<()> {
let setup = PdfTestSetup::new()?;
let empty_path = setup.create_empty_pdf("empty.pdf")?;
let result = setup.pdf_reader.read_as_text(&empty_path);
assert!(result.is_err());
Ok(())
}
#[cfg(not(feature = "pdf"))]
#[test]
fn test_pdf_feature_disabled() -> Result<()> {
let setup = PdfTestSetup::new()?;
let pdf_path = setup.create_simple_pdf("test.pdf", "test content")?;
let result = setup.pdf_reader.read_as_text(&pdf_path);
assert!(result.is_err());
let error_msg = result.unwrap_err().to_string();
assert!(error_msg.contains("PDF support is not enabled"));
assert!(error_msg.contains("pdf"));
Ok(())
}
#[test]
fn test_invalid_bytes() {
let reader = PdfReader::new();
let invalid_bytes = b"This is not a PDF file at all";
let result = reader.read_as_text_from_bytes(invalid_bytes);
assert!(result.is_err());
#[cfg(feature = "pdf")]
{
let error_msg = result.unwrap_err().to_string();
assert!(error_msg.contains("Failed to extract text from PDF"));
}
#[cfg(not(feature = "pdf"))]
{
let error_msg = result.unwrap_err().to_string();
assert!(error_msg.contains("PDF support is not enabled"));
}
}
#[cfg(feature = "pdf")]
#[test]
fn test_fixture_simple_text_pdf() {
let reader = PdfReader::new();
let fixture_path = "tests/pdf_fixtures/simple_text.pdf";
if Path::new(fixture_path).exists() {
let result = reader.read_as_text(fixture_path);
match result {
Ok(text) => {
println!("Successfully extracted text from fixture: {}", text);
assert!(!text.is_empty());
}
Err(e) => {
println!(
"Could not extract text from fixture (this may be expected): {}",
e
);
}
}
} else {
println!("Skipping fixture test - file not found: {}", fixture_path);
}
}
#[cfg(feature = "pdf")]
#[test]
fn test_error_message_quality() -> Result<()> {
let setup = PdfTestSetup::new()?;
let reader = PdfReader::new();
let result = reader.read_as_text("/nonexistent/path/file.pdf");
assert!(result.is_err());
let error = result.unwrap_err().to_string();
assert!(error.contains("Failed to read PDF file"));
assert!(error.contains("/nonexistent/path/file.pdf"));
let corrupted_path = setup.create_corrupted_pdf("bad.pdf")?;
let result = reader.read_as_text(&corrupted_path);
assert!(result.is_err());
let error = result.unwrap_err().to_string();
assert!(error.contains("Failed to extract text from PDF"));
Ok(())
}
#[test]
fn test_thread_safety() {
use std::sync::Arc;
use std::thread;
let reader = Arc::new(PdfReader::new());
let mut handles = vec![];
for i in 0..5 {
let reader_clone = Arc::clone(&reader);
let handle = thread::spawn(move || {
assert!(reader_clone.can_handle("pdf"));
let invalid_bytes = format!("Invalid PDF content {}", i);
let result = reader_clone.read_as_text_from_bytes(invalid_bytes.as_bytes());
assert!(result.is_err());
});
handles.push(handle);
}
for handle in handles {
handle.join().unwrap();
}
}
}
#[cfg(test)]
mod integration_tests {
use lc::readers::get_reader_for_extension;
#[test]
fn test_reader_factory_integration() {
let reader = get_reader_for_extension("pdf");
#[cfg(feature = "pdf")]
{
assert!(reader.is_some());
let reader = reader.unwrap();
assert!(reader.can_handle("pdf"));
}
#[cfg(not(feature = "pdf"))]
{
assert!(reader.is_none());
}
}
#[test]
fn test_reader_factory_case_insensitive() {
let extensions = ["pdf", "PDF", "Pdf", "pDf"];
for ext in &extensions {
let reader = get_reader_for_extension(ext);
#[cfg(feature = "pdf")]
{
assert!(reader.is_some(), "Failed for extension: {}", ext);
let reader = reader.unwrap();
assert!(reader.can_handle(ext));
}
#[cfg(not(feature = "pdf"))]
{
assert!(reader.is_none(), "Should be None for extension: {}", ext);
}
}
}
}
#[cfg(test)]
mod performance_tests {
use super::*;
use std::time::Instant;
#[test]
fn test_reader_creation_performance() {
let start = Instant::now();
for _ in 0..1000 {
let _reader = PdfReader::new();
}
let duration = start.elapsed();
println!("Created 1000 PDF readers in {:?}", duration);
assert!(duration.as_millis() < 100);
}
#[test]
fn test_can_handle_performance() {
let reader = PdfReader::new();
let start = Instant::now();
for _ in 0..10000 {
reader.can_handle("pdf");
reader.can_handle("txt");
reader.can_handle("doc");
}
let duration = start.elapsed();
println!("Performed 30000 can_handle checks in {:?}", duration);
assert!(duration.as_millis() < 100);
}
}