#[cfg(feature = "pdf")]
pub mod pdf;
use anyhow::Result;
use std::path::Path;
/// Extracts the textual content of the file at `path`, dispatching on its extension.
///
/// The extension is compared case-insensitively (it is lowercased before matching).
///
/// * `pdf` — when the `pdf` feature is enabled, the work is delegated to
///   [`pdf::extract_pdf_text`]; otherwise a warning is logged and nothing is returned.
/// * anything else (including no extension) — the file is read as a UTF-8 string via
///   [`std::fs::read_to_string`].
///
/// Returns `Ok(Some(text))` when non-blank text was obtained, and `Ok(None)` when the
/// content is empty/whitespace-only or could not be extracted or read.
///
/// # Errors
///
/// Every failure path in this function is logged through `tracing` and mapped to
/// `Ok(None)`; the body itself never constructs an `Err`.
pub fn extract_text(path: &Path) -> Result<Option<String>> {
    let extension = match path.extension() {
        Some(raw) => raw.to_string_lossy().to_lowercase(),
        None => String::new(),
    };

    // Compute the optional text once and wrap it in `Ok` at the very end, since
    // no branch below produces an error.
    let extracted = match extension.as_str() {
        #[cfg(feature = "pdf")]
        "pdf" => match pdf::extract_pdf_text(path) {
            Ok(text) => {
                if text.trim().is_empty() {
                    tracing::warn!("PDF has no extractable text: {}", path.display());
                    None
                } else {
                    Some(text)
                }
            }
            Err(err) => {
                tracing::warn!(
                    "Failed to extract text from PDF {}: {}",
                    path.display(),
                    err
                );
                None
            }
        },
        #[cfg(not(feature = "pdf"))]
        "pdf" => {
            tracing::warn!(
                "PDF support not enabled. Rebuild with `pdf` feature to load: {}",
                path.display()
            );
            None
        }
        // Fallback: treat the file as plain text. Read failures are only logged
        // at debug level.
        _ => match std::fs::read_to_string(path) {
            Ok(content) => Some(content).filter(|body| !body.trim().is_empty()),
            Err(err) => {
                tracing::debug!("Could not read {} as text: {}", path.display(), err);
                None
            }
        },
    };

    Ok(extracted)
}
/// Reports whether `path` names a binary document format, judged solely by its
/// file extension.
///
/// The extension is lowercased before comparison, so `report.PDF` and `report.pdf`
/// are treated alike. Currently only `pdf` is recognised; a path without an
/// extension yields `false`. The file itself is never opened.
pub fn is_binary_document(path: &Path) -> bool {
    match path.extension() {
        Some(raw) => raw.to_string_lossy().to_lowercase() == "pdf",
        None => false,
    }
}