mod format;
#[cfg(feature = "pdf-pdfium")]
mod pdfium;
#[cfg(all(feature = "pdf-pure-rust", not(feature = "pdf-pdfium")))]
mod pure_rust;
use async_trait::async_trait;
use cognee_models::Document;
use super::{DocumentLoader, LoaderError, LoaderOutput};
/// Document loader for PDF input.
///
/// The loader is a stateless unit struct: it carries no configuration, and the
/// text-extraction backend is chosen at compile time through Cargo features
/// rather than through any runtime field.
///
/// The common traits are derived so the loader can be logged, copied freely,
/// and constructed through `Default` like any other value type.
#[derive(Debug, Clone, Copy, Default)]
pub struct PdfLoader;
#[async_trait]
impl DocumentLoader for PdfLoader {
    /// Extracts the text of the PDF contained in `bytes`.
    ///
    /// The raw bytes are handed to `extract_impl`, whose definition is selected
    /// at compile time by the enabled PDF feature. On success the extracted
    /// string is wrapped in [`LoaderOutput::Text`]. The `_doc` argument is not
    /// consulted.
    ///
    /// # Errors
    ///
    /// Propagates, unchanged, any [`LoaderError`] returned by `extract_impl`.
    async fn extract(&self, bytes: &[u8], _doc: &Document) -> Result<LoaderOutput, LoaderError> {
        extract_impl(bytes).map(LoaderOutput::Text)
    }

    /// Returns the fixed identifier reported as this loader's engine name.
    ///
    /// NOTE(review): the identifier does not name either backend module used
    /// here; presumably it is kept for compatibility with another
    /// implementation. Confirm before changing it.
    fn engine_name(&self) -> &'static str {
        const ENGINE_NAME: &str = "pypdf_loader";
        ENGINE_NAME
    }
}
/// Extracts text from `bytes` using the `pdfium` backend module.
///
/// This definition is compiled only when the `pdf-pdfium` feature is enabled.
///
/// # Errors
///
/// Returns the [`LoaderError`] produced by `pdfium::extract_text`, unchanged.
#[cfg(feature = "pdf-pdfium")]
fn extract_impl(bytes: &[u8]) -> Result<String, LoaderError> {
    let text = pdfium::extract_text(bytes)?;
    Ok(text)
}
/// Extracts text from `bytes` using the `pure_rust` backend module.
///
/// This definition is compiled only when the `pdf-pure-rust` feature is
/// enabled and `pdf-pdfium` is not, so the `pdfium` backend takes precedence
/// when both features are switched on.
///
/// # Errors
///
/// Returns the [`LoaderError`] produced by `pure_rust::extract_text`, unchanged.
#[cfg(all(feature = "pdf-pure-rust", not(feature = "pdf-pdfium")))]
fn extract_impl(bytes: &[u8]) -> Result<String, LoaderError> {
    let text = pure_rust::extract_text(bytes)?;
    Ok(text)
}