use crate::models::Document;
use crate::traits::{CerebroError, Result, Ingestor};
use async_trait::async_trait;
/// Ingestor for PDF files.
///
/// This is a stateless unit struct: it holds no configuration, so every value
/// of the type is interchangeable with any other.
#[derive(Debug, Clone, Default)]
pub struct PdfIngestor;

impl PdfIngestor {
    /// Creates a new `PdfIngestor`.
    ///
    /// Equivalent to `PdfIngestor::default()`.
    pub fn new() -> Self {
        Self
    }
}
#[async_trait]
impl Ingestor for PdfIngestor {
    /// Extracts the text of the PDF at `file_path` and returns it as a single [`Document`].
    ///
    /// The whole extracted text becomes the content of one document. Two
    /// metadata entries are attached to it:
    ///
    /// * `"source"` — `file_path`, exactly as passed in;
    /// * `"file_type"` — always `"pdf"`.
    ///
    /// # Errors
    ///
    /// Returns [`CerebroError::IngestionError`] (message prefixed with
    /// `"Failed to parse PDF: "`) when `pdf_extract::extract_text` fails.
    async fn ingest(&self, file_path: &str) -> Result<Vec<Document>> {
        // NOTE(review): `extract_text` is called synchronously inside this async fn,
        // so it presumably blocks the executor thread while parsing — consider
        // offloading it to a blocking-task pool if large PDFs are expected.
        let content = pdf_extract::extract_text(file_path)
            .map_err(|e| CerebroError::IngestionError(format!("Failed to parse PDF: {}", e)))?;

        // `content` is not needed after this point, so it is moved into the
        // document instead of being cloned (avoids copying the full PDF text).
        let mut doc = Document::new(content);
        doc.metadata.insert("source".to_string(), file_path.to_string());
        doc.metadata.insert("file_type".to_string(), "pdf".to_string());

        Ok(vec![doc])
    }
}