use std::{collections::VecDeque, fs::File, path::PathBuf};
use anyhow::Result;
use crate::Document;
/// Imports the document stored at `path`, choosing how to read it from the
/// file extension.
///
/// The extension is compared case-insensitively:
/// * `txt`: the file is opened and its content is read as is;
/// * `pdf` (only when the `pdf` feature is enabled): the text of every page
///   is extracted with `lopdf` and the pages are joined with a blank line.
///
/// The resulting reader is handed, together with `path`, to
/// `Document::from_reader`.
///
/// # Errors
///
/// Returns an error when the extension is missing or not handled, when the
/// file can't be opened or loaded, when the text of a PDF page can't be
/// extracted, or when `Document::from_reader` fails.
pub(crate) fn import_document_from(path: &PathBuf) -> Result<Document> {
    // A lossy conversion is used so that an extension which isn't valid UTF-8
    // ends up in the "not handled" error below instead of causing a panic.
    let ext = path
        .extension()
        .map(|ext| ext.to_string_lossy().to_lowercase())
        .unwrap_or_default();
    let reader: Box<dyn std::io::Read> = match ext.as_str() {
        "txt" => Box::new(File::open(path)?),
        #[cfg(feature = "pdf")]
        "pdf" => {
            let pdf = lopdf::Document::load(path)?;
            let pages = pdf.get_pages();
            // The map returned by `get_pages` is keyed by page number, so the
            // keys are used directly rather than being rebuilt from an index.
            // Pages are extracted one at a time so that the error can name
            // the page which failed.
            let parts = pages
                .keys()
                .map(|&page_number| {
                    pdf.extract_text(&[page_number]).map_err(|e| {
                        anyhow!(
                            "can't parse page {} of {}: {:?}",
                            page_number,
                            path.display(),
                            e
                        )
                    })
                })
                .collect::<Result<Vec<_>>>()?;
            // `VecDeque<u8>` implements `Read`, which gives an in-memory reader
            // over the extracted text.
            Box::new(VecDeque::from(parts.join("\n\n").into_bytes()))
        }
        _ => return Err(anyhow!("file extension '{ext}' not handled")),
    };
    Document::from_reader(path, reader)
}