use std::path::Path;
use lopdf::Document;
use crate::EdgePdfError;
/// A parsed PDF bundled with summary information derived from it.
///
/// `load_pdf` and `load_pdf_from_bytes` in this file build values of this
/// type; `num_pages` and `metadata` are computed once at load time and are
/// not refreshed if `document` is modified afterwards.
pub struct RawPdfDocument {
/// The parsed `lopdf` document.
pub document: Document,
/// Page count, taken from the length of `Document::get_pages()` at load time.
pub num_pages: u32,
/// Values read from the document's `Info` dictionary at load time.
pub metadata: PdfMetadata,
}
/// Hand-written `Debug` that prints only the summary fields.
///
/// The `document` field is deliberately left out of the output; the
/// non-exhaustive finisher marks the omission with a trailing `..`.
impl std::fmt::Debug for RawPdfDocument {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Destructure so the skipped field is explicit at the pattern level.
        let Self {
            num_pages,
            metadata,
            ..
        } = self;

        let mut builder = f.debug_struct("RawPdfDocument");
        builder.field("num_pages", num_pages);
        builder.field("metadata", metadata);
        builder.finish_non_exhaustive()
    }
}
/// Selected entries of a PDF's `Info` dictionary.
///
/// Every field is `None` when the corresponding entry is missing or could not
/// be read as a string; `Default` therefore yields "no metadata at all".
#[derive(Debug, Clone, Default)]
pub struct PdfMetadata {
/// Value of the `Author` entry.
pub author: Option<String>,
/// Value of the `Title` entry.
pub title: Option<String>,
/// Value of the `CreationDate` entry, kept as the raw string (not parsed into a date).
pub creation_date: Option<String>,
/// Value of the `ModDate` entry, kept as the raw string (not parsed into a date).
pub modification_date: Option<String>,
}
/// Loads the PDF at `path` and gathers its page count and metadata.
///
/// `_password` is accepted for interface purposes but is not used by this
/// function.
///
/// # Errors
///
/// Returns [`EdgePdfError::LoadError`] when `path` does not exist or when
/// `lopdf` fails to load the file; the message includes the path.
pub fn load_pdf(path: &Path, _password: Option<&str>) -> Result<RawPdfDocument, EdgePdfError> {
    // Checked up front so a missing file gets its own, clearer message.
    if !path.exists() {
        let message = format!("File not found: {}", path.display());
        return Err(EdgePdfError::LoadError(message));
    }

    let document = match Document::load(path) {
        Ok(parsed) => parsed,
        Err(e) => {
            let message = format!("Failed to load PDF {}: {}", path.display(), e);
            return Err(EdgePdfError::LoadError(message));
        }
    };

    let metadata = extract_metadata(&document);
    let num_pages = document.get_pages().len() as u32;

    Ok(RawPdfDocument {
        document,
        num_pages,
        metadata,
    })
}
fn extract_metadata(doc: &Document) -> PdfMetadata {
let mut metadata = PdfMetadata::default();
if let Ok(info_ref) = doc.trailer.get(b"Info") {
if let Ok(info_ref) = info_ref.as_reference() {
if let Ok(info) = doc.get_object(info_ref) {
if let Ok(dict) = info.as_dict() {
metadata.author = extract_string_field(dict, b"Author");
metadata.title = extract_string_field(dict, b"Title");
metadata.creation_date = extract_string_field(dict, b"CreationDate");
metadata.modification_date = extract_string_field(dict, b"ModDate");
}
}
}
}
metadata
}
/// Parses a PDF held in memory and gathers its page count and metadata.
///
/// `_password` is accepted for interface purposes but is not used by this
/// function.
///
/// # Errors
///
/// Returns [`EdgePdfError::LoadError`] when `data` is empty or when `lopdf`
/// cannot parse it.
pub fn load_pdf_from_bytes(
    data: &[u8],
    _password: Option<&str>,
) -> Result<RawPdfDocument, EdgePdfError> {
    // Reject empty input early so the caller gets a precise message.
    if data.is_empty() {
        return Err(EdgePdfError::LoadError(String::from("Empty PDF data")));
    }

    let document = match Document::load_mem(data) {
        Ok(parsed) => parsed,
        Err(e) => {
            return Err(EdgePdfError::LoadError(format!(
                "Failed to parse PDF from bytes: {e}"
            )));
        }
    };

    let metadata = extract_metadata(&document);
    let num_pages = document.get_pages().len() as u32;

    Ok(RawPdfDocument {
        document,
        num_pages,
        metadata,
    })
}
/// Looks up `key` in `dict` and decodes its value as a PDF text string.
///
/// Returns `None` when the key is absent, when the value is not a string
/// object, or when the bytes carry a Unicode byte-order mark but are not
/// valid in that encoding.
fn extract_string_field(dict: &lopdf::Dictionary, key: &[u8]) -> Option<String> {
    match dict.get(key).ok()? {
        lopdf::Object::String(bytes, _) => decode_pdf_text_string(bytes),
        _ => None,
    }
}

/// Decodes the raw bytes of a PDF text string into a Rust `String`.
///
/// PDF text strings are not simply UTF-8: they are either UTF-16BE introduced
/// by the byte-order mark `FE FF`, UTF-8 introduced by `EF BB BF` (PDF 2.0),
/// or PDFDocEncoding when no mark is present. Decoding everything as UTF-8
/// silently drops most non-ASCII titles and author names.
fn decode_pdf_text_string(bytes: &[u8]) -> Option<String> {
    const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
    const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
    // PDFDocEncoding code points for bytes 0x80..=0xA0, the range where it
    // departs from Latin-1. 0x9F is undefined and maps to U+FFFD.
    const PDF_DOC_HIGH: [char; 33] = [
        '\u{2022}', '\u{2020}', '\u{2021}', '\u{2026}', '\u{2014}', '\u{2013}', '\u{0192}',
        '\u{2044}', '\u{2039}', '\u{203A}', '\u{2212}', '\u{2030}', '\u{201E}', '\u{201C}',
        '\u{201D}', '\u{2018}', '\u{2019}', '\u{201A}', '\u{2122}', '\u{FB01}', '\u{FB02}',
        '\u{0141}', '\u{0152}', '\u{0160}', '\u{0178}', '\u{017D}', '\u{0131}', '\u{0142}',
        '\u{0153}', '\u{0161}', '\u{017E}', '\u{FFFD}', '\u{20AC}',
    ];

    if let Some(payload) = bytes.strip_prefix(UTF16_BE_BOM) {
        // A dangling odd byte at the end is ignored by `chunks_exact`.
        let units: Vec<u16> = payload
            .chunks_exact(2)
            .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
            .collect();
        return String::from_utf16(&units).ok();
    }

    if let Some(payload) = bytes.strip_prefix(UTF8_BOM) {
        return std::str::from_utf8(payload).ok().map(str::to_owned);
    }

    // No byte-order mark. Valid UTF-8 is accepted as-is: this keeps the
    // previous behaviour and covers both ASCII and producers that write
    // unmarked UTF-8.
    if let Ok(text) = std::str::from_utf8(bytes) {
        return Some(text.to_owned());
    }

    // Otherwise treat the bytes as PDFDocEncoding. Outside 0x80..=0xA0 each
    // byte is mapped to the code point of the same value (Latin-1).
    let decoded = bytes
        .iter()
        .map(|&byte| match byte {
            0x80..=0xA0 => PDF_DOC_HIGH[usize::from(byte - 0x80)],
            _ => char::from(byte),
        })
        .collect();
    Some(decoded)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Loading a path that does not exist must fail with a `LoadError`
    /// whose message says the file was not found.
    #[test]
    fn test_load_nonexistent_file() {
        let missing = Path::new("/nonexistent/file.pdf");
        let outcome = load_pdf(missing, None);
        assert!(outcome.is_err());

        let failure = outcome.unwrap_err();
        match failure {
            EdgePdfError::LoadError(msg) => assert!(msg.contains("File not found")),
            other => panic!("Unexpected error: {:?}", other),
        }
    }
}