use std::collections::HashMap;
use rpdfium_core::{Name, PdfSource};
use rpdfium_parser::{Object, ObjectStore};
use crate::error::{DocError, DocResult};
/// String-valued entries read from a PDF document information dictionary.
///
/// Each field is `None` when `parse_metadata` could not obtain a string for
/// the corresponding key (entry missing, unresolvable, or not a string).
/// `Default` yields a value with every field set to `None`.
#[derive(Debug, Clone, Default)]
pub struct DocumentMetadata {
/// Entry looked up with `Name::title()`.
pub title: Option<String>,
/// Entry looked up with `Name::author()`.
pub author: Option<String>,
/// Entry looked up with `Name::subject()`.
pub subject: Option<String>,
/// Entry looked up with `Name::keywords()`; kept as one unsplit string.
pub keywords: Option<String>,
/// Entry looked up with `Name::creator()`.
pub creator: Option<String>,
/// Entry looked up with `Name::producer()`.
pub producer: Option<String>,
/// Entry looked up with `Name::creation_date()`; stored as the raw string,
/// no date parsing is performed in this module.
pub creation_date: Option<String>,
/// Entry looked up with `Name::mod_date()`; stored as the raw string,
/// no date parsing is performed in this module.
pub mod_date: Option<String>,
}
pub fn parse_metadata<S: PdfSource>(
info_obj: &Object,
store: &ObjectStore<S>,
) -> DocResult<DocumentMetadata> {
let resolved = store
.deep_resolve(info_obj)
.map_err(|e| DocError::Parser(e.to_string()))?;
let dict = resolved.as_dict().ok_or(DocError::UnexpectedType)?;
Ok(DocumentMetadata {
title: extract_string_field(dict, &Name::title(), store),
author: extract_string_field(dict, &Name::author(), store),
subject: extract_string_field(dict, &Name::subject(), store),
keywords: extract_string_field(dict, &Name::keywords(), store),
creator: extract_string_field(dict, &Name::creator(), store),
producer: extract_string_field(dict, &Name::producer(), store),
creation_date: extract_string_field(dict, &Name::creation_date(), store),
mod_date: extract_string_field(dict, &Name::mod_date(), store),
})
}
/// Reads one string entry from `dict`, resolving it through `store`.
///
/// Returns `None` when `key` is absent, when resolving the stored object
/// fails (the error is discarded), or when the resolved object is not a
/// string. Otherwise the string is converted with `to_string_lossy()`.
fn extract_string_field<S: PdfSource>(
    dict: &HashMap<Name, Object>,
    key: &Name,
    store: &ObjectStore<S>,
) -> Option<String> {
    dict.get(key)
        .and_then(|raw| store.deep_resolve(raw).ok())
        .and_then(|value| value.as_string().map(|text| text.to_string_lossy()))
}
// Unit tests for `parse_metadata`, driven by in-memory dictionaries and a
// tiny hand-built PDF that backs the `ObjectStore`.
#[cfg(test)]
mod tests {
use super::*;
use rpdfium_core::PdfString;
/// Opens an `ObjectStore` over the bytes from `build_minimal_pdf`, using
/// lenient parsing. Panics (via `unwrap`) if the fixture fails to open.
fn build_store() -> ObjectStore<Vec<u8>> {
let pdf = build_minimal_pdf();
ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
}
/// Assembles a minimal PDF byte buffer: header, a catalog object, an empty
/// pages object, a three-entry xref table, trailer and `startxref`.
fn build_minimal_pdf() -> Vec<u8> {
let mut pdf = Vec::new();
pdf.extend_from_slice(b"%PDF-1.4\n");
// Record each object's byte offset just before writing it; the xref
// entries below are formatted from these values.
let obj1_offset = pdf.len();
pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
let obj2_offset = pdf.len();
pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
// Offset of the xref table itself, written after `startxref`.
let xref_offset = pdf.len();
pdf.extend_from_slice(b"xref\n0 3\n");
// Entry 0 is the free-list head; entries 1 and 2 point at the objects above.
pdf.extend_from_slice(b"0000000000 65535 f \r\n");
pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
pdf
}
/// Wraps `s` as an `Object::String` built from its UTF-8 bytes.
fn str_obj(s: &str) -> Object {
Object::String(PdfString::from_bytes(s.as_bytes().to_vec()))
}
/// All eight supported keys present as strings: every field is populated.
#[test]
fn test_full_metadata() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(Name::title(), str_obj("My Document"));
dict.insert(Name::author(), str_obj("John Doe"));
dict.insert(Name::subject(), str_obj("Testing"));
dict.insert(Name::keywords(), str_obj("pdf rust"));
dict.insert(Name::creator(), str_obj("TestApp"));
dict.insert(Name::producer(), str_obj("rpdfium"));
dict.insert(Name::creation_date(), str_obj("D:20240101120000"));
dict.insert(Name::mod_date(), str_obj("D:20240615090000"));
let obj = Object::Dictionary(dict);
let meta = parse_metadata(&obj, &store).unwrap();
assert_eq!(meta.title.as_deref(), Some("My Document"));
assert_eq!(meta.author.as_deref(), Some("John Doe"));
assert_eq!(meta.subject.as_deref(), Some("Testing"));
assert_eq!(meta.keywords.as_deref(), Some("pdf rust"));
assert_eq!(meta.creator.as_deref(), Some("TestApp"));
assert_eq!(meta.producer.as_deref(), Some("rpdfium"));
// Dates come back as the raw strings that were stored.
assert_eq!(meta.creation_date.as_deref(), Some("D:20240101120000"));
assert_eq!(meta.mod_date.as_deref(), Some("D:20240615090000"));
}
/// Only two keys present: those fields are set, the rest stay `None`.
#[test]
fn test_partial_metadata() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(Name::title(), str_obj("Partial"));
dict.insert(Name::producer(), str_obj("rpdfium"));
let obj = Object::Dictionary(dict);
let meta = parse_metadata(&obj, &store).unwrap();
assert_eq!(meta.title.as_deref(), Some("Partial"));
assert!(meta.author.is_none());
assert!(meta.subject.is_none());
assert!(meta.keywords.is_none());
assert!(meta.creator.is_none());
assert_eq!(meta.producer.as_deref(), Some("rpdfium"));
assert!(meta.creation_date.is_none());
assert!(meta.mod_date.is_none());
}
/// An empty dictionary parses successfully with every field `None`.
#[test]
fn test_empty_info_dict() {
let store = build_store();
let obj = Object::Dictionary(HashMap::new());
let meta = parse_metadata(&obj, &store).unwrap();
assert!(meta.title.is_none());
assert!(meta.author.is_none());
assert!(meta.subject.is_none());
assert!(meta.keywords.is_none());
assert!(meta.creator.is_none());
assert!(meta.producer.is_none());
assert!(meta.creation_date.is_none());
assert!(meta.mod_date.is_none());
}
/// Integer and boolean values under string keys are dropped (field is
/// `None`) without affecting a valid sibling entry.
#[test]
fn test_non_string_values_ignored() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(Name::title(), Object::Integer(42));
dict.insert(Name::author(), Object::Boolean(true));
dict.insert(Name::subject(), str_obj("Valid Subject"));
let obj = Object::Dictionary(dict);
let meta = parse_metadata(&obj, &store).unwrap();
assert!(meta.title.is_none());
assert!(meta.author.is_none());
assert_eq!(meta.subject.as_deref(), Some("Valid Subject"));
}
/// `DocumentMetadata::default()` starts with unset fields.
#[test]
fn test_metadata_default() {
let meta = DocumentMetadata::default();
assert!(meta.title.is_none());
assert!(meta.author.is_none());
}
// The remaining tests are placeholders: each body is `todo!()` and each is
// skipped through `#[ignore]` until CheckForSharedForm exists.
// NOTE(review): the `test_cpdf_metadata_*` names presumably mirror upstream
// PDFium CPDF_Metadata test cases — confirm before filling them in.
#[test]
#[ignore = "CheckForSharedForm not yet implemented"]
fn test_cpdf_metadata_check_shared_form_email_at_top_level() {
todo!()
}
#[test]
#[ignore = "CheckForSharedForm not yet implemented"]
fn test_cpdf_metadata_check_shared_form_acrobat_at_top_level() {
todo!()
}
#[test]
#[ignore = "CheckForSharedForm not yet implemented"]
fn test_cpdf_metadata_check_shared_form_filesystem_at_top_level() {
todo!()
}
#[test]
#[ignore = "CheckForSharedForm not yet implemented"]
fn test_cpdf_metadata_check_shared_form_without_workflow() {
todo!()
}
#[test]
#[ignore = "CheckForSharedForm not yet implemented"]
fn test_cpdf_metadata_check_shared_form_as_child() {
todo!()
}
#[test]
#[ignore = "CheckForSharedForm not yet implemented"]
fn test_cpdf_metadata_check_shared_form_as_no_adhoc() {
todo!()
}
#[test]
#[ignore = "CheckForSharedForm not yet implemented"]
fn test_cpdf_metadata_check_shared_form_exceed_max_depth() {
todo!()
}
#[test]
#[ignore = "CheckForSharedForm not yet implemented"]
fn test_cpdf_metadata_check_shared_form_wrong_namespace() {
todo!()
}
#[test]
#[ignore = "CheckForSharedForm not yet implemented"]
fn test_cpdf_metadata_check_shared_form_multiple_errors() {
todo!()
}
}