use std::{collections::HashMap, path::Path};
use lopdf::{Dictionary, Document, Object, Stream, text_string};
use serde::{Deserialize, Serialize};
use xmp_writer::{DateTime, LangId, XmpWriter};
/// Document-level metadata that [`update_metadata`] writes into a PDF.
///
/// The values feed both the generated XMP packet and the document
/// information (`Info`) dictionary, unless a field's documentation says
/// otherwise.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PdfMetadata {
/// Document title; written as the XMP title and the `Info` `Title` entry.
pub title: String,
/// Author name; written as the single XMP creator and the `Info` `Author` entry.
pub author: String,
/// Name of the producing application; written as the XMP creator tool and
/// as both the `Info` `Producer` and `Creator` entries.
pub application: String,
/// Subject line; written as the XMP description and the `Info` `Subject` entry.
pub subject: String,
/// Value passed to the XMP writer's `marked` property (XMP only).
pub copyright_status: bool,
/// Rights statement; written as the XMP rights text (XMP only).
pub copyright_notice: String,
/// Keywords; written to XMP only when non-empty, and joined with `", "`
/// for the `Info` `Keywords` entry.
pub keywords: Vec<String>,
/// Language identifier passed to the XMP writer (XMP only).
pub language: String,
/// Extra key/value pairs added to the `Info` dictionary only. They are set
/// after the standard entries, so a key such as `Title` replaces the
/// standard value.
pub custom_properties: HashMap<String, String>,
}
impl Default for PdfMetadata {
    /// Builds placeholder metadata: empty title, author and subject,
    /// `"typwriter"` as the application, language `"en"`, no keywords or
    /// custom properties, and a copyright notice stamped with the current
    /// local year.
    fn default() -> Self {
        // Evaluated once up front; only the four-digit year is used.
        let current_year = chrono::Local::now().format("%Y");

        Self {
            title: String::new(),
            author: String::new(),
            application: String::from("typwriter"),
            subject: String::new(),
            copyright_status: true,
            copyright_notice: format!("© {} Author. All rights reserved.", current_year),
            keywords: Vec::new(),
            language: String::from("en"),
            custom_properties: HashMap::new(),
        }
    }
}
/// Rewrites the metadata of the PDF at `path` in place.
///
/// The document is loaded, its XMP packet is regenerated from `metadata`
/// (carrying over any PDF/UA part number found in the existing packet), its
/// `Info` dictionary is rebuilt, and the result is saved back to `path`.
///
/// # Errors
///
/// Returns an error if the document cannot be loaded, if the XMP stream
/// cannot be updated, or if saving fails.
pub fn update_metadata(
    path: &Path,
    metadata: &PdfMetadata,
) -> Result<(), Box<dyn std::error::Error>> {
    let mut document = Document::load(path)?;

    // The PDF/UA part must be read before the existing packet is overwritten.
    let xmp_packet = generate_xmp(metadata, extract_pdfua_part(&document));
    update_xmp_stream(&mut document, &xmp_packet)?;
    update_info_dict(&mut document, metadata);

    document.save(path)?;
    Ok(())
}
/// Reads the PDF/UA part number from the document's existing XMP packet.
///
/// Returns `None` when the catalog has no `Metadata` reference, when the
/// referenced object is not a stream, or when the packet carries no parsable
/// `pdfuaid:part` value.
fn extract_pdfua_part(doc: &Document) -> Option<i32> {
    let catalog = doc.catalog().ok()?;
    let metadata_id = catalog.get(b"Metadata").ok()?.as_reference().ok()?;
    let Ok(Object::Stream(stream)) = doc.get_object(metadata_id) else {
        return None;
    };

    // Use the raw bytes when decoding fails (presumably the common case of
    // an unfiltered metadata stream).
    let content = stream
        .decompressed_content()
        .unwrap_or_else(|_| stream.content.clone());

    parse_pdfua_part(&String::from_utf8_lossy(&content))
}

/// Extracts the `pdfuaid:part` value from XMP text.
///
/// Handles the element form (`<pdfuaid:part>1</pdfuaid:part>`) and the RDF
/// attribute form (`pdfuaid:part="1"`), which XMP allows for simple
/// properties. The element form takes precedence.
fn parse_pdfua_part(xmp: &str) -> Option<i32> {
    const OPEN_TAG: &str = "<pdfuaid:part>";
    const CLOSE_TAG: &str = "</pdfuaid:part>";
    const ATTRIBUTE: &str = "pdfuaid:part";

    // Element form: text between the first opening tag and the next closing tag.
    let element_value = xmp
        .split_once(OPEN_TAG)
        .and_then(|(_, rest)| rest.split_once(CLOSE_TAG))
        .map(|(value, _)| value);
    if let Some(value) = element_value {
        return value.trim().parse().ok();
    }

    // Attribute form: `pdfuaid:part`, optional whitespace, `=`, quoted value.
    xmp.match_indices(ATTRIBUTE).find_map(|(pos, name)| {
        // An attribute name is preceded by whitespace; this skips tag names
        // and longer identifiers that merely end in `pdfuaid:part`.
        if !xmp[..pos].ends_with(char::is_whitespace) {
            return None;
        }
        let rest = xmp[pos + name.len()..]
            .trim_start()
            .strip_prefix('=')?
            .trim_start();
        let quote = rest.chars().next().filter(|c| matches!(c, '"' | '\''))?;
        let (value, _) = rest[quote.len_utf8()..].split_once(quote)?;
        value.trim().parse().ok()
    })
}
/// Serializes `metadata` into an XMP packet string.
///
/// Title, description and rights are written under the `x-default` language.
/// Both the creation and modification dates are set to the current local
/// date, so an earlier creation date is not carried over. Keywords are
/// emitted only when the list is non-empty, and the PDF/UA part only when
/// `pdfua_part` is `Some`.
fn generate_xmp(metadata: &PdfMetadata, pdfua_part: Option<i32>) -> String {
    const X_DEFAULT: &str = "x-default";

    let mut writer = XmpWriter::new();

    // Descriptive properties.
    writer.title([(Some(LangId(X_DEFAULT)), metadata.title.as_str())]);
    writer.description([(Some(LangId(X_DEFAULT)), metadata.subject.as_str())]);
    writer.creator([metadata.author.as_str()]);
    writer.language([LangId(&metadata.language)]);

    // Rights information.
    writer.marked(metadata.copyright_status);
    writer.rights([(Some(LangId(X_DEFAULT)), metadata.copyright_notice.as_str())]);

    writer.creator_tool(&metadata.application);

    // Today's date, component by component; each falls back to a fixed value
    // if the formatted text does not parse into the target integer type.
    let today = chrono::Local::now();
    let year = today.format("%Y").to_string().parse().unwrap_or(2024);
    let month = today.format("%m").to_string().parse().unwrap_or(1);
    let day = today.format("%d").to_string().parse().unwrap_or(1);
    let stamp = DateTime::date(year, month, day);
    writer.create_date(stamp);
    writer.modify_date(stamp);

    let keywords = &metadata.keywords;
    if !keywords.is_empty() {
        let joined = keywords.join(", ");
        writer.subject(keywords.iter().map(String::as_str));
        writer.pdf_keywords(&joined);
    }

    if let Some(part) = pdfua_part {
        writer.pdfua_part(part);
    }

    writer.finish(None)
}
/// Stores `xmp_string` as the document's XMP metadata stream.
///
/// If the catalog's `Metadata` entry references an existing stream, that
/// stream's content is replaced in place and its `Filter` entry is dropped.
/// Otherwise a new `Metadata`/`XML` stream is added and linked from the
/// catalog.
///
/// # Errors
///
/// Returns an error if the trailer has no usable `Root` reference, if the
/// catalog cannot be resolved, or if the root object cannot be fetched for
/// mutation. If the root object is not a dictionary, the new stream is added
/// but left unlinked and `Ok(())` is still returned.
fn update_xmp_stream(
    doc: &mut Document,
    xmp_string: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    let root_id = doc.trailer.get(b"Root")?.as_reference()?;
    let packet = xmp_string.as_bytes();

    // Resolve the id first so the immutable catalog borrow ends before the
    // document is borrowed mutably below.
    let existing_id = doc
        .catalog()?
        .get(b"Metadata")
        .ok()
        .and_then(|entry| entry.as_reference().ok());

    if let Some(id) = existing_id
        && let Ok(Object::Stream(existing)) = doc.get_object_mut(id)
    {
        existing.set_plain_content(packet.to_vec());
        existing.dict.set("Length", packet.len() as i64);
        existing.dict.remove(b"Filter");
        return Ok(());
    }

    // No reusable stream: create a fresh one and point the catalog at it.
    let mut header = Dictionary::new();
    header.set("Type", Object::Name(b"Metadata".to_vec()));
    header.set("Subtype", Object::Name(b"XML".to_vec()));
    header.set("Length", packet.len() as i64);
    let new_id = doc.add_object(Object::Stream(Stream::new(header, packet.to_vec())));

    if let Object::Dictionary(catalog) = doc.get_object_mut(root_id)? {
        catalog.set("Metadata", new_id);
    }
    Ok(())
}
/// Replaces the document information (`Info`) dictionary with one built
/// from `metadata`.
///
/// The application name is written as both `Producer` and `Creator`.
/// `CreationDate` and `ModDate` are both set to the current local date.
/// Custom properties are applied last, so a custom key that matches a
/// standard entry (for example `Title`) overrides it.
fn update_info_dict(doc: &mut Document, metadata: &PdfMetadata) {
    let mut dict = Dictionary::new();
    dict.set("Title", text_string(&metadata.title));
    dict.set("Subject", text_string(&metadata.subject));
    dict.set("Author", text_string(&metadata.author));
    dict.set("Producer", text_string(&metadata.application));
    dict.set("Creator", text_string(&metadata.application));

    // PDF date strings have the form `D:YYYYMMDD...`; the time-of-day and
    // offset parts are optional, but the `D:` prefix is part of the format.
    let today = chrono::Local::now().format("D:%Y%m%d").to_string();
    dict.set("CreationDate", text_string(&today));
    dict.set("ModDate", text_string(&today));

    dict.set("Keywords", text_string(&metadata.keywords.join(", ")));

    for (key, value) in &metadata.custom_properties {
        dict.set(key.as_str(), text_string(value));
    }

    // Drop the previous indirect `Info` object; leaving it behind would add
    // one unreferenced object to the saved file on every update.
    let previous_id = doc
        .trailer
        .get(b"Info")
        .ok()
        .and_then(|entry| entry.as_reference().ok());
    if let Some(old_id) = previous_id {
        doc.objects.remove(&old_id);
    }

    let info_id = doc.add_object(Object::Dictionary(dict));
    doc.trailer.set("Info", info_id);
}