use crate::error::Result;
use crate::parser::objects::{PdfDictionary, PdfName, PdfObject, PdfStream};
use quick_xml::events::Event;
use quick_xml::Reader;
use std::collections::HashMap;
/// XML namespace an XMP property belongs to.
///
/// Every built-in variant maps to a fixed prefix/URI pair (see `prefix` and `uri`);
/// `Custom` carries its own pair.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum XmpNamespace {
/// Prefix `dc`.
DublinCore,
/// Prefix `xmp`.
XmpBasic,
/// Prefix `xmpRights`.
XmpRights,
/// Prefix `xmpMM`.
XmpMediaManagement,
/// Prefix `pdf`.
Pdf,
/// Prefix `photoshop`.
Photoshop,
/// Caller-defined namespace, stored as `(prefix, uri)`.
Custom(String, String), }
impl XmpNamespace {
pub fn prefix(&self) -> &str {
match self {
XmpNamespace::DublinCore => "dc",
XmpNamespace::XmpBasic => "xmp",
XmpNamespace::XmpRights => "xmpRights",
XmpNamespace::XmpMediaManagement => "xmpMM",
XmpNamespace::Pdf => "pdf",
XmpNamespace::Photoshop => "photoshop",
XmpNamespace::Custom(prefix, _) => prefix,
}
}
pub fn uri(&self) -> &str {
match self {
XmpNamespace::DublinCore => "http://purl.org/dc/elements/1.1/",
XmpNamespace::XmpBasic => "http://ns.adobe.com/xap/1.0/",
XmpNamespace::XmpRights => "http://ns.adobe.com/xap/1.0/rights/",
XmpNamespace::XmpMediaManagement => "http://ns.adobe.com/xap/1.0/mm/",
XmpNamespace::Pdf => "http://ns.adobe.com/pdf/1.3/",
XmpNamespace::Photoshop => "http://ns.adobe.com/photoshop/1.0/",
XmpNamespace::Custom(_, uri) => uri,
}
}
}
/// Value payload of an XMP property.
///
/// Besides the simple and list variants, `Struct` holds named fields (serialized inside a
/// nested `rdf:Description`) and `ArrayStruct` holds a list of such field maps.
#[derive(Debug, Clone, PartialEq)]
pub enum XmpValue {
/// Plain text, XML-escaped on serialization.
Text(String),
/// Date kept as a string; `set_date` only produces this variant when its ISO 8601 check passes.
Date(String),
/// List serialized as `rdf:Seq`.
Array(Vec<String>),
/// List serialized as `rdf:Bag`.
Bag(Vec<String>),
/// `(xml:lang, text)` pairs serialized as `rdf:Alt`.
Alt(Vec<(String, String)>), Struct(HashMap<String, Box<XmpValue>>),
/// Serialized as an `rdf:Seq` whose items are `rdf:li rdf:parseType="Resource"` elements.
ArrayStruct(Vec<HashMap<String, Box<XmpValue>>>),
}
/// A single XMP property: a namespaced name plus its value.
#[derive(Debug, Clone)]
pub struct XmpProperty {
/// Namespace whose prefix qualifies `name` in the serialized packet.
pub namespace: XmpNamespace,
/// Local (unprefixed) property name.
pub name: String,
/// The property's value.
pub value: XmpValue,
}
/// Parser state: which RDF construct `from_xmp_packet` is currently inside.
#[derive(Debug, Clone, PartialEq)]
enum ContainerType {
/// Inside `rdf:Seq`.
Seq,
/// Inside `rdf:Bag`.
Bag,
/// Inside `rdf:Alt`.
Alt,
/// Inside an `rdf:li` carrying `rdf:parseType="Resource"`.
Resource, }
/// In-memory XMP metadata: an ordered list of properties plus extra namespace declarations.
#[derive(Debug, Clone)]
pub struct XmpMetadata {
// Properties in insertion order; the setters append and never replace.
properties: Vec<XmpProperty>,
// Prefix -> URI pairs added through `register_namespace`.
custom_namespaces: HashMap<String, String>,
}
impl Default for XmpMetadata {
fn default() -> Self {
Self::new()
}
}
impl XmpMetadata {
/// Creates a container with no properties and no registered custom namespaces.
pub fn new() -> Self {
    let properties = Vec::new();
    let custom_namespaces = HashMap::new();
    Self {
        properties,
        custom_namespaces,
    }
}
/// Appends `property` to the end of the property list.
///
/// Nothing is replaced: an existing property with the same namespace and name stays in place.
pub fn add_property(&mut self, property: XmpProperty) {
    let stored = &mut self.properties;
    stored.push(property);
}
/// Appends a plain-text property (`XmpValue::Text`) under `namespace`.
pub fn set_text(
    &mut self,
    namespace: XmpNamespace,
    name: impl Into<String>,
    value: impl Into<String>,
) {
    let name = name.into();
    let value = XmpValue::Text(value.into());
    let property = XmpProperty {
        namespace,
        name,
        value,
    };
    self.properties.push(property);
}
/// Appends a date property.
///
/// The string is stored as `XmpValue::Date` when it passes `is_valid_iso8601_date`;
/// otherwise a debug message is logged and the string is stored as `XmpValue::Text`.
pub fn set_date(
    &mut self,
    namespace: XmpNamespace,
    name: impl Into<String>,
    date: impl Into<String>,
) {
    let raw_date = date.into();
    let property_name = name.into();
    let value = if Self::is_valid_iso8601_date(&raw_date) {
        XmpValue::Date(raw_date)
    } else {
        tracing::debug!(
            "Warning: Invalid ISO 8601 date '{}' for property '{}'. Storing as text.",
            raw_date,
            property_name
        );
        XmpValue::Text(raw_date)
    };
    self.properties.push(XmpProperty {
        namespace,
        name: property_name,
        value,
    });
}
/// Appends a list property (`XmpValue::Array`), serialized as `rdf:Seq`.
pub fn set_array(
    &mut self,
    namespace: XmpNamespace,
    name: impl Into<String>,
    values: Vec<String>,
) {
    let property = XmpProperty {
        namespace,
        name: name.into(),
        value: XmpValue::Array(values),
    };
    self.properties.push(property);
}
/// Appends a list property (`XmpValue::Bag`), serialized as `rdf:Bag`.
pub fn set_bag(
    &mut self,
    namespace: XmpNamespace,
    name: impl Into<String>,
    values: Vec<String>,
) {
    let property = XmpProperty {
        namespace,
        name: name.into(),
        value: XmpValue::Bag(values),
    };
    self.properties.push(property);
}
/// Appends a language-alternative property (`XmpValue::Alt`).
///
/// Each entry of `values` is a `(language tag, text)` pair.
pub fn set_alt(
    &mut self,
    namespace: XmpNamespace,
    name: impl Into<String>,
    values: Vec<(String, String)>,
) {
    let property = XmpProperty {
        namespace,
        name: name.into(),
        value: XmpValue::Alt(values),
    };
    self.properties.push(property);
}
/// Appends a structured property (`XmpValue::Struct`) built from `fields`.
///
/// Field values are boxed to match the storage type of `XmpValue::Struct`.
pub fn set_struct(
    &mut self,
    namespace: XmpNamespace,
    name: impl Into<String>,
    fields: HashMap<String, XmpValue>,
) {
    let mut boxed: HashMap<String, Box<XmpValue>> = HashMap::new();
    for (field_name, field_value) in fields {
        boxed.insert(field_name, Box::new(field_value));
    }
    let property = XmpProperty {
        namespace,
        name: name.into(),
        value: XmpValue::Struct(boxed),
    };
    self.properties.push(property);
}
/// Appends a list-of-structs property (`XmpValue::ArrayStruct`).
///
/// Every field value of every item is boxed to match the storage type.
pub fn set_array_struct(
    &mut self,
    namespace: XmpNamespace,
    name: impl Into<String>,
    items: Vec<HashMap<String, XmpValue>>,
) {
    let mut boxed: Vec<HashMap<String, Box<XmpValue>>> = Vec::new();
    for item in items {
        let fields = item
            .into_iter()
            .map(|(field_name, field_value)| (field_name, Box::new(field_value)))
            .collect();
        boxed.push(fields);
    }
    let property = XmpProperty {
        namespace,
        name: name.into(),
        value: XmpValue::ArrayStruct(boxed),
    };
    self.properties.push(property);
}
/// Registers an extra `xmlns:<prefix>="<uri>"` declaration for the serialized packet.
///
/// Registering the same prefix again overwrites the earlier URI.
pub fn register_namespace(&mut self, prefix: String, uri: String) {
    let _previous = self.custom_namespaces.insert(prefix, uri);
}
/// Returns all stored properties in insertion order.
pub fn properties(&self) -> &[XmpProperty] {
    self.properties.as_slice()
}
pub fn to_xmp_packet(&self) -> String {
let mut xml = String::new();
xml.push_str("<?xpacket begin=\"\u{FEFF}\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n");
xml.push_str("<x:xmpmeta xmlns:x=\"adobe:ns:meta/\" x:xmptk=\"oxidize-pdf 1.4.0\">\n");
xml.push_str(" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n");
xml.push_str(" <rdf:Description rdf:about=\"\"");
let mut namespaces: HashMap<String, String> = HashMap::new();
for prop in &self.properties {
namespaces.insert(
prop.namespace.prefix().to_string(),
prop.namespace.uri().to_string(),
);
}
for (prefix, uri) in &self.custom_namespaces {
namespaces.insert(prefix.clone(), uri.clone());
}
for (prefix, uri) in &namespaces {
xml.push_str(&format!("\n xmlns:{}=\"{}\"", prefix, uri));
}
xml.push_str(">\n");
for prop in &self.properties {
let prefix = prop.namespace.prefix();
match &prop.value {
XmpValue::Text(text) => {
xml.push_str(&format!(
" <{}:{}>{}</{}:{}>\n",
prefix,
prop.name,
Self::escape_xml(text),
prefix,
prop.name
));
}
XmpValue::Date(date) => {
xml.push_str(&format!(
" <{}:{}>{}</{}:{}>\n",
prefix, prop.name, date, prefix, prop.name
));
}
XmpValue::Array(values) => {
xml.push_str(&format!(" <{}:{}>\n", prefix, prop.name));
xml.push_str(" <rdf:Seq>\n");
for value in values {
xml.push_str(&format!(
" <rdf:li>{}</rdf:li>\n",
Self::escape_xml(value)
));
}
xml.push_str(" </rdf:Seq>\n");
xml.push_str(&format!(" </{}:{}>\n", prefix, prop.name));
}
XmpValue::Bag(values) => {
xml.push_str(&format!(" <{}:{}>\n", prefix, prop.name));
xml.push_str(" <rdf:Bag>\n");
for value in values {
xml.push_str(&format!(
" <rdf:li>{}</rdf:li>\n",
Self::escape_xml(value)
));
}
xml.push_str(" </rdf:Bag>\n");
xml.push_str(&format!(" </{}:{}>\n", prefix, prop.name));
}
XmpValue::Alt(values) => {
xml.push_str(&format!(" <{}:{}>\n", prefix, prop.name));
xml.push_str(" <rdf:Alt>\n");
for (lang, value) in values {
xml.push_str(&format!(
" <rdf:li xml:lang=\"{}\">{}</rdf:li>\n",
lang,
Self::escape_xml(value)
));
}
xml.push_str(" </rdf:Alt>\n");
xml.push_str(&format!(" </{}:{}>\n", prefix, prop.name));
}
XmpValue::Struct(fields) => {
xml.push_str(&format!(" <{}:{}>\n", prefix, prop.name));
xml.push_str(" <rdf:Description>\n");
for (field_name, field_value) in fields {
Self::serialize_value(&mut xml, field_name, field_value, " ");
}
xml.push_str(" </rdf:Description>\n");
xml.push_str(&format!(" </{}:{}>\n", prefix, prop.name));
}
XmpValue::ArrayStruct(items) => {
xml.push_str(&format!(" <{}:{}>\n", prefix, prop.name));
xml.push_str(" <rdf:Seq>\n");
for item in items {
xml.push_str(" <rdf:li rdf:parseType=\"Resource\">\n");
for (field_name, field_value) in item {
Self::serialize_value(
&mut xml,
field_name,
field_value,
" ",
);
}
xml.push_str(" </rdf:li>\n");
}
xml.push_str(" </rdf:Seq>\n");
xml.push_str(&format!(" </{}:{}>\n", prefix, prop.name));
}
}
}
xml.push_str(" </rdf:Description>\n");
xml.push_str(" </rdf:RDF>\n");
xml.push_str("</x:xmpmeta>\n");
let padding = " ".repeat(2000); xml.push_str(&format!("<?xpacket end=\"w\"?>{}", padding));
xml
}
/// Wraps the serialized XMP packet in a PDF stream object.
///
/// The dictionary gets `/Type /Metadata`, `/Subtype /XML` and a `/Length` equal to
/// the packet's size in bytes.
pub fn to_pdf_stream(&self) -> PdfStream {
    let data = self.to_xmp_packet().into_bytes();
    let byte_len = data.len() as i64;

    let mut dict = PdfDictionary::new();
    let entries = [
        ("Type", PdfObject::Name(PdfName("Metadata".to_string()))),
        ("Subtype", PdfObject::Name(PdfName("XML".to_string()))),
        ("Length", PdfObject::Integer(byte_len)),
    ];
    for (key, value) in entries {
        dict.insert(key.to_string(), value);
    }

    PdfStream { dict, data }
}
/// Parses the XMP packet held in `stream.data`.
///
/// The bytes are decoded as UTF-8 with invalid sequences replaced, then handed to
/// `from_xmp_packet`, whose errors are returned unchanged.
pub fn from_pdf_stream(stream: &PdfStream) -> Result<Self> {
    let packet = String::from_utf8_lossy(&stream.data);
    Self::from_xmp_packet(packet.as_ref())
}
/// Parses an XMP packet into an `XmpMetadata`.
///
/// Recognised input:
/// * attributes on `rdf:Description` with a known prefix become text properties;
/// * child elements with a known prefix become text or date properties (dates are
///   detected heuristically, then validated by `set_date`);
/// * `rdf:Seq` / `rdf:Bag` / `rdf:Alt` children become array / bag / alt properties;
/// * `rdf:li rdf:parseType="Resource"` items inside an `rdf:Seq` become an
///   array-of-structs property.
///
/// Only the prefixes known to `parse_property_name` are recognised; elements with
/// any other prefix are ignored.
///
/// # Errors
/// Returns `PdfError::ParseError` when the `<?xpacket` or `</x:xmpmeta>` marker is
/// missing, or when the XML reader reports an error.
pub fn from_xmp_packet(xml: &str) -> Result<Self> {
    // Cheap heuristic shared by plain properties and struct fields: either a
    // date-time ("...T..:..") or a "YYYY-MM-DD..." shaped string.
    fn looks_like_date(text: &str) -> bool {
        (text.contains('T') && text.contains(':'))
            || (text.len() >= 10
                && text.chars().nth(4) == Some('-')
                && text.chars().nth(7) == Some('-'))
    }

    if !xml.contains("<?xpacket") || !xml.contains("</x:xmpmeta>") {
        return Err(crate::error::PdfError::ParseError(
            "Invalid XMP packet: missing required XMP packet markers".to_string(),
        ));
    }

    let mut metadata = XmpMetadata::new();
    let mut reader = Reader::from_str(xml);
    reader.config_mut().trim_text(true);
    let mut buf = Vec::new();

    // Property currently being read (set on its start tag, cleared on its end tag).
    let mut current_ns: Option<XmpNamespace> = None;
    let mut current_property: Option<String> = None;
    // Container state.
    let mut current_container: Option<ContainerType> = None;
    let mut container_items: Vec<String> = Vec::new();
    let mut alt_items: Vec<(String, String)> = Vec::new();
    let mut text_buffer = String::new();
    let mut current_lang = String::new();
    // Nesting depth of `rdf:Description`. A counter (rather than a flag) is needed
    // because struct values nest a second `rdf:Description`; with a flag, closing
    // the inner one made the parser drop every property that followed.
    let mut description_depth: usize = 0;
    // True once the current property has produced a container value, so its end
    // tag must not also emit a text value.
    let mut had_container = false;
    // Struct-item state for `rdf:li rdf:parseType="Resource"`.
    let mut struct_items: Vec<HashMap<String, Box<XmpValue>>> = Vec::new();
    let mut current_struct: Option<HashMap<String, Box<XmpValue>>> = None;
    let mut struct_field_name: Option<String> = None;
    let mut struct_field_value = String::new();

    loop {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(ref e)) => {
                let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
                if name == "rdf:Description" {
                    description_depth += 1;
                    // Simple properties may be written as attributes.
                    for attr in e.attributes().flatten() {
                        let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
                        let value = String::from_utf8_lossy(&attr.value).to_string();
                        if let Some((ns, prop)) = Self::parse_property_name(&key) {
                            metadata.set_text(ns, prop, value);
                        }
                    }
                } else if name == "rdf:Seq" {
                    current_container = Some(ContainerType::Seq);
                    container_items.clear();
                    had_container = true;
                } else if name == "rdf:Bag" {
                    current_container = Some(ContainerType::Bag);
                    container_items.clear();
                    had_container = true;
                } else if name == "rdf:Alt" {
                    current_container = Some(ContainerType::Alt);
                    alt_items.clear();
                    had_container = true;
                } else if name == "rdf:li" {
                    text_buffer.clear();
                    let has_parse_type_resource = e.attributes().flatten().any(|a| {
                        String::from_utf8_lossy(a.key.as_ref()) == "rdf:parseType"
                            && String::from_utf8_lossy(&a.value) == "Resource"
                    });
                    if has_parse_type_resource {
                        current_container = Some(ContainerType::Resource);
                        current_struct = Some(HashMap::new());
                    } else if current_container == Some(ContainerType::Alt) {
                        current_lang = e
                            .attributes()
                            .flatten()
                            .find(|a| String::from_utf8_lossy(a.key.as_ref()) == "xml:lang")
                            .map(|a| String::from_utf8_lossy(&a.value).to_string())
                            .unwrap_or_else(|| "x-default".to_string());
                    }
                } else if current_struct.is_some() {
                    // Any element inside a Resource item is a struct field; the
                    // qualified element name is used as the field key.
                    struct_field_name = Some(name);
                    struct_field_value.clear();
                } else if description_depth > 0 {
                    if let Some((ns, prop)) = Self::parse_property_name(&name) {
                        current_ns = Some(ns);
                        current_property = Some(prop);
                        text_buffer.clear();
                    }
                }
            }
            Ok(Event::Text(e)) => {
                // NOTE(review): the raw event bytes are used as-is, so entity
                // references such as `&amp;` do not appear to be decoded here;
                // confirm against the quick_xml version in use.
                let text = String::from_utf8_lossy(e.as_ref()).to_string();
                let trimmed = text.trim();
                if !trimmed.is_empty() {
                    if current_struct.is_some() {
                        struct_field_value.push_str(trimmed);
                    } else {
                        text_buffer.push_str(trimmed);
                    }
                }
            }
            Ok(Event::End(ref e)) => {
                let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
                if name == "rdf:Description" {
                    description_depth = description_depth.saturating_sub(1);
                } else if name == "rdf:li" {
                    match current_container {
                        Some(ContainerType::Seq) | Some(ContainerType::Bag) => {
                            if !text_buffer.trim().is_empty() {
                                container_items.push(text_buffer.clone());
                            }
                        }
                        Some(ContainerType::Alt) => {
                            if !text_buffer.trim().is_empty() {
                                alt_items.push((current_lang.clone(), text_buffer.clone()));
                            }
                        }
                        Some(ContainerType::Resource) => {
                            if let Some(struct_data) = current_struct.take() {
                                struct_items.push(struct_data);
                            }
                            // Resource items live inside an rdf:Seq; go back to it.
                            current_container = Some(ContainerType::Seq);
                        }
                        None => {}
                    }
                } else if current_struct.is_some() && struct_field_name.is_some() {
                    if let (Some(struct_data), Some(field_name)) =
                        (current_struct.as_mut(), struct_field_name.take())
                    {
                        let value = if looks_like_date(&struct_field_value) {
                            XmpValue::Date(struct_field_value.clone())
                        } else {
                            XmpValue::Text(struct_field_value.clone())
                        };
                        struct_data.insert(field_name, Box::new(value));
                    }
                } else if name == "rdf:Seq" {
                    if let (Some(ns), Some(prop)) =
                        (current_ns.clone(), current_property.clone())
                    {
                        if !struct_items.is_empty() {
                            // Hand the collected items over without cloning them.
                            let unboxed_items: Vec<HashMap<String, XmpValue>> = struct_items
                                .drain(..)
                                .map(|item| {
                                    item.into_iter()
                                        .map(|(key, value)| (key, *value))
                                        .collect()
                                })
                                .collect();
                            metadata.set_array_struct(ns, prop, unboxed_items);
                        } else {
                            metadata.set_array(ns, prop, container_items.clone());
                        }
                    }
                    current_container = None;
                } else if name == "rdf:Bag" {
                    if let (Some(ns), Some(prop)) =
                        (current_ns.clone(), current_property.clone())
                    {
                        metadata.set_bag(ns, prop, container_items.clone());
                    }
                    current_container = None;
                } else if name == "rdf:Alt" {
                    if let (Some(ns), Some(prop)) =
                        (current_ns.clone(), current_property.clone())
                    {
                        metadata.set_alt(ns, prop, std::mem::take(&mut alt_items));
                    }
                    current_container = None;
                    alt_items.clear();
                } else if description_depth > 0 {
                    // End of a property element: emit its text unless a container
                    // already produced the value. Either way the property is done.
                    if let (Some(ns), Some(prop)) =
                        (current_ns.take(), current_property.take())
                    {
                        if had_container {
                            had_container = false;
                        } else if !text_buffer.trim().is_empty() {
                            if looks_like_date(&text_buffer) {
                                metadata.set_date(ns, prop, text_buffer.clone());
                            } else {
                                metadata.set_text(ns, prop, text_buffer.clone());
                            }
                        }
                    }
                }
            }
            Ok(Event::Empty(ref e)) => {
                // Self-closing property: take the first prefixed attribute
                // (this includes `rdf:resource`) as the property's text value.
                let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
                if let Some((ns, prop)) = Self::parse_property_name(&name) {
                    for attr in e.attributes().flatten() {
                        let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
                        let value = String::from_utf8_lossy(&attr.value).to_string();
                        if key.contains(':') {
                            metadata.set_text(ns.clone(), &prop, value);
                            break;
                        }
                    }
                }
            }
            Ok(Event::Eof) => break,
            Err(e) => {
                return Err(crate::error::PdfError::ParseError(format!(
                    "XML parsing error at position {}: {}",
                    reader.buffer_position(),
                    e
                )));
            }
            _ => {}
        }
        buf.clear();
    }
    Ok(metadata)
}
/// Splits a qualified name such as `dc:title` into a known namespace and local name.
///
/// Returns `None` unless the name has exactly one `:` and the prefix is one of the
/// built-in namespaces.
fn parse_property_name(name: &str) -> Option<(XmpNamespace, String)> {
    let mut segments = name.split(':');
    let (prefix, local) = match (segments.next(), segments.next(), segments.next()) {
        (Some(prefix), Some(local), None) => (prefix, local),
        _ => return None,
    };
    let namespace = if prefix == "dc" {
        XmpNamespace::DublinCore
    } else if prefix == "xmp" {
        XmpNamespace::XmpBasic
    } else if prefix == "xmpRights" {
        XmpNamespace::XmpRights
    } else if prefix == "xmpMM" {
        XmpNamespace::XmpMediaManagement
    } else if prefix == "pdf" {
        XmpNamespace::Pdf
    } else if prefix == "photoshop" {
        XmpNamespace::Photoshop
    } else {
        return None;
    };
    Some((namespace, local.to_string()))
}
/// Appends the XML for one named value to `xml`, prefixing each line with `indent`.
///
/// `name` is written verbatim as the element name. Text, dates and language tags
/// are XML-escaped. Struct fields are emitted in sorted order so the output does
/// not depend on hash-map iteration order. Nested structs recurse with one extra
/// space of indentation.
fn serialize_value(xml: &mut String, name: &str, value: &XmpValue, indent: &str) {
    match value {
        // Dates are escaped as well: a `Date` is not guaranteed to be markup-free.
        XmpValue::Text(text) | XmpValue::Date(text) => {
            xml.push_str(&format!(
                "{}<{}>{}</{}>\n",
                indent,
                name,
                Self::escape_xml(text),
                name
            ));
        }
        XmpValue::Array(values) | XmpValue::Bag(values) => {
            let container = if matches!(value, XmpValue::Array(_)) {
                "rdf:Seq"
            } else {
                "rdf:Bag"
            };
            xml.push_str(&format!("{}<{}>\n", indent, name));
            xml.push_str(&format!("{} <{}>\n", indent, container));
            for val in values {
                xml.push_str(&format!(
                    "{} <rdf:li>{}</rdf:li>\n",
                    indent,
                    Self::escape_xml(val)
                ));
            }
            xml.push_str(&format!("{} </{}>\n", indent, container));
            xml.push_str(&format!("{}</{}>\n", indent, name));
        }
        XmpValue::Alt(values) => {
            xml.push_str(&format!("{}<{}>\n", indent, name));
            xml.push_str(&format!("{} <rdf:Alt>\n", indent));
            for (lang, val) in values {
                xml.push_str(&format!(
                    "{} <rdf:li xml:lang=\"{}\">{}</rdf:li>\n",
                    indent,
                    Self::escape_xml(lang),
                    Self::escape_xml(val)
                ));
            }
            xml.push_str(&format!("{} </rdf:Alt>\n", indent));
            xml.push_str(&format!("{}</{}>\n", indent, name));
        }
        XmpValue::Struct(fields) => {
            let nested_indent = format!("{} ", indent);
            xml.push_str(&format!("{}<{}>\n", indent, name));
            xml.push_str(&format!("{} <rdf:Description>\n", indent));
            let mut ordered: Vec<_> = fields.iter().collect();
            ordered.sort_by(|a, b| a.0.cmp(b.0));
            for (field_name, field_value) in ordered {
                Self::serialize_value(xml, field_name, field_value, &nested_indent);
            }
            xml.push_str(&format!("{} </rdf:Description>\n", indent));
            xml.push_str(&format!("{}</{}>\n", indent, name));
        }
        XmpValue::ArrayStruct(items) => {
            let nested_indent = format!("{} ", indent);
            xml.push_str(&format!("{}<{}>\n", indent, name));
            xml.push_str(&format!("{} <rdf:Seq>\n", indent));
            for item in items {
                xml.push_str(&format!(
                    "{} <rdf:li rdf:parseType=\"Resource\">\n",
                    indent
                ));
                let mut ordered: Vec<_> = item.iter().collect();
                ordered.sort_by(|a, b| a.0.cmp(b.0));
                for (field_name, field_value) in ordered {
                    Self::serialize_value(xml, field_name, field_value, &nested_indent);
                }
                xml.push_str(&format!("{} </rdf:li>\n", indent));
            }
            xml.push_str(&format!("{} </rdf:Seq>\n", indent));
            xml.push_str(&format!("{}</{}>\n", indent, name));
        }
    }
}
/// Checks whether `date` has one of the accepted ISO 8601 shapes.
///
/// Accepted: `YYYY`, `YYYY-MM`, `YYYY-MM-DD`, and `YYYY-MM-DD` followed by `T` and a
/// time part. The year must be 1000..=9999, the month 1..=12, and the day must exist
/// in that month (leap years included). The time part is only checked loosely: it
/// must contain a `:`.
///
/// Works on bytes with checked ranges, so arbitrary (including non-ASCII) input
/// returns `false` and never panics.
fn is_valid_iso8601_date(date: &str) -> bool {
    // Parses a fixed-width field that must consist solely of ASCII digits
    // (no sign, no whitespace).
    fn digits(field: Option<&[u8]>) -> Option<u32> {
        let field = field?;
        if field.is_empty() || !field.iter().all(u8::is_ascii_digit) {
            return None;
        }
        Some(
            field
                .iter()
                .fold(0u32, |acc, b| acc * 10 + u32::from(*b - b'0')),
        )
    }

    let bytes = date.as_bytes();

    let year = match digits(bytes.get(0..4)) {
        Some(y) if (1000..=9999).contains(&y) => y,
        _ => return false,
    };
    if bytes.len() == 4 {
        return true;
    }

    if bytes.get(4) != Some(&b'-') {
        return false;
    }
    let month = match digits(bytes.get(5..7)) {
        Some(m) if (1..=12).contains(&m) => m,
        _ => return false,
    };
    if bytes.len() == 7 {
        return true;
    }

    if bytes.get(7) != Some(&b'-') {
        return false;
    }
    let is_leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        2 if is_leap_year => 29,
        2 => 28,
        4 | 6 | 9 | 11 => 30,
        _ => 31,
    };
    match digits(bytes.get(8..10)) {
        Some(d) if (1..=days_in_month).contains(&d) => {}
        _ => return false,
    }
    if bytes.len() == 10 {
        return true;
    }

    // Anything longer must continue with a `T`-separated time containing a colon.
    bytes.get(10) == Some(&b'T') && date.contains(':')
}
/// Escapes the five XML special characters so `text` is safe in element content
/// and in double- or single-quoted attribute values.
///
/// `&` -> `&amp;`, `<` -> `&lt;`, `>` -> `&gt;`, `"` -> `&quot;`, `'` -> `&apos;`.
/// Runs in a single pass; an already-escaped input is escaped again.
fn escape_xml(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_create_xmp_metadata() {
    // Each setter call appends exactly one property.
    let mut metadata = XmpMetadata::new();
    metadata.set_text(XmpNamespace::DublinCore, "title", "Test Document");
    metadata.set_text(XmpNamespace::DublinCore, "creator", "oxidize-pdf");
    metadata.set_date(XmpNamespace::XmpBasic, "CreateDate", "2025-10-08T12:00:00Z");

    let stored = metadata.properties();
    assert_eq!(stored.len(), 3);
}
#[test]
fn test_xmp_to_packet() {
    // The packet must carry the wrapper markers, the namespace declaration and the
    // properties, with `&` in text content written as the `&amp;` entity.
    let mut xmp = XmpMetadata::new();
    xmp.set_text(XmpNamespace::DublinCore, "title", "Test & Document");
    xmp.set_text(XmpNamespace::DublinCore, "creator", "Jane Doe");

    let packet = xmp.to_xmp_packet();

    assert!(packet.contains("<?xpacket begin"));
    assert!(packet.contains("xmlns:dc="));
    assert!(packet.contains("<dc:title>Test &amp; Document</dc:title>"));
    assert!(packet.contains("<dc:creator>Jane Doe</dc:creator>"));
    assert!(packet.contains("<?xpacket end="));
}
#[test]
fn test_xmp_arrays() {
    // Array values are serialized as an rdf:Seq with one rdf:li per entry.
    let subjects: Vec<String> = ["PDF", "Metadata", "XMP"]
        .iter()
        .map(|subject| subject.to_string())
        .collect();
    let mut metadata = XmpMetadata::new();
    metadata.set_array(XmpNamespace::DublinCore, "subject", subjects);

    let serialized = metadata.to_xmp_packet();
    assert!(serialized.contains("<rdf:Seq>"));
    assert!(serialized.contains("<rdf:li>PDF</rdf:li>"));
    assert!(serialized.contains("<rdf:li>Metadata</rdf:li>"));
}
#[test]
fn test_xmp_alt() {
    // Language alternatives are serialized as rdf:Alt with xml:lang attributes.
    let alternatives = vec![
        ("x-default".to_string(), "English description".to_string()),
        ("es".to_string(), "Descripción en español".to_string()),
    ];
    let mut metadata = XmpMetadata::new();
    metadata.set_alt(XmpNamespace::DublinCore, "description", alternatives);

    let serialized = metadata.to_xmp_packet();
    assert!(serialized.contains("<rdf:Alt>"));
    assert!(serialized.contains("xml:lang=\"x-default\""));
    assert!(serialized.contains("English description"));
}
#[test]
fn test_to_pdf_stream() {
    // The stream dictionary must identify the object as XML metadata.
    let mut metadata = XmpMetadata::new();
    metadata.set_text(XmpNamespace::DublinCore, "title", "Test");
    let stream = metadata.to_pdf_stream();

    let expected_type = PdfObject::Name(PdfName("Metadata".to_string()));
    assert_eq!(stream.dict.get("Type".into()), Some(&expected_type));

    let expected_subtype = PdfObject::Name(PdfName("XML".to_string()));
    assert_eq!(stream.dict.get("Subtype".into()), Some(&expected_subtype));
}
#[test]
fn test_xml_escape() {
    // All five XML special characters must come out as their entity references.
    assert_eq!(
        XmpMetadata::escape_xml("A & B < C"),
        "A &amp; B &lt; C"
    );
    assert_eq!(
        XmpMetadata::escape_xml("'quote' \"double\""),
        "&apos;quote&apos; &quot;double&quot;"
    );
}
#[test]
fn test_custom_namespace() {
    // A registered custom namespace is declared and used as the element prefix.
    let prefix = "custom".to_string();
    let uri = "http://example.com/ns/".to_string();

    let mut metadata = XmpMetadata::new();
    metadata.register_namespace(prefix.clone(), uri.clone());
    metadata.set_text(XmpNamespace::Custom(prefix, uri), "property", "value");

    let serialized = metadata.to_xmp_packet();
    assert!(serialized.contains("xmlns:custom=\"http://example.com/ns/\""));
    assert!(serialized.contains("<custom:property>value</custom:property>"));
}
#[test]
fn test_parse_simple_xmp() {
    // Two text properties and one date property are read from element content.
    let xml = r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:xmp="http://ns.adobe.com/xap/1.0/">
<dc:title>Parsed Title</dc:title>
<dc:creator>Test Creator</dc:creator>
<xmp:CreateDate>2025-10-08T12:00:00Z</xmp:CreateDate>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
<?xpacket end="w"?>"#;
    let parsed = XmpMetadata::from_xmp_packet(xml).unwrap();
    let props = parsed.properties();
    assert_eq!(props.len(), 3);

    let has_title = props.iter().any(|p| {
        p.name == "title" && matches!(&p.value, XmpValue::Text(t) if t == "Parsed Title")
    });
    assert!(has_title);

    let has_creator = props.iter().any(|p| {
        p.name == "creator" && matches!(&p.value, XmpValue::Text(t) if t == "Test Creator")
    });
    assert!(has_creator);

    let has_create_date = props
        .iter()
        .any(|p| p.name == "CreateDate" && matches!(&p.value, XmpValue::Date(_)));
    assert!(has_create_date);
}
#[test]
fn test_parse_xmp_bags() {
    // An rdf:Bag child becomes a single Bag property holding every rdf:li.
    let xml = r#"<?xpacket begin=""?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:subject>
<rdf:Bag>
<rdf:li>PDF</rdf:li>
<rdf:li>Metadata</rdf:li>
<rdf:li>XMP</rdf:li>
</rdf:Bag>
</dc:subject>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta><?xpacket end="w"?>"#;
    let parsed = XmpMetadata::from_xmp_packet(xml).unwrap();
    assert_eq!(parsed.properties().len(), 1);

    let items = match &parsed.properties()[0].value {
        XmpValue::Bag(items) => items,
        _ => panic!("Expected Bag value"),
    };
    assert_eq!(items.len(), 3);
    for expected in &["PDF", "Metadata", "XMP"] {
        assert!(items.contains(&expected.to_string()));
    }
}
#[test]
fn test_parse_xmp_alt() {
    // An rdf:Alt child becomes a single Alt property of (language, text) pairs.
    let xml = r#"<?xpacket begin=""?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:rights>
<rdf:Alt>
<rdf:li xml:lang="x-default">Copyright 2025</rdf:li>
<rdf:li xml:lang="es">Copyright 2025</rdf:li>
</rdf:Alt>
</dc:rights>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta><?xpacket end="w"?>"#;
    let parsed = XmpMetadata::from_xmp_packet(xml).unwrap();
    assert_eq!(parsed.properties().len(), 1);

    let entries = match &parsed.properties()[0].value {
        XmpValue::Alt(entries) => entries,
        _ => panic!("Expected Alt value"),
    };
    assert_eq!(entries.len(), 2);
    let has_entry = |wanted_lang: &str, wanted_text: &str| {
        entries
            .iter()
            .any(|(lang, val)| lang == wanted_lang && val == wanted_text)
    };
    assert!(has_entry("x-default", "Copyright 2025"));
    assert!(has_entry("es", "Copyright 2025"));
}
#[test]
fn test_roundtrip_xmp() {
    // Serializing and re-parsing must keep the number of properties.
    let mut original = XmpMetadata::new();
    original.set_text(XmpNamespace::DublinCore, "title", "Roundtrip Test");
    original.set_date(XmpNamespace::XmpBasic, "CreateDate", "2025-10-08T12:00:00Z");
    let subjects = vec!["Test".to_string(), "XMP".to_string()];
    original.set_bag(XmpNamespace::DublinCore, "subject", subjects);

    let serialized = original.to_xmp_packet();
    let reparsed = XmpMetadata::from_xmp_packet(&serialized).unwrap();

    assert_eq!(reparsed.properties().len(), 3);
}
#[test]
fn test_pdf_embedding() {
    use crate::document::Document;
    use crate::page::Page;

    // Build a minimal one-page document with title, author and subject set.
    let mut document = Document::new();
    document.set_title("PDF Embedding Test");
    document.set_author("oxidize-pdf Test Suite");
    document.set_subject("XMP Embedding Verification");
    document.add_page(Page::a4());

    let bytes = document.to_bytes().unwrap();
    assert!(!bytes.is_empty(), "PDF bytes should not be empty");
    assert!(
        bytes.starts_with(b"%PDF-"),
        "PDF should start with %PDF- header"
    );

    // The XMP packet and the document info must be visible in the raw output.
    let rendered = String::from_utf8_lossy(&bytes);
    assert!(
        rendered.contains("<?xpacket begin"),
        "PDF should contain XMP packet begin"
    );
    assert!(
        rendered.contains("</x:xmpmeta>"),
        "PDF should contain XMP metadata"
    );
    assert!(
        rendered.contains("PDF Embedding Test"),
        "PDF should contain document title in XMP"
    );
    assert!(
        rendered.contains("oxidize-pdf Test Suite"),
        "PDF should contain author in XMP"
    );
}
#[test]
fn test_structured_properties() {
    // A Struct property is serialized as a nested rdf:Description of its fields.
    let mut fields = HashMap::new();
    fields.insert("action".to_string(), XmpValue::Text("saved".to_string()));
    fields.insert(
        "when".to_string(),
        XmpValue::Date("2025-10-08T12:00:00Z".to_string()),
    );
    fields.insert(
        "softwareAgent".to_string(),
        XmpValue::Text("oxidize-pdf 1.4.0".to_string()),
    );

    let mut metadata = XmpMetadata::new();
    metadata.set_struct(XmpNamespace::XmpMediaManagement, "History", fields);
    assert_eq!(metadata.properties().len(), 1);

    let serialized = metadata.to_xmp_packet();
    for fragment in &[
        "<xmpMM:History>",
        "<rdf:Description>",
        "<action>saved</action>",
        "<when>2025-10-08T12:00:00Z</when>",
        "<softwareAgent>oxidize-pdf 1.4.0</softwareAgent>",
    ] {
        assert!(serialized.contains(fragment));
    }
}
#[test]
fn test_array_of_structs() {
    // Builds one history entry with an action and a timestamp.
    let entry = |action: &str, when: &str| {
        let mut fields = HashMap::new();
        fields.insert("action".to_string(), XmpValue::Text(action.to_string()));
        fields.insert("when".to_string(), XmpValue::Date(when.to_string()));
        fields
    };
    let history = vec![
        entry("created", "2025-10-08T10:00:00Z"),
        entry("saved", "2025-10-08T12:00:00Z"),
    ];

    let mut metadata = XmpMetadata::new();
    metadata.set_array_struct(XmpNamespace::XmpMediaManagement, "History", history);

    let serialized = metadata.to_xmp_packet();
    assert!(serialized.contains("<xmpMM:History>"));
    assert!(serialized.contains("<rdf:Seq>"));
    assert!(serialized.contains("rdf:parseType=\"Resource\""));
    assert!(serialized.contains("<action>created</action>"));
    assert!(serialized.contains("<action>saved</action>"));
}
#[test]
fn test_parse_structured_properties() {
    // A Seq of parseType="Resource" items is parsed into one ArrayStruct property.
    let xml = r#"<?xpacket begin=""?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/" xmlns:stEvt="http://ns.adobe.com/xap/1.0/sType/ResourceEvent#">
<xmpMM:History>
<rdf:Seq>
<rdf:li rdf:parseType="Resource">
<stEvt:action>saved</stEvt:action>
<stEvt:when>2025-10-08T12:00:00Z</stEvt:when>
</rdf:li>
</rdf:Seq>
</xmpMM:History>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta><?xpacket end="w"?>"#;
    let parsed = XmpMetadata::from_xmp_packet(xml).unwrap();
    assert_eq!(parsed.properties().len(), 1);

    let history = &parsed.properties()[0];
    assert_eq!(history.name, "History");

    let items = match &history.value {
        XmpValue::ArrayStruct(items) => items,
        _ => panic!("Expected ArrayStruct, got {:?}", history.value),
    };
    assert_eq!(items.len(), 1);

    // Field keys may be stored with or without their namespace prefix.
    let first = &items[0];
    assert!(
        first.contains_key("stEvt:action") || first.contains_key("action"),
        "Expected to find 'action' or 'stEvt:action', found keys: {:?}",
        first.keys().collect::<Vec<_>>()
    );
    assert!(
        first.contains_key("stEvt:when") || first.contains_key("when"),
        "Expected to find 'when' or 'stEvt:when', found keys: {:?}",
        first.keys().collect::<Vec<_>>()
    );
}
#[test]
fn test_date_validation() {
    // Every supported precision must be stored as a Date value.
    let mut metadata = XmpMetadata::new();
    metadata.set_date(XmpNamespace::XmpBasic, "CreateDate", "2025");
    metadata.set_date(XmpNamespace::XmpBasic, "ModifyDate", "2025-10");
    metadata.set_date(XmpNamespace::XmpBasic, "MetadataDate", "2025-10-08");
    metadata.set_date(
        XmpNamespace::XmpBasic,
        "DateTimeOriginal",
        "2025-10-08T12:00:00Z",
    );

    for prop in metadata.properties() {
        assert!(
            matches!(&prop.value, XmpValue::Date(_)),
            "Expected date, got {:?}",
            prop.value
        );
    }
}
#[test]
fn test_invalid_date_handling() {
    // Invalid month, invalid day and free text must never be stored as Date.
    let mut metadata = XmpMetadata::new();
    metadata.set_date(XmpNamespace::XmpBasic, "InvalidDate1", "2025-13-01");
    metadata.set_date(XmpNamespace::XmpBasic, "InvalidDate2", "2025-02-30");
    metadata.set_date(XmpNamespace::XmpBasic, "InvalidDate3", "not-a-date");

    for prop in metadata.properties() {
        if let XmpValue::Date(d) = &prop.value {
            panic!("Invalid date '{}' was not rejected", d);
        }
    }
}
#[test]
fn test_malformed_xml_rejection() {
    // Missing packet markers, truncated input and invalid markup must all fail.
    let missing_markers = r#"<rdf:RDF><rdf:Description/></rdf:RDF>"#;
    let truncated = r#"<?xpacket begin=""?><x:xmpmeta><rdf:RDF><rdf:Description"#;
    let invalid_markup =
        r#"<?xpacket begin=""?><x:xmpmeta><<<INVALID>>></x:xmpmeta><?xpacket end="w"?>"#;

    for packet in &[missing_markers, truncated, invalid_markup] {
        assert!(XmpMetadata::from_xmp_packet(packet).is_err());
    }
}
#[test]
fn test_complex_roundtrip() {
    // One property of each kind: text, date, array, bag, alt and struct.
    let mut original = XmpMetadata::new();
    original.set_text(XmpNamespace::DublinCore, "title", "Complex Test");
    original.set_date(XmpNamespace::XmpBasic, "CreateDate", "2025-10-08T12:00:00Z");

    let creators = vec!["Author 1".to_string(), "Author 2".to_string()];
    original.set_array(XmpNamespace::DublinCore, "creator", creators);

    let subjects = vec!["PDF".to_string(), "XMP".to_string(), "Metadata".to_string()];
    original.set_bag(XmpNamespace::DublinCore, "subject", subjects);

    let rights = vec![
        ("x-default".to_string(), "Copyright 2025".to_string()),
        ("en".to_string(), "Copyright 2025".to_string()),
        ("es".to_string(), "Derechos de autor 2025".to_string()),
    ];
    original.set_alt(XmpNamespace::DublinCore, "rights", rights);

    let mut history = HashMap::new();
    history.insert("action".to_string(), XmpValue::Text("created".to_string()));
    history.insert(
        "when".to_string(),
        XmpValue::Date("2025-10-08T10:00:00Z".to_string()),
    );
    original.set_struct(XmpNamespace::XmpMediaManagement, "History", history);

    let serialized = original.to_xmp_packet();
    let reparsed = XmpMetadata::from_xmp_packet(&serialized).unwrap();
    assert!(reparsed.properties().len() >= 6);
}
#[test]
fn test_iso8601_date_validation() {
    // Shapes that must be accepted.
    for valid in &[
        "2025",
        "2025-10",
        "2025-10-08",
        "2025-10-08T12:00:00Z",
        "2025-10-08T12:00:00+01:00",
    ] {
        assert!(XmpMetadata::is_valid_iso8601_date(valid));
    }

    // Empty input, free text, out-of-range month/day and a short year.
    for invalid in &[
        "",
        "not-a-date",
        "2025-13-01",
        "2025-02-30",
        "2025-04-31",
        "999",
    ] {
        assert!(!XmpMetadata::is_valid_iso8601_date(invalid));
    }
}
}