1use super::error::{PdfAError, PdfAResult};
4use super::types::PdfAConformance;
5use regex::Regex;
6use std::str::FromStr;
7
/// PDF/A identification properties from the `pdfaid` XMP namespace.
///
/// Written out by [`XmpPdfAIdentifier::to_rdf`] and filled in by
/// [`XmpMetadata::parse`] when both `pdfaid:part` and `pdfaid:conformance`
/// are present and convert successfully.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XmpPdfAIdentifier {
    /// Value of the `pdfaid:part` element.
    pub part: u8,
    /// Value of the `pdfaid:conformance` element.
    pub conformance: PdfAConformance,
    /// Value of the optional `pdfaid:amd` element.
    pub amd: Option<String>,
    /// Value of the optional `pdfaid:corr` element.
    pub corr: Option<String>,
}
20
21impl XmpPdfAIdentifier {
22 pub fn new(part: u8, conformance: PdfAConformance) -> Self {
24 Self {
25 part,
26 conformance,
27 amd: None,
28 corr: None,
29 }
30 }
31
32 pub fn to_rdf(&self) -> String {
34 let mut rdf = format!(
35 r#" <rdf:Description rdf:about=""
36 xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">
37 <pdfaid:part>{}</pdfaid:part>
38 <pdfaid:conformance>{}</pdfaid:conformance>"#,
39 self.part, self.conformance
40 );
41
42 if let Some(ref amd) = self.amd {
43 rdf.push_str(&format!("\n <pdfaid:amd>{}</pdfaid:amd>", amd));
44 }
45
46 if let Some(ref corr) = self.corr {
47 rdf.push_str(&format!("\n <pdfaid:corr>{}</pdfaid:corr>", corr));
48 }
49
50 rdf.push_str("\n </rdf:Description>");
51 rdf
52 }
53}
54
/// In-memory view of the XMP properties this module reads and writes.
///
/// Every field is optional; `Default` yields a value with all fields `None`.
#[derive(Debug, Clone, Default)]
pub struct XmpMetadata {
    /// Text of `dc:title`.
    pub title: Option<String>,
    /// Non-empty `rdf:li` entries found inside `dc:creator`.
    pub creator: Option<Vec<String>>,
    /// Text of `dc:description`.
    pub description: Option<String>,
    /// Non-empty `rdf:li` entries of `pdf:Keywords`, or of `dc:subject` when
    /// the former yields nothing. Read by `parse`; not written by `to_xml`.
    pub keywords: Option<Vec<String>>,
    /// Text of `xmp:CreateDate`, kept as the string found in the packet.
    pub create_date: Option<String>,
    /// Text of `xmp:ModifyDate`, kept as the string found in the packet.
    pub modify_date: Option<String>,
    /// Text of `xmp:CreatorTool`.
    pub creator_tool: Option<String>,
    /// PDF/A identification; required by `validate_for_pdfa`.
    pub pdfa_id: Option<XmpPdfAIdentifier>,
    /// Text of `xmpMM:DocumentID`.
    pub document_id: Option<String>,
    /// Text of `xmpMM:InstanceID`.
    pub instance_id: Option<String>,
}
79
80impl XmpMetadata {
81 pub fn new() -> Self {
83 Self::default()
84 }
85
86 pub fn parse(xml: &str) -> PdfAResult<Self> {
88 let mut metadata = Self::new();
89
90 if let Some(title) = Self::extract_simple_value(xml, "dc:title") {
92 metadata.title = Some(title);
93 }
94
95 if let Some(creator) = Self::extract_list_value(xml, "dc:creator") {
97 metadata.creator = Some(creator);
98 }
99
100 if let Some(desc) = Self::extract_simple_value(xml, "dc:description") {
102 metadata.description = Some(desc);
103 }
104
105 if let Some(keywords) = Self::extract_list_value(xml, "pdf:Keywords")
107 .or_else(|| Self::extract_list_value(xml, "dc:subject"))
108 {
109 metadata.keywords = Some(keywords);
110 }
111
112 if let Some(date) = Self::extract_simple_value(xml, "xmp:CreateDate") {
114 metadata.create_date = Some(date);
115 }
116 if let Some(date) = Self::extract_simple_value(xml, "xmp:ModifyDate") {
117 metadata.modify_date = Some(date);
118 }
119
120 if let Some(tool) = Self::extract_simple_value(xml, "xmp:CreatorTool") {
122 metadata.creator_tool = Some(tool);
123 }
124
125 if let (Some(part_str), Some(conf_str)) = (
127 Self::extract_simple_value(xml, "pdfaid:part"),
128 Self::extract_simple_value(xml, "pdfaid:conformance"),
129 ) {
130 if let (Ok(part), Ok(conformance)) =
131 (part_str.parse::<u8>(), PdfAConformance::from_str(&conf_str))
132 {
133 let mut pdfa_id = XmpPdfAIdentifier::new(part, conformance);
134 pdfa_id.amd = Self::extract_simple_value(xml, "pdfaid:amd");
135 pdfa_id.corr = Self::extract_simple_value(xml, "pdfaid:corr");
136 metadata.pdfa_id = Some(pdfa_id);
137 }
138 }
139
140 metadata.document_id = Self::extract_simple_value(xml, "xmpMM:DocumentID");
142 metadata.instance_id = Self::extract_simple_value(xml, "xmpMM:InstanceID");
143
144 Ok(metadata)
145 }
146
147 fn extract_simple_value(xml: &str, tag: &str) -> Option<String> {
149 let pattern = format!(r"<{tag}[^>]*>([^<]*)</{tag}>", tag = regex::escape(tag));
151 if let Ok(re) = Regex::new(&pattern) {
152 if let Some(caps) = re.captures(xml) {
153 return Some(caps[1].trim().to_string());
154 }
155 }
156
157 let alt_pattern = format!(
159 r"<{tag}[^>]*>\s*<rdf:Alt[^>]*>\s*<rdf:li[^>]*>([^<]*)</rdf:li>",
160 tag = regex::escape(tag)
161 );
162 if let Ok(re) = Regex::new(&alt_pattern) {
163 if let Some(caps) = re.captures(xml) {
164 return Some(caps[1].trim().to_string());
165 }
166 }
167
168 None
169 }
170
171 fn extract_list_value(xml: &str, tag: &str) -> Option<Vec<String>> {
173 let pattern = format!(r"(?s)<{tag}[^>]*>(.*?)</{tag}>", tag = regex::escape(tag));
175
176 if let Ok(re) = Regex::new(&pattern) {
177 if let Some(caps) = re.captures(xml) {
178 let content = &caps[1];
179 if let Ok(li_re) = Regex::new(r"<rdf:li[^>]*>([^<]*)</rdf:li>") {
181 let values: Vec<String> = li_re
182 .captures_iter(content)
183 .map(|c| c[1].trim().to_string())
184 .filter(|s| !s.is_empty())
185 .collect();
186 if !values.is_empty() {
187 return Some(values);
188 }
189 }
190 }
191 }
192
193 None
194 }
195
196 pub fn to_xml(&self) -> String {
198 let mut xml = String::from(
199 r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
200<x:xmpmeta xmlns:x="adobe:ns:meta/">
201 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">"#,
202 );
203
204 xml.push_str("\n <rdf:Description rdf:about=\"\"\n xmlns:dc=\"http://purl.org/dc/elements/1.1/\">");
206
207 if let Some(ref title) = self.title {
208 xml.push_str(&format!(
209 "\n <dc:title>\n <rdf:Alt>\n <rdf:li xml:lang=\"x-default\">{}</rdf:li>\n </rdf:Alt>\n </dc:title>",
210 Self::xml_escape(title)
211 ));
212 }
213
214 if let Some(ref creators) = self.creator {
215 xml.push_str("\n <dc:creator>\n <rdf:Seq>");
216 for creator in creators {
217 xml.push_str(&format!(
218 "\n <rdf:li>{}</rdf:li>",
219 Self::xml_escape(creator)
220 ));
221 }
222 xml.push_str("\n </rdf:Seq>\n </dc:creator>");
223 }
224
225 if let Some(ref desc) = self.description {
226 xml.push_str(&format!(
227 "\n <dc:description>\n <rdf:Alt>\n <rdf:li xml:lang=\"x-default\">{}</rdf:li>\n </rdf:Alt>\n </dc:description>",
228 Self::xml_escape(desc)
229 ));
230 }
231
232 xml.push_str("\n </rdf:Description>");
233
234 xml.push_str("\n <rdf:Description rdf:about=\"\"\n xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\">");
236
237 if let Some(ref tool) = self.creator_tool {
238 xml.push_str(&format!(
239 "\n <xmp:CreatorTool>{}</xmp:CreatorTool>",
240 Self::xml_escape(tool)
241 ));
242 }
243
244 if let Some(ref date) = self.create_date {
245 xml.push_str(&format!(
246 "\n <xmp:CreateDate>{}</xmp:CreateDate>",
247 date
248 ));
249 }
250
251 if let Some(ref date) = self.modify_date {
252 xml.push_str(&format!(
253 "\n <xmp:ModifyDate>{}</xmp:ModifyDate>",
254 date
255 ));
256 }
257
258 xml.push_str("\n </rdf:Description>");
259
260 if let Some(ref pdfa_id) = self.pdfa_id {
262 xml.push_str(&format!("\n{}", pdfa_id.to_rdf()));
263 }
264
265 if self.document_id.is_some() || self.instance_id.is_some() {
267 xml.push_str("\n <rdf:Description rdf:about=\"\"\n xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\">");
268 if let Some(ref doc_id) = self.document_id {
269 xml.push_str(&format!(
270 "\n <xmpMM:DocumentID>{}</xmpMM:DocumentID>",
271 doc_id
272 ));
273 }
274 if let Some(ref inst_id) = self.instance_id {
275 xml.push_str(&format!(
276 "\n <xmpMM:InstanceID>{}</xmpMM:InstanceID>",
277 inst_id
278 ));
279 }
280 xml.push_str("\n </rdf:Description>");
281 }
282
283 xml.push_str("\n </rdf:RDF>\n</x:xmpmeta>\n<?xpacket end=\"w\"?>");
284 xml
285 }
286
287 fn xml_escape(s: &str) -> String {
289 s.replace('&', "&")
290 .replace('<', "<")
291 .replace('>', ">")
292 .replace('"', """)
293 .replace('\'', "'")
294 }
295
296 pub fn validate_for_pdfa(&self) -> PdfAResult<()> {
298 if self.pdfa_id.is_none() {
300 return Err(PdfAError::XmpParseError(
301 "PDF/A identification is required".to_string(),
302 ));
303 }
304
305 Ok(())
306 }
307}
308
309#[cfg(test)]
310mod tests {
311 use super::*;
312
313 #[test]
314 fn test_xmp_pdfa_identifier_new() {
315 let id = XmpPdfAIdentifier::new(1, PdfAConformance::B);
316 assert_eq!(id.part, 1);
317 assert_eq!(id.conformance, PdfAConformance::B);
318 assert!(id.amd.is_none());
319 assert!(id.corr.is_none());
320 }
321
322 #[test]
323 fn test_xmp_pdfa_identifier_to_rdf() {
324 let id = XmpPdfAIdentifier::new(2, PdfAConformance::U);
325 let rdf = id.to_rdf();
326 assert!(rdf.contains("<pdfaid:part>2</pdfaid:part>"));
327 assert!(rdf.contains("<pdfaid:conformance>U</pdfaid:conformance>"));
328 }
329
330 #[test]
331 fn test_xmp_metadata_new() {
332 let metadata = XmpMetadata::new();
333 assert!(metadata.title.is_none());
334 assert!(metadata.creator.is_none());
335 assert!(metadata.pdfa_id.is_none());
336 }
337
338 #[test]
339 fn test_xmp_metadata_parse_title() {
340 let xml = r#"<dc:title><rdf:Alt><rdf:li xml:lang="x-default">Test Title</rdf:li></rdf:Alt></dc:title>"#;
341 let metadata = XmpMetadata::parse(xml).unwrap();
342 assert_eq!(metadata.title.as_deref(), Some("Test Title"));
343 }
344
345 #[test]
346 fn test_xmp_metadata_parse_pdfa_id() {
347 let xml = r#"
348 <pdfaid:part>1</pdfaid:part>
349 <pdfaid:conformance>B</pdfaid:conformance>
350 "#;
351 let metadata = XmpMetadata::parse(xml).unwrap();
352 assert!(metadata.pdfa_id.is_some());
353 let pdfa_id = metadata.pdfa_id.unwrap();
354 assert_eq!(pdfa_id.part, 1);
355 assert_eq!(pdfa_id.conformance, PdfAConformance::B);
356 }
357
358 #[test]
359 fn test_xmp_metadata_parse_creator_list() {
360 let xml = r#"
361 <dc:creator>
362 <rdf:Seq>
363 <rdf:li>Author One</rdf:li>
364 <rdf:li>Author Two</rdf:li>
365 </rdf:Seq>
366 </dc:creator>
367 "#;
368 let metadata = XmpMetadata::parse(xml).unwrap();
369 assert!(metadata.creator.is_some());
370 let creators = metadata.creator.unwrap();
371 assert_eq!(creators.len(), 2);
372 assert_eq!(creators[0], "Author One");
373 assert_eq!(creators[1], "Author Two");
374 }
375
376 #[test]
377 fn test_xmp_metadata_to_xml() {
378 let mut metadata = XmpMetadata::new();
379 metadata.title = Some("Test Document".to_string());
380 metadata.creator = Some(vec!["Test Author".to_string()]);
381 metadata.pdfa_id = Some(XmpPdfAIdentifier::new(1, PdfAConformance::B));
382
383 let xml = metadata.to_xml();
384 assert!(xml.contains("Test Document"));
385 assert!(xml.contains("Test Author"));
386 assert!(xml.contains("pdfaid:part"));
387 }
388
389 #[test]
390 fn test_xmp_metadata_validate_for_pdfa_missing_id() {
391 let metadata = XmpMetadata::new();
392 assert!(metadata.validate_for_pdfa().is_err());
393 }
394
395 #[test]
396 fn test_xmp_metadata_validate_for_pdfa_with_id() {
397 let mut metadata = XmpMetadata::new();
398 metadata.pdfa_id = Some(XmpPdfAIdentifier::new(1, PdfAConformance::B));
399 assert!(metadata.validate_for_pdfa().is_ok());
400 }
401
402 #[test]
403 fn test_xml_escape() {
404 assert_eq!(XmpMetadata::xml_escape("<test>"), "<test>");
405 assert_eq!(XmpMetadata::xml_escape("a & b"), "a & b");
406 assert_eq!(XmpMetadata::xml_escape("\"quoted\""), ""quoted"");
407 }
408
409 #[test]
410 fn test_xmp_pdfa_identifier_with_amd() {
411 let mut id = XmpPdfAIdentifier::new(1, PdfAConformance::B);
412 id.amd = Some("amd1".to_string());
413 let rdf = id.to_rdf();
414 assert!(rdf.contains("<pdfaid:amd>amd1</pdfaid:amd>"));
415 }
416
417 #[test]
418 fn test_xmp_metadata_parse_dates() {
419 let xml = r#"
420 <xmp:CreateDate>2024-01-15T10:30:00Z</xmp:CreateDate>
421 <xmp:ModifyDate>2024-01-16T14:00:00Z</xmp:ModifyDate>
422 "#;
423 let metadata = XmpMetadata::parse(xml).unwrap();
424 assert_eq!(
425 metadata.create_date.as_deref(),
426 Some("2024-01-15T10:30:00Z")
427 );
428 assert_eq!(
429 metadata.modify_date.as_deref(),
430 Some("2024-01-16T14:00:00Z")
431 );
432 }
433
434 #[test]
435 fn test_xmp_metadata_roundtrip() {
436 let mut original = XmpMetadata::new();
437 original.title = Some("Roundtrip Test".to_string());
438 original.creator = Some(vec!["Author".to_string()]);
439 original.pdfa_id = Some(XmpPdfAIdentifier::new(2, PdfAConformance::U));
440
441 let xml = original.to_xml();
442 let parsed = XmpMetadata::parse(&xml).unwrap();
443
444 assert_eq!(parsed.title, original.title);
445 assert_eq!(parsed.pdfa_id.as_ref().unwrap().part, 2);
446 assert_eq!(
447 parsed.pdfa_id.as_ref().unwrap().conformance,
448 PdfAConformance::U
449 );
450 }
451
452 #[test]
453 fn test_xmp_metadata_parse_simple_tag() {
454 let xml = r#"<xmp:CreatorTool>oxidize-pdf 1.6.0</xmp:CreatorTool>"#;
455 let metadata = XmpMetadata::parse(xml).unwrap();
456 assert_eq!(metadata.creator_tool.as_deref(), Some("oxidize-pdf 1.6.0"));
457 }
458
459 #[test]
460 fn test_xmp_pdfa_identifier_clone() {
461 let id1 = XmpPdfAIdentifier::new(3, PdfAConformance::A);
462 let id2 = id1.clone();
463 assert_eq!(id1, id2);
464 }
465
466 #[test]
467 fn test_xmp_metadata_clone() {
468 let mut metadata = XmpMetadata::new();
469 metadata.title = Some("Clone Test".to_string());
470 let cloned = metadata.clone();
471 assert_eq!(cloned.title, metadata.title);
472 }
473}