Skip to main content

fop_render/pdf/
compliance.rs

1//! PDF compliance modes (PDF/A-1b and PDF/UA-1)
2//!
3//! Implements ISO 19005-1 (PDF/A-1b) and ISO 14289-1 (PDF/UA-1) compliance.
4
/// Conformance profile the PDF output is asked to satisfy.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum PdfCompliance {
    /// Plain PDF with no additional conformance constraints.
    #[default]
    Standard,
    /// PDF/A-1b, the archival profile (ISO 19005-1).
    PdfA1b,
    /// PDF/UA-1, the accessibility profile (ISO 14289-1).
    PdfUA1,
    /// PDF/A-1b and PDF/UA-1 combined.
    PdfA1bUA1,
}

impl PdfCompliance {
    /// Returns `true` when this mode includes PDF/A-1b.
    pub fn requires_pdfa(&self) -> bool {
        // Exhaustive on purpose: a new variant must decide explicitly.
        match self {
            Self::PdfA1b | Self::PdfA1bUA1 => true,
            Self::Standard | Self::PdfUA1 => false,
        }
    }

    /// Returns `true` when this mode includes PDF/UA-1.
    pub fn requires_pdfua(&self) -> bool {
        // Exhaustive on purpose: a new variant must decide explicitly.
        match self {
            Self::PdfUA1 | Self::PdfA1bUA1 => true,
            Self::Standard | Self::PdfA1b => false,
        }
    }
}
30
/// Minimal sRGB ICC profile (v2, 476 bytes).
///
/// A compact three-component matrix/TRC display profile meant to be embedded
/// as the OutputIntent destination profile of PDF/A-1b documents. It carries
/// the D50-adapted sRGB colorants and one gamma curve shared by all channels.
///
/// Layout (476 bytes total, declared in header bytes 0-3):
/// - Header:        128 bytes (offsets   0–127)
/// - Tag count:       4 bytes (offsets 128–131) = 9 tags
/// - Tag table:     108 bytes (offsets 132–239) = 9 × 12 bytes
/// - desc data:     100 bytes (offsets 240–339, 0x00F0–0x0153)
/// - cprt data:      42 bytes (offsets 340–381, 0x0154–0x017D)
/// - wtpt data:      20 bytes (offsets 382–401, 0x017E–0x0191)
/// - rXYZ data:      20 bytes (offsets 402–421, 0x0192–0x01A5)
/// - gXYZ data:      20 bytes (offsets 422–441, 0x01A6–0x01B9)
/// - bXYZ data:      20 bytes (offsets 442–461, 0x01BA–0x01CD)
/// - TRC data:       14 bytes (offsets 462–475, 0x01CE–0x01DB) shared by r/g/bTRC
///
/// Invariants to keep when editing the table:
/// - the header rendering-intent field is 0 (perceptual);
/// - the rXYZ + gXYZ + bXYZ colorants add up to the D50 `wtpt` value (within
///   s15Fixed16 rounding), otherwise R=G=B no longer maps to a neutral colour.
///
/// NOTE(review): the tag data from `wtpt` onwards starts at offsets that are
/// not multiples of four, and the `desc` element is shorter than a full
/// textDescriptionType record. Strict ICC validators may complain — confirm
/// against the PDF/A validator in use before relying on strict conformance.
pub const SRGB_ICC_PROFILE: &[u8] = &[
    // ── ICC profile header (128 bytes, offsets 0–127) ───────────────────────
    0x00, 0x00, 0x01, 0xDC, // [0]  profile size = 476 (0x01DC)
    0x00, 0x00, 0x00, 0x00, // [4]  CMM type (none)
    0x02, 0x10, 0x00, 0x00, // [8]  profile version 2.1.0
    0x6D, 0x6E, 0x74, 0x72, // [12] profile class: mntr (display device profile)
    0x52, 0x47, 0x42, 0x20, // [16] colour space: RGB
    0x58, 0x59, 0x5A, 0x20, // [20] PCS: XYZ
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // [24] date/time
    0x61, 0x63, 0x73, 0x70, // [36] file signature: acsp
    0x4D, 0x53, 0x46, 0x54, // [40] platform: MSFT
    0x00, 0x00, 0x00, 0x00, // [44] profile flags
    0x49, 0x45, 0x43, 0x20, // [48] device manufacturer: IEC
    0x73, 0x52, 0x47, 0x42, // [52] device model: sRGB
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // [56] device attributes
    0x00, 0x00, 0x00, 0x00, // [64] rendering intent: 0 = perceptual
    // [68] PCS illuminant (D50 in s15Fixed16)
    0x00, 0x00, 0xF6, 0xD6, // X = 0.9642
    0x00, 0x01, 0x00, 0x00, // Y = 1.0000
    0x00, 0x00, 0xD3, 0x2D, // Z = 0.8249
    0x48, 0x50, 0x20, 0x20, // [80] profile creator: HP
    // [84] MD5 profile ID (zeroed — optional for PDF/A)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    // [100] reserved (28 bytes)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    // ── Tag count (4 bytes, offset 128) ─────────────────────────────────────
    0x00, 0x00, 0x00, 0x09, // 9 tags
    // ── Tag table (9 × 12 = 108 bytes, offsets 132–239) ─────────────────────
    // Each entry: 4-byte signature, 4-byte offset, 4-byte size
    // desc  @ offset 0x00F0 (240), size 0x64 (100)
    0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0xF0, 0x00, 0x00, 0x00, 0x64,
    // cprt  @ offset 0x0154 (340), size 0x2A (42)
    0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x54, 0x00, 0x00, 0x00, 0x2A,
    // wtpt  @ offset 0x017E (382), size 0x14 (20)
    0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x01, 0x7E, 0x00, 0x00, 0x00, 0x14,
    // rXYZ  @ offset 0x0192 (402), size 0x14 (20)
    0x72, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x01, 0x92, 0x00, 0x00, 0x00, 0x14,
    // gXYZ  @ offset 0x01A6 (422), size 0x14 (20)
    0x67, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x01, 0xA6, 0x00, 0x00, 0x00, 0x14,
    // bXYZ  @ offset 0x01BA (442), size 0x14 (20)
    0x62, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x01, 0xBA, 0x00, 0x00, 0x00, 0x14,
    // rTRC  @ offset 0x01CE (462), size 0x0E (14)  ← all three TRC share this
    0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0xCE, 0x00, 0x00, 0x00, 0x0E,
    // gTRC  @ offset 0x01CE (462), size 0x0E (14)
    0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0xCE, 0x00, 0x00, 0x00, 0x0E,
    // bTRC  @ offset 0x01CE (462), size 0x0E (14)
    0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0xCE, 0x00, 0x00, 0x00, 0x0E,
    // ── desc tag data (offset 240 = 0x00F0, size 100 = 0x64) ────────────────
    0x64, 0x65, 0x73, 0x63, // type signature: desc
    0x00, 0x00, 0x00, 0x00, // reserved
    0x00, 0x00, 0x00, 0x13, // ASCII string length = 19
    0x73, 0x52, 0x47, 0x42, 0x20, 0x49, 0x45, 0x43, // "sRGB IEC"
    0x36, 0x31, 0x39, 0x36, 0x36, 0x2D, 0x32, 0x2D, // "61966-2-"
    0x31, 0x00, 0x00, // "1" + 2-byte null terminator
    // padding: type(4)+reserved(4)+len(4)+string17(17)+null2(2) = 31 used → 69 pad bytes needed
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00,
    // ── cprt tag data (offset 340 = 0x0154, size 42 = 0x2A) ─────────────────
    0x74, 0x65, 0x78, 0x74, // type signature: text
    0x00, 0x00, 0x00, 0x00, // reserved
    // "Copyright IEC http://www.iec.ch" + null (32 bytes) + 2 bytes padding = 34 bytes
    0x43, 0x6F, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68, // "Copyrigh"
    0x74, 0x20, 0x49, 0x45, 0x43, 0x20, 0x68, 0x74, // "t IEC ht"
    0x74, 0x70, 0x3A, 0x2F, 0x2F, 0x77, 0x77, 0x77, // "tp://www"
    0x2E, 0x69, 0x65, 0x63, 0x2E, 0x63, 0x68, 0x00, // ".iec.ch\0"
    0x00, 0x00, // 2-byte pad to 42 total
    // ── wtpt tag data (offset 382 = 0x017E, size 20 = 0x14) ─────────────────
    // D50 white point in s15Fixed16: X=0.9642, Y=1.0000, Z=0.8249
    0x58, 0x59, 0x5A, 0x20, // type: XYZ
    0x00, 0x00, 0x00, 0x00, // reserved
    0x00, 0x00, 0xF6, 0xD6, // X = 0.9642
    0x00, 0x01, 0x00, 0x00, // Y = 1.0000
    0x00, 0x00, 0xD3, 0x2D, // Z = 0.8249
    // ── rXYZ tag data (offset 402 = 0x0192, size 20 = 0x14) ─────────────────
    // sRGB red colorant, adapted to D50
    0x58, 0x59, 0x5A, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6F, 0xA2, // X = 0.4361
    0x00, 0x00, 0x38, 0xF5, // Y = 0.2225
    0x00, 0x00, 0x03, 0x90, // Z = 0.0139
    // ── gXYZ tag data (offset 422 = 0x01A6, size 20 = 0x14) ─────────────────
    // sRGB green colorant, adapted to D50
    0x58, 0x59, 0x5A, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x99, // X = 0.3851
    0x00, 0x00, 0xB7, 0x85, // Y = 0.7169
    0x00, 0x00, 0x18, 0xDA, // Z = 0.0971
    // ── bXYZ tag data (offset 442 = 0x01BA, size 20 = 0x14) ─────────────────
    // sRGB blue colorant, adapted to D50
    0x58, 0x59, 0x5A, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0xA0, // X = 0.1431
    0x00, 0x00, 0x0F, 0x84, // Y = 0.0606
    0x00, 0x00, 0xB6, 0xCF, // Z = 0.7141
    // ── TRC tag data (offset 462 = 0x01CE, size 14 = 0x0E) ──────────────────
    // Shared by rTRC, gTRC, bTRC.  Single gamma value ≈ 2.2 (563/256).
    0x63, 0x75, 0x72, 0x76, // type: curv
    0x00, 0x00, 0x00, 0x00, // reserved
    0x00, 0x00, 0x00, 0x01, // count = 1 (single gamma entry)
    0x02,
    0x33, // gamma = 563/256 ≈ 2.20 (u8Fixed8)
          // ── END (total 476 bytes = 0x01DC) ──────────────────────────────────────
];
149
150/// Generate XMP metadata stream for PDF/A compliance.
151///
152/// Returns the complete XMP packet as a UTF-8 string, including the required
153/// Adobe XML namespace declarations and optional PDF/A and PDF/UA identifiers.
154pub fn generate_xmp_metadata(
155    title: Option<&str>,
156    creator_tool: &str,
157    compliance: PdfCompliance,
158) -> String {
159    let title_str = title.unwrap_or("Untitled");
160
161    let pdfa_part = if compliance.requires_pdfa() {
162        r#"  <rdf:Description rdf:about=""
163     xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">
164   <pdfaid:part>1</pdfaid:part>
165   <pdfaid:conformance>B</pdfaid:conformance>
166  </rdf:Description>
167"#
168    } else {
169        ""
170    };
171
172    let pdfua_part = if compliance.requires_pdfua() {
173        r#"  <rdf:Description rdf:about=""
174     xmlns:pdfuaid="http://www.aiim.org/pdfua/ns/id/">
175   <pdfuaid:part>1</pdfuaid:part>
176  </rdf:Description>
177"#
178    } else {
179        ""
180    };
181
182    format!(
183        "<?xpacket begin=\"\u{FEFF}\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n\
184<x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n \
185<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n  \
186<rdf:Description rdf:about=\"\"\n     \
187xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n     \
188xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\">\n   \
189<dc:title>\n    <rdf:Alt>\n     \
190<rdf:li xml:lang=\"x-default\">{title}</rdf:li>\n    \
191</rdf:Alt>\n   </dc:title>\n   \
192<dc:format>application/pdf</dc:format>\n   \
193<xmp:CreatorTool>{tool}</xmp:CreatorTool>\n  \
194</rdf:Description>\n\
195{pdfa}{pdfua}\
196</rdf:RDF>\n\
197</x:xmpmeta>\n\
198<?xpacket end=\"w\"?>",
199        title = title_str,
200        tool = creator_tool,
201        pdfa = pdfa_part,
202        pdfua = pdfua_part,
203    )
204}
205
/// Dublin Core values pulled out of an XMP packet.
///
/// Every member is optional: `None` means the corresponding `dc:` element was
/// missing from the packet or could not be read.
#[derive(Clone, Debug, Default)]
pub struct DcFields {
    /// Document title (`dc:title`).
    pub title: Option<String>,
    /// Author or creator of the document (`dc:creator`).
    pub creator: Option<String>,
    /// Subject or description of the document (`dc:description`).
    pub description: Option<String>,
    /// Publication or creation date (`dc:date`).
    pub date: Option<String>,
    /// Copyright or rights statement (`dc:rights`).
    pub rights: Option<String>,
    /// Document language such as "en" or "fr" (`dc:language`).
    pub language: Option<String>,
}
225
226/// Extract Dublin Core metadata fields from a raw XMP packet string.
227///
228/// Uses lightweight text-search heuristics rather than a full XML parser so
229/// that there are no additional dependencies.  This is intentionally
230/// best-effort: if a field uses a non-standard layout it may not be found.
231pub fn extract_dc_fields(xmp: &str) -> DcFields {
232    DcFields {
233        title: extract_dc_value(xmp, "title"),
234        creator: extract_dc_value(xmp, "creator"),
235        description: extract_dc_value(xmp, "description"),
236        date: extract_dc_value(xmp, "date"),
237        rights: extract_dc_value(xmp, "rights"),
238        language: extract_dc_value(xmp, "language"),
239    }
240}
241
/// Extract the text value of a `dc:<tag>` element from an XMP string.
///
/// Handles the common `<rdf:Alt><rdf:li ...>VALUE</rdf:li></rdf:Alt>` pattern
/// (used by dc:title) as well as the simpler `<dc:TAG>VALUE</dc:TAG>` pattern.
/// Only the first `<dc:TAG>` element and, inside it, the first `<rdf:li>` are
/// considered.
///
/// The returned value is trimmed and has the five predefined XML entities
/// (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`) decoded, so callers receive
/// plain text rather than XML-escaped text.
///
/// Returns `None` when the element is absent, is not closed, or its value is
/// empty — including the case of an empty `<rdf:li>`, which must not leak the
/// surrounding `<rdf:Alt>` markup as if it were the value.
fn extract_dc_value(xmp: &str, tag: &str) -> Option<String> {
    const LI_OPEN: &str = "<rdf:li";
    const LI_CLOSE: &str = "</rdf:li";

    let open_tag = format!("<dc:{}>", tag);
    let close_tag = format!("</dc:{}>", tag);

    // Content between the first <dc:TAG> and the next </dc:TAG>.
    let (_, after_open) = xmp.split_once(open_tag.as_str())?;
    let (inner, _) = after_open.split_once(close_tag.as_str())?;

    let raw = match inner.find(LI_OPEN) {
        Some(li_start) => {
            // Skip the rest of the opening <rdf:li ...> tag (attributes and '>').
            let after_name = &inner[li_start + LI_OPEN.len()..];
            let (_, after_gt) = after_name.split_once('>')?;
            let (text, _) = after_gt.split_once(LI_CLOSE)?;
            text
        }
        // Plain text content directly inside <dc:TAG>...</dc:TAG>.
        None => inner,
    };

    let value = raw.trim();
    if value.is_empty() {
        None
    } else {
        Some(decode_xml_entities(value))
    }
}

/// Decode the five predefined XML entities in `text`.
///
/// `&amp;` is handled last so that an escaped ampersand followed by entity-like
/// text (e.g. `&amp;lt;`) decodes to the literal `&lt;` rather than `<`.
fn decode_xml_entities(text: &str) -> String {
    if !text.contains('&') {
        return text.to_string();
    }
    text.replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&apos;", "'")
        .replace("&amp;", "&")
}
278
279/// Reconcile a user-supplied XMP packet for embedding in a PDF.
280///
281/// Performs the following transformations:
282/// 1. Wraps the packet in `<?xpacket begin="…" id="…"?>` / `<?xpacket end="w"?>`
283///    headers if they are absent.
284/// 2. Splices compliance identifiers (`pdfaid` for PDF/A-1b, `pdfuaid` for
285///    PDF/UA-1) into the RDF graph when the corresponding compliance mode is
286///    active and those blocks are not already present.
287///
288/// # Arguments
289/// * `source` — raw XMP XML string (must contain `<x:xmpmeta ...>`)
290/// * `compliance` — current PDF compliance mode
291pub fn reconcile_xmp(source: &str, compliance: PdfCompliance) -> String {
292    // Step 1: Ensure <?xpacket?> wrappers are present
293    let with_wrappers = if source.contains("<?xpacket") {
294        source.to_string()
295    } else {
296        // Wrap the entire source in xpacket processing instructions
297        format!(
298            "<?xpacket begin=\"\u{FEFF}\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n{}\n<?xpacket end=\"w\"?>",
299            source
300        )
301    };
302
303    // Step 2: Splice in compliance identifiers if not already present
304    let pdfa_block = r#"  <rdf:Description rdf:about=""
305     xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">
306   <pdfaid:part>1</pdfaid:part>
307   <pdfaid:conformance>B</pdfaid:conformance>
308  </rdf:Description>"#;
309
310    let pdfua_block = r#"  <rdf:Description rdf:about=""
311     xmlns:pdfuaid="http://www.aiim.org/pdfua/ns/id/">
312   <pdfuaid:part>1</pdfuaid:part>
313  </rdf:Description>"#;
314
315    // Determine which blocks need to be injected
316    let needs_pdfa = compliance.requires_pdfa() && !with_wrappers.contains("pdfaid:part");
317    let needs_pdfua = compliance.requires_pdfua() && !with_wrappers.contains("pdfuaid:part");
318
319    if !needs_pdfa && !needs_pdfua {
320        return with_wrappers;
321    }
322
323    // Inject just before </rdf:RDF>
324    let injection_point = "</rdf:RDF>";
325    if let Some(rdf_close_pos) = with_wrappers.find(injection_point) {
326        let mut injected = String::with_capacity(with_wrappers.len() + 256);
327        injected.push_str(&with_wrappers[..rdf_close_pos]);
328        if needs_pdfa {
329            injected.push('\n');
330            injected.push_str(pdfa_block);
331            injected.push('\n');
332        }
333        if needs_pdfua {
334            injected.push('\n');
335            injected.push_str(pdfua_block);
336            injected.push('\n');
337        }
338        injected.push_str(&with_wrappers[rdf_close_pos..]);
339        injected
340    } else {
341        // No </rdf:RDF> found — return with wrappers only, no injection
342        with_wrappers
343    }
344}
345
#[cfg(test)]
mod tests {
    use super::*;

    /// The derived default must be the unconstrained mode.
    #[test]
    fn test_compliance_default() {
        assert_eq!(PdfCompliance::default(), PdfCompliance::Standard);
    }

    /// PDF/A flags for the archival, combined and standard modes.
    #[test]
    fn test_compliance_pdfa_flags() {
        let archival = PdfCompliance::PdfA1b;
        assert!(archival.requires_pdfa());
        assert!(!archival.requires_pdfua());

        let combined = PdfCompliance::PdfA1bUA1;
        assert!(combined.requires_pdfa());
        assert!(combined.requires_pdfua());

        assert!(!PdfCompliance::Standard.requires_pdfa());
    }

    /// PDF/A-1b output carries the pdfaid block and nothing for PDF/UA.
    #[test]
    fn test_xmp_metadata_pdfa() {
        let packet = generate_xmp_metadata(Some("Test Doc"), "fop-rs", PdfCompliance::PdfA1b);
        assert!(packet.contains("pdfaid:part"));
        assert!(packet.contains("<pdfaid:conformance>B</pdfaid:conformance>"));
        assert!(!packet.contains("pdfuaid"));
    }

    /// PDF/UA-1 output carries the pdfuaid block only.
    #[test]
    fn test_xmp_metadata_pdfua() {
        let packet = generate_xmp_metadata(None, "fop-rs", PdfCompliance::PdfUA1);
        assert!(!packet.contains("pdfaid:part"));
        assert!(packet.contains("pdfuaid:part"));
    }

    /// The combined mode carries both identification blocks.
    #[test]
    fn test_xmp_metadata_combined() {
        let packet = generate_xmp_metadata(Some("Test"), "fop-rs", PdfCompliance::PdfA1bUA1);
        for marker in ["pdfaid:part", "pdfuaid:part"] {
            assert!(packet.contains(marker));
        }
    }

    /// The size declared in the first four header bytes must equal the
    /// actual length of the embedded byte table.
    #[test]
    fn test_srgb_icc_profile_size() {
        let actual = SRGB_ICC_PROFILE.len();
        assert!(
            actual >= 128,
            "ICC profile must be at least 128 bytes (header only)"
        );

        let mut size_field = [0u8; 4];
        size_field.copy_from_slice(&SRGB_ICC_PROFILE[..4]);
        let declared = u32::from_be_bytes(size_field) as usize;

        assert_eq!(
            declared, actual,
            "ICC header declares {declared} bytes but array has {} bytes",
            actual
        );
    }
}
409
#[cfg(test)]
mod tests_extended {
    use super::*;

    /// Bare `x:xmpmeta` element with an empty RDF graph and no xpacket wrappers.
    const BARE_XMPMETA: &str = r#"<x:xmpmeta xmlns:x="adobe:ns:meta/"><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF></x:xmpmeta>"#;

    // ── PdfCompliance tests ──────────────────────────────────────────────────

    #[test]
    fn test_compliance_standard_requires_nothing() {
        let mode = PdfCompliance::Standard;
        assert!(!mode.requires_pdfa());
        assert!(!mode.requires_pdfua());
    }

    #[test]
    fn test_compliance_pdfua_only() {
        let mode = PdfCompliance::PdfUA1;
        assert!(!mode.requires_pdfa());
        assert!(mode.requires_pdfua());
    }

    #[test]
    fn test_compliance_pdfa_variant_name() {
        // Neighbouring variants must compare as different values.
        let neighbours = [
            (PdfCompliance::Standard, PdfCompliance::PdfA1b),
            (PdfCompliance::PdfA1b, PdfCompliance::PdfUA1),
            (PdfCompliance::PdfUA1, PdfCompliance::PdfA1bUA1),
        ];
        for (left, right) in neighbours {
            assert_ne!(left, right);
        }
    }

    #[test]
    fn test_compliance_copy_clone() {
        let original = PdfCompliance::PdfA1b;
        let duplicate = original;
        assert_eq!(original, duplicate);
    }

    // ── generate_xmp_metadata tests ──────────────────────────────────────────

    #[test]
    fn test_xmp_standard_contains_no_compliance_ids() {
        let packet = generate_xmp_metadata(Some("Doc"), "fop-rs", PdfCompliance::Standard);
        assert!(!packet.contains("pdfaid"));
        assert!(!packet.contains("pdfuaid"));
    }

    #[test]
    fn test_xmp_metadata_contains_title() {
        let packet = generate_xmp_metadata(Some("My Title"), "fop-rs", PdfCompliance::Standard);
        assert!(packet.contains("My Title"));
    }

    #[test]
    fn test_xmp_metadata_contains_creator_tool() {
        let packet = generate_xmp_metadata(None, "fop-render v1.0", PdfCompliance::Standard);
        assert!(packet.contains("fop-render v1.0"));
    }

    #[test]
    fn test_xmp_metadata_no_title_uses_untitled() {
        let packet = generate_xmp_metadata(None, "fop", PdfCompliance::Standard);
        assert!(packet.contains("Untitled"));
    }

    #[test]
    fn test_xmp_metadata_starts_with_xpacket() {
        let packet = generate_xmp_metadata(None, "fop", PdfCompliance::Standard);
        assert!(packet.starts_with("<?xpacket"));
    }

    #[test]
    fn test_xmp_metadata_ends_with_xpacket() {
        let packet = generate_xmp_metadata(None, "fop", PdfCompliance::Standard);
        assert!(packet.ends_with("?>"));
    }

    // ── SRGB_ICC_PROFILE header tests ────────────────────────────────────────

    #[test]
    fn test_srgb_icc_profile_starts_with_signature() {
        // Profile class at offsets 12–15 must be 'mntr' (0x6D 0x6E 0x74 0x72).
        assert_eq!(
            &SRGB_ICC_PROFILE[12..16],
            b"mntr",
            "ICC profile class should be 'mntr'"
        );
    }

    #[test]
    fn test_srgb_icc_profile_colour_space_rgb() {
        // Colour space at offsets 16–19 must be 'RGB ' (0x52 0x47 0x42 0x20).
        assert_eq!(
            &SRGB_ICC_PROFILE[16..20],
            b"RGB ",
            "ICC colour space should be 'RGB '"
        );
    }

    #[test]
    fn test_srgb_icc_profile_pcs_xyz() {
        // Profile connection space at offsets 20–23 must be 'XYZ ' (0x58 0x59 0x5A 0x20).
        assert_eq!(
            &SRGB_ICC_PROFILE[20..24],
            b"XYZ ",
            "PCS should be 'XYZ '"
        );
    }

    // ── reconcile_xmp tests ──────────────────────────────────────────────────

    #[test]
    fn test_reconcile_xmp_adds_xpacket_wrappers() {
        let reconciled = reconcile_xmp(BARE_XMPMETA, PdfCompliance::Standard);
        assert!(
            reconciled.starts_with("<?xpacket"),
            "Should start with <?xpacket"
        );
        assert!(reconciled.ends_with("?>"), "Should end with ?>");
    }

    #[test]
    fn test_reconcile_xmp_keeps_existing_wrappers() {
        let wrapped = "<?xpacket begin=\"\u{FEFF}\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"><rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"></rdf:RDF></x:xmpmeta>\n<?xpacket end=\"w\"?>";
        let reconciled = reconcile_xmp(wrapped, PdfCompliance::Standard);
        // The begin/end pair must not be duplicated.
        assert_eq!(
            reconciled.matches("<?xpacket").count(),
            2,
            "Should have exactly two xpacket PIs"
        );
    }

    #[test]
    fn test_reconcile_xmp_splices_pdfa() {
        let reconciled = reconcile_xmp(BARE_XMPMETA, PdfCompliance::PdfA1b);
        assert!(
            reconciled.contains("pdfaid:part"),
            "Should contain pdfaid:part"
        );
        assert!(
            reconciled.contains("pdfaid:conformance"),
            "Should contain pdfaid:conformance"
        );
    }

    #[test]
    fn test_reconcile_xmp_splices_pdfua() {
        let reconciled = reconcile_xmp(BARE_XMPMETA, PdfCompliance::PdfUA1);
        assert!(
            reconciled.contains("pdfuaid:part"),
            "Should contain pdfuaid:part"
        );
        assert!(
            !reconciled.contains("pdfaid:part"),
            "Should NOT contain pdfaid:part"
        );
    }

    #[test]
    fn test_reconcile_xmp_splices_both_for_combined() {
        let reconciled = reconcile_xmp(BARE_XMPMETA, PdfCompliance::PdfA1bUA1);
        assert!(
            reconciled.contains("pdfaid:part"),
            "Should contain pdfaid:part"
        );
        assert!(
            reconciled.contains("pdfuaid:part"),
            "Should contain pdfuaid:part"
        );
    }

    #[test]
    fn test_reconcile_xmp_does_not_duplicate_existing_pdfa() {
        let already_tagged = r#"<x:xmpmeta xmlns:x="adobe:ns:meta/">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <rdf:Description rdf:about="" xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">
    <pdfaid:part>1</pdfaid:part>
    <pdfaid:conformance>B</pdfaid:conformance>
  </rdf:Description>
</rdf:RDF></x:xmpmeta>"#;
        let reconciled = reconcile_xmp(already_tagged, PdfCompliance::PdfA1b);
        // A second identification block must not be injected, so the opening
        // "<pdfaid:part>" tag has to occur exactly once.
        let occurrences = reconciled.matches("<pdfaid:part>").count();
        assert_eq!(
            occurrences, 1,
            "<pdfaid:part> open-tag should appear exactly once"
        );
    }

    // ── extract_dc_fields tests ──────────────────────────────────────────────

    #[test]
    fn test_extract_dc_fields_title_from_alt() {
        let packet = r#"<?xpacket begin="" id="W"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:title><rdf:Alt><rdf:li xml:lang="x-default">My Test Document</rdf:li></rdf:Alt></dc:title>
  </rdf:Description>
</rdf:RDF></x:xmpmeta>
<?xpacket end="w"?>"#;
        let dc = extract_dc_fields(packet);
        assert_eq!(dc.title.as_deref(), Some("My Test Document"));
    }

    #[test]
    fn test_extract_dc_fields_title_simple() {
        let dc = extract_dc_fields(r#"<dc:title>Simple Title</dc:title>"#);
        assert_eq!(dc.title.as_deref(), Some("Simple Title"));
    }

    #[test]
    fn test_extract_dc_fields_creator() {
        let dc = extract_dc_fields(r#"<dc:creator>Jane Doe</dc:creator>"#);
        assert_eq!(dc.creator.as_deref(), Some("Jane Doe"));
    }

    #[test]
    fn test_extract_dc_fields_description() {
        let dc = extract_dc_fields(r#"<dc:description>A document about things.</dc:description>"#);
        assert_eq!(
            dc.description.as_deref(),
            Some("A document about things.")
        );
    }

    #[test]
    fn test_extract_dc_fields_absent_returns_none() {
        let dc = extract_dc_fields(r#"<x:xmpmeta xmlns:x="adobe:ns:meta/"></x:xmpmeta>"#);
        assert!(dc.title.is_none());
        assert!(dc.creator.is_none());
        assert!(dc.description.is_none());
        assert!(dc.date.is_none());
        assert!(dc.rights.is_none());
        assert!(dc.language.is_none());
    }

    #[test]
    fn test_extract_dc_date_from_xmp() {
        let dc = extract_dc_fields(r#"<dc:date>2026-05-15</dc:date>"#);
        assert_eq!(dc.date.as_deref(), Some("2026-05-15"));
    }

    #[test]
    fn test_extract_dc_rights_from_xmp() {
        let dc = extract_dc_fields(r#"<dc:rights>CC-BY 4.0</dc:rights>"#);
        assert_eq!(dc.rights.as_deref(), Some("CC-BY 4.0"));
    }

    #[test]
    fn test_extract_dc_language_from_xmp() {
        let dc = extract_dc_fields(r#"<dc:language>en</dc:language>"#);
        assert_eq!(dc.language.as_deref(), Some("en"));
    }
}