Skip to main content

fop_core/tree/builder/
mod.rs

//! FO tree builder - constructs the FO tree from XML
//!
//! Split into:
//! - `mod.rs` (this file): `FoTreeBuilder` struct, XML parsing loop, element lifecycle
//! - `node_factory`: FO node creation from element names/attributes
//! - `property_parser`: Property value parsing (length, color, gradient, etc.)
//! - `xmlns`: Namespace-prefix scanning and `xmlns:` declaration injection for captured XML
7
8mod node_factory;
9mod property_parser;
10mod xmlns;
11
12use crate::properties::PropertyList;
13use crate::tree::{FoArena, FoNode, FoNodeData, NodeId};
14use crate::xml::XmlParser;
15use crate::{FopError, Result};
16use quick_xml::events::Event;
17use std::collections::BTreeSet;
18use std::io::BufRead;
19
/// State of an in-progress raw-XML capture (an XMP packet or a foreign-object
/// subtree).  Alongside the serialised text it keeps the namespace facts that
/// are needed to work out which inherited `xmlns:` declarations are missing
/// from the captured root element.
struct CaptureNs {
    /// Serialised XML collected so far; it begins with the root element's open tag.
    buffer: String,
    /// Number of elements currently open below the captured root
    /// (0 means "directly inside the root element").
    depth: usize,
    /// Byte index inside `buffer` of the `>` that ends the root open tag.
    root_close_byte: usize,
    /// Every namespace binding that was in scope when the root was opened.
    in_scope_at_start: Vec<(String, String)>,
    /// Prefixes that the captured root element declares itself.
    declared_on_root: BTreeSet<String>,
    /// Every namespace prefix referenced in the subtree, whether by an
    /// element name or by an attribute name.
    used_in_subtree: BTreeSet<String>,
}
37
38/// Builder for constructing FO trees from XML
39pub struct FoTreeBuilder<'a> {
40    arena: FoArena<'a>,
41    current_node: Option<NodeId>,
42    /// Depth counter for nested elements inside instream-foreign-object
43    foreign_object_depth: usize,
44    /// Buffer to collect raw XML content of instream-foreign-object
45    foreign_xml_buffer: String,
46    /// NodeId of the instream-foreign-object node being built
47    foreign_object_node: Option<NodeId>,
48    /// Nesting depth of non-FO elements outside fo:instream-foreign-object.
49    /// Tracks open tags so their matching close tags do not call end_element()
50    /// and corrupt the current_node pointer.  For example, children of
51    /// fo:declarations (e.g. x:xmpmeta / rdf:RDF) live here.
52    non_fo_depth: usize,
53    /// When non-None, we are inside an `<x:xmpmeta>` element and accumulating
54    /// the raw XML (including the root `<x:xmpmeta ...>` opening tag) into this
55    /// buffer.  The namespace context tracks which `xmlns:` declarations need
56    /// injecting when the packet is finalised.
57    xmp_buffer: Option<CaptureNs>,
58}
59
60impl<'a> FoTreeBuilder<'a> {
61    /// Create a new tree builder
62    pub fn new() -> Self {
63        Self {
64            arena: FoArena::new(),
65            current_node: None,
66            foreign_object_depth: 0,
67            foreign_xml_buffer: String::new(),
68            foreign_object_node: None,
69            non_fo_depth: 0,
70            xmp_buffer: None::<CaptureNs>,
71        }
72    }
73
74    /// Parse an XSL-FO document and build the tree
75    pub fn parse<R: BufRead>(mut self, reader: R) -> Result<FoArena<'a>> {
76        let mut parser = XmlParser::new(reader);
77
78        loop {
79            let event = parser.read_event()?;
80
81            // Push namespace scope BEFORE dispatch for Start/Empty elements.
82            // Empty elements also need push+pop since they open and close atomically.
83            match &event {
84                Event::Start(start) | Event::Empty(start) => {
85                    parser.push_namespace_scope(start);
86                }
87                _ => {}
88            }
89
90            // Determine whether we need to pop after dispatch.
91            // End pops; Empty pops (was pushed above); Start does NOT pop.
92            let should_pop = matches!(&event, Event::End(_) | Event::Empty(_));
93
94            let result = self.dispatch_event(&event, &parser);
95
96            // Pop AFTER dispatch so the capture finaliser sees the correct scope on End.
97            if should_pop {
98                parser.pop_namespace_scope();
99            }
100
101            // Propagate any error from dispatch_event
102            result?;
103
104            if matches!(&event, Event::Eof) {
105                break;
106            }
107        }
108
109        Ok(self.arena)
110    }
111
112    /// Dispatch a single parse event to the appropriate capture or FO handler.
113    fn dispatch_event<R: BufRead>(
114        &mut self,
115        event: &Event<'static>,
116        parser: &XmlParser<R>,
117    ) -> Result<()> {
118        // ── Block A: XMP packet capture ──────────────────────────────────────────
119        if self.xmp_buffer.is_some() {
120            return self.handle_xmp_event(event, parser);
121        }
122
123        // ── Block B: foreign-object child capture ────────────────────────────────
124        if self.foreign_object_depth > 0 {
125            return self.handle_foreign_child_event(event, parser);
126        }
127
128        // ── Block C: main FO parse ───────────────────────────────────────────────
129        match event {
130            Event::Start(start) => {
131                let (name, ns) = parser.extract_name(start)?;
132
133                if ns.is_fo() {
134                    self.start_element(&name, start, parser)?;
135                } else if self.foreign_object_node.is_some() {
136                    // Non-FO start inside instream-foreign-object root: begin child capture
137                    let raw = std::str::from_utf8(start.as_ref())
138                        .unwrap_or("")
139                        .to_string();
140                    self.foreign_xml_buffer.push('<');
141                    self.foreign_xml_buffer.push_str(&raw);
142                    self.foreign_xml_buffer.push('>');
143                    self.foreign_object_depth += 1;
144                } else {
145                    // Non-FO element outside instream-foreign-object (e.g. inside
146                    // fo:declarations).  Track depth so End events don't call end_element().
147                    self.non_fo_depth += 1;
148                    self.try_begin_xmp_capture(start, parser);
149                }
150            }
151            Event::Empty(start) => {
152                let (name, ns) = parser.extract_name(start)?;
153
154                if ns.is_fo() {
155                    self.start_element(&name, start, parser)?;
156                    self.end_element()?;
157                } else if self.foreign_object_node.is_some() {
158                    // Self-closing non-FO element inside foreign-object root
159                    let raw = std::str::from_utf8(start.as_ref())
160                        .unwrap_or("")
161                        .to_string();
162                    self.foreign_xml_buffer.push('<');
163                    self.foreign_xml_buffer.push_str(&raw);
164                    self.foreign_xml_buffer.push_str("/>");
165                }
166                // Non-FO empty element outside foreign-object: ignore (no depth change)
167            }
168            Event::End(_) => {
169                if self.foreign_object_node.is_some() && self.foreign_object_depth == 0 {
170                    // This End closes the fo:instream-foreign-object itself
171                    self.finalize_foreign_object();
172                }
173                // If inside a non-FO subtree, consume without popping current_node
174                if self.non_fo_depth > 0 {
175                    self.non_fo_depth -= 1;
176                    return Ok(());
177                }
178                self.end_element()?;
179            }
180            Event::Text(text) => {
181                let text_content = parser.extract_text(text)?;
182                let trimmed = text_content.trim();
183                if !trimmed.is_empty() {
184                    self.add_text(trimmed)?;
185                }
186            }
187            Event::CData(cdata) => {
188                let cdata_content = parser.extract_cdata(cdata)?;
189                if !cdata_content.is_empty() {
190                    self.add_text(&cdata_content)?;
191                }
192            }
193            _ => {}
194        }
195
196        Ok(())
197    }
198
199    /// Handle an event while inside the XMP packet capture mode.
200    fn handle_xmp_event<R: BufRead>(
201        &mut self,
202        event: &Event<'static>,
203        parser: &XmlParser<R>,
204    ) -> Result<()> {
205        match event {
206            Event::Start(start) => {
207                let raw = std::str::from_utf8(start.as_ref())
208                    .unwrap_or("")
209                    .to_string();
210                if let Some(cap) = &mut self.xmp_buffer {
211                    cap.buffer.push('<');
212                    cap.buffer.push_str(&raw);
213                    cap.buffer.push('>');
214                    cap.depth += 1;
215                    xmlns::scan_prefixes_used(start, &mut cap.used_in_subtree);
216                }
217            }
218            Event::Empty(start) => {
219                let raw = std::str::from_utf8(start.as_ref())
220                    .unwrap_or("")
221                    .to_string();
222                if let Some(cap) = &mut self.xmp_buffer {
223                    cap.buffer.push('<');
224                    cap.buffer.push_str(&raw);
225                    cap.buffer.push_str("/>");
226                    xmlns::scan_prefixes_used(start, &mut cap.used_in_subtree);
227                }
228            }
229            Event::End(end) => {
230                let raw = std::str::from_utf8(end.as_ref()).unwrap_or("").to_string();
231                let depth = self.xmp_buffer.as_ref().map(|c| c.depth).unwrap_or(0);
232                if depth > 0 {
233                    if let Some(cap) = &mut self.xmp_buffer {
234                        cap.buffer.push_str("</");
235                        cap.buffer.push_str(&raw);
236                        cap.buffer.push('>');
237                        cap.depth -= 1;
238                    }
239                } else {
240                    // depth == 0: this End closes the root <x:xmpmeta>
241                    if let Some(mut cap) = self.xmp_buffer.take() {
242                        cap.buffer.push_str("</");
243                        cap.buffer.push_str(&raw);
244                        cap.buffer.push('>');
245
246                        // Compute which inherited prefixes need injecting
247                        let to_inject: Vec<(String, String)> = cap
248                            .used_in_subtree
249                            .iter()
250                            .filter(|p| !cap.declared_on_root.contains(*p))
251                            .filter_map(|p| {
252                                cap.in_scope_at_start
253                                    .iter()
254                                    .find(|(sp, _)| sp == p)
255                                    .map(|(sp, su)| (sp.clone(), su.clone()))
256                            })
257                            .collect();
258
259                        let decls_block = xmlns::render_xmlns_attrs(&to_inject);
260                        let patched = xmlns::inject_namespace_decls(
261                            &cap.buffer,
262                            &decls_block,
263                            cap.root_close_byte,
264                        );
265                        self.arena.xmp_packets.push(patched);
266                    }
267                    // The xmpmeta open tag counted as non_fo_depth +1; revert it.
268                    if self.non_fo_depth > 0 {
269                        self.non_fo_depth -= 1;
270                    }
271                }
272            }
273            Event::Text(text) => {
274                let text_content = parser.extract_text(text).unwrap_or_default();
275                if let Some(cap) = &mut self.xmp_buffer {
276                    cap.buffer.push_str(&text_content);
277                }
278            }
279            Event::CData(cdata) => {
280                let raw = std::str::from_utf8(cdata.as_ref()).unwrap_or("");
281                if let Some(cap) = &mut self.xmp_buffer {
282                    cap.buffer.push_str("<![CDATA[");
283                    cap.buffer.push_str(raw);
284                    cap.buffer.push_str("]]>");
285                }
286            }
287            Event::Comment(comment) => {
288                let raw = std::str::from_utf8(comment.as_ref()).unwrap_or("");
289                if let Some(cap) = &mut self.xmp_buffer {
290                    cap.buffer.push_str("<!--");
291                    cap.buffer.push_str(raw);
292                    cap.buffer.push_str("-->");
293                }
294            }
295            _ => {}
296        }
297        Ok(())
298    }
299
300    /// Handle an event while inside a foreign-object child element capture.
301    fn handle_foreign_child_event<R: BufRead>(
302        &mut self,
303        event: &Event<'static>,
304        parser: &XmlParser<R>,
305    ) -> Result<()> {
306        match event {
307            Event::Start(start) => {
308                let raw = std::str::from_utf8(start.as_ref())
309                    .unwrap_or("")
310                    .to_string();
311                self.foreign_xml_buffer.push('<');
312                self.foreign_xml_buffer.push_str(&raw);
313                self.foreign_xml_buffer.push('>');
314                self.foreign_object_depth += 1;
315            }
316            Event::Empty(start) => {
317                let raw = std::str::from_utf8(start.as_ref())
318                    .unwrap_or("")
319                    .to_string();
320                self.foreign_xml_buffer.push('<');
321                self.foreign_xml_buffer.push_str(&raw);
322                self.foreign_xml_buffer.push_str("/>");
323            }
324            Event::End(end) => {
325                self.foreign_object_depth -= 1;
326                if self.foreign_object_depth > 0 {
327                    let raw = std::str::from_utf8(end.as_ref()).unwrap_or("").to_string();
328                    self.foreign_xml_buffer.push_str("</");
329                    self.foreign_xml_buffer.push_str(&raw);
330                    self.foreign_xml_buffer.push('>');
331                }
332                // When depth returns to 0 the child root element is closed; nothing more to do here
333            }
334            Event::Text(text) => {
335                let text_content = parser.extract_text(text).unwrap_or_default();
336                self.foreign_xml_buffer.push_str(&text_content);
337            }
338            Event::CData(cdata) => {
339                let raw = std::str::from_utf8(cdata.as_ref()).unwrap_or("");
340                self.foreign_xml_buffer.push_str("<![CDATA[");
341                self.foreign_xml_buffer.push_str(raw);
342                self.foreign_xml_buffer.push_str("]]>");
343            }
344            Event::Comment(comment) => {
345                let raw = std::str::from_utf8(comment.as_ref()).unwrap_or("");
346                self.foreign_xml_buffer.push_str("<!--");
347                self.foreign_xml_buffer.push_str(raw);
348                self.foreign_xml_buffer.push_str("-->");
349            }
350            _ => {}
351        }
352        Ok(())
353    }
354
355    /// Detect `<x:xmpmeta>` as a direct child of `fo:declarations` and start capture.
356    fn try_begin_xmp_capture<R: BufRead>(
357        &mut self,
358        start: &quick_xml::events::BytesStart<'_>,
359        parser: &XmlParser<R>,
360    ) {
361        let is_declarations_parent = self
362            .current_node
363            .and_then(|id| self.arena.get(id))
364            .map(|n| matches!(n.data, FoNodeData::Declarations))
365            .unwrap_or(false);
366
367        if !is_declarations_parent {
368            return;
369        }
370
371        let raw = std::str::from_utf8(start.as_ref())
372            .unwrap_or("")
373            .to_string();
374        // Check for xmpmeta (local-name only, after any prefix colon)
375        let local_name = raw
376            .split_once(':')
377            .map(|(_, local)| local)
378            .unwrap_or(raw.as_str());
379        // local_name may have attributes after the element name
380        let local_tag = local_name
381            .split_once(|c: char| c.is_ascii_whitespace())
382            .map(|(tag, _)| tag)
383            .unwrap_or(local_name);
384        if local_tag == "xmpmeta" {
385            let mut buf = String::new();
386            buf.push('<');
387            buf.push_str(&raw);
388            buf.push('>');
389            let root_close_byte = buf.len() - 1; // index of the final `>`
390
391            // Snapshot namespace scope (push_namespace_scope was already called
392            // for this element before dispatch_event was entered)
393            let in_scope_at_start = parser.snapshot_in_scope();
394            let declared_on_root = xmlns::declared_on_element(start);
395            let mut used_in_subtree = BTreeSet::new();
396            xmlns::scan_prefixes_used(start, &mut used_in_subtree);
397
398            self.xmp_buffer = Some(CaptureNs {
399                buffer: buf,
400                depth: 0,
401                root_close_byte,
402                in_scope_at_start,
403                declared_on_root,
404                used_in_subtree,
405            });
406        }
407    }
408
409    /// Finalize the foreign object: store captured XML and clear state
410    fn finalize_foreign_object(&mut self) {
411        if let Some(node_id) = self.foreign_object_node.take() {
412            let xml = std::mem::take(&mut self.foreign_xml_buffer);
413            if let Some(node) = self.arena.get_mut(node_id) {
414                if let FoNodeData::InstreamForeignObject { foreign_xml, .. } = &mut node.data {
415                    *foreign_xml = xml;
416                }
417            }
418        }
419    }
420
421    /// Handle start of an element
422    fn start_element(
423        &mut self,
424        name: &str,
425        start: &quick_xml::events::BytesStart,
426        parser: &XmlParser<impl BufRead>,
427    ) -> Result<()> {
428        // Create property list (inheritance will be resolved when properties are accessed)
429        let mut properties = PropertyList::new();
430
431        // Parse attributes into properties
432        let attributes = parser.extract_attributes(start)?;
433
434        // Extract the "id" attribute if present
435        let element_id = attributes
436            .iter()
437            .find(|(k, _)| k == "id")
438            .map(|(_, v)| v.clone());
439
440        // Populate properties from attributes
441        node_factory::populate_properties(&mut properties, &attributes)?;
442
443        // Validate all properties after parsing
444        properties.validate()?;
445
446        // Handle xml:lang on fo:root for document language metadata
447        if name == "root" {
448            if let Some((_, lang)) = attributes
449                .iter()
450                .find(|(k, _)| k == "xml:lang" || k == "xml-lang")
451            {
452                self.arena.document_lang = Some(lang.clone());
453            }
454        }
455
456        // Create the appropriate FO node
457        let node_data = node_factory::create_node_data(name, &attributes, properties)?;
458        let node = FoNode::new_with_id(node_data, element_id.clone());
459        let node_id = self.arena.add_node(node);
460
461        // Register the ID in the registry if present
462        if let Some(id) = element_id {
463            self.arena.id_registry_mut().register_id(id, node_id)?;
464        }
465
466        // Set up parent-child relationship
467        if let Some(parent_id) = self.current_node {
468            self.arena
469                .append_child(parent_id, node_id)
470                .map_err(FopError::Generic)?;
471        }
472
473        // If this is an instream-foreign-object, track the node for XML capture
474        if name == "instream-foreign-object" {
475            self.foreign_object_node = Some(node_id);
476            self.foreign_xml_buffer.clear();
477            self.foreign_object_depth = 0;
478        }
479
480        // Update current node
481        self.current_node = Some(node_id);
482
483        Ok(())
484    }
485
486    /// Handle end of an element
487    fn end_element(&mut self) -> Result<()> {
488        if let Some(current) = self.current_node {
489            // Move back to parent
490            self.current_node = self.arena.get(current).and_then(|n| n.parent);
491        }
492        Ok(())
493    }
494
495    /// Add text content to current node
496    fn add_text(&mut self, text: &str) -> Result<()> {
497        if let Some(parent_id) = self.current_node {
498            // Check if parent can contain text
499            if let Some(parent) = self.arena.get(parent_id) {
500                if parent.data.can_contain_text() {
501                    let text_node = FoNode::new(FoNodeData::Text(text.to_string()));
502                    let text_id = self.arena.add_node(text_node);
503                    self.arena
504                        .append_child(parent_id, text_id)
505                        .map_err(FopError::Generic)?;
506                }
507            }
508        }
509        Ok(())
510    }
511}
512
513impl<'a> Default for FoTreeBuilder<'a> {
514    fn default() -> Self {
515        Self::new()
516    }
517}
518
519#[cfg(test)]
520mod tests {
521    use super::*;
522    use crate::PropertyId;
523    use std::io::Cursor;
524
525    #[test]
526    fn test_parse_simple_document() {
527        let xml = r#"<?xml version="1.0"?>
528<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
529    <fo:layout-master-set>
530        <fo:simple-page-master master-name="A4">
531            <fo:region-body/>
532        </fo:simple-page-master>
533    </fo:layout-master-set>
534</fo:root>"#;
535
536        let cursor = Cursor::new(xml);
537        let builder = FoTreeBuilder::new();
538        let arena = builder.parse(cursor).expect("test: should succeed");
539
540        assert!(!arena.is_empty());
541        assert_eq!(arena.len(), 4); // root, layout-master-set, simple-page-master, region-body
542    }
543
544    #[test]
545    fn test_parse_with_text() {
546        let xml = r#"<?xml version="1.0"?>
547<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
548    <fo:layout-master-set>
549        <fo:simple-page-master master-name="A4">
550            <fo:region-body/>
551        </fo:simple-page-master>
552    </fo:layout-master-set>
553    <fo:page-sequence master-reference="A4">
554        <fo:flow flow-name="xsl-region-body">
555            <fo:block>Hello World</fo:block>
556        </fo:flow>
557    </fo:page-sequence>
558</fo:root>"#;
559
560        let cursor = Cursor::new(xml);
561        let builder = FoTreeBuilder::new();
562        let arena = builder.parse(cursor).expect("test: should succeed");
563
564        // Should have: root, layout-master-set, simple-page-master, region-body,
565        //              page-sequence, flow, block, text
566        assert!(arena.len() >= 8);
567    }
568
569    #[test]
570    fn test_property_parsing() {
571        let xml = r#"<?xml version="1.0"?>
572<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
573    <fo:layout-master-set>
574        <fo:simple-page-master master-name="A4" page-width="210mm" page-height="297mm">
575            <fo:region-body margin="1in"/>
576        </fo:simple-page-master>
577    </fo:layout-master-set>
578</fo:root>"#;
579
580        let cursor = Cursor::new(xml);
581        let builder = FoTreeBuilder::new();
582        let arena = builder.parse(cursor).expect("test: should succeed");
583
584        // Check that properties were parsed
585        for (_, node) in arena.iter() {
586            if let Some(props) = node.data.properties() {
587                // Properties should be accessible
588                let _ = props.get(PropertyId::PageWidth);
589            }
590        }
591    }
592
593    #[test]
594    fn test_parse_document_with_block_and_inline() {
595        let xml = r#"<?xml version="1.0"?>
596<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
597    <fo:layout-master-set>
598        <fo:simple-page-master master-name="A4">
599            <fo:region-body/>
600        </fo:simple-page-master>
601    </fo:layout-master-set>
602    <fo:page-sequence master-reference="A4">
603        <fo:flow flow-name="xsl-region-body">
604            <fo:block>
605                <fo:inline font-weight="bold">Bold text</fo:inline>
606                Normal text
607            </fo:block>
608        </fo:flow>
609    </fo:page-sequence>
610</fo:root>"#;
611
612        let cursor = Cursor::new(xml);
613        let builder = FoTreeBuilder::new();
614        let arena = builder.parse(cursor).expect("test: should succeed");
615
616        // Should have root, layout-master-set, simple-page-master, region-body,
617        // page-sequence, flow, block, inline, text nodes
618        assert!(arena.len() >= 8);
619    }
620
621    #[test]
622    fn test_parse_document_with_multiple_blocks() {
623        let xml = r#"<?xml version="1.0"?>
624<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
625    <fo:layout-master-set>
626        <fo:simple-page-master master-name="A4">
627            <fo:region-body/>
628        </fo:simple-page-master>
629    </fo:layout-master-set>
630    <fo:page-sequence master-reference="A4">
631        <fo:flow flow-name="xsl-region-body">
632            <fo:block>First block</fo:block>
633            <fo:block>Second block</fo:block>
634            <fo:block>Third block</fo:block>
635        </fo:flow>
636    </fo:page-sequence>
637</fo:root>"#;
638
639        let cursor = Cursor::new(xml);
640        let builder = FoTreeBuilder::new();
641        let arena = builder.parse(cursor).expect("test: should succeed");
642
643        // At least root, layout-master-set, simple-page-master, region-body,
644        // page-sequence, flow, 3 blocks (text nodes may or may not be separate)
645        assert!(arena.len() >= 9);
646    }
647
648    #[test]
649    fn test_parse_document_with_font_properties() {
650        let xml = r#"<?xml version="1.0"?>
651<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
652    <fo:layout-master-set>
653        <fo:simple-page-master master-name="A4">
654            <fo:region-body/>
655        </fo:simple-page-master>
656    </fo:layout-master-set>
657    <fo:page-sequence master-reference="A4">
658        <fo:flow flow-name="xsl-region-body">
659            <fo:block font-size="14pt" font-family="Arial" color="red">Styled text</fo:block>
660        </fo:flow>
661    </fo:page-sequence>
662</fo:root>"#;
663
664        let cursor = Cursor::new(xml);
665        let builder = FoTreeBuilder::new();
666        let result = builder.parse(cursor);
667        assert!(
668            result.is_ok(),
669            "Should parse document with font properties: {:?}",
670            result.err()
671        );
672
673        let arena = result.expect("test: should succeed");
674        assert!(arena.len() >= 7);
675    }
676
677    #[test]
678    fn test_parse_document_with_list() {
679        let xml = r#"<?xml version="1.0"?>
680<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
681    <fo:layout-master-set>
682        <fo:simple-page-master master-name="A4">
683            <fo:region-body/>
684        </fo:simple-page-master>
685    </fo:layout-master-set>
686    <fo:page-sequence master-reference="A4">
687        <fo:flow flow-name="xsl-region-body">
688            <fo:list-block>
689                <fo:list-item>
690                    <fo:list-item-label><fo:block>1.</fo:block></fo:list-item-label>
691                    <fo:list-item-body><fo:block>Item one</fo:block></fo:list-item-body>
692                </fo:list-item>
693            </fo:list-block>
694        </fo:flow>
695    </fo:page-sequence>
696</fo:root>"#;
697
698        let cursor = Cursor::new(xml);
699        let builder = FoTreeBuilder::new();
700        let result = builder.parse(cursor);
701        assert!(
702            result.is_ok(),
703            "Should parse list structure: {:?}",
704            result.err()
705        );
706    }
707
708    #[test]
709    fn test_parse_document_with_cdata() {
710        let xml = r#"<?xml version="1.0"?>
711<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
712    <fo:layout-master-set>
713        <fo:simple-page-master master-name="A4">
714            <fo:region-body/>
715        </fo:simple-page-master>
716    </fo:layout-master-set>
717    <fo:page-sequence master-reference="A4">
718        <fo:flow flow-name="xsl-region-body">
719            <fo:block><![CDATA[Text with <special> & chars]]></fo:block>
720        </fo:flow>
721    </fo:page-sequence>
722</fo:root>"#;
723
724        let cursor = Cursor::new(xml);
725        let builder = FoTreeBuilder::new();
726        let result = builder.parse(cursor);
727        // CDATA sections should be parsed without error
728        assert!(
729            result.is_ok(),
730            "Should parse CDATA sections: {:?}",
731            result.err()
732        );
733
734        let arena = result.expect("test: should succeed");
735        // Find text node with CDATA content
736        let has_cdata_text = arena.iter().any(|(_, node)| {
737            if let FoNodeData::Text(text) = &node.data {
738                text.contains("Text with")
739            } else {
740                false
741            }
742        });
743        assert!(
744            has_cdata_text,
745            "CDATA content should be stored as text node"
746        );
747    }
748
749    #[test]
750    fn test_parse_invalid_xml_returns_error() {
751        let xml = r#"<?xml version="1.0"?>
752<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
753    <fo:layout-master-set>
754        <fo:unclosed-element>
755    </fo:layout-master-set>
756</fo:root>"#;
757
758        let cursor = Cursor::new(xml);
759        let builder = FoTreeBuilder::new();
760        // Invalid XML (unclosed element) should return an error
761        // (Behavior depends on parser leniency)
762        let result = builder.parse(cursor);
763        // Just verify it doesn't panic - may succeed or fail
764        let _ = result;
765    }
766
767    #[test]
768    fn test_parse_document_with_multiple_page_sequences() {
769        let xml = r#"<?xml version="1.0"?>
770<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
771    <fo:layout-master-set>
772        <fo:simple-page-master master-name="A4">
773            <fo:region-body/>
774        </fo:simple-page-master>
775    </fo:layout-master-set>
776    <fo:page-sequence master-reference="A4">
777        <fo:flow flow-name="xsl-region-body">
778            <fo:block>Page 1 content</fo:block>
779        </fo:flow>
780    </fo:page-sequence>
781    <fo:page-sequence master-reference="A4">
782        <fo:flow flow-name="xsl-region-body">
783            <fo:block>Page 2 content</fo:block>
784        </fo:flow>
785    </fo:page-sequence>
786</fo:root>"#;
787
788        let cursor = Cursor::new(xml);
789        let builder = FoTreeBuilder::new();
790        let result = builder.parse(cursor);
791        assert!(result.is_ok(), "Should parse multiple page sequences");
792    }
793
794    #[test]
795    fn test_parse_document_with_margin_property() {
796        let xml = r#"<?xml version="1.0"?>
797<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
798    <fo:layout-master-set>
799        <fo:simple-page-master master-name="A4">
800            <fo:region-body margin-top="1cm" margin-bottom="2cm"/>
801        </fo:simple-page-master>
802    </fo:layout-master-set>
803</fo:root>"#;
804
805        let cursor = Cursor::new(xml);
806        let builder = FoTreeBuilder::new();
807        let result = builder.parse(cursor);
808        assert!(result.is_ok(), "Should parse margin properties");
809    }
810
811    #[test]
812    fn test_parse_document_with_table() {
813        let xml = r#"<?xml version="1.0"?>
814<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
815    <fo:layout-master-set>
816        <fo:simple-page-master master-name="A4">
817            <fo:region-body/>
818        </fo:simple-page-master>
819    </fo:layout-master-set>
820    <fo:page-sequence master-reference="A4">
821        <fo:flow flow-name="xsl-region-body">
822            <fo:table>
823                <fo:table-body>
824                    <fo:table-row>
825                        <fo:table-cell>
826                            <fo:block>Cell content</fo:block>
827                        </fo:table-cell>
828                    </fo:table-row>
829                </fo:table-body>
830            </fo:table>
831        </fo:flow>
832    </fo:page-sequence>
833</fo:root>"#;
834
835        let cursor = Cursor::new(xml);
836        let builder = FoTreeBuilder::new();
837        let result = builder.parse(cursor);
838        assert!(
839            result.is_ok(),
840            "Should parse table structure: {:?}",
841            result.err()
842        );
843    }
844
845    #[test]
846    fn test_parse_document_is_not_empty() {
847        let xml = r#"<?xml version="1.0"?>
848<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
849    <fo:layout-master-set>
850        <fo:simple-page-master master-name="A4">
851            <fo:region-body/>
852        </fo:simple-page-master>
853    </fo:layout-master-set>
854</fo:root>"#;
855
856        let cursor = Cursor::new(xml);
857        let builder = FoTreeBuilder::new();
858        let arena = builder.parse(cursor).expect("test: should succeed");
859
860        assert!(!arena.is_empty());
861        assert!(!arena.is_empty());
862    }
863
864    #[test]
865    fn test_parse_preserves_text_content() {
866        let xml = r#"<?xml version="1.0"?>
867<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
868    <fo:layout-master-set>
869        <fo:simple-page-master master-name="A4">
870            <fo:region-body/>
871        </fo:simple-page-master>
872    </fo:layout-master-set>
873    <fo:page-sequence master-reference="A4">
874        <fo:flow flow-name="xsl-region-body">
875            <fo:block>Hello World</fo:block>
876        </fo:flow>
877    </fo:page-sequence>
878</fo:root>"#;
879
880        let cursor = Cursor::new(xml);
881        let builder = FoTreeBuilder::new();
882        let arena = builder.parse(cursor).expect("test: should succeed");
883
884        // Find the text node
885        let text_found = arena
886            .iter()
887            .any(|(_, node)| matches!(&node.data, FoNodeData::Text(t) if t == "Hello World"));
888        assert!(text_found, "Text content should be preserved in tree");
889    }
890
891    #[test]
892    fn test_parse_document_with_whitespace_only_text_is_trimmed() {
893        let xml = r#"<?xml version="1.0"?>
894<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
895    <fo:layout-master-set>
896        <fo:simple-page-master master-name="A4">
897            <fo:region-body/>
898        </fo:simple-page-master>
899    </fo:layout-master-set>
900</fo:root>"#;
901
902        let cursor = Cursor::new(xml);
903        let builder = FoTreeBuilder::new();
904        let arena = builder.parse(cursor).expect("test: should succeed");
905
906        // Whitespace-only text nodes should be stripped
907        let whitespace_only_text = arena.iter().any(|(_, node)| {
908            matches!(&node.data, FoNodeData::Text(t) if t.trim().is_empty() && !t.is_empty())
909        });
910        assert!(
911            !whitespace_only_text,
912            "Whitespace-only text nodes should be stripped"
913        );
914    }
915
916    #[test]
917    fn test_parse_document_with_processing_instruction() {
918        let xml = r#"<?xml version="1.0"?>
919<?fop-processor key="value"?>
920<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
921    <fo:layout-master-set>
922        <fo:simple-page-master master-name="A4">
923            <fo:region-body/>
924        </fo:simple-page-master>
925    </fo:layout-master-set>
926</fo:root>"#;
927
928        let cursor = Cursor::new(xml);
929        let builder = FoTreeBuilder::new();
930        let result = builder.parse(cursor);
931        // Processing instructions should not cause parse errors
932        assert!(
933            result.is_ok(),
934            "Processing instructions should be handled gracefully"
935        );
936    }
937
938    #[test]
939    fn test_parse_document_with_xml_comment() {
940        let xml = r#"<?xml version="1.0"?>
941<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
942    <!-- This is a comment -->
943    <fo:layout-master-set>
944        <fo:simple-page-master master-name="A4">
945            <!-- Page master comment -->
946            <fo:region-body/>
947        </fo:simple-page-master>
948    </fo:layout-master-set>
949</fo:root>"#;
950
951        let cursor = Cursor::new(xml);
952        let builder = FoTreeBuilder::new();
953        let result = builder.parse(cursor);
954        assert!(result.is_ok(), "XML comments should be handled gracefully");
955    }
956
957    #[test]
958    fn test_parse_font_size_in_pts() {
959        let xml = r#"<?xml version="1.0"?>
960<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
961    <fo:layout-master-set>
962        <fo:simple-page-master master-name="A4">
963            <fo:region-body/>
964        </fo:simple-page-master>
965    </fo:layout-master-set>
966    <fo:page-sequence master-reference="A4">
967        <fo:flow flow-name="xsl-region-body">
968            <fo:block font-size="16pt">Large text</fo:block>
969        </fo:flow>
970    </fo:page-sequence>
971</fo:root>"#;
972
973        let cursor = Cursor::new(xml);
974        let builder = FoTreeBuilder::new();
975        let result = builder.parse(cursor);
976        assert!(result.is_ok());
977
978        let arena = result.expect("test: should succeed");
979        // Find the block node and verify its font-size
980        for (_, node) in arena.iter() {
981            if let FoNodeData::Block { properties } = &node.data {
982                if properties.is_explicit(PropertyId::FontSize) {
983                    let font_size = properties
984                        .get(PropertyId::FontSize)
985                        .expect("test: should succeed");
986                    if let Some(length) = font_size.as_length() {
987                        assert_eq!(length.to_pt(), 16.0);
988                    }
989                }
990            }
991        }
992    }
993
994    #[test]
995    fn test_parse_color_property_red() {
996        let xml = r#"<?xml version="1.0"?>
997<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
998    <fo:layout-master-set>
999        <fo:simple-page-master master-name="A4">
1000            <fo:region-body/>
1001        </fo:simple-page-master>
1002    </fo:layout-master-set>
1003    <fo:page-sequence master-reference="A4">
1004        <fo:flow flow-name="xsl-region-body">
1005            <fo:block color="red">Red text</fo:block>
1006        </fo:flow>
1007    </fo:page-sequence>
1008</fo:root>"#;
1009
1010        let cursor = Cursor::new(xml);
1011        let builder = FoTreeBuilder::new();
1012        let result = builder.parse(cursor);
1013        assert!(result.is_ok(), "Should parse color properties");
1014    }
1015
1016    #[test]
1017    fn test_parse_hex_color_property() {
1018        // Use rgb() format to avoid issues with # in raw strings
1019        let xml = r#"<?xml version="1.0"?>
1020<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1021    <fo:layout-master-set>
1022        <fo:simple-page-master master-name="A4">
1023            <fo:region-body/>
1024        </fo:simple-page-master>
1025    </fo:layout-master-set>
1026    <fo:page-sequence master-reference="A4">
1027        <fo:flow flow-name="xsl-region-body">
1028            <fo:block color="red">Hex red text</fo:block>
1029        </fo:flow>
1030    </fo:page-sequence>
1031</fo:root>"#;
1032
1033        let cursor = Cursor::new(xml);
1034        let builder = FoTreeBuilder::new();
1035        let result = builder.parse(cursor);
1036        assert!(result.is_ok(), "Should parse color properties");
1037    }
1038}
1039
1040// ===== ADDITIONAL TESTS (new tests for builder) =====
1041#[cfg(test)]
1042mod additional_tests {
1043    use super::*;
1044    use std::io::Cursor;
1045
1046    fn make_minimal_fo(flow_content: &str) -> String {
1047        format!(
1048            r#"<?xml version="1.0"?>
1049<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1050    <fo:layout-master-set>
1051        <fo:simple-page-master master-name="A4">
1052            <fo:region-body/>
1053        </fo:simple-page-master>
1054    </fo:layout-master-set>
1055    <fo:page-sequence master-reference="A4">
1056        <fo:flow flow-name="xsl-region-body">
1057            {}
1058        </fo:flow>
1059    </fo:page-sequence>
1060</fo:root>"#,
1061            flow_content
1062        )
1063    }
1064
1065    #[test]
1066    fn test_parse_block_with_all_font_properties() {
1067        let xml = make_minimal_fo(
1068            r#"<fo:block font-size="14pt" font-weight="bold" font-style="italic"
1069                font-family="Times New Roman" color="navy">Styled text</fo:block>"#,
1070        );
1071        let cursor = Cursor::new(xml);
1072        let result = FoTreeBuilder::new().parse(cursor);
1073        assert!(
1074            result.is_ok(),
1075            "Font properties should parse: {:?}",
1076            result.err()
1077        );
1078    }
1079
1080    #[test]
1081    fn test_parse_block_with_margin_properties() {
1082        let xml = make_minimal_fo(
1083            r#"<fo:block margin-top="10pt" margin-bottom="10pt"
1084                margin-left="20pt" margin-right="20pt">Margins</fo:block>"#,
1085        );
1086        let cursor = Cursor::new(xml);
1087        let result = FoTreeBuilder::new().parse(cursor);
1088        assert!(result.is_ok(), "Margin properties: {:?}", result.err());
1089    }
1090
1091    #[test]
1092    fn test_parse_block_with_padding_properties() {
1093        let xml = make_minimal_fo(
1094            r#"<fo:block padding-top="5pt" padding-bottom="5pt"
1095                padding-left="10pt" padding-right="10pt">Padding</fo:block>"#,
1096        );
1097        let cursor = Cursor::new(xml);
1098        let result = FoTreeBuilder::new().parse(cursor);
1099        assert!(result.is_ok(), "Padding properties: {:?}", result.err());
1100    }
1101
1102    #[test]
1103    fn test_parse_block_with_border_properties() {
1104        let xml = make_minimal_fo(
1105            r#"<fo:block border-top-style="solid" border-top-width="1pt"
1106                border-top-color="black">Border</fo:block>"#,
1107        );
1108        let cursor = Cursor::new(xml);
1109        let result = FoTreeBuilder::new().parse(cursor);
1110        assert!(result.is_ok(), "Border properties: {:?}", result.err());
1111    }
1112
1113    #[test]
1114    fn test_parse_inline_elements() {
1115        let xml = make_minimal_fo(
1116            r#"<fo:block>Text with <fo:inline font-weight="bold">bold</fo:inline> part</fo:block>"#,
1117        );
1118        let cursor = Cursor::new(xml);
1119        let result = FoTreeBuilder::new().parse(cursor);
1120        assert!(result.is_ok(), "Inline element: {:?}", result.err());
1121    }
1122
1123    #[test]
1124    fn test_parse_nested_blocks() {
1125        let xml = make_minimal_fo(
1126            r#"<fo:block>
1127                <fo:block>Inner block 1</fo:block>
1128                <fo:block>Inner block 2</fo:block>
1129                <fo:block>Inner block 3</fo:block>
1130            </fo:block>"#,
1131        );
1132        let cursor = Cursor::new(xml);
1133        let result = FoTreeBuilder::new().parse(cursor);
1134        assert!(result.is_ok(), "Nested blocks: {:?}", result.err());
1135    }
1136
1137    #[test]
1138    fn test_parse_table_structure() {
1139        let xml = make_minimal_fo(
1140            r#"<fo:table>
1141                <fo:table-column column-width="50pt"/>
1142                <fo:table-column column-width="50pt"/>
1143                <fo:table-body>
1144                    <fo:table-row>
1145                        <fo:table-cell><fo:block>Cell 1</fo:block></fo:table-cell>
1146                        <fo:table-cell><fo:block>Cell 2</fo:block></fo:table-cell>
1147                    </fo:table-row>
1148                </fo:table-body>
1149            </fo:table>"#,
1150        );
1151        let cursor = Cursor::new(xml);
1152        let result = FoTreeBuilder::new().parse(cursor);
1153        assert!(result.is_ok(), "Table structure: {:?}", result.err());
1154    }
1155
1156    #[test]
1157    fn test_parse_list_structure() {
1158        let xml = make_minimal_fo(
1159            r#"<fo:list-block>
1160                <fo:list-item>
1161                    <fo:list-item-label end-indent="label-end()">
1162                        <fo:block>1.</fo:block>
1163                    </fo:list-item-label>
1164                    <fo:list-item-body start-indent="body-start()">
1165                        <fo:block>First item</fo:block>
1166                    </fo:list-item-body>
1167                </fo:list-item>
1168            </fo:list-block>"#,
1169        );
1170        let cursor = Cursor::new(xml);
1171        let result = FoTreeBuilder::new().parse(cursor);
1172        assert!(result.is_ok(), "List structure: {:?}", result.err());
1173    }
1174
1175    #[test]
1176    fn test_parse_external_graphic() {
1177        let xml = make_minimal_fo(
1178            r#"<fo:block><fo:external-graphic src="url('image.png')"/></fo:block>"#,
1179        );
1180        let cursor = Cursor::new(xml);
1181        let result = FoTreeBuilder::new().parse(cursor);
1182        assert!(result.is_ok(), "External graphic: {:?}", result.err());
1183    }
1184
1185    #[test]
1186    fn test_parse_basic_link_internal() {
1187        let xml = make_minimal_fo(
1188            r#"<fo:block>
1189                <fo:basic-link internal-destination="target">Link</fo:basic-link>
1190            </fo:block>"#,
1191        );
1192        let cursor = Cursor::new(xml);
1193        let result = FoTreeBuilder::new().parse(cursor);
1194        assert!(result.is_ok(), "Basic link internal: {:?}", result.err());
1195    }
1196
1197    #[test]
1198    fn test_parse_basic_link_external() {
1199        let xml = make_minimal_fo(
1200            r#"<fo:block>
1201                <fo:basic-link external-destination="url('https://example.com')">URL</fo:basic-link>
1202            </fo:block>"#,
1203        );
1204        let cursor = Cursor::new(xml);
1205        let result = FoTreeBuilder::new().parse(cursor);
1206        assert!(result.is_ok(), "Basic link external: {:?}", result.err());
1207    }
1208
1209    #[test]
1210    fn test_parse_page_number_inline() {
1211        let xml = make_minimal_fo(r#"<fo:block>Page <fo:page-number/></fo:block>"#);
1212        let cursor = Cursor::new(xml);
1213        let result = FoTreeBuilder::new().parse(cursor);
1214        assert!(result.is_ok(), "Page number: {:?}", result.err());
1215    }
1216
1217    #[test]
1218    fn test_parse_page_number_citation() {
1219        let xml = make_minimal_fo(
1220            r#"<fo:block>See page <fo:page-number-citation ref-id="target"/></fo:block>"#,
1221        );
1222        let cursor = Cursor::new(xml);
1223        let result = FoTreeBuilder::new().parse(cursor);
1224        assert!(result.is_ok(), "Page number citation: {:?}", result.err());
1225    }
1226
1227    #[test]
1228    fn test_parse_leader_dots() {
1229        let xml =
1230            make_minimal_fo(r#"<fo:block>Chapter<fo:leader leader-pattern="dots"/>10</fo:block>"#);
1231        let cursor = Cursor::new(xml);
1232        let result = FoTreeBuilder::new().parse(cursor);
1233        assert!(result.is_ok(), "Leader: {:?}", result.err());
1234    }
1235
1236    #[test]
1237    fn test_parse_footnote() {
1238        let xml = make_minimal_fo(
1239            r#"<fo:block>Text<fo:footnote>
1240                <fo:inline font-size="8pt" vertical-align="super">1</fo:inline>
1241                <fo:footnote-body>
1242                    <fo:block font-size="8pt">Footnote text</fo:block>
1243                </fo:footnote-body>
1244            </fo:footnote></fo:block>"#,
1245        );
1246        let cursor = Cursor::new(xml);
1247        let result = FoTreeBuilder::new().parse(cursor);
1248        assert!(result.is_ok(), "Footnote: {:?}", result.err());
1249    }
1250
1251    #[test]
1252    fn test_parse_block_container() {
1253        let xml = make_minimal_fo(
1254            r#"<fo:block-container width="100pt" height="100pt">
1255                <fo:block>Inside block container</fo:block>
1256            </fo:block-container>"#,
1257        );
1258        let cursor = Cursor::new(xml);
1259        let result = FoTreeBuilder::new().parse(cursor);
1260        assert!(result.is_ok(), "Block container: {:?}", result.err());
1261    }
1262
1263    #[test]
1264    fn test_parse_bookmark_tree() {
1265        let xml = r#"<?xml version="1.0"?>
1266<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1267    <fo:layout-master-set>
1268        <fo:simple-page-master master-name="A4">
1269            <fo:region-body/>
1270        </fo:simple-page-master>
1271    </fo:layout-master-set>
1272    <fo:bookmark-tree>
1273        <fo:bookmark internal-destination="ch1">
1274            <fo:bookmark-title>Chapter 1</fo:bookmark-title>
1275        </fo:bookmark>
1276    </fo:bookmark-tree>
1277    <fo:page-sequence master-reference="A4">
1278        <fo:flow flow-name="xsl-region-body">
1279            <fo:block id="ch1">Chapter 1 content</fo:block>
1280        </fo:flow>
1281    </fo:page-sequence>
1282</fo:root>"#;
1283        let cursor = Cursor::new(xml);
1284        let result = FoTreeBuilder::new().parse(cursor);
1285        assert!(result.is_ok(), "Bookmark tree: {:?}", result.err());
1286    }
1287
1288    #[test]
1289    fn test_parse_document_with_static_content() {
1290        let xml = r#"<?xml version="1.0"?>
1291<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1292    <fo:layout-master-set>
1293        <fo:simple-page-master master-name="A4">
1294            <fo:region-before extent="20mm"/>
1295            <fo:region-body/>
1296            <fo:region-after extent="20mm"/>
1297        </fo:simple-page-master>
1298    </fo:layout-master-set>
1299    <fo:page-sequence master-reference="A4">
1300        <fo:static-content flow-name="xsl-region-before">
1301            <fo:block>Header text</fo:block>
1302        </fo:static-content>
1303        <fo:static-content flow-name="xsl-region-after">
1304            <fo:block>Footer text</fo:block>
1305        </fo:static-content>
1306        <fo:flow flow-name="xsl-region-body">
1307            <fo:block>Body content</fo:block>
1308        </fo:flow>
1309    </fo:page-sequence>
1310</fo:root>"#;
1311        let cursor = Cursor::new(xml);
1312        let result = FoTreeBuilder::new().parse(cursor);
1313        assert!(result.is_ok(), "Static content: {:?}", result.err());
1314    }
1315
1316    #[test]
1317    fn test_parse_document_returns_non_empty_arena() {
1318        let xml = make_minimal_fo("<fo:block>Content</fo:block>");
1319        let cursor = Cursor::new(xml);
1320        let arena = FoTreeBuilder::new()
1321            .parse(cursor)
1322            .expect("test: should succeed");
1323        assert!(!arena.is_empty(), "Arena should not be empty after parsing");
1324    }
1325
1326    #[test]
1327    fn test_parse_document_root_is_fo_root() {
1328        let xml = make_minimal_fo("<fo:block>Content</fo:block>");
1329        let cursor = Cursor::new(xml);
1330        let arena = FoTreeBuilder::new()
1331            .parse(cursor)
1332            .expect("test: should succeed");
1333        let (_, root_node) = arena.root().expect("Should have root node");
1334        assert!(matches!(root_node.data, FoNodeData::Root));
1335    }
1336
1337    #[test]
1338    fn test_parse_document_with_text_align_center() {
1339        let xml = make_minimal_fo(r#"<fo:block text-align="center">Centered</fo:block>"#);
1340        let cursor = Cursor::new(xml);
1341        let result = FoTreeBuilder::new().parse(cursor);
1342        assert!(result.is_ok(), "text-align center: {:?}", result.err());
1343    }
1344
1345    #[test]
1346    fn test_parse_document_with_text_align_justify() {
1347        let xml = make_minimal_fo(r#"<fo:block text-align="justify">Justified</fo:block>"#);
1348        let cursor = Cursor::new(xml);
1349        let result = FoTreeBuilder::new().parse(cursor);
1350        assert!(result.is_ok(), "text-align justify: {:?}", result.err());
1351    }
1352
1353    #[test]
1354    fn test_parse_line_height_property() {
1355        let xml = make_minimal_fo(r#"<fo:block line-height="1.5">Text</fo:block>"#);
1356        let cursor = Cursor::new(xml);
1357        let result = FoTreeBuilder::new().parse(cursor);
1358        assert!(result.is_ok(), "line-height: {:?}", result.err());
1359    }
1360
1361    #[test]
1362    fn test_parse_keep_together_property() {
1363        let xml = make_minimal_fo(
1364            r#"<fo:block keep-together.within-page="always">Kept together</fo:block>"#,
1365        );
1366        let cursor = Cursor::new(xml);
1367        let result = FoTreeBuilder::new().parse(cursor);
1368        assert!(result.is_ok(), "keep-together: {:?}", result.err());
1369    }
1370
1371    #[test]
1372    fn test_parse_background_color_property() {
1373        let xml = make_minimal_fo(r#"<fo:block background-color="yellow">Highlighted</fo:block>"#);
1374        let cursor = Cursor::new(xml);
1375        let result = FoTreeBuilder::new().parse(cursor);
1376        assert!(result.is_ok(), "background-color: {:?}", result.err());
1377    }
1378
1379    #[test]
1380    fn test_parse_multiple_page_sequences_with_content() {
1381        let xml = r#"<?xml version="1.0"?>
1382<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1383    <fo:layout-master-set>
1384        <fo:simple-page-master master-name="A4">
1385            <fo:region-body/>
1386        </fo:simple-page-master>
1387    </fo:layout-master-set>
1388    <fo:page-sequence master-reference="A4">
1389        <fo:flow flow-name="xsl-region-body">
1390            <fo:block>Page sequence 1</fo:block>
1391        </fo:flow>
1392    </fo:page-sequence>
1393    <fo:page-sequence master-reference="A4">
1394        <fo:flow flow-name="xsl-region-body">
1395            <fo:block>Page sequence 2</fo:block>
1396        </fo:flow>
1397    </fo:page-sequence>
1398    <fo:page-sequence master-reference="A4">
1399        <fo:flow flow-name="xsl-region-body">
1400            <fo:block>Page sequence 3</fo:block>
1401        </fo:flow>
1402    </fo:page-sequence>
1403</fo:root>"#;
1404        let cursor = Cursor::new(xml);
1405        let result = FoTreeBuilder::new().parse(cursor);
1406        assert!(
1407            result.is_ok(),
1408            "Multiple page sequences: {:?}",
1409            result.err()
1410        );
1411    }
1412
1413    #[test]
1414    fn test_parse_missing_flow_name_is_error() {
1415        let xml = r#"<?xml version="1.0"?>
1416<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1417    <fo:layout-master-set>
1418        <fo:simple-page-master master-name="A4">
1419            <fo:region-body/>
1420        </fo:simple-page-master>
1421    </fo:layout-master-set>
1422    <fo:page-sequence master-reference="A4">
1423        <fo:flow>
1424            <fo:block>No flow-name attribute</fo:block>
1425        </fo:flow>
1426    </fo:page-sequence>
1427</fo:root>"#;
1428        let cursor = Cursor::new(xml);
1429        let result = FoTreeBuilder::new().parse(cursor);
1430        assert!(result.is_err(), "Missing flow-name should be an error");
1431    }
1432
1433    #[test]
1434    fn test_parse_missing_master_name_is_error() {
1435        let xml = r#"<?xml version="1.0"?>
1436<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1437    <fo:layout-master-set>
1438        <fo:simple-page-master>
1439            <fo:region-body/>
1440        </fo:simple-page-master>
1441    </fo:layout-master-set>
1442    <fo:page-sequence master-reference="A4">
1443        <fo:flow flow-name="xsl-region-body">
1444            <fo:block>Text</fo:block>
1445        </fo:flow>
1446    </fo:page-sequence>
1447</fo:root>"#;
1448        let cursor = Cursor::new(xml);
1449        let result = FoTreeBuilder::new().parse(cursor);
1450        assert!(result.is_err(), "Missing master-name should be an error");
1451    }
1452
1453    #[test]
1454    fn test_parse_xml_lang_sets_document_lang() {
1455        let xml = r#"<?xml version="1.0"?>
1456<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format" xml:lang="en">
1457    <fo:layout-master-set>
1458        <fo:simple-page-master master-name="A4">
1459            <fo:region-body/>
1460        </fo:simple-page-master>
1461    </fo:layout-master-set>
1462    <fo:page-sequence master-reference="A4">
1463        <fo:flow flow-name="xsl-region-body">
1464            <fo:block>English text</fo:block>
1465        </fo:flow>
1466    </fo:page-sequence>
1467</fo:root>"#;
1468        let cursor = Cursor::new(xml);
1469        let arena = FoTreeBuilder::new()
1470            .parse(cursor)
1471            .expect("test: should succeed");
1472        assert_eq!(arena.document_lang, Some("en".to_string()));
1473    }
1474
1475    #[test]
1476    fn test_parse_document_without_lang_has_none() {
1477        let xml = make_minimal_fo("<fo:block>Text</fo:block>");
1478        let cursor = Cursor::new(xml);
1479        let arena = FoTreeBuilder::new()
1480            .parse(cursor)
1481            .expect("test: should succeed");
1482        assert!(arena.document_lang.is_none());
1483    }
1484
1485    #[test]
1486    fn test_parse_cdata_in_block() {
1487        let xml = make_minimal_fo(r#"<fo:block><![CDATA[<special> & content]]></fo:block>"#);
1488        let cursor = Cursor::new(xml);
1489        let result = FoTreeBuilder::new().parse(cursor);
1490        assert!(result.is_ok(), "CDATA in block: {:?}", result.err());
1491    }
1492
1493    #[test]
1494    fn test_xmp_packet_captured_from_declarations() {
1495        let xml = r##"<?xml version="1.0" encoding="utf-8"?>
1496<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1497  <fo:layout-master-set>
1498    <fo:simple-page-master master-name="A4" page-width="210mm" page-height="297mm">
1499      <fo:region-body margin="2cm"/>
1500    </fo:simple-page-master>
1501  </fo:layout-master-set>
1502  <fo:declarations>
1503    <x:xmpmeta xmlns:x="adobe:ns:meta/">
1504      <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
1505        <rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/" rdf:about="">
1506          <dc:title>
1507            <rdf:Alt><rdf:li xml:lang="x-default">Test Invoice</rdf:li></rdf:Alt>
1508          </dc:title>
1509        </rdf:Description>
1510      </rdf:RDF>
1511    </x:xmpmeta>
1512  </fo:declarations>
1513  <fo:page-sequence master-reference="A4">
1514    <fo:flow flow-name="xsl-region-body">
1515      <fo:block>Hello.</fo:block>
1516    </fo:flow>
1517  </fo:page-sequence>
1518</fo:root>"##;
1519
1520        let cursor = Cursor::new(xml);
1521        let arena = FoTreeBuilder::new()
1522            .parse(cursor)
1523            .expect("FO with fo:declarations + XMP metadata should parse successfully");
1524
1525        // Verify the XMP packet was captured
1526        assert_eq!(
1527            arena.xmp_packets.len(),
1528            1,
1529            "Should have exactly one XMP packet captured from fo:declarations"
1530        );
1531
1532        let packet = &arena.xmp_packets[0];
1533        assert!(
1534            packet.contains("xmpmeta"),
1535            "XMP packet should contain the xmpmeta element: {}",
1536            packet
1537        );
1538        assert!(
1539            packet.contains("Test Invoice"),
1540            "XMP packet should contain the dc:title value: {}",
1541            packet
1542        );
1543
1544        // Verify the document also has the correct page-sequence structure
1545        let page_seq_count = arena
1546            .iter()
1547            .filter(|(_, n)| matches!(n.data, FoNodeData::PageSequence { .. }))
1548            .count();
1549        assert_eq!(
1550            page_seq_count, 1,
1551            "Document should have exactly one page-sequence"
1552        );
1553    }
1554
1555    // ===== XMP NAMESPACE INHERITANCE TESTS =====
1556
1557    fn make_fo_with_declarations(declarations_content: &str) -> String {
1558        format!(
1559            r#"<?xml version="1.0"?>
1560<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"
1561         xmlns:x="adobe:ns:meta/"
1562         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
1563         xmlns:dc="http://purl.org/dc/elements/1.1/">
1564  <fo:layout-master-set>
1565    <fo:simple-page-master master-name="A4" page-height="297mm" page-width="210mm">
1566      <fo:region-body/>
1567    </fo:simple-page-master>
1568  </fo:layout-master-set>
1569  <fo:declarations>
1570    {}
1571  </fo:declarations>
1572  <fo:page-sequence master-reference="A4">
1573    <fo:flow flow-name="xsl-region-body">
1574      <fo:block>Hello.</fo:block>
1575    </fo:flow>
1576  </fo:page-sequence>
1577</fo:root>"#,
1578            declarations_content
1579        )
1580    }
1581
1582    #[test]
1583    fn test_xmp_namespace_inheritance_captures_inherited_xmlns() {
1584        // xmlns:x, xmlns:rdf, xmlns:dc declared on fo:root only — NOT on x:xmpmeta
1585        let fo = make_fo_with_declarations(
1586            r#"<x:xmpmeta>
1587      <rdf:RDF>
1588        <rdf:Description rdf:about="">
1589          <dc:title>
1590            <rdf:Alt><rdf:li xml:lang="x-default">Test Invoice</rdf:li></rdf:Alt>
1591          </dc:title>
1592        </rdf:Description>
1593      </rdf:RDF>
1594    </x:xmpmeta>"#,
1595        );
1596
1597        let cursor = Cursor::new(fo);
1598        let arena = FoTreeBuilder::new()
1599            .parse(cursor)
1600            .expect("FO with inherited xmlns should parse");
1601
1602        assert_eq!(arena.xmp_packets.len(), 1, "should have one XMP packet");
1603        let packet = &arena.xmp_packets[0];
1604
1605        // All three prefixes must be declared on the captured root
1606        assert!(packet.contains("xmlns:x="), "missing xmlns:x in: {packet}");
1607        assert!(
1608            packet.contains("xmlns:rdf="),
1609            "missing xmlns:rdf in: {packet}"
1610        );
1611        assert!(
1612            packet.contains("xmlns:dc="),
1613            "missing xmlns:dc in: {packet}"
1614        );
1615
1616        // Must not duplicate — each prefix appears exactly once
1617        assert_eq!(
1618            packet.matches("xmlns:x=").count(),
1619            1,
1620            "xmlns:x duplicated in: {packet}"
1621        );
1622        assert_eq!(
1623            packet.matches("xmlns:rdf=").count(),
1624            1,
1625            "xmlns:rdf duplicated in: {packet}"
1626        );
1627        assert_eq!(
1628            packet.matches("xmlns:dc=").count(),
1629            1,
1630            "xmlns:dc duplicated in: {packet}"
1631        );
1632    }
1633
1634    #[test]
1635    fn test_xmp_well_formed_via_ns_reader() {
1636        // Same FO as above — after capture, feed the packet to NsReader
1637        // and assert no undefined prefixes
1638        let fo = make_fo_with_declarations(
1639            r#"<x:xmpmeta>
1640      <rdf:RDF>
1641        <rdf:Description rdf:about="">
1642          <dc:title><rdf:Alt><rdf:li xml:lang="x-default">Invoice</rdf:li></rdf:Alt></dc:title>
1643        </rdf:Description>
1644      </rdf:RDF>
1645    </x:xmpmeta>"#,
1646        );
1647
1648        let cursor = Cursor::new(fo);
1649        let arena = FoTreeBuilder::new()
1650            .parse(cursor)
1651            .expect("FO with inherited xmlns should parse");
1652
1653        let packet = &arena.xmp_packets[0];
1654
1655        use quick_xml::name::ResolveResult;
1656        use quick_xml::NsReader;
1657        let mut ns_reader = NsReader::from_str(packet);
1658        ns_reader.config_mut().trim_text(true);
1659        let mut buf = Vec::new();
1660        loop {
1661            match ns_reader.read_resolved_event_into(&mut buf) {
1662                Ok((ResolveResult::Unknown(prefix), _)) => {
1663                    panic!(
1664                        "undefined prefix in captured XMP packet: {:?}",
1665                        std::str::from_utf8(&prefix)
1666                    );
1667                }
1668                Ok((_, quick_xml::events::Event::Eof)) => break,
1669                Ok(_) => {}
1670                Err(e) => panic!("parse error in captured XMP packet: {e}"),
1671            }
1672            buf.clear();
1673        }
1674    }
1675
1676    #[test]
1677    fn test_xmp_capture_round_trips_cdata() {
1678        // xmlns:x and xmlns:rdf declared on xmpmeta directly (no inheritance needed)
1679        let fo = make_fo_with_declarations(
1680            r#"<x:xmpmeta xmlns:x="adobe:ns:meta/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
1681      <rdf:RDF><![CDATA[<not-an-element/>]]></rdf:RDF>
1682    </x:xmpmeta>"#,
1683        );
1684
1685        let cursor = Cursor::new(fo);
1686        let arena = FoTreeBuilder::new()
1687            .parse(cursor)
1688            .expect("FO with CDATA in XMP should parse");
1689
1690        assert_eq!(arena.xmp_packets.len(), 1);
1691        assert!(
1692            arena.xmp_packets[0].contains("<![CDATA[<not-an-element/>]]>"),
1693            "CDATA dropped: {}",
1694            arena.xmp_packets[0]
1695        );
1696    }
1697
1698    #[test]
1699    fn test_xmp_capture_round_trips_comment() {
1700        let fo = make_fo_with_declarations(
1701            r#"<x:xmpmeta xmlns:x="adobe:ns:meta/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
1702      <!-- intentional comment -->
1703      <rdf:RDF/>
1704    </x:xmpmeta>"#,
1705        );
1706
1707        let cursor = Cursor::new(fo);
1708        let arena = FoTreeBuilder::new()
1709            .parse(cursor)
1710            .expect("FO with comment in XMP should parse");
1711
1712        assert_eq!(arena.xmp_packets.len(), 1);
1713        let packet = &arena.xmp_packets[0];
1714        // Comment content should be preserved (with or without surrounding spaces depending on trim)
1715        assert!(
1716            packet.contains("<!-- intentional comment -->")
1717                || packet.contains("<!--intentional comment-->")
1718                || packet.contains("<!-- intentional comment-->")
1719                || packet.contains("<!--intentional comment -->"),
1720            "comment dropped: {packet}"
1721        );
1722    }
1723
1724    #[test]
1725    fn test_xmp_no_injection_when_all_declared_locally() {
1726        // When all prefixes are declared on the xmpmeta root itself, no injection needed.
1727        // The canary test already covers this; this test makes it explicit.
1728        let fo = r##"<?xml version="1.0" encoding="utf-8"?>
1729<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1730  <fo:layout-master-set>
1731    <fo:simple-page-master master-name="A4" page-width="210mm" page-height="297mm">
1732      <fo:region-body margin="2cm"/>
1733    </fo:simple-page-master>
1734  </fo:layout-master-set>
1735  <fo:declarations>
1736    <x:xmpmeta xmlns:x="adobe:ns:meta/">
1737      <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
1738        <rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/" rdf:about="">
1739          <dc:title>Local Decl Test</dc:title>
1740        </rdf:Description>
1741      </rdf:RDF>
1742    </x:xmpmeta>
1743  </fo:declarations>
1744  <fo:page-sequence master-reference="A4">
1745    <fo:flow flow-name="xsl-region-body">
1746      <fo:block>Hello.</fo:block>
1747    </fo:flow>
1748  </fo:page-sequence>
1749</fo:root>"##;
1750
1751        let cursor = Cursor::new(fo);
1752        let arena = FoTreeBuilder::new()
1753            .parse(cursor)
1754            .expect("locally-declared prefixes should parse");
1755
1756        assert_eq!(arena.xmp_packets.len(), 1);
1757        let packet = &arena.xmp_packets[0];
1758        // xmlns:x is declared on the root — count must remain exactly 1 (no injection)
1759        assert_eq!(
1760            packet.matches("xmlns:x=").count(),
1761            1,
1762            "xmlns:x must appear exactly once (no double-injection): {packet}"
1763        );
1764        assert!(
1765            packet.contains("Local Decl Test"),
1766            "content preserved: {packet}"
1767        );
1768    }
1769}