1mod node_factory;
9mod property_parser;
10mod xmlns;
11
12use crate::properties::PropertyList;
13use crate::tree::{FoArena, FoNode, FoNodeData, NodeId};
14use crate::xml::XmlParser;
15use crate::{FopError, Result};
16use quick_xml::events::Event;
17use std::collections::BTreeSet;
18use std::io::BufRead;
19
/// State of an in-progress capture of a non-FO element subtree (started by
/// `try_begin_xmp_capture` for an `xmpmeta` element), together with the
/// namespace bookkeeping used to patch the captured fragment when it closes.
struct CaptureNs {
    /// Raw XML text accumulated so far, beginning with the root start tag.
    buffer: String,
    /// Number of currently open elements nested below the captured root;
    /// incremented on each start tag and decremented on each nested end tag.
    depth: usize,
    /// Byte offset within `buffer` of the `>` that ends the root start tag.
    root_close_byte: usize,
    /// Snapshot returned by `XmlParser::snapshot_in_scope` when capture began.
    /// Entries are looked up by their first element as a prefix
    /// (presumably `(prefix, namespace URI)` pairs — confirm against `XmlParser`).
    in_scope_at_start: Vec<(String, String)>,
    /// Result of `xmlns::declared_on_element` for the root start tag.
    declared_on_root: BTreeSet<String>,
    /// Names collected by `xmlns::scan_prefixes_used` from the root start tag
    /// and every start/empty tag seen inside the capture.
    used_in_subtree: BTreeSet<String>,
}
37
/// Event-driven builder that consumes an XML stream and fills an [`FoArena`]
/// with the FO nodes it encounters.
pub struct FoTreeBuilder<'a> {
    /// Arena that receives every node created during parsing.
    arena: FoArena<'a>,
    /// Innermost FO element that is still open; new nodes are appended to it.
    current_node: Option<NodeId>,
    /// Number of open non-FO elements currently being buffered as foreign XML.
    foreign_object_depth: usize,
    /// Raw XML collected for the current `instream-foreign-object`.
    foreign_xml_buffer: String,
    /// Node that receives `foreign_xml_buffer` when the foreign object is finalized.
    foreign_object_node: Option<NodeId>,
    /// Number of open non-FO elements outside a foreign object whose end tags
    /// must be skipped.
    non_fo_depth: usize,
    /// Active XMP capture, if one has been started by `try_begin_xmp_capture`.
    xmp_buffer: Option<CaptureNs>,
}
59
60impl<'a> FoTreeBuilder<'a> {
61 pub fn new() -> Self {
63 Self {
64 arena: FoArena::new(),
65 current_node: None,
66 foreign_object_depth: 0,
67 foreign_xml_buffer: String::new(),
68 foreign_object_node: None,
69 non_fo_depth: 0,
70 xmp_buffer: None::<CaptureNs>,
71 }
72 }
73
74 pub fn parse<R: BufRead>(mut self, reader: R) -> Result<FoArena<'a>> {
76 let mut parser = XmlParser::new(reader);
77
78 loop {
79 let event = parser.read_event()?;
80
81 match &event {
84 Event::Start(start) | Event::Empty(start) => {
85 parser.push_namespace_scope(start);
86 }
87 _ => {}
88 }
89
90 let should_pop = matches!(&event, Event::End(_) | Event::Empty(_));
93
94 let result = self.dispatch_event(&event, &parser);
95
96 if should_pop {
98 parser.pop_namespace_scope();
99 }
100
101 result?;
103
104 if matches!(&event, Event::Eof) {
105 break;
106 }
107 }
108
109 Ok(self.arena)
110 }
111
112 fn dispatch_event<R: BufRead>(
114 &mut self,
115 event: &Event<'static>,
116 parser: &XmlParser<R>,
117 ) -> Result<()> {
118 if self.xmp_buffer.is_some() {
120 return self.handle_xmp_event(event, parser);
121 }
122
123 if self.foreign_object_depth > 0 {
125 return self.handle_foreign_child_event(event, parser);
126 }
127
128 match event {
130 Event::Start(start) => {
131 let (name, ns) = parser.extract_name(start)?;
132
133 if ns.is_fo() {
134 self.start_element(&name, start, parser)?;
135 } else if self.foreign_object_node.is_some() {
136 let raw = std::str::from_utf8(start.as_ref())
138 .unwrap_or("")
139 .to_string();
140 self.foreign_xml_buffer.push('<');
141 self.foreign_xml_buffer.push_str(&raw);
142 self.foreign_xml_buffer.push('>');
143 self.foreign_object_depth += 1;
144 } else {
145 self.non_fo_depth += 1;
148 self.try_begin_xmp_capture(start, parser);
149 }
150 }
151 Event::Empty(start) => {
152 let (name, ns) = parser.extract_name(start)?;
153
154 if ns.is_fo() {
155 self.start_element(&name, start, parser)?;
156 self.end_element()?;
157 } else if self.foreign_object_node.is_some() {
158 let raw = std::str::from_utf8(start.as_ref())
160 .unwrap_or("")
161 .to_string();
162 self.foreign_xml_buffer.push('<');
163 self.foreign_xml_buffer.push_str(&raw);
164 self.foreign_xml_buffer.push_str("/>");
165 }
166 }
168 Event::End(_) => {
169 if self.foreign_object_node.is_some() && self.foreign_object_depth == 0 {
170 self.finalize_foreign_object();
172 }
173 if self.non_fo_depth > 0 {
175 self.non_fo_depth -= 1;
176 return Ok(());
177 }
178 self.end_element()?;
179 }
180 Event::Text(text) => {
181 let text_content = parser.extract_text(text)?;
182 let trimmed = text_content.trim();
183 if !trimmed.is_empty() {
184 self.add_text(trimmed)?;
185 }
186 }
187 Event::CData(cdata) => {
188 let cdata_content = parser.extract_cdata(cdata)?;
189 if !cdata_content.is_empty() {
190 self.add_text(&cdata_content)?;
191 }
192 }
193 _ => {}
194 }
195
196 Ok(())
197 }
198
199 fn handle_xmp_event<R: BufRead>(
201 &mut self,
202 event: &Event<'static>,
203 parser: &XmlParser<R>,
204 ) -> Result<()> {
205 match event {
206 Event::Start(start) => {
207 let raw = std::str::from_utf8(start.as_ref())
208 .unwrap_or("")
209 .to_string();
210 if let Some(cap) = &mut self.xmp_buffer {
211 cap.buffer.push('<');
212 cap.buffer.push_str(&raw);
213 cap.buffer.push('>');
214 cap.depth += 1;
215 xmlns::scan_prefixes_used(start, &mut cap.used_in_subtree);
216 }
217 }
218 Event::Empty(start) => {
219 let raw = std::str::from_utf8(start.as_ref())
220 .unwrap_or("")
221 .to_string();
222 if let Some(cap) = &mut self.xmp_buffer {
223 cap.buffer.push('<');
224 cap.buffer.push_str(&raw);
225 cap.buffer.push_str("/>");
226 xmlns::scan_prefixes_used(start, &mut cap.used_in_subtree);
227 }
228 }
229 Event::End(end) => {
230 let raw = std::str::from_utf8(end.as_ref()).unwrap_or("").to_string();
231 let depth = self.xmp_buffer.as_ref().map(|c| c.depth).unwrap_or(0);
232 if depth > 0 {
233 if let Some(cap) = &mut self.xmp_buffer {
234 cap.buffer.push_str("</");
235 cap.buffer.push_str(&raw);
236 cap.buffer.push('>');
237 cap.depth -= 1;
238 }
239 } else {
240 if let Some(mut cap) = self.xmp_buffer.take() {
242 cap.buffer.push_str("</");
243 cap.buffer.push_str(&raw);
244 cap.buffer.push('>');
245
246 let to_inject: Vec<(String, String)> = cap
248 .used_in_subtree
249 .iter()
250 .filter(|p| !cap.declared_on_root.contains(*p))
251 .filter_map(|p| {
252 cap.in_scope_at_start
253 .iter()
254 .find(|(sp, _)| sp == p)
255 .map(|(sp, su)| (sp.clone(), su.clone()))
256 })
257 .collect();
258
259 let decls_block = xmlns::render_xmlns_attrs(&to_inject);
260 let patched = xmlns::inject_namespace_decls(
261 &cap.buffer,
262 &decls_block,
263 cap.root_close_byte,
264 );
265 self.arena.xmp_packets.push(patched);
266 }
267 if self.non_fo_depth > 0 {
269 self.non_fo_depth -= 1;
270 }
271 }
272 }
273 Event::Text(text) => {
274 let text_content = parser.extract_text(text).unwrap_or_default();
275 if let Some(cap) = &mut self.xmp_buffer {
276 cap.buffer.push_str(&text_content);
277 }
278 }
279 Event::CData(cdata) => {
280 let raw = std::str::from_utf8(cdata.as_ref()).unwrap_or("");
281 if let Some(cap) = &mut self.xmp_buffer {
282 cap.buffer.push_str("<![CDATA[");
283 cap.buffer.push_str(raw);
284 cap.buffer.push_str("]]>");
285 }
286 }
287 Event::Comment(comment) => {
288 let raw = std::str::from_utf8(comment.as_ref()).unwrap_or("");
289 if let Some(cap) = &mut self.xmp_buffer {
290 cap.buffer.push_str("<!--");
291 cap.buffer.push_str(raw);
292 cap.buffer.push_str("-->");
293 }
294 }
295 _ => {}
296 }
297 Ok(())
298 }
299
300 fn handle_foreign_child_event<R: BufRead>(
302 &mut self,
303 event: &Event<'static>,
304 parser: &XmlParser<R>,
305 ) -> Result<()> {
306 match event {
307 Event::Start(start) => {
308 let raw = std::str::from_utf8(start.as_ref())
309 .unwrap_or("")
310 .to_string();
311 self.foreign_xml_buffer.push('<');
312 self.foreign_xml_buffer.push_str(&raw);
313 self.foreign_xml_buffer.push('>');
314 self.foreign_object_depth += 1;
315 }
316 Event::Empty(start) => {
317 let raw = std::str::from_utf8(start.as_ref())
318 .unwrap_or("")
319 .to_string();
320 self.foreign_xml_buffer.push('<');
321 self.foreign_xml_buffer.push_str(&raw);
322 self.foreign_xml_buffer.push_str("/>");
323 }
324 Event::End(end) => {
325 self.foreign_object_depth -= 1;
326 if self.foreign_object_depth > 0 {
327 let raw = std::str::from_utf8(end.as_ref()).unwrap_or("").to_string();
328 self.foreign_xml_buffer.push_str("</");
329 self.foreign_xml_buffer.push_str(&raw);
330 self.foreign_xml_buffer.push('>');
331 }
332 }
334 Event::Text(text) => {
335 let text_content = parser.extract_text(text).unwrap_or_default();
336 self.foreign_xml_buffer.push_str(&text_content);
337 }
338 Event::CData(cdata) => {
339 let raw = std::str::from_utf8(cdata.as_ref()).unwrap_or("");
340 self.foreign_xml_buffer.push_str("<![CDATA[");
341 self.foreign_xml_buffer.push_str(raw);
342 self.foreign_xml_buffer.push_str("]]>");
343 }
344 Event::Comment(comment) => {
345 let raw = std::str::from_utf8(comment.as_ref()).unwrap_or("");
346 self.foreign_xml_buffer.push_str("<!--");
347 self.foreign_xml_buffer.push_str(raw);
348 self.foreign_xml_buffer.push_str("-->");
349 }
350 _ => {}
351 }
352 Ok(())
353 }
354
355 fn try_begin_xmp_capture<R: BufRead>(
357 &mut self,
358 start: &quick_xml::events::BytesStart<'_>,
359 parser: &XmlParser<R>,
360 ) {
361 let is_declarations_parent = self
362 .current_node
363 .and_then(|id| self.arena.get(id))
364 .map(|n| matches!(n.data, FoNodeData::Declarations))
365 .unwrap_or(false);
366
367 if !is_declarations_parent {
368 return;
369 }
370
371 let raw = std::str::from_utf8(start.as_ref())
372 .unwrap_or("")
373 .to_string();
374 let local_name = raw
376 .split_once(':')
377 .map(|(_, local)| local)
378 .unwrap_or(raw.as_str());
379 let local_tag = local_name
381 .split_once(|c: char| c.is_ascii_whitespace())
382 .map(|(tag, _)| tag)
383 .unwrap_or(local_name);
384 if local_tag == "xmpmeta" {
385 let mut buf = String::new();
386 buf.push('<');
387 buf.push_str(&raw);
388 buf.push('>');
389 let root_close_byte = buf.len() - 1; let in_scope_at_start = parser.snapshot_in_scope();
394 let declared_on_root = xmlns::declared_on_element(start);
395 let mut used_in_subtree = BTreeSet::new();
396 xmlns::scan_prefixes_used(start, &mut used_in_subtree);
397
398 self.xmp_buffer = Some(CaptureNs {
399 buffer: buf,
400 depth: 0,
401 root_close_byte,
402 in_scope_at_start,
403 declared_on_root,
404 used_in_subtree,
405 });
406 }
407 }
408
409 fn finalize_foreign_object(&mut self) {
411 if let Some(node_id) = self.foreign_object_node.take() {
412 let xml = std::mem::take(&mut self.foreign_xml_buffer);
413 if let Some(node) = self.arena.get_mut(node_id) {
414 if let FoNodeData::InstreamForeignObject { foreign_xml, .. } = &mut node.data {
415 *foreign_xml = xml;
416 }
417 }
418 }
419 }
420
421 fn start_element(
423 &mut self,
424 name: &str,
425 start: &quick_xml::events::BytesStart,
426 parser: &XmlParser<impl BufRead>,
427 ) -> Result<()> {
428 let mut properties = PropertyList::new();
430
431 let attributes = parser.extract_attributes(start)?;
433
434 let element_id = attributes
436 .iter()
437 .find(|(k, _)| k == "id")
438 .map(|(_, v)| v.clone());
439
440 node_factory::populate_properties(&mut properties, &attributes)?;
442
443 properties.validate()?;
445
446 if name == "root" {
448 if let Some((_, lang)) = attributes
449 .iter()
450 .find(|(k, _)| k == "xml:lang" || k == "xml-lang")
451 {
452 self.arena.document_lang = Some(lang.clone());
453 }
454 }
455
456 let node_data = node_factory::create_node_data(name, &attributes, properties)?;
458 let node = FoNode::new_with_id(node_data, element_id.clone());
459 let node_id = self.arena.add_node(node);
460
461 if let Some(id) = element_id {
463 self.arena.id_registry_mut().register_id(id, node_id)?;
464 }
465
466 if let Some(parent_id) = self.current_node {
468 self.arena
469 .append_child(parent_id, node_id)
470 .map_err(FopError::Generic)?;
471 }
472
473 if name == "instream-foreign-object" {
475 self.foreign_object_node = Some(node_id);
476 self.foreign_xml_buffer.clear();
477 self.foreign_object_depth = 0;
478 }
479
480 self.current_node = Some(node_id);
482
483 Ok(())
484 }
485
486 fn end_element(&mut self) -> Result<()> {
488 if let Some(current) = self.current_node {
489 self.current_node = self.arena.get(current).and_then(|n| n.parent);
491 }
492 Ok(())
493 }
494
495 fn add_text(&mut self, text: &str) -> Result<()> {
497 if let Some(parent_id) = self.current_node {
498 if let Some(parent) = self.arena.get(parent_id) {
500 if parent.data.can_contain_text() {
501 let text_node = FoNode::new(FoNodeData::Text(text.to_string()));
502 let text_id = self.arena.add_node(text_node);
503 self.arena
504 .append_child(parent_id, text_id)
505 .map_err(FopError::Generic)?;
506 }
507 }
508 }
509 Ok(())
510 }
511}
512
513impl<'a> Default for FoTreeBuilder<'a> {
514 fn default() -> Self {
515 Self::new()
516 }
517}
518
519#[cfg(test)]
520mod tests {
521 use super::*;
522 use crate::PropertyId;
523 use std::io::Cursor;
524
525 #[test]
526 fn test_parse_simple_document() {
527 let xml = r#"<?xml version="1.0"?>
528<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
529 <fo:layout-master-set>
530 <fo:simple-page-master master-name="A4">
531 <fo:region-body/>
532 </fo:simple-page-master>
533 </fo:layout-master-set>
534</fo:root>"#;
535
536 let cursor = Cursor::new(xml);
537 let builder = FoTreeBuilder::new();
538 let arena = builder.parse(cursor).expect("test: should succeed");
539
540 assert!(!arena.is_empty());
541 assert_eq!(arena.len(), 4); }
543
544 #[test]
545 fn test_parse_with_text() {
546 let xml = r#"<?xml version="1.0"?>
547<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
548 <fo:layout-master-set>
549 <fo:simple-page-master master-name="A4">
550 <fo:region-body/>
551 </fo:simple-page-master>
552 </fo:layout-master-set>
553 <fo:page-sequence master-reference="A4">
554 <fo:flow flow-name="xsl-region-body">
555 <fo:block>Hello World</fo:block>
556 </fo:flow>
557 </fo:page-sequence>
558</fo:root>"#;
559
560 let cursor = Cursor::new(xml);
561 let builder = FoTreeBuilder::new();
562 let arena = builder.parse(cursor).expect("test: should succeed");
563
564 assert!(arena.len() >= 8);
567 }
568
569 #[test]
570 fn test_property_parsing() {
571 let xml = r#"<?xml version="1.0"?>
572<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
573 <fo:layout-master-set>
574 <fo:simple-page-master master-name="A4" page-width="210mm" page-height="297mm">
575 <fo:region-body margin="1in"/>
576 </fo:simple-page-master>
577 </fo:layout-master-set>
578</fo:root>"#;
579
580 let cursor = Cursor::new(xml);
581 let builder = FoTreeBuilder::new();
582 let arena = builder.parse(cursor).expect("test: should succeed");
583
584 for (_, node) in arena.iter() {
586 if let Some(props) = node.data.properties() {
587 let _ = props.get(PropertyId::PageWidth);
589 }
590 }
591 }
592
593 #[test]
594 fn test_parse_document_with_block_and_inline() {
595 let xml = r#"<?xml version="1.0"?>
596<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
597 <fo:layout-master-set>
598 <fo:simple-page-master master-name="A4">
599 <fo:region-body/>
600 </fo:simple-page-master>
601 </fo:layout-master-set>
602 <fo:page-sequence master-reference="A4">
603 <fo:flow flow-name="xsl-region-body">
604 <fo:block>
605 <fo:inline font-weight="bold">Bold text</fo:inline>
606 Normal text
607 </fo:block>
608 </fo:flow>
609 </fo:page-sequence>
610</fo:root>"#;
611
612 let cursor = Cursor::new(xml);
613 let builder = FoTreeBuilder::new();
614 let arena = builder.parse(cursor).expect("test: should succeed");
615
616 assert!(arena.len() >= 8);
619 }
620
621 #[test]
622 fn test_parse_document_with_multiple_blocks() {
623 let xml = r#"<?xml version="1.0"?>
624<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
625 <fo:layout-master-set>
626 <fo:simple-page-master master-name="A4">
627 <fo:region-body/>
628 </fo:simple-page-master>
629 </fo:layout-master-set>
630 <fo:page-sequence master-reference="A4">
631 <fo:flow flow-name="xsl-region-body">
632 <fo:block>First block</fo:block>
633 <fo:block>Second block</fo:block>
634 <fo:block>Third block</fo:block>
635 </fo:flow>
636 </fo:page-sequence>
637</fo:root>"#;
638
639 let cursor = Cursor::new(xml);
640 let builder = FoTreeBuilder::new();
641 let arena = builder.parse(cursor).expect("test: should succeed");
642
643 assert!(arena.len() >= 9);
646 }
647
648 #[test]
649 fn test_parse_document_with_font_properties() {
650 let xml = r#"<?xml version="1.0"?>
651<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
652 <fo:layout-master-set>
653 <fo:simple-page-master master-name="A4">
654 <fo:region-body/>
655 </fo:simple-page-master>
656 </fo:layout-master-set>
657 <fo:page-sequence master-reference="A4">
658 <fo:flow flow-name="xsl-region-body">
659 <fo:block font-size="14pt" font-family="Arial" color="red">Styled text</fo:block>
660 </fo:flow>
661 </fo:page-sequence>
662</fo:root>"#;
663
664 let cursor = Cursor::new(xml);
665 let builder = FoTreeBuilder::new();
666 let result = builder.parse(cursor);
667 assert!(
668 result.is_ok(),
669 "Should parse document with font properties: {:?}",
670 result.err()
671 );
672
673 let arena = result.expect("test: should succeed");
674 assert!(arena.len() >= 7);
675 }
676
677 #[test]
678 fn test_parse_document_with_list() {
679 let xml = r#"<?xml version="1.0"?>
680<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
681 <fo:layout-master-set>
682 <fo:simple-page-master master-name="A4">
683 <fo:region-body/>
684 </fo:simple-page-master>
685 </fo:layout-master-set>
686 <fo:page-sequence master-reference="A4">
687 <fo:flow flow-name="xsl-region-body">
688 <fo:list-block>
689 <fo:list-item>
690 <fo:list-item-label><fo:block>1.</fo:block></fo:list-item-label>
691 <fo:list-item-body><fo:block>Item one</fo:block></fo:list-item-body>
692 </fo:list-item>
693 </fo:list-block>
694 </fo:flow>
695 </fo:page-sequence>
696</fo:root>"#;
697
698 let cursor = Cursor::new(xml);
699 let builder = FoTreeBuilder::new();
700 let result = builder.parse(cursor);
701 assert!(
702 result.is_ok(),
703 "Should parse list structure: {:?}",
704 result.err()
705 );
706 }
707
708 #[test]
709 fn test_parse_document_with_cdata() {
710 let xml = r#"<?xml version="1.0"?>
711<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
712 <fo:layout-master-set>
713 <fo:simple-page-master master-name="A4">
714 <fo:region-body/>
715 </fo:simple-page-master>
716 </fo:layout-master-set>
717 <fo:page-sequence master-reference="A4">
718 <fo:flow flow-name="xsl-region-body">
719 <fo:block><![CDATA[Text with <special> & chars]]></fo:block>
720 </fo:flow>
721 </fo:page-sequence>
722</fo:root>"#;
723
724 let cursor = Cursor::new(xml);
725 let builder = FoTreeBuilder::new();
726 let result = builder.parse(cursor);
727 assert!(
729 result.is_ok(),
730 "Should parse CDATA sections: {:?}",
731 result.err()
732 );
733
734 let arena = result.expect("test: should succeed");
735 let has_cdata_text = arena.iter().any(|(_, node)| {
737 if let FoNodeData::Text(text) = &node.data {
738 text.contains("Text with")
739 } else {
740 false
741 }
742 });
743 assert!(
744 has_cdata_text,
745 "CDATA content should be stored as text node"
746 );
747 }
748
749 #[test]
750 fn test_parse_invalid_xml_returns_error() {
751 let xml = r#"<?xml version="1.0"?>
752<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
753 <fo:layout-master-set>
754 <fo:unclosed-element>
755 </fo:layout-master-set>
756</fo:root>"#;
757
758 let cursor = Cursor::new(xml);
759 let builder = FoTreeBuilder::new();
760 let result = builder.parse(cursor);
763 let _ = result;
765 }
766
767 #[test]
768 fn test_parse_document_with_multiple_page_sequences() {
769 let xml = r#"<?xml version="1.0"?>
770<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
771 <fo:layout-master-set>
772 <fo:simple-page-master master-name="A4">
773 <fo:region-body/>
774 </fo:simple-page-master>
775 </fo:layout-master-set>
776 <fo:page-sequence master-reference="A4">
777 <fo:flow flow-name="xsl-region-body">
778 <fo:block>Page 1 content</fo:block>
779 </fo:flow>
780 </fo:page-sequence>
781 <fo:page-sequence master-reference="A4">
782 <fo:flow flow-name="xsl-region-body">
783 <fo:block>Page 2 content</fo:block>
784 </fo:flow>
785 </fo:page-sequence>
786</fo:root>"#;
787
788 let cursor = Cursor::new(xml);
789 let builder = FoTreeBuilder::new();
790 let result = builder.parse(cursor);
791 assert!(result.is_ok(), "Should parse multiple page sequences");
792 }
793
794 #[test]
795 fn test_parse_document_with_margin_property() {
796 let xml = r#"<?xml version="1.0"?>
797<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
798 <fo:layout-master-set>
799 <fo:simple-page-master master-name="A4">
800 <fo:region-body margin-top="1cm" margin-bottom="2cm"/>
801 </fo:simple-page-master>
802 </fo:layout-master-set>
803</fo:root>"#;
804
805 let cursor = Cursor::new(xml);
806 let builder = FoTreeBuilder::new();
807 let result = builder.parse(cursor);
808 assert!(result.is_ok(), "Should parse margin properties");
809 }
810
811 #[test]
812 fn test_parse_document_with_table() {
813 let xml = r#"<?xml version="1.0"?>
814<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
815 <fo:layout-master-set>
816 <fo:simple-page-master master-name="A4">
817 <fo:region-body/>
818 </fo:simple-page-master>
819 </fo:layout-master-set>
820 <fo:page-sequence master-reference="A4">
821 <fo:flow flow-name="xsl-region-body">
822 <fo:table>
823 <fo:table-body>
824 <fo:table-row>
825 <fo:table-cell>
826 <fo:block>Cell content</fo:block>
827 </fo:table-cell>
828 </fo:table-row>
829 </fo:table-body>
830 </fo:table>
831 </fo:flow>
832 </fo:page-sequence>
833</fo:root>"#;
834
835 let cursor = Cursor::new(xml);
836 let builder = FoTreeBuilder::new();
837 let result = builder.parse(cursor);
838 assert!(
839 result.is_ok(),
840 "Should parse table structure: {:?}",
841 result.err()
842 );
843 }
844
845 #[test]
846 fn test_parse_document_is_not_empty() {
847 let xml = r#"<?xml version="1.0"?>
848<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
849 <fo:layout-master-set>
850 <fo:simple-page-master master-name="A4">
851 <fo:region-body/>
852 </fo:simple-page-master>
853 </fo:layout-master-set>
854</fo:root>"#;
855
856 let cursor = Cursor::new(xml);
857 let builder = FoTreeBuilder::new();
858 let arena = builder.parse(cursor).expect("test: should succeed");
859
860 assert!(!arena.is_empty());
861 assert!(!arena.is_empty());
862 }
863
864 #[test]
865 fn test_parse_preserves_text_content() {
866 let xml = r#"<?xml version="1.0"?>
867<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
868 <fo:layout-master-set>
869 <fo:simple-page-master master-name="A4">
870 <fo:region-body/>
871 </fo:simple-page-master>
872 </fo:layout-master-set>
873 <fo:page-sequence master-reference="A4">
874 <fo:flow flow-name="xsl-region-body">
875 <fo:block>Hello World</fo:block>
876 </fo:flow>
877 </fo:page-sequence>
878</fo:root>"#;
879
880 let cursor = Cursor::new(xml);
881 let builder = FoTreeBuilder::new();
882 let arena = builder.parse(cursor).expect("test: should succeed");
883
884 let text_found = arena
886 .iter()
887 .any(|(_, node)| matches!(&node.data, FoNodeData::Text(t) if t == "Hello World"));
888 assert!(text_found, "Text content should be preserved in tree");
889 }
890
891 #[test]
892 fn test_parse_document_with_whitespace_only_text_is_trimmed() {
893 let xml = r#"<?xml version="1.0"?>
894<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
895 <fo:layout-master-set>
896 <fo:simple-page-master master-name="A4">
897 <fo:region-body/>
898 </fo:simple-page-master>
899 </fo:layout-master-set>
900</fo:root>"#;
901
902 let cursor = Cursor::new(xml);
903 let builder = FoTreeBuilder::new();
904 let arena = builder.parse(cursor).expect("test: should succeed");
905
906 let whitespace_only_text = arena.iter().any(|(_, node)| {
908 matches!(&node.data, FoNodeData::Text(t) if t.trim().is_empty() && !t.is_empty())
909 });
910 assert!(
911 !whitespace_only_text,
912 "Whitespace-only text nodes should be stripped"
913 );
914 }
915
916 #[test]
917 fn test_parse_document_with_processing_instruction() {
918 let xml = r#"<?xml version="1.0"?>
919<?fop-processor key="value"?>
920<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
921 <fo:layout-master-set>
922 <fo:simple-page-master master-name="A4">
923 <fo:region-body/>
924 </fo:simple-page-master>
925 </fo:layout-master-set>
926</fo:root>"#;
927
928 let cursor = Cursor::new(xml);
929 let builder = FoTreeBuilder::new();
930 let result = builder.parse(cursor);
931 assert!(
933 result.is_ok(),
934 "Processing instructions should be handled gracefully"
935 );
936 }
937
938 #[test]
939 fn test_parse_document_with_xml_comment() {
940 let xml = r#"<?xml version="1.0"?>
941<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
942 <!-- This is a comment -->
943 <fo:layout-master-set>
944 <fo:simple-page-master master-name="A4">
945 <!-- Page master comment -->
946 <fo:region-body/>
947 </fo:simple-page-master>
948 </fo:layout-master-set>
949</fo:root>"#;
950
951 let cursor = Cursor::new(xml);
952 let builder = FoTreeBuilder::new();
953 let result = builder.parse(cursor);
954 assert!(result.is_ok(), "XML comments should be handled gracefully");
955 }
956
957 #[test]
958 fn test_parse_font_size_in_pts() {
959 let xml = r#"<?xml version="1.0"?>
960<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
961 <fo:layout-master-set>
962 <fo:simple-page-master master-name="A4">
963 <fo:region-body/>
964 </fo:simple-page-master>
965 </fo:layout-master-set>
966 <fo:page-sequence master-reference="A4">
967 <fo:flow flow-name="xsl-region-body">
968 <fo:block font-size="16pt">Large text</fo:block>
969 </fo:flow>
970 </fo:page-sequence>
971</fo:root>"#;
972
973 let cursor = Cursor::new(xml);
974 let builder = FoTreeBuilder::new();
975 let result = builder.parse(cursor);
976 assert!(result.is_ok());
977
978 let arena = result.expect("test: should succeed");
979 for (_, node) in arena.iter() {
981 if let FoNodeData::Block { properties } = &node.data {
982 if properties.is_explicit(PropertyId::FontSize) {
983 let font_size = properties
984 .get(PropertyId::FontSize)
985 .expect("test: should succeed");
986 if let Some(length) = font_size.as_length() {
987 assert_eq!(length.to_pt(), 16.0);
988 }
989 }
990 }
991 }
992 }
993
994 #[test]
995 fn test_parse_color_property_red() {
996 let xml = r#"<?xml version="1.0"?>
997<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
998 <fo:layout-master-set>
999 <fo:simple-page-master master-name="A4">
1000 <fo:region-body/>
1001 </fo:simple-page-master>
1002 </fo:layout-master-set>
1003 <fo:page-sequence master-reference="A4">
1004 <fo:flow flow-name="xsl-region-body">
1005 <fo:block color="red">Red text</fo:block>
1006 </fo:flow>
1007 </fo:page-sequence>
1008</fo:root>"#;
1009
1010 let cursor = Cursor::new(xml);
1011 let builder = FoTreeBuilder::new();
1012 let result = builder.parse(cursor);
1013 assert!(result.is_ok(), "Should parse color properties");
1014 }
1015
1016 #[test]
1017 fn test_parse_hex_color_property() {
1018 let xml = r#"<?xml version="1.0"?>
1020<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1021 <fo:layout-master-set>
1022 <fo:simple-page-master master-name="A4">
1023 <fo:region-body/>
1024 </fo:simple-page-master>
1025 </fo:layout-master-set>
1026 <fo:page-sequence master-reference="A4">
1027 <fo:flow flow-name="xsl-region-body">
1028 <fo:block color="red">Hex red text</fo:block>
1029 </fo:flow>
1030 </fo:page-sequence>
1031</fo:root>"#;
1032
1033 let cursor = Cursor::new(xml);
1034 let builder = FoTreeBuilder::new();
1035 let result = builder.parse(cursor);
1036 assert!(result.is_ok(), "Should parse color properties");
1037 }
1038}
1039
1040#[cfg(test)]
1042mod additional_tests {
1043 use super::*;
1044 use std::io::Cursor;
1045
1046 fn make_minimal_fo(flow_content: &str) -> String {
1047 format!(
1048 r#"<?xml version="1.0"?>
1049<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1050 <fo:layout-master-set>
1051 <fo:simple-page-master master-name="A4">
1052 <fo:region-body/>
1053 </fo:simple-page-master>
1054 </fo:layout-master-set>
1055 <fo:page-sequence master-reference="A4">
1056 <fo:flow flow-name="xsl-region-body">
1057 {}
1058 </fo:flow>
1059 </fo:page-sequence>
1060</fo:root>"#,
1061 flow_content
1062 )
1063 }
1064
1065 #[test]
1066 fn test_parse_block_with_all_font_properties() {
1067 let xml = make_minimal_fo(
1068 r#"<fo:block font-size="14pt" font-weight="bold" font-style="italic"
1069 font-family="Times New Roman" color="navy">Styled text</fo:block>"#,
1070 );
1071 let cursor = Cursor::new(xml);
1072 let result = FoTreeBuilder::new().parse(cursor);
1073 assert!(
1074 result.is_ok(),
1075 "Font properties should parse: {:?}",
1076 result.err()
1077 );
1078 }
1079
1080 #[test]
1081 fn test_parse_block_with_margin_properties() {
1082 let xml = make_minimal_fo(
1083 r#"<fo:block margin-top="10pt" margin-bottom="10pt"
1084 margin-left="20pt" margin-right="20pt">Margins</fo:block>"#,
1085 );
1086 let cursor = Cursor::new(xml);
1087 let result = FoTreeBuilder::new().parse(cursor);
1088 assert!(result.is_ok(), "Margin properties: {:?}", result.err());
1089 }
1090
1091 #[test]
1092 fn test_parse_block_with_padding_properties() {
1093 let xml = make_minimal_fo(
1094 r#"<fo:block padding-top="5pt" padding-bottom="5pt"
1095 padding-left="10pt" padding-right="10pt">Padding</fo:block>"#,
1096 );
1097 let cursor = Cursor::new(xml);
1098 let result = FoTreeBuilder::new().parse(cursor);
1099 assert!(result.is_ok(), "Padding properties: {:?}", result.err());
1100 }
1101
1102 #[test]
1103 fn test_parse_block_with_border_properties() {
1104 let xml = make_minimal_fo(
1105 r#"<fo:block border-top-style="solid" border-top-width="1pt"
1106 border-top-color="black">Border</fo:block>"#,
1107 );
1108 let cursor = Cursor::new(xml);
1109 let result = FoTreeBuilder::new().parse(cursor);
1110 assert!(result.is_ok(), "Border properties: {:?}", result.err());
1111 }
1112
1113 #[test]
1114 fn test_parse_inline_elements() {
1115 let xml = make_minimal_fo(
1116 r#"<fo:block>Text with <fo:inline font-weight="bold">bold</fo:inline> part</fo:block>"#,
1117 );
1118 let cursor = Cursor::new(xml);
1119 let result = FoTreeBuilder::new().parse(cursor);
1120 assert!(result.is_ok(), "Inline element: {:?}", result.err());
1121 }
1122
1123 #[test]
1124 fn test_parse_nested_blocks() {
1125 let xml = make_minimal_fo(
1126 r#"<fo:block>
1127 <fo:block>Inner block 1</fo:block>
1128 <fo:block>Inner block 2</fo:block>
1129 <fo:block>Inner block 3</fo:block>
1130 </fo:block>"#,
1131 );
1132 let cursor = Cursor::new(xml);
1133 let result = FoTreeBuilder::new().parse(cursor);
1134 assert!(result.is_ok(), "Nested blocks: {:?}", result.err());
1135 }
1136
1137 #[test]
1138 fn test_parse_table_structure() {
1139 let xml = make_minimal_fo(
1140 r#"<fo:table>
1141 <fo:table-column column-width="50pt"/>
1142 <fo:table-column column-width="50pt"/>
1143 <fo:table-body>
1144 <fo:table-row>
1145 <fo:table-cell><fo:block>Cell 1</fo:block></fo:table-cell>
1146 <fo:table-cell><fo:block>Cell 2</fo:block></fo:table-cell>
1147 </fo:table-row>
1148 </fo:table-body>
1149 </fo:table>"#,
1150 );
1151 let cursor = Cursor::new(xml);
1152 let result = FoTreeBuilder::new().parse(cursor);
1153 assert!(result.is_ok(), "Table structure: {:?}", result.err());
1154 }
1155
1156 #[test]
1157 fn test_parse_list_structure() {
1158 let xml = make_minimal_fo(
1159 r#"<fo:list-block>
1160 <fo:list-item>
1161 <fo:list-item-label end-indent="label-end()">
1162 <fo:block>1.</fo:block>
1163 </fo:list-item-label>
1164 <fo:list-item-body start-indent="body-start()">
1165 <fo:block>First item</fo:block>
1166 </fo:list-item-body>
1167 </fo:list-item>
1168 </fo:list-block>"#,
1169 );
1170 let cursor = Cursor::new(xml);
1171 let result = FoTreeBuilder::new().parse(cursor);
1172 assert!(result.is_ok(), "List structure: {:?}", result.err());
1173 }
1174
1175 #[test]
1176 fn test_parse_external_graphic() {
1177 let xml = make_minimal_fo(
1178 r#"<fo:block><fo:external-graphic src="url('image.png')"/></fo:block>"#,
1179 );
1180 let cursor = Cursor::new(xml);
1181 let result = FoTreeBuilder::new().parse(cursor);
1182 assert!(result.is_ok(), "External graphic: {:?}", result.err());
1183 }
1184
1185 #[test]
1186 fn test_parse_basic_link_internal() {
1187 let xml = make_minimal_fo(
1188 r#"<fo:block>
1189 <fo:basic-link internal-destination="target">Link</fo:basic-link>
1190 </fo:block>"#,
1191 );
1192 let cursor = Cursor::new(xml);
1193 let result = FoTreeBuilder::new().parse(cursor);
1194 assert!(result.is_ok(), "Basic link internal: {:?}", result.err());
1195 }
1196
1197 #[test]
1198 fn test_parse_basic_link_external() {
1199 let xml = make_minimal_fo(
1200 r#"<fo:block>
1201 <fo:basic-link external-destination="url('https://example.com')">URL</fo:basic-link>
1202 </fo:block>"#,
1203 );
1204 let cursor = Cursor::new(xml);
1205 let result = FoTreeBuilder::new().parse(cursor);
1206 assert!(result.is_ok(), "Basic link external: {:?}", result.err());
1207 }
1208
1209 #[test]
1210 fn test_parse_page_number_inline() {
1211 let xml = make_minimal_fo(r#"<fo:block>Page <fo:page-number/></fo:block>"#);
1212 let cursor = Cursor::new(xml);
1213 let result = FoTreeBuilder::new().parse(cursor);
1214 assert!(result.is_ok(), "Page number: {:?}", result.err());
1215 }
1216
1217 #[test]
1218 fn test_parse_page_number_citation() {
1219 let xml = make_minimal_fo(
1220 r#"<fo:block>See page <fo:page-number-citation ref-id="target"/></fo:block>"#,
1221 );
1222 let cursor = Cursor::new(xml);
1223 let result = FoTreeBuilder::new().parse(cursor);
1224 assert!(result.is_ok(), "Page number citation: {:?}", result.err());
1225 }
1226
1227 #[test]
1228 fn test_parse_leader_dots() {
1229 let xml =
1230 make_minimal_fo(r#"<fo:block>Chapter<fo:leader leader-pattern="dots"/>10</fo:block>"#);
1231 let cursor = Cursor::new(xml);
1232 let result = FoTreeBuilder::new().parse(cursor);
1233 assert!(result.is_ok(), "Leader: {:?}", result.err());
1234 }
1235
1236 #[test]
1237 fn test_parse_footnote() {
1238 let xml = make_minimal_fo(
1239 r#"<fo:block>Text<fo:footnote>
1240 <fo:inline font-size="8pt" vertical-align="super">1</fo:inline>
1241 <fo:footnote-body>
1242 <fo:block font-size="8pt">Footnote text</fo:block>
1243 </fo:footnote-body>
1244 </fo:footnote></fo:block>"#,
1245 );
1246 let cursor = Cursor::new(xml);
1247 let result = FoTreeBuilder::new().parse(cursor);
1248 assert!(result.is_ok(), "Footnote: {:?}", result.err());
1249 }
1250
1251 #[test]
1252 fn test_parse_block_container() {
1253 let xml = make_minimal_fo(
1254 r#"<fo:block-container width="100pt" height="100pt">
1255 <fo:block>Inside block container</fo:block>
1256 </fo:block-container>"#,
1257 );
1258 let cursor = Cursor::new(xml);
1259 let result = FoTreeBuilder::new().parse(cursor);
1260 assert!(result.is_ok(), "Block container: {:?}", result.err());
1261 }
1262
1263 #[test]
1264 fn test_parse_bookmark_tree() {
1265 let xml = r#"<?xml version="1.0"?>
1266<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1267 <fo:layout-master-set>
1268 <fo:simple-page-master master-name="A4">
1269 <fo:region-body/>
1270 </fo:simple-page-master>
1271 </fo:layout-master-set>
1272 <fo:bookmark-tree>
1273 <fo:bookmark internal-destination="ch1">
1274 <fo:bookmark-title>Chapter 1</fo:bookmark-title>
1275 </fo:bookmark>
1276 </fo:bookmark-tree>
1277 <fo:page-sequence master-reference="A4">
1278 <fo:flow flow-name="xsl-region-body">
1279 <fo:block id="ch1">Chapter 1 content</fo:block>
1280 </fo:flow>
1281 </fo:page-sequence>
1282</fo:root>"#;
1283 let cursor = Cursor::new(xml);
1284 let result = FoTreeBuilder::new().parse(cursor);
1285 assert!(result.is_ok(), "Bookmark tree: {:?}", result.err());
1286 }
1287
1288 #[test]
1289 fn test_parse_document_with_static_content() {
1290 let xml = r#"<?xml version="1.0"?>
1291<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1292 <fo:layout-master-set>
1293 <fo:simple-page-master master-name="A4">
1294 <fo:region-before extent="20mm"/>
1295 <fo:region-body/>
1296 <fo:region-after extent="20mm"/>
1297 </fo:simple-page-master>
1298 </fo:layout-master-set>
1299 <fo:page-sequence master-reference="A4">
1300 <fo:static-content flow-name="xsl-region-before">
1301 <fo:block>Header text</fo:block>
1302 </fo:static-content>
1303 <fo:static-content flow-name="xsl-region-after">
1304 <fo:block>Footer text</fo:block>
1305 </fo:static-content>
1306 <fo:flow flow-name="xsl-region-body">
1307 <fo:block>Body content</fo:block>
1308 </fo:flow>
1309 </fo:page-sequence>
1310</fo:root>"#;
1311 let cursor = Cursor::new(xml);
1312 let result = FoTreeBuilder::new().parse(cursor);
1313 assert!(result.is_ok(), "Static content: {:?}", result.err());
1314 }
1315
1316 #[test]
1317 fn test_parse_document_returns_non_empty_arena() {
1318 let xml = make_minimal_fo("<fo:block>Content</fo:block>");
1319 let cursor = Cursor::new(xml);
1320 let arena = FoTreeBuilder::new()
1321 .parse(cursor)
1322 .expect("test: should succeed");
1323 assert!(!arena.is_empty(), "Arena should not be empty after parsing");
1324 }
1325
1326 #[test]
1327 fn test_parse_document_root_is_fo_root() {
1328 let xml = make_minimal_fo("<fo:block>Content</fo:block>");
1329 let cursor = Cursor::new(xml);
1330 let arena = FoTreeBuilder::new()
1331 .parse(cursor)
1332 .expect("test: should succeed");
1333 let (_, root_node) = arena.root().expect("Should have root node");
1334 assert!(matches!(root_node.data, FoNodeData::Root));
1335 }
1336
1337 #[test]
1338 fn test_parse_document_with_text_align_center() {
1339 let xml = make_minimal_fo(r#"<fo:block text-align="center">Centered</fo:block>"#);
1340 let cursor = Cursor::new(xml);
1341 let result = FoTreeBuilder::new().parse(cursor);
1342 assert!(result.is_ok(), "text-align center: {:?}", result.err());
1343 }
1344
1345 #[test]
1346 fn test_parse_document_with_text_align_justify() {
1347 let xml = make_minimal_fo(r#"<fo:block text-align="justify">Justified</fo:block>"#);
1348 let cursor = Cursor::new(xml);
1349 let result = FoTreeBuilder::new().parse(cursor);
1350 assert!(result.is_ok(), "text-align justify: {:?}", result.err());
1351 }
1352
1353 #[test]
1354 fn test_parse_line_height_property() {
1355 let xml = make_minimal_fo(r#"<fo:block line-height="1.5">Text</fo:block>"#);
1356 let cursor = Cursor::new(xml);
1357 let result = FoTreeBuilder::new().parse(cursor);
1358 assert!(result.is_ok(), "line-height: {:?}", result.err());
1359 }
1360
1361 #[test]
1362 fn test_parse_keep_together_property() {
1363 let xml = make_minimal_fo(
1364 r#"<fo:block keep-together.within-page="always">Kept together</fo:block>"#,
1365 );
1366 let cursor = Cursor::new(xml);
1367 let result = FoTreeBuilder::new().parse(cursor);
1368 assert!(result.is_ok(), "keep-together: {:?}", result.err());
1369 }
1370
1371 #[test]
1372 fn test_parse_background_color_property() {
1373 let xml = make_minimal_fo(r#"<fo:block background-color="yellow">Highlighted</fo:block>"#);
1374 let cursor = Cursor::new(xml);
1375 let result = FoTreeBuilder::new().parse(cursor);
1376 assert!(result.is_ok(), "background-color: {:?}", result.err());
1377 }
1378
1379 #[test]
1380 fn test_parse_multiple_page_sequences_with_content() {
1381 let xml = r#"<?xml version="1.0"?>
1382<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1383 <fo:layout-master-set>
1384 <fo:simple-page-master master-name="A4">
1385 <fo:region-body/>
1386 </fo:simple-page-master>
1387 </fo:layout-master-set>
1388 <fo:page-sequence master-reference="A4">
1389 <fo:flow flow-name="xsl-region-body">
1390 <fo:block>Page sequence 1</fo:block>
1391 </fo:flow>
1392 </fo:page-sequence>
1393 <fo:page-sequence master-reference="A4">
1394 <fo:flow flow-name="xsl-region-body">
1395 <fo:block>Page sequence 2</fo:block>
1396 </fo:flow>
1397 </fo:page-sequence>
1398 <fo:page-sequence master-reference="A4">
1399 <fo:flow flow-name="xsl-region-body">
1400 <fo:block>Page sequence 3</fo:block>
1401 </fo:flow>
1402 </fo:page-sequence>
1403</fo:root>"#;
1404 let cursor = Cursor::new(xml);
1405 let result = FoTreeBuilder::new().parse(cursor);
1406 assert!(
1407 result.is_ok(),
1408 "Multiple page sequences: {:?}",
1409 result.err()
1410 );
1411 }
1412
1413 #[test]
1414 fn test_parse_missing_flow_name_is_error() {
1415 let xml = r#"<?xml version="1.0"?>
1416<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1417 <fo:layout-master-set>
1418 <fo:simple-page-master master-name="A4">
1419 <fo:region-body/>
1420 </fo:simple-page-master>
1421 </fo:layout-master-set>
1422 <fo:page-sequence master-reference="A4">
1423 <fo:flow>
1424 <fo:block>No flow-name attribute</fo:block>
1425 </fo:flow>
1426 </fo:page-sequence>
1427</fo:root>"#;
1428 let cursor = Cursor::new(xml);
1429 let result = FoTreeBuilder::new().parse(cursor);
1430 assert!(result.is_err(), "Missing flow-name should be an error");
1431 }
1432
1433 #[test]
1434 fn test_parse_missing_master_name_is_error() {
1435 let xml = r#"<?xml version="1.0"?>
1436<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1437 <fo:layout-master-set>
1438 <fo:simple-page-master>
1439 <fo:region-body/>
1440 </fo:simple-page-master>
1441 </fo:layout-master-set>
1442 <fo:page-sequence master-reference="A4">
1443 <fo:flow flow-name="xsl-region-body">
1444 <fo:block>Text</fo:block>
1445 </fo:flow>
1446 </fo:page-sequence>
1447</fo:root>"#;
1448 let cursor = Cursor::new(xml);
1449 let result = FoTreeBuilder::new().parse(cursor);
1450 assert!(result.is_err(), "Missing master-name should be an error");
1451 }
1452
1453 #[test]
1454 fn test_parse_xml_lang_sets_document_lang() {
1455 let xml = r#"<?xml version="1.0"?>
1456<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format" xml:lang="en">
1457 <fo:layout-master-set>
1458 <fo:simple-page-master master-name="A4">
1459 <fo:region-body/>
1460 </fo:simple-page-master>
1461 </fo:layout-master-set>
1462 <fo:page-sequence master-reference="A4">
1463 <fo:flow flow-name="xsl-region-body">
1464 <fo:block>English text</fo:block>
1465 </fo:flow>
1466 </fo:page-sequence>
1467</fo:root>"#;
1468 let cursor = Cursor::new(xml);
1469 let arena = FoTreeBuilder::new()
1470 .parse(cursor)
1471 .expect("test: should succeed");
1472 assert_eq!(arena.document_lang, Some("en".to_string()));
1473 }
1474
1475 #[test]
1476 fn test_parse_document_without_lang_has_none() {
1477 let xml = make_minimal_fo("<fo:block>Text</fo:block>");
1478 let cursor = Cursor::new(xml);
1479 let arena = FoTreeBuilder::new()
1480 .parse(cursor)
1481 .expect("test: should succeed");
1482 assert!(arena.document_lang.is_none());
1483 }
1484
1485 #[test]
1486 fn test_parse_cdata_in_block() {
1487 let xml = make_minimal_fo(r#"<fo:block><![CDATA[<special> & content]]></fo:block>"#);
1488 let cursor = Cursor::new(xml);
1489 let result = FoTreeBuilder::new().parse(cursor);
1490 assert!(result.is_ok(), "CDATA in block: {:?}", result.err());
1491 }
1492
1493 #[test]
1494 fn test_xmp_packet_captured_from_declarations() {
1495 let xml = r##"<?xml version="1.0" encoding="utf-8"?>
1496<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1497 <fo:layout-master-set>
1498 <fo:simple-page-master master-name="A4" page-width="210mm" page-height="297mm">
1499 <fo:region-body margin="2cm"/>
1500 </fo:simple-page-master>
1501 </fo:layout-master-set>
1502 <fo:declarations>
1503 <x:xmpmeta xmlns:x="adobe:ns:meta/">
1504 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
1505 <rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/" rdf:about="">
1506 <dc:title>
1507 <rdf:Alt><rdf:li xml:lang="x-default">Test Invoice</rdf:li></rdf:Alt>
1508 </dc:title>
1509 </rdf:Description>
1510 </rdf:RDF>
1511 </x:xmpmeta>
1512 </fo:declarations>
1513 <fo:page-sequence master-reference="A4">
1514 <fo:flow flow-name="xsl-region-body">
1515 <fo:block>Hello.</fo:block>
1516 </fo:flow>
1517 </fo:page-sequence>
1518</fo:root>"##;
1519
1520 let cursor = Cursor::new(xml);
1521 let arena = FoTreeBuilder::new()
1522 .parse(cursor)
1523 .expect("FO with fo:declarations + XMP metadata should parse successfully");
1524
1525 assert_eq!(
1527 arena.xmp_packets.len(),
1528 1,
1529 "Should have exactly one XMP packet captured from fo:declarations"
1530 );
1531
1532 let packet = &arena.xmp_packets[0];
1533 assert!(
1534 packet.contains("xmpmeta"),
1535 "XMP packet should contain the xmpmeta element: {}",
1536 packet
1537 );
1538 assert!(
1539 packet.contains("Test Invoice"),
1540 "XMP packet should contain the dc:title value: {}",
1541 packet
1542 );
1543
1544 let page_seq_count = arena
1546 .iter()
1547 .filter(|(_, n)| matches!(n.data, FoNodeData::PageSequence { .. }))
1548 .count();
1549 assert_eq!(
1550 page_seq_count, 1,
1551 "Document should have exactly one page-sequence"
1552 );
1553 }
1554
1555 fn make_fo_with_declarations(declarations_content: &str) -> String {
1558 format!(
1559 r#"<?xml version="1.0"?>
1560<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"
1561 xmlns:x="adobe:ns:meta/"
1562 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
1563 xmlns:dc="http://purl.org/dc/elements/1.1/">
1564 <fo:layout-master-set>
1565 <fo:simple-page-master master-name="A4" page-height="297mm" page-width="210mm">
1566 <fo:region-body/>
1567 </fo:simple-page-master>
1568 </fo:layout-master-set>
1569 <fo:declarations>
1570 {}
1571 </fo:declarations>
1572 <fo:page-sequence master-reference="A4">
1573 <fo:flow flow-name="xsl-region-body">
1574 <fo:block>Hello.</fo:block>
1575 </fo:flow>
1576 </fo:page-sequence>
1577</fo:root>"#,
1578 declarations_content
1579 )
1580 }
1581
1582 #[test]
1583 fn test_xmp_namespace_inheritance_captures_inherited_xmlns() {
1584 let fo = make_fo_with_declarations(
1586 r#"<x:xmpmeta>
1587 <rdf:RDF>
1588 <rdf:Description rdf:about="">
1589 <dc:title>
1590 <rdf:Alt><rdf:li xml:lang="x-default">Test Invoice</rdf:li></rdf:Alt>
1591 </dc:title>
1592 </rdf:Description>
1593 </rdf:RDF>
1594 </x:xmpmeta>"#,
1595 );
1596
1597 let cursor = Cursor::new(fo);
1598 let arena = FoTreeBuilder::new()
1599 .parse(cursor)
1600 .expect("FO with inherited xmlns should parse");
1601
1602 assert_eq!(arena.xmp_packets.len(), 1, "should have one XMP packet");
1603 let packet = &arena.xmp_packets[0];
1604
1605 assert!(packet.contains("xmlns:x="), "missing xmlns:x in: {packet}");
1607 assert!(
1608 packet.contains("xmlns:rdf="),
1609 "missing xmlns:rdf in: {packet}"
1610 );
1611 assert!(
1612 packet.contains("xmlns:dc="),
1613 "missing xmlns:dc in: {packet}"
1614 );
1615
1616 assert_eq!(
1618 packet.matches("xmlns:x=").count(),
1619 1,
1620 "xmlns:x duplicated in: {packet}"
1621 );
1622 assert_eq!(
1623 packet.matches("xmlns:rdf=").count(),
1624 1,
1625 "xmlns:rdf duplicated in: {packet}"
1626 );
1627 assert_eq!(
1628 packet.matches("xmlns:dc=").count(),
1629 1,
1630 "xmlns:dc duplicated in: {packet}"
1631 );
1632 }
1633
1634 #[test]
1635 fn test_xmp_well_formed_via_ns_reader() {
1636 let fo = make_fo_with_declarations(
1639 r#"<x:xmpmeta>
1640 <rdf:RDF>
1641 <rdf:Description rdf:about="">
1642 <dc:title><rdf:Alt><rdf:li xml:lang="x-default">Invoice</rdf:li></rdf:Alt></dc:title>
1643 </rdf:Description>
1644 </rdf:RDF>
1645 </x:xmpmeta>"#,
1646 );
1647
1648 let cursor = Cursor::new(fo);
1649 let arena = FoTreeBuilder::new()
1650 .parse(cursor)
1651 .expect("FO with inherited xmlns should parse");
1652
1653 let packet = &arena.xmp_packets[0];
1654
1655 use quick_xml::name::ResolveResult;
1656 use quick_xml::NsReader;
1657 let mut ns_reader = NsReader::from_str(packet);
1658 ns_reader.config_mut().trim_text(true);
1659 let mut buf = Vec::new();
1660 loop {
1661 match ns_reader.read_resolved_event_into(&mut buf) {
1662 Ok((ResolveResult::Unknown(prefix), _)) => {
1663 panic!(
1664 "undefined prefix in captured XMP packet: {:?}",
1665 std::str::from_utf8(&prefix)
1666 );
1667 }
1668 Ok((_, quick_xml::events::Event::Eof)) => break,
1669 Ok(_) => {}
1670 Err(e) => panic!("parse error in captured XMP packet: {e}"),
1671 }
1672 buf.clear();
1673 }
1674 }
1675
1676 #[test]
1677 fn test_xmp_capture_round_trips_cdata() {
1678 let fo = make_fo_with_declarations(
1680 r#"<x:xmpmeta xmlns:x="adobe:ns:meta/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
1681 <rdf:RDF><![CDATA[<not-an-element/>]]></rdf:RDF>
1682 </x:xmpmeta>"#,
1683 );
1684
1685 let cursor = Cursor::new(fo);
1686 let arena = FoTreeBuilder::new()
1687 .parse(cursor)
1688 .expect("FO with CDATA in XMP should parse");
1689
1690 assert_eq!(arena.xmp_packets.len(), 1);
1691 assert!(
1692 arena.xmp_packets[0].contains("<![CDATA[<not-an-element/>]]>"),
1693 "CDATA dropped: {}",
1694 arena.xmp_packets[0]
1695 );
1696 }
1697
1698 #[test]
1699 fn test_xmp_capture_round_trips_comment() {
1700 let fo = make_fo_with_declarations(
1701 r#"<x:xmpmeta xmlns:x="adobe:ns:meta/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
1702 <!-- intentional comment -->
1703 <rdf:RDF/>
1704 </x:xmpmeta>"#,
1705 );
1706
1707 let cursor = Cursor::new(fo);
1708 let arena = FoTreeBuilder::new()
1709 .parse(cursor)
1710 .expect("FO with comment in XMP should parse");
1711
1712 assert_eq!(arena.xmp_packets.len(), 1);
1713 let packet = &arena.xmp_packets[0];
1714 assert!(
1716 packet.contains("<!-- intentional comment -->")
1717 || packet.contains("<!--intentional comment-->")
1718 || packet.contains("<!-- intentional comment-->")
1719 || packet.contains("<!--intentional comment -->"),
1720 "comment dropped: {packet}"
1721 );
1722 }
1723
1724 #[test]
1725 fn test_xmp_no_injection_when_all_declared_locally() {
1726 let fo = r##"<?xml version="1.0" encoding="utf-8"?>
1729<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1730 <fo:layout-master-set>
1731 <fo:simple-page-master master-name="A4" page-width="210mm" page-height="297mm">
1732 <fo:region-body margin="2cm"/>
1733 </fo:simple-page-master>
1734 </fo:layout-master-set>
1735 <fo:declarations>
1736 <x:xmpmeta xmlns:x="adobe:ns:meta/">
1737 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
1738 <rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/" rdf:about="">
1739 <dc:title>Local Decl Test</dc:title>
1740 </rdf:Description>
1741 </rdf:RDF>
1742 </x:xmpmeta>
1743 </fo:declarations>
1744 <fo:page-sequence master-reference="A4">
1745 <fo:flow flow-name="xsl-region-body">
1746 <fo:block>Hello.</fo:block>
1747 </fo:flow>
1748 </fo:page-sequence>
1749</fo:root>"##;
1750
1751 let cursor = Cursor::new(fo);
1752 let arena = FoTreeBuilder::new()
1753 .parse(cursor)
1754 .expect("locally-declared prefixes should parse");
1755
1756 assert_eq!(arena.xmp_packets.len(), 1);
1757 let packet = &arena.xmp_packets[0];
1758 assert_eq!(
1760 packet.matches("xmlns:x=").count(),
1761 1,
1762 "xmlns:x must appear exactly once (no double-injection): {packet}"
1763 );
1764 assert!(
1765 packet.contains("Local Decl Test"),
1766 "content preserved: {packet}"
1767 );
1768 }
1769}