1use crate::xml::Namespace;
4use fop_types::{FopError, Location, Result};
5use quick_xml::events::{BytesStart, Event};
6use quick_xml::Reader;
7use std::collections::HashMap;
8use std::io::BufRead;
9
/// Table of named XML entities and the text each one expands to.
#[derive(Clone, Debug)]
pub struct EntityResolver {
    /// Entity name (without the surrounding `&` and `;`) mapped to its replacement text.
    entities: HashMap<String, String>,
}
15
16impl EntityResolver {
17 pub fn new() -> Self {
19 let mut entities = HashMap::new();
20
21 entities.insert("amp".to_string(), "&".to_string());
23 entities.insert("lt".to_string(), "<".to_string());
24 entities.insert("gt".to_string(), ">".to_string());
25 entities.insert("quot".to_string(), "\"".to_string());
26 entities.insert("apos".to_string(), "'".to_string());
27
28 Self { entities }
29 }
30
31 pub fn add_entity(&mut self, name: String, value: String) {
33 self.entities.insert(name, value);
34 }
35
36 pub fn resolve(&self, entity: &str, location: Location) -> Result<String> {
38 if let Some(hex_str) = entity
40 .strip_prefix("#x")
41 .or_else(|| entity.strip_prefix("#X"))
42 {
43 if let Ok(code) = u32::from_str_radix(hex_str, 16) {
45 if let Some(ch) = char::from_u32(code) {
46 return Ok(ch.to_string());
47 }
48 }
49 return Err(FopError::EntityError {
50 message: format!("Invalid hexadecimal character reference: {}", entity),
51 location,
52 });
53 } else if let Some(dec_str) = entity.strip_prefix('#') {
54 if let Ok(code) = dec_str.parse::<u32>() {
56 if let Some(ch) = char::from_u32(code) {
57 return Ok(ch.to_string());
58 }
59 }
60 return Err(FopError::EntityError {
61 message: format!("Invalid decimal character reference: {}", entity),
62 location,
63 });
64 }
65
66 self.entities
68 .get(entity)
69 .cloned()
70 .ok_or_else(|| FopError::EntityError {
71 message: format!("Unknown entity: &{};", entity),
72 location,
73 })
74 }
75
76 pub fn resolve_entities(&self, text: &str, location: Location) -> Result<String> {
78 let mut result = String::new();
79 let mut chars = text.chars().peekable();
80
81 while let Some(ch) = chars.next() {
82 if ch == '&' {
83 let mut entity_name = String::new();
85 let mut found_semicolon = false;
86
87 while let Some(&next_ch) = chars.peek() {
88 if next_ch == ';' {
89 chars.next(); found_semicolon = true;
91 break;
92 }
93 entity_name.push(next_ch);
94 chars.next();
95 }
96
97 if !found_semicolon {
98 return Err(FopError::EntityError {
99 message: format!("Unterminated entity reference: &{}", entity_name),
100 location,
101 });
102 }
103
104 let resolved = self.resolve(&entity_name, location)?;
105 result.push_str(&resolved);
106 } else {
107 result.push(ch);
108 }
109 }
110
111 Ok(result)
112 }
113}
114
115impl Default for EntityResolver {
116 fn default() -> Self {
117 Self::new()
118 }
119}
120
/// A processing instruction recorded by the parser, split into target and data.
#[derive(Clone, Debug, PartialEq)]
pub struct ProcessingInstruction {
    /// First token of the instruction's content.
    pub target: String,
    /// Whatever followed the target, or `None` when nothing did.
    pub data: Option<String>,
}
127
128impl ProcessingInstruction {
129 pub fn new(target: String, data: Option<String>) -> Self {
130 Self { target, data }
131 }
132}
133
134pub struct XmlParser<R: BufRead> {
136 reader: Reader<R>,
137 buf: Vec<u8>,
138 namespace_map: HashMap<String, String>,
140 entity_resolver: EntityResolver,
142 processing_instructions: Vec<ProcessingInstruction>,
144}
145
146impl<R: BufRead> XmlParser<R> {
147 pub fn new(reader: R) -> Self {
149 let mut xml_reader = Reader::from_reader(reader);
150 xml_reader.config_mut().trim_text(true);
151 xml_reader.config_mut().expand_empty_elements = true;
152
153 Self {
154 reader: xml_reader,
155 buf: Vec::new(),
156 namespace_map: HashMap::new(),
157 entity_resolver: EntityResolver::new(),
158 processing_instructions: Vec::new(),
159 }
160 }
161
162 pub fn reader(&self) -> &Reader<R> {
164 &self.reader
165 }
166
167 pub fn reader_mut(&mut self) -> &mut Reader<R> {
169 &mut self.reader
170 }
171
172 pub fn entity_resolver(&self) -> &EntityResolver {
174 &self.entity_resolver
175 }
176
177 pub fn entity_resolver_mut(&mut self) -> &mut EntityResolver {
179 &mut self.entity_resolver
180 }
181
182 pub fn processing_instructions(&self) -> &[ProcessingInstruction] {
184 &self.processing_instructions
185 }
186
187 pub fn location(&self) -> Location {
189 let pos = self.reader.buffer_position();
190 Location::new(pos as usize, 0)
192 }
193
194 pub fn read_event(&mut self) -> Result<Event<'static>> {
196 self.buf.clear();
197 let event = self
198 .reader
199 .read_event_into(&mut self.buf)
200 .map(|e| e.into_owned())
201 .map_err(|e| {
202 let location = self.location();
203 FopError::XmlErrorWithLocation {
204 message: format!("XML parsing error: {}", e),
205 location,
206 suggestion: None,
207 }
208 })?;
209
210 if let Event::PI(ref pi) = event {
212 if let Ok(target) = std::str::from_utf8(pi.as_ref()) {
213 let parts: Vec<&str> = target.splitn(2, ' ').collect();
215 let pi_target = parts[0].to_string();
216 let pi_data = parts.get(1).map(|s| s.to_string());
217
218 self.processing_instructions
219 .push(ProcessingInstruction::new(pi_target, pi_data));
220 }
221 }
222
223 Ok(event)
224 }
225
226 pub fn update_namespaces(&mut self, start: &BytesStart) {
228 for attr in start.attributes().flatten() {
229 if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) {
230 if key.starts_with("xmlns") && key != "xmlns" {
231 if let Some(prefix) = key.strip_prefix("xmlns:") {
232 if let Ok(value) = attr.decode_and_unescape_value(self.reader.decoder()) {
233 self.namespace_map
235 .insert(prefix.to_string(), value.to_string());
236 }
237 }
238 } else if key == "xmlns" {
239 if let Ok(value) = attr.decode_and_unescape_value(self.reader.decoder()) {
240 self.namespace_map.insert(String::new(), value.to_string());
242 }
243 }
244 }
245 }
246 }
247
248 pub fn extract_name(&self, start: &BytesStart) -> Result<(String, Namespace)> {
250 let location = self.location();
251 let name = start.name();
252
253 let (ns_prefix, local_name) = if let Some(pos) =
255 name.as_ref().iter().position(|&b| b == b':')
256 {
257 let prefix = std::str::from_utf8(&name.as_ref()[..pos]).map_err(|e| {
258 FopError::XmlErrorWithLocation {
259 message: format!("Invalid UTF-8 in prefix: {}", e),
260 location,
261 suggestion: None,
262 }
263 })?;
264 let local = std::str::from_utf8(&name.as_ref()[pos + 1..]).map_err(|e| {
265 FopError::XmlErrorWithLocation {
266 message: format!("Invalid UTF-8 in local name: {}", e),
267 location,
268 suggestion: None,
269 }
270 })?;
271 (Some(prefix.to_string()), local)
272 } else {
273 let local =
274 std::str::from_utf8(name.as_ref()).map_err(|e| FopError::XmlErrorWithLocation {
275 message: format!("Invalid UTF-8 in element name: {}", e),
276 location,
277 suggestion: None,
278 })?;
279 (None, local)
280 };
281
282 let ns_uri = if let Some(ref prefix) = ns_prefix {
284 self.namespace_map.get(prefix).cloned().unwrap_or_default()
285 } else {
286 self.namespace_map.get("").cloned().unwrap_or_default()
287 };
288
289 let namespace = Namespace::from_uri(&ns_uri);
290
291 Ok((local_name.to_string(), namespace))
292 }
293
294 pub fn extract_attributes(&self, start: &BytesStart) -> Result<Vec<(String, String)>> {
296 let location = self.location();
297 let mut attrs = Vec::new();
298
299 for attr_result in start.attributes() {
300 let attr = attr_result.map_err(|e| FopError::XmlErrorWithLocation {
301 message: format!("Attribute parsing error: {}", e),
302 location,
303 suggestion: None,
304 })?;
305
306 let key = std::str::from_utf8(attr.key.as_ref())
307 .map_err(|e| FopError::XmlErrorWithLocation {
308 message: format!("Invalid UTF-8 in attribute name: {}", e),
309 location,
310 suggestion: None,
311 })?
312 .to_string();
313
314 if key.starts_with("xmlns") {
316 continue;
317 }
318
319 let value = attr
321 .decode_and_unescape_value(self.reader.decoder())
322 .map_err(|e| FopError::XmlErrorWithLocation {
323 message: format!("Attribute value decode error: {}", e),
324 location,
325 suggestion: None,
326 })?
327 .to_string();
328
329 attrs.push((key, value));
330 }
331
332 Ok(attrs)
333 }
334
335 pub fn extract_text(&self, text: &[u8]) -> Result<String> {
337 let location = self.location();
338 let text_str = std::str::from_utf8(text).map_err(|e| FopError::XmlErrorWithLocation {
339 message: format!("Invalid UTF-8 in text: {}", e),
340 location,
341 suggestion: None,
342 })?;
343
344 self.entity_resolver.resolve_entities(text_str, location)
346 }
347
348 pub fn extract_cdata(&self, cdata: &[u8]) -> Result<String> {
350 let location = self.location();
351 std::str::from_utf8(cdata)
352 .map(|s| s.to_string())
353 .map_err(|e| FopError::XmlErrorWithLocation {
354 message: format!("Invalid UTF-8 in CDATA: {}", e),
355 location,
356 suggestion: None,
357 })
358 }
359}
360
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Element names resolve against the declared `fo` namespace.
    #[test]
    fn test_parse_simple_fo() {
        let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <fo:layout-master-set>
        <fo:simple-page-master master-name="A4">
        </fo:simple-page-master>
    </fo:layout-master-set>
</fo:root>"#;

        let cursor = Cursor::new(xml);
        let mut parser = XmlParser::new(cursor);

        let mut found_root = false;
        let mut found_layout_master_set = false;

        loop {
            let event = parser.read_event();
            match event {
                Ok(Event::Start(ref start)) | Ok(Event::Empty(ref start)) => {
                    parser.update_namespaces(start);
                    let (name, ns) = parser.extract_name(start).expect("test: should succeed");

                    if name == "root" && ns.is_fo() {
                        found_root = true;
                    }
                    if name == "layout-master-set" && ns.is_fo() {
                        found_layout_master_set = true;
                    }
                }
                Ok(Event::Eof) => break,
                Err(e) => panic!("Parse error: {}", e),
                _ => {}
            }
        }

        assert!(found_root);
        assert!(found_layout_master_set);
    }

    /// Regular attributes are returned with their values.
    #[test]
    fn test_extract_attributes() {
        let xml = r#"<?xml version="1.0"?>
<fo:simple-page-master xmlns:fo="http://www.w3.org/1999/XSL/Format"
    master-name="A4"
    page-width="210mm"
    page-height="297mm">
</fo:simple-page-master>"#;

        let cursor = Cursor::new(xml);
        let mut parser = XmlParser::new(cursor);

        loop {
            let event = parser.read_event();
            match event {
                Ok(Event::Start(ref start)) | Ok(Event::Empty(ref start)) => {
                    parser.update_namespaces(start);
                    let attrs = parser
                        .extract_attributes(start)
                        .expect("test: should succeed");

                    let master_name = attrs
                        .iter()
                        .find(|(k, _)| k == "master-name")
                        .map(|(_, v)| v.as_str());

                    assert_eq!(master_name, Some("A4"));

                    let page_width = attrs
                        .iter()
                        .find(|(k, _)| k == "page-width")
                        .map(|(_, v)| v.as_str());

                    assert_eq!(page_width, Some("210mm"));

                    break;
                }
                Ok(Event::Eof) => break,
                Err(e) => panic!("Parse error: {}", e),
                _ => {}
            }
        }
    }

    /// CDATA content is returned verbatim.
    #[test]
    fn test_cdata_section() {
        let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <![CDATA[<tag> & "quotes"]]>
</fo:block>"#;

        let cursor = Cursor::new(xml);
        let mut parser = XmlParser::new(cursor);

        let mut found_cdata = false;
        let mut cdata_content = String::new();

        loop {
            match parser.read_event() {
                Ok(Event::CData(ref cdata)) => {
                    found_cdata = true;
                    cdata_content = parser.extract_cdata(cdata).expect("test: should succeed");
                }
                Ok(Event::Eof) => break,
                Ok(_) => {}
                Err(e) => panic!("Parse error: {}", e),
            }
        }

        assert!(found_cdata);
        assert_eq!(cdata_content, r#"<tag> & "quotes""#);
    }

    /// The five predefined entities resolve to their characters.
    #[test]
    fn test_entity_resolution_builtin() {
        let resolver = EntityResolver::new();
        let location = Location::new(1, 1);

        assert_eq!(
            resolver
                .resolve("amp", location)
                .expect("test: should succeed"),
            "&"
        );
        assert_eq!(
            resolver
                .resolve("lt", location)
                .expect("test: should succeed"),
            "<"
        );
        assert_eq!(
            resolver
                .resolve("gt", location)
                .expect("test: should succeed"),
            ">"
        );
        assert_eq!(
            resolver
                .resolve("quot", location)
                .expect("test: should succeed"),
            "\""
        );
        assert_eq!(
            resolver
                .resolve("apos", location)
                .expect("test: should succeed"),
            "'"
        );
    }

    /// Decimal character references are decoded.
    #[test]
    fn test_entity_resolution_numeric_decimal() {
        let resolver = EntityResolver::new();
        let location = Location::new(1, 1);

        assert_eq!(
            resolver
                .resolve("#65", location)
                .expect("test: should succeed"),
            "A"
        );
        assert_eq!(
            resolver
                .resolve("#36", location)
                .expect("test: should succeed"),
            "$"
        );
    }

    /// Hexadecimal character references are decoded with either `x` or `X`.
    #[test]
    fn test_entity_resolution_numeric_hex() {
        let resolver = EntityResolver::new();
        let location = Location::new(1, 1);

        assert_eq!(
            resolver
                .resolve("#x41", location)
                .expect("test: should succeed"),
            "A"
        );
        assert_eq!(
            resolver
                .resolve("#X41", location)
                .expect("test: should succeed"),
            "A"
        );
        assert_eq!(
            resolver
                .resolve("#xA9", location)
                .expect("test: should succeed"),
            "©"
        );
    }

    /// Entities registered with `add_entity` can be resolved.
    #[test]
    fn test_entity_resolution_custom() {
        let mut resolver = EntityResolver::new();
        resolver.add_entity("copy".to_string(), "©".to_string());

        let location = Location::new(1, 1);
        assert_eq!(
            resolver
                .resolve("copy", location)
                .expect("test: should succeed"),
            "©"
        );
    }

    /// Numeric and named references embedded in text are expanded.
    #[test]
    fn test_entity_resolution_in_text() {
        let resolver = EntityResolver::new();
        let location = Location::new(1, 1);

        // The input must carry real references; a bare `&` is an error.
        let text = "Price: &#36;100 &amp; up";
        let resolved = resolver
            .resolve_entities(text, location)
            .expect("test: should succeed");
        assert_eq!(resolved, "Price: $100 & up");
    }

    /// Unknown named entities are rejected.
    #[test]
    fn test_entity_resolution_unknown() {
        let resolver = EntityResolver::new();
        let location = Location::new(1, 1);

        let result = resolver.resolve("unknown", location);
        assert!(result.is_err());
    }

    /// Processing instructions are recorded; the XML declaration is not one.
    #[test]
    fn test_processing_instruction() {
        let xml = r#"<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="style.xsl"?>
<?fop-renderer backend="pdf"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
</fo:root>"#;

        let cursor = Cursor::new(xml);
        let mut parser = XmlParser::new(cursor);

        loop {
            match parser.read_event() {
                Ok(Event::Eof) => break,
                Ok(_) => {}
                Err(e) => panic!("Parse error: {}", e),
            }
        }

        let pis = parser.processing_instructions();
        assert_eq!(pis.len(), 2);

        assert_eq!(pis[0].target, "xml-stylesheet");
        assert!(pis[0].data.is_some());

        assert_eq!(pis[1].target, "fop-renderer");
        assert!(pis[1].data.is_some());
    }

    /// Entity references inside attribute values are unescaped.
    #[test]
    fn test_entities_in_attributes() {
        let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format" title="Test &amp; More">
</fo:block>"#;

        let cursor = Cursor::new(xml);
        let mut parser = XmlParser::new(cursor);

        loop {
            match parser.read_event() {
                Ok(Event::Start(ref start)) | Ok(Event::Empty(ref start)) => {
                    parser.update_namespaces(start);
                    let attrs = parser
                        .extract_attributes(start)
                        .expect("test: should succeed");

                    let title = attrs
                        .iter()
                        .find(|(k, _)| k == "title")
                        .map(|(_, v)| v.as_str());

                    assert_eq!(title, Some("Test & More"));
                    break;
                }
                Ok(Event::Eof) => break,
                Ok(_) => {}
                Err(e) => panic!("Parse error: {}", e),
            }
        }
    }

    /// Markup-like characters inside CDATA are not interpreted.
    #[test]
    fn test_cdata_preserves_content() {
        let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <![CDATA[Code with <tags> & "special" & chars]]>
</fo:block>"#;

        let cursor = Cursor::new(xml);
        let mut parser = XmlParser::new(cursor);

        let mut cdata_content = String::new();

        loop {
            match parser.read_event() {
                Ok(Event::CData(ref cdata)) => {
                    cdata_content = parser.extract_cdata(cdata).expect("test: should succeed");
                }
                Ok(Event::Eof) => break,
                Ok(_) => {}
                Err(e) => panic!("Parse error: {}", e),
            }
        }

        assert_eq!(cdata_content, r#"Code with <tags> & "special" & chars"#);
    }

    /// Several references in one string are all expanded.
    #[test]
    fn test_multiple_entities() {
        let resolver = EntityResolver::new();
        let location = Location::new(1, 1);

        let text = "&lt;tag&gt; &amp; &quot;text&quot;";
        let resolved = resolver
            .resolve_entities(text, location)
            .expect("test: should succeed");
        assert_eq!(resolved, r#"<tag> & "text""#);
    }

    /// An `&` with no terminating `;` is an error.
    #[test]
    fn test_unterminated_entity() {
        let resolver = EntityResolver::new();
        let location = Location::new(1, 1);

        let text = "& no semicolon";
        let result = resolver.resolve_entities(text, location);
        assert!(result.is_err());
    }

    /// Smoke test: asking for a location before reading must not panic.
    #[test]
    fn test_location_tracking() {
        let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
</fo:root>"#;

        let cursor = Cursor::new(xml);
        let parser = XmlParser::new(cursor);

        let _location = parser.location();
    }

    /// Mismatched tags surface as an error whose text identifies the failure.
    #[test]
    fn test_error_with_location() {
        let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <unclosed-tag>
</fo:root>"#;

        let cursor = Cursor::new(xml);
        let mut parser = XmlParser::new(cursor);

        let mut error_found = false;

        loop {
            match parser.read_event() {
                Ok(Event::Eof) => break,
                Ok(_) => {}
                Err(e) => {
                    error_found = true;
                    let error_str = format!("{}", e);
                    assert!(error_str.contains("line") || error_str.contains("XML parsing error"));
                    break;
                }
            }
        }

        assert!(error_found);
    }
}
752
753#[cfg(test)]
756mod additional_tests {
757 use super::*;
758 use std::io::Cursor;
759
/// `apos` expands to a single quote.
#[test]
fn test_entity_resolver_apos() {
    let expanded = EntityResolver::new()
        .resolve("apos", Location::new(1, 1))
        .expect("test: should succeed");
    assert_eq!(expanded, "'");
}
773
/// `quot` expands to a double quote.
#[test]
fn test_entity_resolver_quot() {
    let expanded = EntityResolver::new()
        .resolve("quot", Location::new(1, 1))
        .expect("test: should succeed");
    assert_eq!(expanded, "\"");
}
785
/// `gt` expands to `>`.
#[test]
fn test_entity_resolver_gt() {
    let expanded = EntityResolver::new()
        .resolve("gt", Location::new(1, 1))
        .expect("test: should succeed");
    assert_eq!(expanded, ">");
}
797
/// Empty input yields empty output.
#[test]
fn test_entity_resolver_empty_text() {
    let at = Location::new(1, 1);
    let expanded = EntityResolver::new()
        .resolve_entities("", at)
        .expect("test: should succeed");
    assert_eq!(expanded, "");
}
807
/// Text with no references passes through unchanged.
#[test]
fn test_entity_resolver_text_without_entities() {
    let plain = "hello world";
    let expanded = EntityResolver::new()
        .resolve_entities(plain, Location::new(1, 1))
        .expect("test: should succeed");
    assert_eq!(expanded, "hello world");
}
817
/// A string consisting of a single reference expands to just its value.
#[test]
fn test_entity_resolver_only_entity() {
    let resolver = EntityResolver::new();
    let location = Location::new(1, 1);
    // The input has to be the reference `&amp;`; a bare `&` is an
    // unterminated reference and would make `resolve_entities` fail.
    let result = resolver
        .resolve_entities("&amp;", location)
        .expect("test: should succeed");
    assert_eq!(result, "&");
}
827
/// Leading zeros in a hexadecimal reference are accepted.
#[test]
fn test_entity_resolver_hex_zero() {
    let decoded = EntityResolver::new()
        .resolve("#x0041", Location::new(1, 1))
        .expect("test: should succeed");
    assert_eq!(decoded, "A");
}
838
/// Decimal reference 10 decodes to a line feed.
#[test]
fn test_entity_resolver_decimal_newline() {
    let decoded = EntityResolver::new()
        .resolve("#10", Location::new(1, 1))
        .expect("test: should succeed");
    assert_eq!(decoded, "\n");
}
849
/// Decimal reference 9 decodes to a tab.
#[test]
fn test_entity_resolver_decimal_tab() {
    let decoded = EntityResolver::new()
        .resolve("#9", Location::new(1, 1))
        .expect("test: should succeed");
    assert_eq!(decoded, "\t");
}
860
/// A reference to a code point outside ASCII decodes to that character.
#[test]
fn test_entity_resolver_unicode_multibyte() {
    let decoded = EntityResolver::new()
        .resolve("#x4e2d", Location::new(1, 1))
        .expect("test: should succeed");
    assert_eq!(decoded, "中");
}
871
/// Several custom entities can be registered and each resolves to its own value.
#[test]
fn test_entity_resolver_add_multiple_custom() {
    let currencies = [("euro", "€"), ("yen", "¥"), ("pound", "£")];

    let mut resolver = EntityResolver::new();
    for &(name, symbol) in currencies.iter() {
        resolver.add_entity(name.to_string(), symbol.to_string());
    }

    let location = Location::new(1, 1);
    for &(name, symbol) in currencies.iter() {
        let resolved = resolver
            .resolve(name, location)
            .expect("test: should succeed");
        assert_eq!(resolved, symbol);
    }
}
899
/// Registering a predefined name replaces its built-in expansion.
#[test]
fn test_entity_resolver_override_custom() {
    let mut resolver = EntityResolver::new();
    resolver.add_entity(String::from("amp"), String::from("AMPERSAND"));

    let resolved = resolver
        .resolve("amp", Location::new(1, 1))
        .expect("test: should succeed");
    assert_eq!(resolved, "AMPERSAND");
}
914
/// All five predefined references expand when they appear back to back.
#[test]
fn test_entity_resolver_resolve_entities_multiple() {
    let resolver = EntityResolver::new();
    let location = Location::new(1, 1);
    // Input is written as references; the expected value holds the characters.
    let text = "&lt;&gt;&amp;&quot;&apos;";
    let result = resolver
        .resolve_entities(text, location)
        .expect("test: should succeed");
    assert_eq!(result, "<>&\"'");
}
925
/// Decimal and hexadecimal character references are expanded inside text.
#[test]
fn test_entity_resolver_numeric_in_text() {
    let resolver = EntityResolver::new();
    let location = Location::new(1, 1);
    // Alternate decimal and hexadecimal forms so both code paths are exercised.
    let text = "&#65;&#x41;&#66;&#x42;&#67;";
    let result = resolver
        .resolve_entities(text, location)
        .expect("test: should succeed");
    assert_eq!(result, "AABBC");
}
936
/// The hexadecimal marker may be an upper-case `X`.
#[test]
fn test_entity_resolver_hex_uppercase() {
    let decoded = EntityResolver::new()
        .resolve("#X41", Location::new(1, 1))
        .expect("test: should succeed");
    assert_eq!(decoded, "A");
}
947
/// `new` stores both the target and the data.
#[test]
fn test_processing_instruction_new() {
    let target = String::from("target");
    let data = Some(String::from("data"));
    let pi = ProcessingInstruction::new(target, data);
    assert_eq!(pi.target, "target");
    assert_eq!(pi.data, Some("data".to_string()));
}
956
/// A processing instruction may be built without data.
#[test]
fn test_processing_instruction_no_data() {
    let pi = ProcessingInstruction::new(String::from("target"), None);
    assert_eq!(pi.target, "target");
    assert!(pi.data.is_none());
}
963
/// Two instructions with the same target and data compare equal.
#[test]
fn test_processing_instruction_equality() {
    let build = || ProcessingInstruction::new("foo".to_string(), Some("bar".to_string()));
    let first = build();
    let second = build();
    assert_eq!(first, second);
}
970
/// Instructions that differ only in target compare unequal.
#[test]
fn test_processing_instruction_inequality() {
    let with_target =
        |target: &str| ProcessingInstruction::new(target.to_string(), Some("bar".to_string()));
    let foo = with_target("foo");
    let baz = with_target("baz");
    assert_ne!(foo, baz);
}
977
/// Declaring a second namespace on the root does not disturb `fo` resolution.
#[test]
fn test_nested_namespace_declarations() {
    let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"
    xmlns:svg="http://www.w3.org/2000/svg">
    <fo:layout-master-set></fo:layout-master-set>
</fo:root>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));
    let mut found_root = false;

    loop {
        let event = parser
            .read_event()
            .unwrap_or_else(|e| panic!("Parse error: {}", e));
        match event {
            Event::Eof => break,
            Event::Start(ref start) | Event::Empty(ref start) => {
                parser.update_namespaces(start);
                // Name extraction failures are tolerated here, as before.
                if let Ok((name, ns)) = parser.extract_name(start) {
                    if name == "root" && ns.is_fo() {
                        found_root = true;
                    }
                }
            }
            _ => {}
        }
    }

    assert!(found_root);
}
1011
/// Declaring the `fox` extension namespace leaves `fo:root` recognisable.
#[test]
fn test_fox_extension_namespace() {
    let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"
    xmlns:fox="http://xmlgraphics.apache.org/fop/extensions">
    <fo:layout-master-set></fo:layout-master-set>
</fo:root>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));
    let mut found_root = false;

    loop {
        let event = parser
            .read_event()
            .unwrap_or_else(|e| panic!("Parse error: {}", e));
        match event {
            Event::Eof => break,
            Event::Start(ref start) | Event::Empty(ref start) => {
                parser.update_namespaces(start);
                if let Ok((name, ns)) = parser.extract_name(start) {
                    if name == "root" && ns.is_fo() {
                        found_root = true;
                    }
                }
            }
            _ => {}
        }
    }

    assert!(found_root);
}
1042
/// A self-closing element is reported as a start event, so it is counted too.
#[test]
fn test_empty_element_produces_start_end() {
    let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <fo:layout-master-set>
        <fo:simple-page-master master-name="A4">
            <fo:region-body/>
        </fo:simple-page-master>
    </fo:layout-master-set>
</fo:root>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));
    let mut start_events = 0;

    loop {
        let event = parser
            .read_event()
            .unwrap_or_else(|e| panic!("Parse error: {}", e));
        match event {
            Event::Eof => break,
            Event::Start(ref start) => {
                parser.update_namespaces(start);
                start_events += 1;
            }
            _ => {}
        }
    }

    assert!(start_events >= 4);
}
1075
/// All four regular attributes are returned; the namespace declaration is not.
#[test]
fn test_multiple_attributes_preserved_order() {
    let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format"
    font-size="12pt"
    font-family="Arial"
    color="black"
    margin-top="10pt">text</fo:block>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));

    loop {
        let event = parser
            .read_event()
            .unwrap_or_else(|e| panic!("Parse error: {}", e));
        match event {
            Event::Start(ref start) => {
                parser.update_namespaces(start);
                let attrs = parser
                    .extract_attributes(start)
                    .expect("test: should succeed");
                assert_eq!(attrs.len(), 4);
                // Only the first start tag is of interest.
                break;
            }
            Event::Eof => break,
            _ => {}
        }
    }
}
1105
/// Comparison operators and ampersands inside CDATA come back untouched.
#[test]
fn test_text_with_special_chars_in_cdata() {
    let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format"><![CDATA[a < b && c > d]]></fo:block>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));
    let mut cdata_text = String::new();

    loop {
        let event = parser
            .read_event()
            .unwrap_or_else(|e| panic!("Parse error: {}", e));
        match event {
            Event::CData(ref cdata) => {
                cdata_text = parser.extract_cdata(cdata).expect("test: should succeed");
            }
            Event::Eof => break,
            _ => {}
        }
    }

    assert_eq!(cdata_text, "a < b && c > d");
}
1127
/// Markup inside CDATA is returned as plain text.
#[test]
fn test_extract_cdata_preserves_angle_brackets() {
    let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format"><![CDATA[<tag attr="val"/>]]></fo:block>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));
    let mut cdata_text = String::new();

    loop {
        let event = parser
            .read_event()
            .unwrap_or_else(|e| panic!("Parse error: {}", e));
        match event {
            Event::CData(ref cdata) => {
                cdata_text = parser.extract_cdata(cdata).expect("test: should succeed");
            }
            Event::Eof => break,
            _ => {}
        }
    }

    assert_eq!(cdata_text, r#"<tag attr="val"/>"#);
}
1149
/// A comment is never surfaced as a text event.
#[test]
fn test_comment_does_not_produce_text_event() {
    let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"><!-- this is a comment --></fo:root>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));
    let mut text_events = 0;

    loop {
        let event = parser
            .read_event()
            .unwrap_or_else(|e| panic!("Parse error: {}", e));
        match event {
            Event::Text(_) => text_events += 1,
            Event::Eof => break,
            _ => {}
        }
    }

    assert_eq!(text_events, 0);
}
1172
/// Processing instructions are recorded in document order.
#[test]
fn test_multiple_processing_instructions() {
    let xml = r#"<?xml version="1.0"?>
<?stylesheet type="text/css"?>
<?renderer backend="pdf"?>
<?custom-pi data="value"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"></fo:root>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));

    // Drain the document so every instruction has been seen.
    loop {
        let event = parser
            .read_event()
            .unwrap_or_else(|e| panic!("Parse error: {}", e));
        if let Event::Eof = event {
            break;
        }
    }

    let recorded = parser.processing_instructions();
    assert_eq!(recorded.len(), 3);
    assert_eq!(recorded[0].target, "stylesheet");
    assert_eq!(recorded[1].target, "renderer");
    assert_eq!(recorded[2].target, "custom-pi");
}
1198
/// The XML declaration alone does not count as a processing instruction.
#[test]
fn test_no_processing_instructions_when_none_present() {
    let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"></fo:root>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));

    loop {
        let event = parser
            .read_event()
            .unwrap_or_else(|e| panic!("Parse error: {}", e));
        if let Event::Eof = event {
            break;
        }
    }

    let recorded = parser.processing_instructions();
    assert_eq!(recorded.len(), 0);
}
1218
/// `&apos;` inside an attribute value is unescaped to a single quote.
#[test]
fn test_attributes_with_apos_entity() {
    // The attribute must contain the reference itself, otherwise nothing is
    // being unescaped and the test is vacuous.
    let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format" title="it&apos;s">text</fo:block>"#;

    let cursor = Cursor::new(xml);
    let mut parser = XmlParser::new(cursor);

    loop {
        match parser.read_event() {
            Ok(Event::Start(ref start)) => {
                parser.update_namespaces(start);
                let attrs = parser
                    .extract_attributes(start)
                    .expect("test: should succeed");
                let title = attrs
                    .iter()
                    .find(|(k, _)| k == "title")
                    .map(|(_, v)| v.as_str());
                assert_eq!(title, Some("it's"));
                break;
            }
            Ok(Event::Eof) => break,
            Ok(_) => {}
            Err(e) => panic!("Parse error: {}", e),
        }
    }
}
1247
/// `&lt;` inside an attribute value is unescaped to `<`.
#[test]
fn test_attributes_with_lt_entity() {
    // A literal `<` is not allowed in an attribute value; the document has to
    // use the reference.
    let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format" title="a &lt; b">text</fo:block>"#;

    let cursor = Cursor::new(xml);
    let mut parser = XmlParser::new(cursor);

    loop {
        match parser.read_event() {
            Ok(Event::Start(ref start)) => {
                parser.update_namespaces(start);
                let attrs = parser
                    .extract_attributes(start)
                    .expect("test: should succeed");
                let title = attrs
                    .iter()
                    .find(|(k, _)| k == "title")
                    .map(|(_, v)| v.as_str());
                assert_eq!(title, Some("a < b"));
                break;
            }
            Ok(Event::Eof) => break,
            Ok(_) => {}
            Err(e) => panic!("Parse error: {}", e),
        }
    }
}
1276
/// Numeric character references inside an attribute value are decoded.
#[test]
fn test_attribute_with_numeric_entity() {
    // The value is spelled with character references so that decoding is
    // actually exercised.
    let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format" title="&#65;&#66;&#67;">text</fo:block>"#;

    let cursor = Cursor::new(xml);
    let mut parser = XmlParser::new(cursor);

    loop {
        match parser.read_event() {
            Ok(Event::Start(ref start)) => {
                parser.update_namespaces(start);
                let attrs = parser
                    .extract_attributes(start)
                    .expect("test: should succeed");
                let title = attrs
                    .iter()
                    .find(|(k, _)| k == "title")
                    .map(|(_, v)| v.as_str());
                assert_eq!(title, Some("ABC"));
                break;
            }
            Ok(Event::Eof) => break,
            Ok(_) => {}
            Err(e) => panic!("Parse error: {}", e),
        }
    }
}
1305
/// Non-ASCII (Japanese) text content must come back from `extract_text`
/// unchanged.
#[test]
fn test_xml_with_utf8_text() {
    let xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<fo:root xmlns:fo=\"http://www.w3.org/1999/XSL/Format\"><fo:block>日本語テスト</fo:block></fo:root>";

    let mut parser = XmlParser::new(Cursor::new(xml));

    // Holds the text of the most recent text event; later events overwrite
    // earlier ones.
    let mut last_text = String::new();
    loop {
        match parser.read_event() {
            Err(e) => panic!("Parse error: {}", e),
            Ok(Event::Eof) => break,
            Ok(Event::Text(ref node)) => {
                last_text = parser.extract_text(node).expect("test: should succeed");
            }
            Ok(_) => {}
        }
    }

    assert_eq!(last_text, "日本語テスト");
}
1326
/// A cloned `EntityResolver` resolves both a custom entity added before the
/// clone and a predefined one.
#[test]
fn test_entity_resolver_clone() {
    let mut source = EntityResolver::new();
    source.add_entity(String::from("test"), String::from("TEST_VALUE"));
    let copy = source.clone();

    let at = Location::new(1, 1);

    // (entity name, expected replacement text)
    let expectations = [("test", "TEST_VALUE"), ("amp", "&")];
    for (name, expected) in expectations {
        let resolved = copy.resolve(name, at).expect("test: should succeed");
        assert_eq!(resolved, expected);
    }
}
1347
/// `EntityResolver::default()` resolves all five predefined XML entities.
#[test]
fn test_entity_resolver_default() {
    let resolver = EntityResolver::default();
    let at = Location::new(1, 1);

    // (entity name, expected replacement text), checked in the same order
    // as the individual assertions they replace.
    let predefined = [
        ("amp", "&"),
        ("lt", "<"),
        ("gt", ">"),
        ("quot", "\""),
        ("apos", "'"),
    ];

    for (name, expected) in predefined {
        let resolved = resolver.resolve(name, at).expect("test: should succeed");
        assert_eq!(resolved, expected);
    }
}
1384
/// A document with several levels of element nesting is read through to EOF
/// without any parse error.
#[test]
fn test_xml_deeply_nested_elements() {
    let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
  <fo:layout-master-set>
    <fo:simple-page-master master-name="p1">
      <fo:region-body/>
    </fo:simple-page-master>
  </fo:layout-master-set>
  <fo:page-sequence master-reference="p1">
    <fo:flow flow-name="xsl-region-body">
      <fo:block>
        <fo:inline>
          <fo:inline>
            <fo:inline>deep nesting</fo:inline>
          </fo:inline>
        </fo:inline>
      </fo:block>
    </fo:flow>
  </fo:page-sequence>
</fo:root>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));

    // Drain every event; the loop yields the first error encountered, or
    // `None` if EOF is reached cleanly.
    let failure = loop {
        match parser.read_event() {
            Err(e) => break Some(e),
            Ok(Event::Eof) => break None,
            Ok(_) => {}
        }
    };

    assert!(failure.is_none(), "Deep nesting should parse without error");
}
1424
/// Whitespace-only text between elements must never surface as non-empty
/// text from `extract_text`.
#[test]
fn test_xml_empty_text_nodes_trimmed() {
    let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
  <fo:layout-master-set>
  </fo:layout-master-set>
</fo:root>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));

    let mut visible_text_nodes = 0;
    loop {
        match parser.read_event() {
            Err(e) => panic!("Parse error: {}", e),
            Ok(Event::Eof) => break,
            Ok(Event::Text(ref node)) => {
                // A failed extraction is treated the same as empty text.
                let has_content = parser
                    .extract_text(node)
                    .map_or(false, |content| !content.is_empty());
                if has_content {
                    visible_text_nodes += 1;
                }
            }
            Ok(_) => {}
        }
    }

    assert_eq!(visible_text_nodes, 0);
}
1454
/// A processing instruction with pseudo-attributes is recorded with its
/// target and with data that contains the instruction's content.
#[test]
fn test_xml_pi_target_with_data() {
    let xml = r#"<?xml version="1.0"?>
<?fop-config key="value" other="data"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"></fo:root>"#;

    let mut parser = XmlParser::new(Cursor::new(xml));

    // Consume the whole document before inspecting the recorded
    // processing instructions.
    loop {
        match parser.read_event() {
            Err(e) => panic!("Parse error: {}", e),
            Ok(Event::Eof) => break,
            Ok(_) => {}
        }
    }

    let recorded = parser.processing_instructions();
    assert_eq!(recorded.len(), 1);

    let instruction = &recorded[0];
    assert_eq!(instruction.target, "fop-config");
    assert!(instruction.data.is_some());

    let payload = instruction.data.as_ref().expect("test: should succeed");
    assert!(payload.contains("key"));
}
1479
/// Resolving an unknown entity fails, and the displayed error mentions the
/// entity's name.
#[test]
fn test_entity_resolver_unknown_entity_has_name_in_error() {
    let resolver = EntityResolver::new();
    let outcome = resolver.resolve("nonexistent", Location::new(5, 10));
    assert!(outcome.is_err());

    // Render the error through `Display` and look for the offending name.
    let rendered = outcome.unwrap_err().to_string();
    assert!(rendered.contains("nonexistent"));
}
1490
/// A hexadecimal character reference with non-hex digits is rejected.
#[test]
fn test_entity_resolver_invalid_hex_ref() {
    // "ZZZZ" cannot be parsed as base-16, so resolution must fail.
    let outcome = EntityResolver::new().resolve("#xZZZZ", Location::new(1, 1));
    assert!(outcome.is_err());
}
1499
/// A decimal character reference with non-digit content is rejected.
#[test]
fn test_entity_resolver_invalid_decimal_ref() {
    // "abc" cannot be parsed as a decimal code point, so resolution must fail.
    let outcome = EntityResolver::new().resolve("#abc", Location::new(1, 1));
    assert!(outcome.is_err());
}
1508}