Skip to main content

fop_core/xml/
parser.rs

1//! XML parser wrapper around quick-xml with enhanced features
2
3use crate::xml::Namespace;
4use fop_types::{FopError, Location, Result};
5use quick_xml::events::{BytesStart, Event};
6use quick_xml::Reader;
7use std::collections::HashMap;
8use std::io::BufRead;
9
10/// Entity resolver for handling XML entities
11#[derive(Debug, Clone)]
12pub struct EntityResolver {
13    entities: HashMap<String, String>,
14}
15
16impl EntityResolver {
17    /// Create a new entity resolver with built-in entities
18    pub fn new() -> Self {
19        let mut entities = HashMap::new();
20
21        // Built-in XML entities
22        entities.insert("amp".to_string(), "&".to_string());
23        entities.insert("lt".to_string(), "<".to_string());
24        entities.insert("gt".to_string(), ">".to_string());
25        entities.insert("quot".to_string(), "\"".to_string());
26        entities.insert("apos".to_string(), "'".to_string());
27
28        Self { entities }
29    }
30
31    /// Add a custom entity
32    pub fn add_entity(&mut self, name: String, value: String) {
33        self.entities.insert(name, value);
34    }
35
36    /// Resolve an entity reference
37    pub fn resolve(&self, entity: &str, location: Location) -> Result<String> {
38        // Handle numeric character references
39        if let Some(hex_str) = entity
40            .strip_prefix("#x")
41            .or_else(|| entity.strip_prefix("#X"))
42        {
43            // Hexadecimal
44            if let Ok(code) = u32::from_str_radix(hex_str, 16) {
45                if let Some(ch) = char::from_u32(code) {
46                    return Ok(ch.to_string());
47                }
48            }
49            return Err(FopError::EntityError {
50                message: format!("Invalid hexadecimal character reference: {}", entity),
51                location,
52            });
53        } else if let Some(dec_str) = entity.strip_prefix('#') {
54            // Decimal
55            if let Ok(code) = dec_str.parse::<u32>() {
56                if let Some(ch) = char::from_u32(code) {
57                    return Ok(ch.to_string());
58                }
59            }
60            return Err(FopError::EntityError {
61                message: format!("Invalid decimal character reference: {}", entity),
62                location,
63            });
64        }
65
66        // Named entity
67        self.entities
68            .get(entity)
69            .cloned()
70            .ok_or_else(|| FopError::EntityError {
71                message: format!("Unknown entity: &{};", entity),
72                location,
73            })
74    }
75
76    /// Resolve all entities in a string
77    pub fn resolve_entities(&self, text: &str, location: Location) -> Result<String> {
78        let mut result = String::new();
79        let mut chars = text.chars().peekable();
80
81        while let Some(ch) = chars.next() {
82            if ch == '&' {
83                // Find the end of the entity reference
84                let mut entity_name = String::new();
85                let mut found_semicolon = false;
86
87                while let Some(&next_ch) = chars.peek() {
88                    if next_ch == ';' {
89                        chars.next(); // consume semicolon
90                        found_semicolon = true;
91                        break;
92                    }
93                    entity_name.push(next_ch);
94                    chars.next();
95                }
96
97                if !found_semicolon {
98                    return Err(FopError::EntityError {
99                        message: format!("Unterminated entity reference: &{}", entity_name),
100                        location,
101                    });
102                }
103
104                let resolved = self.resolve(&entity_name, location)?;
105                result.push_str(&resolved);
106            } else {
107                result.push(ch);
108            }
109        }
110
111        Ok(result)
112    }
113}
114
115impl Default for EntityResolver {
116    fn default() -> Self {
117        Self::new()
118    }
119}
120
/// Processing instruction data
///
/// Plain value type pairing a processing-instruction target with its
/// optional data string.
#[derive(Debug, Clone, PartialEq)]
pub struct ProcessingInstruction {
    /// Target name of the instruction.
    pub target: String,
    /// Data that accompanies the target, or `None` when there is none.
    pub data: Option<String>,
}

impl ProcessingInstruction {
    /// Builds a processing instruction from its target and optional data.
    pub fn new(target: String, data: Option<String>) -> Self {
        ProcessingInstruction { target, data }
    }
}
133
134/// Wrapper around quick-xml Reader for parsing XSL-FO documents
135pub struct XmlParser<R: BufRead> {
136    reader: Reader<R>,
137    buf: Vec<u8>,
138    /// Namespace prefix to URI mapping
139    namespace_map: HashMap<String, String>,
140    /// Entity resolver
141    entity_resolver: EntityResolver,
142    /// Processing instructions encountered
143    processing_instructions: Vec<ProcessingInstruction>,
144}
145
146impl<R: BufRead> XmlParser<R> {
147    /// Create a new XML parser
148    pub fn new(reader: R) -> Self {
149        let mut xml_reader = Reader::from_reader(reader);
150        xml_reader.config_mut().trim_text(true);
151        xml_reader.config_mut().expand_empty_elements = true;
152
153        Self {
154            reader: xml_reader,
155            buf: Vec::new(),
156            namespace_map: HashMap::new(),
157            entity_resolver: EntityResolver::new(),
158            processing_instructions: Vec::new(),
159        }
160    }
161
162    /// Get a reference to the underlying reader
163    pub fn reader(&self) -> &Reader<R> {
164        &self.reader
165    }
166
167    /// Get a mutable reference to the underlying reader
168    pub fn reader_mut(&mut self) -> &mut Reader<R> {
169        &mut self.reader
170    }
171
172    /// Get the entity resolver
173    pub fn entity_resolver(&self) -> &EntityResolver {
174        &self.entity_resolver
175    }
176
177    /// Get a mutable reference to the entity resolver
178    pub fn entity_resolver_mut(&mut self) -> &mut EntityResolver {
179        &mut self.entity_resolver
180    }
181
182    /// Get processing instructions
183    pub fn processing_instructions(&self) -> &[ProcessingInstruction] {
184        &self.processing_instructions
185    }
186
187    /// Get current location (line and column)
188    pub fn location(&self) -> Location {
189        let pos = self.reader.buffer_position();
190        // quick-xml doesn't provide column info directly, so we approximate
191        Location::new(pos as usize, 0)
192    }
193
194    /// Read the next event
195    pub fn read_event(&mut self) -> Result<Event<'static>> {
196        self.buf.clear();
197        let event = self
198            .reader
199            .read_event_into(&mut self.buf)
200            .map(|e| e.into_owned())
201            .map_err(|e| {
202                let location = self.location();
203                FopError::XmlErrorWithLocation {
204                    message: format!("XML parsing error: {}", e),
205                    location,
206                    suggestion: None,
207                }
208            })?;
209
210        // Handle processing instructions
211        if let Event::PI(ref pi) = event {
212            if let Ok(target) = std::str::from_utf8(pi.as_ref()) {
213                // Parse target and data
214                let parts: Vec<&str> = target.splitn(2, ' ').collect();
215                let pi_target = parts[0].to_string();
216                let pi_data = parts.get(1).map(|s| s.to_string());
217
218                self.processing_instructions
219                    .push(ProcessingInstruction::new(pi_target, pi_data));
220            }
221        }
222
223        Ok(event)
224    }
225
226    /// Update namespace map from element attributes
227    pub fn update_namespaces(&mut self, start: &BytesStart) {
228        for attr in start.attributes().flatten() {
229            if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) {
230                if key.starts_with("xmlns") && key != "xmlns" {
231                    if let Some(prefix) = key.strip_prefix("xmlns:") {
232                        if let Ok(value) = attr.decode_and_unescape_value(self.reader.decoder()) {
233                            // Prefixed namespace
234                            self.namespace_map
235                                .insert(prefix.to_string(), value.to_string());
236                        }
237                    }
238                } else if key == "xmlns" {
239                    if let Ok(value) = attr.decode_and_unescape_value(self.reader.decoder()) {
240                        // Default namespace
241                        self.namespace_map.insert(String::new(), value.to_string());
242                    }
243                }
244            }
245        }
246    }
247
248    /// Extract element name and namespace from a BytesStart event
249    pub fn extract_name(&self, start: &BytesStart) -> Result<(String, Namespace)> {
250        let location = self.location();
251        let name = start.name();
252
253        // Extract namespace prefix and local name
254        let (ns_prefix, local_name) = if let Some(pos) =
255            name.as_ref().iter().position(|&b| b == b':')
256        {
257            let prefix = std::str::from_utf8(&name.as_ref()[..pos]).map_err(|e| {
258                FopError::XmlErrorWithLocation {
259                    message: format!("Invalid UTF-8 in prefix: {}", e),
260                    location,
261                    suggestion: None,
262                }
263            })?;
264            let local = std::str::from_utf8(&name.as_ref()[pos + 1..]).map_err(|e| {
265                FopError::XmlErrorWithLocation {
266                    message: format!("Invalid UTF-8 in local name: {}", e),
267                    location,
268                    suggestion: None,
269                }
270            })?;
271            (Some(prefix.to_string()), local)
272        } else {
273            let local =
274                std::str::from_utf8(name.as_ref()).map_err(|e| FopError::XmlErrorWithLocation {
275                    message: format!("Invalid UTF-8 in element name: {}", e),
276                    location,
277                    suggestion: None,
278                })?;
279            (None, local)
280        };
281
282        // Look up namespace URI from namespace map
283        let ns_uri = if let Some(ref prefix) = ns_prefix {
284            self.namespace_map.get(prefix).cloned().unwrap_or_default()
285        } else {
286            self.namespace_map.get("").cloned().unwrap_or_default()
287        };
288
289        let namespace = Namespace::from_uri(&ns_uri);
290
291        Ok((local_name.to_string(), namespace))
292    }
293
294    /// Extract attributes from a BytesStart event
295    pub fn extract_attributes(&self, start: &BytesStart) -> Result<Vec<(String, String)>> {
296        let location = self.location();
297        let mut attrs = Vec::new();
298
299        for attr_result in start.attributes() {
300            let attr = attr_result.map_err(|e| FopError::XmlErrorWithLocation {
301                message: format!("Attribute parsing error: {}", e),
302                location,
303                suggestion: None,
304            })?;
305
306            let key = std::str::from_utf8(attr.key.as_ref())
307                .map_err(|e| FopError::XmlErrorWithLocation {
308                    message: format!("Invalid UTF-8 in attribute name: {}", e),
309                    location,
310                    suggestion: None,
311                })?
312                .to_string();
313
314            // Skip xmlns attributes
315            if key.starts_with("xmlns") {
316                continue;
317            }
318
319            // quick-xml already handles entity unescaping in decode_and_unescape_value
320            let value = attr
321                .decode_and_unescape_value(self.reader.decoder())
322                .map_err(|e| FopError::XmlErrorWithLocation {
323                    message: format!("Attribute value decode error: {}", e),
324                    location,
325                    suggestion: None,
326                })?
327                .to_string();
328
329            attrs.push((key, value));
330        }
331
332        Ok(attrs)
333    }
334
335    /// Extract text content from Text event (with entity resolution)
336    pub fn extract_text(&self, text: &[u8]) -> Result<String> {
337        let location = self.location();
338        let text_str = std::str::from_utf8(text).map_err(|e| FopError::XmlErrorWithLocation {
339            message: format!("Invalid UTF-8 in text: {}", e),
340            location,
341            suggestion: None,
342        })?;
343
344        // Resolve entities in text content
345        self.entity_resolver.resolve_entities(text_str, location)
346    }
347
348    /// Extract CDATA content (no entity resolution)
349    pub fn extract_cdata(&self, cdata: &[u8]) -> Result<String> {
350        let location = self.location();
351        std::str::from_utf8(cdata)
352            .map(|s| s.to_string())
353            .map_err(|e| FopError::XmlErrorWithLocation {
354                message: format!("Invalid UTF-8 in CDATA: {}", e),
355                location,
356                suggestion: None,
357            })
358    }
359}
360
#[cfg(test)]
mod tests {
    //! Core tests for the entity resolver and the event-level parser API.

    use super::*;
    use std::io::Cursor;

    /// Wraps an in-memory document in a parser.
    fn parser_over(xml: &str) -> XmlParser<Cursor<&str>> {
        XmlParser::new(Cursor::new(xml))
    }

    /// Resolves `entity` at a fixed dummy location and checks the result.
    fn assert_resolves(resolver: &EntityResolver, entity: &str, expected: &str) {
        let resolved = resolver
            .resolve(entity, Location::new(1, 1))
            .expect("test: should succeed");
        assert_eq!(resolved, expected);
    }

    /// Looks up an attribute value by name in extracted `(name, value)` pairs.
    fn attr_value<'a>(attrs: &'a [(String, String)], name: &str) -> Option<&'a str> {
        attrs
            .iter()
            .find(|(key, _)| key == name)
            .map(|(_, value)| value.as_str())
    }

    /// Reads `xml` to the end and returns the content of the last CDATA
    /// section seen, if any.
    fn last_cdata(xml: &str) -> Option<String> {
        let mut parser = parser_over(xml);
        let mut captured = None;

        loop {
            match parser.read_event() {
                Ok(Event::CData(ref cdata)) => {
                    captured = Some(parser.extract_cdata(cdata).expect("test: should succeed"));
                }
                Ok(Event::Eof) => return captured,
                Ok(_) => {}
                Err(e) => panic!("Parse error: {}", e),
            }
        }
    }

    /// Consumes events until end of input, panicking on any parse error.
    fn run_to_eof(parser: &mut XmlParser<Cursor<&str>>) {
        loop {
            match parser.read_event() {
                Ok(Event::Eof) => return,
                Ok(_) => {}
                Err(e) => panic!("Parse error: {}", e),
            }
        }
    }

    #[test]
    fn test_parse_simple_fo() {
        let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <fo:layout-master-set>
        <fo:simple-page-master master-name="A4">
        </fo:simple-page-master>
    </fo:layout-master-set>
</fo:root>"#;

        let mut parser = parser_over(xml);
        let mut found_root = false;
        let mut found_layout_master_set = false;

        loop {
            match parser.read_event() {
                Ok(Event::Eof) => break,
                Ok(Event::Start(ref start)) | Ok(Event::Empty(ref start)) => {
                    parser.update_namespaces(start);
                    let (name, ns) = parser.extract_name(start).expect("test: should succeed");

                    found_root |= name == "root" && ns.is_fo();
                    found_layout_master_set |= name == "layout-master-set" && ns.is_fo();
                }
                Ok(_) => {}
                Err(e) => panic!("Parse error: {}", e),
            }
        }

        assert!(found_root);
        assert!(found_layout_master_set);
    }

    #[test]
    fn test_extract_attributes() {
        let xml = r#"<?xml version="1.0"?>
<fo:simple-page-master xmlns:fo="http://www.w3.org/1999/XSL/Format"
                       master-name="A4"
                       page-width="210mm"
                       page-height="297mm">
</fo:simple-page-master>"#;

        let mut parser = parser_over(xml);

        loop {
            match parser.read_event() {
                Ok(Event::Start(ref start)) | Ok(Event::Empty(ref start)) => {
                    parser.update_namespaces(start);
                    let attrs = parser
                        .extract_attributes(start)
                        .expect("test: should succeed");

                    // Only the first element is inspected.
                    assert_eq!(attr_value(&attrs, "master-name"), Some("A4"));
                    assert_eq!(attr_value(&attrs, "page-width"), Some("210mm"));
                    break;
                }
                Ok(Event::Eof) => break,
                Ok(_) => {}
                Err(e) => panic!("Parse error: {}", e),
            }
        }
    }

    #[test]
    fn test_cdata_section() {
        let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <![CDATA[<tag> & "quotes"]]>
</fo:block>"#;

        let content = last_cdata(xml);

        assert!(content.is_some());
        assert_eq!(content.unwrap_or_default(), r#"<tag> & "quotes""#);
    }

    #[test]
    fn test_entity_resolution_builtin() {
        let resolver = EntityResolver::new();

        assert_resolves(&resolver, "amp", "&");
        assert_resolves(&resolver, "lt", "<");
        assert_resolves(&resolver, "gt", ">");
        assert_resolves(&resolver, "quot", "\"");
        assert_resolves(&resolver, "apos", "'");
    }

    #[test]
    fn test_entity_resolution_numeric_decimal() {
        let resolver = EntityResolver::new();

        // &#65; = 'A'
        assert_resolves(&resolver, "#65", "A");
        // &#36; = '$'
        assert_resolves(&resolver, "#36", "$");
    }

    #[test]
    fn test_entity_resolution_numeric_hex() {
        let resolver = EntityResolver::new();

        // &#x41; = 'A', with either case of the radix marker
        assert_resolves(&resolver, "#x41", "A");
        assert_resolves(&resolver, "#X41", "A");
        // &#xA9; = '©'
        assert_resolves(&resolver, "#xA9", "©");
    }

    #[test]
    fn test_entity_resolution_custom() {
        let mut resolver = EntityResolver::new();
        resolver.add_entity("copy".to_string(), "©".to_string());

        assert_resolves(&resolver, "copy", "©");
    }

    #[test]
    fn test_entity_resolution_in_text() {
        let resolved = EntityResolver::new()
            .resolve_entities("Price: &#36;100 &amp; up", Location::new(1, 1))
            .expect("test: should succeed");

        assert_eq!(resolved, "Price: $100 & up");
    }

    #[test]
    fn test_entity_resolution_unknown() {
        let outcome = EntityResolver::new().resolve("unknown", Location::new(1, 1));

        assert!(outcome.is_err());
    }

    #[test]
    fn test_processing_instruction() {
        let xml = r#"<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="style.xsl"?>
<?fop-renderer backend="pdf"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
</fo:root>"#;

        let mut parser = parser_over(xml);
        run_to_eof(&mut parser);

        let pis = parser.processing_instructions();
        assert_eq!(pis.len(), 2);

        let expected_targets = ["xml-stylesheet", "fop-renderer"];
        for (pi, expected_target) in pis.iter().zip(expected_targets.iter()) {
            assert_eq!(pi.target, *expected_target);
            assert!(pi.data.is_some());
        }
    }

    #[test]
    fn test_entities_in_attributes() {
        let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format" title="Test &amp; More">
</fo:block>"#;

        let mut parser = parser_over(xml);

        loop {
            match parser.read_event() {
                Ok(Event::Start(ref start)) | Ok(Event::Empty(ref start)) => {
                    parser.update_namespaces(start);
                    let attrs = parser
                        .extract_attributes(start)
                        .expect("test: should succeed");

                    assert_eq!(attr_value(&attrs, "title"), Some("Test & More"));
                    break;
                }
                Ok(Event::Eof) => break,
                Ok(_) => {}
                Err(e) => panic!("Parse error: {}", e),
            }
        }
    }

    #[test]
    fn test_cdata_preserves_content() {
        let xml = r#"<?xml version="1.0"?>
<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <![CDATA[Code with <tags> & "special" &amp; chars]]>
</fo:block>"#;

        let cdata_content = last_cdata(xml).unwrap_or_default();

        // CDATA should preserve everything, including &amp;
        assert_eq!(cdata_content, r#"Code with <tags> & "special" &amp; chars"#);
    }

    #[test]
    fn test_multiple_entities() {
        let resolved = EntityResolver::new()
            .resolve_entities("&lt;tag&gt; &amp; &quot;text&quot;", Location::new(1, 1))
            .expect("test: should succeed");

        assert_eq!(resolved, r#"<tag> & "text""#);
    }

    #[test]
    fn test_unterminated_entity() {
        let outcome =
            EntityResolver::new().resolve_entities("&amp no semicolon", Location::new(1, 1));

        assert!(outcome.is_err());
    }

    #[test]
    fn test_location_tracking() {
        let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
</fo:root>"#;

        let parser = parser_over(xml);

        // Location should be available (just verify we can get it)
        let _location = parser.location();
    }

    #[test]
    fn test_error_with_location() {
        let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <unclosed-tag>
</fo:root>"#;

        let mut parser = parser_over(xml);

        // Stop at the first error, or at end of input if none occurs.
        let failure = loop {
            match parser.read_event() {
                Ok(Event::Eof) => break None,
                Ok(_) => {}
                Err(e) => break Some(e),
            }
        };

        if let Some(ref e) = failure {
            // Error should contain location information
            let error_str = format!("{}", e);
            assert!(error_str.contains("line") || error_str.contains("XML parsing error"));
        }

        assert!(failure.is_some());
    }
}
752
753// ===== ADDITIONAL TESTS (60+ new tests) =====
754
755#[cfg(test)]
756mod additional_tests {
757    use super::*;
758    use std::io::Cursor;
759
760    // ===== ENTITY RESOLVER EDGE CASES =====
761
762    #[test]
763    fn test_entity_resolver_apos() {
764        let resolver = EntityResolver::new();
765        let location = Location::new(1, 1);
766        assert_eq!(
767            resolver
768                .resolve("apos", location)
769                .expect("test: should succeed"),
770            "'"
771        );
772    }
773
774    #[test]
775    fn test_entity_resolver_quot() {
776        let resolver = EntityResolver::new();
777        let location = Location::new(1, 1);
778        assert_eq!(
779            resolver
780                .resolve("quot", location)
781                .expect("test: should succeed"),
782            "\""
783        );
784    }
785
786    #[test]
787    fn test_entity_resolver_gt() {
788        let resolver = EntityResolver::new();
789        let location = Location::new(1, 1);
790        assert_eq!(
791            resolver
792                .resolve("gt", location)
793                .expect("test: should succeed"),
794            ">"
795        );
796    }
797
798    #[test]
799    fn test_entity_resolver_empty_text() {
800        let resolver = EntityResolver::new();
801        let location = Location::new(1, 1);
802        let result = resolver
803            .resolve_entities("", location)
804            .expect("test: should succeed");
805        assert_eq!(result, "");
806    }
807
808    #[test]
809    fn test_entity_resolver_text_without_entities() {
810        let resolver = EntityResolver::new();
811        let location = Location::new(1, 1);
812        let result = resolver
813            .resolve_entities("hello world", location)
814            .expect("test: should succeed");
815        assert_eq!(result, "hello world");
816    }
817
818    #[test]
819    fn test_entity_resolver_only_entity() {
820        let resolver = EntityResolver::new();
821        let location = Location::new(1, 1);
822        let result = resolver
823            .resolve_entities("&amp;", location)
824            .expect("test: should succeed");
825        assert_eq!(result, "&");
826    }
827
828    #[test]
829    fn test_entity_resolver_hex_zero() {
830        // &#x0041; = 'A'
831        let resolver = EntityResolver::new();
832        let location = Location::new(1, 1);
833        let result = resolver
834            .resolve("#x0041", location)
835            .expect("test: should succeed");
836        assert_eq!(result, "A");
837    }
838
839    #[test]
840    fn test_entity_resolver_decimal_newline() {
841        // &#10; = newline
842        let resolver = EntityResolver::new();
843        let location = Location::new(1, 1);
844        let result = resolver
845            .resolve("#10", location)
846            .expect("test: should succeed");
847        assert_eq!(result, "\n");
848    }
849
850    #[test]
851    fn test_entity_resolver_decimal_tab() {
852        // &#9; = tab
853        let resolver = EntityResolver::new();
854        let location = Location::new(1, 1);
855        let result = resolver
856            .resolve("#9", location)
857            .expect("test: should succeed");
858        assert_eq!(result, "\t");
859    }
860
861    #[test]
862    fn test_entity_resolver_unicode_multibyte() {
863        // &#x4e2d; = '中' (U+4E2D, Chinese character)
864        let resolver = EntityResolver::new();
865        let location = Location::new(1, 1);
866        let result = resolver
867            .resolve("#x4e2d", location)
868            .expect("test: should succeed");
869        assert_eq!(result, "中");
870    }
871
872    #[test]
873    fn test_entity_resolver_add_multiple_custom() {
874        let mut resolver = EntityResolver::new();
875        resolver.add_entity("euro".to_string(), "€".to_string());
876        resolver.add_entity("yen".to_string(), "¥".to_string());
877        resolver.add_entity("pound".to_string(), "£".to_string());
878
879        let location = Location::new(1, 1);
880        assert_eq!(
881            resolver
882                .resolve("euro", location)
883                .expect("test: should succeed"),
884            "€"
885        );
886        assert_eq!(
887            resolver
888                .resolve("yen", location)
889                .expect("test: should succeed"),
890            "¥"
891        );
892        assert_eq!(
893            resolver
894                .resolve("pound", location)
895                .expect("test: should succeed"),
896            "£"
897        );
898    }
899
900    #[test]
901    fn test_entity_resolver_override_custom() {
902        let mut resolver = EntityResolver::new();
903        // Override the built-in amp entity
904        resolver.add_entity("amp".to_string(), "AMPERSAND".to_string());
905
906        let location = Location::new(1, 1);
907        assert_eq!(
908            resolver
909                .resolve("amp", location)
910                .expect("test: should succeed"),
911            "AMPERSAND"
912        );
913    }
914
915    #[test]
916    fn test_entity_resolver_resolve_entities_multiple() {
917        let resolver = EntityResolver::new();
918        let location = Location::new(1, 1);
919        let text = "&lt;&gt;&amp;&quot;&apos;";
920        let result = resolver
921            .resolve_entities(text, location)
922            .expect("test: should succeed");
923        assert_eq!(result, "<>&\"'");
924    }
925
926    #[test]
927    fn test_entity_resolver_numeric_in_text() {
928        let resolver = EntityResolver::new();
929        let location = Location::new(1, 1);
930        let text = "A&#65;B&#66;C";
931        let result = resolver
932            .resolve_entities(text, location)
933            .expect("test: should succeed");
934        assert_eq!(result, "AABBC");
935    }
936
937    #[test]
938    fn test_entity_resolver_hex_uppercase() {
939        // &#X41; (uppercase X) should also resolve
940        let resolver = EntityResolver::new();
941        let location = Location::new(1, 1);
942        let result = resolver
943            .resolve("#X41", location)
944            .expect("test: should succeed");
945        assert_eq!(result, "A");
946    }
947
948    // ===== PROCESSING INSTRUCTION TESTS =====
949
950    #[test]
951    fn test_processing_instruction_new() {
952        let pi = ProcessingInstruction::new("target".to_string(), Some("data".to_string()));
953        assert_eq!(pi.target, "target");
954        assert_eq!(pi.data, Some("data".to_string()));
955    }
956
957    #[test]
958    fn test_processing_instruction_no_data() {
959        let pi = ProcessingInstruction::new("target".to_string(), None);
960        assert_eq!(pi.target, "target");
961        assert!(pi.data.is_none());
962    }
963
964    #[test]
965    fn test_processing_instruction_equality() {
966        let pi1 = ProcessingInstruction::new("foo".to_string(), Some("bar".to_string()));
967        let pi2 = ProcessingInstruction::new("foo".to_string(), Some("bar".to_string()));
968        assert_eq!(pi1, pi2);
969    }
970
971    #[test]
972    fn test_processing_instruction_inequality() {
973        let pi1 = ProcessingInstruction::new("foo".to_string(), Some("bar".to_string()));
974        let pi2 = ProcessingInstruction::new("baz".to_string(), Some("bar".to_string()));
975        assert_ne!(pi1, pi2);
976    }
977
978    // ===== NAMESPACE TESTS =====
979
980    #[test]
981    fn test_nested_namespace_declarations() {
982        let xml = r#"<?xml version="1.0"?>
983<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"
984         xmlns:svg="http://www.w3.org/2000/svg">
985    <fo:layout-master-set></fo:layout-master-set>
986</fo:root>"#;
987
988        let cursor = Cursor::new(xml);
989        let mut parser = XmlParser::new(cursor);
990
991        let mut found_root = false;
992        loop {
993            let event = parser.read_event();
994            match event {
995                Ok(Event::Start(ref start)) | Ok(Event::Empty(ref start)) => {
996                    parser.update_namespaces(start);
997                    let result = parser.extract_name(start);
998                    if let Ok((name, ns)) = result {
999                        if name == "root" && ns.is_fo() {
1000                            found_root = true;
1001                        }
1002                    }
1003                }
1004                Ok(Event::Eof) => break,
1005                Err(e) => panic!("Parse error: {}", e),
1006                _ => {}
1007            }
1008        }
1009        assert!(found_root);
1010    }
1011
1012    #[test]
1013    fn test_fox_extension_namespace() {
1014        let xml = r#"<?xml version="1.0"?>
1015<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"
1016         xmlns:fox="http://xmlgraphics.apache.org/fop/extensions">
1017    <fo:layout-master-set></fo:layout-master-set>
1018</fo:root>"#;
1019
1020        let cursor = Cursor::new(xml);
1021        let mut parser = XmlParser::new(cursor);
1022
1023        let mut found_root = false;
1024        loop {
1025            let event = parser.read_event();
1026            match event {
1027                Ok(Event::Start(ref start)) | Ok(Event::Empty(ref start)) => {
1028                    parser.update_namespaces(start);
1029                    if let Ok((name, ns)) = parser.extract_name(start) {
1030                        if name == "root" && ns.is_fo() {
1031                            found_root = true;
1032                        }
1033                    }
1034                }
1035                Ok(Event::Eof) => break,
1036                Err(e) => panic!("Parse error: {}", e),
1037                _ => {}
1038            }
1039        }
1040        assert!(found_root);
1041    }
1042
1043    // ===== XML PARSER EVENT TESTS =====
1044
1045    #[test]
1046    fn test_empty_element_produces_start_end() {
1047        // With expand_empty_elements=true, empty elements produce Start+End
1048        let xml = r#"<?xml version="1.0"?>
1049<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1050    <fo:layout-master-set>
1051        <fo:simple-page-master master-name="A4">
1052            <fo:region-body/>
1053        </fo:simple-page-master>
1054    </fo:layout-master-set>
1055</fo:root>"#;
1056
1057        let cursor = Cursor::new(xml);
1058        let mut parser = XmlParser::new(cursor);
1059
1060        let mut element_count = 0;
1061        loop {
1062            match parser.read_event() {
1063                Ok(Event::Start(ref start)) => {
1064                    parser.update_namespaces(start);
1065                    element_count += 1;
1066                }
1067                Ok(Event::Eof) => break,
1068                Ok(_) => {}
1069                Err(e) => panic!("Parse error: {}", e),
1070            }
1071        }
1072        // root, layout-master-set, simple-page-master, region-body (expanded from empty)
1073        assert!(element_count >= 4);
1074    }
1075
1076    #[test]
1077    fn test_multiple_attributes_preserved_order() {
1078        let xml = r#"<?xml version="1.0"?>
1079<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format"
1080    font-size="12pt"
1081    font-family="Arial"
1082    color="black"
1083    margin-top="10pt">text</fo:block>"#;
1084
1085        let cursor = Cursor::new(xml);
1086        let mut parser = XmlParser::new(cursor);
1087
1088        loop {
1089            match parser.read_event() {
1090                Ok(Event::Start(ref start)) => {
1091                    parser.update_namespaces(start);
1092                    let attrs = parser
1093                        .extract_attributes(start)
1094                        .expect("test: should succeed");
1095                    // xmlns attrs are skipped, so we expect 4 non-namespace attrs
1096                    assert_eq!(attrs.len(), 4);
1097                    break;
1098                }
1099                Ok(Event::Eof) => break,
1100                Ok(_) => {}
1101                Err(e) => panic!("Parse error: {}", e),
1102            }
1103        }
1104    }
1105
1106    #[test]
1107    fn test_text_with_special_chars_in_cdata() {
1108        let xml = r#"<?xml version="1.0"?>
1109<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format"><![CDATA[a < b && c > d]]></fo:block>"#;
1110
1111        let cursor = Cursor::new(xml);
1112        let mut parser = XmlParser::new(cursor);
1113
1114        let mut cdata_text = String::new();
1115        loop {
1116            match parser.read_event() {
1117                Ok(Event::CData(ref cdata)) => {
1118                    cdata_text = parser.extract_cdata(cdata).expect("test: should succeed");
1119                }
1120                Ok(Event::Eof) => break,
1121                Ok(_) => {}
1122                Err(e) => panic!("Parse error: {}", e),
1123            }
1124        }
1125        assert_eq!(cdata_text, "a < b && c > d");
1126    }
1127
1128    #[test]
1129    fn test_extract_cdata_preserves_angle_brackets() {
1130        let xml = r#"<?xml version="1.0"?>
1131<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format"><![CDATA[<tag attr="val"/>]]></fo:block>"#;
1132
1133        let cursor = Cursor::new(xml);
1134        let mut parser = XmlParser::new(cursor);
1135
1136        let mut cdata_text = String::new();
1137        loop {
1138            match parser.read_event() {
1139                Ok(Event::CData(ref cdata)) => {
1140                    cdata_text = parser.extract_cdata(cdata).expect("test: should succeed");
1141                }
1142                Ok(Event::Eof) => break,
1143                Ok(_) => {}
1144                Err(e) => panic!("Parse error: {}", e),
1145            }
1146        }
1147        assert_eq!(cdata_text, r#"<tag attr="val"/>"#);
1148    }
1149
1150    #[test]
1151    fn test_comment_does_not_produce_text_event() {
1152        let xml = r#"<?xml version="1.0"?>
1153<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"><!-- this is a comment --></fo:root>"#;
1154
1155        let cursor = Cursor::new(xml);
1156        let mut parser = XmlParser::new(cursor);
1157
1158        let mut text_events = 0;
1159        loop {
1160            match parser.read_event() {
1161                Ok(Event::Text(_)) => {
1162                    text_events += 1;
1163                }
1164                Ok(Event::Eof) => break,
1165                Ok(_) => {}
1166                Err(e) => panic!("Parse error: {}", e),
1167            }
1168        }
1169        // Comments should not produce text events; trim_text should remove empty whitespace
1170        assert_eq!(text_events, 0);
1171    }
1172
1173    #[test]
1174    fn test_multiple_processing_instructions() {
1175        let xml = r#"<?xml version="1.0"?>
1176<?stylesheet type="text/css"?>
1177<?renderer backend="pdf"?>
1178<?custom-pi data="value"?>
1179<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"></fo:root>"#;
1180
1181        let cursor = Cursor::new(xml);
1182        let mut parser = XmlParser::new(cursor);
1183
1184        loop {
1185            match parser.read_event() {
1186                Ok(Event::Eof) => break,
1187                Ok(_) => {}
1188                Err(e) => panic!("Parse error: {}", e),
1189            }
1190        }
1191
1192        let pis = parser.processing_instructions();
1193        assert_eq!(pis.len(), 3);
1194        assert_eq!(pis[0].target, "stylesheet");
1195        assert_eq!(pis[1].target, "renderer");
1196        assert_eq!(pis[2].target, "custom-pi");
1197    }
1198
1199    #[test]
1200    fn test_no_processing_instructions_when_none_present() {
1201        let xml = r#"<?xml version="1.0"?>
1202<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"></fo:root>"#;
1203
1204        let cursor = Cursor::new(xml);
1205        let mut parser = XmlParser::new(cursor);
1206
1207        loop {
1208            match parser.read_event() {
1209                Ok(Event::Eof) => break,
1210                Ok(_) => {}
1211                Err(e) => panic!("Parse error: {}", e),
1212            }
1213        }
1214
1215        let pis = parser.processing_instructions();
1216        assert_eq!(pis.len(), 0);
1217    }
1218
1219    #[test]
1220    fn test_attributes_with_apos_entity() {
1221        let xml = r#"<?xml version="1.0"?>
1222<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format" title="it&apos;s">text</fo:block>"#;
1223
1224        let cursor = Cursor::new(xml);
1225        let mut parser = XmlParser::new(cursor);
1226
1227        loop {
1228            match parser.read_event() {
1229                Ok(Event::Start(ref start)) => {
1230                    parser.update_namespaces(start);
1231                    let attrs = parser
1232                        .extract_attributes(start)
1233                        .expect("test: should succeed");
1234                    let title = attrs
1235                        .iter()
1236                        .find(|(k, _)| k == "title")
1237                        .map(|(_, v)| v.as_str());
1238                    assert_eq!(title, Some("it's"));
1239                    break;
1240                }
1241                Ok(Event::Eof) => break,
1242                Ok(_) => {}
1243                Err(e) => panic!("Parse error: {}", e),
1244            }
1245        }
1246    }
1247
1248    #[test]
1249    fn test_attributes_with_lt_entity() {
1250        let xml = r#"<?xml version="1.0"?>
1251<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format" title="a &lt; b">text</fo:block>"#;
1252
1253        let cursor = Cursor::new(xml);
1254        let mut parser = XmlParser::new(cursor);
1255
1256        loop {
1257            match parser.read_event() {
1258                Ok(Event::Start(ref start)) => {
1259                    parser.update_namespaces(start);
1260                    let attrs = parser
1261                        .extract_attributes(start)
1262                        .expect("test: should succeed");
1263                    let title = attrs
1264                        .iter()
1265                        .find(|(k, _)| k == "title")
1266                        .map(|(_, v)| v.as_str());
1267                    assert_eq!(title, Some("a < b"));
1268                    break;
1269                }
1270                Ok(Event::Eof) => break,
1271                Ok(_) => {}
1272                Err(e) => panic!("Parse error: {}", e),
1273            }
1274        }
1275    }
1276
1277    #[test]
1278    fn test_attribute_with_numeric_entity() {
1279        let xml = r#"<?xml version="1.0"?>
1280<fo:block xmlns:fo="http://www.w3.org/1999/XSL/Format" title="&#65;BC">text</fo:block>"#;
1281
1282        let cursor = Cursor::new(xml);
1283        let mut parser = XmlParser::new(cursor);
1284
1285        loop {
1286            match parser.read_event() {
1287                Ok(Event::Start(ref start)) => {
1288                    parser.update_namespaces(start);
1289                    let attrs = parser
1290                        .extract_attributes(start)
1291                        .expect("test: should succeed");
1292                    let title = attrs
1293                        .iter()
1294                        .find(|(k, _)| k == "title")
1295                        .map(|(_, v)| v.as_str());
1296                    assert_eq!(title, Some("ABC"));
1297                    break;
1298                }
1299                Ok(Event::Eof) => break,
1300                Ok(_) => {}
1301                Err(e) => panic!("Parse error: {}", e),
1302            }
1303        }
1304    }
1305
1306    #[test]
1307    fn test_xml_with_utf8_text() {
1308        let xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<fo:root xmlns:fo=\"http://www.w3.org/1999/XSL/Format\"><fo:block>日本語テスト</fo:block></fo:root>";
1309
1310        let cursor = Cursor::new(xml);
1311        let mut parser = XmlParser::new(cursor);
1312
1313        let mut text_content = String::new();
1314        loop {
1315            match parser.read_event() {
1316                Ok(Event::Text(ref text)) => {
1317                    text_content = parser.extract_text(text).expect("test: should succeed");
1318                }
1319                Ok(Event::Eof) => break,
1320                Ok(_) => {}
1321                Err(e) => panic!("Parse error: {}", e),
1322            }
1323        }
1324        assert_eq!(text_content, "日本語テスト");
1325    }
1326
1327    #[test]
1328    fn test_entity_resolver_clone() {
1329        let mut resolver = EntityResolver::new();
1330        resolver.add_entity("test".to_string(), "TEST_VALUE".to_string());
1331        let cloned = resolver.clone();
1332
1333        let location = Location::new(1, 1);
1334        assert_eq!(
1335            cloned
1336                .resolve("test", location)
1337                .expect("test: should succeed"),
1338            "TEST_VALUE"
1339        );
1340        assert_eq!(
1341            cloned
1342                .resolve("amp", location)
1343                .expect("test: should succeed"),
1344            "&"
1345        );
1346    }
1347
1348    #[test]
1349    fn test_entity_resolver_default() {
1350        let resolver = EntityResolver::default();
1351        let location = Location::new(1, 1);
1352        // Default should have all 5 built-in entities
1353        assert_eq!(
1354            resolver
1355                .resolve("amp", location)
1356                .expect("test: should succeed"),
1357            "&"
1358        );
1359        assert_eq!(
1360            resolver
1361                .resolve("lt", location)
1362                .expect("test: should succeed"),
1363            "<"
1364        );
1365        assert_eq!(
1366            resolver
1367                .resolve("gt", location)
1368                .expect("test: should succeed"),
1369            ">"
1370        );
1371        assert_eq!(
1372            resolver
1373                .resolve("quot", location)
1374                .expect("test: should succeed"),
1375            "\""
1376        );
1377        assert_eq!(
1378            resolver
1379                .resolve("apos", location)
1380                .expect("test: should succeed"),
1381            "'"
1382        );
1383    }
1384
1385    #[test]
1386    fn test_xml_deeply_nested_elements() {
1387        // Test parsing with many levels of nesting
1388        let xml = r#"<?xml version="1.0"?>
1389<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1390    <fo:layout-master-set>
1391        <fo:simple-page-master master-name="p1">
1392            <fo:region-body/>
1393        </fo:simple-page-master>
1394    </fo:layout-master-set>
1395    <fo:page-sequence master-reference="p1">
1396        <fo:flow flow-name="xsl-region-body">
1397            <fo:block>
1398                <fo:inline>
1399                    <fo:inline>
1400                        <fo:inline>deep nesting</fo:inline>
1401                    </fo:inline>
1402                </fo:inline>
1403            </fo:block>
1404        </fo:flow>
1405    </fo:page-sequence>
1406</fo:root>"#;
1407
1408        let cursor = Cursor::new(xml);
1409        let mut parser = XmlParser::new(cursor);
1410        let mut error = None;
1411
1412        loop {
1413            match parser.read_event() {
1414                Ok(Event::Eof) => break,
1415                Ok(_) => {}
1416                Err(e) => {
1417                    error = Some(e);
1418                    break;
1419                }
1420            }
1421        }
1422        assert!(error.is_none(), "Deep nesting should parse without error");
1423    }
1424
1425    #[test]
1426    fn test_xml_empty_text_nodes_trimmed() {
1427        // With trim_text(true), whitespace-only text nodes are trimmed to empty
1428        let xml = r#"<?xml version="1.0"?>
1429<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
1430    <fo:layout-master-set>
1431    </fo:layout-master-set>
1432</fo:root>"#;
1433
1434        let cursor = Cursor::new(xml);
1435        let mut parser = XmlParser::new(cursor);
1436
1437        let mut non_empty_text = 0;
1438        loop {
1439            match parser.read_event() {
1440                Ok(Event::Text(ref text)) => {
1441                    let content = parser.extract_text(text).unwrap_or_default();
1442                    if !content.is_empty() {
1443                        non_empty_text += 1;
1444                    }
1445                }
1446                Ok(Event::Eof) => break,
1447                Ok(_) => {}
1448                Err(e) => panic!("Parse error: {}", e),
1449            }
1450        }
1451        // Whitespace-only nodes should be empty after trim
1452        assert_eq!(non_empty_text, 0);
1453    }
1454
1455    #[test]
1456    fn test_xml_pi_target_with_data() {
1457        let xml = r#"<?xml version="1.0"?>
1458<?fop-config key="value" other="data"?>
1459<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format"></fo:root>"#;
1460
1461        let cursor = Cursor::new(xml);
1462        let mut parser = XmlParser::new(cursor);
1463
1464        loop {
1465            match parser.read_event() {
1466                Ok(Event::Eof) => break,
1467                Ok(_) => {}
1468                Err(e) => panic!("Parse error: {}", e),
1469            }
1470        }
1471
1472        let pis = parser.processing_instructions();
1473        assert_eq!(pis.len(), 1);
1474        assert_eq!(pis[0].target, "fop-config");
1475        assert!(pis[0].data.is_some());
1476        let data = pis[0].data.as_ref().expect("test: should succeed");
1477        assert!(data.contains("key"));
1478    }
1479
1480    #[test]
1481    fn test_entity_resolver_unknown_entity_has_name_in_error() {
1482        let resolver = EntityResolver::new();
1483        let location = Location::new(5, 10);
1484        let result = resolver.resolve("nonexistent", location);
1485        assert!(result.is_err());
1486        let err = result.unwrap_err();
1487        let err_str = format!("{}", err);
1488        assert!(err_str.contains("nonexistent"));
1489    }
1490
1491    #[test]
1492    fn test_entity_resolver_invalid_hex_ref() {
1493        let resolver = EntityResolver::new();
1494        let location = Location::new(1, 1);
1495        // Non-hex characters after #x
1496        let result = resolver.resolve("#xZZZZ", location);
1497        assert!(result.is_err());
1498    }
1499
1500    #[test]
1501    fn test_entity_resolver_invalid_decimal_ref() {
1502        let resolver = EntityResolver::new();
1503        let location = Location::new(1, 1);
1504        // Non-decimal characters after #
1505        let result = resolver.resolve("#abc", location);
1506        assert!(result.is_err());
1507    }
1508}