serde_xml/
reader.rs

1//! Low-level XML reader/tokenizer.
2//!
3//! This module provides a fast, zero-copy XML tokenizer that produces events
4//! for elements, attributes, text content, and other XML constructs.
5
6use crate::error::{Error, Position, Result};
7use crate::escape::unescape;
8use memchr::{memchr, memchr2};
9use std::borrow::Cow;
10
/// Byte-indexed table: `true` for the four XML whitespace bytes
/// (space, tab, line feed, carriage return), `false` for everything else.
static IS_WHITESPACE: [bool; 256] = {
    const XML_WHITESPACE: [u8; 4] = [b' ', b'\t', b'\n', b'\r'];

    let mut table = [false; 256];
    // `for` loops are not available in const evaluation, hence the manual index.
    let mut idx = 0;
    while idx < XML_WHITESPACE.len() {
        table[XML_WHITESPACE[idx] as usize] = true;
        idx += 1;
    }
    table
};
20
/// Byte-indexed table of bytes that may begin an XML name.
///
/// Accepts ASCII letters, `_`, `:` and every byte >= 0x80, so that any
/// multi-byte UTF-8 sequence is treated as part of a name.
static IS_NAME_START: [bool; 256] = {
    let mut table = [false; 256];
    let mut byte: usize = 0;
    while byte < 256 {
        table[byte] = matches!(
            byte as u8,
            b'A'..=b'Z' | b'a'..=b'z' | b'_' | b':' | 0x80..=0xFF
        );
        byte += 1;
    }
    table
};
44
45/// Name character lookup table.
46static IS_NAME_CHAR: [bool; 256] = {
47    let mut lut = IS_NAME_START;
48    let mut i = b'0';
49    while i <= b'9' {
50        lut[i as usize] = true;
51        i += 1;
52    }
53    lut[b'-' as usize] = true;
54    lut[b'.' as usize] = true;
55    lut
56};
57
/// A single token produced by [`XmlReader::next_event`].
///
/// String payloads are `Cow<'a, str>`: they borrow from the reader's input
/// where possible. DOCTYPE declarations are skipped by the reader and have no
/// corresponding variant.
#[derive(Debug, Clone, PartialEq)]
pub enum XmlEvent<'a> {
    /// XML declaration: `<?xml version="1.0"?>`
    XmlDecl {
        /// XML version (e.g., "1.0"). The reader substitutes "1.0" when the
        /// declaration has no `version` pseudo-attribute.
        version: Cow<'a, str>,
        /// Character encoding (e.g., "UTF-8"), if declared.
        encoding: Option<Cow<'a, str>>,
        /// Standalone declaration, if present. The reader maps the value
        /// "yes" to `true` and any other value to `false`.
        standalone: Option<bool>,
    },
    /// Start of an element: `<name attr="value">`
    StartElement {
        /// Element name.
        name: Cow<'a, str>,
        /// Element attributes, in document order.
        attributes: Vec<Attribute<'a>>,
    },
    /// End of an element: `</name>`
    EndElement {
        /// Element name.
        name: Cow<'a, str>,
    },
    /// Empty (self-closing) element: `<name attr="value"/>`
    ///
    /// No matching `EndElement` follows this event.
    EmptyElement {
        /// Element name.
        name: Cow<'a, str>,
        /// Element attributes, in document order.
        attributes: Vec<Attribute<'a>>,
    },
    /// Text content between tags, with surrounding whitespace trimmed and
    /// entity references unescaped.
    Text(Cow<'a, str>),
    /// CDATA section: `<![CDATA[...]]>`. The content is passed through as-is
    /// (neither trimmed nor unescaped).
    CData(Cow<'a, str>),
    /// Comment: `<!-- ... -->`. The payload is the trimmed comment body.
    Comment(Cow<'a, str>),
    /// Processing instruction: `<?target data?>`
    ProcessingInstruction {
        /// Processing instruction target.
        target: Cow<'a, str>,
        /// Processing instruction data, trimmed; `None` when empty.
        data: Option<Cow<'a, str>>,
    },
    /// End of document: the input is exhausted and no elements remain open.
    Eof,
}
105
/// A single `name="value"` pair from a start tag, empty-element tag or
/// XML declaration.
#[derive(Debug, Clone, PartialEq)]
pub struct Attribute<'a> {
    /// The attribute name, exactly as written in the input.
    pub name: Cow<'a, str>,
    /// The attribute value with the surrounding quotes removed and entity
    /// references unescaped.
    pub value: Cow<'a, str>,
}
114
/// A fast, zero-copy XML reader.
///
/// The reader walks a borrowed byte slice and hands out events whose string
/// payloads borrow from that slice. It also tracks the open-element stack so
/// that mismatched or unclosed tags are reported as errors.
pub struct XmlReader<'a> {
    /// The complete document being tokenized.
    input: &'a [u8],
    /// Byte offset of the next unread byte in `input`.
    pos: usize,
    /// Current line number; starts at 1 and advances on each `\n`.
    line: usize,
    /// Current column; starts at 1 and advances once per byte (not per
    /// character), resetting to 1 after each `\n`.
    col: usize,
    /// Stack of open element names for validation.
    element_stack: Vec<String>,
}
124
125impl<'a> XmlReader<'a> {
126    /// Creates a new XML reader from a string.
127    #[inline]
128    #[allow(clippy::should_implement_trait)]
129    pub fn from_str(s: &'a str) -> Self {
130        Self::from_bytes(s.as_bytes())
131    }
132
133    /// Creates a new XML reader from bytes.
134    #[inline]
135    pub fn from_bytes(input: &'a [u8]) -> Self {
136        Self {
137            input,
138            pos: 0,
139            line: 1,
140            col: 1,
141            element_stack: Vec::with_capacity(8), // Pre-allocate for typical nesting
142        }
143    }
144
145    /// Returns the current position in the input.
146    #[inline]
147    pub fn position(&self) -> Position {
148        Position {
149            line: self.line,
150            column: self.col,
151            offset: self.pos,
152        }
153    }
154
155    /// Returns whether there are any open elements.
156    #[inline]
157    pub fn depth(&self) -> usize {
158        self.element_stack.len()
159    }
160
161    /// Reads the next XML event.
162    #[inline]
163    pub fn next_event(&mut self) -> Result<XmlEvent<'a>> {
164        self.skip_whitespace_fast();
165
166        if self.pos >= self.input.len() {
167            if let Some(tag) = self.element_stack.pop() {
168                return Err(Error::unclosed_tag(tag).with_position(self.position()));
169            }
170            return Ok(XmlEvent::Eof);
171        }
172
173        if self.input[self.pos] == b'<' {
174            self.read_tag()
175        } else {
176            self.read_text()
177        }
178    }
179
180    /// Fast whitespace skipping using lookup table.
181    #[inline(always)]
182    fn skip_whitespace_fast(&mut self) {
183        while self.pos < self.input.len() {
184            let b = self.input[self.pos];
185            if !IS_WHITESPACE[b as usize] {
186                break;
187            }
188            if b == b'\n' {
189                self.line += 1;
190                self.col = 1;
191            } else {
192                self.col += 1;
193            }
194            self.pos += 1;
195        }
196    }
197
198    /// Reads text content using memchr for fast scanning.
199    #[inline]
200    fn read_text(&mut self) -> Result<XmlEvent<'a>> {
201        let start = self.pos;
202
203        // Fast path: find '<' using SIMD-accelerated memchr
204        match memchr(b'<', &self.input[self.pos..]) {
205            Some(offset) => {
206                // Update position tracking
207                self.update_position_for_range(self.pos, self.pos + offset);
208                self.pos += offset;
209            }
210            None => {
211                self.update_position_for_range(self.pos, self.input.len());
212                self.pos = self.input.len();
213            }
214        }
215
216        let text = std::str::from_utf8(&self.input[start..self.pos])
217            .map_err(|_| Error::new(crate::error::ErrorKind::InvalidUtf8))?;
218
219        // Trim whitespace from text
220        let trimmed = text.trim();
221        if trimmed.is_empty() {
222            return self.next_event();
223        }
224
225        // Unescape XML entities
226        match unescape(trimmed) {
227            Ok(unescaped) => Ok(XmlEvent::Text(unescaped)),
228            Err(e) => Err(Error::invalid_escape(e.entity)),
229        }
230    }
231
232    /// Updates line/column tracking for a range of bytes.
233    #[inline(always)]
234    fn update_position_for_range(&mut self, start: usize, end: usize) {
235        // Count newlines in the range
236        let slice = &self.input[start..end];
237        for &b in slice {
238            if b == b'\n' {
239                self.line += 1;
240                self.col = 1;
241            } else {
242                self.col += 1;
243            }
244        }
245    }
246
247    /// Reads a tag (element, comment, CDATA, PI, or declaration).
248    #[inline]
249    fn read_tag(&mut self) -> Result<XmlEvent<'a>> {
250        debug_assert_eq!(self.input[self.pos], b'<');
251        self.pos += 1;
252        self.col += 1;
253
254        if self.pos >= self.input.len() {
255            return Err(Error::unexpected_eof().with_position(self.position()));
256        }
257
258        match self.input[self.pos] {
259            b'/' => self.read_end_element(),
260            b'?' => self.read_processing_instruction(),
261            b'!' => self.read_special(),
262            _ => self.read_start_element(),
263        }
264    }
265
266    /// Reads a start element or empty element.
267    #[inline]
268    fn read_start_element(&mut self) -> Result<XmlEvent<'a>> {
269        let name = self.read_name()?;
270        let attributes = self.read_attributes()?;
271
272        self.skip_whitespace_fast();
273
274        if self.pos >= self.input.len() {
275            return Err(Error::unexpected_eof().with_position(self.position()));
276        }
277
278        if self.input[self.pos] == b'/' {
279            // Empty element: <name/>
280            self.pos += 1;
281            self.col += 1;
282            self.expect_char(b'>')?;
283            Ok(XmlEvent::EmptyElement {
284                name: Cow::Borrowed(name),
285                attributes,
286            })
287        } else if self.input[self.pos] == b'>' {
288            // Start element: <name>
289            self.pos += 1;
290            self.col += 1;
291            self.element_stack.push(name.to_string());
292            Ok(XmlEvent::StartElement {
293                name: Cow::Borrowed(name),
294                attributes,
295            })
296        } else {
297            Err(Error::syntax("expected '>' or '/>'").with_position(self.position()))
298        }
299    }
300
301    /// Reads an end element.
302    #[inline]
303    fn read_end_element(&mut self) -> Result<XmlEvent<'a>> {
304        debug_assert_eq!(self.input[self.pos], b'/');
305        self.pos += 1;
306        self.col += 1;
307
308        let name = self.read_name()?;
309        self.skip_whitespace_fast();
310        self.expect_char(b'>')?;
311
312        // Validate matching tags
313        match self.element_stack.pop() {
314            Some(expected) if expected == name => Ok(XmlEvent::EndElement {
315                name: Cow::Borrowed(name),
316            }),
317            Some(expected) => Err(Error::mismatched_tag(expected, name.to_string()).with_position(self.position())),
318            None => Err(Error::syntax(format!("unexpected closing tag: {}", name))
319                .with_position(self.position())),
320        }
321    }
322
323    /// Reads a processing instruction.
324    fn read_processing_instruction(&mut self) -> Result<XmlEvent<'a>> {
325        debug_assert_eq!(self.input[self.pos], b'?');
326        self.pos += 1;
327        self.col += 1;
328
329        let target = self.read_name()?;
330
331        // Check for XML declaration
332        if target.eq_ignore_ascii_case("xml") {
333            return self.read_xml_decl();
334        }
335
336        self.skip_whitespace_fast();
337
338        // Read data until ?> using memchr for speed
339        let data_start = self.pos;
340
341        while self.pos + 1 < self.input.len() {
342            if let Some(offset) = memchr(b'?', &self.input[self.pos..]) {
343                let check_pos = self.pos + offset;
344                if check_pos + 1 < self.input.len() && self.input[check_pos + 1] == b'>' {
345                    self.update_position_for_range(self.pos, check_pos);
346                    self.pos = check_pos;
347
348                    let data = std::str::from_utf8(&self.input[data_start..self.pos])
349                        .map_err(|_| Error::new(crate::error::ErrorKind::InvalidUtf8))?;
350                    self.pos += 2;
351                    self.col += 2;
352                    return Ok(XmlEvent::ProcessingInstruction {
353                        target: Cow::Borrowed(target),
354                        data: if data.trim().is_empty() {
355                            None
356                        } else {
357                            Some(Cow::Borrowed(data.trim()))
358                        },
359                    });
360                }
361                // Not the end, continue searching
362                self.update_position_for_range(self.pos, check_pos + 1);
363                self.pos = check_pos + 1;
364            } else {
365                break;
366            }
367        }
368
369        Err(Error::syntax("unterminated processing instruction").with_position(self.position()))
370    }
371
372    /// Reads an XML declaration.
373    fn read_xml_decl(&mut self) -> Result<XmlEvent<'a>> {
374        let attributes = self.read_attributes()?;
375        self.skip_whitespace_fast();
376
377        if self.pos + 1 >= self.input.len()
378            || self.input[self.pos] != b'?'
379            || self.input[self.pos + 1] != b'>'
380        {
381            return Err(Error::syntax("expected '?>'").with_position(self.position()));
382        }
383        self.pos += 2;
384        self.col += 2;
385
386        let mut version = None;
387        let mut encoding = None;
388        let mut standalone = None;
389
390        for attr in attributes {
391            match attr.name.as_ref() {
392                "version" => version = Some(attr.value),
393                "encoding" => encoding = Some(attr.value),
394                "standalone" => {
395                    standalone = Some(attr.value.as_ref() == "yes");
396                }
397                _ => {}
398            }
399        }
400
401        Ok(XmlEvent::XmlDecl {
402            version: version.unwrap_or(Cow::Borrowed("1.0")),
403            encoding,
404            standalone,
405        })
406    }
407
408    /// Reads special constructs (comments, CDATA, DOCTYPE).
409    fn read_special(&mut self) -> Result<XmlEvent<'a>> {
410        debug_assert_eq!(self.input[self.pos], b'!');
411        self.pos += 1;
412        self.col += 1;
413
414        if self.pos >= self.input.len() {
415            return Err(Error::unexpected_eof().with_position(self.position()));
416        }
417
418        // Check for comment: <!--
419        if self.pos + 1 < self.input.len()
420            && self.input[self.pos] == b'-'
421            && self.input[self.pos + 1] == b'-'
422        {
423            return self.read_comment();
424        }
425
426        // Check for CDATA: <![CDATA[
427        if self.pos + 6 < self.input.len() && &self.input[self.pos..self.pos + 7] == b"[CDATA[" {
428            return self.read_cdata();
429        }
430
431        // Check for DOCTYPE
432        if self.pos + 6 < self.input.len() && self.input[self.pos..].starts_with(b"DOCTYPE") {
433            return self.skip_doctype();
434        }
435
436        Err(Error::syntax("unknown construct after '<!'").with_position(self.position()))
437    }
438
439    /// Reads a comment using memchr for fast end detection.
440    fn read_comment(&mut self) -> Result<XmlEvent<'a>> {
441        self.pos += 2; // Skip --
442        self.col += 2;
443        let start = self.pos;
444
445        // Search for --> using memchr
446        while self.pos + 2 < self.input.len() {
447            if let Some(offset) = memchr(b'-', &self.input[self.pos..]) {
448                let check_pos = self.pos + offset;
449                if check_pos + 2 < self.input.len()
450                    && self.input[check_pos + 1] == b'-'
451                    && self.input[check_pos + 2] == b'>'
452                {
453                    self.update_position_for_range(self.pos, check_pos);
454                    let comment = std::str::from_utf8(&self.input[start..check_pos])
455                        .map_err(|_| Error::new(crate::error::ErrorKind::InvalidUtf8))?;
456                    self.pos = check_pos + 3;
457                    self.col += 3;
458                    return Ok(XmlEvent::Comment(Cow::Borrowed(comment.trim())));
459                }
460                self.update_position_for_range(self.pos, check_pos + 1);
461                self.pos = check_pos + 1;
462            } else {
463                break;
464            }
465        }
466
467        Err(Error::syntax("unterminated comment").with_position(self.position()))
468    }
469
470    /// Reads a CDATA section using memchr for fast end detection.
471    fn read_cdata(&mut self) -> Result<XmlEvent<'a>> {
472        self.pos += 7; // Skip [CDATA[
473        self.col += 7;
474        let start = self.pos;
475
476        // Search for ]]> using memchr
477        while self.pos + 2 < self.input.len() {
478            if let Some(offset) = memchr(b']', &self.input[self.pos..]) {
479                let check_pos = self.pos + offset;
480                if check_pos + 2 < self.input.len()
481                    && self.input[check_pos + 1] == b']'
482                    && self.input[check_pos + 2] == b'>'
483                {
484                    self.update_position_for_range(self.pos, check_pos);
485                    let data = std::str::from_utf8(&self.input[start..check_pos])
486                        .map_err(|_| Error::new(crate::error::ErrorKind::InvalidUtf8))?;
487                    self.pos = check_pos + 3;
488                    self.col += 3;
489                    return Ok(XmlEvent::CData(Cow::Borrowed(data)));
490                }
491                self.update_position_for_range(self.pos, check_pos + 1);
492                self.pos = check_pos + 1;
493            } else {
494                break;
495            }
496        }
497
498        Err(Error::syntax("unterminated CDATA section").with_position(self.position()))
499    }
500
501    /// Skips a DOCTYPE declaration.
502    fn skip_doctype(&mut self) -> Result<XmlEvent<'a>> {
503        let mut depth = 1;
504
505        while self.pos < self.input.len() && depth > 0 {
506            // Use memchr2 to find < or > quickly
507            if let Some(offset) = memchr2(b'<', b'>', &self.input[self.pos..]) {
508                self.update_position_for_range(self.pos, self.pos + offset);
509                self.pos += offset;
510
511                match self.input[self.pos] {
512                    b'<' => depth += 1,
513                    b'>' => depth -= 1,
514                    _ => {}
515                }
516                self.col += 1;
517                self.pos += 1;
518            } else {
519                self.update_position_for_range(self.pos, self.input.len());
520                self.pos = self.input.len();
521                break;
522            }
523        }
524
525        // Skip to next event
526        self.next_event()
527    }
528
529    /// Reads an XML name using lookup table.
530    #[inline]
531    fn read_name(&mut self) -> Result<&'a str> {
532        let start = self.pos;
533
534        // First character must be a name start char
535        if self.pos >= self.input.len() {
536            return Err(Error::unexpected_eof().with_position(self.position()));
537        }
538
539        let first = self.input[self.pos];
540        if !IS_NAME_START[first as usize] {
541            return Err(Error::invalid_name(format!("invalid name start character: {:?}", first as char))
542                .with_position(self.position()));
543        }
544        self.pos += 1;
545        self.col += 1;
546
547        // Subsequent characters - use lookup table
548        while self.pos < self.input.len() && IS_NAME_CHAR[self.input[self.pos] as usize] {
549            self.pos += 1;
550            self.col += 1;
551        }
552
553        std::str::from_utf8(&self.input[start..self.pos])
554            .map_err(|_| Error::new(crate::error::ErrorKind::InvalidUtf8))
555    }
556
557    /// Reads element attributes with pre-allocated vector.
558    #[inline]
559    fn read_attributes(&mut self) -> Result<Vec<Attribute<'a>>> {
560        let mut attributes = Vec::with_capacity(4); // Pre-allocate for typical case
561
562        loop {
563            self.skip_whitespace_fast();
564
565            if self.pos >= self.input.len() {
566                break;
567            }
568
569            // Check for end of attributes
570            let c = self.input[self.pos];
571            if c == b'>' || c == b'/' || c == b'?' {
572                break;
573            }
574
575            // Read attribute name
576            let name = self.read_name()?;
577            self.skip_whitespace_fast();
578
579            // Expect '='
580            self.expect_char(b'=')?;
581            self.skip_whitespace_fast();
582
583            // Read attribute value
584            let value = self.read_attribute_value()?;
585
586            attributes.push(Attribute {
587                name: Cow::Borrowed(name),
588                value,
589            });
590        }
591
592        Ok(attributes)
593    }
594
595    /// Reads an attribute value using memchr for fast quote finding.
596    #[inline]
597    fn read_attribute_value(&mut self) -> Result<Cow<'a, str>> {
598        if self.pos >= self.input.len() {
599            return Err(Error::unexpected_eof().with_position(self.position()));
600        }
601
602        let quote = self.input[self.pos];
603        if quote != b'"' && quote != b'\'' {
604            return Err(Error::syntax("expected quote").with_position(self.position()));
605        }
606        self.pos += 1;
607        self.col += 1;
608
609        let start = self.pos;
610
611        // Find closing quote using memchr
612        match memchr(quote, &self.input[self.pos..]) {
613            Some(offset) => {
614                let value = std::str::from_utf8(&self.input[start..self.pos + offset])
615                    .map_err(|_| Error::new(crate::error::ErrorKind::InvalidUtf8))?;
616                self.pos += offset + 1;
617                self.col += offset + 1;
618
619                // Unescape the value
620                match unescape(value) {
621                    Ok(unescaped) => Ok(unescaped),
622                    Err(e) => Err(Error::invalid_escape(e.entity)),
623                }
624            }
625            None => Err(Error::syntax("unterminated attribute value").with_position(self.position())),
626        }
627    }
628
629    /// Expects a specific character.
630    #[inline(always)]
631    fn expect_char(&mut self, expected: u8) -> Result<()> {
632        if self.pos >= self.input.len() {
633            return Err(Error::unexpected_eof().with_position(self.position()));
634        }
635
636        if self.input[self.pos] != expected {
637            return Err(Error::syntax(format!(
638                "expected '{}', found '{}'",
639                expected as char,
640                self.input[self.pos] as char
641            ))
642            .with_position(self.position()));
643        }
644
645        self.pos += 1;
646        self.col += 1;
647        Ok(())
648    }
649}
650
/// Unit tests for the tokenizer: one small document per construct, checked
/// event by event.
#[cfg(test)]
mod tests {
    use super::*;

    /// A start tag followed by its end tag yields StartElement, EndElement, Eof.
    #[test]
    fn test_simple_element() {
        let mut reader = XmlReader::from_str("<root></root>");

        match reader.next_event().unwrap() {
            XmlEvent::StartElement { name, attributes } => {
                assert_eq!(name, "root");
                assert!(attributes.is_empty());
            }
            _ => panic!("expected StartElement"),
        }

        match reader.next_event().unwrap() {
            XmlEvent::EndElement { name } => {
                assert_eq!(name, "root");
            }
            _ => panic!("expected EndElement"),
        }

        assert!(matches!(reader.next_event().unwrap(), XmlEvent::Eof));
    }

    /// A self-closing tag yields a single EmptyElement and leaves nothing open.
    #[test]
    fn test_empty_element() {
        let mut reader = XmlReader::from_str("<root/>");

        match reader.next_event().unwrap() {
            XmlEvent::EmptyElement { name, attributes } => {
                assert_eq!(name, "root");
                assert!(attributes.is_empty());
            }
            _ => panic!("expected EmptyElement"),
        }

        assert!(matches!(reader.next_event().unwrap(), XmlEvent::Eof));
    }

    /// Attributes are reported in document order with their quotes stripped.
    #[test]
    fn test_attributes() {
        let mut reader = XmlReader::from_str(r#"<root id="1" name="test"/>"#);

        match reader.next_event().unwrap() {
            XmlEvent::EmptyElement { name, attributes } => {
                assert_eq!(name, "root");
                assert_eq!(attributes.len(), 2);
                assert_eq!(attributes[0].name, "id");
                assert_eq!(attributes[0].value, "1");
                assert_eq!(attributes[1].name, "name");
                assert_eq!(attributes[1].value, "test");
            }
            _ => panic!("expected EmptyElement"),
        }
    }

    /// Character data between tags comes back as a Text event.
    #[test]
    fn test_text_content() {
        let mut reader = XmlReader::from_str("<root>Hello, World!</root>");

        reader.next_event().unwrap(); // StartElement

        match reader.next_event().unwrap() {
            XmlEvent::Text(text) => {
                assert_eq!(text, "Hello, World!");
            }
            _ => panic!("expected Text"),
        }
    }

    /// Entity references in text are unescaped.
    #[test]
    fn test_escaped_text() {
        let mut reader = XmlReader::from_str("<root>&lt;Hello&gt;</root>");

        reader.next_event().unwrap(); // StartElement

        match reader.next_event().unwrap() {
            XmlEvent::Text(text) => {
                assert_eq!(text, "<Hello>");
            }
            _ => panic!("expected Text"),
        }
    }

    /// The XML declaration exposes version and encoding; an absent
    /// `standalone` stays `None`.
    #[test]
    fn test_xml_declaration() {
        let mut reader = XmlReader::from_str(r#"<?xml version="1.0" encoding="UTF-8"?><root/>"#);

        match reader.next_event().unwrap() {
            XmlEvent::XmlDecl { version, encoding, standalone } => {
                assert_eq!(version, "1.0");
                assert_eq!(encoding.as_deref(), Some("UTF-8"));
                assert_eq!(standalone, None);
            }
            _ => panic!("expected XmlDecl"),
        }
    }

    /// Comment bodies are returned with surrounding whitespace trimmed.
    #[test]
    fn test_comment() {
        let mut reader = XmlReader::from_str("<!-- This is a comment --><root/>");

        match reader.next_event().unwrap() {
            XmlEvent::Comment(comment) => {
                assert_eq!(comment, "This is a comment");
            }
            _ => panic!("expected Comment"),
        }
    }

    /// CDATA content is passed through verbatim, markup characters included.
    #[test]
    fn test_cdata() {
        let mut reader = XmlReader::from_str("<root><![CDATA[<special>content</special>]]></root>");

        reader.next_event().unwrap(); // StartElement

        match reader.next_event().unwrap() {
            XmlEvent::CData(data) => {
                assert_eq!(data, "<special>content</special>");
            }
            _ => panic!("expected CData"),
        }
    }

    /// Nested and sibling elements produce one event per tag.
    #[test]
    fn test_nested_elements() {
        let xml = r#"<root><child1><grandchild/></child1><child2/></root>"#;
        let mut reader = XmlReader::from_str(xml);

        // Collect until Eof. An error also ends the iteration, so a parse
        // failure shows up below only as a wrong event count.
        let events: Vec<_> = std::iter::from_fn(|| {
            match reader.next_event() {
                Ok(XmlEvent::Eof) => None,
                Ok(event) => Some(event),
                Err(_) => None,
            }
        }).collect();

        // <root>, <child1>, <grandchild/>, </child1>, <child2/>, </root>
        assert_eq!(events.len(), 6);
    }

    /// An end tag that does not match the open element is an error.
    #[test]
    fn test_mismatched_tags() {
        let mut reader = XmlReader::from_str("<root></wrong>");
        reader.next_event().unwrap(); // StartElement
        assert!(reader.next_event().is_err());
    }

    /// Reaching end of input with an element still open is an error.
    #[test]
    fn test_unclosed_tag() {
        let mut reader = XmlReader::from_str("<root>");
        reader.next_event().unwrap(); // StartElement
        assert!(reader.next_event().is_err());
    }

    /// A non-"xml" processing instruction exposes its target and data.
    #[test]
    fn test_processing_instruction() {
        let mut reader = XmlReader::from_str("<?target data?><root/>");

        match reader.next_event().unwrap() {
            XmlEvent::ProcessingInstruction { target, data } => {
                assert_eq!(target, "target");
                assert_eq!(data.as_deref(), Some("data"));
            }
            _ => panic!("expected ProcessingInstruction"),
        }
    }

    /// Single-quoted attribute values are accepted like double-quoted ones.
    #[test]
    fn test_attribute_with_single_quotes() {
        let mut reader = XmlReader::from_str("<root attr='value'/>");

        match reader.next_event().unwrap() {
            XmlEvent::EmptyElement { attributes, .. } => {
                assert_eq!(attributes[0].value, "value");
            }
            _ => panic!("expected EmptyElement"),
        }
    }

    /// The line counter advances across newlines between tags.
    #[test]
    fn test_position_tracking() {
        let xml = "<root>\n  <child/>\n</root>";
        let mut reader = XmlReader::from_str(xml);

        reader.next_event().unwrap(); // <root>
        reader.next_event().unwrap(); // <child/>

        let pos = reader.position();
        // Loose lower bound: only checks that at least one newline was counted.
        assert!(pos.line >= 2);
    }

    /// `depth()` follows the open-element stack as tags open and close.
    #[test]
    fn test_depth_tracking() {
        let mut reader = XmlReader::from_str("<a><b><c></c></b></a>");

        assert_eq!(reader.depth(), 0);
        reader.next_event().unwrap(); // <a>
        assert_eq!(reader.depth(), 1);
        reader.next_event().unwrap(); // <b>
        assert_eq!(reader.depth(), 2);
        reader.next_event().unwrap(); // <c>
        assert_eq!(reader.depth(), 3);
        reader.next_event().unwrap(); // </c>
        assert_eq!(reader.depth(), 2);
    }
}