Skip to main content

xsd_schema/parser/
reader.rs

1//! Tracked XML reader
2//!
3//! Wraps quick-xml Reader with byte position tracking for source mapping.
4
5use quick_xml::events::Event;
6use quick_xml::Reader;
7use std::io::BufRead;
8
9use crate::error::{SchemaError, SchemaResult};
10use crate::parser::location::SourceSpan;
11
12/// XML event with source span
13#[derive(Debug)]
14pub struct TrackedEvent<'a> {
15    /// The XML event
16    pub event: Event<'a>,
17    /// Byte span in source
18    pub span: SourceSpan,
19}
20
21impl<'a> TrackedEvent<'a> {
22    /// Create a new tracked event
23    pub fn new(event: Event<'a>, span: SourceSpan) -> Self {
24        Self { event, span }
25    }
26
27    /// Check if this is a start element event
28    pub fn is_start(&self) -> bool {
29        matches!(self.event, Event::Start(_))
30    }
31
32    /// Check if this is an empty element event
33    pub fn is_empty(&self) -> bool {
34        matches!(self.event, Event::Empty(_))
35    }
36
37    /// Check if this is an end element event
38    pub fn is_end(&self) -> bool {
39        matches!(self.event, Event::End(_))
40    }
41
42    /// Check if this is a text event
43    pub fn is_text(&self) -> bool {
44        matches!(self.event, Event::Text(_))
45    }
46
47    /// Check if this is an EOF event
48    pub fn is_eof(&self) -> bool {
49        matches!(self.event, Event::Eof)
50    }
51}
52
53/// Tracked XML reader that wraps quick-xml with position tracking
54///
55/// This reader provides byte spans for all XML events, enabling accurate
56/// source location tracking for error messages.
57pub struct TrackedReader<R> {
58    /// The underlying quick-xml reader
59    reader: Reader<R>,
60    /// Current buffer position (before event)
61    last_position: usize,
62}
63
64impl<'a> TrackedReader<&'a [u8]> {
65    /// Create a new reader from a byte slice
66    pub fn from_bytes(bytes: &'a [u8]) -> Self {
67        let mut reader = Reader::from_reader(bytes);
68        reader.trim_text(true);
69
70        Self {
71            reader,
72            last_position: 0,
73        }
74    }
75}
76
77impl<R: BufRead> TrackedReader<R> {
78    /// Create a new reader from a BufRead source
79    pub fn from_reader(reader: R) -> Self {
80        let mut xml_reader = Reader::from_reader(reader);
81        xml_reader.trim_text(true);
82
83        Self {
84            reader: xml_reader,
85            last_position: 0,
86        }
87    }
88
89    /// Read the next XML event with its source span
90    pub fn read_event<'b>(&mut self, buf: &'b mut Vec<u8>) -> SchemaResult<TrackedEvent<'b>> {
91        let start = self.reader.buffer_position();
92        self.last_position = start;
93
94        let event = self.reader.read_event_into(buf).map_err(|e| {
95            SchemaError::XmlError {
96                message: e.to_string(),
97                location: None, // Will be filled in by caller with proper source mapping
98            }
99        })?;
100
101        let end = self.reader.buffer_position();
102        let span = SourceSpan { start, end };
103
104        Ok(TrackedEvent::new(event, span))
105    }
106
107    /// Get the current buffer position
108    pub fn buffer_position(&self) -> usize {
109        self.reader.buffer_position()
110    }
111
112    /// Get the last event's start position
113    pub fn last_position(&self) -> usize {
114        self.last_position
115    }
116
117    /// Get a reference to the underlying reader for decoding
118    pub fn inner(&self) -> &Reader<R> {
119        &self.reader
120    }
121}
122
/// Split a qualified name into `(local_name, prefix)`.
///
/// The split happens at the first `:` byte. Everything before it is the
/// prefix and everything after it is the local name. When there is no `:`
/// the whole input is returned as the local name and the prefix is `None`.
pub fn split_qname(qname: &[u8]) -> (&[u8], Option<&[u8]>) {
    // At most two pieces: the part before the first colon and the remainder.
    let mut pieces = qname.splitn(2, |&byte| byte == b':');
    match (pieces.next(), pieces.next()) {
        (Some(prefix), Some(local)) => (local, Some(prefix)),
        _ => (qname, None),
    }
}
130
/// Options controlling XML parsing.
///
/// NOTE(review): the `TrackedReader` constructors in this file do not take a
/// `ReaderConfig`; presumably it is consumed elsewhere — confirm.
#[derive(Debug, Clone)]
pub struct ReaderConfig {
    /// Whether whitespace in text nodes should be trimmed.
    pub trim_text: bool,
    /// Whether duplicate attributes should be checked for.
    pub check_duplicates: bool,
}

impl Default for ReaderConfig {
    /// Both options are enabled by default.
    fn default() -> Self {
        let enabled = true;
        ReaderConfig {
            trim_text: enabled,
            check_duplicates: enabled,
        }
    }
}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Clear `buf` and read the next event from `reader`, panicking on error.
    fn next_event<'b>(
        reader: &mut TrackedReader<&[u8]>,
        buf: &'b mut Vec<u8>,
    ) -> TrackedEvent<'b> {
        buf.clear();
        reader.read_event(buf).unwrap()
    }

    /// Events for a small document arrive in order: start, empty, end, EOF.
    #[test]
    fn test_tracked_reader_basic() {
        let mut reader = TrackedReader::from_bytes(b"<root><child/></root>");
        let mut buf = Vec::new();

        // <root>
        assert!(next_event(&mut reader, &mut buf).is_start());
        // <child/>
        assert!(next_event(&mut reader, &mut buf).is_empty());
        // </root>
        assert!(next_event(&mut reader, &mut buf).is_end());
        // End of input
        assert!(next_event(&mut reader, &mut buf).is_eof());
    }

    /// The first span starts at byte 0; the following text span starts later.
    #[test]
    fn test_tracked_reader_spans() {
        let mut reader = TrackedReader::from_bytes(b"<root>text</root>");
        let mut buf = Vec::new();

        // Start element span
        let root_start = next_event(&mut reader, &mut buf).span.start;
        assert!(root_start == 0);

        // Text span
        let text = next_event(&mut reader, &mut buf);
        assert!(text.is_text());
        assert!(text.span.start > 0);
    }

    /// Names split at the first colon; unprefixed names have no prefix.
    #[test]
    fn test_split_qname() {
        let unprefixed: (&[u8], Option<&[u8]>) = (b"localName", None);
        assert_eq!(split_qname(b"localName"), unprefixed);

        let xs_element: (&[u8], Option<&[u8]>) = (b"element", Some(b"xs"));
        assert_eq!(split_qname(b"xs:element"), xs_element);

        let xsi_nil: (&[u8], Option<&[u8]>) = (b"nil", Some(b"xsi"));
        assert_eq!(split_qname(b"xsi:nil"), xsi_nil);
    }

    /// A self-closing element satisfies `is_empty` and no other predicate.
    #[test]
    fn test_tracked_event_type_checks() {
        let mut reader = TrackedReader::from_bytes(b"<root/>");
        let mut buf = Vec::new();

        let event = next_event(&mut reader, &mut buf);
        assert!(event.is_empty());
        assert!(!event.is_start());
        assert!(!event.is_end());
        assert!(!event.is_text());
        assert!(!event.is_eof());
    }

    /// The default configuration enables both options.
    #[test]
    fn test_reader_config_default() {
        let ReaderConfig {
            trim_text,
            check_duplicates,
        } = ReaderConfig::default();
        assert!(trim_text);
        assert!(check_duplicates);
    }
}