1use embedded_io::Error;
2
3use crate::Result;
4use crate::attributes::AttributeReader;
5use crate::events::Event;
6
7use core::ops::Range;
8
9#[cfg(test)]
10extern crate std;
11
/// Emits a diagnostic message with `format!`-style arguments.
///
/// The message goes to `log::trace!` when the `log` feature is enabled and to
/// standard error in test builds. When neither applies the invocation expands
/// to nothing, so the arguments are not evaluated.
macro_rules! trace {
    ($($tokens:tt)*) => {
        #[cfg(feature = "log")]
        log::trace!($($tokens)*);
        #[cfg(test)]
        std::eprintln!($($tokens)*);
    };
}
20
/// Pull-style XML reader that scans its input through a fixed-size window.
///
/// Bytes are fetched from `reader` into `buffer`; the slice
/// `buffer[pos..end]` is the part of the window that has not been consumed yet.
pub struct Reader<R, Buffer> {
    /// Source the raw bytes are read from.
    reader: R,
    /// Bytes of the announced total size that have not been read yet.
    remaining: usize,
    /// Backing storage for the window.
    buffer: Buffer,
    /// Index of the first unconsumed byte in `buffer`.
    pos: usize,
    /// Index one past the last valid byte in `buffer`.
    end: usize,
    /// `true` until the first event (the XML declaration) has been requested.
    at_start: bool,
    /// Content range of a self-closing start tag whose matching end event is
    /// still to be reported.
    self_closing: Option<Range<usize>>,
}
32
33impl<'a, R: embedded_io::Read> Reader<R, &'a mut [u8]> {
34 pub fn new_borrowed(reader: R, total_size: usize, buffer: &'a mut [u8]) -> Result<Self> {
46 Self::new_with_read(reader, total_size, buffer)
47 }
48}
49
#[cfg(feature = "alloc")]
impl<R> Reader<R, alloc::vec::Vec<u8>>
where
    R: embedded_io::Read,
{
    /// Creates a reader that owns a zero-initialised window of `buffer_size`
    /// bytes.
    ///
    /// `total_size` is the number of bytes `reader` is expected to deliver in
    /// total. The window is filled with an initial read before returning.
    ///
    /// # Errors
    ///
    /// Returns an error if the initial read from `reader` fails.
    pub fn new(reader: R, total_size: usize, buffer_size: usize) -> Result<Self> {
        Self::new_with_read(reader, total_size, alloc::vec![0u8; buffer_size])
    }
}
67
68impl<R: embedded_io::Read, Buffer: AsRef<[u8]> + AsMut<[u8]>> Reader<R, Buffer> {
69 fn new_with_read(mut reader: R, total_size: usize, mut buffer: Buffer) -> Result<Self> {
70 let end = reader
71 .read(buffer.as_mut())
72 .map_err(|e| crate::Error::IoError(e.kind()))?;
73 let remaining = total_size - end;
74 Ok(Reader {
75 reader,
76 remaining,
77 buffer,
78 pos: 0,
79 end,
80 at_start: true,
81 self_closing: None,
82 })
83 }
84
85 pub fn next_event(&mut self) -> Result<Event<'_>> {
116 if self.at_start {
119 self.at_start = false;
120 let (start, end) = self.try_find("<?xml", "?>")?;
121 let block = core::str::from_utf8(&self.buffer.as_ref()[start..end])?;
122 let attrs = AttributeReader::from_block(block);
123 self.pos = end + 2;
124 return Ok(Event::Declaration { attrs });
125 };
126
127 if self.pos == self.end && self.remaining == 0 {
128 trace!("Pos = End");
129 return Ok(Event::EndOfFile);
130 }
131
132 if let Some(range) = self.self_closing.take() {
133 let block = &self.buffer.as_ref()[range].trim_ascii();
134 let name = core::str::from_utf8(block)?
135 .split_ascii_whitespace()
136 .next()
137 .ok_or(crate::Error::InvalidState)?;
138 return Ok(Event::EndElement { name });
139 }
140
141 let curr_end = match self.try_find_start("<") {
142 Ok(pos) => pos,
143 Err(crate::Error::Eof) => return Ok(Event::EndOfFile),
144 Err(e) => return Err(e),
145 };
146
147 let curr = self.buffer()[..curr_end].trim_ascii();
148 if !curr.is_empty() {
149 let block = self.buffer.as_ref()[self.pos..self.pos + curr_end].trim_ascii();
150 let content = core::str::from_utf8(block)?;
151 self.pos += curr_end;
152 return Ok(Event::Text { content });
153 }
154
155 self.pos += curr_end;
156 match self.ensure(3) {
157 Ok(()) => {}
158 Err(crate::Error::Eof) => {
159 return Ok(Event::EndOfFile);
160 }
161 Err(e) => return Err(e),
162 };
163
164 enum BlockType {
165 Cdata,
166 Comment,
167 Dtd,
168 PI,
169 EndElement,
170 StartElement,
171 }
172
173 let b = self.buffer();
174 let (ty, n_start, n_end) = match (b[1], b[2]) {
175 (b'!', b'[') => (BlockType::Cdata, "<![CDATA[", "]]>"),
176 (b'!', b'-') => (BlockType::Comment, "<!--", "-->"),
177 (b'!', _) => (BlockType::Dtd, "<!", ">"),
178 (b'?', _) => (BlockType::PI, "<?", "?>"),
179 (b'/', _) => (BlockType::EndElement, "</", ">"),
180 (_, _) => (BlockType::StartElement, "<", ">"),
181 };
182
183 let (start, end) = self.try_find(n_start, n_end)?;
184
185 let range = if matches!(ty, BlockType::StartElement) && self.buffer()[end - 1] == b'/' {
186 let range = self.pos + start..self.pos + end - 1;
187 self.self_closing = Some(range.clone());
188 range
189 } else {
190 self.pos + start..self.pos + end
191 };
192
193 let block = &self.buffer.as_ref()[range].trim_ascii();
194
195 let event = match ty {
196 BlockType::Cdata => Event::CDATA { data: block },
197 BlockType::Comment => Event::Comment {
198 content: core::str::from_utf8(block)?,
199 },
200 BlockType::Dtd => Event::Dtd {
201 content: core::str::from_utf8(block)?,
202 },
203 BlockType::PI => {
204 let (name, attrs) = Self::name_and_attrs(block)?;
205 Event::ProcessingInstruction { name, attrs }
206 }
207 BlockType::EndElement => Event::EndElement {
208 name: core::str::from_utf8(block)?,
209 },
210 BlockType::StartElement => {
211 let (name, attrs) = Self::name_and_attrs(block)?;
212 Event::StartElement { name, attrs }
213 }
214 };
215 self.pos += end + n_end.len();
216 Ok(event)
217 }
218 pub fn name_and_attrs(block: &[u8]) -> Result<(&str, AttributeReader<'_>)> {
219 let block = core::str::from_utf8(block)?;
220 let mut split = block.split_ascii_whitespace();
221 let name = split.next().unwrap_or("");
222 Ok((name, AttributeReader::from_split(split)))
223 }
224
225 fn advance(&mut self, offset: usize) -> Result<()> {
228 trace!(
229 "Advancing by {offset} bytes (remaining: {})",
230 self.remaining
231 );
232 if self.remaining == 0 {
233 return Err(crate::Error::Eof);
234 }
235 assert!(offset <= self.end);
236 assert!(offset <= self.buffer.as_ref().len());
237 trace!("Copying {} bytes to start of buffer", self.end - offset);
238 for i in offset..self.end {
239 self.buffer.as_mut()[i - offset] = self.buffer.as_ref()[i];
240 }
241 self.pos = 0;
242 self.end -= offset;
243 let data_start = self.buffer.as_ref().len() - offset;
244 let read_bytes = self
245 .reader
246 .read(&mut self.buffer.as_mut()[data_start..])
247 .map_err(|e| crate::Error::IoError(e.kind()))?;
248 self.end += read_bytes;
249 self.remaining -= read_bytes;
250 trace!(
251 "Read {read_bytes} bytes, new buffer len: {}, remaining: {}",
252 self.buffer().len(),
253 self.remaining
254 );
255 Ok(())
256 }
257
258 fn ensure(&mut self, size: usize) -> Result<()> {
260 trace!("Ensuring {size} bytes (remaining: {})", self.remaining);
261 let available = self.buffer().len();
262 if available >= size {
263 return Ok(());
264 }
265 if available + self.remaining < size {
266 return Err(crate::Error::Eof);
267 }
268 self.advance(self.pos)
269 }
270
271 fn try_find(&mut self, n_start: &str, n_end: &str) -> Result<(usize, usize)> {
275 trace!(
276 "Trying to find '{n_start}' and '{n_end}' (remaining: {})",
277 self.remaining
278 );
279 let n_start = n_start.as_bytes();
280 let n_end = n_end.as_bytes();
281 match find_span(self.buffer(), n_start, n_end) {
282 Some((start, Some(end))) => Ok((start, end)),
283 Some((start, None)) => {
284 self.advance(self.pos + start)?;
285 let Some(end) = memchr::memmem::find(self.buffer(), n_end) else {
286 return Err(crate::Error::Eof);
287 };
288 Ok((0, end))
289 }
290 None => {
291 self.advance(self.buffer.as_ref().len())?;
292 let Some((start, Some(end))) = find_span(self.buffer(), n_start, n_end) else {
293 return Err(crate::Error::Eof);
294 };
295 Ok((start, end))
296 }
297 }
298 }
299
300 fn try_find_start(&mut self, n_start: &str) -> Result<usize> {
303 trace!(
304 "Trying to find start '{n_start}' (pos: {}, remaining: {})",
305 self.pos, self.remaining
306 );
307 let n_start = n_start.as_bytes();
308 match memchr::memmem::find(self.buffer(), n_start) {
309 Some(pos) => Ok(pos),
310 None => {
311 self.advance(self.pos)?;
312 let Some(pos) = memchr::memmem::find(self.buffer(), n_start) else {
313 trace!("Needle not found!");
314 return Err(crate::Error::Eof);
315 };
316 Ok(pos)
317 }
318 }
319 }
320
321 fn buffer(&self) -> &[u8] {
322 &self.buffer.as_ref()[self.pos..self.end]
323 }
324}
325
326fn find_span(buffer: &[u8], start: &[u8], end: &[u8]) -> Option<(usize, Option<usize>)> {
327 let start = memchr::memmem::find(buffer, start)? + start.len();
328 let end = memchr::memmem::find(&buffer[start..], end).map(|pos| pos + start);
329 Some((start, end))
330}
331
#[cfg(test)]
#[rustfmt::skip]
mod tests {
    extern crate std;

    use crate::*;
    use super::*;

    // 512 bytes of filler text: eight rows of 64 bytes each.
    const LOREM: &str = "\
        Lorem ipsum dolor sit amet, consetetur sadipscing elitr,seddiam \
        nonumy eirmod tempor invidunt ut labore et dolore magna aliquya \
        erat, sed diam voluptua. At vero eos et accusam et justo duo do \
        ores et ea rebum. Stet clita kasd gubergren, no sea takimata sa \
        ctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet,\
        consetetur sadipscing elitr, sed diam nonumy eirmod tempor invid\
        unt ut labore et dolore magna aliquyam erat, sed diam voluptua. \
        At vero eos et accusam et justo duo dolores et ea rebum. Stet cl";

    /// Advancing by a full window exposes the second half of the input.
    #[test]
    #[cfg(feature = "alloc")]
    fn test_window() {
        let data = LOREM.as_bytes();
        let mut source = data;
        let mut reader = Reader::new(&mut source, data.len(), 256).unwrap();
        assert_eq!(reader.buffer(), &data[..256]);
        reader.advance(256).unwrap();
        assert_eq!(reader.buffer(), &data[256..]);
    }

    /// `find_span` returns the range enclosed by the two delimiters.
    #[test]
    fn test_needle_range() {
        /// Text of `xml` between the first `open` and the following `close`.
        fn enclosed<'x>(xml: &'x str, open: &[u8], close: &[u8]) -> &'x str {
            let Some((start, Some(end))) = find_span(xml.as_bytes(), open, close) else {
                panic!("Failed to find span");
            };
            &xml[start..end]
        }

        let xml = "\
            <root>\
            <child>Text</child>\
            <child>More text</child>\
            </root>";

        assert_eq!(enclosed(xml, b"<", b">"), "root");
        assert_eq!(enclosed(xml, b"<child>", b"</child>"), "Text");
    }

    /// `try_find` works inside one window and across a window boundary.
    #[test]
    #[cfg(feature = "alloc")]
    fn test_find() {
        // Indexes the owned buffer directly; the reader's position is 0 in
        // both lookups below, so the offsets line up with the buffer.
        fn find_str<'a>(
            parser: &'a mut OwnedReader<&'_ [u8]>,
            n_start: &str,
            n_end: &str,
        ) -> Result<&'a str> {
            let (start, end) = parser.try_find(n_start, n_end)?;
            Ok(core::str::from_utf8(&parser.buffer[start..end])?)
        }

        let data = LOREM.as_bytes();
        let source = data;
        let mut reader = Reader::new(source, data.len(), 256).unwrap();

        // Both delimiters are inside the first window.
        let first = find_str(&mut reader, "Lorem ", " dolor").unwrap();
        assert_eq!(first, "ipsum");

        // The closing delimiter starts beyond the first window, so the window
        // has to shift.
        let second = find_str(&mut reader, "no sea takimata ", " ctus est").unwrap();
        assert_eq!(second, "sa");
        assert_eq!(reader.buffer(), &data[253..509]);
    }
}