use crate::encoding::decode_to_utf8;
use crate::error::{ParseError, SourceLocation};
use crate::parser::ParseOptions;
use crate::tree::Document;
/// Chunk-fed XML parser that buffers everything it is given.
///
/// Bytes handed to `push` are only accumulated; decoding and parsing happen
/// in one go when `finish` is called.
pub struct PushParser {
/// Raw bytes accumulated by `push`, in arrival order.
buffer: Vec<u8>,
/// Options forwarded to the underlying parser when `finish` runs.
options: ParseOptions,
/// Set by `finish` and cleared by `reset`; `push` asserts that it is unset.
finished: bool,
}
impl PushParser {
    /// Creates a parser with an empty buffer and default [`ParseOptions`].
    #[must_use]
    pub fn new() -> Self {
        Self::with_options(ParseOptions::default())
    }

    /// Creates a parser with an empty buffer that will parse using `options`.
    #[must_use]
    pub fn with_options(options: ParseOptions) -> Self {
        Self {
            options,
            finished: false,
            buffer: Vec::new(),
        }
    }

    /// Appends `data` to the internal buffer.
    ///
    /// Nothing is decoded or parsed here, so a chunk may end anywhere, even
    /// in the middle of a token.
    ///
    /// # Panics
    ///
    /// Panics if the parser has been marked as finished.
    pub fn push(&mut self, data: &[u8]) {
        assert!(
            !self.finished,
            "push() called after finish() — parser has already been consumed"
        );
        self.buffer.extend_from_slice(data);
    }

    /// Consumes the parser, decodes the buffered bytes and parses them.
    ///
    /// # Errors
    ///
    /// Returns a [`ParseError`] carrying the decoder's message, a default
    /// [`SourceLocation`] and no diagnostics when the buffer cannot be
    /// decoded. Otherwise returns whatever the underlying parser reports.
    pub fn finish(mut self) -> Result<Document, ParseError> {
        self.finished = true;

        let text = match decode_to_utf8(&self.buffer) {
            Ok(decoded) => decoded,
            Err(err) => {
                // Decoding failures carry no position, so report the default one.
                return Err(ParseError {
                    message: err.message,
                    location: SourceLocation::default(),
                    diagnostics: Vec::new(),
                });
            }
        };

        crate::parser::parse_str_with_options(&text, &self.options)
    }

    /// Returns how many bytes have been pushed and are currently buffered.
    #[must_use]
    pub fn buffered_bytes(&self) -> usize {
        self.buffer.len()
    }

    /// Returns `true` when no bytes are buffered.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    /// Discards all buffered bytes and clears the finished flag.
    ///
    /// The configured options are left untouched.
    pub fn reset(&mut self) {
        self.finished = false;
        self.buffer.clear();
    }
}
impl Default for PushParser {
fn default() -> Self {
Self::new()
}
}
impl std::fmt::Debug for PushParser {
    /// Formats the parser state, printing the buffer's length instead of its
    /// contents.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let buffered_bytes = self.buffer.len();

        let mut out = f.debug_struct("PushParser");
        out.field("buffered_bytes", &buffered_bytes);
        out.field("options", &self.options);
        out.field("finished", &self.finished);
        out.finish()
    }
}
#[cfg(test)]
// Tests unwrap freely: a failed unwrap is itself the test failure.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    // A whole document pushed in one call parses to the expected root.
    #[test]
    fn test_push_parser_single_chunk() {
        let mut parser = PushParser::new();
        parser.push(b"<root/>");
        let doc = parser.finish().unwrap();
        let root = doc.root_element().unwrap();
        assert_eq!(doc.node_name(root), Some("root"));
    }

    // Chunks split on element boundaries are joined before parsing.
    #[test]
    fn test_push_parser_multiple_chunks() {
        let mut parser = PushParser::new();
        parser.push(b"<root>");
        parser.push(b"<child>text</child>");
        parser.push(b"</root>");
        let doc = parser.finish().unwrap();
        let root = doc.root_element().unwrap();
        assert_eq!(doc.node_name(root), Some("root"));
        let child = doc.first_child(root).unwrap();
        assert_eq!(doc.node_name(child), Some("child"));
        assert_eq!(doc.text_content(child), "text");
    }

    // Chunk boundaries may fall inside a name or an attribute value.
    #[test]
    fn test_push_parser_split_token() {
        let mut parser = PushParser::new();
        parser.push(b"<ro");
        parser.push(b"ot att");
        parser.push(b"r=\"val");
        parser.push(b"ue\"/>");
        let doc = parser.finish().unwrap();
        let root = doc.root_element().unwrap();
        assert_eq!(doc.node_name(root), Some("root"));
        assert_eq!(doc.attribute(root, "attr"), Some("value"));
    }

    // Worst-case chunking: every byte arrives in its own push.
    #[test]
    fn test_push_parser_byte_at_a_time() {
        let xml = b"<root><child/></root>";
        let mut parser = PushParser::new();
        for &byte in xml {
            parser.push(&[byte]);
        }
        let doc = parser.finish().unwrap();
        let root = doc.root_element().unwrap();
        assert_eq!(doc.node_name(root), Some("root"));
        let child = doc.first_child(root).unwrap();
        assert_eq!(doc.node_name(child), Some("child"));
    }

    // The XML declaration is still recognised when split across chunks.
    #[test]
    fn test_push_parser_xml_declaration_split() {
        let mut parser = PushParser::new();
        parser.push(b"<?xml ver");
        parser.push(b"sion=\"1.0\" encoding=\"UTF-8\"?>");
        parser.push(b"<root/>");
        let doc = parser.finish().unwrap();
        assert_eq!(doc.version.as_deref(), Some("1.0"));
        assert_eq!(doc.encoding.as_deref(), Some("UTF-8"));
    }

    // Finishing without any input is reported as an error.
    #[test]
    fn test_push_parser_empty_input() {
        let parser = PushParser::new();
        let result = parser.finish();
        assert!(result.is_err());
    }

    // With `recover` enabled, mismatched tags no longer fail the parse.
    #[test]
    fn test_push_parser_with_options_recover() {
        let opts = ParseOptions::default().recover(true);
        let mut parser = PushParser::with_options(opts);
        parser.push(b"<a></b>");
        let result = parser.finish();
        assert!(result.is_ok());
    }

    // With `no_blanks` enabled, only the element child is expected to remain.
    #[test]
    fn test_push_parser_with_options_no_blanks() {
        let opts = ParseOptions::default().no_blanks(true);
        let mut parser = PushParser::with_options(opts);
        parser.push(b"<root> <child/> </root>");
        let doc = parser.finish().unwrap();
        let root = doc.root_element().unwrap();
        let children: Vec<_> = doc.children(root).collect();
        assert_eq!(children.len(), 1);
        assert_eq!(doc.node_name(children[0]), Some("child"));
    }

    // Default options reject mismatched tags.
    #[test]
    fn test_push_parser_error_malformed() {
        let mut parser = PushParser::new();
        parser.push(b"<a></b>");
        let result = parser.finish();
        assert!(result.is_err());
    }

    // `buffered_bytes` tracks the running total of pushed bytes.
    #[test]
    fn test_push_parser_buffered_bytes() {
        let mut parser = PushParser::new();
        assert_eq!(parser.buffered_bytes(), 0);
        parser.push(b"<root>");
        assert_eq!(parser.buffered_bytes(), 6);
        parser.push(b"</root>");
        assert_eq!(parser.buffered_bytes(), 13);
    }

    // `is_empty` flips once any data has been pushed.
    #[test]
    fn test_push_parser_is_empty() {
        let mut parser = PushParser::new();
        assert!(parser.is_empty());
        parser.push(b"<root/>");
        assert!(!parser.is_empty());
    }

    // `reset` discards earlier (here: broken) input so the parser can be reused.
    #[test]
    fn test_push_parser_reset() {
        let mut parser = PushParser::new();
        parser.push(b"<invalid");
        parser.reset();
        assert!(parser.is_empty());
        assert_eq!(parser.buffered_bytes(), 0);
        parser.push(b"<root/>");
        let doc = parser.finish().unwrap();
        assert!(doc.root_element().is_some());
    }

    // `Default` yields an empty parser.
    #[test]
    fn test_push_parser_default_trait() {
        let parser = PushParser::default();
        assert!(parser.is_empty());
    }

    // The `Debug` output names the type and reports the buffered length.
    #[test]
    fn test_push_parser_debug_trait() {
        let mut parser = PushParser::new();
        parser.push(b"<root/>");
        let debug_str = format!("{parser:?}");
        assert!(debug_str.contains("PushParser"));
        assert!(debug_str.contains("buffered_bytes: 7"));
    }

    // A UTF-8 byte-order mark pushed as its own chunk does not break parsing.
    #[test]
    fn test_push_parser_utf8_bom() {
        let mut parser = PushParser::new();
        parser.push(b"\xEF\xBB\xBF");
        parser.push(b"<root/>");
        let doc = parser.finish().unwrap();
        assert!(doc.root_element().is_some());
    }

    // Comment delimiters may be split across chunks.
    #[test]
    fn test_push_parser_comment_split() {
        let mut parser = PushParser::new();
        parser.push(b"<root><!-");
        parser.push(b"- comment -");
        parser.push(b"-></root>");
        let doc = parser.finish().unwrap();
        let root = doc.root_element().unwrap();
        let child = doc.first_child(root).unwrap();
        assert_eq!(doc.node_text(child), Some(" comment "));
    }

    // CDATA delimiters may be split across chunks.
    #[test]
    fn test_push_parser_cdata_split() {
        let mut parser = PushParser::new();
        parser.push(b"<root><![CDA");
        parser.push(b"TA[some data]]");
        parser.push(b"></root>");
        let doc = parser.finish().unwrap();
        let root = doc.root_element().unwrap();
        let child = doc.first_child(root).unwrap();
        assert_eq!(doc.node_text(child), Some("some data"));
    }

    // Entity references are expanded, including one split across two chunks.
    #[test]
    fn test_push_parser_entity_references() {
        let mut parser = PushParser::new();
        parser.push(b"<root>&am");
        // The less-than and greater-than signs must be written as entity
        // references: a literal less-than sign is not well-formed character
        // data and would make `finish()` fail under default options.
        parser.push(b"p; &lt; &gt;</root>");
        let doc = parser.finish().unwrap();
        let root = doc.root_element().unwrap();
        assert_eq!(doc.text_content(root), "& < >");
    }

    // Parsing arbitrary slices and serializing reproduces the input, wrapped
    // in the XML declaration and trailing newline the serializer emits.
    #[test]
    fn test_push_parser_roundtrip() {
        let input = b"<root><child attr=\"val\">text</child></root>";
        let mut parser = PushParser::new();
        parser.push(&input[..10]);
        parser.push(&input[10..25]);
        parser.push(&input[25..]);
        let doc = parser.finish().unwrap();
        let output = crate::serial::serialize(&doc);
        let expected = format!(
            "<?xml version=\"1.0\"?>\n{}\n",
            std::str::from_utf8(input).unwrap()
        );
        assert_eq!(output, expected);
    }

    // `finish` consumes the parser, so the flag is set by hand to reach the
    // guard inside `push`.
    #[test]
    #[should_panic(expected = "push() called after finish()")]
    fn test_push_parser_push_after_finish_panics() {
        let mut parser = PushParser::new();
        parser.push(b"<root/>");
        parser.finished = true;
        parser.push(b"more data");
    }

    // Many generated chunks produce one child element per chunk.
    #[test]
    fn test_push_parser_large_document() {
        let mut parser = PushParser::new();
        parser.push(b"<root>");
        for i in 0..100 {
            let chunk = format!("<item id=\"{i}\">value {i}</item>");
            parser.push(chunk.as_bytes());
        }
        parser.push(b"</root>");
        let doc = parser.finish().unwrap();
        let root = doc.root_element().unwrap();
        let children: Vec<_> = doc.children(root).collect();
        assert_eq!(children.len(), 100);
    }
}