use quick_xml::events::Event;
use quick_xml::Reader;
/// A single attribute attached to an element [`Node`].
#[derive(Debug, Clone)]
pub struct Attr {
    /// Attribute name.
    pub name: String,
    /// Attribute value.
    pub value: String,
}
/// One element of the parsed document tree.
#[derive(Debug, Clone)]
pub struct Node {
    /// Element name.
    pub name: String,
    /// Attributes in the order they were collected.
    pub attrs: Vec<Attr>,
    /// Character data recorded for this element; empty when there is none.
    pub text: String,
    /// Direct child elements, in document order.
    pub children: Vec<Node>,
    /// Line number supplied when the node was created.
    pub line: u32,
    /// Column number supplied when the node was created.
    pub col: u32,
}

impl Node {
    /// Creates a node with the given name, attributes and position, and with
    /// no text and no children.
    fn new(name: String, attrs: Vec<Attr>, line: u32, col: u32) -> Self {
        Self {
            name,
            attrs,
            line,
            col,
            text: String::new(),
            children: Vec::new(),
        }
    }

    /// Returns the value of the first attribute called `name`, if any.
    pub fn attr(&self, name: &str) -> Option<&str> {
        self.attrs.iter().find_map(|candidate| {
            if candidate.name == name {
                Some(candidate.value.as_str())
            } else {
                None
            }
        })
    }

    /// Reports whether at least one direct child is called `name`.
    pub fn has_child(&self, name: &str) -> bool {
        self.child(name).is_some()
    }

    /// Iterates over every direct child called `name`, in document order.
    pub fn children_named<'a>(&'a self, name: &'a str) -> impl Iterator<Item = &'a Node> {
        self.children
            .iter()
            .filter(move |candidate| candidate.name == name)
    }

    /// Returns the first direct child called `name`, if any.
    pub fn child(&self, name: &str) -> Option<&Node> {
        self.children.iter().find(|candidate| candidate.name == name)
    }

    /// Returns the first descendant called `name` in depth-first, pre-order
    /// traversal: each child is tested before its own subtree, and a child's
    /// whole subtree is searched before moving on to its next sibling.
    ///
    /// The node itself is never returned.
    pub fn find_descendant(&self, name: &str) -> Option<&Node> {
        self.children.iter().find_map(|candidate| {
            if candidate.name == name {
                Some(candidate)
            } else {
                candidate.find_descendant(name)
            }
        })
    }

    /// Reports whether any descendant (at any depth) is called `name`.
    pub fn has_descendant(&self, name: &str) -> bool {
        self.find_descendant(name).is_some()
    }
}
/// Result of parsing a document: the element tree plus an optional error.
#[derive(Debug, Clone)]
pub struct VastDocument {
    /// Top-most element of the parsed tree.
    pub root: Node,
    /// Description of a parse failure, or `None` when no error was recorded.
    pub parse_error: Option<String>,
}

impl VastDocument {
    /// Returns the root element when it is named exactly `VAST`
    /// (case-sensitive), otherwise `None`.
    pub fn vast_root(&self) -> Option<&Node> {
        match self.root.name.as_str() {
            "VAST" => Some(&self.root),
            _ => None,
        }
    }
}
/// Converts a byte offset into `input` to a 1-based `(line, column)` pair.
///
/// Lines are separated by `\n`. The column is measured in bytes from the
/// start of the line. Offsets past the end of `input` are clamped to its
/// length.
fn byte_offset_to_line_col(input: &[u8], offset: usize) -> (u32, u32) {
    let clamped = offset.min(input.len());
    let before = &input[..clamped];

    // Every newline before the offset moves the position one line down.
    let newline_count = before.iter().filter(|&&byte| byte == b'\n').count();

    // The current line begins right after the last newline, or at offset 0.
    let current_line_start = before
        .iter()
        .rposition(|&byte| byte == b'\n')
        .map_or(0, |newline_at| newline_at + 1);

    let line = newline_count as u32 + 1;
    let col = (clamped - current_line_start) as u32 + 1;
    (line, col)
}
pub fn parse(input: &str) -> VastDocument {
let input_bytes = input.as_bytes();
let mut reader = Reader::from_str(input);
reader.config_mut().trim_text(true);
let mut stack: Vec<Node> = Vec::new();
let mut parse_error: Option<String> = None;
loop {
match reader.read_event() {
Ok(Event::Start(e)) => {
let end_pos = reader.buffer_position() as usize;
let tag_bytes = e.as_ref();
let tag_len = tag_bytes.len() + 2; let start_pos = end_pos.saturating_sub(tag_len);
let (line, col) = byte_offset_to_line_col(input_bytes, start_pos);
let name = std::str::from_utf8(e.local_name().as_ref())
.unwrap_or("")
.to_owned();
let mut attrs = Vec::new();
for attr in e.attributes().flatten() {
let key = std::str::from_utf8(attr.key.local_name().as_ref())
.unwrap_or("")
.to_owned();
let val = std::str::from_utf8(attr.value.as_ref())
.unwrap_or("")
.to_owned();
attrs.push(Attr {
name: key,
value: val,
});
}
stack.push(Node::new(name, attrs, line, col));
}
Ok(Event::End(_)) if stack.len() > 1 => {
let finished = stack.pop().unwrap();
stack.last_mut().unwrap().children.push(finished);
}
Ok(Event::End(_)) => {}
Ok(Event::Empty(e)) => {
let end_pos = reader.buffer_position() as usize;
let tag_bytes = e.as_ref();
let tag_len = tag_bytes.len() + 3; let start_pos = end_pos.saturating_sub(tag_len);
let (line, col) = byte_offset_to_line_col(input_bytes, start_pos);
let name = std::str::from_utf8(e.local_name().as_ref())
.unwrap_or("")
.to_owned();
let mut attrs = Vec::new();
for attr in e.attributes().flatten() {
let key = std::str::from_utf8(attr.key.local_name().as_ref())
.unwrap_or("")
.to_owned();
let val = std::str::from_utf8(attr.value.as_ref())
.unwrap_or("")
.to_owned();
attrs.push(Attr {
name: key,
value: val,
});
}
let node = Node::new(name, attrs, line, col);
if let Some(parent) = stack.last_mut() {
parent.children.push(node);
} else {
stack.push(node);
}
}
Ok(Event::Text(e)) => {
if let Some(node) = stack.last_mut() {
if let Ok(text) = e.unescape() {
let trimmed = text.trim().to_owned();
if !trimmed.is_empty() {
node.text = trimmed;
}
}
}
}
Ok(Event::CData(e)) => {
if let Some(node) = stack.last_mut() {
let bytes = e.into_inner();
if let Ok(text) = std::str::from_utf8(&bytes) {
let trimmed = text.trim().to_owned();
if !trimmed.is_empty() {
node.text = trimmed;
}
}
}
}
Ok(Event::Eof) => break,
Err(e) => {
parse_error = Some(format!(
"XML parse error at position {}: {}",
reader.error_position(),
e
));
break;
}
_ => {}
}
}
let root = if stack.is_empty() {
Node::new("__empty__".to_owned(), Vec::new(), 0, 0)
} else {
while stack.len() > 1 {
let node = stack.pop().unwrap();
stack.last_mut().unwrap().children.push(node);
}
stack.pop().unwrap()
};
VastDocument { root, parse_error }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An element with only a root yields that root and its attributes.
    #[test]
    fn parses_minimal_vast() {
        let doc = parse(r#"<VAST version="4.1"></VAST>"#);

        assert!(doc.parse_error.is_none());
        assert_eq!(doc.root.name, "VAST");
        assert_eq!(doc.root.attr("version"), Some("4.1"));
    }

    /// A self-closing element becomes a child of the enclosing element.
    #[test]
    fn parses_self_closing_child() {
        let doc = parse(r#"<VAST version="4.1"><Ad id="1"/></VAST>"#);

        assert!(doc.root.has_child("Ad"));
    }

    /// CDATA content ends up in the `text` of the element that contains it.
    #[test]
    fn captures_cdata_text() {
        let doc = parse(
            r#"<VAST version="4.1"><Ad><InLine><Impression><![CDATA[https://example.com/imp]]></Impression></InLine></Ad></VAST>"#,
        );

        let impression = doc
            .root
            .child("Ad")
            .and_then(|ad| ad.child("InLine"))
            .and_then(|inline| inline.child("Impression"))
            .unwrap();

        assert_eq!(impression.text, "https://example.com/imp");
    }

    /// A mismatched closing tag is reported through `parse_error`.
    #[test]
    fn sets_parse_error_on_malformed_xml() {
        let doc = parse(r#"<VAST version="4.1"><Ad></VAST>"#);

        assert!(doc.parse_error.is_some());
    }
}