// anyxml 0.4.1
//
// A fully spec-conformant XML library
// Documentation
use std::{fmt::Write as _, fs::read_dir, path::Path};

use anyxml::{
    sax::{
        handler::{DebugHandler, DefaultSAXHandler},
        parser::XMLReaderBuilder,
    },
    tree::{Node, TreeBuildHandler, convert::NodeKind, node::NodeSpec},
    uri::URIString,
};

/// Appends a line-per-node textual dump of `node`, its descendants and its following
/// siblings to `out`.
///
/// Every node is written on its own line, prefixed by two spaces per `depth` level.
/// The attributes of an element are dumped at `depth + 1` right after the element's
/// own line, nodes reached through `first_child()` are dumped at `depth + 1`, and
/// nodes reached through `next_sibling()` are dumped at the same `depth`.
///
/// Siblings are visited in a loop instead of by recursion, so the call stack grows
/// only with the nesting depth of the tree and not with the number of siblings.
///
/// # Panics
///
/// Panics if a `DocumentFragment` node is encountered, or if formatting into `out`
/// reports an error.
fn walk_tree(out: &mut String, node: impl Into<Node<dyn NodeSpec>>, depth: usize) {
    // `node` and all of its following siblings share the same indentation.
    let indent = "  ".repeat(depth);
    let mut current: Option<Node<dyn NodeSpec>> = Some(node.into());
    while let Some(node) = current {
        out.push_str(&indent);
        match node.downcast() {
            NodeKind::Element(element) => {
                writeln!(
                    out,
                    "Element({:?}, {}, {}, {}, {})",
                    element.node_type(),
                    element.name(),
                    element.local_name(),
                    element.namespace_name().as_deref().unwrap_or("None"),
                    element.prefix().as_deref().unwrap_or("None")
                )
                .unwrap();
                // Attributes are listed one level deeper, ahead of the child nodes.
                for att in element.attributes() {
                    walk_tree(out, att, depth + 1);
                }
            }
            NodeKind::Attribute(attribute) => {
                writeln!(
                    out,
                    "Attribute({:?}, {}, {}, {}, {}, '{}')",
                    attribute.node_type(),
                    attribute.name(),
                    attribute.local_name(),
                    attribute.namespace_name().as_deref().unwrap_or("None"),
                    attribute.prefix().as_deref().unwrap_or("None"),
                    attribute.value()
                )
                .unwrap();
            }
            NodeKind::Text(text) => {
                writeln!(out, "Text({:?}, '{}')", text.node_type(), text.data()).unwrap();
            }
            NodeKind::CDATASection(cdata) => {
                writeln!(
                    out,
                    "CDATASection({:?}, '{}')",
                    cdata.node_type(),
                    cdata.data()
                )
                .unwrap();
            }
            NodeKind::EntityReference(ent) => {
                writeln!(
                    out,
                    "EntityReference({:?}, {})",
                    ent.node_type(),
                    ent.name()
                )
                .unwrap();
            }
            NodeKind::EntityDecl(ent) => {
                writeln!(
                    out,
                    "EntityDecl({:?}, {}, {}, {}, {}, '{}')",
                    ent.node_type(),
                    ent.name(),
                    ent.system_id()
                        .as_deref()
                        .map(|id| id.as_escaped_str())
                        .unwrap_or("None"),
                    ent.public_id().as_deref().unwrap_or("None"),
                    ent.notation_name().as_deref().unwrap_or("None"),
                    ent.value().as_deref().unwrap_or("None"),
                )
                .unwrap();
            }
            NodeKind::ProcessingInstruction(pi) => {
                writeln!(
                    out,
                    "ProcessingInstruction({:?}, {}, {})",
                    pi.node_type(),
                    pi.target(),
                    pi.data().as_deref().unwrap_or("None")
                )
                .unwrap();
            }
            NodeKind::Comment(comment) => {
                writeln!(
                    out,
                    "Comment({:?}, '{}')",
                    comment.node_type(),
                    comment.data()
                )
                .unwrap();
            }
            NodeKind::Document(document) => {
                writeln!(
                    out,
                    "Document({:?}, '{}', '{}', {})",
                    document.node_type(),
                    document.version().as_deref().unwrap_or("None"),
                    document.encoding().as_deref().unwrap_or("None"),
                    document
                        .standalone()
                        .map_or("None", |sddecl| if sddecl { "yes" } else { "no" })
                )
                .unwrap();
            }
            NodeKind::DocumentType(doctype) => {
                writeln!(
                    out,
                    "DocumentType({:?}, {}, {}, {})",
                    doctype.node_type(),
                    doctype.name(),
                    doctype
                        .system_id()
                        .as_deref()
                        .map(|uri| uri.as_escaped_str())
                        .unwrap_or("None"),
                    doctype.public_id().as_deref().unwrap_or("None")
                )
                .unwrap();
            }
            NodeKind::DocumentFragment(_) => {
                panic!("DocumentFragment must not appear in the document tree.")
            }
            NodeKind::NotationDecl(notation) => {
                writeln!(
                    out,
                    "NotationDecl({:?}, {}, {}, {})",
                    notation.node_type(),
                    notation.name(),
                    notation
                        .system_id()
                        .as_deref()
                        .map(|uri| uri.as_escaped_str())
                        .unwrap_or("None"),
                    notation.public_id().as_deref().unwrap_or("None")
                )
                .unwrap();
            }
            NodeKind::ElementDecl(elemdecl) => {
                writeln!(
                    out,
                    "ElementDecl({:?}, {}, {})",
                    elemdecl.node_type(),
                    elemdecl.name(),
                    elemdecl.content_spec()
                )
                .unwrap();
            }
            NodeKind::AttlistDecl(attlistdecl) => {
                writeln!(
                    out,
                    "AttlistDecl({:?}, {}, {}, {}, {})",
                    attlistdecl.node_type(),
                    attlistdecl.elem_name(),
                    attlistdecl.attr_name(),
                    attlistdecl.attr_type(),
                    attlistdecl.default_decl()
                )
                .unwrap();
            }
            NodeKind::Namespace(namespace) => {
                writeln!(
                    out,
                    "Namespace({:?}, {}, {})",
                    namespace.node_type(),
                    namespace.prefix().as_deref().unwrap_or("None"),
                    namespace.namespace_name()
                )
                .unwrap();
            }
        }
        // Only descending into children consumes stack; siblings are handled by the loop.
        if let Some(first_child) = node.first_child() {
            walk_tree(out, first_child, depth + 1);
        }
        // `Into::into` keeps this independent of the exact node type `next_sibling()` yields.
        current = node.next_sibling().map(Into::into);
    }
}

/// Parses every regular file in `resources/well-formed`, then checks the recorded SAX
/// event log (`output/<file>.sax`) and the `walk_tree` dump (`output/<file>.tree`).
///
/// A missing `.sax` file makes the test panic, whereas a missing `.tree` file is read
/// as an empty string.
#[test]
fn tree_walk_tests() {
    for entry in read_dir("resources/well-formed").unwrap() {
        // Directory entries that could not be read are skipped.
        let Ok(entry) = entry else {
            continue;
        };
        // Only regular files are fixtures.
        if !entry.metadata().unwrap().is_file() {
            continue;
        }

        let path = entry.path();
        let uri = URIString::parse_file_path(path.canonicalize().unwrap()).unwrap();
        let mut reader = XMLReaderBuilder::new()
            .set_handler(TreeBuildHandler::with_handler(DebugHandler {
                child: DefaultSAXHandler,
                buffer: String::new(),
            }))
            .build();
        // The parse result is dropped; failures are detected via `fatal_error` below.
        reader.parse_uri(&uri, None).ok();

        let file_name = path.file_name().unwrap().to_str().unwrap();

        // First check: the SAX events logged by the wrapped `DebugHandler`.
        let sax_path = format!("resources/well-formed/output/{file_name}.sax");
        let expected_sax = std::fs::read_to_string(Path::new(&sax_path)).unwrap();
        assert_eq!(
            expected_sax,
            reader.handler.handler.buffer,
            "uri: {}\n{}",
            uri.as_escaped_str(),
            reader.handler.handler.buffer,
        );
        assert!(!reader.handler.fatal_error);

        // Second check: the textual dump of the tree that was built.
        let mut actual_tree = String::new();
        walk_tree(&mut actual_tree, reader.handler.document, 0);
        let tree_path = format!("resources/well-formed/output/{file_name}.tree");
        let expected_tree = std::fs::read_to_string(Path::new(&tree_path)).unwrap_or_default();
        assert_eq!(
            actual_tree,
            expected_tree,
            "uri: {}\n{}",
            uri.as_escaped_str(),
            actual_tree
        );
    }
}

/// Parses every regular file in `resources/well-formed` into a tree, formats the
/// resulting document with `{}` and compares the text with `output/<file>.tree.out`.
///
/// A missing expected file is read as an empty string.
#[test]
fn tree_dump_tests() {
    for entry in read_dir("resources/well-formed").unwrap() {
        // Directory entries that could not be read are skipped.
        let Ok(entry) = entry else {
            continue;
        };
        // Only regular files are fixtures.
        if !entry.metadata().unwrap().is_file() {
            continue;
        }

        let path = entry.path();
        let uri = URIString::parse_file_path(path.canonicalize().unwrap()).unwrap();
        let builder = XMLReaderBuilder::new().set_handler(TreeBuildHandler::default());
        let mut reader = builder.build();
        // The parse result is dropped; failures are detected via `fatal_error` below.
        reader.parse_uri(&uri, None).ok();
        assert!(!reader.handler.fatal_error);

        let document = reader.handler.document;
        let mut dumped = String::new();
        write!(dumped, "{document}").unwrap();

        let file_name = path.file_name().unwrap().to_str().unwrap();
        let expected_path = format!("resources/well-formed/output/{file_name}.tree.out");
        let expected = std::fs::read_to_string(Path::new(&expected_path)).unwrap_or_default();
        assert_eq!(
            dumped,
            expected,
            "uri: {}\n{}",
            uri.as_escaped_str(),
            dumped
        );
    }
}