// anyxml 0.12.0
//
// A fully spec-conformant XML library
// Documentation
use std::{fmt::Write as _, fs::read_dir, path::Path};

use anyxml::{
    sax::{DebugHandler, DefaultSAXHandler, XMLReader},
    tree::{Node, TreeBuildHandler, convert::NodeKind, node::NodeSpec},
    uri::URIString,
};

/// Appends a line-per-node textual dump of `node` and everything reachable
/// from it to `out`.
///
/// Each node is written as `Kind(node_type, ...)` on its own line, indented by
/// two spaces per `depth` level. The attributes of an element are dumped one
/// level deeper directly after the element's line, then the node's first child
/// is dumped one level deeper, and finally the walk continues with the node's
/// next sibling at the same depth. Optional properties that are absent are
/// written as `None`.
///
/// Following siblings are visited in a loop instead of by recursion, so the
/// call stack only grows with the nesting depth of the tree and not with the
/// number of siblings.
///
/// # Panics
///
/// Panics if a `DocumentFragment` node is encountered.
fn walk_tree(out: &mut String, node: impl Into<Node<dyn NodeSpec>>, depth: usize) {
    let mut current: Option<Node<dyn NodeSpec>> = Some(node.into());
    while let Some(node) = current {
        // Indentation: two spaces per nesting level.
        for _ in 0..depth {
            out.push_str("  ");
        }
        match node.downcast() {
            NodeKind::Element(element) => {
                writeln!(
                    out,
                    "Element({:?}, {}, {}, {}, {})",
                    element.node_type(),
                    element.name(),
                    element.local_name(),
                    element.namespace_name().as_deref().unwrap_or("None"),
                    element.prefix().as_deref().unwrap_or("None")
                )
                .unwrap();
                // Attributes are listed before the element's children.
                for att in element.attributes() {
                    walk_tree(out, att, depth + 1);
                }
            }
            NodeKind::Attribute(attribute) => {
                writeln!(
                    out,
                    "Attribute({:?}, {}, {}, {}, {}, '{}')",
                    attribute.node_type(),
                    attribute.name(),
                    attribute.local_name(),
                    attribute.namespace_name().as_deref().unwrap_or("None"),
                    attribute.prefix().as_deref().unwrap_or("None"),
                    attribute.value()
                )
                .unwrap();
            }
            NodeKind::Text(text) => {
                writeln!(out, "Text({:?}, '{}')", text.node_type(), text.data()).unwrap();
            }
            NodeKind::CDATASection(cdata) => {
                writeln!(
                    out,
                    "CDATASection({:?}, '{}')",
                    cdata.node_type(),
                    cdata.data()
                )
                .unwrap();
            }
            NodeKind::EntityReference(ent) => {
                writeln!(
                    out,
                    "EntityReference({:?}, {})",
                    ent.node_type(),
                    ent.name()
                )
                .unwrap();
            }
            NodeKind::EntityDecl(ent) => {
                writeln!(
                    out,
                    "EntityDecl({:?}, {}, {}, {}, {}, '{}')",
                    ent.node_type(),
                    ent.name(),
                    ent.system_id()
                        .as_deref()
                        .map(|id| id.as_escaped_str())
                        .unwrap_or("None"),
                    ent.public_id().as_deref().unwrap_or("None"),
                    ent.notation_name().as_deref().unwrap_or("None"),
                    ent.value().as_deref().unwrap_or("None"),
                )
                .unwrap();
            }
            NodeKind::ProcessingInstruction(pi) => {
                writeln!(
                    out,
                    "ProcessingInstruction({:?}, {}, {})",
                    pi.node_type(),
                    pi.target(),
                    pi.data().as_deref().unwrap_or("None")
                )
                .unwrap();
            }
            NodeKind::Comment(comment) => {
                writeln!(
                    out,
                    "Comment({:?}, '{}')",
                    comment.node_type(),
                    comment.data()
                )
                .unwrap();
            }
            NodeKind::Document(document) => {
                writeln!(
                    out,
                    "Document({:?}, '{}', '{}', {})",
                    document.node_type(),
                    document.version().as_deref().unwrap_or("None"),
                    document.encoding().as_deref().unwrap_or("None"),
                    document
                        .standalone()
                        .map_or("None", |sddecl| if sddecl { "yes" } else { "no" })
                )
                .unwrap();
            }
            NodeKind::DocumentType(doctype) => {
                writeln!(
                    out,
                    "DocumentType({:?}, {}, {}, {})",
                    doctype.node_type(),
                    doctype.name(),
                    doctype
                        .system_id()
                        .as_deref()
                        .map(|uri| uri.as_escaped_str())
                        .unwrap_or("None"),
                    doctype.public_id().as_deref().unwrap_or("None")
                )
                .unwrap();
            }
            NodeKind::DocumentFragment(_) => {
                panic!("DocumentFragment must not appear in the document tree.")
            }
            NodeKind::NotationDecl(notation) => {
                writeln!(
                    out,
                    "NotationDecl({:?}, {}, {}, {})",
                    notation.node_type(),
                    notation.name(),
                    notation
                        .system_id()
                        .as_deref()
                        .map(|uri| uri.as_escaped_str())
                        .unwrap_or("None"),
                    notation.public_id().as_deref().unwrap_or("None")
                )
                .unwrap();
            }
            NodeKind::ElementDecl(elemdecl) => {
                writeln!(
                    out,
                    "ElementDecl({:?}, {}, {})",
                    elemdecl.node_type(),
                    elemdecl.name(),
                    elemdecl.content_spec()
                )
                .unwrap();
            }
            NodeKind::AttlistDecl(attlistdecl) => {
                writeln!(
                    out,
                    "AttlistDecl({:?}, {}, {}, {}, {})",
                    attlistdecl.node_type(),
                    attlistdecl.elem_name(),
                    attlistdecl.attr_name(),
                    attlistdecl.attr_type(),
                    attlistdecl.default_decl()
                )
                .unwrap();
            }
            NodeKind::Namespace(namespace) => {
                writeln!(
                    out,
                    "Namespace({:?}, {}, {})",
                    namespace.node_type(),
                    namespace.prefix().as_deref().unwrap_or("None"),
                    namespace.namespace_name()
                )
                .unwrap();
            }
        }
        // Descend into the subtree first (depth-first, pre-order)...
        if let Some(first_child) = node.first_child() {
            walk_tree(out, first_child, depth + 1);
        }
        // ...then move on to the next sibling without growing the stack.
        current = node.next_sibling();
    }
}

/// For every regular file directly under `resources/well-formed`, parses it
/// with a `DebugHandler` wrapped in a `TreeBuildHandler` (entity references
/// are not expanded) and checks:
///
/// * the recorded SAX log against `output/<file name>.sax`,
/// * that no fatal error was flagged,
/// * the `walk_tree` dump of the built document against
///   `output/<file name>.tree` (a missing file is treated as empty).
#[test]
fn tree_walk_tests() {
    for entry in read_dir("resources/well-formed").unwrap() {
        // Entries that cannot be read and anything that is not a plain file are skipped.
        let Ok(entry) = entry else {
            continue;
        };
        if !entry.metadata().unwrap().is_file() {
            continue;
        }

        let path = entry.path();
        let uri = URIString::parse_file_path(path.canonicalize().unwrap()).unwrap();

        let sax_logger = DebugHandler {
            child: DefaultSAXHandler,
            buffer: String::new(),
        };
        let mut tree_builder = TreeBuildHandler::with_handler(sax_logger);
        tree_builder.expand_entity_reference = false;
        let mut reader = XMLReader::builder().set_handler(tree_builder).build();
        // The parse result itself is ignored; failures surface through `fatal_error` below.
        reader.parse_uri(&uri, None).ok();

        let file_name = path.file_name().unwrap().to_str().unwrap();

        // SAX event log comparison.
        let expected_sax =
            std::fs::read_to_string(format!("resources/well-formed/output/{file_name}.sax"))
                .unwrap();
        assert_eq!(
            expected_sax,
            reader.handler.handler.buffer,
            "uri: {}\n{}",
            uri.as_escaped_str(),
            reader.handler.handler.buffer,
        );
        assert!(!reader.handler.fatal_error);

        // Tree structure comparison.
        let mut actual_tree = String::new();
        walk_tree(&mut actual_tree, reader.handler.document, 0);
        let expected_tree =
            std::fs::read_to_string(format!("resources/well-formed/output/{file_name}.tree"))
                .unwrap_or_default();
        assert_eq!(
            actual_tree,
            expected_tree,
            "uri: {}\n{}",
            uri.as_escaped_str(),
            actual_tree
        );
    }
}

/// For every regular file directly under `resources/well-formed`, builds the
/// document tree (entity references are not expanded), serializes it through
/// its `Display` implementation and compares the text against
/// `output/<file name>.tree.out` (a missing file is treated as empty).
#[test]
fn tree_dump_tests() {
    for entry in read_dir("resources/well-formed").unwrap() {
        // Entries that cannot be read and anything that is not a plain file are skipped.
        let Ok(entry) = entry else {
            continue;
        };
        if !entry.metadata().unwrap().is_file() {
            continue;
        }

        let path = entry.path();
        let uri = URIString::parse_file_path(path.canonicalize().unwrap()).unwrap();

        let mut tree_builder = TreeBuildHandler::default();
        tree_builder.expand_entity_reference = false;
        let mut reader = XMLReader::builder().set_handler(tree_builder).build();
        // The parse result itself is ignored; failures surface through `fatal_error`.
        reader.parse_uri(&uri, None).ok();
        assert!(!reader.handler.fatal_error);

        let dumped = reader.handler.document.to_string();

        let file_name = path.file_name().unwrap().to_str().unwrap();
        let expected =
            std::fs::read_to_string(format!("resources/well-formed/output/{file_name}.tree.out"))
                .unwrap_or_default();
        assert_eq!(
            dumped,
            expected,
            "uri: {}\n{}",
            uri.as_escaped_str(),
            dumped
        );
    }
}

/// For every regular file directly under `resources/well-formed`, builds the
/// document tree with a default `TreeBuildHandler`, deep-copies it and checks
/// that the copy serializes to exactly the same text as the original.
#[test]
fn tree_deep_copy_tests() {
    for entry in read_dir("resources/well-formed").unwrap() {
        // Entries that cannot be read and anything that is not a plain file are skipped.
        let Ok(entry) = entry else {
            continue;
        };
        if !entry.metadata().unwrap().is_file() {
            continue;
        }

        let uri = URIString::parse_file_path(entry.path().canonicalize().unwrap()).unwrap();

        let builder = XMLReader::builder().set_handler(TreeBuildHandler::default());
        let mut reader = builder.build();
        // The parse result itself is ignored; failures surface through `fatal_error`.
        reader.parse_uri(&uri, None).ok();
        assert!(!reader.handler.fatal_error);

        let document = reader.handler.document;
        let copied = document.deep_copy_subtree().unwrap();

        let original_text = document.to_string();
        let copied_text = copied.to_string();
        assert_eq!(
            original_text,
            copied_text,
            "uri: {}\ndocument:\n{}\ncopied:\n{}",
            uri.as_escaped_str(),
            original_text,
            copied_text
        );
    }
}

/// Runs the scenarios listed in `resources/xml-id/test-suite.xml`.
///
/// Test cases carrying a `feature` attribute are skipped. For every remaining
/// scenario the input file is parsed, the tree is walked starting at the
/// document element, and a report is accumulated: one entry per ID attribute
/// that resolves back to its own element, and one entry per `ref` attribute
/// saying whether the referenced ID was found. The report must equal the
/// scenario's `result` text.
///
/// The test method follows the reference stylesheet:
/// https://www.w3.org/XML/2005/01/xml-id/runtests.xsl
#[test]
fn xml_id_tests() {
    let mut reader = XMLReader::builder()
        .set_handler(TreeBuildHandler::default())
        .build();

    let suite_path = Path::new("resources/xml-id/test-suite.xml")
        .canonicalize()
        .unwrap();
    let suite_uri = URIString::parse_file_path(suite_path).unwrap();
    reader.parse_uri(suite_uri, None).unwrap();

    // Keep a handle on the suite document; the reader is reused for the inputs below.
    let test_suite = reader.handler.document.clone();
    for test_case in test_suite.get_elements_by_qname("test-case") {
        eprintln!(
            "--- id: {} ---",
            test_case.get_attribute("id", None).unwrap()
        );
        if let Some(feature) = test_case.get_attribute("feature", None) {
            eprintln!("skip because the feature '{feature}' is not supported.");
            continue;
        }

        let file_path = test_case
            .get_elements_by_qname("file-path")
            .next()
            .unwrap()
            .first_child()
            .unwrap()
            .text_content();
        for scenario in test_case.get_elements_by_qname("scenario") {
            let input_file = scenario
                .get_elements_by_qname("input-file")
                .next()
                .unwrap()
                .first_child()
                .unwrap()
                .text_content();
            let target_file = format!("resources/xml-id/{file_path}/{input_file}");
            eprintln!("target-file='{target_file}'");
            let expected = scenario
                .get_elements_by_qname("result")
                .next()
                .unwrap()
                .first_child()
                .unwrap()
                .text_content();

            let target_path = Path::new(&target_file).canonicalize().unwrap();
            let target_uri = URIString::parse_file_path(target_path).unwrap();
            reader.parse_uri(target_uri, None).unwrap();

            let mut report = String::new();
            let document = reader.handler.document.clone();
            let mut cursor = document.document_element().map(Node::<dyn NodeSpec>::from);
            while let Some(node) = cursor {
                if let Some(element) = node.as_element() {
                    for att in element.attributes() {
                        if att.is_id() {
                            let id = att.value();
                            // Only report the ID when the lookup leads back to this very element.
                            let owner = document.get_element_by_id(&id).unwrap();
                            if owner.is_same_node(element.clone()) {
                                write!(
                                    report,
                                    "{} on {} is an ID ({}) ",
                                    att.name(),
                                    element.name(),
                                    id
                                )
                                .unwrap();
                            }
                        } else if att.name().as_ref() == "ref" {
                            match document.get_element_by_id(&att.value()) {
                                Some(target) => {
                                    write!(report, "Found {} ", target.name()).unwrap()
                                }
                                None => {
                                    write!(report, "Found no element with id {} ", att.value())
                                        .unwrap()
                                }
                            }
                        }
                    }
                }

                // Advance in document order: first child, else next sibling, else the
                // next sibling of the closest ancestor that has one. Climbing stops
                // once the document element is reached.
                cursor = node
                    .first_child()
                    .or_else(|| node.next_sibling())
                    .or_else(|| {
                        let mut ancestor = node.parent_node();
                        while let Some(now) = ancestor {
                            if now.is_same_node(document.document_element().unwrap()) {
                                return None;
                            }
                            if let Some(next) = now.next_sibling() {
                                return Some(next);
                            }
                            ancestor = now.parent_node();
                        }
                        None
                    });
            }

            assert_eq!(report, expected);
        }
    }
}