roxmltree 0.11.0

Represent an XML document as a read-only tree.
Documentation
#!/usr/bin/env python3

import sys
from lxml import etree


def escape_text(text):
    """Return *text* with special characters spelled as backslash escapes.

    The string is passed through Python's ``unicode_escape`` codec, so
    characters such as newlines, tabs, backslashes and non-ASCII code points
    come back as escape sequences (``\\n``, ``\\t``, ``\\\\``, ``\\xe9``).
    The codec's output is plain ASCII, so decoding it as UTF-8 is lossless.
    """
    escaped_bytes = text.encode('unicode_escape')
    return str(escaped_bytes, "utf-8")


def split_qname(name):
    """Split a ``{uri}local`` qualified name into ``[uri, local]``.

    Args:
        name: A tag or attribute name, either plain (``"local"``) or in the
            brace-qualified form ``"{namespace-uri}local"``.

    Returns:
        A two-item list ``[uri, local]``.  ``uri`` is ``None`` when *name*
        carries no namespace part (no leading ``{``, or no closing ``}``),
        in which case ``local`` is *name* unchanged.
    """
    # startswith() instead of name[0] so an empty name does not raise.
    if name.startswith('{'):
        # Split on the LAST closing brace only: a local name cannot contain
        # '}', but a namespace URI may, and an unbounded split would then
        # return more than the two items callers unpack.
        uri, brace, local = name[1:].rpartition('}')
        if brace:
            return [uri, local]

    return [None, name]


def print_ind(depth, *args, **kwargs):
    """Print *args* behind an indentation derived from *depth*.

    The indent is handed to ``print`` as its own leading argument, so
    ``print``'s separator (one space unless ``sep`` is overridden) ends up
    between the indent and the first value.  The indent is therefore one
    character shorter than two spaces per level to make up for it.

    Any keyword arguments are forwarded to ``print`` unchanged.
    """
    # Same length as ('  ' * depth)[:-1]: 2*depth - 1, and empty for depth <= 0.
    width = max(2 * depth - 1, 0)
    print(' ' * width, *args, **kwargs)


def _print_tail(node, depth):
    """Print the text stored in *node*'s ``tail`` as a Text entry, if any."""
    if node.tail:
        print_ind(depth, '- Text: "{}"'.format(escape_text(node.tail)))


def _print_attributes(node, depth):
    """Print *node*'s attributes, sorted by displayed name, under a header."""
    if not node.attrib:
        return

    print_ind(depth, 'attributes:')

    attrs = []
    for name, value in node.attrib.items():
        uri, local = split_qname(name)
        # Namespaced attributes are displayed as "local@uri".
        attrs.append([(local + '@' + uri) if uri else local, value])

    for name, value in sorted(attrs, key=lambda x: x[0]):
        print_ind(depth + 1, '{}: "{}"'.format(name, escape_text(value)))


def _print_namespaces(node, depth):
    """Print the entries of *node*'s ``nsmap``, sorted by prefix."""
    if not node.nsmap:
        return

    print_ind(depth, 'namespaces:')

    # A missing prefix is shown as the word None and an empty URI as a pair
    # of double quotes; substituting strings also keeps the sort keys
    # comparable with each other.
    ns_list = [
        [prefix or 'None', uri or '""']
        for prefix, uri in node.nsmap.items()
    ]

    for prefix, uri in sorted(ns_list, key=lambda x: x[0]):
        print_ind(depth + 1, '{}: {}'.format(prefix, uri))


def _print_children(node, depth):
    """Print *node*'s leading text and child nodes under a header."""
    if not (len(node) or node.text):
        return

    print_ind(depth, 'children:')

    if node.text:
        print_ind(depth + 1, '- Text: "{}"'.format(escape_text(node.text)))

    for child in node:
        print_node(child, depth + 1)


def print_node(node, depth):
    """Recursively print *node* and its subtree as an indented outline.

    Comments and processing instructions (recognised by their ``tag`` being
    ``etree.Comment`` / ``etree.PI``) are printed as single entries.  Any
    other node is printed as an Element with its tag name, attributes,
    namespace map and children.  In every case the node's ``tail`` text, when
    present, is printed afterwards as a sibling Text entry at the same depth.

    Args:
        node: A node of the parsed tree; it is read through its ``tag``,
            ``text``, ``tail``, ``attrib`` and ``nsmap`` attributes
            (``target`` for processing instructions).
        depth: Indentation level handed to ``print_ind`` for this node's
            first line.  Its properties are printed at ``depth + 2`` and
            their entries, including child nodes, at ``depth + 3``.
    """
    if node.tag is etree.Comment:
        print_ind(depth, '- Comment: "{}"'.format(escape_text(node.text)))
        _print_tail(node, depth)
        return

    if node.tag is etree.PI:
        print_ind(depth, '- PI:')
        print_ind(depth + 2, 'target: "{}"'.format(node.target))
        print_ind(depth + 2, 'value: "{}"'.format(escape_text(node.text)))
        _print_tail(node, depth)
        return

    print_ind(depth, '- Element:')
    if node.tag[0] == '{':
        # Namespaced tags are displayed as "local@uri".
        uri, tag = split_qname(node.tag)
        print_ind(depth + 2, 'tag_name: {}@{}'.format(tag, uri))
    else:
        print_ind(depth + 2, 'tag_name:', node.tag)

    _print_attributes(node, depth + 2)
    _print_namespaces(node, depth + 2)
    _print_children(node, depth + 2)
    _print_tail(node, depth)


# Script entry point: parse the XML file named by the first command-line
# argument and print its tree to standard output.
if len(sys.argv) < 2:
    # Exit with a usage hint instead of an IndexError traceback; sys.exit()
    # with a string writes it to stderr and sets a non-zero exit status.
    sys.exit('usage: {} <file.xml>'.format(sys.argv[0]))

tree = etree.parse(sys.argv[1])
root = tree.getroot()

print('Document:')
print_node(root, 1)