use-xml 0.1.0

Lightweight XML declaration, element, and attribute helpers for RustUse
Documentation
#![forbid(unsafe_code)]
#![doc = include_str!("../README.md")]

/// A conservative view of an XML declaration.
///
/// Each field holds the quoted value of the pseudo-attribute with the same
/// name, or `None` when the declaration does not carry it. Values are stored
/// as found; they are not validated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XmlDeclaration {
    /// Value of the `version` pseudo-attribute, if present.
    pub version: Option<String>,
    /// Value of the `encoding` pseudo-attribute, if present.
    pub encoding: Option<String>,
    /// Value of the `standalone` pseudo-attribute, if present.
    pub standalone: Option<String>,
}

/// A simple XML attribute.
///
/// A plain name/value pair with no namespace handling.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XmlAttribute {
    /// Attribute name exactly as written in the source (prefix included).
    pub name: String,
    /// Attribute value. The parser in this file copies the text between the
    /// surrounding quotes verbatim, so entity references are left as written.
    pub value: String,
}

/// A simple XML start element.
///
/// Describes only the start tag: its name and attributes. Child nodes and
/// text content are not represented.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XmlElement {
    /// Tag name, i.e. the first whitespace-delimited token of the start tag.
    pub name: String,
    /// Attributes in the order they appear in the start tag.
    pub attributes: Vec<XmlAttribute>,
}

/// Heuristically decides whether `input` is XML.
///
/// The input qualifies when it either opens with an XML declaration or a
/// root element start tag can be extracted from it.
pub fn looks_like_xml(input: &str) -> bool {
    // Cheap prefix check first; only fall back to tag extraction when needed.
    if has_xml_declaration(input) {
        return true;
    }

    extract_root_element(input).is_some()
}

/// Returns `true` when the input starts with an XML declaration.
///
/// Leading whitespace is ignored. The `<?xml` target must be followed by XML
/// whitespace (space, tab, CR or LF) or directly by `?>`, so a processing
/// instruction whose target merely begins with `xml` — for example
/// `<?xml-stylesheet ...?>` — is not reported as a declaration.
pub fn has_xml_declaration(input: &str) -> bool {
    match input.trim_start().strip_prefix("<?xml") {
        // The target name must end right after `xml`.
        Some(rest) => {
            rest.starts_with("?>")
                || rest.starts_with(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'))
        }
        None => false,
    }
}

/// Extracts the XML declaration when present.
///
/// Leading whitespace is ignored. Returns `None` when the input does not
/// start with `<?xml` followed by XML whitespace (or `?>`), or when the
/// declaration is never closed with `?>`. Processing instructions whose
/// target only begins with `xml` (e.g. `<?xml-stylesheet ...?>`) are
/// therefore not treated as a declaration.
///
/// Each field of the result is the quoted value of the matching
/// pseudo-attribute, or `None` when it is absent.
pub fn extract_xml_declaration(input: &str) -> Option<XmlDeclaration> {
    let after_target = input.trim_start().strip_prefix("<?xml")?;

    // The target name must end right after `xml`.
    let is_declaration = after_target.starts_with("?>")
        || after_target.starts_with(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'));
    if !is_declaration {
        return None;
    }

    let end = after_target.find("?>")?;
    let attributes = parse_attributes_fragment(&after_target[..end]);

    // First pseudo-attribute with the requested name wins.
    let value_of = |wanted: &str| {
        attributes
            .iter()
            .find(|attribute| attribute.name == wanted)
            .map(|attribute| attribute.value.clone())
    };

    Some(XmlDeclaration {
        version: value_of("version"),
        encoding: value_of("encoding"),
        standalone: value_of("standalone"),
    })
}

/// Strips a leading XML declaration when present.
///
/// When the input (ignoring leading whitespace) starts with a closed
/// `<?xml ...?>` declaration, returns everything after the closing `?>`.
/// Otherwise the input is returned unchanged, leading whitespace included.
///
/// Only a real declaration is removed: `<?xml` must be followed by XML
/// whitespace or `?>`, so a processing instruction such as
/// `<?xml-stylesheet ...?>` is left in place.
pub fn strip_xml_declaration(input: &str) -> &str {
    let Some(after_target) = input.trim_start().strip_prefix("<?xml") else {
        return input;
    };

    // The target name must end right after `xml`.
    let is_declaration = after_target.starts_with("?>")
        || after_target.starts_with(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'));
    if !is_declaration {
        return input;
    }

    match after_target.find("?>") {
        Some(end) => &after_target[end + 2..],
        // Unterminated declaration: leave the input alone.
        None => input,
    }
}

/// Extracts the root element start tag when present.
///
/// The XML declaration and any leading comments, processing instructions and
/// `<!...>` declarations are skipped first. The first `<` that follows is
/// taken as the start of the root tag, which is read up to its closing `>`
/// (a `>` inside a quoted attribute value does not end the tag).
///
/// Returns `None` when no complete tag is found, or when the tag found is
/// not an element start tag: an empty name, a closing tag (`/`), a
/// declaration or comment (`!`), or a processing instruction (`?`). This is
/// the same rule `extract_attributes` applies to tag names.
pub fn extract_root_element(input: &str) -> Option<XmlElement> {
    let candidate = strip_leading_xml_misc(strip_xml_declaration(input));
    let start = candidate.find('<')?;
    let tag_text = read_start_tag(&candidate[start + 1..])?;
    // Drop the `/` of a self-closing tag along with surrounding whitespace.
    let tag = tag_text.trim().trim_end_matches('/').trim();

    let mut parts = tag.splitn(2, char::is_whitespace);
    let name = parts.next()?;

    // Markup that is not an element start tag can surface here when stray
    // text (or a byte-order mark) precedes it and stops the prolog skipping.
    if name.is_empty() || name.starts_with(|c: char| matches!(c, '!' | '?' | '/')) {
        return None;
    }

    let attributes = parse_attributes_fragment(parts.next().unwrap_or_default());

    Some(XmlElement {
        name: name.to_string(),
        attributes,
    })
}

/// Parses the attributes of a single element start tag.
///
/// `element` may be given with or without the surrounding `<` / `>` and with
/// or without a trailing `/`. An empty vector is returned when the tag has
/// no name or is a declaration/comment (`!`), a processing instruction
/// (`?`), or a closing tag (`/`).
pub fn extract_attributes(element: &str) -> Vec<XmlAttribute> {
    // Peel the optional angle brackets, then the self-closing slash.
    let outer = element.trim();
    let without_open = outer.strip_prefix('<').unwrap_or(outer);
    let without_close = without_open.strip_suffix('>').unwrap_or(without_open);
    let tag = without_close.trim().trim_end_matches('/').trim();

    // Everything before the first whitespace is the tag name; the remainder
    // (possibly empty) is the attribute list.
    let (name, attribute_text) = match tag.split_once(char::is_whitespace) {
        Some((name, rest)) => (name, rest),
        None => (tag, ""),
    };

    let is_start_tag = !name.is_empty()
        && !name.starts_with('!')
        && !name.starts_with('?')
        && !name.starts_with('/');

    if is_start_tag {
        parse_attributes_fragment(attribute_text)
    } else {
        Vec::new()
    }
}

/// Looks up the value of the attribute called `name` on `element`.
///
/// `element` is a start tag as accepted by `extract_attributes`. When the
/// name occurs more than once, the first occurrence wins. Returns `None`
/// when no attribute with that exact name exists.
pub fn get_attribute(element: &str, name: &str) -> Option<String> {
    for attribute in extract_attributes(element) {
        if attribute.name == name {
            return Some(attribute.value);
        }
    }

    None
}

/// Reports whether `element` carries an attribute called `name`.
///
/// Equivalent to checking that `get_attribute` finds a value.
pub fn has_attribute(element: &str, name: &str) -> bool {
    let found = get_attribute(element, name);
    found.is_some()
}

/// Escapes XML text content.
///
/// Replaces the five characters with predefined XML entities — `&`, `<`,
/// `>`, `"` and `'` — by `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`.
/// All other characters are copied through unchanged.
pub fn escape_xml(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    // Start of the run of characters that need no escaping.
    let mut plain_from = 0;

    // Every escaped character is a single ASCII byte, so scanning bytes is
    // safe: the slice boundaries below always fall on char boundaries.
    for (at, byte) in input.bytes().enumerate() {
        let entity = match byte {
            b'&' => "&amp;",
            b'<' => "&lt;",
            b'>' => "&gt;",
            b'"' => "&quot;",
            b'\'' => "&apos;",
            _ => continue,
        };

        out.push_str(&input[plain_from..at]);
        out.push_str(entity);
        plain_from = at + 1;
    }

    out.push_str(&input[plain_from..]);
    out
}

/// Unescapes the most common XML entities.
///
/// Decodes `&lt;`, `&gt;`, `&quot;`, `&apos;` and `&amp;`. Each entity is
/// decoded exactly once, so `&amp;lt;` becomes `&lt;`, not `<`. Any other
/// `&` sequence is left as written.
pub fn unescape_xml(input: &str) -> String {
    const ENTITIES: [(&str, char); 5] = [
        ("&lt;", '<'),
        ("&gt;", '>'),
        ("&quot;", '"'),
        ("&apos;", '\''),
        ("&amp;", '&'),
    ];

    let mut decoded = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(amp) = rest.find('&') {
        decoded.push_str(&rest[..amp]);
        let tail = &rest[amp..];

        match ENTITIES
            .iter()
            .find(|(entity, _)| tail.starts_with(*entity))
        {
            Some((entity, replacement)) => {
                decoded.push(*replacement);
                rest = &tail[entity.len()..];
            }
            None => {
                // Not a known entity: keep the ampersand literally.
                decoded.push('&');
                rest = &tail[1..];
            }
        }
    }

    decoded.push_str(rest);
    decoded
}

/// Strips XML comments from the input.
///
/// Every `<!-- ... -->` span is removed and the surrounding text is kept.
/// A comment that is opened but never closed swallows the rest of the input.
pub fn strip_xml_comments(input: &str) -> String {
    let mut kept = String::new();
    let mut rest = input;

    loop {
        // No further comment opener: the remainder is kept verbatim.
        let Some((before, after_open)) = rest.split_once("<!--") else {
            kept.push_str(rest);
            return kept;
        };

        kept.push_str(before);

        match after_open.split_once("-->") {
            Some((_, after_close)) => rest = after_close,
            // Unterminated comment: drop everything after the opener.
            None => return kept,
        }
    }
}

/// Skips whitespace, comments, processing instructions and `<!...>`
/// declarations at the front of `input`.
///
/// Returns the remainder starting at the first thing that is none of those,
/// with leading whitespace trimmed. If one of the skipped constructs is
/// opened but never closed, the empty string is returned.
fn strip_leading_xml_misc(mut input: &str) -> &str {
    loop {
        let head = input.trim_start();

        // `Some(rest)` when a construct was recognised and closed, `None`
        // when it was recognised but never closed. Comments are tested
        // before the generic `<!` case because they share its prefix.
        let after_construct = if let Some(body) = head.strip_prefix("<!--") {
            body.find("-->").map(|at| &body[at + 3..])
        } else if head.starts_with("<?") {
            head.find("?>").map(|at| &head[at + 2..])
        } else if head.starts_with("<!") {
            head.find('>').map(|at| &head[at + 1..])
        } else {
            // Nothing left to skip.
            return head;
        };

        match after_construct {
            Some(rest) => input = rest,
            None => return "",
        }
    }
}

/// Returns the text of a tag up to, but not including, its closing `>`.
///
/// `input` is expected to begin just after the opening `<`. A `>` inside a
/// single- or double-quoted span does not end the tag. Returns `None` when
/// no unquoted `>` is found.
fn read_start_tag(input: &str) -> Option<&str> {
    // The quote character that opened the current quoted span, if any.
    let mut open_quote: Option<char> = None;

    for (pos, current) in input.char_indices() {
        match (open_quote, current) {
            // Only the matching quote character closes a quoted span.
            (Some(quote), c) if c == quote => open_quote = None,
            (Some(_), _) => {}
            (None, '"' | '\'') => open_quote = Some(current),
            (None, '>') => return Some(&input[..pos]),
            (None, _) => {}
        }
    }

    None
}

/// Parses a run of `name="value"` / `name='value'` pairs.
///
/// `fragment` is the part of a tag that follows the tag name. Parsing is
/// lenient and stops quietly at the first thing it cannot interpret (a `/`,
/// a name without `=`, an unquoted or unterminated value), returning the
/// attributes collected up to that point. ASCII whitespace is allowed
/// around `=` and between attributes. Values are copied verbatim.
fn parse_attributes_fragment(fragment: &str) -> Vec<XmlAttribute> {
    /// Drops leading ASCII whitespace.
    fn skip_ascii_ws(text: &str) -> &str {
        text.trim_start_matches(|c: char| c.is_ascii_whitespace())
    }

    let mut parsed = Vec::new();
    let mut rest = fragment;

    loop {
        rest = skip_ascii_ws(rest);
        if rest.is_empty() || rest.starts_with('/') {
            break;
        }

        // The name runs up to whitespace, `=` or `/`.
        let name_len = rest
            .find(|c: char| c.is_ascii_whitespace() || c == '=' || c == '/')
            .unwrap_or(rest.len());
        if name_len == 0 {
            break;
        }
        let (name, after_name) = rest.split_at(name_len);

        // An `=` must follow, optionally padded with whitespace.
        let Some(after_eq) = skip_ascii_ws(after_name).strip_prefix('=') else {
            break;
        };

        // The value must be quoted; the opening quote decides the closer.
        let mut value_chars = skip_ascii_ws(after_eq).chars();
        let quote = match value_chars.next() {
            Some(q @ ('"' | '\'')) => q,
            _ => break,
        };
        let body = value_chars.as_str();

        // An unterminated value ends parsing without recording the pair.
        let Some(close) = body.find(quote) else {
            break;
        };

        parsed.push(XmlAttribute {
            name: name.to_string(),
            value: body[..close].to_string(),
        });

        // Continue just past the closing quote (a single ASCII byte).
        rest = &body[close + 1..];
    }

    parsed
}