#![forbid(unsafe_code)]
#![doc = include_str!("../README.md")]
/// Pseudo-attributes read from an XML declaration (`<?xml ... ?>`).
///
/// Every field is `None` when the corresponding pseudo-attribute is not
/// present in the declaration.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct XmlDeclaration {
    /// Value of the `version` pseudo-attribute.
    pub version: Option<String>,
    /// Value of the `encoding` pseudo-attribute.
    pub encoding: Option<String>,
    /// Value of the `standalone` pseudo-attribute.
    pub standalone: Option<String>,
}
/// A single `name="value"` pair taken from a tag or declaration.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct XmlAttribute {
    /// Attribute name exactly as written in the source text.
    pub name: String,
    /// Text between the quotes, without the quotes and without entity decoding.
    pub value: String,
}
/// The name and attributes of an element's start tag.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct XmlElement {
    /// Tag name as written, including any namespace prefix.
    pub name: String,
    /// Attributes in the order they appear in the start tag.
    pub attributes: Vec<XmlAttribute>,
}
/// Heuristically reports whether `input` appears to be an XML document.
///
/// Returns `true` when the text opens with an XML declaration, or failing
/// that, when a root element start tag can be extracted from it.
pub fn looks_like_xml(input: &str) -> bool {
    if has_xml_declaration(input) {
        return true;
    }
    extract_root_element(input).is_some()
}
/// Reports whether `input` begins (after optional leading whitespace) with an
/// XML declaration opener.
///
/// The `<?xml` prefix only counts when the processing-instruction target ends
/// right there, i.e. it is followed by whitespace, by `?`, or by the end of
/// the input. This keeps other processing instructions whose target merely
/// starts with `xml` (most commonly `<?xml-stylesheet ...?>`) from being
/// mistaken for the declaration.
pub fn has_xml_declaration(input: &str) -> bool {
    let Some(rest) = input.trim_start().strip_prefix("<?xml") else {
        return false;
    };
    match rest.chars().next() {
        // Truncated input that stops right after `<?xml` is still accepted.
        None => true,
        Some(next) => next.is_whitespace() || next == '?',
    }
}
/// Parses the XML declaration at the start of `input`, if there is one.
///
/// Leading whitespace is ignored. Returns `None` when the text does not open
/// with `<?xml` followed by whitespace or `?`, or when the declaration is not
/// terminated by `?>`. Processing instructions whose target merely starts
/// with `xml` (such as `<?xml-stylesheet ...?>`) are not declarations and
/// also yield `None`.
///
/// Pseudo-attributes other than `version`, `encoding` and `standalone` are
/// ignored; if one of them is repeated, the first occurrence wins.
pub fn extract_xml_declaration(input: &str) -> Option<XmlDeclaration> {
    let rest = input.trim_start().strip_prefix("<?xml")?;
    // The PI target must end right after `xml`.
    if !rest.starts_with(|c: char| c.is_whitespace() || c == '?') {
        return None;
    }
    let end = rest.find("?>")?;

    let mut declaration = XmlDeclaration {
        version: None,
        encoding: None,
        standalone: None,
    };
    // One pass over the parsed pseudo-attributes, moving values instead of cloning.
    for attribute in parse_attributes_fragment(&rest[..end]) {
        let slot = match attribute.name.as_str() {
            "version" => &mut declaration.version,
            "encoding" => &mut declaration.encoding,
            "standalone" => &mut declaration.standalone,
            _ => continue,
        };
        if slot.is_none() {
            *slot = Some(attribute.value);
        }
    }
    Some(declaration)
}
/// Returns `input` with a leading XML declaration removed.
///
/// When a terminated declaration is found, the returned slice starts right
/// after its closing `?>` (any whitespace before the declaration is dropped
/// as well). In every other case `input` is returned unchanged, including
/// when the declaration is unterminated or when the text opens with a
/// different processing instruction whose target only starts with `xml`
/// (for example `<?xml-stylesheet ...?>`).
pub fn strip_xml_declaration(input: &str) -> &str {
    let Some(rest) = input.trim_start().strip_prefix("<?xml") else {
        return input;
    };
    // The PI target must end right after `xml`.
    if !rest.starts_with(|c: char| c.is_whitespace() || c == '?') {
        return input;
    }
    match rest.find("?>") {
        Some(end) => &rest[end + 2..],
        None => input,
    }
}
/// Extracts the name and attributes of the first element start tag in `input`.
///
/// A leading XML declaration, comments, processing instructions and `<!...>`
/// declarations are skipped first. Returns `None` when no `<` follows, when
/// the tag is never closed by an unquoted `>`, or when the tag is empty or a
/// closing tag.
pub fn extract_root_element(input: &str) -> Option<XmlElement> {
    let body = strip_leading_xml_misc(strip_xml_declaration(input));
    let open = body.find('<')?;
    let raw_tag = read_start_tag(&body[open + 1..])?;

    // Drop surrounding whitespace and the `/` of a self-closing tag.
    let tag = raw_tag.trim().trim_end_matches('/').trim();
    if tag.is_empty() || tag.starts_with('/') {
        return None;
    }

    // `tag` is trimmed and non-empty, so the part before the first whitespace
    // is always a non-empty name.
    let (name, attribute_text) = match tag.split_once(char::is_whitespace) {
        Some((name, remainder)) => (name, remainder),
        None => (tag, ""),
    };

    Some(XmlElement {
        name: name.to_string(),
        attributes: parse_attributes_fragment(attribute_text),
    })
}
/// Parses the attributes out of a single tag given as text.
///
/// `element` may include the surrounding `<` / `>` and a trailing `/`; all of
/// these are optional. An empty vector is returned when the tag has no name
/// or when it is a declaration (`!`), processing instruction (`?`) or closing
/// tag (`/`).
pub fn extract_attributes(element: &str) -> Vec<XmlAttribute> {
    let trimmed = element.trim();
    let without_open = trimmed.strip_prefix('<').unwrap_or(trimmed);
    let inner = without_open.strip_suffix('>').unwrap_or(without_open);
    let tag = inner.trim().trim_end_matches('/').trim();

    let (name, attribute_text) = tag.split_once(char::is_whitespace).unwrap_or((tag, ""));

    // `None` covers the empty-name case; the rest are non-element markup.
    if matches!(name.chars().next(), None | Some('!' | '?' | '/')) {
        return Vec::new();
    }
    parse_attributes_fragment(attribute_text)
}
/// Looks up the value of the attribute called `name` in the tag `element`.
///
/// Returns the value of the first attribute whose name matches exactly, or
/// `None` when the tag carries no such attribute.
pub fn get_attribute(element: &str, name: &str) -> Option<String> {
    for attribute in extract_attributes(element) {
        if attribute.name == name {
            return Some(attribute.value);
        }
    }
    None
}
pub fn has_attribute(element: &str, name: &str) -> bool {
get_attribute(element, name).is_some()
}
/// Escapes the five characters that have predefined XML entities.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&apos;` respectively; every other character is copied
/// through unchanged. The result is safe to embed in element content or in
/// an attribute value delimited by either kind of quote.
pub fn escape_xml(input: &str) -> String {
    // Lower bound only: each escaped character expands the output.
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
/// Decodes the five predefined XML entities back into their characters.
///
/// `&lt;`, `&gt;`, `&quot;`, `&apos;` and `&amp;` become `<`, `>`, `"`, `'`
/// and `&`. Any other `&` sequence (unknown entities, numeric character
/// references, a bare ampersand) is copied through unchanged.
///
/// The input is decoded in a single left-to-right pass, so text produced by
/// decoding is never decoded again: `&amp;lt;` yields `&lt;`, not `<`.
pub fn unescape_xml(input: &str) -> String {
    const ENTITIES: [(&str, char); 5] = [
        ("&lt;", '<'),
        ("&gt;", '>'),
        ("&quot;", '"'),
        ("&apos;", '\''),
        ("&amp;", '&'),
    ];

    let mut output = String::with_capacity(input.len());
    let mut rest = input;
    while let Some(position) = rest.find('&') {
        output.push_str(&rest[..position]);
        let tail = &rest[position..];
        match ENTITIES.iter().find(|&&(entity, _)| tail.starts_with(entity)) {
            Some(&(entity, replacement)) => {
                output.push(replacement);
                rest = &tail[entity.len()..];
            }
            None => {
                // Not a known entity: keep the ampersand literally.
                output.push('&');
                rest = &tail[1..];
            }
        }
    }
    output.push_str(rest);
    output
}
/// Returns a copy of `input` with every `<!-- ... -->` comment removed.
///
/// Text outside comments is preserved verbatim. If a comment is opened but
/// never closed, everything from its `<!--` to the end of the input is
/// dropped.
pub fn strip_xml_comments(input: &str) -> String {
    const OPEN: &str = "<!--";
    const CLOSE: &str = "-->";

    let mut kept = String::new();
    let mut cursor = input;
    loop {
        let Some((before, after_open)) = cursor.split_once(OPEN) else {
            // No further comments: the remainder is kept as is.
            kept.push_str(cursor);
            return kept;
        };
        kept.push_str(before);
        match after_open.split_once(CLOSE) {
            Some((_comment, after_close)) => cursor = after_close,
            // Unterminated comment swallows the rest of the input.
            None => return kept,
        }
    }
}
/// Skips whitespace, comments, processing instructions and `<!...>`
/// declarations at the start of `input`.
///
/// Returns the remaining text with leading whitespace trimmed, or an empty
/// string when one of the skipped constructs is never terminated.
///
/// A `<!...>` declaration is considered to end at the first `>`, so a DOCTYPE
/// with an internal subset is only consumed piecewise.
fn strip_leading_xml_misc(mut input: &str) -> &str {
    loop {
        let head = input.trim_start();

        // Pick the text to search and the terminator that closes the construct.
        let (body, terminator) = if let Some(comment) = head.strip_prefix("<!--") {
            (comment, "-->")
        } else if head.starts_with("<?") {
            (head, "?>")
        } else if head.starts_with("<!") {
            (head, ">")
        } else {
            return head;
        };

        match body.split_once(terminator) {
            Some((_skipped, remainder)) => input = remainder,
            None => return "",
        }
    }
}
/// Returns the text of a start tag up to, but not including, its closing `>`.
///
/// `input` is expected to begin just after the opening `<`. A `>` inside a
/// single- or double-quoted attribute value does not end the tag. Returns
/// `None` when no unquoted `>` is found.
fn read_start_tag(input: &str) -> Option<&str> {
    let mut open_quote: Option<char> = None;
    for (offset, current) in input.char_indices() {
        match (open_quote, current) {
            // Inside a quoted value: only the matching quote is significant.
            (Some(quote), c) if c == quote => open_quote = None,
            (Some(_), _) => {}
            (None, '"' | '\'') => open_quote = Some(current),
            (None, '>') => return Some(&input[..offset]),
            (None, _) => {}
        }
    }
    None
}
/// Parses a run of `name="value"` / `name='value'` pairs.
///
/// `fragment` is the text that follows the tag name. Parsing is best-effort:
/// it stops silently at the first `/`, at a name that is not followed by `=`,
/// at a value that is not quoted, or at an unterminated value, and returns
/// the attributes collected up to that point. Values are returned verbatim,
/// without entity decoding.
fn parse_attributes_fragment(fragment: &str) -> Vec<XmlAttribute> {
    fn skip_whitespace(text: &str) -> &str {
        text.trim_start_matches(|c: char| c.is_ascii_whitespace())
    }

    let mut parsed = Vec::new();
    let mut rest = fragment;
    loop {
        rest = skip_whitespace(rest);

        // A name runs until ASCII whitespace, `=` or `/`. A zero-length name
        // means end of input, a leading `/`, or a stray `=`.
        let name_len = rest
            .find(|c: char| c.is_ascii_whitespace() || c == '=' || c == '/')
            .unwrap_or(rest.len());
        if name_len == 0 {
            break;
        }
        let (name, after_name) = rest.split_at(name_len);

        let Some(after_equals) = skip_whitespace(after_name).strip_prefix('=') else {
            break;
        };

        let quoted = skip_whitespace(after_equals);
        let Some(quote) = quoted.chars().next().filter(|&c| c == '"' || c == '\'') else {
            break;
        };

        // Both quote characters are one byte long, so the value starts at offset 1.
        let Some((value, after_value)) = quoted[1..].split_once(quote) else {
            break;
        };

        parsed.push(XmlAttribute {
            name: name.to_string(),
            value: value.to_string(),
        });
        rest = after_value;
    }
    parsed
}