// Copyright 2023-2024 Hugo Osvaldo Barrera
//
// SPDX-License-Identifier: ISC
//! Utilities for handling XML data.
use std::{borrow::Cow, str::FromStr};
use http::{StatusCode, response::Parts, status::InvalidStatusCode};
use roxmltree::Node;
use crate::{
PropertyName, dav::check_status, encoding::normalise_percent_encoded, names,
requests::ParseResponseError,
};
/// Check all the statuses in a `multistatus` response.
///
/// Every `<DAV:status>` element found among the descendants of `root` is parsed as an HTTP
/// status line and validated. Processing stops at the first element that fails.
///
/// # Errors
///
/// - If any of the `<DAV:status>` nodes is missing the status text, returns
///   [`ParseResponseError::InvalidResponse`].
///
/// - If the text inside a `<DAV:status>` node is not a valid status line, returns
///   [`ParseResponseError::InvalidStatusCode`].
///
/// - If any of the statuses are non-success, returns [`ParseResponseError::BadStatusCode`].
pub fn check_multistatus(root: Node) -> Result<(), ParseResponseError> {
    let status_nodes = root
        .descendants()
        .filter(|node| node.tag_name() == names::STATUS);
    for node in status_nodes {
        // Build the error lazily: `ok_or` would allocate the message for every status node,
        // including the (overwhelmingly common) ones that do have text.
        let status_line = node.text().ok_or_else(|| {
            ParseResponseError::InvalidResponse("missing text inside 'DAV:status'".into())
        })?;
        check_status(parse_statusline(status_line)?)?;
    }
    Ok(())
}
/// Ensure a response succeeded, then parse its body as XML.
///
/// The status in `parts` is checked first; the body is only decoded and parsed when the
/// status is a success code. The returned document borrows from `body`.
///
/// # Errors
///
/// - [`ParseResponseError::BadStatusCode`] if the status is not a success code.
/// - [`ParseResponseError::NotUtf8`] if the body is not valid UTF-8.
/// - [`ParseResponseError::Xml`] if the body is not valid XML.
pub fn validate_xml_response<'b>(
    parts: &Parts,
    body: &'b [u8],
) -> Result<roxmltree::Document<'b>, ParseResponseError> {
    let status = parts.status;
    if status.is_success() {
        let text = std::str::from_utf8(body)?;
        let document = roxmltree::Document::parse(text)?;
        Ok(document)
    } else {
        Err(ParseResponseError::BadStatusCode(status))
    }
}
/// Extract the [`StatusCode`] from an HTTP status line.
///
/// The status code is taken to be the second space-separated field of the input, e.g. the
/// `200` in `HTTP/1.1 200 OK`. The protocol version and reason phrase are not inspected.
///
/// # See also
///
/// - The [status element](https://www.rfc-editor.org/rfc/rfc2518#section-12.9.1.2)
/// - [Status-Line](https://www.rfc-editor.org/rfc/rfc2068#section-6.1)
///
/// # Errors
///
/// If the input string does not match a status line.
pub fn parse_statusline(status_line: &str) -> Result<StatusCode, InvalidStatusCode> {
    // A missing second field is handed to the parser as an empty string so that it is
    // reported through the same `InvalidStatusCode` error as any other malformed code.
    let code = status_line.split(' ').nth(1).unwrap_or("");
    StatusCode::from_str(code)
}
/// An XML element node with optional attributes, children and text content.
///
/// Represents complex XML structures for WebDAV operations
/// that require nested elements, such as setting calendar component types.
///
/// All data is borrowed for the `'data` lifetime; the node itself owns only the vectors
/// holding its attributes and children. Use [`XmlNode::render_node`] to serialise a tree.
///
/// # Examples
///
/// Creating a `supported-calendar-component-set` property:
///
/// ```rust
/// use libdav::{ PropertyName, names};
/// use libdav::xmlutils::XmlNode;
///
/// let comp_name = PropertyName::new(names::CALDAV, "comp");
/// let vevent = XmlNode::new(&comp_name).with_attributes(vec![("name", "VEVENT")]);
/// let vtodo = XmlNode::new(&comp_name).with_attributes(vec![("name", "VTODO")]);
///
/// let supported_comps = PropertyName::new(names::CALDAV, "supported-calendar-component-set");
/// let prop = XmlNode::new(&supported_comps).with_children(vec![vevent, vtodo]);
/// ```
#[derive(Debug, PartialEq)]
pub struct XmlNode<'data> {
/// Element name (namespace and local name).
///
/// The namespace decides which prefix, if any, is written in front of the local name when
/// the node is rendered.
pub name: &'data PropertyName<'data, 'data>,
/// Attributes for this element as (name, value) pairs.
///
/// Rendered in the order given. Values are entity-escaped on rendering; names are written
/// verbatim.
pub attributes: Vec<(&'data str, &'data str)>,
/// Child elements, rendered in order after any text content.
pub children: Vec<XmlNode<'data>>,
/// Text content of this element.
///
/// An empty string means "no text". The text is entity-escaped on rendering.
pub characters: &'data str,
}
impl<'data> XmlNode<'data> {
    /// Build a bare node that carries only a name.
    ///
    /// The node starts with no attributes, no children and empty text; use the `with_*`
    /// builders to fill those in.
    #[must_use]
    pub fn new(name: &'data PropertyName<'data, 'data>) -> Self {
        Self {
            name,
            attributes: Vec::new(),
            children: Vec::new(),
            characters: "",
        }
    }

    /// Replace this node's attributes with `attributes`.
    #[must_use]
    pub fn with_attributes(self, attributes: Vec<(&'data str, &'data str)>) -> Self {
        Self { attributes, ..self }
    }

    /// Replace this node's children with `children`.
    #[must_use]
    pub fn with_children(self, children: Vec<XmlNode<'data>>) -> Self {
        Self { children, ..self }
    }

    /// Replace this node's text content with `text`.
    #[must_use]
    pub fn with_text(self, text: &'data str) -> Self {
        Self {
            characters: text,
            ..self
        }
    }

    /// Serialise this node and all of its descendants into an XML string.
    ///
    /// Each element is written with the prefix that corresponds to its namespace. Every
    /// well-known namespace used anywhere in the tree is declared once, on the root element,
    /// ordered by namespace URI. Elements whose namespace has no known prefix are written
    /// without a prefix and without a declaration.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use libdav::{PropertyName, names};
    /// use libdav::xmlutils::XmlNode;
    ///
    /// let comp_name = PropertyName::new(names::CALDAV, "comp");
    /// let vevent = XmlNode::new(&comp_name).with_attributes(vec![("name", "VEVENT")]);
    ///
    /// let xml = vevent.render_node();
    /// // Produces: <C:comp xmlns:C="urn:ietf:params:xml:ns:caldav" name="VEVENT"/>
    /// ```
    #[must_use]
    pub fn render_node(&self) -> String {
        let mut used_namespaces = std::collections::HashSet::new();
        self.collect_namespaces(&mut used_namespaces);

        // Hash iteration order is unspecified; sort so that the output is deterministic.
        let mut declarations: Vec<&str> = used_namespaces.into_iter().collect();
        declarations.sort_unstable();

        let mut xml = String::new();
        self.write_element(&mut xml, &declarations);
        xml
    }

    /// Gather the namespace of this node and of every descendant into `namespaces`.
    fn collect_namespaces(&self, namespaces: &mut std::collections::HashSet<&'data str>) {
        namespaces.insert(self.name.namespace());
        self.children
            .iter()
            .for_each(|child| child.collect_namespaces(namespaces));
    }

    /// Append this element's name to `out`, preceded by `prefix:` when a prefix is known.
    fn write_qualified_name(&self, out: &mut String) {
        let prefix = namespace_to_prefix(self.name.namespace());
        if !prefix.is_empty() {
            out.push_str(prefix);
            out.push(':');
        }
        out.push_str(self.name.name());
    }

    /// Append this element (and, recursively, its subtree) to `out`.
    ///
    /// `declarations` lists the namespaces to declare on this element, already in output
    /// order. The root receives the full list; nested elements receive an empty slice.
    fn write_element(&self, out: &mut String, declarations: &[&str]) {
        out.push('<');
        self.write_qualified_name(out);

        // Namespaces without a known prefix cannot be declared as `xmlns:<prefix>`; skip them.
        let prefixed = declarations
            .iter()
            .copied()
            .map(|namespace| (namespace_to_prefix(namespace), namespace))
            .filter(|(prefix, _)| !prefix.is_empty());
        for (prefix, namespace) in prefixed {
            out.push_str(" xmlns:");
            out.push_str(prefix);
            out.push_str("=\"");
            out.push_str(namespace);
            out.push('"');
        }

        for &(attr_name, attr_value) in &self.attributes {
            out.push(' ');
            out.push_str(attr_name);
            out.push_str("=\"");
            out.push_str(&escape_xml_entities(attr_value));
            out.push('"');
        }

        let has_content = !(self.children.is_empty() && self.characters.is_empty());
        if has_content {
            out.push('>');
            // Text goes first, then children. Escaping an empty string appends nothing.
            out.push_str(&escape_xml_entities(self.characters));
            for child in &self.children {
                child.write_element(out, &[]);
            }
            out.push_str("</");
            self.write_qualified_name(out);
            out.push('>');
        } else {
            // Nothing inside: emit a self-closing tag.
            out.push_str("/>");
        }
    }
}
/// Look up the conventional prefix for a namespace URI.
///
/// Returns an empty string for any namespace that is not one of the well-known ones.
#[must_use]
pub(crate) fn namespace_to_prefix(namespace: &str) -> &'static str {
    /// Well-known namespace URIs paired with the prefix used when rendering them.
    const KNOWN_PREFIXES: [(&str, &str); 4] = [
        (names::DAV, "D"),
        (names::CALDAV, "C"),
        (names::CARDDAV, "CARD"),
        (names::APPLE, "A"),
    ];

    KNOWN_PREFIXES
        .iter()
        .find(|(uri, _)| *uri == namespace)
        .map_or("", |&(_, prefix)| prefix)
}
/// Find an `href` node and return its normalised text value.
///
/// The first `href` element among the descendants of `node` is used. Its text is passed
/// through [`normalise_percent_encoded`], since hrefs may be percent-encoded.
///
/// # Errors
///
/// - [`ParseResponseError::InvalidResponse`] if there is no `href` element, or if it has no
///   text.
/// - Any error from normalising the percent-encoding, converted into a
///   [`ParseResponseError`].
pub(crate) fn get_normalised_href<'a>(node: &'a Node) -> Result<Cow<'a, str>, ParseResponseError> {
    // The errors are built lazily (`ok_or_else`) so that the success path does not allocate
    // message strings that are immediately thrown away.
    let href_node = node
        .descendants()
        .find(|node| node.tag_name() == names::HREF)
        .ok_or_else(|| ParseResponseError::InvalidResponse("missing href in response".into()))?;
    let text = href_node
        .text()
        .ok_or_else(|| ParseResponseError::InvalidResponse("missing text in href".into()))?;
    // Hrefs may be percent encoded: https://www.rfc-editor.org/rfc/rfc4918#section-8.3.1
    normalise_percent_encoded(text).map_err(ParseResponseError::from)
}
/// Escape characters into XML entities.
///
/// Replaces the five characters that have predefined XML entities:
///
/// | Character | Replacement |
/// |-----------|-------------|
/// | `"`       | `&quot;`    |
/// | `'`       | `&apos;`    |
/// | `<`       | `&lt;`      |
/// | `>`       | `&gt;`      |
/// | `&`       | `&amp;`     |
///
/// All other characters are copied through unchanged. When the input contains none of these
/// characters it is returned as [`Cow::Borrowed`] without allocating; otherwise a new string
/// is returned as [`Cow::Owned`].
#[must_use]
pub(crate) fn escape_xml_entities(input: &str) -> Cow<'_, str> {
    // Allocated only once the first character needing an escape is found.
    let mut escaped: Option<String> = None;
    // Byte offset just past the last escaped character; everything before it is already
    // present in `escaped`.
    let mut last_index = 0;
    for (i, c) in input.char_indices() {
        let replacement = match c {
            '"' => "&quot;",
            '\'' => "&apos;",
            '<' => "&lt;",
            '>' => "&gt;",
            '&' => "&amp;",
            _ => continue,
        };
        let res = escaped.get_or_insert_with(|| String::with_capacity(input.len()));
        // Copy the untouched run since the previous escape, then the entity itself.
        res.push_str(&input[last_index..i]);
        res.push_str(replacement);
        last_index = i + c.len_utf8();
    }
    match escaped {
        Some(mut res) => {
            res.push_str(&input[last_index..]);
            Cow::Owned(res)
        }
        None => Cow::Borrowed(input),
    }
}
#[inline]
pub(crate) fn get_newline_corrected_text(
node: &Node,
property: &PropertyName<'_, '_>,
) -> Result<String, ParseResponseError> {
node.descendants()
.find(|node| node.tag_name() == *property)
.ok_or(ParseResponseError::InvalidResponse(format!(
"missing {} in response",
property.name()
)))?
.text()
.ok_or(ParseResponseError::InvalidResponse(format!(
"missing text in property {property:?}"
)))
// "\r\n" is usually converted into "\n" during parsing. This needs to be undone.
//
// See: https://github.com/RazrFalcon/roxmltree/issues/102
// See: https://www.w3.org/TR/xml/#sec-line-ends
// See: https://www.rfc-editor.org/rfc/rfc4791#section-9.6
.map(normalise_newlines)
}
/// Normalise newlines by replacing any `\n` with `\r\n`.
///
/// A `\n` that is already preceded by `\r` is left as it is, so applying this function to
/// text that already uses `\r\n` returns an equal string. A `\r` that is not directly
/// followed by `\n` is never touched.
///
/// # Examples
///
/// ```rust
/// # use libdav::xmlutils::normalise_newlines;
/// // These inputs return the same value unchanged:
/// assert_eq!(normalise_newlines("hello\r\nworld"), "hello\r\nworld");
/// assert_eq!(normalise_newlines("hello\r\r\nworld"), "hello\r\r\nworld");
/// assert_eq!(normalise_newlines("hello\rworld"), "hello\rworld");
/// assert_eq!(normalise_newlines("hello\r\nworld\r\n"), "hello\r\nworld\r\n");
/// // These add a missing \r:
/// assert_eq!(normalise_newlines("hello\nworld"), "hello\r\nworld");
/// assert_eq!(normalise_newlines("hello\r\nworld\n"), "hello\r\nworld\r\n");
/// ```
#[must_use]
pub fn normalise_newlines(orig: &str) -> String {
    let mut normalised = String::new();
    let mut segments = orig.split('\n');
    // `split` always yields at least one segment, even for an empty input.
    let mut current = segments.next().unwrap_or("");
    for following in segments {
        // `current` was terminated by a `\n`: drop the `\r` it may already end with, then
        // write the full `\r\n` terminator.
        normalised.push_str(current.strip_suffix('\r').unwrap_or(current));
        normalised.push_str("\r\n");
        current = following;
    }
    // Whatever follows the last `\n` has no terminator and is copied verbatim.
    normalised.push_str(current);
    normalised
}
#[cfg(test)]
mod test {
use std::borrow::Cow;
use crate::{
PropertyName, names,
xmlutils::{XmlNode, escape_xml_entities, get_newline_corrected_text},
};
#[test]
fn get_newline_corrected_text_without_returns() {
let without_returns = "<ns0:multistatus xmlns:ns0=\"DAV:\" xmlns:ns1=\"urn:ietf:params:xml:ns:caldav\"><ns0:response><ns0:href>/user/calendars/qdBEnN9jwjQFLry4/1ehsci7nhH31.ics</ns0:href><ns0:propstat><ns0:status>HTTP/1.1 200 OK</ns0:status><ns0:prop><ns0:getetag>\"2d2c827debd802fb3844309b53254b90dd7fd900\"</ns0:getetag><ns1:calendar-data>BEGIN:VCALENDAR\nVERSION:2.0\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\nBEGIN:VEVENT\nSUMMARY:hello\\, testing\nDTSTART:19970714T170000Z\nDTSTAMP:19970610T172345Z\nUID:92gDWceCowpO\nEND:VEVENT\nEND:VCALENDAR\n</ns1:calendar-data></ns0:prop></ns0:propstat></ns0:response></ns0:multistatus>";
let expected = "BEGIN:VCALENDAR\r\nVERSION:2.0\r\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\r\nBEGIN:VEVENT\r\nSUMMARY:hello\\, testing\r\nDTSTART:19970714T170000Z\r\nDTSTAMP:19970610T172345Z\r\nUID:92gDWceCowpO\r\nEND:VEVENT\r\nEND:VCALENDAR\r\n";
let doc = roxmltree::Document::parse(without_returns).unwrap();
let responses = doc
.root_element()
.descendants()
.find(|node| node.tag_name() == names::RESPONSE)
.unwrap();
assert_eq!(
get_newline_corrected_text(&responses, &names::CALENDAR_DATA).unwrap(),
expected
);
}
#[test]
fn get_newline_corrected_text_with_returns() {
let with_returns = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<multistatus xmlns=\"DAV:\" xmlns:C=\"urn:ietf:params:xml:ns:caldav\">\n <response>\n <href>/dav/calendars/user/vdirsyncer@fastmail.com/UvrlExcG9Jp0gEzQ/2H8kQfNQj8GP.ics</href>\n <propstat>\n <prop>\n <getetag>\"4d92fc1c8bdc18bbf83caf34eeab7e7167eb292e\"</getetag>\n <C:calendar-data><![CDATA[BEGIN:VCALENDAR\r\nVERSION:2.0\r\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\r\nBEGIN:VEVENT\r\nUID:jSayX7OSdp3V\r\nDTSTAMP:19970610T172345Z\r\nDTSTART:19970714T170000Z\r\nSUMMARY:hello\\, testing\r\nEND:VEVENT\r\nEND:VCALENDAR\r\n]]></C:calendar-data>\n </prop>\n <status>HTTP/1.1 200 OK</status>\n </propstat>\n </response>\n</multistatus>\n";
let expected = "BEGIN:VCALENDAR\r\nVERSION:2.0\r\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\r\nBEGIN:VEVENT\r\nUID:jSayX7OSdp3V\r\nDTSTAMP:19970610T172345Z\r\nDTSTART:19970714T170000Z\r\nSUMMARY:hello\\, testing\r\nEND:VEVENT\r\nEND:VCALENDAR\r\n";
let doc = roxmltree::Document::parse(with_returns).unwrap();
let responses = doc
.root_element()
.descendants()
.find(|node| node.tag_name() == names::RESPONSE)
.unwrap();
assert_eq!(
get_newline_corrected_text(&responses, &names::CALENDAR_DATA).unwrap(),
expected
);
}
// Tests for escape_xml_entities
#[test]
fn escape_xml_entities_basic_substitution() {
let input = "This is a <test> with \"quotes\" and &.";
let expected = "This is a <test> with "quotes" and &.";
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Owned(_)));
}
#[test]
fn escape_xml_entities_multibyte_characters() {
let input = "你好";
let expected = input;
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn escape_xml_entities_multibyte_characters_and_tags() {
let input = "你好 <test>";
let expected = "你好 <test>";
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Owned(_)));
}
#[test]
fn escape_xml_entities_slash_no_change() {
let input = "Path/to/file";
let expected = input;
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn escape_xml_entities_at_symbol_no_change() {
let input = "user@example.com";
let expected = input;
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn escape_xml_entities_emoji_and_special_characters() {
let input = "😀 & <>";
let expected = "😀 & <>";
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Owned(_)));
}
#[test]
fn escape_xml_entities_empty_string() {
let input = "";
let expected = "";
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn escape_xml_entities_no_special_characters() {
let input = "Just some normal text";
let expected = input;
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn escape_xml_entities_url_encoded_slash() {
let input = "http://example.com/path%2Fto%2Ffile";
let expected = input;
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn render_node_simple_element() {
let comp_name = PropertyName::new(names::CALDAV, "comp");
let vevent = XmlNode::new(&comp_name).with_attributes(vec![("name", "VEVENT")]);
let xml = vevent.render_node();
assert_eq!(
xml,
r#"<C:comp xmlns:C="urn:ietf:params:xml:ns:caldav" name="VEVENT"/>"#
);
}
#[test]
fn render_node_with_children() {
let comp_name = PropertyName::new(names::CALDAV, "comp");
let vevent = XmlNode::new(&comp_name).with_attributes(vec![("name", "VEVENT")]);
let vtodo = XmlNode::new(&comp_name).with_attributes(vec![("name", "VTODO")]);
let supported_comps = PropertyName::new(names::CALDAV, "supported-calendar-component-set");
let prop = XmlNode::new(&supported_comps).with_children(vec![vevent, vtodo]);
let xml = prop.render_node();
assert_eq!(
xml,
concat!(
r#"<C:supported-calendar-component-set xmlns:C="urn:ietf:params:xml:ns:caldav">"#,
r#"<C:comp name="VEVENT"/>"#,
r#"<C:comp name="VTODO"/>"#,
r#"</C:supported-calendar-component-set>"#,
),
);
}
#[test]
fn render_node_with_text_content() {
let name_prop = PropertyName::new(names::DAV, "displayname");
let node = XmlNode::new(&name_prop).with_text("My Calendar");
let xml = node.render_node();
assert_eq!(
xml,
r#"<D:displayname xmlns:D="DAV:">My Calendar</D:displayname>"#
);
}
#[test]
fn render_node_escapes_text_content() {
let name_prop = PropertyName::new(names::DAV, "displayname");
let node = XmlNode::new(&name_prop).with_text("Test <with> & \"quotes\"");
let xml = node.render_node();
assert_eq!(
xml,
r#"<D:displayname xmlns:D="DAV:">Test <with> & "quotes"</D:displayname>"#
);
}
#[test]
fn render_node_escapes_attributes() {
let prop = PropertyName::new(names::CALDAV, "comp");
let node = XmlNode::new(&prop).with_attributes(vec![("name", "TEST<>&\"")]);
let xml = node.render_node();
assert_eq!(
xml,
r#"<C:comp xmlns:C="urn:ietf:params:xml:ns:caldav" name="TEST<>&""/>"#
);
}
#[test]
fn render_node_child_with_different_namespace() {
// Create a child with CALDAV namespace
let comp_name = PropertyName::new(names::CALDAV, "supported-calendar-component-set");
let child = XmlNode::new(&comp_name);
// Create a parent with DAV namespace
let prop_name = PropertyName::new(names::DAV, "prop");
let parent = XmlNode::new(&prop_name).with_children(vec![child]);
let xml = parent.render_node();
// Both namespaces declared with prefixes (sorted alphabetically)
assert_eq!(
xml,
concat!(
r#"<D:prop xmlns:D="DAV:" xmlns:C="urn:ietf:params:xml:ns:caldav">"#,
r#"<C:supported-calendar-component-set/></D:prop>"#,
)
);
}
#[test]
fn render_node_dav_namespace() {
let display_name = PropertyName::new(names::DAV, "displayname");
let node = XmlNode::new(&display_name).with_text("Test");
let xml = node.render_node();
assert_eq!(xml, r#"<D:displayname xmlns:D="DAV:">Test</D:displayname>"#);
assert!(xml.contains("xmlns:D="));
}
}