xml-stinks 0.1.0

Powerful & easy manual XML deserialization using quick-xml
Documentation
//! Attribute.

use std::borrow::Cow;
use std::str::Utf8Error;

use quick_xml::events::attributes::{
    AttrError,
    Attribute as QuickXMLAttribute,
    Attributes,
};
use quick_xml::name::QName;
use quick_xml::Error as QuickXMLError;

use crate::escape::EscapeError;

/// Represents an XML attribute.
///
/// This is a thin wrapper around quick-xml's attribute type. The key and the
/// value are stored as bytes; use [`Attribute::key`] and [`Attribute::value`]
/// to get them as UTF-8 strings.
#[derive(Debug, Clone, PartialEq)]
pub struct Attribute<'data>
{
    // The wrapped quick-xml attribute holding the key and the (possibly
    // escaped) value bytes.
    inner: QuickXMLAttribute<'data>,
}

impl<'data> Attribute<'data>
{
    /// Returns a new `Attribute`.
    pub fn new(
        key: &'data (impl AsRef<[u8]> + ?Sized),
        value: impl Into<Cow<'data, [u8]>>,
    ) -> Self
    {
        Self {
            inner: QuickXMLAttribute {
                key: QName(key.as_ref()),
                value: value.into(),
            },
        }
    }

    /// Returns the key.
    ///
    /// # Errors
    /// Will return `Err` if the key is invalid UTF-8.
    pub fn key(&self) -> Result<&str, Error>
    {
        std::str::from_utf8(self.key_bytes()).map_err(Error::KeyNotUTF8)
    }

    /// Returns the key as bytes.
    #[must_use]
    pub fn key_bytes(&self) -> &[u8]
    {
        self.inner.key.as_ref()
    }

    /// Returns the value.
    ///
    /// # Errors
    /// Will return `Err` if:
    /// - The value is invalid UTF-8
    /// - Unescaping the value fails
    pub fn value(&self) -> Result<Cow<str>, Error>
    {
        self.inner.unescape_value().map_err(|err| match err {
            QuickXMLError::NonDecodable(Some(utf8_error)) => {
                Error::ValueNotUTF8(utf8_error)
            }
            QuickXMLError::EscapeError(escape_err) => {
                Error::UnescapeValueFailed(EscapeError::from_quick_xml(escape_err))
            }
            _ => {
                unreachable!();
            }
        })
    }

    /// Returns the value as bytes. They may or may not be escaped.
    #[must_use]
    pub fn value_bytes(&self) -> &[u8]
    {
        &self.inner.value
    }
}

// Crate-local functions
impl<'a> Attribute<'a>
{
    pub(crate) fn from_inner(inner: QuickXMLAttribute<'a>) -> Self
    {
        Self { inner }
    }

    pub(crate) fn into_inner(self) -> QuickXMLAttribute<'a>
    {
        self.inner
    }
}

/// Errors that can be raised when parsing [`Attribute`]s.
///
/// Recovery position in examples shows the position from which parsing of the
/// next attribute will be attempted.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum Error
{
    /// Attribute key was not followed by `=`, position relative to the start of
    /// the owning tag is provided.
    ///
    /// Example of input that raises this error:
    /// ```xml
    /// <tag key another="attribute"/>
    /// <!--     ^~~ error position, recovery position (8) -->
    /// ```
    #[error("Position {0}: attribute key must be directly followed by `=` or space")]
    ExpectedEq(usize),

    /// Attribute value was not found after `=`, position relative to the start
    /// of the owning tag is provided.
    ///
    /// Example of input that raises this error:
    /// ```xml
    /// <tag key = />
    /// <!--       ^~~ error position, recovery position (10) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because otherwise any content after `=` will be treated as a value.
    /// The XML
    /// ```xml
    /// <tag key = another-key = "value"/>
    /// <!--                   ^ ^- recovery position (24) -->
    /// <!--                   '~~ error position (22) -->
    /// ```
    ///
    /// will be treated as `Attribute { key = b"key", value = b"another-key" }`
    /// and either an [`Attribute`] is returned or [`Error::UnquotedValue`] is
    /// raised, depending on the parsing mode.
    #[error("Position {0}: `=` must be followed by an attribute value")]
    ExpectedValue(usize),

    /// Attribute value is not quoted, position relative to the start of the
    /// owning tag is provided.
    ///
    /// Example of input that raises this error:
    /// ```xml
    /// <tag key = value />
    /// <!--       ^    ^~~ recovery position (15) -->
    /// <!--       '~~ error position (10) -->
    /// ```
    #[error("Position {0}: attribute value must be enclosed in `\"` or `'`")]
    UnquotedValue(usize),

    /// Attribute value was not finished with a matching quote, position relative
    /// to the start of the owning tag and the expected quote are provided. That
    /// position is always the last character in the tag content.
    ///
    /// Example of input that raises this error:
    /// ```xml
    /// <tag key = "value  />
    /// <tag key = 'value  />
    /// <!--               ^~~ error position, recovery position (18) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because all input was consumed during scanning for a quote.
    #[error("Position {0}: missing closing quote `{1}` in attribute value")]
    ExpectedQuote(usize, u8),

    /// An attribute with the same name was already encountered. Two parameters
    /// define (1) the error position relative to the start of the owning tag
    /// for a new attribute and (2) the start position of a previously encountered
    /// attribute with the same name.
    ///
    /// Example of input that raises this error:
    /// ```xml
    /// <tag key = 'value'  key="value2" attr3='value3' />
    /// <!-- ^              ^            ^~~ recovery position (32) -->
    /// <!-- |              '~~ error position (19) -->
    /// <!-- '~~ previous position (4) -->
    /// ```
    #[error("Position {0}: duplicated attribute, previous declaration at position {1}")]
    Duplicated(usize, usize),

    /// Attribute key is not valid UTF-8. Returned by [`Attribute::key`].
    #[error("Attribute key is not valid UTF-8")]
    KeyNotUTF8(#[source] Utf8Error),

    /// Attribute value is not valid UTF-8. Returned by [`Attribute::value`].
    #[error("Attribute value is not valid UTF-8")]
    ValueNotUTF8(#[source] Utf8Error),

    /// Failed to unescape value. Returned by [`Attribute::value`].
    #[error("Failed to unescape value")]
    UnescapeValueFailed(#[source] EscapeError),
}

impl From<AttrError> for Error
{
    fn from(attr_err: AttrError) -> Self
    {
        match attr_err {
            AttrError::ExpectedEq(pos) => Self::ExpectedEq(pos),
            AttrError::ExpectedValue(pos) => Self::ExpectedValue(pos),
            AttrError::UnquotedValue(pos) => Self::UnquotedValue(pos),
            AttrError::ExpectedQuote(pos, quote) => Self::ExpectedQuote(pos, quote),
            AttrError::Duplicated(pos, same_attr_pos) => {
                Self::Duplicated(pos, same_attr_pos)
            }
        }
    }
}

/// Iterates through [`Attribute`]s.
///
/// Each item is either a successfully parsed [`Attribute`] or the [`Error`]
/// describing why the attribute could not be parsed.
#[derive(Debug)]
pub struct Iter<'a>
{
    // The underlying quick-xml attribute iterator that this type adapts.
    attrs: Attributes<'a>,
}

impl<'a> Iter<'a>
{
    pub(crate) fn new(attrs: Attributes<'a>) -> Self
    {
        Self { attrs }
    }
}

impl<'a> Iterator for Iter<'a>
{
    type Item = Result<Attribute<'a>, Error>;

    fn next(&mut self) -> Option<Self::Item>
    {
        let attr = self.attrs.next()?;

        Some(attr.map(Attribute::from_inner).map_err(Into::into))
    }
}