xmltok 0.14.0

Pull-based, zero-allocation XML tokenizer with compact, lifetime-free tokens. A fork of xmlparser.
Documentation
use core::fmt;
use core::ops::{Deref, Range};

/// A position-only string span with 32-bit bounds.
///
/// The span does not borrow the text it refers to; pass the text (and the
/// base offset the bounds are relative to) to [`DetachedStrSpan::as_str`]
/// to get the slice back.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct DetachedStrSpan {
    /// Start of the span, relative to the base offset.
    start: u32,
    /// End (exclusive) of the span, relative to the base offset.
    end: u32,
}

impl DetachedStrSpan {
    /// Resolves this span against `s`, treating both bounds as relative to
    /// `offset`.
    ///
    /// # Panics
    ///
    /// Panics if the resulting range is out of bounds for `s` or does not
    /// fall on UTF-8 character boundaries.
    pub fn as_str<'a>(&self, s: &'a str, offset: usize) -> &'a str {
        let from = offset + self.start as usize;
        let to = offset + self.end as usize;
        &s[from..to]
    }

    /// Returns the start bound, relative to the base offset.
    pub fn start(&self) -> u32 {
        let Self { start, .. } = *self;
        start
    }

    /// Returns the (exclusive) end bound, relative to the base offset.
    pub fn end(&self) -> u32 {
        let Self { end, .. } = *self;
        end
    }
}

/// A position-only string span with 16-bit bounds.
///
/// The compact sibling of the 32-bit detached span: it stores no reference
/// to the text, only two offsets relative to a base offset supplied when
/// the span is resolved with [`SmallDetachedStrSpan::as_str`].
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct SmallDetachedStrSpan {
    /// Start of the span, relative to the base offset.
    pub start: u16,
    /// End (exclusive) of the span, relative to the base offset.
    pub end: u16,
}

impl SmallDetachedStrSpan {
    /// Creates the zero-length span `0..0`.
    pub fn empty() -> Self {
        let zero = 0u16;
        SmallDetachedStrSpan {
            start: zero,
            end: zero,
        }
    }

    /// Returns `true` when the span covers no bytes (both bounds are equal).
    pub fn is_empty(&self) -> bool {
        self.end == self.start
    }

    /// Resolves this span against `s`, treating both bounds as relative to
    /// `offset`.
    ///
    /// # Panics
    ///
    /// Panics if the resulting range is out of bounds for `s` or does not
    /// fall on UTF-8 character boundaries.
    pub fn as_str<'a>(&self, s: &'a str, offset: usize) -> &'a str {
        let from = offset + usize::from(self.start);
        let to = offset + usize::from(self.end);
        &s[from..to]
    }

    /// Returns the start bound, relative to the base offset.
    pub fn start(&self) -> u16 {
        let Self { start, .. } = *self;
        start
    }

    /// Returns the (exclusive) end bound, relative to the base offset.
    pub fn end(&self) -> u16 {
        let Self { end, .. } = *self;
        end
    }
}

/// A string slice.
///
/// Like `&str`, but also contains the position in the input XML
/// from which it was parsed.
///
/// Equality and hashing are derived, so two spans are equal only when both
/// their text and their start position match.
#[must_use]
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct StrSpan<'a> {
    /// The text covered by this span.
    text: &'a str,
    /// Byte offset at which `text` begins in the string it was sliced from
    /// (`0` for spans built directly from a `&str`).
    start: usize,
}

impl<'a> From<&'a str> for StrSpan<'a> {
    #[inline]
    fn from(text: &'a str) -> Self {
        StrSpan { text, start: 0 }
    }
}

impl PartialEq<str> for StrSpan<'_> {
    fn eq(&self, other: &str) -> bool {
        self.text == other
    }
}

/// Compares the span's text with a `&str`; the span's position is ignored.
impl PartialEq<&str> for StrSpan<'_> {
    fn eq(&self, other: &&str) -> bool {
        let rhs: &str = other;
        self.text == rhs
    }
}

impl PartialEq<StrSpan<'_>> for str {
    fn eq(&self, other: &StrSpan<'_>) -> bool {
        self == other.text
    }
}

/// Compares a `&str` with the span's text; the span's position is ignored.
impl PartialEq<StrSpan<'_>> for &str {
    fn eq(&self, other: &StrSpan<'_>) -> bool {
        let lhs: &str = self;
        lhs == other.text
    }
}

impl<'a> StrSpan<'a> {
    /// Builds a `StrSpan` covering bytes `start..end` of `text`.
    ///
    /// The recorded position is `start`, i.e. relative to the beginning of
    /// `text`.
    ///
    /// # Panics
    ///
    /// Panics if `start..end` is not a valid range of `text` that falls on
    /// UTF-8 character boundaries.
    #[inline]
    pub(crate) fn from_substr(text: &str, start: usize, end: usize) -> StrSpan<'_> {
        debug_assert!(start <= end);
        let text = &text[start..end];
        StrSpan { text, start }
    }

    /// Returns `true` if the span covers no text.
    pub fn is_empty(&self) -> bool {
        self.as_str().is_empty()
    }

    /// Converts this span into a lifetime-free span with 32-bit bounds
    /// measured relative to `offset`.
    ///
    /// The caller must guarantee that the span ends within `u32::MAX` bytes
    /// of `offset`. The tokenizer ensures this by validating the total token
    /// length (see `token_end32`) before detaching any of its sub-spans.
    /// The bound is only checked by a debug assertion; the conversion
    /// itself uses truncating casts.
    pub fn detach(&self, offset: usize) -> DetachedStrSpan {
        match self.start.checked_sub(offset) {
            // The span starts before `offset`: it is a placeholder created
            // via `"".into()` (e.g. a missing qname prefix) and carries no
            // real position, so it maps to 0..0. Empty spans at real
            // positions take the other arm and keep their offsets, so token
            // end positions stay derivable from their last sub-span.
            None => {
                debug_assert!(self.is_empty());
                DetachedStrSpan { start: 0, end: 0 }
            }
            Some(rel_start) => {
                let abs_end = self.end();
                debug_assert!(abs_end >= self.start);
                let rel_end = abs_end - offset;
                debug_assert!(rel_end <= u32::MAX as usize);
                DetachedStrSpan {
                    start: rel_start as u32,
                    end: rel_end as u32,
                }
            }
        }
    }

    /// Converts this span into a lifetime-free span with 16-bit bounds
    /// measured relative to `offset`.
    ///
    /// The caller must guarantee that the span ends within `u16::MAX` bytes
    /// of `offset`. The tokenizer ensures this by validating the total token
    /// length (see `token_end16`) before detaching any of its sub-spans.
    /// The bound is only checked by a debug assertion; the conversion
    /// itself uses truncating casts.
    pub fn detach_small(&self, offset: usize) -> SmallDetachedStrSpan {
        match self.start.checked_sub(offset) {
            // Same rule as `detach()`: a placeholder span (one that starts
            // before `offset`) maps to 0..0, while empty spans at real
            // positions keep their offsets.
            None => {
                debug_assert!(self.is_empty());
                SmallDetachedStrSpan { start: 0, end: 0 }
            }
            Some(rel_start) => {
                let abs_end = self.end();
                debug_assert!(abs_end >= self.start);
                let rel_end = abs_end - offset;
                debug_assert!(rel_end <= u16::MAX as usize);
                SmallDetachedStrSpan {
                    start: rel_start as u16,
                    end: rel_end as u16,
                }
            }
        }
    }

    /// Returns the position at which the span begins.
    #[inline]
    pub fn start(&self) -> usize {
        let Self { start, .. } = *self;
        start
    }

    /// Returns the position just past the span's last byte
    /// (`start` plus the text length in bytes).
    #[inline]
    pub fn end(&self) -> usize {
        let len = self.text.len();
        self.start + len
    }

    /// Returns the span's positions as a half-open `start..end` range.
    #[inline]
    pub fn range(&self) -> Range<usize> {
        Range {
            start: self.start,
            end: self.end(),
        }
    }

    /// Returns the text covered by the span.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        let Self { text, .. } = *self;
        text
    }

    /// Returns a region of the underlying string as a `StrSpan`.
    ///
    /// `start` and `end` index into this span's own text, and the returned
    /// span records `start` as its position.
    #[inline]
    pub(crate) fn slice_region(&self, start: usize, end: usize) -> StrSpan<'a> {
        let whole: &'a str = self.text;
        StrSpan::from_substr(whole, start, end)
    }
}

/// Debug output shows the quoted text followed by the span's
/// `start..end` positions, e.g. `StrSpan("ab" 3..5)`.
impl fmt::Debug for StrSpan<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = self.as_str();
        let (lo, hi) = (self.start(), self.end());
        f.write_fmt(format_args!("StrSpan({:?} {}..{})", text, lo, hi))
    }
}

/// Displays the span's text exactly as a plain `&str` would be displayed.
///
/// Formatting is delegated to `str`'s own `Display` implementation using the
/// caller's formatter, so width, fill, alignment and precision given in the
/// format spec (e.g. `{:>8}` or `{:.3}`) are honoured. Going through
/// `write!(f, "{}", ..)` instead would start a fresh, default format spec
/// and silently drop those options.
impl fmt::Display for StrSpan<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self.as_str(), f)
    }
}

impl Deref for StrSpan<'_> {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        self.text
    }
}