// xml_syntax_reader/types.rs

/// Absolute byte range in the input stream.
///
/// `start` is inclusive, `end` is exclusive: `[start, end)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Offset of the first byte covered by the span (inclusive).
    pub start: u64,
    /// Offset one past the last byte covered by the span (exclusive).
    pub end: u64,
}

impl Span {
    /// Creates a span covering `[start, end)`.
    ///
    /// The bounds are stored exactly as given; no ordering check is performed.
    #[inline]
    pub fn new(start: u64, end: u64) -> Self {
        Self { start, end }
    }

    /// Returns the number of bytes covered by the span.
    ///
    /// An inverted span (`end < start`) covers no bytes and reports `0`
    /// instead of overflowing.
    #[inline]
    pub fn len(&self) -> u64 {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the span covers no bytes, i.e. `start >= end`.
    ///
    /// This always agrees with `self.len() == 0`.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.start >= self.end
    }
}

26/// Error from the XML syntax reader.
27#[derive(Debug, Clone, PartialEq, Eq)]
28pub struct Error {
29    pub kind: ErrorKind,
30    /// Absolute byte offset in the stream where the error occurred.
31    pub offset: u64,
32}
33
34impl core::fmt::Display for Error {
35    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
36        write!(f, "{} at byte offset {}", self.kind, self.offset)
37    }
38}
39
40impl core::error::Error for Error {}
41
/// Specific kind of syntax error carried by an [`Error`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ErrorKind {
    /// Unexpected byte encountered.
    UnexpectedByte(u8),
    /// Unexpected end of input within a construct.
    UnexpectedEof,
    /// Invalid character reference.
    InvalidCharRef,
    /// Double-hyphen (`--`) in comment body (XML 1.0 §2.5).
    DoubleDashInComment,
    /// Missing whitespace after `DOCTYPE` keyword.
    DoctypeMissingWhitespace,
    /// Missing or invalid name in `DOCTYPE` declaration.
    DoctypeMissingName,
    /// `]]>` appeared in text content (not allowed in well-formed XML).
    CdataEndInContent,
    /// Invalid UTF-8 byte sequence.
    InvalidUtf8,
    /// Name exceeded the 1000-byte limit.
    NameTooLong,
    /// Character reference value exceeded the 7-byte limit.
    CharRefTooLong,
    /// DOCTYPE internal subset bracket nesting exceeded the 1024 depth limit.
    DoctypeBracketsTooDeep,
    /// Malformed XML declaration (missing version, bad syntax, invalid standalone value).
    MalformedXmlDeclaration,
    /// PI target matching `[Xx][Mm][Ll]` appeared after the document start.
    ReservedPITarget,
}

72impl core::fmt::Display for ErrorKind {
73    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
74        match self {
75            ErrorKind::UnexpectedByte(b) => write!(f, "unexpected byte 0x{b:02X}"),
76            ErrorKind::UnexpectedEof => write!(f, "unexpected end of input"),
77            ErrorKind::InvalidCharRef => write!(f, "invalid character reference"),
78            ErrorKind::DoubleDashInComment => write!(f, "double-hyphen (--) in comment body"),
79            ErrorKind::DoctypeMissingWhitespace => {
80                write!(f, "missing whitespace after DOCTYPE keyword")
81            }
82            ErrorKind::DoctypeMissingName => {
83                write!(f, "missing or invalid name in DOCTYPE declaration")
84            }
85            ErrorKind::CdataEndInContent => write!(f, "]]> in text content"),
86            ErrorKind::InvalidUtf8 => write!(f, "invalid UTF-8"),
87            ErrorKind::NameTooLong => write!(f, "name exceeds 1000-byte limit"),
88            ErrorKind::CharRefTooLong => write!(f, "character reference exceeds 7-byte limit"),
89            ErrorKind::DoctypeBracketsTooDeep => {
90                write!(f, "DOCTYPE bracket nesting exceeds 1024 depth limit")
91            }
92            ErrorKind::MalformedXmlDeclaration => {
93                write!(f, "malformed XML declaration")
94            }
95            ErrorKind::ReservedPITarget => {
96                write!(f, "reserved PI target (xml) after document start")
97            }
98        }
99    }
100}
101
102/// Result of `Reader::parse()`.
103pub enum ParseError<E> {
104    /// XML syntax error.
105    Xml(Error),
106    /// Error returned by a `Visitor` callback.
107    Visitor(E),
108}
109
110impl<E: core::fmt::Debug> core::fmt::Debug for ParseError<E> {
111    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
112        match self {
113            ParseError::Xml(e) => write!(f, "ParseError::Xml({e:?})"),
114            ParseError::Visitor(e) => write!(f, "ParseError::Visitor({e:?})"),
115        }
116    }
117}
118
119impl<E: core::fmt::Display> core::fmt::Display for ParseError<E> {
120    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
121        match self {
122            ParseError::Xml(e) => write!(f, "XML error: {e}"),
123            ParseError::Visitor(e) => write!(f, "visitor error: {e}"),
124        }
125    }
126}
127
128impl<E: core::error::Error> core::error::Error for ParseError<E> {}
129
130impl<E> From<Error> for ParseError<E> {
131    fn from(e: Error) -> Self {
132        ParseError::Xml(e)
133    }
134}
135
136/// Encoding detected by `probe_encoding()`.
137#[derive(Debug, Clone, Copy, PartialEq, Eq)]
138pub enum Encoding {
139    Utf8,
140    Utf16Le,
141    Utf16Be,
142    Utf32Le,
143    Utf32Be,
144    /// Encoding declared in the XML declaration but not detectable from BOM.
145    Declared(DeclaredEncoding),
146    /// Could not determine encoding (e.g. empty or insufficient data).
147    Unknown,
148}
149
/// Returns `true` if `b` is one of the four XML whitespace bytes:
/// space (0x20), tab (0x09), line feed (0x0A) or carriage return (0x0D).
#[inline]
pub(crate) fn is_xml_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\n' | b'\r')
}

/// Encoding name extracted from the XML declaration, stored inline to avoid allocation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DeclaredEncoding {
    /// Name bytes; `new` fills the front and leaves the unused tail zeroed.
    buf: [u8; 40],
    /// Number of valid bytes at the front of `buf`.
    len: u8,
}

impl DeclaredEncoding {
    /// Stores `name` inline.
    ///
    /// Returns `None` when `name` is longer than the 40-byte buffer. The bytes
    /// are copied as-is; they are not checked for UTF-8 validity here.
    pub fn new(name: &[u8]) -> Option<Self> {
        let mut buf = [0u8; 40];
        // `get_mut` yields `None` when `name` does not fit, so the buffer's own
        // size is the single source of truth for the length limit.
        buf.get_mut(..name.len())?.copy_from_slice(name);
        Some(Self {
            buf,
            // Lossless: the slice above succeeded, so `name.len() <= 40`.
            len: name.len() as u8,
        })
    }

    /// Returns the stored name bytes (exactly what was passed to `new`).
    pub fn as_bytes(&self) -> &[u8] {
        &self.buf[..usize::from(self.len)]
    }

    /// Returns the stored name as a string slice, or `None` if the bytes are
    /// not valid UTF-8.
    pub fn as_str(&self) -> Option<&str> {
        core::str::from_utf8(self.as_bytes()).ok()
    }
}