facet_dom/
parser.rs

1//! DOM parser trait.
2
3use crate::DomEvent;
4
5/// A parser that emits DOM events from a tree-structured document.
6///
7/// Implementations exist for HTML (using html5gum) and XML parsers.
8pub trait DomParser<'de> {
9    /// The error type for parsing failures.
10    type Error: std::error::Error + 'static;
11
12    /// Get the next event from the document.
13    ///
14    /// Returns `Ok(None)` when the document is fully parsed.
15    fn next_event(&mut self) -> Result<Option<DomEvent<'de>>, Self::Error>;
16
17    /// Peek at the next event without consuming it.
18    fn peek_event(&mut self) -> Result<Option<&DomEvent<'de>>, Self::Error>;
19
20    /// Skip the current node and all its descendants.
21    ///
22    /// This is used when encountering unknown elements that should be ignored.
23    /// After calling this, the parser should be positioned after the matching `NodeEnd`.
24    fn skip_node(&mut self) -> Result<(), Self::Error>;
25
26    /// Get the current span in the source document, if available.
27    fn current_span(&self) -> Option<facet_reflect::Span> {
28        None
29    }
30
31    /// Whether this parser is lenient about text in unexpected places.
32    ///
33    /// HTML parsers return `true` - text without a corresponding field is silently discarded.
34    /// XML parsers return `false` - text without a corresponding field is an error.
35    fn is_lenient(&self) -> bool {
36        false
37    }
38
39    /// Returns the format namespace for this parser (e.g., "xml", "html").
40    ///
41    /// This is used to select format-specific proxy types when a field has
42    /// `#[facet(xml::proxy = XmlProxy)]` or similar format-namespaced proxies.
43    ///
44    /// Returns `None` by default, which falls back to format-agnostic proxies.
45    fn format_namespace(&self) -> Option<&'static str> {
46        None
47    }
48
49    /// Capture the current node as raw markup and skip past it.
50    ///
51    /// Must be called right after receiving a NodeStart event. Returns the raw
52    /// source text for the entire element (from opening tag through closing tag).
53    ///
54    /// Returns `None` if raw capture is not supported by this parser.
55    fn capture_raw_node(&mut self) -> Result<Option<std::borrow::Cow<'de, str>>, Self::Error> {
56        Ok(None)
57    }
58}