facet_dom/parser.rs
1//! DOM parser trait.
2
3use crate::DomEvent;
4
5/// A parser that emits DOM events from a tree-structured document.
6///
7/// Implementations exist for HTML (using html5gum) and XML parsers.
8pub trait DomParser<'de> {
9 /// The error type for parsing failures.
10 type Error: std::error::Error + 'static;
11
12 /// Get the next event from the document.
13 ///
14 /// Returns `Ok(None)` when the document is fully parsed.
15 fn next_event(&mut self) -> Result<Option<DomEvent<'de>>, Self::Error>;
16
17 /// Peek at the next event without consuming it.
18 fn peek_event(&mut self) -> Result<Option<&DomEvent<'de>>, Self::Error>;
19
20 /// Skip the current node and all its descendants.
21 ///
22 /// This is used when encountering unknown elements that should be ignored.
23 /// After calling this, the parser should be positioned after the matching `NodeEnd`.
24 fn skip_node(&mut self) -> Result<(), Self::Error>;
25
26 /// Get the current span in the source document, if available.
27 fn current_span(&self) -> Option<facet_reflect::Span> {
28 None
29 }
30
31 /// Whether this parser is lenient about text in unexpected places.
32 ///
33 /// HTML parsers return `true` - text without a corresponding field is silently discarded.
34 /// XML parsers return `false` - text without a corresponding field is an error.
35 fn is_lenient(&self) -> bool {
36 false
37 }
38
39 /// Returns the format namespace for this parser (e.g., "xml", "html").
40 ///
41 /// This is used to select format-specific proxy types when a field has
42 /// `#[facet(xml::proxy = XmlProxy)]` or similar format-namespaced proxies.
43 ///
44 /// Returns `None` by default, which falls back to format-agnostic proxies.
45 fn format_namespace(&self) -> Option<&'static str> {
46 None
47 }
48
49 /// Capture the current node as raw markup and skip past it.
50 ///
51 /// Must be called right after receiving a NodeStart event. Returns the raw
52 /// source text for the entire element (from opening tag through closing tag).
53 ///
54 /// Returns `None` if raw capture is not supported by this parser.
55 fn capture_raw_node(&mut self) -> Result<Option<std::borrow::Cow<'de, str>>, Self::Error> {
56 Ok(None)
57 }
58}