facet_html/
lib.rs

1#![deny(unsafe_code)]
2#![deny(missing_docs, rustdoc::broken_intra_doc_links)]
3
4//! HTML parser and serializer implementing the facet format architecture.
5//!
6//! This crate provides:
7//! - **Parsing**: WHATWG-compliant HTML tokenization via html5gum
8//! - **Serialization**: Configurable HTML output (minified or pretty-printed)
9//!
10//! # Attributes
11//!
12//! After importing `use facet_html as html;`, you can use these attributes:
13//!
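//! - `#[facet(html::element)]` - Marks a field as a single HTML child element
//! - `#[facet(html::elements)]` - Marks a field as collecting multiple HTML child elements
//! - `#[facet(html::attribute)]` - Marks a field as an HTML attribute (on the element tag)
//! - `#[facet(html::text)]` - Marks a field as the text content of the element
//! - `#[facet(html::tag)]` - Marks a `String` field as storing the element's tag name (for custom elements)
//! - `#[facet(html::custom_element)]` - Marks an enum variant as the catch-all for unknown elements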
18//!
19//! # Parsing Example
20//!
21//! ```rust
22//! use facet::Facet;
23//! use facet_html as html;
24//!
25//! #[derive(Debug, Facet, PartialEq)]
26//! #[facet(rename = "html")]
27//! struct Document {
28//!     #[facet(html::element, default)]
29//!     head: Option<Head>,
30//!     #[facet(html::element, default)]
31//!     body: Option<Body>,
32//! }
33//!
34//! #[derive(Debug, Facet, PartialEq)]
35//! #[facet(rename = "head")]
36//! struct Head {
37//!     #[facet(html::element, default)]
38//!     title: Option<Title>,
39//! }
40//!
41//! #[derive(Debug, Facet, PartialEq)]
42//! #[facet(rename = "title")]
43//! struct Title {
44//!     #[facet(html::text, default)]
45//!     text: String,
46//! }
47//!
48//! #[derive(Debug, Facet, PartialEq)]
49//! #[facet(rename = "body")]
50//! struct Body {
51//!     #[facet(html::attribute, default)]
52//!     class: Option<String>,
53//!     #[facet(html::text, default)]
54//!     content: String,
55//! }
56//!
57//! let html_input = r#"<html><head><title>Hello</title></head><body class="main">World</body></html>"#;
58//! let doc: Document = html::from_str(html_input).unwrap();
59//!
60//! assert_eq!(doc.head.unwrap().title.unwrap().text, "Hello");
61//! assert_eq!(doc.body.as_ref().unwrap().class, Some("main".to_string()));
62//! assert_eq!(doc.body.unwrap().content, "World");
63//! ```
64//!
65//! # Serialization Example
66//!
67//! ```rust
68//! use facet::Facet;
69//! use facet_html as html;
70//!
71//! #[derive(Debug, Facet)]
72//! #[facet(rename = "div")]
73//! struct MyDiv {
74//!     #[facet(html::attribute, default)]
75//!     class: Option<String>,
76//!     #[facet(html::text, default)]
77//!     content: String,
78//! }
79//!
80//! let div = MyDiv {
81//!     class: Some("container".into()),
82//!     content: "Hello!".into(),
83//! };
84//!
85//! // Minified output (default)
86//! let output = html::to_string(&div).unwrap();
87//! assert_eq!(output, r#"<div class="container">Hello!</div>"#);
88//!
89//! // Pretty-printed output
90//! let output_pretty = html::to_string_pretty(&div).unwrap();
91//! ```
92//!
93//! # Pre-defined HTML Element Types
94//!
95//! For typed definitions of all standard HTML5 elements, use the `facet-html-dom` crate:
96//!
97//! ```rust,ignore
98//! use facet_html_dom::{Html, Body, Div, P, A, FlowContent};
99//!
100//! // Parse a complete HTML document
101//! let doc: Html = facet_html::from_str(html_source)?;
102//!
103//! // Access typed elements
104//! if let Some(body) = &doc.body {
105//!     for child in &body.children {
106//!         match child {
107//!             FlowContent::P(p) => println!("Paragraph: {:?}", p),
108//!             FlowContent::Div(div) => println!("Div: {:?}", div),
109//!             _ => {}
110//!         }
111//!     }
112//! }
113//! ```
114//!
115//! The DOM crate provides typed structs for all HTML5 elements with proper nesting
116//! via content model enums (`FlowContent`, `PhrasingContent`). Unknown elements
117//! and attributes (like `data-*`, `aria-*`) are captured in `extra` fields.
118
119mod parser;
120mod serializer;
121
122pub use parser::{HtmlError, HtmlParser};
123pub use serializer::{
124    HtmlSerializeError, HtmlSerializer, SerializeOptions, to_string, to_string_pretty,
125    to_string_with_options, to_vec, to_vec_with_options,
126};
127
128// HTML extension attributes for use with #[facet(html::attr)] syntax.
129//
130// After importing `use facet_html as html;`, users can write:
131//   #[facet(html::element)]
132//   #[facet(html::elements)]
133//   #[facet(html::attribute)]
134//   #[facet(html::text)]
135//   #[facet(html::tag)]
136//   #[facet(html::custom_element)]
137
138// Generate HTML attribute grammar using the grammar DSL.
139// This generates:
140// - `Attr` enum with all HTML attribute variants
141// - `__attr!` macro that dispatches to attribute handlers and returns ExtensionAttr
142// - `__parse_attr!` macro for parsing (internal use)
facet::define_attr_grammar! {
    // Namespace of the grammar: the `html` in `#[facet(html::element)]`.
    ns "html";
    // NOTE(review): presumably the path the generated macros use to refer back
    // to this crate — confirm against the `define_attr_grammar!` documentation.
    crate_path ::facet_html;

    // One variant per supported attribute; the user-facing spellings are
    // `element`, `elements`, `attribute`, `text`, `tag`, and `custom_element`.
    /// HTML attribute types for field and container configuration.
    pub enum Attr {
        /// Marks a field as a single HTML child element
        Element,
        /// Marks a field as collecting multiple HTML child elements
        Elements,
        /// Marks a field as an HTML attribute (on the element tag)
        Attribute,
        /// Marks a field as the text content of the element
        Text,
        /// Marks a field as storing the element's tag name (for custom elements).
        ///
        /// Used on a `String` field to capture the tag name of an unknown element
        /// during deserialization. When serializing, this value becomes the element's tag.
        Tag,
        /// Marks an enum variant as a catch-all for unknown elements.
        ///
        /// When deserializing, if no other variant matches the element name,
        /// this variant is selected. The variant's struct must have a field
        /// marked with `#[facet(html::tag)]` to capture the element name.
        CustomElement,
    }
}
170
171/// Deserialize an HTML document from a string.
172///
173/// # Example
174///
175/// ```rust
176/// use facet::Facet;
177/// use facet_html as html;
178///
179/// #[derive(Debug, Facet)]
180/// struct Div {
181///     #[facet(html::text, default)]
182///     text: String,
183/// }
184///
185/// let doc: Div = facet_html::from_str("<div>hello</div>").unwrap();
186/// assert_eq!(doc.text, "hello");
187/// ```
188pub fn from_str<'de, T: facet_core::Facet<'de>>(
189    s: &'de str,
190) -> Result<T, facet_format::DeserializeError<HtmlError>> {
191    let parser = HtmlParser::new(s.as_bytes());
192    let mut deserializer = facet_format::FormatDeserializer::new(parser);
193    deserializer.deserialize()
194}
195
196/// Deserialize an HTML document from bytes.
197pub fn from_slice<'de, T: facet_core::Facet<'de>>(
198    bytes: &'de [u8],
199) -> Result<T, facet_format::DeserializeError<HtmlError>> {
200    let parser = HtmlParser::new(bytes);
201    let mut deserializer = facet_format::FormatDeserializer::new(parser);
202    deserializer.deserialize()
203}