scrape_core/parser/html5.rs
1//! html5ever-based HTML parser implementation.
2
3use super::{ParseConfig, ParseError, ParseResult, Parser, private::Sealed};
4use crate::dom::Document;
5
/// HTML5 spec-compliant parser backed by html5ever.
///
/// Parsing is performed with the [html5ever](https://github.com/servo/html5ever)
/// crate, which implements the HTML5 parsing specification. Malformed HTML is
/// handled gracefully through the HTML5 error recovery algorithm.
///
/// The type is a field-less unit struct, so it carries no state: it is trivially
/// copyable, every value compares equal to every other, and it can be used as a
/// key in hashed collections.
///
/// # Examples
///
/// ```rust
/// use scrape_core::{Html5everParser, Parser};
///
/// let parser = Html5everParser;
/// let document = parser.parse("<html><body><h1>Hello</h1></body></html>").unwrap();
/// assert!(document.root().is_some());
/// ```
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Html5everParser;
23
24impl Sealed for Html5everParser {}
25
26impl Parser for Html5everParser {
27 fn parse_with_config(&self, html: &str, config: &ParseConfig) -> ParseResult<Document> {
28 self.parse_with_config_and_capacity(html, config, 256)
29 }
30}
31
32impl Html5everParser {
33 /// Parses HTML with the given configuration and pre-allocated capacity.
34 ///
35 /// # Errors
36 ///
37 /// Returns [`ParseError`] if parsing fails.
38 pub fn parse_with_config_and_capacity(
39 &self,
40 html: &str,
41 config: &ParseConfig,
42 capacity: usize,
43 ) -> ParseResult<Document> {
44 if html.trim().is_empty() {
45 return Err(ParseError::EmptyInput);
46 }
47
48 super::sink::parse_html_document(html, config, capacity)
49 }
50}