h2s/lib.rs
1//! A declarative HTML parser library in Rust, which works like a deserializer from HTML to struct.
2//!
3//! # Example
4//!
5//! ```
6//! use h2s::FromHtml;
7//!
8//! #[derive(FromHtml, Debug, Eq, PartialEq)]
9//! pub struct Page {
10//! #[h2s(attr = "lang")]
11//! lang: String,
12//! #[h2s(select = "div > h1.blog-title")]
13//! blog_title: String,
14//! #[h2s(select = ".articles > div")]
15//! articles: Vec<Article>,
16//! }
17//!
18//! #[derive(FromHtml, Debug, Eq, PartialEq)]
19//! pub struct Article {
20//! #[h2s(select = "h2 > a")]
21//! title: String,
22//! #[h2s(select = "div > span")]
23//! view_count: usize,
24//! #[h2s(select = "h2 > a", attr = "href")]
25//! url: String,
26//! #[h2s(select = "ul > li")]
27//! tags: Vec<String>,
28//! #[h2s(select = "ul > li:nth-child(1)")]
29//! first_tag: Option<String>,
30//! }
31//!
32//! let html = r#"
33//! <html lang="en">
34//! <body>
35//! <div>
36//! <h1 class="blog-title">My tech blog</h1>
37//! <div class="articles">
38//! <div>
39//! <h2><a href="https://example.com/1">article1</a></h2>
40//! <div><span>901</span> Views</div>
41//! <ul><li>Tag1</li><li>Tag2</li></ul>
42//! </div>
43//! <div>
44//! <h2><a href="https://example.com/2">article2</a></h2>
45//! <div><span>849</span> Views</div>
46//! <ul></ul>
47//! </div>
48//! <div>
49//! <h2><a href="https://example.com/3">article3</a></h2>
50//! <div><span>103</span> Views</div>
51//! <ul><li>Tag3</li></ul>
52//! </div>
53//! </div>
54//! </div>
55//! </body>
56//! </html>
57//! "#;
58//!
59//! let page = h2s::parse::<Page>(html).unwrap();
60//!
61//! assert_eq!(page, Page {
62//! lang: "en".to_string(),
63//! blog_title: "My tech blog".to_string(),
64//! articles: vec![
65//! Article {
66//! title: "article1".to_string(),
67//! url: "https://example.com/1".to_string(),
68//! view_count: 901,
69//! tags: vec!["Tag1".to_string(), "Tag2".to_string()],
70//! first_tag: Some("Tag1".to_string()),
71//! },
72//! Article {
73//! title: "article2".to_string(),
74//! url: "https://example.com/2".to_string(),
75//! view_count: 849,
76//! tags: vec![],
77//! first_tag: None,
78//! },
79//! Article {
80//! title: "article3".to_string(),
81//! url: "https://example.com/3".to_string(),
82//! view_count: 103,
83//! tags: vec!["Tag3".to_string()],
84//! first_tag: Some("Tag3".to_string()),
85//! },
86//! ]
87//! });
88//!
//! // When the structure of the input HTML document does not match what is expected,
//! // `h2s::parse` returns an error with a detailed reason.
91//! let invalid_html = html.replace(r#"<a href="https://example.com/3">article3</a>"#, "");
92//! let err = h2s::parse::<Page>(invalid_html).unwrap_err();
93//! assert_eq!(
94//! err.to_string(),
95//! "articles: [2]: title: mismatched number of selected elements by \"h2 > a\": expected exactly one element, but no elements found"
96//! );
97//! ```
98//!
99//! # Supported types
100//!
101//! You can use the following types as a field value of the struct to parse.
102//!
103//! ## Basic types
104//!
105//! - `String`
106//! - Numeric types ( `usize`, `i64`, `NonZeroU32`, ... )
107//! - And more built-in supported types ([List](./core/src/parseable.rs))
//! - Or you can use any other type by implementing support for it yourself ([Example](./examples/custom_field_value.rs))
109//!
110//! ## Container types (where `T` is a basic type)
111//!
112//! - `[T;N]`
113//! - `Option<T>`
114//! - `Vec<T>`
115
116use h2s_core::html::{Backend, HtmlDocument};
117pub use h2s_core::*;
118pub use h2s_macro::*;
119
120use crate::backend::scraper::Scraper;
121
122pub mod backend;
123
/// Parses `html` into a `T` without having to name a backend HTML parser.
///
/// This is a shorthand for [`parse_with_backend`] with `Scraper` chosen as the
/// backend. It is only compiled when the `backend-scraper` feature is enabled.
///
/// # Errors
///
/// Returns the `T::Error` reported by [`parse_with_backend`].
#[cfg(feature = "backend-scraper")]
pub fn parse<T>(html: impl AsRef<str>) -> Result<T, T::Error>
where
    T: FromHtml,
{
    // The function itself is already gated on `backend-scraper`, so the call
    // needs no additional `cfg` attribute.
    parse_with_backend::<T, Scraper>(html)
}
133
134/// Parsing with specific backend HTML parser
135pub fn parse_with_backend<T, B>(html: impl AsRef<str>) -> Result<T, T::Error>
136where
137 T: FromHtml,
138 B: Backend,
139{
140 T::from_html(B::parse_document(html).root_element())
141}