h2s/
lib.rs

1//! A declarative HTML parser library in Rust, which works like a deserializer from HTML to struct.
2//!
3//! # Example
4//!
5//! ```
6//! use h2s::FromHtml;
7//!
8//! #[derive(FromHtml, Debug, Eq, PartialEq)]
9//! pub struct Page {
10//!     #[h2s(attr = "lang")]
11//!     lang: String,
12//!     #[h2s(select = "div > h1.blog-title")]
13//!     blog_title: String,
14//!     #[h2s(select = ".articles > div")]
15//!     articles: Vec<Article>,
16//! }
17//!
18//! #[derive(FromHtml, Debug, Eq, PartialEq)]
19//! pub struct Article {
20//!     #[h2s(select = "h2 > a")]
21//!     title: String,
22//!     #[h2s(select = "div > span")]
23//!     view_count: usize,
24//!     #[h2s(select = "h2 > a", attr = "href")]
25//!     url: String,
26//!     #[h2s(select = "ul > li")]
27//!     tags: Vec<String>,
28//!     #[h2s(select = "ul > li:nth-child(1)")]
29//!     first_tag: Option<String>,
30//! }
31//!
32//! let html = r#"
33//! <html lang="en">
34//! <body>
35//!   <div>
36//!       <h1 class="blog-title">My tech blog</h1>
37//!       <div class="articles">
38//!           <div>
39//!               <h2><a href="https://example.com/1">article1</a></h2>
40//!               <div><span>901</span> Views</div>
41//!               <ul><li>Tag1</li><li>Tag2</li></ul>
42//!           </div>
43//!           <div>
44//!               <h2><a href="https://example.com/2">article2</a></h2>
45//!               <div><span>849</span> Views</div>
46//!               <ul></ul>
47//!           </div>
48//!           <div>
49//!               <h2><a href="https://example.com/3">article3</a></h2>
50//!               <div><span>103</span> Views</div>
51//!               <ul><li>Tag3</li></ul>
52//!           </div>
53//!       </div>
54//!   </div>
55//! </body>
56//! </html>
57//! "#;
58//!
59//! let page = h2s::parse::<Page>(html).unwrap();
60//!
61//! assert_eq!(page, Page {
62//!     lang: "en".to_string(),
63//!     blog_title: "My tech blog".to_string(),
64//!     articles: vec![
65//!         Article {
66//!             title: "article1".to_string(),
67//!             url: "https://example.com/1".to_string(),
68//!             view_count: 901,
69//!             tags: vec!["Tag1".to_string(), "Tag2".to_string()],
70//!             first_tag: Some("Tag1".to_string()),
71//!         },
72//!         Article {
73//!             title: "article2".to_string(),
74//!             url: "https://example.com/2".to_string(),
75//!             view_count: 849,
76//!             tags: vec![],
77//!             first_tag: None,
78//!         },
79//!         Article {
80//!             title: "article3".to_string(),
81//!             url: "https://example.com/3".to_string(),
82//!             view_count: 103,
83//!             tags: vec!["Tag3".to_string()],
84//!             first_tag: Some("Tag3".to_string()),
85//!         },
86//!     ]
87//! });
88//!
89//! // When the structure of the input HTML document does not match the expected structure,
90//! // `h2s::parse` will return an error with a detailed reason.
91//! let invalid_html = html.replace(r#"<a href="https://example.com/3">article3</a>"#, "");
92//! let err = h2s::parse::<Page>(invalid_html).unwrap_err();
93//! assert_eq!(
94//!   err.to_string(),
95//!   "articles: [2]: title: mismatched number of selected elements by \"h2 > a\": expected exactly one element, but no elements found"
96//! );
97//! ```
98//!
99//! # Supported types
100//!
101//! The following types can be used as field types of the struct to parse into.
102//!
103//! ## Basic types
104//!
105//!   - `String`
106//!   - Numeric types ( `usize`, `i64`, `NonZeroU32`, ... )
107//!   - And more built-in supported types ([List](./core/src/parseable.rs))
108//!   - Or any other type, by implementing support for it yourself ([Example](./examples/custom_field_value.rs))
109//!
110//! ## Container types (where `T` is a basic type)
111//!
112//!   - `[T;N]`
113//!   - `Option<T>`
114//!   - `Vec<T>`
115
116use h2s_core::html::{Backend, HtmlDocument};
117pub use h2s_core::*;
118pub use h2s_macro::*;
119
120use crate::backend::scraper::Scraper;
121
122pub mod backend;
123
/// A shorthand for [`parse_with_backend`] that does not require specifying a backend HTML parser.
///
/// The document is parsed with the `Scraper` backend, so this function is only
/// compiled when the `backend-scraper` feature is enabled.
///
/// # Errors
///
/// Returns `T::Error` when `T::from_html` fails on the parsed document.
#[cfg(feature = "backend-scraper")]
pub fn parse<T>(html: impl AsRef<str>) -> Result<T, T::Error>
where
    T: FromHtml,
{
    // The whole function is already gated on `backend-scraper`, so the call
    // itself needs no additional `cfg` attribute.
    parse_with_backend::<T, Scraper>(html)
}
133
134/// Parsing with specific backend HTML parser
135pub fn parse_with_backend<T, B>(html: impl AsRef<str>) -> Result<T, T::Error>
136where
137    T: FromHtml,
138    B: Backend,
139{
140    T::from_html(B::parse_document(html).root_element())
141}