1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
//! A declarative HTML parser library in Rust, which works like a deserializer from HTML to struct.
//!
//! # Example
//!
//! ```
//! use h2s::FromHtml;
//!
//! #[derive(FromHtml, Debug, Eq, PartialEq)]
//! pub struct Page {
//!     #[h2s(attr = "lang")]
//!     lang: String,
//!     #[h2s(select = "div > h1.blog-title")]
//!     blog_title: String,
//!     #[h2s(select = ".articles > div")]
//!     articles: Vec<Article>,
//! }
//!
//! #[derive(FromHtml, Debug, Eq, PartialEq)]
//! pub struct Article {
//!     #[h2s(select = "h2 > a")]
//!     title: String,
//!     #[h2s(select = "div > span")]
//!     view_count: usize,
//!     #[h2s(select = "h2 > a", attr = "href")]
//!     url: String,
//!     #[h2s(select = "ul > li")]
//!     tags: Vec<String>,
//!     #[h2s(select = "ul > li:nth-child(1)")]
//!     first_tag: Option<String>,
//! }
//!
//! let html = r#"
//! <html lang="en">
//! <body>
//!   <div>
//!       <h1 class="blog-title">My tech blog</h1>
//!       <div class="articles">
//!           <div>
//!               <h2><a href="https://example.com/1">article1</a></h2>
//!               <div><span>901</span> Views</div>
//!               <ul><li>Tag1</li><li>Tag2</li></ul>
//!           </div>
//!           <div>
//!               <h2><a href="https://example.com/2">article2</a></h2>
//!               <div><span>849</span> Views</div>
//!               <ul></ul>
//!           </div>
//!           <div>
//!               <h2><a href="https://example.com/3">article3</a></h2>
//!               <div><span>103</span> Views</div>
//!               <ul><li>Tag3</li></ul>
//!           </div>
//!       </div>
//!   </div>
//! </body>
//! </html>
//! "#;
//!
//! let page = h2s::parse::<Page>(html).unwrap();
//!
//! assert_eq!(page, Page {
//!     lang: "en".to_string(),
//!     blog_title: "My tech blog".to_string(),
//!     articles: vec![
//!         Article {
//!             title: "article1".to_string(),
//!             url: "https://example.com/1".to_string(),
//!             view_count: 901,
//!             tags: vec!["Tag1".to_string(), "Tag2".to_string()],
//!             first_tag: Some("Tag1".to_string()),
//!         },
//!         Article {
//!             title: "article2".to_string(),
//!             url: "https://example.com/2".to_string(),
//!             view_count: 849,
//!             tags: vec![],
//!             first_tag: None,
//!         },
//!         Article {
//!             title: "article3".to_string(),
//!             url: "https://example.com/3".to_string(),
//!             view_count: 103,
//!             tags: vec!["Tag3".to_string()],
//!             first_tag: Some("Tag3".to_string()),
//!         },
//!     ]
//! });
//!
//! // When the structure of the input HTML document does not match the expected structure, `h2s::parse` returns an error with a detailed reason.
//! let invalid_html = html.replace(r#"<a href="https://example.com/3">article3</a>"#, "");
//! let err = h2s::parse::<Page>(invalid_html).unwrap_err();
//! assert_eq!(err.to_string(), "[articles(.articles > div)]: (index=2): [title(h2 > a)]: expected exactly one element, but no elements found");
//! ```
//!
//! # Supported types
//!
//! You can use the following types as a field value of the struct to parse.
//!
//! ## Basic types
//!
//!   - `String`
//!   - Numeric types ( `usize`, `i64`, `NonZeroU32`, ... )
//!   - And more built-in supported types ([List](./core/src/from_text.rs))
//!   - Or any other type, by implementing the conversion yourself ([Example](./examples/from_text_custom.rs))
//!
//! ## Container types (where `T` is a basic type)
//!
//!   - `[T;N]`
//!   - `Option<T>`
//!   - `Vec<T>`

use crate::backend::scraper::Scraper;
use crate::backend::{Backend, DocumentRoot};
pub use h2s_core::*;
pub use h2s_macro::*;

pub mod backend;

/// Parses `html` into a `T` without having to name a backend HTML parser.
///
/// This is a shorthand for [`parse_with_backend`] using the `Scraper` backend
/// and unit (`()`) arguments, so it is only usable for types whose
/// `FromHtml::Args` is `()`.
///
/// Only available when the `backend-scraper` feature is enabled.
///
/// # Errors
///
/// Returns `T::Error` when converting the parsed document into `T` fails.
#[cfg(feature = "backend-scraper")]
pub fn parse<T>(html: impl AsRef<str>) -> Result<T, T::Error>
where
    T: FromHtml<Args = ()>,
{
    // The whole function is already gated on `backend-scraper`, so the
    // backend selection needs no additional `cfg` here.
    parse_with_backend::<T, Scraper>(html, &())
}

/// Parses `html` into a `T` using the backend HTML parser `B`.
///
/// The document is parsed by `B::parse_document`, and its root element is
/// handed to `T::from_html` together with `args`.
///
/// # Errors
///
/// Returns whatever error `T::from_html` produces.
pub fn parse_with_backend<T, B>(html: impl AsRef<str>, args: &T::Args) -> Result<T, T::Error>
where
    T: FromHtml,
    B: Backend,
{
    // Keep the parsed document alive while its root element is in use.
    let document = B::parse_document(html);
    let root = document.root_element();
    T::from_html(&root, args)
}