scraper/lib.rs
1//! HTML parsing and querying with CSS selectors.
2//!
3//! Scraper is on [Crates.io][crate] and [GitHub][github].
4//!
5//! [crate]: https://crates.io/crates/scraper
6//! [github]: https://github.com/programble/scraper
7//!
8//! Scraper provides an interface to Servo's `fast_html5ever` and `selectors` crates, for browser-grade
9//! parsing and querying. This project tweaks the main repository to directly fit the needs of the crate.
10//!
11//! # Examples
12//!
13//! ## Parsing a document
14//!
15//! ```
16//! use scraper::Html;
17//!
18//! let html = r#"
19//! <!DOCTYPE html>
20//! <meta charset="utf-8">
21//! <title>Hello, world!</title>
22//! <h1 class="foo">Hello, <i>world!</i></h1>
23//! "#;
24//!
25//! let document = Html::parse_document(html);
26//! ```
27//!
28//! ## Parsing a fragment
29//!
30//! ```
31//! use scraper::Html;
32//! let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
33//! ```
34//!
35//! ## Parsing a selector
36//!
37//! ```
38//! use scraper::Selector;
39//! let selector = Selector::parse("h1.foo").unwrap();
40//! ```
41//!
42//! ## Selecting elements
43//!
44//! ```
45//! use scraper::{Html, Selector};
46//!
47//! let html = r#"
48//! <ul>
49//! <li>Foo</li>
50//! <li>Bar</li>
51//! <li>Baz</li>
52//! </ul>
53//! "#;
54//!
55//! let fragment = Html::parse_fragment(html);
56//! let selector = Selector::parse("li").unwrap();
57//!
58//! for element in fragment.select(&selector) {
59//! assert_eq!("li", element.value().name());
60//! }
61//! ```
62//!
63//! ## Selecting descendant elements
64//!
65//! ```
66//! use scraper::{Html, Selector};
67//!
68//! let html = r#"
69//! <ul>
70//! <li>Foo</li>
71//! <li>Bar</li>
72//! <li>Baz</li>
73//! </ul>
74//! "#;
75//!
76//! let fragment = Html::parse_fragment(html);
77//! let ul_selector = Selector::parse("ul").unwrap();
78//! let li_selector = Selector::parse("li").unwrap();
79//!
80//! let ul = fragment.select(&ul_selector).next().unwrap();
81//! for element in ul.select(&li_selector) {
82//! assert_eq!("li", element.value().name());
83//! }
84//! ```
85//!
86//! ## Accessing element attributes
87//!
88//! ```
89//! use scraper::{Html, Selector};
90//!
91//! let fragment = Html::parse_fragment(r#"<input name="foo" value="bar">"#);
92//! let selector = Selector::parse(r#"input[name="foo"]"#).unwrap();
93//!
94//! let input = fragment.select(&selector).next().unwrap();
95//! assert_eq!(Some("bar"), input.value().attr("value"));
96//! ```
97//!
98//! ## Serializing HTML and inner HTML
99//!
100//! ```
101//! use scraper::{Html, Selector};
102//!
103//! let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
104//! let selector = Selector::parse("h1").unwrap();
105//!
106//! let h1 = fragment.select(&selector).next().unwrap();
107//!
108//! assert_eq!("<h1>Hello, <i>world!</i></h1>", h1.html());
109//! assert_eq!("Hello, <i>world!</i>", h1.inner_html());
110//! ```
111//!
112//! ## Accessing descendant text
113//!
114//! ```
115//! use scraper::{Html, Selector};
116//!
117//! let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
118//! let selector = Selector::parse("h1").unwrap();
119//!
120//! let h1 = fragment.select(&selector).next().unwrap();
121//! let text = h1.text().collect::<Vec<_>>();
122//!
123//! assert_eq!(vec!["Hello, ", "world!"], text);
124//! ```
125
// Re-exports: surface the main types at the crate root, which is how the
// examples in the crate docs import them (e.g. `use scraper::{Html, Selector};`).
126pub use element_ref::ElementRef;
127pub use html::Html;
128pub use node::Node;
129pub use selector::Selector;
// `Element` is re-exported from `selectors`, which is not a module declared
// in this file (the crate docs name `selectors` as one of the wrapped crates).
130pub use selectors::Element;
131
// Public submodules that make up the crate's API surface.
132pub mod element_ref;
133pub mod error;
134pub mod html;
135pub mod node;
136pub mod selector;
137
// `#[macro_use]` makes the macros exported by each crate available throughout
// this crate; the `pub` visibility additionally re-exports the crate itself to
// downstream users.
138#[macro_use]
139pub extern crate lazy_static;
140#[macro_use]
141pub extern crate fast_html5ever;
142
// The `test` module is compiled only for test builds.
143#[cfg(test)]
144mod test;