reqwest-scraper - Web scraping integration with reqwest
Extends reqwest to support multiple web scraping methods.
Features
- Use JsonPath to select fields in json response
- Select elements in HTML response using CSS selector
- Evaluate values in an HTML response using XPath expressions
- Derive macros to extract data from a response into a struct
Start Guide
- add dependency
reqwest = { version = "0.12", features = ["json"] }
reqwest-scraper = "0.3.2"
- use ScraperResponse
use ScraperResponse;
- `Json::select<T: DeserializeOwned>(path: &str) -> Result<Vec<T>>`
- `Json::select_one<T: DeserializeOwned>(path: &str) -> Result<T>`
- `Json::select_as_str(path: &str) -> Result<String>`
use ScraperResponse;
pub async
- `Html::select(selector: &str) -> Result<Selectable>`
- `Selectable::iter() -> impl Iterator<SelectItem>`
- `Selectable::first() -> Option<SelectItem>`
- `SelectItem::name() -> &str`
- `SelectItem::id() -> Option<&str>`
- `SelectItem::has_class(class: &str, case_sensitive: CaseSensitivity) -> bool`
- `SelectItem::classes() -> Classes`
- `SelectItem::attrs() -> Attrs`
- `SelectItem::attr(attr: &str) -> Option<&str>`
- `SelectItem::text() -> String`
- `SelectItem::html() -> String`
- `SelectItem::inner_html() -> String`
- `SelectItem::children() -> impl Iterator<SelectItem>`
- `SelectItem::find(selector: &str) -> Result<Selectable>`
use ScraperResponse;
async
- `XHtml::select(xpath: &str) -> Result<XPathResult>`
- `XPathResult::as_nodes() -> Vec<Node>`
- `XPathResult::as_strs() -> Vec<String>`
- `XPathResult::as_node() -> Option<Node>`
- `XPathResult::as_str() -> Option<String>`
- `Node::name() -> String`
- `Node::id() -> Option<String>`
- `Node::classes() -> HashSet<String>`
- `Node::attr(attr: &str) -> Option<String>`
- `Node::has_attr(attr: &str) -> bool`
- `Node::text() -> String`
- TODO: `Node::html() -> String`
- TODO: `Node::inner_html() -> String`
- `Node::children() -> Vec<Node>`
- `Node::findnodes(relative_xpath: &str) -> Result<Vec<Node>>`
- `Node::findvalues(relative_xpath: &str) -> Result<Vec<String>>`
- `Node::findnode(relative_xpath: &str) -> Result<Option<Node>>`
- `Node::findvalue(relative_xpath: &str) -> Result<Option<String>>`
async
Use `FromCssSelector` & `selector` to extract HTML elements into a struct
// define struct and derive the FromCssSelector trait
// request
let html = get
.await?
.css_selector
.await?;
// Use the generated `from_html` method to extract data into the struct
let items = from_html?;
items.iter.for_each;
Use `FromXPath` & `xpath` to extract HTML elements into a struct
// define struct and derive the FromXPath trait
let html = get
.await?
.xpath
.await?;
// Use the generated `from_xhtml` method to extract data into the struct
let items = from_xhtml?;
items.iter.for_each;