html-extractor 0.4.0

A Rust crate for extracting data from HTML

This crate provides an easy way to extract data from HTML.

[HtmlExtractor] is neither a parser nor a deserializer. It picks up only the desired data from HTML.

html_extractor! will help to implement [HtmlExtractor].

Examples

Extracting a simple value from HTML

use html_extractor::{html_extractor, HtmlExtractor};
html_extractor! {
#[derive(Debug, PartialEq)]
Foo {
foo: usize = (text of "#foo"),
}
}

fn main() {
let input = r#"
<div id="foo">1</div>
"#;
let foo = Foo::extract_from_str(input).unwrap();
assert_eq!(foo, Foo { foo: 1 });
}

Extracting a collection from HTML

use html_extractor::{html_extractor, HtmlExtractor};
html_extractor! {
#[derive(Debug, PartialEq)]
Foo {
foo: Vec<usize> = (text of ".foo", collect),
}
}

fn main() {
let input = r#"
<div class="foo">1</div>
<div class="foo">2</div>
<div class="foo">3</div>
<div class="foo">4</div>
"#;
let foo = Foo::extract_from_str(input).unwrap();
assert_eq!(foo, Foo { foo: vec![1, 2, 3, 4] });
}

Extracting with regex

use html_extractor::{html_extractor, HtmlExtractor};
html_extractor! {
#[derive(Debug, PartialEq)]
Foo {
(foo: usize,) = (text of "#foo", capture with "^foo=(.*)$"),
}
}

fn main() {
let input = r#"
<div id="foo">foo=1</div>
"#;
let foo = Foo::extract_from_str(input).unwrap();
assert_eq!(foo, Foo { foo: 1 });
}