Struct scraper::html::Html

source ·
pub struct Html {
    pub errors: Vec<Cow<'static, str>>,
    pub quirks_mode: QuirksMode,
    pub tree: Tree<Node>,
}
Expand description

An HTML tree.

Parsing does not fail hard. Instead, the quirks_mode is set and errors are added to the errors field. The tree will still be populated as best as possible.

Implements the TreeSink trait from the html5ever crate, which allows HTML to be parsed.

Fields§

§errors: Vec<Cow<'static, str>>

Parse errors.

§quirks_mode: QuirksMode

The quirks mode.

§tree: Tree<Node>

The node tree.

Implementations§

source§

impl Html

source

pub fn new_document() -> Self

Creates an empty HTML document.

source

pub fn new_fragment() -> Self

Creates an empty HTML fragment.

source

pub fn parse_document(document: &str) -> Self

Parses a string of HTML as a document.

This is a convenience method for the following:

use html5ever::driver::{self, ParseOpts};
use scraper::Html;
use tendril::TendrilSink;

let parser = driver::parse_document(Html::new_document(), ParseOpts::default());
let html = parser.one(document);
Examples found in repository?
examples/document.rs (line 21)
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
fn main() {
    let mut input = String::new();
    let mut stdout = io::stdout();
    let mut stdin = io::stdin();

    write!(stdout, "CSS selector: ").unwrap();
    stdout.flush().unwrap();
    stdin.read_line(&mut input).unwrap();
    let selector = Selector::parse(&input).unwrap();

    writeln!(stdout, "HTML document:").unwrap();
    stdout.flush().unwrap();
    input.clear();
    stdin.read_to_string(&mut input).unwrap();
    let document = Html::parse_document(&input);

    println!("{:#?}", document);

    for node in document.select(&selector) {
        println!("{:?}", node.value());
    }
}
source

pub fn parse_fragment(fragment: &str) -> Self

Parses a string of HTML as a fragment.

Examples found in repository?
examples/fragment.rs (line 21)
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
fn main() {
    let mut input = String::new();
    let mut stdout = io::stdout();
    let mut stdin = io::stdin();

    write!(stdout, "CSS selector: ").unwrap();
    stdout.flush().unwrap();
    stdin.read_line(&mut input).unwrap();
    let selector = Selector::parse(&input).unwrap();

    writeln!(stdout, "HTML fragment:").unwrap();
    stdout.flush().unwrap();
    input.clear();
    stdin.read_to_string(&mut input).unwrap();
    let fragment = Html::parse_fragment(&input);

    println!("{:#?}", fragment);

    for node in fragment.select(&selector) {
        println!("{:?}", node.value());
    }
}
source

pub fn select<'a, 'b>(&'a self, selector: &'b Selector) -> Select<'a, 'b>

Returns an iterator over elements matching a selector.

Examples found in repository?
examples/document.rs (line 25)
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
fn main() {
    let mut input = String::new();
    let mut stdout = io::stdout();
    let mut stdin = io::stdin();

    write!(stdout, "CSS selector: ").unwrap();
    stdout.flush().unwrap();
    stdin.read_line(&mut input).unwrap();
    let selector = Selector::parse(&input).unwrap();

    writeln!(stdout, "HTML document:").unwrap();
    stdout.flush().unwrap();
    input.clear();
    stdin.read_to_string(&mut input).unwrap();
    let document = Html::parse_document(&input);

    println!("{:#?}", document);

    for node in document.select(&selector) {
        println!("{:?}", node.value());
    }
}
More examples
Hide additional examples
examples/fragment.rs (line 25)
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
fn main() {
    let mut input = String::new();
    let mut stdout = io::stdout();
    let mut stdin = io::stdin();

    write!(stdout, "CSS selector: ").unwrap();
    stdout.flush().unwrap();
    stdin.read_line(&mut input).unwrap();
    let selector = Selector::parse(&input).unwrap();

    writeln!(stdout, "HTML fragment:").unwrap();
    stdout.flush().unwrap();
    input.clear();
    stdin.read_to_string(&mut input).unwrap();
    let fragment = Html::parse_fragment(&input);

    println!("{:#?}", fragment);

    for node in fragment.select(&selector) {
        println!("{:?}", node.value());
    }
}
source

pub fn root_element(&self) -> ElementRef<'_>

Returns the root <html> element.

source

pub fn html(&self) -> String

Serializes the entire document into HTML.

Trait Implementations§

source§

impl Clone for Html

source§

fn clone(&self) -> Html

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for Html

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl PartialEq for Html

source§

fn eq(&self, other: &Html) -> bool

This method tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

This method tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
source§

impl<'a> Selectable<'a> for &'a Html

§

type Select<'b> = Select<'a, 'b>

Iterator over element references matching a CSS selector (see Selector).
source§

fn select(self, selector: &Selector) -> Self::Select<'_>

Applies the given selector to the collection of elements represented by self.
source§

impl Serialize for Html

source§

fn serialize<S: Serializer>( &self, serializer: &mut S, traversal_scope: TraversalScope ) -> Result<(), Error>

Take the serializer and call its methods to serialize this type. The type will dictate which methods are called and with what parameters.
source§

impl TreeSink for Html

Note: does not support the <template> element.

§

type Output = Html

The overall result of parsing. Read more
§

type Handle = NodeId

Handle is a reference to a DOM node. The tree builder requires that a Handle implements Clone to get another reference to the same node.
source§

fn finish(self) -> Self

Consume this sink and return the overall result of parsing. Read more
source§

fn parse_error(&mut self, msg: Cow<'static, str>)

Signal a parse error.
source§

fn set_quirks_mode(&mut self, mode: QuirksMode)

Set the document’s quirks mode.
source§

fn get_document(&mut self) -> Self::Handle

Get a handle to the Document node.
source§

fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool

Do two handles refer to the same node?
source§

fn elem_name(&self, target: &Self::Handle) -> ExpandedName<'_>

What is the name of this element? Read more
source§

fn create_element( &mut self, name: QualName, attrs: Vec<Attribute>, _flags: ElementFlags ) -> Self::Handle

Create an element. Read more
source§

fn create_comment(&mut self, text: StrTendril) -> Self::Handle

Create a comment node.
source§

fn append_doctype_to_document( &mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril )

Append a DOCTYPE element to the Document node.
source§

fn append(&mut self, parent: &Self::Handle, child: NodeOrText<Self::Handle>)

Append a node as the last child of the given node. If this would produce adjacent sibling text nodes, it should concatenate the text instead. Read more
source§

fn append_before_sibling( &mut self, sibling: &Self::Handle, new_node: NodeOrText<Self::Handle> )

Append a node as the sibling immediately before the given node. Read more
source§

fn remove_from_parent(&mut self, target: &Self::Handle)

Detach the given node from its parent.
source§

fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle)

Remove all the children from node and append them to new_parent.
source§

fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<Attribute>)

Add each attribute to the given element, if no attribute with that name already exists. The tree builder promises this will never be called with anything other than an element.
source§

fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle

Get a handle to a template’s template contents. The tree builder promises this will never be called with anything other than a template element.
source§

fn mark_script_already_started(&mut self, _node: &Self::Handle)

Mark an HTML <script> as “already started”.
source§

fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle

Create a Processing Instruction node.
source§

fn append_based_on_parent_node( &mut self, element: &Self::Handle, prev_element: &Self::Handle, child: NodeOrText<Self::Handle> )

When the insertion point is decided by the existence of a parent node of the element, we consider both possibilities and send the element which will be used if a parent node exists, along with the element to be used if there isn’t one.
§

fn pop(&mut self, _node: &Self::Handle)

Indicate that a node was popped off the stack of open elements.
§

fn associate_with_form( &mut self, _target: &Self::Handle, _form: &Self::Handle, _nodes: (&Self::Handle, Option<&Self::Handle>) )

Associate the given form-associatable element with the form element.
§

fn is_mathml_annotation_xml_integration_point( &self, _handle: &Self::Handle ) -> bool

Returns true if the adjusted current node is an HTML integration point and the token is a start tag.
§

fn set_current_line(&mut self, _line_number: u64)

Called whenever the line number changes.
§

fn complete_script(&mut self, _node: &Self::Handle) -> NextParserState

Indicate that a script element is complete.
source§

impl Eq for Html

source§

impl StructuralPartialEq for Html

Auto Trait Implementations§

§

impl !RefUnwindSafe for Html

§

impl !Send for Html

§

impl !Sync for Html

§

impl Unpin for Html

§

impl UnwindSafe for Html

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.