Struct spider::page::Page

source ·
pub struct Page {
    pub status_code: StatusCode,
    pub error_status: Option<String>,
    pub external_domains_caseless: Box<HashSet<CaseInsensitiveString>>,
    pub final_redirect_destination: Option<String>,
    /* private fields */
}
Expand description

Represents a visited page. The page contains HTML scraped with scraper.

Fields§

§status_code: StatusCode

The status code of the page request.

§error_status: Option<String>

The error of the request if any.

§external_domains_caseless: Box<HashSet<CaseInsensitiveString>>

The external urls to group with the domain

§final_redirect_destination: Option<String>

The final destination of the page if redirects were performed [Not implemented in the chrome feature].

Implementations§

source§

impl Page

source

pub async fn new_page(url: &str, client: &Client) -> Self

Instantiate a new page and gather the HTML (a reproduction of the standard fetch_page_html).

source

pub async fn new(url: &str, client: &Client) -> Self

Instantiate a new page and gather the html.

source

pub async fn screenshot( &self, _full_page: bool, _omit_background: bool, _format: CaptureScreenshotFormat, _quality: Option<i64>, _output_path: Option<impl AsRef<Path>>, _clip: Option<ClipViewport> ) -> Vec<u8>

Take a screenshot of the page. If the output path is set to None, the screenshot will not be saved. The feature flag chrome_store_page is required.

source

pub fn is_empty(&self) -> bool

Page request fulfilled.

source

pub fn get_url(&self) -> &str

Url getter for page.

source

pub fn get_url_final(&self) -> &str

Url getter for page after redirects.

source

pub fn set_external( &mut self, external_domains_caseless: Box<HashSet<CaseInsensitiveString>> )

Set the external domains to treat as one.

source

pub fn set_html_bytes(&mut self, html: Option<Bytes>)

Set the HTML of the page directly.

source

pub fn get_url_parsed(&self) -> &Url

Parsed URL getter for page.

source

pub fn get_bytes(&self) -> Option<&Bytes>

Html getter for bytes on the page.

source

pub fn get_html(&self) -> String

Html getter for bytes on the page as string.

source

pub fn get_html_bytes_u8(&self) -> &[u8]

Html getter for page to u8.

source

pub fn get_html_encoded(&self, _label: &str) -> String

HTML getter that returns the content with the proper encoding. Pass in a valid encoding label such as SHIFT_JIS. This falls back to get_html when the encoding flag is not enabled.

Validate link and push into the map

Find the links as a stream using string resource validation for XML files

Find the links as a stream using string resource validation

Find the links as a stream using string resource validation

Find the links as a stream using string resource validation

Find all href links and return them using CSS selectors.

Find all href links and return them using CSS selectors gathering all resources.

Trait Implementations§

source§

impl Clone for Page

source§

fn clone(&self) -> Page

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for Page

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl !Freeze for Page

§

impl RefUnwindSafe for Page

§

impl Send for Page

§

impl Sync for Page

§

impl Unpin for Page

§

impl UnwindSafe for Page

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more