pub struct Page {
pub status_code: StatusCode,
pub error_status: Option<String>,
pub external_domains_caseless: Box<HashSet<CaseInsensitiveString>>,
pub final_redirect_destination: Option<String>,
/* private fields */
}
Expand description
Represents a visited page. The page contains HTML scraped with the `scraper` crate.
Fields§
status_code: StatusCode
The status code of the page request.
error_status: Option<String>
The error of the request if any.
external_domains_caseless: Box<HashSet<CaseInsensitiveString>>
The external URLs to group with the domain.
final_redirect_destination: Option<String>
The final destination of the page if redirects were performed [Not implemented in the chrome feature].
Implementations§
source§impl Page
impl Page
sourcepub async fn new_page(url: &str, client: &Client) -> Self
pub async fn new_page(url: &str, client: &Client) -> Self
Instantiate a new page and gather the HTML representation of the standard fetch_page_html.
sourcepub async fn new(url: &str, client: &Client) -> Self
pub async fn new(url: &str, client: &Client) -> Self
Instantiate a new page and gather the html.
sourcepub async fn screenshot(
&self,
_full_page: bool,
_omit_background: bool,
_format: CaptureScreenshotFormat,
_quality: Option<i64>,
_output_path: Option<impl AsRef<Path>>,
_clip: Option<ClipViewport>
) -> Vec<u8>
pub async fn screenshot( &self, _full_page: bool, _omit_background: bool, _format: CaptureScreenshotFormat, _quality: Option<i64>, _output_path: Option<impl AsRef<Path>>, _clip: Option<ClipViewport> ) -> Vec<u8>
Take a screenshot of the page. If the output path is set to None the screenshot will not be saved.
The feature flag `chrome_store_page` is required.
sourcepub fn get_url_final(&self) -> &str
pub fn get_url_final(&self) -> &str
URL getter for the page after redirects.
sourcepub fn set_external(
&mut self,
external_domains_caseless: Box<HashSet<CaseInsensitiveString>>
)
pub fn set_external( &mut self, external_domains_caseless: Box<HashSet<CaseInsensitiveString>> )
Set the external domains to treat as one.
sourcepub fn set_html_bytes(&mut self, html: Option<Bytes>)
pub fn set_html_bytes(&mut self, html: Option<Bytes>)
Set the html directly of the page
sourcepub fn get_url_parsed(&self) -> &Url
pub fn get_url_parsed(&self) -> &Url
Parsed URL getter for page.
sourcepub fn get_html_bytes_u8(&self) -> &[u8] ⓘ
pub fn get_html_bytes_u8(&self) -> &[u8] ⓘ
Html getter for page to u8.
sourcepub fn get_html_encoded(&self, _label: &str) -> String
pub fn get_html_encoded(&self, _label: &str) -> String
HTML getter for getting the content with proper encoding. Pass in a proper encoding label like SHIFT_JIS. This falls back to get_html when the `encoding` feature flag is not enabled.
sourcepub fn push_link<A: PartialEq + Eq + Hash + From<String>>(
&self,
href: &str,
map: &mut HashSet<A>,
base_domain: &CompactString,
parent_host: &CompactString,
parent_host_scheme: &CompactString
)
pub fn push_link<A: PartialEq + Eq + Hash + From<String>>( &self, href: &str, map: &mut HashSet<A>, base_domain: &CompactString, parent_host: &CompactString, parent_host_scheme: &CompactString )
Validate the link and push it into the map.
sourcepub async fn links_stream_xml_links_stream_base<A: PartialEq + Eq + Hash + From<String>>(
&self,
selectors: &(&CompactString, &SmallVec<[CompactString; 2]>),
xml: &str,
map: &mut HashSet<A>
)
pub async fn links_stream_xml_links_stream_base<A: PartialEq + Eq + Hash + From<String>>( &self, selectors: &(&CompactString, &SmallVec<[CompactString; 2]>), xml: &str, map: &mut HashSet<A> )
Find the links as a stream using string resource validation for XML files
sourcepub async fn links_stream_base<A: PartialEq + Eq + Hash + From<String>>(
&self,
selectors: &(&CompactString, &SmallVec<[CompactString; 2]>),
html: &str
) -> HashSet<A>
pub async fn links_stream_base<A: PartialEq + Eq + Hash + From<String>>( &self, selectors: &(&CompactString, &SmallVec<[CompactString; 2]>), html: &str ) -> HashSet<A>
Find the links as a stream using string resource validation
sourcepub async fn links_stream<A: PartialEq + Eq + Hash + From<String>>(
&self,
selectors: &(&CompactString, &SmallVec<[CompactString; 2]>)
) -> HashSet<A>
pub async fn links_stream<A: PartialEq + Eq + Hash + From<String>>( &self, selectors: &(&CompactString, &SmallVec<[CompactString; 2]>) ) -> HashSet<A>
Find the links as a stream using string resource validation
sourcepub async fn links_stream_full_resource<A: PartialEq + Eq + Hash + From<String>>(
&self,
selectors: &(&CompactString, &SmallVec<[CompactString; 2]>)
) -> HashSet<A>
pub async fn links_stream_full_resource<A: PartialEq + Eq + Hash + From<String>>( &self, selectors: &(&CompactString, &SmallVec<[CompactString; 2]>) ) -> HashSet<A>
Find the links as a stream using string resource validation
sourcepub async fn links(
&self,
selectors: &(CompactString, SmallVec<[CompactString; 2]>)
) -> HashSet<CaseInsensitiveString>
pub async fn links( &self, selectors: &(CompactString, SmallVec<[CompactString; 2]>) ) -> HashSet<CaseInsensitiveString>
Find all href links and return them using CSS selectors.
sourcepub async fn links_full(
&self,
selectors: &(CompactString, SmallVec<[CompactString; 2]>)
) -> HashSet<CaseInsensitiveString>
pub async fn links_full( &self, selectors: &(CompactString, SmallVec<[CompactString; 2]>) ) -> HashSet<CaseInsensitiveString>
Find all href links and return them using CSS selectors gathering all resources.