pub struct Configuration {Show 47 fields
pub respect_robots_txt: bool,
pub subdomains: bool,
pub tld: bool,
pub crawl_timeout: Option<Duration>,
pub preserve_host_header: bool,
pub blacklist_url: Option<Vec<CompactString>>,
pub whitelist_url: Option<Vec<CompactString>>,
pub user_agent: Option<Box<CompactString>>,
pub delay: u64,
pub request_timeout: Option<Box<Duration>>,
pub http2_prior_knowledge: bool,
pub proxies: Option<Vec<RequestProxy>>,
pub headers: Option<Box<SerializableHeaderMap>>,
pub redirect_limit: Box<usize>,
pub redirect_policy: RedirectPolicy,
pub cookie_str: Box<String>,
pub depth: usize,
pub depth_distance: usize,
pub stealth_mode: Tier,
pub viewport: Option<Viewport>,
pub budget: Option<HashMap<CaseInsensitiveString, u32>>,
pub wild_card_budgeting: bool,
pub external_domains_caseless: Box<HashSet<CaseInsensitiveString>>,
pub full_resources: bool,
pub accept_invalid_certs: bool,
pub auth_challenge_response: Option<AuthChallengeResponse>,
pub openai_config: Option<Box<GPTConfigs>>,
pub gemini_config: Option<Box<GeminiConfigs>>,
pub remote_multimodal: Option<Box<RemoteMultimodalConfigs>>,
pub shared_queue: bool,
pub return_page_links: bool,
pub retry: u8,
pub no_control_thread: bool,
pub only_html: bool,
pub concurrency_limit: Option<usize>,
pub normalize: bool,
pub shared: bool,
pub modify_headers: bool,
pub modify_http_client_headers: bool,
pub referer: Option<String>,
pub max_page_bytes: Option<f64>,
pub max_bytes_allowed: Option<u64>,
pub cache_policy: Option<BasicCachePolicy>,
pub network_interface: Option<String>,
pub local_address: Option<IpAddr>,
pub default_http_connect_timeout: Option<Duration>,
pub default_http_read_timeout: Option<Duration>,
/* private fields */
}Expand description
Structure to configure Website crawler
use spider::website::Website;
let mut website: Website = Website::new("https://choosealicense.com");
website.configuration.blacklist_url.insert(Default::default()).push("https://choosealicense.com/licenses/".to_string().into());
website.configuration.respect_robots_txt = true;
website.configuration.subdomains = true;
website.configuration.tld = true;Fields§
§respect_robots_txt: boolRespect the robots.txt file and do not scrape disallowed files. This may slow down crawls if the robots.txt file includes a delay.
subdomains: boolAllow sub-domains.
tld: boolAllow all tlds for domain.
crawl_timeout: Option<Duration>The max timeout for the crawl.
preserve_host_header: boolPreserve the HTTP host header from being included.
blacklist_url: Option<Vec<CompactString>>List of pages to not crawl. [optional: regex pattern matching]
whitelist_url: Option<Vec<CompactString>>List of pages to only crawl. [optional: regex pattern matching]
user_agent: Option<Box<CompactString>>User-Agent for request.
delay: u64Polite crawling delay in milliseconds.
request_timeout: Option<Box<Duration>>Request max timeout per page. By default the request times out in 15s. Set to None to disable.
http2_prior_knowledge: boolUse HTTP2 for connection. Enable if you know the website has http2 support.
proxies: Option<Vec<RequestProxy>>Use proxy list for performing network request.
headers: Option<Box<SerializableHeaderMap>>Headers to include with request.
redirect_limit: Box<usize>The max redirections allowed for request.
redirect_policy: RedirectPolicyThe redirect policy type to use.
cookie_str: Box<String>Cookie string to use for network requests, e.g. “foo=bar; Domain=blog.spider”
depth: usizeThe max depth to crawl for a website. Defaults to 25 to help prevent infinite recursion.
depth_distance: usizeThe depth to crawl pertaining to the root.
stealth_mode: TierUse stealth mode for requests.
viewport: Option<Viewport>Configure the viewport for chrome and viewport headers.
budget: Option<HashMap<CaseInsensitiveString, u32>>Crawl budget for the paths. This helps prevent crawling extra pages and limiting the amount.
wild_card_budgeting: boolIf wild card budgeting is found for the website.
external_domains_caseless: Box<HashSet<CaseInsensitiveString>>External domains to include case-insensitive.
full_resources: boolCollect all the resources found on the page.
accept_invalid_certs: boolDangerously accept invalid certificates.
auth_challenge_response: Option<AuthChallengeResponse>The auth challenge response. The ‘chrome_intercept’ flag is also required in order to intercept the response.
openai_config: Option<Box<GPTConfigs>>The OpenAI configs to use to help drive the chrome browser. This does nothing without the ‘openai’ flag.
gemini_config: Option<Box<GeminiConfigs>>The Gemini configs to use to help drive the chrome browser. This does nothing without the ‘gemini’ flag.
remote_multimodal: Option<Box<RemoteMultimodalConfigs>>Remote multimodal automation config (vision + LLM-driven steps).
Requires the agent feature for full functionality, uses stub type otherwise.
shared_queue: boolUse a shared queue strategy when crawling. This can scale workloads evenly that do not need priority.
return_page_links: boolReturn the page links in the subscription channels. This does nothing without the flag sync enabled.
retry: u8Retry count to attempt to swap proxies etc.
no_control_thread: boolSkip spawning a control thread that can pause, start, and shutdown the crawl.
only_html: boolExpect only to handle HTML to save on resources. This mainly only blocks the crawling and returning of resources from the server.
concurrency_limit: Option<usize>The concurrency limits to apply.
normalize: boolNormalize the HTML, de-duplicating the content.
shared: boolShare the state of the crawl. Requires the ‘disk’ feature flag.
modify_headers: boolModify the headers to act like a real-browser
modify_http_client_headers: boolModify the HTTP client headers only to act like a real-browser
referer: Option<String>The referer to use.
max_page_bytes: Option<f64>Determine the max bytes per page.
max_bytes_allowed: Option<u64>Determine the max bytes per browser context.
cache_policy: Option<BasicCachePolicy>The cache policy to use.
network_interface: Option<String>Bind the connections only on the network interface.
local_address: Option<IpAddr>Bind to a local IP Address.
default_http_connect_timeout: Option<Duration>The default http connect timeout
default_http_read_timeout: Option<Duration>The default http read timeout
Implementations§
Source§impl Configuration
impl Configuration
Sourcepub fn get_blacklist(&self) -> AllowList
pub fn get_blacklist(&self) -> AllowList
Handle the blacklist options.
Sourcepub fn set_whitelist(&mut self)
pub fn set_whitelist(&mut self)
Set the whitelist
Sourcepub fn configure_allowlist(&mut self)
pub fn configure_allowlist(&mut self)
Configure the allow list.
Sourcepub fn get_blacklist_compiled(&self) -> &AllowList
pub fn get_blacklist_compiled(&self) -> &AllowList
Get the blacklist compiled.
Sourcepub fn configure_budget(&mut self)
pub fn configure_budget(&mut self)
Setup the budget for crawling.
Sourcepub fn get_whitelist_compiled(&self) -> &AllowList
pub fn get_whitelist_compiled(&self) -> &AllowList
Get the whitelist compiled.
Sourcepub fn get_whitelist(&self) -> AllowList
pub fn get_whitelist(&self) -> AllowList
Handle the whitelist options.
Sourcepub fn with_respect_robots_txt(&mut self, respect_robots_txt: bool) -> &mut Self
pub fn with_respect_robots_txt(&mut self, respect_robots_txt: bool) -> &mut Self
Respect robots.txt file.
Sourcepub fn with_subdomains(&mut self, subdomains: bool) -> &mut Self
pub fn with_subdomains(&mut self, subdomains: bool) -> &mut Self
Include subdomains detection.
Sourcepub fn with_csp_bypass(&mut self, _enabled: bool) -> &mut Self
pub fn with_csp_bypass(&mut self, _enabled: bool) -> &mut Self
Bypass CSP protection detection. This does nothing without the feat flag chrome enabled.
Sourcepub fn with_network_interface(
&mut self,
network_interface: Option<String>,
) -> &mut Self
pub fn with_network_interface( &mut self, network_interface: Option<String>, ) -> &mut Self
Bind the connections only on the network interface.
Sourcepub fn with_local_address(&mut self, local_address: Option<IpAddr>) -> &mut Self
pub fn with_local_address(&mut self, local_address: Option<IpAddr>) -> &mut Self
Bind to a local IP Address.
Sourcepub fn with_crawl_timeout(
&mut self,
crawl_timeout: Option<Duration>,
) -> &mut Self
pub fn with_crawl_timeout( &mut self, crawl_timeout: Option<Duration>, ) -> &mut Self
The max duration for the crawl. This is useful when websites use a robots.txt with long durations and throttle the timeout removing the full concurrency.
Sourcepub fn with_delay(&mut self, delay: u64) -> &mut Self
pub fn with_delay(&mut self, delay: u64) -> &mut Self
Delay between requests in ms.
Sourcepub fn with_http2_prior_knowledge(
&mut self,
http2_prior_knowledge: bool,
) -> &mut Self
pub fn with_http2_prior_knowledge( &mut self, http2_prior_knowledge: bool, ) -> &mut Self
Only use HTTP/2.
Sourcepub fn with_request_timeout(
&mut self,
request_timeout: Option<Duration>,
) -> &mut Self
pub fn with_request_timeout( &mut self, request_timeout: Option<Duration>, ) -> &mut Self
Max time to wait for request. By default request times out in 15s. Set to None to disable.
Sourcepub fn with_sitemap(&mut self, _sitemap_url: Option<&str>) -> &mut Self
pub fn with_sitemap(&mut self, _sitemap_url: Option<&str>) -> &mut Self
Set the sitemap url. This does nothing without the sitemap feature flag.
Sourcepub fn with_ignore_sitemap(&mut self, _ignore_sitemap: bool) -> &mut Self
pub fn with_ignore_sitemap(&mut self, _ignore_sitemap: bool) -> &mut Self
Ignore the sitemap when crawling. This method does nothing if the sitemap is not enabled.
Sourcepub fn with_user_agent(&mut self, user_agent: Option<&str>) -> &mut Self
pub fn with_user_agent(&mut self, user_agent: Option<&str>) -> &mut Self
Add user agent to request.
Sourcepub fn with_preserve_host_header(&mut self, preserve: bool) -> &mut Self
pub fn with_preserve_host_header(&mut self, preserve: bool) -> &mut Self
Preserve the HOST header.
Sourcepub fn with_remote_multimodal(
&mut self,
remote_multimodal: Option<RemoteMultimodalConfigs>,
) -> &mut Self
pub fn with_remote_multimodal( &mut self, remote_multimodal: Option<RemoteMultimodalConfigs>, ) -> &mut Self
Use a remote multimodal model to drive browser automation.
When the agent feature is not enabled, this uses a stub type.
Sourcepub fn with_openai(&mut self, _openai_config: Option<GPTConfigs>) -> &mut Self
pub fn with_openai(&mut self, _openai_config: Option<GPTConfigs>) -> &mut Self
The OpenAI configs to use to drive the browser. This method does nothing if the openai is not enabled.
Sourcepub fn with_gemini(
&mut self,
_gemini_config: Option<GeminiConfigs>,
) -> &mut Self
pub fn with_gemini( &mut self, _gemini_config: Option<GeminiConfigs>, ) -> &mut Self
The Gemini configs to use to drive the browser. This method does nothing if the gemini is not enabled.
Cookie string to use in request. This does nothing without the cookies flag enabled.
Sourcepub fn with_fingerprint(&mut self, _fingerprint: bool) -> &mut Self
pub fn with_fingerprint(&mut self, _fingerprint: bool) -> &mut Self
Set custom fingerprint ID for request. This does nothing without the chrome flag enabled.
Sourcepub fn with_proxies(&mut self, proxies: Option<Vec<String>>) -> &mut Self
pub fn with_proxies(&mut self, proxies: Option<Vec<String>>) -> &mut Self
Use proxies for request.
Sourcepub fn with_proxies_direct(
&mut self,
proxies: Option<Vec<RequestProxy>>,
) -> &mut Self
pub fn with_proxies_direct( &mut self, proxies: Option<Vec<RequestProxy>>, ) -> &mut Self
Use proxies for request with control between chrome and http.
Use a shared semaphore to evenly handle workloads. The default is false.
Sourcepub fn with_blacklist_url<T>(
&mut self,
blacklist_url: Option<Vec<T>>,
) -> &mut Self
pub fn with_blacklist_url<T>( &mut self, blacklist_url: Option<Vec<T>>, ) -> &mut Self
Add blacklist urls to ignore.
Sourcepub fn with_whitelist_url<T>(
&mut self,
whitelist_url: Option<Vec<T>>,
) -> &mut Self
pub fn with_whitelist_url<T>( &mut self, whitelist_url: Option<Vec<T>>, ) -> &mut Self
Add whitelist urls to allow.
Sourcepub fn with_return_page_links(&mut self, return_page_links: bool) -> &mut Self
pub fn with_return_page_links(&mut self, return_page_links: bool) -> &mut Self
Return the links found on the page in the channel subscriptions. This method does nothing if the decentralized is enabled.
Sourcepub fn with_headers(&mut self, headers: Option<HeaderMap>) -> &mut Self
pub fn with_headers(&mut self, headers: Option<HeaderMap>) -> &mut Self
Set HTTP headers for request using reqwest::header::HeaderMap.
Sourcepub fn with_redirect_limit(&mut self, redirect_limit: usize) -> &mut Self
pub fn with_redirect_limit(&mut self, redirect_limit: usize) -> &mut Self
Set the max redirects allowed for request.
Sourcepub fn with_redirect_policy(&mut self, policy: RedirectPolicy) -> &mut Self
pub fn with_redirect_policy(&mut self, policy: RedirectPolicy) -> &mut Self
Set the redirect policy to use.
Sourcepub fn with_referer(&mut self, referer: Option<String>) -> &mut Self
pub fn with_referer(&mut self, referer: Option<String>) -> &mut Self
Add a referer (mis-spelling) to the request.
Sourcepub fn with_referrer(&mut self, referer: Option<String>) -> &mut Self
pub fn with_referrer(&mut self, referer: Option<String>) -> &mut Self
Add a referer to the request.
Sourcepub fn with_full_resources(&mut self, full_resources: bool) -> &mut Self
pub fn with_full_resources(&mut self, full_resources: bool) -> &mut Self
Determine whether to collect all the resources found on pages.
Sourcepub fn with_dismiss_dialogs(&mut self, _dismiss_dialogs: bool) -> &mut Self
pub fn with_dismiss_dialogs(&mut self, _dismiss_dialogs: bool) -> &mut Self
Determine whether to dismiss dialogs. This method does nothing if the chrome feature is not enabled.
Sourcepub fn with_cron(&mut self, _cron_str: &str, _cron_type: CronType) -> &mut Self
pub fn with_cron(&mut self, _cron_str: &str, _cron_type: CronType) -> &mut Self
Setup cron jobs to run. This does nothing without the cron flag enabled.
Sourcepub fn with_limit(&mut self, limit: u32) -> &mut Self
pub fn with_limit(&mut self, limit: u32) -> &mut Self
Set a crawl page limit. If the value is 0 there is no limit.
Sourcepub fn with_concurrency_limit(&mut self, limit: Option<usize>) -> &mut Self
pub fn with_concurrency_limit(&mut self, limit: Option<usize>) -> &mut Self
Set the concurrency limits. Set the value to None to use the default limits based on the system CPU cores * n.
Sourcepub fn with_evaluate_on_new_document(
&mut self,
_evaluate_on_new_document: Option<Box<String>>,
) -> &mut Self
pub fn with_evaluate_on_new_document( &mut self, _evaluate_on_new_document: Option<Box<String>>, ) -> &mut Self
Set a custom script to evaluate on new document creation. This does nothing without the feat flag chrome enabled.
Sourcepub fn with_auth_challenge_response(
&mut self,
_auth_challenge_response: Option<AuthChallengeResponse>,
) -> &mut Self
pub fn with_auth_challenge_response( &mut self, _auth_challenge_response: Option<AuthChallengeResponse>, ) -> &mut Self
Set the authentication challenge response. This does nothing without the feat flag chrome enabled.
Sourcepub fn with_depth(&mut self, depth: usize) -> &mut Self
pub fn with_depth(&mut self, depth: usize) -> &mut Self
Set a crawl depth limit. If the value is 0 there is no limit.
Sourcepub fn with_caching(&mut self, _cache: bool) -> &mut Self
pub fn with_caching(&mut self, _cache: bool) -> &mut Self
Cache the page following HTTP rules. This method does nothing if the cache feature is not enabled.
Sourcepub fn with_cache_skip_browser(&mut self, _skip: bool) -> &mut Self
pub fn with_cache_skip_browser(&mut self, _skip: bool) -> &mut Self
Skip browser rendering entirely if cached response exists. This method does nothing if the cache features are not enabled.
Sourcepub fn with_service_worker_enabled(&mut self, _enabled: bool) -> &mut Self
pub fn with_service_worker_enabled(&mut self, _enabled: bool) -> &mut Self
Enable or disable Service Workers. This method does nothing if the chrome feature is not enabled.
Sourcepub fn with_auto_geolocation(&mut self, _enabled: bool) -> &mut Self
pub fn with_auto_geolocation(&mut self, _enabled: bool) -> &mut Self
Automatically setup geo-location configurations when using a proxy. This method does nothing if the chrome feature is not enabled.
Sourcepub fn with_retry(&mut self, retry: u8) -> &mut Self
pub fn with_retry(&mut self, retry: u8) -> &mut Self
Set the retry limit for request. Set the value to 0 for no retries. The default is 0.
Sourcepub fn with_default_http_connect_timeout(
&mut self,
default_http_connect_timeout: Option<Duration>,
) -> &mut Self
pub fn with_default_http_connect_timeout( &mut self, default_http_connect_timeout: Option<Duration>, ) -> &mut Self
The default http connect timeout.
Sourcepub fn with_default_http_read_timeout(
&mut self,
default_http_read_timeout: Option<Duration>,
) -> &mut Self
pub fn with_default_http_read_timeout( &mut self, default_http_read_timeout: Option<Duration>, ) -> &mut Self
The default http read timeout.
Sourcepub fn with_no_control_thread(&mut self, no_control_thread: bool) -> &mut Self
pub fn with_no_control_thread(&mut self, no_control_thread: bool) -> &mut Self
Skip setting up a control thread for pause, start, and shutdown programmatic handling. This does nothing without the ‘control’ flag enabled.
Sourcepub fn with_viewport(&mut self, viewport: Option<Viewport>) -> &mut Self
pub fn with_viewport(&mut self, viewport: Option<Viewport>) -> &mut Self
Configures the viewport of the browser, which defaults to 800x600. This method does nothing if the ‘chrome’ feature is not enabled.
Sourcepub fn with_stealth(&mut self, _stealth_mode: bool) -> &mut Self
pub fn with_stealth(&mut self, _stealth_mode: bool) -> &mut Self
Use stealth mode for the request. This does nothing without the chrome flag enabled.
Sourcepub fn with_wait_for_almost_idle_network0(
&mut self,
_wait_for_almost_idle_network0: Option<WaitForIdleNetwork>,
) -> &mut Self
pub fn with_wait_for_almost_idle_network0( &mut self, _wait_for_almost_idle_network0: Option<WaitForIdleNetwork>, ) -> &mut Self
Wait for network to be almost idle with a max timeout. This does nothing without the chrome flag enabled.
Sourcepub fn with_wait_for_idle_network0(
&mut self,
_wait_for_idle_network0: Option<WaitForIdleNetwork>,
) -> &mut Self
pub fn with_wait_for_idle_network0( &mut self, _wait_for_idle_network0: Option<WaitForIdleNetwork>, ) -> &mut Self
Wait for network request with a max timeout. This does nothing without the chrome flag enabled.
Sourcepub fn with_wait_for_idle_network(
&mut self,
_wait_for_idle_network: Option<WaitForIdleNetwork>,
) -> &mut Self
pub fn with_wait_for_idle_network( &mut self, _wait_for_idle_network: Option<WaitForIdleNetwork>, ) -> &mut Self
Wait for idle network request. This method does nothing if the chrome feature is not enabled.
Sourcepub fn with_wait_for_idle_dom(
&mut self,
_wait_for_idle_dom: Option<WaitForSelector>,
) -> &mut Self
pub fn with_wait_for_idle_dom( &mut self, _wait_for_idle_dom: Option<WaitForSelector>, ) -> &mut Self
Wait for idle dom mutations for target element. This method does nothing if the chrome feature is not enabled.
Sourcepub fn with_wait_for_selector(
&mut self,
_wait_for_selector: Option<WaitForSelector>,
) -> &mut Self
pub fn with_wait_for_selector( &mut self, _wait_for_selector: Option<WaitForSelector>, ) -> &mut Self
Wait for a selector. This method does nothing if the chrome feature is not enabled.
Sourcepub fn with_wait_for_delay(
&mut self,
_wait_for_delay: Option<WaitForDelay>,
) -> &mut Self
pub fn with_wait_for_delay( &mut self, _wait_for_delay: Option<WaitForDelay>, ) -> &mut Self
Wait with a delay. Should only be used for testing. This method does nothing if the ‘chrome’ feature is not enabled.
Sourcepub fn with_chrome_intercept(
&mut self,
_chrome_intercept: RequestInterceptConfiguration,
_url: &Option<Box<Url>>,
) -> &mut Self
pub fn with_chrome_intercept( &mut self, _chrome_intercept: RequestInterceptConfiguration, _url: &Option<Box<Url>>, ) -> &mut Self
Use request intercept for the request to only allow content required for the page that matches the host. If the content is from a 3rd party it needs to be part of our include list. This method does nothing if the chrome_intercept is not enabled.
Sourcepub fn with_chrome_connection(
&mut self,
_chrome_connection_url: Option<String>,
) -> &mut Self
pub fn with_chrome_connection( &mut self, _chrome_connection_url: Option<String>, ) -> &mut Self
Set the connection url for the chrome instance. This method does nothing if the chrome is not enabled.
Sourcepub fn with_execution_scripts(
&mut self,
_execution_scripts: Option<ExecutionScriptsMap>,
) -> &mut Self
pub fn with_execution_scripts( &mut self, _execution_scripts: Option<ExecutionScriptsMap>, ) -> &mut Self
Set JS to run on certain pages. This method does nothing if the chrome is not enabled.
Sourcepub fn with_automation_scripts(
&mut self,
_automation_scripts: Option<AutomationScriptsMap>,
) -> &mut Self
pub fn with_automation_scripts( &mut self, _automation_scripts: Option<AutomationScriptsMap>, ) -> &mut Self
Run web automated actions on certain pages. This method does nothing if the chrome is not enabled.
Sourcepub fn with_budget(&mut self, budget: Option<HashMap<&str, u32>>) -> &mut Self
pub fn with_budget(&mut self, budget: Option<HashMap<&str, u32>>) -> &mut Self
Set a crawl budget per path with levels support /a/b/c or for all paths with “*”. This does nothing without the budget flag enabled.
Sourcepub fn with_external_domains<'a, 'b>(
&mut self,
external_domains: Option<impl Iterator<Item = String> + 'a>,
) -> &mut Self
pub fn with_external_domains<'a, 'b>( &mut self, external_domains: Option<impl Iterator<Item = String> + 'a>, ) -> &mut Self
Group external domains to treat the crawl as one. If None is passed this will clear all prior domains.
Sourcepub fn with_danger_accept_invalid_certs(
&mut self,
accept_invalid_certs: bool,
) -> &mut Self
pub fn with_danger_accept_invalid_certs( &mut self, accept_invalid_certs: bool, ) -> &mut Self
Dangerously accept invalid certificates - this should be used as a last resort.
Sourcepub fn with_normalize(&mut self, normalize: bool) -> &mut Self
pub fn with_normalize(&mut self, normalize: bool) -> &mut Self
Normalize the content de-duplicating trailing slash pages and other pages that can be duplicated. This may initially show the link in your links_visited or subscription calls but, the following links will not be crawled.
Store all the links found on the disk to share the state. This does nothing without the disk flag enabled.
Sourcepub fn with_timezone_id(&mut self, _timezone_id: Option<String>) -> &mut Self
pub fn with_timezone_id(&mut self, _timezone_id: Option<String>) -> &mut Self
Overrides default host system timezone with the specified one. This does nothing without the chrome flag enabled.
Sourcepub fn with_locale(&mut self, _locale: Option<String>) -> &mut Self
pub fn with_locale(&mut self, _locale: Option<String>) -> &mut Self
Overrides default host system locale with the specified one. This does nothing without the chrome flag enabled.
Sourcepub fn with_screenshot(
&mut self,
_screenshot_config: Option<ScreenShotConfig>,
) -> &mut Self
pub fn with_screenshot( &mut self, _screenshot_config: Option<ScreenShotConfig>, ) -> &mut Self
Set the chrome screenshot configuration. This does nothing without the chrome flag enabled.
Sourcepub fn with_max_page_bytes(&mut self, max_page_bytes: Option<f64>) -> &mut Self
pub fn with_max_page_bytes(&mut self, max_page_bytes: Option<f64>) -> &mut Self
Set the max amount of bytes to collect per page. This method does nothing if the chrome is not enabled.
Sourcepub fn with_max_bytes_allowed(
&mut self,
max_bytes_allowed: Option<u64>,
) -> &mut Self
pub fn with_max_bytes_allowed( &mut self, max_bytes_allowed: Option<u64>, ) -> &mut Self
Set the max amount of bytes to collect for the browser context. This method does nothing if the chrome is not enabled.
Sourcepub fn with_block_assets(&mut self, only_html: bool) -> &mut Self
pub fn with_block_assets(&mut self, only_html: bool) -> &mut Self
Block assets from loading from the network.
Sourcepub fn with_modify_headers(&mut self, modify_headers: bool) -> &mut Self
pub fn with_modify_headers(&mut self, modify_headers: bool) -> &mut Self
Modify the headers to mimic a real browser.
Sourcepub fn with_modify_http_client_headers(
&mut self,
modify_http_client_headers: bool,
) -> &mut Self
pub fn with_modify_http_client_headers( &mut self, modify_http_client_headers: bool, ) -> &mut Self
Modify the HTTP client headers to mimic a real browser.
Sourcepub fn with_cache_policy(
&mut self,
cache_policy: Option<BasicCachePolicy>,
) -> &mut Self
pub fn with_cache_policy( &mut self, cache_policy: Option<BasicCachePolicy>, ) -> &mut Self
Set the cache policy.
Sourcepub fn with_webdriver_config(
&mut self,
_webdriver_config: Option<WebDriverConfig>,
) -> &mut Self
pub fn with_webdriver_config( &mut self, _webdriver_config: Option<WebDriverConfig>, ) -> &mut Self
Set the WebDriver configuration. This does nothing without the webdriver flag enabled.
Sourcepub fn with_search_config(&mut self, _search_config: Option<()>) -> &mut Self
pub fn with_search_config(&mut self, _search_config: Option<()>) -> &mut Self
Configure web search integration. This does nothing without the search flag enabled.
Trait Implementations§
Source§impl Clone for Configuration
impl Clone for Configuration
Source§fn clone(&self) -> Configuration
fn clone(&self) -> Configuration
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
source. Read moreSource§impl Debug for Configuration
impl Debug for Configuration
Source§impl Default for Configuration
impl Default for Configuration
Source§fn default() -> Configuration
fn default() -> Configuration
Source§impl PartialEq for Configuration
impl PartialEq for Configuration
impl StructuralPartialEq for Configuration
Auto Trait Implementations§
impl Freeze for Configuration
impl RefUnwindSafe for Configuration
impl Send for Configuration
impl Sync for Configuration
impl Unpin for Configuration
impl UnwindSafe for Configuration
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more