pub struct RequestParams {
pub url: Option<String>,
pub request: Option<RequestType>,
pub limit: Option<u32>,
pub return_format: Option<ReturnFormatHandling>,
pub tld: Option<bool>,
pub depth: Option<u32>,
pub cache: Option<bool>,
pub scroll: Option<u32>,
pub budget: Option<HashMap<String, u32>>,
pub blacklist: Option<Vec<String>>,
pub whitelist: Option<Vec<String>>,
pub locale: Option<String>,
pub cookies: Option<String>,
pub stealth: Option<bool>,
pub headers: Option<HashMap<String, String>>,
pub anti_bot: Option<bool>,
pub webhooks: Option<WebhookSettings>,
pub metadata: Option<bool>,
pub viewport: Option<Viewport>,
pub encoding: Option<String>,
pub subdomains: Option<bool>,
pub user_agent: Option<String>,
pub store_data: Option<bool>,
pub gpt_config: Option<HashMap<String, String>>,
pub fingerprint: Option<bool>,
pub storageless: Option<bool>,
pub readability: Option<bool>,
pub proxy_enabled: Option<bool>,
pub respect_robots: Option<bool>,
pub root_selector: Option<String>,
pub full_resources: Option<bool>,
pub text: Option<String>,
pub sitemap: Option<bool>,
pub external_domains: Option<Vec<String>>,
pub return_embeddings: Option<bool>,
pub return_headers: Option<bool>,
pub return_page_links: Option<bool>,
pub return_cookies: Option<bool>,
pub request_timeout: Option<u8>,
pub run_in_background: Option<bool>,
pub skip_config_checks: Option<bool>,
pub css_extraction_map: Option<CSSExtractionMap>,
pub chunking_alg: Option<ChunkingAlgDict>,
pub disable_intercept: Option<bool>,
pub wait_for: Option<WaitFor>,
pub execution_scripts: Option<ExecutionScriptsMap>,
pub automation_scripts: Option<WebAutomationMap>,
pub redirect_policy: Option<RedirectPolicy>,
pub event_tracker: Option<EventTracker>,
pub crawl_timeout: Option<Timeout>,
pub evaluate_on_new_document: Option<Box<String>>,
pub lite_mode: Option<bool>,
}

Structure representing request parameters.

Fields
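Since every field is optional, only the parameters that matter for a given crawl need to be supplied. As a rough sketch, if these parameters are serialized to JSON for an HTTP crawling API (an assumption; the field names below are taken to mirror the struct's snake_case names), a minimal request body might look like:

```json
{
  "url": "https://example.com",
  "request": "chrome",
  "limit": 25,
  "depth": 2,
  "metadata": true,
  "return_page_links": true
}
```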
url: Option<String> - The URL to be crawled.
request: Option<RequestType> - The type of request to be made.
limit: Option<u32> - The maximum number of pages the crawler should visit.
return_format: Option<ReturnFormatHandling> - The format in which the result should be returned.
tld: Option<bool> - Specifies whether to only visit the top-level domain.
depth: Option<u32> - The depth of the crawl.
cache: Option<bool> - Specifies whether the request should be cached.
scroll: Option<u32> - Perform an infinite scroll on the page as new content arises. Requires the request param to be set to "chrome" or "smart".
budget: Option<HashMap<String, u32>> - The budget for various resources.
blacklist: Option<Vec<String>> - The blacklisted routes to ignore. Each entry can be a regex string pattern.
whitelist: Option<Vec<String>> - The whitelisted routes to crawl exclusively. Each entry can be a regex string pattern and can be used together with the blacklist.
locale: Option<String> - The locale to be used during the crawl.
cookies: Option<String> - The cookies to be set for the request, formatted as a single string.
stealth: Option<bool> - Specifies whether to use stealth techniques to avoid detection.
headers: Option<HashMap<String, String>> - The headers to be used for the request.
anti_bot: Option<bool> - Specifies whether anti-bot measures should be used.
webhooks: Option<WebhookSettings> - The webhook settings for sending data.
metadata: Option<bool> - Specifies whether to include metadata in the response.
viewport: Option<Viewport> - The dimensions of the viewport.
encoding: Option<String> - The encoding to be used for the request.
subdomains: Option<bool> - Specifies whether to include subdomains in the crawl.
user_agent: Option<String> - The user agent string to be used for the request.
store_data: Option<bool> - Specifies whether the response data should be stored.
gpt_config: Option<HashMap<String, String>> - Configuration settings for GPT.
fingerprint: Option<bool> - Specifies whether to use fingerprinting protection.
storageless: Option<bool> - Specifies whether to perform the request without using storage.
readability: Option<bool> - Specifies whether readability optimizations should be applied.
proxy_enabled: Option<bool> - Specifies whether to use a proxy for the request.
respect_robots: Option<bool> - Specifies whether to respect the site's robots.txt file.
root_selector: Option<String> - The CSS selector used to filter the content.
full_resources: Option<bool> - Specifies whether to load all resources of the crawl target.
text: Option<String> - The text string to extract data from.
sitemap: Option<bool> - Specifies whether to use the sitemap links.
external_domains: Option<Vec<String>> - External domains to include in the crawl.
return_embeddings: Option<bool> - Returns the OpenAI embeddings for the title and description. Other values, such as keywords, may also be included. Requires the metadata parameter to be set to true.
return_headers: Option<bool> - Returns the HTTP response headers.
return_page_links: Option<bool> - Returns the link(s) found on the page that match the crawler query.
return_cookies: Option<bool> - Returns the HTTP response cookies.
request_timeout: Option<u8> - The timeout for the request, in milliseconds.
run_in_background: Option<bool> - Specifies whether to run the request in the background.
skip_config_checks: Option<bool> - Specifies whether to skip configuration checks.
css_extraction_map: Option<CSSExtractionMap> - Use CSS query selectors to scrape contents from the web page. Set the paths and the CSS extraction object map to perform extractions per path or page.
chunking_alg: Option<ChunkingAlgDict> - The chunking algorithm to use.
disable_intercept: Option<bool> - Disable request interception when running request as "chrome" or "smart". This can help when the page uses third-party or external scripts to load content.
wait_for: Option<WaitFor> - The events to wait for on the page. Requires the request to be set to "chrome" or "smart".
execution_scripts: Option<ExecutionScriptsMap> - Perform custom JavaScript tasks on a URL or URL path. Requires the request to be set to "chrome" or "smart".
automation_scripts: Option<WebAutomationMap> - Perform automated web tasks on a URL or URL path. Requires the request to be set to "chrome" or "smart".
redirect_policy: Option<RedirectPolicy> - The redirect policy for HTTP requests. Set the value to Loose to allow all redirects.
event_tracker: Option<EventTracker> - Tracks the requests sent and responses received for "chrome" or "smart" requests. Responses track the bytes used, and requests record the monotonic time at which they were sent.
crawl_timeout: Option<Timeout> - The timeout after which to stop the crawl.
evaluate_on_new_document: Option<Box<String>> - Evaluates the given script in every frame upon creation (before loading the frame's scripts).
lite_mode: Option<bool> - Runs the request using lite mode. Lite mode reduces data transfer costs by 70%, with trade-offs in speed, accuracy, geo-targeting, and reliability. It is best suited for non-urgent data collection or when targeting websites with minimal anti-bot protections.
Trait Implementations

impl Clone for RequestParams
    fn clone(&self) -> RequestParams
    fn clone_from(&mut self, source: &Self)