Struct RequestParams

Source

pub struct RequestParams { // 49 fields
    pub url: Option<String>,
    pub request: Option<RequestType>,
    pub limit: Option<u32>,
    pub return_format: Option<ReturnFormatHandling>,
    pub tld: Option<bool>,
    pub depth: Option<u32>,
    pub cache: Option<bool>,
    pub scroll: Option<u32>,
    pub budget: Option<HashMap<String, u32>>,
    pub blacklist: Option<Vec<String>>,
    pub whitelist: Option<Vec<String>>,
    pub locale: Option<String>,
    pub cookies: Option<String>,
    pub stealth: Option<bool>,
    pub headers: Option<HashMap<String, String>>,
    pub anti_bot: Option<bool>,
    pub webhooks: Option<WebhookSettings>,
    pub metadata: Option<bool>,
    pub viewport: Option<Viewport>,
    pub encoding: Option<String>,
    pub subdomains: Option<bool>,
    pub user_agent: Option<String>,
    pub store_data: Option<bool>,
    pub gpt_config: Option<HashMap<String, String>>,
    pub fingerprint: Option<bool>,
    pub storageless: Option<bool>,
    pub readability: Option<bool>,
    pub proxy_enabled: Option<bool>,
    pub respect_robots: Option<bool>,
    pub root_selector: Option<String>,
    pub full_resources: Option<bool>,
    pub text: Option<String>,
    pub sitemap: Option<bool>,
    pub external_domains: Option<Vec<String>>,
    pub return_embeddings: Option<bool>,
    pub return_headers: Option<bool>,
    pub return_page_links: Option<bool>,
    pub return_cookies: Option<bool>,
    pub request_timeout: Option<u8>,
    pub run_in_background: Option<bool>,
    pub skip_config_checks: Option<bool>,
    pub css_extraction_map: Option<CSSExtractionMap>,
    pub chunking_alg: Option<ChunkingAlgDict>,
    pub disable_intercept: Option<bool>,
    pub wait_for: Option<WaitFor>,
    pub execution_scripts: Option<ExecutionScriptsMap>,
    pub automation_scripts: Option<WebAutomationMap>,
    pub redirect_policy: Option<RedirectPolicy>,
    pub event_tracker: Option<EventTracker>,
}

Expand description

Structure representing request parameters.

Fields§

§url: Option<String>

The URL to be crawled.

§request: Option<RequestType>

The type of request to be made.

§limit: Option<u32>

The maximum number of pages the crawler should visit.

§return_format: Option<ReturnFormatHandling>

The format in which the result should be returned.

§tld: Option<bool>

Specifies whether to only visit the top-level domain.

§depth: Option<u32>

The depth of the crawl.

§cache: Option<bool>

Specifies whether the request should be cached.

§scroll: Option<u32>

Perform an infinite scroll on the page as new content arises. The request param also needs to be set to ‘chrome’ or ‘smart’.

§budget: Option<HashMap<String, u32>>

The budget for various resources.

§blacklist: Option<Vec<String>>

The blacklist routes to ignore. This can be a Regex string pattern.

§whitelist: Option<Vec<String>>

The whitelist of routes to crawl exclusively. Each entry can be a regex string pattern, and the list can be combined with `blacklist`.

§locale: Option<String>

The locale to be used during the crawl.

§cookies: Option<String>

The cookies to be set for the request, formatted as a single string.

§stealth: Option<bool>

Specifies whether to use stealth techniques to avoid detection.

§headers: Option<HashMap<String, String>>

The headers to be used for the request.

§anti_bot: Option<bool>

Specifies whether anti-bot measures should be used.

§webhooks: Option<WebhookSettings>

The webhook settings used to send data via webhooks.

§metadata: Option<bool>

Specifies whether to include metadata in the response.

§viewport: Option<Viewport>

The dimensions of the viewport.

§encoding: Option<String>

The encoding to be used for the request.

§subdomains: Option<bool>

Specifies whether to include subdomains in the crawl.

§user_agent: Option<String>

The user agent string to be used for the request.

§store_data: Option<bool>

Specifies whether the response data should be stored.

§gpt_config: Option<HashMap<String, String>>

Configuration settings for GPT (general purpose texture mappings).

§fingerprint: Option<bool>

Specifies whether to use fingerprinting protection.

§storageless: Option<bool>

Specifies whether to perform the request without using storage.

§readability: Option<bool>

Specifies whether readability optimizations should be applied.

§proxy_enabled: Option<bool>

Specifies whether to use a proxy for the request.

§respect_robots: Option<bool>

Specifies whether to respect the site’s robots.txt file.

§root_selector: Option<String>

CSS selector to be used to filter the content.

§full_resources: Option<bool>

Specifies whether to load all resources of the crawl target.

§text: Option<String>

The text string to extract data from.

§sitemap: Option<bool>

Specifies whether to use the sitemap links.

§external_domains: Option<Vec<String>>

External domains to include in the crawl.

§return_embeddings: Option<bool>

Returns the OpenAI embeddings for the title and description. Other values, such as keywords, may also be included. Requires the metadata parameter to be set to true.

§return_headers: Option<bool>

Returns the HTTP response headers.

§return_page_links: Option<bool>

Returns the link(s) found on the page that match the crawler query.

§return_cookies: Option<bool>

Returns the HTTP response cookies.

§request_timeout: Option<u8>

The timeout for the request, in seconds (the value is a `u8`, so at most 255).

§run_in_background: Option<bool>

Specifies whether to run the request in the background.

§skip_config_checks: Option<bool>

Specifies whether to skip configuration checks.

§css_extraction_map: Option<CSSExtractionMap>

Use CSS query selectors to scrape contents from the web page. Set the paths and the CSS extraction object map to perform extractions per path or page.

§chunking_alg: Option<ChunkingAlgDict>

The chunking algorithm to use.

§disable_intercept: Option<bool>

Disable request interception when running ‘request’ as ‘chrome’ or ‘smart’. This can help when the page uses 3rd party or external scripts to load content.

§wait_for: Option<WaitFor>

The events to wait for on the page. Requires the request param to be set to ‘chrome’ or ‘smart’.

§execution_scripts: Option<ExecutionScriptsMap>

Perform custom JavaScript tasks on a URL or URL path. Requires the request param to be set to ‘chrome’ or ‘smart’.

§automation_scripts: Option<WebAutomationMap>

Perform automated web tasks on a URL or URL path. Requires the request param to be set to ‘chrome’ or ‘smart’.

§redirect_policy: Option<RedirectPolicy>

The redirect policy for HTTP requests. Set the value to Loose to allow all redirects.

§event_tracker: Option<EventTracker>

Track the request sent and responses received for chrome or smart. The responses will track the bytes used and the requests will have the monotime sent.

Struct RequestParams

Fields§

Trait Implementations§

impl Clone for RequestParams

fn clone(&self) -> RequestParams

fn clone_from(&mut self, source: &Self)

impl Debug for RequestParams

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for RequestParams

fn default() -> RequestParams

impl<'de> Deserialize<'de> for RequestParams

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

impl Serialize for RequestParams

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where __S: Serializer,

Auto Trait Implementations§

impl Freeze for RequestParams

impl RefUnwindSafe for RequestParams

impl Send for RequestParams

impl Sync for RequestParams

impl Unpin for RequestParams

impl UnwindSafe for RequestParams

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

impl<T> ErasedDestructor for T where T: 'static,

impl<T> MaybeSendSync for T

Struct RequestParams

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,

impl<T> ErasedDestructor for T
where T: 'static,