spider_client/shapes/
request.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4/// Structure representing the Chunking algorithm dictionary.
5#[derive(Debug, Deserialize, Serialize, Clone)]
6pub struct ChunkingAlgDict {
7    /// The chunking algorithm to use, defined as a specific type.
8    pub r#type: ChunkingType,
9    /// The amount to chunk by.
10    pub value: i32,
11}
12
13// The nested structures
14#[derive(Serialize, Deserialize, Debug, Clone, Default)]
15pub struct Timeout {
16    /// The seconds up to 60.
17    pub secs: u64,
18    /// The nanoseconds.
19    pub nanos: u32,
20}
21
22#[derive(Serialize, Deserialize, Debug, Clone)]
23pub struct IdleNetwork {
24    /// The timeout to wait until.
25    pub timeout: Timeout,
26}
27
28/// Represents various web automation actions.
29#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
30pub enum WebAutomation {
31    /// Runs custom JavaScript code.
32    Evaluate(String),
33    /// Clicks on an element.
34    Click(String),
35    /// Clicks on all elements.
36    ClickAll(String),
37    /// Clicks on all elements.
38    ClickAllClickable(),
39    /// Clicks at the position x and y coordinates.
40    ClickPoint {
41        /// The horizontal (X) coordinate.
42        x: f64,
43        /// The vertical (Y) coordinate.
44        y: f64,
45    },
46    /// Click and hold on an element found by selector for a duration in ms.
47    ClickHold {
48        /// The CSS selector to target.
49        selector: String,
50        /// Duration to hold in milliseconds.
51        hold_for_ms: u64,
52    },
53    /// Click and hold at a specific point for a duration in ms.
54    ClickHoldPoint {
55        /// The horizontal (X) coordinate.
56        x: f64,
57        /// The vertical (Y) coordinate.
58        y: f64,
59        /// Duration to hold in milliseconds.
60        hold_for_ms: u64,
61    },
62    /// Click-and-drag from one element to another (selector-based).
63    ClickDrag {
64        /// CSS selector for the start element.
65        from: String,
66        /// CSS selector for the destination element.
67        to: String,
68        /// Optional key modifier (Rust: Option<i64>).
69        modifier: Option<i64>,
70    },
71    /// Click-and-drag from one point to another.
72    ClickDragPoint {
73        /// Start X coordinate.
74        from_x: f64,
75        /// Start Y coordinate.
76        from_y: f64,
77        /// End X coordinate.
78        to_x: f64,
79        /// End Y coordinate.
80        to_y: f64,
81        /// Optional key modifier (Rust: Option<i64>).
82        modifier: Option<i64>,
83    },
84    Type { 
85        /// The value to type.
86        value: String,
87        /// The click modifier.
88        modifier: Option<i64> 
89    },
90    /// Waits for a fixed duration in milliseconds.
91    Wait(u64),
92    /// Waits for the next navigation event.
93    WaitForNavigation,
94    /// Wait for dom updates to stop.
95    WaitForDom {
96        /// The selector of the element to wait for updates.
97        selector: Option<String>,
98        ///  The timeout to wait for in ms.
99        timeout: u32,
100    },
101    /// Waits for an element to appear.
102    WaitFor(String),
103    /// Waits for an element to appear with a timeout.
104    WaitForWithTimeout {
105        /// The selector of the element to wait for updates.
106        selector: String,
107        ///  The timeout to wait for in ms.
108        timeout: u64,
109    },
110    /// Waits for an element to appear and then clicks on it.
111    WaitForAndClick(String),
112    /// Scrolls the screen in the horizontal axis by a specified amount in pixels.
113    ScrollX(i32),
114    /// Scrolls the screen in the vertical axis by a specified amount in pixels.
115    ScrollY(i32),
116    /// Fills an input element with a specified value.
117    Fill {
118        /// The selector of the input element to fill.
119        selector: String,
120        ///  The value to fill the input element with.
121        value: String,
122    },
123    /// Scrolls the page until the end.
124    InfiniteScroll(u32),
125    /// Perform a screenshot on the page - fullscreen and omit background for params.
126    Screenshot {
127        /// Take a full page screenshot.
128        full_page: bool,
129        /// Omit the background.
130        omit_background: bool,
131        /// The output file to store the screenshot.
132        output: String,
133    },
134    /// Only continue to the next automation if the prior step was valid. Use this intermediate after a step to break out of the chain.
135    ValidateChain,
136}
137
138#[derive(Default, Serialize, Deserialize, Debug, Clone)]
139#[serde(tag = "type", rename_all = "PascalCase")]
140pub enum RedirectPolicy {
141    Loose,
142    #[default]
143    Strict,
144}
145
146pub type WebAutomationMap = std::collections::HashMap<String, Vec<WebAutomation>>;
/// Map from a string key to a string value, used for execution scripts.
pub type ExecutionScriptsMap = HashMap<String, String>;
148
149#[derive(Serialize, Deserialize, Debug, Clone)]
150pub struct Selector {
151    /// The timeout to wait until.
152    pub timeout: Timeout,
153    /// The selector to wait for.
154    pub selector: String,
155}
156
157#[derive(Serialize, Deserialize, Debug, Clone, Default)]
158pub struct Delay {
159    /// The timeout to wait until.
160    pub timeout: Timeout,
161}
162
/// Serde default helper: produces `Some(true)`.
fn default_some_true() -> Option<bool> {
    let enabled = true;
    Some(enabled)
}
167
168#[derive(Serialize, Deserialize, Debug, Clone, Default)]
169pub struct WaitFor {
170    /// Wait until idle networks with a timeout of idleness.
171    pub idle_network: Option<IdleNetwork>,
172    /// Wait until network to be idle with a max timeout.
173    pub idle_network0: Option<IdleNetwork>,
174    /// Wait until network to almost be idle with a max timeout.
175    pub almost_idle_network0: Option<IdleNetwork>,
176    /// Wait until a selector exist. Can determine if a selector exist after executing all js and network events.
177    pub selector: Option<Selector>,
178    /// Wait for the dom to update
179    pub dom: Option<Selector>,
180    /// Wait until a hard delay.
181    pub delay: Option<Delay>,
182    /// Wait until page navigation happen. Default is true.
183    #[serde(default = "default_some_true")]
184    pub page_navigations: Option<bool>,
185}
186
187/// Query request to get a document.
188#[derive(Serialize, Deserialize, Debug, Clone, Default)]
189pub struct QueryRequest {
190    /// The exact website url.
191    pub url: Option<String>,
192    /// The website domain.
193    pub domain: Option<String>,
194    /// The path of the resource.
195    pub pathname: Option<String>,
196}
197
198/// Enum representing different types of Chunking.
199#[derive(Default, Debug, Deserialize, Serialize, Clone)]
200#[serde(rename_all = "lowercase")]
201pub enum ChunkingType {
202    #[default]
203    /// By the word count.
204    ByWords,
205    /// By the line count.
206    ByLines,
207    /// By the char length.
208    ByCharacterLength,
209    /// By sentence.
210    BySentence,
211}
212
213#[derive(Default, Debug, Deserialize, Serialize, Clone)]
214/// View port handling for chrome.
215pub struct Viewport {
216    /// Device screen Width
217    pub width: u32,
218    /// Device screen size
219    pub height: u32,
220    /// Device scale factor
221    pub device_scale_factor: Option<f64>,
222    /// Emulating Mobile?
223    pub emulating_mobile: bool,
224    /// Use landscape mode instead of portrait.
225    pub is_landscape: bool,
226    /// Touch screen device?
227    pub has_touch: bool,
228}
229
230// Define the CSSSelector struct
231#[derive(Debug, Clone, Default, Deserialize, Serialize)]
232pub struct CSSSelector {
233    /// The name of the selector group
234    pub name: String,
235    /// A vector of CSS selectors
236    pub selectors: Vec<String>,
237}
238
239// Define the CSSExtractionMap type
240pub type CSSExtractionMap = HashMap<String, Vec<CSSSelector>>;
241
242/// Represents the settings for a webhook configuration
243#[derive(Debug, Default, Deserialize, Serialize, Clone)]
244pub struct WebhookSettings {
245    /// The destination where the webhook information will be sent
246    destination: String,
247    /// Trigger an action when all credits are depleted
248    on_credits_depleted: bool,
249    /// Trigger an action when half of the credits are depleted
250    on_credits_half_depleted: bool,
251    /// Trigger an action on a website status update event
252    on_website_status: bool,
253    /// Send information about a new page find (such as links and bytes)
254    on_find: bool,
255    /// Handle the metadata of a found page
256    on_find_metadata: bool,
257}
258
259/// Proxy pool selection for outbound request routing.
260/// Choose a pool based on your use case (e.g., stealth, speed, or stability).
261///
262/// - 'residential'         → cost-effective entry-level residential pool
263/// - 'residential_fast'    → faster residential pool for higher throughput
264/// - 'residential_static'  → static residential IPs, rotated daily
265/// - 'residential_premium' → low-latency premium IPs
266/// - 'residential_core'    → balanced plan (quality vs. cost)
267/// - 'residential_plus'    → largest and highest quality core pool
268/// - 'mobile'              → 4G/5G mobile proxies for maximum evasion
269/// - 'isp'                 → ISP-grade datacenters
270#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, Hash)]
271pub enum ProxyType {
272    /// Cost-effective entry-level residential pool.
273    #[serde(rename = "residential")]
274    Residential,
275    /// 4G / 5G mobile proxies for maximum stealth and evasion.
276    #[serde(rename = "mobile")]
277    Mobile,
278    /// ISP-grade residential routing (alias: `datacenter`).
279    #[serde(rename = "isp", alias = "datacenter")]
280    #[default]
281    Isp
282}
283
284/// List of proxies.
285pub const PROXY_TYPE_LIST: [ProxyType; 3] = [
286    ProxyType::Residential,
287    ProxyType::Isp,
288    ProxyType::Mobile
289];
290
291impl ProxyType {
292    /// Get the canonical string representation of the proxy type.
293    pub fn as_str(&self) -> &'static str {
294        match self {
295            ProxyType::Residential => "residential",
296            ProxyType::Mobile => "mobile",
297            ProxyType::Isp => "isp"
298        }
299    }
300}
301
302/// Send multiple return formats.
303#[derive(Debug, Deserialize, Serialize, Clone)]
304#[serde(untagged)]
305pub enum ReturnFormatHandling {
306    /// A single return item.
307    Single(ReturnFormat),
308    /// Multiple return formats.
309    Multi(std::collections::HashSet<ReturnFormat>),
310}
311
312impl Default for ReturnFormatHandling {
313    fn default() -> ReturnFormatHandling {
314        ReturnFormatHandling::Single(ReturnFormat::Raw)
315    }
316}
317
318#[derive(Debug, Default, Deserialize, Serialize, Clone)]
319pub struct EventTracker {
320    /// The responses received.
321    pub responses: Option<bool>,
322    /// The request sent.
323    pub requests: Option<bool>,
324    /// Track the automation events with data changes and screenshots.
325    pub automation: Option<bool>,
326}
327
328#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
329#[serde(tag = "type")]
330pub enum LinkRewriteRule {
331    #[serde(rename = "replace")]
332    /// A string replacer.
333    Replace {
334        /// Only apply when the link's host matches this value.
335        #[serde(default)]
336        host: Option<String>,
337        find: String,
338        replace_with: String,
339    },
340
341    #[serde(rename = "regex")]
342    /// A regex replacer.
343    Regex {
344        /// Only apply when the link's host matches this value.
345        #[serde(default)]
346        host: Option<String>,
347        pattern: String,
348        replace_with: String,
349    },
350}
351
352/// Structure representing request parameters.
353#[derive(Debug, Default, Deserialize, Serialize, Clone)]
354pub struct RequestParams {
355    #[serde(default)]
356    /// The URL to be crawled.
357    pub url: Option<String>,
358    #[serde(default)]
359    /// The type of request to be made.
360    pub request: Option<RequestType>,
361    #[serde(default)]
362    /// The maximum number of pages the crawler should visit.
363    pub limit: Option<u32>,
364    #[serde(default)]
365    /// The format in which the result should be returned.
366    pub return_format: Option<ReturnFormatHandling>,
367    /// The country code for request
368    pub country_code: Option<String>,
369    #[serde(default)]
370    /// Specifies whether to only visit the top-level domain.
371    pub tld: Option<bool>,
372    #[serde(default)]
373    /// The depth of the crawl.
374    pub depth: Option<u32>,
375    #[serde(default)]
376    /// Specifies whether the request should be cached.
377    pub cache: Option<bool>,
378    #[serde(default)]
379    /// Perform an infinite scroll on the page as new content arises. The request param also needs to be set to 'chrome' or 'smart'.
380    pub scroll: Option<u32>,
381    #[serde(default)]
382    /// The budget for various resources.
383    pub budget: Option<HashMap<String, u32>>,
384    #[serde(default)]
385    /// The blacklist routes to ignore. This can be a Regex string pattern.
386    pub blacklist: Option<Vec<String>>,
387    #[serde(default)]
388    /// URL rewrite rule applied to every discovered link.
389    pub link_rewrite: Option<LinkRewriteRule>,
390    #[serde(default)]
391    /// The whitelist routes to only crawl. This can be a Regex string pattern and used with black_listing.
392    pub whitelist: Option<Vec<String>>,
393    #[serde(default)]
394    /// The locale to be used during the crawl.
395    pub locale: Option<String>,
396    #[serde(default)]
397    /// The cookies to be set for the request, formatted as a single string.
398    pub cookies: Option<String>,
399    #[serde(default)]
400    /// Specifies whether to use stealth techniques to avoid detection.
401    pub stealth: Option<bool>,
402    #[serde(default)]
403    /// The headers to be used for the request.
404    pub headers: Option<HashMap<String, String>>,
405    #[serde(default)]
406    /// Specifies whether to send data via webhooks.
407    pub webhooks: Option<WebhookSettings>,
408    #[serde(default)]
409    /// Specifies whether to include metadata in the response.
410    pub metadata: Option<bool>,
411    #[serde(default)]
412    /// The dimensions of the viewport.
413    pub viewport: Option<Viewport>,
414    #[serde(default)]
415    /// The encoding to be used for the request.
416    pub encoding: Option<String>,
417    #[serde(default)]
418    /// Specifies whether to include subdomains in the crawl.
419    pub subdomains: Option<bool>,
420    #[serde(default)]
421    /// The user agent string to be used for the request.
422    pub user_agent: Option<String>,
423    #[serde(default)]
424    /// Specifies whether to use fingerprinting protection.
425    pub fingerprint: Option<bool>,
426    #[serde(default)]
427    /// Specifies whether to perform the request without using storage.
428    pub storageless: Option<bool>,
429    #[serde(default)]
430    /// Specifies whether readability optimizations should be applied.
431    pub readability: Option<bool>,
432    #[serde(default)]
433    /// Specifies whether to use a proxy for the request. [Deprecated]: use the 'proxy' param instead.
434    pub proxy_enabled: Option<bool>,
435    #[serde(default)]
436    /// Specifies whether to respect the site's robots.txt file.
437    pub respect_robots: Option<bool>,
438    #[serde(default)]
439    /// CSS selector to be used to filter the content.
440    pub root_selector: Option<String>,
441    #[serde(default)]
442    /// Specifies whether to load all resources of the crawl target.
443    pub full_resources: Option<bool>,
444    #[serde(default)]
445    /// The text string to extract data from.
446    pub text: Option<String>,
447    #[serde(default)]
448    /// Specifies whether to use the sitemap links.
449    pub sitemap: Option<bool>,
450    #[serde(default)]
451    /// External domains to include the crawl.
452    pub external_domains: Option<Vec<String>>,
453    #[serde(default)]
454    /// Returns the OpenAI embeddings for the title and description. Other values, such as keywords, may also be included. Requires the `metadata` parameter to be set to `true`.
455    pub return_embeddings: Option<bool>,
456    #[serde(default)]
457    /// Returns the HTTP response headers.
458    pub return_headers: Option<bool>,
459    #[serde(default)]
460    /// Returns the link(s) found on the page that match the crawler query.
461    pub return_page_links: Option<bool>,
462    #[serde(default)]
463    /// Returns the HTTP response cookies.
464    pub return_cookies: Option<bool>,
465    #[serde(default)]
466    /// The timeout for the request, in seconds.
467    pub request_timeout: Option<u8>,
468    #[serde(default)]
469    /// Specifies whether to run the request in the background.
470    pub run_in_background: Option<bool>,
471    #[serde(default)]
472    /// Specifies whether to skip configuration checks.
473    pub skip_config_checks: Option<bool>,
474    #[serde(default)]
475    /// Use CSS query selectors to scrape contents from the web page. Set the paths and the CSS extraction object map to perform extractions per path or page.
476    pub css_extraction_map: Option<CSSExtractionMap>,
477    #[serde(default)]
478    /// The chunking algorithm to use.
479    pub chunking_alg: Option<ChunkingAlgDict>,
480    #[serde(default)]
481    /// Disable request interception when running 'request' as 'chrome' or 'smart'. This can help when the page uses 3rd party or external scripts to load content.
482    pub disable_intercept: Option<bool>,
483    #[serde(default)]
484    /// Disables service-provided hints that add request optimizations to improve crawl outcomes,
485    /// such as network blacklists, request-type selection, geo handling, and more.
486    pub disable_hints: Option<bool>,
487    #[serde(default)]
488    /// The wait for events on the page. You need to make your `request` `chrome` or `smart`.
489    pub wait_for: Option<WaitFor>,
490    #[serde(default)]
491    /// Perform custom Javascript tasks on a url or url path. You need to make your `request` `chrome` or `smart`
492    pub execution_scripts: Option<ExecutionScriptsMap>,
493    #[serde(default)]
494    /// Perform web automated tasks on a url or url path. You need to make your `request` `chrome` or `smart`
495    pub automation_scripts: Option<WebAutomationMap>,
496    #[serde(default)]
497    /// The redirect policy for HTTP request. Set the value to Loose to allow all.
498    pub redirect_policy: Option<RedirectPolicy>,
499    #[serde(default)]
500    /// Track the request sent and responses received for `chrome` or `smart`. The responses will track the bytes used and the requests will have the monotime sent.
501    pub event_tracker: Option<EventTracker>,
502    #[serde(default)]
503    /// The timeout to stop the crawl.
504    pub crawl_timeout: Option<Timeout>,
505    #[serde(default)]
506    /// Evaluates given script in every frame upon creation (before loading frame's scripts).
507    pub evaluate_on_new_document: Option<Box<String>>,
508    #[serde(default)]
509    /// Runs the request using lite_mode:Lite mode reduces data transfer costs by 50%, with trade-offs in speed, accuracy,
510    /// geo-targeting, and reliability. It’s best suited for non-urgent data collection or when
511    /// targeting websites with minimal anti-bot protections.
512    pub lite_mode: Option<bool>,
513    #[serde(default)]
514    /// The proxy to use for request.
515    pub proxy: Option<ProxyType>,
516    #[serde(default)]
517    /// Use a remote proxy at ~50% reduced cost for file downloads.
518    /// This requires a user-supplied static IP proxy endpoint.
519    pub remote_proxy: Option<String>,
520    #[serde(default)]
521    /// Set the maximum number of credits to use per page.
522    /// Credits are measured in decimal units, where 10,000 credits equal one dollar (100 credits per penny).
523    /// Credit limiting only applies to request that are Javascript rendered using smart_mode or chrome for the 'request' type.
524    pub max_credits_per_page: Option<f64>,
525}
526
527
528#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
529#[serde(rename_all = "lowercase")]
530pub enum TBS {
531    #[serde(rename = "qdr:h")]
532    PastHour,
533    #[serde(rename = "qdr:d")]
534    Past24Hours,
535    #[serde(rename = "qdr:w")]
536    PastWeek,
537    #[serde(rename = "qdr:m")]
538    PastMonth,
539    #[serde(rename = "qdr:y")]
540    PastYear,
541}
542
543/// The engine to use.
544#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
545pub enum Engine {
546    /// Google
547    Google,
548    /// Brave
549    Brave,
550    /// All
551    #[default]
552    All
553}
554
555/// The structure representing request parameters for a search request.
556#[derive(Debug, Default, Deserialize, Serialize, Clone)]
557pub struct SearchRequestParams {
558    /// The base request parameters.
559    #[serde(default, flatten)]
560    pub base: RequestParams,
561    // The search request.
562    pub search: String,
563    /// The search limit.
564    pub search_limit: Option<u32>,
565    // Fetch the page content. Defaults to true.
566    pub fetch_page_content: Option<bool>,
567    /// The search location of the request
568    pub location: Option<String>,
569    /// The country code of the request
570    pub country: Option<crate::shapes::country_codes::CountryCode>,
571    /// The language code of the request.
572    pub language: Option<String>,
573    /// The number of search results
574    pub num: Option<u32>,
575    /// The time period range.
576    pub tbs: Option<TBS>,
577    /// The page of the search results.
578    pub page: Option<u32>,
579    /// The websites limit if a list is sent from text or urls comma split. This helps automatic configuration of the system.
580    pub website_limit: Option<u32>,
581    /// Prioritize speed over output quantity.
582    pub quick_search: Option<bool>,
583    /// Auto paginate pages ( up to 100 pages ).
584    pub auto_pagination: Option<bool>,
585    /// The search engine to use.
586    pub engine: Option<Engine>
587}
588
589/// Structure representing request parameters for transforming files.
590#[derive(Debug, Default, Deserialize, Serialize, Clone)]
591pub struct TransformParams {
592    #[serde(default)]
593    /// The format in which the result should be returned.
594    pub return_format: Option<ReturnFormat>,
595    #[serde(default)]
596    /// Specifies whether readability optimizations should be applied.
597    pub readability: Option<bool>,
598    #[serde(default)]
599    /// Clean the markdown or text for AI.
600    pub clean: Option<bool>,
601    #[serde(default)]
602    /// Clean the markdown or text for AI removing footers, navigation, and more.
603    pub clean_full: Option<bool>,
604    /// The data being transformed.
605    pub data: Vec<Resource>,
606}
607
608#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Default)]
609/// Transformation resource to use.
610pub struct Resource {
611    #[serde(default)]
612    /// the html to transform
613    pub html: Option<bytes::Bytes>,
614    #[serde(default)]
615    /// the content to transform
616    pub content: Option<bytes::Bytes>,
617    #[serde(default)]
618    /// the url of the html incase of readability to improve transformations.
619    pub url: Option<String>,
620    #[serde(default)]
621    /// the language of the resource.
622    pub lang: Option<String>,
623}
624
625/// the request type to perform
626#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)]
627#[serde(rename_all = "lowercase")]
628pub enum RequestType {
629    /// Default HTTP request
630    Http,
631    /// Chrome browser rendering
632    Chrome,
633    #[default]
634    /// Smart mode defaulting to HTTP and using Chrome when needed.
635    SmartMode,
636}
637
638/// Enum representing different return formats.
639#[derive(Default, Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Hash)]
640#[serde(rename_all = "lowercase")]
641pub enum ReturnFormat {
642    #[default]
643    /// The default return format of the resource.
644    Raw,
645    /// Return the response as Markdown.
646    Markdown,
647    /// Return the response as Commonmark.
648    Commonmark,
649    /// Return the response as Html2text.
650    Html2text,
651    /// Return the response as Text.
652    Text,
653    /// Returns a screenshot as Base64Url
654    Screenshot,
655    /// Return the response as XML.
656    Xml,
657    /// Return the response as Bytes.
658    Bytes,
659}
spider_client/shapes/request.rs

spider_client/shapes/
request.rs