use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Chunking configuration: a [`ChunkingType`] paired with an integer value.
///
/// How `value` is interpreted for each algorithm is not defined in this file —
/// presumably it is the chunk size in the unit implied by `type`; confirm
/// against the consumer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkingAlgDict {
    /// Selected chunking algorithm. Written as the raw identifier `r#type`
    /// because `type` is a Rust keyword.
    pub r#type: ChunkingType,
    /// Numeric parameter accompanying the algorithm.
    pub value: i32,
}
/// A time span split into whole seconds plus a nanosecond remainder.
///
/// NOTE(review): the `secs`/`nanos` pair looks like the layout of
/// `std::time::Duration` — confirm that this is the intended correspondence.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Timeout {
    /// Whole seconds.
    pub secs: u64,
    /// Additional nanoseconds.
    pub nanos: u32,
}
/// Idle-network wait setting; wraps a single [`Timeout`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IdleNetwork {
    /// Time span associated with the idle-network wait.
    pub timeout: Timeout,
}
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum WebAutomation {
Evaluate(String),
Click(String),
ClickAll(String),
ClickAllClickable(),
ClickPoint {
x: f64,
y: f64,
},
ClickHold {
selector: String,
hold_for_ms: u64,
},
ClickHoldPoint {
x: f64,
y: f64,
hold_for_ms: u64,
},
ClickDrag {
from: String,
to: String,
modifier: Option<i64>,
},
ClickDragPoint {
from_x: f64,
from_y: f64,
to_x: f64,
to_y: f64,
modifier: Option<i64>,
},
Type {
value: String,
modifier: Option<i64>,
},
Wait(u64),
WaitForNavigation,
WaitForDom {
selector: Option<String>,
timeout: u32,
},
WaitFor(String),
WaitForWithTimeout {
selector: String,
timeout: u64,
},
WaitForAndClick(String),
ScrollX(i32),
ScrollY(i32),
Fill {
selector: String,
value: String,
},
InfiniteScroll(u32),
Screenshot {
full_page: bool,
omit_background: bool,
output: String,
},
ValidateChain,
}
/// Redirect handling mode.
///
/// Serialized in serde's internally tagged form under the key `type`, with
/// PascalCase variant names. [`RedirectPolicy::Strict`] is the default.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "PascalCase")]
pub enum RedirectPolicy {
    /// The loose policy.
    Loose,
    /// The strict policy (default).
    #[default]
    Strict,
}
pub type WebAutomationMap = std::collections::HashMap<String, Vec<WebAutomation>>;
pub type ExecutionScriptsMap = std::collections::HashMap<String, String>;
/// A selector string paired with a [`Timeout`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Selector {
    /// Time span associated with the selector wait.
    pub timeout: Timeout,
    /// The selector string (presumably a CSS selector — TODO confirm).
    pub selector: String,
}
/// A delay setting; wraps a single [`Timeout`].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Delay {
    /// Length of the delay.
    pub timeout: Timeout,
}
/// Returns `Some(true)`.
///
/// Referenced by name from a `#[serde(default = "...")]` attribute so that a
/// missing key deserializes to `Some(true)` instead of `None`.
fn default_some_true() -> Option<bool> {
    let enabled = true;
    Some(enabled)
}
/// Collection of optional wait conditions.
///
/// Every field is optional. The exact behaviour of each condition is
/// implemented outside this file; the field notes are read off the names.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct WaitFor {
    /// Idle-network wait.
    pub idle_network: Option<IdleNetwork>,
    /// Idle-network wait, `0` variant (presumably zero in-flight requests — TODO confirm).
    pub idle_network0: Option<IdleNetwork>,
    /// Almost-idle-network wait, `0` variant (threshold not stated here — TODO confirm).
    pub almost_idle_network0: Option<IdleNetwork>,
    /// Wait tied to a selector.
    pub selector: Option<Selector>,
    /// Wait tied to the DOM, expressed with a [`Selector`].
    pub dom: Option<Selector>,
    /// Fixed delay.
    pub delay: Option<Delay>,
    /// Page-navigation flag.
    ///
    /// A missing key deserializes to `Some(true)` via `default_some_true`.
    /// Note that the derived `Default` for this struct still yields `None`
    /// for this field, so `WaitFor::default()` and deserializing `{}` differ.
    #[serde(default = "default_some_true")]
    pub page_navigations: Option<bool>,
}
/// Query parameters identifying a resource by URL, domain and/or pathname.
///
/// All fields are optional; how they combine is decided by the consumer.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct QueryRequest {
    /// URL to query.
    pub url: Option<String>,
    /// Domain to query.
    pub domain: Option<String>,
    /// Pathname to query.
    pub pathname: Option<String>,
}
/// Chunking algorithm selector.
///
/// Variant names are serialized in lowercase (`rename_all = "lowercase"`),
/// e.g. `ByWords` becomes `bywords`. [`ChunkingType::ByWords`] is the default.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ChunkingType {
    /// Chunk by words (default).
    #[default]
    ByWords,
    /// Chunk by lines.
    ByLines,
    /// Chunk by character length.
    ByCharacterLength,
    /// Chunk by sentence.
    BySentence,
}
/// Browser viewport description.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Viewport {
    /// Viewport width (presumably in pixels — TODO confirm).
    pub width: u32,
    /// Viewport height (presumably in pixels — TODO confirm).
    pub height: u32,
    /// Optional device scale factor.
    pub device_scale_factor: Option<f64>,
    /// Whether a mobile device is being emulated.
    pub emulating_mobile: bool,
    /// Whether the viewport is in landscape orientation.
    pub is_landscape: bool,
    /// Whether touch input is available.
    pub has_touch: bool,
}
/// A named group of selector strings.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CSSSelector {
    /// Name given to this group.
    pub name: String,
    /// Selector strings belonging to the group.
    pub selectors: Vec<String>,
}
/// Map from a string key to the list of [`CSSSelector`] groups for that key.
///
/// NOTE(review): the key is presumably a URL or path — confirm with the consumer.
pub type CSSExtractionMap = HashMap<String, Vec<CSSSelector>>;
/// Webhook configuration: a destination plus one flag per event kind.
///
/// All fields are public so that callers outside this module can build a
/// value with struct-literal syntax (for example together with
/// `..Default::default()`). With private fields the only ways to obtain a
/// value were `Default::default()` or deserialization. The precise event
/// semantics are defined by the service; the flag notes are read off the names.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct WebhookSettings {
    /// Where webhook notifications are sent (presumably a URL — TODO confirm).
    pub destination: String,
    /// Notify when credits are depleted.
    pub on_credits_depleted: bool,
    /// Notify when credits are half depleted.
    pub on_credits_half_depleted: bool,
    /// Notify on website status events.
    pub on_website_status: bool,
    /// Notify on find events.
    pub on_find: bool,
    /// Notify on find-metadata events.
    pub on_find_metadata: bool,
}
/// Kind of proxy to use.
///
/// Each variant has an explicit lowercase serde name.
/// [`ProxyType::Isp`] is the default.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ProxyType {
    /// Serialized as `residential`.
    #[serde(rename = "residential")]
    Residential,
    /// Serialized as `mobile`.
    #[serde(rename = "mobile")]
    Mobile,
    /// Serialized as `isp`; `datacenter` is also accepted when deserializing.
    #[serde(rename = "isp", alias = "datacenter")]
    #[default]
    Isp,
}
/// The three [`ProxyType`] variants, in the order residential, ISP, mobile.
pub const PROXY_TYPE_LIST: [ProxyType; 3] = [
    ProxyType::Residential,
    ProxyType::Isp,
    ProxyType::Mobile,
];
impl ProxyType {
pub fn as_str(&self) -> &'static str {
match self {
ProxyType::Residential => "residential",
ProxyType::Mobile => "mobile",
ProxyType::Isp => "isp",
}
}
}
/// Either one [`ReturnFormat`] or a set of them.
///
/// The enum is `untagged`, so no variant name appears in the serialized form:
/// the value is written as the bare format or as a sequence of formats.
/// On deserialization serde tries the variants in declaration order.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ReturnFormatHandling {
    /// A single return format.
    Single(ReturnFormat),
    /// Several distinct return formats.
    Multi(std::collections::HashSet<ReturnFormat>),
}
impl Default for ReturnFormatHandling {
    /// The default is a single format, [`ReturnFormat::Raw`].
    fn default() -> Self {
        Self::Single(ReturnFormat::Raw)
    }
}
/// Optional switches selecting which event categories to track.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EventTracker {
    /// Track responses.
    pub responses: Option<bool>,
    /// Track requests.
    pub requests: Option<bool>,
    /// Track automation.
    pub automation: Option<bool>,
}
/// Rule describing how links are rewritten.
///
/// Internally tagged under the key `type`, with the values `replace` and
/// `regex`. In both variants a missing `host` deserializes to `None`.
/// How the rule is applied is implemented outside this file.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum LinkRewriteRule {
    /// Substring-style rule, tagged `replace`.
    #[serde(rename = "replace")]
    Replace {
        /// Optional host (presumably restricts the rule to that host — TODO confirm).
        #[serde(default)]
        host: Option<String>,
        /// Text to look for.
        find: String,
        /// Replacement text.
        replace_with: String,
    },
    /// Pattern-based rule, tagged `regex`.
    #[serde(rename = "regex")]
    Regex {
        /// Optional host (presumably restricts the rule to that host — TODO confirm).
        #[serde(default)]
        host: Option<String>,
        /// Pattern to match (regex dialect not stated here).
        pattern: String,
        /// Replacement text.
        replace_with: String,
    },
}
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct RequestParams {
#[serde(default)]
pub url: Option<String>,
#[serde(default)]
pub request: Option<RequestType>,
#[serde(default)]
pub limit: Option<u32>,
#[serde(default)]
pub return_format: Option<ReturnFormatHandling>,
pub country_code: Option<String>,
#[serde(default)]
pub tld: Option<bool>,
#[serde(default)]
pub depth: Option<u32>,
#[serde(default)]
pub cache: Option<bool>,
#[serde(default)]
pub scroll: Option<u32>,
#[serde(default)]
pub budget: Option<HashMap<String, u32>>,
#[serde(default)]
pub blacklist: Option<Vec<String>>,
#[serde(default)]
pub link_rewrite: Option<LinkRewriteRule>,
#[serde(default)]
pub whitelist: Option<Vec<String>>,
#[serde(default)]
pub locale: Option<String>,
#[serde(default)]
pub cookies: Option<String>,
#[serde(default)]
pub stealth: Option<bool>,
#[serde(default)]
pub headers: Option<HashMap<String, String>>,
#[serde(default)]
pub webhooks: Option<WebhookSettings>,
#[serde(default)]
pub metadata: Option<bool>,
#[serde(default)]
pub viewport: Option<Viewport>,
#[serde(default)]
pub encoding: Option<String>,
#[serde(default)]
pub subdomains: Option<bool>,
#[serde(default)]
pub user_agent: Option<String>,
#[serde(default)]
pub fingerprint: Option<bool>,
#[serde(default)]
pub storageless: Option<bool>,
#[serde(default)]
pub readability: Option<bool>,
#[serde(default)]
pub proxy_enabled: Option<bool>,
#[serde(default)]
pub respect_robots: Option<bool>,
#[serde(default)]
pub root_selector: Option<String>,
#[serde(default)]
pub full_resources: Option<bool>,
#[serde(default)]
pub text: Option<String>,
#[serde(default)]
pub sitemap: Option<bool>,
#[serde(default)]
pub external_domains: Option<Vec<String>>,
#[serde(default)]
pub return_embeddings: Option<bool>,
#[serde(default)]
pub return_headers: Option<bool>,
#[serde(default)]
pub return_page_links: Option<bool>,
#[serde(default)]
pub return_cookies: Option<bool>,
#[serde(default)]
pub request_timeout: Option<u8>,
#[serde(default)]
pub run_in_background: Option<bool>,
#[serde(default)]
pub skip_config_checks: Option<bool>,
#[serde(default)]
pub css_extraction_map: Option<CSSExtractionMap>,
#[serde(default)]
pub chunking_alg: Option<ChunkingAlgDict>,
#[serde(default)]
pub disable_intercept: Option<bool>,
#[serde(default)]
pub disable_hints: Option<bool>,
#[serde(default)]
pub wait_for: Option<WaitFor>,
#[serde(default)]
pub execution_scripts: Option<ExecutionScriptsMap>,
#[serde(default)]
pub automation_scripts: Option<WebAutomationMap>,
#[serde(default)]
pub redirect_policy: Option<RedirectPolicy>,
#[serde(default)]
pub event_tracker: Option<EventTracker>,
#[serde(default)]
pub crawl_timeout: Option<Timeout>,
#[serde(default)]
pub evaluate_on_new_document: Option<Box<String>>,
#[serde(default)]
pub lite_mode: Option<bool>,
#[serde(default)]
pub proxy: Option<ProxyType>,
#[serde(default)]
pub remote_proxy: Option<String>,
#[serde(default)]
pub max_credits_per_page: Option<f64>,
}
/// Time-range filter whose variants serialize to `qdr:*` tokens.
///
/// Every variant carries an explicit serde name, which takes precedence over
/// the container-level `rename_all`.
///
/// NOTE(review): the name presumably refers to a search engine's `tbs` query
/// parameter — confirm.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum TBS {
    /// Serialized as `qdr:h`.
    #[serde(rename = "qdr:h")]
    PastHour,
    /// Serialized as `qdr:d`.
    #[serde(rename = "qdr:d")]
    Past24Hours,
    /// Serialized as `qdr:w`.
    #[serde(rename = "qdr:w")]
    PastWeek,
    /// Serialized as `qdr:m`.
    #[serde(rename = "qdr:m")]
    PastMonth,
    /// Serialized as `qdr:y`.
    #[serde(rename = "qdr:y")]
    PastYear,
}
/// Search engine selector.
///
/// No rename attribute is set, so variants serialize under their Rust names.
/// [`Engine::All`] is the default.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub enum Engine {
    /// Google.
    Google,
    /// Brave.
    Brave,
    /// All engines (default).
    #[default]
    All,
}
/// Parameters for a search request.
///
/// The base [`RequestParams`] are flattened, so their keys sit at the same
/// level as the search-specific ones. `search` is the only field that is
/// neither optional nor defaulted. Field notes are read off the names and
/// should be confirmed against the API documentation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SearchRequestParams {
    /// Base request parameters, flattened into this struct's keys.
    #[serde(default, flatten)]
    pub base: RequestParams,
    /// The search query.
    pub search: String,
    /// Search limit value.
    pub search_limit: Option<u32>,
    /// Fetch-page-content flag.
    pub fetch_page_content: Option<bool>,
    /// Location string.
    pub location: Option<String>,
    /// Country, as a typed country code.
    pub country: Option<crate::shapes::country_codes::CountryCode>,
    /// Language string.
    pub language: Option<String>,
    /// `num` value (presumably number of results — TODO confirm).
    pub num: Option<u32>,
    /// Time-range filter; see [`TBS`].
    pub tbs: Option<TBS>,
    /// Page number.
    pub page: Option<u32>,
    /// Website limit value.
    pub website_limit: Option<u32>,
    /// Quick-search flag.
    pub quick_search: Option<bool>,
    /// Auto-pagination flag.
    pub auto_pagination: Option<bool>,
    /// Search engine; see [`Engine`].
    pub engine: Option<Engine>,
}
/// Parameters for a transform request over a list of [`Resource`]s.
///
/// The optional settings default to `None` when their key is missing.
/// `data` carries no serde default, so its key must be present when
/// deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TransformParams {
    /// Output format for the transform.
    #[serde(default)]
    pub return_format: Option<ReturnFormat>,
    /// Readability flag.
    #[serde(default)]
    pub readability: Option<bool>,
    /// Clean flag.
    #[serde(default)]
    pub clean: Option<bool>,
    /// Clean-full flag.
    #[serde(default)]
    pub clean_full: Option<bool>,
    /// Resources to transform (required key).
    pub data: Vec<Resource>,
}
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Default)]
pub struct Resource {
#[serde(default)]
pub html: Option<bytes::Bytes>,
#[serde(default)]
pub content: Option<bytes::Bytes>,
#[serde(default)]
pub url: Option<String>,
#[serde(default)]
pub lang: Option<String>,
}
/// How a request is performed.
///
/// Variant names are serialized in lowercase (`rename_all = "lowercase"`):
/// `http`, `chrome`, `smartmode`. [`RequestType::SmartMode`] is the default.
///
/// NOTE(review): confirm that `smartmode` is the exact token the receiving
/// service expects for the smart mode.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RequestType {
    /// Serialized as `http`.
    Http,
    /// Serialized as `chrome`.
    Chrome,
    /// Serialized as `smartmode` (default).
    #[default]
    SmartMode,
}
/// Output format of returned content.
///
/// Variant names are serialized in lowercase (`rename_all = "lowercase"`).
/// [`ReturnFormat::Raw`] is the default. The type is `Eq + Hash` so it can be
/// stored in a `HashSet`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ReturnFormat {
    /// Serialized as `raw` (default).
    #[default]
    Raw,
    /// Serialized as `markdown`.
    Markdown,
    /// Serialized as `commonmark`.
    Commonmark,
    /// Serialized as `html2text`.
    Html2text,
    /// Serialized as `text`.
    Text,
    /// Serialized as `screenshot`.
    Screenshot,
    /// Serialized as `xml`.
    Xml,
    /// Serialized as `bytes`.
    Bytes,
}