use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use crate::serde_helpers::deserialize_string_or_array;
/// Named content formats.
///
/// Variants are serialized and deserialized as camelCase strings, e.g.
/// `Markdown` <-> `"markdown"`, `RawHtml` <-> `"rawHtml"`,
/// `ChangeTracking` <-> `"changeTracking"`.
///
/// NOTE(review): presumably these are the output formats a scrape can be asked
/// to return — confirm against the API reference.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum Format {
Markdown,
Html,
RawHtml,
Links,
Images,
Screenshot,
Summary,
ChangeTracking,
Json,
Attributes,
Branding,
Audio,
}
/// Viewport dimensions as a `width` / `height` pair.
///
/// Both fields are required. `skip_serializing_none` currently has no effect
/// on this struct because it has no `Option` fields.
///
/// NOTE(review): units are presumably pixels — confirm against the API reference.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
pub struct Viewport {
pub width: u32,
pub height: u32,
}
/// Optional screenshot settings.
///
/// Field names are serialized in camelCase (`full_page` <-> `"fullPage"`), and
/// fields that are `None` are left out of the serialized output.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ScreenshotOptions {
pub full_page: Option<bool>,
// NOTE(review): valid range of `quality` is not visible here — confirm.
pub quality: Option<u8>,
pub viewport: Option<Viewport>,
}
/// Optional settings for change tracking.
///
/// All fields are optional; `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ChangeTrackingOptions {
/// Change-tracking modes, see [`ChangeTrackingMode`].
pub modes: Option<Vec<ChangeTrackingMode>>,
/// Arbitrary JSON value; presumably a JSON schema — confirm against the API reference.
pub schema: Option<Value>,
pub prompt: Option<String>,
pub tag: Option<String>,
}
/// Change-tracking mode, serialized in kebab-case:
/// `GitDiff` <-> `"git-diff"`, `Json` <-> `"json"`.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum ChangeTrackingMode {
GitDiff,
Json,
}
/// A `selector` / `attribute` pair.
///
/// Both fields are required strings. `skip_serializing_none` currently has no
/// effect on this struct because it has no `Option` fields.
///
/// NOTE(review): `selector` is presumably a CSS selector and `attribute` the
/// name of an element attribute to extract — confirm against the API reference.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
pub struct AttributeSelector {
pub selector: String,
pub attribute: String,
}
/// Optional settings for JSON output.
///
/// Field names are serialized in camelCase (`system_prompt` <-> `"systemPrompt"`),
/// and `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct JsonOptions {
/// Arbitrary JSON value; presumably a JSON schema — confirm against the API reference.
pub schema: Option<Value>,
pub system_prompt: Option<String>,
pub prompt: Option<String>,
}
/// Optional location settings: a `country` and a list of `languages`.
///
/// `None` fields are omitted when serializing.
///
/// NOTE(review): the expected code formats (e.g. ISO country codes) are not
/// visible in this file — confirm against the API reference.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct LocationConfig {
pub country: Option<String>,
pub languages: Option<Vec<String>>,
}
/// Profile settings: a required `name` plus an optional `save_changes` flag.
///
/// `save_changes` is serialized as `"saveChanges"` and omitted when `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProfileConfig {
pub name: String,
pub save_changes: Option<bool>,
}
/// Proxy type, serialized as a lowercase string
/// (`"basic"`, `"stealth"`, `"enhanced"`, `"auto"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ProxyType {
Basic,
Stealth,
Enhanced,
Auto,
}
/// A single action, internally tagged by a `"type"` field whose value is the
/// camelCase variant name (e.g. `{"type": "click", "selector": "#submit"}`).
///
/// Optional fields are omitted from the serialized output when they are `None`.
///
/// Note that `rename_all` on an enum renames only the *variants*; fields inside
/// struct variants keep their Rust names unless they are renamed individually.
/// That is why `Screenshot::full_page` carries an explicit rename below.
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "camelCase")]
pub enum Action {
    /// Serialized with `"type": "wait"`.
    Wait {
        #[serde(skip_serializing_if = "Option::is_none")]
        milliseconds: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        selector: Option<String>,
    },
    /// Serialized with `"type": "screenshot"`.
    Screenshot {
        /// Serialized as `fullPage`, matching [`ScreenshotOptions`]. The
        /// snake_case key `full_page`, which earlier versions of this type
        /// emitted, is still accepted when deserializing.
        #[serde(
            rename = "fullPage",
            alias = "full_page",
            skip_serializing_if = "Option::is_none"
        )]
        full_page: Option<bool>,
        #[serde(skip_serializing_if = "Option::is_none")]
        quality: Option<u8>,
        #[serde(skip_serializing_if = "Option::is_none")]
        viewport: Option<Viewport>,
    },
    /// Serialized with `"type": "click"`.
    Click {
        selector: String,
    },
    /// Serialized with `"type": "write"`.
    Write {
        text: String,
    },
    /// Serialized with `"type": "press"`.
    Press {
        key: String,
    },
    /// Serialized with `"type": "scroll"`.
    Scroll {
        direction: ScrollDirection,
        #[serde(skip_serializing_if = "Option::is_none")]
        selector: Option<String>,
    },
    /// Serialized as `{"type": "scrape"}` (no additional fields).
    Scrape,
    /// Serialized with `"type": "executeJavascript"`.
    #[serde(rename = "executeJavascript")]
    ExecuteJavascript {
        script: String,
    },
    /// Serialized with `"type": "pdf"`.
    Pdf {
        #[serde(skip_serializing_if = "Option::is_none")]
        format: Option<PdfFormat>,
        #[serde(skip_serializing_if = "Option::is_none")]
        landscape: Option<bool>,
        #[serde(skip_serializing_if = "Option::is_none")]
        scale: Option<f32>,
    },
}
/// Scroll direction, serialized as `"up"` or `"down"`.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ScrollDirection {
Up,
Down,
}
/// Paper format used by [`Action::Pdf`].
///
/// There is no `rename_all` attribute, so each variant is serialized under its
/// exact Rust name (e.g. `"A4"`, `"Letter"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
pub enum PdfFormat {
A0,
A1,
A2,
A3,
A4,
A5,
A6,
Letter,
Legal,
Tabloid,
Ledger,
}
/// Webhook settings: a required `url` plus optional `headers`, `metadata`
/// and a list of [`WebhookEvent`]s.
///
/// `None` fields are omitted when serializing. A value can also be built from
/// a bare URL via the `From<String>` / `From<&str>` impls in this file.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct WebhookConfig {
pub url: String,
pub headers: Option<HashMap<String, String>>,
pub metadata: Option<HashMap<String, String>>,
pub events: Option<Vec<WebhookEvent>>,
}
impl From<String> for WebhookConfig {
    /// Builds a config that targets `url`; every other field takes its
    /// `Default` value.
    fn from(url: String) -> Self {
        let defaults = Self::default();
        Self { url, ..defaults }
    }
}
impl From<&str> for WebhookConfig {
    /// Copies `url` into an owned `String` and defers to the `From<String>`
    /// conversion, so every other field takes its `Default` value.
    fn from(url: &str) -> Self {
        Self::from(url.to_owned())
    }
}
/// Event names usable in [`WebhookConfig::events`].
///
/// Serialized in camelCase; since every variant is a single word this yields
/// `"completed"`, `"failed"`, `"page"` and `"started"`.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum WebhookEvent {
Completed,
Failed,
Page,
Started,
}
/// Event names usable in [`AgentWebhookConfig::events`].
///
/// Serialized in camelCase; since every variant is a single word this yields
/// `"started"`, `"action"`, `"completed"`, `"failed"` and `"cancelled"`.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum AgentWebhookEvent {
Started,
Action,
Completed,
Failed,
Cancelled,
}
/// Agent webhook settings: a required `url` plus optional `headers`,
/// `metadata` and a list of [`AgentWebhookEvent`]s.
///
/// `None` fields are omitted when serializing. A value can also be built from
/// a bare URL via the `From<String>` / `From<&str>` impls in this file.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct AgentWebhookConfig {
pub url: String,
pub headers: Option<HashMap<String, String>>,
pub metadata: Option<HashMap<String, String>>,
pub events: Option<Vec<AgentWebhookEvent>>,
}
impl From<String> for AgentWebhookConfig {
    /// Builds a config that targets `url`; every other field takes its
    /// `Default` value.
    fn from(url: String) -> Self {
        let defaults = Self::default();
        Self { url, ..defaults }
    }
}
impl From<&str> for AgentWebhookConfig {
    /// Copies `url` into an owned `String` and defers to the `From<String>`
    /// conversion, so every other field takes its `Default` value.
    fn from(url: &str) -> Self {
        Self::from(url.to_owned())
    }
}
/// Metadata attached to a [`Document`].
///
/// Every field is optional. Field names are serialized in camelCase
/// (`status_code` <-> `"statusCode"`, `og_title` <-> `"ogTitle"`, ...) and
/// `None` fields are omitted when serializing.
///
/// Most string fields are deserialized through `deserialize_string_or_array`
/// (imported from `crate::serde_helpers`). The unit test in this file expects
/// that helper to accept either a plain string or an array of strings, with
/// array elements joined by `", "`. Those fields also carry `default`, so a
/// missing key deserializes to `None` even though a custom deserializer is used.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct DocumentMetadata {
// Explicit rename: the camelCase rule alone would produce "sourceUrl".
#[serde(rename = "sourceURL")]
pub source_url: Option<String>,
pub status_code: Option<u16>,
pub error: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub title: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub description: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub language: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub keywords: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub robots: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_title: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_description: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_url: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_image: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_audio: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_determiner: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_locale: Option<String>,
// Unlike its neighbours, this field is kept as a list and uses the default
// `Option<Vec<String>>` deserializer rather than the string-or-array helper.
pub og_locale_alternate: Option<Vec<String>>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_site_name: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_video: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub article_section: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub article_tag: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub published_time: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub modified_time: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_keywords: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_description: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_subject: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_subject: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_audience: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_type: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_type: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_date: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_date_created: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_created: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub scrape_id: Option<String>,
pub num_pages: Option<u32>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub content_type: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub timezone: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub proxy_used: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub cache_state: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub cached_at: Option<String>,
pub credits_used: Option<u32>,
pub concurrency_limited: Option<bool>,
}
/// A `selector` / `attribute` pair together with a list of string `values`.
///
/// All fields are required. `skip_serializing_none` currently has no effect on
/// this struct because it has no `Option` fields.
///
/// NOTE(review): presumably the extraction result for one [`AttributeSelector`]
/// — confirm against the API reference.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
pub struct AttributeResult {
pub selector: String,
pub attribute: String,
pub values: Vec<String>,
}
/// A document made up of optional parts.
///
/// Every field is optional. Field names are serialized in camelCase
/// (`raw_html` <-> `"rawHtml"`, `change_tracking` <-> `"changeTracking"`) and
/// `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Document {
pub markdown: Option<String>,
pub html: Option<String>,
pub raw_html: Option<String>,
/// Untyped JSON value.
pub json: Option<Value>,
pub summary: Option<String>,
pub metadata: Option<DocumentMetadata>,
pub links: Option<Vec<String>>,
pub images: Option<Vec<String>>,
// NOTE(review): whether this holds a URL or encoded image data is not
// visible in this file — confirm against the API reference.
pub screenshot: Option<String>,
pub audio: Option<String>,
pub attributes: Option<Vec<AttributeResult>>,
/// Untyped map from string keys to arbitrary JSON values.
pub actions: Option<HashMap<String, Value>>,
pub warning: Option<String>,
/// Kept as untyped JSON.
pub change_tracking: Option<Value>,
/// Kept as untyped JSON.
pub branding: Option<Value>,
}
/// Job status.
///
/// Serialized in camelCase; since every variant is a single word this yields
/// `"scraping"`, `"completed"`, `"failed"` and `"cancelled"`.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum JobStatus {
Scraping,
Completed,
Failed,
Cancelled,
}
/// Sitemap handling mode, serialized as `"skip"`, `"include"` or `"only"`.
///
/// NOTE(review): the exact crawl behaviour of each mode is not visible in this
/// file — confirm against the API reference.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum SitemapMode {
Skip,
Include,
Only,
}
/// Agent model identifier.
///
/// Each variant carries an explicit `rename`, which takes precedence over the
/// container-level `rename_all`; the wire values are `"spark-1-pro"` and
/// `"spark-1-mini"`. The explicit names are needed because the kebab-case rule
/// alone would not put a hyphen before the digit.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum AgentModel {
#[serde(rename = "spark-1-pro")]
Spark1Pro,
#[serde(rename = "spark-1-mini")]
Spark1Mini,
}
/// Search source, serialized as `"web"`, `"news"` or `"images"`.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum SearchSource {
Web,
News,
Images,
}
/// Search category, serialized as `"github"`, `"research"` or `"pdf"`.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum SearchCategory {
Github,
Research,
Pdf,
}
/// A web search result: a required `url` plus optional `title`,
/// `description` and `category`.
///
/// `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultWeb {
pub url: String,
pub title: Option<String>,
pub description: Option<String>,
// Free-form string here, not the `SearchCategory` enum.
pub category: Option<String>,
}
/// A news search result.
///
/// Every field is optional. Field names are serialized in camelCase
/// (`image_url` <-> `"imageUrl"`) and `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultNews {
pub title: Option<String>,
pub url: Option<String>,
pub snippet: Option<String>,
// Kept as a raw string; no date parsing happens in this type.
pub date: Option<String>,
pub image_url: Option<String>,
pub position: Option<u32>,
// Free-form string here, not the `SearchCategory` enum.
pub category: Option<String>,
}
/// An image search result.
///
/// Every field is optional. Field names are serialized in camelCase
/// (`image_url` <-> `"imageUrl"`, `image_width` <-> `"imageWidth"`) and `None`
/// fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultImage {
pub title: Option<String>,
pub image_url: Option<String>,
pub image_width: Option<u32>,
pub image_height: Option<u32>,
// NOTE(review): presumably the page hosting the image, as opposed to
// `image_url` — confirm against the API reference.
pub url: Option<String>,
pub position: Option<u32>,
}
/// One error entry in a [`CrawlErrorsResponse`].
///
/// `id`, `url` and `error` are required; `timestamp` and `code` are optional
/// and omitted from the serialized output when `None`. Unlike most structs in
/// this file, this type does not derive `Default`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlError {
pub id: String,
// Kept as a raw string; no timestamp parsing happens in this type.
pub timestamp: Option<String>,
pub url: String,
pub code: Option<String>,
pub error: String,
}
/// A list of [`CrawlError`]s plus a list of `robots_blocked` strings.
///
/// Both fields are required when deserializing. `skip_serializing_none`
/// currently has no effect on this struct because it has no `Option` fields.
///
/// NOTE(review): `robots_blocked` presumably lists URLs blocked by robots.txt
/// — confirm against the API reference.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlErrorsResponse {
pub errors: Vec<CrawlError>,
// The explicit rename yields the same key the camelCase rule would produce.
#[serde(rename = "robotsBlocked")]
pub robots_blocked: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Metadata values that arrive as JSON arrays must come out as a single
    /// `", "`-joined string, while plain strings pass through unchanged.
    #[test]
    fn test_full_document_with_array_metadata() {
        let payload = json!({
            "markdown": "# Hello",
            "metadata": {
                "sourceURL": "https://example.com",
                "statusCode": 200,
                "title": "Example Page",
                "description": ["A great page", "with multiple descriptions"],
                "robots": ["index", "follow"],
                "ogImage": ["https://img.jpg"],
                "language": "en",
                "keywords": ["rust", "sdk", "firecrawl"]
            }
        });

        let document: Document = serde_json::from_value(payload).unwrap();
        assert_eq!(document.markdown.as_deref(), Some("# Hello"));

        let metadata = document.metadata.unwrap();

        // Plain string values are kept as-is.
        assert_eq!(metadata.title.as_deref(), Some("Example Page"));
        assert_eq!(metadata.language.as_deref(), Some("en"));

        // A single-element array collapses to that element.
        assert_eq!(metadata.og_image.as_deref(), Some("https://img.jpg"));

        // Multi-element arrays are joined with ", ".
        assert_eq!(
            metadata.description.as_deref(),
            Some("A great page, with multiple descriptions")
        );
        assert_eq!(metadata.robots.as_deref(), Some("index, follow"));
        assert_eq!(metadata.keywords.as_deref(), Some("rust, sdk, firecrawl"));
    }
}