use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use serde_json::Value;
use std::collections::HashMap;
use crate::serde_helpers::deserialize_string_or_array;
/// Identifies a content format.
///
/// The unit variants are written to and read from the wire as a bare string
/// (see the wire name on each variant). The tuple variants carry an options
/// struct and are written as an object whose `type` field names the variant.
/// Both directions are implemented by hand in the `Serialize` / `Deserialize`
/// impls for this type rather than derived.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Format {
/// Wire name `"markdown"`.
Markdown,
/// Wire name `"html"`.
Html,
/// Wire name `"rawHtml"`.
RawHtml,
/// Wire name `"links"`.
Links,
/// Wire name `"images"`.
Images,
/// Wire name `"screenshot"`.
Screenshot,
/// Wire name `"summary"`.
Summary,
/// Wire name `"changeTracking"`.
ChangeTracking,
/// Wire name `"json"`.
Json,
/// Wire name `"attributes"`.
Attributes,
/// Wire name `"branding"`.
Branding,
/// Wire name `"audio"`.
Audio,
/// Wire name `"video"`.
Video,
/// Object form with `type: "question"`.
Question(QuestionFormat),
/// Object form with `type: "highlights"`.
Highlights(HighlightsFormat),
/// Object form with `type: "query"`.
Query(QueryFormat),
}
impl Serialize for Format {
    /// Writes unit variants as their wire-name string and delegates the
    /// payload-carrying variants to the payload's own `Serialize` impl.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Payload variants leave early; every remaining variant maps to a
        // fixed string that is emitted once at the bottom.
        let wire_name = match self {
            Format::Question(payload) => return payload.serialize(serializer),
            Format::Highlights(payload) => return payload.serialize(serializer),
            Format::Query(payload) => return payload.serialize(serializer),
            Format::Markdown => "markdown",
            Format::Html => "html",
            Format::RawHtml => "rawHtml",
            Format::Links => "links",
            Format::Images => "images",
            Format::Screenshot => "screenshot",
            Format::Summary => "summary",
            Format::ChangeTracking => "changeTracking",
            Format::Json => "json",
            Format::Attributes => "attributes",
            Format::Branding => "branding",
            Format::Audio => "audio",
            Format::Video => "video",
        };
        serializer.serialize_str(wire_name)
    }
}
impl<'de> Deserialize<'de> for Format {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let value = Value::deserialize(deserializer)?;
match value {
Value::String(format) => match format.as_str() {
"markdown" => Ok(Format::Markdown),
"html" => Ok(Format::Html),
"rawHtml" => Ok(Format::RawHtml),
"links" => Ok(Format::Links),
"images" => Ok(Format::Images),
"screenshot" => Ok(Format::Screenshot),
"summary" => Ok(Format::Summary),
"changeTracking" => Ok(Format::ChangeTracking),
"json" => Ok(Format::Json),
"attributes" => Ok(Format::Attributes),
"branding" => Ok(Format::Branding),
"audio" => Ok(Format::Audio),
"video" => Ok(Format::Video),
_ => Err(de::Error::custom(format!("unknown format: {}", format))),
},
Value::Object(_) => match value.get("type").and_then(Value::as_str) {
Some("question") => QuestionFormat::deserialize(value)
.map(Format::Question)
.map_err(de::Error::custom),
Some("highlights") => HighlightsFormat::deserialize(value)
.map(Format::Highlights)
.map_err(de::Error::custom),
Some("query") => QueryFormat::deserialize(value)
.map(Format::Query)
.map_err(de::Error::custom),
Some(format_type) => Err(de::Error::custom(format!(
"unknown object format: {}",
format_type
))),
None => Err(de::Error::custom("object format must have a type")),
},
_ => Err(de::Error::custom("format must be a string or object")),
}
}
}
/// Payload of [`Format::Question`].
///
/// On the wire this is an object with `type: "question"` plus the `question`
/// field; the `type` tag is added and checked by the hand-written
/// `Serialize` / `Deserialize` impls and is not stored here.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct QuestionFormat {
/// The question text, written to the wire under the key `question`.
pub question: String,
}
// Private wire representation of `QuestionFormat`: the public struct's field
// plus the `type` discriminator, so the derived serde impls can read and
// write the tagged object form.
#[derive(Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
struct QuestionFormatWire {
// Serialized under the key `type`.
#[serde(rename = "type")]
format_type: String,
question: String,
}
impl Serialize for QuestionFormat {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
QuestionFormatWire {
format_type: "question".to_string(),
question: self.question.clone(),
}
.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for QuestionFormat {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let wire = QuestionFormatWire::deserialize(deserializer)?;
if wire.format_type != "question" {
return Err(de::Error::custom(
"question format object must have type question",
));
}
Ok(Self {
question: wire.question,
})
}
}
/// Payload of [`Format::Highlights`].
///
/// On the wire this is an object with `type: "highlights"` plus the `query`
/// field; the `type` tag is added and checked by the hand-written
/// `Serialize` / `Deserialize` impls and is not stored here.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct HighlightsFormat {
/// The query text, written to the wire under the key `query`.
pub query: String,
}
// Private wire representation of `HighlightsFormat`: the public struct's
// field plus the `type` discriminator, so the derived serde impls can read
// and write the tagged object form.
#[derive(Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
struct HighlightsFormatWire {
// Serialized under the key `type`.
#[serde(rename = "type")]
format_type: String,
query: String,
}
impl Serialize for HighlightsFormat {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
HighlightsFormatWire {
format_type: "highlights".to_string(),
query: self.query.clone(),
}
.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for HighlightsFormat {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let wire = HighlightsFormatWire::deserialize(deserializer)?;
if wire.format_type != "highlights" {
return Err(de::Error::custom(
"highlights format object must have type highlights",
));
}
Ok(Self { query: wire.query })
}
}
/// Payload of [`Format::Query`].
///
/// On the wire this is an object with `type: "query"`, a `prompt`, and an
/// optional `mode`; the `type` tag is added and checked by the hand-written
/// `Serialize` / `Deserialize` impls and is not stored here.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct QueryFormat {
/// The prompt text, written to the wire under the key `prompt`.
pub prompt: String,
/// Optional mode; left out of the serialized object when `None`.
pub mode: Option<QueryFormatMode>,
}
// Private wire representation of `QueryFormat`: the public struct's fields
// plus the `type` discriminator, so the derived serde impls can read and
// write the tagged object form.
#[derive(Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
struct QueryFormatWire {
// Serialized under the key `type`.
#[serde(rename = "type")]
format_type: String,
prompt: String,
// Omitted from the output entirely when no mode is set.
#[serde(skip_serializing_if = "Option::is_none")]
mode: Option<QueryFormatMode>,
}
impl Serialize for QueryFormat {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
QueryFormatWire {
format_type: "query".to_string(),
prompt: self.prompt.clone(),
mode: self.mode,
}
.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for QueryFormat {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let wire = QueryFormatWire::deserialize(deserializer)?;
if wire.format_type != "query" {
return Err(de::Error::custom(
"query format object must have type query",
));
}
Ok(Self {
prompt: wire.prompt,
mode: wire.mode,
})
}
}
/// Mode selector carried by [`QueryFormat`].
///
/// Each variant has an explicit wire name given by its `rename` attribute.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
pub enum QueryFormatMode {
/// Wire name `"freeform"`.
#[serde(rename = "freeform")]
Freeform,
/// Wire name `"directQuote"`.
#[serde(rename = "directQuote")]
DirectQuote,
}
/// A width/height pair, serialized with the keys `width` and `height`.
///
/// NOTE(review): units are not stated in this file — presumably pixels;
/// confirm against the API documentation.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
pub struct Viewport {
pub width: u32,
pub height: u32,
}
/// Optional screenshot settings.
///
/// Keys are camelCase on the wire (`fullPage`, `quality`, `viewport`), and
/// fields that are `None` are left out of the serialized output.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ScreenshotOptions {
/// Wire key `fullPage`.
pub full_page: Option<bool>,
/// NOTE(review): valid range and meaning of the value are not shown here.
pub quality: Option<u8>,
pub viewport: Option<Viewport>,
}
/// Optional change-tracking settings.
///
/// Keys are camelCase on the wire, and fields that are `None` are left out
/// of the serialized output.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ChangeTrackingOptions {
pub modes: Option<Vec<ChangeTrackingMode>>,
/// Arbitrary JSON value; its expected shape is not defined in this file.
pub schema: Option<Value>,
pub prompt: Option<String>,
pub tag: Option<String>,
}
/// Mode entry for [`ChangeTrackingOptions::modes`], kebab-case on the wire.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum ChangeTrackingMode {
/// Wire name `"git-diff"`.
GitDiff,
/// Wire name `"json"`.
Json,
}
/// A selector paired with an attribute name; both fields are required and
/// are serialized under their own names.
///
/// NOTE(review): presumably `selector` is a CSS selector and `attribute` the
/// attribute to read from matched elements — confirm against the API docs.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
pub struct AttributeSelector {
pub selector: String,
pub attribute: String,
}
/// Optional settings for JSON output.
///
/// Keys are camelCase on the wire (`schema`, `systemPrompt`, `prompt`), and
/// fields that are `None` are left out of the serialized output.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct JsonOptions {
/// Arbitrary JSON value; its expected shape is not defined in this file.
pub schema: Option<Value>,
/// Wire key `systemPrompt`.
pub system_prompt: Option<String>,
pub prompt: Option<String>,
}
/// Optional location settings: a country and a list of languages.
///
/// Fields that are `None` are left out of the serialized output.
/// NOTE(review): the expected code formats (e.g. ISO country codes) are not
/// shown in this file — confirm against the API docs.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct LocationConfig {
pub country: Option<String>,
pub languages: Option<Vec<String>>,
}
/// Profile settings: a required `name` and an optional `saveChanges` flag.
///
/// Keys are camelCase on the wire; `save_changes` is left out of the
/// serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProfileConfig {
pub name: String,
/// Wire key `saveChanges`.
pub save_changes: Option<bool>,
}
/// Proxy selection, lowercase on the wire.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ProxyType {
/// Wire name `"basic"`.
Basic,
/// Wire name `"stealth"`.
Stealth,
/// Wire name `"enhanced"`.
Enhanced,
/// Wire name `"auto"`.
Auto,
}
/// A single action, serialized as an internally tagged object.
///
/// The variant is identified by a `type` field holding the camelCase variant
/// name (`"wait"`, `"screenshot"`, `"click"`, `"write"`, `"press"`,
/// `"scroll"`, `"scrape"`, `"executeJavascript"`, `"pdf"`); the variant's
/// fields sit next to it in the same object. Optional fields are left out of
/// the serialized output when `None`.
///
/// The enum-level `rename_all` only renames *variants*. Fields inside struct
/// variants keep their Rust names unless renamed individually, which is why
/// `Screenshot::full_page` carries an explicit rename.
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "camelCase")]
pub enum Action {
    /// `type: "wait"` with an optional duration and/or selector.
    Wait {
        #[serde(skip_serializing_if = "Option::is_none")]
        milliseconds: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        selector: Option<String>,
    },
    /// `type: "screenshot"` with the same options as [`ScreenshotOptions`].
    Screenshot {
        // Written as `fullPage`, matching `ScreenshotOptions`. Without the
        // rename this field went out as `full_page`. The snake_case spelling
        // stays accepted on input so previously serialized values still load.
        #[serde(
            rename = "fullPage",
            alias = "full_page",
            skip_serializing_if = "Option::is_none"
        )]
        full_page: Option<bool>,
        #[serde(skip_serializing_if = "Option::is_none")]
        quality: Option<u8>,
        #[serde(skip_serializing_if = "Option::is_none")]
        viewport: Option<Viewport>,
    },
    /// `type: "click"` on the given selector.
    Click {
        selector: String,
    },
    /// `type: "write"` with the text to write.
    Write {
        text: String,
    },
    /// `type: "press"` with the key to press.
    Press {
        key: String,
    },
    /// `type: "scroll"` in a direction, optionally scoped to a selector.
    Scroll {
        direction: ScrollDirection,
        #[serde(skip_serializing_if = "Option::is_none")]
        selector: Option<String>,
    },
    /// `type: "scrape"`; carries no fields.
    Scrape,
    /// `type: "executeJavascript"` with the script source.
    #[serde(rename = "executeJavascript")]
    ExecuteJavascript {
        script: String,
    },
    /// `type: "pdf"` with optional page format, orientation and scale.
    Pdf {
        #[serde(skip_serializing_if = "Option::is_none")]
        format: Option<PdfFormat>,
        #[serde(skip_serializing_if = "Option::is_none")]
        landscape: Option<bool>,
        #[serde(skip_serializing_if = "Option::is_none")]
        scale: Option<f32>,
    },
}
/// Direction for [`Action::Scroll`], lowercase on the wire.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ScrollDirection {
/// Wire name `"up"`.
Up,
/// Wire name `"down"`.
Down,
}
/// Page format for [`Action::Pdf`].
///
/// No rename attribute is applied, so each variant is serialized under its
/// identifier exactly as written here (for example `"A4"` or `"Letter"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
pub enum PdfFormat {
A0,
A1,
A2,
A3,
A4,
A5,
A6,
Letter,
Legal,
Tabloid,
Ledger,
}
/// Webhook settings: a required `url` plus optional headers, metadata and
/// an event list.
///
/// Fields that are `None` are left out of the serialized output. A value can
/// also be built from just a URL through the `From<String>` / `From<&str>`
/// impls, which leave every optional field as `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct WebhookConfig {
pub url: String,
pub headers: Option<HashMap<String, String>>,
pub metadata: Option<HashMap<String, String>>,
pub events: Option<Vec<WebhookEvent>>,
}
impl From<String> for WebhookConfig {
    /// Builds a config that carries only the given URL; every other field
    /// takes its `Default` value.
    fn from(url: String) -> Self {
        let defaults = Self::default();
        Self { url, ..defaults }
    }
}
impl From<&str> for WebhookConfig {
    /// Builds a config that carries only the given URL (copied into an owned
    /// `String`); every other field takes its `Default` value.
    fn from(url: &str) -> Self {
        let defaults = Self::default();
        Self {
            url: String::from(url),
            ..defaults
        }
    }
}
/// Event entry for [`WebhookConfig::events`], camelCase on the wire.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum WebhookEvent {
/// Wire name `"completed"`.
Completed,
/// Wire name `"failed"`.
Failed,
/// Wire name `"page"`.
Page,
/// Wire name `"started"`.
Started,
}
/// Event entry for [`AgentWebhookConfig::events`], camelCase on the wire.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum AgentWebhookEvent {
/// Wire name `"started"`.
Started,
/// Wire name `"action"`.
Action,
/// Wire name `"completed"`.
Completed,
/// Wire name `"failed"`.
Failed,
/// Wire name `"cancelled"`.
Cancelled,
}
/// Agent webhook settings: a required `url` plus optional headers, metadata
/// and an event list.
///
/// Has the same shape as `WebhookConfig` except that `events` holds
/// [`AgentWebhookEvent`] values. Fields that are `None` are left out of the
/// serialized output.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct AgentWebhookConfig {
pub url: String,
pub headers: Option<HashMap<String, String>>,
pub metadata: Option<HashMap<String, String>>,
pub events: Option<Vec<AgentWebhookEvent>>,
}
impl From<String> for AgentWebhookConfig {
    /// Builds a config that carries only the given URL; every other field
    /// takes its `Default` value.
    fn from(url: String) -> Self {
        let defaults = Self::default();
        Self { url, ..defaults }
    }
}
impl From<&str> for AgentWebhookConfig {
    /// Builds a config that carries only the given URL (copied into an owned
    /// `String`); every other field takes its `Default` value.
    fn from(url: &str) -> Self {
        let defaults = Self::default();
        Self {
            url: String::from(url),
            ..defaults
        }
    }
}
/// Metadata attached to a [`Document`]. Every field is optional.
///
/// Keys are camelCase on the wire, except `source_url`, which is explicitly
/// renamed to `sourceURL`. Fields that are `None` are left out of the
/// serialized output.
///
/// Most string fields are read through the crate helper
/// `deserialize_string_or_array`, so the incoming value may be either a
/// single string or an array. The test in this file shows an array being
/// collapsed into one string joined with `", "`. `default` is set on those
/// fields so a missing key yields `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct DocumentMetadata {
/// Wire key `sourceURL` (not the camelCase default `sourceUrl`).
#[serde(rename = "sourceURL")]
pub source_url: Option<String>,
pub status_code: Option<u16>,
pub error: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub title: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub description: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub language: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub keywords: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub robots: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_title: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_description: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_url: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_image: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_audio: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_determiner: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_locale: Option<String>,
/// Kept as a list; this field does not go through the string-or-array helper.
pub og_locale_alternate: Option<Vec<String>>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_site_name: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_video: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub article_section: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub article_tag: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub published_time: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub modified_time: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_keywords: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_description: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_subject: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_subject: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_audience: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_type: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_type: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_date: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_date_created: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_created: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub scrape_id: Option<String>,
pub num_pages: Option<u32>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub content_type: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub timezone: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub proxy_used: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub cache_state: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub cached_at: Option<String>,
pub credits_used: Option<u32>,
pub concurrency_limited: Option<bool>,
}
/// One entry of [`Document::attributes`]: a selector, an attribute name, and
/// a list of string values. All three fields are required on input.
///
/// NOTE(review): presumably `values` holds the attribute values found for
/// `selector` — confirm against the API docs.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
pub struct AttributeResult {
pub selector: String,
pub attribute: String,
pub values: Vec<String>,
}
/// A document with optional content in several formats.
///
/// Every field is optional. Keys are camelCase on the wire (`rawHtml`,
/// `changeTracking`, ...), and fields that are `None` are left out of the
/// serialized output.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Document {
pub markdown: Option<String>,
pub html: Option<String>,
/// Wire key `rawHtml`.
pub raw_html: Option<String>,
/// Arbitrary JSON value; its shape is not defined in this file.
pub json: Option<Value>,
pub summary: Option<String>,
pub metadata: Option<DocumentMetadata>,
pub links: Option<Vec<String>>,
pub images: Option<Vec<String>>,
pub screenshot: Option<String>,
pub audio: Option<String>,
pub video: Option<String>,
pub attributes: Option<Vec<AttributeResult>>,
/// Untyped map of string keys to JSON values.
pub actions: Option<HashMap<String, Value>>,
pub answer: Option<String>,
pub highlights: Option<String>,
pub warning: Option<String>,
/// Wire key `changeTracking`; kept as untyped JSON.
pub change_tracking: Option<Value>,
/// Kept as untyped JSON.
pub branding: Option<Value>,
}
/// Status of a job, camelCase on the wire.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum JobStatus {
/// Wire name `"scraping"`.
Scraping,
/// Wire name `"completed"`.
Completed,
/// Wire name `"failed"`.
Failed,
/// Wire name `"cancelled"`.
Cancelled,
}
/// Sitemap handling mode, lowercase on the wire.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum SitemapMode {
/// Wire name `"skip"`.
Skip,
/// Wire name `"include"`.
Include,
/// Wire name `"only"`.
Only,
}
/// Agent model identifier.
///
/// Each variant carries an explicit `rename`, which takes precedence over
/// the container-level `rename_all` and fixes the exact wire name.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum AgentModel {
/// Wire name `"spark-1-pro"`.
#[serde(rename = "spark-1-pro")]
Spark1Pro,
/// Wire name `"spark-1-mini"`.
#[serde(rename = "spark-1-mini")]
Spark1Mini,
}
/// Search source, lowercase on the wire.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum SearchSource {
/// Wire name `"web"`.
Web,
/// Wire name `"news"`.
News,
/// Wire name `"images"`.
Images,
}
/// Search category, lowercase on the wire.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum SearchCategory {
/// Wire name `"github"`.
Github,
/// Wire name `"research"`.
Research,
/// Wire name `"pdf"`.
Pdf,
}
/// A web search result: a required `url` plus optional title, description
/// and category.
///
/// Fields that are `None` are left out of the serialized output.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultWeb {
pub url: String,
pub title: Option<String>,
pub description: Option<String>,
/// Free-form string here, not the `SearchCategory` enum.
pub category: Option<String>,
}
/// A news search result. Every field is optional.
///
/// Keys are camelCase on the wire (`imageUrl`, ...), and fields that are
/// `None` are left out of the serialized output.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultNews {
pub title: Option<String>,
pub url: Option<String>,
pub snippet: Option<String>,
/// Kept as a raw string; no date format is defined in this file.
pub date: Option<String>,
/// Wire key `imageUrl`.
pub image_url: Option<String>,
pub position: Option<u32>,
/// Free-form string here, not the `SearchCategory` enum.
pub category: Option<String>,
}
/// An image search result. Every field is optional.
///
/// Keys are camelCase on the wire (`imageUrl`, `imageWidth`, `imageHeight`,
/// ...), and fields that are `None` are left out of the serialized output.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultImage {
pub title: Option<String>,
/// Wire key `imageUrl`.
pub image_url: Option<String>,
/// Wire key `imageWidth`.
pub image_width: Option<u32>,
/// Wire key `imageHeight`.
pub image_height: Option<u32>,
pub url: Option<String>,
pub position: Option<u32>,
}
/// One error entry of a [`CrawlErrorsResponse`].
///
/// `id`, `url` and `error` are required on input; `timestamp` and `code` are
/// optional and left out of the serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlError {
pub id: String,
/// Kept as a raw string; no timestamp format is defined in this file.
pub timestamp: Option<String>,
pub url: String,
pub code: Option<String>,
pub error: String,
}
/// Response body listing crawl errors and robots-blocked entries.
///
/// Both fields are required: neither is an `Option` nor has a serde
/// `default`, so deserialization fails if either key is missing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlErrorsResponse {
pub errors: Vec<CrawlError>,
/// Wire key `robotsBlocked`.
/// NOTE(review): presumably the URLs blocked by robots.txt — confirm.
#[serde(rename = "robotsBlocked")]
pub robots_blocked: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A document whose metadata mixes plain strings and string arrays must
    /// deserialize, with each array collapsed into one `", "`-joined string.
    #[test]
    fn test_full_document_with_array_metadata() {
        let payload = json!({
            "markdown": "# Hello",
            "video": "https://storage.googleapis.com/firecrawl/video.mp4",
            "metadata": {
                "sourceURL": "https://example.com",
                "statusCode": 200,
                "title": "Example Page",
                "description": ["A great page", "with multiple descriptions"],
                "robots": ["index", "follow"],
                "ogImage": ["https://img.jpg"],
                "language": "en",
                "keywords": ["rust", "sdk", "firecrawl"]
            }
        });

        let document: Document = serde_json::from_value(payload).unwrap();

        // Top-level content fields pass through untouched.
        assert_eq!(document.markdown.as_deref(), Some("# Hello"));
        assert_eq!(
            document.video.as_deref(),
            Some("https://storage.googleapis.com/firecrawl/video.mp4")
        );

        let metadata = document.metadata.unwrap();

        // Plain strings stay as they are.
        assert_eq!(metadata.title.as_deref(), Some("Example Page"));
        assert_eq!(metadata.language.as_deref(), Some("en"));

        // Arrays are joined with ", "; a one-element array yields that element.
        assert_eq!(
            metadata.description.as_deref(),
            Some("A great page, with multiple descriptions")
        );
        assert_eq!(metadata.robots.as_deref(), Some("index, follow"));
        assert_eq!(metadata.og_image.as_deref(), Some("https://img.jpg"));
        assert_eq!(metadata.keywords.as_deref(), Some("rust, sdk, firecrawl"));
    }
}