use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use serde_json::Value;
use std::collections::HashMap;
use crate::serde_helpers::deserialize_string_or_array;
/// An output format selector.
///
/// Unit variants are (de)serialized as bare strings (for example
/// `Markdown` <-> `"markdown"`, `RawHtml` <-> `"rawHtml"`); the variants that
/// carry a payload are (de)serialized as JSON objects tagged with a `type`
/// key. See the hand-written `Serialize`/`Deserialize` impls for the exact
/// string for each variant.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Format {
Markdown,
Html,
RawHtml,
Links,
Images,
Screenshot,
Summary,
ChangeTracking,
Json,
Attributes,
Branding,
Product,
Menu,
Audio,
Video,
/// Object format with `type: "question"`.
Question(QuestionFormat),
/// Object format with `type: "highlights"`.
Highlights(HighlightsFormat),
/// Object format with `type: "query"`.
Query(QueryFormat),
}
impl Serialize for Format {
    /// Writes unit variants as their wire string and delegates payload
    /// variants to the payload's own `Serialize` impl (which emits an object
    /// carrying a `type` tag).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let tag = match self {
            // Payload variants produce objects, so hand off and return early.
            Format::Question(payload) => return payload.serialize(serializer),
            Format::Highlights(payload) => return payload.serialize(serializer),
            Format::Query(payload) => return payload.serialize(serializer),
            // Everything else is a plain string.
            Format::Markdown => "markdown",
            Format::Html => "html",
            Format::RawHtml => "rawHtml",
            Format::Links => "links",
            Format::Images => "images",
            Format::Screenshot => "screenshot",
            Format::Summary => "summary",
            Format::ChangeTracking => "changeTracking",
            Format::Json => "json",
            Format::Attributes => "attributes",
            Format::Branding => "branding",
            Format::Product => "product",
            Format::Menu => "menu",
            Format::Audio => "audio",
            Format::Video => "video",
        };
        serializer.serialize_str(tag)
    }
}
impl<'de> Deserialize<'de> for Format {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let value = Value::deserialize(deserializer)?;
match value {
Value::String(format) => match format.as_str() {
"markdown" => Ok(Format::Markdown),
"html" => Ok(Format::Html),
"rawHtml" => Ok(Format::RawHtml),
"links" => Ok(Format::Links),
"images" => Ok(Format::Images),
"screenshot" => Ok(Format::Screenshot),
"summary" => Ok(Format::Summary),
"changeTracking" => Ok(Format::ChangeTracking),
"json" => Ok(Format::Json),
"attributes" => Ok(Format::Attributes),
"branding" => Ok(Format::Branding),
"product" => Ok(Format::Product),
"menu" => Ok(Format::Menu),
"audio" => Ok(Format::Audio),
"video" => Ok(Format::Video),
_ => Err(de::Error::custom(format!("unknown format: {}", format))),
},
Value::Object(_) => match value.get("type").and_then(Value::as_str) {
Some("question") => QuestionFormat::deserialize(value)
.map(Format::Question)
.map_err(de::Error::custom),
Some("highlights") => HighlightsFormat::deserialize(value)
.map(Format::Highlights)
.map_err(de::Error::custom),
Some("query") => QueryFormat::deserialize(value)
.map(Format::Query)
.map_err(de::Error::custom),
Some(format_type) => Err(de::Error::custom(format!(
"unknown object format: {}",
format_type
))),
None => Err(de::Error::custom("object format must have a type")),
},
_ => Err(de::Error::custom("format must be a string or object")),
}
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct QuestionFormat {
pub question: String,
}
#[derive(Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
struct QuestionFormatWire {
#[serde(rename = "type")]
format_type: String,
question: String,
}
impl Serialize for QuestionFormat {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
QuestionFormatWire {
format_type: "question".to_string(),
question: self.question.clone(),
}
.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for QuestionFormat {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let wire = QuestionFormatWire::deserialize(deserializer)?;
if wire.format_type != "question" {
return Err(de::Error::custom(
"question format object must have type question",
));
}
Ok(Self {
question: wire.question,
})
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct HighlightsFormat {
pub query: String,
}
#[derive(Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
struct HighlightsFormatWire {
#[serde(rename = "type")]
format_type: String,
query: String,
}
impl Serialize for HighlightsFormat {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
HighlightsFormatWire {
format_type: "highlights".to_string(),
query: self.query.clone(),
}
.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for HighlightsFormat {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let wire = HighlightsFormatWire::deserialize(deserializer)?;
if wire.format_type != "highlights" {
return Err(de::Error::custom(
"highlights format object must have type highlights",
));
}
Ok(Self { query: wire.query })
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct QueryFormat {
pub prompt: String,
pub mode: Option<QueryFormatMode>,
}
#[derive(Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
struct QueryFormatWire {
#[serde(rename = "type")]
format_type: String,
prompt: String,
#[serde(skip_serializing_if = "Option::is_none")]
mode: Option<QueryFormatMode>,
}
impl Serialize for QueryFormat {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
QueryFormatWire {
format_type: "query".to_string(),
prompt: self.prompt.clone(),
mode: self.mode,
}
.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for QueryFormat {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let wire = QueryFormatWire::deserialize(deserializer)?;
if wire.format_type != "query" {
return Err(de::Error::custom(
"query format object must have type query",
));
}
Ok(Self {
prompt: wire.prompt,
mode: wire.mode,
})
}
}
/// Mode attached to a `QueryFormat`.
///
/// Wire strings are `"freeform"` and `"directQuote"`.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
pub enum QueryFormatMode {
#[serde(rename = "freeform")]
Freeform,
#[serde(rename = "directQuote")]
DirectQuote,
}
/// A width/height pair, used by `ScreenshotOptions::viewport` and
/// `Action::Screenshot`. Units are presumably pixels — confirm against the
/// API documentation.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
pub struct Viewport {
pub width: u32,
pub height: u32,
}
/// Optional screenshot settings.
///
/// Field names are camelCase on the wire (`fullPage`, `quality`, `viewport`)
/// and `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ScreenshotOptions {
pub full_page: Option<bool>,
pub quality: Option<u8>,
pub viewport: Option<Viewport>,
}
/// Optional change-tracking settings; `None` fields are omitted when
/// serializing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ChangeTrackingOptions {
pub modes: Option<Vec<ChangeTrackingMode>>,
/// Arbitrary JSON value; presumably a JSON schema — confirm.
pub schema: Option<Value>,
pub prompt: Option<String>,
pub tag: Option<String>,
}
/// Change-tracking mode; kebab-case on the wire (`"git-diff"`, `"json"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum ChangeTrackingMode {
GitDiff,
Json,
}
/// Pairs a `selector` string with an `attribute` name.
///
/// NOTE(review): presumably a CSS selector plus the HTML attribute to read
/// from matching elements — confirm against the API documentation.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
pub struct AttributeSelector {
pub selector: String,
pub attribute: String,
}
/// Optional settings for JSON output.
///
/// Field names are camelCase on the wire (`schema`, `systemPrompt`,
/// `prompt`) and `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct JsonOptions {
pub schema: Option<Value>,
pub system_prompt: Option<String>,
pub prompt: Option<String>,
}
/// Optional location settings: a country string and a list of language
/// strings. `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct LocationConfig {
pub country: Option<String>,
pub languages: Option<Vec<String>>,
}
/// Profile settings: a required `name` and an optional `saveChanges` flag
/// (omitted from the output when `None`).
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProfileConfig {
pub name: String,
pub save_changes: Option<bool>,
}
/// Proxy selection; lowercase on the wire (`"basic"`, `"stealth"`,
/// `"enhanced"`, `"auto"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ProxyType {
Basic,
Stealth,
Enhanced,
Auto,
}
/// A single page action, internally tagged with a `type` key.
///
/// Variant names are camelCase on the wire (`"wait"`, `"screenshot"`,
/// `"click"`, `"write"`, `"press"`, `"scroll"`, `"scrape"`,
/// `"executeJavascript"`, `"pdf"`). Optional fields are omitted when `None`.
///
/// Note that a container-level `rename_all` on an enum renames *variants*
/// only; fields inside struct variants keep their Rust names unless renamed
/// explicitly.
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "camelCase")]
pub enum Action {
    /// Wait, optionally for a number of milliseconds and/or for a selector.
    Wait {
        #[serde(skip_serializing_if = "Option::is_none")]
        milliseconds: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        selector: Option<String>,
    },
    /// Take a screenshot.
    Screenshot {
        // The enum-level `rename_all` does not reach this field, so without
        // an explicit rename it would be emitted as `full_page`, unlike
        // `ScreenshotOptions`, which uses `fullPage`. The alias keeps
        // payloads written with the old snake_case key readable.
        #[serde(
            rename = "fullPage",
            alias = "full_page",
            skip_serializing_if = "Option::is_none"
        )]
        full_page: Option<bool>,
        #[serde(skip_serializing_if = "Option::is_none")]
        quality: Option<u8>,
        #[serde(skip_serializing_if = "Option::is_none")]
        viewport: Option<Viewport>,
    },
    /// Click the element matching `selector`.
    Click {
        selector: String,
    },
    /// Write `text`.
    Write {
        text: String,
    },
    /// Press `key`.
    Press {
        key: String,
    },
    /// Scroll in `direction`, optionally within `selector`.
    Scroll {
        direction: ScrollDirection,
        #[serde(skip_serializing_if = "Option::is_none")]
        selector: Option<String>,
    },
    /// Unit action serialized as `{"type": "scrape"}`.
    Scrape,
    /// Run `script`.
    #[serde(rename = "executeJavascript")]
    ExecuteJavascript {
        script: String,
    },
    /// Produce a PDF with optional page format, orientation and scale.
    Pdf {
        #[serde(skip_serializing_if = "Option::is_none")]
        format: Option<PdfFormat>,
        #[serde(skip_serializing_if = "Option::is_none")]
        landscape: Option<bool>,
        #[serde(skip_serializing_if = "Option::is_none")]
        scale: Option<f32>,
    },
}
/// Scroll direction; lowercase on the wire (`"up"`, `"down"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ScrollDirection {
Up,
Down,
}
/// Page format for `Action::Pdf`.
///
/// No rename attribute is applied, so each variant is (de)serialized under
/// its Rust name verbatim (for example `"A4"`, `"Letter"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
pub enum PdfFormat {
A0,
A1,
A2,
A3,
A4,
A5,
A6,
Letter,
Legal,
Tabloid,
Ledger,
}
/// Webhook settings: a required `url` plus optional headers, metadata and
/// an event filter. `None` fields are omitted when serializing.
///
/// Can be built from a bare URL via `From<String>` / `From<&str>`, which
/// leave every optional field at its default (`None`).
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct WebhookConfig {
    pub url: String,
    pub headers: Option<HashMap<String, String>>,
    pub metadata: Option<HashMap<String, String>>,
    pub events: Option<Vec<WebhookEvent>>,
}

impl From<String> for WebhookConfig {
    /// Builds a config that only sets `url`.
    fn from(url: String) -> Self {
        let defaults = Self::default();
        Self { url, ..defaults }
    }
}

impl From<&str> for WebhookConfig {
    /// Builds a config that only sets `url`, copying the borrowed string.
    fn from(url: &str) -> Self {
        Self::from(url.to_owned())
    }
}
/// Event names accepted in `WebhookConfig::events`; camelCase on the wire
/// (`"completed"`, `"failed"`, `"page"`, `"started"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum WebhookEvent {
Completed,
Failed,
Page,
Started,
}
/// Event names accepted in `AgentWebhookConfig::events`; camelCase on the
/// wire (`"started"`, `"action"`, `"completed"`, `"failed"`, `"cancelled"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum AgentWebhookEvent {
Started,
Action,
Completed,
Failed,
Cancelled,
}
/// Agent webhook settings: a required `url` plus optional headers, metadata
/// and an event filter. `None` fields are omitted when serializing.
///
/// Can be built from a bare URL via `From<String>` / `From<&str>`, which
/// leave every optional field at its default (`None`).
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct AgentWebhookConfig {
    pub url: String,
    pub headers: Option<HashMap<String, String>>,
    pub metadata: Option<HashMap<String, String>>,
    pub events: Option<Vec<AgentWebhookEvent>>,
}

impl From<String> for AgentWebhookConfig {
    /// Builds a config that only sets `url`.
    fn from(url: String) -> Self {
        let defaults = Self::default();
        Self { url, ..defaults }
    }
}

impl From<&str> for AgentWebhookConfig {
    /// Builds a config that only sets `url`, copying the borrowed string.
    fn from(url: &str) -> Self {
        Self::from(url.to_owned())
    }
}
/// Metadata attached to a `Document`.
///
/// Every field is optional and omitted from the output when `None`. Names
/// are camelCase on the wire, except `source_url`, which is explicitly
/// renamed to `sourceURL`.
///
/// Fields marked with `deserialize_with = "deserialize_string_or_array"` are
/// decoded by that helper from `crate::serde_helpers`. Per this module's
/// tests, it accepts either a string or an array of strings and joins array
/// items with `", "`. The accompanying `default` makes a missing key decode
/// as `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct DocumentMetadata {
#[serde(rename = "sourceURL")]
pub source_url: Option<String>,
pub status_code: Option<u16>,
pub error: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub title: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub description: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub language: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub keywords: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub robots: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_title: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_description: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_url: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_image: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_audio: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_determiner: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_locale: Option<String>,
// Unlike its neighbours, this field keeps the list form instead of going
// through the string-or-array helper.
pub og_locale_alternate: Option<Vec<String>>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_site_name: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub og_video: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub article_section: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub article_tag: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub published_time: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub modified_time: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_keywords: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_description: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_subject: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_subject: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_audience: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_type: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_type: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_date: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dc_date_created: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub dcterms_created: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub scrape_id: Option<String>,
pub num_pages: Option<u32>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub content_type: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub timezone: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub proxy_used: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub cache_state: Option<String>,
#[serde(default, deserialize_with = "deserialize_string_or_array")]
pub cached_at: Option<String>,
pub credits_used: Option<u32>,
pub concurrency_limited: Option<bool>,
}
/// One entry of `Document::attributes`: a `selector`/`attribute` pair
/// together with the list of string `values` reported for it.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
pub struct AttributeResult {
pub selector: String,
pub attribute: String,
pub values: Vec<String>,
}
/// A result document.
///
/// Every field is optional; names are camelCase on the wire (`rawHtml`,
/// `changeTracking`, ...) and `None` fields are omitted when serializing.
/// NOTE(review): which fields are populated presumably depends on the
/// requested `Format`s — confirm against the API documentation.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Document {
pub markdown: Option<String>,
pub html: Option<String>,
pub raw_html: Option<String>,
/// Untyped JSON payload.
pub json: Option<Value>,
pub summary: Option<String>,
pub metadata: Option<DocumentMetadata>,
pub links: Option<Vec<String>>,
pub images: Option<Vec<String>>,
pub screenshot: Option<String>,
pub audio: Option<String>,
pub video: Option<String>,
pub attributes: Option<Vec<AttributeResult>>,
/// Untyped map of action results keyed by string.
pub actions: Option<HashMap<String, Value>>,
pub answer: Option<String>,
pub highlights: Option<String>,
pub warning: Option<String>,
/// Untyped JSON payload.
pub change_tracking: Option<Value>,
/// Untyped JSON payload.
pub branding: Option<Value>,
pub product: Option<Product>,
pub menu: Option<Menu>,
}
/// Structured product data carried by `Document::product`.
///
/// `title` and `url` are required when deserializing; `variants` falls back
/// to an empty list when the key is absent.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Product {
pub title: String,
pub brand: Option<String>,
pub category: Option<String>,
pub url: String,
pub description: Option<String>,
#[serde(default)]
pub variants: Vec<ProductVariant>,
}
/// An image reference for a product variant: a required `url` and an
/// optional `alt` string.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProductImage {
pub url: String,
pub alt: Option<String>,
}
/// A product price: a numeric `amount` with optional `currency` and
/// `formatted` strings.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProductPrice {
pub amount: f64,
pub currency: Option<String>,
pub formatted: Option<String>,
}
/// Availability of a product variant: a required `inStock` flag and an
/// optional `text` string.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProductAvailability {
#[serde(rename = "inStock")]
pub in_stock: bool,
pub text: Option<String>,
}
/// One variant of a `Product`.
///
/// `availability` is the only required field when deserializing; all other
/// fields are optional and omitted from the output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProductVariant {
pub id: Option<String>,
pub sku: Option<String>,
pub title: Option<String>,
/// Untyped map of string keys to JSON values.
pub values: Option<HashMap<String, serde_json::Value>>,
pub price: Option<ProductPrice>,
pub sale: Option<ProductSale>,
pub availability: ProductAvailability,
pub images: Option<Vec<ProductImage>>,
}
/// Sale information for a variant; carries the `originalPrice`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProductSale {
pub original_price: ProductPrice,
}
/// Structured menu data carried by `Document::menu`.
///
/// Names are camelCase on the wire (`isMenu`, `sourceUrl`, ...). `sections`
/// falls back to an empty list when the key is absent; `currency` is
/// omitted from the output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Menu {
pub is_menu: bool,
pub confidence: f64,
pub currency: Option<String>,
pub source_url: String,
pub merchant: MenuMerchant,
#[serde(default)]
pub sections: Vec<MenuSection>,
}
/// The merchant a `Menu` belongs to.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct MenuMerchant {
pub name: String,
/// Stored under the JSON key `type` (renamed because `type` is a Rust
/// keyword).
#[serde(rename = "type")]
pub merchant_type: Option<String>,
/// Untyped JSON payload.
pub location: Option<Value>,
}
/// A named section of a `Menu`; `items` falls back to an empty list when
/// the key is absent.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct MenuSection {
pub id: String,
pub name: String,
pub description: Option<String>,
#[serde(default)]
pub items: Vec<MenuItem>,
}
/// A single item within a `MenuSection`.
///
/// `id`, `name`, `availability` and `sourceUrl` are required when
/// deserializing. Fields marked `#[serde(default)]` fall back to their
/// `Default` value (empty list / default identifiers) when the key is
/// absent. `Option` fields are omitted from the output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct MenuItem {
pub id: String,
pub name: String,
pub description: Option<String>,
#[serde(default)]
pub images: Vec<MenuImage>,
pub price: Option<MenuPrice>,
pub availability: MenuAvailability,
#[serde(default)]
pub dietary: Vec<String>,
pub calories: Option<f64>,
/// Untyped JSON values, one per option group.
#[serde(default)]
pub option_groups: Vec<Value>,
#[serde(default)]
pub identifiers: MenuItemIdentifiers,
pub url: Option<String>,
pub source_url: String,
}
/// An image reference for a menu item: a required `url` and an optional
/// `alt` string.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct MenuImage {
pub url: String,
pub alt: Option<String>,
}
/// A menu item price: a numeric `amount` with optional `currency` and
/// `formatted` strings.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct MenuPrice {
pub amount: f64,
pub currency: Option<String>,
pub formatted: Option<String>,
}
/// Availability of a menu item: a required `inStock` flag and an optional
/// `text` string.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct MenuAvailability {
#[serde(rename = "inStock")]
pub in_stock: bool,
pub text: Option<String>,
}
/// Identifiers attached to a `MenuItem`; currently only the optional
/// `merchantItemId`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct MenuItemIdentifiers {
pub merchant_item_id: Option<String>,
}
/// Status of a job; camelCase on the wire (`"scraping"`, `"completed"`,
/// `"failed"`, `"cancelled"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum JobStatus {
Scraping,
Completed,
Failed,
Cancelled,
}
/// Sitemap handling mode; lowercase on the wire (`"skip"`, `"include"`,
/// `"only"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum SitemapMode {
Skip,
Include,
Only,
}
/// Agent model identifier.
///
/// Each variant carries an explicit `rename`, so the wire strings are
/// exactly `"spark-1-pro"` and `"spark-1-mini"`; the per-variant renames
/// take precedence over the container-level `rename_all`.
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum AgentModel {
#[serde(rename = "spark-1-pro")]
Spark1Pro,
#[serde(rename = "spark-1-mini")]
Spark1Mini,
}
/// Search source; lowercase on the wire (`"web"`, `"news"`, `"images"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum SearchSource {
Web,
News,
Images,
}
/// Search category; lowercase on the wire (`"github"`, `"research"`,
/// `"pdf"`).
#[derive(Deserialize, Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum SearchCategory {
Github,
Research,
Pdf,
}
/// A web search result; `url` is required, the remaining fields are
/// optional and omitted from the output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultWeb {
pub url: String,
pub title: Option<String>,
pub description: Option<String>,
pub category: Option<String>,
}
/// A news search result; every field is optional and omitted from the
/// output when `None`. Names are camelCase on the wire (`imageUrl`, ...).
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultNews {
pub title: Option<String>,
pub url: Option<String>,
pub snippet: Option<String>,
pub date: Option<String>,
pub image_url: Option<String>,
pub position: Option<u32>,
pub category: Option<String>,
}
/// An image search result; every field is optional and omitted from the
/// output when `None`. Names are camelCase on the wire (`imageUrl`,
/// `imageWidth`, `imageHeight`, ...).
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultImage {
pub title: Option<String>,
pub image_url: Option<String>,
pub image_width: Option<u32>,
pub image_height: Option<u32>,
pub url: Option<String>,
pub position: Option<u32>,
}
/// One error entry in a `CrawlErrorsResponse`.
///
/// `id`, `url` and `error` are required when deserializing; `timestamp` and
/// `code` are optional and omitted from the output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlError {
pub id: String,
pub timestamp: Option<String>,
pub url: String,
pub code: Option<String>,
pub error: String,
}
/// Response listing crawl errors plus a list of strings under
/// `robotsBlocked`. Both keys are required when deserializing.
///
/// NOTE(review): `robots_blocked` presumably holds URLs skipped because of
/// robots.txt — confirm against the API documentation.
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlErrorsResponse {
pub errors: Vec<CrawlError>,
#[serde(rename = "robotsBlocked")]
pub robots_blocked: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Metadata values given as arrays of strings decode into a single
    /// comma-separated string; plain strings pass through unchanged.
    #[test]
    fn test_full_document_with_array_metadata() {
        let json = json!({
            "markdown": "# Hello",
            "video": "https://storage.googleapis.com/firecrawl/video.mp4",
            "metadata": {
                "sourceURL": "https://example.com",
                "statusCode": 200,
                "title": "Example Page",
                "description": ["A great page", "with multiple descriptions"],
                "robots": ["index", "follow"],
                "ogImage": ["https://img.jpg"],
                "language": "en",
                "keywords": ["rust", "sdk", "firecrawl"]
            }
        });
        let doc: Document = serde_json::from_value(json).unwrap();
        assert_eq!(doc.markdown, Some("# Hello".to_string()));
        assert_eq!(
            doc.video,
            Some("https://storage.googleapis.com/firecrawl/video.mp4".to_string())
        );
        let meta = doc.metadata.unwrap();
        assert_eq!(meta.title, Some("Example Page".to_string()));
        assert_eq!(
            meta.description,
            Some("A great page, with multiple descriptions".to_string())
        );
        assert_eq!(meta.robots, Some("index, follow".to_string()));
        assert_eq!(meta.og_image, Some("https://img.jpg".to_string()));
        assert_eq!(meta.language, Some("en".to_string()));
        assert_eq!(meta.keywords, Some("rust, sdk, firecrawl".to_string()));
    }

    /// `Format::Menu` round-trips through the bare string `"menu"`.
    #[test]
    fn test_format_menu_round_trip() {
        let format = Format::Menu;
        let serialized = serde_json::to_value(&format).unwrap();
        assert_eq!(serialized, json!("menu"));
        let deserialized: Format = serde_json::from_value(json!("menu")).unwrap();
        assert_eq!(deserialized, Format::Menu);
    }

    /// A nested menu payload decodes into the typed structs and re-encodes
    /// with the same camelCase keys.
    #[test]
    fn test_document_with_menu() {
        let json = json!({
            "menu": {
                "isMenu": true,
                "confidence": 0.95,
                "currency": "USD",
                "sourceUrl": "https://example.com/menu",
                "merchant": {
                    "name": "Test Diner",
                    "type": "restaurant",
                    "location": { "city": "Springfield" }
                },
                "sections": [
                    {
                        "id": "s1",
                        "name": "Mains",
                        "items": [
                            {
                                "id": "i1",
                                "name": "Burger",
                                "images": [{ "url": "https://example.com/burger.jpg" }],
                                "price": { "amount": 12.5, "currency": "USD", "formatted": "$12.50" },
                                "availability": { "inStock": true },
                                "dietary": ["vegetarian"],
                                "optionGroups": [],
                                "identifiers": { "merchantItemId": "abc123" },
                                "sourceUrl": "https://example.com/menu#i1"
                            }
                        ]
                    }
                ]
            }
        });
        let doc: Document = serde_json::from_value(json).unwrap();
        let menu = doc.menu.as_ref().expect("menu should be present");
        assert!(menu.is_menu);
        assert_eq!(menu.confidence, 0.95);
        assert_eq!(menu.currency, Some("USD".to_string()));
        assert_eq!(menu.source_url, "https://example.com/menu");
        assert_eq!(menu.merchant.name, "Test Diner");
        assert_eq!(menu.merchant.merchant_type, Some("restaurant".to_string()));
        assert_eq!(menu.sections.len(), 1);
        let section = &menu.sections[0];
        assert_eq!(section.name, "Mains");
        assert_eq!(section.items.len(), 1);
        // Fixed: this line previously read `§ion.items[0]` (a mis-encoded
        // `&section`), which is not valid Rust.
        let item = &section.items[0];
        assert_eq!(item.name, "Burger");
        assert!(item.availability.in_stock);
        assert_eq!(item.dietary, vec!["vegetarian".to_string()]);
        assert_eq!(
            item.identifiers.merchant_item_id,
            Some("abc123".to_string())
        );
        let price = item.price.as_ref().unwrap();
        assert_eq!(price.amount, 12.5);
        let reserialized = serde_json::to_value(&doc).unwrap();
        let item_json = &reserialized["menu"]["sections"][0]["items"][0];
        assert_eq!(item_json["sourceUrl"], "https://example.com/menu#i1");
        assert_eq!(item_json["availability"]["inStock"], true);
        assert_eq!(item_json["identifiers"]["merchantItemId"], "abc123");
    }
}