// spider_agent 2.51.208
//
// A concurrent-safe multimodal agent for web automation and research.
// Documentation
//! Real-world Spider Cloud example: e-commerce competitor intelligence.
//!
//! This flow turns a simple input (a seed URL and a market query) into a multi-tool pipeline:
//! - `search`: discover relevant product pages
//! - `links`: expand page discovery from a seed URL
//! - `scrape`: extract product-page content
//! - `transform` (optional): normalize the content into an output-ready format
//! - `ai_scrape` (optional): structured extraction (AI subscription required)
//!
//! Run:
//! ```bash
//! SPIDER_CLOUD_API_KEY=your-key cargo run -p spider_agent --example spider_cloud_ecommerce_competitor \
//!   -- "https://books.toscrape.com/" "travel books"
//! ```
//!
//! Optional env vars:
//! - `SPIDER_CLOUD_API_URL` (default: `https://api.spider.cloud`)
//! - `SPIDER_CLOUD_TOOL_PREFIX` (default: `spider_cloud`)
//! - `SPIDER_CLOUD_ENABLE_AI_ROUTES=1` (required for `/ai/*` routes)
//! - `SPIDER_CLOUD_RETURN_FORMAT` (default: `markdown`, supports `raw|bytes|markdown|commonmark|text`)
//! - `SPIDER_CLOUD_INCLUDE_TRANSFORM=1` to explicitly run `/transform`

use spider_agent::{Agent, SpiderCloudToolConfig};

/// One tool invocation in the pipeline executed by `main`.
#[derive(Debug, Clone)]
struct Step {
    /// Tool-name suffix (e.g. `"search"`); joined with the configured prefix via `tool_name`.
    suffix: &'static str,
    /// Human-readable purpose, printed right before the step runs.
    description: &'static str,
    /// JSON request payload; serialized to a string and passed as the tool call body.
    body: serde_json::Value,
}

/// Reports whether the environment variable `name` holds a truthy value.
///
/// The value is trimmed and compared case-insensitively (ASCII) against
/// `1`, `true`, `yes` and `on`. An unset or unreadable variable counts as `false`.
fn env_flag(name: &str) -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];

    std::env::var(name)
        .map(|raw| {
            let normalized = raw.trim().to_ascii_lowercase();
            TRUTHY.contains(&normalized.as_str())
        })
        .unwrap_or(false)
}

/// Builds the full tool name from a configurable `prefix` and a route `suffix`.
///
/// Surrounding whitespace and trailing underscores are stripped from the prefix
/// before it is joined to the suffix with a single `_`. When nothing is left of
/// the prefix, the suffix is returned on its own.
fn tool_name(prefix: &str, suffix: &str) -> String {
    match prefix.trim().trim_end_matches('_') {
        "" => suffix.to_owned(),
        base => [base, suffix].join("_"),
    }
}

/// Condenses a parsed tool response into a one-line `status / duration / cost` summary.
///
/// When `value` is a JSON array only its first element is inspected; an empty
/// array yields `"empty-response"`. Any other value is inspected directly.
/// `status` and `duration_elapsed_ms` are shown only when they are unsigned
/// integers, and the cost is taken from `costs.total_cost_formatted`, falling
/// back to `costs.total_cost`. Anything missing is rendered as `n/a`.
fn summarize_usage(value: &serde_json::Value) -> String {
    const MISSING: &str = "n/a";

    let entry = match value.as_array() {
        Some(items) => match items.first() {
            Some(head) => head,
            None => return "empty-response".to_string(),
        },
        None => value,
    };

    // `status` and `duration_elapsed_ms` share the same lookup-and-stringify logic.
    let unsigned_field = |key: &str| -> String {
        match entry.get(key).and_then(|raw| raw.as_u64()) {
            Some(number) => number.to_string(),
            None => MISSING.to_string(),
        }
    };
    let status = unsigned_field("status");
    let duration_ms = unsigned_field("duration_elapsed_ms");

    let cost_value = entry.get("costs").and_then(|costs| {
        costs
            .get("total_cost_formatted")
            .or_else(|| costs.get("total_cost"))
    });
    // The cost is rendered as JSON text, so a string value keeps its quotes.
    let total_cost = match cost_value {
        Some(cost) => cost.to_string(),
        None => MISSING.to_string(),
    };

    format!(
        "status={}, duration_ms={}, total_cost={}",
        status, duration_ms, total_cost
    )
}

/// Runs the competitor-intelligence pipeline and prints a summary of every step.
///
/// Configuration comes from the `SPIDER_CLOUD_*` environment variables. The first
/// positional argument is the seed URL and the second is the market query; both
/// fall back to a `books.toscrape.com` demo when omitted.
///
/// # Errors
///
/// Returns an error when `SPIDER_CLOUD_API_KEY` is not set (or is not valid
/// Unicode) or when the agent cannot be built. A failing individual step is
/// only reported on stdout; the remaining steps still run.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();

    // A missing key is a configuration mistake by the user, not a broken invariant:
    // surface it through the `Result` instead of panicking.
    let api_key = std::env::var("SPIDER_CLOUD_API_KEY")
        .map_err(|_| "SPIDER_CLOUD_API_KEY environment variable must be set")?;
    let api_url =
        std::env::var("SPIDER_CLOUD_API_URL").unwrap_or_else(|_| "https://api.spider.cloud".into());
    let tool_prefix =
        std::env::var("SPIDER_CLOUD_TOOL_PREFIX").unwrap_or_else(|_| "spider_cloud".into());
    let enable_ai_routes = env_flag("SPIDER_CLOUD_ENABLE_AI_ROUTES");
    let include_transform = env_flag("SPIDER_CLOUD_INCLUDE_TRANSFORM");
    let return_format =
        std::env::var("SPIDER_CLOUD_RETURN_FORMAT").unwrap_or_else(|_| "markdown".into());

    let args: Vec<String> = std::env::args().skip(1).collect();
    let seed_url = args
        .first()
        .cloned()
        .unwrap_or_else(|| "https://books.toscrape.com/".to_string());
    let market_query = args
        .get(1)
        .cloned()
        .unwrap_or_else(|| "travel books".to_string());
    // The demo site gets a known product page; any other seed is scraped as-is.
    let detail_url = if seed_url.contains("books.toscrape.com") {
        "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html".to_string()
    } else {
        seed_url.clone()
    };

    let cloud_cfg = SpiderCloudToolConfig::new(api_key)
        .with_api_url(api_url.clone())
        .with_tool_name_prefix(tool_prefix.clone())
        .with_enable_ai_routes(enable_ai_routes);
    let agent = Agent::builder()
        .with_spider_cloud_config(cloud_cfg)
        .build()?;

    println!("=== E-commerce Competitor Intelligence ===");
    println!("Seed URL: {}", seed_url);
    println!("Market query: {}", market_query);
    println!("API URL: {}", api_url);
    println!("AI routes enabled: {}", enable_ai_routes);
    println!("Return format: {}", return_format);
    println!("Include transform: {}", include_transform);
    println!();

    // `site:` operand: the seed URL without its scheme or trailing slash.
    let site = seed_url
        .trim_start_matches("https://")
        .trim_start_matches("http://")
        .trim_end_matches('/');
    let search_query = format!("site:{} {}", site, market_query);

    let mut steps = vec![
        Step {
            suffix: "search",
            description: "discover category/product pages",
            body: serde_json::json!({
                "search": search_query,
                "num": 5,
                "fetch_page_content": false
            }),
        },
        Step {
            suffix: "links",
            description: "extract internal links from seed",
            body: serde_json::json!({
                "url": seed_url,
                "limit": 2
            }),
        },
        Step {
            suffix: "scrape",
            description: "extract content from a representative product URL",
            body: serde_json::json!({
                "url": detail_url,
                "return_format": return_format,
                "metadata": true
            }),
        },
    ];

    if include_transform {
        steps.push(Step {
            suffix: "transform",
            description: "normalize content for downstream systems",
            body: serde_json::json!({
                "url": detail_url,
                "return_format": return_format,
                "metadata": true
            }),
        });
    }

    if enable_ai_routes {
        steps.push(Step {
            suffix: "ai_scrape",
            description: "AI structured extraction with schema",
            body: serde_json::json!({
                "url": detail_url,
                "prompt": "Extract title, price, availability, and product category.",
                "cleaning_intent": "extraction",
                "metadata": true,
                "extraction_schema": {
                    "name": "product_intel",
                    "description": "Normalized product details",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "title": { "type": "string" },
                            "price": { "type": "string" },
                            "availability": { "type": "string" },
                            "category": { "type": "string" }
                        },
                        "required": ["title", "price", "availability"]
                    }
                }
            }),
        });
    }

    for step in steps {
        let name = tool_name(&tool_prefix, step.suffix);
        let body = step.body.to_string();
        println!("Running {} ({})", name, step.description);

        match agent
            .execute_custom_tool(&name, None, None, Some(&body))
            .await
        {
            Ok(result) => {
                if !result.success {
                    // Truncate by characters so a long error body cannot flood the terminal.
                    println!(
                        "  failed: HTTP {} body={}",
                        result.status,
                        result.body.chars().take(240).collect::<String>()
                    );
                    continue;
                }

                // Best effort: a non-JSON body still gets a summary line (all fields `n/a`).
                let parsed = serde_json::from_str::<serde_json::Value>(&result.body)
                    .unwrap_or(serde_json::Value::Null);
                println!(
                    "  ok: HTTP {} | {}",
                    result.status,
                    summarize_usage(&parsed)
                );
                println!(
                    "  preview: {}",
                    result.body.chars().take(180).collect::<String>()
                );
            }
            Err(err) => println!("  error: {}", err),
        }
    }

    let usage = agent.usage();
    println!("\n=== Usage Snapshot ===");
    println!(
        "Total custom tool calls: {}",
        usage.total_custom_tool_calls()
    );
    for (tool, count) in &usage.custom_tool_calls {
        println!("- {}: {}", tool, count);
    }

    Ok(())
}