use spider_agent::automation::RemoteMultimodalEngine;
/// Runs a one-shot extraction demo against a hard-coded product page.
///
/// Builds a `RemoteMultimodalEngine` pointed at OpenRouter, asks it to extract
/// book details from an inline HTML sample, and prints the returned label,
/// success flag, extracted JSON (when present), token usage, and the time
/// spent in the extraction call.
///
/// # Errors
///
/// Returns an error when the `OPEN_ROUTER` environment variable cannot be
/// read, when `extract_from_html` fails, or when the extracted value cannot be
/// pretty-printed as JSON.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// OpenRouter chat-completions endpoint handed to the engine.
    const API_URL: &str = "https://openrouter.ai/api/v1/chat/completions";
    /// Model identifier handed to the engine.
    const MODEL: &str = "qwen/qwen-2-vl-72b-instruct";
    /// URL passed alongside the sample HTML.
    const PAGE_URL: &str =
        "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html";
    /// Title passed alongside the sample HTML (matches its `<title>` element).
    const PAGE_TITLE: &str = "A Light in the Attic | Books to Scrape";
    /// Inline product page used as extraction input. The literal's lines are
    /// kept flush-left so the text sent to the engine stays exactly as written.
    const SAMPLE_HTML: &str = r#"
<!DOCTYPE html>
<html>
<head><title>A Light in the Attic | Books to Scrape</title></head>
<body>
<div class="product_main">
<h1>A Light in the Attic</h1>
<p class="price_color">£51.77</p>
<p class="instock availability">In stock (22 available)</p>
<table class="table table-striped">
<tr><th>UPC</th><td>a897fe39b1053632</td></tr>
<tr><th>Product Type</th><td>Books</td></tr>
<tr><th>Price (excl. tax)</th><td>£51.77</td></tr>
<tr><th>Price (incl. tax)</th><td>£51.77</td></tr>
<tr><th>Tax</th><td>£0.00</td></tr>
<tr><th>Number of reviews</th><td>0</td></tr>
</table>
</div>
<div id="product_description">
<p>It's hard to imagine a world without A Light in the Attic.
This now-classic collection of poetry and drawings from Shel Silverstein
celebrates its 20th anniversary with this special edition.</p>
</div>
</body>
</html>
"#;

    env_logger::init();

    // A missing or non-Unicode variable is a user/configuration problem, not a
    // bug, so report it through `main`'s `Result` instead of panicking.
    let api_key = std::env::var("OPEN_ROUTER").map_err(|err| {
        format!("OPEN_ROUTER environment variable must be set: {}", err)
    })?;

    // NOTE(review): the third constructor argument is left as `None`; its
    // meaning is not visible here — confirm against the spider_agent API.
    let mut engine =
        RemoteMultimodalEngine::new(API_URL, MODEL, None).with_api_key(Some(&api_key));

    // Engine options for this run. The exact effect of each flag is defined by
    // spider_agent; judging by the names they request extra extracted data,
    // include the HTML in the prompt, and ask for a JSON object reply — TODO
    // confirm against the crate documentation.
    engine.cfg.extra_ai_data = true;
    engine.cfg.include_html = true;
    engine.cfg.request_json_object = true;
    engine.cfg.max_tokens = 1024;
    engine.user_message_extra = Some(
        "Extract the book details including: title, price, availability, description, and UPC code."
            .to_string(),
    );

    println!("=== Spider Agent Extraction Example ===\n");

    // Time only the extraction call itself.
    let start = std::time::Instant::now();
    let result = engine
        .extract_from_html(SAMPLE_HTML, PAGE_URL, Some(PAGE_TITLE))
        .await?;
    let duration = start.elapsed();

    println!("Label: {}", result.label);
    println!("Success: {}", result.success);

    // `extracted` is optional; print it only when the engine returned data.
    if let Some(extracted) = &result.extracted {
        println!("\nExtracted Data:");
        println!("{}", serde_json::to_string_pretty(extracted)?);
    }

    println!("\nUsage:");
    println!(" Prompt tokens: {}", result.usage.prompt_tokens);
    println!(" Completion tokens: {}", result.usage.completion_tokens);
    println!(" Total tokens: {}", result.usage.total_tokens);
    println!(" LLM calls: {}", result.usage.llm_calls);
    println!("\nTime elapsed: {:?}", duration);

    Ok(())
}