#[cfg(all(feature = "browser", feature = "llm"))]
use url_preview::{
BrowserPreviewService, McpConfig, McpTransport, BrowserUsagePolicy,
LLMExtractor, LLMExtractorConfig, ContentFormat,
OpenAIProvider, MockProvider, LLMProvider,
FetchResult, Preview,
};
use serde::{Deserialize, Serialize};
use schemars::JsonSchema;
use std::sync::Arc;
use std::env;
/// Fallback entry point compiled when the `browser` and `llm` features are not
/// both enabled: prints how to run the example to stderr and returns.
#[cfg(not(all(feature = "browser", feature = "llm")))]
fn main() {
    // One entry per stderr line, emitted in order.
    const HELP_LINES: [&str; 2] = [
        "This example requires both 'browser' and 'llm' features to be enabled.",
        "Run with: cargo run --example test_llm_with_browser --features \"browser llm\"",
    ];

    for help_line in HELP_LINES.iter() {
        eprintln!("{}", help_line);
    }
}
// Structured description of a web page. `main` requests this shape through
// `llm_extractor.extract::<WebPageInfo>(..)` and prints its fields.
//
// NOTE(review): plain `//` comments are used here on purpose. The schemars
// derive copies `///` doc comments into the generated JSON schema as
// descriptions, so adding doc comments would alter the schema derived for this
// type.
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
struct WebPageInfo {
// Page title; the only non-optional scalar field. Printed as "Title".
title: String,
// Optional heading; printed as "Heading" only when present.
heading: Option<String>,
// Optional description; not printed by `main`.
description: Option<String>,
// Feature entries; `main` prints the count and the first three.
features: Vec<String>,
// Presumably call-to-action button labels (guess from the name); printed
// with `{:?}` as "CTA Buttons".
cta_buttons: Vec<String>,
// Presumably navigation entries (guess from the name); only the count is
// printed.
nav_items: Vec<String>,
}
// Structured description of a technical article. `main` requests this shape
// through `llm_extractor.extract::<TechArticle>(..)` and prints its fields.
//
// NOTE(review): plain `//` comments are used here on purpose. The schemars
// derive copies `///` doc comments into the generated JSON schema as
// descriptions, so adding doc comments would alter the schema derived for this
// type.
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
struct TechArticle {
// Article title; printed as "Title".
title: String,
// Optional author; printed as "Author" only when present.
author: Option<String>,
// Optional date, kept as free-form text (no date parsing happens in this
// file); printed as "Date" only when present.
date: Option<String>,
// Article summary; `main` shortens it for display when it is long.
summary: String,
// Code snippets; only the count is printed.
code_snippets: Vec<String>,
// Technology names; printed with `{:?}` as "Technologies".
technologies: Vec<String>,
}
/// Shortens `text` for display.
///
/// Returns `text` unchanged when it is at most `max_bytes` bytes long.
/// Otherwise returns the longest prefix of at most `max_bytes` bytes that ends
/// on a UTF-8 character boundary, followed by `...`.
///
/// Slicing a `str` at an arbitrary byte offset panics when the offset falls
/// inside a multi-byte character, which scraped or LLM-produced text often
/// contains, so the cut point is moved back to the nearest boundary.
#[cfg(all(feature = "browser", feature = "llm"))]
fn truncate_with_ellipsis(text: &str, max_bytes: usize) -> String {
    if text.len() <= max_bytes {
        return text.to_string();
    }
    let mut end = max_bytes;
    // Offset 0 is always a character boundary, so this loop terminates.
    while !text.is_char_boundary(end) {
        end -= 1;
    }
    format!("{}...", &text[..end])
}

/// Runs the browser + LLM integration walkthrough.
///
/// Steps:
/// 1. Picks the LLM provider: `OpenAIProvider` when `OPENAI_API_KEY` is set,
///    `MockProvider` otherwise.
/// 2. Initializes a `BrowserPreviewService` configured to launch
///    `npx -y @playwright/mcp@latest` over stdio.
/// 3. For each test URL, generates a browser preview and then asks the LLM
///    extractor for a `WebPageInfo`.
/// 4. Repeats the same two steps for one blog post, extracting a `TechArticle`.
///
/// # Errors
///
/// Returns an error only when browser service initialization fails. Per-URL
/// preview and extraction failures are printed and the run continues.
#[cfg(all(feature = "browser", feature = "llm"))]
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("🌐🤖 Browser + LLM Integration Test");
    println!("{}", "=".repeat(60));

    // Read the key once; a missing (or non-Unicode) value selects the mock provider.
    let api_key = env::var("OPENAI_API_KEY").ok();
    if api_key.is_none() {
        println!("⚠️ No OPENAI_API_KEY found, using mock provider");
        println!(" Set OPENAI_API_KEY environment variable to use real extraction\n");
    }

    let mcp_config = McpConfig {
        enabled: true,
        server_command: vec![
            "npx".to_string(),
            "-y".to_string(),
            "@playwright/mcp@latest".to_string(),
        ],
        transport: McpTransport::Stdio,
        browser_timeout: 30,
        max_sessions: 3,
    };
    let browser_service = Arc::new(BrowserPreviewService::new(
        mcp_config,
        BrowserUsagePolicy::Always,
    ));
    println!("▶️ Initializing browser service...");
    browser_service.initialize().await?;
    println!("✅ Browser service initialized\n");

    let provider: Arc<dyn LLMProvider> = match api_key {
        Some(key) => Arc::new(OpenAIProvider::new(key)),
        None => Arc::new(MockProvider::new()),
    };
    let llm_config = LLMExtractorConfig {
        format: ContentFormat::Markdown,
        clean_html: true,
        max_content_length: 100_000,
        ..Default::default()
    };
    let llm_extractor = LLMExtractor::with_config(provider, llm_config);

    // One fetcher is shared by every extraction instead of being rebuilt per URL.
    // NOTE(review): extraction is handed the URL and this fetcher, not the
    // browser preview, so it presumably retrieves the page again without the
    // browser — confirm this is intended.
    let fetcher = Arc::new(url_preview::Fetcher::new());

    // (url, human-readable description, whether the site is a single-page app)
    let test_cases = [
        ("https://react.dev", "React Documentation (SPA)", true),
        ("https://www.rust-lang.org", "Rust Language Site", false),
        ("https://angular.io", "Angular Framework (SPA)", true),
        ("https://svelte.dev", "Svelte Documentation", true),
    ];

    for (url, description, is_spa) in test_cases {
        println!("\n📄 Testing: {} - {}", url, description);
        println!(" SPA: {}", if is_spa { "Yes" } else { "No" });
        println!("{}", "-".repeat(60));
        println!(" 1️⃣ Fetching with browser...");
        match browser_service.generate_preview(url).await {
            Ok(preview) => {
                println!(" ✅ Browser preview generated:");
                println!(" Title: {}", preview.title.as_deref().unwrap_or("(none)"));
                println!(
                    " Description: {}",
                    preview
                        .description
                        .as_deref()
                        .map(|d| truncate_with_ellipsis(d, 80))
                        .unwrap_or_else(|| "(none)".to_string())
                );
                println!("\n 2️⃣ Extracting structured data with LLM...");
                match llm_extractor.extract::<WebPageInfo>(url, &fetcher).await {
                    Ok(info) => {
                        println!(" ✅ Structured data extracted:");
                        println!(" Title: {}", info.data.title);
                        if let Some(heading) = &info.data.heading {
                            println!(" Heading: {}", heading);
                        }
                        println!(" Features: {} found", info.data.features.len());
                        // Only the first three features are listed to keep output short.
                        for (i, feature) in info.data.features.iter().take(3).enumerate() {
                            println!(" {}. {}", i + 1, feature);
                        }
                        println!(" CTA Buttons: {:?}", info.data.cta_buttons);
                        println!(" Nav Items: {} found", info.data.nav_items.len());
                        if let Some(usage) = &info.usage {
                            println!(
                                " Token usage: {} prompt, {} completion",
                                usage.prompt_tokens, usage.completion_tokens
                            );
                        }
                    }
                    Err(e) => println!(" ❌ LLM extraction error: {}", e),
                }
            }
            Err(e) => println!(" ❌ Browser error: {}", e),
        }
    }

    println!("\n\n📚 Testing technical article extraction:");
    println!("{}", "=".repeat(60));
    let article_url = "https://blog.rust-lang.org/2024/01/03/Rust-1.75.0.html";
    println!("\nURL: {}", article_url);
    match browser_service.generate_preview(article_url).await {
        Ok(_) => {
            println!("✅ Page loaded with browser");
            match llm_extractor
                .extract::<TechArticle>(article_url, &fetcher)
                .await
            {
                Ok(article) => {
                    println!("\n📝 Article extracted:");
                    println!(" Title: {}", article.data.title);
                    if let Some(author) = &article.data.author {
                        println!(" Author: {}", author);
                    }
                    if let Some(date) = &article.data.date {
                        println!(" Date: {}", date);
                    }
                    println!(
                        " Summary: {}",
                        truncate_with_ellipsis(&article.data.summary, 150)
                    );
                    println!(" Technologies: {:?}", article.data.technologies);
                    println!(
                        " Code snippets: {} found",
                        article.data.code_snippets.len()
                    );
                }
                Err(e) => println!("❌ Extraction error: {}", e),
            }
        }
        Err(e) => println!("❌ Browser error: {}", e),
    }

    println!("\n\n🎉 All tests completed!");
    Ok(())
}