use url_preview::{
LLMExtractor, LLMExtractorConfig, ContentFormat, Fetcher,
OpenAIProvider, AnthropicProvider, LocalProvider, MockProvider,
ApiKeyValidator, LLMConfig, PreviewError
};
use serde::{Deserialize, Serialize};
use schemars::JsonSchema;
use std::sync::Arc;
/// Structured summary of a web page as extracted by an LLM provider.
///
/// The `JsonSchema` derive allows the extractor to hand the LLM a JSON
/// schema describing the exact response shape it expects back.
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
struct WebPageInfo {
    /// Page title.
    title: String,
    /// Short description of the page content.
    description: String,
    /// Main topics covered by the page.
    main_topics: Vec<String>,
    /// Kind of content (e.g. article, documentation, landing page) — free-form string.
    content_type: String,
}
/// Entry point: runs the four LLM feature test suites in sequence.
///
/// Any suite that returns an error aborts the remaining suites via `?`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("🚀 综合 LLM 功能测试");
    println!("{}", "=".repeat(60));

    // 1. configuration validation, 2. provider auto-detection,
    // 3. per-provider extraction, 4. content-format handling.
    test_configuration().await?;
    test_auto_detection().await?;
    test_all_providers().await?;
    test_content_formats().await?;

    println!("\n✅ 所有测试完成!");
    Ok(())
}
/// Suite 1: exercises API-key and model-name validation (no network access).
///
/// Checks that well-formed OpenAI/Anthropic keys and known model names are
/// accepted, and that obviously invalid ones are rejected.
async fn test_configuration() -> Result<(), Box<dyn std::error::Error>> {
    println!("\n📋 1. 配置验证测试");
    println!("{}", "-".repeat(30));
    println!(" 🔑 API Key 验证:");

    // Plausible-looking keys should pass validation.
    if let Err(e) = ApiKeyValidator::validate_openai_key("sk-1234567890abcdefghij1234567890") {
        println!(" ❌ OpenAI key validation: {}", e);
    } else {
        println!(" ✅ OpenAI key validation: 通过");
    }
    if let Err(e) = ApiKeyValidator::validate_anthropic_key("sk-ant-1234567890abcdefghij1234567890") {
        println!(" ❌ Anthropic key validation: {}", e);
    } else {
        println!(" ✅ Anthropic key validation: 通过");
    }

    // Malformed keys must be rejected.
    if ApiKeyValidator::validate_openai_key("invalid").is_err() {
        println!(" ✅ Invalid OpenAI key: 正确拒绝");
    }
    if ApiKeyValidator::validate_anthropic_key("invalid").is_err() {
        println!(" ✅ Invalid Anthropic key: 正确拒绝");
    }

    println!(" 🤖 模型验证:");
    // Known model names for each provider should be accepted…
    if ApiKeyValidator::validate_model_name("openai", "gpt-4o-mini").is_ok() {
        println!(" ✅ OpenAI model validation: 通过");
    }
    if ApiKeyValidator::validate_model_name("anthropic", "claude-3-5-sonnet-20241022").is_ok() {
        println!(" ✅ Anthropic model validation: 通过");
    }
    // …and unknown ones rejected.
    if ApiKeyValidator::validate_model_name("openai", "invalid-model").is_err() {
        println!(" ✅ Invalid model: 正确拒绝");
    }

    Ok(())
}
/// Suite 2: asks the library to pick an LLM provider automatically.
///
/// Errors (e.g. no provider configured in the environment) propagate via `?`
/// and abort the whole run.
async fn test_auto_detection() -> Result<(), Box<dyn std::error::Error>> {
    println!("\n🔍 2. 自动检测测试");
    println!("{}", "-".repeat(30));

    let detected = LLMConfig::auto_detect_provider().await?;
    println!(" ✅ 自动检测到 provider: {}", detected.name());
    Ok(())
}
/// Suite 3: runs one extraction per provider against the same URL.
///
/// The mock provider always runs; the real providers (OpenAI, Anthropic,
/// Claude Code API, local endpoint) run only when their environment-based
/// configuration resolves, otherwise a skip notice is printed.
async fn test_all_providers() -> Result<(), Box<dyn std::error::Error>> {
    println!("\n🎯 3. Provider 特定测试");
    println!("{}", "-".repeat(30));

    let fetcher = Fetcher::new();
    let test_url = "https://www.rust-lang.org/";

    // Mock provider is always available.
    println!(" 🎭 Mock Provider:");
    test_provider_extraction(Arc::new(MockProvider::new()), test_url, &fetcher, "Mock").await;

    match LLMConfig::openai_from_env() {
        Ok(p) => {
            println!(" 🤖 OpenAI Provider:");
            test_provider_extraction(Arc::new(p), test_url, &fetcher, "OpenAI").await;
        }
        Err(_) => println!(" ⚠️ OpenAI Provider: OPENAI_API_KEY 未设置"),
    }

    match LLMConfig::anthropic_from_env() {
        Ok(p) => {
            println!(" 🧠 Anthropic Provider:");
            test_provider_extraction(Arc::new(p), test_url, &fetcher, "Anthropic").await;
        }
        Err(_) => println!(" ⚠️ Anthropic Provider: ANTHROPIC_API_KEY 未设置"),
    }

    match LLMConfig::claude_code_from_env() {
        Ok(p) => {
            println!(" 🔮 Claude Code API:");
            test_provider_extraction(Arc::new(p), test_url, &fetcher, "Claude-Code-API").await;
        }
        Err(_) => println!(" ⚠️ Claude Code API: 不可访问"),
    }

    match LLMConfig::local_from_env() {
        Ok(p) => {
            println!(" 🏠 Local Provider:");
            test_provider_extraction(Arc::new(p), test_url, &fetcher, "Local").await;
        }
        Err(_) => println!(" ⚠️ Local Provider: LOCAL_LLM_ENDPOINT 未设置"),
    }

    Ok(())
}
/// Runs a single `WebPageInfo` extraction against `url` with the given
/// provider and prints the outcome (success details or the error).
///
/// Uses plain-text content capped at 3 000 characters so every provider
/// receives comparable input.
async fn test_provider_extraction(
    provider: Arc<dyn url_preview::LLMProvider>,
    url: &str,
    fetcher: &Fetcher,
    provider_name: &str,
) {
    let extractor = LLMExtractor::with_config(
        provider,
        LLMExtractorConfig {
            format: ContentFormat::Text,
            clean_html: true,
            max_content_length: 3_000,
            model_params: Default::default(),
        },
    );

    match extractor.extract::<WebPageInfo>(url, fetcher).await {
        Ok(extraction) => {
            println!(" ✅ {} 提取成功!", provider_name);
            println!(" 标题: {}", extraction.data.title);
            println!(" 类型: {}", extraction.data.content_type);
            println!(" 主题数: {}", extraction.data.main_topics.len());
            // Token usage is optional — e.g. the mock provider may not report it.
            if let Some(usage) = extraction.usage {
                println!(" Token 使用: {}", usage.total_tokens);
            }
        }
        Err(e) => println!(" ❌ {} 提取失败: {}", provider_name, e),
    }
}
/// Suite 4: exercises every supported content format (HTML, Markdown, plain
/// text) against the mock provider and reports per-format results.
async fn test_content_formats() -> Result<(), Box<dyn std::error::Error>> {
    println!("\n📝 4. 内容格式测试");
    println!("{}", "-".repeat(30));

    let provider = Arc::new(MockProvider::new());
    let fetcher = Fetcher::new();
    let test_url = "https://www.rust-lang.org/";

    // (format, display label) pairs, one per supported content format.
    let cases = [
        (ContentFormat::Html, "HTML"),
        (ContentFormat::Markdown, "Markdown"),
        (ContentFormat::Text, "Plain Text"),
    ];

    for (format, label) in cases {
        println!(" 📄 {} 格式:", label);
        let extractor = LLMExtractor::with_config(
            provider.clone(),
            LLMExtractorConfig {
                format,
                clean_html: true,
                max_content_length: 5_000,
                model_params: Default::default(),
            },
        );
        match extractor.extract::<WebPageInfo>(test_url, &fetcher).await {
            Ok(extraction) => {
                println!(" ✅ {} 格式处理成功", label);
                println!(" 描述长度: {} 字符", extraction.data.description.len());
            }
            Err(e) => println!(" ❌ {} 格式处理失败: {}", label, e),
        }
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The mock provider should always complete an extraction successfully.
    #[tokio::test]
    async fn test_mock_provider_works() {
        let fetcher = Fetcher::new();
        let extractor = LLMExtractor::new(Arc::new(MockProvider::new()));
        let outcome = extractor
            .extract::<WebPageInfo>("https://example.com", &fetcher)
            .await;
        assert!(outcome.is_ok());
    }

    /// Sanity-check `ContentFormat` equality/inequality semantics.
    #[test]
    fn test_all_content_formats() {
        assert_eq!(ContentFormat::Html, ContentFormat::Html);
        assert_ne!(ContentFormat::Html, ContentFormat::Text);
        assert_ne!(ContentFormat::Markdown, ContentFormat::Text);
    }
}