use serde::{Deserialize, Serialize};
/// Upper bound that `clamp_max` applies to any requested result count.
const MAX_RESULTS_CAP: u32 = 20;
/// Result count `clamp_max` uses when the caller does not specify one.
const DEFAULT_MAX_RESULTS: u32 = 5;
/// URL that `tavily_search` POSTs its JSON request to.
const TAVILY_ENDPOINT: &str = "https://api.tavily.com/search";
/// Name under which `resolve_provider` looks up the Tavily API key.
const TAVILY_API_KEY_ENV: &str = "TAVILY_API_KEY";
/// Name under which `resolve_provider` looks up an override for the Parslee base URL.
const PARSLEE_API_BASE_ENV: &str = "PARSLEE_API_BASE";
/// Parslee base URL used when no override is found.
const PARSLEE_DEFAULT_BASE: &str = "https://api.parslee.ai";
/// Timeout, in seconds, set on every client built by `http_client`.
const HTTP_TIMEOUT_SECS: u64 = 20;
/// A single search hit, normalised to one shape regardless of which provider produced it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct WebSearchResult {
/// Result title; the parsers in this file store an empty string when none is available.
pub title: String,
/// Target URL. The parsers in this file drop hits whose URL is missing or empty.
pub url: String,
/// Short excerpt for the hit; empty when none was available.
pub snippet: String,
/// Relevance score; `0.0` when absent (the serde default, and the parsers' fallback).
#[serde(default)]
pub score: f64,
/// Publication date exactly as the provider sent it (kept as an unparsed string).
/// Left out of serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub published_date: Option<String>,
}
/// The outcome of one search call: the query, its hits, and which backend answered.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct WebSearchResponse {
/// The query string as passed by the caller (the parsers copy it from their argument,
/// not from the provider's payload).
pub query: String,
/// Hits in the order the provider returned them.
pub results: Vec<WebSearchResult>,
/// Backend label set by the parser: `"parslee"`, `"tavily"` or `"duckduckgo"`.
pub source: String,
}
/// Serializable request shape for a web search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchRequest {
/// Free-text search query.
pub query: String,
/// Optional requested result count; deserializes to `None` when the field is absent.
#[serde(default)]
pub max_results: Option<u32>,
}
/// Errors returned by the search and fetch functions in this file.
///
/// Both variants carry a pre-formatted message, usually prefixed with the name of the
/// backend or step that failed.
#[derive(Debug, thiserror::Error)]
pub enum SearchError {
/// The HTTP client could not be built, the request failed to send, or a search
/// backend answered with a non-success status.
#[error("web search HTTP error: {0}")]
Http(String),
/// The response body could not be read or decoded, or a required field was missing.
#[error("web search parse error: {0}")]
Parse(String),
}
/// The backend selected by `resolve_provider`, together with the credentials it needs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SearchProvider {
/// Parslee search API: `base` is the API base URL, `token` is sent as a bearer token.
Parslee { base: String, token: String },
/// Tavily search API, authenticated with `api_key`.
Tavily { api_key: String },
/// Credential-free fallback that scrapes DuckDuckGo's HTML results page.
DuckDuckGo,
}
/// Picks the search backend to use, based on which credentials are available.
///
/// Precedence (first match wins):
/// 1. [`SearchProvider::Parslee`] when `car_auth::access_token_refreshing()` yields a
///    non-empty token. The base URL comes from the `PARSLEE_API_BASE` process
///    environment variable, then from `car_secrets::resolve_env_or_keychain`, and
///    finally falls back to `PARSLEE_DEFAULT_BASE`.
/// 2. [`SearchProvider::Tavily`] when a non-empty `TAVILY_API_KEY` can be resolved.
/// 3. [`SearchProvider::DuckDuckGo`] otherwise.
///
/// Empty strings are treated as "not configured" at every step.
pub async fn resolve_provider() -> SearchProvider {
    if let Some(token) = car_auth::access_token_refreshing()
        .await
        .filter(|t| !t.is_empty())
    {
        let base = std::env::var(PARSLEE_API_BASE_ENV)
            .ok()
            .filter(|b| !b.is_empty())
            .or_else(|| car_secrets::resolve_env_or_keychain(PARSLEE_API_BASE_ENV))
            // The secrets lookup can also hand back an empty string; without this second
            // filter it would beat the default and produce a host-less request URL.
            .filter(|b| !b.is_empty())
            .unwrap_or_else(|| PARSLEE_DEFAULT_BASE.to_string());
        return SearchProvider::Parslee { base, token };
    }

    if let Some(api_key) =
        car_secrets::resolve_env_or_keychain(TAVILY_API_KEY_ENV).filter(|k| !k.is_empty())
    {
        return SearchProvider::Tavily { api_key };
    }

    SearchProvider::DuckDuckGo
}
/// Turns the caller's optional result count into the value actually sent to a provider.
///
/// `None` becomes `DEFAULT_MAX_RESULTS`; the value is then forced into the inclusive
/// range `1..=MAX_RESULTS_CAP`.
fn clamp_max(max_results: Option<u32>) -> u32 {
    let requested = match max_results {
        Some(count) => count,
        None => DEFAULT_MAX_RESULTS,
    };
    requested.clamp(1, MAX_RESULTS_CAP)
}
/// Builds a fresh `reqwest::Client` whose timeout is `HTTP_TIMEOUT_SECS` seconds.
///
/// # Errors
/// Returns [`SearchError::Http`] carrying the builder's error text if the client cannot
/// be constructed.
fn http_client() -> Result<reqwest::Client, SearchError> {
    let timeout = std::time::Duration::from_secs(HTTP_TIMEOUT_SECS);
    match reqwest::Client::builder().timeout(timeout).build() {
        Ok(client) => Ok(client),
        Err(err) => Err(SearchError::Http(err.to_string())),
    }
}
/// Runs `query` against whichever backend `resolve_provider` selects.
///
/// `max_results` is normalised through `clamp_max` before being handed to the backend.
///
/// # Errors
/// Propagates the [`SearchError`] returned by the selected backend.
pub async fn web_search(
    query: &str,
    max_results: Option<u32>,
) -> Result<WebSearchResponse, SearchError> {
    let limit = clamp_max(max_results);
    let provider = resolve_provider().await;
    match provider {
        SearchProvider::DuckDuckGo => ddg_search(query, limit).await,
        SearchProvider::Tavily { api_key } => tavily_search(query, limit, &api_key).await,
        SearchProvider::Parslee { base, token } => {
            parslee_search(query, limit, &base, &token).await
        }
    }
}
/// Queries the Tavily search endpoint and converts the reply with `parse_tavily`.
///
/// The API key travels in the JSON body. `max_results` is forwarded as given; this
/// function does not clamp it.
///
/// # Errors
/// * [`SearchError::Http`] if the client cannot be built, the request fails, or the
///   status is not a success code.
/// * [`SearchError::Parse`] if the response body is not valid JSON.
pub async fn tavily_search(
    query: &str,
    max_results: u32,
    api_key: &str,
) -> Result<WebSearchResponse, SearchError> {
    let payload = serde_json::json!({
        "api_key": api_key,
        "query": query,
        "max_results": max_results,
        "search_depth": "basic",
        "include_answer": false,
        "include_raw_content": false,
        "include_images": false,
    });
    let client = http_client()?;

    let response = match client.post(TAVILY_ENDPOINT).json(&payload).send().await {
        Ok(response) => response,
        Err(e) => return Err(SearchError::Http(format!("tavily: {e}"))),
    };

    let status = response.status();
    if !status.is_success() {
        return Err(SearchError::Http(format!("tavily: HTTP {status}")));
    }

    match response.json::<serde_json::Value>().await {
        Ok(decoded) => Ok(parse_tavily(query, &decoded)),
        Err(e) => Err(SearchError::Parse(format!("tavily: {e}"))),
    }
}
/// Searches through the Parslee API.
///
/// Two requests are made with the same client: `parslee_org_id` first resolves the
/// caller's organization, then the query is POSTed to that organization's search
/// route. Trailing `/` characters on `base` are ignored.
///
/// # Errors
/// * [`SearchError::Http`] if the client cannot be built, either request fails, or
///   either status is not a success code.
/// * [`SearchError::Parse`] if a response body cannot be decoded or the organization
///   id is missing.
pub async fn parslee_search(
    query: &str,
    max_results: u32,
    base: &str,
    token: &str,
) -> Result<WebSearchResponse, SearchError> {
    let client = http_client()?;
    let root = base.trim_end_matches('/');
    let org_id = parslee_org_id(&client, root, token).await?;

    let endpoint = format!("{root}/api/v1/orgs/{org_id}/search");
    let payload = serde_json::json!({ "query": query, "maxResults": max_results });

    let request = client.post(&endpoint).bearer_auth(token).json(&payload);
    let response = match request.send().await {
        Ok(response) => response,
        Err(e) => return Err(SearchError::Http(format!("parslee search: {e}"))),
    };

    let status = response.status();
    if !status.is_success() {
        return Err(SearchError::Http(format!("parslee search: HTTP {status}")));
    }

    match response.json::<serde_json::Value>().await {
        Ok(decoded) => Ok(parse_parslee(query, &decoded)),
        Err(e) => Err(SearchError::Parse(format!("parslee search: {e}"))),
    }
}
async fn parslee_org_id(
client: &reqwest::Client,
base: &str,
token: &str,
) -> Result<String, SearchError> {
let resp = client
.get(format!("{base}/api/v1/organizations/me"))
.bearer_auth(token)
.send()
.await
.map_err(|e| SearchError::Http(format!("parslee org lookup: {e}")))?;
if !resp.status().is_success() {
return Err(SearchError::Http(format!(
"parslee org lookup: HTTP {}",
resp.status()
)));
}
let json: serde_json::Value = resp
.json()
.await
.map_err(|e| SearchError::Parse(format!("parslee org: {e}")))?;
json.get("organizationId")
.or_else(|| json.get("OrganizationId"))
.and_then(|v| v.as_str())
.map(String::from)
.ok_or_else(|| {
SearchError::Parse(
"parslee org response has no organizationId (finish onboarding)".to_string(),
)
})
}
/// Converts a Tavily JSON reply into a [`WebSearchResponse`] labelled `"tavily"`.
///
/// Reads the `results` array; a missing or non-array `results` yields an empty list.
/// Entries without a non-empty string `url` are skipped. Tavily's `content` field
/// becomes the snippet; missing text fields become empty strings and a missing score
/// becomes `0.0`.
fn parse_tavily(query: &str, json: &serde_json::Value) -> WebSearchResponse {
    // Reads `key` from `item` as an owned string, if it is present and a JSON string.
    let text_of = |item: &serde_json::Value, key: &str| -> Option<String> {
        item.get(key).and_then(|v| v.as_str()).map(String::from)
    };

    let mut results = Vec::new();
    if let Some(items) = json.get("results").and_then(|r| r.as_array()) {
        for item in items {
            let Some(url) = text_of(item, "url") else {
                continue;
            };
            if url.is_empty() {
                continue;
            }
            results.push(WebSearchResult {
                title: text_of(item, "title").unwrap_or_default(),
                url,
                snippet: text_of(item, "content").unwrap_or_default(),
                score: item.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0),
                published_date: text_of(item, "published_date"),
            });
        }
    }

    WebSearchResponse {
        query: query.to_string(),
        results,
        source: "tavily".to_string(),
    }
}
fn parse_parslee(query: &str, json: &serde_json::Value) -> WebSearchResponse {
let results = json
.get("results")
.and_then(|r| r.as_array())
.map(|arr| {
arr.iter()
.filter_map(|r| {
let url = r.get("url").and_then(|v| v.as_str())?.to_string();
if url.is_empty() {
return None;
}
Some(WebSearchResult {
title: r.get("title").and_then(|v| v.as_str()).unwrap_or("").to_string(),
url,
snippet: r
.get("snippet")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
score: r.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0),
published_date: r
.get("publishedDate")
.and_then(|v| v.as_str())
.map(String::from),
})
})
.collect()
})
.unwrap_or_default();
WebSearchResponse {
query: query.to_string(),
results,
source: "parslee".to_string(),
}
}
/// DuckDuckGo HTML results page that `ddg_search` requests and `parse_ddg` scrapes.
const DDG_HTML_ENDPOINT: &str = "https://html.duckduckgo.com/html/";
/// `User-Agent` header value sent by `ddg_search` and `web_fetch`.
const USER_AGENT: &str = "Mozilla/5.0 (compatible; CAR/1.0; +https://parslee.ai)";
/// Searches DuckDuckGo by fetching its HTML results page and scraping it with `parse_ddg`.
///
/// `max_results` is applied while parsing; it is not sent to DuckDuckGo.
///
/// # Errors
/// * [`SearchError::Http`] if the client cannot be built, the request fails, or the
///   status is not a success code.
/// * [`SearchError::Parse`] if the response body cannot be read as text.
pub async fn ddg_search(query: &str, max_results: u32) -> Result<WebSearchResponse, SearchError> {
    let request = http_client()?
        .get(DDG_HTML_ENDPOINT)
        .query(&[("q", query)])
        .header(reqwest::header::USER_AGENT, USER_AGENT);

    let response = match request.send().await {
        Ok(response) => response,
        Err(e) => return Err(SearchError::Http(format!("duckduckgo: {e}"))),
    };

    let status = response.status();
    if !status.is_success() {
        return Err(SearchError::Http(format!("duckduckgo: HTTP {status}")));
    }

    match response.text().await {
        Ok(page) => Ok(parse_ddg(query, &page, max_results)),
        Err(e) => Err(SearchError::Parse(format!("duckduckgo: {e}"))),
    }
}
fn parse_ddg(query: &str, html: &str, max_results: u32) -> WebSearchResponse {
use scraper::{Html, Selector};
let doc = Html::parse_document(html);
let result_sel = Selector::parse("div.result, div.web-result").unwrap();
let a_sel = Selector::parse("a.result__a").unwrap();
let snippet_sel = Selector::parse("a.result__snippet, .result__snippet").unwrap();
let mut results = Vec::new();
for el in doc.select(&result_sel) {
if results.len() >= max_results as usize {
break;
}
let Some(a) = el.select(&a_sel).next() else {
continue;
};
let url = ddg_unwrap_href(a.value().attr("href").unwrap_or(""));
if url.is_empty() {
continue;
}
let title = a.text().collect::<String>().split_whitespace().collect::<Vec<_>>().join(" ");
let snippet = el
.select(&snippet_sel)
.next()
.map(|s| s.text().collect::<String>().split_whitespace().collect::<Vec<_>>().join(" "))
.unwrap_or_default();
results.push(WebSearchResult {
title,
url,
snippet,
score: 0.0,
published_date: None,
});
}
WebSearchResponse {
query: query.to_string(),
results,
source: "duckduckgo".to_string(),
}
}
/// Resolves the `href` of a DuckDuckGo result link to the destination URL.
///
/// * An href that already starts with `http://` or `https://` is returned unchanged.
/// * A protocol-relative href (`//host/...`) is parsed as `https://host/...` and the
///   decoded value of its first `uddg` query parameter is returned.
/// * Anything else — including a protocol-relative href with no `uddg` parameter or
///   one that fails to parse — yields an empty string.
fn ddg_unwrap_href(href: &str) -> String {
    let already_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| href.starts_with(*scheme));
    if already_absolute {
        return href.to_string();
    }

    let Some(rest) = href.strip_prefix("//") else {
        return String::new();
    };
    let Ok(redirect) = reqwest::Url::parse(&format!("https://{rest}")) else {
        return String::new();
    };

    for (key, value) in redirect.query_pairs() {
        if key == "uddg" {
            return value.into_owned();
        }
    }
    String::new()
}
/// Result of `web_fetch`: response metadata plus the page reduced to text.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct WebFetchResponse {
/// URL reported by the HTTP response, which may differ from the requested one.
pub url: String,
/// Numeric HTTP status code of the response.
pub status: u16,
/// Raw `Content-Type` header value; empty when the header is absent or not valid text.
pub content_type: String,
/// Whitespace-normalised `<title>` of an HTML page; `None` for non-HTML bodies or when
/// the title is missing or empty. Left out of serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
/// Extracted text for HTML bodies, or the body unchanged for everything else.
pub text: String,
}
/// Serializable request shape for a page fetch.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FetchRequest {
/// URL of the page to fetch.
pub url: String,
}
/// Fetches `url` and reduces the response to text.
///
/// The body is treated as HTML when the `Content-Type` header mentions `html` (compared
/// case-insensitively, since media types are case-insensitive) or when the start of the
/// body looks like HTML. HTML bodies are passed through `extract_title` and
/// `html_to_text`; any other body is returned unchanged with no title.
///
/// A non-success HTTP status is not an error here: the status code is reported in
/// [`WebFetchResponse::status`] and the body is still processed.
///
/// # Errors
/// * [`SearchError::Http`] if the client cannot be built or the request fails.
/// * [`SearchError::Parse`] if the response body cannot be read as text.
pub async fn web_fetch(url: &str) -> Result<WebFetchResponse, SearchError> {
    let resp = http_client()?
        .get(url)
        .header(reqwest::header::USER_AGENT, USER_AGENT)
        .send()
        .await
        .map_err(|e| SearchError::Http(format!("fetch: {e}")))?;

    // Capture everything needed from the response before `text()` consumes it.
    let status = resp.status().as_u16();
    let final_url = resp.url().to_string();
    let content_type = resp
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|v| v.to_str().ok())
        .unwrap_or("")
        .to_string();

    let body = resp
        .text()
        .await
        .map_err(|e| SearchError::Parse(format!("fetch body: {e}")))?;

    // The header is reported to the caller verbatim; only the comparison is case-folded
    // so that e.g. `Text/HTML` is recognised too.
    let declared_html = content_type.to_ascii_lowercase().contains("html");
    let (title, text) = if declared_html || looks_like_html(&body) {
        (extract_title(&body), html_to_text(&body))
    } else {
        (None, body)
    };

    Ok(WebFetchResponse {
        url: final_url,
        status,
        content_type,
        title,
        text,
    })
}
/// Sniffs whether `body` looks like an HTML document.
///
/// Only the first 512 bytes are inspected, case-insensitively, for `<!doctype html`,
/// `<html` or `<body`. When byte 512 falls inside a multi-byte character the window is
/// shortened to the previous character boundary, so the scan never grows beyond the
/// prefix regardless of the body's encoding.
fn looks_like_html(body: &str) -> bool {
    const SNIFF_BYTES: usize = 512;

    // `body.get(..512)` yields `None` mid-code-point, which would silently widen the
    // scan to the whole body; step back to a valid boundary instead. Index 0 is always
    // a boundary, so the loop terminates.
    let mut end = body.len().min(SNIFF_BYTES);
    while !body.is_char_boundary(end) {
        end -= 1;
    }

    let head = body[..end].to_ascii_lowercase();
    head.contains("<!doctype html") || head.contains("<html") || head.contains("<body")
}
/// Returns the text of the first `<title>` element in `html`.
///
/// Whitespace runs are collapsed to single spaces and the ends are trimmed. Returns
/// `None` when there is no `<title>` or its text is empty after normalisation.
fn extract_title(html: &str) -> Option<String> {
    use scraper::{Html, Selector};

    let document = Html::parse_document(html);
    let title_sel = Selector::parse("title").ok()?;
    let node = document.select(&title_sel).next()?;

    let raw: String = node.text().collect();
    let title = raw.split_whitespace().collect::<Vec<_>>().join(" ");
    if title.is_empty() {
        None
    } else {
        Some(title)
    }
}
/// Reduces an HTML document to readable plain text.
///
/// Text is gathered from content elements (paragraphs, list items, headings, block
/// quotes, table cells, captions, definition terms and `pre`), one output line per
/// element, with whitespace runs collapsed to single spaces. If none of those yield any
/// text, the text of `<body>` is returned as a single line instead.
///
/// Two rules apply to both paths:
/// * Text inside `<script>` and `<style>` is never included, wherever those elements sit.
/// * Each piece of text is emitted once, under its nearest enclosing content element.
///   For `<li>Item <p>para</p></li>` this gives `Item` and `para` rather than
///   `Item para` followed by a repeated `para`.
fn html_to_text(html: &str) -> String {
    use scraper::{ElementRef, Html, Selector};

    /// True for elements whose text is code or styling rather than page content.
    fn is_non_content(el: &ElementRef<'_>) -> bool {
        matches!(el.value().name(), "script" | "style")
    }

    /// Whitespace-normalised text owned by `root`: every descendant text node except
    /// those inside a script/style element or inside a nested content element (which
    /// produces its own line).
    fn own_text(root: ElementRef<'_>, content: &Selector) -> String {
        let mut raw = String::new();
        for node in root.descendants() {
            let Some(text) = node.value().as_text() else {
                continue;
            };
            // Walk from the text node up to (but excluding) `root`, looking for an
            // element that either hides the text or claims it for its own line.
            let claimed_elsewhere = node
                .ancestors()
                .filter_map(ElementRef::wrap)
                .take_while(|ancestor| *ancestor != root)
                .any(|ancestor| is_non_content(&ancestor) || content.matches(&ancestor));
            if !claimed_elsewhere {
                raw.push_str(text);
            }
        }
        raw.split_whitespace().collect::<Vec<_>>().join(" ")
    }

    let doc = Html::parse_document(html);
    let content = Selector::parse(
        "p, li, h1, h2, h3, h4, h5, h6, blockquote, td, th, caption, figcaption, dd, dt, pre",
    )
    .expect("static content selector is valid CSS");

    let parts: Vec<String> = doc
        .select(&content)
        .map(|el| own_text(el, &content))
        .filter(|line| !line.is_empty())
        .collect();
    if !parts.is_empty() {
        return parts.join("\n");
    }

    // No content element produced text (e.g. a page built only from <div>s): fall back
    // to the whole body, still leaving out script and style text.
    Selector::parse("body")
        .ok()
        .and_then(|body_sel| doc.select(&body_sel).next())
        .map(|body| own_text(body, &content))
        .unwrap_or_default()
}
// Unit tests for the pure helpers (clamping, JSON/HTML parsing, href unwrapping, HTML
// sniffing) plus wiremock-backed tests for the two-step Parslee HTTP flow.
#[cfg(test)]
mod tests {
use super::*;
// `clamp_max` applies the default for `None` and bounds explicit values on both sides.
#[test]
fn clamp_max_results() {
assert_eq!(clamp_max(None), DEFAULT_MAX_RESULTS);
assert_eq!(clamp_max(Some(0)), 1);
assert_eq!(clamp_max(Some(7)), 7);
assert_eq!(clamp_max(Some(999)), MAX_RESULTS_CAP);
}
// Tavily's `content` field maps to `snippet`, and entries with an empty URL are dropped.
#[test]
fn parses_tavily_shape_content_to_snippet() {
let json = serde_json::json!({
"query": "rust async",
"results": [
{ "title": "Async Rust", "url": "https://ex.com/a", "content": "tokio…",
"score": 0.92, "published_date": "2026-01-02" },
{ "title": "no url dropped", "url": "", "content": "x" }
],
"response_time": 1.2
});
let r = parse_tavily("rust async", &json);
assert_eq!(r.source, "tavily");
assert_eq!(r.results.len(), 1, "empty-url result dropped");
assert_eq!(r.results[0].url, "https://ex.com/a");
assert_eq!(r.results[0].snippet, "tokio…", "content → snippet");
assert_eq!(r.results[0].published_date.as_deref(), Some("2026-01-02"));
assert!((r.results[0].score - 0.92).abs() < 1e-9);
}
// Parslee uses `snippet` and camelCase `publishedDate`; the payload's own `source`
// value is ignored and the label is always "parslee".
#[test]
fn parses_parslee_shape() {
let json = serde_json::json!({
"query": "q",
"results": [
{ "title": "T", "url": "https://ex.com/p", "snippet": "snip",
"score": 0.5, "publishedDate": "2026-06-13T00:00:00" }
],
"source": "Tavily"
});
let r = parse_parslee("q", &json);
assert_eq!(r.source, "parslee");
assert_eq!(r.results.len(), 1);
assert_eq!(r.results[0].snippet, "snip");
assert_eq!(
r.results[0].published_date.as_deref(),
Some("2026-06-13T00:00:00")
);
}
// A payload without a `results` array parses to an empty result list.
#[test]
fn missing_results_is_empty_not_error() {
let r = parse_tavily("q", &serde_json::json!({}));
assert!(r.results.is_empty());
}
// NOTE(review): this only constructs the variant and matches on it; it does not call
// `resolve_provider`, so the fallback order itself is not exercised here.
#[test]
fn resolver_floors_at_duckduckgo() {
let p = SearchProvider::DuckDuckGo;
assert!(matches!(p, SearchProvider::DuckDuckGo));
}
// Covers all three href shapes: protocol-relative redirect (decoded `uddg`), an
// already-absolute URL (returned as-is), and a site-relative path (empty string).
#[test]
fn ddg_unwrap_redirect_and_direct() {
let real = ddg_unwrap_href(
"//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fa%3Fx%3D1&rut=abc",
);
assert_eq!(real, "https://example.com/a?x=1");
assert_eq!(ddg_unwrap_href("https://ex.com/b"), "https://ex.com/b");
assert_eq!(ddg_unwrap_href("/about"), "");
}
// Two result containers, the second without a snippet; also checks that `max_results`
// truncates the list.
#[test]
fn parse_ddg_extracts_results() {
let html = r#"
<div class="result web-result">
<a class="result__a" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fr.com%2F1">First & Best</a>
<a class="result__snippet">A snippet here.</a>
</div>
<div class="result">
<a class="result__a" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fr.com%2F2">Second</a>
</div>"#;
let r = parse_ddg("q", html, 10);
assert_eq!(r.source, "duckduckgo");
assert_eq!(r.results.len(), 2);
assert_eq!(r.results[0].url, "https://r.com/1");
assert_eq!(r.results[0].title, "First & Best");
assert_eq!(r.results[0].snippet, "A snippet here.");
assert_eq!(r.results[1].url, "https://r.com/2");
assert_eq!(parse_ddg("q", html, 1).results.len(), 1);
}
// The title is whitespace-normalised, heading/paragraph text is kept, and script
// content must not appear in the extracted text.
#[test]
fn html_to_text_drops_script_keeps_content() {
let html = r#"<html><head><title> Hi — Page </title><style>.x{}</style></head>
<body><script>var leak='SECRET';</script>
<h1>Heading</h1><p>Para one.</p><p>Para two.</p></body></html>"#;
assert_eq!(extract_title(html).as_deref(), Some("Hi — Page"));
let text = html_to_text(html);
assert!(text.contains("Heading"));
assert!(text.contains("Para one."));
assert!(text.contains("Para two."));
assert!(!text.contains("SECRET"), "script text must be excluded: {text}");
}
// Sniffing is case-insensitive and does not fire on a JSON body.
#[test]
fn looks_like_html_detects() {
assert!(looks_like_html("<!DOCTYPE html><html>…"));
assert!(looks_like_html("<html lang=en>"));
assert!(!looks_like_html("{\"json\": true}"));
}
// Happy path against a mock server: the org lookup returns `org_test`, and the search
// must then be POSTed to that organization's search route.
#[tokio::test]
async fn parslee_search_resolves_org_then_returns_results() {
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path("/api/v1/organizations/me"))
.respond_with(
ResponseTemplate::new(200).set_body_json(serde_json::json!({
"organizationId": "org_test"
})),
)
.mount(&server)
.await;
Mock::given(method("POST"))
.and(path("/api/v1/orgs/org_test/search"))
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
"results": [
{ "title": "T", "url": "https://example.com", "snippet": "S", "score": 0.9 }
]
})))
.mount(&server)
.await;
let resp = parslee_search("q", 5, &server.uri(), "bearer-xyz")
.await
.expect("search should succeed");
assert_eq!(resp.source, "parslee");
assert_eq!(resp.results.len(), 1);
assert_eq!(resp.results[0].url, "https://example.com");
}
// A 401 from the org lookup must come back as `SearchError::Http` mentioning the
// status; only the lookup route is mocked here.
#[tokio::test]
async fn parslee_search_surfaces_401_on_org_lookup() {
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path("/api/v1/organizations/me"))
.respond_with(ResponseTemplate::new(401))
.mount(&server)
.await;
let err = parslee_search("q", 5, &server.uri(), "stale-token")
.await
.expect_err("a 401 org lookup must surface as an error");
match err {
SearchError::Http(m) => assert!(m.contains("HTTP 401"), "got: {m}"),
other => panic!("expected Http(401), got {other:?}"),
}
}
}