use crate::error::{NpcError, Result};
/// A single hit produced by one of the web-search backends in this module.
///
/// Backends that cannot supply a given field leave it as an empty string
/// (for example, the Startpage backend fills in only `url`).
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct SearchResult {
    /// Title of the result, or a backend-specific label.
    pub title: String,
    /// Target URL of the result; may be empty.
    pub url: String,
    /// Text excerpt (or answer text) associated with the result; may be empty.
    pub snippet: String,
}
/// Runs `query` against the search backend selected by `provider`.
///
/// Recognised providers are `"brave"`, `"searxng"`, `"startpage"`, `"perplexity"` and
/// `"exa"`; any other value falls back to DuckDuckGo. `api_key` is forwarded only to the
/// Brave, Perplexity and Exa backends.
///
/// The Perplexity backend yields exactly one result, titled `"Perplexity Answer"`, whose
/// snippet is the generated answer and whose URL is empty; `num_results` is not used for it
/// and the returned citations are discarded.
pub async fn search_web(
    query: &str,
    num_results: usize,
    provider: &str,
    api_key: Option<&str>,
) -> Result<Vec<SearchResult>> {
    // Perplexity returns an answer rather than a hit list, so it is handled separately.
    if provider == "perplexity" {
        let (answer, _) = search_perplexity(query, api_key, None, None, None).await?;
        let only = SearchResult {
            title: String::from("Perplexity Answer"),
            url: String::new(),
            snippet: answer,
        };
        return Ok(vec![only]);
    }

    match provider {
        "brave" => search_brave(query, num_results, api_key).await,
        "searxng" => search_searxng(query, num_results, None).await,
        "startpage" => search_startpage(query, num_results).await,
        "exa" => search_exa(query, api_key, num_results).await,
        _ => search_duckduckgo(query, num_results).await,
    }
}
/// Resolves a DuckDuckGo result `href` to the destination it points at.
///
/// When the link carries a `uddg=` parameter, its percent-decoded value is returned. The
/// value is cut at the first `&` so that any parameters following it (written as `&…` or
/// `&amp;…` in the HTML) do not leak into the URL. Links without `uddg=`, or whose value
/// fails to decode, are returned unchanged.
fn ddg_target_url(href: &str) -> String {
    href.split_once("uddg=")
        .map(|(_, rest)| rest.split('&').next().unwrap_or(rest))
        .and_then(|encoded| urlencoding::decode(encoded).ok())
        .map(|decoded| decoded.into_owned())
        .unwrap_or_else(|| href.to_string())
}

/// Searches DuckDuckGo's HTML endpoint and scrapes up to `num_results` hits from the page.
///
/// Titles and snippets have their HTML tags stripped. Snippets are paired with links by
/// position in the page; a link with no snippet at the same position gets an empty one.
///
/// # Errors
///
/// Returns an error if the request fails or the response body cannot be read.
pub async fn search_duckduckgo(query: &str, num_results: usize) -> Result<Vec<SearchResult>> {
    let client = reqwest::Client::new();
    let html = client
        .get("https://html.duckduckgo.com/html/")
        .query(&[("q", query)])
        .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0")
        .send()
        .await?
        .text()
        .await?;

    // The patterns are constants, so a compile failure here would be a programming error.
    let link_re = regex::Regex::new(r#"<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>(.*?)</a>"#)
        .expect("result link pattern is a valid regex");
    let snippet_re = regex::Regex::new(r#"<a[^>]*class="result__snippet"[^>]*>(.*?)</a>"#)
        .expect("result snippet pattern is a valid regex");
    let tag_strip = regex::Regex::new(r"<[^>]+>").expect("tag-strip pattern is a valid regex");

    let snippets: Vec<String> = snippet_re
        .captures_iter(&html)
        .map(|cap| tag_strip.replace_all(&cap[1], "").into_owned())
        .collect();

    let results: Vec<SearchResult> = link_re
        .captures_iter(&html)
        .take(num_results)
        .enumerate()
        .map(|(i, cap)| SearchResult {
            title: tag_strip.replace_all(&cap[2], "").into_owned(),
            url: ddg_target_url(&cap[1]),
            snippet: snippets.get(i).cloned().unwrap_or_default(),
        })
        .collect();

    Ok(results)
}
pub async fn search_brave(query: &str, num_results: usize, api_key: Option<&str>) -> Result<Vec<SearchResult>> {
let key = api_key.map(String::from).or_else(|| std::env::var("BRAVE_API_KEY").ok())
.ok_or_else(|| NpcError::LlmRequest("BRAVE_API_KEY not set".into()))?;
let client = reqwest::Client::new();
let resp = client.get("https://api.search.brave.com/res/v1/web/search")
.query(&[("q", query), ("count", &num_results.to_string())])
.header("X-Subscription-Token", &key)
.header("Accept", "application/json")
.send().await?;
let json: serde_json::Value = resp.json().await?;
let mut results = Vec::new();
if let Some(web) = json.get("web").and_then(|w| w.get("results")).and_then(|r| r.as_array()) {
for item in web.iter().take(num_results) {
results.push(SearchResult {
title: item["title"].as_str().unwrap_or("").to_string(),
url: item["url"].as_str().unwrap_or("").to_string(),
snippet: item["description"].as_str().unwrap_or("").to_string(),
});
}
}
Ok(results)
}
/// Searches SearxNG instances in order and returns the first non-empty result list.
///
/// The instance list is `instance_url` if given, otherwise the `SEARXNG_URL` environment
/// variable, otherwise two built-in public instances. Trailing slashes on an instance URL
/// are ignored. An instance is skipped when the request fails, the body is not valid JSON,
/// or it yields no results. At most `num_results` hits are returned.
///
/// # Errors
///
/// Returns `NpcError::LlmRequest` when no instance produced any results.
pub async fn search_searxng(query: &str, num_results: usize, instance_url: Option<&str>) -> Result<Vec<SearchResult>> {
    let instances: Vec<String> = match instance_url {
        Some(url) => vec![url.to_string()],
        None => match std::env::var("SEARXNG_URL") {
            Ok(url) => vec![url],
            Err(_) => vec!["https://search.sapti.me".into(), "https://searx.work".into()],
        },
    };

    let client = reqwest::Client::new();
    for instance in &instances {
        // A base URL such as "https://host/" must not produce "https://host//search".
        let url = format!("{}/search", instance.trim_end_matches('/'));

        let resp = match client
            .get(&url)
            .query(&[("q", query), ("format", "json"), ("categories", "general")])
            .header("User-Agent", "npcsh/1.0")
            .send()
            .await
        {
            Ok(resp) => resp,
            // Best effort: move on to the next instance.
            Err(_) => continue,
        };

        let json = match resp.json::<serde_json::Value>().await {
            Ok(json) => json,
            Err(_) => continue,
        };

        let results: Vec<SearchResult> = json
            .get("results")
            .and_then(|r| r.as_array())
            .map(|items| {
                items
                    .iter()
                    .take(num_results)
                    .map(|item| SearchResult {
                        title: item["title"].as_str().unwrap_or("").to_string(),
                        url: item["url"].as_str().unwrap_or("").to_string(),
                        snippet: item["content"].as_str().unwrap_or("").to_string(),
                    })
                    .collect()
            })
            .unwrap_or_default();

        if !results.is_empty() {
            return Ok(results);
        }
    }

    Err(NpcError::LlmRequest("All SearxNG instances failed".into()))
}
/// Reports whether `url`'s host is `startpage.com` or one of its subdomains.
fn is_startpage_link(url: &str) -> bool {
    let rest = url.split_once("://").map_or(url, |(_, r)| r);
    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or(rest);
    // Drop optional userinfo and port to isolate the host name.
    let host = authority.rsplit('@').next().unwrap_or(authority);
    let host = host.split(':').next().unwrap_or(host).to_ascii_lowercase();
    host == "startpage.com" || host.ends_with(".startpage.com")
}

/// Searches Startpage and scrapes up to `num_results` outbound links from the result page.
///
/// Only absolute `http(s)` anchors are collected. Links pointing at Startpage itself are
/// skipped and each URL is returned at most once, so the site's own links and repeated
/// anchors do not use up the `num_results` budget. Titles and snippets are left empty.
///
/// # Errors
///
/// Returns `NpcError::LlmRequest` when Startpage answers with a non-success status, and
/// propagates transport errors.
pub async fn search_startpage(query: &str, num_results: usize) -> Result<Vec<SearchResult>> {
    let client = reqwest::Client::new();
    let resp = client
        .post("https://www.startpage.com/sp/search")
        .form(&[("query", query), ("cat", "web")])
        .header("User-Agent", "Mozilla/5.0")
        .header("Accept", "text/html")
        .send()
        .await?;
    if !resp.status().is_success() {
        return Err(NpcError::LlmRequest("Startpage search failed".into()));
    }
    let html = resp.text().await?;

    // The pattern is a constant, so a compile failure here would be a programming error.
    let link_re = regex::Regex::new(r#"<a[^>]*href="(https?://[^"]*)"[^>]*>"#)
        .expect("anchor pattern is a valid regex");

    let mut seen = std::collections::HashSet::new();
    let mut results = Vec::new();
    for cap in link_re.captures_iter(&html) {
        if results.len() >= num_results {
            break;
        }
        let url = &cap[1];
        // `insert` returns false for a URL that was already collected.
        if is_startpage_link(url) || !seen.insert(url.to_string()) {
            continue;
        }
        results.push(SearchResult {
            title: String::new(),
            url: url.to_string(),
            snippet: String::new(),
        });
    }
    Ok(results)
}
pub async fn search_perplexity(query: &str, api_key: Option<&str>, max_tokens: Option<u32>, temperature: Option<f64>, top_p: Option<f64>) -> Result<(String, Vec<String>)> {
let key = api_key.map(String::from).or_else(|| std::env::var("PERPLEXITY_API_KEY").ok()).ok_or_else(|| NpcError::LlmRequest("PERPLEXITY_API_KEY not set".into()))?;
let body = serde_json::json!({"model": "sonar", "messages": [{"role": "system", "content": "Be precise and concise."}, {"role": "user", "content": query}], "max_tokens": max_tokens.unwrap_or(400), "temperature": temperature.unwrap_or(0.2), "top_p": top_p.unwrap_or(0.9), "stream": false});
let client = reqwest::Client::new();
let resp = client.post("https://api.perplexity.ai/chat/completions").header("Authorization", format!("Bearer {}", key)).header("Content-Type", "application/json").json(&body).send().await?;
if !resp.status().is_success() { let t = resp.text().await.unwrap_or_default(); return Err(NpcError::LlmRequest(format!("Perplexity error: {}", &t[..t.len().min(200)]))); }
let data: serde_json::Value = resp.json().await?;
let answer = data["choices"][0]["message"]["content"].as_str().unwrap_or("").to_string();
let citations = data.get("citations").and_then(|c| c.as_array()).map(|a| a.iter().filter_map(|v| v.as_str().map(String::from)).collect()).unwrap_or_default();
Ok((answer, citations))
}
pub async fn search_exa(query: &str, api_key: Option<&str>, top_k: usize) -> Result<Vec<SearchResult>> {
let key = api_key.map(String::from).or_else(|| std::env::var("EXA_API_KEY").ok()).ok_or_else(|| NpcError::LlmRequest("EXA_API_KEY not set".into()))?;
let body = serde_json::json!({"query": query, "contents": {"text": true}, "numResults": top_k});
let client = reqwest::Client::new();
let resp = client.post("https://api.exa.ai/search").header("x-api-key", &key).header("Content-Type", "application/json").json(&body).send().await?;
if !resp.status().is_success() { let t = resp.text().await.unwrap_or_default(); return Err(NpcError::LlmRequest(format!("Exa error: {}", t))); }
let data: serde_json::Value = resp.json().await?;
let mut results = Vec::new();
if let Some(arr) = data.get("results").and_then(|r| r.as_array()) { for item in arr.iter().take(top_k) { results.push(SearchResult { title: item["title"].as_str().unwrap_or("").to_string(), url: item["url"].as_str().unwrap_or("").to_string(), snippet: item["text"].as_str().unwrap_or("").chars().take(500).collect() }); } }
Ok(results)
}
/// Performs a GET request for `url` and returns the response body as text.
///
/// The request is sent with the `npcsh/1.0` user agent. The HTTP status is not inspected,
/// so the body of an error response is returned like any other.
///
/// # Errors
///
/// Returns an error if the request fails or the body cannot be read as text.
pub async fn fetch_url(url: &str) -> Result<String> {
    let request = reqwest::Client::new()
        .get(url)
        .header("User-Agent", "npcsh/1.0");
    let response = request.send().await?;
    let body = response.text().await?;
    Ok(body)
}