use futures::StreamExt;
use serde::Deserialize;
use std::sync::Arc;
use std::time::Duration;
use thiserror::Error;
use crate::params::SearxngParams;
/// Largest body, in bytes, accepted for a successful response (10 MiB).
const MAX_RESPONSE_BYTES: usize = 10 << 20;
/// Largest body, in bytes, read from a non-success response (64 KiB).
const MAX_ERROR_BODY_BYTES: usize = 64 << 10;
async fn read_capped(response: reqwest::Response, cap: usize) -> Result<Vec<u8>, SearchError> {
if let Some(declared) = response.content_length()
&& declared as usize > cap
{
return Err(SearchError::InvalidResponse(format!(
"response too large: declared {declared} bytes exceeds {cap} cap"
)));
}
let mut buf: Vec<u8> = Vec::with_capacity(64 * 1024);
let mut stream = response.bytes_stream();
while let Some(chunk) = stream.next().await {
let chunk = chunk.map_err(|e: reqwest::Error| SearchError::Transport(e.to_string()))?;
if buf.len() + chunk.len() > cap {
return Err(SearchError::InvalidResponse(format!(
"response too large: exceeded {cap}-byte cap"
)));
}
buf.extend_from_slice(&chunk);
}
Ok(buf)
}
/// Errors produced while querying a SearXNG instance.
#[derive(Debug, Error)]
pub enum SearchError {
/// The request failed and reqwest classified the failure as a timeout.
#[error("SearXNG request timed out")]
Timeout,
/// The server answered with a non-success HTTP status. `body` holds at most the
/// first 500 characters of the response body, or is empty if it could not be read.
#[error("SearXNG upstream error (status {status}): {body}")]
Upstream { status: u16, body: String },
/// The body could not be deserialized as a `SearxngResponse`, or it exceeded the
/// size cap enforced while reading it. The payload is a human-readable reason.
#[error("SearXNG returned an invalid JSON response: {0}")]
InvalidResponse(String),
/// Other failures while talking to the server (for example a non-timeout reqwest
/// error), or a `base_url` that does not parse as a URL.
#[error("SearXNG transport error: {0}")]
Transport(String),
}
/// One entry of the `results` array in a SearXNG JSON response.
///
/// Every field carries `#[serde(default)]`, so a key that is missing from the JSON
/// deserializes to `None` (for `Option` fields) or an empty `Vec` instead of failing.
// NOTE(review): the meaning of individual fields is defined by the SearXNG JSON
// output, not by this file — confirm against the upstream documentation.
#[derive(Debug, Clone, Deserialize)]
pub struct SearxngResult {
#[serde(default)]
pub url: Option<String>,
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub engine: Option<String>,
#[serde(default)]
pub content: Option<String>,
#[serde(default)]
pub score: Option<f64>,
#[serde(default)]
pub engines: Vec<String>,
#[serde(default)]
pub positions: Vec<u32>,
#[serde(default)]
pub category: Option<String>,
#[serde(default)]
pub template: Option<String>,
// Read from the camelCase JSON key `publishedDate`; kept as the raw string, no
// date parsing happens here.
#[serde(default, rename = "publishedDate")]
pub published_date: Option<String>,
#[serde(default)]
pub img_src: Option<String>,
#[serde(default)]
pub thumbnail_src: Option<String>,
#[serde(default)]
pub img_format: Option<String>,
#[serde(default)]
pub resolution: Option<String>,
}
/// Top-level JSON document that `SearxngClient::fetch` deserializes.
///
/// Every field carries `#[serde(default)]`, so a missing key yields the type's
/// default value (empty string, `0`, or empty `Vec`) instead of a parse error.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct SearxngResponse {
#[serde(default)]
pub query: String,
#[serde(default)]
pub number_of_results: u64,
#[serde(default)]
pub results: Vec<SearxngResult>,
// Kept as untyped JSON values: no schema is imposed on these entries here.
#[serde(default)]
pub answers: Vec<serde_json::Value>,
#[serde(default)]
pub corrections: Vec<String>,
// Kept as untyped JSON values: no schema is imposed on these entries here.
#[serde(default)]
pub infoboxes: Vec<serde_json::Value>,
#[serde(default)]
pub suggestions: Vec<String>,
// Kept as untyped JSON values: no schema is imposed on these entries here.
#[serde(default)]
pub unresponsive_engines: Vec<serde_json::Value>,
}
/// Client for the `/search` JSON endpoint of a single SearXNG instance.
#[derive(Debug, Clone)]
pub struct SearxngClient {
// Shared reqwest client used to send every request.
http: Arc<reqwest::Client>,
// Base URL of the instance; `new` stores it without trailing `/` characters.
base_url: String,
// Timeout passed to reqwest for each request issued by `fetch`.
timeout: Duration,
}
impl SearxngClient {
    /// Creates a client for the SearXNG instance at `base_url`.
    ///
    /// Trailing `/` characters are removed from `base_url` so that [`Self::fetch`] can
    /// append `/search` without producing a double slash. `timeout` is applied to
    /// every request sent by [`Self::fetch`].
    pub fn new(http: Arc<reqwest::Client>, base_url: impl Into<String>, timeout: Duration) -> Self {
        let mut base_url = base_url.into();
        // Trim in place rather than allocating a second `String`. The trimmed
        // length is a prefix length, so it always falls on a char boundary.
        let kept = base_url.trim_end_matches('/').len();
        base_url.truncate(kept);
        Self {
            http,
            base_url,
            timeout,
        }
    }

    /// Returns the base URL this client targets, without trailing slashes.
    pub fn base_url(&self) -> &str {
        &self.base_url
    }

    /// Runs one search against `{base_url}/search` and returns the parsed JSON response.
    ///
    /// `format=json` and `q` are always sent; `categories`, `language`, `time_range`,
    /// `engines`, `pageno` and `safesearch` are sent only when set in `params`.
    ///
    /// # Errors
    ///
    /// * [`SearchError::Transport`] if the base URL does not parse or the request
    ///   fails for a reason other than a timeout.
    /// * [`SearchError::Timeout`] if sending the request times out.
    /// * [`SearchError::Upstream`] if the server answers with a non-success status;
    ///   the error carries the status and up to 500 characters of the body.
    /// * [`SearchError::InvalidResponse`] if the body exceeds `MAX_RESPONSE_BYTES`
    ///   or cannot be deserialized as a [`SearxngResponse`].
    pub async fn fetch(&self, params: &SearxngParams) -> Result<SearxngResponse, SearchError> {
        let mut url = url::Url::parse(&format!("{}/search", self.base_url))
            .map_err(|e| SearchError::Transport(format!("invalid base_url: {e}")))?;
        {
            // Scoped so the mutable borrow of `url` ends before `url` is moved into `get`.
            let mut q = url.query_pairs_mut();
            q.append_pair("format", "json");
            q.append_pair("q", &params.q);
            if let Some(c) = &params.categories {
                q.append_pair("categories", c);
            }
            if let Some(l) = &params.language {
                q.append_pair("language", l);
            }
            if let Some(t) = &params.time_range {
                q.append_pair("time_range", t);
            }
            if let Some(e) = &params.engines {
                q.append_pair("engines", e);
            }
            if let Some(p) = params.pageno {
                q.append_pair("pageno", &p.to_string());
            }
            if let Some(s) = params.safesearch {
                q.append_pair("safesearch", &s.to_string());
            }
        }

        let response = self
            .http
            .get(url)
            .timeout(self.timeout)
            .send()
            .await
            .map_err(|e: reqwest::Error| {
                if e.is_timeout() {
                    SearchError::Timeout
                } else {
                    // Drop the URL from the error so the query string does not
                    // end up in the message.
                    SearchError::Transport(e.without_url().to_string())
                }
            })?;

        let status = response.status();
        if !status.is_success() {
            // Best effort: if the error body cannot be read (or exceeds the cap),
            // report an empty body rather than masking the status error.
            let body_bytes = read_capped(response, MAX_ERROR_BODY_BYTES)
                .await
                .unwrap_or_default();
            let body = String::from_utf8_lossy(&body_bytes);
            // Keep at most 500 characters so the error message stays bounded.
            let trimmed: String = body.chars().take(500).collect();
            return Err(SearchError::Upstream {
                status: status.as_u16(),
                body: trimmed,
            });
        }

        let buf = read_capped(response, MAX_RESPONSE_BYTES).await?;
        serde_json::from_slice::<SearxngResponse>(&buf)
            .map_err(|e| SearchError::InvalidResponse(e.to_string()))
    }
}