// eggsearch 0.2.1
//
// Lightweight MCP metasearch server for AI agents
// Documentation
use std::time::Duration;

use reqwest::Client;
use serde::Deserialize;

use super::error::EngineError;
use super::models::SearchResult;

/// Engine identifier attached to the errors and results produced by this module.
const ENGINE: &str = "searxng";
/// Largest response body, in bytes, that `search` accepts for parsing (2 MiB).
const MAX_BODY_BYTES: usize = 2 * 1024 * 1024;

/// Top-level shape of the JSON document decoded from a SearXNG response.
///
/// Only the `results` array is read; any other keys in the document are
/// ignored during deserialization.
#[derive(Debug, Deserialize)]
struct SearxngResponse {
    /// Raw result entries; an absent `results` key decodes to an empty list.
    #[serde(default)]
    results: Vec<SearxngResult>,
}

/// One raw entry of the `results` array, before validation.
///
/// Every field is optional so that an entry lacking one of them still
/// deserializes; `convert` decides which entries are usable.
#[derive(Debug, Deserialize)]
struct SearxngResult {
    /// Result title as returned by the instance.
    title: Option<String>,
    /// Target URL of the result.
    url: Option<String>,
    /// Text excerpt; becomes the `snippet` of the converted result.
    content: Option<String>,
}

/// Queries a SearXNG instance's JSON API and returns normalized results.
///
/// Sends `GET {base_url}/search` with `format=json`, the `general` category
/// and the `en-US` language, decodes the `results` array of the response and
/// hands it to `convert`, which keeps at most `max_results` usable entries.
///
/// `timeout` bounds the whole exchange: sending the request, receiving the
/// response headers and downloading the body. The body is read chunk by chunk
/// and the download is aborted as soon as it exceeds `MAX_BODY_BYTES`, so an
/// oversized or never-ending response cannot exhaust memory or stall the
/// caller.
///
/// # Errors
///
/// * `EngineError::Timeout` if the exchange does not finish within `timeout`.
/// * `EngineError::Http` if sending the request or reading the body fails.
/// * `EngineError::BadStatus` for any non-success HTTP status.
/// * `EngineError::ParseFailed` if the body is larger than `MAX_BODY_BYTES`
///   or is not JSON of the expected shape.
pub async fn search(
    client: &Client,
    base_url: &str,
    query: &str,
    max_results: usize,
    timeout: Duration,
) -> Result<Vec<SearchResult>, EngineError> {
    let endpoint = build_endpoint(base_url);

    // Everything that touches the network lives in one future so that a single
    // deadline covers both the request and the body download.
    let fetch = async {
        let mut response = client
            .get(&endpoint)
            .query(&[
                ("q", query),
                ("format", "json"),
                ("categories", "general"),
                ("language", "en-US"),
            ])
            .header("Accept", "application/json")
            .header("Accept-Language", "en-US,en;q=0.9")
            .send()
            .await
            .map_err(|e| EngineError::Http {
                engine: ENGINE,
                source: e,
            })?;

        let status = response.status();
        if !status.is_success() {
            return Err(EngineError::BadStatus {
                engine: ENGINE,
                status: status.as_u16(),
            });
        }

        // Fast path: reject on the declared length before downloading anything.
        // A length that does not even fit in `usize` is necessarily too large.
        if let Some(declared) = response.content_length() {
            if usize::try_from(declared).map_or(true, |len| len > MAX_BODY_BYTES) {
                return Err(EngineError::ParseFailed {
                    engine: ENGINE,
                    reason: format!("response body too large: {declared} bytes"),
                });
            }
        }

        // The declared length may be absent or wrong, so enforce the cap while
        // streaming instead of after buffering the whole body.
        let mut body: Vec<u8> = Vec::new();
        while let Some(chunk) = response.chunk().await.map_err(|e| EngineError::Http {
            engine: ENGINE,
            source: e,
        })? {
            // `body.len()` never exceeds the cap, so the subtraction cannot underflow.
            if chunk.len() > MAX_BODY_BYTES - body.len() {
                return Err(EngineError::ParseFailed {
                    engine: ENGINE,
                    reason: format!("response body too large: more than {MAX_BODY_BYTES} bytes"),
                });
            }
            body.extend_from_slice(&chunk);
        }

        Ok::<Vec<u8>, EngineError>(body)
    };

    // Outer `?` handles the elapsed deadline, inner `?` the fetch error itself.
    let body = tokio::time::timeout(timeout, fetch)
        .await
        .map_err(|_| EngineError::Timeout { engine: ENGINE })??;

    let parsed: SearxngResponse =
        serde_json::from_slice(&body).map_err(|e| EngineError::ParseFailed {
            engine: ENGINE,
            reason: format!("invalid JSON: {e}"),
        })?;

    Ok(convert(parsed.results, max_results))
}

/// Derives the search endpoint URL of a SearXNG instance.
///
/// All trailing `/` characters are stripped from `base_url` before `/search`
/// is appended, so the result contains exactly one slash at the join.
fn build_endpoint(base_url: &str) -> String {
    let root = base_url.trim_end_matches('/');
    [root, "/search"].concat()
}

/// Collapses every run of whitespace in `text` to a single space and strips
/// leading and trailing whitespace.
fn collapse_whitespace(text: &str) -> String {
    text.split_whitespace().collect::<Vec<_>>().join(" ")
}

/// Turns raw SearXNG entries into at most `max_results` validated results.
///
/// Entries are processed in order. An entry is dropped when its URL is missing
/// or does not start with `http://` or `https://`, or when its title is
/// missing or blank. Titles and snippets have their whitespace collapsed; a
/// blank snippet becomes `None`. Dropped entries do not count towards
/// `max_results`.
fn convert(raw: Vec<SearxngResult>, max_results: usize) -> Vec<SearchResult> {
    // Never reserve more than can actually be produced: `max_results` is
    // caller-supplied and may be arbitrarily large.
    let mut out = Vec::with_capacity(max_results.min(raw.len()));
    out.extend(
        raw.into_iter()
            .filter_map(|entry| {
                let url = entry
                    .url
                    .filter(|u| u.starts_with("http://") || u.starts_with("https://"))?;
                let title = entry
                    .title
                    .as_deref()
                    .map(collapse_whitespace)
                    .filter(|t| !t.is_empty())?;
                let snippet = entry
                    .content
                    .as_deref()
                    .map(collapse_whitespace)
                    .filter(|s| !s.is_empty());
                Some(SearchResult {
                    title,
                    url,
                    snippet,
                    source_engine: ENGINE.to_string(),
                })
            })
            .take(max_results),
    );
    out
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a raw SearXNG entry from optional string slices.
    fn entry(title: Option<&str>, url: Option<&str>, content: Option<&str>) -> SearxngResult {
        SearxngResult {
            title: title.map(str::to_owned),
            url: url.map(str::to_owned),
            content: content.map(str::to_owned),
        }
    }

    // Both spellings of the base URL must map to the same endpoint.
    #[test]
    fn test_build_endpoint_trims_trailing_slash() {
        let expected = "https://searx.example.org/search";
        for base in ["https://searx.example.org/", "https://searx.example.org"] {
            assert_eq!(build_endpoint(base), expected);
        }
    }

    #[test]
    fn test_convert_extracts_results() {
        let converted = convert(
            vec![
                entry(
                    Some("Example Site"),
                    Some("https://example.com"),
                    Some("An example website for testing."),
                ),
                entry(
                    Some("Rust Language"),
                    Some("https://rust-lang.org"),
                    Some("Systems programming language."),
                ),
            ],
            10,
        );
        assert_eq!(converted.len(), 2);

        let first = &converted[0];
        assert_eq!(first.title, "Example Site");
        assert_eq!(first.url, "https://example.com");
        assert_eq!(
            first.snippet.as_deref(),
            Some("An example website for testing.")
        );
        assert_eq!(first.source_engine, "searxng");
    }

    #[test]
    fn test_convert_respects_max_results() {
        let entries: Vec<SearxngResult> = (0..5)
            .map(|i| {
                entry(
                    Some(&format!("T{i}")),
                    Some(&format!("https://example.com/{i}")),
                    None,
                )
            })
            .collect();
        assert_eq!(convert(entries, 2).len(), 2);
    }

    #[test]
    fn test_convert_skips_missing_url() {
        let converted = convert(vec![entry(Some("No URL"), None, None)], 10);
        assert!(converted.is_empty());
    }

    #[test]
    fn test_convert_skips_empty_url() {
        let converted = convert(vec![entry(Some("Empty"), Some(""), None)], 10);
        assert!(converted.is_empty());
    }

    #[test]
    fn test_convert_skips_non_http_urls() {
        let converted = convert(
            vec![
                entry(Some("Relative"), Some("/relative"), None),
                entry(Some("Valid"), Some("https://valid.com"), None),
            ],
            10,
        );
        assert_eq!(converted.len(), 1);
        assert_eq!(converted[0].url, "https://valid.com");
    }

    #[test]
    fn test_convert_skips_missing_title() {
        let converted = convert(vec![entry(None, Some("https://example.com"), None)], 10);
        assert!(converted.is_empty());
    }

    #[test]
    fn test_convert_drops_empty_snippet() {
        let converted = convert(
            vec![entry(Some("Title"), Some("https://example.com"), Some(""))],
            10,
        );
        assert_eq!(converted.len(), 1);
        assert!(converted[0].snippet.is_none());
    }

    // Keys other than `results` must not prevent the document from decoding.
    #[test]
    fn test_parse_json_response_full() {
        let body = r#"{
            "query": "rust",
            "results": [
                {"title": "Rust Lang", "url": "https://rust-lang.org", "content": "A language"},
                {"title": "Wikipedia", "url": "https://en.wikipedia.org/wiki/Rust", "content": "Article"}
            ],
            "engines": ["bing", "google"]
        }"#;
        let decoded: SearxngResponse = serde_json::from_str(body).unwrap();
        assert_eq!(decoded.results.len(), 2);
    }
}