use crw_core::config::SearchConfig;
use crw_core::types::{SearchCategory, SearchRequest};
/// Query parameters produced by [`map_to_searxng_params`] for a SearXNG search.
///
/// Every field except `q` is optional; `None` means the parameter is simply
/// not set by the mapper.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct SearxngParams {
    /// The search query text.
    pub q: String,
    /// Comma-separated category names, when any were requested.
    pub categories: Option<String>,
    /// Language code for the search.
    pub language: Option<String>,
    /// Time-range restriction (the tests in this file expect values such as
    /// `"day"` and `"year"`).
    pub time_range: Option<String>,
    /// Comma-separated engine names, when specific engines were selected.
    pub engines: Option<String>,
    /// Result page number — presumably 1-based as in SearXNG; TODO confirm.
    pub pageno: Option<u32>,
    /// Safe-search level — NOTE(review): valid range is not visible here.
    pub safesearch: Option<u8>,
}
/// Superlative words that are dropped when they open a query.
const LEADING_FILLER: &[&str] = &["top", "best", "good", "greatest", "finest", "cheapest"];

/// Removes a leading filler word (see [`LEADING_FILLER`]) from `query`.
///
/// The first word is stripped only when all of the following hold:
///
/// * the query contains neither `"` nor `:` (quoted phrases and operator
///   queries such as `site:example.com` are left alone),
/// * the query has at least three whitespace-separated words, and
/// * the first word, lower-cased, is one of the filler words.
///
/// When a word is stripped, the remaining words are re-joined with single
/// spaces. In every other case the input is returned unchanged, including
/// any surrounding whitespace.
pub fn clean_query(query: &str) -> String {
    let candidate = query.trim();
    let has_operator = candidate.chars().any(|c| c == '"' || c == ':');

    if !has_operator {
        let mut words = candidate.split_whitespace();
        if let Some(first) = words.next() {
            let rest: Vec<&str> = words.collect();
            let lowered = first.to_lowercase();
            // Require two or more words after the filler so short queries
            // like "best buy" survive intact.
            let is_filler = LEADING_FILLER.iter().any(|filler| *filler == lowered.as_str());
            if rest.len() >= 2 && is_filler {
                return rest.join(" ");
            }
        }
    }

    query.to_string()
}
/// Translates a [`SearchRequest`] into the [`SearxngParams`] for one search.
///
/// * The query text is passed through [`clean_query`]; each `Pdf` category
///   then appends ` filetype:pdf` to it.
/// * `Github` and `Research` categories add the engine lists configured in
///   `config` (`github_engines` / `research_engines`).
/// * `categories` starts with the SearXNG category of every requested source,
///   followed by any `Other` category names not already present.
/// * `language` falls back to `"en"` when the request has no language or only
///   a blank one.
/// * `pageno` and `safesearch` are never set here.
pub fn map_to_searxng_params(req: &SearchRequest, config: &SearchConfig) -> SearxngParams {
    /// Comma-joins `items`, or yields `None` when there is nothing to send.
    fn join_or_none(items: &[String]) -> Option<String> {
        (!items.is_empty()).then(|| items.join(","))
    }

    let mut q = clean_query(&req.query);
    let mut engine_list: Vec<String> = Vec::new();

    // Source-derived categories come first; pass-through names are appended
    // below in request order.
    let mut category_list: Vec<String> = match &req.sources {
        Some(sources) => sources
            .iter()
            .map(|source| source.searxng_category().to_string())
            .collect(),
        None => Vec::new(),
    };

    for category in req.categories.iter().flatten() {
        match category {
            SearchCategory::Pdf => q.push_str(" filetype:pdf"),
            SearchCategory::Github => {
                engine_list.extend(config.github_engines.iter().cloned());
            }
            SearchCategory::Research => {
                engine_list.extend(config.research_engines.iter().cloned());
            }
            SearchCategory::Other(name) => {
                // Skip names already contributed by a source or an earlier
                // pass-through category.
                if !category_list.contains(name) {
                    category_list.push(name.clone());
                }
            }
        }
    }

    let language = req
        .lang
        .as_deref()
        .map(str::trim)
        .filter(|code| !code.is_empty())
        .unwrap_or("en");

    SearxngParams {
        q,
        categories: join_or_none(&category_list),
        language: Some(language.to_string()),
        time_range: req.tbs.map(|filter| filter.searxng_time_range().to_string()),
        engines: join_or_none(&engine_list),
        pageno: None,
        safesearch: None,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crw_core::types::{SearchSource, SearchTimeFilter};

    /// Engine list the tests expect the default config to use for `Research`.
    const RESEARCH_ENGINES: &str = "arxiv,crossref,google scholar,semantic scholar";

    /// Default search configuration used by every test.
    fn cfg() -> SearchConfig {
        SearchConfig::default()
    }

    /// Builds a request that carries only the query `q`; all options are `None`.
    fn req(q: &str) -> SearchRequest {
        SearchRequest {
            query: q.into(),
            limit: None,
            lang: None,
            tbs: None,
            sources: None,
            categories: None,
            scrape_options: None,
            summarize_results: None,
            answer: None,
            answer_top_n: None,
            max_chars_per_source: None,
            llm_api_key: None,
            llm_provider: None,
            llm_model: None,
            base_url: None,
            summary_prompt: None,
            answer_prompt: None,
            answer_temperature: None,
            query_expand_variants: None,
            multi_round: None,
            answer_list_format: None,
            max_content_chars: None,
        }
    }

    /// Request for `q` restricted to the categories `cats`.
    fn req_with_categories(q: &str, cats: Vec<SearchCategory>) -> SearchRequest {
        SearchRequest {
            categories: Some(cats),
            ..req(q)
        }
    }

    /// Request for `q` with the language field set to `lang`.
    fn req_with_lang(q: &str, lang: &str) -> SearchRequest {
        SearchRequest {
            lang: Some(lang.to_string()),
            ..req(q)
        }
    }

    /// Request for `q` with the time filter `tbs`.
    fn req_with_tbs(q: &str, tbs: SearchTimeFilter) -> SearchRequest {
        SearchRequest {
            tbs: Some(tbs),
            ..req(q)
        }
    }

    /// Maps `request` using the default configuration.
    fn map(request: &SearchRequest) -> SearxngParams {
        map_to_searxng_params(request, &cfg())
    }

    // --- request mapping -------------------------------------------------

    #[test]
    fn plain_query_is_passed_through() {
        let params = map(&req("rust async"));
        assert_eq!(params.q, "rust async");
        assert!(params.categories.is_none());
        assert!(params.engines.is_none());
        assert!(params.time_range.is_none());
    }

    #[test]
    fn pdf_category_modifies_query_only() {
        let params = map(&req_with_categories("rust", vec![SearchCategory::Pdf]));
        assert_eq!(params.q, "rust filetype:pdf");
        assert!(params.engines.is_none());
    }

    #[test]
    fn github_category_sets_engines() {
        let params = map(&req_with_categories("rust", vec![SearchCategory::Github]));
        assert_eq!(params.engines.as_deref(), Some("github"));
        assert_eq!(params.q, "rust");
    }

    #[test]
    fn research_category_expands_to_default_engines() {
        let params = map(&req_with_categories(
            "transformers",
            vec![SearchCategory::Research],
        ));
        assert_eq!(params.engines.as_deref(), Some(RESEARCH_ENGINES));
    }

    #[test]
    fn sources_join_to_searxng_categories() {
        let request = SearchRequest {
            sources: Some(vec![SearchSource::Web, SearchSource::News]),
            ..req("rust")
        };
        let params = map(&request);
        assert_eq!(params.categories.as_deref(), Some("general,news"));
    }

    #[test]
    fn unknown_category_passes_through_to_searxng() {
        let params = map(&req_with_categories(
            "crispr",
            vec![SearchCategory::Other(String::from("science"))],
        ));
        assert_eq!(params.categories.as_deref(), Some("science"));
        assert!(params.engines.is_none());
        assert_eq!(params.q, "crispr");
    }

    #[test]
    fn passthrough_categories_merge_with_sources() {
        let request = SearchRequest {
            sources: Some(vec![SearchSource::Web]),
            ..req_with_categories(
                "rust",
                vec![
                    SearchCategory::Other(String::from("it")),
                    SearchCategory::Other(String::from("science")),
                ],
            )
        };
        let params = map(&request);
        assert_eq!(params.categories.as_deref(), Some("general,it,science"));
    }

    #[test]
    fn curated_and_passthrough_categories_coexist() {
        let params = map(&req_with_categories(
            "memory safety",
            vec![
                SearchCategory::Research,
                SearchCategory::Other(String::from("it")),
            ],
        ));
        assert_eq!(params.engines.as_deref(), Some(RESEARCH_ENGINES));
        assert_eq!(params.categories.as_deref(), Some("it"));
    }

    #[test]
    fn passthrough_category_dedupes_against_sources() {
        let request = SearchRequest {
            sources: Some(vec![SearchSource::News]),
            ..req_with_categories("rust", vec![SearchCategory::Other(String::from("news"))])
        };
        let params = map(&request);
        assert_eq!(params.categories.as_deref(), Some("news"));
    }

    #[test]
    fn category_string_roundtrip_keeps_curated_and_passthrough() {
        let json = r#"["research","science","github"]"#;
        let cats: Vec<SearchCategory> = serde_json::from_str(json).unwrap();
        assert_eq!(
            cats,
            vec![
                SearchCategory::Research,
                SearchCategory::Other(String::from("science")),
                SearchCategory::Github,
            ]
        );
        let back = serde_json::to_string(&cats).unwrap();
        assert_eq!(back, r#"["research","science","github"]"#);
    }

    #[test]
    fn tbs_hour_collapses_to_day() {
        let params = map(&req_with_tbs("rust", SearchTimeFilter::Hour));
        assert_eq!(params.time_range.as_deref(), Some("day"));
    }

    #[test]
    fn tbs_year_maps_to_year() {
        let params = map(&req_with_tbs("rust", SearchTimeFilter::Year));
        assert_eq!(params.time_range.as_deref(), Some("year"));
    }

    #[test]
    fn empty_lang_defaults_to_en() {
        let params = map(&req_with_lang("rust", ""));
        assert_eq!(params.language.as_deref(), Some("en"));
    }

    #[test]
    fn missing_lang_defaults_to_en() {
        let params = map(&req("rust"));
        assert_eq!(params.language.as_deref(), Some("en"));
    }

    #[test]
    fn explicit_lang_is_honored() {
        let params = map(&req_with_lang("rust", "de"));
        assert_eq!(params.language.as_deref(), Some("de"));
    }

    // --- clean_query -----------------------------------------------------

    #[test]
    fn clean_query_strips_leading_best() {
        let cleaned = clean_query("best restaurants in belgrade");
        assert_eq!(cleaned, "restaurants in belgrade");
    }

    #[test]
    fn clean_query_strips_leading_top() {
        let cleaned = clean_query("top museums in vienna");
        assert_eq!(cleaned, "museums in vienna");
    }

    #[test]
    fn clean_query_keeps_quoted_top_gun() {
        let original = "\"top gun\" movie review";
        assert_eq!(clean_query(original), original);
    }

    #[test]
    fn clean_query_keeps_operator_query() {
        let original = "best site:imdb.com movie";
        assert_eq!(clean_query(original), original);
    }

    #[test]
    fn clean_query_keeps_short_query() {
        for original in ["best buy", "top gun"] {
            assert_eq!(clean_query(original), original);
        }
    }

    #[test]
    fn clean_query_leaves_non_filler_leading_token() {
        let original = "python snake habitat";
        assert_eq!(clean_query(original), original);
    }

    #[test]
    fn clean_query_applied_in_params() {
        let params = map(&req("best coffee shops in lisbon"));
        assert_eq!(params.q, "coffee shops in lisbon");
    }

    #[test]
    fn pdf_plus_github_combine() {
        let params = map(&req_with_categories(
            "memory",
            vec![SearchCategory::Pdf, SearchCategory::Github],
        ));
        assert_eq!(params.q, "memory filetype:pdf");
        assert_eq!(params.engines.as_deref(), Some("github"));
    }
}