use std::collections::HashSet;
/// Turns a free-form user query into a full-text-search match expression.
///
/// The input is trimmed, then handled in one of three ways:
///
/// * An empty query yields an empty string.
/// * A query that [`is_code_like`] is searched as-is when it is a single bare
///   word (only alphanumerics and `_`), and as one quoted phrase otherwise.
/// * Any other query is reduced with [`extract_technical_terms`]: no terms
///   falls back to the whole query as a quoted phrase, one term is returned
///   alone, and several terms are joined with ` OR `.
///
/// NOTE(review): quoting rules assume an SQLite FTS5-style grammar (bare terms
/// may only contain alphanumerics/`_`, `"` is escaped by doubling, and
/// `AND`/`OR`/`NOT` are operators) — confirm against the engine in use.
pub fn prepare_fts_query(query: &str) -> String {
    let trimmed = query.trim();

    // Made explicit so the empty result does not depend on `is_code_like`
    // treating an empty string as code-like.
    if trimmed.is_empty() {
        return String::new();
    }

    if is_code_like(trimmed) {
        let is_bareword = trimmed.chars().all(|c| c.is_alphanumeric() || c == '_');
        // A lone operator keyword would be parsed as an operator, not a term.
        let is_operator = matches!(trimmed, "AND" | "OR" | "NOT");
        return if is_bareword && !is_operator {
            trimmed.to_string()
        } else {
            // Covers multi-word input as well as tokens such as `std::env`,
            // `execute()` or `foo-bar_baz` that contain non-bareword characters.
            quote_phrase(trimmed)
        };
    }

    let terms = extract_technical_terms(trimmed);
    match terms.as_slice() {
        [] => quote_phrase(trimmed),
        [only] => only.clone(),
        _ => terms.join(" OR "),
    }
}

/// Wraps `text` in double quotes, doubling any embedded `"` so the phrase
/// stays well-formed.
fn quote_phrase(text: &str) -> String {
    format!("\"{}\"", text.replace('"', "\"\""))
}
/// Heuristically decides whether `query` looks like code rather than prose.
///
/// Returns `true` when any of the following holds:
///
/// * it contains `::` or `()`;
/// * it contains `_` and no space character;
/// * every character is alphanumeric or `_` (a single identifier-like word).
///
/// An empty string also returns `true`, because the last rule holds vacuously.
pub fn is_code_like(query: &str) -> bool {
    // Path separators and empty call parentheses are decisive on their own.
    if query.contains("::") || query.contains("()") {
        return true;
    }

    // An underscore only counts when the query is a single space-free token.
    let has_space = query.contains(' ');
    if query.contains('_') && !has_space {
        return true;
    }

    query.chars().all(|ch| ch == '_' || ch.is_alphanumeric())
}
/// Extracts the search-worthy terms from a natural-language query.
///
/// The query is split on whitespace. For each word:
///
/// 1. Every character that is not alphanumeric, `_` or `-` is removed.
/// 2. Words that are empty after cleaning, or whose lowercase form is in the
///    stop-word list, are skipped.
/// 3. The word is kept if it contains `_`, has an uppercase character after
///    the first, is all-uppercase with a byte length of at least 2, or has a
///    byte length greater than 2.
/// 4. Kept words containing `-`, and words that are exactly `AND`, `OR` or
///    `NOT`, are wrapped in double quotes; all others are returned unchanged.
///
/// Terms are returned in input order with their original casing; duplicates
/// are not removed.
///
/// NOTE(review): the quoting in step 4 assumes an FTS5-style query grammar in
/// which `-` is not valid in a bare term and uppercase `AND`/`OR`/`NOT` are
/// operators — confirm against the engine in use.
pub fn extract_technical_terms(query: &str) -> Vec<String> {
    // Question words, auxiliaries, generic verbs, articles, prepositions,
    // conjunctions, demonstratives and vague quantifiers.
    const STOP_WORDS: &[&str] = &[
        "how",
        "what",
        "why",
        "when",
        "where",
        "which",
        "who",
        "does",
        "do",
        "is",
        "are",
        "was",
        "were",
        "can",
        "could",
        "will",
        "would",
        "work",
        "works",
        "working",
        "handle",
        "handles",
        "handling",
        "perform",
        "performs",
        "performing",
        "combine",
        "combines",
        "combining",
        "coordinate",
        "coordinates",
        "extract",
        "extracts",
        "build",
        "builds",
        "get",
        "gets",
        "set",
        "sets",
        "use",
        "uses",
        "using",
        "create",
        "creates",
        "manage",
        "manages",
        "ensure",
        "ensures",
        "apply",
        "applies",
        "the",
        "a",
        "an",
        "for",
        "from",
        "with",
        "to",
        "in",
        "on",
        "of",
        "by",
        "and",
        "or",
        "but",
        "this",
        "that",
        "these",
        "those",
        "multiple",
        "different",
        "various",
        "specific",
    ];
    // Matched case-sensitively. `AND`/`OR` are already dropped as stop words;
    // they are listed so the guard survives edits to the stop-word list.
    const OPERATOR_KEYWORDS: &[&str] = &["AND", "OR", "NOT"];

    let stop_words: HashSet<&str> = STOP_WORDS.iter().copied().collect();

    let mut terms = Vec::new();
    for word in query.split_whitespace() {
        // Strip punctuation such as `?`, `,` or `:` but keep `_` and `-`,
        // which are part of identifiers and compound names.
        let cleaned: String = word
            .chars()
            .filter(|c| c.is_alphanumeric() || matches!(*c, '_' | '-'))
            .collect();
        if cleaned.is_empty() || stop_words.contains(cleaned.to_lowercase().as_str()) {
            continue;
        }

        let is_snake_case = cleaned.contains('_');
        let is_camel_case = cleaned.chars().skip(1).any(|c| c.is_uppercase());
        // `len()` is a byte length, so the thresholds below count bytes.
        let is_acronym = cleaned.len() >= 2 && cleaned.chars().all(|c| c.is_uppercase());
        let is_technical = cleaned.len() > 2;
        if !(is_snake_case || is_camel_case || is_acronym || is_technical) {
            continue;
        }

        let needs_quoting =
            cleaned.contains('-') || OPERATOR_KEYWORDS.contains(&cleaned.as_str());
        terms.push(if needs_quoting {
            format!("\"{}\"", cleaned)
        } else {
            cleaned
        });
    }
    terms
}
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `terms` contains exactly `wanted`.
    fn has_term(terms: &[String], wanted: &str) -> bool {
        terms.iter().any(|t| t == wanted)
    }

    /// True when some term equals `wanted` after lowercasing the term.
    fn has_term_lowercased(terms: &[String], wanted: &str) -> bool {
        terms.iter().any(|t| t.to_lowercase() == wanted)
    }

    /// Identifiers, paths and calls are code-like; natural-language text is not.
    #[test]
    fn test_is_code_like() {
        for code in ["rrf_fuse", "std::env", "execute()", "QueryEngine"] {
            assert!(is_code_like(code));
        }
        for prose in ["How does RRF work?", "semantic search"] {
            assert!(!is_code_like(prose));
        }
    }

    /// Technical words survive extraction, stop words do not, and hyphenated
    /// terms come back quoted.
    #[test]
    fn test_extract_technical_terms() {
        let fusion_terms =
            extract_technical_terms("How does RRF fusion combine results from multiple oracles?");
        for kept in ["RRF", "fusion", "results", "oracles"] {
            assert!(has_term(&fusion_terms, kept));
        }
        for dropped in ["how", "does", "from"] {
            assert!(!has_term_lowercased(&fusion_terms, dropped));
        }

        let interface_terms = extract_technical_terms("What is the QueryEngine interface?");
        assert!(has_term(&interface_terms, "QueryEngine"));

        let rpc_terms = extract_technical_terms("How does MCP server handle JSON-RPC?");
        assert!(has_term(&rpc_terms, "MCP"));
        assert!(has_term(&rpc_terms, "\"JSON-RPC\""));
    }

    /// Bare identifiers pass through unchanged; questions become OR-joined terms.
    #[test]
    fn test_prepare_fts_query() {
        for ident in ["rrf_fuse", "QueryEngine"] {
            assert_eq!(prepare_fts_query(ident), ident);
        }

        let fts = prepare_fts_query("How does RRF fusion work?");
        for fragment in ["RRF", "fusion", " OR "] {
            assert!(fts.contains(fragment));
        }
    }
}