use regex::Regex;
use std::sync::LazyLock;
pub(crate) fn sanitize_query(query: &str) -> String {
if query.trim().is_empty() {
return String::new();
}
static QUOTED_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#""[^"]*""#).unwrap());
let mut quoted_parts: Vec<String> = Vec::new();
let mut sanitized = QUOTED_RE
.replace_all(query, |caps: ®ex::Captures| {
let s = caps[0].to_string();
let idx = quoted_parts.len();
quoted_parts.push(s);
format!("\x00Q{idx}\x00")
})
.to_string();
static SPECIAL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"[+{}()"^]"#).unwrap());
sanitized = SPECIAL_RE.replace_all(&sanitized, " ").to_string();
static STAR_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*+").unwrap());
sanitized = STAR_RE.replace_all(&sanitized, "*").to_string();
static LEADING_STAR_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(^|\s)\*").unwrap());
sanitized = LEADING_STAR_RE.replace_all(&sanitized, "$1").to_string();
static DANGLING_START_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?i)^(AND|OR|NOT)\b\s*").unwrap());
static DANGLING_END_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?i)\s+(AND|OR|NOT)\s*$").unwrap());
loop {
let before = sanitized.clone();
sanitized = DANGLING_START_RE.replace(sanitized.trim(), "").to_string();
sanitized = DANGLING_END_RE.replace(sanitized.trim(), "").to_string();
if sanitized == before {
break;
}
}
static DOT_DASH_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\b(\w+(?:[._-]\w+)+\w*)\b").unwrap());
sanitized = DOT_DASH_RE.replace_all(&sanitized, r#""$1""#).to_string();
for (i, quoted) in quoted_parts.iter().enumerate() {
sanitized = sanitized.replace(&format!("\x00Q{i}\x00"), quoted);
}
let trimmed = sanitized.trim();
if trimmed.is_empty() {
return String::new();
}
trimmed.to_string()
}
/// Wraps every whitespace-separated token of `query` in double quotes.
///
/// Any `"` characters inside a token are removed first. Tokens that are empty
/// after that removal are skipped. The quoted tokens are joined with single
/// spaces; input with no usable tokens yields an empty string.
pub(crate) fn quote_terms(query: &str) -> String {
    let mut quoted: Vec<String> = Vec::new();
    for token in query.split_whitespace() {
        let bare: String = token.chars().filter(|&ch| ch != '"').collect();
        // A token made only of quote characters would become `""`; drop it.
        if bare.is_empty() {
            continue;
        }
        quoted.push(format!("\"{bare}\""));
    }
    quoted.join(" ")
}
#[cfg(test)]
mod tests {
    use super::{quote_terms, sanitize_query};

    /// Checks that `sanitize_query` turns each input into its paired expectation.
    fn assert_sanitized(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(sanitize_query(input), expected, "input: {input:?}");
        }
    }

    /// Checks that `quote_terms` turns each input into its paired expectation.
    fn assert_quoted(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(quote_terms(input), expected, "input: {input:?}");
        }
    }

    // Ordinary words pass through unchanged.
    #[test]
    fn sanitize_passes_plain_words() {
        assert_sanitized(&[("database migrations", "database migrations")]);
    }

    // A balanced quoted phrase is returned exactly as written.
    #[test]
    fn sanitize_preserves_quoted_phrases() {
        assert_sanitized(&[(r#""exact phrase""#, r#""exact phrase""#)]);
    }

    // Operator-like punctuation is removed.
    #[test]
    fn sanitize_strips_special_chars() {
        assert_sanitized(&[("+hello", "hello")]);
    }

    // Consecutive stars become a single star.
    #[test]
    fn sanitize_collapses_stars() {
        assert_sanitized(&[("a***test", "a*test")]);
    }

    // Boolean operators with nothing to bind to are dropped from either end.
    #[test]
    fn sanitize_strips_dangling_operators() {
        assert_sanitized(&[
            ("hello AND", "hello"),
            ("AND OR foo", "foo"),
            ("foo AND OR", "foo"),
        ]);
    }

    // Identifiers joined by `.` or `-` are wrapped in quotes.
    #[test]
    fn sanitize_quotes_dotted_and_hyphenated_terms() {
        assert_sanitized(&[("my-app.config.ts", r#""my-app.config.ts""#)]);
    }

    // Inputs with no usable term collapse to the empty string.
    #[test]
    fn sanitize_empty_is_empty() {
        assert_sanitized(&[(" ", ""), ("AND", ""), (r#"*"()"#, "")]);
    }

    // Surrounding whitespace is removed.
    #[test]
    fn sanitize_trims_whitespace() {
        assert_sanitized(&[(" hello world ", "hello world")]);
    }

    // Every token ends up individually quoted, with embedded quotes removed.
    #[test]
    fn quote_terms_neutralizes_syntax() {
        assert_quoted(&[
            ("cargo build", r#""cargo" "build""#),
            ("don't", r#""don't""#),
            (r#"a "b" c"#, r#""a" "b" "c""#),
            (" ", ""),
        ]);
    }
}