use serde::{Deserialize, Serialize};
use crate::error::Result;
use crate::lexical::query::LexicalSearchResults;
use crate::lexical::search::searcher::LexicalSearchRequest;
use crate::lexical::store::LexicalStore;
use crate::spelling::corrector::{
CorrectionResult, CorrectorConfig, DidYouMean, SpellingCorrector,
};
/// Search results bundled with the spelling-correction outcome for the query
/// that produced them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpellCorrectedSearchResults {
/// Results of the search that was ultimately kept (original or corrected query).
pub results: LexicalSearchResults,
/// Correction information computed for the query string.
pub correction: CorrectionResult,
/// `true` when `results` come from re-running the corrected query.
/// Initialised to `false` by `new`.
pub used_correction: bool,
/// Optional "did you mean" suggestion text. Initialised to `None` by `new`.
pub did_you_mean: Option<String>,
}
impl SpellCorrectedSearchResults {
pub fn new(results: LexicalSearchResults, correction: CorrectionResult) -> Self {
SpellCorrectedSearchResults {
results,
correction,
used_correction: false,
did_you_mean: None,
}
}
pub fn effective_query(&self) -> &str {
self.correction.query()
}
pub fn has_suggestions(&self) -> bool {
self.correction.has_suggestions()
}
pub fn was_auto_corrected(&self) -> bool {
self.correction.auto_corrected
}
pub fn should_show_did_you_mean(&self) -> bool {
self.did_you_mean.is_some() || self.correction.should_show_did_you_mean()
}
pub fn correction_confidence(&self) -> f64 {
self.correction.confidence
}
}
/// Settings controlling how `SpellCorrectedSearchEngine` applies spelling
/// correction to searches.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpellCorrectedSearchConfig {
/// Master switch: when `false` the search methods run the query as given
/// and skip correction entirely.
pub enabled: bool,
/// Configuration forwarded to the engine's `SpellingCorrector`.
pub corrector_config: CorrectorConfig,
/// When `true`, a query with too few hits may be re-run using the
/// corrected query (subject to the confidence check in the engine).
pub retry_with_correction: bool,
/// When `true`, a "did you mean" suggestion is looked up for searches
/// whose results did not come from the corrected query.
pub show_did_you_mean: bool,
/// Hit-count threshold: results with fewer total hits than this are
/// considered "few" when deciding whether to retry with the correction.
pub min_results_for_suggestions: usize,
}
impl Default for SpellCorrectedSearchConfig {
    /// Correction enabled, retry-with-correction and "did you mean" both on,
    /// a retry threshold of two hits, and the default corrector settings.
    fn default() -> Self {
        let corrector_config = CorrectorConfig::default();
        Self {
            enabled: true,
            corrector_config,
            retry_with_correction: true,
            show_did_you_mean: true,
            min_results_for_suggestions: 2,
        }
    }
}
/// Search front-end that layers spelling correction on top of a
/// `LexicalStore`.
pub struct SpellCorrectedSearchEngine {
/// Underlying store that supplies the analyzer and executes searches.
engine: LexicalStore,
/// Corrector used to analyse queries and answer word-level lookups.
corrector: SpellingCorrector,
/// Active configuration; replaced by `set_spell_config`.
config: SpellCorrectedSearchConfig,
/// "Did you mean" helper. The constructors build it around its own
/// `SpellingCorrector::new()`, separate from `corrector`.
did_you_mean: DidYouMean,
}
impl SpellCorrectedSearchEngine {
pub fn new(engine: LexicalStore) -> Self {
let corrector = SpellingCorrector::new();
let config = SpellCorrectedSearchConfig::default();
let did_you_mean = DidYouMean::new(SpellingCorrector::new());
SpellCorrectedSearchEngine {
engine,
corrector,
config,
did_you_mean,
}
}
pub fn with_config(engine: LexicalStore, config: SpellCorrectedSearchConfig) -> Self {
let mut corrector = SpellingCorrector::new();
corrector.set_config(config.corrector_config.clone());
let did_you_mean = DidYouMean::new(SpellingCorrector::new());
SpellCorrectedSearchEngine {
engine,
corrector,
config,
did_you_mean,
}
}
pub fn engine(&self) -> &LexicalStore {
&self.engine
}
pub fn engine_mut(&mut self) -> &mut LexicalStore {
&mut self.engine
}
pub fn set_spell_config(&mut self, config: SpellCorrectedSearchConfig) {
self.corrector.set_config(config.corrector_config.clone());
self.config = config;
}
pub fn search_with_correction(
&mut self,
query_str: &str,
default_field: &str,
) -> Result<SpellCorrectedSearchResults> {
use crate::lexical::query::parser::LexicalQueryParser;
let analyzer = self.engine.analyzer()?;
if !self.config.enabled {
let parser = LexicalQueryParser::new(analyzer).with_default_field(default_field);
let query = parser.parse(query_str)?;
let results = self.engine.search(LexicalSearchRequest::new(query))?;
let correction = CorrectionResult::new(query_str.to_string());
return Ok(SpellCorrectedSearchResults::new(results, correction));
}
let correction = self.corrector.correct(query_str);
let parser = LexicalQueryParser::new(analyzer).with_default_field(default_field);
let query = parser.parse(query_str)?;
let original_results = self.engine.search(LexicalSearchRequest::new(query))?;
let should_use_correction = self.should_use_correction(&original_results, &correction);
let (final_results, used_correction) = if should_use_correction {
let corrected_query = correction.query();
let query = parser.parse(corrected_query)?;
let corrected_results = self.engine.search(LexicalSearchRequest::new(query))?;
(corrected_results, true)
} else {
(original_results, false)
};
let did_you_mean = if self.config.show_did_you_mean && !used_correction {
self.did_you_mean.suggest(query_str)
} else {
None
};
let mut spell_results = SpellCorrectedSearchResults::new(final_results, correction);
spell_results.used_correction = used_correction;
spell_results.did_you_mean = did_you_mean;
Ok(spell_results)
}
pub fn search_field_with_correction(
&mut self,
field: &str,
query_str: &str,
) -> Result<SpellCorrectedSearchResults> {
use crate::lexical::query::parser::LexicalQueryParser;
let analyzer = self.engine.analyzer()?;
if !self.config.enabled {
let parser = LexicalQueryParser::new(analyzer);
let query = parser.parse_field(field, query_str)?;
let results = self.engine.search(LexicalSearchRequest::new(query))?;
let correction = CorrectionResult::new(query_str.to_string());
return Ok(SpellCorrectedSearchResults::new(results, correction));
}
let correction = self.corrector.correct(query_str);
let parser = LexicalQueryParser::new(analyzer);
let query = parser.parse_field(field, query_str)?;
let original_results = self.engine.search(LexicalSearchRequest::new(query))?;
let should_use_correction = self.should_use_correction(&original_results, &correction);
let (final_results, used_correction) = if should_use_correction {
let corrected_query = correction.query();
let query = parser.parse_field(field, corrected_query)?;
let corrected_results = self.engine.search(LexicalSearchRequest::new(query))?;
(corrected_results, true)
} else {
(original_results, false)
};
let did_you_mean = if self.config.show_did_you_mean && !used_correction {
self.did_you_mean.suggest(query_str)
} else {
None
};
let mut spell_results = SpellCorrectedSearchResults::new(final_results, correction);
spell_results.used_correction = used_correction;
spell_results.did_you_mean = did_you_mean;
Ok(spell_results)
}
pub fn is_word_correct(&self, word: &str) -> bool {
self.corrector.is_correct(word)
}
pub fn suggest_word(&self, word: &str) -> Vec<crate::spelling::suggest::Suggestion> {
self.corrector.suggest_word(word)
}
pub fn learn_from_index(&mut self) -> Result<()> {
Ok(())
}
pub fn corrector_stats(&self) -> crate::spelling::corrector::CorrectorStats {
self.corrector.stats()
}
pub fn clear_query_history(&mut self) {
self.corrector.clear_query_history();
}
fn should_use_correction(
&self,
original_results: &LexicalSearchResults,
correction: &CorrectionResult,
) -> bool {
if !correction.has_suggestions() {
return false;
}
if correction.auto_corrected {
return true;
}
if self.config.retry_with_correction {
let has_few_results =
original_results.total_hits < self.config.min_results_for_suggestions as u64;
let high_confidence = correction.confidence > 0.7;
if has_few_results && high_confidence {
return true;
}
}
false
}
}
pub struct SpellSearchUtils;
impl SpellSearchUtils {
pub fn extract_search_terms(query_str: &str) -> Vec<String> {
let stop_words = [
"and", "or", "not", "the", "is", "a", "an", "in", "on", "at", "to", "for", "of",
"with", "by",
];
query_str
.split(&[':', '(', ')', '"', '+', '-', ' ', '\t', '\n'][..])
.filter_map(|term| {
let cleaned = term.trim().to_lowercase();
if cleaned.len() > 2
&& cleaned.chars().all(|c| c.is_alphabetic())
&& !stop_words.contains(&cleaned.as_str())
{
Some(cleaned)
} else {
None
}
})
.collect()
}
pub fn build_corrected_query(original: &str, correction: &CorrectionResult) -> String {
if let Some(corrected) = &correction.corrected {
corrected.clone()
} else {
let mut result = original.to_string();
for (original_word, suggestions) in &correction.word_suggestions {
if let Some(best_suggestion) = suggestions.first()
&& best_suggestion.score > 0.6
{
result = result.replace(original_word, &best_suggestion.word);
}
}
result
}
}
pub fn format_did_you_mean(_original: &str, suggestion: &str) -> String {
format!("Did you mean: \"{suggestion}\"?")
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexical::store::config::LexicalIndexConfig;
    use crate::storage::file::{FileStorage, FileStorageConfig};
    use std::sync::Arc;
    use tempfile::TempDir;

    /// Builds an empty, file-backed `LexicalStore` inside a fresh temporary
    /// directory.
    ///
    /// The `TempDir` is returned alongside the store so the caller keeps it
    /// alive for the duration of the test.
    fn temp_store() -> (TempDir, LexicalStore) {
        let temp_dir = TempDir::new().unwrap();
        let storage = Arc::new(
            FileStorage::new(temp_dir.path(), FileStorageConfig::new(temp_dir.path())).unwrap(),
        );
        let store = LexicalStore::new(storage, LexicalIndexConfig::default()).unwrap();
        (temp_dir, store)
    }

    /// A freshly created engine uses the default configuration.
    #[test]
    fn test_spell_corrected_search_engine_creation() {
        let (_temp_dir, engine) = temp_store();
        let spell_engine = SpellCorrectedSearchEngine::new(engine);

        assert!(spell_engine.config.enabled);
        assert!(spell_engine.config.retry_with_correction);
    }

    /// With correction disabled, the query is passed through untouched.
    #[test]
    fn test_spell_corrected_search_disabled() {
        let (_temp_dir, engine) = temp_store();
        let spell_config = SpellCorrectedSearchConfig {
            enabled: false,
            ..Default::default()
        };
        let mut spell_engine = SpellCorrectedSearchEngine::with_config(engine, spell_config);

        let results = spell_engine
            .search_with_correction("hello world", "title")
            .unwrap();

        assert!(!results.has_suggestions());
        assert!(!results.used_correction);
        assert_eq!(results.effective_query(), "hello world");
    }

    /// A misspelled query still succeeds and records the original text.
    #[test]
    fn test_spell_corrected_search_with_typos() {
        let (_temp_dir, engine) = temp_store();
        let mut spell_engine = SpellCorrectedSearchEngine::new(engine);

        let results = spell_engine
            .search_with_correction("helo wrld", "title")
            .unwrap();

        assert_eq!(results.correction.original, "helo wrld");
    }

    /// Known words are accepted and a misspelling yields suggestions.
    #[test]
    fn test_word_correction_check() {
        let (_temp_dir, engine) = temp_store();
        let spell_engine = SpellCorrectedSearchEngine::new(engine);

        assert!(spell_engine.is_word_correct("hello"));
        assert!(spell_engine.is_word_correct("the"));

        // "hello" was just asserted to be correct, so the only way the
        // original disjunction could hold is with a non-empty suggestion list.
        let suggestions = spell_engine.suggest_word("helo");
        assert!(!suggestions.is_empty());
    }

    /// Exercises the stateless helper functions.
    #[test]
    fn test_spell_search_utils() {
        let terms = SpellSearchUtils::extract_search_terms("title:hello AND body:world");
        assert!(terms.contains(&"title".to_string()));
        assert!(terms.contains(&"hello".to_string()));
        assert!(terms.contains(&"body".to_string()));
        assert!(terms.contains(&"world".to_string()));
        assert!(!terms.contains(&"and".to_string()));

        let corrected = SpellSearchUtils::build_corrected_query(
            "original query",
            &CorrectionResult::new("original query".to_string()),
        );
        assert_eq!(corrected, "original query");

        let did_you_mean = SpellSearchUtils::format_did_you_mean("helo", "hello");
        assert_eq!(did_you_mean, "Did you mean: \"hello\"?");
    }

    /// Accessors on the result wrapper reflect a fresh, uncorrected query.
    #[test]
    fn test_spell_corrected_results() {
        use crate::lexical::query::LexicalSearchResults;

        let results = LexicalSearchResults {
            hits: vec![],
            total_hits: 0,
            max_score: 0.0,
        };
        let correction = CorrectionResult::new("test query".to_string());
        let spell_results = SpellCorrectedSearchResults::new(results, correction);

        assert_eq!(spell_results.effective_query(), "test query");
        assert!(!spell_results.has_suggestions());
        assert!(!spell_results.was_auto_corrected());
        assert!(!spell_results.used_correction);
        assert_eq!(spell_results.correction_confidence(), 1.0);
    }

    /// A new engine reports a populated dictionary and no learned queries.
    #[test]
    fn test_corrector_stats() {
        let (_temp_dir, engine) = temp_store();
        let spell_engine = SpellCorrectedSearchEngine::new(engine);

        let stats = spell_engine.corrector_stats();
        assert!(stats.dictionary_words > 0);
        assert!(stats.dictionary_total_frequency > 0);
        assert_eq!(stats.queries_learned, 0);
    }
}