use crate::error::Result;
use crate::lexical::query::Query;
use crate::lexical::query::matcher::{EmptyMatcher, Matcher, PostingMatcher};
use crate::lexical::query::scorer::{BM25Scorer, Scorer};
use crate::lexical::reader::LexicalIndexReader;
/// A query that targets one exact term inside one named field.
///
/// Instances are created with [`TermQuery::new`], which starts the boost at
/// `1.0`; the boost can be changed afterwards with `with_boost` or
/// `set_boost`.
#[derive(Clone, Debug)]
pub struct TermQuery {
    /// Name of the field passed to the reader when looking up the term.
    field: String,
    /// The term text passed to the reader alongside `field`.
    term: String,
    /// Boost factor forwarded to the scorer and appended to the textual
    /// description whenever it is not exactly `1.0`.
    boost: f32,
}
impl TermQuery {
pub fn new<F, T>(field: F, term: T) -> Self
where
F: Into<String>,
T: Into<String>,
{
TermQuery {
field: field.into(),
term: term.into(),
boost: 1.0,
}
}
pub fn field(&self) -> &str {
&self.field
}
pub fn term(&self) -> &str {
&self.term
}
pub fn with_boost(mut self, boost: f32) -> Self {
self.boost = boost;
self
}
}
impl Query for TermQuery {
    /// Builds a matcher over the postings of this query's term.
    ///
    /// Asks `reader` for the postings of (`field`, `term`). When postings
    /// exist they are wrapped in a `PostingMatcher`; otherwise an
    /// `EmptyMatcher` is returned. Errors from the reader are propagated.
    fn matcher(&self, reader: &dyn LexicalIndexReader) -> Result<Box<dyn Matcher>> {
        let postings = reader.postings(&self.field, &self.term)?;
        if let Some(posting_iter) = postings {
            return Ok(Box::new(PostingMatcher::new(posting_iter)));
        }
        Ok(Box::new(EmptyMatcher::new()))
    }

    /// Builds a `BM25Scorer` for this query.
    ///
    /// Term info and field stats are both fetched from `reader` (in that
    /// order, each propagating its error). When both are present the scorer
    /// is built from the term's `doc_freq` / `total_freq`, the field's
    /// `doc_count` / `avg_length`, the reader's `doc_count()` and this
    /// query's boost. If either lookup yields `None`, the scorer is built
    /// with all statistics zeroed and only the boost carried over.
    fn scorer(&self, reader: &dyn LexicalIndexReader) -> Result<Box<dyn Scorer>> {
        let term_info = reader.term_info(&self.field, &self.term)?;
        let field_stats = reader.field_stats(&self.field)?;

        let bm25 = if let (Some(info), Some(stats)) = (term_info, field_stats) {
            BM25Scorer::new(
                info.doc_freq,
                info.total_freq,
                stats.doc_count,
                stats.avg_length,
                reader.doc_count(),
                self.boost,
            )
        } else {
            // Statistics are unavailable: fall back to zeroed inputs.
            BM25Scorer::new(0, 0, 0, 0.0, 0, self.boost)
        };

        Ok(Box::new(bm25))
    }

    /// Returns the current boost factor.
    fn boost(&self) -> f32 {
        self.boost
    }

    /// Overwrites the boost factor in place.
    fn set_boost(&mut self, boost: f32) {
        self.boost = boost;
    }

    /// Renders the query as `field:term`, with `^boost` appended when the
    /// boost is not exactly `1.0`.
    fn description(&self) -> String {
        let base = format!("{}:{}", self.field, self.term);
        if self.boost == 1.0 {
            base
        } else {
            format!("{}^{}", base, self.boost)
        }
    }

    /// Returns a boxed copy of this query as a `Query` trait object.
    fn clone_box(&self) -> Box<dyn Query> {
        let copy: TermQuery = self.clone();
        Box::new(copy)
    }

    /// Reports whether the query can match nothing in `reader`.
    ///
    /// True when the reader has no term info for (`field`, `term`) or when
    /// the reported `doc_freq` is zero. Reader errors are propagated.
    fn is_empty(&self, reader: &dyn LexicalIndexReader) -> Result<bool> {
        let doc_freq = reader
            .term_info(&self.field, &self.term)?
            .map_or(0, |info| info.doc_freq);
        Ok(doc_freq == 0)
    }

    /// Returns the term's `doc_freq` as the cost, or `0` when the reader
    /// has no term info. Reader errors are propagated.
    fn cost(&self, reader: &dyn LexicalIndexReader) -> Result<u64> {
        let info = reader.term_info(&self.field, &self.term)?;
        Ok(info.map_or(0, |info| info.doc_freq))
    }

    /// Exposes the query as `Any` so callers can downcast to `TermQuery`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Returns the field this query targets, always `Some` for a term query.
    fn field(&self) -> Option<&str> {
        Some(self.field.as_str())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexical::index::inverted::reader::{InvertedIndexReader, InvertedIndexReaderConfig};
    use crate::storage::memory::MemoryStorage;
    use crate::storage::memory::MemoryStorageConfig;
    use std::sync::Arc;

    /// Builds the reader fixture shared by the reader-backed tests: an
    /// `InvertedIndexReader` created from an empty `vec![]`, default
    /// in-memory storage and the default reader configuration.
    ///
    /// The tests below rely on this reader knowing no terms at all.
    fn empty_reader() -> impl LexicalIndexReader {
        let storage = Arc::new(MemoryStorage::new(MemoryStorageConfig::default()));
        InvertedIndexReader::new(vec![], storage, InvertedIndexReaderConfig::default()).unwrap()
    }

    /// A freshly built query exposes its parts and uses the default boost.
    #[test]
    fn test_term_query_creation() {
        let query = TermQuery::new("title", "hello");
        assert_eq!(query.field(), "title");
        assert_eq!(query.term(), "hello");
        assert_eq!(query.boost(), 1.0);
        assert_eq!(query.description(), "title:hello");
        // The trait-level accessor reports the same field wrapped in `Some`.
        assert_eq!(Query::field(&query), Some("title"));
    }

    /// `with_boost` changes the boost and the description gains a `^` suffix.
    #[test]
    fn test_term_query_with_boost() {
        let query = TermQuery::new("title", "hello").with_boost(2.0);
        assert_eq!(query.boost(), 2.0);
        assert_eq!(query.description(), "title:hello^2");
    }

    /// `set_boost` mutates the boost in place and is reflected in the description.
    #[test]
    fn test_term_query_set_boost() {
        let mut query = TermQuery::new("title", "hello");
        query.set_boost(1.5);
        assert_eq!(query.boost(), 1.5);
        assert_eq!(query.description(), "title:hello^1.5");
    }

    /// A matcher can be built against the empty fixture reader.
    #[test]
    fn test_term_query_matcher() {
        let reader = empty_reader();
        let query = TermQuery::new("title", "hello");
        let matcher = query.matcher(&reader).unwrap();
        assert!(matcher.is_exhausted() || matcher.doc_id() != u64::MAX);
    }

    /// A scorer can be built against the empty fixture reader and yields a
    /// non-negative score.
    #[test]
    fn test_term_query_scorer() {
        let reader = empty_reader();
        let query = TermQuery::new("title", "hello");
        let scorer = query.scorer(&reader).unwrap();
        assert!(scorer.score(0, 1.0, None) >= 0.0);
    }

    /// Queries are reported as empty against the fixture reader, regardless
    /// of which field they name.
    #[test]
    fn test_term_query_is_empty() {
        let reader = empty_reader();
        let query = TermQuery::new("title", "hello");
        assert!(query.is_empty(&reader).unwrap());
        let query = TermQuery::new("nonexistent", "hello");
        assert!(query.is_empty(&reader).unwrap());
    }

    /// The cost of a query against the fixture reader is zero.
    #[test]
    fn test_term_query_cost() {
        let reader = empty_reader();
        let query = TermQuery::new("title", "hello");
        assert_eq!(query.cost(&reader).unwrap(), 0);
    }

    /// `clone_box` preserves both the description and the boost.
    #[test]
    fn test_term_query_clone() {
        let query = TermQuery::new("title", "hello").with_boost(2.0);
        let cloned = query.clone_box();
        assert_eq!(cloned.description(), "title:hello^2");
        assert_eq!(cloned.boost(), 2.0);
    }
}