use crate::score::bm25::{BM25FScorer, BM25PlusScorer};
use crate::score::bm25_additional::{
JaccardScorer, OkapiBM25Scorer, QueryRatioScorer, TFIDFScorer,
};
use crate::score::common::{BM25Params, FieldWeights};
use terraphim_types::{Document, DocumentType};
/// Builds the three-document fixture shared by the scorer tests.
///
/// Every document is a `DocumentType::KgEntry` carrying an id, title, body,
/// description, three tags and a URL; all remaining optional fields are `None`.
fn create_test_documents() -> Vec<Document> {
    /// Assembles one fixture document from the parts that vary between entries.
    fn kg_entry(
        id: &str,
        title: &str,
        body: &str,
        description: &str,
        tags: &[&str],
        url: &str,
    ) -> Document {
        Document {
            id: id.to_string(),
            title: title.to_string(),
            body: body.to_string(),
            description: Some(description.to_string()),
            summarization: None,
            tags: Some(tags.iter().copied().map(String::from).collect()),
            rank: None,
            stub: None,
            url: url.to_string(),
            source_haystack: None,
            doc_type: DocumentType::KgEntry,
            synonyms: None,
            route: None,
            priority: None,
        }
    }

    let intro = kg_entry(
        "doc1",
        "Introduction to Rust Programming",
        "Rust is a systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.",
        "A comprehensive guide to Rust programming language",
        &["programming", "rust", "systems"],
        "https://example.com/doc1",
    );
    let advanced = kg_entry(
        "doc2",
        "Advanced Rust Concepts",
        "This document covers advanced Rust concepts including ownership, borrowing, and lifetimes.",
        "Deep dive into advanced Rust programming concepts",
        &["rust", "advanced", "ownership"],
        "https://example.com/doc2",
    );
    let systems = kg_entry(
        "doc3",
        "Systems Programming with Rust",
        "Systems programming requires careful memory management and performance optimization.",
        "Guide to systems programming using Rust",
        &["systems", "programming", "performance"],
        "https://example.com/doc3",
    );

    vec![intro, advanced, systems]
}
/// Scores the fixture with `OkapiBM25Scorer` for "rust programming" and checks
/// that no score is negative and that the first document outranks the other two.
#[test]
fn test_bm25_scorer_basic_functionality() {
    let corpus = create_test_documents();

    let mut scorer = OkapiBM25Scorer::new();
    scorer.initialize(&corpus);

    let mut scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        scores.push(scorer.score("rust programming", doc));
    }

    // Every score must be non-negative.
    for value in &scores {
        assert!(*value >= 0.0);
    }

    // doc1 is expected to beat both doc3 and doc2.
    assert!(scores[2] < scores[0]);
    assert!(scores[1] < scores[0]);
}
/// Scores the fixture with a `BM25FScorer` built from explicit parameters and
/// field weights, then checks that scores are non-negative and that the first
/// document outranks the third for the query "rust".
#[test]
fn test_bm25f_scorer_field_weights() {
    let corpus = create_test_documents();

    let bm25_params = BM25Params {
        k1: 1.2,
        b: 0.75,
        delta: 1.0,
    };
    let field_weights = FieldWeights {
        title: 2.0,
        body: 1.0,
        description: 1.5,
        tags: 0.5,
    };

    let mut scorer = BM25FScorer::with_params(bm25_params, field_weights);
    scorer.initialize(&corpus);

    let mut scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        scores.push(scorer.score("rust", doc));
    }

    // Every score must be non-negative.
    for value in &scores {
        assert!(*value >= 0.0);
    }

    // doc1 is expected to beat doc3.
    assert!(scores[2] < scores[0]);
}
/// Scores the fixture with a `BM25PlusScorer` built from custom parameters and
/// checks that, for "systems programming", no score is negative and the third
/// document outranks the first two.
#[test]
fn test_bm25plus_scorer_enhanced_parameters() {
    let corpus = create_test_documents();

    let mut scorer = BM25PlusScorer::with_params(BM25Params {
        k1: 1.5,
        b: 0.8,
        delta: 1.2,
    });
    scorer.initialize(&corpus);

    let mut scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        scores.push(scorer.score("systems programming", doc));
    }

    // Every score must be non-negative.
    for value in &scores {
        assert!(*value >= 0.0);
    }

    // doc3 is expected to beat both doc1 and doc2.
    assert!(scores[0] < scores[2]);
    assert!(scores[1] < scores[2]);
}
/// Scores the fixture with `TFIDFScorer` for the query "rust" and checks that
/// no score is negative and that the first two documents score strictly above zero.
#[test]
fn test_tfidf_scorer_traditional_approach() {
    let corpus = create_test_documents();

    let mut scorer = TFIDFScorer::new();
    scorer.initialize(&corpus);

    let mut scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        scores.push(scorer.score("rust", doc));
    }

    // Every score must be non-negative.
    for value in &scores {
        assert!(*value >= 0.0);
    }

    // doc1 and doc2 must each receive a strictly positive score.
    assert!(0.0 < scores[0]);
    assert!(0.0 < scores[1]);
}
/// Scores the fixture with `JaccardScorer` for "rust programming" and checks
/// that every score lies in `[0.0, 1.0]` and that the first document outranks
/// the third.
#[test]
fn test_jaccard_scorer_similarity_based() {
    let corpus = create_test_documents();

    let mut scorer = JaccardScorer::new();
    scorer.initialize(&corpus);

    let mut scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        scores.push(scorer.score("rust programming", doc));
    }

    // Every score must fall inside the closed unit interval.
    for value in &scores {
        assert!((0.0..=1.0).contains(value));
    }

    // doc1 is expected to beat doc3.
    assert!(scores[2] < scores[0]);
}
/// Scores the fixture with `QueryRatioScorer` for "rust systems" and checks
/// that every score lies in `[0.0, 1.0]` and that the first document outranks
/// the other two.
#[test]
fn test_query_ratio_scorer_term_matching() {
    let corpus = create_test_documents();

    let mut scorer = QueryRatioScorer::new();
    scorer.initialize(&corpus);

    let mut scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        scores.push(scorer.score("rust systems", doc));
    }

    // Every score must fall inside the closed unit interval.
    for value in &scores {
        assert!((0.0..=1.0).contains(value));
    }

    // doc1 is expected to beat both doc2 and doc3.
    assert!(scores[1] < scores[0]);
    assert!(scores[2] < scores[0]);
}
/// Initialises the three BM25-family scorers with an empty corpus.
///
/// There are no assertions; the test passes as long as none of the
/// `initialize` calls panics.
#[test]
fn test_scorer_initialization_with_empty_documents() {
    let no_documents = Vec::<Document>::new();

    let mut okapi = OkapiBM25Scorer::new();
    okapi.initialize(&no_documents);

    let mut bm25f = BM25FScorer::new();
    bm25f.initialize(&no_documents);

    let mut bm25_plus = BM25PlusScorer::new();
    bm25_plus.initialize(&no_documents);
}
/// Checks that `OkapiBM25Scorer` returns exactly `0.0` for every fixture
/// document when the query string is empty.
#[test]
fn test_scorer_empty_query_handling() {
    let corpus = create_test_documents();

    let mut scorer = OkapiBM25Scorer::new();
    scorer.initialize(&corpus);

    let mut scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        scores.push(scorer.score("", doc));
    }

    // An empty query must not contribute any score at all.
    for value in &scores {
        assert!(*value == 0.0);
    }
}
/// Checks that `OkapiBM25Scorer` gives each fixture document the same score
/// (within an absolute tolerance of 0.001) for a lower-case query and its
/// upper-case counterpart.
#[test]
fn test_scorer_case_insensitive_matching() {
    let corpus = create_test_documents();

    let mut scorer = OkapiBM25Scorer::new();
    scorer.initialize(&corpus);

    // Score the whole corpus with the lower-case query first, then the upper-case one.
    let mut lower_case_scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        lower_case_scores.push(scorer.score("rust programming", doc));
    }

    let mut upper_case_scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        upper_case_scores.push(scorer.score("RUST PROGRAMMING", doc));
    }

    // Compare the two runs document by document.
    for (from_lower, from_upper) in lower_case_scores.iter().zip(&upper_case_scores) {
        let gap = (from_lower - from_upper).abs();
        assert!(gap < 0.001);
    }
}
/// Checks that two `BM25FScorer` instances differing only in `k1` (0.5 vs 2.0),
/// both using default field weights, produce different score vectors for the
/// query "rust programming".
#[test]
fn test_scorer_parameter_sensitivity() {
    let corpus = create_test_documents();
    let query = "rust programming";

    let mut low_k1_scorer = BM25FScorer::with_params(
        BM25Params {
            k1: 0.5,
            b: 0.75,
            delta: 1.0,
        },
        FieldWeights::default(),
    );
    let mut high_k1_scorer = BM25FScorer::with_params(
        BM25Params {
            k1: 2.0,
            b: 0.75,
            delta: 1.0,
        },
        FieldWeights::default(),
    );

    // Both scorers are initialised before either one is asked for scores.
    low_k1_scorer.initialize(&corpus);
    high_k1_scorer.initialize(&corpus);

    let mut low_k1_scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        low_k1_scores.push(low_k1_scorer.score(query, doc));
    }

    let mut high_k1_scores: Vec<f64> = Vec::with_capacity(corpus.len());
    for doc in &corpus {
        high_k1_scores.push(high_k1_scorer.score(query, doc));
    }

    assert_ne!(low_k1_scores, high_k1_scores);
}