mod common;
use common::pgwire_harness::TestServer;
/// DDL that creates the `docs_schemaless` collection with no declared
/// type or column list.
const SCHEMALESS_DDL: &str = "CREATE COLLECTION docs_schemaless";

/// DDL that creates the `docs_strict` collection as a `DOCUMENT STRICT`
/// type with two declared `STRING` columns: `id` (primary key) and `content`.
///
/// The pieces are joined without any separator, so the statement text is
/// exactly the concatenation of the fragments below.
const STRICT_DDL: &str = concat!(
    "CREATE COLLECTION docs_strict TYPE DOCUMENT STRICT (",
    "id STRING PRIMARY KEY,",
    "content STRING",
    ")",
);
/// Inserts the three fixture rows `r0`, `r1` and `r2` into collection `coll`.
///
/// `r0` and `r1` both contain the word "consensus" in `content`; `r2` does
/// not. Panics if the server rejects the INSERT.
async fn seed_three(server: &TestServer, coll: &str) {
    // Build the statement up front so the execution call reads on one line.
    let insert_sql = format!(
        "INSERT INTO {coll} (id, content) VALUES \
         ('r0', 'consensus algorithm distributed'), \
         ('r1', 'consensus memory replication'), \
         ('r2', 'cats and dogs')"
    );
    server.exec(&insert_sql).await.unwrap();
}
/// Converts textual result rows into `(id, score)` pairs.
///
/// Column 0 is taken as the row id and column 1 as the score; both are
/// trimmed of surrounding whitespace. A score cell that is empty after
/// trimming, or that does not parse as an `f64`, yields `None`.
///
/// # Panics
///
/// Panics if any row has fewer than two columns.
fn id_score_pairs(rows: &[Vec<String>]) -> Vec<(String, Option<f64>)> {
    let mut out = Vec::with_capacity(rows.len());
    for row in rows {
        let id = row[0].trim().to_owned();
        // An empty cell is treated as "no score" before any parsing is tried.
        let score = Some(row[1].trim())
            .filter(|text| !text.is_empty())
            .and_then(|text| text.parse::<f64>().ok());
        out.push((id, score));
    }
    out
}
/// Returns the first entry in `pairs` whose id equals `id`.
///
/// # Panics
///
/// Panics, naming the missing id and dumping `pairs`, when no entry matches.
fn pair<'a>(pairs: &'a [(String, Option<f64>)], id: &str) -> &'a (String, Option<f64>) {
    match pairs.iter().position(|entry| entry.0.as_str() == id) {
        Some(idx) => &pairs[idx],
        None => panic!("row {id} missing from result {pairs:?}"),
    }
}
/// Asserts that rows `r0` and `r1` each carry a strictly positive score.
///
/// `context` is prefixed to every failure message so the failing scenario
/// can be identified.
///
/// # Panics
///
/// Panics if either row is absent from `pairs`, has no score (`None`), or
/// has a score that is not greater than zero.
fn assert_term_rows_scored(pairs: &[(String, Option<f64>)], context: &str) {
    for id in ["r0", "r1"] {
        // A missing score is reported separately from a non-positive one.
        let s = match pair(pairs, id).1 {
            Some(value) => value,
            None => panic!(
                "[{context}] bm25_score for row {id} was NULL — \
                 the inverted index was never populated for this row. \
                 This is the silent-skip class: the FTS write site's \
                 format-detection guard failed and dropped the row."
            ),
        };
        assert!(
            s > 0.0,
            "[{context}] bm25_score for row {id} must be positive when \
             the term occurs in `content`; got {s}"
        );
    }
}
/// Strict collection, search index (`simple` analyzer) created before the
/// rows are inserted: `bm25_score(content, 'consensus')` must be non-NULL
/// and positive for `r0` and `r1`.
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn bm25_score_strict_returns_non_null_for_indexed_rows() {
    let create_index =
        "CREATE SEARCH INDEX idx_strict_bm25 ON docs_strict FIELDS content ANALYZER 'simple'";
    let select_scores = "SELECT id, bm25_score(content, 'consensus') FROM docs_strict ORDER BY id";

    // Schema and index first, data afterwards.
    let srv = TestServer::start().await;
    srv.exec(STRICT_DDL).await.unwrap();
    srv.exec(create_index).await.unwrap();
    seed_three(&srv, "docs_strict").await;

    let rows = srv
        .query_rows(select_scores)
        .await
        .expect("bm25_score projection must succeed");
    assert_eq!(rows.len(), 3, "expected 3 rows, got {rows:?}");

    let scored = id_score_pairs(&rows);
    assert_term_rows_scored(&scored, "strict / simple analyzer");
}
/// Control case on the schemaless collection: same index, data and query
/// as the strict scenario, and the same expectation that `r0` and `r1`
/// receive a positive `bm25_score`.
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn bm25_score_schemaless_control_returns_non_null_for_indexed_rows() {
    let create_index = "CREATE SEARCH INDEX idx_schemaless_bm25 ON docs_schemaless FIELDS content ANALYZER 'simple'";
    let select_scores =
        "SELECT id, bm25_score(content, 'consensus') FROM docs_schemaless ORDER BY id";

    // Schema and index first, data afterwards.
    let srv = TestServer::start().await;
    srv.exec(SCHEMALESS_DDL).await.unwrap();
    srv.exec(create_index).await.unwrap();
    seed_three(&srv, "docs_schemaless").await;

    let rows = srv
        .query_rows(select_scores)
        .await
        .expect("bm25_score projection on schemaless must succeed");
    assert_eq!(rows.len(), 3, "expected 3 rows, got {rows:?}");

    let scored = id_score_pairs(&rows);
    assert_term_rows_scored(&scored, "schemaless control");
}
/// Strict collection with the search index declared using the `standard`
/// analyzer instead of `simple`: `r0` and `r1` must still receive a
/// positive `bm25_score` for the term "consensus".
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn bm25_score_strict_works_under_standard_analyzer() {
    let create_index = "CREATE SEARCH INDEX idx_strict_standard ON docs_strict FIELDS content ANALYZER 'standard'";
    let select_scores = "SELECT id, bm25_score(content, 'consensus') FROM docs_strict ORDER BY id";

    // Schema and index first, data afterwards.
    let srv = TestServer::start().await;
    srv.exec(STRICT_DDL).await.unwrap();
    srv.exec(create_index).await.unwrap();
    seed_three(&srv, "docs_strict").await;

    let rows = srv
        .query_rows(select_scores)
        .await
        .expect("bm25_score under standard analyzer must succeed");
    assert_eq!(rows.len(), 3, "expected 3 rows, got {rows:?}");

    let scored = id_score_pairs(&rows);
    assert_term_rows_scored(&scored, "strict / standard analyzer");
}
/// Strict collection with the index declared via `CREATE FULLTEXT INDEX`
/// rather than `CREATE SEARCH INDEX`: `r0` and `r1` must receive a
/// positive `bm25_score` for the term "consensus".
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn fulltext_index_keyword_populates_strict_inverted_index() {
    let create_index = "CREATE FULLTEXT INDEX idx_strict_fulltext ON docs_strict FIELDS content ANALYZER 'simple'";
    let select_scores = "SELECT id, bm25_score(content, 'consensus') FROM docs_strict ORDER BY id";

    // Schema and index first, data afterwards.
    let srv = TestServer::start().await;
    srv.exec(STRICT_DDL).await.unwrap();
    srv.exec(create_index).await.unwrap();
    seed_three(&srv, "docs_strict").await;

    let rows = srv
        .query_rows(select_scores)
        .await
        .expect("bm25_score after CREATE FULLTEXT INDEX must succeed");
    assert_eq!(rows.len(), 3, "expected 3 rows, got {rows:?}");

    let scored = id_score_pairs(&rows);
    assert_term_rows_scored(&scored, "strict / FULLTEXT keyword");
}
/// Strict collection where the rows are inserted first and the search
/// index is created afterwards: the pre-existing rows `r0` and `r1` must
/// still receive a positive `bm25_score` for the term "consensus".
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn bm25_score_strict_indexes_existing_rows_when_index_created_after_insert() {
    let create_index =
        "CREATE SEARCH INDEX idx_strict_after ON docs_strict FIELDS content ANALYZER 'simple'";
    let select_scores = "SELECT id, bm25_score(content, 'consensus') FROM docs_strict ORDER BY id";

    // Order matters here: data goes in before the index exists.
    let srv = TestServer::start().await;
    srv.exec(STRICT_DDL).await.unwrap();
    seed_three(&srv, "docs_strict").await;
    srv.exec(create_index).await.unwrap();

    let rows = srv
        .query_rows(select_scores)
        .await
        .expect("bm25_score against post-hoc index must succeed");
    assert_eq!(rows.len(), 3, "expected 3 rows, got {rows:?}");

    let scored = id_score_pairs(&rows);
    assert_term_rows_scored(&scored, "strict / DDL after INSERT");
}