use std::path::Path;
use std::sync::Mutex;
use anyhow::{Context, Result};
use tantivy::collector::TopDocs;
use tantivy::directory::error::OpenReadError;
use tantivy::query::{PhraseQuery, QueryParser};
use tantivy::schema::{NumericOptions, Schema, Value, FAST, STORED, STRING, TEXT};
use tantivy::{Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument, TantivyError, Term};
use crate::store::record::{Category, Priority, Record};
/// Size in bytes (50 MB) passed to `Index::writer` when the index writer is opened.
const WRITER_HEAP_BYTES: usize = 50_000_000;
/// Handles to the six schema fields used when building documents and queries.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Fields {
/// Record key; declared `TEXT | STORED` in `schema()`.
pub(crate) key: tantivy::schema::Field,
/// Record body; declared `TEXT | STORED`.
pub(crate) value: tantivy::schema::Field,
/// Category label; declared `STRING | STORED | FAST`.
pub(crate) category: tantivy::schema::Field,
/// Space-joined tags; declared `TEXT | STORED`.
pub(crate) tags: tantivy::schema::Field,
/// Priority as a `u64` (stored + fast).
pub(crate) priority: tantivy::schema::Field,
/// The record's `updated_at` value as a `u64` (stored + fast).
pub(crate) updated_at: tantivy::schema::Field,
}
/// A tantivy-backed full-text index over records, queried for record keys.
pub struct Search {
/// The underlying tantivy index.
pub(crate) index: Index,
/// Field handles resolved from the index schema when the index is opened.
pub(crate) fields: Fields,
/// Index writer; mutating methods lock it before staging and committing changes.
writer: Mutex<IndexWriter>,
/// Reader built with `ReloadPolicy::Manual`; the query methods reload it before searching.
reader: IndexReader,
}
impl Search {
/// Opens the search index stored at `path`.
///
/// The directory is created if needed. When no index exists there yet, a new
/// one is created with the schema returned by `schema()`.
///
/// # Errors
/// Fails if the directory cannot be created, the index cannot be opened or
/// created, a schema field cannot be resolved, or the writer/reader cannot
/// be built.
pub fn open(path: &Path) -> Result<Self> {
    std::fs::create_dir_all(path)?;

    // Only "index files do not exist" is recoverable; every other open
    // error is surfaced unchanged.
    let index = Index::open_in_dir(path).or_else(|err| match err {
        TantivyError::OpenReadError(OpenReadError::FileDoesNotExist(_)) => {
            let (fresh_schema, _) = schema();
            Index::create_in_dir(path, fresh_schema)
        }
        other => Err(other),
    })?;

    let fields = fields_from_schema(&index.schema())?;
    let writer = Mutex::new(open_writer_with_retry(&index)?);
    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::Manual)
        .try_into()?;

    Ok(Self {
        index,
        fields,
        writer,
        reader,
    })
}
pub fn add_record(&self, record: &Record) -> Result<()> {
let mut writer = self.writer.lock().unwrap_or_else(|e| e.into_inner());
delete_by_key(&self.index, &writer, self.fields.key, &record.key)?;
if is_searchable(record) {
writer.add_document(record_to_doc(record, &self.fields))?;
}
writer.commit()?;
Ok(())
}
pub fn add_records(&self, records: &[&Record]) -> Result<usize> {
if records.is_empty() {
return Ok(0);
}
let mut latest_by_key = std::collections::BTreeMap::<String, &Record>::new();
for &record in records {
latest_by_key.insert(record.key.clone(), record);
}
let mut writer = self.writer.lock().unwrap_or_else(|e| e.into_inner());
for key in latest_by_key.keys() {
delete_by_key(&self.index, &writer, self.fields.key, key)?;
}
let indexable: Vec<&Record> = latest_by_key
.values()
.copied()
.filter(|r| is_searchable(r))
.collect();
if indexable.is_empty() {
writer.commit()?;
return Ok(0);
}
let total = indexable.len();
for (i, record) in indexable.iter().enumerate() {
if let Err(e) = writer.add_document(record_to_doc(record, &self.fields)) {
if let Err(rb) = writer.rollback() {
tracing::warn!(
staged = i,
total,
"tantivy rollback failed after staging error: {rb:#}"
);
}
return Err(anyhow::Error::from(e))
.with_context(|| format!("search index staging failed at record {i}/{total}"));
}
}
writer
.commit()
.with_context(|| format!("tantivy commit failed after staging {total} records"))?;
Ok(total)
}
/// Consumes the handle and commits anything still staged in the writer.
///
/// A poisoned writer lock is recovered instead of panicking, which is how
/// the locking methods on this type treat poisoning as well.
///
/// # Errors
/// Returns an error if the final commit fails.
pub fn close(self) -> Result<()> {
    let mut writer = self
        .writer
        .into_inner()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    writer
        .commit()
        .context("tantivy commit failed while closing search index")?;
    Ok(())
}
/// Removes the document indexed under `key` and commits immediately.
///
/// # Errors
/// Returns an error if staging the delete or committing fails.
pub fn delete_key(&self, key: &str) -> Result<()> {
    // A poisoned lock is recovered: the guard is still usable.
    let mut guard = self
        .writer
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    delete_by_key(&self.index, &guard, self.fields.key, key)?;
    guard.commit()?;
    Ok(())
}
pub fn query_keys(&self, text: &str, limit: usize) -> Result<Vec<String>> {
if text.trim().is_empty() || limit == 0 {
return Ok(vec![]);
}
self.reader.reload()?;
let searcher = self.reader.searcher();
let mut parser = QueryParser::for_index(
&self.index,
vec![self.fields.key, self.fields.value, self.fields.tags],
);
parser.set_field_boost(self.fields.key, 2.0);
let (query, parse_warnings) = parser.parse_query_lenient(text);
if !parse_warnings.is_empty() {
tracing::warn!(
query = text,
warnings = ?parse_warnings,
"query parse warnings — proceeding with best-effort query"
);
}
let top_docs = searcher.search(&query, &TopDocs::with_limit(limit))?;
let mut keys: Vec<String> = Vec::with_capacity(top_docs.len());
let mut seen = std::collections::HashSet::new();
for (_score, doc_address) in top_docs {
let doc = match searcher.doc::<TantivyDocument>(doc_address) {
Ok(d) => d,
Err(e) => {
tracing::warn!(error = %e, "failed to retrieve doc — skipping");
continue;
}
};
if let Some(key) = doc.get_first(self.fields.key).and_then(|v| v.as_str()) {
let key = key.to_string();
if seen.insert(key.clone()) {
keys.push(key);
}
} else {
tracing::warn!(?doc_address, "indexed doc missing key field — skipping");
}
}
Ok(keys)
}
pub fn query_keys_scored(&self, text: &str, limit: usize) -> Result<Vec<(f32, String)>> {
if text.trim().is_empty() || limit == 0 {
return Ok(vec![]);
}
self.reader.reload()?;
let searcher = self.reader.searcher();
let mut parser = QueryParser::for_index(
&self.index,
vec![self.fields.key, self.fields.value, self.fields.tags],
);
parser.set_field_boost(self.fields.key, 2.0);
let (query, parse_warnings) = parser.parse_query_lenient(text);
if !parse_warnings.is_empty() {
tracing::warn!(
query = text,
warnings = ?parse_warnings,
"query parse warnings — proceeding with best-effort query"
);
}
let top_docs = searcher.search(&query, &TopDocs::with_limit(limit))?;
let mut results: Vec<(f32, String)> = Vec::with_capacity(top_docs.len());
let mut seen = std::collections::HashSet::new();
for (score, doc_address) in top_docs {
let doc = match searcher.doc::<TantivyDocument>(doc_address) {
Ok(d) => d,
Err(e) => {
tracing::warn!(error = %e, "failed to retrieve doc — skipping");
continue;
}
};
if let Some(key) = doc.get_first(self.fields.key).and_then(|v| v.as_str()) {
let key = key.to_string();
if seen.insert(key.clone()) {
results.push((score, key));
}
} else {
tracing::warn!(?doc_address, "indexed doc missing key field — skipping");
}
}
Ok(results)
}
}
/// Decides whether a record should be added to the search index.
///
/// Records whose key does not start with `file:` are always searchable.
/// `file:` records are searchable only when the path after the prefix has
/// one of the listed source-code extensions (compared case-sensitively).
fn is_searchable(record: &Record) -> bool {
    const SOURCE_EXTENSIONS: [&str; 11] = [
        "rs", "ts", "tsx", "js", "jsx", "mjs", "cjs", "py", "pyi", "go", "java",
    ];

    match record.key.strip_prefix("file:") {
        None => true,
        Some(path) => Path::new(path)
            .extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| SOURCE_EXTENSIONS.contains(&ext)),
    }
}
fn record_to_doc(record: &Record, fields: &Fields) -> TantivyDocument {
let mut doc = TantivyDocument::default();
doc.add_text(fields.key, &record.key);
doc.add_text(fields.value, &record.value);
doc.add_text(fields.category, category_str(&record.category));
doc.add_text(fields.tags, record.tags.join(" "));
doc.add_u64(fields.priority, priority_u64(&record.priority));
doc.add_u64(fields.updated_at, record.updated_at);
doc
}
/// Maps a category to the lowercase label written to the `category` field.
fn category_str(cat: &Category) -> &'static str {
    match *cat {
        Category::Analytics => "analytics",
        Category::Decision => "decision",
        Category::Dependency => "dependency",
        Category::DevNote => "dev_note",
        Category::File => "file",
        Category::Gotcha => "gotcha",
        Category::Session => "session",
        Category::Stage => "stage",
    }
}
/// Maps a priority to the number stored in the `priority` field
/// (`Low` = 0 up to `Critical` = 3).
fn priority_u64(p: &Priority) -> u64 {
    match *p {
        Priority::Critical => 3,
        Priority::High => 2,
        Priority::Normal => 1,
        Priority::Low => 0,
    }
}
/// Builds the index schema together with handles to its six fields.
///
/// Fields are registered in a fixed order: key, value, category, tags,
/// priority, updated_at.
pub(crate) fn schema() -> (Schema, Fields) {
    let mut builder = Schema::builder();
    // Struct-literal fields are evaluated in the order written, so the
    // registration order below is exactly the declaration order.
    let fields = Fields {
        key: builder.add_text_field("key", TEXT | STORED),
        value: builder.add_text_field("value", TEXT | STORED),
        category: builder.add_text_field("category", STRING | STORED | FAST),
        tags: builder.add_text_field("tags", TEXT | STORED),
        priority: builder.add_u64_field("priority", numeric_stored_fast()),
        updated_at: builder.add_u64_field("updated_at", numeric_stored_fast()),
    };
    (builder.build(), fields)
}
/// Options shared by the numeric fields: stored and fast.
fn numeric_stored_fast() -> NumericOptions {
    let base = NumericOptions::default();
    base.set_stored().set_fast()
}
/// Resolves the six field handles by name from an existing schema.
///
/// # Errors
/// Returns the lookup error for the first field name the schema lacks.
fn fields_from_schema(s: &Schema) -> Result<Fields> {
    let key = s.get_field("key")?;
    let value = s.get_field("value")?;
    let category = s.get_field("category")?;
    let tags = s.get_field("tags")?;
    let priority = s.get_field("priority")?;
    let updated_at = s.get_field("updated_at")?;
    Ok(Fields {
        key,
        value,
        category,
        tags,
        priority,
        updated_at,
    })
}
/// Opens the index writer, retrying while the writer lock is held elsewhere.
///
/// Up to six attempts are made, sleeping 1, 2, 4, 8 and 16 ms between
/// them. No sleep follows the final attempt: its error is returned
/// straight away.
///
/// # Errors
/// Returns the last `LockFailure` once attempts are exhausted, or any other
/// writer error immediately.
fn open_writer_with_retry(index: &Index) -> Result<IndexWriter> {
    const MAX_ATTEMPTS: u32 = 6;
    const MAX_DELAY_MS: u64 = 16;

    let mut delay_ms = 1u64;
    let mut attempt = 1u32;
    loop {
        match index.writer(WRITER_HEAP_BYTES) {
            Ok(writer) => return Ok(writer),
            // Lock contention is retried only while attempts remain.
            Err(TantivyError::LockFailure(..)) if attempt < MAX_ATTEMPTS => {
                std::thread::sleep(std::time::Duration::from_millis(delay_ms));
                delay_ms = (delay_ms * 2).min(MAX_DELAY_MS);
                attempt += 1;
            }
            // Non-lock errors, or a lock failure on the last attempt.
            Err(err) => return Err(err.into()),
        }
    }
}
fn delete_by_key(
index: &Index,
writer: &IndexWriter,
key_field: tantivy::schema::Field,
key: &str,
) -> Result<()> {
let mut tokenizer = index.tokenizer_for_field(key_field)?;
let mut stream = tokenizer.token_stream(key);
let mut tokens = Vec::new();
stream.process(&mut |token| tokens.push(token.text.clone()));
if tokens.is_empty() {
return Ok(());
}
if tokens.len() == 1 {
writer.delete_term(Term::from_field_text(key_field, &tokens[0]));
} else {
let terms = tokens
.into_iter()
.map(|token| Term::from_field_text(key_field, &token))
.collect();
let query = PhraseQuery::new(terms);
writer.delete_query(Box::new(query))?;
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use tantivy::schema::FieldType;
use tempfile::TempDir;
/// Every field name the indexer relies on must be declared in the schema.
#[test]
fn schema_has_all_six_fields() {
    let (built, _) = schema();
    let expected = ["key", "value", "category", "tags", "priority", "updated_at"];
    for name in expected {
        assert!(built.get_field(name).is_ok(), "missing field: {name}");
    }
}
/// Key, value and tags must be stored so they can be read back from hits.
#[test]
fn text_fields_are_stored() {
    let (built, handles) = schema();
    for field in [handles.key, handles.value, handles.tags] {
        assert!(
            built.get_field_entry(field).is_stored(),
            "text field {field:?} must be stored"
        );
    }
}
/// The category field must be indexed with the `raw` tokenizer.
#[test]
fn category_field_is_string_not_text() {
    let (built, handles) = schema();
    let opts = match built.get_field_entry(handles.category).field_type() {
        FieldType::Str(opts) => opts,
        _ => panic!("category must be a text field"),
    };
    let tokenizer = opts
        .get_indexing_options()
        .expect("category must have indexing options")
        .tokenizer();
    assert_eq!(
        tokenizer, "raw",
        "category must use raw tokenizer (STRING), not default (TEXT)"
    );
}
/// Priority and updated_at must be u64 fields that are both stored and fast.
#[test]
fn u64_fields_are_stored_and_fast() {
    let (built, handles) = schema();
    for field in [handles.priority, handles.updated_at] {
        let entry = built.get_field_entry(field);
        assert!(entry.is_stored(), "u64 field {field:?} must be stored");
        match entry.field_type() {
            FieldType::U64(opts) => {
                assert!(opts.is_fast(), "u64 field {field:?} must be FAST");
            }
            _ => panic!("field {field:?} must be u64"),
        }
    }
}
/// Opening a path that does not exist yet creates the directory.
#[test]
fn open_creates_index_in_new_directory() {
    let tmp = TempDir::new().unwrap();
    let index_dir = tmp.path().join("search_index");
    assert!(!index_dir.exists());

    Search::open(&index_dir).unwrap();

    assert!(index_dir.exists(), "search_index dir must be created");
}
/// An existing but empty directory is treated as "no index yet".
#[test]
fn open_creates_index_when_dir_is_empty() {
    let tmp = TempDir::new().unwrap();
    let index_dir = tmp.path().join("search_index");
    std::fs::create_dir_all(&index_dir).unwrap();

    Search::open(&index_dir).unwrap();
}
/// Opening the same path twice in a row must succeed.
#[test]
fn open_reopens_existing_index() {
    let tmp = TempDir::new().unwrap();
    let index_dir = tmp.path().join("search_index");
    // Each handle is dropped at the end of its statement, before the next open.
    for _ in 0..2 {
        Search::open(&index_dir).unwrap();
    }
}
/// Reopening an index must not change the number of schema fields.
#[test]
fn open_is_idempotent_schema_stays_stable() {
    let tmp = TempDir::new().unwrap();
    let index_dir = tmp.path().join("search_index");

    // The handle is dropped when the closure returns, before the next open.
    let schema_after_open = || {
        let opened = Search::open(&index_dir).unwrap();
        opened.index.schema()
    };
    let first = schema_after_open();
    let second = schema_after_open();

    assert_eq!(
        first.num_fields(),
        second.num_fields(),
        "schema must not drift between opens"
    );
}
/// Four threads each open/drop their own index 20 times; no open may fail.
#[test]
fn open_drop_reopen_stress_no_lock_failure() {
    use std::sync::Arc;
    use std::thread;

    let tmp = TempDir::new().unwrap();
    let base = Arc::new(tmp.path().to_path_buf());

    let workers: Vec<_> = (0..4)
        .map(|t| {
            let base = Arc::clone(&base);
            thread::spawn(move || {
                let path = base.join(format!("idx_{t}/search_index"));
                for _ in 0..20 {
                    let s = Search::open(&path).expect("open must not return LockFailure");
                    assert_eq!(
                        s.index.schema().num_fields(),
                        6,
                        "schema must have 6 fields after every reopen"
                    );
                    // `s` is dropped here, before the next iteration reopens.
                }
            })
        })
        .collect();

    for worker in workers {
        worker.join().unwrap();
    }
}
/// Two different index paths can be opened independently of each other.
#[test]
fn open_path_is_independent_per_project() {
    let tmp = TempDir::new().unwrap();
    let project_dirs = [
        tmp.path().join("project_a/search_index"),
        tmp.path().join("project_b/search_index"),
    ];
    for index_dir in &project_dirs {
        Search::open(index_dir).unwrap();
    }
    for index_dir in &project_dirs {
        assert!(index_dir.exists());
    }
}
/// Test fixture: an active `Gotcha` record with normal priority, the given
/// key, value and tags, and default or zeroed values for everything else.
fn make_record(key: &str, value: &str, tags: &[&str]) -> Record {
    use crate::store::record::{
        Category, ConfidenceScore, Priority, QualityScore, RecordLifecycle, RecordSource,
        RecordVersion, StalenessScore,
    };

    let version = RecordVersion {
        device_id: uuid::Uuid::new_v4(),
        logical_clock: 1,
        wall_clock: 0,
    };
    let source = RecordSource::StaticAnalysis;
    let confidence = ConfidenceScore::for_new_record(&source);

    Record {
        key: key.to_string(),
        value: value.to_string(),
        category: Category::Gotcha,
        priority: Priority::Normal,
        tags: tags.iter().map(|tag| tag.to_string()).collect(),
        created_at: 0,
        updated_at: 0,
        ref_url: None,
        staleness: StalenessScore::fresh(),
        lifecycle: RecordLifecycle::Active,
        version,
        quality: QualityScore::layer0_default(),
        access_count: 0,
        last_accessed: 0,
        source,
        confidence,
        gap_analysis_score: 0.0,
        payload: None,
    }
}
/// Test fixture: opens a search index under `search_index` inside `dir`.
fn open_search(dir: &TempDir) -> Search {
    let index_dir = dir.path().join("search_index");
    Search::open(&index_dir).unwrap()
}
/// Blank queries return nothing even when the index holds documents.
#[test]
fn query_keys_empty_and_whitespace_return_empty() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    search
        .add_record(&make_record("gotcha:foo", "async inference race", &[]))
        .unwrap();

    let blanks = ["", " ", "\t", "\n", " \t "];
    for blank in blanks {
        let hits = search.query_keys(blank, 10).unwrap();
        assert!(hits.is_empty(), "expected empty for {blank:?}");
    }
}
/// A limit of zero yields no results, even for a query that would match.
#[test]
fn query_keys_zero_limit_returns_empty_even_with_matching_docs() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    search
        .add_record(&make_record("gotcha:foo", "async inference race", &[]))
        .unwrap();

    let hits = search.query_keys("async", 0).unwrap();
    assert!(hits.is_empty());
}
/// A term that only occurs in the value body finds the record.
#[test]
fn query_keys_matches_value_field() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    let record = make_record(
        "gotcha:async-race",
        "never use inference in async context",
        &[],
    );
    search.add_record(&record).unwrap();

    assert_eq!(
        search.query_keys("inference", 10).unwrap(),
        vec!["gotcha:async-race"]
    );
}
/// A term that only occurs in the tags finds the record.
#[test]
fn query_keys_matches_tags_field() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    let record = make_record(
        "file:engine/mod.rs",
        "engine entry point",
        &["performance", "critical"],
    );
    search.add_record(&record).unwrap();

    assert_eq!(
        search.query_keys("performance", 10).unwrap(),
        vec!["file:engine/mod.rs"]
    );
}
/// A term that only occurs in the key finds the record.
#[test]
fn query_keys_matches_key_field() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    let record = make_record(
        "gotcha:surrealkv-versioning",
        "retention is always enabled",
        &[],
    );
    search.add_record(&record).unwrap();

    assert_eq!(
        search.query_keys("surrealkv", 10).unwrap(),
        vec!["gotcha:surrealkv-versioning"]
    );
}
/// With the key field boosted, a key hit must outrank a value-only hit.
#[test]
fn query_keys_key_match_ranks_above_value_only_match() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    let key_hit = make_record(
        "gotcha:petgraph-cycles",
        "watch for cycles in traversal",
        &[],
    );
    let value_hit = make_record(
        "gotcha:graph-general",
        "petgraph handles directed and undirected graphs for traversal and cycle detection",
        &[],
    );
    for record in [&key_hit, &value_hit] {
        search.add_record(record).unwrap();
    }

    let ranked = search.query_keys("petgraph", 10).unwrap();
    assert_eq!(ranked.len(), 2, "both records must match");
    assert_eq!(
        ranked[0], "gotcha:petgraph-cycles",
        "key match must rank first"
    );
}
/// `limit` caps the result count, and a larger limit returns every match.
#[test]
fn query_keys_limit_caps_results() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);

    let mut batch: Vec<Record> = Vec::with_capacity(20);
    for i in 0..20 {
        batch.push(make_record(
            &format!("gotcha:item-{i:02}"),
            "tokio runtime executor gotcha",
            &[],
        ));
    }
    let batch_refs: Vec<&Record> = batch.iter().collect();
    search.add_records(&batch_refs).unwrap();

    // (limit, expected number of hits)
    for (limit, expected) in [(1, 1), (7, 7), (20, 20), (999, 20)] {
        assert_eq!(search.query_keys("tokio", limit).unwrap().len(), expected);
    }
}
/// Scale check: 20 sentinel records are found exactly among 500,000 noise
/// records, and `limit` is honoured below and above the match count.
// NOTE(review): this indexes half a million documents on every run; consider
// marking it `#[ignore]` if suite time matters.
#[test]
fn query_keys_500k_noise_zero_false_positives_and_limit_correct() {
    use crate::store::record::{
        Category, ConfidenceScore, Priority, QualityScore, RecordLifecycle, RecordSource,
        RecordVersion, StalenessScore,
    };

    // A nil device id keeps fixture construction cheap and deterministic.
    let device_id = uuid::Uuid::nil();
    let build = |key: String, value: String| -> Record {
        Record {
            key,
            value,
            category: Category::File,
            priority: Priority::Normal,
            tags: vec![],
            created_at: 0,
            updated_at: 0,
            ref_url: None,
            staleness: StalenessScore::fresh(),
            lifecycle: RecordLifecycle::Active,
            version: RecordVersion {
                device_id,
                logical_clock: 1,
                wall_clock: 0,
            },
            quality: QualityScore::layer0_default(),
            access_count: 0,
            last_accessed: 0,
            source: RecordSource::StaticAnalysis,
            confidence: ConfidenceScore::for_new_record(&RecordSource::StaticAnalysis),
            gap_analysis_score: 0.0,
            payload: None,
        }
    };

    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);

    let mut noise: Vec<Record> = Vec::with_capacity(500_000);
    for i in 0..500_000_usize {
        noise.push(build(
            format!("file:src/module_{i:06}.rs"),
            format!("module {i} handles initialization routing configuration management dispatch"),
        ));
    }
    let noise_refs: Vec<&Record> = noise.iter().collect();
    search.add_records(&noise_refs).unwrap();

    let mut targets: Vec<Record> = Vec::with_capacity(20);
    for i in 0..20_usize {
        targets.push(build(
            format!("gotcha:target-{i:02}"),
            format!("zqx_sentinel_500k_proof unique term record {i} extra text filler"),
        ));
    }
    let target_refs: Vec<&Record> = targets.iter().collect();
    search.add_records(&target_refs).unwrap();

    let keys = search.query_keys("zqx_sentinel_500k_proof", 20).unwrap();
    assert_eq!(
        keys.len(),
        20,
        "expected 20 hits from 500,020 records, got {}",
        keys.len()
    );
    // Every target is present...
    for target in &targets {
        let k = &target.key;
        assert!(keys.contains(k), "missing target key: {k}");
    }
    // ...and nothing else is.
    for k in &keys {
        assert!(
            k.starts_with("gotcha:target-"),
            "noise doc '{k}' leaked into results"
        );
    }

    let limited = search.query_keys("zqx_sentinel_500k_proof", 5).unwrap();
    assert_eq!(
        limited.len(),
        5,
        "limit=5 must cap results even with 20 matching docs in 500k corpus"
    );

    let over = search.query_keys("zqx_sentinel_500k_proof", 999).unwrap();
    assert_eq!(
        over.len(),
        20,
        "limit > match count must return all matches, not panic"
    );
}
/// A dangling `AND` is tolerated and the remaining term still matches.
#[test]
fn query_keys_malformed_trailing_operator_does_not_error() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    search
        .add_record(&make_record(
            "gotcha:async-race",
            "inference in async context",
            &[],
        ))
        .unwrap();

    let hits = search.query_keys("inference AND", 10).unwrap();
    let expected = "gotcha:async-race".to_string();
    assert!(
        hits.contains(&expected),
        "lenient parse must still match 'inference'"
    );
}
/// An unbalanced parenthesis must not turn into an error.
#[test]
fn query_keys_unclosed_paren_does_not_error() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    search
        .add_record(&make_record("gotcha:foo", "tokio runtime issue", &[]))
        .unwrap();

    // Only success matters here; the hits themselves are not inspected.
    let _ = search.query_keys("(tokio", 10).unwrap();
}
/// Referring to a field the schema does not have must not error.
#[test]
fn query_keys_unknown_field_ref_does_not_error() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);

    // Only success matters here; the hits themselves are not inspected.
    let _ = search.query_keys("nonexistent_field:value", 10).unwrap();
}
/// Punctuation-only and otherwise odd queries must return `Ok`.
#[test]
fn query_keys_special_chars_do_not_error() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);

    let odd_queries = ["!@#$%^&*()", "+++---", "\"\"", "\\n\\t", ":::", "NULL\0"];
    for q in odd_queries {
        assert!(
            search.query_keys(q, 10).is_ok(),
            "query {q:?} must not return Err"
        );
    }
}
/// A record whose value contains non-ASCII text is still found by one of
/// its ASCII terms.
#[test]
fn query_keys_unicode_content_is_searchable() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    let record = make_record(
        "decision:i18n",
        "latency gotcha for データベース queries",
        &[],
    );
    search.add_record(&record).unwrap();

    assert_eq!(
        search.query_keys("latency", 10).unwrap(),
        vec!["decision:i18n"]
    );
}
/// Indexing the same record twice must leave exactly one hit for its key.
#[test]
fn query_keys_duplicate_indexing_returns_key_exactly_once() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    let record = make_record("gotcha:dup", "duplicate indexing scenario", &[]);
    for _ in 0..2 {
        search.add_record(&record).unwrap();
    }

    let hits = search.query_keys("duplicate", 10).unwrap();
    assert_eq!(
        hits,
        vec!["gotcha:dup"],
        "same key should only exist once in the index"
    );
}
/// Re-adding a key with a new value drops the old value's terms.
#[test]
fn query_keys_updated_record_replaces_old_terms() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    let before = make_record("gotcha:update", "oldterm sentinel", &[]);
    let after = make_record("gotcha:update", "newterm sentinel", &[]);

    search.add_record(&before).unwrap();
    let old_hits = search.query_keys("oldterm", 10).unwrap();
    assert_eq!(old_hits, vec!["gotcha:update"]);

    search.add_record(&after).unwrap();
    let stale_hits = search.query_keys("oldterm", 10).unwrap();
    assert!(stale_hits.is_empty());
    let new_hits = search.query_keys("newterm", 10).unwrap();
    assert_eq!(new_hits, vec!["gotcha:update"]);
}
/// After `delete_key`, the record no longer shows up in query results.
#[test]
fn delete_key_removes_record_from_results() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    search
        .add_record(&make_record("gotcha:delete", "delete_me sentinel", &[]))
        .unwrap();

    let before = search.query_keys("delete_me", 10).unwrap();
    assert_eq!(before, vec!["gotcha:delete"]);

    search.delete_key("gotcha:delete").unwrap();

    let after = search.query_keys("delete_me", 10).unwrap();
    assert!(after.is_empty());
}
/// A record is visible to queries right after `add_record` returns.
#[test]
fn query_keys_immediately_searchable_after_add_record() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    search
        .add_record(&make_record(
            "gotcha:immediate",
            "petgraph traversal depth limit",
            &[],
        ))
        .unwrap();

    assert_eq!(
        search.query_keys("petgraph", 10).unwrap(),
        vec!["gotcha:immediate"]
    );
}
/// Every record of a batch is visible right after `add_records` returns.
#[test]
fn query_keys_sees_all_records_after_add_records_batch() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);

    let mut batch: Vec<Record> = Vec::with_capacity(10);
    for i in 0..10 {
        batch.push(make_record(
            &format!("gotcha:batch-{i}"),
            "batchwrite rayon parallel",
            &[],
        ));
    }
    let batch_refs: Vec<&Record> = batch.iter().collect();
    search.add_records(&batch_refs).unwrap();

    let hits = search.query_keys("rayon", 20).unwrap();
    assert_eq!(
        hits.len(),
        10,
        "all 10 batch records must be searchable immediately"
    );
}
/// A term that occurs in no record yields an empty result.
#[test]
fn query_keys_no_match_returns_empty() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    search
        .add_record(&make_record(
            "gotcha:foo",
            "unrelated content about bananas",
            &[],
        ))
        .unwrap();

    let hits = search
        .query_keys("surrealdb_not_in_any_record", 10)
        .unwrap();
    assert!(hits.is_empty());
}
/// Results carry the record key, never the matched value text.
#[test]
fn query_keys_returns_key_strings_not_values() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    let record = make_record(
        "decision:use-surrealkv",
        "SurrealKV chosen for durability guarantees",
        &[],
    );
    search.add_record(&record).unwrap();

    let hits = search.query_keys("durability", 10).unwrap();
    assert_eq!(
        hits,
        vec!["decision:use-surrealkv"],
        "must return the key string, not the value body"
    );
}
/// With a two-word query, the record containing both words ranks first.
#[test]
fn query_keys_multi_word_query_matches_records_with_all_terms() {
    let tmp = TempDir::new().unwrap();
    let search = open_search(&tmp);
    let with_both = make_record("gotcha:both-terms", "tantivy petgraph integration", &[]);
    let with_one = make_record("gotcha:one-term", "tantivy only record", &[]);
    for record in [&with_both, &with_one] {
        search.add_record(record).unwrap();
    }

    let ranked = search.query_keys("tantivy petgraph", 10).unwrap();
    assert!(!ranked.is_empty());
    assert_eq!(
        ranked[0], "gotcha:both-terms",
        "record containing both query terms must rank first"
    );
}
}